From 3ee593ce44b92fed9f80b129a2b48c6eb708126e Mon Sep 17 00:00:00 2001
From: Lanqing Yuan
Date: Sun, 17 Dec 2023 18:38:06 -0600
Subject: [PATCH] Viewer and basic matching (#70)

* nesttype to 8

* match and visual

* removed outdated versioning

* debug

* temporary patch

* fixing nan

* debug indexing

* plot_wf debug

* printing steps and lowered resolution

* adding run info

* adding run info

---
 jobs/config.ini          |  12 +--
 jobs/job.py              |  13 ++-
 requirements.txt         |   6 +-
 saltax/match/__init__.py |   4 +
 saltax/match/match.py    | 199 +++++++++++++++++++++++++++++++++++++++
 saltax/match/visual.py   | 191 +++++++++++++++++++++++++++++++++++++
 6 files changed, 409 insertions(+), 16 deletions(-)
 create mode 100644 saltax/match/match.py
 create mode 100644 saltax/match/visual.py

diff --git a/jobs/config.ini b/jobs/config.ini
index c708c50..51b64ef 100644
--- a/jobs/config.ini
+++ b/jobs/config.ini
@@ -4,20 +4,20 @@ t_sleep = 1
 
 [job]
 container = xenonnt-development.simg
-runids = 021952,021967,022175,023747
-output_folder = /scratch/midway2/yuanlq/salt/sr0_rn220
+runids = 021952,022510,022776,022773,022738,022735,022732,022726,022723,022717,022694,022691,022685,028636,031661,031124,029866,027982,030329,025444,027513,030891,025416,025161,028945,031709,028239,030937,028510,028636,031661,031124,029866,027982,030329,025444,027513,030891,025416,025161,028945,031709,028239,030937,028510,027323,026550,025953,031657,029198,026577,028031,025719,029208,026624,031147,028426,030102,029144,029472,027705,031156,031802,026503,029139,028047,025579,030079,025953,028542,025413,031803,029938,030287,026603,028890,030128,029410,031761
+output_folder = /project/lgrandi/yuanlq/salt/
 saltax_mode = salt
 faxconf_version = sr0_v4
 generator_name = flat
-recoil = 7
+recoil = 8
 mode = all
 
 [slurm]
 username = yuanlq
 account = pi-lgrandi
 job_title = salt
-partition = caslake
-qos = caslake
+partition = lgrandi
+qos = lgrandi
 mem_per_cpu = 30000
 cpus_per_task = 1
-log_dir = /home/yuanlq/.tmp_job_submission/saltax
\ No newline at end of file
+log_dir = /home/yuanlq/.tmp_job_submission/saltax
diff --git a/jobs/job.py b/jobs/job.py
index d7fac04..e723c82 100644
--- a/jobs/job.py
+++ b/jobs/job.py
@@ -61,7 +61,6 @@
     st.make(strrunid, 'merged_s2s')
     st.make(strrunid, 'event_basics')
     st.make(strrunid, 'event_info')
-    st.make(strrunid, 'cuts_basic')
     print("Used time:", datetime.now() - now)
 
     now = datetime.now()
@@ -70,12 +69,12 @@
            saltax mode %s. "%(runid, 'data'))
 
     st = saltax.contexts.sxenonnt(runid = runid,
-                    saltax_mode = 'simu',
-                    output_folder = output_folder,
-                    faxconf_version = faxconf_version,
-                    generator_name = generator_name,
-                    recoil = recoil,
-                    mode = mode)
+                                  saltax_mode = 'simu',
+                                  output_folder = output_folder,
+                                  faxconf_version = faxconf_version,
+                                  generator_name = generator_name,
+                                  recoil = recoil,
+                                  mode = mode)
     st.make(strrunid, 'raw_records_simu')
     st.make(strrunid, 'records')
     st.make(strrunid, 'peaklets')
diff --git a/requirements.txt b/requirements.txt
index eff0804..f9f2bba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
-strax==1.5.2
-straxen==2.1.2
-wfsim==1.0.2
+strax
+straxen
+wfsim
 scipy
 numpy
 pandas
diff --git a/saltax/match/__init__.py b/saltax/match/__init__.py
index c57bfd5..8b8fd99 100644
--- a/saltax/match/__init__.py
+++ b/saltax/match/__init__.py
@@ -1 +1,5 @@
 __version__ = '0.0.0'
+from . import match
+from .match import *
+from . import visual
+from .visual import *
\ No newline at end of file
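
Note on the job changes above: job.py builds one context per saltax mode with saltax.contexts.sxenonnt and runs the processing chain for each run. A minimal sketch of the three contexts that the matching and viewer code below expect, with parameter values mirroring jobs/config.ini; the 'data' mode string is an assumption based on the st_data docstring in visual.py, and the runid value/type is illustrative:

    import saltax

    # Illustrative values taken from jobs/config.ini above
    kwargs = dict(runid=21952,
                  output_folder='/project/lgrandi/yuanlq/salt/',
                  faxconf_version='sr0_v4',
                  generator_name='flat',
                  recoil=8,
                  mode='all')
    st_salt = saltax.contexts.sxenonnt(saltax_mode='salt', **kwargs)
    st_simu = saltax.contexts.sxenonnt(saltax_mode='simu', **kwargs)
    st_data = saltax.contexts.sxenonnt(saltax_mode='data', **kwargs)
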
diff --git a/saltax/match/match.py b/saltax/match/match.py
new file mode 100644
index 0000000..6546c18
--- /dev/null
+++ b/saltax/match/match.py
@@ -0,0 +1,199 @@
+import numpy as np
+from tqdm import tqdm
+
+
+def filter_out_missing_s1_s2(truth, match):
+    """
+    Filter out simulated events that have no S1 or S2 or both
+    :param truth: truth from wfsim
+    :param match: match_acceptance_extended from pema
+    :return: filtered truth and match
+    """
+    bad_mask = np.zeros(len(truth), dtype=bool)
+    max_event_number = np.max(np.unique(truth['event_number']))
+
+    for event_id in np.arange(max_event_number)+1:
+        selected_truth = truth[truth['event_number']==event_id]
+        indices = np.where(truth['event_number']==event_id)
+
+        # If cannot find any S1 or S2, then remove the event
+        if len(selected_truth[selected_truth['type']==1]) == 0:
+            bad_mask[indices] = True
+        elif len(selected_truth[selected_truth['type']==2]) == 0:
+            bad_mask[indices] = True
+
+        # If any S1 or S2 has 0 photon or 0 electron, then remove the event
+        elif 0 in selected_truth[selected_truth['type']==1]['n_photon']:
+            bad_mask[indices] = True
+        elif 0 in selected_truth[selected_truth['type']==2]['n_electron']:
+            bad_mask[indices] = True
+
+    print("Filter out %s percent of events due to missing S1 or S2 or both"\
+          % (np.sum(bad_mask)/len(truth)*100))
+    return truth[~bad_mask], match[~bad_mask]
+
+
+def filter_out_multiple_s1_s2(truth, match):
+    """
+    Filter out simulated events that have multiple S1 or S2 reconstructed from wfsim.
+    :param truth: truth from wfsim
+    :param match: match_acceptance_extended from pema
+    :return: filtered truth and match
+    """
+    bad_mask = np.zeros(len(truth), dtype=bool)
+    max_event_number = np.max(np.unique(truth['event_number']))
+
+    for event_id in np.arange(max_event_number)+1:
+        selected_truth = truth[truth['event_number']==event_id]
+        indices = np.where(truth['event_number']==event_id)
+
+        # If there are multiple S1 or S2, then remove the event
+        if (len(selected_truth[selected_truth['type']==1]) != 1 or
+            len(selected_truth[selected_truth['type']==2]) != 1):
+            bad_mask[indices] = True
+
+    print("Filter out %s percent of events due to multiple S1 or S2"\
+          % (np.sum(bad_mask)/len(truth)*100))
+    return truth[~bad_mask], match[~bad_mask]
+
+
+def filter_out_not_found(truth, match):
+    """
+    Filter out simulated events whose S1 and S2 are not found by pema.
+    :param truth: truth from wfsim
+    :param match: match_acceptance_extended from pema
+    :return: filtered truth and match
+    """
+    bad_mask = np.zeros(len(truth), dtype=bool)
+    max_event_number = np.max(np.unique(truth['event_number']))
+
+    for event_id in np.arange(max_event_number)+1:
+        selected_truth = truth[truth['event_number']==event_id]
+        indices = np.where(truth['event_number']==event_id)
+        selected_match = match[indices]
+
+        # The only outcome should be "found", otherwise remove the event
+        if len(selected_match['outcome']) == 1:
+            if np.unique(selected_match['outcome'])[0] != "found":
+                bad_mask[indices] = True
+        else:
+            bad_mask[indices] = True
+
+    print("Filter out %s percent of events due to S1 or S2 not found"\
+          % (np.sum(bad_mask)/len(truth)*100))
+    return truth[~bad_mask], match[~bad_mask]
+
+
+def pair_events_to_filtered_truth(truth, events):
+    """
+    Pair events to filtered truth.
+    :param truth: filtered truth
+    :param events: events from wfsim or saltax after reconstruction
+    :return: matched_to, the index of matched truth event for each event
+    """
+    array_dtype = [
+        ('s1_time', np.int64),
+        ('s1_endtime', np.int64),
+        ('s2_time', np.int64),
+        ('s2_endtime', np.int64),
+    ]
+    matched_truth_events_timing = np.zeros(int(len(truth)/2), dtype=array_dtype)
+    matched_truth_events_timing['s1_time'] = truth[truth['type']==1]['time']
+    matched_truth_events_timing['s1_endtime'] = truth[truth['type']==1]['endtime']
+    matched_truth_events_timing['s2_time'] = truth[truth['type']==2]['time']
+    matched_truth_events_timing['s2_endtime'] = truth[truth['type']==2]['endtime']
+
+    matched_to = np.zeros(len(matched_truth_events_timing), dtype=int)
+    for i,e_simu in enumerate(tqdm(matched_truth_events_timing)):
+        # Find the events whose S1 and S2 overlap with the truth's S1 and S2 time ranges
+        # Note that we only care about main S1 and main S2, otherwise we consider lost
+        # Temporary S1 only
+        j_selected_events = np.where((events['s1_endtime']>=e_simu['s1_time'])&
+                                     (e_simu['s1_endtime']>=events['s1_time']))[0]
+        #j_selected_events = np.where((events['s1_endtime']>=e_simu['s1_time'])&
+        #                             (e_simu['s1_endtime']>=events['s1_time'])&
+        #                             (events['s2_endtime']>=e_simu['s2_time'])&
+        #                             (e_simu['s2_endtime']>=events['s2_time']))[0]
+        assert len(j_selected_events) <= 1, "Multiple events found for one truth event!?"
+
+        # If no event is found, then we consider lost
+        if len(j_selected_events) == 0:
+            matched_to[i] = -99999
+        # If only one event is found, then we consider matched
+        elif len(j_selected_events) == 1:
+            matched_to[i] = j_selected_events[0]
+
+    return matched_to
+
+
+def pair_events_to_matched_simu(matched_simu, events):
+    """
+    Pair salted events to matched simulation events.
+    :param matched_simu: simulation events already matched to truth
+    :param events: events from saltax after reconstruction
+    :return: matched_to, the index of matched simulation events for each event
+    """
+    matched_to = np.zeros(len(matched_simu), dtype=int)
+    for i,e_simu in enumerate(tqdm(matched_simu)):
+        # Find the events whose S1 and S2 overlap with the truth's S1 and S2 time ranges
+        # Note that we only care about main S1 and main S2, otherwise we consider lost
+        j_selected_events = np.where((events['s1_endtime']>=e_simu['s1_time'])&
+                                     (e_simu['s1_endtime']>=events['s1_time'])&
+                                     (events['s2_endtime']>=e_simu['s2_time'])&
+                                     (e_simu['s2_endtime']>=events['s2_time']))[0]
+        assert len(j_selected_events) <= 1, "Multiple events found for one truth event!?"
+
+        # If no event is found, then we consider lost
+        if len(j_selected_events) == 0:
+            matched_to[i] = -99999
+        # If only one event is found, then we consider matched
+        elif len(j_selected_events) == 1:
+            matched_to[i] = j_selected_events[0]
+
+    return matched_to
+
+
+def pair_salt_to_simu(truth, match, events_simu, events_salt):
+    """
+    Filter out bad simulation truth and then pair salted events to matched simulation events.
+    :param truth: truth from wfsim
+    :param match: match_acceptance_extended from pema
+    :param events_simu: events from wfsim
+    :param events_salt: events from saltax after reconstruction
+    :return: ind_salt_matched_to_simu, ind_simu_matched_to_truth, truth_filtered, match_filtered
+    """
+    truth_filtered, match_filtered = filter_out_missing_s1_s2(truth, match)
+    truth_filtered, match_filtered = filter_out_multiple_s1_s2(truth_filtered, match_filtered)
+    # Temporarily turn off this filter because of wfsim bug in s2 timing
+    #truth_filtered, match_filtered = filter_out_not_found(truth_filtered, match_filtered)
+
+    ind_simu_matched_to_truth = pair_events_to_filtered_truth(truth_filtered, events_simu)
+    events_simu_matched_to_truth = events_simu[ind_simu_matched_to_truth[ind_simu_matched_to_truth>=0]]
+
+    ind_salt_matched_to_simu = pair_events_to_matched_simu(events_simu_matched_to_truth,
+                                                           events_salt)
+
+    return ind_salt_matched_to_simu, ind_simu_matched_to_truth, truth_filtered, match_filtered
+
+
+def match(truth, match, events_simu, events_salt):
+    """
+    Match salted events to simulation events.
+    :param truth: truth from wfsim
+    :param match: match_acceptance_extended from pema
+    :param events_simu: event_info from wfsim
+    :param events_salt: event_info from saltax
+    :return: events_salt_matched_to_simu, events_simu_matched_to_salt: matched events with equal length
+    """
+    ind_salt_matched_to_simu, \
+    ind_simu_matched_to_truth, \
+    _, _ = pair_salt_to_simu(truth, match, events_simu, events_salt)
+
+    events_simu_matched_to_truth = events_simu[ind_simu_matched_to_truth[ind_simu_matched_to_truth>=0]]
+    events_salt_matched_to_simu = events_salt[ind_salt_matched_to_simu[ind_salt_matched_to_simu>=0]]
+    events_simu_matched_to_salt = events_simu_matched_to_truth[ind_salt_matched_to_simu>=0]
+
+    return events_salt_matched_to_simu, events_simu_matched_to_salt
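
Usage note for the matching module above: match() chains the truth filters with the S1/S2 time-overlap pairing and returns equal-length arrays of salted events and their simulated counterparts. A minimal sketch of a call, assuming st_simu and st_salt are saltax contexts for the same run in 'simu' and 'salt' mode (see the context sketch earlier), that the 'truth', 'match_acceptance_extended' and 'event_info' data types are available from them (wfsim/pema/straxen conventions), and an illustrative run id:

    import saltax.match.match as sm

    runid = '021952'
    truth = st_simu.get_array(runid, 'truth')
    match_acceptance = st_simu.get_array(runid, 'match_acceptance_extended')
    events_simu = st_simu.get_array(runid, 'event_info')
    events_salt = st_salt.get_array(runid, 'event_info')

    # Row i of both outputs refers to the same sprinkled event
    events_salt_matched, events_simu_matched = sm.match(
        truth, match_acceptance, events_simu, events_salt)
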
diff --git a/saltax/match/visual.py b/saltax/match/visual.py
new file mode 100644
index 0000000..734d203
--- /dev/null
+++ b/saltax/match/visual.py
@@ -0,0 +1,191 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+def plot_wf(ind, st_salt, st_simu, st_data, runid, matched_simu,
+            event_ext_window_ns=2.4e6,
+            s1_ext_window_samples=25,
+            s2_ext_window_samples=100):
+    """
+    Plot waveforms for a single event, including full event waveform and zoomed-in S1 and S2 waveforms.
+    :param ind: index of the event in the matched_simu dataframe
+    :param st_salt: saltax context for salt mode
+    :param st_simu: saltax context for simu mode
+    :param st_data: saltax context for data mode
+    :param runid: runid of the event, example: '066666'
+    :param matched_simu: simu event_info already matched to truth. (Not necessarily matched to data!)
+    :param event_ext_window_ns: time window in ns to plot around the event, default 2.4e6 ns = 2.4 ms
+    :param s1_ext_window_samples: time window in samples to plot around S1, default 25 samples
+    :param s2_ext_window_samples: time window in samples to plot around S2, default 100 samples
+    """
+    print("Loading peaks and lone_hits for run %s event %s"%(runid, ind))
+
+    # Get time ranges in indices for events, S1 and S2
+    extended_simu_event_timerange_ns = (matched_simu['s1_time'][ind]-event_ext_window_ns,
+                                        matched_simu['s2_endtime'][ind]+event_ext_window_ns)
+    matched_simu_s1_timerange_i = (int((matched_simu['s1_time'][ind] - extended_simu_event_timerange_ns[0])/10),
+                                   int((matched_simu['s1_endtime'][ind]-extended_simu_event_timerange_ns[0])/10))
+    matched_simu_s2_timerange_i = (int((matched_simu['s2_time'][ind] - extended_simu_event_timerange_ns[0])/10),
+                                   int((matched_simu['s2_endtime'][ind]-extended_simu_event_timerange_ns[0])/10))
+
+
+    # Get peaks and lone hits for the event
+    peaks_salt_selected = st_salt.get_array(runid, "peaks",
+                                            time_range=extended_simu_event_timerange_ns,
+                                            progress_bar=False)
+    peaks_simu_selected = st_simu.get_array(runid, "peaks",
+                                            time_range=extended_simu_event_timerange_ns,
+                                            progress_bar=False)
+    peaks_data_selected = st_data.get_array(runid, "peaks",
+                                            time_range=extended_simu_event_timerange_ns,
+                                            progress_bar=False)
+    lhs_salt_selected = st_salt.get_array(runid, "lone_hits",
+                                          time_range=extended_simu_event_timerange_ns,
+                                          progress_bar=False)
+    lhs_simu_selected = st_simu.get_array(runid, "lone_hits",
+                                          time_range=extended_simu_event_timerange_ns,
+                                          progress_bar=False)
+    lhs_data_selected = st_data.get_array(runid, "lone_hits",
+                                          time_range=extended_simu_event_timerange_ns,
+                                          progress_bar=False)
+
+
+    # Get waveforms for the event
+    print("Building waveforms...")
+    total_length = int((extended_simu_event_timerange_ns[1] - extended_simu_event_timerange_ns[0])/10)
+    to_pes = st_data.get_single_plugin(runid, 'peaklets').to_pe
+
+    wf_salt_s1 = np.zeros(total_length)
+    wf_simu_s1 = np.zeros(total_length)
+    wf_salt_s2 = np.zeros(total_length)
+    wf_simu_s2 = np.zeros(total_length)
+    wf_salt_others = np.zeros(total_length)
+    wf_simu_others = np.zeros(total_length)
+    wf_data = np.zeros(total_length)
+
+    if len(peaks_salt_selected):
+        for p in peaks_salt_selected:
+            start_i = int((p['time'] - int(extended_simu_event_timerange_ns[0]))/10)
+            length = p['length']
+            dt = p['dt']
+            if p['type'] == 1:
+                for i in range(length):
+                    wf_salt_s1[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+            elif p['type'] == 2:
+                for i in range(length):
+                    wf_salt_s2[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+            else:
+                for i in range(length):
+                    wf_salt_others[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+    if len(lhs_salt_selected):
+        for l in lhs_salt_selected:
+            time_i = int((l['time'] - int(extended_simu_event_timerange_ns[0]))/10)
+            amp = l['area'] * to_pes[l['channel']]
+            wf_salt_others[time_i] += amp/10
+
+    if len(peaks_simu_selected):
+        for p in peaks_simu_selected:
+            start_i = int((p['time'] - int(extended_simu_event_timerange_ns[0]))/10)
+            length = p['length']
+            dt = p['dt']
+            if p['type'] == 1:
+                for i in range(length):
+                    wf_simu_s1[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+            elif p['type'] == 2:
+                for i in range(length):
+                    wf_simu_s2[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+            else:
+                for i in range(length):
+                    wf_simu_others[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+    if len(lhs_simu_selected):
+        for l in lhs_simu_selected:
+            time_i = int((l['time'] - int(extended_simu_event_timerange_ns[0]))/10)
+            amp = l['area'] * to_pes[l['channel']]
+            wf_simu_others[time_i] += amp/10
+
+    if len(peaks_data_selected):
+        for p in peaks_data_selected:
+            start_i = int((p['time'] - int(extended_simu_event_timerange_ns[0]))/10)
+            length = p['length']
+            dt = p['dt']
+            for i in range(length):
+                wf_data[start_i+i*int(dt/10):start_i+(i+1)*int(dt/10)] = p['data'][i] / dt * 10
+    if len(lhs_data_selected):
+        for l in lhs_data_selected:
+            time_i = int((l['time'] - int(extended_simu_event_timerange_ns[0]))/10)
+            amp = l['area'] * to_pes[l['channel']]
+            wf_data[time_i] += amp/10
+
+    # Plot full event waveform
+    print("Plotting waveforms for the whole event...")
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 8), dpi=150)
+    ax1.plot(wf_data, label='Data', color='k', alpha=0.2)
+    ax1.plot(wf_simu_s1, label='Simu S1', color='tab:blue')
+    ax1.plot(wf_simu_s2, label='Simu S2', color='tab:orange')
+    ax1.plot(wf_simu_others, label='Simu Others', color='tab:green')
+    ax1.axvspan(matched_simu_s1_timerange_i[0],matched_simu_s1_timerange_i[1], color='tab:blue', alpha=0.1, label='Simu S1 Range')
+    ax1.axvspan(matched_simu_s2_timerange_i[0],matched_simu_s2_timerange_i[1], color='tab:orange', alpha=0.1, label='Simu S2 Range')
+    ax1.legend()
+    ax1.set_title("Run %s Event %s: Simu CS1=%sPE, Simu CS2=%sPE"%(runid, ind,
+                                                                   int(10*matched_simu['cs1'][ind])/10,
+                                                                   int(10*matched_simu['cs2'][ind])/10))
+
+    ax2.plot(wf_data, label='Data', color='k', alpha=0.2)
+    ax2.plot(wf_salt_s1, label='Salt S1', color='b')
+    ax2.plot(wf_salt_s2, label='Salt S2', color='r')
+    ax2.plot(wf_salt_others, label='Salt Others', color='g')
+    ax2.axvspan(matched_simu_s1_timerange_i[0],matched_simu_s1_timerange_i[1], color='tab:blue', alpha=0.1, label='Simu S1 Range')
+    ax2.axvspan(matched_simu_s2_timerange_i[0],matched_simu_s2_timerange_i[1], color='tab:orange', alpha=0.1, label='Simu S2 Range')
+    ax2.legend()
+
+    ax1.set_ylabel("Amplitude [PE/10ns]")
+    ax2.set_xlabel("Time [10ns]")
+    ax2.set_ylabel("Amplitude [PE/10ns]")
+    fig.show()
+
+    # Zoom into S1 and S2 waveforms
+    print("Zooming into S1 and S2 respectively...")
+    fig, axs = plt.subplots(2, 2, figsize=(15, 8), dpi=150)
+    axs[0,0].plot(wf_data, label='Data', color='k', alpha=0.2)
+    axs[0,0].plot(wf_simu_s1, label='Simu S1', color='tab:blue')
+    axs[0,0].plot(wf_simu_s2, label='Simu S2', color='tab:orange')
+    axs[0,0].plot(wf_simu_others, label='Simu Others', color='tab:green')
+    axs[0,0].axvspan(matched_simu_s1_timerange_i[0],matched_simu_s1_timerange_i[1], color='tab:blue', alpha=0.1, label='Simu S1 Range')
+    axs[0,0].axvspan(matched_simu_s2_timerange_i[0],matched_simu_s2_timerange_i[1], color='tab:orange', alpha=0.1, label='Simu S2 Range')
+    axs[0,0].set_xlim(matched_simu_s1_timerange_i[0]-s1_ext_window_samples,
+                      matched_simu_s1_timerange_i[1]+s1_ext_window_samples)
+    axs[0,0].set_ylabel("Amplitude [PE/10ns]")
+    axs[0,0].legend()
+
+    axs[0,1].plot(wf_data, label='Data', color='k', alpha=0.2)
+    axs[0,1].plot(wf_simu_s1, label='Simu S1', color='tab:blue')
+    axs[0,1].plot(wf_simu_s2, label='Simu S2', color='tab:orange')
+    axs[0,1].plot(wf_simu_others, label='Simu Others', color='tab:green')
+    axs[0,1].axvspan(matched_simu_s1_timerange_i[0],matched_simu_s1_timerange_i[1], color='tab:blue', alpha=0.1, label='Simu S1 Range')
+    axs[0,1].axvspan(matched_simu_s2_timerange_i[0],matched_simu_s2_timerange_i[1], color='tab:orange', alpha=0.1, label='Simu S2 Range')
+    axs[0,1].set_xlim(matched_simu_s2_timerange_i[0]-s2_ext_window_samples,
+                      matched_simu_s2_timerange_i[1]+s2_ext_window_samples)
+    axs[0,1].legend()
+
+    axs[1,0].plot(wf_data, label='Data', color='k', alpha=0.2)
+    axs[1,0].plot(wf_salt_s1, label='Salt S1', color='b')
+    axs[1,0].plot(wf_salt_s2, label='Salt S2', color='r')
+    axs[1,0].plot(wf_salt_others, label='Salt Others', color='g')
+    axs[1,0].axvspan(matched_simu_s1_timerange_i[0],matched_simu_s1_timerange_i[1], color='tab:blue', alpha=0.1, label='Simu S1 Range')
+    axs[1,0].axvspan(matched_simu_s2_timerange_i[0],matched_simu_s2_timerange_i[1], color='tab:orange', alpha=0.1, label='Simu S2 Range')
+    axs[1,0].set_xlim(matched_simu_s1_timerange_i[0]-s1_ext_window_samples,
+                      matched_simu_s1_timerange_i[1]+s1_ext_window_samples)
+    axs[1,0].set_xlabel("Time [10ns]")
+    axs[1,0].set_ylabel("Amplitude [PE/10ns]")
+    axs[1,0].legend()
+
+    axs[1,1].plot(wf_data, label='Data', color='k', alpha=0.2)
+    axs[1,1].plot(wf_salt_s1, label='Salt S1', color='b')
+    axs[1,1].plot(wf_salt_s2, label='Salt S2', color='r')
+    axs[1,1].plot(wf_salt_others, label='Salt Others', color='g')
+    axs[1,1].axvspan(matched_simu_s1_timerange_i[0],matched_simu_s1_timerange_i[1], color='tab:blue', alpha=0.1, label='Simu S1 Range')
+    axs[1,1].axvspan(matched_simu_s2_timerange_i[0],matched_simu_s2_timerange_i[1], color='tab:orange', alpha=0.1, label='Simu S2 Range')
+    axs[1,1].set_xlim(matched_simu_s2_timerange_i[0]-s2_ext_window_samples,
+                      matched_simu_s2_timerange_i[1]+s2_ext_window_samples)
+    axs[1,1].set_xlabel("Time [10ns]")
+    axs[1,1].legend()
+    fig.show()
\ No newline at end of file
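
Usage note for the viewer: plot_wf() overlays the data-mode waveform with the simu-mode and salt-mode reconstructions around one matched simulated event, then zooms into the S1 and S2 windows. Continuing the sketch after match.py above (same assumed contexts, arrays and run id; st_data is a context in 'data' mode, and matched_simu is the simu event_info already paired to truth via pair_salt_to_simu):

    import saltax.match.match as sm
    import saltax.match.visual as sv

    ind_salt, ind_simu, truth_f, match_f = sm.pair_salt_to_simu(
        truth, match_acceptance, events_simu, events_salt)
    matched_simu = events_simu[ind_simu[ind_simu >= 0]]

    # Full-event waveform plus zoomed S1/S2 panels for the first matched event
    sv.plot_wf(0, st_salt, st_simu, st_data, runid, matched_simu)
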