From 1ebbc9ad53414bceb4a00ad25c22556dc1fe72e2 Mon Sep 17 00:00:00 2001 From: Lanqing Yuan Date: Wed, 27 Mar 2024 08:03:30 -0500 Subject: [PATCH] Rewritten matching events to enable event building efficiency study (#84) * move to development * put fv cut as an option now in load_peaks * removed useless plugins * deprecated old event matching * annotation * redefined matching * fixed typo * inner _touching_windows bug * deprecated load_events * draft for load events refurbished --- jobs/config.ini | 2 +- saltax/match/match.py | 198 +++++++++++++++++++++++++++++------------- saltax/match/utils.py | 87 +++++++++++++++++-- 3 files changed, 221 insertions(+), 66 deletions(-) diff --git a/jobs/config.ini b/jobs/config.ini index 126e8cc..e417d03 100644 --- a/jobs/config.ini +++ b/jobs/config.ini @@ -3,7 +3,7 @@ max_num_submit = 200 t_sleep = 1 [job] -container = xenonnt-2024.02.1.simg +container = xenonnt-development.simg # SE bootstrapped #runids = 47876,51907,47860,51905,51904,51906,51909,51911,51913,51939,47865,51910,49079,49077,53139,49432,52997,48693,48699,48692,49080,48698,49433,49028,53153,53167 # AmBe diff --git a/saltax/match/match.py b/saltax/match/match.py index 395494f..a2a777e 100644 --- a/saltax/match/match.py +++ b/saltax/match/match.py @@ -3,7 +3,7 @@ import strax -def filter_out_missing_s1_s2(truth, match): +def _filter_out_missing_s1_s2(truth, match): """ Filter out simulated events that have no S1 or S2 or both :param truth: truth from wfsim @@ -34,7 +34,7 @@ def filter_out_missing_s1_s2(truth, match): return truth[~bad_mask], match[~bad_mask] -def filter_out_multiple_s1_s2(truth, match): +def _filter_out_multiple_s1_s2(truth, match): """ Filter out simulated events that have multiple S1 or S2 reconstructed from wfsim. :param truth: truth from wfsim @@ -58,7 +58,7 @@ def filter_out_multiple_s1_s2(truth, match): return truth[~bad_mask], match[~bad_mask] -def filter_out_not_found(truth, match, s1s2=1): +def _filter_out_not_found(truth, match, s1s2=1): """ Filter out simulated events whose S1 are not found by pema. :param truth: truth from wfsim @@ -88,7 +88,7 @@ def filter_out_not_found(truth, match, s1s2=1): return truth[~bad_mask], match[~bad_mask] -def pair_events_to_filtered_truth(truth, events): +def _pair_events_to_filtered_truth(truth, events): """ Pair events to filtered truth. :param truth: filtered truth @@ -130,7 +130,7 @@ def pair_events_to_filtered_truth(truth, events): return matched_to -def pair_events_to_matched_simu(matched_simu, events): +def _pair_events_to_matched_simu(matched_simu, events): """ Pair salted events to matched simulation events. :param matched_simu: simulation events already matched to truth @@ -160,31 +160,7 @@ def pair_events_to_matched_simu(matched_simu, events): return matched_to -def pair_peaks_to_matched_simu(matched_simu, peaks, safeguard=0): - """ - Pair salted peaks to simulation peaks who have been matched to truth. - :param matched_simu: simulation peaks already matched to truth - :param peaks: peaks from saltax after reconstruction - :param safeguard: extension of time range to consider as matched, as a workaround for timing problem in wfsim s2 - :return: ind_simu_matched_to_salt: the index of matched simulation peaks for each salted peak - :return: ind_salt_matched_to_simu: the index of matched salted peaks for each simulation peak - """ - windows = strax.touching_windows(peaks, - matched_simu, - window=safeguard) - windows_length = windows[:,1] - windows[:,0] - simu_matched_to_salt_mask = windows_length == 1 - print("Filter out %s percent of simulation due to multiple or no matched sprinkled \ - peaks"%(np.sum(~simu_matched_to_salt_mask)/len(matched_simu)*100)) - - matched_simu = matched_simu[simu_matched_to_salt_mask] - ind_simu_matched_to_salt = np.where(simu_matched_to_salt_mask)[0] - ind_salt_matched_to_simu = windows[simu_matched_to_salt_mask][:,0] - - return ind_simu_matched_to_salt, ind_salt_matched_to_simu - - -def pair_salt_to_simu_events(truth, match, events_simu, events_salt): +def _pair_salt_to_simu_events(truth, match, events_simu, events_salt): """ Filter out bad simulation truth and then pair salted events to matched simulation events. :param truth: filtered truth @@ -193,42 +169,23 @@ def pair_salt_to_simu_events(truth, match, events_simu, events_salt): :param events_salt: events from saltax after reconstruction :return: ind_salt_matched_to_simu, ind_simu_matched_to_truth, truth_filtered, match_filtered """ - truth_filtered, match_filtered = filter_out_missing_s1_s2(truth, match) - truth_filtered, match_filtered = filter_out_multiple_s1_s2(truth_filtered, match_filtered) - truth_filtered, match_filtered = filter_out_not_found(truth_filtered, match_filtered) + truth_filtered, match_filtered = _filter_out_missing_s1_s2(truth, match) + truth_filtered, match_filtered = _filter_out_multiple_s1_s2(truth_filtered, match_filtered) + truth_filtered, match_filtered = _filter_out_not_found(truth_filtered, match_filtered) - ind_simu_matched_to_truth = pair_events_to_filtered_truth(truth_filtered, events_simu) + ind_simu_matched_to_truth = _pair_events_to_filtered_truth(truth_filtered, events_simu) events_simu_matched_to_truth = events_simu[ind_simu_matched_to_truth[ind_simu_matched_to_truth>=0]] - ind_salt_matched_to_simu = pair_events_to_matched_simu(events_simu_matched_to_truth, + ind_salt_matched_to_simu = _pair_events_to_matched_simu(events_simu_matched_to_truth, events_salt) return ind_salt_matched_to_simu, ind_simu_matched_to_truth, truth_filtered, match_filtered -def pair_salt_to_simu_peaks(truth, match, peaks_simu, peaks_salt): - """ - Filter out bad simulation truth and then pair salted events to matched simulation events. - :param truth: filtered truth - :param match: match_acceptance_extended from pema - :param peaks_simu: peaks from wfsim - :param peaks_salt: peaks from saltax after reconstruction - :return: ind_salt_matched_to_simu, ind_simu_matched_to_truth - """ - ind_simu_matched_to_truth = match['matched_to'] - peaks_simu_matched_to_truth = peaks_simu[ind_simu_matched_to_truth] - - (_ind_simu_matched_to_truth, - ind_salt_matched_to_simu) = pair_peaks_to_matched_simu(peaks_simu_matched_to_truth, - peaks_salt) - ind_simu_matched_to_truth = ind_simu_matched_to_truth[_ind_simu_matched_to_truth] - - return ind_salt_matched_to_simu, ind_simu_matched_to_truth - - -def match_events(truth, match, events_simu, events_salt): +def match_events_deprecated(truth, match, events_simu, events_salt): """ - Match salted events to simulation events. + Match salted events to simulation events. This function has been deprecated because it cannot track + event building efficiency in a proper way. :param truth: truth from wfsim :param match: match_acceptance_extended from pema :param events_simu: event_info from wfsim @@ -237,7 +194,7 @@ def match_events(truth, match, events_simu, events_salt): """ ind_salt_matched_to_simu, \ ind_simu_matched_to_truth, \ - _, _ = pair_salt_to_simu_events(truth, match, events_simu, events_salt) + _, _ = _pair_salt_to_simu_events(truth, match, events_simu, events_salt) events_simu_matched_to_truth = events_simu[ind_simu_matched_to_truth[ind_simu_matched_to_truth>=0]] events_salt_matched_to_simu = events_salt[ind_salt_matched_to_simu[ind_salt_matched_to_simu>=0]] @@ -245,6 +202,87 @@ def match_events(truth, match, events_simu, events_salt): return events_salt_matched_to_simu, events_simu_matched_to_salt + +def match_events(events_simu, events_salt): + """ + Match salted events to simulation events based on event or S1-S2 timing, without checking + simulation truth. + The procedures are: + 1. Filter out events_simu which is missing S1. Then the rest events_simu is like 'truth' + without ambience interference. Those events_simu are called events_simu_filtered. + 2. Find indices of events_salt whose time range overlaps with events_simu_filtered: + ind_salt_event_found. Those events_salt[ind_salt_event_found] are called + events_salt_event_found. + 3. Find indcies of events_simu_filtered whose time range overlaps with events_salt_event_found: + ind_simu_event_found. Those events_simu_filtered[ind_simu_event_found] are called + events_simu_event_found. + 4. Find indcies of events_salt whose S1 time range overlaps with events_simu_filtered's S1 time + range: ind_salt_s1_found. Those events_salt[ind_salt_s1_found] are called events_salt_s1_found. + 5. Find indcies of events_simu_filtered whose S1 time range overlaps with events_salt_s1_found: + ind_simu_s1_found. Those events_simu_filtered[ind_simu_s1_found] are called events_simu_s1_found. + 6. Find indcies of events_salt whose S2 time range overlaps with events_simu_filtered's S2 time + range: ind_salt_s2_found. Those events_salt[ind_salt_s2_found] are called events_salt_s2_found. + 7. Find indcies of events_simu_filtered whose S2 time range overlaps with events_salt_s2_found: + ind_simu_s2_found. Those events_simu_filtered[ind_simu_s2_found] are called events_simu_s2_found. + :param events_simu: event_info from wfsim + :param events_salt: event_info from saltax + :return: events_simu_filtered, + ind_salt_event_found, ind_simu_event_found, ind_simu_event_lost, ind_simu_event_split, + ind_salt_s1_found, ind_simu_s1_found, + ind_salt_s2_found, ind_simu_s2_found + """ + # Step 1. + # Filter out events_simu which is missing S1 + events_simu_filtered = events_simu[events_simu['s1_time']>=0] + + # Step 2 & 3. + # Find indices of events_salt whose time range overlaps with events_simu_filtered + event_touching_windows = strax.touching_windows(events_salt, events_simu_filtered) + event_windows_length = event_touching_windows[:,1] - event_touching_windows[:,0] + # If windows_length is not 1, the sprinkled event is either split or not found + mask_simu_event_found = event_windows_length == 1 + mask_simu_event_lost = event_windows_length == 0 + mask_simu_event_split = event_windows_length >= 2 + ind_simu_event_found = np.where(mask_simu_event_found)[0] + ind_simu_event_lost = np.where(mask_simu_event_lost)[0] + ind_simu_event_split = np.where(mask_simu_event_split)[0] + # Find indcies of events_salt whose event time range overlaps with events_simu_filtered + ind_salt_event_found = event_touching_windows[mask_simu_event_found][:,0] + + # Step 4 & 5. + # Assumed s1 times has been sorted! It should be a safe assumption because their events are sorted. + s1_touching_windows = strax.processing.general._touching_windows( + events_salt["s1_time"], + events_salt["s1_endtime"], + events_simu_filtered["s1_time"], + events_simu_filtered["s1_endtime"], + ) + s1_windows_length = s1_touching_windows[:,1] - s1_touching_windows[:,0] + mask_simu_s1_found = s1_windows_length == 1 + ind_simu_s1_found = np.where(mask_simu_s1_found)[0] + ind_salt_s1_found = s1_touching_windows[mask_simu_s1_found][:,0] + + # Step 6 & 7. + # Assumed s2 times has been sorted! It should be a safe assumption because their events are sorted. + s2_touching_windows = strax.processing.general._touching_windows( + events_salt["s2_time"], + events_salt["s2_endtime"], + events_simu_filtered["s2_time"], + events_simu_filtered["s2_endtime"], + ) + s2_windows_length = s2_touching_windows[:,1] - s2_touching_windows[:,0] + mask_simu_s2_found = s2_windows_length == 1 + ind_simu_s2_found = np.where(mask_simu_s2_found)[0] + ind_salt_s2_found = s2_touching_windows[mask_simu_s2_found][:,0] + + return ( + events_simu_filtered, + ind_salt_event_found, ind_simu_event_found, ind_simu_event_lost, ind_simu_event_split, + ind_salt_s1_found, ind_simu_s1_found, + ind_salt_s2_found, ind_simu_s2_found + ) + + def match_peaks(truth, match, peaks_simu, peaks_salt): """ Match salted peaks to simulation peaks. @@ -267,3 +305,47 @@ def match_peaks(truth, match, peaks_simu, peaks_salt): peaks_simu_matched_to_salt = peaks_simu_matched_to_truth[ind_salt_matched_to_simu>=0] return peaks_salt_matched_to_simu, peaks_simu_matched_to_salt + + +def pair_salt_to_simu_peaks(truth, match, peaks_simu, peaks_salt): + """ + Filter out bad simulation truth and then pair salted events to matched simulation events. + :param truth: filtered truth + :param match: match_acceptance_extended from pema + :param peaks_simu: peaks from wfsim + :param peaks_salt: peaks from saltax after reconstruction + :return: ind_salt_matched_to_simu, ind_simu_matched_to_truth + """ + ind_simu_matched_to_truth = match['matched_to'] + peaks_simu_matched_to_truth = peaks_simu[ind_simu_matched_to_truth] + + (_ind_simu_matched_to_truth, + ind_salt_matched_to_simu) = pair_peaks_to_matched_simu(peaks_simu_matched_to_truth, + peaks_salt) + ind_simu_matched_to_truth = ind_simu_matched_to_truth[_ind_simu_matched_to_truth] + + return ind_salt_matched_to_simu, ind_simu_matched_to_truth + + +def pair_peaks_to_matched_simu(matched_simu, peaks, safeguard=0): + """ + Pair salted peaks to simulation peaks who have been matched to truth. + :param matched_simu: simulation peaks already matched to truth + :param peaks: peaks from saltax after reconstruction + :param safeguard: extension of time range to consider as matched, as a workaround for timing problem in wfsim s2 + :return: ind_simu_matched_to_salt: the index of matched simulation peaks for each salted peak + :return: ind_salt_matched_to_simu: the index of matched salted peaks for each simulation peak + """ + windows = strax.touching_windows(peaks, + matched_simu, + window=safeguard) + windows_length = windows[:,1] - windows[:,0] + simu_matched_to_salt_mask = windows_length == 1 + print("Filter out %s percent of simulation due to multiple or no matched sprinkled \ + peaks"%(np.sum(~simu_matched_to_salt_mask)/len(matched_simu)*100)) + + matched_simu = matched_simu[simu_matched_to_salt_mask] + ind_simu_matched_to_salt = np.where(simu_matched_to_salt_mask)[0] + ind_salt_matched_to_simu = windows[simu_matched_to_salt_mask][:,0] + + return ind_simu_matched_to_salt, ind_salt_matched_to_simu diff --git a/saltax/match/utils.py b/saltax/match/utils.py index 1901119..6c39c5e 100644 --- a/saltax/match/utils.py +++ b/saltax/match/utils.py @@ -60,14 +60,16 @@ 'cut_cs2_area_fraction_top',]) -def load_peaks(runs, st_salt, st_simu, plugins=('peak_basics', 'peak_positions', - 'peak_shadow', 'peak_ambience', - 'cut_se_peaks')): +def load_peaks(runs, st_salt, st_simu, + plugins=('peak_basics', 'peak_positions_mlp'), + truth_fv_cut=True): """ Load peaks from the runs and do basic filtering suggeted by saltax.match_peaks :param runs: list of runs. :param st_salt: saltax context for salt mode :param st_simu: saltax context for simu mode + :param plugins: plugins to be loaded, default to ('peak_basics', 'peak_positions_mlp') + :param truth_fv_cut: whether to apply truth FV cut, default to True :return: peaks_simu: peaks related plugins from simulated dataset :return: peaks_salt: peaks related plugins from sprinkled dataset :return: truth: truth information in simulation @@ -86,10 +88,14 @@ def load_peaks(runs, st_salt, st_simu, plugins=('peak_basics', 'peak_positions', match_i = st_simu.get_array(run, 'match_acceptance_extended', progress_bar=False) # TODO: Ugly hardcoding for FV cut, need to be fixed - peaks_salt_matched_to_simu_i, \ - peaks_simu_matched_to_salt_i = saltax.match_peaks(truth_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)], - match_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)], - peaks_simu_i, peaks_salt_i) + if truth_fv_cut: + peaks_salt_matched_to_simu_i, \ + peaks_simu_matched_to_salt_i = saltax.match_peaks(truth_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)], + match_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)], + peaks_simu_i, peaks_salt_i) + else: + peaks_salt_matched_to_simu_i, \ + peaks_simu_matched_to_salt_i = saltax.match_peaks(truth_i, match_i, peaks_simu_i, peaks_salt_i) if i==0: peaks_simu = peaks_simu_i @@ -112,6 +118,73 @@ def load_peaks(runs, st_salt, st_simu, plugins=('peak_basics', 'peak_positions', def load_events(runs, st_salt, st_simu, plugins=('event_info', 'cuts_basic')): + """ + Load events from the runs and do basic filtering suggeted by saltax.match_events + :param runs: list of runs. + :param st_salt: saltax context for salt mode + :param st_simu: saltax context for simu mode + :return: events_simu: events from simulated dataset, filtered out those who miss S1 + :return: events_salt: events from sprinkled dataset + :return inds_dict: dictionary of indices of events from sprinkled or filtered simulated dataset, + regarding matching events or s1 or s2 + """ + # Initialize the dictionary to store the indices + inds_dict = { + "ind_salt_event_found": np.array([], dtype=np.int32), + "ind_salt_s1_found": np.array([], dtype=np.int32), + "ind_salt_s2_found": np.array([], dtype=np.int32), + "ind_simu_event_found": np.array([], dtype=np.int32), + "ind_simu_s1_found": np.array([], dtype=np.int32), + "ind_simu_s2_found": np.array([], dtype=np.int32) + } + + for i, run in enumerate(runs): + print("Loading run %s"%(run)) + + # Load plugins for both salt and simu + events_simu_i = st_simu.get_array(run, plugins, progress_bar=False) + events_salt_i = st_salt.get_array(run, plugins, progress_bar=False) + + # Get matching result + ( + events_simu_filtered_i, + ind_salt_event_found_i, ind_simu_event_found_i, ind_simu_event_lost_i, ind_simu_event_split_i, + ind_salt_s1_found_i, ind_simu_s1_found_i, + ind_salt_s2_found_i, ind_simu_s2_found_i + ) = saltax.match_events(events_simu_i, events_salt_i) + + # Load the indices into the dictionary + inds_dict["ind_salt_event_found"] = np.concatenate( + (inds_dict["ind_salt_event_found"], ind_salt_event_found_i) + ) + inds_dict["ind_salt_s1_found"] = np.concatenate( + (inds_dict["ind_salt_s1_found"], ind_salt_s1_found_i) + ) + inds_dict["ind_salt_s2_found"] = np.concatenate( + (inds_dict["ind_salt_s2_found"], ind_salt_s2_found_i) + ) + inds_dict["ind_simu_event_found"] = np.concatenate( + (inds_dict["ind_simu_event_found"], ind_simu_event_found_i) + ) + inds_dict["ind_simu_s1_found"] = np.concatenate( + (inds_dict["ind_simu_s1_found"], ind_simu_s1_found_i) + ) + inds_dict["ind_simu_s2_found"] = np.concatenate( + (inds_dict["ind_simu_s2_found"], ind_simu_s2_found_i) + ) + + # Concatenate the events + if i==0: + events_simu = events_simu_filtered_i + events_salt = events_salt_i + else: + events_simu = np.concatenate((events_simu, events_simu_filtered_i)) + events_salt = np.concatenate((events_salt, events_salt_i)) + + return events_simu, events_salt, inds_dict + + +def load_events_deprecated(runs, st_salt, st_simu, plugins=('event_info', 'cuts_basic')): """ Load events from the runs and do basic filtering suggeted by saltax.match_events :param runs: list of runs.