From 2af01a34da9d2f54d61d3cbe8d5cde0664d6d6b4 Mon Sep 17 00:00:00 2001
From: GarethCabournDavies
Date: Thu, 18 Apr 2024 06:03:54 -0700
Subject: [PATCH] Start getting recent stat changes into module

---
 bin/pycbc_live             | 36 +++++++++++++-----
 pycbc/events/coinc_rate.py | 19 +++++++---
 pycbc/events/stat.py       | 78 ++++++++++++++++++++++----------
 3 files changed, 86 insertions(+), 47 deletions(-)

diff --git a/bin/pycbc_live b/bin/pycbc_live
index 7cfb3af8290..72914485133 100755
--- a/bin/pycbc_live
+++ b/bin/pycbc_live
@@ -104,6 +104,7 @@ class LiveEventManager(object):
         self.padata = livepau.PAstroData(args.p_astro_spec, args.bank_file)
         self.use_date_prefix = args.day_hour_output_prefix
         self.ifar_upload_threshold = args.ifar_upload_threshold
+        self.pvalue_lookback_time = args.pvalue_lookback_time
         self.pvalue_livetime = args.pvalue_combination_livetime
         self.gracedb_server = args.gracedb_server
         self.gracedb_search = args.gracedb_search
@@ -221,7 +222,8 @@ class LiveEventManager(object):
                         self.data_readers[ifo],
                         self.bank,
                         template_id,
-                        coinc_times
+                        coinc_times,
+                        lookback=self.pvalue_lookback_time
                     )
                     if pvalue_info is None:
                         continue
@@ -802,6 +804,20 @@ class LiveEventManager(object):
                 store_psd[ifo].save(fname, group='%s/psd' % ifo)
 
 
+def check_max_length(args, waveforms):
+    """Check that the `--max-length` option is sufficient to accommodate the
+    longest template in the bank and the PSD estimation options.
+    """
+    lengths = numpy.array([1.0 / wf.delta_f for wf in waveforms])
+    psd_len = args.psd_segment_length * (args.psd_samples // 2 + 1)
+    max_length = max(lengths.max() + args.pvalue_lookback_time, psd_len)
+    if max_length > args.max_length:
+        raise ValueError(
+            '--max-length is too short for this template bank. '
+            f'Use at least {max_length}.'
+        )
+
+
 parser = argparse.ArgumentParser(description=__doc__)
 pycbc.waveform.bank.add_approximant_arg(parser)
 parser.add_argument('--verbose', action='store_true')
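The new `check_max_length` helper folds the p-value lookback window into the minimum strain-buffer length: the buffer must cover either the longest template plus the lookback time, or the PSD estimation stack, whichever is larger. A minimal sketch of the bound it enforces, with `SimpleNamespace` standing in for the parsed arguments and hypothetical template `delta_f` values:

```python
from types import SimpleNamespace

import numpy

# Hypothetical stand-ins for the parsed args and a two-template bank;
# a template's duration in seconds is 1 / delta_f.
args = SimpleNamespace(psd_segment_length=4, psd_samples=32,
                       pvalue_lookback_time=150, max_length=256)
template_delta_fs = numpy.array([1.0 / 64, 1.0 / 128])  # 64 s and 128 s

lengths = 1.0 / template_delta_fs
psd_len = args.psd_segment_length * (args.psd_samples // 2 + 1)  # 4 * 17 = 68 s
required = max(lengths.max() + args.pvalue_lookback_time, psd_len)  # 278 s

# 278 s > 256 s, so check_max_length would raise ValueError here and
# ask for a --max-length of at least 278.
assert required > args.max_length
```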
@@ -962,6 +978,10 @@ parser.add_argument('--enable-background-estimation', default=False, action='sto
 parser.add_argument('--ifar-double-followup-threshold', type=float,
                     required=True,
                     help='Inverse-FAR threshold to followup double coincs with'
                          'additional detectors')
+parser.add_argument('--pvalue-lookback-time', type=float, default=150,
+                    metavar='SECONDS',
+                    help='Lookback time for the calculation of the p-value in '
+                         'followup detectors.')
 parser.add_argument('--pvalue-combination-livetime', type=float, required=True,
                     help="Livetime used for p-value combination with followup "
                          "detectors, in years")
@@ -985,6 +1005,10 @@ parser.add_argument('--gracedb-labels', metavar='LABEL', nargs='+',
 parser.add_argument('--ifar-upload-threshold', type=float, required=True,
                     help='Inverse-FAR threshold for uploading candidate '
                          'triggers to GraceDB, in years.')
+parser.add_argument('--coinc-window-pad', type=float,
+                    help="Amount of time, in seconds, allowed to form a "
+                         "coincidence in addition to the time of flight.",
+                    default=0.002)
 parser.add_argument('--file-prefix', default='Live')
 
 parser.add_argument('--round-start-time', type=int, metavar='X',
@@ -1106,13 +1130,10 @@ with ctx:
         print(e)
         exit()
 
-    maxlen = args.psd_segment_length * (args.psd_samples // 2 + 1)
     if evnt.rank > 0:
         bank.table.sort(order='mchirp')
         waveforms = list(bank[evnt.rank-1::evnt.size-1])
-        lengths = numpy.array([1.0 / wf.delta_f for wf in waveforms])
-        psd_len = args.psd_segment_length * (args.psd_samples // 2 + 1)
-        maxlen = max(lengths.max(), psd_len)
+        check_max_length(args, waveforms)
         mf = LiveBatchMatchedFilter(waveforms, args.snr_threshold,
                                     args.chisq_bins, sg_chisq,
                                     snr_abort_threshold=args.snr_abort_threshold,
@@ -1134,11 +1155,8 @@ with ctx:
     # Initialize the data readers for all detectors. For rank 0, we need data
     # from all detectors, including the localization-only ones. For higher
     # ranks, we only need the detectors that can generate candidates.
-    if args.max_length is not None:
-        maxlen = args.max_length
-    maxlen = int(maxlen)
     data_reader = {
-        ifo: StrainBuffer.from_cli(ifo, args, maxlen)
+        ifo: StrainBuffer.from_cli(ifo, args)
         for ifo in (evnt.ifos if evnt.rank == 0 else evnt.trigg_ifos)
     }
     evnt.data_readers = data_reader
diff --git a/pycbc/events/coinc_rate.py b/pycbc/events/coinc_rate.py
index 30ffa14e546..a05646f050a 100644
--- a/pycbc/events/coinc_rate.py
+++ b/pycbc/events/coinc_rate.py
@@ -92,7 +92,7 @@ def combination_noise_rate(rates, slop):
     return numpy.exp(combination_noise_lograte(log_rates, slop))
 
 
-def combination_noise_lograte(log_rates, slop):
+def combination_noise_lograte(log_rates, slop, dets=None):
     """
     Calculate the expected rate of noise coincidences for a combination of
     detectors given log of single detector noise rates
@@ -105,6 +105,9 @@ def combination_noise_lograte(log_rates, slop):
     slop: float
         time added to maximum time-of-flight between detectors to account
         for timing error
+    dets: dict
+        Key: ifo string, Value: pycbc.detector.Detector object
+        If not provided, will be created from the ifo strings
 
     Returns
     -------
@@ -112,13 +115,15 @@ def combination_noise_lograte(log_rates, slop):
         Expected log coincidence rate in the combination, units Hz
     """
     # multiply product of trigger rates by the overlap time
-    allowed_area = multiifo_noise_coincident_area(list(log_rates), slop)
+    allowed_area = multiifo_noise_coincident_area(list(log_rates),
+                                                  slop,
+                                                  dets=dets)
     # list(dict.values()) is python-3-proof
     rateprod = numpy.sum(list(log_rates.values()), axis=0)
     return numpy.log(allowed_area) + rateprod
 
 
-def multiifo_noise_coincident_area(ifos, slop):
+def multiifo_noise_coincident_area(ifos, slop, dets=None):
     """
     Calculate the total extent of time offset between 2 detectors,
     or area of the 2d space of time offsets for 3 detectors, for
@@ -131,6 +136,9 @@ def multiifo_noise_coincident_area(ifos, slop):
         list of interferometers
     slop: float
         extra time to add to maximum time-of-flight for timing error
+    dets: dict
+        Key: ifo string, Value: pycbc.detector.Detector object
+        If not provided, will be created from the ifo strings
 
     Returns
     -------
@@ -138,9 +146,8 @@ def multiifo_noise_coincident_area(ifos, slop):
         area in units of seconds^(n_ifos-1) that coincident values can fall in
     """
     # set up detector objects
-    dets = {}
-    for ifo in ifos:
-        dets[ifo] = pycbc.detector.Detector(ifo)
+    if dets is None:
+        dets = {ifo: pycbc.detector.Detector(ifo) for ifo in ifos}
 
     n_ifos = len(ifos)
     if n_ifos == 2:
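Both rate functions now accept a prebuilt `dets` mapping, so callers such as `rank_stat_coinc` (below) can construct the `pycbc.detector.Detector` objects once and reuse them on every ranking call instead of rebuilding them per event. A rough usage sketch; it assumes the two-detector branch returns the time-difference extent 2 * (time of flight + slop), consistent with the docstring above:

```python
import pycbc.detector
from pycbc.events import coinc_rate

ifos = ['H1', 'L1']
# Build the Detector objects once; with the new `dets` argument the
# per-call construction inside coinc_rate is skipped.
dets = {ifo: pycbc.detector.Detector(ifo) for ifo in ifos}

slop = 0.002  # e.g. the --coinc-window-pad default, in seconds
area = coinc_rate.multiifo_noise_coincident_area(ifos, slop, dets=dets)

# For two detectors the coincident "area" is the allowed time-difference
# window: twice the padded light travel time between the sites.
tof = dets['H1'].light_travel_time_to_detector(dets['L1'])
assert abs(area - 2 * (tof + slop)) < 1e-12
```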
diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
index a6c647bcb54..b81ae9a76d3 100644
--- a/pycbc/events/stat.py
+++ b/pycbc/events/stat.py
@@ -760,8 +760,10 @@ def coinc_lim_for_thresh(
         if not self.has_hist:
             self.get_hist()
 
-        fixed_stat = [b["snglstat"] for a, b in sngls_list if a != limifo][0]
-        s1 = thresh**2.0 - fixed_stat**2.0
+        fixed_stat_sq = sum(
+            [b["snglstat"] ** 2 for a, b in sngls_list if a != limifo]
+        )
+        s1 = thresh ** 2.0 - fixed_stat_sq
         # Assume best case scenario and use maximum signal rate
         s1 -= 2.0 * self.hist_max
         s1[s1 < 0] = 0
@@ -838,6 +840,9 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
             self.kwargs.get("minimum_statistic_cutoff", -30.0)
         )
 
+        # This will be used to keep track of the template number being used
+        self.curr_tnum = None
+
         # Go through the keywords and add class information as needed:
         if self.kwargs["sensitive_volume"]:
             # Add network sensitivity benchmark
@@ -850,7 +855,6 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
                 axis=0,
             )
             self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma)
-            self.curr_tnum = None
 
         if self.kwargs["dq"]:
             # Reweight the noise rate by the dq reweighting factor
@@ -959,11 +963,6 @@ def setup_segments(self, key):
 
     def find_dq_noise_rate(self, trigs, dq_state):
         """Get dq values for a specific ifo and dq states"""
-        try:
-            tnum = trigs.template_num
-        except AttributeError:
-            tnum = trigs["template_id"]
-
         try:
             ifo = trigs.ifo
         except AttributeError:
@@ -976,10 +975,10 @@ def find_dq_noise_rate(self, trigs, dq_state):
 
         if ifo in self.dq_rates_by_state:
             for i, st in enumerate(dq_state):
-                if isinstance(tnum, numpy.ndarray):
-                    bin_name = self.dq_bin_by_tid[ifo][tnum[i]]
+                if isinstance(self.curr_tnum, numpy.ndarray):
+                    bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum[i]]
                 else:
-                    bin_name = self.dq_bin_by_tid[ifo][tnum]
+                    bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum]
                 dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st]
         return dq_val
 
@@ -1084,17 +1083,17 @@ def find_fits(self, trigs):
             The thresh fit value(s)
         """
         try:
-            tnum = trigs.template_num  # exists if accessed via coinc_findtrigs
             ifo = trigs.ifo
         except AttributeError:
-            tnum = trigs["template_id"]  # exists for SingleDetTriggers
-            assert len(self.ifos) == 1
-            # Should be exactly one ifo provided
-            ifo = self.ifos[0]
+            # trigs will be like a dictionary, get the ifo information
+            ifo = trigs.get('ifo', None)
+            if ifo is None:
+                ifo = self.ifos[0]
+        assert ifo in self.ifos
         # fits_by_tid is a dictionary of dictionaries of arrays
         # indexed by ifo / coefficient name / template_id
-        alphai = self.fits_by_tid[ifo]["smoothed_fit_coeff"][tnum]
-        ratei = self.fits_by_tid[ifo]["smoothed_rate_above_thresh"][tnum]
+        alphai = self.fits_by_tid[ifo]["smoothed_fit_coeff"][self.curr_tnum]
+        ratei = self.fits_by_tid[ifo]["smoothed_rate_above_thresh"][self.curr_tnum]
         thresh = self.fits_by_tid[ifo]["thresh"]
         return alphai, ratei, thresh
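`find_fits` and `find_dq_noise_rate` now read the template number from `self.curr_tnum`, which is set once per call chain in `lognoiserate` (next hunk) via the same attribute-versus-key duck typing used throughout. A standalone sketch of that access pattern; `get_template_number` and `CoincTrigs` are illustrative names, not part of the patch:

```python
import numpy

def get_template_number(trigs):
    # Mirrors the try/except in lognoiserate: attribute access first
    # (coinc_findtrigs), then mapping-style access (dict-like triggers).
    try:
        return trigs.template_num
    except AttributeError:
        return trigs["template_id"]

class CoincTrigs:
    template_num = 7          # a single number

dict_trigs = {"template_id": numpy.array([7, 7, 9])}  # one entry per trigger

print(get_template_number(CoincTrigs()))  # 7
print(get_template_number(dict_trigs))    # [7 7 9]
```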
@@ -1155,6 +1154,15 @@ def lognoiserate(self, trigs):
         lognoisel: numpy.array
             Array of log noise rate density for each input trigger.
         """
+        # What is the template number currently being used?
+        try:
+            # exists if accessed via coinc_findtrigs, this is a number
+            self.curr_tnum = trigs.template_num
+        except AttributeError:
+            # exists for SingleDetTriggers & pycbc_live get_coinc,
+            # this is a numpy array
+            self.curr_tnum = trigs["template_id"]
+
         alphai, ratei, thresh = self.find_fits(trigs)
         sngl_stat = self.get_sngl_ranking(trigs)
         lognoisel = (
@@ -1230,14 +1238,6 @@ def single(self, trigs):
         if self.kwargs["sensitive_volume"]:
             # populate fields to allow sensitive volume factor calculation
             singles["sigmasq"] = trigs["sigmasq"][:]
-            try:
-                # exists if accessed via coinc_findtrigs
-                self.curr_tnum = trigs.template_num
-            except AttributeError:
-                # exists for SingleDetTriggers
-                self.curr_tnum = trigs["template_id"]
-                # Should only be one ifo fit file provided
-                assert len(self.ifos) == 1
             # Store benchmark log volume as single-ifo information since
             # the ranking methods do not have access to template id
             singles["benchmark_logvol"] = self.benchmark_logvol[self.curr_tnum]
@@ -1249,9 +1249,14 @@ def single(self, trigs):
 
         if self.kwargs["chirp_mass"]:
             from pycbc.conversions import mchirp_from_mass1_mass2
-            self.curr_mchirp = mchirp_from_mass1_mass2(
-                trigs.param["mass1"], trigs.param["mass2"]
-            )
+            try:
+                mass1 = trigs.param['mass1']
+                mass2 = trigs.param['mass2']
+            except AttributeError:
+                mass1 = trigs['mass1']
+                mass2 = trigs['mass2']
+            self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2)
+
         return numpy.array(singles, ndmin=1)
 
     def sensitive_volume_factor(self, sngls):
@@ -1316,7 +1321,11 @@ def logsignalrate_shared(self, sngls_info):
 
         if self.kwargs["chirp_mass"]:
             # chirp mass reweighting
-            mchirp = min(self.curr_mchirp, self.mcm)
+            if isinstance(self.curr_mchirp, numpy.ndarray):
+                mchirp = numpy.minimum(self.curr_mchirp, self.mcm)
+            else:
+                # curr_mchirp will be a number
+                mchirp = min(self.curr_mchirp, self.mcm)
             sr_factor += numpy.log((mchirp / 20.0) ** (11.0 / 3.0))
 
         if self.kwargs["kde"]:
@@ -1393,8 +1402,11 @@ def rank_stat_coinc(
         # Basic noise rate: sum of noise rates multiplied by the
         # window they can form coincidences in
         ln_noise_rate = coinc_rate.combination_noise_lograte(
-            sngl_dict, kwargs["time_addition"]
+            sngl_dict,
+            kwargs["time_addition"],
+            dets=kwargs.get('dets', None),
        )
+
         ln_noise_rate -= self.benchmark_lograte
 
         # Basic option is not to have any signal-based assumptions,
@@ -1407,7 +1419,9 @@ def rank_stat_coinc(
             # normalized
             # Extent of time-difference space occupied
             noise_twindow = coinc_rate.multiifo_noise_coincident_area(
-                self.hist_ifos, kwargs["time_addition"]
+                self.hist_ifos,
+                kwargs["time_addition"],
+                dets=kwargs.get('dets', None),
             )
             # Volume is the allowed time difference window, multiplied by 2pi
             # for each phase difference dimension and by allowed range of SNR
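With `curr_mchirp` now either a single number (one template at a time in pycbc_live) or a per-trigger numpy array (`SingleDetTriggers`), the chirp-mass cap at `self.mcm` must handle both shapes. A minimal standalone sketch of that branch; `cap_mchirp` is an illustrative helper name, with `mcm` as the cap value:

```python
import numpy

def cap_mchirp(curr_mchirp, mcm):
    # numpy.minimum caps an array elementwise; plain min() is kept for
    # the scalar case, mirroring the patched branch above.
    if isinstance(curr_mchirp, numpy.ndarray):
        return numpy.minimum(curr_mchirp, mcm)
    return min(curr_mchirp, mcm)

print(cap_mchirp(35.0, 20.0))                      # 20.0
print(cap_mchirp(numpy.array([8.0, 35.0]), 20.0))  # [ 8. 20.]
```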