Merge branch 'gwastro:master' into master

gwastro · Jun 22, 2024 · cbf447c · cbf447c
2 parents 68b6f26 + fa0c248
commit cbf447c
Show file tree

Hide file tree

Showing 28 changed files with 587 additions and 309 deletions.
diff --git a/bin/live/pycbc_live_combine_single_significance_fits b/bin/live/pycbc_live_combine_single_significance_fits
@@ -62,8 +62,10 @@ for f in args.trfits_files:
  bl = fit_f['bins_lower'][:]
  bu = fit_f['bins_upper'][:]
  sngl_rank = fit_f.attrs['sngl_ranking']
- fit_thresh = fit_f.attrs['fit_threshold']
- fit_func = fit_f.attrs['fit_function']
+ fit_thresh = {ifo: fit_f[ifo].attrs['fit_threshold']
+ for ifo in args.ifos}
+ fit_func = {ifo: fit_f[ifo].attrs['fit_function']
+ for ifo in args.ifos}
 
 # Now go back through the fit files and read the actual information. Skip the
 # files that have fit parameters inconsistent with what we found earlier.
@@ -80,9 +82,13 @@ for f in args.trfits_files:
  with h5py.File(f, 'r') as fits_f:
  # Check that the file uses the same setup as file 0, to make sure
  # all coefficients are comparable
+ new_fit_func = {ifo: fits_f[ifo].attrs['fit_function']
+ for ifo in args.ifos}
+ new_fit_thresh = {ifo: fits_f[ifo].attrs['fit_threshold']
+ for ifo in args.ifos}
  same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank
- and fits_f.attrs['fit_threshold'] == fit_thresh
- and fits_f.attrs['fit_function'] == fit_func
+ and new_fit_thresh == fit_thresh
+ and new_fit_func == fit_func
  and fits_f['bins_lower'].size == bl.size
  and all(fits_f['bins_lower'][:] == bl)
  and all(fits_f['bins_upper'][:] == bu))
@@ -99,7 +105,7 @@ for f in args.trfits_files:
  gps_last = 0
  gps_first = np.inf
  for ifo in args.ifos:
- if ifo not in fits_f:
+ if ifo not in fits_f or 'triggers' not in fits_f[ifo]:
  continue
  trig_times = fits_f[ifo]['triggers']['end_time'][:]
  gps_last = max(gps_last, trig_times.max())
@@ -143,7 +149,6 @@ cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.i
 logging.info("Writing results")
 
 fout = h5py.File(args.output, 'w')
-fout.attrs['fit_threshold'] = fit_thresh
 fout.attrs['conservative_percentile'] = args.conservative_percentile
 fout.attrs['ifos'] = args.ifos
 fout['bins_edges'] = list(bl) + [bu[-1]]
@@ -152,6 +157,8 @@ fout['fits_dates'] = ad + start_time_n
 for ifo in args.ifos:
  logging.info(ifo)
  fout_ifo = fout.create_group(ifo)
+ fout_ifo.attrs['fit_threshold'] = fit_thresh[ifo]
+ fout_ifo.attrs['fit_function'] = fit_func[ifo]
  l_times = np.array(live_times[ifo])
  total_time = l_times.sum()
  fout_ifo.attrs['live_time'] = total_time

diff --git a/bin/live/pycbc_live_plot_combined_single_significance_fits b/bin/live/pycbc_live_plot_combined_single_significance_fits
@@ -142,7 +142,7 @@ for ifo in ifos:
  continue
 
  l_times = separate_times[ifo]
- with np.errstate(divide='ignore'):
+ with np.errstate(divide='ignore', invalid='ignore'):
  rate = counts / l_times
 
  ma = mean_alpha[ifo][i]

diff --git a/bin/live/pycbc_live_plot_single_significance_fits b/bin/live/pycbc_live_plot_single_significance_fits
@@ -71,17 +71,22 @@ with h5py.File(args.trigger_fits_file, 'r') as trfit_f:
  ifos = [k for k in trfit_f.keys() if not k.startswith('bins')]
 
  # Grab some info from the attributes
- sngl_ranking = trfit_f.attrs['sngl_ranking']
- fit_threshold = trfit_f.attrs['fit_threshold']
- fit_function = trfit_f.attrs['fit_function']
+ if trfit_f.attrs['ranking_statistic'] in ['quadsum','single_ranking_only']:
+ x_label = trfit_f.attrs['sngl_ranking']
+ else:
+ x_label = "Ranking statistic"
+ fit_threshold = {ifo: trfit_f[ifo].attrs['fit_threshold'] for ifo in ifos}
+ fit_function = {ifo: trfit_f[ifo].attrs['fit_function'] for ifo in ifos}
  analysis_date = trfit_f.attrs['analysis_date']
 
  # Get the triggers for each detector
  # (This is ones which passed the cuts in the fitting code)
  stats = {ifo: {} for ifo in ifos}
  durations = {ifo: {} for ifo in ifos}
  for ifo in ifos:
- stats[ifo] = trfit_f[ifo]['triggers'][sngl_ranking][:]
+ if 'triggers' not in trfit_f[ifo]:
+ continue
+ stats[ifo] = trfit_f[ifo]['triggers']['stat'][:]
  durations[ifo] = trfit_f[ifo]['triggers']['template_duration'][:]
  live_time = {ifo: trfit_f[ifo].attrs['live_time'] for ifo in ifos}
  alphas = {ifo: trfit_f[ifo]['fit_coeff'][:] for ifo in ifos}
@@ -124,7 +129,10 @@ for ifo in all_ifos:
  maxstat = stats[ifo].max()
  max_rate = 0
 
- plotbins = np.linspace(fit_threshold, 1.05 * maxstat, 400)
+ statrange = maxstat - max(stats[ifo].min(), fit_threshold[ifo])
+ plotmax = maxstat + statrange * 0.05
+
+ plotbins = np.linspace(fit_threshold[ifo], plotmax, 400)
 
  logging.info("Putting events into bins")
  event_bin = np.array([tbins[d] for d in durations[ifo]])
@@ -134,8 +142,26 @@ for ifo in all_ifos:
  binlabel = f"{lower:.3g} - {upper:.3g}"
 
  inbin = event_bin == bin_num
+ bin_prop = bin_num / len(tbins)
+ bin_colour = plt.get_cmap(args.colormap)(bin_prop)
+
  # Skip if there are no triggers in this bin in this IFO
- if not any(inbin):
+ if not any(inbin) or alphas[ifo][bin_num] == -1:
+ ax.plot(
+ [],
+ [],
+ linewidth=2,
+ color=bin_colour,
+ label=binlabel,
+ alpha=0.6
+ )
+ ax.plot(
+ [],
+ [],
+ "--",
+ color=bin_colour,
+ label="No triggers"
+ )
  continue
  binned_sngl_stats = stats[ifo][event_bin == bin_num]
 
@@ -147,30 +173,28 @@ for ifo in all_ifos:
  max_rate = max(max_rate, cum_rate[0])
 
  ecf = eval_cum_fit(
- fit_function,
+ fit_function[ifo],
  plotbins,
  alphas[ifo][bin_num],
- fit_threshold
+ fit_threshold[ifo]
  )
  cum_fit = counts[ifo][bin_num] / live_time[ifo] * ecf
 
- bin_prop = bin_num / len(tbins)
- bin_colour = plt.get_cmap(args.colormap)(bin_prop)
  ax.plot(edges[:-1], cum_rate, linewidth=2,
  color=bin_colour, label=binlabel, alpha=0.6)
  ax.plot(plotbins, cum_fit, "--", color=bin_colour,
  label=r"$\alpha = $%.2f" % alphas[ifo][bin_num])
  ax.semilogy()
  ax.grid()
  ax.set_xlim(
- fit_threshold if args.x_lim_lower is None else args.x_lim_lower,
- 1.05 * maxstat if args.x_lim_upper is None else args.x_lim_upper
+ fit_threshold[ifo] if args.x_lim_lower is None else args.x_lim_lower,
+ plotmax if args.x_lim_upper is None else args.x_lim_upper
  )
  ax.set_ylim(
  0.5 / live_time[ifo] if args.y_lim_lower is None else args.y_lim_lower,
  1.5 * max_rate if args.y_lim_upper is None else args.y_lim_upper
  )
- ax.set_xlabel(sngl_ranking)
+ ax.set_xlabel(x_label)
  ax.set_ylabel("Number of louder triggers per live time")
  title = f"{ifo} {analysis_date} trigger fits"
  ax.set_title(title)

diff --git a/bin/live/pycbc_live_single_significance_fits b/bin/live/pycbc_live_single_significance_fits
@@ -25,10 +25,11 @@ import h5py
 
 import pycbc
 from pycbc.bin_utils import IrregularBins
-from pycbc.events import cuts, trigger_fits as trstats
+from pycbc.events import cuts, trigger_fits as trstats, stat
 from pycbc.io import DictArray
 from pycbc.events import ranking
 from pycbc.events.coinc import cluster_over_time
+from pycbc.types import MultiDetOptionAction
 
 
 def duration_bins_from_cli(args):
@@ -75,6 +76,7 @@ parser.add_argument("--file-identifier", default="H1L1V1-Live",
  help="String required in filename to be considered for "
  "analysis. Default: 'H1L1V1-Live'.")
 parser.add_argument("--fit-function", default="exponential",
+ action=MultiDetOptionAction,
  choices=["exponential", "rayleigh", "power"],
  help="Functional form for the maximum likelihood fit. "
  "Choose from exponential, rayleigh or power. "
@@ -112,18 +114,20 @@ parser.add_argument("--prune-stat-threshold", type=float,
  help="Minimum --sngl-ranking value to consider a "
  "trigger for pruning.")
 parser.add_argument("--fit-threshold", type=float, default=5,
+ action=MultiDetOptionAction,
  help="Lower threshold used in fitting the triggers."
- "Default 5.")
+ "Default 5. Can be supplied as a single value, "
+ "or as a set of IFO:value pairs, e.g. H1:0:, L1:-1")
 parser.add_argument("--cluster", action='store_true',
  help="Only use maximum of the --sngl-ranking value "
  "from each file.")
 parser.add_argument("--output", required=True,
  help="File in which to save the output trigger fit "
  "parameters.")
-parser.add_argument("--sngl-ranking", default="newsnr",
-  choices=ranking.sngls_ranking_function_dict.keys(),
-  help="The single-detector trigger ranking to use.")
-
+stat.insert_statistic_option_group(
+ parser,
+ default_ranking_statistic='single_ranking_only'
+)
 cuts.insert_cuts_option_group(parser)
 
 args = parser.parse_args()
@@ -133,8 +137,11 @@ pycbc.init_logging(args.verbose)
 # Check input options
 
 # Pruning options are mutually required or not needed
-prune_options = [args.prune_loudest, args.prune_window,
- args.prune_stat_threshold]
+prune_options = [
+ args.prune_loudest is not None,
+ args.prune_window is not None,
+ args.prune_stat_threshold is not None
+]
 
 if any(prune_options) and not all(prune_options):
  parser.error("Require all or none of --prune-loudest, "
@@ -162,6 +169,9 @@ if args.duration_bin_end and \
  "--duration-bin-start, got "
  f"{args.duration_bin_end} and {args.duration_bin_start}")
 
+
+stat_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)
+
 duration_bin_edges = duration_bins_from_cli(args)
 logging.info("Duration bin edges: %s", duration_bin_edges)
 
@@ -177,13 +187,7 @@ args.template_cuts = args.template_cuts or []
 args.template_cuts.append(f"template_duration:{min(duration_bin_edges)}:lower")
 args.template_cuts.append(f"template_duration:{max(duration_bin_edges)}:upper_inc")
 
-# Efficiency saving: add SNR cut before any others as sngl_ranking can
-# only be less than SNR.
 args.trigger_cuts = args.trigger_cuts or []
-args.trigger_cuts.insert(0, f"snr:{args.fit_threshold}:lower_inc")
-
-# Cut triggers with sngl-ranking below threshold
-args.trigger_cuts.append(f"{args.sngl_ranking}:{args.fit_threshold}:lower_inc")
 
 logging.info("Setting up the cut dictionaries")
 trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args)
@@ -211,6 +215,9 @@ files = [f for f in os.listdir(date_directory)
 
 events = {}
 
+rank_method = {ifo: stat.get_statistic_from_opts(args, [ifo]) for ifo in args.ifos}
+
+logging.info("Processing %d files", len(files))
 for counter, filename in enumerate(files):
  if counter and counter % 1000 == 0:
  logging.info("Processed %d/%d files", counter, len(files))
@@ -284,10 +291,13 @@ for counter, filename in enumerate(files):
  for k in trigs_ifo.keys()}
 
  # Calculate the sngl_ranking values
- sngls_value = ranking.get_sngls_ranking_from_trigs(
- triggers_cut, args.sngl_ranking)
+ sds = rank_method[ifo].single(triggers_cut)
+ sngls_value = rank_method[ifo].rank_stat_single(
+ (ifo, sds),
+ **stat_kwargs
+ )
 
- triggers_cut[args.sngl_ranking] = sngls_value
+ triggers_cut['stat'] = sngls_value
 
  triggers_da = DictArray(data=triggers_cut)
 
@@ -330,7 +340,7 @@ for ifo in events:
  for bin_num in range(n_bins):
  inbin = event_bins[ifo] == bin_num
 
- binned_events = events[ifo].data[args.sngl_ranking][inbin]
+ binned_events = events[ifo].data['stat'][inbin]
  binned_event_times = events[ifo].data['end_time'][inbin]
 
  # Cluster triggers in time with the pruning window to ensure
@@ -396,17 +406,32 @@ for ifo in events:
  alphas[ifo][bin_num] = -1
  continue
 
- counts[ifo][bin_num] = np.count_nonzero(inbin)
+ stat_inbin = events[ifo].data['stat'][inbin]
+ counts[ifo][bin_num] = \
+ np.count_nonzero(stat_inbin > args.fit_threshold[ifo])
+
  alphas[ifo][bin_num], _ = trstats.fit_above_thresh(
- args.fit_function,
- events[ifo].data[args.sngl_ranking][inbin],
- args.fit_threshold
+ args.fit_function[ifo],
+ stat_inbin,
+ args.fit_threshold[ifo]
  )
 
 logging.info("Writing results")
 with h5py.File(args.output, 'w') as fout:
- for ifo in events:
+ for ifo in args.ifos:
  fout_ifo = fout.create_group(ifo)
+ fout_ifo.attrs['fit_function'] = args.fit_function[ifo]
+ fout_ifo.attrs['fit_threshold'] = args.fit_threshold[ifo]
+ if ifo not in events:
+ # There were no triggers, but we should still produce some
+ # information
+ fout_ifo['fit_coeff'] = -1 * np.zeros(n_bins)
+ fout_ifo['counts'] = np.zeros(n_bins)
+ fout_ifo.attrs['live_time'] = live_time[ifo]
+ fout_ifo.attrs['pruned_times'] = []
+ fout_ifo.attrs['n_pruned'] = 0
+ continue
+
  # Save the triggers we have used for the fits
  fout_ifo_trigs = fout_ifo.create_group('triggers')
  for key in events[ifo].data:
@@ -427,8 +452,7 @@ with h5py.File(args.output, 'w') as fout:
  fout.attrs['analysis_date'] = args.analysis_date
  fout.attrs['input'] = sys.argv
  fout.attrs['cuts'] = args.template_cuts + args.trigger_cuts
- fout.attrs['fit_function'] = args.fit_function
- fout.attrs['fit_threshold'] = args.fit_threshold
  fout.attrs['sngl_ranking'] = args.sngl_ranking
+ fout.attrs['ranking_statistic'] = args.ranking_statistic
 
 logging.info("Done")