diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a9be169eb..46eb3ae2e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,31 @@
+v2.2 (2024-10-22)
+-----------------
+
+New Features
+^^^^^^^^^^^^
+
+- Add a compute_rms function to LombScarglePowerspectrum (`#828 <https://github.com/StingraySoftware/stingray/pull/828>`__)
+- Introduced the FITSTimeseriesReader class for lazy-loading of event lists (`#834 <https://github.com/StingraySoftware/stingray/pull/834>`__)
+- Implementation of the shift-and-add technique for QPOs and other varying power spectral features (`#849 <https://github.com/StingraySoftware/stingray/pull/849>`__)
+
+
+Bug Fixes
+^^^^^^^^^
+
+- The ``fold_events`` function now checks if the keyword arguments (`kwargs`) are in the list of optional parameters.
+  If any unidentified keys are present, it raises a `ValueError`.
+  This fix ensures that the function only accepts valid optional parameters and provides a clear error message for unsupported keys. (`#837 <https://github.com/StingraySoftware/stingray/pull/837>`__)
+
+
+Internal Changes
+^^^^^^^^^^^^^^^^
+
+- Eliminated runtime dependency on setuptools (`#852 <https://github.com/StingraySoftware/stingray/pull/852>`__)
+- Moved configuration to pyproject.toml as recommended by PEP 621 (`#842 <https://github.com/StingraySoftware/stingray/pull/842>`__)
+- Added pre-commit hooks in ``pre-commit-config.yaml`` (`#847 <https://github.com/StingraySoftware/stingray/pull/847>`__)
+- Improved main page of the documentation (`#748 <https://github.com/StingraySoftware/stingray/pull/748>`__)
+
+
 v2.1 (2024-05-29)
 -----------------
diff --git a/docs/changes/748.docs.rst b/docs/changes/748.docs.rst
deleted file mode 100644
index 07c169ad5..000000000
--- a/docs/changes/748.docs.rst
+++ /dev/null
@@ -1 +0,0 @@
-Improved main page of the documentation
\ No newline at end of file
diff --git a/docs/changes/828.feature.rst b/docs/changes/828.feature.rst
deleted file mode 100644
index e2f2fffe5..000000000
--- a/docs/changes/828.feature.rst
+++ /dev/null
@@ -1 +0,0 @@
-Add a compute_rms function to LombScarglePowerspectrum
diff --git a/docs/changes/837.bugfix.rst b/docs/changes/837.bugfix.rst
deleted file mode 100644
index bdebe104b..000000000
--- a/docs/changes/837.bugfix.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-The ``fold_events`` function now checks if the keyword arguments (`kwargs`) are in the list of optional parameters.
-If any unidentified keys are present, it raises a `ValueError`.
-This fix ensures that the function only accepts valid optional parameters and provides a clear error message for unsupported keys.
diff --git a/docs/changes/842.trivial.rst b/docs/changes/842.trivial.rst
deleted file mode 100644
index 978490481..000000000
--- a/docs/changes/842.trivial.rst
+++ /dev/null
@@ -1 +0,0 @@
-Moved configuration to pyproject.toml as recommended by PEP 621
diff --git a/docs/changes/847.trivial.rst b/docs/changes/847.trivial.rst
deleted file mode 100644
index 1c8c5e9bf..000000000
--- a/docs/changes/847.trivial.rst
+++ /dev/null
@@ -1 +0,0 @@
-Added pre-commit hooks in ``pre-commit-config.yaml``
\ No newline at end of file
diff --git a/docs/changes/849.feature.rst b/docs/changes/849.feature.rst
deleted file mode 100644
index e5880abe5..000000000
--- a/docs/changes/849.feature.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-implementation of the shift-and-add technique for QPOs and other varying power spectral features
-
diff --git a/docs/index.rst b/docs/index.rst
index 14034087d..052eb9303 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -308,6 +308,7 @@ Advanced
    :maxdepth: 2

    timeseries
+   largedata
    api

 Additional information
diff --git a/docs/largedata.rst b/docs/largedata.rst
new file mode 100644
index 000000000..eb2546188
--- /dev/null
+++ b/docs/largedata.rst
@@ -0,0 +1,7 @@
+Working with large data sets
+****************************
+
+.. toctree::
+   :maxdepth: 2
+
+   notebooks/Performance/Dealing with large data files.ipynb
diff --git a/docs/notebooks b/docs/notebooks
index f3210ebe1..43ba05942 160000
--- a/docs/notebooks
+++ b/docs/notebooks
@@ -1 +1 @@
-Subproject commit f3210ebe12e48fd17c89f10c6f6b61e5ef733c69
+Subproject commit 43ba0594276c66e27a877488ec333b5c3e0b56b3
diff --git a/stingray/events.py b/stingray/events.py
index c59ec49cf..bd7881107 100644
--- a/stingray/events.py
+++ b/stingray/events.py
@@ -14,7 +14,8 @@
 from .base import StingrayTimeseries
 from .filters import get_deadtime_mask
 from .gti import generate_indices_of_boundaries
-from .io import load_events_and_gtis, pi_to_energy
+from .io import pi_to_energy, get_file_extension
+from .io import FITSTimeseriesReader
 from .lightcurve import Lightcurve
 from .utils import simon, njit
 from .utils import histogram
@@ -620,28 +621,18 @@ def read(cls, filename, fmt=None, rmf_file=None, **kwargs):
         ev: :class:`EventList` object
             The :class:`EventList` object reconstructed from file
         """
-
+        if fmt is None:
+            for fits_ext in ["fits", "evt"]:
+                if fits_ext in get_file_extension(filename).lower():
+                    fmt = "hea"
+                    break
         if fmt is not None and fmt.lower() in ("hea", "ogip"):
-            evtdata = load_events_and_gtis(filename, **kwargs)
-
-            evt = EventList(
-                time=evtdata.ev_list,
-                gti=evtdata.gti_list,
-                pi=evtdata.pi_list,
-                energy=evtdata.energy_list,
-                mjdref=evtdata.mjdref,
-                instr=evtdata.instr,
-                mission=evtdata.mission,
-                header=evtdata.header,
-                detector_id=evtdata.detector_id,
-                ephem=evtdata.ephem,
-                timeref=evtdata.timeref,
-                timesys=evtdata.timesys,
-            )
-            if "additional_columns" in kwargs:
-                for key in evtdata.additional_data:
-                    if not hasattr(evt, key.lower()):
-                        setattr(evt, key.lower(), evtdata.additional_data[key])
+            additional_columns = kwargs.pop("additional_columns", None)
+
+            evt = FITSTimeseriesReader(
+                filename, output_class=EventList, additional_columns=additional_columns
+            )[:]
+
             if rmf_file is not None:
                 evt.convert_pi_to_energy(rmf_file)
             return evt
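With this change, ``EventList.read`` infers the OGIP/HEASOFT format from a ``.fits``/``.evt`` file extension and delegates the actual I/O to ``FITSTimeseriesReader``, so a full read is just the ``[:]`` slice. A minimal sketch of the resulting call, assuming a hypothetical event file ``obs.evt``::

    from stingray import EventList

    # fmt is inferred from the ".evt" extension, equivalent to passing fmt="hea"
    ev = EventList.read("obs.evt")
    print(ev.time.size, ev.gti, ev.mission)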
diff --git a/stingray/fourier.py b/stingray/fourier.py
index 8c9aff1df..3e88527c9 100644
--- a/stingray/fourier.py
+++ b/stingray/fourier.py
@@ -2045,7 +2045,7 @@ def local_show_progress(a):
         ft1 = fft(flux1)
         ft2 = fft(flux2)

-        # Calculate the sum of each light curve, to calculate the mean
+        # Calculate the sum of each light curve chunk, to calculate the mean
         n_ph1 = flux1.sum()
         n_ph2 = flux2.sum()
         n_ph = np.sqrt(n_ph1 * n_ph2)
diff --git a/stingray/gti.py b/stingray/gti.py
index 6a0648f13..52e368ecf 100644
--- a/stingray/gti.py
+++ b/stingray/gti.py
@@ -36,6 +36,9 @@
     "gti_border_bins",
     "generate_indices_of_segment_boundaries_unbinned",
     "generate_indices_of_segment_boundaries_binned",
+    "split_gtis_by_exposure",
+    "split_gtis_at_indices",
+    "find_large_bad_time_intervals",
 ]

 logger = setup_logger()
@@ -270,6 +273,8 @@ def get_gti_from_all_extensions(lchdulist, accepted_gtistrings=["GTI"], det_numb
     ...     det_numbers=[5])
     >>> assert np.allclose(gti, [[200, 250]])
     """
+    if isinstance(lchdulist, str):
+        lchdulist = fits.open(lchdulist)
     acc_gti_strs = copy.deepcopy(accepted_gtistrings)
     if det_numbers is not None:
         for i in det_numbers:
@@ -1687,3 +1692,158 @@ def generate_indices_of_segment_boundaries_binned(times, gti, segment_size, dt=N
         dt = 0
     for idx0, idx1 in zip(startidx, stopidx):
         yield times[idx0] - dt / 2, times[min(idx1, times.size - 1)] - dt / 2, idx0, idx1
+
+
+def split_gtis_at_indices(gtis, index_list):
+    """Split a GTI list at the given indices, creating multiple GTI lists.
+
+    Parameters
+    ----------
+    gtis : 2-d float array
+        List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]``
+    index_list : int or array-like
+        Index or list of indices at which to split the GTIs
+
+    Returns
+    -------
+    gti_lists : list of 2-d float arrays
+        List of GTI lists, split at the given indices
+
+    Examples
+    --------
+    >>> gtis = [[0, 30], [50, 60], [80, 90]]
+    >>> new_gtis = split_gtis_at_indices(gtis, 1)
+    >>> assert np.allclose(new_gtis[0], [[0, 30]])
+    >>> assert np.allclose(new_gtis[1], [[50, 60], [80, 90]])
+    """
+    gtis = np.asanyarray(gtis)
+    if not isinstance(index_list, Iterable):
+        index_list = [index_list]
+    previous_idx = 0
+    gti_lists = []
+    if index_list[0] == 0:
+        index_list = index_list[1:]
+    for idx in index_list:
+        gti_lists.append(gtis[previous_idx:idx, :])
+        previous_idx = idx
+    if index_list[-1] != -1 and index_list[-1] <= gtis[:, 0].size - 1:
+        gti_lists.append(gtis[previous_idx:, :])
+
+    return gti_lists
+
+
+def find_large_bad_time_intervals(gtis, bti_length_limit=86400):
+    """Find large bad time intervals (BTIs) between GTIs, and return their midpoints.
+
+    Parameters
+    ----------
+    gtis : 2-d float array
+        List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]``
+    bti_length_limit : float
+        Threshold length of a BTI. For each BTI longer than this, an edge is
+        returned at the midpoint of the BTI.
+
+    Returns
+    -------
+    bad_interval_midpoints : list of float
+        List of midpoints of large bad time intervals
+
+    Examples
+    --------
+    >>> gtis = [[0, 30], [86450, 86460], [86480, 86490]]
+    >>> bad_interval_midpoints = find_large_bad_time_intervals(gtis)
+    >>> assert np.allclose(bad_interval_midpoints, [43240])
+    """
+    gtis = np.asanyarray(gtis)
+    bad_interval_midpoints = []
+    # Check for compulsory edges
+    last_edge = gtis[0, 0]
+    for g in gtis:
+        if g[0] - last_edge > bti_length_limit:
+            logger.info(f"Detected large bad time interval between {g[0]} and {last_edge}")
+            bad_interval_midpoints.append((g[0] + last_edge) / 2)
+        last_edge = g[1]
+
+    return bad_interval_midpoints
+
+
+def split_gtis_by_exposure(gtis, exposure_per_chunk, new_interval_if_gti_sep=None):
+    """Split a list of GTIs into smaller GTI lists of a given total (approximate) exposure.
+
+    Parameters
+    ----------
+    gtis : 2-d float array
+        List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]``
+    exposure_per_chunk : float
+        Total exposure of each chunk, in seconds
+
+    Other Parameters
+    ----------------
+    new_interval_if_gti_sep : float
+        If two consecutive GTIs are separated by more than this time, split the
+        observation at that point.
+
+    Returns
+    -------
+    gti_list : list of 2-d float arrays
+        List of GTI lists, split into chunks of the given exposure, and/or separated
+        by more than the given limit separation
+
+    Examples
+    --------
+    >>> gtis = [[0, 30], [86450, 86460]]
+    >>> new_gtis = split_gtis_by_exposure(gtis, 400, new_interval_if_gti_sep=86400)
+    >>> assert np.allclose(new_gtis[0], [[0, 30]])
+    >>> assert np.allclose(new_gtis[1], [[86450, 86460]])
+    >>> gtis = [[0, 30], [40, 70], [90, 120], [130, 160]]
+    >>> new_gtis = split_gtis_by_exposure(gtis, 60)
+    >>> assert np.allclose(new_gtis[0], [[0, 30], [40, 70]])
+    >>> assert np.allclose(new_gtis[1], [[90, 120], [130, 160]])
+
+    """
+    gtis = np.asanyarray(gtis)
+    rough_total_exposure = np.sum(np.diff(gtis, axis=1))
+    if new_interval_if_gti_sep is not None:
+        compulsory_edges = find_large_bad_time_intervals(gtis, new_interval_if_gti_sep)
+        if len(compulsory_edges) > 0:
+            # Split at the compulsory edges first, then split each piece by exposure
+            base_gti_list = split_gtis_at_indices(
+                gtis, np.searchsorted(gtis[:, 1], compulsory_edges)
+            )
+            final_gti_list = []
+            for local_gtis in base_gti_list:
+                local_split_gtis = split_gtis_by_exposure(local_gtis, exposure_per_chunk)
+                final_gti_list.extend(local_split_gtis)
+            return final_gti_list
+
+    n_intervals = int(np.rint(rough_total_exposure / exposure_per_chunk))
+
+    if n_intervals <= 1:
+        return np.asarray([gtis])
+
+    if len(gtis) <= n_intervals:
+        new_gtis = []
+        for g in gtis:
+            if g[1] - g[0] > exposure_per_chunk:
+                new_edges = np.arange(g[0], g[1], exposure_per_chunk)
+                if new_edges[-1] < g[1]:
+                    new_edges = np.append(new_edges, g[1])
+
+                new_gtis.extend([[ed0, ed1] for ed0, ed1 in zip(new_edges[:-1], new_edges[1:])])
+            else:
+                new_gtis.append(g)
+        gtis = np.asarray(new_gtis)
+
+    exposure_edges = []
+    last_exposure = 0
+    for g in gtis:
+        exposure_edges.append(last_exposure)
+        last_exposure += g[1] - g[0]
+    total_exposure = last_exposure
+
+    exposure_edges = np.asarray(exposure_edges)
+
+    exposure_per_interval = total_exposure / n_intervals
+    exposure_intervals = np.arange(0, total_exposure + exposure_per_interval, exposure_per_interval)
+
+    index_list = np.searchsorted(exposure_edges, exposure_intervals)
+
+    vals = split_gtis_at_indices(gtis, index_list)
+    return vals
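The new splitting helpers compose: gaps longer than ``new_interval_if_gti_sep`` force a split first, then each piece is chunked by exposure. A minimal sketch of the intended use (times in seconds, values purely illustrative)::

    import numpy as np
    from stingray.gti import find_large_bad_time_intervals, split_gtis_by_exposure

    # Two observing blocks separated by roughly a day, plus a later short GTI.
    gtis = np.array([[0.0, 1000.0], [1500.0, 2500.0], [90000.0, 91000.0]])

    # Midpoint of the day-long gap between 2500 s and 90000 s.
    print(find_large_bad_time_intervals(gtis, bti_length_limit=86400))  # [46250.0]

    # ~1000 s of exposure per chunk; the day-long gap also forces a split.
    for chunk in split_gtis_by_exposure(gtis, 1000.0, new_interval_if_gti_sep=86400):
        print(chunk)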
diff --git a/stingray/io.py b/stingray/io.py
index e3bb10de9..a20cedd16 100644
--- a/stingray/io.py
+++ b/stingray/io.py
@@ -16,6 +16,7 @@
 import stingray.utils as utils
 from stingray.loggingconfig import setup_logger

+
 from .utils import (
     assign_value_if_none,
     is_string,
@@ -23,7 +24,13 @@
     is_sorted,
     make_dictionary_lowercase,
 )
-from .gti import get_gti_from_all_extensions, load_gtis
+from .gti import (
+    get_gti_from_all_extensions,
+    load_gtis,
+    get_total_gti_length,
+    split_gtis_by_exposure,
+    cross_two_gtis,
+)

 from .mission_support import (
     read_mission_info,
@@ -36,12 +43,13 @@
 import pickle

 _H5PY_INSTALLED = True
+DEFAULT_FORMAT = "hdf5"

 try:
     import h5py
 except ImportError:
     _H5PY_INSTALLED = False
-
+    DEFAULT_FORMAT = "pickle"

 HAS_128 = True
 try:
@@ -557,8 +565,8 @@ def load_events_and_gtis(
         mission = probe_header[mission_key].lower()

     mission_specific_processing = mission_specific_event_interpretation(mission)
-
-    mission_specific_processing(hdulist)
+    if mission_specific_processing is not None:
+        mission_specific_processing(hdulist)

     db = read_mission_info(mission)
     instkey = get_key_from_mission_info(db, "instkey", "INSTRUME")
@@ -714,6 +722,540 @@ def __init__(self):
         pass


+class FITSTimeseriesReader(object):
+    main_array_attr = "time"
+
+    def __init__(
+        self,
+        fname,
+        output_class=None,
+        force_hduname=None,
+        gti_file=None,
+        gtistring=None,
+        additional_columns=None,
+        data_kind="events",
+    ):
+        self.fname = fname
+        self._meta_attrs = []
+        self.gtistring = gtistring
+        self.output_class = output_class
+        self.additional_columns = additional_columns
+        if "EventList" in str(output_class) or data_kind.lower() in ["events", "times"]:
+            self._initialize_header_events(fname, force_hduname=force_hduname)
+        else:
+            raise NotImplementedError(
+                "Only events are supported by FITSTimeseriesReader at the moment. "
+                f"{data_kind} is an unknown data kind."
+            )
+        self.data_kind = data_kind
+        if additional_columns is None and self.detector_key != "NONE":
+            additional_columns = [self.detector_key]
+        elif self.detector_key != "NONE":
+            additional_columns.append(self.detector_key)
+        self.data_hdu = fits.open(self.fname)[self.hduname]
+        self.gti_file = gti_file
+        self._read_gtis(self.gti_file)
+
+    @property
+    def time(self):
+        return self[:].time
+
+    def meta_attrs(self):
+        return self._meta_attrs
+
+    def _add_meta_attr(self, name, value):
+        """Add a meta attribute to the object."""
+        if name not in self._meta_attrs:
+            self._meta_attrs.append(name)
+        setattr(self, name, value)
+
+    @property
+    def exposure(self):
+        """Return the total exposure of the time series, i.e. the sum of the GTIs.
+
+        Returns
+        -------
+        total_exposure : float
+            The total exposure of the time series, in seconds.
+        """
+        return get_total_gti_length(self.gti)
+
+    def __getitem__(self, index):
+        """Return an element or a slice of the object, e.g. ``ts[1]`` or ``ts[1:2]``."""
+        data = self.data_hdu.data[index]
+
+        return self.transform_slice(data)
+
+    def transform_slice(self, data):
+        # Here there will be some logic to understand whether to transform the data
+        # into events or something else
+        if self.data_kind == "times":
+            return data[self.time_column][:] + self.timezero
+        if self.output_class is None:
+            return data
+        if self.data_kind == "events":
+            return self._transform_slice_into_events(data)
+
+    def _transform_slice_into_events(self, data):
+        """Take a slice of data from a FITS event file and make it a StingrayTimeseries.
+
+        Data taken from a FITS file will typically be a Numpy record array. This method
+        tries to interpret the information contained in the record array based on what
+        we know of the mission and the instrument. For sure, there will be a TIME column
+        that will become the ``time`` array of the timeseries object. If there is a PI/PHA
+        column, it will become the ``pi`` array, and if we know the conversion law for the
+        mission, this will also be converted to energy. If there is an ENERGY column, it
+        will directly be loaded into the energy column.
+        Additional meta information (e.g. GTIs, MJDREF, etc.) will also be added to the
+        object.
+
+        Parameters
+        ----------
+        data : np.recarray
+            The slice of data to transform
+
+        Returns
+        -------
+        new_ts : any StingrayTimeseries subclass
+            The transformed timeseries object. It will typically be an ``EventList``
+            object, but the user can change this by specifying the ``output_class``
+            parameter in the constructor of the reader.
+        """
+        columns = [self.time_column]
+        for col in self.pi_column, self.energy_column:
+            if col is not None:
+                columns.append(col)
+        new_ts = self.output_class()
+        if self._mission_specific_processing is not None:
+            data = self._mission_specific_processing(
+                data, header=self.header, hduname=self.hduname
+            )
+
+        # Set the times
+        setattr(
+            new_ts,
+            self.main_array_attr,
+            data[self.time_column][:] + self.timezero,
+        )
+        # Get the conversion function PI->Energy
+        try:
+            pi_energy_func = get_rough_conversion_function(
+                self.mission,
+                instrument=self.instr,
+                epoch=self.t_start / 86400 + self.mjdref,
+            )
+        except ValueError:
+            pi_energy_func = None
+
+        if self.energy_column in data.dtype.names:
+            new_ts.energy = data[self.energy_column]
+        elif self.pi_column in data.dtype.names:
+            new_ts.pi = data[self.pi_column]
+            if pi_energy_func is not None:
+                new_ts.energy = pi_energy_func(new_ts.pi)
+
+        det_numbers = None
+        if self.detector_key is not None and self.detector_key in data.dtype.names:
+            new_ts.detector_id = data[self.detector_key]
+            det_numbers = list(set(new_ts.detector_id))
+            self._read_gtis(self.gti_file, det_numbers=det_numbers)
+
+        if self.additional_columns is not None:
+            for col in self.additional_columns:
+                if col == self.detector_key:
+                    continue
+                if col in data.dtype.names:
+                    setattr(new_ts, col.lower(), data[col])
+
+        for attr in self.meta_attrs():
+            local_value = getattr(self, attr)
+            if attr in ["t_start", "t_stop", "gti"] and local_value is not None:
+                setattr(new_ts, attr, local_value + self.timezero)
+            else:
+                setattr(new_ts, attr, local_value)
+
+        return new_ts
+
+    def _initialize_header_events(self, fname, force_hduname=None):
+        """Read the header of the FITS file and set the relevant attributes.
+
+        When possible, some mission-specific information is read from the keywords
+        and extension names found in ``xselect.mdb``.
+
+        Parameters
+        ----------
+        fname : str
+            The name of the FITS file to read
+
+        Other parameters
+        ----------------
+        force_hduname : str or int, default None
+            If not None, the name of the HDU to read. If None, fall back to an
+            extension called EVENTS, or to the first extension.
+        """
+        hdulist = fits.open(fname)
+        if not force_hduname:
+            probe_header = hdulist[0].header
+        else:
+            probe_header = hdulist[force_hduname].header
+
+        # We need the minimal information to read the mission database.
+        # That is, the name of the mission/telescope, the instrument and,
+        # if available, the observing mode.
+        mission_key = "MISSION"
+        if mission_key not in probe_header:
+            mission_key = "TELESCOP"
+        self._add_meta_attr("mission", probe_header[mission_key].lower())
+        self._add_meta_attr(
+            "_mission_specific_processing",
+            mission_specific_event_interpretation(self.mission),
+        )
+
+        # Now, we read the mission info, and we try to get the relevant
+        # information from the header using the mission-specific keywords.
+        db = read_mission_info(self.mission)
+        instkey = get_key_from_mission_info(db, "instkey", "INSTRUME")
+        instr = mode = None
+        if instkey in probe_header:
+            instr = probe_header[instkey].strip()
+
+        modekey = get_key_from_mission_info(db, "dmodekey", None, instr)
+        if modekey is not None and modekey in probe_header:
+            mode = probe_header[modekey].strip()
+        self._add_meta_attr("instr", instr)
+        self._add_meta_attr("mode", mode)
+
+        gtistring = self.gtistring
+
+        if self.gtistring is None:
+            gtistring = get_key_from_mission_info(db, "gti", "GTI,STDGTI", instr, self.mode)
+        self._add_meta_attr("gtistring", gtistring)
+
+        if force_hduname is None:
+            hduname = get_key_from_mission_info(db, "events", "EVENTS", instr, self.mode)
+        else:
+            hduname = force_hduname
+
+        # If the EVENTS/``force_hduname`` extension is not found, try the first
+        # extension, which is usually the one containing the data
+        if hduname not in hdulist:
+            warnings.warn(f"HDU {hduname} not found. Trying first extension")
+            hduname = 1
+        self._add_meta_attr("hduname", hduname)
+
+        self._add_meta_attr("header", dict(hdulist[self.hduname].header))
+        self._add_meta_attr("nphot", self.header["NAXIS2"])
+
+        # These are the important keywords for timing.
+        ephem = timeref = timesys = None
+        if "PLEPHEM" in self.header:
+            # For the rare cases where this is a number, e.g. 200, we add `str`:
+            # it is supposed to be a string.
+            ephem = str(self.header["PLEPHEM"]).strip().lstrip("JPL-").lower()
+        if "TIMEREF" in self.header:
+            timeref = self.header["TIMEREF"].strip().lower()
+        if "TIMESYS" in self.header:
+            timesys = self.header["TIMESYS"].strip().lower()
+        self._add_meta_attr("ephem", ephem)
+        self._add_meta_attr("timeref", timeref)
+        self._add_meta_attr("timesys", timesys)
+
+        timezero = np.longdouble(0.0)
+        if "TIMEZERO" in self.header:
+            timezero = np.longdouble(self.header["TIMEZERO"])
+        t_start = t_stop = None
+        if "TSTART" in self.header:
+            t_start = np.longdouble(self.header["TSTART"])
+        if "TSTOP" in self.header:
+            t_stop = np.longdouble(self.header["TSTOP"])
+        self._add_meta_attr("timezero", timezero)
+        self._add_meta_attr("t_start", t_start)
+        self._add_meta_attr("t_stop", t_stop)
+
+        self._add_meta_attr(
+            "time_column",
+            get_key_from_mission_info(db, "time", "TIME", instr, mode),
+        )
+        self._add_meta_attr(
+            "detector_key",
+            get_key_from_mission_info(db, "ccol", "NONE", instr, mode),
+        )
+        self._add_meta_attr(
+            "mjdref", np.longdouble(high_precision_keyword_read(self.header, "MJDREF"))
+        )
+
+        # Try to get the information needed to calculate the event energy. We start
+        # from the PI column.
+        default_pi_column = get_key_from_mission_info(db, "ecol", "PI", instr, self.mode)
+        if default_pi_column not in hdulist[self.hduname].data.columns.names:
+            default_pi_column = None
+        self._add_meta_attr("pi_column", default_pi_column)
+
+        # If a column named "energy" is found, we read it and assume the energy
+        # conversion is already done.
+        if "energy" in [val.lower() for val in hdulist[self.hduname].data.columns.names]:
+            energy_column = "energy"
+        else:
+            energy_column = None
+        self._add_meta_attr("energy_column", energy_column)
+
+    def _read_gtis(self, gti_file=None, det_numbers=None):
+        """Read GTIs from the FITS file."""
+        # This is ugly but, e.g., if we are reading XMM data, we *need* the
+        # detector number to access the GTIs. So, here we read a bunch of rows,
+        # hoping that they represent the detector number population.
+        if self.detector_key is not None:
+            with fits.open(self.fname) as hdul:
+                data = hdul[self.hduname].data
+                if self.detector_key in data.dtype.names:
+                    probe_vals = data[:100][self.detector_key]
+                    det_numbers = list(set(probe_vals))
+
+        accepted_gtistrings = self.gtistring.split(",")
+        gti_list = None
+
+        if gti_file is not None:
+            self._add_meta_attr("gti", load_gtis(gti_file, self.gtistring))
+            return
+
+        # Select the first GTI extension with an accepted name
+        try:
+            gti_list = get_gti_from_all_extensions(
+                self.fname,
+                accepted_gtistrings=accepted_gtistrings,
+                det_numbers=det_numbers,
+            )
+        except Exception as e:  # pragma: no cover
+            warnings.warn(
+                f"No valid GTI extensions found.\nError: {str(e)}\n"
+                "GTIs will be set to the entire time series."
+            )
+
+        self._add_meta_attr("gti", gti_list)
+
+    def _get_idx_from_time_range(self, start, stop):
+        """Get the indices of the event times that fall within the given time range.
+
+        Instead of reading all the data from the file and doing ``np.searchsorted``,
+        which could easily fill up the memory, this function uses a two-step procedure.
+        It first uses ``self._trace_nphots_in_file`` to get a coarse grid of times and
+        their corresponding indices in the file. Then, it reads only the data that
+        strictly include the requested time range, and on those data it performs a
+        searchsorted operation. The final indices are summed to the lower index of
+        the data that were read.
+
+        Parameters
+        ----------
+        start : float
+            Start time of the interval
+        stop : float
+            Stop time of the interval
+
+        Returns
+        -------
+        lower_edge : int
+            Index of the first photon in the requested time range
+        upper_edge : int
+            Index of the last photon in the requested time range
+        """
+        time_edges, idx_edges = self._trace_nphots_in_file(
+            nedges=int(self.exposure // (stop - start)) + 2
+        )
+
+        raw_min_idx = np.searchsorted(time_edges, start, side="left")
+        raw_max_idx = np.searchsorted(time_edges, stop, side="right")
+
+        raw_min_idx = max(0, raw_min_idx - 2)
+        raw_max_idx = min(time_edges.size - 1, raw_max_idx + 2)
+
+        raw_lower_edge = idx_edges[raw_min_idx]
+        raw_upper_edge = idx_edges[raw_max_idx]
+
+        assert (
+            start - time_edges[raw_min_idx] >= 0
+        ), f"Start: {start}; {start - time_edges[raw_min_idx]} > 0"
+        assert (
+            time_edges[raw_max_idx] - stop >= 0
+        ), f"Stop: {stop}; {time_edges[raw_max_idx] - stop} < 0"
+
+        with fits.open(self.fname) as hdulist:
+            filtered_times = hdulist[self.hduname].data[self.time_column][
+                raw_lower_edge : raw_upper_edge + 1
+            ]
+            lower_edge, upper_edge = np.searchsorted(filtered_times, [start, stop])
+            # searchsorted finds the first index above ``stop``; we want the last
+            # index below it.
+            upper_edge -= 1
+
+        return lower_edge + raw_lower_edge, upper_edge + raw_lower_edge
+
+    def apply_gti_lists(self, new_gti_lists, root_file_name=None, fmt=DEFAULT_FORMAT):
+        """Split the event list into different files, each with a different GTI list.
+
+        Parameters
+        ----------
+        new_gti_lists : list of lists
+            A list of lists of GTIs. Each sublist should contain a list of GTIs
+            for a new file.
+
+        Other Parameters
+        ----------------
+        root_file_name : str, default None
+            The root name of the output files. The file name will be appended with
+            "_00", "_01", etc.
+            If None, a generator is returned instead of writing the files.
+        fmt : str
+            The format of the output files. Default is 'hdf5'.
+
+        Returns
+        -------
+        output_files : list of str
+            A list of the output file names.
+        """
+        if len(new_gti_lists[0]) == len(self.gti) and np.all(
+            np.abs(np.asanyarray(new_gti_lists[0]).flatten() - self.gti.flatten()) < 1e-3
+        ):
+            ev = self[:]
+            if root_file_name is None:
+                yield ev
+            else:
+                output_file = root_file_name + "_00." + fmt.lstrip(".")
+                ev.write(output_file, fmt=fmt)
+                yield output_file
+            # The whole GTI list was used: no further splitting is needed.
+            return
+
+        for i, gti in enumerate(new_gti_lists):
+            if len(gti) == 0:
+                continue
+
+            lower_edge, upper_edge = self._get_idx_from_time_range(gti[0, 0], gti[-1, 1])
+
+            ev = self[lower_edge : upper_edge + 1]
+            if hasattr(ev, "gti"):
+                ev.gti = gti
+
+            if root_file_name is not None:
+                new_file = root_file_name + f"_{i:02d}." + fmt.lstrip(".")
+                logger.info(f"Writing {new_file}")
+                ev.write(new_file, fmt=fmt)
+                yield new_file
+            else:
+                yield ev
+
+    def _trace_nphots_in_file(self, nedges=1001):
+        """Trace the number of photons as time advances in the file.
+
+        This is a way to quickly map the distribution of photons in time, without
+        reading the entire file. This map can then be used to access the wanted
+        data without loading the whole file in memory.
+
+        Other Parameters
+        ----------------
+        nedges : int
+            The number of time edges to trace. Default is 1001.
+
+        Returns
+        -------
+        time_edges : np.ndarray
+            The time edges
+        idx_edges : np.ndarray
+            The index edges
+        """
+        if hasattr(self, "_time_edges") and len(self._time_edges) >= nedges:
+            return self._time_edges, self._idx_edges
+
+        fname = self.fname
+
+        with fits.open(fname) as hdul:
+            size = hdul[1].header["NAXIS2"]
+            nedges = min(nedges, size // 10 + 2)
+
+            time_edges = np.zeros(nedges)
+            idx_edges = np.zeros(nedges, dtype=int)
+            for i, edge_idx in enumerate(np.linspace(0, size - 1, nedges).astype(int)):
+                idx_edges[i] = edge_idx
+                time_edges[i] = hdul[1].data["TIME"][edge_idx]
+
+        mingti, maxgti = np.min(self.gti), np.max(self.gti)
+        if time_edges[0] > mingti:
+            time_edges[0] = mingti
+        if time_edges[-1] < maxgti:
+            time_edges[-1] = maxgti
+
+        self._time_edges, self._idx_edges = time_edges, idx_edges
+
+        return time_edges, idx_edges
+
+    def split_by_number_of_samples(self, nsamples, root_file_name=None, fmt=DEFAULT_FORMAT):
+        """Split the event list into files, each with approx. the given number of photons.
+
+        Parameters
+        ----------
+        nsamples : int
+            The number of photons in each output file.
+
+        Other Parameters
+        ----------------
+        root_file_name : str, default None
+            The root name of the output files. The file name will be appended with
+            "_00", "_01", etc.
+            If None, a generator is returned instead of writing the files.
+        fmt : str
+            The format of the output files. Default is 'hdf5'.
+
+        Returns
+        -------
+        output_files : list of str
+            A list of the output file names.
+        """
+        n_intervals = int(np.rint(self.nphot / nsamples))
+        exposure_per_interval = self.exposure / n_intervals
+        new_gti_lists = split_gtis_by_exposure(self.gti, exposure_per_interval)
+
+        return self.apply_gti_lists(new_gti_lists, root_file_name=root_file_name, fmt=fmt)
+
+    def filter_at_time_intervals(
+        self, time_intervals, root_file_name=None, fmt=DEFAULT_FORMAT, check_gtis=True
+    ):
+        """Filter the event list at the given time intervals.
+
+        Parameters
+        ----------
+        time_intervals : 2-d float array
+            List of time intervals of the form ``[[time0_0, time0_1], [time1_0, time1_1], ...]``
+
+        Other Parameters
+        ----------------
+        root_file_name : str, default None
+            The root name of the output files. The file name will be appended with
+            "_00", "_01", etc.
+            If None, a generator is returned instead of writing the files.
+        fmt : str
+            The format of the output files. Default is 'hdf5'.
+
+        Returns
+        -------
+        output_files : list of str
+            A list of the output file names.
+        """
+        if len(np.shape(time_intervals)) == 1:
+            time_intervals = [time_intervals]
+        if check_gtis:
+            new_gti = [cross_two_gtis(self.gti, [t_int]) for t_int in time_intervals]
+        else:
+            new_gti = [np.asarray([t_int]) for t_int in time_intervals]
+        return self.apply_gti_lists(new_gti, root_file_name=root_file_name, fmt=fmt)
+
+
 def mkdir_p(path):  # pragma: no cover
     """Safe ``mkdir`` function
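``FITSTimeseriesReader`` thus behaves like a lazily-indexed view of the events extension: slicing materializes an ``EventList``, and the split/filter methods yield chunks (or file names) without ever loading the whole table. A minimal sketch of the intended workflow, with a hypothetical file name (compare the tests at the end of this diff)::

    from stingray.events import EventList
    from stingray.io import FITSTimeseriesReader

    reader = FITSTimeseriesReader("obs.evt", output_class=EventList)
    print(reader.nphot, reader.exposure)

    first_chunk = reader[:1000]  # materialize only the first 1000 events

    # Stream ~500-photon chunks as EventList objects, without writing files
    for ev in reader.split_by_number_of_samples(500):
        print(ev.time.size, ev.gti)

The two-step lookup in ``_get_idx_from_time_range`` is a coarse-to-fine ``searchsorted``; the same idea in miniature, with a plain NumPy array standing in for the on-disk TIME column::

    import numpy as np

    rng = np.random.default_rng(0)
    times = np.sort(rng.uniform(0, 1000, 100_000))  # stand-in for the TIME column

    # Coarse map: a handful of sampled times and their row indices
    idx_edges = np.linspace(0, times.size - 1, 11).astype(int)
    time_edges = times[idx_edges]

    start, stop = 123.0, 456.0
    lo = idx_edges[max(0, np.searchsorted(time_edges, start, side="left") - 2)]
    hi = idx_edges[min(time_edges.size - 1, np.searchsorted(time_edges, stop, side="right") + 2)]

    # Fine step: only rows lo..hi would need to be read from disk
    window = times[lo : hi + 1]
    first, last = np.searchsorted(window, [start, stop])
    last -= 1  # searchsorted gives the first index above `stop`; we want the last below
    print(lo + first, lo + last)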
diff --git a/stingray/mission_support/missions.py b/stingray/mission_support/missions.py
index 8b1bb98a8..4e221e27e 100644
--- a/stingray/mission_support/missions.py
+++ b/stingray/mission_support/missions.py
@@ -181,14 +181,12 @@ def get_rough_conversion_function(mission, instrument=None, epoch=None):
 def mission_specific_event_interpretation(mission):
     """Get the mission-specific FITS interpretation function.

-    This function will read a FITS :class:`astropy.io.fits.HDUList` object and modify it
-    in place to make the read into Stingray easier.
+    The returned function will read a file name or a FITS
+    :class:`astropy.io.fits.HDUList` object and modify it in place (see, e.g.,
+    :func:`rxte_pca_event_file_interpretation` for an example).
     """
     if mission.lower() == "xte":
         return rxte_pca_event_file_interpretation

-    def _empty(x):
-        return x
-
-    return _empty
+    return None
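Returning ``None`` instead of a no-op function makes "no mission-specific handling" explicit, at the cost of a guard at each call site, as in the ``load_events_and_gtis`` fix above. The dispatch pattern, sketched with a hypothetical helper::

    from stingray.mission_support import mission_specific_event_interpretation

    def interpret_if_needed(hdulist, mission):
        """Hypothetical helper: apply a mission-specific fix-up only when one exists."""
        processing = mission_specific_event_interpretation(mission)
        if processing is not None:  # currently only "xte" has one
            processing(hdulist)
        return hdulist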
diff --git a/stingray/mission_support/rxte.py b/stingray/mission_support/rxte.py
index d0842ec36..50a24efc6 100644
--- a/stingray/mission_support/rxte.py
+++ b/stingray/mission_support/rxte.py
@@ -4,6 +4,8 @@
 from astropy.time import Time
 from astropy.table import Table

+from astropy.io import fits
+
 c_match = re.compile(r"C\[(.*)\]")

 _EDGE_TIMES = [
@@ -308,7 +310,7 @@ def rxte_calibration_func(instrument, epoch):
         raise ValueError(f"Unknown XTE instrument: {instrument}")


-def rxte_pca_event_file_interpretation(hdulist):
+def rxte_pca_event_file_interpretation(input_data, header=None, hduname=None):
     """Interpret the FITS header of an RXTE event file.

     At the moment, only science event files are supported. In these files,
@@ -322,16 +324,52 @@
     Parameters
     ----------
-    hdulist : `astropy.io.fits.HDUList`
-        The FITS file to interpret.
+    input_data : str, fits.HDUList, fits.HDU, np.array
+        The name of the FITS file to interpret, or the HDUList contained in it,
+        or the HDU containing the data, or the data themselves.
+
+    Other parameters
+    ----------------
+    header : :class:`fits.Header`, optional
+        Compulsory if ``input_data`` is not a :class:`fits._BaseHDU`, a
+        :class:`fits.HDUList`, or a file name. The header of the relevant extension.
+    hduname : str, optional
+        Name of the HDU to use if ``input_data`` is a :class:`fits.HDUList`;
+        ignored otherwise.
     """
-    if "XTE_SE" not in hdulist:
+    if isinstance(input_data, str):
+        return rxte_pca_event_file_interpretation(
+            fits.open(input_data), header=header, hduname=hduname
+        )
+
+    if isinstance(input_data, fits.HDUList):
+        if hduname is None and "XTE_SE" not in input_data:
+            raise ValueError(
+                "No XTE_SE extension found. At the moment, only science events "
+                "are supported by Stingray for XTE."
+            )
+        if hduname is None:
+            hduname = "XTE_SE"
+        new_hdu = rxte_pca_event_file_interpretation(input_data[hduname], header=header)
+        input_data[hduname] = new_hdu
+        return input_data
+
+    if isinstance(input_data, fits.hdu.base._BaseHDU):
+        if header is None:
+            header = input_data.header
+        input_data.data = rxte_pca_event_file_interpretation(input_data.data, header=header)
+        return input_data
+
+    data = input_data
+    if header is None:
         raise ValueError(
-            "No XTE_SE extension found. At the moment, only science events "
-            "are supported by Stingray for XTE."
+            "If the input data is not an HDUList or an HDU, the header must be specified"
         )
-    tevtb2 = hdulist["XTE_SE"].header["TEVTB2"]
+
+    tevtb2 = header["TEVTB2"]
     local_chans = np.asarray([int(np.mean(ch)) for ch in _decode_energy_channels(tevtb2)])
-    # channels = _decode_energy_channels(tevtb2)
-    hdulist["XTE_SE"].data["PHA"] = local_chans[hdulist["XTE_SE"].data["PHA"]]
+    data["PHA"] = local_chans[data["PHA"]]
+
+    return data
diff --git a/stingray/mission_support/tests/test_mission_support.py b/stingray/mission_support/tests/test_mission_support.py
index 9a1f27fd5..3ee8dbec4 100644
--- a/stingray/mission_support/tests/test_mission_support.py
+++ b/stingray/mission_support/tests/test_mission_support.py
@@ -1,5 +1,6 @@
 import pytest
 import os
+import numpy as np
 from astropy.io import fits
 from stingray.mission_support import (
     rxte_pca_event_file_interpretation,
@@ -24,3 +25,27 @@ def setup_class(cls):
     def test_wrong_file_raises(self):
         with pytest.raises(ValueError, match="No XTE_SE extension found."):
             rxte_pca_event_file_interpretation(fits.open(self.wrongfile))
+
+    def test_rxte_pca_event_file_interpretation(self):
+        filename = os.path.join(datadir, "xte_test.evt.gz")
+        unchanged_hdulist = fits.open(filename)
+        assert np.min(unchanged_hdulist["XTE_SE"].data["PHA"]) == 0
+        assert np.max(unchanged_hdulist["XTE_SE"].data["PHA"]) == 60
+
+        first_new_hdulist = rxte_pca_event_file_interpretation(filename)
+
+        second_new_hdulist = rxte_pca_event_file_interpretation(fits.open(filename))
+
+        new_hdu = rxte_pca_event_file_interpretation(fits.open(filename)["XTE_SE"])
+        new_data = rxte_pca_event_file_interpretation(
+            fits.open(filename)["XTE_SE"].data, header=fits.open(filename)["XTE_SE"].header
+        )
+
+        for data in (
+            new_data,
+            new_hdu.data,
+            first_new_hdulist["XTE_SE"].data,
+            second_new_hdulist["XTE_SE"].data,
+        ):
+            assert np.min(data["PHA"]) == 2
+            assert np.max(data["PHA"]) == 221
diff --git a/stingray/tests/test_gti.py b/stingray/tests/test_gti.py
index 1cf29cda1..1848d08f4 100644
--- a/stingray/tests/test_gti.py
+++ b/stingray/tests/test_gti.py
@@ -7,6 +7,7 @@
 from stingray.gti import create_gti_from_condition, gti_len, gti_border_bins
 from stingray.gti import time_intervals_from_gtis, bin_intervals_from_gtis
 from stingray.gti import create_gti_mask_complete, join_equal_gti_boundaries
+from stingray.gti import split_gtis_at_indices, split_gtis_by_exposure
 from stingray import StingrayError

 curdir = os.path.abspath(os.path.dirname(__file__))
@@ -363,6 +364,38 @@ def test_join_boundaries(self):
         newg = join_equal_gti_boundaries(gti)
         assert np.allclose(newg, np.array([[1.16703354e08, 1.16703514e08]]))

+    def test_split_gtis_by_exposure_min_gti_sep(self):
+        gtis = [[0, 30], [86450, 86460]]
+        new_gtis = split_gtis_by_exposure(gtis, 400, new_interval_if_gti_sep=86400)
+        assert np.allclose(new_gtis[0], [[0, 30]])
+        assert np.allclose(new_gtis[1], [[86450, 86460]])
+    def test_split_gtis_by_exposure_no_min_gti_sep(self):
+        gtis = [[0, 30], [86440, 86470], [86490, 86520], [86530, 86560]]
+        new_gtis = split_gtis_by_exposure(gtis, 60, new_interval_if_gti_sep=None)
+        assert np.allclose(new_gtis[0], [[0, 30], [86440, 86470]])
+        assert np.allclose(new_gtis[1], [[86490, 86520], [86530, 86560]])
+
+    def test_split_gtis_by_exposure_small_exp(self):
+        gtis = [[0, 30], [86440, 86470], [86490, 86495], [86500, 86505]]
+        new_gtis = split_gtis_by_exposure(gtis, 15, new_interval_if_gti_sep=None)
+        assert np.allclose(
+            new_gtis[:4],
+            [
+                [[0, 15]],
+                [[15, 30]],
+                [[86440, 86455]],
+                [[86455, 86470]],
+            ],
+        )
+        assert np.allclose(new_gtis[4], [[86490, 86495], [86500, 86505]])
+
+    def test_split_gtis_at_indices(self):
+        gtis = [[0, 30], [50, 60], [80, 90]]
+        new_gtis = split_gtis_at_indices(gtis, 1)
+        assert np.allclose(new_gtis[0], [[0, 30]])
+        assert np.allclose(new_gtis[1], [[50, 60], [80, 90]])
+

 _ALL_METHODS = ["intersection", "union", "infer", "append"]
diff --git a/stingray/tests/test_io.py b/stingray/tests/test_io.py
index 5f3603e31..8c22ab145 100644
--- a/stingray/tests/test_io.py
+++ b/stingray/tests/test_io.py
@@ -9,7 +9,8 @@
 from ..io import ref_mjd
 from ..io import high_precision_keyword_read
 from ..io import load_events_and_gtis, read_mission_info
-from ..io import read_header_key
+from ..io import read_header_key, FITSTimeseriesReader, DEFAULT_FORMAT
+from ..events import EventList

 import warnings
@@ -214,3 +215,65 @@ def test_calibrate_spectrum(self):
         pis = np.array([1, 2, 3])
         energies = pi_to_energy(pis, self.rmf)
         assert np.allclose(energies, [1.66, 1.70, 1.74])
+
+
+class TestFITSTimeseriesReader(object):
+    @classmethod
+    def setup_class(cls):
+        curdir = os.path.abspath(os.path.dirname(__file__))
+        cls.datadir = os.path.join(curdir, "data")
+        cls.fname = os.path.join(datadir, "monol_testA.evt")
+
+    def test_read_fits_timeseries_bad_kind(self):
+        with pytest.raises(
+            NotImplementedError, match="Only events are supported by FITSTimeseriesReader"
+        ):
+            FITSTimeseriesReader(self.fname, output_class="bubu", data_kind="BAD_KIND")
+
+    def test_read_fits_timeseries(self):
+        reader = FITSTimeseriesReader(self.fname, output_class=EventList)
+        # Full slice
+        all_ev = reader[:]
+        assert np.all((all_ev.time > 80000000) & (all_ev.time < 80001024))
+
+    def test_read_fits_timeseries_by_nsamples(self):
+        reader = FITSTimeseriesReader(self.fname, output_class=EventList)
+        # Split into two files of ~500 photons each
+        outfnames = list(reader.split_by_number_of_samples(500, root_file_name="test"))
+        assert len(outfnames) == 2
+        ev0 = EventList.read(outfnames[0], fmt=DEFAULT_FORMAT)
+        ev1 = EventList.read(outfnames[1], fmt=DEFAULT_FORMAT)
+        assert np.all(ev0.time < 80000512.5)
+        assert np.all(ev1.time > 80000512.5)
+        for fname in outfnames:
+            os.unlink(fname)
+
+    def test_read_fits_timeseries_by_time_intv(self):
+        reader = FITSTimeseriesReader(self.fname, output_class=EventList)
+        # Filter at a single time interval and write the result to file
+        outfnames = list(
+            reader.filter_at_time_intervals([80000100, 80001100], root_file_name="test")
+        )
+        assert len(outfnames) == 1
+        ev0 = EventList.read(outfnames[0], fmt=DEFAULT_FORMAT)
+        assert np.all((ev0.time > 80000100) & (ev0.time < 80001100))
+        assert np.all((ev0.gti >= 80000100) & (ev0.gti < 80001100))
+        for fname in outfnames:
+            os.unlink(fname)
+
+    def test_read_fits_timeseries_by_nsamples_generator(self):
+        reader = FITSTimeseriesReader(self.fname, output_class=EventList)
+        # Split into two ~500-photon chunks, without writing files
+        ev0, ev1 = list(reader.split_by_number_of_samples(500))
+
+        assert np.all(ev0.time < 80000512.5)
+        assert np.all(ev1.time > 80000512.5)
+
+    def test_read_fits_timeseries_by_time_intv_generator(self):
+        reader = FITSTimeseriesReader(self.fname, output_class=EventList)
+        # Filter at a single time interval, without writing files
+        evs = list(reader.filter_at_time_intervals([80000100, 80001100]))
+        assert len(evs) == 1
+        ev0 = evs[0]
+        assert np.all((ev0.time > 80000100) & (ev0.time < 80001100))
+        assert np.all((ev0.gti >= 80000100) & (ev0.gti < 80001100))