From 40e2614fc3639cf639169b5a754765fa4ac958c2 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Fri, 30 Aug 2024 19:46:55 -0400 Subject: [PATCH] fixes, test, and docs for Lines prediction. --- doc/dataformat.rst | 66 ++++++++++++++++------------- prospect/io/read_results.py | 5 ++- prospect/models/sedmodel.py | 2 +- prospect/observation/observation.py | 56 +++++++++++++++--------- tests/test_predict.py | 43 ++++++++++++++++--- 5 files changed, 113 insertions(+), 59 deletions(-) diff --git a/doc/dataformat.rst b/doc/dataformat.rst index 29051163..1bba2862 100644 --- a/doc/dataformat.rst +++ b/doc/dataformat.rst @@ -9,13 +9,16 @@ The `Observation` class returned by :py:meth:`build_obs` (see below). Each Observation instance corresponds to a single dataset, and is basically a namespace that also supports dict-like accessing of important attributes. In addition to holding data and -uncertainties thereon, they tell prospector what data to predict, contain +uncertainties thereon, they tell |Codename| what data to predict, contain dataset-specific information for how to predict that data, and can even store methods for computing likelihoods in the case of complicated, dataset-specific -noise models. There are two fundamental kinds of data, `Photometry` and -`Spectrum` that are each subclasses of `Observation`. There is also also a -`Lines` class for integrated emission line fluxes. They have the following -attributes, most of which can also be accessed as dictionary keys. +noise models. + +There are two fundamental kinds of data, :py:class:`Photometry` and +:py:class:`Spectrum` that are each sub-classes of :py:class:`Observation`. There +is also also a :py:class:`Lines` class for integrated emission line fluxes. They +have the following attributes, most of which can also be accessed as dictionary +keys: - ``wavelength`` @@ -24,32 +27,35 @@ attributes, most of which can also be accessed as dictionary keys. Generally these should be observed frame wavelengths. - ``flux`` - The flux vector for a `Spectrum`, or the broadband fluxes for `Photometry` - ndarray of same length as the wavelength vector. For `Photometry` the units - are *maggies*. Maggies are a linear flux density unit defined as - :math:`{\rm maggie} = 10^{-0.4 \, m_{AB}}` where :math:`m_{AB}` is the AB - apparent magnitude. That is, 1 maggie is the flux density in Janskys divided - by 3631. If absolute spectrophotometry is available, the units for a - `Spectrum`` should also be maggies, otherwise photometry must be present and - a calibration vector must be supplied or fit. Note that for convenience - there is a `maggies_to_nJy` attribute of `Observation` that gives the - conversion factor. + The flux vector for a :py:class:`Spectrum`, or the broadband fluxes for + :py:class:`Photometry` ndarray of same length as the wavelength vector. + + For `Photometry` the units are *maggies*. Maggies are a linear flux density + unit defined as :math:`{\rm maggie} = 10^{-0.4 \, m_{AB}}` where + :math:`m_{AB}` is the AB apparent magnitude. That is, 1 maggie is the flux + density in Janskys divided by 3631. If absolute spectrophotometry is + available, the units for a :py:class:`Spectrum`` should also be maggies, + otherwise photometry must be present and a calibration vector must be + supplied or fit. Note that for convenience there is a `maggies_to_nJy` + attribute of `Observation` that gives the conversion factor. For + :py:class:`Lines`, the units should be erg/s/cm^2 - ``uncertainty`` The uncertainty vector (sigma), in same units as ``flux``, ndarray of same length as the wavelength vector. - ``mask`` - A boolean array of same length as the wavelength vector, where ``False`` - elements are ignored in the likelihood calculation. + A boolean array of same length as the wavelength vector, where ``False`` + elements are ignored in the likelihood calculation. - ``filters`` - For a `Photometry`, this is a list of strings corresponding to filter names - in `sedpy `_ + For a `Photometry`, this is a list of strings corresponding to filter names + in `sedpy `_ - ``line_ind`` - For a `Lines` instance, the (zero-based) index of the emission line in the - FSPS emission line table. + For a `Lines` instance, the (zero-based) index of the emission line in the + FSPS emission line + `table `_ In addition to these attributes, several additional aspects of an observation are used to help predict data or to compute likelihoods. The latter is @@ -57,18 +63,18 @@ particularly important in the case of complicated noise models, including outlie models, jitter terms, or covariant noise. - ``name`` - A string that can be used to identify the dataset. This can be useful for - dataset-specfic parameters. By default the name is constructed from the - `kind` and the memory location of the object. + A string that can be used to identify the dataset. This can be useful for + dataset-specfic parameters. By default the name is constructed from the + `kind` and the memory location of the object. - ``resolution`` - For a `Spectrum` this defines the instrumental resolution. Analagously to - the ``filters`` attribute for `Photometry`, this knowledge is used to - accurately predict the model in the space of the data. + For a `Spectrum` this defines the instrumental resolution. Analagously to + the ``filters`` attribute for `Photometry`, this knowledge is used to + accurately predict the model in the space of the data. -- ``noise`` A :py:class:`NoiseModel` instance. By default this implements a - simple chi-square calculation of independent noise, but it can be - complexified. +- ``noise`` + A :py:class:`NoiseModel` instance. By default this implements a simple + chi-square calculation of independent noise, but it can be complexified. Example diff --git a/prospect/io/read_results.py b/prospect/io/read_results.py index 0e9dac33..c4dd3e5c 100644 --- a/prospect/io/read_results.py +++ b/prospect/io/read_results.py @@ -185,7 +185,10 @@ def read_hdf5(filename, **extras): res["optimization"] = groups["optimization"] # do observations if 'observations' in hf: - obs = obs_from_h5(hf['observations']) + try: + obs = obs_from_h5(hf['observations']) + except: + obs = None else: obs = None diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index b7e73477..6a30764d 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -369,7 +369,7 @@ def predict_lines(self, obs, **extras): # find the indices of the observed emission lines #dw = np.abs(self._ewave_obs[:, None] - self._outwave[None, :]) #self._predicted_line_inds = np.argmin(dw, axis=0) - self._predicted_line_inds = obs.get("line_ind") + self._predicted_line_inds = obs["line_inds"] self._speccal = 1.0 self.line_norm = self.flux_norm() / (1 + self._zred) * (3631*jansky_cgs) diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 60f916c3..0062db13 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -100,17 +100,19 @@ def rectify(self): appropriate sizes. Also auto-masks non-finite data or negative uncertainties. """ + n = self.__repr__ if self.flux is None: - print(f"{self.__repr__} has no data") + print(f"{n} has no data") return - assert self.wavelength.ndim == 1, "`wavelength` is not 1-d array" - assert self.flux.ndim == 1, "flux is not a 1d array" - assert self.uncertainty.ndim == 1, "uncertainty is not a 1d array" - assert self.ndata > 0, "no wavelength points supplied!" - assert self.uncertainty is not None, "No uncertainties." - assert len(self.wavelength) == len(self.flux), "Flux array not same shape as wavelength." - assert len(self.wavelength) == len(self.uncertainty), "Uncertainty array not same shape as wavelength." + assert self.flux.ndim == 1, f"{n}: flux is not a 1d array" + assert self.uncertainty.ndim == 1, f"{n}: uncertainty is not a 1d array" + assert self.ndata > 0, f"{n} no data supplied!" + assert self.uncertainty is not None, f"{n} No uncertainties." + assert len(self.flux) == len(self.uncertainty), f"{n}: flux and uncertainty of different length" + if self.wavelength is not None: + assert self.wavelength.ndim == 1, f"{n}: `wavelength` is not 1-d array" + assert len(self.wavelength) == len(self.flux), f"{n}: Wavelength array not same shape as flux array" self._automask() @@ -145,10 +147,10 @@ def ndof(self): @property def ndata(self): # TODO: cache this? - if self.wavelength is None: + if self.flux is None: return 0 else: - return len(self.wavelength) + return len(self.flux) @property def wave_min(self): @@ -240,7 +242,7 @@ def __init__(self, filters=[], The names or instances of Filters to use flux : iterable of floats - The flux through the filters, in units of maggies + The flux through the filters, in units of maggies. uncertainty : iterable of floats The uncertainty on the flux @@ -301,7 +303,6 @@ def __init__(self, response=None, name=None, lambda_pad=100, - polynomial_order=0., **kwargs): """ @@ -311,7 +312,8 @@ def __init__(self, The wavelength of each flux measurement, in vacuum AA flux : iterable of floats - The flux at each wavelength, in units of maggies, same length as ``wavelength`` + The flux at each wavelength, in units of maggies, same length as + ``wavelength`` uncertainty : iterable of floats The uncertainty on the flux @@ -329,7 +331,7 @@ def __init__(self, self.resolution = resolution self.response = response self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) - self.wavelength = wavelength + self.wavelength = np.atleast_1d(wavelength) @property def wavelength(self): @@ -439,7 +441,6 @@ def instrumental_smoothing(self, model_wave_obsframe, model_flux, zred=0, libres return outspec_padded[self._unpadded_inds] - def compute_response(self, **extras): if self.response is not None: return self.response @@ -447,7 +448,7 @@ def compute_response(self, **extras): return 1.0 -class Lines(Spectrum): +class Lines(Observation): _kind = "lines" alias = dict(spectrum="flux", @@ -458,11 +459,12 @@ class Lines(Spectrum): _meta = ("name", "kind") _data = ("wavelength", "flux", "uncertainty", "mask", - "resolution", "calibration", "line_ind") + "line_ind") def __init__(self, line_ind=None, line_names=None, + wavelength=None, name=None, **kwargs): @@ -476,7 +478,8 @@ def __init__(self, The wavelength of each flux measurement, in vacuum AA flux : iterable of floats - The flux at each wavelength, in units of maggies, same length as ``wavelength`` + The flux at each wavelength, in units of erg/s/cm^2, same length as + ``wavelength`` uncertainty : iterable of floats The uncertainty on the flux @@ -492,16 +495,25 @@ def __init__(self, :param calibration: not sure yet .... """ - super(Lines, self).__init__(name=name, resolution=None, **kwargs) + super(Lines, self).__init__(name=name, **kwargs) assert (line_ind is not None), "You must identify the lines by their index in the FSPS emission line array" - self.line_ind = np.array(line_ind).as_type(int) + self.line_ind = np.array(line_ind).astype(int) self.line_names = line_names + if wavelength is not None: + self._wavelength = np.atleast_1d(wavelength) + else: + self._wavelength = None + + @property + def wavelength(self): + return self._wavelength + class UndersampledSpectrum(Spectrum): """ As for Spectrum, but account for pixelization effects when pixels - undersamplesthe instrumental LSF. + undersample the instrumental LSF. """ #TODO: Implement as a convolution with a square kernel (or sinc in frequency space) @@ -754,6 +766,8 @@ def from_oldstyle(obs, **kwargs): def from_serial(arr, meta): + # TODO: This does not account for composite or special classes, or include + # noise models kind = obstypes[meta.pop("kind")] adict = {a:arr[a] for a in arr.dtype.names} diff --git a/tests/test_predict.py b/tests/test_predict.py index b474b4bc..7f57282f 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -6,7 +6,7 @@ from prospect.sources import CSPSpecBasis from prospect.models import SpecModel, templates -from prospect.observation import Spectrum, Photometry +from prospect.observation import Spectrum, Photometry, Lines @pytest.fixture(scope="module") @@ -22,7 +22,7 @@ def build_model(add_neb=False): return SpecModel(model_params) -def build_obs(multispec=True): +def build_obs(multispec=True, add_lines=False): N = 1500 * (2 - multispec) wmax = 7000 wsplit = wmax - N * multispec @@ -39,9 +39,26 @@ def build_obs(multispec=True): flux=np.ones(N), uncertainty=np.ones(N) / 10, mask=slice(None))] - obslist = spec + phot - [obs.rectify() for obs in obslist] - return obslist + obs = spec + phot + + if add_lines: + line_ind = [59, 62, 74, 73, 75] # index of line in FSPS table + line_name = ["Hb", "OIII-5007", "Ha", "NII-6548", "NII-6584"] + line_wave = [4861, 5007, 6563, 6548, 6584] # can be approximate + n_line = len(line_ind) + lines = Lines(line_ind=line_ind, + flux=np.ones(n_line), # erg/s/cm^2 + uncertainty=np.ones(n_line)/10, + line_names=line_name, # optional + wavelength=line_wave, # optional + ) + obs += [lines] + + [ob.rectify() for ob in obs] + for ob in obs: + assert ob.ndof > 0 + + return obs def test_prediction_nodata(build_sps): @@ -86,12 +103,26 @@ def test_multispec(build_sps, plot=False): ax.plot(o.wavelength, p) +def test_line(build_sps, plot=False): + + obs = build_obs(multispec=False, add_lines=True) + model = build_model(add_neb=True) + + sps = build_sps + preds, mfrac = model.predict(model.theta, observations=obs, sps=sps) + (spec, phot, lines) = preds + assert np.all(lines) > 0 + + #print(f"log(OIII[5007]/Hb)={np.log10(lines[1]/lines[0])}") + #print(f"log(NII/Ha)={np.log10(lines[-2:]/lines[2])}") + + def lnlike_testing(build_sps): # testing lnprobfn - sps = build_sps observations = build_obs() model = build_model(add_neb=True) + sps = build_sps from prospect.likelihood.likelihood import compute_lnlike from prospect.fitting import lnprobfn