From 5a82f06ca66a435553bc3ea6af85934dfbfa25cc Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 18 Jun 2024 13:15:24 -0400 Subject: [PATCH 01/85] add ModelLibrary --- jwst/datamodels/__init__.py | 2 + jwst/datamodels/library.py | 116 +++++++++++++++++++++++ jwst/datamodels/tests/test_library.py | 131 ++++++++++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 jwst/datamodels/library.py create mode 100644 jwst/datamodels/tests/test_library.py diff --git a/jwst/datamodels/__init__.py b/jwst/datamodels/__init__.py index 2bf017f093..7bf474c86f 100644 --- a/jwst/datamodels/__init__.py +++ b/jwst/datamodels/__init__.py @@ -8,6 +8,7 @@ from stdatamodels.jwst.datamodels.util import open from .container import ModelContainer +from .library import ModelLibrary from .source_container import SourceModelContainer import stdatamodels.jwst.datamodels @@ -19,6 +20,7 @@ __all__ = [ 'open', 'ModelContainer', 'SourceModelContainer', + 'ModelLibrary', ] + stdatamodels.jwst.datamodels.__all__ diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py new file mode 100644 index 0000000000..49206b5643 --- /dev/null +++ b/jwst/datamodels/library.py @@ -0,0 +1,116 @@ +import io + +import asdf +from astropy.io import fits +from stdatamodels.jwst.datamodels.util import open as datamodels_open +from stpipe.library import AbstractModelLibrary, NoGroupID + +from jwst.associations import AssociationNotValidError, load_asn + +__all__ = ["ModelLibrary"] + + +class ModelLibrary(AbstractModelLibrary): + @property + def crds_observatory(self): + return "jwst" + + def _model_to_filename(self, model): + model_filename = model.meta.filename + if model_filename is None: + model_filename = "model.fits" + return model_filename + + def _datamodels_open(self, filename, **kwargs): + return datamodels_open(filename, **kwargs) + + @classmethod + def _load_asn(cls, asn_path): + try: + with open(asn_path) as asn_file: + asn_data = load_asn(asn_file) + except AssociationNotValidError 
as e: + raise OSError("Cannot read ASN file.") from e + return asn_data + + def _filename_to_group_id(self, filename): + """ + Compute a "group_id" without loading the file as a DataModel + + This function will return the meta.group_id stored in the ASDF + extension (if it exists) or a group_id calculated from the + FITS headers. + """ + # use astropy.io.fits directly to read header keywords + # avoiding the DataModel overhead + # TODO look up attribute to keyword in core schema + with fits.open(filename) as ff: + if "ASDF" in ff: + asdf_yaml = asdf.util.load_yaml(io.BytesIO(ff['ASDF'].data.tobytes())) + if group_id := asdf_yaml.get('meta', {}).get('group_id'): + return group_id + header = ff["PRIMARY"].header + program_number = header["PROGRAM"] + observation_number = header["OBSERVTN"] + visit_number = header["VISIT"] + visit_group = header["VISITGRP"] + sequence_id = header["SEQ_ID"] + activity_id = header["ACT_ID"] + exposure_number = header["EXPOSURE"] + + # FIXME try except and NoGroupID... + return _attrs_to_group_id( + program_number, + observation_number, + visit_number, + visit_group, + sequence_id, + activity_id, + exposure_number, + ) + + def _model_to_group_id(self, model): + """ + Compute a "group_id" from a model using the DataModel interface + """ + if group_id := getattr(model.meta, "group_id", None): + return group_id + # FIXME try except and NoGroupID... 
+ return _attrs_to_group_id( + model.meta.observation.program_number, + model.meta.observation.observation_number, + model.meta.observation.visit_number, + model.meta.observation.visit_group, + model.meta.observation.sequence_id, + model.meta.observation.activity_id, + model.meta.observation.exposure_number, + ) + + def _assign_member_to_model(self, model, member): + for attr in ("group_id", "tweakreg_catalog", "exptype"): + if attr in member: + setattr(model.meta, attr, member[attr]) + if not hasattr(model.meta, "asn"): + model.meta["asn"] = {} + + model.meta.asn.table_name = self.asn.get("table_name", "") + model.meta.asn.pool_name = self.asn.get("asn_pool", "") + + +def _attrs_to_group_id( + program_number, + observation_number, + visit_number, + visit_group, + sequence_id, + activity_id, + exposure_number, + ): + """ + Combine a number of file metadata values into a ``group_id`` string + """ + return ( + f"jw{program_number}{observation_number}{visit_number}" + f"_{visit_group}{sequence_id}{activity_id}" + f"_{exposure_number}" + ) diff --git a/jwst/datamodels/tests/test_library.py b/jwst/datamodels/tests/test_library.py new file mode 100644 index 0000000000..cab92a3d6b --- /dev/null +++ b/jwst/datamodels/tests/test_library.py @@ -0,0 +1,131 @@ +from contextlib import nullcontext +import json + +import pytest +import stdatamodels.jwst.datamodels +from stdatamodels.jwst.datamodels import ImageModel + +from jwst.associations.asn_from_list import asn_from_list +from jwst.associations.load_as_asn import load_asn +from jwst.datamodels.library import ModelLibrary +import jwst.datamodels as dm + + + +# for the example association, set 2 different observation numbers +# so the association will have 2 groups (since all other group_id +# determining meta is the same, see `example_asn_path`) +_OBSERVATION_NUMBERS = ['1', '1', '2'] +_N_MODELS = len(_OBSERVATION_NUMBERS) +_N_GROUPS = len(set(_OBSERVATION_NUMBERS)) +_PRODUCT_NAME = "foo_out" + + +@pytest.fixture +def 
example_asn_path(tmp_path): + """ + Fixture that creates a simple association, saves it (and the models) + to disk, and returns the path of the saved association + """ + fns = [] + for i in range(_N_MODELS): + m = ImageModel() + m.meta.observation.program_number = '0001' + m.meta.observation.observation_number = _OBSERVATION_NUMBERS[i] + m.meta.observation.visit_number = '1' + m.meta.observation.visit_group = '1' + m.meta.observation.sequence_id = '01' + m.meta.observation.activity_id = '1' + m.meta.observation.exposure_number = '1' + m.meta.instrument.name = 'NIRCAM' + m.meta.instrument.channel = 'SHORT' + base_fn = f'{i}.fits' + m.meta.filename = base_fn + m.save(str(tmp_path / base_fn)) + fns.append(base_fn) + asn = asn_from_list(fns, product_name=_PRODUCT_NAME) + base_fn, contents = asn.dump(format="json") + asn_filename = tmp_path / base_fn + with open(asn_filename, 'w') as f: + f.write(contents) + return asn_filename + + +@pytest.fixture +def example_library(example_asn_path): + """ + Fixture that builds off of `example_asn_path` and returns a + library created from the association with default options + """ + return ModelLibrary(example_asn_path) + + +def _set_custom_member_attr(example_asn_path, member_index, attr, value): + """ + Helper function to modify the association at `example_asn_path` + by adding an attribute `attr` to the member list (at index + `member_index`) with value `value`. This is used to modify + the `group_id` or `exptype` of a certain member for some tests. 
+ """ + with open(example_asn_path, 'r') as f: + asn_data = load_asn(f) + asn_data['products'][0]['members'][member_index][attr] = value + with open(example_asn_path, 'w') as f: + json.dump(asn_data, f) + + +def test_load_asn(example_library): + """ + Test that __len__ returns the number of models/members loaded + from the association (and does not require opening the library) + """ + assert len(example_library) == _N_MODELS + + +@pytest.mark.parametrize("attr", ["group_names", "group_indices"]) +def test_group_with_no_datamodels_open(example_asn_path, attr, monkeypatch): + """ + Test that the "grouping" methods do not call datamodels.open + """ + # patch datamodels.open to always raise an exception + # this will serve as a smoke test to see if any of the attribute + # accesses (or instance creation) attempts to open models + def no_open(*args, **kwargs): + raise Exception() + + monkeypatch.setattr(stdatamodels.jwst.datamodels, 'open', no_open) + + # use example_asn_path here to make the instance after we've patched + # datamodels.open + library = ModelLibrary(example_asn_path) + getattr(library, attr) + + +@pytest.mark.parametrize( + "asn_group_id, meta_group_id, expected_group_id", [ + ('42', None, '42'), + (None, '42', '42'), + ('42', '26', '42'), + ]) +def test_group_id_override(example_asn_path, asn_group_id, meta_group_id, expected_group_id): + """ + Test that overriding a model's group_id via: + - the association member entry + - the model.meta.group_id + overwrites the automatically calculated group_id (with the asn taking precedence) + """ + if asn_group_id: + _set_custom_member_attr(example_asn_path, 0, 'group_id', asn_group_id) + if meta_group_id: + model_filename = example_asn_path.parent / '0.fits' + with dm.open(model_filename) as model: + model.meta.group_id = meta_group_id + model.save(model_filename) + library = ModelLibrary(example_asn_path) + group_names = library.group_names + assert len(group_names) == 3 + assert expected_group_id in group_names 
+ with library: + model = library.borrow(0) + assert model.meta.group_id == expected_group_id + library.shelve(model, 0, modify=False) From 98fb07f7406b713d47167c9f7284eb7a5f975452 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 20 Jun 2024 13:43:49 -0400 Subject: [PATCH 02/85] update tweakreg to use ModelLibrary --- jwst/lib/exposure_types.py | 6 +- jwst/pipeline/calwebb_image3.py | 71 +++--- jwst/pipeline/calwebb_spec3.py | 2 +- jwst/stpipe/core.py | 18 +- jwst/stpipe/utilities.py | 6 + jwst/tweakreg/tests/test_multichip_jwst.py | 45 ++-- jwst/tweakreg/tests/test_tweakreg.py | 81 ++++--- jwst/tweakreg/tweakreg_step.py | 248 +++++++++++---------- 8 files changed, 262 insertions(+), 215 deletions(-) diff --git a/jwst/lib/exposure_types.py b/jwst/lib/exposure_types.py index 70bcc04699..493695d57a 100644 --- a/jwst/lib/exposure_types.py +++ b/jwst/lib/exposure_types.py @@ -87,10 +87,8 @@ def is_nrs_autoflat(datamodel): return exp_type.lower() == 'nrs_autoflat' -def is_moving_target(input_models): +def is_moving_target(datamodel): """ Determine if a moving target exposure.""" - model = input_models[0] - if hasattr(model.meta.target, 'type') and \ - model.meta.target.type is not None and model.meta.target.type.lower() == 'moving': + if (hasattr(datamodel.meta.target, 'type') and datamodel.meta.target.type is not None and datamodel.meta.target.type.lower() == 'moving'): return True return False diff --git a/jwst/pipeline/calwebb_image3.py b/jwst/pipeline/calwebb_image3.py index d67a6171d5..927833af27 100644 --- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -1,6 +1,6 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary from ..stpipe import Pipeline from ..lib.exposure_types import is_moving_target @@ -50,15 +50,11 @@ def process(self, input_data): Parameters ---------- - input_data: Level3 Association, or ~jwst.datamodels.ModelContainer + input_data: Level3 
Association, or ~jwst.datamodels.ModelLibrary The exposures to process """ self.log.info('Starting calwebb_image3 ...') - # Only load science members from input ASN; - # background and target-acq members are not needed. - asn_exptypes = ['science'] - # Configure settings for saving results files self.outlier_detection.suffix = 'crf' self.outlier_detection.save_results = self.save_results @@ -68,36 +64,39 @@ def process(self, input_data): self.source_catalog.save_results = self.save_results - with datamodels.open(input_data, asn_exptypes=asn_exptypes) as input_models: - # If input is an association, set the output to the product name. - if self.output_file is None: - try: - self.output_file = input_models.meta.asn_table.products[0].name - except (AttributeError, IndexError): - pass - - # Check if input is single or multiple exposures - try: - has_groups = len(input_models.group_names) >= 1 - except (AttributeError, TypeError, KeyError): - has_groups = False - - if isinstance(input_models, ModelContainer) and has_groups: - if is_moving_target(input_models): - input_models = self.assign_mtwcs(input_models) - else: - input_models = self.tweakreg(input_models) - - input_models = self.skymatch(input_models) - input_models = self.outlier_detection(input_models) - - elif self.skymatch.skymethod == 'match': - self.log.warning("Turning 'skymatch' step off for a single " - "input image when 'skymethod' is 'match'") + # Only load science members from input ASN; + # background and target-acq members are not needed. + input_models = self._datamodels_open(input_data, asn_exptypes=['science']) + if output_file is None and isinstance(input_models, ModelLibrary): + # If input is an association, set the output to the product name. 
+ self.output_file = input_models.asn["products"][0]["name"] + + if isinstance(input_models, ModelLibrary): + with input_models: + model = input_models.borrow(0) + is_moving = is_moving_target(model) + input_models.shelve(model, 0, modify=False) + if is_moving: + raise Exception("Broken...") # FIXME + input_models = self.assign_mtwcs(input_models) else: - input_models = self.skymatch(input_models) + input_models = self.tweakreg(input_models) + + input_models = self.skymatch(input_models) + input_models = self.outlier_detection(input_models) + + # elif self.skymatch.skymethod == 'match': + # self.log.warning("Turning 'skymatch' step off for a single " + # "input image when 'skymethod' is 'match'") + + # else: + # # FIXME: here input_models is a DataModel, passing + # # that to skymatch would cause an error when it tries to call + # # ModelContainer(DataModel). This can be seen by running + # # strun calwebb_image3 any_cal.fits --steps.skymatch.method=local + # input_models = self.skymatch(input_models) - result = self.resample(input_models) - if isinstance(result, datamodels.ImageModel) and result.meta.cal_step.resample == 'COMPLETE': - self.source_catalog(result) + result = self.resample(input_models) + if isinstance(result, datamodels.ImageModel) and result.meta.cal_step.resample == 'COMPLETE': + self.source_catalog(result) diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 23b9902a7c..b3b89b9ee6 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -138,7 +138,7 @@ def process(self, input): for member in product['members']: members_by_type[member['exptype'].lower()].append(member['expname']) - if is_moving_target(input_models): + if is_moving_target(input_models[0]): self.log.info("Assigning WCS to a Moving Target exposure.") input_models = self.assign_mtwcs(input_models) diff --git a/jwst/stpipe/core.py b/jwst/stpipe/core.py index 2c86df37c8..edf2cd710e 100644 --- a/jwst/stpipe/core.py +++ 
b/jwst/stpipe/core.py @@ -1,16 +1,19 @@ """ JWST-specific Step and Pipeline base classes. """ +import logging +import os + from stdatamodels.jwst.datamodels import JwstDataModel from stdatamodels.jwst import datamodels - -from .. import __version_commit__, __version__ - from stpipe import crds_client from stpipe import Step from stpipe import Pipeline + +from .. import __version_commit__, __version__ from ..lib.suffix import remove_suffix -import logging +from jwst.datamodels.library import ModelLibrary + log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -24,7 +27,12 @@ class JwstStep(Step): @classmethod def _datamodels_open(cls, init, **kwargs): - return datamodels.open(init, **kwargs) + if isinstance(init, ModelLibrary): + return init + if isinstance(init, JwstDataModel) or os.path.splitext(init)[1] in (".asdf", ".fits"): + return datamodels.open(init, **kwargs) + return ModelLibrary(init) + def load_as_level2_asn(self, obj): """Load object as an association diff --git a/jwst/stpipe/utilities.py b/jwst/stpipe/utilities.py index 67a4769113..e4643d9512 100644 --- a/jwst/stpipe/utilities.py +++ b/jwst/stpipe/utilities.py @@ -36,6 +36,7 @@ import os import re from collections.abc import Sequence +from jwst.datamodels import ModelLibrary # Configure logging logger = logging.getLogger(__name__) @@ -172,6 +173,11 @@ def record_step_status(datamodel, cal_step, success=True): if isinstance(datamodel, Sequence): for model in datamodel: model.meta.cal_step._instance[cal_step] = status + elif isinstance(datamodel, ModelLibrary): + with datamodel: + for model in datamodel: + model.meta.cal_step._instance[cal_step] = status + datamodel.shelve(model) else: datamodel.meta.cal_step._instance[cal_step] = status diff --git a/jwst/tweakreg/tests/test_multichip_jwst.py b/jwst/tweakreg/tests/test_multichip_jwst.py index 952e61d81e..b29bd8959a 100644 --- a/jwst/tweakreg/tests/test_multichip_jwst.py +++ b/jwst/tweakreg/tests/test_multichip_jwst.py @@ -291,7 +291,7 @@ def 
test_multichip_jwst_alignment(monkeypatch): assert rmse_dec < _REF_RMSE_DEC -def test_multichip_alignment_step(monkeypatch): +def test_multichip_alignment_step_rel(monkeypatch): monkeypatch.setattr(tweakreg_step.twk, 'align_wcs', _align_wcs) monkeypatch.setattr(tweakreg_step, 'make_tweakreg_catalog', _make_tweakreg_catalog) @@ -402,24 +402,31 @@ def test_multichip_alignment_step(monkeypatch): # Alternatively, disable this '_is_wcs_correction_small' test: # step._is_wcs_correction_small = lambda x, y: True - mr, m1, m2 = step.process(mc) - for im in [mr, m1, m2]: - assert im.meta.cal_step.tweakreg == 'COMPLETE' - - wc1 = m1.meta.wcs - wc2 = m2.meta.wcs - - ra1, dec1 = wc1(imcat1['x'], imcat1['y']) - ra2, dec2 = wc2(imcat2['x'], imcat2['y']) - ra = np.concatenate([ra1, ra2]) - dec = np.concatenate([dec1, dec2]) - rra = refcat['RA'] - rdec = refcat['DEC'] - rmse_ra = np.sqrt(np.mean((ra - rra)**2)) - rmse_dec = np.sqrt(np.mean((dec - rdec)**2)) - - assert rmse_ra < _REF_RMSE_RA - assert rmse_dec < _REF_RMSE_DEC + result = step.process(mc) + with result: + for im in result: + assert im.meta.cal_step.tweakreg == 'COMPLETE' + result.shelve(im) + + with result: + m1 = result.borrow(1) + m2 = result.borrow(2) + wc1 = m1.meta.wcs + wc2 = m2.meta.wcs + + ra1, dec1 = wc1(imcat1['x'], imcat1['y']) + ra2, dec2 = wc2(imcat2['x'], imcat2['y']) + ra = np.concatenate([ra1, ra2]) + dec = np.concatenate([dec1, dec2]) + rra = refcat['RA'] + rdec = refcat['DEC'] + rmse_ra = np.sqrt(np.mean((ra - rra)**2)) + rmse_dec = np.sqrt(np.mean((dec - rdec)**2)) + + assert rmse_ra < _REF_RMSE_RA + assert rmse_dec < _REF_RMSE_DEC + result.shelve(m1, 1, modify=False) + result.shelve(m2, 2, modify=False) def test_multichip_alignment_step_abs(monkeypatch): diff --git a/jwst/tweakreg/tests/test_tweakreg.py b/jwst/tweakreg/tests/test_tweakreg.py index f17b378def..a038410e69 100644 --- a/jwst/tweakreg/tests/test_tweakreg.py +++ b/jwst/tweakreg/tests/test_tweakreg.py @@ -196,12 +196,18 @@ def 
test_tweakreg_step(example_input, with_shift): result = step(example_input) # check that step completed - for model in result: - assert model.meta.cal_step.tweakreg == 'COMPLETE' - - # and that the wcses differ by a small amount due to the shift above - # by projecting one point through each wcs and comparing the difference - abs_delta = abs(result[1].meta.wcs(0, 0)[0] - result[0].meta.wcs(0, 0)[0]) + with result: + for model in result: + assert model.meta.cal_step.tweakreg == 'COMPLETE' + result.shelve(model, modify=False) + + # and that the wcses differ by a small amount due to the shift above + # by projecting one point through each wcs and comparing the difference + r0 = result.borrow(0) + r1 = result.borrow(1) + abs_delta = abs(r1.meta.wcs(0, 0)[0] - r0.meta.wcs(0, 0)[0]) + result.shelve(r0, 0, modify=False) + result.shelve(r1, 1, modify=False) if with_shift: assert abs_delta > 1E-5 else: @@ -224,8 +230,10 @@ def test_src_confusion_pars(example_input, alignment_type): result = step(example_input) # check that step was skipped - for model in result: - assert model.meta.cal_step.tweakreg == 'SKIPPED' + with result: + for model in result: + assert model.meta.cal_step.tweakreg == 'SKIPPED' + result.shelve(model) @pytest.fixture() @@ -330,6 +338,7 @@ def test_custom_catalog(custom_catalog_path, example_input, catfile, asn, meta, kwargs = {'use_custom_catalogs': custom} if catfile != "no_catfile": kwargs["catfile"] = str(catfile_path) + step = tweakreg_step.TweakRegStep(**kwargs) # patch _construct_wcs_corrector to check the correct catalog was loaded @@ -349,6 +358,7 @@ def patched_construct_wcs_corrector(wcs, wcsinfo, catalog, group_id, _seen=[]): with pytest.raises(ValueError, match="done testing"): step(str(asn_path)) + @pytest.mark.parametrize("with_shift", [True, False]) def test_sip_approx(example_input, with_shift): """ @@ -376,31 +386,36 @@ def test_sip_approx(example_input, with_shift): # run the step on the example input modified above result = 
step(example_input) - # output wcs differs by a small amount due to the shift above: - # project one point through each wcs and compare the difference - abs_delta = abs(result[1].meta.wcs(0, 0)[0] - result[0].meta.wcs(0, 0)[0]) - if with_shift: - assert abs_delta > 1E-5 - else: - assert abs_delta < 1E-12 - - # the first wcs is identical to the input and - # does not have SIP approximation keywords -- - # they are normally set by assign_wcs - assert np.allclose(result[0].meta.wcs(0, 0)[0], example_input[0].meta.wcs(0, 0)[0]) - for key in ['ap_order', 'bp_order']: - assert key not in result[0].meta.wcsinfo.instance - - # for the second, SIP approximation should be present - for key in ['ap_order', 'bp_order']: - assert result[1].meta.wcsinfo.instance[key] == 3 - - # evaluate fits wcs and gwcs for the approximation, make sure they agree - wcs_info = result[1].meta.wcsinfo.instance - grid = grid_from_bounding_box(result[1].meta.wcs.bounding_box) - gwcs_ra, gwcs_dec = result[1].meta.wcs(*grid) - fits_wcs = WCS(wcs_info) - fitswcs_res = fits_wcs.pixel_to_world(*grid) + with result: + r0 = result.borrow(0) + r1 = result.borrow(1) + # output wcs differs by a small amount due to the shift above: + # project one point through each wcs and compare the difference + abs_delta = abs(r1.meta.wcs(0, 0)[0] - r0.meta.wcs(0, 0)[0]) + if with_shift: + assert abs_delta > 1E-5 + else: + assert abs_delta < 1E-12 + + # the first wcs is identical to the input and + # does not have SIP approximation keywords -- + # they are normally set by assign_wcs + assert np.allclose(r0.meta.wcs(0, 0)[0], example_input[0].meta.wcs(0, 0)[0]) + for key in ['ap_order', 'bp_order']: + assert key not in r0.meta.wcsinfo.instance + + # for the second, SIP approximation should be present + for key in ['ap_order', 'bp_order']: + assert r1.meta.wcsinfo.instance[key] == 3 + + # evaluate fits wcs and gwcs for the approximation, make sure they agree + wcs_info = r1.meta.wcsinfo.instance + grid = 
grid_from_bounding_box(r1.meta.wcs.bounding_box) + gwcs_ra, gwcs_dec = r1.meta.wcs(*grid) + fits_wcs = WCS(wcs_info) + fitswcs_res = fits_wcs.pixel_to_world(*grid) + result.shelve(r0, 0, modify=False) + result.shelve(r1, 1, modify=False) assert np.allclose(fitswcs_res.ra.deg, gwcs_ra) assert np.allclose(fitswcs_res.dec.deg, gwcs_dec) diff --git a/jwst/tweakreg/tweakreg_step.py b/jwst/tweakreg/tweakreg_step.py index 0ae6a51eb0..ab9a887e7b 100644 --- a/jwst/tweakreg/tweakreg_step.py +++ b/jwst/tweakreg/tweakreg_step.py @@ -12,9 +12,9 @@ import stcal.tweakreg.tweakreg as twk -from jwst.datamodels import ModelContainer from jwst.stpipe import record_step_status from jwst.assign_wcs.util import update_fits_wcsinfo, update_s_region_imaging +from jwst.datamodels import ModelLibrary, ModelContainer # LOCAL from ..stpipe import Step @@ -128,7 +128,12 @@ class TweakRegStep(Step): reference_file_types = [] def process(self, input): - images = ModelContainer(input) + if isinstance(input, ModelLibrary): + images = input + elif isinstance(input, ModelContainer): + images = ModelLibrary(input, on_disk=False) + else: + images = ModelLibrary(input, on_disk=True) if len(images) == 0: raise ValueError("Input must contain at least one image model.") @@ -153,15 +158,15 @@ def process(self, input): ) use_custom_catalogs = False # else, load from association - elif hasattr(images.meta, "asn_table") and getattr(images, "asn_file_path", None) is not None: + elif images._asn_dir is not None: catdict = {} - asn_dir = path.dirname(images.asn_file_path) - for member in images.meta.asn_table.products[0].members: - if hasattr(member, "tweakreg_catalog"): - if member.tweakreg_catalog is None or not member.tweakreg_catalog.strip(): - catdict[member.expname] = None + for member in images.asn["products"][0]["members"]: + if "tweakreg_catalog" in member: + tweakreg_catalog = member["tweakreg_catalog"] + if tweakreg_catalog is None or not tweakreg_catalog.strip(): + catdict[member["expname"]] = None 
else: - catdict[member.expname] = path.join(asn_dir, member.tweakreg_catalog) + catdict[member["expname"]] = path.join(images._asn_dir, tweakreg_catalog) if self.abs_refcat is not None and self.abs_refcat.strip(): align_to_abs_refcat = True @@ -186,62 +191,65 @@ def process(self, input): # pre-allocate collectors (same length and order as images) correctors = [None] * len(images) - # Build the catalog for each input image - for (model_index, image_model) in enumerate(images): - # now that the model is open, check it's metadata for a custom catalog - # only if it's not listed in the catdict - if use_custom_catalogs and image_model.meta.filename not in catdict: - if (image_model.meta.tweakreg_catalog is not None and image_model.meta.tweakreg_catalog.strip()): - catdict[image_model.meta.filename] = image_model.meta.tweakreg_catalog - if use_custom_catalogs and catdict.get(image_model.meta.filename, None) is not None: - # FIXME this modifies the input_model - image_model.meta.tweakreg_catalog = catdict[image_model.meta.filename] - # use user-supplied catalog: - self.log.info("Using user-provided input catalog " - f"'{image_model.meta.tweakreg_catalog}'") - catalog = Table.read( - image_model.meta.tweakreg_catalog, - ) - save_catalog = False - else: - # source finding - catalog = self._find_sources(image_model) - - # only save if catalog was computed from _find_sources and - # the user requested save_catalogs - save_catalog = self.save_catalogs - - # if needed rename xcentroid to x, ycentroid to y - catalog = _rename_catalog_columns(catalog) - - # filter all sources outside the wcs bounding box - catalog = twk.filter_catalog_by_bounding_box( - catalog, - image_model.meta.wcs.bounding_box) - - # setting 'name' is important for tweakwcs logging - if catalog.meta.get('name') is None: - catalog.meta['name'] = path.splitext(image_model.meta.filename)[0].strip('_- ') - - # log results of source finding (or user catalog) - filename = image_model.meta.filename - nsources = 
len(catalog) - if nsources == 0: - self.log.warning('No sources found in {}.'.format(filename)) - else: - self.log.info('Detected {} sources in {}.' - .format(len(catalog), filename)) - - # save catalog (if requested) - if save_catalog: - # FIXME this modifies the input_model - image_model.meta.tweakreg_catalog = self._write_catalog(catalog, filename) - - # construct the corrector since the model is open (and already has a group_id) - correctors[model_index] = twk.construct_wcs_corrector(image_model.meta.wcs, - image_model.meta.wcsinfo.instance, - catalog, - image_model.meta.group_id,) + # Build the catalog and corrector for each input image + with images: + for (model_index, image_model) in enumerate(images): + # now that the model is open, check its metadata for a custom catalog + # only if it's not listed in the catdict + if use_custom_catalogs and image_model.meta.filename not in catdict: + if (image_model.meta.tweakreg_catalog is not None and image_model.meta.tweakreg_catalog.strip()): + catdict[image_model.meta.filename] = image_model.meta.tweakreg_catalog + if use_custom_catalogs and catdict.get(image_model.meta.filename, None) is not None: + # FIXME this modifies the input_model + image_model.meta.tweakreg_catalog = catdict[image_model.meta.filename] + # use user-supplied catalog: + self.log.info("Using user-provided input catalog " + f"'{image_model.meta.tweakreg_catalog}'") + catalog = Table.read( + image_model.meta.tweakreg_catalog, + ) + save_catalog = False + else: + # source finding + catalog = self._find_sources(image_model) + + # only save if catalog was computed from _find_sources and + # the user requested save_catalogs + save_catalog = self.save_catalogs + + # if needed rename xcentroid to x, ycentroid to y + catalog = _rename_catalog_columns(catalog) + + # filter all sources outside the wcs bounding box + catalog = twk.filter_catalog_by_bounding_box( + catalog, + image_model.meta.wcs.bounding_box) + + # setting 'name' is important for tweakwcs 
logging + if catalog.meta.get('name') is None: + catalog.meta['name'] = path.splitext(image_model.meta.filename)[0].strip('_- ') + + # log results of source finding (or user catalog) + filename = image_model.meta.filename + nsources = len(catalog) + if nsources == 0: + self.log.warning('No sources found in {}.'.format(filename)) + else: + self.log.info('Detected {} sources in {}.' + .format(len(catalog), filename)) + + # save catalog (if requested) + if save_catalog: + # FIXME this modifies the input_model + image_model.meta.tweakreg_catalog = self._write_catalog(catalog, filename) + + # construct the corrector since the model is open (and already has a group_id) + correctors[model_index] = \ + twk.construct_wcs_corrector(image_model.meta.wcs, + image_model.meta.wcsinfo.instance, + catalog, + image_model.meta.group_id,) + images.shelve(image_model, model_index) self.log.info('') self.log.info("Number of image groups to be aligned: {:d}." @@ -278,7 +286,9 @@ def process(self, input): # can (and does) occur after alignment between groups if align_to_abs_refcat: try: - ref_image = images[0] + with images: + ref_image = images.borrow(0) + images.shelve(ref_image, 0, modify=False) correctors = \ twk.absolute_align(correctors, self.abs_refcat, ref_wcs=ref_image.meta.wcs, @@ -298,13 +308,17 @@ def process(self, input): except twk.TweakregError as e: self.log.warning(str(e)) - for model in images: - model.meta.cal_step.tweakreg = "SKIPPED" - return images + with images: + for model in images: + record_step_status(model, "tweakreg", success=False) + images.shelve(model) + return images - if local_align_failed and not align_to_abs_refcat: - for model in images: - record_step_status(model, "tweakreg", success=False) + if local_align_failed and not align_to_abs_refcat: + with images: + for model in images: + record_step_status(model, "tweakreg", success=False) + images.shelve(model) return images # one final pass through all the models to update them based @@ -315,53 +329,53 
@@ def process(self, input): def _apply_tweakreg_solution(self, - images: ModelContainer, + images: ModelLibrary, correctors: list[JWSTWCSCorrector], align_to_abs_refcat: bool = False, - ) -> ModelContainer: - - for (image_model, corrector) in zip(images, correctors): - - # retrieve fit status and update wcs if fit is successful: - if ("fit_info" in corrector.meta and - "SUCCESS" in corrector.meta["fit_info"]["status"]): - - # Update/create the WCS .name attribute with information - # on this astrometric fit as the only record that it was - # successful: - if align_to_abs_refcat: - # NOTE: This .name attrib agreed upon by the JWST Cal - # Working Group. - # Current value is merely a place-holder based - # on HST conventions. This value should also be - # translated to the FITS WCSNAME keyword - # IF that is what gets recorded in the archive - # for end-user searches. - corrector.wcs.name = f"FIT-LVL3-{self.abs_refcat}" - - image_model.meta.wcs = corrector.wcs - update_s_region_imaging(image_model) - - # Also update FITS representation in input exposures for - # subsequent reprocessing by the end-user. - if self.sip_approx: - try: - update_fits_wcsinfo( - image_model, - max_pix_error=self.sip_max_pix_error, - degree=self.sip_degree, - max_inv_pix_error=self.sip_max_inv_pix_error, - inv_degree=self.sip_inv_degree, - npoints=self.sip_npoints, - crpix=None - ) - except (ValueError, RuntimeError) as e: - msg = f"Failed to update 'meta.wcsinfo' with FITS SIP \ - approximation. 
Reported error is: \n {e.args[0]}" - self.log.warning(msg) - record_step_status(image_model, "tweakreg", success=True) - - return image_model + ) -> ModelLibrary: + with images: + for (image_model, corrector) in zip(images, correctors): + + # retrieve fit status and update wcs if fit is successful: + if ("fit_info" in corrector.meta and + "SUCCESS" in corrector.meta["fit_info"]["status"]): + + # Update/create the WCS .name attribute with information + # on this astrometric fit as the only record that it was + # successful: + if align_to_abs_refcat: + # NOTE: This .name attrib agreed upon by the JWST Cal + # Working Group. + # Current value is merely a place-holder based + # on HST conventions. This value should also be + # translated to the FITS WCSNAME keyword + # IF that is what gets recorded in the archive + # for end-user searches. + corrector.wcs.name = f"FIT-LVL3-{self.abs_refcat}" + + image_model.meta.wcs = corrector.wcs + update_s_region_imaging(image_model) + + # Also update FITS representation in input exposures for + # subsequent reprocessing by the end-user. + if self.sip_approx: + try: + update_fits_wcsinfo( + image_model, + max_pix_error=self.sip_max_pix_error, + degree=self.sip_degree, + max_inv_pix_error=self.sip_max_inv_pix_error, + inv_degree=self.sip_inv_degree, + npoints=self.sip_npoints, + crpix=None + ) + except (ValueError, RuntimeError) as e: + msg = f"Failed to update 'meta.wcsinfo' with FITS SIP \ + approximation. 
Reported error is: \n {e.args[0]}" + self.log.warning(msg) + images.shelve(image_model) + record_step_status(images, "tweakreg", success=True) + return images def _write_catalog(self, catalog, filename): From 395298d02bc034d6c7330aeacc49a0189f4f3218 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 20 Jun 2024 14:45:23 -0400 Subject: [PATCH 03/85] remove minimize_memory option for skymatch --- jwst/skymatch/skymatch_step.py | 42 ++-------------------------- jwst/skymatch/tests/test_skymatch.py | 6 ---- 2 files changed, 3 insertions(+), 45 deletions(-) diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index 9723e0ef4d..701dddbfa8 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -20,7 +20,6 @@ from stdatamodels.jwst.datamodels.dqflags import pixel from stdatamodels.jwst.datamodels.util import ( open as datamodel_open, - is_association ) from jwst.datamodels import ModelContainer @@ -67,27 +66,13 @@ class SkyMatchStep(Step): reference_file_types = [] def __init__(self, *args, **kwargs): - minimize_memory = kwargs.pop('minimize_memory', False) super().__init__(*args, **kwargs) - self.minimize_memory = minimize_memory def process(self, input): self.log.setLevel(logging.DEBUG) - # for now turn off memory optimization until we have better machinery - # to handle outputs in a consistent way. 
- - if hasattr(self, 'minimize_memory') and self.minimize_memory: - self._is_asn = ( - is_association(input) or isinstance(input, str) - ) - - else: - self._is_asn = False img = ModelContainer( input, - save_open=not self._is_asn, - return_open=not self._is_asn ) self._dqbits = interpret_bit_flags(self.dqbits, flag_name_map=pixel) @@ -142,12 +127,10 @@ def process(self, input): "COMPLETE" if gim.is_sky_valid else "SKIPPED" ) - return input if self._is_asn else img + return img def _imodel2skyim(self, image_model): input_image_model = image_model - if self._is_asn: - image_model = datamodel_open(image_model) if self._dqbits is None: dqmask = np.isfinite(image_model.data).astype(dtype=np.uint8) @@ -163,9 +146,6 @@ def _imodel2skyim(self, image_model): # if 'subtract' mode has changed compared to the previous pass: if image_model.meta.background.subtracted is None: if image_model.meta.background.level is not None: - if self._is_asn: - image_model.close() - # report inconsistency: raise ValueError("Background level was set but the " "'subtracted' property is undefined (None).") @@ -179,9 +159,6 @@ def _imodel2skyim(self, image_model): # at this moment I think it is saver to quit and... # # report inconsistency: - if self._is_asn: - image_model.close() - raise ValueError("Background level was subtracted but the " "'level' property is undefined (None).") @@ -189,9 +166,6 @@ def _imodel2skyim(self, image_model): # cannot run 'skymatch' step on already "skymatched" images # when 'subtract' spec is inconsistent with # meta.background.subtracted: - if self._is_asn: - image_model.close() - raise ValueError("'subtract' step's specification is " "inconsistent with background info already " "present in image '{:s}' meta." @@ -209,13 +183,10 @@ def _imodel2skyim(self, image_model): id=image_model.meta.filename, # file name? 
skystat=self._skystat, stepsize=self.stepsize, - reduce_memory_usage=self._is_asn, + reduce_memory_usage=False, # FIXME: this overwrote input files meta={'image_model': input_image_model} ) - if self._is_asn: - image_model.close() - if self.subtract: sky_im.sky = level @@ -225,10 +196,7 @@ def _set_sky_background(self, sky_image, step_status): image = sky_image.meta['image_model'] sky = sky_image.sky - if self._is_asn: - dm = datamodel_open(image) - else: - dm = image + dm = image if step_status == "COMPLETE": dm.meta.background.method = str(self.skymethod) @@ -238,7 +206,3 @@ def _set_sky_background(self, sky_image, step_status): dm.data[...] = sky_image.image[...] dm.meta.cal_step.skymatch = step_status - - if self._is_asn: - dm.save(image) - dm.close() diff --git a/jwst/skymatch/tests/test_skymatch.py b/jwst/skymatch/tests/test_skymatch.py index 774b04e2b7..9888d4cde6 100644 --- a/jwst/skymatch/tests/test_skymatch.py +++ b/jwst/skymatch/tests/test_skymatch.py @@ -415,7 +415,6 @@ def test_asn_input(tmp_cwd, nircam_rate, tmp_path): # images are rotated and SATURATED pixels in the corners are not in the # common intersection of all input images. This is the purpose of this test step = SkyMatchStep( - minimize_memory=True, skymethod='match', match_down=True, subtract=True, @@ -426,13 +425,9 @@ def test_asn_input(tmp_cwd, nircam_rate, tmp_path): result = step.run(asn_out_fname) - assert isinstance(result, str) - ref_levels = np.subtract(levels, min(levels)) sub_levels = np.subtract(levels, ref_levels) - result = ModelContainer(result) - for im, lev, rlev, slev in zip(result, levels, ref_levels, sub_levels): # check that meta was set correctly: assert im.meta.background.method == 'match' @@ -498,7 +493,6 @@ def test_skymatch_2x(tmp_cwd, nircam_rate, tmp_path, skymethod, subtract): # images are rotated and SATURATED pixels in the corners are not in the # common intersection of all input images. 
This is the purpose of this test step = SkyMatchStep( - minimize_memory=True, skymethod=skymethod, match_down=True, subtract=subtract, From 4b2ce1371dc671c80a081ca253816a1f69a20d30 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 20 Jun 2024 16:01:41 -0400 Subject: [PATCH 04/85] update skymatch to use ModelLibrary --- jwst/skymatch/skymatch_step.py | 78 ++++++++-------- jwst/skymatch/tests/test_skymatch.py | 128 ++++++++++++++------------- 2 files changed, 105 insertions(+), 101 deletions(-) diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index 701dddbfa8..3083f09c78 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -22,7 +22,7 @@ open as datamodel_open, ) -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary from ..stpipe import Step @@ -71,9 +71,10 @@ def __init__(self, *args, **kwargs): def process(self, input): self.log.setLevel(logging.DEBUG) - img = ModelContainer( - input, - ) + if isinstance(input, ModelLibrary): + library = input + else: + library = ModelLibrary(input) self._dqbits = interpret_bit_flags(self.dqbits, flag_name_map=pixel) @@ -88,49 +89,44 @@ def process(self, input): binwidth=self.binwidth ) - # group images by their "group id": - grp_img = img.models_grouped - - # create a list of "Sky" Images and/or Groups: images = [] - grp_id = 1 - - for g in grp_img: - if len(g) > 1: - images.append( - SkyGroup( - list(map(self._imodel2skyim, g)), - id=grp_id - ) - ) - grp_id += 1 - elif len(g) == 1: - images.append(self._imodel2skyim(g[0])) - else: - raise AssertionError("Logical error in the pipeline code.") + with library: + for group_index, (group_id, group_inds) in enumerate(library.group_indices.items()): + sky_images = [] + for index in group_inds: + model = library.borrow(index) + sky_images.append(self._imodel2skyim(model, index)) + library.shelve(model, index, modify=False) + if len(sky_images) == 1: + images.extend(sky_images) + else: + # FIXME: why 
does this use a number for group_index? + images.append(SkyGroup(sky_images, id=group_index)) # match/compute sky values: match(images, skymethod=self.skymethod, match_down=self.match_down, subtract=self.subtract) # set sky background value in each image's meta: - for im in images: - if isinstance(im, SkyImage): - self._set_sky_background( - im, - "COMPLETE" if im.is_sky_valid else "SKIPPED" - ) - else: - for gim in im: + with library: + for im in images: + if isinstance(im, SkyImage): self._set_sky_background( - gim, - "COMPLETE" if gim.is_sky_valid else "SKIPPED" + im, + library, + "COMPLETE" if im.is_sky_valid else "SKIPPED" ) + else: + for gim in im: + self._set_sky_background( + gim, + library, + "COMPLETE" if gim.is_sky_valid else "SKIPPED" + ) - return img + return library - def _imodel2skyim(self, image_model): - input_image_model = image_model + def _imodel2skyim(self, image_model, index): if self._dqbits is None: dqmask = np.isfinite(image_model.data).astype(dtype=np.uint8) @@ -184,7 +180,7 @@ def _imodel2skyim(self, image_model): skystat=self._skystat, stepsize=self.stepsize, reduce_memory_usage=False, # FIXME: this overwrote input files - meta={'image_model': input_image_model} + meta={'index': index} ) if self.subtract: @@ -192,12 +188,11 @@ def _imodel2skyim(self, image_model): return sky_im - def _set_sky_background(self, sky_image, step_status): - image = sky_image.meta['image_model'] + def _set_sky_background(self, sky_image, library, step_status): + index = sky_image.meta['index'] + dm = library.borrow(index) sky = sky_image.sky - dm = image - if step_status == "COMPLETE": dm.meta.background.method = str(self.skymethod) dm.meta.background.level = sky @@ -206,3 +201,4 @@ def _set_sky_background(self, sky_image, step_status): dm.data[...] = sky_image.image[...] 
dm.meta.cal_step.skymatch = step_status + library.shelve(dm, index) diff --git a/jwst/skymatch/tests/test_skymatch.py b/jwst/skymatch/tests/test_skymatch.py index 9888d4cde6..d05541aaf3 100644 --- a/jwst/skymatch/tests/test_skymatch.py +++ b/jwst/skymatch/tests/test_skymatch.py @@ -220,8 +220,10 @@ def test_skymatch(nircam_rate, skymethod, subtract, skystat, match_down, assert im.meta.background.subtracted is None # test that output models have original sky levels on failure: - for im, lev in zip(result, levels): - assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 + with result: + for im, lev in zip(result, levels): + assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 + result.shelve(im, modify=False) return @@ -243,19 +245,21 @@ def test_skymatch(nircam_rate, skymethod, subtract, skystat, match_down, sub_levels = np.subtract(levels, ref_levels) - for im, lev, rlev, slev in zip(result, levels, ref_levels, sub_levels): - # check that meta was set correctly: - assert im.meta.background.method == skymethod - assert im.meta.background.subtracted == subtract + with result: + for im, lev, rlev, slev in zip(result, levels, ref_levels, sub_levels): + # check that meta was set correctly: + assert im.meta.background.method == skymethod + assert im.meta.background.subtracted == subtract - # test computed/measured sky values: - assert abs(im.meta.background.level - rlev) < 0.01 + # test computed/measured sky values: + assert abs(im.meta.background.level - rlev) < 0.01 - # test - if subtract: - assert abs(np.mean(im.data[dq_mask]) - slev) < 0.01 - else: - assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 + # test + if subtract: + assert abs(np.mean(im.data[dq_mask]) - slev) < 0.01 + else: + assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 + result.shelve(im, modify=False) @pytest.mark.parametrize( @@ -334,33 +338,35 @@ def test_skymatch_overlap(nircam_rate, skymethod, subtract, skystat): sub_levels = np.subtract(levels, ref_levels) - for im, lev, rlev, slev in zip(result, 
levels, ref_levels, sub_levels): - # check that meta was set correctly: - assert im.meta.background.method == skymethod - assert im.meta.background.subtracted == subtract - - if skymethod in ['local', 'global']: - # These two sky methods must fail because they do not take - # into account (do not compute) overlap regions and use - # entire images: - - # test computed/measured sky values: - assert abs(im.meta.background.level - rlev) > 1000 # FAIL - - # test - if subtract: - assert abs(np.mean(im.data[dq_mask]) - slev) > 1000 # FAIL + with result: + for im, lev, rlev, slev in zip(result, levels, ref_levels, sub_levels): + # check that meta was set correctly: + assert im.meta.background.method == skymethod + assert im.meta.background.subtracted == subtract + + if skymethod in ['local', 'global']: + # These two sky methods must fail because they do not take + # into account (do not compute) overlap regions and use + # entire images: + + # test computed/measured sky values: + assert abs(im.meta.background.level - rlev) > 1000 # FAIL + + # test + if subtract: + assert abs(np.mean(im.data[dq_mask]) - slev) > 1000 # FAIL + else: + assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 # PASS else: - assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 # PASS - else: - # test computed/measured sky values: - assert abs(im.meta.background.level - rlev) < 0.01 + # test computed/measured sky values: + assert abs(im.meta.background.level - rlev) < 0.01 - # test - if subtract: - assert abs(np.mean(im.data[dq_mask]) - slev) < 0.01 - else: - assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 + # test + if subtract: + assert abs(np.mean(im.data[dq_mask]) - slev) < 0.01 + else: + assert abs(np.mean(im.data[dq_mask]) - lev) < 0.01 + result.shelve(im, modify=False) def test_asn_input(tmp_cwd, nircam_rate, tmp_path): @@ -428,16 +434,18 @@ def test_asn_input(tmp_cwd, nircam_rate, tmp_path): ref_levels = np.subtract(levels, min(levels)) sub_levels = np.subtract(levels, ref_levels) - for im, 
lev, rlev, slev in zip(result, levels, ref_levels, sub_levels): - # check that meta was set correctly: - assert im.meta.background.method == 'match' - assert im.meta.background.subtracted is True + with result: + for im, lev, rlev, slev in zip(result, levels, ref_levels, sub_levels): + # check that meta was set correctly: + assert im.meta.background.method == 'match' + assert im.meta.background.subtracted is True - # test computed/measured sky values: - assert abs(im.meta.background.level - rlev) < 0.01 + # test computed/measured sky values: + assert abs(im.meta.background.level - rlev) < 0.01 - # test - assert abs(np.mean(im.data[dq_mask]) - slev) < 0.01 + # test + assert abs(np.mean(im.data[dq_mask]) - slev) < 0.01 + result.shelve(im, modify=False) @pytest.mark.parametrize( @@ -509,7 +517,7 @@ def test_skymatch_2x(tmp_cwd, nircam_rate, tmp_path, skymethod, subtract): # 2nd run. step.subtract = subtract - result2 = step.run(result) + result2 = step.run(asn_out_fname) # compute expected levels if skymethod in ['local', 'global+match']: @@ -523,19 +531,19 @@ def test_skymatch_2x(tmp_cwd, nircam_rate, tmp_path, skymethod, subtract): sub_levels = np.subtract(levels, ref_levels) - result2 = ModelContainer(result2) - # compare results - for im2, lev, rlev, slev in zip(result2, levels, ref_levels, sub_levels): - # check that meta was set correctly: - assert im2.meta.background.method == skymethod - assert im2.meta.background.subtracted == subtract + with result2: + for im2, lev, rlev, slev in zip(result2, levels, ref_levels, sub_levels): + # check that meta was set correctly: + assert im2.meta.background.method == skymethod + assert im2.meta.background.subtracted == subtract - # test computed/measured sky values: - assert abs(im2.meta.background.level - rlev) < 0.01 + # test computed/measured sky values: + assert abs(im2.meta.background.level - rlev) < 0.01 - # test - if subtract: - assert abs(np.mean(im2.data[dq_mask]) - slev) < 0.01 - else: - assert 
abs(np.mean(im2.data[dq_mask]) - lev) < 0.01 + # test + if subtract: + assert abs(np.mean(im2.data[dq_mask]) - slev) < 0.01 + else: + assert abs(np.mean(im2.data[dq_mask]) - lev) < 0.01 + result2.shelve(im2) From 018e3ce02b35cefe0d22fc6409dbca61f3fe25dc Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 18 Jul 2024 14:10:45 -0400 Subject: [PATCH 05/85] temporary ModelContainer to and from ModelLibrary converter, assign_mtwcs to library --- jwst/assign_mtwcs/assign_mtwcs_step.py | 19 +++--- jwst/assign_mtwcs/moving_target_wcs.py | 91 +++++++++++++------------- jwst/datamodels/library.py | 36 ++++++++++ 3 files changed, 93 insertions(+), 53 deletions(-) diff --git a/jwst/assign_mtwcs/assign_mtwcs_step.py b/jwst/assign_mtwcs/assign_mtwcs_step.py index bd312ccddd..75243b5ae4 100755 --- a/jwst/assign_mtwcs/assign_mtwcs_step.py +++ b/jwst/assign_mtwcs/assign_mtwcs_step.py @@ -1,9 +1,8 @@ #! /usr/bin/env python import logging -from stdatamodels.jwst import datamodels - -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary, ModelContainer +from jwst.datamodels.library import container_to_library, library_to_container from ..stpipe import Step from .moving_target_wcs import assign_moving_target_wcs @@ -32,17 +31,19 @@ class AssignMTWcsStep(Step): """ def process(self, input): - if isinstance(input, str): - input = datamodels.open(input) + if isinstance(input, (str, dict)): + input = ModelLibrary(input) + elif isinstance(input, ModelContainer): + input = container_to_library(input) - # Can't apply the step if we aren't given a ModelContainer as input - if not isinstance(input, ModelContainer): + # Can't apply the step if we aren't given a ModelLibrary as input + if not isinstance(input, ModelLibrary): log.warning("Input data type is not supported.") # raise ValueError("Expected input to be an association file name or a ModelContainer.") input.meta.cal_step.assign_mtwcs = 'SKIPPED' - return input + return library_to_container(input) # Apply 
the step result = assign_moving_target_wcs(input) - return result + return library_to_container(result) diff --git a/jwst/assign_mtwcs/moving_target_wcs.py b/jwst/assign_mtwcs/moving_target_wcs.py index 7f61b84e9c..b61fc54cf4 100644 --- a/jwst/assign_mtwcs/moving_target_wcs.py +++ b/jwst/assign_mtwcs/moving_target_wcs.py @@ -16,7 +16,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -24,49 +24,52 @@ __all__ = ["assign_moving_target_wcs"] -def assign_moving_target_wcs(input_model): - - if not isinstance(input_model, ModelContainer): - raise ValueError("Expected a ModelContainer object") - - # get the indices of the science exposures in the ModelContainer - ind = input_model.ind_asn_type('science') - sci_models = np.asarray(input_model._models)[ind] - # Get the MT RA/Dec values from all the input exposures - mt_ra = np.array([model.meta.wcsinfo.mt_ra for model in sci_models]) - mt_dec = np.array([model.meta.wcsinfo.mt_dec for model in sci_models]) - - # Compute the mean MT RA/Dec over all exposures - if None in mt_ra or None in mt_dec: - log.warning("One or more MT RA/Dec values missing in input images") - log.warning("Step will be skipped, resulting in target misalignment") - for model in sci_models: - model.meta.cal_step.assign_mtwcs = 'SKIPPED' - return input_model - else: - mt_avra = mt_ra.mean() - mt_avdec = mt_dec.mean() - - for model in sci_models: - model.meta.wcsinfo.mt_avra = mt_avra - model.meta.wcsinfo.mt_avdec = mt_avdec - if isinstance(model, datamodels.MultiSlitModel): - for ind, slit in enumerate(model.slits): - new_wcs = add_mt_frame(slit.meta.wcs, - mt_avra, mt_avdec, - slit.meta.wcsinfo.mt_ra, slit.meta.wcsinfo.mt_dec) - del model.slits[ind].meta.wcs - model.slits[ind].meta.wcs = new_wcs - else: - - new_wcs = add_mt_frame(model.meta.wcs, mt_avra, mt_avdec, - model.meta.wcsinfo.mt_ra, 
model.meta.wcsinfo.mt_dec) - del model.meta.wcs - model.meta.wcs = new_wcs - - model.meta.cal_step.assign_mtwcs = 'COMPLETE' - - return input_model +def assign_moving_target_wcs(input_models: ModelLibrary) -> ModelLibrary: + + with input_models: + # get the indices of the science exposures in the ModelLibrary + indices = input_models.ind_asn_type('science') + + mt_ra = [] + mt_dec = [] + for i in indices: + sci_model = input_models.borrow(i) + mt_ra.append(sci_model.meta.wcsinfo.mt_ra) + mt_dec.append(sci_model.meta.wcsinfo.mt_dec) + input_models.shelve(sci_model, i, modify=False) + + if None in mt_ra or None in mt_dec: + log.warning("One or more MT RA/Dec values missing in input images") + log.warning("Step will be skipped, resulting in target misalignment") + for i in indices: + sci_model = input_models.borrow(i) + sci_model.meta.cal_step.assign_mtwcs = 'SKIPPED' + input_models.shelve(sci_model, i, modify=True) + return input_models + + mt_avra = np.mean(mt_ra) + mt_avdec = np.mean(mt_dec) + + for i in indices: + sci_model = input_models.borrow(i) + sci_model.meta.wcsinfo.mt_avra = mt_avra + sci_model.meta.wcsinfo.mt_avdec = mt_avdec + if isinstance(sci_model, datamodels.MultiSlitModel): + for ind, slit in enumerate(sci_model.slits): + new_wcs = add_mt_frame(slit.meta.wcs, + mt_avra, mt_avdec, + slit.meta.wcsinfo.mt_ra, slit.meta.wcsinfo.mt_dec) + del sci_model.slits[ind].meta.wcs + sci_model.slits[ind].meta.wcs = new_wcs + else: + new_wcs = add_mt_frame(sci_model.meta.wcs, mt_avra, mt_avdec, + sci_model.meta.wcsinfo.mt_ra, sci_model.meta.wcsinfo.mt_dec) + del sci_model.meta.wcs + sci_model.meta.wcs = new_wcs + sci_model.meta.cal_step.assign_mtwcs = 'COMPLETE' + input_models.shelve(sci_model, i, modify=True) + + return input_models def add_mt_frame(wcs, ra_average, dec_average, mt_ra, mt_dec): diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 49206b5643..2ec7180229 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py 
@@ -1,4 +1,5 @@ import io +from pathlib import Path import asdf from astropy.io import fits @@ -6,6 +7,7 @@ from stpipe.library import AbstractModelLibrary, NoGroupID from jwst.associations import AssociationNotValidError, load_asn +from jwst.datamodels import ModelContainer __all__ = ["ModelLibrary"] @@ -96,6 +98,13 @@ def _assign_member_to_model(self, model, member): model.meta.asn.table_name = self.asn.get("table_name", "") model.meta.asn.pool_name = self.asn.get("asn_pool", "") + def ind_asn_type(self, exptype): + return [ + i + for i, member in enumerate(self.asn['products'][0]['members']) + if member['exptype'] == exptype + ] + def _attrs_to_group_id( program_number, @@ -114,3 +123,30 @@ def _attrs_to_group_id( f"_{visit_group}{sequence_id}{activity_id}" f"_{exposure_number}" ) + + +def container_to_library(container): + """ + Temporary converter function so that steps can start using ModelLibrary + without changing stdatamodels.jwst.open() to return ModelLibrary by default.""" + lib = ModelLibrary(container.asn_file_path) + with lib: + for i, model in enumerate(container): + lib.borrow(i) + lib.shelve(model, i) + lib.asn_table_name = getattr(container, "asn_table_name", "") + return lib + + +def library_to_container(library): + """ + Temporary converter function so that steps can start using ModelLibrary + without changing stdatamodels.jwst.open() to return ModelLibrary by default.""" + container = ModelContainer(str(Path(library._asn_dir) / Path(library._asn["table_name"]))) + with library: + for i, _ in enumerate(container): + model = library.borrow(i) + container[i] = model + library.shelve(model, i, modify=False) + container.asn_table_name = getattr(library, "asn_table_name", "") + return container \ No newline at end of file From 74b6607c429e340c0464343212738a896a609cba Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 22 Jul 2024 18:20:36 -0400 Subject: [PATCH 06/85] replaced container with library in resample --- jwst/datamodels/library.py | 2 +- 
jwst/exp_to_source/__init__.py | 3 +- jwst/exp_to_source/exp_to_source.py | 21 +- jwst/resample/resample.py | 361 ++++++++++++---------- jwst/resample/resample_spec.py | 82 +++-- jwst/resample/resample_spec_step.py | 103 +++--- jwst/resample/resample_step.py | 69 +++-- jwst/resample/tests/test_resample_step.py | 18 +- 8 files changed, 382 insertions(+), 277 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 2ec7180229..3f5ebef6bf 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -149,4 +149,4 @@ def library_to_container(library): container[i] = model library.shelve(model, i, modify=False) container.asn_table_name = getattr(library, "asn_table_name", "") - return container \ No newline at end of file + return container diff --git a/jwst/exp_to_source/__init__.py b/jwst/exp_to_source/__init__.py index bcace776ec..d050f68b8f 100644 --- a/jwst/exp_to_source/__init__.py +++ b/jwst/exp_to_source/__init__.py @@ -1,6 +1,7 @@ from .exp_to_source import ( exp_to_source, multislit_to_container, + multislit_to_library, ) -__all__ = ['exp_to_source', 'multislit_to_container'] +__all__ = ['exp_to_source', 'multislit_to_container', 'multislit_to_library'] diff --git a/jwst/exp_to_source/exp_to_source.py b/jwst/exp_to_source/exp_to_source.py index c0f385a339..9a1279dc23 100644 --- a/jwst/exp_to_source/exp_to_source.py +++ b/jwst/exp_to_source/exp_to_source.py @@ -8,9 +8,9 @@ from stdatamodels.properties import merge_tree from stdatamodels.jwst.datamodels import MultiExposureModel -from jwst.datamodels import SourceModelContainer +from jwst.datamodels import SourceModelContainer, ModelLibrary -__all__ = ['exp_to_source', 'multislit_to_container'] +__all__ = ['exp_to_source', 'multislit_to_container', 'multislit_to_library'] log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -113,6 +113,23 @@ def multislit_to_container(inputs): return containers +# this is a hacky solution - fix it later +def 
multislit_to_library(inputs): + if isinstance(inputs, ModelLibrary): + # convert to list of MultiSlitModels expected by exp_to_source + multislit_list = [] + with inputs: + for i in range(len(inputs)): + multislit_list.append(inputs.borrow(i)) + inputs.shelve(multislit_list[-1], i, modify=False) + inputs = multislit_list + containers = exp_to_source(inputs) + for id in containers: + containers[id] = ModelLibrary(SourceModelContainer(containers[id])) + + return containers + + class DefaultOrderedDict(OrderedDict): # Source http://stackoverflow.com/a/6190500/562769 def __init__(self, default_factory=None, *a, **kw): diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 95eaaef3e9..0d8191f3d7 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -10,7 +10,7 @@ from stdatamodels.jwst import datamodels from stdatamodels.jwst.library.basic_utils import bytes2human -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary from . 
import gwcs_drizzle from jwst.resample import resample_utils @@ -49,8 +49,8 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, """ Parameters ---------- - input_models : list of objects - list of data models, one for each input image + input_models : library of objects + library of data models, one for each input image output : str filename for output @@ -122,17 +122,21 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, output_pix_area = output_wcs.pixel_area else: - # Define output WCS based on all inputs, including a reference WCS: - self.output_wcs = resample_utils.make_output_wcs( - self.input_models, - ref_wcs=output_wcs, - pscale_ratio=self.pscale_ratio, - pscale=pscale, - rotation=rotation, - shape=None if output_shape is None else output_shape[::-1], - crpix=crpix, - crval=crval - ) + with self.input_models: + models = list(self.input_models) + # Define output WCS based on all inputs, including a reference WCS: + self.output_wcs = resample_utils.make_output_wcs( + models, + ref_wcs=output_wcs, + pscale_ratio=self.pscale_ratio, + pscale=pscale, + rotation=rotation, + shape=None if output_shape is None else output_shape[::-1], + crpix=crpix, + crval=crval + ) + for i, m in enumerate(models): + self.input_models.shelve(m, i, modify=False) # Estimate output pixel area in Sr. 
NOTE: in principle we could # use the same algorithm as for when output_wcs is provided by the @@ -180,14 +184,16 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, self.blank_output = datamodels.ImageModel(tuple(self.output_wcs.array_shape)) # update meta data and wcs - self.blank_output.update(input_models[0]) + with self.input_models: + example_model = self.input_models.borrow(0) + self.input_models.shelve(example_model, 0, modify=False) + self.blank_output.update(example_model) self.blank_output.meta.wcs = self.output_wcs self.blank_output.meta.photometry.pixelarea_steradians = output_pix_area self.blank_output.meta.photometry.pixelarea_arcsecsq = ( output_pix_area * np.rad2deg(3600)**2 ) - self.output_models = ModelContainer(open_models=False) def do_drizzle(self): """Pick the correct drizzling mode based on self.single @@ -266,6 +272,7 @@ def _get_intensity_scale(self, img): iscale = 1.0 return iscale + def resample_many_to_many(self): """Resample many inputs to many outputs where outputs have a common frame. 
@@ -275,74 +282,82 @@ def resample_many_to_many(self): Used for outlier detection """ - for exposure in self.input_models.models_grouped: + output_models = [] + for group_id, indices in self.input_models.group_indices.items(): output_model = self.blank_output - # Determine output file type from input exposure filenames - # Use this for defining the output filename - indx = exposure[0].meta.filename.rfind('.') - output_type = exposure[0].meta.filename[indx:] - output_root = '_'.join(exposure[0].meta.filename.replace( - output_type, '').split('_')[:-1]) - if self.asn_id is not None: - output_model.meta.filename = f'{output_root}_{self.asn_id}_outlier_i2d{output_type}' - else: - output_model.meta.filename = f'{output_root}_outlier_i2d{output_type}' - # Initialize the output with the wcs - driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, - kernel=self.kernel, fillval=self.fillval) + copy_asn_info_from_library(self.input_models, output_model) - log.info(f"{len(exposure)} exposures to drizzle together") - for img in exposure: - img = datamodels.open(img) - iscale = self._get_intensity_scale(img) - log.debug(f'Using intensity scale iscale={iscale}') + with self.input_models: + example_image = self.input_models.borrow(indices[0]) - inwht = resample_utils.build_driz_weight( - img, - weight_type=self.weight_type, - good_bits=self.good_bits - ) - - # apply sky subtraction - blevel = img.meta.background.level - if not img.meta.background.subtracted and blevel is not None: - data = img.data - blevel + # Determine output file type from input exposure filenames + # Use this for defining the output filename + indx = example_image.meta.filename.rfind('.') + output_type = example_image.meta.filename[indx:] + output_root = '_'.join(example_image.meta.filename.replace( + output_type, '').split('_')[:-1]) + if self.asn_id is not None: + output_model.meta.filename = f'{output_root}_{self.asn_id}_outlier_i2d{output_type}' else: - data = img.data + 
output_model.meta.filename = f'{output_root}_outlier_i2d{output_type}' + + # Initialize the output with the wcs + driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, + kernel=self.kernel, fillval=self.fillval) + + log.info(f"{len(indices)} exposures to drizzle together") + for index in indices: + img = self.input_models.borrow(index) + iscale = self._get_intensity_scale(img) + log.debug(f'Using intensity scale iscale={iscale}') + + inwht = resample_utils.build_driz_weight( + img, + weight_type=self.weight_type, + good_bits=self.good_bits + ) - xmin, xmax, ymin, ymax = resample_utils._resample_range( - data.shape, - img.meta.wcs.bounding_box - ) + # apply sky subtraction + blevel = img.meta.background.level + if not img.meta.background.subtracted and blevel is not None: + data = img.data - blevel + else: + data = img.data - driz.add_image( - data, - img.meta.wcs, - iscale=iscale, - inwht=inwht, - xmin=xmin, - xmax=xmax, - ymin=ymin, - ymax=ymax - ) - del data - img.close() + xmin, xmax, ymin, ymax = resample_utils._resample_range( + data.shape, + img.meta.wcs.bounding_box + ) + + driz.add_image( + data, + img.meta.wcs, + iscale=iscale, + inwht=inwht, + xmin=xmin, + xmax=xmax, + ymin=ymin, + ymax=ymax + ) + del data + self.input_models.shelve(img, index, modify=False) if not self.in_memory: + # FIXME: Is this needed anymore with ModelLibrary? # Write out model to disk, then return filename output_name = output_model.meta.filename if self.output_dir is not None: output_name = os.path.join(self.output_dir, output_name) output_model.save(output_name) log.info(f"Saved model in {output_name}") - self.output_models.append(output_name) + output_models.append(output_name) else: - self.output_models.append(output_model.copy()) + output_models.append(output_model.copy()) output_model.data *= 0. output_model.wht *= 0. 
- return self.output_models + return ModelLibrary(output_models) def resample_many_to_one(self): """Resample and coadd many inputs to a single output. @@ -357,42 +372,47 @@ def resample_many_to_one(self): if self.blendheaders: self.blend_output_metadata(output_model) + # copy over asn information + copy_asn_info_from_library(self.input_models, output_model) + # Initialize the output with the wcs driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, kernel=self.kernel, fillval=self.fillval) log.info("Resampling science data") - for img in self.input_models: - iscale = self._get_intensity_scale(img) - log.debug(f'Using intensity scale iscale={iscale}') - img.meta.iscale = iscale - - inwht = resample_utils.build_driz_weight(img, - weight_type=self.weight_type, - good_bits=self.good_bits) - # apply sky subtraction - blevel = img.meta.background.level - if not img.meta.background.subtracted and blevel is not None: - data = img.data - blevel - else: - data = img.data.copy() + with self.input_models: + for img in self.input_models: + iscale = self._get_intensity_scale(img) + log.debug(f'Using intensity scale iscale={iscale}') + img.meta.iscale = iscale - xmin, xmax, ymin, ymax = resample_utils._resample_range( - data.shape, - img.meta.wcs.bounding_box - ) + inwht = resample_utils.build_driz_weight(img, + weight_type=self.weight_type, + good_bits=self.good_bits) + # apply sky subtraction + blevel = img.meta.background.level + if not img.meta.background.subtracted and blevel is not None: + data = img.data - blevel + else: + data = img.data.copy() - driz.add_image( - data, - img.meta.wcs, - iscale=iscale, - inwht=inwht, - xmin=xmin, - xmax=xmax, - ymin=ymin, - ymax=ymax - ) - del data, inwht + xmin, xmax, ymin, ymax = resample_utils._resample_range( + data.shape, + img.meta.wcs.bounding_box + ) + + driz.add_image( + data, + img.meta.wcs, + iscale=iscale, + inwht=inwht, + xmin=xmin, + xmax=xmax, + ymin=ymin, + ymax=ymax + ) + del data, inwht + 
self.input_models.shelve(img, modify=False) # Resample variance arrays in self.input_models to output_model self.resample_variance_arrays(output_model) @@ -409,12 +429,9 @@ def resample_many_to_one(self): output_model.err[all_nan] = np.nan self.update_exposure_times(output_model) - self.output_models.append(output_model) - for img in self.input_models: - del img.meta.iscale + return ModelLibrary([output_model]) - return self.output_models def resample_variance_arrays(self, output_model): """Resample variance arrays from self.input_models to the output_model. @@ -433,63 +450,67 @@ def resample_variance_arrays(self, output_model): total_weight_rn_var = np.zeros_like(output_model.data) total_weight_pn_var = np.zeros_like(output_model.data) total_weight_flat_var = np.zeros_like(output_model.data) - for model in self.input_models: - # Do the read noise variance first, so it can be - # used for weights if needed - rn_var = self._resample_one_variance_array( - "var_rnoise", model, output_model) - - # Find valid weighting values in the variance - if rn_var is not None: - mask = (rn_var > 0) & np.isfinite(rn_var) - else: - mask = np.full_like(rn_var, False) - - # Set the weight for the image from the weight type - weight = np.ones(output_model.data.shape) - if self.weight_type == "ivm" and rn_var is not None: - weight[mask] = rn_var[mask] ** -1 - elif self.weight_type == "exptime": - if resample_utils.check_for_tmeasure(model): - weight[:] = model.meta.exposure.measurement_time + with self.input_models: + for i, model in enumerate(self.input_models): + # Do the read noise variance first, so it can be + # used for weights if needed + rn_var = self._resample_one_variance_array( + "var_rnoise", model, output_model) + + # Find valid weighting values in the variance + if rn_var is not None: + mask = (rn_var > 0) & np.isfinite(rn_var) else: - weight[:] = model.meta.exposure.exposure_time - - # Weight and add the readnoise variance - # Note: floating point overflow is an issue if 
variance weights - # are used - it can't be squared before multiplication - if rn_var is not None: - mask = (rn_var >= 0) & np.isfinite(rn_var) & (weight > 0) - weighted_rn_var[mask] = np.nansum( - [weighted_rn_var[mask], - rn_var[mask] * weight[mask] * weight[mask]], - axis=0 - ) - total_weight_rn_var[mask] += weight[mask] - - # Now do poisson and flat variance, updating only valid new values - # (zero is a valid value; negative, inf, or NaN are not) - pn_var = self._resample_one_variance_array( - "var_poisson", model, output_model) - if pn_var is not None: - mask = (pn_var >= 0) & np.isfinite(pn_var) & (weight > 0) - weighted_pn_var[mask] = np.nansum( - [weighted_pn_var[mask], - pn_var[mask] * weight[mask] * weight[mask]], - axis=0 - ) - total_weight_pn_var[mask] += weight[mask] - - flat_var = self._resample_one_variance_array( - "var_flat", model, output_model) - if flat_var is not None: - mask = (flat_var >= 0) & np.isfinite(flat_var) & (weight > 0) - weighted_flat_var[mask] = np.nansum( - [weighted_flat_var[mask], - flat_var[mask] * weight[mask] * weight[mask]], - axis=0 - ) - total_weight_flat_var[mask] += weight[mask] + mask = np.full_like(rn_var, False) + + # Set the weight for the image from the weight type + weight = np.ones(output_model.data.shape) + if self.weight_type == "ivm" and rn_var is not None: + weight[mask] = rn_var[mask] ** -1 + elif self.weight_type == "exptime": + if resample_utils.check_for_tmeasure(model): + weight[:] = model.meta.exposure.measurement_time + else: + weight[:] = model.meta.exposure.exposure_time + + # Weight and add the readnoise variance + # Note: floating point overflow is an issue if variance weights + # are used - it can't be squared before multiplication + if rn_var is not None: + mask = (rn_var >= 0) & np.isfinite(rn_var) & (weight > 0) + weighted_rn_var[mask] = np.nansum( + [weighted_rn_var[mask], + rn_var[mask] * weight[mask] * weight[mask]], + axis=0 + ) + total_weight_rn_var[mask] += weight[mask] + + # Now do 
poisson and flat variance, updating only valid new values + # (zero is a valid value; negative, inf, or NaN are not) + pn_var = self._resample_one_variance_array( + "var_poisson", model, output_model) + if pn_var is not None: + mask = (pn_var >= 0) & np.isfinite(pn_var) & (weight > 0) + weighted_pn_var[mask] = np.nansum( + [weighted_pn_var[mask], + pn_var[mask] * weight[mask] * weight[mask]], + axis=0 + ) + total_weight_pn_var[mask] += weight[mask] + + flat_var = self._resample_one_variance_array( + "var_flat", model, output_model) + if flat_var is not None: + mask = (flat_var >= 0) & np.isfinite(flat_var) & (weight > 0) + weighted_flat_var[mask] = np.nansum( + [weighted_flat_var[mask], + flat_var[mask] * weight[mask] * weight[mask]], + axis=0 + ) + total_weight_flat_var[mask] += weight[mask] + + del model.meta.iscale + self.input_models.shelve(model, i, modify=False) # We now have a sum of the weighted resampled variances. # Divide by the total weights, squared, and set in the output model. 
@@ -578,16 +599,19 @@ def update_exposure_times(self, output_model): duration = 0.0 total_measurement_time = 0.0 measurement_time_failures = [] - for exposure in self.input_models.models_grouped: - total_exposure_time += exposure[0].meta.exposure.exposure_time - if not resample_utils.check_for_tmeasure(exposure[0]): - measurement_time_failures.append(1) - else: - total_measurement_time += exposure[0].meta.exposure.measurement_time - measurement_time_failures.append(0) - exposure_times['start'].append(exposure[0].meta.exposure.start_time) - exposure_times['end'].append(exposure[0].meta.exposure.end_time) - duration += exposure[0].meta.exposure.duration + with self.input_models: + for _, indices in self.input_models.group_indices.items(): + model = self.input_models.borrow(indices[0]) + total_exposure_time += model.meta.exposure.exposure_time + if not resample_utils.check_for_tmeasure(model): + measurement_time_failures.append(1) + else: + total_measurement_time += model.meta.exposure.measurement_time + measurement_time_failures.append(0) + exposure_times['start'].append(model.meta.exposure.start_time) + exposure_times['end'].append(model.meta.exposure.end_time) + duration += model.meta.exposure.duration + self.input_models.shelve(model, indices[0], modify=False) # Update some basic exposure time values based on output_model output_model.meta.exposure.exposure_time = total_exposure_time @@ -892,3 +916,12 @@ def compute_image_pixel_area(wcs): pix_area = sky_area / image_area return pix_area + + +def copy_asn_info_from_library(library, output_model): + if (asn_pool := library.asn.get("asn_pool", None)) is not None: + output_model.meta.asn.pool_name = asn_pool + if ( + asn_table_name := library.asn.get("table_name", None) + ) is not None: + output_model.meta.asn.table_name = asn_table_name \ No newline at end of file diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 43333df674..7a9bf42931 100644 --- a/jwst/resample/resample_spec.py +++ 
b/jwst/resample/resample_spec.py @@ -16,7 +16,7 @@ from stdatamodels.jwst import datamodels from jwst.assign_wcs.util import compute_scale, wrap_ra -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from jwst.resample import resample_utils from jwst.resample.resample import ResampleData @@ -52,7 +52,7 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, """ Parameters ---------- - input_models : list of objects + input_models : ModelLibrary list of data models, one for each input image output : str @@ -61,7 +61,10 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, kwargs : dict Other parameters """ - self.input_models = input_models + if isinstance(input_models, ModelContainer): + self.input_models = ModelLibrary(input_models) + else: + self.input_models = input_models self.output_dir = None self.output_filename = output if output is not None and '.fits' not in str(output): @@ -89,9 +92,12 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.asn_id = kwargs.get('asn_id', None) # Get an average input pixel scale for parameter calculations - disp_axis = self.input_models[0].meta.wcsinfo.dispersion_direction + with self.input_models: + example_model = self.input_models.borrow(0) + self.input_models.shelve(example_model, 0, modify=False) + disp_axis = example_model.meta.wcsinfo.dispersion_direction self.input_pixscale0 = compute_spectral_pixel_scale( - self.input_models[0].meta.wcs, disp_axis=disp_axis) + example_model.meta.wcs, disp_axis=disp_axis) if np.isnan(self.input_pixscale0): log.warning('Input pixel scale could not be determined.') if pscale is not None: @@ -99,7 +105,7 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, 'without an input pixel scale. 
Setting pscale=None.') pscale = None - nominal_area = self.input_models[0].meta.photometry.pixelarea_steradians + nominal_area = example_model.meta.photometry.pixelarea_steradians if nominal_area is None: log.warning('Nominal pixel area not set in input data.') if pscale is not None: @@ -160,8 +166,8 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, # These functions internally use self.pscale_ratio to accommodate # user settings. # Any other customizations (crpix, crval, rotation) are ignored. - if resample_utils.is_sky_like(self.input_models[0].meta.wcs.output_frame): - if self.input_models[0].meta.instrument.name != "NIRSPEC": + if resample_utils.is_sky_like(example_model.meta.wcs.output_frame): + if example_model.meta.instrument.name != "NIRSPEC": self.output_wcs = self.build_interpolated_output_wcs() else: self.output_wcs = self.build_nirspec_output_wcs() @@ -187,14 +193,16 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.blank_output = datamodels.SlitModel(tuple(self.output_wcs.array_shape)) # update meta data and wcs - self.blank_output.update(input_models[0]) + self.blank_output.update(example_model) self.blank_output.meta.wcs = self.output_wcs if output_pix_area is not None: self.blank_output.meta.photometry.pixelarea_steradians = output_pix_area self.blank_output.meta.photometry.pixelarea_arcsecsq = ( output_pix_area * np.rad2deg(3600)**2) + # FIXME: this should be a library self.output_models = ModelContainer() + del example_model def build_nirspec_output_wcs(self, refmodel=None): """ @@ -230,22 +238,28 @@ def build_nirspec_output_wcs(self, refmodel=None): output_wcs : `~gwcs.WCS` A gwcs WCS object defining the output frame WCS. 
""" - all_wcs = [m.meta.wcs for m in self.input_models if m is not refmodel] - if refmodel: - all_wcs.insert(0, refmodel.meta.wcs) - else: - # Use the first model with a reasonable amount of good data - # as the reference model - for model in self.input_models: - dq_mask = resample_utils.build_mask(model.dq, self.good_bits) - good = np.isfinite(model.data) & (model.data != 0) & dq_mask - if np.sum(good) > 100 and refmodel is None: - refmodel = model - break + with self.input_models: + all_wcs = [] + for i, model in enumerate(self.input_models): + all_wcs.append(model.meta.wcs) + self.input_models.shelve(model, i, modify=False) + if refmodel: + all_wcs.insert(0, refmodel.meta.wcs) + else: + # Use the first model with any good data as the reference model + for i, model in enumerate(self.input_models): + dq_mask = resample_utils.build_mask(model.dq, self.good_bits) + good = np.isfinite(model.data) & (model.data != 0) & dq_mask + if np.sum(good) > 100 and refmodel is None: + refmodel = model + self.input_models.shelve(model, i, modify=False) + break + self.input_models.shelve(model, i) # If no good data was found, use the first model. 
if refmodel is None: - refmodel = self.input_models[0] + refmodel = self.input_models.borrow(0) + self.input_models.shelve(refmodel, 0, modify=False) # Make a copy of the data array for internal manipulation refmodel_data = refmodel.data.copy() @@ -513,14 +527,26 @@ def build_interpolated_output_wcs(self): all_ra_slit = [] all_dec_slit = [] - for im, model in enumerate(self.input_models): - wcs = model.meta.wcs + all_wcs = [] + spectral_axes = [] + with self.input_models: + example_model = self.input_models.borrow(0) + self.input_models.shelve(example_model, 0, modify=False) + for im, model in enumerate(self.input_models): + wcs = model.meta.wcs + spectral_axis = find_dispersion_axis(model) + self.input_models.shelve(model, im, modify=False) + all_wcs.append(wcs) + spectral_axes.append(spectral_axis) + + for im in range(len(all_wcs)): + wcs = all_wcs[im] bbox = wcs.bounding_box grid = wcstools.grid_from_bounding_box(bbox) ra, dec, lam = np.array(wcs(*grid)) # Handle vertical (MIRI) or horizontal (NIRSpec) dispersion. The # following 2 variables are 0 or 1, i.e. zero-indexed in x,y WCS order - spectral_axis = find_dispersion_axis(model) + spectral_axis = spectral_axes[im] spatial_axis = spectral_axis ^ 1 # Compute the wavelength array, trimming NaNs from the ends @@ -625,7 +651,7 @@ def build_interpolated_output_wcs(self): # Check if the data is MIRI LRS FIXED Slit. If it is then # the wavelength array needs to be flipped so that the resampled # dispersion direction matches the dispersion direction on the detector. - if self.input_models[0].meta.exposure.type == 'MIR_LRS-FIXEDSLIT': + if example_model.meta.exposure.type == 'MIR_LRS-FIXEDSLIT': wavelength_array = np.flip(wavelength_array, axis=None) step = 1 @@ -754,7 +780,9 @@ def build_nirspec_lamp_output_wcs(self): output_wcs : `~gwcs.WCS` object A gwcs WCS object defining the output frame WCS. 
""" - model = self.input_models[0] + with self.input_models: + model = self.input_models.borrow(0) + self.input_models.shelve(model, 0, modify=False) wcs = model.meta.wcs bbox = wcs.bounding_box grid = wcstools.grid_from_bounding_box(bbox) diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index b0630ed590..1c4fb15f17 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -3,9 +3,11 @@ from stdatamodels.jwst import datamodels from stdatamodels.jwst.datamodels import MultiSlitModel, ImageModel -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary +from jwst.datamodels.library import container_to_library from . import resample_spec, ResampleStep -from ..exp_to_source import multislit_to_container +from jwst.resample.resample import copy_asn_info_from_library +from ..exp_to_source import multislit_to_library from ..assign_wcs.util import update_s_region_spectral from jwst.lib.wcs_utils import get_wavelengths @@ -46,8 +48,18 @@ def process(self, input): input_new = datamodels.SlitModel(input_new) if isinstance(input_new, ModelContainer): + input_models = container_to_library(input_new) + try: + output = input_models.meta.asn_table.products[0].name + except AttributeError: + # NIRSpec MOS data goes through this path, as the container + # is only ModelContainer-like, and doesn't have an asn_table + # attribute attached. 
Output name handling gets done in + # _process_multislit() via the update method + # TODO: the container-like object should retain asn_table + output = None + elif isinstance(input_new, ModelLibrary): input_models = input_new - try: output = input_models.meta.asn_table.products[0].name except AttributeError: @@ -58,9 +70,9 @@ def process(self, input): # TODO: the container-like object should retain asn_table output = None else: - input_models = ModelContainer([input_new]) + input_models = ModelLibrary([input_new]) output = input_new.meta.filename - self.blendheaders = False + self.blendheaders = False # Setup drizzle-related parameters kwargs = self.get_drizpars() @@ -68,20 +80,23 @@ def process(self, input): self.drizpars = kwargs # Call resampling - if isinstance(input_models[0], MultiSlitModel): - result = self._process_multislit(input_models) + with input_models: + example_model = input_models.borrow(0) + input_models.shelve(example_model, 0, modify=False) + if isinstance(example_model, MultiSlitModel): + result = self._process_multislit(input_models) - elif len(input_models[0].data.shape) != 2: - # resample can only handle 2D images, not 3D cubes, etc - raise RuntimeError("Input {} is not a 2D image.".format(input_models[0])) + elif len(example_model.data.shape) != 2: + # resample can only handle 2D images, not 3D cubes, etc + raise RuntimeError("Input {} is not a 2D image.".format(example_model)) - else: - # result is a SlitModel - result = self._process_slit(input_models) + else: + # result is a SlitModel + result = self._process_slit(input_models) # Update ASNTABLE in output - result.meta.asn.table_name = input_models[0].meta.asn.table_name - result.meta.asn.pool_name = input_models[0].meta.asn.pool_name + result.meta.asn.table_name = example_model.meta.asn.table_name + result.meta.asn.pool_name = example_model.meta.asn.pool_name # populate the result wavelength attribute for MultiSlitModel if isinstance(result, MultiSlitModel): @@ -101,37 +116,41 @@ def 
_process_multislit(self, input_models): Parameters ---------- - input : `~jwst.datamodels.ModelContainer` - A container of `~jwst.datamodels.MultiSlitModel` + input_models : `~jwst.datamodels.ModelLibrary` + A library of `~jwst.datamodels.MultiSlitModel` Returns ------- result : `~jwst.datamodels.MultiSlitModel` The resampled output, one per source """ - containers = multislit_to_container(input_models) - result = datamodels.MultiSlitModel() - - result.update(input_models[0]) + library_ordereddict = multislit_to_library(input_models) + with input_models: + example_model = input_models.borrow(0) + result = datamodels.MultiSlitModel() + result.update(example_model) + input_models.shelve(example_model, 0, modify=False) pscale_ratio = None - for container in containers.values(): - resamp = resample_spec.ResampleSpecData(container, **self.drizpars) + for input_library in library_ordereddict.values(): - drizzled_models = resamp.do_drizzle() + resamp = resample_spec.ResampleSpecData(input_library, **self.drizpars) - for model in drizzled_models: - self.update_slit_metadata(model) - update_s_region_spectral(model) - result.slits.append(model) + drizzled_models = resamp.do_drizzle() + with drizzled_models: + for i, model in enumerate(drizzled_models): + self.update_slit_metadata(model) + update_s_region_spectral(model) + result.slits.append(model) + drizzled_models.shelve(model, i, modify=False) # Keep the first computed pixel scale ratio for storage if self.pixel_scale is not None and pscale_ratio is None: pscale_ratio = resamp.pscale_ratio result.meta.cal_step.resample = "COMPLETE" - result.meta.asn.pool_name = input_models.asn_pool_name - result.meta.asn.table_name = input_models.asn_table_name + # copy over asn information + copy_asn_info_from_library(input_models, result) if self.pixel_scale is None or pscale_ratio is None: result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio else: @@ -160,18 +179,18 @@ def _process_slit(self, input_models): drizzled_models 
= resamp.do_drizzle() - result = drizzled_models[0] - result.meta.cal_step.resample = "COMPLETE" - result.meta.asn.pool_name = input_models.asn_pool_name - result.meta.asn.table_name = input_models.asn_table_name - result.meta.bunit_data = drizzled_models[0].meta.bunit_data - if self.pixel_scale is None: - result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio - else: - result.meta.resample.pixel_scale_ratio = resamp.pscale_ratio - result.meta.resample.pixfrac = self.pixfrac - self.update_slit_metadata(result) - update_s_region_spectral(result) + with drizzled_models: + result = drizzled_models.borrow(0) + drizzled_models.shelve(result, 0, modify=False) + result.meta.cal_step.resample = "COMPLETE" + copy_asn_info_from_library(input_models, result) + if self.pixel_scale is None: + result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio + else: + result.meta.resample.pixel_scale_ratio = resamp.pscale_ratio + result.meta.resample.pixfrac = self.pixfrac + self.update_slit_metadata(result) + update_s_region_spectral(result) return result diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 1f99d12a41..415645f6db 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -4,9 +4,8 @@ import asdf -from stdatamodels.jwst import datamodels - -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary +from jwst.datamodels.library import container_to_library, library_to_container from . 
import resample from ..stpipe import Step @@ -61,10 +60,12 @@ class ResampleStep(Step): def process(self, input): - input = datamodels.open(input) - - if isinstance(input, ModelContainer): + if isinstance(input, ModelLibrary): input_models = input + elif isinstance(input, ModelContainer): + input_models = container_to_library(input) + + if isinstance(input, ModelLibrary): try: output = input_models.meta.asn_table.products[0].name except AttributeError: @@ -73,16 +74,20 @@ def process(self, input): # TODO: figure out why and make sure asn_table is carried along output = None else: - input_models = ModelContainer([input]) + input_models = ModelLibrary([input]) input_models.asn_pool_name = input.meta.asn.pool_name input_models.asn_table_name = input.meta.asn.table_name output = input.meta.filename self.blendheaders = False # Check that input models are 2D images - if len(input_models[0].data.shape) != 2: - # resample can only handle 2D images, not 3D cubes, etc - raise RuntimeError("Input {} is not a 2D image.".format(input_models[0])) + with input_models: + example_model = input_models.borrow(0) + data_shape = example_model.data.shape + input_models.shelve(example_model, 0, modify=False) + if len(data_shape) != 2: + # resample can only handle 2D images, not 3D cubes, etc + raise RuntimeError(f"Input {example_model} is not a 2D image.") # Setup drizzle-related parameters kwargs = self.get_drizpars() @@ -91,27 +96,29 @@ def process(self, input): resamp = resample.ResampleData(input_models, output=output, **kwargs) result = resamp.do_drizzle() - for model in result: - model.meta.cal_step.resample = 'COMPLETE' - self.update_fits_wcs(model) - util.update_s_region_imaging(model) - model.meta.asn.pool_name = input_models.asn_pool_name - model.meta.asn.table_name = input_models.asn_table_name - - # if pixel_scale exists, it will override pixel_scale_ratio. 
- # calculate the actual value of pixel_scale_ratio based on pixel_scale - # because source_catalog uses this value from the header. - if self.pixel_scale is None: - model.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio - else: - model.meta.resample.pixel_scale_ratio = resamp.pscale_ratio - model.meta.resample.pixfrac = kwargs['pixfrac'] - - if len(result) == 1: - result = result[0] - - input_models.close() - return result + with result: + for i, model in enumerate(result): + model.meta.cal_step.resample = 'COMPLETE' + self.update_fits_wcs(model) + util.update_s_region_imaging(model) + resample.copy_asn_info_from_library(input_models, model) + + # if pixel_scale exists, it will override pixel_scale_ratio. + # calculate the actual value of pixel_scale_ratio based on pixel_scale + # because source_catalog uses this value from the header. + if self.pixel_scale is None: + model.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio + else: + model.meta.resample.pixel_scale_ratio = resamp.pscale_ratio + model.meta.resample.pixfrac = kwargs['pixfrac'] + result.shelve(model, 0) + + if len(result) == 1: + model = result.borrow(0) + result.shelve(model, 0, modify=False) + return model + + return library_to_container(result) @staticmethod def _check_list_pars(vals, name, min_vals=None): diff --git a/jwst/resample/tests/test_resample_step.py b/jwst/resample/tests/test_resample_step.py index 62af3e496f..2d5c5e0c29 100644 --- a/jwst/resample/tests/test_resample_step.py +++ b/jwst/resample/tests/test_resample_step.py @@ -7,7 +7,7 @@ from stdatamodels.jwst.datamodels import ImageModel -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from jwst.assign_wcs import AssignWcsStep from jwst.assign_wcs.util import compute_fiducial, compute_scale from jwst.exp_to_source import multislit_to_container @@ -567,7 +567,7 @@ def test_weight_type(nircam_rate, tmp_cwd): im2.meta.observation.sequence_id = "2" 
im3.meta.observation.sequence_id = "3" - c = ModelContainer([im1, im2, im3]) + c = ModelLibrary([im1, im2, im3]) assert len(c.group_names) == 3 result1 = ResampleStep.call(c, weight_type="ivm", blendheaders=False, save_results=True) @@ -586,8 +586,10 @@ def test_weight_type(nircam_rate, tmp_cwd): # remove measurement time to force use of exposure time # this also implicitly shows that measurement time was indeed used above expected_ratio = im1.meta.exposure.exposure_time / im1.meta.exposure.measurement_time - for im in c: - del im.meta.exposure.measurement_time + with c: + for j, im in enumerate(c): + del im.meta.exposure.measurement_time + c.shelve(im, j) result3 = ResampleStep.call(c, weight_type="exptime", blendheaders=False) assert_allclose(result3.data[100:105, 100:105], 6.667, rtol=1e-2) @@ -628,7 +630,7 @@ def test_sip_coeffs_do_not_propagate(nircam_rate): def test_build_interpolated_output_wcs(miri_rate_pair): im1, im2 = miri_rate_pair - driz = ResampleSpecData(ModelContainer([im1, im2])) + driz = ResampleSpecData(ModelLibrary([im1, im2])) output_wcs = driz.build_interpolated_output_wcs() # Make sure that all RA, Dec values in the input image have a location in @@ -721,9 +723,7 @@ def test_resample_variance(nircam_rate, n_images, weight_type): im.err += err im.meta.filename = "foo.fits" - c = ModelContainer() - for n in range(n_images): - c.append(im.copy()) + c = ModelLibrary([im.copy() for _ in range(n_images)]) result = ResampleStep.call(c, blendheaders=False, weight_type=weight_type) @@ -744,7 +744,7 @@ def test_resample_undefined_variance(nircam_rate, shape): im.var_poisson = np.ones(shape, dtype=im.var_poisson.dtype.type) im.var_flat = np.ones(shape, dtype=im.var_flat.dtype.type) im.meta.filename = "foo.fits" - c = ModelContainer([im]) + c = ModelLibrary([im]) with pytest.warns(RuntimeWarning, match="var_rnoise array not available"): result = ResampleStep.call(c, blendheaders=False) From 9233dd232ab12e769579987fb6986d094026167f Mon Sep 17 00:00:00 
2001 From: Ned Molter Date: Mon, 29 Jul 2024 16:10:09 -0400 Subject: [PATCH 07/85] Revert "temporary ModelContainer to and from ModelLibrary converter, assign_mtwcs to library" This reverts commit 018e3ce02b35cefe0d22fc6409dbca61f3fe25dc. --- jwst/assign_mtwcs/assign_mtwcs_step.py | 19 +++--- jwst/assign_mtwcs/moving_target_wcs.py | 91 +++++++++++++------------- jwst/datamodels/library.py | 36 ---------- 3 files changed, 53 insertions(+), 93 deletions(-) diff --git a/jwst/assign_mtwcs/assign_mtwcs_step.py b/jwst/assign_mtwcs/assign_mtwcs_step.py index 75243b5ae4..bd312ccddd 100755 --- a/jwst/assign_mtwcs/assign_mtwcs_step.py +++ b/jwst/assign_mtwcs/assign_mtwcs_step.py @@ -1,8 +1,9 @@ #! /usr/bin/env python import logging -from jwst.datamodels import ModelLibrary, ModelContainer -from jwst.datamodels.library import container_to_library, library_to_container +from stdatamodels.jwst import datamodels + +from jwst.datamodels import ModelContainer from ..stpipe import Step from .moving_target_wcs import assign_moving_target_wcs @@ -31,19 +32,17 @@ class AssignMTWcsStep(Step): """ def process(self, input): - if isinstance(input, (str, dict)): - input = ModelLibrary(input) - elif isinstance(input, ModelContainer): - input = container_to_library(input) + if isinstance(input, str): + input = datamodels.open(input) - # Can't apply the step if we aren't given a ModelLibrary as input - if not isinstance(input, ModelLibrary): + # Can't apply the step if we aren't given a ModelContainer as input + if not isinstance(input, ModelContainer): log.warning("Input data type is not supported.") # raise ValueError("Expected input to be an association file name or a ModelContainer.") input.meta.cal_step.assign_mtwcs = 'SKIPPED' - return library_to_container(input) + return input # Apply the step result = assign_moving_target_wcs(input) - return library_to_container(result) + return result diff --git a/jwst/assign_mtwcs/moving_target_wcs.py b/jwst/assign_mtwcs/moving_target_wcs.py index 
b61fc54cf4..7f61b84e9c 100644 --- a/jwst/assign_mtwcs/moving_target_wcs.py +++ b/jwst/assign_mtwcs/moving_target_wcs.py @@ -16,7 +16,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelLibrary +from jwst.datamodels import ModelContainer log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -24,52 +24,49 @@ __all__ = ["assign_moving_target_wcs"] -def assign_moving_target_wcs(input_models: ModelLibrary) -> ModelLibrary: - - with input_models: - # get the indices of the science exposures in the ModelLibrary - indices = input_models.ind_asn_type('science') - - mt_ra = [] - mt_dec = [] - for i in indices: - sci_model = input_models.borrow(i) - mt_ra.append(sci_model.meta.wcsinfo.mt_ra) - mt_dec.append(sci_model.meta.wcsinfo.mt_dec) - input_models.shelve(sci_model, i, modify=False) - - if None in mt_ra or None in mt_dec: - log.warning("One or more MT RA/Dec values missing in input images") - log.warning("Step will be skipped, resulting in target misalignment") - for i in indices: - sci_model = input_models.borrow(i) - sci_model.meta.cal_step.assign_mtwcs = 'SKIPPED' - input_models.shelve(sci_model, i, modify=True) - return input_models - - mt_avra = np.mean(mt_ra) - mt_avdec = np.mean(mt_dec) - - for i in indices: - sci_model = input_models.borrow(i) - sci_model.meta.wcsinfo.mt_avra = mt_avra - sci_model.meta.wcsinfo.mt_avdec = mt_avdec - if isinstance(sci_model, datamodels.MultiSlitModel): - for ind, slit in enumerate(sci_model.slits): - new_wcs = add_mt_frame(slit.meta.wcs, - mt_avra, mt_avdec, - slit.meta.wcsinfo.mt_ra, slit.meta.wcsinfo.mt_dec) - del sci_model.slits[ind].meta.wcs - sci_model.slits[ind].meta.wcs = new_wcs - else: - new_wcs = add_mt_frame(sci_model.meta.wcs, mt_avra, mt_avdec, - sci_model.meta.wcsinfo.mt_ra, sci_model.meta.wcsinfo.mt_dec) - del sci_model.meta.wcs - sci_model.meta.wcs = new_wcs - sci_model.meta.cal_step.assign_mtwcs = 'COMPLETE' - input_models.shelve(sci_model, i, modify=True) - - return 
input_models +def assign_moving_target_wcs(input_model): + + if not isinstance(input_model, ModelContainer): + raise ValueError("Expected a ModelContainer object") + + # get the indices of the science exposures in the ModelContainer + ind = input_model.ind_asn_type('science') + sci_models = np.asarray(input_model._models)[ind] + # Get the MT RA/Dec values from all the input exposures + mt_ra = np.array([model.meta.wcsinfo.mt_ra for model in sci_models]) + mt_dec = np.array([model.meta.wcsinfo.mt_dec for model in sci_models]) + + # Compute the mean MT RA/Dec over all exposures + if None in mt_ra or None in mt_dec: + log.warning("One or more MT RA/Dec values missing in input images") + log.warning("Step will be skipped, resulting in target misalignment") + for model in sci_models: + model.meta.cal_step.assign_mtwcs = 'SKIPPED' + return input_model + else: + mt_avra = mt_ra.mean() + mt_avdec = mt_dec.mean() + + for model in sci_models: + model.meta.wcsinfo.mt_avra = mt_avra + model.meta.wcsinfo.mt_avdec = mt_avdec + if isinstance(model, datamodels.MultiSlitModel): + for ind, slit in enumerate(model.slits): + new_wcs = add_mt_frame(slit.meta.wcs, + mt_avra, mt_avdec, + slit.meta.wcsinfo.mt_ra, slit.meta.wcsinfo.mt_dec) + del model.slits[ind].meta.wcs + model.slits[ind].meta.wcs = new_wcs + else: + + new_wcs = add_mt_frame(model.meta.wcs, mt_avra, mt_avdec, + model.meta.wcsinfo.mt_ra, model.meta.wcsinfo.mt_dec) + del model.meta.wcs + model.meta.wcs = new_wcs + + model.meta.cal_step.assign_mtwcs = 'COMPLETE' + + return input_model def add_mt_frame(wcs, ra_average, dec_average, mt_ra, mt_dec): diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 3f5ebef6bf..49206b5643 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -1,5 +1,4 @@ import io -from pathlib import Path import asdf from astropy.io import fits @@ -7,7 +6,6 @@ from stpipe.library import AbstractModelLibrary, NoGroupID from jwst.associations import 
AssociationNotValidError, load_asn -from jwst.datamodels import ModelContainer __all__ = ["ModelLibrary"] @@ -98,13 +96,6 @@ def _assign_member_to_model(self, model, member): model.meta.asn.table_name = self.asn.get("table_name", "") model.meta.asn.pool_name = self.asn.get("asn_pool", "") - def ind_asn_type(self, exptype): - return [ - i - for i, member in enumerate(self.asn['products'][0]['members']) - if member['exptype'] == exptype - ] - def _attrs_to_group_id( program_number, @@ -123,30 +114,3 @@ def _attrs_to_group_id( f"_{visit_group}{sequence_id}{activity_id}" f"_{exposure_number}" ) - - -def container_to_library(container): - """ - Temporary converter function so that steps can start using ModelLibrary - without changing stdatamodels.jwst.open() to return ModelLibrary by default.""" - lib = ModelLibrary(container.asn_file_path) - with lib: - for i, model in enumerate(container): - lib.borrow(i) - lib.shelve(model, i) - lib.asn_table_name = getattr(container, "asn_table_name", "") - return lib - - -def library_to_container(library): - """ - Temporary converter function so that steps can start using ModelLibrary - without changing stdatamodels.jwst.open() to return ModelLibrary by default.""" - container = ModelContainer(str(Path(library._asn_dir) / Path(library._asn["table_name"]))) - with library: - for i, _ in enumerate(container): - model = library.borrow(i) - container[i] = model - library.shelve(model, i, modify=False) - container.asn_table_name = getattr(library, "asn_table_name", "") - return container From bd6207b35aa9c70734d7ce49a51a6f2e683fdab0 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 29 Jul 2024 16:18:23 -0400 Subject: [PATCH 08/85] put in nogroupid try except statements --- jwst/datamodels/library.py | 73 ++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 49206b5643..ae57b11045 100644 --- a/jwst/datamodels/library.py +++ 
b/jwst/datamodels/library.py @@ -43,31 +43,33 @@ def _filename_to_group_id(self, filename): """ # use astropy.io.fits directly to read header keywords # avoiding the DataModel overhead - # TODO look up attribute to keyword in core schema - with fits.open(filename) as ff: - if "ASDF" in ff: - asdf_yaml = asdf.util.load_yaml(io.BytesIO(ff['ASDF'].data.tobytes())) - if group_id := asdf_yaml.get('meta', {}).get('group_id'): - return group_id - header = ff["PRIMARY"].header - program_number = header["PROGRAM"] - observation_number = header["OBSERVTN"] - visit_number = header["VISIT"] - visit_group = header["VISITGRP"] - sequence_id = header["SEQ_ID"] - activity_id = header["ACT_ID"] - exposure_number = header["EXPOSURE"] - - # FIXME try except and NoGroupID... - return _attrs_to_group_id( - program_number, - observation_number, - visit_number, - visit_group, - sequence_id, - activity_id, - exposure_number, - ) + try: + with fits.open(filename) as ff: + if "ASDF" in ff: + asdf_yaml = asdf.util.load_yaml(io.BytesIO(ff['ASDF'].data.tobytes())) + if group_id := asdf_yaml.get('meta', {}).get('group_id'): + return group_id + header = ff["PRIMARY"].header + program_number = header["PROGRAM"] + observation_number = header["OBSERVTN"] + visit_number = header["VISIT"] + visit_group = header["VISITGRP"] + sequence_id = header["SEQ_ID"] + activity_id = header["ACT_ID"] + exposure_number = header["EXPOSURE"] + + return _attrs_to_group_id( + program_number, + observation_number, + visit_number, + visit_group, + sequence_id, + activity_id, + exposure_number, + ) + except KeyError as e: + msg = f"Cannot find header keyword {e} in {filename}" + raise NoGroupID(msg) from e def _model_to_group_id(self, model): """ @@ -75,16 +77,17 @@ def _model_to_group_id(self, model): """ if group_id := getattr(model.meta, "group_id", None): return group_id - # FIXME try except and NoGroupID... 
- return _attrs_to_group_id( - model.meta.observation.program_number, - model.meta.observation.observation_number, - model.meta.observation.visit_number, - model.meta.observation.visit_group, - model.meta.observation.sequence_id, - model.meta.observation.activity_id, - model.meta.observation.exposure_number, - ) + if hasattr(model.meta, "observation"): + return _attrs_to_group_id( + model.meta.observation.program_number, + model.meta.observation.observation_number, + model.meta.observation.visit_number, + model.meta.observation.visit_group, + model.meta.observation.sequence_id, + model.meta.observation.activity_id, + model.meta.observation.exposure_number, + ) + raise NoGroupID(f"{model} missing group_id") def _assign_member_to_model(self, model, member): for attr in ("group_id", "tweakreg_catalog", "exptype"): From 5c3cfc322667056f14a7ac5ad7dc17c79f348c4f Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 29 Jul 2024 17:33:06 -0400 Subject: [PATCH 09/85] ModelLibrary for resample imaging, ModelContainer for resample spec --- jwst/resample/resample_spec.py | 371 ++++++++++++++++++---- jwst/resample/resample_spec_step.py | 105 +++--- jwst/resample/resample_step.py | 11 +- jwst/resample/tests/test_resample_step.py | 2 +- 4 files changed, 366 insertions(+), 123 deletions(-) diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 7a9bf42931..4d5f2b58a7 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -1,5 +1,6 @@ import logging import warnings +import os import numpy as np from astropy import coordinates as coord @@ -16,9 +17,10 @@ from stdatamodels.jwst import datamodels from jwst.assign_wcs.util import compute_scale, wrap_ra -from jwst.datamodels import ModelContainer, ModelLibrary +from jwst.datamodels import ModelContainer from jwst.resample import resample_utils from jwst.resample.resample import ResampleData +from . 
import gwcs_drizzle log = logging.getLogger(__name__) @@ -52,7 +54,7 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, """ Parameters ---------- - input_models : ModelLibrary + input_models : list of objects list of data models, one for each input image output : str @@ -61,10 +63,7 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, kwargs : dict Other parameters """ - if isinstance(input_models, ModelContainer): - self.input_models = ModelLibrary(input_models) - else: - self.input_models = input_models + self.input_models = input_models self.output_dir = None self.output_filename = output if output is not None and '.fits' not in str(output): @@ -92,12 +91,9 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.asn_id = kwargs.get('asn_id', None) # Get an average input pixel scale for parameter calculations - with self.input_models: - example_model = self.input_models.borrow(0) - self.input_models.shelve(example_model, 0, modify=False) - disp_axis = example_model.meta.wcsinfo.dispersion_direction + disp_axis = self.input_models[0].meta.wcsinfo.dispersion_direction self.input_pixscale0 = compute_spectral_pixel_scale( - example_model.meta.wcs, disp_axis=disp_axis) + self.input_models[0].meta.wcs, disp_axis=disp_axis) if np.isnan(self.input_pixscale0): log.warning('Input pixel scale could not be determined.') if pscale is not None: @@ -105,7 +101,7 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, 'without an input pixel scale. 
Setting pscale=None.') pscale = None - nominal_area = example_model.meta.photometry.pixelarea_steradians + nominal_area = self.input_models[0].meta.photometry.pixelarea_steradians if nominal_area is None: log.warning('Nominal pixel area not set in input data.') if pscale is not None: @@ -166,8 +162,8 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, # These functions internally use self.pscale_ratio to accommodate # user settings. # Any other customizations (crpix, crval, rotation) are ignored. - if resample_utils.is_sky_like(example_model.meta.wcs.output_frame): - if example_model.meta.instrument.name != "NIRSPEC": + if resample_utils.is_sky_like(self.input_models[0].meta.wcs.output_frame): + if self.input_models[0].meta.instrument.name != "NIRSPEC": self.output_wcs = self.build_interpolated_output_wcs() else: self.output_wcs = self.build_nirspec_output_wcs() @@ -193,16 +189,300 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.blank_output = datamodels.SlitModel(tuple(self.output_wcs.array_shape)) # update meta data and wcs - self.blank_output.update(example_model) + self.blank_output.update(input_models[0]) self.blank_output.meta.wcs = self.output_wcs if output_pix_area is not None: self.blank_output.meta.photometry.pixelarea_steradians = output_pix_area self.blank_output.meta.photometry.pixelarea_arcsecsq = ( output_pix_area * np.rad2deg(3600)**2) - # FIXME: this should be a library self.output_models = ModelContainer() - del example_model + + + def do_drizzle(self): + """Pick the correct drizzling mode based on self.single + """ + if self.single: + return self.resample_many_to_many() + else: + return self.resample_many_to_one() + + def resample_many_to_many(self): + """Resample many inputs to many outputs where outputs have a common frame. + + Coadd only different detectors of the same exposure, i.e. 
map NRCA5 and + NRCB5 onto the same output image, as they image different areas of the + sky. + + Used for outlier detection + """ + for exposure in self.input_models.models_grouped: + output_model = self.blank_output + # Determine output file type from input exposure filenames + # Use this for defining the output filename + indx = exposure[0].meta.filename.rfind('.') + output_type = exposure[0].meta.filename[indx:] + output_root = '_'.join(exposure[0].meta.filename.replace( + output_type, '').split('_')[:-1]) + if self.asn_id is not None: + output_model.meta.filename = f'{output_root}_{self.asn_id}_outlier_i2d{output_type}' + else: + output_model.meta.filename = f'{output_root}_outlier_i2d{output_type}' + + # Initialize the output with the wcs + driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, + kernel=self.kernel, fillval=self.fillval) + + log.info(f"{len(exposure)} exposures to drizzle together") + for img in exposure: + img = datamodels.open(img) + iscale = self._get_intensity_scale(img) + log.debug(f'Using intensity scale iscale={iscale}') + + inwht = resample_utils.build_driz_weight( + img, + weight_type=self.weight_type, + good_bits=self.good_bits + ) + + # apply sky subtraction + blevel = img.meta.background.level + if not img.meta.background.subtracted and blevel is not None: + data = img.data - blevel + else: + data = img.data + + xmin, xmax, ymin, ymax = resample_utils._resample_range( + data.shape, + img.meta.wcs.bounding_box + ) + + driz.add_image( + data, + img.meta.wcs, + iscale=iscale, + inwht=inwht, + xmin=xmin, + xmax=xmax, + ymin=ymin, + ymax=ymax + ) + del data + img.close() + + if not self.in_memory: + # Write out model to disk, then return filename + output_name = output_model.meta.filename + if self.output_dir is not None: + output_name = os.path.join(self.output_dir, output_name) + output_model.save(output_name) + log.info(f"Saved model in {output_name}") + self.output_models.append(output_name) + else: + 
self.output_models.append(output_model.copy()) + output_model.data *= 0. + output_model.wht *= 0. + + return self.output_models + + def resample_many_to_one(self): + """Resample and coadd many inputs to a single output. + + Used for stage 3 resampling + """ + output_model = self.blank_output.copy() + output_model.meta.filename = self.output_filename + output_model.meta.resample.weight_type = self.weight_type + output_model.meta.resample.pointings = len(self.input_models.group_names) + + if self.blendheaders: + self.blend_output_metadata(output_model) + + # Initialize the output with the wcs + driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, + kernel=self.kernel, fillval=self.fillval) + + log.info("Resampling science data") + for img in self.input_models: + iscale = self._get_intensity_scale(img) + log.debug(f'Using intensity scale iscale={iscale}') + img.meta.iscale = iscale + + inwht = resample_utils.build_driz_weight(img, + weight_type=self.weight_type, + good_bits=self.good_bits) + # apply sky subtraction + blevel = img.meta.background.level + if not img.meta.background.subtracted and blevel is not None: + data = img.data - blevel + else: + data = img.data.copy() + + xmin, xmax, ymin, ymax = resample_utils._resample_range( + data.shape, + img.meta.wcs.bounding_box + ) + + driz.add_image( + data, + img.meta.wcs, + iscale=iscale, + inwht=inwht, + xmin=xmin, + xmax=xmax, + ymin=ymin, + ymax=ymax + ) + del data, inwht + + # Resample variance arrays in self.input_models to output_model + self.resample_variance_arrays(output_model) + var_components = [ + output_model.var_rnoise, + output_model.var_poisson, + output_model.var_flat + ] + output_model.err = np.sqrt(np.nansum(var_components,axis=0)) + + # nansum returns zero for input that is all NaN - + # set those values to NaN instead + all_nan = np.all(np.isnan(var_components), axis=0) + output_model.err[all_nan] = np.nan + + self.update_exposure_times(output_model) + 
self.output_models.append(output_model) + + for img in self.input_models: + del img.meta.iscale + + return self.output_models + + def resample_variance_arrays(self, output_model): + """Resample variance arrays from self.input_models to the output_model. + + Variance images from each input model are resampled individually and + added to a weighted sum. If weight_type is 'ivm', the inverse of the + resampled read noise variance is used as the weight for all the variance + components. If weight_type is 'exptime', the exposure time is used. + + The output_model is modified in place. + """ + log.info("Resampling variance components") + weighted_rn_var = np.full_like(output_model.data, np.nan) + weighted_pn_var = np.full_like(output_model.data, np.nan) + weighted_flat_var = np.full_like(output_model.data, np.nan) + total_weight_rn_var = np.zeros_like(output_model.data) + total_weight_pn_var = np.zeros_like(output_model.data) + total_weight_flat_var = np.zeros_like(output_model.data) + for model in self.input_models: + # Do the read noise variance first, so it can be + # used for weights if needed + rn_var = self._resample_one_variance_array( + "var_rnoise", model, output_model) + + # Find valid weighting values in the variance + if rn_var is not None: + mask = (rn_var > 0) & np.isfinite(rn_var) + else: + mask = np.full_like(rn_var, False) + + # Set the weight for the image from the weight type + weight = np.ones(output_model.data.shape) + if self.weight_type == "ivm" and rn_var is not None: + weight[mask] = rn_var[mask] ** -1 + elif self.weight_type == "exptime": + if resample_utils.check_for_tmeasure(model): + weight[:] = model.meta.exposure.measurement_time + else: + weight[:] = model.meta.exposure.exposure_time + + # Weight and add the readnoise variance + # Note: floating point overflow is an issue if variance weights + # are used - it can't be squared before multiplication + if rn_var is not None: + mask = (rn_var >= 0) & np.isfinite(rn_var) & (weight > 0) + 
weighted_rn_var[mask] = np.nansum( + [weighted_rn_var[mask], + rn_var[mask] * weight[mask] * weight[mask]], + axis=0 + ) + total_weight_rn_var[mask] += weight[mask] + + # Now do poisson and flat variance, updating only valid new values + # (zero is a valid value; negative, inf, or NaN are not) + pn_var = self._resample_one_variance_array( + "var_poisson", model, output_model) + if pn_var is not None: + mask = (pn_var >= 0) & np.isfinite(pn_var) & (weight > 0) + weighted_pn_var[mask] = np.nansum( + [weighted_pn_var[mask], + pn_var[mask] * weight[mask] * weight[mask]], + axis=0 + ) + total_weight_pn_var[mask] += weight[mask] + + flat_var = self._resample_one_variance_array( + "var_flat", model, output_model) + if flat_var is not None: + mask = (flat_var >= 0) & np.isfinite(flat_var) & (weight > 0) + weighted_flat_var[mask] = np.nansum( + [weighted_flat_var[mask], + flat_var[mask] * weight[mask] * weight[mask]], + axis=0 + ) + total_weight_flat_var[mask] += weight[mask] + + # We now have a sum of the weighted resampled variances. + # Divide by the total weights, squared, and set in the output model. + # Zero weight and missing values are NaN in the output. + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) + warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) + + output_variance = (weighted_rn_var + / total_weight_rn_var / total_weight_rn_var) + setattr(output_model, "var_rnoise", output_variance) + + output_variance = (weighted_pn_var + / total_weight_pn_var / total_weight_pn_var) + setattr(output_model, "var_poisson", output_variance) + + output_variance = (weighted_flat_var + / total_weight_flat_var / total_weight_flat_var) + setattr(output_model, "var_flat", output_variance) + + def update_exposure_times(self, output_model): + """Modify exposure time metadata in-place""" + total_exposure_time = 0. 
+ exposure_times = {'start': [], 'end': []} + duration = 0.0 + total_measurement_time = 0.0 + measurement_time_failures = [] + for exposure in self.input_models.models_grouped: + total_exposure_time += exposure[0].meta.exposure.exposure_time + if not resample_utils.check_for_tmeasure(exposure[0]): + measurement_time_failures.append(1) + else: + total_measurement_time += exposure[0].meta.exposure.measurement_time + measurement_time_failures.append(0) + exposure_times['start'].append(exposure[0].meta.exposure.start_time) + exposure_times['end'].append(exposure[0].meta.exposure.end_time) + duration += exposure[0].meta.exposure.duration + + # Update some basic exposure time values based on output_model + output_model.meta.exposure.exposure_time = total_exposure_time + if not any(measurement_time_failures): + output_model.meta.exposure.measurement_time = total_measurement_time + output_model.meta.exposure.start_time = min(exposure_times['start']) + output_model.meta.exposure.end_time = max(exposure_times['end']) + + # Update other exposure time keywords: + # XPOSURE (identical to the total effective exposure time, EFFEXPTM) + xposure = total_exposure_time + output_model.meta.exposure.effective_exposure_time = xposure + # DURATION (identical to TELAPSE, elapsed time) + output_model.meta.exposure.duration = duration + output_model.meta.exposure.elapsed_exposure_time = duration def build_nirspec_output_wcs(self, refmodel=None): """ @@ -238,28 +518,22 @@ def build_nirspec_output_wcs(self, refmodel=None): output_wcs : `~gwcs.WCS` A gwcs WCS object defining the output frame WCS. 
""" - with self.input_models: - all_wcs = [] - for i, model in enumerate(self.input_models): - all_wcs.append(model.meta.wcs) - self.input_models.shelve(model, i, modify=False) - if refmodel: - all_wcs.insert(0, refmodel.meta.wcs) - else: - # Use the first model with any good data as the reference model - for i, model in enumerate(self.input_models): - dq_mask = resample_utils.build_mask(model.dq, self.good_bits) - good = np.isfinite(model.data) & (model.data != 0) & dq_mask - if np.sum(good) > 100 and refmodel is None: - refmodel = model - self.input_models.shelve(model, i, modify=False) - break - self.input_models.shelve(model, i) + all_wcs = [m.meta.wcs for m in self.input_models if m is not refmodel] + if refmodel: + all_wcs.insert(0, refmodel.meta.wcs) + else: + # Use the first model with a reasonable amount of good data + # as the reference model + for model in self.input_models: + dq_mask = resample_utils.build_mask(model.dq, self.good_bits) + good = np.isfinite(model.data) & (model.data != 0) & dq_mask + if np.sum(good) > 100 and refmodel is None: + refmodel = model + break # If no good data was found, use the first model. 
if refmodel is None: - refmodel = self.input_models.borrow(0) - self.input_models.shelve(refmodel, 0, modify=False) + refmodel = self.input_models[0] # Make a copy of the data array for internal manipulation refmodel_data = refmodel.data.copy() @@ -523,30 +797,19 @@ def build_interpolated_output_wcs(self): # append wavelengths that fall outside the endpoint of # of wavelength array when looping over additional data + all_wavelength = [] all_ra_slit = [] all_dec_slit = [] - all_wcs = [] - spectral_axes = [] - with self.input_models: - example_model = self.input_models.borrow(0) - self.input_models.shelve(example_model, 0, modify=False) - for im, model in enumerate(self.input_models): - wcs = model.meta.wcs - spectral_axis = find_dispersion_axis(model) - self.input_models.shelve(model, im, modify=False) - all_wcs.append(wcs) - spectral_axes.append(spectral_axis) - - for im in range(len(all_wcs)): - wcs = all_wcs[im] + for im, model in enumerate(self.input_models): + wcs = model.meta.wcs bbox = wcs.bounding_box grid = wcstools.grid_from_bounding_box(bbox) ra, dec, lam = np.array(wcs(*grid)) # Handle vertical (MIRI) or horizontal (NIRSpec) dispersion. The # following 2 variables are 0 or 1, i.e. zero-indexed in x,y WCS order - spectral_axis = spectral_axes[im] + spectral_axis = find_dispersion_axis(model) spatial_axis = spectral_axis ^ 1 # Compute the wavelength array, trimming NaNs from the ends @@ -651,7 +914,7 @@ def build_interpolated_output_wcs(self): # Check if the data is MIRI LRS FIXED Slit. If it is then # the wavelength array needs to be flipped so that the resampled # dispersion direction matches the dispersion direction on the detector. 
- if example_model.meta.exposure.type == 'MIR_LRS-FIXEDSLIT': + if self.input_models[0].meta.exposure.type == 'MIR_LRS-FIXEDSLIT': wavelength_array = np.flip(wavelength_array, axis=None) step = 1 @@ -780,9 +1043,7 @@ def build_nirspec_lamp_output_wcs(self): output_wcs : `~gwcs.WCS` object A gwcs WCS object defining the output frame WCS. """ - with self.input_models: - model = self.input_models.borrow(0) - self.input_models.shelve(model, 0, modify=False) + model = self.input_models[0] wcs = model.meta.wcs bbox = wcs.bounding_box grid = wcstools.grid_from_bounding_box(bbox) diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index 1c4fb15f17..43443ced50 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -3,11 +3,9 @@ from stdatamodels.jwst import datamodels from stdatamodels.jwst.datamodels import MultiSlitModel, ImageModel -from jwst.datamodels import ModelContainer, ModelLibrary -from jwst.datamodels.library import container_to_library +from jwst.datamodels import ModelContainer from . import resample_spec, ResampleStep -from jwst.resample.resample import copy_asn_info_from_library -from ..exp_to_source import multislit_to_library +from ..exp_to_source import multislit_to_container from ..assign_wcs.util import update_s_region_spectral from jwst.lib.wcs_utils import get_wavelengths @@ -48,18 +46,8 @@ def process(self, input): input_new = datamodels.SlitModel(input_new) if isinstance(input_new, ModelContainer): - input_models = container_to_library(input_new) - try: - output = input_models.meta.asn_table.products[0].name - except AttributeError: - # NIRSpec MOS data goes through this path, as the container - # is only ModelContainer-like, and doesn't have an asn_table - # attribute attached. 
Output name handling gets done in - # _process_multislit() via the update method - # TODO: the container-like object should retain asn_table - output = None - elif isinstance(input_new, ModelLibrary): input_models = input_new + try: output = input_models.meta.asn_table.products[0].name except AttributeError: @@ -70,9 +58,9 @@ def process(self, input): # TODO: the container-like object should retain asn_table output = None else: - input_models = ModelLibrary([input_new]) + input_models = ModelContainer([input_new]) output = input_new.meta.filename - self.blendheaders = False + self.blendheaders = False # Setup drizzle-related parameters kwargs = self.get_drizpars() @@ -80,23 +68,20 @@ def process(self, input): self.drizpars = kwargs # Call resampling - with input_models: - example_model = input_models.borrow(0) - input_models.shelve(example_model, 0, modify=False) - if isinstance(example_model, MultiSlitModel): - result = self._process_multislit(input_models) + if isinstance(input_models[0], MultiSlitModel): + result = self._process_multislit(input_models) - elif len(example_model.data.shape) != 2: - # resample can only handle 2D images, not 3D cubes, etc - raise RuntimeError("Input {} is not a 2D image.".format(example_model)) + elif len(input_models[0].data.shape) != 2: + # resample can only handle 2D images, not 3D cubes, etc + raise RuntimeError("Input {} is not a 2D image.".format(input_models[0])) - else: - # result is a SlitModel - result = self._process_slit(input_models) + else: + # result is a SlitModel + result = self._process_slit(input_models) # Update ASNTABLE in output - result.meta.asn.table_name = example_model.meta.asn.table_name - result.meta.asn.pool_name = example_model.meta.asn.pool_name + result.meta.asn.table_name = input_models[0].meta.asn.table_name + result.meta.asn.pool_name = input_models[0].meta.asn.pool_name # populate the result wavelength attribute for MultiSlitModel if isinstance(result, MultiSlitModel): @@ -116,41 +101,37 @@ def 
_process_multislit(self, input_models): Parameters ---------- - input_models : `~jwst.datamodels.ModelLibrary` - A library of `~jwst.datamodels.MultiSlitModel` + input : `~jwst.datamodels.ModelContainer` + A container of `~jwst.datamodels.MultiSlitModel` Returns ------- result : `~jwst.datamodels.MultiSlitModel` The resampled output, one per source """ - library_ordereddict = multislit_to_library(input_models) - with input_models: - example_model = input_models.borrow(0) - result = datamodels.MultiSlitModel() - result.update(example_model) - input_models.shelve(example_model, 0, modify=False) + containers = multislit_to_container(input_models) + result = datamodels.MultiSlitModel() - pscale_ratio = None - for input_library in library_ordereddict.values(): + result.update(input_models[0]) - resamp = resample_spec.ResampleSpecData(input_library, **self.drizpars) + pscale_ratio = None + for container in containers.values(): + resamp = resample_spec.ResampleSpecData(container, **self.drizpars) drizzled_models = resamp.do_drizzle() - with drizzled_models: - for i, model in enumerate(drizzled_models): - self.update_slit_metadata(model) - update_s_region_spectral(model) - result.slits.append(model) - drizzled_models.shelve(model, i, modify=False) + + for model in drizzled_models: + self.update_slit_metadata(model) + update_s_region_spectral(model) + result.slits.append(model) # Keep the first computed pixel scale ratio for storage if self.pixel_scale is not None and pscale_ratio is None: pscale_ratio = resamp.pscale_ratio result.meta.cal_step.resample = "COMPLETE" - # copy over asn information - copy_asn_info_from_library(input_models, result) + result.meta.asn.pool_name = input_models.asn_pool_name + result.meta.asn.table_name = input_models.asn_table_name if self.pixel_scale is None or pscale_ratio is None: result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio else: @@ -179,18 +160,18 @@ def _process_slit(self, input_models): drizzled_models = 
resamp.do_drizzle() - with drizzled_models: - result = drizzled_models.borrow(0) - drizzled_models.shelve(result, 0, modify=False) - result.meta.cal_step.resample = "COMPLETE" - copy_asn_info_from_library(input_models, result) - if self.pixel_scale is None: - result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio - else: - result.meta.resample.pixel_scale_ratio = resamp.pscale_ratio - result.meta.resample.pixfrac = self.pixfrac - self.update_slit_metadata(result) - update_s_region_spectral(result) + result = drizzled_models[0] + result.meta.cal_step.resample = "COMPLETE" + result.meta.asn.pool_name = input_models.asn_pool_name + result.meta.asn.table_name = input_models.asn_table_name + result.meta.bunit_data = drizzled_models[0].meta.bunit_data + if self.pixel_scale is None: + result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio + else: + result.meta.resample.pixel_scale_ratio = resamp.pscale_ratio + result.meta.resample.pixfrac = self.pixfrac + self.update_slit_metadata(result) + update_s_region_spectral(result) return result @@ -212,4 +193,4 @@ def update_slit_metadata(self, model): pass else: if val is not None: - setattr(model, attr, val) + setattr(model, attr, val) \ No newline at end of file diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 415645f6db..0fddbeca96 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -5,7 +5,6 @@ import asdf from jwst.datamodels import ModelContainer, ModelLibrary -from jwst.datamodels.library import container_to_library, library_to_container from . 
import resample from ..stpipe import Step @@ -62,8 +61,10 @@ def process(self, input): if isinstance(input, ModelLibrary): input_models = input - elif isinstance(input, ModelContainer): - input_models = container_to_library(input) + elif isinstance(input, (str, dict)): + input_models = ModelLibrary(input, on_disk=~self.in_memory) + elif isinstance(input, (ModelContainer, list)): + input_models = ModelLibrary(input, on_disk=False) #cannot instantiate on disk for data already in memory if isinstance(input, ModelLibrary): try: @@ -74,7 +75,7 @@ def process(self, input): # TODO: figure out why and make sure asn_table is carried along output = None else: - input_models = ModelLibrary([input]) + input_models = ModelLibrary([input], on_disk=False) #single model will not benefit from on_disk input_models.asn_pool_name = input.meta.asn.pool_name input_models.asn_table_name = input.meta.asn.table_name output = input.meta.filename @@ -118,7 +119,7 @@ def process(self, input): result.shelve(model, 0, modify=False) return model - return library_to_container(result) + return result @staticmethod def _check_list_pars(vals, name, min_vals=None): diff --git a/jwst/resample/tests/test_resample_step.py b/jwst/resample/tests/test_resample_step.py index 2d5c5e0c29..cdb0f1b379 100644 --- a/jwst/resample/tests/test_resample_step.py +++ b/jwst/resample/tests/test_resample_step.py @@ -630,7 +630,7 @@ def test_sip_coeffs_do_not_propagate(nircam_rate): def test_build_interpolated_output_wcs(miri_rate_pair): im1, im2 = miri_rate_pair - driz = ResampleSpecData(ModelLibrary([im1, im2])) + driz = ResampleSpecData(ModelContainer([im1, im2])) output_wcs = driz.build_interpolated_output_wcs() # Make sure that all RA, Dec values in the input image have a location in From b4a7ce1eda3840a2f8daa6e56b2e71322e1a4cbb Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 30 Jul 2024 10:11:32 -0400 Subject: [PATCH 10/85] fixing problems from merge of outlier detection changes --- jwst/resample/resample.py | 
58 +++++++++++++++------------------ jwst/resample/resample_spec.py | 32 +++++++++--------- jwst/resample/resample_utils.py | 19 +++++++---- 3 files changed, 56 insertions(+), 53 deletions(-) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 34521314ba..c3dd4273d6 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -121,21 +121,17 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, output_pix_area = output_wcs.pixel_area else: - with self.input_models: - models = list(self.input_models) - # Define output WCS based on all inputs, including a reference WCS: - self.output_wcs = resample_utils.make_output_wcs( - models, - ref_wcs=output_wcs, - pscale_ratio=self.pscale_ratio, - pscale=pscale, - rotation=rotation, - shape=None if output_shape is None else output_shape[::-1], - crpix=crpix, - crval=crval - ) - for i, m in enumerate(models): - self.input_models.shelve(m, i, modify=False) + # Define output WCS based on all inputs, including a reference WCS: + self.output_wcs = resample_utils.make_output_wcs( + input_models, + ref_wcs=output_wcs, + pscale_ratio=self.pscale_ratio, + pscale=pscale, + rotation=rotation, + shape=None if output_shape is None else output_shape[::-1], + crpix=crpix, + crval=crval + ) # Estimate output pixel area in Sr. 
NOTE: in principle we could # use the same algorithm as for when output_wcs is provided by the @@ -183,9 +179,9 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, self.blank_output = datamodels.ImageModel(tuple(self.output_wcs.array_shape)) # update meta data and wcs - with self.input_models: - example_model = self.input_models.borrow(0) - self.input_models.shelve(example_model, 0, modify=False) + with input_models: + example_model = input_models.borrow(0) + input_models.shelve(example_model, 0, modify=False) self.blank_output.update(example_model) self.blank_output.meta.wcs = self.output_wcs self.blank_output.meta.photometry.pixelarea_steradians = output_pix_area @@ -284,10 +280,10 @@ def resample_many_to_many(self, input_models): for group_id, indices in input_models.group_indices.items(): output_model = self.blank_output - copy_asn_info_from_library(self.input_models, output_model) + copy_asn_info_from_library(input_models, output_model) - with self.input_models: - example_image = self.input_models.borrow(indices[0]) + with input_models: + example_image = input_models.borrow(indices[0]) # Determine output file type from input exposure filenames # Use this for defining the output filename @@ -306,7 +302,7 @@ def resample_many_to_many(self, input_models): log.info(f"{len(indices)} exposures to drizzle together") for index in indices: - img = self.input_models.borrow(index) + img = input_models.borrow(index) iscale = self._get_intensity_scale(img) log.debug(f'Using intensity scale iscale={iscale}') @@ -339,7 +335,7 @@ def resample_many_to_many(self, input_models): ymax=ymax ) del data - self.input_models.shelve(img, index, modify=False) + input_models.shelve(img, index, modify=False) if not self.in_memory: # FIXME: Is this needed anymore with ModelLibrary? 
@@ -357,7 +353,7 @@ def resample_many_to_many(self, input_models): return ModelLibrary(output_models) - def resample_many_to_one(self): + def resample_many_to_one(self, input_models): """Resample and coadd many inputs to a single output. Used for stage 3 resampling @@ -365,21 +361,21 @@ def resample_many_to_one(self): output_model = self.blank_output.copy() output_model.meta.filename = self.output_filename output_model.meta.resample.weight_type = self.weight_type - output_model.meta.resample.pointings = len(self.input_models.group_names) + output_model.meta.resample.pointings = len(input_models.group_names) if self.blendheaders: self.blend_output_metadata(output_model) # copy over asn information - copy_asn_info_from_library(self.input_models, output_model) + copy_asn_info_from_library(input_models, output_model) # Initialize the output with the wcs driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, kernel=self.kernel, fillval=self.fillval) log.info("Resampling science data") - with self.input_models: - for img in self.input_models: + with input_models: + for img in input_models: iscale = self._get_intensity_scale(img) log.debug(f'Using intensity scale iscale={iscale}') img.meta.iscale = iscale @@ -410,7 +406,7 @@ def resample_many_to_one(self): ymax=ymax ) del data, inwht - self.input_models.shelve(img, modify=False) + input_models.shelve(img, modify=False) # Resample variance arrays in input_models to output_model self.resample_variance_arrays(output_model, input_models) @@ -426,7 +422,7 @@ def resample_many_to_one(self): all_nan = np.all(np.isnan(var_components), axis=0) output_model.err[all_nan] = np.nan - self.update_exposure_times(output_model) + self.update_exposure_times(output_model, input_models) return ModelLibrary([output_model]) @@ -508,7 +504,7 @@ def resample_variance_arrays(self, output_model, input_models): total_weight_flat_var[mask] += weight[mask] del model.meta.iscale - self.input_models.shelve(model, i, modify=False) + 
input_models.shelve(model, i, modify=False) # We now have a sum of the weighted resampled variances. # Divide by the total weights, squared, and set in the output model. diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 1e560bdf1b..09913d18d4 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -198,13 +198,13 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.output_models = ModelContainer() - def do_drizzle(self): + def do_drizzle(self, input_models): """Pick the correct drizzling mode based on self.single """ if self.single: - return self.resample_many_to_many() + return self.resample_many_to_many(input_models) else: - return self.resample_many_to_one() + return self.resample_many_to_one(input_models) def resample_many_to_many(self): """Resample many inputs to many outputs where outputs have a common frame. @@ -215,7 +215,7 @@ def resample_many_to_many(self): Used for outlier detection """ - for exposure in self.input_models.models_grouped: + for exposure in input_models.models_grouped: output_model = self.blank_output # Determine output file type from input exposure filenames # Use this for defining the output filename @@ -284,7 +284,7 @@ def resample_many_to_many(self): return self.output_models - def resample_many_to_one(self): + def resample_many_to_one(self, input_models): """Resample and coadd many inputs to a single output. 
Used for stage 3 resampling @@ -292,7 +292,7 @@ def resample_many_to_one(self): output_model = self.blank_output.copy() output_model.meta.filename = self.output_filename output_model.meta.resample.weight_type = self.weight_type - output_model.meta.resample.pointings = len(self.input_models.group_names) + output_model.meta.resample.pointings = len(input_models.group_names) if self.blendheaders: self.blend_output_metadata(output_model) @@ -302,7 +302,7 @@ def resample_many_to_one(self): kernel=self.kernel, fillval=self.fillval) log.info("Resampling science data") - for img in self.input_models: + for img in input_models: iscale = self._get_intensity_scale(img) log.debug(f'Using intensity scale iscale={iscale}') img.meta.iscale = iscale @@ -334,8 +334,8 @@ def resample_many_to_one(self): ) del data, inwht - # Resample variance arrays in self.input_models to output_model - self.resample_variance_arrays(output_model) + # Resample variance arrays in input_models to output_model + self.resample_variance_arrays(output_model, input_models) var_components = [ output_model.var_rnoise, output_model.var_poisson, @@ -348,15 +348,15 @@ def resample_many_to_one(self): all_nan = np.all(np.isnan(var_components), axis=0) output_model.err[all_nan] = np.nan - self.update_exposure_times(output_model) + self.update_exposure_times(output_model, input_models) self.output_models.append(output_model) - for img in self.input_models: + for img in input_models: del img.meta.iscale return self.output_models - def resample_variance_arrays(self, output_model): + def resample_variance_arrays(self, output_model, input_models): """Resample variance arrays from self.input_models to the output_model. 
Variance images from each input model are resampled individually and @@ -373,7 +373,7 @@ def resample_variance_arrays(self, output_model): total_weight_rn_var = np.zeros_like(output_model.data) total_weight_pn_var = np.zeros_like(output_model.data) total_weight_flat_var = np.zeros_like(output_model.data) - for model in self.input_models: + for model in input_models: # Do the read noise variance first, so it can be # used for weights if needed rn_var = self._resample_one_variance_array( @@ -450,14 +450,14 @@ def resample_variance_arrays(self, output_model): / total_weight_flat_var / total_weight_flat_var) setattr(output_model, "var_flat", output_variance) - def update_exposure_times(self, output_model): + def update_exposure_times(self, output_model, input_models): """Modify exposure time metadata in-place""" total_exposure_time = 0. exposure_times = {'start': [], 'end': []} duration = 0.0 total_measurement_time = 0.0 measurement_time_failures = [] - for exposure in self.input_models.models_grouped: + for exposure in input_models.models_grouped: total_exposure_time += exposure[0].meta.exposure.exposure_time if not resample_utils.check_for_tmeasure(exposure[0]): measurement_time_failures.append(1) @@ -483,7 +483,7 @@ def update_exposure_times(self, output_model): output_model.meta.exposure.duration = duration output_model.meta.exposure.elapsed_exposure_time = duration - def build_nirspec_output_wcs(self, refmodel=None): + def build_nirspec_output_wcs(self, input_models, refmodel=None): """ Create a spatial/spectral WCS covering the footprint of the input. 
diff --git a/jwst/resample/resample_utils.py b/jwst/resample/resample_utils.py index 0ba20c62c9..3545d30a1d 100644 --- a/jwst/resample/resample_utils.py +++ b/jwst/resample/resample_utils.py @@ -27,7 +27,7 @@ def make_output_wcs(input_models, ref_wcs=None, Parameters ---------- - input_models : list of `~jwst.datamodel.JwstDataModel` + input_models : `~jwst.datamodel.ModelLibrary` Each datamodel must have a ~gwcs.WCS object. pscale_ratio : float, optional @@ -67,10 +67,16 @@ def make_output_wcs(input_models, ref_wcs=None, WCS object, with defined domain, covering entire set of input frames """ if ref_wcs is None: - wcslist = [i.meta.wcs for i in input_models] - for w, i in zip(wcslist, input_models): - if w.bounding_box is None: - w.bounding_box = wcs_bbox_from_shape(i.data.shape) + with input_models: + wcslist = [] + for i, model in enumerate(input_models): + w = model.meta.wcs + if w.bounding_box is None: + w.bounding_box = wcs_bbox_from_shape(model.data.shape) + wcslist.append(w) + if i == 0: + example_model = model + input_models.shelve(model, modify=False) naxes = wcslist[0].output_frame.naxes if naxes != 2: @@ -81,7 +87,7 @@ def make_output_wcs(input_models, ref_wcs=None, output_wcs = util.wcs_from_footprints( wcslist, ref_wcs=wcslist[0], - ref_wcsinfo=input_models[0].meta.wcsinfo.instance, + ref_wcsinfo=example_model.meta.wcsinfo.instance, pscale_ratio=pscale_ratio, pscale=pscale, rotation=rotation, @@ -89,6 +95,7 @@ def make_output_wcs(input_models, ref_wcs=None, crpix=crpix, crval=crval ) + del example_model else: naxes = ref_wcs.output_frame.naxes From 36c2b401d7511a5501dfbd88372c9e9f1123c089 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 30 Jul 2024 14:06:32 -0400 Subject: [PATCH 11/85] outlier detection to ModelLibrary for imaging modes --- jwst/outlier_detection/imaging.py | 47 ++++-- .../outlier_detection_step.py | 70 ++++---- .../tests/test_outlier_detection.py | 158 +++++++++++++----- jwst/outlier_detection/utils.py | 134 ++++++++++++++- 
jwst/resample/resample.py | 2 +- 5 files changed, 321 insertions(+), 90 deletions(-) diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index c6ba99da13..295123d494 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -8,12 +8,12 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary from jwst.resample import resample from jwst.resample.resample_utils import build_driz_weight from jwst.stpipe.utilities import record_step_status -from .utils import create_median, flag_crs_in_models, flag_crs_in_models_with_resampling +from .utils import create_median_library, flag_crs_in_models_library, flag_crs_in_models_with_resampling_library from ._fileio import remove_file, save_median log = logging.getLogger(__name__) @@ -46,10 +46,13 @@ def detect_outliers( """ Flag outliers in imaging data. + input_models is expected to be a ModelLibrary + See `OutlierDetectionStep.spec` for documentation of these arguments. 
""" - if not isinstance(input_models, ModelContainer): - input_models = ModelContainer(input_models, save_open=in_memory) + if not isinstance(input_models, ModelLibrary): + on_disk = not in_memory + input_models = ModelLibrary(input_models, on_disk=on_disk) if len(input_models) < 2: log.warning(f"Input only contains {len(input_models)} exposures") @@ -60,8 +63,11 @@ def detect_outliers( if resample_data: # Start by creating resampled/mosaic images for # each group of exposures - output_path = make_output_path(basepath=input_models[0].meta.filename, - suffix='') + with input_models: + example_model = input_models.borrow(0) + output_path = make_output_path(basepath=example_model.meta.filename, + suffix='i2d') + input_models.shelve(example_model, modify=False) output_path = os.path.dirname(output_path) resamp = resample.ResampleData( input_models, @@ -82,23 +88,30 @@ def detect_outliers( else: # for non-dithered data, the resampled image is just the original image drizzled_models = input_models - for i in range(len(input_models)): - drizzled_models[i].wht = build_driz_weight( - input_models[i], - weight_type=weight_type, - good_bits=good_bits) - # copy for when saving median and input is a filename? - median_wcs = copy.deepcopy(input_models[0].meta.wcs) + with input_models: + for i, model in enumerate(input_models): + model.wht = build_driz_weight( + model, + weight_type=weight_type, + good_bits=good_bits) + # copy for when saving median and input is a filename? 
+ if i == 0: + median_wcs = copy.deepcopy(model.meta.wcs) # Perform median combination on set of drizzled mosaics - median_data = create_median(drizzled_models, maskpt) + on_disk = not in_memory + median_data = create_median_library(drizzled_models, maskpt, on_disk=on_disk) if save_intermediate_results: # make a median model - with datamodels.open(drizzled_models[0]) as dm0: + with drizzled_models: + example_model = drizzled_models.borrow(0) + drizzled_models.shelve(example_model, modify=False) + with datamodels.open(example_model) as dm0: median_model = datamodels.ImageModel(median_data) median_model.update(dm0) median_model.meta.wcs = median_wcs + del example_model save_median(median_model, make_output_path, asn_id) del median_model @@ -112,7 +125,7 @@ def detect_outliers( # Perform outlier detection using statistical comparisons between # each original input image and its blotted version of the median image if resample_data: - flag_crs_in_models_with_resampling( + flag_crs_in_models_with_resampling_library( input_models, median_data, median_wcs, @@ -123,5 +136,5 @@ def detect_outliers( backg, ) else: - flag_crs_in_models(input_models, median_data, snr1) + flag_crs_in_models_library(input_models, median_data, snr1) return input_models diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index 936ec3767b..790fd7b461 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -3,7 +3,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from jwst.stpipe import Step from jwst.stpipe.utilities import record_step_status from jwst.lib.pipe_utils import is_tso @@ -39,9 +39,10 @@ class OutlierDetectionStep(Step): Parameters ----------- - input_data : asn file or ~jwst.datamodels.ModelContainer - Single filename association table, or a datamodels.ModelContainer. 
- + input_data : asn file, ~jwst.datamodels.ModelContainer, or ~jwst.datamodels.ModelLibrary + Single filename association table, datamodels.ModelContainer, or datamodels.ModelLibrary. + For imaging modes a ModelLibrary is expected, whereas for spectroscopic modes a + ModelContainer is expected. """ class_alias = "outlier_detection" @@ -159,9 +160,11 @@ def process(self, input_data): else: self.log.error("Outlier detection failed for unknown/unsupported ", f"mode: {mode}") - return self._set_status(input_data, False) + record_step_status(input_data, "outlier_detection", False) + return input_data - return self._set_status(result_models, True) + record_step_status(result_models, "outlier_detection", True) + return result_models def _guess_mode(self, input_models): # The pipelines should set this mode or ideally these should @@ -170,13 +173,17 @@ def _guess_mode(self, input_models): return self.mode # guess mode from input type - if not isinstance(input_models, datamodels.JwstDataModel): + if isinstance(input_models, (str, dict)): input_models = datamodels.open(input_models, asn_n_members=1) # Select which version of OutlierDetection # needs to be used depending on the input data if isinstance(input_models, ModelContainer): single_model = input_models[0] + elif isinstance(input_models, ModelLibrary): + with input_models: + single_model = input_models.borrow(0) + input_models.shelve(single_model, modify=False) else: single_model = input_models @@ -199,32 +206,39 @@ def _guess_mode(self, input_models): def _get_asn_id(self, input_models): # handle if input_models isn't open - if not isinstance(input_models, datamodels.JwstDataModel): + if isinstance(input_models, (str, dict)): input_models = datamodels.open(input_models, asn_n_members=1) # Setup output path naming if associations are involved. 
- asn_id = None - try: - asn_id = input_models.meta.asn_table.asn_id - except (AttributeError, KeyError): - pass + if isinstance(input_models, ModelLibrary): + asn_id = self._get_asn_id_library(input_models) + else: + asn_id = None + try: + asn_id = input_models.meta.asn_table.asn_id + except (AttributeError, KeyError): + pass + return asn_id + if asn_id is None: asn_id = self.search_attr('asn_id') - if asn_id is not None: - _make_output_path = self.search_attr( - '_make_output_path', parent_first=True - ) - self._make_output_path = partial( - _make_output_path, - asn_id=asn_id - ) - return asn_id + _make_output_path = self.search_attr( + '_make_output_path', parent_first=True + ) - def _set_status(self, input_models, status): - # this might be called with the input which might be a filename or path - if not isinstance(input_models, datamodels.JwstDataModel): - input_models = datamodels.open(input_models) + self._make_output_path = partial( + _make_output_path, + asn_id=asn_id, + suffix="i2d" + ) + return asn_id + + def _get_asn_id_library(self, input_models): - record_step_status(input_models, "outlier_detection", status) - return input_models + asn_id = None + try: + asn_id = input_models.asn.table_name + except (AttributeError, KeyError): + pass + return asn_id diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index 622fc791ff..43c57af2fa 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -6,7 +6,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from jwst.outlier_detection import OutlierDetectionStep from jwst.outlier_detection.utils import flag_resampled_model_crs from jwst.outlier_detection.outlier_detection_step import ( @@ -195,7 +195,7 @@ def test_outlier_step_no_outliers(we_three_sci, tmp_cwd): """Test 
whole step, no outliers""" container = ModelContainer(list(we_three_sci)) pristine = ModelContainer([m.copy() for m in container]) - OutlierDetectionStep.call(container) + OutlierDetectionStep.call(container, in_memory=True) # Make sure nothing changed in SCI and DQ arrays for image, uncorrected in zip(pristine, container): @@ -203,34 +203,51 @@ def test_outlier_step_no_outliers(we_three_sci, tmp_cwd): np.testing.assert_allclose(image.dq, uncorrected.dq) -def test_outlier_step(we_three_sci, tmp_cwd): +def test_outlier_step_base(we_three_sci, tmp_cwd): """Test whole step with an outlier including saving intermediate and results files""" - container = ModelContainer(list(we_three_sci)) + container = ModelLibrary(list(we_three_sci)) # Drop a CR on the science array - container[0].data[12, 12] += 1 + with container: + zeroth = container.borrow(0) + zeroth.data[12, 12] += 1 + container.shelve(zeroth) # Verify that intermediary files are removed - OutlierDetectionStep.call(container) + OutlierDetectionStep.call(container, in_memory=True) i2d_files = glob(os.path.join(tmp_cwd, '*i2d.fits')) median_files = glob(os.path.join(tmp_cwd, '*median.fits')) assert len(i2d_files) == 0 assert len(median_files) == 0 + # Save all the data into a separate array before passing into step + data_as_cube = [] + with container: + for model in container: + data_as_cube.append(model.data) + container.shelve(model, modify=False) + result = OutlierDetectionStep.call( - container, save_results=True, save_intermediate_results=True + container, save_results=True, save_intermediate_results=True, in_memory=True ) # Make sure nothing changed in SCI array - for image, corrected in zip(container, result): - np.testing.assert_allclose(image.data, corrected.data) + with result: + for i, corrected in enumerate(result): + np.testing.assert_allclose(data_as_cube[i], corrected.data) + result.shelve(corrected, modify=False) # Verify source is not flagged - for r in result: - assert r.dq[7, 7] == 
datamodels.dqflags.pixel["GOOD"] + with result: + for r in result: + assert r.dq[7, 7] == datamodels.dqflags.pixel["GOOD"] + result.shelve(r, modify=False) # Verify CR is flagged - assert result[0].dq[12, 12] == OUTLIER_DO_NOT_USE + with result: + zeroth = result.borrow(0) + assert zeroth.dq[12, 12] == OUTLIER_DO_NOT_USE + result.shelve(zeroth, modify=False) # Verify that intermediary files are saved at the specified location i2d_files = glob(os.path.join(tmp_cwd, '*i2d.fits')) @@ -251,47 +268,87 @@ def test_outlier_step_on_disk(we_three_sci, tmp_cwd): dm0.write(dm0.meta.filename) # Initialize inputs for the test based on filenames only - container = ModelContainer(filenames) + # needs to be an asn for ModelLibrary to load it in on_disk mode + asn = { + 'asn_type': 'test', + 'asn_id': 'o001', + 'products': [ + { + 'name': 'product_a', + 'members': [ + {'expname': filenames[0], 'exptype': 'science'}, + {'expname': filenames[1], 'exptype': 'science'}, + {'expname': filenames[2], 'exptype': 'science'}, + ] + }, + ] +} + container = ModelLibrary(asn, on_disk=True) + + # Save all the data into a separate array before passing into step + data_as_cube = [] + with container: + for model in container: + data_as_cube.append(model.data) + container.shelve(model, modify=False) result = OutlierDetectionStep.call( - container, save_results=True, save_intermediate_results=True + container, save_results=True, save_intermediate_results=True, in_memory=False ) # Make sure nothing changed in SCI array - for image, corrected in zip(container, result): - np.testing.assert_allclose(image.data, corrected.data) + with result: + for i, corrected in enumerate(result): + np.testing.assert_allclose(data_as_cube[i], corrected.data) + result.shelve(corrected, modify=False) # Verify source is not flagged - for r in result: - assert r.dq[7, 7] == datamodels.dqflags.pixel["GOOD"] + with result: + for r in result: + assert r.dq[7, 7] == datamodels.dqflags.pixel["GOOD"] + result.shelve(r, 
modify=False) # Verify CR is flagged - assert result[0].dq[12, 12] == OUTLIER_DO_NOT_USE + with result: + zeroth = result.borrow(0) + assert zeroth.dq[12, 12] == OUTLIER_DO_NOT_USE + result.shelve(zeroth, modify=False) def test_outlier_step_square_source_no_outliers(we_three_sci, tmp_cwd): """Test whole step with square source with sharp edges, no outliers""" - container = ModelContainer(list(we_three_sci)) + container = ModelLibrary(list(we_three_sci)) # put a square source in all three exposures - for ccont in container: - ccont.data[5:15, 5:15] += 1e3 - - pristine = container.copy() - result = OutlierDetectionStep.call(container) + with container: + for ccont in container: + ccont.data[5:15, 5:15] += 1e3 + container.shelve(ccont) + + # Save all the data into a separate array before passing into step + data_as_cube = [] + dq_as_cube = [] + with container: + for model in container: + data_as_cube.append(model.data) + dq_as_cube.append(model.dq) + container.shelve(model, modify=False) + + result = OutlierDetectionStep.call(container, in_memory=True) # Make sure nothing changed in SCI and DQ arrays - for image, uncorrected in zip(pristine, container): - np.testing.assert_allclose(image.data, uncorrected.data) - np.testing.assert_allclose(image.dq, uncorrected.dq) + with container: + for i, image in enumerate(container): + np.testing.assert_allclose(image.data, data_as_cube[i]) + np.testing.assert_allclose(image.dq, dq_as_cube[i]) + container.shelve(image, modify=False) # Make sure nothing changed in SCI and DQ arrays - for image, corrected in zip(container, result): - np.testing.assert_allclose(image.data, corrected.data) - np.testing.assert_allclose(image.dq, corrected.dq) - - container.close() - pristine.close() + with result: + for i, corrected in enumerate(result): + np.testing.assert_allclose(data_as_cube[i], corrected.data) + np.testing.assert_allclose(dq_as_cube[i], corrected.dq) + result.shelve(corrected, modify=False) @pytest.mark.parametrize("exptype", 
IMAGE_MODES) @@ -299,26 +356,43 @@ def test_outlier_step_image_weak_CR_dither(exptype, tmp_cwd): """Test whole step with an outlier for imaging modes""" bkg = 1.5 sig = 0.02 - container = ModelContainer( + container = ModelLibrary( we_many_sci(background=bkg, sigma=sig, signal=7.0, exptype=exptype) ) # Drop a weak CR on the science array # no noise so it should always be above the default threshold of 5 - container[0].data[12, 12] = bkg + sig * 10 + with container: + zeroth = container.borrow(0) + zeroth.data[12, 12] = bkg + sig * 10 + container.shelve(zeroth) + + # Save all the data into a separate array before passing into step + data_as_cube = [] + with container: + for model in container: + data_as_cube.append(model.data) + container.shelve(model, modify=False) - result = OutlierDetectionStep.call(container) + result = OutlierDetectionStep.call(container, in_memory=True) # Make sure nothing changed in SCI array - for image, corrected in zip(container, result): - np.testing.assert_allclose(image.data, corrected.data) + with result: + for i, corrected in enumerate(result): + np.testing.assert_allclose(data_as_cube[i], corrected.data) + result.shelve(corrected, modify=False) # Verify source is not flagged - for r in result: - assert r.dq[7, 7] == datamodels.dqflags.pixel["GOOD"] + with result: + for r in result: + assert r.dq[7, 7] == datamodels.dqflags.pixel["GOOD"] + result.shelve(r, modify=False) # Verify CR is flagged - assert result[0].dq[12, 12] == OUTLIER_DO_NOT_USE + with result: + example = result.borrow(0) + assert example.dq[12, 12] == OUTLIER_DO_NOT_USE + result.shelve(example, modify=False) @pytest.mark.parametrize("exptype, tsovisit", exptypes_coron) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index f8c6cc1333..0d4258a2c9 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -16,6 +16,7 @@ DO_NOT_USE = datamodels.dqflags.pixel['DO_NOT_USE'] OUTLIER = 
datamodels.dqflags.pixel['OUTLIER'] +_ONE_MB = 1 << 20 def create_cube_median(cube_model, maskpt): @@ -30,8 +31,98 @@ def create_cube_median(cube_model, maskpt): return median +def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=1.0): + """Create a median image from the singly resampled images. + resampled_models is expected to be a ModelLibrary for imaging modes. + """ + # Compute the weight threshold for each input model + weight_thresholds = [] + with resampled_models: + for resampled in resampled_models: + weight = resampled.wht + weight_threshold = compute_weight_threshold(weight, maskpt) + weight_thresholds.append(weight_threshold) + # close and delete the model, just to explicitly try to keep the memory as clean as possible + resampled_models.shelve(resampled, modify=False) + + # compute median over all models + if not on_disk: + # easier case: all models in library can be loaded into memory at once + model_list = [] + with resampled_models: + for resampled in resampled_models: + model_list.append(resampled.data) + resampled_models.shelve(resampled, modify=False) + return np.nanmedian(np.array(model_list), axis=0) + else: + # set up buffered access to all input models + with resampled_models: + example_model = resampled_models.borrow(0) + shp = example_model.data.shape + dtype = example_model.data.dtype + nsections, section_nrows = _compute_buffer_indices(example_model, buffer_size) + resampled_models.shelve(example_model, modify=False) + + # get spatial sections of library and compute timewise median, one by one + resampled_sections = _get_sections_library(resampled_models, nsections, section_nrows, example_model.data.shape[0]) + median_image_empty = np.empty(shp, dtype) * np.nan + return _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty) + + +def _get_sections_library(library, nsections, section_nrows, imrows): + """Iterator to return sections from a ModelLibrary. 
+ + Parameters + ---------- + library : ModelLibrary + The input data models. + + nsections : int + The number of spatial sections in each model + + section_nrows : int + The number of rows in each section + + imrows : int + The total number of rows in the image + """ + with library: + example_model = library.borrow(0) + library.shelve(example_model, 0, modify=False) + for i in range(nsections): + row1 = i * section_nrows + row2 = min(row1 + section_nrows, imrows) + + data_list = np.empty((len(library), row2 - row1, example_model.data.shape[1]), example_model.data.dtype) + weight_list = np.empty((len(library), row2 - row1, example_model.data.shape[1]), example_model.wht.dtype) + with library: + for j, model in enumerate(library): + data_list[j] = model.data[row1:row2] + weight_list[j] = model.wht[row1:row2] + library.shelve(model, j, modify=False) + yield (data_list, weight_list, (row1, row2)) + + +def _compute_buffer_indices(model, buffer_size=None): + + imrows, imcols = model.data.shape + data_item_size = model.data.itemsize + #data_item_type = model.data.dtype + min_buffer_size = imcols * data_item_size + buffer_size = min_buffer_size if buffer_size is None else (buffer_size * _ONE_MB) + section_nrows = min(imrows, int(buffer_size // min_buffer_size)) + if section_nrows == 0: + buffer_size = min_buffer_size + log.warning("WARNING: Buffer size is too small to hold a single row." + f"Increasing buffer size to {buffer_size / _ONE_MB}MB") + section_nrows = 1 + nsections = int(np.ceil(imrows / section_nrows)) + return nsections, section_nrows + + def create_median(resampled_models, maskpt): """Create a median image from the singly resampled images. + Expects a ModelContainer, e.g. 
for spectroscopic modes """ log.info("Computing median") @@ -40,10 +131,14 @@ def create_median(resampled_models, maskpt): # Now, set up buffered access to all input models resampled_models.set_buffer(1.0) # Set buffer at 1Mb resampled_sections = resampled_models.get_sections() - median_image = np.empty((resampled_models.imrows, resampled_models.imcols), + median_image_empty = np.empty((resampled_models.imrows, resampled_models.imcols), resampled_models.imtype) - median_image[:] = np.nan # initialize with NaNs + median_image_empty[:] = np.nan # initialize with NaNs + return _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty) + +def _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty): + median_image = median_image_empty for (resampled_sci, resampled_weight, (row1, row2)) in resampled_sections: # Create a mask for each input image, masking out areas where there is # no data or the data has very low weight @@ -118,6 +213,16 @@ def flag_crs_in_models( # dq flags will be updated in-place flag_model_crs(image, median_data, snr1) +def flag_crs_in_models_library( + input_models, + median_data, + snr1, +): + with input_models: + for image in input_models: + # dq flags will be updated in-place + flag_model_crs(image, median_data, snr1) + input_models.shelve(image) def flag_crs_in_models_with_resampling( input_models, @@ -143,6 +248,31 @@ def flag_crs_in_models_with_resampling( # dq flags will be updated in-place flag_resampled_model_crs(image, blot, snr1, snr2, scale1, scale2, backg) +def flag_crs_in_models_with_resampling_library( + input_models, + median_data, + median_wcs, + snr1, + snr2, + scale1, + scale2, + backg, +): + with input_models: + for image in input_models: + if 'SPECTRAL' not in image.meta.wcs.output_frame.axes_type: + input_pixflux_area = image.meta.photometry.pixelarea_steradians + # Set array shape, needed to compute image pixel area + image.meta.wcs.array_shape = image.shape 
+ input_pixel_area = compute_image_pixel_area(image.meta.wcs) + pix_ratio = np.sqrt(input_pixflux_area / input_pixel_area) + else: + pix_ratio = 1.0 + + blot = gwcs_blot(median_data, median_wcs, image.data.shape, image.meta.wcs, pix_ratio) + # dq flags will be updated in-place + flag_resampled_model_crs(image, blot, snr1, snr2, scale1, scale2, backg) + input_models.shelve(image) def flag_resampled_model_crs( image, diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index c3dd4273d6..d4368304af 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -295,6 +295,7 @@ def resample_many_to_many(self, input_models): output_model.meta.filename = f'{output_root}_{self.asn_id}_outlier_i2d{output_type}' else: output_model.meta.filename = f'{output_root}_outlier_i2d{output_type}' + input_models.shelve(example_image, indices[0], modify=False) # Initialize the output with the wcs driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, @@ -846,7 +847,6 @@ def compute_image_pixel_area(wcs): spatial_idx = np.where(np.array(wcs.output_frame.axes_type) == 'SPATIAL')[0] ny, nx = wcs.array_shape - ((xmin, xmax), (ymin, ymax)) = wcs.bounding_box xmin = max(0, int(xmin + 0.5)) From c78841aaa95837f4277b160be187631512934719 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 30 Jul 2024 14:46:32 -0400 Subject: [PATCH 12/85] small fix to resample_spec --- jwst/resample/resample_spec.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 09913d18d4..2d8d92be4e 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -206,7 +206,7 @@ def do_drizzle(self, input_models): else: return self.resample_many_to_one(input_models) - def resample_many_to_many(self): + def resample_many_to_many(self, input_models): """Resample many inputs to many outputs where outputs have a common frame. 
Coadd only different detectors of the same exposure, i.e. map NRCA5 and diff --git a/pyproject.toml b/pyproject.toml index 460dcd6f15..f6c6912686 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "spherical-geometry>=1.2.22", # "stcal>=1.7.3,<1.8.0", # FIXME: switch to main (and then released) when stcal is updated - "stcal @ git+https://github.com/braingram/stcal.git@outlier_detection", + "stcal @ git+https://github.com/braingram/stcal.git@main", "stdatamodels>=2.0.0,<2.1.0", "stpipe>=0.6.0,<0.7.0", "stsci.image>=2.3.5", From 3103f82403ae3e99b649b77103139c6bb05ae922 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 30 Jul 2024 16:44:57 -0400 Subject: [PATCH 13/85] convert to ModelLibrary and back again in resample_spec_step to avoid duplicated utilities --- jwst/resample/resample.py | 4 + jwst/resample/resample_spec.py | 285 ---------------------------- jwst/resample/resample_spec_step.py | 26 ++- pyproject.toml | 2 - 4 files changed, 24 insertions(+), 293 deletions(-) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index d4368304af..fa283f1fc9 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -913,6 +913,10 @@ def compute_image_pixel_area(wcs): def copy_asn_info_from_library(library, output_model): + if not hasattr(library, "asn"): + # No ASN table, occurs when input comes from ModelContainer in spectroscopic modes + # in this case the container should retain the asn information in ResampleSpecStep + return if (asn_pool := library.asn.get("asn_pool", None)) is not None: output_model.meta.asn.pool_name = asn_pool if ( diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 2d8d92be4e..cafe9336e4 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -198,291 +198,6 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.output_models = ModelContainer() - def do_drizzle(self, input_models): - """Pick 
the correct drizzling mode based on self.single - """ - if self.single: - return self.resample_many_to_many(input_models) - else: - return self.resample_many_to_one(input_models) - - def resample_many_to_many(self, input_models): - """Resample many inputs to many outputs where outputs have a common frame. - - Coadd only different detectors of the same exposure, i.e. map NRCA5 and - NRCB5 onto the same output image, as they image different areas of the - sky. - - Used for outlier detection - """ - for exposure in input_models.models_grouped: - output_model = self.blank_output - # Determine output file type from input exposure filenames - # Use this for defining the output filename - indx = exposure[0].meta.filename.rfind('.') - output_type = exposure[0].meta.filename[indx:] - output_root = '_'.join(exposure[0].meta.filename.replace( - output_type, '').split('_')[:-1]) - if self.asn_id is not None: - output_model.meta.filename = f'{output_root}_{self.asn_id}_outlier_i2d{output_type}' - else: - output_model.meta.filename = f'{output_root}_outlier_i2d{output_type}' - - # Initialize the output with the wcs - driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, - kernel=self.kernel, fillval=self.fillval) - - log.info(f"{len(exposure)} exposures to drizzle together") - for img in exposure: - img = datamodels.open(img) - iscale = self._get_intensity_scale(img) - log.debug(f'Using intensity scale iscale={iscale}') - - inwht = resample_utils.build_driz_weight( - img, - weight_type=self.weight_type, - good_bits=self.good_bits - ) - - # apply sky subtraction - blevel = img.meta.background.level - if not img.meta.background.subtracted and blevel is not None: - data = img.data - blevel - else: - data = img.data - - xmin, xmax, ymin, ymax = resample_utils._resample_range( - data.shape, - img.meta.wcs.bounding_box - ) - - driz.add_image( - data, - img.meta.wcs, - iscale=iscale, - inwht=inwht, - xmin=xmin, - xmax=xmax, - ymin=ymin, - ymax=ymax - ) - del data - 
img.close() - - if not self.in_memory: - # Write out model to disk, then return filename - output_name = output_model.meta.filename - if self.output_dir is not None: - output_name = os.path.join(self.output_dir, output_name) - output_model.save(output_name) - log.info(f"Saved model in {output_name}") - self.output_models.append(output_name) - else: - self.output_models.append(output_model.copy()) - output_model.data *= 0. - output_model.wht *= 0. - - return self.output_models - - def resample_many_to_one(self, input_models): - """Resample and coadd many inputs to a single output. - - Used for stage 3 resampling - """ - output_model = self.blank_output.copy() - output_model.meta.filename = self.output_filename - output_model.meta.resample.weight_type = self.weight_type - output_model.meta.resample.pointings = len(input_models.group_names) - - if self.blendheaders: - self.blend_output_metadata(output_model) - - # Initialize the output with the wcs - driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, - kernel=self.kernel, fillval=self.fillval) - - log.info("Resampling science data") - for img in input_models: - iscale = self._get_intensity_scale(img) - log.debug(f'Using intensity scale iscale={iscale}') - img.meta.iscale = iscale - - inwht = resample_utils.build_driz_weight(img, - weight_type=self.weight_type, - good_bits=self.good_bits) - # apply sky subtraction - blevel = img.meta.background.level - if not img.meta.background.subtracted and blevel is not None: - data = img.data - blevel - else: - data = img.data.copy() - - xmin, xmax, ymin, ymax = resample_utils._resample_range( - data.shape, - img.meta.wcs.bounding_box - ) - - driz.add_image( - data, - img.meta.wcs, - iscale=iscale, - inwht=inwht, - xmin=xmin, - xmax=xmax, - ymin=ymin, - ymax=ymax - ) - del data, inwht - - # Resample variance arrays in input_models to output_model - self.resample_variance_arrays(output_model, input_models) - var_components = [ - output_model.var_rnoise, - 
output_model.var_poisson, - output_model.var_flat - ] - output_model.err = np.sqrt(np.nansum(var_components,axis=0)) - - # nansum returns zero for input that is all NaN - - # set those values to NaN instead - all_nan = np.all(np.isnan(var_components), axis=0) - output_model.err[all_nan] = np.nan - - self.update_exposure_times(output_model, input_models) - self.output_models.append(output_model) - - for img in input_models: - del img.meta.iscale - - return self.output_models - - def resample_variance_arrays(self, output_model, input_models): - """Resample variance arrays from self.input_models to the output_model. - - Variance images from each input model are resampled individually and - added to a weighted sum. If weight_type is 'ivm', the inverse of the - resampled read noise variance is used as the weight for all the variance - components. If weight_type is 'exptime', the exposure time is used. - - The output_model is modified in place. - """ - log.info("Resampling variance components") - weighted_rn_var = np.full_like(output_model.data, np.nan) - weighted_pn_var = np.full_like(output_model.data, np.nan) - weighted_flat_var = np.full_like(output_model.data, np.nan) - total_weight_rn_var = np.zeros_like(output_model.data) - total_weight_pn_var = np.zeros_like(output_model.data) - total_weight_flat_var = np.zeros_like(output_model.data) - for model in input_models: - # Do the read noise variance first, so it can be - # used for weights if needed - rn_var = self._resample_one_variance_array( - "var_rnoise", model, output_model) - - # Find valid weighting values in the variance - if rn_var is not None: - mask = (rn_var > 0) & np.isfinite(rn_var) - else: - mask = np.full_like(rn_var, False) - - # Set the weight for the image from the weight type - weight = np.ones(output_model.data.shape) - if self.weight_type == "ivm" and rn_var is not None: - weight[mask] = rn_var[mask] ** -1 - elif self.weight_type == "exptime": - if resample_utils.check_for_tmeasure(model): - 
weight[:] = model.meta.exposure.measurement_time - else: - weight[:] = model.meta.exposure.exposure_time - - # Weight and add the readnoise variance - # Note: floating point overflow is an issue if variance weights - # are used - it can't be squared before multiplication - if rn_var is not None: - mask = (rn_var >= 0) & np.isfinite(rn_var) & (weight > 0) - weighted_rn_var[mask] = np.nansum( - [weighted_rn_var[mask], - rn_var[mask] * weight[mask] * weight[mask]], - axis=0 - ) - total_weight_rn_var[mask] += weight[mask] - - # Now do poisson and flat variance, updating only valid new values - # (zero is a valid value; negative, inf, or NaN are not) - pn_var = self._resample_one_variance_array( - "var_poisson", model, output_model) - if pn_var is not None: - mask = (pn_var >= 0) & np.isfinite(pn_var) & (weight > 0) - weighted_pn_var[mask] = np.nansum( - [weighted_pn_var[mask], - pn_var[mask] * weight[mask] * weight[mask]], - axis=0 - ) - total_weight_pn_var[mask] += weight[mask] - - flat_var = self._resample_one_variance_array( - "var_flat", model, output_model) - if flat_var is not None: - mask = (flat_var >= 0) & np.isfinite(flat_var) & (weight > 0) - weighted_flat_var[mask] = np.nansum( - [weighted_flat_var[mask], - flat_var[mask] * weight[mask] * weight[mask]], - axis=0 - ) - total_weight_flat_var[mask] += weight[mask] - - # We now have a sum of the weighted resampled variances. - # Divide by the total weights, squared, and set in the output model. - # Zero weight and missing values are NaN in the output. 
- with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) - warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) - - output_variance = (weighted_rn_var - / total_weight_rn_var / total_weight_rn_var) - setattr(output_model, "var_rnoise", output_variance) - - output_variance = (weighted_pn_var - / total_weight_pn_var / total_weight_pn_var) - setattr(output_model, "var_poisson", output_variance) - - output_variance = (weighted_flat_var - / total_weight_flat_var / total_weight_flat_var) - setattr(output_model, "var_flat", output_variance) - - def update_exposure_times(self, output_model, input_models): - """Modify exposure time metadata in-place""" - total_exposure_time = 0. - exposure_times = {'start': [], 'end': []} - duration = 0.0 - total_measurement_time = 0.0 - measurement_time_failures = [] - for exposure in input_models.models_grouped: - total_exposure_time += exposure[0].meta.exposure.exposure_time - if not resample_utils.check_for_tmeasure(exposure[0]): - measurement_time_failures.append(1) - else: - total_measurement_time += exposure[0].meta.exposure.measurement_time - measurement_time_failures.append(0) - exposure_times['start'].append(exposure[0].meta.exposure.start_time) - exposure_times['end'].append(exposure[0].meta.exposure.end_time) - duration += exposure[0].meta.exposure.duration - - # Update some basic exposure time values based on output_model - output_model.meta.exposure.exposure_time = total_exposure_time - if not any(measurement_time_failures): - output_model.meta.exposure.measurement_time = total_measurement_time - output_model.meta.exposure.start_time = min(exposure_times['start']) - output_model.meta.exposure.end_time = max(exposure_times['end']) - - # Update other exposure time keywords: - # XPOSURE (identical to the total effective exposure time, EFFEXPTM) - xposure = total_exposure_time - output_model.meta.exposure.effective_exposure_time = xposure - # DURATION (identical to 
TELAPSE, elapsed time) - output_model.meta.exposure.duration = duration - output_model.meta.exposure.elapsed_exposure_time = duration - def build_nirspec_output_wcs(self, input_models, refmodel=None): """ Create a spatial/spectral WCS covering the footprint of the input. diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index 46aa1a4f6c..e9d80b911f 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -3,7 +3,7 @@ from stdatamodels.jwst import datamodels from stdatamodels.jwst.datamodels import MultiSlitModel, ImageModel -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from . import resample_spec, ResampleStep from ..exp_to_source import multislit_to_container from ..assign_wcs.util import update_s_region_spectral @@ -118,9 +118,16 @@ def _process_multislit(self, input_models): for container in containers.values(): resamp = resample_spec.ResampleSpecData(container, **self.drizpars) - drizzled_models = resamp.do_drizzle(container) + if isinstance(container, ModelContainer): + library = ModelLibrary(container, on_disk=False) + library = resamp.do_drizzle(library) + with library: + for i, model in enumerate(library): + container[i] = model + library.shelve(model, modify=False) + del library - for model in drizzled_models: + for model in container: self.update_slit_metadata(model) update_s_region_spectral(model) result.slits.append(model) @@ -158,13 +165,20 @@ def _process_slit(self, input_models): resamp = resample_spec.ResampleSpecData(input_models, **self.drizpars) - drizzled_models = resamp.do_drizzle(input_models) + if isinstance(input_models, ModelContainer): + library = ModelLibrary(input_models, on_disk=False) + library = resamp.do_drizzle(library) + with library: + for i, model in enumerate(library): + input_models[i] = model + library.shelve(model, modify=False) + del library - result = drizzled_models[0] + result = input_models[0] 
result.meta.cal_step.resample = "COMPLETE" result.meta.asn.pool_name = input_models.asn_pool_name result.meta.asn.table_name = input_models.asn_table_name - result.meta.bunit_data = drizzled_models[0].meta.bunit_data + result.meta.bunit_data = input_models[0].meta.bunit_data if self.pixel_scale is None: result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio else: diff --git a/pyproject.toml b/pyproject.toml index f6c6912686..b019111b83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,8 +34,6 @@ dependencies = [ "scikit-image>=0.19", "scipy>=1.9.3", "spherical-geometry>=1.2.22", - # "stcal>=1.7.3,<1.8.0", - # FIXME: switch to main (and then released) when stcal is updated "stcal @ git+https://github.com/braingram/stcal.git@main", "stdatamodels>=2.0.0,<2.1.0", "stpipe>=0.6.0,<0.7.0", From 2f8521821ff51012ad983dceef934ded89997845 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 11:06:35 -0400 Subject: [PATCH 14/85] bugfixes for outlier detection unit tests --- jwst/outlier_detection/tests/test_outlier_detection.py | 5 ++--- jwst/resample/resample.py | 3 +-- jwst/resample/resample_utils.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index 43c57af2fa..1b8c8e8725 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -228,7 +228,7 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): container.shelve(model, modify=False) result = OutlierDetectionStep.call( - container, save_results=True, save_intermediate_results=True, in_memory=True + container, save_results=True, save_intermediate_results=True, in_memory=False ) # Make sure nothing changed in SCI array @@ -281,8 +281,7 @@ def test_outlier_step_on_disk(we_three_sci, tmp_cwd): {'expname': filenames[2], 'exptype': 'science'}, ] }, - ] -} + ]} container = ModelLibrary(asn, 
on_disk=True) # Save all the data into a separate array before passing into step diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index fa283f1fc9..cf56e18d15 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -131,8 +131,7 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, shape=None if output_shape is None else output_shape[::-1], crpix=crpix, crval=crval - ) - + ) # Estimate output pixel area in Sr. NOTE: in principle we could # use the same algorithm as for when output_wcs is provided by the # user. diff --git a/jwst/resample/resample_utils.py b/jwst/resample/resample_utils.py index 3545d30a1d..c527b450f1 100644 --- a/jwst/resample/resample_utils.py +++ b/jwst/resample/resample_utils.py @@ -76,7 +76,7 @@ def make_output_wcs(input_models, ref_wcs=None, wcslist.append(w) if i == 0: example_model = model - input_models.shelve(model, modify=False) + input_models.shelve(model) naxes = wcslist[0].output_frame.naxes if naxes != 2: From 570dc26cbcdebbfc34b26694509f4d17a92c6e61 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 13:33:52 -0400 Subject: [PATCH 15/85] expose on_disk for all steps, some cleanup to passing libraries between steps --- .../outlier_detection_step.py | 6 +- jwst/pipeline/calwebb_image3.py | 32 ++++--- jwst/pipeline/tests/test_calwebb_image3.py | 96 +++++++++++++++++++ jwst/resample/resample_step.py | 37 ++++--- jwst/skymatch/skymatch_step.py | 5 +- jwst/stpipe/core.py | 11 ++- jwst/tweakreg/tweakreg_step.py | 5 +- 7 files changed, 146 insertions(+), 46 deletions(-) create mode 100644 jwst/pipeline/tests/test_calwebb_image3.py diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index 790fd7b461..b7040e7a09 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -80,6 +80,9 @@ def process(self, input_data): self.log.info(f"Outlier Detection 
mode: {mode}") # determine the asn_id (if not set by the pipeline) + if mode == "imaging": + if not isinstance(input_data, ModelLibrary): + input_data = ModelLibrary(input_data, on_disk=not self.in_memory) asn_id = self._get_asn_id(input_data) self.log.info(f"Outlier Detection asn_id: {asn_id}") @@ -235,7 +238,8 @@ def _get_asn_id(self, input_models): return asn_id def _get_asn_id_library(self, input_models): - + """Get the association ID from a ModelLibrary. + Does not open any models, so it should respect on_disk status.""" asn_id = None try: asn_id = input_models.asn.table_name diff --git a/jwst/pipeline/calwebb_image3.py b/jwst/pipeline/calwebb_image3.py index 272f7163a2..0701c00881 100644 --- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -32,6 +32,7 @@ class Image3Pipeline(Pipeline): class_alias = "calwebb_image3" spec = """ + on_disk = boolean(default=False) # Preserve memory using temporary files """ # Define alias to steps @@ -67,25 +68,26 @@ def process(self, input_data): # Only load science members from input ASN; # background and target-acq members are not needed. - input_models = self._datamodels_open(input_data, asn_exptypes=['science']) + if isinstance(input_data, ModelLibrary): + input_models = input_data + else: + input_models = ModelLibrary(input_data, asn_exptypes=['science'], on_disk=self.on_disk) - if output_file is None and isinstance(input_models, ModelLibrary): + if self.output_file is None: # If input is an association, set the output to the product name. 
self.output_file = input_models.asn["products"][0]["name"] - if isinstance(input_models, ModelLibrary): - with input_models: - model = input_models.borrow(0) - is_moving = is_moving_target(model) - input_models.shelve(model, 0, modify=False) - if is_moving: - raise Exception("Broken...") # FIXME - input_models = self.assign_mtwcs(input_models) - else: - input_models = self.tweakreg(input_models) - - input_models = self.skymatch(input_models) - input_models = self.outlier_detection(input_models) + with input_models: + model = input_models.borrow(0) + is_moving = is_moving_target(model) + input_models.shelve(model, 0, modify=False) + if is_moving: + input_models = self.assign_mtwcs(input_models) + else: + input_models = self.tweakreg(input_models) + + input_models = self.skymatch(input_models) + input_models = self.outlier_detection(input_models) # elif self.skymatch.skymethod == 'match': # self.log.warning("Turning 'skymatch' step off for a single " diff --git a/jwst/pipeline/tests/test_calwebb_image3.py b/jwst/pipeline/tests/test_calwebb_image3.py new file mode 100644 index 0000000000..36b81b79d9 --- /dev/null +++ b/jwst/pipeline/tests/test_calwebb_image3.py @@ -0,0 +1,96 @@ +import pytest +import os +import shutil +from jwst.stpipe import Step +from jwst.assign_wcs import AssignWcsStep +from jwst.datamodels import ImageModel + + +INPUT_FILE = "dummy_cal.fits" +INPUT_FILE_2 = "dummy2_cal.fits" +INPUT_ASN = "dummy_asn.json" +OUTPUT_PRODUCT = "custom_name" +LOGFILE = "run_asn.log" +LOGCFG = "test_logs.cfg" + + +@pytest.fixture(scope='module') +def make_dummy_cal_file(tmp_cwd_module): + ''' + Make and save a dummy cal file in the temporary working directory + Partially copied from test_calwebb_image2.py + ''' + + image = ImageModel((2048, 2048)) + image.data[:, :] = 1 + image.meta.instrument.name = 'NIRCAM' + image.meta.instrument.filter = 'F210M' + image.meta.instrument.pupil = 'CLEAR' + image.meta.exposure.type = 'NRC_IMAGE' + image.meta.observation.date = 
'2024-02-27' + image.meta.observation.time = '13:37:18.548' + image.meta.date = '2024-02-27T13:37:18.548' + image.meta.subarray.xstart = 1 + image.meta.subarray.ystart = 1 + + image.meta.subarray.xsize = image.data.shape[-1] + image.meta.subarray.ysize = image.data.shape[-2] + + image.meta.instrument.channel = 'SHORT' + image.meta.instrument.module = 'A' + image.meta.instrument.detector = 'NRCA1' + + # bare minimum wcs info to get assign_wcs step to pass + image.meta.wcsinfo.crpix1 = 693.5 + image.meta.wcsinfo.crpix2 = 512.5 + image.meta.wcsinfo.v2_ref = -453.37849 + image.meta.wcsinfo.v3_ref = -373.810549 + image.meta.wcsinfo.roll_ref = 272.3237653262276 + image.meta.wcsinfo.ra_ref = 80.54724018120017 + image.meta.wcsinfo.dec_ref = -69.5081101864959 + + image = AssignWcsStep.call(image) + + with image as dm: + dm.save(INPUT_FILE) + + +@pytest.fixture(scope='module') +def make_dummy_association(make_dummy_cal_file): + + shutil.copy(INPUT_FILE, INPUT_FILE_2) + os.system(f"asn_from_list -o {INPUT_ASN} --product-name {OUTPUT_PRODUCT} -r DMS_Level3_Base {INPUT_FILE} {INPUT_FILE_2}") + + +@pytest.fixture(scope='module') +def run_image3_pipeline(make_dummy_association): + ''' + Two-product association passed in, run pipeline, skipping most steps + ''' + # save warnings to logfile so can be checked later + logcfg_content = f"[*] \n \ + level = INFO \n \ + handler = file:{LOGFILE}" + with open(LOGCFG, 'w') as f: + f.write(logcfg_content) + + args = ["calwebb_image3", INPUT_ASN, + f"--logcfg={LOGCFG}", + "--steps.tweakreg.skip=true", + "--steps.skymatch.skip=true", + "--steps.outlier_detection.skip=true", + "--steps.resample.skip=true", + "--steps.source_catalog.skip=true", + "--on_disk=True",] + + Step.from_cmdline(args) + + +def test_run_complete(run_image3_pipeline): + ''' + Check that the pipeline runs to completion + ''' + msg = "Step Image3Pipeline done" + with open(LOGFILE, 'r') as f: + log = f.read() + assert msg in log \ No newline at end of file diff --git 
a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 4baae25def..e66eb9a351 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -4,7 +4,7 @@ import asdf -from jwst.datamodels import ModelContainer, ModelLibrary +from jwst.datamodels import ModelLibrary from . import resample from ..stpipe import Step @@ -61,25 +61,20 @@ def process(self, input): if isinstance(input, ModelLibrary): input_models = input - elif isinstance(input, (str, dict)): - input_models = ModelLibrary(input, on_disk=~self.in_memory) - elif isinstance(input, (ModelContainer, list)): - input_models = ModelLibrary(input, on_disk=False) #cannot instantiate on disk for data already in memory - - if isinstance(input, ModelLibrary): - try: - output = input_models.meta.asn_table.products[0].name - except AttributeError: - # coron data goes through this path by the time it gets to - # resampling. - # TODO: figure out why and make sure asn_table is carried along - output = None else: - input_models = ModelLibrary([input], on_disk=False) #single model will not benefit from on_disk - input_models.asn_pool_name = input.meta.asn.pool_name - input_models.asn_table_name = input.meta.asn.table_name - output = input.meta.filename - self.blendheaders = False + input_models = ModelLibrary(input, on_disk=not self.in_memory) + + try: + output = input_models.meta.asn_table.products[0].name + except AttributeError: + # coron data goes through this path by the time it gets to + # resampling. 
+ # TODO: figure out why and make sure asn_table is carried along + output = None + input_models.asn_pool_name = input.meta.asn.pool_name + input_models.asn_table_name = input.meta.asn.table_name + output = input.meta.filename + self.blendheaders = False # Check that input models are 2D images with input_models: @@ -98,7 +93,7 @@ def process(self, input): result = resamp.do_drizzle(input_models) with result: - for i, model in enumerate(result): + for model in result: model.meta.cal_step.resample = 'COMPLETE' self.update_fits_wcs(model) util.update_s_region_imaging(model) @@ -112,7 +107,7 @@ def process(self, input): else: model.meta.resample.pixel_scale_ratio = resamp.pscale_ratio model.meta.resample.pixfrac = kwargs['pixfrac'] - result.shelve(model, 0) + result.shelve(model) if len(result) == 1: model = result.borrow(0) diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index 3083f09c78..adf962d2d5 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -61,6 +61,9 @@ class SkyMatchStep(Step): lsigma = float(min=0.0, default=4.0) # Lower clipping limit, in sigma usigma = float(min=0.0, default=4.0) # Upper clipping limit, in sigma binwidth = float(min=0.0, default=0.1) # Bin width for 'mode' and 'midpt' `skystat`, in sigma + + # Memory management: + on_disk = boolean(default=False) # Preserve memory using temporary files """ # noqa: E501 reference_file_types = [] @@ -74,7 +77,7 @@ def process(self, input): if isinstance(input, ModelLibrary): library = input else: - library = ModelLibrary(input) + library = ModelLibrary(input, on_disk=self.on_disk) self._dqbits = interpret_bit_flags(self.dqbits, flag_name_map=pixel) diff --git a/jwst/stpipe/core.py b/jwst/stpipe/core.py index edf2cd710e..7c4ed9a34e 100644 --- a/jwst/stpipe/core.py +++ b/jwst/stpipe/core.py @@ -27,11 +27,12 @@ class JwstStep(Step): @classmethod def _datamodels_open(cls, init, **kwargs): - if isinstance(init, ModelLibrary): - return init - if 
isinstance(init, JwstDataModel) or os.path.splitext(init)[1] in (".asdf", ".fits"): - return datamodels.open(init, **kwargs) - return ModelLibrary(init) + return datamodels.open(init, **kwargs) + # if isinstance(init, ModelLibrary): + # return init + # if isinstance(init, JwstDataModel) or os.path.splitext(init)[1] in (".asdf", ".fits"): + # return datamodels.open(init, **kwargs) + # return ModelLibrary(init) def load_as_level2_asn(self, obj): diff --git a/jwst/tweakreg/tweakreg_step.py b/jwst/tweakreg/tweakreg_step.py index ab9a887e7b..921b08fafc 100644 --- a/jwst/tweakreg/tweakreg_step.py +++ b/jwst/tweakreg/tweakreg_step.py @@ -123,6 +123,7 @@ class TweakRegStep(Step): # stpipe general options output_use_model = boolean(default=True) # When saving use `DataModel.meta.filename` + on_disk = boolean(default=False) # Preserve memory using temporary files """ reference_file_types = [] @@ -130,10 +131,8 @@ class TweakRegStep(Step): def process(self, input): if isinstance(input, ModelLibrary): images = input - elif isinstance(input, ModelContainer): - images = ModelLibrary(input, on_disk=False) else: - images = ModelLibrary(input, on_disk=True) + images = ModelLibrary(input, on_disk=self.on_disk) if len(images) == 0: raise ValueError("Input must contain at least one image model.") From ad9902d1a275d263772aa0dfceae5ae709f26de4 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 14:54:32 -0400 Subject: [PATCH 16/85] using map_function where applicable, more unit test bug fixes --- jwst/datamodels/tests/test_library.py | 1 - jwst/exp_to_source/__init__.py | 3 +- jwst/exp_to_source/exp_to_source.py | 21 ++--------- jwst/outlier_detection/imaging.py | 1 + .../tests/test_outlier_detection.py | 36 ++++++++----------- jwst/outlier_detection/utils.py | 6 +--- jwst/resample/resample.py | 1 + jwst/resample/resample_spec.py | 5 --- jwst/resample/resample_step.py | 22 +++++++----- jwst/skymatch/skymatch_step.py | 3 -- jwst/stpipe/core.py | 8 ----- 
jwst/tweakreg/tweakreg_step.py | 15 +++----- 12 files changed, 39 insertions(+), 83 deletions(-) diff --git a/jwst/datamodels/tests/test_library.py b/jwst/datamodels/tests/test_library.py index cab92a3d6b..5dc4c109de 100644 --- a/jwst/datamodels/tests/test_library.py +++ b/jwst/datamodels/tests/test_library.py @@ -1,4 +1,3 @@ -from contextlib import nullcontext import json import pytest diff --git a/jwst/exp_to_source/__init__.py b/jwst/exp_to_source/__init__.py index d050f68b8f..bcace776ec 100644 --- a/jwst/exp_to_source/__init__.py +++ b/jwst/exp_to_source/__init__.py @@ -1,7 +1,6 @@ from .exp_to_source import ( exp_to_source, multislit_to_container, - multislit_to_library, ) -__all__ = ['exp_to_source', 'multislit_to_container', 'multislit_to_library'] +__all__ = ['exp_to_source', 'multislit_to_container'] diff --git a/jwst/exp_to_source/exp_to_source.py b/jwst/exp_to_source/exp_to_source.py index 9a1279dc23..c0f385a339 100644 --- a/jwst/exp_to_source/exp_to_source.py +++ b/jwst/exp_to_source/exp_to_source.py @@ -8,9 +8,9 @@ from stdatamodels.properties import merge_tree from stdatamodels.jwst.datamodels import MultiExposureModel -from jwst.datamodels import SourceModelContainer, ModelLibrary +from jwst.datamodels import SourceModelContainer -__all__ = ['exp_to_source', 'multislit_to_container', 'multislit_to_library'] +__all__ = ['exp_to_source', 'multislit_to_container'] log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -113,23 +113,6 @@ def multislit_to_container(inputs): return containers -# this is a hacky solution - fix it later -def multislit_to_library(inputs): - if isinstance(inputs, ModelLibrary): - # convert to list of MultiSlitModels expected by exp_to_source - multislit_list = [] - with inputs: - for i in range(len(inputs)): - multislit_list.append(inputs.borrow(i)) - inputs.shelve(multislit_list[-1], i, modify=False) - inputs = multislit_list - containers = exp_to_source(inputs) - for id in containers: - containers[id] = 
ModelLibrary(SourceModelContainer(containers[id])) - - return containers - - class DefaultOrderedDict(OrderedDict): # Source http://stackoverflow.com/a/6190500/562769 def __init__(self, default_factory=None, *a, **kw): diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 295123d494..9c5bc65586 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -68,6 +68,7 @@ def detect_outliers( output_path = make_output_path(basepath=example_model.meta.filename, suffix='i2d') input_models.shelve(example_model, modify=False) + del example_model output_path = os.path.dirname(output_path) resamp = resample.ResampleData( input_models, diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index 1b8c8e8725..6e10e92738 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -221,21 +221,17 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): assert len(median_files) == 0 # Save all the data into a separate array before passing into step - data_as_cube = [] - with container: - for model in container: - data_as_cube.append(model.data) - container.shelve(model, modify=False) + data_as_cube = container.map_function(lambda model, index: model.data, modify=False) result = OutlierDetectionStep.call( container, save_results=True, save_intermediate_results=True, in_memory=False ) # Make sure nothing changed in SCI array - with result: - for i, corrected in enumerate(result): - np.testing.assert_allclose(data_as_cube[i], corrected.data) - result.shelve(corrected, modify=False) + result.map_function( + lambda model, index: np.testing.assert_allclose(data_as_cube[index], model.data), + modify=False, + ) # Verify source is not flagged with result: @@ -285,21 +281,17 @@ def test_outlier_step_on_disk(we_three_sci, tmp_cwd): container = ModelLibrary(asn, on_disk=True) # Save all the 
data into a separate array before passing into step - data_as_cube = [] - with container: - for model in container: - data_as_cube.append(model.data) - container.shelve(model, modify=False) + data_as_cube = container.map_function(lambda model, index: model.data, modify=False) result = OutlierDetectionStep.call( container, save_results=True, save_intermediate_results=True, in_memory=False ) # Make sure nothing changed in SCI array - with result: - for i, corrected in enumerate(result): - np.testing.assert_allclose(data_as_cube[i], corrected.data) - result.shelve(corrected, modify=False) + result.map_function( + lambda model, index: np.testing.assert_allclose(data_as_cube[index], model.data), + modify=False, + ) # Verify source is not flagged with result: @@ -376,10 +368,10 @@ def test_outlier_step_image_weak_CR_dither(exptype, tmp_cwd): result = OutlierDetectionStep.call(container, in_memory=True) # Make sure nothing changed in SCI array - with result: - for i, corrected in enumerate(result): - np.testing.assert_allclose(data_as_cube[i], corrected.data) - result.shelve(corrected, modify=False) + result.map_function( + lambda model, index: np.testing.assert_allclose(data_as_cube[index], model.data), + modify=False, + ) # Verify source is not flagged with result: diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 0d4258a2c9..887989d9ea 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -218,11 +218,7 @@ def flag_crs_in_models_library( median_data, snr1, ): - with input_models: - for image in input_models: - # dq flags will be updated in-place - flag_model_crs(image, median_data, snr1) - input_models.shelve(image) + input_models.map_function(lambda image, index: flag_model_crs(image, median_data, snr1), modify=True) def flag_crs_in_models_with_resampling( input_models, diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index cf56e18d15..748fce8ba9 100644 --- a/jwst/resample/resample.py 
+++ b/jwst/resample/resample.py @@ -295,6 +295,7 @@ def resample_many_to_many(self, input_models): else: output_model.meta.filename = f'{output_root}_outlier_i2d{output_type}' input_models.shelve(example_image, indices[0], modify=False) + del example_image # Initialize the output with the wcs driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index cafe9336e4..5a7b2cc95b 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -1,6 +1,5 @@ import logging import warnings -import os import numpy as np from astropy import coordinates as coord @@ -13,21 +12,17 @@ from astropy.utils.exceptions import AstropyUserWarning from gwcs import wcstools, WCS from gwcs import coordinate_frames as cf -from gwcs.geometry import SphericalToCartesian from stdatamodels.jwst import datamodels from jwst.assign_wcs.util import compute_scale, wrap_ra from jwst.datamodels import ModelContainer from jwst.resample import resample_utils from jwst.resample.resample import ResampleData -from . import gwcs_drizzle log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) -_S2C = SphericalToCartesian() - __all__ = ["ResampleSpecData"] diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index e66eb9a351..30d775ed73 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -4,7 +4,7 @@ import asdf -from jwst.datamodels import ModelLibrary +from jwst.datamodels import ModelLibrary, ImageModel from . 
import resample from ..stpipe import Step @@ -61,20 +61,25 @@ def process(self, input): if isinstance(input, ModelLibrary): input_models = input - else: + elif isinstance(input, (str, dict, list)): input_models = ModelLibrary(input, on_disk=not self.in_memory) + elif isinstance(input, ImageModel): + input_models = ModelLibrary([input], on_disk=not self.in_memory) + input_models.asn_pool_name = input.meta.asn.pool_name + input_models.asn_table_name = input.meta.asn.table_name + output = input.meta.filename + self.blendheaders = False + else: + raise RuntimeError(f"Input {input} is not a 2D image.") try: - output = input_models.meta.asn_table.products[0].name - except AttributeError: + output = input_models.asn["products"][0]["members"][0]["expname"] + except KeyError: # coron data goes through this path by the time it gets to # resampling. # TODO: figure out why and make sure asn_table is carried along output = None - input_models.asn_pool_name = input.meta.asn.pool_name - input_models.asn_table_name = input.meta.asn.table_name - output = input.meta.filename - self.blendheaders = False + # Check that input models are 2D images with input_models: @@ -84,6 +89,7 @@ def process(self, input): if len(data_shape) != 2: # resample can only handle 2D images, not 3D cubes, etc raise RuntimeError(f"Input {example_model} is not a 2D image.") + del example_model # Setup drizzle-related parameters kwargs = self.get_drizpars() diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index adf962d2d5..1dae658135 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -18,9 +18,6 @@ ) from stdatamodels.jwst.datamodels.dqflags import pixel -from stdatamodels.jwst.datamodels.util import ( - open as datamodel_open, -) from jwst.datamodels import ModelLibrary diff --git a/jwst/stpipe/core.py b/jwst/stpipe/core.py index 7c4ed9a34e..2a30139190 100644 --- a/jwst/stpipe/core.py +++ b/jwst/stpipe/core.py @@ -2,7 +2,6 @@ JWST-specific Step and 
Pipeline base classes. """ import logging -import os from stdatamodels.jwst.datamodels import JwstDataModel from stdatamodels.jwst import datamodels @@ -12,7 +11,6 @@ from .. import __version_commit__, __version__ from ..lib.suffix import remove_suffix -from jwst.datamodels.library import ModelLibrary log = logging.getLogger(__name__) @@ -28,12 +26,6 @@ class JwstStep(Step): @classmethod def _datamodels_open(cls, init, **kwargs): return datamodels.open(init, **kwargs) - # if isinstance(init, ModelLibrary): - # return init - # if isinstance(init, JwstDataModel) or os.path.splitext(init)[1] in (".asdf", ".fits"): - # return datamodels.open(init, **kwargs) - # return ModelLibrary(init) - def load_as_level2_asn(self, obj): """Load object as an association diff --git a/jwst/tweakreg/tweakreg_step.py b/jwst/tweakreg/tweakreg_step.py index 921b08fafc..d15511a441 100644 --- a/jwst/tweakreg/tweakreg_step.py +++ b/jwst/tweakreg/tweakreg_step.py @@ -14,7 +14,7 @@ from jwst.stpipe import record_step_status from jwst.assign_wcs.util import update_fits_wcsinfo, update_s_region_imaging -from jwst.datamodels import ModelLibrary, ModelContainer +from jwst.datamodels import ModelLibrary # LOCAL from ..stpipe import Step @@ -304,20 +304,15 @@ def process(self, input): save_abs_catalog=self.save_abs_catalog, abs_catalog_output_dir=self.output_dir, ) + del ref_image except twk.TweakregError as e: self.log.warning(str(e)) - with images: - for model in images: - record_step_status(model, "tweakreg", success=False) - images.shelve(model) + record_step_status(images, "tweakreg", success=False) return images if local_align_failed and not align_to_abs_refcat: - with images: - for model in images: - record_step_status(model, "tweakreg", success=False) - images.shelve(model) + record_step_status(images, "tweakreg", success=False) return images # one final pass through all the models to update them based @@ -373,7 +368,7 @@ def _apply_tweakreg_solution(self, approximation. 
Reported error is: \n {e.args[0]}" self.log.warning(msg) images.shelve(image_model) - record_step_status(images, "tweakreg", success=True) + record_step_status(images, "tweakreg", success=True) return images From 3d4fa0d0c336f11baa9769152369fe93b5e51a4f Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 15:04:30 -0400 Subject: [PATCH 17/85] fix typo in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b019111b83..b1ec3e64f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "scikit-image>=0.19", "scipy>=1.9.3", "spherical-geometry>=1.2.22", - "stcal @ git+https://github.com/braingram/stcal.git@main", + "stcal @ git+https://github.com/spacetelescope/stcal.git@main", "stdatamodels>=2.0.0,<2.1.0", "stpipe>=0.6.0,<0.7.0", "stsci.image>=2.3.5", From 3db69d3de98b104f35a4814bd2d4e7065e839351 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 15:11:55 -0400 Subject: [PATCH 18/85] bump version of stpipe --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b1ec3e64f1..8dd98fcb02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "spherical-geometry>=1.2.22", "stcal @ git+https://github.com/spacetelescope/stcal.git@main", "stdatamodels>=2.0.0,<2.1.0", - "stpipe>=0.6.0,<0.7.0", + "stpipe @ git+https://github.com/spacetelescope/stpipe.git@main", "stsci.image>=2.3.5", "stsci.imagestats>=1.6.3", "synphot>=1.2", From 5ca774ee28acf662ec705a17925a0283bd2e23f5 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 15:56:48 -0400 Subject: [PATCH 19/85] mark ModelLibrary as not part of stdatamodels --- jwst/datamodels/__init__.py | 4 +-- .../tests/test_outlier_detection.py | 27 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/jwst/datamodels/__init__.py b/jwst/datamodels/__init__.py index 
7bf474c86f..1d3e0b23d6 100644 --- a/jwst/datamodels/__init__.py +++ b/jwst/datamodels/__init__.py @@ -25,10 +25,10 @@ # Modules that are not part of stdatamodels -_jwst_modules = ["container", "source_container"] +_jwst_modules = ["container", "source_container", "library"] # Models that are not part of stdatamodels -_jwst_models = ["ModelContainer", "SourceModelContainer"] +_jwst_models = ["ModelContainer", "SourceModelContainer", "ModelLibrary"] # Deprecated modules in stdatamodels _deprecated_modules = ['schema'] diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index 6e10e92738..d5c1f29d8e 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -8,7 +8,7 @@ from jwst.datamodels import ModelContainer, ModelLibrary from jwst.outlier_detection import OutlierDetectionStep -from jwst.outlier_detection.utils import flag_resampled_model_crs +from jwst.outlier_detection.utils import flag_resampled_model_crs, create_median_library from jwst.outlier_detection.outlier_detection_step import ( IMAGE_MODES, TSO_SPEC_MODES, @@ -252,9 +252,9 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): assert len(median_files) != 0 -def test_outlier_step_on_disk(we_three_sci, tmp_cwd): - """Test whole step with an outlier including saving intermediate and results files""" - +@pytest.fixture +def three_sci_as_asn(we_three_sci, tmp_cwd): + """Create an association with the 3 science images""" for model in we_three_sci: model.save(model.meta.filename) filenames = [model.meta.filename for model in we_three_sci] @@ -278,7 +278,12 @@ def test_outlier_step_on_disk(we_three_sci, tmp_cwd): ] }, ]} - container = ModelLibrary(asn, on_disk=True) + return asn + + +def test_outlier_step_on_disk(three_sci_as_asn, tmp_cwd): + """Test whole step with an outlier including saving intermediate and results files""" + container = 
ModelLibrary(three_sci_as_asn, on_disk=True) # Save all the data into a separate array before passing into step data_as_cube = container.map_function(lambda model, index: model.data, modify=False) @@ -455,3 +460,15 @@ def test_outlier_step_weak_cr_tso(exptype, tsovisit): # Verify CR is flagged assert result.dq[cr_timestep, 12, 12] == OUTLIER_DO_NOT_USE + + +def test_create_median_library(three_sci_as_asn, tmp_cwd): + """Test creation of median library""" + lib_on_disk = ModelLibrary(three_sci_as_asn, on_disk=True) + lib_in_memory = ModelLibrary(three_sci_as_asn, on_disk=False) + + median_on_disk = create_median_library(lib_on_disk, 0.7) + median_in_memory = create_median_library(lib_in_memory, 0.7) + + # Make sure the median library is the same for on-disk and in-memory + assert np.allclose(median_on_disk, median_in_memory) From d694178f3cca6788551bcc6019b32587c8c31984 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 16:14:21 -0400 Subject: [PATCH 20/85] added changelog entry --- CHANGES.rst | 32 ++++++++++++++++++++++++++++++++ jwst/resample/resample.py | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1a3ef4af82..23dc40b9a9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,7 @@ align_refs ---------- - Compute alignment shifts from the first integration of the science exposure only. [#8643] + ami_average ----------- @@ -22,6 +23,12 @@ cube_build - Removed direct setting of the ``self.skip`` attribute from within the step itself. [#8600] +datamodels +---------- + +- Added `ModelLibrary` class to allow passing ``"on-disk"`` models between steps in the + image3 pipeline. [#8683] + master_background ----------------- @@ -37,6 +44,22 @@ outlier_detection - Refactored separate modes into submodules instead of inheriting from a base class. Moved non-JWST-specific code to stcal. 
[#8613] +- For imaging modes, step now uses `ModelLibrary` to handle accessing models consistently + whether they are in memory or on disk. [#8683] + +pipeline +-------- + +- Updated `calwebb_image3` to use `ModelLibrary` instead of `ModelContainer`, added + optional `on_disk` parameter to govern whether models in the library should be stored + in memory or on disk. [#8683] + +resample +-------- + +- Step now uses `ModelLibrary` to handle accessing models consistently + whether they are in memory or on disk. [#8683] + resample_spec ------------- @@ -60,6 +83,12 @@ scripts - Removed many non-working and out-dated scripts. Including many scripts that were replaced by ``strun``. [#8619] +skymatch +-------- + +- Step now uses `ModelLibrary` to handle accessing models consistently + whether they are in memory or on disk. [#8683] + stpipe ------ @@ -80,6 +109,9 @@ tweakreg - Removed direct setting of the ``self.skip`` attribute from within the step itself. [#8600] +- Step now uses `ModelLibrary` to handle accessing models consistently + whether they are in memory or on disk. 
[#8683] + 1.15.1 (2024-07-08) =================== diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 748fce8ba9..2e9c7204f5 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -365,7 +365,7 @@ def resample_many_to_one(self, input_models): output_model.meta.resample.pointings = len(input_models.group_names) if self.blendheaders: - self.blend_output_metadata(output_model) + self.blend_output_metadata(output_model, input_models) # copy over asn information copy_asn_info_from_library(input_models, output_model) From 6066f2bbcdc8c136c06d93e89511c00e97f899a6 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 31 Jul 2024 17:03:31 -0400 Subject: [PATCH 21/85] first draft of docs changes --- .../outlier_detection_imaging.rst | 60 ++++--------------- docs/jwst/skymatch/arguments.rst | 6 ++ docs/jwst/tweakreg/README.rst | 21 +++++-- 3 files changed, 35 insertions(+), 52 deletions(-) diff --git a/docs/jwst/outlier_detection/outlier_detection_imaging.rst b/docs/jwst/outlier_detection/outlier_detection_imaging.rst index 6cf235c3f2..c0ec2f6bca 100644 --- a/docs/jwst/outlier_detection/outlier_detection_imaging.rst +++ b/docs/jwst/outlier_detection/outlier_detection_imaging.rst @@ -15,11 +15,11 @@ Specifically, this routine performs the following operations: #. Convert input data, as needed, to make sure it is in a format that can be processed. - * A :py:class:`~jwst.datamodels.ModelContainer` serves as the basic format for + * A :py:class:`~jwst.datamodels.ModelLibrary` serves as the basic format for all processing performed by this step, as each entry will be treated as an element of a stack of images to be processed to identify bad-pixels/cosmic-rays and other artifacts. - * If the input data is a :py:class:`~jwst.datamodels.CubeModel`, convert it into a ModelContainer. + * If the input data is a :py:class:`~jwst.datamodels.CubeModel`, convert it into a ModelLibrary. 
This allows each plane of the cube to be treated as a separate 2D image for resampling (if done) and for combining into a median image. @@ -62,13 +62,13 @@ Specifically, this routine performs the following operations: if the input model container has an , otherwise the suffix will be ``_outlier_i2d.fits`` by default. * **If resampling is turned off** through the use of the ``resample_data`` parameter, - a copy of the unrectified input images (as a ModelContainer) + a copy of the unrectified input images (as a ModelLibrary) will be used for subsequent processing. #. Create a median image from all grouped observation mosaics. * The median image is created by combining all grouped mosaic images or - non-resampled input data (as planes in a ModelContainer) pixel-by-pixel. + non-resampled input data (as planes in a ModelLibrary) pixel-by-pixel. * The ``maskpt`` parameter sets the percentage of the weight image values to use, and any pixel with a weight below this value gets flagged as "bad" and ignored when resampled. @@ -129,7 +129,7 @@ The outlier detection algorithm can end up using massive amounts of memory depending on the number of inputs, the size of each input, and the size of the final output product. Specifically, -#. The input :py:class:`~jwst.datamodels.ModelContainer` or +#. The input :py:class:`~jwst.datamodels.ModelLibrary` or :py:class:`~jwst.datamodels.CubeModel` for IFU data, by default, all input exposures would have been kept open in memory to make processing more efficient. @@ -152,56 +152,20 @@ memory usage at the expense of file I/O. The control over this memory model hap with the use of the ``in_memory`` parameter. The full impact of this parameter during processing includes: -#. The ``save_open`` parameter gets set to `False` - when opening the input :py:class:`~jwst.datamodels.ModelContainer` object. 
- This forces all input models in the input :py:class:`~jwst.datamodels.ModelContainer` or - :py:class:`~jwst.datamodels.CubeModel` to get written out to disk. The ModelContainer - then uses the filename of the input model during subsequent processing. +#. The input :py:class:`~jwst.datamodels.ModelLibrary` object is loaded with `on_disk=True`. + This ensures that input models are loaded into memory one at a time, + and saved to a temporary file when not in use; these read-write operations are handled by + the :py:class:`~jwst.datamodels.ModelLibrary` object. -#. The ``in_memory`` parameter gets passed to the :py:class:`~jwst.resample.ResampleStep` - to set whether or not to keep the resampled images in memory or not. By default, - the outlier detection processing sets this parameter to `False` so that each resampled - image gets written out to disk. +#. The ``on_disk`` status of the :py:class:`~jwst.datamodels.ModelLibrary` gets passed to the + :py:class:`~jwst.resample.ResampleStep` as well, to set whether to keep the + resampled images in memory. #. Computing the median image works section-by-section by only keeping 1Mb of each input in memory at a time. As a result, only the final output product array for the final median image along with a stack of 1Mb image sections are kept in memory. -#. The final resampling step also avoids keeping all inputs in memory by only reading - each input into memory 1 at a time as it gets resampled onto the final output product. - These changes result in a minimum amount of memory usage during processing at the obvious expense of reading and writing the products from disk. - -Outlier Detection for Coronagraphic Data ----------------------------------------- -Coronagraphic data is processed in a near-identical manner to direct imaging data, but -no resampling occurs. - - -Outlier Detection for TSO data -------------------------------- -Normal imaging data benefit from combining all integrations into a -single image.
TSO data's value, however, comes from looking for variations from one -integration to the next. The outlier detection algorithm, therefore, gets run with -a few variations to accomodate the nature of these 3D data. See the -:ref:`TSO outlier detection ` documentation for details. - - -Outlier Detection for IFU data ------------------------------- -Integral Field Unit (IFU) data is handled as 2D images, similar to direct -imaging modes. The nature of the detection algorithm, however, is quite -different and involves measuring the differences between neighboring pixels -in the spatial (cross-dispersion) direction within the IFU slice images. -See the :ref:`IFU outlier detection ` documentation for -all the details. - - -Outlier Detection for Slit data -------------------------------- -See the :ref:`IFU outlier detection ` documentation for -details. - .. automodapi:: jwst.outlier_detection.imaging diff --git a/docs/jwst/skymatch/arguments.rst b/docs/jwst/skymatch/arguments.rst index f686a90395..75d8ccabf7 100644 --- a/docs/jwst/skymatch/arguments.rst +++ b/docs/jwst/skymatch/arguments.rst @@ -67,3 +67,9 @@ The ``skymatch`` step uses the following optional arguments: Bin width, in sigma, used to sample the distribution of pixel values in order to compute the sky background using statistics that require binning, such as `mode` and `midpt`. + +**Memory management parameters:** + +``on_disk`` (boolean, default=False) + Whether to preserve memory using temporary files + at the expense of having to run many I/O operations. diff --git a/docs/jwst/tweakreg/README.rst b/docs/jwst/tweakreg/README.rst index 6c6811a86a..699990afd4 100644 --- a/docs/jwst/tweakreg/README.rst +++ b/docs/jwst/tweakreg/README.rst @@ -86,7 +86,8 @@ models to the custom catalog file name, the ``tweakreg_step`` also supports two other ways of supplying custom source catalogs to the step: 1. 
Adding ``tweakreg_catalog`` attribute to the ``members`` of the input ASN - table - see `~jwst.datamodels.ModelContainer` for more details. + table - see `~jwst.datamodels.ModelLibrary` for more details. + # FIXME: does this still work as described? Catalog file names are relative to ASN file path. 2. Providing a simple two-column text file, specified via step's parameter @@ -165,17 +166,21 @@ telescope pointing will be identical in all these images and it is assumed that the relative positions of (e.g., NIRCam) detectors do not change. Identification of images that belong to the same "exposure" and therefore can be grouped together is based on several attributes described in -`~jwst.datamodels.ModelContainer`. This grouping is performed automatically +`~jwst.datamodels.ModelLibrary`. This grouping is performed automatically in the ``tweakreg`` step using the -`~jwst.datamodels.ModelContainer.models_grouped` property, which assigns +`~jwst.datamodels.ModelLibrary.models_grouped` property, which assigns a group ID to each input image model in ``meta.group_id``. +## FIXME: The ModelLibrary does not have a models_grouped property. +## However, the models_grouped property of ModelContainer is currently not +## accessed by tweakreg on master, either. Is this comment outdated, +## misleading, or incorrect, or am I misunderstanding something? However, when detector calibrations are not accurate, alignment of groups of images may fail (or result in poor alignment). In this case, it may be desirable to align each image independently. This can be achieved either by setting the ``image_model.meta.group_id`` attribute to a unique string or integer value for each image, or by adding the ``group_id`` attribute to the ``members`` of the input ASN -table - see `~jwst.datamodels.ModelContainer` for more details. +table - see `~jwst.datamodels.ModelLibrary` for more details. .. 
note:: Group ID (``group_id``) is used by both ``tweakreg`` and ``skymatch`` steps @@ -428,6 +433,14 @@ in the ``assign_wcs`` step. * ``sip_npoints``: Number of points for the SIP fit. (Default=12). +**stpipe general options:** + +* ``output_use_model``: A boolean indicating whether to use `DataModel.meta.filename` + when saving the results. (Default=True) + +* ``on_disk``: A boolean indicating whether to keep models in temporary files on disk + while not in use to save memory. (Default=False) + Further Documentation --------------------- The underlying algorithms as well as formats of source catalogs are described From 48b105006cba3aac7ee578af1b41271d0c43c023 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 1 Aug 2024 15:47:07 -0400 Subject: [PATCH 22/85] fixing regtest failures for spec3 pipeline, adding library to mtwcs --- jwst/assign_mtwcs/assign_mtwcs_step.py | 25 ++++---- jwst/assign_mtwcs/moving_target_wcs.py | 79 ++++++++++++++------------ jwst/assign_mtwcs/tests/test_mtwcs.py | 23 +++++--- jwst/datamodels/library.py | 14 +++++ jwst/outlier_detection/imaging.py | 6 -- jwst/outlier_detection/spec.py | 17 +++++- jwst/resample/resample.py | 20 ++++++- jwst/resample/resample_spec_step.py | 3 +- 8 files changed, 115 insertions(+), 72 deletions(-) diff --git a/jwst/assign_mtwcs/assign_mtwcs_step.py b/jwst/assign_mtwcs/assign_mtwcs_step.py index bd312ccddd..f85e25b1d2 100755 --- a/jwst/assign_mtwcs/assign_mtwcs_step.py +++ b/jwst/assign_mtwcs/assign_mtwcs_step.py @@ -1,9 +1,8 @@ #! 
/usr/bin/env python import logging -from stdatamodels.jwst import datamodels - -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary +from jwst.stpipe.utilities import record_step_status from ..stpipe import Step from .moving_target_wcs import assign_moving_target_wcs @@ -32,17 +31,15 @@ class AssignMTWcsStep(Step): """ def process(self, input): - if isinstance(input, str): - input = datamodels.open(input) - - # Can't apply the step if we aren't given a ModelContainer as input - if not isinstance(input, ModelContainer): - log.warning("Input data type is not supported.") - # raise ValueError("Expected input to be an association file name or a ModelContainer.") - input.meta.cal_step.assign_mtwcs = 'SKIPPED' - return input - # Apply the step + if not isinstance(input, ModelLibrary): + try: + input = ModelLibrary(input) + except Exception: + log.warning("Input data type is not supported.") + record_step_status(input, "assign_mtwcs", False) + return input + result = assign_moving_target_wcs(input) - + record_step_status(result, "assign_mtwcs", True) return result diff --git a/jwst/assign_mtwcs/moving_target_wcs.py b/jwst/assign_mtwcs/moving_target_wcs.py index 7f61b84e9c..5d33b5567f 100644 --- a/jwst/assign_mtwcs/moving_target_wcs.py +++ b/jwst/assign_mtwcs/moving_target_wcs.py @@ -16,7 +16,8 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary +from jwst.stpipe.utilities import record_step_status log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -24,49 +25,53 @@ __all__ = ["assign_moving_target_wcs"] -def assign_moving_target_wcs(input_model): +def assign_moving_target_wcs(input_models): - if not isinstance(input_model, ModelContainer): - raise ValueError("Expected a ModelContainer object") + if not isinstance(input_models, ModelLibrary): + raise ValueError("Expected a ModelLibrary object") - # get the indices of the science exposures in the 
ModelContainer - ind = input_model.ind_asn_type('science') - sci_models = np.asarray(input_model._models)[ind] - # Get the MT RA/Dec values from all the input exposures - mt_ra = np.array([model.meta.wcsinfo.mt_ra for model in sci_models]) - mt_dec = np.array([model.meta.wcsinfo.mt_dec for model in sci_models]) + # loop over only science exposures in the ModelLibrary + ind = input_models.ind_asn_type("science") + mt_ra = np.empty(len(ind)) + mt_dec = np.empty(len(ind)) + with input_models: + for i in ind: + model = input_models.borrow(i) + mt_ra[i] = model.meta.wcsinfo.mt_ra + mt_dec[i] = model.meta.wcsinfo.mt_dec + input_models.shelve(model, i, modify=False) # Compute the mean MT RA/Dec over all exposures if None in mt_ra or None in mt_dec: log.warning("One or more MT RA/Dec values missing in input images") log.warning("Step will be skipped, resulting in target misalignment") - for model in sci_models: - model.meta.cal_step.assign_mtwcs = 'SKIPPED' - return input_model - else: - mt_avra = mt_ra.mean() - mt_avdec = mt_dec.mean() - - for model in sci_models: - model.meta.wcsinfo.mt_avra = mt_avra - model.meta.wcsinfo.mt_avdec = mt_avdec - if isinstance(model, datamodels.MultiSlitModel): - for ind, slit in enumerate(model.slits): - new_wcs = add_mt_frame(slit.meta.wcs, - mt_avra, mt_avdec, - slit.meta.wcsinfo.mt_ra, slit.meta.wcsinfo.mt_dec) - del model.slits[ind].meta.wcs - model.slits[ind].meta.wcs = new_wcs - else: - - new_wcs = add_mt_frame(model.meta.wcs, mt_avra, mt_avdec, - model.meta.wcsinfo.mt_ra, model.meta.wcsinfo.mt_dec) - del model.meta.wcs - model.meta.wcs = new_wcs - - model.meta.cal_step.assign_mtwcs = 'COMPLETE' - - return input_model + record_step_status(input_models, "assign_mtwcs", False) + return input_models + + mt_avra = mt_ra.mean() + mt_avdec = mt_dec.mean() + + with input_models: + for i in ind: + model = input_models.borrow(i) + model.meta.wcsinfo.mt_avra = mt_avra + model.meta.wcsinfo.mt_avdec = mt_avdec + if isinstance(model, 
datamodels.MultiSlitModel): + for ind, slit in enumerate(model.slits): + new_wcs = add_mt_frame(slit.meta.wcs, + mt_avra, mt_avdec, + slit.meta.wcsinfo.mt_ra, slit.meta.wcsinfo.mt_dec) + del model.slits[ind].meta.wcs + model.slits[ind].meta.wcs = new_wcs + else: + + new_wcs = add_mt_frame(model.meta.wcs, mt_avra, mt_avdec, + model.meta.wcsinfo.mt_ra, model.meta.wcsinfo.mt_dec) + del model.meta.wcs + model.meta.wcs = new_wcs + input_models.shelve(model, i, modify=True) + + return input_models def add_mt_frame(wcs, ra_average, dec_average, mt_ra, mt_dec): diff --git a/jwst/assign_mtwcs/tests/test_mtwcs.py b/jwst/assign_mtwcs/tests/test_mtwcs.py index a33b848872..934869d4c5 100644 --- a/jwst/assign_mtwcs/tests/test_mtwcs.py +++ b/jwst/assign_mtwcs/tests/test_mtwcs.py @@ -2,7 +2,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelLibrary from jwst.assign_mtwcs import AssignMTWcsStep from jwst.assign_mtwcs.tests import data @@ -13,10 +13,17 @@ def test_mt_multislit(): file_path = os.path.join(data.__path__[0], 'test_mt_asn.json') with datamodels.open(file_path) as model: assert model[0].slits[0].meta.wcs.output_frame.name == 'world' - step = AssignMTWcsStep() - result = step.run(model) - assert isinstance(result, ModelContainer) - assert len(result[0].slits) == 1 - assert result[0].slits[0].meta.wcs.output_frame.name == 'moving_target' - assert len(result[1].slits) == 1 - assert result[1].slits[0].meta.wcs.output_frame.name == 'moving_target' + step = AssignMTWcsStep() + result = step.run(file_path) + assert isinstance(result, ModelLibrary) + with result: + zero = result.borrow(0) + one = result.borrow(1) + + assert len(zero.slits) == 1 + assert zero.slits[0].meta.wcs.output_frame.name == 'moving_target' + assert len(one.slits) == 1 + assert one.slits[0].meta.wcs.output_frame.name == 'moving_target' + + result.shelve(zero, 0, modify=False) + result.shelve(one, 1, modify=False) diff --git 
a/jwst/datamodels/library.py b/jwst/datamodels/library.py index ae57b11045..0b50b857fb 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -11,9 +11,23 @@ class ModelLibrary(AbstractModelLibrary): + """ + FIXME: Add docstring here, including like-for-like replacements for + ModelContainer association attribute accessors. + + ModelContainer: ind = container.ind_asn_type("science") + ModelLibrary: ind = library.ind_asn_type("science") + """ @property def crds_observatory(self): return "jwst" + + @property + def exptypes(self): + return [member["exptype"] for member in self._members] + + def ind_asn_type(self, exptype): + return [i for i, member in enumerate(self._members) if member["exptype"] == exptype] def _model_to_filename(self, model): model_filename = model.meta.filename diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 9c5bc65586..465c667b38 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -116,12 +116,6 @@ def detect_outliers( save_median(median_model, make_output_path, asn_id) del median_model - else: - # since we're not saving intermediate results if the drizzled models - # were written to disk, remove them - if not in_memory: - for fn in drizzled_models._models: - remove_file(fn) # Perform outlier detection using statistical comparisons between # each original input image and its blotted version of the median image diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 7f35733aa6..956c767951 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -3,9 +3,10 @@ """ import copy +import numpy as np from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary, SourceModelContainer from jwst.stpipe.utilities import record_step_status from ..resample import resample_spec, resample_utils @@ -69,7 +70,19 @@ def 
detect_outliers( asn_id=asn_id, ) median_wcs = resamp.output_wcs - drizzled_models = resamp.do_drizzle(input_models) + + # convert to library for resample, then back to container, + # for compatibility with image3 pipeline which uses + # ModelLibrary for memory savings + library = ModelLibrary(input_models, on_disk=False) + library = resamp.do_drizzle(library) + drizzled_models = ModelContainer() + with library: + for i, model in enumerate(library): + drizzled_models.append(model.copy()) + library.shelve(model, modify=False) + del library + if save_intermediate_results: for model in drizzled_models: model.meta.filename = make_output_path( diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 2e9c7204f5..a194bf387e 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -339,7 +339,6 @@ def resample_many_to_many(self, input_models): input_models.shelve(img, index, modify=False) if not self.in_memory: - # FIXME: Is this needed anymore with ModelLibrary? # Write out model to disk, then return filename output_name = output_model.meta.filename if self.output_dir is not None: @@ -352,7 +351,13 @@ def resample_many_to_many(self, input_models): output_model.data *= 0. output_model.wht *= 0. - return ModelLibrary(output_models) + if not self.in_memory: + # FIXME: here rebuild ModelLibrary as an association from the output files + # and return that. + # this yields memory savings if there are multiple groups + # for now, just pass + pass + return ModelLibrary(output_models, on_disk=False) def resample_many_to_one(self, input_models): """Resample and coadd many inputs to a single output. 
@@ -365,7 +370,16 @@ def resample_many_to_one(self, input_models): output_model.meta.resample.pointings = len(input_models.group_names) if self.blendheaders: - self.blend_output_metadata(output_model, input_models) + # FIXME: right now this needs a list of input models, all in memory + # but it needs to conform with ModelLibrary only loading one into memory at once + # for now, just load the models as a list + input_list = [] + with input_models: + for i, model in enumerate(input_models): + input_list.append(model) + input_models.shelve(model, i, modify=False) + self.blend_output_metadata(output_model, input_list) + del input_list # copy over asn information copy_asn_info_from_library(input_models, output_model) diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index e9d80b911f..89d8fc7cf9 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -165,8 +165,7 @@ def _process_slit(self, input_models): resamp = resample_spec.ResampleSpecData(input_models, **self.drizpars) - if isinstance(input_models, ModelContainer): - library = ModelLibrary(input_models, on_disk=False) + library = ModelLibrary(input_models, on_disk=False) library = resamp.do_drizzle(library) with library: for i, model in enumerate(library): From 99ef2f6d1fb74648a5272f79eec9ca3d1f016a8e Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 1 Aug 2024 16:03:54 -0400 Subject: [PATCH 23/85] integrate assign_mtwcs changes with spec3 pipeline --- jwst/pipeline/calwebb_spec3.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 0da1a15f34..03ea4f2c2b 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -5,7 +5,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import SourceModelContainer +from jwst.datamodels import SourceModelContainer, ModelLibrary from jwst.stpipe import 
query_step_status from ..associations.lib.rules_level3_base import format_product @@ -140,7 +140,17 @@ def process(self, input): if is_moving_target(input_models[0]): self.log.info("Assigning WCS to a Moving Target exposure.") - input_models = self.assign_mtwcs(input_models) + + # for compatibility with calwebb_image3, need to convert to ModelLibrary then back here + # keep asn metadata from input container - only metadata of individual models is modified + # by the assign_mtwcs step + library = ModelLibrary(input_models, on_disk=False) + library = self.assign_mtwcs(input_models) + with library: + for i, model in enumerate(library): + input_models[i] = model.copy() + library.shelve(model, modify=False) + del library # If background data are present, call the master background step if members_by_type['background']: From 55262c0ed7df1bf77fffc33f1fbc7929654dc884 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 2 Aug 2024 10:35:53 -0400 Subject: [PATCH 24/85] debug coron3 pipeline --- jwst/outlier_detection/imaging.py | 2 +- jwst/outlier_detection/spec.py | 4 +--- jwst/pipeline/calwebb_coron3.py | 9 +++++++-- jwst/resample/resample.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 465c667b38..e1ad9c200f 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -14,7 +14,7 @@ from jwst.stpipe.utilities import record_step_status from .utils import create_median_library, flag_crs_in_models_library, flag_crs_in_models_with_resampling_library -from ._fileio import remove_file, save_median +from ._fileio import save_median log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 956c767951..60bfde1ca5 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -3,10 +3,8 @@ """ import copy -import numpy as np from stdatamodels.jwst 
import datamodels - -from jwst.datamodels import ModelContainer, ModelLibrary, SourceModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from jwst.stpipe.utilities import record_step_status from ..resample import resample_spec, resample_utils diff --git a/jwst/pipeline/calwebb_coron3.py b/jwst/pipeline/calwebb_coron3.py index a309fcc14d..70ab07dcec 100644 --- a/jwst/pipeline/calwebb_coron3.py +++ b/jwst/pipeline/calwebb_coron3.py @@ -5,7 +5,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import ModelContainer +from jwst.datamodels import ModelContainer, ModelLibrary from ..model_blender import blendmeta @@ -196,7 +196,12 @@ def process(self, user_input): resample_input.append(model) # Call the resample step to combine all psf-subtracted target images - result = self.resample(resample_input) + # for compatibility with image3 pipeline use of ModelLibrary, + # convert ModelContainer to ModelLibrary and then back + resample_library = ModelLibrary(resample_input, on_disk=False) + + # Output is a single datamodel + result = self.resample(resample_library) # Blend the science headers try: diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index a194bf387e..4a9b54dcc1 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -439,7 +439,7 @@ def resample_many_to_one(self, input_models): self.update_exposure_times(output_model, input_models) - return ModelLibrary([output_model]) + return ModelLibrary([output_model,], on_disk=False) def resample_variance_arrays(self, output_model, input_models): From f3ae92e2320bade1f4706cf89723353363b7745a Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 2 Aug 2024 10:49:38 -0400 Subject: [PATCH 25/85] emptying data arrays in input to model_blender inside resample --- jwst/resample/resample.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 4a9b54dcc1..8bffce89d2 100644 --- 
a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -371,11 +371,19 @@ def resample_many_to_one(self, input_models): if self.blendheaders: # FIXME: right now this needs a list of input models, all in memory - # but it needs to conform with ModelLibrary only loading one into memory at once - # for now, just load the models as a list + # for now, just load the models as a list with empty data arrays + # but the blend_meta step itself should be refactored to expect a list of metadata objects + # instead of a list of datamodels input_list = [] with input_models: for i, model in enumerate(input_models): + model.data = np.empty((1, 1)) + model.dq = np.empty((1, 1)) + model.err = np.empty((1, 1)) + model.wht = np.empty((1, 1)) + model.var_rnoise = np.empty((1, 1)) + model.var_poisson = np.empty((1, 1)) + model.var_flat = np.empty((1, 1)) input_list.append(model) input_models.shelve(model, i, modify=False) self.blend_output_metadata(output_model, input_list) From e27acc211221f5a700025a92c9743c7a0e674f1d Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 2 Aug 2024 16:21:49 -0400 Subject: [PATCH 26/85] decreasing memory usage of outlier step using profiler --- jwst/outlier_detection/imaging.py | 10 +-- jwst/outlier_detection/utils.py | 14 ++-- jwst/resample/resample.py | 75 +++++++++++++++------- jwst/source_catalog/source_catalog_step.py | 2 + 4 files changed, 68 insertions(+), 33 deletions(-) diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index e1ad9c200f..424fc071fd 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -108,11 +108,11 @@ def detect_outliers( with drizzled_models: example_model = drizzled_models.borrow(0) drizzled_models.shelve(example_model, modify=False) - with datamodels.open(example_model) as dm0: - median_model = datamodels.ImageModel(median_data) - median_model.update(dm0) - median_model.meta.wcs = median_wcs - del example_model + with datamodels.open(example_model) 
as dm0: + median_model = datamodels.ImageModel(median_data) + median_model.update(dm0) + median_model.meta.wcs = median_wcs + del example_model save_median(median_model, make_output_path, asn_id) del median_model diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 887989d9ea..62ad2aa75d 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -31,7 +31,7 @@ def create_cube_median(cube_model, maskpt): return median -def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=1.0): +def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=10.0): """Create a median image from the singly resampled images. resampled_models is expected to be a ModelLibrary for imaging modes. """ @@ -62,9 +62,10 @@ def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=1. dtype = example_model.data.dtype nsections, section_nrows = _compute_buffer_indices(example_model, buffer_size) resampled_models.shelve(example_model, modify=False) + del example_model # get spatial sections of library and compute timewise median, one by one - resampled_sections = _get_sections_library(resampled_models, nsections, section_nrows, example_model.data.shape[0]) + resampled_sections = _get_sections_library(resampled_models, nsections, section_nrows, shp[0]) median_image_empty = np.empty(shp, dtype) * np.nan return _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty) @@ -88,18 +89,23 @@ def _get_sections_library(library, nsections, section_nrows, imrows): """ with library: example_model = library.borrow(0) + dtype = example_model.data.dtype + dtype_wht = example_model.wht.dtype + shp = example_model.data.shape library.shelve(example_model, 0, modify=False) + del example_model for i in range(nsections): row1 = i * section_nrows row2 = min(row1 + section_nrows, imrows) - data_list = np.empty((len(library), row2 - row1, example_model.data.shape[1]), 
example_model.data.dtype) - weight_list = np.empty((len(library), row2 - row1, example_model.data.shape[1]), example_model.wht.dtype) + data_list = np.empty((len(library), row2 - row1, shp[1]), dtype) + weight_list = np.empty((len(library), row2 - row1, shp[1]), dtype_wht) with library: for j, model in enumerate(library): data_list[j] = model.data[row1:row2] weight_list[j] = model.wht[row1:row2] library.shelve(model, j, modify=False) + del model yield (data_list, weight_list, (row1, row2)) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 8bffce89d2..f196edb7d0 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -11,6 +11,7 @@ from stdatamodels.jwst.library.basic_utils import bytes2human from jwst.datamodels import ModelLibrary +from jwst.associations.asn_from_list import asn_from_list from . import gwcs_drizzle from jwst.resample import resample_utils @@ -352,11 +353,10 @@ def resample_many_to_many(self, input_models): output_model.wht *= 0. if not self.in_memory: - # FIXME: here rebuild ModelLibrary as an association from the output files - # and return that. 
+ # rebuild ModelLibrary as an association from the output files # this yields memory savings if there are multiple groups - # for now, just pass - pass + asn = asn_from_list(output_models, product_name='outlier_i2d') + return ModelLibrary(asn, on_disk=True) return ModelLibrary(output_models, on_disk=False) def resample_many_to_one(self, input_models): @@ -377,14 +377,16 @@ def resample_many_to_one(self, input_models): input_list = [] with input_models: for i, model in enumerate(input_models): - model.data = np.empty((1, 1)) - model.dq = np.empty((1, 1)) - model.err = np.empty((1, 1)) - model.wht = np.empty((1, 1)) - model.var_rnoise = np.empty((1, 1)) - model.var_poisson = np.empty((1, 1)) - model.var_flat = np.empty((1, 1)) - input_list.append(model) + empty_model = type(model)() + empty_model.meta = model.meta + empty_model.data = np.empty((1, 1)) + empty_model.dq = np.empty((1, 1)) + empty_model.err = np.empty((1, 1)) + empty_model.wht = np.empty((1, 1)) + empty_model.var_rnoise = np.empty((1, 1)) + empty_model.var_poisson = np.empty((1, 1)) + empty_model.var_flat = np.empty((1, 1)) + input_list.append(empty_model) input_models.shelve(model, i, modify=False) self.blend_output_metadata(output_model, input_list) del input_list @@ -429,7 +431,7 @@ def resample_many_to_one(self, input_models): ymax=ymax ) del data, inwht - input_models.shelve(img, modify=False) + input_models.shelve(img) # Resample variance arrays in input_models to output_model self.resample_variance_arrays(output_model, input_models) @@ -465,7 +467,6 @@ def resample_variance_arrays(self, output_model, input_models): weighted_pn_var = np.full_like(output_model.data, np.nan) weighted_flat_var = np.full_like(output_model.data, np.nan) total_weight_rn_var = np.zeros_like(output_model.data) - total_weight_pn_var = np.zeros_like(output_model.data) total_weight_flat_var = np.zeros_like(output_model.data) with input_models: for i, model in enumerate(input_models): @@ -502,7 +503,24 @@ def 
resample_variance_arrays(self, output_model, input_models): ) total_weight_rn_var[mask] += weight[mask] - # Now do poisson and flat variance, updating only valid new values + input_models.shelve(model, i, modify=False) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) + warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) + + output_variance = (weighted_rn_var + / total_weight_rn_var / total_weight_rn_var) + setattr(output_model, "var_rnoise", output_variance) + del weighted_rn_var, total_weight_rn_var, output_variance + + # Poisson variance + total_weight_pn_var = np.zeros_like(output_model.data) + weighted_pn_var = np.full_like(output_model.data, np.nan) + with input_models: + for i, model in enumerate(input_models): + + # updating only valid new values # (zero is a valid value; negative, inf, or NaN are not) pn_var = self._resample_one_variance_array( "var_poisson", model, output_model) @@ -515,6 +533,21 @@ def resample_variance_arrays(self, output_model, input_models): ) total_weight_pn_var[mask] += weight[mask] + input_models.shelve(model, i, modify=False) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) + warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) + + output_variance = (weighted_pn_var + / total_weight_pn_var / total_weight_pn_var) + setattr(output_model, "var_poisson", output_variance) + del weighted_pn_var, total_weight_pn_var, output_variance + + # Flat field variance + with input_models: + for i, model in enumerate(input_models): + flat_var = self._resample_one_variance_array( "var_flat", model, output_model) if flat_var is not None: @@ -527,7 +560,7 @@ def resample_variance_arrays(self, output_model, input_models): total_weight_flat_var[mask] += weight[mask] del model.meta.iscale - input_models.shelve(model, i, modify=False) + input_models.shelve(model, i) # We now have a sum of the weighted resampled 
variances. # Divide by the total weights, squared, and set in the output model. @@ -536,17 +569,11 @@ def resample_variance_arrays(self, output_model, input_models): warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) - output_variance = (weighted_rn_var - / total_weight_rn_var / total_weight_rn_var) - setattr(output_model, "var_rnoise", output_variance) - - output_variance = (weighted_pn_var - / total_weight_pn_var / total_weight_pn_var) - setattr(output_model, "var_poisson", output_variance) - output_variance = (weighted_flat_var / total_weight_flat_var / total_weight_flat_var) setattr(output_model, "var_flat", output_variance) + del weighted_flat_var, total_weight_flat_var, output_variance + def _resample_one_variance_array(self, name, input_model, output_model): """Resample one variance image from an input model. diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 0e3b78ef08..19a8e8bde4 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -84,6 +84,7 @@ def process(self, input_model): threshold = self.snr_threshold * bkg.background_rms finder = JWSTSourceFinder(threshold, self.npixels, deblend=self.deblend) + del threshold convolved_data = convolve_data(model.data, self.kernel_fwhm, mask=coverage_mask) @@ -96,6 +97,7 @@ def process(self, input_model): catobj = JWSTSourceCatalog(model, segment_img, convolved_data, self.kernel_fwhm, aperture_params, abvega_offset, ci_star_thresholds) + del convolved_data catalog = catobj.catalog # add back background to data so input model is unchanged From d6c1642ce2eb8a8f04a07c0e560be7f31d7df13e Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 2 Aug 2024 17:34:34 -0400 Subject: [PATCH 27/85] revert refactor that introduced a bug in resample --- jwst/resample/resample.py | 70 ++++++++++++++------------------------- 1 file changed, 25 
insertions(+), 45 deletions(-) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index f196edb7d0..283f8c0e73 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -467,6 +467,7 @@ def resample_variance_arrays(self, output_model, input_models): weighted_pn_var = np.full_like(output_model.data, np.nan) weighted_flat_var = np.full_like(output_model.data, np.nan) total_weight_rn_var = np.zeros_like(output_model.data) + total_weight_pn_var = np.zeros_like(output_model.data) total_weight_flat_var = np.zeros_like(output_model.data) with input_models: for i, model in enumerate(input_models): @@ -503,24 +504,7 @@ def resample_variance_arrays(self, output_model, input_models): ) total_weight_rn_var[mask] += weight[mask] - input_models.shelve(model, i, modify=False) - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) - warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) - - output_variance = (weighted_rn_var - / total_weight_rn_var / total_weight_rn_var) - setattr(output_model, "var_rnoise", output_variance) - del weighted_rn_var, total_weight_rn_var, output_variance - - # Poisson variance - total_weight_pn_var = np.zeros_like(output_model.data) - weighted_pn_var = np.full_like(output_model.data, np.nan) - with input_models: - for i, model in enumerate(input_models): - - # updating only valid new values + # Now do poisson and flat variance, updating only valid new values # (zero is a valid value; negative, inf, or NaN are not) pn_var = self._resample_one_variance_array( "var_poisson", model, output_model) @@ -533,21 +517,6 @@ def resample_variance_arrays(self, output_model, input_models): ) total_weight_pn_var[mask] += weight[mask] - input_models.shelve(model, i, modify=False) - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) - warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) - - 
output_variance = (weighted_pn_var - / total_weight_pn_var / total_weight_pn_var) - setattr(output_model, "var_poisson", output_variance) - del weighted_pn_var, total_weight_pn_var, output_variance - - # Flat field variance - with input_models: - for i, model in enumerate(input_models): - flat_var = self._resample_one_variance_array( "var_flat", model, output_model) if flat_var is not None: @@ -558,21 +527,32 @@ def resample_variance_arrays(self, output_model, input_models): axis=0 ) total_weight_flat_var[mask] += weight[mask] - + del model.meta.iscale + del weight input_models.shelve(model, i) - # We now have a sum of the weighted resampled variances. - # Divide by the total weights, squared, and set in the output model. - # Zero weight and missing values are NaN in the output. - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) - warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) - - output_variance = (weighted_flat_var - / total_weight_flat_var / total_weight_flat_var) - setattr(output_model, "var_flat", output_variance) - del weighted_flat_var, total_weight_flat_var, output_variance + # We now have a sum of the weighted resampled variances. + # Divide by the total weights, squared, and set in the output model. + # Zero weight and missing values are NaN in the output. 
+ with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning) + warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning) + + output_variance = (weighted_rn_var + / total_weight_rn_var / total_weight_rn_var) + setattr(output_model, "var_rnoise", output_variance) + + output_variance = (weighted_pn_var + / total_weight_pn_var / total_weight_pn_var) + setattr(output_model, "var_poisson", output_variance) + + output_variance = (weighted_flat_var + / total_weight_flat_var / total_weight_flat_var) + setattr(output_model, "var_flat", output_variance) + + del weighted_rn_var, weighted_pn_var, weighted_flat_var + del total_weight_rn_var, total_weight_pn_var, total_weight_flat_var def _resample_one_variance_array(self, name, input_model, output_model): From 189b375ead7812971767674968c31276d5dcc650 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 12 Aug 2024 11:05:06 -0400 Subject: [PATCH 28/85] update skymatch input spec --- docs/jwst/skymatch/arguments.rst | 4 ++-- jwst/skymatch/skymatch_step.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/jwst/skymatch/arguments.rst b/docs/jwst/skymatch/arguments.rst index 75d8ccabf7..a3af4a5918 100644 --- a/docs/jwst/skymatch/arguments.rst +++ b/docs/jwst/skymatch/arguments.rst @@ -70,6 +70,6 @@ The ``skymatch`` step uses the following optional arguments: **Memory management parameters:** -``on_disk`` (boolean, default=False) - Whether to preserve memory using temporary files +``in_memory`` (boolean, default=True) + If False, preserve memory using temporary files at the expense of having to run many I/O operations. 
diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index 1dae658135..07c56dcfbc 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -60,7 +60,7 @@ class SkyMatchStep(Step): binwidth = float(min=0.0, default=0.1) # Bin width for 'mode' and 'midpt' `skystat`, in sigma # Memory management: - on_disk = boolean(default=False) # Preserve memory using temporary files + in_memory = boolean(default=True) # If False, preserve memory using temporary files """ # noqa: E501 reference_file_types = [] @@ -74,7 +74,7 @@ def process(self, input): if isinstance(input, ModelLibrary): library = input else: - library = ModelLibrary(input, on_disk=self.on_disk) + library = ModelLibrary(input, on_disk=not self.in_memory) self._dqbits = interpret_bit_flags(self.dqbits, flag_name_map=pixel) @@ -176,10 +176,10 @@ def _imodel2skyim(self, image_model, index): pix_area=1.0, # TODO: pixel area convf=1.0, # TODO: conv. factor to brightness mask=dqmask, - id=image_model.meta.filename, # file name? 
+ id=image_model.meta.filename, skystat=self._skystat, stepsize=self.stepsize, - reduce_memory_usage=False, # FIXME: this overwrote input files + reduce_memory_usage=False, # this overwrote input files meta={'index': index} ) From a2dd9479a31dafbb551373fb58c6c49cc858ca16 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 12 Aug 2024 13:55:59 -0400 Subject: [PATCH 29/85] bugfix for failed asdf load of recursive wcs transform --- jwst/datamodels/library.py | 2 +- jwst/resample/resample.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 0b50b857fb..6eeba1303b 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -60,7 +60,7 @@ def _filename_to_group_id(self, filename): try: with fits.open(filename) as ff: if "ASDF" in ff: - asdf_yaml = asdf.util.load_yaml(io.BytesIO(ff['ASDF'].data.tobytes())) + asdf_yaml = asdf.util.load_yaml(io.BytesIO(ff['ASDF'].data.tobytes()), tagged=True) if group_id := asdf_yaml.get('meta', {}).get('group_id'): return group_id header = ff["PRIMARY"].header diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 283f8c0e73..67f710537f 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -1,6 +1,7 @@ import logging import os import warnings +import json import numpy as np import psutil @@ -353,10 +354,12 @@ def resample_many_to_many(self, input_models): output_model.wht *= 0. 
if not self.in_memory: - # rebuild ModelLibrary as an association from the output files - # this yields memory savings if there are multiple groups + # build ModelLibrary as an association from the output files + # this saves memory if there are multiple groups asn = asn_from_list(output_models, product_name='outlier_i2d') - return ModelLibrary(asn, on_disk=True) + asn_dict = json.loads(asn.dump()[1]) # serializes the asn and converts to dict + return ModelLibrary(asn_dict, on_disk=True) + # otherwise just build it as a list of in-memory models return ModelLibrary(output_models, on_disk=False) def resample_many_to_one(self, input_models): From 1ca5c9b7c875c914420fd1f856f0041ef79e199e Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 13 Aug 2024 14:59:48 -0400 Subject: [PATCH 30/85] small changes from memory profiling and review --- CHANGES.rst | 6 +++++ jwst/datamodels/library.py | 26 +++++++++++++++++----- jwst/pipeline/calwebb_image3.py | 1 + jwst/resample/resample_spec_step.py | 1 - jwst/source_catalog/source_catalog_step.py | 1 + 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f7a25e4d99..50fc57345b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,12 @@ ami_average - Fix error in step spec that prevents step creation. [#8677] +assign_mtwcs +------------ + +- Step now uses `ModelLibrary` to handle accessing models consistently + whether they are in memory or on disk. [#8683] + assign_wcs ---------- diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 6eeba1303b..f910fb77e1 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -12,11 +12,10 @@ class ModelLibrary(AbstractModelLibrary): """ - FIXME: Add docstring here, including like-for-like replacements for - ModelContainer association attribute accessors. 
- - ModelContainer: ind = container.ind_asn_type("science") - ModelLibrary: ind = library.ind_asn_type("science") + JWST implementation of the ModelLibrary, a container designed to allow + efficient processing of datamodel instances created from an association. + See the `stpipe library documentation Date: Wed, 14 Aug 2024 12:50:23 -0400 Subject: [PATCH 31/85] bugfix for failed to load area extension and other metadata --- jwst/outlier_detection/spec.py | 63 ++++++++++++++++------------------ jwst/resample/resample.py | 1 + 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 60bfde1ca5..032e3943fa 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -8,8 +8,7 @@ from jwst.stpipe.utilities import record_step_status from ..resample import resample_spec, resample_utils -from .utils import create_median, flag_crs_in_models, flag_crs_in_models_with_resampling -from ._fileio import remove_file +from .utils import create_median_library, flag_crs_in_models, flag_crs_in_models_with_resampling import logging log = logging.getLogger(__name__) @@ -69,44 +68,45 @@ def detect_outliers( ) median_wcs = resamp.output_wcs - # convert to library for resample, then back to container, + # convert to library for resample # for compatibility with image3 pipeline which uses # ModelLibrary for memory savings library = ModelLibrary(input_models, on_disk=False) - library = resamp.do_drizzle(library) - drizzled_models = ModelContainer() - with library: - for i, model in enumerate(library): - drizzled_models.append(model.copy()) - library.shelve(model, modify=False) - del library + drizzled_models = resamp.do_drizzle(library) if save_intermediate_results: - for model in drizzled_models: - model.meta.filename = make_output_path( - basepath=model.meta.filename, - suffix="_outlier_s2d.fits", - ) - log.info("Writing out resampled spectra...") - model.save(model.meta.filename) + with 
drizzled_models: + for model in drizzled_models: + model.meta.filename = make_output_path( + basepath=model.meta.filename, + suffix="_outlier_s2d.fits", + ) + log.info("Writing out resampled spectra...") + model.save(model.meta.filename) + drizzled_models.shelve(model) + else: - drizzled_models = input_models - for i in range(len(input_models)): - drizzled_models[i].wht = resample_utils.build_driz_weight( - input_models[i], - weight_type=weight_type, - good_bits=good_bits) + drizzled_models = ModelLibrary(input_models) + with drizzled_models: + for i, model in enumerate(drizzled_models): + model.wht = resample_utils.build_driz_weight( + input_models[i], + weight_type=weight_type, + good_bits=good_bits) # copy for when saving median and input is a filename? median_wcs = copy.deepcopy(input_models[0].meta.wcs) # Perform median combination on set of drizzled mosaics # create_median should be called as a method from parent class - median_data = create_median(drizzled_models, maskpt) + median_data = create_median_library(drizzled_models, maskpt) if save_intermediate_results: # Initialize intermediate products used in the outlier detection median_model = datamodels.ImageModel(median_data) - median_model.meta = drizzled_models[0].meta + with drizzled_models: + example_model = drizzled_models.borrow(0) + drizzled_models.shelve(example_model, 0, modify=False) + median_model.meta = example_model.meta median_model.meta.filename = make_output_path( basepath=input_models[0].meta.filename, suffix='median' @@ -115,15 +115,12 @@ def detect_outliers( log.info("Writing out MEDIAN image to: {}".format( median_model.meta.filename)) median_model.save(median_model.meta.filename) - del median_model - else: - # since we're not saving intermediate results if the drizzled models - # were written to disk, remove them - if not in_memory: - for fn in drizzled_models._models: - remove_file(fn) - log.info(f"Removing file {fn}") + + with library: + for i, model in enumerate(library): + 
input_models[i] = model + library.shelve(model, i, modify=False) # Perform outlier detection using statistical comparisons between # each original input image and its blotted version of the median image diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 67f710537f..4635e4d4f5 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -306,6 +306,7 @@ def resample_many_to_many(self, input_models): log.info(f"{len(indices)} exposures to drizzle together") for index in indices: img = input_models.borrow(index) + img = datamodels.open(img) # must call this explicitly to get area reference file iscale = self._get_intensity_scale(img) log.debug(f'Using intensity scale iscale={iscale}') From b124a8b98918ac48aa2d852402ece0d389be8f04 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 14 Aug 2024 14:37:59 -0400 Subject: [PATCH 32/85] handle asn_table and asn_pool metadata properly --- jwst/datamodels/library.py | 5 +++-- jwst/outlier_detection/spec.py | 5 ----- jwst/resample/resample_spec_step.py | 7 +------ 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index f910fb77e1..32f6fb13b0 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -126,8 +126,9 @@ def _assign_member_to_model(self, model, member): if not hasattr(model.meta, "asn"): model.meta["asn"] = {} - model.meta.asn.table_name = self.asn.get("table_name", "") - model.meta.asn.pool_name = self.asn.get("asn_pool", "") + for attr in ("table_name", "asn_pool"): + if not hasattr(model.meta.asn, attr) and hasattr(self.asn, attr): # do not clobber existing values + model.meta.asn.table_name = self.asn.get(attr, "") def _attrs_to_group_id( diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 032e3943fa..261c51b290 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -117,11 +117,6 @@ def detect_outliers( 
median_model.save(median_model.meta.filename) del median_model - with library: - for i, model in enumerate(library): - input_models[i] = model - library.shelve(model, i, modify=False) - # Perform outlier detection using statistical comparisons between # each original input image and its blotted version of the median image if resample_data: diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index f9cf76f2d2..15a16e3a68 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -95,6 +95,7 @@ def process(self, input): result = self._process_slit(input_models) # Update ASNTABLE in output + result.meta.cal_step.resample = "COMPLETE" result.meta.asn.table_name = input_models[0].meta.asn.table_name result.meta.asn.pool_name = input_models[0].meta.asn.pool_name @@ -151,9 +152,6 @@ def _process_multislit(self, input_models): if self.pixel_scale is not None and pscale_ratio is None: pscale_ratio = resamp.pscale_ratio - result.meta.cal_step.resample = "COMPLETE" - result.meta.asn.pool_name = input_models.asn_pool_name - result.meta.asn.table_name = input_models.asn_table_name if self.pixel_scale is None or pscale_ratio is None: result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio else: @@ -222,9 +220,6 @@ def _process_slit(self, input_models): del library result = input_models[0] - result.meta.cal_step.resample = "COMPLETE" - result.meta.asn.pool_name = input_models.asn_pool_name - result.meta.asn.table_name = input_models.asn_table_name result.meta.bunit_data = input_models[0].meta.bunit_data if self.pixel_scale is None: result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio From 62bacbdc9f833465aa404e6e8951db724b3badad Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 14 Aug 2024 14:48:05 -0400 Subject: [PATCH 33/85] make ind_asn_type case-insensitive --- jwst/datamodels/library.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jwst/datamodels/library.py 
b/jwst/datamodels/library.py index 32f6fb13b0..16f16b7c04 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -32,7 +32,7 @@ def ind_asn_type(self, exptype): Parameters ---------- asn_exptype : str - Exposure type as defined in an association, e.g. "science". + Exposure type as defined in an association, e.g. "science". case-insensitive Returns ------- @@ -43,7 +43,7 @@ def ind_asn_type(self, exptype): ----- Library does NOT need to be open (i.e., this can be caled outside the `with` context) """ - return [i for i, member in enumerate(self._members) if member["exptype"] == exptype] + return [i for i, member in enumerate(self._members) if member["exptype"].lower() == exptype.lower()] def _model_to_filename(self, model): model_filename = model.meta.filename From 30dcb3f2824c098d9f591f5cd3fc45d5c26b99f9 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 14 Aug 2024 15:56:59 -0400 Subject: [PATCH 34/85] fix output filenames from outlier_detection --- .../outlier_detection_step.py | 20 +++++++++---------- jwst/regtest/test_niriss_image.py | 7 ++++--- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index b7040e7a09..c8c94e2334 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -89,6 +89,8 @@ def process(self, input_data): snr1, snr2 = [float(v) for v in self.snr.split()] scale1, scale2 = [float(v) for v in self.scale.split()] + print(self.make_output_path()) + if mode == 'tso': result_models = tso.detect_outliers( input_data, @@ -221,20 +223,18 @@ def _get_asn_id(self, input_models): asn_id = input_models.meta.asn_table.asn_id except (AttributeError, KeyError): pass - return asn_id if asn_id is None: asn_id = self.search_attr('asn_id') + if asn_id is not None: + _make_output_path = self.search_attr( + '_make_output_path', parent_first=True + ) - _make_output_path 
= self.search_attr( - '_make_output_path', parent_first=True - ) - - self._make_output_path = partial( - _make_output_path, - asn_id=asn_id, - suffix="i2d" - ) + self._make_output_path = partial( + _make_output_path, + asn_id=asn_id + ) return asn_id def _get_asn_id_library(self, input_models): diff --git a/jwst/regtest/test_niriss_image.py b/jwst/regtest/test_niriss_image.py index 4cce41eda6..adfb0efa4a 100644 --- a/jwst/regtest/test_niriss_image.py +++ b/jwst/regtest/test_niriss_image.py @@ -71,9 +71,10 @@ def test_niriss_tweakreg_no_sources(rtdata, fitsdiff_default_kwargs): assert model.meta.cal_step.tweakreg != 'SKIPPED' result = TweakRegStep.call(mc) - - for model in result: - assert model.meta.cal_step.tweakreg == 'SKIPPED' + with result: + for model in result: + assert model.meta.cal_step.tweakreg == 'SKIPPED' + result.shelve(model, modify=False) result.close() From 5e9e67206653faf684d0c9400d23682aab44d677 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 15 Aug 2024 11:46:23 -0400 Subject: [PATCH 35/85] bugfix for HDRTAB association info in i2d files --- .../outlier_detection_step.py | 25 +++++-------------- jwst/regtest/test_niriss_image.py | 4 +-- jwst/resample/resample.py | 12 +++++---- jwst/resample/resample_step.py | 1 - 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index c8c94e2334..90a5013793 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -89,8 +89,6 @@ def process(self, input_data): snr1, snr2 = [float(v) for v in self.snr.split()] scale1, scale2 = [float(v) for v in self.scale.split()] - print(self.make_output_path()) - if mode == 'tso': result_models = tso.detect_outliers( input_data, @@ -215,14 +213,13 @@ def _get_asn_id(self, input_models): input_models = datamodels.open(input_models, asn_n_members=1) # Setup output path naming if associations are 
involved. - if isinstance(input_models, ModelLibrary): - asn_id = self._get_asn_id_library(input_models) - else: - asn_id = None - try: + try: + if isinstance(input_models, ModelLibrary): + asn_id = input_models.asn["asn_id"] + else: asn_id = input_models.meta.asn_table.asn_id - except (AttributeError, KeyError): - pass + except (AttributeError, KeyError): + asn_id = None if asn_id is None: asn_id = self.search_attr('asn_id') @@ -236,13 +233,3 @@ def _get_asn_id(self, input_models): asn_id=asn_id ) return asn_id - - def _get_asn_id_library(self, input_models): - """Get the association ID from a ModelLibrary. - Does not open any models, so it should respect on_disk status.""" - asn_id = None - try: - asn_id = input_models.asn.table_name - except (AttributeError, KeyError): - pass - return asn_id diff --git a/jwst/regtest/test_niriss_image.py b/jwst/regtest/test_niriss_image.py index adfb0efa4a..7e186986a2 100644 --- a/jwst/regtest/test_niriss_image.py +++ b/jwst/regtest/test_niriss_image.py @@ -74,9 +74,7 @@ def test_niriss_tweakreg_no_sources(rtdata, fitsdiff_default_kwargs): with result: for model in result: assert model.meta.cal_step.tweakreg == 'SKIPPED' - result.shelve(model, modify=False) - - result.close() + result.shelve(model, modify=False) def _assert_is_same(rtdata_module, fitsdiff_default_kwargs, suffix): diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 4635e4d4f5..b5dc12bb60 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -373,16 +373,21 @@ def resample_many_to_one(self, input_models): output_model.meta.resample.weight_type = self.weight_type output_model.meta.resample.pointings = len(input_models.group_names) + # copy over asn information + copy_asn_info_from_library(input_models, output_model) + if self.blendheaders: - # FIXME: right now this needs a list of input models, all in memory + # right now this needs a list of input models, all in memory # for now, just load the models as a list with empty 
data arrays - # but the blend_meta step itself should be refactored to expect a list of metadata objects + # but the blend_meta step itself should eventually be refactored + # to expect a list of metadata objects # instead of a list of datamodels input_list = [] with input_models: for i, model in enumerate(input_models): empty_model = type(model)() empty_model.meta = model.meta + copy_asn_info_from_library(input_models, empty_model) empty_model.data = np.empty((1, 1)) empty_model.dq = np.empty((1, 1)) empty_model.err = np.empty((1, 1)) @@ -395,9 +400,6 @@ def resample_many_to_one(self, input_models): self.blend_output_metadata(output_model, input_list) del input_list - # copy over asn information - copy_asn_info_from_library(input_models, output_model) - # Initialize the output with the wcs driz = gwcs_drizzle.GWCSDrizzle(output_model, pixfrac=self.pixfrac, kernel=self.kernel, fillval=self.fillval) diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index d041fff60b..96c26d8160 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -103,7 +103,6 @@ def process(self, input): model.meta.cal_step.resample = 'COMPLETE' self.update_fits_wcs(model) util.update_s_region_imaging(model) - resample.copy_asn_info_from_library(input_models, model) # if pixel_scale exists, it will override pixel_scale_ratio. 
# calculate the actual value of pixel_scale_ratio based on pixel_scale From f87809e42e407bd238d5a9948dd6abed49f4ebb0 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 15 Aug 2024 11:53:12 -0400 Subject: [PATCH 36/85] re-add python 3.13 pin that was accidentally clobbered --- .github/workflows/build.yml | 12 ++++++------ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f842f29b8a..cc6c3dfd50 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,12 +25,12 @@ jobs: with: upload_to_pypi: ${{ (github.event_name == 'release') && (github.event.action == 'released') }} targets: | - # Linux wheels (except python 313) - - cp31[!3]-manylinux_x86_64 - # MacOS wheels (except python 313) - - cp31[!3]-macosx_x86_64 - # MacOS arm64 wheels (except python 313) - - cp31[!3]-macosx_arm64 + # Linux wheels + - cp3*-manylinux_x86_64 + # MacOS wheels + - cp3*-macosx_x86_64 + # MacOS arm64 wheels + - cp3*-macosx_arm64 sdist: true test_command: python -c "from jwst.lib import winclip; from jwst.cube_build import cube_match_internal, cube_match_sky_pointcloud, cube_match_sky_driz, blot_median; from jwst.straylight import calc_xart" secrets: diff --git a/pyproject.toml b/pyproject.toml index 8dd98fcb02..817d0e3e2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "jwst" description = "Library for calibration of science observations from the James Webb Space Telescope" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" authors = [ { name = "JWST calibration pipeline developers" }, ] From ca62bafb99e00d75219a7ca7ca078960509a874d Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 15 Aug 2024 11:57:19 -0400 Subject: [PATCH 37/85] reverting accidental clobber of stcal pin dependency --- CHANGES.rst | 2 ++ pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 
50fc57345b..85c7328f01 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -46,6 +46,8 @@ general - Remove the unused ``stsci.image`` dependency. [#8663] +- Update required stcal version to 1.8.0. [#8706] + master_background ----------------- diff --git a/pyproject.toml b/pyproject.toml index 817d0e3e2a..825ab5ca3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "scikit-image>=0.19", "scipy>=1.9.3", "spherical-geometry>=1.2.22", - "stcal @ git+https://github.com/spacetelescope/stcal.git@main", + "stcal>=1.8.0,<1.9.0", "stdatamodels>=2.0.0,<2.1.0", "stpipe @ git+https://github.com/spacetelescope/stpipe.git@main", "stsci.image>=2.3.5", From d47ef4b30b4d970e8c5da9a26a67bd0d9516e468 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 16 Aug 2024 17:22:30 -0400 Subject: [PATCH 38/85] updates after reviews by myself and by @braingram --- docs/jwst/pipeline/calwebb_image3.rst | 4 +- docs/jwst/tweakreg/README.rst | 12 +- jwst/assign_mtwcs/assign_mtwcs_step.py | 1 - jwst/assign_mtwcs/moving_target_wcs.py | 3 +- jwst/datamodels/library.py | 21 +- jwst/datamodels/tests/test_library.py | 11 ++ jwst/outlier_detection/imaging.py | 46 ++--- .../outlier_detection_step.py | 6 +- jwst/outlier_detection/spec.py | 18 +- .../tests/test_outlier_detection.py | 27 +-- jwst/outlier_detection/utils.py | 182 ++++++------------ jwst/pipeline/calwebb_coron3.py | 2 +- jwst/pipeline/calwebb_image3.py | 73 ++++--- jwst/pipeline/calwebb_spec3.py | 13 +- jwst/pipeline/tests/test_calwebb_image3.py | 32 ++- jwst/resample/resample.py | 6 +- jwst/resample/resample_spec.py | 7 +- jwst/resample/resample_spec_step.py | 3 +- jwst/skymatch/skymatch_step.py | 21 +- jwst/skymatch/tests/test_skymatch.py | 2 +- jwst/source_catalog/source_catalog_step.py | 3 - jwst/stpipe/utilities.py | 10 +- jwst/tweakreg/tests/test_multichip_jwst.py | 2 +- jwst/tweakreg/tweakreg_step.py | 56 +++--- pyproject.toml | 3 +- 25 files changed, 288 insertions(+), 276 deletions(-) diff --git 
a/docs/jwst/pipeline/calwebb_image3.rst b/docs/jwst/pipeline/calwebb_image3.rst index 710948c3f5..1bad06273b 100644 --- a/docs/jwst/pipeline/calwebb_image3.rst +++ b/docs/jwst/pipeline/calwebb_image3.rst @@ -34,7 +34,9 @@ processed using the :ref:`calwebb_tso3 ` pipeline. Arguments --------- -The ``calwebb_image3`` pipeline does not have any optional arguments. +``--in_memory`` + Boolean governing whether to load all models in the input association to memory at once (faster) + or to save to temporary files when not in use (slower, less memory usage). Default is True. Inputs ------ diff --git a/docs/jwst/tweakreg/README.rst b/docs/jwst/tweakreg/README.rst index 699990afd4..cc4451397f 100644 --- a/docs/jwst/tweakreg/README.rst +++ b/docs/jwst/tweakreg/README.rst @@ -168,12 +168,8 @@ Identification of images that belong to the same "exposure" and therefore can be grouped together is based on several attributes described in `~jwst.datamodels.ModelLibrary`. This grouping is performed automatically in the ``tweakreg`` step using the -`~jwst.datamodels.ModelLibrary.models_grouped` property, which assigns -a group ID to each input image model in ``meta.group_id``. -## FIXME: The ModelLibrary does not have a models_grouped property. -## However, the models_grouped property of ModelContainer is currently not -## accessed by tweakreg on master, either. Is this comment outdated, -## misleading, or incorrect, or am I misunderstanding something? +`~jwst.datamodels.ModelLibrary.group_names` property. + However, when detector calibrations are not accurate, alignment of groups of images may fail (or result in poor alignment). In this case, it may be @@ -438,8 +434,8 @@ in the ``assign_wcs`` step. * ``output_use_model``: A boolean indicating whether to use `DataModel.meta.filename` when saving the results. (Default=True) -* ``on_disk``: A boolean indicating whether to keep models in temporary files on disk - while not in use to save memory. 
(Default=False) +* ``in_memory``: A boolean indicating whether to keep models in memory, or to save + temporary files on disk while not in use to save memory. (Default=True) Further Documentation --------------------- diff --git a/jwst/assign_mtwcs/assign_mtwcs_step.py b/jwst/assign_mtwcs/assign_mtwcs_step.py index f85e25b1d2..a8e33e46f9 100755 --- a/jwst/assign_mtwcs/assign_mtwcs_step.py +++ b/jwst/assign_mtwcs/assign_mtwcs_step.py @@ -41,5 +41,4 @@ def process(self, input): return input result = assign_moving_target_wcs(input) - record_step_status(result, "assign_mtwcs", True) return result diff --git a/jwst/assign_mtwcs/moving_target_wcs.py b/jwst/assign_mtwcs/moving_target_wcs.py index 5d33b5567f..44bac3f1d5 100644 --- a/jwst/assign_mtwcs/moving_target_wcs.py +++ b/jwst/assign_mtwcs/moving_target_wcs.py @@ -31,7 +31,7 @@ def assign_moving_target_wcs(input_models): raise ValueError("Expected a ModelLibrary object") # loop over only science exposures in the ModelLibrary - ind = input_models.ind_asn_type("science") + ind = input_models.indices_for_exptype("science") mt_ra = np.empty(len(ind)) mt_dec = np.empty(len(ind)) with input_models: @@ -71,6 +71,7 @@ def assign_moving_target_wcs(input_models): model.meta.wcs = new_wcs input_models.shelve(model, i, modify=True) + record_step_status(input_models, "assign_mtwcs", True) return input_models diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 16f16b7c04..23a3b964bd 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -23,25 +23,28 @@ def crds_observatory(self): @property def exptypes(self): + """ + List of exposure types for all members in the library. + """ return [member["exptype"] for member in self._members] - def ind_asn_type(self, exptype): + def indices_for_exptype(self, exptype): """ - Determine the indices of models corresponding to ``asn_exptype``. + Determine the indices of models corresponding to ``exptype``. 
Parameters ---------- - asn_exptype : str + exptype : str Exposure type as defined in an association, e.g. "science". case-insensitive Returns ------- ind : list - Indices of models in ModelLibrary matching ``asn_exptype``. + Indices of models in ModelLibrary with member exposure types matching ``exptype``. Notes ----- - Library does NOT need to be open (i.e., this can be caled outside the `with` context) + Library does NOT need to be open (i.e., this can be called outside the `with` context) """ return [i for i, member in enumerate(self._members) if member["exptype"].lower() == exptype.lower()] @@ -126,9 +129,11 @@ def _assign_member_to_model(self, model, member): if not hasattr(model.meta, "asn"): model.meta["asn"] = {} - for attr in ("table_name", "asn_pool"): - if not hasattr(model.meta.asn, attr) and hasattr(self.asn, attr): # do not clobber existing values - model.meta.asn.table_name = self.asn.get(attr, "") + if not hasattr(model.meta.asn, "table_name") and hasattr(self.asn, "table_name"): # do not clobber existing values + setattr(model.meta.asn, "table_name", getattr(self.asn, "table_name")) + + if not hasattr(model.meta.asn, "pool_name") and hasattr(self.asn, "asn_pool"): # do not clobber existing values + setattr(model.meta.asn, "pool_name", getattr(self.asn, "asn_pool")) def _attrs_to_group_id( diff --git a/jwst/datamodels/tests/test_library.py b/jwst/datamodels/tests/test_library.py index 5dc4c109de..d72968b7b5 100644 --- a/jwst/datamodels/tests/test_library.py +++ b/jwst/datamodels/tests/test_library.py @@ -128,3 +128,14 @@ def test_group_id_override(example_asn_path, asn_group_id, meta_group_id, expect model = library.borrow(0) assert model.meta.group_id == expected_group_id library.shelve(model, 0, modify=False) + + +def test_asn_attributes_assignment(example_library): + + # test that the association attributes are assigned to the models + with example_library: + for i in range(_N_MODELS): + model = example_library.borrow(i) + assert 
hasattr(model.meta.asn, 'pool_name') + assert hasattr(model.meta.asn, 'table_name') + example_library.shelve(model, i, modify=False) \ No newline at end of file diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 424fc071fd..0b70c8f884 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -13,8 +13,8 @@ from jwst.resample.resample_utils import build_driz_weight from jwst.stpipe.utilities import record_step_status -from .utils import create_median_library, flag_crs_in_models_library, flag_crs_in_models_with_resampling_library -from ._fileio import save_median +from .utils import create_median, flag_model_crs, flag_resampled_model_crs +from ._fileio import remove_file, save_median log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) @@ -51,8 +51,7 @@ def detect_outliers( See `OutlierDetectionStep.spec` for documentation of these arguments. """ if not isinstance(input_models, ModelLibrary): - on_disk = not in_memory - input_models = ModelLibrary(input_models, on_disk=on_disk) + input_models = ModelLibrary(input_models, on_disk=not in_memory) if len(input_models) < 2: log.warning(f"Input only contains {len(input_models)} exposures") @@ -98,38 +97,39 @@ def detect_outliers( # copy for when saving median and input is a filename? 
if i == 0: median_wcs = copy.deepcopy(model.meta.wcs) + input_models.shelve(model, modify=True) # Perform median combination on set of drizzled mosaics - on_disk = not in_memory - median_data = create_median_library(drizzled_models, maskpt, on_disk=on_disk) + median_data = create_median(drizzled_models, maskpt, on_disk=not in_memory) if save_intermediate_results: # make a median model with drizzled_models: example_model = drizzled_models.borrow(0) drizzled_models.shelve(example_model, modify=False) - with datamodels.open(example_model) as dm0: - median_model = datamodels.ImageModel(median_data) - median_model.update(dm0) - median_model.meta.wcs = median_wcs + #with datamodels.open(example_model) as dm0: + median_model = datamodels.ImageModel(median_data) + median_model.update(example_model) + median_model.meta.wcs = median_wcs del example_model save_median(median_model, make_output_path, asn_id) del median_model + else: + # since we're not saving intermediate results if the drizzled models + # were written to disk, remove them + if not in_memory: + for fn in drizzled_models._members: + remove_file(fn["expname"]) # Perform outlier detection using statistical comparisons between # each original input image and its blotted version of the median image - if resample_data: - flag_crs_in_models_with_resampling_library( - input_models, - median_data, - median_wcs, - snr1, - snr2, - scale1, - scale2, - backg, - ) - else: - flag_crs_in_models_library(input_models, median_data, snr1) + with input_models: + for image in input_models: + if resample_data: + flag_resampled_model_crs(image, median_data, median_wcs, snr1, snr2, scale1, scale2, backg) + else: + flag_model_crs(image, median_data, snr1) + input_models.shelve(image, modify=True) + return input_models diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index 90a5013793..9af7cacf30 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ 
b/jwst/outlier_detection/outlier_detection_step.py @@ -76,13 +76,11 @@ def process(self, input_data): # determine the "mode" (if not set by the pipeline) mode = self._guess_mode(input_data) if mode is None: - return self._set_status(input_data, False) + record_step_status(input_data, "outlier_detection", False) + return input_data self.log.info(f"Outlier Detection mode: {mode}") # determine the asn_id (if not set by the pipeline) - if mode == "imaging": - if not isinstance(input_data, ModelLibrary): - input_data = ModelLibrary(input_data, on_disk=not self.in_memory) asn_id = self._get_asn_id(input_data) self.log.info(f"Outlier Detection asn_id: {asn_id}") diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 261c51b290..1647f96061 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -8,7 +8,8 @@ from jwst.stpipe.utilities import record_step_status from ..resample import resample_spec, resample_utils -from .utils import create_median_library, flag_crs_in_models, flag_crs_in_models_with_resampling +from .utils import create_median, flag_crs_in_models, flag_crs_in_models_with_resampling +from ._fileio import remove_file import logging log = logging.getLogger(__name__) @@ -69,8 +70,7 @@ def detect_outliers( median_wcs = resamp.output_wcs # convert to library for resample - # for compatibility with image3 pipeline which uses - # ModelLibrary for memory savings + # for compatibility with image3 pipeline library = ModelLibrary(input_models, on_disk=False) drizzled_models = resamp.do_drizzle(library) @@ -84,8 +84,17 @@ def detect_outliers( log.info("Writing out resampled spectra...") model.save(model.meta.filename) drizzled_models.shelve(model) + else: + # since we're not saving intermediate results if the drizzled models + # were written to disk, remove them + if not in_memory: + for fn in drizzled_models._members: + remove_file(fn["expname"]) else: + # TODO: there appears not to be any test coverage for this 
branch + # as discovered while testing another ticket. Adding that coverage is beyond + # the scope of the ticket, but it should be added in the future. drizzled_models = ModelLibrary(input_models) with drizzled_models: for i, model in enumerate(drizzled_models): @@ -93,12 +102,13 @@ def detect_outliers( input_models[i], weight_type=weight_type, good_bits=good_bits) + drizzled_models.shelve(model) # copy for when saving median and input is a filename? median_wcs = copy.deepcopy(input_models[0].meta.wcs) # Perform median combination on set of drizzled mosaics # create_median should be called as a method from parent class - median_data = create_median_library(drizzled_models, maskpt) + median_data = create_median(drizzled_models, maskpt) if save_intermediate_results: # Initialize intermediate products used in the outlier detection diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index d5c1f29d8e..e8a6ff5f55 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -8,7 +8,7 @@ from jwst.datamodels import ModelContainer, ModelLibrary from jwst.outlier_detection import OutlierDetectionStep -from jwst.outlier_detection.utils import flag_resampled_model_crs, create_median_library +from jwst.outlier_detection.utils import _flag_resampled_model_crs, create_median from jwst.outlier_detection.outlier_detection_step import ( IMAGE_MODES, TSO_SPEC_MODES, @@ -75,7 +75,7 @@ def test_flag_cr(sci_blot_image_pair): # run flag_cr() which updates in-place. Copy sci first. 
data_copy = sci.data.copy() - flag_resampled_model_crs( + _flag_resampled_model_crs( sci, blot.data, 5.0, @@ -191,11 +191,12 @@ def we_three_sci(): return we_many_sci(numsci=3) -def test_outlier_step_no_outliers(we_three_sci, tmp_cwd): +@pytest.mark.parametrize("do_resample", [True, False]) +def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd): """Test whole step, no outliers""" container = ModelContainer(list(we_three_sci)) pristine = ModelContainer([m.copy() for m in container]) - OutlierDetectionStep.call(container, in_memory=True) + OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample) # Make sure nothing changed in SCI and DQ arrays for image, uncorrected in zip(pristine, container): @@ -214,7 +215,7 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): container.shelve(zeroth) # Verify that intermediary files are removed - OutlierDetectionStep.call(container, in_memory=True) + OutlierDetectionStep.call(container) i2d_files = glob(os.path.join(tmp_cwd, '*i2d.fits')) median_files = glob(os.path.join(tmp_cwd, '*median.fits')) assert len(i2d_files) == 0 @@ -224,7 +225,7 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): data_as_cube = container.map_function(lambda model, index: model.data, modify=False) result = OutlierDetectionStep.call( - container, save_results=True, save_intermediate_results=True, in_memory=False + container, save_results=True, save_intermediate_results=True ) # Make sure nothing changed in SCI array @@ -326,8 +327,8 @@ def test_outlier_step_square_source_no_outliers(we_three_sci, tmp_cwd): dq_as_cube = [] with container: for model in container: - data_as_cube.append(model.data) - dq_as_cube.append(model.dq) + data_as_cube.append(model.data.copy()) + dq_as_cube.append(model.dq.copy()) container.shelve(model, modify=False) result = OutlierDetectionStep.call(container, in_memory=True) @@ -367,7 +368,7 @@ def test_outlier_step_image_weak_CR_dither(exptype, tmp_cwd): data_as_cube = [] with container: 
for model in container: - data_as_cube.append(model.data) + data_as_cube.append(model.data.copy()) container.shelve(model, modify=False) result = OutlierDetectionStep.call(container, in_memory=True) @@ -462,13 +463,13 @@ def test_outlier_step_weak_cr_tso(exptype, tsovisit): assert result.dq[cr_timestep, 12, 12] == OUTLIER_DO_NOT_USE -def test_create_median_library(three_sci_as_asn, tmp_cwd): - """Test creation of median library""" +def test_create_median(three_sci_as_asn, tmp_cwd): + """Test creation of median on disk vs in memory""" lib_on_disk = ModelLibrary(three_sci_as_asn, on_disk=True) lib_in_memory = ModelLibrary(three_sci_as_asn, on_disk=False) - median_on_disk = create_median_library(lib_on_disk, 0.7) - median_in_memory = create_median_library(lib_in_memory, 0.7) + median_on_disk = create_median(lib_on_disk, 0.7) + median_in_memory = create_median(lib_in_memory, 0.7) # Make sure the median library is the same for on-disk and in-memory assert np.allclose(median_on_disk, median_in_memory) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 62ad2aa75d..0bb6f86180 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -31,7 +31,7 @@ def create_cube_median(cube_model, maskpt): return median -def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=10.0): +def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): """Create a median image from the singly resampled images. resampled_models is expected to be a ModelLibrary for imaging modes. 
""" @@ -39,10 +39,8 @@ def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=10 weight_thresholds = [] with resampled_models: for resampled in resampled_models: - weight = resampled.wht - weight_threshold = compute_weight_threshold(weight, maskpt) + weight_threshold = compute_weight_threshold(resampled.wht, maskpt) weight_thresholds.append(weight_threshold) - # close and delete the model, just to explicitly try to keep the memory as clean as possible resampled_models.shelve(resampled, modify=False) # compute median over all models @@ -53,6 +51,7 @@ def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=10 for resampled in resampled_models: model_list.append(resampled.data) resampled_models.shelve(resampled, modify=False) + del resampled return np.nanmedian(np.array(model_list), axis=0) else: # set up buffered access to all input models @@ -65,12 +64,12 @@ def create_median_library(resampled_models, maskpt, on_disk=True, buffer_size=10 del example_model # get spatial sections of library and compute timewise median, one by one - resampled_sections = _get_sections_library(resampled_models, nsections, section_nrows, shp[0]) + resampled_sections = _get_sections(resampled_models, nsections, section_nrows, shp[0]) median_image_empty = np.empty(shp, dtype) * np.nan return _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty) -def _get_sections_library(library, nsections, section_nrows, imrows): +def _get_sections(library, nsections, section_nrows, imrows): """Iterator to return sections from a ModelLibrary. 
Parameters @@ -86,6 +85,18 @@ def _get_sections_library(library, nsections, section_nrows, imrows): imrows : int The total number of rows in the image + + Yields + ------ + data_subset : ndarray + array of shape (len(library), section_nrows, ncols) representing a spatial + subset of all the data arrays in the library + + weight_subset : ndarray + weights corresponding to data_subset + + row_range : tuple + The range of rows in the image covered by the data arrays """ with library: example_model = library.borrow(0) @@ -98,22 +109,20 @@ def _get_sections_library(library, nsections, section_nrows, imrows): row1 = i * section_nrows row2 = min(row1 + section_nrows, imrows) - data_list = np.empty((len(library), row2 - row1, shp[1]), dtype) - weight_list = np.empty((len(library), row2 - row1, shp[1]), dtype_wht) + data_subset = np.empty((len(library), row2 - row1, shp[1]), dtype) + weight_subset = np.empty((len(library), row2 - row1, shp[1]), dtype_wht) with library: for j, model in enumerate(library): - data_list[j] = model.data[row1:row2] - weight_list[j] = model.wht[row1:row2] + data_subset[j] = model.data[row1:row2] + weight_subset[j] = model.wht[row1:row2] library.shelve(model, j, modify=False) - del model - yield (data_list, weight_list, (row1, row2)) + yield (data_subset, weight_subset, (row1, row2)) def _compute_buffer_indices(model, buffer_size=None): imrows, imcols = model.data.shape data_item_size = model.data.itemsize - #data_item_type = model.data.dtype min_buffer_size = imcols * data_item_size buffer_size = min_buffer_size if buffer_size is None else (buffer_size * _ONE_MB) section_nrows = min(imrows, int(buffer_size // min_buffer_size)) @@ -126,23 +135,6 @@ def _compute_buffer_indices(model, buffer_size=None): return nsections, section_nrows -def create_median(resampled_models, maskpt): - """Create a median image from the singly resampled images. - Expects a ModelContainer, e.g. 
for spectroscopic modes - """ - log.info("Computing median") - - weight_thresholds = compute_weight_threshold_container(resampled_models, maskpt) - - # Now, set up buffered access to all input models - resampled_models.set_buffer(1.0) # Set buffer at 1Mb - resampled_sections = resampled_models.get_sections() - median_image_empty = np.empty((resampled_models.imrows, resampled_models.imcols), - resampled_models.imtype) - median_image_empty[:] = np.nan # initialize with NaNs - return _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty) - - def _create_median(resampled_sections, resampled_models, weight_thresholds, median_image_empty): median_image = median_image_empty for (resampled_sci, resampled_weight, (row1, row2)) in resampled_sections: @@ -174,42 +166,6 @@ def _create_median(resampled_sections, resampled_models, weight_thresholds, medi return median_image -def compute_weight_threshold_container(resampled_models, maskpt): - ''' - Compute weight means without keeping datamodels for each input open - - Parameters - ---------- - resampled_models : ~jwst.datamodels.ModelContainer - The input data models. - - maskpt : float - The percentage of the mean weight to use as a threshold for masking. - - Returns - ------- - list - The weight thresholds for each integration. 
- ''' - - # Start by ensuring that the ModelContainer does NOT open and keep each datamodel - ropen_orig = resampled_models._return_open - resampled_models._return_open = True - # keep track of resulting computation for each input resampled datamodel - weight_thresholds = [] - # For each model, compute the bad-pixel threshold from the weight arrays - for resampled in resampled_models: - weight = resampled.wht - weight_threshold = compute_weight_threshold(weight, maskpt) - weight_thresholds.append(weight_threshold) - # close and delete the model, just to explicitly try to keep the memory as clean as possible - resampled.close() - del resampled - # Reset ModelContainer attribute to original value - resampled_models._return_open = ropen_orig - return weight_thresholds - - def flag_crs_in_models( input_models, median_data, @@ -218,16 +174,10 @@ def flag_crs_in_models( for image in input_models: # dq flags will be updated in-place flag_model_crs(image, median_data, snr1) + -def flag_crs_in_models_library( - input_models, - median_data, - snr1, -): - input_models.map_function(lambda image, index: flag_model_crs(image, median_data, snr1), modify=True) - -def flag_crs_in_models_with_resampling( - input_models, +def flag_resampled_model_crs( + input_model, median_data, median_wcs, snr1, @@ -236,48 +186,22 @@ def flag_crs_in_models_with_resampling( scale2, backg, ): - for image in input_models: - if 'SPECTRAL' not in image.meta.wcs.output_frame.axes_type: - input_pixflux_area = image.meta.photometry.pixelarea_steradians - # Set array shape, needed to compute image pixel area - image.meta.wcs.array_shape = image.shape - input_pixel_area = compute_image_pixel_area(image.meta.wcs) - pix_ratio = np.sqrt(input_pixflux_area / input_pixel_area) - else: - pix_ratio = 1.0 - - blot = gwcs_blot(median_data, median_wcs, image.data.shape, image.meta.wcs, pix_ratio) - # dq flags will be updated in-place - flag_resampled_model_crs(image, blot, snr1, snr2, scale1, scale2, backg) + if 
'SPECTRAL' not in input_model.meta.wcs.output_frame.axes_type: + input_pixflux_area = input_model.meta.photometry.pixelarea_steradians + # Set array shape, needed to compute image pixel area + input_model.meta.wcs.array_shape = input_model.shape + input_pixel_area = compute_image_pixel_area(input_model.meta.wcs) + pix_ratio = np.sqrt(input_pixflux_area / input_pixel_area) + else: + pix_ratio = 1.0 -def flag_crs_in_models_with_resampling_library( - input_models, - median_data, - median_wcs, - snr1, - snr2, - scale1, - scale2, - backg, -): - with input_models: - for image in input_models: - if 'SPECTRAL' not in image.meta.wcs.output_frame.axes_type: - input_pixflux_area = image.meta.photometry.pixelarea_steradians - # Set array shape, needed to compute image pixel area - image.meta.wcs.array_shape = image.shape - input_pixel_area = compute_image_pixel_area(image.meta.wcs) - pix_ratio = np.sqrt(input_pixflux_area / input_pixel_area) - else: - pix_ratio = 1.0 - - blot = gwcs_blot(median_data, median_wcs, image.data.shape, image.meta.wcs, pix_ratio) - # dq flags will be updated in-place - flag_resampled_model_crs(image, blot, snr1, snr2, scale1, scale2, backg) - input_models.shelve(image) + blot = gwcs_blot(median_data, median_wcs, input_model.data.shape, input_model.meta.wcs, pix_ratio) + # dq flags will be updated in-place + _flag_resampled_model_crs(input_model, blot, snr1, snr2, scale1, scale2, backg) -def flag_resampled_model_crs( - image, + +def _flag_resampled_model_crs( + input_model, blot, snr1, snr2, @@ -285,27 +209,37 @@ def flag_resampled_model_crs( scale2, backg, ): - """ - Flag crs in image based on a resampled (and blotted) data (blot). - """ # If the datamodel has a measured background that has not been subtracted # use it instead of the user provided backg. 
# Get background level of science data if it has not been subtracted, so it # can be added into the level of the blotted data, which has been # background-subtracted - if (image.meta.background.subtracted is False and - image.meta.background.level is not None): - backg = image.meta.background.level + if (input_model.meta.background.subtracted is False and + input_model.meta.background.level is not None): + backg = input_model.meta.background.level log.debug(f"Adding background level {backg} to blotted image") - cr_mask = flag_resampled_crs(image.data, image.err, blot, snr1, snr2, scale1, scale2, backg) + cr_mask = flag_resampled_crs(input_model.data, input_model.err, blot, snr1, snr2, scale1, scale2, backg) # update the dq flags in-place - image.dq |= cr_mask * np.uint32(DO_NOT_USE | OUTLIER) - + input_model.dq |= cr_mask * np.uint32(DO_NOT_USE | OUTLIER) log.info(f"{np.count_nonzero(cr_mask)} pixels marked as outliers") +def flag_crs_in_models_with_resampling( + input_models, + median_data, + median_wcs, + snr1, + snr2, + scale1, + scale2, + backg, +): + for image in input_models: + flag_resampled_model_crs(image, median_data, median_wcs, snr1, snr2, scale1, scale2, backg) + + def flag_model_crs(image, blot, snr): cr_mask = flag_crs(image.data, image.err, blot, snr) # update dq array in-place diff --git a/jwst/pipeline/calwebb_coron3.py b/jwst/pipeline/calwebb_coron3.py index 70ab07dcec..04673418a9 100644 --- a/jwst/pipeline/calwebb_coron3.py +++ b/jwst/pipeline/calwebb_coron3.py @@ -197,7 +197,7 @@ def process(self, user_input): # Call the resample step to combine all psf-subtracted target images # for compatibility with image3 pipeline use of ModelLibrary, - # convert ModelContainer to ModelLibrary and then back + # convert ModelContainer to ModelLibrary resample_library = ModelLibrary(resample_input, on_disk=False) # Output is a single datamodel diff --git a/jwst/pipeline/calwebb_image3.py b/jwst/pipeline/calwebb_image3.py index 64b2a6b28e..a485570cb7 100644 
--- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from stdatamodels.jwst import datamodels from jwst.datamodels import ModelLibrary @@ -32,7 +33,7 @@ class Image3Pipeline(Pipeline): class_alias = "calwebb_image3" spec = """ - on_disk = boolean(default=False) # Preserve memory using temporary files + in_memory = boolean(default=True) # If False, preserve memory using temporary files at the expense of runtime """ # Define alias to steps @@ -68,39 +69,63 @@ def process(self, input_data): # Only load science members from input ASN; # background and target-acq members are not needed. - if isinstance(input_data, ModelLibrary): - input_models = input_data - else: - input_models = ModelLibrary(input_data, asn_exptypes=['science'], on_disk=self.on_disk) + input_models = self._load_input_as_library(input_data) - if self.output_file is None: + if (self.output_file is None) and hasattr(input_models.asn["products"][0], "name"): # If input is an association, set the output to the product name. 
self.output_file = input_models.asn["products"][0]["name"] + # Check if input is single or multiple exposures + has_groups = len(input_models) > 1 + with input_models: - model = input_models.borrow(0) - is_moving = is_moving_target(model) - input_models.shelve(model, 0, modify=False) - if is_moving: - input_models = self.assign_mtwcs(input_models) - else: - input_models = self.tweakreg(input_models) + if has_groups: + model = input_models.borrow(0) + is_moving = is_moving_target(model) + input_models.shelve(model, 0, modify=False) + if is_moving: + input_models = self.assign_mtwcs(input_models) + else: + input_models = self.tweakreg(input_models) - input_models = self.skymatch(input_models) - input_models = self.outlier_detection(input_models) + input_models = self.skymatch(input_models) + input_models = self.outlier_detection(input_models) - # elif self.skymatch.skymethod == 'match': - # self.log.warning("Turning 'skymatch' step off for a single " - # "input image when 'skymethod' is 'match'") + elif self.skymatch.skymethod == 'match': + self.log.warning("Turning 'skymatch' step off for a single " + "input image when 'skymethod' is 'match'") - # else: - # # FIXME: here input_models is a DataModel, passing - # # that to skymatch would cause an error when it tries to call - # # ModelContainer(DataModel). This can be seen by running - # # strun calwebb_image3 any_cal.fits --steps.skymatch.method=local - # input_models = self.skymatch(input_models) + else: + input_models = self.skymatch(input_models) result = self.resample(input_models) del input_models if isinstance(result, datamodels.ImageModel) and result.meta.cal_step.resample == 'COMPLETE': self.source_catalog(result) + + + def _load_input_as_library(self, input): + """ + Load any valid input type into a ModelLibrary, including + single datamodels, associations, ModelLibrary instances, and + filenames pointing to those types. 
+ """ + + if isinstance(input, ModelLibrary): + return input + + if isinstance(input, (str, dict)): + try: + # Try opening input as an association + return ModelLibrary(input, asn_exptypes=['science'], on_disk=not self.in_memory) + except OSError: + # Try opening input as a single cal file + input = datamodels.open(input) + input = [input,] + return ModelLibrary(input, asn_exptypes=['science'], on_disk=not self.in_memory) + elif isinstance(input, Sequence): + return ModelLibrary(input, asn_exptypes=['science'], on_disk=not self.in_memory) + elif isinstance(input, datamodels.JwstDataModel): + return ModelLibrary([input], asn_exptypes=['science'], on_disk=not self.in_memory) + else: + raise TypeError(f"Input type {type(input)} not supported.") \ No newline at end of file diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 03ea4f2c2b..4db9d1d5a4 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -140,17 +140,8 @@ def process(self, input): if is_moving_target(input_models[0]): self.log.info("Assigning WCS to a Moving Target exposure.") - - # for compatibility with calwebb_image3, need to convert to ModelLibrary then back here - # keep asn metadata from input container - only metadata of individual models is modified - # by the assign_mtwcs step - library = ModelLibrary(input_models, on_disk=False) - library = self.assign_mtwcs(input_models) - with library: - for i, model in enumerate(library): - input_models[i] = model.copy() - library.shelve(model, modify=False) - del library + # assign_mtwcs modifies input_models in-place + self.assign_mtwcs(input_models) # If background data are present, call the master background step if members_by_type['background']: diff --git a/jwst/pipeline/tests/test_calwebb_image3.py b/jwst/pipeline/tests/test_calwebb_image3.py index 36b81b79d9..953694c8b6 100644 --- a/jwst/pipeline/tests/test_calwebb_image3.py +++ b/jwst/pipeline/tests/test_calwebb_image3.py @@ -62,8 +62,8 @@ 
def make_dummy_association(make_dummy_cal_file): os.system(f"asn_from_list -o {INPUT_ASN} --product-name {OUTPUT_PRODUCT} -r DMS_Level3_Base {INPUT_FILE} {INPUT_FILE_2}") -@pytest.fixture(scope='module') -def run_image3_pipeline(make_dummy_association): +@pytest.mark.parametrize("in_memory", [True, False]) +def test_run_image3_pipeline(make_dummy_association, in_memory): ''' Two-product association passed in, run pipeline, skipping most steps ''' @@ -81,16 +81,38 @@ def run_image3_pipeline(make_dummy_association): "--steps.outlier_detection.skip=true", "--steps.resample.skip=true", "--steps.source_catalog.skip=true", - "--on_disk=True",] + f"--in_memory={str(in_memory)}",] + + Step.from_cmdline(args) + + _is_run_complete(LOGFILE) + + +def test_run_image3_single_file(make_dummy_cal_file): + + logcfg_content = f"[*] \n \ + level = INFO \n \ + handler = file:{LOGFILE}" + with open(LOGCFG, 'w') as f: + f.write(logcfg_content) + + args = ["calwebb_image3", INPUT_FILE, + f"--logcfg={LOGCFG}", + "--steps.tweakreg.skip=true", + "--steps.skymatch.skip=true", + "--steps.outlier_detection.skip=true", + "--steps.resample.skip=true", + "--steps.source_catalog.skip=true",] Step.from_cmdline(args) + _is_run_complete(LOGFILE) -def test_run_complete(run_image3_pipeline): +def _is_run_complete(logfile): ''' Check that the pipeline runs to completion ''' msg = "Step Image3Pipeline done" with open(LOGFILE, 'r') as f: log = f.read() - assert msg in log \ No newline at end of file + assert msg in log diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index b5dc12bb60..c4035e1176 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -182,8 +182,9 @@ def __init__(self, input_models, output=None, single=False, blendheaders=True, # update meta data and wcs with input_models: example_model = input_models.borrow(0) + self.blank_output.update(example_model) input_models.shelve(example_model, 0, modify=False) - self.blank_output.update(example_model) + del 
example_model self.blank_output.meta.wcs = self.output_wcs self.blank_output.meta.photometry.pixelarea_steradians = output_pix_area self.blank_output.meta.photometry.pixelarea_arcsecsq = ( @@ -950,7 +951,8 @@ def compute_image_pixel_area(wcs): def copy_asn_info_from_library(library, output_model): if not hasattr(library, "asn"): # No ASN table, occurs when input comes from ModelContainer in spectroscopic modes - # in this case the container should retain the asn information in ResampleSpecStep + # in this case do nothing; the asn info will be passed along later + # by code inside ResampleSpecStep return if (asn_pool := library.asn.get("asn_pool", None)) is not None: output_model.meta.asn.pool_name = asn_pool diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 5a7b2cc95b..9e8a06ea01 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -12,6 +12,7 @@ from astropy.utils.exceptions import AstropyUserWarning from gwcs import wcstools, WCS from gwcs import coordinate_frames as cf +from gwcs.geometry import SphericalToCartesian from stdatamodels.jwst import datamodels from jwst.assign_wcs.util import compute_scale, wrap_ra @@ -23,6 +24,8 @@ log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) +_S2C = SphericalToCartesian() + __all__ = ["ResampleSpecData"] @@ -192,7 +195,6 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.output_models = ModelContainer() - def build_nirspec_output_wcs(self, input_models, refmodel=None): """ Create a spatial/spectral WCS covering the footprint of the input. 
@@ -506,7 +508,6 @@ def build_interpolated_output_wcs(self, input_models): # append wavelengths that fall outside the endpoint of # of wavelength array when looping over additional data - all_wavelength = [] all_ra_slit = [] all_dec_slit = [] @@ -945,4 +946,4 @@ def compute_spectral_pixel_scale(wcs, fiducial=None, disp_axis=1): fiducial = wcs(center_x, center_y) pixel_scale = compute_scale(wcs, fiducial, disp_axis=disp_axis) - return float(pixel_scale) + return float(pixel_scale) \ No newline at end of file diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index 15a16e3a68..2d23472e0f 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -134,8 +134,7 @@ def _process_multislit(self, input_models): for container in containers.values(): resamp = resample_spec.ResampleSpecData(container, **self.drizpars) - if isinstance(container, ModelContainer): - library = ModelLibrary(container, on_disk=False) + library = ModelLibrary(container, on_disk=False) library = resamp.do_drizzle(library) with library: for i, model in enumerate(library): diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index 07c56dcfbc..41396ab396 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -94,13 +94,15 @@ def process(self, input): for group_index, (group_id, group_inds) in enumerate(library.group_indices.items()): sky_images = [] for index in group_inds: + print(index) model = library.borrow(index) - sky_images.append(self._imodel2skyim(model, index)) - library.shelve(model, index, modify=False) + try: + sky_images.append(self._imodel2skyim(model, index)) + finally: + library.shelve(model, index, modify=False) if len(sky_images) == 1: images.extend(sky_images) else: - # FIXME: why does this use a number for group_index? 
images.append(SkyGroup(sky_images, id=group_index)) # match/compute sky values: @@ -189,6 +191,19 @@ def _imodel2skyim(self, image_model, index): return sky_im def _set_sky_background(self, sky_image, library, step_status): + """ + Parameters + ---------- + sky_image : SkyImage + SkyImage object containing sky image data and metadata. + + library : ModelLibrary + Library of input data models, must be open + + step_status : str + Status of the sky subtraction step. Must be one of the following: + 'COMPLETE', 'SKIPPED'. + """ index = sky_image.meta['index'] dm = library.borrow(index) sky = sky_image.sky diff --git a/jwst/skymatch/tests/test_skymatch.py b/jwst/skymatch/tests/test_skymatch.py index d05541aaf3..2442819ffc 100644 --- a/jwst/skymatch/tests/test_skymatch.py +++ b/jwst/skymatch/tests/test_skymatch.py @@ -517,7 +517,7 @@ def test_skymatch_2x(tmp_cwd, nircam_rate, tmp_path, skymethod, subtract): # 2nd run. step.subtract = subtract - result2 = step.run(asn_out_fname) + result2 = step.run(result) # compute expected levels if skymethod in ['local', 'global+match']: diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 9b02d5c7a2..0e3b78ef08 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -84,7 +84,6 @@ def process(self, input_model): threshold = self.snr_threshold * bkg.background_rms finder = JWSTSourceFinder(threshold, self.npixels, deblend=self.deblend) - del threshold convolved_data = convolve_data(model.data, self.kernel_fwhm, mask=coverage_mask) @@ -97,12 +96,10 @@ def process(self, input_model): catobj = JWSTSourceCatalog(model, segment_img, convolved_data, self.kernel_fwhm, aperture_params, abvega_offset, ci_star_thresholds) - del convolved_data catalog = catobj.catalog # add back background to data so input model is unchanged model.data += bkg.background - del bkg if self.save_results: cat_filepath = self.make_output_path(ext='.ecsv') diff 
--git a/jwst/stpipe/utilities.py b/jwst/stpipe/utilities.py index e4643d9512..0147aae604 100644 --- a/jwst/stpipe/utilities.py +++ b/jwst/stpipe/utilities.py @@ -36,7 +36,8 @@ import os import re from collections.abc import Sequence -from jwst.datamodels import ModelLibrary +from jwst import datamodels +from pathlib import PurePath # Configure logging logger = logging.getLogger(__name__) @@ -156,7 +157,7 @@ def record_step_status(datamodel, cal_step, success=True): Parameters ---------- - datamodel : `~jwst.datamodels.JwstDataModel` instance + datamodel : `~jwst.datamodels.JwstDataModel`, `~jwst.datamodels.ModelContainer`, `~jwst.datamodels.ModelLibrary`, str, or Path instance This is the datamodel or container of datamodels to modify in place cal_step : str @@ -170,10 +171,13 @@ def record_step_status(datamodel, cal_step, success=True): else: status = SKIPPED + if isinstance(datamodel, (str, PurePath)): + datamodel = datamodels.open(datamodel) + if isinstance(datamodel, Sequence): for model in datamodel: model.meta.cal_step._instance[cal_step] = status - elif isinstance(datamodel, ModelLibrary): + elif isinstance(datamodel, datamodels.ModelLibrary): with datamodel: for model in datamodel: model.meta.cal_step._instance[cal_step] = status diff --git a/jwst/tweakreg/tests/test_multichip_jwst.py b/jwst/tweakreg/tests/test_multichip_jwst.py index b29bd8959a..4402f7b897 100644 --- a/jwst/tweakreg/tests/test_multichip_jwst.py +++ b/jwst/tweakreg/tests/test_multichip_jwst.py @@ -406,7 +406,7 @@ def test_multichip_alignment_step_rel(monkeypatch): with result: for im in result: assert im.meta.cal_step.tweakreg == 'COMPLETE' - result.shelve(im) + result.shelve(im, modify=False) with result: m1 = result.borrow(1) diff --git a/jwst/tweakreg/tweakreg_step.py b/jwst/tweakreg/tweakreg_step.py index d15511a441..0ec9d58558 100644 --- a/jwst/tweakreg/tweakreg_step.py +++ b/jwst/tweakreg/tweakreg_step.py @@ -123,7 +123,7 @@ class TweakRegStep(Step): # stpipe general options 
output_use_model = boolean(default=True) # When saving use `DataModel.meta.filename` - on_disk = boolean(default=False) # Preserve memory using temporary files + in_memory = boolean(default=True) # If False, preserve memory using temporary files at expense of runtime """ reference_file_types = [] @@ -132,7 +132,7 @@ def process(self, input): if isinstance(input, ModelLibrary): images = input else: - images = ModelLibrary(input, on_disk=self.on_disk) + images = ModelLibrary(input, on_disk=not self.in_memory) if len(images) == 0: raise ValueError("Input must contain at least one image model.") @@ -193,13 +193,12 @@ def process(self, input): # Build the catalog and corrector for each input images with images: for (model_index, image_model) in enumerate(images): - # now that the model is open, check it's metadata for a custom catalog + # now that the model is open, check its metadata for a custom catalog # only if it's not listed in the catdict if use_custom_catalogs and image_model.meta.filename not in catdict: if (image_model.meta.tweakreg_catalog is not None and image_model.meta.tweakreg_catalog.strip()): catdict[image_model.meta.filename] = image_model.meta.tweakreg_catalog if use_custom_catalogs and catdict.get(image_model.meta.filename, None) is not None: - # FIXME this modifies the input_model image_model.meta.tweakreg_catalog = catdict[image_model.meta.filename] # use user-supplied catalog: self.log.info("Using user-provided input catalog " @@ -284,32 +283,33 @@ def process(self, input): # absolute alignment to the reference catalog # can (and does) occur after alignment between groups if align_to_abs_refcat: - try: - with images: + with images: + try: ref_image = images.borrow(0) + correctors = \ + twk.absolute_align(correctors, self.abs_refcat, + ref_wcs=ref_image.meta.wcs, + ref_wcsinfo=ref_image.meta.wcsinfo.instance, + epoch=Time(ref_image.meta.observation.date).decimalyear, + abs_minobj=self.abs_minobj, + abs_fitgeometry=self.abs_fitgeometry, + 
abs_nclip=self.abs_nclip, + abs_sigma=self.abs_sigma, + abs_searchrad=self.abs_searchrad, + abs_use2dhist=self.abs_use2dhist, + abs_separation=self.abs_separation, + abs_tolerance=self.abs_tolerance, + save_abs_catalog=self.save_abs_catalog, + abs_catalog_output_dir=self.output_dir, + ) images.shelve(ref_image, 0, modify=False) - correctors = \ - twk.absolute_align(correctors, self.abs_refcat, - ref_wcs=ref_image.meta.wcs, - ref_wcsinfo=ref_image.meta.wcsinfo.instance, - epoch=Time(ref_image.meta.observation.date).decimalyear, - abs_minobj=self.abs_minobj, - abs_fitgeometry=self.abs_fitgeometry, - abs_nclip=self.abs_nclip, - abs_sigma=self.abs_sigma, - abs_searchrad=self.abs_searchrad, - abs_use2dhist=self.abs_use2dhist, - abs_separation=self.abs_separation, - abs_tolerance=self.abs_tolerance, - save_abs_catalog=self.save_abs_catalog, - abs_catalog_output_dir=self.output_dir, - ) - del ref_image + del ref_image - except twk.TweakregError as e: - self.log.warning(str(e)) - record_step_status(images, "tweakreg", success=False) - return images + except twk.TweakregError as e: + self.log.warning(str(e)) + images.shelve(ref_image, 0, modify=False) + record_step_status(images, "tweakreg", success=False) + return images if local_align_failed and not align_to_abs_refcat: record_step_status(images, "tweakreg", success=False) @@ -367,8 +367,8 @@ def _apply_tweakreg_solution(self, msg = f"Failed to update 'meta.wcsinfo' with FITS SIP \ approximation. 
Reported error is: \n {e.args[0]}" self.log.warning(msg) + record_step_status(image_model, "tweakreg", success=True) images.shelve(image_model) - record_step_status(images, "tweakreg", success=True) return images diff --git a/pyproject.toml b/pyproject.toml index 825ab5ca3e..8f09685eb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,8 +36,7 @@ dependencies = [ "spherical-geometry>=1.2.22", "stcal>=1.8.0,<1.9.0", "stdatamodels>=2.0.0,<2.1.0", - "stpipe @ git+https://github.com/spacetelescope/stpipe.git@main", - "stsci.image>=2.3.5", + "stpipe >=0.7.0,<0.8.0", "stsci.imagestats>=1.6.3", "synphot>=1.2", "tweakwcs>=0.8.8", From 26e543627d9ed8e28af6475f0f3fe7f8ab7862cf Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 16 Aug 2024 17:23:42 -0400 Subject: [PATCH 39/85] ruff style check --- jwst/pipeline/calwebb_spec3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 4db9d1d5a4..7fd69e4373 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -5,7 +5,7 @@ from stdatamodels.jwst import datamodels -from jwst.datamodels import SourceModelContainer, ModelLibrary +from jwst.datamodels import SourceModelContainer from jwst.stpipe import query_step_status from ..associations.lib.rules_level3_base import format_product From 8f902b789aed33cdfb04ac123b69d2d6a5e128da Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 19 Aug 2024 08:53:27 -0400 Subject: [PATCH 40/85] call img.area --- jwst/resample/resample.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index c4035e1176..785364e4d5 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -307,7 +307,10 @@ def resample_many_to_many(self, input_models): log.info(f"{len(indices)} exposures to drizzle together") for index in indices: img = input_models.borrow(index) - img = datamodels.open(img) # must call this 
explicitly to get area reference file + if isinstance(img, datamodels.SlitModel): + # must call this explicitly to populate area extension + # although the existence of this extension may not be necessary + img.area = img.area iscale = self._get_intensity_scale(img) log.debug(f'Using intensity scale iscale={iscale}') From 33d66c9a8e6ad3a73f7759046202be0315953548 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 19 Aug 2024 10:17:24 -0400 Subject: [PATCH 41/85] bugfixes for remove s2d files and for mtimage regtest --- jwst/outlier_detection/spec.py | 12 ++++++------ jwst/regtest/test_nircam_mtimage.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 1647f96061..041c2ee4b2 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -84,12 +84,6 @@ def detect_outliers( log.info("Writing out resampled spectra...") model.save(model.meta.filename) drizzled_models.shelve(model) - else: - # since we're not saving intermediate results if the drizzled models - # were written to disk, remove them - if not in_memory: - for fn in drizzled_models._members: - remove_file(fn["expname"]) else: # TODO: there appears not to be any test coverage for this branch @@ -126,6 +120,12 @@ def detect_outliers( median_model.meta.filename)) median_model.save(median_model.meta.filename) del median_model + else: + # since we're not saving intermediate results if the drizzled models + # were written to disk, remove them + if not in_memory: + for fn in drizzled_models._members: + remove_file(fn["expname"]) # Perform outlier detection using statistical comparisons between # each original input image and its blotted version of the median image diff --git a/jwst/regtest/test_nircam_mtimage.py b/jwst/regtest/test_nircam_mtimage.py index 961d34914d..c5e6140844 100644 --- a/jwst/regtest/test_nircam_mtimage.py +++ b/jwst/regtest/test_nircam_mtimage.py @@ -11,7 +11,7 @@ def 
test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs): """Test resampled i2d of moving target exposures for NIRCam imaging""" rtdata.get_asn("nircam/image/mt_asn.json") - rtdata.output = "mt_assoc_i2d.fits" + rtdata.output = "mt_asn_i2d.fits" args = ["calwebb_image3", rtdata.input] Step.from_cmdline(args) rtdata.get_truth("truth/test_nircam_mtimage/mt_assoc_i2d.fits") From f35725a9ee85fbc0cd36baa0a3822a110a582539 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 20 Aug 2024 13:45:06 -0400 Subject: [PATCH 42/85] attempted fix for resample and source_catalog result filenames --- jwst/resample/resample_step.py | 7 ++++--- jwst/source_catalog/source_catalog_step.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 96c26d8160..936b5445ab 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -65,8 +65,8 @@ def process(self, input): input_models = ModelLibrary(input, on_disk=not self.in_memory) elif isinstance(input, ImageModel): input_models = ModelLibrary([input], on_disk=not self.in_memory) - input_models.asn_pool_name = input.meta.asn.pool_name - input_models.asn_table_name = input.meta.asn.table_name + input_models.asn["asn_pool"] = input.meta.asn.pool_name + input_models.asn["table_name"] = input.meta.asn.table_name output = input.meta.filename self.blendheaders = False else: @@ -79,7 +79,8 @@ def process(self, input): # resampling. 
# TODO: figure out why and make sure asn_table is carried along output = None - + if self.save_results: + self.output_file = output # Check that input models are 2D images with input_models: diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 0e3b78ef08..93b037db5c 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,6 +102,7 @@ def process(self, input_model): model.data += bkg.background if self.save_results: + self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) From 4ed08921072114b6944a1db6b4936e9f720f91bd Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 20 Aug 2024 14:10:45 -0400 Subject: [PATCH 43/85] remove setting of asn pool and table name in resample --- jwst/resample/resample_step.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 936b5445ab..81d30251b2 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -65,8 +65,6 @@ def process(self, input): input_models = ModelLibrary(input, on_disk=not self.in_memory) elif isinstance(input, ImageModel): input_models = ModelLibrary([input], on_disk=not self.in_memory) - input_models.asn["asn_pool"] = input.meta.asn.pool_name - input_models.asn["table_name"] = input.meta.asn.table_name output = input.meta.filename self.blendheaders = False else: From 79f91c60a8d54fed2c6b990fef60cbdb4f78741b Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 20 Aug 2024 14:31:16 -0400 Subject: [PATCH 44/85] pushing bad things to remote to diagnose regtest --- jwst/regtest/test_nircam_image.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jwst/regtest/test_nircam_image.py b/jwst/regtest/test_nircam_image.py index 982e62914a..98cd2217a0 100644 --- a/jwst/regtest/test_nircam_image.py +++ 
b/jwst/regtest/test_nircam_image.py @@ -131,6 +131,8 @@ def test_nircam_image_stage3_tweakreg(run_image3pipeline): @pytest.mark.parametrize("suffix", ["i2d"]) def test_nircam_image_stage3(run_image3pipeline, rtdata_module, fitsdiff_default_kwargs, suffix): """Test that resampled i2d looks good for NIRCam imaging""" + import os + raise ValueError(os.listdir(os.getcwd())) rtdata = rtdata_module rtdata.input = "jw01538-o046_20230331t102920_image3_00009_asn.json" output = f"jw01538-o046_t024_nircam_clear-f444w_{suffix}.fits" From 3fd25fc40daf1a7ae5d9abb6725499ac9af645cd Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 20 Aug 2024 16:29:15 -0400 Subject: [PATCH 45/85] yet another attempt to fix filename issue --- jwst/regtest/test_nircam_image.py | 2 -- jwst/resample/resample_step.py | 2 +- jwst/source_catalog/source_catalog_step.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/jwst/regtest/test_nircam_image.py b/jwst/regtest/test_nircam_image.py index 98cd2217a0..982e62914a 100644 --- a/jwst/regtest/test_nircam_image.py +++ b/jwst/regtest/test_nircam_image.py @@ -131,8 +131,6 @@ def test_nircam_image_stage3_tweakreg(run_image3pipeline): @pytest.mark.parametrize("suffix", ["i2d"]) def test_nircam_image_stage3(run_image3pipeline, rtdata_module, fitsdiff_default_kwargs, suffix): """Test that resampled i2d looks good for NIRCam imaging""" - import os - raise ValueError(os.listdir(os.getcwd())) rtdata = rtdata_module rtdata.input = "jw01538-o046_20230331t102920_image3_00009_asn.json" output = f"jw01538-o046_t024_nircam_clear-f444w_{suffix}.fits" diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 81d30251b2..9e53ad9125 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -71,7 +71,7 @@ def process(self, input): raise RuntimeError(f"Input {input} is not a 2D image.") try: - output = input_models.asn["products"][0]["members"][0]["expname"] + output = input_models.asn["products"][0]["name"] 
except KeyError: # coron data goes through this path by the time it gets to # resampling. diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 93b037db5c..0e3b78ef08 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,7 +102,6 @@ def process(self, input_model): model.data += bkg.background if self.save_results: - self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) From 28fa2171f1c6c5c4bfb427c25288c9e01855c94a Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 20 Aug 2024 17:33:03 -0400 Subject: [PATCH 46/85] attempt to propagate fix also into source_catalog --- jwst/resample/resample_step.py | 3 +++ jwst/skymatch/skymatch_step.py | 1 - jwst/source_catalog/source_catalog_step.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 9e53ad9125..946dda192a 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -111,6 +111,9 @@ def process(self, input): else: model.meta.resample.pixel_scale_ratio = resamp.pscale_ratio model.meta.resample.pixfrac = kwargs['pixfrac'] + # update filename to reflect new product + # necessary to get source_catalog output names to match i2d filename + model.meta.filename = self.output_file result.shelve(model) if len(result) == 1: diff --git a/jwst/skymatch/skymatch_step.py b/jwst/skymatch/skymatch_step.py index 41396ab396..c64b63c00d 100644 --- a/jwst/skymatch/skymatch_step.py +++ b/jwst/skymatch/skymatch_step.py @@ -94,7 +94,6 @@ def process(self, input): for group_index, (group_id, group_inds) in enumerate(library.group_indices.items()): sky_images = [] for index in group_inds: - print(index) model = library.borrow(index) try: sky_images.append(self._imodel2skyim(model, index)) diff --git 
a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 0e3b78ef08..93b037db5c 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,6 +102,7 @@ def process(self, input_model): model.data += bkg.background if self.save_results: + self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) From 1863ef1ad6f8c5f9d45e7b7a2c3a79669388acaa Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 21 Aug 2024 14:35:00 -0400 Subject: [PATCH 47/85] bugfix for updating table and pool name in library._assign_member_to_model --- jwst/datamodels/library.py | 8 ++++---- jwst/datamodels/tests/test_library.py | 14 ++++++++++++-- jwst/outlier_detection/utils.py | 1 + pyproject.toml | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 23a3b964bd..8e49e80e7c 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -129,11 +129,11 @@ def _assign_member_to_model(self, model, member): if not hasattr(model.meta, "asn"): model.meta["asn"] = {} - if not hasattr(model.meta.asn, "table_name") and hasattr(self.asn, "table_name"): # do not clobber existing values - setattr(model.meta.asn, "table_name", getattr(self.asn, "table_name")) + if (model.meta.asn.table_name is None) and ("table_name" in self.asn.keys()): # do not clobber existing values + setattr(model.meta.asn, "table_name", self.asn["table_name"]) - if not hasattr(model.meta.asn, "pool_name") and hasattr(self.asn, "asn_pool"): # do not clobber existing values - setattr(model.meta.asn, "pool_name", getattr(self.asn, "asn_pool")) + if (model.meta.asn.pool_name is None) and ("asn_pool" in self.asn.keys()): # do not clobber existing values + setattr(model.meta.asn, "pool_name", self.asn["asn_pool"]) def _attrs_to_group_id( diff --git 
a/jwst/datamodels/tests/test_library.py b/jwst/datamodels/tests/test_library.py index d72968b7b5..ea737a9ac2 100644 --- a/jwst/datamodels/tests/test_library.py +++ b/jwst/datamodels/tests/test_library.py @@ -18,6 +18,8 @@ _N_MODELS = len(_OBSERVATION_NUMBERS) _N_GROUPS = len(set(_OBSERVATION_NUMBERS)) _PRODUCT_NAME = "foo_out" +_POOL_NAME = "some_pool" +_TABLE_NAME = "some_table" @pytest.fixture @@ -42,8 +44,12 @@ def example_asn_path(tmp_path): m.meta.filename = base_fn m.save(str(tmp_path / base_fn)) fns.append(base_fn) + asn = asn_from_list(fns, product_name=_PRODUCT_NAME) base_fn, contents = asn.dump(format="json") + contents_as_dict = json.loads(contents) + contents_as_dict['asn_pool'] = _POOL_NAME + contents = json.dumps(contents_as_dict) asn_filename = tmp_path / base_fn with open(asn_filename, 'w') as f: f.write(contents) @@ -132,10 +138,14 @@ def test_group_id_override(example_asn_path, asn_group_id, meta_group_id, expect def test_asn_attributes_assignment(example_library): + expected_table_name = "jwnoprogram-a3001_none_00008_asn.json" + assert example_library.asn["table_name"] == expected_table_name + assert example_library.asn["asn_pool"] == _POOL_NAME + # test that the association attributes are assigned to the models with example_library: for i in range(_N_MODELS): model = example_library.borrow(i) - assert hasattr(model.meta.asn, 'pool_name') - assert hasattr(model.meta.asn, 'table_name') + assert model.meta.asn.table_name == expected_table_name + assert model.meta.asn.pool_name == _POOL_NAME example_library.shelve(model, i, modify=False) \ No newline at end of file diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 0bb6f86180..a09303db65 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -116,6 +116,7 @@ def _get_sections(library, nsections, section_nrows, imrows): data_subset[j] = model.data[row1:row2] weight_subset[j] = model.wht[row1:row2] library.shelve(model, j, modify=False) 
+ del model yield (data_subset, weight_subset, (row1, row2)) diff --git a/pyproject.toml b/pyproject.toml index 8f09685eb2..ff6b557472 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "spherical-geometry>=1.2.22", "stcal>=1.8.0,<1.9.0", "stdatamodels>=2.0.0,<2.1.0", - "stpipe >=0.7.0,<0.8.0", + "stpipe>=0.7.0,<0.8.0", "stsci.imagestats>=1.6.3", "synphot>=1.2", "tweakwcs>=0.8.8", From cb5594ea8d1f76735a5d263c54841e9b3bee7020 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 21 Aug 2024 15:43:45 -0400 Subject: [PATCH 48/85] fixes based on @braingram review --- jwst/assign_mtwcs/moving_target_wcs.py | 4 +-- jwst/outlier_detection/imaging.py | 2 +- .../outlier_detection_step.py | 17 +++++++---- .../tests/test_outlier_detection.py | 4 +-- jwst/outlier_detection/utils.py | 21 +++++++++++++- jwst/pipeline/calwebb_image3.py | 28 +++++++++---------- jwst/pipeline/tests/test_calwebb_image3.py | 14 ++++------ jwst/regtest/test_nircam_mtimage.py | 2 +- jwst/resample/resample.py | 1 + jwst/resample/resample_step.py | 2 +- jwst/source_catalog/source_catalog_step.py | 2 +- jwst/stpipe/utilities.py | 3 -- jwst/tweakreg/tweakreg_step.py | 6 ++-- 13 files changed, 62 insertions(+), 44 deletions(-) diff --git a/jwst/assign_mtwcs/moving_target_wcs.py b/jwst/assign_mtwcs/moving_target_wcs.py index 44bac3f1d5..be4a5a1b8b 100644 --- a/jwst/assign_mtwcs/moving_target_wcs.py +++ b/jwst/assign_mtwcs/moving_target_wcs.py @@ -69,9 +69,9 @@ def assign_moving_target_wcs(input_models): model.meta.wcsinfo.mt_ra, model.meta.wcsinfo.mt_dec) del model.meta.wcs model.meta.wcs = new_wcs + record_step_status(model, "assign_mtwcs", True) input_models.shelve(model, i, modify=True) - - record_step_status(input_models, "assign_mtwcs", True) + return input_models diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 0b70c8f884..14d9b9daca 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -119,7 
+119,7 @@ def detect_outliers( # since we're not saving intermediate results if the drizzled models # were written to disk, remove them if not in_memory: - for fn in drizzled_models._members: + for fn in drizzled_models.asn["products"][0]["members"]: remove_file(fn["expname"]) # Perform outlier detection using statistical comparisons between diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index 9af7cacf30..f9cf4c57ed 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -76,8 +76,7 @@ def process(self, input_data): # determine the "mode" (if not set by the pipeline) mode = self._guess_mode(input_data) if mode is None: - record_step_status(input_data, "outlier_detection", False) - return input_data + return self._set_status(input_data, False) self.log.info(f"Outlier Detection mode: {mode}") # determine the asn_id (if not set by the pipeline) @@ -161,11 +160,9 @@ def process(self, input_data): else: self.log.error("Outlier detection failed for unknown/unsupported ", f"mode: {mode}") - record_step_status(input_data, "outlier_detection", False) - return input_data + return self._set_status(input_data, True) - record_step_status(result_models, "outlier_detection", True) - return result_models + return self._set_status(result_models, True) def _guess_mode(self, input_models): # The pipelines should set this mode or ideally these should @@ -231,3 +228,11 @@ def _get_asn_id(self, input_models): asn_id=asn_id ) return asn_id + + def _set_status(self, input_models, status): + # this might be called with the input which might be a filename or path + if not isinstance(input_models, (datamodels.JwstDataModel, ModelLibrary)): + input_models = datamodels.open(input_models) + + record_step_status(input_models, "outlier_detection", status) + return input_models diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py 
b/jwst/outlier_detection/tests/test_outlier_detection.py index e8a6ff5f55..d594c8c7fc 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -222,7 +222,7 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): assert len(median_files) == 0 # Save all the data into a separate array before passing into step - data_as_cube = container.map_function(lambda model, index: model.data, modify=False) + data_as_cube = container.map_function(lambda model, index: model.data.copy(), modify=False) result = OutlierDetectionStep.call( container, save_results=True, save_intermediate_results=True @@ -287,7 +287,7 @@ def test_outlier_step_on_disk(three_sci_as_asn, tmp_cwd): container = ModelLibrary(three_sci_as_asn, on_disk=True) # Save all the data into a separate array before passing into step - data_as_cube = container.map_function(lambda model, index: model.data, modify=False) + data_as_cube = container.map_function(lambda model, index: model.data.copy(), modify=False) result = OutlierDetectionStep.call( container, save_results=True, save_intermediate_results=True, in_memory=False diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index a09303db65..b14ab125c2 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -33,7 +33,26 @@ def create_cube_median(cube_model, maskpt): def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): """Create a median image from the singly resampled images. - resampled_models is expected to be a ModelLibrary for imaging modes. + + Parameters + ---------- + resampled_models : ModelLibrary + The singly resampled images. + + maskpt : float + The weight threshold for masking out low weight pixels. + + on_disk : bool + If True, the input models are on disk and will be read in chunks. + + buffer_size : float + The size of chunk in MB, per input model, that will be read into memory. 
+ This parameter has no effect if on_disk is False. + + Returns + ------- + median_image : ndarray + The median image. """ # Compute the weight threshold for each input model weight_thresholds = [] diff --git a/jwst/pipeline/calwebb_image3.py b/jwst/pipeline/calwebb_image3.py index a485570cb7..473530ae32 100644 --- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -76,27 +76,27 @@ def process(self, input_data): self.output_file = input_models.asn["products"][0]["name"] # Check if input is single or multiple exposures - has_groups = len(input_models) > 1 + has_groups = len(input_models.group_names) > 1 - with input_models: - if has_groups: + if has_groups: + with input_models: model = input_models.borrow(0) is_moving = is_moving_target(model) input_models.shelve(model, 0, modify=False) - if is_moving: - input_models = self.assign_mtwcs(input_models) - else: - input_models = self.tweakreg(input_models) + if is_moving: + input_models = self.assign_mtwcs(input_models) + else: + input_models = self.tweakreg(input_models) - input_models = self.skymatch(input_models) - input_models = self.outlier_detection(input_models) + input_models = self.skymatch(input_models) + input_models = self.outlier_detection(input_models) - elif self.skymatch.skymethod == 'match': - self.log.warning("Turning 'skymatch' step off for a single " - "input image when 'skymethod' is 'match'") + elif self.skymatch.skymethod == 'match': + self.log.warning("Turning 'skymatch' step off for a single " + "input image when 'skymethod' is 'match'") - else: - input_models = self.skymatch(input_models) + else: + input_models = self.skymatch(input_models) result = self.resample(input_models) del input_models diff --git a/jwst/pipeline/tests/test_calwebb_image3.py b/jwst/pipeline/tests/test_calwebb_image3.py index 953694c8b6..8531674664 100644 --- a/jwst/pipeline/tests/test_calwebb_image3.py +++ b/jwst/pipeline/tests/test_calwebb_image3.py @@ -12,6 +12,8 @@ OUTPUT_PRODUCT = 
"custom_name" LOGFILE = "run_asn.log" LOGCFG = "test_logs.cfg" +LOGCFG_CONTENT = f"[*] \n \ + handler = file:{LOGFILE}" @pytest.fixture(scope='module') @@ -68,11 +70,8 @@ def test_run_image3_pipeline(make_dummy_association, in_memory): Two-product association passed in, run pipeline, skipping most steps ''' # save warnings to logfile so can be checked later - logcfg_content = f"[*] \n \ - level = INFO \n \ - handler = file:{LOGFILE}" with open(LOGCFG, 'w') as f: - f.write(logcfg_content) + f.write(LOGCFG_CONTENT) args = ["calwebb_image3", INPUT_ASN, f"--logcfg={LOGCFG}", @@ -90,11 +89,8 @@ def test_run_image3_pipeline(make_dummy_association, in_memory): def test_run_image3_single_file(make_dummy_cal_file): - logcfg_content = f"[*] \n \ - level = INFO \n \ - handler = file:{LOGFILE}" with open(LOGCFG, 'w') as f: - f.write(logcfg_content) + f.write(LOGCFG_CONTENT) args = ["calwebb_image3", INPUT_FILE, f"--logcfg={LOGCFG}", @@ -115,4 +111,4 @@ def _is_run_complete(logfile): msg = "Step Image3Pipeline done" with open(LOGFILE, 'r') as f: log = f.read() - assert msg in log + assert msg in log diff --git a/jwst/regtest/test_nircam_mtimage.py b/jwst/regtest/test_nircam_mtimage.py index c5e6140844..d997b57dde 100644 --- a/jwst/regtest/test_nircam_mtimage.py +++ b/jwst/regtest/test_nircam_mtimage.py @@ -11,7 +11,7 @@ def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs): """Test resampled i2d of moving target exposures for NIRCam imaging""" rtdata.get_asn("nircam/image/mt_asn.json") - rtdata.output = "mt_asn_i2d.fits" + rtdata.output = "mt_assoc_i2d.fits" #assoc comes from product name... will that carry over? 
args = ["calwebb_image3", rtdata.input] Step.from_cmdline(args) rtdata.get_truth("truth/test_nircam_mtimage/mt_assoc_i2d.fits") diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index 785364e4d5..f6c225398c 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -344,6 +344,7 @@ def resample_many_to_many(self, input_models): ) del data input_models.shelve(img, index, modify=False) + del img if not self.in_memory: # Write out model to disk, then return filename diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 946dda192a..0bbb2fc03f 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -113,7 +113,7 @@ def process(self, input): model.meta.resample.pixfrac = kwargs['pixfrac'] # update filename to reflect new product # necessary to get source_catalog output names to match i2d filename - model.meta.filename = self.output_file + # model.meta.filename = self.output_file result.shelve(model) if len(result) == 1: diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 93b037db5c..90932584e9 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,7 +102,7 @@ def process(self, input_model): model.data += bkg.background if self.save_results: - self.output_file = model.meta.filename + # self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) diff --git a/jwst/stpipe/utilities.py b/jwst/stpipe/utilities.py index 0147aae604..9e79d27e76 100644 --- a/jwst/stpipe/utilities.py +++ b/jwst/stpipe/utilities.py @@ -171,9 +171,6 @@ def record_step_status(datamodel, cal_step, success=True): else: status = SKIPPED - if isinstance(datamodel, (str, PurePath)): - datamodel = datamodels.open(datamodel) - if isinstance(datamodel, Sequence): for model in datamodel: 
model.meta.cal_step._instance[cal_step] = status diff --git a/jwst/tweakreg/tweakreg_step.py b/jwst/tweakreg/tweakreg_step.py index 0ec9d58558..4df8c6e2f9 100644 --- a/jwst/tweakreg/tweakreg_step.py +++ b/jwst/tweakreg/tweakreg_step.py @@ -284,8 +284,8 @@ def process(self, input): # can (and does) occur after alignment between groups if align_to_abs_refcat: with images: + ref_image = images.borrow(0) try: - ref_image = images.borrow(0) correctors = \ twk.absolute_align(correctors, self.abs_refcat, ref_wcs=ref_image.meta.wcs, @@ -303,13 +303,13 @@ def process(self, input): abs_catalog_output_dir=self.output_dir, ) images.shelve(ref_image, 0, modify=False) - del ref_image - except twk.TweakregError as e: self.log.warning(str(e)) images.shelve(ref_image, 0, modify=False) record_step_status(images, "tweakreg", success=False) return images + finally: + del ref_image if local_align_failed and not align_to_abs_refcat: record_step_status(images, "tweakreg", success=False) From 099580c3a63dff82dc2cc7bda0819f04e9b9272f Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 21 Aug 2024 16:03:39 -0400 Subject: [PATCH 49/85] fix ruff style check and remove unnecessary comment --- jwst/regtest/test_nircam_mtimage.py | 2 +- jwst/stpipe/utilities.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/jwst/regtest/test_nircam_mtimage.py b/jwst/regtest/test_nircam_mtimage.py index d997b57dde..961d34914d 100644 --- a/jwst/regtest/test_nircam_mtimage.py +++ b/jwst/regtest/test_nircam_mtimage.py @@ -11,7 +11,7 @@ def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs): """Test resampled i2d of moving target exposures for NIRCam imaging""" rtdata.get_asn("nircam/image/mt_asn.json") - rtdata.output = "mt_assoc_i2d.fits" #assoc comes from product name... will that carry over? 
+ rtdata.output = "mt_assoc_i2d.fits" args = ["calwebb_image3", rtdata.input] Step.from_cmdline(args) rtdata.get_truth("truth/test_nircam_mtimage/mt_assoc_i2d.fits") diff --git a/jwst/stpipe/utilities.py b/jwst/stpipe/utilities.py index 9e79d27e76..ad62ba1468 100644 --- a/jwst/stpipe/utilities.py +++ b/jwst/stpipe/utilities.py @@ -37,7 +37,6 @@ import re from collections.abc import Sequence from jwst import datamodels -from pathlib import PurePath # Configure logging logger = logging.getLogger(__name__) From 6ff67d331d7a260f85e92cae8b5865066d1dee3a Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 22 Aug 2024 09:02:33 -0400 Subject: [PATCH 50/85] fix unit test and revert changes to file naming --- jwst/datamodels/tests/test_library.py | 8 +++----- jwst/resample/resample_step.py | 2 +- jwst/source_catalog/source_catalog_step.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/jwst/datamodels/tests/test_library.py b/jwst/datamodels/tests/test_library.py index ea737a9ac2..ebb1417a37 100644 --- a/jwst/datamodels/tests/test_library.py +++ b/jwst/datamodels/tests/test_library.py @@ -16,10 +16,8 @@ # determining meta is the same, see `example_asn_path`) _OBSERVATION_NUMBERS = ['1', '1', '2'] _N_MODELS = len(_OBSERVATION_NUMBERS) -_N_GROUPS = len(set(_OBSERVATION_NUMBERS)) _PRODUCT_NAME = "foo_out" _POOL_NAME = "some_pool" -_TABLE_NAME = "some_table" @pytest.fixture @@ -138,14 +136,14 @@ def test_group_id_override(example_asn_path, asn_group_id, meta_group_id, expect def test_asn_attributes_assignment(example_library): - expected_table_name = "jwnoprogram-a3001_none_00008_asn.json" - assert example_library.asn["table_name"] == expected_table_name + expected_table_name = "jwnoprogram-a3001" + assert example_library.asn["table_name"].startswith(expected_table_name) assert example_library.asn["asn_pool"] == _POOL_NAME # test that the association attributes are assigned to the models with example_library: for i in range(_N_MODELS): model = 
example_library.borrow(i) - assert model.meta.asn.table_name == expected_table_name + assert model.meta.asn.table_name.startswith(expected_table_name) assert model.meta.asn.pool_name == _POOL_NAME example_library.shelve(model, i, modify=False) \ No newline at end of file diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 0bbb2fc03f..946dda192a 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -113,7 +113,7 @@ def process(self, input): model.meta.resample.pixfrac = kwargs['pixfrac'] # update filename to reflect new product # necessary to get source_catalog output names to match i2d filename - # model.meta.filename = self.output_file + model.meta.filename = self.output_file result.shelve(model) if len(result) == 1: diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 90932584e9..93b037db5c 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,7 +102,7 @@ def process(self, input_model): model.data += bkg.background if self.save_results: - # self.output_file = model.meta.filename + self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) From 6cad768fbbd415f293961c4d2a401a21ee7d6376 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 23 Aug 2024 14:25:04 -0400 Subject: [PATCH 51/85] new attempted fix of output filenames --- jwst/pipeline/calwebb_image3.py | 2 +- jwst/resample/resample_step.py | 4 ++-- jwst/source_catalog/source_catalog_step.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jwst/pipeline/calwebb_image3.py b/jwst/pipeline/calwebb_image3.py index 473530ae32..9fe98b9376 100644 --- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -71,7 +71,7 @@ def process(self, input_data): # background and target-acq members are not needed. 
input_models = self._load_input_as_library(input_data) - if (self.output_file is None) and hasattr(input_models.asn["products"][0], "name"): + if (self.output_file is None) and "name" in input_models.asn["products"][0]: # If input is an association, set the output to the product name. self.output_file = input_models.asn["products"][0]["name"] diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 946dda192a..7d7377e5e6 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -77,8 +77,8 @@ def process(self, input): # resampling. # TODO: figure out why and make sure asn_table is carried along output = None - if self.save_results: - self.output_file = output + # if self.save_results: + # self.output_file = output # Check that input models are 2D images with input_models: diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 93b037db5c..90932584e9 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,7 +102,7 @@ def process(self, input_model): model.data += bkg.background if self.save_results: - self.output_file = model.meta.filename + # self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) From 91b1b19f3e890235bf367996d8db14c546fb101a Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 23 Aug 2024 14:40:14 -0400 Subject: [PATCH 52/85] removed one more manual change to output file naming --- jwst/pipeline/calwebb_image3.py | 2 +- jwst/resample/resample_step.py | 4 +--- jwst/source_catalog/source_catalog_step.py | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/jwst/pipeline/calwebb_image3.py b/jwst/pipeline/calwebb_image3.py index 9fe98b9376..fcdb45c755 100644 --- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -71,7 +71,7 @@ def process(self, input_data): # 
background and target-acq members are not needed. input_models = self._load_input_as_library(input_data) - if (self.output_file is None) and "name" in input_models.asn["products"][0]: + if (self.output_file is None) and ("name" in input_models.asn["products"][0]): # If input is an association, set the output to the product name. self.output_file = input_models.asn["products"][0]["name"] diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index 7d7377e5e6..41b7e18479 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -77,8 +77,6 @@ def process(self, input): # resampling. # TODO: figure out why and make sure asn_table is carried along output = None - # if self.save_results: - # self.output_file = output # Check that input models are 2D images with input_models: @@ -113,7 +111,7 @@ def process(self, input): model.meta.resample.pixfrac = kwargs['pixfrac'] # update filename to reflect new product # necessary to get source_catalog output names to match i2d filename - model.meta.filename = self.output_file + # model.meta.filename = self.output_file result.shelve(model) if len(result) == 1: diff --git a/jwst/source_catalog/source_catalog_step.py b/jwst/source_catalog/source_catalog_step.py index 90932584e9..0e3b78ef08 100755 --- a/jwst/source_catalog/source_catalog_step.py +++ b/jwst/source_catalog/source_catalog_step.py @@ -102,7 +102,6 @@ def process(self, input_model): model.data += bkg.background if self.save_results: - # self.output_file = model.meta.filename cat_filepath = self.make_output_path(ext='.ecsv') catalog.write(cat_filepath, format='ascii.ecsv', overwrite=True) From bf069ed43131c7e20be86fd7886649106179d8ec Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 23 Aug 2024 15:36:17 -0400 Subject: [PATCH 53/85] changed has_groups conditional to reflect master branch --- jwst/pipeline/calwebb_image3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jwst/pipeline/calwebb_image3.py 
b/jwst/pipeline/calwebb_image3.py index fcdb45c755..757622a627 100644 --- a/jwst/pipeline/calwebb_image3.py +++ b/jwst/pipeline/calwebb_image3.py @@ -76,7 +76,7 @@ def process(self, input_data): self.output_file = input_models.asn["products"][0]["name"] # Check if input is single or multiple exposures - has_groups = len(input_models.group_names) > 1 + has_groups = len(input_models.group_names) >= 1 if has_groups: with input_models: From 5a44905bbdc21c89aa1d34714c23d682394726ef Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 23 Aug 2024 16:24:27 -0400 Subject: [PATCH 54/85] attempted fix single regtest failure for miri image3 crf files --- jwst/datamodels/library.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 8e49e80e7c..c2e9fd867b 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -129,10 +129,15 @@ def _assign_member_to_model(self, model, member): if not hasattr(model.meta, "asn"): model.meta["asn"] = {} - if (model.meta.asn.table_name is None) and ("table_name" in self.asn.keys()): # do not clobber existing values + # if (model.meta.asn.table_name is None) and ("table_name" in self.asn.keys()): # do not clobber existing values + # setattr(model.meta.asn, "table_name", self.asn["table_name"]) + + # if (model.meta.asn.pool_name is None) and ("asn_pool" in self.asn.keys()): # do not clobber existing values + # setattr(model.meta.asn, "pool_name", self.asn["asn_pool"]) + if "table_name" in self.asn.keys(): setattr(model.meta.asn, "table_name", self.asn["table_name"]) - if (model.meta.asn.pool_name is None) and ("asn_pool" in self.asn.keys()): # do not clobber existing values + if "asn_pool" in self.asn.keys(): # do not clobber existing values setattr(model.meta.asn, "pool_name", self.asn["asn_pool"]) From 5272ac9387ef531471a30e7371bd93158a500213 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 27 Aug 2024 16:55:16 -0400 Subject: [PATCH 55/85] 
fixes per @melanieclarke comments --- CHANGES.rst | 12 +++---- docs/jwst/tweakreg/README.rst | 1 - jwst/datamodels/library.py | 5 --- jwst/lib/exposure_types.py | 4 ++- .../outlier_detection_step.py | 2 +- jwst/resample/resample.py | 11 +++++++ jwst/resample/resample_spec.py | 2 -- jwst/resample/resample_spec_step.py | 33 ++++++++----------- jwst/resample/resample_step.py | 3 -- 9 files changed, 35 insertions(+), 38 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 88347e5ae6..1cffa9946e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -38,7 +38,7 @@ cube_build datamodels ---------- -- Added `ModelLibrary` class to allow passing ``"on-disk"`` models between steps in the +- Added `ModelLibrary` class to allow passing on-disk models between steps in the image3 pipeline. [#8683] emicorr @@ -68,17 +68,17 @@ outlier_detection - Fixed failures due to a missing ``wcs.array_shape`` attribute when the ``outlier_detection`` step was run standalone using e.g. ``strun`` [#8645] -set_telescope_pointing ----------------------- - -- replace usage of ``copy_arrays=True`` with ``memmap=False`` [#8660] - - Refactored separate modes into submodules instead of inheriting from a base class. Moved non-JWST-specific code to stcal. [#8613] - For imaging modes, step now uses `ModelLibrary` to handle accessing models consistently whether they are in memory or on disk. [#8683] +set_telescope_pointing +---------------------- + +- replace usage of ``copy_arrays=True`` with ``memmap=False`` [#8660] + pipeline -------- diff --git a/docs/jwst/tweakreg/README.rst b/docs/jwst/tweakreg/README.rst index cc4451397f..ea8069f0c0 100644 --- a/docs/jwst/tweakreg/README.rst +++ b/docs/jwst/tweakreg/README.rst @@ -87,7 +87,6 @@ other ways of supplying custom source catalogs to the step: 1. Adding ``tweakreg_catalog`` attribute to the ``members`` of the input ASN table - see `~jwst.datamodels.ModelLibrary` for more details. - # FIXME: does this still work as described? 
Catalog file names are relative to ASN file path. 2. Providing a simple two-column text file, specified via step's parameter diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index c2e9fd867b..9ba0c84ef9 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -129,11 +129,6 @@ def _assign_member_to_model(self, model, member): if not hasattr(model.meta, "asn"): model.meta["asn"] = {} - # if (model.meta.asn.table_name is None) and ("table_name" in self.asn.keys()): # do not clobber existing values - # setattr(model.meta.asn, "table_name", self.asn["table_name"]) - - # if (model.meta.asn.pool_name is None) and ("asn_pool" in self.asn.keys()): # do not clobber existing values - # setattr(model.meta.asn, "pool_name", self.asn["asn_pool"]) if "table_name" in self.asn.keys(): setattr(model.meta.asn, "table_name", self.asn["table_name"]) diff --git a/jwst/lib/exposure_types.py b/jwst/lib/exposure_types.py index 493695d57a..bddbb720c8 100644 --- a/jwst/lib/exposure_types.py +++ b/jwst/lib/exposure_types.py @@ -89,6 +89,8 @@ def is_nrs_autoflat(datamodel): def is_moving_target(datamodel): """ Determine if a moving target exposure.""" - if (hasattr(datamodel.meta.target, 'type') and datamodel.meta.target.type is not None and datamodel.meta.target.type.lower() == 'moving'): + if (hasattr(datamodel.meta.target, 'type') \ + and datamodel.meta.target.type is not None \ + and datamodel.meta.target.type.lower() == 'moving'): return True return False diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index f9cf4c57ed..965b80797b 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -160,7 +160,7 @@ def process(self, input_data): else: self.log.error("Outlier detection failed for unknown/unsupported ", f"mode: {mode}") - return self._set_status(input_data, True) + return self._set_status(input_data, False) return 
self._set_status(result_models, True) diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index f6c225398c..bd9f91051e 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -953,6 +953,17 @@ def compute_image_pixel_area(wcs): def copy_asn_info_from_library(library, output_model): + """ + Transfer association information from the input library to the output model. + + Parameters + ---------- + library : ModelLibrary + The input library of data models. + + output_model : DataModel + The output data model to which the association information will be copied. + """ if not hasattr(library, "asn"): # No ASN table, occurs when input comes from ModelContainer in spectroscopic modes # in this case do nothing; the asn info will be passed along later diff --git a/jwst/resample/resample_spec.py b/jwst/resample/resample_spec.py index 5c68827e78..3444611959 100644 --- a/jwst/resample/resample_spec.py +++ b/jwst/resample/resample_spec.py @@ -16,7 +16,6 @@ from stdatamodels.jwst import datamodels from jwst.assign_wcs.util import compute_scale, wrap_ra -from jwst.datamodels import ModelContainer from jwst.resample import resample_utils from jwst.resample.resample import ResampleData @@ -191,7 +190,6 @@ def __init__(self, input_models, output=None, single=False, blendheaders=False, self.blank_output.meta.photometry.pixelarea_arcsecsq = ( output_pix_area * np.rad2deg(3600)**2) - self.output_models = ModelContainer() def build_nirspec_output_wcs(self, input_models, refmodel=None): """ diff --git a/jwst/resample/resample_spec_step.py b/jwst/resample/resample_spec_step.py index 2d23472e0f..bcc3d1a95c 100755 --- a/jwst/resample/resample_spec_step.py +++ b/jwst/resample/resample_spec_step.py @@ -135,17 +135,14 @@ def _process_multislit(self, input_models): resamp = resample_spec.ResampleSpecData(container, **self.drizpars) library = ModelLibrary(container, on_disk=False) - library = resamp.do_drizzle(library) - with library: - for i, model in 
enumerate(library): - container[i] = model - library.shelve(model, modify=False) - del library - - for model in container: - self.update_slit_metadata(model) - update_s_region_spectral(model) - result.slits.append(model) + drizzled_library = resamp.do_drizzle(library) + with drizzled_library: + for i, model in enumerate(drizzled_library): + self.update_slit_metadata(model) + update_s_region_spectral(model) + result.slits.append(model) + drizzled_library.shelve(model, i, modify=False) + del library, drizzled_library # Keep the first computed pixel scale ratio for storage if self.pixel_scale is not None and pscale_ratio is None: @@ -211,14 +208,12 @@ def _process_slit(self, input_models): resamp = resample_spec.ResampleSpecData(input_models, **self.drizpars) library = ModelLibrary(input_models, on_disk=False) - library = resamp.do_drizzle(library) - with library: - for i, model in enumerate(library): - input_models[i] = model - library.shelve(model, modify=False) - del library - - result = input_models[0] + drizzled_library = resamp.do_drizzle(library) + with drizzled_library: + result = drizzled_library.borrow(0) + drizzled_library.shelve(result, 0, modify=False) + del library, drizzled_library + result.meta.bunit_data = input_models[0].meta.bunit_data if self.pixel_scale is None: result.meta.resample.pixel_scale_ratio = self.pixel_scale_ratio diff --git a/jwst/resample/resample_step.py b/jwst/resample/resample_step.py index b5f74e8168..a2fd35db96 100755 --- a/jwst/resample/resample_step.py +++ b/jwst/resample/resample_step.py @@ -109,9 +109,6 @@ def process(self, input): else: model.meta.resample.pixel_scale_ratio = resamp.pscale_ratio model.meta.resample.pixfrac = kwargs['pixfrac'] - # update filename to reflect new product - # necessary to get source_catalog output names to match i2d filename - # model.meta.filename = self.output_file result.shelve(model) if len(result) == 1: From a4d4dda342ea176f3b2505e559d994a1290a6c47 Mon Sep 17 00:00:00 2001 From: Ned Molter 
Date: Thu, 29 Aug 2024 16:06:32 -0400 Subject: [PATCH 56/85] first draft of minimal container --- jwst/datamodels/container.py | 334 +++++++++++------------------------ 1 file changed, 101 insertions(+), 233 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 45440bcb17..37af86fb19 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -5,8 +5,6 @@ import re import logging -import numpy as np - from asdf import AsdfFile from astropy.io import fits from stdatamodels import properties @@ -19,7 +17,6 @@ __all__ = ['ModelContainer'] -_ONE_MB = 1 << 20 RECOGNIZED_MEMBER_FIELDS = ['tweakreg_catalog', 'group_id'] # Configure logging @@ -27,7 +24,7 @@ logger.addHandler(logging.NullHandler()) -class ModelContainer(JwstDataModel, Sequence): +class ModelContainer(Sequence): """ A container for holding DataModels. @@ -55,10 +52,6 @@ class ModelContainer(JwstDataModel, Sequence): asn_n_members : int Open only the first N qualifying members. - iscopy : bool - Presume this model is a copy. Members will not be closed - when the model is closed/garbage-collected. - Examples -------- >>> container = ModelContainer('example_asn.json') @@ -80,19 +73,6 @@ class ModelContainer(JwstDataModel, Sequence): Notes ----- - The optional paramters ``save_open`` and ``return_open`` can be - provided to control how the `JwstDataModel` are used by the - :py:class:`ModelContainer`. If ``save_open`` is set to `False`, each input - `JwstDataModel` instance in ``init`` will be written out to disk and - closed, then only the filename for the `JwstDataModel` will be used to - initialize the :py:class:`ModelContainer` object. - Subsequent access of each member will then open the `JwstDataModel` file to - work with it. If ``return_open`` is also `False`, then the `JwstDataModel` - will be closed when access to the `JwstDataModel` is completed. 
The use of - these parameters can minimize the amount of memory used by this object - during processing, with these parameters being used - by :py:class:`~jwst.outlier_detection.OutlierDetectionStep`. - When ASN table's members contain attributes listed in :py:data:`RECOGNIZED_MEMBER_FIELDS`, :py:class:`ModelContainer` will read those attribute values and update the corresponding attributes @@ -148,13 +128,11 @@ class ModelContainer(JwstDataModel, Sequence): """ schema_url = None - def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, - iscopy=False, **kwargs): + def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): super().__init__(init=None, **kwargs) self._models = [] - self._iscopy = iscopy self.asn_exptypes = asn_exptypes self.asn_n_members = asn_n_members self.asn_table = {} @@ -163,27 +141,16 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, self.asn_file_path = None self._memmap = kwargs.get("memmap", False) - self._return_open = kwargs.get('return_open', True) - self._save_open = kwargs.get('save_open', True) if init is None: # Don't populate the container with models pass - elif isinstance(init, fits.HDUList): - if self._save_open: - model = [datamodel_open(init, memmap=self._memmap)] - else: - model = init._file.name - init.close() - self._models.append(model) elif isinstance(init, list): if all(isinstance(x, (str, fits.HDUList, JwstDataModel)) for x in init): - if self._save_open: - init = [datamodel_open(m, memmap=self._memmap) for m in init] + self._models.append([datamodel_open(m, memmap=self._memmap) for m in init]) else: raise TypeError("list must contain items that can be opened " "with jwst.datamodels.open()") - self._models = init elif isinstance(init, self.__class__): instance = copy.deepcopy(init._instance) self._schema = init._schema @@ -192,7 +159,6 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, self._instance = instance self._ctx = self self._models = 
init._models - self._iscopy = True elif is_association(init): self.from_asn(init) elif isinstance(init, str): @@ -207,10 +173,7 @@ def __len__(self): return len(self._models) def __getitem__(self, index): - m = self._models[index] - if not isinstance(m, JwstDataModel) and self._return_open: - m = datamodel_open(m, memmap=self._memmap) - return m + return self._models[index] def __setitem__(self, index, model): self._models[index] = model @@ -220,8 +183,6 @@ def __delitem__(self, index): def __iter__(self): for model in self._models: - if not isinstance(model, JwstDataModel) and self._return_open: - model = datamodel_open(model, memmap=self._memmap) yield model def insert(self, index, model): @@ -246,14 +207,10 @@ def copy(self, memo=None): instance = copy.deepcopy(self._instance, memo=memo) result._asdf = AsdfFile(instance) result._instance = instance - result._iscopy = self._iscopy result._schema = self._schema result._ctx = result for m in self._models: - if isinstance(m, JwstDataModel): - result.append(m.copy()) - else: - result.append(m) + result.append(m.copy()) return result @staticmethod @@ -315,26 +272,17 @@ def from_asn(self, asn_data): try: for member in sublist: filepath = op.join(asn_dir, member['expname']) - update_model = any(attr in member for attr in RECOGNIZED_MEMBER_FIELDS) - if update_model or self._save_open: - m = datamodel_open(filepath, memmap=self._memmap) - m.meta.asn.exptype = member['exptype'] - for attr, val in member.items(): - if attr in RECOGNIZED_MEMBER_FIELDS: - if attr == 'tweakreg_catalog': - if val.strip(): - val = op.join(asn_dir, val) - else: - val = None - - setattr(m.meta, attr, val) - - if not self._save_open: - m.save(filepath, overwrite=True) - m.close() - else: - m = filepath - + m = datamodel_open(filepath, memmap=self._memmap) + m.meta.asn.exptype = member['exptype'] + for attr, val in member.items(): + if attr in RECOGNIZED_MEMBER_FIELDS: + if attr == 'tweakreg_catalog': + if val.strip(): + val = op.join(asn_dir, val) 
+ else: + val = None + + setattr(m.meta, attr, val) self._models.append(m) except IOError: @@ -357,66 +305,66 @@ def from_asn(self, asn_data): except AttributeError: pass - def save(self, - path=None, - dir_path=None, - save_model_func=None, - **kwargs): - """ - Write out models in container to FITS or ASDF. - - Parameters - ---------- - path : str or func or None - - If None, the `meta.filename` is used for each model. - - If a string, the string is used as a root and an index is - appended. - - If a function, the function takes the two arguments: - the value of model.meta.filename and the - `idx` index, returning constructed file name. - - dir_path : str - Directory to write out files. Defaults to current working dir. - If directory does not exist, it creates it. Filenames are pulled - from `.meta.filename` of each datamodel in the container. - - save_model_func: func or None - Alternate function to save each model instead of - the models `save` method. Takes one argument, the model, - and keyword argument `idx` for an index. - - Returns - ------- - output_paths: [str[, ...]] - List of output file paths of where the models were saved. - """ - output_paths = [] - if path is None: - def path(filename, idx=None): - return filename - elif not callable(path): - path = make_file_with_index - - for idx, model in enumerate(self): - if len(self) <= 1: - idx = None - if save_model_func is None: - outpath, filename = op.split( - path(model.meta.filename, idx=idx) - ) - if dir_path: - outpath = dir_path - save_path = op.join(outpath, filename) - try: - output_paths.append( - model.save(save_path, **kwargs) - ) - except IOError as err: - raise err - - else: - output_paths.append(save_model_func(model, idx=idx)) - return output_paths + # def save(self, + # path=None, + # dir_path=None, + # save_model_func=None, + # **kwargs): + # """ + # Write out models in container to FITS or ASDF. 
+ + # Parameters + # ---------- + # path : str or func or None + # - If None, the `meta.filename` is used for each model. + # - If a string, the string is used as a root and an index is + # appended. + # - If a function, the function takes the two arguments: + # the value of model.meta.filename and the + # `idx` index, returning constructed file name. + + # dir_path : str + # Directory to write out files. Defaults to current working dir. + # If directory does not exist, it creates it. Filenames are pulled + # from `.meta.filename` of each datamodel in the container. + + # save_model_func: func or None + # Alternate function to save each model instead of + # the models `save` method. Takes one argument, the model, + # and keyword argument `idx` for an index. + + # Returns + # ------- + # output_paths: [str[, ...]] + # List of output file paths of where the models were saved. + # """ + # output_paths = [] + # if path is None: + # def path(filename, idx=None): + # return filename + # elif not callable(path): + # path = make_file_with_index + + # for idx, model in enumerate(self): + # if len(self) <= 1: + # idx = None + # if save_model_func is None: + # outpath, filename = op.split( + # path(model.meta.filename, idx=idx) + # ) + # if dir_path: + # outpath = dir_path + # save_path = op.join(outpath, filename) + # try: + # output_paths.append( + # model.save(save_path, **kwargs) + # ) + # except IOError as err: + # raise err + + # else: + # output_paths.append(save_model_func(model, idx=idx)) + # return output_paths @property def models_grouped(self): @@ -454,8 +402,6 @@ def models_grouped(self): group_dict = OrderedDict() for i, model in enumerate(self._models): params = [] - if not self._save_open: - model = datamodel_open(model, memmap=self._memmap) if (hasattr(model.meta, 'group_id') and model.meta.group_id not in [None, '']): @@ -480,10 +426,6 @@ def models_grouped(self): group_id = model.meta.group_id - if not self._save_open and not self._return_open: - 
model.close() - model = self._models[i] - if group_id in group_dict: group_dict[group_id].append(model) else: @@ -503,10 +445,9 @@ def group_names(self): def close(self): """Close all datamodels.""" - if not self._iscopy: - for model in self._models: - if isinstance(model, JwstDataModel): - model.close() + for model in self._models: + if isinstance(model, JwstDataModel): + model.close() @property def crds_observatory(self): @@ -518,8 +459,6 @@ def crds_observatory(self): ------- str """ - # Eventually ModelContainer will also be used for Roman, but this - # will work for now: return "jwst" def get_crds_parameters(self): @@ -572,97 +511,26 @@ def ind_asn_type(self, asn_exptype): ind.append(i) return ind - def set_buffer(self, buffer_size, overlap=None): - """Set buffer size for scrolling section-by-section access. - - Parameters - ---------- - buffer_size : float, None - Define size of buffer in MB for each section. - If `None`, a default buffer size of 1MB will be used. - - overlap : int, optional - Define the number of rows of overlaps between sections. - If `None`, no overlap will be used. - """ - self.overlap = 0 if overlap is None else overlap - self.grow = 0 - - with datamodel_open(self._models[0]) as model: - imrows, imcols = model.data.shape - data_item_size = model.data.itemsize - data_item_type = model.data.dtype - model.close() - del model - min_buffer_size = imcols * data_item_size - - self.buffer_size = min_buffer_size if buffer_size is None else (buffer_size * _ONE_MB) - - section_nrows = min(imrows, int(self.buffer_size // min_buffer_size)) - - if section_nrows == 0: - self.buffer_size = min_buffer_size - logger.warning("WARNING: Buffer size is too small to hold a single row." 
- f"Increasing buffer size to {self.buffer_size / _ONE_MB}MB") - section_nrows = 1 - - nbr = section_nrows - self.overlap - nsec = (imrows - self.overlap) // nbr - if (imrows - self.overlap) % nbr > 0: - nsec += 1 - - self.n_sections = nsec - self.nbr = nbr - self.section_nrows = section_nrows - self.imrows = imrows - self.imcols = imcols - self.imtype = data_item_type - - def get_sections(self): - """Iterator to return the sections from all members of the container.""" - - for k in range(self.n_sections): - e1 = k * self.nbr - e2 = e1 + self.section_nrows - - if k == self.n_sections - 1: # last section - e2 = min(e2, self.imrows) - e1 = min(e1, e2 - self.overlap - 1) - - data_list = np.empty((len(self._models), e2 - e1, self.imcols), - dtype=self.imtype) - wht_list = np.empty((len(self._models), e2 - e1, self.imcols), - dtype=self.imtype) - for i, model in enumerate(self._models): - model = datamodel_open(model, memmap=self._memmap) - - data_list[i, :, :] = model.data[e1:e2].copy() - wht_list[i, :, :] = model.wht[e1:e2].copy() - model.close() - del model - - yield (data_list, wht_list, (e1, e2)) - -def make_file_with_index(file_path, idx): - """Append an index to a filename - - Parameters - ---------- - file_path: str - The file to append the index to. - idx: int - An index to append - - - Returns - ------- - file_path: str - Path with index appended - """ - # Decompose path - path_head, path_tail = op.split(file_path) - base, ext = op.splitext(path_tail) - if idx is not None: - base = base + str(idx) - return op.join(path_head, base + ext) +# def make_file_with_index(file_path, idx): +# """Append an index to a filename + +# Parameters +# ---------- +# file_path: str +# The file to append the index to. 
+# idx: int +# An index to append + + +# Returns +# ------- +# file_path: str +# Path with index appended +# """ +# # Decompose path +# path_head, path_tail = op.split(file_path) +# base, ext = op.splitext(path_tail) +# if idx is not None: +# base = base + str(idx) +# return op.join(path_head, base + ext) From b17652035e68c47159ca8c11777a1f62f820d1d7 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 29 Aug 2024 16:30:43 -0400 Subject: [PATCH 57/85] added basic context management --- jwst/datamodels/container.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 37af86fb19..a986eccc5f 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -130,8 +130,6 @@ class ModelContainer(Sequence): def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): - super().__init__(init=None, **kwargs) - self._models = [] self.asn_exptypes = asn_exptypes self.asn_n_members = asn_n_members @@ -197,6 +195,13 @@ def extend(self, model): def pop(self, index=-1): self._models.pop(index) + def __enter__(self) -> str: + return self + + def __exit__(self, *args: object) -> None: + for model in self._models: + model.close() + def copy(self, memo=None): """ Returns a deep copy of the models in this model container. 
@@ -290,9 +295,9 @@ def from_asn(self, asn_data): raise # Pull the whole association table into meta.asn_table - self.meta.asn_table = {} + self.asn_table = {} properties.merge_tree( - self.meta.asn_table._instance, asn_data + self.asn_table, asn_data ) if self.asn_file_path is not None: From 04e2fa514203ce4d44e169ec26a9b72ce6dd4303 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 30 Aug 2024 10:55:59 -0400 Subject: [PATCH 58/85] fixed failure to raise NoGroupID for in-memory models with None for observation parameters --- jwst/datamodels/library.py | 3 +++ jwst/outlier_detection/tests/test_outlier_detection.py | 4 +--- jwst/resample/resample.py | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jwst/datamodels/library.py b/jwst/datamodels/library.py index 9ba0c84ef9..32e27aa117 100644 --- a/jwst/datamodels/library.py +++ b/jwst/datamodels/library.py @@ -148,6 +148,9 @@ def _attrs_to_group_id( """ Combine a number of file metadata values into a ``group_id`` string """ + for val in (program_number, observation_number, visit_number, visit_group, sequence_id, activity_id, exposure_number): + if val is None: + raise NoGroupID(f"Missing required value for group_id: {val}") return ( f"jw{program_number}{observation_number}{visit_number}" f"_{visit_group}{sequence_id}{activity_id}" diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index 20aad6bae9..7cb74f5a8c 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -268,10 +268,8 @@ def test_outlier_step_spec(tmp_cwd, tmp_path): # Make it an exposure type outlier detection expects miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT" - # Make a couple copies + # Make a couple copies, give them unique exposure numbers and filename container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()]) - - # Give each image a unique name so output files don't 
overwrite for i, model in enumerate(container): model.meta.filename = f'test_{i}_cal.fits' diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py index a2fcf49de2..97b2e3d766 100644 --- a/jwst/resample/resample.py +++ b/jwst/resample/resample.py @@ -281,7 +281,6 @@ def resample_many_to_many(self, input_models): """ output_models = [] for group_id, indices in input_models.group_indices.items(): - print("indices", indices) output_model = self.blank_output copy_asn_info_from_library(input_models, output_model) From fff437ce35d947080492dfd491fb919740a9d2f6 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 30 Aug 2024 14:27:23 -0400 Subject: [PATCH 59/85] start to modify pipelines to account for container changes --- jwst/datamodels/container.py | 116 ++---------------- .../master_background_step.py | 7 +- jwst/pipeline/calwebb_coron3.py | 8 +- jwst/pipeline/calwebb_spec3.py | 6 +- 4 files changed, 21 insertions(+), 116 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index a986eccc5f..399feadeb3 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -1,11 +1,8 @@ -import copy from collections import OrderedDict from collections.abc import Sequence import os.path as op import re import logging - -from asdf import AsdfFile from astropy.io import fits from stdatamodels import properties @@ -145,18 +142,20 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): pass elif isinstance(init, list): if all(isinstance(x, (str, fits.HDUList, JwstDataModel)) for x in init): - self._models.append([datamodel_open(m, memmap=self._memmap) for m in init]) + for m in init: + self._models.append(datamodel_open(m, memmap=self._memmap)) else: raise TypeError("list must contain items that can be opened " "with jwst.datamodels.open()") elif isinstance(init, self.__class__): - instance = copy.deepcopy(init._instance) - self._schema = init._schema - self._shape = init._shape - self._asdf = 
AsdfFile(instance) - self._instance = instance - self._ctx = self - self._models = init._models + for m in init: + self._models.append(datamodel_open(m, memmap=self._memmap)) + self.asn_exptypes = init.asn_exptypes + self.asn_n_members = init.asn_n_members + self.asn_table = init.asn_table + self.asn_table_name = init.asn_table_name + self.asn_pool_name = init.asn_pool_name + self.asn_file_path = init.asn_file_path elif is_association(init): self.from_asn(init) elif isinstance(init, str): @@ -206,16 +205,9 @@ def copy(self, memo=None): """ Returns a deep copy of the models in this model container. """ - result = self.__class__(init=None, - pass_invalid_values=self._pass_invalid_values, - strict_validation=self._strict_validation) - instance = copy.deepcopy(self._instance, memo=memo) - result._asdf = AsdfFile(instance) - result._instance = instance - result._schema = self._schema - result._ctx = result + result = self.__class__(init=None) for m in self._models: - result.append(m.copy()) + result.append(m.copy(memo=memo)) return result @staticmethod @@ -310,66 +302,6 @@ def from_asn(self, asn_data): except AttributeError: pass - # def save(self, - # path=None, - # dir_path=None, - # save_model_func=None, - # **kwargs): - # """ - # Write out models in container to FITS or ASDF. - - # Parameters - # ---------- - # path : str or func or None - # - If None, the `meta.filename` is used for each model. - # - If a string, the string is used as a root and an index is - # appended. - # - If a function, the function takes the two arguments: - # the value of model.meta.filename and the - # `idx` index, returning constructed file name. - - # dir_path : str - # Directory to write out files. Defaults to current working dir. - # If directory does not exist, it creates it. Filenames are pulled - # from `.meta.filename` of each datamodel in the container. - - # save_model_func: func or None - # Alternate function to save each model instead of - # the models `save` method. 
Takes one argument, the model, - # and keyword argument `idx` for an index. - - # Returns - # ------- - # output_paths: [str[, ...]] - # List of output file paths of where the models were saved. - # """ - # output_paths = [] - # if path is None: - # def path(filename, idx=None): - # return filename - # elif not callable(path): - # path = make_file_with_index - - # for idx, model in enumerate(self): - # if len(self) <= 1: - # idx = None - # if save_model_func is None: - # outpath, filename = op.split( - # path(model.meta.filename, idx=idx) - # ) - # if dir_path: - # outpath = dir_path - # save_path = op.join(outpath, filename) - # try: - # output_paths.append( - # model.save(save_path, **kwargs) - # ) - # except IOError as err: - # raise err - - # else: - # output_paths.append(save_model_func(model, idx=idx)) - # return output_paths @property def models_grouped(self): @@ -515,27 +447,3 @@ def ind_asn_type(self, asn_exptype): if model.meta.asn.exptype.lower() == asn_exptype: ind.append(i) return ind - - -# def make_file_with_index(file_path, idx): -# """Append an index to a filename - -# Parameters -# ---------- -# file_path: str -# The file to append the index to. 
-# idx: int -# An index to append - - -# Returns -# ------- -# file_path: str -# Path with index appended -# """ -# # Decompose path -# path_head, path_tail = op.split(file_path) -# base, ext = op.splitext(path_tail) -# if idx is not None: -# base = base + str(idx) -# return op.join(path_head, base + ext) diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index 1ea83855ee..83db84babe 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -240,8 +240,6 @@ def copy_background_to_surf_bright(spectrum): def split_container(container): """Divide a ModelContainer with science and background into one of each """ - asn = container.meta.asn_table.instance - background = ModelContainer() science = ModelContainer() @@ -252,12 +250,11 @@ def split_container(container): background.append(container._models[ind_bkgd]) # Pass along the association table to the output science container - science.meta.asn_table = {} science.asn_pool_name = container.asn_pool_name science.asn_table_name = container.asn_table_name - merge_tree(science.meta.asn_table.instance, asn) + merge_tree(science.asn_table, container.asn_table) # Prune the background members from the table - for p in science.meta.asn_table.instance['products']: + for p in science.asn_table['products']: p['members'] = [m for m in p['members'] if m['exptype'].lower() != 'background'] return science, background diff --git a/jwst/pipeline/calwebb_coron3.py b/jwst/pipeline/calwebb_coron3.py index 04673418a9..5551cbafe1 100644 --- a/jwst/pipeline/calwebb_coron3.py +++ b/jwst/pipeline/calwebb_coron3.py @@ -90,14 +90,14 @@ def process(self, user_input): # This asn_id assignment is important as it allows outlier detection # to know the asn_id since that step receives the cube as input. 
- self.asn_id = input_models.meta.asn_table.asn_id + self.asn_id = input_models.asn_table["asn_id"] # Store the output file for future use - self.output_file = input_models.meta.asn_table.products[0].name + self.output_file = input_models.asn_table["products"][0]["name"] # Find all the member types in the product members_by_type = defaultdict(list) - prod = input_models.meta.asn_table.products[0].instance + prod = input_models.asn_table["products"][0] for member in prod['members']: members_by_type[member['exptype'].lower()].append(member['expname']) @@ -216,7 +216,7 @@ def process(self, user_input): blendmeta.blendmodels(result, inputs=targ_files) try: - result.meta.asn.pool_name = input_models.meta.asn_table.asn_pool + result.meta.asn.pool_name = input_models.asn_pool_name result.meta.asn.table_name = op.basename(user_input) except AttributeError: self.log.debug('Cannot set association information on final') diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 7fd69e4373..723df9c4a8 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -129,12 +129,12 @@ def process(self, input): # products until the individual tasks work and do it themselves exptype = input_models[0].meta.exposure.type model_type = input_models[0].meta.model_type - output_file = input_models.meta.asn_table.products[0].name + output_file = input_models.asn_table["products"][0]["name"] self.output_file = output_file # Find all the member types in the product members_by_type = defaultdict(list) - product = input_models.meta.asn_table.products[0].instance + product = input_models.asn_table["products"][0] for member in product['members']: members_by_type[member['exptype'].lower()].append(member['expname']) @@ -146,7 +146,7 @@ def process(self, input): # If background data are present, call the master background step if members_by_type['background']: source_models = self.master_background(input_models) - source_models.meta.asn_table = 
input_models.meta.asn_table + source_models.asn_table = input_models.asn_table # If the step is skipped, do the container splitting that # would've been done in master_background From 53635c17d952480a79fa1ca137cb068fe9229e20 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 5 Sep 2024 11:09:08 -0400 Subject: [PATCH 60/85] fix file naming with sourcemodelcontainer --- jwst/outlier_detection/outlier_detection_step.py | 2 +- jwst/outlier_detection/spec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index f18fb054e3..cfd121dfb2 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -231,7 +231,7 @@ def _get_asn_id(self, input_models): def _set_status(self, input_models, status): # this might be called with the input which might be a filename or path - if not isinstance(input_models, (datamodels.JwstDataModel, ModelLibrary)): + if not isinstance(input_models, (datamodels.JwstDataModel, ModelLibrary, ModelContainer)): input_models = datamodels.open(input_models) record_step_status(input_models, "outlier_detection", status) diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py index 74a21965a4..63004e4db7 100644 --- a/jwst/outlier_detection/spec.py +++ b/jwst/outlier_detection/spec.py @@ -45,7 +45,7 @@ def detect_outliers( See `OutlierDetectionStep.spec` for documentation of these arguments. 
""" if not isinstance(input_models, ModelContainer): - input_models = ModelContainer(input_models, save_open=in_memory) + input_models = ModelContainer(input_models) if len(input_models) < 2: log.warning(f"Input only contains {len(input_models)} exposures") From 225c21ea189319e3ea0fee91339b1e0a30b38353 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 5 Sep 2024 12:21:58 -0400 Subject: [PATCH 61/85] removed schema url from container --- CHANGES.rst | 5 +++++ jwst/datamodels/container.py | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 78365d337a..c1df4d5d4e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -271,6 +271,11 @@ dark_current - Add log info message when specifying an average_dark_current for noise calculations. [#8425] +datamodels +---------- + +- https://plwishmaster.stsci.edu:8081/job/RT/job/JWST-Developers-Pull-Requests/1686/ + documentation ------------- diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 399feadeb3..df7e858064 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -123,7 +123,6 @@ class ModelContainer(Sequence): ``models_grouped`` property for more details. 
""" - schema_url = None def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): @@ -198,8 +197,7 @@ def __enter__(self) -> str: return self def __exit__(self, *args: object) -> None: - for model in self._models: - model.close() + self.close() def copy(self, memo=None): """ From 71a2a3bd1ebe9b727642e627cab17a23c7970fc0 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 6 Sep 2024 15:44:00 -0400 Subject: [PATCH 62/85] removed some instances of container.meta --- jwst/cube_build/data_types.py | 2 +- jwst/master_background/master_background_step.py | 6 ++++-- jwst/pipeline/calwebb_tso3.py | 12 ++++++------ jwst/pixel_replace/pixel_replace_step.py | 9 ++++++--- jwst/tweakreg/tests/test_tweakreg.py | 5 ++++- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/jwst/cube_build/data_types.py b/jwst/cube_build/data_types.py index d7217087a4..711f729bd1 100644 --- a/jwst/cube_build/data_types.py +++ b/jwst/cube_build/data_types.py @@ -89,7 +89,7 @@ def __init__(self, input, single, output_file, output_dir): self.output_name = 'Temp' self.input_models = input_models if not single: # find the name of the output file from the association - self.output_name = input_models.meta.asn_table.products[0].name + self.output_name = input_models.asn_table["products"][0]["name"] else: # close files opened above self.close() diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index 83db84babe..3f404a9a5e 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -86,6 +86,7 @@ def process(self, input): if self.user_background: if isinstance(input_data, ModelContainer): input_data, _ = split_container(input_data) + asn_id = input_data.asn_table["asn_id"] del _ result = ModelContainer() result.update(input_data) @@ -100,6 +101,7 @@ def process(self, input): model.meta.background.master_background_file = basename(self.user_background) # 
Use user-supplied master background and subtract it else: + asn_id = input_data.meta.asn_table.asn_id background_2d = expand_to_2d(input_data, self.user_background) background_2d_collection = background_2d result = subtract_2d_background(input_data, background_2d) @@ -109,14 +111,14 @@ def process(self, input): # Save the computed 2d background if requested by user. The user has supplied # the master background so just save the expanded 2d background if self.save_background: - asn_id = input_data.meta.asn_table.asn_id + self.save_model(background_2d_collection, suffix='masterbg2d', force=True, asn_id=asn_id) # Compute master background and subtract it else: if isinstance(input_data, ModelContainer): input_data, background_data = split_container(input_data) - asn_id = input_data.meta.asn_table.asn_id + asn_id = input_data.asn_table["asn_id"] for model in background_data: # Check if the background members are nodded x1d extractions diff --git a/jwst/pipeline/calwebb_tso3.py b/jwst/pipeline/calwebb_tso3.py index 2b9e14ae86..f5d974f48b 100644 --- a/jwst/pipeline/calwebb_tso3.py +++ b/jwst/pipeline/calwebb_tso3.py @@ -72,11 +72,11 @@ def process(self, input): return if self.output_file is None: - self.output_file = input_models.meta.asn_table.products[0].name + self.output_file = input_models.asn_table["products"][0]["name"] # This asn_id assignment is important as it allows outlier detection # to know the asn_id since that step receives the cube as input. 
- self.asn_id = input_models.meta.asn_table.asn_id + self.asn_id = input_models.asn_table["asn_id"] self.outlier_detection.mode = 'tso' # Input may consist of multiple exposures, so loop over each of them @@ -100,13 +100,13 @@ def process(self, input): original_filename = cube.meta.filename # ensure output filename will not have duplicate asn_id - if "_"+input_models.meta.asn_table.asn_id in original_filename: + if "_"+self.asn_id in original_filename: original_filename = original_filename.replace( - "_"+input_models.meta.asn_table.asn_id, '' + "_"+self.asn_id, '' ) self.save_model( cube, output_file=original_filename, suffix='crfints', - asn_id=input_models.meta.asn_table.asn_id + asn_id=self.asn_id ) cube.meta.filename = original_filename input_models[i] = cube @@ -173,7 +173,7 @@ def process(self, input): phot_result_list.append(self.white_light(result)) # Update some metadata from the association - x1d_result.meta.asn.pool_name = input_models.meta.asn_table.asn_pool + x1d_result.meta.asn.pool_name = input_models.asn_table["asn_pool"] x1d_result.meta.asn.table_name = op.basename(input) # Save the final x1d Multispec model diff --git a/jwst/pixel_replace/pixel_replace_step.py b/jwst/pixel_replace/pixel_replace_step.py index 7aa07b3dfd..6801cd04c7 100644 --- a/jwst/pixel_replace/pixel_replace_step.py +++ b/jwst/pixel_replace/pixel_replace_step.py @@ -52,8 +52,11 @@ def process(self, input): with datamodels.open(input) as input_model: # If more than one 2d spectrum exists in input, call replacement - if input_model.meta.model_type in ['MultiSlitModel', 'SlitModel', - 'ImageModel', 'IFUImageModel', 'CubeModel']: + if isinstance(input_model, (datamodels.MultiSlitModel, + datamodels.SlitModel, + datamodels.ImageModel, + datamodels.IFUImageModel, + datamodels.CubeModel)): self.log.debug('Input is a {input_model.meta.model_type}.') elif isinstance(input_model, datamodels.ModelContainer): self.log.debug('Input is a ModelContainer.') @@ -77,7 +80,7 @@ def process(self, 
input): # Setup output path naming if associations are involved. asn_id = None try: - asn_id = input_model.meta.asn_table.asn_id + asn_id = input_model.asn_table["asn_id"] except (AttributeError, KeyError): pass if asn_id is None: diff --git a/jwst/tweakreg/tests/test_tweakreg.py b/jwst/tweakreg/tests/test_tweakreg.py index a038410e69..0c6b1bfd06 100644 --- a/jwst/tweakreg/tests/test_tweakreg.py +++ b/jwst/tweakreg/tests/test_tweakreg.py @@ -290,7 +290,10 @@ def test_custom_catalog(custom_catalog_path, example_input, catfile, asn, meta, example_input[0].meta.tweakreg_catalog = "" # write out the ModelContainer and association (so the association table will be loaded) - example_input.save(dir_path=str(custom_catalog_path.parent)) + #example_input.save(dir_path=str(custom_catalog_path.parent)) + for model in example_input: + model.save(dir_path=str(custom_catalog_path.parent)) + model.close() asn_data = { 'asn_id': 'foo', 'asn_pool': 'bar', From d876ad59f13905f01aafe0982f04e1e29299b343 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Sep 2024 10:55:07 -0400 Subject: [PATCH 63/85] small fixes to metadata setting and tests --- jwst/datamodels/container.py | 38 ++++++++++++++++++++++++++++ jwst/extract_1d/extract.py | 3 +++ jwst/tweakreg/tests/test_tweakreg.py | 3 +-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index df7e858064..d8fad3c152 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -1,5 +1,6 @@ from collections import OrderedDict from collections.abc import Sequence +import json import os.path as op import re import logging @@ -301,6 +302,43 @@ def from_asn(self, asn_data): pass + # def save(self, + # path=None, + # dir_path=None, + # save_model_func=None, + # **kwargs): + # """ + # Save container as .json and .fits files. 
+ # The .json file should reflect the files that are currently in the model list, and not + # necessarily what is in the association table, since containers can be appended + + # Notes + # ----- + # This will lose lots of metadata. Is that behavior okay? The real way to fix + # this would be to modify the asn table every time a model is appended to or + # removed from the container. This would not only be a lot of work but would also + # require API changes to ensure that each model was assigned an exptype when appended + # """ + # # write out all the models + # for model in self._models: + # model.save(model.meta.filename, dir_path=dir_path) + # model.close() + # asn_data = { + # 'asn_id': self.asn_table['asn_id'], + # 'asn_pool': self.asn_table['asn_pool'], + # 'products': [ + # { + # 'members': [{'expname': m.meta.filename} for m in self._models], + # }, + # ], + # } + + # # write out a basic association table + # asn_path = dir_path / path + # with open(asn_path, 'w') as f: + # json.dump(asn_data, f) + + @property def models_grouped(self): """ diff --git a/jwst/extract_1d/extract.py b/jwst/extract_1d/extract.py index 7c0198bad7..56c87caca1 100644 --- a/jwst/extract_1d/extract.py +++ b/jwst/extract_1d/extract.py @@ -2853,6 +2853,9 @@ def do_extract1d( else: meta_source = input_model + print("META_SOURCE", meta_source) + print("META_SOURCE[0]", meta_source[0]) + # Setup the output model output_model = datamodels.MultiSpecModel() diff --git a/jwst/tweakreg/tests/test_tweakreg.py b/jwst/tweakreg/tests/test_tweakreg.py index 0c6b1bfd06..ac466026b4 100644 --- a/jwst/tweakreg/tests/test_tweakreg.py +++ b/jwst/tweakreg/tests/test_tweakreg.py @@ -290,9 +290,8 @@ def test_custom_catalog(custom_catalog_path, example_input, catfile, asn, meta, example_input[0].meta.tweakreg_catalog = "" # write out the ModelContainer and association (so the association table will be loaded) - #example_input.save(dir_path=str(custom_catalog_path.parent)) for model in example_input: - 
model.save(dir_path=str(custom_catalog_path.parent)) + model.save(model.meta.filename, dir_path=str(custom_catalog_path.parent)) model.close() asn_data = { 'asn_id': 'foo', From f6ed4b80375592bd25152c21ddee6ec6b0f2853b Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 17 Sep 2024 14:17:21 -0400 Subject: [PATCH 64/85] in the middle of fixing filename issues --- jwst/extract_1d/extract.py | 3 --- jwst/pipeline/calwebb_spec3.py | 3 +++ jwst/regtest/test_nirspec_fs_spec3_moving_target.py | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/jwst/extract_1d/extract.py b/jwst/extract_1d/extract.py index 56c87caca1..7c0198bad7 100644 --- a/jwst/extract_1d/extract.py +++ b/jwst/extract_1d/extract.py @@ -2853,9 +2853,6 @@ def do_extract1d( else: meta_source = input_model - print("META_SOURCE", meta_source) - print("META_SOURCE[0]", meta_source[0]) - # Setup the output model output_model = datamodels.MultiSpecModel() diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 723df9c4a8..ff3caf06a8 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -132,6 +132,9 @@ def process(self, input): output_file = input_models.asn_table["products"][0]["name"] self.output_file = output_file + print("self.output_file = ", self.output_file) + print("self.outlier_detection.save_model = ", self.outlier_detection.save_model) + # Find all the member types in the product members_by_type = defaultdict(list) product = input_models.asn_table["products"][0] diff --git a/jwst/regtest/test_nirspec_fs_spec3_moving_target.py b/jwst/regtest/test_nirspec_fs_spec3_moving_target.py index 8845cd4201..a100405384 100644 --- a/jwst/regtest/test_nirspec_fs_spec3_moving_target.py +++ b/jwst/regtest/test_nirspec_fs_spec3_moving_target.py @@ -32,6 +32,9 @@ def test_nirspec_fs_spec3_moving_target( """Test spec3 pipeline on a NIRSpec FS moving target.""" rtdata = rtdata_module + import os + print(os.listdir(os.getcwd())) + output = 
f"jw01245-o002_s000000001_nirspec_clear-prism-s200a1-subs200a1_{suffix}.fits" rtdata.output = output rtdata.get_truth(f"truth/test_nirspec_fs_spec3_moving_target/{output}") From 8b0be055b6e61ce5f2e79ee29592b34cd808c820 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 25 Sep 2024 15:21:50 -0400 Subject: [PATCH 65/85] fix stpipe unit tests --- jwst/stpipe/tests/steps/__init__.py | 9 +++++++-- jwst/stpipe/tests/test_input.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/jwst/stpipe/tests/steps/__init__.py b/jwst/stpipe/tests/steps/__init__.py index 503b3e3d00..eab156a0e5 100644 --- a/jwst/stpipe/tests/steps/__init__.py +++ b/jwst/stpipe/tests/steps/__init__.py @@ -2,6 +2,7 @@ from stdatamodels.jwst import datamodels from stdatamodels.jwst.datamodels import ImageModel +from jwst.datamodels import ModelContainer class StepWithReference(Step): @@ -177,8 +178,12 @@ class StepWithContainer(Step): def process(self, *args): container = [] - model1 = ImageModel(args[0]).copy() - model2 = ImageModel(args[0]).copy() + if isinstance(args[0], ModelContainer): + model = args[0][0] + else: + model = args[0] + model1 = ImageModel(model).copy() + model2 = ImageModel(model).copy() model1.meta.filename = 'swc_model1.fits' model2.meta.filename = 'swc_model2.fits' container.append(model1) diff --git a/jwst/stpipe/tests/test_input.py b/jwst/stpipe/tests/test_input.py index 8586dd37db..a628382458 100644 --- a/jwst/stpipe/tests/test_input.py +++ b/jwst/stpipe/tests/test_input.py @@ -9,7 +9,7 @@ from jwst.stpipe import Step from jwst.datamodels import ModelContainer -from jwst.stpipe.tests.steps import StepWithModel +from jwst.stpipe.tests.steps import StepWithModel, StepWithContainer from jwst.stpipe.tests.util import t_path @@ -18,7 +18,7 @@ def test_default_input_with_container(mk_tmp_dirs): model_path = t_path('data/flat.fits') with ModelContainer([model_path]) as container: - step = StepWithModel() + step = StepWithContainer() step.run(container) assert 
step._input_filename is None From a4f22f360b91591ded48380d0dd47f5de0e6396c Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 26 Sep 2024 13:10:17 -0400 Subject: [PATCH 66/85] draft fix for JP-2038 --- CHANGES.rst | 5 --- changes/8831.datamodels.rst | 1 + jwst/datamodels/container.py | 41 +------------------ jwst/datamodels/tests/data/association.json | 6 +-- .../tests/data/association_with_paths.json | 27 ++++++++++++ .../datamodels/tests/test_open_association.py | 22 +++++++++- 6 files changed, 53 insertions(+), 49 deletions(-) create mode 100644 changes/8831.datamodels.rst create mode 100644 jwst/datamodels/tests/data/association_with_paths.json diff --git a/CHANGES.rst b/CHANGES.rst index 467898508d..7500ad652f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -508,11 +508,6 @@ dark_current - Add log info message when specifying an average_dark_current for noise calculations. [#8425] -datamodels ----------- - -- https://plwishmaster.stsci.edu:8081/job/RT/job/JWST-Developers-Pull-Requests/1686/ - documentation ------------- diff --git a/changes/8831.datamodels.rst b/changes/8831.datamodels.rst new file mode 100644 index 0000000000..fdb8209569 --- /dev/null +++ b/changes/8831.datamodels.rst @@ -0,0 +1 @@ +Remove memory-saving options and save method from ModelContainer \ No newline at end of file diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index d8fad3c152..84e171b87a 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -1,6 +1,5 @@ from collections import OrderedDict from collections.abc import Sequence -import json import os.path as op import re import logging @@ -10,6 +9,7 @@ from stdatamodels.jwst.datamodels.model_base import JwstDataModel from stdatamodels.jwst.datamodels.util import open as datamodel_open from stdatamodels.jwst.datamodels.util import is_association +from stpipe.step import _make_input_path __doctest_skip__ = ['ModelContainer'] @@ -267,7 +267,7 @@ def from_asn(self, asn_data): sublist = 
infiles try: for member in sublist: - filepath = op.join(asn_dir, member['expname']) + filepath = _make_input_path(member['expname'], asn_dir) m = datamodel_open(filepath, memmap=self._memmap) m.meta.asn.exptype = member['exptype'] for attr, val in member.items(): @@ -302,43 +302,6 @@ def from_asn(self, asn_data): pass - # def save(self, - # path=None, - # dir_path=None, - # save_model_func=None, - # **kwargs): - # """ - # Save container as .json and .fits files. - # The .json file should reflect the files that are currently in the model list, and not - # necessarily what is in the association table, since containers can be appended - - # Notes - # ----- - # This will lose lots of metadata. Is that behavior okay? The real way to fix - # this would be to modify the asn table every time a model is appended to or - # removed from the container. This would not only be a lot of work but would also - # require API changes to ensure that each model was assigned an exptype when appended - # """ - # # write out all the models - # for model in self._models: - # model.save(model.meta.filename, dir_path=dir_path) - # model.close() - # asn_data = { - # 'asn_id': self.asn_table['asn_id'], - # 'asn_pool': self.asn_table['asn_pool'], - # 'products': [ - # { - # 'members': [{'expname': m.meta.filename} for m in self._models], - # }, - # ], - # } - - # # write out a basic association table - # asn_path = dir_path / path - # with open(asn_path, 'w') as f: - # json.dump(asn_data, f) - - @property def models_grouped(self): """ diff --git a/jwst/datamodels/tests/data/association.json b/jwst/datamodels/tests/data/association.json index 0b3d4378d3..4dfa293f57 100644 --- a/jwst/datamodels/tests/data/association.json +++ b/jwst/datamodels/tests/data/association.json @@ -13,12 +13,12 @@ "members": [ { "exptype": "SCIENCE", - "expname": "test.fits", + "expname": "test.fits" }, { "exptype": "SCIENCE", - "expname": "test.fits", - }, + "expname": "test.fits" + } ] } ], diff --git 
a/jwst/datamodels/tests/data/association_with_paths.json b/jwst/datamodels/tests/data/association_with_paths.json new file mode 100644 index 0000000000..88152161c7 --- /dev/null +++ b/jwst/datamodels/tests/data/association_with_paths.json @@ -0,0 +1,27 @@ +{ + "asn_rule": "Asn_Image", + "degraded_status": "No known degraded exposures in association.", + "target": "1", + "code_version": "0.7.0", + "version_id": null, + "asn_pool": "pool", + "asn_id": "a3001", + "program": "80600", + "products": [ + { + "name": "jw80600-a3001_t001_miri_p750l", + "members": [ + { + "exptype": "SCIENCE", + "expname": "data/test.fits" + }, + { + "exptype": "SCIENCE", + "expname": "data/test.fits" + } + ] + } + ], + "asn_type": "image", + "constraints": "Constraints:\n target: 1\n opt_elem: P750L\n exp_type: MIR_LRS-SLITLESS|MIR_TACQ\n instrument: MIRI\n program: 80600\n subarray: SUBPRISM" +} diff --git a/jwst/datamodels/tests/test_open_association.py b/jwst/datamodels/tests/test_open_association.py index c11c9ec8dc..d4f243d3af 100644 --- a/jwst/datamodels/tests/test_open_association.py +++ b/jwst/datamodels/tests/test_open_association.py @@ -1,3 +1,4 @@ +import json import os import os.path import warnings @@ -9,12 +10,12 @@ # Define artificial memory size MEMORY = 100 # 100 bytes - +DATADIR = "data" # Utilities def t_path(partial_path): """Construction the full path for test files""" - test_dir = os.path.join(os.path.dirname(__file__), 'data') + test_dir = os.path.join(os.path.dirname(__file__), DATADIR) return os.path.join(test_dir, partial_path) @@ -36,3 +37,20 @@ def test_container_open_asn_with_sourcecat(): with datamodels.open(path, asn_exptypes="science") as c: for model in c: assert model.meta.asn.table_name == "association_w_cat.json" + + +def test_open_with_relative_path_inside_asn(): + """Coverage for bug where relative paths inside filenames in an ASN would not be found, + see JP-2038 / GitHub Issue 5950""" + asn_file = t_path("association_with_paths.json") + + # ensure 
that there are indeed relative paths in the new asn + with open(asn_file, "r") as f: + asn_data = json.load(f) + assert asn_data["products"][0]["members"][0]["expname"].split("/") == [DATADIR, "test.fits"] + + # cehck that this can be opened + with datamodels.open(asn_file) as c: + for model in c: + assert model.meta.asn.table_name == "association_with_paths.json" + assert model.meta.asn.pool_name == "pool" \ No newline at end of file From ecf4b0fef13a0b96d27a72613f8ca12e56fecff1 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 26 Sep 2024 14:58:34 -0400 Subject: [PATCH 67/85] fix master background unit and regression test failures --- jwst/master_background/master_background_step.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index 3f404a9a5e..dcd0cbd00d 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -89,7 +89,6 @@ def process(self, input): asn_id = input_data.asn_table["asn_id"] del _ result = ModelContainer() - result.update(input_data) background_2d_collection = ModelContainer() background_2d_collection.update(input_data) for model in input_data: @@ -101,7 +100,7 @@ def process(self, input): model.meta.background.master_background_file = basename(self.user_background) # Use user-supplied master background and subtract it else: - asn_id = input_data.meta.asn_table.asn_id + asn_id = None background_2d = expand_to_2d(input_data, self.user_background) background_2d_collection = background_2d result = subtract_2d_background(input_data, background_2d) @@ -145,9 +144,7 @@ def process(self, input): background_data.close() result = ModelContainer() - result.update(input_data) background_2d_collection = ModelContainer() - background_2d_collection.update(input_data) for model in input_data: background_2d = expand_to_2d(model, master_background) 
result.append(subtract_2d_background(model, background_2d)) @@ -156,14 +153,13 @@ def process(self, input): input_data.close() else: - result = input_data.copy() input_data.close() self.log.warning( "Input %s of type %s cannot be handled without user-supplied background. Step skipped.", input, type(input) ) - record_step_status(result, 'master_background', success=False) - return result + record_step_status(input_data, 'master_background', success=False) + return input_data # Save the computed background if requested by user if self.save_background: From 52c9fb8b86b9d2c57d32735d653f1eb7fd770572 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 26 Sep 2024 15:18:23 -0400 Subject: [PATCH 68/85] fix regtest errors in niriss wfss stemming from extract_1d --- jwst/extract_1d/extract.py | 6 ++---- jwst/extract_1d/extract_1d_step.py | 5 ++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/jwst/extract_1d/extract.py b/jwst/extract_1d/extract.py index 7c0198bad7..5235a4a010 100644 --- a/jwst/extract_1d/extract.py +++ b/jwst/extract_1d/extract.py @@ -2846,10 +2846,7 @@ def do_extract1d( # Set "meta_source" to either the first model in a container, or the individual input model, for convenience # of retrieving meta attributes in subsequent statements if was_source_model: - if isinstance(input_model, datamodels.SlitModel): # input_model is SourceContainer with a single SlitModel - meta_source = input_model - else: - meta_source = input_model[0] + meta_source = input_model[0] else: meta_source = input_model @@ -2859,6 +2856,7 @@ def do_extract1d( if hasattr(meta_source, "int_times"): output_model.int_times = meta_source.int_times.copy() + print(meta_source) output_model.update(meta_source, only='PRIMARY') # This will be relevant if we're asked to extract a spectrum and the spectral order is zero. 
diff --git a/jwst/extract_1d/extract_1d_step.py b/jwst/extract_1d/extract_1d_step.py index f8b43b0178..3ff620e6c5 100644 --- a/jwst/extract_1d/extract_1d_step.py +++ b/jwst/extract_1d/extract_1d_step.py @@ -197,7 +197,10 @@ def process(self, input): """ # Open the input and figure out what type of model it is - input_model = datamodels.open(input) + if isinstance(input, ModelContainer): + input_model = input + else: + input_model = datamodels.open(input) was_source_model = False # default value if isinstance(input_model, datamodels.CubeModel): From 3e5deda578c811e4267a26263bb574f564bdf57d Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 26 Sep 2024 18:41:20 -0400 Subject: [PATCH 69/85] fix badpix_selfcal regtest failure --- jwst/badpix_selfcal/badpix_selfcal_step.py | 7 ++++++- jwst/regtest/test_miri_mrs_badpix_selfcal.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/jwst/badpix_selfcal/badpix_selfcal_step.py b/jwst/badpix_selfcal/badpix_selfcal_step.py index 7d9ad62c9f..ec614bf64f 100644 --- a/jwst/badpix_selfcal/badpix_selfcal_step.py +++ b/jwst/badpix_selfcal/badpix_selfcal_step.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence import warnings from ..stpipe import Step from . import badpix_selfcal @@ -35,9 +36,12 @@ class BadpixSelfcalStep(Step): """ def save_model(self, model, *args, **kwargs): - """Override save_model to suppress index 0 when save_model is True + """Override save_model to only save the science models + and to ignore the index for save_bkg """ kwargs["idx"] = None + if isinstance(model, Sequence): + model = model[0] return Step.save_model(self, model, *args, **kwargs) def save_bkg(self, bkg_list, suffix="badpix_selfcal_bkg"): @@ -82,6 +86,7 @@ def process(self, input, selfcal_list=None, bkg_list=None): i.e., true self-calibration. 
""" input_sci, selfcal_list, bkg_list = _parse_inputs(input, selfcal_list, bkg_list) + print(input_sci, selfcal_list, bkg_list) # ensure that there are background exposures to use, otherwise skip the step # unless forced diff --git a/jwst/regtest/test_miri_mrs_badpix_selfcal.py b/jwst/regtest/test_miri_mrs_badpix_selfcal.py index a65d0cbea4..3404c45bbf 100644 --- a/jwst/regtest/test_miri_mrs_badpix_selfcal.py +++ b/jwst/regtest/test_miri_mrs_badpix_selfcal.py @@ -63,6 +63,11 @@ def test_miri_mrs_badpix_selfcal(run_pipeline_selfcal, fitsdiff_default_kwargs): def test_miri_mrs_badpix_selfcal_bkg(run_pipeline_background, fitsdiff_default_kwargs): """Run a test for MIRI MRS data with dedicated background exposures.""" + # THIS IS FAILING BECAUSE IT'S SAVING FOUR FILES WHEN IT SHOULD SAVE ONE + # there is only one result_bkgasn_badpix_selfcal.fits file in truth directory + # but there are four instances of "saved model in result_bkgasn_badpix_selfcal.fits" in the log file + # reason is because of changes to step.save_model in stpipe + rtdata = run_pipeline_background # Get the truth file From b463a99ef15f0de324b1e6a840bd234006cf594a Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 7 Oct 2024 15:58:16 -0400 Subject: [PATCH 70/85] revert changes that badly attempted fix for relative paths in asn --- jwst/datamodels/container.py | 3 +-- .../tests/data/association_with_paths.json | 27 ------------------- .../datamodels/tests/test_open_association.py | 21 +-------------- 3 files changed, 2 insertions(+), 49 deletions(-) delete mode 100644 jwst/datamodels/tests/data/association_with_paths.json diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 84e171b87a..df7e858064 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -9,7 +9,6 @@ from stdatamodels.jwst.datamodels.model_base import JwstDataModel from stdatamodels.jwst.datamodels.util import open as datamodel_open from stdatamodels.jwst.datamodels.util import 
is_association -from stpipe.step import _make_input_path __doctest_skip__ = ['ModelContainer'] @@ -267,7 +266,7 @@ def from_asn(self, asn_data): sublist = infiles try: for member in sublist: - filepath = _make_input_path(member['expname'], asn_dir) + filepath = op.join(asn_dir, member['expname']) m = datamodel_open(filepath, memmap=self._memmap) m.meta.asn.exptype = member['exptype'] for attr, val in member.items(): diff --git a/jwst/datamodels/tests/data/association_with_paths.json b/jwst/datamodels/tests/data/association_with_paths.json deleted file mode 100644 index 88152161c7..0000000000 --- a/jwst/datamodels/tests/data/association_with_paths.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "asn_rule": "Asn_Image", - "degraded_status": "No known degraded exposures in association.", - "target": "1", - "code_version": "0.7.0", - "version_id": null, - "asn_pool": "pool", - "asn_id": "a3001", - "program": "80600", - "products": [ - { - "name": "jw80600-a3001_t001_miri_p750l", - "members": [ - { - "exptype": "SCIENCE", - "expname": "data/test.fits" - }, - { - "exptype": "SCIENCE", - "expname": "data/test.fits" - } - ] - } - ], - "asn_type": "image", - "constraints": "Constraints:\n target: 1\n opt_elem: P750L\n exp_type: MIR_LRS-SLITLESS|MIR_TACQ\n instrument: MIRI\n program: 80600\n subarray: SUBPRISM" -} diff --git a/jwst/datamodels/tests/test_open_association.py b/jwst/datamodels/tests/test_open_association.py index d4f243d3af..5ca8c4b930 100644 --- a/jwst/datamodels/tests/test_open_association.py +++ b/jwst/datamodels/tests/test_open_association.py @@ -1,4 +1,3 @@ -import json import os import os.path import warnings @@ -10,12 +9,11 @@ # Define artificial memory size MEMORY = 100 # 100 bytes -DATADIR = "data" # Utilities def t_path(partial_path): """Construction the full path for test files""" - test_dir = os.path.join(os.path.dirname(__file__), DATADIR) + test_dir = os.path.join(os.path.dirname(__file__), 'data') return os.path.join(test_dir, partial_path) @@ -37,20 
+35,3 @@ def test_container_open_asn_with_sourcecat(): with datamodels.open(path, asn_exptypes="science") as c: for model in c: assert model.meta.asn.table_name == "association_w_cat.json" - - -def test_open_with_relative_path_inside_asn(): - """Coverage for bug where relative paths inside filenames in an ASN would not be found, - see JP-2038 / GitHub Issue 5950""" - asn_file = t_path("association_with_paths.json") - - # ensure that there are indeed relative paths in the new asn - with open(asn_file, "r") as f: - asn_data = json.load(f) - assert asn_data["products"][0]["members"][0]["expname"].split("/") == [DATADIR, "test.fits"] - - # cehck that this can be opened - with datamodels.open(asn_file) as c: - for model in c: - assert model.meta.asn.table_name == "association_with_paths.json" - assert model.meta.asn.pool_name == "pool" \ No newline at end of file From 02ceca9092ebe260a3f539eb952eb88d03002fdd Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 7 Oct 2024 16:23:13 -0400 Subject: [PATCH 71/85] fixes from Perry review --- jwst/badpix_selfcal/badpix_selfcal_step.py | 1 - jwst/datamodels/container.py | 2 +- jwst/extract_1d/extract.py | 1 - jwst/pipeline/calwebb_spec3.py | 3 --- 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/jwst/badpix_selfcal/badpix_selfcal_step.py b/jwst/badpix_selfcal/badpix_selfcal_step.py index ec614bf64f..f059bedbf9 100644 --- a/jwst/badpix_selfcal/badpix_selfcal_step.py +++ b/jwst/badpix_selfcal/badpix_selfcal_step.py @@ -86,7 +86,6 @@ def process(self, input, selfcal_list=None, bkg_list=None): i.e., true self-calibration. 
""" input_sci, selfcal_list, bkg_list = _parse_inputs(input, selfcal_list, bkg_list) - print(input_sci, selfcal_list, bkg_list) # ensure that there are background exposures to use, otherwise skip the step # unless forced diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index df7e858064..748377b428 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -284,7 +284,7 @@ def from_asn(self, asn_data): self.close() raise - # Pull the whole association table into meta.asn_table + # Pull the whole association table into the asn_table attribute self.asn_table = {} properties.merge_tree( self.asn_table, asn_data diff --git a/jwst/extract_1d/extract.py b/jwst/extract_1d/extract.py index 5235a4a010..599e265db5 100644 --- a/jwst/extract_1d/extract.py +++ b/jwst/extract_1d/extract.py @@ -2856,7 +2856,6 @@ def do_extract1d( if hasattr(meta_source, "int_times"): output_model.int_times = meta_source.int_times.copy() - print(meta_source) output_model.update(meta_source, only='PRIMARY') # This will be relevant if we're asked to extract a spectrum and the spectral order is zero. 
diff --git a/jwst/pipeline/calwebb_spec3.py b/jwst/pipeline/calwebb_spec3.py index 067eb44031..d62bcc926f 100644 --- a/jwst/pipeline/calwebb_spec3.py +++ b/jwst/pipeline/calwebb_spec3.py @@ -131,9 +131,6 @@ def process(self, input): output_file = input_models.asn_table["products"][0]["name"] self.output_file = output_file - print("self.output_file = ", self.output_file) - print("self.outlier_detection.save_model = ", self.outlier_detection.save_model) - # Find all the member types in the product members_by_type = defaultdict(list) product = input_models.asn_table["products"][0] From 5d6dce71f21d3744bb5c750870d4cf834f4dcd51 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 8 Oct 2024 14:14:46 -0400 Subject: [PATCH 72/85] fix filename issues with nirspec ifu outlier detection --- jwst/outlier_detection/outlier_detection_step.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index e581e4048d..43c20d1a08 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -209,6 +209,8 @@ def _get_asn_id(self, input_models): try: if isinstance(input_models, ModelLibrary): asn_id = input_models.asn["asn_id"] + elif isinstance(input_models, ModelContainer): + asn_id = input_models.asn_table["asn_id"] else: asn_id = input_models.meta.asn_table.asn_id except (AttributeError, KeyError): From 6fbf9e02db71393238fe7ead5a9d8c6504c858dd Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 8 Oct 2024 15:07:02 -0400 Subject: [PATCH 73/85] add docs for container vs library --- docs/jwst/stpipe/devel_io_design.rst | 41 ++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index af233632f2..23b1193a20 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ b/docs/jwst/stpipe/devel_io_design.rst @@ -163,8 +163,8 @@ Many 
of the JWST calibration steps and pipelines expect an :ref:`Association ` file as input. When opened with :meth:`~jwst.stpipe.step.Step.open_model`, a :class:`~jwst.datamodels.ModelContainer` is returned. `ModelContainer` -is, among other features, a list-like object where each element is the -`DataModel` of each member of the association. The `meta.asn_table` is +is a list-like object where each element is the +`DataModel` of each member of the association. The `asn_table` attribute is populated with the association data structure, allowing direct access to the association itself. The association file, as well as the files listed in the association file, must be in the input directory. @@ -172,6 +172,43 @@ listed in the association file, must be in the input directory. To read in a list of files, or an association file, as an association, use the `load_as_level2_asn` or `load_as_level3_asn` methods. +ModelContainer vs ModelLibrary +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some steps in the pipeline, namely any steps involved in the Stage 3 Imaging pipeline, +rely on the :class:`~jwst.datamodels.ModelLibrary` class instead of the +:class:`~jwst.datamodels.ModelContainer` class to handle association-type data. +The `ModelLibrary` class is purpose-built for enabling memory-saving options in the +image3 pipeline and is not recommended for general use. +Additional documentation on the `ModelLibrary` class can be found in the +`stpipe ModelLibrary documentation `_. + +ModelContainer Changes in JWST 1.17 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In JWST 1.17, the `ModelContainer` class was de-scoped in light of the introduction of the +`ModelLibrary` class in JWST 1.16. The `ModelContainer` class is still the recommended class +for handling association-type data, but it is no longer a subclass of `JWSTDataModel`. 
The +following changes in behavior are noteworthy: + +* `ModelContainer` no longer has a `save()` method; the `save()` method should be called on + individual `DataModel` objects in the container. +* The `ModelContainer` class no longer has a `meta` attribute. The association data is now + stored in the top-level `asn_table` attribute, along with several other association-relevant + attributes including `asn_table_name`, `asn_pool_name`, `asn_exptypes`, `asn_n_members`, + `asn_file_path`. +* The `ModelContainer` class is still a list-like object. +* The `ModelContainer` class is still the default class returned by stdatamodels `open()`. +* The `ModelContainer` class can still be used as a context manager, such that `with open(asn_file.json)` + still works. +* All infrastructure that attempted memory savings in the `ModelContainer` class has been removed. + Use the `ModelLibrary` class if memory-saving options are needed. + +means that the `ModelContainer` class no longer has the `meta` attribute, and the association +data is now stored in the top-level `asn_table` attribute instead of `meta.asn_table`. 
+The `ModelContainer` class is still a list-like + + Input Source ------------ From bbb12f30cb4081a4b7a2efa7837f6db2d0db0e2e Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 8 Oct 2024 15:45:24 -0400 Subject: [PATCH 74/85] small fixes from self-review --- docs/jwst/stpipe/devel_io_design.rst | 17 ++++++----------- jwst/badpix_selfcal/badpix_selfcal_step.py | 11 +++++------ jwst/regtest/test_miri_mrs_badpix_selfcal.py | 5 ----- .../test_nirspec_fs_spec3_moving_target.py | 3 --- 4 files changed, 11 insertions(+), 25 deletions(-) diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index 23b1193a20..a9755375bc 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ b/docs/jwst/stpipe/devel_io_design.rst @@ -177,9 +177,9 @@ ModelContainer vs ModelLibrary Some steps in the pipeline, namely any steps involved in the Stage 3 Imaging pipeline, rely on the :class:`~jwst.datamodels.ModelLibrary` class instead of the -:class:`~jwst.datamodels.ModelContainer` class to handle association-type data. +:class:`~jwst.datamodels.ModelContainer` class to process association-type data. The `ModelLibrary` class is purpose-built for enabling memory-saving options in the -image3 pipeline and is not recommended for general use. +image3 pipeline and is only recommended when working with large associations. Additional documentation on the `ModelLibrary` class can be found in the `stpipe ModelLibrary documentation `_. @@ -192,22 +192,17 @@ for handling association-type data, but it is no longer a subclass of `JWSTDataM following changes in behavior are noteworthy: * `ModelContainer` no longer has a `save()` method; the `save()` method should be called on - individual `DataModel` objects in the container. + each of the individual `DataModel` objects in the container instead, as desired. * The `ModelContainer` class no longer has a `meta` attribute. 
The association data is now stored in the top-level `asn_table` attribute, along with several other association-relevant attributes including `asn_table_name`, `asn_pool_name`, `asn_exptypes`, `asn_n_members`, - `asn_file_path`. + `asn_file_path`. Note that `asn_table` is now a dictionary, not an `ObjectNode`. +* All infrastructure that attempted memory savings in the `ModelContainer` class has been removed. + Use the `ModelLibrary` class if memory-saving options are needed. * The `ModelContainer` class is still a list-like object. * The `ModelContainer` class is still the default class returned by stdatamodels `open()`. * The `ModelContainer` class can still be used as a context manager, such that `with open(asn_file.json)` still works. -* All infrastructure that attempted memory savings in the `ModelContainer` class has been removed. - Use the `ModelLibrary` class if memory-saving options are needed. - -means that the `ModelContainer` class no longer has the `meta` attribute, and the association -data is now stored in the top-level `asn_table` attribute instead of `meta.asn_table`. -The `ModelContainer` class is still a list-like - Input Source ------------ diff --git a/jwst/badpix_selfcal/badpix_selfcal_step.py b/jwst/badpix_selfcal/badpix_selfcal_step.py index f059bedbf9..d33169175d 100644 --- a/jwst/badpix_selfcal/badpix_selfcal_step.py +++ b/jwst/badpix_selfcal/badpix_selfcal_step.py @@ -1,4 +1,3 @@ -from collections.abc import Sequence import warnings from ..stpipe import Step from . 
import badpix_selfcal @@ -36,12 +35,12 @@ class BadpixSelfcalStep(Step): """ def save_model(self, model, *args, **kwargs): - """Override save_model to only save the science models - and to ignore the index for save_bkg - """ + # """Override save_model to only save the science models + # and to ignore the index for save_bkg + # """ kwargs["idx"] = None - if isinstance(model, Sequence): - model = model[0] + # if isinstance(model, Sequence): + # model = model[0] return Step.save_model(self, model, *args, **kwargs) def save_bkg(self, bkg_list, suffix="badpix_selfcal_bkg"): diff --git a/jwst/regtest/test_miri_mrs_badpix_selfcal.py b/jwst/regtest/test_miri_mrs_badpix_selfcal.py index 3404c45bbf..a65d0cbea4 100644 --- a/jwst/regtest/test_miri_mrs_badpix_selfcal.py +++ b/jwst/regtest/test_miri_mrs_badpix_selfcal.py @@ -63,11 +63,6 @@ def test_miri_mrs_badpix_selfcal(run_pipeline_selfcal, fitsdiff_default_kwargs): def test_miri_mrs_badpix_selfcal_bkg(run_pipeline_background, fitsdiff_default_kwargs): """Run a test for MIRI MRS data with dedicated background exposures.""" - # THIS IS FAILING BECAUSE IT'S SAVING FOUR FILES WHEN IT SHOULD SAVE ONE - # there is only one result_bkgasn_badpix_selfcal.fits file in truth directory - # but there are four instances of "saved model in result_bkgasn_badpix_selfcal.fits" in the log file - # reason is because of changes to step.save_model in stpipe - rtdata = run_pipeline_background # Get the truth file diff --git a/jwst/regtest/test_nirspec_fs_spec3_moving_target.py b/jwst/regtest/test_nirspec_fs_spec3_moving_target.py index a100405384..8845cd4201 100644 --- a/jwst/regtest/test_nirspec_fs_spec3_moving_target.py +++ b/jwst/regtest/test_nirspec_fs_spec3_moving_target.py @@ -32,9 +32,6 @@ def test_nirspec_fs_spec3_moving_target( """Test spec3 pipeline on a NIRSpec FS moving target.""" rtdata = rtdata_module - import os - print(os.listdir(os.getcwd())) - output = f"jw01245-o002_s000000001_nirspec_clear-prism-s200a1-subs200a1_{suffix}.fits" 
rtdata.output = output rtdata.get_truth(f"truth/test_nirspec_fs_spec3_moving_target/{output}") From 736ecb9363d2def2edb39eca9d0125ce95dd2ef2 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 9 Oct 2024 15:30:18 -0400 Subject: [PATCH 75/85] update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c73a69fef9..d5a4308d61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "scipy>=1.9.3", "spherical-geometry>=1.2.22", "stcal @ git+https://github.com/spacetelescope/stcal.git@main", - "stdatamodels>=2.1.0,<2.2.0", + "stdatamodels @ git+https://github.com/spacetelescope/stdatamodels.git@main", "stpipe>=0.7.0,<0.8.0", "stsci.imagestats>=1.6.3", "synphot>=1.2", From 3cd76d8805342ce407c330a13d9d34b1f2ff9d40 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 9 Oct 2024 16:57:28 -0400 Subject: [PATCH 76/85] bugfix to avoid calling save_model directly on container --- jwst/master_background/master_background_step.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index dcd0cbd00d..f0ffcdf54b 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -110,8 +110,8 @@ def process(self, input): # Save the computed 2d background if requested by user. 
The user has supplied # the master background so just save the expanded 2d background if self.save_background: - - self.save_model(background_2d_collection, suffix='masterbg2d', force=True, asn_id=asn_id) + print(type(background_2d_collection)) + self.save_container(background_2d_collection, suffix='masterbg2d', force=True, asn_id=asn_id) # Compute master background and subtract it else: @@ -164,7 +164,7 @@ def process(self, input): # Save the computed background if requested by user if self.save_background: self.save_model(master_background, suffix='masterbg1d', force=True, asn_id=asn_id) - self.save_model(background_2d_collection, suffix='masterbg2d', force=True, asn_id=asn_id) + self.save_container(background_2d_collection, suffix='masterbg2d', force=True, asn_id=asn_id) record_step_status(result, 'master_background', success=True) @@ -222,6 +222,11 @@ def _do_sub(self): "run again and set force_subtract = True.") return do_sub + + def save_container(self, container, suffix="", asn_id="", force=True): + """Save all models in container for intermediate background subtraction""" + for i, model in enumerate(container): + self.save_model(model, suffix=suffix, force=force, asn_id=asn_id, idx=i) def copy_background_to_surf_bright(spectrum): From a14ea56ff952a33538a9fb150ed83d2db52b0f7a Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 10 Oct 2024 17:08:49 -0400 Subject: [PATCH 77/85] restore save method to container --- jwst/badpix_selfcal/badpix_selfcal_step.py | 5 -- jwst/datamodels/container.py | 91 ++++++++++++++++++++++ 2 files changed, 91 insertions(+), 5 deletions(-) diff --git a/jwst/badpix_selfcal/badpix_selfcal_step.py b/jwst/badpix_selfcal/badpix_selfcal_step.py index d33169175d..10fd8accea 100644 --- a/jwst/badpix_selfcal/badpix_selfcal_step.py +++ b/jwst/badpix_selfcal/badpix_selfcal_step.py @@ -35,12 +35,7 @@ class BadpixSelfcalStep(Step): """ def save_model(self, model, *args, **kwargs): - # """Override save_model to only save the science models - # 
and to ignore the index for save_bkg - # """ kwargs["idx"] = None - # if isinstance(model, Sequence): - # model = model[0] return Step.save_model(self, model, *args, **kwargs) def save_bkg(self, bkg_list, suffix="badpix_selfcal_bkg"): diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 748377b428..d315c94c4f 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -300,6 +300,66 @@ def from_asn(self, asn_data): except AttributeError: pass + def save(self, + path=None, + dir_path=None, + save_model_func=None, + **kwargs): + """ + Write out models in container to FITS or ASDF. + + Parameters + ---------- + path : str or func or None + - If None, the `meta.filename` is used for each model. + - If a string, the string is used as a root and an index is + appended. + - If a function, the function takes the two arguments: + the value of model.meta.filename and the + `idx` index, returning constructed file name. + + dir_path : str + Directory to write out files. Defaults to current working dir. + If directory does not exist, it creates it. Filenames are pulled + from `.meta.filename` of each datamodel in the container. + + save_model_func: func or None + Alternate function to save each model instead of + the models `save` method. Takes one argument, the model, + and keyword argument `idx` for an index. + + Returns + ------- + output_paths: [str[, ...]] + List of output file paths of where the models were saved. 
+ """ + output_paths = [] + if path is None: + def path(filename, idx=None): + return filename + elif not callable(path): + path = make_file_with_index + + for idx, model in enumerate(self): + if len(self) <= 1: + idx = None + if save_model_func is None: + outpath, filename = op.split( + path(model.meta.filename, idx=idx) + ) + if dir_path: + outpath = dir_path + save_path = op.join(outpath, filename) + try: + output_paths.append( + model.save(save_path, **kwargs) + ) + except IOError as err: + raise err + + else: + output_paths.append(save_model_func(model, idx=idx)) + return output_paths @property def models_grouped(self): @@ -404,7 +464,15 @@ def get_crds_parameters(self): Returns ------- dict + + Notes + ----- + stpipe requires ModelContainer to have a crds_observatory attribute in order + to pass through step.run(), but it is never accessed. """ + msg = ("stpipe uses the get_crds_parameters method from the 0th model in the " + "ModelContainer. This method is currently not used.") + raise NotImplementedError(msg) with self._open_first_science_exposure() as model: return model.get_crds_parameters() @@ -445,3 +513,26 @@ def ind_asn_type(self, asn_exptype): if model.meta.asn.exptype.lower() == asn_exptype: ind.append(i) return ind + +def make_file_with_index(file_path, idx): + """Append an index to a filename + + Parameters + ---------- + file_path: str + The file to append the index to. 
+ idx: int + An index to append + + + Returns + ------- + file_path: str + Path with index appended + """ + # Decompose path + path_head, path_tail = op.split(file_path) + base, ext = op.splitext(path_tail) + if idx is not None: + base = base + str(idx) + return op.join(path_head, base + ext) \ No newline at end of file From d4ec18239bb0611da786d368b755b4f266a3badd Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 11 Oct 2024 09:23:11 -0400 Subject: [PATCH 78/85] added note to docs about lists --- docs/jwst/stpipe/devel_io_design.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index a9755375bc..46775a0bd0 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ b/docs/jwst/stpipe/devel_io_design.rst @@ -183,6 +183,11 @@ image3 pipeline and is only recommended when working with large associations. Additional documentation on the `ModelLibrary` class can be found in the `stpipe ModelLibrary documentation `_. +Developers should keep in mind that a `list` is a perfectly valid input and output to a +pipeline or step, and if association metadata is not needed, using a list is preferred +to using a ModelContainer in most cases. Initializing a ModelContainer from a list of +models (as opposed to an association or dictionary) is supported but almost never necessary. 
+ ModelContainer Changes in JWST 1.17 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 7e3dbd41c35d5776c3e48728dbc73a34f17bce84 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 11 Oct 2024 09:33:10 -0400 Subject: [PATCH 79/85] fixes from self review --- changes/8831.datamodels.rst | 2 +- docs/jwst/stpipe/devel_io_design.rst | 2 -- jwst/master_background/master_background_step.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/changes/8831.datamodels.rst b/changes/8831.datamodels.rst index fdb8209569..22289006df 100644 --- a/changes/8831.datamodels.rst +++ b/changes/8831.datamodels.rst @@ -1 +1 @@ -Remove memory-saving options and save method from ModelContainer \ No newline at end of file +Remove memory-saving options from ModelContainer \ No newline at end of file diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index 46775a0bd0..457507ac3c 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ b/docs/jwst/stpipe/devel_io_design.rst @@ -196,8 +196,6 @@ In JWST 1.17, the `ModelContainer` class was de-scoped in light of the introduct for handling association-type data, but it is no longer a subclass of `JWSTDataModel`. The following changes in behavior are noteworthy: -* `ModelContainer` no longer has a `save()` method; the `save()` method should be called on - each of the individual `DataModel` objects in the container instead, as desired. * The `ModelContainer` class no longer has a `meta` attribute. 
The association data is now stored in the top-level `asn_table` attribute, along with several other association-relevant attributes including `asn_table_name`, `asn_pool_name`, `asn_exptypes`, `asn_n_members`, diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index f0ffcdf54b..f4a0d87048 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -110,7 +110,6 @@ def process(self, input): # Save the computed 2d background if requested by user. The user has supplied # the master background so just save the expanded 2d background if self.save_background: - print(type(background_2d_collection)) self.save_container(background_2d_collection, suffix='masterbg2d', force=True, asn_id=asn_id) # Compute master background and subtract it From 64c1b5ae1ef93ca59bdca43e78e7a7071b2fbf89 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 14 Oct 2024 10:49:00 -0400 Subject: [PATCH 80/85] fix readthedocs build --- docs/jwst/stpipe/devel_io_design.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index 457507ac3c..4b1109850a 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ b/docs/jwst/stpipe/devel_io_design.rst @@ -173,7 +173,7 @@ To read in a list of files, or an association file, as an association, use the `load_as_level2_asn` or `load_as_level3_asn` methods. ModelContainer vs ModelLibrary -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`````````````````````````````` Some steps in the pipeline, namely any steps involved in the Stage 3 Imaging pipeline, rely on the :class:`~jwst.datamodels.ModelLibrary` class instead of the @@ -189,7 +189,7 @@ to using a ModelContainer in most cases. Initializing a ModelContainer from a li models (as opposed to an association or dictionary) is supported but almost never necessary. 
ModelContainer Changes in JWST 1.17 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``````````````````````````````````` In JWST 1.17, the `ModelContainer` class was de-scoped in light of the introduction of the `ModelLibrary` class in JWST 1.16. The `ModelContainer` class is still the recommended class From f0895580492377a8d77563ed976d050aab049992 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 14 Oct 2024 10:55:31 -0400 Subject: [PATCH 81/85] make mypy happy --- jwst/datamodels/container.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index d315c94c4f..04a37aea12 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -193,10 +193,10 @@ def extend(self, model): def pop(self, index=-1): self._models.pop(index) - def __enter__(self) -> str: + def __enter__(self): return self - def __exit__(self, *args: object) -> None: + def __exit__(self, *args): self.close() def copy(self, memo=None): From 7fab46c5b97844a5a6c20d9eac2d1204522e8e81 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 16 Oct 2024 10:09:19 -0400 Subject: [PATCH 82/85] fixes per @melanieclarke review --- docs/jwst/stpipe/devel_io_design.rst | 6 +- jwst/badpix_selfcal/badpix_selfcal_step.py | 1 + jwst/datamodels/container.py | 98 ++++--------------- .../master_background_step.py | 5 +- 4 files changed, 25 insertions(+), 85 deletions(-) diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index 4b1109850a..935b80c096 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ b/docs/jwst/stpipe/devel_io_design.rst @@ -202,8 +202,10 @@ following changes in behavior are noteworthy: `asn_file_path`. Note that `asn_table` is now a dictionary, not an `ObjectNode`. * All infrastructure that attempted memory savings in the `ModelContainer` class has been removed. Use the `ModelLibrary` class if memory-saving options are needed. 
-* The `ModelContainer` class is still a list-like object. -* The `ModelContainer` class is still the default class returned by stdatamodels `open()`. +* A `ModelContainer` object can no longer hold a list of `ModelContainer` objects. +* The `ModelContainer` class is still list-like, and can be indexed and sliced like a list. +* The `ModelContainer` class is still the default class returned by stdatamodels `open()` + for association-type input data, e.g. a .json file or dict. * The `ModelContainer` class can still be used as a context manager, such that `with open(asn_file.json)` still works. diff --git a/jwst/badpix_selfcal/badpix_selfcal_step.py b/jwst/badpix_selfcal/badpix_selfcal_step.py index 10fd8accea..b83aaa0f63 100644 --- a/jwst/badpix_selfcal/badpix_selfcal_step.py +++ b/jwst/badpix_selfcal/badpix_selfcal_step.py @@ -35,6 +35,7 @@ class BadpixSelfcalStep(Step): """ def save_model(self, model, *args, **kwargs): + """Override save_model to suppress index 0 when save_model is True""" kwargs["idx"] = None return Step.save_model(self, model, *args, **kwargs) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 04a37aea12..e0f6cb80df 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -15,6 +15,11 @@ __all__ = ['ModelContainer'] RECOGNIZED_MEMBER_FIELDS = ['tweakreg_catalog', 'group_id'] +DUMMY_ASN_TABLE = { + "asn_id": None, + "asn_pool": None, + "products": [{"name": None}] +} # Configure logging logger = logging.getLogger(__name__) @@ -129,7 +134,7 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): self._models = [] self.asn_exptypes = asn_exptypes self.asn_n_members = asn_n_members - self.asn_table = {} + self.asn_table = DUMMY_ASN_TABLE self.asn_table_name = None self.asn_pool_name = None self.asn_file_path = None @@ -302,31 +307,20 @@ def from_asn(self, asn_data): def save(self, path=None, - dir_path=None, - save_model_func=None, **kwargs): """ Write out models in 
container to FITS or ASDF. Parameters ---------- - path : str or func or None + path : str or None - If None, the `meta.filename` is used for each model. - If a string, the string is used as a root and an index is appended. - - If a function, the function takes the two arguments: - the value of model.meta.filename and the - `idx` index, returning constructed file name. - - dir_path : str - Directory to write out files. Defaults to current working dir. - If directory does not exist, it creates it. Filenames are pulled - from `.meta.filename` of each datamodel in the container. - save_model_func: func or None - Alternate function to save each model instead of - the models `save` method. Takes one argument, the model, - and keyword argument `idx` for an index. + kwargs : dict + Additional parameters to be passed to the `save` method of each + model. Returns ------- @@ -334,31 +328,16 @@ def save(self, List of output file paths of where the models were saved. """ output_paths = [] - if path is None: - def path(filename, idx=None): - return filename - elif not callable(path): - path = make_file_with_index - for idx, model in enumerate(self): - if len(self) <= 1: + if len(self) <= 1 or path is None: idx = None - if save_model_func is None: - outpath, filename = op.split( - path(model.meta.filename, idx=idx) - ) - if dir_path: - outpath = dir_path - save_path = op.join(outpath, filename) - try: - output_paths.append( - model.save(save_path, **kwargs) - ) - except IOError as err: - raise err - + if path is None: + save_path = model.meta.filename else: - output_paths.append(save_model_func(model, idx=idx)) + save_path = path+str(idx)+".fits" + output_paths.append( + model.save(save_path, **kwargs) + ) return output_paths @property @@ -473,26 +452,6 @@ def get_crds_parameters(self): msg = ("stpipe uses the get_crds_parameters method from the 0th model in the " "ModelContainer. 
This method is currently not used.") raise NotImplementedError(msg) - with self._open_first_science_exposure() as model: - return model.get_crds_parameters() - - def _open_first_science_exposure(self): - """ - Open first model with exptype SCIENCE, or the first model - if none exists. - - Returns - ------- - stdatamodels.JwstDataModel - """ - for exposure in self.meta.asn_table.products[0].members: - if exposure.exptype.upper() == "SCIENCE": - first_exposure = exposure.expname - break - else: - first_exposure = self.meta.asn_table.products[0].members[0].expname - - return datamodel_open(first_exposure) def ind_asn_type(self, asn_exptype): """ @@ -513,26 +472,3 @@ def ind_asn_type(self, asn_exptype): if model.meta.asn.exptype.lower() == asn_exptype: ind.append(i) return ind - -def make_file_with_index(file_path, idx): - """Append an index to a filename - - Parameters - ---------- - file_path: str - The file to append the index to. - idx: int - An index to append - - - Returns - ------- - file_path: str - Path with index appended - """ - # Decompose path - path_head, path_tail = op.split(file_path) - base, ext = op.splitext(path_tail) - if idx is not None: - base = base + str(idx) - return op.join(path_head, base + ext) \ No newline at end of file diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index f4a0d87048..54085f6fc6 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -152,13 +152,14 @@ def process(self, input): input_data.close() else: + result = input_data.copy() input_data.close() self.log.warning( "Input %s of type %s cannot be handled without user-supplied background. 
Step skipped.", input, type(input) ) - record_step_status(input_data, 'master_background', success=False) - return input_data + record_step_status(result, 'master_background', success=False) + return result # Save the computed background if requested by user if self.save_background: From 0363da493e42f3c62def783ef8cffa36a68560d1 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 17 Oct 2024 17:41:15 -0400 Subject: [PATCH 83/85] restore save_model_func to save method and added unit test --- jwst/datamodels/container.py | 42 ++++++++++++++----- jwst/datamodels/tests/test_model_container.py | 22 ++++++++++ .../master_background_step.py | 7 ++++ .../outlier_detection_step.py | 2 +- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index e0f6cb80df..123d66eed2 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -1,5 +1,6 @@ from collections import OrderedDict from collections.abc import Sequence +import copy import os.path as op import re import logging @@ -15,10 +16,17 @@ __all__ = ['ModelContainer'] RECOGNIZED_MEMBER_FIELDS = ['tweakreg_catalog', 'group_id'] -DUMMY_ASN_TABLE = { +EMPTY_ASN_TABLE = { "asn_id": None, "asn_pool": None, - "products": [{"name": None}] + "products": [ + {"name": "", + "members": [ + {"exptype": "", + "expname": ""} + ] + } + ] } # Configure logging @@ -134,7 +142,7 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): self._models = [] self.asn_exptypes = asn_exptypes self.asn_n_members = asn_n_members - self.asn_table = DUMMY_ASN_TABLE + self.asn_table = copy.deepcopy(EMPTY_ASN_TABLE) self.asn_table_name = None self.asn_pool_name = None self.asn_file_path = None @@ -307,6 +315,7 @@ def from_asn(self, asn_data): def save(self, path=None, + save_model_func=None, **kwargs): """ Write out models in container to FITS or ASDF. 
@@ -318,6 +327,11 @@ def save(self, - If a string, the string is used as a root and an index is appended. + save_model_func: func or None + Alternate function to save each model instead of + the models `save` method. Takes one argument, the model, + and keyword argument `idx` for an index. + kwargs : dict Additional parameters to be passed to the `save` method of each model. @@ -329,15 +343,21 @@ def save(self, """ output_paths = [] for idx, model in enumerate(self): - if len(self) <= 1 or path is None: - idx = None - if path is None: - save_path = model.meta.filename + print(model) + print(model.meta.filename) + print(save_model_func) + if save_model_func is None: + if path is None: + save_path = model.meta.filename + else: + if len(self) <= 1: + idx = None + save_path = path+str(idx)+".fits" + output_paths.append( + model.save(save_path, **kwargs) + ) else: - save_path = path+str(idx)+".fits" - output_paths.append( - model.save(save_path, **kwargs) - ) + output_paths.append(save_model_func(model, idx=idx)) return output_paths @property diff --git a/jwst/datamodels/tests/test_model_container.py b/jwst/datamodels/tests/test_model_container.py index 2bf3528a32..2266f7909d 100644 --- a/jwst/datamodels/tests/test_model_container.py +++ b/jwst/datamodels/tests/test_model_container.py @@ -125,3 +125,25 @@ def test_group_id(tmp_path): model_droup_ids.add(m.meta.group_id) assert asn_group_ids == model_droup_ids + + +def test_save(tmp_cwd, container): + + # container pushes us to data/ directory so need to go back to tmp_cwd + # to avoid polluting the data/ directory + with pushdir(tmp_cwd): + + # test default just saves things at model meta filename + container.save() + expected_fnames = [] + for model in container: + expected_fnames.append(model.meta.filename) + for fname in expected_fnames: + assert os.path.exists(fname) + + # test specifying path saves to custom path with indices + path = "foo" + container.save(path) + expected_fnames = [path+str(i)+".fits" for i in 
range(len(container))] + for fname in expected_fnames: + assert os.path.exists(fname) diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index 54085f6fc6..727b351c47 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -116,6 +116,13 @@ def process(self, input): else: if isinstance(input_data, ModelContainer): input_data, background_data = split_container(input_data) + if len(background_data) == 0: + msg = ("No background data found in input container, " + "and no user-supplied background provided. Skipping step.") + self.log.warning(msg) + result = input_data.copy() + record_step_status(result, 'master_background', success=False) + return result asn_id = input_data.asn_table["asn_id"] for model in background_data: diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py index 9638a07b54..8fb31fa6ba 100644 --- a/jwst/outlier_detection/outlier_detection_step.py +++ b/jwst/outlier_detection/outlier_detection_step.py @@ -156,7 +156,7 @@ def _guess_mode(self, input_models): return self.mode # guess mode from input type - if isinstance(input_models, (str, dict)): + if isinstance(input_models, (str, dict, list)): input_models = datamodels.open(input_models, asn_n_members=1) # Select which version of OutlierDetection From 03f74cd41e2310f58a072cb828dd73cc92454f46 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 18 Oct 2024 11:08:48 -0400 Subject: [PATCH 84/85] Add default file naming for list input to container, plus small change per Brett review --- docs/jwst/stpipe/devel_io_design.rst | 5 ----- jwst/datamodels/container.py | 14 ++++++-------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/docs/jwst/stpipe/devel_io_design.rst b/docs/jwst/stpipe/devel_io_design.rst index 935b80c096..56f106734f 100644 --- a/docs/jwst/stpipe/devel_io_design.rst +++ 
b/docs/jwst/stpipe/devel_io_design.rst @@ -183,11 +183,6 @@ image3 pipeline and is only recommended when working with large associations. Additional documentation on the `ModelLibrary` class can be found in the `stpipe ModelLibrary documentation `_. -Developers should keep in mind that a `list` is a perfectly valid input and output to a -pipeline or step, and if association metadata is not needed, using a list is preferred -to using a ModelContainer in most cases. Initializing a ModelContainer from a list of -models (as opposed to an association or dictionary) is supported but almost never necessary. - ModelContainer Changes in JWST 1.17 ``````````````````````````````````` diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 123d66eed2..1c52505300 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -5,7 +5,6 @@ import re import logging from astropy.io import fits -from stdatamodels import properties from stdatamodels.jwst.datamodels.model_base import JwstDataModel from stdatamodels.jwst.datamodels.util import open as datamodel_open @@ -156,6 +155,11 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): if all(isinstance(x, (str, fits.HDUList, JwstDataModel)) for x in init): for m in init: self._models.append(datamodel_open(m, memmap=self._memmap)) + # set asn_table_name and product name to first datamodel stem since they were not provided + root = op.basename(self._models[0].meta.filename).split(".")[0] + default_name = "_".join(root.split("_")[:-1]) # remove old suffix + self.asn_table_name = default_name + self.asn_table['products'][0]['name'] = default_name else: raise TypeError("list must contain items that can be opened " "with jwst.datamodels.open()") @@ -298,10 +302,7 @@ def from_asn(self, asn_data): raise # Pull the whole association table into the asn_table attribute - self.asn_table = {} - properties.merge_tree( - self.asn_table, asn_data - ) + self.asn_table = 
copy.deepcopy(asn_data) if self.asn_file_path is not None: self.asn_table_name = op.basename(self.asn_file_path) @@ -343,9 +344,6 @@ def save(self, """ output_paths = [] for idx, model in enumerate(self): - print(model) - print(model.meta.filename) - print(save_model_func) if save_model_func is None: if path is None: save_path = model.meta.filename From 74762e5d2ff31ae343b9df758bea5a1015f344cf Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 18 Oct 2024 13:24:25 -0400 Subject: [PATCH 85/85] fix default file name logic --- jwst/datamodels/container.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/jwst/datamodels/container.py b/jwst/datamodels/container.py index 1c52505300..196c31ca5c 100644 --- a/jwst/datamodels/container.py +++ b/jwst/datamodels/container.py @@ -156,10 +156,14 @@ def __init__(self, init=None, asn_exptypes=None, asn_n_members=None, **kwargs): for m in init: self._models.append(datamodel_open(m, memmap=self._memmap)) # set asn_table_name and product name to first datamodel stem since they were not provided - root = op.basename(self._models[0].meta.filename).split(".")[0] - default_name = "_".join(root.split("_")[:-1]) # remove old suffix + fname = self._models[0].meta.filename + if fname is not None: + root = op.basename(fname).split(".")[0] + default_name = "_".join(root.split("_")[:-1]) # remove old suffix + else: + default_name = "" self.asn_table_name = default_name - self.asn_table['products'][0]['name'] = default_name + self.asn_table["products"][0]["name"] = default_name else: raise TypeError("list must contain items that can be opened " "with jwst.datamodels.open()")