From c15933b5c132c1530f3190abc26c1138a20336f7 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Mon, 12 Aug 2024 15:15:12 +0100 Subject: [PATCH 1/7] Add fix for XSX channels --- src/transforms.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/transforms.py b/src/transforms.py index eeadb6b..83906d5 100644 --- a/src/transforms.py +++ b/src/transforms.py @@ -171,7 +171,7 @@ def __call__(self, dataset: xr.Dataset) -> xr.Dataset: dataset[self.stem] = dataset[self.stem].chunk("auto") dataset[self.stem] = self._update_attributes(dataset[self.stem], channels) dataset = dataset.drop_vars(group_keys) - dataset = dataset.compute() + dataset: xr.Dataset = dataset.compute() return dataset def _update_attributes( @@ -300,13 +300,15 @@ def __init__(self, stem: str, path: str): cam_data.drop("name", inplace=True, axis=1) cam_data.drop("comment", inplace=True, axis=1) cam_data.columns = [stem + "_" + c for c in cam_data.columns] - name = stem.split("/")[-1] - cam_data.index.name = name + "_channel" self.stem = stem + self.index_name = f'{self.stem}_channel' + cam_data.index.name = self.index_name self.cam_data = cam_data.to_xarray() def __call__(self, dataset: xr.Dataset) -> xr.Dataset: - dataset = xr.merge([dataset, self.cam_data], combine_attrs="drop_conflicts") + cam_data = self.cam_data.copy() + cam_data[self.index_name] = dataset[self.index_name] + dataset = xr.merge([dataset, cam_data], combine_attrs="drop_conflicts") dataset = dataset.compute() return dataset From 338b8645a680c3ae57ff0cdf90f19d4ac3bf09cb Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Mon, 12 Aug 2024 15:24:11 +0100 Subject: [PATCH 2/7] Update XDC for DEFUSE --- src/task.py | 2 + src/transforms.py | 624 +++++++++++++++++++++++----------------------- 2 files changed, 314 insertions(+), 312 deletions(-) diff --git a/src/task.py b/src/task.py index 2cafde5..ea51cf4 100644 --- a/src/task.py +++ b/src/task.py @@ -97,6 +97,8 @@ def __call__(self): for key, group_index in 
signal_infos.groupby("source").groups.items(): signal_infos_for_source = signal_infos.loc[group_index] + if key == 'xdc': + signal_infos_for_source = signal_infos_for_source.loc[signal_infos_for_source.name == 'xdc/ip_t_ipref'] signal_datasets = self.load_source(signal_infos_for_source) pipeline = self.pipelines.get(key) dataset = pipeline(signal_datasets) diff --git a/src/transforms.py b/src/transforms.py index 83906d5..323d195 100644 --- a/src/transforms.py +++ b/src/transforms.py @@ -698,318 +698,318 @@ def __init__(self) -> None: MapDict(XDCRenameDimensions()), MapDict(StandardiseSignalDataset("xdc")), MergeDatasets(), - TensoriseChannels( - "ai_cpu1_ccbv", - dim_name="ai_ccbv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_flcc", - dim_name="ai_flcc_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_incon", - dim_name="ai_incon_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_lhorw", - dim_name="ai_lhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_mid", - dim_name="ai_mid_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_obr", - dim_name="ai_obr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_obv", - dim_name="ai_obv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_ring", - dim_name="ai_ring_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_rodgr", - dim_name="ai_rodgr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_uhorw", - dim_name="ai_uhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu1_vertw", - dim_name="ai_vertw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_ccbv", - dim_name="ai_ccbv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_flcc", - dim_name="ai_flcc_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_incon", - dim_name="ai_incon_channel", - assign_coords=False, - ), - 
TensoriseChannels( - "ai_cpu2_lhorw", - dim_name="ai_lhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_mid", - dim_name="ai_mid_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_obr", - dim_name="ai_obr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_obv", - dim_name="ai_obv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_ring", - dim_name="ai_ring_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_rodgr", - dim_name="ai_rodgr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_uhorw", - dim_name="ai_uhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu2_vertw", - dim_name="ai_vertw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_ccbv", - dim_name="ai_ccbv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_flcc", - dim_name="ai_flcc_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_incon", - dim_name="ai_incon_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_lhorw", - dim_name="ai_lhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_mid", - dim_name="ai_mid_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_obr", - dim_name="ai_obr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_obv", - dim_name="ai_obv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_ring", - dim_name="ai_ring_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_rodgr", - dim_name="ai_rodgr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_uhorw", - dim_name="ai_uhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu3_vertw", - dim_name="ai_vertw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_ccbv", - dim_name="ai_ccbv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_flcc", - 
dim_name="ai_flcc_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_incon", - dim_name="ai_incon_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_lhorw", - dim_name="ai_lhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_mid", - dim_name="ai_mid_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_obr", - dim_name="ai_obr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_obv", - dim_name="ai_obv_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_ring", - dim_name="ai_ring_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_rodgr", - dim_name="ai_rodgr_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_uhorw", - dim_name="ai_uhorw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_cpu4_vertw", - dim_name="ai_vertw_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_raw_ccbv", dim_name="ai_ccbv", assign_coords=False - ), - TensoriseChannels( - "ai_raw_flcc", - dim_name="ai_flcc_channel", - assign_coords=False, - ), - TensoriseChannels( - "ai_raw_obv", dim_name="ai_obv_channel", assign_coords=False - ), - TensoriseChannels( - "ai_raw_obr", dim_name="ai_obr_channel", assign_coords=False - ), - TensoriseChannels( - "equil_s_seg", - regex=r"equil_s_seg(\d+)$", - dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels( - "equil_s_seg_at", - regex=r"equil_s_seg(\d+)at$", - dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels( - "equil_s_seg_rt", - regex=r"equil_s_seg(\d+)rt$", - dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels( - "equil_s_seg_zt", - regex=r"equil_s_seg(\d+)zt$", - dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels( - "equil_s_segb", - dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels( - "equil_t_seg", - regex=r"equil_t_seg(\d+)$", - 
dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels( - "equil_t_seg_u", - regex=r"equil_t_seg(\d+)u$", - dim_name="equil_seg_channel", - assign_coords=False, - ), - TensoriseChannels("isoflux_e_seg"), - TensoriseChannels( - "isoflux_t_rpsh_n", - regex=r"isoflux_t_rpsh(\d+)n", - ), - TensoriseChannels( - "isoflux_t_rpsh_p", - regex=r"isoflux_t_rpsh(\d+)p", - ), - TensoriseChannels("isoflux_t_seg", regex=r"isoflux_t_seg(\d+)$"), - TensoriseChannels( - "isoflux_t_seg_gd", regex=r"isoflux_t_seg(\d+)gd$" - ), - TensoriseChannels( - "isoflux_t_seg_gi", regex=r"isoflux_t_seg(\d+)gi$" - ), - TensoriseChannels( - "isoflux_t_seg_gp", regex=r"isoflux_t_seg(\d+)gp$" - ), - TensoriseChannels( - "isoflux_t_seg_td", regex=r"isoflux_t_seg(\d+)td$" - ), - TensoriseChannels( - "isoflux_t_seg_ti", regex=r"isoflux_t_seg(\d+)ti$" - ), - TensoriseChannels( - "isoflux_t_seg_tp", regex=r"isoflux_t_seg(\d+)tp$" - ), - TensoriseChannels("isoflux_t_seg_u", regex=r"isoflux_t_seg(\d+)u$"), - TensoriseChannels( - "isoflux_t_zpsh_n", - regex=r"isoflux_t_zpsh(\d+)n", - ), - TensoriseChannels( - "isoflux_t_zpsh_p", - regex=r"isoflux_t_zpsh(\d+)p", - ), + # TensoriseChannels( + # "ai_cpu1_ccbv", + # dim_name="ai_ccbv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_flcc", + # dim_name="ai_flcc_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_incon", + # dim_name="ai_incon_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_lhorw", + # dim_name="ai_lhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_mid", + # dim_name="ai_mid_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_obr", + # dim_name="ai_obr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_obv", + # dim_name="ai_obv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_ring", + # dim_name="ai_ring_channel", + # 
assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_rodgr", + # dim_name="ai_rodgr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_uhorw", + # dim_name="ai_uhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu1_vertw", + # dim_name="ai_vertw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_ccbv", + # dim_name="ai_ccbv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_flcc", + # dim_name="ai_flcc_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_incon", + # dim_name="ai_incon_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_lhorw", + # dim_name="ai_lhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_mid", + # dim_name="ai_mid_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_obr", + # dim_name="ai_obr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_obv", + # dim_name="ai_obv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_ring", + # dim_name="ai_ring_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_rodgr", + # dim_name="ai_rodgr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_uhorw", + # dim_name="ai_uhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu2_vertw", + # dim_name="ai_vertw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_ccbv", + # dim_name="ai_ccbv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_flcc", + # dim_name="ai_flcc_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_incon", + # dim_name="ai_incon_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_lhorw", + # dim_name="ai_lhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # 
"ai_cpu3_mid", + # dim_name="ai_mid_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_obr", + # dim_name="ai_obr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_obv", + # dim_name="ai_obv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_ring", + # dim_name="ai_ring_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_rodgr", + # dim_name="ai_rodgr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_uhorw", + # dim_name="ai_uhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu3_vertw", + # dim_name="ai_vertw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_ccbv", + # dim_name="ai_ccbv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_flcc", + # dim_name="ai_flcc_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_incon", + # dim_name="ai_incon_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_lhorw", + # dim_name="ai_lhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_mid", + # dim_name="ai_mid_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_obr", + # dim_name="ai_obr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_obv", + # dim_name="ai_obv_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_ring", + # dim_name="ai_ring_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_rodgr", + # dim_name="ai_rodgr_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_uhorw", + # dim_name="ai_uhorw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_cpu4_vertw", + # dim_name="ai_vertw_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_raw_ccbv", dim_name="ai_ccbv", assign_coords=False + # ), + # 
TensoriseChannels( + # "ai_raw_flcc", + # dim_name="ai_flcc_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "ai_raw_obv", dim_name="ai_obv_channel", assign_coords=False + # ), + # TensoriseChannels( + # "ai_raw_obr", dim_name="ai_obr_channel", assign_coords=False + # ), + # TensoriseChannels( + # "equil_s_seg", + # regex=r"equil_s_seg(\d+)$", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "equil_s_seg_at", + # regex=r"equil_s_seg(\d+)at$", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "equil_s_seg_rt", + # regex=r"equil_s_seg(\d+)rt$", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "equil_s_seg_zt", + # regex=r"equil_s_seg(\d+)zt$", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "equil_s_segb", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "equil_t_seg", + # regex=r"equil_t_seg(\d+)$", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels( + # "equil_t_seg_u", + # regex=r"equil_t_seg(\d+)u$", + # dim_name="equil_seg_channel", + # assign_coords=False, + # ), + # TensoriseChannels("isoflux_e_seg"), + # TensoriseChannels( + # "isoflux_t_rpsh_n", + # regex=r"isoflux_t_rpsh(\d+)n", + # ), + # TensoriseChannels( + # "isoflux_t_rpsh_p", + # regex=r"isoflux_t_rpsh(\d+)p", + # ), + # TensoriseChannels("isoflux_t_seg", regex=r"isoflux_t_seg(\d+)$"), + # TensoriseChannels( + # "isoflux_t_seg_gd", regex=r"isoflux_t_seg(\d+)gd$" + # ), + # TensoriseChannels( + # "isoflux_t_seg_gi", regex=r"isoflux_t_seg(\d+)gi$" + # ), + # TensoriseChannels( + # "isoflux_t_seg_gp", regex=r"isoflux_t_seg(\d+)gp$" + # ), + # TensoriseChannels( + # "isoflux_t_seg_td", regex=r"isoflux_t_seg(\d+)td$" + # ), + # TensoriseChannels( + # "isoflux_t_seg_ti", regex=r"isoflux_t_seg(\d+)ti$" + # ), + # TensoriseChannels( + # 
"isoflux_t_seg_tp", regex=r"isoflux_t_seg(\d+)tp$" + # ), + # TensoriseChannels("isoflux_t_seg_u", regex=r"isoflux_t_seg(\d+)u$"), + # TensoriseChannels( + # "isoflux_t_zpsh_n", + # regex=r"isoflux_t_zpsh(\d+)n", + # ), + # TensoriseChannels( + # "isoflux_t_zpsh_p", + # regex=r"isoflux_t_zpsh(\d+)p", + # ), TransformUnits(), ] ), From 845652ca0046b2276bb3f0ca2ba1592948730de0 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Mon, 12 Aug 2024 16:25:45 +0100 Subject: [PATCH 3/7] Update job script --- jobs/freia_write_datasets.qsub | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/jobs/freia_write_datasets.qsub b/jobs/freia_write_datasets.qsub index 076437e..94c591f 100755 --- a/jobs/freia_write_datasets.qsub +++ b/jobs/freia_write_datasets.qsub @@ -27,11 +27,16 @@ num_workers=$3 export PATH="/home/rt2549/dev/:$PATH" random_string=$(head /dev/urandom | tr -dc A-Za-z0-9 | head -c 16) + temp_dir="/common/tmp/sjackson/local_cache/$random_string" +metadata_dir="/common/tmp/sjackson/data/uda/" # Run script -# --force --source_names abm ada adg aga ahx aim air ait alp ama amb amc amh amm ams anb ane ant anu aoe arp asb asm asx ayc aye efm esm esx rba rbb rbc rca rco rgb rgc rir rit xmo xpc xsx -# --force --source_names amc ayc efm xmo xsx time mpirun -np $num_workers \ - python3 -m src.archive.main $temp_dir $summary_file $bucket_path --force \ - --source_names ${@:4} \ No newline at end of file + python3 -m src.main $temp_dir $summary_file \ + --metadata_dir $metadata_dir \ + --bucket_path $bucket_path \ + --source_names ${@:4} \ + --file_format zarr \ + --upload \ + --force From 5c1f26e9f1a697861597e9c4bc2d368feeb88fd7 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Mon, 12 Aug 2024 16:39:27 +0100 Subject: [PATCH 4/7] Update freia job script --- jobs/freia_write_datasets.qsub | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/jobs/freia_write_datasets.qsub b/jobs/freia_write_datasets.qsub index 94c591f..3a11678 
100755 --- a/jobs/freia_write_datasets.qsub +++ b/jobs/freia_write_datasets.qsub @@ -33,10 +33,4 @@ metadata_dir="/common/tmp/sjackson/data/uda/" # Run script time mpirun -np $num_workers \ - python3 -m src.main $temp_dir $summary_file \ - --metadata_dir $metadata_dir \ - --bucket_path $bucket_path \ - --source_names ${@:4} \ - --file_format zarr \ - --upload \ - --force + python3 -m src.main $temp_dir $summary_file --metadata_dir $metadata_dir --bucket_path $bucket_path --file_format zarr --upload --force --source_names ${@:4} From 7c9ddeb931e34fd3f98a8bdf97bce9912b1637c4 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Thu, 15 Aug 2024 10:24:52 +0100 Subject: [PATCH 5/7] Refactor code. Fix issues with older xsx data --- src/task.py | 82 ++++++++++++++++++++++++++++++++++------------- src/transforms.py | 24 ++++++++++---- src/workflow.py | 6 ++-- 3 files changed, 81 insertions(+), 31 deletions(-) diff --git a/src/task.py b/src/task.py index ea51cf4..9f0fb60 100644 --- a/src/task.py +++ b/src/task.py @@ -1,10 +1,11 @@ -from pathlib import Path import os -import xarray as xr -import pandas as pd +import traceback import shutil import subprocess import logging +from pathlib import Path +import xarray as xr +import pandas as pd from src.transforms import MASTPipelineRegistry, MASTUPipelineRegistry from src.mast import MASTClient @@ -83,32 +84,68 @@ def __init__( self.pipelines = MASTUPipelineRegistry() def __call__(self): - signal_infos = self.read_signal_info() - source_infos = self.read_source_info() - - if len(self.signal_names) > 0: - signal_infos = signal_infos.loc[signal_infos.name.isin(self.signal_names)] + try: + self._main() + except Exception as e: + trace = traceback.format_exc() + logging.error(f"Error reading sources for shot {self.shot}: {e}\n{trace}") + + def _main(self): + signal_infos, source_infos = self._read_metadata() - if len(self.source_names) > 0: - signal_infos = signal_infos.loc[signal_infos.source.isin(self.source_names)] + if 
signal_infos is None or signal_infos is None: + return + signal_infos = self._filter_signals(signal_infos) self.writer.write_metadata() - for key, group_index in signal_infos.groupby("source").groups.items(): - signal_infos_for_source = signal_infos.loc[group_index] - if key == 'xdc': - signal_infos_for_source = signal_infos_for_source.loc[signal_infos_for_source.name == 'xdc/ip_t_ipref'] - signal_datasets = self.load_source(signal_infos_for_source) - pipeline = self.pipelines.get(key) - dataset = pipeline(signal_datasets) - source_info = source_infos.loc[source_infos["name"] == key].iloc[0] - source_info = source_info.to_dict() - dataset.attrs.update(source_info) - self.writer.write_dataset(dataset) + for source_name, source_group_index in signal_infos.groupby("source").groups.items(): + source_info = self._get_source_metadata(source_name, source_infos) + signal_infos_for_source = self._get_signals_for_source(source_name, source_group_index, signal_infos) + self._process_source(source_name, signal_infos_for_source, source_info) self.writer.consolidate_dataset() + def _process_source(self, source_name: str, signal_infos: pd.DataFrame, source_info: dict): + signal_datasets = self.load_source(signal_infos) + pipeline = self.pipelines.get(source_name) + dataset = pipeline(signal_datasets) + dataset.attrs.update(source_info) + self.writer.write_dataset(dataset) + + def _get_source_metadata(self, source_name, source_infos: pd.DataFrame) -> dict: + source_info = source_infos.loc[source_infos["name"] == source_name].iloc[0] + source_info = source_info.to_dict() + return source_info + + def _get_signals_for_source(self, source_name: str, source_group_index: pd.Series, signal_infos: pd.DataFrame): + signal_infos_for_source = signal_infos.loc[source_group_index] + if source_name == 'xdc': + signal_infos_for_source = signal_infos_for_source.loc[signal_infos_for_source.name == 'xdc/ip_t_ipref'] + return signal_infos_for_source + + def _read_metadata(self) -> 
tuple[pd.DataFrame, pd.DataFrame]: + try: + signal_infos = self.read_signal_info() + source_infos = self.read_source_info() + except FileNotFoundError: + message = f"Could not find source/signal metadata file for shot {self.shot}" + logging.warning(message) + return None, None + + return signal_infos, source_infos + + + def _filter_signals(self, signal_infos: pd.DataFrame) -> pd.DataFrame: + if len(self.signal_names) > 0: + signal_infos = signal_infos.loc[signal_infos.name.isin(self.signal_names)] + + if len(self.source_names) > 0: + signal_infos = signal_infos.loc[signal_infos.source.isin(self.source_names)] + + return signal_infos + def load_source(self, group: pd.DataFrame) -> dict[str, xr.Dataset]: datasets = {} for _, info in group.iterrows(): @@ -128,7 +165,8 @@ def load_source(self, group: pd.DataFrame) -> dict[str, xr.Dataset]: shot_num=self.shot, name=info["uda_name"] ) except Exception as e: - logging.error(f"Error reading dataset {name} for shot {self.shot}: {e}") + uda_name = info["uda_name"] + logging.warning(f"Could not read dataset {name} ({uda_name}) for shot {self.shot}: {e}") continue dataset.attrs.update(info) diff --git a/src/transforms.py b/src/transforms.py index 323d195..f1e64f1 100644 --- a/src/transforms.py +++ b/src/transforms.py @@ -158,10 +158,15 @@ def __init__( self.assign_coords = assign_coords def __call__(self, dataset: xr.Dataset) -> xr.Dataset: - group_keys = self._get_group_keys(dataset) + + # If we couldn't find any matching keys, do nothing. 
+ if len(group_keys) == 0: + return dataset + channels = [dataset[key] for key in group_keys] - dataset[self.stem] = xr.combine_nested(channels, concat_dim=self.dim_name) + combined = xr.combine_nested(channels, concat_dim=self.dim_name) + dataset[self.stem] = combined if self.assign_coords: dataset[self.stem] = dataset[self.stem].assign_coords( @@ -301,14 +306,16 @@ def __init__(self, stem: str, path: str): cam_data.drop("comment", inplace=True, axis=1) cam_data.columns = [stem + "_" + c for c in cam_data.columns] self.stem = stem - self.index_name = f'{self.stem}_channel' - cam_data.index.name = self.index_name + index_name = f'{self.stem}_channel' + cam_data[index_name] = [stem + '_' + str(index+1) for index in range(len(cam_data))] + cam_data = cam_data.set_index(index_name) self.cam_data = cam_data.to_xarray() def __call__(self, dataset: xr.Dataset) -> xr.Dataset: cam_data = self.cam_data.copy() - cam_data[self.index_name] = dataset[self.index_name] - dataset = xr.merge([dataset, cam_data], combine_attrs="drop_conflicts") + dataset = xr.merge([dataset, cam_data], combine_attrs="drop_conflicts", join='left') + print(dataset) + print(dataset.dims) dataset = dataset.compute() return dataset @@ -1038,6 +1045,11 @@ def __init__(self) -> None: TensoriseChannels("hcam_l", regex=r"hcam_l_(\d+)"), TensoriseChannels("hcam_u", regex=r"hcam_u_(\d+)"), TensoriseChannels("tcam", regex=r"tcam_(\d+)"), + TensoriseChannels("hcam_l", regex=r"hcaml#(\d+)"), + TensoriseChannels("hcam_u", regex=r"hcamu#(\d+)"), + TensoriseChannels("hpzr", regex=r"hpzr_(\d+)"), + TensoriseChannels("v_ste29", regex=r"v_ste29_(\d+)"), + TensoriseChannels("v_ste36", regex=r"v_ste36_(\d+)"), TransformUnits(), AddXSXCameraParams("hcam_l", "parameters/xsx_camera_l.csv"), AddXSXCameraParams("hcam_u", "parameters/xsx_camera_u.csv"), diff --git a/src/workflow.py b/src/workflow.py index a41737d..2e18858 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -123,9 +123,9 @@ def __call__(self, shot: int): try: 
create() except Exception as e: - import traceback; traceback.print_exc(); - print(traceback.format_exc()) - logging.error(f"Failed to run workflow with error {type(e)}: {e}") + import traceback + trace = traceback.format_exc() + logging.error(f"Failed to run workflow with error {type(e)}: {e}\n{trace}") From 4176f101906e795ec37eebb1cefefd88a934b3c6 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Thu, 15 Aug 2024 10:28:59 +0100 Subject: [PATCH 6/7] Don't add parameters that don't exist --- src/transforms.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/transforms.py b/src/transforms.py index f1e64f1..c0b59bd 100644 --- a/src/transforms.py +++ b/src/transforms.py @@ -313,9 +313,10 @@ def __init__(self, stem: str, path: str): def __call__(self, dataset: xr.Dataset) -> xr.Dataset: cam_data = self.cam_data.copy() + # if camera data is not in dataset, then skip and do nothing + if self.stem not in dataset: + return dataset dataset = xr.merge([dataset, cam_data], combine_attrs="drop_conflicts", join='left') - print(dataset) - print(dataset.dims) dataset = dataset.compute() return dataset From 0d774e5b6112940625e2dde715ff82bfb09e2d55 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Thu, 15 Aug 2024 11:41:02 +0100 Subject: [PATCH 7/7] Update test --- tests/test_transforms.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index d95225e..6b1733d 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1,3 +1,4 @@ +import numpy as np import xarray as xr from src.transforms import ( AddXSXCameraParams, @@ -85,7 +86,14 @@ def test_standardise_dataset(fake_dataset): assert "plasma_current_error" in dataset.data_vars -def test_xsx_camera_params(fake_dataset): +def test_xsx_camera_params(): + fake_dataset = xr.Dataset( + data_vars=dict( + tcam=(("time", 'tcam_channels'), np.random.random((100, 18))), + time=("time", np.random.random(100)), + ),
attrs={"name": "xsx/tcam", "shot_id": 30420}, + ) transform = AddXSXCameraParams("tcam", "parameters/xsx_camera_t.csv") dataset = transform(fake_dataset)