From 155bf80a8750e6ebd3b3032897a4edebb45d3fe1 Mon Sep 17 00:00:00 2001
From: raychew
Date: Wed, 19 Jun 2024 14:39:43 +0200
Subject: [PATCH] wrote a simple consolidator script for the chunked outputs

the script is very inefficient, and I recall NetCDF having an in-built function for this, but anyway, it works, and I can wait...
---
 runs/chunk_consolidator.py | 65 ++++++++++++++++++++++++++++++++++++++
 src/io.py                  | 26 +++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 runs/chunk_consolidator.py

diff --git a/runs/chunk_consolidator.py b/runs/chunk_consolidator.py
new file mode 100644
index 0000000..27a4d7b
--- /dev/null
+++ b/runs/chunk_consolidator.py
@@ -0,0 +1,65 @@
+# %%
+"""Consolidate chunked NetCDF outputs into one dataset.
+
+Reads every cell group from the chunk files under dat_path with
+io.nc_writer.read_dat, then hands each collected struct to
+io.nc_writer.duplicate on a writer created with path_output = out_path.
+"""
+import numpy as np
+from tqdm import tqdm
+
+from pycsam.src import io, var
+from pycsam.inputs.icon_global_run import params
+
+chunk_start = 0
+n_cells = 20480
+chunk_sz = 100
+
+dat_path = params.path_output + "global_dataset/chunks/"
+out_path = params.path_output + "global_dataset/"
+# NOTE(review): out_fn is never used below; confirm whether it was meant to
+# name the consolidated output file.
+out_fn = 'icon_global_R2B4'
+
+# One slot per cell, filled with the struct read for that cell.
+global_dat = np.zeros((n_cells), dtype='object')
+
+cnt = 0
+for chunk in tqdm(range(chunk_start, n_cells, chunk_sz)):
+
+    # The file suffix is chunk + chunk_sz, not clamped to n_cells.
+    sfx = "_" + str(chunk+chunk_sz)
+    fn = params.fn_output + sfx + '.nc'
+
+    writer = io.nc_writer(params, sfx)
+
+    # The last chunk may hold fewer than chunk_sz cells.
+    chunk_end = min(chunk + chunk_sz, n_cells)
+
+    for ii in range(chunk, chunk_end):
+        struct = var.obj()
+        # read_dat returns False when the chunk file cannot be opened; stop
+        # here rather than store an unpopulated struct in the output.
+        if not writer.read_dat(dat_path, fn, ii, struct):
+            raise RuntimeError("could not open chunk file " + dat_path + fn)
+        global_dat[cnt] = struct
+
+        cnt += 1
+
+print("\n==========")
+print("Collection done; writing output...")
+print("==========\n")
+# Raise explicitly: an assert would be stripped under python -O.
+if cnt != n_cells:
+    raise RuntimeError(
+        "collected " + str(cnt) + " cells, expected " + str(n_cells)
+    )
+
+params.path_output = out_path
+global_writer = io.nc_writer(params, '')
+
+# Wrap the array itself (not enumerate) so tqdm knows the total length.
+for idx, item in enumerate(tqdm(global_dat)):
+    global_writer.duplicate(idx, item)
+
+# %%
diff --git a/src/io.py b/src/io.py
index d849eff..e70026f 100644
--- a/src/io.py
+++ b/src/io.py
@@ -870,6 +870,32 @@ def duplicate(self, id, struct):
rootgrp.close() + + @staticmethod + def read_dat(path, fn, id, struct): + try: + rootgrp = nc.Dataset(path + fn, "a", format="NETCDF4") + except: + return False + + grp = rootgrp[str(id)] + + struct.is_land = grp["is_land"][:] + struct.clat = grp["clat"][:] + struct.clon = grp["clon"][:] + + if struct.is_land: + struct.dk = grp["dk"][:] + struct.dl = grp["dl"][:] + + struct.ampls = grp["H_spec"][:] + struct.kks = grp["kks"][:] + struct.lls = grp["lls"][:] + + rootgrp.close() + + return True + class grp_struct(object): def __init__(self, c_idx, clat, clon, is_land, analysis = None): self.c_idx = c_idx