From 155bf80a8750e6ebd3b3032897a4edebb45d3fe1 Mon Sep 17 00:00:00 2001
From: raychew <chew@iau.uni-frankfurt.de>
Date: Wed, 19 Jun 2024 14:39:43 +0200
Subject: [PATCH] wrote a simple consolidator script for the chunked outputs

The script is very inefficient, and I recall NetCDF having a built-in function for this, but it works, and I can wait.
---
 runs/chunk_consolidator.py | 52 ++++++++++++++++++++++++++++++++++++++
 src/io.py                  | 26 +++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 runs/chunk_consolidator.py

diff --git a/runs/chunk_consolidator.py b/runs/chunk_consolidator.py
new file mode 100644
index 0000000..27a4d7b
--- /dev/null
+++ b/runs/chunk_consolidator.py
@@ -0,0 +1,52 @@
+# %%
+import numpy as np
+from tqdm import tqdm
+
+from pycsam.src import io, var
+from pycsam.inputs.icon_global_run import params
+
+# Consolidate the per-chunk NetCDF outputs into one global dataset.
+chunk_start = 0
+n_cells     = 20480
+chunk_sz    = 100
+
+dat_path = params.path_output + "global_dataset/chunks/"
+out_path = params.path_output + "global_dataset/"
+out_fn = 'icon_global_R2B4'  # NOTE(review): not used below -- confirm intent
+
+global_dat = np.zeros((n_cells), dtype='object')
+
+cnt = 0
+for chunk in tqdm(range(chunk_start, n_cells, chunk_sz)):
+    # The file suffix is the nominal (unclamped) end index of the chunk.
+    sfx = "_" + str(chunk + chunk_sz)
+    fn = params.fn_output + sfx + '.nc'
+
+    writer = io.nc_writer(params, sfx)
+
+    # The last chunk may hold fewer than chunk_sz cells.
+    chunk_end = min(chunk + chunk_sz, n_cells)
+
+    for ii in range(chunk, chunk_end):
+        struct = var.obj()
+        # read_dat returns False if the file cannot be opened; stop here
+        # instead of silently storing an unpopulated cell.
+        if not writer.read_dat(dat_path, fn, ii, struct):
+            raise RuntimeError("could not open chunk file " + dat_path + fn)
+        global_dat[cnt] = struct
+        cnt += 1
+
+print("\n==========")
+print("Collection done; writing output...")
+print("==========\n")
+if cnt != n_cells:  # explicit check: `assert` is stripped under `python -O`
+    raise RuntimeError("collected %d cells, expected %d" % (cnt, n_cells))
+
+params.path_output = out_path
+global_writer = io.nc_writer(params, '')
+
+# Wrap the array itself so tqdm knows the total and can draw a progress bar.
+for cnt, item in enumerate(tqdm(global_dat)):
+    global_writer.duplicate(cnt, item)
+
+# %%
diff --git a/src/io.py b/src/io.py
index d849eff..e70026f 100644
--- a/src/io.py
+++ b/src/io.py
@@ -870,6 +870,32 @@ def duplicate(self, id, struct):
 
         rootgrp.close()
 
+
+    @staticmethod
+    def read_dat(path, fn, id, struct):
+        """Load cell group ``str(id)`` of ``path + fn`` into ``struct``.
+
+        Sets ``is_land``, ``clat`` and ``clon``; for land cells also ``dk``,
+        ``dl``, ``ampls`` (from ``H_spec``), ``kks`` and ``lls``.  Returns
+        False if the file cannot be opened and True once the cell is read.
+        """
+        try:
+            # Read-only: "a" would open the chunk for writing just to read.
+            rootgrp = nc.Dataset(path + fn, "r")
+        except (OSError, RuntimeError):
+            return False
+
+        with rootgrp:  # closes the file even if a group/variable is missing
+            grp = rootgrp[str(id)]
+            struct.is_land = grp["is_land"][:]
+            struct.clat = grp["clat"][:]
+            struct.clon = grp["clon"][:]
+            if struct.is_land:
+                struct.dk, struct.dl = grp["dk"][:], grp["dl"][:]
+                struct.ampls = grp["H_spec"][:]
+                struct.kks, struct.lls = grp["kks"][:], grp["lls"][:]
+        return True
+
     class grp_struct(object):
         def __init__(self, c_idx, clat, clon, is_land, analysis = None):
             self.c_idx = c_idx