diff --git a/src/processor/DldProcessor.py b/src/processor/DldProcessor.py index 811921e..66b3492 100644 --- a/src/processor/DldProcessor.py +++ b/src/processor/DldProcessor.py @@ -51,7 +51,8 @@ class DldProcessor: """ root_folder = os.path.dirname(os.path.dirname(processor.__file__)) - + if 'src' in root_folder: + root_folder = os.path.dirname(root_folder) def __init__(self, settings=None,silent=False): """ Create and manage a dask DataFrame from the data recorded at FLASH. """ diff --git a/src/processor/LabDataframeCreator.py b/src/processor/LabDataframeCreator.py index 0a0c2ff..07137d6 100644 --- a/src/processor/LabDataframeCreator.py +++ b/src/processor/LabDataframeCreator.py @@ -1,4 +1,5 @@ from processor.DldProcessor import DldProcessor +from processor.utilities import misc import sys, os import glob import json @@ -38,8 +39,11 @@ def __init__(self, path = None, filenames = None, channels = None, settings = No else: all_channel_list_dir = channels # Read all channel info from a json file - with open(all_channel_list_dir, "r") as json_file: - self.all_channels = json.load(json_file) + if isinstance(all_channel_list_dir,dict): + self.all_channels = channels + else: + with open(all_channel_list_dir, "r") as json_file: + self.all_channels = json.load(json_file) self.channels = self.availableChannels @property @@ -54,6 +58,7 @@ def createDataframePerFormat(self, h5_file, format_): if each_name in self.all_channels] # filters for valid channels # Only channels with the defined format are selected and stored # in an iterable list +# print(valid_names) if format_ is not None: channels = [each_name for each_name in valid_names @@ -65,13 +70,28 @@ def createDataframePerFormat(self, h5_file, format_): electronID = np.cumsum([0,*h5_file['DLD/NumOfEvents'][:-1]]) elif format_ == "electron": - electronID = np.arange(len(h5_file['DLD/times'])) - - dataframes = (Series(h5_file[self.all_channels[channel]['group_name']], - name = channel, - index = electronID) - .to_frame() for channel in channels) + electronID = np.arange(len(h5_file['DLD/DLD/times'])) + + channels_in_h5 = misc.parse_h5_keys(h5_file) + bad_channels = [] + good_channels = [] + for channel in channels: + gn = self.all_channels[channel]['group_name'] + if gn not in channels_in_h5: + bad_channels.append(channel) + else: + good_channels.append(channel) + if len(bad_channels) > 0: + print(f"ERROR: skipped channels missing in h5 file: {[self.all_channels[channel]['group_name'] for channel in bad_channels]}") +# print([self.all_channels[channel]['group_name'] for channel in channels]) +# print(h5_file) + + dataframes = (Series(h5_file[self.all_channels[channel]['group_name']], + name = channel, + index = electronID) + .to_frame() for channel in good_channels) + return reduce(DataFrame.combine_first, dataframes) def readFile(self, filename): @@ -111,7 +131,7 @@ def fillNA(self): # Overwrite the dataframes with filled dataframes self.dfs[i] = subset - def readData(self, path=None, filenames = None): + def readData(self, path = None, filenames = None, parquet_path = None): if (self.filenames or filenames) is None: raise ValueError('Must provide a file or list of files!') @@ -124,7 +144,10 @@ def readData(self, path=None, filenames = None): self.path = Path(path) # create a per_file directory - self.parquet_dir = self.path.joinpath('parquet') + if parquet_path is None: + self.parquet_dir = self.path.joinpath('parquet') + else: + self.parquet_dir = Path(parquet_path) if not self.parquet_dir.exists(): os.mkdir(self.parquet_dir) diff --git a/src/processor/utilities/misc.py b/src/processor/utilities/misc.py index bf3ab16..2c7e416 100644 --- a/src/processor/utilities/misc.py +++ b/src/processor/utilities/misc.py @@ -266,6 +266,15 @@ def argnearest(array, val, rettype='vectorized'): # %% Data Input/Output # ================================================================================ +def parse_h5_keys(d,prefix=''): + l = [] + for k in d.keys(): + try: + [l.append(s) for s in parse_h5_keys(d[k],prefix=prefix + '/' + k)] + except: + l.append(prefix + '/' + k) + return l + def save_H5_hyperstack(data_array, filename, path=None, overwrite=True): """ Saves an hdf5 file with 4D (Kx,Ky,E,Time) images for import in FIJI