Bug/channelcheck (#109)
* add channel check in dfcreator

* add optional selection of parquet dir in lab data reader

Co-authored-by: Steinn Ymir Agustsson <sagustss@uni-mainz.de>
zain-sohail and steinnymir authored Jan 20, 2022
1 parent f23c4c2 commit 5422a80
Showing 3 changed files with 44 additions and 11 deletions.
src/processor/DldProcessor.py (3 changes: 2 additions & 1 deletion)

@@ -51,7 +51,8 @@ class DldProcessor:
"""
root_folder = os.path.dirname(os.path.dirname(processor.__file__))

if 'src' in root_folder:
root_folder = os.path.dirname(root_folder)
def __init__(self, settings=None,silent=False):
""" Create and manage a dask DataFrame from the data recorded at FLASH.
"""
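For reference, the path logic added above distinguishes a source checkout from an installed package; a minimal sketch with a made-up directory layout (the path below is hypothetical):

    import os

    # In a source checkout, processor/__init__.py sits under <repo>/src/processor,
    # so two dirname() calls land on <repo>/src rather than on the repo root.
    file_path = '/home/user/hextof-processor/src/processor/__init__.py'
    root_folder = os.path.dirname(os.path.dirname(file_path))  # -> .../hextof-processor/src
    if 'src' in root_folder:
        root_folder = os.path.dirname(root_folder)              # -> .../hextof-processor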
src/processor/LabDataframeCreator.py (43 changes: 33 additions & 10 deletions)

@@ -1,4 +1,5 @@
  from processor.DldProcessor import DldProcessor
+ from processor.utilities import misc
  import sys, os
  import glob
  import json
@@ -38,8 +39,11 @@ def __init__(self, path = None, filenames = None, channels = None, settings = None):
          else:
              all_channel_list_dir = channels
          # Read all channel info from a json file
-         with open(all_channel_list_dir, "r") as json_file:
-             self.all_channels = json.load(json_file)
+         if isinstance(all_channel_list_dir,dict):
+             self.all_channels = channels
+         else:
+             with open(all_channel_list_dir, "r") as json_file:
+                 self.all_channels = json.load(json_file)
          self.channels = self.availableChannels

      @property
@@ -54,6 +58,7 @@ def createDataframePerFormat(self, h5_file, format_):
                                 if each_name in self.all_channels] # filters for valid channels
          # Only channels with the defined format are selected and stored
          # in an iterable list
+         # print(valid_names)
          if format_ is not None:
              channels = [each_name
                          for each_name in valid_names
@@ -65,13 +70,28 @@ def createDataframePerFormat(self, h5_file, format_):
              electronID = np.cumsum([0,*h5_file['DLD/NumOfEvents'][:-1]])

          elif format_ == "electron":
-             electronID = np.arange(len(h5_file['DLD/times']))
-
-         dataframes = (Series(h5_file[self.all_channels[channel]['group_name']],
-                         name = channel,
-                         index = electronID)
-                         .to_frame() for channel in channels)
+             electronID = np.arange(len(h5_file['DLD/DLD/times']))
+
+         channels_in_h5 = misc.parse_h5_keys(h5_file)
+         bad_channels = []
+         good_channels = []
+         for channel in channels:
+             gn = self.all_channels[channel]['group_name']
+             if gn not in channels_in_h5:
+                 bad_channels.append(channel)
+             else:
+                 good_channels.append(channel)
+         if len(bad_channels) > 0:
+
+             print(f"ERROR: skipped channels missing in h5 file: {[self.all_channels[channel]['group_name'] for channel in bad_channels]}")
+         # print([self.all_channels[channel]['group_name'] for channel in channels])
+         # print(h5_file)
+
+         dataframes = (Series(h5_file[self.all_channels[channel]['group_name']],
+                         name = channel,
+                         index = electronID)
+                         .to_frame() for channel in good_channels)

          return reduce(DataFrame.combine_first, dataframes)

      def readFile(self, filename):
@@ -111,7 +131,7 @@ def fillNA(self):
              # Overwrite the dataframes with filled dataframes
              self.dfs[i] = subset

-     def readData(self, path=None, filenames = None):
+     def readData(self, path = None, filenames = None, parquet_path = None):

          if (self.filenames or filenames) is None:
              raise ValueError('Must provide a file or list of files!')
@@ -124,7 +144,10 @@ def readData(self, path=None, filenames = None):
              self.path = Path(path)

          # create a per_file directory
-         self.parquet_dir = self.path.joinpath('parquet')
+         if parquet_path is None:
+             self.parquet_dir = self.path.joinpath('parquet')
+         else:
+             self.parquet_dir = Path(parquet_path)
          if not self.parquet_dir.exists():
              os.mkdir(self.parquet_dir)
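Taken together, the changes in this file let channels be given directly as a dict (per the isinstance check above) and let parquet output be redirected via the new parquet_path keyword. A minimal usage sketch; the paths, file name, and channel entry are hypothetical, and the 'format' key is assumed from the surrounding filtering code:

    from processor.LabDataframeCreator import LabDataframeCreator

    # Channel definitions passed as a dict instead of a path to a JSON file.
    channels = {'dldTime': {'format': 'electron', 'group_name': 'DLD/DLD/times'}}

    ldc = LabDataframeCreator(path='/data/lab_run',
                              filenames=['run_0001.h5'],
                              channels=channels)

    # With the new keyword, parquet files go to a custom directory instead of
    # <path>/parquet; channels whose group_name is missing from the h5 file are
    # now skipped with a printed error rather than raising a KeyError.
    ldc.readData(parquet_path='/scratch/parquet_store')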
src/processor/utilities/misc.py (9 changes: 9 additions & 0 deletions)

@@ -266,6 +266,15 @@ def argnearest(array, val, rettype='vectorized'):
  # %% Data Input/Output
  # ================================================================================

+ def parse_h5_keys(d,prefix=''):
+     l = []
+     for k in d.keys():
+         try:
+             [l.append(s) for s in parse_h5_keys(d[k],prefix=prefix + '/' + k)]
+         except:
+             l.append(prefix + '/' + k)
+     return l
+
  def save_H5_hyperstack(data_array, filename, path=None, overwrite=True):
      """ Saves an hdf5 file with 4D (Kx,Ky,E,Time) images for import in FIJI
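parse_h5_keys walks an h5 hierarchy recursively: groups answer .keys() and are descended into, while datasets raise (caught by the bare except) and have their full slash-prefixed path collected. A small sketch, assuming an h5py file handle and an invented file layout:

    import h5py
    from processor.utilities import misc

    with h5py.File('lab_run_0001.h5', 'r') as f:  # hypothetical file
        keys = misc.parse_h5_keys(f)

    # For a file containing DLD/DLD/times and DLD/NumOfEvents this returns
    # ['/DLD/DLD/times', '/DLD/NumOfEvents'].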
