From ee8db6b1eb7df6fa0da3fb5c09e1497232ab335d Mon Sep 17 00:00:00 2001
From: Alex Wietek
Date: Thu, 10 Aug 2023 14:14:48 +0200
Subject: [PATCH] Added FileCollection

---
 src/Dumper.jl          |  3 ++-
 src/file_collection.jl | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 src/file_collection.jl

diff --git a/src/Dumper.jl b/src/Dumper.jl
index 474bef4..114de52 100644
--- a/src/Dumper.jl
+++ b/src/Dumper.jl
@@ -2,10 +2,11 @@ module Dumper
 using HDF5
 using Printf
 
-export DumpFile, write_data!, dump!, read_data
+export DumpFile, write_data!, dump!, read_data, FileCollection, filenames, read_collection_h5
 
 include("create_extensible.jl")
 include("append_extensible.jl")
+include("file_collection.jl")
 
 struct DumpFile
     filename::AbstractString
diff --git a/src/file_collection.jl b/src/file_collection.jl
new file mode 100644
index 0000000..f133d70
--- /dev/null
+++ b/src/file_collection.jl
@@ -0,0 +1,65 @@
+"""
+    FileCollection(directory, regex)
+
+Describe the entries of `directory` whose names match `regex`.
+
+The regex is applied to the bare names returned by `readdir(directory)`, not to
+full paths.
+"""
+struct FileCollection
+    directory::AbstractString
+    regex::Regex
+end
+
+"""
+    filenames(collection::FileCollection)
+
+Return the paths, i.e. the directory joined with the entry name, of all entries
+of the collection's directory whose name matches its regex, in `readdir` order.
+"""
+function filenames(collection::FileCollection)
+    dir = collection.directory
+    return String[
+        joinpath(dir, name) for name in readdir(dir) if occursin(collection.regex, name)
+    ]
+end
+
+"""
+    read_collection_h5(collection::FileCollection, tag::AbstractString)
+
+Read the dataset `tag` from every matching file of `collection` with `h5read`.
+
+Each file is keyed by the capture groups of the collection's regex: the capture
+itself if there is exactly one group, otherwise a tuple of all captures. Keys
+are pieces of the file name, so they compare as text ("10" sorts before "2").
+Array data is returned with its dimension order reversed.
+
+Return a vector of `key => data` pairs sorted by key. If several files produce
+the same key, a warning is logged and the file read last wins.
+"""
+function read_collection_h5(collection::FileCollection, tag::AbstractString)
+    data = Dict{Any,Any}()
+    for name in readdir(collection.directory)
+        m = match(collection.regex, name)
+        isnothing(m) && continue
+
+        # `m.captures` has one entry per capture group, named or not.
+        captures = m.captures
+        param = length(captures) == 1 ? captures[1] : Tuple(captures)
+
+        path = joinpath(collection.directory, name)
+        if haskey(data, param)
+            @warn "several files share a key; the last one read wins" param path
+        end
+
+        # read hdf5 data
+        dset = h5read(path, tag)
+        if dset isa AbstractArray
+            # Reverse the dimension order. Other values (e.g. a scalar dataset)
+            # have no dimensions to permute and are stored as read.
+            dset = permutedims(dset, ndims(dset):-1:1)
+        end
+        data[param] = dset
+    end
+    return sort!(collect(data); by=first)
+end