Skip to content

Commit

Permalink
Issue 1005 file io refactor (#1007)
Browse files Browse the repository at this point in the history
* Issue #1005: This inverts the dependency between GenSymIO to HDF5 & Parquet
HDF5 related code was moved to its own module like Parquet.  They now
both depend on GenSymIO so they can become optional build modules with
respect to the modular build changes.

* Issue #1005: Removing self referential  'use Parquet' from Parquet module

* Issue #1005: Renaming GenHDF5IO to HDF5Msg based on PR feedback.

* Issue #1005: Updates to handle optional Parquet support as part
of IO refactor.  Renamed Parquet -> ParquetMsg and applied
other changes suggested in PR feedback.

* Moving arrow versioning from ServerConfig to ParquetMsg and adding
ability to update config string.

* Issued #1005: Fixing chapel based unit tests with respect to
Parquet support.

* Issue #1005: Final removal of hasParquetSupport compiler flag.
This is now completely handled by env var and ServerModules.cfg
  • Loading branch information
glitch authored Jan 19, 2022
1 parent 9912eb2 commit 697d868
Show file tree
Hide file tree
Showing 20 changed files with 2,769 additions and 2,718 deletions.
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ CHPL_FLAGS += -lhdf5 -lhdf5_hl -lzmq
ifdef ARKOUDA_SERVER_PARQUET_SUPPORT
CHPL_FLAGS += -lparquet -larrow
OPTIONAL_CHECKS += check-arrow
OPTIONAL_SERVER_FLAGS += -shasParquetSupport
ARROW_FILE_NAME += $(ARKOUDA_SOURCE_DIR)/ArrowFunctions
ARROW_CPP += $(ARROW_FILE_NAME).cpp
ARROW_H += $(ARROW_FILE_NAME).h
Expand Down Expand Up @@ -186,7 +185,7 @@ check-hdf5: $(HDF5_CHECK)
ARROW_CHECK = $(DEP_INSTALL_DIR)/checkArrow.chpl
check-arrow: $(ARROW_CHECK) $(ARROW_O)
@echo "Checking for Arrow"
$(CHPL) $(CHPL_FLAGS) $< $(ARROW_M) -M $(ARKOUDA_SOURCE_DIR) -o $(DEP_INSTALL_DIR)/$@ -shasParquetSupport
$(CHPL) $(CHPL_FLAGS) $< $(ARROW_M) -M $(ARKOUDA_SOURCE_DIR) -o $(DEP_INSTALL_DIR)/$@
$(DEP_INSTALL_DIR)/$@ -nl 1
@rm -f $(DEP_INSTALL_DIR)/$@ $(DEP_INSTALL_DIR)/$@_real

Expand Down Expand Up @@ -259,13 +258,14 @@ endif
MODULE_GENERATION_SCRIPT=$(ARKOUDA_SOURCE_DIR)/serverModuleGen.py
# This is the main compilation statement section
$(ARKOUDA_MAIN_MODULE): check-deps $(ARROW_O) $(ARKOUDA_SOURCES) $(ARKOUDA_MAKEFILES)
python3 $(MODULE_GENERATION_SCRIPT) $(ARKOUDA_CONFIG_FILE)
$(eval MOD_GEN_OUT=$(shell python3 $(MODULE_GENERATION_SCRIPT) $(ARKOUDA_CONFIG_FILE)))
@echo $(MOD_GEN_OUT);
$(CHPL) $(CHPL_DEBUG_FLAGS) $(PRINT_PASSES_FLAGS) $(REGEX_MAX_CAPTURES_FLAG) $(OPTIONAL_SERVER_FLAGS) $(CHPL_FLAGS_WITH_VERSION) $(ARKOUDA_MAIN_SOURCE) $(ARKOUDA_COMPAT_MODULES) -o $@

CLEAN_TARGETS += arkouda-clean
.PHONY: arkouda-clean
arkouda-clean:
$(RM) $(ARKOUDA_MAIN_MODULE) $(ARKOUDA_MAIN_MODULE)_real
$(RM) $(ARKOUDA_MAIN_MODULE) $(ARKOUDA_MAIN_MODULE)_real $(ARKOUDA_SOURCE_DIR)/ServerRegistration.chpl

.PHONY: tags
tags:
Expand Down Expand Up @@ -418,6 +418,7 @@ $(TEST_BINARY_DIR):

.PHONY: $(TEST_TARGETS) # Force tests to always rebuild.
$(TEST_TARGETS): $(TEST_BINARY_DIR)/$(TEST_BINARY_SIGIL)%: $(TEST_SOURCE_DIR)/%.chpl | $(TEST_BINARY_DIR)
python3 $(MODULE_GENERATION_SCRIPT) $(ARKOUDA_CONFIG_FILE)
$(CHPL) $(TEST_CHPL_FLAGS) -M $(ARKOUDA_SOURCE_DIR) $(ARKOUDA_COMPAT_MODULES) $< -o $@

print-%:
Expand Down
4 changes: 3 additions & 1 deletion ServerModules.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ RegistrationMsg
CastMsg
BroadcastMsg
FlattenMsg
HDF5Msg
ParquetMsg

# Add additional modules located in
# the Arkouda src/ directory below
# the Arkouda src/ directory below
23 changes: 22 additions & 1 deletion dep/checkArrow.chpl
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
use Parquet;
use SysCTypes, CPtr, IO;

require "../src/ArrowFunctions.h";
require "../src/ArrowFunctions.o";

proc getVersionInfo() {
extern proc c_getVersionInfo(): c_string;
extern proc strlen(str): c_int;
extern proc c_free_string(ptr);
var cVersionString = c_getVersionInfo();
defer {
c_free_string(cVersionString: c_void_ptr);
}
var ret: string;
try {
ret = createStringWithNewBuffer(cVersionString,
strlen(cVersionString));
} catch e {
ret = "Error converting Arrow version message to Chapel string";
}
return ret;
}

proc main() {
var ArrowVersion = getVersionInfo();
Expand Down
92 changes: 92 additions & 0 deletions src/FileIO.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ module FileIO {
use FileSystem;
use Map;
use Path;
use Reflection;

use ServerConfig, Logging;
private config const logLevel = ServerConfig.logLevel;
const fioLogger = new Logger(logLevel);

enum FileType {HDF5, ARROW, PARQUET, UNKNOWN};

proc appendFile(filePath : string, line : string) throws {
Expand Down Expand Up @@ -62,6 +67,81 @@ module FileIO {
}
}

/*
* Ensure the file is closed, disregard errors
*/
proc ensureClose(tmpf:file): bool {
var success = true;
try {
tmpf.close();
} catch {
success = false;
}
return success;
}

/*
* Indicates whether the filename represents a glob expression as opposed to
* an specific filename
*/
proc isGlobPattern(filename: string): bool throws {
return filename.endsWith("*");
}

/*
* Generates a list of filenames to be written to based upon a file prefix,
* extension, and number of locales.
*/
proc generateFilenames(prefix : string, extension : string, targetLocalesSize:int) : [] string throws {
// Generate the filenames based upon the number of targetLocales.
var filenames: [0..#targetLocalesSize] string;
for i in 0..#targetLocalesSize {
filenames[i] = generateFilename(prefix, extension, i);
}
fioLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
"generateFilenames targetLocales.size %i, filenames.size %i".format(targetLocalesSize, filenames.size));

return filenames;
}

/*
* Generates a file name composed of a prefix, which is a filename provided by
* the user along with a file index and extension.
*/
proc generateFilename(prefix : string, extension : string, idx : int) : string throws {
var suffix = '%04i'.format(idx);
return "%s_LOCALE%s%s".format(prefix, suffix, extension);
}

/*
* Generates an array of filenames to be matched in APPEND mode and to be
* checked in TRUNCATE mode that will warn the user that 1..n files are
* being overwritten.
*/
proc getMatchingFilenames(prefix : string, extension : string) throws {
return glob("%s_LOCALE*%s".format(prefix, extension));
}

/*
* Returns a tuple composed of a file prefix and extension to be used to
* generate locale-specific filenames to be written to.
*/
proc getFileMetadata(filename : string) {
const fields = filename.split(".");
var prefix: string;
var extension: string;

if fields.size == 1 || fields[fields.domain.high].count(pathSep) > 0 {
prefix = filename;
extension = "";
} else {
prefix = ".".join(fields#(fields.size-1)); // take all but the last
extension = "." + fields[fields.domain.high];
}

return (prefix,extension);
}

// File Magic headers for supported formats
const MAGIC_PARQUET:bytes = b"\x50\x41\x52\x31"; // 4 bytes "PAR1"
const MAGIC_HDF5:bytes = b"\x89\x48\x44\x46\x0d\x0a\x1a\x0a"; // 8 bytes "\211HDF\r\n\032\n"
Expand All @@ -85,4 +165,16 @@ module FileIO {
}
return t;
}

proc domain_intersection(d1: domain(1), d2: domain(1)) {
var low = max(d1.low, d2.low);
var high = min(d1.high, d2.high);
if (d1.stride !=1) && (d2.stride != 1) {
//TODO: change this to throw
halt("At least one domain must have stride 1");
}
var stride = max(d1.stride, d2.stride);
return {low..high by stride};
}

}
1 change: 0 additions & 1 deletion src/FlattenMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ module FlattenMsg {
use Flatten;
use ServerConfig;
use SegmentedArray;
use GenSymIO;
use Logging;
use Message;

Expand Down
Loading

0 comments on commit 697d868

Please sign in to comment.