diff --git a/ingestion_program/ingestion.py b/ingestion_program/ingestion.py
index dffe644..db184bc 100755
--- a/ingestion_program/ingestion.py
+++ b/ingestion_program/ingestion.py
@@ -26,11 +26,75 @@
 
 # ===== Begin Imageomics modifications =====
 import os
-from sys import argv, path, executable
+import re
+from sys import argv, path, executable, exit
 import subprocess
 import time
 
 
+# Requirement pins must follow the PEP 440 public version scheme
+# [N!]N(.N)*[{a|b|rc}N][.postN][.devN], written here as a real regex.
+VERSION_PATTERN = re.compile(
+    r"^(\d+!)?\d+(\.\d+)*((a|b|rc)\d+)?(\.post\d+)?(\.dev\d+)?$"
+)
+
+
+def _canonical_name(name):
+    """Return the PEP 503 normalized form of a project name."""
+    return re.sub(r"[-_.]+", "-", name).lower()
+
+
+def install_from_whitelist(req_file, whitelist_file="/app/program/whitelist.txt"):
+    """Install the whitelisted packages named in a requirements file.
+
+    Each requirement is read as ``name`` or ``name==version``. A pin with more
+    than one ``==`` or with a version that does not match VERSION_PATTERN is
+    dropped and the latest release of ``name`` is installed instead.
+
+    Args:
+        req_file: path of the submitted requirements.txt.
+        whitelist_file: path of the file listing one allowed package per line.
+
+    Raises:
+        SystemExit: if a requested package is not in the whitelist.
+        subprocess.CalledProcessError: if pip fails to install a package.
+    """
+    with open(whitelist_file, "r") as handle:
+        whitelist = {_canonical_name(line.strip()) for line in handle if line.strip()}
+
+    with open(req_file, "r") as handle:
+        requirements = [line.strip() for line in handle]
+
+    for package in requirements:
+        # Blank lines and comments are legal in requirements.txt and are not
+        # package requests, so they must not trip the whitelist check.
+        if not package or package.startswith("#"):
+            continue
+
+        package_version = [part.strip() for part in package.split("==")]
+        name = package_version[0]
+        if len(package_version) > 2:
+            # invalid format, don't use
+            print(f"requested package {package} has invalid format, will install latest version (of {name}) if allowed")
+            package = name
+        elif len(package_version) == 2:
+            if VERSION_PATTERN.match(package_version[1]):
+                package = f"{name}=={package_version[1]}"
+            else:
+                # invalid format of version, don't use
+                print(f"requested package {package} has invalid version, will install latest version (of {name}) if allowed")
+                package = name
+
+        if _canonical_name(name) not in whitelist:
+            exit(
+                f"{name} is not an allowed package.\n"
+                "Please contact the organizers on GitHub to request acceptance of the package."
+            )
+        # The name is whitelisted and any pin was format-checked above.
+        subprocess.check_call([executable, "-m", "pip", "install", package])
+        print(f"{name} installed")
+
+
 if __name__ == "__main__":
     #### INPUT/OUTPUT: Get input and output directory names
     print("We're running ingestion")
@@ -49,14 +113,16 @@
     path.append(submission_dir) # In order to access libraries of the user
 
     start = time.time()
-    if os.path.isfile(os.path.join(submission_dir, "requirements.txt")):
-        subprocess.check_call([executable, "-m", "pip", "install", "-r", os.path.join(submission_dir, "requirements.txt")])
+    requirements_file = os.path.join(submission_dir, "requirements.txt")
+    if os.path.isfile(requirements_file):
+        install_from_whitelist(requirements_file)
     end = time.time()
 
     elapsed = time.strftime("%H:%M:%S", time.gmtime(end - start))
     print(f"pip handling packages takes {elapsed}.")
 
 
+    # Import remaining packages
     from PIL import Image
     from tqdm import tqdm
     from model import Model
@@ -67,7 +133,6 @@
     submit_model.load()
 
 
-
     img_list = os.listdir(input_dir)
     num_of_datapoint = len(img_list)
 
@@ -95,6 +160,4 @@
     print(f"model inference takes {elapsed}.")
 
 
-    print(f"we looped {idx} times")
-
-
+    print(f"we looped {idx} times")
diff --git a/ingestion_program/whitelist.txt b/ingestion_program/whitelist.txt
new file mode 100644
index 0000000..1e08919
--- /dev/null
+++ b/ingestion_program/whitelist.txt
@@ -0,0 +1,25 @@
+pytorch
+tensorflow
+numpy
+scikit-learn
+keras
+torch
+open-clip-torch
+torchvision
+transformers
+pandas
+Pillow
+scipy
+tqdm
+pyarrow
+duckdb
+matplotlib
+xgboost
+nflows
+lightgbm
+seaborn
+iminuit
+timm
+torchaudio
+catboost
+plotly