Commit: finishing merge
brettinanl committed Jun 6, 2022
2 parents 8b0b27c + ce9a741 commit 7d50c4f
Showing 280 changed files with 20,940 additions and 14,184 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,15 @@
+name: pre-commit
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - uses: pre-commit/action@v2.0.3
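The same checks can also be run locally from a working tree. A minimal sketch, assuming pre-commit has been installed with pip; pre_commit.main.main is the module entry point behind the pre-commit console script, and importing it directly is an assumption here (the supported interface is the CLI):

# Run the hooks from .pre-commit-config.yaml against the whole repo,
# equivalent to `pre-commit run --all-files` in a shell.
from pre_commit.main import main

exit_code = main(["run", "--all-files"])  # returns a shell-style exit code
print("pre-commit exit code:", exit_code)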
40 changes: 37 additions & 3 deletions .pre-commit-config.yaml
@@ -1,7 +1,41 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
+
+# Don't run pre-commit on files under build/ - currently happens by default
+# delete and uncomment if needed
+#exclude: "^\
+#    (build/.*)\
+#    "
+
 repos:
-  - repo: https://github.com/pycqa/flake8
-    rev: '3.9.2'  # pick a git hash / tag to point to
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
     hooks:
-      - id: flake8
+      - id: check-added-large-files # prevents giant files from being committed.
+      - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems.
+      - id: check-merge-conflict # checks for files that contain merge conflict strings.
+      - id: check-yaml # checks yaml files for parseable syntax.
+      - id: detect-private-key # detects the presence of private keys.
+      - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline.
+      - id: fix-byte-order-marker # removes utf-8 byte order marker.
+      - id: mixed-line-ending # replaces or checks mixed line ending.
+      - id: requirements-txt-fixer # sorts entries in requirements.txt.
+      - id: trailing-whitespace # trims trailing whitespace.
+      - id: check-docstring-first # checks that code comes after the docstrings.
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v2.6.2
+    hooks:
+      - id: prettier
+        files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$
+
+  - repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+      - id: black
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        args: [--profile=black]
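Most of the attn.py churn below is mechanical reformatting by the black and isort hooks configured above: single quotes become double quotes, exploded dict literals gain trailing commas, and imports are sorted alphabetically within their groups. A minimal before/after sketch of that style (illustrative code, not taken from the commit):

# Illustrative only: the shape of change the black + isort hooks produce.
# Before:
#     import pandas as pd
#     import numpy as np
#     opts = {'name': 'latent_dim',
#             'type': int}
# After: isort orders the imports; black normalizes quotes and adds a
# trailing comma once the literal spans multiple lines.
import numpy as np
import pandas as pd

opts = {
    "name": "latent_dim",
    "type": int,
}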
222 changes: 116 additions & 106 deletions Pilot1/Attn/attn.py
@@ -1,11 +1,11 @@
 from __future__ import print_function

+import logging
 import os
 import sys
-import logging

-import pandas as pd
 import numpy as np
+import pandas as pd

 file_path = os.path.dirname(os.path.realpath(__file__))

@@ -15,64 +15,72 @@
 candle.set_parallelism_threads()

 additional_definitions = [
-    {'name': 'latent_dim',
-     'action': 'store',
-     'type': int,
-     'help': 'latent dimensions'},
-    {'name': 'residual',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'add skip connections to the layers'},
-    {'name': 'reduce_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'reduce learning rate on plateau'},
-    {'name': 'warmup_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'gradually increase learning rate on start'},
-    {'name': 'base_lr',
-     'type': float,
-     'help': 'base learning rate'},
-    {'name': 'epsilon_std',
-     'type': float,
-     'help': 'epsilon std for sampling latent noise'},
-    {'name': 'use_cp',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'checkpoint models with best val_loss'},
-    {'name': 'use_tb',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'use tensorboard'},
-    {'name': 'tsne',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'generate tsne plot of the latent representation'}
+    {"name": "latent_dim", "action": "store", "type": int, "help": "latent dimensions"},
+    {
+        "name": "residual",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "add skip connections to the layers",
+    },
+    {
+        "name": "reduce_lr",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "reduce learning rate on plateau",
+    },
+    {
+        "name": "warmup_lr",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "gradually increase learning rate on start",
+    },
+    {"name": "base_lr", "type": float, "help": "base learning rate"},
+    {
+        "name": "epsilon_std",
+        "type": float,
+        "help": "epsilon std for sampling latent noise",
+    },
+    {
+        "name": "use_cp",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "checkpoint models with best val_loss",
+    },
+    {
+        "name": "use_tb",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "use tensorboard",
+    },
+    {
+        "name": "tsne",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "generate tsne plot of the latent representation",
+    },
 ]

 required = [
-    'activation',
-    'batch_size',
-    'dense',
-    'dropout',
-    'epochs',
-    'initialization',
-    'learning_rate',
-    'loss',
-    'optimizer',
-    'rng_seed',
-    'scaling',
-    'val_split',
-    'latent_dim',
-    'batch_normalization',
-    'epsilon_std',
-    'timeout'
+    "activation",
+    "batch_size",
+    "dense",
+    "dropout",
+    "epochs",
+    "initialization",
+    "learning_rate",
+    "loss",
+    "optimizer",
+    "rng_seed",
+    "scaling",
+    "val_split",
+    "latent_dim",
+    "batch_normalization",
+    "epsilon_std",
+    "timeout",
 ]


 class BenchmarkAttn(candle.Benchmark):
-
     def set_locals(self):
         """Functionality to set variables specific for the benchmark
         - required: set of required parameters for the benchmark.
@@ -86,63 +94,65 @@ def set_locals(self):
         self.additional_definitions = additional_definitions


-def extension_from_parameters(params, framework=''):
+def extension_from_parameters(params, framework=""):
     """Construct string for saving model with annotation of parameters"""
     ext = framework
-    for i, n in enumerate(params['dense']):
+    for i, n in enumerate(params["dense"]):
         if n:
-            ext += '.D{}={}'.format(i + 1, n)
-    ext += '.A={}'.format(params['activation'][0])
-    ext += '.B={}'.format(params['batch_size'])
-    ext += '.E={}'.format(params['epochs'])
-    ext += '.L={}'.format(params['latent_dim'])
-    ext += '.LR={}'.format(params['learning_rate'])
-    ext += '.S={}'.format(params['scaling'])
-
-    if params['epsilon_std'] != 1.0:
-        ext += '.EPS={}'.format(params['epsilon_std'])
-    if params['dropout']:
-        ext += '.DR={}'.format(params['dropout'])
-    if params['batch_normalization']:
-        ext += '.BN'
-    if params['warmup_lr']:
-        ext += '.WU_LR'
-    if params['reduce_lr']:
-        ext += '.Re_LR'
-    if params['residual']:
-        ext += '.Res'
+            ext += ".D{}={}".format(i + 1, n)
+    ext += ".A={}".format(params["activation"][0])
+    ext += ".B={}".format(params["batch_size"])
+    ext += ".E={}".format(params["epochs"])
+    ext += ".L={}".format(params["latent_dim"])
+    ext += ".LR={}".format(params["learning_rate"])
+    ext += ".S={}".format(params["scaling"])
+
+    if params["epsilon_std"] != 1.0:
+        ext += ".EPS={}".format(params["epsilon_std"])
+    if params["dropout"]:
+        ext += ".DR={}".format(params["dropout"])
+    if params["batch_normalization"]:
+        ext += ".BN"
+    if params["warmup_lr"]:
+        ext += ".WU_LR"
+    if params["reduce_lr"]:
+        ext += ".Re_LR"
+    if params["residual"]:
+        ext += ".Res"

     return ext


 def load_data(params, seed):

     # start change #
-    if params['train_data'].endswith('h5') or params['train_data'].endswith('hdf5'):
-        print('processing h5 in file {}'.format(params['train_data']))
+    if params["train_data"].endswith("h5") or params["train_data"].endswith("hdf5"):
+        print("processing h5 in file {}".format(params["train_data"]))

-        url = params['data_url']
-        file_train = params['train_data']
-        train_file = candle.get_file(file_train, url + file_train, cache_subdir='Pilot1')
+        url = params["data_url"]
+        file_train = params["train_data"]
+        train_file = candle.get_file(
+            file_train, url + file_train, cache_subdir="Pilot1"
+        )

-        df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
-        df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
+        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
+        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
         X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
         del df_x_train_0, df_x_train_1

-        df_x_test_0 = pd.read_hdf(train_file, 'x_test_0').astype(np.float32)
-        df_x_test_1 = pd.read_hdf(train_file, 'x_test_1').astype(np.float32)
+        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
+        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
         X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
         del df_x_test_0, df_x_test_1

-        df_x_val_0 = pd.read_hdf(train_file, 'x_val_0').astype(np.float32)
-        df_x_val_1 = pd.read_hdf(train_file, 'x_val_1').astype(np.float32)
+        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
+        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
         X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
         del df_x_val_0, df_x_val_1

-        Y_train = pd.read_hdf(train_file, 'y_train')
-        Y_test = pd.read_hdf(train_file, 'y_test')
-        Y_val = pd.read_hdf(train_file, 'y_val')
+        Y_train = pd.read_hdf(train_file, "y_train")
+        Y_test = pd.read_hdf(train_file, "y_test")
+        Y_val = pd.read_hdf(train_file, "y_val")

         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
@@ -152,36 +162,36 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print("expecting in file file suffix h5")
         sys.exit()

-    print('x_train shape:', X_train.shape)
-    print('x_test shape:', X_test.shape)
+    print("x_train shape:", X_train.shape)
+    print("x_test shape:", X_test.shape)

     return X_train, Y_train, X_val, Y_val, X_test, Y_test

     # start change #
-    if train_file.endswith('h5') or train_file.endswith('hdf5'):
-        print('processing h5 in file {}'.format(train_file))
+    if train_file.endswith("h5") or train_file.endswith("hdf5"):
+        print("processing h5 in file {}".format(train_file))

-        df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
-        df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
+        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
+        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
         X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
         del df_x_train_0, df_x_train_1

-        df_x_test_0 = pd.read_hdf(train_file, 'x_test_0').astype(np.float32)
-        df_x_test_1 = pd.read_hdf(train_file, 'x_test_1').astype(np.float32)
+        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
+        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
         X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
         del df_x_test_0, df_x_test_1

-        df_x_val_0 = pd.read_hdf(train_file, 'x_val_0').astype(np.float32)
-        df_x_val_1 = pd.read_hdf(train_file, 'x_val_1').astype(np.float32)
+        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
+        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
         X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
         del df_x_val_0, df_x_val_1

-        Y_train = pd.read_hdf(train_file, 'y_train')
-        Y_test = pd.read_hdf(train_file, 'y_test')
-        Y_val = pd.read_hdf(train_file, 'y_val')
+        Y_train = pd.read_hdf(train_file, "y_train")
+        Y_test = pd.read_hdf(train_file, "y_test")
+        Y_val = pd.read_hdf(train_file, "y_val")

         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
@@ -191,10 +201,10 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print("expecting in file file suffix h5")
         sys.exit()

-    print('x_train shape:', X_train.shape)
-    print('x_test shape:', X_test.shape)
+    print("x_train shape:", X_train.shape)
+    print("x_test shape:", X_test.shape)

     return X_train, Y_train, X_val, Y_val, X_test, Y_test
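To make the run-name scheme in extension_from_parameters concrete: a minimal sketch with made-up parameter values, mirroring the prefix-building logic shown in the diff above rather than importing the benchmark module (the conditional suffixes such as .EPS, .DR, and .BN are omitted for brevity):

# Made-up parameter values; mirrors extension_from_parameters() above.
params = {
    "dense": [1000, 500],
    "activation": ["relu"],
    "batch_size": 32,
    "epochs": 10,
    "latent_dim": 2,
    "learning_rate": 0.001,
    "scaling": "minmax",
}

ext = ""
for i, n in enumerate(params["dense"]):
    if n:  # skip zero-width layers, as the original loop does
        ext += ".D{}={}".format(i + 1, n)
ext += ".A={}".format(params["activation"][0])
ext += ".B={}".format(params["batch_size"])
ext += ".E={}".format(params["epochs"])
ext += ".L={}".format(params["latent_dim"])
ext += ".LR={}".format(params["learning_rate"])
ext += ".S={}".format(params["scaling"])

print(ext)  # .D1=1000.D2=500.A=relu.B=32.E=10.L=2.LR=0.001.S=minmax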