Commit: finishing merge
brettinanl committed Jun 6, 2022
2 parents 8b0b27c + ce9a741 commit 7d50c4f
Showing 280 changed files with 20,940 additions and 14,184 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,15 @@
+name: pre-commit
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - uses: pre-commit/action@v2.0.3
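The same checks can also be run locally from a working tree. A minimal sketch, assuming pre-commit has been installed with pip; pre_commit.main.main is the module entry point behind the pre-commit console script, and importing it directly is an assumption here (the supported interface is the CLI):

# Run the hooks from .pre-commit-config.yaml against the whole repo,
# equivalent to `pre-commit run --all-files` in a shell.
from pre_commit.main import main

exit_code = main(["run", "--all-files"])  # returns a shell-style exit code
print("pre-commit exit code:", exit_code)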
40 changes: 37 additions & 3 deletions .pre-commit-config.yaml
@@ -1,7 +1,41 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
+
+# Don't run pre-commit on files under build/ - currently happens by default
+# delete and uncomment if needed
+#exclude: "^\
+#    (build/.*)\
+#    "
+
 repos:
-  - repo: https://github.com/pycqa/flake8
-    rev: '3.9.2'  # pick a git hash / tag to point to
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
     hooks:
-      - id: flake8
+      - id: check-added-large-files # prevents giant files from being committed.
+      - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems.
+      - id: check-merge-conflict # checks for files that contain merge conflict strings.
+      - id: check-yaml # checks yaml files for parseable syntax.
+      - id: detect-private-key # detects the presence of private keys.
+      - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline.
+      - id: fix-byte-order-marker # removes utf-8 byte order marker.
+      - id: mixed-line-ending # replaces or checks mixed line ending.
+      - id: requirements-txt-fixer # sorts entries in requirements.txt.
+      - id: trailing-whitespace # trims trailing whitespace.
+      - id: check-docstring-first # checks that code comes after the docstrings.
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v2.6.2
+    hooks:
+      - id: prettier
+        files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$
+
+  - repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+      - id: black
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        args: [--profile=black]
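Most of the attn.py churn below is mechanical reformatting by the black and isort hooks configured above: single quotes become double quotes, exploded dict literals gain trailing commas, and imports are sorted alphabetically within their groups. A minimal before/after sketch of that style (illustrative code, not taken from the commit):

# Illustrative only: the shape of change the black + isort hooks produce.
# Before:
#     import pandas as pd
#     import numpy as np
#     opts = {'name': 'latent_dim',
#             'type': int}
# After: isort orders the imports; black normalizes quotes and adds a
# trailing comma once the literal spans multiple lines.
import numpy as np
import pandas as pd

opts = {
    "name": "latent_dim",
    "type": int,
}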
222 changes: 116 additions & 106 deletions Pilot1/Attn/attn.py
@@ -1,11 +1,11 @@
 from __future__ import print_function

+import logging
 import os
 import sys
-import logging

-import pandas as pd
 import numpy as np
+import pandas as pd

 file_path = os.path.dirname(os.path.realpath(__file__))

@@ -15,64 +15,72 @@
 candle.set_parallelism_threads()

 additional_definitions = [
-    {'name': 'latent_dim',
-     'action': 'store',
-     'type': int,
-     'help': 'latent dimensions'},
-    {'name': 'residual',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'add skip connections to the layers'},
-    {'name': 'reduce_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'reduce learning rate on plateau'},
-    {'name': 'warmup_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'gradually increase learning rate on start'},
-    {'name': 'base_lr',
-     'type': float,
-     'help': 'base learning rate'},
-    {'name': 'epsilon_std',
-     'type': float,
-     'help': 'epsilon std for sampling latent noise'},
-    {'name': 'use_cp',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'checkpoint models with best val_loss'},
-    {'name': 'use_tb',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'use tensorboard'},
-    {'name': 'tsne',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'generate tsne plot of the latent representation'}
+    {"name": "latent_dim", "action": "store", "type": int, "help": "latent dimensions"},
+    {
+        "name": "residual",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "add skip connections to the layers",
+    },
+    {
+        "name": "reduce_lr",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "reduce learning rate on plateau",
+    },
+    {
+        "name": "warmup_lr",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "gradually increase learning rate on start",
+    },
+    {"name": "base_lr", "type": float, "help": "base learning rate"},
+    {
+        "name": "epsilon_std",
+        "type": float,
+        "help": "epsilon std for sampling latent noise",
+    },
+    {
+        "name": "use_cp",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "checkpoint models with best val_loss",
+    },
+    {
+        "name": "use_tb",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "use tensorboard",
+    },
+    {
+        "name": "tsne",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "generate tsne plot of the latent representation",
+    },
 ]

 required = [
-    'activation',
-    'batch_size',
-    'dense',
-    'dropout',
-    'epochs',
-    'initialization',
-    'learning_rate',
-    'loss',
-    'optimizer',
-    'rng_seed',
-    'scaling',
-    'val_split',
-    'latent_dim',
-    'batch_normalization',
-    'epsilon_std',
-    'timeout'
+    "activation",
+    "batch_size",
+    "dense",
+    "dropout",
+    "epochs",
+    "initialization",
+    "learning_rate",
+    "loss",
+    "optimizer",
+    "rng_seed",
+    "scaling",
+    "val_split",
+    "latent_dim",
+    "batch_normalization",
+    "epsilon_std",
+    "timeout",
 ]


 class BenchmarkAttn(candle.Benchmark):
-
     def set_locals(self):
         """Functionality to set variables specific for the benchmark
         - required: set of required parameters for the benchmark.
@@ -86,63 +94,65 @@ def set_locals(self):
         self.additional_definitions = additional_definitions


-def extension_from_parameters(params, framework=''):
+def extension_from_parameters(params, framework=""):
     """Construct string for saving model with annotation of parameters"""
     ext = framework
-    for i, n in enumerate(params['dense']):
+    for i, n in enumerate(params["dense"]):
         if n:
-            ext += '.D{}={}'.format(i + 1, n)
-    ext += '.A={}'.format(params['activation'][0])
-    ext += '.B={}'.format(params['batch_size'])
-    ext += '.E={}'.format(params['epochs'])
-    ext += '.L={}'.format(params['latent_dim'])
-    ext += '.LR={}'.format(params['learning_rate'])
-    ext += '.S={}'.format(params['scaling'])
-
-    if params['epsilon_std'] != 1.0:
-        ext += '.EPS={}'.format(params['epsilon_std'])
-    if params['dropout']:
-        ext += '.DR={}'.format(params['dropout'])
-    if params['batch_normalization']:
-        ext += '.BN'
-    if params['warmup_lr']:
-        ext += '.WU_LR'
-    if params['reduce_lr']:
-        ext += '.Re_LR'
-    if params['residual']:
-        ext += '.Res'
+            ext += ".D{}={}".format(i + 1, n)
+    ext += ".A={}".format(params["activation"][0])
+    ext += ".B={}".format(params["batch_size"])
+    ext += ".E={}".format(params["epochs"])
+    ext += ".L={}".format(params["latent_dim"])
+    ext += ".LR={}".format(params["learning_rate"])
+    ext += ".S={}".format(params["scaling"])
+
+    if params["epsilon_std"] != 1.0:
+        ext += ".EPS={}".format(params["epsilon_std"])
+    if params["dropout"]:
+        ext += ".DR={}".format(params["dropout"])
+    if params["batch_normalization"]:
+        ext += ".BN"
+    if params["warmup_lr"]:
+        ext += ".WU_LR"
+    if params["reduce_lr"]:
+        ext += ".Re_LR"
+    if params["residual"]:
+        ext += ".Res"

     return ext


 def load_data(params, seed):

     # start change #
-    if params['train_data'].endswith('h5') or params['train_data'].endswith('hdf5'):
-        print('processing h5 in file {}'.format(params['train_data']))
+    if params["train_data"].endswith("h5") or params["train_data"].endswith("hdf5"):
+        print("processing h5 in file {}".format(params["train_data"]))

-        url = params['data_url']
-        file_train = params['train_data']
-        train_file = candle.get_file(file_train, url + file_train, cache_subdir='Pilot1')
+        url = params["data_url"]
+        file_train = params["train_data"]
+        train_file = candle.get_file(
+            file_train, url + file_train, cache_subdir="Pilot1"
+        )

-        df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
-        df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
+        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
+        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
         X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
         del df_x_train_0, df_x_train_1

-        df_x_test_0 = pd.read_hdf(train_file, 'x_test_0').astype(np.float32)
-        df_x_test_1 = pd.read_hdf(train_file, 'x_test_1').astype(np.float32)
+        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
+        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
         X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
         del df_x_test_0, df_x_test_1

-        df_x_val_0 = pd.read_hdf(train_file, 'x_val_0').astype(np.float32)
-        df_x_val_1 = pd.read_hdf(train_file, 'x_val_1').astype(np.float32)
+        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
+        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
         X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
         del df_x_val_0, df_x_val_1

-        Y_train = pd.read_hdf(train_file, 'y_train')
-        Y_test = pd.read_hdf(train_file, 'y_test')
-        Y_val = pd.read_hdf(train_file, 'y_val')
+        Y_train = pd.read_hdf(train_file, "y_train")
+        Y_test = pd.read_hdf(train_file, "y_test")
+        Y_val = pd.read_hdf(train_file, "y_val")

         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
@@ -152,36 +162,36 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print("expecting in file file suffix h5")
         sys.exit()

-    print('x_train shape:', X_train.shape)
-    print('x_test shape:', X_test.shape)
+    print("x_train shape:", X_train.shape)
+    print("x_test shape:", X_test.shape)

     return X_train, Y_train, X_val, Y_val, X_test, Y_test

     # start change #
-    if train_file.endswith('h5') or train_file.endswith('hdf5'):
-        print('processing h5 in file {}'.format(train_file))
+    if train_file.endswith("h5") or train_file.endswith("hdf5"):
+        print("processing h5 in file {}".format(train_file))

-        df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
-        df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
+        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
+        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
         X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
         del df_x_train_0, df_x_train_1

-        df_x_test_0 = pd.read_hdf(train_file, 'x_test_0').astype(np.float32)
-        df_x_test_1 = pd.read_hdf(train_file, 'x_test_1').astype(np.float32)
+        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
+        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
         X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
         del df_x_test_0, df_x_test_1

-        df_x_val_0 = pd.read_hdf(train_file, 'x_val_0').astype(np.float32)
-        df_x_val_1 = pd.read_hdf(train_file, 'x_val_1').astype(np.float32)
+        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
+        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
         X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
         del df_x_val_0, df_x_val_1

-        Y_train = pd.read_hdf(train_file, 'y_train')
-        Y_test = pd.read_hdf(train_file, 'y_test')
-        Y_val = pd.read_hdf(train_file, 'y_val')
+        Y_train = pd.read_hdf(train_file, "y_train")
+        Y_test = pd.read_hdf(train_file, "y_test")
+        Y_val = pd.read_hdf(train_file, "y_val")

         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
@@ -191,10 +201,10 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print("expecting in file file suffix h5")
         sys.exit()

-    print('x_train shape:', X_train.shape)
-    print('x_test shape:', X_test.shape)
+    print("x_train shape:", X_train.shape)
+    print("x_test shape:", X_test.shape)

     return X_train, Y_train, X_val, Y_val, X_test, Y_test
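To make the run-name scheme in extension_from_parameters concrete: a minimal sketch with made-up parameter values, mirroring the prefix-building logic shown in the diff above rather than importing the benchmark module (the conditional suffixes such as .EPS, .DR, and .BN are omitted for brevity):

# Made-up parameter values; mirrors extension_from_parameters() above.
params = {
    "dense": [1000, 500],
    "activation": ["relu"],
    "batch_size": 32,
    "epochs": 10,
    "latent_dim": 2,
    "learning_rate": 0.001,
    "scaling": "minmax",
}

ext = ""
for i, n in enumerate(params["dense"]):
    if n:  # skip zero-width layers, as the original loop does
        ext += ".D{}={}".format(i + 1, n)
ext += ".A={}".format(params["activation"][0])
ext += ".B={}".format(params["batch_size"])
ext += ".E={}".format(params["epochs"])
ext += ".L={}".format(params["latent_dim"])
ext += ".LR={}".format(params["learning_rate"])
ext += ".S={}".format(params["scaling"])

print(ext)  # .D1=1000.D2=500.A=relu.B=32.E=10.L=2.LR=0.001.S=minmax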