Skip to content

Commit

Permalink
Data Feed Update 2.3.9
Browse files · Browse the repository at this point in the history
Data Feed Update 2.3.9
  • Loading branch information
mrconway committed Nov 17, 2019
1 parent 3b7aea6 commit 7a7d0e7
Show file tree
Hide file tree
Showing 19 changed files with 504 additions and 190 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@
alphapy/examples/Trading System/.ipynb_checkpoints/A Trading System-checkpoint.ipynb
*.pkl
*.png
*.code-workspace
alphapy/.vscode/launch.json
alphapy/.vscode/settings.json
*.log
23 changes: 14 additions & 9 deletions alphapy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
import warnings
warnings.simplefilter(action='ignore', category=DeprecationWarning)
Expand Down Expand Up @@ -115,18 +116,27 @@ def training_pipeline(model):
feature_selection = model.specs['feature_selection']
grid_search = model.specs['grid_search']
model_type = model.specs['model_type']
predict_mode = model.specs['predict_mode']
rfe = model.specs['rfe']
sampling = model.specs['sampling']
scorer = model.specs['scorer']
seed = model.specs['seed']
separator = model.specs['separator']
split = model.specs['split']
target = model.specs['target']

# Get train and test data

X_train, y_train = get_data(model, Partition.train)
X_test, y_test = get_data(model, Partition.test)

# If there is no test partition, then we will split the train partition

if X_test.empty:
logger.info("No Test Data Found")
logger.info("Splitting Training Data")
X_train, X_test, y_train, y_test = train_test_split(
X_train, y_train, test_size=split, random_state=seed)

# Determine if there are any test labels

if y_test.any():
Expand Down Expand Up @@ -311,11 +321,9 @@ def prediction_pipeline(model):

directory = model.specs['directory']
drop = model.specs['drop']
extension = model.specs['extension']
feature_selection = model.specs['feature_selection']
model_type = model.specs['model_type']
rfe = model.specs['rfe']
separator = model.specs['separator']

# Get all data. We need original train and test for interactions.

Expand Down Expand Up @@ -379,15 +387,12 @@ def prediction_pipeline(model):
if model_type == ModelType.classification:
model.probas[(tag, partition)] = predictor.predict_proba(all_features)[:, 1]

# Get date stamp to record file creation

d = datetime.now()
f = "%Y%m%d"
timestamp = d.strftime(f)

# Save predictions
save_predictions(model, tag, partition)

# Return the model
return model


#
# Function main_pipeline
Expand Down
15 changes: 7 additions & 8 deletions alphapy/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Module : analysis
# Created : July 11, 2013
#
# Copyright 2017 ScottFree Analytics LLC
# Copyright 2019 ScottFree Analytics LLC
# Mark Conway & Robert D. Scott II
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -95,7 +95,7 @@ class Analysis(object):
analyses = {}

# __new__

def __new__(cls,
model,
group):
Expand Down Expand Up @@ -123,7 +123,7 @@ def __init__(self,
self.group = group
# add analysis to analyses list
Analysis.analyses[an] = self

# __str__

def __str__(self):
Expand Down Expand Up @@ -192,9 +192,6 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,
# Calculate split date
logger.info("Analysis Dates")
split_date = subtract_days(predict_date, predict_history)
logger.info("Train Date: %s", train_date)
logger.info("Split Date: %s", split_date)
logger.info("Test Date: %s", predict_date)

# Load the data frames
data_frames = load_frames(group, directory, extension, separator, splits)
Expand All @@ -203,9 +200,11 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,

if predict_mode:
# create predict frame
logger.info("Split Date for Prediction Mode: %s", split_date)
predict_frame = pd.DataFrame()
else:
# create train and test frames
logger.info("Split Date for Training Mode: %s", predict_date)
train_frame = pd.DataFrame()
test_frame = pd.DataFrame()

Expand All @@ -232,11 +231,11 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,
tag)
else:
# split data into train and test
new_train = df.loc[(df.index >= train_date) & (df.index < split_date)]
new_train = df.loc[(df.index >= train_date) & (df.index < predict_date)]
if len(new_train) > 0:
new_train = new_train.dropna()
train_frame = train_frame.append(new_train)
new_test = df.loc[(df.index >= split_date) & (df.index <= last_date)]
new_test = df.loc[(df.index >= predict_date) & (df.index <= last_date)]
if len(new_test) > 0:
# check if target column has NaN values
nan_count = df[target].isnull().sum()
Expand Down
Loading

0 comments on commit 7a7d0e7

Please sign in to comment.