Skip to content

Commit

Permalink
Data Feed Update 2.3.9
Browse files · Browse the repository at this point in the history
Data Feed Update 2.3.9
  • Loading branch information
mrconway committed Nov 17, 2019
1 parent 3b7aea6 commit 7a7d0e7
Show file tree
Hide file tree
Showing 19 changed files with 504 additions and 190 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@
alphapy/examples/Trading System/.ipynb_checkpoints/A Trading System-checkpoint.ipynb
*.pkl
*.png
*.code-workspace
alphapy/.vscode/launch.json
alphapy/.vscode/settings.json
*.log
23 changes: 14 additions & 9 deletions alphapy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
import warnings
warnings.simplefilter(action='ignore', category=DeprecationWarning)
Expand Down Expand Up @@ -115,18 +116,27 @@ def training_pipeline(model):
feature_selection = model.specs['feature_selection']
grid_search = model.specs['grid_search']
model_type = model.specs['model_type']
predict_mode = model.specs['predict_mode']
rfe = model.specs['rfe']
sampling = model.specs['sampling']
scorer = model.specs['scorer']
seed = model.specs['seed']
separator = model.specs['separator']
split = model.specs['split']
target = model.specs['target']

# Get train and test data

X_train, y_train = get_data(model, Partition.train)
X_test, y_test = get_data(model, Partition.test)

# If there is no test partition, then we will split the train partition

if X_test.empty:
logger.info("No Test Data Found")
logger.info("Splitting Training Data")
X_train, X_test, y_train, y_test = train_test_split(
X_train, y_train, test_size=split, random_state=seed)

# Determine if there are any test labels

if y_test.any():
Expand Down Expand Up @@ -311,11 +321,9 @@ def prediction_pipeline(model):

directory = model.specs['directory']
drop = model.specs['drop']
extension = model.specs['extension']
feature_selection = model.specs['feature_selection']
model_type = model.specs['model_type']
rfe = model.specs['rfe']
separator = model.specs['separator']

# Get all data. We need original train and test for interactions.

Expand Down Expand Up @@ -379,15 +387,12 @@ def prediction_pipeline(model):
if model_type == ModelType.classification:
model.probas[(tag, partition)] = predictor.predict_proba(all_features)[:, 1]

# Get date stamp to record file creation

d = datetime.now()
f = "%Y%m%d"
timestamp = d.strftime(f)

# Save predictions
save_predictions(model, tag, partition)

# Return the model
return model


#
# Function main_pipeline
Expand Down
15 changes: 7 additions & 8 deletions alphapy/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Module : analysis
# Created : July 11, 2013
#
# Copyright 2017 ScottFree Analytics LLC
# Copyright 2019 ScottFree Analytics LLC
# Mark Conway & Robert D. Scott II
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -95,7 +95,7 @@ class Analysis(object):
analyses = {}

# __new__

def __new__(cls,
model,
group):
Expand Down Expand Up @@ -123,7 +123,7 @@ def __init__(self,
self.group = group
# add analysis to analyses list
Analysis.analyses[an] = self

# __str__

def __str__(self):
Expand Down Expand Up @@ -192,9 +192,6 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,
# Calculate split date
logger.info("Analysis Dates")
split_date = subtract_days(predict_date, predict_history)
logger.info("Train Date: %s", train_date)
logger.info("Split Date: %s", split_date)
logger.info("Test Date: %s", predict_date)

# Load the data frames
data_frames = load_frames(group, directory, extension, separator, splits)
Expand All @@ -203,9 +200,11 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,

if predict_mode:
# create predict frame
logger.info("Split Date for Prediction Mode: %s", split_date)
predict_frame = pd.DataFrame()
else:
# create train and test frames
logger.info("Split Date for Training Mode: %s", predict_date)
train_frame = pd.DataFrame()
test_frame = pd.DataFrame()

Expand All @@ -232,11 +231,11 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,
tag)
else:
# split data into train and test
new_train = df.loc[(df.index >= train_date) & (df.index < split_date)]
new_train = df.loc[(df.index >= train_date) & (df.index < predict_date)]
if len(new_train) > 0:
new_train = new_train.dropna()
train_frame = train_frame.append(new_train)
new_test = df.loc[(df.index >= split_date) & (df.index <= last_date)]
new_test = df.loc[(df.index >= predict_date) & (df.index <= last_date)]
if len(new_test) > 0:
# check if target column has NaN values
nan_count = df[target].isnull().sum()
Expand Down
Loading

0 comments on commit 7a7d0e7

Please sign in to comment.