Skip to content

Commit

Permalink
unify models and systems
Browse files Browse the repository at this point in the history
Use model output as system input. Apply probabilities generated by models to dictate trading rules.
  • Loading branch information
mrconway committed Jan 25, 2018
1 parent b4d1d56 commit 61ded5a
Show file tree
Hide file tree
Showing 13 changed files with 298 additions and 265 deletions.
1 change: 0 additions & 1 deletion alphapy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
from alphapy.optimize import rfecv_search
from alphapy.plots import generate_plots
from alphapy.utilities import get_datestamp
from alphapy.utilities import np_store_data

import argparse
from datetime import datetime
Expand Down
115 changes: 80 additions & 35 deletions alphapy/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
from alphapy.frame import read_frame
from alphapy.globals import ModelType
from alphapy.globals import Partition, datasets
from alphapy.globals import PD_INTRADAY_OFFSETS
from alphapy.globals import PD_WEB_DATA_FEEDS
from alphapy.globals import PSEP, SSEP, USEP
from alphapy.globals import SamplingMethod
from alphapy.globals import WILDCARD
from alphapy.space import Space

from datetime import datetime
from datetime import timedelta
Expand Down Expand Up @@ -284,6 +284,57 @@ def sample_data(model):
return model


#
# Function convert_data
#

def convert_data(df, index_column, intraday_data):
    r"""Convert a market data frame to canonical format.

    Lower-cases the column names, builds a date/time index when one
    is not already present, casts the OHLCV columns to floating point,
    and sorts the frame into ascending chronological order.

    Parameters
    ----------
    df : pandas.DataFrame
        The raw market dataframe (daily or intraday rows).
    index_column : str
        The name of the index column.
    intraday_data : bool
        Flag set to True if the frame contains intraday data.

    Returns
    -------
    df : pandas.DataFrame
        The canonical dataframe with date/time index.
    """

    # Normalize column names: lower-case with embedded spaces removed.
    df = df.rename(columns=lambda col: col.lower().replace(' ', ''))

    # Build the date/time index unless the frame already has one.
    if not isinstance(df.index, pd.DatetimeIndex):
        # Intraday rows carry a separate time field alongside the date.
        stamps = df['date'] + ' ' + df['time'] if intraday_data else df['date']
        df[index_column] = pd.to_datetime(stamps)
        df.set_index(pd.DatetimeIndex(df[index_column]),
                     drop=True, inplace=True)
        # The raw date (and time) columns are now redundant.
        del df['date']
        if intraday_data:
            del df['time']

    # Cast the price/volume columns to floating point.
    ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']
    df[ohlcv_columns] = df[ohlcv_columns].astype(float)

    # Guarantee ascending date order for downstream consumers.
    return df.sort_index()


#
# Function enhance_intraday_data
#
Expand All @@ -303,31 +354,22 @@ def enhance_intraday_data(df):
"""

# Convert the columns to proper data types
# Group by date first

index_column = 'datetime'
dt_column = df['date'] + ' ' + df['time']
df[index_column] = pd.to_datetime(dt_column)
cols_float = ['open', 'high', 'low', 'close', 'volume']
df[cols_float] = df[cols_float].astype(float)
df['date'] = df.index.strftime('%Y-%m-%d')
date_group = df.groupby('date')

# Number the intraday bars

date_group = df.groupby('date')
df['bar_number'] = date_group.cumcount()

# Mark the end of the trading day

df['end_of_day'] = False
df.loc[date_group.tail(1).index, 'end_of_day'] = True

# Set the data frame's index
df.set_index(pd.DatetimeIndex(df[index_column]), drop=True, inplace=True)

# Return the enhanced frame

del df['date']
del df['time']
return df


Expand Down Expand Up @@ -427,13 +469,13 @@ def get_pandas_data(schema, symbol, lookback_period):
"""

# Quandl is a special case.
# Quandl is a special case with subfeeds.

if 'quandl' in schema:
schema, symbol_prefix = schema.split(USEP)
symbol = SSEP.join([symbol_prefix, symbol]).upper()

# Calculate the start and end date for Yahoo.
# Calculate the start and end date.

start = datetime.now() - timedelta(lookback_period)
end = datetime.now()
Expand All @@ -443,7 +485,6 @@ def get_pandas_data(schema, symbol, lookback_period):
try:
df = web.DataReader(symbol, schema, start, end)
except:
df = None
logger.info("Could not retrieve data for: %s", symbol)

return df
Expand All @@ -453,7 +494,8 @@ def get_pandas_data(schema, symbol, lookback_period):
# Function get_market_data
#

def get_market_data(model, group, lookback_period, resample_data):
def get_market_data(model, group, lookback_period,
data_fractal, intraday_data=False):
r"""Get data from an external feed.
Parameters
Expand All @@ -464,6 +506,10 @@ def get_market_data(model, group, lookback_period, resample_data):
The group of symbols.
lookback_period : int
The number of periods of data to retrieve.
data_fractal : str
Pandas offset alias.
intraday_data : bool
If True, then get intraday data.
Returns
-------
Expand All @@ -486,57 +532,56 @@ def get_market_data(model, group, lookback_period, resample_data):

# Determine the feed source

if any(substring in fractal for substring in PD_INTRADAY_OFFSETS):
if intraday_data:
# intraday data (date and time)
logger.info("Getting Intraday Data [%s] from %s", fractal, schema)
intraday_data = True
logger.info("Getting Intraday Data [%s] from %s", data_fractal, schema)
index_column = 'datetime'
else:
# daily data or higher (date only)
logger.info("Getting Daily Data [%s] from %s", fractal, schema)
intraday_data = False
logger.info("Getting Daily Data [%s] from %s", data_fractal, schema)
index_column = 'date'

# Get the data from the relevant feed

data_dir = SSEP.join([directory, 'data'])
pandas_data = any(substring in schema for substring in PD_WEB_DATA_FEEDS)
n_periods = 0
resample_data = True if fractal != data_fractal else False

for item in group.members:
logger.info("Getting %s data for last %d days", item, lookback_period)
# Locate the data source
if schema == 'data':
fname = frame_name(item.lower(), gspace)
# local intraday or daily
dspace = Space(gspace.subject, gspace.schema, data_fractal)
fname = frame_name(item.lower(), dspace)
df = read_frame(data_dir, fname, extension, separator)
if not intraday_data:
df.set_index(pd.DatetimeIndex(df[index_column]),
drop=True, inplace=True)
elif schema == 'google' and intraday_data:
df = get_google_data(item, lookback_period, fractal)
# intraday only
df = get_google_data(item, lookback_period, data_fractal)
elif pandas_data:
# daily only
df = get_pandas_data(schema, item, lookback_period)
else:
logger.error("Unsupported Data Source: %s", schema)
# Now that we have content, standardize the data
if df is not None and not df.empty:
logger.info("Rows: %d", len(df))
# standardize column names
df = df.rename(columns = lambda x: x.lower().replace(' ',''))
# add intraday columns if necessary
if intraday_data:
df = enhance_intraday_data(df)
# order by increasing date if necessary
df = df.sort_index()
# resample data
# convert data to canonical form
df = convert_data(df, index_column, intraday_data)
# resample data and forward fill any NA values
if resample_data:
df = df.resample(fractal).agg({'open' : 'first',
'high' : 'max',
'low' : 'min',
'close' : 'last',
'volume' : 'sum'})
df.dropna(axis=0, how='any', inplace=True)
logger.info("Rows after Resampling at %s: %d",
fractal, len(df))
# add intraday columns if necessary
if intraday_data:
df = enhance_intraday_data(df)
# allocate global Frame
newf = Frame(item.lower(), gspace, df)
if newf is None:
Expand Down
3 changes: 2 additions & 1 deletion alphapy/examples/Trading Model/config/market.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
market:
create_model : True
data_fractal : 1d
data_history : 500
forecast_period : 1
fractal : 1d
lag_period : 1
leaders : ['gap', 'gapbadown', 'gapbaup', 'gapdown', 'gapup']
predict_history : 100
resample_data : False
schema : yahoo
subject : stock
target_group : test
Expand Down
3 changes: 2 additions & 1 deletion alphapy/examples/Trading System/config/market.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
market:
create_model : False
data_fractal : 1d
data_history : 500
forecast_period : 1
fractal : 1d
lag_period : 1
leaders : []
predict_history : 50
resample_data : False
schema : quandl_wiki
subject : stock
target_group : faang
Expand Down
7 changes: 5 additions & 2 deletions alphapy/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def read_frame(directory, filename, extension, separator,
#

def write_frame(df, directory, filename, extension, separator,
index=False, index_label=None):
index=False, index_label=None, columns=None):
r"""Write a dataframe into a delimiter-separated file.
Parameters
Expand All @@ -194,6 +194,8 @@ def write_frame(df, directory, filename, extension, separator,
If ``True``, write the row names (index).
index_label : str, optional
A column label for the ``index``.
columns : str, optional
A list of column names.
Returns
-------
Expand All @@ -204,7 +206,8 @@ def write_frame(df, directory, filename, extension, separator,
file_all = SSEP.join([directory, file_only])
logger.info("Writing data frame to %s", file_all)
try:
df.to_csv(file_all, sep=separator, index=index, index_label=index_label)
df.to_csv(file_all, sep=separator, index=index,
index_label=index_label, columns=columns)
except:
logger.info("Could not write data frame to %s", file_all)

Expand Down
Loading

0 comments on commit 61ded5a

Please sign in to comment.