diff --git a/.gitignore b/.gitignore index a0a7f977c..88963513b 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,7 @@ cython_debug/ #.idea/ .test_cache/ .cache/ + +# predictoor dynamic modeling +out*.txt +csvs/ diff --git a/READMEs/localpredictoor-localnet.md b/READMEs/localpredictoor-localnet.md index 00cf4ffd9..5aaf1d32b 100644 --- a/READMEs/localpredictoor-localnet.md +++ b/READMEs/localpredictoor-localnet.md @@ -14,10 +14,13 @@ This README describes: 1. **Setup & run local network** - [Install it](#install-local-network) - [Run it](#run-local-network) -2. **Setup & run predictoor bot** +2. **Setup predictoor bot** - [Install it](#install-predictoor-bot) - [Set envvars](#set-envvars) - - [Run it](#run-predictoor-bot): [random](#run-random-predictoor) then [model-based](#run-model-based-predictoor) +3. **[Run predictoor bot](#run-predictoor-bot)** + - [Random](#run-random-predictoor) + - [Static model](#run-static-model-predictoor) + - [Dynamic model](#run-dynamic-model-predictoor) ## Install Local Network @@ -66,10 +69,6 @@ There are other envvars that you might want to set, such as the owner addresses. Here, we run a bot that makes random predictions. -- It runs [`predictoor_agent1.py::PredictoorAgent1`](../pdr_backend/predictoor/approach1/predictoor_agent1.py) found in `pdr_backend/predictoor/approach1` -- It's configured by envvars and [`predictoor_config1.py::PredictoorConfig1`](../pdr_backend/predictoor/approach1/predictoor_config1.py) -- It predicts according to `PredictoorAgent1:get_prediction()`. - In work console: ```console # run random predictoor bot @@ -79,20 +78,23 @@ python pdr_backend/predictoor/main.py 1 Observe the bots in action: - In the barge console: trueval bot submitting (mock random) truevals, trader is (mock) trading, etc - In your work console: predictoor bot is submitting (mock random) predictions +- Query predictoor subgraph for detailed run info. [`subgraph.md`](subgraph.md) has details. -You can query predictoor subgraph for detailed run info. [`subgraph.md`](subgraph.md) has details. - +Code structure: +- It runs [`predictoor_agent1.py::PredictoorAgent1`](../pdr_backend/predictoor/approach1/predictoor_agent1.py) found in `pdr_backend/predictoor/approach1` +- It's configured by envvars and [`predictoor_config1.py::PredictoorConfig1`](../pdr_backend/predictoor/approach1/predictoor_config1.py) +- It predicts according to `PredictoorAgent1:get_prediction()`. -### Run Model Based Predictoor +### Run Static Model Predictoor -Since random predictions aren't accurate, let's use AI/ML models. Here's an example flow that loads pre-learned models ("approach2"): +Since random predictions aren't accurate, let's use AI/ML models. +- Here in "approach2", we load pre-learned models (static) +- And in "approach3" further below, we learn models on-the-fly (dynamic) +Code structure: - The bot runs from [`predictoor/approach2/main.py`](../pdr_backend/predictoor/approach2/main.py), using `predict.py` in the same dir. - Which imports a model stored in [`pdr-model-simple`](https://github.com/oceanprotocol/pdr-model-simple) repo - -Once you're familiar with this, you'll want to fork it and run your own. - In work console: ```console # (ctrl-c existing run) @@ -113,6 +115,61 @@ pip install scikit-learn ta python pdr_backend/predictoor/main.py 2 ``` +Once you're familiar with this, you can fork it and run your own. + +### Run Dynamic Model Predictoor + +Here, we build models on-the-fly, ie dynamic models. It's "approach3". 
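+
+Here's a minimal sketch of one epoch of dynamic modeling, using this repo's approach3 classes. It closely mirrors what `PredictoorAgent3:get_prediction()` does; the parameter values below are illustrative, not the bot's exact defaults:
+
+```python
+import os
+from pdr_backend.predictoor.approach3.data_factory import DataFactory
+from pdr_backend.predictoor.approach3.data_ss import DataSS
+from pdr_backend.predictoor.approach3.model_factory import ModelFactory
+from pdr_backend.predictoor.approach3.model_ss import ModelSS
+from pdr_backend.predictoor.approach3.timeutil import timestr_to_ut
+
+data_ss = DataSS(
+    csv_dir=os.path.abspath("csvs"),
+    st_timestamp=timestr_to_ut("2023-01-31"),
+    fin_timestamp=timestr_to_ut("now"),
+    max_N_train=5000, N_test=10, Nt=10,
+    usdcoin="USDT", timeframe="5m",
+    signals=["close"], coins=["ETH", "BTC"],
+    exchange_ids=["binanceus"],
+    yval_exchange_id="binanceus", yval_coin="ETH", yval_signal="close",
+)
+model_ss = ModelSS("LIN")  # or PREV, GPR, SVR, NuSVR, LinearSVR
+
+# fetch/refresh csvs, then build X/y; the newest point is held out
+data_factory = DataFactory(data_ss)
+hist_df = data_factory.get_hist_df()
+X, y, _, _ = data_factory.create_xy(hist_df, testshift=0)
+X_train, X_test, y_train = X[:-1, :], X[-1:, :], y[:-1]
+
+# fit a fresh model, then predict: up (True) or down (False)?
+model = ModelFactory(model_ss).build(X_train, y_train)
+predval = model.predict(X_test)[0] > y_train[-1]
+```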
+
+Whereas approach2 has model development in a different repo, approach3 has it inside this repo. Accordingly, this flow has two top-level steps:
+1. Develop & backtest models
+2. Run bot in Predictoor context
+
+Let's go through each in turn.
+
+**Approach3 - Step 1: Develop & backtest models**
+
+Here, we develop & backtest the model. The setup is optimized for rapid iteration: it's independent of Barge and Predictoor bots.
+
+In work console:
+```console
+# this dir will hold data fetched from exchanges
+mkdir csvs
+
+# run approach3 unit tests
+pytest pdr_backend/predictoor/approach3
+
+# run approach3 backtest
+# (Edit the parameters in runtrade.py as desired)
+python pdr_backend/predictoor/approach3/runtrade.py
+```
+
+`runtrade.py` will grab data from exchanges, then simulate one epoch at a time (including building a model, predicting, and trading). When done, it plots accumulated returns vs. time. Besides logging to stdout, it also logs to `out*.txt` in the current directory.
+
+**Approach3 - Step 2: Run bot in Predictoor context**
+
+Once you're satisfied with your backtests, you're ready to run the approach3 bot in a Predictoor context.
+
+First, get Barge & other bots going via "Run local network" instructions above.
+
+Then, in work console:
+```console
+# run approach3 predictoor bot
+python pdr_backend/predictoor/main.py 3
+```
+
+Observe all bots in action:
+- In the barge console: trueval bot submitting (mock random) truevals, trader is (mock) trading, etc
+- In your work console: predictoor bot is submitting (mock random) predictions
+- Query predictoor subgraph for detailed run info. [`subgraph.md`](subgraph.md) has details.
+
+Code structure:
+- It runs [`predictoor_agent3.py::PredictoorAgent3`](../pdr_backend/predictoor/approach3/predictoor_agent3.py) found in `pdr_backend/predictoor/approach3`
+- It's configured by envvars and [`predictoor_config3.py::PredictoorConfig3`](../pdr_backend/predictoor/approach3/predictoor_config3.py)
+- It predicts according to `PredictoorAgent3:get_prediction()`.
+
+Once you're familiar with this, you can fork it and run your own.
+
 ## Next step
 
 You're now running a local predictoor bot on a local network. Congrats!
diff --git a/mypy.ini b/mypy.ini index 521977576..dd4ed85f5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -56,5 +56,8 @@ ignore_missing_imports = True [mypy-web3.*] ignore_missing_imports = True +[mypy-sklearn.*] +ignore_missing_imports = True + [mypy-pdr_backend.predictoor.examples.*] ignore_missing_imports = True diff --git a/pdr_backend/predictoor/approach3/__init__.py b/pdr_backend/predictoor/approach3/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pdr_backend/predictoor/approach3/constants.py b/pdr_backend/predictoor/approach3/constants.py new file mode 100644 index 000000000..ae5d7f5f3 --- /dev/null +++ b/pdr_backend/predictoor/approach3/constants.py @@ -0,0 +1,9 @@ +import numpy as np + +OHLCV_COLS = ["open", "high", "low", "close", "volume"] +OHLCV_DTYPES = [np.float64] * len(OHLCV_COLS) + +TOHLCV_COLS = ["timestamp"] + OHLCV_COLS +TOHLCV_DTYPES = [np.int64] + OHLCV_DTYPES + +MS_PER_EPOCH = 300000 # 300,000 ms in 5 min diff --git a/pdr_backend/predictoor/approach3/data_factory.py b/pdr_backend/predictoor/approach3/data_factory.py new file mode 100644 index 000000000..86d7d2e61 --- /dev/null +++ b/pdr_backend/predictoor/approach3/data_factory.py @@ -0,0 +1,273 @@ +import os +import sys +from typing import Dict + +from enforce_typing import enforce_types +import numpy as np +import pandas as pd + +from pdr_backend.predictoor.approach3.tradeutil import pairstr + +from pdr_backend.predictoor.approach3.constants import ( + OHLCV_COLS, + TOHLCV_COLS, + MS_PER_EPOCH, +) +from pdr_backend.predictoor.approach3.data_ss import DataSS +from pdr_backend.predictoor.approach3.pdutil import ( + initialize_df, + concat_next_df, + save_csv, + load_csv, + has_data, + oldest_ut, + newest_ut, +) +from pdr_backend.predictoor.approach3.timeutil import ( + pretty_timestr, + current_ut, +) + + +@enforce_types +class DataFactory: + def __init__(self, ss: DataSS): + self.ss = ss + + def get_hist_df(self) -> pd.DataFrame: + """ + @return + hist_df -- df w/ cols={exchange_id}:{coin}:{signal}+"datetime", + and index=timestamp + """ + self._update_csvs() + csv_dfs = self._load_csvs() + hist_df = self._merge_csv_dfs(csv_dfs) + return hist_df + + def _update_csvs(self): + print("Update csvs.") + print(f"-User-specified start: {pretty_timestr(self.ss.st_timestamp)}") + print(f"-User-specified fin: {pretty_timestr(self.ss.fin_timestamp)}") + + for exchange_id in self.ss.exchange_ids: + for coin in self.ss.coins: + pair = pairstr(coin, usdcoin=self.ss.usdcoin) + self._update_hist_csv_at_exch_and_pair(exchange_id, pair) + + def _update_hist_csv_at_exch_and_pair(self, exchange_id, pair): + print(f"Update csv at exchange={exchange_id}, pair={pair}") + + filename = self._hist_csv_filename(exchange_id, pair) + print(f" filename={filename}") + + st_ut = self._calc_start_ut_maybe_delete(filename) + print(f" Aim to fetch data from start time: {pretty_timestr(st_ut)}") + if st_ut > min(current_ut(), self.ss.fin_timestamp): + print(" Given start time, no data to gather. Exit.") + return + + # Fill in df + df = initialize_df(OHLCV_COLS) + while True: + print(f" Fetch 1000 pts from {pretty_timestr(st_ut)}") + + exch = self.ss.exchs_dict[exchange_id] + + # C is [sample x signal(TOHLCV)]. 
Row 0 is oldest + # TOHLCV = unixTime (in ms), Open, High, Low, Close, Volume + raw_tohlcv_data = exch.fetch_ohlcv( + symbol=pair, + timeframe=self.ss.timeframe, + since=st_ut, + limit=1000, + ) + uts = [vec[0] for vec in raw_tohlcv_data] + if len(uts) > 1: + diffs = np.array(uts[1:]) - np.array(uts[:-1]) + mx, mn = max(diffs), min(diffs) + diffs_ok = mn == mx == MS_PER_EPOCH + if not diffs_ok: + print(f"**WARNING: diffs not ok: mn={mn}, mx={mx}**") + # assert mx == mn == MS_PER_EPOCH + raw_tohlcv_data = [ + vec for vec in raw_tohlcv_data if vec[0] <= self.ss.fin_timestamp + ] + next_df = pd.DataFrame(raw_tohlcv_data, columns=TOHLCV_COLS) + df = concat_next_df(df, next_df) + + if len(raw_tohlcv_data) < 1000: # no more data, we're at newest time + break + + # prep next iteration + newest_ut_value = int(df.index.values[-1]) + # prev_st_ut = st_ut + st_ut = newest_ut_value + MS_PER_EPOCH + + # output to csv + save_csv(filename, df) + + def _calc_start_ut_maybe_delete(self, filename: str) -> int: + """Calculate start timestamp, reconciling whether file exists and where + its data starts. Will delete file if it's inconvenient to re-use""" + if not os.path.exists(filename): + print(" No file exists yet, so will fetch all data") + return self.ss.st_timestamp + + print(" File already exists") + if not has_data(filename): + print(" File has no data, so delete it") + os.remove(filename) + return self.ss.st_timestamp + + file_ut0, file_utN = oldest_ut(filename), newest_ut(filename) + print(f" File starts at: {pretty_timestr(file_ut0)}") + print(f" File finishes at: {pretty_timestr(file_utN)}") + + if self.ss.st_timestamp >= file_ut0: + print(" User-specified start >= file start, so append file") + return file_utN + MS_PER_EPOCH + + print(" User-specified start < file start, so delete file") + os.remove(filename) + return self.ss.st_timestamp + + def _load_csvs(self) -> Dict[str, Dict[str, pd.DataFrame]]: + """ + @return -- csv_dfs -- dict of [exchange_id_str][coin_str] : df + df has columns=OHLCV_COLS+"datetime", and index=timestamp + """ + cols = self.ss.signals # subset of TOHLCV_COLS + st, fin = self.ss.st_timestamp, self.ss.fin_timestamp + + csv_dfs: Dict[str, Dict[str, pd.DataFrame]] = {} + for exchange_id in self.ss.exchange_ids: + exch = self.ss.exchs_dict[exchange_id] + csv_dfs[exchange_id] = {} + for coin in self.ss.coins: + pair = pairstr(coin, usdcoin=self.ss.usdcoin) + print(f"Load csv from exchange={exch}, pair={pair}") + filename = self._hist_csv_filename(exchange_id, pair) + csv_df = load_csv(filename, cols, st, fin) + assert "datetime" in csv_df.columns + assert csv_df.index.name == "timestamp" + csv_dfs[exchange_id][coin] = csv_df + return csv_dfs + + def _merge_csv_dfs(self, csv_dfs: dict) -> pd.DataFrame: + """ + @arguments + csv_dfs -- dict [exchange_id][coin] : df + where df has cols={signal}+"datetime", and index=timestamp + @return + hist_df -- df w/ cols={exchange_id}:{coin}:{signal}+"datetime", + and index=timestamp + """ + hist_df = pd.DataFrame() + for exchange_id in csv_dfs.keys(): + for coin, csv_df in csv_dfs[exchange_id].items(): + assert "datetime" in csv_df.columns + assert csv_df.index.name == "timestamp" + + for csv_col in csv_df.columns: + if csv_col == "datetime": + if "datetime" in hist_df.columns: + continue + hist_col = csv_col + else: + signal = csv_col # eg "close" + hist_col = f"{exchange_id}:{coin}:{signal}" + hist_df[hist_col] = csv_df[csv_col] + + assert "datetime" in hist_df.columns + assert hist_df.index.name == "timestamp" + return hist_df + + def 
create_xy(self, hist_df: pd.DataFrame, testshift: int):
+        """
+        @arguments
+          hist_df -- df w/ cols={exchange_id}:{coin}:{signal}+"datetime",
+            and index=timestamp
+          testshift -- to simulate across historical test data
+
+        @return --
+          X -- 2d array of [sample_i, var_i] : value
+          y -- 1d array of [sample_i]
+          var_with_prev -- int -- index of the X-column holding the most
+            recent yval lag (the ":t-2" column); PrevModel predicts from it
+          x_df -- df w/ cols={exchange_id}:{coin}:{signal}:t-{x} + "datetime"
+            index=0,1,.. (nothing special)
+        """
+        ss = self.ss
+        x_df = pd.DataFrame()
+
+        target_hist_cols = [
+            f"{exchange_id}:{coin}:{signal}"
+            for exchange_id in ss.exchange_ids
+            for coin in ss.coins
+            for signal in ss.signals
+        ]
+
+        for hist_col in target_hist_cols:
+            assert hist_col in hist_df.columns, "missing a data col"
+            z = hist_df[hist_col].tolist()  # [..., z(t-3), z(t-2), z(t-1)]
+            maxshift = testshift + ss.Nt
+            N_train = min(ss.max_N_train, len(z) - maxshift - 1)
+            if N_train <= 0:
+                print(
+                    f"Too little data. len(z)={len(z)}, maxshift={maxshift}"
+                    f" (= testshift + Nt = {testshift} + {ss.Nt})"
+                )
+                print("To fix: broaden time, shrink testshift, or shrink Nt")
+                sys.exit(1)
+            for delayshift in range(ss.Nt, 0, -1):  # eg [3, 2, 1] for Nt=3
+                shift = testshift + delayshift
+                x_col = hist_col + f":t-{delayshift+1}"
+                assert (shift + N_train + 1) <= len(z)
+                # 1 point for test, the rest for train data
+                x_df[x_col] = _slice(z, -shift - N_train - 1, -shift)
+
+        X = x_df.to_numpy()
+
+        x_cols = x_df.columns.tolist()
+        x_col_with_prev = (
+            f"{ss.yval_exchange_id}:{ss.yval_coin}:" f"{ss.yval_signal}:t-2"
+        )
+        var_with_prev = x_cols.index(x_col_with_prev)
+
+        # y is set from yval_{exchange_id, coin, signal}
+        # eg y = [BinEthC_-1, BinEthC_-2, ..., BinEthC_-450, BinEthC_-451]
+        hist_col = f"{ss.yval_exchange_id}:{ss.yval_coin}:{ss.yval_signal}"
+        z = hist_df[hist_col].tolist()
+        y = np.array(_slice(z, -testshift - N_train - 1, -testshift))
+
+        # postconditions
+        assert X.shape[0] == y.shape[0]
+        assert X.shape[0] <= (ss.max_N_train + 1)
+        assert X.shape[1] == ss.n
+
+        # return
+        return X, y, var_with_prev, x_df
+
+    def _hist_csv_filename(self, exchange_id, pair) -> str:
+        """Given exchange_id and pair (and self path), compute csv filename"""
+        basename = (
+            exchange_id
+            + "_"
+            + pair.replace("/", "-")
+            + "_"
+            + self.ss.timeframe
+            + ".csv"
+        )
+        filename = os.path.join(self.ss.csv_dir, basename)
+        return filename
+
+
+def _slice(x: list, st: int, fin: int) -> list:
+    """Python list slicing returns an empty list for x[st:fin] if st<0 and fin=0.
+    This overcomes that issue, for cases when st<0."""
+    assert st < 0
+    assert fin <= 0
+    assert st < fin
+    if fin == 0:
+        return x[st:]
+    return x[st:fin]
diff --git a/pdr_backend/predictoor/approach3/data_ss.py b/pdr_backend/predictoor/approach3/data_ss.py
new file mode 100644
index 000000000..b85bae4ee
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/data_ss.py
@@ -0,0 +1,124 @@
+import os
+from typing import List
+
+import ccxt
+from enforce_typing import enforce_types
+import numpy as np
+
+from pdr_backend.predictoor.approach3.timeutil import pretty_timestr
+
+CAND_USDCOINS = ["USDT", "DAI", "USDC"]  # add more if needed
+CAND_TIMEFRAMES = ["1m", "5m", "15m", "30m", "1h", "1d", "1w", "1M"]
+CAND_SIGNALS = ["open", "high", "low", "close", "volume"]
+
+
+class DataSS:
+    # pylint: disable=too-many-instance-attributes
+    @enforce_types
+    def __init__(
+        self,
+        csv_dir: str,  # abs or relative location of csvs directory
+        st_timestamp: int,  # ut, eg via timestr_to_ut(timestr)
+        fin_timestamp: int,  # ""
+        max_N_train,  # if inf, only limited by data available
N_test: int, # eg 100. num pts to test on, 1 at a time (online) + Nt: int, # eg 10. # model inputs Nt past pts z[t-1], .., z[t-Nt] + usdcoin: str, # e.g. "USDT", for pairs of eg ETH-USDT, BTC-USDT + timeframe: str, # "1m", 5m, 15m, 30m, 1h, 1d, 1w, 1M + signals: List[str], # eg ["open","high","low","close","volume"] + coins: List[str], # eg ["ETH", "BTC"] + exchange_ids: List[str], # eg ["binance", "kraken"] + yval_exchange_id: str, # eg "binance", + yval_coin: str, # eg "ETH" + yval_signal: str, # eg "c" for closing price + ): + if not os.path.exists(csv_dir): + print(f"Could not find csv dir, creating one at: {csv_dir}") + os.makedirs(csv_dir) + assert 0 <= st_timestamp <= fin_timestamp <= np.inf + assert 0 < max_N_train + assert 0 < N_test < np.inf + assert 0 < Nt < np.inf + assert usdcoin in CAND_USDCOINS + assert timeframe in CAND_TIMEFRAMES + unknown_signals = set(signals) - set(CAND_SIGNALS) + assert not unknown_signals, unknown_signals + assert yval_signal in CAND_SIGNALS, yval_signal + + self.csv_dir = csv_dir + self.st_timestamp = st_timestamp + self.fin_timestamp = fin_timestamp + + self.max_N_train = max_N_train + self.N_test = N_test + self.Nt = Nt + + self.usdcoin = usdcoin + self.timeframe = timeframe + self.signals = signals + self.coins = coins + + self.exchs_dict = {} # e.g. {"binance" : ccxt.binance()} + for exchange_id in exchange_ids: + exchange_class = getattr(ccxt, exchange_id) + self.exchs_dict[exchange_id] = exchange_class() + + self.yval_exchange_id = yval_exchange_id + self.yval_coin = yval_coin + self.yval_signal = yval_signal + + @property + def n(self) -> int: + """Number of input dimensions == # columns in X""" + return self.n_exchs * self.n_coins * self.n_signals * self.Nt + + @property + def n_exchs(self) -> int: + return len(self.exchs_dict) + + @property + def exchange_ids(self) -> List[str]: + return sorted(self.exchs_dict.keys()) + + @property + def n_signals(self) -> int: + return len(self.signals) + + @property + def n_coins(self) -> int: + return len(self.coins) + + def __str__(self) -> str: + s = "DataSS={\n" + + s += f" csv_dir={self.csv_dir}\n" + s += f" st_timestamp={pretty_timestr(self.st_timestamp)}\n" + s += f" fin_timestamp={pretty_timestr(self.fin_timestamp)}\n" + s += " \n" + + s += f" max_N_train={self.max_N_train} -- max # pts to train on\n" + s += f" N_test={self.N_test} -- # pts to test on, 1 at a time\n" + s += f" Nt={self.Nt} -- model inputs Nt past pts z[t-1], .., z[t-Nt]\n" + s += " \n" + + s += f" usdcoin={self.usdcoin}\n" + s += f" timeframe={self.timeframe}\n" + s += f" signals={self.signals}\n" + s += f" coins={self.coins}\n" + s += " \n" + + s += f" exchs_dict={self.exchs_dict}\n" + s += f" yval_exchange_id={self.yval_exchange_id}\n" + s += f" yval_coin={self.yval_coin}\n" + s += f" yval_signal={self.yval_signal}\n" + s += " \n" + + s += " (then...)\n" + s += f" n_exchs={self.n_exchs}\n" + s += f" exchange_ids={self.exchange_ids}\n" + s += f" n_signals={self.n_signals}\n" + s += f" n_coins={self.n_coins}\n" + s += f" n={self.n} -- # input variables to model\n" + + s += "/DataSS}\n" + return s diff --git a/pdr_backend/predictoor/approach3/model_factory.py b/pdr_backend/predictoor/approach3/model_factory.py new file mode 100644 index 000000000..a15bfc583 --- /dev/null +++ b/pdr_backend/predictoor/approach3/model_factory.py @@ -0,0 +1,39 @@ +from enforce_typing import enforce_types +from sklearn import linear_model, svm +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF 
+
+from pdr_backend.predictoor.approach3 import prev_model
+from pdr_backend.predictoor.approach3.model_ss import ModelSS
+
+
+@enforce_types
+class ModelFactory:
+    def __init__(self, model_ss: ModelSS):
+        self.model_ss = model_ss
+
+    def build(self, X_train, y_train):
+        # print(f"Build model: start")
+        model = self._model()
+        model.fit(X_train, y_train)
+        # print("Build model: done")
+        return model
+
+    def _model(self):
+        a = self.model_ss.model_approach
+        # print(f"model_approach={a}")
+        if a == "PREV":
+            return prev_model.PrevModel(self.model_ss.var_with_prev)
+        if a == "LIN":
+            return linear_model.LinearRegression()
+        if a == "GPR":
+            kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3))
+            return GaussianProcessRegressor(kernel=kernel, alpha=0.0)
+        if a == "SVR":
+            return svm.SVR()
+        if a == "NuSVR":
+            return svm.NuSVR()
+        if a == "LinearSVR":
+            return svm.LinearSVR()
+
+        raise ValueError(a)
diff --git a/pdr_backend/predictoor/approach3/model_ss.py b/pdr_backend/predictoor/approach3/model_ss.py
new file mode 100644
index 000000000..af2b84a9a
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/model_ss.py
@@ -0,0 +1,18 @@
+from enforce_typing import enforce_types
+
+
+@enforce_types
+class ModelSS:
+    def __init__(
+        self,
+        model_approach: str,  # PREV,LIN,GPR,SVR,NuSVR,LinearSVR
+    ):
+        self.model_approach = model_approach
+        self.var_with_prev = None
+
+    def __str__(self) -> str:
+        s = "ModelSS={\n"
+        s += f" model_approach={self.model_approach}\n"
+        s += f" var_with_prev={self.var_with_prev}\n"
+        s += "/ModelSS}\n"
+        return s
diff --git a/pdr_backend/predictoor/approach3/pdutil.py b/pdr_backend/predictoor/approach3/pdutil.py
new file mode 100644
index 000000000..49bccf567
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/pdutil.py
@@ -0,0 +1,187 @@
+"""
+pdutil: pandas dataframe & csv utilities.
+These utilities are specific to the time-series dataframe columns we're using.
+"""
+import os
+from typing import List
+
+from enforce_typing import enforce_types
+import numpy as np
+import pandas as pd
+
+from pdr_backend.predictoor.approach3.constants import (
+    OHLCV_COLS,
+    TOHLCV_COLS,
+    TOHLCV_DTYPES,
+)
+
+
+@enforce_types
+def initialize_df(cols: List[str]) -> pd.DataFrame:
+    """Start a new df, with the expected columns, index, and dtypes
+    It's ok whether cols has "timestamp" or not. Same for "datetime".
+    The return df has "timestamp" for index and "datetime" as col
+    """
+    dtypes = {
+        col: pd.Series(dtype=dtype)
+        for col, dtype in zip(TOHLCV_COLS, TOHLCV_DTYPES)
+        if col in cols or col == "timestamp"
+    }
+    df = pd.DataFrame(dtypes)
+    df = df.set_index("timestamp")
+    # pylint: disable=unsupported-assignment-operation
+    df["datetime"] = pd.to_datetime(df.index.values, unit="ms", utc=True)
+
+    return df
+
+
+@enforce_types
+def concat_next_df(df: pd.DataFrame, next_df: pd.DataFrame) -> pd.DataFrame:
+    """Add a next df to existing df, with the expected columns etc.
+    The existing df *should* have the 'datetime' col, and next_df should *not*.
+    """
+    assert "datetime" in df.columns
+    assert "datetime" not in next_df.columns
+    next_df = next_df.set_index("timestamp")
+    next_df["datetime"] = pd.to_datetime(next_df.index.values, unit="ms", utc=True)
+    df = pd.concat([df, next_df])
+    return df
+
+
+@enforce_types
+def save_csv(filename: str, df: pd.DataFrame):
+    """Append to csv file if it exists, otherwise create new one.
+    With header=True and index=True, it will set the index_col too
+    """
+    # preconditions
+    assert df.columns.tolist() == OHLCV_COLS + ["datetime"]
+
+    # csv column order: timestamp (index), datetime, O, H, L, C, V
+    columns = ["datetime"] + OHLCV_COLS
+
+    if os.path.exists(filename):  # append existing file
+        df.to_csv(filename, mode="a", header=False, index=True, columns=columns)
+        print(f" Just appended {df.shape[0]} df rows to file {filename}")
+    else:  # write new file
+        df.to_csv(filename, mode="w", header=True, index=True, columns=columns)
+        print(f" Just saved df with {df.shape[0]} rows to new file {filename}")
+
+
+@enforce_types
+def load_csv(filename: str, cols=None, st=None, fin=None) -> pd.DataFrame:
+    """Load csv file as a dataframe.
+
+    Features:
+    - Ensure that all dtypes are correct
+    - Filter to just the input columns
+    - Filter to just the specified start & end times
+    - Memory stays reasonable
+
+    @arguments
+      cols -- what columns to use, eg ["open","high"]. Set to None for all cols.
+      st -- starting timestamp, in ut. Set to 0 or None for very beginning
+      fin -- ending timestamp, in ut. Set to inf or None for very end
+
+    @return
+      df -- dataframe
+
+    @notes
+      Don't specify "timestamp" as a column because it's the df *index*
+      Don't specify "datetime" as a column, as that'll get calc'd from timestamp
+    """
+    if cols is None:
+        cols = OHLCV_COLS
+    assert "timestamp" not in cols
+    assert "datetime" not in cols
+    cols = ["timestamp"] + cols
+
+    # set skiprows, nrows
+    if st in [0, None] and fin in [np.inf, None]:
+        skiprows, nrows = None, None
+    else:
+        df0 = pd.read_csv(filename, usecols=["timestamp"])
+        timestamps = df0["timestamp"].tolist()
+        skiprows = [
+            i + 1 for i, timestamp in enumerate(timestamps) if timestamp < st
+        ]  # "+1" to account for header
+        if skiprows == []:
+            skiprows = None
+        nrows = sum(
+            1 for row, timestamp in enumerate(timestamps) if st <= timestamp <= fin
+        )
+
+    # set dtypes
+    cand_dtypes = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES))
+    dtypes = {col: cand_dtypes[col] for col in cols}
+
+    # load
+    df = pd.read_csv(
+        filename,
+        dtype=dtypes,
+        usecols=cols,
+        skiprows=skiprows,
+        nrows=nrows,
+    )
+
+    # add in datetime column
+    df0 = initialize_df(cols)
+    df = concat_next_df(df0, df)
+
+    # postconditions, return
+    assert "timestamp" not in df.columns
+    assert df.index.name == "timestamp" and df.index.dtype == np.int64
+    assert "datetime" in df.columns
+    return df
+
+
+@enforce_types
+def has_data(filename: str) -> bool:
+    """Returns True if the file has >0 data entries"""
+    with open(filename) as f:
+        for i, _ in enumerate(f):
+            if i >= 1:
+                return True
+    return False
+
+
+@enforce_types
+def oldest_ut(filename: str) -> int:
+    """
+    Return the timestamp for the oldest entry in the file.
+    Assumes the oldest entry is the second line in the file.
+    (First line is header)
+    """
+    line = _get_second_line(filename)
+    ut = int(line.split(",")[0])
+    return ut
+
+
+@enforce_types
+def _get_second_line(filename) -> str:
+    """Returns the second line of the file, as a string"""
+    with open(filename) as f:
+        for i, line in enumerate(f):
+            if i == 1:
+                return line
+    raise ValueError(f"File {filename} has no entries")
+
+
+@enforce_types
+def newest_ut(filename: str) -> int:
+    """
+    Return the timestamp for the youngest entry in the file.
+    Assumes the youngest entry is the very last line in the file.
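+    (It streams the file via _get_last_line, so memory stays flat even for large csvs.)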
+ """ + line = _get_last_line(filename) + ut = int(line.split(",")[0]) + return ut + + +@enforce_types +def _get_last_line(filename: str) -> str: + """Returns the last line in a file, as a string""" + line = None + with open(filename) as f: + for line in f: + pass + return line if line is not None else "" diff --git a/pdr_backend/predictoor/approach3/plotutil.py b/pdr_backend/predictoor/approach3/plotutil.py new file mode 100644 index 000000000..6293366d3 --- /dev/null +++ b/pdr_backend/predictoor/approach3/plotutil.py @@ -0,0 +1,83 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np + +HEIGHT = 8 +WIDTH = HEIGHT * 2 + + +def plot_vals_vs_time1(y, yhat, title=""): + matplotlib.rcParams.update({"font.size": 22}) + assert len(y) == len(yhat) + N = len(y) + x = list(range(0, N)) + fig, ax = plt.subplots() + ax.set_title(title) + ax.plot(x, y, "g-", label="y") + ax.plot(x, yhat, "b-", label="yhat") + ax.legend(loc="lower right") + plt.ylabel("y") + plt.xlabel("time") + fig.set_size_inches(WIDTH, HEIGHT) + plt.show() + + +def plot_vals_vs_time2(y_train, y_trainhat, y_test, y_testhat, title=""): + matplotlib.rcParams.update({"font.size": 22}) + assert len(y_train) == len(y_trainhat) + assert len(y_test) == len(y_testhat) + N_train, N_test = len(y_train), len(y_test) + N = N_train + N_test + x = np.array(list(range(0, N))) + x_train, x_test = x[:N_train], x[N_train:] + fig, ax = plt.subplots() + ax.set_title(title) + ax.plot( + list(x_train) + [x_test[0]], + list(y_train) + [y_test[0]], + "g-o", + linewidth=3, + label="y_train", + ) + ax.plot( + list(x_train) + [x_test[0]], + list(y_trainhat) + [y_testhat[0]], + "b--o", + linewidth=2, + label="y_trainhat", + ) + ax.plot(x_test, y_test, "g-o", linewidth=3, label="y_test") + ax.plot(x_test, y_testhat, "b--o", linewidth=2, label="y_testhat") + ax.plot([N_train - 0.5, N_train - 0.5], ax.get_ylim(), "r--", label="train/test") + ax.legend(loc="lower right") + plt.ylabel("y") + plt.xlabel("time") + fig.set_size_inches(WIDTH, HEIGHT) + plt.show() + + +def scatter_pred_vs_actual(y, yhat, title=""): + matplotlib.rcParams.update({"font.size": 22}) + assert len(y) == len(yhat) + fig, ax = plt.subplots() + ax.set_title(title) + ax.plot(y, yhat, "bo", markersize=15.0, label="Actual") + ax.plot(ax.get_xlim(), ax.get_ylim(), "g--", label="Ideal") + ax.legend(loc="lower right") + plt.ylabel("yhat") + plt.xlabel("y") + fig.set_size_inches(WIDTH, HEIGHT) + plt.show() + + +def plot_any_vs_time(y, ylabel): + matplotlib.rcParams.update({"font.size": 22}) + N = len(y) + x = list(range(0, N)) + fig, ax = plt.subplots() + ax.set_title(ylabel + " vs time") + ax.plot(x, y, "g-") + plt.ylabel(ylabel) + plt.xlabel("time") + fig.set_size_inches(WIDTH, HEIGHT) + plt.show() diff --git a/pdr_backend/predictoor/approach3/predictoor_agent3.py b/pdr_backend/predictoor/approach3/predictoor_agent3.py new file mode 100644 index 000000000..f84334c9f --- /dev/null +++ b/pdr_backend/predictoor/approach3/predictoor_agent3.py @@ -0,0 +1,103 @@ +import os +from typing import Tuple + +from enforce_typing import enforce_types +from pdr_backend.predictoor.approach3.data_factory import DataFactory +from pdr_backend.predictoor.approach3.model_factory import ModelFactory +from pdr_backend.predictoor.approach3.model_ss import ModelSS + +from pdr_backend.predictoor.base_predictoor_agent import BasePredictoorAgent +from pdr_backend.predictoor.approach3.predictoor_config3 import PredictoorConfig3 +from pdr_backend.predictoor.approach3.timeutil import timestr_to_ut +from 
pdr_backend.predictoor.approach3.data_ss import DataSS + + +@enforce_types +class PredictoorAgent3(BasePredictoorAgent): + predictoor_config_class = PredictoorConfig3 + + def __init__(self, config: PredictoorConfig3): + super().__init__(config) + self.config: PredictoorConfig3 = config + + def get_prediction( + self, addr: str, timestamp: int # pylint: disable=unused-argument + ) -> Tuple[bool, float]: + """ + @description + Given a feed, let's predict for a given timestamp. + + @arguments + addr -- str -- address of the trading pair. Info in self.feeds[addr] + timestamp -- int -- when to make prediction for (unix time) + + @return + predval -- bool -- if True, it's predicting 'up'. If False, 'down' + stake -- int -- amount to stake, in units of Eth + """ + # Set model_ss + model_ss = ModelSS( + self.config.model_ss + ) # PREV, LIN, GPR, SVR, NuSVR, LinearSVR + + # Controllable data_ss params. Hardcoded; could be moved to envvars + + coins = ["ETH", "BTC"] + signals = ["close"] # ["open", "high","low", "close", "volume"] + exchange_ids = ["binanceus"] # ["binance", "kraken"] + + # Uncontrollable data_ss params + feed = self.feeds[addr] + timeframe = feed.timeframe # eg 5m, 1h + yval_coin = feed.base # eg ETH + usdcoin = feed.quote # eg USDT + yval_exchange_id = feed.source + yval_signal = "close" + + if yval_coin not in coins: # eg DOT + coins.append(yval_coin) + if yval_exchange_id not in exchange_ids: + exchange_ids.append(yval_exchange_id) + + # Set data_ss + data_ss = DataSS( + csv_dir=os.path.abspath("csvs"), + st_timestamp=self.config.st_timestamp, + fin_timestamp=timestr_to_ut("now"), + max_N_train=self.config.max_N_train, + N_test=self.config.N_test, + Nt=self.config.Nt, + usdcoin=usdcoin, + timeframe=timeframe, + signals=signals, + coins=coins, + exchange_ids=exchange_ids, + yval_exchange_id=yval_exchange_id, + yval_coin=yval_coin, + yval_signal=yval_signal, + ) + + data_factory = DataFactory(data_ss) + + # Compute X/y + hist_df = data_factory.get_hist_df() + X, y, _, _ = data_factory.create_xy(hist_df, testshift=0) + + # Split X/y + st, fin = 0, X.shape[0] - 1 + X_train, X_test = X[st:fin, :], X[fin : fin + 1] + y_train, _ = y[st:fin], y[fin : fin + 1] + + # Compute the model + model_factory = ModelFactory(model_ss) + model = model_factory.build(X_train, y_train) + + # Predict + predprice = model.predict(X_test)[0] + curprice = y_train[-1] + predval = predprice > curprice + + # Stake what was set via envvar STAKE_AMOUNT + stake = self.config.stake_amount + + return (bool(predval), stake) diff --git a/pdr_backend/predictoor/approach3/predictoor_config3.py b/pdr_backend/predictoor/approach3/predictoor_config3.py new file mode 100644 index 000000000..c3938bf18 --- /dev/null +++ b/pdr_backend/predictoor/approach3/predictoor_config3.py @@ -0,0 +1,16 @@ +from enforce_typing import enforce_types +from pdr_backend.predictoor.approach3.timeutil import timestr_to_ut + +from pdr_backend.predictoor.base_predictoor_config import BasePredictoorConfig + + +@enforce_types +class PredictoorConfig3(BasePredictoorConfig): + def __init__(self): + super().__init__() + self.max_N_train = 5000 + self.Nt = 10 # eg 10. 
model inputs Nt past pts z[t-1], .., z[t-Nt]
+        self.N_test = 10
+        self.signals = ["close"]  # ["open", "high","low", "close", "volume"]
+        self.st_timestamp = timestr_to_ut("2023-01-31")  # 2019-09-13_04:00 earliest
+        self.model_ss = "LIN"  # PREV, LIN, GPR, SVR, NuSVR, LinearSVR
diff --git a/pdr_backend/predictoor/approach3/prev_model.py b/pdr_backend/predictoor/approach3/prev_model.py
new file mode 100644
index 000000000..e9df92cb1
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/prev_model.py
@@ -0,0 +1,16 @@
+from enforce_typing import enforce_types
+import numpy as np
+
+
+@enforce_types
+class PrevModel:
+    def __init__(self, var_with_prev: int):
+        # which variable (= column in X) has the previous values
+        # i.e. if we're predicting y[t], what column has y[t-1]
+        self.var_with_prev = var_with_prev
+
+    def fit(self, X_train, y_train):
+        pass
+
+    def predict(self, X) -> np.ndarray:
+        # predict yhat[t] := y[t-1], ie the value in column var_with_prev
+        return X[:, self.var_with_prev]
diff --git a/pdr_backend/predictoor/approach3/runtrade.py b/pdr_backend/predictoor/approach3/runtrade.py
new file mode 100755
index 000000000..7479a40ad
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/runtrade.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+import os
+
+from pdr_backend.predictoor.approach3.data_ss import DataSS
+from pdr_backend.predictoor.approach3.model_ss import ModelSS
+from pdr_backend.predictoor.approach3.timeutil import timestr_to_ut
+from pdr_backend.predictoor.approach3.tradeutil import TradeParams, TradeSS
+from pdr_backend.predictoor.approach3.trade_engine import TradeEngine
+
+# Backlog is in backlog.py
+
+# ==================================================================
+# params that I change
+
+data_ss = DataSS(
+    csv_dir=os.path.abspath("csvs"),
+    st_timestamp=timestr_to_ut("2022-09-13"),  # 2019-09-13_04:00 earliest
+    fin_timestamp=timestr_to_ut("2023-06-24"),  # 'now','2023-06-21_17:55'
+    max_N_train=5000,  # 50000 # if inf, only limited by data available
+    N_test=200,  # 50000 . num points to test on, 1 at a time (online)
+    Nt=10,  # eg 10. model inputs Nt past pts z[t-1], .., z[t-Nt]
+    usdcoin="USDT",
+    timeframe="5m",
+    signals=["close"],  # ["open", "high","low", "close", "volume"],
+    coins=["ETH", "BTC"],
+    exchange_ids=["binance"],
+    yval_exchange_id="binance",
+    yval_coin="BTC",
+    yval_signal="close",
+)
+
+model_ss = ModelSS("LIN")  # PREV, LIN, GPR, SVR, NuSVR, LinearSVR
+
+trade_pp = TradeParams(
+    fee_percent=0.0,  # Eg 0.001 is 0.1%. Trading fee (simulated)
+    init_holdings={"USDT": 100000.0, "BTC": 0.0},
+)
+
+trade_ss = TradeSS(
+    do_plot=True,  # plot at end?
+    logpath=os.path.abspath("./"),
+    buy_amt_usd=100000.00,  # How much to buy at a time.
In USD +) + +# ================================================================== +# print setup +print(f"data_ss={data_ss}") +print(f"model_ss={model_ss}") +print(f"trade_pp={trade_pp}") +print(f"trade_ss={trade_ss}") + +# ================================================================== +# do work +trade_engine = TradeEngine(data_ss, model_ss, trade_pp, trade_ss) + +trade_engine.run() diff --git a/pdr_backend/predictoor/approach3/test/__init__.py b/pdr_backend/predictoor/approach3/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pdr_backend/predictoor/approach3/test/conftest.py b/pdr_backend/predictoor/approach3/test/conftest.py new file mode 100644 index 000000000..fc9ed034c --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/conftest.py @@ -0,0 +1 @@ +from pdr_backend.conftest_ganache import * # pylint: disable=wildcard-import diff --git a/pdr_backend/predictoor/approach3/test/test_constants.py b/pdr_backend/predictoor/approach3/test/test_constants.py new file mode 100644 index 000000000..666cfc193 --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/test_constants.py @@ -0,0 +1,25 @@ +from enforce_typing import enforce_types +import numpy as np + +from pdr_backend.predictoor.approach3.constants import ( + OHLCV_COLS, + OHLCV_DTYPES, + TOHLCV_COLS, + TOHLCV_DTYPES, +) + + +@enforce_types +def test_constants(): + assert len(OHLCV_COLS) == len(OHLCV_DTYPES) + assert len(TOHLCV_COLS) == len(TOHLCV_DTYPES) == len(OHLCV_COLS) + 1 + + assert "high" in OHLCV_COLS + assert "timestamp" not in OHLCV_COLS + assert np.float64 in OHLCV_DTYPES + assert np.int64 not in OHLCV_DTYPES + + assert "high" in TOHLCV_COLS + assert "timestamp" in TOHLCV_COLS + assert np.float64 in TOHLCV_DTYPES + assert np.int64 in TOHLCV_DTYPES diff --git a/pdr_backend/predictoor/approach3/test/test_data_factory.py b/pdr_backend/predictoor/approach3/test/test_data_factory.py new file mode 100644 index 000000000..38bd97448 --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/test_data_factory.py @@ -0,0 +1,359 @@ +import copy + +from enforce_typing import enforce_types +import pandas as pd + +from pdr_backend.predictoor.approach3.constants import ( + TOHLCV_COLS, + MS_PER_EPOCH, +) +from pdr_backend.predictoor.approach3.data_ss import DataSS +from pdr_backend.predictoor.approach3.data_factory import DataFactory +from pdr_backend.predictoor.approach3.pdutil import ( + initialize_df, + concat_next_df, + load_csv, +) +from pdr_backend.predictoor.approach3.timeutil import ( + current_ut, + timestr_to_ut, +) + + +# ==================================================================== +# test csv updating + + +def test_update_csv1(tmpdir): + _test_update_csv("2023-01-01_0:00", "2023-01-01_0:00", tmpdir, n_uts=1) + + +def test_update_csv2(tmpdir): + _test_update_csv("2023-01-01_0:00", "2023-01-01_0:05", tmpdir, n_uts=2) + + +def test_update_csv3(tmpdir): + _test_update_csv("2023-01-01_0:00", "2023-01-01_0:10", tmpdir, n_uts=3) + + +def test_update_csv4(tmpdir): + _test_update_csv("2023-01-01_0:00", "2023-01-01_0:45", tmpdir, n_uts=10) + + +def test_update_csv5(tmpdir): + _test_update_csv("2023-01-01", "2023-06-21", tmpdir, n_uts=">1K") + + +@enforce_types +def _test_update_csv(st_str: str, fin_str: str, tmpdir, n_uts): + """n_uts -- expected # timestamps. Typically int. 
If '>1K', expect >1000""" + + # setup: base data + st_ut = timestr_to_ut(st_str) + fin_ut = timestr_to_ut(fin_str) + csvdir = str(tmpdir) + + # setup: uts helpers + def _calc_ut(since: int, i: int) -> int: + return since + i * MS_PER_EPOCH + + def _uts_in_range(st_ut, fin_ut): + return [ + _calc_ut(st_ut, i) + for i in range(100000) # assume <=100K epochs + if _calc_ut(st_ut, i) <= fin_ut + ] + + def _uts_from_since(cur_ut, since, limit): + return [ + _calc_ut(since, i) for i in range(limit) if _calc_ut(since, i) <= cur_ut + ] + + # setup: exchange + class FakeExchange: + def __init__(self): + self.cur_ut = current_ut() # fixed value, for easier testing + + # pylint: disable=unused-argument + def fetch_ohlcv(self, since, limit, *args, **kwargs) -> list: + uts = _uts_from_since(self.cur_ut, since, limit) + return [[ut] + [1.0] * 5 for ut in uts] # 1.0 for open, high, .. + + exchange = FakeExchange() + + # setup: ss + ss = DataSS( + csv_dir=csvdir, + st_timestamp=st_ut, + fin_timestamp=fin_ut, + max_N_train=7, + Nt=3, + N_test=2, + usdcoin="USDT", + timeframe="5m", + signals=["high"], + coins=["ETH"], + exchange_ids=["binanceus"], + yval_exchange_id="binanceus", + yval_coin="ETH", + yval_signal="high", + ) + ss.exchs_dict["binanceus"] = exchange + + # setup: data_factory, filename + data_factory = DataFactory(ss) + filename = data_factory._hist_csv_filename("binanceus", "ETH/USDT") + + def _uts_in_csv(filename: str) -> list: + df = load_csv(filename) + return df.index.values.tolist() + + # work 1: new csv + data_factory._update_hist_csv_at_exch_and_pair("binanceus", "ETH/USDT") + uts = _uts_in_csv(filename) + if isinstance(n_uts, int): + assert len(uts) == n_uts + elif n_uts == ">1K": + assert len(uts) > 1000 + assert sorted(uts) == uts and uts[0] == st_ut and uts[-1] == fin_ut + assert uts == _uts_in_range(st_ut, fin_ut) + + # work 2: two more epochs at end --> it'll append existing csv + ss.fin_timestamp = fin_ut + 2 * MS_PER_EPOCH + data_factory._update_hist_csv_at_exch_and_pair("binanceus", "ETH/USDT") + uts2 = _uts_in_csv(filename) + assert uts2 == _uts_in_range(st_ut, fin_ut + 2 * MS_PER_EPOCH) + + # work 3: two more epochs at beginning *and* end --> it'll create new csv + ss.st_timestamp = st_ut - 2 * MS_PER_EPOCH + ss.fin_timestamp = fin_ut + 4 * MS_PER_EPOCH + data_factory._update_hist_csv_at_exch_and_pair("binanceus", "ETH/USDT") + uts3 = _uts_in_csv(filename) + assert uts3 == _uts_in_range(st_ut - 2 * MS_PER_EPOCH, fin_ut + 4 * MS_PER_EPOCH) + + +# ====================================================================== +# end-to-end tests + +BINANCE_ETH_DATA = [ + # time #open #high #low #close #volume + [1686805500000, 0.5, 12, 0.12, 1.1, 7.0], + [1686805800000, 0.5, 11, 0.11, 2.2, 7.0], + [1686806100000, 0.5, 10, 0.10, 3.3, 7.0], + [1686806400000, 1.1, 9, 0.09, 4.4, 1.4], + [1686806700000, 3.5, 8, 0.08, 5.5, 2.8], + [1686807000000, 4.7, 7, 0.07, 6.6, 8.1], + [1686807300000, 4.5, 6, 0.06, 7.7, 8.1], + [1686807600000, 0.6, 5, 0.05, 8.8, 8.1], + [1686807900000, 0.9, 4, 0.04, 9.9, 8.1], + [1686808200000, 2.7, 3, 0.03, 10.10, 8.1], + [1686808500000, 0.7, 2, 0.02, 11.11, 8.1], + [1686808800000, 0.7, 1, 0.01, 12.12, 8.3], +] + + +def _addval(DATA: list, val: float) -> list: + DATA2 = copy.deepcopy(DATA) + for row_i, row in enumerate(DATA2): + for col_j, _ in enumerate(row): + if col_j == 0: + continue + DATA2[row_i][col_j] += val + return DATA2 + + +BINANCE_BTC_DATA = _addval(BINANCE_ETH_DATA, 10000.0) +KRAKEN_ETH_DATA = _addval(BINANCE_ETH_DATA, 0.0001) +KRAKEN_BTC_DATA = 
_addval(BINANCE_ETH_DATA, 10000.0 + 0.0001) + + +@enforce_types +def test_create_xy__1exchange_1coin_1signal(tmpdir): + csvdir = str(tmpdir) + + csv_dfs = {"kraken": {"ETH": _df_from_raw_data(BINANCE_ETH_DATA)}} + + ss = DataSS( + csv_dir=csvdir, + st_timestamp=timestr_to_ut("2023-06-18"), + fin_timestamp=timestr_to_ut("2023-06-21"), + max_N_train=7, + Nt=3, + N_test=2, + usdcoin="USDT", + timeframe="5m", + signals=["high"], + coins=["ETH"], + exchange_ids=["kraken"], + yval_exchange_id="kraken", + yval_coin="ETH", + yval_signal="high", + ) + + assert ss.n == 1 * 1 * 1 * 3 # n_exchs * n_coins * n_signals * Nt + + data_factory = DataFactory(ss) + hist_df = data_factory._merge_csv_dfs(csv_dfs) + X, y, var_with_prev, x_df = data_factory.create_xy(hist_df, testshift=0) + _assert_shapes(ss, X, y, x_df) + + assert X[-1, :].tolist() == [4, 3, 2] and y[-1] == 1 + assert X[-2, :].tolist() == [5, 4, 3] and y[-2] == 2 + assert X[0, :].tolist() == [11, 10, 9] and y[0] == 8 + + assert x_df.iloc[-1].tolist() == [4, 3, 2] + + found_cols = x_df.columns.tolist() + target_cols = [ + "kraken:ETH:high:t-4", + "kraken:ETH:high:t-3", + "kraken:ETH:high:t-2", + ] + assert found_cols == target_cols + + assert found_cols.index("kraken:ETH:high:t-2") == var_with_prev + assert x_df["kraken:ETH:high:t-2"].tolist() == [9, 8, 7, 6, 5, 4, 3, 2] + assert X[:, 2].tolist() == [9, 8, 7, 6, 5, 4, 3, 2] + + # =========== now have a different testshift (1 not 0) + X, y, var_with_prev, x_df = data_factory.create_xy(hist_df, testshift=1) + _assert_shapes(ss, X, y, x_df) + + assert X[-1, :].tolist() == [5, 4, 3] and y[-1] == 2 + assert X[-2, :].tolist() == [6, 5, 4] and y[-2] == 3 + assert X[0, :].tolist() == [12, 11, 10] and y[0] == 9 + + assert x_df.iloc[-1].tolist() == [5, 4, 3] + + found_cols = x_df.columns.tolist() + target_cols = [ + "kraken:ETH:high:t-4", + "kraken:ETH:high:t-3", + "kraken:ETH:high:t-2", + ] + assert found_cols == target_cols + + assert found_cols.index("kraken:ETH:high:t-2") == var_with_prev + assert x_df["kraken:ETH:high:t-2"].tolist() == [10, 9, 8, 7, 6, 5, 4, 3] + assert X[:, 2].tolist() == [10, 9, 8, 7, 6, 5, 4, 3] + + # =========== now have a different max_N_train + ss.max_N_train = 5 + # ss.Nt = 2 + + X, y, var_with_prev, x_df = data_factory.create_xy(hist_df, testshift=0) + _assert_shapes(ss, X, y, x_df) + + assert X.shape[0] == 5 + 1 # +1 for one test point + assert y.shape[0] == 5 + 1 + assert len(x_df) == 5 + 1 + + assert X[-1, :].tolist() == [4, 3, 2] and y[-1] == 1 + assert X[-2, :].tolist() == [5, 4, 3] and y[-2] == 2 + assert X[0, :].tolist() == [9, 8, 7] and y[0] == 6 + + +@enforce_types +def test_create_xy__2exchanges_2coins_2signals(tmpdir): + csvdir = str(tmpdir) + + csv_dfs = { + "binanceus": { + "BTC": _df_from_raw_data(BINANCE_BTC_DATA), + "ETH": _df_from_raw_data(BINANCE_ETH_DATA), + }, + "kraken": { + "BTC": _df_from_raw_data(KRAKEN_BTC_DATA), + "ETH": _df_from_raw_data(KRAKEN_ETH_DATA), + }, + } + + ss = DataSS( + csv_dir=csvdir, + st_timestamp=timestr_to_ut("2023-06-18"), + fin_timestamp=timestr_to_ut("2023-06-21"), + max_N_train=7, + Nt=3, + N_test=2, + usdcoin="USDT", + timeframe="5m", + signals=["high", "low"], + coins=["BTC", "ETH"], + exchange_ids=["binanceus", "kraken"], + yval_exchange_id="binanceus", + yval_coin="ETH", + yval_signal="high", + ) + + assert ss.n == 2 * 2 * 2 * 3 # n_exchs * n_coins * n_signals * Nt + + data_factory = DataFactory(ss) + hist_df = data_factory._merge_csv_dfs(csv_dfs) + X, y, var_with_prev, x_df = data_factory.create_xy(hist_df, testshift=0) + 
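+    # X columns are ordered exchange -> coin -> signal -> lag: 2*2*2*3 = 24 columns;
+    # rows are max_N_train+1 = 8 samples, the newest one being the held-out test point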
_assert_shapes(ss, X, y, x_df) + + found_cols = x_df.columns.tolist() + target_cols = [ + "binanceus:BTC:high:t-4", + "binanceus:BTC:high:t-3", + "binanceus:BTC:high:t-2", + "binanceus:BTC:low:t-4", + "binanceus:BTC:low:t-3", + "binanceus:BTC:low:t-2", + "binanceus:ETH:high:t-4", + "binanceus:ETH:high:t-3", + "binanceus:ETH:high:t-2", + "binanceus:ETH:low:t-4", + "binanceus:ETH:low:t-3", + "binanceus:ETH:low:t-2", + "kraken:BTC:high:t-4", + "kraken:BTC:high:t-3", + "kraken:BTC:high:t-2", + "kraken:BTC:low:t-4", + "kraken:BTC:low:t-3", + "kraken:BTC:low:t-2", + "kraken:ETH:high:t-4", + "kraken:ETH:high:t-3", + "kraken:ETH:high:t-2", + "kraken:ETH:low:t-4", + "kraken:ETH:low:t-3", + "kraken:ETH:low:t-2", + ] + assert found_cols == target_cols + assert found_cols.index("binanceus:ETH:high:t-2") == var_with_prev + + # test binanceus:ETH:high like in 1-signal + assert target_cols[6:9] == [ + "binanceus:ETH:high:t-4", + "binanceus:ETH:high:t-3", + "binanceus:ETH:high:t-2", + ] + Xa = X[:, 6:9] + assert Xa[-1, :].tolist() == [4, 3, 2] and y[-1] == 1 + assert Xa[-2, :].tolist() == [5, 4, 3] and y[-2] == 2 + assert Xa[0, :].tolist() == [11, 10, 9] and y[0] == 8 + + assert x_df.iloc[-1].tolist()[6:9] == [4, 3, 2] + assert x_df.iloc[-2].tolist()[6:9] == [5, 4, 3] + assert x_df.iloc[0].tolist()[6:9] == [11, 10, 9] + + assert x_df["binanceus:ETH:high:t-2"].tolist() == [9, 8, 7, 6, 5, 4, 3, 2] + assert Xa[:, 2].tolist() == [9, 8, 7, 6, 5, 4, 3, 2] + + +@enforce_types +def _assert_shapes(ss, X, y, x_df): + assert X.shape[0] == y.shape[0] + assert X.shape[0] == (ss.max_N_train + 1) # 1 for test, rest for train + assert X.shape[1] == ss.n + + assert len(x_df) == X.shape[0] + assert len(x_df.columns) == ss.n + + +@enforce_types +def _df_from_raw_data(raw_data: list): + df = initialize_df(TOHLCV_COLS) + next_df = pd.DataFrame(raw_data, columns=TOHLCV_COLS) + df = concat_next_df(df, next_df) + return df diff --git a/pdr_backend/predictoor/approach3/test/test_pdutil.py b/pdr_backend/predictoor/approach3/test/test_pdutil.py new file mode 100644 index 000000000..b41f9e0e2 --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/test_pdutil.py @@ -0,0 +1,214 @@ +import os + +from enforce_typing import enforce_types +import numpy as np +import pandas as pd +import pytest + +from pdr_backend.predictoor.approach3.constants import ( + OHLCV_COLS, + OHLCV_DTYPES, + TOHLCV_COLS, +) +from pdr_backend.predictoor.approach3.pdutil import ( + initialize_df, + concat_next_df, + save_csv, + load_csv, + has_data, + oldest_ut, + newest_ut, + _get_last_line, +) + +FOUR_ROWS_RAW_TOHLCV_DATA = [ + [1686806100000, 1648.58, 1648.58, 1646.27, 1646.64, 7.4045], + [1686806400000, 1647.05, 1647.05, 1644.61, 1644.86, 14.452], + [1686806700000, 1644.57, 1646.41, 1642.49, 1645.81, 22.8612], + [1686807000000, 1645.77, 1646.2, 1645.23, 1646.05, 8.1741], +] +ONE_ROW_RAW_TOHLCV_DATA = [[1686807300000, 1646, 1647.2, 1646.23, 1647.05, 8.1742]] + + +@enforce_types +def test_initialize_df(): + df = initialize_df(TOHLCV_COLS) + + assert isinstance(df, pd.DataFrame) + _assert_TOHLCVd_cols_and_types(df) + + df = initialize_df(OHLCV_COLS[:2]) + assert df.columns.tolist() == OHLCV_COLS[:2] + ["datetime"] + assert df.dtypes.tolist()[:-1] == OHLCV_DTYPES[:2] + + +@enforce_types +def test_concat_next_df(): + # baseline data + df = initialize_df(TOHLCV_COLS) + assert len(df) == 0 + + next_df = pd.DataFrame(FOUR_ROWS_RAW_TOHLCV_DATA, columns=TOHLCV_COLS) + assert len(next_df) == 4 + + # add 4 rows to empty df + df = concat_next_df(df, next_df) + assert 
len(df) == 4 + _assert_TOHLCVd_cols_and_types(df) + + # from df with 4 rows, add 1 more row + next_df = pd.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, columns=TOHLCV_COLS) + assert len(next_df) == 1 + + df = concat_next_df(df, next_df) + assert len(df) == 4 + 1 + _assert_TOHLCVd_cols_and_types(df) + + +@enforce_types +def _assert_TOHLCVd_cols_and_types(df: pd.DataFrame): + assert df.columns.tolist() == OHLCV_COLS + ["datetime"] + assert df.dtypes.tolist()[:-1] == OHLCV_DTYPES + assert str(df.dtypes.tolist()[-1]) == "datetime64[ns, UTC]" + assert df.index.name == "timestamp" and df.index.dtype == np.int64 + + +def _filename(tmpdir) -> str: + return os.path.join(tmpdir, "foo.csv") + + +@enforce_types +def test_load_basic(tmpdir): + filename = _filename(tmpdir) + df = _df_from_raw_data(FOUR_ROWS_RAW_TOHLCV_DATA) + save_csv(filename, df) + + # simplest specification. Don't specify cols, st or fin + df2 = load_csv(filename) + _assert_TOHLCVd_cols_and_types(df2) + assert len(df2) == 4 and str(df) == str(df2) + + # explicitly specify cols, but not st or fin + df2 = load_csv(filename, OHLCV_COLS) + _assert_TOHLCVd_cols_and_types(df2) + assert len(df2) == 4 and str(df) == str(df2) + + # explicitly specify cols, st, fin + df2 = load_csv(filename, OHLCV_COLS, st=None, fin=None) + _assert_TOHLCVd_cols_and_types(df2) + assert len(df2) == 4 and str(df) == str(df2) + + df2 = load_csv(filename, OHLCV_COLS, st=0, fin=np.inf) + _assert_TOHLCVd_cols_and_types(df2) + assert len(df2) == 4 and str(df) == str(df2) + + +@enforce_types +def test_load_append(tmpdir): + # save 4-row csv + filename = _filename(tmpdir) + df_4_rows = _df_from_raw_data(FOUR_ROWS_RAW_TOHLCV_DATA) + save_csv(filename, df_4_rows) # write new file + + # append 1 row to csv + df_1_row = _df_from_raw_data(ONE_ROW_RAW_TOHLCV_DATA) + save_csv(filename, df_1_row) # will append existing file + + # test + df_5_rows = concat_next_df( + df_4_rows, pd.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, columns=TOHLCV_COLS) + ) + df_5_rows_loaded = load_csv(filename) + _assert_TOHLCVd_cols_and_types(df_5_rows_loaded) + assert len(df_5_rows_loaded) == 5 + assert str(df_5_rows) == str(df_5_rows_loaded) + + +@enforce_types +def test_load_filtered(tmpdir): + # save + filename = _filename(tmpdir) + df = _df_from_raw_data(FOUR_ROWS_RAW_TOHLCV_DATA) + save_csv(filename, df) + + # load with filters on rows & columns + cols = OHLCV_COLS[:2] # ["open", "high"] + timestamps = [row[0] for row in FOUR_ROWS_RAW_TOHLCV_DATA] + st = timestamps[1] # 1686806400000 + fin = timestamps[2] # 1686806700000 + df2 = load_csv(filename, cols, st, fin) + + # test entries + assert len(df2) == 2 + assert len(df2.index.values) == 2 + assert df2.index.values.tolist() == timestamps[1:3] + + # test cols and types + assert df2.columns.tolist() == OHLCV_COLS[:2] + ["datetime"] + assert df2.dtypes.tolist()[:-1] == OHLCV_DTYPES[:2] + assert str(df2.dtypes.tolist()[-1]) == "datetime64[ns, UTC]" + assert df2.index.name == "timestamp" + assert df2.index.dtype == np.int64 + + +@enforce_types +def _df_from_raw_data(raw_data: list): + df = initialize_df(OHLCV_COLS) + next_df = pd.DataFrame(raw_data, columns=TOHLCV_COLS) + df = concat_next_df(df, next_df) + return df + + +@enforce_types +def test_has_data(tmpdir): + filename0 = os.path.join(tmpdir, "f0.csv") + save_csv(filename0, _df_from_raw_data([])) + assert not has_data(filename0) + + filename1 = os.path.join(tmpdir, "f1.csv") + save_csv(filename1, _df_from_raw_data(ONE_ROW_RAW_TOHLCV_DATA)) + assert has_data(filename1) + + filename4 = os.path.join(tmpdir, "f4.csv") 
+ save_csv(filename4, _df_from_raw_data(FOUR_ROWS_RAW_TOHLCV_DATA)) + assert has_data(filename4) + + +@enforce_types +def test_oldest_ut_and_newest_ut__with_data(tmpdir): + filename = _filename(tmpdir) + df = _df_from_raw_data(FOUR_ROWS_RAW_TOHLCV_DATA) + save_csv(filename, df) + + ut0 = oldest_ut(filename) + utN = newest_ut(filename) + assert ut0 == FOUR_ROWS_RAW_TOHLCV_DATA[0][0] + assert utN == FOUR_ROWS_RAW_TOHLCV_DATA[-1][0] + + +@enforce_types +def test_oldest_ut_and_newest_ut__no_data(tmpdir): + filename = _filename(tmpdir) + df = _df_from_raw_data([]) + save_csv(filename, df) + + with pytest.raises(ValueError): + oldest_ut(filename) + with pytest.raises(ValueError): + newest_ut(filename) + + +@enforce_types +def test_get_last_line(tmpdir): + filename = os.path.join(tmpdir, "foo.csv") + + with open(filename, "w") as f: + f.write( + """line0 boo bo bum +line1 foo bar +line2 bah bah +line3 ha ha lol""" + ) + target_last_line = "line3 ha ha lol" + found_last_line = _get_last_line(filename) + assert found_last_line == target_last_line diff --git a/pdr_backend/predictoor/approach3/test/test_predictoor_agent3.py b/pdr_backend/predictoor/approach3/test/test_predictoor_agent3.py new file mode 100644 index 000000000..788aa92d7 --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/test_predictoor_agent3.py @@ -0,0 +1,167 @@ +import os +import random +from typing import List +from unittest.mock import Mock + +from enforce_typing import enforce_types + +from pdr_backend.predictoor.approach3.predictoor_config3 import PredictoorConfig3 +from pdr_backend.predictoor.approach3.predictoor_agent3 import PredictoorAgent3 +from pdr_backend.util.constants import S_PER_MIN, S_PER_DAY + +PRIV_KEY = os.getenv("PRIVATE_KEY") + +ADDR = "0xe8933f2950aec1080efad1ca160a6bb641ad245d" + +SOURCE = "binanceus" +PAIR = "BTC-USDT" +TIMEFRAME, S_PER_EPOCH = "5m", 5 * S_PER_MIN # must change both at once +SECONDS_TILL_EPOCH_END = 60 # how soon to start making predictions? 
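+# FEED_S & FEED_DICT describe one mock feed, shaped like what query_feed_contracts() reports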
+FEED_S = f"{PAIR}|{SOURCE}|{TIMEFRAME}" +S_PER_SUBSCRIPTION = 1 * S_PER_DAY +FEED_DICT = { # info inside a predictoor contract + "name": f"Feed of {FEED_S}", + "address": ADDR, + "symbol": f"FEED:{FEED_S}", + "seconds_per_epoch": S_PER_EPOCH, + "seconds_per_subscription": S_PER_SUBSCRIPTION, + "trueval_submit_timeout": 15, + "owner": "0xowner", + "pair": PAIR, + "timeframe": TIMEFRAME, + "source": SOURCE, +} +INIT_TIMESTAMP = 107 +INIT_BLOCK_NUMBER = 13 + + +@enforce_types +def test_predictoor_agent3(monkeypatch): + _setenvs(monkeypatch) + + # mock query_feed_contracts() + def mock_query_feed_contracts(*args, **kwargs): # pylint: disable=unused-argument + feed_dicts = {ADDR: FEED_DICT} + return feed_dicts + + monkeypatch.setattr( + "pdr_backend.models.base_config.query_feed_contracts", + mock_query_feed_contracts, + ) + + # mock w3.eth.block_number, w3.eth.get_block() + @enforce_types + class MockEth: + def __init__(self): + self.timestamp = INIT_TIMESTAMP + self.block_number = INIT_BLOCK_NUMBER + self._timestamps_seen: List[int] = [INIT_TIMESTAMP] + + def get_block( + self, block_number: int, full_transactions: bool = False + ): # pylint: disable=unused-argument + mock_block = {"timestamp": self.timestamp} + return mock_block + + mock_w3 = Mock() # pylint: disable=not-callable + mock_w3.eth = MockEth() + + # mock PredictoorContract + @enforce_types + def toEpochStart(timestamp: int) -> int: + return timestamp // S_PER_EPOCH * S_PER_EPOCH + + @enforce_types + class MockContract: + def __init__(self, w3): + self._w3 = w3 + self.contract_address: str = ADDR + self._prediction_slots: List[int] = [] + + def get_current_epoch(self) -> int: # returns an epoch number + return self.get_current_epoch_ts() // S_PER_EPOCH + + def get_current_epoch_ts(self) -> int: # returns a timestamp + curEpoch_ts = toEpochStart(self._w3.eth.timestamp) + return curEpoch_ts + + def get_secondsPerEpoch(self) -> int: + return S_PER_EPOCH + + def submit_prediction( + self, predval: bool, stake: float, timestamp: int, wait: bool = True + ): # pylint: disable=unused-argument + assert stake <= 3 + if timestamp in self._prediction_slots: + print(f" (Replace prev pred at time slot {timestamp})") + self._prediction_slots.append(timestamp) + + mock_contract = MockContract(mock_w3) + + def mock_contract_func(*args, **kwargs): # pylint: disable=unused-argument + return mock_contract + + monkeypatch.setattr( + "pdr_backend.models.base_config.PredictoorContract", mock_contract_func + ) + + # mock time.sleep() + def advance_func(*args, **kwargs): # pylint: disable=unused-argument + do_advance_block = random.random() < 0.40 + if do_advance_block: + mock_w3.eth.timestamp += random.randint(3, 12) + mock_w3.eth.block_number += 1 + mock_w3.eth._timestamps_seen.append(mock_w3.eth.timestamp) + + monkeypatch.setattr("time.sleep", advance_func) + + # now we're done the mocking, time for the real work!! 
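+    # (mocked above: query_feed_contracts, web3's block/timestamp, the
+    #  predictoor contract, and time.sleep as the block-advancing clock)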
+
+    # real work: initialize
+    c = PredictoorConfig3()
+    agent = PredictoorAgent3(c)
+
+    # last bit of mocking
+    agent.config.web3_config.w3 = mock_w3
+
+    # real work: main iterations
+    for _ in range(1000):
+        agent.take_step()
+
+    # log some final results for debugging / inspection
+    print("\n" + "=" * 80)
+    print("Done iterations")
+    print(
+        f"init block_number = {INIT_BLOCK_NUMBER}"
+        f", final = {mock_w3.eth.block_number}"
+    )
+    print()
+    print(f"init timestamp = {INIT_TIMESTAMP}, final = {mock_w3.eth.timestamp}")
+    print(f"all timestamps seen = {mock_w3.eth._timestamps_seen}")
+    print()
+    print(
+        "unique prediction_slots = " f"{sorted(set(mock_contract._prediction_slots))}"
+    )
+    print(f"all prediction_slots = {mock_contract._prediction_slots}")
+
+    # relatively basic sanity tests
+    assert mock_contract._prediction_slots
+    assert (mock_w3.eth.timestamp + 2 * S_PER_EPOCH) >= max(
+        mock_contract._prediction_slots
+    )
+
+
+def _setenvs(monkeypatch):
+    # envvars handled by PredictoorConfig3
+    monkeypatch.setenv("SECONDS_TILL_EPOCH_END", "60")
+    monkeypatch.setenv("STAKE_AMOUNT", "1")
+
+    # envvars handled by BaseConfig
+    monkeypatch.setenv("RPC_URL", "http://foo")
+    monkeypatch.setenv("SUBGRAPH_URL", "http://bar")
+    monkeypatch.setenv("PRIVATE_KEY", PRIV_KEY)
+
+    monkeypatch.setenv("PAIR_FILTER", PAIR.replace("-", "/"))
+    monkeypatch.setenv("TIMEFRAME_FILTER", TIMEFRAME)
+    monkeypatch.setenv("SOURCE_FILTER", SOURCE)
+    monkeypatch.setenv("OWNER_ADDRS", FEED_DICT["owner"])
diff --git a/pdr_backend/predictoor/approach3/test/test_predictoor_config3.py b/pdr_backend/predictoor/approach3/test/test_predictoor_config3.py
new file mode 100644
index 000000000..8c36f326e
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/test/test_predictoor_config3.py
@@ -0,0 +1,46 @@
+import os
+
+from enforce_typing import enforce_types
+
+from pdr_backend.predictoor.approach3.predictoor_config3 import PredictoorConfig3
+
+ADDR = "0xe8933f2950aec1080efad1ca160a6bb641ad245d"  # predictoor contract addr
+PRIV_KEY = os.getenv("PRIVATE_KEY")
+
+
+@enforce_types
+def test_predictoor_config_basic(monkeypatch):
+    _setenvs(monkeypatch)
+    c = PredictoorConfig3()
+
+    # values handled by PredictoorConfig3
+    assert c.s_until_epoch_end == 60
+    assert c.stake_amount == 30000
+
+    # values handled by BaseConfig
+    assert c.rpc_url == "http://foo"
+    assert c.subgraph_url == "http://bar"
+    assert c.private_key == PRIV_KEY
+
+    assert c.pair_filters == ["BTC/USDT", "ETH/USDT"]
+    assert c.timeframe_filter == ["5m", "15m"]
+    assert c.source_filter == ["binance", "kraken"]
+    assert c.owner_addresses == ["0x123", "0x124"]
+
+    assert c.web3_config is not None
+
+
+def _setenvs(monkeypatch):
+    # envvars handled by PredictoorConfig3
+    monkeypatch.setenv("SECONDS_TILL_EPOCH_END", "60")
+    monkeypatch.setenv("STAKE_AMOUNT", "30000")
+
+    # envvars handled by BaseConfig
+    monkeypatch.setenv("RPC_URL", "http://foo")
+    monkeypatch.setenv("SUBGRAPH_URL", "http://bar")
+    monkeypatch.setenv("PRIVATE_KEY", PRIV_KEY)
+
+    monkeypatch.setenv("PAIR_FILTER", "BTC/USDT,ETH/USDT")
+    monkeypatch.setenv("TIMEFRAME_FILTER", "5m,15m")
+    monkeypatch.setenv("SOURCE_FILTER", "binance,kraken")
+    monkeypatch.setenv("OWNER_ADDRS", "0x123,0x124")
diff --git a/pdr_backend/predictoor/approach3/test/test_timeblock.py b/pdr_backend/predictoor/approach3/test/test_timeblock.py
new file mode 100644
index 000000000..09d7c9a10
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/test/test_timeblock.py
@@ -0,0 +1,20 @@
+from enforce_typing import enforce_types
+
+from 
pdr_backend.predictoor.approach3 import timeblock + + +@enforce_types +def test_timeblock(): + z = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + + X = timeblock.timeblock(z, 4) + + assert list(X[0, :]) == [5, 4, 3, 2] + assert list(X[1, :]) == [6, 5, 4, 3] + assert list(X[2, :]) == [7, 6, 5, 4] + assert list(X[3, :]) == [8, 7, 6, 5] + assert list(X[4, :]) == [9, 8, 7, 6] + assert list(X[5, :]) == [10, 9, 8, 7] + + assert X.shape[0] == 6 + assert X.shape[1] == 4 diff --git a/pdr_backend/predictoor/approach3/test/test_timeutil.py b/pdr_backend/predictoor/approach3/test/test_timeutil.py new file mode 100644 index 000000000..0f24af3ef --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/test_timeutil.py @@ -0,0 +1,55 @@ +import datetime +from datetime import timezone + +from enforce_typing import enforce_types + +from pdr_backend.predictoor.approach3.timeutil import ( + pretty_timestr, + current_ut, + dt_to_ut, + ut_to_dt, + timestr_to_ut, +) + + +@enforce_types +def test_pretty_timestr(): + ut = 1648576500000 + s = pretty_timestr(ut) + assert "1648576500000" in s # ut + assert "2022-03-29" in s # date + assert "17:55" in s # time + + +@enforce_types +def test_current_ut(): + ut = current_ut() + assert isinstance(ut, int) + assert ut > 1648576500000 + + +@enforce_types +def test_timestr_to_ut(): + t = timestr_to_ut("now") + assert t > 1648576500000 and isinstance(t, int) + + t = timestr_to_ut("1970-01-01_0:00") + assert t == 0 and isinstance(t, int) + + t = timestr_to_ut("2022-03-29_17:55") + assert t == 1648576500000 and isinstance(t, int) + + t = timestr_to_ut("2022-03-29") + assert t == 1648512000000 and isinstance(t, int) + + +@enforce_types +def test_dt_to_ut_and_back(): + dt = datetime.datetime.strptime("2022-03-29_17:55", "%Y-%m-%d_%H:%M") + dt = dt.replace(tzinfo=timezone.utc) # tack on timezone + + ut = dt_to_ut(dt) + assert ut == 1648576500000 + + dt2 = ut_to_dt(ut) + assert dt2 == dt diff --git a/pdr_backend/predictoor/approach3/test/test_trade_engine.py b/pdr_backend/predictoor/approach3/test/test_trade_engine.py new file mode 100644 index 000000000..7867e23c0 --- /dev/null +++ b/pdr_backend/predictoor/approach3/test/test_trade_engine.py @@ -0,0 +1,55 @@ +import os + +from enforce_typing import enforce_types + +from pdr_backend.predictoor.approach3.data_ss import DataSS +from pdr_backend.predictoor.approach3.model_ss import ModelSS +from pdr_backend.predictoor.approach3.timeutil import timestr_to_ut +from pdr_backend.predictoor.approach3.tradeutil import TradeParams, TradeSS +from pdr_backend.predictoor.approach3.trade_engine import TradeEngine + + +@enforce_types +def test_TradeEngine(tmpdir): + logpath = str(tmpdir) + data_ss = DataSS( + csv_dir=os.path.abspath("csvs"), # use the usual data (worksforme) + st_timestamp=timestr_to_ut("2023-06-22"), + fin_timestamp=timestr_to_ut("2023-06-24"), + max_N_train=500, + N_test=100, + Nt=2, + usdcoin="USDT", + timeframe="5m", + signals=["open", "close"], + coins=["ETH", "BTC"], + exchange_ids=["binanceus"], + yval_exchange_id="binanceus", + yval_coin="BTC", + yval_signal="close", + ) + + model_ss = ModelSS("LIN") + + trade_pp = TradeParams( + fee_percent=0.0, # Eg 0.001 is 0.1%. Trading fee (simulated) + init_holdings={"USDT": 100000.0, "BTC": 0.0}, + ) + + trade_ss = TradeSS( + do_plot=False, # plot at end? + logpath=logpath, + buy_amt_usd=100000.00, # How much to buy at a time. 
In USD
+    )
+
+    # ==================================================================
+    # print setup
+    print(f"data_ss={data_ss}")
+    print(f"model_ss={model_ss}")
+    print(f"trade_pp={trade_pp}")
+    print(f"trade_ss={trade_ss}")
+
+    # ==================================================================
+    # do work
+    trade_engine = TradeEngine(data_ss, model_ss, trade_pp, trade_ss)
+    trade_engine.run()
diff --git a/pdr_backend/predictoor/approach3/test/test_tradeutil.py b/pdr_backend/predictoor/approach3/test/test_tradeutil.py
new file mode 100644
index 000000000..0bed692e9
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/test/test_tradeutil.py
@@ -0,0 +1,39 @@
+from enforce_typing import enforce_types
+
+from pdr_backend.predictoor.approach3.tradeutil import (
+    pairstr,
+    pairstr_to_coin,
+    pairstr_to_usdcoin,
+    TradeParams,
+    TradeSS,
+)
+
+
+@enforce_types
+def test_TradeParams():
+    pp = TradeParams(
+        fee_percent=0.01,
+        init_holdings={"USDT": 10000.0, "BTC": 0.0},
+    )
+    assert pp.fee_percent == 0.01
+    assert "TradeParams" in str(pp)
+    assert "fee_percent" in str(pp)
+
+
+@enforce_types
+def test_TradeSS(tmpdir):
+    ss = TradeSS(
+        do_plot=False,
+        logpath=str(tmpdir),
+        buy_amt_usd=100000.00,
+    )
+    assert ss.buy_amt_usd == 100000.00
+    assert "TradeSS" in str(ss)
+    assert "buy_amt_usd" in str(ss)
+
+
+@enforce_types
+def test_pairstr():
+    assert pairstr("BTC", "USDT") == "BTC/USDT"
+    assert pairstr_to_coin("BTC/USDT") == "BTC"
+    assert pairstr_to_usdcoin("BTC/USDT") == "USDT"
diff --git a/pdr_backend/predictoor/approach3/timeblock.py b/pdr_backend/predictoor/approach3/timeblock.py
new file mode 100644
index 000000000..bed8c4882
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/timeblock.py
@@ -0,0 +1,51 @@
+import numpy as np
+
+
+def timeblock(z, Nt: int):
+    """
+    Calculate a timeblock for training, from a 1-d time series
+
+    @arguments
+      z -- 1d array -- timeseries [z(t-Np), z(t-Np+1), ..., z(t-2), z(t-1)]
+        where Np == # points in time series == # points back it goes. Eg 500
+        so z[ 0] == z(t-500) is oldest,
+        and z[-1] == z(t-1) is youngest
+
+      Nt -- int -- # time steps for each input sample. Eg if Nt == 10 then
+        at one sample it's  [z(t-13), z(t-12), ..., z(t- 5), z(t- 4)]
+        at another sample it's [z(t-31), z(t-30), ..., z(t-23), z(t-22)]
+
+    @return
+      X -- 2d array -- timeblock [sample i, var j]
+
+        With Nt columns (vars) and Np-Nt rows
+        Shaped as:
+        [[ z(t-Nt-1)  z(t-Nt-0)  ...  z(t- 3)        z(t- 2)
+           z(t-Nt-2)  z(t-Nt-1)  ...  z(t- 4)        z(t- 3)
+           z(t-Nt-3)  z(t-Nt-2)  ...  z(t- 5)        z(t- 4)
+           ...        ...        ...  ...            ...
+           z(t-Np+2)  z(t-Np+3)  ...  z(t-Np+Nt+0)   z(t-Np+Nt+1)
+           z(t-Np+1)  z(t-Np+2)  ...  z(t-Np+Nt-1)   z(t-Np+Nt+0)
+           z(t-Np+0)  z(t-Np+1)  ...  z(t-Np+Nt-2)   z(t-Np+Nt-1) ]]
+        The 0th row is z shifted back by 1 time step
+        The 1st row is z shifted back by 2 time steps
+        ...
+        The last row is z shifted back by Np-Nt time steps
+
+        It does _not_ give z(t-1) because t-1 is the time "into the future"
+        that we're training for.
+
+        Example: if Np = 500, Nt = 10 then it returns X as
+        [[ z(t-11)   z(t- 10)  ...  z(t- 3)    z(t- 2)
+           z(t-12)   z(t- 11)  ...  z(t- 4)    z(t- 3)
+           ...       ...       ...  ...        ...
+           z(t-499)  z(t-498)  ...  z(t-491)   z(t-490)
+           z(t-500)  z(t-499)  ...  z(t-492)   z(t-491) ]]
+    """
+    Np = len(z)
+    n_rows = Np - Nt
+    n_cols = Nt
+    X = np.zeros((n_rows, n_cols), dtype=float)
+    for row_i in range(n_rows):
+        X[row_i, :] = z[-(row_i + Nt + 1) : -(row_i + 1)]
+    return X
diff --git a/pdr_backend/predictoor/approach3/timeutil.py b/pdr_backend/predictoor/approach3/timeutil.py
new file mode 100644
index 000000000..37d43bcd5
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/timeutil.py
@@ -0,0 +1,63 @@
+import datetime
+from datetime import timezone
+
+from enforce_typing import enforce_types
+
+
+@enforce_types
+def pretty_timestr(ut: int) -> str:
+    """Pretty-print version of ut timestamp: show as unix time and datetime"""
+    return f"timestamp={ut}, dt={ut_to_dt(ut)}"
+
+
+@enforce_types
+def current_ut() -> int:
+    """Return the current date/time as a unix time (int in # ms)"""
+    dt = datetime.datetime.now(timezone.utc)
+    return dt_to_ut(dt)
+
+
+@enforce_types
+def timestr_to_ut(timestr: str) -> int:
+    """
+    Convert a datetime string to unix time (in #ms)
+    Needs a date; time for a given date is optional.
+
+    Examples:
+      'now'              --> the current unix time, eg 1648872899300
+      '2022-03-29_17:55' --> 1648576500000
+      '2022-03-29'       --> 1648512000000
+    Does not use local time, rather always uses UTC
+    """
+    if timestr.lower() == "now":
+        return current_ut()
+
+    ncolon = timestr.count(":")
+    if ncolon == 1:
+        dt = datetime.datetime.strptime(timestr, "%Y-%m-%d_%H:%M")
+    elif ncolon == 2:
+        dt = datetime.datetime.strptime(timestr, "%Y-%m-%d_%H:%M:%S")
+    else:
+        dt = datetime.datetime.strptime(timestr, "%Y-%m-%d")
+
+    dt = dt.replace(tzinfo=timezone.utc)  # tack on timezone
+    return dt_to_ut(dt)
+
+
+@enforce_types
+def dt_to_ut(dt: datetime.datetime) -> int:
+    """Convert datetime to unix time (int in # ms)"""
+    return int(dt.timestamp() * 1000)
+
+
+@enforce_types
+def ut_to_dt(ut: int) -> datetime.datetime:
+    """Convert unix time (in # ms) to datetime format"""
+    dt = datetime.datetime.utcfromtimestamp(ut / 1000)
+    dt = dt.replace(tzinfo=timezone.utc)  # tack on timezone
+
+    # postcondition
+    ut2 = int(dt.replace(tzinfo=timezone.utc).timestamp() * 1000)
+    assert ut2 == ut, (ut, ut2)
+
+    return dt
diff --git a/pdr_backend/predictoor/approach3/trade_engine.py b/pdr_backend/predictoor/approach3/trade_engine.py
new file mode 100644
index 000000000..83db1495c
--- /dev/null
+++ b/pdr_backend/predictoor/approach3/trade_engine.py
@@ -0,0 +1,230 @@
+import os
+from typing import List
+
+from enforce_typing import enforce_types
+import numpy as np
+import pandas as pd
+
+from pdr_backend.predictoor.approach3 import plotutil
+from pdr_backend.predictoor.approach3.constants import MS_PER_EPOCH
+from pdr_backend.predictoor.approach3.data_factory import DataFactory
+from pdr_backend.predictoor.approach3.data_ss import DataSS
+from pdr_backend.predictoor.approach3.model_factory import ModelFactory
+from pdr_backend.predictoor.approach3.model_ss import ModelSS
+from pdr_backend.predictoor.approach3.timeutil import current_ut, pretty_timestr
+from pdr_backend.predictoor.approach3.tradeutil import TradeParams, TradeSS
+from pdr_backend.util.mathutil import nmse
+
+
+# pylint: disable=too-many-instance-attributes
+class TradeEngine:
+    @enforce_types
+    def __init__(
+        self,
+        data_ss: DataSS,
+        model_ss: ModelSS,
+        trade_pp: TradeParams,
+        trade_ss: TradeSS,
+    ):
+        self.data_ss = data_ss
+        self.model_ss = model_ss
+        self.trade_pp = trade_pp
+        self.trade_ss = trade_ss
+
+        self.holdings = self.trade_pp.init_holdings
+        self.tot_profit_usd = 0.0
+        self.nmses_train: List[float] = []
+        self.ys_test: List[float] = []
+        self.ys_testhat: List[float] = []
+        self.corrects: List[bool] = []
+        self.profit_usds: List[float] = []
+        self.tot_profit_usds: List[float] = []
+
+        self.data_factory = DataFactory(self.data_ss)
+
+        self.logfile = ""
+
+    @property
+    def usdcoin(self) -> str:
+        return self.data_ss.usdcoin
+
+    @property
+    def tokcoin(self) -> str:
+        return self.data_ss.yval_coin
+
+    @enforce_types
+    def _init_loop_attributes(self):
+        filebase = f"out_{current_ut()}.txt"
+        self.logfile = os.path.join(self.trade_ss.logpath, filebase)
+        with open(self.logfile, "w") as f:
+            f.write("\n")
+
+        self.tot_profit_usd = 0.0
+        self.nmses_train, self.ys_test, self.ys_testhat, self.corrects = [], [], [], []
+        self.profit_usds, self.tot_profit_usds = [], []
+
+    @enforce_types
+    def run(self):
+        self._init_loop_attributes()
+        log = self._log
+        log("Start run")
+        # main loop!
+        hist_df = self.data_factory.get_hist_df()
+        for test_i in range(self.data_ss.N_test):
+            self.run_one_iter(test_i, hist_df)
+
+        log("Done all iters.")
+
+        nmse_train = np.average(self.nmses_train)
+        nmse_test = nmse(self.ys_testhat, self.ys_test)
+        log(f"Final nmse_train={nmse_train:.5f}, nmse_test={nmse_test:.5f}")
+
+        self._final_plot()
+
+    @enforce_types
+    def run_one_iter(self, test_i: int, hist_df: pd.DataFrame):
+        log = self._log
+        testshift = self.data_ss.N_test - test_i - 1  # eg [99, 98, .., 2, 1, 0]
+        X, y, var_with_prev, _ = self.data_factory.create_xy(hist_df, testshift)
+
+        st, fin = 0, X.shape[0] - 1
+        X_train, X_test = X[st:fin, :], X[fin : fin + 1]
+        y_train, y_test = y[st:fin], y[fin : fin + 1]
+
+        self.model_ss.var_with_prev = var_with_prev  # used for PREV model, that's all
+        model_factory = ModelFactory(self.model_ss)
+        model = model_factory.build(X_train, y_train)
+
+        y_trainhat = model.predict(X_train)  # eg yhat=zhat[y-5]
+        # plotutil.plot_vals_vs_time1(y_train, y_trainhat,"ytr & ytrhat vs time")
+        # plotutil.scatter_pred_vs_actual(y_train, y_trainhat, "ytr vs ytrhat")
+
+        nmse_train = nmse(y_trainhat, y_train, min(y), max(y))
+        self.nmses_train.append(nmse_train)
+
+        # current time
+        ut = int(hist_df.index.values[-1]) - testshift * MS_PER_EPOCH
+
+        # current price
+        curprice = y_train[-1]
+
+        # predict price
+        predprice = model.predict(X_test)[0]
+        self.ys_testhat.append(predprice)
+
+        # simulate buy. Buy 'buy_amt_usd' worth of TOK if we think price going up
+        usdcoin_holdings_before = self.holdings[self.usdcoin]
+        if self._do_buy(predprice, curprice):
+            self._buy(curprice, self.trade_ss.buy_amt_usd)
+
+        # observe true price
+        trueprice = y_test[0]
+        self.ys_test.append(trueprice)
+
+        # simulate sell. Update tot_profit_usd
+        tokcoin_amt_sell = self.holdings[self.tokcoin]
+        if tokcoin_amt_sell > 0:
+            self._sell(trueprice, tokcoin_amt_sell)
+        usdcoin_holdings_after = self.holdings[self.usdcoin]
+
+        profit_usd = usdcoin_holdings_after - usdcoin_holdings_before
+
+        self.tot_profit_usd += profit_usd
+        self.profit_usds.append(profit_usd)
+        self.tot_profit_usds.append(self.tot_profit_usd)
+
+        # err = abs(predprice - trueprice)
+        pred_dir = "UP" if predprice > curprice else "DN"
+        true_dir = "UP" if trueprice > curprice else "DN"
+        correct = pred_dir == true_dir
+        correct_s = "Y" if correct else "N"
+        self.corrects.append(correct)
+        acc = float(sum(self.corrects)) / len(self.corrects) * 100
+        log(
+            f"Iter #{test_i+1:3}/{self.data_ss.N_test}: "
+            f" ut{pretty_timestr(ut)[9:][:-9]}"
+            # f". Predval|true|err {predprice:.2f}|{trueprice:.2f}|{err:6.2f}"
+            f". Preddir|true|correct = {pred_dir}|{true_dir}|{correct_s}"
+            f". 
Total correct {sum(self.corrects):3}/{len(self.corrects):3}" + f" ({acc:.1f}%)" + # f". Spent ${amt_usdcoin_sell:9.2f}, recd ${amt_usdcoin_recd:9.2f}" + f", profit ${profit_usd:7.2f}" + f", tot_profit ${self.tot_profit_usd:9.2f}" + ) + + def _do_buy(self, predprice: float, curprice: float) -> bool: + """ + @arguments + predprice -- predicted price (5 min from now) + curprice -- current price (now) + + @return + bool -- buy y/n? + """ + return predprice > curprice + + def _buy(self, price: float, usdcoin_amt_spend: float): + """ + @description + Buy tokcoin with usdcoin + + @arguments + price -- amt of usdcoin per token + usdcoin_amt_spend -- amount to spend, in usdcoin; spend less if have less + """ + # simulate buy + usdcoin_amt_sent = min(usdcoin_amt_spend, self.holdings[self.usdcoin]) + self.holdings[self.usdcoin] -= usdcoin_amt_sent + + p = self.trade_pp.fee_percent + usdcoin_amt_fee = p * usdcoin_amt_sent + tokcoin_amt_recd = (1 - p) * usdcoin_amt_sent / price + self.holdings[self.tokcoin] += tokcoin_amt_recd + + self._log( + f" TX: BUY : send {usdcoin_amt_sent:8.2f} {self.usdcoin:4}" + f", receive {tokcoin_amt_recd:8.2f} {self.tokcoin:4}" + f", fee = {usdcoin_amt_fee:8.4f} {self.usdcoin:4}" + ) + + def _sell(self, price: float, tokcoin_amt_sell: float): + """ + @description + Sell tokcoin for usdcoin + + @arguments + price -- amt of usdcoin per token + tokcoin_amt_sell -- how much of coin to sell, in tokcoin + """ + tokcoin_amt_sent = tokcoin_amt_sell + self.holdings[self.tokcoin] -= tokcoin_amt_sent + + p = self.trade_pp.fee_percent + usdcoin_amt_fee = p * tokcoin_amt_sent * price + usdcoin_amt_recd = (1 - p) * tokcoin_amt_sent * price + self.holdings[self.usdcoin] += usdcoin_amt_recd + + self._log( + f" TX: SELL: send {tokcoin_amt_sent:8.2f} {self.tokcoin:4}" + f", receive {usdcoin_amt_recd:8.2f} {self.usdcoin:4}" + f", fee = {usdcoin_amt_fee:8.4f} {self.usdcoin:4}" + ) + + @enforce_types + def _final_plot(self): + if not self.trade_ss.do_plot: + return + + # plotutil.plot_vals_vs_time1( + # self.ys_test, self.ys_testhat, "ys_test & ys_testhat vs time") + # plotutil.scatter_pred_vs_actual( + # self.ys_test, self.ys_testhat, "ys_test vs ys_testhat") + plotutil.plot_any_vs_time(self.profit_usds, "profit") + plotutil.plot_any_vs_time(self.tot_profit_usds, "tot profit") + + @enforce_types + def _log(self, s: str): + """Log to both stdout and to file""" + print(s) + with open(self.logfile, "a") as f: + f.write(s + "\n") diff --git a/pdr_backend/predictoor/approach3/tradeutil.py b/pdr_backend/predictoor/approach3/tradeutil.py new file mode 100644 index 000000000..aa3a92bea --- /dev/null +++ b/pdr_backend/predictoor/approach3/tradeutil.py @@ -0,0 +1,44 @@ +import os + +from enforce_typing import enforce_types + +from pdr_backend.util.strutil import StrMixin + + +@enforce_types +class TradeParams(StrMixin): + def __init__( + self, + fee_percent: float, # Eg 0.001 is 0.1%. 
Trading fee (simulated)
+        init_holdings: dict,  # Eg {"USDT": 100000.00}
+    ):
+        self.fee_percent = fee_percent
+        self.init_holdings = init_holdings
+
+
+@enforce_types
+class TradeSS(StrMixin):
+    def __init__(self, do_plot: bool, logpath: str, buy_amt_usd: float):
+        assert os.path.exists(logpath)
+
+        self.do_plot = do_plot
+        self.logpath = logpath  # directory, not file
+        self.buy_amt_usd = buy_amt_usd
+
+
+@enforce_types
+def pairstr(coin: str, usdcoin: str) -> str:
+    """Eg given 'BTC','USDT', return 'BTC/USDT'"""
+    return f"{coin}/{usdcoin}"
+
+
+@enforce_types
+def pairstr_to_coin(pair: str) -> str:
+    """Eg given 'BTC/USDT', return 'BTC'"""
+    return pair.split("/")[0]
+
+
+@enforce_types
+def pairstr_to_usdcoin(pair: str) -> str:
+    """Eg given 'BTC/USDT', return 'USDT'"""
+    return pair.split("/")[1]
diff --git a/pdr_backend/predictoor/main.py b/pdr_backend/predictoor/main.py
index 9b8c48ff4..13b2f29f1 100644
--- a/pdr_backend/predictoor/main.py
+++ b/pdr_backend/predictoor/main.py
@@ -8,7 +8,8 @@ Usage: python pdr_backend/predictoor/main.py APPROACH
 where APPROACH=1 - does random predictions
-      APPROACH=2 - uses a model to predict. Needs MODELDIR specified.
+      APPROACH=2 - uses a static model to predict. Needs MODELDIR specified.
+      APPROACH=3 - uses a dynamic model to predict
       APPROACH=payout - claim all unclaimed payouts.
 """
@@ -25,16 +26,18 @@ def do_main():
         do_help()
     arg1 = sys.argv[1]
-    if arg1 in ["1"]:
-        agent_class = importlib.import_module(
+    if arg1 in ["1", "3"]:  # approach1, approach3
+        agent_module = importlib.import_module(
             f"pdr_backend.predictoor.approach{arg1}.predictoor_agent{arg1}"
         )
-        agent_class = getattr(agent_class, f"PredictoorAgent{arg1}")
-        config = agent_class.predictoor_config_class()
+        agent_class = getattr(agent_module, f"PredictoorAgent{arg1}")
+        config_class = agent_class.predictoor_config_class
+        config = config_class()
         agent = agent_class(config)
         agent.run()
-    elif arg1 == "2":
-        # to be integrated similarly to approach 1
+
+    elif arg1 == "2":  # approach2
+        # To be integrated similarly to "1"
         from pdr_backend.predictoor.approach2.main2 import (  # pylint: disable=import-outside-toplevel,line-too-long
             do_main2,
         )
diff --git a/pdr_backend/util/mathutil.py b/pdr_backend/util/mathutil.py
new file mode 100644
index 000000000..1dbb94964
--- /dev/null
+++ b/pdr_backend/util/mathutil.py
@@ -0,0 +1,97 @@
+import random
+import re
+
+from typing import Union
+from math import log10, floor
+
+import numpy as np
+
+
+from enforce_typing import enforce_types
+
+from pdr_backend.util.strutil import StrMixin
+
+
+@enforce_types
+def isNumber(x) -> bool:
+    return isinstance(x, (int, float))
+
+
+@enforce_types
+def intInStr(s: str) -> int:
+    int_s = re.sub("[^0-9]", "", s)
+    return int(int_s)
+
+
+@enforce_types
+class Range(StrMixin):
+    def __init__(self, min_: float, max_: Union[float, None] = None):
+        assert (max_ is None) or (max_ >= min_)
+        self.min_: float = min_
+        self.max_: Union[float, None] = max_
+
+    def drawRandomPoint(self) -> float:
+        if self.max_ is None:
+            return self.min_
+        return randunif(self.min_, self.max_)
+
+
+@enforce_types
+def randunif(mn: float, mx: float) -> float:
+    """Return a uniformly-distributed random number in range [mn, mx]"""
+    assert mx >= mn
+    if mn == mx:
+        return mn
+    return mn + random.random() * (mx - mn)
+
+
+@enforce_types
+def round_sig(x: Union[int, float], sig: int) -> Union[int, float]:
+    """Return a number rounded to the specified # significant digits"""
+    return round(x, sig - int(floor(log10(abs(x)))) - 1)
+
+
+def nmse(yhat, y, 
ymin=None, ymax=None) -> float: + """ + @description + Calculates the normalized mean-squared error. + @arguments + yhat -- 1d array or list of floats -- estimated values of y + y -- 1d array or list of floats -- true values + ymin, ymax -- float, float -- roughly the min and max; they + do not have to be the perfect values of min and max, because + they're just here to scale the output into a roughly [0,1] range + @return + nmse -- float -- normalized mean-squared error + """ + assert len(y) == len(yhat) + y, yhat = np.asarray(y), np.asarray(yhat) + + # base case: no entries + if len(yhat) == 0: + return 0.0 + + # condition ymin, ymax + if ymin is None and ymax is None: + ymin, ymax = min(y), max(y) + assert ymin is not None + assert ymax is not None + + # base case: both yhat and y are constant, and same values + if (ymax == ymin) and (max(yhat) == min(yhat) == max(y) == min(y)): + return 0.0 + + # yrange + yrange = ymax - ymin + + # First, scale true values and predicted values such that: + # - true values are in range [0.0, 1.0] + # - predicted values follow the same scaling factors + y01 = (y - ymin) / yrange + yhat01 = (yhat - ymin) / yrange + + mse_xy = np.sum(np.square(y01 - yhat01)) + mse_x = np.sum(np.square(y01)) + nmse_result = mse_xy / mse_x + + return nmse_result diff --git a/pdr_backend/util/strutil.py b/pdr_backend/util/strutil.py index ae572dd26..ae5109192 100644 --- a/pdr_backend/util/strutil.py +++ b/pdr_backend/util/strutil.py @@ -67,3 +67,59 @@ def dictStr(d: dict, newline=False) -> str: s += ["\n"] s += ["/dict}"] return "".join(s) + + +def asCurrency(amount, decimals: bool = True) -> str: + """Ref: https://stackoverflow.com/questions/21208376/converting-float-to-dollars-and-cents""" + if decimals: + if amount >= 0: + return f"${amount:,.2f}" + return f"-${-amount:,.2f}".format(-amount) + + if amount >= 0: + return f"${amount:,.0f}" + + return f"-${-amount:,.0f}" + + +def prettyBigNum(amount, remove_zeroes: bool = True) -> str: + """Prints, for example: + 1.23e12, 123.4B, 1.23B, 123M, 1.23M, 123K, 1.23K, 123, + 1.23, 0.12, 1.23e-3, + 1e12, 100B, 1B, 100M, 1M, 100K, 1K, 100, 1 + + Remove zeros True vs False: 1.00M vs 1M + """ + if remove_zeroes: + amount = float(f"{amount:.2e}") # reduce to 3 sig figs + if amount == 0: + return "0" + + a = abs(amount) + + if a >= 1e12 or a < 1e-1: + s = format(a, ".2e").replace("e+", "e").replace("e0", "e").replace("e-0", "e-") + base = "e" + s = s.replace("e", "X") + elif a >= 1e9: + s = f"{a/1e9:.2f}X" + base = "B" + elif a >= 1e6: + s = f"{a/1e6:.2f}X" + base = "M" + elif a >= 1e3: + s = f"{a/1e3:.2f}X" + base = "K" + else: + s = f"{a:.2f}X" + base = "" + + if remove_zeroes: + s = s.replace("0X", "X").replace(".0X", "X") + + s = s.replace("X", base) + + if amount < 0: + s = "-" + s + + return s diff --git a/pdr_backend/util/test/test_mathutil.py b/pdr_backend/util/test/test_mathutil.py new file mode 100644 index 000000000..dcd0d2db7 --- /dev/null +++ b/pdr_backend/util/test/test_mathutil.py @@ -0,0 +1,140 @@ +from enforce_typing import enforce_types +import numpy as np +import pytest + +from pdr_backend.util.mathutil import * # pylint: disable=wildcard-import + + +@enforce_types +def testIsNumber(): + for x in [-2, 0, 2, 20000, -2.1, -2.0, 0.0, 2.0, 2.1, 2e6]: + assert isNumber(x) + + for x in [[], [1, 2], {}, {1: 2, 2: 3}, None, "", "foo"]: + assert not isNumber(x) + + +@enforce_types +def testIntInStr(): + assert intInStr("123") == 123 + assert intInStr("sdds12") == 12 + assert intInStr("sdds12afdsf3zz") == 123 + assert 
intInStr("sdds12afdsf39sf#@#@9fdsj!!49sd") == 1239949 + + assert intInStr("34.56") == 3456 + assert intInStr("0.00006") == 6 + assert intInStr("10.00006") == 1000006 + + with pytest.raises(ValueError): + intInStr("") + for v in [32, None, {}, []]: + with pytest.raises(TypeError): + intInStr(v) + + +@enforce_types +def testRange(): + r = Range(2.2) + p = r.drawRandomPoint() + assert p == 2.2 + + r = Range(-1.5, 2.5) + for _ in range(20): + p = r.drawRandomPoint() + assert -1.5 <= p <= 2.5 + + r = Range(2.3, None) + p = r.drawRandomPoint() + assert p == 2.3 + + r = Range(2.3, 2.3) + p = r.drawRandomPoint() + assert p == 2.3 + + with pytest.raises(AssertionError): + Range(3.0, 1.0) + + with pytest.raises(TypeError): + Range(3) + with pytest.raises(TypeError): + Range("foo") + with pytest.raises(TypeError): + Range(3.0, "foo") + + +@enforce_types +def testRangeStr(): + r = Range(2.2) + s = str(r) + assert "Range={" in s + assert "min_" in s + assert "2.2" in s + assert "Range}" in s + + +@enforce_types +def testRandunif(): + for _ in range(20): + # happy path + p = randunif(-1.5, 2.5) + assert -1.5 <= p <= 2.5 + + p = randunif(-1.5, -0.5) + assert -1.5 <= p <= -0.5 + + p = randunif(0.0, 100.0) + assert 0.0 <= p <= 100.0 + + # min = max + p = randunif(-2.0, -2.0) + assert p == -2.0 + + p = randunif(0.0, 0.0) + assert p == 0.0 + + p = randunif(2.0, 2.0) + assert p == 2.0 + + # exceptions + with pytest.raises(AssertionError): + p = randunif(0.0, -1.0) + + with pytest.raises(TypeError): + randunif(0.0, 3) + with pytest.raises(TypeError): + randunif(0, 3.0) + with pytest.raises(TypeError): + randunif(3.0, "foo") + + +@enforce_types +def test_round_sig(): + assert round_sig(123456, 1) == 100000 + assert round_sig(123456, 2) == 120000 + assert round_sig(123456, 3) == 123000 + assert round_sig(123456, 4) == 123500 + assert round_sig(123456, 5) == 123460 + assert round_sig(123456, 6) == 123456 + + assert round_sig(1.23456, 1) == 1.00000 + assert round_sig(1.23456, 2) == 1.20000 + assert round_sig(1.23456, 3) == 1.23000 + assert round_sig(1.23456, 4) == 1.23500 + assert round_sig(1.23456, 5) == 1.23460 + assert round_sig(1.23456, 6) == 1.23456 + + assert round_sig(1.23456e9, 1) == 1.00000e9 + assert round_sig(1.23456e9, 2) == 1.20000e9 + assert round_sig(1.23456e9, 3) == 1.23000e9 + assert round_sig(1.23456e9, 4) == 1.23500e9 + assert round_sig(1.23456e9, 5) == 1.23460e9 + assert round_sig(1.23456e9, 6) == 1.23456e9 + + +@enforce_types +def test_nmse(): + y = np.array([10.0, 12.0, 13.0, 20.0]) + yhat = np.array([9.0, 11.0, 14.0, 21.0]) + ymin, ymax = 10.0, 20.0 + e = nmse(yhat, y, ymin, ymax) + assert 0.035 <= e <= 0.036 diff --git a/pdr_backend/util/test/test_strutil.py b/pdr_backend/util/test/test_strutil.py index 1232e803c..456ecf327 100644 --- a/pdr_backend/util/test/test_strutil.py +++ b/pdr_backend/util/test/test_strutil.py @@ -1,4 +1,7 @@ -from pdr_backend.util.strutil import StrMixin, dictStr +import random + +from pdr_backend.util import mathutil +from pdr_backend.util.strutil import StrMixin, dictStr, prettyBigNum, asCurrency def testStrMixin(): @@ -45,3 +48,283 @@ def testEmptyDictStr(): d = {} s = dictStr(d) assert s == ("{}") + + +def testAsCurrency(): + assert asCurrency(0) == "$0.00" + assert asCurrency(0.0) == "$0.00" + assert asCurrency(10) == "$10.00" + assert asCurrency(10.0) == "$10.00" + assert asCurrency(1234.567) == "$1,234.57" + assert asCurrency(2e6) == "$2,000,000.00" + assert asCurrency(2e6 + 0.03) == "$2,000,000.03" + + assert asCurrency(0, decimals=False) == "$0" + assert 
asCurrency(0.0, False) == "$0"
+    assert asCurrency(10, False) == "$10"
+    assert asCurrency(10.0, False) == "$10"
+    assert asCurrency(1234.567, False) == "$1,235"
+    assert asCurrency(2e6, False) == "$2,000,000"
+    assert asCurrency(2e6 + 0.03, False) == "$2,000,000"
+
+
+def testPrettyBigNum1_DoRemoveZeros_decimalsNeeded():
+    assert prettyBigNum(1.23456e13) == "1.23e13"
+    assert prettyBigNum(1.23456e12) == "1.23e12"
+    assert prettyBigNum(1.23456e11) == "123B"
+    assert prettyBigNum(1.23456e10) == "12.3B"
+    assert prettyBigNum(1.23456e9) == "1.23B"
+    assert prettyBigNum(1.23456e8) == "123M"
+    assert prettyBigNum(1.23456e7) == "12.3M"
+    assert prettyBigNum(1.23456e6) == "1.23M"
+    assert prettyBigNum(1.23456e5) == "123K"
+    assert prettyBigNum(1.23456e4) == "12.3K"
+    assert prettyBigNum(1.23456e3) == "1.23K"
+    assert prettyBigNum(1.23456e2) == "123"
+    assert prettyBigNum(1.23456e1) == "12.3"
+    assert prettyBigNum(1.23456e0) == "1.23"
+    assert prettyBigNum(1.23456e-1) == "0.12"
+    assert prettyBigNum(1.23456e-2) == "1.23e-2"
+    assert prettyBigNum(1.23456e-3) == "1.23e-3"
+    assert prettyBigNum(1.23456e-10) == "1.23e-10"
+
+
+def testPrettyBigNum1_DoRemoveZeros_decimalsNotNeeded():
+    assert prettyBigNum(1e13) == "1e13"
+    assert prettyBigNum(1e12) == "1e12"
+    assert prettyBigNum(1e11) == "100B"
+    assert prettyBigNum(1e10) == "10B"
+    assert prettyBigNum(1e9) == "1B"
+    assert prettyBigNum(1e8) == "100M"
+    assert prettyBigNum(1e7) == "10M"
+    assert prettyBigNum(1e6) == "1M"
+    assert prettyBigNum(1e5) == "100K"
+    assert prettyBigNum(1e4) == "10K"
+    assert prettyBigNum(1e3) == "1K"
+    assert prettyBigNum(1e2) == "100"
+    assert prettyBigNum(1e1) == "10"
+    assert prettyBigNum(1) == "1"
+    assert prettyBigNum(1e-1) == "0.1"
+    assert prettyBigNum(1e-2) == "1e-2"
+    assert prettyBigNum(1e-3) == "1e-3"
+    assert prettyBigNum(1e-10) == "1e-10"
+
+
+def testPrettyBigNum1_DoRemoveZeros_catchRoundoff():
+    assert prettyBigNum(57.02e10) == "570B"
+    assert prettyBigNum(57.02e9) == "57B"
+    assert prettyBigNum(57.02e8) == "5.7B"
+    assert prettyBigNum(57.02e7) == "570M"
+    assert prettyBigNum(57.02e6) == "57M"
+    assert prettyBigNum(57.02e5) == "5.7M"
+    assert prettyBigNum(57.02e4) == "570K"
+    assert prettyBigNum(57.02e3) == "57K"
+    assert prettyBigNum(57.02) == "57"
+    assert prettyBigNum(27.02) == "27"
+
+
+def testPrettyBigNum1_DoRemoveZeros_zero():
+    assert prettyBigNum(0) == "0"
+    assert prettyBigNum(0.0) == "0"
+
+
+def testPrettyBigNum1_DoRemoveZeros_negative():
+    assert prettyBigNum(-1.23456e13) == "-1.23e13"
+    assert prettyBigNum(-1.23456e11) == "-123B"
+    assert prettyBigNum(-1.23456e7) == "-12.3M"
+    assert prettyBigNum(-1.23456e3) == "-1.23K"
+    assert prettyBigNum(-1.23456e2) == "-123"
+    assert prettyBigNum(-1.23456e-1) == "-0.12"
+    assert prettyBigNum(-1.23456e-3) == "-1.23e-3"
+
+    assert prettyBigNum(-1e13) == "-1e13"
+    assert prettyBigNum(-1e10) == "-10B"
+    assert prettyBigNum(-1e7) == "-10M"
+    assert prettyBigNum(-1e5) == "-100K"
+    assert prettyBigNum(-1e1) == "-10"
+    assert prettyBigNum(-1e-1) == "-0.1"
+    assert prettyBigNum(-1e-10) == "-1e-10"
+
+
+def generatePairsForPrettyBigNum2_Random_DoRemoveZeroes():
+    for _ in range(100):
+        power = random.choice(list(range(-4, 14)))
+        sigfigs = random.choice([1, 2, 3, 4, 5])
+        x = random.random() * pow(10, power)
+        x = mathutil.round_sig(x, sigfigs)
+
+        s = prettyBigNum(x)  # or prettyBigNum(x, remove_zeroes=False) for test 3
+        print(f"        ({x}, '{s}'),")
+
+
+def testPrettyBigNum2_Random_DoRemoveZeroes():
+    # these are generated via the method above, then manually fixed as needed
+ x_s_pairs = [ + (1200.0, "1.2K"), + (5.284, "5.28"), + (2380000000000.0, "2.38e12"), + (0.071, "7.1e-2"), + (86000.0, "86K"), + (49300000.0, "49.3M"), + (4020000.0, "4.02M"), + (9600000000.0, "9.6B"), + (4800000000000.0, "4.8e12"), + (3000000.0, "3M"), + (6.256, "6.26"), + (89500.0, "89.5K"), + (156170000000.0, "156B"), + (80000.0, "80K"), + (710000000.0, "710M"), + (0.65312, "0.65"), + (553000000000.0, "553B"), + (0.04, "4e-2"), + (6.03e-05, "6.03e-5"), + (90300000.0, "90.3M"), + (828000000000.0, "828B"), + (0.09939, "9.94e-2"), + (5552000.0, "5.55M"), + (0.0004, "4e-4"), + (10000.0, "10K"), + (513000.0, "513K"), + (0.00097, "9.7e-4"), + (52325.0, "52.3K"), + (90000000.0, "90M"), + (0.00266, "2.66e-3"), + (400000.0, "400K"), + (400000.0, "400K"), + (107480000.0, "107M"), + (6785200000000.0, "6.79e12"), + (33680000.0, "33.7M"), + (625000.0, "625K"), + (52790000000.0, "52.8B"), + (51354000000.0, "51.4B"), + (71660.0, "71.7K"), + (2726000000000.0, "2.73e12"), + (671.6, "672"), + (10000000.0, "10M"), + (3415000000.0, "3.42B"), + (0.00272, "2.72e-3"), + (3000000.0, "3M"), + (0.0004171, "4.17e-4"), + (0.002181, "2.18e-3"), + (400000.0, "400K"), + (20000000000.0, "20B"), + (1.8458e-05, "1.85e-5"), + (403000.0, "403K"), + (3.81e-05, "3.81e-5"), + (2e-05, "2e-5"), + (6800000000.0, "6.8B"), + (1000000000000.0, "1e12"), + (4405300000000.0, "4.41e12"), + (0.0048122, "4.81e-3"), + (891000.0, "891K"), + (99000000.0, "99M"), + (50.0, "50"), + (0.128, "0.13"), + (23440000000.0, "23.4B"), + (41000.0, "41K"), + (7271100000000.0, "7.27e12"), + (3230000000000.0, "3.23e12"), + (64.99, "65"), + (740000000.0, "740M"), + (217000.0, "217K"), + (900.0, "900"), + (6.0, "6"), + (0.7631, "0.76"), + (0.04, "4e-2"), + (61700000.0, "61.7M"), + (0.0449, "4.49e-2"), + (737360000.0, "737M"), + (3415000000.0, "3.42B"), + (81244000000.0, "81.2B"), + (4.9e-05, "4.9e-5"), + (9493000.0, "9.49M"), + ] + for x, target_s in x_s_pairs: + assert prettyBigNum(x) == target_s + + +def testPrettyBigNum3_Random_DontRemoveZeros(): + # these are generated via method above, then manually fixed as needed + x_s_pairs = [ + (1200.0, "1.20K"), + (5.284, "5.28"), + (2380000000000.0, "2.38e12"), + (0.071, "7.10e-2"), + (86000.0, "86.00K"), + (49300000.0, "49.30M"), + (4020000.0, "4.02M"), + (9600000000.0, "9.60B"), + (4800000000000.0, "4.80e12"), + (3000000.0, "3.00M"), + (6.256, "6.26"), + (89500.0, "89.50K"), + (156170000000.0, "156.17B"), + (80000.0, "80.00K"), + (710000000.0, "710.00M"), + (0.65312, "0.65"), + (553000000000.0, "553.00B"), + (0.04, "4.00e-2"), + (6.03e-05, "6.03e-5"), + (90300000.0, "90.30M"), + (828000000000.0, "828.00B"), + (0.09939, "9.94e-2"), + (5552000.0, "5.55M"), + (0.0004, "4.00e-4"), + (10000.0, "10.00K"), + (513000.0, "513.00K"), + (0.00097, "9.70e-4"), + (52325.0, "52.33K"), + (90000000.0, "90.00M"), + (0.00266, "2.66e-3"), + (400000.0, "400.00K"), + (107480000.0, "107.48M"), + (6785200000000.0, "6.79e12"), + (33680000.0, "33.68M"), + (625000.0, "625.00K"), + (52790000000.0, "52.79B"), + (51354000000.0, "51.35B"), + (71660.0, "71.66K"), + (2726000000000.0, "2.73e12"), + (671.6, "671.60"), + (10000000.0, "10.00M"), + (3415000000.0, "3.42B"), + (0.00272, "2.72e-3"), + (3000000.0, "3.00M"), + (0.0004171, "4.17e-4"), + (0.002181, "2.18e-3"), + (400000.0, "400.00K"), + (20000000000.0, "20.00B"), + (1.8458e-05, "1.85e-5"), + (403000.0, "403.00K"), + (3.81e-05, "3.81e-5"), + (2e-05, "2.00e-5"), + (6800000000.0, "6.80B"), + (1000000000000.0, "1.00e12"), + (4405300000000.0, "4.41e12"), + (0.0048122, "4.81e-3"), + 
(891000.0, "891.00K"), + (99000000.0, "99.00M"), + (50.0, "50.00"), + (0.128, "0.13"), + (23440000000.0, "23.44B"), + (41000.0, "41.00K"), + (7271100000000.0, "7.27e12"), + (3230000000000.0, "3.23e12"), + (64.99, "64.99"), + (740000000.0, "740.00M"), + (217000.0, "217.00K"), + (900.0, "900.00"), + (6.0, "6.00"), + (0.7631, "0.76"), + (0.04, "4.00e-2"), + (61700000.0, "61.70M"), + (0.0449, "4.49e-2"), + (737360000.0, "737.36M"), + (3415000000.0, "3.42B"), + (81244000000.0, "81.24B"), + (4.9e-05, "4.90e-5"), + (9493000.0, "9.49M"), + ] + for x, target_s in x_s_pairs: + assert prettyBigNum(x, False) == target_s diff --git a/setup.py b/setup.py index 04f6f0290..50714140e 100644 --- a/setup.py +++ b/setup.py @@ -15,12 +15,15 @@ "enforce_typing", "eth-account", "eth-keys", + "matplotlib", + "numpy", "pandas", "pathlib", "pylint", "pytest", "pytest-env", "requests", + "scikit-learn", "web3", "sapphire.py", "ocean-contracts==2.0.0a14", # install this last
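
Reviewer note: below is a minimal, self-contained sketch (not part of the patch) of how the new `timeblock.timeblock()` and `mathutil.nmse()` helpers compose, using scikit-learn (added to `setup.py` above). The toy sine series and `LinearRegression` model are stand-ins for illustration only, not the approach3 model flow itself.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

from pdr_backend.predictoor.approach3.timeblock import timeblock
from pdr_backend.util.mathutil import nmse

# toy 1-d series, oldest value first (the layout timeblock() expects)
z = list(np.sin(np.arange(50) / 5.0))

Nt = 10
X = timeblock(z, Nt)        # shape (40, 10): row i holds the Nt values before z[-(i+1)]
y = np.array(z[::-1][:40])  # row i's one-step-ahead target is z[-(i+1)]

# fit on the 35 older samples, predict the 5 newest
model = LinearRegression().fit(X[5:], y[5:])
yhat = model.predict(X[:5])
print(nmse(yhat, y[:5], min(y), max(y)))  # normalized MSE, roughly in [0, 1]
```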