
Commit 8d30b05 (v4.0.3)
chen-001 committed Jul 4, 2023
1 parent fdf3d4a

Showing 5 changed files with 257 additions and 79 deletions.
README.md: 1 addition, 1 deletion
@@ -5,7 +5,7 @@

 ### New major version 📢
 * v4.0.0 — 2023.06.28
-> The factor framework v4.0 is here! Tick-by-tick data & arbitrary second-level data have arrived
+> The factor framework v4.0 is here! Tick-by-tick data, arbitrary second-level data & a brand-new factor-results database have arrived
 * v3.0.0 — 2022.08.16

pure_ocean_breeze/__init__.py: 2 additions, 2 deletions
@@ -2,8 +2,8 @@
 A framework for quantitative multi-factor research, covering data, backtesting, and factor construction
 """
 
-__updated__ = "2023-07-03 01:29:04"
-__version__ = "4.0.2"
+__updated__ = "2023-07-03 20:59:40"
+__version__ = "4.0.3"
 __author__ = "chenzongwei"
 __author_email__ = "winterwinter999@163.com"
 __url__ = "https://github.com/chen-001/pure_ocean_breeze"
pure_ocean_breeze/data/read_data.py: 119 additions, 31 deletions
@@ -1,12 +1,14 @@
__updated__ = "2023-06-21 14:48:09"
__updated__ = "2023-07-04 09:13:50"

import os
import numpy as np
import pandas as pd
import datetime
from typing import Union, Dict, Tuple
import deprecation
from typing import Any, Union, Dict, Tuple
from loguru import logger

from pure_ocean_breeze import __version__
from pure_ocean_breeze.state.states import STATES
from pure_ocean_breeze.state.homeplace import HomePlace
from pure_ocean_breeze.state.decorators import *
@@ -172,33 +174,33 @@ def read_daily(
         elif open:
             opens = pd.read_parquet(
                 homeplace.daily_data_file + "opens.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = opens
         elif close:
             closes = pd.read_parquet(
                 homeplace.daily_data_file + "closes.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = closes
         elif high:
             highs = pd.read_parquet(
                 homeplace.daily_data_file + "highs.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = highs
         elif low:
             lows = pd.read_parquet(
                 homeplace.daily_data_file + "lows.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = lows
         elif vwap:
             df = (
                 pd.read_parquet(homeplace.daily_data_file + "vwaps.parquet")
                 * read_daily(adjfactor=1, start=start)
-                * read_daily(state=1,start=start)
+                * read_daily(state=1, start=start)
             )
         elif tr:
             trs = pd.read_parquet(homeplace.daily_data_file + "trs.parquet").replace(
                 0, np.nan
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = trs
         elif sharenum:
             sharenums = pd.read_parquet(homeplace.daily_data_file + "sharenums.parquet")
@@ -208,7 +210,7 @@ def read_daily(
         elif amount:
             volumes = pd.read_parquet(
                 homeplace.daily_data_file + "amounts.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = volumes
         elif money:
             df = pd.read_parquet(
@@ -220,14 +222,14 @@ def read_daily(
         elif flow_cap:
             closes = pd.read_parquet(
                 homeplace.daily_data_file + "closes_unadj.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             sharenums = pd.read_parquet(homeplace.daily_data_file + "sharenums.parquet")
             flow_cap = closes * sharenums
             df = flow_cap
         elif total_cap:
             closes = pd.read_parquet(
                 homeplace.daily_data_file + "closes_unadj.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             sharenums = pd.read_parquet(
                 homeplace.daily_data_file + "total_sharenums.parquet"
             )
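Both market-cap branches above follow the same arithmetic: unadjusted close times share count, masked by the 1/NaN trading-state frame that `read_daily(state=1, ...)` returns. A minimal self-contained sketch of that layout (tickers, dates, and values are invented for illustration):

```python
import numpy as np
import pandas as pd

dates = pd.date_range("2023-06-01", periods=3)
stocks = ["000001.SZ", "600000.SH"]

# Date-by-stock frames mirroring the parquet layout read_daily works with.
closes_unadj = pd.DataFrame(
    [[10.0, 20.0], [10.5, 19.5], [11.0, 21.0]], index=dates, columns=stocks
)
sharenums = pd.DataFrame([[1e8, 2e8]] * 3, index=dates, columns=stocks)
state = pd.DataFrame(  # 1 = traded that day, NaN = suspended
    [[1, 1], [1, np.nan], [1, 1]], index=dates, columns=stocks
)

# Float market cap, with suspended days NaN-masked, as in the flow_cap branch.
flow_cap = closes_unadj * state * sharenums
print(flow_cap)
```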
@@ -237,9 +239,9 @@ def read_daily(
             # df=pd.read_parquet(homeplace.daily_data_file+'adjfactors.parquet')
             df = (
                 read_daily(close=1, start=start)
-                * read_daily(state=1,start=start)
+                * read_daily(state=1, start=start)
                 / read_daily(close=1, start=start, unadjust=1)
-                * read_daily(state=1,start=start)
+                * read_daily(state=1, start=start)
             )
         elif st:
             st = pd.read_parquet(homeplace.daily_data_file + "sts.parquet")
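The adjfactor branch backs the factor out of prices instead of reading the commented-out adjfactors.parquet: adjusted close divided by unadjusted close, masked by the trading state (multiplying by a 1/NaN mask twice, as the diff does, is harmless). A hedged sketch of that identity with toy numbers:

```python
import numpy as np
import pandas as pd

dates = pd.date_range("2023-06-01", periods=2)
close_adj = pd.DataFrame({"000001.SZ": [12.5, 13.0]}, index=dates)
close_unadj = close_adj / 1.25  # pretend a constant cumulative factor of 1.25
state = pd.DataFrame({"000001.SZ": [1, np.nan]}, index=dates)

# adjfactor = adjusted close / unadjusted close, NaN where not trading.
adjfactor = close_adj * state / (close_unadj * state)
print(adjfactor)  # 1.25 on the trading day, NaN on the suspended day
```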
@@ -278,46 +280,46 @@ def read_daily(
             ) / read_daily(close=1, start=start)
         elif pb:
             df = pd.read_parquet(homeplace.daily_data_file + "pb.parquet") * read_daily(
-                state=1,start=start
+                state=1, start=start
             )
         elif pe:
             df = pd.read_parquet(homeplace.daily_data_file + "pe.parquet") * read_daily(
-                state=1,start=start
+                state=1, start=start
             )
         elif pettm:
-            df = pd.read_parquet(homeplace.daily_data_file + "pettm.parquet") * read_daily(
-                state=1,start=start
-            )
+            df = pd.read_parquet(
+                homeplace.daily_data_file + "pettm.parquet"
+            ) * read_daily(state=1, start=start)
         elif iret:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "idiosyncratic_ret.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         elif ivol:
             df = read_daily(iret=1, start=start)
             df = df.rolling(20, min_periods=10).std()
         elif illiquidity:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "illiquidity.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         elif swindustry_ret:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "股票对应申万一级行业每日收益率.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         elif zxindustry_ret:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "股票对应中信一级行业每日收益率.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         elif stop_up:
             df = (
                 pd.read_parquet(homeplace.daily_data_file + "stop_ups.parquet")
                 * read_daily(adjfactor=1, start=start)
-                * read_daily(state=1,start=start)
+                * read_daily(state=1, start=start)
             )
         elif stop_down:
             df = (
                 pd.read_parquet(homeplace.daily_data_file + "stop_downs.parquet")
                 * read_daily(adjfactor=1, start=start)
-                * read_daily(state=1,start=start)
+                * read_daily(state=1, start=start)
             )
         elif zxindustry_dummy_code:
             df = pd.read_parquet(homeplace.daily_data_file + "中信一级行业哑变量代码版.parquet")
@@ -349,35 +351,35 @@ def read_daily(
         if open:
             opens = pd.read_parquet(
                 homeplace.daily_data_file + "opens_unadj.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = opens
         elif close:
             closes = pd.read_parquet(
                 homeplace.daily_data_file + "closes_unadj.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = closes
         elif high:
             highs = pd.read_parquet(
                 homeplace.daily_data_file + "highs_unadj.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = highs
         elif low:
             lows = pd.read_parquet(
                 homeplace.daily_data_file + "lows_unadj.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
             df = lows
         elif vwap:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "vwaps.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         elif stop_up:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "stop_ups.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         elif stop_down:
             df = pd.read_parquet(
                 homeplace.daily_data_file + "stop_downs.parquet"
-            ) * read_daily(state=1,start=start)
+            ) * read_daily(state=1, start=start)
         else:
             raise IOError("阁下总得读点什么吧?🤒")
     if "date" not in df.columns:
@@ -755,6 +757,12 @@ def get_industry_dummies(
     return ress
 
 
+@deprecation.deprecated(
+    deprecated_in="4.0",
+    removed_in="5.0",
+    current_version=__version__,
+    details="由于因子成果数据库升级,3.x版本的因子成果读取函数将下线",
+)
 def database_read_final_factors(
     name: str = None,
     order: int = None,
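Rather than deleting the 3.x readers outright, this commit wraps them with the deprecation package; the Chinese details string says, roughly, "because the factor-results database has been upgraded, the 3.x factor readers will be taken offline." A minimal self-contained sketch of what the decorator does, assuming the documented behavior of deprecation (the function name and details wording here are invented):

```python
import warnings

import deprecation

__version__ = "4.0.3"  # stands in for the value imported from pure_ocean_breeze


@deprecation.deprecated(
    deprecated_in="4.0",
    removed_in="5.0",
    current_version=__version__,
    details="use the new factor-results database instead",  # hypothetical wording
)
def old_reader():
    return "still callable, but warns"


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    old_reader()
    # Between deprecated_in and removed_in, each call emits a DeprecationWarning subclass.
    print(issubclass(caught[0].category, DeprecationWarning))  # True
```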
@@ -877,6 +885,12 @@ def database_read_final_factors(
     return df, ""
 
 
+@deprecation.deprecated(
+    deprecated_in="4.0",
+    removed_in="5.0",
+    current_version=__version__,
+    details="由于因子成果数据库升级,3.x版本的因子成果读取函数将下线",
+)
 def database_read_primary_factors(name: str, name2: str = None) -> pd.DataFrame:
     """Read a primary factor's values, given the factor name
@@ -900,3 +914,77 @@ def database_read_primary_factors(name: str, name2: str = None) -> pd.DataFrame:
     df = pd.read_parquet(homeplace.factor_data_file + name)
     df = df[sorted(list(df.columns))]
     return df
+
+
+class FactorDone(object):
+    def __init__(
+        self,
+        order: str,
+        name: str = None,
+        place: str = None,
+        son_name: str = None,
+        freq: str = "月",
+    ) -> None:
+        self.homeplace = HomePlace()
+        self.order = order
+        self.freq = freq
+        self.qdb = Questdb()
+        try:
+            self.factor_infos = self.qdb.get_data(
+                f"select * from factor_infos where order='{self.order}' and freq='{self.freq}'"
+            )
+        except Exception:
+            self.factor_infos = pd.DataFrame()
+        self.name = name
+        self.place = place
+        self.son_name = son_name
+        if (self.place is None) or (self.name is None):
+            final_line = self.qdb.get_data(
+                f"select * from factor_infos where order='{self.order}' and freq='{self.freq}'"
+            )
+            self.name = final_line.name.iloc[0]
+            self.place = final_line.place.iloc[0]
+        if son_name is None:
+            self.file = f"因子{self.order}_{self.name}_{self.freq}_{self.place}.parquet"
+            self.son_factors = {}
+            if self.factor_infos.shape[0] > 0:
+                for row in self.factor_infos.dropna().itertuples():
+                    self.son_factors[row.son_name] = FactorDone(
+                        order=row.order,
+                        name=row.name,
+                        place=row.place,
+                        son_name=row.son_name,
+                        freq=row.freq,
+                    )
+        else:
+            self.file = f"因子{self.order}_{self.name}_{self.son_name}_{self.freq}_{self.place}.parquet"
+
+    def __call__(self, son_name: str = None) -> Union[pd.DataFrame, dict]:
+        if son_name is None:
+            return pd.read_parquet(self.homeplace.final_factor_file + self.file)
+        else:
+            return self.son_factors[son_name]()
+
+    def save_factor(self, factor: pd.DataFrame):
+        try:
+            son_info = self.qdb.get_data(
+                f"select * from factor_infos where order='{self.order}' and son_name='{self.son_name}' and freq='{self.freq}'"
+            )
+        except Exception:
+            logger.warning(f"本次为第一次写入{self.name}_{self.son_name}因子")
+            son_info = pd.DataFrame()
+        if son_info.shape[0] == 0:
+            self.qdb.write_via_df(
+                pd.DataFrame(
+                    {
+                        "order": [self.order],
+                        "name": [self.name],
+                        "place": [self.place],
+                        "son_name": [self.son_name],
+                        "freq": [self.freq],
+                    }
+                ),
+                "factor_infos",
+            )
+        factor.to_parquet(self.homeplace.final_factor_file + self.file)
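FactorDone is the commit's replacement for the deprecated readers: it resolves a factor's name and storage place from the factor_infos table in Questdb, maps each order/freq pair to a parquet file, and recursively builds sub-factor readers keyed by son_name. Calling an instance returns the factor frame; save_factor registers metadata on first write before persisting. A hedged usage sketch (the order "001" and son_name "ret20" are invented and depend on what your factor_infos table contains):

```python
import pandas as pd

from pure_ocean_breeze.data.read_data import FactorDone

# Resolve factor 001 at monthly frequency; name and place are looked up
# in Questdb when not passed explicitly.
fac = FactorDone(order="001", freq="月")

parent = fac()        # the parent factor frame, read from its parquet file
child = fac("ret20")  # a sub-factor, via the son_factors registry

# The first save of a sub-factor also inserts its row into factor_infos.
values = pd.DataFrame()  # date-by-stock factor values would go here
FactorDone(order="001", son_name="ret20", freq="月").save_factor(values)
```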