Skip to content

Commit

Permalink
v4.2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
chen-001 committed May 14, 2024
1 parent 83d1716 commit ec18aef
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 47 deletions.
40 changes: 40 additions & 0 deletions pure_ocean_breeze/jason/data/read_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
from pure_ocean_breeze.jason.state.states import STATES
from pure_ocean_breeze.jason.state.homeplace import HomePlace
from pure_ocean_breeze.jason.state.decorators import *
from cachier import cachier

try:
homeplace = HomePlace()
except Exception:
print("您暂未初始化,功能将受限")


@cachier()
def read_daily(
open: bool = 0,
close: bool = 0,
Expand Down Expand Up @@ -333,6 +335,7 @@ def database_read_primary_factors(name: str) -> pd.DataFrame:
df = pd.read_parquet(homeplace.factor_data_file + name+'.parquet')
return df

@cachier()
def read_market(
open: bool = 0,
close: bool = 0,
Expand Down Expand Up @@ -406,3 +409,40 @@ def read_market(
tr = read_daily(tr=1, start=start)
df = pd.DataFrame({k: list(df) for k in list(tr.columns)}, index=df.index)
return df


@cachier()
def moon_read_dummy(freq):
    """Load the SW level-1 industry dummy table, resampled to *freq*.

    Reads ``sw_industry_level1_dummies.parquet`` from the daily data
    directory, keeps the last observation per code within each *freq*
    period, and returns a tidy frame whose columns are
    ``["code", "date", "w1", ..., "wN"]``, restricted to dates on or
    after ``STATES["START"]``. Results are cached via ``cachier``.
    """

    def _tidy(dummies):
        # Flatten the (code, date) grouping back into columns and
        # rename the dummy columns to w1..wN.
        dummies = dummies.drop(columns=["code"]).reset_index()
        weight_cols = [f"w{i}" for i in range(1, dummies.shape[1] - 1)]
        dummies.columns = ["code", "date"] + weight_cols
        start = pd.Timestamp(str(STATES["START"]))
        return dummies[dummies.date >= start]

    # week_here
    path = homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
    raw = pd.read_parquet(path).fillna(0)
    resampled = raw.set_index("date").groupby("code").resample(freq).last()
    return _tidy(resampled)

@cachier()
def moon_read_barra():
    """Read every Barra style-factor parquet file, resampled weekly.

    Scans the Barra data directory for ``.parquet`` files (ignoring
    hidden files), resamples each one to weekly frequency keeping the
    last observation, and returns a dict mapping style name (the file
    stem) to its DataFrame. Results are cached via ``cachier``.
    """
    files = [
        f
        for f in os.listdir(homeplace.barra_data_file)
        if f.endswith(".parquet") and not f.startswith(".")
    ]
    return {
        f.split(".")[0]: pd.read_parquet(homeplace.barra_data_file + f)
        .resample("W")
        .last()
        for f in files
    }
95 changes: 48 additions & 47 deletions pure_ocean_breeze/jason/labor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
read_daily,
read_market,
get_industry_dummies,
moon_read_dummy,
moon_read_barra,
)
from pure_ocean_breeze.jason.state.homeplace import HomePlace

Expand Down Expand Up @@ -817,29 +819,30 @@ def __init__(
cls.freq = freq
cls.freq_ctrl = frequency_controller(freq)

def deal_dummy(industry_dummy):
industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
col = ["code", "date"] + industry_ws
industry_dummy.columns = col
industry_dummy = industry_dummy[
industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
]
return industry_dummy
# def deal_dummy(industry_dummy):
# industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
# industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
# col = ["code", "date"] + industry_ws
# industry_dummy.columns = col
# industry_dummy = industry_dummy[
# industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
# ]
# return industry_dummy

if not no_read_indu:
# week_here
cls.swindustry_dummy = (
pd.read_parquet(
cls.homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
)
.fillna(0)
.set_index("date")
.groupby("code")
.resample(freq)
.last()
)
cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
# cls.swindustry_dummy = (
# pd.read_parquet(
# cls.homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
# )
# .fillna(0)
# .set_index("date")
# .groupby("code")
# .resample(freq)
# .last()
# )
# cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
cls.swindustry_dummy=moon_read_dummy(freq)

@property
def factors_out(self):
Expand Down Expand Up @@ -1181,12 +1184,7 @@ def get_long_short_comments(self, on_paper=False):
self.inner_short_ret_yearly = self.inner_short_net_values[-1] * (
self.freq_ctrl.counts_one_year / len(self.inner_short_net_values)
)
self.group1_ret_yearly= self.group_net_values["group1"][-1] * (
self.freq_ctrl.counts_one_year / len(self.group_net_values.group1)
)
self.group10_ret_yearly = self.group_net_values["group10"][-1] * (
self.freq_ctrl.counts_one_year / len(self.group_net_values.group10)
)

# week_here
self.long_short_vol_yearly = np.std(self.long_short_rets) * (
self.freq_ctrl.counts_one_year**0.5
Expand Down Expand Up @@ -1256,6 +1254,18 @@ def get_total_comments(self, groups_num):
].mean()
else:
self.factor_turnover_rate = self.factor_turnover_rates["group1"].mean()
self.group_mean_rets_monthly = self.group_rets.drop(
columns=["long_short"]
).mean()
# self.group_mean_rets_monthly = (
# self.group_mean_rets_monthly - self.group_mean_rets_monthly.mean()
# )
mar=self.market_ret.loc[self.factors_out.index]
self.group_mean_rets_monthly = (
self.group_mean_rets_monthly - mar.mean()
)*self.freq_ctrl.counts_one_year
self.group1_ret_yearly= self.group_mean_rets_monthly.loc['group1']
self.group10_ret_yearly = self.group_mean_rets_monthly.loc['group10']
self.total_comments = pd.concat(
[
self.ic_icir_and_rank,
Expand Down Expand Up @@ -1306,16 +1316,7 @@ def get_total_comments(self, groups_num):
]
)
# print(self.total_comments)
self.group_mean_rets_monthly = self.group_rets.drop(
columns=["long_short"]
).mean()
# self.group_mean_rets_monthly = (
# self.group_mean_rets_monthly - self.group_mean_rets_monthly.mean()
# )
mar=self.market_ret.loc[self.factors_out.index]
self.group_mean_rets_monthly = (
self.group_mean_rets_monthly - mar.mean()
)*self.freq_ctrl.counts_one_year


def plot_net_values(self, y2, filename, iplot=1, ilegend=1, without_breakpoint=0):
"""使用matplotlib来画图,y2为是否对多空组合采用双y轴"""
Expand Down Expand Up @@ -2209,13 +2210,14 @@ def __init__(
"""
cls.homeplace = HomePlace()
# barra因子数据
styles = os.listdir(cls.homeplace.barra_data_file)
styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
barras = {}
for s in styles:
k = s.split(".")[0]
v = pd.read_parquet(cls.homeplace.barra_data_file + s).resample("W").last()
barras[k] = v
# styles = os.listdir(cls.homeplace.barra_data_file)
# styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
# barras = {}
# for s in styles:
# k = s.split(".")[0]
# v = pd.read_parquet(cls.homeplace.barra_data_file + s).resample("W").last()
# barras[k] = v
barras=moon_read_barra()
rename_dict = {
"size": "市值",
"nonlinearsize": "非线性市值",
Expand Down Expand Up @@ -2642,11 +2644,10 @@ def sing(dfs: list[pd.DataFrame], date: pd.Timestamp):


@do_on_dfs
def sun(factor:pd.DataFrame,rolling_5:int=1):
def sun(factor:pd.DataFrame,rolling_days:int=10):
'''先单因子测试,再测试其与常用风格之间的关系'''
if rolling_5:
factor=boom_one(factor)
ractor=boom_one(factor.rank(axis=1))
ractor=boom_one(factor.rank(axis=1),rolling_days)
factor=boom_one(factor,rolling_days)
shen=pure_moonnight(factor)
pfi=pure_snowtrain(ractor)
shen=pure_moonnight(pfi,neutralize=1)
Expand Down

0 comments on commit ec18aef

Please sign in to comment.