Skip to content

Commit

Permalink
v4.2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
chen-001 committed May 14, 2024
1 parent 83d1716 commit ec18aef
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 47 deletions.
40 changes: 40 additions & 0 deletions pure_ocean_breeze/jason/data/read_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
from pure_ocean_breeze.jason.state.states import STATES
from pure_ocean_breeze.jason.state.homeplace import HomePlace
from pure_ocean_breeze.jason.state.decorators import *
from cachier import cachier

try:
homeplace = HomePlace()
except Exception:
print("您暂未初始化,功能将受限")


@cachier()
def read_daily(
open: bool = 0,
close: bool = 0,
Expand Down Expand Up @@ -333,6 +335,7 @@ def database_read_primary_factors(name: str) -> pd.DataFrame:
df = pd.read_parquet(homeplace.factor_data_file + name+'.parquet')
return df

@cachier()
def read_market(
open: bool = 0,
close: bool = 0,
Expand Down Expand Up @@ -406,3 +409,40 @@ def read_market(
tr = read_daily(tr=1, start=start)
df = pd.DataFrame({k: list(df) for k in list(tr.columns)}, index=df.index)
return df


@cachier()
def moon_read_dummy(freq):
    """Load the SW level-1 industry dummy table, resampled to *freq*.

    Reads ``sw_industry_level1_dummies.parquet`` from the daily data
    directory, keeps the last observation per code within each *freq*
    period, and returns a tidy frame whose columns are
    ``["code", "date", "w1", ..., "wN"]``, restricted to dates on or
    after ``STATES["START"]``. Results are cached via ``cachier``.
    """

    def _tidy(dummies):
        # Flatten the (code, date) grouping back into columns and
        # rename the dummy columns to w1..wN.
        dummies = dummies.drop(columns=["code"]).reset_index()
        weight_cols = [f"w{i}" for i in range(1, dummies.shape[1] - 1)]
        dummies.columns = ["code", "date"] + weight_cols
        start = pd.Timestamp(str(STATES["START"]))
        return dummies[dummies.date >= start]

    # week_here
    path = homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
    raw = pd.read_parquet(path).fillna(0)
    resampled = raw.set_index("date").groupby("code").resample(freq).last()
    return _tidy(resampled)

@cachier()
def moon_read_barra():
    """Read every Barra style-factor parquet file, resampled weekly.

    Scans the Barra data directory for ``.parquet`` files (ignoring
    hidden files), resamples each one to weekly frequency keeping the
    last observation, and returns a dict mapping style name (the file
    stem) to its DataFrame. Results are cached via ``cachier``.
    """
    files = [
        f
        for f in os.listdir(homeplace.barra_data_file)
        if f.endswith(".parquet") and not f.startswith(".")
    ]
    return {
        f.split(".")[0]: pd.read_parquet(homeplace.barra_data_file + f)
        .resample("W")
        .last()
        for f in files
    }
95 changes: 48 additions & 47 deletions pure_ocean_breeze/jason/labor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
read_daily,
read_market,
get_industry_dummies,
moon_read_dummy,
moon_read_barra,
)
from pure_ocean_breeze.jason.state.homeplace import HomePlace

Expand Down Expand Up @@ -817,29 +819,30 @@ def __init__(
cls.freq = freq
cls.freq_ctrl = frequency_controller(freq)

def deal_dummy(industry_dummy):
industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
col = ["code", "date"] + industry_ws
industry_dummy.columns = col
industry_dummy = industry_dummy[
industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
]
return industry_dummy
# def deal_dummy(industry_dummy):
# industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
# industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
# col = ["code", "date"] + industry_ws
# industry_dummy.columns = col
# industry_dummy = industry_dummy[
# industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
# ]
# return industry_dummy

if not no_read_indu:
# week_here
cls.swindustry_dummy = (
pd.read_parquet(
cls.homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
)
.fillna(0)
.set_index("date")
.groupby("code")
.resample(freq)
.last()
)
cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
# cls.swindustry_dummy = (
# pd.read_parquet(
# cls.homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
# )
# .fillna(0)
# .set_index("date")
# .groupby("code")
# .resample(freq)
# .last()
# )
# cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
cls.swindustry_dummy=moon_read_dummy(freq)

@property
def factors_out(self):
Expand Down Expand Up @@ -1181,12 +1184,7 @@ def get_long_short_comments(self, on_paper=False):
self.inner_short_ret_yearly = self.inner_short_net_values[-1] * (
self.freq_ctrl.counts_one_year / len(self.inner_short_net_values)
)
self.group1_ret_yearly= self.group_net_values["group1"][-1] * (
self.freq_ctrl.counts_one_year / len(self.group_net_values.group1)
)
self.group10_ret_yearly = self.group_net_values["group10"][-1] * (
self.freq_ctrl.counts_one_year / len(self.group_net_values.group10)
)

# week_here
self.long_short_vol_yearly = np.std(self.long_short_rets) * (
self.freq_ctrl.counts_one_year**0.5
Expand Down Expand Up @@ -1256,6 +1254,18 @@ def get_total_comments(self, groups_num):
].mean()
else:
self.factor_turnover_rate = self.factor_turnover_rates["group1"].mean()
self.group_mean_rets_monthly = self.group_rets.drop(
columns=["long_short"]
).mean()
# self.group_mean_rets_monthly = (
# self.group_mean_rets_monthly - self.group_mean_rets_monthly.mean()
# )
mar=self.market_ret.loc[self.factors_out.index]
self.group_mean_rets_monthly = (
self.group_mean_rets_monthly - mar.mean()
)*self.freq_ctrl.counts_one_year
self.group1_ret_yearly= self.group_mean_rets_monthly.loc['group1']
self.group10_ret_yearly = self.group_mean_rets_monthly.loc['group10']
self.total_comments = pd.concat(
[
self.ic_icir_and_rank,
Expand Down Expand Up @@ -1306,16 +1316,7 @@ def get_total_comments(self, groups_num):
]
)
# print(self.total_comments)
self.group_mean_rets_monthly = self.group_rets.drop(
columns=["long_short"]
).mean()
# self.group_mean_rets_monthly = (
# self.group_mean_rets_monthly - self.group_mean_rets_monthly.mean()
# )
mar=self.market_ret.loc[self.factors_out.index]
self.group_mean_rets_monthly = (
self.group_mean_rets_monthly - mar.mean()
)*self.freq_ctrl.counts_one_year


def plot_net_values(self, y2, filename, iplot=1, ilegend=1, without_breakpoint=0):
"""使用matplotlib来画图,y2为是否对多空组合采用双y轴"""
Expand Down Expand Up @@ -2209,13 +2210,14 @@ def __init__(
"""
cls.homeplace = HomePlace()
# barra因子数据
styles = os.listdir(cls.homeplace.barra_data_file)
styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
barras = {}
for s in styles:
k = s.split(".")[0]
v = pd.read_parquet(cls.homeplace.barra_data_file + s).resample("W").last()
barras[k] = v
# styles = os.listdir(cls.homeplace.barra_data_file)
# styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
# barras = {}
# for s in styles:
# k = s.split(".")[0]
# v = pd.read_parquet(cls.homeplace.barra_data_file + s).resample("W").last()
# barras[k] = v
barras=moon_read_barra()
rename_dict = {
"size": "市值",
"nonlinearsize": "非线性市值",
Expand Down Expand Up @@ -2642,11 +2644,10 @@ def sing(dfs: list[pd.DataFrame], date: pd.Timestamp):


@do_on_dfs
def sun(factor:pd.DataFrame,rolling_5:int=1):
def sun(factor:pd.DataFrame,rolling_days:int=10):
'''先单因子测试,再测试其与常用风格之间的关系'''
if rolling_5:
factor=boom_one(factor)
ractor=boom_one(factor.rank(axis=1))
ractor=boom_one(factor.rank(axis=1),rolling_days)
factor=boom_one(factor,rolling_days)
shen=pure_moonnight(factor)
pfi=pure_snowtrain(ractor)
shen=pure_moonnight(pfi,neutralize=1)
Expand Down

0 comments on commit ec18aef

Please sign in to comment.