From ec18aef4225607a9ff6472f0ff7164be6fcc6ccf Mon Sep 17 00:00:00 2001
From: chenzongwei <17695480342@163.com>
Date: Tue, 14 May 2024 10:36:25 +0800
Subject: [PATCH] v4.2.2

---
 pure_ocean_breeze/jason/data/read_data.py | 40 ++++++++++
 pure_ocean_breeze/jason/labor/process.py  | 95 ++++++++++++-----------
 2 files changed, 88 insertions(+), 47 deletions(-)

diff --git a/pure_ocean_breeze/jason/data/read_data.py b/pure_ocean_breeze/jason/data/read_data.py
index 09a710e..2a0750d 100644
--- a/pure_ocean_breeze/jason/data/read_data.py
+++ b/pure_ocean_breeze/jason/data/read_data.py
@@ -9,6 +9,7 @@
 from pure_ocean_breeze.jason.state.states import STATES
 from pure_ocean_breeze.jason.state.homeplace import HomePlace
 from pure_ocean_breeze.jason.state.decorators import *
+from cachier import cachier
 
 try:
     homeplace = HomePlace()
@@ -16,6 +17,7 @@
     print("您暂未初始化，功能将受限")
 
 
+@cachier()
 def read_daily(
     open: bool = 0,
     close: bool = 0,
@@ -333,6 +335,7 @@ def database_read_primary_factors(name: str) -> pd.DataFrame:
     df = pd.read_parquet(homeplace.factor_data_file + name+'.parquet')
     return df
 
+@cachier()
 def read_market(
     open: bool = 0,
     close: bool = 0,
@@ -406,3 +409,40 @@ def read_market(
         tr = read_daily(tr=1, start=start)
         df = pd.DataFrame({k: list(df) for k in list(tr.columns)}, index=df.index)
     return df
+
+
+@cachier()
+def moon_read_dummy(freq):
+    def deal_dummy(industry_dummy):
+            industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
+            industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
+            col = ["code", "date"] + industry_ws
+            industry_dummy.columns = col
+            industry_dummy = industry_dummy[
+                industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
+            ]
+            return industry_dummy
+
+            # week_here
+    swindustry_dummy = (
+        pd.read_parquet(
+            homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
+        )
+        .fillna(0)
+        .set_index("date")
+        .groupby("code")
+        .resample(freq)
+        .last()
+        )
+    return deal_dummy(swindustry_dummy)
+
+@cachier()
+def moon_read_barra():
+    styles = os.listdir(homeplace.barra_data_file)
+    styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
+    barras = {}
+    for s in styles:
+        k = s.split(".")[0]
+        v = pd.read_parquet(homeplace.barra_data_file + s).resample("W").last()
+        barras[k] = v
+    return barras
\ No newline at end of file
diff --git a/pure_ocean_breeze/jason/labor/process.py b/pure_ocean_breeze/jason/labor/process.py
index 86603ea..9df1640 100644
--- a/pure_ocean_breeze/jason/labor/process.py
+++ b/pure_ocean_breeze/jason/labor/process.py
@@ -42,6 +42,8 @@
     read_daily,
     read_market,
     get_industry_dummies,
+    moon_read_dummy,
+    moon_read_barra,
 )
 from pure_ocean_breeze.jason.state.homeplace import HomePlace
 
@@ -817,29 +819,30 @@ def __init__(
         cls.freq = freq
         cls.freq_ctrl = frequency_controller(freq)
 
-        def deal_dummy(industry_dummy):
-            industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
-            industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
-            col = ["code", "date"] + industry_ws
-            industry_dummy.columns = col
-            industry_dummy = industry_dummy[
-                industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
-            ]
-            return industry_dummy
+        # def deal_dummy(industry_dummy):
+        #     industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
+        #     industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
+        #     col = ["code", "date"] + industry_ws
+        #     industry_dummy.columns = col
+        #     industry_dummy = industry_dummy[
+        #         industry_dummy.date >= pd.Timestamp(str(STATES["START"]))
+        #     ]
+        #     return industry_dummy
 
         if not no_read_indu:
             # week_here
-            cls.swindustry_dummy = (
-                pd.read_parquet(
-                    cls.homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
-                )
-                .fillna(0)
-                .set_index("date")
-                .groupby("code")
-                .resample(freq)
-                .last()
-            )
-            cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
+            # cls.swindustry_dummy = (
+            #     pd.read_parquet(
+            #         cls.homeplace.daily_data_file + "sw_industry_level1_dummies.parquet"
+            #     )
+            #     .fillna(0)
+            #     .set_index("date")
+            #     .groupby("code")
+            #     .resample(freq)
+            #     .last()
+            # )
+            # cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
+            cls.swindustry_dummy=moon_read_dummy(freq)
 
     @property
     def factors_out(self):
@@ -1181,12 +1184,7 @@ def get_long_short_comments(self, on_paper=False):
         self.inner_short_ret_yearly = self.inner_short_net_values[-1] * (
             self.freq_ctrl.counts_one_year / len(self.inner_short_net_values)
         )
-        self.group1_ret_yearly= self.group_net_values["group1"][-1] * (
-            self.freq_ctrl.counts_one_year / len(self.group_net_values.group1)
-        )
-        self.group10_ret_yearly = self.group_net_values["group10"][-1] * (
-            self.freq_ctrl.counts_one_year / len(self.group_net_values.group10)
-        )
+        
         # week_here
         self.long_short_vol_yearly = np.std(self.long_short_rets) * (
             self.freq_ctrl.counts_one_year**0.5
@@ -1256,6 +1254,18 @@ def get_total_comments(self, groups_num):
             ].mean()
         else:
             self.factor_turnover_rate = self.factor_turnover_rates["group1"].mean()
+        self.group_mean_rets_monthly = self.group_rets.drop(
+            columns=["long_short"]
+        ).mean()
+        # self.group_mean_rets_monthly = (
+        #     self.group_mean_rets_monthly - self.group_mean_rets_monthly.mean()
+        # )
+        mar=self.market_ret.loc[self.factors_out.index]
+        self.group_mean_rets_monthly = (
+            self.group_mean_rets_monthly - mar.mean()
+        )*self.freq_ctrl.counts_one_year
+        self.group1_ret_yearly= self.group_mean_rets_monthly.loc['group1']
+        self.group10_ret_yearly = self.group_mean_rets_monthly.loc['group10']
         self.total_comments = pd.concat(
             [
                 self.ic_icir_and_rank,
@@ -1306,16 +1316,7 @@ def get_total_comments(self, groups_num):
             ]
         )
         # print(self.total_comments)
-        self.group_mean_rets_monthly = self.group_rets.drop(
-            columns=["long_short"]
-        ).mean()
-        # self.group_mean_rets_monthly = (
-        #     self.group_mean_rets_monthly - self.group_mean_rets_monthly.mean()
-        # )
-        mar=self.market_ret.loc[self.factors_out.index]
-        self.group_mean_rets_monthly = (
-            self.group_mean_rets_monthly - mar.mean()
-        )*self.freq_ctrl.counts_one_year
+        
 
     def plot_net_values(self, y2, filename, iplot=1, ilegend=1, without_breakpoint=0):
         """使用matplotlib来画图，y2为是否对多空组合采用双y轴"""
@@ -2209,13 +2210,14 @@ def __init__(
         """
         cls.homeplace = HomePlace()
         # barra因子数据
-        styles = os.listdir(cls.homeplace.barra_data_file)
-        styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
-        barras = {}
-        for s in styles:
-            k = s.split(".")[0]
-            v = pd.read_parquet(cls.homeplace.barra_data_file + s).resample("W").last()
-            barras[k] = v
+        # styles = os.listdir(cls.homeplace.barra_data_file)
+        # styles = [i for i in styles if (i.endswith(".parquet")) and (i[0] != ".")]
+        # barras = {}
+        # for s in styles:
+        #     k = s.split(".")[0]
+        #     v = pd.read_parquet(cls.homeplace.barra_data_file + s).resample("W").last()
+        #     barras[k] = v
+        barras=moon_read_barra()
         rename_dict = {
             "size": "市值",
             "nonlinearsize": "非线性市值",
@@ -2642,11 +2644,10 @@ def sing(dfs: list[pd.DataFrame], date: pd.Timestamp):
 
 
 @do_on_dfs
-def sun(factor:pd.DataFrame,rolling_5:int=1):
+def sun(factor:pd.DataFrame,rolling_days:int=10):
     '''先单因子测试，再测试其与常用风格之间的关系'''
-    if rolling_5:
-        factor=boom_one(factor)
-        ractor=boom_one(factor.rank(axis=1))
+    ractor=boom_one(factor.rank(axis=1),rolling_days)
+    factor=boom_one(factor,rolling_days)
     shen=pure_moonnight(factor)
     pfi=pure_snowtrain(ractor)
     shen=pure_moonnight(pfi,neutralize=1)