Test Set function #6

Open · wants to merge 8 commits into base: main
Changes from 3 commits
14 changes: 13 additions & 1 deletion autoum/approaches/uplift_random_forest.py
@@ -52,14 +52,20 @@ def __init__(self, parameters: dict, approach_parameters: ApproachParameters, ev
"IDDP": Invariante DDP (Rößler et al. 2022)
"""

self.parameters = parameters
self.parameters = parameters.copy()
Owner commented: Why are you copying the dictionary first?
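For context on the question above: the constructor later runs `del self.parameters["post_prune"]`, so without `.copy()` that deletion would also mutate the dictionary owned by the caller. A minimal standalone sketch (not from this PR; names are illustrative):

```python
# Deleting a key through an alias mutates the caller's dict;
# deleting from a shallow copy leaves the original intact.
caller_params = {"n_estimators": 8, "post_prune": True}

aliased = caller_params            # same object, no copy
del aliased["post_prune"]
print(caller_params)               # {'n_estimators': 8} -- key gone for the caller too

caller_params = {"n_estimators": 8, "post_prune": True}
copied = caller_params.copy()      # independent top-level dict
del copied["post_prune"]
print(caller_params)               # {'n_estimators': 8, 'post_prune': True}
```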

self.parameters["evaluationFunction"] = eval_function
self.feature_importance = approach_parameters.feature_importance
self.save = approach_parameters.save
self.path = approach_parameters.path
self.post_prune = parameters["post_prune"]
self.split_number = approach_parameters.split_number
self.log = logging.getLogger(type(self).__name__)

del self.parameters["post_prune"]

if eval_function not in ["ED", "KL", "CHI"]:
self.post_prune = False

def analyze(self, data_set_helper: DataSetsHelper) -> dict:
"""
Calculate the score (ITE/Uplift/CATE) for each sample using uplift random forest
@@ -78,6 +84,12 @@ def analyze(self, data_set_helper: DataSetsHelper) -> dict:

urf.fit(X=data_set_helper.x_train, treatment=experiment_groups_col, y=data_set_helper.y_train)

if self.post_prune:
for tree in urf.uplift_forest:
tree.prune(data_set_helper.x_valid,
data_set_helper.df_valid["treatment"],
data_set_helper.df_valid["response"])

self.log.debug(urf)

if self.save:
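For readers unfamiliar with the pruning call used above, here is a minimal, self-contained sketch of the same step against CausalML directly. The synthetic data and variable names are illustrative assumptions, not part of this PR:

```python
import numpy as np
from causalml.inference.tree import UpliftRandomForestClassifier

# Toy data: 5 features, a binary outcome, and a control ("c") vs. treatment ("t") flag.
rng = np.random.default_rng(42)
X = rng.normal(size=(400, 5))
treatment = rng.choice(["c", "t"], size=400)
y = rng.integers(0, 2, size=400)

X_train, X_valid = X[:300], X[300:]
t_train, t_valid = treatment[:300], treatment[300:]
y_train, y_valid = y[:300], y[300:]

# Pruning is only enabled for the ED, KL and CHI split criteria in this PR.
urf = UpliftRandomForestClassifier(n_estimators=4, max_depth=3,
                                   evaluationFunction="KL", control_name="c")
urf.fit(X=X_train, treatment=t_train, y=y_train)

# Each member of uplift_forest is an UpliftTreeClassifier; prune() collapses
# splits that do not improve the split criterion on the held-out data.
for tree in urf.uplift_forest:
    tree.prune(X_valid, t_valid, y_valid)
```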
33 changes: 22 additions & 11 deletions autoum/pipelines/pipeline_rw.py
@@ -53,6 +53,7 @@ def __init__(self,
plot_uqc: bool = True,
plot_save_figures: bool = False,
pool_capacity: int = 40,
post_prune: bool = False,
rlearner: bool = False,
run_name: str = "RUN",
run_id: int = 1,
@@ -109,6 +110,7 @@ def __init__(self,
:param plot_uqc: True if the UQC value for a curve should be included in the plot legend. False otherwise. Default: True
:param plot_save_figures: True if the resulting qini figures shall be saved. False otherwise. Default: False
:param pool_capacity: Set this to the maximum number of free kernels for the calculation. Default 40
:param post_prune: True if the uplift models should be pruned after training (applies to URF_CHI, URF_ED and URF_KL). False otherwise. Default: False
:param rlearner: True, if R-Learner should be applied. False otherwise. Default: False
:param run_id: Id of the run (For logging and saving purposes). Default: 1
:param run_name: Name of the run (For logging and saving purposes). Default: "RUN"
@@ -152,6 +154,7 @@ def __init__(self,
self.plot_uqc = plot_uqc
self.plot_save_figures = plot_save_figures
self.pool_capacity = pool_capacity
self.post_prune = post_prune
self.rlearner = rlearner
self.random_seed = random_seed
self.save_models = save_models
@@ -184,7 +187,7 @@ def __init__(self,

# Hyperparameters of different uplift modeling approaches
self.max_features = max_features
self.set_parameters(n_estimators, max_depth, min_samples_leaf, min_samples_treatment, n_reg, n_jobs, normalization, honesty, random_seed)
self.set_parameters(n_estimators, max_depth, min_samples_leaf, min_samples_treatment, n_reg, n_jobs, normalization, honesty, random_seed, post_prune)

# Create helper
self.helper = HelperPipeline()
@@ -216,26 +219,33 @@ def sanity_checks(self):
assert 0.1 <= self.validation_size <= 0.9, "Please select 0.1 <= validation_size <= 0.9"
assert self.n_estimators % 4 == 0, "Please select a multiple of 4 as n_estimators"

def analyze_dataset(self, data: pd.DataFrame):
def analyze_dataset(self, data: pd.DataFrame, test_data: pd.DataFrame = None):
"""
Apply, compare, and evaluate various uplift modeling approaches on the given data set.

:param data: Dataset to be analyzed
:param test_data: (optional) Test dataset that the pipeline will use for the test metrics instead of splitting `data` itself
"""

if not isinstance(data, pd.DataFrame):
return

if test_data is not None:
assert data.columns.equals(test_data.columns), "The train and test dataset columns are not identical"

start = time.time()
logging.info("Starting analyzing dataset ... ")

try:
df_train, df_test = train_test_split(data, test_size=self.test_size, shuffle=True, stratify=data[['response', 'treatment']], random_state=self.random_seed)
df_train.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
except ValueError:
logging.error("Stratification not possible" + data.groupby(["response", "treatment"]).size().reset_index(name="Counter").to_string())
raise ValueError("Stratification not possible" + data.groupby(["response", "treatment"]).size().reset_index(name="Counter").to_string())
if test_data is not None:
df_train, df_test = data.sample(frac=1.0, random_state=self.random_seed), test_data
else:
try:
df_train, df_test = train_test_split(data, test_size=self.test_size, shuffle=True, stratify=data[['response', 'treatment']], random_state=self.random_seed)
df_train.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
except ValueError:
logging.error("Stratification not possible" + data.groupby(["response", "treatment"]).size().reset_index(name="Counter").to_string())
raise ValueError("Stratification not possible" + data.groupby(["response", "treatment"]).size().reset_index(name="Counter").to_string())

# Get feature names
feature_names = list(df_train.drop(['response', 'treatment'], axis=1).columns.values)
@@ -726,7 +736,7 @@ def calculate_feature_importance_mean(self, feature_importances: dict, feature_n
HelperPipeline.save_feature_importance(importance, feature_names, "Feature_importance_{}".format(key), self.plot_save_figures, self.plot_figures,
self.data_home + FIGURES + self.run_name + "/")

def set_parameters(self, n_estimators, max_depth, min_samples_leaf, min_samples_treatment, n_reg, n_jobs, normalization, honesty, random_seed):
def set_parameters(self, n_estimators, max_depth, min_samples_leaf, min_samples_treatment, n_reg, n_jobs, normalization, honesty, random_seed, post_prune):
"""
Set the parameters for each approach
"""
@@ -742,7 +752,8 @@ def set_parameters(self, n_estimators, max_depth, min_samples_leaf, min_samples_
"n_jobs": n_jobs,
"control_name": "c",
"normalization": normalization,
"honesty": honesty
"honesty": honesty,
"post_prune": post_prune
}

s_learner_parameters = {
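A short usage sketch of the two options this file introduces (`post_prune` and the optional `test_data` argument of `analyze_dataset`). The import path, CSV file names, and column layout are assumptions for illustration:

```python
import pandas as pd
from autoum.pipelines.pipeline_rw import PipelineRW  # assumed import path

# Train and test frames must share the same columns, including 'response' and 'treatment'.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# post_prune=True prunes the URF_ED / URF_KL / URF_CHI forests on the
# validation split after fitting; it has no effect on other split criteria.
pipeline = PipelineRW(post_prune=True)

# With test_data supplied, the pipeline skips its own stratified
# train/test split and evaluates on the provided frame instead.
pipeline.analyze_dataset(df_train, test_data=df_test)
```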
4 changes: 2 additions & 2 deletions tests/test_pipeline_rw.py
@@ -241,10 +241,10 @@ def test_plotting(self):

def test_create_approach_tuples(self):
cv_number_splits = 10
pipeline = PipelineRW(cv_number_splits=cv_number_splits, urf_ddp=False, two_model=False)
pipeline = PipelineRW(cv_number_splits=cv_number_splits, slearner=True, two_model=True)
dataframe_pairs = pipeline.create_k_splits(df_train=self.df_train, df_test=self.df_test)
tuple_list = pipeline.create_approach_tuples(dataframe_pairs)
self.assertEqual(len(tuple_list), 15 * cv_number_splits)
self.assertEqual(len(tuple_list), 2 * cv_number_splits)
for _tuple in tuple_list:
self.assertEqual(len(_tuple), 5)

3 changes: 2 additions & 1 deletion tests/test_uplift_random_forest.py
@@ -36,7 +36,8 @@ def setUp(self):
"n_reg": 100,
"random_state": 123,
"n_jobs": 10,
"control_name": "c"
"control_name": "c",
"post_prune": True
}

def test_analyze(self):
8 changes: 5 additions & 3 deletions tests/test_utils_pipelines.py
@@ -1,3 +1,4 @@
import time
import unittest
from unittest.mock import MagicMock, patch

@@ -56,7 +57,8 @@ def setUp(self):
"n_jobs": n_jobs,
"control_name": "c",
"normalization": True,
"honesty": False
"honesty": False,
"post_prune": True
}

s_learner_parameters = {
@@ -240,7 +242,7 @@ def test_apply_uplift_approaches(self, m_apply_approach):

if i == "TWO_MODEL":
self.assertTrue(TwoModel.__instancecheck__(m_apply_approach.call_args[0][0]))
elif "URF" in i:
elif i == "URF":
self.assertTrue(UpliftRandomForest.__instancecheck__(m_apply_approach.call_args[0][0]))
elif i == "TRADITIONAL":
self.assertTrue(Traditional.__instancecheck__(m_apply_approach.call_args[0][0]))
@@ -343,7 +345,7 @@ def test_cast_to_dataframe(self):
df_uplift = helper.cast_to_dataframe(list_dict)

# Check if type equals pd.DataFrame
self.assertEqual(type(df_uplift), pd.DataFrame)
self.assertEqual(df_uplift.__class__, pd.DataFrame)

# Check if the DataFrame contains 22 columns (11 columns for each approach)
self.assertEqual(df_uplift.shape[1], 22)