BudgetOptimizer extracts response variable from graph

pymc-labs · Jan 8, 2025 · 10b0783 · 10b0783
1 parent 9684821
commit 10b0783
Show file tree

Hide file tree

Showing 3 changed files with 116 additions and 137 deletions.
diff --git a/pymc_marketing/mmm/budget_optimizer.py b/pymc_marketing/mmm/budget_optimizer.py
@@ -20,11 +20,13 @@
 import numpy as np
 import pytensor.tensor as pt
 from pydantic import BaseModel, ConfigDict, Field
-from pytensor import function
+from pymc.logprob.utils import rvs_in_graph
+from pymc.model.transform.optimization import freeze_dims_and_data
+from pytensor import clone_replace, function
+from pytensor.graph import rewrite_graph, vectorize_graph
 from scipy.optimize import minimize
 
-from pymc_marketing.mmm.components.adstock import AdstockTransformation
-from pymc_marketing.mmm.components.saturation import SaturationTransformation
+from pymc_marketing.mmm.mmm import BaseMMM
 from pymc_marketing.mmm.utility import UtilityFunctionType, average_response
 
 
@@ -49,58 +51,28 @@ class BudgetOptimizer(BaseModel):
 
     Parameters
     ----------
-    adstock : AdstockTransformation
-        The adstock class.
-    saturation : SaturationTransformation
-        The saturation class.
-    num_periods : int
-        The number of time units.
-    parameters : dict
-        A dictionary of parameters for each channel.
-    scales : np.ndarray
-        The scale parameter for each channel variable.
-    response_scaler : float, optional
-        The scaling factor for the target response variable. Default is 1.
-    adstock_first : bool, optional
-        Whether to apply adstock transformation first or saturation transformation first.
-        Default is True.
+    hmm_model : BaseMMM
+        The marketing mix model to optimize.
     utility_function : UtilityFunctionType, optional
         The utility function to maximize. Default is the mean of the response distribution.
 
     """
 
-    adstock: AdstockTransformation = Field(
-        ..., description="The adstock transformation class."
-    )
-    saturation: SaturationTransformation = Field(
-        ..., description="The saturation transformation class."
-    )
     num_periods: int = Field(
         ...,
         gt=0,
         description="The number of time units at time granularity which the budget is to be allocated.",
     )
-    parameters: dict[str, Any] = Field(
-        ..., description="A dictionary of parameters for each channel."
-    )
-    scales: np.ndarray = Field(
-        ..., description="The scale parameter for each channel variable"
-    )
-    response_scaler: float = Field(
-        default=1.0,
-        description="Scaling factor for the target response variable. Defaults to 1.",
-    )
-    adstock_first: bool = Field(
-        True,
-        description="Whether to apply adstock transformation first or saturation transformation first.",
-    )
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    response_scaler_sym: pt.TensorVariable = Field(
-        default=None,
-        exclude=True,
-        repr=False,
-        description="Response scaler tensor variable.",
+    hmm_model: BaseMMM = Field(
+        ...,
+        description="The marketing mix model to optimize.",
+    )
+
+    response_variable: str = Field(
+        default="channel_contributions",
+        description="The response variable to optimize.",
     )
 
     utility_function: UtilityFunctionType = Field(
@@ -116,17 +88,15 @@ class BudgetOptimizer(BaseModel):
 
     def __init__(self, **data):
         super().__init__(**data)
-        self.response_scaler_sym = pt.as_tensor_variable(self.response_scaler)
+        self._num_channels = len(self.hmm_model.model.coords["channel"])
         self._compiled_functions = {}
         self._compile_objective_and_grad()
 
     def _compile_objective_and_grad(self):
         """Compile the objective function and its gradient using symbolic computation."""
-        budgets_sym = pt.vector("budgets")
+        budgets_sym = pt.vector("budgets", shape=(self._num_channels,))
 
-        _response_distribution = self._estimate_response(budgets=budgets_sym)
-
-        response_distribution = _response_distribution.sum(axis=(2, 3)).flatten()
+        response_distribution = self._extract_response_distribution(budgets=budgets_sym)
 
         objective_value = -self.utility_function(
             samples=response_distribution, budgets=budgets_sym
@@ -155,67 +125,84 @@ def _gradient(self, budgets: pt.TensorVariable) -> pt.TensorVariable:
         """Gradient of the objective function."""
         return self._compiled_functions[self.utility_function]["gradient"](budgets)
 
-    def _estimate_response(self, budgets: list[float]) -> np.ndarray:
-        """Calculate the total response during a period of time given the budgets.
-
-        It considers the saturation and adstock transformations.
-
-        Parameters
-        ----------
-        budgets : list[float]
-            The budgets for each channel.
-
-        Returns
-        -------
-        np.ndarray
-            The estimated response distribution.
-
-        """
-        first_transform, second_transform = (
-            (self.adstock, self.saturation)
-            if self.adstock_first
-            else (self.saturation, self.adstock)
+    def _extract_response_distribution(
+        self, budgets: pt.TensorVariable
+    ) -> pt.TensorVariable:
+        """Extract the response graph, conditioned on the posterior draws and a placeholder budget variable."""
+        if not (isinstance(budgets, pt.TensorVariable) and budgets.type.ndim == 1):
+            raise ValueError("budgets must be a 1D TensorVariable")
+
+        model = self.hmm_model.model
+        posterior = self.hmm_model.idata.posterior  # type: ignore
+        max_lag = self.hmm_model.adstock.l_max
+        num_periods = self.num_periods
+
+        # Freeze every dim except "date" (and no data) for a more succinct graph
+        model = freeze_dims_and_data(
+            model, data=[], dims=[dim for dim in model.coords if dim != "date"]
         )
 
-        # Convert scales to a tensor variable when needed
-        budget = budgets / pt.as_tensor_variable(self.scales)
+        response_variable = model[self.response_variable]
+
+        # Replicate the budget over num_periods and append zeros to also quantify carry-over effects
+        n_channels = len(model.coords["channel"])
+        budgets_tiled = pt.broadcast_to(budgets, (num_periods, n_channels))
+        budgets_full = pt.zeros((num_periods + max_lag, n_channels))
+        budgets_full = budgets_full[:num_periods, :].set(budgets_tiled)
+        budgets_full.name = "budgets_full"
+
+        # Replace model free_RVs by placeholder variables
+        placeholder_replace_dict = {
+            model[free_RV.name]: pt.tensor(
+                name=free_RV.name,
+                shape=free_RV.type.shape,
+                dtype=free_RV.dtype,
+            )
+            for free_RV in model.free_RVs
+        }
 
-        # Convert parameters to tensor variables if necessary
-        def convert_params(params):
-            return {
-                k: (pt.as_tensor_variable(v) if isinstance(v, np.ndarray) else v)
-                for k, v in params.items()
-            }
+        # Replace the channel_data by the budget variable
+        placeholder_replace_dict[model["channel_data"]] = budgets_full
 
-        first_params = convert_params(
-            self.parameters["adstock_params"]
-            if self.adstock_first
-            else self.parameters["saturation_params"]
-        )
-        second_params = convert_params(
-            self.parameters["saturation_params"]
-            if self.adstock_first
-            else self.parameters["adstock_params"]
+        [response_variable] = clone_replace(
+            [response_variable],
+            replace=placeholder_replace_dict,
         )
 
-        spend = pt.tile(budget, (self.num_periods, 1))
-        spend_extended = pt.concatenate(
-            [spend, pt.zeros((self.adstock.l_max, spend.shape[1]))], axis=0
+        if rvs_in_graph([response_variable]):
+            raise RuntimeError("RVs found in the extracted graph, this is likely a bug")
+
+        # Cleanup graph prior to vectorization
+        response_variable = rewrite_graph(
+            response_variable, include=("canonicalize", "ShapeOpt")
         )
 
-        _response = first_transform.function(x=spend_extended, **first_params)
+        # Replace dummy variables by posterior constants (and vectorize graph)
+        replace_dict = {}
+        for placeholder in placeholder_replace_dict.values():
+            if placeholder.name == "budgets_full":
+                continue
+            replace_dict[placeholder] = pt.constant(
+                posterior[placeholder.name].astype(placeholder.dtype),
+                name=placeholder.name,
+            )
 
-        for param_name, param_value in second_params.items():
-            if isinstance(param_value, pt.TensorVariable) and param_value.ndim == 3:
-                param_value = param_value.dimshuffle(0, 1, "x", 2)
-                second_params[param_name] = param_value
+        response_variable_distribution = vectorize_graph(
+            response_variable, replace=replace_dict
+        )
 
-        # Multiply by the response_scaler_sym
-        return (
-            second_transform.function(x=_response, **second_params)
-            * self.response_scaler_sym
+        # Final cleanup of the vectorized graph.
+        # This shouldn't be needed: vectorize should simply do nothing when there are no batch dims!
+        response_variable_distribution = rewrite_graph(
+            response_variable_distribution,
+            include=(
+                "local_eager_useless_unbatched_blockwise",
+                "local_useless_unbatched_blockwise",
+            ),
         )
 
+        return response_variable_distribution
+
     def allocate_budget(
         self,
         total_budget: float,
@@ -259,7 +246,7 @@ def allocate_budget(
         """
         if budget_bounds is None:
             budget_bounds = {
-                channel: (0, total_budget) for channel in self.parameters["channels"]
+                channel: (0, total_budget) for channel in self.hmm_model.channel_columns
             }
             warnings.warn(
                 "No budget bounds provided. Using default bounds (0, total_budget) for each channel.",
@@ -281,15 +268,14 @@ def allocate_budget(
         else:
             constraints = custom_constraints
 
-        num_channels = len(self.parameters["channels"])
-        initial_guess = np.ones(num_channels) * total_budget / num_channels
+        initial_guess = np.ones(self._num_channels) * total_budget / self._num_channels
         bounds = [
             (
                 (budget_bounds[channel][0], budget_bounds[channel][1])
                 if channel in budget_bounds
                 else (0, total_budget)
             )
-            for channel in self.parameters["channels"]
+            for channel in self.hmm_model.channel_columns
         ]
 
         if minimize_kwargs is None:
@@ -310,7 +296,7 @@ def allocate_budget(
             optimal_budgets = {
                 name: budget
                 for name, budget in zip(
-                    self.parameters["channels"], result.x, strict=False
+                    self.hmm_model.channel_columns, result.x, strict=False
                 )
             }
             return optimal_budgets, result

diff --git a/pymc_marketing/mmm/mmm.py b/pymc_marketing/mmm/mmm.py
@@ -32,7 +32,8 @@
 
 from pymc_marketing.hsgp_kwargs import HSGPKwargs
 from pymc_marketing.mmm.base import BaseValidateMMM
-from pymc_marketing.mmm.budget_optimizer import BudgetOptimizer
+
+# BudgetOptimizer is imported inside the methods that use it: budget_optimizer imports BaseMMM from this module (circular import).
 from pymc_marketing.mmm.components.adstock import (
     AdstockTransformation,
     adstock_from_dict,
@@ -2295,6 +2296,8 @@ def optimize_budget(
 
         _parameters = self._format_parameters_for_budget_allocator()
 
+        from pymc_marketing.mmm.budget_optimizer import BudgetOptimizer
+
         allocator = BudgetOptimizer(
             adstock=self.adstock,
             saturation=self.saturation,
@@ -2389,6 +2392,8 @@ def allocate_budget_to_maximize_response(
 
         _parameters = self._format_parameters_for_budget_allocator()
 
+        from pymc_marketing.mmm.budget_optimizer import BudgetOptimizer
+
         allocator = BudgetOptimizer(
             adstock=self.adstock,
             saturation=self.saturation,

diff --git a/tests/mmm/test_budget_optimizer.py b/tests/mmm/test_budget_optimizer.py
@@ -167,38 +167,31 @@ def test_allocate_budget_zero_total(
 def test_allocate_budget_custom_minimize_args(minimize_mock) -> None:
     total_budget = 100
     budget_bounds = {"channel_1": (0.0, 50.0), "channel_2": (0.0, 50.0)}
-    parameters = {
-        "saturation_params": {
-            "lam": np.array(
-                [[[0.1, 0.2], [0.3, 0.4]], [[0.5, 0.6], [0.7, 0.8]]]
-            ),  # dims: chain, draw, channel
-            "beta": np.array(
-                [[[0.5, 1.0], [0.5, 1.0]], [[0.5, 1.0], [0.5, 1.0]]]
-            ),  # dims: chain, draw, channel
-        },
-        "adstock_params": {
-            "alpha": np.array(
-                [[[0.5, 0.7], [0.5, 0.7]], [[0.5, 0.7], [0.5, 0.7]]]
-            )  # dims: chain, draw, channel
-        },
-        "channels": ["channel_1", "channel_2"],
-    }
+    # parameters = {
+    #     "saturation_params": {
+    #         "lam": np.array(
+    #             [[[0.1, 0.2], [0.3, 0.4]], [[0.5, 0.6], [0.7, 0.8]]]
+    #         ),  # dims: chain, draw, channel
+    #         "beta": np.array(
+    #             [[[0.5, 1.0], [0.5, 1.0]], [[0.5, 1.0], [0.5, 1.0]]]
+    #         ),  # dims: chain, draw, channel
+    #     },
+    #     "adstock_params": {
+    #         "alpha": np.array(
+    #             [[[0.5, 0.7], [0.5, 0.7]], [[0.5, 0.7], [0.5, 0.7]]]
+    #         )  # dims: chain, draw, channel
+    #     },
+    #     "channels": ["channel_1", "channel_2"],
+    # }
     minimize_kwargs = {
         "method": "SLSQP",
         "options": {"ftol": 1e-8, "maxiter": 1_002},
     }
 
-    adstock = GeometricAdstock(l_max=4)
-    saturation = LogisticSaturation()
+    # adstock = GeometricAdstock(l_max=4)
+    # saturation = LogisticSaturation()
 
-    optimizer = optimizer = BudgetOptimizer(
-        adstock=adstock,
-        saturation=saturation,
-        num_periods=30,
-        parameters=parameters,
-        adstock_first=True,
-        scales=np.array([1, 1]),
-    )
+    optimizer = BudgetOptimizer(num_periods=30)
     match = "Using default equality constraint"
     with pytest.warns(UserWarning, match=match):
         optimizer.allocate_budget(
@@ -253,16 +246,11 @@ def test_allocate_budget_custom_minimize_args(minimize_mock) -> None:
 def test_allocate_budget_infeasible_constraints(
     total_budget, budget_bounds, parameters, custom_constraints
 ):
-    adstock = GeometricAdstock(l_max=4)
-    saturation = LogisticSaturation()
+    # adstock = GeometricAdstock(l_max=4)
+    # saturation = LogisticSaturation()
 
-    optimizer = optimizer = BudgetOptimizer(
-        adstock=adstock,
-        saturation=saturation,
+    optimizer = BudgetOptimizer(
         num_periods=30,
-        parameters=parameters,
-        adstock_first=True,
-        scales=np.array([1, 1]),
     )
 
     with pytest.raises(MinimizeException, match="Optimization failed"):