Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale spend data to improve budget allocation efficacy #945

Open
wants to merge 154 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
154 commits
Select commit Hold shift + click to select a range
f055f98
MMM Lift test calibration (#590)
wd60622 Apr 2, 2024
8590b45
Drop python 3.9 support (#615)
juanitorduz Apr 5, 2024
27a55d5
add lift tests check
juanitorduz Apr 4, 2024
f3ec2cf
Add more content to the Gamma-Gamma Notebook (#573)
juanitorduz Apr 5, 2024
7b82cc3
Add more content to the BG/NBD Notebook (#571)
juanitorduz Apr 5, 2024
da9f025
Improve MMM Docs (#612)
juanitorduz Apr 5, 2024
38b55a2
Fix `clv` plotting bugs and edits to Quickstart (#601)
ColtAllen Apr 8, 2024
ffef5a6
[pre-commit.ci] pre-commit autoupdate (#616)
pre-commit-ci[bot] Apr 8, 2024
a151330
improve coords matching (#623)
juanitorduz Apr 11, 2024
6f33cef
python 3.12 attempt (#618)
juanitorduz Apr 11, 2024
c40c8b8
closes #520 (#621) dataset -> data
wd60622 Apr 11, 2024
ff937f4
mmm example notebook small fixes (#626)
juanitorduz Apr 12, 2024
e2dba27
Remove ruff E501 ignore (#619)
juanitorduz Apr 12, 2024
b777eb5
Update pypi.yml (dash change) (#633)
juanitorduz Apr 16, 2024
4a737cf
[pre-commit.ci] pre-commit autoupdate (#629)
pre-commit-ci[bot] Apr 16, 2024
2be2f63
UP rule ruff (#635)
juanitorduz Apr 16, 2024
582d2ed
Add ruff `RUF` rules (#636)
juanitorduz Apr 16, 2024
c02d349
Add Flake8-BugBear and Bandit (Security!) (#637)
juanitorduz Apr 18, 2024
78964f0
Update version.txt (#640)
juanitorduz Apr 19, 2024
18d926c
[pre-commit.ci] pre-commit autoupdate (#642)
pre-commit-ci[bot] Apr 23, 2024
ce41f5f
Fix build badge (#645)
juanitorduz Apr 23, 2024
818781a
Add downloads stats to README
juanitorduz Apr 25, 2024
91564cf
Pareto/NBD Example Notebook (#646)
ColtAllen Apr 25, 2024
7034e28
add spaces, increase indentation, and fix number order to Pareto note…
wd60622 Apr 27, 2024
d531d0c
Add link to new Pareto notebook (#649)
juanitorduz Apr 27, 2024
1c186f0
Plot Waterfall Components Decomposition (#631)
cetagostini-wise Apr 28, 2024
b36b0ac
Update resources.md (#652)
coreyabs-db Apr 29, 2024
ae6ae1d
fix ylabel (#654)
juanitorduz Apr 29, 2024
41dda4f
[pre-commit.ci] pre-commit autoupdate (#655)
pre-commit-ci[bot] Apr 29, 2024
a89bd93
Lift test fixes (#656)
wd60622 Apr 30, 2024
5a93ef2
add to docstrings tanh saturation (#657)
wd60622 Apr 30, 2024
b58f9b6
Add quickstart to readme (#653)
juanitorduz Apr 30, 2024
e63929a
Add contributors to README (#659)
juanitorduz May 1, 2024
f0097e2
Time varying intercept (#628)
ulfaslak May 1, 2024
8e67803
Update README.md (#660)
nialloulton May 1, 2024
fd6b331
Add tv intrecept to readme (#661)
juanitorduz May 2, 2024
b27da9e
fix title level (#663)
juanitorduz May 2, 2024
b785baa
Remove unnecessary NonImplemented errors from abstract methods (#662)
juanitorduz May 2, 2024
19bbf6d
Pass conv mode to adstock functions (#665)
juanitorduz May 2, 2024
fc37560
[Try] Fix compressed images in docs. (#667)
juanitorduz May 3, 2024
5f54d8e
Update pyproject.toml (#671)
juanitorduz May 5, 2024
0e8ac45
add license (#673)
juanitorduz May 5, 2024
48c88b2
use grep and sed in the env line (#675)
wd60622 May 6, 2024
2374a0c
add sample_kwargs (#676)
juanitorduz May 8, 2024
4dbf943
MMM NB Improvements (waterfall & error plots) (#664)
juanitorduz May 8, 2024
61da9c0
Update version.txt (#677)
juanitorduz May 8, 2024
6e37d3b
[pre-commit.ci] pre-commit autoupdate (#683)
pre-commit-ci[bot] May 13, 2024
ad2353a
v0 Streamlit MMM Explainer App (#614)
louismagowan May 21, 2024
9aa8bc0
Correct BetaGeo docstring example (#693)
wd60622 May 22, 2024
ae84163
Add `BetaGeoBetaBinom` Distribution Block (#431)
ColtAllen May 27, 2024
0e6ce83
[pre-commit.ci] pre-commit autoupdate (#705)
pre-commit-ci[bot] May 27, 2024
3feec18
Fix related to column renaming after aggregating test frequency (#698)
IvanUgrin May 28, 2024
b763c12
RFM Segmentation (#680)
ColtAllen May 28, 2024
7be19f6
create bgbb_donations.csv (#710)
ColtAllen Jun 2, 2024
b4d84cc
closes #678 (#716)
wd60622 Jun 3, 2024
ba41aef
use URL for README image (#715)
wd60622 Jun 3, 2024
86805a4
closes #264 (#714)
wd60622 Jun 3, 2024
6edc4ca
[pre-commit.ci] pre-commit autoupdate (#719)
pre-commit-ci[bot] Jun 3, 2024
f79b7b0
Update `BetaGeoModel` API (#709)
ColtAllen Jun 9, 2024
f3be754
User-defined media transformations and custom ordering (#632)
cetagostini-wise Jun 10, 2024
dc117be
CLV Plotting API (#728)
ColtAllen Jun 10, 2024
c31bee4
[pre-commit.ci] pre-commit autoupdate (#730)
pre-commit-ci[bot] Jun 10, 2024
46ba03f
Fix some ParetoNBDModel docstring typos (#731)
Mews Jun 11, 2024
b6a938f
pass kwargs to minimizer (#737)
juanitorduz Jun 12, 2024
5296f0f
Minor improvements [MMM] (#735)
juanitorduz Jun 12, 2024
271966b
Set upper bound pymc 5.16 (#725)
juanitorduz Jun 12, 2024
6fe03dc
Media transformation sampling & plotting methods (#734)
wd60622 Jun 12, 2024
664b5ca
improve tests mmm utils (#738)
juanitorduz Jun 12, 2024
a148e83
`model.fit` doesn't remove prior samples (#741)
wd60622 Jun 12, 2024
4b6607a
Hierarchical Model Configuration (#743)
wd60622 Jun 13, 2024
0144bd3
[pre-commit.ci] pre-commit autoupdate (#756)
pre-commit-ci[bot] Jun 17, 2024
c994210
MMM Component Notebook (#748)
wd60622 Jun 18, 2024
8596c8a
start addressing sphinx warnings and rendering issues (#750)
OriolAbril Jun 18, 2024
5caa61a
Allowing Hierarchical Non Centered Parametrization (#747)
cetagostini Jun 21, 2024
5ff29b1
fix np typing (#763)
juanitorduz Jun 21, 2024
405a5cf
add it back (#764)
juanitorduz Jun 21, 2024
9f84dc9
remove noqa from plots (#761)
juanitorduz Jun 21, 2024
897cf3d
Creating Time Base component for Media Contribution (#752)
cetagostini Jun 22, 2024
6a5934c
Run Ruff Notebooks (#773)
juanitorduz Jun 23, 2024
0bfca1a
[pre-commit.ci] pre-commit autoupdate (#779)
pre-commit-ci[bot] Jun 25, 2024
cec2cc2
`GammaGammaModel` API Improvements (#758)
ColtAllen Jun 27, 2024
f6a1a37
Deepcopy of posterior to allow second `fit` call (#790)
wd60622 Jun 28, 2024
fafe354
Add prior predictive example notebook (#787)
juanitorduz Jun 28, 2024
d5c3d1e
CLV Modeling Domains and Docstrings (#785)
ColtAllen Jun 28, 2024
a933b70
fix nb (#793)
juanitorduz Jun 28, 2024
576cf45
Run example notebooks CI (#791)
juanitorduz Jun 28, 2024
9892311
docs: Update model_builder.py to resolve warning in documentation bui…
c0d33ngr Jun 30, 2024
93f5e77
Time Varying Media Contribution Notebook (#778)
cetagostini Jul 1, 2024
24496a0
prepare release (#799)
juanitorduz Jul 1, 2024
2388c01
`Prior` class to represent distributions (#759)
wd60622 Jul 1, 2024
5cda250
[pre-commit.ci] pre-commit autoupdate (#801)
pre-commit-ci[bot] Jul 1, 2024
7179904
Pull out seasonality as `YearlyFourier` and `MonthlyFourier` (#802)
wd60622 Jul 5, 2024
496eb76
Separate Weibull adstock into CDF & PDF (#810)
wd60622 Jul 5, 2024
9d4d04a
Add typing and package classifiers (#811)
wd60622 Jul 5, 2024
9b691a9
add imports to mmm module (#812)
wd60622 Jul 5, 2024
03e9215
Save & load support for time varying parameters (#815)
wd60622 Jul 8, 2024
723887a
migrate to Data and non-mutable coords (#816)
wd60622 Jul 8, 2024
a97e272
[pre-commit.ci] pre-commit autoupdate (#817)
pre-commit-ci[bot] Jul 8, 2024
d766722
PoC: Use Pydantic as data validator (#809)
juanitorduz Jul 9, 2024
05abba7
Date Validation and MMM Model Hamonization (Pydantic) (#824)
juanitorduz Jul 10, 2024
d235ff4
Remove warnings during tests (#823)
wd60622 Jul 11, 2024
717702a
Future-proof `prior_linearized` method call (#806)
shuvayan Jul 11, 2024
6049ae8
Create inverse_scaled_logistic_saturation and the corresponding class…
arthurmello Jul 18, 2024
6619076
fix (#842)
juanitorduz Jul 19, 2024
4cc4ba5
use labs theme as external (#830)
OriolAbril Jul 19, 2024
7a8b627
specify 0.9.0 as deprecation version (#849)
wd60622 Jul 22, 2024
a98815f
add intercept and target variable to example (#850)
wd60622 Jul 22, 2024
e6f844f
Various MMM small documentation fixes (#854)
juanitorduz Jul 22, 2024
8234c4b
Update UML diagrams (#856)
drbenvincent Jul 23, 2024
28ea158
Fixing ruff commands in Makefile #825 (#859)
MuradKhalil Jul 23, 2024
d759eb3
[pre-commit.ci] pre-commit autoupdate (#855)
pre-commit-ci[bot] Jul 24, 2024
b10f1f1
Fix model_builder docstrings (#861)
GiannisApost Jul 24, 2024
b7b97e7
add GH discussions link to README (#866)
cluhmann Jul 24, 2024
8ee9254
Skip coords with scalar value (#868)
GiannisApost Jul 25, 2024
9129a9e
Fix Visual for hill_saturation function (Issue #851 ) (#857)
PatrickRobotham Jul 25, 2024
de5679f
Allow plot MMM components in the original scale (#870)
juanitorduz Jul 25, 2024
19aea61
Inference changed to dataset (#873)
Ishaanjolly Jul 25, 2024
9755d3b
Add root saturation function (issue #702) (#858)
iraur Jul 25, 2024
7090401
Check for missing attrs after `sample_prior_predictive` and `fit` (#867)
wd60622 Jul 28, 2024
43131db
`json.loads` with python types bug (#881)
wd60622 Jul 29, 2024
24e9833
[pre-commit.ci] pre-commit autoupdate (#883)
pre-commit-ci[bot] Jul 29, 2024
7a89f27
Fix default `ConvMode` in docstring (#864)
ferrine Jul 29, 2024
ac645c9
Save off media transformations (#882)
wd60622 Jul 30, 2024
37cf3c8
Update deployment docs (#887)
wd60622 Jul 30, 2024
15396cd
Automate UML creation (#886)
wd60622 Jul 30, 2024
c72fb8a
prepare release (#888)
wd60622 Jul 30, 2024
4abac88
Update README.md (#893)
juanitorduz Jul 31, 2024
00d4e20
Add url health job from streamlit app (#902)
juanitorduz Aug 2, 2024
3eb5407
small improvements model config nb (#906)
juanitorduz Aug 2, 2024
dffae87
Add link model deployment to example notebook (#904)
juanitorduz Aug 2, 2024
41dd8a3
Add pymc-marketing version to some MMM notebooks (#907)
juanitorduz Aug 2, 2024
39f2336
Move adstock and saturation method imports to mmm.__all__ (#908)
PabloRoque Aug 5, 2024
ff4eef1
chore(Makefile): Adding a self-documenting command and light command …
louismagowan Aug 5, 2024
e3c79aa
Fix uml permissions (#913)
wd60622 Aug 5, 2024
6779fd1
[pre-commit.ci] pre-commit autoupdate (#914)
pre-commit-ci[bot] Aug 6, 2024
789aa0d
Don't run tests for non-code changes (#898)
dandeandean Aug 7, 2024
b070375
test running budget allocator nb (#919)
juanitorduz Aug 11, 2024
285f704
make hill pass through the origin (#920)
juanitorduz Aug 12, 2024
bfeb756
Add MMM ROAS Priors Case Study (#916)
juanitorduz Aug 12, 2024
1f89294
typo (#923)
juanitorduz Aug 12, 2024
1c8fefa
[pre-commit.ci] pre-commit autoupdate (#926)
pre-commit-ci[bot] Aug 12, 2024
49ac689
Original hill function definition (#925)
juanitorduz Aug 12, 2024
93fc9d9
MLflow autologging (#921)
wd60622 Aug 14, 2024
c1b5933
Adding BLAS to the env generation.
cetagostini Aug 14, 2024
35c8850
Merge branch 'main' of https://github.com/pymc-labs/pymc-marketing
cetagostini Aug 14, 2024
809a079
Solving optimizer issues & typos (#933)
cetagostini Aug 17, 2024
8d116b5
Log number of posterior & tuning samples (#943)
wd60622 Aug 18, 2024
5bbce01
point to GH discussions (#944)
cluhmann Aug 19, 2024
3126ae0
[pre-commit.ci] pre-commit autoupdate (#946)
pre-commit-ci[bot] Aug 19, 2024
a196a72
scale budget for minimize
MobiusLooper Aug 19, 2024
076dd2e
scale bounds
MobiusLooper Aug 21, 2024
f6c6825
Fallback to defaults in `adstock|saturation_from_dict` (#955)
PabloRoque Aug 22, 2024
39d38b7
Add PyDocStyle Support (#951)
juanitorduz Aug 22, 2024
3e7803d
Merge branch 'main' into budget-optimization-scaling
MobiusLooper Aug 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Lift test fixes (#656)
* support for negative values and dates (if used)

* fix terrible spelling

* test dates in coords

* cover numpy objects

* consolidate the tests
  • Loading branch information
wd60622 authored Apr 30, 2024
commit a89bd93110d06756e80f7d5f6908deec66f81f87
4 changes: 2 additions & 2 deletions pymc_marketing/mmm/delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
@@ -1303,7 +1303,7 @@ def add_lift_test_measurements(
then conditioned using the empirical lift, `delta_y`, and `sigma` of the lift test
with the specified distribution `dist`.

The sudo code for the lift test is as follows:
The pseudo-code for the lift test is as follows:

.. code-block:: python

@@ -1312,7 +1312,7 @@ def add_lift_test_measurements(
- saturation_curve(x)
)
empirical_lift = delta_y
dist(model_estimated_lift, sigma=sigma, observed=empirical_lift)
dist(abs(model_estimated_lift), sigma=sigma, observed=abs(empirical_lift))


The model has to be built before adding the lift tests.
34 changes: 30 additions & 4 deletions pymc_marketing/mmm/lift_test.py
Original file line number Diff line number Diff line change
@@ -28,7 +28,6 @@ def __init__(self, missing_values: npt.NDArray[np.int_]) -> None:


def _lift_test_index(lift_values: Values, model_values: Values) -> Index:
# TODO: better support for datetime64 required for date coordinates
same_value = lift_values[:, None] == model_values
if not (same_value.sum(axis=1) == 1).all():
missing_values = np.argwhere(same_value.sum(axis=1) == 0).flatten()
@@ -63,8 +62,16 @@ def lift_test_indices(df_lift_test: pd.DataFrame, model: pm.Model) -> Indices:
"""

columns = df_lift_test.columns.tolist()

return {
col: _lift_test_index(df_lift_test[col].to_numpy(), np.array(model.coords[col]))
col: _lift_test_index(
df_lift_test[col].to_numpy(),
# Coords in the model become tuples
# Reference: https://github.com/pymc-devs/pymc/blob/04b6881efa9f69711d604d2234c5645304f63d28/pymc/model/core.py#L998
# which become pd.Timestamp if from pandas objects
# Convert to Series stores them as np.datetime64
pd.Series(model.coords[col]).to_numpy(),
)
for col in columns
}

@@ -178,6 +185,23 @@ def index_variable(
return var.__getitem__(idx)


class NonMonotonicLiftError(Exception):
    """Signal that lift test results violate the increasing assumption."""


def check_increasing_assumption(df_lift_tests: pd.DataFrame) -> None:
    """Validate that ``delta_x`` and ``delta_y`` never have opposite signs.

    A row passes when the product of its ``delta_x`` and ``delta_y`` values
    compares ``>= 0``; that is, both deltas share a sign or at least one of
    them is zero.

    Parameters
    ----------
    df_lift_tests : pd.DataFrame
        Lift test results. Only the ``delta_x`` and ``delta_y`` columns are
        read.

    Raises
    ------
    NonMonotonicLiftError
        If the product fails the ``>= 0`` comparison for any row.
    """
    sign_product = df_lift_tests["delta_x"].mul(df_lift_tests["delta_y"])

    # Guard clause: nothing to report when every row passes the comparison.
    if sign_product.ge(0).all():
        return

    raise NonMonotonicLiftError(
        "The lift test results do not satisfy the increasing assumption."
    )


def add_lift_measurements_to_likelihood(
df_lift_test: pd.DataFrame,
variable_mapping,
@@ -257,6 +281,8 @@ def saturation_function(x, alpha, lam):
if missing_cols:
raise KeyError(f"Missing from DataFrame: {list(missing_cols)}")

check_increasing_assumption(df_lift_test)

model = pm.modelcontext(model)

var_names = list(variable_mapping.values())
@@ -281,9 +307,9 @@ def saturation_function(x, alpha, lam):

dist(
name=name,
mu=model_estimated_lift,
mu=pt.abs(model_estimated_lift),
sigma=df_lift_test["sigma"].to_numpy(),
observed=df_lift_test["delta_y"].to_numpy(),
observed=np.abs(df_lift_test["delta_y"].to_numpy()),
)


84 changes: 84 additions & 0 deletions tests/mmm/test_lift_test.py
Original file line number Diff line number Diff line change
@@ -9,8 +9,10 @@

from pymc_marketing.mmm.lift_test import (
MissingLiftTestError,
NonMonotonicLiftError,
add_logistic_empirical_lift_measurements_to_likelihood,
add_menten_empirical_lift_measurements_to_likelihood,
check_increasing_assumption,
index_variable,
indices_from_lift_tests,
lift_test_indices,
@@ -73,6 +75,43 @@ def test_lift_test_indices_another_dim(df_lift_tests) -> None:
assert indices["brand"].tolist() == [0, 0]


@pytest.mark.parametrize(
    "dates",
    [
        pd.date_range("2023-01-01", periods=3, freq="D"),
        pd.date_range("2023-01-01", periods=3, freq="W"),
        pd.date_range("2023-01-01", periods=3, freq="W-MON"),
        pd.date_range("2023-01-01", periods=3, freq="W-SUN"),
        pd.date_range("2023-01-01", periods=3, freq="D")
        .to_numpy()
        .astype("datetime64"),
        pd.date_range("2023-01-01", periods=3, freq="D")
        .to_numpy()
        .astype("datetime64[D]"),
        pd.date_range("2023-01-01", periods=3, freq="D")
        .to_numpy()
        .astype("datetime64[s]"),
    ],
)
def test_lift_test_indices_with_dates(df_lift_tests, dates) -> None:
    """Date-valued coords are matched to their positions in the model coords.

    Each parametrized ``dates`` value (pandas date ranges at several
    frequencies and numpy ``datetime64`` arrays at several resolutions) is
    used both as the model coordinate and as the source of the lift test
    dates, so the first and third entries must resolve to indices 0 and 2.
    """
    model = pm.Model(
        coords={
            "actual_date": dates,
            "channel": ["organic", "paid", "social"],
        }
    )

    # Attach the first and last coordinate dates to the two lift test rows.
    selected_dates = dates[[0, 2]]
    df_with_dates = df_lift_tests.assign(actual_date=selected_dates)
    lookup_frame = df_with_dates.loc[:, ["actual_date", "channel"]]

    indices = lift_test_indices(lookup_frame, model)

    assert indices["actual_date"].tolist() == [0, 2]
    assert indices["channel"].tolist() == [0, 1]


def test_lift_test_missing_coords(df_lift_tests) -> None:
with pytest.raises(KeyError):
df_lift_tests.pipe(lift_test_indices, model=pm.Model())
@@ -415,3 +454,48 @@ def test_scale_target_for_lift_measurements(mock_target_pipeline) -> None:
result,
pd.Series([0, 1, 2, 3], dtype="float64"),
)


def test_works_with_negative_delta(df_lift_tests_with_numerics) -> None:
    """Lift tests with negated ``delta_x`` and ``delta_y`` can still be sampled.

    Both delta columns of the fixture are multiplied by -1, the lift
    measurements are added to a small model, and a short sampling run must
    not raise ``pm.SamplingError``.
    """
    negated_lift_tests = df_lift_tests_with_numerics.assign(
        delta_x=lambda frame: frame["delta_x"] * -1,
        delta_y=lambda frame: frame["delta_y"] * -1,
    )

    model_coords = {
        "date": ["2020-01-01", "2020-01-02", "2020-01-03"],
        "channel": ["organic", "paid", "social"],
    }
    likelihood_dist = pm.Gamma
    alpha_dims = "date"

    with pm.Model(coords=model_coords) as model:
        pm.HalfNormal("alpha", dims=alpha_dims)
        pm.HalfNormal("lam", dims="channel")

        add_menten_empirical_lift_measurements_to_likelihood(
            negated_lift_tests,
            alpha_name="alpha",
            lam_name="lam",
            dist=likelihood_dist,
        )

    assert "lift_measurements" in model

    # A very short run is enough to surface a sampling failure.
    try:
        with model:
            pm.sample(draws=10, tune=10)
    except pm.SamplingError:
        pytest.fail("Negative delta values caused a sampling error.")


def test_check_increasing_assumption() -> None:
    """A row whose ``delta_x`` and ``delta_y`` signs differ raises an error."""
    # The middle row pairs a positive delta_x with a negative delta_y.
    mixed_sign_lifts = pd.DataFrame(
        {
            "delta_x": [1, 2, 3],
            "delta_y": [1, -2, 3],
        }
    )

    with pytest.raises(NonMonotonicLiftError):
        check_increasing_assumption(mixed_sign_lifts)
Loading