From 0e1c3da1ffba2aa2379fca86ff82cb6c2c1fdaf6 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Sun, 13 Oct 2024 22:47:53 +0200 Subject: [PATCH 1/7] Version bump --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 719a1cb5..b144668a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CLVTools Title: Tools for Customer Lifetime Value Estimation -Version: 0.11.1 -Date: 2024-10-10 +Version: 0.11.2 +Date: 2024-10-13 Authors@R: c( person(given="Patrick", family="Bachmann", email = "pbachma@ethz.ch", role = c("cre","aut")), person(given="Niels", family="Kuebler", email = "niels.kuebler@uzh.ch", role = "aut"), From 88e6b15ddb423b71bc7475b68618ebd9b4592eff Mon Sep 17 00:00:00 2001 From: Markus Meierer Date: Tue, 12 Nov 2024 22:57:05 +0100 Subject: [PATCH 2/7] Update README.md (#276) Added table in the readme file to give an overview on the advanced modeling options --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 0e63adf0..1017a5aa 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,21 @@ layers include: - Equality constraints between parameters of contextual factors for the purchase and the attrition process. 
+The following table provides an overview of which advanced modeling options are supported for which models: + +| | pnbd | bgnbd | ggomnbd | +|---------------------------------------------|------------------------------|------------------------------|------------------------------| +| **Model name** | Pareto/ NBD | BG/ NBD | GGom/ NBD | +| **Attrition: Distribution main process & heterogeneity** | Exponential / Gamma | Geometric / Beta | Gompertz / Gamma | +| **Transaction: Distribution main process & heterogeneity** | Poisson / Gamma | Poisson / Gamma | Poisson / Gamma | +| **Model Parameters** | r, α, s, β | r, α, a, b | r, α, β, b, s | +| **Covariates (time-invariant/-varying)** | ✔ / ✔ | ✔ / - | ✔ / - | +| **Process correlation** | ✔ | - | - | +| **Equality constraints of covariate parameters** | ✔ | ✔ | ✔ | +| **Regularization of covariate parameters** | ✔ | ✔ | ✔ | + + + ## Installation Install the most recent **stable release from CRAN**: From f45f703bda6a06e14a6b2239c4a769ca91de2d2a Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Mon, 18 Nov 2024 00:14:47 +0100 Subject: [PATCH 3/7] Spending model new customer prediction: `newcustomer.spending()` (#277) * Interface and class for `newcustomer.spending()` * Separate newcustomer classes, not inheriting from each other but all inherit directly from `clv.newcustomer.base`. Otherwise would expect to be able to use a subclass in any place where parent class is accepted - which is not the case. 
* Rename `clv.model.predict.new.customer.unconditional.expectation` -> `clv.model.predict.new.customer` and remove `t`: Make generic to predict new customer value in general * Add and update docu * Tests --- NAMESPACE | 1 + R/all_generics.R | 8 +- R/class_clv_model_bgnbd.R | 6 +- R/class_clv_model_bgnbd_staticcov.R | 6 +- R/class_clv_model_gg.R | 12 +++ R/class_clv_model_ggomnbd_nocov.R | 6 +- R/class_clv_model_ggomnbd_staticcov.R | 6 +- R/class_clv_model_pnbd.R | 6 +- R/class_clv_model_pnbd_dynamiccov.R | 6 +- R/class_clv_model_pnbd_staticcov.R | 6 +- R/f_clvfitted_inputchecks.R | 2 +- R/f_generics_clvfittedspending.R | 17 ++++ R/f_generics_clvfittedtransactions.R | 7 +- R/f_generics_clvfittedtransactionsdyncov.R | 3 +- R/f_generics_clvfittedtransactionsstaticcov.R | 5 +- R/f_interface_newcustomer.R | 81 ++++++++++++++++--- R/f_interface_predict_clvfittedspending.R | 46 +++++++++-- R/f_interface_predict_clvfittedtransactions.R | 32 +++++--- man/newcustomer.Rd | 41 +++++++--- man/predict.clv.fitted.spending.Rd | 30 ++++++- man/predict.clv.fitted.transactions.Rd | 8 +- tests/testthat/test_inputchecks_newcustomer.R | 28 ++++++- tests/testthat/test_runability_newcustomer.R | 10 +++ 23 files changed, 285 insertions(+), 88 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6d2e372e..1b983919 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ export(clvdata) export(latentAttrition) export(newcustomer) export(newcustomer.dynamic) +export(newcustomer.spending) export(newcustomer.static) export(spending) exportMethods(bgbb) diff --git a/R/all_generics.R b/R/all_generics.R index f4d214ab..51eb2e86 100644 --- a/R/all_generics.R +++ b/R/all_generics.R @@ -165,10 +165,10 @@ setGeneric(name="clv.model.process.newdata", def=function(clv.model, clv.fitted, setGeneric(name="clv.model.pmf", def=function(clv.model, clv.fitted, x) standardGeneric("clv.model.pmf")) -# .. 
New customer expectation ----------------------------------------------------------------------------------------------- -# predict unconditional expectation until individual t_i for all customers in clv.fitted@clv.data -setGeneric("clv.model.predict.new.customer.unconditional.expectation", function(clv.model, clv.fitted, clv.newcustomer, t) - standardGeneric("clv.model.predict.new.customer.unconditional.expectation")) +# .. New customer prediction ----------------------------------------------------------------------------------------------- +setGeneric("clv.model.predict.new.customer", function(clv.model, clv.fitted, clv.newcustomer) + standardGeneric("clv.model.predict.new.customer")) + diff --git a/R/class_clv_model_bgnbd.R b/R/class_clv_model_bgnbd.R index 460101f8..90c836fc 100644 --- a/R/class_clv_model_bgnbd.R +++ b/R/class_clv_model_bgnbd.R @@ -106,15 +106,15 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.bgnbd.no.cov") fct.expectation = fct.bgnbd.expectation, clv.time = clv.fitted@clv.data@clv.time)) }) -# . clv.model.predict.new.customer.unconditional.expectation -------------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.bgnbd.no.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . 
clv.model.predict.new.customer -------------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.bgnbd.no.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ return(bgnbd_nocov_expectation( r = clv.fitted@prediction.params.model[["r"]], alpha = clv.fitted@prediction.params.model[["alpha"]], a = clv.fitted@prediction.params.model[["a"]], b = clv.fitted@prediction.params.model[["b"]], - vT_i = t)) + vT_i = clv.newcustomer@num.periods)) }) diff --git a/R/class_clv_model_bgnbd_staticcov.R b/R/class_clv_model_bgnbd_staticcov.R index cfa8f757..7cef302f 100644 --- a/R/class_clv_model_bgnbd_staticcov.R +++ b/R/class_clv_model_bgnbd_staticcov.R @@ -130,8 +130,8 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.bgnbd.static.c fct.expectation = fct.bgnbd.expectation, clv.time = clv.fitted@clv.data@clv.time)) }) -# . clv.model.predict.new.customer.unconditional.expectation ----------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.bgnbd.static.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . 
clv.model.predict.new.customer ----------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.bgnbd.static.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ m.cov.trans <- clv.newcustomer.static.get.matrix.cov.trans(clv.newcustomer=clv.newcustomer, clv.fitted=clv.fitted) m.cov.life <- clv.newcustomer.static.get.matrix.cov.life(clv.newcustomer=clv.newcustomer, clv.fitted=clv.fitted) @@ -153,7 +153,7 @@ setMethod("clv.model.predict.new.customer.unconditional.expectation", signature vAlpha_i = alpha_i, vA_i = a_i, vB_i = b_i, - vT_i = t + vT_i = clv.newcustomer@num.periods )) }) diff --git a/R/class_clv_model_gg.R b/R/class_clv_model_gg.R index e822bcff..2db34c11 100644 --- a/R/class_clv_model_gg.R +++ b/R/class_clv_model_gg.R @@ -115,6 +115,18 @@ setMethod("clv.model.predict", signature(clv.model="clv.model.gg"), function(clv }) +# .clv.model.predict.newcustomer -------------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature(clv.model="clv.model.gg"), function(clv.model, clv.fitted, clv.newcustomer){ + + p <- clv.fitted@prediction.params.model[["p"]] + q <- clv.fitted@prediction.params.model[["q"]] + gamma <- clv.fitted@prediction.params.model[["gamma"]] + + # setting x=0 in the ordinary prediction function + return( (gamma) * p/(q - 1) ) +}) + + # .clv.model.vcov.jacobi.diag -------------------------------------------------------------------------------------------------------- setMethod(f = "clv.model.vcov.jacobi.diag", signature = signature(clv.model="clv.model.gg"), definition = function(clv.model, clv.fitted, prefixed.params){ diff --git a/R/class_clv_model_ggomnbd_nocov.R b/R/class_clv_model_ggomnbd_nocov.R index 12aa0d47..e6beee1c 100644 --- a/R/class_clv_model_ggomnbd_nocov.R +++ b/R/class_clv_model_ggomnbd_nocov.R @@ 
-118,8 +118,8 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.ggomnbd.no.cov }) -# . clv.model.predict.new.customer.unconditional.expectation -------------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.ggomnbd.no.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . clv.model.predict.new.customer -------------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.ggomnbd.no.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ return(ggomnbd_nocov_expectation( r = clv.fitted@prediction.params.model[["r"]], @@ -127,7 +127,7 @@ setMethod("clv.model.predict.new.customer.unconditional.expectation", signature beta_0 = clv.fitted@prediction.params.model[["beta"]], b = clv.fitted@prediction.params.model[["b"]], s = clv.fitted@prediction.params.model[["s"]], - vT_i = t)) + vT_i = clv.newcustomer@num.periods)) }) diff --git a/R/class_clv_model_ggomnbd_staticcov.R b/R/class_clv_model_ggomnbd_staticcov.R index 3131e132..a9cf7715 100644 --- a/R/class_clv_model_ggomnbd_staticcov.R +++ b/R/class_clv_model_ggomnbd_staticcov.R @@ -100,8 +100,8 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.ggomnbd.static fct.expectation = fct.expectation, clv.time = clv.fitted@clv.data@clv.time)) }) -# . clv.model.predict.new.customer.unconditional.expectation ----------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.ggomnbd.static.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . 
clv.model.predict.new.customer ----------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.ggomnbd.static.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ m.cov.trans <- clv.newcustomer.static.get.matrix.cov.trans(clv.newcustomer=clv.newcustomer, clv.fitted=clv.fitted) m.cov.life <- clv.newcustomer.static.get.matrix.cov.life(clv.newcustomer=clv.newcustomer, clv.fitted=clv.fitted) @@ -122,7 +122,7 @@ setMethod("clv.model.predict.new.customer.unconditional.expectation", signature s = clv.fitted@prediction.params.model[["s"]], vAlpha_i= alpha_i, vBeta_i = beta_i, - vT_i = t)) + vT_i = clv.newcustomer@num.periods)) }) diff --git a/R/class_clv_model_pnbd.R b/R/class_clv_model_pnbd.R index 2cad5176..36183227 100644 --- a/R/class_clv_model_pnbd.R +++ b/R/class_clv_model_pnbd.R @@ -269,15 +269,15 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.pnbd.no.cov"), -# . clv.model.predict.new.customer.unconditional.expectation -------------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.pnbd.no.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . 
clv.model.predict.new.customer -------------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.pnbd.no.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ return(pnbd_nocov_expectation( r = clv.fitted@prediction.params.model[["r"]], s = clv.fitted@prediction.params.model[["s"]], alpha_0 = clv.fitted@prediction.params.model[["alpha"]], beta_0 = clv.fitted@prediction.params.model[["beta"]], - vT_i = t)) + vT_i = clv.newcustomer@num.periods)) }) diff --git a/R/class_clv_model_pnbd_dynamiccov.R b/R/class_clv_model_pnbd_dynamiccov.R index 2ba0d8e7..2d669d83 100644 --- a/R/class_clv_model_pnbd_dynamiccov.R +++ b/R/class_clv_model_pnbd_dynamiccov.R @@ -175,11 +175,11 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.pnbd.dynamic.c }) -# . clv.model.predict.new.customer.unconditional.expectation ----------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.pnbd.dynamic.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . 
clv.model.predict.new.customer ----------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.pnbd.dynamic.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ return(pnbd_dyncov_newcustomer_expectation( clv.fitted=clv.fitted, - t=t, + t=clv.newcustomer@num.periods, tp.first.transaction=clv.newcustomer@first.transaction, dt.cov.life=clv.newcustomer@data.cov.life, dt.cov.trans=clv.newcustomer@data.cov.trans)) diff --git a/R/class_clv_model_pnbd_staticcov.R b/R/class_clv_model_pnbd_staticcov.R index f59abd03..2e00f7aa 100644 --- a/R/class_clv_model_pnbd_staticcov.R +++ b/R/class_clv_model_pnbd_staticcov.R @@ -211,8 +211,8 @@ setMethod("clv.model.expectation", signature(clv.model="clv.model.pnbd.static.co -# . clv.model.predict.new.customer.unconditional.expectation ----------------------------------------------------------------------------------------------------- -setMethod("clv.model.predict.new.customer.unconditional.expectation", signature = signature(clv.model="clv.model.pnbd.static.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer, t){ +# . 
clv.model.predict.new.customer ----------------------------------------------------------------------------------------------------- +setMethod("clv.model.predict.new.customer", signature = signature(clv.model="clv.model.pnbd.static.cov"), definition = function(clv.model, clv.fitted, clv.newcustomer){ m.cov.trans <- clv.newcustomer.static.get.matrix.cov.trans(clv.newcustomer=clv.newcustomer, clv.fitted=clv.fitted) @@ -232,7 +232,7 @@ setMethod("clv.model.predict.new.customer.unconditional.expectation", signature s = clv.fitted@prediction.params.model[["s"]], vAlpha_i = alpha_i, vBeta_i = beta_i, - vT_i = t + vT_i = clv.newcustomer@num.periods )) }) diff --git a/R/f_clvfitted_inputchecks.R b/R/f_clvfitted_inputchecks.R index bc5100d9..eb9db51d 100644 --- a/R/f_clvfitted_inputchecks.R +++ b/R/f_clvfitted_inputchecks.R @@ -437,7 +437,7 @@ check_user_data_predict_newcustomer_numperiods <- function(num.periods){ check_user_data_predict_newcustomer_staticcov <- function(clv.fitted, clv.newcustomer){ # is exactly "clv.newcustomer.static.cov" - if(!is(clv.newcustomer, "clv.newcustomer.static.cov") | is(clv.newcustomer, "clv.newcustomer.dynamic.cov")){ + if(!is(clv.newcustomer, "clv.newcustomer.static.cov")){ return("Parameter newdata has to be output from calling `newcustomer.static()`!") } diff --git a/R/f_generics_clvfittedspending.R b/R/f_generics_clvfittedspending.R index 2841dff8..cde6014e 100644 --- a/R/f_generics_clvfittedspending.R +++ b/R/f_generics_clvfittedspending.R @@ -47,6 +47,23 @@ setMethod("clv.controlflow.check.newdata", signature(clv.fitted="clv.fitted.spen }) +# . 
clv.controlflow.predict.new.customer ----------------------------------------------------------------------- +setMethod("clv.controlflow.predict.new.customer", signature(clv.fitted="clv.fitted.spending"), definition = function(clv.fitted, clv.newcustomer){ + + + # Only newcustomer.spending() is allowed + if(!is(clv.newcustomer, "clv.newcustomer.spending")){ + check_err_msg("To predict for new customers, 'newdata' has to be the output of 'newcustomer.spending()'!") + } + + return(drop(clv.model.predict.new.customer( + clv.model = clv.fitted@clv.model, + clv.fitted = clv.fitted, + clv.newcustomer=clv.newcustomer + ))) +}) + + # . clv.controlflow.predict.build.result.table ----------------------------------------------------------------- setMethod("clv.controlflow.predict.build.result.table", signature(clv.fitted="clv.fitted.spending"), definition = function(clv.fitted, verbose, ...){ dt.predictions <- copy(clv.fitted@cbs[, "Id"]) diff --git a/R/f_generics_clvfittedtransactions.R b/R/f_generics_clvfittedtransactions.R index e60c2697..c98fcda8 100644 --- a/R/f_generics_clvfittedtransactions.R +++ b/R/f_generics_clvfittedtransactions.R @@ -329,15 +329,14 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s #' @include class_clv_fitted_transactions.R setMethod("clv.controlflow.predict.new.customer", signature = signature(clv.fitted="clv.fitted.transactions"), definition = function(clv.fitted, clv.newcustomer){ - if(!is(clv.newcustomer, "clv.newcustomer.no.cov") | is(clv.newcustomer, "clv.newcustomer.static.cov")){ + if(!is(clv.newcustomer, "clv.newcustomer.no.cov")){ check_err_msg("Parameter newdata has to be output from calling `newcustomer()`!") } - return(drop(clv.model.predict.new.customer.unconditional.expectation( + return(drop(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, - clv.newcustomer=clv.newcustomer, - t=clv.newcustomer@num.periods))) + clv.newcustomer=clv.newcustomer))) }) diff --git 
a/R/f_generics_clvfittedtransactionsdyncov.R b/R/f_generics_clvfittedtransactionsdyncov.R index 3c4c4ac0..9318c968 100644 --- a/R/f_generics_clvfittedtransactionsdyncov.R +++ b/R/f_generics_clvfittedtransactionsdyncov.R @@ -107,9 +107,8 @@ setMethod(f = "clv.controlflow.predict.new.customer", signature = signature(clv. tp.prediction.end=tp.prediction.end)) - return(clv.model.predict.new.customer.unconditional.expectation( + return(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, - t=clv.newcustomer@num.periods, clv.newcustomer=clv.newcustomer)) }) diff --git a/R/f_generics_clvfittedtransactionsstaticcov.R b/R/f_generics_clvfittedtransactionsstaticcov.R index 2d9016aa..b21c84d2 100644 --- a/R/f_generics_clvfittedtransactionsstaticcov.R +++ b/R/f_generics_clvfittedtransactionsstaticcov.R @@ -91,11 +91,10 @@ setMethod(f = "clv.controlflow.predict.new.customer", signature = signature(clv. check_err_msg(check_user_data_predict_newcustomer_staticcov(clv.fitted=clv.fitted, clv.newcustomer=clv.newcustomer)) - return(drop(clv.model.predict.new.customer.unconditional.expectation( + return(drop(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, - clv.newcustomer=clv.newcustomer, - t=clv.newcustomer@num.periods))) + clv.newcustomer=clv.newcustomer))) }) diff --git a/R/f_interface_newcustomer.R b/R/f_interface_newcustomer.R index d370d5af..f9d05f75 100644 --- a/R/f_interface_newcustomer.R +++ b/R/f_interface_newcustomer.R @@ -2,15 +2,25 @@ #' @title New customer prediction data #' #' @description -#' The methods documented here are to be used together with \link[CLVTools:predict.clv.fitted.transactions]{predict} to obtain -#' the expected number of transactions of an average newly alive customer. -#' It describes the number of transactions a single, average new customer is expected to make in -#' the \code{num.periods} periods since making the first transaction ("coming alive"). 
This prediction is only -#' sensible for customers who just came alive and have not had the chance to reveal any more of their behavior. +#' The methods documented here are to be used together with +#' \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to obtain +#' the expected number of transactions of an average newly alive customer and +#' with \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to obtain +#' the expected spending of an average newly alive customer. +#' This prediction is only sensible for (fictional) customers without order history: +#' Customers which just came alive and have not had the chance to reveal any more of their behavior. #' -#' The data required for this new customer prediction is produced by the methods described here. This is mostly covariate data -#' for static and dynamic covariate models. See details for the required format. +#' The methods described here produce the data required as input to +#' \code{predict(newdata=)} to make this new customer prediction. +#' This is mostly covariate data for static and dynamic covariate models. +#' See details for the required format. #' +#' \code{newcustomer()}, \code{newcustomer.static()}, \code{newcustomer.dynamic()}: +#' To predict the number of transactions a single, fictional, average new customer is expected to make in +#' the \code{num.periods} periods since making the first transaction ("coming alive"). +#' +#' \code{newcustomer.spending()}: To estimate how much a single, fictional, average +#' new customer is expected to spend on average per transaction. #' #' @param num.periods A positive, numeric scalar indicating the number of periods to predict. #' @param data.cov.life Numeric-only covariate data for the lifetime process for a single customer, \code{data.table} or \code{data.frame}. See details. 
@@ -18,7 +28,8 @@ #' @param first.transaction For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive") for which a prediction is made. #' Has to be within the time range of the covariate data. #' -#' @seealso \link[CLVTools:predict.clv.fitted.transactions]{predict} to use the output of the methods described here. +#' @seealso \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to use the output of the methods described here. +#' @seealso \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to use the output of the methods described here. #' #' @details #' The covariate data has to contain one column for every covariate parameter in the fitted model. Only numeric values are allowed, no factors or characters. @@ -45,6 +56,9 @@ #' \item{newcustomer()}{An object of class \code{clv.newcustomer.no.cov}} #' \item{newcustomer.static()}{An object of class \code{clv.newcustomer.static.cov}} #' \item{newcustomer.dynamic()}{An object of class \code{clv.newcustomer.dynamic.cov}} +#' \item{newcustomer.spending()}{An object of class \code{clv.newcustomer.spending}} +#' +#' #' #' @examples #' \donttest{ @@ -81,6 +95,11 @@ #' ) #' #' +#' # Spending model +#' gg.apparel <- gg(clv.data.apparel) +#' predict(gg.apparel, newdata = newcustomer.spending()) +#' +#' #' #' # Static covariate model #' p.apparel.static <- pnbd(clv.data.static.cov) @@ -127,16 +146,28 @@ #' first.transaction = "2051-02-16" #' ) #' ) -#' #' } #' } #' NULL +# . clv.newcustomer.base ------------------------------------------------------- +# A (near useless) base class from which other 'newcustomer' classes inherit. +# This is required because a class defined without slots and without parents is +# considered VIRTUAL and cannot be instantiated. Inheriting from this class, +# allows to define a class `newcustomer.spending` which has no slots and +# otherwise would be considered VIRTUAL. 
+# This base class additionally is handy to catch any type of newcustomer +# instance when verifying parameters. +# Making this class virtual is not required as having no slots and parent has +# the same effect but better to be explicit. +setClass("clv.newcustomer.base", contains = "VIRTUAL") + setClass( Class = "clv.newcustomer.no.cov", - representation = list(num.periods="numeric") - ) + representation = list(num.periods="numeric"), + contains = 'clv.newcustomer.base' +) clv.newcustomer.no.cov <- function(num.periods){ return(new("clv.newcustomer.no.cov", num.periods=num.periods)) @@ -152,8 +183,9 @@ clv.newcustomer.no.cov <- function(num.periods){ # convert the data setClass( Class = "clv.newcustomer.static.cov", - contains = "clv.newcustomer.no.cov", + contains = "clv.newcustomer.base", representation = list( + num.periods="numeric", data.cov.life="data.table", data.cov.trans="data.table" )) @@ -181,8 +213,11 @@ clv.newcustomer.static.get.matrix.cov.life <- function(clv.newcustomer, clv.fitt setClass( Class = "clv.newcustomer.dynamic.cov", - contains = "clv.newcustomer.static.cov", + contains = "clv.newcustomer.base", representation = list( + num.periods="numeric", + data.cov.life="data.table", + data.cov.trans="data.table", # Has to be ANY because can be Date, Posixt, or character because this class is # used to transport the data to the clv.fitted object for predicting and it # contains the clv.data@clv.time object required to convert first.transaction @@ -229,6 +264,19 @@ clv.newcustomer.dynamic.cov.convert.time <- function(clv.newcustomer, clv.time){ } +# Needs to inherit from a class as it would otherwise be a VIRTUAL class as it +# also has no slots. 
+setClass( + Class = "clv.newcustomer.spending", + contains = 'clv.newcustomer.base' +) + +clv.newcustomer.spending <- function(){ + return(new("clv.newcustomer.spending")) +} + + + #' @rdname newcustomer #' @export newcustomer <- function(num.periods){ @@ -279,6 +327,13 @@ newcustomer.dynamic <- function(num.periods, data.cov.life, data.cov.trans, firs } +#' @rdname newcustomer +#' @export +newcustomer.spending <- function(){ + return(clv.newcustomer.spending()) +} + + check_user_data_newcustomer_staticcovdatacov <- function(data.cov, name.of.covariate){ # Check if data has basic properties diff --git a/R/f_interface_predict_clvfittedspending.R b/R/f_interface_predict_clvfittedspending.R index 2d8fe148..bfe7f871 100644 --- a/R/f_interface_predict_clvfittedspending.R +++ b/R/f_interface_predict_clvfittedspending.R @@ -1,23 +1,36 @@ # S3 predict for clv.fitted.spending ------------------------------------------------------------------------------ -#' @title Predict customers' future spending +#' @title Infer customers' spending #' #' @param object A fitted spending model for which prediction is desired. -#' @param newdata A clv data object for which predictions should be made with the fitted model. If none or NULL is given, predictions are made for the data on which the model was fit. +#' @param newdata A \code{clv.data} object or data for the new customer prediction (see \link[CLVTools:newcustomer]{newcustomer.spending}). +#' If none or NULL is given, predictions are made for the data on which the model was fit. #' @template template_params_uncertainty #' @template template_param_verbose #' @template template_param_dots #' #' @description -#' Predict customer's future mean spending per transaction and compare it to the actual mean spending in the holdout period. +#' Infer customer's mean spending per transaction and compare it to the actual mean spending in the holdout period. 
+#' +#' \subsection{New customer prediction}{ +#' The fitted model can also be used to estimate the spending that a single, (fictional), average +#' newly alive customer is expected to make at the moment of the first transaction. +#' That is, for a customer who has no existing order history and who just "came alive". +#' +#' The data on which the model was fit and which is stored in it is NOT used for this prediction. +#' See examples and \link[CLVTools:newcustomer]{newcustomer.spending} for more details. +#' } #' #' @details #' If \code{newdata} is provided, the individual customer statistics underlying the model are calculated #' the same way as when the model was fit initially. Hence, if \code{remove.first.transaction} was \code{TRUE}, #' this will be applied to \code{newdata} as well. #' +#' To predict for new customers, the output of \link[CLVTools:newcustomer]{newcustomer.spending} has to be given to \code{newdata}. See examples. +#' #' @seealso models to predict spending: \link{gg}. #' @seealso models to predict transactions: \link{pnbd}, \link{bgnbd}, \link{ggomnbd}. #' @seealso \code{\link[CLVTools:predict.clv.fitted.transactions]{predict}} for transaction models +#' @seealso \code{\link[CLVTools:newcustomer]{newcustomer.spending}} to create data to predict for customers without order history #' #' #' @return An object of class \code{data.table} with columns: @@ -25,6 +38,8 @@ #' \item{actual.mean.spending}{Actual mean spending per transaction in the holdout period. Only if there is a holdout period otherwise it is not reported.} #' \item{predicted.mean.spending}{The mean spending per transaction as predicted by the fitted spending model.} #' +#' If predicting for new customers (using \code{newcustomer.spending()}), a numeric scalar +#' indicating the expected spending is returned instead. 
#' #' @examples #' \donttest{ @@ -35,9 +50,14 @@ #' estimation.split = 52, date.format = "ymd") #' apparel.gg <- gg(apparel.holdout) #' -#' # Predict customers' future mean spending per transaction +#' # Estimate customers' mean spending per transaction #' predict(apparel.gg) #' +#' # Estimate the mean spending per transaction a single, +#' # fictional, average new customer is expected to make +#' # See ?newcustomer.spending() for more examples +#' predict(apparel.gg, newdata=newcustomer.spending()) +#' #' } #' #' @importFrom stats predict @@ -46,10 +66,24 @@ predict.clv.fitted.spending <- function(object, newdata=NULL, uncertainty=c("none", "boots"), level=0.9, num.boots=100, verbose=TRUE, ...){ check_err_msg(check_user_data_emptyellipsis(...)) + + # The usual prediction unless newdata indicates a new customer prediction (ie newdata=newcustomer.spending()) + # check for base class and not "clv.newcustomer.spending" as users likely will pass other newcustomer objects + if(is(newdata, "clv.newcustomer.base")){ + # some type of newcustomer object passed + + # No other parameters may be passed (all others must be missing) + if(!all(missing(uncertainty), missing(level), missing(num.boots))){ + check_err_msg("No other parameters ('uncertainty', 'level', 'num.boots') may be specified when predicting for new customers!") + } + + return(clv.controlflow.predict.new.customer(clv.fitted = object, clv.newcustomer = newdata)) + } + + # match uncertainty to one of the allowed values. 
Only after newdata section + # because after match.arg(), missing() is always false check_err_msg(check_user_data_uncertainty(uncertainty = uncertainty)) - # match uncertainty to one of the allowed values uncertainty <- match.arg(tolower(uncertainty), choices=c("none", "boots"), several.ok=FALSE) - return(clv.template.controlflow.predict(clv.fitted=object, verbose=verbose, user.newdata=newdata, uncertainty=uncertainty, num.boots=num.boots, level=level)) } diff --git a/R/f_interface_predict_clvfittedtransactions.R b/R/f_interface_predict_clvfittedtransactions.R index ccfd5eba..037ab2f4 100644 --- a/R/f_interface_predict_clvfittedtransactions.R +++ b/R/f_interface_predict_clvfittedtransactions.R @@ -40,8 +40,9 @@ #' Uncertainty estimates are available for all predicted quantities using bootstrapping. #' #' \subsection{New customer prediction}{ -#' The fitted model can also be used to predict the number of transactions a single, average +#' The fitted model can also be used to predict the number of transactions a fictional, single, average #' newly alive customer is expected to make at the moment of the first transaction ("coming alive"). +#' This is, for a customer which has no existing order history. #' For covariate models, the prediction is for an average customer with the given covariates. #' #' The individual-level unconditional expectation that is also used for the @@ -94,7 +95,6 @@ #' #' See \link{clv.bootstrapped.apply} to create a custom bootstrapping procedure. #' - #' #' #' @seealso models to predict transactions: \link{pnbd}, \link{bgnbd}, \link{ggomnbd}. 
@@ -168,9 +168,8 @@ #' # But it works if providing a prediction.end #' predict(pnc, prediction.end = 10) # ends on 2016-12-17 #' -#' -#' # Predict num transactions for a newly alive customer -#' # in the next 3.45 weeks +#' # Predict the number of transactions a single, fictional, average new +#' # customer is expected to make in the first 3.45 weeks since coming alive #' # See ?newcustomer() for more examples #' predict(apparel.pnbd, newdata = newcustomer(num.periods=3.45)) #' @@ -186,21 +185,30 @@ predict.clv.fitted.transactions <- function(object, newdata=NULL, prediction.end continuous.discount.factor=log(1+0.1), uncertainty=c("none", "boots"), level=0.9, num.boots=100, verbose=TRUE, ...){ check_err_msg(check_user_data_emptyellipsis(...)) - check_err_msg(check_user_data_uncertainty(uncertainty=uncertainty)) - # match uncertainty to one of the allowed values - uncertainty <- match.arg(tolower(uncertainty), choices = c("none", "boots"), several.ok = FALSE) # The usual prediction unless newdata indicates a new customer prediction (ie newdata=newcustomer()) - if(is(newdata, "clv.newcustomer.no.cov")){ + if(is(newdata, "clv.newcustomer.base")){ + # not other parameters except object and newdata may be given (all others must be missing) - if(!all(missing(prediction.end), missing(predict.spending), missing(continuous.discount.factor))){ - check_err_msg("Parameters prediction.end, predict.spending and continuous.discount.factor may not be specified when predicting for new customers.") + if(!all(missing(prediction.end), + missing(predict.spending), + missing(continuous.discount.factor), + missing(uncertainty), + missing(level), + missing(num.boots))){ + check_err_msg("No other parameters ('prediction.end', 'predict.spending', 'continuous.discount.factor', 'uncertainty', 'level', 'num.boots') may be specified when predicting for new customers!") } - return(clv.controlflow.predict.new.customer(clv.fitted = object, clv.newcustomer = newdata)) } + + # match uncertainty to 
one of the allowed values. Only after newdata section + # because after match.arg(), missing() is always false + check_err_msg(check_user_data_uncertainty(uncertainty=uncertainty)) + # match uncertainty to one of the allowed values + uncertainty <- match.arg(tolower(uncertainty), choices = c("none", "boots"), several.ok = FALSE) + # If it was not explicitly passed in the call, the spending model should only be applied # it there is spending data. Otherwise, predict does not work out-of-the-box for # data object w/o spending diff --git a/man/newcustomer.Rd b/man/newcustomer.Rd index 9e5482ce..a156ba82 100644 --- a/man/newcustomer.Rd +++ b/man/newcustomer.Rd @@ -4,6 +4,7 @@ \alias{newcustomer} \alias{newcustomer.static} \alias{newcustomer.dynamic} +\alias{newcustomer.spending} \title{New customer prediction data} \usage{ newcustomer(num.periods) @@ -16,6 +17,8 @@ newcustomer.dynamic( data.cov.trans, first.transaction ) + +newcustomer.spending() } \arguments{ \item{num.periods}{A positive, numeric scalar indicating the number of periods to predict.} @@ -31,16 +34,28 @@ Has to be within the time range of the covariate data.} \item{newcustomer()}{An object of class \code{clv.newcustomer.no.cov}} \item{newcustomer.static()}{An object of class \code{clv.newcustomer.static.cov}} \item{newcustomer.dynamic()}{An object of class \code{clv.newcustomer.dynamic.cov}} +\item{newcustomer.spending()}{An object of class \code{clv.newcustomer.spending}} } \description{ -The methods documented here are to be used together with \link[CLVTools:predict.clv.fitted.transactions]{predict} to obtain -the expected number of transactions of an average newly alive customer. -It describes the number of transactions a single, average new customer is expected to make in -the \code{num.periods} periods since making the first transaction ("coming alive"). This prediction is only -sensible for customers who just came alive and have not had the chance to reveal any more of their behavior. 
- -The data required for this new customer prediction is produced by the methods described here. This is mostly covariate data -for static and dynamic covariate models. See details for the required format. +The methods documented here are to be used together with +\link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to obtain +the expected number of transactions of an average newly alive customer and +with \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to obtain +the expected spending of an average newly alive customer. +This prediction is only sensible for (fictional) customers without order history: +Customers which just came alive and have not had the chance to reveal any more of their behavior. + +The methods described here produce the data required as input to +\code{predict(newdata=)} to make this new customer prediction. +This is mostly covariate data for static and dynamic covariate models. +See details for the required format. + +\code{newcustomer()}, \code{newcustomer.static()}, \code{newcustomer.dynamic()}: +To predict the number of transactions a single, fictional, average new customer is expected to make in +the \code{num.periods} periods since making the first transaction ("coming alive"). + +\code{newcustomer.spending()}: To estimate how much a single, fictional, average +new customer is expected to spend on average per transaction. } \details{ The covariate data has to contain one column for every covariate parameter in the fitted model. Only numeric values are allowed, no factors or characters. @@ -98,6 +113,11 @@ predict( ) +# Spending model +gg.apparel <- gg(clv.data.apparel) +predict(gg.apparel, newdata = newcustomer.spending()) + + # Static covariate model p.apparel.static <- pnbd(clv.data.static.cov) @@ -144,11 +164,12 @@ predict( first.transaction = "2051-02-16" ) ) - } } } \seealso{ -\link[CLVTools:predict.clv.fitted.transactions]{predict} to use the output of the methods described here. 
+\link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to use the output of the methods described here. + +\link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to use the output of the methods described here. } diff --git a/man/predict.clv.fitted.spending.Rd b/man/predict.clv.fitted.spending.Rd index a9f1f450..49325d35 100644 --- a/man/predict.clv.fitted.spending.Rd +++ b/man/predict.clv.fitted.spending.Rd @@ -3,7 +3,7 @@ \name{predict.clv.fitted.spending} \alias{predict.clv.fitted.spending} \alias{predict,clv.fitted.spending-method} -\title{Predict customers' future spending} +\title{Infer customers' spending} \usage{ \method{predict}{clv.fitted.spending}( object, @@ -28,7 +28,8 @@ \arguments{ \item{object}{A fitted spending model for which prediction is desired.} -\item{newdata}{A clv data object for which predictions should be made with the fitted model. If none or NULL is given, predictions are made for the data on which the model was fit.} +\item{newdata}{A \code{clv.data} object or data for the new customer prediction (see \link[CLVTools:newcustomer]{newcustomer.spending}). +If none or NULL is given, predictions are made for the data on which the model was fit.} \item{uncertainty}{Method to produce confidence intervals of the predictions (parameter uncertainty). Either "none" (default) or "boots".} @@ -45,14 +46,28 @@ An object of class \code{data.table} with columns: \item{Id}{The respective customer identifier} \item{actual.mean.spending}{Actual mean spending per transaction in the holdout period. Only if there is a holdout period otherwise it is not reported.} \item{predicted.mean.spending}{The mean spending per transaction as predicted by the fitted spending model.} + +If predicting for new customers (using \code{newcustomer.spending()}), a numeric scalar +indicating the expected spending is returned instead. 
} \description{ -Predict customer's future mean spending per transaction and compare it to the actual mean spending in the holdout period. +Infer customer's mean spending per transaction and compare it to the actual mean spending in the holdout period. + +\subsection{New customer prediction}{ +The fitted model can also be used to estimate the spending that a single, (fictional), average +newly alive customer is expected to make at the moment of the first transaction. +This is, for a customer which has no existing order history and that just "came alive". + +The data on which the model was fit and which is stored in it is NOT used for this prediction. +See examples and \link[CLVTools:newcustomer]{newcustomer.spending} for more details. +} } \details{ If \code{newdata} is provided, the individual customer statistics underlying the model are calculated the same way as when the model was fit initially. Hence, if \code{remove.first.transaction} was \code{TRUE}, this will be applied to \code{newdata} as well. + +To predict for new customers, the output of \link[CLVTools:newcustomer]{newcustomer.spending} has to be given to \code{newdata}. See examples. } \examples{ \donttest{ @@ -63,9 +78,14 @@ apparel.holdout <- clvdata(apparelTrans, time.unit="w", estimation.split = 52, date.format = "ymd") apparel.gg <- gg(apparel.holdout) -# Predict customers' future mean spending per transaction +# Estimate customers' mean spending per transaction predict(apparel.gg) +# Estimate the mean spending per transaction a single, +# fictional, average new customer is expected to make +# See ?newcustomer.spending() for more examples +predict(apparel.gg, newdata=newcustomer.spending()) + } } @@ -75,4 +95,6 @@ models to predict spending: \link{gg}. models to predict transactions: \link{pnbd}, \link{bgnbd}, \link{ggomnbd}. 
\code{\link[CLVTools:predict.clv.fitted.transactions]{predict}} for transaction models + +\code{\link[CLVTools:newcustomer]{newcustomer.spending}} to create data to predict for customers without order history } diff --git a/man/predict.clv.fitted.transactions.Rd b/man/predict.clv.fitted.transactions.Rd index d7fcf870..0ee14796 100644 --- a/man/predict.clv.fitted.transactions.Rd +++ b/man/predict.clv.fitted.transactions.Rd @@ -97,8 +97,9 @@ In this case, the prediction additionally contains the following two columns: Uncertainty estimates are available for all predicted quantities using bootstrapping. \subsection{New customer prediction}{ -The fitted model can also be used to predict the number of transactions a single, average +The fitted model can also be used to predict the number of transactions a fictional, single, average newly alive customer is expected to make at the moment of the first transaction ("coming alive"). +This is, for a customer which has no existing order history. For covariate models, the prediction is for an average customer with the given covariates. 
The individual-level unconditional expectation that is also used for the @@ -218,9 +219,8 @@ predict(pnc) # But it works if providing a prediction.end predict(pnc, prediction.end = 10) # ends on 2016-12-17 - -# Predict num transactions for a newly alive customer -# in the next 3.45 weeks +# Predict the number of transactions a single, fictional, average new +# customer is expected to make in the first 3.45 weeks since coming alive # See ?newcustomer() for more examples predict(apparel.pnbd, newdata = newcustomer(num.periods=3.45)) diff --git a/tests/testthat/test_inputchecks_newcustomer.R b/tests/testthat/test_inputchecks_newcustomer.R index 73918bc7..d36159db 100644 --- a/tests/testthat/test_inputchecks_newcustomer.R +++ b/tests/testthat/test_inputchecks_newcustomer.R @@ -4,6 +4,8 @@ p.cdnow <- fit.cdnow() p.apparel.static <- fit.apparel.static() p.apparel.dyn <- fit.apparel.dyncov.quick() +gg.cdnow <- fit.cdnow(model=gg) + default.dyn.cov <- function(...){ l.args <- list(...) @@ -154,25 +156,43 @@ test_that("newcustomer fits the type of fitted model", { nc.nocov <- newcustomer(num.periods = 1.23) nc.static <- default.nc.static() nc.dyn <- default.nc.dyn() + nc.spending <- newcustomer.spending() # nocov expect_error(predict(p.cdnow, newdata=nc.static), regexp = "output from") expect_error(predict(p.cdnow, newdata=nc.dyn), regexp = "output from") + expect_error(predict(p.cdnow, newdata=nc.spending), regexp = "output from") + # static cov expect_error(predict(p.apparel.static, newdata=nc.nocov), regexp = "output from") expect_error(predict(p.apparel.static, newdata=nc.dyn), regexp = "output from") + expect_error(predict(p.apparel.static, newdata=nc.spending), regexp = "output from") # dyncov expect_error(predict(p.apparel.dyn, newdata=nc.nocov), regexp = "output from") expect_error(predict(p.apparel.dyn, newdata=nc.static), regexp = "output from") + expect_error(predict(p.apparel.dyn, newdata=nc.spending), regexp = "output from") + + # spending mode + 
expect_error(predict(gg.cdnow, newdata=nc.nocov), regexp = "output of") + expect_error(predict(gg.cdnow, newdata=nc.static), regexp = "output of") + expect_error(predict(gg.cdnow, newdata=nc.dyn), regexp = "output of") }) -test_that("predict(): Error if other parameters are passed", { - expect_error(predict(p.cdnow, newdata=newcustomer(12), prediction.end=12), regexp = "may not be specified") - expect_error(predict(p.cdnow, newdata=newcustomer(12), continuous.discount.factor=0.1), regexp = "may not be specified") - expect_error(predict(p.cdnow, newdata=newcustomer(12), predict.spending=TRUE), regexp = "may not be specified") +test_that("predict(): Error if other parameters are passed (spending & transactions)", { + + # transactions + expect_error(predict(p.cdnow, newdata=newcustomer(12), prediction.end=12), regexp = "No other parameters") + expect_error(predict(p.cdnow, newdata=newcustomer(12), continuous.discount.factor=0.1), regexp = "No other parameters") + expect_error(predict(p.cdnow, newdata=newcustomer(12), predict.spending=TRUE), regexp = "No other parameters") + + # spending + expect_error(predict(gg.cdnow, newdata=newcustomer.spending(), uncertainty="none"), regexp = "No other parameters") + expect_error(predict(gg.cdnow, newdata=newcustomer.spending(), num.boots=12), regexp = "No other parameters") + expect_error(predict(gg.cdnow, newdata=newcustomer.spending(), level=0.8), regexp = "No other parameters") + }) test_that("predict vs newcustomer: dyn/static cov data names are not the same as parameters", { diff --git a/tests/testthat/test_runability_newcustomer.R b/tests/testthat/test_runability_newcustomer.R index a8fa289c..a030f51c 100644 --- a/tests/testthat/test_runability_newcustomer.R +++ b/tests/testthat/test_runability_newcustomer.R @@ -29,6 +29,9 @@ p.apparel.dyn <- fct.helper.dyncov.quickfit.apparel.data( names.cov.trans = c("High.Season", "Gender", "Channel") ) +gg.apparel.remove.first <- fit.apparel.nocov(model=gg, 
remove.first.transaction=TRUE) +gg.apparel.notremove.first <- fit.apparel.nocov(model=gg, remove.first.transaction=FALSE) + # Tests no cov models ------------------------------------------------------------------ @@ -232,3 +235,10 @@ test_that("Works with Cov.Date & first.transaction of type Date, character, POSI }) + +# Tests spending models --------------------------------------------------------- + +test_that("Works for spending models", { + fct.expect.silent.predict.newcustomer(gg.apparel.remove.first, newcustomer.spending()) + fct.expect.silent.predict.newcustomer(gg.apparel.notremove.first, newcustomer.spending()) +}) From b4e1ce81182ee0c43bde9a26a672c4a09f2370be Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Tue, 26 Nov 2024 22:52:27 +0100 Subject: [PATCH 4/7] PNBD Dyncov: Improved optimx defaults (higher `itnmax`) (#279) Significantly increase the default number of optimizer iterations for dyncov models (`itnmax`) from 3000 to 50000. The previous iteration limit is rather low for NelderMead and in practice was often reached. This likely mislead users to believe that the optimization converged when in reality it only stopped because the max num iterations had been reached. 
--- R/class_clv_model_pnbd_dynamiccov.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/class_clv_model_pnbd_dynamiccov.R b/R/class_clv_model_pnbd_dynamiccov.R index 2d669d83..0f4deb8f 100644 --- a/R/class_clv_model_pnbd_dynamiccov.R +++ b/R/class_clv_model_pnbd_dynamiccov.R @@ -18,7 +18,7 @@ clv.model.pnbd.dynamic.cov <- function(){ name.model = "Pareto/NBD with Dynamic Covariates", # Overwrite optimx default args optimx.defaults = list(method = "Nelder-Mead", - itnmax = 3000, + itnmax = 50000, control = list( kkt = TRUE, save.failures = TRUE, From a327faf0c59203a5722e026aa9017b4f8125f1cb Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Tue, 26 Nov 2024 22:57:09 +0100 Subject: [PATCH 5/7] Bootstrapping: More tests (#281) * Inputchecks: Reject bootstrapping params during newcustomer prediction * predict(boots) runs when other params are given --- tests/testthat/test_inputchecks_newcustomer.R | 25 +++++---- .../testthat/test_runability_bootstrapping.R | 56 ++++++++++++++++++- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test_inputchecks_newcustomer.R b/tests/testthat/test_inputchecks_newcustomer.R index d36159db..f3a733b4 100644 --- a/tests/testthat/test_inputchecks_newcustomer.R +++ b/tests/testthat/test_inputchecks_newcustomer.R @@ -183,16 +183,21 @@ test_that("newcustomer fits the type of fitted model", { test_that("predict(): Error if other parameters are passed (spending & transactions)", { - # transactions - expect_error(predict(p.cdnow, newdata=newcustomer(12), prediction.end=12), regexp = "No other parameters") - expect_error(predict(p.cdnow, newdata=newcustomer(12), continuous.discount.factor=0.1), regexp = "No other parameters") - expect_error(predict(p.cdnow, newdata=newcustomer(12), predict.spending=TRUE), regexp = "No other parameters") - - # spending - expect_error(predict(gg.cdnow, newdata=newcustomer.spending(), uncertainty="none"), regexp = "No other parameters") - 
expect_error(predict(gg.cdnow, newdata=newcustomer.spending(), num.boots=12), regexp = "No other parameters") - expect_error(predict(gg.cdnow, newdata=newcustomer.spending(), level=0.8), regexp = "No other parameters") - + for(m in list(p.cdnow, gg.cdnow)){ + if(is(m, "clv.pnbd")){ + nc <- newcustomer(12) + + expect_error(predict(m, newdata=nc, prediction.end=12), regexp = "No other parameters") + expect_error(predict(m, newdata=nc, continuous.discount.factor=0.1), regexp = "No other parameters") + expect_error(predict(m, newdata=nc, predict.spending=TRUE), regexp = "No other parameters") + }else{ + nc <- newcustomer.spending() + } + + expect_error(predict(m, newdata=nc, uncertainty="boots"), regexp = "No other parameters") + expect_error(predict(m, newdata=nc, num.boots=12), regexp = "No other parameters") + expect_error(predict(m, newdata=nc, level=0.8), regexp = "No other parameters") + } }) test_that("predict vs newcustomer: dyn/static cov data names are not the same as parameters", { diff --git a/tests/testthat/test_runability_bootstrapping.R b/tests/testthat/test_runability_bootstrapping.R index 481fa049..0644498c 100644 --- a/tests/testthat/test_runability_bootstrapping.R +++ b/tests/testthat/test_runability_bootstrapping.R @@ -262,14 +262,14 @@ for(clv.fitted in list( } -# predict(boots) works on all model specifications ----------------------------- -# This also includes testing clv.bootstrapped.apply because it is used under the hood +# predict(uncertainty=boots) works on all model specifications ----------------------------- +# This also includes testing `clv.bootstrapped.apply` because it is used under the hood # - fit with correlation # - constrained params # - regularization # - combinations -test_that("predict(boots) works on all model specifications", { +test_that("predict(uncertainty=boots) works on all model specifications", { fn.predict.boots <- function(clv.fitted){ expect_warning(predict(clv.fitted, uncertainty='boots', num.boots=2, 
predict.spending=TRUE, verbose=FALSE), regexp = 'recommended to run') } @@ -307,3 +307,53 @@ test_that("predict(boots) works on all model specifications", { }) + + +# predict(uncertainty=boots) works with various inputs ------------------------------------ + +test_that("predict(uncertainty=boots) works with predict.spending, newdata, prediction.end", { + + p.cdnow <- fit.cdnow(optimx.args = optimx.args.NM) + + fn.predict.boots <- function(predict.spending=TRUE, newdata=NULL, prediction.end=NULL){ + expect_warning(dt.pred <- predict( + p.cdnow, + verbose=FALSE, + uncertainty='boots', + num.boots=2, + newdata=newdata, + prediction.end=prediction.end, + predict.spending=predict.spending + ), regexp = "recommended to run") + return(dt.pred) + } + + # predict.spending + fn.predict.boots(predict.spending = TRUE) + fn.predict.boots(predict.spending = FALSE) + fn.predict.boots(predict.spending = gg) + fn.predict.boots(predict.spending = fit.cdnow(model = gg)) + + # newdata + clv.apparel.nocov <- fct.helper.create.clvdata.apparel.nocov() + dt.pred <- fn.predict.boots(newdata=clv.apparel.nocov) + # really did predict for the apparel dataset and not the cdnow + expect_true(dt.pred[, .N] == nobs(clv.apparel.nocov)) + + # prediction.end + clv.cdnow.noholdout <- fct.helper.create.clvdata.cdnow(estimation.split = NULL) + + # with holdout, no prediction.end is required + fn.predict.boots(prediction.end=NULL) + # with holdout, can also with prediction.end + fn.predict.boots(prediction.end=10) + + # without holdout, prediction.end is required + expect_error( + predict(p.cdnow, uncertainty='boots', newdata=clv.cdnow.noholdout), + regexp = "Cannot predict without prediction.end" + ) + # without holdout, works if prediction.end is given + fn.predict.boots(newdata=clv.cdnow.noholdout, prediction.end=10) + +}) From 93bba567fcc78c4e2aefffc519ccf27bb12ed1f2 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Tue, 26 Nov 2024 22:59:40 +0100 Subject: [PATCH 6/7] Uncertainty estimates: Move 
common docu to template (#280) --- R/f_interface_predict_clvfittedspending.R | 4 ++++ R/f_interface_predict_clvfittedtransactions.R | 19 +-------------- man-roxygen/template_predict_uncertainty.R | 24 +++++++++++++++++++ man/predict.clv.fitted.spending.Rd | 21 ++++++++++++++++ 4 files changed, 50 insertions(+), 18 deletions(-) create mode 100644 man-roxygen/template_predict_uncertainty.R diff --git a/R/f_interface_predict_clvfittedspending.R b/R/f_interface_predict_clvfittedspending.R index bfe7f871..662af8f6 100644 --- a/R/f_interface_predict_clvfittedspending.R +++ b/R/f_interface_predict_clvfittedspending.R @@ -27,6 +27,10 @@ #' #' To predict for new customers, the output of \link[CLVTools:newcustomer]{newcustomer.spending} has to be given to \code{newdata}. See examples. #' +#' +#' @template template_predict_uncertainty +#' +#' #' @seealso models to predict spending: \link{gg}. #' @seealso models to predict transactions: \link{pnbd}, \link{bgnbd}, \link{ggomnbd}. #' @seealso \code{\link[CLVTools:predict.clv.fitted.transactions]{predict}} for transaction models diff --git a/R/f_interface_predict_clvfittedtransactions.R b/R/f_interface_predict_clvfittedtransactions.R index 037ab2f4..e8586259 100644 --- a/R/f_interface_predict_clvfittedtransactions.R +++ b/R/f_interface_predict_clvfittedtransactions.R @@ -76,25 +76,8 @@ #' To account for time units which are not annual, the continuous rate has to be further adjusted #' to delta=ln(1+d)/k, where k are the number of time units in a year. #' -#' @section Uncertainty Estimates: -#' Bootstrapping is used to provide confidence intervals of all predicted metrics. -#' These provide an estimate of parameter uncertainty. -#' To create bootstrapped data, customer ids are sampled with replacement until reaching original -#' length and all transactions of the sampled customers are used to create a new \code{clv.data} object. 
-#' A new model is fit on the bootstrapped data with the exact same specification as used when -#' fitting \code{object} (incl. start parameters and `optimx.args`) and it is then used to predict on this data. -#' -#' It is highly recommended to fit the original model (\code{object}) with a robust optimization -#' method, such as Nelder-Mead (\code{optimx.args=list(method='Nelder-Mead')}). -#' This ensures that the model can also be fit on the bootstrapped data. -#' -#' All prediction parameters, incl \code{prediction.end} and \code{continuous.discount.factor}, are forwarded -#' to the prediction on the bootstrapped data. -#' Per customer, the boundaries of the confidence intervals of each predicted metric are the -#' sample quantiles (\code{quantile(x, probs=c((1-level)/2, 1-(1-level)/2)}). -#' -#' See \link{clv.bootstrapped.apply} to create a custom bootstrapping procedure. #' +#' @template template_predict_uncertainty #' #' #' @seealso models to predict transactions: \link{pnbd}, \link{bgnbd}, \link{ggomnbd}. diff --git a/man-roxygen/template_predict_uncertainty.R b/man-roxygen/template_predict_uncertainty.R new file mode 100644 index 00000000..3e16ff92 --- /dev/null +++ b/man-roxygen/template_predict_uncertainty.R @@ -0,0 +1,24 @@ +#' +#' +#' +#' @section Uncertainty Estimates: +#' Bootstrapping is used to provide confidence intervals of all predicted metrics. +#' These provide an estimate of parameter uncertainty. +#' To create bootstrapped data, customer ids are sampled with replacement until reaching original +#' length and all transactions of the sampled customers are used to create a new \code{clv.data} object. +#' A new model is fit on the bootstrapped data with the exact same specification as used when +#' fitting \code{object} (incl. start parameters and `optimx.args`) and it is then used to predict on this data. 
+#' +#' It is highly recommended to fit the original model (\code{object}) with a robust optimization +#' method, such as Nelder-Mead (\code{optimx.args=list(method='Nelder-Mead')}). +#' This ensures that the model can also be fit on the bootstrapped data. +#' +#' All prediction parameters, incl \code{prediction.end} and \code{continuous.discount.factor}, are forwarded +#' to the prediction on the bootstrapped data. +#' Per customer, the boundaries of the confidence intervals of each predicted metric are the +#' sample quantiles (\code{quantile(x, probs=c((1-level)/2, 1-(1-level)/2))}). +#' +#' See \link{clv.bootstrapped.apply} to create a custom bootstrapping procedure. +#' +#' +#' diff --git a/man/predict.clv.fitted.spending.Rd b/man/predict.clv.fitted.spending.Rd index 49325d35..0e7bf4ce 100644 --- a/man/predict.clv.fitted.spending.Rd +++ b/man/predict.clv.fitted.spending.Rd @@ -69,6 +69,27 @@ this will be applied to \code{newdata} as well. To predict for new customers, the output of \link[CLVTools:newcustomer]{newcustomer.spending} has to be given to \code{newdata}. See examples. } +\section{Uncertainty Estimates}{ + +Bootstrapping is used to provide confidence intervals of all predicted metrics. +These provide an estimate of parameter uncertainty. +To create bootstrapped data, customer ids are sampled with replacement until reaching original +length and all transactions of the sampled customers are used to create a new \code{clv.data} object. +A new model is fit on the bootstrapped data with the exact same specification as used when +fitting \code{object} (incl. start parameters and `optimx.args`) and it is then used to predict on this data. +
+ +All prediction parameters, incl \code{prediction.end} and \code{continuous.discount.factor}, are forwarded +to the prediction on the bootstrapped data. +Per customer, the boundaries of the confidence intervals of each predicted metric are the +sample quantiles (\code{quantile(x, probs=c((1-level)/2, 1-(1-level)/2))}). + +See \link{clv.bootstrapped.apply} to create a custom bootstrapping procedure. +} + \examples{ \donttest{ data("apparelTrans") From ca0ec5f507dea7a2e2a9ed8dbaeb7146c2237106 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Sun, 24 Nov 2024 21:38:05 +0100 Subject: [PATCH 7/7] Prepare release --- DESCRIPTION | 2 +- NEWS.md | 8 ++++++++ cran-comments.md | 8 ++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b144668a..9ce8f283 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CLVTools Title: Tools for Customer Lifetime Value Estimation Version: 0.11.2 -Date: 2024-10-13 +Date: 2024-12-01 Authors@R: c( person(given="Patrick", family="Bachmann", email = "pbachma@ethz.ch", role = c("cre","aut")), person(given="Niels", family="Kuebler", email = "niels.kuebler@uzh.ch", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 9bda7369..4f59b64c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# CLVTools 0.11.2 + +### NEW FEATURES +* `newcustomer.spending()`: Predict average spending per transaction for customers without order history +* Improved optimizer defaults (higher iteration count) for PNBD dyncov + + + # CLVTools 0.11.1 ### NEW FEATURES diff --git a/cran-comments.md b/cran-comments.md index 06e9ae71..a0598fc3 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,12 +1,12 @@ # Comment from the authors -This is version 0.11.1 of the CLVTools package. +This is version 0.11.2 of the CLVTools package. 
The most relevant changes in this version are: -* Updated the example data -* Bootstrapping: Calculate confidence intervals using regular rather than "reversed-quantiles" - +* `newcustomer.spending()`: Predict average spending per transaction for customers without order history +* Improved optimizer defaults + # Test environments ## Testthat