Skip to content

Commit

Permalink
add mix methods;
Browse files Browse the repository at this point in the history
fix lm & cubist problem refitting + oosf keeping 'date' in recipe as time trend and removing index.num
  • Loading branch information
Marco Zanotti committed Jan 12, 2024
1 parent 4f6ffc9 commit 5cf7ada
Show file tree
Hide file tree
Showing 4 changed files with 342 additions and 123 deletions.
15 changes: 14 additions & 1 deletion R/tsfor_lecture2_feateng.R
Original file line number Diff line number Diff line change
Expand Up @@ -455,16 +455,29 @@ wrkfl_fit_lm_2_lag |>
pluck("fit") |>
summary()

# LM Base Recipe Workflow
wrkfl_fit_lm_3 <- workflow() |>
add_model(model_spec_lm) |>
add_recipe(rcp_spec) |>
fit(training(splits))
wrkfl_fit_lm_3
wrkfl_fit_lm_3 |>
extract_fit_parsnip() |>
pluck("fit") |>
summary()


# * Modeltime -------------------------------------------------------------

# Calibration
calibration_tbl <- modeltime_table(
wrkfl_fit_lm_1_spline,
wrkfl_fit_lm_2_lag
wrkfl_fit_lm_2_lag,
wrkfl_fit_lm_3
) |>
update_model_description(1, "LM - Spline Recipe") |>
update_model_description(2, "LM - Lag Recipe") |>
update_model_description(3, "LM - Base Recipe") |>
modeltime_calibrate(new_data = testing(splits))
calibration_tbl

Expand Down
154 changes: 127 additions & 27 deletions dashboard/R/fit_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,17 @@ get_default <- function(parameter, return_value = TRUE) {
"neighbors" = 5, # KNN
"boundary" = "linear", "cost" = 1, "margin" = 0.1, # SVM
"rf_mtry" = 5, "rf_trees" = 500, "rf_min_n" = 5, # Random Forest
"boost_mtry" = 5, "boost_trees" = 100, "boost_min_n" = 1, "boost_tree_depth" = 6, # Boosted Trees
"boost_method" = "XGBoost", # Boosted Trees
"boost_mtry" = 5, "boost_trees" = 100, "boost_min_n" = 1, "boost_tree_depth" = 6,
"boost_learn_rate" = 0.3, "boost_loss_reduction" = 0, "boost_sample_size" = 1,
"committees" = 1, "cub_neighbors" = 0, "max_rules" = 20 # Cubist
"committees" = 1, "cub_neighbors" = 0, "max_rules" = 20, # Cubist
"ff_hidden_units" = 10, "ff_penalty" = 0, "ff_epochs" = 100, "ff_dropout" = 0.1, "ff_learn_rate" = 0.3, # Feed-Forward
"ffar_non_seasonal_ar" = 1, "ffar_seasonal_ar" = 0, # Feed-Forward AR
"ffar_hidden_units" = 10, "ffar_penalty" = 0, "ffar_epochs" = 100, "ffar_num_networks" = 20,
"arima_boost_mtry" = 5, "arima_boost_trees" = 100, "arima_boost_min_n" = 1, "arima_boost_tree_depth" = 6, # ARIMA-Boost
"arima_boost_learn_rate" = 0.3, "arima_boost_loss_reduction" = 0, "arima_boost_sample_size" = 1,
"prophet_boost_mtry" = 5, "prophet_boost_trees" = 100, "prophet_boost_min_n" = 1, "prophet_boost_tree_depth" = 6, # Prophet-Boost
"prophet_boost_learn_rate" = 0.3, "prophet_boost_loss_reduction" = 0, "prophet_boost_sample_size" = 1
)

if (return_value) {
Expand All @@ -33,6 +41,41 @@ get_default <- function(parameter, return_value = TRUE) {

}

# Generate the preprocessing recipe specification for a forecasting method.
#
# Args:
#   data: training data containing the outcome `value` and a `date` column.
#   method: method name; mapped to a method type via parse_method().
#
# Returns: an (unprepped) recipes::recipe object.
generate_recipe_spec <- function(data, method) {

  method_type <- parse_method(method)

  if (method_type == "ts") {

    # Time series engines consume the date column natively:
    # no feature engineering required.
    rcp_spec <- recipe(value ~ ., data = data)

  } else if (method_type %in% c("ml", "dl")) {

    # ML / DL engines cannot use a Date column directly: expand the calendar
    # signature, keep `date` itself as a numeric time trend, and drop the
    # redundant index.num plus iso/xts artifacts before dummy-encoding.
    rcp_spec <- recipe(value ~ ., data = data) |>
      step_timeseries_signature(date) |>
      step_mutate(date = as.numeric(date)) |>
      step_zv(all_predictors()) |>
      step_rm(matches("(iso)|(xts)|(index.num)")) |>
      step_dummy(all_nominal(), one_hot = TRUE)

  } else if (method_type == "mix") {

    # Hybrid engines (e.g. ARIMA-Boost, Prophet-Boost) need the raw `date`
    # column for their sequential component, so keep it and normalize the
    # numeric index instead of removing it.
    rcp_spec <- recipe(value ~ ., data = data) |>
      step_timeseries_signature(date) |>
      step_normalize(date_index.num) |>
      step_zv(all_predictors()) |>
      step_rm(matches("(iso)|(xts)")) |>
      step_dummy(all_nominal(), one_hot = TRUE)

  } else {
    stop(paste("Unknown method type", method_type))
  }

  return(rcp_spec)

}

# function to generate the model specification
generate_model_spec <- function(method, params) {

Expand Down Expand Up @@ -220,17 +263,33 @@ generate_model_spec <- function(method, params) {

} else if (method == "Boosted Trees") {

model_spec <- boost_tree(
mode = "regression",
mtry = params$boost_mtry,
trees = params$boost_trees,
min_n = params$boost_min_n,
tree_depth = params$boost_tree_depth,
learn_rate = params$boost_learn_rate,
loss_reduction = params$boost_loss_reduction,
sample_size = params$boost_sample_size
) |>
set_engine("xgboost")
if (params$boost_method == "XGBoost") {
model_spec <- boost_tree(
mode = "regression",
mtry = params$boost_mtry,
trees = params$boost_trees,
min_n = params$boost_min_n,
tree_depth = params$boost_tree_depth,
learn_rate = params$boost_learn_rate,
loss_reduction = params$boost_loss_reduction,
sample_size = params$boost_sample_size
) |>
set_engine("xgboost")
} else if (params$boost_method == "LightGBM") {
model_spec <- boost_tree(
mode = "regression",
mtry = params$boost_mtry,
trees = params$boost_trees,
min_n = params$boost_min_n,
tree_depth = params$boost_tree_depth,
learn_rate = params$boost_learn_rate,
loss_reduction = params$boost_loss_reduction,
sample_size = params$boost_sample_size
) |>
set_engine("lightgbm")
} else {
stop(paste("Unknown Boosting method", params$boost_method))
}

} else if (method == "Cubist") {

Expand All @@ -241,6 +300,59 @@ generate_model_spec <- function(method, params) {
) |>
set_engine("Cubist")

} else if (method == "Feed-Forward") {

model_spec <- mlp(
mode = "regression",
hidden_units = params$ff_hidden_units,
penalty = params$ff_penalty,
epochs = params$ff_epochs,
dropout = params$ff_dropout,
learn_rate = params$ff_learn_rate
) |>
set_engine("nnet")

} else if (method == "Feed-Forward AR") {

model_spec <- nnetar_reg(
mode = "regression",
non_seasonal_ar = params$ffar_non_seasonal_ar,
seasonal_ar = params$ffar_seasonal_ar,
hidden_units = params$ffar_hidden_units,
penalty = params$ffar_penalty,
epochs = params$ffar_epochs,
num_networks = params$ffar_num_networks
) |>
set_engine("nnetar")

} else if (method == "ARIMA-Boost") {

model_spec <- arima_boost(
mode = "regression",
mtry = params$arima_boost_mtry,
trees = params$arima_boost_trees,
min_n = params$arima_boost_min_n,
tree_depth = params$arima_boost_tree_depth,
learn_rate = params$arima_boost_learn_rate,
loss_reduction = params$arima_boost_loss_reduction,
sample_size = params$arima_boost_sample_size
) |>
set_engine("auto_arima_xgboost")

} else if (method == "Prophet-Boost") {

model_spec <- prophet_boost(
mode = "regression",
mtry = params$prophet_boost_mtry,
trees = params$prophet_boost_trees,
min_n = params$prophet_boost_min_n,
tree_depth = params$prophet_boost_tree_depth,
learn_rate = params$prophet_boost_learn_rate,
loss_reduction = params$prophet_boost_loss_reduction,
sample_size = params$prophet_boost_sample_size
) |>
set_engine("prophet_xgboost")

} else {
stop(paste("Unknown method", method))
}
Expand All @@ -258,9 +370,7 @@ generate_model_spec <- function(method, params) {
fit_model <- function(data, method, params, n_assess, assess_type, seed = 1992) {

check_parameters(method, params)

set.seed(seed)
method_type <- parse_method(method)

splits <- timetk::time_series_split(
data, date_var = date,
Expand All @@ -270,18 +380,8 @@ fit_model <- function(data, method, params, n_assess, assess_type, seed = 1992)
)
train_tbl <- training(splits) |> select(-id, -frequency)

if (method_type == "ts") {
rcp_spec <- recipe(value ~ ., data = train_tbl)
} else if (method_type == "ml") {
rcp_spec <- recipe(value ~ ., data = train_tbl) |>
step_timeseries_signature(date) |>
step_normalize(date_index.num) |>
step_zv(all_predictors()) |>
step_rm(matches("(iso)|(xts)|(lbl)")) |>
step_rm(date)
} else {
stop(paste("Unknown method type", method_type))
}
# recipe specification
rcp_spec <- generate_recipe_spec(train_tbl, method)

# model specification
model_spec <- generate_model_spec(method, params)
Expand Down
29 changes: 25 additions & 4 deletions dashboard/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ set_options <- function() {
tsf.dashboard.methods = list(
"ts" = c("Naive", "Seasonal Naive", "Rolling Average", "ETS", "Theta", "SARIMA", "TBATS", "STLM", "Prophet"),
"ml" = c("Linear Regression", "Elastic Net", "MARS", "KNN", "SVM", "Random Forest", "Boosted Trees", "Cubist"),
"dl" = c("MLP", "NNETAR"),
"dl" = c("Feed-Forward", "COMING SOON!"),
"mix" = c("Feed-Forward AR", "ARIMA-Boost", "Prophet-Boost"),
"ens" = c("Average", "Weighted Average", "Median", "Linear Regression")
),
tsf.dashboard.methods_params = list(
Expand Down Expand Up @@ -38,10 +39,26 @@ set_options <- function() {
"SVM" = c("boundary", "cost", "margin"),
"Random Forest" = c("rf_mtry", "rf_trees", "rf_min_n"),
"Boosted Trees" = c(
"boost_method",
"boost_mtry", "boost_trees", "boost_min_n", "boost_tree_depth",
"boost_learn_rate", "boost_loss_reduction", "boost_sample_size"
),
"Cubist" = c("committees", "cub_neighbors", "max_rules")
"Cubist" = c("committees", "cub_neighbors", "max_rules"),
"Feed-Forward" = c("ff_hidden_units", "ff_penalty", "ff_epochs", "ff_dropout", "ff_learn_rate"),
"Feed-Forward AR" = c(
"ffar_non_seasonal_ar", "ffar_seasonal_ar",
"ffar_hidden_units", "ffar_penalty", "ffar_epochs", "ffar_num_networks"
),
"ARIMA-Boost" = c(
"arima_boost_mtry", "arima_boost_trees", "arima_boost_min_n",
"arima_boost_tree_depth", "arima_boost_learn_rate", "arima_boost_loss_reduction",
"arima_boost_sample_size"
),
"Prophet-Boost" = c(
"prophet_boost_mtry", "prophet_boost_trees", "prophet_boost_min_n",
"prophet_boost_tree_depth", "prophet_boost_learn_rate", "prophet_boost_loss_reduction",
"prophet_boost_sample_size"
)
),
tsf.dashboard.transfs = c("log", "boxcox", "norm", "stand", "diff", "sdiff"),
tsf.dashboard.test_transfs = c("test_log", "test_diff", "test_sdiff")
Expand Down Expand Up @@ -87,15 +104,19 @@ parse_frequency <- function(frequency) {
# Resolve a method name to its method type.
#
# Looks the name up in the category lists stored in the
# `tsf.dashboard.methods` option and returns the matching category key
# ("ts", "ml", "dl", "mix" or "ens"). Categories are checked in a fixed
# order, so a method listed in more than one category (e.g. "Linear
# Regression" appears under both "ml" and "ens") resolves to the first
# match. Unknown names raise an error.
parse_method <- function(method) {

  mtd <- getOption("tsf.dashboard.methods")

  for (mtd_type in c("ts", "ml", "dl", "mix", "ens")) {
    if (method %in% mtd[[mtd_type]]) {
      return(mtd_type)
    }
  }

  stop(paste("Unknown method", method))

}
Expand Down
Loading

0 comments on commit 5cf7ada

Please sign in to comment.