
Commit dd11e38: up tuning
Marco Zanotti committed Jan 18, 2024
1 parent 9b60055
Showing 6 changed files with 397 additions and 237 deletions.
86 changes: 72 additions & 14 deletions dashboard/R/fit_model.R
@@ -42,7 +42,7 @@ generate_recipe_spec <- function(data, method) {

rcp_spec <- recipe(value ~ ., data = data)

- } else if (method_type == "ml" | method_type == "dl") {
+ } else if (any(method_type %in% c("ml", "dl"))) {

rcp_spec <- recipe(value ~ ., data = data) |>
step_timeseries_signature(date) |>
@@ -51,13 +51,13 @@ generate_recipe_spec <- function(data, method) {
step_rm(matches("(iso)|(xts)|(index.num)")) |>
step_dummy(all_nominal(), one_hot = TRUE)

- } else if (method_type == "mix") {
+ } else if (any(method_type %in% c("mix", "aml"))) {

rcp_spec <- recipe(value ~ ., data = data) |>
step_timeseries_signature(date) |>
step_normalize(date_index.num) |>
step_mutate(trend = as.numeric(date)) |>
step_zv(all_predictors()) |>
step_rm(matches("(iso)|(xts)")) |>
step_rm(matches("(iso)|(xts)|(index.num)")) |>
step_dummy(all_nominal(), one_hot = TRUE)

} else {
@@ -346,6 +346,23 @@ generate_model_spec <- function(method, params) {
) |>
set_engine("prophet_xgboost")

+ } else if (method == "H2O AutoML") {
+
+   model_spec <- automl_reg(mode = "regression") |>
+     set_engine(
+       engine = "h2o",
+       project_name = "h2o_tsf_dashboard",
+       max_models = 50,
+       max_runtime_secs = !!params$h2o_max_time,
+       max_runtime_secs_per_model = !!params$h2o_max_time_model,
+       nfolds = !!params$h2o_nfolds,
+       sort_metric = !!params$h2o_metric,
+       seed = 1992
+       # include_algos = c("DRF"),
+       # exclude_algos = c("DeepLearning"),
+       # verbosity = NULL
+     )
+
} else {
stop(paste("Unknown method", method))
}
@@ -354,6 +371,29 @@ generate_model_spec <- function(method, params) {

}

+ # function to set the metric set
+ set_metric_set <- function(metric) {
+
+   metric <- tolower(metric)
+   if (metric == "mae") {
+     mtr_set <- yardstick::metric_set(mae)
+   } else if (metric == "mape") {
+     mtr_set <- yardstick::metric_set(mape)
+   } else if (metric == "mase") {
+     mtr_set <- yardstick::metric_set(mase)
+   } else if (metric == "smape") {
+     mtr_set <- yardstick::metric_set(smape)
+   } else if (metric == "mse") {
+     mtr_set <- yardstick::metric_set(mse)
+   } else if (metric == "rmse") {
+     mtr_set <- yardstick::metric_set(rmse)
+   } else {
+     stop(paste("Unknown metric", metric))
+   }
+   return(mtr_set)
+
+ }
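For reference, a minimal usage sketch of this helper (assuming yardstick and tune are attached; the "RMSE" label is a hypothetical value coming from the dashboard's metric selector):

valid_metric_set <- set_metric_set("RMSE")  # case-insensitive thanks to tolower()
# the resulting metric set can be passed to tune::tune_grid(metrics = ...)
# or tune::tune_bayes(metrics = ...), as done in fit_model_tuning() below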

# function to generate the model specification for tuning
set_tune_parameters <- function(method, params) {

@@ -366,7 +406,6 @@ set_tune_parameters <- function(method, params) {
}
}

- mtd_params <- getOption("tsf.dashboard.methods_params")[[method]] # get the parameters for the method
if (method == "Elastic Net") {
prm_ui_name <- params$tune_elanet
} else if (method == "MARS") {
@@ -396,6 +435,8 @@ set_tune_parameters <- function(method, params) {
} else {
stop(paste("Unknown method", method))
}

+ mtd_params <- getOption("tsf.dashboard.methods_params")[[method]] # get the parameters for the method
tune_params <- mtd_params[names(mtd_params) %in% prm_ui_name] # get the parameters to tune
is_to_tune <- mtd_params %in% tune_params
new_params <- purrr::map2(mtd_params, is_to_tune, set_tune) |> purrr::set_names(mtd_params)
@@ -456,6 +497,7 @@ fit_model <- function(data, method, params, n_assess, assess_type, seed = 1992)
wkfl_spec <- workflow() |> add_recipe(rcp_spec) |> add_model(model_spec)

# fitting
+ if (method == "H2O AutoML") { h2o.init() }
wkfl_fit <- wkfl_spec |> fit(data = train_tbl)

return(wkfl_fit)
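Note that automl_reg() needs a live H2O cluster before fit() is called. A minimal sketch of the lifecycle this commit wires across fit_model() and generate_forecast() (assuming the h2o and modeltime.h2o packages are attached):

library(h2o)
h2o.init()                                       # start or connect to a local cluster
wkfl_fit <- wkfl_spec |> fit(data = train_tbl)   # AutoML training runs on the cluster
# ... generate the forecasts, then release the cluster:
h2o.shutdown(prompt = FALSE)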
@@ -526,11 +568,13 @@ fit_model_tuning <- function(
data, method, params, n_assess, assess_type,
validation_type = "Time Series CV",
n_folds = 5, validation_metric = "rmse", grid_size = 10,
-   seed = 1992
+   bayesian_optimization = TRUE, seed = 1992
) {

params_new <- set_tune_parameters(method, params)
check_parameters(method, params_new)
+ validation_metric <- tolower(validation_metric)
+ valid_metric_set <- set_metric_set(validation_metric)
set.seed(seed)

# initial split
@@ -557,17 +601,31 @@
# tuning
doFuture::registerDoFuture()
future::plan(strategy = "multisession", workers = parallelly::availableCores() - 1)
- tune_fit <- wkfl_spec |>
-   tune::tune_grid(
-     resamples = cv_splits,
-     grid = params$tune_grid_size, # grid_spec
-     metrics = modeltime::default_forecast_accuracy_metric_set(),
-     control = tune::control_grid(save_pred = FALSE, allow_par = TRUE)
-   )
+ if (bayesian_optimization) {
+   tune_fit <- wkfl_spec |>
+     tune::tune_bayes(
+       resamples = cv_splits,
+       metrics = valid_metric_set,
+       initial = as.integer(params$tune_grid_size),
+       objective = tune::conf_bound(kappa = 0.1),
+       iter = 20L, # as.integer(length(params_new) * 20) good practice
+       control = tune::control_bayes(
+         save_pred = FALSE, allow_par = TRUE, verbose = TRUE, no_improve = 5L
+       )
+     )
+ } else {
+   tune_fit <- wkfl_spec |>
+     tune::tune_grid(
+       resamples = cv_splits,
+       metrics = valid_metric_set,
+       grid = as.integer(params$tune_grid_size), # grid_spec
+       control = tune::control_grid(save_pred = FALSE, allow_par = TRUE, verbose = TRUE)
+     )
+ }
future::plan(strategy = "sequential")

# picking best model
- best_fit <- tune::show_best(tune_fit, metric = tolower(validation_metric), n = 1)
+ best_fit <- tune::show_best(tune_fit, metric = validation_metric, n = 1)

# fitting (fit to training with optimal values)
wkfl_fit <- wkfl_spec |> tune::finalize_workflow(best_fit) |> fit(train_tbl)
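An illustrative call with the new flag (all argument values are hypothetical, loosely mirroring the inputs used in dashboard/test.R; data_tbl is an assumed tibble with date and value columns):

wkfl_fit <- fit_model_tuning(
  data = data_tbl, method = "Elastic Net", params = input,
  n_assess = 12, assess_type = "Rolling",
  validation_type = "Time Series CV", n_folds = 5,
  validation_metric = "RMSE", grid_size = 10,
  bayesian_optimization = TRUE,  # FALSE falls back to plain grid search
  seed = 1992
)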
1 change: 1 addition & 0 deletions dashboard/R/generate_forecast.R
@@ -75,6 +75,7 @@ generate_forecast <- function(
conf_interval = 0.95, conf_method = "conformal_split"
)

+ if (method == "H2O AutoML") { h2o.shutdown(prompt = FALSE) }
res <- list(
"splits" = splits,
"fit" = fitted_model_list,
14 changes: 12 additions & 2 deletions dashboard/R/utils.R
@@ -8,6 +8,7 @@ set_options <- function() {
"ml" = c("Linear Regression", "Elastic Net", "MARS", "KNN", "SVM", "Random Forest", "Boosted Trees", "Cubist"),
"dl" = c("Feed-Forward", "COMING SOON!"),
"mix" = c("Feed-Forward AR", "ARIMA-Boost", "Prophet-Boost"),
"aml" = c("H2O AutoML", "COMING SOON!"),
"ens" = c("Average", "Weighted Average", "Median"),
"stk" = c("Linear Regression", "Elastic Net"),
"tune" = c(
@@ -85,7 +86,9 @@ set_options <- function() {
) |> purrr::set_names(c(
"Random Predictors", "Trees", "Min Node Size", "Tree Depth",
"Learning Rate", "Min Loss Reduction", "Sample"
- ))
+ )),
+ "H2O AutoML" = c("h2o_max_time", "h2o_max_time_model", "h2o_nfolds", "h2o_metric") |>
+   purrr::set_names(c("Max Time (secs)", "Max Time per Model (secs)", "Folds", "Metric"))
),
tsf.dashboard.transfs = c("log", "boxcox", "norm", "stand", "diff", "sdiff"),
tsf.dashboard.test_transfs = c("test_log", "test_diff", "test_sdiff"),
@@ -140,8 +143,14 @@ parse_method <- function(method) {
res <- "dl"
} else if (method %in% mtd$mix) {
res <- "mix"
+ } else if (method %in% mtd$aml) {
+   res <- "aml"
} else if (method %in% mtd$ens) {
res <- "ens"
+ } else if (method %in% mtd$stk) {
+   res <- "stk"
+ } else if (method %in% mtd$tune) {
+   res <- "tune"
} else {
stop(paste("Unknown method", method))
}
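A quick sketch of the new routing (hypothetical call):

parse_method("H2O AutoML")  # returns "aml", which generate_recipe_spec() handles with the "mix" recipe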
@@ -198,7 +207,8 @@ get_default <- function(parameter, return_value = TRUE) {
"arima_boost_mtry" = 5, "arima_boost_trees" = 100, "arima_boost_min_n" = 1, "arima_boost_tree_depth" = 6, # ARIMA-Boost
"arima_boost_learn_rate" = 0.3, "arima_boost_loss_reduction" = 0, "arima_boost_sample_size" = 1,
"prophet_boost_mtry" = 5, "prophet_boost_trees" = 100, "prophet_boost_min_n" = 1, "prophet_boost_tree_depth" = 6, # Prophet-Boost
"prophet_boost_learn_rate" = 0.3, "prophet_boost_loss_reduction" = 0, "prophet_boost_sample_size" = 1
"prophet_boost_learn_rate" = 0.3, "prophet_boost_loss_reduction" = 0, "prophet_boost_sample_size" = 1,
"h2o_max_time" = 30, "h2o_max_time_model" = 15, "h2o_nfolds" = 5, "h2o_metric" = "RMSE"
)

if (return_value) {
40 changes: 39 additions & 1 deletion dashboard/test.R
@@ -198,7 +198,7 @@ input <- list(
n_folds = 5,
metric = "RMSE",
grid_size = 10,
- tune_xx_elanet = c("Penalty", "Mixture")
+ tune_elanet = c("Penalty", "Mixture")
)
input <- list(
n_future = 12,
@@ -221,6 +221,7 @@ validation_type = input$valid_type
n_folds = input$n_folds
validation_metric = input$metric
grid_size = input$grid_size
+ seed = 1992

fitted_model_list <- map(
input$method,
@@ -262,3 +263,40 @@ res <- map(
assess_type = input$assess_type
)
res$accuracy |> format_accuracy(single_method = TRUE)


+ ### GRID
+
+ model_spec <- rand_forest(
+   mode = "regression",
+   mtry = tune(),
+   trees = tune(),
+   min_n = tune()
+ ) |>
+   set_engine("ranger")
+
+ model_spec <- boost_tree(
+   mode = "regression",
+   mtry = tune(),
+   trees = tune(),
+   min_n = tune(),
+   tree_depth = tune(),
+   learn_rate = tune(),
+   loss_reduction = tune(),
+   sample_size = tune()
+ ) |>
+   set_engine("xgboost")
+
+ model_spec <- prophet_boost(
+   mode = "regression",
+   mtry = tune(),
+   trees = tune(),
+   min_n = tune(),
+   tree_depth = tune(),
+   learn_rate = tune(),
+   loss_reduction = tune(),
+   sample_size = tune()
+ ) |>
+   set_engine("prophet_xgboost")


10 changes: 6 additions & 4 deletions dashboard/todo.txt
@@ -11,15 +11,17 @@ Next steps:
- deployment on GitHub Actions
- move to another github repo
- documentation at the top right
+ - add package::function for every function

To Do:
- - tune_bayes

- - add automl methods (h2o)
+ - still an issue with mtry in tune_bayes, solve it by building the grid
+ - check h2o in compare and combine
- think through and add the stacking section (LM + Elastic Net)
- - think through and add the scenario forecasting + uncertainty + judgmental section (gauges?)
+ - think through and add the scenario forecasting + uncertainty + judgmental section (gauges?) + rolling variances
- think through and add saving of the optimized model + use the optimized model in other sections
- add dl methods (NeuralProphet + NBEATS + DeepAR)
- think through and add the feature engineering section (with saving in mind)
- modify the model output with parsing
- change how parameter names are assigned in the UI
- XAI
+ - h2o cluster update