
Commit 582b081

up h2o
up grid generation
fix bayes tuning
up stacking ui
Marco Zanotti committed Jan 19, 2024
1 parent dd11e38 commit 582b081
Showing 5 changed files with 174 additions and 125 deletions.
46 changes: 28 additions & 18 deletions dashboard/R/fit_model.R
@@ -452,22 +452,24 @@ set_tune_parameters <- function(method, params) {

 }

+# function to generate the feature set
+generate_feature_set <- function(recipe_spec) {
+  feature_set <- recipe_spec |>
+    recipes::prep() |>
+    recipes::bake(new_data = NULL) |>
+    dplyr::select(-date, -value)
+  return(feature_set)
+}
+
 # function to perform grid specification
-generate_grid_spec <- function(method, model_spec, grid_size, seed = 1992) {
+generate_grid_spec <- function(method, model_spec, recipe_spec, grid_size, seed = 1992) {

   set.seed(seed)
-  if (method %in% c("Random Forest", "Boosted Trees")) {
-    grid_spec <- grid_latin_hypercube(
-      hardhat::extract_parameter_set_dials(model_spec) |>
-        recipes::update(mtry = mtry(range = c(1, 15))),
-      size = grid_size
-    )
-  } else {
-    grid_spec <- dials::grid_latin_hypercube(
-      hardhat::extract_parameter_set_dials(model_spec),
-      size = grid_size
-    )
-  }
+  feature_set <- generate_feature_set(recipe_spec)
+  updated_parameter_set <- model_spec |>
+    hardhat::extract_parameter_set_dials() |>
+    dials::finalize(x = feature_set)
+  grid_spec <- dials::grid_latin_hypercube(updated_parameter_set, size = grid_size)
   return(grid_spec)

 }
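
Note on this hunk: tuning parameters such as mtry() carry an unknown() upper bound, because the maximum number of sampled predictors depends on the data. The old branch pinned it by hand (range = c(1, 15)); the new version bakes the recipe and lets dials::finalize() resolve every data-dependent range. A self-contained sketch of that mechanism on toy data (variable names here are illustrative, not from the repo):

library(parsnip)
library(dials)

spec <- rand_forest(mode = "regression", mtry = tune::tune(), min_n = tune::tune()) |>
  set_engine("ranger")

param_set <- hardhat::extract_parameter_set_dials(spec)
param_set$object[[1]]  # mtry: range [1, ?], upper bound still unknown

toy_predictors <- data.frame(x1 = rnorm(20), x2 = rnorm(20), x3 = rnorm(20))
finalized <- dials::finalize(param_set, x = toy_predictors)
finalized$object[[1]]  # mtry: range [1, 3], bound set from ncol(toy_predictors)

dials::grid_latin_hypercube(finalized, size = 5)  # now samples a valid grid
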
@@ -575,7 +577,6 @@ fit_model_tuning <- function(
   check_parameters(method, params_new)
   validation_metric <- tolower(validation_metric)
   valid_metric_set <- set_metric_set(validation_metric)
-  set.seed(seed)

   # initial split
   splits <- generate_initial_split(data, n_assess, assess_type)
@@ -596,30 +597,39 @@
   wkfl_spec <- workflow() |> add_recipe(rcp_spec) |> add_model(model_spec)

   # grid specification
-  # grid_spec <- generate_grid_spec(method, model_spec, grid_size, seed)
+  # grid_spec <- generate_grid_spec(method, model_spec, rcp_spec, grid_size, seed)

   # tuning
   doFuture::registerDoFuture()
   future::plan(strategy = "multisession", workers = parallelly::availableCores() - 1)
   if (bayesian_optimization) {
+    feat_set <- generate_feature_set(rcp_spec)
+    updated_param_set <- hardhat::extract_parameter_set_dials(model_spec) |>
+      dials::finalize(x = feat_set)
+    set.seed(seed)
     tune_fit <- wkfl_spec |>
       tune::tune_bayes(
         resamples = cv_splits,
         metrics = valid_metric_set,
-        initial = as.integer(params$tune_grid_size),
+        initial = as.integer(params$tune_grid_size), # tune_fit (result from tune_grid)
         objective = tune::conf_bound(kappa = 0.1),
         iter = 20L, # as.integer(length(params_new) * 20) good practice
+        param_info = updated_param_set,
         control = tune::control_bayes(
           save_pred = FALSE, allow_par = TRUE, verbose = TRUE, no_improve = 5L
         )
       )
   } else {
+    set.seed(seed)
     tune_fit <- wkfl_spec |>
       tune::tune_grid(
         preprocessor = rcp_spec,
         resamples = cv_splits,
         metrics = valid_metric_set,
-        grid = as.integer(params$tune_grid_size), # grid_spec
-        control = tune::control_grid(save_pred = FALSE, allow_par = TRUE, verbose = TRUE)
+        grid = as.integer(params$tune_grid_size), # as.integer(params$tune_grid_size)
+        control = tune::control_grid(
+          save_pred = FALSE, allow_par = TRUE, verbose = TRUE
+        )
       )
   }
   future::plan(strategy = "sequential")
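
Two things change in this hunk: set.seed() moves from the top of fit_model_tuning() to sit directly before each tuning call, so both branches draw from the same seeded state, and the Bayesian branch now builds a finalized param_info. tune_bayes() proposes new candidates by sampling the declared parameter ranges, so any range left as unknown() (again, typically mtry) aborts the search. A condensed sketch of the guard pattern, reusing generate_feature_set() from this commit (model_spec and rcp_spec as in the surrounding function):

param_set <- hardhat::extract_parameter_set_dials(model_spec)
if (any(purrr::map_lgl(param_set$object, dials::has_unknowns))) {
  # finalize data-dependent ranges against the baked predictors
  param_set <- dials::finalize(param_set, x = generate_feature_set(rcp_spec))
}
# param_set can now be passed as tune::tune_bayes(..., param_info = param_set)
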
2 changes: 1 addition & 1 deletion dashboard/R/generate_forecast.R
@@ -75,7 +75,7 @@ generate_forecast <- function(
     conf_interval = 0.95, conf_method = "conformal_split"
   )

-  if (method == "H2O AutoML") { h2o.shutdown(prompt = FALSE) }
+  if (any(method %in% "H2O AutoML")) { h2o.shutdown(prompt = FALSE) }
   res <- list(
     "splits" = splits,
     "fit" = fitted_model_list,
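
The one-line fix above matters because `method` can now hold several selected methods at once: `==` compares elementwise and returns one logical per element, which `if` rejects (an error as of R 4.2), while `any(... %in% ...)` collapses to a single logical. A tiny illustration:

method <- c("ETS", "SARIMA", "H2O AutoML")
method == "H2O AutoML"         # FALSE FALSE TRUE: length 3, not a valid `if` condition
any(method %in% "H2O AutoML")  # TRUE: a single logical, safe in `if`
if (any(method %in% "H2O AutoML")) message("shutting down the H2O cluster")
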
147 changes: 62 additions & 85 deletions dashboard/test.R
@@ -68,22 +68,26 @@ input <- list(
   n_future = 12,
   n_assess = 24,
   assess_type = "Rolling",
-  method = c("ETS", "SARIMA"),
-  auto_ets = TRUE,
-  error = "auto",
-  trend = "auto",
-  season = "auto",
-  damping = "auto",
-  smooth_level = 0.1,
-  smooth_trend = 0.1,
-  smooth_season = 0.1,
-  auto_arima = TRUE,
-  non_seasonal_ar = 1,
-  non_seasonal_differences = 1,
-  non_seasonal_ma = 1,
-  seasonal_ar = 1,
-  seasonal_differences = 1,
-  seasonal_ma = 1
+  method = c("ETS", "SARIMA", "H2O AutoML"),
+  auto_ets = get_default("auto_ets"),
+  error = get_default("error"),
+  trend = get_default("trend"),
+  season = get_default("season"),
+  damping = get_default("damping"),
+  smooth_level = get_default("smooth_level"),
+  smooth_trend = get_default("smooth_trend"),
+  smooth_season = get_default("smooth_season"),
+  auto_arima = get_default("auto_arima"),
+  non_seasonal_ar = get_default("non_seasonal_ar"),
+  non_seasonal_differences = get_default("non_seasonal_differences"),
+  non_seasonal_ma = get_default("non_seasonal_ma"),
+  seasonal_ar = get_default("seasonal_ar"),
+  seasonal_differences = get_default("seasonal_differences"),
+  seasonal_ma = get_default("seasonal_ma"),
+  h2o_max_time = get_default("h2o_max_time"),
+  h2o_max_time_model = get_default("h2o_max_time_model"),
+  h2o_nfolds = get_default("h2o_nfolds"),
+  h2o_metric = get_default("h2o_metric")
 )
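
get_default() is the dashboard's own helper; swapping the hard-coded literals for get_default() keeps these test inputs aligned with the UI defaults in one place. A hypothetical sketch of such a helper, assuming defaults live in a named list (the shown values match the literals this commit replaces; the real implementation lives in the dashboard's R/ sources):

default_params <- list(auto_ets = TRUE, error = "auto", smooth_level = 0.1)  # illustrative subset
get_default <- function(name) {
  if (!name %in% names(default_params)) stop("no default for parameter: ", name)
  default_params[[name]]
}
get_default("smooth_level")  # 0.1
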

fitted_model_list <- map(
@@ -124,29 +128,34 @@ res$accuracy |> format_accuracy(single_method = FALSE) |> filter(Type == "Test")
 # COMBINE -----------------------------------------------------------------
 data_selected <- get_data(datasets[1])
 ts_freq <- data_selected$frequency |> unique() |> parse_frequency()
+ens_methods <- getOption("tsf.dashboard.methods")[["ens"]]
 input <- list(
   n_future = 12,
   n_assess = 24,
   assess_type = "Rolling",
-  method = c("ETS", "SARIMA", "Elastic Net"),
+  method = c("ETS", "SARIMA", "Elastic Net", "H2O AutoML"),
   ens_type = ens_methods,
-  auto_ets = TRUE,
-  error = "auto",
-  trend = "auto",
-  season = "auto",
-  damping = "auto",
-  smooth_level = 0.1,
-  smooth_trend = 0.1,
-  smooth_season = 0.1,
-  auto_arima = TRUE,
-  non_seasonal_ar = 1,
-  non_seasonal_differences = 1,
-  non_seasonal_ma = 1,
-  seasonal_ar = 1,
-  seasonal_differences = 1,
-  seasonal_ma = 1,
-  penalty = 1,
-  mixture = 0.5
+  auto_ets = get_default("auto_ets"),
+  error = get_default("error"),
+  trend = get_default("trend"),
+  season = get_default("season"),
+  damping = get_default("damping"),
+  smooth_level = get_default("smooth_level"),
+  smooth_trend = get_default("smooth_trend"),
+  smooth_season = get_default("smooth_season"),
+  auto_arima = get_default("auto_arima"),
+  non_seasonal_ar = get_default("non_seasonal_ar"),
+  non_seasonal_differences = get_default("non_seasonal_differences"),
+  non_seasonal_ma = get_default("non_seasonal_ma"),
+  seasonal_ar = get_default("seasonal_ar"),
+  seasonal_differences = get_default("seasonal_differences"),
+  seasonal_ma = get_default("seasonal_ma"),
+  penalty = get_default("penalty"),
+  mixture = get_default("mixture"),
+  h2o_max_time = get_default("h2o_max_time"),
+  h2o_max_time_model = get_default("h2o_max_time_model"),
+  h2o_nfolds = get_default("h2o_nfolds"),
+  h2o_metric = get_default("h2o_metric")
 )
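
The new ens_methods line reads the available ensemble types from a package-level option instead of hard-coding them at the call site. A sketch of the pattern with illustrative method names (the real registry is populated elsewhere in the dashboard, so these values are assumptions):

options(tsf.dashboard.methods = list(ens = c("Average", "Median")))  # hypothetical values
getOption("tsf.dashboard.methods")[["ens"]]
#> [1] "Average" "Median"
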

fitted_model_list <- map(
@@ -201,26 +210,29 @@ input <- list(
   tune_elanet = c("Penalty", "Mixture")
 )
 input <- list(
-  n_future = 12,
-  n_assess = 24,
-  assess_type = "Rolling",
-  method = "Random Forest",
-  valid_type = "K-Fold CV",
-  n_folds = 5,
-  valid_metric = "RMSE",
-  grid_size = 10,
-  tune_xx_rf = c()
+  tune_n_future = 12,
+  tune_n_assess = 24,
+  tune_assess_type = "Rolling",
+  tune_method = "Random Forest",
+  tune_valid_type = "K-Fold CV",
+  tune_n_folds = 5,
+  tune_valid_metric = "RMSE",
+  tune_bayes = TRUE,
+  tune_grid_size = 10,
+  tune_rf = c("Random Predictors", "Trees")
 )

 data = data_selected
 params = input
-n_assess = input$n_assess
-assess_type = input$assess_type
-method = input$method
-validation_type = input$valid_type
-n_folds = input$n_folds
-validation_metric = input$metric
-grid_size = input$grid_size
+n_assess = input$tune_n_assess
+assess_type = input$tune_assess_type
+method = input$tune_method
+validation_type = input$tune_valid_type
+n_folds = input$tune_n_folds
+validation_metric = input$tune_valid_metric
+bayesian_optimization = input$tune_bayes
+grid_size = input$tune_grid_size
+n_future = input$tune_n_future
 seed = 1992
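
These assignments unpack the Shiny inputs by hand, which is handy for stepping through the tuning routine line by line. A hedged sketch of the equivalent direct call (argument names inferred from this test script; the authoritative signature is in dashboard/R/fit_model.R):

fitted <- fit_model_tuning(
  data = data_selected, params = input, method = input$tune_method,
  n_future = input$tune_n_future, n_assess = input$tune_n_assess,
  assess_type = input$tune_assess_type, validation_type = input$tune_valid_type,
  n_folds = input$tune_n_folds, validation_metric = input$tune_valid_metric,
  bayesian_optimization = input$tune_bayes, grid_size = input$tune_grid_size,
  seed = 1992
)
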

fitted_model_list <- map(
@@ -265,38 +277,3 @@ res <- map(
res$accuracy |> format_accuracy(single_method = TRUE)


-### GRID
-
-model_spec <- rand_forest(
-  mode = "regression",
-  mtry = tune(),
-  trees = tune(),
-  min_n = tune()
-) |>
-  set_engine("ranger")
-
-model_spec <- boost_tree(
-  mode = "regression",
-  mtry = tune(),
-  trees = tune(),
-  min_n = tune(),
-  tree_depth = tune(),
-  learn_rate = tune(),
-  loss_reduction = tune(),
-  sample_size = tune()
-) |>
-  set_engine("xgboost")
-
-model_spec <- prophet_boost(
-  mode = "regression",
-  mtry = tune(),
-  trees = tune(),
-  min_n = tune(),
-  tree_depth = tune(),
-  learn_rate = tune(),
-  loss_reduction = tune(),
-  sample_size = tune()
-) |>
-  set_engine("prophet_xgboost")


2 changes: 0 additions & 2 deletions dashboard/todo.txt
@@ -14,8 +14,6 @@ Next steps:
 - add package::function for every function

 To Do:
-- still an issue with mtry in tune_bayes; fix it by building the grid
-- check h2o in compare and combine
 - think about and add the stacking section (LM + Elastic Net)
 - think about and add the scenario forecasting section + uncertainty + judgmental (gauges?) + rolling variances
 - think about and add saving the optimized model + use optimize in other sections
