diff --git a/dashboard/R/fit_model.R b/dashboard/R/fit_model.R index 8c95657..50551ae 100644 --- a/dashboard/R/fit_model.R +++ b/dashboard/R/fit_model.R @@ -452,22 +452,24 @@ set_tune_parameters <- function(method, params) { } +# function to generate the feature set +generate_feature_set <- function(recipe_spec) { + feature_set <- recipe_spec |> + recipes::prep() |> + recipes::bake(new_data = NULL) |> + dplyr::select(-date, -value) + return(feature_set) +} + # function to perform grid specification -generate_grid_spec <- function(method, model_spec, grid_size, seed = 1992) { +generate_grid_spec <- function(method, model_spec, recipe_spec, grid_size, seed = 1992) { set.seed(seed) - if (method %in% c("Random Forest", "Boosted Trees")) { - grid_spec <- grid_latin_hypercube( - hardhat::extract_parameter_set_dials(model_spec) |> - recipes::update(mtry = mtry(range = c(1, 15))), - size = grid_size - ) - } else { - grid_spec <- dials::grid_latin_hypercube( - hardhat::extract_parameter_set_dials(model_spec), - size = grid_size - ) - } + feature_set <- generate_feature_set(recipe_spec) + updated_parameter_set <- model_spec |> + hardhat::extract_parameter_set_dials() |> + dials::finalize(x = feature_set) + grid_spec <- dials::grid_latin_hypercube(updated_parameter_set, size = grid_size) return(grid_spec) } @@ -575,7 +577,6 @@ fit_model_tuning <- function( check_parameters(method, params_new) validation_metric <- tolower(validation_metric) valid_metric_set <- set_metric_set(validation_metric) - set.seed(seed) # initial split splits <- generate_initial_split(data, n_assess, assess_type) @@ -596,30 +597,39 @@ fit_model_tuning <- function( wkfl_spec <- workflow() |> add_recipe(rcp_spec) |> add_model(model_spec) # grid specification - # grid_spec <- generate_grid_spec(method, model_spec, grid_size, seed) + # grid_spec <- generate_grid_spec(method, model_spec, rcp_spec, grid_size, seed) # tuning doFuture::registerDoFuture() future::plan(strategy = "multisession", workers = parallelly::availableCores() - 1) if (bayesian_optimization) { + feat_set <- generate_feature_set(rcp_spec) + updated_param_set <- hardhat::extract_parameter_set_dials(model_spec) |> + dials::finalize(x = feat_set) + set.seed(seed) tune_fit <- wkfl_spec |> tune::tune_bayes( resamples = cv_splits, metrics = valid_metric_set, - initial = as.integer(params$tune_grid_size), + initial = as.integer(params$tune_grid_size), # tune_fit (result from tune_grid) objective = tune::conf_bound(kappa = 0.1), iter = 20L, # as.integer(length(params_new) * 20) good practice + param_info = updated_param_set, control = tune::control_bayes( save_pred = FALSE, allow_par = TRUE, verbose = TRUE, no_improve = 5L ) ) } else { + set.seed(seed) tune_fit <- wkfl_spec |> tune::tune_grid( + preprocessor = rcp_spec, resamples = cv_splits, metrics = valid_metric_set, - grid = as.integer(params$tune_grid_size), # grid_spec - control = tune::control_grid(save_pred = FALSE, allow_par = TRUE, verbose = TRUE) + grid = as.integer(params$tune_grid_size), # as.integer(params$tune_grid_size) + control = tune::control_grid( + save_pred = FALSE, allow_par = TRUE, verbose = TRUE + ) ) } future::plan(strategy = "sequential") diff --git a/dashboard/R/generate_forecast.R b/dashboard/R/generate_forecast.R index cbcd406..d391ff3 100644 --- a/dashboard/R/generate_forecast.R +++ b/dashboard/R/generate_forecast.R @@ -75,7 +75,7 @@ generate_forecast <- function( conf_interval = 0.95, conf_method = "conformal_split" ) - if (method == "H2O AutoML") { h2o.shutdown(prompt = FALSE) } + if (any(method %in% "H2O AutoML")) { h2o.shutdown(prompt = FALSE) } res <- list( "splits" = splits, "fit" = fitted_model_list, diff --git a/dashboard/test.R b/dashboard/test.R index ab50549..fa00898 100644 --- a/dashboard/test.R +++ b/dashboard/test.R @@ -68,22 +68,26 @@ input <- list( n_future = 12, n_assess = 24, assess_type = "Rolling", - method = c("ETS", "SARIMA"), - auto_ets = TRUE, - error = "auto", - trend = "auto", - season = "auto", - damping = "auto", - smooth_level = 0.1, - smooth_trend = 0.1, - smooth_season = 0.1, - auto_arima = TRUE, - non_seasonal_ar = 1, - non_seasonal_differences = 1, - non_seasonal_ma = 1, - seasonal_ar = 1, - seasonal_differences = 1, - seasonal_ma = 1 + method = c("ETS", "SARIMA", "H2O AutoML"), + auto_ets = get_default("auto_ets"), + error = get_default("error"), + trend = get_default("trend"), + season = get_default("season"), + damping = get_default("damping"), + smooth_level = get_default("smooth_level"), + smooth_trend = get_default("smooth_trend"), + smooth_season = get_default("smooth_season"), + auto_arima = get_default("auto_arima"), + non_seasonal_ar = get_default("non_seasonal_ar"), + non_seasonal_differences = get_default("non_seasonal_differences"), + non_seasonal_ma = get_default("non_seasonal_ma"), + seasonal_ar = get_default("seasonal_ar"), + seasonal_differences = get_default("seasonal_differences"), + seasonal_ma = get_default("seasonal_ma"), + h2o_max_time = get_default("h2o_max_time"), + h2o_max_time_model = get_default("h2o_max_time_model"), + h2o_nfolds = get_default("h2o_nfolds"), + h2o_metric = get_default("h2o_metric") ) fitted_model_list <- map( @@ -124,29 +128,34 @@ res$accuracy |> format_accuracy(single_method = FALSE) |> filter(Type == "Test") # COMBINE ----------------------------------------------------------------- data_selected <- get_data(datasets[1]) ts_freq <- data_selected$frequency |> unique() |> parse_frequency() +ens_methods <- getOption("tsf.dashboard.methods")[["ens"]] input <- list( n_future = 12, n_assess = 24, assess_type = "Rolling", - method = c("ETS", "SARIMA", "Elastic Net"), + method = c("ETS", "SARIMA", "Elastic Net", "H2O AutoML"), ens_type = ens_methods, - auto_ets = TRUE, - error = "auto", - trend = "auto", - season = "auto", - damping = "auto", - smooth_level = 0.1, - smooth_trend = 0.1, - smooth_season = 0.1, - auto_arima = TRUE, - non_seasonal_ar = 1, - non_seasonal_differences = 1, - non_seasonal_ma = 1, - seasonal_ar = 1, - seasonal_differences = 1, - seasonal_ma = 1, - penalty = 1, - mixture = 0.5 + auto_ets = get_default("auto_ets"), + error = get_default("error"), + trend = get_default("trend"), + season = get_default("season"), + damping = get_default("damping"), + smooth_level = get_default("smooth_level"), + smooth_trend = get_default("smooth_trend"), + smooth_season = get_default("smooth_season"), + auto_arima = get_default("auto_arima"), + non_seasonal_ar = get_default("non_seasonal_ar"), + non_seasonal_differences = get_default("non_seasonal_differences"), + non_seasonal_ma = get_default("non_seasonal_ma"), + seasonal_ar = get_default("seasonal_ar"), + seasonal_differences = get_default("seasonal_differences"), + seasonal_ma = get_default("seasonal_ma"), + penalty = get_default("penalty"), + mixture = get_default("mixture"), + h2o_max_time = get_default("h2o_max_time"), + h2o_max_time_model = get_default("h2o_max_time_model"), + h2o_nfolds = get_default("h2o_nfolds"), + h2o_metric = get_default("h2o_metric") ) fitted_model_list <- map( @@ -201,26 +210,29 @@ input <- list( tune_elanet = c("Penalty", "Mixture") ) input <- list( - n_future = 12, - n_assess = 24, - assess_type = "Rolling", - method = "Random Forest", - valid_type = "K-Fold CV", - n_folds = 5, - valid_metric = "RMSE", - grid_size = 10, - tune_xx_rf = c() + tune_n_future = 12, + tune_n_assess = 24, + tune_assess_type = "Rolling", + tune_method = "Random Forest", + tune_valid_type = "K-Fold CV", + tune_n_folds = 5, + tune_valid_metric = "RMSE", + tune_bayes = TRUE, + tune_grid_size = 10, + tune_rf = c("Random Predictors", "Trees") ) data = data_selected params = input -n_assess = input$n_assess -assess_type = input$assess_type -method = input$method -validation_type = input$valid_type -n_folds = input$n_folds -validation_metric = input$metric -grid_size = input$grid_size +n_assess = input$tune_n_assess +assess_type = input$tune_assess_type +method = input$tune_method +validation_type = input$tune_valid_type +n_folds = input$tune_n_folds +validation_metric = input$tune_valid_metric +bayesian_optimization = input$tune_bayes +grid_size = input$tune_grid_size +n_future = input$tune_n_future seed = 1992 fitted_model_list <- map( @@ -265,38 +277,3 @@ res <- map( res$accuracy |> format_accuracy(single_method = TRUE) -### GRID - -model_spec <- rand_forest( - mode = "regression", - mtry = tune(), - trees = tune(), - min_n = tune() -) |> - set_engine("ranger") - -model_spec <- boost_tree( - mode = "regression", - mtry = tune(), - trees = tune(), - min_n = tune(), - tree_depth = tune(), - learn_rate = tune(), - loss_reduction = tune(), - sample_size = tune() -) |> - set_engine("xgboost") - -model_spec <- prophet_boost( - mode = "regression", - mtry = tune(), - trees = tune(), - min_n = tune(), - tree_depth = tune(), - learn_rate = tune(), - loss_reduction = tune(), - sample_size = tune() -) |> - set_engine("prophet_xgboost") - - diff --git a/dashboard/todo.txt b/dashboard/todo.txt index 4c1bb7f..5042130 100644 --- a/dashboard/todo.txt +++ b/dashboard/todo.txt @@ -14,8 +14,6 @@ Next steps: - aggiungere package::function per ogni funzione To Do: -- ancora problema con mtry con tune_bayes, risolvere creando la griglia -- check h2o in compare e combine - pensare e aggiungere la sezione di stacking (LM + Elastic Net) - pensare e aggiungere la sezione di scenario forecasting + uncertainty + judgmental (gauges?) + rolling variances - pensare e aggiungere il save del modello ottimizzato + use optimize in altre sezioni diff --git a/dashboard/tsf_dashboard.Rmd b/dashboard/tsf_dashboard.Rmd index 40de32f..c923d49 100644 --- a/dashboard/tsf_dashboard.Rmd +++ b/dashboard/tsf_dashboard.Rmd @@ -1471,23 +1471,30 @@ pickerInput( ) ) +br() +awesomeCheckbox(inputId = "ens_tune", label = "Use optimized algorithms?", value = FALSE) +br() + pickerInput( inputId = "ens_type", label = h3("Ensemble Method"), choices = ens_methods, selected = "Average", multiple = TRUE, options = list("actions-box" = TRUE) ) +actionButton(inputId = "ens_apply_forecast", label = "Forecast!", icon = icon("play")) +br() +br() +br() pickerInput( inputId = "stk_type", label = h3("Stacking Method"), choices = stk_methods, selected = "Linear Regression", multiple = TRUE, options = list("actions-box" = TRUE) ) +actionButton(inputId = "stk_apply_forecast", label = "Forecast!", icon = icon("play")) br() -awesomeCheckbox(inputId = "ens_tune", label = "Use optimized algorithms?", value = FALSE) br() - -actionButton(inputId = "ens_apply_forecast", label = "Forecast!", icon = icon("play")) +br() actionButton(inputId = "ens_reset", label = "Reset", icon = icon("sync")) observeEvent( eventExpr = input$ens_reset, @@ -1496,9 +1503,9 @@ observeEvent( updateNumericInput(session = session, inputId = "ens_n_assess", value = 24) updatePrettyRadioButtons(session = session, inputId = "ens_assess_type", selected = "Rolling") updatePickerInput(session = session, inputId = "ens_method", selected = c("ETS", "SARIMA")) + updateMaterialSwitch(session = session, inputId = "ens_tune", value = FALSE) updatePickerInput(session = session, inputId = "ens_type", selected = "Average") updatePickerInput(session = session, inputId = "stk_type", selected = "Linear Regression") - updateMaterialSwitch(session = session, inputId = "ens_tune", value = FALSE) # shinyjs::delay(ms = 300, expr = {shinyjs::click(id = "tune_apply_forecast")}) } ) @@ -1523,31 +1530,88 @@ ensemble_results <- eventReactive( } ) +stacking_results <- eventReactive( + eventExpr = input$stk_apply_forecast, + valueExpr = { + map( + input$ens_method, + ~ fit_model( + data = data_cleaned(), method = ., params = input, + n_assess = input$ens_n_assess, assess_type = input$ens_assess_type, seed = 1992 + ) + ) |> + generate_forecast( + data = data_cleaned(), method = input$ens_method, n_future = input$ens_n_future, + n_assess = input$ens_n_assess, assess_type = input$ens_assess_type, + ensemble_method = input$stk_type + ) + } +) + output$plot_ens_test_forecast <- renderPlotly({ - ensemble_results()$test_forecast |> - plot_modeltime_forecast(.legend_show = TRUE, .title = FALSE, .conf_interval_fill = "lightblue") + if (input$ens_apply_forecast) { + ensemble_results()$test_forecast |> + plot_modeltime_forecast(.legend_show = TRUE, .title = FALSE, .conf_interval_fill = "lightblue") + } else if (input$stk_apply_forecast) { + stacking_results()$test_forecast |> + plot_modeltime_forecast(.legend_show = TRUE, .title = FALSE, .conf_interval_fill = "lightblue") + } }) output$plot_ens_oos_forecast <- renderPlotly({ - ensemble_results()$oos_forecast |> - plot_modeltime_forecast(.legend_show = TRUE, .title = FALSE, .conf_interval_fill = "lightblue") + if (input$ens_apply_forecast) { + ensemble_results()$oos_forecast |> + plot_modeltime_forecast(.legend_show = TRUE, .title = FALSE, .conf_interval_fill = "lightblue") + } else if (input$stk_apply_forecast) { + stacking_results()$oos_forecast |> + plot_modeltime_forecast(.legend_show = TRUE, .title = FALSE, .conf_interval_fill = "lightblue") + } }) output$ens_accuracy_table <- renderDT({ - ensemble_results()$accuracy |> - format_accuracy(single_method = FALSE) |> - filter(Type == "Test") |> - datatable( - options = list( - ordering = TRUE, pageLength = 6, lengthChange = FALSE, searching = FALSE, - info = FALSE, paging = FALSE, scrollCollapse = TRUE, scrollY = 300 - ), - rownames = FALSE - ) + if (input$ens_apply_forecast) { + ensemble_results()$accuracy |> + format_accuracy(single_method = FALSE) |> + filter(Type == "Test") |> + datatable( + options = list( + ordering = TRUE, + pageLength = 6, + lengthChange = FALSE, + searching = FALSE, + info = FALSE, + paging = FALSE, + scrollCollapse = TRUE, + scrollY = 300 + ), + rownames = FALSE + ) + } else if (input$stk_apply_forecast) { + stacking_results()$accuracy |> + format_accuracy(single_method = FALSE) |> + filter(Type == "Test") |> + datatable( + options = list( + ordering = TRUE, + pageLength = 6, + lengthChange = FALSE, + searching = FALSE, + info = FALSE, + paging = FALSE, + scrollCollapse = TRUE, + scrollY = 300 + ), + rownames = FALSE + ) + } }) output$ens_model_summary <- renderPrint({ - ensemble_results()$fit + if (input$ens_apply_forecast) { + ensemble_results()$fit + } else if (input$stk_apply_forecast) { + stacking_results()$fit + } }) ```