From 1c7594e5e4235e35dc15d5b15d9ea0f40e4804b2 Mon Sep 17 00:00:00 2001 From: Koen Hufkens Date: Fri, 16 Feb 2024 18:28:37 +0100 Subject: [PATCH] LSO model --- analysis/04_regression_training_LSO.R | 6 ++++-- data/README.md | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/analysis/04_regression_training_LSO.R b/analysis/04_regression_training_LSO.R index ae67ba4..9059838 100644 --- a/analysis/04_regression_training_LSO.R +++ b/analysis/04_regression_training_LSO.R @@ -1,5 +1,6 @@ # Basic xgboost model with limited -# hyperparameter tuning +# hyperparameter tuning with +# leave site out cross validation # load the ecosystem library(tidymodels) @@ -14,7 +15,8 @@ ml_df <- read_ml_data( spatial = TRUE ) -results <- lapply(unique(ml_df$site)[1:2], function(site){ +# Leave-Site-Out cross validation loop +results <- lapply(unique(ml_df$site), function(site){ #---- data partitioning ---- diff --git a/data/README.md b/data/README.md index 0d41cc0..dccdc8e 100644 --- a/data/README.md +++ b/data/README.md @@ -7,6 +7,13 @@ Input data consist of the driver data file: Which contains all data required for the analysis. +Spectral indices are downloaded from: +https://awesome-ee-spectral-indices.readthedocs.io/ +on 15/02/2024 + +more info on indices here: +https://www.indexdatabase.de/ + ## Output Two models are created, a binary classification (drought day or not), and an @@ -17,6 +24,16 @@ folder and output is called, classification models respectively. These are the best models selected after cross validation (see code in the `analysis` folder). +### LSO + +The LSO directory contains all models of the leave-site-out model training +runs, the model name reflects the run for which the site was omitted from +training, + +`test_*.rds` + +would therefore be trained on all data but that from the `test` site. + ### Annotated manuscript An annotated manuscript of the model result is written up in the vignettes