PecanProject · Qianyuxuan · Jan 1, 2025 · Jan 18, 2025 · Jan 18, 2025 · Jan 18, 2025
diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R
@@ -462,22 +462,40 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs
     }
   } ## end loop over PFTS
   ####### end parameter update
-  #working on reading soil file (only working for 1 soil file)
-  if(length(settings$run$inputs$soilinitcond$path)==1){
-    soil_IC_list <- PEcAn.data.land::pool_ic_netcdf2list(settings$run$inputs$soilinitcond$path)
-    #SoilWHC and LitterWHC
-    if("volume_fraction_of_water_in_soil_at_saturation"%in%names(soil_IC_list$vals)){
-      #SoilWHC
-      param[which(param[, 1] == "soilWHC"), 2] <- mean(unlist(soil_IC_list$vals["volume_fraction_of_water_in_soil_at_saturation"]))*100
-
-      #LitterWHC
-      #param[which(param[, 1] == "litterWHC"), 2] <- unlist(soil_IC_list$vals["volume_fraction_of_water_in_soil_at_saturation"])[1]*100
+  #working on reading soil file
+  if (length(settings$run$inputs$soilinitcond$path) > 0) {
+    template.soilinit <- settings$run$inputs$soilinitcond$path  ## read from settings
+
+    if (!is.null(inputs)) {
+      ## override if specified in inputs
+      if ("soilinitcond" %in% names(inputs)) {
+        template.soilinit <- inputs$soilinit$path
+      }
     }
-    if("soil_hydraulic_conductivity_at_saturation"%in%names(soil_IC_list$vals)){
+    soil_IC_list <- PEcAn.data.land::pool_ic_netcdf2list(template.soilinit)
+    # Calculate the thickness of soil layers based on the assumption that the depth values are at bottoms and the first layer top is at 0
+    if ("depth" %in% names(soil_IC_list$dims)) {
+      thickness<-c(soil_IC_list$dims$depth[1],diff(soil_IC_list$dims$depth))
+    }
+    #SoilWHC
+    if ("depth" %in% names(soil_IC_list$dims) && "volume_fraction_of_water_in_soil_at_saturation" %in% names(soil_IC_list$vals)) {
+      # Calculate the soilWHC for the whole soil profile in cm
+      soilWHC_total <- sum(unlist(soil_IC_list$vals["volume_fraction_of_water_in_soil_at_saturation"])*thickness)
+      param[which(param[, 1] == "soilWHC"), 2] <- soilWHC_total
+    #LitterWHC
+      param[which(param[, 1] == "litterWHC"), 2] <- soilWHC_total
+    }
+    if ("depth" %in% names(soil_IC_list$dims) && "soil_hydraulic_conductivity_at_saturation" %in% names(soil_IC_list$vals)) {
+      # Calculate the soil_hydraulic_conductivity for the whole soil profile (soilHC_total) if vertical flow dominates for
+      # unsaturated conditions or in deep, well-drained profiles. If horizontal flow dominates such as for 
+      # saturated conditions or where barriers to vertical flow exist, the overall conductivity is the 
+      # weighted arithmetic mean based on the proportion of the total thickness each layer represents as:
+      # soilHC_total <- sum(unlist(soil_IC_list$vals["soil_hydraulic_conductivity_at_saturation"])*thickness)/sum(thickness)
+      soilHC_total <- sum(thickness)/sum(thickness/(unlist(soil_IC_list$vals["soil_hydraulic_conductivity_at_saturation"])))
       #litwaterDrainrate
-      param[which(param[, 1] == "litWaterDrainRate"), 2] <- unlist(soil_IC_list$vals["soil_hydraulic_conductivity_at_saturation"])[1]*100/(3600*24)
+      param[which(param[, 1] == "litWaterDrainRate"), 2] <- soilHC_total
+      }
     }
-  }
   if (!is.null(IC)) {
     ic.names <- names(IC)
     ## plantWoodInit gC/m2

diff --git a/modules/assim.sequential/R/metSplit.R b/modules/assim.sequential/R/metSplit.R
@@ -32,17 +32,18 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o
       # Loading the model package - this is required bc of the furrr
       library(paste0("PEcAn.",model), character.only = TRUE)
 
-      inputs.split <- list()
+      # Initialize inputs.split with inputs to keep the sub-list of soil parameters "soilinitcond"
+      inputs.split <- inputs
       if (!no_split) {
         for (i in seq_len(nens)) {
           #---------------- model specific split inputs
-          inputs.split$samples[i] <- do.call(
+          inputs.split$met$samples[i] <- do.call(
             my.split_inputs,
             args = list(
               settings = settings,
               start.time = (lubridate::ymd_hms(start.time, truncated = 3) + lubridate::second(lubridate::hms("00:00:01"))),
               stop.time =   lubridate::ymd_hms(stop.time, truncated = 3),
-              inputs = inputs$samples[[i]])
+              inputs = inputs$met$samples[[i]])
           )
         }
       } else{

diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R
@@ -355,17 +355,28 @@ sda.enkf.multisite <- function(settings,
       #reformatting params
       new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens)
     }
-      #sample met ensemble members
+      #sample met and soil parameter ensemble members 
       #TODO: incorporate Phyllis's restart work
       #      sample all inputs specified in the settings$ensemble not just met
-      inputs <- PEcAn.settings::papply(conf.settings,function(setting) {
-        PEcAn.uncertainty::input.ens.gen(
-          settings = setting,
-          input = "met",
-          method = setting$ensemble$samplingspace$met$method,
-          parent_ids = NULL 
-        )
-       })
+      input_name <- c("met","soilinitcond")
+      inputs <- list()
+      new_inputs <- list()
+      for (i_input in seq_along(input_name)){
+       inputs[[input_name[i_input]]]<-PEcAn.settings::papply(conf.settings,function(setting) {
+         PEcAn.uncertainty::input.ens.gen(
+           settings = setting,
+           input = input_name[i_input],
+           method = setting$ensemble$samplingspace[[input_name[i_input]]]$method,
+           parent_ids = NULL 
+          )
+        })
+       #reformat the input list to make the multisite setting as the first sub-category 
+       for (setting_name in names(inputs[[input_name[i_input]]])) {
+       new_inputs[[setting_name]][[input_name[i_input]]] <- inputs[[input_name[i_input]]][[setting_name]]
+       }
+      } 
+
+
   ###------------------------------------------------------------------------------------------------###
   ### loop over time                                                                                 ###
   ###------------------------------------------------------------------------------------------------###
@@ -379,7 +390,9 @@ sda.enkf.multisite <- function(settings,
       if (t>1){
         #for next time step split the met if model requires
         #-Splitting the input for the models that they don't care about the start and end time of simulations and they run as long as their met file.
-        inputs.split <- metSplit(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs)
+        inputs.split <- list()
+        inputs.split <- metSplit(conf.settings, new_inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs)
+
 
         #---------------- setting up the restart argument for each site separately and keeping them in a list
         restart.list <-
@@ -768,4 +781,5 @@ sda.enkf.multisite <- function(settings,
 #    }
       ## MCD: I commented the above "if" out because if you are restarting from a previous forecast, this might delete the files in that earlier folder
   } ### end loop over time
-} # sda.enkf
+} # sda.enkf
+
diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R
@@ -225,6 +225,7 @@ template <- PEcAn.settings::Settings(list(
   ensemble = structure(list(size = 25, variable = "NPP", 
                             samplingspace = structure(list(
                               parameters = structure(list(method = "lhc")),
+                              soilinitcond = structure(list(method = "sampling")),
                               met = structure(list(method = "sampling"))
                             ))
   )),

diff --git a/modules/data.land/NAMESPACE b/modules/data.land/NAMESPACE
@@ -54,6 +54,7 @@ export(soil2netcdf)
 export(soil_params)
 export(soil_process)
 export(soilgrids_soilC_extract)
+export(soilgrids_texture_extraction)
 export(subset_layer)
 export(to.Tag)
 export(to.TreeCode)

diff --git a/modules/data.land/R/extract_soil_nc.R b/modules/data.land/R/extract_soil_nc.R
@@ -348,6 +348,7 @@ extract_soil_nc <- function(in.file,outdir,lat,lon){
 #'  * `soil_thermal_conductivity_at_saturation`
 #'  * `soil_thermal_capacity`
 #'  * `soil_albedo`
+
 #'
 #' @param varname character vector. See details
 #'
@@ -383,6 +384,7 @@ soil.units <- function(varname = NA){
                                       "soil_thermal_conductivity_at_saturation","W m-1 K-1", 
                                       "soil_thermal_capacity","J kg-1 K-1",
                                       "soil_albedo","1"
+
   ),
   ncol=2,byrow = TRUE))
   colnames(variables) <- c('var','unit')

diff --git a/modules/data.land/R/soil_params_ensemble.R b/modules/data.land/R/soil_params_ensemble.R
@@ -0,0 +1,223 @@
+##' A function to estimate individual alphas for Dirichlet distribution to approximate the observed quantiles with means as known moments
+##' for SoilGrids soil texture data. 
+##' Dirichlet distribution is assumed as soil texture data follow categorical distribution and the probability of each category is in the range 0 to 1, 
+##' and all must sum to 1.
+##'  
+##' @param means A vector of means of sand, clay, and silt proportion for one soil layer at one site from SoilGrids data
+##' @param quantiles A list of 5th, 50th, and 95th percentiles for sand, clay, and silt for one soil layer at one site from SoilGrids data
+##'
+##' @example
+##' \dontrun{
+##' means <- c(0.566,0.193,0.241) # means of sand, clay, and silt at one site and one depth
+##' quantiles <-list(
+##' q5 = c(0.127,0.034,0.052), # 5th percentile for each category: sand, clay, and silt at one site and one depth
+##' q50 = c(0.615,0.15,0.191), # 50th percentile (median) for each category: sand, clay, and silt at one site and one depth
+##' q95 = c(0.799,0.66,0.616))  # 95th percentile for each category: sand, clay, and silt at one site and one depth
+##' alpha_est <- estimate_dirichlet_parameters(means, quantiles)
+##' }
+##'  @return The individual alphas that work best to fit the observed quantiles
+##'  @export 
+##'  @author Qianyu Li
+estimate_dirichlet_parameters <- function(means, quantiles) {
+
+  # A function to optimize alpha0, which is the sum of individual alphas.
+  estimate_alpha0 <- function(means, quantiles) {
+    # Objective function to minimize the difference between observed and simulated quantiles with means as a known moment
+    objective_function <- function(alpha0) {
+      if (alpha0 <= 0)
+        return(Inf) # alpha0 couldn't be zero or negative as it is the sum of individual alpha which are positive reals
+      # Estimate individual alpha based on that the means of each categorical data are individual alpha divided by alpha0 in Dirichlet distribution
+      alpha <- means * alpha0
+      # Generate samples based on estimated alpha
+      samples <- MCMCpack::rdirichlet(10000, alpha) # Generate samples
+      # Compute differences with observed quantiles
+      estimated_quantiles <- apply(samples, 2, quantile, probs = c(0.05, 0.5, 0.95),na.rm = TRUE)
+      quantile_diff <- sum((estimated_quantiles - do.call(rbind, quantiles))^2)
+      return(quantile_diff)
+    }
+
+    # Optimize alpha0
+    result <- optim(
+      par = 1, # Initial guess for alpha0
+      fn = objective_function,
+      method = "L-BFGS-B",
+      lower = 0.01  # alpha0 must be positive
+    )
+    return(result$par)
+  }
+
+  alpha0 <- estimate_alpha0(means, quantiles)
+  if (alpha0 <= 0) {
+    stop("Estimated alpha0 is non-positive, which is invalid.")
+  }
+  alphas <- means * alpha0
+  return(alphas)
+}
+
+
+
+##' A function to estimate the soil parameters based on SoilGrids soil texture data and write the parameter paths into settings
+##' 
+##' @param settings A multi-site settings
+##' @param sand A data frame containing sand fraction in percentage from SoilGrids250m v2.0 with columns "Depth", "Quantile", "Siteid", and "Value"
+##' @param clay A data frame containing clay fraction in percentage from SoilGrids250m v2.0 with columns "Depth", "Quantile", "Siteid", and "Value"
+##' @param silt A data frame containing silt fraction in percentage from SoilGrids250m v2.0 with columns "Depth", "Quantile", "Siteid", and "Value"
+##' @param outdir Provide the path to store the parameter files
+##' @param write_into_settings Whether to write the path of parameter file into the setting. The default is TRUE
+##' 
+##' @example
+##' \dontrun{
+##' 
+##' outdir <- "/projectnb/dietzelab/Cherry/SoilGrids_texture/39NEON"
+##' sand <- readRDS("/projectnb/dietzelab/Cherry/SoilGrids_texture/sand_percent.rds") #sand fraction in percentage
+##' clay <- readRDS("/projectnb/dietzelab/Cherry/SoilGrids_texture/clay_percent.rds") #clay fraction in percentage
+##' silt <- readRDS("/projectnb/dietzelab/Cherry/SoilGrids_texture/silt_percent.rds") #silt fraction in percentage
+##' settings <-read.settings("/projectnb/dietzelab/Cherry/xml/pecan_monthly_SDA_soilwater.xml")
+##' soil_params_ensemble_soilgrids(settings,sand,clay,silt,outdir)
+##'  }
+##'  
+##'  @return Ensemble soil parameter files defined in outdir and file paths in xml file
+##'  @export 
+##'  @author Qianyu Li
+##'  @importFrom magrittr %>%
+##'  
+
+soil_params_ensemble_soilgrids <- function(settings,sand,clay,silt,outdir,write_into_settings=TRUE){
+
+  # A function to rescale the sums of mean texture fractions to 1 as the original sums are slightly different from 1 for some layers
+  rescale_sum_to_one <- function(sand, clay, silt) {
+    total <- sand + clay + silt
+    rescaled_sand <- sand / total
+    rescaled_clay <- clay / total
+    rescaled_silt <- silt / total
+    return(list(
+      sand = rescaled_sand,
+      clay = rescaled_clay,
+      silt = rescaled_silt))
+  }
+
+  # A function to write to settings
+  create_mult_list <- function(list.names, paths) {
+    out <- as.list(paths)
+    names(out) <- list.names
+    out
+  }
+
+  # Convert values to proportion (0-1) from percentage
+  if (any(c(sand$Value, clay$Value, silt$Value) > 2)) {
+     sand$Value <- if (is.null(sand$Value)) { NULL } else { sand$Value / 100 }
+     clay$Value <- if (is.null(clay$Value)) { NULL } else { clay$Value / 100 }
+     silt$Value <- if (is.null(silt$Value)) { NULL } else { silt$Value / 100 }
+  }
+  ens_n <- as.numeric(settings$ensemble$size)
+  # Merge all soil texture data together
+  texture_all <-merge(sand, clay, by=c("Depth", "Quantile", "Siteid"))  %>% merge(silt, by=c("Depth", "Quantile", "Siteid")) %>%
+  `colnames<-`(c(
+      "soil_depth", #"soil_depth" will be used in "soil2netcdf" function
+      "quantile",
+      "siteid",
+      "fraction_of_sand_in_soil",
+      "fraction_of_clay_in_soil",
+      "fraction_of_silt_in_soil"))
+
+   # Substitute the depth range with the bottom depth values (with the assumption that the first layer's top is at 0)
+   texture_all$soil_depth <-
+     gsub("100-200cm", 200, gsub("60-100cm", 100, gsub(
+    "30-60cm", 60, gsub("15-30cm", 30, gsub(
+      "5-15cm", 15, gsub("0-5cm", 5, texture_all$soil_depth))))))
+   texture_all$soil_depth <- as.numeric(texture_all$soil_depth)
+   # Reformat the list based on site id
+   f1 <- factor(texture_all$siteid, levels = unique(texture_all$siteid))
+   dat <- split(texture_all, f1)
+   # Grab Site IDs from settings
+   settings_id <-lapply(settings, function(x) as.numeric(x$run$site$id))
+
+   for (i in seq_along(dat)) {
+      samples_ens <- list()
+      paths <- c()
+      siteid <- as.numeric(unique(dat[[i]]$siteid))
+      soil_depth <- unique(dat[[i]]$soil_depth)
+      str_ns <- paste0(siteid %/% 1e+09, "-", siteid %% 1e+09)
+      temp_outdir <- file.path(outdir, siteid)
+      dir.create(temp_outdir)
+      # Estimate Dirichlet parameters for each depth at each site
+      for (depths in sort(unique(texture_all$soil_depth))) {
+          quantiles <- list(
+               q5 = dplyr::filter(dat[[i]], quantile == "0.05", soil_depth == depths) %>% dplyr::select(
+                    fraction_of_sand_in_soil,
+                    fraction_of_clay_in_soil,
+                    fraction_of_silt_in_soil), # 5th percentile for each category
+               q50 = dplyr::filter(dat[[i]], quantile == "0.5", soil_depth == depths) %>% dplyr::select(
+                    fraction_of_sand_in_soil,
+                    fraction_of_clay_in_soil,
+                    fraction_of_silt_in_soil), # 50th percentile (median) for each category
+               q95 = dplyr::filter(dat[[i]], quantile == "0.95", soil_depth == depths) %>% dplyr::select(
+                    fraction_of_sand_in_soil,
+                    fraction_of_clay_in_soil,
+                    fraction_of_silt_in_soil))  # 95th percentile for each category
+
+         # Extract the means
+         means <- dplyr::filter(dat[[i]], quantile == "Mean", soil_depth == depths) %>% dplyr::select(fraction_of_sand_in_soil,fraction_of_clay_in_soil,fraction_of_silt_in_soil)
+         soil_rescaled <-rescale_sum_to_one(means$fraction_of_sand_in_soil,means$fraction_of_clay_in_soil,means$fraction_of_silt_in_soil)
+
+         # Replace the original means with the rescaled ones
+         means$fraction_of_sand_in_soil <- soil_rescaled$sand
+         means$fraction_of_clay_in_soil <- soil_rescaled$clay
+         means$fraction_of_silt_in_soil <- soil_rescaled$silt
+
+         # Estimate Dirichlet parameters
+         alpha_est <- estimate_dirichlet_parameters(as.matrix(means), quantiles)
+
+         # Generate the ensemble soil texture data based on the ensemble size (ens_n) defined in the settings
+         samples <- MCMCpack::rdirichlet(ens_n, alpha_est)
+         colnames(samples) <-c("fraction_of_sand_in_soil","fraction_of_clay_in_soil","fraction_of_silt_in_soil")
+         samples <-list(samples) %>% setNames(depths)
+         samples_ens <- append(samples_ens, samples)
+      }
+
+      # Generate soil parameter file for each one in ensemble soil texture data
+      for (ens in 1:ens_n) {
+          # Choose one sample
+          samples_all_depth <- lapply(samples_ens, function(x) x[ens, ])
+          # Reformat the nested list as input to "soil2netcdf" function
+          reformatted_soil_list <- reformat_soil_list(samples_all_depth)
+          prefix <- paste0("Soil_params_", str_ns, "_", ens)
+          new.file <-  file.path(outdir, siteid, paste0(prefix, ".nc"))
+          out.ense <- soil2netcdf(reformatted_soil_list, new.file)
+          paths <- c(new.file, paths)
+       }
+
+      # Write the parameter paths to settings
+      if (write_into_settings) {
+          ind <- which(settings_id == siteid)
+          settings[[ind]]$run$inputs$soilinitcond$source <- "SoilGrids"
+          settings[[ind]]$run$inputs$soilinitcond$output <- "soilinitcond"
+          settings[[ind]]$run$inputs$soilinitcond$ensemble <- ens_n
+          settings[[ind]]$run$inputs$soilinitcond$path <-create_mult_list(rep("path", ens_n), paths)
+          write.settings(settings,outputdir = settings$outdir,outputfile = "pecan.xml")
+       }
+   }
+}
+
+# A function to reformat the nested list as inputs to "soil2netcdf" function
+reformat_soil_list <- function(samples_all_depth) {
+  # Define the fractions we want to extract
+  fractions <-
+    c("fraction_of_sand_in_soil",
+      "fraction_of_clay_in_soil",
+      "fraction_of_silt_in_soil")
+
+  # Initialize a new list to store reformatted data
+  reformatted <-setNames(vector("list", length(fractions)), fractions)
+
+  # Extract data for each fraction
+  for (fraction in fractions) {
+    reformatted[[fraction]] <-
+      unlist(lapply(samples_all_depth, function(depth_list) {
+        depth_list[[fraction]] # Extract the fraction value
+      })) %>% purrr::set_names(NULL)
+  }
+  # Combine depth into a single vector for readability
+  reformatted$soil_depth <- as.numeric(names(samples_all_depth))
+  return(reformatted)
+}