Skip to content

Commit

Permalink
Merge pull request #93 from stineb/main
Browse files Browse the repository at this point in the history
revised downsampling (treatment of quality control info for met varia…
  • Loading branch information
stineb committed Jun 18, 2024
2 parents ad28b80 + af83731 commit 75fbc10
Show file tree
Hide file tree
Showing 15 changed files with 582 additions and 96 deletions.
6 changes: 2 additions & 4 deletions R/fdk_convert_lsm.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Converts LSM netcdf data to FLUXNET
#' Converts LSM NetCDF data to FLUXNET
#'
#' Reads LSM data (as a netcdf) in a given directory
#' by (fluxnet) site name, optionally only returns the meta-data of the
Expand Down Expand Up @@ -347,9 +347,7 @@ fdk_convert_lsm <- function(
)
)

} else {
# return the merged file
return(all)
}
return(all)
}

200 changes: 176 additions & 24 deletions R/fdk_downsample_fluxnet.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,39 +158,39 @@ fdk_downsample_fluxnet <- function(
# add fraction of daily "missing values"

# precipitation is the sum of HH values
P_F = sum(P_F, na.rm = FALSE),
P_F_QC = mean(P_F_QC < 2, na.rm = FALSE),
P_F = sum(P_F, na.rm = TRUE),
P_F_QC = mean(P_F_QC < 2, na.rm = TRUE),

# temperature is the mean of the HH values
TA_F_MDS = mean(TA_F_MDS, na.rm = FALSE),
TA_F_MDS_QC = mean(TA_F_MDS_QC < 2, na.rm = FALSE),
TA_F_MDS = mean(TA_F_MDS, na.rm = TRUE),
TA_F_MDS_QC = mean(TA_F_MDS_QC < 2, na.rm = TRUE),

TMIN_F_MDS = min(TA_F_MDS, na.rm = FALSE),
TMAX_F_MDS = max(TA_F_MDS, na.rm = FALSE),
TMIN_F_MDS = min(TA_F_MDS, na.rm = TRUE),
TMAX_F_MDS = max(TA_F_MDS, na.rm = TRUE),

# shortwave radiation is the mean of the HH values
SW_IN_F_MDS = mean(SW_IN_F_MDS, na.rm = FALSE),
SW_IN_F_MDS_QC = mean(SW_IN_F_MDS_QC < 2, na.rm = FALSE),
SW_IN_F_MDS = mean(SW_IN_F_MDS, na.rm = TRUE),
SW_IN_F_MDS_QC = mean(SW_IN_F_MDS_QC < 2, na.rm = TRUE),

# long wave radiation is the mean of the HH values
LW_IN_F_MDS = mean(LW_IN_F_MDS, na.rm = FALSE),
LW_IN_F_MDS_QC = mean(LW_IN_F_MDS_QC < 2, na.rm = FALSE),
LW_IN_F_MDS = mean(LW_IN_F_MDS, na.rm = TRUE),
LW_IN_F_MDS_QC = mean(LW_IN_F_MDS_QC < 2, na.rm = TRUE),

# VPD is the mean of the HH values
VPD_F_MDS = mean(VPD_F_MDS, na.rm = FALSE),
VPD_F_MDS_QC = mean(VPD_F_MDS_QC < 2, na.rm = FALSE),
VPD_F_MDS = mean(VPD_F_MDS, na.rm = TRUE),
VPD_F_MDS_QC = mean(VPD_F_MDS_QC < 2, na.rm = TRUE),

# wind speed is the mean of the HH values
WS_F = mean(WS_F, na.rm = FALSE),
WS_F_QC = mean(WS_F_QC < 2, na.rm = FALSE),
WS_F = mean(WS_F, na.rm = TRUE),
WS_F_QC = mean(WS_F_QC < 2, na.rm = TRUE),

# atmospheric pressure is the mean of the HH values
PA_F = mean(PA_F, na.rm = FALSE),
PA_F_QC = mean(PA_F_QC < 2, na.rm = FALSE),
PA_F = mean(PA_F, na.rm = TRUE),
PA_F_QC = mean(PA_F_QC < 2, na.rm = TRUE),

# CO2 is the mean of the HH values
CO2_F_MDS = mean(CO2_F_MDS, na.rm = FALSE),
CO2_F_MDS_QC = mean(CO2_F_MDS_QC < 2, na.rm = FALSE),
CO2_F_MDS = mean(CO2_F_MDS, na.rm = TRUE),
CO2_F_MDS_QC = mean(CO2_F_MDS_QC < 2, na.rm = TRUE),

# FLUXES
# add fraction of daily "missing values"
Expand All @@ -217,14 +217,14 @@ fdk_downsample_fluxnet <- function(
# NETRAD/USTAR/SW_out is average from HH data
# (only days with more than 50% records available)
# add fraction of daily "missing values"
NETRAD = mean(NETRAD, na.rm = FALSE),
NETRAD_QC = mean(NETRAD_QC < 2, na.rm = FALSE),
NETRAD = mean(NETRAD, na.rm = TRUE),
NETRAD_QC = mean(NETRAD_QC < 2, na.rm = TRUE),

USTAR = mean(USTAR, na.rm = FALSE),
USTAR_QC = mean(USTAR_QC < 2, na.rm = FALSE),
USTAR = mean(USTAR, na.rm = TRUE),
USTAR_QC = mean(USTAR_QC < 2, na.rm = TRUE),

SW_OUT = mean(SW_OUT, na.rm = FALSE),
# SW_OUT_QC = mean(SW_OUT_QC < 2, na.rm = FALSE),
SW_OUT = mean(SW_OUT, na.rm = TRUE),
# SW_OUT_QC = mean(SW_OUT_QC < 2, na.rm = TRUE),

# Latent heat is the mean of the HH values
# add fraction of daily "missing values"
Expand Down Expand Up @@ -261,6 +261,158 @@ fdk_downsample_fluxnet <- function(
df <- df |>
dplyr::left_join(df_day, by = "TIMESTAMP")

# clean data - remove if less than 80% is good-quality gap-filled
df <- df |>
mutate(
# P_F = ifelse(P_F_QC < 0.5, NA, P_F), # no better approach
# TA_F_MDS = ifelse(TA_F_MDS_QC < 0.5, NA, TA_F_MDS), # no better approach
TA_DAY_F_MDS = ifelse(TA_F_MDS_QC < 0.5, NA, TA_DAY_F_MDS),
VPD_DAY_F_MDS = ifelse(VPD_F_MDS_QC < 0.5, NA, VPD_DAY_F_MDS),
TMIN_F_MDS = ifelse(TA_F_MDS_QC < 0.5, NA, TMIN_F_MDS),
TMAX_F_MDS = ifelse(TA_F_MDS_QC < 0.5, NA, TMAX_F_MDS),
SW_IN_F_MDS = ifelse(SW_IN_F_MDS_QC < 0.5, NA, SW_IN_F_MDS),
LW_IN_F_MDS = ifelse(LW_IN_F_MDS_QC < 0.5, NA, LW_IN_F_MDS),
VPD_F_MDS = ifelse(VPD_F_MDS_QC < 0.5, NA, VPD_F_MDS),
# WS_F = ifelse(WS_F_QC < 0.5, NA, WS_F), # no better approach
# PA_F = ifelse(PA_F_QC < 0.5, NA, PA_F), # no better approach
# CO2_F_MDS = ifelse(CO2_F_MDS_QC < 0.5, NA, CO2_F_MDS) # no better approach
)

# test for missing forcing data and impute
vars <- c(
"P_F",
"TA_F_MDS",
"TA_DAY_F_MDS",
"VPD_DAY_F_MDS",
"TMIN_F_MDS",
"TMAX_F_MDS",
"SW_IN_F_MDS",
"LW_IN_F_MDS",
"VPD_F_MDS",
"WS_F",
"PA_F",
"CO2_F_MDS",
"LAI",
"FPAR"
)

missing <- df |>
dplyr::summarise(
dplyr::across(
dplyr::all_of(vars),
~sum(is.na(.))
)) |>
tidyr::pivot_longer(everything()) |>
dplyr::filter(value > 0) |>
dplyr::pull(name)

# Shortwave radiation: impute with KNN
if ("SW_IN_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "SW_IN_F_MDS",
pred1 = "TA_F_MDS",
k = 5
)
}

# Longwave radiation: impute with KNN
if ("LW_IN_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "LW_IN_F_MDS",
pred1 = "TA_F_MDS",
k = 5
)
}

# Daytime temperature: impute with KNN
if ("TA_DAY_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "TA_DAY_F_MDS",
pred1 = "TA_F_MDS",
k = 5
)
}

# Daily minimum temperature: impute with KNN
if ("TMIN_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "TMIN_F_MDS",
pred1 = "TA_F_MDS",
k = 5
)
}

# Daily maximum temperature: impute with KNN
if ("TMAX_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "TMAX_F_MDS",
pred1 = "TA_F_MDS",
k = 5
)
}

# VPD: impute with KNN
if ("VPD_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "VPD_F_MDS",
pred1 = "TA_F_MDS",
k = 5
)
}

# Daytime VPD: impute with KNN
if ("VPD_DAY_F_MDS" %in% missing){
df <- fdk_impute_knn(
df,
target = "VPD_DAY_F_MDS",
pred1 = "VPD_F_MDS",
pred2 = "TMAX_F_MDS",
k = 5
)
}

# CO2: interpolate
if ("CO2_F_MDS" %in% missing){
df <- interpolate2daily_CO2_F_MDS(df)
}

# Atmospheric pressure: interpolate
if ("PA_F" %in% missing){
df <- interpolate2daily_PA_F(df)
}

# fAPAR: interpolate
if ("FPAR" %in% missing){
df <- interpolate2daily_fpar(df)
}

# Wind speed: interpolate
if ("WS_F" %in% missing){
df <- interpolate2daily_WS_F(df)
}

# still missing?
missing <- df |>
dplyr::summarise(
dplyr::across(
dplyr::all_of(vars),
~sum(is.na(.))
)) |>
tidyr::pivot_longer(everything()) |>
dplyr::filter(value > 0) |>
dplyr::pull(name)

if (length(missing) > 0){
message(paste("!!! still missing values:"))
message(paste(missing, collapse = ","))
}

# save data to file, using FLUXNET formatting
if (!missing(out_path)) {

Expand Down
1 change: 1 addition & 0 deletions R/fdk_format_drivers.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ fdk_format_drivers <- function(
le_qc = LE_F_MDS_QC
)

# fill missing net radiation data
df_flux <- df_flux |>
dplyr::group_by(sitename) |>
tidyr::nest() |>
Expand Down
12 changes: 5 additions & 7 deletions R/fdk_get_sequence.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,23 +212,21 @@ get_sequence_byvar <- function(site, df, good, leng_threshold, do_merge){

}

## Returns a dataframe that contains information about events (starting index and length)
## of consecutive conditions (TRUE) in a boolean vector ('good' - naming is a legacy).
get_consecutive <- function(
good,
merge_threshold = 5,
leng_threshold = 5,
do_merge = FALSE
){
##------------------------------------
## Returns a dataframe that contains information about events (starting index and length)
## of consecutive conditions (TRUE) in a boolean vector ('good' - naming is a legacy).
##------------------------------------

## replace NAs with FALSE (no drought). This is needed because of NAs at head or tail
good[ which(is.na(good)) ] <- FALSE

## identifies periods where 'good' true for consecutive days of length>leng_threshold and
## creates data frame holding each instance's info: start of drought by index in 'good' and length (number of days thereafter)
instances <- data.frame( idx_start=c(), len=c() )
instances <- data.frame( idx_start = c(), len = c() )
consecutive_good <- rep( NA, length( good ) )
ngood <- 0
ninst <- 0
Expand All @@ -249,7 +247,7 @@ get_consecutive <- function(
if (ngood > leng_threshold){
## create a last instance if the last good period extends to the end of the time series
ninst <- ninst + 1
addrow <- data.frame( idx_start=idx-(ngood), len=ngood )
addrow <- data.frame( idx_start = idx-(ngood), len = ngood )
instances <- rbind( instances, addrow )
}

Expand Down Expand Up @@ -278,7 +276,7 @@ get_consecutive <- function(
}

# if all is merged until the end
instances_merged$len[idx_merged] <- instances$idx_start[idx] + instances$len[idx] - instances_merged$idx_start[idx_merged]
instances_merged$len[idx_merged] <- instances$idx_start[idx] + instances$len[idx] - instances_merged$idx_start[idx_merged] + 1

instances <- instances_merged[,c("idx_start", "len")]
} else {
Expand Down
10 changes: 5 additions & 5 deletions R/fdk_release.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,33 +34,33 @@ fdk_release <- function(

# create remaining output directories
dir.create(
file.path(output_path,"lsm"),
file.path(output_path, "lsm"),
recursive = TRUE,
showWarnings = FALSE
)

dir.create(
file.path(output_path,"fluxnet"),
file.path(output_path, "fluxnet"),
recursive = TRUE,
showWarnings = FALSE
)

dir.create(
file.path(output_path,"pmodel"),
file.path(output_path, "pmodel"),
recursive = TRUE,
showWarnings = FALSE
)

dir.create(
file.path(output_path,"plots"),
file.path(output_path, "plots"),
recursive = TRUE,
showWarnings = FALSE
)

# amend path to the set input path
sites <- df |>
mutate(
data_path = file.path(input_path,"flux_data/")
data_path = file.path(input_path, "flux_data/")
)

#---- FluxnetLSM reprocessing routine ----
Expand Down
Loading

0 comments on commit 75fbc10

Please sign in to comment.