Skip to content

Commit

Permalink
First full code to handle o-o-s
Browse files Browse the repository at this point in the history
  • Loading branch information
bpbond committed Dec 20, 2023
1 parent a139224 commit fb2faae
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 47 deletions.
4 changes: 2 additions & 2 deletions synoptic/L1.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ f <- function(dir_name, dirs_to_process, out_dir) {
# everything can be stacked into a single data frame
dat_raw <- read_csv_group(d,
remove_input_files = params$remove_input_files,
col_types = "cccccTdcccdi")
col_types = "cccccTdcccdii")
# File-based summary
message("\tTotal data: ", nrow(dat_raw), " rows, ", ncol(dat_raw), " columns")
Expand Down Expand Up @@ -102,7 +102,7 @@ f <- function(dir_name, dirs_to_process, out_dir) {
write_plots = params$write_plots)
# Add any OOB flags to the flag database
flg <- dat[!is.na(dat$OOB) & dat$OOB == 1, "ID"]
flg <- dat[!is.na(dat$F_OOB) & dat$F_OOB == 1, "ID"]
if(nrow(flg)) {
flg$Flag_type <- "OOB"
flg$Remark <- basename(dir_name)
Expand Down
10 changes: 5 additions & 5 deletions synoptic/L1_normalize.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ This script

- Performs unit conversion (every research name must have an entry)

- Performs bounds checking (adding flags) for each `research_name` variable
- Performs bounds checking (adding a `F_OOB` flag) for each `research_name` variable

- Integrated out-of-service records into a `F_OOS` flag

- Writes into <year><month><site> folders

Expand Down Expand Up @@ -158,7 +160,6 @@ f <- function(fn, out_dir, design_table) {
# At this point, there should be exactly one match for every loggernet variable
if(nrow(dat) > old_rows) {
browser()
counts <- aggregate(design_link ~ loggernet_variable, data = dat,
FUN = function(x) length(unique(x)))
counts <- counts[counts$design_link > 1,]
Expand Down Expand Up @@ -205,15 +206,14 @@ f <- function(fn, out_dir, design_table) {
message("\tAdding OOB flags")
dat <- merge(dat, bt, by = c("research_name", "units"))
dat$F_OOB <- as.integer(with(dat, value < low_bound | value > high_bound))
smry$`OOB%` <- round(sum(dat$OOB) / nrow(dat) * 100, 1)
smry$`OOB%` <- round(sum(dat$F_OOB) / nrow(dat) * 100, 1)
# ------------- Out-of-service flags
message("\tAdding OOS flags")
x <- separate(dat, design_link, sep = "-",
into = c("What", "Site", "Location", "Sensor"))
browser()
dat$F_OOS <- as.integer(oos(oos_troll, x))
# Remove unneeded columns unless needed for debugging
if(!params$debug) {
Expand Down
3 changes: 2 additions & 1 deletion synoptic/data_TEST/L1_metadata/L1_metadata_columns.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ design_link,Design name that links to experimental unit (character)
research_name,Measurement name (character)
value,Observed value (numeric). The no-data value is '[NA_STRING_L1]'
ID,Observation ID (character)
OOB,"Out of instrumental bounds flag (1=TRUE, 0=FALSE) (logical)"
F_OOB,"Flag: Out of instrumental bounds (1=TRUE, 0=FALSE) (logical)"
F_OOS,"Flag: Out of service according to records (1=TRUE, 0=FALSE) (logical)"
16 changes: 8 additions & 8 deletions synoptic/data_TEST/L1_metadata/L1_metadata_vars.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ Teros,EC,soil_EC_15cm,dS/m,uS/cm,x * 100000,0,20000,NA
Teros,VWC,soil_vwc_30cm,m3/m3,m3/m3,(3.879*10^-4) * x - 0.6956,0,1,calibration equation for mineral soil VWC
Teros,TSOIL,soil_temp_30cm,degC,degC,x * 1,-40,60,NA
Teros,EC,soil_EC_30cm,dS/m,uS/cm,x * 100000,0,20000,NA
AquaTroll,Temperature600,gw_temperature,degC,degC,x * 1,-5,50,"temperature, water"
AquaTroll,Specific_Conductivity600,gw_conductivity,μS/cm,μS/cm,x * 1,0,350000,NA
AquaTroll,Salinity600,gw_salinity,PSU,PSU,x * 1,0,350,salinity
AquaTroll,Water_Density600,gw_density,g/cm3,g/cm3,x * 1,NA,NA,water_density
AquaTroll,pH600,gw_ph,unitless,unitless,x * 1,0,14,"ph, water"
AquaTroll,pH_ORP600,gw_ph_orp,mV,mV,x * 1,-1400,1400,ph_orp
AquaTroll,RDO_concen600,gw_rdo_concentration,mg/L,mg/L,x * 1,0,20,rdo_concentration
AquaTroll,Pressure600,gw_pressure,psi,mbar,x * 68.948,NA,NA,pressure
AquaTroll,Temperature600,gw_temperature,degC,degC,x * 1,-5,50,Water temperature
AquaTroll,Specific_Conductivity600,gw_conductivity,μS/cm,μS/cm,x * 1,0,350000,Water specific conductivity
AquaTroll,Salinity600,gw_salinity,PSU,PSU,x * 1,0,350,Water salinity
AquaTroll,Water_Density600,gw_density,g/cm3,g/cm3,x * 1,0.98,1.05,Water density
AquaTroll,pH600,gw_ph,unitless,unitless,x * 1,0,14,Water pH
AquaTroll,pH_ORP600,gw_ph_orp,mV,mV,x * 1,-1400,1400,Water oxidation-reduction potential
AquaTroll,RDO_concen600,gw_rdo_concentration,mg/L,mg/L,x * 1,0,20,Dissolved oxygen concentration
AquaTroll,Pressure600,gw_pressure,psi,mbar,x * 68.948,-10,910,Vented pressure (corrected for barometric pressure)
ClimateVue,,wx_rain,?,?,x * 1,,,
ClimateVue,,wx_winddir,?,?,x * 1,,,
ClimateVue,,wx_maxws,?,?,x * 1,,,
Expand Down
73 changes: 42 additions & 31 deletions synoptic/out-of-service.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,27 +42,38 @@ prep_troll_oos_table <- function(troll) {
# This function returns a logical vector, of the same length as the data_df
# input, that becomes F_OOS
oos <- function(oos_df, data_df) {
oos_df <- as.data.frame(oos_df)

# Make sure that any 'extra' condition columns (in addition to the
# oos window begin and end) are present in the data d.f.
non_ts_fields <- setdiff(colnames(oos_df), c("oos_begin", "oos_end"))
if(!all(non_ts_fields %in% colnames(data_df))) {
stop("Not all out-of-service condition columns are present in data!")
}
# For speed, compute the min and max up front
min_ts <- min(data_df$TIMESTAMP)
max_ts <- max(data_df$TIMESTAMP)

oos_final <- rep(FALSE, nrow(data_df))

for(i in seq_len(nrow(oos_df))) {
# First check: are any timestamps within the oos window?
oos <- data_df$TIMESTAMP >= oos_df$oos_begin[i] &
data_df$TIMESTAMP <= oos_df$oos_end[i]
if(any(oos)) {
# First quickly check: is there any overlap in timestamps?
timestamp_overlap <- min_ts <= oos_df$oos_end[i] &&
max_ts >= oos_df$oos_begin[i]
message("timestamp_overlap = ", timestamp_overlap)
if(timestamp_overlap) {
oos <- data_df$TIMESTAMP >= oos_df$oos_begin[i] &
data_df$TIMESTAMP <= oos_df$oos_end[i]
# There are timestamp matches, so check other (optional)
# conditions in the oos_df; they must match exactly
# For example, if there's a "Site" entry in oos_df then only
# data_df entries with the same Site qualify to be o.o.s
for(f in non_ts_fields) {
oos <- oos & data_df[,f] == oos_df[,f][i]
matches <- data_df[,f] == oos_df[i,f]
message("f = ", f, " ", oos_df[i,f], ", matches = ", sum(matches))
oos <- oos & matches
}

# The out-of-service flags for this row of the oos_df table
# are OR'd with the overall flags that will be returned below
oos_final <- oos_final | oos
Expand All @@ -76,31 +87,31 @@ oos <- function(oos_df, data_df) {
data_df <- data.frame(TIMESTAMP = 1:3, x = letters[1:3], y = 4:6)

# No other conditions beyond time window
oos_df <- data.frame(oos_begin = 1, oos_end = 1)
stopifnot(oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
oos_df <- data.frame(oos_begin = 4, oos_end = 5)
stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
oos_df <- data.frame(oos_begin = 0, oos_end = 2)
stopifnot(oos(oos_df, data_df) == c(TRUE, TRUE, FALSE))
oos_df <- data.frame(oos_begin = 0, oos_end = 3)
stopifnot(oos(oos_df, data_df) == c(TRUE, TRUE, TRUE))
# oos_df <- data.frame(oos_begin = 1, oos_end = 1)
# stopifnot(oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
# oos_df <- data.frame(oos_begin = 4, oos_end = 5)
# stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
# oos_df <- data.frame(oos_begin = 0, oos_end = 2)
# stopifnot(oos(oos_df, data_df) == c(TRUE, TRUE, FALSE))
# oos_df <- data.frame(oos_begin = 0, oos_end = 3)
# stopifnot(oos(oos_df, data_df) == c(TRUE, TRUE, TRUE))

# x condition - doesn't match even though timestamp does
oos_df <- data.frame(oos_begin = 1, oos_end = 1, x = "b")
stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
# x condition - matches and timestamp does
oos_df <- data.frame(oos_begin = 1, oos_end = 1, x = "a")
stopifnot(oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
# x condition - some match, some don't
oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "b")
stopifnot(oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
# x and y condition
oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "b", y = 5)
stopifnot(oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "a", y = 5)
stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))

# Error thrown if condition column(s) not present
oos_df <- data.frame(oos_begin = 1, oos_end = 2, z = 1)
out <- try(oos(oos_df, data_df), silent = TRUE)
stopifnot(class(out) == "try-error")
# oos_df <- data.frame(oos_begin = 1, oos_end = 1, x = "b")
# stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
# # x condition - matches and timestamp does
# oos_df <- data.frame(oos_begin = 1, oos_end = 1, x = "a")
# stopifnot(oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
# # x condition - some match, some don't
# oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "b")
# stopifnot(oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
# # x and y condition
# oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "b", y = 5)
# stopifnot(oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
# oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "a", y = 5)
# stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
#
# # Error thrown if condition column(s) not present
# oos_df <- data.frame(oos_begin = 1, oos_end = 2, z = 1)
# out <- try(oos(oos_df, data_df), silent = TRUE)
# stopifnot(class(out) == "try-error")

0 comments on commit fb2faae

Please sign in to comment.