Skip to content

Commit

Permalink
code cleanup: linter suggests
Browse files Browse the repository at this point in the history
  • Loading branch information
RobLBaker committed Dec 18, 2023
1 parent 4d5a637 commit ef4035e
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 77 deletions.
81 changes: 50 additions & 31 deletions R/order_cols.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,54 @@
#' \dontrun{
#' order_cols(df)
#' }
order_cols <- function(df){

suggested <- c('eventDate', 'eventDate_flag', 'scientificName', 'scientificName_flag', 'taxonRank', 'verbatimIdentification', 'vernacularName', 'namePublishedIn', 'recordedBy', 'individualCount', 'decimalLongitude', 'decimalLatitude', 'coordinate_flag', 'geodeticDatum', 'verbatimCoordinates', 'verbatimCoordinateSystem', 'verbatimSRS', 'coordinateUncertaintyInMeters')

required <- c('locality', 'type', 'basisOfRecord')

allofem <- c('locality', 'type', 'basisOfRecord', 'eventDate', 'eventDate_flag', 'scientificName', 'scientificName_flag', 'taxonRank', 'verbatimIdentification', 'vernacularName', 'namePublishedIn', 'custom_TaxonomicNotes', 'recordedBy', 'individualCount', 'decimalLongitude', 'decimalLatitude', 'coordinate_flag', 'geodeticDatum', 'verbatimCoordinates', 'verbatimCoordinateSystem', 'verbatimSRS', 'coordinateUncertaintyInMeters')

print(lapply(required, function(x) ifelse(x %in% names(df), paste0("Looking great! The required field \'", x, "\' exists within your data"), paste0("Please include the required field \'", x, "\' in your dataset"))))

print(lapply(suggested, function(x) ifelse(x %in% names(df), paste0("The field \'", x, "\' is present"), paste0("The suggested field \'", x, "\' is NOT present. If data for this field exists, please include it in your dataset"))))


df <- list(df)

df <- lapply(df, function(x) data.table::setcolorder(x, intersect(allofem, names(x))))

df <- as.data.frame(df)

customs <- df[, stringr::str_detect(names(df), 'custom_') == TRUE]

df <- df[, stringr::str_detect(names(df), 'custom_') == FALSE]

sensitives <- df %>% dplyr::select(any_of(c("informationWithheld", "dataGeneralizations", "footprintWKT")))

df <- df %>% dplyr::select(-dplyr::any_of(c("informationWithheld", "dataGeneralizations", "footprintWKT")))

df <- cbind(df, customs, sensitives)

df <- df %>% dplyr::relocate(any_of("custom_TaxonomicNotes"), .after = "namePublishedIn")

order_cols <- function(df) {

suggested <- c("eventDate", "eventDate_flag", "scientificName",
"scientificName_flag", "taxonRank", "verbatimIdentification",
"vernacularName", "namePublishedIn", "recordedBy",
"individualCount", "decimalLongitude", "decimalLatitude",
"coordinate_flag", "geodeticDatum", "verbatimCoordinates",
"verbatimCoordinateSystem", "verbatimSRS",
"coordinateUncertaintyInMeters")

required <- c("locality", "type", "basisOfRecord")

allofem <- c("locality", "type", "basisOfRecord", "eventDate",
"eventDate_flag", "scientificName", "scientificName_flag",
"taxonRank", "verbatimIdentification", "vernacularName",
"namePublishedIn", "custom_TaxonomicNotes", "recordedBy",
"individualCount", "decimalLongitude", "decimalLatitude",
"coordinate_flag", "geodeticDatum", "verbatimCoordinates",
"verbatimCoordinateSystem", "verbatimSRS",
"coordinateUncertaintyInMeters")

print(lapply(required,
function(x) ifelse(x %in% names(df),
paste0("Looking great! The required field \'",
x, "\' exists within your data"),
paste0("Please include the required field \'",
x, "\' in your dataset"))))

print(lapply(suggested,
function(x) ifelse(x %in% names(df),
paste0("The field \'", x, "\' is present"),
paste0("The suggested field \'", x,
"\' is NOT present. If data for this field exists, please include it in your dataset"))))

df <- list(df)
df <- lapply(df, function(x) data.table::setcolorder(x,
intersect(allofem,
names(x))))
df <- as.data.frame(df)
customs <- df[, stringr::str_detect(names(df), "custom_") == TRUE]
df <- df[, stringr::str_detect(names(df), "custom_") == FALSE]
sensitives <- df %>% dplyr::select(any_of(c("informationWithheld",
"dataGeneralizations",
"footprintWKT")))
df <- df %>% dplyr::select(-dplyr::any_of(c("informationWithheld",
"dataGeneralizations",
"footprintWKT")))
df <- cbind(df, customs, sensitives)
df <- df %>% dplyr::relocate(any_of("custom_TaxonomicNotes"),
.after = "namePublishedIn")
}
74 changes: 28 additions & 46 deletions R/summarize_qc_flags.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,16 @@ get_dc_flags <- function(directory = here::here()) {
# Assessment codes)
A_flag <- suppressWarnings(sum(stringr::str_count(flags_only[j],
"\\bA"),
na.rm = TRUE))
na.rm = TRUE))
AE_flag <- suppressWarnings(sum(stringr::str_count(flags_only[j],
"\\bAE"),
na.rm = TRUE))
na.rm = TRUE))
R_flag <- suppressWarnings(sum(stringr::str_count(flags_only[j],
"\\bR"),
na.rm = TRUE))
na.rm = TRUE))
P_flag <- suppressWarnings(sum(stringr::str_count(flags_only[j],
"\\bP"),
na.rm = TRUE))
na.rm = TRUE))
# get cell count in file, exclude NAs and flags:
Cell_count <- sum(!is.na(flags_only[j]))

Expand Down Expand Up @@ -430,9 +430,9 @@ get_custom_flags <- function(directory = here::here(),
AE_flag <- 0
R_flag <- 0
P_flag <- 0
RRU <- A_flag/(nrow(cust_cols[j]))
RRU <- A_flag / (nrow(cust_cols[j]))
Cell_count <- A_flag
percent_missing <- (sum(is.na(cust_cols[j])))/nrow(cust_cols[j])
percent_missing <- (sum(is.na(cust_cols[j]))) / nrow(cust_cols[j])

filename <- names(dfList)[i]
column <- colnames(cust_cols)[j]
Expand Down Expand Up @@ -486,7 +486,7 @@ get_custom_flags <- function(directory = here::here(),
# get cell count in file, exclude NAs and flags:
Cell_count <- sum(!is.na(flags_only[j]))

percent_missing <- (sum(is.na(flags_only[j])))/nrow(flags_only[j])
percent_missing <- (sum(is.na(flags_only[j]))) / nrow(flags_only[j])

RRU <- (A_flag + AE_flag) / nrow(flags_only[j])

Expand Down Expand Up @@ -525,7 +525,7 @@ get_custom_flags <- function(directory = here::here(),
R_flag <- NA
P_flag <- NA
Cell_count <- NA
percent_missing <-NA
percent_missing <- NA
RRU <- NA

flags <- data.frame(
Expand All @@ -541,60 +541,42 @@ get_custom_flags <- function(directory = here::here(),
#generate summary statistics for each column:
data_file_summaries <- cust_flags %>%
dplyr::group_by(filename) %>%
dplyr::summarize(A_total=sum(A_flag),
#A_mean=mean(A_flag),
#A_sd=stats::sd(A_flag),
AE_total=sum(AE_flag),
#AE_mean=mean(AE_flag),
#AE_sd=stats::sd(AE_flag),
P_total=sum(P_flag),
#P_mean=mean(P_flag),
#P_sd=stats::sd(P_flag),
R_total=sum(R_flag),
#R_mean=mean(R_flag),
#R_sd=stats::sd(R_flag),
missing_mean_percent=mean(percent_missing),
#missing_sd=stats::sd(percent_missing),
RRU_mean=mean(RRU),
RRU_sd=stats::sd(RRU))
dplyr::summarize(A_total = sum(A_flag),
AE_total = sum(AE_flag),
P_total = sum(P_flag),
R_total = sum(R_flag),
missing_mean_percent = mean(percent_missing),
RRU_mean = mean(RRU),
RRU_sd = stats::sd(RRU))

#generate data package level summaries
data_package_summary<- cust_flags %>%
plyr::summarize(A_total=sum(A_flag, na.rm=TRUE),
#A_mean=mean(A_flag, na.rm = TRUE),
#A_sd=sd(A_flag, na.rm = TRUE),
AE_total=sum(AE_flag, na.rm=TRUE),
#AE_mean=mean(AE_flag, na.rm=TRUE),
#AE_sd=sd(AE_flag, na.rm=TRUE),
P_total=sum(P_flag, na.rm=TRUE),
#P_mean=mean(P_flag, na.rm=TRUE),
#P_sd=sd(P_flag, na.rm=TRUE),
R_total=sum(R_flag, na.rm=TRUE),
#R_mean=mean(R_flag, na.rm=TRUE),
#R_sd=sd(R_flag, na.rm=TRUE),
missing_mean_percent=mean(percent_missing, na.rm=TRUE),
#missing_sd=sd(percent_missing, na.rm=TRUE),
RRU_mean=mean(RRU, na.rm=TRUE),
RRU_sd=sd(RRU, na.rm=TRUE))
data_package_summary <- cust_flags %>%
plyr::summarize(A_total = sum(A_flag, na.rm = TRUE),
AE_total = sum(AE_flag, na.rm = TRUE),
P_total = sum(P_flag, na.rm = TRUE),
R_total = sum(R_flag, na.rm = TRUE),
missing_mean_percent = mean(percent_missing, na.rm = TRUE),
RRU_mean = mean(RRU, na.rm = TRUE),
RRU_sd = sd(RRU, na.rm = TRUE))

qc_summary <- list(cust_flags,
data_file_summaries,
data_package_summary)

names(qc_summary)<-c("Column Level QC Summaries",
names(qc_summary) <- c("Column Level QC Summaries",
"Data File Level QC Summaries",
"Data Package Level QC Summaries")

if(output == "package"){
if (output == "package") {
return(qc_summary[[3]])
}
if(output == "files"){
if (output == "files") {
return(qc_summary[[2]])
}
if(output == "columns"){
if (output == "columns") {
return(qc_summary[[1]])
}
if(output == "all"){
if (output == "all") {
return(qc_summary)
}
}

0 comments on commit ef4035e

Please sign in to comment.