From 2dcf030a41d14f1ce739981cc9d0f4af9beed646 Mon Sep 17 00:00:00 2001 From: mikejohnson51 Date: Thu, 31 Oct 2024 10:37:39 -0600 Subject: [PATCH] pkg cleanup --- .lintr | 2 - DESCRIPTION | 4 - DISCLAIMER.md | 5 - NAMESPACE | 2 - NEWS.md | 26 ---- R/hydrofab.R | 4 - README.Rmd | 2 +- inst/CITATION | 10 +- ..._update_reference_and_refactored_fabrics.R | 48 -------- runners/02_update_hydrolocations.R | 111 ------------------ runners/03_aggregate.R | 51 -------- runners/04_global_id.R | 35 ------ runners/uniform_aggregation_runner.R | 92 --------------- 13 files changed, 6 insertions(+), 386 deletions(-) delete mode 100644 .lintr delete mode 100644 DISCLAIMER.md delete mode 100644 NEWS.md delete mode 100644 runners/01_update_reference_and_refactored_fabrics.R delete mode 100644 runners/02_update_hydrolocations.R delete mode 100644 runners/03_aggregate.R delete mode 100644 runners/04_global_id.R delete mode 100644 runners/uniform_aggregation_runner.R diff --git a/.lintr b/.lintr deleted file mode 100644 index deacbb2c..00000000 --- a/.lintr +++ /dev/null @@ -1,2 +0,0 @@ -linters: with_defaults(line_length_linter(120), camel_case_linter = NULL, cyclocomp_linter(25)) -exclusions: list("tests/testthat/data/prep_test_data.R") diff --git a/DESCRIPTION b/DESCRIPTION index 6b0a98f8..5f3ba6e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,8 +15,6 @@ BugReports: https://github.com/mikejohnson51/hydrofab/issues Depends: R (>= 3.5.0) Imports: - arrow, - climateR, data.table, dplyr, glue, @@ -41,8 +39,6 @@ Imports: Suggests: testthat, knitr -Remotes: - mikejohnson51/climateR License: CC0 Encoding: UTF-8 LazyData: true diff --git a/DISCLAIMER.md b/DISCLAIMER.md deleted file mode 100644 index 16cd49da..00000000 --- a/DISCLAIMER.md +++ /dev/null @@ -1,5 +0,0 @@ -Disclaimer -========== - -These data are preliminary or provisional and are subject to revision. They are being provided to meet the need for timely best science. The data have not received final approval by the National Oceanic and Atmospheric Administration (NOAA) or the U.S. Geological Survey (USGS) and are provided on the condition that neither NOAA, the USGS, nor the U.S. Government shall be held liable for any damages resulting from the authorized or unauthorized use of the data. 
- diff --git a/NAMESPACE b/NAMESPACE index 5ea6c28a..e118998e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,8 +59,6 @@ export(union_polygons) export(unpack_set) export(update_network_identifiers) export(write_hydrofabric) -importFrom(arrow,open_dataset) -importFrom(climateR,dap) importFrom(data.table,rbindlist) importFrom(dplyr,"%>%") importFrom(dplyr,`%>%`) diff --git a/NEWS.md b/NEWS.md deleted file mode 100644 index 6fb1bd5e..00000000 --- a/NEWS.md +++ /dev/null @@ -1,26 +0,0 @@ -August 15, 2022 - -Merging `hyRefactor` with `hyAggregate` to create `hydrofab` -================================================================================= - - Change `union_linestrings_geos` to `union_linestrings` - remove `rgeos` dependency - - Change `union_polygons_geos` to `union_polygons` - remove `rgeos` dependency - - Change `aggregate_network` to `aggregate_network_to_outlets` - - Change `aggregate_catchments` to `aggregate_to_outlets` - - Change `add_length` to `add_lengthm` to avoid conflicts with `add_lengthkm` - - - remove onAttach message - - - add `get_hydrofabric`, `read_hydrofabric`, and `write_hydrofabric` - - add `prepare_network` - - add `refactor` that wraps refactoring workflow - - add `aggregate_along_mainstem` - - add `collapse_headwaters` - - add `aggregate_to_distribution` - - add `add_nonnetwork_divides` - - add `poi_to_outlet` - - add `add_mapped_pois` - - add `generate_lookup_table` - - # TODO: add subsetting utilites. - - - use I/O methods in all existing `hyRefactor` code diff --git a/R/hydrofab.R b/R/hydrofab.R index 4a5df9a3..3a2c52a3 100644 --- a/R/hydrofab.R +++ b/R/hydrofab.R @@ -1,7 +1,3 @@ -#' @importFrom arrow open_dataset -#' -#' @importFrom climateR dap -#' #' @importFrom dplyr across add_count bind_cols bind_rows collect case_when contains #' @importFrom dplyr distinct everything filter group_by left_join mutate n rename #' @importFrom dplyr right_join select slice_max slice_min summarize tbl ungroup `%>%` diff --git a/README.Rmd b/README.Rmd index bd875563..f8bde889 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,7 @@ knitr::opts_chunk$set( [![R CMD Check](https://github.com/mikejohnson51/hydrofab/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mikejohnson51/hydrofab/actions/workflows/R-CMD-check.yml) -[![Dependencies](https://img.shields.io/badge/dependencies-21/80-red?style=flat)](#) +[![Dependencies](https://img.shields.io/badge/dependencies-18/71-red?style=flat)](#) The goal of `hydrofab` is to provide consistent hydrologic and hydraulic network manipulation tool chains to achieve model application ready datasets from a consistent reference fabric. Some of these are being built at [ngen.hydrofab](https://github.com/mikejohnson51/ngen.hydrofab) and [gfv2.0](https://code.usgs.gov/wma/nhgf/gfv2.0) diff --git a/inst/CITATION b/inst/CITATION index ca54d74a..814ebb8d 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -6,11 +6,11 @@ bibentry(bibtype = "Manual", as.person("David Blodgett") ), title = "hydrofab: Hydrologic Network Refactoring Tools Based on HY_Features", - publisher = "U.S. Geological Survey", - address="Reston, VA", - version = "0.5.0", - institution = "U.S. Geological Survey", - year = 2022, + publisher = "NOAA", + address="", + version = "0.5.3", + institution = "U.S. 
NOAA", + year = 2024, url = "https://github.com/NOAA-OWP/hydrofab", textVersion = "Johnson, J.M., Blodgett, D., 2022, hydrofab: Hydrologic Network Refactoring Tools Based on HY_Features, https://github.com/NOAA-OWP/hydrofab" ) diff --git a/runners/01_update_reference_and_refactored_fabrics.R b/runners/01_update_reference_and_refactored_fabrics.R deleted file mode 100644 index 7e60b1eb..00000000 --- a/runners/01_update_reference_and_refactored_fabrics.R +++ /dev/null @@ -1,48 +0,0 @@ -pacman::p_load(hydrofabric, glue) -devtools::load_all() - -vpus = vpu_boundaries$VPUID[1:21] - -base = '/Volumes/MyBook/nextgen' - -ref = glue("{base}/reference") -refac = glue("{base}/refactored") - -ow = TRUE - -dir.create(ref, showWarnings = FALSE); dir.create(refac, showWarnings = FALSE) - -# Refreshed last on: - # 06-27-2023 - # ... - # ... - # ... - -for(i in 15:length(vpus)){ - - source('secret/sb_tools.R') - - get_hydrofabric(VPU = vpus[i], - type = "reference", - dir = ref, - overwrite = ow) - - get_hydrofabric(VPU = vpus[i], - type = "refactor", - dir = refac, - overwrite = ow) - - message(vpus[i]) -} - -library(dplyr) -sf::sf_use_s2(FALSE) - -tmp = tempfile(fileext = ".geojson") -httr::GET("https://earth-info.nga.mil/php/download.php?file=hydrobasins_level2", httr::write_disk(tmp)) - -xx = sf::read_sf('/Users/mjohnson/Downloads/hydrobasins_level2.geojson') - -xx2 = st_make_valid(ms_simplify(xx)) - - diff --git a/runners/02_update_hydrolocations.R b/runners/02_update_hydrolocations.R deleted file mode 100644 index 2fe4a4e5..00000000 --- a/runners/02_update_hydrolocations.R +++ /dev/null @@ -1,111 +0,0 @@ -# fs::file_copy('/Users/mjohnson/Library/Application Support/org.R-project.R/R/ngen.hydrofab/RouteLink_nwm_v2_2_3.fst', -# glue("{base}/RouteLink_nwm_v2_2_3.fst")) -# -# arrow::write_parquet(tt, glue("{base}/RouteLink_nwm_v2_2_3.parquet")) - -pacman::p_load(hydrofabric, glue, arrow) -devtools::load_all() - -vpus = vpu_boundaries$VPUID[1:21] - -base = '/Volumes/MyBook/nextgen' -hldir = glue("{base}/hydrolocations") -dir.create(hldir) - -# Extract and Build POIs -------------------------------------------------- - -for(i in 1:length(vpus)){ - - VPU = vpus[i] - - refactored_gpkg = get_hydrofabric(VPU = VPU, - type = "refactor", - dir = glue("{base}/refactored"), - overwrite = FALSE) - - fl = read_hydrofabric(refactored_gpkg, "flowlines")[[1]] %>% - st_transform(4326) - - type = c('HUC12', 'Gages', 'TE', 'NID', "WBOut") - poi_layer = grep("POIs_*", st_layers(refactored_gpkg)$name, value = TRUE) - - # Community POIs ---- - - hl = read_sf(refactored_gpkg, poi_layer) %>% - st_drop_geometry() %>% - select(hy_id = ID, paste0("Type_", type)) %>% - mutate_at(vars(matches("Type_")), as.character) %>% - pivot_longer(-hy_id) %>% - filter(!is.na(value)) %>% - mutate(hl_reference = gsub("Type_", "", name), hl_link = as.character(value)) %>% - select(hy_id, hl_reference, hl_link) %>% - tidyr::separate_longer_delim(hl_link, ",") - - - # Coastal Model ---- - - pts = read.csv(glue('{base}/GAGE_SUMMARY.csv')) %>% - select(hl_link = SITE_NO, lat = LAT_NHD, lon = LON_NHD) %>% - st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% - mutate(hl_reference = "CoastalGage") %>% - mutate(hl_link = as.character(hl_link)) %>% - rename_geometry("geometry") - - pts$hy_id = fl$ID[st_nearest_feature(pts, fl)] - - # RouteLink ---- - - hs = get_vaa("hydroseq") - - lu = read_sf(refactored_gpkg, "lookup_table") %>% - select(comid = member_COMID, hy_id = reconciled_ID) %>% - mutate(comid = as.numeric(comid)) - - topo = st_drop_geometry(fl) 
%>% - select(hy_id = ID, toID) - - topo2 = left_join(topo, select(topo, hy_id = toID, fromID = hy_id), by = "hy_id") - - rl = read_parquet(glue("{base}/RouteLink_nwm_v2_2_3.parquet")) %>% - filter(!is.na(NHDWaterbodyComID)) %>% - select(comid, to, NHDWaterbodyComID) %>% - left_join(hs) %>% - group_by(NHDWaterbodyComID, by = "comid") %>% - left_join(lu, by = "comid") %>% - left_join(topo2, by = "hy_id", relationship = "many-to-many") %>% - mutate(WBIn = !hy_id %in% toID, - WBOut = hydroseq == min(hydroseq)) %>% - ungroup() %>% - filter(complete.cases(.)) - - #TODO: need to actually add these pre-refactor - # - # WBIn = filter(rl, WBIn) %>% - # select(hy_id = fromID, hl_link = NHDWaterbodyComID) %>% - # mutate(hl_reference = "RL_WBIn", hl_link = as.character(hl_link)) %>% - # distinct() - # - WBOut = filter(rl, WBOut) %>% - select(hy_id, hl_link = NHDWaterbodyComID) %>% - mutate(hl_reference = "RL_WBOut", - hl_link = as.character(hl_link)) %>% - distinct() - - # Merge and Build ---- - - tmp = bind_rows(hl, st_drop_geometry(pts), WBOut) %>% - group_by(hy_id) %>% - mutate(hl_id = paste0(VPU, cur_group_id())) %>% - ungroup() - - sub = filter(fl, ID %in% tmp$hy_id) - - outlets = get_node(sub) - outlets$hy_id = sub$ID - - hydrolocations = full_join(tmp, outlets, by = "hy_id") %>% - mutate(hl_position = "outflow") - - write_sf(hydrolocations, glue("{hldir}/hl_{VPU}.gpkg")) - -} diff --git a/runners/03_aggregate.R b/runners/03_aggregate.R deleted file mode 100644 index 4961e21d..00000000 --- a/runners/03_aggregate.R +++ /dev/null @@ -1,51 +0,0 @@ -pacman::p_load(hydrofabric) -devtools::load_all() - -vpus = vpu_boundaries$VPUID[1:21] - -base = '/Volumes/MyBook/nextgen' - -## TASK 1: build out uniform catchment distribution ---- -process = data.frame( - vpus = vpus, - outfiles = glue("{base}/uniform/uniform_{vpus}.gpkg"), - global = glue("{base}/global_uniform/uniform_{vpus}.gpkg") -) - -dir.create(dirname(process$outfiles[1]), showWarnings = FALSE) -dir.create(dirname(process$global[1]), showWarnings = FALSE) - -cw = read.csv(glue('{base}/CrosswalkTable_NHDplus_HU12.csv')) %>% - select(id = FEATUREID, huc12 = HUC_12) - -for (i in 1:nrow(process)) { - - VPU = process$vpus[i] - - refactored_gpkg = get_hydrofabric(VPU = VPU, - type = "refactor", - dir = glue("{base}/refactored")) - - reference_gpkg = get_hydrofabric(VPU = VPU, - type = "reference", - dir = glue("{base}/reference")) - - hl = read_sf(glue('{base}/hydrolocations/hl_{VPU}.gpkg')) - - gpkg = aggregate_to_distribution( - gpkg = refactored_gpkg, - vpu = VPU, - divide = 'refactored_divides', - outfile = process$outfiles[i], - hydrolocations = hl, - overwrite = TRUE - ) - - gpkg = add_nonnetwork_divides(gpkg, - huc12 = cw, - reference_gpkg = reference_gpkg) - -} - - - diff --git a/runners/04_global_id.R b/runners/04_global_id.R deleted file mode 100644 index f8370fc2..00000000 --- a/runners/04_global_id.R +++ /dev/null @@ -1,35 +0,0 @@ -## TASK 2: Assign Globally Unique Identifiers ---- - -pacman::p_load(hydrofabric) -devtools::load_all() - -vpus = vpu_boundaries$VPUID[1:21] - -base = '/Volumes/MyBook/nextgen' - -## TASK 1: build out uniform catchment distribution ---- -process = data.frame( - vpus = vpus, - outfiles = glue("{base}/uniform/uniform_{vpus}.gpkg"), - global = glue("{base}/global_uniform/uniform_{vpus}.gpkg") -) - -unlink(process$global) - -gs_file = 'https://code.usgs.gov/wma/nhgf/reference-hydrofabric/-/raw/04cd22f6b5f3f53d10c0b83b85a21d2387dfb6aa/workspace/cache/rpu_vpu_out.csv' - -modifications = read.csv(gs_file) %>% - 
filter(VPUID != toVPUID) %>% - rename(from = COMID, to = toCOMID) - -meta = assign_global_identifiers(gpkgs = process$outfiles, - outfiles = process$global, - modifications = modifications) - -for(i in 1:nrow(process)){ - try(append_style(process$global[i], layer_names = c("flowpaths", "divides", "hydrolocations")), silent = TRUE) -} - -arrow::read_parquet('/Users/mjohnson/Downloads/ml_exports.parquet') - - diff --git a/runners/uniform_aggregation_runner.R b/runners/uniform_aggregation_runner.R deleted file mode 100644 index b7e42b85..00000000 --- a/runners/uniform_aggregation_runner.R +++ /dev/null @@ -1,92 +0,0 @@ -pacman::p_load(hydrofabric, arrow) -devtools::load_all() - -vpus <- c("01", "08", "10L", - "15", "02", "04", - "05", "06", "07", - "09", "03S", "03W", - "03N", "10U", "11", - "12", "13", "14", - "16", "17", "18") - -base = '/Volumes/Transcend/ngen/CONUS-hydrofabric' -overwrite = TRUE -cache = FALSE - -## TASK 1: build out uniform catchment distribution - -process = data.frame(vpus = vpus, - outfiles = glue("{base}/03_uniform/uniform_{vpus}.gpkg"), - global = glue("{base}/04_global_uniform/uniform_{vpus}.gpkg")) - -dir.create(dirname(process$outfiles[1]), showWarnings = FALSE) -dir.create(dirname(process$global[1]), showWarnings = FALSE) - -cw = readr::read_csv('/Users/mjohnson/Downloads/CrosswalkTable_NHDplus_HU12.csv') %>% - select(id = FEATUREID, huc12 = HUC_12) - -unlink(process$outfiles) - -for(i in 1:nrow(process)){ - - VPU = process$vpus[i] - - refactored_gpkg = get_hydrofabric(VPU = VPU, - type = "refactor", - dir = glue("{base}/02_refactored"), - overwrite = FALSE) - - reference_gpkg = get_hydrofabric(VPU = VPU, - type = "reference", - dir = glue("{base}/01_reference"), - overwrite = FALSE) - - hl = hl_to_outlet(gpkg = refactored_gpkg, verbose = FALSE) %>% - mutate(hl_position = "outflow") - - gpkg = aggregate_to_distribution( - gpkg = refactored_gpkg, - vpu = process$vpus[i], - divide = 'refactored_divides', - outfile = process$outfiles[i], - hydrolocations = hl, - overwrite = TRUE, - log = TRUE, - cache = FALSE - ) - - gpkg = add_nonnetwork_divides(gpkg, - huc12 = cw, - reference_gpkg = reference_gpkg) - -} - - - -## TASK 2: Assign Globally Unique Identifiers - -unlink(process$global) - -gs_file = 'https://code.usgs.gov/wma/nhgf/reference-hydrofabric/-/raw/04cd22f6b5f3f53d10c0b83b85a21d2387dfb6aa/workspace/cache/rpu_vpu_out.csv' - -modifications = read.csv(gs_file) %>% - filter(VPUID != toVPUID) %>% - rename(from = COMID, to = toCOMID) - -meta = assign_global_identifiers(gpkgs = process$outfiles, - outfiles = process$global, - modifications = modifications) - - -for(i in 1:nrow(process)){ - try(append_style(process$global[i], layer_names = c("flowpaths", "divides", "hydrolocations")), silent = TRUE) - } - - -## TASK 3: Upload to ScienceBase - -# for(i in 1:length(gpkgs)){ -# sbtools::item_append_files(sb_id("uniform"), gpkgs[i]) -# message(basename(gpkgs[i])) -# } -
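
Note on the removed runners/ scripts: the per-VPU workflow they carried out can still be reproduced from functions exported by hydrofab (get_hydrofabric, aggregate_to_distribution, add_nonnetwork_divides, assign_global_identifiers, append_style), which this patch does not touch. The sketch below mirrors the deleted 03_aggregate.R and 04_global_id.R; the base directory, VPU id, and crosswalk/modification file paths are hypothetical placeholders, not part of the package.

# Minimal sketch of the deleted per-VPU aggregation workflow.
# Assumptions: a local working directory `base`, a single VPU "01",
# and previously built hydrolocations (runners/02) on disk.
library(hydrofab)
library(dplyr)
library(sf)
library(glue)

base <- "/path/to/nextgen"   # hypothetical local directory
VPU  <- "01"                 # hypothetical VPU id

# Fetch the refactored and reference fabrics for this VPU
refactored_gpkg <- get_hydrofabric(VPU = VPU, type = "refactor",
                                   dir = glue("{base}/refactored"))
reference_gpkg  <- get_hydrofabric(VPU = VPU, type = "reference",
                                   dir = glue("{base}/reference"))

# Hydrolocations built earlier (runners/02_update_hydrolocations.R)
hl <- read_sf(glue("{base}/hydrolocations/hl_{VPU}.gpkg"))

# HUC12 crosswalk used for the non-network divides
cw <- read.csv(glue("{base}/CrosswalkTable_NHDplus_HU12.csv")) %>%
  select(id = FEATUREID, huc12 = HUC_12)

# Aggregate to a uniform catchment distribution, then add the
# non-network divides from the reference fabric
outfile <- glue("{base}/uniform/uniform_{VPU}.gpkg")

gpkg <- aggregate_to_distribution(
  gpkg           = refactored_gpkg,
  vpu            = VPU,
  divide         = "refactored_divides",
  outfile        = outfile,
  hydrolocations = hl,
  overwrite      = TRUE
)

gpkg <- add_nonnetwork_divides(gpkg, huc12 = cw,
                               reference_gpkg = reference_gpkg)

# Once all VPUs are processed, assign globally unique identifiers
# (the runners filtered the rpu_vpu_out.csv table to cross-VPU rows)
modifications <- read.csv(glue("{base}/rpu_vpu_out.csv")) %>%
  filter(VPUID != toVPUID) %>%
  rename(from = COMID, to = toCOMID)

meta <- assign_global_identifiers(
  gpkgs         = outfile,
  outfiles      = glue("{base}/global_uniform/uniform_{VPU}.gpkg"),
  modifications = modifications
)

# Optional styling of the output layers, as in the deleted runners
try(append_style(glue("{base}/global_uniform/uniform_{VPU}.gpkg"),
                 layer_names = c("flowpaths", "divides", "hydrolocations")),
    silent = TRUE)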