DOI-USGS · lindsayplatt · Dec 21, 2021 · Dec 21, 2021 · Dec 21, 2021 · Dec 21, 2021
diff --git a/.gitignore b/.gitignore
@@ -46,6 +46,12 @@ vignettes/*.pdf
 .remake
 build/
 
+# Track indicator files for what files are on S3
+gw-conditions/*
+!gw-conditions/*.ind
+visualizations/data/*
+!visualizations/data/*.ind
+
 .DS_Store
 node_modules
 /dist

diff --git a/2_process.yml b/2_process.yml
@@ -21,7 +21,7 @@ targets:
       - 2_process/out/gw_daily_quantiles.csv
       - 2_process/out/gw_data_anomalies.csv
       - gw_sites_sf
-      - public/gw-conditions-time-labels.csv
+      - visualizations/data/gw-conditions-time-labels.csv.ind
 
   # Sites that use "depth below" as their gw level need to be inversed. In the 
   # current implementation, this means any site that used pcode == '72019'
@@ -59,7 +59,7 @@ targets:
     command: read_csv("2_process/out/gw_data_anomalies.csv", col_types = I('cDnnc'))
   gw_time:
     command: generate_time(gw_anomaly_data)
-  public/gw-conditions-time-labels.csv:
+  2_process/out/gw-conditions-time-labels.csv:
     command: generate_months(target_name, gw_time)
   gw_anomaly_data_w_paths:
     command: add_paths_to_data(gw_anomaly_data)
@@ -72,3 +72,8 @@ targets:
       out_file = target_name,
       in_zip = '1_fetch/out/nws_states.zip',
       tmp_dir = I('2_process/tmp'))
+
+  ##-- Now push the data needed by Vue to S3 --##
+
+  visualizations/data/gw-conditions-time-labels.csv.ind:
+    command: s3_put(target_name, '2_process/out/gw-conditions-time-labels.csv', config_file = I('lib/cfg/s3_config_viz.yml'))
diff --git a/3_visualize.yml b/3_visualize.yml
@@ -20,34 +20,51 @@ targets:
 
   3_visualize:
     depends:
-      - src/assets/anomaly_peaks.svg 
-      - public/gw-conditions-site-coords.csv ## this is in aws as gw-conditions-sites.csv
-      - public/gw-conditions-wy20.csv ## in aws as gw-conditions-wy20.csv
-      - public/gw-conditions-daily-proportions.csv ## in aws as gw-conditions-daily-count.csv
-
-  # exports that are leveraged by vue
-  # TODO: send to s3
-  src/assets/anomaly_peaks.svg:
+      - visualizations/data/gw-conditions-peaks-map.svg.ind
+      - visualizations/data/gw-conditions-site-coords.csv.ind
+      - visualizations/data/gw-conditions-wy20.csv.ind
+      - visualizations/data/gw-conditions-daily-proportions.csv.ind
+      - src/assets/gw-conditions-peaks-map.svg # Currently a temporary fix!
+
+  3_visualize/out/gw-conditions-peaks-map.svg:
     command: build_peaks_svg(
       target_name, 
       svg_width = svg_width, 
       svg_height = svg_height)
     depends:
      - '2_process/out/nws_states.shp'
 
-  public/gw-conditions-wy20.csv:
+  3_visualize/out/gw-conditions-wy20.csv:
     command: gwl_to_peak(
       target_name, 
       gw_anomaly_data_w_colors)
 
-  public/gw-conditions-site-coords.csv:
+  3_visualize/out/gw-conditions-site-coords.csv:
     command: get_site_coords(
       target_name,
       sites_sf = gw_sites_sf_shifted)
 
-  public/gw-conditions-daily-proportions.csv:
+  3_visualize/out/gw-conditions-daily-proportions.csv:
     command: site_prop_timeseries(target_name, 
       gw_anomaly_data_w_colors)
+
+  ##-- Now push the data needed by Vue to S3 --##
 
-
+  visualizations/data/gw-conditions-peaks-map.svg.ind:
+    command: s3_put(target_name, '3_visualize/out/gw-conditions-peaks-map.svg', config_file = I('lib/cfg/s3_config_viz.yml'))
+
+  visualizations/data/gw-conditions-wy20.csv.ind:
+    command: s3_put(target_name, '3_visualize/out/gw-conditions-wy20.csv', config_file = I('lib/cfg/s3_config_viz.yml'))
+
+  visualizations/data/gw-conditions-site-coords.csv.ind:
+    command: s3_put(target_name, '3_visualize/out/gw-conditions-site-coords.csv', config_file = I('lib/cfg/s3_config_viz.yml'))
+
+  visualizations/data/gw-conditions-daily-proportions.csv.ind:
+    command: s3_put(target_name, '3_visualize/out/gw-conditions-daily-proportions.csv', config_file = I('lib/cfg/s3_config_viz.yml'))
 
+  # TEMPORARY FIX #
+  # Currently, we can't inject SVG directly from a URL. So,
+  # moving the SVG file to `assets` for now. Delete when we can 
+  # do this from S3 (and also delete the file from assets/)
+  src/assets/gw-conditions-peaks-map.svg:
+    command: file.copy(to = target_name, from = '3_visualize/out/gw-conditions-peaks-map.svg', overwrite = TRUE)
diff --git a/3_visualize/src/svg_utils_vue.R b/3_visualize/src/svg_utils_vue.R
@@ -5,8 +5,14 @@ gwl_to_peak <- function(file_out, gw_anomaly_data_w_colors){
            path_y = round(50-daily_quant, digits = 0)) %>%
     #filter(!is.na(quant)) %>%
     mutate(site_no = paste0('gwl_', site_no)) %>%
-    select(site_no, day_seq, daily_quant, path_y) %>%
+    select(site_no, day_seq, daily_quant, path_y) %>% 
+
+    # Convert to `data.table` for fast reshaping, then back 
+    # to tibble. Note that `reshape2::dcast()` is deprecated.
+    setDT() %>% 
     dcast(day_seq~site_no, value.var = 'path_y') %>%
+    as_tibble() %>% 
+
     arrange(day_seq) %>%
     write_csv(file_out)
 }

diff --git a/4_animate.yml b/4_animate.yml
diff --git a/4_animate/src/create_gif.R b/4_animate/src/create_gif.R
diff --git a/4_animate/src/create_video.R b/4_animate/src/create_video.R
diff --git a/README.md b/README.md
@@ -1,27 +1,41 @@
 # gw-conditions
-Similar to gage-conditions-gif but for groundwater!
 
-### DISCLAIMER: 
+A visualization showing groundwater conditions as little peaks above or below their normal values. This visualization is built using an R-based pipeline for data processing, and Vue.js + D3.js to create an animated site. The R-based pipeline (1) calculates daily percentiles for each well based on the historic record at each site, and (2) writes an svg map for the base of the animation. The R-based pipeline also pushes the data up to a public S3 bucket. This pipeline leverages an internal package, `library(scipiper)`, to automate the workflow. You do not need to run the pipeline to be able to build the app locally, since the app points to data in the S3 bucket.
 
-THE ANALYSIS IN THIS VIZ REPO (AS IT CURRENTLY STANDS 12/23/2020) IS NOT MEANT TO BE FINAL. JUST USED AS AN EXAMPLE OF HOW TO BUILD A VIDEO-BASED DATAVIZ USING SCIPIPER
+## Build the visualization locally
 
-### Build the historic data
+The data (all CSVs and SVGs) needed for this visualization are already publicly available through `labs.waterdata.usgs.gov/visualizations/data/[FILENAME]`. You should be able to open this repo, run the following code chunk, and see the visualization locally (http://localhost:8080/) without needing to build the data pipeline:
 
-The historic data pipeline (`0_historic.yml`) is decoupled from the rest of the pipeline. It will build only when you run `scmake(remake_file = "0_historic.yml")`. Otherwise, the `1_fetch.yml` part of the pipeline will assume the historic data is on S3 ready to use and will download the data using the filepaths described in `0_config.yml`.
+```
+npm install
+npm run serve
+```
+
+## Build the data behind the visualization and push to S3
+
+This step is not needed to build the visualization locally since the data files are available through a public S3 bucket and can be accessed from `labs.waterdata.usgs.gov/visualizations/data/[FILENAME]`. Run this code if you need to update the data or base SVG behind the visualization.
+
+### Generate the historic data
+
+The historic data pipeline (`0_historic.yml`) is decoupled from the rest of the pipeline. It will build only when you run `scmake(remake_file = "0_historic.yml")`. Otherwise, the `1_fetch.yml` part of the pipeline will assume the historic data is on S3 ready to use and will download the data using the filepaths described in `0_config.yml`. The historic data is being stored on the developer VPC in the `vizlab-data` bucket but should be moved for public access later if we do a data release. 
 
-### How to build the viz:
+### Generate the data behind the viz
 
-To build, check the `start_date` and `end_date` in `0_config.yml`. Then, run the following and look in your `4_animate/out` folder for the video.
+If you plan to push the final data to S3, you need to have the appropriate S3 configs prepared. The file `lib/cfg/s3_config_viz.yml` expects you to have credentials labeled `[prod]` in your `~/.aws/credentials` file (rather than `[default]`). Also note that any of the targets that call `scipiper::s3_put()` will require you to be on the USGS Network (VPN). If you want to build the data pipeline but don't need to (or can't) push to S3, you can run `options(scipiper.dry_put = TRUE)` and then the code listed below. This will skip the upload step.
+
+Once your configurations are set up, run the following to generate the data needed by the visualization. 
 
 ```r
 library(scipiper)
 scmake()
 ```
 
-### How to get Climate Response Network data:
+In the end the following targets should have been built successfully (because they are what is pushed to S3 and used by the Vue code):
 
-```r
-library(scipiper)
-scmake("1_fetch/out/gw_crn_data.rds")
-crn_data <- readRDS("1_fetch/out/gw_crn_data.rds")
-```
+- `visualizations/data/gw-conditions-time-labels.csv.ind`
+- `visualizations/data/gw-conditions-peaks-map.svg.ind`
+- `visualizations/data/gw-conditions-wy20.csv.ind`
+- `visualizations/data/gw-conditions-site-coords.csv.ind`
+- `visualizations/data/gw-conditions-daily-proportions.csv.ind`
+
+If you change the `viz_start_date` and `viz_end_date` values in `0_config.yml`, you should also change the name of the `visualizations/data/gw-conditions-wy20.csv.ind` file to reflect your new time period. In the future, we would like to do this automatically, but for now it is a manual step. The Vue code that reads that file would also need to be changed.
diff --git a/gw-conditions/historic_gw_data_filtered.csv.ind b/gw-conditions/historic_gw_data_filtered.csv.ind
@@ -0,0 +1,2 @@
+hash: 991b0a39bf0916d45c3e883dba0ab760
+
diff --git a/gw-conditions/historic_gw_data_unfiltered.csv.ind b/gw-conditions/historic_gw_data_unfiltered.csv.ind
@@ -0,0 +1,2 @@
+hash: 5f41027b53594a6cff5007cfe775811a
+
diff --git a/gw-conditions/historic_gw_quantiles.csv.ind b/gw-conditions/historic_gw_quantiles.csv.ind
@@ -0,0 +1,2 @@
+hash: 159c709725092b140fe0b8f3eb37d845
+
diff --git a/gw-conditions/historic_gw_site_info_filtered.rds.ind b/gw-conditions/historic_gw_site_info_filtered.rds.ind
@@ -0,0 +1,2 @@
+hash: 48114fcc5818e5c82ba45c06f0174308
+
diff --git a/gw-conditions/historic_gw_site_info_unfiltered.rds.ind b/gw-conditions/historic_gw_site_info_unfiltered.rds.ind
@@ -0,0 +1,2 @@
+hash: aab93b170f7889cbcf92f201b4b47165
+
diff --git a/lib/cfg/s3_config_viz.yml b/lib/cfg/s3_config_viz.yml
@@ -0,0 +1,3 @@
+profile: 'prod'
+
+bucket: 'water-visualizations-prod-website'
diff --git a/package-lock.json b/package-lock.json
diff --git a/remake.yml b/remake.yml
@@ -1,19 +1,17 @@
-target_default: 3_visualize
+target_default: build_gw_viz_data
 
 include:
   - 0_config.yml
   - 1_fetch.yml
   - 2_process.yml
   - 3_visualize.yml
-  #- 4_animate.yml
 
 targets:
-  build_viz:
+  build_gw_viz_data:
     depends: 
       - 1_fetch
       - 2_process
       - 3_visualize
-      #- 4_animate
 
 # The historic data used in 1_fetch has been prebuilt by the 0_historic pipeline.
 # 

diff --git a/src/assets/anomaly_peaks.svg b/src/assets/anomaly_peaks.svg
diff --git a/src/assets/gw-conditions-peaks-map.svg b/src/assets/gw-conditions-peaks-map.svg
diff --git a/src/components/GWL.vue b/src/components/GWL.vue
@@ -34,7 +34,7 @@
 </template>
 <script>
 import * as d3Base from 'd3';
-import GWLmap from "@/assets/anomaly_peaks.svg";
+import GWLmap from "@/assets/gw-conditions-peaks-map.svg";
 import { TimelineMax } from "gsap/all"; 
 
 export default {
@@ -111,7 +111,7 @@ export default {
         // read in data 
         let promises = [
         self.d3.csv(self.publicPath + "quant_peaks.csv",  this.d3.autotype), // used to draw legend shapes - color palette needs to be pulled out
-        self.d3.csv("https://labs.waterdata.usgs.gov/visualizations/data/gw-conditions-wy2020.csv",  this.d3.autotype),
+        self.d3.csv("https://labs.waterdata.usgs.gov/visualizations/data/gw-conditions-wy20.csv",  this.d3.autotype),
         self.d3.csv("https://labs.waterdata.usgs.gov/visualizations/data/gw-conditions-site-coords.csv",  this.d3.autotype), 
         self.d3.csv("https://labs.waterdata.usgs.gov/visualizations/data/gw-conditions-daily-proportions.csv",  this.d3.autotype),
         self.d3.csv("https://labs.waterdata.usgs.gov/visualizations/data/gw-conditions-time-labels.csv",  this.d3.autotype),

diff --git a/visualizations/data/gw-conditions-daily-proportions.csv.ind b/visualizations/data/gw-conditions-daily-proportions.csv.ind
@@ -0,0 +1,2 @@
+hash: fe67227f86088061aa01362b2238aae2
+
diff --git a/visualizations/data/gw-conditions-peaks-map.svg.ind b/visualizations/data/gw-conditions-peaks-map.svg.ind
@@ -0,0 +1,2 @@
+hash: 002b9812891b9ae2a029fdc2d2c27c5b
+
diff --git a/visualizations/data/gw-conditions-site-coords.csv.ind b/visualizations/data/gw-conditions-site-coords.csv.ind
@@ -0,0 +1,2 @@
+hash: 45b4cde69f2a691c0e7e92aceab4f0b2
+
diff --git a/visualizations/data/gw-conditions-time-labels.csv.ind b/visualizations/data/gw-conditions-time-labels.csv.ind
@@ -0,0 +1,2 @@
+hash: 6d325b86933a2852bbbba5e5f7d5ceff
+
diff --git a/visualizations/data/gw-conditions-wy20.csv.ind b/visualizations/data/gw-conditions-wy20.csv.ind
@@ -0,0 +1,2 @@
+hash: af9704ef7f518845ef58553900e4ae4e
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		profile: 'prod'

		bucket: 'water-visualizations-prod-website'