update CONUS404 and dask on HPC details
amsnyder committed Aug 22, 2024
1 parent 30dcea7 commit 1025307
Showing 180 changed files with 74,437 additions and 9,179 deletions.
2 changes: 1 addition & 1 deletion .buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: abd3ddd4b6ad8bb366467d48a3bc9429
config: 3d2695535ece7686452ac5e168410457
tags: 645f666f9bcd5a90fca523b33c5a78b7
448 changes: 448 additions & 0 deletions .ipynb_checkpoints/genindex-checkpoint.html

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .ipynb_checkpoints/index-checkpoint.html
@@ -0,0 +1 @@
<meta http-equiv="Refresh" content="0; url=doc/About.html" />
39 changes: 39 additions & 0 deletions _downloads/18da692517bdcb499ad9638aeab068b2/jupter-start.sh
@@ -0,0 +1,39 @@
#!/bin/bash

## Set options for the SLURM scheduler -- edit as appropriate
#SBATCH -J jupyternb
#SBATCH -t 2-00:00:00
#SBATCH -o %j-jupyternb.out
#SBATCH -p workq
#SBATCH -A wbeep
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=40

# We will launch the server on a randomly assigned port number between 8400 and 9400. This
# minimizes the chances that we will impact other jupyter servers running at the same time.
JPORT=`shuf -i 8400-9400 -n 1`

### this will load the centralized HyTEST conda environment.
module use --append /caldera/projects/usgs/water/impd/hytest/modules
module load hytest

### If you want to use your own conda environment, remove the above module load statement
### and include statements here to activate your own environment:
# export PATH=/path/to/your/conda/bin:$PATH
# source activate envname

echo
echo "##########################################################################"
echo "Run the following ssh command from a new terminal on your desktop"
echo "ssh -N -L $JPORT:`hostname`:$JPORT -L 8787:`hostname`:8787 $USER@denali.cr.usgs.gov"
echo "##########################################################################"
echo

echo
echo "##########################################################################"
echo "COPY and paste the 127.0.0.1 URL below into a browser on your desktop"
echo "##########################################################################"
echo

srun jupyter lab --ip '*' --no-browser --port $JPORT --notebook-dir $PWD
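To use this script, it would typically be submitted to the scheduler with `sbatch jupter-start.sh` (after adjusting the partition, account, and resource options above to match your own allocation). Once the job starts, the `%j-jupyternb.out` file it writes contains the `ssh` tunnel command to run from a terminal on your desktop and the `127.0.0.1` URL to paste into your browser; the second forwarded port (8787) is the default port for the Dask dashboard.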
8 changes: 7 additions & 1 deletion _sources/dataset_access/CONUS404_ACCESS.md
@@ -5,7 +5,13 @@ This section contains notebooks that demonstrate how to access and perform basic
In the CONUS404 intake sub-catalog (see [here](../dataset_catalog/README.md) for an explainer of our intake data catalog), you will see entries for four CONUS404 datasets: `conus404-hourly`, `conus404-daily`, `conus404-monthly`, and `conus404-daily-diagnostic`, as well as two bias-adjusted CONUS404 datasets: `conus404-hourly-ba` and `conus404-daily-ba`. Each of these datasets is duplicated in up to three different storage locations (as the [intake catalog section](../dataset_catalog/README.md) also describes).

## CONUS404 Data
The `conus404-hourly` data is a subset of the wrfout model output and `conus404-daily-diagnostic` is a subset from the wrfxtrm model output, both of which are described in the official [CONUS404 data release](https://doi.org/10.5066/P9PHPK4F). We also have `conus404-daily` and `conus404-monthly` files, which are just resampled from the `conus404-hourly` data.
The `conus404-hourly` data is a subset of the `wrfout` model output. For instantaneous variables, the value at each time step is the instantaneous value at that time step. For accumulated variables, the value represents the total accumulated up to that time step (see the `integration_length` attribute attached to each accumulated variable for more details on the accumulation period).

The `conus404-daily-diagnostic` data is a subset of the `wrfxtrm` model output. These data represent the preceding 24 hours, with the timestamp corresponding to the end of the 24-hour period. Because the CONUS404 model run started at 1979-10-01_00:00:00, the first time step (1979-10-01_00:00:00) of each variable is all zeros.

Both of these datasets are described in the official [CONUS404 data release](https://doi.org/10.5066/P9PHPK4F).

We also have `conus404-daily` and `conus404-monthly` files, which are just resampled from the `conus404-hourly` data. To create the `conus404-daily` zarr, instantaneous variables are aggregated from 00:00:00 UTC to 11:00:00 UTC, while accumulated variables are aggregated from 01:00:00 UTC to 12:00:00 UTC of the next day.

**Please note that the values in the ACLWDNB, ACLWUPB, ACSWDNB, ACSWDNT, and ACSWUPB variables available in the zarr store differ from the original model output.** These variables have been re-calculated to reflect the accumulated value since the model start, as directed in the WRF manual. An attribute has been added to each of these variables in the zarr store to denote the accumulation period for the variable.
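As a minimal sketch of what reading these zarr stores can look like in Python, the snippet below opens one CONUS404 store through the HyTEST intake catalog and inspects the attributes of one of the accumulated variables. The catalog URL and entry names are assumptions (the intake catalog section linked above is the authoritative reference), and `intake`, `intake-xarray`, `xarray`, and `dask` are assumed to be installed:

```python
import intake

# Assumed catalog URL and entry names -- verify against the intake catalog section.
hytest_cat = intake.open_catalog(
    "https://raw.githubusercontent.com/hytest-org/hytest/main/dataset_catalog/hytest_intake_catalog.yml"
)
conus404_cat = hytest_cat["conus404-catalog"]
print(list(conus404_cat))  # show the available CONUS404 stores / storage locations

# Lazily open one store as an xarray Dataset backed by dask arrays.
ds = conus404_cat["conus404-hourly-osn"].to_dask()

# Accumulated variables carry an attribute describing their accumulation period
# (e.g. `integration_length`, as noted above).
print(ds["ACLWDNB"].attrs)
```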

5 changes: 3 additions & 2 deletions _sources/dataset_access/CONUS404_CHANGELOG.md
@@ -3,7 +3,8 @@
This changelog documents major changes to the [CONUS404 zarr datasets](./CONUS404_ACCESS.md). We do not anticipate regular changes to the dataset, but we may need to fix an occasional bug or update the dataset with additional years of data. Therefore, we recommend that users of the CONUS404 zarr data check this changelog regularly.

## 2024-02
Water year 2022 data (October 1, 2021 - September 30, 2022) was added to all zarr stores (`conus404-hourly-*`, `conus404-daily-*`, `conus404-monthly-*`). Coordinate x and y values were updated to fix an issue with how they were generated that resulted in small location errors (lat and lon were not changed).
* Water year 2022 data (October 1, 2021 - September 30, 2022) was added to all zarr stores (`conus404-hourly-*`, `conus404-daily-*`, `conus404-monthly-*`).
* Coordinate x and y values were updated to fix an issue with how they were generated that resulted in small location errors (lat and lon were not changed).

## 2023-11
Removed derived variables (E2, ES2, RH2, SH2) that were not part of original CONUS404 model output from CONUS404 zarr stores `conus404-hourly-*`, `conus404-daily-*`, `conus404-monthly-*`.
* Removed derived variables (E2, ES2, RH2, SH2) that were not part of original CONUS404 model output from CONUS404 zarr stores `conus404-hourly-*`, `conus404-daily-*`, `conus404-monthly-*`.
19 changes: 19 additions & 0 deletions _sources/dataset_access/README.md
@@ -0,0 +1,19 @@
# CONUS404 Access

This section contains notebooks that demonstrate how to access and perform basic data manipulation for the [CONUS404 dataset](https://doi.org/10.5066/P9PHPK4F).

In the CONUS404 intake sub-catalog (see [here](../dataset_catalog/README.md) for an explainer of our intake data catalog), you will see entries for four CONUS404 datasets: `conus404-hourly`, `conus404-daily`, `conus404-monthly`, and `conus404-daily-diagnostic`. Each of these datasets is duplicated in three different storage locations (as the [intake catalog section](../dataset_catalog/README.md) also describes). The `conus404-hourly` data is a subset of the `wrfout` model output and `conus404-daily-diagnostic` is a subset of the `wrfxtrm` model output, both of which are described in the official [CONUS404 data release](https://doi.org/10.5066/P9PHPK4F). We also have `conus404-daily` and `conus404-monthly` files, which are resampled from the `conus404-hourly` data.

**Please note that the values in the ACLWDNB, ACLWUPB, ACSWDNB, ACSWDNT, and ACSWUPB variables available in the zarr store differ from the original model output.** These variables have been re-calculated to reflect the accumulated value since the model start, as directed in the WRF manual. An attribute has been added to each of these variables in the zarr store to denote the accumulation period for the variable.

We currently have five notebooks to help demonstrate how to work with these datasets in a Python workflow:
- [Explore CONUS404 Dataset](./conus404_explore.ipynb): opens the CONUS404 dataset, loads and plots the entire spatial domain of a specified variable at a specific time step, and loads and plots a time series of a variable at a specified coordinate pair.
- [CONUS404 Temporal Aggregation](./conus404_temporal_aggregation.ipynb): calculates a daily average of the CONUS404 hourly data.
- [CONUS404 Spatial Aggregation](./conus404_spatial_aggregation.ipynb): calculates the area-weighted mean of the CONUS404 data for all HUC12s in the Delaware River Basin.
- [CONUS404 Point Selection](./conus404_point_selection.ipynb): samples the CONUS404 data at a selection of gage locations using their lat/lon point coordinates.
- [CONUS404 Regridding (Curvilinear => Rectilinear)](./conus404_regrid.ipynb): regrids a subset of the CONUS404 dataset from a curvilinear grid to a rectilinear grid and saves the output to a netcdf file. The package used in this demo is not compatible with Windows. We hope to improve upon this methodology, and will likely update the package/technique used in the future.

These methods are likely applicable to many of the other key HyTEST datasets that can be opened with xarray.
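For example, a minimal sketch of the daily-averaging idea behind the temporal aggregation notebook, assuming the `conus404-hourly` data have already been opened as an xarray Dataset named `ds` (e.g. via the intake catalog) and using `T2` purely as an illustrative instantaneous variable:

```python
# Daily mean of an instantaneous hourly variable; accumulated variables would
# need the shifted aggregation window described in the CONUS404 documentation.
daily_mean_t2 = (
    ds["T2"]
    .resample(time="1D")  # group hourly time steps into calendar days
    .mean()               # average within each day
    .compute()            # trigger the lazy, dask-backed computation
)
```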

*Note: If you need help setting up a computing environment where you can run these notebooks, you should review the [Computing Environments](../environment_set_up/README.md) section of the documentation.*
4 changes: 2 additions & 2 deletions _sources/dataset_access/conus404_explore.ipynb
@@ -255,8 +255,8 @@
"metadata": {},
"outputs": [],
"source": [
"# pull out a particulat time slice at the specified coordinates\n",
"%%time\n",
"# pull out a particulat time slice at the specified coordinates\n",
"da = ds.PREC_ACC_NC.sel(x=x, y=y, method='nearest').sel(time=slice('2013-01-01 00:00','2013-12-31 00:00')).load()"
]
},
@@ -308,7 +308,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.10.0"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
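For context on the first hunk above: IPython cell magics such as `%%time` must be the very first line of a notebook cell, so the comment is moved below the magic. The corrected cell (with `ds`, `x`, and `y` defined earlier in that notebook) reads:

```python
%%time
# pull out a particular time slice at the specified coordinates
da = ds.PREC_ACC_NC.sel(x=x, y=y, method='nearest').sel(time=slice('2013-01-01 00:00','2013-12-31 00:00')).load()
```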
4 changes: 2 additions & 2 deletions _sources/dataset_access/conus404_spatial_aggregation.ipynb
@@ -195,7 +195,7 @@
"gage_id = '01482100'\n",
"nldi = NLDI()\n",
"del_basins = nldi.get_basins(gage_id)\n",
"huc12_basins = WaterData('wdb12').bygeom(del_basins.geometry[0])"
"huc12_basins = WaterData('wbd12').bygeom(del_basins.geometry[0])"
]
},
{
@@ -922,7 +922,7 @@
"metadata": {},
"outputs": [],
"source": [
"merged_geo.hvplot(c=var, geo=True, cmap='viridis_r', frame_width=600, tiles='StamenTerrain', \n",
"merged_geo.hvplot(c=var, geo=True, cmap='viridis_r', frame_width=600, tiles='EsriTerrain', \n",
" title='CONUS404', alpha=0.7)"
]
},
17 changes: 8 additions & 9 deletions _sources/environment_set_up/OpenOnDemand.md
@@ -1,17 +1,16 @@
# HPC Server: Open OnDemand Quick-Start

This is a custom service provided by the ARC team and customized for use in HyTEST workflows. It is the easiest
to use (no configuration needed on your part), and provides reasonable compute resources via the `tallgrass`
host:
This is a custom service provided by the USGS ARC team. It is the easiest to use (no configuration needed on your part), and provides reasonable compute resources via the `tallgrass` and `hovenweep` hosts:

* Go to `https://tg-ood.cr.usgs.gov/pun/sys/dashboard` in your web browser.
Note that you must be on the VPN to access this host. You will be prompted to log in to the server, and you should use your AD username and password here.
* Launch the HyTEST Jupyter Server app under the Interactive Apps dropdown menu.
* Fill in the form to customize the allocation in which the Jupyter Server will execute. You may want to consider adding the git and/or aws modules if you plan to use them during your session. You will just need to type `module load git` and/or `module load aws` in the `Module loads` section.
* Submit
* To log in to OnDemand, select the appropriate login link from the `OnDemand` section of `https://hpcportal.cr.usgs.gov/`. Note that you must be on the VPN to access this site. Denali and Tallgrass share one disk for data storage, while Hovenweep has a different disk; if you have data stored on the HPCs, choose whichever resource is attached to the disk where your data are stored. If you are accessing data from a different, publicly accessible storage location, you can choose either option.
* From the OnDemand landing page, choose `Interactive Apps`. If you are using `Hovenweep`, select the `Jupyter` option from this dropdown. If you are using `Tallgrass`, you can either select `Jupyter` or launch the `HyTEST Jupyter` server app, which includes a conda environment pre-configured with the packages you need to run the workflows in this JupyterBook. If you select `Jupyter` rather than our pre-configured environment, you will need to build your own; you can learn how to set up a conda environment [here](https://hpcportal.cr.usgs.gov/hpc-user-docs/guides/software/environments/python/Python_Environment_Setup_with_Conda.html) in the HPC user docs.
* Fill in the form to customize the allocation in which the Jupyter Server will execute.
* You may want to consider adding the git and/or aws modules if you plan to use them during your session. You will just need to type `module load git` and/or `module load aws` in the `Module loads` section.
* If you expect to run code in parallel on multiple compute nodes, you have two options: (1) use the form to request the number of cores you need and then run a [Dask Local Cluster](./Start_Dask_Cluster_Denali.ipynb) on those cores, or (2) request the standard 2 cores and then use a [Dask SLURMCluster](./Start_Dask_Cluster_Tallgrass.ipynb) to submit new jobs to the SLURM scheduler, giving you access to additional compute nodes (see the sketch after this list).
* Click Submit
* Once your server is ready, a `Connect to Jupyter` button will appear that you can click to start your session.
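As an illustration of option (2) above, a minimal `dask_jobqueue` sketch is shown below. The queue, account, and resource values are placeholders that must be matched to your own allocation; the linked Dask notebooks are the authoritative reference for HyTEST-specific settings.

```python
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Placeholder values -- substitute the partition, account, and resources
# for your own SLURM allocation.
cluster = SLURMCluster(
    queue="cpu",            # hypothetical partition name
    account="myproject",    # hypothetical SLURM account
    cores=36,               # cores per worker job
    memory="128GiB",        # memory per worker job
    walltime="01:00:00",
)
cluster.scale(jobs=4)       # ask the scheduler for 4 worker jobs
client = Client(cluster)    # connect this session to the cluster
```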

The Jupyter Server will run in an allocation on `tallgrass`. This server will have access to your home
The Jupyter Server will run in an allocation on `tallgrass` or `hovenweep`. This server will have access to your home
directory/folder on that host, which is where your notebooks will reside.

For light duty work (i.e. tutorials), a `Viz` node is likely adequate in your allocation request. If you
1 change: 1 addition & 0 deletions _sphinx_design_static/sphinx-design.min.css

Large diffs are not rendered by default.

134 changes: 134 additions & 0 deletions _static/_sphinx_javascript_frameworks_compat.js
@@ -0,0 +1,134 @@
/*
* _sphinx_javascript_frameworks_compat.js
* ~~~~~~~~~~
*
* Compatability shim for jQuery and underscores.js.
*
* WILL BE REMOVED IN Sphinx 6.0
* xref RemovedInSphinx60Warning
*
*/

/**
* select a different prefix for underscore
*/
$u = _.noConflict();


/**
* small helper function to urldecode strings
*
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
*/
jQuery.urldecode = function(x) {
if (!x) {
return x
}
return decodeURIComponent(x.replace(/\+/g, ' '));
};

/**
* small helper function to urlencode strings
*/
jQuery.urlencode = encodeURIComponent;

/**
* This function returns the parsed url parameters of the
* current request. Multiple values per key are supported,
* it will always return arrays of strings for the value parts.
*/
jQuery.getQueryParameters = function(s) {
if (typeof s === 'undefined')
s = document.location.search;
var parts = s.substr(s.indexOf('?') + 1).split('&');
var result = {};
for (var i = 0; i < parts.length; i++) {
var tmp = parts[i].split('=', 2);
var key = jQuery.urldecode(tmp[0]);
var value = jQuery.urldecode(tmp[1]);
if (key in result)
result[key].push(value);
else
result[key] = [value];
}
return result;
};

/**
* highlight a given string on a jquery object by wrapping it in
* span elements with the given class name.
*/
jQuery.fn.highlightText = function(text, className) {
function highlight(node, addItems) {
if (node.nodeType === 3) {
var val = node.nodeValue;
var pos = val.toLowerCase().indexOf(text);
if (pos >= 0 &&
!jQuery(node.parentNode).hasClass(className) &&
!jQuery(node.parentNode).hasClass("nohighlight")) {
var span;
var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
if (isInSVG) {
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
} else {
span = document.createElement("span");
span.className = className;
}
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
node.parentNode.insertBefore(span, node.parentNode.insertBefore(
document.createTextNode(val.substr(pos + text.length)),
node.nextSibling));
node.nodeValue = val.substr(0, pos);
if (isInSVG) {
var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
var bbox = node.parentElement.getBBox();
rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute('class', className);
addItems.push({
"parent": node.parentNode,
"target": rect});
}
}
}
else if (!jQuery(node).is("button, select, textarea")) {
jQuery.each(node.childNodes, function() {
highlight(this, addItems);
});
}
}
var addItems = [];
var result = this.each(function() {
highlight(this, addItems);
});
for (var i = 0; i < addItems.length; ++i) {
jQuery(addItems[i].parent).before(addItems[i].target);
}
return result;
};

/*
* backward compatibility for jQuery.browser
* This will be supported until firefox bug is fixed.
*/
if (!jQuery.browser) {
jQuery.uaMatch = function(ua) {
ua = ua.toLowerCase();

var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
/(webkit)[ \/]([\w.]+)/.exec(ua) ||
/(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
/(msie) ([\w.]+)/.exec(ua) ||
ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
[];

return {
browser: match[ 1 ] || "",
version: match[ 2 ] || "0"
};
};
jQuery.browser = {};
jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}
