diff --git a/.gitignore b/.gitignore index ffe84ea..f293e6d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,13 @@ __pycache__/ *.py[cod] -# Test things +# Internal test notebooks *test*.ipynb +# xagg /wm/ directories created during docs processing +wm/ +docs/notebooks/wm/ + # C extensions *.so diff --git a/docs/notebooks/base_run.ipynb b/docs/notebooks/base_run.ipynb index 4d7c404..cfc77c5 100644 --- a/docs/notebooks/base_run.ipynb +++ b/docs/notebooks/base_run.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "source": [ "# Base run\n", - "A simple run of `xagg`, aggregating gridded temperature data over US counties. For a deeper dive into `xagg`'s functionality, see the [Detailed Code Run](./full_run.ipynb)." + "A simple run of `xagg`, aggregating gridded temperature data over US counties. For a deeper dive into `xagg`'s functionality, see the [Detailed Code Run](./full_run.ipynb). " ] }, { @@ -38,10 +38,455 @@ "execution_count": 2, "id": "simple-spelling", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset>\n", + "Dimensions: (lon: 288, lat: 192, month: 12, bnds: 2)\n", + "Coordinates:\n", + " height float64 ...\n", + " * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8\n", + " * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0\n", + " * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " lat_bnds (month, lat, bnds) float64 ...\n", + " lon_bnds (month, lon, bnds) float64 ...\n", + " tas (month, lat, lon) float32 ...
\n", + " | NAME | \n", + "STATE_NAME | \n", + "STATE_FIPS | \n", + "CNTY_FIPS | \n", + "FIPS | \n", + "geometry | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "Lake of the Woods | \n", + "Minnesota | \n", + "27 | \n", + "077 | \n", + "27077 | \n", + "POLYGON ((-95.34283 48.54668, -95.34105 48.715... | \n", + "
1 | \n", + "Ferry | \n", + "Washington | \n", + "53 | \n", + "019 | \n", + "53019 | \n", + "POLYGON ((-118.85163 47.94956, -118.84846 48.4... | \n", + "
2 | \n", + "Stevens | \n", + "Washington | \n", + "53 | \n", + "065 | \n", + "53065 | \n", + "POLYGON ((-117.43883 48.04412, -117.54219 48.0... | \n", + "
3 | \n", + "Okanogan | \n", + "Washington | \n", + "53 | \n", + "047 | \n", + "53047 | \n", + "POLYGON ((-118.97209 47.93915, -118.97406 47.9... | \n", + "
4 | \n", + "Pend Oreille | \n", + "Washington | \n", + "53 | \n", + "051 | \n", + "53051 | \n", + "POLYGON ((-117.43858 48.99992, -117.03205 48.9... | \n", + "
<xarray.Dataset>\n", - "Dimensions: (month: 12, pix_idx: 3141)\n", + "Dimensions: (poly_idx: 3141, month: 12)\n", "Coordinates:\n", - " * pix_idx (pix_idx) int64 0 1 2 3 4 5 6 ... 3135 3136 3137 3138 3139 3140\n", + " * poly_idx (poly_idx) int64 0 1 2 3 4 5 6 ... 3135 3136 3137 3138 3139 3140\n", " * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12\n", "Data variables:\n", - " NAME (pix_idx) object 'Lake of the Woods' 'Ferry' ... 'Broomfield'\n", - " STATE_NAME (pix_idx) object 'Minnesota' 'Washington' ... 'Colorado'\n", - " STATE_FIPS (pix_idx) object '27' '53' '53' '53' ... '02' '02' '02' '08'\n", - " CNTY_FIPS (pix_idx) object '077' '019' '065' '047' ... '240' '068' '014'\n", - " FIPS (pix_idx) object '27077' '53019' '53065' ... '02068' '08014'\n", - " tas (pix_idx, month) float64 263.9 268.8 274.0 ... 283.5 276.4 270.4
array([ 0, 1, 2, ..., 3138, 3139, 3140])
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
array(['Lake of the Woods', 'Ferry', 'Stevens', ...,\n", - " 'Southeast Fairbanks', 'Denali', 'Broomfield'], dtype=object)
array(['Minnesota', 'Washington', 'Washington', ..., 'Alaska', 'Alaska',\n", - " 'Colorado'], dtype=object)
array(['27', '53', '53', ..., '02', '02', '08'], dtype=object)
array(['077', '019', '065', ..., '240', '068', '014'], dtype=object)
array(['27077', '53019', '53065', ..., '02240', '02068', '08014'],\n", - " dtype=object)
array([[263.91894338, 268.83407312, 273.97753272, ..., 283.79866008,\n", + " NAME (poly_idx) object 'Lake of the Woods' 'Ferry' ... 'Broomfield'\n", + " STATE_NAME (poly_idx) object 'Minnesota' 'Washington' ... 'Colorado'\n", + " STATE_FIPS (poly_idx) object '27' '53' '53' '53' ... '02' '02' '02' '08'\n", + " CNTY_FIPS (poly_idx) object '077' '019' '065' '047' ... '240' '068' '014'\n", + " FIPS (poly_idx) object '27077' '53019' '53065' ... '02068' '08014'\n", + " tas (poly_idx, month) float64 263.9 268.8 274.0 ... 276.4 270.4
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n", + " ...\n", + " 3131, 3132, 3133, 3134, 3135, 3136, 3137, 3138, 3139, 3140],\n", + " dtype='int64', name='poly_idx', length=3141))
PandasIndex(Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype='int64', name='month'))
3141 rows × 17 columns
\n", + "37692 rows × 6 columns
\n", "" ], "text/plain": [ - " NAME STATE_NAME STATE_FIPS CNTY_FIPS FIPS \\\n", - "0 Lake of the Woods Minnesota 27 077 27077 \n", - "1 Ferry Washington 53 019 53019 \n", - "2 Stevens Washington 53 065 53065 \n", - "3 Okanogan Washington 53 047 53047 \n", - "4 Pend Oreille Washington 53 051 53051 \n", - "... ... ... ... ... ... \n", - "3136 Skagway-Hoonah-Angoon Alaska 02 232 02232 \n", - "3137 Yukon-Koyukuk Alaska 02 290 02290 \n", - "3138 Southeast Fairbanks Alaska 02 240 02240 \n", - "3139 Denali Alaska 02 068 02068 \n", - "3140 Broomfield Colorado 08 014 08014 \n", - "\n", - " tas0 tas1 tas2 tas3 tas4 tas5 \\\n", - "0 263.918943 268.834073 273.977533 283.141960 290.623952 297.858885 \n", - "1 271.794169 275.631364 276.947080 279.837102 286.630023 293.769471 \n", - "2 272.113155 275.910279 277.355354 280.428965 287.247099 294.356788 \n", - "3 271.772021 275.539162 276.654805 279.317270 285.794503 292.650947 \n", - "4 271.721285 275.542011 276.993355 280.157156 287.086018 294.169635 \n", - "... ... ... ... ... ... ... \n", - "3136 270.709185 272.455135 273.717142 276.188285 281.253285 286.791100 \n", - "3137 263.970656 263.404975 266.670047 272.394716 280.492861 288.813169 \n", - "3138 262.846312 263.000185 265.438037 270.754788 278.476096 286.669566 \n", - "3139 265.084342 264.547936 267.203954 271.782649 278.898267 287.059920 \n", - "3140 270.803864 273.430206 275.955505 280.790070 287.303619 292.830048 \n", - "\n", - " tas6 tas7 tas8 tas9 tas10 tas11 \n", - "0 302.068017 300.362248 293.471128 283.798660 275.109100 266.016176 \n", - "1 299.073178 297.151514 289.866690 281.648927 276.727886 272.256934 \n", - "2 299.847098 297.967740 290.637124 282.076344 277.019222 272.516056 \n", - "3 297.741617 295.915714 289.090624 281.372544 276.598377 272.208944 \n", - "4 299.503768 297.523382 290.086946 281.657134 276.644670 272.095152 \n", - "... ... ... ... ... ... ... 
\n", - "3136 288.361128 287.822862 284.093411 278.681980 274.221760 271.175471 \n", - "3137 288.513645 285.724033 280.243361 273.044271 266.155923 265.022613 \n", - "3138 287.315147 284.920161 279.230840 271.713061 264.946526 263.297936 \n", - "3139 287.375217 285.069283 279.833609 272.514117 266.145088 265.682660 \n", - "3140 297.615662 297.646820 292.368988 283.544708 276.383606 270.444855 \n", - "\n", - "[3141 rows x 17 columns]" + " NAME STATE_NAME STATE_FIPS CNTY_FIPS FIPS \\\n", + "poly_idx month \n", + "0 1 Lake of the Woods Minnesota 27 077 27077 \n", + " 2 Lake of the Woods Minnesota 27 077 27077 \n", + " 3 Lake of the Woods Minnesota 27 077 27077 \n", + " 4 Lake of the Woods Minnesota 27 077 27077 \n", + " 5 Lake of the Woods Minnesota 27 077 27077 \n", + "... ... ... ... ... ... \n", + "3140 8 Broomfield Colorado 08 014 08014 \n", + " 9 Broomfield Colorado 08 014 08014 \n", + " 10 Broomfield Colorado 08 014 08014 \n", + " 11 Broomfield Colorado 08 014 08014 \n", + " 12 Broomfield Colorado 08 014 08014 \n", + "\n", + " tas \n", + "poly_idx month \n", + "0 1 263.918943 \n", + " 2 268.834073 \n", + " 3 273.977533 \n", + " 4 283.141960 \n", + " 5 290.623952 \n", + "... ... \n", + "3140 8 297.646820 \n", + " 9 292.368988 \n", + " 10 283.544708 \n", + " 11 276.383606 \n", + " 12 270.444855 \n", + "\n", + "[37692 rows x 6 columns]" ] }, "execution_count": 7, diff --git a/docs/notebooks/full_run.ipynb b/docs/notebooks/full_run.ipynb index 331e3ba..349c46e 100644 --- a/docs/notebooks/full_run.ipynb +++ b/docs/notebooks/full_run.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "broken-labor", "metadata": {}, "outputs": [], @@ -40,6 +40,29 @@ "Let's get started." 
] }, + { + "cell_type": "markdown", + "id": "c278bcbd-0e54-47a5-ae50-0d0fae0d6565", + "metadata": {}, + "source": [ + "### Extra downloads\n", + "Since we will be using an extra `weights` file that must be regridded, we need the optional dependency :py:mod:`xesmf`, which we can install through: \n", + "\n", + "`mamba install -c conda-forge xesmf`\n", + "\n", + "Since we will be using the optional feature :py:meth:`.diag_fig()`, we need the optional dependencies :py:mod:`matplotlib`, :py:mod:`cartopy`, and :py:mod:`cmocean`, which we can install through: \n", + "\n", + "`mamba install -c conda-forge matplotlib cartopy cmocean`" + ] + }, + { + "cell_type": "markdown", + "id": "a3de1c17-f535-46eb-8cc2-f7a19ab1aa58", + "metadata": {}, + "source": [ + "### Load data" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -112,23 +135,7 @@ "output_type": "stream", "text": [ "creating polygons for each pixel...\n", - "regridding weights to data grid...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/kevinschwarzwald/opt/anaconda3/envs/test/lib/python3.9/site-packages/xarray/core/dataarray.py:746: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", - " return key in self.data\n", - "/Users/kevinschwarzwald/opt/anaconda3/envs/test/lib/python3.9/site-packages/xesmf/frontend.py:466: FutureWarning: ``output_sizes`` should be given in the ``dask_gufunc_kwargs`` parameter. 
It will be removed as direct parameter in a future version.\n", - " dr_out = xr.apply_ufunc(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "regridding weights to data grid...\n", "calculating overlaps between pixels and output polygons...\n", "success!\n" ] @@ -139,6 +146,14 @@ "weightmap = xa.pixel_overlaps(ds,gdf,weights=ds_pop.pop)" ] }, + { + "cell_type": "markdown", + "id": "2e6c1fd7-a2d6-4ea8-8c38-441b62247a3e", + "metadata": {}, + "source": [ + "### Exporting / Importing the weightmap " + ] + }, { "cell_type": "markdown", "id": "ee2451e7-2e92-4cee-9d28-7b51625928ec", @@ -149,13 +164,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "9b5ba5c3-03ce-4fdb-866d-a937295b83f8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kevinschwarzwald/opt/anaconda3/envs/xagg0320/lib/python3.12/site-packages/xagg/export.py:17: UserWarning: export_weightmap() is still an experimental feature. use with care.\n", + " warnings.warn('export_weightmap() is still an experimental feature. use with care.')\n" + ] + } + ], "source": [ - "# Export weightmap\n", - "weightmap.to_file('wm')" + "# Export weightmap to a directory called \"wm\" in the current directory\n", + "weightmap.to_file('./wm')" ] }, { @@ -168,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "a6f23714-51b8-43f6-8fda-49299c6b56fc", "metadata": {}, "outputs": [], @@ -177,6 +201,129 @@ "weightmap = xa.read_wm('wm')" ] }, + { + "cell_type": "markdown", + "id": "e297b4c4-22b9-427c-893e-f3ab708f853d", + "metadata": {}, + "source": [ + "### Verifying the weightmap" + ] + }, + { + "cell_type": "markdown", + "id": "a0ceaf52-bc04-4630-a017-8157b24ce9d0", + "metadata": {}, + "source": [ + "Let's verify if the aggregation was successful. 
The `weightmap` class can produce diagnostic figures that show a given polygon together with the grid cells of the original raster dataset that overlap it. (This feature is still a bit experimental and finicky, and as of v0.3.2.0 needs a little bit of manual processing.) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d136f646-836b-4c0a-8d5c-e84e5e25a38b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "adjusting grid... (this may happen because only a subset of pixels were used for aggregation for efficiency - i.e. [subset_bbox=True] in xa.pixel_overlaps())\n", + "grid adjustment successful\n" + ] + } + ], + "source": [ + "# Load `subset_find()`, which allows you to find one grid within another\n", + "from xagg.auxfuncs import subset_find\n", + "\n", + "# weightmap.diag_fig() takes two required arguments: some information about\n", + "# a grid, and either the polygons of the raster grid, or the raster grid\n", + "# itself to calculate the polygons. \n", + "\n", + "# Let's get the raster grid.\n", + "# To match the internal indexing of `weightmap`, we need to subset the `ds`\n", + "# TODO: move this step internally to `weightmap.diag_fig()`\n", + "grid_polygon_info = subset_find(ds,weightmap.source_grid)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9320f9f4-ff72-48cc-b2d2-32d4656a7e42", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "