diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7f64916..5d0f096 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -13,29 +13,12 @@ repos: - id: debug-statements - id: mixed-line-ending - - repo: https://github.com/asottile/pyupgrade - rev: v3.17.0 - hooks: - - id: pyupgrade - args: - - "--py38-plus" - - - repo: https://github.com/psf/black - rev: 24.8.0 - hooks: - - id: black - - id: black-jupyter - - - repo: https://github.com/keewis/blackdoc - rev: v0.3.9 - hooks: - - id: blackdoc - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.5.6" + rev: "v0.8.1" hooks: - id: ruff args: ["--fix"] + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-prettier rev: v4.0.0-alpha.8 @@ -43,7 +26,7 @@ repos: - id: prettier - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.1 hooks: - id: nbstripout diff --git a/carbonplan_data/utils.py b/carbonplan_data/utils.py index 30797b5..dd3dfb4 100644 --- a/carbonplan_data/utils.py +++ b/carbonplan_data/utils.py @@ -246,7 +246,7 @@ def get_versions( "intake", "rasterio", "zarr", - ] + ], ) -> dict[str, str]: """Helper to fetch commonly used package versions Parameters diff --git a/pyproject.toml b/pyproject.toml index 98d2570..762c1cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,13 +24,7 @@ classifiers = [ ] dynamic = ["version"] -dependencies = [ - "intake<=0.7.0", - "validators", - "wget", - "numpy", - "xarray", -] +dependencies = ["intake<=0.7.0", "validators", "wget", "numpy", "xarray"] [project.urls] repository = "https://github.com/carbonplan/data" @@ -53,6 +47,7 @@ skip-string-normalization = true line-length = 100 target-version = "py39" builtins = ["ellipsis"] +extend-include = ["*.ipynb"] # Exclude a variety of commonly ignored directories. exclude = [ ".bzr", @@ -75,11 +70,14 @@ exclude = [ "node_modules", "venv", ] +[tool.ruff.lint] per-file-ignores = {} -# E402: module level import not at top of file -# E501: line too long - let black worry about that -# E731: do not assign a lambda expression, use a def -ignore = ["E402", "E501", "E731"] +ignore = [ + "E721", # Comparing types instead of isinstance + "E741", # Ambiguous variable names + "E501", # Conflicts with ruff format + "E722", # Bare except +] select = [ # Pyflakes "F", @@ -93,10 +91,10 @@ select = [ ] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 18 -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["carbonplan_data"] [tool.pytest.ini_options] diff --git a/scripts/fia/00_download.ipynb b/scripts/fia/00_download.ipynb index bf0584d..7f9d057 100644 --- a/scripts/fia/00_download.ipynb +++ b/scripts/fia/00_download.ipynb @@ -30,9 +30,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from carbonplan_data.utils import process_sources" - ] + "source": [] }, { "cell_type": "code", @@ -40,7 +38,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import pathlib\n", "import zipfile\n", "\n", diff --git a/scripts/fia/01_raw_to_parquet.ipynb b/scripts/fia/01_raw_to_parquet.ipynb index 3dc1556..7a65303 100644 --- a/scripts/fia/01_raw_to_parquet.ipynb +++ b/scripts/fia/01_raw_to_parquet.ipynb @@ -32,10 +32,6 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", - "import os.path\n", - "import pathlib\n", - "\n", "import gcsfs\n", "import pandas as pd\n", "\n", diff --git a/scripts/fia/01_raw_to_parquet_part2.ipynb b/scripts/fia/01_raw_to_parquet_part2.ipynb index 89bf6cb..eb3ed03 100644 --- a/scripts/fia/01_raw_to_parquet_part2.ipynb +++ b/scripts/fia/01_raw_to_parquet_part2.ipynb @@ -34,9 +34,10 @@ "metadata": {}, "outputs": [], "source": [ - "import fsspec\n", + "import pathlib\n", + "\n", "import dask.dataframe as dd\n", - "import pathlib" + "import fsspec" ] }, { @@ -88,9 +89,6 @@ " \"PHASE\": \"object\",\n", " \"PILE_TL_ADJ\": \"float64\",\n", " \"PILE_TL_COND\": \"float64\",\n", - " \"CWD_TL_UNADJ\": \"float64\",\n", - " \"PILE_TL_ADJ\": \"float64\",\n", - " \"PILE_TL_COND\": \"float64\",\n", " },\n", " \"dwm_coarse_woody_debris\": {\n", " \"CONDID\": \"float64\",\n", diff --git a/scripts/fluxnet/01_raw_to_parquet.ipynb b/scripts/fluxnet/01_raw_to_parquet.ipynb index cb2d0f2..d4d8e4a 100644 --- a/scripts/fluxnet/01_raw_to_parquet.ipynb +++ b/scripts/fluxnet/01_raw_to_parquet.ipynb @@ -35,7 +35,6 @@ "import pathlib\n", "\n", "import dask.dataframe as dd\n", - "import fsspec\n", "import gcsfs\n", "import pandas as pd\n", "from fsspec.implementations.zip import ZipFileSystem\n", diff --git a/scripts/glas/01_cache_glas_data.ipynb b/scripts/glas/01_cache_glas_data.ipynb index 01743d0..009cc95 100644 --- a/scripts/glas/01_cache_glas_data.ipynb +++ b/scripts/glas/01_cache_glas_data.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "pip install git+https://github.com/pangeo-forge/pangeo-forge.git" + "%pip install git+https://github.com/pangeo-forge/pangeo-forge.git" ] }, { @@ -51,7 +51,6 @@ "# machine urs.earthdata.nasa.gov login myusername password mypassword\n", "# where 'myusername' and 'mypassword' are your Earthdata credentials.\n", "#\n", - "from __future__ import print_function\n", "\n", "import base64\n", "import itertools\n", @@ -62,24 +61,24 @@ "from getpass import getpass\n", "\n", "try:\n", + " from urllib.error import HTTPError, URLError\n", " from urllib.parse import urlparse\n", " from urllib.request import (\n", - " urlopen,\n", + " HTTPCookieProcessor,\n", " Request,\n", " build_opener,\n", - " HTTPCookieProcessor,\n", + " urlopen,\n", " )\n", - " from urllib.error import HTTPError, URLError\n", "except ImportError:\n", - " from urlparse import urlparse\n", " from urllib2 import (\n", - " urlopen,\n", - " Request,\n", + " HTTPCookieProcessor,\n", " HTTPError,\n", + " Request,\n", " URLError,\n", " build_opener,\n", - " HTTPCookieProcessor,\n", + " urlopen,\n", " )\n", + " from urlparse import urlparse\n", "\n", "# short_name = 'GLAH01'\n", "# version = '033'\n", @@ -96,9 +95,9 @@ "URS_URL = \"https://urs.earthdata.nasa.gov\"\n", "CMR_PAGE_SIZE = 2000\n", "CMR_FILE_URL = (\n", - " \"{0}/search/granules.json?provider=NSIDC_ECS\"\n", + " f\"{CMR_URL}/search/granules.json?provider=NSIDC_ECS\"\n", " \"&sort_key[]=start_date&sort_key[]=producer_granule_id\"\n", - " \"&scroll=true&page_size={1}\".format(CMR_URL, CMR_PAGE_SIZE)\n", + " f\"&scroll=true&page_size={CMR_PAGE_SIZE}\"\n", ")\n", "\n", "\n", @@ -138,8 +137,8 @@ " username, account, password = info.authenticators(urlparse(URS_URL).hostname)\n", " errprefix = \"netrc error: \"\n", " except Exception as e:\n", - " if not (\"No such file\" in str(e)):\n", - " print(\"netrc error: {0}\".format(str(e)))\n", + " if \"No such file\" not in str(e):\n", + " print(f\"netrc error: {str(e)}\")\n", " username = None\n", " password = None\n", "\n", @@ -147,13 +146,13 @@ " if not username:\n", " username = get_username()\n", " password = get_password()\n", - " credentials = \"{0}:{1}\".format(username, password)\n", + " credentials = f\"{username}:{password}\"\n", " credentials = base64.b64encode(credentials.encode(\"ascii\")).decode(\"ascii\")\n", "\n", " if url:\n", " try:\n", " req = Request(url)\n", - " req.add_header(\"Authorization\", \"Basic {0}\".format(credentials))\n", + " req.add_header(\"Authorization\", f\"Basic {credentials}\")\n", " opener = build_opener(HTTPCookieProcessor())\n", " opener.open(req)\n", " except HTTPError:\n", @@ -169,7 +168,7 @@ "def build_version_query_params(version):\n", " desired_pad_length = 3\n", " if len(version) > desired_pad_length:\n", - " print('Version string too long: \"{0}\"'.format(version))\n", + " print(f'Version string too long: \"{version}\"')\n", " quit()\n", "\n", " version = str(int(version)) # Strip off any leading zeros\n", @@ -177,7 +176,7 @@ "\n", " while len(version) <= desired_pad_length:\n", " padded_version = version.zfill(desired_pad_length)\n", - " query_params += \"&version={0}\".format(padded_version)\n", + " query_params += f\"&version={padded_version}\"\n", " desired_pad_length -= 1\n", " return query_params\n", "\n", @@ -191,16 +190,16 @@ " polygon=None,\n", " filename_filter=None,\n", "):\n", - " params = \"&short_name={0}\".format(short_name)\n", + " params = f\"&short_name={short_name}\"\n", " params += build_version_query_params(version)\n", - " params += \"&temporal[]={0},{1}\".format(time_start, time_end)\n", + " params += f\"&temporal[]={time_start},{time_end}\"\n", " if polygon:\n", - " params += \"&polygon={0}\".format(polygon)\n", + " params += f\"&polygon={polygon}\"\n", " elif bounding_box:\n", - " params += \"&bounding_box={0}\".format(bounding_box)\n", + " params += f\"&bounding_box={bounding_box}\"\n", " if filename_filter:\n", " option = \"&options[producer_granule_id][pattern]=true\"\n", - " params += \"&producer_granule_id[]={0}{1}\".format(filename_filter, option)\n", + " params += f\"&producer_granule_id[]={filename_filter}{option}\"\n", " return CMR_FILE_URL + params\n", "\n", "\n", @@ -262,7 +261,7 @@ " polygon=polygon,\n", " filename_filter=filename_filter,\n", " )\n", - " print(\"Querying for data:\\n\\t{0}\\n\".format(cmr_query_url))\n", + " print(f\"Querying for data:\\n\\t{cmr_query_url}\\n\")\n", "\n", " cmr_scroll_id = None\n", " ctx = ssl.create_default_context()\n", @@ -282,7 +281,7 @@ " cmr_scroll_id = headers[\"cmr-scroll-id\"]\n", " hits = int(headers[\"cmr-hits\"])\n", " if hits > 0:\n", - " print(\"Found {0} matches.\".format(hits))\n", + " print(f\"Found {hits} matches.\")\n", " else:\n", " print(\"Found no matches.\")\n", " search_page = response.read()\n", @@ -326,8 +325,9 @@ "outputs": [], "source": [ "import os\n", - "import fsspec\n", + "\n", "import dask\n", + "import fsspec\n", "\n", "\n", "@dask.delayed\n", @@ -338,7 +338,7 @@ " return out\n", "\n", " url_count = len(urls)\n", - " print(\"Downloading {0} files...\".format(url_count))\n", + " print(f\"Downloading {url_count} files...\")\n", "\n", " for index, url in enumerate(urls, start=1):\n", " if not credentials and urlparse(url).scheme == \"https\":\n", @@ -359,7 +359,7 @@ " # open(filename, 'wb').write(resp.content)\n", " req = Request(url)\n", " if credentials:\n", - " req.add_header(\"Authorization\", \"Basic {0}\".format(credentials))\n", + " req.add_header(\"Authorization\", f\"Basic {credentials}\")\n", " opener = build_opener(HTTPCookieProcessor())\n", "\n", " with fsspec.open(target_url, mode=\"wb\") as target:\n", @@ -368,11 +368,11 @@ " out.append(target_url)\n", "\n", " except HTTPError as e:\n", - " print(\"HTTPError {0}, {1}\".format(e.code, e.reason), filename)\n", + " print(f\"HTTPError {e.code}, {e.reason}\", filename)\n", " except URLError as e:\n", - " print(\"URLError: {0}\".format(e.reason), filename)\n", - " except IOError:\n", - " print(\"IOError: {0}\".format(e.reason), filename)\n", + " print(f\"URLError: {e.reason}\", filename)\n", + " except OSError as e:\n", + " print(f\"IOError: {e.reason}\", filename)\n", " except KeyboardInterrupt:\n", " quit()\n", " except:\n", diff --git a/scripts/global-biomass/01_biomass_to_cogs.ipynb b/scripts/global-biomass/01_biomass_to_cogs.ipynb index 1b8cfb5..608f3dd 100644 --- a/scripts/global-biomass/01_biomass_to_cogs.ipynb +++ b/scripts/global-biomass/01_biomass_to_cogs.ipynb @@ -32,12 +32,8 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", "import os\n", - "import pathlib\n", "\n", - "from google.cloud import storage\n", - "from rasterio.io import MemoryFile\n", "from rio_cogeo.cogeo import cog_translate\n", "from rio_cogeo.profiles import cog_profiles\n", "\n", diff --git a/scripts/gridmet/01_gridmet_to_zarr.ipynb b/scripts/gridmet/01_gridmet_to_zarr.ipynb index c3778de..97e50cf 100644 --- a/scripts/gridmet/01_gridmet_to_zarr.ipynb +++ b/scripts/gridmet/01_gridmet_to_zarr.ipynb @@ -14,7 +14,7 @@ "\n", "**Inputs:**\n", "\n", - "- inake catalog: `climate.gridmet_opendap`\n", + "- intake catalog: `climate.gridmet_opendap`\n", "\n", "**Outputs:**\n", "\n", @@ -32,9 +32,7 @@ "outputs": [], "source": [ "import gcsfs\n", - "import intake\n", "import xarray as xr\n", - "import zarr\n", "from numcodecs.zlib import Zlib\n", "\n", "fs = gcsfs.GCSFileSystem(\n", @@ -93,7 +91,7 @@ "ds_list = []\n", "for v in variables:\n", " print(v)\n", - " ds_list.append(xr.concat([source(variable=v, year=y).to_dask() for y in years], dim=\"day\"))" + " ds_list.append(xr.concat([source(variable=v, year=y).to_dask() for y in years], dim=\"day\")) # noqa" ] }, { diff --git a/scripts/grids/make_grid.ipynb b/scripts/grids/make_grid.ipynb index f2c4994..f174cb3 100644 --- a/scripts/grids/make_grid.ipynb +++ b/scripts/grids/make_grid.ipynb @@ -6,11 +6,12 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", "import os\n", + "from datetime import datetime\n", + "\n", + "import numpy as np\n", "import rasterio as rio\n", "import xarray as xr\n", - "from datetime import datetime\n", "from rasterio.warp import transform\n", "\n", "from carbonplan_data import cat" diff --git a/scripts/iiasa/01_raw_to_parquet.ipynb b/scripts/iiasa/01_raw_to_parquet.ipynb index 27d81cf..99168b7 100644 --- a/scripts/iiasa/01_raw_to_parquet.ipynb +++ b/scripts/iiasa/01_raw_to_parquet.ipynb @@ -33,8 +33,6 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", - "import os.path\n", "import pathlib\n", "\n", "import gcsfs\n", @@ -108,10 +106,8 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# TODO:\n", "\n", diff --git a/scripts/mtbs/02_downsampling_and_reprojection.ipynb b/scripts/mtbs/02_downsampling_and_reprojection.ipynb index e47249f..2beeb00 100644 --- a/scripts/mtbs/02_downsampling_and_reprojection.ipynb +++ b/scripts/mtbs/02_downsampling_and_reprojection.ipynb @@ -103,7 +103,7 @@ " resampling = \"mode\"\n", " else:\n", " resampling = \"near\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/mtbs/02_mtbs_to_zarr.ipynb b/scripts/mtbs/02_mtbs_to_zarr.ipynb index f7155ba..9086df5 100644 --- a/scripts/mtbs/02_mtbs_to_zarr.ipynb +++ b/scripts/mtbs/02_mtbs_to_zarr.ipynb @@ -42,9 +42,7 @@ "import gcsfs\n", "import numpy as np\n", "import rasterio\n", - "import rioxarray\n", "import xarray as xr\n", - "import zarr\n", "from numcodecs.zlib import Zlib\n", "from rasterio import Affine\n", "from rasterio.crs import CRS\n", diff --git a/scripts/mtbs/03_mtbs_to_zarr.ipynb b/scripts/mtbs/03_mtbs_to_zarr.ipynb index 8399284..ff72efd 100644 --- a/scripts/mtbs/03_mtbs_to_zarr.ipynb +++ b/scripts/mtbs/03_mtbs_to_zarr.ipynb @@ -42,9 +42,7 @@ "import gcsfs\n", "import numpy as np\n", "import rasterio\n", - "import rioxarray\n", "import xarray as xr\n", - "import zarr\n", "from numcodecs.zlib import Zlib\n", "from rasterio import Affine\n", "from rasterio.crs import CRS\n", diff --git a/scripts/mtbs/04_mtbs_perims_to_raster.ipynb b/scripts/mtbs/04_mtbs_perims_to_raster.ipynb index 9e9222e..099f042 100644 --- a/scripts/mtbs/04_mtbs_perims_to_raster.ipynb +++ b/scripts/mtbs/04_mtbs_perims_to_raster.ipynb @@ -42,24 +42,14 @@ "metadata": {}, "outputs": [], "source": [ - "from carbonplan.data import cat\n", - "import xarray as xr\n", + "import geopandas\n", + "import hvplot.pandas # noqa\n", "import numpy as np\n", - "\n", "import pandas as pd\n", - "\n", - "import geopandas\n", - "\n", "import rasterio\n", - "from rasterio import Affine\n", - "from rasterio.transform import rowcol\n", - "from rasterio.features import rasterize\n", - "from rasterio.transform import from_bounds\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import zarr\n", - "\n", - "import hvplot.pandas # noqa" + "import xarray as xr\n", + "from carbonplan.data import cat\n", + "from rasterio.features import rasterize" ] }, { @@ -158,9 +148,9 @@ "metadata": {}, "outputs": [], "source": [ - "from rio_cogeo.profiles import cog_profiles\n", "from rasterio.io import MemoryFile\n", - "from rio_cogeo.cogeo import cog_translate" + "from rio_cogeo.cogeo import cog_translate\n", + "from rio_cogeo.profiles import cog_profiles" ] }, { @@ -257,7 +247,6 @@ "outputs": [], "source": [ "import intake\n", - "import xarray as xr\n", "from dask.diagnostics import ProgressBar\n", "\n", "cat2 = intake.open_catalog(\n", diff --git a/scripts/mtbs/05_monthly_downsampling.ipynb b/scripts/mtbs/05_monthly_downsampling.ipynb index acc4809..3989e5f 100644 --- a/scripts/mtbs/05_monthly_downsampling.ipynb +++ b/scripts/mtbs/05_monthly_downsampling.ipynb @@ -89,7 +89,9 @@ " f = get_file(region, fire, year, month)\n", " crs, extent = projections(\"albers\", region)\n", " resampling = \"average\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\n", + " \"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\"\n", + " ).format(\n", " crs,\n", " extent,\n", " resolution,\n", @@ -127,7 +129,7 @@ " files = [get_file(\"conus\", \"vlf\", year, month + 1)[\"source\"] for month in range(12)]\n", " crs, extent = projections(\"albers\", region)\n", " resampling = \"sum\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb b/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb index 586dc3e..93dbe35 100644 --- a/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb +++ b/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb @@ -36,14 +36,10 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", "import gcsfs\n", "import numpy as np\n", "import rasterio\n", - "import rioxarray\n", "import xarray as xr\n", - "import zarr\n", "from numcodecs.zlib import Zlib\n", "from rasterio import Affine\n", "from rasterio.crs import CRS\n", @@ -125,74 +121,63 @@ "\n", "def prepare_mtbs(year, resolution, return_ds=True):\n", " src_path_year = f\"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/severity/{year}.tif\"\n", - " with rasterio.open(src_path_year, \"r\") as src_raster_year:\n", + " with rasterio.open(src_path_year, \"r\") as src_raster:\n", " src_transform = src_raster.meta[\"transform\"]\n", " src_crs = src_raster.meta[\"crs\"]\n", " src_band = src_raster.read(1)\n", " src_resolution = resolution\n", "\n", - " dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(\n", - " src_band, src_resolution\n", - " )\n", + " dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band, src_resolution)\n", " print(\"calc_coords\")\n", " coords = calc_coords(dst_shape, dst_transform, dst_crs)\n", - " \n", - " for month in range(12):\n", - " src_path_month = f\"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{year}.{month+1}.tif\"\n", - " with rasterio.open(src_path_month, \"r\") as src_raster_month:\n", "\n", - " src_nodata = 6\n", - " resampling = Resampling.average\n", - " # set moderate or high burn severity to 1 and others to 1\n", - " src_band_tmp = ((src_band == 3) | (src_band == 4)).astype(\"uint8\")\n", - " # set masked regions to nodata value\n", - " src_band_tmp[src_band == src_nodata] = src_nodata\n", - " src_band = src_band_tmp\n", - " dst_band = dst_band.astype(\n", - " \"float32\"\n", - " ) # convert to float for averaging\n", + " for month in range(12):\n", + " src_path_month = f\"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{year}.{month+1}.tif\"\n", + " with rasterio.open(src_path_month, \"r\"):\n", + " src_nodata = 6\n", + " resampling = Resampling.average\n", + " # set moderate or high burn severity to 1 and others to 1\n", + " src_band_tmp = ((src_band == 3) | (src_band == 4)).astype(\"uint8\")\n", + " # set masked regions to nodata value\n", + " src_band_tmp[src_band == src_nodata] = src_nodata\n", + " src_band = src_band_tmp\n", + " dst_band = dst_band.astype(\"float32\") # convert to float for averaging\n", "\n", - " print(\"reproject\")\n", - " # this seems to require rasterio=1.0.25 and gdal=2.4.2\n", - " reproject(\n", - " src_band,\n", - " dst_band,\n", - " src_transform=src_transform,\n", - " src_crs=src_crs,\n", - " dst_transform=dst_transform,\n", - " dst_crs=dst_crs,\n", - " resampling=resampling,\n", - " src_nodata=src_nodata,\n", - " dst_nodata=src_raster.meta[\"nodata\"],\n", - " )\n", + " print(\"reproject\")\n", + " # this seems to require rasterio=1.0.25 and gdal=2.4.2\n", + " reproject(\n", + " src_band,\n", + " dst_band,\n", + " src_transform=src_transform,\n", + " src_crs=src_crs,\n", + " dst_transform=dst_transform,\n", + " dst_crs=dst_crs,\n", + " resampling=resampling,\n", + " src_nodata=src_nodata,\n", + " dst_nodata=src_raster.meta[\"nodata\"],\n", + " )\n", "\n", - " meta = src_raster.meta\n", - " meta.update(\n", - " width=dst_shape[0],\n", - " height=dst_shape[1],\n", - " dtype=str(dst_band.dtype),\n", - " crs=dst_crs.to_wkt(),\n", - " transform=list(dst_transform),\n", - " nodata=src_raster.meta[\"nodata\"],\n", - " )\n", + " meta = src_raster.meta\n", + " meta.update(\n", + " width=dst_shape[0],\n", + " height=dst_shape[1],\n", + " dtype=str(dst_band.dtype),\n", + " crs=dst_crs.to_wkt(),\n", + " transform=list(dst_transform),\n", + " nodata=src_raster.meta[\"nodata\"],\n", + " )\n", "\n", " varname = f\"{year}\"\n", " chunks = {\"x\": 512, \"y\": 512}\n", - " ds = xr.DataArray(dst_band, dims=(\"y\", \"x\"), attrs=meta).to_dataset(\n", - " name=varname\n", - " )\n", + " ds = xr.DataArray(dst_band, dims=(\"y\", \"x\"), attrs=meta).to_dataset(name=varname)\n", " ds = ds.assign_coords(coords).chunk(chunks)\n", "\n", " if return_ds:\n", " return ds\n", " else:\n", - " fs = gcsfs.GCSFileSystem(\n", - " project=\"carbonplan\", token=\"cloud\", requester_pays=True\n", - " )\n", - " mapper = fs.get_mapper(scratch + f\"/MTBS.{year}.{resolution}m.zarr\")\n", - " ds.to_zarr(\n", - " store=mapper, mode=\"w\", encoding={varname: {\"compressor\": Zlib()}}\n", - " )" + " fs = gcsfs.GCSFileSystem(project=\"carbonplan\", token=\"cloud\", requester_pays=True)\n", + " mapper = fs.get_mapper(\"scratch\" + f\"/MTBS.{year}.{resolution}m.zarr\")\n", + " ds.to_zarr(store=mapper, mode=\"w\", encoding={varname: {\"compressor\": Zlib()}})" ] }, { diff --git a/scripts/mtbs/06_annual_downsampling.py b/scripts/mtbs/06_annual_downsampling.py index a262992..c37e2fc 100644 --- a/scripts/mtbs/06_annual_downsampling.py +++ b/scripts/mtbs/06_annual_downsampling.py @@ -4,9 +4,10 @@ from rio_cogeo.cogeo import cog_translate from rio_cogeo.profiles import cog_profiles +from carbonplan_data.utils import projections, setup + dst_profile = cog_profiles.get("deflate") -from carbonplan_data.utils import projections, setup workdir, upload = setup("jeremy") workdir diff --git a/scripts/nftd/00_download.ipynb b/scripts/nftd/00_download.ipynb index 16debe3..5e85aaa 100644 --- a/scripts/nftd/00_download.ipynb +++ b/scripts/nftd/00_download.ipynb @@ -32,7 +32,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import pathlib\n", "import zipfile\n", "\n", diff --git a/scripts/nftd/01_nftd_to_cogs.ipynb b/scripts/nftd/01_nftd_to_cogs.ipynb index 6fe5607..61656fd 100644 --- a/scripts/nftd/01_nftd_to_cogs.ipynb +++ b/scripts/nftd/01_nftd_to_cogs.ipynb @@ -32,12 +32,10 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", "import os\n", "import pathlib\n", "\n", "from google.cloud import storage\n", - "from rasterio.io import MemoryFile\n", "from rio_cogeo.cogeo import cog_translate\n", "from rio_cogeo.profiles import cog_profiles" ] diff --git a/scripts/nftd/02_downsampling_and_reprojection.ipynb b/scripts/nftd/02_downsampling_and_reprojection.ipynb index ff7b0f8..2ef0ba7 100644 --- a/scripts/nftd/02_downsampling_and_reprojection.ipynb +++ b/scripts/nftd/02_downsampling_and_reprojection.ipynb @@ -52,9 +52,9 @@ "metadata": {}, "outputs": [], "source": [ + "import rasterio\n", "from rio_cogeo.cogeo import cog_translate\n", "from rio_cogeo.profiles import cog_profiles\n", - "import rasterio\n", "\n", "dst_profile = cog_profiles.get(\"deflate\")" ] @@ -103,7 +103,7 @@ " resampling = \"mode\"\n", " else:\n", " resampling = \"near\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", @@ -186,8 +186,8 @@ " dst.write(out, 1)\n", "\n", " cmd = (\n", - " \"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"-ot Float32 \" \"%s \" \"%s\"\n", - " ) % (\n", + " \"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"-ot Float32 \" \"{} \" \"{}\"\n", + " ).format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/nlcd/00_download.ipynb b/scripts/nlcd/00_download.ipynb index 25fe6e5..e51dc25 100644 --- a/scripts/nlcd/00_download.ipynb +++ b/scripts/nlcd/00_download.ipynb @@ -31,7 +31,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import pathlib\n", "import zipfile\n", "\n", diff --git a/scripts/nlcd/01_nlcd_to_cogs.ipynb b/scripts/nlcd/01_nlcd_to_cogs.ipynb index a6746ee..e90f867 100644 --- a/scripts/nlcd/01_nlcd_to_cogs.ipynb +++ b/scripts/nlcd/01_nlcd_to_cogs.ipynb @@ -34,12 +34,10 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", "import os\n", "import pathlib\n", "\n", "from google.cloud import storage\n", - "from rasterio.io import MemoryFile\n", "from rio_cogeo.cogeo import cog_translate\n", "from rio_cogeo.profiles import cog_profiles\n", "\n", diff --git a/scripts/nlcd/02_downsampling_and_reprojection.ipynb b/scripts/nlcd/02_downsampling_and_reprojection.ipynb index 0c306fe..b623d88 100644 --- a/scripts/nlcd/02_downsampling_and_reprojection.ipynb +++ b/scripts/nlcd/02_downsampling_and_reprojection.ipynb @@ -65,7 +65,7 @@ "outputs": [], "source": [ "import rasterio\n", - "from numpy import asarray, argmax" + "from numpy import argmax, asarray" ] }, { @@ -122,7 +122,7 @@ " resampling = \"near\"\n", " else:\n", " resampling = \"mode\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", @@ -208,14 +208,14 @@ "\n", " cmd = (\n", " \"gdalwarp \"\n", - " \"-t_srs '%s' \"\n", - " \"-te %s \"\n", - " \"-tr %s %s \"\n", - " \"-r %s \"\n", + " \"-t_srs '{}' \"\n", + " \"-te {} \"\n", + " \"-tr {} {} \"\n", + " \"-r {} \"\n", " \"-ot Float32 \"\n", - " \"%s \"\n", - " \"%s\"\n", - " ) % (\n", + " \"{} \"\n", + " \"{}\"\n", + " ).format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/prism/00_download.ipynb b/scripts/prism/00_download.ipynb index e5f7015..651218a 100644 --- a/scripts/prism/00_download.ipynb +++ b/scripts/prism/00_download.ipynb @@ -6,10 +6,10 @@ "metadata": {}, "outputs": [], "source": [ - "import wget\n", - "import os\n", - "import zipfile\n", "import pathlib\n", + "import zipfile\n", + "\n", + "import wget\n", "\n", "from carbonplan_data.utils import setup\n", "\n", diff --git a/scripts/prism/01_prism_to_cogs.ipynb b/scripts/prism/01_prism_to_cogs.ipynb index 234f467..9721aed 100644 --- a/scripts/prism/01_prism_to_cogs.ipynb +++ b/scripts/prism/01_prism_to_cogs.ipynb @@ -29,20 +29,15 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", "import os\n", - "import pathlib\n", "\n", + "import xarray as xr\n", "from google.cloud import storage\n", - "from rasterio.io import MemoryFile\n", "from rio_cogeo.cogeo import cog_translate\n", "from rio_cogeo.profiles import cog_profiles\n", "\n", "from carbonplan_data.utils import setup\n", "\n", - "import xarray as xr\n", - "import rioxarray\n", - "\n", "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" ] diff --git a/scripts/prism/02_downsample_and_reproject.ipynb b/scripts/prism/02_downsample_and_reproject.ipynb index 00a2917..4eda44d 100644 --- a/scripts/prism/02_downsample_and_reproject.ipynb +++ b/scripts/prism/02_downsample_and_reproject.ipynb @@ -33,11 +33,12 @@ "outputs": [], "source": [ "import os\n", - "from carbonplan_data.utils import projections, setup\n", + "\n", + "import fsspec\n", "from rio_cogeo.cogeo import cog_translate\n", "from rio_cogeo.profiles import cog_profiles\n", - "import rasterio\n", - "import fsspec\n", + "\n", + "from carbonplan_data.utils import projections, setup\n", "\n", "dst_profile = cog_profiles.get(\"deflate\")\n", "\n", diff --git a/scripts/terraclimate/01_terraclimate_aux_fileds_to_zarr.ipynb b/scripts/terraclimate/01_terraclimate_aux_fileds_to_zarr.ipynb index 698a2cd..f1af1b7 100644 --- a/scripts/terraclimate/01_terraclimate_aux_fileds_to_zarr.ipynb +++ b/scripts/terraclimate/01_terraclimate_aux_fileds_to_zarr.ipynb @@ -31,7 +31,6 @@ "source": [ "import fsspec\n", "import xarray as xr\n", - "\n", "import zarr" ] }, diff --git a/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb b/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb index 19d8bdd..79347b4 100644 --- a/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb +++ b/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb @@ -32,14 +32,12 @@ "outputs": [], "source": [ "import os\n", - "import fsspec\n", - "import xarray as xr\n", "\n", "import dask\n", - "from dask.distributed import Client\n", - "from dask_gateway import Gateway\n", - "from typing import List\n", + "import fsspec\n", "import urlpath\n", + "import xarray as xr\n", + "from dask_gateway import Gateway\n", "from tqdm import tqdm" ] }, @@ -162,6 +160,9 @@ "metadata": {}, "outputs": [], "source": [ + "from numcodecs import Blosc\n", + "\n", + "\n", "def apply_mask(key, da):\n", " \"\"\"helper function to mask DataArrays based on a threshold value\"\"\"\n", " if mask_opts.get(key, None):\n", @@ -282,7 +283,7 @@ " .load()\n", " .chunk(chunks)\n", " )\n", - " except Exception as e:\n", + " except Exception:\n", " raise ValueError(source_url)\n", "\n", " mapper = fs.get_mapper(target_url)\n", diff --git a/scripts/terraclimate/02_terraclimate_regrid.ipynb b/scripts/terraclimate/02_terraclimate_regrid.ipynb index 0412e01..12a9787 100644 --- a/scripts/terraclimate/02_terraclimate_regrid.ipynb +++ b/scripts/terraclimate/02_terraclimate_regrid.ipynb @@ -29,7 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "pip install -U xarray==0.16.0 --no-deps" + "%pip install -U xarray==0.16.0 --no-deps" ] }, { @@ -39,10 +39,9 @@ "outputs": [], "source": [ "import fsspec\n", + "import numpy as np\n", "import xarray as xr\n", "import xesmf as xe\n", - "import numpy as np\n", - "\n", "from dask.diagnostics import ProgressBar" ] }, @@ -91,7 +90,7 @@ "source": [ "# options\n", "name = \"terraclimate\"\n", - "raw_location = f\"gs://carbonplan-data/raw/terraclimate/4000m/raster.zarr\"\n", + "raw_location = \"gs://carbonplan-data/raw/terraclimate/4000m/raster.zarr\"\n", "target_grid = \"gs://carbonplan-data/processed/grids/conus/4000m/domain.zarr\"\n", "# getting weird errors when writing to carbonplan-data\n", "target_location = f\"gs://carbonplan-data/processed/{name}/conus/4000m/raster.zarr\"" @@ -192,9 +191,10 @@ "metadata": {}, "outputs": [], "source": [ - "import dask\n", "from multiprocessing.pool import ThreadPool\n", "\n", + "import dask\n", + "\n", "with dask.config.set(scheduler=\"threads\", pool=ThreadPool(3)):\n", " with ProgressBar():\n", " mapper2 = fsspec.get_mapper(target_location)\n", @@ -207,9 +207,11 @@ "metadata": {}, "outputs": [], "source": [ - "mapper2 = fsspec.get_mapper(target_location)\n", "import zarr\n", "\n", + "mapper2 = fsspec.get_mapper(target_location)\n", + "\n", + "\n", "zarr.consolidate_metadata(mapper2)" ] } diff --git a/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb b/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb index 64a1b1a..4a07934 100644 --- a/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb +++ b/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb @@ -6,10 +6,10 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", - "import pandas as pd\n", - "import fsspec\n", "import dask\n", + "import fsspec\n", + "import pandas as pd\n", + "import xarray as xr\n", "\n", "dask.config.set({\"distributed.logging.distributed\": \"critical\"})" ] @@ -339,10 +339,11 @@ "metadata": {}, "outputs": [], "source": [ + "from dask.diagnostics import ProgressBar\n", + "\n", "bucket = \"gs://carbonplan-scratch/terraclimate-fia-cond-ann-3.zarr\"\n", "mapper3 = fsspec.get_mapper(bucket, create=True)\n", "\n", - "from dask.diagnostics import ProgressBar\n", "\n", "with ProgressBar():\n", " ds_cond_ann.to_zarr(mapper3, mode=\"w\", consolidated=True, encoding=encoding)" diff --git a/scripts/worldclim/01_raw_to_zarr.ipynb b/scripts/worldclim/01_raw_to_zarr.ipynb index f3ba5fc..6b5bdf5 100644 --- a/scripts/worldclim/01_raw_to_zarr.ipynb +++ b/scripts/worldclim/01_raw_to_zarr.ipynb @@ -20,9 +20,8 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", - "\n", - "import fsspec" + "import fsspec\n", + "import xarray as xr" ] }, { @@ -71,7 +70,7 @@ "outputs": [], "source": [ "ds[\"elev\"] = xr.open_rasterio(\n", - " f\"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_elev.tif\",\n", + " \"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_elev.tif\",\n", " chunks={\"x\": 8192, \"y\": 8192},\n", ").squeeze(drop=True)" ]