[pre-commit.ci] pre-commit autoupdate (#201)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu>
pre-commit-ci[bot] and andersy005 authored Dec 3, 2024
1 parent fe538fe commit 0bf2ac3
Showing 33 changed files with 154 additions and 227 deletions.
25 changes: 4 additions & 21 deletions .pre-commit-config.yaml
@@ -3,7 +3,7 @@ ci:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
@@ -13,37 +13,20 @@ repos:
- id: debug-statements
- id: mixed-line-ending

- repo: https://github.com/asottile/pyupgrade
rev: v3.17.0
hooks:
- id: pyupgrade
args:
- "--py38-plus"

- repo: https://github.com/psf/black
rev: 24.8.0
hooks:
- id: black
- id: black-jupyter

- repo: https://github.com/keewis/blackdoc
rev: v0.3.9
hooks:
- id: blackdoc

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.5.6"
rev: "v0.8.1"
hooks:
- id: ruff
args: ["--fix"]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier

- repo: https://github.com/kynan/nbstripout
rev: 0.7.1
rev: 0.8.1
hooks:
- id: nbstripout

2 changes: 1 addition & 1 deletion carbonplan_data/utils.py
@@ -246,7 +246,7 @@ def get_versions(
"intake",
"rasterio",
"zarr",
]
],
) -> dict[str, str]:
"""Helper to fetch commonly used package versions
Parameters
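Note on the hunk above: the only change is a trailing comma after the default packages list in get_versions. For orientation, a minimal usage sketch of that helper, assuming, as the partial signature shown here suggests, that it accepts an optional list of package names and returns a name-to-version mapping:

# Minimal sketch, not taken from the repository: the package list passed here
# is illustrative and shorter than the helper's real default list.
from carbonplan_data.utils import get_versions

versions = get_versions(["numpy", "xarray", "zarr"])
for name, version in versions.items():
    print(f"{name}: {version}")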
24 changes: 11 additions & 13 deletions pyproject.toml
@@ -24,13 +24,7 @@ classifiers = [
]
dynamic = ["version"]

dependencies = [
"intake<=0.7.0",
"validators",
"wget",
"numpy",
"xarray",
]
dependencies = ["intake<=0.7.0", "validators", "wget", "numpy", "xarray"]

[project.urls]
repository = "https://github.com/carbonplan/data"
@@ -53,6 +47,7 @@ skip-string-normalization = true
line-length = 100
target-version = "py39"
builtins = ["ellipsis"]
extend-include = ["*.ipynb"]
# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
@@ -75,11 +70,14 @@
"node_modules",
"venv",
]
[tool.ruff.lint]
per-file-ignores = {}
# E402: module level import not at top of file
# E501: line too long - let black worry about that
# E731: do not assign a lambda expression, use a def
ignore = ["E402", "E501", "E731"]
ignore = [
"E721", # Comparing types instead of isinstance
"E741", # Ambiguous variable names
"E501", # Conflicts with ruff format
"E722", # Bare except
]
select = [
# Pyflakes
"F",
@@ -93,10 +91,10 @@
]


[tool.ruff.mccabe]
[tool.ruff.lint.mccabe]
max-complexity = 18

[tool.ruff.isort]
[tool.ruff.lint.isort]
known-first-party = ["carbonplan_data"]

[tool.pytest.ini_options]
5 changes: 1 addition & 4 deletions scripts/fia/00_download.ipynb
@@ -30,17 +30,14 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from carbonplan_data.utils import process_sources"
]
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"import zipfile\n",
"\n",
4 changes: 0 additions & 4 deletions scripts/fia/01_raw_to_parquet.ipynb
@@ -32,10 +32,6 @@
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import os.path\n",
"import pathlib\n",
"\n",
"import gcsfs\n",
"import pandas as pd\n",
"\n",
8 changes: 3 additions & 5 deletions scripts/fia/01_raw_to_parquet_part2.ipynb
@@ -34,9 +34,10 @@
"metadata": {},
"outputs": [],
"source": [
"import fsspec\n",
"import pathlib\n",
"\n",
"import dask.dataframe as dd\n",
"import pathlib"
"import fsspec"
]
},
{
@@ -88,9 +89,6 @@
" \"PHASE\": \"object\",\n",
" \"PILE_TL_ADJ\": \"float64\",\n",
" \"PILE_TL_COND\": \"float64\",\n",
" \"CWD_TL_UNADJ\": \"float64\",\n",
" \"PILE_TL_ADJ\": \"float64\",\n",
" \"PILE_TL_COND\": \"float64\",\n",
" },\n",
" \"dwm_coarse_woody_debris\": {\n",
" \"CONDID\": \"float64\",\n",
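The lines removed above were duplicate keys inside the same dtype-mapping dict literal: PILE_TL_ADJ and PILE_TL_COND already appear a few lines earlier, and CWD_TL_UNADJ is presumably repeated in a part of the dict not shown here. Dropping them does not change behavior, because a repeated key in a Python dict literal silently keeps only its last value:

# Repeated keys in a dict literal collapse to a single entry (the last one wins).
dtypes = {"PILE_TL_ADJ": "float64", "PILE_TL_ADJ": "float64"}
assert dtypes == {"PILE_TL_ADJ": "float64"}
assert len(dtypes) == 1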
1 change: 0 additions & 1 deletion scripts/fluxnet/01_raw_to_parquet.ipynb
@@ -35,7 +35,6 @@
"import pathlib\n",
"\n",
"import dask.dataframe as dd\n",
"import fsspec\n",
"import gcsfs\n",
"import pandas as pd\n",
"from fsspec.implementations.zip import ZipFileSystem\n",
62 changes: 31 additions & 31 deletions scripts/glas/01_cache_glas_data.ipynb
@@ -10,7 +10,7 @@
},
"outputs": [],
"source": [
"pip install git+https://github.com/pangeo-forge/pangeo-forge.git"
"%pip install git+https://github.com/pangeo-forge/pangeo-forge.git"
]
},
{
@@ -51,7 +51,6 @@
"# machine urs.earthdata.nasa.gov login myusername password mypassword\n",
"# where 'myusername' and 'mypassword' are your Earthdata credentials.\n",
"#\n",
"from __future__ import print_function\n",
"\n",
"import base64\n",
"import itertools\n",
@@ -62,24 +61,24 @@
"from getpass import getpass\n",
"\n",
"try:\n",
" from urllib.error import HTTPError, URLError\n",
" from urllib.parse import urlparse\n",
" from urllib.request import (\n",
" urlopen,\n",
" HTTPCookieProcessor,\n",
" Request,\n",
" build_opener,\n",
" HTTPCookieProcessor,\n",
" urlopen,\n",
" )\n",
" from urllib.error import HTTPError, URLError\n",
"except ImportError:\n",
" from urlparse import urlparse\n",
" from urllib2 import (\n",
" urlopen,\n",
" Request,\n",
" HTTPCookieProcessor,\n",
" HTTPError,\n",
" Request,\n",
" URLError,\n",
" build_opener,\n",
" HTTPCookieProcessor,\n",
" urlopen,\n",
" )\n",
" from urlparse import urlparse\n",
"\n",
"# short_name = 'GLAH01'\n",
"# version = '033'\n",
@@ -96,9 +95,9 @@
"URS_URL = \"https://urs.earthdata.nasa.gov\"\n",
"CMR_PAGE_SIZE = 2000\n",
"CMR_FILE_URL = (\n",
" \"{0}/search/granules.json?provider=NSIDC_ECS\"\n",
" f\"{CMR_URL}/search/granules.json?provider=NSIDC_ECS\"\n",
" \"&sort_key[]=start_date&sort_key[]=producer_granule_id\"\n",
" \"&scroll=true&page_size={1}\".format(CMR_URL, CMR_PAGE_SIZE)\n",
" f\"&scroll=true&page_size={CMR_PAGE_SIZE}\"\n",
")\n",
"\n",
"\n",
@@ -138,22 +137,22 @@
" username, account, password = info.authenticators(urlparse(URS_URL).hostname)\n",
" errprefix = \"netrc error: \"\n",
" except Exception as e:\n",
" if not (\"No such file\" in str(e)):\n",
" print(\"netrc error: {0}\".format(str(e)))\n",
" if \"No such file\" not in str(e):\n",
" print(f\"netrc error: {str(e)}\")\n",
" username = None\n",
" password = None\n",
"\n",
" while not credentials:\n",
" if not username:\n",
" username = get_username()\n",
" password = get_password()\n",
" credentials = \"{0}:{1}\".format(username, password)\n",
" credentials = f\"{username}:{password}\"\n",
" credentials = base64.b64encode(credentials.encode(\"ascii\")).decode(\"ascii\")\n",
"\n",
" if url:\n",
" try:\n",
" req = Request(url)\n",
" req.add_header(\"Authorization\", \"Basic {0}\".format(credentials))\n",
" req.add_header(\"Authorization\", f\"Basic {credentials}\")\n",
" opener = build_opener(HTTPCookieProcessor())\n",
" opener.open(req)\n",
" except HTTPError:\n",
@@ -169,15 +168,15 @@
"def build_version_query_params(version):\n",
" desired_pad_length = 3\n",
" if len(version) > desired_pad_length:\n",
" print('Version string too long: \"{0}\"'.format(version))\n",
" print(f'Version string too long: \"{version}\"')\n",
" quit()\n",
"\n",
" version = str(int(version)) # Strip off any leading zeros\n",
" query_params = \"\"\n",
"\n",
" while len(version) <= desired_pad_length:\n",
" padded_version = version.zfill(desired_pad_length)\n",
" query_params += \"&version={0}\".format(padded_version)\n",
" query_params += f\"&version={padded_version}\"\n",
" desired_pad_length -= 1\n",
" return query_params\n",
"\n",
@@ -191,16 +190,16 @@
" polygon=None,\n",
" filename_filter=None,\n",
"):\n",
" params = \"&short_name={0}\".format(short_name)\n",
" params = f\"&short_name={short_name}\"\n",
" params += build_version_query_params(version)\n",
" params += \"&temporal[]={0},{1}\".format(time_start, time_end)\n",
" params += f\"&temporal[]={time_start},{time_end}\"\n",
" if polygon:\n",
" params += \"&polygon={0}\".format(polygon)\n",
" params += f\"&polygon={polygon}\"\n",
" elif bounding_box:\n",
" params += \"&bounding_box={0}\".format(bounding_box)\n",
" params += f\"&bounding_box={bounding_box}\"\n",
" if filename_filter:\n",
" option = \"&options[producer_granule_id][pattern]=true\"\n",
" params += \"&producer_granule_id[]={0}{1}\".format(filename_filter, option)\n",
" params += f\"&producer_granule_id[]={filename_filter}{option}\"\n",
" return CMR_FILE_URL + params\n",
"\n",
"\n",
@@ -262,7 +261,7 @@
" polygon=polygon,\n",
" filename_filter=filename_filter,\n",
" )\n",
" print(\"Querying for data:\\n\\t{0}\\n\".format(cmr_query_url))\n",
" print(f\"Querying for data:\\n\\t{cmr_query_url}\\n\")\n",
"\n",
" cmr_scroll_id = None\n",
" ctx = ssl.create_default_context()\n",
@@ -282,7 +281,7 @@
" cmr_scroll_id = headers[\"cmr-scroll-id\"]\n",
" hits = int(headers[\"cmr-hits\"])\n",
" if hits > 0:\n",
" print(\"Found {0} matches.\".format(hits))\n",
" print(f\"Found {hits} matches.\")\n",
" else:\n",
" print(\"Found no matches.\")\n",
" search_page = response.read()\n",
@@ -326,8 +325,9 @@
"outputs": [],
"source": [
"import os\n",
"import fsspec\n",
"\n",
"import dask\n",
"import fsspec\n",
"\n",
"\n",
"@dask.delayed\n",
@@ -338,7 +338,7 @@
" return out\n",
"\n",
" url_count = len(urls)\n",
" print(\"Downloading {0} files...\".format(url_count))\n",
" print(f\"Downloading {url_count} files...\")\n",
"\n",
" for index, url in enumerate(urls, start=1):\n",
" if not credentials and urlparse(url).scheme == \"https\":\n",
@@ -359,7 +359,7 @@
" # open(filename, 'wb').write(resp.content)\n",
" req = Request(url)\n",
" if credentials:\n",
" req.add_header(\"Authorization\", \"Basic {0}\".format(credentials))\n",
" req.add_header(\"Authorization\", f\"Basic {credentials}\")\n",
" opener = build_opener(HTTPCookieProcessor())\n",
"\n",
" with fsspec.open(target_url, mode=\"wb\") as target:\n",
@@ -368,11 +368,11 @@
" out.append(target_url)\n",
"\n",
" except HTTPError as e:\n",
" print(\"HTTPError {0}, {1}\".format(e.code, e.reason), filename)\n",
" print(f\"HTTPError {e.code}, {e.reason}\", filename)\n",
" except URLError as e:\n",
" print(\"URLError: {0}\".format(e.reason), filename)\n",
" except IOError:\n",
" print(\"IOError: {0}\".format(e.reason), filename)\n",
" print(f\"URLError: {e.reason}\", filename)\n",
" except OSError as e:\n",
" print(f\"IOError: {e.reason}\", filename)\n",
" except KeyboardInterrupt:\n",
" quit()\n",
" except:\n",
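Most of the notebook edits above are mechanical modernizations: import blocks regrouped and sorted, the bare pip install cell switched to the %pip magic, and positional str.format() calls rewritten as f-strings, in line with ruff now running on notebooks via the extend-include = ["*.ipynb"] setting added in pyproject.toml (which specific lint rules drove each rewrite is not visible in the truncated select list). One before/after pair from this notebook, shown standalone; the placeholder credential values come from the notebook's own netrc comment:

# Illustrative standalone version of one change above; values are placeholders.
username, password = "myusername", "mypassword"

# Before: positional str.format(), as in the old notebook code
credentials_old = "{0}:{1}".format(username, password)

# After: the equivalent f-string used in the updated code
credentials_new = f"{username}:{password}"

assert credentials_old == credentials_new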
4 changes: 0 additions & 4 deletions scripts/global-biomass/01_biomass_to_cogs.ipynb
@@ -32,12 +32,8 @@
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import os\n",
"import pathlib\n",
"\n",
"from google.cloud import storage\n",
"from rasterio.io import MemoryFile\n",
"from rio_cogeo.cogeo import cog_translate\n",
"from rio_cogeo.profiles import cog_profiles\n",
"\n",
