carbonplan · norlandrhagen · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -2,19 +2,43 @@ name: CI
 
 on: [push, pull_request]
 
+# Cancel any in-progress run of this workflow for the same ref when a new one starts.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
-  build:
+  # Check that the Python package installs (editable install of the repo root).
+  build-python:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the version as a string rather than a float.
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+
+  # Install and build the JavaScript project on each Node.js version in the matrix.
+  build-js:
     runs-on: ubuntu-latest
 
     strategy:
       matrix:
-        node-version: [14.x, 16.x]
+        node-version: [18.x, 20.x]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Use Node.js ${{ matrix.node-version }}
-        uses: actions/setup-node@v2.5.1
+        uses: actions/setup-node@v4
         with:
           node-version: ${{ matrix.node-version }}
+
       - run: npm install
       - run: npm run build --if-present
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,101 +1,20 @@
+# https://pre-commit.com/
 ci:
-  autoupdate_schedule: quarterly
-  autofix_prs: false
-
+  autoupdate_schedule: monthly
 repos:
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: 'v4.0.0-alpha.8'
-    hooks:
-      - id: prettier
-        additional_dependencies:
-          - 'prettier@2.5.1'
-          - '@carbonplan/prettier@1.2.0'
-        language_version: system
-        files: "\\.(\
-          css|less|scss\
-          |graphql|gql\
-          |html\
-          |js|jsx\
-          |ts|tsx\
-          |vue\
-          |yaml|yml\
-          )$"
-
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: 'v4.0.0-alpha.8'
-    hooks:
-      - id: prettier
-        additional_dependencies:
-          - 'prettier@2.5.1'
-          - '@carbonplan/prettier@1.2.0'
-        language_version: system
-        name: prettier-markdown
-        entry: prettier --write --parser mdx
-        files: "\\.(\
-          |md|markdown|mdown|mkdn\
-          |mdx\
-          )$"
-
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
-      - id: check-docstring-first
-      - id: check-json
       - id: check-yaml
-      - id: debug-statements
-      - id: mixed-line-ending
-      - id: pretty-format-json
-        args: ['--autofix', '--indent=2', '--no-sort-keys']
-        exclude: 'ipynb'
-
-  - repo: https://github.com/psf/black
-    rev: 23.12.1
-    hooks:
-      - id: black
-        args: ['--line-length', '100']
 
-  - repo: https://github.com/keewis/blackdoc
-    rev: v0.3.9
-    hooks:
-      - id: blackdoc
-
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: 'v0.1.11'
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: "v0.8.1"
     hooks:
+      # Run the linter.
       - id: ruff
-        args: ['--fix', '--line-length', '100']
-
-  - repo: https://github.com/kynan/nbstripout
-    rev: 0.6.1
-    hooks:
-      - id: nbstripout
-        args:
-          [
-            '--extra-keys',
-            'metadata.celltoolbar metadata.kernelspec',
-            'metadata.language_info.codemirror_mode.version',
-            'metadata.language_info.pygments_lexer',
-            'metadata.language_info.version',
-            'metadata.toc',
-            'metadata.notify_time',
-            'metadata.varInspector',
-            'cell.metadata.heading_collapsed',
-            'cell.metadata.hidden',
-            'cell.metadata.code_folding',
-            'cell.metadata.tags',
-            'cell.metadata.init_cell',
-          ]
-
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.7.1
-    hooks:
-      - id: nbqa-ruff
-        args: ['--fix']
-      - id: nbqa-isort
-        args: ['--profile=black']
-        additional_dependencies: [isort==5.6.4]
-      - id: nbqa-black
-      - id: nbqa-pyupgrade
-        args: ['--py37-plus']
+        args: [ --fix ]
+      # Run the formatter.
+      - id: ruff-format
diff --git a/compliance_users/__init__.py b/compliance_users/__init__.py
@@ -0,0 +1,10 @@
+"""Top-level package for compliance_users; exposes ``__version__``."""
+
+from importlib.metadata import version as _version
+
+try:
+    # Look up the installed distribution by the name declared in pyproject.toml.
+    __version__ = _version("compliance-users")
+except Exception:
+    # Metadata lookup failed (for example, the package is not installed).
+    __version__ = "9999"
diff --git a/scripts/build_users_data.py → compliance_users/scripts/build_users_data.py b/scripts/build_users_data.py → compliance_users/scripts/build_users_data.py
@@ -72,7 +72,10 @@ def prune_data(user_project_df, project_df, user_facility_df, facility_df):
     # drop user/facility data for facilities that don't appear in MRR data
     print()
     print("Facility ids that appear in the compliance data but not the MRR data:")
-    print("-----> " + str(set(user_facility_df["facility_id"]) - set(facility_df["facility_id"])))
+    print(
+        "-----> "
+        + str(set(user_facility_df["facility_id"]) - set(facility_df["facility_id"]))
+    )
     print()
     uf_df = uf_df[uf_df["facility_id"].isin(f_df["facility_id"])]
 

diff --git a/scripts/facilities.py → compliance_users/scripts/facilities.py b/scripts/facilities.py → compliance_users/scripts/facilities.py
@@ -1,7 +1,7 @@
+import warnings
 from collections import defaultdict
 
 import pandas as pd
-import warnings
 
 # FOR UPDATES: add new mrr data keys
 mrr_file_year = {
@@ -80,17 +80,23 @@ def read_facility_data(data_path, mrr_data_years):
     facility_df["state"] = facility_df["state"].str.upper().str.strip()
 
     # keep most recent info associated with a fid in each reporting period
-    facility_df = facility_df.drop_duplicates(["facility_id", "reporting_period"], keep="last")
+    facility_df = facility_df.drop_duplicates(
+        ["facility_id", "reporting_period"], keep="last"
+    )
     return facility_df
 
 
 def make_facility_info(facility_df, user_facility_df):
-    facility_name_to_id = facility_df.set_index("facility_name")["facility_id"].to_dict()
+    facility_name_to_id = facility_df.set_index("facility_name")[
+        "facility_id"
+    ].to_dict()
 
     facility_id_to_info = defaultdict(dict)
     for i, row in facility_df.iterrows():
         facility_id_to_info[row["facility_id"]][row["reporting_period"]] = {
-            "facility_name": row["facility_name"] if row["facility_name"] else "missing",
+            "facility_name": row["facility_name"]
+            if row["facility_name"]
+            else "missing",
             "city": "missing" if pd.isna(row["city"]) else row["city"],
             "state": "missing" if pd.isna(row["state"]) else row["state"],
             "sector": "missing" if pd.isna(row["sector"]) else row["sector"],

diff --git a/scripts/projects.py → compliance_users/scripts/projects.py b/scripts/projects.py → compliance_users/scripts/projects.py
@@ -51,7 +51,9 @@ def make_opr_to_arbs(issuance_df):
     opr_to_arbs = {}
     combined_arbs = []
     for opr_id in opr_ids:
-        arbs = issuance_df.loc[issuance_df["opr_id"] == opr_id, "arb_id"].unique().tolist()
+        arbs = (
+            issuance_df.loc[issuance_df["opr_id"] == opr_id, "arb_id"].unique().tolist()
+        )
 
         # if an opr id maps to multiple arbs (as is the case with certain early
         # early action projects), concatenate into a combined opr id
@@ -67,7 +69,9 @@ def make_arb_to_oprs(issuance_df, combined_arbs):
     arb_ids = issuance_df["arb_id"].unique().tolist()
     arb_to_oprs = {}
     for arb_id in arb_ids:
-        oprs = issuance_df.loc[issuance_df["arb_id"] == arb_id, "opr_id"].unique().tolist()
+        oprs = (
+            issuance_df.loc[issuance_df["arb_id"] == arb_id, "opr_id"].unique().tolist()
+        )
         # currently, there are two arb ids (CAMM5244 & CALS5030) that map
         # to multiple opr ids; after confirming that the underlying project
         # information is the same, we simply map to the most recent opr id
@@ -86,7 +90,9 @@ def make_project_info(issuance_df):
     opr_rows = issuance_df.drop_duplicates(
         ["opr_id", "project_name", "project_type", "state", "documentation"]
     )
-    opr_rows = opr_rows[["opr_id", "project_name", "project_type", "state", "documentation"]]
+    opr_rows = opr_rows[
+        ["opr_id", "project_name", "project_type", "state", "documentation"]
+    ]
 
     opr_to_project_info = opr_rows.set_index("opr_id").to_dict(orient="index")
     project_name_to_opr = opr_rows.set_index("project_name")["opr_id"].to_dict()

diff --git a/scripts/sandbox.ipynb → compliance_users/scripts/sandbox.ipynb b/scripts/sandbox.ipynb → compliance_users/scripts/sandbox.ipynb
@@ -8,7 +8,6 @@
    "outputs": [],
    "source": [
     "import facilities\n",
-    "import pandas as pd\n",
     "import projects\n",
     "import users_and_facilities\n",
     "import users_and_projects"

diff --git a/scripts/users_and_facilities.py → ...nce_users/scripts/users_and_facilities.py b/scripts/users_and_facilities.py → ...nce_users/scripts/users_and_facilities.py
@@ -27,8 +27,12 @@ def read_user_facility_data(data_path, reporting_periods):
             file_path = data_path + config["file"]
             sheet_name = config["sheet"]
         else:
-            file_path = data_path + default_file_template.format(reporting_period=reporting_period)
-            sheet_name = default_sheet_template.format(reporting_period=reporting_period)
+            file_path = data_path + default_file_template.format(
+                reporting_period=reporting_period
+            )
+            sheet_name = default_sheet_template.format(
+                reporting_period=reporting_period
+            )
 
         # read the Excel file
         df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=4)
@@ -51,7 +55,9 @@ def read_user_facility_data(data_path, reporting_periods):
         df = df[df["facility_ids"].notna()]
 
         # make a row for each facility id connected to a user and compliance period
-        df["facility_ids"] = df["facility_ids"].apply(lambda x: str(x).replace(" ", "").split(","))
+        df["facility_ids"] = df["facility_ids"].apply(
+            lambda x: str(x).replace(" ", "").split(",")
+        )
         df = df.explode("facility_ids")
         df = df.rename(columns={"facility_ids": "facility_id"})
 

diff --git a/scripts/users_and_projects.py → ...iance_users/scripts/users_and_projects.py b/scripts/users_and_projects.py → ...iance_users/scripts/users_and_projects.py
@@ -26,8 +26,12 @@ def read_user_project_data(data_path, reporting_periods):
             file_path = data_path + config["file"]
             sheet_name = config["sheet"]
         else:
-            file_path = data_path + default_file_template.format(reporting_period=reporting_period)
-            sheet_name = default_sheet_template.format(reporting_period=reporting_period)
+            file_path = data_path + default_file_template.format(
+                reporting_period=reporting_period
+            )
+            sheet_name = default_sheet_template.format(
+                reporting_period=reporting_period
+            )
 
         # read the Excel file
         df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=4, usecols="A:E")
@@ -51,7 +55,9 @@ def read_user_project_data(data_path, reporting_periods):
         columns=rename_d,
         inplace=True,
     )
-    user_project_df = user_project_df[["user_id", "arb_id", "quantity", "reporting_period"]]
+    user_project_df = user_project_df[
+        ["user_id", "arb_id", "quantity", "reporting_period"]
+    ]
     user_project_df["user_id"] = user_project_df["user_id"].str.strip()
     user_project_df["arb_id"] = user_project_df["arb_id"].str.strip()
 
@@ -63,7 +69,9 @@ def read_user_project_data(data_path, reporting_periods):
     user_project_df["arb_id"] = user_project_df["arb_id"].str.replace(
         "CAOD-", "CAOD"
     )  # fixing typo in 2024 data for CAOD6458
-    user_project_df["arb_id"] = user_project_df["arb_id"].str.split("-").apply(lambda x: x[0])
+    user_project_df["arb_id"] = (
+        user_project_df["arb_id"].str.split("-").apply(lambda x: x[0])
+    )
 
     user_project_df = (
         user_project_df.groupby(["user_id", "arb_id", "reporting_period"])["quantity"]

diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,79 @@
+# Packaging and tooling configuration for the compliance-users package.
+
+[project]
+name = "compliance-users"
+description = "Carbonplan compliance-users"
+authors = [{name = "CarbonPlan", email = "hello@carbonplan.org"}]
+readme = "README.md"
+requires-python = ">=3.10"
+
+license = {text = "MIT"}
+
+dependencies = [
+    "pandas",
+    "numpy",
+]
+# The version is not hard-coded here; see [tool.setuptools_scm] below.
+dynamic = ["version"]
+
+[build-system]
+# setuptools-scm must be a build requirement for the dynamic version and the
+# [tool.setuptools_scm] table to take effect.
+requires = ["setuptools>=61.0", "setuptools-scm>=8", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project.optional-dependencies]
+test = [
+    "pytest",
+    "ruff",
+    "pre-commit",
+    "mypy",
+]
+
+[tool.setuptools_scm]
+# Same sentinel value that compliance_users/__init__.py falls back to.
+fallback_version = "9999"
+
+[tool.setuptools.packages.find]
+exclude = ["docs", "tests", "tests.*", "docs.*"]
+
+[tool.setuptools.package-data]
+# Keyed by import package name. NOTE(review): confirm a py.typed marker exists.
+compliance_users = ["py.typed"]
+
+[tool.mypy]
+files = "compliance_users/**/*.py"
+show_error_codes = true
+
+[tool.ruff]
+# Same as Black.
+line-length = 88
+indent-width = 4
+# Lowest Python version permitted by requires-python above.
+target-version = "py310"
+
+exclude = [
+    "docs",
+    ".eggs",
+]
+
+[tool.ruff.lint]
+# Select Pyflakes (`F`), a subset of pycodestyle (`E`), and isort (`I`) rules.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+select = ["E4", "E7", "E9", "F", "I"]
+per-file-ignores = {}
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+# Indent with spaces, rather than tabs.
+indent-style = "space"
+# Respect magic trailing commas.
+skip-magic-trailing-comma = false
+# Automatically detect the appropriate line ending.
+line-ending = "auto"
+
+[tool.ruff.lint.isort]
+# Import package name, so its imports are sorted as first-party.
+known-first-party = ["compliance_users"]
diff --git a/requirements.txt b/requirements.txt