From 10519f102597f1a9c50460bc7b3c7d0c2b219377 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 19 Sep 2023 16:22:37 -0600 Subject: [PATCH] feat: add collection validation Includes: - Python dependencies file and instructions - CI - Actual fixes --- .github/workflows/ci.yaml | 23 +++++++++++ README.md | 32 ++++++++++++++++ .../collections/caldor-fire-behavior.json | 8 ++-- .../caldor-fire-burn-severity.json | 8 ++-- .../collections/disturbance-probability.json | 33 ++++++++++------ .../ecco-surface-height-change.json | 8 ++-- .../lis-tws-nonstationarity-index.json | 2 +- ...lies-diff-covid-19-changing-landscape.json | 8 ++-- .../collections/mtbs-burn-severity.json | 8 ++-- .../collections/nceo-africa-2017.json | 2 +- .../collections/pzd-anomaly-covid-19.json | 10 ++--- .../recovery-proxy-maps-covid-19.json | 8 ++-- ...wn-proxy-covid-19-changing-landscapes.json | 8 ++-- .../togo-agriculture-covid-19.json | 8 ++-- requirements.in | 2 + requirements.txt | 34 +++++++++++++++++ scripts/validate_collections.py | 38 +++++++++++++++++++ 17 files changed, 190 insertions(+), 50 deletions(-) create mode 100644 .github/workflows/ci.yaml create mode 100644 requirements.in create mode 100644 requirements.txt create mode 100755 scripts/validate_collections.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..db25494 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,23 @@ +name: Continuous integration + +on: + push: + branches: + - main + pull_request: + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "pip" + - name: Install dependencies + run: pip install -r requirements.txt + - name: Validate collections + run: python scripts/validate_collections.py diff --git a/README.md b/README.md index 1947270..14eba74 100644 --- a/README.md +++ b/README.md @@ -105,3 +105,35 @@ 
Should follow the following format: "dry_run": "", } ``` + +## Validation + +This repository provides a script for validating all collections. +First, install the requirements (preferably in a virtual environment): + +```shell +pip install -r requirements.txt +``` + +Then: + +```shell +python scripts/validate_collections.py +``` + +## Development + +If you need to add new dependencies, first install the requirements: + +```shell +pip install -r requirements.txt +``` + +Add your dependency to `requirements.in` *without a version specifier* (unless you really need one). +Then run: + +```shell +pip-compile +``` + +This will update `requirements.txt` with a complete, realized set of Python dependencies. diff --git a/ingestion-data/collections/caldor-fire-behavior.json b/ingestion-data/collections/caldor-fire-behavior.json index bfa9b72..231e40e 100644 --- a/ingestion-data/collections/caldor-fire-behavior.json +++ b/ingestion-data/collections/caldor-fire-behavior.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-180", - "90", - "-90", - "180" + -180, + 90, + -90, + 180 ] ] }, diff --git a/ingestion-data/collections/caldor-fire-burn-severity.json b/ingestion-data/collections/caldor-fire-burn-severity.json index 3ace6b4..e42c73e 100644 --- a/ingestion-data/collections/caldor-fire-burn-severity.json +++ b/ingestion-data/collections/caldor-fire-burn-severity.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-180", - "90", - "-90", - "180" + -180, + 90, + -90, + 180 ] ] }, diff --git a/ingestion-data/collections/disturbance-probability.json b/ingestion-data/collections/disturbance-probability.json index 4d3ed51..21322d2 100644 --- a/ingestion-data/collections/disturbance-probability.json +++ b/ingestion-data/collections/disturbance-probability.json @@ -1,16 +1,26 @@ { - "collection": "disturbance-probability-percentile", + "type": "Collection", + "stac_version": "1.0.0", + "id": "disturbance-probability-percentile", "title": "Near Real-time Disturbance probability map (%)", 
"data_type": "cog", - "spatial_extent": { - "xmin": -84.132, - "ymin": 25.224, - "xmax": -79.853, - "ymax": 30.728 - }, - "temporal_extent": { - "startdate": "2022-10-03T00:00:00Z", - "enddate": "2022-10-03T23:59:59Z" + "extent": { + "spatial": { + "bbox": [ + -84.132, + 25.224, + -79.853, + 30.728 + ] + }, + "temporal": { + "interval": [ + [ + "2022-10-03T00:00:00Z", + "2022-10-03T23:59:59Z" + ] + ] + } }, "license": "CC-BY-NC-SA-1.0", "providers": [ @@ -52,5 +62,6 @@ "bucket": "veda-data-store-staging", "filename_regex": "(.*)spec_prob_mosaic_2022-10-03_day.tif$" } - ] + ], + "links": [] } \ No newline at end of file diff --git a/ingestion-data/collections/ecco-surface-height-change.json b/ingestion-data/collections/ecco-surface-height-change.json index 9634368..73bfd1c 100644 --- a/ingestion-data/collections/ecco-surface-height-change.json +++ b/ingestion-data/collections/ecco-surface-height-change.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-180", - "90", - "-90", - "180" + -180, + 90, + -90, + 180 ] ] }, diff --git a/ingestion-data/collections/lis-tws-nonstationarity-index.json b/ingestion-data/collections/lis-tws-nonstationarity-index.json index 03f3ea6..81693a5 100644 --- a/ingestion-data/collections/lis-tws-nonstationarity-index.json +++ b/ingestion-data/collections/lis-tws-nonstationarity-index.json @@ -20,7 +20,7 @@ ] } }, - "license": "Creative Commons Zero (CC0-1.0)", + "license": "CC0-1.0", "description": "The global Terrestrial Water Storage (TWS) non-stationarity index integrates the trend, seasonal shifts, and variability change of TWS for the period of 2003 - 2020. TWS is derived by jointly assimilating the MODIS Leaf Area Index, the ESA CCI surface soil moisture, and the GSFC GRACE mascon-based TWS anomalies into the Noah-MP land surface model within the NASA Land Information System (LIS) at 10 km spatial resolution forced by the combination of MERRA2 and IMERG meteorological fields. 
The smaller the non-stationarity index is, the more the water cycle is under a non-stationary process. Glaciers and Greenland are excluded from the analysis.", "item_assets": { "cog_default": { diff --git a/ingestion-data/collections/modis-fire-anomalies-diff-covid-19-changing-landscape.json b/ingestion-data/collections/modis-fire-anomalies-diff-covid-19-changing-landscape.json index ead3300..67db93b 100644 --- a/ingestion-data/collections/modis-fire-anomalies-diff-covid-19-changing-landscape.json +++ b/ingestion-data/collections/modis-fire-anomalies-diff-covid-19-changing-landscape.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-125.0", - "24.0", - "-75.0", - "43.0" + -125.0, + 24.0, + -75.0, + 43.0 ] ] }, diff --git a/ingestion-data/collections/mtbs-burn-severity.json b/ingestion-data/collections/mtbs-burn-severity.json index 0d0c38c..10c5592 100644 --- a/ingestion-data/collections/mtbs-burn-severity.json +++ b/ingestion-data/collections/mtbs-burn-severity.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-126.49459612498832", - "24.0478678762251", - "-71.50752568733597", - "50.55916724898132" + -126.494596, + 24.047867, + -71.507525, + 50.559167 ] ] }, diff --git a/ingestion-data/collections/nceo-africa-2017.json b/ingestion-data/collections/nceo-africa-2017.json index e00409e..c30a5c5 100644 --- a/ingestion-data/collections/nceo-africa-2017.json +++ b/ingestion-data/collections/nceo-africa-2017.json @@ -12,7 +12,7 @@ "title": "NCEO Africa Aboveground Woody Biomass 2017", "extent": { "spatial": { - " bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]] + "bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]] }, "temporal": { "interval": [["2017-01-01T00:00:00Z", "2018-01-01T00:00:00Z"]] diff --git a/ingestion-data/collections/pzd-anomaly-covid-19.json b/ingestion-data/collections/pzd-anomaly-covid-19.json index 50b7a52..b202847 100644 --- a/ingestion-data/collections/pzd-anomaly-covid-19.json +++ b/ingestion-data/collections/pzd-anomaly-covid-19.json 
@@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-92.2", - "40.9975", - "-76.0", - "49.09" + -92.2, + 40.9975, + -76.0, + 49.09 ] ] }, @@ -19,7 +19,7 @@ "interval":[ [ "2020-01-01T00:00:00Z", - "2021-23-31T23:59:59Z" + "2021-12-31T23:59:59Z" ] ] } diff --git a/ingestion-data/collections/recovery-proxy-maps-covid-19.json b/ingestion-data/collections/recovery-proxy-maps-covid-19.json index 218ed6f..abef194 100644 --- a/ingestion-data/collections/recovery-proxy-maps-covid-19.json +++ b/ingestion-data/collections/recovery-proxy-maps-covid-19.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-180", - "90", - "-90", - "180" + -180, + 90, + -90, + 180 ] ] }, diff --git a/ingestion-data/collections/slowdown-proxy-covid-19-changing-landscapes.json b/ingestion-data/collections/slowdown-proxy-covid-19-changing-landscapes.json index 6e57fb2..dfd2678 100644 --- a/ingestion-data/collections/slowdown-proxy-covid-19-changing-landscapes.json +++ b/ingestion-data/collections/slowdown-proxy-covid-19-changing-landscapes.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-180", - "90", - "-90", - "180" + -180, + 90, + -90, + 180 ] ] }, diff --git a/ingestion-data/collections/togo-agriculture-covid-19.json b/ingestion-data/collections/togo-agriculture-covid-19.json index 260235d..5f40584 100644 --- a/ingestion-data/collections/togo-agriculture-covid-19.json +++ b/ingestion-data/collections/togo-agriculture-covid-19.json @@ -8,10 +8,10 @@ "spatial":{ "bbox":[ [ - "-0.14", - "6.10", - "1.80", - "11.13" + -0.14, + 6.10, + 1.80, + 11.13 ] ] }, diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..71b6894 --- /dev/null +++ b/requirements.in @@ -0,0 +1,2 @@ +pip-tools +pystac[validation] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f8c8d16 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,34 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile +# +attrs==23.1.0 + # via 
jsonschema
+build==1.0.3
+    # via pip-tools
+click==8.1.7
+    # via pip-tools
+jsonschema==4.17.3
+    # via pystac
+packaging==23.1
+    # via build
+pip-tools==7.3.0
+    # via -r requirements.in
+pyproject-hooks==1.0.0
+    # via build
+pyrsistent==0.19.3
+    # via jsonschema
+pystac[validation]==1.8.3
+    # via -r requirements.in
+python-dateutil==2.8.2
+    # via pystac
+six==1.16.0
+    # via python-dateutil
+wheel==0.41.2
+    # via pip-tools
+
+# The following packages are considered to be unsafe in a requirements file:
+# pip
+# setuptools
diff --git a/scripts/validate_collections.py b/scripts/validate_collections.py
new file mode 100755
index 0000000..8a24726
--- /dev/null
+++ b/scripts/validate_collections.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+"""Validate every collection under ingestion-data/collections.
+
+Each ``*.json`` file found (recursively) is read with pystac and validated.
+Failures are collected into a JSON object, keyed by the file's path relative
+to the collections directory, and written to stdout before exiting with
+status 1. When nothing fails, nothing is printed and the exit status is 0.
+"""
+
+import json
+import sys
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from pystac import Collection, STACValidationError
+
+root = Path(__file__).parents[1]
+collections = root / "ingestion-data" / "collections"
+
+
+def check_collection(path: Path) -> Optional[Dict[str, Any]]:
+    """Return an error record for the collection at *path*, or None."""
+    try:
+        collection = Collection.from_file(str(path))
+    except Exception as error:
+        # Deliberately broad: any failure to load one file is reported for
+        # that file instead of aborting the whole run.
+        return {
+            "type": "error",
+            "message": f"cannot read collection, {type(error)}: {error}",
+        }
+    try:
+        collection.validate()
+    except STACValidationError as error:
+        # The source is reported item by item when it is a list.
+        if isinstance(error.source, list):
+            message = [str(e) for e in error.source]
+        else:
+            message = str(error.source)
+        return {"type": "invalid", "message": message}
+    return None
+
+
+def main() -> int:
+    """Validate all collections and return the process exit status."""
+    errors = {}
+    # rglob yields files in filesystem order; sort for reproducible reports.
+    for path in sorted(collections.rglob("*.json")):
+        error = check_collection(path)
+        if error is not None:
+            # rglob recurses, so bare file names could collide between
+            # subdirectories; the relative path is unique.
+            errors[path.relative_to(collections).as_posix()] = error
+    if errors:
+        json.dump(errors, sys.stdout, indent=2)
+        sys.stdout.write("\n")  # json.dump adds no trailing newline
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())