Skip to content

Commit

Permalink
feat: add collection validation
Browse files Browse the repository at this point in the history
Includes:

- Python dependencies file and instructions
- CI
- Actual fixes
  • Loading branch information
gadomski committed Sep 20, 2023
1 parent 2a6b114 commit 10519f1
Show file tree
Hide file tree
Showing 17 changed files with 190 additions and 50 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: Continuous integration

on:
push:
branches:
- main
pull_request:

jobs:
validate:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: "pip"
- name: Install dependencies
run: pip install -r requirements.txt
- name: Validate collections
run: python scripts/validate_collections.py
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,35 @@ Should follow the following format:
"dry_run": "<true/false>",
}
```

## Validation

This repository provides a script for validating all collections.
First, install the requirements (preferably in a virtual environment):

```shell
pip install -r requirements.txt
```

Then:

```shell
python scripts/validate_collections.py
```

## Development

If you need to add new dependencies, first install the requirements, which include `pip-tools` (the package that provides `pip-compile`):

```shell
pip install -r requirements.txt
```

Add your dependency to `requirements.in` *without a version specifier* (unless you really need one).
Then run:

```shell
pip-compile
```

This will update `requirements.txt` with the complete, pinned set of Python dependencies, including transitive ones.
8 changes: 4 additions & 4 deletions ingestion-data/collections/caldor-fire-behavior.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/caldor-fire-burn-severity.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
33 changes: 22 additions & 11 deletions ingestion-data/collections/disturbance-probability.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
{
"collection": "disturbance-probability-percentile",
"type": "Collection",
"stac_version": "1.0.0",
"id": "disturbance-probability-percentile",
"title": "Near Real-time Disturbance probability map (%)",
"data_type": "cog",
"spatial_extent": {
"xmin": -84.132,
"ymin": 25.224,
"xmax": -79.853,
"ymax": 30.728
},
"temporal_extent": {
"startdate": "2022-10-03T00:00:00Z",
"enddate": "2022-10-03T23:59:59Z"
"extent": {
"spatial": {
"bbox": [
-84.132,
25.224,
-79.853,
30.728
]
},
"temporal": {
"interval": [
[
"2022-10-03T00:00:00Z",
"2022-10-03T23:59:59Z"
]
]
}
},
"license": "CC-BY-NC-SA-1.0",
"providers": [
Expand Down Expand Up @@ -52,5 +62,6 @@
"bucket": "veda-data-store-staging",
"filename_regex": "(.*)spec_prob_mosaic_2022-10-03_day.tif$"
}
]
],
"links": []
}
8 changes: 4 additions & 4 deletions ingestion-data/collections/ecco-surface-height-change.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
]
}
},
"license": "Creative Commons Zero (CC0-1.0)",
"license": "CC0-1.0",
"description": "The global Terrestrial Water Storage (TWS) non-stationarity index integrates the trend, seasonal shifts, and variability change of TWS for the period of 2003 - 2020. TWS is derived by jointly assimilating the MODIS Leaf Area Index, the ESA CCI surface soil moisture, and the GSFC GRACE mascon-based TWS anomalies into the Noah-MP land surface model within the NASA Land Information System (LIS) at 10 km spatial resolution forced by the combination of MERRA2 and IMERG meteorological fields. The smaller the non-stationarity index is, the more the water cycle is under a non-stationary process. Glaciers and Greenland are excluded from the analysis.",
"item_assets": {
"cog_default": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-125.0",
"24.0",
"-75.0",
"43.0"
-125.0,
24.0,
-75.0,
43.0
]
]
},
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/mtbs-burn-severity.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-126.49459612498832",
"24.0478678762251",
"-71.50752568733597",
"50.55916724898132"
-126.494596,
24.047867,
-71.507525,
50.559167
]
]
},
Expand Down
2 changes: 1 addition & 1 deletion ingestion-data/collections/nceo-africa-2017.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"title": "NCEO Africa Aboveground Woody Biomass 2017",
"extent": {
"spatial": {
" bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]]
"bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]]
},
"temporal": {
"interval": [["2017-01-01T00:00:00Z", "2018-01-01T00:00:00Z"]]
Expand Down
10 changes: 5 additions & 5 deletions ingestion-data/collections/pzd-anomaly-covid-19.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
"spatial":{
"bbox":[
[
"-92.2",
"40.9975",
"-76.0",
"49.09"
-92.2,
40.9975,
-76.0,
49.09
]
]
},
"temporal":{
"interval":[
[
"2020-01-01T00:00:00Z",
"2021-23-31T23:59:59Z"
"2021-12-31T23:59:59Z"
]
]
}
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/recovery-proxy-maps-covid-19.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/togo-agriculture-covid-19.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-0.14",
"6.10",
"1.80",
"11.13"
-0.14,
6.10,
1.80,
11.13
]
]
},
Expand Down
2 changes: 2 additions & 0 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pip-tools
pystac[validation]
34 changes: 34 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile
#
attrs==23.1.0
# via jsonschema
build==1.0.3
# via pip-tools
click==8.1.7
# via pip-tools
jsonschema==4.17.3
# via pystac
packaging==23.1
# via build
pip-tools==7.3.0
# via -r requirements.in
pyproject-hooks==1.0.0
# via build
pyrsistent==0.19.3
# via jsonschema
pystac[validation]==1.8.3
# via -r requirements.in
python-dateutil==2.8.2
# via pystac
six==1.16.0
# via python-dateutil
wheel==0.41.2
# via pip-tools

# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools
38 changes: 38 additions & 0 deletions scripts/validate_collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python3

"""Validates all collections in ingestion-data/collections"""

import json
import sys
from pathlib import Path

from pystac import Collection, STACValidationError

# The repository root is one level above the directory that holds this script.
root = Path(__file__).parents[1]
collections = root / "ingestion-data" / "collections"

# Maps each failing collection file (as a POSIX-style path relative to the
# collections directory) to a record with a "type" and a "message".
errors = {}

# Sorted so that the report is stable from run to run; the order in which the
# directory tree is walked depends on the filesystem.
for path in sorted(collections.rglob("*.json")):
    # Key by relative path instead of the bare file name: the search is
    # recursive, so two files sharing a name in different subdirectories would
    # otherwise overwrite each other's entry. For files directly inside the
    # collections directory this is identical to the file name.
    key = path.relative_to(collections).as_posix()

    try:
        collection = Collection.from_file(str(path))
    except Exception as error:
        # Deliberately broad: whatever goes wrong while loading is recorded so
        # that the remaining files are still checked.
        errors[key] = {
            "type": "error",
            "message": f"cannot read collection, {type(error)}: {error}",
        }
        continue

    try:
        collection.validate()
    except STACValidationError as error:
        # The error's source may be a list of underlying problems or a single
        # object; report a list of strings in the former case.
        if isinstance(error.source, list):
            message = [str(e) for e in error.source]
        else:
            message = str(error.source)
        errors[key] = {
            "type": "invalid",
            "message": message,
        }

if errors:
    # Emit a machine-readable report and exit non-zero so that callers (e.g.
    # the CI job that runs this script) treat the run as failed.
    json.dump(errors, sys.stdout, indent=2)
    sys.stdout.write("\n")
    sys.exit(1)

0 comments on commit 10519f1

Please sign in to comment.