Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add collection validation #53

Merged
merged 1 commit into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Validates every STAC collection on pushes to main and on all pull requests.
name: Continuous integration

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # Matches the interpreter version recorded in the requirements.txt header.
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        run: pip install -r requirements.txt
      - name: Validate collections
        run: python scripts/validate_collections.py
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,35 @@ Should follow the following format:
"dry_run": "<true/false>",
}
```

## Validation

This repository provides a script for validating all collections.
First, install the requirements (preferably in a virtual environment):

```shell
pip install -r requirements.txt
```

Then:

```shell
python scripts/validate_collections.py
```

## Development

If you need to add new dependencies, first install the requirements:

```shell
pip install -r requirements.txt
```

Add your dependency to `requirements.in` *without a version specifier* (unless you really need one).
Then run:

```shell
pip-compile
```

This will regenerate `requirements.txt` with the complete, fully resolved set of pinned Python dependencies (including transitive ones).
8 changes: 4 additions & 4 deletions ingestion-data/collections/caldor-fire-behavior.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/caldor-fire-burn-severity.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
33 changes: 22 additions & 11 deletions ingestion-data/collections/disturbance-probability.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
{
"collection": "disturbance-probability-percentile",
"type": "Collection",
"stac_version": "1.0.0",
"id": "disturbance-probability-percentile",
"title": "Near Real-time Disturbance probability map (%)",
"data_type": "cog",
"spatial_extent": {
"xmin": -84.132,
"ymin": 25.224,
"xmax": -79.853,
"ymax": 30.728
},
"temporal_extent": {
"startdate": "2022-10-03T00:00:00Z",
"enddate": "2022-10-03T23:59:59Z"
"extent": {
"spatial": {
"bbox": [
-84.132,
25.224,
-79.853,
30.728
]
},
"temporal": {
"interval": [
[
"2022-10-03T00:00:00Z",
"2022-10-03T23:59:59Z"
]
]
}
},
"license": "CC-BY-NC-SA-1.0",
"providers": [
Expand Down Expand Up @@ -52,5 +62,6 @@
"bucket": "veda-data-store-staging",
"filename_regex": "(.*)spec_prob_mosaic_2022-10-03_day.tif$"
}
]
],
"links": []
}
8 changes: 4 additions & 4 deletions ingestion-data/collections/ecco-surface-height-change.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
]
}
},
"license": "Creative Commons Zero (CC0-1.0)",
"license": "CC0-1.0",
"description": "The global Terrestrial Water Storage (TWS) non-stationarity index integrates the trend, seasonal shifts, and variability change of TWS for the period of 2003 - 2020. TWS is derived by jointly assimilating the MODIS Leaf Area Index, the ESA CCI surface soil moisture, and the GSFC GRACE mascon-based TWS anomalies into the Noah-MP land surface model within the NASA Land Information System (LIS) at 10 km spatial resolution forced by the combination of MERRA2 and IMERG meteorological fields. The smaller the non-stationarity index is, the more the water cycle is under a non-stationary process. Glaciers and Greenland are excluded from the analysis.",
"item_assets": {
"cog_default": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-125.0",
"24.0",
"-75.0",
"43.0"
-125.0,
24.0,
-75.0,
43.0
]
]
},
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/mtbs-burn-severity.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-126.49459612498832",
"24.0478678762251",
"-71.50752568733597",
"50.55916724898132"
-126.494596,
24.047867,
-71.507525,
50.559167
]
]
},
Expand Down
2 changes: 1 addition & 1 deletion ingestion-data/collections/nceo-africa-2017.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"title": "NCEO Africa Aboveground Woody Biomass 2017",
"extent": {
"spatial": {
" bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]]
"bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]]
},
"temporal": {
"interval": [["2017-01-01T00:00:00Z", "2018-01-01T00:00:00Z"]]
Expand Down
10 changes: 5 additions & 5 deletions ingestion-data/collections/pzd-anomaly-covid-19.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
"spatial":{
"bbox":[
[
"-92.2",
"40.9975",
"-76.0",
"49.09"
-92.2,
40.9975,
-76.0,
49.09
]
]
},
"temporal":{
"interval":[
[
"2020-01-01T00:00:00Z",
"2021-23-31T23:59:59Z"
"2021-12-31T23:59:59Z"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-180",
"90",
"-90",
"180"
-180,
90,
-90,
180
]
]
},
Expand Down
8 changes: 4 additions & 4 deletions ingestion-data/collections/togo-agriculture-covid-19.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"spatial":{
"bbox":[
[
"-0.14",
"6.10",
"1.80",
"11.13"
-0.14,
6.10,
1.80,
11.13
]
]
},
Expand Down
2 changes: 2 additions & 0 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pip-tools
pystac[validation]
34 changes: 34 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile
#
attrs==23.1.0
# via jsonschema
build==1.0.3
# via pip-tools
click==8.1.7
# via pip-tools
jsonschema==4.17.3
# via pystac
packaging==23.1
# via build
pip-tools==7.3.0
# via -r requirements.in
pyproject-hooks==1.0.0
# via build
pyrsistent==0.19.3
# via jsonschema
pystac[validation]==1.8.3
# via -r requirements.in
python-dateutil==2.8.2
# via pystac
six==1.16.0
# via python-dateutil
wheel==0.41.2
# via pip-tools

# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools
38 changes: 38 additions & 0 deletions scripts/validate_collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python3

"""Validates all collections in ingestion-data/collections"""

import json
import sys
from pathlib import Path

from pystac import Collection, STACValidationError

# This script lives in <root>/scripts/, so the repository root is one
# directory above the script's own directory.
root = Path(__file__).parents[1]
collections = root / "ingestion-data" / "collections"

# Maps each failing file to a report of the form
# {"type": "error" | "invalid", "message": str | list[str]}.
errors = {}

# Sorted so the report order is reproducible; rglob makes no ordering guarantee.
for path in sorted(collections.rglob("*.json")):
    # rglob recurses into subdirectories, so key the report by the path
    # relative to the collections directory. Keying by bare file name would
    # let same-named files in different subdirectories overwrite each other.
    # For files directly in the collections directory this equals path.name.
    key = path.relative_to(collections).as_posix()

    try:
        collection = Collection.from_file(str(path))
    except Exception as error:
        # Deliberately broad: any failure to read or parse a file is recorded
        # as a finding so the remaining files are still checked.
        errors[key] = {
            "type": "error",
            "message": f"cannot read collection, {type(error)}: {error}",
        }
        continue

    try:
        collection.validate()
    except STACValidationError as error:
        # error.source may be a list of underlying errors or a single object
        # (presumably the jsonschema error(s) -- confirm against pystac docs).
        if isinstance(error.source, list):
            message = [str(e) for e in error.source]
        else:
            message = str(error.source)
        errors[key] = {
            "type": "invalid",
            "message": message,
        }

# Emit a machine-readable report and fail the run if anything was flagged.
if errors:
    json.dump(errors, sys.stdout, indent=2)
    sys.exit(1)