Skip to content

Commit

Permalink
feat: stac-geoparquet and stac-arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
gadomski committed Aug 8, 2024
1 parent d76ca59 commit aab7827
Show file tree
Hide file tree
Showing 29 changed files with 1,256 additions and 63 deletions.
22 changes: 20 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ jobs:
- "-p stac -F reqwest"
- "-p stac-api"
- "-p stac -p stac-api -F geo"
- "-p stac-arrow"
- "-p stac-async"
- "-p stac-cli --no-default-features"
- "-p stac-geoparquet"
- "-p stac-server --no-default-features"
- "-p stac-server --no-default-features -F axum"
- "-p stac-server --no-default-features -F memory-item-search"
Expand Down Expand Up @@ -144,8 +146,9 @@ jobs:
with:
python-version: "3.10"
cache: "pip"
cache-dependency-path: scripts/requirements-stac-server.txt
- name: Install stac-api-validator
run: pip install -r scripts/requirements.txt
run: pip install -r scripts/requirements-stac-server.txt
- name: Validate
run: scripts/validate-stac-server
validate-stac-server-pgstac:
Expand All @@ -169,7 +172,22 @@ jobs:
with:
python-version: "3.10"
cache: "pip"
cache-dependency-path: scripts/requirements-stac-server.txt
- name: Install stac-api-validator
run: pip install -r scripts/requirements.txt
run: pip install -r scripts/requirements-stac-server.txt
- name: Validate
run: scripts/validate-stac-server --pgstac
validate-stac-geoparquet:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
cache-dependency-path: scripts/requirements-stac-geoparquet.txt
- name: Install requirements
run: pip install -r scripts/requirements-stac-geoparquet.txt
- name: Validate
run: scripts/validate-stac-geoparquet
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@ members = [
"stac",
"pgstac",
"stac-api",
"stac-arrow",
"stac-async",
"stac-cli",
"stac-geoparquet",
"stac-server",
"stac-validate",
]
default-members = [
"stac",
"stac-api",
"stac-arrow",
"stac-async",
"stac-cli",
"stac-geoparquet",
"stac-server",
"stac-validate",
]
3 changes: 3 additions & 0 deletions scripts/requirements-stac-geoparquet.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
deepdiff
pyarrow
stac-geoparquet
65 changes: 65 additions & 0 deletions scripts/requirements-stac-geoparquet.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# This file was autogenerated by uv via the following command:
# uv pip compile scripts/requirements-stac-geoparquet.in
certifi==2024.7.4
# via
# pyogrio
# pyproj
ciso8601==2.3.1
# via stac-geoparquet
deepdiff==7.0.1
# via -r scripts/requirements-stac-geoparquet.in
deltalake==0.18.2
# via stac-geoparquet
geopandas==1.0.1
# via stac-geoparquet
numpy==2.0.1
# via
# geopandas
# pandas
# pyarrow
# pyogrio
# shapely
ordered-set==4.1.0
# via deepdiff
orjson==3.10.6
# via stac-geoparquet
packaging==24.1
# via
# geopandas
# pyogrio
# stac-geoparquet
pandas==2.2.2
# via
# geopandas
# stac-geoparquet
pyarrow==17.0.0
# via
# -r scripts/requirements-stac-geoparquet.in
# deltalake
# stac-geoparquet
pyarrow-hotfix==0.6
# via deltalake
pyogrio==0.9.0
# via geopandas
pyproj==3.6.1
# via
# geopandas
# stac-geoparquet
pystac==1.10.1
# via stac-geoparquet
python-dateutil==2.9.0.post0
# via
# pandas
# pystac
pytz==2024.1
# via pandas
shapely==2.0.5
# via
# geopandas
# stac-geoparquet
six==1.16.0
# via python-dateutil
stac-geoparquet==0.6.0
# via -r scripts/requirements-stac-geoparquet.in
tzdata==2024.1
# via pandas
File renamed without changes.
20 changes: 10 additions & 10 deletions scripts/requirements.txt → scripts/requirements-stac-server.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file was autogenerated by uv via the following command:
# uv pip compile scripts/requirements.in
attrs==23.2.0
# uv pip compile scripts/requirements-stac-server.in
attrs==24.2.0
# via
# jsonschema
# referencing
Expand All @@ -19,7 +19,7 @@ deepdiff==6.7.1
# via stac-api-validator
idna==3.7
# via requests
jsonschema==4.22.0
jsonschema==4.23.0
# via
# pystac
# stac-api-validator
Expand All @@ -29,13 +29,13 @@ jsonschema-specifications==2023.12.1
# via jsonschema
more-itertools==8.14.0
# via stac-api-validator
numpy==2.0.0
numpy==2.0.1
# via shapely
ordered-set==4.1.0
# via deepdiff
orjson==3.10.5
orjson==3.10.6
# via pystac
pystac[orjson,validation]==1.10.1
pystac==1.10.1
# via
# pystac-client
# stac-api-validator
Expand All @@ -47,7 +47,7 @@ python-dateutil==2.9.0.post0
# pystac-client
python-dotenv==1.0.1
# via stac-check
pyyaml==6.0.1
pyyaml==6.0.2
# via
# stac-api-validator
# stac-check
Expand All @@ -61,16 +61,16 @@ requests==2.32.3
# stac-api-validator
# stac-check
# stac-validator
rpds-py==0.18.1
rpds-py==0.20.0
# via
# jsonschema
# referencing
shapely==2.0.4
shapely==2.0.5
# via stac-api-validator
six==1.16.0
# via python-dateutil
stac-api-validator==0.6.2
# via -r requirements.in
# via -r scripts/requirements-stac-server.in
stac-check==1.3.3
# via stac-api-validator
stac-validator==3.3.2
Expand Down
62 changes: 62 additions & 0 deletions scripts/validate-stac-geoparquet
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python

import json
import sys
import shutil
import subprocess
import tempfile
from typing import Any
from deepdiff import DeepDiff
from pathlib import Path
import pyarrow.parquet
import stac_geoparquet.arrow
import pyarrow

# Repository root: this script lives one directory below it (in scripts/).
root = Path(__file__).parents[1]
# STAC item fixture used as the reference for both round-trip checks below.
path = root / "spec-examples" / "v1.0.0" / "extended-item.json"
# Scratch directory for the intermediate parquet file. mkdtemp() does not clean
# up after itself; the directory is removed by the finally clause at the end of
# this script.
directory = tempfile.mkdtemp()
parquet_path = Path(directory) / "extended-item.parquet"

def clean_report(report: dict[str, Any]) -> dict[str, Any]:
    """Drop the one expected datetime difference from a diff report.

    The item's ``properties.datetime`` string is expected to differ between
    the two sides of the comparison (``...437000Z`` versus ``...437Z``), so
    that exact entry is removed from ``values_changed``. If nothing else
    remains under ``values_changed``, the key itself is removed as well.

    The report is modified in place and the same dict is returned.
    """
    datetime_key = "root['properties']['datetime']"
    expected_change = {
        "new_value": "2020-12-14T18:02:31.437Z",
        "old_value": "2020-12-14T18:02:31.437000Z",
    }

    changes = report.get("values_changed")
    if not changes:
        # Missing or already-empty section: nothing to clean.
        return report

    if changes.get(datetime_key) == expected_change:
        del changes[datetime_key]
    if not changes:
        del report["values_changed"]
    return report

try:
    # Writing: run the Rust CLI's `translate` command on the JSON item to
    # produce a parquet file, then read that file back with the Python
    # stac-geoparquet package and compare against the original JSON.
    subprocess.check_call(
        ["cargo", "run", "--no-default-features", "--", "translate", path, parquet_path]
    )
    table = pyarrow.parquet.read_table(parquet_path)
    after = next(stac_geoparquet.arrow.stac_table_to_items(table))
    # JSON is UTF-8 by specification; do not rely on the locale's default
    # encoding when reading the fixture.
    with open(path, encoding="utf-8") as f:
        before = json.load(f)
    report = clean_report(DeepDiff(before, after).to_dict())
    if report:
        # Any remaining difference is a failure; print it for the CI log.
        print(json.dumps(report, indent=2))
        sys.exit(1)
    # Remove the CLI-written file so the next step writes a fresh one.
    parquet_path.unlink()

    # Reading: write the same item to parquet with the Python package, then
    # have the Rust CLI translate it; its stdout is parsed as JSON and the
    # first entry of "features" is compared against the original item.
    table = stac_geoparquet.arrow.parse_stac_items_to_arrow([before])
    stac_geoparquet.arrow.to_parquet(table, parquet_path)
    item_collection = json.loads(subprocess.check_output(
        ["cargo", "run", "--no-default-features", "--", "translate", parquet_path]
    ))
    report = clean_report(DeepDiff(before, item_collection["features"][0]).to_dict())
    if report:
        print(json.dumps(report, indent=2))
        sys.exit(1)

finally:
    # Runs on success, on failure and on sys.exit(), so the scratch directory
    # is always cleaned up.
    shutil.rmtree(directory)
22 changes: 22 additions & 0 deletions stac-arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "stac-arrow"
version = "0.1.0"
edition = "2021"

[dependencies]
arrow-array = "52"
arrow-cast = "52"
arrow-schema = "52"
arrow-json = "52"
geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "476562b3da7dde9cd324fc5bf5ceb5451f76c451" }
geojson = "0.24"
geo-types = "0.7"
serde_json = "1"
stac = { version = "0.7", path = "../stac" }
thiserror = "1"

[dev-dependencies]
geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "476562b3da7dde9cd324fc5bf5ceb5451f76c451", features = [
"parquet",
] }
stac-validate = { version = "0.1", path = "../stac-validate" }
1 change: 1 addition & 0 deletions stac-arrow/data
Binary file added stac-arrow/examples/extended-item.parquet
Binary file not shown.
Loading

0 comments on commit aab7827

Please sign in to comment.