Skip to content

Commit

Permalink
feat: stac-geoparquet and stac-arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
gadomski committed Aug 8, 2024
1 parent eace9bc commit 49bf5ba
Show file tree
Hide file tree
Showing 31 changed files with 1,422 additions and 63 deletions.
22 changes: 20 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ jobs:
- "-p stac -F reqwest"
- "-p stac-api"
- "-p stac -p stac-api -F geo"
- "-p stac-arrow"
- "-p stac-async"
- "-p stac-cli --no-default-features"
- "-p stac-geoparquet"
- "-p stac-server --no-default-features"
- "-p stac-server --no-default-features -F axum"
- "-p stac-server --no-default-features -F memory-item-search"
Expand Down Expand Up @@ -144,8 +146,9 @@ jobs:
with:
python-version: "3.10"
cache: "pip"
cache-dependency-path: scripts/requirements-stac-server.txt
- name: Install stac-api-validator
run: pip install -r scripts/requirements.txt
run: pip install -r scripts/requirements-stac-server.txt
- name: Validate
run: scripts/validate-stac-server
validate-stac-server-pgstac:
Expand All @@ -169,7 +172,22 @@ jobs:
with:
python-version: "3.10"
cache: "pip"
cache-dependency-path: scripts/requirements-stac-server.txt
- name: Install stac-api-validator
run: pip install -r scripts/requirements.txt
run: pip install -r scripts/requirements-stac-server.txt
- name: Validate
run: scripts/validate-stac-server --pgstac
validate-stac-geoparquet:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
cache-dependency-path: scripts/requirements-stac-geoparquet.txt
- name: Install requirements
run: pip install -r scripts/requirements-stac-geoparquet.txt
- name: Validate
run: scripts/validate-stac-geoparquet
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@ members = [
"stac",
"pgstac",
"stac-api",
"stac-arrow",
"stac-async",
"stac-cli",
"stac-geoparquet",
"stac-server",
"stac-validate",
]
default-members = [
"stac",
"stac-api",
"stac-arrow",
"stac-async",
"stac-cli",
"stac-geoparquet",
"stac-server",
"stac-validate",
]
3 changes: 3 additions & 0 deletions scripts/requirements-stac-geoparquet.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
deepdiff
pyarrow
stac-geoparquet
65 changes: 65 additions & 0 deletions scripts/requirements-stac-geoparquet.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# This file was autogenerated by uv via the following command:
# uv pip compile scripts/requirements-stac-geoparquet.in
certifi==2024.7.4
# via
# pyogrio
# pyproj
ciso8601==2.3.1
# via stac-geoparquet
deepdiff==7.0.1
# via -r scripts/requirements-stac-geoparquet.in
deltalake==0.18.2
# via stac-geoparquet
geopandas==1.0.1
# via stac-geoparquet
numpy==2.0.1
# via
# geopandas
# pandas
# pyarrow
# pyogrio
# shapely
ordered-set==4.1.0
# via deepdiff
orjson==3.10.6
# via stac-geoparquet
packaging==24.1
# via
# geopandas
# pyogrio
# stac-geoparquet
pandas==2.2.2
# via
# geopandas
# stac-geoparquet
pyarrow==17.0.0
# via
# -r scripts/requirements-stac-geoparquet.in
# deltalake
# stac-geoparquet
pyarrow-hotfix==0.6
# via deltalake
pyogrio==0.9.0
# via geopandas
pyproj==3.6.1
# via
# geopandas
# stac-geoparquet
pystac==1.10.1
# via stac-geoparquet
python-dateutil==2.9.0.post0
# via
# pandas
# pystac
pytz==2024.1
# via pandas
shapely==2.0.5
# via
# geopandas
# stac-geoparquet
six==1.16.0
# via python-dateutil
stac-geoparquet==0.6.0
# via -r scripts/requirements-stac-geoparquet.in
tzdata==2024.1
# via pandas
File renamed without changes.
20 changes: 10 additions & 10 deletions scripts/requirements.txt → scripts/requirements-stac-server.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file was autogenerated by uv via the following command:
# uv pip compile scripts/requirements.in
attrs==23.2.0
# uv pip compile scripts/requirements-stac-server.in
attrs==24.2.0
# via
# jsonschema
# referencing
Expand All @@ -19,7 +19,7 @@ deepdiff==6.7.1
# via stac-api-validator
idna==3.7
# via requests
jsonschema==4.22.0
jsonschema==4.23.0
# via
# pystac
# stac-api-validator
Expand All @@ -29,13 +29,13 @@ jsonschema-specifications==2023.12.1
# via jsonschema
more-itertools==8.14.0
# via stac-api-validator
numpy==2.0.0
numpy==2.0.1
# via shapely
ordered-set==4.1.0
# via deepdiff
orjson==3.10.5
orjson==3.10.6
# via pystac
pystac[orjson,validation]==1.10.1
pystac==1.10.1
# via
# pystac-client
# stac-api-validator
Expand All @@ -47,7 +47,7 @@ python-dateutil==2.9.0.post0
# pystac-client
python-dotenv==1.0.1
# via stac-check
pyyaml==6.0.1
pyyaml==6.0.2
# via
# stac-api-validator
# stac-check
Expand All @@ -61,16 +61,16 @@ requests==2.32.3
# stac-api-validator
# stac-check
# stac-validator
rpds-py==0.18.1
rpds-py==0.20.0
# via
# jsonschema
# referencing
shapely==2.0.4
shapely==2.0.5
# via stac-api-validator
six==1.16.0
# via python-dateutil
stac-api-validator==0.6.2
# via -r requirements.in
# via -r scripts/requirements-stac-server.in
stac-check==1.3.3
# via stac-api-validator
stac-validator==3.3.2
Expand Down
62 changes: 62 additions & 0 deletions scripts/validate-stac-geoparquet
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python

import json
import sys
import shutil
import subprocess
import tempfile
from typing import Any
from deepdiff import DeepDiff
from pathlib import Path
import pyarrow.parquet
import stac_geoparquet.arrow
import pyarrow

# Repository root: this script lives one directory below it.
root = Path(__file__).parents[1]
# The STAC item that is round-tripped through parquet.
path = root.joinpath("spec-examples", "v1.0.0", "extended-item.json")
# Scratch directory for the intermediate parquet file.
directory = tempfile.mkdtemp()
parquet_path = Path(directory, "extended-item.parquet")

def clean_report(report: dict[str, Any]) -> dict[str, Any]:
    """Remove the one expected difference from a diff report.

    The datetime of the item is expected to change from
    ``2020-12-14T18:02:31.437000Z`` to ``2020-12-14T18:02:31.437Z``, so that
    exact change is not counted as a difference.

    Args:
        report: Dictionary form of a diff report. It is modified in place.

    Returns:
        The same dictionary, with the expected datetime change removed and
        the ``values_changed`` section dropped if nothing else remains in it.
    """
    datetime_key = "root['properties']['datetime']"
    expected_change = {
        "new_value": "2020-12-14T18:02:31.437Z",
        "old_value": "2020-12-14T18:02:31.437000Z",
    }

    values_changed = report.get("values_changed")
    if not values_changed:
        # Nothing changed (or the section is absent): leave the report as-is.
        return report

    # Only the exact expected old/new pair is forgiven; any other datetime
    # change stays in the report.
    if values_changed.get(datetime_key) == expected_change:
        del values_changed[datetime_key]

    # Drop the section entirely once it is empty so an otherwise clean report
    # is falsy for the caller.
    if not values_changed:
        del report["values_changed"]
    return report

try:
    # Writing: translate the JSON item to parquet with `cargo run ... translate`,
    # then read the file back with the Python stac_geoparquet package and
    # compare the resulting item against the original JSON.
    subprocess.check_call(
        ["cargo", "run", "--no-default-features", "--", "translate", path, parquet_path]
    )
    table = pyarrow.parquet.read_table(parquet_path)
    after = next(stac_geoparquet.arrow.stac_table_to_items(table))
    with open(path) as f:
        before = json.load(f)
    report = DeepDiff(before, after).to_dict()
    report = clean_report(report)
    if report:
        # default=str keeps the report printable even if it holds values the
        # json module cannot encode natively, so the diagnostic is not lost
        # to a TypeError.
        print(json.dumps(report, indent=2, default=str))
        sys.exit(1)
    else:
        # Remove the file so the reading check below starts from a fresh one.
        parquet_path.unlink()

    # Reading: write the item to parquet with the Python stac_geoparquet
    # package, translate it back with `cargo run ... translate` (which prints
    # JSON to stdout), and compare the first feature against the original.
    table = stac_geoparquet.arrow.parse_stac_items_to_arrow([before])
    stac_geoparquet.arrow.to_parquet(table, parquet_path)
    item_collection = json.loads(subprocess.check_output(
        ["cargo", "run", "--no-default-features", "--", "translate", parquet_path]
    ))
    report = DeepDiff(before, item_collection["features"][0]).to_dict()
    report = clean_report(report)
    if report:
        print(json.dumps(report, indent=2, default=str))
        sys.exit(1)

finally:
    # Always remove the scratch directory, including on sys.exit or errors.
    shutil.rmtree(directory)
30 changes: 30 additions & 0 deletions stac-arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[package]
name = "stac-arrow"
version = "0.0.1"
edition = "2021"
authors = ["Pete Gadomski <pete.gadomski@gmail.com>"]
description = "Read and write STAC using the geoarrow specification"
homepage = "https://github.com/stac-utils/stac-rs"
repository = "https://github.com/stac-utils/stac-rs"
license = "MIT OR Apache-2.0"
keywords = ["geospatial", "stac", "metadata", "geo"]
categories = ["science", "data-structures"]


[dependencies]
arrow-array = "52"
arrow-cast = "52"
arrow-schema = "52"
arrow-json = "52"
geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "476562b3da7dde9cd324fc5bf5ceb5451f76c451" }
geojson = "0.24"
geo-types = "0.7"
serde_json = "1"
stac = { version = "0.7", path = "../stac" }
thiserror = "1"

[dev-dependencies]
geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "476562b3da7dde9cd324fc5bf5ceb5451f76c451", features = [
"parquet",
] }
stac-validate = { version = "0.1", path = "../stac-validate" }
35 changes: 35 additions & 0 deletions stac-arrow/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# stac-arrow

[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/stac-utils/stac-rs/ci.yml?branch=main&style=for-the-badge)](https://github.com/stac-utils/stac-rs/actions/workflows/ci.yml)
[![docs.rs](https://img.shields.io/docsrs/stac-arrow?style=for-the-badge)](https://docs.rs/stac-arrow/latest/stac_arrow/)
[![Crates.io](https://img.shields.io/crates/v/stac-arrow?style=for-the-badge)](https://crates.io/crates/stac-arrow)
![Crates.io](https://img.shields.io/crates/l/stac-arrow?style=for-the-badge)
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg?style=for-the-badge)](./CODE_OF_CONDUCT)

Convert STAC item collections to and from [geoarrow](https://github.com/geoarrow/geoarrow-rs/) tables.
To read and write [stac-geoparquet](https://github.com/stac-utils/stac-geoparquet), use [our crate with the same name](../stac-geoparquet/).

**WARNING**: This library should be considered experimental while [geoarrow-rs](https://github.com/geoarrow/geoarrow-rs/) stabilizes.

## Usage

To use the library in your project:

```toml
[dependencies]
stac-arrow = "0.0.1"
```

## Examples

```rust
let item = stac::read("data/simple-item.json").unwrap();
let table = stac_arrow::to_table(vec![item].into()).unwrap();
let item_collection = stac_arrow::from_table(table).unwrap();
```

Please see the [documentation](https://docs.rs/stac-arrow) for more usage examples.

## Other info

This crate is part of the [stac-rs](https://github.com/stac-utils/stac-rs) monorepo, see its README for contributing and license information.
1 change: 1 addition & 0 deletions stac-arrow/data
Binary file added stac-arrow/examples/extended-item.parquet
Binary file not shown.
Loading

0 comments on commit 49bf5ba

Please sign in to comment.