Skip to content

Commit

Permalink
feat: read stac-geoparquet
Browse files Browse the repository at this point in the history
  • Loading branch information
gadomski committed Jun 25, 2024
1 parent 6d448f1 commit 74c74ff
Show file tree
Hide file tree
Showing 13 changed files with 748 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ members = [
"stac-async",
"stac-cli",
"stac-server",
"stac-validate",
"stac-validate", "stac-arrow",
]
default-members = [
"stac",
Expand Down
29 changes: 29 additions & 0 deletions scripts/create_geoparquet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import stac_geoparquet
import json
from pathlib import Path
from pyarrow import Table
from pystac_client import Client

# Regenerates the example fixtures under ``stac-arrow/examples``: the ten
# sentinel-2-l2a items returned by a Planetary Computer search, written once
# as JSON and once per stac-geoparquet schema version as parquet.

# ``parents[1]`` is the directory that contains this script's own directory.
root = Path(__file__).parents[1]
examples = root / "stac-arrow" / "examples"
# ``exist_ok=True`` makes a separate ``is_dir()`` pre-check unnecessary.
examples.mkdir(parents=True, exist_ok=True)

client = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
item_search = client.search(
    max_items=10,
    collections=["sentinel-2-l2a"],
    intersects={
        "type": "Point",
        "coordinates": [-105.10, 40.17],
    },
    # ``sortby`` is a search parameter, not a member of the GeoJSON geometry;
    # the leading "-" requests descending order on the item datetime.
    sortby="-properties.datetime",
)
items = list(item_search.items_as_dicts())

# Convert the item dictionaries to Arrow record batches, then to one table
# that is reused for every schema version below.
batches = stac_geoparquet.arrow.parse_stac_items_to_arrow(items)
table = Table.from_batches(batches)
for version in ["1.0.0", "1.1.0"]:
    path = examples / f"sentinel-2-l2a-{version}.parquet"
    stac_geoparquet.arrow.to_parquet(table, path, schema_version=version)

# The JSON file does not depend on the schema version, so it is written once
# after the loop.
with open(examples / "sentinel-2-l2a.json", "w") as f:
    json.dump(items, f)
3 changes: 3 additions & 0 deletions scripts/requirements.in
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
pyarrow
pystac-client
stac-api-validator
git+https://github.com/stac-utils/stac-geoparquet@e13f237
53 changes: 50 additions & 3 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,25 @@ attrs==23.2.0
# referencing
certifi==2024.6.2
# via
# pyogrio
# pyproj
# requests
# stac-api-validator
charset-normalizer==3.3.2
# via requests
ciso8601==2.3.1
# via stac-geoparquet
click==8.1.7
# via
# stac-api-validator
# stac-check
# stac-validator
deepdiff==6.7.1
# via stac-api-validator
deltalake==0.18.1
# via stac-geoparquet
geopandas==1.0.0
# via stac-geoparquet
idna==3.7
# via requests
jsonschema==4.22.0
Expand All @@ -30,23 +38,55 @@ jsonschema-specifications==2023.12.1
more-itertools==8.14.0
# via stac-api-validator
numpy==2.0.0
# via shapely
# via
# geopandas
# pandas
# pyarrow
# pyogrio
# shapely
ordered-set==4.1.0
# via deepdiff
orjson==3.10.5
# via pystac
# via
# pystac
# stac-geoparquet
packaging==24.1
# via
# geopandas
# pyogrio
# stac-geoparquet
pandas==2.2.2
# via
# geopandas
# stac-geoparquet
pyarrow==16.1.0
# via
# deltalake
# stac-geoparquet
pyarrow-hotfix==0.6
# via deltalake
pyogrio==0.9.0
# via geopandas
pyproj==3.6.1
# via
# geopandas
# stac-geoparquet
pystac==1.10.1
# via
# pystac-client
# stac-api-validator
# stac-geoparquet
pystac-client==0.7.7
# via stac-api-validator
python-dateutil==2.9.0.post0
# via
# pandas
# pystac
# pystac-client
python-dotenv==1.0.1
# via stac-check
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via
# stac-api-validator
Expand All @@ -66,16 +106,23 @@ rpds-py==0.18.1
# jsonschema
# referencing
shapely==2.0.4
# via stac-api-validator
# via
# geopandas
# stac-api-validator
# stac-geoparquet
six==1.16.0
# via python-dateutil
stac-api-validator==0.6.2
# via -r scripts/requirements.in
stac-check==1.3.3
# via stac-api-validator
stac-geoparquet @ git+https://github.com/stac-utils/stac-geoparquet@e13f237be2aa341ceb21abb341fc47d8fe3efeda
# via -r scripts/requirements.in
stac-validator==3.3.2
# via
# stac-api-validator
# stac-check
tzdata==2024.1
# via pandas
urllib3==2.2.2
# via requests
17 changes: 17 additions & 0 deletions stac-arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Manifest for the `stac-arrow` crate.
[package]
name = "stac-arrow"
version = "0.1.0"
edition = "2021"

# Optional features. Each one only switches on the named feature of the
# `geoarrow` dependency; nothing is enabled by default.
[features]
parquet = ["geoarrow/parquet"]
parquet-compression = ["geoarrow/parquet_compression"]

[dependencies]
arrow = "52"
arrow-cast = "52"
# Pinned to an exact git revision instead of a crates.io release.
# NOTE(review): a git-only dependency blocks publishing this crate to
# crates.io; switch to a released version once one is available.
geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "6fd07d5555f03b113a5cd36c7bf71c1a7cec8579" }
geojson = "0.24"
serde_json = "1"
# Sibling crate: `path` is used when building inside this repository, while
# `version` is the requirement recorded for a published build.
stac = { version = "0.7", path = "../stac" }
thiserror = "1"
Binary file added stac-arrow/examples/sentinel-2-l2a-1.0.0.parquet
Binary file not shown.
Binary file added stac-arrow/examples/sentinel-2-l2a-1.1.0.parquet
Binary file not shown.
1 change: 1 addition & 0 deletions stac-arrow/examples/sentinel-2-l2a.json

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions stac-arrow/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use thiserror::Error;

/// Crate-specific error enum.
///
/// Variants marked `#[error(transparent)]` forward their `Display` output and
/// source to the wrapped error, and `#[from]` lets the `?` operator convert
/// the wrapped error type into this enum. The two bbox variants are specific
/// to this crate and carry the offending JSON value for diagnostics.
#[derive(Debug, Error)]
pub enum Error {
/// Wraps an [arrow::error::ArrowError].
#[error(transparent)]
Arrow(#[from] arrow::error::ArrowError),

/// The bounding box is not a mapping.
///
/// Holds the JSON value that was found instead; it is rendered with its
/// `Debug` representation in the error message.
#[error("bbox is not a map: {0:?}")]
BBoxIsNotAMap(serde_json::Value),

/// Wraps a [geoarrow::error::GeoArrowError].
#[error(transparent)]
GeoArrow(#[from] geoarrow::error::GeoArrowError),

/// An invalid bbox mapping.
///
/// Holds the rejected JSON map, rendered with its `Debug` representation
/// in the error message.
// NOTE(review): what makes a bbox map invalid is decided where this
// variant is constructed, which is outside this file.
#[error("invalid bbox mapping: {0:?}")]
InvalidBBoxMap(serde_json::Map<String, serde_json::Value>),

/// Wraps a [std::io::Error].
#[error(transparent)]
Io(#[from] std::io::Error),

/// Wraps a [serde_json::Error].
#[error(transparent)]
SerdeJson(#[from] serde_json::Error),

/// Wraps a [stac::Error].
#[error(transparent)]
Stac(#[from] stac::Error),
}
Loading

0 comments on commit 74c74ff

Please sign in to comment.