Skip to content

Commit

Permalink
Fix pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
GriffinBabe committed Mar 14, 2024
1 parent 11c2fe1 commit 9c993be
Showing 1 changed file with 18 additions and 21 deletions.
39 changes: 18 additions & 21 deletions src/openeo_gfmap/manager/job_splitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from pathlib import Path
from typing import List

import requests
import geopandas as gpd
import h3
import requests

from openeo_gfmap.manager import _log

Expand All @@ -16,14 +16,14 @@ def _load_s2_grid() -> gpd.GeoDataFrame:
# Builds the path where the geodataframe should be
gdf_path = Path.home() / ".openeo-gfmap" / "s2grid_bounds.geojson"
if not gdf_path.exists():
_log.info('S2 grid not found, downloading it from artifactory.')
_log.info("S2 grid not found, downloading it from artifactory.")
# Downloads the file from the artifactory URL
gdf_path.parent.mkdir(exist_ok=True)
response = requests.get(
'https://artifactory.vgt.vito.be/artifactory/auxdata-public/gfmap/s2grid_bounds.geojson',
timeout=180 # 3mins
"https://artifactory.vgt.vito.be/artifactory/auxdata-public/gfmap/s2grid_bounds.geojson",
timeout=180, # 3mins
)
with open(gdf_path, 'wb') as f:
with open(gdf_path, "wb") as f:
f.write(response.content)
return gpd.read_file(gdf_path)

Expand All @@ -34,12 +34,10 @@ def _resplit_group(polygons: gpd.GeoDataFrame, max_points: int) -> List[gpd.GeoD
yield polygons.iloc[i : i + max_points].reset_index(drop=True)


def split_job_s2grid(
polygons: gpd.GeoDataFrame, max_points: int = 500
) -> List[gpd.GeoDataFrame]:
def split_job_s2grid(polygons: gpd.GeoDataFrame, max_points: int = 500) -> List[gpd.GeoDataFrame]:
"""Split a job into multiple jobs from the position of the polygons/points. The centroid of
the geometries to extract is used to select a tile in the Sentinel-2 tile grid.
Parameters
----------
polygons: gpd.GeoDataFrame
Expand All @@ -53,24 +51,24 @@ def split_job_s2grid(
"""
if "geometry" not in polygons.columns:
raise ValueError("The GeoDataFrame must contain a 'geometry' column.")

if polygons.crs is None:
raise ValueError("The GeoDataFrame must contain a CRS")

polygons = polygons.to_crs(epsg=4326)
if polygons.geometry.geom_type[0] != 'Point':
polygons['geometry'] = polygons.geometry.centroid
if polygons.geometry.geom_type[0] != "Point":
polygons["geometry"] = polygons.geometry.centroid

# Dataset containing all the S2 tiles, find the nearest S2 tile for each point
s2_grid = _load_s2_grid()
s2_grid['geometry'] = s2_grid.geometry.centroid
s2_grid["geometry"] = s2_grid.geometry.centroid

polygons = gpd.sjoin_nearest(
polygons, s2_grid[['tile', 'geometry']]
).drop(columns=['index_right'])
polygons = gpd.sjoin_nearest(polygons, s2_grid[["tile", "geometry"]]).drop(
columns=["index_right"]
)

split_datasets = []
for _, sub_gdf in polygons.groupby('tile'):
for _, sub_gdf in polygons.groupby("tile"):
if len(sub_gdf) > max_points:
# Performs another split
split_datasets.extend(_resplit_group(sub_gdf, max_points))
Expand All @@ -85,11 +83,10 @@ def _append_h3_index(polygons: gpd.GeoDataFrame, grid_resolution: int = 3) -> gp
geom_col = polygons.geometry.centroid
else:
geom_col = polygons.geometry
polygons["h3index"] = geom_col.apply(
lambda pt: h3.geo_to_h3(pt.y, pt.x, grid_resolution)
)
polygons["h3index"] = geom_col.apply(lambda pt: h3.geo_to_h3(pt.y, pt.x, grid_resolution))
return polygons


def split_job_hex(
polygons: gpd.GeoDataFrame, max_points: int = 500, grid_resolution: int = 3
) -> List[gpd.GeoDataFrame]:
Expand Down

0 comments on commit 9c993be

Please sign in to comment.