From 7ddf6c2aae7d8befa22d3746eacef16c83ba31b0 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Wed, 20 Sep 2023 15:07:59 +0200 Subject: [PATCH 01/21] 2023.9.1 (#162) * update openeo-processes * bump 2023.9.1 * bump 2023.9.0 * bump 2023.9.0 --- openeo_processes_dask/specs/openeo-processes | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/specs/openeo-processes b/openeo_processes_dask/specs/openeo-processes index 730c13c5..6885090f 160000 --- a/openeo_processes_dask/specs/openeo-processes +++ b/openeo_processes_dask/specs/openeo-processes @@ -1 +1 @@ -Subproject commit 730c13c50fa9b0e3bd9525f950dc82bbe2799329 +Subproject commit 6885090f736092353a0558b14c1aedee03ce87c6 diff --git a/pyproject.toml b/pyproject.toml index 8b4aecc4..44ff0e4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.9.0" +version = "2023.9.1" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From bc52fc96e713ad06d4f59838cc2e1a60ad52d487 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:00:52 +0200 Subject: [PATCH 02/21] fit_curve update time (#167) fit_curve with dimension convertion --- .../ml/curve_fitting.py | 29 +++++++++++++++++-- tests/test_ml.py | 13 +++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 81736ea7..8f5f400d 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -26,6 +26,23 @@ def fit_curve( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) + try: + # Try parsing as datetime first + dates = data[dimension].values + dates = np.asarray(dates, dtype=np.datetime64) + except ValueError: + dates = np.asarray(data[dimension].values) + + if np.issubdtype(dates.dtype, np.datetime64): + timestep = [ + ( + (np.datetime64(x) - np.datetime64("1970-01-01", "s")) + / np.timedelta64(1, "s") + ) + for x in dates + ] + data[dimension] = np.array(timestep) + dims_before = list(data.dims) # In the spec, parameters is a list, but xr.curvefit requires names for them, @@ -87,8 +104,16 @@ def predict_curve( labels = np.asarray(labels) if np.issubdtype(labels.dtype, np.datetime64): - labels = labels.astype(int) labels_were_datetime = True + initial_labels = labels + timestep = [ + ( + (np.datetime64(x) - np.datetime64("1970-01-01", "s")) + / np.timedelta64(1, "s") + ) + for x in labels + ] + labels = np.array(timestep) # This is necessary to pipe the arguments correctly through @process def wrapper(f): @@ -122,6 +147,6 @@ def _wrap(*args, **kwargs): predictions = predictions.assign_coords({dimension: labels.data}) if labels_were_datetime: - predictions[dimension] = pd.DatetimeIndex(predictions[dimension].values) + predictions[dimension] = initial_labels return predictions diff --git a/tests/test_ml.py b/tests/test_ml.py index c9157010..4248b212 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -84,6 +84,19 @@ def fitFunction(x, parameters): assert len(result.coords["param"]) == len(parameters) labels = dimension_labels(origin_cube, 
origin_cube.openeo.temporal_dims[0]) + labels = [float(l) for l in labels] + predictions = predict_curve( + result, + _process, + origin_cube.openeo.temporal_dims[0], + labels=labels, + ).compute() + + assert len(predictions.coords[origin_cube.openeo.temporal_dims[0]]) == len(labels) + assert "param" not in predictions.dims + assert result.rio.crs == predictions.rio.crs + + labels = ["2020-02-02", "2020-03-02", "2020-04-02", "2020-05-02"] predictions = predict_curve( result, _process, From 19af1504e58e1457239e5441387cb59f3120e0f2 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:13:25 +0200 Subject: [PATCH 03/21] Uc6 update (#168) * fit_curve with dimension convertion * bump 2023.9.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 44ff0e4a..1d01fd51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.9.1" +version = "2023.9.2" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 6032ac914b28a312732bdb580c76d27a31ebde3d Mon Sep 17 00:00:00 2001 From: clausmichele <31700619+clausmichele@users.noreply.github.com> Date: Fri, 6 Oct 2023 10:53:17 +0200 Subject: [PATCH 04/21] fix: import experimental processes (#163) Fix import experimental processes --- openeo_processes_dask/process_implementations/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openeo_processes_dask/process_implementations/__init__.py b/openeo_processes_dask/process_implementations/__init__.py index 1129749d..875ee6d5 100644 --- a/openeo_processes_dask/process_implementations/__init__.py +++ b/openeo_processes_dask/process_implementations/__init__.py @@ -15,6 +15,13 @@ "Did not load machine learning processes due to missing dependencies: Install them like this: `pip install openeo-processes-dask[implementations, ml]`" ) +try: + from .experimental import * +except ImportError as e: + logger.warning( + "Did not experimental processes due to missing dependencies: Install them like this: `pip install openeo-processes-dask[implementations, experimental]`" + ) + import rioxarray as rio # Required for the .rio accessor on xarrays. import openeo_processes_dask.process_implementations.cubes._xr_interop From 8588f3d29953b2fca8ace72e77f81233737189f9 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Fri, 6 Oct 2023 11:38:54 +0200 Subject: [PATCH 05/21] Release 2023.10.1 (#169) release 2023.10.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1d01fd51..d63bbb5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.9.2" +version = "2023.10.1" description = "Python implementations of many OpenEO processes, dask-friendly by default." 
authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From c254f6ec484d27b2e0ee4465d3c4633bc48791ac Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Wed, 11 Oct 2023 09:30:00 +0200 Subject: [PATCH 06/21] Fix: update chunking of all dimensions to auto (except dimension) (#171) * update chunking of all dimensions to auto (except dimension) * bump ver --------- Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 5 ++++- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 8f5f400d..aad967be 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -49,8 +49,11 @@ def fit_curve( # so we do this to generate names locally parameters = {f"param_{i}": v for i, v in enumerate(parameters)} + chunking = {key: "auto" for key in data.dims if key != dimension} + chunking[dimension] = -1 + # The dimension along which to fit the curves cannot be chunked! - rechunked_data = data.chunk({dimension: -1}) + rechunked_data = data.chunk(chunking) def wrapper(f): def _wrap(*args, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index d63bbb5b..a2ccff80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.1" +version = "2023.10.2" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 6e77c76d8dc52d12302bf0638c3fe2144cdb29c6 Mon Sep 17 00:00:00 2001 From: clausmichele <31700619+clausmichele@users.noreply.github.com> Date: Wed, 11 Oct 2023 14:29:05 +0200 Subject: [PATCH 07/21] fix: reduce_spatial (#164) * fix reduce_spatial * Fix context * Revert "fix reduce_spatial" This reverts commit f1fdac3d2b51777ec1dc9dbd2b60b7613364a3c6. * Revert "Revert "fix reduce_spatial"" This reverts commit b3d45dd64ecb443fd8b45aba3d68ec66e9a585f9. * Revert "Fix context" This reverts commit c8fbcfc5b0b7dbcd29291dde5137d84159fb651a. 
--- .../process_implementations/cubes/reduce.py | 3 +- tests/test_reduce.py | 36 ++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/openeo_processes_dask/process_implementations/cubes/reduce.py b/openeo_processes_dask/process_implementations/cubes/reduce.py index 0998fc81..5641edc4 100644 --- a/openeo_processes_dask/process_implementations/cubes/reduce.py +++ b/openeo_processes_dask/process_implementations/cubes/reduce.py @@ -54,9 +54,8 @@ def reduce_spatial( spatial_dims = data.openeo.spatial_dims if data.openeo.spatial_dims else None return data.reduce( reducer, - dimension=spatial_dims, + dim=spatial_dims, keep_attrs=True, - context=context, positional_parameters=positional_parameters, named_parameters=named_parameters, ) diff --git a/tests/test_reduce.py b/tests/test_reduce.py index 3c3804da..5bc4ed8c 100644 --- a/tests/test_reduce.py +++ b/tests/test_reduce.py @@ -5,7 +5,10 @@ import xarray as xr from openeo_pg_parser_networkx.pg_schema import ParameterReference -from openeo_processes_dask.process_implementations.cubes.reduce import reduce_dimension +from openeo_processes_dask.process_implementations.cubes.reduce import ( + reduce_dimension, + reduce_spatial, +) from tests.general_checks import general_output_checks from tests.mockdata import create_fake_rastercube @@ -39,3 +42,34 @@ def test_reduce_dimension( ) xr.testing.assert_equal(output_cube, input_cube.mean(dim="t")) + + +@pytest.mark.parametrize("size", [(30, 30, 20, 4)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_reduce_spatial( + temporal_interval, bounding_box, random_raster_data, process_registry +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B03", "B04", "B08"], + backend="dask", + ) + + _process = partial( + process_registry["sum"].implementation, + ignore_nodata=True, + data=ParameterReference(from_parameter="data"), + ) + + output_cube = reduce_spatial(data=input_cube, reducer=_process) + + general_output_checks( + input_cube=input_cube, + output_cube=output_cube, + verify_attrs=False, + verify_crs=True, + ) + + xr.testing.assert_equal(output_cube, input_cube.sum(dim=["x", "y"])) From 2c8f1ccf9c6d036897eba23f5310483a427331a2 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Wed, 11 Oct 2023 16:10:40 +0200 Subject: [PATCH 08/21] 2023.10.3 (#173) * update prediction * update tests for code cov * handle one band --- .../process_implementations/ml/curve_fitting.py | 12 ++++++++++-- pyproject.toml | 2 +- tests/test_ml.py | 10 ++++++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index aad967be..98ef6db4 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -25,6 +25,10 @@ def fit_curve( raise DimensionNotAvailable( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) + bands_required = False + if "bands" in data.dims: + if len(data["bands"].values) == 1: + bands_required = data["bands"].values[0] try: # Try parsing as datetime first @@ -81,11 +85,15 @@ def _wrap(*args, **kwargs): .drop_dims(["cov_i", "cov_j"]) .to_array() .squeeze() - .transpose(*expected_dims_after) ) fit_result.attrs = data.attrs fit_result = 
fit_result.rio.write_crs(rechunked_data.rio.crs) + if bands_required and not "bands" in fit_result.dims: + fit_result = fit_result.assign_coords(**{"bands": bands_required}) + fit_result = fit_result.expand_dims(dim="bands") + + fit_result = fit_result.transpose(*expected_dims_after) return fit_result @@ -99,6 +107,7 @@ def predict_curve( ): labels_were_datetime = False dims_before = list(parameters.dims) + initial_labels = labels try: # Try parsing as datetime first @@ -108,7 +117,6 @@ def predict_curve( if np.issubdtype(labels.dtype, np.datetime64): labels_were_datetime = True - initial_labels = labels timestep = [ ( (np.datetime64(x) - np.datetime64("1970-01-01", "s")) diff --git a/pyproject.toml b/pyproject.toml index a2ccff80..426ffefa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.2" +version = "2023.10.3" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] diff --git a/tests/test_ml.py b/tests/test_ml.py index 4248b212..3f185fd1 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -83,8 +83,14 @@ def fitFunction(x, parameters): assert len(result.coords["y"]) == len(origin_cube.coords["y"]) assert len(result.coords["param"]) == len(parameters) + origin_cube_B02 = origin_cube.sel(bands=["B02"]) + result_B02 = fit_curve( + origin_cube_B02, parameters=parameters, function=_process, dimension="t" + ) + assert "bands" in result_B02.dims + assert result_B02["bands"].values == "B02" + labels = dimension_labels(origin_cube, origin_cube.openeo.temporal_dims[0]) - labels = [float(l) for l in labels] predictions = predict_curve( result, _process, @@ -96,7 +102,7 @@ def fitFunction(x, parameters): assert "param" not in predictions.dims assert result.rio.crs == predictions.rio.crs - labels = ["2020-02-02", "2020-03-02", "2020-04-02", "2020-05-02"] + labels = [0, 1, 2, 3] predictions = predict_curve( result, _process, From 65c80ff89306c6648659710df3601b8f7b0dc81e Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 10:16:55 +0200 Subject: [PATCH 09/21] Upd: persist rechunked array to cluster first to avoid multiple file calls (#174) persist rechunked array to cluster first to avoid multiple file calls Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 98ef6db4..38d9d64e 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -58,6 +58,7 @@ def fit_curve( # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) + rechunked_data.persist() def wrapper(f): def _wrap(*args, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index 426ffefa..f136374e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.3" +version = "2023.10.4" description = "Python implementations of many OpenEO processes, dask-friendly by default." 
authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From ee873177994fa5ba615c02885f14b30f2f3a1293 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 10:48:19 +0200 Subject: [PATCH 10/21] fix persist not using itself (#176) Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 38d9d64e..d31207e3 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -58,7 +58,7 @@ def fit_curve( # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) - rechunked_data.persist() + rechunked_data = rechunked_data.persist() def wrapper(f): def _wrap(*args, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index f136374e..a64890b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.4" +version = "2023.10.5" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From a0edb504ad9fb6b503d2cc00e6a50a60e51aa526 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 13:47:41 +0200 Subject: [PATCH 11/21] experimental: allow chunking along time axis (#177) Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 3 +-- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index d31207e3..7b14b74d 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -53,8 +53,7 @@ def fit_curve( # so we do this to generate names locally parameters = {f"param_{i}": v for i, v in enumerate(parameters)} - chunking = {key: "auto" for key in data.dims if key != dimension} - chunking[dimension] = -1 + chunking = {key: "auto" for key in data.dims} # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) diff --git a/pyproject.toml b/pyproject.toml index a64890b4..73659e25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.5" +version = "2023.10.6" description = "Python implementations of many OpenEO processes, dask-friendly by default." 
authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 597d1005a91bb36823d191cb14507f5a1ee716f1 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 14:20:21 +0200 Subject: [PATCH 12/21] reverse experimental chunking change (#178) Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 7b14b74d..d31207e3 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -53,7 +53,8 @@ def fit_curve( # so we do this to generate names locally parameters = {f"param_{i}": v for i, v in enumerate(parameters)} - chunking = {key: "auto" for key in data.dims} + chunking = {key: "auto" for key in data.dims if key != dimension} + chunking[dimension] = -1 # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) diff --git a/pyproject.toml b/pyproject.toml index 73659e25..3e4f6f92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.6" +version = "2023.10.7" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 6a218e725c203555cdf9f1d5a42a9a6b72f1d2a7 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 19 Oct 2023 17:13:58 +0200 Subject: [PATCH 13/21] Fix: fix logger logging unbound attribute (#179) fix logger logging unbound attribute Co-authored-by: Gerald Walter Irsiegler --- openeo_processes_dask/process_implementations/core.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/openeo_processes_dask/process_implementations/core.py b/openeo_processes_dask/process_implementations/core.py index 1d78ec85..cefaf919 100644 --- a/openeo_processes_dask/process_implementations/core.py +++ b/openeo_processes_dask/process_implementations/core.py @@ -75,10 +75,11 @@ def wrapper( resolved_kwargs.pop(arg, None) pretty_args = {k: repr(v)[:80] for k, v in resolved_kwargs.items()} - logger.info(f"Running process {f.__name__}") - logger.debug( - f"Running process {f.__name__} with resolved parameters: {pretty_args}" - ) + if hasattr(f, "__name__"): + logger.info(f"Running process {f.__name__}") + logger.debug( + f"Running process {f.__name__} with resolved parameters: {pretty_args}" + ) return f(*resolved_args, **resolved_kwargs) From c865ba972206ea1ab05f18de8f9ef2d7bee618f0 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 19 Oct 2023 17:16:32 +0200 Subject: [PATCH 14/21] Up: bump ver 2023.10.8 (#180) bump ver Co-authored-by: Gerald Walter Irsiegler --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3e4f6f92..220388c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.7" +version = "2023.10.8" description = "Python implementations of many OpenEO processes, dask-friendly by default." 
authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 046c4a9841891e45af4f0419075126f65be46493 Mon Sep 17 00:00:00 2001 From: Michele Claus <31700619+clausmichele@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:00:15 +0100 Subject: [PATCH 15/21] Fix ignore_nodata=True (#183) --- openeo_processes_dask/process_implementations/math.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openeo_processes_dask/process_implementations/math.py b/openeo_processes_dask/process_implementations/math.py index 2888b285..1bc927f7 100644 --- a/openeo_processes_dask/process_implementations/math.py +++ b/openeo_processes_dask/process_implementations/math.py @@ -126,21 +126,21 @@ def median(data, ignore_nodata=True, axis=None, keepdims=False): return np.median(data, axis=axis, keepdims=keepdims) -def mean(data, ignore_nodata=False, axis=None, keepdims=False): +def mean(data, ignore_nodata=True, axis=None, keepdims=False): if ignore_nodata: return np.nanmean(data, axis=axis, keepdims=keepdims) else: return np.mean(data, axis=axis, keepdims=keepdims) -def sd(data, ignore_nodata=False, axis=None, keepdims=False): +def sd(data, ignore_nodata=True, axis=None, keepdims=False): if ignore_nodata: return np.nanstd(data, axis=axis, ddof=1, keepdims=keepdims) else: return np.std(data, axis=axis, ddof=1, keepdims=keepdims) -def variance(data, ignore_nodata=False, axis=None, keepdims=False): +def variance(data, ignore_nodata=True, axis=None, keepdims=False): if ignore_nodata: return np.nanvar(data, axis=axis, ddof=1, keepdims=keepdims) else: From e63516bec12b40f6317b7c03da07bcaeaa5a4e22 Mon Sep 17 00:00:00 2001 From: Mohammad Alasawedah <61426508+masawdah@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:14:14 +0100 Subject: [PATCH 16/21] Filter spatial (#170) * spatial filter * spatial filter * improvement * name dimesnions * remove compute * handle CRS * support feature collection * clean up * updated filter * pytest filter_spatial * reformatted files * pre-check * pre-check * remove parcels * mask_polygon * mask_polygon * pytest mask_polygon * add parameters to mask_polygon * add parameters to mask_polygon * update __init__ file * mask parameter * automated expand mask dimension * update pytest mask_polygon * mask_polygon process * mask_polygon * update mask_polygon * update mask_polygon * final check mask_polygon * final check mask_polygon * clean up * clean up * clean the load.py --- .../process_implementations/cubes/__init__.py | 1 + .../process_implementations/cubes/_filter.py | 38 +++- .../cubes/mask_polygon.py | 165 ++++++++++++++++++ tests/conftest.py | 25 +++ tests/test_filter.py | 23 +++ tests/test_mask.py | 31 ++++ 6 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 openeo_processes_dask/process_implementations/cubes/mask_polygon.py create mode 100644 tests/test_mask.py diff --git a/openeo_processes_dask/process_implementations/cubes/__init__.py b/openeo_processes_dask/process_implementations/cubes/__init__.py index 164a5795..d18a1475 100644 --- a/openeo_processes_dask/process_implementations/cubes/__init__.py +++ b/openeo_processes_dask/process_implementations/cubes/__init__.py @@ -4,5 +4,6 @@ from .general import * from .indices import * from .load import * +from .mask_polygon import * from .merge import * from .reduce import * diff --git a/openeo_processes_dask/process_implementations/cubes/_filter.py b/openeo_processes_dask/process_implementations/cubes/_filter.py index 3cf5d4a0..7eb50f4c 100644 --- 
a/openeo_processes_dask/process_implementations/cubes/_filter.py +++ b/openeo_processes_dask/process_implementations/cubes/_filter.py @@ -1,13 +1,21 @@ +import json import logging import warnings from typing import Callable +import dask.array as da +import geopandas as gpd import numpy as np import pyproj +import rasterio import rioxarray +import shapely import xarray as xr from openeo_pg_parser_networkx.pg_schema import BoundingBox, TemporalInterval +from openeo_processes_dask.process_implementations.cubes.mask_polygon import ( + mask_polygon, +) from openeo_processes_dask.process_implementations.data_model import RasterCube from openeo_processes_dask.process_implementations.exceptions import ( BandFilterParameterMissing, @@ -16,9 +24,18 @@ TooManyDimensions, ) +DEFAULT_CRS = "EPSG:4326" + + logger = logging.getLogger(__name__) -__all__ = ["filter_labels", "filter_temporal", "filter_bands", "filter_bbox"] +__all__ = [ + "filter_labels", + "filter_temporal", + "filter_bands", + "filter_bbox", + "filter_spatial", +] def filter_temporal( @@ -102,6 +119,25 @@ def filter_bands(data: RasterCube, bands: list[str] = None) -> RasterCube: return data +def filter_spatial(data: RasterCube, geometries) -> RasterCube: + if "type" in geometries and geometries["type"] == "FeatureCollection": + gdf = gpd.GeoDataFrame.from_features(geometries, DEFAULT_CRS) + elif "type" in geometries and geometries["type"] in ["Polygon"]: + polygon = shapely.geometry.Polygon(geometries["coordinates"][0]) + gdf = gpd.GeoDataFrame(geometry=[polygon]) + gdf.crs = DEFAULT_CRS + + bbox = gdf.total_bounds + spatial_extent = BoundingBox( + west=bbox[0], east=bbox[2], south=bbox[1], north=bbox[3] + ) + + data = filter_bbox(data, spatial_extent) + data = mask_polygon(data, geometries) + + return data + + def filter_bbox(data: RasterCube, extent: BoundingBox) -> RasterCube: try: input_crs = str(data.rio.crs) diff --git a/openeo_processes_dask/process_implementations/cubes/mask_polygon.py b/openeo_processes_dask/process_implementations/cubes/mask_polygon.py new file mode 100644 index 00000000..5731b45a --- /dev/null +++ b/openeo_processes_dask/process_implementations/cubes/mask_polygon.py @@ -0,0 +1,165 @@ +import json +import logging +from typing import Any, Union + +import dask.array as da +import geopandas as gpd +import numpy as np +import rasterio +import rioxarray +import shapely +import xarray as xr +from xarray.core import dtypes + +from openeo_processes_dask.process_implementations.data_model import ( + RasterCube, + VectorCube, +) + +DEFAULT_CRS = "EPSG:4326" + + +logger = logging.getLogger(__name__) + +__all__ = [ + "mask_polygon", +] + + +def mask_polygon( + data: RasterCube, + mask: Union[VectorCube, str], + replacement: Any = dtypes.NA, + inside: bool = True, +) -> RasterCube: + y_dim = data.openeo.y_dim + x_dim = data.openeo.x_dim + t_dim = data.openeo.temporal_dims + b_dim = data.openeo.band_dims + + if len(t_dim) == 0: + t_dim = None + else: + t_dim = t_dim[0] + if len(b_dim) == 0: + b_dim = None + else: + b_dim = b_dim[0] + + y_dim_size = data.sizes[y_dim] + x_dim_size = data.sizes[x_dim] + + # Reproject vector data to match the raster data cube. + ## Get the CRS of data cube + try: + data_crs = str(data.rio.crs) + except Exception as e: + raise Exception(f"Not possible to estimate the input data projection! 
{e}") + + data = data.rio.set_crs(data_crs) + + ## Reproject vector data if the input vector data is Polygon or Multi Polygon + if "type" in mask and mask["type"] == "FeatureCollection": + geometries = gpd.GeoDataFrame.from_features(mask, DEFAULT_CRS) + geometries = geometries.to_crs(data_crs) + geometries = geometries.to_json() + geometries = json.loads(geometries) + elif "type" in mask and mask["type"] in ["Polygon"]: + polygon = shapely.geometry.Polygon(mask["coordinates"][0]) + geometries = gpd.GeoDataFrame(geometry=[polygon]) + geometries.crs = DEFAULT_CRS + geometries = geometries.to_crs(data_crs) + geometries = geometries.to_json() + geometries = json.loads(geometries) + else: + raise ValueError( + "Unsupported or missing geometry type. Expected 'Polygon' or 'FeatureCollection'." + ) + + data_dims = list(data.dims) + + # Get the Affine transformer + transform = data.rio.transform() + + # Initialize an empty mask + # Set the same chunk size as the input data + data_chunks = {} + chunks_shapes = data.chunks + for i, d in enumerate(data_dims): + data_chunks[d] = chunks_shapes[i][0] + + if data_dims.index(x_dim[0]) < data_dims.index(y_dim[0]): + final_mask = da.zeros( + (x_dim_size, y_dim_size), + chunks={x_dim: data_chunks[x_dim], y_dim: data_chunks[y_dim]}, + dtype=bool, + ) + + dask_out_shape = da.from_array( + (x_dim_size, y_dim_size), + chunks={x_dim: data_chunks[x_dim], y_dim: data_chunks[y_dim]}, + ) + else: + final_mask = da.zeros( + (y_dim_size, x_dim_size), + chunks={y_dim: data_chunks[y_dim], x_dim: data_chunks[x_dim]}, + dtype=bool, + ) + + dask_out_shape = da.from_array( + (y_dim_size, x_dim_size), + chunks={y_dim: data_chunks[y_dim], x_dim: data_chunks[x_dim]}, + ) + + # CHECK IF the input single polygon or multiple Polygons + if "type" in geometries and geometries["type"] == "FeatureCollection": + for feature in geometries["features"]: + polygon = shapely.geometry.Polygon(feature["geometry"]["coordinates"][0]) + + # Create a GeoSeries from the geometry + geo_series = gpd.GeoSeries(polygon) + + # Convert the GeoSeries to a GeometryArray + geometry_array = geo_series.geometry.array + + mask = da.map_blocks( + rasterio.features.geometry_mask, + geometry_array, + transform=transform, + out_shape=dask_out_shape, + dtype=bool, + invert=inside, + ) + final_mask |= mask + + elif "type" in geometries and geometries["type"] in ["Polygon"]: + polygon = shapely.geometry.Polygon(geometries["coordinates"][0]) + geo_series = gpd.GeoSeries(polygon) + + # Convert the GeoSeries to a GeometryArray + geometry_array = geo_series.geometry.array + mask = da.map_blocks( + rasterio.features.geometry_mask, + geometry_array, + transform=transform, + out_shape=dask_out_shape, + dtype=bool, + invert=inside, + ) + final_mask |= mask + + masked_dims = len(final_mask.shape) + + diff_axes = [] + for axis in range(len(data_dims)): + try: + if final_mask.shape[axis] != data.shape[axis]: + diff_axes.append(axis) + except: + if len(diff_axes) < (len(data_dims) - 2): + diff_axes.append(axis) + + final_mask = np.expand_dims(final_mask, axis=diff_axes) + filtered_ds = data.where(final_mask, other=replacement) + + return filtered_ds diff --git a/tests/conftest.py b/tests/conftest.py index a80b3231..f7dfcf73 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ import importlib import inspect +import json import logging import dask_geopandas @@ -14,6 +15,7 @@ BoundingBox, TemporalInterval, ) +from shapely.geometry import Polygon from openeo_processes_dask.process_implementations.core import 
process from openeo_processes_dask.process_implementations.data_model import VectorCube @@ -68,6 +70,29 @@ def bounding_box_small( return BoundingBox.parse_obj(spatial_extent) +@pytest.fixture +def polygon_geometry_small( + west=10.47, east=10.48, south=46.12, north=46.18, crs="EPSG:4326" +): + # Bounding box coordinates + west, east, south, north = 10.47, 10.48, 46.12, 46.18 + + # Create a small polygon + geometry = [ + Polygon( + [(west, south), (west, north), (east, north), (east, south), (west, south)] + ) + ] + + # Create a GeoDataFrame with a single polygon and default CRS 'wgs84' + gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs) + + geometries = gdf.to_json() + geometries = json.loads(geometries) + + return geometries + + @pytest.fixture def temporal_interval(interval=["2018-05-01", "2018-06-01"]) -> TemporalInterval: return TemporalInterval.parse_obj(interval) diff --git a/tests/test_filter.py b/tests/test_filter.py index efb1ebba..cf7f840f 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -10,6 +10,7 @@ from openeo_processes_dask.process_implementations.cubes._filter import ( filter_bands, filter_bbox, + filter_spatial, filter_temporal, ) from openeo_processes_dask.process_implementations.cubes.reduce import reduce_dimension @@ -83,6 +84,28 @@ def test_filter_bands(temporal_interval, bounding_box, random_raster_data): assert output_cube["bands"].values == "SCL" +@pytest.mark.parametrize("size", [(30, 30, 30, 1)]) +@pytest.mark.parametrize("dtype", [np.uint8]) +def test_filter_spatial( + temporal_interval, + bounding_box, + random_raster_data, + polygon_geometry_small, +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02"], + backend="dask", + ) + + output_cube = filter_spatial(data=input_cube, geometries=polygon_geometry_small) + + assert len(output_cube.y) < len(input_cube.y) + assert len(output_cube.x) < len(input_cube.x) + + @pytest.mark.parametrize("size", [(30, 30, 1, 1)]) @pytest.mark.parametrize("dtype", [np.uint8]) def test_filter_bbox( diff --git a/tests/test_mask.py b/tests/test_mask.py new file mode 100644 index 00000000..c5c6148e --- /dev/null +++ b/tests/test_mask.py @@ -0,0 +1,31 @@ +import numpy as np +import pytest +from openeo_pg_parser_networkx.pg_schema import TemporalInterval + +from openeo_processes_dask.process_implementations.cubes.mask_polygon import ( + mask_polygon, +) +from tests.mockdata import create_fake_rastercube + + +@pytest.mark.parametrize("size", [(30, 30, 30, 1)]) +@pytest.mark.parametrize("dtype", [np.uint8]) +def test_mask_polygon( + temporal_interval, + bounding_box, + random_raster_data, + polygon_geometry_small, +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02"], + backend="dask", + ) + + output_cube = mask_polygon(data=input_cube, mask=polygon_geometry_small) + + assert np.isnan(output_cube).sum() > np.isnan(input_cube).sum() + assert len(output_cube.y) == len(input_cube.y) + assert len(output_cube.x) == len(input_cube.x) From 9719c74c038b3074d2832557318eb016affc6bcc Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:17:05 +0100 Subject: [PATCH 17/21] bump release 2023.10.9 (#185) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 220388c1..956f39ed 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.8" +version = "2023.10.9" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 416ccdb321d78307551363a77f8bd95db95367cc Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Thu, 2 Nov 2023 12:19:08 +0100 Subject: [PATCH 18/21] add new processes filter_spatial, mask_polygon (#189) --- openeo_processes_dask/specs/openeo-processes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openeo_processes_dask/specs/openeo-processes b/openeo_processes_dask/specs/openeo-processes index 6885090f..86110fac 160000 --- a/openeo_processes_dask/specs/openeo-processes +++ b/openeo_processes_dask/specs/openeo-processes @@ -1 +1 @@ -Subproject commit 6885090f736092353a0558b14c1aedee03ce87c6 +Subproject commit 86110facbca0e321afd0ac155b753864ecc7778a From cfe15ce0ac2fa6ed0ae0c19437378ec2e495e24f Mon Sep 17 00:00:00 2001 From: Michele Claus <31700619+clausmichele@users.noreply.github.com> Date: Thu, 2 Nov 2023 12:21:41 +0100 Subject: [PATCH 19/21] Fixing edge case drop_dimension (#188) * Fixing edge case drop_dimension * fix pre-commit * fix pre-commit --- .../process_implementations/cubes/general.py | 2 +- tests/test_dimensions.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/cubes/general.py b/openeo_processes_dask/process_implementations/cubes/general.py index d99e5e74..14639250 100644 --- a/openeo_processes_dask/process_implementations/cubes/general.py +++ b/openeo_processes_dask/process_implementations/cubes/general.py @@ -21,7 +21,7 @@ def drop_dimension(data: RasterCube, name: str) -> RasterCube: raise DimensionLabelCountMismatch( f"The number of dimension labels exceeds one, which requires a reducer. Dimension ({name}) has {len(data[name])} labels." 
) - return data.drop_vars(name).squeeze() + return data.drop_vars(name).squeeze(name) def create_raster_cube() -> RasterCube: diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index 1088cb7f..d17742f1 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -42,7 +42,7 @@ def test_add_dimension(temporal_interval, bounding_box, random_raster_data): assert output_cube_2.openeo.temporal_dims[1] == "weird" -@pytest.mark.parametrize("size", [(30, 30, 20, 2)]) +@pytest.mark.parametrize("size", [(30, 30, 1, 2)]) @pytest.mark.parametrize("dtype", [np.float32]) def test_drop_dimension(temporal_interval, bounding_box, random_raster_data): input_cube = create_fake_rastercube( @@ -63,4 +63,6 @@ def test_drop_dimension(temporal_interval, bounding_box, random_raster_data): suitable_cube = input_cube.where(input_cube.bands == "B02", drop=True) output_cube = drop_dimension(suitable_cube, DIM_TO_DROP) + DIMS_TO_KEEP = tuple(filter(lambda y: y != DIM_TO_DROP, input_cube.dims)) assert DIM_TO_DROP not in output_cube.dims + assert DIMS_TO_KEEP == output_cube.dims From 1d68c5116919509945eb44de6840f491875d3c50 Mon Sep 17 00:00:00 2001 From: Michele Claus <31700619+clausmichele@users.noreply.github.com> Date: Thu, 2 Nov 2023 12:22:47 +0100 Subject: [PATCH 20/21] fix: hardcoded band dim name (#166) * Fix hardcoded band dim name * Update merge.py --- .../process_implementations/cubes/merge.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/openeo_processes_dask/process_implementations/cubes/merge.py b/openeo_processes_dask/process_implementations/cubes/merge.py index d26a696c..78045551 100644 --- a/openeo_processes_dask/process_implementations/cubes/merge.py +++ b/openeo_processes_dask/process_implementations/cubes/merge.py @@ -100,18 +100,17 @@ def merge_cubes( previous_dim_order = list(cube1.dims) + [ dim for dim in cube2.dims if dim not in cube1.dims ] - + band_dim1 = cube1.openeo.band_dims[0] + band_dim2 = cube2.openeo.band_dims[0] if len(cube1.openeo.band_dims) > 0 or len(cube2.openeo.band_dims) > 0: # Same reordering issue mentioned above - previous_band_order = list( - cube1[cube1.openeo.band_dims[0]].values - ) + [ + previous_band_order = list(cube1[band_dim1].values) + [ band for band in list(cube2[cube2.openeo.band_dims[0]].values) - if band not in list(cube1[cube1.openeo.band_dims[0]].values) + if band not in list(cube1[band_dim1].values) ] - cube1 = cube1.to_dataset(cube1.openeo.band_dims[0]) - cube2 = cube2.to_dataset(cube2.openeo.band_dims[0]) + cube1 = cube1.to_dataset(band_dim1) + cube2 = cube2.to_dataset(band_dim2) # compat="override" to deal with potentially conflicting coords # see https://github.com/Open-EO/openeo-processes-dask/pull/148 for context @@ -119,8 +118,8 @@ def merge_cubes( [cube1, cube2], combine_attrs="drop_conflicts", compat="override" ) if isinstance(merged_cube, xr.Dataset): - merged_cube = merged_cube.to_array(dim="bands") - merged_cube = merged_cube.reindex({"bands": previous_band_order}) + merged_cube = merged_cube.to_array(dim=band_dim1) + merged_cube = merged_cube.reindex({band_dim1: previous_band_order}) merged_cube = merged_cube.transpose(*previous_dim_order) From c09c805e337f357af48e393cc84a0719639b6b05 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Thu, 2 Nov 2023 16:47:47 +0100 Subject: [PATCH 21/21] bump release 2023.11.1 (#190) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml 
index 956f39ed..9f58b1ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.9" +version = "2023.11.1" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "]
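
A closing illustration of the drop_dimension edge case fixed in "Fixing edge case drop_dimension (#188)", as a minimal sketch on toy data (assumed, not taken from the patches): squeeze() without an argument removes every length-1 dimension, so a cube that still holds another length-1 dimension (for example a single timestep) would lose that one too, while squeeze(name) removes only the dimension actually being dropped.

    import numpy as np
    import xarray as xr

    cube = xr.DataArray(np.zeros((30, 30, 1, 1)), dims=("y", "x", "t", "bands"))

    print(cube.squeeze().dims)         # ('y', 'x')      -- old behaviour also drops "t"
    print(cube.squeeze("bands").dims)  # ('y', 'x', 't') -- only "bands" is dropped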