From 7553fd5878e9bd8af3ddf3ebd5c12038eb2e7441 Mon Sep 17 00:00:00 2001 From: FarmVibes Release Pipeline Date: Thu, 26 Oct 2023 17:48:24 +0000 Subject: [PATCH 1/7] Sync with prod branch Co-authored-by: Alex Crown Co-authored-by: Eduardo Rodrigues Co-authored-by: Rafael Padilha Co-authored-by: Renato Luiz de Freitas Cunha --- .../source/docfiles/markdown/WORKFLOW_LIST.md | 18 ++++++++++++++++ .../vibe_core/terraform/aks/README.md | 21 +++++++++++++++++++ .../terraform/aks/example-vars.tfvars | 7 +++++++ 3 files changed, 46 insertions(+) create mode 100644 src/vibe_core/vibe_core/terraform/aks/README.md create mode 100644 src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars diff --git a/docs/source/docfiles/markdown/WORKFLOW_LIST.md b/docs/source/docfiles/markdown/WORKFLOW_LIST.md index eca05337..649504a6 100644 --- a/docs/source/docfiles/markdown/WORKFLOW_LIST.md +++ b/docs/source/docfiles/markdown/WORKFLOW_LIST.md @@ -124,6 +124,24 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`heatmap/classification` πŸ“„](workflow_yaml/data_processing/heatmap/classification.md): Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. The inference operation predicts nutrients in soil for the chosen farm boundary. +The workflow generates a heatmap for selected nutrient. It relies on sample soil data that contain information of nutrients. +The quantity of samples define the accuracy of the heat map generation. During the research performed testing with +samples spaced at 200 feet, 100 feet and 50 feet. The 50 feet sample spaced distance provided results matching to the +ground truth. Generating heatmap with this approach reduce the number of samples. +It utilizes the logic below behind the scenes to generate heatmap. + - Read the sentinel raster provided. 
+ - Sensor samples needs to be uploaded into prescriptions entity in Azure data manager for Agriculture (ADMAg). ADMAg is having hierarchy + to hold information of Farmer, Field, Seasons, Crop, Boundary etc. Prior to uploading prescriptions, it is required to build hierarchy and + a prescription_map_id. All prescriptions uploaded to ADMAg are related to farm hierarchy through prescription_map_id. Please refer to + https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/ for more information on ADMAg. + - Compute indices using the spyndex python package. + - Clip the satellite imagery & sensor samples using farm boundary. + - Perform spatial interpolation to find raster pixels within the offset distance from sample location and assign the value of nutrients to group of pixels. + - Classify the data based on number of bins. + - Train the model using Random Forest classifier. + - Predict the nutrients using the satellite imagery. + - Generate a shape file using the predicted outputs. + - [`index/index` πŸ“„](workflow_yaml/data_processing/index/index.md): Computes an index from the bands of an input raster. - [`linear_trend/chunked_linear_trend` πŸ“„](workflow_yaml/data_processing/linear_trend/chunked_linear_trend.md): Computes the pixel-wise linear trend of a list of rasters (e.g. NDVI). diff --git a/src/vibe_core/vibe_core/terraform/aks/README.md b/src/vibe_core/vibe_core/terraform/aks/README.md new file mode 100644 index 00000000..d9fc4e92 --- /dev/null +++ b/src/vibe_core/vibe_core/terraform/aks/README.md @@ -0,0 +1,21 @@ +How to use this terraform file? + +Install Terraform from https://developer.hashicorp.com/terraform/downloads + +In current directory, execute: + +terraform init +terraform apply -var-file=example-vars.tfvars + +Terraform apply will ask you the following questions: +You can also refer to example-vars.tfvars + +location - This is the Azure Region you want to deploy in. For example, westus2, eastus2, etc. 
+tenantId - This is the Azure Tenant GUID of your Tenant. You can find this by going to Azure Active Directory or navigating to: https://ms.portal.azure.com/#view/Microsoft_AAD_IAM/ActiveDirectoryMenuBlade/~/Overview +subscriptionId - This is the Subscription GUID for the subscription you want to us. +namespace - This is the kubernetes namespace you want to deploy your services in. This will be a new namespace which the script will create. Recommneded value is "terravibes" +acr_registry - This is the path to the Docker Registry where the images are location. Public location for FarmVibes is mcr.microsoft.com/farmai/terravibes +acr_registry_username - Username to access the Docker Registry +acr_registry_password - Password to access the Docker Registry +prefix - A short prefix to distinguish your deployment +resource_group_name - If you want to use an existing resource group, specify it here \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars b/src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars new file mode 100644 index 00000000..c96b1679 --- /dev/null +++ b/src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars @@ -0,0 +1,7 @@ +tenantId="" +subscriptionId="" +acr_registry="mcr.microsoft.com/farmai/terravibes" +namespace="terravibes" +location="" +prefix="" +resource_group_name="" \ No newline at end of file From 52a67388b7b726f51f8786722f4e0002cbf535a2 Mon Sep 17 00:00:00 2001 From: FarmVibes Release Pipeline Date: Tue, 23 Jan 2024 13:05:05 +0000 Subject: [PATCH 2/7] Sync with prod branch Co-authored-by: Bruno Silva Co-authored-by: Eduardo Rodrigues Co-authored-by: Jessica Wolk Co-authored-by: Naga Bilwanth Gangarapu Co-authored-by: Rafael Padilha Co-authored-by: Renato Luiz de Freitas Cunha Co-authored-by: Rick Gutierrez --- .../source/docfiles/markdown/WORKFLOW_LIST.md | 17 --------------- .../vibe_core/terraform/aks/README.md | 21 ------------------- .../terraform/aks/example-vars.tfvars | 7 ------- 3 
files changed, 45 deletions(-) delete mode 100644 src/vibe_core/vibe_core/terraform/aks/README.md delete mode 100644 src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars diff --git a/docs/source/docfiles/markdown/WORKFLOW_LIST.md b/docs/source/docfiles/markdown/WORKFLOW_LIST.md index 649504a6..b00e0daf 100644 --- a/docs/source/docfiles/markdown/WORKFLOW_LIST.md +++ b/docs/source/docfiles/markdown/WORKFLOW_LIST.md @@ -124,23 +124,6 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`heatmap/classification` πŸ“„](workflow_yaml/data_processing/heatmap/classification.md): Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. The inference operation predicts nutrients in soil for the chosen farm boundary. -The workflow generates a heatmap for selected nutrient. It relies on sample soil data that contain information of nutrients. -The quantity of samples define the accuracy of the heat map generation. During the research performed testing with -samples spaced at 200 feet, 100 feet and 50 feet. The 50 feet sample spaced distance provided results matching to the -ground truth. Generating heatmap with this approach reduce the number of samples. -It utilizes the logic below behind the scenes to generate heatmap. - - Read the sentinel raster provided. - - Sensor samples needs to be uploaded into prescriptions entity in Azure data manager for Agriculture (ADMAg). ADMAg is having hierarchy - to hold information of Farmer, Field, Seasons, Crop, Boundary etc. Prior to uploading prescriptions, it is required to build hierarchy and - a prescription_map_id. All prescriptions uploaded to ADMAg are related to farm hierarchy through prescription_map_id. Please refer to - https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/ for more information on ADMAg. 
- - Compute indices using the spyndex python package. - - Clip the satellite imagery & sensor samples using farm boundary. - - Perform spatial interpolation to find raster pixels within the offset distance from sample location and assign the value of nutrients to group of pixels. - - Classify the data based on number of bins. - - Train the model using Random Forest classifier. - - Predict the nutrients using the satellite imagery. - - Generate a shape file using the predicted outputs. - [`index/index` 📄](workflow_yaml/data_processing/index/index.md): Computes an index from the bands of an input raster. diff --git a/src/vibe_core/vibe_core/terraform/aks/README.md b/src/vibe_core/vibe_core/terraform/aks/README.md deleted file mode 100644 index d9fc4e92..00000000 --- a/src/vibe_core/vibe_core/terraform/aks/README.md +++ /dev/null @@ -1,21 +0,0 @@ -How to use this terraform file? - -Install Terraform from https://developer.hashicorp.com/terraform/downloads - -In current directory, execute: - -terraform init -terraform apply -var-file=example-vars.tfvars - -Terraform apply will ask you the following questions: -You can also refer to example-vars.tfvars - -location - This is the Azure Region you want to deploy in. For example, westus2, eastus2, etc. -tenantId - This is the Azure Tenant GUID of your Tenant. You can find this by going to Azure Active Directory or navigating to: https://ms.portal.azure.com/#view/Microsoft_AAD_IAM/ActiveDirectoryMenuBlade/~/Overview -subscriptionId - This is the Subscription GUID for the subscription you want to us. -namespace - This is the kubernetes namespace you want to deploy your services in. This will be a new namespace which the script will create. Recommneded value is "terravibes" -acr_registry - This is the path to the Docker Registry where the images are location.
Public location for FarmVibes is mcr.microsoft.com/farmai/terravibes -acr_registry_username - Username to access the Docker Registry -acr_registry_password - Password to access the Docker Registry -prefix - A short prefix to distinguish your deployment -resource_group_name - If you want to use an existing resource group, specify it here \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars b/src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars deleted file mode 100644 index c96b1679..00000000 --- a/src/vibe_core/vibe_core/terraform/aks/example-vars.tfvars +++ /dev/null @@ -1,7 +0,0 @@ -tenantId="" -subscriptionId="" -acr_registry="mcr.microsoft.com/farmai/terravibes" -namespace="terravibes" -location="" -prefix="" -resource_group_name="" \ No newline at end of file From 37af374e1cfe2b801a530191351166ca8786926b Mon Sep 17 00:00:00 2001 From: FarmVibes Release Pipeline Date: Thu, 8 Feb 2024 19:52:52 +0000 Subject: [PATCH 3/7] Here's a summary of the changes from the last pull request: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - [πŸŽ‰ NEW] We now support remote cluster creation on any Azure Cloud Environment, abstracting the requirements of different cloud environments and backend services. This means we now support deployments in sovereign clouds, such as the [US Government Cloud](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-welcome), the [German Cloud](https://learn.microsoft.com/en-us/azure/germany/), and the [China Cloud](https://learn.microsoft.com/en-us/azure/china/overview-operations), in addition to the Azure Public Cloud. - [πŸ“ˆ IMPROVEMENT] We upgraded terraform provider to version 3.89.0 to support cluster setup through Azure Cloud Shell. - [βš’οΈ FIX] We fixed an issue that would cause the local cluster setup to fail in case the cluster name had underscores. Related to issue #133. 
- [📈 IMPROVEMENT] We revised the notebook dependencies and changed the default environment and package manager from Conda to [Micromamba](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html). Environments should build considerably faster and present fewer dependency issues. Related to issues #141 and #144. - [🎉 NEW] We added instructions on setting up the remote cluster using Azure Cloud Shell to the [Remote Cluster User Guide](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/AKS.html). - [⚒️ FIX] We fixed a bug that prevented the REST API documentation page to be rendered correctly. Related to issue #131. ---- Co-authored-by: Bruno Silva Co-authored-by: Peter Hammond Co-authored-by: Rafael Padilha Co-authored-by: Renato Luiz de Freitas Cunha Co-authored-by: Rick Gutierrez --- docs/source/docfiles/markdown/WORKFLOW_LIST.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/docfiles/markdown/WORKFLOW_LIST.md b/docs/source/docfiles/markdown/WORKFLOW_LIST.md index b00e0daf..eca05337 100644 --- a/docs/source/docfiles/markdown/WORKFLOW_LIST.md +++ b/docs/source/docfiles/markdown/WORKFLOW_LIST.md @@ -124,7 +124,6 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`heatmap/classification` 📄](workflow_yaml/data_processing/heatmap/classification.md): Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. The inference operation predicts nutrients in soil for the chosen farm boundary. - - [`index/index` 📄](workflow_yaml/data_processing/index/index.md): Computes an index from the bands of an input raster. - [`linear_trend/chunked_linear_trend` 📄](workflow_yaml/data_processing/linear_trend/chunked_linear_trend.md): Computes the pixel-wise linear trend of a list of rasters (e.g. NDVI).
From 79cf68640e2d1919dace2dc220df3210fef1f76e Mon Sep 17 00:00:00 2001 From: FarmVibes Release Pipeline Date: Fri, 5 Apr 2024 01:54:21 +0000 Subject: [PATCH 4/7] Sync with prod branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bruno Silva Co-authored-by: Naga Bilwanth Gangarapu Co-authored-by: Rafael Padilha Co-authored-by: Renato Luiz de Freitas Cunha Co-authored-by: Roberto de Moura EstevΓ£o Filho --- SUPPORT.md | 5 +- docs/source/conf.py | 1 + .../docfiles/code/vibe_core_client/client.md | 1 + .../source/docfiles/markdown/NOTEBOOK_LIST.md | 37 + docs/source/docfiles/markdown/QUICKSTART.md | 3 + docs/source/docfiles/markdown/REST_API.md | 167 ++++ .../docfiles/markdown/TROUBLESHOOTING.md | 9 + docs/source/docfiles/markdown/WORKFLOWS.md | 1 + .../source/docfiles/markdown/WORKFLOW_LIST.md | 16 +- .../core_types_hierarchy.md | 6 + .../data_types_diagram/farm_hierarchy.md | 9 + .../data_types_diagram/products_hierarchy.md | 3 + .../admag/admag_seasonal_field.md | 48 +- .../data_ingestion/admag/prescriptions.md | 83 +- .../data_ingestion/airbus/airbus_download.md | 44 +- .../data_ingestion/airbus/airbus_price.md | 44 +- .../alos/alos_forest_extent_download.md | 44 +- .../alos/alos_forest_extent_download_merge.md | 56 +- .../data_ingestion/bing/basemap_download.md | 46 +- .../bing/basemap_download_merge.md | 54 +- .../data_ingestion/cdl/download_cdl.md | 40 +- .../data_ingestion/dem/download_dem.md | 48 +- .../data_ingestion/gedi/download_gedi.md | 46 +- .../gedi/download_gedi_rh100.md | 48 +- .../glad/glad_forest_extent_download.md | 40 +- .../glad/glad_forest_extent_download_merge.md | 52 +- .../gnatsgo/download_gnatsgo.md | 85 +- .../hansen/hansen_forest_change_download.md | 110 ++ .../landsat/preprocess_landsat.md | 52 +- .../download_modis_surface_reflectance.md | 46 +- .../modis/download_modis_vegetation_index.md | 48 +- .../data_ingestion/naip/download_naip.md | 44 +- 
.../data_ingestion/osm_road_geometries.md | 49 +- .../data_ingestion/sentinel1/preprocess_s1.md | 108 +- .../sentinel1/preprocess_s1_rtc.md | 111 --- .../sentinel2/cloud_ensemble.md | 72 +- .../sentinel2/improve_cloud_mask.md | 66 +- .../sentinel2/improve_cloud_mask_ensemble.md | 60 +- .../data_ingestion/sentinel2/preprocess_s2.md | 80 +- .../sentinel2/preprocess_s2_ensemble_masks.md | 60 +- .../sentinel2/preprocess_s2_improved_masks.md | 68 +- .../data_ingestion/soil/soilgrids.md | 55 +- .../workflow_yaml/data_ingestion/soil/usda.md | 44 +- .../data_ingestion/spaceeye/spaceeye.md | 64 +- .../spaceeye/spaceeye_inference.md | 86 +- .../spaceeye/spaceeye_interpolation.md | 60 +- .../spaceeye_interpolation_inference.md | 72 +- .../spaceeye/spaceeye_preprocess.md | 81 +- .../spaceeye/spaceeye_preprocess_ensemble.md | 60 +- .../user_data/ingest_geometry.md | 40 +- .../data_ingestion/user_data/ingest_raster.md | 40 +- .../data_ingestion/user_data/ingest_smb.md | 52 +- .../data_ingestion/weather/download_chirps.md | 46 +- .../data_ingestion/weather/download_era5.md | 59 +- .../weather/download_era5_monthly.md | 57 +- .../weather/download_gridmet.md | 60 +- .../data_ingestion/weather/download_herbie.md | 71 +- .../weather/download_terraclimate.md | 58 +- .../weather/get_ambient_weather.md | 44 +- .../data_ingestion/weather/get_forecast.md | 52 +- .../data_ingestion/weather/herbie_forecast.md | 60 +- .../data_processing/chunk_onnx/chunk_onnx.md | 60 +- .../chunk_onnx/chunk_onnx_sequence.md | 54 +- .../data_processing/clip/clip.md | 40 +- .../gradient/raster_gradient.md | 34 +- .../data_processing/heatmap/classification.md | 99 +- .../data_processing/index/index.md | 38 +- .../linear_trend/chunked_linear_trend.md | 54 +- .../merge/match_merge_to_ref.md | 66 +- .../data_processing/outlier/detect_outlier.md | 56 +- .../threshold/threshold_raster.md | 38 +- .../timeseries/timeseries_aggregation.md | 46 +- .../timeseries_masked_aggregation.md | 56 +- 
.../farm_ai/agriculture/canopy_cover.md | 72 +- .../farm_ai/agriculture/change_detection.md | 94 +- .../farm_ai/agriculture/emergence_summary.md | 60 +- .../agriculture/green_house_gas_fluxes.md | 38 +- .../heatmap_using_classification.md | 69 +- .../heatmap_using_classification_admag.md | 79 +- .../heatmap_using_neighboring_data_points.md | 72 +- .../farm_ai/agriculture/methane_index.md | 64 +- .../farm_ai/agriculture/ndvi_summary.md | 54 +- .../farm_ai/agriculture/weed_detection.md | 60 +- .../carbon_local/admag_carbon_integration.md | 68 +- .../farm_ai/carbon_local/carbon_whatif.md | 46 +- .../conservation_practices.md | 120 ++- .../land_degradation/landsat_ndvi_trend.md | 50 +- .../land_degradation/ndvi_linear_trend.md | 46 +- .../farm_ai/segmentation/segment_basemap.md | 84 +- .../farm_ai/segmentation/segment_s2.md | 82 +- .../farm_ai/sensor/optimal_locations.md | 71 +- .../water/irrigation_classification.md | 182 ++-- .../deforestation/alos_trend_detection.md | 134 +++ .../deforestation/ordinal_trend_detection.md | 123 +++ .../markdown/workflow_yaml/helloworld.md | 34 +- .../workflow_yaml/ml/crop_segmentation.md | 60 +- .../datagen_crop_segmentation.md | 56 +- .../workflow_yaml/ml/driveway_detection.md | 74 +- .../basemap_prompt_segmentation.md | 97 ++ .../s2_prompt_segmentation.md | 97 ++ docs/source/index.md | 1 + ...riculture_and_comet_farm_api_example.ipynb | 418 +++++++- ...data_manager_for_agriculture_example.ipynb | 559 ++++++++++- notebooks/crop_cycles/env.yaml | 1 + .../forest/download_alos_forest_map.ipynb | 497 +++++++++ .../forest/download_glad_forest_map.ipynb | 442 +++++++++ .../forest/download_hansen_forest_map.ipynb | 939 ++++++++++++++++++ .../forest/forest_change_detection.ipynb | 901 +++++++++++++++++ notebooks/heatmaps/notebook_lib/utils.py | 7 +- ...nutrients_using_classification_admag.ipynb | 39 +- .../basemap_segmentation.ipynb | 2 +- .../segment_anything/sam_exploration.ipynb | 6 +- .../sentinel2_segmentation.ipynb | 6 +- 
.../field_level_spectral_indices.ipynb | 2 +- notebooks/shared_nb_lib/plot.py | 104 +- scripts/export_sam_models.py | 217 ++++ src/vibe_core/pyproject.toml | 2 +- src/vibe_core/vibe_core/admag_client.py | 312 +++--- src/vibe_core/vibe_core/cli/constants.py | 2 +- src/vibe_core/vibe_core/cli/helper.py | 17 +- src/vibe_core/vibe_core/cli/local.py | 25 + src/vibe_core/vibe_core/cli/parsers.py | 21 + src/vibe_core/vibe_core/cli/remote.py | 40 +- src/vibe_core/vibe_core/cli/wrappers.py | 128 ++- src/vibe_core/vibe_core/client.py | 65 +- src/vibe_core/vibe_core/data/__init__.py | 5 + src/vibe_core/vibe_core/data/core_types.py | 20 + src/vibe_core/vibe_core/data/farm.py | 78 +- .../vibe_core/data/json_converter.py | 5 +- src/vibe_core/vibe_core/data/products.py | 102 ++ src/vibe_core/vibe_core/monitor.py | 31 +- src/vibe_core/vibe_core/terraform/aks/main.tf | 44 +- .../aks/modules/infra/azure_monitor.tf | 39 + .../terraform/aks/modules/infra/keyvault.tf | 6 - .../terraform/aks/modules/infra/outputs.tf | 5 + .../terraform/aks/modules/infra/variables.tf | 5 + .../terraform/aks/modules/kubernetes/dapr.tf | 4 +- .../terraform/aks/modules/kubernetes/init.tf | 14 + .../terraform/aks/modules/kubernetes/otel.tf | 109 +- .../aks/modules/kubernetes/outputs.tf | 2 +- .../aks/modules/kubernetes/variables.tf | 9 + .../vibe_core/terraform/aks/variables.tf | 10 + .../vibe_core/terraform/local/main.tf | 2 + .../local/modules/kubernetes/dapr.tf | 4 +- .../local/modules/kubernetes/outputs.tf | 2 +- .../local/modules/kubernetes/variables.tf | 5 + .../vibe_core/terraform/local/variables.tf | 5 + .../vibe_core/terraform/services/cache.tf | 15 +- .../vibe_core/terraform/services/dataops.tf | 15 +- .../terraform/services/orchestrator.tf | 12 +- .../vibe_core/terraform/services/restapi.tf | 12 +- .../vibe_core/terraform/services/variables.tf | 3 + .../vibe_core/terraform/services/worker.tf | 17 +- 153 files changed, 9543 insertions(+), 1688 deletions(-) create mode 100644 
docs/source/docfiles/markdown/REST_API.md create mode 100644 docs/source/docfiles/markdown/workflow_yaml/data_ingestion/hansen/hansen_forest_change_download.md delete mode 100644 docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1_rtc.md create mode 100644 docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/alos_trend_detection.md create mode 100644 docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/ordinal_trend_detection.md create mode 100644 docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/basemap_prompt_segmentation.md create mode 100644 docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/s2_prompt_segmentation.md create mode 100644 notebooks/forest/download_alos_forest_map.ipynb create mode 100644 notebooks/forest/download_glad_forest_map.ipynb create mode 100644 notebooks/forest/download_hansen_forest_map.ipynb create mode 100644 notebooks/forest/forest_change_detection.ipynb create mode 100644 scripts/export_sam_models.py create mode 100644 src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf diff --git a/SUPPORT.md b/SUPPORT.md index 613bc15b..63e4baa8 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -2,8 +2,8 @@ ## How to file issues and get help -This project uses GitHub Issues to track bugs and feature requests. Please search the existing -issues before filing new issues to avoid duplicates. For new issues, file your bug or +This project uses GitHub Issues to track bugs and feature requests. Please search the existing +issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new Issue. For help and questions about using this project, please reach out to the team at farmvibes at microsoft.com. 
@@ -11,6 +11,7 @@ For help and questions about using this project, please reach out to the team at ## Troubleshooting A list of common issues and their resolution can be found in the [troubleshooting documentation](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html). +We also provide a current list of [known issues on our GitHub](https://github.com/microsoft/farmvibes-ai/labels/known%20issues) that are actively being worked on. ## Microsoft Support Policy diff --git a/docs/source/conf.py b/docs/source/conf.py index cd777be6..2fe34b1c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,6 +22,7 @@ "sphinxcontrib.mermaid", "myst_parser", "sphinx_autodoc_typehints", + "sphinxcontrib.openapi", ] autosummary_generate = True diff --git a/docs/source/docfiles/code/vibe_core_client/client.md b/docs/source/docfiles/code/vibe_core_client/client.md index 90a26a8e..b2286cf1 100644 --- a/docs/source/docfiles/code/vibe_core_client/client.md +++ b/docs/source/docfiles/code/vibe_core_client/client.md @@ -4,6 +4,7 @@ .. automodule:: vibe_core.client :members: :show-inheritance: + :private-members: _form_payload .. autosummary:: :toctree: _autosummary diff --git a/docs/source/docfiles/markdown/NOTEBOOK_LIST.md b/docs/source/docfiles/markdown/NOTEBOOK_LIST.md index 3e4ad74a..caa6879d 100644 --- a/docs/source/docfiles/markdown/NOTEBOOK_LIST.md +++ b/docs/source/docfiles/markdown/NOTEBOOK_LIST.md @@ -61,6 +61,19 @@ We organize available notebooks in the following topics: - [`Crop land segmentation (4/4) - inference` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/crop_segmentation/04_inference.ipynb) + +
+ Deforestation + +- [`Detecting Forest Changes` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/forest_change_detection.ipynb) + +- [`Download ALOS forest extent maps` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_alos_forest_map.ipynb) + +- [`Download Glad Forest Map` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_glad_forest_map.ipynb) + +- [`Download Global Forest Change (Hansen) maps.` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_hansen_forest_map.ipynb) + +
Index Computation @@ -151,6 +164,14 @@ We organize available notebooks in the following topics: - [`Crop land segmentation (4/4) - inference` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/crop_segmentation/04_inference.ipynb) +- [`Detecting Forest Changes` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/forest_change_detection.ipynb) + +- [`Download ALOS forest extent maps` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_alos_forest_map.ipynb) + +- [`Download Glad Forest Map` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_glad_forest_map.ipynb) + +- [`Download Global Forest Change (Hansen) maps.` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_hansen_forest_map.ipynb) + - [`Field boundary segmentation (SAM exploration)` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/segment_anything/sam_exploration.ipynb) - [`Field-level Irrigation Classification` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/irrigation/field_level_irrigation_classification.ipynb) @@ -239,6 +260,14 @@ We organize available notebooks in the following topics: - [`Carbon sequestration evaluation with Microsoft Azure Data Manager for Agriculture (ADMAg) and COMET-Farm API` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/admag/azure_data_manager_for_agriculture_and_comet_farm_api_example.ipynb) +- [`Detecting Forest Changes` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/forest_change_detection.ipynb) + +- [`Download ALOS forest extent maps` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_alos_forest_map.ipynb) + +- [`Download Glad Forest Map` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_glad_forest_map.ipynb) + +- [`Download Global Forest Change (Hansen) maps.` 
πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_hansen_forest_map.ipynb) + - [`Green House Gas fluxes` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/ghg_fluxes/ghg_fluxes.ipynb) - [`Nutrient Heatmap Estimation - Classification` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/heatmaps/nutrients_using_classification.ipynb) @@ -341,6 +370,14 @@ We organize available notebooks in the following topics: - [`Crop land segmentation (4/4) - inference` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/crop_segmentation/04_inference.ipynb) : Infer crop land segmentation for new regions with a trained model. +- [`Detecting Forest Changes` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/forest_change_detection.ipynb) : Helps users to detect forest changes + +- [`Download ALOS forest extent maps` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_alos_forest_map.ipynb) : This notebook downloads the ALOS (Advanced Land Observing Satellite) forest extent maps + +- [`Download Glad Forest Map` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_glad_forest_map.ipynb) : This notebook downloads the Global Land Analysis (GLAD) forest extent maps. + +- [`Download Global Forest Change (Hansen) maps.` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/forest/download_hansen_forest_map.ipynb) : This notebook contains functions to download and process the Global Forest Change (Hansen) maps. + - [`Field boundary segmentation (SAM exploration)` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/segment_anything/sam_exploration.ipynb) : Segment Anything Model exploration over FarmVibes.AI data to segment crop field boundaries. 
- [`Field-level Irrigation Classification` πŸ““](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/irrigation/field_level_irrigation_classification.ipynb) : Estimate an irrigation probability map over crop fields segmented with Segment Anything Model. diff --git a/docs/source/docfiles/markdown/QUICKSTART.md b/docs/source/docfiles/markdown/QUICKSTART.md index 0747f31f..6963a861 100644 --- a/docs/source/docfiles/markdown/QUICKSTART.md +++ b/docs/source/docfiles/markdown/QUICKSTART.md @@ -111,3 +111,6 @@ that FarmVibes.AI and the python client are working properly. For more information on how to execute workflows, please take a look at our [client guide](./CLIENT.md). For information on any issues running the cluster, including on how to re-start it after a machine reboot, take a look at our [troubleshoot guide](./TROUBLESHOOTING.md). +If you do not find the information you are looking for, please reach out to the team by opening +an issue on our [GitHub repository](https://github.com/microsoft/farmvibes-ai/issues) or browsing +through our [known issues](https://github.com/microsoft/farmvibes-ai/labels/known%20issues). diff --git a/docs/source/docfiles/markdown/REST_API.md b/docs/source/docfiles/markdown/REST_API.md new file mode 100644 index 00000000..a6d35596 --- /dev/null +++ b/docs/source/docfiles/markdown/REST_API.md @@ -0,0 +1,167 @@ +# REST API + +Once the FarmVibes.AI cluster is up and running, you can interact with it using the REST API, which provides a set of endpoints that allow you to list and describe workflows, as well as manage workflow runs. +The REST API is available at the URL and port specified during cluster creation, and its address is printed in the terminal once the setup is complete. You can also check the address by running the following command in the terminal: + +```bash +$ farmvibes-ai status +2024-01-01 00:00:00,000 - INFO - Cluster farmvibes-ai-username is running with 1 servers and 0 agents. 
+2024-01-01 00:00:00,001 - INFO - Service url is http://ip.address:port +``` + +## Interacting with the API + +The API is accessible from the [FarmVibes.AI Python client](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/CLIENT.html), which provides an interface to interact with the cluster, list workflows, and manage workflow runs. +Alternatively, interacting with the API can be done using any tool that can send HTTP requests, such as `curl` or [Bruno](https://www.usebruno.com/). + +For example, to list the available workflows, you can use the following command: + +```bash +$ curl -X GET http://localhost:31108/v0/workflows +``` + +Which will return the following list: + +``` +["helloworld","farm_ai/land_degradation/landsat_ndvi_trend","farm_ai/land_degradation/ndvi_linear_trend", ...] +``` + +For submitting a run of a specific workflow, we need to pass a JSON with the run configuration +(i.e., workflow name, input geometry and time range, workflow parameters, etc) as the body of the +request.
For example, we can use the following command to create a `helloworld` workflow run: + +```bash +$ curl -X POST -H "Content-Type: application/json" -d <JSON> http://localhost:31108/v0/runs +``` + +Replacing the body of the request `<JSON>` with the following: + +```json +{ + 'name': 'Hello!', + 'workflow': 'helloworld', + 'parameters': None, + 'user_input': { + 'start_date': datetime.datetime(2020, 5, 1, 0, 0), + 'end_date': datetime.datetime(2020, 5, 5, 0, 0), + 'geojson': { + 'features': [ + { + 'geometry': { + 'type': 'Polygon', + 'coordinates': (((-119.14896203939314, 46.51578909859286), + (-119.14896203939314, 46.37578909859286), + (-119.28896203939313, 46.37578909859286), + (-119.28896203939313, 46.51578909859286), + (-119.14896203939314, 46.51578909859286)),) + }, + 'type': 'Feature' + } + ], + 'type': 'FeatureCollection' + } + } +} +``` + +To help in understanding the expected format and structure of the json in our requests, we provide in +our Python client the `_form_payload` method ([`vibe_core.client.FarmvibesAiClient._form_payload`](https://microsoft.github.io/farmvibes-ai/docfiles/code/vibe_core_client/client.html#vibe_core.client.FarmvibesAiClient._form_payload)) that can be used to +generate the request payload for a given run configuration.
For example, the following code could +be used to obtain the json above for the helloworld workflow: + +```python +from vibe_core.client import get_default_vibe_client +import shapely.geometry as shpg +from datetime import datetime + +client = get_default_vibe_client() + +geom = shpg.Point(-119.21896203939313, 46.44578909859286).buffer(.07, cap_style=3) +time_range = (datetime(2020, 5, 1), datetime(2020, 5, 5)) + +payload = client._form_payload("helloworld", None, geom, time_range, None,"Hello!") +``` + +Another example, considering the `farm_ai/segmentation/segment_s2` workflow run submitted in the +[Sentinel-2 Segmentation notebook](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/segment_anything/sentinel2_segmentation.ipynb), would be: + +```python +payload = client._form_payload("farm_ai/segmentation/segment_s2", None, None, None, {"user_input": roi_time_range, "prompts": geom_collection},"SAM segmentation") +``` + +Which would generate the following json: + +```json +{ + 'name': 'SAM segmentation', + 'workflow': 'farm_ai/segmentation/segment_s2', + 'parameters': None, + 'user_input': { + 'user_input': { + 'type': 'Feature', + 'stac_version': '1.0.0', + 'id': 'f6465ad0-5e01-4792-ad99-a0bd240c1e7d', + 'properties': { +
'urls': ['/mnt/734c6441-cb25-4c40-8204-6b7286f24bb9_geometry_collection.geojson'], + 'start_datetime': '2020-05-01T00:00:00+00:00', + 'end_datetime': '2020-05-05T00:00:00+00:00', + 'datetime': '2020-05-01T00:00:00Z' + }, + 'geometry': {'type': 'Polygon', + 'coordinates': (((-119.14896203939314, 46.51578909859286), + (-119.14896203939314, 46.37578909859286), + (-119.28896203939313, 46.37578909859286), + (-119.28896203939313, 46.51578909859286), + (-119.14896203939314, 46.51578909859286)),) + }, + 'links': [], + 'assets': {}, + 'bbox': [-119.28896203939313, + 46.37578909859286, + -119.14896203939314, + 46.51578909859286], + 'stac_extensions': [], + 'terravibes_data_type': 'ExternalReferenceList' + } + } +} +``` + +For more information about the `_form_payload` method, please refer to the [FarmVibes.AI Python client documentation](https://microsoft.github.io/farmvibes-ai/docfiles/code/vibe_core_client/client.html#vibe_core.client.FarmvibesAiClient._form_payload). + +## Endpoints + +We provide below a list of the available endpoints and their descriptions. + +----------------------------- + +```{eval-rst} +.. openapi:: ../openapi.json + :examples: + :format: markdown +``` diff --git a/docs/source/docfiles/markdown/TROUBLESHOOTING.md b/docs/source/docfiles/markdown/TROUBLESHOOTING.md index 1e2d056d..d3e54963 100644 --- a/docs/source/docfiles/markdown/TROUBLESHOOTING.md +++ b/docs/source/docfiles/markdown/TROUBLESHOOTING.md @@ -1,6 +1,8 @@ # Troubleshooting This document compiles the most common issues encountered when installing and running FarmVibes.AI platform, grouped into broad categories. +Besides the issues listed here, we also collect a list of [known issues on our GitHub repository](https://github.com/microsoft/farmvibes-ai/labels/known%20issues) +that are currently being addressed by the development team. - **Package installation:** @@ -224,6 +226,13 @@ This document compiles the most common issues encountered when installing and ru
+
+ Workflow run monitor table not rendering inside notebook + + Make sure to have the `ipywidgets` [package](https://pypi.org/project/ipywidgets/) installed in your environment. + +
+
- **Segment Anything Model (SAM):** diff --git a/docs/source/docfiles/markdown/WORKFLOWS.md b/docs/source/docfiles/markdown/WORKFLOWS.md index ed585ef0..02ed1383 100644 --- a/docs/source/docfiles/markdown/WORKFLOWS.md +++ b/docs/source/docfiles/markdown/WORKFLOWS.md @@ -7,6 +7,7 @@ We group FarmVibes.AI workflows in the following categories: This includes raw data sources (e.g., Sentinel 1 and 2, LandSat, CropDataLayer) as well as the SpaceEye cloud-removal model; - **Data Processing**: workflows that transform data into different data types (e.g., computing NDVI/MSAVI/Methane indexes, aggregating mean/max/min statistics of rasters, timeseries aggregation); - **FarmAI**: composed workflows (data ingestion + processing) whose outputs enable FarmAI scenarios (e.g., predicting conservation practices, estimating soil carbon sequestration, identifying methane leakage); +- **ForestAI**: composed workflows (data ingestion + processing) whose outputs enable ForestAI scenarios (e.g., detecting forest change, estimating forest extent); - **ML**: machine learning-related workflows to train, evaluate, and infer models within the FarmVibes.AI platform (e.g., dataset creation, inference); For a list of all available workflows within the FarmVibes.AI platform, please diff --git a/docs/source/docfiles/markdown/WORKFLOW_LIST.md b/docs/source/docfiles/markdown/WORKFLOW_LIST.md index eca05337..473d9a7e 100644 --- a/docs/source/docfiles/markdown/WORKFLOW_LIST.md +++ b/docs/source/docfiles/markdown/WORKFLOW_LIST.md @@ -6,6 +6,7 @@ We group FarmVibes.AI workflows in the following categories: This includes raw data sources (e.g., Sentinel 1 and 2, LandSat, CropDataLayer) as well as the SpaceEye cloud-removal model; - **Data Processing**: workflows that transform data into different data types (e.g., computing NDVI/MSAVI/Methane indexes, aggregating mean/max/min statistics of rasters, timeseries aggregation); - **FarmAI**: composed workflows (data ingestion + processing) whose outputs 
enable FarmAI scenarios (e.g., predicting conservation practices, estimating soil carbon sequestration, identifying methane leakage); +- **ForestAI**: composed workflows (data ingestion + processing) whose outputs enable ForestAI scenarios (e.g., detecting forest change, estimating forest extent); - **ML**: machine learning-related workflows to train, evaluate, and infer models within the FarmVibes.AI platform (e.g., dataset creation, inference); Below is a list of all available workflows within the FarmVibes.AI platform. For each of them, we provide a brief description and a link to the corresponding documentation page. @@ -44,6 +45,8 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`gnatsgo/download_gnatsgo` 📄](workflow_yaml/data_ingestion/gnatsgo/download_gnatsgo.md): Downloads gNATSGO raster data that intersect with the input geometry and time range. +- [`hansen/hansen_forest_change_download` 📄](workflow_yaml/data_ingestion/hansen/hansen_forest_change_download.md): Downloads and merges Global Forest Change (Hansen) rasters that intersect the user-provided geometry/time range. + - [`landsat/preprocess_landsat` 📄](workflow_yaml/data_ingestion/landsat/preprocess_landsat.md): Downloads and preprocesses LANDSAT tiles that intersect with the input geometry and time range. - [`modis/download_modis_surface_reflectance` 📄](workflow_yaml/data_ingestion/modis/download_modis_surface_reflectance.md): Downloads MODIS 8-day surface reflectance rasters that intersect with the input geometry and time range. @@ -56,8 +59,6 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`sentinel1/preprocess_s1` 📄](workflow_yaml/data_ingestion/sentinel1/preprocess_s1.md): Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 products in the input time range.
-- [`sentinel1/preprocess_s1_rtc` 📄](workflow_yaml/data_ingestion/sentinel1/preprocess_s1_rtc.md): Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 products in the input time range. - - [`sentinel2/cloud_ensemble` 📄](workflow_yaml/data_ingestion/sentinel2/cloud_ensemble.md): Computes the cloud probability of a Sentinel-2 L2A raster using an ensemble of five cloud segmentation models. - [`sentinel2/improve_cloud_mask` 📄](workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask.md): Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by machine learning segmentation models. @@ -180,6 +181,13 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`water/irrigation_classification` 📄](workflow_yaml/farm_ai/water/irrigation_classification.md): Develops 30m pixel-wise irrigation probability map. +## forest_ai + +- [`deforestation/alos_trend_detection` 📄](workflow_yaml/forest_ai/deforestation/alos_trend_detection.md): Detects increase/decrease trends in forest pixel levels over the user-input geometry and time range for the ALOS forest map. + +- [`deforestation/ordinal_trend_detection` 📄](workflow_yaml/forest_ai/deforestation/ordinal_trend_detection.md): Detects increase/decrease trends in the pixel levels over the user-input geometry and time range. + + ## ml - [`crop_segmentation` 📄](workflow_yaml/ml/crop_segmentation.md): Runs a crop segmentation model based on NDVI from SpaceEye imagery along the year. @@ -188,4 +196,8 @@ Below is a list of all available workflows within the FarmVibes.AI platform. For - [`driveway_detection` 📄](workflow_yaml/ml/driveway_detection.md): Detects driveways in front of houses.
+- [`segment_anything/basemap_prompt_segmentation` πŸ“„](workflow_yaml/ml/segment_anything/basemap_prompt_segmentation.md): Runs Segment Anything Model (SAM) over BingMaps basemap rasters with points and/or bounding boxes as prompts. + +- [`segment_anything/s2_prompt_segmentation` πŸ“„](workflow_yaml/ml/segment_anything/s2_prompt_segmentation.md): Runs Segment Anything Model (SAM) over Sentinel-2 rasters with points and/or bounding boxes as prompts. + diff --git a/docs/source/docfiles/markdown/data_types_diagram/core_types_hierarchy.md b/docs/source/docfiles/markdown/data_types_diagram/core_types_hierarchy.md index c9fb1b1b..45f7fbfd 100644 --- a/docs/source/docfiles/markdown/data_types_diagram/core_types_hierarchy.md +++ b/docs/source/docfiles/markdown/data_types_diagram/core_types_hierarchy.md @@ -28,10 +28,14 @@ classDiagram } class GeometryCollection { } + class OrdinalTrendTest { + } class ProteinSequence { } class PydanticAssetVibe { } + class RasterPixelCount { + } class TimeSeries { } class Tmp { @@ -54,7 +58,9 @@ classDiagram GHGFlux --|> DataVibe GHGProtocolVibe --|> DataVibe GeometryCollection --|> DataVibe + OrdinalTrendTest --|> DataVibe ProteinSequence --|> DataVibe + RasterPixelCount --|> DataVibe TimeSeries --|> DataVibe UnresolvedDataVibe --|> BaseVibe diff --git a/docs/source/docfiles/markdown/data_types_diagram/farm_hierarchy.md b/docs/source/docfiles/markdown/data_types_diagram/farm_hierarchy.md index 9b058cec..2deb78fb 100644 --- a/docs/source/docfiles/markdown/data_types_diagram/farm_hierarchy.md +++ b/docs/source/docfiles/markdown/data_types_diagram/farm_hierarchy.md @@ -6,6 +6,12 @@ classDiagram } class DataVibe { } + class ADMAgPrescription { + } + class ADMAgPrescriptionInput { + } + class ADMAgPrescriptionMapInput { + } class ADMAgSeasonalFieldInput { } class FertilizerInformation { @@ -19,6 +25,9 @@ classDiagram class TillageInformation { } DataVibe --|> BaseVibe + ADMAgPrescription --|> BaseVibe + ADMAgPrescriptionInput --|> BaseVibe + 
ADMAgPrescriptionMapInput --|> BaseVibe ADMAgSeasonalFieldInput --|> BaseVibe SeasonalFieldInformation --|> DataVibe diff --git a/docs/source/docfiles/markdown/data_types_diagram/products_hierarchy.md b/docs/source/docfiles/markdown/data_types_diagram/products_hierarchy.md index 12db2fde..e90cd707 100644 --- a/docs/source/docfiles/markdown/data_types_diagram/products_hierarchy.md +++ b/docs/source/docfiles/markdown/data_types_diagram/products_hierarchy.md @@ -28,6 +28,8 @@ classDiagram } class GNATSGOProduct { } + class HansenProduct { + } class HerbieProduct { } class LandsatProduct { @@ -48,6 +50,7 @@ classDiagram GEDIProduct --|> DataVibe GLADProduct --|> DataVibe GNATSGOProduct --|> DataVibe + HansenProduct --|> DataVibe HerbieProduct --|> DataVibe LandsatProduct --|> DataVibe ModisProduct --|> DataVibe diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/admag_seasonal_field.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/admag_seasonal_field.md index 02e04341..d9f5206f 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/admag_seasonal_field.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/admag_seasonal_field.md @@ -1,5 +1,42 @@ # data_ingestion/admag/admag_seasonal_field +Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data Manager for Agriculture). The workflow creates a DataVibe subclass SeasonalFieldInformation that contains farm-related operations (e.g., fertilization, harvest, tillage, planting, crop name). + +```{mermaid} + graph TD + inp1>admag_input] + out1>seasonal_field] + tsk1{{admag_seasonal_field}} + inp1>admag_input] -- admag_input --> tsk1{{admag_seasonal_field}} + tsk1{{admag_seasonal_field}} -- seasonal_field --> out1>seasonal_field] +``` + +## Sources + +- **admag_input**: Unique identifiers for ADMAg seasonal field, and party. 
+ +## Sinks + +- **seasonal_field**: Crop SeasonalFieldInformation which contains SeasonalFieldInformation that contains farm-related operations (e.g., fertilization, harvest, tillage, planting, crop name). + +## Parameters + +- **base_url**: Azure Data Manager for Agriculture host. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **client_id**: Azure Data Manager for Agriculture client id. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **client_secret**: Azure Data Manager for Agriculture client secret. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **authority**: Azure Data Manager for Agriculture authority. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **default_scope**: Azure Data Manager for Agriculture default scope. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +## Tasks + +- **admag_seasonal_field**: Establishes the connection with ADMAg and fetches seasonal field information. + +## Workflow Yaml + ```yaml name: admag_seasonal_field @@ -31,7 +68,7 @@ description: that contains farm-related operations (e.g., fertilization, harvest, tillage, planting, crop name). sources: - admag_input: Unique identifiers for ADMAg seasonal field, boundary, and farmer. + admag_input: Unique identifiers for ADMAg seasonal field, and party. sinks: seasonal_field: Crop SeasonalFieldInformation which contains SeasonalFieldInformation that contains farm-related operations (e.g., fertilization, harvest, tillage, @@ -49,13 +86,4 @@ description: https://aka.ms/farmvibesDMA to check how to get these credentials. 
-``` - -```{mermaid} - graph TD - inp1>admag_input] - out1>seasonal_field] - tsk1{{admag_seasonal_field}} - inp1>admag_input] -- admag_input --> tsk1{{admag_seasonal_field}} - tsk1{{admag_seasonal_field}} -- seasonal_field --> out1>seasonal_field] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/prescriptions.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/prescriptions.md index d4ebeecf..bf21c61c 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/prescriptions.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/admag/prescriptions.md @@ -1,10 +1,57 @@ # data_ingestion/admag/prescriptions +Fetches prescriptions using ADMAg (Microsoft Azure Data Manager for Agriculture). The workflow fetch prescriptions (sensor samples) linked to prescription_map_id. Each sensor sample have the information of nutrient (Nitrogen, Carbon, Phosphorus, pH, Latitude, Longitude etc., ). The Latitude & Longitude used to create a point geometry. Geometry and nutrient information transformed to GeoJSON. The GeoJSON stored as asset in farmvibes-ai. + +```{mermaid} + graph TD + inp1>admag_input] + out1>response] + tsk1{{list_prescriptions}} + tsk2{{get_prescription}} + tsk3{{admag_prescriptions}} + tsk1{{list_prescriptions}} -- prescriptions/prescription_without_geom_input --> tsk2{{get_prescription}} + tsk2{{get_prescription}} -- prescription_with_geom/prescriptions_with_geom_input --> tsk3{{admag_prescriptions}} + inp1>admag_input] -- admag_input --> tsk1{{list_prescriptions}} + inp1>admag_input] -- admag_input --> tsk3{{admag_prescriptions}} + tsk3{{admag_prescriptions}} -- response --> out1>response] +``` + +## Sources + +- **admag_input**: Required inputs to access ADMAg resources, party_id and prescription_map_id that helps fetching prescriptions. + +## Sinks + +- **response**: Prescriptions received from ADMAg. 
+ +## Parameters + +- **base_url**: URL to access the registered app. Refer to this URL to create required resources for admag. https://learn.microsoft.com/en-us/azure/data-manager-for-agri/quickstart-install-data-manager-for-agriculture + +- **client_id**: Value uniquely identifies registered application in the Microsoft identity platform. Visit url https://learn.microsoft.com/en-us/azure/data-manager-for-agri/quickstart-install-data-manager-for-agriculture to register the app. + +- **client_secret**: Sometimes called an application password, a client secret is a string value your app can use in place of a certificate to identify itself. + +- **authority**: The endpoint URIs for your app are generated automatically when you register or configure your app. It is used by client to obtain authorization from the resource owner + +- **default_scope**: URL for default azure OAuth2 permissions + +## Tasks + +- **list_prescriptions**: List available prescriptions using prescription map. + +- **get_prescription**: Get prescription using ADMAg API. + +- **admag_prescriptions**: Downloads boundary and prescriptions linked to seasonal field from ADMAg data source.
+ +## Workflow Yaml + ```yaml name: admag_prescritpions sources: admag_input: + - list_prescriptions.admag_input - admag_prescriptions.admag_input sinks: response: admag_prescriptions.response @@ -15,6 +62,24 @@ parameters: authority: null default_scope: null tasks: + list_prescriptions: + op: list_prescriptions + op_dir: admag + parameters: + base_url: '@from(base_url)' + client_id: '@from(client_id)' + client_secret: '@from(client_secret)' + authority: '@from(authority)' + default_scope: '@from(default_scope)' + get_prescription: + op: get_prescription + op_dir: admag + parameters: + base_url: '@from(base_url)' + client_id: '@from(client_id)' + client_secret: '@from(client_secret)' + authority: '@from(authority)' + default_scope: '@from(default_scope)' admag_prescriptions: op: prescriptions op_dir: admag @@ -24,6 +89,13 @@ tasks: client_secret: '@from(client_secret)' authority: '@from(authority)' default_scope: '@from(default_scope)' +edges: +- origin: list_prescriptions.prescriptions + destination: + - get_prescription.prescription_without_geom_input +- origin: get_prescription.prescription_with_geom + destination: + - admag_prescriptions.prescriptions_with_geom_input description: short_description: Fetches prescriptions using ADMAg (Microsoft Azure Data Manager for Agriculture). @@ -33,7 +105,7 @@ description: geometry. Geometry and nutrient information transformed to GeoJSON. The GeoJSON stored as asset in farmvibes-ai. sources: - admag_input: Required inputs to access ADMAg resources, farmer_id and prescription_map_id + admag_input: Required inputs to access ADMAg resources, party_id and prescription_map_id that helps fetching prescriptions. sinks: response: Prescriptions received from ADMAg. 
@@ -51,13 +123,4 @@ description: default_scope: URL for default azure OAuth2 permissions -``` - -```{mermaid} - graph TD - inp1>admag_input] - out1>response] - tsk1{{admag_prescriptions}} - inp1>admag_input] -- admag_input --> tsk1{{admag_prescriptions}} - tsk1{{admag_prescriptions}} -- response --> out1>response] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_download.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_download.md index 81d7c27a..f3338809 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_download.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_download.md @@ -1,5 +1,38 @@ # data_ingestion/airbus/airbus_download +Downloads available AirBus imagery for the input geometry and time range. The workflow will check available imagery, using the AirBus API, that contains the input geometry and inside the input time range. Matching images will be purchased (if they are not already in the user's library) and downloaded. This workflow requires an AirBus API key. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- airbus_products --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_products --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: AirBus raster. + +## Parameters + +- **api_key**: AirBus API key. Required to run the workflow. + +## Tasks + +- **list**: Lists available AirBus products for the input geometry and time range. + +- **download**: Downloads the AirBus imagery from the listed product. + +## Workflow Yaml + ```yaml name: airbus_download @@ -38,15 +71,4 @@ description: api_key: AirBus API key. Required to run the workflow. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- airbus_products --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_products --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_price.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_price.md index fe025d19..ac6ac4f8 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_price.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/airbus/airbus_price.md @@ -1,5 +1,38 @@ # data_ingestion/airbus/airbus_price +Prices available AirBus imagery for the input geometry and time range. The workflow will check available imagery, using the AirBus API, that contains the input geometry inside the input time range. The aggregate price (in kB) for matching images will be computed, discounting images already in the user's library. This workflow requires an AirBus API key. + +```{mermaid} + graph TD + inp1>user_input] + out1>price] + tsk1{{list}} + tsk2{{price}} + tsk1{{list}} -- airbus_products --> tsk2{{price}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{price}} -- products_price --> out1>price] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **price**: Price for all matching imagery. + +## Parameters + +- **api_key**: AirBus API key. Required to run the workflow. + +## Tasks + +- **list**: Lists available AirBus products for the input geometry and time range. + +- **price**: Calculates the aggregate price (in kB) for selected AirBus images, discounting images already in the user's library. + +## Workflow Yaml + ```yaml name: airbus_price @@ -38,15 +71,4 @@ description: api_key: AirBus API key. Required to run the workflow. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>price] - tsk1{{list}} - tsk2{{price}} - tsk1{{list}} -- airbus_products --> tsk2{{price}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{price}} -- products_price --> out1>price] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download.md index 0094a816..9e544b44 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download.md @@ -1,5 +1,38 @@ # data_ingestion/alos/alos_forest_extent_download +Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map. The workflow lists all ALOS forest/non-forest classification products that intersect with the input geometry and time range (available range 2015-2020), then downloads the data for each of them. The data will be returned in the form of rasters. + +```{mermaid} + graph TD + inp1>user_input] + out1>downloaded_product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- alos_products/product --> tsk2{{download}} + inp1>user_input] -- input_data --> tsk1{{list}} + tsk2{{download}} -- raster --> out1>downloaded_product] +``` + +## Sources + +- **user_input**: Geometry of interest for which to download the ALOS forest/non-forest classification map. + +## Sinks + +- **downloaded_product**: Downloaded ALOS forest/non-forest classification map. + +## Parameters + +- **pc_key**: Planetary computer API key. + +## Tasks + +- **list**: Lists ALOS forest products for input geometry and time range. + +- **download**: Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map. 
+ +## Workflow Yaml + ```yaml name: alos_forest_extent_download @@ -35,15 +68,4 @@ description: downloaded_product: Downloaded ALOS forest/non-forest classification map. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>downloaded_product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- alos_products/product --> tsk2{{download}} - inp1>user_input] -- input_data --> tsk1{{list}} - tsk2{{download}} -- raster --> out1>downloaded_product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download_merge.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download_merge.md index 427c31ad..046e46b5 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download_merge.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/alos/alos_forest_extent_download_merge.md @@ -1,5 +1,46 @@ # data_ingestion/alos/alos_forest_extent_download_merge +Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map and merges it into a single raster. The workflow lists the ALOS forest/non-forest classification products that intersect with the input geometry and time range (available range 2015-2020), and downloads the filtered products. The workflow processes the downloaded products and merges them into a single raster. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>merged_raster] + out2>categorical_raster] + tsk1{{alos_forest_extent_download}} + tsk2{{group_rasters_by_time}} + tsk3{{merge}} + tsk1{{alos_forest_extent_download}} -- downloaded_product/rasters --> tsk2{{group_rasters_by_time}} + tsk2{{group_rasters_by_time}} -- raster_groups/raster_sequence --> tsk3{{merge}} + inp1>user_input] -- user_input --> tsk1{{alos_forest_extent_download}} + tsk3{{merge}} -- raster --> out1>merged_raster] + tsk1{{alos_forest_extent_download}} -- downloaded_product --> out2>categorical_raster] +``` + +## Sources + +- **user_input**: Geometry of interest for which to download the ALOS forest/non-forest classification map. + +## Sinks + +- **merged_raster**: ALOS forest/non-forest classification products converted to raster and merged. + +- **categorical_raster**: ALOS forest/non-forest classification products that intersect with the input geometry & time range. + +## Parameters + +- **pc_key**: Planetary computer API key. + +## Tasks + +- **alos_forest_extent_download**: Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map. + +- **group_rasters_by_time**: This op groups rasters in time according to 'criterion'. + +- **merge**: Merges rasters in a sequence to a single raster. + +## Workflow Yaml + ```yaml name: alos_forest_extent_download_merge @@ -48,19 +89,4 @@ description: pc_key: Planetary computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>merged_raster] - out2>categorical_raster] - tsk1{{alos_forest_extent_download}} - tsk2{{group_rasters_by_time}} - tsk3{{merge}} - tsk1{{alos_forest_extent_download}} -- downloaded_product/rasters --> tsk2{{group_rasters_by_time}} - tsk2{{group_rasters_by_time}} -- raster_groups/raster_sequence --> tsk3{{merge}} - inp1>user_input] -- user_input --> tsk1{{alos_forest_extent_download}} - tsk3{{merge}} -- raster --> out1>merged_raster] - tsk1{{alos_forest_extent_download}} -- downloaded_product --> out2>categorical_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download.md index 881d3632..9f30f0db 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download.md @@ -1,5 +1,40 @@ # data_ingestion/bing/basemap_download +Downloads Bing Maps basemaps. The workflow will list all tiles intersecting with the input geometry for a given zoom level and download a basemap for each of them using Bing Maps API. The basemap tiles will be returned as individual rasters. + +```{mermaid} + graph TD + inp1>input_geometry] + out1>basemaps] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- products/input_product --> tsk2{{download}} + inp1>input_geometry] -- user_input --> tsk1{{list}} + tsk2{{download}} -- basemap --> out1>basemaps] +``` + +## Sources + +- **input_geometry**: Geometry of interest for which to download the basemap tiles. + +## Sinks + +- **basemaps**: Downloaded basemaps. + +## Parameters + +- **api_key**: Required BingMaps API key. + +- **zoom_level**: Zoom level of interest, ranging from 0 to 20. 
For instance, a zoom level of 1 corresponds to a resolution of 78271.52 m/pixel, a zoom level of 10 corresponds to 152.9 m/pixel, and a zoom level of 19 corresponds to 0.3 m/pixel. For more information on zoom levels and their corresponding scale and resolution, please refer to the BingMaps API documentation at https://learn.microsoft.com/en-us/bingmaps/articles/understanding-scale-and-resolution + +## Tasks + +- **list**: Lists BingMaps basemap tile products intersecting the input geometry for a given `zoom_level`. + +- **download**: Downloads a basemap tile represented by a BingMapsProduct using BingMapsAPI. + +## Workflow Yaml + ```yaml name: basemap_download @@ -36,15 +71,4 @@ description: basemaps: Downloaded basemaps. -``` - -```{mermaid} - graph TD - inp1>input_geometry] - out1>basemaps] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- products/input_product --> tsk2{{download}} - inp1>input_geometry] -- user_input --> tsk1{{list}} - tsk2{{download}} -- basemap --> out1>basemaps] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download_merge.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download_merge.md index 1ad37192..121723af 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download_merge.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/bing/basemap_download_merge.md @@ -1,5 +1,46 @@ # data_ingestion/bing/basemap_download_merge +Downloads Bing Maps basemap tiles and merges them into a single raster. The workflow will list all tiles intersecting with the input geometry for a given zoom level, and download a basemap for each of them using Bing Maps API. The basemaps will be merged into a single raster with the union of the geometries of all tiles. 
+ +```{mermaid} + graph TD + inp1>input_geometry] + out1>merged_basemap] + tsk1{{basemap_download}} + tsk2{{to_sequence}} + tsk3{{merge}} + tsk1{{basemap_download}} -- basemaps/list_rasters --> tsk2{{to_sequence}} + tsk2{{to_sequence}} -- rasters_seq/raster_sequence --> tsk3{{merge}} + inp1>input_geometry] -- input_geometry --> tsk1{{basemap_download}} + tsk3{{merge}} -- raster --> out1>merged_basemap] +``` + +## Sources + +- **input_geometry**: Geometry of interest for which to download the basemap tiles. + +## Sinks + +- **merged_basemap**: Merged basemap raster. + +## Parameters + +- **api_key**: Required BingMaps API key. + +- **zoom_level**: Zoom level of interest, ranging from 0 to 20. For instance, a zoom level of 1 corresponds to a resolution of 78271.52 m/pixel, a zoom level of 10 corresponds to 152.9 m/pixel, and a zoom level of 19 corresponds to 0.3 m/pixel. For more information on zoom levels and their corresponding scale and resolution, please refer to the BingMaps API documentation at https://learn.microsoft.com/en-us/bingmaps/articles/understanding-scale-and-resolution + +- **merge_resolution**: Determines how the resolution of the output raster is defined. One of 'equal' (breaks if the resolution of the sequence rasters are not the same), 'lowest' (uses the lowest resolution among rasters), 'highest' (uses the highest resolution among rasters), or 'average' (averages the resolution of all rasters in the sequence). + +## Tasks + +- **basemap_download**: Downloads Bing Maps basemaps. + +- **to_sequence**: Combines a list of Rasters into a RasterSequence. + +- **merge**: Merges rasters in a sequence to a single raster. + +## Workflow Yaml + ```yaml name: basemap_download_merge @@ -44,17 +85,4 @@ description: merged_basemap: Merged basemap raster. 
-``` - -```{mermaid} - graph TD - inp1>input_geometry] - out1>merged_basemap] - tsk1{{basemap_download}} - tsk2{{to_sequence}} - tsk3{{merge}} - tsk1{{basemap_download}} -- basemaps/list_rasters --> tsk2{{to_sequence}} - tsk2{{to_sequence}} -- rasters_seq/raster_sequence --> tsk3{{merge}} - inp1>input_geometry] -- input_geometry --> tsk1{{basemap_download}} - tsk3{{merge}} -- raster --> out1>merged_basemap] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/cdl/download_cdl.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/cdl/download_cdl.md index aa4d0424..1f844998 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/cdl/download_cdl.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/cdl/download_cdl.md @@ -1,5 +1,34 @@ # data_ingestion/cdl/download_cdl +Downloads crop classes maps in the continental USA for the input time range. The workflow will download crop-specific land cover maps from the USDA Cropland Data Layer, available for the continental United States. The input geometry must intersect with the coverage area. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list_cdl}} + tsk2{{download_cdl}} + tsk1{{list_cdl}} -- cdl_products/input_product --> tsk2{{download_cdl}} + inp1>user_input] -- input_item --> tsk1{{list_cdl}} + tsk2{{download_cdl}} -- cdl_raster --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: CDL land cover raster. + +## Tasks + +- **list_cdl**: Lists all years for the input time range and creates a product for each of them to be downloaded. + +- **download_cdl**: Downloads a CategoricalRaster from a CDLProduct. + +## Workflow Yaml + ```yaml name: download_cdl @@ -30,15 +59,4 @@ description: raster: CDL land cover raster. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list_cdl}} - tsk2{{download_cdl}} - tsk1{{list_cdl}} -- cdl_products/input_product --> tsk2{{download_cdl}} - inp1>user_input] -- input_item --> tsk1{{list_cdl}} - tsk2{{download_cdl}} -- cdl_raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/dem/download_dem.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/dem/download_dem.md index 2ab1eca9..57419d31 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/dem/download_dem.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/dem/download_dem.md @@ -1,5 +1,42 @@ # data_ingestion/dem/download_dem +Downloads digital elevation map tiles that intersect with the input geometry and time range. The workflow will download digital elevation maps from the USGS 3DEP datasets (available for the United States at 10 and 30 meters) or Copernicus DEM GLO-30 (globally at 30 meters) through the Planetary Computer. For more information, see https://planetarycomputer.microsoft.com/dataset/3dep-seamless and https://planetarycomputer.microsoft.com/dataset/cop-dem-glo-30 . + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- dem_products/input_product --> tsk2{{download}} + inp1>user_input] -- input_items --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: DEM raster. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +- **resolution**: Spatial resolution of the DEM. 10m and 30m are available. + +- **provider**: Provider of the DEM. "USGS3DEP" and "CopernicusDEM30" are available. + +## Tasks + +- **list**: Lists digital elevation map tiles that intersect with the input geometry and time range. 
+ +- **download**: Downloads digital elevation map raster given a DemProduct. + +## Workflow Yaml + ```yaml name: download_dem @@ -44,15 +81,4 @@ description: provider: Provider of the DEM. "USGS3DEP" and "CopernicusDEM30" are available. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- dem_products/input_product --> tsk2{{download}} - inp1>user_input] -- input_items --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi.md index dbd2eb0c..17cc5147 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi.md @@ -1,5 +1,40 @@ # data_ingestion/gedi/download_gedi +Downloads GEDI products for the input region and time range. The workflow downloads Global Ecosystem Dynamics Investigation (GEDI) products at the desired processing level using NASA's EarthData API. This workflow requires an EarthData API token. + +```{mermaid} + graph TD + inp1>user_input] + out1>product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- gedi_products/gedi_product --> tsk2{{download}} + inp1>user_input] -- input_data --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>product] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **product**: GEDI products. + +## Parameters + +- **earthdata_token**: API token for the EarthData platform. Required to run the workflow. + +- **processing_level**: GEDI product processing level. One of 'GEDI01_B.002', 'GEDI02_A.002', 'GEDI02_B.002'. + +## Tasks + +- **list**: Lists GEDI Products from NASA's EarthData API. + +- **download**: Downloads GEDI products. 
+ +## Workflow Yaml + ```yaml name: download_gedi @@ -39,15 +74,4 @@ description: 'GEDI02_B.002'. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- gedi_products/gedi_product --> tsk2{{download}} - inp1>user_input] -- input_data --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi_rh100.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi_rh100.md index 15563e8d..a77db935 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi_rh100.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gedi/download_gedi_rh100.md @@ -1,5 +1,41 @@ # data_ingestion/gedi/download_gedi_rh100 +Downloads L2B GEDI products and extracts RH100 variables. The workflow will download the products for the input region and time range, and then extract RH100 variables for each of the beam shots. Each value is geolocated according to the lowest mode latitude and longitude values. + +```{mermaid} + graph TD + inp1>user_input] + out1>rh100] + tsk1{{download}} + tsk2{{extract}} + tsk1{{download}} -- product/gedi_product --> tsk2{{extract}} + inp1>user_input] -- user_input --> tsk1{{download}} + inp1>user_input] -- roi --> tsk2{{extract}} + tsk2{{extract}} -- rh100 --> out1>rh100] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **rh100**: Points in EPSG:4326 with their associated RH100 values. + +## Parameters + +- **earthdata_token**: API token for the EarthData platform. Required to run the workflow. + +- **check_quality**: Whether to filter points according to the quality flag. + +## Tasks + +- **download**: Downloads GEDI products for the input region and time range. + +- **extract**: Extracts RH100 variables within the region of interest of a GEDIProduct. 
+ +## Workflow Yaml + ```yaml name: download_gedi_rh100 @@ -38,16 +74,4 @@ description: check_quality: Whether to filter points according to the quality flag. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>rh100] - tsk1{{download}} - tsk2{{extract}} - tsk1{{download}} -- product/gedi_product --> tsk2{{extract}} - inp1>user_input] -- user_input --> tsk1{{download}} - inp1>user_input] -- roi --> tsk2{{extract}} - tsk2{{extract}} -- rh100 --> out1>rh100] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download.md index 6c3d7e41..cc0bf726 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download.md @@ -1,5 +1,34 @@ # data_ingestion/glad/glad_forest_extent_download +Downloads Global Land Analysis (GLAD) forest extent data. The workflow will list all GLAD forest extent products that intersect with the input geometry and download the data for each of them. The data will be returned as rasters. + +```{mermaid} + graph TD + inp1>input_item] + out1>downloaded_product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- glad_products/glad_product --> tsk2{{download}} + inp1>input_item] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>downloaded_product] +``` + +## Sources + +- **input_item**: Geometry of interest for which to download the GLAD forest extent data. + +## Sinks + +- **downloaded_product**: Downloaded GLAD forest extent product. + +## Tasks + +- **list**: Lists Global Land Analysis (GLAD) forest products that intersect the user-provided geometry/time range. 
+ +- **download**: Downloads a GLADProduct + +## Workflow Yaml + ```yaml name: glad_forest_extent_download @@ -31,15 +60,4 @@ description: downloaded_product: Downloaded GLAD forest extent product. -``` - -```{mermaid} - graph TD - inp1>input_item] - out1>downloaded_product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- glad_products/glad_product --> tsk2{{download}} - inp1>input_item] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>downloaded_product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download_merge.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download_merge.md index 4f3a5dfd..ccb0fc8b 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download_merge.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/glad/glad_forest_extent_download_merge.md @@ -1,5 +1,42 @@ # data_ingestion/glad/glad_forest_extent_download_merge +Downloads the tiles from Global Land Analysis (GLAD) forest data that intersect with the user input geometry and time range, and merges them into a single raster. The workflow lists the GLAD forest products that intersect with the input geometry and time range, and downloads the filtered products. The downloaded products are merged into a single raster and classified. The result tiles have pixel values categorized into two classes - 0 (non-forest) and 1 (forest). This workflow uses the same forest definition as the Food and Agriculture Organization of the United Nations (FAO). 
+ +```{mermaid} + graph TD + inp1>input_item] + out1>merged_product] + out2>categorical_raster] + tsk1{{glad_forest_extent_download}} + tsk2{{group_rasters_by_time}} + tsk3{{merge}} + tsk1{{glad_forest_extent_download}} -- downloaded_product/rasters --> tsk2{{group_rasters_by_time}} + tsk2{{group_rasters_by_time}} -- raster_groups/raster_sequence --> tsk3{{merge}} + inp1>input_item] -- input_item --> tsk1{{glad_forest_extent_download}} + tsk3{{merge}} -- raster --> out1>merged_product] + tsk1{{glad_forest_extent_download}} -- downloaded_product --> out2>categorical_raster] +``` + +## Sources + +- **input_item**: Geometry of interest for which to download the GLAD forest extent data. + +## Sinks + +- **merged_product**: Merged GLAD forest extent product to geometry of interest. + +- **categorical_raster**: Raster with the GLAD forest extent data. + +## Tasks + +- **glad_forest_extent_download**: Downloads Global Land Analysis (GLAD) forest extent data. + +- **group_rasters_by_time**: This op groups rasters in time according to 'criterion'. + +- **merge**: Merges rasters in a sequence to a single raster. + +## Workflow Yaml + ```yaml name: glad_forest_extent_download_merge @@ -44,19 +81,4 @@ description: categorical_raster: Raster with the GLAD forest extent data. 
-``` - -```{mermaid} - graph TD - inp1>input_item] - out1>merged_product] - out2>categorical_raster] - tsk1{{glad_forest_extent_download}} - tsk2{{group_rasters_by_time}} - tsk3{{merge}} - tsk1{{glad_forest_extent_download}} -- downloaded_product/rasters --> tsk2{{group_rasters_by_time}} - tsk2{{group_rasters_by_time}} -- raster_groups/raster_sequence --> tsk3{{merge}} - inp1>input_item] -- input_item --> tsk1{{glad_forest_extent_download}} - tsk3{{merge}} -- raster --> out1>merged_product] - tsk1{{glad_forest_extent_download}} -- downloaded_product --> out2>categorical_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gnatsgo/download_gnatsgo.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gnatsgo/download_gnatsgo.md index c54c541b..b0dd9828 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gnatsgo/download_gnatsgo.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/gnatsgo/download_gnatsgo.md @@ -1,5 +1,79 @@ # data_ingestion/gnatsgo/download_gnatsgo +Downloads gNATSGO raster data that intersect with the input geometry and time range. This workflow lists and downloads raster products of the gNATSGO dataset from Planetary Computer. Input geometry must fall within the continental USA, whereas input time range can be arbitrary (all gNATSGO assets are from 2020-07-01). For more information on the available properties, see https://planetarycomputer.microsoft.com/dataset/gnatsgo-rasters. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- gnatsgo_products/gnatsgo_product --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_raster --> out1>raster] +``` + +## Sources + +- **user_input**: Geometry of interest (arbitrary time range). + +## Sinks + +- **raster**: Raster with desired property. 
+ +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +- **variable**: Options are: + aws{DEPTH} - Available water storage estimate (AWS) for the DEPTH zone. + soc{DEPTH} - Soil organic carbon stock estimate (SOC) for the DEPTH zone. + tk{DEPTH}a - Thickness of soil components used in the DEPTH zone for the AWS calculation. + tk{DEPTH}s - Thickness of soil components used in the DEPTH zone for the SOC calculation. + mukey - Map unit key, a unique identifier of a record for matching with gNATSGO tables. + droughty - Drought vulnerability estimate. + nccpi3all - National Commodity Crop Productivity Index that has the highest value among Corn +and Soybeans, Small Grains, or Cotton for major earthy components. + nccpi3corn - National Commodity Crop Productivity Index for Corn for major earthy +components. + nccpi3cot - National Commodity Crop Productivity Index for Cotton for major earthy +components. + nccpi3sg - National Commodity Crop Productivity Index for Small Grains for major earthy +components. + nccpi3soy - National Commodity Crop Productivity Index for Soy for major earthy components. + pctearthmc - National Commodity Crop Productivity Index map unit percent earthy is the map +unit summed comppct_r for major earthy components. + pwsl1pomu - Potential Wetland Soil Landscapes (PWSL). + rootznaws - Root zone (commodity crop) available water storage estimate (RZAWS). + rootznemc - Root zone depth is the depth within the soil profile that commodity crop (cc) +roots can effectively extract water and nutrients for growth. + musumcpct - Sum of the comppct_r (SSURGO component table) values for all listed components +in the map unit. + musumcpcta - Sum of the comppct_r (SSURGO component table) values used in the available +water storage calculation for the map unit. + musumcpcts - Sum of the comppct_r (SSURGO component table) values used in the soil organic +carbon calculation for the map unit. 
+gNATSGO has properties available for multiple soil depths. You may exchange DEPTH in the variable names above for any of the following (all measured in cm): + 0_5 + 0_20 + 0_30 + 5_20 + 0_100 + 0_150 + 0_999 + 20_50 + 50_100 + 100_150 + 150_999 + +## Tasks + +- **list**: Lists gNATSGO products from Planetary Computer that intersect with input geometry. + +- **download**: Downloads the raster asset for 'variable' given a GNATSGO product. + +## Workflow Yaml + ```yaml name: download_gnatsgo @@ -66,15 +140,4 @@ description: \ 0_999\n 20_50\n 50_100\n 100_150\n 150_999" -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- gnatsgo_products/gnatsgo_product --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/hansen/hansen_forest_change_download.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/hansen/hansen_forest_change_download.md new file mode 100644 index 00000000..1a36c647 --- /dev/null +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/hansen/hansen_forest_change_download.md @@ -0,0 +1,110 @@ +# data_ingestion/hansen/hansen_forest_change_download + +Downloads and merges Global Forest Change (Hansen) rasters that intersect the user-provided geometry/time range. The workflow lists Global Forest Change (Hansen) products that intersect the user-provided geometry/time range, downloads the data for each of them, and merges the rasters. The dataset is available at 30m resolution and is updated annually. The data contains information on forest cover, loss, and gain. The default dataset version is GFC-2022-v1.10 and is passed to the workflow as the parameter tiles_folder_url. For the default version, the dataset is available from 2000 to 2022. 
Dataset details can be found at https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. + +```{mermaid} + graph TD + inp1>input_item] + out1>merged_raster] + out2>downloaded_raster] + tsk1{{list}} + tsk2{{download}} + tsk3{{group}} + tsk4{{merge}} + tsk1{{list}} -- hansen_products/hansen_product --> tsk2{{download}} + tsk2{{download}} -- raster/rasters --> tsk3{{group}} + tsk3{{group}} -- raster_groups/raster_sequence --> tsk4{{merge}} + inp1>input_item] -- input_item --> tsk1{{list}} + tsk4{{merge}} -- raster --> out1>merged_raster] + tsk2{{download}} -- raster --> out2>downloaded_raster] +``` + +## Sources + +- **input_item**: User-provided geometry and time range. + +## Sinks + +- **merged_raster**: Merged Global Forest Change (Hansen) data as a raster. + +- **downloaded_raster**: Individual Global Forest Change (Hansen) rasters prior to the merge operation. + +## Parameters + +- **layer_name**: Name of the Global Forest Change (Hansen) layer. Can be any of the following names 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'. + +- **tiles_folder_url**: URL to the Global Forest Change (Hansen) dataset. It specifies the dataset version and is used to download the data. + +## Tasks + +- **list**: Lists Global Forest Change (Hansen) products that intersect the user-provided geometry/time range. + +- **download**: Downloads Global Forest Change (Hansen) data. + +- **group**: This op groups rasters in time according to 'criterion'. + +- **merge**: Merges rasters in a sequence to a single raster. 
+ +## Workflow Yaml + +```yaml + +name: glad_forest_change_download +sources: + input_item: + - list.input_item +sinks: + merged_raster: merge.raster + downloaded_raster: download.raster +parameters: + layer_name: null + tiles_folder_url: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ +tasks: + list: + op: list_hansen_products + parameters: + tiles_folder_url: '@from(tiles_folder_url)' + layer_name: '@from(layer_name)' + download: + op: download_hansen + group: + op: group_rasters_by_time + parameters: + criterion: year + merge: + op: merge_rasters +edges: +- origin: list.hansen_products + destination: + - download.hansen_product +- origin: download.raster + destination: + - group.rasters +- origin: group.raster_groups + destination: + - merge.raster_sequence +description: + short_description: Downloads and merges Global Forest Change (Hansen) rasters that + intersect the user-provided geometry/time range. + long_description: The workflow lists Global Forest Change (Hansen) products that + intersect the user-provided geometry/time range, downloads the data for each of + them, and merges the rasters. The dataset is available at 30m resolution and is + updated annually. The data contains information on forest cover, loss, and gain. + The default dataset version is GFC-2022-v1.10 and is passed to the workflow as + the parameter tiles_folder_url. For the default version, the dataset is available + from 2000 to 2022. Dataset details can be found at https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. + sources: + input_item: User-provided geometry and time range. + sinks: + merged_raster: Merged Global Forest Change (Hansen) data as a raster. + downloaded_raster: Individual Global Forest Change (Hansen) rasters prior to the + merge operation. + parameters: + tiles_folder_url: URL to the Global Forest Change (Hansen) dataset. It specifies + the dataset version and is used to download the data. 
+ layer_name: Name of the Global Forest Change (Hansen) layer. Can be any of the + following names 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', + 'last'. + + +``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/landsat/preprocess_landsat.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/landsat/preprocess_landsat.md index ae984f20..0e167384 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/landsat/preprocess_landsat.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/landsat/preprocess_landsat.md @@ -1,5 +1,44 @@ # data_ingestion/landsat/preprocess_landsat +Downloads and preprocesses LANDSAT tiles that intersect with the input geometry and time range. The workflow will download the tile bands from the Planetary Computer and stack them into a single raster at 30m resolution. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list}} + tsk2{{download}} + tsk3{{stack}} + tsk1{{list}} -- landsat_products/landsat_product --> tsk2{{download}} + tsk2{{download}} -- downloaded_product/landsat_product --> tsk3{{stack}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk3{{stack}} -- landsat_raster --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: LANDSAT rasters at 30m resolution. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +- **qa_mask_value**: Bitmap indicating which pixels should be included. See the documentation for each bit at https://www.usgs.gov/media/images/landsat-collection-2-pixel-quality-assessment-bit-index . For example, the default value 64 (i.e. 1<<6 ) corresponds to "Clear" pixels. + +## Tasks + +- **list**: Lists LANDSAT tiles that intersect with the input geometry and time range. + +- **download**: Downloads LANDSAT tile bands from product. + +- **stack**: Stacks downloaded bands into a single raster.
+ +## Workflow Yaml + ```yaml name: preprocess_landsat @@ -45,17 +84,4 @@ description: For example, the default value 64 (i.e. 1<<6 ) corresponds to "Clear" pixels -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list}} - tsk2{{download}} - tsk3{{stack}} - tsk1{{list}} -- landsat_products/landsat_product --> tsk2{{download}} - tsk2{{download}} -- downloaded_product/landsat_product --> tsk3{{stack}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk3{{stack}} -- landsat_raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_surface_reflectance.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_surface_reflectance.md index 7c8dbd06..8024938d 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_surface_reflectance.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_surface_reflectance.md @@ -1,5 +1,40 @@ # data_ingestion/modis/download_modis_surface_reflectance +Downloads MODIS 8-day surface reflectance rasters that intersect with the input geometry and time range. The workflow will download MODIS raster images either at 250m or 500m resolution. The products are available at an 8-day interval and pixel values are selected based on low clouds, low view angle, and highest index value. Notice that only bands 1, 2 and quality control are available at 250m. For more information, see https://planetarycomputer.microsoft.com/dataset/modis-09Q1-061 and https://planetarycomputer.microsoft.com/dataset/modis-09A1-061 + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- modis_products/product --> tsk2{{download}} + inp1>user_input] -- input_data --> tsk1{{list}} + tsk2{{download}} -- raster --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest.
+ +## Sinks + +- **raster**: Products containing MODIS reflectance bands and data. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +- **resolution_m**: Product resolution, in meters. Either 250 or 500. + +## Tasks + +- **list**: Lists MODIS 8-day surface reflectance rasters intersecting with the input geometry and time range for desired resolution. + +- **download**: Downloads MODIS surface reflectance rasters. + +## Workflow Yaml + ```yaml name: download_modis_surface_reflectance @@ -41,15 +76,4 @@ description: resolution_m: Product resolution, in meters. Either 250 or 500. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- modis_products/product --> tsk2{{download}} - inp1>user_input] -- input_data --> tsk1{{list}} - tsk2{{download}} -- raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_vegetation_index.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_vegetation_index.md index ca221ee7..1ca2651d 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_vegetation_index.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/modis/download_modis_vegetation_index.md @@ -1,5 +1,42 @@ # data_ingestion/modis/download_modis_vegetation_index +Downloads MODIS 16-day vegetation index products that intersect with the input geometry and time range. The workflow will download products at the chosen index and resolution. The products are available at a 16-day interval and pixel values are selected based on low clouds, low view angle, and highest index value. Vegetation index values range from (-2000 to 10000). For more information, see https://planetarycomputer.microsoft.com/dataset/modis-13Q1-061 and https://lpdaac.usgs.gov/products/mod13a1v061/ . 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>index] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- modis_products/product --> tsk2{{download}} + inp1>user_input] -- input_data --> tsk1{{list}} + tsk2{{download}} -- index --> out1>index] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **index**: Products containing the chosen index at the chosen resolution. + +## Parameters + +- **index**: Vegetation index that should be downloaded. Either 'evi' or 'ndvi'. + +- **pc_key**: Optional Planetary Computer API key. + +- **resolution_m**: Product resolution, in meters. Either 250 or 500. + +## Tasks + +- **list**: Lists MODIS vegetation products for input geometry, time range and resolution. + +- **download**: Downloads selected index raster from Modis product. + +## Workflow Yaml + ```yaml name: download_modis_vegetation_index @@ -44,15 +81,4 @@ description: resolution_m: Product resolution, in meters. Either 250 or 500. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>index] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- modis_products/product --> tsk2{{download}} - inp1>user_input] -- input_data --> tsk1{{list}} - tsk2{{download}} -- index --> out1>index] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/naip/download_naip.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/naip/download_naip.md index 915ea69e..ce471e87 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/naip/download_naip.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/naip/download_naip.md @@ -1,5 +1,38 @@ # data_ingestion/naip/download_naip +Downloads NAIP tiles that intersect with the input geometry and time range. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- naip_products/input_product --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: NAIP tiles. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **list**: Lists Naip tiles that intersect with input geometry and time range. + +- **download**: Downloads Naip raster from Naip product. + +## Workflow Yaml + ```yaml name: download_naip @@ -33,15 +66,4 @@ description: pc_key: Optional Planetary Computer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- naip_products/input_product --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/osm_road_geometries.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/osm_road_geometries.md index 0ba4612c..c266c945 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/osm_road_geometries.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/osm_road_geometries.md @@ -1,5 +1,45 @@ # data_ingestion/osm_road_geometries +Downloads road geometry for input region from Open Street Maps. The workflow downloads information from Open Street Maps for the target region and generates geometries for roads that intercept the input region bounding box. + +```{mermaid} + graph TD + inp1>user_input] + out1>roads] + tsk1{{download}} + inp1>user_input] -- input_region --> tsk1{{download}} + tsk1{{download}} -- roads --> out1>roads] +``` + +## Sources + +- **user_input**: List of external references. 
+ +## Sinks + +- **roads**: Geometry collection with road geometries that intercept the input region bounding box. + +## Parameters + +- **network_type**: Type of roads that will be selected. One of: + - 'drive_service': get drivable streets, including service roads. + - 'walk': get all streets and paths that pedestrians can use (this network type ignores + one-way directionality). + - 'bike': get all streets and paths that cyclists can use. + - 'all': download all non-private OSM streets and paths (this is the default network type + unless you specify a different one). + - 'all_private': download all OSM streets and paths, including private-access ones. + - 'drive': get drivable public streets (but not service roads). +For more information see https://osmnx.readthedocs.io/en/stable/index.html. + +- **buffer_size**: Size of buffer, in meters, to search for nodes in OSM. + +## Tasks + +- **download**: Downloads road geometry for input region from Open Street Maps. + +## Workflow Yaml + ```yaml name: osm_road_geometries @@ -40,13 +80,4 @@ description: buffer_size: Size of buffer, in meters, to search for nodes in OSM. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>roads] - tsk1{{download}} - inp1>user_input] -- input_region --> tsk1{{download}} - tsk1{{download}} -- roads --> out1>roads] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1.md index 643f8495..d25e498d 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1.md @@ -1,8 +1,75 @@ # data_ingestion/sentinel1/preprocess_s1 +Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 products in the input time range. 
The workflow fetches Sentinel-1 tiles that intersect with the Sentinel-2 products, downloads and preprocesses them, and produces Sentinel-1 rasters in the Sentinel-2 tiling system. + +```{mermaid} + graph TD + inp1>user_input] + inp2>s2_products] + out1>raster] + tsk1{{union}} + tsk2{{merge_geom_tr}} + tsk3{{list}} + tsk4{{filter}} + tsk5{{download}} + tsk6{{tile}} + tsk7{{group}} + tsk8{{merge}} + tsk1{{union}} -- merged/geometry --> tsk2{{merge_geom_tr}} + tsk2{{merge_geom_tr}} -- merged/input_item --> tsk3{{list}} + tsk3{{list}} -- sentinel_products/items --> tsk4{{filter}} + tsk4{{filter}} -- filtered_items/sentinel_product --> tsk5{{download}} + tsk5{{download}} -- downloaded_product/sentinel1_products --> tsk6{{tile}} + tsk6{{tile}} -- tiled_products/rasters --> tsk7{{group}} + tsk7{{group}} -- raster_groups/raster_group --> tsk8{{merge}} + inp1>user_input] -- time_range --> tsk2{{merge_geom_tr}} + inp2>s2_products] -- items --> tsk1{{union}} + inp2>s2_products] -- bounds_items --> tsk4{{filter}} + inp2>s2_products] -- sentinel2_products --> tsk6{{tile}} + tsk8{{merge}} -- merged_product --> out1>raster] +``` + +## Sources + +- **user_input**: Time range of interest. + +- **s2_products**: Sentinel-2 products whose geometries are used to select Sentinel-1 tiles. + +## Sinks + +- **raster**: Sentinel-1 rasters in the Sentinel-2 tiling system. + +## Parameters + +- **pc_key**: Planetary Computer API key. + +- **min_cover**: Minimum amount of cover required for a group to be used. + +- **dl_timeout**: Maximum time, in seconds, before a band reading operation times out. + +## Tasks + +- **union**: Create item with merged geometry from item list. + +- **merge_geom_tr**: Create item that contains the geometry from one item and the time range from another. + +- **list**: List Sentinel-1 GRD or RTC products given geometry and time range. + +- **filter**: Select items necessary to spatially cover the geometry of the bounds items.
+ +- **download**: Downloads the Sentinel-1 RTC product bands. + +- **tile**: Match Sentinel-1 products that intersect with Sentinel-2 tiles. + +- **group**: Groups raster files representing the same tile and moment in time that might have been partially generated and split due to the movement of Sentinel-1 through base stations. + +- **merge**: Merge items from the same absolute orbit into the appropriate MGRS (Sentinel-2 tiling system) tile. + +## Workflow Yaml + ```yaml -name: preprocess_s1 +name: preprocess_s1_rtc sources: user_input: - merge_geom_tr.time_range @@ -15,6 +82,7 @@ sinks: parameters: pc_key: null min_cover: 0.4 + dl_timeout: null tasks: union: op: merge_geometries @@ -32,10 +100,10 @@ tasks: op: download_sentinel1 parameters: api_key: '@from(pc_key)' + timeout_s: '@from(dl_timeout)' tile: - op: tile_sentinel1 - preprocess: - op: apply_sentinel1_snap_processing + op: tile_sentinel1_rtc + op_dir: tile_sentinel1 group: op: group_sentinel1_orbits merge: @@ -57,9 +125,6 @@ edges: destination: - tile.sentinel1_products - origin: tile.tiled_products - destination: - - preprocess.sentinel1_product -- origin: preprocess.preprocessed_product destination: - group.rasters - origin: group.raster_groups @@ -81,33 +146,4 @@ description: pc_key: Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - inp2>s2_products] - out1>raster] - tsk1{{union}} - tsk2{{merge_geom_tr}} - tsk3{{list}} - tsk4{{filter}} - tsk5{{download}} - tsk6{{tile}} - tsk7{{preprocess}} - tsk8{{group}} - tsk9{{merge}} - tsk1{{union}} -- merged/geometry --> tsk2{{merge_geom_tr}} - tsk2{{merge_geom_tr}} -- merged/input_item --> tsk3{{list}} - tsk3{{list}} -- sentinel_products/items --> tsk4{{filter}} - tsk4{{filter}} -- filtered_items/sentinel_product --> tsk5{{download}} - tsk5{{download}} -- downloaded_product/sentinel1_products --> tsk6{{tile}} - tsk6{{tile}} -- tiled_products/sentinel1_product --> tsk7{{preprocess}} - tsk7{{preprocess}} -- preprocessed_product/rasters --> tsk8{{group}} - tsk8{{group}} -- raster_groups/raster_group --> tsk9{{merge}} - inp1>user_input] -- time_range --> tsk2{{merge_geom_tr}} - inp2>s2_products] -- items --> tsk1{{union}} - inp2>s2_products] -- bounds_items --> tsk4{{filter}} - inp2>s2_products] -- sentinel2_products --> tsk6{{tile}} - tsk9{{merge}} -- merged_product --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1_rtc.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1_rtc.md deleted file mode 100644 index 5f5ece2a..00000000 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel1/preprocess_s1_rtc.md +++ /dev/null @@ -1,111 +0,0 @@ -# data_ingestion/sentinel1/preprocess_s1_rtc - -```yaml - -name: preprocess_s1_rtc -sources: - user_input: - - merge_geom_tr.time_range - s2_products: - - union.items - - filter.bounds_items - - tile.sentinel2_products -sinks: - raster: merge.merged_product -parameters: - pc_key: null - min_cover: 0.4 - dl_timeout: null -tasks: - union: - op: merge_geometries - merge_geom_tr: - op: merge_geometry_and_time_range - list: - op: list_sentinel1_products_pc - op_dir: list_sentinel1_products - parameters: - collection: rtc - filter: - 
op: select_necessary_coverage_items - parameters: - min_cover: '@from(min_cover)' - group_attribute: orbit_number - download: - op: download_sentinel1_rtc - parameters: - api_key: '@from(pc_key)' - timeout_s: '@from(dl_timeout)' - tile: - op: tile_sentinel1_rtc - op_dir: tile_sentinel1 - group: - op: group_sentinel1_orbits - merge: - op: merge_sentinel1_orbits -edges: -- origin: union.merged - destination: - - merge_geom_tr.geometry -- origin: merge_geom_tr.merged - destination: - - list.input_item -- origin: list.sentinel_products - destination: - - filter.items -- origin: filter.filtered_items - destination: - - download.sentinel_product -- origin: download.downloaded_product - destination: - - tile.sentinel1_products -- origin: tile.tiled_products - destination: - - group.rasters -- origin: group.raster_groups - destination: - - merge.raster_group -description: - short_description: Downloads and preprocesses tiles of Sentinel-1 imagery that intersect - with the input Sentinel-2 products in the input time range. - long_description: The workflow fetches Sentinel-1 tiles that intersects with the - Sentinel-2 products, downloads and preprocesses them, and produces Sentinel-1 - rasters in the Sentinel-2 tiling system. - sources: - user_input: Time range of interest. - s2_products: Sentinel-2 products whose geometries are used to select Sentinel-1 - tiles. - sinks: - raster: Sentinel-1 rasters in the Sentinel-2 tiling system. - parameters: - pc_key: Planetary Computer API key. 
- - -``` - -```{mermaid} - graph TD - inp1>user_input] - inp2>s2_products] - out1>raster] - tsk1{{union}} - tsk2{{merge_geom_tr}} - tsk3{{list}} - tsk4{{filter}} - tsk5{{download}} - tsk6{{tile}} - tsk7{{group}} - tsk8{{merge}} - tsk1{{union}} -- merged/geometry --> tsk2{{merge_geom_tr}} - tsk2{{merge_geom_tr}} -- merged/input_item --> tsk3{{list}} - tsk3{{list}} -- sentinel_products/items --> tsk4{{filter}} - tsk4{{filter}} -- filtered_items/sentinel_product --> tsk5{{download}} - tsk5{{download}} -- downloaded_product/sentinel1_products --> tsk6{{tile}} - tsk6{{tile}} -- tiled_products/rasters --> tsk7{{group}} - tsk7{{group}} -- raster_groups/raster_group --> tsk8{{merge}} - inp1>user_input] -- time_range --> tsk2{{merge_geom_tr}} - inp2>s2_products] -- items --> tsk1{{union}} - inp2>s2_products] -- bounds_items --> tsk4{{filter}} - inp2>s2_products] -- sentinel2_products --> tsk6{{tile}} - tsk8{{merge}} -- merged_product --> out1>raster] -``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/cloud_ensemble.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/cloud_ensemble.md index cbfcd622..33546b57 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/cloud_ensemble.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/cloud_ensemble.md @@ -1,5 +1,54 @@ # data_ingestion/sentinel2/cloud_ensemble +Computes the cloud probability of a Sentinel-2 L2A raster using an ensemble of five cloud segmentation models. The workflow computes cloud probabilities for each model independently, and averages them to obtain a single probability map. 
+ +```{mermaid} + graph TD + inp1>sentinel_raster] + out1>cloud_probability] + tsk1{{cloud1}} + tsk2{{cloud2}} + tsk3{{cloud3}} + tsk4{{cloud4}} + tsk5{{cloud5}} + tsk6{{ensemble}} + tsk1{{cloud1}} -- cloud_probability/cloud1 --> tsk6{{ensemble}} + tsk2{{cloud2}} -- cloud_probability/cloud2 --> tsk6{{ensemble}} + tsk3{{cloud3}} -- cloud_probability/cloud3 --> tsk6{{ensemble}} + tsk4{{cloud4}} -- cloud_probability/cloud4 --> tsk6{{ensemble}} + tsk5{{cloud5}} -- cloud_probability/cloud5 --> tsk6{{ensemble}} + inp1>sentinel_raster] -- sentinel_raster --> tsk1{{cloud1}} + inp1>sentinel_raster] -- sentinel_raster --> tsk2{{cloud2}} + inp1>sentinel_raster] -- sentinel_raster --> tsk3{{cloud3}} + inp1>sentinel_raster] -- sentinel_raster --> tsk4{{cloud4}} + inp1>sentinel_raster] -- sentinel_raster --> tsk5{{cloud5}} + tsk6{{ensemble}} -- cloud_probability --> out1>cloud_probability] +``` + +## Sources + +- **sentinel_raster**: Sentinel-2 L2A raster. + +## Sinks + +- **cloud_probability**: Cloud probability map. + +## Tasks + +- **cloud1**: Computes cloud probabilities using a convolutional segmentation model for L2A. + +- **cloud2**: Computes cloud probabilities using a convolutional segmentation model for L2A. + +- **cloud3**: Computes cloud probabilities using a convolutional segmentation model for L2A. + +- **cloud4**: Computes cloud probabilities using a convolutional segmentation model for L2A. + +- **cloud5**: Computes cloud probabilities using a convolutional segmentation model for L2A. + +- **ensemble**: Computes ensemble cloud probabilities from all 5 models. + +## Workflow Yaml + ```yaml name: cloud_ensemble @@ -62,27 +111,4 @@ description: cloud_probability: Cloud probability map. 
-``` - -```{mermaid} - graph TD - inp1>sentinel_raster] - out1>cloud_probability] - tsk1{{cloud1}} - tsk2{{cloud2}} - tsk3{{cloud3}} - tsk4{{cloud4}} - tsk5{{cloud5}} - tsk6{{ensemble}} - tsk1{{cloud1}} -- cloud_probability/cloud1 --> tsk6{{ensemble}} - tsk2{{cloud2}} -- cloud_probability/cloud2 --> tsk6{{ensemble}} - tsk3{{cloud3}} -- cloud_probability/cloud3 --> tsk6{{ensemble}} - tsk4{{cloud4}} -- cloud_probability/cloud4 --> tsk6{{ensemble}} - tsk5{{cloud5}} -- cloud_probability/cloud5 --> tsk6{{ensemble}} - inp1>sentinel_raster] -- sentinel_raster --> tsk1{{cloud1}} - inp1>sentinel_raster] -- sentinel_raster --> tsk2{{cloud2}} - inp1>sentinel_raster] -- sentinel_raster --> tsk3{{cloud3}} - inp1>sentinel_raster] -- sentinel_raster --> tsk4{{cloud4}} - inp1>sentinel_raster] -- sentinel_raster --> tsk5{{cloud5}} - tsk6{{ensemble}} -- cloud_probability --> out1>cloud_probability] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask.md index 08705065..730a0e62 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask.md @@ -1,5 +1,55 @@ # data_ingestion/sentinel2/improve_cloud_mask +Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by machine learning segmentation models. This workflow computes cloud and shadow probabilities using segmentation models, thresholds them, and merges the models' masks with the product mask. 
+ +```{mermaid} + graph TD + inp1>s2_raster] + inp2>product_mask] + out1>mask] + tsk1{{cloud}} + tsk2{{shadow}} + tsk3{{merge}} + tsk1{{cloud}} -- cloud_probability --> tsk3{{merge}} + tsk2{{shadow}} -- shadow_probability --> tsk3{{merge}} + inp1>s2_raster] -- sentinel_raster --> tsk1{{cloud}} + inp1>s2_raster] -- sentinel_raster --> tsk2{{shadow}} + inp2>product_mask] -- product_mask --> tsk3{{merge}} + tsk3{{merge}} -- merged_cloud_mask --> out1>mask] +``` + +## Sources + +- **s2_raster**: Sentinel-2 L2A raster. + +- **product_mask**: Cloud mask obtained from the product's quality indicators. + +## Sinks + +- **mask**: Improved cloud mask. + +## Parameters + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +- **in_memory**: Whether to load the whole raster in memory when running predictions. Uses more memory (~4GB/worker) but speeds up inference for fast models. + +- **cloud_model**: ONNX file for the cloud model. Available models are 'cloud_model{idx}_cpu.onnx' with idx ∈ {1, 2} being FPN-based models, which are more accurate but slower, and idx ∈ {3, 4, 5} being cheaplab models, which are less accurate but faster. + +- **shadow_model**: ONNX file for the shadow model. 'shadow.onnx' is the only currently available model. + +## Tasks + +- **cloud**: Computes cloud probabilities using a convolutional segmentation model for L2A. + +- **shadow**: Computes shadow probabilities using a convolutional segmentation model for L2A. + +- **merge**: Merges cloud, shadow and product cloud masks into a single mask. + +## Workflow Yaml + ```yaml name: improve_cloud_mask @@ -64,20 +114,4 @@ description: available model. 
-``` - -```{mermaid} - graph TD - inp1>s2_raster] - inp2>product_mask] - out1>mask] - tsk1{{cloud}} - tsk2{{shadow}} - tsk3{{merge}} - tsk1{{cloud}} -- cloud_probability --> tsk3{{merge}} - tsk2{{shadow}} -- shadow_probability --> tsk3{{merge}} - inp1>s2_raster] -- sentinel_raster --> tsk1{{cloud}} - inp1>s2_raster] -- sentinel_raster --> tsk2{{shadow}} - inp2>product_mask] -- product_mask --> tsk3{{merge}} - tsk3{{merge}} -- merged_cloud_mask --> out1>mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask_ensemble.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask_ensemble.md index 01bebbf9..ea175a74 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask_ensemble.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/improve_cloud_mask_ensemble.md @@ -1,5 +1,49 @@ # data_ingestion/sentinel2/improve_cloud_mask_ensemble +Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by an ensemble of machine learning segmentation models. This workflow computes cloud and shadow probabilities using an ensemble of segmentation models, thresholds them, and merges the models' masks with the product mask. + +```{mermaid} + graph TD + inp1>s2_raster] + inp2>product_mask] + out1>mask] + tsk1{{cloud}} + tsk2{{shadow}} + tsk3{{merge}} + tsk1{{cloud}} -- cloud_probability --> tsk3{{merge}} + tsk2{{shadow}} -- shadow_probability --> tsk3{{merge}} + inp1>s2_raster] -- sentinel_raster --> tsk1{{cloud}} + inp1>s2_raster] -- sentinel_raster --> tsk2{{shadow}} + inp2>product_mask] -- product_mask --> tsk3{{merge}} + tsk3{{merge}} -- merged_cloud_mask --> out1>mask] +``` + +## Sources + +- **s2_raster**: Sentinel-2 L2A raster. + +- **product_mask**: Cloud mask obtained from the product's quality indicators. + +## Sinks + +- **mask**: Improved cloud mask.
+ +## Parameters + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +## Tasks + +- **cloud**: Computes the cloud probability of a Sentinel-2 L2A raster using an ensemble of five cloud segmentation models. + +- **shadow**: Computes shadow probabilities using a convolutional segmentation model for L2A. + +- **merge**: Merges cloud, shadow and product cloud masks into a single mask. + +## Workflow Yaml + ```yaml name: improve_cloud_mask_ensemble @@ -48,20 +92,4 @@ description: shadow_thr: Confidence threshold to assign a pixel as shadow. -``` - -```{mermaid} - graph TD - inp1>s2_raster] - inp2>product_mask] - out1>mask] - tsk1{{cloud}} - tsk2{{shadow}} - tsk3{{merge}} - tsk1{{cloud}} -- cloud_probability --> tsk3{{merge}} - tsk2{{shadow}} -- shadow_probability --> tsk3{{merge}} - inp1>s2_raster] -- sentinel_raster --> tsk1{{cloud}} - inp1>s2_raster] -- sentinel_raster --> tsk2{{shadow}} - inp2>product_mask] -- product_mask --> tsk3{{merge}} - tsk3{{merge}} -- merged_cloud_mask --> out1>mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2.md index 49010c0a..b7f845fe 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2.md @@ -1,5 +1,63 @@ # data_ingestion/sentinel2/preprocess_s2 +Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range. This workflow selects a minimum set of tiles that covers the input geometry, downloads Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single multi-band raster at 10m resolution. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + out2>mask] + tsk1{{list}} + tsk2{{filter}} + tsk3{{download}} + tsk4{{group}} + tsk5{{merge}} + tsk1{{list}} -- sentinel_products/items --> tsk2{{filter}} + tsk2{{filter}} -- filtered_items/sentinel_product --> tsk3{{download}} + tsk3{{download}} -- raster/rasters --> tsk4{{group}} + tsk3{{download}} -- cloud/masks --> tsk4{{group}} + tsk4{{group}} -- raster_groups/raster_group --> tsk5{{merge}} + tsk4{{group}} -- mask_groups/mask_group --> tsk5{{merge}} + inp1>user_input] -- input_item --> tsk1{{list}} + inp1>user_input] -- bounds_items --> tsk2{{filter}} + tsk5{{merge}} -- output_raster --> out1>raster] + tsk5{{merge}} -- output_mask --> out2>mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. + +- **mask**: Cloud mask at 10m resolution from the product's quality indicators. + +## Parameters + +- **min_tile_cover**: Minimum RoI coverage to consider a set of tiles sufficient. + +- **max_tiles_per_time**: Maximum number of tiles used to cover the RoI in each date. + +- **pc_key**: Optional Planetary Computer API key. + +- **dl_timeout**: Maximum time, in seconds, before a band reading operation times out. + +## Tasks + +- **list**: Lists Sentinel-2 products that intersect with input geometry and time range. + +- **filter**: Select items necessary to spatially cover the geometry of the bounds items. + +- **download**: Downloads and preprocesses Sentinel-2 products. + +- **group**: Groups raster files representing the same tile and moment in time that might have been partially generated and split due to the movement of Sentinel-2 through base stations. + +- **merge**: Combines raster files grouped by group_sentinel2_orbits into a single raster. + +## Workflow Yaml + ```yaml name: preprocess_s2 @@ -69,26 +127,4 @@ description: pc_key: Optional Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - out2>mask] - tsk1{{list}} - tsk2{{filter}} - tsk3{{download}} - tsk4{{group}} - tsk5{{merge}} - tsk1{{list}} -- sentinel_products/items --> tsk2{{filter}} - tsk2{{filter}} -- filtered_items/sentinel_product --> tsk3{{download}} - tsk3{{download}} -- raster/rasters --> tsk4{{group}} - tsk3{{download}} -- cloud/masks --> tsk4{{group}} - tsk4{{group}} -- raster_groups/raster_group --> tsk5{{merge}} - tsk4{{group}} -- mask_groups/mask_group --> tsk5{{merge}} - inp1>user_input] -- input_item --> tsk1{{list}} - inp1>user_input] -- bounds_items --> tsk2{{filter}} - tsk5{{merge}} -- output_raster --> out1>raster] - tsk5{{merge}} -- output_mask --> out2>mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.md index 133ee6f6..1b774adb 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.md @@ -1,5 +1,51 @@ # data_ingestion/sentinel2/preprocess_s2_ensemble_masks +Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using an ensemble of cloud and shadow segmentation models. This workflow selects a minimum set of tiles that covers the input geometry, downloads Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single multi-band raster at 10m resolution. It then improves cloud masks by merging the product mask with cloud and shadow masks computed using an ensemble of cloud and shadow segmentation models. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + out2>mask] + tsk1{{s2}} + tsk2{{cloud}} + tsk1{{s2}} -- raster/s2_raster --> tsk2{{cloud}} + tsk1{{s2}} -- mask/product_mask --> tsk2{{cloud}} + inp1>user_input] -- user_input --> tsk1{{s2}} + tsk1{{s2}} -- raster --> out1>raster] + tsk2{{cloud}} -- mask --> out2>mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. + +- **mask**: Cloud masks at 10m resolution. + +## Parameters + +- **min_tile_cover**: Minimum RoI coverage to consider a set of tiles sufficient. + +- **max_tiles_per_time**: Maximum number of tiles used to cover the RoI in each date. + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range. + +- **cloud**: Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by an ensemble of machine learning segmentation models. + +## Workflow Yaml + ```yaml name: preprocess_s2_ensemble_masks @@ -50,18 +96,4 @@ description: mask: Cloud masks at 10m resolution. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - out2>mask] - tsk1{{s2}} - tsk2{{cloud}} - tsk1{{s2}} -- raster/s2_raster --> tsk2{{cloud}} - tsk1{{s2}} -- mask/product_mask --> tsk2{{cloud}} - inp1>user_input] -- user_input --> tsk1{{s2}} - tsk1{{s2}} -- raster --> out1>raster] - tsk2{{cloud}} -- mask --> out2>mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_improved_masks.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_improved_masks.md index d69b1e58..5f9fa237 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_improved_masks.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/sentinel2/preprocess_s2_improved_masks.md @@ -1,5 +1,59 @@ # data_ingestion/sentinel2/preprocess_s2_improved_masks +Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using cloud and shadow segmentation models. This workflow selects a minimum set of tiles that covers the input geometry, downloads Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single multi-band raster at 10m resolution. It then improves cloud masks by merging the product mask with cloud and shadow masks computed using cloud and shadow segmentation models. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + out2>mask] + tsk1{{s2}} + tsk2{{cloud}} + tsk1{{s2}} -- raster/s2_raster --> tsk2{{cloud}} + tsk1{{s2}} -- mask/product_mask --> tsk2{{cloud}} + inp1>user_input] -- user_input --> tsk1{{s2}} + tsk1{{s2}} -- raster --> out1>raster] + tsk2{{cloud}} -- mask --> out2>mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. + +- **mask**: Cloud masks at 10m resolution. 
+ +## Parameters + +- **min_tile_cover**: Minimum RoI coverage to consider a set of tiles sufficient. + +- **max_tiles_per_time**: Maximum number of tiles used to cover the RoI in each date. + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +- **in_memory**: Whether to load the whole raster in memory when running predictions. Uses more memory (~4GB/worker) but speeds up inference for fast models. + +- **cloud_model**: ONNX file for the cloud model. Available models are 'cloud_model{idx}_cpu.onnx' with idx ∈ {1, 2} being FPN-based models, which are more accurate but slower, and idx ∈ {3, 4, 5} being cheaplab models, which are less accurate but faster. + +- **shadow_model**: ONNX file for the shadow model. 'shadow.onnx' is the only currently available model. + +- **pc_key**: Optional Planetary Computer API key. + +- **dl_timeout**: Maximum time, in seconds, before a band reading operation times out. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range. + +- **cloud**: Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by machine learning segmentation models. + +## Workflow Yaml + ```yaml name: preprocess_s2_improved_masks @@ -58,18 +112,4 @@ description: mask: Cloud masks at 10m resolution. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - out2>mask] - tsk1{{s2}} - tsk2{{cloud}} - tsk1{{s2}} -- raster/s2_raster --> tsk2{{cloud}} - tsk1{{s2}} -- mask/product_mask --> tsk2{{cloud}} - inp1>user_input] -- user_input --> tsk1{{s2}} - tsk1{{s2}} -- raster --> out1>raster] - tsk2{{cloud}} -- mask --> out2>mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/soilgrids.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/soilgrids.md index 9eccbf63..cd4f96df 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/soilgrids.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/soilgrids.md @@ -1,5 +1,51 @@ # data_ingestion/soil/soilgrids +Downloads digital soil mapping information from SoilGrids for the input geometry. The workflow downloads a raster containing the map and identifiers for the input geometry. SoilGrids is a system for digital soil mapping based on global compilation of soil profile data and environmental layers. + +```{mermaid} + graph TD + inp1>input_item] + out1>downloaded_raster] + tsk1{{download_soilgrids}} + inp1>input_item] -- input_item --> tsk1{{download_soilgrids}} + tsk1{{download_soilgrids}} -- downloaded_raster --> out1>downloaded_raster] +``` + +## Sources + +- **input_item**: Input geometry. + +## Sinks + +- **downloaded_raster**: Raster with the map and identifiers requested. + +## Parameters + +- **map**: Map to download. 
Options: + - wrb - World Reference Base classes and probabilities + - bdod - Bulk density - kg/dm^3 + - cec - Cation exchange capacity at pH 7 - cmol(c)/kg + - cfvo - Coarse fragments (volumetric) - cm3/100cm3 (vol%) + - clay - Clay content - g/100g (%) + - nitrogen - Nitrogen - g/kg + - phh2o - Soil pH in H2O - pH + - sand - Sand content - g/100g (%) + - silt - Silt content - g/100g (%) + - soc - Soil organic carbon content - g/kg + - ocs - Soil organic carbon stock - kg/m^3 + - ocd - Organic carbon densities - kg/m^3 + +- **identifier**: Variable identifier to be downloaded. Depends on map. + - wrb: Acrisols, Albeluvisols, Alisols, Andosols, Arenosols, Calcisols, Cambisols, +Chernozems, Cryosols, Durisols, Ferralsols, Fluvisols, Gleysols, Gypsisols, Histosols, Kastanozems, Leptosols, Lixisols, Luvisols, MostProbable, Nitisols, Phaeozems, Planosols, Plinthosols, Podzols, Regosols, Solonchaks, Solonetz, Stagnosols, Umbrisols, Vertisols. +Other identifiers follow the nomenclature defined in the [link=https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean]SoilGrids documentation page: https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean[/]. + +## Tasks + +- **download_soilgrids**: Downloads digital soil mapping information from SoilGrids for the input geometry. + +## Workflow Yaml + ```yaml name: soilgrids @@ -47,13 +93,4 @@ description: \ documentation page: https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean[/]." 
-``` - -```{mermaid} - graph TD - inp1>input_item] - out1>downloaded_raster] - tsk1{{download_soilgrids}} - inp1>input_item] -- input_item --> tsk1{{download_soilgrids}} - tsk1{{download_soilgrids}} -- downloaded_raster --> out1>downloaded_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/usda.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/usda.md index 7362b8a6..6f7c35f5 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/usda.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/soil/usda.md @@ -1,5 +1,38 @@ # data_ingestion/soil/usda +Downloads USDA soil classification raster. The workflow will download a global raster with USDA soil classes at 1/30 degree resolution. + +```{mermaid} + graph TD + inp1>input_item] + out1>downloaded_raster] + tsk1{{datavibe_filter}} + tsk2{{download_usda_soils}} + tsk1{{datavibe_filter}} -- output_item/input_item --> tsk2{{download_usda_soils}} + inp1>input_item] -- input_item --> tsk1{{datavibe_filter}} + tsk2{{download_usda_soils}} -- downloaded_raster --> out1>downloaded_raster] +``` + +## Sources + +- **input_item**: Dummy input. + +## Sinks + +- **downloaded_raster**: Raster with USDA soil classes. + +## Parameters + +- **ignore**: Selection of which field of the input item should be ignored (among "time_range", "geometry", or "all" for both of them). + +## Tasks + +- **datavibe_filter**: Filters out time range and/or geometry information from the input item. + +- **download_usda_soils**: Downloads a global raster with USDA soil classes at 1/30 degree resolution. + +## Workflow Yaml + ```yaml name: usda_soils @@ -34,15 +67,4 @@ description: "geometry", or "all" for both of them). 
-``` - -```{mermaid} - graph TD - inp1>input_item] - out1>downloaded_raster] - tsk1{{datavibe_filter}} - tsk2{{download_usda_soils}} - tsk1{{datavibe_filter}} -- output_item/input_item --> tsk2{{download_usda_soils}} - inp1>input_item] -- input_item --> tsk1{{datavibe_filter}} - tsk2{{download_usda_soils}} -- downloaded_raster --> out1>downloaded_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye.md index 22cdbd87..4dbda7d0 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye.md @@ -1,5 +1,55 @@ # data_ingestion/spaceeye/spaceeye +Runs the SpaceEye cloud removal pipeline, yielding daily cloud-free images for the input geometry and time range. The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time range, preprocesses them, computes cloud masks, and runs SpaceEye inference in a sliding window on the retrieved tiles. This workflow can be reused as a preprocess step in many applications that require cloud-free Sentinel-2 data. For more information about SpaceEye, read the paper: https://arxiv.org/abs/2106.08408. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{preprocess}} + tsk2{{spaceeye}} + tsk1{{preprocess}} -- s2_raster/s2_rasters --> tsk2{{spaceeye}} + tsk1{{preprocess}} -- s1_raster/s1_rasters --> tsk2{{spaceeye}} + tsk1{{preprocess}} -- cloud_mask/cloud_rasters --> tsk2{{spaceeye}} + inp1>user_input] -- user_input --> tsk1{{preprocess}} + inp1>user_input] -- input_data --> tsk2{{spaceeye}} + tsk2{{spaceeye}} -- raster --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: Cloud-free rasters. 
+ +## Parameters + +- **duration**: Time window, in days, considered in the inference. Controls the amount of temporal context for inpainting clouds. Larger windows require more compute and memory. + +- **time_overlap**: Overlap ratio of each temporal window. Controls the temporal step between windows as a fraction of the window size. + +- **min_tile_cover**: Minimum RoI coverage to consider a set of tiles sufficient. + +- **max_tiles_per_time**: Maximum number of tiles used to cover the RoI in each date. + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +- **pc_key**: Optional Planetary Computer API key. + +- **s2_timeout**: Maximum time, in seconds, before a band reading operation times out. + +## Tasks + +- **preprocess**: Runs the SpaceEye preprocessing pipeline. + +- **spaceeye**: Performs SpaceEye inference to generate daily cloud-free images given Sentinel data and cloud masks. + +## Workflow Yaml + ```yaml name: spaceeye @@ -58,18 +108,4 @@ description: parameters: null -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{preprocess}} - tsk2{{spaceeye}} - tsk1{{preprocess}} -- s2_raster/s2_rasters --> tsk2{{spaceeye}} - tsk1{{preprocess}} -- s1_raster/s1_rasters --> tsk2{{spaceeye}} - tsk1{{preprocess}} -- cloud_mask/cloud_rasters --> tsk2{{spaceeye}} - inp1>user_input] -- user_input --> tsk1{{preprocess}} - inp1>user_input] -- input_data --> tsk2{{spaceeye}} - tsk2{{spaceeye}} -- raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_inference.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_inference.md index 53995783..20309895 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_inference.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_inference.md 
@@ -1,5 +1,66 @@ # data_ingestion/spaceeye/spaceeye_inference +Performs SpaceEye inference to generate daily cloud-free images given Sentinel data and cloud masks. The workflow will group input Sentinel-1, Sentinel-2, and cloud mask rasters into spatio-temporal windows and perform inference of each window. The windows will then be merged into rasters for the RoI. More information about SpaceEye available in the paper: https://arxiv.org/abs/2106.08408. + +```{mermaid} + graph TD + inp1>input_data] + inp2>s1_rasters] + inp3>s2_rasters] + inp4>cloud_rasters] + out1>raster] + tsk1{{group_s1}} + tsk2{{group_s2}} + tsk3{{group_mask}} + tsk4{{spaceeye}} + tsk5{{split}} + tsk1{{group_s1}} -- tile_sequences/s1_products --> tsk4{{spaceeye}} + tsk2{{group_s2}} -- tile_sequences/s2_products --> tsk4{{spaceeye}} + tsk3{{group_mask}} -- tile_sequences/cloud_masks --> tsk4{{spaceeye}} + tsk4{{spaceeye}} -- spaceeye_sequence/sequences --> tsk5{{split}} + inp1>input_data] -- input_data --> tsk1{{group_s1}} + inp1>input_data] -- input_data --> tsk2{{group_s2}} + inp1>input_data] -- input_data --> tsk3{{group_mask}} + inp2>s1_rasters] -- rasters --> tsk1{{group_s1}} + inp3>s2_rasters] -- rasters --> tsk2{{group_s2}} + inp4>cloud_rasters] -- rasters --> tsk3{{group_mask}} + tsk5{{split}} -- rasters --> out1>raster] +``` + +## Sources + +- **input_data**: Time range and region of interest. Will determine the spatio-temporal windows and region for the output rasters. + +- **s1_rasters**: Sentinel-1 rasters tiled to the Sentinel-2 grid. + +- **s2_rasters**: Sentinel-2 tile rasters for the input time range. + +- **cloud_rasters**: Cloud masks for each of the Sentinel-2 tiles. + +## Sinks + +- **raster**: Cloud-free rasters for the input time range and region of interest. + +## Parameters + +- **duration**: Time window, in days, considered in the inference. Controls the amount of temporal context for inpainting clouds. Larger windows require more compute and memory. 
+ +- **time_overlap**: Overlap ratio of each temporal window. Controls the temporal step between windows as a fraction of the window size. + +## Tasks + +- **group_s1**: Groups Sentinel-1 tiles into time windows of defined duration. + +- **group_s2**: Groups Sentinel-2 tiles into time windows of defined duration. + +- **group_mask**: Groups Sentinel-2 cloud masks into time windows of defined duration. + +- **spaceeye**: Runs SpaceEye to remove clouds in input rasters. + +- **split**: Splits a list of multiple TileSequence back to a list of Rasters. + +## Workflow Yaml + ```yaml name: spaceeye_inference @@ -81,29 +142,4 @@ description: between windows as a fraction of the window size. -``` - -```{mermaid} - graph TD - inp1>input_data] - inp2>s1_rasters] - inp3>s2_rasters] - inp4>cloud_rasters] - out1>raster] - tsk1{{group_s1}} - tsk2{{group_s2}} - tsk3{{group_mask}} - tsk4{{spaceeye}} - tsk5{{split}} - tsk1{{group_s1}} -- tile_sequences/s1_products --> tsk4{{spaceeye}} - tsk2{{group_s2}} -- tile_sequences/s2_products --> tsk4{{spaceeye}} - tsk3{{group_mask}} -- tile_sequences/cloud_masks --> tsk4{{spaceeye}} - tsk4{{spaceeye}} -- spaceeye_sequence/sequences --> tsk5{{split}} - inp1>input_data] -- input_data --> tsk1{{group_s1}} - inp1>input_data] -- input_data --> tsk2{{group_s2}} - inp1>input_data] -- input_data --> tsk3{{group_mask}} - inp2>s1_rasters] -- rasters --> tsk1{{group_s1}} - inp3>s2_rasters] -- rasters --> tsk2{{group_s2}} - inp4>cloud_rasters] -- rasters --> tsk3{{group_mask}} - tsk5{{split}} -- rasters --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation.md index 38140361..33772c17 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation.md +++ 
b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation.md @@ -1,5 +1,52 @@ # data_ingestion/spaceeye/spaceeye_interpolation +Runs the SpaceEye cloud removal pipeline using an interpolation-based algorithm, yielding daily cloud-free images for the input geometry and time range. The workflow fetches Sentinel-2 tiles that cover the input geometry and time range, preprocesses them, computes cloud masks, and runs SpaceEye inference in a sliding window on the retrieved tiles. This workflow can be reused as a preprocess step in many applications that require cloud-free Sentinel-2 data. For more information about SpaceEye, read the [paper](https://arxiv.org/abs/2106.08408). + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{preprocess}} + tsk2{{spaceeye}} + tsk1{{preprocess}} -- raster/s2_rasters --> tsk2{{spaceeye}} + tsk1{{preprocess}} -- mask/cloud_rasters --> tsk2{{spaceeye}} + inp1>user_input] -- user_input --> tsk1{{preprocess}} + inp1>user_input] -- input_data --> tsk2{{spaceeye}} + tsk2{{spaceeye}} -- raster --> out1>raster] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **raster**: Cloud-free rasters. + +## Parameters + +- **duration**: Time window, in days, considered in the inference. Controls the amount of temporal context for inpainting clouds. Larger windows require more compute and memory. + +- **time_overlap**: Overlap ratio of each temporal window. Controls the temporal step between windows as a fraction of the window size. + +- **min_tile_cover**: Minimum RoI coverage to consider a set of tiles sufficient. + +- **max_tiles_per_time**: Maximum number of tiles used to cover the RoI in each date. + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +- **pc_key**: Optional Planetary Computer API key. 
+ +## Tasks + +- **preprocess**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using cloud and shadow segmentation models. + +- **spaceeye**: Performs temporal damped interpolation to generate daily cloud-free images given Sentinel-2 data and cloud masks. + +## Workflow Yaml + ```yaml name: spaceeye_interpolation @@ -63,17 +110,4 @@ description: pc_key: Optional Planetary Computer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{preprocess}} - tsk2{{spaceeye}} - tsk1{{preprocess}} -- raster/s2_rasters --> tsk2{{spaceeye}} - tsk1{{preprocess}} -- mask/cloud_rasters --> tsk2{{spaceeye}} - inp1>user_input] -- user_input --> tsk1{{preprocess}} - inp1>user_input] -- input_data --> tsk2{{spaceeye}} - tsk2{{spaceeye}} -- raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation_inference.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation_inference.md index 07679d06..1bd24935 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation_inference.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_interpolation_inference.md @@ -1,5 +1,57 @@ # data_ingestion/spaceeye/spaceeye_interpolation_inference +Performs temporal damped interpolation to generate daily cloud-free images given Sentinel-2 data and cloud masks. The workflow will group input Sentinel-2 and cloud mask rasters into spatio-temporal windows and perform inference of each window. The windows will then be merged into rasters for the RoI. More information about SpaceEye available in the paper: https://arxiv.org/abs/2106.08408. 
+ +```{mermaid} + graph TD + inp1>input_data] + inp2>s2_rasters] + inp3>cloud_rasters] + out1>raster] + tsk1{{group_s2}} + tsk2{{group_mask}} + tsk3{{spaceeye}} + tsk4{{split}} + tsk1{{group_s2}} -- tile_sequences/s2_products --> tsk3{{spaceeye}} + tsk2{{group_mask}} -- tile_sequences/cloud_masks --> tsk3{{spaceeye}} + tsk3{{spaceeye}} -- spaceeye_sequence/sequences --> tsk4{{split}} + inp1>input_data] -- input_data --> tsk1{{group_s2}} + inp1>input_data] -- input_data --> tsk2{{group_mask}} + inp2>s2_rasters] -- rasters --> tsk1{{group_s2}} + inp3>cloud_rasters] -- rasters --> tsk2{{group_mask}} + tsk4{{split}} -- rasters --> out1>raster] +``` + +## Sources + +- **input_data**: Time range and region of interest. Will determine the spatio-temporal windows and region for the output rasters. + +- **s2_rasters**: Sentinel-2 tile rasters for the input time range. + +- **cloud_rasters**: Cloud masks for each of the Sentinel-2 tiles. + +## Sinks + +- **raster**: Cloud-free rasters for the input time range and region of interest. + +## Parameters + +- **duration**: Time window, in days, considered in the inference. Controls the amount of temporal context for inpainting clouds. Larger windows require more compute and memory. + +- **time_overlap**: Overlap ratio of each temporal window. Controls the temporal step between windows as a fraction of the window size. + +## Tasks + +- **group_s2**: Groups Sentinel-2 tiles into time windows of defined duration. + +- **group_mask**: Groups Sentinel-2 cloud masks into time windows of defined duration. + +- **spaceeye**: Runs the interpolation version of SpaceEye to remove clouds in input rasters. + +- **split**: Splits a list of multiple TileSequence back to a list of Rasters. + +## Workflow Yaml + ```yaml name: spaceeye_interpolation_inference @@ -69,24 +121,4 @@ description: between windows as a fraction of the window size. 
-``` - -```{mermaid} - graph TD - inp1>input_data] - inp2>s2_rasters] - inp3>cloud_rasters] - out1>raster] - tsk1{{group_s2}} - tsk2{{group_mask}} - tsk3{{spaceeye}} - tsk4{{split}} - tsk1{{group_s2}} -- tile_sequences/s2_products --> tsk3{{spaceeye}} - tsk2{{group_mask}} -- tile_sequences/cloud_masks --> tsk3{{spaceeye}} - tsk3{{spaceeye}} -- spaceeye_sequence/sequences --> tsk4{{split}} - inp1>input_data] -- input_data --> tsk1{{group_s2}} - inp1>input_data] -- input_data --> tsk2{{group_mask}} - inp2>s2_rasters] -- rasters --> tsk1{{group_s2}} - inp3>cloud_rasters] -- rasters --> tsk2{{group_mask}} - tsk4{{split}} -- rasters --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess.md index 88888c2c..c0a9da93 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess.md @@ -1,8 +1,62 @@ # data_ingestion/spaceeye/spaceeye_preprocess +Runs the SpaceEye preprocessing pipeline. The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time range and preprocesses them. It also computes improved cloud masks using cloud and shadow segmentation models. + +```{mermaid} + graph TD + inp1>user_input] + out1>s2_raster] + out2>s1_raster] + out3>cloud_mask] + tsk1{{s2}} + tsk2{{s1}} + tsk1{{s2}} -- raster/s2_products --> tsk2{{s1}} + inp1>user_input] -- user_input --> tsk1{{s2}} + inp1>user_input] -- user_input --> tsk2{{s1}} + tsk1{{s2}} -- raster --> out1>s2_raster] + tsk2{{s1}} -- raster --> out2>s1_raster] + tsk1{{s2}} -- mask --> out3>cloud_mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **s2_raster**: Sentinel-2 rasters. + +- **s1_raster**: Sentinel-1 rasters. 
+ +- **cloud_mask**: Cloud and cloud shadow mask. + +## Parameters + +- **min_tile_cover**: Minimum RoI coverage to consider a set of tiles sufficient. + +- **max_tiles_per_time**: Maximum number of tiles used to cover the RoI in each date. + +- **cloud_thr**: Confidence threshold to assign a pixel as cloud. + +- **shadow_thr**: Confidence threshold to assign a pixel as shadow. + +- **pc_key**: Optional Planetary Computer API key. + +- **s1_timeout**: Maximum time, in seconds, before a band reading operation times out. + +- **s2_timeout**: Maximum time, in seconds, before a band reading operation times out. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using cloud and shadow segmentation models. + +- **s1**: Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 products in the input time range. + +## Workflow Yaml + ```yaml -name: spaceeye_preprocess +name: spaceeye_preprocess_rtc sources: user_input: - s2.user_input @@ -12,11 +66,12 @@ sinks: s1_raster: s1.raster cloud_mask: s2.mask parameters: - min_tile_cover: null + min_tile_cover: 0.4 max_tiles_per_time: null cloud_thr: null shadow_thr: null - pc_key: null + pc_key: '@SECRET(eywa-secrets, pc-sub-key)' + s1_timeout: null s2_timeout: null tasks: s2: @@ -27,9 +82,13 @@ tasks: cloud_thr: '@from(cloud_thr)' shadow_thr: '@from(shadow_thr)' pc_key: '@from(pc_key)' + in_memory: true dl_timeout: '@from(s2_timeout)' s1: workflow: data_ingestion/sentinel1/preprocess_s1 + parameters: + pc_key: '@from(pc_key)' + dl_timeout: '@from(s1_timeout)' edges: - origin: s2.raster destination: @@ -47,20 +106,4 @@ description: cloud_mask: Cloud and cloud shadow mask. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>s2_raster] - out2>s1_raster] - out3>cloud_mask] - tsk1{{s2}} - tsk2{{s1}} - tsk1{{s2}} -- raster/s2_products --> tsk2{{s1}} - inp1>user_input] -- user_input --> tsk1{{s2}} - inp1>user_input] -- user_input --> tsk2{{s1}} - tsk1{{s2}} -- raster --> out1>s2_raster] - tsk2{{s1}} -- raster --> out2>s1_raster] - tsk1{{s2}} -- mask --> out3>cloud_mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.md index 11bd46a0..7fcfa842 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.md @@ -1,5 +1,47 @@ # data_ingestion/spaceeye/spaceeye_preprocess_ensemble +Runs the SpaceEye preprocessing pipeline with an ensemble of cloud segmentation models. The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time range and preprocesses them. It also computes improved cloud masks using cloud and shadow segmentation models. Cloud probabilities are computed with an ensemble of five models. + +```{mermaid} + graph TD + inp1>user_input] + out1>s2_raster] + out2>s1_raster] + out3>cloud_mask] + tsk1{{s2}} + tsk2{{s1}} + tsk1{{s2}} -- raster/s2_products --> tsk2{{s1}} + inp1>user_input] -- user_input --> tsk1{{s2}} + inp1>user_input] -- user_input --> tsk2{{s1}} + tsk1{{s2}} -- raster --> out1>s2_raster] + tsk2{{s1}} -- raster --> out2>s1_raster] + tsk1{{s2}} -- mask --> out3>cloud_mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **s2_raster**: Sentinel-2 rasters. + +- **s1_raster**: Sentinel-1 rasters. + +- **cloud_mask**: Cloud and cloud shadow mask. 
+ +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using an ensemble of cloud and shadow segmentation models. + +- **s1**: Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 products in the input time range. + +## Workflow Yaml + ```yaml name: spaceeye_preprocess_ensemble @@ -12,7 +54,7 @@ sinks: s1_raster: s1.raster cloud_mask: s2.mask parameters: - pc_key: null + pc_key: '@SECRET(eywa-secrets, pc-sub-key)' tasks: s2: workflow: data_ingestion/sentinel2/preprocess_s2_ensemble_masks @@ -43,20 +85,4 @@ description: pc_key: Optional Planetary Computer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>s2_raster] - out2>s1_raster] - out3>cloud_mask] - tsk1{{s2}} - tsk2{{s1}} - tsk1{{s2}} -- raster/s2_products --> tsk2{{s1}} - inp1>user_input] -- user_input --> tsk1{{s2}} - inp1>user_input] -- user_input --> tsk2{{s1}} - tsk1{{s2}} -- raster --> out1>s2_raster] - tsk2{{s1}} -- raster --> out2>s1_raster] - tsk1{{s2}} -- mask --> out3>cloud_mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_geometry.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_geometry.md index 1f54f5f3..7a80108a 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_geometry.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_geometry.md @@ -1,5 +1,34 @@ # data_ingestion/user_data/ingest_geometry +Adds user geometries into the cluster storage, allowing for them to be used on workflows. The workflow downloads geometries provided in the references and generates GeometryCollection objects with local assets that can be used in other operations. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>geometry] + tsk1{{unpack}} + tsk2{{download}} + tsk1{{unpack}} -- ref_list/input_ref --> tsk2{{download}} + inp1>user_input] -- input_refs --> tsk1{{unpack}} + tsk2{{download}} -- downloaded --> out1>geometry] +``` + +## Sources + +- **user_input**: List of external references. + +## Sinks + +- **geometry**: GeometryCollections with downloaded assets. + +## Tasks + +- **unpack**: Unpacks the urls from the list of external references. + +- **download**: Downloads geometries provided in the reference and generates a GeometryCollection. + +## Workflow Yaml + ```yaml name: ingest_geometry @@ -30,15 +59,4 @@ description: geometry: GeometryCollections with downloaded assets. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>geometry] - tsk1{{unpack}} - tsk2{{download}} - tsk1{{unpack}} -- ref_list/input_ref --> tsk2{{download}} - inp1>user_input] -- input_refs --> tsk1{{unpack}} - tsk2{{download}} -- downloaded --> out1>geometry] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_raster.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_raster.md index 673bde0c..8d0274bb 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_raster.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_raster.md @@ -1,5 +1,34 @@ # data_ingestion/user_data/ingest_raster +Adds user rasters into the cluster storage, allowing for them to be used on workflows. The workflow downloads rasters provided in the references and generates Raster objects with local assets that can be used in other operations. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{unpack}} + tsk2{{download}} + tsk1{{unpack}} -- ref_list/input_ref --> tsk2{{download}} + inp1>user_input] -- input_refs --> tsk1{{unpack}} + tsk2{{download}} -- downloaded --> out1>raster] +``` + +## Sources + +- **user_input**: List of external references. + +## Sinks + +- **raster**: Rasters with downloaded assets. + +## Tasks + +- **unpack**: Unpacks the urls from the list of external references. + +- **download**: Downloads the raster from the input reference's url. + +## Workflow Yaml + ```yaml name: ingest_raster @@ -29,15 +58,4 @@ description: raster: Rasters with downloaded assets. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{unpack}} - tsk2{{download}} - tsk1{{unpack}} -- ref_list/input_ref --> tsk2{{download}} - inp1>user_input] -- input_refs --> tsk1{{unpack}} - tsk2{{download}} -- downloaded --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_smb.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_smb.md index 169fc3d6..75a4a2c7 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_smb.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/user_data/ingest_smb.md @@ -1,5 +1,48 @@ # data_ingestion/user_data/ingest_smb +Adds user rasters into the cluster storage from an SMB share, allowing for them to be used on workflows. The workflow downloads rasters from the provided SMB share and generates Raster objects with local assets that can be used in other operations. + +```{mermaid} + graph TD + inp1>user_input] + out1>rasters] + tsk1{{download}} + inp1>user_input] -- user_input --> tsk1{{download}} + tsk1{{download}} -- rasters --> out1>rasters] +``` + +## Sources + +- **user_input**: DataVibe containing the time range and geometry metadata of the set rasters to be downloaded. 
+ +## Sinks + +- **rasters**: Rasters with downloaded assets. + +## Parameters + +- **server_name**: The name of the SMB server + +- **server_ip**: The IP address of the SMB server + +- **server_port**: The port to connect to on the SMB server + +- **username**: Username used to connect to server + +- **password**: Password to access server + +- **share_name**: Name of file share + +- **directory_path**: Path to directory containing rasters + +- **bands**: Ordered list of bands within the rasters + +## Tasks + +- **download**: Downloads rasters from an SMB share. + +## Workflow Yaml + ```yaml name: ingest_smb @@ -46,13 +89,4 @@ description: rasters: Rasters with downloaded assets. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>rasters] - tsk1{{download}} - inp1>user_input] -- user_input --> tsk1{{download}} - tsk1{{download}} -- rasters --> out1>rasters] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_chirps.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_chirps.md index 7245de19..92ed4cc3 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_chirps.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_chirps.md @@ -1,5 +1,40 @@ # data_ingestion/weather/download_chirps +Downloads accumulated precipitation data from the CHIRPS dataset. + +```{mermaid} + graph TD + inp1>user_input] + out1>product] + tsk1{{list_chirps}} + tsk2{{download_chirps}} + tsk1{{list_chirps}} -- chirps_products/chirps_product --> tsk2{{download_chirps}} + inp1>user_input] -- input_item --> tsk1{{list_chirps}} + tsk2{{download_chirps}} -- downloaded_product --> out1>product] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **product**: TIFF file containing accumulated precipitation. 
+ +## Parameters + +- **freq**: daily or monthly frequencies + +- **res**: p05 for 0.05 degree resolution or p25 for 0.25 degree resolution, p25 is only available daily + +## Tasks + +- **list_chirps**: Lists products from the CHIRPS dataset with desired frequency and resolution for input geometry and time range. + +- **download_chirps**: Downloads accumulated precipitation data from listed products. + +## Workflow Yaml + ```yaml name: chirps @@ -36,15 +71,4 @@ description: only available daily -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>product] - tsk1{{list_chirps}} - tsk2{{download_chirps}} - tsk1{{list_chirps}} -- chirps_products/chirps_product --> tsk2{{download_chirps}} - inp1>user_input] -- input_item --> tsk1{{list_chirps}} - tsk2{{download_chirps}} -- downloaded_product --> out1>product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5.md index d56761dc..4b13dcc3 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5.md @@ -1,5 +1,53 @@ # data_ingestion/weather/download_era5 +Hourly estimated weather variables. Hourly weather variables obtained from combining observations and numerical model runs to estimate the state of the atmosphere. + +```{mermaid} + graph TD + inp1>user_input] + out1>downloaded_product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- era5_products/era5_product --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>downloaded_product] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **downloaded_product**: 30km resolution weather variables. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. 
+ +- **variable**: Options are: + 2t - 2 meter temperature (default) + 100u - 100 meter U wind component + 100v - 100 meter V wind component + 10u - 10 meter U wind component + 10v - 10 meter V wind component + 2d - 2 meter dewpoint temperature + mn2t - Minimum temperature at 2 meters since previous post-processing + msl - Mean sea level pressure + mx2t - Maximum temperature at 2 meters since previous post-processing + sp - Surface pressure + ssrd - Surface solar radiation downwards + sst - Sea surface temperature + tp - Total precipitation + +## Tasks + +- **list**: Lists ERA5 products for input geometry and time range. + +- **download**: Downloads requested property from ERA5 products. + +## Workflow Yaml + ```yaml name: download_era5 @@ -43,15 +91,4 @@ description: \ radiation downwards\n sst - Sea surface temperature\n tp - Total precipitation" -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>downloaded_product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- era5_products/era5_product --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>downloaded_product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5_monthly.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5_monthly.md index 56122e61..2c5985dc 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5_monthly.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_era5_monthly.md @@ -1,5 +1,51 @@ # data_ingestion/weather/download_era5_monthly +Monthly estimated weather variables. Monthly weather variables obtained from combining observations and numerical model runs to estimate the state of the atmosphere. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>downloaded_product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- era5_products/era5_product --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>downloaded_product] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **downloaded_product**: 30km resolution weather variables. + +## Parameters + +- **cds_api_key**: api key for Copernicus CDS (https://cds.climate.copernicus.eu/user/register) + +- **variable**: Options are: + 2t - 2 meter temperature (default) + 100u - 100 meter U wind component + 100v - 100 meter V wind component + 10u - 10 meter U wind component + 10v - 10 meter V wind component + 2d - 2 meter dewpoint temperature + msl - Mean sea level pressure + sp - Surface pressure + ssrd - Surface solar radiation downwards + sst - Sea surface temperature + tp - Total precipitation + +## Tasks + +- **list**: Lists monthly ERA5 products for the input time range and geometry. + +- **download**: Downloads requested property from ERA5 products. 
+ +## Workflow Yaml + ```yaml name: download_era5_monthly @@ -43,15 +89,4 @@ description: \ - Total precipitation" -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>downloaded_product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- era5_products/era5_product --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>downloaded_product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_gridmet.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_gridmet.md index e99907b3..8c5b81a4 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_gridmet.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_gridmet.md @@ -1,5 +1,54 @@ # data_ingestion/weather/download_gridmet +Daily surface meteorological properties from GridMET. The workflow downloads weather and hydrological data for the input time range. Data is available for the contiguous US and southern British Columbia surfaces from 1979-present, with a daily temporal resolution and a ~4-km (1/24th degree) spatial resolution. + +```{mermaid} + graph TD + inp1>user_input] + out1>downloaded_product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- products/input_product --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>downloaded_product] +``` + +## Sources + +- **user_input**: Time range of interest. + +## Sinks + +- **downloaded_product**: Downloaded variable for each year in the input time range. 
+ +## Parameters + +- **variable**: Options are: + bi - Burning Index + erc - Energy Release Component + etr - Daily reference evapotranspiration (alfalfa, units = mm) + fm100 - Fuel Moisture (100-hr, units = %) + fm1000 - Fuel Moisture (1000-hr, units = %) + pet - Potential evapotranspiration (reference grass evapotranspiration, units = mm) + pr - Precipitation amount (daily total, units = mm) + rmax - Maximum relative humidity (units = %) + rmin - Minimum relative humidity (units = %) + sph - Specific humidity (units = kg/kg) + srad - Downward surface shortwave radiation (units = W/m^2) + th - Wind direction (degrees clockwise from North) + tmmn - Minimum temperature (units = K) + tmmx - Maximum temperature (units = K) + vpd - Vapor Pressure Deficit (units = kPa) + vs - Wind speed at 10m (units = m/s) + +## Tasks + +- **list**: Lists GridMET products of `variable` from years intersecting with input time range. + +- **download**: Downloads Climatology Lab weather products (TerraClimate and GridMET) defined by the input product. 
+ +## Workflow Yaml + ```yaml name: download_gridmet @@ -46,15 +95,4 @@ description: \ vs - Wind speed at 10m (units = m/s)" -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>downloaded_product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- products/input_product --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>downloaded_product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_herbie.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_herbie.md index 4daaad77..a2bb5c8c 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_herbie.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_herbie.md @@ -1,5 +1,48 @@ # data_ingestion/weather/download_herbie +Downloads forecast data for provided location & time range using herbie python package. Herbie is a python package that downloads recent and archived numerical weather prediction (NWP) model outputs from different cloud archive sources. Its most popular capability is to download HRRR model data. NWP data in GRIB2 format can be read with xarray+cfgrib. Model data Herbie can retrieve includes the High Resolution Rapid Refresh (HRRR), Rapid Refresh (RAP), Global Forecast System (GFS), National Blend of Models (NBM), Rapid Refresh Forecast System - Prototype (RRFS), and ECMWF open data forecast products (ECMWF). + +```{mermaid} + graph TD + inp1>user_input] + out1>forecast] + tsk1{{list_herbie}} + tsk2{{download_herbie}} + tsk1{{list_herbie}} -- product/herbie_product --> tsk2{{download_herbie}} + inp1>user_input] -- input_item --> tsk1{{list_herbie}} + tsk2{{download_herbie}} -- forecast --> out1>forecast] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **forecast**: Grib file with the requested forecast. 
+ +## Parameters + +- **model**: Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types 'hrrr' HRRR contiguous United States model 'hrrrak' HRRR Alaska model (alias 'alaska') 'rap' RAP model 'gfs' Global Forecast System (atmosphere) 'gfs_wave' Global Forecast System (wave) 'rrfs' Rapid Refresh Forecast System prototype for more information see https://herbie.readthedocs.io/en/latest/user_guide/model_info.html + +- **product**: Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), subh (subhourly fields)). Not specifying this will use the first product in model template file. + +- **frequency**: frequency in hours of the forecast + +- **forecast_lead_times**: Forecast lead time in the format [start_time, end_time, increment] (in hours). This parameter can be None, and in this case see parameter 'forecast_start_date' for more details. You cannot specify 'forecast_lead_times' and 'forecast_start_date' at the same time. + +- **forecast_start_date**: latest datetime (in the format "%Y-%m-%d %H:%M") for which analysis (zero lead time) are retrieved. After this datetime, forecasts with progressively increasing lead times are retrieved. If this parameter is set to None and 'forecast_lead_times' is also set to None, then the workflow returns analysis (zero lead time) up to the latest analysis available, and from that point it returns forecasts with progressively increasing lead times. + +- **search_text**: It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer of the file required instead of complete file. For more information on search_text refer to below url. https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html + +## Tasks + +- **list_herbie**: Lists herbie products. + +- **download_herbie**: Download herbie grib files. 
+ +## Workflow Yaml + ```yaml name: download_herbie @@ -12,10 +55,8 @@ parameters: model: hrrr product: null frequency: 1 - forecast_lead_times: - - 0 - - 1 - - 1 + forecast_lead_times: null + forecast_start_date: null search_text: :TMP:2 m tasks: list_herbie: @@ -25,6 +66,7 @@ tasks: product: '@from(product)' frequency: '@from(frequency)' forecast_lead_times: '@from(forecast_lead_times)' + forecast_start_date: '@from(forecast_start_date)' search_text: '@from(search_text)' download_herbie: op: download_herbie @@ -57,21 +99,18 @@ description: will use the first product in model template file. frequency: frequency in hours of the forecast forecast_lead_times: Forecast lead time in the format [start_time, end_time, increment] - (in hours) + (in hours). This parameter can be None, and in this case see parameter 'forecast_start_date' + for more details. You cannot specify 'forecast_lead_times' and 'forecast_start_date' + at the same time. + forecast_start_date: latest datetime (in the format "%Y-%m-%d %H:%M") for which + analysis (zero lead time) are retrieved. After this datetime, forecasts with + progressively increasing lead times are retrieved. If this parameter is set + to None and 'forecast_lead_times' is also set to None, then the workflow returns + analysis (zero lead time) up to the latest analysis available, and from that + point it returns forecasts with progressively increasing lead times. search_text: It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer of the file required instead of complete file. For more information on search_text refer to below url. 
https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>forecast] - tsk1{{list_herbie}} - tsk2{{download_herbie}} - tsk1{{list_herbie}} -- product/herbie_product --> tsk2{{download_herbie}} - inp1>user_input] -- input_item --> tsk1{{list_herbie}} - tsk2{{download_herbie}} -- forecast --> out1>forecast] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_terraclimate.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_terraclimate.md index 23bdd08e..a8c31a4e 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_terraclimate.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/download_terraclimate.md @@ -1,5 +1,52 @@ # data_ingestion/weather/download_terraclimate +Monthly climate and hydroclimate properties from TerraClimate. The workflow downloads weather and hydrological data for the input time range. Data is available for global terrestrial surfaces from 1958-present, with a monthly temporal resolution and a ~4-km (1/24th degree) spatial resolution. + +```{mermaid} + graph TD + inp1>user_input] + out1>downloaded_product] + tsk1{{list}} + tsk2{{download}} + tsk1{{list}} -- products/input_product --> tsk2{{download}} + inp1>user_input] -- input_item --> tsk1{{list}} + tsk2{{download}} -- downloaded_product --> out1>downloaded_product] +``` + +## Sources + +- **user_input**: Time range of interest. + +## Sinks + +- **downloaded_product**: Downloaded variable for each year in the input time range. 
+ +## Parameters + +- **variable**: Options are: + aet - Actual Evapotranspiration (monthly total, units = mm) + def - Climate Water Deficit (monthly total, units = mm) + pet - Potential evapotranspiration (monthly total, units = mm) + ppt - Precipitation (monthly total, units = mm) + q - Runoff (monthly total, units = mm) + soil - Soil Moisture (total column at end of month, units = mm) + srad - Downward surface shortwave radiation (units = W/m2) + swe - Snow water equivalent (at end of month, units = mm) + tmax - Max Temperature (average for month, units = C) + tmin - Min Temperature (average for month, units = C) + vap - Vapor pressure (average for month, units = kPa) + ws - Wind speed (average for month, units = m/s) + vpd - Vapor Pressure Deficit (average for month, units = kPa) + PDSI - Palmer Drought Severity Index (at end of month, units = unitless) + +## Tasks + +- **list**: Lists TerraClimate products of `variable` from years intersecting with input time range. + +- **download**: Downloads Climatology Lab weather products (TerraClimate and GridMET) defined by the input product. 
+ +## Workflow Yaml + ```yaml name: download_terraclimate @@ -46,15 +93,4 @@ description: \ = unitless)" -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>downloaded_product] - tsk1{{list}} - tsk2{{download}} - tsk1{{list}} -- products/input_product --> tsk2{{download}} - inp1>user_input] -- input_item --> tsk1{{list}} - tsk2{{download}} -- downloaded_product --> out1>downloaded_product] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_ambient_weather.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_ambient_weather.md index d32b0f61..71ca7308 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_ambient_weather.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_ambient_weather.md @@ -1,5 +1,40 @@ # data_ingestion/weather/get_ambient_weather +Downloads weather data from an Ambient Weather station. The workflow connects to the Ambient Weather REST API and requests data for the input time range. The input geometry will be used to find a device inside the region. If no devices are found in the geometry, the workflow will fail. Connection to the API requires an API key and an App key. + +```{mermaid} + graph TD + inp1>user_input] + out1>weather] + tsk1{{get_weather}} + inp1>user_input] -- user_input --> tsk1{{get_weather}} + tsk1{{get_weather}} -- weather --> out1>weather] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **weather**: Weather data from the station. + +## Parameters + +- **api_key**: Ambient Weather API key. + +- **app_key**: Ambient Weather App key. + +- **limit**: Maximum number of data points. If -1, do not limit. + +- **feed_interval**: Interval between samples. Defined by the weather station.
+ +## Tasks + +- **get_weather**: Connects to the Ambient Weather REST API and requests weather data for the input time range from stations within input geometry. + +## Workflow Yaml + ```yaml name: get_ambient_weather @@ -40,13 +75,4 @@ description: feed_interval: Interval between samples. Defined by the weather station. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>weather] - tsk1{{get_weather}} - inp1>user_input] -- user_input --> tsk1{{get_weather}} - tsk1{{get_weather}} -- weather --> out1>weather] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_forecast.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_forecast.md index deb96ed3..5b560ecc 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_forecast.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/get_forecast.md @@ -1,5 +1,43 @@ # data_ingestion/weather/get_forecast +Downloads weather forecast data from NOAA Global Forecast System (GFS) for the input time range. The workflow downloads global forecast data from the Planetary Computer with 13km resolution between grid points. The workflow requires a SAS token to access the blob storage, which can be found at https://planetarycomputer.microsoft.com/dataset/storage/noaa-gfs. + +```{mermaid} + graph TD + inp1>user_input] + out1>forecast] + tsk1{{preprocessing}} + tsk2{{gfs_download}} + tsk3{{read_forecast}} + tsk1{{preprocessing}} -- time --> tsk2{{gfs_download}} + tsk1{{preprocessing}} -- location --> tsk3{{read_forecast}} + tsk2{{gfs_download}} -- global_forecast --> tsk3{{read_forecast}} + inp1>user_input] -- user_input --> tsk1{{preprocessing}} + tsk3{{read_forecast}} -- local_forecast --> out1>forecast] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **forecast**: Weather forecast data. 
+ +## Parameters + +- **noaa_gfs_token**: SAS token to access blob storage. + +## Tasks + +- **preprocessing**: Gets the most relevant model date and forecast hour of product for the given input day, time and location. + +- **gfs_download**: Downloads the global forecast for the given input time. + +- **read_forecast**: Extracts the local data from a global forecast. + +## Workflow Yaml + ```yaml name: get_forecast @@ -48,18 +86,4 @@ description: noaa_gfs_token: SAS token to access blob storage. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>forecast] - tsk1{{preprocessing}} - tsk2{{gfs_download}} - tsk3{{read_forecast}} - tsk1{{preprocessing}} -- time --> tsk2{{gfs_download}} - tsk1{{preprocessing}} -- location --> tsk3{{read_forecast}} - tsk2{{gfs_download}} -- global_forecast --> tsk3{{read_forecast}} - inp1>user_input] -- user_input --> tsk1{{preprocessing}} - tsk3{{read_forecast}} -- local_forecast --> out1>forecast] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/herbie_forecast.md b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/herbie_forecast.md index faffe2fa..8f1d67d7 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/herbie_forecast.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_ingestion/weather/herbie_forecast.md @@ -1,5 +1,52 @@ # data_ingestion/weather/herbie_forecast +Downloads forecast observations for provided location & time range using herbie python package. Herbie is a python package that downloads recent and archived numerical weather prediction (NWP) model outputs from different cloud archive sources. Its most popular capability is to download HRRR model data. NWP data in GRIB2 format can be read with xarray+cfgrib. 
Model data Herbie can retrieve includes the High Resolution Rapid Refresh (HRRR), Rapid Refresh (RAP), Global Forecast System (GFS), National Blend of Models (NBM), Rapid Refresh Forecast System - Prototype (RRFS), and ECMWF open data forecast products (ECMWF). + +```{mermaid} + graph TD + inp1>user_input] + out1>weather_forecast] + out2>forecast_range] + tsk1{{forecast_range}} + tsk2{{forecast_download}} + tsk1{{forecast_range}} -- download_period/user_input --> tsk2{{forecast_download}} + inp1>user_input] -- user_input --> tsk1{{forecast_range}} + tsk2{{forecast_download}} -- weather_forecast --> out1>weather_forecast] + tsk1{{forecast_range}} -- download_period --> out2>forecast_range] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **weather_forecast**: Downloaded Forecast observations, cleaned, interpolated and mapped to each hour. + +- **forecast_range**: Time range of forecast observations. + +## Parameters + +- **forecast_lead_times**: Help to define forecast lead time in hours. Accept the input in range format. Example - (1, 25, 1) For more information refer below url. https://blaylockbk.github.io/Herbie/_build/html/reference_guide/_autosummary/herbie.archive.Herbie.html + +- **search_text**: It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer of the file required instead of complete file. For more information on search_text refer to below url. https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html + +- **weather_type**: It's a user preferred text to represent weather parameter type (temperature, humidity, wind_speed etc). This is used as column name for the output returned by operator. + +- **model**: Model name as defined in the models template folder. 
CASE INSENSITIVE Below are examples of model types 'hrrr' HRRR contiguous United States model 'hrrrak' HRRR Alaska model (alias 'alaska') 'rap' RAP model 'gfs' Global Forecast System (atmosphere) 'gfs_wave' Global Forecast System (wave) 'rrfs' Rapid Refresh Forecast System prototype + +- **overwrite**: If true, look for GRIB2 file even if local copy exists. If false, use the local copy + +- **product**: Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), subh (subhourly fields)). Not specifying this will use the first product in model template file. + +## Tasks + +- **forecast_range**: Splits input time range according to frequency and number of hours in lead time. + +- **forecast_download**: Downloads forecast observations with Herbie. + +## Workflow Yaml + ```yaml name: forecast_weather @@ -75,17 +122,4 @@ description: by operator. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>weather_forecast] - out2>forecast_range] - tsk1{{forecast_range}} - tsk2{{forecast_download}} - tsk1{{forecast_range}} -- download_period/user_input --> tsk2{{forecast_download}} - inp1>user_input] -- user_input --> tsk1{{forecast_range}} - tsk2{{forecast_download}} -- weather_forecast --> out1>weather_forecast] - tsk1{{forecast_range}} -- download_period --> out2>forecast_range] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx.md index dafb56ec..879d499a 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx.md @@ -1,5 +1,49 @@ # data_processing/chunk_onnx/chunk_onnx +Runs an Onnx model over all rasters in the input to produce a single raster. 
This workflow is intended to apply an Onnx model over all rasters in the input to produce a single raster output. This can be used, for instance, to compute time-series analysis of a list of rasters that span multiple times. The analysis can be any computation that can be expressed as an Onnx model (for an example, see notebooks/crop_cycles/crop_cycles.ipynb). In order to run the model in parallel (and avoid running out of memory if the list of rasters is large), the input rasters are divided spatially into chunks (that span all times). The Onnx model is applied to these chunks and then combined back to produce the final output. + +```{mermaid} + graph TD + inp1>rasters] + out1>raster] + tsk1{{chunk_raster}} + tsk2{{list_to_sequence}} + tsk3{{compute_onnx}} + tsk4{{combine_chunks}} + tsk1{{chunk_raster}} -- chunk_series/chunk --> tsk3{{compute_onnx}} + tsk2{{list_to_sequence}} -- rasters_seq/input_raster --> tsk3{{compute_onnx}} + tsk3{{compute_onnx}} -- output_raster/chunks --> tsk4{{combine_chunks}} + inp1>rasters] -- rasters --> tsk1{{chunk_raster}} + inp1>rasters] -- list_rasters --> tsk2{{list_to_sequence}} + tsk4{{combine_chunks}} -- raster --> out1>raster] +``` + +## Sources + +- **rasters**: Input rasters. + +## Sinks + +- **raster**: Result of the Onnx model run. + +## Parameters + +- **model_file**: An Onnx model which needs to be deployed with "farmvibes-ai local add-onnx" command. + +- **step**: Size of the chunk in pixels. + +## Tasks + +- **chunk_raster**: Splits input rasters into a series of chunks. + +- **list_to_sequence**: Combines a list of Rasters into a RasterSequence. + +- **compute_onnx**: Runs the onnx model across chunks of the input rasters. + +- **combine_chunks**: Combines series of chunks into a final raster. + +## Workflow Yaml + ```yaml name: chunk_onnx @@ -60,20 +104,4 @@ description: step: Size of the chunk in pixels. 
-``` - -```{mermaid} - graph TD - inp1>rasters] - out1>raster] - tsk1{{chunk_raster}} - tsk2{{list_to_sequence}} - tsk3{{compute_onnx}} - tsk4{{combine_chunks}} - tsk1{{chunk_raster}} -- chunk_series/chunk --> tsk3{{compute_onnx}} - tsk2{{list_to_sequence}} -- rasters_seq/input_raster --> tsk3{{compute_onnx}} - tsk3{{compute_onnx}} -- output_raster/chunks --> tsk4{{combine_chunks}} - inp1>rasters] -- rasters --> tsk1{{chunk_raster}} - inp1>rasters] -- list_rasters --> tsk2{{list_to_sequence}} - tsk4{{combine_chunks}} -- raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx_sequence.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx_sequence.md index b4614c4a..f85ae376 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx_sequence.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/chunk_onnx/chunk_onnx_sequence.md @@ -1,5 +1,45 @@ # data_processing/chunk_onnx/chunk_onnx_sequence +Runs an Onnx model over all rasters in the input to produce a single raster. This workflow is intended to run an Onnx model on all input rasters to produce a single raster output. This can be used, for instance, to compute time-series analysis of a list of rasters that span multiple times. The analysis can be any computation that can be expressed as an Onnx model (for an example, see notebooks/crop_cycles/crop_cycles.ipynb). In order to run the model in parallel (and avoid running out of memory if the list of rasters is large), the input rasters are divided spatially into chunks (that span all times). The Onnx model is applied to these chunks and then combined back to produce the final output. 
+ +```{mermaid} + graph TD + inp1>rasters] + out1>raster] + tsk1{{chunk_raster}} + tsk2{{compute_onnx}} + tsk3{{combine_chunks}} + tsk1{{chunk_raster}} -- chunk_series/chunk --> tsk2{{compute_onnx}} + tsk2{{compute_onnx}} -- output_raster/chunks --> tsk3{{combine_chunks}} + inp1>rasters] -- rasters --> tsk1{{chunk_raster}} + inp1>rasters] -- input_raster --> tsk2{{compute_onnx}} + tsk3{{combine_chunks}} -- raster --> out1>raster] +``` + +## Sources + +- **rasters**: Input rasters. + +## Sinks + +- **raster**: Result of the Onnx model run. + +## Parameters + +- **model_file**: An Onnx model which needs to be deployed with "farmvibes-ai local add-onnx" command. + +- **step**: Size of the chunk in pixels. + +## Tasks + +- **chunk_raster**: Splits input rasters into a series of chunks. + +- **compute_onnx**: Runs the onnx model across chunks of the input rasters. + +- **combine_chunks**: Combines series of chunks into a final raster. + +## Workflow Yaml + ```yaml name: chunk_onnx_sequence @@ -55,18 +95,4 @@ description: step: Size of the chunk in pixels. 
-``` - -```{mermaid} - graph TD - inp1>rasters] - out1>raster] - tsk1{{chunk_raster}} - tsk2{{compute_onnx}} - tsk3{{combine_chunks}} - tsk1{{chunk_raster}} -- chunk_series/chunk --> tsk2{{compute_onnx}} - tsk2{{compute_onnx}} -- output_raster/chunks --> tsk3{{combine_chunks}} - inp1>rasters] -- rasters --> tsk1{{chunk_raster}} - inp1>rasters] -- input_raster --> tsk2{{compute_onnx}} - tsk3{{combine_chunks}} -- raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/clip/clip.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/clip/clip.md index 680a81e2..6f31f6c3 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/clip/clip.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/clip/clip.md @@ -1,5 +1,34 @@ # data_processing/clip/clip +Performs a soft clip on an input raster based on a provided reference geometry. The workflow outputs a new raster copied from the input raster with its geometry metadata as the intersection between the input raster's geometry and the provided reference geometry. The workflow raises an error if there is no intersection between both geometries. + +```{mermaid} + graph TD + inp1>raster] + inp2>input_geometry] + out1>clipped_raster] + tsk1{{clip_raster}} + inp1>raster] -- raster --> tsk1{{clip_raster}} + inp2>input_geometry] -- input_item --> tsk1{{clip_raster}} + tsk1{{clip_raster}} -- clipped_raster --> out1>clipped_raster] +``` + +## Sources + +- **raster**: Input raster to be clipped. + +- **input_geometry**: Reference geometry. + +## Sinks + +- **clipped_raster**: Clipped raster with the reference geometry. + +## Tasks + +- **clip_raster**: Soft clips the input raster based on the provided reference geometry.
+ +## Workflow Yaml + ```yaml name: clip @@ -29,15 +58,4 @@ description: parameters: null -``` - -```{mermaid} - graph TD - inp1>raster] - inp2>input_geometry] - out1>clipped_raster] - tsk1{{clip_raster}} - inp1>raster] -- raster --> tsk1{{clip_raster}} - inp2>input_geometry] -- input_item --> tsk1{{clip_raster}} - tsk1{{clip_raster}} -- clipped_raster --> out1>clipped_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/gradient/raster_gradient.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/gradient/raster_gradient.md index c09c717b..b36ad676 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/gradient/raster_gradient.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/gradient/raster_gradient.md @@ -1,5 +1,30 @@ # data_processing/gradient/raster_gradient +Computes the gradient of each band of the input raster with a Sobel operator. + +```{mermaid} + graph TD + inp1>raster] + out1>gradient] + tsk1{{gradient}} + inp1>raster] -- input_raster --> tsk1{{gradient}} + tsk1{{gradient}} -- output_raster --> out1>gradient] +``` + +## Sources + +- **raster**: Input raster. + +## Sinks + +- **gradient**: Raster with the gradients. + +## Tasks + +- **gradient**: Computes the gradient of each band of the input raster with a Sobel operator. 
+ +## Workflow Yaml + ```yaml name: raster_gradient @@ -23,13 +48,4 @@ description: parameters: null -``` - -```{mermaid} - graph TD - inp1>raster] - out1>gradient] - tsk1{{gradient}} - inp1>raster] -- input_raster --> tsk1{{gradient}} - tsk1{{gradient}} -- output_raster --> out1>gradient] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/heatmap/classification.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/heatmap/classification.md index d1ec2e5b..fef8e003 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/heatmap/classification.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/heatmap/classification.md @@ -1,5 +1,83 @@ # data_processing/heatmap/classification +Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. The inference operation predicts nutrients in soil for the chosen farm boundary. + The workflow generates a heatmap for selected nutrient. It relies on sample soil data that +contain information of nutrients. The quantity of samples define the accuracy of the heat map +generation. During the research performed testing with samples spaced at 200 feet, 100 feet and +50 feet. The 50 feet sample spaced distance provided results matching to the ground truth. +Generating heatmaps with this approach reduces the number of samples. It utilizes the logic +below behind the scenes to generate heatmap. + - Read the sentinel raster provided. + - Sensor samples needs to be uploaded into prescriptions entity in Azure + data manager for Agriculture (ADMAg). ADMAg is having hierarchy to hold + information of Party, Field, Seasons, Crop etc. Prior to + uploading prescriptions, it is required to build hierarchy and + a `prescription_map_id`. 
All prescriptions uploaded to ADMAg are + related to farm hierarchy through `prescription_map_id`. Please refer to + https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/ for + more information on ADMAg. + - Compute indices using the spyndex python package. + - Clip the satellite imagery & sensor samples using farm boundary. + - Perform spatial interpolation to find raster pixels within the offset distance + from sample location and assign the value of nutrients to group of pixels. + - Classify the data based on number of bins. + - Train the model using Random Forest classifier. + - Predict the nutrients using the satellite imagery. + - Generate a shape file using the predicted outputs. + +```{mermaid} + graph TD + inp1>input_raster] + inp2>samples] + out1>result] + tsk1{{compute_index}} + tsk2{{soil_sample_heatmap}} + tsk1{{compute_index}} -- index_raster/raster --> tsk2{{soil_sample_heatmap}} + inp1>input_raster] -- raster --> tsk1{{compute_index}} + inp2>samples] -- samples --> tsk2{{soil_sample_heatmap}} + tsk2{{soil_sample_heatmap}} -- result --> out1>result] +``` + +## Sources + +- **input_raster**: Input raster for index computation. + +- **samples**: External references to sensor samples for nutrients. + +## Sinks + +- **result**: Zip file containing cluster geometries. + +## Parameters + +- **attribute_name**: Nutrient property name in sensor samples geojson file. For example CARBON (C), Nitrogen (N), Phosphorus (P) etc., + +- **buffer**: Offset distance from sample to perform interpolate operations with raster. + +- **index**: Type of index to be used to generate heatmap. For example - evi, pri etc., + +- **bins**: Possible number of groups used to move value to nearest group using [numpy histogram](https://numpy.org/doc/stable/reference/generated/numpy.histogram.html) and to pre-process the data to support model training with classification . 
+ +- **simplify**: Replace small polygons in input with value of their largest neighbor after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. + +- **tolerance**: All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) will be no more than tolerance distance from the original. It has the same units as the coordinate reference system of the GeoSeries. For example, using tolerance=100 in a projected CRS with meters as units means a distance of 100 meters in reality. + +- **data_scale**: Accepts True or False. Default is False. On True, it scales data using [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) from the scikit-learn package. It standardizes features by removing the mean and scaling to unit variance. + +- **max_depth**: The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +- **n_estimators**: The number of trees in the forest. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +- **random_state**: Controls both the randomness of the bootstrapping of the samples used when building trees (if bootstrap=True) and the sampling of the features to consider when looking for the best split at each node (if max_features < n_features). For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +## Tasks + +- **compute_index**: Computes an index from the bands of an input raster. + +- **soil_sample_heatmap**: Generate heatmap for nutrients using satellite or spaceEye imagery.
+ +## Workflow Yaml + ```yaml name: heatmap_intermediate @@ -59,10 +137,10 @@ description: \ the logic\nbelow behind the scenes to generate heatmap.\n - Read the sentinel\ \ raster provided.\n - Sensor samples needs to be uploaded into prescriptions\ \ entity in Azure\n data manager for Agriculture (ADMAg). ADMAg is having hierarchy\ - \ to hold\n information of Farmer, Field, Seasons, Crop, Boundary etc. Prior\ - \ to\n uploading prescriptions, it is required to build hierarchy and\n \ - \ a `prescription_map_id`. All prescriptions uploaded to ADMAg are\n related\ - \ to farm hierarchy through `prescription_map_id`. Please refer to\n https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/\ + \ to hold\n information of Party, Field, Seasons, Crop etc. Prior to\n uploading\ + \ prescriptions, it is required to build hierarchy and\n a `prescription_map_id`.\ + \ All prescriptions uploaded to ADMAg are\n related to farm hierarchy through\ + \ `prescription_map_id`. Please refer to\n https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/\ \ for\n more information on ADMAg.\n - Compute indices using the spyndex python\ \ package.\n - Clip the satellite imagery & sensor samples using farm boundary.\n\ \ - Perform spatial interpolation to find raster pixels within the offset distance\n\ @@ -103,17 +181,4 @@ description: n_features). 
For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) -``` - -```{mermaid} - graph TD - inp1>input_raster] - inp2>samples] - out1>result] - tsk1{{compute_index}} - tsk2{{soil_sample_heatmap}} - tsk1{{compute_index}} -- index_raster/raster --> tsk2{{soil_sample_heatmap}} - inp1>input_raster] -- raster --> tsk1{{compute_index}} - inp2>samples] -- samples --> tsk2{{soil_sample_heatmap}} - tsk2{{soil_sample_heatmap}} -- result --> out1>result] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/index/index.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/index/index.md index 11aade4f..6b730148 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/index/index.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/index/index.md @@ -1,5 +1,34 @@ # data_processing/index/index +Computes an index from the bands of an input raster. In addition to the indices 'ndvi', 'evi', 'msavi', 'ndre', 'reci', 'ndmi', 'methane' and 'pri' all indices in https://github.com/awesome-spectral-indices/awesome-spectral-indices are available (depending on the bands available on the corresponding satellite product). + +```{mermaid} + graph TD + inp1>raster] + out1>index_raster] + tsk1{{compute_index}} + inp1>raster] -- raster --> tsk1{{compute_index}} + tsk1{{compute_index}} -- index --> out1>index_raster] +``` + +## Sources + +- **raster**: Input raster. + +## Sinks + +- **index_raster**: Single-band raster with the computed index. + +## Parameters + +- **index**: The choice of index to be computed ('ndvi', 'evi', 'msavi', 'ndre', 'reci', 'ndmi', 'methane', 'pri' or any of the awesome-spectral-indices). + +## Tasks + +- **compute_index**: Computes `index` over the input raster. + +## Workflow Yaml + ```yaml name: index @@ -31,13 +60,4 @@ description: 'ndmi', 'methane', 'pri' or any of the awesome-spectral-indices). 
-``` - -```{mermaid} - graph TD - inp1>raster] - out1>index_raster] - tsk1{{compute_index}} - inp1>raster] -- raster --> tsk1{{compute_index}} - tsk1{{compute_index}} -- index --> out1>index_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/linear_trend/chunked_linear_trend.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/linear_trend/chunked_linear_trend.md index 3f1b76b2..0585bc77 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/linear_trend/chunked_linear_trend.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/linear_trend/chunked_linear_trend.md @@ -1,5 +1,45 @@ # data_processing/linear_trend/chunked_linear_trend +Computes the pixel-wise linear trend of a list of rasters (e.g. NDVI). The workflow computes the linear trend over chunks of data, combining them into the final raster. + +```{mermaid} + graph TD + inp1>input_rasters] + out1>linear_trend_raster] + tsk1{{chunk_raster}} + tsk2{{linear_trend}} + tsk3{{combine_chunks}} + tsk1{{chunk_raster}} -- chunk_series/series --> tsk2{{linear_trend}} + tsk2{{linear_trend}} -- trend/chunks --> tsk3{{combine_chunks}} + inp1>input_rasters] -- rasters --> tsk1{{chunk_raster}} + inp1>input_rasters] -- rasters --> tsk2{{linear_trend}} + tsk3{{combine_chunks}} -- raster --> out1>linear_trend_raster] +``` + +## Sources + +- **input_rasters**: List of rasters to compute linear trend. + +## Sinks + +- **linear_trend_raster**: Raster with the trend and the test statistics. + +## Parameters + +- **chunk_step_y**: steps used to divide the rasters into chunks in the y direction (units are grid points). + +- **chunk_step_x**: steps used to divide the rasters into chunks in the x direction (units are grid points). + +## Tasks + +- **chunk_raster**: Splits input rasters into a series of chunks. + +- **linear_trend**: Computes the pixel-wise linear trend across rasters. 
+ +- **combine_chunks**: Combines series of chunks into a final raster. + +## Workflow Yaml + ```yaml name: chunked_linear_trend @@ -45,18 +85,4 @@ description: (units are grid points). -``` - -```{mermaid} - graph TD - inp1>input_rasters] - out1>linear_trend_raster] - tsk1{{chunk_raster}} - tsk2{{linear_trend}} - tsk3{{combine_chunks}} - tsk1{{chunk_raster}} -- chunk_series/series --> tsk2{{linear_trend}} - tsk2{{linear_trend}} -- trend/chunks --> tsk3{{combine_chunks}} - inp1>input_rasters] -- rasters --> tsk1{{chunk_raster}} - inp1>input_rasters] -- rasters --> tsk2{{linear_trend}} - tsk3{{combine_chunks}} -- raster --> out1>linear_trend_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/merge/match_merge_to_ref.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/merge/match_merge_to_ref.md index 1c0b0491..863b2323 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/merge/match_merge_to_ref.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/merge/match_merge_to_ref.md @@ -1,5 +1,52 @@ # data_processing/merge/match_merge_to_ref +Resamples input rasters to the reference rasters' grid. The workflow will produce input and reference raster pairs with intersecting geometries. For each pair, the input raster is resampled to match the reference raster's grid. Afterwards, all resampled rasters are grouped if they are contained in a reference raster geometry, and each raster group is matched into a single raster. The output should contain the information available in the input rasters, gridded according to the reference rasters.
+ +```{mermaid} + graph TD + inp1>rasters] + inp2>ref_rasters] + out1>match_rasters] + tsk1{{pair}} + tsk2{{match}} + tsk3{{group}} + tsk4{{merge}} + tsk1{{pair}} -- paired_rasters1/ref_raster --> tsk2{{match}} + tsk1{{pair}} -- paired_rasters2/raster --> tsk2{{match}} + tsk2{{match}} -- output_raster/rasters --> tsk3{{group}} + tsk3{{group}} -- raster_groups/raster_sequence --> tsk4{{merge}} + inp1>rasters] -- rasters2 --> tsk1{{pair}} + inp2>ref_rasters] -- rasters1 --> tsk1{{pair}} + inp2>ref_rasters] -- group_by --> tsk3{{group}} + tsk4{{merge}} -- raster --> out1>match_rasters] +``` + +## Sources + +- **rasters**: Input rasters that will be resampled. + +- **ref_rasters**: Reference rasters. + +## Sinks + +- **match_rasters**: Rasters with information from the input rasters on the reference grid. + +## Parameters + +- **resampling**: Type of resampling when reprojecting the rasters. See the [rasterio documentation](https://rasterio.readthedocs.io/en/latest/api/rasterio.enums.html#rasterio.enums.Resampling) for all available resampling options. + +## Tasks + +- **pair**: Creates pairs of rasters with intersecting geometries between two input lists of Raster. + +- **match**: Resamples the input `raster` to match the grid of `ref_raster`. + +- **group**: Groups input rasters that are contained in the geometry of a reference raster. + +- **merge**: Merges rasters in a sequence to a single raster. + +## Workflow Yaml + ```yaml name: match_merge_to_ref @@ -59,23 +106,4 @@ description: for all available resampling options.'
-``` - -```{mermaid} - graph TD - inp1>rasters] - inp2>ref_rasters] - out1>match_rasters] - tsk1{{pair}} - tsk2{{match}} - tsk3{{group}} - tsk4{{merge}} - tsk1{{pair}} -- paired_rasters1/ref_raster --> tsk2{{match}} - tsk1{{pair}} -- paired_rasters2/raster --> tsk2{{match}} - tsk2{{match}} -- output_raster/rasters --> tsk3{{group}} - tsk3{{group}} -- raster_groups/raster_sequence --> tsk4{{merge}} - inp1>rasters] -- rasters2 --> tsk1{{pair}} - inp2>ref_rasters] -- rasters1 --> tsk1{{pair}} - inp2>ref_rasters] -- group_by --> tsk3{{group}} - tsk4{{merge}} -- raster --> out1>match_rasters] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/outlier/detect_outlier.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/outlier/detect_outlier.md index 5a77e9fd..614702e8 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/outlier/detect_outlier.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/outlier/detect_outlier.md @@ -1,5 +1,46 @@ # data_processing/outlier/detect_outlier +Fits a single-component Gaussian Mixture Model (GMM) over input data to detect outliers according to the threshold parameter. The workflow outputs segmentation and outlier maps based on the threshold parameter and the likelihood of each sample belonging to the GMM component. It also yields heatmaps of the likelihood, and the mean of GMM's component. + +```{mermaid} + graph TD + inp1>rasters] + out1>segmentation] + out2>heatmap] + out3>outliers] + out4>mixture_means] + tsk1{{outlier}} + inp1>rasters] -- rasters --> tsk1{{outlier}} + tsk1{{outlier}} -- segmentation --> out1>segmentation] + tsk1{{outlier}} -- heatmap --> out2>heatmap] + tsk1{{outlier}} -- outliers --> out3>outliers] + tsk1{{outlier}} -- mixture_means --> out4>mixture_means] +``` + +## Sources + +- **rasters**: Input rasters. 
+ +## Sinks + +- **segmentation**: Segmentation maps based on the likelihood of each sample belonging to the GMM's single-component. + +- **heatmap**: Likelihood maps. + +- **outliers**: Outlier maps based on the thresholded likelihood map. + +- **mixture_means**: Mean of the GMM. + +## Parameters + +- **threshold**: Likelihood threshold value to consider a sample as an outlier. + +## Tasks + +- **outlier**: Fits a single-component Gaussian Mixture Model (GMM) over input rasters to detect outliers according to the threshold parameter. + +## Workflow Yaml + ```yaml name: detect_outlier @@ -37,19 +78,4 @@ description: threshold: Likelihood threshold value to consider a sample as an outlier. -``` - -```{mermaid} - graph TD - inp1>rasters] - out1>segmentation] - out2>heatmap] - out3>outliers] - out4>mixture_means] - tsk1{{outlier}} - inp1>rasters] -- rasters --> tsk1{{outlier}} - tsk1{{outlier}} -- segmentation --> out1>segmentation] - tsk1{{outlier}} -- heatmap --> out2>heatmap] - tsk1{{outlier}} -- outliers --> out3>outliers] - tsk1{{outlier}} -- mixture_means --> out4>mixture_means] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/threshold/threshold_raster.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/threshold/threshold_raster.md index fbd3c354..ca237596 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/threshold/threshold_raster.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/threshold/threshold_raster.md @@ -1,5 +1,34 @@ # data_processing/threshold/threshold_raster +Thresholds values of the input raster if higher than the threshold parameter. + +```{mermaid} + graph TD + inp1>raster] + out1>thresholded_raster] + tsk1{{threshold_task}} + inp1>raster] -- raster --> tsk1{{threshold_task}} + tsk1{{threshold_task}} -- thresholded --> out1>thresholded_raster] +``` + +## Sources + +- **raster**: Input raster. 
+ +## Sinks + +- **thresholded_raster**: Thresholded raster. + +## Parameters + +- **threshold**: Threshold value. + +## Tasks + +- **threshold_task**: Thresholds values of the input raster if higher than the threshold parameter. + +## Workflow Yaml + ```yaml name: threshold_raster @@ -28,13 +57,4 @@ description: threshold: Threshold value. -``` - -```{mermaid} - graph TD - inp1>raster] - out1>thresholded_raster] - tsk1{{threshold_task}} - inp1>raster] -- raster --> tsk1{{threshold_task}} - tsk1{{threshold_task}} -- thresholded --> out1>thresholded_raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_aggregation.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_aggregation.md index b9a12869..7b00c33a 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_aggregation.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_aggregation.md @@ -1,5 +1,38 @@ # data_processing/timeseries/timeseries_aggregation +Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster and aggregates them into a timeseries. + +```{mermaid} + graph TD + inp1>raster] + inp2>input_geometry] + out1>timeseries] + tsk1{{summary}} + tsk2{{timeseries}} + tsk1{{summary}} -- summary/stats --> tsk2{{timeseries}} + inp1>raster] -- raster --> tsk1{{summary}} + inp2>input_geometry] -- input_geometry --> tsk1{{summary}} + tsk2{{timeseries}} -- timeseries --> out1>timeseries] +``` + +## Sources + +- **raster**: Input raster. + +- **input_geometry**: Geometry of interest. + +## Sinks + +- **timeseries**: Aggregated statistics of the raster. + +## Tasks + +- **summary**: Computes the mean, standard deviation, maximum, and minimum values across the whole raster. + +- **timeseries**: Aggregates list of summary statistics into a timeseries. 
+ +## Workflow Yaml + ```yaml name: timeseries_aggregation @@ -30,17 +63,4 @@ description: timeseries: Aggregated statistics of the raster. -``` - -```{mermaid} - graph TD - inp1>raster] - inp2>input_geometry] - out1>timeseries] - tsk1{{summary}} - tsk2{{timeseries}} - tsk1{{summary}} -- summary/stats --> tsk2{{timeseries}} - inp1>raster] -- raster --> tsk1{{summary}} - inp2>input_geometry] -- input_geometry --> tsk1{{summary}} - tsk2{{timeseries}} -- timeseries --> out1>timeseries] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_masked_aggregation.md b/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_masked_aggregation.md index 443176b2..963f925d 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_masked_aggregation.md +++ b/docs/source/docfiles/markdown/workflow_yaml/data_processing/timeseries/timeseries_masked_aggregation.md @@ -1,5 +1,46 @@ # data_processing/timeseries/timeseries_masked_aggregation +Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster considered by the mask and aggregates them into a timeseries. + +```{mermaid} + graph TD + inp1>raster] + inp2>mask] + inp3>input_geometry] + out1>timeseries] + tsk1{{masked_summary}} + tsk2{{timeseries}} + tsk1{{masked_summary}} -- summary/stats --> tsk2{{timeseries}} + inp1>raster] -- raster --> tsk1{{masked_summary}} + inp2>mask] -- mask --> tsk1{{masked_summary}} + inp3>input_geometry] -- input_geometry --> tsk1{{masked_summary}} + tsk2{{timeseries}} -- timeseries --> out1>timeseries] +``` + +## Sources + +- **raster**: Input raster. + +- **mask**: Mask of the regions to be considered during summarization; + +- **input_geometry**: Geometry of interest. + +## Sinks + +- **timeseries**: Aggregated statistics of the raster considered by the mask. 
+ +## Parameters + +- **timeseries_masked_thr**: Threshold of the maximum ratio of masked content allowed in a raster. The statistics of rasters with masked content above the threshold (e.g., heavily clouded) are not included in the timeseries. + +## Tasks + +- **masked_summary**: Computes the mean, standard deviation, maximum, and minimum values across non-masked regions of the raster. + +- **timeseries**: Aggregates list of summary statistics into a timeseries. + +## Workflow Yaml + ```yaml name: timeseries_masked_aggregation @@ -43,19 +84,4 @@ description: (e.g., heavily clouded) are not included in the timeseries. -``` - -```{mermaid} - graph TD - inp1>raster] - inp2>mask] - inp3>input_geometry] - out1>timeseries] - tsk1{{masked_summary}} - tsk2{{timeseries}} - tsk1{{masked_summary}} -- summary/stats --> tsk2{{timeseries}} - inp1>raster] -- raster --> tsk1{{masked_summary}} - inp2>mask] -- mask --> tsk1{{masked_summary}} - inp3>input_geometry] -- input_geometry --> tsk1{{masked_summary}} - tsk2{{timeseries}} -- timeseries --> out1>timeseries] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/canopy_cover.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/canopy_cover.md index 3b4f7ab2..11656a17 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/canopy_cover.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/canopy_cover.md @@ -1,5 +1,56 @@ # farm_ai/agriculture/canopy_cover +Estimates pixel-wise canopy cover for a region and date. The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API, and computes the NDVI for each available tile and date. It applies a linear regressor trained with polynomial features (up to the 3rd degree) on top of the index raster to estimate canopy cover. 
The coefficients and intercept of the regressor were obtained beforehand using masked/annotated drone imagery as ground truth, and are used for inference in this workflow. + +```{mermaid} + graph TD + inp1>user_input] + out1>ndvi] + out2>estimated_canopy_cover] + out3>ndvi_timeseries] + out4>canopy_timeseries] + tsk1{{ndvi_summary}} + tsk2{{canopy}} + tsk3{{canopy_summary_timeseries}} + tsk1{{ndvi_summary}} -- index/indices --> tsk2{{canopy}} + tsk2{{canopy}} -- estimated_canopy_cover/raster --> tsk3{{canopy_summary_timeseries}} + tsk1{{ndvi_summary}} -- merged_cloud_mask/mask --> tsk3{{canopy_summary_timeseries}} + inp1>user_input] -- user_input --> tsk1{{ndvi_summary}} + inp1>user_input] -- input_geometry --> tsk3{{canopy_summary_timeseries}} + tsk1{{ndvi_summary}} -- index --> out1>ndvi] + tsk2{{canopy}} -- estimated_canopy_cover --> out2>estimated_canopy_cover] + tsk1{{ndvi_summary}} -- timeseries --> out3>ndvi_timeseries] + tsk3{{canopy_summary_timeseries}} -- timeseries --> out4>canopy_timeseries] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **ndvi**: NDVI raster. + +- **estimated_canopy_cover**: Raster with pixel-wise canopy cover estimation. + +- **ndvi_timeseries**: Aggregated NDVI statistics of the retrieved tiles within the input geometry and time range. + +- **canopy_timeseries**: Aggregated canopy cover statistics. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **ndvi_summary**: Calculates NDVI statistics (mean, standard deviation, maximum and minimum) for the input geometry and time range. + +- **canopy**: Applies a linear regressor with pre-computed polynomial features on top of the index raster to estimate canopy cover. + +- **canopy_summary_timeseries**: Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster considered by the mask and aggregates them into a timeseries.
+ +## Workflow Yaml + ```yaml name: canopy_cover @@ -53,25 +104,4 @@ description: pc_key: Optional Planetary Computer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>ndvi] - out2>estimated_canopy_cover] - out3>ndvi_timeseries] - out4>canopy_timeseries] - tsk1{{ndvi_summary}} - tsk2{{canopy}} - tsk3{{canopy_summary_timeseries}} - tsk1{{ndvi_summary}} -- index/indices --> tsk2{{canopy}} - tsk2{{canopy}} -- estimated_canopy_cover/raster --> tsk3{{canopy_summary_timeseries}} - tsk1{{ndvi_summary}} -- merged_cloud_mask/mask --> tsk3{{canopy_summary_timeseries}} - inp1>user_input] -- user_input --> tsk1{{ndvi_summary}} - inp1>user_input] -- input_geometry --> tsk3{{canopy_summary_timeseries}} - tsk1{{ndvi_summary}} -- index --> out1>ndvi] - tsk2{{canopy}} -- estimated_canopy_cover --> out2>estimated_canopy_cover] - tsk1{{ndvi_summary}} -- timeseries --> out3>ndvi_timeseries] - tsk3{{canopy_summary_timeseries}} -- timeseries --> out4>canopy_timeseries] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/change_detection.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/change_detection.md index 3cb9dc1e..92b49b29 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/change_detection.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/change_detection.md @@ -1,5 +1,71 @@ # farm_ai/agriculture/change_detection +Identifies changes/outliers over NDVI across dates. The workflow generates SpaceEye imagery for the input region and time range and computes NDVI raster for each date. It aggregates NDVI statistics (mean, standard deviation, maximum and minimum) in time and detects outliers across dates with a single-component Gaussian Mixture Model (GMM). 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>spaceeye_raster] + out2>index] + out3>timeseries] + out4>segmentation] + out5>heatmap] + out6>outliers] + out7>mixture_means] + tsk1{{spaceeye}} + tsk2{{ndvi}} + tsk3{{summary_timeseries}} + tsk4{{outliers}} + tsk1{{spaceeye}} -- raster --> tsk2{{ndvi}} + tsk2{{ndvi}} -- index_raster/raster --> tsk3{{summary_timeseries}} + tsk2{{ndvi}} -- index_raster/rasters --> tsk4{{outliers}} + inp1>user_input] -- user_input --> tsk1{{spaceeye}} + inp1>user_input] -- input_geometry --> tsk3{{summary_timeseries}} + tsk1{{spaceeye}} -- raster --> out1>spaceeye_raster] + tsk2{{ndvi}} -- index_raster --> out2>index] + tsk3{{summary_timeseries}} -- timeseries --> out3>timeseries] + tsk4{{outliers}} -- segmentation --> out4>segmentation] + tsk4{{outliers}} -- heatmap --> out5>heatmap] + tsk4{{outliers}} -- outliers --> out6>outliers] + tsk4{{outliers}} -- mixture_means --> out7>mixture_means] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **spaceeye_raster**: SpaceEye cloud-free rasters. + +- **index**: NDVI rasters. + +- **timeseries**: Aggregated NDVI statistics over the time range. + +- **segmentation**: Segmentation maps based on the likelihood of each sample belonging to the GMM's single-component. + +- **heatmap**: Likelihood maps. + +- **outliers**: Outlier maps. + +- **mixture_means**: Means of the GMM. + +## Parameters + +- **pc_key**: PlanetaryComputer API key. + +## Tasks + +- **spaceeye**: Runs the SpaceEye cloud removal pipeline, yielding daily cloud-free images for the input geometry and time range. + +- **ndvi**: Computes an index from the bands of an input raster. + +- **summary_timeseries**: Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster and aggregates them into a timeseries. + +- **outliers**: Fits a single-component Gaussian Mixture Model (GMM) over input data to detect outliers according to the threshold parameter. 
+ +## Workflow Yaml + ```yaml name: change_detection @@ -59,32 +125,4 @@ description: pc_key: PlanetaryComputer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>spaceeye_raster] - out2>index] - out3>timeseries] - out4>segmentation] - out5>heatmap] - out6>outliers] - out7>mixture_means] - tsk1{{spaceeye}} - tsk2{{ndvi}} - tsk3{{summary_timeseries}} - tsk4{{outliers}} - tsk1{{spaceeye}} -- raster --> tsk2{{ndvi}} - tsk2{{ndvi}} -- index_raster/raster --> tsk3{{summary_timeseries}} - tsk2{{ndvi}} -- index_raster/rasters --> tsk4{{outliers}} - inp1>user_input] -- user_input --> tsk1{{spaceeye}} - inp1>user_input] -- input_geometry --> tsk3{{summary_timeseries}} - tsk1{{spaceeye}} -- raster --> out1>spaceeye_raster] - tsk2{{ndvi}} -- index_raster --> out2>index] - tsk3{{summary_timeseries}} -- timeseries --> out3>timeseries] - tsk4{{outliers}} -- segmentation --> out4>segmentation] - tsk4{{outliers}} -- heatmap --> out5>heatmap] - tsk4{{outliers}} -- outliers --> out6>outliers] - tsk4{{outliers}} -- mixture_means --> out7>mixture_means] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/emergence_summary.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/emergence_summary.md index 1f29750c..ebc26dba 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/emergence_summary.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/emergence_summary.md @@ -1,5 +1,48 @@ # farm_ai/agriculture/emergence_summary +Calculates emergence statistics using thresholded MSAVI (mean, standard deviation, maximum and minimum) for the input geometry and time range. The workflow retrieves Sentinel2 products with Planetary Computer (PC) API, forwards them to a cloud detection model and combines the predicted cloud mask to the mask provided by PC. 
It computes the MSAVI for each available tile and date, thresholds them above a certain value and summarizes each with the mean, standard deviation, maximum and minimum values for the regions not obscured by clouds. Finally, it outputs a timeseries with such statistics for all available dates, filtering out heavily-clouded tiles. + +```{mermaid} + graph TD + inp1>user_input] + out1>timeseries] + tsk1{{s2}} + tsk2{{msavi}} + tsk3{{emergence}} + tsk4{{summary_timeseries}} + tsk1{{s2}} -- raster --> tsk2{{msavi}} + tsk2{{msavi}} -- index_raster/raster --> tsk3{{emergence}} + tsk3{{emergence}} -- thresholded_raster/raster --> tsk4{{summary_timeseries}} + tsk1{{s2}} -- mask --> tsk4{{summary_timeseries}} + inp1>user_input] -- user_input --> tsk1{{s2}} + inp1>user_input] -- input_geometry --> tsk4{{summary_timeseries}} + tsk4{{summary_timeseries}} -- timeseries --> out1>timeseries] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **timeseries**: Aggregated emergence statistics of the retrieved tiles within the input geometry and time range. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using cloud and shadow segmentation models. + +- **msavi**: Computes an index from the bands of an input raster. + +- **emergence**: Thresholds values of the input raster if higher than the threshold parameter. + +- **summary_timeseries**: Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster considered by the mask and aggregates them into a timeseries. + +## Workflow Yaml + ```yaml name: emergence_summary @@ -59,21 +102,4 @@ description: pc_key: Optional Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>timeseries] - tsk1{{s2}} - tsk2{{msavi}} - tsk3{{emergence}} - tsk4{{summary_timeseries}} - tsk1{{s2}} -- raster --> tsk2{{msavi}} - tsk2{{msavi}} -- index_raster/raster --> tsk3{{emergence}} - tsk3{{emergence}} -- thresholded_raster/raster --> tsk4{{summary_timeseries}} - tsk1{{s2}} -- mask --> tsk4{{summary_timeseries}} - inp1>user_input] -- user_input --> tsk1{{s2}} - inp1>user_input] -- input_geometry --> tsk4{{summary_timeseries}} - tsk4{{summary_timeseries}} -- timeseries --> out1>timeseries] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/green_house_gas_fluxes.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/green_house_gas_fluxes.md index 37a25582..69a69764 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/green_house_gas_fluxes.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/green_house_gas_fluxes.md @@ -1,5 +1,34 @@ # farm_ai/agriculture/green_house_gas_fluxes +Computes Green House Gas fluxes for a region and date range. The workflow follows the GHG Protocol guidelines published for Brazil (which are based on IPCC reports) to compute Green House Gas emission fluxes (sequestration versus emissions) for a given crop. + +```{mermaid} + graph TD + inp1>user_input] + out1>fluxes] + tsk1{{ghg}} + inp1>user_input] -- ghg --> tsk1{{ghg}} + tsk1{{ghg}} -- fluxes --> out1>fluxes] +``` + +## Sources + +- **user_input**: The user-provided inputs for GHG computation. + +## Sinks + +- **fluxes**: The computed fluxes for the given area and date range considering the user input data. + +## Parameters + +- **crop_type**: The type of the crop to compute GHG emissions. Supported crops are 'wheat', 'corn', 'cotton', and 'soybeans'. + +## Tasks + +- **ghg**: Computes Green House Gas emission fluxes using emission factors from the IPCC methodology.
+ +## Workflow Yaml + ```yaml name: green_house_gas_fluxes @@ -31,13 +60,4 @@ description: 'wheat', 'corn', 'cotton', and 'soybeans'. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>fluxes] - tsk1{{ghg}} - inp1>user_input] -- ghg --> tsk1{{ghg}} - tsk1{{ghg}} -- fluxes --> out1>fluxes] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification.md index c4343de2..ff5faf67 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification.md @@ -1,5 +1,61 @@ # farm_ai/agriculture/heatmap_using_classification +The workflow generates a nutrient heatmap for samples provided by user by downloading the samples from user input. The samples provided are related with farm boundary and have required nutrient information to create a heatmap. + +```{mermaid} + graph TD + inp1>input_samples] + inp2>input_raster] + out1>result] + tsk1{{download_samples}} + tsk2{{soil_sample_heatmap_classification}} + tsk1{{download_samples}} -- geometry/samples --> tsk2{{soil_sample_heatmap_classification}} + inp1>input_samples] -- user_input --> tsk1{{download_samples}} + inp2>input_raster] -- input_raster --> tsk2{{soil_sample_heatmap_classification}} + tsk2{{soil_sample_heatmap_classification}} -- result --> out1>result] +``` + +## Sources + +- **input_raster**: Input raster for index computation. + +- **input_samples**: External references to sensor samples for nutrients. + +## Sinks + +- **result**: Zip file containing cluster geometries. + +## Parameters + +- **attribute_name**: Nutrient property name in sensor samples geojson file. 
For example, CARBON (C), Nitrogen (N), Phosphorus (P), etc. + +- **buffer**: Offset distance from sample to perform interpolation operations with raster. + +- **index**: Type of index to be used to generate heatmap. For example: evi, pri, etc. + +- **bins**: Possible number of groups used to move value to nearest group using [numpy histogram](https://numpy.org/doc/stable/reference/generated/numpy.histogram.html) and to pre-process the data to support model training with classification. + +- **simplify**: Replace small polygons in input with value of their largest neighbor after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. + +- **tolerance**: All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) will be no more than tolerance distance from the original. It has the same units as the coordinate reference system of the GeoSeries. For example, using tolerance=100 in a projected CRS with meters as units means a distance of 100 meters in reality. + +- **data_scale**: Accepts True or False. Default is False. On True, it scales data using [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) from the scikit-learn package. It standardizes features by removing the mean and scaling to unit variance. + +- **max_depth**: The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +- **n_estimators**: The number of trees in the forest.
For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +- **random_state**: Controls both the randomness of the bootstrapping of the samples used when building trees (if bootstrap=True) and the sampling of the features to consider when looking for the best split at each node (if max_features < n_features). For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +## Tasks + +- **download_samples**: Adds user geometries into the cluster storage, allowing for them to be used on workflows. + +- **soil_sample_heatmap_classification**: Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. The inference operation predicts nutrients in soil for the chosen farm boundary. + + +## Workflow Yaml + ```yaml name: heatmap_using_classification @@ -54,17 +110,4 @@ description: parameters: null -``` - -```{mermaid} - graph TD - inp1>input_samples] - inp2>input_raster] - out1>result] - tsk1{{download_samples}} - tsk2{{soil_sample_heatmap_classification}} - tsk1{{download_samples}} -- geometry/samples --> tsk2{{soil_sample_heatmap_classification}} - inp1>input_samples] -- user_input --> tsk1{{download_samples}} - inp2>input_raster] -- input_raster --> tsk2{{soil_sample_heatmap_classification}} - tsk2{{soil_sample_heatmap_classification}} -- result --> out1>result] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification_admag.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification_admag.md index 246b8094..07167ee9 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification_admag.md +++ 
b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_classification_admag.md @@ -1,5 +1,71 @@ # farm_ai/agriculture/heatmap_using_classification_admag +This workflow integrates the ADMAg API to download prescriptions and generate a heatmap. The prescriptions are related to the farm boundary and the nutrient information. Each prescription represents a sensor sample at a location within a farm boundary. + +```{mermaid} + graph TD + inp1>admag_input] + inp2>input_raster] + out1>result] + tsk1{{prescriptions}} + tsk2{{soil_sample_heatmap_classification}} + tsk1{{prescriptions}} -- response/samples --> tsk2{{soil_sample_heatmap_classification}} + inp1>admag_input] -- admag_input --> tsk1{{prescriptions}} + inp2>input_raster] -- input_raster --> tsk2{{soil_sample_heatmap_classification}} + tsk2{{soil_sample_heatmap_classification}} -- result --> out1>result] +``` + +## Sources + +- **input_raster**: Input raster for index computation. + +- **admag_input**: Required inputs to download prescriptions from ADMAg. + +## Sinks + +- **result**: Zip file containing cluster geometries. + +## Parameters + +- **base_url**: URL to access the registered app. + +- **client_id**: Value that uniquely identifies the registered application in the Microsoft identity platform. Visit https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app to register the app. + +- **client_secret**: Sometimes called an application password, a client secret is a string value your app can use in place of a certificate to identify itself. + +- **authority**: The endpoint URIs for your app are generated automatically when you register or configure your app. It is used by the client to obtain authorization from the resource owner. + +- **default_scope**: URL for default Azure OAuth2 permissions. + +- **attribute_name**: Nutrient property name in sensor samples geojson file.
For example CARBON (C), Nitrogen (N), Phosphorus (P) etc., + +- **buffer**: Offset distance from sample to perform interpolation operations with raster. + +- **index**: Type of index to be used to generate heatmap. For example - evi, pri etc., + +- **bins**: Possible number of groups used to move value to nearest group using [numpy histogram](https://numpy.org/doc/stable/reference/generated/numpy.histogram.html) and to pre-process the data to support model training with classification. + +- **simplify**: Replace small polygons in input with value of their largest neighbor after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. + +- **tolerance**: All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) will be no more than tolerance distance from the original. It has the same units as the coordinate reference system of the GeoSeries. For example, using tolerance=100 in a projected CRS with meters as units means a distance of 100 meters in reality. + +- **data_scale**: Accepts True or False. Default is False. When True, it scales the data using [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) from the scikit-learn package. It standardizes features by removing the mean and scaling to unit variance. + +- **max_depth**: The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +- **n_estimators**: The number of trees in the forest. 
For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +- **random_state**: Controls both the randomness of the bootstrapping of the samples used when building trees (if bootstrap=True) and the sampling of the features to consider when looking for the best split at each node (if max_features < n_features). For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + +## Tasks + +- **prescriptions**: Fetches prescriptions using ADMAg (Microsoft Azure Data Manager for Agriculture). + +- **soil_sample_heatmap_classification**: Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. The inference operation predicts nutrients in soil for the chosen farm boundary. + + +## Workflow Yaml + ```yaml name: heatmap_using_classification_admag @@ -76,17 +142,4 @@ description: default_scope: URL for default azure OAuth2 permissions -``` - -```{mermaid} - graph TD - inp1>admag_input] - inp2>input_raster] - out1>result] - tsk1{{prescriptions}} - tsk2{{soil_sample_heatmap_classification}} - tsk1{{prescriptions}} -- response/samples --> tsk2{{soil_sample_heatmap_classification}} - inp1>admag_input] -- admag_input --> tsk1{{prescriptions}} - inp2>input_raster] -- input_raster --> tsk2{{soil_sample_heatmap_classification}} - tsk2{{soil_sample_heatmap_classification}} -- result --> out1>result] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_neighboring_data_points.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_neighboring_data_points.md index e7fefb03..fd1ad086 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_neighboring_data_points.md +++ 
b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/heatmap_using_neighboring_data_points.md @@ -1,5 +1,60 @@ # farm_ai/agriculture/heatmap_using_neighboring_data_points +Creates heatmap using the neighbors by performing spatial interpolation operations. It utilizes soil information collected at optimal sensor/sample locations and downloaded sentinel satellite imagery. The optimal location of nutrient samples are identified using workflow . The quantity of samples defines the accuracy of the heatmap generation. During the research performed testing on a 100 acre farm using sample count of approximately 20, 80, 130, 600. The research concluded that a sample count of 20 provided decent results, also accuracy of nutrient information improved with increase in sample count. + +```{mermaid} + graph TD + inp1>input_raster] + inp2>input_samples] + inp3>input_sample_clusters] + out1>result] + tsk1{{download_samples}} + tsk2{{download_sample_clusters}} + tsk3{{soil_sample_heatmap}} + tsk1{{download_samples}} -- geometry/samples --> tsk3{{soil_sample_heatmap}} + tsk2{{download_sample_clusters}} -- geometry/samples_boundary --> tsk3{{soil_sample_heatmap}} + inp1>input_raster] -- raster --> tsk3{{soil_sample_heatmap}} + inp2>input_samples] -- user_input --> tsk1{{download_samples}} + inp3>input_sample_clusters] -- user_input --> tsk2{{download_sample_clusters}} + tsk3{{soil_sample_heatmap}} -- result --> out1>result] +``` + +## Sources + +- **input_raster**: Sentinel-2 raster. + +- **input_samples**: Sensor samples with nutrient information. + +- **input_sample_clusters**: Clusters boundaries of sensor samples locations. + +## Sinks + +- **result**: Zip file containing heatmap output as shape files. + +## Parameters + +- **attribute_name**: Nutrient property name in sensor samples geojson file. 
For example: CARBON (C), Nitrogen (N), Phosphorus (P) etc., + +- **simplify**: Replace small polygons in input with value of their largest neighbor after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. + +- **tolerance**: All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) will be no more than tolerance distance from the original. It has the same units as the coordinate reference system of the GeoSeries. For example, using tolerance=100 in a projected CRS with meters as units means a distance of 100 meters in reality. + +- **algorithm**: Algorithm used to identify nearest neighbors. Accepts 'cluster overlap' or 'nearest neighbor' or 'kriging neighbor'. + +- **resolution**: Defines the output resolution as the ratio of input raster resolution. For example, if resolution is 5, the output heatmap is 5 times coarser than input raster. + +- **bins**: Defines the number of equal-width bins in the given range. Refer to this article to learn more about bins: https://numpy.org/doc/stable/reference/generated/numpy.histogram.html + +## Tasks + +- **download_samples**: Adds user geometries into the cluster storage, allowing for them to be used on workflows. + +- **download_sample_clusters**: Adds user geometries into the cluster storage, allowing for them to be used on workflows. + +- **soil_sample_heatmap**: Generates a heatmap for nutrients using satellite or SpaceEye imagery. 
+ +## Workflow Yaml + ```yaml name: heatmap_using_neighboring_data_points @@ -75,21 +130,4 @@ description: article to learn more about bins https://numpy.org/doc/stable/reference/generated/numpy.histogram.html -``` - -```{mermaid} - graph TD - inp1>input_raster] - inp2>input_samples] - inp3>input_sample_clusters] - out1>result] - tsk1{{download_samples}} - tsk2{{download_sample_clusters}} - tsk3{{soil_sample_heatmap}} - tsk1{{download_samples}} -- geometry/samples --> tsk3{{soil_sample_heatmap}} - tsk2{{download_sample_clusters}} -- geometry/samples_boundary --> tsk3{{soil_sample_heatmap}} - inp1>input_raster] -- raster --> tsk3{{soil_sample_heatmap}} - inp2>input_samples] -- user_input --> tsk1{{download_samples}} - inp3>input_sample_clusters] -- user_input --> tsk2{{download_sample_clusters}} - tsk3{{soil_sample_heatmap}} -- result --> out1>result] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/methane_index.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/methane_index.md index c6eaa654..48b02fe4 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/methane_index.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/methane_index.md @@ -1,5 +1,51 @@ # farm_ai/agriculture/methane_index +Computes methane index from ultra emitters for a region and date range. The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API and crops the rasters for the region defined in user_input. All bands are normalized and an anti-aliasing Gaussian filter is applied to smooth and remove potential artifacts. An unsupervised K-Nearest Neighbor is applied to identify bands similar to band 12, and the index is computed by the difference between band 12 and the pixel-wise median of top K similar bands. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>index] + out2>s2_raster] + out3>cloud_mask] + tsk1{{s2}} + tsk2{{clip}} + tsk3{{methane}} + tsk1{{s2}} -- raster --> tsk2{{clip}} + tsk2{{clip}} -- clipped_raster/raster --> tsk3{{methane}} + inp1>user_input] -- user_input --> tsk1{{s2}} + inp1>user_input] -- input_geometry --> tsk2{{clip}} + tsk3{{methane}} -- index_raster --> out1>index] + tsk1{{s2}} -- raster --> out2>s2_raster] + tsk1{{s2}} -- mask --> out3>cloud_mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **index**: Methane index raster. + +- **s2_raster**: Sentinel-2 raster. + +- **cloud_mask**: Cloud mask. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using cloud and shadow segmentation models. + +- **clip**: Performs a soft clip on an input raster based on a provided reference geometry. + +- **methane**: Computes an index from the bands of an input raster. + +## Workflow Yaml + ```yaml name: methane_index @@ -50,22 +96,4 @@ description: pc_key: Optional Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>index] - out2>s2_raster] - out3>cloud_mask] - tsk1{{s2}} - tsk2{{clip}} - tsk3{{methane}} - tsk1{{s2}} -- raster --> tsk2{{clip}} - tsk2{{clip}} -- clipped_raster/raster --> tsk3{{methane}} - inp1>user_input] -- user_input --> tsk1{{s2}} - inp1>user_input] -- input_geometry --> tsk2{{clip}} - tsk3{{methane}} -- index_raster --> out1>index] - tsk1{{s2}} -- raster --> out2>s2_raster] - tsk1{{s2}} -- mask --> out3>cloud_mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/ndvi_summary.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/ndvi_summary.md index c7307156..c97aabf3 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/ndvi_summary.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/ndvi_summary.md @@ -1,5 +1,44 @@ # farm_ai/agriculture/ndvi_summary +Calculates NDVI statistics (mean, standard deviation, maximum and minimum) for the input geometry and time range. The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API, forwards them to a cloud detection model and combines the predicted cloud mask to the mask obtained from the product. The workflow computes the NDVI for each available tile and date, summarizing each with the mean, standard deviation, maximum and minimum values for the regions not obscured by clouds. Finally, it outputs a timeseries with such statistics for all available dates, ignoring heavily-clouded tiles. 
+ +```{mermaid} + graph TD + inp1>user_input] + out1>timeseries] + tsk1{{s2}} + tsk2{{compute_ndvi}} + tsk3{{summary_timeseries}} + tsk1{{s2}} -- raster --> tsk2{{compute_ndvi}} + tsk2{{compute_ndvi}} -- index_raster/raster --> tsk3{{summary_timeseries}} + tsk1{{s2}} -- mask --> tsk3{{summary_timeseries}} + inp1>user_input] -- user_input --> tsk1{{s2}} + inp1>user_input] -- input_geometry --> tsk3{{summary_timeseries}} + tsk3{{summary_timeseries}} -- timeseries --> out1>timeseries] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **timeseries**: Aggregated NDVI statistics of the retrieved tiles within the input geometry and time range. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and computes improved cloud masks using cloud and shadow segmentation models. + +- **compute_ndvi**: Computes an index from the bands of an input raster. + +- **summary_timeseries**: Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster considered by the mask and aggregates them into a timeseries. + +## Workflow Yaml + ```yaml name: ndvi_summary @@ -50,19 +89,4 @@ description: pc_key: Optional Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>timeseries] - tsk1{{s2}} - tsk2{{compute_ndvi}} - tsk3{{summary_timeseries}} - tsk1{{s2}} -- raster --> tsk2{{compute_ndvi}} - tsk2{{compute_ndvi}} -- index_raster/raster --> tsk3{{summary_timeseries}} - tsk1{{s2}} -- mask --> tsk3{{summary_timeseries}} - inp1>user_input] -- user_input --> tsk1{{s2}} - inp1>user_input] -- input_geometry --> tsk3{{summary_timeseries}} - tsk3{{summary_timeseries}} -- timeseries --> out1>timeseries] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/weed_detection.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/weed_detection.md index 086f04da..57c16c24 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/weed_detection.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/agriculture/weed_detection.md @@ -1,5 +1,54 @@ # farm_ai/agriculture/weed_detection +Generates shape files for similarly colored regions in the input raster. The workflow retrieves a remote raster and trains a Gaussian Mixture Model (GMM) over a subset of the input data with a fixed number of components. The GMM is then used to cluster all images pixels. Clustered regions are converted to polygons with a minimum size threshold. These polygons are then simplified to smooth their borders. All polygons of a given cluster are written to a single shapefile. All files are then compressed and returned as a single zip archive. + +```{mermaid} + graph TD + inp1>user_input] + out1>result] + tsk1{{download_raster}} + tsk2{{weed_detection}} + tsk1{{download_raster}} -- raster --> tsk2{{weed_detection}} + inp1>user_input] -- user_input --> tsk1{{download_raster}} + tsk2{{weed_detection}} -- result --> out1>result] +``` + +## Sources + +- **user_input**: External references to raster data. + +## Sinks + +- **result**: Zip file containing cluster geometries. 
+ +## Parameters + +- **buffer**: Buffer size, in projected CRS, to apply to the input geometry before sampling training points. A negative number can be used to avoid sampling unwanted regions if the geometry is not very precise. + +- **no_data**: Value to use as nodata when reading the raster. Uses the raster's internal nodata value if not provided. + +- **clusters**: Number of clusters to use when segmenting the image. + +- **sieve_size**: Area of the minimum connected region. Smaller regions will have their class assigned to the largest adjacent region. + +- **simplify**: Method used to simplify the geometries. Accepts 'none', for no simplification, 'simplify', for tolerance-based simplification, and 'convex', for returning the convex hull. + +- **tolerance**: Tolerance for simplification algorithm. Only applicable if simplification method is 'simplify'. + +- **samples**: Number of samples to use during training. + +- **bands**: List of band indices to use during training and inference. + +- **alpha_index**: Positive index of alpha band, if used to filter out nodata values. + +## Tasks + +- **download_raster**: Adds user rasters into the cluster storage, allowing for them to be used on workflows. + +- **weed_detection**: Trains a Gaussian Mixture Model (GMM), clusters all image pixels, and converts clustered regions into polygons. + +## Workflow Yaml + ```yaml name: weed_detection @@ -69,15 +118,4 @@ description: alpha_index: Positive index of alpha band, if used to filter out nodata values. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>result] - tsk1{{download_raster}} - tsk2{{weed_detection}} - tsk1{{download_raster}} -- raster --> tsk2{{weed_detection}} - inp1>user_input] -- user_input --> tsk1{{download_raster}} - tsk2{{weed_detection}} -- result --> out1>result] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/admag_carbon_integration.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/admag_carbon_integration.md index 593c499e..ca7dddcb 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/admag_carbon_integration.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/admag_carbon_integration.md @@ -1,5 +1,58 @@ # farm_ai/carbon_local/admag_carbon_integration +Computes the offset amount of carbon that would be sequestered in a seasonal field using Microsoft Azure Data Manager for Agriculture (ADMAg) data. Derives carbon sequestration information. Microsoft Azure Data Manager for Agriculture (ADMAg) and the COMET-Farm API are used to obtain farming data and evaluate carbon offset. ADMAg is capable of describing important farming activities such as fertilization, tillage, and organic amendments applications, all of which are represented in the data manager. FarmVibes.AI retrieves this information from the data manager and builds SeasonalFieldInformation FarmVibes.AI objects. These objects are then used to call the COMET-Farm API and evaluate Carbon Offset Information. 
+ +```{mermaid} + graph TD + inp1>baseline_admag_input] + inp2>scenario_admag_input] + out1>carbon_output] + tsk1{{baseline_seasonal_field_list}} + tsk2{{scenario_seasonal_field_list}} + tsk3{{admag_carbon}} + tsk1{{baseline_seasonal_field_list}} -- seasonal_field/baseline_seasonal_fields --> tsk3{{admag_carbon}} + tsk2{{scenario_seasonal_field_list}} -- seasonal_field/scenario_seasonal_fields --> tsk3{{admag_carbon}} + inp1>baseline_admag_input] -- admag_input --> tsk1{{baseline_seasonal_field_list}} + inp2>scenario_admag_input] -- admag_input --> tsk2{{scenario_seasonal_field_list}} + tsk3{{admag_carbon}} -- carbon_output --> out1>carbon_output] +``` + +## Sources + +- **baseline_admag_input**: List of ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation objects for baseline COMET-Farm API Carbon offset evaluation. + +- **scenario_admag_input**: List of ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation objects for scenarios COMET-Farm API Carbon offset evaluation. + +## Sinks + +- **carbon_output**: Carbon sequestration received for scenario information provided as input. + +## Parameters + +- **base_url**: Azure Data Manager for Agriculture host. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **client_id**: Azure Data Manager for Agriculture client id. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **client_secret**: Azure Data Manager for Agriculture client secret. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **authority**: Azure Data Manager for Agriculture authority. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **default_scope**: Azure Data Manager for Agriculture default scope. Please visit https://aka.ms/farmvibesDMA to check how to get these credentials. + +- **comet_support_email**: Comet support email. The email used to register for a COMET account. 
The requests are forwarded to comet with this email reference. This email is used by comet to share the information back to you for failed requests. + +- **ngrok_token**: NGROK session token. A token that FarmVibes uses to create a web_hook url that is shared with Comet in a request when running the workflow. Comet can use this link to send back a response to FarmVibes. NGROK is a service that creates temporary urls for local servers. To use NGROK, FarmVibes needs to get a token from this website, https://dashboard.ngrok.com/. + +## Tasks + +- **baseline_seasonal_field_list**: Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data Manager for Agriculture). + +- **scenario_seasonal_field_list**: Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data Manager for Agriculture). + +- **admag_carbon**: Computes the offset amount of carbon that would be sequestered in a seasonal field using the baseline (historical) and scenario (time range interested in) information. + +## Workflow Yaml + ```yaml name: admag_carbon_integration @@ -88,19 +141,4 @@ description: https://aka.ms/farmvibesDMA to check how to get these credentials. 
-``` - -```{mermaid} - graph TD - inp1>baseline_admag_input] - inp2>scenario_admag_input] - out1>carbon_output] - tsk1{{baseline_seasonal_field_list}} - tsk2{{scenario_seasonal_field_list}} - tsk3{{admag_carbon}} - tsk1{{baseline_seasonal_field_list}} -- seasonal_field/baseline_seasonal_fields --> tsk3{{admag_carbon}} - tsk2{{scenario_seasonal_field_list}} -- seasonal_field/scenario_seasonal_fields --> tsk3{{admag_carbon}} - inp1>baseline_admag_input] -- admag_input --> tsk1{{baseline_seasonal_field_list}} - inp2>scenario_admag_input] -- admag_input --> tsk2{{scenario_seasonal_field_list}} - tsk3{{admag_carbon}} -- carbon_output --> out1>carbon_output] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/carbon_whatif.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/carbon_whatif.md index b1c3b082..67e43dc4 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/carbon_whatif.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/carbon_local/carbon_whatif.md @@ -1,5 +1,40 @@ # farm_ai/carbon_local/carbon_whatif +Computes the offset amount of carbon that would be sequestered in a seasonal field using the baseline (historical) and scenario (time range interested in) information. To derive amount of carbon, it relies on seasonal information information provided for both baseline and scenario. The baseline represents historical information of farm practices used during each season that includes fertilizers, tillage, harvest and organic amendment. Minimum 2 years of baseline information required to execute the workflow. The scenario represents future farm practices planning to do during each season that includes fertilizers, tillage, harvest and organic amendment. For the scenario information provided, the workflow compute the offset amount of carbon that would be sequestrated in a seasonal field. 
A minimum of 2 years of baseline information is required to execute the workflow. The requests received by the workflow are forwarded to the COMET API. For more information on COMET, refer to https://gitlab.com/comet-api/api-docs/-/tree/master/. To understand the enumerations and information accepted by COMET, refer to https://gitlab.com/comet-api/api-docs/-/blob/master/COMET-Farm_API_File_Specification.xlsx. A submitted request is executed within 5 minutes to a maximum of 2 hours. If no response is received from COMET within this time period, check comet_support_email for information on failed requests; if no emails were received, check the status of the requests by contacting the COMET support email address "appnrel@colostate.edu". For public use, COMET limits usage to 50 requests per day. If more requests need to be sent, contact the support email address. + +```{mermaid} + graph TD + inp1>baseline_seasonal_fields] + inp2>scenario_seasonal_fields] + out1>carbon_output] + tsk1{{comet_task}} + inp1>baseline_seasonal_fields] -- baseline_seasonal_fields --> tsk1{{comet_task}} + inp2>scenario_seasonal_fields] -- scenario_seasonal_fields --> tsk1{{comet_task}} + tsk1{{comet_task}} -- carbon_output --> out1>carbon_output] +``` + +## Sources + +- **baseline_seasonal_fields**: List of seasonal fields that hold the historical information of farm practices such as fertilizers, tillage, harvest and organic amendment. + +- **scenario_seasonal_fields**: List of seasonal fields that hold the future information of farm practices such as fertilizers, tillage, harvest and organic amendment. + +## Sinks + +- **carbon_output**: Carbon sequestration received for scenario information provided as input. + +## Parameters + +- **comet_support_email**: COMET-Farm API Registered email. The requests are forwarded to comet with this email reference. This email is used by comet to share the information back to you for failed requests. + +- **ngrok_token**: NGROK session token. 
FarmVibes generate web_hook url and shared url with comet along the request to receive the response from comet. It's publicly accessible url and it's unique for each session. The url gets destroyed once the session ends. To start the ngrok session a token, it is generated from this url https://dashboard.ngrok.com/ + +## Tasks + +- **comet_task**: Computes the offset amount of carbon that would be sequestered in a seasonal field using the baseline (historical) and scenario (time range interested in) information. + +## Workflow Yaml + ```yaml name: carbon_whatif @@ -61,15 +96,4 @@ description: url https://dashboard.ngrok.com/ -``` - -```{mermaid} - graph TD - inp1>baseline_seasonal_fields] - inp2>scenario_seasonal_fields] - out1>carbon_output] - tsk1{{comet_task}} - inp1>baseline_seasonal_fields] -- baseline_seasonal_fields --> tsk1{{comet_task}} - inp2>scenario_seasonal_fields] -- scenario_seasonal_fields --> tsk1{{comet_task}} - tsk1{{comet_task}} -- carbon_output --> out1>carbon_output] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_cover_mapping/conservation_practices.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_cover_mapping/conservation_practices.md index 07ed936b..7d65b7c5 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_cover_mapping/conservation_practices.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_cover_mapping/conservation_practices.md @@ -1,5 +1,88 @@ # farm_ai/land_cover_mapping/conservation_practices +Identifies conservation practices (terraces and grassed waterways) using elevation data. The workflow classifies pixels in terraces or grassed waterways. It starts downloading NAIP and USGS 3DEP tiles. Then, it computes the elevation gradient using a Sobel filter. And it computes local clusters using an overlap clustering method. Then, it combines cluster and elevation tiles to compute the average elevation per cluster. 
Finally, it uses a CNN model to classify pixels in either terraces or grassed waterways. + +```{mermaid} + graph TD + inp1>user_input] + out1>dem_raster] + out2>naip_raster] + out3>dem_gradient] + out4>cluster] + out5>average_elevation] + out6>practices] + tsk1{{naip}} + tsk2{{cluster}} + tsk3{{dem}} + tsk4{{gradient}} + tsk5{{match_grad}} + tsk6{{match_elev}} + tsk7{{avg_elev}} + tsk8{{practice}} + tsk1{{naip}} -- raster/user_input --> tsk3{{dem}} + tsk1{{naip}} -- raster/input_raster --> tsk2{{cluster}} + tsk1{{naip}} -- raster/ref_rasters --> tsk6{{match_elev}} + tsk1{{naip}} -- raster/ref_rasters --> tsk5{{match_grad}} + tsk3{{dem}} -- raster --> tsk4{{gradient}} + tsk3{{dem}} -- raster/rasters --> tsk6{{match_elev}} + tsk4{{gradient}} -- gradient/rasters --> tsk5{{match_grad}} + tsk2{{cluster}} -- output_raster/input_cluster_raster --> tsk7{{avg_elev}} + tsk6{{match_elev}} -- match_rasters/input_dem_raster --> tsk7{{avg_elev}} + tsk7{{avg_elev}} -- output_raster/average_elevation --> tsk8{{practice}} + tsk5{{match_grad}} -- match_rasters/elevation_gradient --> tsk8{{practice}} + inp1>user_input] -- user_input --> tsk1{{naip}} + tsk3{{dem}} -- raster --> out1>dem_raster] + tsk1{{naip}} -- raster --> out2>naip_raster] + tsk4{{gradient}} -- gradient --> out3>dem_gradient] + tsk2{{cluster}} -- output_raster --> out4>cluster] + tsk7{{avg_elev}} -- output_raster --> out5>average_elevation] + tsk8{{practice}} -- output_raster --> out6>practices] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **dem_raster**: USGS 3DEP tiles that overlap the NAIP tiles that overlap the area of interest. + +- **naip_raster**: NAIP tiles that overlap the area of interest. + +- **dem_gradient**: A copy of the USGS 3DEP tiles where the pixel values are the gradient computed using the Sobel filter. + +- **cluster**: A copy of the NAIP tiles with one band representing the output of the overlap clustering method. 
Each pixel has a value between one and four. + +- **average_elevation**: A combination of the dem_gradient and cluster sinks, where each pixel value is the average elevation of all pixels that fall in the same cluster. + +- **practices**: A copy of the NAIP tile with one band where each pixel value refers to a conservation practice (0 = none, 1 = terraces, 2 = grassed waterways). + +## Parameters + +- **clustering_iterations**: The number of iterations used in the overlap clustering method. + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **naip**: Downloads NAIP tiles that intersect with the input geometry and time range. + +- **cluster**: Computes local clusters using an overlap clustering method. + +- **dem**: Downloads digital elevation map tiles that intersect with the input geometry and time range. + +- **gradient**: Computes the gradient of each band of the input raster with a Sobel operator. + +- **match_grad**: Resamples input rasters to the reference rasters' grid. + +- **match_elev**: Resamples input rasters to the reference rasters' grid. + +- **avg_elev**: Computes average elevation per-class in overlapping windows, combining cluster and elevation tiles. + +- **practice**: Classifies pixels in either terraces or grassed waterways using a CNN model. + +## Workflow Yaml + ```yaml name: conservation_practices @@ -95,41 +178,4 @@ description: pc_key: Optional Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>dem_raster] - out2>naip_raster] - out3>dem_gradient] - out4>cluster] - out5>average_elevation] - out6>practices] - tsk1{{naip}} - tsk2{{cluster}} - tsk3{{dem}} - tsk4{{gradient}} - tsk5{{match_grad}} - tsk6{{match_elev}} - tsk7{{avg_elev}} - tsk8{{practice}} - tsk1{{naip}} -- raster/user_input --> tsk3{{dem}} - tsk1{{naip}} -- raster/input_raster --> tsk2{{cluster}} - tsk1{{naip}} -- raster/ref_rasters --> tsk6{{match_elev}} - tsk1{{naip}} -- raster/ref_rasters --> tsk5{{match_grad}} - tsk3{{dem}} -- raster --> tsk4{{gradient}} - tsk3{{dem}} -- raster/rasters --> tsk6{{match_elev}} - tsk4{{gradient}} -- gradient/rasters --> tsk5{{match_grad}} - tsk2{{cluster}} -- output_raster/input_cluster_raster --> tsk7{{avg_elev}} - tsk6{{match_elev}} -- match_rasters/input_dem_raster --> tsk7{{avg_elev}} - tsk7{{avg_elev}} -- output_raster/average_elevation --> tsk8{{practice}} - tsk5{{match_grad}} -- match_rasters/elevation_gradient --> tsk8{{practice}} - inp1>user_input] -- user_input --> tsk1{{naip}} - tsk3{{dem}} -- raster --> out1>dem_raster] - tsk1{{naip}} -- raster --> out2>naip_raster] - tsk4{{gradient}} -- gradient --> out3>dem_gradient] - tsk2{{cluster}} -- output_raster --> out4>cluster] - tsk7{{avg_elev}} -- output_raster --> out5>average_elevation] - tsk8{{practice}} -- output_raster --> out6>practices] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/landsat_ndvi_trend.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/landsat_ndvi_trend.md index 8836bf98..8d1dcb46 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/landsat_ndvi_trend.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/landsat_ndvi_trend.md @@ -1,5 +1,42 @@ # farm_ai/land_degradation/landsat_ndvi_trend +Estimates a linear trend over NDVI computer over LANDSAT tiles that intersect with the input 
geometry and time range. The workflow downloads LANDSAT data, computes NDVI over them, and estimates a linear trend over chunks of data, combining them into a final trend raster. + +```{mermaid} + graph TD + inp1>user_input] + out1>ndvi] + out2>linear_trend] + tsk1{{landsat}} + tsk2{{trend}} + tsk1{{landsat}} -- raster --> tsk2{{trend}} + inp1>user_input] -- user_input --> tsk1{{landsat}} + tsk2{{trend}} -- ndvi_raster --> out1>ndvi] + tsk2{{trend}} -- linear_trend --> out2>linear_trend] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **ndvi**: NDVI rasters. + +- **linear_trend**: Raster with the trend and the test statistics. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **landsat**: Downloads and preprocesses LANDSAT tiles that intersect with the input geometry and time range. + +- **trend**: Computes the pixel-wise NDVI linear trend over the input raster. + +## Workflow Yaml + ```yaml name: landsat_ndvi_trend @@ -37,17 +74,4 @@ description: pc_key: Optional Planetary Computer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>ndvi] - out2>linear_trend] - tsk1{{landsat}} - tsk2{{trend}} - tsk1{{landsat}} -- raster --> tsk2{{trend}} - inp1>user_input] -- user_input --> tsk1{{landsat}} - tsk2{{trend}} -- ndvi_raster --> out1>ndvi] - tsk2{{trend}} -- linear_trend --> out2>linear_trend] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/ndvi_linear_trend.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/ndvi_linear_trend.md index b320cf82..85f3761a 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/ndvi_linear_trend.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/land_degradation/ndvi_linear_trend.md @@ -1,5 +1,38 @@ # farm_ai/land_degradation/ndvi_linear_trend +Computes the pixel-wise NDVI linear trend over the input raster. 
The workflow computes the NDVI from the input raster, calculates the linear trend over chunks of data, combining them into the final raster. + +```{mermaid} + graph TD + inp1>raster] + out1>ndvi_raster] + out2>linear_trend] + tsk1{{ndvi}} + tsk2{{chunked_linear_trend}} + tsk1{{ndvi}} -- index_raster/input_rasters --> tsk2{{chunked_linear_trend}} + inp1>raster] -- raster --> tsk1{{ndvi}} + tsk1{{ndvi}} -- index_raster --> out1>ndvi_raster] + tsk2{{chunked_linear_trend}} -- linear_trend_raster --> out2>linear_trend] +``` + +## Sources + +- **raster**: Input raster. + +## Sinks + +- **ndvi_raster**: NDVI raster. + +- **linear_trend**: Raster with the trend and the test statistics. + +## Tasks + +- **ndvi**: Computes an index from the bands of an input raster. + +- **chunked_linear_trend**: Computes the pixel-wise linear trend of a list of rasters (e.g. NDVI). + +## Workflow Yaml + ```yaml name: ndvi_linear_trend @@ -34,17 +67,4 @@ description: linear_trend: Raster with the trend and the test statistics. 
-``` - -```{mermaid} - graph TD - inp1>raster] - out1>ndvi_raster] - out2>linear_trend] - tsk1{{ndvi}} - tsk2{{chunked_linear_trend}} - tsk1{{ndvi}} -- index_raster/input_rasters --> tsk2{{chunked_linear_trend}} - inp1>raster] -- raster --> tsk1{{ndvi}} - tsk1{{ndvi}} -- index_raster --> out1>ndvi_raster] - tsk2{{chunked_linear_trend}} -- linear_trend_raster --> out2>linear_trend] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_basemap.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_basemap.md index 513eea50..38950101 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_basemap.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_basemap.md @@ -1,17 +1,65 @@ # farm_ai/segmentation/segment_basemap +Downloads basemap with BingMaps API and runs Segment Anything Model (SAM) over them with points and/or bounding boxes as prompts. The workflow lists and downloads basemaps tiles with BingMaps API, and merges them into a single raster. The raster is then split into chips of 1024x1024 pixels with an overlap defined by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, followed by prompt encoder and mask decoder. Before running the workflow, make sure the model has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. The script will download the desired model weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the [FarmVibes.AI troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) page in the documentation. 
+ +```{mermaid} + graph TD + inp1>user_input] + inp2>prompts] + out1>basemap] + out2>segmentation_mask] + tsk1{{basemap_download}} + tsk2{{basemap_segmentation}} + tsk1{{basemap_download}} -- merged_basemap/input_raster --> tsk2{{basemap_segmentation}} + inp1>user_input] -- input_geometry --> tsk1{{basemap_download}} + inp1>user_input] -- input_geometry --> tsk2{{basemap_segmentation}} + inp2>prompts] -- input_prompts --> tsk2{{basemap_segmentation}} + tsk1{{basemap_download}} -- merged_basemap --> out1>basemap] + tsk2{{basemap_segmentation}} -- segmentation_mask --> out2>segmentation_mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +- **prompts**: ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with coordinates, label (foreground/background) and prompt id (in case, the raster contains multiple entities that should be segmented in a single workflow run). + +## Sinks + +- **basemap**: Merged basemap used as input to the segmentation. + +- **segmentation_mask**: Output segmentation masks. + +## Parameters + +- **bingmaps_api_key**: Required BingMaps API key. + +- **basemap_zoom_level**: Zoom level of interest, ranging from 0 to 20. For instance, a zoom level of 1 corresponds to a resolution of 78271.52 m/pixel, a zoom level of 10 corresponds to 152.9 m/pixel, and a zoom level of 19 corresponds to 0.3 m/pixel. For more information on zoom levels and their corresponding scale and resolution, please refer to the BingMaps API documentation at https://learn.microsoft.com/en-us/bingmaps/articles/understanding-scale-and-resolution + +- **model_type**: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. 
+ +- **spatial_overlap**: Percentage of spatial overlap between chips in the range of [0.0, 1.0). + +## Tasks + +- **basemap_download**: Downloads Bing Maps basemap tiles and merges them into a single raster. + +- **basemap_segmentation**: Runs Segment Anything Model (SAM) over BingMaps basemap rasters with points and/or bounding boxes as prompts. + +## Workflow Yaml + ```yaml name: segment_basemap sources: user_input: - basemap_download.input_geometry - - sam_inference.input_geometry + - basemap_segmentation.input_geometry prompts: - - ingest_points.user_input + - basemap_segmentation.input_prompts sinks: basemap: basemap_download.merged_basemap - segmentation_mask: sam_inference.segmentation_mask + segmentation_mask: basemap_segmentation.segmentation_mask parameters: bingmaps_api_key: null basemap_zoom_level: 14 @@ -23,21 +71,15 @@ tasks: parameters: api_key: '@from(bingmaps_api_key)' zoom_level: '@from(basemap_zoom_level)' - ingest_points: - workflow: data_ingestion/user_data/ingest_geometry - sam_inference: - op: basemap_prompt_segmentation - op_dir: segment_anything + basemap_segmentation: + workflow: ml/segment_anything/basemap_prompt_segmentation parameters: model_type: '@from(model_type)' spatial_overlap: '@from(spatial_overlap)' edges: - origin: basemap_download.merged_basemap destination: - - sam_inference.input_raster -- origin: ingest_points.geometry - destination: - - sam_inference.input_prompts + - basemap_segmentation.input_raster description: short_description: Downloads basemap with BingMaps API and runs Segment Anything Model (SAM) over them with points and/or bounding boxes as prompts. @@ -62,22 +104,4 @@ description: segmentation_mask: Output segmentation masks. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - inp2>prompts] - out1>basemap] - out2>segmentation_mask] - tsk1{{basemap_download}} - tsk2{{ingest_points}} - tsk3{{sam_inference}} - tsk1{{basemap_download}} -- merged_basemap/input_raster --> tsk3{{sam_inference}} - tsk2{{ingest_points}} -- geometry/input_prompts --> tsk3{{sam_inference}} - inp1>user_input] -- input_geometry --> tsk1{{basemap_download}} - inp1>user_input] -- input_geometry --> tsk3{{sam_inference}} - inp2>prompts] -- user_input --> tsk2{{ingest_points}} - tsk1{{basemap_download}} -- merged_basemap --> out1>basemap] - tsk3{{sam_inference}} -- segmentation_mask --> out2>segmentation_mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_s2.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_s2.md index a5c8a115..6cac4be5 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_s2.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/segmentation/segment_s2.md @@ -1,17 +1,63 @@ # farm_ai/segmentation/segment_s2 +Downloads Sentinel-2 imagery and runs Segment Anything Model (SAM) over them with points and/or bounding boxes as prompts. The workflow retrieves the relevant Sentinel-2 products with the Planetary Computer (PC) API, and splits the input rasters into chips of 1024x1024 pixels with an overlap defined by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, followed by prompt encoder and mask decoder. Before running the workflow, make sure the model has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. The script will download the desired model weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add them to the cluster. 
For more information, refer to the [FarmVibes.AI troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) page in the documentation. + +```{mermaid} + graph TD + inp1>user_input] + inp2>prompts] + out1>s2_raster] + out2>segmentation_mask] + tsk1{{preprocess_s2}} + tsk2{{s2_segmentation}} + tsk1{{preprocess_s2}} -- raster/input_raster --> tsk2{{s2_segmentation}} + inp1>user_input] -- user_input --> tsk1{{preprocess_s2}} + inp1>user_input] -- input_geometry --> tsk2{{s2_segmentation}} + inp2>prompts] -- input_prompts --> tsk2{{s2_segmentation}} + tsk1{{preprocess_s2}} -- raster --> out1>s2_raster] + tsk2{{s2_segmentation}} -- segmentation_mask --> out2>segmentation_mask] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +- **prompts**: ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with coordinates, label (foreground/background) and prompt id (in case, the raster contains multiple entities that should be segmented in a single workflow run). + +## Sinks + +- **s2_raster**: Sentinel-2 rasters used as input for the segmentation. + +- **segmentation_mask**: Output segmentation masks. + +## Parameters + +- **model_type**: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. + +- **spatial_overlap**: Percentage of spatial overlap between chips in the range of [0.0, 1.0). + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **preprocess_s2**: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range. + +- **s2_segmentation**: Runs Segment Anything Model (SAM) over Sentinel-2 rasters with points and/or bounding boxes as prompts. 
+ +## Workflow Yaml + ```yaml name: segment_s2 sources: user_input: - preprocess_s2.user_input - - sam_inference.input_geometry + - s2_segmentation.input_geometry prompts: - - ingest_points.user_input + - s2_segmentation.input_prompts sinks: s2_raster: preprocess_s2.raster - segmentation_mask: sam_inference.segmentation_mask + segmentation_mask: s2_segmentation.segmentation_mask parameters: model_type: vit_b spatial_overlap: 0.5 @@ -21,21 +67,15 @@ tasks: workflow: data_ingestion/sentinel2/preprocess_s2 parameters: pc_key: '@from(pc_key)' - ingest_points: - workflow: data_ingestion/user_data/ingest_geometry - sam_inference: - op: s2_prompt_segmentation - op_dir: segment_anything + s2_segmentation: + workflow: ml/segment_anything/s2_prompt_segmentation parameters: model_type: '@from(model_type)' spatial_overlap: '@from(spatial_overlap)' edges: - origin: preprocess_s2.raster destination: - - sam_inference.input_raster -- origin: ingest_points.geometry - destination: - - sam_inference.input_prompts + - s2_segmentation.input_raster description: short_description: Downloads Sentinel-2 imagery and runs Segment Anything Model (SAM) over them with points and/or bounding boxes as prompts. @@ -60,22 +100,4 @@ description: segmentation_mask: Output segmentation masks. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - inp2>prompts] - out1>s2_raster] - out2>segmentation_mask] - tsk1{{preprocess_s2}} - tsk2{{ingest_points}} - tsk3{{sam_inference}} - tsk1{{preprocess_s2}} -- raster/input_raster --> tsk3{{sam_inference}} - tsk2{{ingest_points}} -- geometry/input_prompts --> tsk3{{sam_inference}} - inp1>user_input] -- user_input --> tsk1{{preprocess_s2}} - inp1>user_input] -- input_geometry --> tsk3{{sam_inference}} - inp2>prompts] -- user_input --> tsk2{{ingest_points}} - tsk1{{preprocess_s2}} -- raster --> out1>s2_raster] - tsk3{{sam_inference}} -- segmentation_mask --> out2>segmentation_mask] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/sensor/optimal_locations.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/sensor/optimal_locations.md index 0b0ededd..354e86c1 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/sensor/optimal_locations.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/sensor/optimal_locations.md @@ -1,5 +1,63 @@ # farm_ai/sensor/optimal_locations +Identify optimal locations by performing clustering operation using Gaussian Mixture model on computed raster indices. The clustering operation separate computed raster indices values into n groups of equal variance, each group assigned a location and that location is considered as a +optimal locations. The sample locations generated provide information of latitude and longitude. The optimal location can be utilized to install sensors and collect +soil information. The index parameter used as input to run the computed index workflow internally using the input raster submitted. The selection of index parameter varies +based on requirement. The workflow supports all the indices supported by spyndex library (https://github.com/awesome-spectral-indices/awesome-spectral-indices#vegetation). 
+Below are various indices that are used to identify optimal locations and generate a nutrient heatmap. +Enhanced Vegetation Index (EVI) - EVI is designed to minimize the influence of soil brightness and atmospheric conditions on vegetation assessment. It is calculated +using the red, blue, and near-infrared (NIR) bands. EVI is particularly useful for monitoring vegetation in regions with high canopy cover and in areas where atmospheric +interference is significant. This index is also used in the notebook (notebooks/heatmaps/nutrients_using_neighbors.ipynb) that derives nutrient information for Carbon, Nitrogen, +and Phosphorus. +Photochemical Reflectance Index (PRI) - It is a vegetation index used to assess the light-use efficiency of plants in terms of photosynthesis and their response to +changes in light conditions, particularly variations in the blue and red parts of the electromagnetic spectrum. This index is also used in the notebook +(notebooks/heatmaps/nutrients_using_neighbors.ipynb) that derives nutrient information for pH. +The number of sample locations generated depends on the input parameters submitted. Tune the n_clusters and sieve_size parameters to generate more or fewer location data points. +For a 100 acre farm, +- 20 sample locations are generated using n_clusters=5 and sieve_size=10. +- 30 sample locations are generated using n_clusters=5 and sieve_size=20. +- 80 sample locations are generated using n_clusters=5 and sieve_size=5. +- 130 sample locations are generated using n_clusters=8 and sieve_size=5. 
+ +```{mermaid} + graph TD + inp1>user_input] + inp2>input_raster] + out1>result] + tsk1{{compute_index}} + tsk2{{find_samples}} + tsk1{{compute_index}} -- index_raster/raster --> tsk2{{find_samples}} + inp1>user_input] -- user_input --> tsk2{{find_samples}} + inp2>input_raster] -- raster --> tsk1{{compute_index}} + tsk2{{find_samples}} -- locations --> out1>result] +``` + +## Sources + +- **input_raster**: List of computed raster indices generated using the sentinel 2 satellite imagery. + +- **user_input**: DataVibe with time range information. + +## Sinks + +- **result**: Zip file containing sample locations in a shape file (.shp) format. + +## Parameters + +- **n_clusters**: number of clusters used to generate sample locations. + +- **sieve_size**: Group the nearest neighbor pixel values. + +- **index**: Index used to generate sample locations. + +## Tasks + +- **compute_index**: Computes an index from the bands of an input raster. + +- **find_samples**: Find minimum soil sample locations by grouping indices values that are derived from satellite or spaceEye imagery bands. + +## Workflow Yaml + ```yaml name: optimal_locations @@ -71,17 +129,4 @@ description: index: Index used to generate sample locations. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - inp2>input_raster] - out1>result] - tsk1{{compute_index}} - tsk2{{find_samples}} - tsk1{{compute_index}} -- index_raster/raster --> tsk2{{find_samples}} - inp1>user_input] -- user_input --> tsk2{{find_samples}} - inp2>input_raster] -- raster --> tsk1{{compute_index}} - tsk2{{find_samples}} -- locations --> out1>result] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/water/irrigation_classification.md b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/water/irrigation_classification.md index db666502..4ff91326 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/farm_ai/water/irrigation_classification.md +++ b/docs/source/docfiles/markdown/workflow_yaml/farm_ai/water/irrigation_classification.md @@ -1,5 +1,131 @@ # farm_ai/water/irrigation_classification +Develops 30m pixel-wise irrigation probability map. The workflow retrieves LANDSAT 8 Surface Reflectance (SR) image tile and land surface elevation DEM data, and runs four ops to compute irrigation probability map. The land surface elevation data source are 10m USGS DEM, or 30m Copernicus DEM; but Copernicus DEM is set as the default source in the workflow. Landsat Op compute_cloud_water_mask utilizes the qa_pixel band of image and NDVI index to generate mask of cloud cover and water bodies. Op compute_evaporative_fraction utilizes NDVI index, land surface temperature (LST), green and near infra-red bands, and DEM data to estimate evaporative flux (ETRF). Op compute_ngi_egi_layers utilizes NDVI index, ETRF estimates, green and near infra-red bands to generate NGI and EGI irrigation layers. Lastly op compute_irrigation_probability uses NGI and EGI layers along with LST band; and applies optimized logistic regression model to compute 30m pixel-wise irrigation probability map. 
The coefficients and intercept of the model were obtained beforehand using ground-truth data from the state of Nebraska, USA, for the year 2015. + +```{mermaid} + graph TD + inp1>user_input] + out1>landsat_bands] + out2>ndvi] + out3>cloud_water_mask] + out4>dem] + out5>evaporative_fraction] + out6>ngi] + out7>egi] + out8>lst] + out9>irrigation_probability] + tsk1{{landsat}} + tsk2{{ndvi}} + tsk3{{merge_geom}} + tsk4{{merge_geom_time_range}} + tsk5{{cloud_water_mask}} + tsk6{{dem}} + tsk7{{match_dem}} + tsk8{{evaporative_fraction}} + tsk9{{ngi_egi_layers}} + tsk10{{irrigation_probability}} + tsk1{{landsat}} -- raster/items --> tsk3{{merge_geom}} + tsk1{{landsat}} -- raster --> tsk2{{ndvi}} + tsk1{{landsat}} -- raster/landsat_raster --> tsk5{{cloud_water_mask}} + tsk1{{landsat}} -- raster/ref_rasters --> tsk7{{match_dem}} + tsk1{{landsat}} -- raster/landsat_raster --> tsk8{{evaporative_fraction}} + tsk1{{landsat}} -- raster/landsat_raster --> tsk9{{ngi_egi_layers}} + tsk1{{landsat}} -- raster/landsat_raster --> tsk10{{irrigation_probability}} + tsk2{{ndvi}} -- index/ndvi_raster --> tsk5{{cloud_water_mask}} + tsk2{{ndvi}} -- index/ndvi_raster --> tsk8{{evaporative_fraction}} + tsk2{{ndvi}} -- index/ndvi_raster --> tsk9{{ngi_egi_layers}} + tsk3{{merge_geom}} -- merged/geometry --> tsk4{{merge_geom_time_range}} + tsk4{{merge_geom_time_range}} -- merged/user_input --> tsk6{{dem}} + tsk6{{dem}} -- raster/rasters --> tsk7{{match_dem}} + tsk7{{match_dem}} -- match_rasters/dem_raster --> tsk8{{evaporative_fraction}} + tsk8{{evaporative_fraction}} -- evaporative_fraction --> tsk9{{ngi_egi_layers}} + tsk5{{cloud_water_mask}} -- cloud_water_mask/cloud_water_mask_raster --> tsk8{{evaporative_fraction}} + tsk5{{cloud_water_mask}} -- cloud_water_mask/cloud_water_mask_raster --> tsk9{{ngi_egi_layers}} + tsk5{{cloud_water_mask}} -- cloud_water_mask/cloud_water_mask_raster --> tsk10{{irrigation_probability}} + tsk9{{ngi_egi_layers}} -- ngi --> tsk10{{irrigation_probability}} + 
tsk9{{ngi_egi_layers}} -- egi --> tsk10{{irrigation_probability}} + tsk9{{ngi_egi_layers}} -- lst --> tsk10{{irrigation_probability}} + inp1>user_input] -- user_input --> tsk1{{landsat}} + inp1>user_input] -- time_range --> tsk4{{merge_geom_time_range}} + tsk1{{landsat}} -- raster --> out1>landsat_bands] + tsk2{{ndvi}} -- index --> out2>ndvi] + tsk5{{cloud_water_mask}} -- cloud_water_mask --> out3>cloud_water_mask] + tsk7{{match_dem}} -- match_rasters --> out4>dem] + tsk8{{evaporative_fraction}} -- evaporative_fraction --> out5>evaporative_fraction] + tsk9{{ngi_egi_layers}} -- ngi --> out6>ngi] + tsk9{{ngi_egi_layers}} -- egi --> out7>egi] + tsk9{{ngi_egi_layers}} -- lst --> out8>lst] + tsk10{{irrigation_probability}} -- irrigation_probability --> out9>irrigation_probability] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **landsat_bands**: Raster of Landsat bands. + +- **ndvi**: NDVI raster. + +- **cloud_water_mask**: Mask of cloud cover and water bodies. + +- **dem**: DEM raster. Options are CopernicusDEM30 and USGS3DEP. + +- **evaporative_fraction**: Raster with estimates of evaporative fraction flux. + +- **ngi**: Raster of NGI irrigation layer. + +- **egi**: Raster of EGI irrigation layer. + +- **lst**: Raster of land surface temperature. + +- **irrigation_probability**: Raster of irrigation probability map in 30m resolution. + +## Parameters + +- **ndvi_threshold**: NDVI index threshold value for masking water bodies. + +- **ndvi_hot_threshold**: Maximum NDVI index threshold value for selecting hot pixel. + +- **coef_ngi**: Coefficient of NGI layer in optimized logistic regression model. + +- **coef_egi**: Coefficient of EGI layer in optimized logistic regression model. + +- **coef_lst**: Coefficient of land surface temperature band in optimized logistic regression model. + +- **intercept**: Intercept value of optimized logistic regression model. + +- **dem_resolution**: Spatial resolution of the DEM. 
10m and 30m are available. + +- **dem_provider**: Provider of the DEM. "USGS3DEP" and "CopernicusDEM30" are available. + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **landsat**: Downloads and preprocesses LANDSAT tiles that intersect with the input geometry and time range. + +- **ndvi**: Computes `index` over the input raster. + +- **merge_geom**: Create item with merged geometry from item list. + +- **merge_geom_time_range**: Create item that contains the geometry from one item and the time range from another. + +- **cloud_water_mask**: Merges landsat cloud mask and NDVI-based mask to produce a cloud water mask. + +- **dem**: Downloads digital elevation map tiles that intersect with the input geometry and time range. + +- **match_dem**: Resamples input rasters to the reference rasters' grid. + +- **evaporative_fraction**: Computes evaporative fraction layer based on the percentile values of lst_dem (created by treating land surface temperature with dem) and ndvi layers. The source of constants used is "Senay, G.B.; Bohms, S.; Singh, R.K.; Gowda, P.H.; Velpuri, N.M.; Alemu, H.; Verdin, J.P. Operational Evapotranspiration Mapping Using Remote Sensing and Weather Datasets - A New Parameterization for the SSEB Approach. JAWRA J. Am. Water Resour. Assoc. 2013, 49, 577–591." The land surface elevation data sources are the 10m USGS DEM and the 30m Copernicus DEM, but the Copernicus DEM is set as the default source in the workflow. + +- **ngi_egi_layers**: Computes NGI, EGI, and LST layers from landsat bands, ndvi layer, cloud water mask layer and evaporative fraction layer. + +- **irrigation_probability**: Computes irrigation probability values for each pixel in raster using optimized logistic regression model with ngi, egi, and lst rasters as input. + +## Workflow Yaml + ```yaml name: irrigation_classification @@ -145,60 +271,4 @@ description: pc_key: Optional Planetary Computer API key. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>landsat_bands] - out2>ndvi] - out3>cloud_water_mask] - out4>dem] - out5>evaporative_fraction] - out6>ngi] - out7>egi] - out8>lst] - out9>irrigation_probability] - tsk1{{landsat}} - tsk2{{ndvi}} - tsk3{{merge_geom}} - tsk4{{merge_geom_time_range}} - tsk5{{cloud_water_mask}} - tsk6{{dem}} - tsk7{{match_dem}} - tsk8{{evaporative_fraction}} - tsk9{{ngi_egi_layers}} - tsk10{{irrigation_probability}} - tsk1{{landsat}} -- raster/items --> tsk3{{merge_geom}} - tsk1{{landsat}} -- raster --> tsk2{{ndvi}} - tsk1{{landsat}} -- raster/landsat_raster --> tsk5{{cloud_water_mask}} - tsk1{{landsat}} -- raster/ref_rasters --> tsk7{{match_dem}} - tsk1{{landsat}} -- raster/landsat_raster --> tsk8{{evaporative_fraction}} - tsk1{{landsat}} -- raster/landsat_raster --> tsk9{{ngi_egi_layers}} - tsk1{{landsat}} -- raster/landsat_raster --> tsk10{{irrigation_probability}} - tsk2{{ndvi}} -- index/ndvi_raster --> tsk5{{cloud_water_mask}} - tsk2{{ndvi}} -- index/ndvi_raster --> tsk8{{evaporative_fraction}} - tsk2{{ndvi}} -- index/ndvi_raster --> tsk9{{ngi_egi_layers}} - tsk3{{merge_geom}} -- merged/geometry --> tsk4{{merge_geom_time_range}} - tsk4{{merge_geom_time_range}} -- merged/user_input --> tsk6{{dem}} - tsk6{{dem}} -- raster/rasters --> tsk7{{match_dem}} - tsk7{{match_dem}} -- match_rasters/dem_raster --> tsk8{{evaporative_fraction}} - tsk8{{evaporative_fraction}} -- evaporative_fraction --> tsk9{{ngi_egi_layers}} - tsk5{{cloud_water_mask}} -- cloud_water_mask/cloud_water_mask_raster --> tsk8{{evaporative_fraction}} - tsk5{{cloud_water_mask}} -- cloud_water_mask/cloud_water_mask_raster --> tsk9{{ngi_egi_layers}} - tsk5{{cloud_water_mask}} -- cloud_water_mask/cloud_water_mask_raster --> tsk10{{irrigation_probability}} - tsk9{{ngi_egi_layers}} -- ngi --> tsk10{{irrigation_probability}} - tsk9{{ngi_egi_layers}} -- egi --> tsk10{{irrigation_probability}} - tsk9{{ngi_egi_layers}} -- lst --> tsk10{{irrigation_probability}} - 
inp1>user_input] -- user_input --> tsk1{{landsat}} - inp1>user_input] -- time_range --> tsk4{{merge_geom_time_range}} - tsk1{{landsat}} -- raster --> out1>landsat_bands] - tsk2{{ndvi}} -- index --> out2>ndvi] - tsk5{{cloud_water_mask}} -- cloud_water_mask --> out3>cloud_water_mask] - tsk7{{match_dem}} -- match_rasters --> out4>dem] - tsk8{{evaporative_fraction}} -- evaporative_fraction --> out5>evaporative_fraction] - tsk9{{ngi_egi_layers}} -- ngi --> out6>ngi] - tsk9{{ngi_egi_layers}} -- egi --> out7>egi] - tsk9{{ngi_egi_layers}} -- lst --> out8>lst] - tsk10{{irrigation_probability}} -- irrigation_probability --> out9>irrigation_probability] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/alos_trend_detection.md b/docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/alos_trend_detection.md new file mode 100644 index 00000000..56cd8468 --- /dev/null +++ b/docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/alos_trend_detection.md @@ -0,0 +1,134 @@ +# forest_ai/deforestation/alos_trend_detection + +Detects increase/decrease trends in forest pixel levels over the user-input geometry and time range for the ALOS forest map. This workflow combines the alos_forest_extent_download_merge and ordinal_trend_detection workflows to detect increase/decrease trends in the forest pixel levels over the user-provided geometry and time range for the ALOS forest map. The ALOS PALSAR 2.1 Forest/Non-Forest Maps are downloaded in the alos_forest_extent_download_merge workflow. Then the ordinal_trend_detection workflow clips the ordinal raster to the user-provided geometry and time range and determines if there is an increasing or decreasing trend in the forest pixel levels over them. alos_trend_detection uses the Cochran-Armitage test to detect trends in the forest levels over the years. The null hypothesis is that there is no trend in the pixel levels over the list of rasters. 
The alternative hypothesis is that there is a trend in the forest pixel levels over the list of rasters (one for each year). It returns a p-value and a z-score. If the p-value is less than some significance level, the null hypothesis is rejected and the alternative hypothesis is accepted. If the z-score is positive, the trend is increasing. If the z-score is negative, the trend is decreasing. + +```{mermaid} + graph TD + inp1>user_input] + out1>merged_raster] + out2>categorical_raster] + out3>recoded_raster] + out4>clipped_raster] + out5>trend_test_result] + tsk1{{alos_forest_extent_download_merge}} + tsk2{{ordinal_trend_detection}} + tsk1{{alos_forest_extent_download_merge}} -- merged_raster/raster --> tsk2{{ordinal_trend_detection}} + inp1>user_input] -- user_input --> tsk1{{alos_forest_extent_download_merge}} + inp1>user_input] -- input_geometry --> tsk2{{ordinal_trend_detection}} + tsk1{{alos_forest_extent_download_merge}} -- merged_raster --> out1>merged_raster] + tsk1{{alos_forest_extent_download_merge}} -- categorical_raster --> out2>categorical_raster] + tsk2{{ordinal_trend_detection}} -- recoded_raster --> out3>recoded_raster] + tsk2{{ordinal_trend_detection}} -- clipped_raster --> out4>clipped_raster] + tsk2{{ordinal_trend_detection}} -- trend_test_result --> out5>trend_test_result] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **merged_raster**: Merged raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range. + +- **categorical_raster**: Categorical raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range before the merge operation. + +- **recoded_raster**: Recoded raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range. + +- **clipped_raster**: Clipped ordinal raster for the user-provided geometry and time range. 
+ +- **trend_test_result**: Cochran-armitage test results composed of p-value and z-score. + +## Parameters + +- **pc_key**: Planetary Computer API key. + +- **from_values**: Values to recode from. + +- **to_values**: Values to recode to. + +## Tasks + +- **alos_forest_extent_download_merge**: Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map and merges it into a single raster. + +- **ordinal_trend_detection**: Detects increase/decrease trends in the pixel levels over the user-input geometry and time range. + +## Workflow Yaml + +```yaml + +name: alos_trend_detection +sources: + user_input: + - alos_forest_extent_download_merge.user_input + - ordinal_trend_detection.input_geometry +sinks: + merged_raster: alos_forest_extent_download_merge.merged_raster + categorical_raster: alos_forest_extent_download_merge.categorical_raster + recoded_raster: ordinal_trend_detection.recoded_raster + clipped_raster: ordinal_trend_detection.clipped_raster + trend_test_result: ordinal_trend_detection.trend_test_result +parameters: + pc_key: null + from_values: + - 4 + - 3 + - 0 + - 2 + - 1 + to_values: + - 0 + - 0 + - 0 + - 1 + - 1 +tasks: + alos_forest_extent_download_merge: + workflow: data_ingestion/alos/alos_forest_extent_download_merge + parameters: + pc_key: '@from(pc_key)' + ordinal_trend_detection: + workflow: forest_ai/deforestation/ordinal_trend_detection + parameters: + from_values: '@from(from_values)' + to_values: '@from(to_values)' +edges: +- origin: alos_forest_extent_download_merge.merged_raster + destination: + - ordinal_trend_detection.raster +description: + short_description: Detects increase/decrease trends in forest pixel levels over + the user-input geometry and time range for the ALOS forest map. 
+ long_description: This workflow combines the alos_forest_extent_download_merge and + ordinal_trend_detection workflows to detect increase/decrease trends in the forest + pixel levels over the user-provided geometry and time range for the ALOS forest + map. The ALOS PALSAR 2.1 Forest/Non-Forest Maps are downloaded in the alos_forest_extent_download_merge + workflow. Then the ordinal_trend_detection workflow clips the ordinal raster + to the user-provided geometry and time range and determines if there is an increasing + or decreasing trend in the forest pixel levels over them. alos_trend_detection + uses the Cochran-Armitage test to detect trends in the forest levels over the + years. The null hypothesis is that there is no trend in the pixel levels over + the list of rasters. The alternative hypothesis is that there is a trend in the + forest pixel levels over the list of rasters (one for each year). It returns a + p-value and a z-score. If the p-value is less than some significance level, the + null hypothesis is rejected and the alternative hypothesis is accepted. If the + z-score is positive, the trend is increasing. If the z-score is negative, the + trend is decreasing. + sources: + user_input: Time range and geometry of interest. + sinks: + merged_raster: Merged raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for + the user-provided geometry and time range. + categorical_raster: Categorical raster of the ALOS PALSAR 2.1 Forest/Non-Forest + Map for the user-provided geometry and time range before the merge operation. + recoded_raster: Recoded raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for + the user-provided geometry and time range. + clipped_raster: Clipped ordinal raster for the user-provided geometry and time + range. + trend_test_result: Cochran-Armitage test results composed of p-value and z-score. + parameters: + pc_key: Planetary Computer API key. + from_values: Values to recode from. + to_values: Values to recode to. 
+ + +``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/ordinal_trend_detection.md b/docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/ordinal_trend_detection.md new file mode 100644 index 00000000..bb347a72 --- /dev/null +++ b/docs/source/docfiles/markdown/workflow_yaml/forest_ai/deforestation/ordinal_trend_detection.md @@ -0,0 +1,123 @@ +# forest_ai/deforestation/ordinal_trend_detection + +Detects increase/decrease trends in the pixel levels over the user-input geometry and time range. This workflow prepares rasters to perform the Cochran-Armitage trend test over a user-provided geometry and time range. Initially, it recodes the input raster according to the 'from_values' and 'to_values' parameters. For example, if the original raster has values (2, 1, 3, 4, 5) and the default values of 'from_values' and 'to_values' are respectively [1, 2, 3, 4, 5] and [6, 7, 8, 9, 10], the recoded raster will have values (7, 6, 8, 9, 10). The workflow then clips the user-provided geometries and computes an ordinal raster. It also counts each unique pixel present in the recoded rasters to create a pixel frequency contingency table. This data is used to determine if there is an increasing or decreasing trend in pixel levels. The Cochran-Armitage test is a non-parametric test used to ascertain this trend. The null hypothesis assumes no trend in pixel levels, while the alternative hypothesis assumes a trend exists. The test returns a p-value and a z-score. If the p-value is less than some significance level, the null hypothesis is rejected in favor of the alternative. A positive z-score indicates an increasing trend, while a negative one indicates a decreasing trend. 
+ +```{mermaid} + graph TD + inp1>raster] + inp2>input_geometry] + out1>recoded_raster] + out2>trend_test_result] + out3>clipped_raster] + tsk1{{recode_raster}} + tsk2{{clip}} + tsk3{{compute_pixel_count}} + tsk4{{trend_test}} + tsk1{{recode_raster}} -- recoded_raster/raster --> tsk2{{clip}} + tsk2{{clip}} -- clipped_raster/raster --> tsk3{{compute_pixel_count}} + tsk3{{compute_pixel_count}} -- pixel_count --> tsk4{{trend_test}} + inp1>raster] -- raster --> tsk1{{recode_raster}} + inp2>input_geometry] -- input_geometry --> tsk2{{clip}} + tsk1{{recode_raster}} -- recoded_raster --> out1>recoded_raster] + tsk4{{trend_test}} -- ordinal_trend_result --> out2>trend_test_result] + tsk2{{clip}} -- clipped_raster --> out3>clipped_raster] +``` + +## Sources + +- **raster**: Raster to be processed and tested for trends. + +- **input_geometry**: Reference geometry. + +## Sinks + +- **recoded_raster**: Recoded raster for the user-provided geometry and time range. + +- **trend_test_result**: Cochran-Armitage test results composed of p-value and z-score. + +- **clipped_raster**: Clipped ordinal raster for the user-provided geometry and time range. + +## Parameters + +- **from_values**: List of values to recode from. + +- **to_values**: List of values to recode to. + +## Tasks + +- **recode_raster**: Recodes values of the input raster. + +- **clip**: Performs a soft clip on an input raster based on a provided reference geometry. + +- **compute_pixel_count**: Counts the pixel values in the input raster. + +- **trend_test**: Detects increase/decrease trends over a list of Rasters. 
+ +## Workflow Yaml + +```yaml + +name: ordinal_trend_detection +sources: + raster: + - recode_raster.raster + input_geometry: + - clip.input_geometry +sinks: + recoded_raster: recode_raster.recoded_raster + trend_test_result: trend_test.ordinal_trend_result + clipped_raster: clip.clipped_raster +parameters: + from_values: [] + to_values: [] +tasks: + recode_raster: + op: recode_raster + parameters: + from_values: '@from(from_values)' + to_values: '@from(to_values)' + clip: + workflow: data_processing/clip/clip + compute_pixel_count: + op: compute_pixel_count + trend_test: + op: ordinal_trend_test +edges: +- origin: recode_raster.recoded_raster + destination: + - clip.raster +- origin: clip.clipped_raster + destination: + - compute_pixel_count.raster +- origin: compute_pixel_count.pixel_count + destination: + - trend_test.pixel_count +description: + short_description: Detects increase/decrease trends in the pixel levels over the + user-input geometry and time range. + long_description: This workflow prepares rasters to perform the Cochran-Armitage + trend test over a user-provided geometry and time range. Initially, it recodes + the input raster according to the 'from_values' and 'to_values' parameters. For + example, if the original raster has values (2, 1, 3, 4, 5) and the default values + of 'from_values' and 'to_values' are respectively [1, 2, 3, 4, 5] and [6, 7, 8, + 9, 10], the recoded raster will have values (7, 6, 8, 9, 10). The workflow then + clips the user-provided geometries and computes an ordinal raster. It also counts + each unique pixel present in the recoded rasters to create a pixel frequency contingency + table. This data is used to determine if there is an increasing or decreasing + trend in pixel levels. The Cochran-Armitage test is a non-parametric test used + to ascertain this trend. The null hypothesis assumes no trend in pixel levels, + while the alternative hypothesis assumes a trend exists. The test returns a p-value + and a z-score. 
If the p-value is less than some significance level, the null hypothesis + is rejected in favor of the alternative. A positive z-score indicates an increasing + trend, while a negative one indicates a decreasing trend. + sources: + raster: Raster to be processed and tested for trends. + input_geometry: Reference geometry. + sinks: + recoded_raster: Recoded raster for the user-provided geometry and time range. + trend_test_result: Cochran-Armitage test results composed of p-value and z-score. + clipped_raster: Clipped ordinal raster for the user-provided geometry and time + range. + + +``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/helloworld.md b/docs/source/docfiles/markdown/workflow_yaml/helloworld.md index e8c6f4eb..9dc52e08 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/helloworld.md +++ b/docs/source/docfiles/markdown/workflow_yaml/helloworld.md @@ -1,5 +1,30 @@ # helloworld +Hello world! Small test workflow that generates an image of the Earth with countries that intersect with the input geometry highlighted in orange. + +```{mermaid} + graph TD + inp1>user_input] + out1>raster] + tsk1{{hello}} + inp1>user_input] -- user_input --> tsk1{{hello}} + tsk1{{hello}} -- raster --> out1>raster] +``` + +## Sources + +- **user_input**: Input geometry. + +## Sinks + +- **raster**: Raster with highlighted countries. + +## Tasks + +- **hello**: Test op that generates an image of the Earth with countries that intersect with the input geometry highlighted in orange. + +## Workflow Yaml + ```yaml name: helloworld @@ -21,13 +46,4 @@ description: raster: Raster with highlighted countries. 
-``` - -```{mermaid} - graph TD - inp1>user_input] - out1>raster] - tsk1{{hello}} - inp1>user_input] -- user_input --> tsk1{{hello}} - tsk1{{hello}} -- raster --> out1>raster] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/ml/crop_segmentation.md b/docs/source/docfiles/markdown/workflow_yaml/ml/crop_segmentation.md index 7f6b0232..20f17572 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/ml/crop_segmentation.md +++ b/docs/source/docfiles/markdown/workflow_yaml/ml/crop_segmentation.md @@ -1,5 +1,50 @@ # ml/crop_segmentation +Runs a crop segmentation model based on NDVI from SpaceEye imagery along the year. The workflow generates SpaceEye cloud-free data for the input region and time range and computes NDVI over those. NDVI values sampled regularly along the year are stacked as bands and used as input to the crop segmentation model. + +```{mermaid} + graph TD + inp1>user_input] + out1>segmentation] + tsk1{{spaceeye}} + tsk2{{ndvi}} + tsk3{{group}} + tsk4{{inference}} + tsk1{{spaceeye}} -- raster --> tsk2{{ndvi}} + tsk2{{ndvi}} -- index_raster/rasters --> tsk3{{group}} + tsk3{{group}} -- sequence/input_raster --> tsk4{{inference}} + inp1>user_input] -- user_input --> tsk1{{spaceeye}} + tsk4{{inference}} -- output_raster --> out1>segmentation] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. + +## Sinks + +- **segmentation**: Crop segmentation map at 10m resolution. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +- **model_file**: Path to the ONNX file containing the model architecture and weights. + +- **model_bands**: Number of NDVI bands to stack as the model input. + +## Tasks + +- **spaceeye**: Runs the SpaceEye cloud removal pipeline using an interpolation-based algorithm, yielding daily cloud-free images for the input geometry and time range. + +- **ndvi**: Computes an index from the bands of an input raster. 
+ +- **group**: Selects "num" entries from a Raster list so that the output sequence has a fixed length. + +- **inference**: Processes a sequence of rasters with an ONNX model. + +## Workflow Yaml + ```yaml name: crop_segmentation @@ -62,19 +107,4 @@ description: model_bands: Number of NDVI bands to stack as the model input. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>segmentation] - tsk1{{spaceeye}} - tsk2{{ndvi}} - tsk3{{group}} - tsk4{{inference}} - tsk1{{spaceeye}} -- raster --> tsk2{{ndvi}} - tsk2{{ndvi}} -- index_raster/rasters --> tsk3{{group}} - tsk3{{group}} -- sequence/input_raster --> tsk4{{inference}} - inp1>user_input] -- user_input --> tsk1{{spaceeye}} - tsk4{{inference}} -- output_raster --> out1>segmentation] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/ml/dataset_generation/datagen_crop_segmentation.md b/docs/source/docfiles/markdown/workflow_yaml/ml/dataset_generation/datagen_crop_segmentation.md index f494620b..7617ebdd 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/ml/dataset_generation/datagen_crop_segmentation.md +++ b/docs/source/docfiles/markdown/workflow_yaml/ml/dataset_generation/datagen_crop_segmentation.md @@ -1,5 +1,46 @@ # ml/dataset_generation/datagen_crop_segmentation +Generates a dataset for crop segmentation, based on NDVI raster and Crop Data Layer (CDL) maps. The workflow generates SpaceEye cloud-free data for the input region and time range and computes NDVI over those. It also downloads CDL maps for the years comprised in the time range. + +```{mermaid} + graph TD + inp1>user_input] + out1>ndvi] + out2>cdl] + tsk1{{spaceeye}} + tsk2{{ndvi}} + tsk3{{cdl}} + tsk1{{spaceeye}} -- raster --> tsk2{{ndvi}} + inp1>user_input] -- user_input --> tsk1{{spaceeye}} + inp1>user_input] -- user_input --> tsk3{{cdl}} + tsk2{{ndvi}} -- index_raster --> out1>ndvi] + tsk3{{cdl}} -- raster --> out2>cdl] +``` + +## Sources + +- **user_input**: Time range and geometry of interest. 
+ +## Sinks + +- **ndvi**: NDVI rasters. + +- **cdl**: CDL map for the years comprised in the input time range. + +## Parameters + +- **pc_key**: Optional Planetary Computer API key. + +## Tasks + +- **spaceeye**: Runs the SpaceEye cloud removal pipeline using an interpolation-based algorithm, yielding daily cloud-free images for the input geometry and time range. + +- **ndvi**: Computes an index from the bands of an input raster. + +- **cdl**: Downloads crop classes maps in the continental USA for the input time range. + +## Workflow Yaml + ```yaml name: datagen_crop_segmentation @@ -42,19 +83,4 @@ description: pc_key: Optional Planetary Computer API key. -``` - -```{mermaid} - graph TD - inp1>user_input] - out1>ndvi] - out2>cdl] - tsk1{{spaceeye}} - tsk2{{ndvi}} - tsk3{{cdl}} - tsk1{{spaceeye}} -- raster --> tsk2{{ndvi}} - inp1>user_input] -- user_input --> tsk1{{spaceeye}} - inp1>user_input] -- user_input --> tsk3{{cdl}} - tsk2{{ndvi}} -- index_raster --> out1>ndvi] - tsk3{{cdl}} -- raster --> out2>cdl] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/ml/driveway_detection.md b/docs/source/docfiles/markdown/workflow_yaml/ml/driveway_detection.md index 4e90413e..d7ee4456 100644 --- a/docs/source/docfiles/markdown/workflow_yaml/ml/driveway_detection.md +++ b/docs/source/docfiles/markdown/workflow_yaml/ml/driveway_detection.md @@ -1,5 +1,60 @@ # ml/driveway_detection +Detects driveways in front of houses. The workflow downloads road geometry from Open Street Maps and segments the front of houses in the input image using a machine learning model. It then uses the input image, segmentation map, road geometry, and input property boundaries to detect the presence of driveways in the front of each house. 
+ +```{mermaid} + graph TD + inp1>input_raster] + inp2>property_boundaries] + out1>properties] + out2>driveways] + tsk1{{segment}} + tsk2{{osm}} + tsk3{{detect}} + tsk1{{segment}} -- segmentation_raster --> tsk3{{detect}} + tsk2{{osm}} -- roads --> tsk3{{detect}} + inp1>input_raster] -- input_raster --> tsk1{{segment}} + inp1>input_raster] -- input_raster --> tsk3{{detect}} + inp1>input_raster] -- user_input --> tsk2{{osm}} + inp2>property_boundaries] -- property_boundaries --> tsk3{{detect}} + tsk3{{detect}} -- properties_with_driveways --> out1>properties] + tsk3{{detect}} -- driveways --> out2>driveways] +``` + +## Sources + +- **input_raster**: Aerial imagery of the region of interest with RGB + NIR bands. + +- **property_boundaries**: Property boundary information for the region of interest. + +## Sinks + +- **properties**: Boundaries of properties that contain a driveway. + +- **driveways**: Regions of each property boundary where a driveway was detected. + +## Parameters + +- **min_region_area**: Minimum contiguous region that will be considered as a potential driveway, in meters. + +- **ndvi_thr**: Only areas under this NDVI threshold will be considered for driveways. + +- **car_size**: Expected size of a car, in pixels, defined as [height, width]. + +- **num_kernels**: Number of rotated kernels to try to fit a car inside a potential driveway region. + +- **car_thr**: Ratio of pixels of a kernel that have to be inside a region in order to consider it a parkable spot. + +## Tasks + +- **segment**: Segments the front of houses in the input raster using a machine learning model. + +- **osm**: Downloads road geometry for input region from Open Street Maps. + +- **detect**: Detects driveways in the front of each house, using the input image, segmentation map, road geometry, and input property boundaries. + +## Workflow Yaml + ```yaml name: driveway_detection @@ -65,23 +120,4 @@ description: to consider it a parkable spot. 
-``` - -```{mermaid} - graph TD - inp1>input_raster] - inp2>property_boundaries] - out1>properties] - out2>driveways] - tsk1{{segment}} - tsk2{{osm}} - tsk3{{detect}} - tsk1{{segment}} -- segmentation_raster --> tsk3{{detect}} - tsk2{{osm}} -- roads --> tsk3{{detect}} - inp1>input_raster] -- input_raster --> tsk1{{segment}} - inp1>input_raster] -- input_raster --> tsk3{{detect}} - inp1>input_raster] -- user_input --> tsk2{{osm}} - inp2>property_boundaries] -- property_boundaries --> tsk3{{detect}} - tsk3{{detect}} -- properties_with_driveways --> out1>properties] - tsk3{{detect}} -- driveways --> out2>driveways] ``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/basemap_prompt_segmentation.md b/docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/basemap_prompt_segmentation.md new file mode 100644 index 00000000..67fa8bae --- /dev/null +++ b/docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/basemap_prompt_segmentation.md @@ -0,0 +1,97 @@ +# ml/segment_anything/basemap_prompt_segmentation + +Runs Segment Anything Model (SAM) over BingMaps basemap rasters with points and/or bounding boxes as prompts. The workflow splits the input BingMaps basemap rasters into chips of 1024x1024 pixels with an overlap defined by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, followed by prompt encoder and mask decoder. Before running the workflow, make sure the model has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. The script will download the desired model weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the [FarmVibes.AI troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) page in the documentation. 
+ +```{mermaid} + graph TD + inp1>input_raster] + inp2>input_geometry] + inp3>input_prompts] + out1>segmentation_mask] + tsk1{{ingest_points}} + tsk2{{sam_inference}} + tsk1{{ingest_points}} -- geometry/input_prompts --> tsk2{{sam_inference}} + inp1>input_raster] -- input_raster --> tsk2{{sam_inference}} + inp2>input_geometry] -- input_geometry --> tsk2{{sam_inference}} + inp3>input_prompts] -- user_input --> tsk1{{ingest_points}} + tsk2{{sam_inference}} -- segmentation_mask --> out1>segmentation_mask] +``` + +## Sources + +- **input_geometry**: Geometry of interest within the raster for the segmentation. + +- **input_raster**: BingMaps basemap rasters used as input for the segmentation. + +- **input_prompts**: ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with coordinates, label (foreground/background) and prompt id (in case, the raster contains multiple entities that should be segmented in a single workflow run). + +## Sinks + +- **segmentation_mask**: Output segmentation masks. + +## Parameters + +- **model_type**: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. + +- **spatial_overlap**: Percentage of spatial overlap between chips in the range of [0.0, 1.0). + +## Tasks + +- **ingest_points**: Adds user geometries into the cluster storage, allowing for them to be used on workflows. + +- **sam_inference**: Runs SAM over the input BingMaps basemap raster with points and bounding boxes as prompts. 
+ +## Workflow Yaml + +```yaml + +name: basemap_prompt_segmentation +sources: + input_raster: + - sam_inference.input_raster + input_geometry: + - sam_inference.input_geometry + input_prompts: + - ingest_points.user_input +sinks: + segmentation_mask: sam_inference.segmentation_mask +parameters: + model_type: vit_b + spatial_overlap: 0.5 +tasks: + ingest_points: + workflow: data_ingestion/user_data/ingest_geometry + sam_inference: + op: basemap_prompt_segmentation + op_dir: segment_anything + parameters: + model_type: '@from(model_type)' + spatial_overlap: '@from(spatial_overlap)' +edges: +- origin: ingest_points.geometry + destination: + - sam_inference.input_prompts +description: + short_description: Runs Segment Anything Model (SAM) over BingMaps basemap rasters + with points and/or bounding boxes as prompts. + long_description: The workflow splits the input BingMaps basemap rasters into chips + of 1024x1024 pixels with an overlap defined by `spatial_overlap`. Chips intersecting + with prompts are processed by SAM's image encoder, followed by prompt encoder + and mask decoder. Before running the workflow, make sure the model has been imported + into the cluster by running `scripts/export_prompt_segmentation_models.py`. The + script will download the desired model weights from SAM repository, export the + image encoder and mask decoder to ONNX format, and add them to the cluster. For + more information, refer to the [FarmVibes.AI troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + input_geometry: Geometry of interest within the raster for the segmentation. + input_raster: BingMaps basemap rasters used as input for the segmentation. + input_prompts: ExternalReferences to the point and/or bounding box prompts. 
These + are GeoJSON with coordinates, label (foreground/background) and prompt id (in + case, the raster contains multiple entities that should be segmented in a single + workflow run). + sinks: + segmentation_mask: Output segmentation masks. + + +``` \ No newline at end of file diff --git a/docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/s2_prompt_segmentation.md b/docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/s2_prompt_segmentation.md new file mode 100644 index 00000000..2428583f --- /dev/null +++ b/docs/source/docfiles/markdown/workflow_yaml/ml/segment_anything/s2_prompt_segmentation.md @@ -0,0 +1,97 @@ +# ml/segment_anything/s2_prompt_segmentation + +Runs Segment Anything Model (SAM) over Sentinel-2 rasters with points and/or bounding boxes as prompts. The workflow splits the input Sentinel-2 rasters into chips of 1024x1024 pixels with an overlap defined by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, followed by prompt encoder and mask decoder. Before running the workflow, make sure the model has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. The script will download the desired model weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the [FarmVibes.AI troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) page in the documentation. 
+ +```{mermaid} + graph TD + inp1>input_raster] + inp2>input_geometry] + inp3>input_prompts] + out1>segmentation_mask] + tsk1{{ingest_points}} + tsk2{{sam_inference}} + tsk1{{ingest_points}} -- geometry/input_prompts --> tsk2{{sam_inference}} + inp1>input_raster] -- input_raster --> tsk2{{sam_inference}} + inp2>input_geometry] -- input_geometry --> tsk2{{sam_inference}} + inp3>input_prompts] -- user_input --> tsk1{{ingest_points}} + tsk2{{sam_inference}} -- segmentation_mask --> out1>segmentation_mask] +``` + +## Sources + +- **input_geometry**: Geometry of interest within the raster for the segmentation. + +- **input_raster**: Sentinel-2 rasters used as input for the segmentation. + +- **input_prompts**: ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with coordinates, label (foreground/background) and prompt id (in case, the raster contains multiple entities that should be segmented in a single workflow run). + +## Sinks + +- **segmentation_mask**: Output segmentation masks. + +## Parameters + +- **model_type**: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. + +- **spatial_overlap**: Percentage of spatial overlap between chips in the range of [0.0, 1.0). + +## Tasks + +- **ingest_points**: Adds user geometries into the cluster storage, allowing for them to be used on workflows. + +- **sam_inference**: Runs SAM over the input Sentinel-2 raster with points and bounding boxes as prompts. 
+ +## Workflow Yaml + +```yaml + +name: s2_prompt_segmentation +sources: + input_raster: + - sam_inference.input_raster + input_geometry: + - sam_inference.input_geometry + input_prompts: + - ingest_points.user_input +sinks: + segmentation_mask: sam_inference.segmentation_mask +parameters: + model_type: vit_b + spatial_overlap: 0.5 +tasks: + ingest_points: + workflow: data_ingestion/user_data/ingest_geometry + sam_inference: + op: s2_prompt_segmentation + op_dir: segment_anything + parameters: + model_type: '@from(model_type)' + spatial_overlap: '@from(spatial_overlap)' +edges: +- origin: ingest_points.geometry + destination: + - sam_inference.input_prompts +description: + short_description: Runs Segment Anything Model (SAM) over Sentinel-2 rasters with + points and/or bounding boxes as prompts. + long_description: The workflow splits the input Sentinel-2 rasters into chips of + 1024x1024 pixels with an overlap defined by `spatial_overlap`. Chips intersecting + with prompts are processed by SAM's image encoder, followed by prompt encoder + and mask decoder. Before running the workflow, make sure the model has been imported + into the cluster by running `scripts/export_prompt_segmentation_models.py`. The + script will download the desired model weights from SAM repository, export the + image encoder and mask decoder to ONNX format, and add them to the cluster. For + more information, refer to the [FarmVibes.AI troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + input_geometry: Geometry of interest within the raster for the segmentation. + input_raster: Sentinel-2 rasters used as input for the segmentation. + input_prompts: ExternalReferences to the point and/or bounding box prompts. These + are GeoJSON with coordinates, label (foreground/background) and prompt id (in + case, the raster contains multiple entities that should be segmented in a single + workflow run). 
+ sinks: + segmentation_mask: Output segmentation masks. + + +``` \ No newline at end of file diff --git a/docs/source/index.md b/docs/source/index.md index 465cc703..e22ffdb8 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -35,6 +35,7 @@ Additionally, the following user guides and links may be helpful: docfiles/markdown/CLIENT docfiles/markdown/WORKFLOWS docfiles/markdown/NOTEBOOK_LIST + docfiles/markdown/REST_API docfiles/markdown/CACHE docfiles/markdown/SECRETS docfiles/markdown/TROUBLESHOOTING diff --git a/notebooks/admag/azure_data_manager_for_agriculture_and_comet_farm_api_example.ipynb b/notebooks/admag/azure_data_manager_for_agriculture_and_comet_farm_api_example.ipynb index 30296b19..e20c3428 100644 --- a/notebooks/admag/azure_data_manager_for_agriculture_and_comet_farm_api_example.ipynb +++ b/notebooks/admag/azure_data_manager_for_agriculture_and_comet_farm_api_example.ipynb @@ -16,7 +16,7 @@ "\n", "This notebook shows how to use [Microsoft Azure Data Manager for Agriculture](https://aka.ms/farmvibesDMA) (ADMAg) and the [COMET-Farm API](https://gitlab.com/comet-api/api-docs/-/tree/master/) to derive carbon sequestration information for agricultural fields. The idea is to obtain farming data from Microsoft Azure Data Manager for Agriculture and input this data directly into the COMET-Farm API. In this notebook, we use a single workflow to calculate soil carbon sequestration using ADMAg ids. The steps executed by the `farm_ai/carbon_local/admag_carbon_integration` are the following:\n", "\n", - "1. FarmVibes.AI needs the farmer_id, boundary_id, and a seasonal_field_id (`ADMAgSeasonalFieldInput`), to retrieve farming data from Azure Data Manager for Agriculture. \n", + "1. FarmVibes.AI needs the party_id, and a seasonal_field_id (`ADMAgSeasonalFieldInput`), to retrieve farming data from Azure Data Manager for Agriculture. \n", "\n", "2. 
The information is sent back to FarmVibes.AI (Tillage, Fertilization, Organic Amendments, Planting, Harvest, …).\n", "\n", @@ -53,17 +53,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3685ff85", "metadata": {}, "outputs": [], "source": [ - "import os\n", "from typing import List\n", - "from datetime import datetime, timezone\n", "\n", - "from vibe_core.datamodel import RunStatus\n", - "from vibe_core.client import FarmvibesAiClient, get_default_vibe_client, get_local_service_url\n", + "from vibe_core.client import FarmvibesAiClient, get_default_vibe_client\n", "from vibe_core.data import ADMAgSeasonalFieldInput" ] }, @@ -80,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "1698286c", "metadata": {}, "outputs": [], @@ -90,15 +87,13 @@ "# ADMAg client id\n", "CLIENT_ID = \"\"\n", "# ADMAg client secret\n", - "CLIENT_SECRET = \"@SECRET(eywa-secrets, data-manager-ag-secret)\"\n", + "CLIENT_SECRET = \"\"\n", "# ADMAg authority\n", "AUTHORITY = \"\"\n", "# ADMAg default scope\n", "DEFAULT_SCOPE = \"\"\n", - "# Farmer ADMAg ID\n", - "FARMER_ID = \"\"\n", - "# Boundary ADMAg ID\n", - "BOUNDARY_ID = \"\"\n", + "# Party ADMAg ID\n", + "PARTY_ID = \"\"\n", "# A list of seasonal field scenarios ids from ADMAg\n", "SCENARIO_IDS = []\n", "# A list of baseline seasonal field ids from ADMAg\n", @@ -113,27 +108,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "be3373e2", "metadata": {}, "outputs": [], "source": [ "def get_seasonal_field_inputs(\n", - " farmer_id: str,\n", - " boundary_id: str,\n", + " party_id: str,\n", " seasonal_field_ids: List[str]\n", ") -> List[ADMAgSeasonalFieldInput]:\n", " return [\n", " ADMAgSeasonalFieldInput(\n", - " farmer_id=FARMER_ID,\n", - " boundary_id=BOUNDARY_ID,\n", + " party_id=party_id,\n", " seasonal_field_id=seasonal_field_id\n", " )\n", " for seasonal_field_id in seasonal_field_ids\n", " ]\n", "\n", - 
"baseline_admag_inputs = get_seasonal_field_inputs(FARMER_ID, BOUNDARY_ID, BASELINE_IDS)\n", - "scenario_admag_inputs = get_seasonal_field_inputs(FARMER_ID, BOUNDARY_ID, SCENARIO_IDS)" + "baseline_admag_inputs = get_seasonal_field_inputs(PARTY_ID, BASELINE_IDS)\n", + "scenario_admag_inputs = get_seasonal_field_inputs(PARTY_ID, SCENARIO_IDS)" ] }, { @@ -147,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "0b299fb0", "metadata": {}, "outputs": [], @@ -167,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "69526660", "metadata": {}, "outputs": [], @@ -177,27 +170,381 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "a00f3b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
Workflow: farm_ai/carbon_local/admag_carbon_integration\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mfarm_ai/carbon_local/admag_carbon_integration\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Description:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Computes the offset amount of carbon that would be sequestered in a seasonal field using        \n",
+       "    Microsoft Azure Data Manager for Agriculture (ADMAg) data. Derives carbon sequestration         \n",
+       "    information. Microsoft Azure Data Manager for Agriculture (ADMAg) and the COMET-Farm API are    \n",
+       "    used to obtain farming data and evaluate carbon offset.  ADMAg is capable of describing         \n",
+       "    important farming activities such as fertilization, tillage, and organic amendments             \n",
+       "    applications, all of which are represented in the data manager. FarmVibes.AI retrieves this     \n",
+       "    information from the data manager and builds SeasonalFieldInformation FarmVibes.AI objects.     \n",
+       "    These objects are then used to call the COMET-Farm API and evaluate Carbon Offset Information.  \n",
+       "
\n" + ], + "text/plain": [ + " Computes the offset amount of carbon that would be sequestered in a seasonal field using \n", + " Microsoft Azure Data Manager for Agriculture (ADMAg) data. Derives carbon sequestration \n", + " information. Microsoft Azure Data Manager for Agriculture (ADMAg) and the COMET-Farm API are \n", + " used to obtain farming data and evaluate carbon offset. ADMAg is capable of describing \n", + " important farming activities such as fertilization, tillage, and organic amendments \n", + " applications, all of which are represented in the data manager. FarmVibes.AI retrieves this \n", + " information from the data manager and builds SeasonalFieldInformation FarmVibes.AI objects. \n", + " These objects are then used to call the COMET-Farm API and evaluate Carbon Offset Information. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Sources:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - baseline_admag_input (vibe_core.data.farm.ADMAgSeasonalFieldInput): List of                   \n",
+       "    ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation objects for baseline COMET-Farm API\n",
+       "    Carbon offset evaluation.                                                                       \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mbaseline_admag_input\u001b[0m (\u001b[34mvibe_core.data.farm.ADMAgSeasonalFieldInput\u001b[0m): List of \n", + " ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation objects for baseline COMET-Farm API\n", + " Carbon offset evaluation. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - scenario_admag_input (vibe_core.data.farm.ADMAgSeasonalFieldInput): List of                   \n",
+       "    ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation objects for scenarios COMET-Farm   \n",
+       "    API Carbon offset evaluation.                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mscenario_admag_input\u001b[0m (\u001b[34mvibe_core.data.farm.ADMAgSeasonalFieldInput\u001b[0m): List of \n", + " ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation objects for scenarios COMET-Farm \n", + " API Carbon offset evaluation. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Sinks:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - carbon_output (vibe_core.data.core_types.CarbonOffsetInfo): Carbon sequestration received for \n",
+       "    scenario information provided as input.                                                         \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mcarbon_output\u001b[0m (\u001b[34mvibe_core.data.core_types.CarbonOffsetInfo\u001b[0m): Carbon sequestration received for \n", + " scenario information provided as input. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Parameters:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mParameters:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - base_url (default: None): Azure Data Manager for Agriculture host. Please visit               \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mbase_url\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture host. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - client_id (default: None): Azure Data Manager for Agriculture client id. Please visit         \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mclient_id\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture client id. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - client_secret (default: None): Azure Data Manager for Agriculture client secret. Please visit \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mclient_secret\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture client secret. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - authority (default: None): Azure Data Manager for Agriculture authority. Please visit         \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mauthority\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture authority. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - default_scope (default: None): Azure Data Manager for Agriculture default scope. Please visit \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mdefault_scope\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture default scope. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - comet_support_email (default: None): Comet support email. The email used to register for a    \n",
+       "    COMET account. The requests are forwarded to comet with this email reference.  This email is    \n",
+       "    used by comet to share the information back to you for failed requests.                         \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mcomet_support_email\u001b[0m (\u001b[34mdefault: None\u001b[0m): Comet support email. The email used to register for a \n", + " COMET account. The requests are forwarded to comet with this email reference. This email is \n", + " used by comet to share the information back to you for failed requests. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - ngrok_token (default: None): NGROK session token. A token that FarmVibes uses to create a     \n",
+       "    web_hook url that is shared with Comet in a request when running the workflow. Comet can use    \n",
+       "    this link to send back a response to FarmVibes.  NGROK is a service that creates temporary urls \n",
+       "    for local servers. To use NGROK, FarmVibes needs to get a token from this website,              \n",
+       "    https://dashboard.ngrok.com/.                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mngrok_token\u001b[0m (\u001b[34mdefault: None\u001b[0m): NGROK session token. A token that FarmVibes uses to create a \n", + " web_hook url that is shared with Comet in a request when running the workflow. Comet can use \n", + " this link to send back a response to FarmVibes. NGROK is a service that creates temporary urls \n", + " for local servers. To use NGROK, FarmVibes needs to get a token from this website, \n", + " https://dashboard.ngrok.com/. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Tasks:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - baseline_seasonal_field_list: Generates SeasonalFieldInformation using ADMAg (Microsoft Azure \n",
+       "    Data Manager for Agriculture).                                                                  \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mbaseline_seasonal_field_list\u001b[0m: Generates SeasonalFieldInformation using ADMAg (Microsoft Azure \n", + " Data Manager for Agriculture). \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - scenario_seasonal_field_list: Generates SeasonalFieldInformation using ADMAg (Microsoft Azure \n",
+       "    Data Manager for Agriculture).                                                                  \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mscenario_seasonal_field_list\u001b[0m: Generates SeasonalFieldInformation using ADMAg (Microsoft Azure \n", + " Data Manager for Agriculture). \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - admag_carbon: Computes the offset amount of carbon that would be sequestered in a seasonal    \n",
+       "    field using the baseline (historical) and scenario (time range interested in) information.      \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1madmag_carbon\u001b[0m: Computes the offset amount of carbon that would be sequestered in a seasonal \n", + " field using the baseline (historical) and scenario (time range interested in) information. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "client.document_workflow(CARBON_WORKFLOW)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "e8bdefac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "run = client.run(\n",
     "    CARBON_WORKFLOW,\n",
     "    \"Carbon what-if scenario\",\n",
     "    input_data={\n",
-    "        \"baseline_admag_input\": baseline_admag_inputs,\n",
-    "        \"scenario_admag_input\": scenario_admag_inputs,\n",
+    "        \"baseline_admag_input\": baseline_admag_inputs, \n",
+    "        \"scenario_admag_input\": scenario_admag_inputs, # type: ignore\n",
     "    },\n",
     "    parameters={\n",
     "        \"base_url\": BASE_URL,\n",
@@ -224,12 +571,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "16345f43",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'-0.074 Mg Co2e/year'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "run.output['carbon_output'][0].carbon"
+    "run.output['carbon_output'][0].carbon # type: ignore"
    ]
   }
  ],
diff --git a/notebooks/admag/azure_data_manager_for_agriculture_example.ipynb b/notebooks/admag/azure_data_manager_for_agriculture_example.ipynb
index d8be84ca..be50e40f 100644
--- a/notebooks/admag/azure_data_manager_for_agriculture_example.ipynb
+++ b/notebooks/admag/azure_data_manager_for_agriculture_example.ipynb
@@ -8,7 +8,7 @@
    "source": [
     "# Microsoft Azure Data Manager for Agriculture and NDVI summary workflows into a single custom workflow\n",
     "\n",
-    "In this notebook, we will explain how to connect FarmVibes.AI with [Microsoft Azure Data Manager for Agriculture](https://aka.ms/farmvibesDMA), and provide an example of how to leverage the FarmVibes.AI workflows using ADMAg inputs. We will demonstrate how to compose the ADMAg and NDVI summary workflows into a single custom workflow, and check the results for the user's agriculture field."
+    "In this notebook, we will explain how to connect FarmVibes.AI with [Microsoft Azure Data Manager for Agriculture](https://aka.ms/farmvibesDMA), and provide an example of how to leverage the FarmVibes.AI workflows using [ADMAg](https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/) inputs. We will demonstrate how to compose the ADMAg and NDVI summary workflows into a single custom workflow, and check the results for the user's agriculture field. This notebook uses ADMAg version 2023-11-01-preview for demonstration."
    ]
   },
   {
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "b2e34591",
    "metadata": {},
    "outputs": [],
@@ -59,7 +59,7 @@
    "source": [
     "## Define Azure Data Manager for Agriculture entities\n",
     "\n",
-    "We will start by providing the parameters that specify the Azure Data Manager for Agriculture connection (e.g., seasonal field, boundary, and farmer identifiers). Please, check Microsoft Azure Data Manager for Agriculture [documentation](https://aka.ms/farmvibesDMA) to check how to obtain these fields.\n",
+    "We will start by providing the parameters that specify the Azure Data Manager for Agriculture connection (e.g., seasonal field and party identifiers). Please check the Microsoft Azure Data Manager for Agriculture [documentation](https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/) to learn how to obtain these fields.\n",
     "\n",
     "In the next cell, we retrieve the `CLIENT_SECRET` variable from the `data-manager-ag-secret` registered on the FarmVibes.AI cluster. To create a new key on the cluster you may want to use the following command on project's root folder:\n",
     "\n",
@@ -75,22 +75,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "4afec3ab",
    "metadata": {},
    "outputs": [],
    "source": [
     "WORKFLOW_NAME = \"data_ingestion/admag/admag_seasonal_field\"\n",
     "\n",
-    "BASE_URL = \"\"\n",
-    "CLIENT_ID = \"\"\n",
-    "CLIENT_SECRET = \"@SECRET(eywa-secrets, data-manager-ag-secret)\"\n",
-    "AUTHORITY = \"\"\n",
-    "DEFAULT_SCOPE = \"\"\n",
+    "BASE_URL = \"\"\n",
+    "CLIENT_ID = \"\"\n",
+    "CLIENT_SECRET = \"\"\n",
+    "AUTHORITY = \"\"\n",
+    "DEFAULT_SCOPE = \"\"\n",
     "\n",
-    "FARMER_ID = \"\"\n",
-    "SEASONAL_FIELD_ID=\"\"\n",
-    "BOUNDARY_ID=\"\""
+    "PARTY_ID = \"\"\n",
+    "SEASONAL_FIELD_ID=\"\""
    ]
   },
   {
@@ -101,20 +100,19 @@
    "source": [
     "## Create Seasonal Field input\n",
     "\n",
-    "Azure Data Manager for Agriculture uses `farmer_id`, `seasonal_field_id`, and `boundary_id` to identify a crop during a given season. This triple will be used to create a DataVibe subclass `SeasonalFieldInformation` that contains farm-related operations (e.g., fertilization, harvest, tillage, planting, crop name) that is used as input to the workflow (`data_ingestion/admag/admag_seasonal_field`). "
+    "Azure Data Manager for Agriculture uses `party_id` and `seasonal_field_id` to identify a crop during a given season. This pair will be used to create a DataVibe subclass `SeasonalFieldInformation` that contains farm-related operations (e.g., fertilization, harvest, tillage, planting, crop name) that is used as input to the workflow (`data_ingestion/admag/admag_seasonal_field`). "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "f63c1b1b",
    "metadata": {},
    "outputs": [],
    "source": [
     "input_data = ADMAgSeasonalFieldInput(\n",
-    "    farmer_id=FARMER_ID,\n",
+    "    party_id=PARTY_ID,\n",
     "    seasonal_field_id=SEASONAL_FIELD_ID,\n",
-    "    boundary_id=BOUNDARY_ID,\n",
     ")"
    ]
   },
@@ -131,7 +129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "ea8f8112",
    "metadata": {},
    "outputs": [],
@@ -141,20 +139,279 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "6ad9225c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
Workflow: data_ingestion/admag/admag_seasonal_field\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mdata_ingestion/admag/admag_seasonal_field\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Description:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data Manager for Agriculture).  \n",
+       "    The workflow creates a DataVibe subclass SeasonalFieldInformation that contains farm-related    \n",
+       "    operations (e.g., fertilization, harvest, tillage, planting, crop name).                        \n",
+       "
\n" + ], + "text/plain": [ + " Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data Manager for Agriculture). \n", + " The workflow creates a DataVibe subclass SeasonalFieldInformation that contains farm-related \n", + " operations (e.g., fertilization, harvest, tillage, planting, crop name). \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Sources:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - admag_input (vibe_core.data.farm.ADMAgSeasonalFieldInput): Unique identifiers for ADMAg       \n",
+       "    seasonal field, and party.                                                                      \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1madmag_input\u001b[0m (\u001b[34mvibe_core.data.farm.ADMAgSeasonalFieldInput\u001b[0m): Unique identifiers for ADMAg \n", + " seasonal field, and party. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Sinks:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - seasonal_field (vibe_core.data.farm.SeasonalFieldInformation): Crop SeasonalFieldInformation  \n",
+       "    which contains SeasonalFieldInformation that contains farm-related operations (e.g.,            \n",
+       "    fertilization, harvest, tillage, planting, crop name).                                          \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mseasonal_field\u001b[0m (\u001b[34mvibe_core.data.farm.SeasonalFieldInformation\u001b[0m): Crop SeasonalFieldInformation \n", + " which contains SeasonalFieldInformation that contains farm-related operations (e.g., \n", + " fertilization, harvest, tillage, planting, crop name). \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Parameters:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mParameters:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - base_url (default: None): Azure Data Manager for Agriculture host. Please visit               \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mbase_url\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture host. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - client_id (default: None): Azure Data Manager for Agriculture client id. Please visit         \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mclient_id\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture client id. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - client_secret (default: None): Azure Data Manager for Agriculture client secret. Please visit \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mclient_secret\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture client secret. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - authority (default: None): Azure Data Manager for Agriculture authority. Please visit         \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mauthority\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture authority. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - default_scope (default: None): Azure Data Manager for Agriculture default scope. Please visit \n",
+       "    https://aka.ms/farmvibesDMA to check how to get these credentials.                              \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mdefault_scope\u001b[0m (\u001b[34mdefault: None\u001b[0m): Azure Data Manager for Agriculture default scope. Please visit \n", + " https://aka.ms/farmvibesDMA to check how to get these credentials. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Tasks:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - admag_seasonal_field: Establishes the connection with ADMAg and fetches seasonal field        \n",
+       "    information.                                                                                    \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1madmag_seasonal_field\u001b[0m: Establishes the connection with ADMAg and fetches seasonal field \n", + " information. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "client.document_workflow(WORKFLOW_NAME)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "3ae9e9d6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "run = client.run(\n",
     "    WORKFLOW_NAME,\n",
@@ -185,10 +442,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "fdfd9eda",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SeasonalFieldInformation(id='6ee1f27b-c1b8-4e5a-a5c5-d862745229e4', time_range=(datetime.datetime(2000, 2, 15, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2000, 9, 5, 0, 0, tzinfo=datetime.timezone.utc)), bbox=(-117.04672810633345, 47.03859765371245, -117.04516997816333, 47.039879416641384), geometry={'type': 'MultiPolygon', 'coordinates': [[[[-117.0466947519078, 47.038850194363874], [-117.045889480774, 47.03859765371245], [-117.045889480774, 47.03859765371245], [-117.045889480774, 47.03859765371245], [-117.04516997816333, 47.039188503538426], [-117.04555593651739, 47.039807942872116], [-117.04627543912807, 47.039879416641384], [-117.04672810633345, 47.03963640582586], [-117.0466947519078, 47.038850194363874]]]]}, assets=[], crop_name='Alfalfa', crop_type='annual', properties={'pre_1980': 'Irrigation (Pre 1980s)', 'crp_type': 'None', 'crp_start': '', 'crp_end': '', 'year_1980_2000': 'Irrigated: Annual Crops in Rotation', 'year_1980_2000_tillage': 'Intensive Tillage'}, fertilizers=[], harvests=[{'is_grain': True, 'start_date': '2000-09-05T00:00:00Z', 'end_date': '2000-09-05T00:00:00Z', 'crop_yield': 39.0, 'stray_stover_hay_removal': '0'}, {'is_grain': True, 'start_date': '2000-09-05T00:00:00Z', 'end_date': '2000-09-05T00:00:00Z', 'crop_yield': 39.0, 'stray_stover_hay_removal': '0'}], tillages=[{'start_date': '2000-01-01T00:00:00Z', 'end_date': '2000-01-01T00:00:00Z', 'implement': 'Reduced Tillage'}, {'start_date': '2000-01-01T00:00:00Z', 'end_date': '2000-01-01T00:00:00Z', 'implement': 'Reduced Tillage'}], organic_amendments=[])"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "run.output['seasonal_field'][0]"
    ]
@@ -206,10 +474,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "035246fb",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "'Seasonal field planting date: 2000/02/15'\n",
+      "'Seasonal field harvest date: 2000/09/05'\n",
+      "{'coordinates': [[[[-117.0466947519078, 47.038850194363874],\n",
+      "                   [-117.045889480774, 47.03859765371245],\n",
+      "                   [-117.045889480774, 47.03859765371245],\n",
+      "                   [-117.045889480774, 47.03859765371245],\n",
+      "                   [-117.04516997816333, 47.039188503538426],\n",
+      "                   [-117.04555593651739, 47.039807942872116],\n",
+      "                   [-117.04627543912807, 47.039879416641384],\n",
+      "                   [-117.04672810633345, 47.03963640582586],\n",
+      "                   [-117.0466947519078, 47.038850194363874]]]],\n",
+      " 'type': 'MultiPolygon'}\n"
+     ]
+    }
+   ],
    "source": [
     "seasonal_field = run.output['seasonal_field'][0]\n",
     "\n",
@@ -232,10 +519,218 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "fc0fde75",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
Workflow: farm_ai/agriculture/ndvi_summary\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mfarm_ai/agriculture/ndvi_summary\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Description:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Calculates NDVI statistics (mean, standard deviation, maximum and minimum) for the input        \n",
+       "    geometry and time range. The workflow retrieves the relevant Sentinel-2 products with Planetary \n",
+       "    Computer (PC) API, forwards them to a cloud detection model and combines the predicted cloud    \n",
+       "    mask to the mask obtained from the product. The workflow computes the NDVI for each available   \n",
+       "    tile and date, summarizing each with the mean, standard deviation, maximum and minimum values   \n",
+       "    for the regions not obscured by clouds. Finally, it outputs a timeseries with such statistics   \n",
+       "    for all available dates, ignoring heavily-clouded tiles.                                        \n",
+       "
\n" + ], + "text/plain": [ + " Calculates NDVI statistics (mean, standard deviation, maximum and minimum) for the input \n", + " geometry and time range. The workflow retrieves the relevant Sentinel-2 products with Planetary \n", + " Computer (PC) API, forwards them to a cloud detection model and combines the predicted cloud \n", + " mask to the mask obtained from the product. The workflow computes the NDVI for each available \n", + " tile and date, summarizing each with the mean, standard deviation, maximum and minimum values \n", + " for the regions not obscured by clouds. Finally, it outputs a timeseries with such statistics \n", + " for all available dates, ignoring heavily-clouded tiles. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Sources:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - user_input (vibe_core.data.core_types.DataVibe): Time range and geometry of interest.         \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1muser_input\u001b[0m (\u001b[34mvibe_core.data.core_types.DataVibe\u001b[0m): Time range and geometry of interest. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Sinks:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - timeseries (List[vibe_core.data.core_types.TimeSeries]): Aggregated NDVI statistics of the    \n",
+       "    retrieved tiles within the input geometry and time range.                                       \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mtimeseries\u001b[0m (\u001b[34mList[vibe_core.data.core_types.TimeSeries]\u001b[0m): Aggregated NDVI statistics of the \n", + " retrieved tiles within the input geometry and time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Parameters:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mParameters:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - pc_key (default: ): Optional Planetary Computer API key.                                      \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mpc_key\u001b[0m (\u001b[34mdefault: \u001b[0m): Optional Planetary Computer API key. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Tasks:\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - s2: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time     \n",
+       "    range, and computes improved cloud masks using cloud and shadow segmentation models.            \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1ms2\u001b[0m: Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time \n", + " range, and computes improved cloud masks using cloud and shadow segmentation models. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - compute_ndvi: Computes an index from the bands of an input raster.                            \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1mcompute_ndvi\u001b[0m: Computes an index from the bands of an input raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - summary_timeseries: Computes the mean, standard deviation, maximum, and minimum values of all \n",
+       "    regions of the raster considered by the mask and aggregates them into a timeseries.             \n",
+       "
\n" + ], + "text/plain": [ + " - \u001b[1msummary_timeseries\u001b[0m: Computes the mean, standard deviation, maximum, and minimum values of all \n", + " regions of the raster considered by the mask and aggregates them into a timeseries. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "client.document_workflow(\"farm_ai/agriculture/ndvi_summary\")" ] @@ -255,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "ede223f5", "metadata": {}, "outputs": [], @@ -296,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "250cb567", "metadata": {}, "outputs": [], @@ -328,7 +823,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "2b797c26", "metadata": {}, "outputs": [], @@ -348,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "98d061af", "metadata": {}, "outputs": [], @@ -371,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "9ab5e280", "metadata": {}, "outputs": [], diff --git a/notebooks/crop_cycles/env.yaml b/notebooks/crop_cycles/env.yaml index bf4e4221..9b1606d5 100644 --- a/notebooks/crop_cycles/env.yaml +++ b/notebooks/crop_cycles/env.yaml @@ -10,6 +10,7 @@ dependencies: - tf2onnx=1.9.3 - rioxarray=0.3.1 - ipykernel=6.15.2 + - ipywidgets~=8.0.2 - yaml=0.2.5 - matplotlib=3.5.3 - pip~=21.2.4 diff --git a/notebooks/forest/download_alos_forest_map.ipynb b/notebooks/forest/download_alos_forest_map.ipynb new file mode 100644 index 00000000..b5834c38 --- /dev/null +++ b/notebooks/forest/download_alos_forest_map.ipynb @@ -0,0 +1,497 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download ALOS Forest Extent Dataset\n", + "In this notebook, we download an [ALOS Forest Extent](https://planetarycomputer.microsoft.com/dataset/alos-fnf-mosaic) map using FarmVibes.AI, and visualize it.\n", + 
"\n", + "The ALOS PALSAR/PALSAR-2 Annual Mosaic is a dataset that provides annual observations of forest extent produced by JAXA's ALOS and ALOS-2 satellites. The dataset spans from 2015 to 2020 and covers the whole globe. Each dataset (provided as Rasters) contains the following categories:\n", + "```txt\n", + "0 - No data\n", + "1 - Forest (>90% canopy cover)\n", + "2 - Forest (10-90% canopy cover)\n", + "3 - Non-forest\n", + "4 - Water\n", + "```\n", + "\n", + "The download process involves the following steps:\n", + "1. Listing of the products that intersect with the user-provided geometry and time range.\n", + "2. Downloading each raster listed in the previous step.\n", + "\n", + "The output is provided as the categorical rasters listed in step 1.\n", + "\n", + "NOTE: To install the required packages used in this notebook, see [this README file](../README.md)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from vibe_core.client import get_default_vibe_client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Vibe client and document the ALOS download workflow\n", + "\n", + "Before executing the [workflow](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/WORKFLOWS.html), let's observe its documentation using a FarmVibes.AI python client." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
Workflow: data_ingestion/alos/alos_forest_extent_download_merge\n",
+                            "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mdata_ingestion/alos/alos_forest_extent_download_merge\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Description:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map and     \n",
+                            "    merges it into a single raster. The workflow lists the ALOS forest/non-forest classification    \n",
+                            "    products that intersect with the input geometry and time range (available range 2015-2020), and \n",
+                            "    downloads the filtered products. The workflow processes the downloaded products and merge them  \n",
+                            "    into a single raster.                                                                           \n",
+                            "
\n" + ], + "text/plain": [ + " Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map and \n", + " merges it into a single raster. The workflow lists the ALOS forest/non-forest classification \n", + " products that intersect with the input geometry and time range (available range 2015-2020), and \n", + " downloads the filtered products. The workflow processes the downloaded products and merge them \n", + " into a single raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sources:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - user_input (vibe_core.data.core_types.DataVibe): Geometry of interest for which to download   \n",
+                            "    the ALOS forest/non-forest classification map.                                                  \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1muser_input\u001b[0m (\u001b[34mvibe_core.data.core_types.DataVibe\u001b[0m): Geometry of interest for which to download \n", + " the ALOS forest/non-forest classification map. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sinks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merged_raster (vibe_core.data.rasters.Raster): ALOS forest/non-forest classification products \n",
+                            "    converted to raster and merged.                                                                 \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerged_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): ALOS forest/non-forest classification products \n", + " converted to raster and merged. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - categorical_raster (vibe_core.data.rasters.CategoricalRaster): ALOS forest/non-forest         \n",
+                            "    classification products that intersect with the input geometry & time range.                    \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mcategorical_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.CategoricalRaster\u001b[0m): ALOS forest/non-forest \n", + " classification products that intersect with the input geometry & time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Parameters:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mParameters:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - pc_key (default: ): Planetary computer API key.                                               \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mpc_key\u001b[0m (\u001b[34mdefault: \u001b[0m): Planetary computer API key. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Tasks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - alos_forest_extent_download: Downloads Advanced Land Observing Satellite (ALOS)               \n",
+                            "    forest/non-forest classification map.                                                           \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1malos_forest_extent_download\u001b[0m: Downloads Advanced Land Observing Satellite (ALOS) \n", + " forest/non-forest classification map. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - group_rasters_by_time: This op groups rasters in time according to 'criterion'.               \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mgroup_rasters_by_time\u001b[0m: This op groups rasters in time according to 'criterion'. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merge: Merges rasters in a sequence to a single raster.                                       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerge\u001b[0m: Merges rasters in a sequence to a single raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "client = get_default_vibe_client()\n", + "\n", + "WORKFLOW_NAME = \"data_ingestion/alos/alos_forest_extent_download_merge\"\n", + "client.document_workflow(WORKFLOW_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define input Geometry and Time Range\n", + "\n", + "Next, we are going to define the geometry of interest and the time range that will be considered to download the ALOS products. The workflow will download all the tiles that intersect with the input." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from shapely import geometry as shpg\n", + "from datetime import datetime\n", + "\n", + "# GeoJSON data\n", + "geo_json = {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [-86.773827, 14.575498],\n", + " [-86.770459, 14.579300],\n", + " [-86.764283, 14.575102],\n", + " [-86.769591, 14.567595],\n", + " [-86.773827, 14.575498],\n", + " ]\n", + " ],\n", + " },\n", + " \"properties\": {},\n", + "}\n", + "\n", + "geom = shpg.shape(geo_json[\"geometry\"])\n", + "time_range = (datetime(2020, 1, 1), datetime(2020, 1, 2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run FarmVibes.AI Workflow\n", + "\n", + "In this step, the client requests the workflow execution on the FarmVibes.AI cluster. Note that we provide the following inputs to the client.run call:\n", + "\n", + "1. Workflow name. Users can list the existing workflows by calling the command `client.list_workflows()`. They can also refer to the existing [workflow list](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/WORKFLOW_LIST.html).\n", + "2. 
Workflow execution name: The name we give for this particular workflow execution. \n", + "3. Geometry of interest.\n", + "4. Time range.\n", + "5. Parameters list. Check the [workflow](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/WORKFLOWS.html) documentation page to see how parameters are provided. We used the `pc_key` parameter, which corresponds to the Planetary Computer API key, that is useful to download planetary computer imagery.\n", + "\n", + "Please refer to the [SECRETS documentation](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/SECRETS.html) to learn how a secret can be added to the FarmVibes.AI cluster." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "application/vnd.jupyter.widget-view+json": {
+                            "model_id": "188eac6094214bf7aa3adc3e2f204972",
+                            "version_major": 2,
+                            "version_minor": 0
+                        },
+                        "text/plain": [
+                            "Output()"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                }
+            ],
+            "source": [
+                "run = client.run(\n",
+                "    WORKFLOW_NAME,\n",
+                "    \"Download ALOS Forest Map\",\n",
+                "    geometry=geom,\n",
+                "    time_range=time_range,\n",
+                "    parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n",
+                ")\n",
+                "run.monitor()"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Read the output data\n",
+                "\n",
+                "In the next cell, we build a bounding box around the user-provided geometry (with a small buffer) and use it to read the output raster."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 5,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Add shared notebook library to path\n",
+                "import sys\n",
+                "from vibe_core.data import CategoricalRaster, Raster\n",
+                "from shapely.geometry import box\n",
+                "from typing import cast\n",
+                "\n",
+                "sys.path.append(\"../\")\n",
+                "from shared_nb_lib.raster import read_raster\n",
+                "from shared_nb_lib.plot import plot_categorical_map\n",
+                "\n",
+                "# Define your geometry\n",
+                "bounding_box = box(*geom.buffer(0.01).bounds)\n",
+                "\n",
+                "# Get the bounds of the geometry\n",
+                "minx, miny, maxx, maxy = bounding_box.bounds\n",
+                "\n",
+                "merged_raster = cast(Raster, run.output[\"merged_raster\"][0])\n",
+                "categories = cast(CategoricalRaster, run.output[\"categorical_raster\"][0]).categories\n",
+                "\n",
+                "out_image = read_raster(merged_raster, bounding_box)[0]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Plot the result map\n",
+                "\n",
+                "Finally, we plot the raster image with the existing categories and the user-provided geometry (red area within the plot)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 6,
+            "metadata": {},
+            "outputs": [
+                {
+                    "data": {
+                        "image/png": "",
+                        "text/plain": [
+                            "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "color_dict = {\n", + " 0: \"black\",\n", + " 1: \"darkgreen\",\n", + " 2: \"lightgreen\",\n", + " 3: \"gray\",\n", + " 4: \"blue\",\n", + "}\n", + "\n", + "plot_categorical_map(\n", + " out_image[0],\n", + " color_dict,\n", + " categories,\n", + " geom.exterior.xy,\n", + " extent=[minx, maxx, miny, maxy],\n", + " title=\"ALOS Forest Map\",\n", + ")" + ] + } + ], + "metadata": { + "description": "This notebook downloads the ALOS (Advanced Land Observing Satellite) forest extent maps", + "disk_space": "", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + }, + "name": "Download ALOS forest extent maps", + "running_time": "", + "tags": [ + "Remote Sensing", + "Deforestation", + "Sustainability" + ] + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/notebooks/forest/download_glad_forest_map.ipynb b/notebooks/forest/download_glad_forest_map.ipynb new file mode 100644 index 00000000..28bb42d2 --- /dev/null +++ b/notebooks/forest/download_glad_forest_map.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3c0a2457", + "metadata": {}, + "source": [ + "# Download GLAD Forest Extent Dataset\n", + "\n", + "Here's a simple example of how to download [GLAD forest extent maps](https://glad.umd.edu/dataset/GLCLUC2020) on FarmVibes. 
Just like the other FarmVibes.AI notebooks, you can refer to [this README file](../README.md) to see how to install the required packages." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d12f59be", + "metadata": {}, + "outputs": [], + "source": [ + "from vibe_core.client import get_default_vibe_client" + ] + }, + { + "cell_type": "markdown", + "id": "10cf50ca", + "metadata": {}, + "source": [ + "### Create vibe client and document the GLAD download workflow\n", + "\n", + "The following cell creates a new FarmVibes.AI client which is able to communicate with the FarmVibes.AI backend. Next, it documents \n", + "the `glad_forest_extent_download_merge` workflow." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "88bdcacb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
Workflow: data_ingestion/glad/glad_forest_extent_download_merge\n",
+                            "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mdata_ingestion/glad/glad_forest_extent_download_merge\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Description:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Downloads Global Land Analysis (GLAD) forest extent data and merges them into a single raster.  \n",
+                            "    The workflow lists the GLAD forest products that intersect with the input geometry and time     \n",
+                            "    range, and downloads the filtered products. The downloaded products are merged into a single    \n",
+                            "    raster and classified. The result tiles have pixel values categorized into two classes - 0      \n",
+                            "    (non-forest) and 1 (forest). This workflow uses the same forest definition as the Food and      \n",
+                            "    Agriculture Organization of the United Nations (FAO).                                           \n",
+                            "
\n" + ], + "text/plain": [ + " Downloads Global Land Analysis (GLAD) forest extent data and merges them into a single raster. \n", + " The workflow lists the GLAD forest products that intersect with the input geometry and time \n", + " range, and downloads the filtered products. The downloaded products are merged into a single \n", + " raster and classified. The result tiles have pixel values categorized into two classes - 0 \n", + " (non-forest) and 1 (forest). This workflow uses the same forest definition as the Food and \n", + " Agriculture Organization of the United Nations (FAO). \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sources:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - input_item (vibe_core.data.core_types.DataVibe): Geometry of interest for which to download   \n",
+                            "    the GLAD forest extent data.                                                                    \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1minput_item\u001b[0m (\u001b[34mvibe_core.data.core_types.DataVibe\u001b[0m): Geometry of interest for which to download \n", + " the GLAD forest extent data. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sinks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merged_product (vibe_core.data.rasters.Raster): Merged GLAD forest extent product to geometry \n",
+                            "    of interest.                                                                                    \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerged_product\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Merged GLAD forest extent product to geometry \n", + " of interest. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - categorical_raster (vibe_core.data.rasters.Raster): Raster with the GLAD forest extent data.  \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mcategorical_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Raster with the GLAD forest extent data. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Tasks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - glad_forest_extent_download: Downloads Global Land Analysis (GLAD) forest extent data.        \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mglad_forest_extent_download\u001b[0m: Downloads Global Land Analysis (GLAD) forest extent data. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - group_rasters_by_time: This op groups rasters in time according to 'criterion'.               \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mgroup_rasters_by_time\u001b[0m: This op groups rasters in time according to 'criterion'. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merge: Merges rasters in a sequence to a single raster.                                       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerge\u001b[0m: Merges rasters in a sequence to a single raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "client = get_default_vibe_client()\n", + "\n", + "WORKFLOW_NAME = \"data_ingestion/glad/glad_forest_extent_download_merge\"\n", + "client.document_workflow(WORKFLOW_NAME)" + ] + }, + { + "cell_type": "markdown", + "id": "696f2717", + "metadata": {}, + "source": [ + "### Create sample geometry and time range\n", + "\n", + "Like most FarmVibes.AI workflows, the user input involves a geometry and a time-range. The following cell creates a `shapely` geometry and a time range object to be provided as the workflow input." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "868a2720", + "metadata": {}, + "outputs": [], + "source": [ + "from shapely import geometry as shpg\n", + "from datetime import datetime\n", + "\n", + "# GeoJSON data\n", + "geo_json = {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [-86.773827, 14.575498],\n", + " [-86.770459, 14.579301],\n", + " [-86.764283, 14.575102],\n", + " [-86.769591, 14.567595],\n", + " [-86.773827, 14.575497],\n", + " ]\n", + " ],\n", + " },\n", + " \"properties\": {},\n", + "}\n", + "\n", + "geom = shpg.shape(geo_json[\"geometry\"])\n", + "time_range = datetime(2020, 1, 1), datetime(2020, 1, 2)" + ] + }, + { + "cell_type": "markdown", + "id": "c2604106", + "metadata": {}, + "source": [ + "### Execute FarmVibes.AI to download the GLAD Forest extent tiles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bc8ffe3a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "application/vnd.jupyter.widget-view+json": {
+                            "model_id": "9e1e6f1ff0bd4d87828384a5e88db045",
+                            "version_major": 2,
+                            "version_minor": 0
+                        },
+                        "text/plain": [
+                            "Output()"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                }
+            ],
+            "source": [
+                "run = client.run(\n",
+                "    WORKFLOW_NAME,\n",
+                "    \"Download GLAD Forest Map\",\n",
+                "    geometry=geom,\n",
+                "    time_range=time_range,\n",
+                ")\n",
+                "run.monitor()"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "57a4c441",
+            "metadata": {},
+            "source": [
+                "### Visualize the resulting data cropped to the input geometry with a buffer\n",
+                "\n",
+                "In the next cell, we build a bounding box around the user-provided geometry (with a small buffer) and use it to read the output raster. Then we plot the raster image with the user geometry outlined in red."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 13,
+            "id": "4ccd5569",
+            "metadata": {},
+            "outputs": [
+                {
+                    "data": {
+                        "image/png": "",
+                        "text/plain": [
+                            "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from shapely.geometry import box\n", + "import matplotlib.pyplot as plt\n", + "from vibe_core.data import CategoricalRaster, Raster\n", + "from typing import cast\n", + "\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "from shared_nb_lib.raster import read_raster\n", + "\n", + "\n", + "# Define your geometry\n", + "bounding_box = box(* geom.buffer(0.01).bounds)\n", + "\n", + "# Get the bounds of the geometry\n", + "minx, miny, maxx, maxy = bounding_box.bounds\n", + "\n", + "merged_raster = cast(Raster, run.output[\"merged_product\"][0])\n", + "categories = cast(CategoricalRaster, run.output[\"categorical_raster\"][0]).categories\n", + "\n", + "out_image = read_raster(merged_raster, bounding_box)[0]\n", + "\n", + "cmap = plt.get_cmap(\"Greens\", len(categories))\n", + "\n", + "# Plot the cropped image with latitude and longitude in the axes\n", + "plt.imshow(out_image[0], cmap=cmap, extent=[minx, maxx, miny, maxy])\n", + "\n", + "# Add a legend\n", + "legend = plt.legend(\n", + " handles=[\n", + " plt.Rectangle((0, 0), 1, 1, color=cmap(0)),\n", + " plt.Rectangle((0, 0), 1, 1, color=cmap(1)),\n", + " ],\n", + " labels=categories,\n", + ")\n", + "\n", + "# Plot geom on top of the cropped image\n", + "plt.plot(*geom.exterior.xy, color=\"red\")\n", + "\n", + "plt.title(\"GLAD Forest Map\")\n", + "plt.xlabel(\"Longitude\")\n", + "plt.ylabel(\"Latitude\")\n", + "\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "description": "This notebook downloads the Global Land Analysis (GLAD) forest extent maps.", + "disk_space": "", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.8.15" + }, + "name": "Download Glad Forest Map", + "running_time": "", + "tags": [ + "Remote Sensing", + "Deforestation", + "Sustainability" + ] + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/notebooks/forest/download_hansen_forest_map.ipynb b/notebooks/forest/download_hansen_forest_map.ipynb new file mode 100644 index 00000000..00e44fc3 --- /dev/null +++ b/notebooks/forest/download_hansen_forest_map.ipynb @@ -0,0 +1,939 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3c0a2457", + "metadata": {}, + "source": [ + "# Download Hansen Forest Change\n", + "\n", + "In this notebook, we download layers from the [Hansen Dataset](https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html) using FarmVibes.AI and visualize it. The data, distributed under the [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/), illustrates how forests changed from 2000 to 2022 with 30 meter resolution and covers the whole planet. Users can use this dataset to observe how the forest changed over time in a region of interest.\n", + "\n", + "This global dataset, split into 10x10 degree tiles, contains seven files per tile. Each file has unsigned 8-bit values and a resolution of around 30 meters per pixel at the equator. The dataset includes the following layers:\n", + "\n", + " - `treecover2000`: Tree cover in the year 2000, defined as canopy closure for all vegetation taller than 5m in height. Encoded as a percentage per output grid cell, in the range 0–100.\n", + " - `gain`: Forest gain during the period 2000-2012, defined as the inverse of loss, or a non-forest to forest change entirely within the study period. Encoded as either 1 (gain) or 0 (no gain).\n", + " - `lossyear`: Forest loss during the period 2000-2022, defined as a stand-replacement disturbance, or a change from a forest to non-forest state. 
Encoded as either 0 (no loss) or else a value in the range 1-22, representing loss detected primarily in the year 2001-2022, respectively.\n", + " - `datamask`: Three values representing areas of no data (0), mapped land surface (1), and persistent water bodies (2) based on 2000-2012.\n", + " - `first`: Circa year 2000 Landsat 7 cloud-free image composite (first).\n", + "Reference multispectral imagery from the first available year, typically 2000. \n", + " - `last`: cloud-free image composites for the last year in the series (e.g., 2022). \n", + " \n", + " Only the 'lossyear' and 'last' categories are updated annually. The reflectance values in the imagery are scaled to an 8-bit data range.\n", + "\n", + "Dataset Reference:\n", + "\n", + "Hansen, M. C., P. V. Potapov, R. Moore, M. Hancher, S. A. Turubanova, A. Tyukavina, D. Thau, S. V. Stehman, S. J. Goetz, T. R. Loveland, A. Kommareddy, A. Egorov, L. Chini, C. O. Justice, and J. R. G. Townshend. 2013. High-Resolution Global Maps of 21st-Century Forest Cover Change. Science 342 (15 November): 850-53. Data available on-line from: https://glad.earthengine.app/view/global-forest-change." + ] + }, + { + "cell_type": "markdown", + "id": "bfa4f4fc", + "metadata": {}, + "source": [ + "### Micromamba environment setup\n", + "To install the required packages, see [this README file](../README.md). 
You can activate the environment with the following command:\n", + "\n", + "\n", + "```bash\n", + "$ micromamba activate farmvibes-ai\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "447dcf5f", + "metadata": {}, + "outputs": [], + "source": [ + "from shapely import geometry as shpg\n", + "from datetime import datetime\n", + "\n", + "from matplotlib.ticker import MaxNLocator\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.colors as mcolors\n", + "import matplotlib.ticker as ticker\n", + "\n", + "from shapely.geometry import box\n", + "import matplotlib.pyplot as plt\n", + "from typing import cast\n", + "\n", + "import sys\n", + "\n", + "sys.path.append(\"../\")\n", + "from shared_nb_lib.raster import read_raster\n", + "\n", + "from vibe_core.data import DataVibe, Raster\n", + "from vibe_core.client import get_default_vibe_client" + ] + }, + { + "cell_type": "markdown", + "id": "3a8aff46", + "metadata": {}, + "source": [ + "### Create Vibe client and document the hansen download workflow\n", + "\n", + "Before executing the [workflow](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/WORKFLOWS.html), let's observe its documentation using a FarmVibes.AI python client." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "99fa0c54", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
Workflow: data_ingestion/hansen/hansen_forest_change_download\n",
+                            "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mdata_ingestion/hansen/hansen_forest_change_download\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Description:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Downloads and merges Global Forest Change (Hansen) rasters that intersect the user-provided     \n",
+                            "    geometry/time range. The workflow lists Global Forest Change (Hansen) products that intersect   \n",
+                            "    the user-provided geometry/time range, downloads the data for each of them, and merges the      \n",
+                            "    rasters. The dataset is available at 30m resolution and is updated annually. The data contains  \n",
+                            "    information on forest cover, loss, and gain. The default dataset version is GFC-2022-v1.10 and  \n",
+                            "    is passed to the workflow as the parameter tiles_folder_url. For the default version, the       \n",
+                            "    dataset is available from 2000 to 2022.  Dataset details can be found at                        \n",
+                            "    https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html.         \n",
+                            "
\n" + ], + "text/plain": [ + " Downloads and merges Global Forest Change (Hansen) rasters that intersect the user-provided \n", + " geometry/time range. The workflow lists Global Forest Change (Hansen) products that intersect \n", + " the user-provided geometry/time range, downloads the data for each of them, and merges the \n", + " rasters. The dataset is available at 30m resolution and is updated annually. The data contains \n", + " information on forest cover, loss, and gain. The default dataset version is GFC-2022-v1.10 and \n", + " is passed to the workflow as the parameter tiles_folder_url. For the default version, the \n", + " dataset is available from 2000 to 2022. Dataset details can be found at \n", + " https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sources:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - input_item (vibe_core.data.core_types.DataVibe): User-provided geometry and time range.       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1minput_item\u001b[0m (\u001b[34mvibe_core.data.core_types.DataVibe\u001b[0m): User-provided geometry and time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sinks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merged_raster (vibe_core.data.rasters.Raster): Merged Global Forest Change (Hansen) data as a \n",
+                            "    raster.                                                                                         \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerged_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Merged Global Forest Change (Hansen) data as a \n", + " raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - downloaded_raster (vibe_core.data.rasters.Raster): Individual Global Forest Change (Hansen)   \n",
+                            "    rasters prior to the merge operation.                                                           \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mdownloaded_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Individual Global Forest Change (Hansen) \n", + " rasters prior to the merge operation. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Parameters:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mParameters:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - layer_name (default: None): Name of the Global Forest Change (Hansen) layer. Can be any of the\n",
+                            "    following names 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'.       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mlayer_name\u001b[0m (\u001b[34mdefault: None\u001b[0m): Name of the Global Forest Change (Hansen) layer. Can be any of the\n", + " following names 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - tiles_folder_url (default:                                                                    \n",
+                            "    https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/): URL to the Global   \n",
+                            "    Forest Change (Hansen) dataset. It specifies the dataset version and is used to download the    \n",
+                            "    data.                                                                                           \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mtiles_folder_url\u001b[0m (\u001b[34mdefault: \u001b[0m \n", + " \u001b[34mhttps://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/\u001b[0m): URL to the Global \n", + " Forest Change (Hansen) dataset. It specifies the dataset version and is used to download the \n", + " data. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Tasks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - list: Lists Global Forest Change (Hansen) products that intersect the user-provided           \n",
+                            "    geometry/time range.                                                                            \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mlist\u001b[0m: Lists Global Forest Change (Hansen) products that intersect the user-provided \n", + " geometry/time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - download: Downloads Global Forest Change (Hansen) data.                                       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mdownload\u001b[0m: Downloads Global Forest Change (Hansen) data. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - group: This op groups rasters in time according to 'criterion'.                               \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mgroup\u001b[0m: This op groups rasters in time according to 'criterion'. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merge: Merges rasters in a sequence to a single raster.                                       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerge\u001b[0m: Merges rasters in a sequence to a single raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "client = get_default_vibe_client()\n", + "\n", + "WORKFLOW_NAME = \"data_ingestion/hansen/hansen_forest_change_download\"\n", + "client.document_workflow(WORKFLOW_NAME)" + ] + }, + { + "cell_type": "markdown", + "id": "b894d84c", + "metadata": {}, + "source": [ + "### Setting up Input Geometry and Time Frame\n", + " \n", + "Now, we will establish the desired geometry and time frame for downloading the Hansen products. The workflow will fetch and merge all the tiles that intersect with the given input." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f12b5333", + "metadata": {}, + "outputs": [], + "source": [ + "# GeoJSON definition of a polygon over the potential forest area\n", + "geo_json = {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [-86.773827, 14.575496],\n", + " [-86.770459, 14.579302],\n", + " [-86.764283, 14.575102],\n", + " [-86.769591, 14.567595],\n", + " [-86.773827, 14.575496],\n", + " ]\n", + " ],\n", + " },\n", + " \"properties\": {},\n", + "}\n", + "\n", + "geom = shpg.shape(geo_json[\"geometry\"])\n", + "time_range = datetime(2000, 1, 1), datetime(2022, 1, 2)" + ] + }, + { + "cell_type": "markdown", + "id": "fb34e580", + "metadata": {}, + "source": [ + "### Run FarmVibes.AI Workflow\n", + "\n", + "To execute the workflow users need to provide the geometry of interest (`geom`), time range (`time_range`), and the name of the layer to be downloaded as a workflow parameter (`layer_name`). 
The layer can be any value from the set (`treecover2000`, `gain`, `lossyear`, `datamask`, `first`, `last`).\n", + "\n", + "In the next cell, we initiate two `runs` for the `treecover2000` and `lossyear` layers, and then wait for both workflows to complete (`client.monitor(runs)`)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "76b0f81f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "application/vnd.jupyter.widget-view+json": {
+                            "model_id": "a2e39cd7cde14fa4b96039434eed5ede",
+                            "version_major": 2,
+                            "version_minor": 0
+                        },
+                        "text/plain": [
+                            "Output()"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                }
+            ],
+            "source": [
+                "runs = []\n",
+                "\n",
+                "for layer_name in [\"treecover2000\", \"lossyear\"]:\n",
+                "    run = client.run(\n",
+                "        WORKFLOW_NAME,\n",
+                "        \"Hansen dataset download\",\n",
+                "        geometry=geom,\n",
+                "        time_range=time_range,\n",
+                "        parameters={\"layer_name\": layer_name},\n",
+                "    )\n",
+                "    runs.append(run)\n",
+                "\n",
+                "client.monitor(runs)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "4b517e3c",
+            "metadata": {},
+            "source": [
+                "### Visualizing Dataset Details\n",
+                " \n",
+                "In the upcoming cells, we will depict the changes in the forest over the years within the user's region of interest. Following that, we will examine the division of the area in terms of forest and non-forest proportions. Finally, we will assess how the percentage of forest pixels has evolved over time."
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "44e97e42",
+            "metadata": {},
+            "source": [
+                "#### Plot forest loss over time\n",
+                "\n",
+                "In the following cell, we create a plot function that reads the `treecover2000` layer and the `lossyear`. The `treecover2000` layer has pixel values ranging from 0 to 100 that represent the percentage of tree cover in the area. Here, we use a black to green colormap. Then, we plot the `lossyear` with pixel values encoded as 0 (no loss) or else a value in the range 1-22, representing loss detected primarily in the years 2001-2022, respectively. The second layer is depicted using a yellow to red colormap. "
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 5,
+            "id": "e9c3e812",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "def plot_hansen_map(treecover2000: Raster, lossyear: Raster, geom: shpg.Polygon, first_year: int):\n",
+                "\n",
+                "    colors = [\"black\", \"green\"]\n",
+                "    cmap = mcolors.LinearSegmentedColormap.from_list(\"mycmap\", colors)\n",
+                "\n",
+                "    # Define your geometry\n",
+                "    bounding_box = box(*geom.buffer(0.01).bounds)\n",
+                "\n",
+                "    # Get the bounds of the geometry\n",
+                "    minx, miny, maxx, maxy = bounding_box.bounds\n",
+                "\n",
+                "    merged_raster = cast(Raster, treecover2000)\n",
+                "    out_image = read_raster(merged_raster, bounding_box)[0]\n",
+                "\n",
+                "    loss_image = read_raster(cast(Raster, lossyear), bounding_box)[0][0]\n",
+                "    \n",
+                "    # Create a masked array where the mask is True for zero values\n",
+                "    masked_loss = np.ma.masked_where(loss_image == 0, loss_image)\n",
+                "\n",
+                "    # Set data type to float\n",
+                "    masked_loss = masked_loss.astype(float)\n",
+                "    masked_loss += first_year\n",
+                "\n",
+                "    # Plot the cropped image with latitude and longitude in the axes\n",
+                "    plt.imshow(out_image[0], cmap=cmap, extent=[minx, maxx, miny, maxy])\n",
+                "\n",
+                "    loss_cmap = plt.cm.get_cmap(\"YlOrRd\").copy()\n",
+                "    loss_cmap.set_bad(color=\"none\")\n",
+                "\n",
+                "    # Plot the loss image on top of the cropped image\n",
+                "    plt.imshow(masked_loss, cmap=loss_cmap, alpha=0.8, extent=[minx, maxx, miny, maxy])\n",
+                "\n",
+                "    # Plot geom on top of the cropped image\n",
+                "    plt.plot(*geom.exterior.xy, color=\"blue\")\n",
+                "\n",
+                "    # Add a legend for the loss_image\n",
+                "    cbar = plt.colorbar()\n",
+                "    tick_locator = ticker.MaxNLocator(nbins=max(loss_image.flatten()))\n",
+                "    cbar.locator = tick_locator\n",
+                "    cbar.update_ticks()\n",
+                "\n",
+                "    cbar.set_label(\"Year for the forest to non-forest transition\")\n",
+                "\n",
+                "    plt.title(\"Forest Extent\")\n",
+                "    plt.xlabel(\"Longitude\")\n",
+                "    plt.ylabel(\"Latitude\")\n",
+                "\n",
+                "    plt.text(\n",
+                "        0.11,\n",
+                "        -0.005,\n",
+                "        \"Source: Hansen/UMD/Google/USGS/NASA\",\n",
+                "        fontsize=7,\n",
+                "        transform=plt.gcf().transFigure,\n",
+                "    )\n",
+                "\n",
+                "    plt.show()"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "b334b0e1",
+            "metadata": {},
+            "source": [
+                "### Plot the `treecover2000` and `lossyear` rasters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 6,
+            "id": "223eef5c",
+            "metadata": {},
+            "outputs": [
+                {
+                    "data": {
+                        "image/png": "",
+                        "text/plain": [
+                            "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "treecover2000 = runs[0].output[\"merged_raster\"][0]\n", + "lossyear = runs[1].output[\"merged_raster\"][0]\n", + "\n", + "plot_hansen_map(treecover2000, lossyear, geom, time_range[0].year)" + ] + }, + { + "cell_type": "markdown", + "id": "a8be22b0", + "metadata": {}, + "source": [ + "### Plot the proportion of forest/non-forest pixels\n", + "\n", + "In the next cell, the proportion of forest/non-forest pixels is plotted. This is done by analyzing the `lossyear` raster, which represents forest loss over time. The cell calculates the ratio of forest pixels to total pixels within the specified geometry. Then, a pie chart is created using matplotlib to visually compare the proportion of forest and non-forest pixels. Finally, we show the loss table over time in case the user wants to access the values used to plot the graphs." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b1be08cf", + "metadata": {}, + "outputs": [], + "source": [ + "def read_loss_dict(lossyear: Raster, geom: shpg.Polygon, nodata: int = 255):\n", + " # Read the raster\n", + " loss_image = read_raster(cast(Raster, lossyear), geom, nodata=nodata)[0][0]\n", + "\n", + " # Count the frequency of each value in the loss_image\n", + " unique, counts = np.unique(loss_image, return_counts=True)\n", + "\n", + " loss_dict = {uni: count for uni, count in zip(unique, counts)}\n", + "\n", + " # Delete 255 from the dictionary\n", + " del loss_dict[nodata]\n", + "\n", + " return loss_dict\n", + "\n", + "\n", + "def plot_forest_ratio(lossyear: Raster, geom: shpg.Polygon):\n", + " # Read the raster\n", + "\n", + " loss_dict = read_loss_dict(lossyear, geom)\n", + "\n", + " # Amount of pixels\n", + " total_pixels = sum(loss_dict.values())\n", + "\n", + " # Forest Pixels\n", + " forest_pixels = loss_dict[0]\n", + " forest_ratio = forest_pixels / total_pixels\n", + "\n", + " # Plot a matplotlib pie chart comparing 
forest and not forest pixels\n", + " labels = \"Forest\", \"Not Forest\"\n", + "\n", + " sizes = [forest_ratio, 1 - forest_ratio]\n", + "\n", + " fig1, ax1 = plt.subplots()\n", + " ax1.pie(sizes, labels=labels, autopct=\"%1.1f%%\", startangle=90)\n", + " ax1.axis(\"equal\")\n", + "\n", + " # Title\n", + " plt.title(\"Forest/Non-Forest pixels proportion\")\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7e73283c", + "metadata": {}, + "source": [ + "### Plot the Forest/Non-Forest pie chart" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7fe9a3ef", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_forest_ratio(lossyear, geom)" + ] + }, + { + "cell_type": "markdown", + "id": "ec65f6ba", + "metadata": {}, + "source": [ + "### Plot the Forest change over time\n", + "\n", + "In the next cell, we create a `plot_forest_loss` function that takes a raster image of forest loss per year and a polygon geometry as inputs. It reads the raster data and calculates the percentage of total forest loss for each year. The function then generates a bar plot with the percentage of forest loss for each year in the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0354f1bb", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_forest_loss(lossyear: Raster, geom: shpg.Polygon):\n", + " # Read the raster\n", + " loss_dict = read_loss_dict(lossyear, geom)\n", + "\n", + " # Amount of pixels\n", + " total_pixels = sum(loss_dict.values())\n", + "\n", + " # Pixel count\n", + " new_values = {key: 100 * loss_dict[key] / total_pixels for key in loss_dict}\n", + "\n", + " # Remove the key 0 as it is not a loss. 
It is the forest \n", + " # pixels that have not been lost.\n", + " new_values.pop(0, None)\n", + "\n", + " # Create lists of the years and pixel counts\n", + " years = list(new_values.keys())\n", + " years = [year + time_range[0].year for year in years]\n", + "\n", + " pixel_counts = list(new_values.values())\n", + "\n", + " # Create a bar plot\n", + " plt.bar(years, pixel_counts)\n", + "\n", + " # Set the labels for the x and y axes\n", + " plt.xlabel(\"Year\")\n", + " plt.ylabel(\"Forest Loss (%)\")\n", + " plt.title(\"Forest to Non-Forest affected area (%) over time\")\n", + "\n", + " # Set x-axis to only use integer values\n", + " ax = plt.gca()\n", + " ax.xaxis.set_major_locator(MaxNLocator(integer=True))\n", + "\n", + " plt.xticks(range(min(years), max(years) + 1), rotation=90)\n", + "\n", + " # Show the plot\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0065c6e2", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_forest_loss(lossyear, geom)" + ] + }, + { + "cell_type": "markdown", + "id": "b7e20044", + "metadata": {}, + "source": [ + "### Displaying the Forest Loss Pixels Over Time\n", + "\n", + "Finally, create a function that reads the forest loss data for each year and plots a table showing the pixel count for each respective year." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "eb6bc18d", + "metadata": {}, + "outputs": [], + "source": [ + "def show_loss_table(lossyear: Raster, geom: shpg.Polygon):\n", + " # Read the raster\n", + " loss_dict = read_loss_dict(lossyear, geom)\n", + "\n", + " # Create a dictionary with the loss_dict values\n", + " data = {\n", + " \"Year\": [year + time_range[0].year for year in list(loss_dict.keys())[1:]],\n", + " \"#Pixels\": list(loss_dict.values())[1:],\n", + " }\n", + "\n", + " # Create a dataframe from the dictionary\n", + " df_loss = pd.DataFrame(data)\n", + "\n", + " # Sort the dataframe by the 'Year' column\n", + " df_loss = df_loss.sort_values(\"Year\")\n", + "\n", + " # Reset the index of the dataframe\n", + " df_loss = df_loss.reset_index(drop=True)\n", + "\n", + " # Amount of pixels\n", + " total_pixels = sum(loss_dict.values())\n", + " \n", + " return total_pixels, df_loss" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "01810a05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total pixels: 1786\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Year#Pixels
0200123
120043
2200798
320082
420112
52012244
6201328
72016183
8201768
920191
\n", + "
" + ], + "text/plain": [ + " Year #Pixels\n", + "0 2001 23\n", + "1 2004 3\n", + "2 2007 98\n", + "3 2008 2\n", + "4 2011 2\n", + "5 2012 244\n", + "6 2013 28\n", + "7 2016 183\n", + "8 2017 68\n", + "9 2019 1" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_pixels, df_loss = show_loss_table(lossyear, geom)\n", + "\n", + "print(f\"Total pixels: {total_pixels}\\n\")\n", + "\n", + "# Print the dataframe without the indexes\n", + "df_loss" + ] + } + ], + "metadata": { + "description": "This notebook contains functions to download and process the Global Forest Change (Hansen) maps.", + "disk_space": "", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + }, + "name": "Download Global Forest Change (Hansen) maps.", + "running_time": "", + "tags": [ + "Remote Sensing", + "Deforestation", + "Sustainability" + ] + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/notebooks/forest/forest_change_detection.ipynb b/notebooks/forest/forest_change_detection.ipynb new file mode 100644 index 00000000..6261efde --- /dev/null +++ b/notebooks/forest/forest_change_detection.ipynb @@ -0,0 +1,901 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Detecting Forest Changes with FarmVibes.AI\n", + "\n", + "This notebook demonstrates how to detect forest changes in ALOS PALSAR 2.1 Forest/Non-Forest maps using FarmVibes.AI. The reader can check [this notebook](./download_alos_forest_map.ipynb) to see how to download and visualize ALOS forest maps.\n", + "\n", + "This notebook is divided into the following sections:\n", + "\n", + "1. 
**Workflow setup**: It checks the workflow documentation using the FarmVibes.AI python client, and define the evaluation geometry and time range.\n", + "2. **Running the workflow**: The section shows how to execute the forest changes workflow and provide its parameters.\n", + "3. **Interpreting the results**. Finally, we will visualize and discuss the results." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Micromamba environment setup\n", + "To install the required packages, see [this README file](../README.md). You can activate the environment with the following command:\n", + "```bash\n", + "$ micromamba activate farmvibes-ai\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Workflow setup\n", + "\n", + "In this Jupyter notebook, we are going to run the `forest_ai/deforestation/forest_change_detection` workflow in FarmVibes.AI, \n", + "designed to analyze changes in forest coverage over a specific time range within a user-defined geographical area. \n", + "The next cells will document the workflow using the FarmVibes.AI default client and define the user input (geometry + time-range).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
Workflow: forest_ai/deforestation/alos_trend_detection\n",
+                            "
\n" + ], + "text/plain": [ + "\u001b[1;32mWorkflow:\u001b[0m \u001b[1;4;38;5;27mforest_ai/deforestation/alos_trend_detection\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Description:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mDescription:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    Detects increase/decrease trends in forest pixel levels over the user-input geometry and time   \n",
+                            "    range for the ALOS forest map. This workflow combines the alos_forest_extent_download_merge and \n",
+                            "    ordinal_trend_detection workflows to detect increase/decrease trends in the forest pixel levels \n",
+                            "    over the user-provided geometry and time range for the ALOS forest map. The ALOS PALSAR 2.1     \n",
+                            "    Forest/Non-Forest Maps are downloaded in the alos_forest_extent_download_merge workflow.  Then  \n",
+                            "    the ordinal_trend_detection workflow clips the ordinal raster to the user-provided geometry and \n",
+                            "    time range and determines if there is an increasing or decreasing trend in the forest pixel     \n",
+                            "    levels over them. alos_trend_detection uses the Cochran-Armitage test to detect trends in the   \n",
+                            "    forest levels over the years.  The null hypothesis is that there is no trend in the pixel levels\n",
+                            "    over the list of rasters. The alternative hypothesis is that there is a trend in the forest     \n",
+                            "    pixel levels over the list of rasters (one for each year). It returns a p-value and a z-score.  \n",
+                            "    If the p-value is less than some significance level, the null hypothesis is rejected and the    \n",
+                            "    alternative hypothesis is accepted. If the z-score is positive, the trend is increasing.  If the\n",
+                            "    z-score is negative, the trend is decreasing.                                                   \n",
+                            "
\n" + ], + "text/plain": [ + " Detects increase/decrease trends in forest pixel levels over the user-input geometry and time \n", + " range for the ALOS forest map. This workflow combines the alos_forest_extent_download_merge and \n", + " ordinal_trend_detection workflows to detect increase/decrease trends in the forest pixel levels \n", + " over the user-provided geometry and time range for the ALOS forest map. The ALOS PALSAR 2.1 \n", + " Forest/Non-Forest Maps are downloaded in the alos_forest_extent_download_merge workflow. Then \n", + " the ordinal_trend_detection workflow clips the ordinal raster to the user-provided geometry and \n", + " time range and determines if there is an increasing or decreasing trend in the forest pixel \n", + " levels over them. alos_trend_detection uses the Cochran-Armitage test to detect trends in the \n", + " forest levels over the years. The null hypothesis is that there is no trend in the pixel levels\n", + " over the list of rasters. The alternative hypothesis is that there is a trend in the forest \n", + " pixel levels over the list of rasters (one for each year). It returns a p-value and a z-score. \n", + " If the p-value is less than some significance level, the null hypothesis is rejected and the \n", + " alternative hypothesis is accepted. If the z-score is positive, the trend is increasing. If the\n", + " z-score is negative, the trend is decreasing. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sources:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSources:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - user_input (vibe_core.data.core_types.DataVibe): Time range and geometry of interest.         \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1muser_input\u001b[0m (\u001b[34mvibe_core.data.core_types.DataVibe\u001b[0m): Time range and geometry of interest. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Sinks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mSinks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - merged_raster (vibe_core.data.rasters.Raster): Merged raster of the ALOS PALSAR 2.1           \n",
+                            "    Forest/Non-Forest Map for the user-provided geometry and time range.                            \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mmerged_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Merged raster of the ALOS PALSAR 2.1 \n", + " Forest/Non-Forest Map for the user-provided geometry and time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - categorical_raster (vibe_core.data.rasters.CategoricalRaster): Categorical raster of the ALOS \n",
+                            "    PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range before the merge \n",
+                            "    operation.                                                                                      \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mcategorical_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.CategoricalRaster\u001b[0m): Categorical raster of the ALOS \n", + " PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range before the merge \n", + " operation. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - recoded_raster (vibe_core.data.rasters.Raster): Recoded raster of the ALOS PALSAR 2.1         \n",
+                            "    Forest/Non-Forest Map for the user-provided geometry and time range.                            \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mrecoded_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Recoded raster of the ALOS PALSAR 2.1 \n", + " Forest/Non-Forest Map for the user-provided geometry and time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - clipped_raster (vibe_core.data.rasters.Raster): Clipped ordinal raster for the user-provided  \n",
+                            "    geometry and time range.                                                                        \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mclipped_raster\u001b[0m (\u001b[34mvibe_core.data.rasters.Raster\u001b[0m): Clipped ordinal raster for the user-provided \n", + " geometry and time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - trend_test_result (vibe_core.data.core_types.OrdinalTrendTest): Cochran-armitage test results \n",
+                            "    composed of p-value and z-score.                                                                \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mtrend_test_result\u001b[0m (\u001b[34mvibe_core.data.core_types.OrdinalTrendTest\u001b[0m): Cochran-armitage test results \n", + " composed of p-value and z-score. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Parameters:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mParameters:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - pc_key (default: None): Planetary Computer API key.                                           \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mpc_key\u001b[0m (\u001b[34mdefault: None\u001b[0m): Planetary Computer API key. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - from_values (default: task defined): Values to recode from.                                   \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mfrom_values\u001b[0m (\u001b[34mdefault: task defined\u001b[0m): Values to recode from. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - to_values (default: task defined): Values to recode to.                                       \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mto_values\u001b[0m (\u001b[34mdefault: task defined\u001b[0m): Values to recode to. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+                            "Tasks:\n",
+                            "
\n" + ], + "text/plain": [ + "\n", + "\u001b[1;32mTasks:\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - alos_forest_extent_download_merge: Downloads Advanced Land Observing Satellite (ALOS)         \n",
+                            "    forest/non-forest classification map and merges it into a single raster.                        \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1malos_forest_extent_download_merge\u001b[0m: Downloads Advanced Land Observing Satellite (ALOS) \n", + " forest/non-forest classification map and merges it into a single raster. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
    - ordinal_trend_detection: Detects increase/decrease trends in the pixel levels over the        \n",
+                            "    user-input geometry and time range.                                                             \n",
+                            "
\n" + ], + "text/plain": [ + " - \u001b[1mordinal_trend_detection\u001b[0m: Detects increase/decrease trends in the pixel levels over the \n", + " user-input geometry and time range. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import sys\n", + "\n", + "from shapely.geometry import box\n", + "from shapely import geometry as shpg\n", + "from typing import cast\n", + "from datetime import datetime\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from typing import Optional, List\n", + "\n", + "from vibe_core.client import get_default_vibe_client\n", + "from vibe_core.data import CategoricalRaster, Raster\n", + "from vibe_core.data import CategoricalRaster\n", + "\n", + "sys.path.append(\"../\")\n", + "from shared_nb_lib.raster import read_raster\n", + "from shared_nb_lib.plot import plot_categorical_maps\n", + "\n", + "\n", + "# Create the FarmVibes.AI default client\n", + "client = get_default_vibe_client()\n", + "\n", + "WORKFLOW_NAME = \"forest_ai/deforestation/alos_trend_detection\"\n", + "client.document_workflow(WORKFLOW_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Workflow discussion\n", + "\n", + "The workflow tasks involve downloading Advanced Land Observing Satellite (ALOS) forest/non-forest classification maps and merging them into a single raster (`alos_forest_extent_download_merge`). The ALOS products are clipped to the user's geometry (`clip`), and the result is used to calculate the Cochran-Armitage trend test (`trend_test_result`). 
This test checks if there has been a statistically significant trend in the forest distribution over time.\n", + "\n", + "The `user_input` is composed of a geometry and a time-range, which will be defined in the next cell.\n", + "\n", + "As output (`sinks`), the workflow provides a list of merged rasters (`merged_raster`) that encompass the user input geometry for each year defined in the time-range; the products are available in the [Planetary Computer dataset](https://planetarycomputer.microsoft.com/dataset/alos-fnf-mosaic).\n", + "It also produces a list of categorical rasters (`categorical_raster`) that intersect with the user-provided geometry and time range. The distinction between `categorical_raster` and `merged_raster` is that multiple `categorical_raster` tiles can be combined to form the `merged_raster` if the user's geometry intersects with more than one forest tile. The `recoded_raster` contains the rasters with the recoded values for the forest maps. Finally, `trend_test_result` determines whether the pixel distribution has changed over time. This is useful for determining if there is statistical evidence of a trend in the forest area over time. This change could represent either an increase or decrease in the frequency of forest pixels (i.e., only changes are detected).\n", + "\n", + "The workflow parameters are the Planetary Computer API key (`pc_key`) and the recode rasters parameters that are used to map the values from the dataset raster to the recoded raster. 
For example, if the original raster has values `(2, 1, 3, 4, 5)` and assuming the default values of `from_values` and `to_values` are respectively `[1, 2, 3, 4, 5]` and `[6, 7, 8, 9, 10]`, the recoded raster will have values `(7, 6, 8, 9, 10)`.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "geo_json = {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [-86.783827, 14.565498],\n", + " [-86.780459, 14.569303],\n", + " [-86.774283, 14.565106],\n", + " [-86.779591, 14.557595],\n", + " [-86.783827, 14.565498],\n", + " ]\n", + " ],\n", + " },\n", + " \"properties\": {},\n", + "}\n", + "\n", + "\n", + "geom = shpg.shape(geo_json[\"geometry\"])\n", + "time_range = ((datetime(2017, 1, 1), datetime(2020, 12, 31)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Running the Workflow\n", + "\n", + "Observe that we pass the geometry and time-range as workflow inputs, along with the following parameters:\n", + "\n", + "* `pc_key`: This corresponds to the Planetary Computer API key, which is useful for downloading planetary computer imagery.\n", + "* `from_values`: Values to recode from. For the ALOS dataset, the default value is `[4, 3, 0, 2, 1]`.\n", + "* `to_values`: Values to recode to, `[0, 0, 0, 1, 1]` are the default values for the ALOS dataset.\n", + "\n", + "\n", + "For this particular case, we are mapping the forest values from the [ALOS dataset](https://planetarycomputer.microsoft.com/dataset/alos-fnf-mosaic) to `1` and `2` depending on the canopy cover level and everything else to `0`.\n", + "\n", + "| Encoded Value | Description | Recoded Value (Forest-Level) | Recoded Value Semantics |\n", + "| ------------- | ----------- | ----------- | --------------|\n", + "| 0 | No data | 0 | Non-Forest |\n", + "| 1 | Forest (>90% canopy cover) | 2 | Dense-Forest |\n", + "| 2 | Forest 
(10-90% canopy cover) | 1 | Forest |\n", + "| 3 | Non-forest | 0 | Non-Forest |\n", + "| 4 | Water | 0 | Non-Forest |\n", + "\n", + "Please check the [workflow documentation page](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/WORKFLOWS.html) to see how parameters are provided. Also, refer to the [SECRETS documentation](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/SECRETS.html) to learn how a secret can be added to the FarmVibes.AI cluster.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "application/vnd.jupyter.widget-view+json": {
+                            "model_id": "671c2cedcf4a49b2a183d9014f732178",
+                            "version_major": 2,
+                            "version_minor": 0
+                        },
+                        "text/plain": [
+                            "Output()"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "
\n"
+                        ],
+                        "text/plain": []
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                }
+            ],
+            "source": [
+                "# Execute the workflow\n",
+                "run = client.run(\n",
+                "    WORKFLOW_NAME,\n",
+                "    \"Forest Change Detection\",\n",
+                "    geometry=geom,\n",
+                "    time_range=time_range,\n",
+                "    parameters={\n",
+                "        \"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\",\n",
+                "        \"from_values\": [4, 3, 0, 2, 1],\n",
+                "        \"to_values\": [0, 0, 0, 1, 2]\n",
+                "    },\n",
+                ")\n",
+                "run.monitor()"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "## 3. Interpreting the results\n",
+                "\n",
+                "The `trend_test_result` is an output from the workflow run, which performs a trend test (Cochran-Armitage). The result of this test includes a `csv` file containing a contingency table, which shows the distribution of pixel counts across the different categories of land cover for each year. This allows us to observe how the distribution of pixel categories has changed over time."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 4,
+            "metadata": {},
+            "outputs": [
+                {
+                    "data": {
+                        "text/html": [
+                            "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
2017/01/01-2017/12/312018/01/01-2018/12/312019/01/01-2019/12/312020/01/01-2020/12/31
category
Non-Forest178.049.0107.069.0
Forest675.0628.0553.0363.0
Dense-forest273.0449.0466.0694.0
\n", + "
" + ], + "text/plain": [ + " 2017/01/01-2017/12/31 2018/01/01-2018/12/31 \\\n", + "category \n", + "Non-Forest 178.0 49.0 \n", + "Forest 675.0 628.0 \n", + "Dense-forest 273.0 449.0 \n", + "\n", + " 2019/01/01-2019/12/31 2020/01/01-2020/12/31 \n", + "category \n", + "Non-Forest 107.0 69.0 \n", + "Forest 553.0 363.0 \n", + "Dense-forest 466.0 694.0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trend_test_test_results = run.output[\"trend_test_result\"][0] # type: ignore\n", + "df = pd.read_csv(trend_test_test_results.assets[0].path_or_url, index_col=0) # type: ignore\n", + "\n", + "\n", + "level_names = [\"Non-Forest\", \"Forest\", \"Dense-forest\"]\n", + "index = dict(zip(df.index, level_names))\n", + "df.index = df.index.map(index)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read the output data\n", + "\n", + "In the next cell, we adopt the user-provided geometry to read the output raster and create some buffer around it." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Define your geometry\n", + "bounding_box = box(*geom.buffer(0.01).bounds)\n", + "\n", + "# Get the bounds of the geometry\n", + "minx, miny, maxx, maxy = bounding_box.bounds\n", + "\n", + "merged_rasters = run.output[\"merged_raster\"]\n", + "categories = cast(CategoricalRaster, run.output[\"categorical_raster\"][0]).categories\n", + "forest_images = []\n", + "\n", + "for raster in merged_rasters:\n", + " merged_raster = cast(Raster, raster)\n", + " forest_images.append(read_raster(merged_raster, bounding_box)[0][0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the result map\n", + "\n", + "Finally, we plot the raster images with the existing categories and the user-provided geometry (red area within the plot)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "color_dict = {\n", + " 0: \"black\",\n", + " 1: \"darkgreen\",\n", + " 2: \"lightgreen\",\n", + " 3: \"gray\",\n", + " 4: \"blue\",\n", + "}\n", + "\n", + "titles = [\n", + " str(raster.time_range[0].year)\n", + " for raster in run.output[\"merged_raster\"]\n", + "]\n", + "\n", + "plot_categorical_maps(\n", + " forest_images,\n", + " color_dict,\n", + " categories,\n", + " titles=titles,\n", + " suptitle=\"ALOS Forest Map\",\n", + " geom_exterior=geom.exterior.xy,\n", + " extent=[minx, maxx, miny, maxy],\n", + " figsize=(10, 7),\n", + " xlabel=\"Longitude\",\n", + " ylabel=\"Latitude\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read the ordinal raster data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "recoded_rasters = run.output[\"recoded_raster\"]\n", + "forest_images = []\n", + "\n", + "for raster in recoded_rasters:\n", + " ordinal_raster = cast(Raster, raster)\n", + " forest_images.append(read_raster(ordinal_raster, bounding_box)[0][0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Present the results using plot\n", + "\n", + "Although we've observed a change in the frequency of forest pixels over the years, we can use statistical testing to determine whether there is a significant trend in the forest change or it is simply the result of random fluctuations in the data. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_pixel_distribution(\n", + " df: pd.DataFrame,\n", + " highlight_column: Optional[int] = None,\n", + " ax: Optional[plt.Axes] = None,\n", + " use_legend: bool = True,\n", + " colors: Optional[List[\"str\"]] = None,\n", + " title: Optional[str] = \"Pixel Distribution Over Time\",\n", + "):\n", + " filtered_df = df.copy()\n", + " # Extract the year from the column names\n", + " filtered_df.columns = [label[:4] for label in filtered_df.columns]\n", + " # Transpose the DataFrame\n", + " df_transposed = filtered_df.transpose()\n", + "\n", + " # Normalize the data so that the pixel counts sum to 100% for each year\n", + " df_normalized = df_transposed.div(df_transposed.sum(axis=1), axis=0) * 100\n", + "\n", + " if colors: \n", + " ax = df_normalized.plot(kind=\"bar\", stacked=True, ax=ax, color=colors)\n", + " else:\n", + " ax = df_normalized.plot(kind=\"bar\", stacked=True, ax=ax)\n", + "\n", + " # Highlight the specified column by setting its alpha to 1\n", + " if highlight_column is not None:\n", + " # Apply alpha to all bars\n", + " for container in ax.containers:\n", + " for bar in container:\n", + " bar.set_alpha(0.3)\n", + "\n", + " for container in ax.containers:\n", + " container[highlight_column].set_alpha(1)\n", + "\n", + " ax.set_title(title)\n", + " ax.set_ylabel(\"Pixel Distribution\")\n", + " ax.set_xlabel(\"Category\")\n", + "\n", + " # Add a legend outside of the plot\n", + " if use_legend:\n", + " ax.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\") \n", + " else:\n", + " ax.get_legend().remove()\n", + "\n", + " return df_normalized\n", + "\n", + "\n", + "color_dict = {\n", + " 0: \"grey\",\n", + " 1: \"lightgreen\",\n", + " 2: \"darkgreen\",\n", + "}\n", + "\n", + "titles = [str(raster.time_range[0].year) for raster in recoded_rasters]\n", + "\n", + "# We plot the categorical raster pixel distribution again for\n", + "# better visualization of the pixel 
distribution over time\n", + "plot_categorical_maps(\n", + " forest_images,\n", + " color_dict,\n", + " level_names,\n", + " titles=titles,\n", + " suptitle=\"Recoded ALOS Forest Map\",\n", + " geom_exterior=geom.exterior.xy,\n", + " extent=[minx, maxx, miny, maxy],\n", + " figsize=(10, 7),\n", + " xlabel=\"Longitude\",\n", + " ylabel=\"Latitude\",\n", + ")\n", + "\n", + "# Filter out the \"no data\" category\n", + "filtered_df = df\n", + "colors = [color_dict[level_names.index(cat)] for cat in filtered_df.index]\n", + "# get the corresponding colors\n", + "_ = plot_pixel_distribution(filtered_df, use_legend=True, colors=colors)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conclusion\n", + "\n", + "Finally, we employed the Cochran-Armitage trend test. This test is specifically designed to determine if there is an increasing/decreasing trend between the two categorical variables. In our case, the two variables are 'forest/non-forest pixels distribution' and 'year'. By applying the trend test, we can quantitatively assess whether the changes we've observed in pixel categories over time are statistically significant, or if they could be attributed to random variation.\n", + "\n", + "The conclusion of this test is printed in the next cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "******* COCHAN-ARMITAGE TREND TEST RESULTS *******\n", + "p-value 6.127822361209561e-60\n", + "significance level 0.05\n", + "z_score 16.329103461498548\n", + "******* CONCLUSION *******\n", + "The null hypothesis is rejected.\n", + "The categorical rasters are positively dependent, so the level of forest cover is increasing.\n" + ] + } + ], + "source": [ + "SIGNIFICANCE_LEVEL = 0.05\n", + "\n", + "print(\"******* COCHAN-ARMITAGE TREND TEST RESULTS *******\")\n", + "print(f\"p-value {trend_test_test_results.p_value}\")\n", + "print(f\"significance level {SIGNIFICANCE_LEVEL}\")\n", + "print(f\"z_score {trend_test_test_results.z_score}\")\n", + "\n", + "print(\"******* CONCLUSION *******\")\n", + "if trend_test_test_results.p_value < SIGNIFICANCE_LEVEL:\n", + " print(\"The null hypothesis is rejected.\")\n", + " \n", + " if trend_test_test_results.z_score > 0:\n", + " print(\"The categorical rasters are positively dependent, so the level of forest cover is increasing.\")\n", + " else:\n", + " print(\"The categorical rasters are negatively dependent, so the level of forest cover is decreasing.\")\n", + "\n", + "else:\n", + " print(\"The null hypothesis is not rejected. 
The categorical rasters are independent.\")" + ] + } + ], + "metadata": { + "description": "Helps users to detect forest changes", + "disk_space": "", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + }, + "name": "Detecting Forest Changes", + "running_time": "", + "tags": [ + "Remote Sensing", + "Deforestation", + "Sustainability" + ] + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/notebooks/heatmaps/notebook_lib/utils.py b/notebooks/heatmaps/notebook_lib/utils.py index 82013373..ec52c060 100644 --- a/notebooks/heatmaps/notebook_lib/utils.py +++ b/notebooks/heatmaps/notebook_lib/utils.py @@ -77,9 +77,7 @@ def create_heatmap_admag( imagery: Raster, farm_infO: Dict[str, str], parameters: Dict[str, Any] ) -> str: sample_inputs = ADMAgSeasonalFieldInput( - farmer_id=farm_infO["farmer_id"], - seasonal_field_id=farm_infO["seasonal_field_id"], - boundary_id=farm_infO["boundary_id"], + party_id=farm_infO["party_id"], seasonal_field_id=farm_infO["seasonal_field_id"] ) inputs = {"input_raster": imagery, "admag_input": sample_inputs} @@ -161,9 +159,8 @@ def get_seasonal_field( farm_infO: Dict[str, str], parameters: Dict[str, Any] ) -> Dict[str, Any]: sample_inputs = ADMAgSeasonalFieldInput( - farmer_id=farm_infO["farmer_id"], + party_id=farm_infO["party_id"], seasonal_field_id=farm_infO["seasonal_field_id"], - boundary_id=farm_infO["boundary_id"], ) inputs = {"admag_input": sample_inputs} diff --git a/notebooks/heatmaps/nutrients_using_classification_admag.ipynb b/notebooks/heatmaps/nutrients_using_classification_admag.ipynb index 497ed334..55a2fc6c 100755 --- a/notebooks/heatmaps/nutrients_using_classification_admag.ipynb +++ 
b/notebooks/heatmaps/nutrients_using_classification_admag.ipynb @@ -7,7 +7,7 @@ "source": [ "# FarmVibes.AI Nutrients Heatmap\n", "\n", - "This notebook demonstrates how to run the heatmap workflow on sentinel imagery by integrating with [Microsoft Azure Data Manager for Agriculture (ADMAg)](https://learn.microsoft.com/en-us/azure/data-manager-for-agri/). The workflow accepts Farmer_ID, Seasonal_Field_ID and Boundary_ID information to download samples of soil properties (such as carbon and nitrogen) from ADMAg, and generate an interpolated heatmap based on the input imagery.\n", + "This notebook demonstrates how to run the heatmap workflow on sentinel imagery by integrating with [Microsoft Azure Data Manager for Agriculture (ADMAg)](https://learn.microsoft.com/en-us/azure/data-manager-for-agri/). The workflow accepts Party_ID and Seasonal_Field_ID information to download samples of soil properties (such as carbon and nitrogen) from ADMAg, and generate an interpolated heatmap based on the input imagery. The notebook is using the ADMAg version 2023-11-01-preview for demonstration.\n", "\n", "### Micromamba environment setup\n", "Before running this notebook, let's build a micromamba environment. 
If you do not have micromamba installed, please follow the instructions from the [micromamba installation guide](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html).\n", @@ -92,21 +92,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Farm information\n", "FARM_INFO = {\n", - " \"farmer_id\": \"\",\n", - " \"boundary_id\": \"\",\n", - " \"seasonal_field_id\": \"\"\n", + " \"party_id\": \"\",\n", + " \"seasonal_field_id\": '',\n", "}" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -156,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -166,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -218,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -680,7 +679,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -728,6 +727,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/azureuser/.cache/farmvibes-ai/data/assets/8d11c61e-afc3-4656-aaf7-65503a7937d0/result.zip'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "archive_path" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/segment_anything/basemap_segmentation.ipynb b/notebooks/segment_anything/basemap_segmentation.ipynb index 4a259530..6ab7113a 100644 --- a/notebooks/segment_anything/basemap_segmentation.ipynb +++ b/notebooks/segment_anything/basemap_segmentation.ipynb @@ -55,7 +55,7 @@ "**If you are importing the ONNX files to your cluster for the first time**, make sure the following environment 
is configured:\n", "\n", "```bash\n", - "$ micromamba env create -f env_cpu.yml\n", + "$ micromamba env create -f env_cpu.yaml\n", "$ micromamba activate segment_anything_cpu\n", "```\n", "\n", diff --git a/notebooks/segment_anything/sam_exploration.ipynb b/notebooks/segment_anything/sam_exploration.ipynb index a990ae1d..31b5d4d3 100644 --- a/notebooks/segment_anything/sam_exploration.ipynb +++ b/notebooks/segment_anything/sam_exploration.ipynb @@ -33,13 +33,13 @@ "Without GPU support (CPU):\n", "\n", "```bash\n", - "$ micromamba env create -f env_cpu.yml\n", + "$ micromamba env create -f env_cpu.yaml\n", "$ micromamba activate segment_anything_cpu\n", "```\n", "\n", "With GPU support:\n", "```bash\n", - "$ micromamba env create -f env_gpu.yml\n", + "$ micromamba env create -f env_gpu.yaml\n", "$ micromamba activate segment_anything\n", "```" ] @@ -899,7 +899,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.8.17" }, "name": "Field boundary segmentation (SAM exploration)", "orig_nbformat": 4, diff --git a/notebooks/segment_anything/sentinel2_segmentation.ipynb b/notebooks/segment_anything/sentinel2_segmentation.ipynb index 7b75ae0f..f5c8c549 100644 --- a/notebooks/segment_anything/sentinel2_segmentation.ipynb +++ b/notebooks/segment_anything/sentinel2_segmentation.ipynb @@ -34,7 +34,7 @@ "**If you are importing the ONNX files to your cluster for the first time**, make sure the following environment is configured:\n", "\n", "```bash\n", - "$ micromamba env create -f env_cpu.yml\n", + "$ micromamba env create -f env_cpu.yaml\n", "$ micromamba activate segment_anything_cpu\n", "```\n", "\n", @@ -147,9 +147,9 @@ "source": [ "## Workflow setup\n", "\n", - "FarmVibes.AI has a few workflows related to SAM. 
The `ml/segment_anything/prompt_segmentation` is the basic workflow that takes a Sentinel-2 raster, an input geometry of the Region of Interest (RoI), and an `ExternalReferenceList` pointing to a GeoDataFrame containing the points and/or bounding boxes used as prompts, their labels (`foreground` or `background`) and associated prompt ids (indicating the prompt to which a point belongs), and returns a CategoricalRaster with the segmentation results (one per prompt).\n", + "FarmVibes.AI has a few workflows related to SAM. The `ml/segment_anything/s2_prompt_segmentation` is the basic workflow that takes a Sentinel-2 raster, an input geometry of the Region of Interest (RoI), and an `ExternalReferenceList` pointing to a GeoDataFrame containing the points and/or bounding boxes used as prompts, their labels (`foreground` or `background`) and associated prompt ids (indicating the prompt to which a point belongs), and returns a CategoricalRaster with the segmentation results (one per prompt).\n", "\n", - "To facilitate its use, we also provide the `farm_ai/segmentation/segment_s2` workflow, which combines the `data_ingestion/sentinel2/preprocess_s2` workflow to download Sentinel-2 imagery and the `ml/segment_anything/prompt_segmentation` workflow to run the segmentation. In addition to the `ExternalReferenceList` inputs for the prompts, this workflow expects a `DataVibe` with the geometry and a time range of interest.\n", + "To facilitate its use, we also provide the `farm_ai/segmentation/segment_s2` workflow, which combines the `data_ingestion/sentinel2/preprocess_s2` workflow to download Sentinel-2 imagery and the `ml/segment_anything/s2_prompt_segmentation` workflow to run the segmentation. 
In addition to the `ExternalReferenceList` inputs for the prompts, this workflow expects a `DataVibe` with the geometry and a time range of interest.\n", "\n", "Before inspecting how the workflow is defined, let's instantiate our client:\n" ] diff --git a/notebooks/sentinel/field_level_spectral_indices.ipynb b/notebooks/sentinel/field_level_spectral_indices.ipynb index 672feae6..52db3d86 100644 --- a/notebooks/sentinel/field_level_spectral_indices.ipynb +++ b/notebooks/sentinel/field_level_spectral_indices.ipynb @@ -296,7 +296,7 @@ "idx_list = np.linspace(0, len(cloud_free_rasters) - 1, num=10, dtype=int)\n", "for raster_idx in idx_list:\n", " run = client.run(\n", - " \"ml/segment_anything/prompt_segmentation\", \n", + " \"ml/segment_anything/s2_prompt_segmentation\", \n", " f\"SAM - Raster {raster_idx}\", \n", " input_data={\n", " \"input_raster\": cloud_free_rasters[raster_idx], \n", diff --git a/notebooks/shared_nb_lib/plot.py b/notebooks/shared_nb_lib/plot.py index 36d6742f..fd406270 100644 --- a/notebooks/shared_nb_lib/plot.py +++ b/notebooks/shared_nb_lib/plot.py @@ -1,13 +1,15 @@ import io from copy import deepcopy -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple import matplotlib import matplotlib.pyplot as plt import numpy as np from IPython.core.display import Image from IPython.display import display +from matplotlib.axes import Axes from matplotlib.colors import ListedColormap +from matplotlib.figure import Figure from numpy._typing import NDArray @@ -30,16 +32,22 @@ def transparent_cmap(cmap: ListedColormap, max_alpha: float = 0.8, N: int = 255) return mycmap -def plot_categorical_map( +def _plot_categorical_map( dataset: List[List[float]], color_dict: Dict[int, str], labels: List[str], geom_exterior: Optional[NDArray[Any]] = None, extent: Optional[List[float]] = None, - title: str = "Category Map", - xlabel: str = "longitude", - ylabel: str = "latitude", + title: str = "", + xlabel: str = "", + 
ylabel: str = "", + fig: Optional[Figure] = None, + ax: Optional[Axes] = None, ): + # Plot the figure + if not fig or not ax: + fig, ax = plt.subplots() + # Create a colormap from the color dictionary cmap = ListedColormap([color_dict[x] for x in color_dict.keys()]) # type: ignore @@ -49,22 +57,92 @@ def plot_categorical_map( norm = matplotlib.colors.BoundaryNorm(norm_bins, len(labels), clip=True) fmt = matplotlib.ticker.FuncFormatter(lambda x, _: labels[norm(x)]) # type: ignore - # Plot the figure - fig, ax = plt.subplots() - extent = extent or [0, len(dataset[0]), 0, len(dataset)] im = ax.imshow(dataset, cmap=cmap, extent=extent, norm=norm) if geom_exterior is not None: # Plot geom on top of the cropped image - plt.plot(*geom_exterior, color="red") + ax.plot(*geom_exterior, color="red") - plt.title(title) - plt.xlabel(xlabel) - plt.ylabel(ylabel) + if title: + ax.set_title(title) + if xlabel: + ax.set_xlabel(xlabel) + if ylabel: + ax.set_ylabel(ylabel) diff = norm_bins[1:] - norm_bins[:-1] tickz = norm_bins[:-1] + diff / 2 - fig.colorbar(im, format=fmt, ticks=tickz) + + return im, fmt, tickz + + +def plot_categorical_map( + dataset: List[List[float]], + color_dict: Dict[int, str], + labels: List[str], + geom_exterior: Optional[NDArray[Any]] = None, + extent: Optional[List[float]] = None, + title: str = "Category Map", + xlabel: str = "longitude", + ylabel: str = "latitude", + fig: Optional[Figure] = None, + ax: Optional[Axes] = None, +): + im, fmt, tickz = _plot_categorical_map( + dataset=dataset, + color_dict=color_dict, + labels=labels, + geom_exterior=geom_exterior, + extent=extent, + title=title, + xlabel=xlabel, + ylabel=ylabel, + fig=fig, + ax=ax, + ) + + plt.colorbar(im, format=fmt, ticks=tickz) + plt.show() + + return im, fmt, tickz + + +def plot_categorical_maps( + datasets: List[List[List[float]]], + color_dict: Dict[int, str], + labels: List[str], + titles: List[str], + suptitle: str, + geom_exterior: Optional[NDArray[Any]] = None, + extent: 
Optional[List[float]] = None, + xlabel: str = "", + ylabel: str = "", + n_cols: int = 2, + figsize: Tuple[int, int] = (12, 10), +): + rows = int(np.ceil(len(datasets) / n_cols)) + fig, axes = plt.subplots(rows, n_cols, figsize=figsize, sharex=True, sharey=True) + + im, fmt, tickz = None, None, None + for i, dataset in enumerate(datasets): + im, fmt, tickz = _plot_categorical_map( + dataset=dataset, + color_dict=color_dict, + labels=labels, + geom_exterior=geom_exterior, + extent=extent, + title=titles[i], + fig=fig, + ax=axes[i // n_cols, i % n_cols], # type: ignore + ) + fig.supxlabel(xlabel) + fig.supylabel(ylabel) + fig.suptitle(suptitle) + + fig.subplots_adjust(right=0.8) + cbar_ax = fig.add_axes([0.85, 0.15, 0.02, 0.7]) + fig.colorbar(im, cax=cbar_ax, format=fmt, ticks=tickz) + plt.show() diff --git a/scripts/export_sam_models.py b/scripts/export_sam_models.py new file mode 100644 index 00000000..72578604 --- /dev/null +++ b/scripts/export_sam_models.py @@ -0,0 +1,217 @@ +# Script to export SAM models to ONNX files and add them to FarmVibes.AI cluster. 
+# This was heavily inspired by Visheratin's export_onnx_model script available in: +# https://github.com/visheratin/segment-anything/blob/main/scripts/export_onnx_model.py + +import argparse +import os +import subprocess +import warnings +from dataclasses import dataclass +from tempfile import TemporaryDirectory +from typing import Optional, Tuple + +import onnx +import torch +from onnx.external_data_helper import convert_model_to_external_data +from segment_anything import sam_model_registry +from segment_anything.modeling.sam import Sam +from segment_anything.utils.onnx import SamOnnxModel + +from vibe_core.file_downloader import download_file + +try: + import onnxruntime # type: ignore + + onnxruntime_exists = True +except ImportError: + onnxruntime_exists = False + + +@dataclass +class ModelInfo: + url: str + should_use_data_file: bool + + +MODELS = { + "vit_b": ModelInfo( + url="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth", + should_use_data_file=False, + ), + "vit_l": ModelInfo( + url="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth", + should_use_data_file=False, + ), + "vit_h": ModelInfo( + url="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth", + should_use_data_file=True, + ), +} +RETURN_SINGLE_MASK = True +ONNX_OPSET = 17 + +HERE = os.path.dirname(os.path.abspath(__file__)) +PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..")) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Download SAM model(s), export to ONNX files, and add to FarmVibes.AI cluster." 
+ ) + + parser.add_argument( + "--models", + nargs="+", + choices=["vit_b", "vit_l", "vit_h"], + required=True, + help="A list of SAM model types to export (among 'vit_b', 'vit_l', and 'vit_h').", + ) + + return parser.parse_args() + + +def export_model(model_type: str, downloaded_path: str, dir_path: str) -> Tuple[str, str]: + encoder_output = os.path.join(dir_path, f"{model_type}_encoder.onnx") + encoder_data_file = ( + os.path.join(dir_path, f"{model_type}_encoder_data_file.onnx") + if MODELS[model_type].should_use_data_file + else None + ) + + decoder_output = os.path.join(dir_path, f"{model_type}_decoder.onnx") + + sam = sam_model_registry[model_type](checkpoint=downloaded_path) + + encoder_path = export_encoder(sam, encoder_output, encoder_data_file) + decoder_path = export_decoder(sam, decoder_output) + + return (encoder_path, decoder_path) + + +def export_encoder(sam: Sam, output: str, data_file_output: Optional[str]) -> str: + dynamic_axes = { + "x": {0: "batch"}, + } + dummy_inputs = { + "x": torch.randn(1, 3, 1024, 1024, dtype=torch.float), + } + _ = sam.image_encoder(**dummy_inputs) + + output_names = ["image_embeddings"] + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # type: ignore + warnings.filterwarnings("ignore", category=UserWarning) + print(f"Exporting onnx model to {output}...") + torch.onnx.export( + sam.image_encoder, + tuple(dummy_inputs.values()), + output, + export_params=True, + verbose=False, + opset_version=ONNX_OPSET, + do_constant_folding=True, + input_names=list(dummy_inputs.keys()), + output_names=output_names, + dynamic_axes=dynamic_axes, + ) + + if data_file_output: + onnx_model = onnx.load(output) + convert_model_to_external_data( + onnx_model, + all_tensors_to_one_file=True, + location=data_file_output, + size_threshold=1024, + convert_attribute=False, + ) + onnx.save_model(onnx_model, output) + + if onnxruntime_exists: + ort_inputs = {k: v.cpu().numpy() for k, v in 
dummy_inputs.items()} + ort_session = onnxruntime.InferenceSession(output) # type: ignore + _ = ort_session.run(None, ort_inputs) + print("Encoder has successfully been run with ONNXRuntime.") + + return output + + +def export_decoder(sam: Sam, output: str) -> str: + onnx_model = SamOnnxModel(model=sam, return_single_mask=RETURN_SINGLE_MASK) + + dynamic_axes = { + "point_coords": {1: "num_points"}, + "point_labels": {1: "num_points"}, + } + + embed_dim = sam.prompt_encoder.embed_dim + embed_size = sam.prompt_encoder.image_embedding_size + mask_input_size = [4 * x for x in embed_size] + dummy_inputs = { + "image_embeddings": torch.randn(1, embed_dim, *embed_size, dtype=torch.float), + "point_coords": torch.randint(low=0, high=1024, size=(1, 5, 2), dtype=torch.float), + "point_labels": torch.randint(low=0, high=4, size=(1, 5), dtype=torch.float), + "mask_input": torch.randn(1, 1, *mask_input_size, dtype=torch.float), + "has_mask_input": torch.tensor([1], dtype=torch.float), + "orig_im_size": torch.tensor([1500, 2250], dtype=torch.float), + } + + _ = onnx_model(**dummy_inputs) + + output_names = ["masks", "iou_predictions", "low_res_masks"] + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # type: ignore + warnings.filterwarnings("ignore", category=UserWarning) + with open(output, "wb") as f: + print(f"Exporting onnx model to {output}...") + torch.onnx.export( + onnx_model, + tuple(dummy_inputs.values()), + f, # type: ignore + export_params=True, + verbose=False, + opset_version=ONNX_OPSET, + do_constant_folding=True, + input_names=list(dummy_inputs.keys()), + output_names=output_names, + dynamic_axes=dynamic_axes, + ) + + if onnxruntime_exists: + ort_inputs = {k: v.cpu().numpy() for k, v in dummy_inputs.items()} + providers = ["CPUExecutionProvider"] + ort_session = onnxruntime.InferenceSession(output, providers=providers) # type: ignore + _ = ort_session.run(None, ort_inputs) + print("Decoder has successfully 
been run with ONNXRuntime.") + + return output + + +def add_to_cluster(exported_paths: Tuple[str, str]): + for path in exported_paths: + print(f"Adding {path} to cluster...") + subprocess.run( + [ + "farmvibes-ai", + "local", + "add-onnx", + path, + ], + check=True, + ) + + +def main(): + args = parse_args() + + with TemporaryDirectory() as tmp_dir: + for model_type in args.models: + model_url = MODELS[model_type].url + downloaded_path = download_file(model_url, os.path.join(tmp_dir, f"{model_type}.pth")) + exported_paths = export_model(model_type, downloaded_path, tmp_dir) + add_to_cluster(exported_paths) + + +if __name__ == "__main__": + main() diff --git a/src/vibe_core/pyproject.toml b/src/vibe_core/pyproject.toml index 0a82306a..108d9d01 100644 --- a/src/vibe_core/pyproject.toml +++ b/src/vibe_core/pyproject.toml @@ -18,7 +18,7 @@ vibe_core = ["terraform/*.tf"] [project] name = "vibe-core" -version ="2024.02.08" +version ="2024.04.04" authors = [ { name="Microsoft FarmVibes.AI Team", email="eywa-devs@microsoft.com" }, ] diff --git a/src/vibe_core/vibe_core/admag_client.py b/src/vibe_core/vibe_core/admag_client.py index 8ec85f79..f7f0b9f8 100644 --- a/src/vibe_core/vibe_core/admag_client.py +++ b/src/vibe_core/vibe_core/admag_client.py @@ -104,20 +104,50 @@ def header(self) -> Dict[str, str]: return header - def _request(self, method: str, endpoint: str, *args: Any, **kwargs: Any): - response = self.session.request(method, urljoin(self.base_url, endpoint), *args, **kwargs) + def _request( + self, method: str, endpoint: str, data: Dict[str, Any] = {}, *args: Any, **kwargs: Any + ): + resp = self.session.request( + method, urljoin(self.base_url, endpoint), *args, **kwargs, json=data + ) try: - r = json.loads(response.text) + r = json.loads(resp.text) except json.JSONDecodeError: - r = response.text + r = resp.text try: - response.raise_for_status() + resp.raise_for_status() except HTTPError as e: error_message = r.get("message", "") if isinstance(r, dict) else r 
msg = f"{e}. {error_message}" raise HTTPError(msg, response=e.response) + return cast(Any, r) + def _iterate(self, response: Dict[str, Any]): + visited_next_links = set() + + composed_response = {self.CONTENT_TAG: response[self.CONTENT_TAG]} + next_link = "" if self.LINK_TAG not in response else response[self.LINK_TAG] + next_link_index = 0 + while next_link: + if next_link in visited_next_links: + raise RuntimeError(f"Repeated nextLink {next_link} in ADMAg get request") + + if next_link_index >= self.NEXT_PAGES_LIMIT: + raise RuntimeError(f"Next pages limit {self.NEXT_PAGES_LIMIT} exceded") + tmp_response = self._request( + "GET", + next_link, + timeout=self.DEFAULT_TIMEOUT, + ) + if self.CONTENT_TAG in tmp_response: + composed_response[self.CONTENT_TAG].extend(tmp_response[self.CONTENT_TAG]) + visited_next_links.add(next_link) + next_link_index = next_link_index + 1 + next_link = "" if self.LINK_TAG not in tmp_response else tmp_response[self.LINK_TAG] + response = composed_response + return response + def _get(self, endpoint: str, params: Dict[str, Any] = {}): request_params = {"api-version": self.api_version} request_params.update(params) @@ -127,41 +157,36 @@ def _get(self, endpoint: str, params: Dict[str, Any] = {}): params=request_params, timeout=self.DEFAULT_TIMEOUT, ) - visited_next_links = set() if self.CONTENT_TAG in response: - composed_response = {self.CONTENT_TAG: response[self.CONTENT_TAG]} - next_link = "" if self.LINK_TAG not in response else response[self.LINK_TAG] - next_link_index = 0 - while next_link: - if next_link in visited_next_links: - raise RuntimeError(f"Repeated nextLink {next_link} in ADMAg get request") - - if next_link_index >= self.NEXT_PAGES_LIMIT: - raise RuntimeError(f"Next pages limit {self.NEXT_PAGES_LIMIT} exceded") - tmp_response = self._request( - "GET", - next_link, - timeout=self.DEFAULT_TIMEOUT, - ) - if self.CONTENT_TAG in tmp_response: - composed_response[self.CONTENT_TAG].extend(tmp_response[self.CONTENT_TAG]) - 
visited_next_links.add(next_link) - next_link_index = next_link_index + 1 - next_link = "" if self.LINK_TAG not in tmp_response else tmp_response[self.LINK_TAG] - response = composed_response + response = self._iterate(response) + return response - def get_seasonal_fields(self, farmer_id: str, params: Dict[str, Any] = {}): - """Retrieves the seasonal fields for a given farmer. + def _post( + self, endpoint: str, params: Dict[str, Any] = {}, data: Dict[str, Any] = {} + ) -> Dict[str, Any]: + request_params = {"api-version": self.api_version, "maxPageSize": 1000} + request_params.update(params) + response = self._request( + "POST", endpoint, params=request_params, timeout=self.DEFAULT_TIMEOUT, data=data + ) - :param farmer_id: The ID of the farmer. + if self.CONTENT_TAG in response: + response = self._iterate(response) + + return response + + def get_seasonal_fields(self, party_id: str, params: Dict[str, Any] = {}): + """Retrieves the seasonal fields for a given party. + + :param party_id: The ID of the party. :param params: Additional parameters to be passed to the request. Defaults to {}. :return: The information for each seasonal fields. """ - endpoint = f"/farmers/{farmer_id}/seasonal-fields" + endpoint = f"/parties/{party_id}/seasonal-fields" request_params = {"api-version": self.api_version} request_params.update(params) @@ -170,41 +195,29 @@ def get_seasonal_fields(self, farmer_id: str, params: Dict[str, Any] = {}): params=request_params, ) - def get_field(self, farmer_id: str, field_id: str): + def get_field(self, party_id: str, field_id: str): """ - Retrieves the field information for a given farmer and field. + Retrieves the field information for a given party and field. - :param farmer_id: The ID of the farmer. + :param party_id: The ID of the party. :param field_id: The ID of the field. :return: The field information. 
""" - endpoint = f"/farmers/{farmer_id}/fields/{field_id}" + endpoint = f"/parties/{party_id}/fields/{field_id}" return self._get(endpoint) - def get_seasonal_field(self, farmer_id: str, seasonal_field_id: str): - """Retrieves the information of a seasonal field for a given farmer. + def get_seasonal_field(self, party_id: str, seasonal_field_id: str): + """Retrieves the information of a seasonal field for a given party. - :param farmer_id: The ID of the farmer. + :param party_id: The ID of the party. :param seasonal_field_id: The ID of the seasonal field. :return: The seasonal field information. """ - endpoint = f"/farmers/{farmer_id}/seasonal-fields/{seasonal_field_id}" - return self._get(endpoint) - - def get_boundary(self, farmer_id: str, boundary_id: str): - """Retrieves the information of a boundary for a given farmer. - - :param farmer_id: The ID of the farmer. - - :param boundary_id: The ID of the boundary. - - :return: The boundary information. - """ - endpoint = f"farmers/{farmer_id}/boundaries/{boundary_id}" + endpoint = f"/parties/{party_id}/seasonal-fields/{seasonal_field_id}" return self._get(endpoint) def get_season(self, season_id: str): @@ -219,22 +232,23 @@ def get_season(self, season_id: str): def get_operation_info( self, - farmer_id: str, - associated_boundary_ids: List[str], + party_id: str, + intersects_with_geometry: Dict[str, Any], operation_name: str, min_start_operation: str, max_end_operation: str, + associated_resource: Dict[str, str], sources: List[str] = [], ): """ - Retrieves the information of a specified operation for a given farmer. + Retrieves the information of a specified operation for a given party. This method will return information about the specified operation name, - in the specified time range, for the given farmer and associated boundary IDs. + in the specified time range, for the given party and associated resource. - :param farmer_id: The ID of the farmer. + :param party_id: The ID of the party. 
- :param associated_boundary_ids: The IDs of the boundaries associated to the operation. + :param intersects_with_geometry: geometry of associated resource. :param operation_name: The name of the operation. @@ -246,35 +260,42 @@ def get_operation_info( :return: The operation information. """ - endpoint = f"/farmers/{farmer_id}/{operation_name}" + endpoint = f"/{operation_name}:search" params = { "api-version": self.api_version, - "associatedBoundaryIds": associated_boundary_ids, + } + + data = { + "partyId": party_id, + "intersectsWithGeometry": intersects_with_geometry, "minOperationStartDateTime": min_start_operation, "maxOperationEndDateTime": max_end_operation, + "associatedResourceType": associated_resource["type"], + "associatedResourceIds": [associated_resource["id"]], } if sources: - params["sources"] = sources + data["sources"] = sources - return self._get(endpoint, params=params) + return self._post(endpoint, params=params, data=data) def get_harvest_info( self, - farmer_id: str, - associated_boundary_ids: List[str], + party_id: str, + intersects_with_geometry: Dict[str, Any], min_start_operation: str, max_end_operation: str, + associated_resource: Dict[str, str], ): - """Retrieves the harvest information for a given farmer. + """Retrieves the harvest information for a given party. - This method will return the harvest information for a given farmer, - associated with the provided boundary ids, between the start and end - operation dates specified. + This method will return the harvest information for a given resource, + associated with the provided party id, between the start & end + operation dates specified and intersecting with input geometry. - :param farmer_id: ID of the farmer. + :param party_id: ID of the party. - :param associated_boundary_ids: List of associated boundary IDs. + :param intersects_with_geometry: geometry of associated resource. :param min_start_operation: The minimum start date of the operation. 
@@ -283,29 +304,31 @@ def get_harvest_info( :return: Dictionary with harvest information. """ return self.get_operation_info( - farmer_id=farmer_id, - associated_boundary_ids=associated_boundary_ids, + party_id=party_id, + intersects_with_geometry=intersects_with_geometry, operation_name="harvest-data", min_start_operation=min_start_operation, max_end_operation=max_end_operation, + associated_resource=associated_resource, ) def get_fertilizer_info( self, - farmer_id: str, - associated_boundary_ids: List[str], + party_id: str, + intersects_with_geometry: Dict[str, Any], min_start_operation: str, max_end_operation: str, + associated_resource: Dict[str, str], ): - """Retrieves the fertilizer information for a given farmer. + """Retrieves the fertilizer information for a given party. - This method will return the fertilizer information for a given farmer, - associated with the provided boundary ids, between the start and end - operation dates specified. + This method will return the fertilizer information for a given resource, + associated with the provided party id, between the start & end + operation dates specified and intersecting with input geometry. - :param farmer_id: ID of the farmer. + :param party_id: ID of the party. - :param associated_boundary_ids: List of associated boundary IDs. + :param intersects_with_geometry: geometry of associated resource. :param min_start_operation: The minimum start date of the operation. @@ -314,30 +337,32 @@ def get_fertilizer_info( :return: Dictionary with fertilizer information. 
""" return self.get_operation_info( - farmer_id=farmer_id, - associated_boundary_ids=associated_boundary_ids, + party_id=party_id, + intersects_with_geometry=intersects_with_geometry, operation_name="application-data", min_start_operation=min_start_operation, max_end_operation=max_end_operation, sources=["Fertilizer"], + associated_resource=associated_resource, ) def get_organic_amendments_info( self, - farmer_id: str, - associated_boundary_ids: List[str], + party_id: str, + intersects_with_geometry: Dict[str, Any], min_start_operation: str, max_end_operation: str, + associated_resource: Dict[str, str], ): - """Retrieves the organic amendments information for a given farmer. + """Retrieves the organic amendments information for a given party. - This method will return the organic amendments information for a given farmer, - associated with the provided boundary ids, between the start and end - operation dates specified. + This method will return the organic amendments information for a given resource, + associated with the provided party id, between the start & end + operation dates specified and intersecting with input geometry. - :param farmer_id: ID of the farmer. + :param party_id: ID of the party. - :param associated_boundary_ids: List of associated boundary IDs. + :param intersects_with_geometry: geometry of associated resource. :param min_start_operation: The minimum start date of the operation. 
@@ -347,30 +372,32 @@ def get_organic_amendments_info( """ return self.get_operation_info( - farmer_id=farmer_id, - associated_boundary_ids=associated_boundary_ids, + party_id=party_id, + intersects_with_geometry=intersects_with_geometry, operation_name="application-data", min_start_operation=min_start_operation, max_end_operation=max_end_operation, sources=["Omad"], + associated_resource=associated_resource, ) def get_tillage_info( self, - farmer_id: str, - associated_boundary_ids: List[str], + party_id: str, + intersects_with_geometry: Dict[str, Any], min_start_operation: str, max_end_operation: str, + associated_resource: Dict[str, str], ): - """Retrieves the tillage information for a given farmer. + """Retrieves the tillage information for a given party. - This method will return the tillage information for a given farmer, - associated with the provided boundary ids, between the start and end - operation dates specified. + This method will return the tillage information for a given resource, + associated with the provided party id, between the start & end + operation dates specified and intersecting with input geometry. - :param farmer_id: ID of the farmer. + :param party_id: ID of the Party. - :param associated_boundary_ids: List of associated boundary IDs. + :param intersects_with_geometry: geometry of associated resource. :param min_start_operation: The minimum start date of the operation. @@ -379,20 +406,21 @@ def get_tillage_info( :return: Dictionary with tillage information. """ return self.get_operation_info( - farmer_id=farmer_id, - associated_boundary_ids=associated_boundary_ids, + party_id=party_id, + intersects_with_geometry=intersects_with_geometry, operation_name="tillage-data", min_start_operation=min_start_operation, max_end_operation=max_end_operation, + associated_resource=associated_resource, ) - def get_prescription_map_id(self, farmer_id: str, field_id: str, crop_id: str): - """Retrieves the prescription map ID for a given farmer. 
+ def get_prescription_map_id(self, party_id: str, field_id: str, crop_id: str): + """Retrieves the prescription map ID for a given party. - This method will return the prescription map ID for a given farmer, + This method will return the prescription map ID for a given party, associated with the provided field and crop IDs. - :param farmer_id: ID of the farmer. + :param party_id: ID of the Party. :param field_id: ID of the field. @@ -400,20 +428,80 @@ def get_prescription_map_id(self, farmer_id: str, field_id: str, crop_id: str): return: Dictionary with prescription map ID. """ - endpoint = f"farmers/{farmer_id}/prescription-maps" - return self._get(endpoint, params={"fieldId": field_id, "cropId": crop_id}) + endpoint = f"parties/{party_id}/prescription-maps" + return self._get(endpoint, params={"fieldIds": [field_id], "cropIds": [crop_id]}) - def get_prescriptions(self, farmer_id: str, prescription_map_id: str): - """Retrieves the prescriptions for a given farmer. + def get_prescriptions( + self, party_id: str, prescription_map_id: str, geometry: Dict[str, Any] = {} + ) -> Dict[str, Any]: + """Retrieves the prescriptions for a given party. - This method will return the prescriptions for a given farmer, + This method will return the prescriptions for a given party, associated with the provided prescription map ID. - :param farmer_id: ID of the farmer. + :param party_id: ID of the party. :param prescription_map_id: ID of the prescription map. + :param geometry: geometry intersect with prescriptions. + return: Dictionary with prescriptions. 
""" - endpoint = f"farmers/{farmer_id}/prescriptions" - return self._get(endpoint, params={"prescriptionMapIds": prescription_map_id}) + endpoint = "/prescription:search" + return self._post( + endpoint, + params={}, + data={ + "partyId": party_id, + "prescriptionMapIds": [prescription_map_id], + "intersectsWithGeometry": geometry, + }, + ) + + def get_prescription(self, party_id: str, prescription_id: str): + """Retrieves the prescription for a given party. + + This method will return the prescription for a given party, + associated with the provided party_id. + + :param party_id: ID of the Party. + + :param prescription_id: ID of the prescription. + + return: Dictionary with prescription. + """ + endpoint = f"parties/{party_id}/prescriptions/{prescription_id}" + return self._get(endpoint) + + def get_planting_info( + self, + party_id: str, + intersects_with_geometry: Dict[str, Any], + min_start_operation: str, + max_end_operation: str, + associated_resource: Dict[str, str], + ): + """Retrieves the Planting information for a given resource. + + This method will return the Planting information for a given resource, + associated with the provided party id, between the start & end + operation dates specified and intersecting with input geometry. + + :param resource: resource linked to planting information. + + :param intersects_with_geometry: resource geometry. + + :param min_start_operation: The minimum start date of the operation. + + :param max_end_operation: The maximum end date of the operation. + + :return: Dictionary with planting information. 
+ """ + return self.get_operation_info( + party_id=party_id, + intersects_with_geometry=intersects_with_geometry, + operation_name="planting-data", + min_start_operation=min_start_operation, + max_end_operation=max_end_operation, + associated_resource=associated_resource, + ) diff --git a/src/vibe_core/vibe_core/cli/constants.py b/src/vibe_core/vibe_core/cli/constants.py index 2107b1ad..998af048 100644 --- a/src/vibe_core/vibe_core/cli/constants.py +++ b/src/vibe_core/vibe_core/cli/constants.py @@ -1,5 +1,5 @@ DEFAULT_IMAGE_PREFIX = "farmai/terravibes/" -DEFAULT_IMAGE_TAG = "2024.02.08" +DEFAULT_IMAGE_TAG = "2024.04.04" DEFAULT_REGISTRY_PATH = "mcr.microsoft.com" LOCAL_SERVICE_URL_PATH_FILE = "service_url" diff --git a/src/vibe_core/vibe_core/cli/helper.py b/src/vibe_core/vibe_core/cli/helper.py index e49efbf3..51f7f6b9 100644 --- a/src/vibe_core/vibe_core/cli/helper.py +++ b/src/vibe_core/vibe_core/cli/helper.py @@ -10,6 +10,7 @@ AUTO_CONFIRMATION = False DEFAULT_ERROR_STRING = "Unable to execute command" +WARNING_STRINGS = ("[warning]", "[Warning]", "[WARNING]", "WARNING:", "Warning:", "warning:") @lru_cache @@ -42,10 +43,12 @@ def execute_cmd( stdout_capture: List[str] = [] with process.stdout: # type: ignore binary = os.path.basename(cmd[0]) + is_running_az = binary.split(".")[0].lower() == "az" for line in iter(process.stdout.readline, b""): # type: ignore if line: decoded = line.decode(get_subprocess_encoding()).rstrip() - stdout_capture.append(decoded) + if not is_running_az or (is_running_az and not decoded.startswith(WARNING_STRINGS)): + stdout_capture.append(decoded) if not censor_output: log_subprocess(binary, decoded, subprocess_log_level) retcode = process.wait() @@ -74,8 +77,16 @@ def verify_to_proceed(message: str) -> bool: if AUTO_CONFIRMATION: return True - confirmation = input(f"{message} (y/n): ") - if confirmation and confirmation.lower() == "y": + answered = False + confirmation = False + while not answered: + confirmation = input(f"{message} 
(y/n): ").lower() + if confirmation not in ["y", "n", "yes", "no"]: + print("Invalid input. Please enter 'y' or 'n'") + continue + answered = True + confirmation = confirmation[0] + if confirmation == "y": return True return False diff --git a/src/vibe_core/vibe_core/cli/local.py b/src/vibe_core/vibe_core/cli/local.py index 509d6684..9af42564 100644 --- a/src/vibe_core/vibe_core/cli/local.py +++ b/src/vibe_core/vibe_core/cli/local.py @@ -18,6 +18,7 @@ from vibe_core.cli.osartifacts import InstallType, OSArtifacts from vibe_core.cli.wrappers import ( AzureCliWrapper, + DaprWrapper, DockerWrapper, K3dWrapper, KubectlWrapper, @@ -237,6 +238,7 @@ def setup( image_prefix: str = DEFAULT_IMAGE_PREFIX, data_path: str = "", worker_replicas: int = 0, + enable_telemetry: bool = False, port: int = DEFAULT_PORT, host: str = DEFAULT_HOST, is_update: bool = False, @@ -304,6 +306,10 @@ def setup( if username and password: log(f"Creating Docker credentials for registry {registry}") + try: + kubectl.delete_secret("acrtoken") + except Exception: + pass kubectl.create_docker_token("acrtoken", registry, username, password) if not worker_replicas: @@ -314,6 +320,15 @@ def setup( ) return False + dapr_updated = False + dapr = DaprWrapper(kubectl.os_artifacts, kubectl) + if is_update and dapr.needs_upgrade(): + log("Upgrading Dapr CRDs") + if not dapr.upgrade_crds(): + log("Unable to upgrade Dapr CRDs", level="error") + return False + dapr_updated = True + terraform = TerraformWrapper(k3d.os_artifacts, az) with terraform.workspace(f"farmvibes-k3d-{k3d.cluster_name}"): terraform.ensure_local_cluster( @@ -327,6 +342,7 @@ def setup( data_path, worker_replicas, kubectl.context_name, + enable_telemetry, is_update=is_update, ) # We might have downloaded newer images, so we have to fix permissions @@ -340,6 +356,13 @@ def setup( except Exception: log("Unable to fix permissions on containerd image path", level="warning") + if dapr_updated: + log("dapr upgraded, restarting services") + with 
kubectl.context(kubectl.cluster_name): + kubectl.restart( + "deployment", selectors=["backend=terravibes"] + ) + log(f"Cluster {'update' if is_update else 'setup'} complete!") if not is_update: @@ -574,6 +597,7 @@ def dispatch(args: argparse.Namespace): else: log("Aborting update due to old cluster being present", level="error") return False + enable_telemetry = args.enable_telemetry if hasattr(args, "enable_telemetry") else False return setup( k3d, args.servers, @@ -589,6 +613,7 @@ def dispatch(args: argparse.Namespace): args.image_prefix, data_path, args.worker_replicas, + enable_telemetry, args.port, args.host, is_update=is_update, diff --git a/src/vibe_core/vibe_core/cli/parsers.py b/src/vibe_core/vibe_core/cli/parsers.py index 4e8aba20..c10ff46e 100644 --- a/src/vibe_core/vibe_core/cli/parsers.py +++ b/src/vibe_core/vibe_core/cli/parsers.py @@ -34,6 +34,11 @@ "china", ] +HERE = os.path.dirname(os.path.abspath(__file__)) +CORE_DIR = os.path.dirname(HERE) +LOCAL_OTEL_PATH = os.path.join(CORE_DIR, "terraform", "local", "modules", "kubernetes", "otel.tf") +REMOTE_OTEL_PATH = os.path.join(CORE_DIR, "terraform", "aks", "modules", "kubernetes", "otel.tf") + class CliParser(ABC): SUPPORTED_COMMANDS = [ @@ -196,6 +201,14 @@ def _add_setup_update_flags(self): help="Port to use for registry on host", ) + if os.path.exists(LOCAL_OTEL_PATH): + command.add_argument( + "--enable-telemetry", + default=False, + action="store_true", + help="Enable telemetry for FarmVibes.AI", + ) + def _add_common_flags(self): cluster_name = os.environ.get( "FARMVIBES_AI_CLUSTER_NAME", @@ -314,3 +327,11 @@ def _add_setup_update_flags(self): default=3, help="Number of worker replicas to use", ) + + if os.path.exists(REMOTE_OTEL_PATH): + command.add_argument( + "--enable-telemetry", + default=False, + action="store_true", + help="Enable telemetry for FarmVibes.AI", + ) diff --git a/src/vibe_core/vibe_core/cli/remote.py b/src/vibe_core/vibe_core/cli/remote.py index 5f62cdf6..8318ce5a 100644 --- 
a/src/vibe_core/vibe_core/cli/remote.py +++ b/src/vibe_core/vibe_core/cli/remote.py @@ -7,7 +7,7 @@ from vibe_core.cli.helper import in_wsl, log_should_be_logged_in, verify_to_proceed from vibe_core.cli.logging import ColorFormatter, log from vibe_core.cli.osartifacts import OSArtifacts -from vibe_core.cli.wrappers import AzureCliWrapper, KubectlWrapper, TerraformWrapper +from vibe_core.cli.wrappers import AzureCliWrapper, DaprWrapper, KubectlWrapper, TerraformWrapper DESTROY_WARNING = ( "Destroying the cluster will delete *ALL* resources under the resource group " @@ -27,7 +27,9 @@ def _initialize_kubectl( if not config_context: log("Couldn't get Kubernetes config context", level="error") return None - return KubectlWrapper(az.os_artifacts, config_context=config_context) + return KubectlWrapper( + az.os_artifacts, cluster_name=az.cluster_name, config_context=config_context + ) def status(os_artifacts: OSArtifacts, az: AzureCliWrapper, environment: str) -> bool: @@ -100,6 +102,7 @@ def setup_or_upgrade( log_level: str, is_update: bool, max_worker_nodes: int = MAX_WORKER_NODES, + enable_telemetry: bool = False, worker_replicas: int = 0, environment: str = "", current_user_name: str = "", @@ -172,6 +175,8 @@ def setup_or_upgrade( az.cluster_name, az.resource_group, ) + else: + az.refresh_aks_credentials() storage_name, container_name, storage_access_key = az.ensure_azurerm_backend( region, @@ -210,9 +215,24 @@ def setup_or_upgrade( storage_name, container_name, storage_access_key, - cleanup_state=not is_update, + enable_telemetry, # Required to create azure monitor and application insights + cleanup_state=True, is_update=is_update, ) + + dapr_updated = False + kubectl = _initialize_kubectl(az, terraform) + if not kubectl: + log("Couldn't initialize kubectl, not updating", level="error") + return False + dapr = DaprWrapper(kubectl.os_artifacts, kubectl) + if is_update and dapr.needs_upgrade(): + log("Upgrading Dapr CRDs") + if not dapr.upgrade_crds(): + log("Unable to 
upgrade Dapr CRDs", level="error") + return False + dapr_updated = True + k8s_results = terraform.ensure_k8s_cluster( az.cluster_name, tenant_id, @@ -231,10 +251,12 @@ def setup_or_upgrade( infra_results["storage_connection_key"]["value"], infra_results["storage_account_name"]["value"], infra_results["userfile_container_name"]["value"], + infra_results["monitor_instrumentation_key"]["value"], storage_name, container_name, storage_access_key, - cleanup_state=not is_update, + enable_telemetry, + cleanup_state=True, ) terraform.ensure_services( az.cluster_name, @@ -247,11 +269,17 @@ def setup_or_upgrade( image_prefix, image_tag, k8s_results["shared_resource_pv_claim_name"]["value"], + k8s_results["otel_service_name"]["value"] if enable_telemetry else "", worker_replicas, log_level, - cleanup_state=not is_update, + cleanup_state=True, ) + if dapr_updated: + log("dapr upgraded, restarting services") + with kubectl.context(kubectl.cluster_name): + kubectl.restart("deployment", selectors=["backend=terravibes"]) + except Exception as e: log(f"{e.__class__.__name__}: {e}") log( @@ -392,6 +420,7 @@ def dispatch(args: argparse.Namespace): if args.action in {"setup", "update"}: az.refresh_az_creds() az.expand_azure_region(args.region.strip()) + enable_telemetry = args.enable_telemetry if hasattr(args, "enable_telemetry") else False ret = setup_or_upgrade( os_artifacts, az, @@ -405,6 +434,7 @@ def dispatch(args: argparse.Namespace): args.log_level, any([args.action in e for e in {"up", "upgrade", "update"}]), max_worker_nodes=args.max_worker_nodes, + enable_telemetry=enable_telemetry, worker_replicas=args.worker_replicas, environment=args.environment, current_user_name=args.cluster_admin_name, diff --git a/src/vibe_core/vibe_core/cli/wrappers.py b/src/vibe_core/vibe_core/cli/wrappers.py index 9cde8739..d4df6b96 100644 --- a/src/vibe_core/vibe_core/cli/wrappers.py +++ b/src/vibe_core/vibe_core/cli/wrappers.py @@ -1,6 +1,7 @@ import hashlib import json import os +import pkgutil 
import platform import re import shutil @@ -11,6 +12,8 @@ from functools import partialmethod from typing import Any, Dict, List, Optional, Tuple +import requests + from .constants import RABBITMQ_IMAGE_TAG, REDIS_IMAGE_TAG from .helper import execute_cmd, is_port_free, log_should_be_logged_in, verify_to_proceed from .logging import ColorFormatter, log @@ -208,7 +211,7 @@ def init( if backend_config: f = tempfile.NamedTemporaryFile(mode="w", dir=temp_dir, delete=False) contents = "\n".join([f'{k} = "{v}"' for k, v in backend_config.items()]) - if on_windows: + if on_windows(): log( ( "We're on Windows, replacing backslashes in backend file " @@ -271,6 +274,7 @@ def ensure_infra( storage_name: str, container_name: str, storage_access_key: str, + enable_telemetry: bool, cleanup_state: bool = False, is_update: bool = False, ): @@ -296,6 +300,7 @@ def ensure_infra( "prefix": cluster_name, "kubeconfig_location": self.os_artifacts.config_dir, "max_worker_nodes": worker_nodes, + "enable_telemetry": f"{'true' if enable_telemetry else 'false'}", "resource_group_name": resource_group, } @@ -353,9 +358,11 @@ def ensure_k8s_cluster( storage_connection_key: str, storage_account_name: str, userfile_container_name: str, + monitor_instrumentation_key: str, backend_storage_name: str, backend_container_name: str, backend_storage_access_key: str, + enable_telemetry: bool, cleanup_state: bool = False, ): # Do kubernetes infra now @@ -390,9 +397,11 @@ def ensure_k8s_cluster( "storage_connection_key": storage_connection_key, "storage_account_name": storage_account_name, "userfile_container_name": userfile_container_name, + "monitor_instrumentation_key": monitor_instrumentation_key, "resource_group_name": resource_group, "current_user_name": current_user_name, "certificate_email": certificate_email, + "enable_telemetry": str(enable_telemetry).lower(), } state_file = self.os_artifacts.get_terraform_file( @@ -414,6 +423,7 @@ def ensure_services( image_prefix: str, image_tag: str, 
shared_resource_pv_claim_name: str, + otel_service_name: str, worker_replicas: int, log_level: str, cleanup_state: bool = False, @@ -442,6 +452,7 @@ def ensure_services( "image_prefix": image_prefix, "image_tag": image_tag, "shared_resource_pv_claim_name": shared_resource_pv_claim_name, + "otel_service_name": otel_service_name, "worker_replicas": worker_replicas, "farmvibes_log_level": log_level, } @@ -465,6 +476,7 @@ def ensure_local_cluster( data_path: str, worker_replicas: int, config_context: str, + enable_telemetry: bool, redis_image_tag: str = REDIS_IMAGE_TAG, rabbitmq_image_tag: str = RABBITMQ_IMAGE_TAG, is_update: bool = False, @@ -484,6 +496,7 @@ def ensure_local_cluster( "image_prefix": image_prefix, "redis_image_tag": redis_image_tag, "rabbitmq_image_tag": rabbitmq_image_tag, + "enable_telemetry": f"{'true' if enable_telemetry else 'false'}", "farmvibes_log_level": log_level, "max_log_file_bytes": f"{max_log_file_bytes}" if max_log_file_bytes else "", "log_backup_count": f"{log_backup_count}" if log_backup_count else "", @@ -1018,6 +1031,7 @@ def get_storage_account_key(self, storage_name: str): storage_name, "-o", "json", + "--only-show-errors", ] error = "Couldn't get storage account keys. Do you have access to the resource group?" 
results = execute_cmd(cmd, True, False, error, censor_output=True) @@ -1446,6 +1460,27 @@ def restart(self, kind: str, selectors: List[str] = [], name: str = "", cluster_ ) return True + def apply_or_replace(self, file_path: str, cluster_name: str = ""): + cluster_name = self._actual_cluster_name(cluster_name) + with self.context(cluster_name): + for kind in "apply replace".split(): + try: + log(f"Applying {kind} {file_path}", level="debug") + cmd = [self.os_artifacts.kubectl, kind, "-f", file_path] + execute_cmd( + cmd, + error_string=f"Unable to {kind} {file_path}", + subprocess_log_level="debug", + ) + log(f"Successfully {kind} {file_path}", level="debug") + return True + except Exception as e: + if kind == "apply": + log(f"Failed to apply {file_path}: {e} (will try again)", level="warning") + continue + log(f"Failed to apply updates to CRD {file_path}", level="error") + return False # Should never reach here + class K3dWrapper: CONTAINERD_IMAGE_PATH = "/var/lib/rancher/k3s/agent/containerd/io.containerd.content.v1.content" @@ -1652,3 +1687,94 @@ def exec(self, container_name: str, command: List[str]): check_empty_result=False, ) return result + + +class DaprWrapper: # DaprWrapr 🫠 + VERSION_STRING = "VERSION" + CRD_BASE = "https://raw.githubusercontent.com/dapr/dapr/v{}/charts/dapr/crds/" + CRD_FILES = [ + "components.yaml", + "configuration.yaml", + "subscription.yaml", + "resiliency.yaml", + "httpendpoints.yaml", + ] + + def __init__( + self, + os_artifacts: OSArtifacts, + kubectl: KubectlWrapper, + cluster_kind: str = "local", + namespace: str = "dapr-system", + ): + self.cluster_kind = cluster_kind + self.os_artifacts = os_artifacts + self.namespace = namespace + self.kubectl = kubectl + + def _version_column(self, header: str) -> int: + reversed_header = list(reversed(header.split())) + return -reversed_header.index(self.VERSION_STRING) - 1 - 1 + + def _target_version(self) -> str: + # use pkg_resources to find dapr.tf: + dapr_tf = pkgutil.get_data( + 
"vibe_core.terraform", f"{self.cluster_kind}/modules/kubernetes/dapr.tf" + ) + if not dapr_tf: + raise ValueError("Unable to find dapr.tf") + target = re.findall('version\\s+=\\s+"(.*)"', dapr_tf.decode("utf-8"))[0] + assert len(target) > 0, "Unable to find Dapr version in dapr.tf" + return target + + def version(self): + cmd = [self.os_artifacts.dapr, "status", "-k"] + with self.kubectl.context(self.kubectl.cluster_name): + result = execute_cmd( + cmd, error_string="Unable to get Dapr version", subprocess_log_level="debug" + ) + lines = result.split("\n") + version_column = self._version_column(lines[0]) + all_versions = set([line.split()[version_column] for line in lines[1:] if line]) + return [v for v in all_versions] + + def needs_upgrade(self): + version_tuple = tuple(map(int, self._target_version().split("."))) + current_versions_tuples = [tuple(map(int, v.split("."))) for v in self.version()] + return len(current_versions_tuples) == 0 or any( + [v < version_tuple for v in current_versions_tuples if v > (1, 0, 0)] + ) + + def url_exists(self, url: str) -> bool: + try: + response = requests.head(url) + return response.status_code == 200 + except requests.exceptions.RequestException: + return False + + def upgrade_crds(self): + # Upgrading dapr is a two-stage process. + # First, we upgrade the CRDs, then, we use terraform to upgrade the dapr runtime. 
+ status = [] + for crd in self.CRD_FILES: + url = self.CRD_BASE.format(self._target_version()) + crd + if not self.url_exists(url): + log(f"CRD {crd} not found at {url}, ignoring it", level="warning") + continue + status.append(self.kubectl.apply_or_replace(url)) + return all(status) + + def upgrade(self): + cmd = [ + self.os_artifacts.dapr, + "upgrade", + "-k", + f"--runtime-version={self._target_version()}", + ] + log(f"Upgrading Dapr to version {self._target_version()}") + with self.kubectl.context(self.kubectl.cluster_name): + execute_cmd( + cmd, + error_string="Unable to upgrade Dapr", + subprocess_log_level="debug", + ) diff --git a/src/vibe_core/vibe_core/client.py b/src/vibe_core/vibe_core/client.py index 108adc1c..4e397d0e 100644 --- a/src/vibe_core/vibe_core/client.py +++ b/src/vibe_core/vibe_core/client.py @@ -1,4 +1,5 @@ import json +import logging import os import time import warnings @@ -93,7 +94,6 @@ def list_workflows(self) -> List[str]: """Lists all available workflows. :return: A list of workflow names. - :raises NotImplementedError: If the method is not implemented by a subclass. """ raise NotImplementedError @@ -110,9 +110,7 @@ def run( :param workflow: The name of the workflow to run. :param geometry: The geometry to run the workflow on. :param time_range: The time range to run the workflow on. - :return: A :class:`WorkflowRun` object. - :raises NotImplementedError: If the method is not implemented by a subclass. """ raise NotImplementedError @@ -275,6 +273,12 @@ def describe_workflow(self, workflow_name: str) -> Dict[str, Any]: The keys are 'name', 'description', 'inputs', 'outputs' and 'parameters'. 
""" desc = self._request("GET", f"v0/workflows/{workflow_name}?return_format=description") + + param_descriptions = desc["description"]["parameters"] + for p, d in param_descriptions.items(): + if isinstance(d, List): + param_descriptions[p] = d[0] + desc["description"] = TaskDescription(**desc["description"]) return desc @@ -432,6 +436,29 @@ def get_run_by_id(self, id: str) -> "VibeWorkflowRun": run = self.list_runs(id, fields=fields)[0] return VibeWorkflowRun(*(run[f] for f in fields), self) # type: ignore + def get_last_runs(self, n: int) -> List["VibeWorkflowRun"]: + """Gets the last 'n' workflow runs. + + This method returns a list of :class:`VibeWorkflowRun` objects containing + the details of the last n workflow runs. + + :param n: The number of workflow runs to get (with n>0). + + :return: A list of :class:`VibeWorkflowRun` objects. + """ + if n <= 0: + raise ValueError(f"The number of runs (n) must be greater than 0. Got {n} instead.") + + last_runs = self.list_runs()[-n:] + if not last_runs: + raise ValueError("No past runs available.") + elif len(last_runs) < n: + logging.warning( + f"Requested {n} runs, but only {len(last_runs)} are available. " + "Returning all available runs." + ) + return [self.get_run_by_id(run_id) for run_id in last_runs] + def get_api_time_zone(self) -> tzfile: """Gets the time zone of the FarmVibes.AI REST-API. 
@@ -582,19 +609,25 @@ def _loop_update_monitor_table( ) time.sleep(refresh_time_s) - curent_time = time.monotonic() + current_time = time.monotonic() # Check for warnings every refresh_warnings_time_min minutes - if (curent_time - last_warning_refresh) / 60.0 > refresh_warnings_time_min: + if (current_time - last_warning_refresh) / 60.0 > refresh_warnings_time_min: self.verify_disk_space() - last_warning_refresh = curent_time + last_warning_refresh = current_time # Check for timeout did_timeout = ( - timeout_min is not None and (curent_time - time_start) / 60.0 > timeout_min + timeout_min is not None and (current_time - time_start) / 60.0 > timeout_min ) stop_monitoring = ( - all([RunStatus.finished(r.status) for r in runs]) or did_timeout + all( + [ + RunStatus.finished(r.status) or r.status == RunStatus.deleted + for r in runs + ] + ) + or did_timeout ) # Update one last time to make sure we have the latest state @@ -605,7 +638,7 @@ def _loop_update_monitor_table( def monitor( self, - runs: Union[List["VibeWorkflowRun"], "VibeWorkflowRun"], + runs: Union[List["VibeWorkflowRun"], "VibeWorkflowRun", int] = 1, refresh_time_s: int = 1, refresh_warnings_time_min: int = 5, timeout_min: Optional[int] = None, @@ -614,11 +647,14 @@ def monitor( """Monitors workflow runs. This method will block and print the status of the runs each refresh_time_s seconds, - until the workflow run finishes or it reaches timeout_min minutes. It will also + until the workflow runs finish or it reaches timeout_min minutes. It will also print warnings every refresh_warnings_time_min minutes. - :param runs: A list of workflow runs to monitor. If only one run is provided, - the method will monitor that run directly. + :param runs: A list of workflow runs, a single run object, or an integer. The method will + monitor the provided workflow runs. If a list of runs is provided, the method will + provide a summarized table with the status of each run. 
If only one run is provided, + the method will monitor that run directly. If an integer > 0 is provided, the method + will fetch the respective last runs and provide the summarized monitor table. :param refresh_time_s: Refresh interval in seconds (defaults to 1 second). @@ -633,9 +669,14 @@ def monitor( :raises ValueError: If no workflow runs are provided (empty list). """ + if isinstance(runs, int): + runs = self.get_last_runs(runs) + if isinstance(runs, VibeWorkflowRun): runs = [runs] + runs = cast(List[VibeWorkflowRun], runs) + if len(runs) == 0: raise ValueError("At least one workflow run must be provided.") diff --git a/src/vibe_core/vibe_core/data/__init__.py b/src/vibe_core/vibe_core/data/__init__.py index d0dc6292..dee86916 100644 --- a/src/vibe_core/vibe_core/data/__init__.py +++ b/src/vibe_core/vibe_core/data/__init__.py @@ -14,8 +14,10 @@ GeometryCollection, GHGFlux, GHGProtocolVibe, + OrdinalTrendTest, Point, ProteinSequence, + RasterPixelCount, TimeRange, TimeSeries, TypeDictVibe, @@ -23,6 +25,8 @@ gen_hash_id, ) from .farm import ( + ADMAgPrescription, + ADMAgPrescriptionInput, ADMAgSeasonalFieldInput, FertilizerInformation, HarvestInformation, @@ -40,6 +44,7 @@ GEDIProduct, GLADProduct, GNATSGOProduct, + HansenProduct, HerbieProduct, LandsatProduct, ModisProduct, diff --git a/src/vibe_core/vibe_core/data/core_types.py b/src/vibe_core/vibe_core/data/core_types.py index 2ce8888b..3bc381af 100644 --- a/src/vibe_core/vibe_core/data/core_types.py +++ b/src/vibe_core/vibe_core/data/core_types.py @@ -2,6 +2,7 @@ import logging import re import uuid +from copy import deepcopy from dataclasses import asdict, dataclass, field, fields, is_dataclass from datetime import datetime, timezone from pathlib import Path @@ -353,6 +354,10 @@ def schema(cls, *args, **kwargs): # type: ignore def pydantic_model(cls): # type: ignore if is_dataclass(cls): if issubclass(cls, DataVibe): + cls = deepcopy(cls) + if 'asset_geometry' in cls.__dataclass_fields__: # type: ignore + f 
= cls.__dataclass_fields__['asset_geometry'] + f.type = Dict[str, Any] # type: ignore @pydataclass class PydanticAssetVibe(AssetVibe): @@ -512,6 +517,13 @@ class TimeSeries(DataVibe): pass +@dataclass +class RasterPixelCount(DataVibe): + """Represents a data object in FarmVibes.AI that stores the pixel count of a raster.""" + + pass + + @dataclass class DataSummaryStatistics(DataVibe): """Represents a data summary statistics object in FarmVibes.AI.""" @@ -519,6 +531,14 @@ class DataSummaryStatistics(DataVibe): pass +@dataclass +class OrdinalTrendTest(DataVibe): + """Represents a trend test (Chochan-Armitage) result object in FarmVibes.AI.""" + + p_value: float + z_score: float + + @dataclass class DataSequence(DataVibe): """Represents a sequence of data assets in FarmVibes.AI.""" diff --git a/src/vibe_core/vibe_core/data/farm.py b/src/vibe_core/vibe_core/data/farm.py index 02312b34..1fb4a5ad 100644 --- a/src/vibe_core/vibe_core/data/farm.py +++ b/src/vibe_core/vibe_core/data/farm.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from .core_types import BaseVibe, DataVibe @@ -10,12 +10,10 @@ class ADMAgSeasonalFieldInput(BaseVibe): Represents an ADMAg Seasonal Field input. """ - farmer_id: str - """The ID of the farmer.""" + party_id: str + """The ID of the party.""" seasonal_field_id: str """The ID of the seasonal field.""" - boundary_id: str - """The ID of the boundary.""" @dataclass @@ -111,3 +109,73 @@ class SeasonalFieldInformation(DataVibe): organic_amendments: List[OrganicAmendmentInformation] """A list of :class:`OrganicAmendmentInformation` objects representing the organic amendments for the seasonal field.""" + + +@dataclass +class ADMAgPrescriptionMapInput(BaseVibe): + """ + Represents an ADMAg Prescription Map input. 
+ """ + + party_id: str + """The ID of the party.""" + fieldId: str + """The ID of the field.""" + seasonal_field_id: Optional[str] + """The ID of the seasonal field.""" + cropId: str + """The ID of the crop.""" + + +@dataclass +class ADMAgPrescriptionInput(BaseVibe): + """ + Represents an ADMAg Prescriptions input. + """ + + party_id: str + """The ID of the party.""" + prescription_id: str + """The ID of the prescription.""" + + +@dataclass +class ADMAgPrescription(BaseVibe): + """ + Represents an ADMAg Prescriptions. + """ + + partyId: str + """The id of Party.""" + prescriptionMapId: str + """The id of mapping with seasonal field.""" + productCode: str + """The productCode of the sensor.""" + productName: str + """The productName of the sensor.""" + type: str + """type of the analysis.""" + measurements: str + """The measurements received from the sensor.""" + id: str + """Prescription Id.""" + eTag: str + """eTag of the prescription.""" + status: str + """status of the analysis.""" + createdDateTime: str + """createdDateTime of the prescription.""" + modifiedDateTime: str + """modifiedDateTime of the prescription.""" + source: str + """source of the analysis.""" + geometry: Dict[str, Any] + """The geometry of the nutrient analysis location.""" + name: str + """The name of the analysis.""" + description: str + """The description of the nutrient analysis.""" + createdBy: str + """createdBy of the prescription.""" + modifiedBy: str + """modifiedBy of the prescription.""" diff --git a/src/vibe_core/vibe_core/data/json_converter.py b/src/vibe_core/vibe_core/data/json_converter.py index 7d218913..9ea6f9fd 100644 --- a/src/vibe_core/vibe_core/data/json_converter.py +++ b/src/vibe_core/vibe_core/data/json_converter.py @@ -8,10 +8,7 @@ class DataclassJSONEncoder(json.JSONEncoder): - """ - A class that extends the `json.JSONEncoder` class to support - encoding of dataclasses and pydantic models. 
- """ + """Class that extends `json.JSONEncoder` to support encoding dataclasses and pydantic models""" def default(self, obj: Any): """Encodes a dataclass or pydantic model to JSON. diff --git a/src/vibe_core/vibe_core/data/products.py b/src/vibe_core/vibe_core/data/products.py index fe3ef9b6..c5cd8b9b 100644 --- a/src/vibe_core/vibe_core/data/products.py +++ b/src/vibe_core/vibe_core/data/products.py @@ -1,4 +1,5 @@ import mimetypes +import re from dataclasses import dataclass, field from typing import Dict, cast @@ -188,6 +189,107 @@ def tile_name(self) -> str: return tile_name +@dataclass +class HansenProduct(DataVibe): + """ + Represents metadata information about a Hansen product. + """ + + asset_keys = ["treecover2000", "gain", "lossyear", "datamask", "first", "last"] + """ The asset keys (dataset layers) for the Hansen products.""" + + asset_url: str = field(default_factory=str) + """ The URL of the Hansen product.""" + + def __post_init__(self): + super().__post_init__() + valid = self.validate_url() + if not valid: + raise ValueError(f"Invalid URL: {self.asset_url}") + + def validate_url(self): + # Urls are expected to be in the format: + # 'https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/Hansen_GFC-2022-v1.10_treecover2000_20N_090W.tif' + pattern = ( + r"https://storage\.googleapis\.com/earthenginepartners-hansen" + r"/GFC-\d{4}-v\d+\.\d+/Hansen_GFC-\d{4}-v\d+\.\d+_\w+" + r"_\d{2}[NS]_\d{3}[WE]\.tif" + ) + match = re.match(pattern, self.asset_url) + return bool(match) + + @staticmethod + def extract_hansen_url_property( + asset_url: str, regular_expression: str, property_name: str + ) -> str: + """Extracts the property from the base URL and the tile name.""" + + # Use re.search to find the pattern in the URL + match = re.search(regular_expression, asset_url) + + if match is None: + raise ValueError(f"Could not extract {property_name} from {asset_url}") + + return match.group(1) + + @staticmethod + def extract_tile_name(asset_url: 
str) -> str: + """Extracts the tile name from the base URL and the tile name.""" + + # Define the regex pattern for the tile name + # The tile name is expected to be in the format: '20N_090W' + pattern = r"(\d{2}[NS]_\d{3}[WE])" + + return HansenProduct.extract_hansen_url_property(asset_url, pattern, "tile name") + + @staticmethod + def extract_last_year(asset_url: str) -> int: + """Extracts the last year from the base URL and the tile name.""" + + # Define the regex pattern for the last year - e.g., GFC-2022-v1.10 -> 2022 + pattern = r"GFC-(\d{4})-" + + return int(HansenProduct.extract_hansen_url_property(asset_url, pattern, "last year")) + + @staticmethod + def extract_version(asset_url: str) -> str: + """Extracts the version from the base URL and the tile name.""" + + # Define the regex pattern for the version - e.g., GFC-2022-v1.10 -> v1.10 + pattern = r"GFC-\d{4}-(v\d+\.\d+)" + + return HansenProduct.extract_hansen_url_property(asset_url, pattern, "version") + + @staticmethod + def extract_layer_name(asset_url: str) -> str: + """Extracts the layer name from the base URL and the tile name.""" + + # Define the regex pattern for the layer name + pattern = r"_(\w+)_(\d{2}[NS]_\d{3}[WE])" + + return HansenProduct.extract_hansen_url_property(asset_url, pattern, "layer name") + + @property + def tile_name(self) -> str: + """The tile name of the Hansen product.""" + return self.extract_tile_name(self.asset_url) + + @property + def last_year(self) -> int: + """The last year of the Hansen product.""" + return self.extract_last_year(self.asset_url) + + @property + def version(self) -> str: + """The version of the Hansen product.""" + return self.extract_version(self.asset_url) + + @property + def layer_name(self) -> str: + """The layer name of the Hansen product.""" + return self.extract_layer_name(self.asset_url) + + @dataclass class EsriLandUseLandCoverProduct(DataVibe): """Represents metadata information about Esri LandUse/LandCover (9-class) dataset.""" diff --git 
a/src/vibe_core/vibe_core/monitor.py b/src/vibe_core/vibe_core/monitor.py index 31d36dbd..0decd5ed 100644 --- a/src/vibe_core/vibe_core/monitor.py +++ b/src/vibe_core/vibe_core/monitor.py @@ -24,6 +24,8 @@ RunStatus.done: "[green]done[/]", RunStatus.queued: "[yellow]queued[/]", RunStatus.cancelled: "[yellow]cancelled[/]", + RunStatus.deleted: "[orange_red1]deleted[/]", + RunStatus.deleting: "[dark_orange]deleting[/]", } FETCHING_ICON_STR = ":hourglass_not_done:" @@ -108,9 +110,11 @@ def formatted_parameters(self) -> Dict[str, str]: :return: A dictionary containing the formatted parameters and default values. """ return { - param_name: "default: task defined" - if isinstance(param_value, list) - else f"default: {param_value}" + param_name: ( + "default: task defined" + if isinstance(param_value, list) + else f"default: {param_value}" + ) for param_name, param_value in self.parameters.items() } @@ -136,7 +140,7 @@ def _print_sinks(self, section_name: str = "Sinks"): def _print_parameters(self, section_name: str = "Parameters"): if self.parameters: desc = { - k: str(v) if not isinstance(v, list) else "" + k: str(v) if not isinstance(v, dict) else list(v.values())[0] for k, v in self.description.parameters.items() } self._print_items_description(desc, section_name, self.formatted_parameters) @@ -205,6 +209,11 @@ class VibeWorkflowRunMonitor: "Total duration: [dodger_blue3]{}[/][/]" ) + DELETE_RUN_STR = ( + "[light_salmon1]Run status marked as[/] {}\n" + "[light_salmon1]Associated cached data will be / has been deleted as long as there have " + "been no other runs with operations in common with this run.[/]\n" + ) WARNING_HEADER_STR = "\n[yellow]:warning: Warnings :warning:[/]" WARNING_STR = "\n{}\n[yellow]:warning: :warning: :warning:[/]" TABLE_FIELDS = [ @@ -323,13 +332,17 @@ def _add_task_row(self, task_name: str, task_info: RunDetails): def _add_workflow_row( self, run: MonitoredWorkflowRun, sorted_tasks: List[Tuple[str, RunDetails]] ): - start_time_str = 
self._get_time_str(sorted_tasks[-1][1].submission_time) - end_time_str = self._get_time_str(sorted_tasks[0][1].end_time) + if sorted_tasks: + start_time_str = self._get_time_str(sorted_tasks[-1][1].submission_time) + end_time_str = self._get_time_str(sorted_tasks[0][1].end_time) - run_progress = self._render_progress(sorted_tasks) + run_progress = self._render_progress(sorted_tasks) - # Compute run duration - run_duration = self._get_run_duration(sorted_tasks, run.status) + # Compute run duration + run_duration = self._get_run_duration(sorted_tasks, run.status) + else: # For runs with no tasks set (e.g. deleted runs) + start_time_str = end_time_str = run_duration = "N/A".center(len(self.TIME_FORMAT), " ") + run_progress = "" # TODO: Add missing fields self.table.add_row( diff --git a/src/vibe_core/vibe_core/terraform/aks/main.tf b/src/vibe_core/vibe_core/terraform/aks/main.tf index 39a10a5c..68b7dbc3 100644 --- a/src/vibe_core/vibe_core/terraform/aks/main.tf +++ b/src/vibe_core/vibe_core/terraform/aks/main.tf @@ -14,31 +14,34 @@ module "infrastructure" { subscriptionId = var.subscriptionId resource_group_name = var.resource_group_name max_worker_nodes = var.worker_replicas + enable_telemetry = var.enable_telemetry farmvibes_log_level = var.farmvibes_log_level depends_on = [module.rg] } module "kubernetes" { - source = "./modules/kubernetes" - tenantId = var.tenantId - namespace = var.namespace - acr_registry = var.acr_registry - acr_registry_username = var.acr_registry_username - acr_registry_password = var.acr_registry_password - kubernetes_config_path = module.infrastructure.kubernetes_config_path - kubernetes_config_context = module.infrastructure.kubernetes_config_context - public_ip_address = module.infrastructure.public_ip_address - public_ip_fqdn = module.infrastructure.public_ip_fqdn - public_ip_dns = module.infrastructure.public_ip_dns - keyvault_name = module.infrastructure.keyvault_name - application_id = module.infrastructure.application_id - 
storage_connection_key = module.infrastructure.storage_connection_key - storage_account_name = module.infrastructure.storage_account_name - userfile_container_name = module.infrastructure.userfile_container_name - resource_group_name = module.infrastructure.resource_group_name - size_of_shared_volume = var.size_of_shared_volume - certificate_email = var.certificate_email - current_user_name = module.infrastructure.current_user_name + source = "./modules/kubernetes" + tenantId = var.tenantId + namespace = var.namespace + acr_registry = var.acr_registry + acr_registry_username = var.acr_registry_username + acr_registry_password = var.acr_registry_password + kubernetes_config_path = module.infrastructure.kubernetes_config_path + kubernetes_config_context = module.infrastructure.kubernetes_config_context + public_ip_address = module.infrastructure.public_ip_address + public_ip_fqdn = module.infrastructure.public_ip_fqdn + public_ip_dns = module.infrastructure.public_ip_dns + keyvault_name = module.infrastructure.keyvault_name + application_id = module.infrastructure.application_id + storage_connection_key = module.infrastructure.storage_connection_key + storage_account_name = module.infrastructure.storage_account_name + userfile_container_name = module.infrastructure.userfile_container_name + resource_group_name = module.infrastructure.resource_group_name + size_of_shared_volume = var.size_of_shared_volume + monitor_instrumentation_key = var.monitor_instrumentation_key + enable_telemetry = var.enable_telemetry + certificate_email = var.certificate_email + current_user_name = module.infrastructure.current_user_name } module "services" { @@ -53,6 +56,7 @@ module "services" { dapr_sidecars_deployed = module.kubernetes.dapr_sidecars_deployed startup_type = "aks" shared_resource_pv_claim_name = module.kubernetes.shared_resource_pv_claim_name + otel_service_name = try(module.kubernetes.otel_service_name, "") image_prefix = var.image_prefix image_tag = var.image_tag 
worker_replicas = var.worker_replicas diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf new file mode 100644 index 00000000..0c9bca85 --- /dev/null +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf @@ -0,0 +1,39 @@ +resource "azurerm_log_analytics_workspace" "analyticsworkspace" { + name = "${var.prefix}-analytics-workspace-${resource.random_string.name_suffix.result}" + count = var.enable_telemetry ? 1 : 0 + location = var.location + resource_group_name = var.resource_group_name + sku = "PerGB2018" +} + +resource "azurerm_application_insights" "appinsights" { + name = "${var.prefix}-app-insights-${resource.random_string.name_suffix.result}" + count = var.enable_telemetry ? 1 : 0 + location = var.location + resource_group_name = var.resource_group_name + application_type = "web" +} + + +resource "azurerm_monitor_diagnostic_setting" "diagsetting" { + name = "${var.prefix}-diagsetting-${resource.random_string.name_suffix.result}" + count = var.enable_telemetry ? 
1 : 0 + target_resource_id = azurerm_application_insights.appinsights[0].id + log_analytics_workspace_id = azurerm_log_analytics_workspace.analyticsworkspace[0].id + + enabled_log { + category = "AppTraces" + + retention_policy { + enabled = false + } + } + + metric { + category = "AllMetrics" + + retention_policy { + enabled = false + } + } +} diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf index a296a5f3..8ac3a46c 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf @@ -50,12 +50,6 @@ resource "azurerm_key_vault" "keyvault" { depends_on = [data.azurerm_resource_group.resourcegroup, data.http.ip, data.azurerm_user_assigned_identity.kubernetesidentity] } -resource "time_sleep" "wait_keyvault_pe" { - depends_on = [azurerm_key_vault.keyvault] - - create_duration = "900s" # 5 min should give us enough time for the Private endpoint to come online -} - resource "azurerm_key_vault_secret" "cosmosdbsecret" { name = "cosmos-db-database" value = azurerm_cosmosdb_sql_database.cosmosdb.name diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf index 1251eb87..44a54738 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf @@ -55,4 +55,9 @@ output "max_worker_nodes" { output "max_default_nodes" { value = azurerm_kubernetes_cluster.kubernetes.default_node_pool[0].max_count +} + +output "monitor_instrumentation_key" { + value = var.enable_telemetry ? 
azurerm_application_insights.appinsights[0].instrumentation_key : "" + sensitive = true } \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf index 7299f0c7..da7cd61d 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf @@ -30,3 +30,8 @@ variable "max_worker_nodes" { variable "environment" { description = "Azure Cloud Environment to use" } + +variable "enable_telemetry" { + description = "Use telemetry" + type = bool +} diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf index 27467ddb..56aa3769 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf @@ -141,8 +141,8 @@ resource "kubectl_manifest" "resiliency-sidecar" { opExecution: 3h # should be bigger than any individual op run retries: workerRetry: - policy: constant - duration: 60s + policy: exponential + maxInterval: 60s maxRetries: -1 targets: components: diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf index c4b645bb..5aee9b92 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf @@ -27,6 +27,20 @@ resource "kubernetes_secret" "user-storage-secret" { depends_on = [data.kubernetes_namespace.kubernetesnamespace] } +resource "kubernetes_secret" "monitor_instrumentation_key_secret" { + metadata { + name = "monitor-instrumentation-key-secret" + namespace = var.namespace + } + + data = { + monitor_instrumentation_key = var.monitor_instrumentation_key + } + + type = "Opaque" + depends_on = 
[data.kubernetes_namespace.kubernetesnamespace] +} + resource "kubernetes_secret" "eywaregistrysecret" { metadata { name = "acrtoken" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf index 17079cf8..aff46a84 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf @@ -1,16 +1,22 @@ resource "kubernetes_config_map" "otel" { + count = var.enable_telemetry ? 1 : 0 metadata { - name = "otel-collector-conf" + name = "otel-collector-config" labels = { app = "opentelemetry" component = "otel-collector-conf" } } + data = { - "otel-collector-config" = < Date: Fri, 5 Apr 2024 14:10:02 -0300 Subject: [PATCH 5/7] Delete leftover file --- .../terraform/aks/modules/kubernetes/otel.tf | 194 ------------------ 1 file changed, 194 deletions(-) delete mode 100644 src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf deleted file mode 100644 index aff46a84..00000000 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf +++ /dev/null @@ -1,194 +0,0 @@ -resource "kubernetes_config_map" "otel" { - count = var.enable_telemetry ? 
1 : 0 - metadata { - name = "otel-collector-config" - labels = { - app = "opentelemetry" - component = "otel-collector-conf" - } - } - - data = { - "otel-collector-config.yaml" = < Date: Fri, 5 Apr 2024 15:58:01 -0300 Subject: [PATCH 6/7] Use double quotes in JSON examples --- docs/source/docfiles/markdown/REST_API.md | 96 +++++++++++------------ 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/docs/source/docfiles/markdown/REST_API.md b/docs/source/docfiles/markdown/REST_API.md index a6d35596..527f6e8a 100644 --- a/docs/source/docfiles/markdown/REST_API.md +++ b/docs/source/docfiles/markdown/REST_API.md @@ -38,27 +38,27 @@ Replacing the body of the request `` with the following: ```json { - 'name': 'Hello!', - 'workflow': 'helloworld', - 'parameters': None, - 'user_input': { - 'start_date': datetime.datetime(2020, 5, 1, 0, 0), - 'end_date': datetime.datetime(2020, 5, 5, 0, 0), - 'geojson': { - 'features': [ + "name": "Hello!", + "workflow": "helloworld", + "parameters": None, + "user_input": { + "start_date": datetime.datetime(2020, 5, 1, 0, 0), + "end_date": datetime.datetime(2020, 5, 5, 0, 0), + "geojson": { + "features": [ { - 'geometry': { - 'type': 'Polygon', - 'coordinates': (((-119.14896203939314, 46.51578909859286), + "geometry": { + "type": "Polygon", + "coordinates": (((-119.14896203939314, 46.51578909859286), (-119.14896203939314, 46.37578909859286), (-119.28896203939313, 46.37578909859286), (-119.28896203939313, 46.51578909859286), (-119.14896203939314, 46.51578909859286)),) }, - 'type': 'Feature' + "type": "Feature" } ], - 'type': 'FeatureCollection' + "type": "FeatureCollection" } } } @@ -93,60 +93,60 @@ Which would generate the following json: ```json { - 'name': 'SAM segmentation', - 'workflow': 'farm_ai/segmentation/segment_s2', - 'parameters': None, - 'user_input': { - 'user_input': { - 'type': 'Feature', - 'stac_version': '1.0.0', - 'id': 'f6465ad0-5e01-4792-ad99-a0bd240c1e7d', - 'properties': { - 'start_datetime': 
'2020-05-01T00:00:00+00:00', - 'end_datetime': '2020-05-05T00:00:00+00:00', - 'datetime': '2020-05-01T00:00:00Z' + "name": "SAM segmentation", + "workflow": "farm_ai/segmentation/segment_s2", + "parameters": None, + "user_input": { + "user_input": { + "type": "Feature", + "stac_version": "1.0.0", + "id": "f6465ad0-5e01-4792-ad99-a0bd240c1e7d", + "properties": { + "start_datetime": "2020-05-01T00:00:00+00:00", + "end_datetime": "2020-05-05T00:00:00+00:00", + "datetime": "2020-05-01T00:00:00Z" }, - 'geometry': {'type': 'Polygon', - 'coordinates': (((-119.14896203939314, 46.51578909859286), + "geometry": {"type": "Polygon", + "coordinates": (((-119.14896203939314, 46.51578909859286), (-119.14896203939314, 46.37578909859286), (-119.28896203939313, 46.37578909859286), (-119.28896203939313, 46.51578909859286), (-119.14896203939314, 46.51578909859286)),) }, - 'links': [], - 'assets': {}, - 'bbox': [-119.28896203939313, + "links": [], + "assets": {}, + "bbox": [-119.28896203939313, 46.37578909859286, -119.14896203939314, 46.51578909859286], - 'stac_extensions': [], - 'terravibes_data_type': 'DataVibe' + "stac_extensions": [], + "terravibes_data_type": "DataVibe" }, - 'prompts': { - 'type': 'Feature', - 'stac_version': '1.0.0', - 'id': 'geo_734c6441-cb25-4c40-8204-6b7286f24bb9', - 'properties': { - 'urls': ['/mnt/734c6441-cb25-4c40-8204-6b7286f24bb9_geometry_collection.geojson'], - 'start_datetime': '2020-05-01T00:00:00+00:00', - 'end_datetime': '2020-05-05T00:00:00+00:00', - 'datetime': '2020-05-01T00:00:00Z' + "prompts": { + "type": "Feature", + "stac_version": "1.0.0", + "id": "geo_734c6441-cb25-4c40-8204-6b7286f24bb9", + "properties": { + "urls": ["/mnt/734c6441-cb25-4c40-8204-6b7286f24bb9_geometry_collection.geojson"], + "start_datetime": "2020-05-01T00:00:00+00:00", + "end_datetime": "2020-05-05T00:00:00+00:00", + "datetime": "2020-05-01T00:00:00Z" }, - 'geometry': {'type': 'Polygon', - 'coordinates': (((-119.14896203939314, 46.51578909859286), + "geometry": {"type": 
"Polygon", + "coordinates": (((-119.14896203939314, 46.51578909859286), (-119.14896203939314, 46.37578909859286), (-119.28896203939313, 46.37578909859286), (-119.28896203939313, 46.51578909859286), (-119.14896203939314, 46.51578909859286)),) }, - 'links': [], - 'assets': {}, - 'bbox': [-119.28896203939313, + "links": [], + "assets": {}, + "bbox": [-119.28896203939313, 46.37578909859286, -119.14896203939314, 46.51578909859286], - 'stac_extensions': [], - 'terravibes_data_type': 'ExternalReferenceList' + "stac_extensions": [], + "terravibes_data_type": "ExternalReferenceList" } } } From efb0830d6483f9d0b6dae4f821e5118c63ed9831 Mon Sep 17 00:00:00 2001 From: "Renato L. de F. Cunha" Date: Fri, 5 Apr 2024 16:02:50 -0300 Subject: [PATCH 7/7] Use valid JSON syntax throughout --- docs/source/docfiles/markdown/REST_API.md | 215 ++++++++++++++-------- 1 file changed, 140 insertions(+), 75 deletions(-) diff --git a/docs/source/docfiles/markdown/REST_API.md b/docs/source/docfiles/markdown/REST_API.md index 527f6e8a..a0e9ccad 100644 --- a/docs/source/docfiles/markdown/REST_API.md +++ b/docs/source/docfiles/markdown/REST_API.md @@ -38,29 +38,48 @@ Replacing the body of the request `` with the following: ```json { - "name": "Hello!", - "workflow": "helloworld", - "parameters": None, - "user_input": { - "start_date": datetime.datetime(2020, 5, 1, 0, 0), - "end_date": datetime.datetime(2020, 5, 5, 0, 0), - "geojson": { - "features": [ - { - "geometry": { - "type": "Polygon", - "coordinates": (((-119.14896203939314, 46.51578909859286), - (-119.14896203939314, 46.37578909859286), - (-119.28896203939313, 46.37578909859286), - (-119.28896203939313, 46.51578909859286), - (-119.14896203939314, 46.51578909859286)),) - }, - "type": "Feature" - } - ], - "type": "FeatureCollection" + "name": "Hello!", + "workflow": "helloworld", + "parameters": null, + "user_input": { + "start_date": "2020-05-01T00:00:00", + "end_date": "2020-05-05T00:00:00", + "geojson": { + "features": [ + { + "geometry": 
{ + "type": "Polygon", + "coordinates": [ + [ + [ + -119.14896203939314, + 46.51578909859286 + ], + [ + -119.14896203939314, + 46.37578909859286 + ], + [ + -119.28896203939313, + 46.37578909859286 + ], + [ + -119.28896203939313, + 46.51578909859286 + ], + [ + -119.14896203939314, + 46.51578909859286 + ] + ] + ] + }, + "type": "Feature" } + ], + "type": "FeatureCollection" } + } } ``` @@ -93,62 +112,108 @@ Which would generate the following json: ```json { - "name": "SAM segmentation", - "workflow": "farm_ai/segmentation/segment_s2", - "parameters": None, + "name": "SAM segmentation", + "workflow": "farm_ai/segmentation/segment_s2", + "parameters": null, + "user_input": { "user_input": { - "user_input": { - "type": "Feature", - "stac_version": "1.0.0", - "id": "f6465ad0-5e01-4792-ad99-a0bd240c1e7d", - "properties": { - "start_datetime": "2020-05-01T00:00:00+00:00", - "end_datetime": "2020-05-05T00:00:00+00:00", - "datetime": "2020-05-01T00:00:00Z" - }, - "geometry": {"type": "Polygon", - "coordinates": (((-119.14896203939314, 46.51578909859286), - (-119.14896203939314, 46.37578909859286), - (-119.28896203939313, 46.37578909859286), - (-119.28896203939313, 46.51578909859286), - (-119.14896203939314, 46.51578909859286)),) - }, - "links": [], - "assets": {}, - "bbox": [-119.28896203939313, - 46.37578909859286, - -119.14896203939314, - 46.51578909859286], - "stac_extensions": [], - "terravibes_data_type": "DataVibe" - }, - "prompts": { - "type": "Feature", - "stac_version": "1.0.0", - "id": "geo_734c6441-cb25-4c40-8204-6b7286f24bb9", - "properties": { - "urls": ["/mnt/734c6441-cb25-4c40-8204-6b7286f24bb9_geometry_collection.geojson"], - "start_datetime": "2020-05-01T00:00:00+00:00", - "end_datetime": "2020-05-05T00:00:00+00:00", - "datetime": "2020-05-01T00:00:00Z" - }, - "geometry": {"type": "Polygon", - "coordinates": (((-119.14896203939314, 46.51578909859286), - (-119.14896203939314, 46.37578909859286), - (-119.28896203939313, 46.37578909859286), - 
(-119.28896203939313, 46.51578909859286), - (-119.14896203939314, 46.51578909859286)),) - }, - "links": [], - "assets": {}, - "bbox": [-119.28896203939313, - 46.37578909859286, - -119.14896203939314, - 46.51578909859286], - "stac_extensions": [], - "terravibes_data_type": "ExternalReferenceList" - } + "type": "Feature", + "stac_version": "1.0.0", + "id": "f6465ad0-5e01-4792-ad99-a0bd240c1e7d", + "properties": { + "start_datetime": "2020-05-01T00:00:00+00:00", + "end_datetime": "2020-05-05T00:00:00+00:00", + "datetime": "2020-05-01T00:00:00Z" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -119.14896203939314, + 46.51578909859286 + ], + [ + -119.14896203939314, + 46.37578909859286 + ], + [ + -119.28896203939313, + 46.37578909859286 + ], + [ + -119.28896203939313, + 46.51578909859286 + ], + [ + -119.14896203939314, + 46.51578909859286 + ] + ] + ] + }, + "links": [], + "assets": {}, + "bbox": [ + -119.28896203939313, + 46.37578909859286, + -119.14896203939314, + 46.51578909859286 + ], + "stac_extensions": [], + "terravibes_data_type": "DataVibe" + }, + "prompts": { + "type": "Feature", + "stac_version": "1.0.0", + "id": "geo_734c6441-cb25-4c40-8204-6b7286f24bb9", + "properties": { + "urls": [ + "/mnt/734c6441-cb25-4c40-8204-6b7286f24bb9_geometry_collection.geojson" + ], + "start_datetime": "2020-05-01T00:00:00+00:00", + "end_datetime": "2020-05-05T00:00:00+00:00", + "datetime": "2020-05-01T00:00:00Z" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -119.14896203939314, + 46.51578909859286 + ], + [ + -119.14896203939314, + 46.37578909859286 + ], + [ + -119.28896203939313, + 46.37578909859286 + ], + [ + -119.28896203939313, + 46.51578909859286 + ], + [ + -119.14896203939314, + 46.51578909859286 + ] + ] + ] + }, + "links": [], + "assets": {}, + "bbox": [ + -119.28896203939313, + 46.37578909859286, + -119.14896203939314, + 46.51578909859286 + ], + "stac_extensions": [], + "terravibes_data_type": "ExternalReferenceList" } + } } 
```