diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index e37e2bb0..00000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "build": { - "dockerfile": "../resources/docker/Dockerfile-devcontainer" - }, - "overrideCommand": false, - "mounts": [ - "type=bind,source=${localEnv:HOME}${localEnv:USERPROFILE},target=/user-home", - "source=terravibes-devcontainer-profile,target=/home/vscode,type=volume", - "target=/home/vscode/.vscode-server,type=volume", - "source=terravibes-devcontainer-var-lib-docker,target=/var/lib/docker,type=volume" - ], - "customizations": { - "vscode": { - "extensions": [ - "charliermarsh.ruff", - "ms-python.python", - "ms-python.vscode-pylance", - "ms-toolsai.jupyter", - "ms-azuretools.vscode-docker", - "DavidAnson.vscode-markdownlint", - "ms-vscode-remote.remote-containers", - "eamodio.gitlens", - "mutantdino.resourcemonitor" - ], - "settings": { - "python.defaultInterpreterPath": "/opt/venv/bin/python", - "python.testing.pytestArgs": [ - "src", - "ops" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "editor.formatOnSave": true, - "editor.codeActionsOnSave": { - "source.fixAll.ruff": "explicit", - "source.organizeImports.ruff": "explicit" - }, - "python.analysis.typeCheckingMode": "basic", - "python.analysis.diagnosticMode": "workspace", - "python.terminal.activateEnvironment": false, - "python.linting.flake8Enabled": true, - "git.autofetch": "all", - "terminal.integrated.allowChords": false, - "[python]": { - "editor.formatOnSave": true, - "editor.defaultFormatter": "charliermarsh.ruff" - } - } - } - }, - "remoteUser": "vscode", - "updateRemoteUserUID": true, - "postCreateCommand": "sed 's/\r$//' .devcontainer/post-create.sh | bash -", - "runArgs": [ - "--init", - "--privileged" - ], - "containerEnv": { - "USER": "vscode", - "PATH": 
"/home/vscode/.local/bin:/opt/venv/bin:/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - } -} \ No newline at end of file diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh deleted file mode 100755 index 170f9f2f..00000000 --- a/.devcontainer/post-create.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/sh -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - - -DOCKER_VERSION=24.0.2 -VSCODE_HOME=/home/vscode - -if [ -d /user-home/.ssh ]; then - echo "Making user ssh available in container..." - mkdir -p $VSCODE_HOME/.ssh - chmod 0700 $VSCODE_HOME/.ssh - for f in /user-home/.ssh/* - do - cp "$f" $VSCODE_HOME/.ssh/"$(basename "$f")" - chmod 0600 $VSCODE_HOME/.ssh/"$(basename "$f")" - done -fi - -# If the user has a git config file, copy it -if [ -f /user-home/.gitconfig ]; then - echo "Copying user .gitconfig..." - cp /user-home/.gitconfig $VSCODE_HOME/.gitconfig - echo "Enabling HTTP use path, in case the user cloned with HTTP" - git config --global credential.useHttpPath true -fi - -if [ "$(stat -c '%u' .)" != "$UID" ]; then - echo "The permissions of the current directory differ from the current user," - echo "which means we're probably running in Docker under a Windows host..." - echo "Adding the current directory to the git safe directory list" - git config --global --add safe.directory /workspaces/TerraVibes -fi - -sudo mkdir /opt/venv -sudo chown vscode /opt/venv -/opt/conda/bin/python3 -m venv --system-site-packages /opt/venv || exit 1 -/opt/venv/bin/pip install --upgrade pip - -if [[ "$(uname -a)" == *"WSL2"* ]]; then - # We're either in WSL2 or in a Windows host - echo "If we're on a Windows host, we need to convert files to unix mode..." 
- find cli scripts -type f -exec dos2unix --allow-chown {} \; -fi - -sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended -zsh -c "zstyle ':omz:update' mode auto" -zsh -c "zstyle ':omz:update' verbose minimal" -git clone https://github.com/zsh-users/zsh-autosuggestions.git ~/.oh-my-zsh/plugins/zsh-autosuggestions -git clone https://github.com/zsh-users/zsh-syntax-highlighting.git $ZSH_CUSTOM/plugins/zsh-syntax-highlighting -git clone --depth 1 -- https://github.com/marlonrichert/zsh-autocomplete.git $ZSH_CUSTOM/plugins/zsh-autocomplete -sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions zsh-syntax-highlighting zsh-autocomplete)/g' ~/.zshrc -echo "export LD_LIBRARY_PATH=/opt/conda/lib:\$LD_LIBRARY_PATH" >> ~/.zshrc -echo "export LD_LIBRARY_PATH=/opt/conda/lib:\$LD_LIBRARY_PATH" >> ~/.bashrc - -/opt/venv/bin/pip install --upgrade pyright -/opt/venv/bin/pip install --upgrade "pytest" "anyio[trio]" -sed -e '1,/dependencies:/d' < resources/envs/dev.yaml | \ - sed 's/-//' | \ - xargs /opt/venv/bin/pip install -eval $(grep 'terravibes_packages=' < "scripts/setup_python_develop_env.sh") -for package in $terravibes_packages -do - /opt/venv/bin/pip install -e src/$package -done - -sudo mkdir -p /opt/terravibes/ops -sudo ln -sf $(pwd)/op_resources /opt/terravibes/ops/resources -sudo mkdir /app -sudo ln -sf $(pwd)/ops /app/ops -sudo ln -sf $(pwd)/workflows /app/workflows \ No newline at end of file diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 214fd1ba..00000000 --- a/.gitattributes +++ /dev/null @@ -1,4 +0,0 @@ -*.onnx filter=lfs diff=lfs merge=lfs -text -*.xls filter=lfs diff=lfs merge=lfs -text -*.kml filter=lfs diff=lfs merge=lfs -text -*.tif filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/base-build.yml b/.github/workflows/base-build.yml deleted file mode 100644 index 87b4178e..00000000 --- a/.github/workflows/base-build.yml +++ /dev/null @@ -1,64 +0,0 @@ 
-name: Build base images -on: - workflow_dispatch: - -permissions: - id-token: write - contents: write - -jobs: - - build-and-push: - runs-on: ubuntu-latest - environment: build - strategy: - matrix: - include: - - dockerfile: Dockerfile-services-base - image-name: services-base - - dockerfile: Dockerfile-worker-base - image-name: worker-base - steps: - - uses: actions/checkout@v4 - with: - ref: dev - - name: 'Az CLI login' - uses: azure/login@v1 - with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - name: 'Build and push image' - run: | - az acr login -n ${{ secrets.ACR_NAME }} - VERSION_TAG=${{ github.run_id }} - export IMAGE_TAG=${{ secrets.ACR_NAME }}.azurecr.io/unlisted/farmai/terravibes/${{ matrix.image-name }}:$VERSION_TAG - docker build . -f ./resources/docker/${{ matrix.dockerfile }} -t $IMAGE_TAG - docker push $IMAGE_TAG - update-tags: - needs: build-and-push - runs-on: ubuntu-latest - environment: build - steps: - - uses: actions/checkout@v4 - with: - ref: dev - ssh-key: ${{ secrets.WORKFLOW_KEY }} - - name: 'Update tags' - run: | - TAG=${{ github.run_id }} - sed -i "s|\(\s\+image.*:\).*|\1${TAG}|" ./.github/workflows/lint-test.yml - sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-api_orchestrator - sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-cache - sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-worker - sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-devcontainer - - name: Commit changes - run: | - git config --global user.email "farmvibesaicd@microsoft.com" - git config --global user.name "FarmVibes.AI Release Pipeline" - BRANCH=update-base-${{ github.run_id }} - git checkout -b $BRANCH - git add ./.github/workflows/lint-test.yml - git add ./resources/docker/ - git commit -m "Update base tag to latest image" - git push --set-upstream origin $BRANCH diff --git 
a/.github/workflows/cluster-build.yml b/.github/workflows/cluster-build.yml new file mode 100644 index 00000000..5ab517e8 --- /dev/null +++ b/.github/workflows/cluster-build.yml @@ -0,0 +1,96 @@ +name: Build FarmVibes.AI cluster +run-name: Cluster build and helloworld test +on: [push, pull_request, workflow_dispatch] +env: + FARMVIBES_AI_SKIP_DOCKER_FREE_SPACE_CHECK: yes +jobs: + build: + name: Build and test + runs-on: ubuntu-latest + steps: + - name: Free space before cleanup + shell: bash + run: | + echo "Memory and swap:" + free -h + echo + echo "Available storage:" + df -h + echo + - name: Remove unused software + shell: bash + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -fr /usr/local/lib/android + sudo rm -fr /opt/ghc + - name: Free space after cleanup + shell: bash + run: | + echo "Memory and swap:" + free -h + echo + echo "Available storage:" + df -h + echo + + - name: Update pip version + shell: bash + run: pip install --upgrade pip + + - name: Checkout + uses: actions/checkout@v3 + + - name: Install Python requirements + shell: bash + run: pip install --user ./src/vibe_core + + - name: Actually build cluster + shell: bash + run: farmvibes-ai local setup + + - name: Cluster status + shell: bash + run: | + echo "Deployments:" + kubectl get deployments + kubectl describe deployments + echo "Pods:" + kubectl get pods + echo "Events:" + kubectl get events + + - name: Free space after cluster creation + shell: bash + run: | + echo "Memory and swap:" + free -h + echo + echo "Available storage:" + df -h + echo + + - name: Run Hello World workflow + shell: bash + run: python -m vibe_core.farmvibes_ai_hello_world 300 + + - name: Install tool to view helloworld output + shell: bash + run: yes | sudo apt install caca-utils || echo + + - name: Show helloworld output + shell: bash + run: find ~/.cache/farmvibes-ai/data -type f -name '*.tif' -exec img2txt {} \; 2> /dev/null + + - name: Restart cluster + shell: bash + run: farmvibes-ai local restart + + - name: 
Wait a bit before submitting a new run (as documented) + shell: bash + run: sleep 90 + + - name: Run Hello World workflow again + shell: bash + run: | + rm -fr ~/.cache/farmvibes-ai/data/{assets,stac} + python -m vibe_core.farmvibes_ai_hello_world 300 diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml deleted file mode 100644 index 3eb3e9fe..00000000 --- a/.github/workflows/docker-build.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Build service images -on: - push: - branches: - - dev - - main - -permissions: - id-token: write - contents: read -jobs: - build-and-push: - runs-on: ubuntu-latest - environment: build - strategy: - matrix: - include: - - dockerfile: Dockerfile-api_orchestrator - image-name: api-orchestrator - - dockerfile: Dockerfile-worker - image-name: worker - - dockerfile: Dockerfile-cache - image-name: cache - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: 'Az CLI login' - uses: azure/login@v1 - with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - name: 'Build and push image' - run: | - az acr login -n ${{ secrets.ACR_NAME }} - export VERSION_TAG=${GITHUB_REF#refs/heads/} - export IMAGE_TAG=${{ secrets.ACR_NAME }}.azurecr.io/unlisted/farmai/terravibes/${{ matrix.image-name }}:$VERSION_TAG - docker build . 
-f ./resources/docker/${{ matrix.dockerfile }} -t $IMAGE_TAG - docker push $IMAGE_TAG - diff --git a/.github/workflows/lint-test.yml b/.github/workflows/lint-test.yml deleted file mode 100644 index 8fac7957..00000000 --- a/.github/workflows/lint-test.yml +++ /dev/null @@ -1,197 +0,0 @@ -name: Linting and testing -on: - push: - branches: - - dev - - main - pull_request: - branches: - - dev - - main - workflow_dispatch: - -env: - PYRIGHT_PYTHON_FORCE_VERSION: 1.1.268 - -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' - cancel-in-progress: true - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: '3.11' - - name: Install dependencies - run: | - pip install wheel setuptools - - name: Build packages - run: | - for pkg in vibe_core vibe_common vibe_agent vibe_server vibe_dev; do cd src/$pkg && python setup.py bdist_wheel --dist-dir ../../dist; cd ../../; done - - name: Save packages - uses: actions/upload-artifact@v4 - with: - name: packages - path: dist - test: - needs: build - runs-on: ubuntu-latest - strategy: - fail-fast: true - matrix: - package-to-test: [vibe_core, vibe_common, vibe_server, vibe_agent] - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: '3.11' - - name: Retrieve packages - uses: actions/download-artifact@v4 - with: - name: packages - path: dist - - name: Install dependencies - run: | - pip install pyright ruff - - name: Install package - run: | - pip install ${{ matrix.package-to-test }}[test] --find-links dist - - name: Lint with ruff - run: | - ruff check ./src/${{ matrix.package-to-test }} --config ./.ruff.toml - - name: Type checking with pyright - run: | - pyright ./src/${{ matrix.package-to-test }} - - name: Test with pytest - run: | - pip install vibe_dev --find-links dist - 
pytest ./src/${{ matrix.package-to-test}} -v --junitxml=junit/test-results.xml --cov=. --cov-report=xml - - ops-test: - runs-on: ubuntu-latest - container: - image: mcr.microsoft.com/farmai/terravibes/worker-base:12380 - steps: - - uses: actions/checkout@v4 - - name: Install dependencies - run: | - pip install pyright ruff - - name: Setup op resources - run: | - mkdir -p /opt/terravibes/ops - ln -sf $(pwd)/op_resources /opt/terravibes/ops/resources - mkdir /app - ln -sf $(pwd)/ops /app/ops - ln -sf $(pwd)/workflows /app/workflows - - name: Install packages - run: | - pip install ./src/vibe_core - pip install ./src/vibe_common - pip install ./src/vibe_agent - pip install ./src/vibe_server - pip install ./src/vibe_lib - pip install ./src/vibe_dev - - name: Linting ops - run: | - ruff check ./ops --config ./.ruff.toml - - name: Type checking ops - run: | - pyright ./ops - - name: Get SAM model - run: | - pip install git+https://github.com/facebookresearch/segment-anything.git - mkdir -p /mnt/onnx_resources - python -c "from scripts.export_sam_models import dev; dev()" - - name: Run integration tests - run: | - pytest ./src/vibe_lib ./ops ./src/tests -v --durations=0 --full-trace --junitxml=test-output.xml - check-docstrings: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: '3.11' - - name: Install ruff - run: | - pip install ruff - - name: "Check docstrings for vibe_core" - run: | - ruff check --select D,D401 --ignore D105 --force-exclude --exclude src/vibe_core/vibe_core/farmvibes_ai_hello_world.py --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/*.py - - name: "Check docstrings for vibe_core/data" - run: | - ruff check --select D,D401 --ignore D105 --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/data/*.py - local-integration-tests: - runs-on: ubuntu-latest - steps: - - name: Free space before cleanup - shell: bash - 
run: | - echo "Memory and swap:" - free -h - echo - echo "Available storage:" - df -h - echo - - name: Remove unused software - shell: bash - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -fr /usr/local/lib/android - sudo rm -fr /opt/ghc - - name: Free space after cleanup - shell: bash - run: | - echo "Memory and swap:" - free -h - echo - echo "Available storage:" - df -h - echo - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: '3.11' - - name: Install packages - run: | - pip install ./src/vibe_core - pip install ./src/vibe_common - pip install ./src/vibe_agent - pip install ./src/vibe_server - pip install ./src/vibe_lib - pip install ./src/vibe_dev - - name: Actually build cluster - run: farmvibes-ai local setup --auto-confirm - - name: Cluster status before building local images - run: | - bash ./scripts/local-k8s-diagnostics.sh - - name: Build images - run: | - WAIT_AT_THE_END=1 make local - - name: Cluster status after building local images - run: | - bash ./scripts/local-k8s-diagnostics.sh - - name: Free space after cluster creation - run: | - echo "Memory and swap:" - free -h - echo - echo "Available storage:" - df -h - echo - - name: Run integration tests - run: | - pytest ./src/tests_local_cluster/ -v --junitxml=junit/test-results.xml - - name: Cluster status after running tests - if: always() - run: | - bash ./scripts/local-k8s-diagnostics.sh \ No newline at end of file diff --git a/.github/workflows/release-to-main.yml b/.github/workflows/release-to-main.yml deleted file mode 100644 index c3e27eda..00000000 --- a/.github/workflows/release-to-main.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Release to main -on: - workflow_dispatch: - -permissions: - id-token: write - contents: write - -jobs: - release: - runs-on: ubuntu-latest - environment: build - steps: - - uses: actions/checkout@v4 - with: - ref: dev - - name: 'Az CLI login' - uses: azure/login@v1 - with: - 
client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - name: Retag images - run: | - az acr login -n ${{ secrets.ACR_NAME }} - for image in api-orchestrator worker cache; do - export IMAGE_TAG=${{ secrets.ACR_NAME }}.azurecr.io/unlisted/farmai/terravibes/$image - export DEV_TAG=$IMAGE_TAG:dev - export MAIN_TAG=$IMAGE_TAG:${{ github.run_id }} - echo Retagging from $DEV_TAG to $MAIN_TAG - docker pull $DEV_TAG - docker tag $DEV_TAG $MAIN_TAG - docker push $MAIN_TAG - done - - name: Adjust default tag - run: | - ROOT=$(git rev-parse --show-toplevel) - CONSTANTS_MODULE="$ROOT"/src/vibe_core/vibe_core/cli/constants.py - sed -i "s|DEFAULT_IMAGE_TAG.*|DEFAULT_IMAGE_TAG = ${{ github.run_id }}|g" "$CONSTANTS_MODULE" - - name: Check modified file - run: cat src/vibe_core/vibe_core/cli/constants.py - - name: Commit changes - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - git config --global user.email "farmvibesaicd@microsoft.com" - git config --global user.name "FarmVibes.AI Release Pipeline" - ROOT=$(git rev-parse --show-toplevel) - CONSTANTS_MODULE="$ROOT"/src/vibe_core/vibe_core/cli/constants.py - RELEASE_BRANCH=release-main-${{ github.run_id }} - git checkout -b $RELEASE_BRANCH - git add $CONSTANTS_MODULE - git commit -m "Update default tag to latest image" - git push --set-upstream origin $RELEASE_BRANCH diff --git a/.gitignore b/.gitignore index bc3b9cba..97f7d813 100644 --- a/.gitignore +++ b/.gitignore @@ -122,9 +122,12 @@ resources/test/ !terravibes/vibe/lib/ .cspell/* +user-interface/package-lock.json +carbon/notebook/farmbeats/* +carbon/notebook/data/* .env.development test-output.xml /outputs/* -venv/ +/op_resources/* makeenv diff --git a/.ruff.toml b/.ruff.toml deleted file mode 100644 index b135b9cc..00000000 --- a/.ruff.toml +++ /dev/null @@ -1,17 +0,0 @@ -src= ["src"] -line-length = 100 -extend-include = ["*.ipynb"] - -[lint] -select = ["E", "F", "I", "W"] 
-ignore = ["E203"] -exclude = [".git", "__pycache__"] - -[lint.pycodestyle] -max-line-length = 100 - -[lint.per-file-ignores] -"__init__.py" = ["F401"] - -[format] -quote-style = "double" \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index 71e3ae9a..00000000 --- a/Makefile +++ /dev/null @@ -1,202 +0,0 @@ -.PHONY: help local clean revert-% revert clean-% local-% cluster set-image %-base - -SHELL = /bin/bash - -export PATH := $(HOME)/.config/farmvibes-ai:$(PATH) - -CACHE_DEPLOYMENT := terravibes-cache -ORCHESTRATOR_DEPLOYMENT := terravibes-orchestrator -REST_API_DEPLOYMENT := terravibes-rest-api -DATA_OPS_DEPLOYMENT := terravibes-data-ops -WORKER_DEPLOYMENT := terravibes-worker - -CACHE_REPO := farmai/terravibes/cache -ORCHESTRATOR_REPO := farmai/terravibes/api-orchestrator -REST_API_REPO := farmai/terravibes/api-orchestrator -DATA_OPS_REPO := farmai/terravibes/cache -WORKER_REPO := farmai/terravibes/worker - -CONTAINER_DEBUG_PORT := 5678 -REST_API_DEBUG_PORT := 5678 -ORCHESTRATOR_DEBUG_PORT := 5679 -CACHE_DEBUG_PORT := 5680 -WORKER_DEBUG_PORT := 5681 -DATA_OPS_DEBUG_PORT := 5682 - -CURRENT_CACHE_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(CACHE_DEPLOYMENT) -o jsonpath='{.status.replicas}') -CURRENT_REST_API_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(REST_API_DEPLOYMENT) -o jsonpath='{.status.replicas}') -CURRENT_ORCHESTRATOR_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(ORCHESTRATOR_DEPLOYMENT) -o jsonpath='{.status.replicas}') -CURRENT_DATA_OPS_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(DATA_OPS_DEPLOYMENT) -o jsonpath='{.status.replicas}') -CURRENT_WORKER_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(WORKER_DEPLOYMENT) -o jsonpath='{.status.replicas}') - -TAG := tmp-$(shell date +%s) -ROOT := $(shell git rev-parse --show-toplevel) - -build_cluster := env FARMVIBES_AI_IMAGE_PREFIX=terravibes- CONTAINER_REGISTRY_BASE=mcr.microsoft.com bash 
farmvibes-ai local setup -base_image_name := grep -oE 'FROM ([-a-zA-Z0-9@:%._\+~\#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~\#?&//=]*))' FILE | cut -d ' ' -f 2 - -define transform_image_name -$(shell docker ps | grep registry | rev | cut -d ' ' -f 1 | rev):5000/$(1) -endef - -help: ## Shows this help message - @echo -e This is the farmvibes.ai makefile. Supported targets are:\\n - @grep -E -h '\s##\s' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' - -local: cluster restore-git-lfs local-rest-api local-cache local-worker local-orchestrator local-data-ops ## Builds all images locally and deploys them into the local farmvibes.ai cluster - [ -z $(WAIT_AT_THE_END) ] || kubectl delete pods -l backend=terravibes && \ - kubectl wait --for=condition=Available deployment --timeout=300s -l backend=terravibes - -revert: cluster revert-rest-api revert-cache revert-worker revert-orchestrator ## Reverts all images to the official version - -restore-git-lfs: -git lfs pull || echo "git lfs was not found. Please see https://git-lfs.com/ to install it." && exit 1 - -services-base: resources/docker/Dockerfile-services-base - @docker manifest inspect `$(subst FILE,$<,$(base_image_name))` || \ - az acr login -n `$(subst FILE,$<,$(base_image_name)) | cut -d / -f 1 | sed 's|.azurecr.io||g'` || \ - echo "Failed to log into container registry. Please perform an `az login` and try again" - -%-base: resources/docker/Dockerfile-% - @docker manifest inspect `$(subst FILE,$<,$(base_image_name))` || \ - az acr login -n `$(subst FILE,$<,$(base_image_name)) | cut -d / -f 1 | sed 's|.azurecr.io||g'` || \ - echo "Failed to log into container registry. 
Please perform an `az login` and try again" - -delete-%: - kubectl scale deployment $(subst delete-,,$@) --replicas=0 - kubectl delete pod --wait=true -l app=$(subst delete-,,$@) --grace-period=0 --force - kubectl rollout status deployment $(subst delete-,,$@) - -repo-%: - docker pull $(CONTAINER_REGISTRY_BASE)/$(subst repo-,,$@):$(FARMVIBES_AI_IMAGE_TAG) - -set-image: - kubectl set image deployment $(DEPLOYMENT) "*=$(IMAGE_FULL_REFERENCE)" - kubectl rollout status deployment $(DEPLOYMENT) - -set-registry-image: push-image - DEPLOYMENT=$(DEPLOYMENT) IMAGE_FULL_REFERENCE=$(call transform_image_name,$(IMAGE_FULL_REFERENCE)) make -C . set-image - -push-image: - docker tag $(IMAGE_FULL_REFERENCE) 127.0.0.1:5000/$(IMAGE_FULL_REFERENCE) - docker push 127.0.0.1:5000/$(IMAGE_FULL_REFERENCE) - -scale: - kubectl scale deployment $(DEPLOYMENT) --replicas=$(shell [ "$(REPLICAS)" ] && echo "$(REPLICAS)" || echo 1) - [ ! -z $(WAIT_AT_THE_END) ] || kubectl wait --for=condition=Available deployment --timeout=300s $(DEPLOYMENT) - -# Have to replace Xfrozen_modules=on with Xfrozen_modules=off in the deployment -disable-frozen-modules: - kubectl get deployment $(DEPLOYMENT) -o yaml | sed 's|Xfrozen_modules=on|Xfrozen_modules=off|g' | kubectl apply -f - - -add-debug-flag: - kubectl get deployment $(DEPLOYMENT) -o yaml | sed 's|\(\s\+-\)\(.*port=3000\)|\1\2\n\1 --debug|' | kubectl apply -f - - -add-debug-flag-agent: - kubectl get deployment $(DEPLOYMENT) -o yaml | sed 's|\(\s\+-\)\(.*port=3000\)|\1\2\n\1 debug.activate=true|' | kubectl apply -f - - -local-rest-api: cluster local-rest-api-orchestrator delete-$(REST_API_DEPLOYMENT) ## Builds and deploys a local REST API image (enabling debug) - DEPLOYMENT=$(REST_API_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(REST_API_REPO):$(TAG) $(MAKE) -C . set-registry-image - @kubectl get deployment $(REST_API_DEPLOYMENT) -o json | grep -v last | grep -qo -- --debug || DEPLOYMENT=$(REST_API_DEPLOYMENT) $(MAKE) -C . 
add-debug-flag - DEPLOYMENT=$(REST_API_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules - DEPLOYMENT=$(REST_API_DEPLOYMENT) REPLICAS=$(CURRENT_REST_API_REPLICAS) $(MAKE) scale - -revert-rest-api: cluster repo-$(REST_API_REPO) delete-$(REST_API_DEPLOYMENT) ## Reverts the REST API deployment to use the official image - DEPLOYMENT=$(REST_API_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(REST_API_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image - DEPLOYMENT=$(REST_API_DEPLOYMENT) REPLICAS=$(CURRENT_REST_API_REPLICAS) make scale - -local-orchestrator: cluster local-rest-api-orchestrator delete-$(ORCHESTRATOR_DEPLOYMENT) ## Builds and deploys a local ORCHESTRATOR image (enabling debug) - DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(ORCHESTRATOR_REPO):$(TAG) $(MAKE) -C . set-registry-image - @kubectl get deployment $(ORCHESTRATOR_DEPLOYMENT) -o json | grep -v last | grep -qo -- --debug || DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) $(MAKE) -C . add-debug-flag - DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules - DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) REPLICAS=$(CURRENT_ORCHESTRATOR_REPLICAS) $(MAKE) scale - -revert-orchestrator: cluster repo-$(ORCHESTRATOR_REPO) delete-$(ORCHESTRATOR_DEPLOYMENT) ## Reverts the ORCHESTRATOR deployment to use the official image - DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(ORCHESTRATOR_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image - DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) REPLICAS=$(CURRENT_ORCHESTRATOR_REPLICAS) make scale - -local-data-ops: cluster local-cache-repo delete-$(DATA_OPS_DEPLOYMENT) ## Builds and deploys a local data ops image (enabling debug) - DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(DATA_OPS_REPO):$(TAG) $(MAKE) -C . set-registry-image - @kubectl get deployment $(DATA_OPS_DEPLOYMENT) -o json | grep -v last | grep -qo debug.activate || DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) $(MAKE) -C . 
add-debug-flag-agent - DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules - DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=$(CURRENT_DATA_OPS_REPLICAS) $(MAKE) scale - -revert-data-ops: cluster repo-$(DATA_OPS_REPO) delete-$(DATA_OPS_DEPLOYMENT) ## Reverts the data ops deployment to use the official image - DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(DATA_OPS_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image - DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=$(CURRENT_DATA_OPS_REPLICAS) make scale - -local-worker: cluster restore-git-lfs local-worker-repo delete-$(WORKER_DEPLOYMENT) ## Builds and deploys a local WORKER image (enabling debug) - DEPLOYMENT=$(WORKER_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(WORKER_REPO):$(TAG) $(MAKE) -C . set-registry-image - DEPLOYMENT=$(WORKER_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules - DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=$(CURRENT_WORKER_REPLICAS) make scale - -revert-worker: cluster repo-$(WORKER_REPO) delete-$(WORKER_DEPLOYMENT) ## Reverts the WORKER deployment to use the official image - DEPLOYMENT=$(WORKER_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(WORKER_REPO):$(FARMVIBES_AI_IMAGE_TAG) make set-registry-image - DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=$(CURRENT_WORKER_REPLICAS) make scale - -local-cache: cluster local-cache-repo delete-$(CACHE_DEPLOYMENT) ## Builds and deploys a local CACHE image (enabling debug) - DEPLOYMENT=$(CACHE_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CACHE_REPO):$(TAG) $(MAKE) -C . set-registry-image - @kubectl get deployment $(CACHE_DEPLOYMENT) -o json | grep -v last | grep -qo debug.activate || DEPLOYMENT=$(CACHE_DEPLOYMENT) $(MAKE) -C . add-debug-flag-agent - DEPLOYMENT=$(CACHE_DEPLOYMENT) $(MAKE) -C . 
disable-frozen-modules - DEPLOYMENT=$(CACHE_DEPLOYMENT) REPLICAS=$(CURRENT_CACHE_REPLICAS) make scale - -revert-cache: cluster repo-$(CACHE_REPO) delete-$(CACHE_DEPLOYMENT) ## Reverts the CACHE deployment to use the official image - DEPLOYMENT=$(CACHE_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(CACHE_REPO):$(FARMVIBES_AI_IMAGE_TAG) make set-registry-image - DEPLOYMENT=$(CACHE_DEPLOYMENT) REPLICAS=$(CURRENT_CACHE_REPLICAS) make scale - -local-rest-api-orchestrator: cluster services-base - $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) - docker build -t $(REST_API_REPO):$(TAG) -t $(ORCHESTRATOR_REPO):$(TAG) -f $(ROOT)/resources/docker/Dockerfile-api_orchestrator . - -local-cache-repo: cluster services-base - $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) - docker build -t $(CACHE_REPO):$(TAG) -f $(ROOT)/resources/docker/Dockerfile-cache . - -local-worker-repo: cluster worker-base - $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) - docker build -t $(WORKER_REPO):$(TAG) -f $(ROOT)/resources/docker/Dockerfile-worker . - -debug-rest-api: cluster local-rest-api ## Starts listening to debug the REST API - DEPLOYMENT=$(REST_API_DEPLOYMENT) REPLICAS=1 make scale - kubectl port-forward deployments/$(REST_API_DEPLOYMENT) $(REST_API_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) - -debug-orchestrator: cluster local-orchestrator ## Starts listening to debug the ORCHESTRATOR - DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) REPLICAS=1 make scale - kubectl port-forward deployments/$(ORCHESTRATOR_DEPLOYMENT) $(ORCHESTRATOR_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) - -debug-worker: cluster local-worker ## Starts listening to debug the WORKER - @kubectl get deployment $(WORKER_DEPLOYMENT) -o json | grep -v last | grep -qo debug.activate || DEPLOYMENT=$(WORKER_DEPLOYMENT) $(MAKE) -C . 
add-debug-flag-agent - DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=1 make scale - kubectl port-forward pod/`kubectl get pods -l app=$(WORKER_DEPLOYMENT) --field-selector status.phase=Running | awk '/Running/{ print $$1 }'` \ - $(WORKER_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) - -debug-cache: cluster local-cache ## Starts listening to debug the CACHE - DEPLOYMENT=$(CACHE_DEPLOYMENT) REPLICAS=1 make scale - kubectl port-forward pod/`kubectl get pods -l app=$(CACHE_DEPLOYMENT) --field-selector status.phase=Running | awk '/Running/{ print $$1 }'` \ - $(CACHE_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) - -debug-data-ops: cluster local-data-ops ## Starts listening to debug the DATA_OPS - DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=1 make scale - kubectl port-forward deployments/$(DATA_OPS_DEPLOYMENT) $(DATA_OPS_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) - -clean: cluster revert clean-worker clean-orchestrator clean-rest-api clean-cache - -clean-cache: cluster revert-cache revert-worker ## Cleans up the cache image from the local docker "registry" - docker images | grep -E "$(CACHE_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi - -clean-worker: cluster revert-cache revert-worker ## Cleans up the worker image from the local docker "registry" - docker images | grep -E "$(WORKER_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi - -clean-orchestrator: cluster revert-rest-api revert-orchestrator ## Cleans up the orchestrator image from the local docker "registry" - docker images | grep -E "$(ORCHESTRATOR_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi - -clean-data-ops: cluster revert-rest-api revert-data-ops ## Cleans up the data-ops image from the local docker "registry" - docker images | grep -E "$(DATA_OPS_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi - -clean-rest-api: cluster revert-rest-api revert-orchestrator ## Cleans up the orchestrator image from the local docker "registry" - docker images | grep -E "$(REST_API_REPO)\\s+tmp.*" | awk '{ print $$3 }' | 
xargs docker rmi - -cluster: - $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) - which k3d || $(build_cluster) - docker ps | grep -q farmvibes-ai || farmvibes-ai local start || $(build_cluster) diff --git a/docs/source/docfiles/markdown/QUICKSTART.md b/docs/source/docfiles/markdown/QUICKSTART.md index 878f3b27..6963a861 100644 --- a/docs/source/docfiles/markdown/QUICKSTART.md +++ b/docs/source/docfiles/markdown/QUICKSTART.md @@ -22,9 +22,6 @@ In order to run FarmVibes.AI cluster, you need the following: the repository. If you already have access to the source code, then Git is not required. - * [Git LFS](https://git-lfs.com/) to restore some of the large files in the - repository (e.g., model weights). - * [Docker](https://docs.docker.com/engine/install/ubuntu/). Make sure you can run the docker client without running `sudo` by adding your user account to the `docker` group (which might require a logout/login when adding oneself @@ -63,20 +60,6 @@ bash ./resources/vm/setup_farmvibes_ai_vm.sh You might needed to restart your shell session once the script finishes. -## Restore files with Git LFS - -In case you did not have Git LFS installed when cloning the repository, you will need to do so -to restore the large files in the repository. Note that the last step -["Installing software dependencies](#optional-installing-software-dependencies) already installs -Git LFS. - -To restore the missing files, you can run the following command in the root of the repository: - -```shell -git lfs install -git lfs pull -``` - ## Install the FarmVibes.AI cluster With python3.8+ and pip installed on your machine, please install diff --git a/docs/source/docfiles/markdown/TROUBLESHOOTING.md b/docs/source/docfiles/markdown/TROUBLESHOOTING.md index ca53bfde..d3e54963 100644 --- a/docs/source/docfiles/markdown/TROUBLESHOOTING.md +++ b/docs/source/docfiles/markdown/TROUBLESHOOTING.md @@ -122,15 +122,6 @@ that are currently being addressed by the development team. -
- Updating cluster in the `dev` branch after pulling files with Git LFS - - If you did not have Git LFS installed when cloning the repository and checking out to `dev`, - you will be missing some of the large files in the repository (e.g., ONNX models). Make sure - to install and setup Git LFS as described in the [Quickstart guide](QUICKSTART.md#restore-files-with-git-lfs). - You will also need to update your cluster with `make local`. -
-
- **Composing and running workflows:** diff --git a/notebooks/deepmc/mc_forecast.ipynb b/notebooks/deepmc/mc_forecast.ipynb index 7fbfa798..d41b43b7 100755 --- a/notebooks/deepmc/mc_forecast.ipynb +++ b/notebooks/deepmc/mc_forecast.ipynb @@ -15,9 +15,7 @@ "```bash\n", "$ micromamba env create -f ./deepmc_env.yaml\n", "$ micromamba activate deepmc-pytorch\n", - "```\n", - "\n", - "**We currently only support Unix-based systems (Linux and MacOS) for running this notebook.**" + "```\n" ] }, { @@ -57,22 +55,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/azureuser/.conda/envs/deepmc-pytorch/lib/python3.8/site-packages/torchvision/io/image.py:11: UserWarning: Failed to load image Python extension: /home/azureuser/.conda/envs/deepmc-pytorch/lib/python3.8/site-packages/torchvision/image.so: undefined symbol: _ZNK3c1010TensorImpl36is_contiguous_nondefault_policy_implENS_12MemoryFormatE\n", + " warn(f\"Failed to load image Python extension: {e}\")\n" + ] + } + ], "source": [ - "import warnings\n", - "from datetime import datetime\n", - "\n", - "import numpy as np\n", "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from datetime import datetime, timedelta\n", "from matplotlib import pyplot as plt\n", - "from notebook_lib import train\n", - "from shapely import geometry\n", "\n", - "from vibe_notebook.deepmc import prediction, utils\n", - "from vibe_notebook.deepmc.forecast import Forecast\n", + "from shapely import geometry\n", + "\n", + "from notebook_lib import utils\n", + "from notebook_lib import prediction\n", + "from notebook_lib import train\n", + "from notebook_lib.forecast import Forecast\n", "\n", + "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, @@ -81,7 +90,7 @@ "metadata": {}, "source": [ "### Workflows\n", - "The notebook utilizes the workflow below, which is available in 
FarmVibes.AI: " + "The notebook utilize below workflows available in farmvibes" ] }, { @@ -98,9 +107,9 @@ "metadata": {}, "source": [ "### Data\n", - "The notebook utilizes two types of datasets:\n", + "The notebook utilizing two types of datasets\n", "\n", - "1. The historical observations recorded by weather stations.\n", + "1. The historical observations recorded by weather stations\n", "2. The forecast observations downloaded using the [herbie package](https://blaylockbk.github.io/Herbie/_build/html/). This package helps to download recent and archived numerical weather prediction (NWP) model output from different cloud archive sources. Its most popular capability is to download HRRR model data.\n" ] }, @@ -109,7 +118,7 @@ "metadata": {}, "source": [ "### AGWeatherNet\n", - "In this notebook, we utilize historical observations downloaded from AGWeatherNet for the station `Palouse`. The data used for training range from May 2020 to June 2022. For more information check [AGWeatherNet documentation](http://weather.wsu.edu/?p=92850&desktop)." + "In this notebook, we utilize historical observations downloaded from AGWeatherNet for a station \\\"Palouse\\\". The data used for training range from May 2020 to June 2022. For more information check [AGWeatherNet documentation](http://weather.wsu.edu/?p=92850&desktop)." 
] }, { @@ -137,8 +146,8 @@ "metadata": {}, "outputs": [], "source": [ - "PREDICT = \"%s\"\n", - "RELEVANT = \"%s\"\n", + "PREDICT=\"%s\"\n", + "RELEVANT=\"%s\"\n", "ROOT_PATH = f\"./data/model_{PREDICT}/\"\n", "DATA_EXPORT_PATH = ROOT_PATH + f\"{STATION_NAME}/{RELEVANT}/train_data.pkl\"" ] @@ -157,7 +166,7 @@ "outputs": [], "source": [ "# weather dataset filtered and model training limited to train features.\n", - "HISTORICAL_MODEL_TRAIN_FEATURES = [\"humidity\", \"wind_speed\", \"temperature\"]\n", + "HISTORICAL_MODEL_TRAIN_FEATURES = ['humidity', 'wind_speed', 'temperature']\n", "\n", "# Historical data aligned using INDEX variable\n", "INDEX = \"date\"" @@ -194,7 +203,7 @@ "outputs": [], "source": [ "# Models trained to predict out features\n", - "OUT_FEATURES = [\"wind_speed\", \"temperature\"]" + "OUT_FEATURES = ['wind_speed' , 'temperature']" ] }, { @@ -202,16 +211,14 @@ "metadata": {}, "source": [ "### Relevant vs Not Relevant\n", - "The scenario is considered relevant when there is a close match between historical data and forecasts, with minimal discrepancies else it's not relevant.\n", - "\n", - "The notebook supports performing micro climate predictions with the following approaches: \n", + "The notebook support performing micro climate predictions with below approaches. \n", "\n", "1. Utilizing both Historical & Forecast observations. This approach is suggested to use if both observations are relevant.

\n", "\n", "2. Utilizing only Historical dataset. This approach is suggested to use if both Historical & Forecast observations are not relevant or Forecast dataset doesn't exist.

\n", "\n", "\n", - "In next cells, we demonstrate the training and prediction processes for both relevant and non-relevant scenarios. " + "In next cells, demonstrated training & prediction process for both relevant and not relevant scenarios. " ] }, { @@ -263,7 +270,7 @@ "1. The index variable is converted to datetime\n", "2. The input data is interpolated to fill the missing values using their neighbors\n", "3. The script focuses on training the model with a 60-minute frequency, hence the data is grouped for this frequency.\n", - "4. The data is scaled using the scikit-learn StandardScalar. For more information check [scikit-learn documentation](https://github.com/scikit-learn/scikit-learn)" + "4. The data is scaled using the scikit-learn StandardScalar. For more information check [scikit-learn documentaion](https://github.com/scikit-learn/scikit-learn)" ] }, { @@ -289,7 +296,7 @@ "metadata": {}, "outputs": [], "source": [ - "historical_dataset = utils.get_csv_data(path=file_path, interpolate=False, fill_na=False)" + "historical_dataset = utils.get_csv_data(path=file_path)" ] }, { @@ -312,7 +319,7 @@ "  - humidity - \"RH:2 m\"
\n", "  - wind speed - The forecast observations of wind speed are derived using data downloaded for u & v components. The algebraic expression used to calculate wind speed is
\n", " $$ \n", - " ws(u, v) = \\sqrt{u^2 + v^2}\n", + " ws = \\sqrt{u^2 + v^2}\n", " $$\n", "   i. u component - \"UGRD:10 m\"
\n", "   ii. v component - \"VGRD:10 m\"" @@ -341,14 +348,24 @@ "start_date = datetime(year=2020, month=5, day=31)\n", "end_date = datetime(year=2022, month=8, day=2)\n", "time_range = (start_date, end_date)\n", - "date_column = \"date\"\n", + "date_column=\"date\"\n", "\n", - "parameters = [\n", - " {\"weather_type\": \"temperature\", \"search_text\": \"TMP:2 m\"},\n", - " {\"weather_type\": \"humidity\", \"search_text\": \"RH:2 m\"},\n", - " {\"weather_type\": \"u-component\", \"search_text\": \"UGRD:10 m\"},\n", - " {\"weather_type\": \"v-component\", \"search_text\": \"VGRD:10 m\"},\n", - "]" + "parameters = [{\n", + " \"weather_type\": \"temperature\",\n", + " \"search_text\": \"TMP:2 m\"\n", + " },\n", + " {\n", + " \"weather_type\": \"humidity\",\n", + " \"search_text\": \"RH:2 m\"\n", + " },\n", + " {\n", + " \"weather_type\": \"u-component\",\n", + " \"search_text\": \"UGRD:10 m\"\n", + " },\n", + " {\n", + " \"weather_type\": \"v-component\",\n", + " \"search_text\": \"VGRD:10 m\"\n", + " }]" ] }, { @@ -356,7 +373,7 @@ "metadata": {}, "source": [ "### Submit Request to Worker\n", - "We download forecast observations by submitting a request to the worker running in the background. If more than one worker instance is running in the background, the request is processed in parallel for each parameter. Workflow execution utilizes the parameters below while processing requests, this can be overwritten using the parameter argument.\n", + "Download forecast observations by submitting request to worker running in background. If more than one worker instance running in background, it process the request in parallel for each parameter. Workflow execution utilize below parameters while processing requests, this can be overwritten using the parameter argument.\n", "\n", "- fxx: [1, 25, 1] # start, stop, step\n", "- search_text: \"TMP:2 m\"\n", @@ -371,48 +388,24 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c00b48cb983f4c2184d411cd346f2bdb",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Output()"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "'VibeWorkflowRun'(id='d7c0dc6a-339f-45b9-81d1-2fb93d2938f6', name='forecast_temperature', workflow='data_ingestion/weather/herbie_forecast', status='done')\n",
+      "'VibeWorkflowRun'(id='61d952d1-b068-4c2c-b522-a680efed450f', name='forecast_humidity', workflow='data_ingestion/weather/herbie_forecast', status='running')\n",
+      "'VibeWorkflowRun'(id='8c95f7ab-6d6b-40e8-a3bd-c12b854d0a7b', name='forecast_u-component', workflow='data_ingestion/weather/herbie_forecast', status='running')\n",
+      "'VibeWorkflowRun'(id='7490cd70-9731-4cac-ab36-051d3903776a', name='forecast_v-component', workflow='data_ingestion/weather/herbie_forecast', status='running')\n"
+     ]
     }
    ],
    "source": [
-    "forecast = Forecast(\n",
-    "    workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
-    "    geometry=STATION_GEOMETRY,\n",
-    "    time_range=time_range,\n",
-    "    parameters=parameters,\n",
-    ")\n",
-    "run_list = forecast.submit_download_request()"
+    "forecast_ = Forecast(\n",
+    "                workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
+    "                geometry=STATION_GEOMETRY,\n",
+    "                time_range=time_range,\n",
+    "                parameters=parameters,\n",
+    "                )\n",
+    "run_list = forecast_.submit_download_request()"
    ]
   },
   {
@@ -420,14 +413,12 @@
    "metadata": {},
    "source": [
     "### Monitor download of Forecast observations\n",
-    "Check the download status and fetch the downloaded data from FarmVibes.AI. The execution time of the download depends on the time range. \n",
-    "\n",
-    "The downloaded data undergoes the following changes:\n",
+    "Check the download status and fetch the downloaded data from the cluster running in the backend. The execution time of the download depends on time_range. The downloaded data undergoes the following changes.\n",
     "\n",
-    "1. Concatenate the output of all submitted requests.\n",
+    "1. Concatenate the output of all submitted requests.\n",
     "2. Set index on date column.\n",
-    "3. Interpolate to derive the missing data.\n",
-    "4. The data downloaded follows the UTC timezone. It's required to transform the data to the timezone of historical observations. The historical observations used in this notebook follows the PST timezone, hence the data is offset by -8 hours."
+    "3. Interpolate to derive the missing data.\n",
+    "4. The downloaded data follows the UTC timezone. It must be transformed to the timezone of the historical observations. The historical observations used in this notebook follow the PST timezone, hence the data is offset by -8 hours."
    ]
   },
   {
@@ -510,14 +501,14 @@
        "2020-05-30 17:00:00             -2.861307              1.178179  "
       ]
      },
-     "execution_count": 16,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# transform downloaded data from utc to pst timezone\n",
-    "forecast_dataset = forecast.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
+    "forecast_dataset = forecast_.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
     "forecast_dataset.to_csv(f\"{STATION_NAME}_forecast.csv\")\n",
     "forecast_dataset.head(2)"
    ]
@@ -526,40 +517,109 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Preprocess Forecast Observations"
+    "### Preprocess Forecast Observations\n",
+    "The following preprocessing is performed on the downloaded data before model training.\n",
+    "\n",
+    "- Temperature: The downloaded data has units \"kelvin\". It will be converted to Fahrenheit.\n",
+    "- wind_speed: Using the downloaded u-component & v-component values, the wind_speed values are derived. The derived values are multiplied by 2.23 to convert from m/sec to mph\n",
+    "- drop u-component & v-component"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 16,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "We perform the following preprocessing in the downloaded data before training the model.\n",
+    "# Temperature\n",
+    "# convert kelvin to celsius\n",
+    "forecast_dataset[\"temperature_forecast\"] = forecast_dataset[\"temperature_forecast\"]-273.15\n",
     "\n",
-    "- `temperature`: The downloaded data is in Kelvin. It will be converted to Fahrenheit.\n",
-    "- `wind_speed`: Using the u-component & v-component values downloaded, the `wind_speed` values are derived. The derived values are multiplied by 2.23 to convert from m/sec to mph.\n",
-    "- Drop u-component & v-component"
+    "# convert celsius to Fahrenheit\n",
+    "forecast_dataset[\"temperature_forecast\"] = forecast_dataset[\"temperature_forecast\"].apply(lambda x: (x * 9/5) + 32)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 17,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
temperature_forecasthumidity_forecastwind_speed_forecast
date
2020-05-30 16:00:0084.17363349.2999997.025768
2020-05-30 17:00:0082.14973154.5999986.900466
\n", + "
" + ], + "text/plain": [ + " temperature_forecast humidity_forecast \\\n", + "date \n", + "2020-05-30 16:00:00 84.173633 49.299999 \n", + "2020-05-30 17:00:00 82.149731 54.599998 \n", + "\n", + " wind_speed_forecast \n", + "date \n", + "2020-05-30 16:00:00 7.025768 \n", + "2020-05-30 17:00:00 6.900466 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "forecast_dataset = utils.convert_forecast_data(forecast_dataset)\n", + "# wind_speed\n", + "# multiplying with 2.23 to convert wind speed from m/sec to mph\n", + "forecast_dataset[\"wind_speed_forecast\"] = forecast_dataset.apply(lambda x: np.sqrt(np.square(x[\"u-component_forecast\"]) + \n", + " np.square(x[\"v-component_forecast\"]))*2.23, axis=1)\n", + "\n", + "forecast_dataset.drop(columns=[\"u-component_forecast\", \"v-component_forecast\"], inplace=True)\n", "forecast_dataset.head(2)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We also clean the input data with the following operations:\n", - "- Exclude input data outside the time_range of interest.\n", - "- Shift forecast data by number of hours\n", - "- Fill missing data with neighboring data points using pandas interpolate techniques." 
- ] - }, { "cell_type": "code", "execution_count": 18, @@ -638,21 +698,18 @@ "2020-07-06 01:00:00 57.220984 3.85 10.642863 " ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "input_df = utils.clean_relevant_data_using_hrrr(\n", - " actual_df=historical_dataset,\n", - " forecast_df=forecast_dataset,\n", - " out_variables=RELEVANT_FEATURES,\n", - " freq_hours=frequency_hour,\n", - " num_of_indices=number_of_hours,\n", - " start_date=start_date,\n", - " end_date=end_date,\n", - ")\n", + "input_df = utils.clean_relevant_data(\n", + " actual_df=historical_dataset, \n", + " forecast_df=forecast_dataset, \n", + " out_variables=RELEVANT_FEATURES,\n", + " freq_hours=frequency_hour,\n", + " num_of_indices=number_of_hours)\n", "input_df.head(2)" ] }, @@ -660,12 +717,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Verifying if the forecast observations are relevant or not relevant" + "### Verifying the forecast observations are relevant or not relevant" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -674,7 +731,7 @@ "" ] }, - "execution_count": 21, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, @@ -690,7 +747,7 @@ } ], "source": [ - "plot_df = input_df[(input_df.index.month == 7) & (input_df.index.year == 2020)]\n", + "plot_df = input_df[(input_df.index.month==7) & (input_df.index.year==2020)]\n", "\n", "plt.figure(figsize=(20, 4))\n", "plt.plot(plot_df.index.values, plot_df[\"temperature_forecast\"].values, label=\"forecast\")\n", @@ -702,7 +759,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Based on the distribution of observation in above plot, the forecast observations are relevant. In this scenario, we will continue with model training using the relevant dataset." + "Based on the distribution of observation in above plot, the forecast observations are relevant. 
In this scenario continue with model training process using relevant dataset." ] }, { @@ -711,7 +768,7 @@ "source": [ "### Training\n", "\n", - "The script is configured to train the Micro Climate prediction model for 24 hours and the historical weather station data has points with a 60-minute frequency. Below inputs vary based on the number of hours of prediction and frequency of weather station data points.\n", + "The script is configured to train the Micro Climate prediction model for 24 hours and the historical weather station data has points with a 60-minute frequency. Below inputs vary based on number of hours of prediction and frequency of weather station data points.\n", "\n", "1. `chunk_size` - The value of the chunk size is based on the frequency of the weather station data points. For a frequency of 60 minutes, the minimum required data points are 528. If the data frequency is 15 minutes, the minimum number of data points required is 528*4 = 2112. These are the minimum number of data points need to be provided as input during the inference.\n", "2. `ts_lookahead` - The value used during the data preprocessing. It is the value used to consider weather data points ahead for a given time period while grouping the data.\n", @@ -734,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": { "tags": [] }, @@ -746,8 +803,7 @@ " root_path=ROOT_PATH,\n", " data_export_path=DATA_EXPORT_PATH,\n", " station_name=STATION_NAME,\n", - " relevant=True,\n", - ")\n", + " relevant=True)\n", "\n", "train_weather.train_model(input_df)" ] @@ -764,27 +820,26 @@ "metadata": {}, "source": [ "### Current\n", - "To predict the weather for next 24 hours, we need certain hours of historical forecast observations beforehand. The default size (chunk size) of historical forecast observations is 528. 
Choosing a start time to predict is important, if historical observations used to train model have a starting time of 12:00:00, then the historical observations used for prediction should start at the same time." + "Predict weather for the next 24 hours. To predict weather for next 24 hours it is required to certain hours of historical forecast observations, the default size called chunk size of historical forecast observations is 528. Choosing start time of prediction is important, if historical observations used to train model has the start time of 12:00:00 then the historical observations used for prediction should start at the same time." ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "weather_forecast = prediction.InferenceWeather(\n", - " root_path=ROOT_PATH,\n", - " data_export_path=DATA_EXPORT_PATH,\n", - " station_name=STATION_NAME,\n", - " predicts=OUT_FEATURES,\n", - " relevant=True,\n", - ")" + " root_path=ROOT_PATH,\n", + " data_export_path=DATA_EXPORT_PATH,\n", + " station_name=STATION_NAME,\n", + " predicts=OUT_FEATURES,\n", + " relevant=True)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -793,69 +848,55 @@ "p_end_date = datetime(year=2022, month=6, day=3, hour=0, minute=0, second=0)\n", "\n", "time_range = (p_start_date, p_end_date)\n", - "date_column = \"date\"\n", + "date_column=\"date\"\n", "\n", - "parameters = [\n", - " {\"weather_type\": \"temperature\", \"search_text\": \"TMP:2 m\"},\n", - " {\"weather_type\": \"humidity\", \"search_text\": \"RH:2 m\"},\n", - " {\"weather_type\": \"u-component\", \"search_text\": \"UGRD:10 m\"},\n", - " {\"weather_type\": \"v-component\", \"search_text\": \"VGRD:10 m\"},\n", - "]" + "parameters = [{\n", + " \"weather_type\": \"temperature\",\n", + " \"search_text\": \"TMP:2 m\"\n", + " },\n", + " {\n", + " \"weather_type\": \"humidity\",\n", + " 
\"search_text\": \"RH:2 m\"\n", + " },\n", + " {\n", + " \"weather_type\": \"u-component\",\n", + " \"search_text\": \"UGRD:10 m\"\n", + " },\n", + " {\n", + " \"weather_type\": \"v-component\",\n", + " \"search_text\": \"VGRD:10 m\"\n", + " }]" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "04e890c9992c4710acdbb661b49c1e56",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Output()"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "'VibeWorkflowRun'(id='ea662424-c9c5-4d1f-8d1c-ed907f0176ea', name='forecast_temperature', workflow='data_ingestion/weather/herbie_forecast', status='done')\n",
+      "'VibeWorkflowRun'(id='e93b52c2-9c20-4bf2-b647-a7dc04ff4947', name='forecast_humidity', workflow='data_ingestion/weather/herbie_forecast', status='done')\n",
+      "'VibeWorkflowRun'(id='5cbd7199-626c-43dc-aa73-17639c97bc30', name='forecast_u-component', workflow='data_ingestion/weather/herbie_forecast', status='done')\n",
+      "'VibeWorkflowRun'(id='e9373c58-5730-4b03-aee4-83015ab08848', name='forecast_v-component', workflow='data_ingestion/weather/herbie_forecast', status='done')\n"
+     ]
     }
    ],
    "source": [
-    "forecast = Forecast(\n",
-    "    workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
-    "    geometry=STATION_GEOMETRY,\n",
-    "    time_range=time_range,\n",
-    "    parameters=parameters,\n",
-    ")\n",
-    "run_list = forecast.submit_download_request()"
+    "forecast_ = Forecast(\n",
+    "                workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
+    "                geometry=STATION_GEOMETRY,\n",
+    "                time_range=time_range,\n",
+    "                parameters=parameters,\n",
+    "                )\n",
+    "run_list = forecast_.submit_download_request()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -933,21 +974,21 @@
        "2022-03-17 17:00:00              4.563419              1.176411  "
       ]
      },
-     "execution_count": 27,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# transform downloaded data from utc to pst timezone\n",
-    "p_forecast_dataset = forecast.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
+    "p_forecast_dataset = forecast_.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
     "p_forecast_dataset.to_csv(f\"{STATION_NAME}_forecast.csv\")\n",
     "p_forecast_dataset.head(2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -1006,14 +1047,14 @@
        "2022-03-18 14:00:00    66.300      16.175       50.075"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "predict_file_path = f\"./data/{STATION_NAME}/prediction.csv\"\n",
-    "p_historical_dataset = utils.get_csv_data(path=predict_file_path, interpolate=False, fill_na=False)\n",
+    "p_historical_dataset = utils.get_csv_data(path=predict_file_path)\n",
     "p_historical_dataset = p_historical_dataset[HISTORICAL_MODEL_TRAIN_FEATURES]\n",
     "\n",
     "p_historical_dataset.head(2)"
@@ -1021,26 +1062,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
-    "p_forecast_dataset = utils.convert_forecast_data(p_forecast_dataset)"
+    "# Temperature\n",
+    "# convert kelvin to celsius\n",
+    "p_forecast_dataset[\"temperature_forecast\"] = p_forecast_dataset[\"temperature_forecast\"]-273.15\n",
+    "\n",
+    "# convert celsius to Fahrenheit\n",
+    "p_forecast_dataset[\"temperature_forecast\"] = p_forecast_dataset[\"temperature_forecast\"].apply(lambda x: (x * 9/5) + 32)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 26,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "We clean the input data with the following operations:\n",
-    "- Exclude input data outside the time range of interest.\n",
-    "- Shift forecast data by number of hours\n",
-    "- Fill missing data with neighboring data points using pandas interpolate techniques."
+    "# wind_speed\n",
+    "# multiplying with 2.23 to convert wind speed from m/sec to mph\n",
+    "p_forecast_dataset[\"wind_speed_forecast\"] = p_forecast_dataset.apply(lambda x: np.sqrt(np.square(x[\"u-component_forecast\"]) + \n",
+    "                                    np.square(x[\"v-component_forecast\"]))*2.23, axis=1)\n",
+    "\n",
+    "p_forecast_dataset.drop(columns=[\"u-component_forecast\", \"v-component_forecast\"], inplace=True)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -1116,21 +1166,19 @@
        "2022-03-18 14:00:00             45.456384      16.175            17.855009  "
       ]
      },
-     "execution_count": 32,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "input_df = utils.clean_relevant_data_using_hrrr(\n",
-    "    actual_df=p_historical_dataset.copy(),\n",
-    "    forecast_df=p_forecast_dataset.copy(),\n",
-    "    out_variables=RELEVANT_FEATURES,\n",
-    "    freq_hours=frequency_hour,\n",
-    "    num_of_indices=number_of_hours,\n",
-    "    start_date=start_date,\n",
-    "    end_date=end_date,\n",
-    ")\n",
+    "input_df = utils.clean_relevant_data(\n",
+    "                    actual_df=p_historical_dataset.copy(),\n",
+    "                    forecast_df= p_forecast_dataset.copy(),\n",
+    "                    out_variables= RELEVANT_FEATURES,\n",
+    "                    freq_hours=frequency_hour,\n",
+    "                    num_of_indices=number_of_hours\n",
+    "                )\n",
     "\n",
     "base_data_df = input_df[RELEVANT_FEATURES]\n",
     "base_data_df.head(2)"
@@ -1138,7 +1186,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1149,7 +1197,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -1191,7 +1239,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
@@ -1267,45 +1315,45 @@
        "2022-03-16 17:00:00             44.783197      14.325            10.509131  "
       ]
      },
-     "execution_count": 36,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "predict_file_path = f\"./data/{STATION_NAME}/training.csv\"\n",
-    "p_historical_dataset = utils.get_csv_data(path=predict_file_path, interpolate=False, fill_na=False)\n",
+    "p_historical_dataset = utils.get_csv_data(path=predict_file_path)\n",
     "p_historical_dataset = p_historical_dataset[HISTORICAL_MODEL_TRAIN_FEATURES]\n",
     "p_historical_dataset.head(5)\n",
     "\n",
     "input_df = utils.clean_relevant_data(\n",
-    "    p_historical_dataset.copy(),\n",
-    "    p_forecast_dataset.copy(),\n",
-    "    RELEVANT_FEATURES,\n",
-    "    freq_hours=frequency_hour,\n",
-    "    num_of_indices=number_of_hours,\n",
-    ")\n",
+    "                    p_historical_dataset.copy(), \n",
+    "                    p_forecast_dataset.copy(), \n",
+    "                    RELEVANT_FEATURES,\n",
+    "                    freq_hours=frequency_hour,\n",
+    "                    num_of_indices=number_of_hours)\n",
     "base_data_df = input_df[RELEVANT_FEATURES]\n",
     "base_data_df.head(2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
     "predict_start_datetime = datetime(year=2022, month=4, day=30, hour=13, minute=0, second=0)\n",
     "predict_end_datetime = datetime(year=2022, month=5, day=21, hour=13, minute=0, second=0)\n",
     "\n",
-    "df_out = weather_forecast.inference_historical(\n",
-    "    base_data_df.copy(), start_datetime=predict_start_datetime, end_datetime=predict_end_datetime\n",
-    ")"
+    "df_out = weather_forecast.inference_historical(base_data_df.copy(),\n",
+    "            start_datetime=predict_start_datetime,\n",
+    "            end_datetime=predict_end_datetime\n",
+    "            )"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -1330,22 +1378,20 @@
     }
    ],
    "source": [
-    "base_data_df = base_data_df[\n",
-    "    (base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)\n",
-    "]\n",
+    "base_data_df = base_data_df[(base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)]\n",
     "\n",
     "for predict in OUT_FEATURES:\n",
     "    plt.figure(figsize=(18, 6))\n",
-    "    plt.plot(df_out[\"date\"].values, utils.smooth(df_out[predict].values, 2), label=\"Prediction\")\n",
+    "    plt.plot(df_out[\"date\"].values, utils.smooth(df_out[predict].values, 2), label=\"Predict\")\n",
     "    plt.plot(base_data_df.index.values, base_data_df[predict].values, label=\"Ground Truth\")\n",
-    "\n",
+    "    # plt.plot(base_data_df.index.values, base_data_df[predict+\"_forecast\"].values, label=\"Forecast\")\n",
     "    plt.title(f\"24 Models {predict} Ground Truth Vs Predict\")\n",
     "    plt.legend()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
@@ -1365,28 +1411,31 @@
     }
    ],
    "source": [
+    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
+    "import math\n",
+    "\n",
+    "def calculate_KPI(y, yhat):\n",
+    "    print(\"RMSE: {}\".format(round(mean_squared_error(y,yhat,squared=False),2)))\n",
+    "    print(\"MAE: {}\".format(round(mean_absolute_error(y,yhat),2)))\n",
+    "    print(\"MAE%: {}%\".format(round(100*sum(abs(y-yhat))/sum(y),2)))\n",
+    "\n",
     "print(\"temperature\")\n",
-    "utils.calculate_KPI(\n",
-    "    utils.smooth(list(df_out[\"temperature\"].values), 1),\n",
-    "    np.array(base_data_df[\"temperature\"].values),\n",
-    ")\n",
+    "calculate_KPI(utils.smooth(df_out[\"temperature\"].values, 1),base_data_df[\"temperature\"].values)\n",
     "\n",
     "print(\"\\n\", \"wind_speed\")\n",
-    "utils.calculate_KPI(\n",
-    "    utils.smooth(list(df_out[\"wind_speed\"].values), 1), np.array(base_data_df[\"wind_speed\"].values)\n",
-    ")"
+    "calculate_KPI(utils.smooth(df_out[\"wind_speed\"].values, 1),base_data_df[\"wind_speed\"].values)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Training model using non-relevant dataset or without forecast observations"
+    "### Training model using non-relevant dataset or without forecast observations"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1396,7 +1445,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1406,8 +1455,7 @@
     "    root_path=ROOT_PATH,\n",
     "    data_export_path=DATA_EXPORT_PATH,\n",
     "    station_name=STATION_NAME,\n",
-    "    relevant=False,\n",
-    ")\n",
+    "    relevant=False)\n",
     "\n",
     "train_weather.train_model(historical_df, start=0, epochs=1)"
    ]
@@ -1422,16 +1470,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
     "weather_forecast = prediction.InferenceWeather(\n",
-    "    root_path=ROOT_PATH,\n",
-    "    data_export_path=DATA_EXPORT_PATH,\n",
-    "    station_name=STATION_NAME,\n",
-    "    predicts=OUT_FEATURES,\n",
-    ")"
+    "                        root_path=ROOT_PATH,\n",
+    "                        data_export_path=DATA_EXPORT_PATH,\n",
+    "                        station_name=STATION_NAME,\n",
+    "                        predicts=OUT_FEATURES)"
    ]
   },
   {
@@ -1444,7 +1491,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1454,7 +1501,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1464,12 +1511,14 @@
     "\n",
     "df_output_merge = pd.DataFrame(columns=base_data_df.columns)\n",
     "\n",
-    "df_out = weather_forecast.inference(base_data_df, start_datetime=predict_start_datetime)"
+    "df_out = weather_forecast.inference(base_data_df,\n",
+    "            start_datetime=predict_start_datetime\n",
+    "            )"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -1511,7 +1560,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1522,14 +1571,15 @@
     "predict_start_datetime = datetime(year=2022, month=4, day=30, hour=13, minute=0, second=0)\n",
     "predict_end_datetime = datetime(year=2022, month=5, day=21, hour=13, minute=0, second=0)\n",
     "\n",
-    "df_out = weather_forecast.inference_historical(\n",
-    "    base_data_df, start_datetime=predict_start_datetime, end_datetime=predict_end_datetime\n",
-    ")"
+    "df_out = weather_forecast.inference_historical(base_data_df,\n",
+    "            start_datetime=predict_start_datetime,\n",
+    "            end_datetime=predict_end_datetime\n",
+    "            )"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -1554,14 +1604,12 @@
     }
    ],
    "source": [
-    "base_data_df = base_data_df[\n",
-    "    (base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)\n",
-    "]\n",
+    "base_data_df = base_data_df[(base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)]\n",
     "for predict in OUT_FEATURES:\n",
     "    plt.figure(figsize=(20, 5))\n",
     "    plt.plot(df_out[\"date\"].values, df_out[predict].values)\n",
     "    plt.plot(base_data_df.index.values, base_data_df[predict].values)\n",
-    "    plt.title(f\"24 Models {predict} Ground Truth Vs Prediction\")\n",
+    "    plt.title(f\"24 Models {predict} Ground Truth Vs Predict\")\n",
     "    plt.legend([\"Predict\", \"Ground Truth\"])"
    ]
   }
@@ -1584,7 +1632,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.18"
+   "version": "3.8.16"
   },
   "name": "Micro climate prediction",
   "running_time": "",
diff --git a/notebooks/deepmc/notebook_lib/forecast.py b/notebooks/deepmc/notebook_lib/forecast.py
index 993a6617..645b8e8d 100644
--- a/notebooks/deepmc/notebook_lib/forecast.py
+++ b/notebooks/deepmc/notebook_lib/forecast.py
@@ -1,3 +1,4 @@
+import time
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Tuple, cast
 
@@ -7,7 +8,7 @@
 from shapely.geometry import Point
 
 from vibe_core.client import FarmvibesAiClient, get_default_vibe_client
-from vibe_core.datamodel import RunConfig, RunConfigUser, SpatioTemporalJson
+from vibe_core.datamodel import RunConfig, RunConfigUser, RunDetails, SpatioTemporalJson
 
 
 class Forecast:
@@ -30,8 +31,7 @@ def submit_download_request(self):
         """
         Submit request to worker to download forecast data
         """
-        run_metadata_list = []
-        runs = []
+        run_list = []
         for parameter in self.parameters:
             run_name = f"forecast_{parameter['weather_type']}"
             run = self.client.run(
@@ -42,40 +42,57 @@ def submit_download_request(self):
                 parameters=parameter,
             )
 
-            run_metadata_list.append(
+            try:
+                run.block_until_complete(5)
+            except RuntimeError:
+                print(run)
+
+            run_list.append(
                 {
                     "id": run.id,
                     "weather_type": parameter["weather_type"],
                 }
             )
-            runs.append(run)
-
-        self.client.monitor(runs, 5)
 
-        return run_metadata_list
+        return run_list
 
     def get_run_status(self, run_list: List[Dict[str, str]]):
         clear_output(wait=True)
-        out = []
+        all_done = True
+        out_ = []
         for run_item in run_list:
             o = self.client.describe_run(run_item["id"])
             print(f"Execution status for {run_item['weather_type']}: {o.details.status}")
 
             if o.details.status == "done":
-                out.append(o)
+                out_.append(o)
+            elif o.details.status == "failed":
+                print(o.details)
             else:
-                raise Exception(
-                    f"Execution status for {run_item['weather_type']}: {o.details.status}"
-                )
-
-        return out
+                all_done = False
+                cnt_complete = 0
+                for key, value in o.task_details.items():
+                    value = cast(RunDetails, value)
+                    assert value.subtasks is not None, "Subtasks don't exist"
+                    for subtask in value.subtasks:
+                        if subtask.status == "done":
+                            cnt_complete += 1
+                    print(
+                        "\t",
+                        f"Subtask {key}",
+                        cnt_complete,
+                        "/",
+                        len(value.subtasks),
+                    )
+                    cnt_complete = 0
+        return all_done, out_
 
     def get_all_assets(self, details: RunConfigUser):
         asset_files = []
         output = details.output["weather_forecast"]
         record: Dict[str, Any]
         for record in cast(List[Dict[str, Any]], output):
-            for value in record["assets"].values():
+            for _, value in record["assets"].items():
                 asset_files.append(value["href"])
         df_assets = [pd.read_csv(f, index_col=False) for f in asset_files]
         df_out = pd.concat(df_assets)
@@ -87,15 +104,21 @@ def get_downloaded_data(self, run_list: List[Dict[str, str]], offset_hours: int
         check the download status. If status is done, fetch the downloaded data
         """
         forecast_dataset = pd.DataFrame()
-        out = self.get_run_status(run_list)
-        for detail in out:
-            df = self.get_all_assets(detail)
+        status = False
+        out_ = []
+        while status is False:
+            status, out_ = self.get_run_status(run_list)
+            time.sleep(10)
+
+        if status:
+            for detail in out_:
+                df = self.get_all_assets(detail)
 
-            # Offset from UTC to specified timezone
-            df.index = df.index + pd.offsets.Hour(offset_hours)
+                # Offset from UTC to specified timezone
+                df.index = df.index + pd.offsets.Hour(offset_hours)
 
-            if not df.empty:
-                forecast_dataset = pd.concat([forecast_dataset, df], axis=1)
+                if not df.empty:
+                    forecast_dataset = pd.concat([forecast_dataset, df], axis=1)
 
         return forecast_dataset
 
diff --git a/notebooks/deepmc/notebook_lib/modules.py b/notebooks/deepmc/notebook_lib/modules.py
index 9be52ab4..5fbfe012 100644
--- a/notebooks/deepmc/notebook_lib/modules.py
+++ b/notebooks/deepmc/notebook_lib/modules.py
@@ -59,14 +59,14 @@ def training_step(self, train_batch: Tensor, _):
         x, y = train_batch[:6], train_batch[6]
         y_hat = self.deepmc(x)
         loss = self.loss(y_hat, y)
-        self.log("train_loss/total", loss, on_epoch=True, prog_bar=True, logger=True, on_step=True)
+        self.log("train_loss/total", loss)
         return loss
 
     def validation_step(self, validation_batch: Tensor, _):
         x, y = validation_batch[:6], validation_batch[6]
         y_hat = self.deepmc(x)
         loss = self.loss(y_hat, y)
-        self.log("val_loss/total", loss, on_epoch=True, prog_bar=True, logger=True, on_step=True)
+        self.log("val_loss/total", loss, on_epoch=True)
         return loss
 
 
diff --git a/notebooks/deepmc/notebook_lib/post_models.py b/notebooks/deepmc/notebook_lib/post_models.py
new file mode 100644
index 00000000..224be6fd
--- /dev/null
+++ b/notebooks/deepmc/notebook_lib/post_models.py
@@ -0,0 +1,34 @@
+from keras.layers import BatchNormalization, Dense, Input
+from keras.models import Sequential
+from keras.utils.vis_utils import plot_model
+
+
+def simple_mixture_model(inshape: int):
+    """Return a compiled MLP (two ReLU + batch-norm stages) with ``inshape`` inputs/outputs."""
+    network = Sequential([
+        Input(shape=(inshape,)),
+        Dense(inshape * 2, activation="relu"),
+        BatchNormalization(),
+        Dense(inshape * 4, activation="relu"),
+        BatchNormalization(),
+        Dense(inshape),
+    ])
+    network.compile(loss="mae", optimizer="adam")
+    return network
+
+
+def fit_model(model, train_X, train_y, test_X, test_y, batch_size: int, epochs: int = 20):
+    """Fit ``model`` with the test split as validation data; return ``(model, history)``."""
+    validation_data = (test_X, test_y)
+
+    # fit network
+    history = model.fit(
+        train_X,
+        train_y,
+        epochs=epochs,
+        batch_size=batch_size,
+        validation_data=validation_data,
+        verbose=1,
+    )
+
+    return model, history
diff --git a/src/vibe_notebook/vibe_notebook/deepmc/prediction.py b/notebooks/deepmc/notebook_lib/prediction.py
similarity index 89%
rename from src/vibe_notebook/vibe_notebook/deepmc/prediction.py
rename to notebooks/deepmc/notebook_lib/prediction.py
index 9180ae10..fad17778 100644
--- a/src/vibe_notebook/vibe_notebook/deepmc/prediction.py
+++ b/notebooks/deepmc/notebook_lib/prediction.py
@@ -1,6 +1,3 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
 import os
 import pickle
 from datetime import datetime, timedelta
@@ -9,9 +6,9 @@
 import numpy as np
 import onnxruntime
 import pandas as pd
-from numpy.typing import NDArray
+from numpy._typing import NDArray
 
-from vibe_notebook.deepmc.preprocess import Preprocess
+from .preprocess import Preprocess
 
 MODEL_SUFFIX = "deepmc."
 
@@ -170,7 +167,7 @@ def run_individual_predict(
             relevant=self.relevant,
         )
 
-        test_X, _, _ = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict)
+        test_X = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict)
         time_arr = []
         post_yhat = np.empty([1, self.ts_lookahead, self.ts_lookahead])
         for idx in range(0, self.total_models):
@@ -254,7 +251,7 @@ def run_individual_predict_historical(
         )
 
         inshape = self.total_models
-        test_X, _, _ = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict)
+        test_X = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict)
         post_yhat = np.empty([test_X[0].shape[0] + 1 - inshape, inshape, self.total_models])
         for idx in range(0, self.total_models):
             out_x = self.predict(path=self.onnx_file, predict=predict, model_idx=idx, inputs=test_X)
@@ -282,24 +279,3 @@ def run_individual_predict_historical(
         yhat_final = output_scaler.inverse_transform(np.expand_dims(yhat_final, axis=1))[:, 0]
         df_predict = pd.DataFrame(data=list(zip(df_out, yhat_final)), columns=["date", predict])
         return df_predict
-
-    def deepmc_preprocess(self, df_in: pd.DataFrame, predict: str):
-        with open(self.data_export_path, "rb") as f:
-            train_scaler, output_scaler = pickle.load(f)[4:6]
-
-        preprocess = Preprocess(
-            train_scaler=train_scaler,
-            output_scaler=output_scaler,
-            is_training=False,
-            ts_lookahead=self.ts_lookahead,
-            ts_lookback=self.ts_lookback,
-            chunk_size=self.chunk_size,
-            wavelet=self.wavelet,
-            mode=self.mode,
-            level=self.level,
-            relevant=self.relevant,
-        )
-
-        test_x, test_x_dates, _ = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict)
-
-        return test_x, test_x_dates, train_scaler, output_scaler
diff --git a/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py b/notebooks/deepmc/notebook_lib/preprocess.py
similarity index 64%
rename from src/vibe_notebook/vibe_notebook/deepmc/preprocess.py
rename to notebooks/deepmc/notebook_lib/preprocess.py
index 67c4e043..c8d81b93 100644
--- a/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py
+++ b/notebooks/deepmc/notebook_lib/preprocess.py
@@ -1,14 +1,10 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-from datetime import timedelta
 from math import ceil
-from typing import Any, List, Optional, Tuple
+from typing import Any, Optional, Tuple
 
+from numpy._typing import NDArray
 import numpy as np
 import pandas as pd
 import pywt
-from numpy.typing import NDArray
 from sklearn.preprocessing import StandardScaler
 
 
@@ -39,15 +35,11 @@ def __init__(
         self.is_validation = is_validation
         self.relevant = relevant
 
-    def wavelet_transform_predict(
-        self, df_in: pd.DataFrame, predict: str
-    ) -> Tuple[NDArray[Any], List[Any], List[Any]]:
+    def wavelet_transform_predict(self, df_in: pd.DataFrame, predict: str) -> NDArray[Any]:
         i = 1
         start = i
         end = start
         t_test_X = []
-        t_x_dates = []
-        t_y_dates = []
 
         test_df = pd.DataFrame(
             self.train_scaler.transform(df_in), columns=df_in.columns, index=df_in.index
@@ -60,13 +52,9 @@ def wavelet_transform_predict(
             i = i + 1
             chunkdataDF = test_df.iloc[start:end]
 
-            test_uX, _, test_x_dates, test_y_dates = self.convert_df_wavelet_input(
-                data_df=chunkdataDF, predict=predict
-            )
+            test_uX, _ = self.convert_df_wavelet_input(data_df=chunkdataDF, predict=predict)
 
             t_test_X.append(test_uX)
-            t_x_dates.append(test_x_dates)
-            t_y_dates.append(test_y_dates)
 
         test_X = t_test_X[0].copy()
 
@@ -74,51 +62,30 @@ def wavelet_transform_predict(
             for j in range(len(t_test_X[i])):
                 test_X[j] = np.append(test_X[j], t_test_X[i][j], axis=0)
 
-        return test_X, t_x_dates, t_y_dates
+        return test_X
 
     def wavelet_transform_train(
         self, train_df: pd.DataFrame, test_df: pd.DataFrame, out_feature: str
     ) -> Tuple[NDArray[Any], ...]:
-        t_train_X, t_train_y, t_train_X_dates, t_train_y_dates = self.prepare_wavelet_data(
-            train_df, out_feature=out_feature
-        )
+        t_train_X, t_train_y = self.prepare_wavelet_data(train_df, out_feature=out_feature)
 
-        t_test_X, t_test_y, t_test_X_dates, t_test_y_dates = self.prepare_wavelet_data(
-            test_df, out_feature=out_feature
-        )
+        t_test_X, t_test_y = self.prepare_wavelet_data(test_df, out_feature=out_feature)
 
         train_X = t_train_X[0].copy()
         train_y = t_train_y[0].copy()
-        train_dates_X = t_train_X_dates[0][0].copy()
-        train_dates_y = t_train_y_dates[0].copy()
-        for i in range(len(t_train_X)):
+        for i in range(1, len(t_train_X)):
             train_y = np.append(train_y, t_train_y[i], axis=0)
-            train_dates_X = np.append(train_dates_X, t_train_X_dates[i][0], axis=0)
-            train_dates_y = np.append(train_dates_y, t_train_y_dates[i], axis=0)
             for j in range(len(t_train_X[i])):
                 train_X[j] = np.append(train_X[j], t_train_X[i][j], axis=0)
 
         test_X = t_test_X[0].copy()
         test_y = t_test_y[0].copy()
-        test_dates_X = t_test_X_dates[0][0].copy()
-        test_dates_y = t_test_y_dates[0].copy()
         for i in range(1, len(t_test_X)):
             test_y = np.append(test_y, t_test_y[i], axis=0)
-            test_dates_X = np.append(test_dates_X, t_test_X_dates[i][0], axis=0)
-            test_dates_y = np.append(test_dates_y, t_test_y_dates[i], axis=0)
             for j in range(len(t_test_X[i])):
                 test_X[j] = np.append(test_X[j], t_test_X[i][j], axis=0)
 
-        return (
-            train_X,
-            train_y,
-            test_X,
-            test_y,
-            train_dates_X,
-            train_dates_y,
-            test_dates_X,
-            test_dates_y,
-        )
+        return train_X, train_y, test_X, test_y
 
     def prepare_wavelet_data(self, data_df: pd.DataFrame, out_feature: str):
         i = 0
@@ -126,8 +93,6 @@ def prepare_wavelet_data(self, data_df: pd.DataFrame, out_feature: str):
         end = start
         t_data_x = []
         t_data_y = []
-        t_dates_x = []
-        t_dates_y = []
 
         while end < data_df.shape[0]:
             start = i
@@ -135,16 +100,14 @@ def prepare_wavelet_data(self, data_df: pd.DataFrame, out_feature: str):
             i = i + 1
             o_data_df = data_df.iloc[start:end]
 
-            data_ux, data_uy, data_ux_dates, data_uy_dates = self.convert_df_wavelet_input(
+            data_ux, data_uy = self.convert_df_wavelet_input(
                 o_data_df,
                 predict=out_feature,
             )
             t_data_x.append(data_ux)
             t_data_y.append(data_uy)
-            t_dates_x.append(data_ux_dates)
-            t_dates_y.append(data_uy_dates)
 
-        return t_data_x, t_data_y, t_dates_x, t_dates_y
+        return t_data_x, t_data_y
 
     def dl_preprocess_data(
         self,
@@ -152,7 +115,7 @@ def dl_preprocess_data(
         predict: str,
         per_split: float = 0.8,
         training: bool = False,
-    ) -> Tuple[NDArray, Optional[NDArray], Optional[NDArray], Optional[NDArray], Optional[NDArray]]:  # type: ignore
+    ) -> Tuple[NDArray, Optional[NDArray], Optional[NDArray], Optional[NDArray]]:  # type: ignore
         """
         merge chunk of data as single entity
         Args:
@@ -177,7 +140,7 @@ def dl_preprocess_data(
             label_data = label_df.values
 
             # label_data = label_df.values
-            X, y, dates = list(), list(), list()
+            X, y = list(), list()
             in_start = 0
 
             # step over the entire history one time step at a time
@@ -190,37 +153,30 @@ def dl_preprocess_data(
                 if out_end <= len(data):
                     X.append(data[in_start:in_end, :])
                     y.append(label_data[in_end:out_end, :])
-                    dates.append(df.index[in_end:out_end].strftime("%Y-%m-%d %H:%M:%S").values)
                 # move along one time step
                 in_start += 1
 
             X = np.array(X)
             y = np.array(y)
-            dates = np.array(dates)
 
             if self.is_validation is True:
                 n_train_split = ceil(len(data) * per_split)
                 train_X, train_y = X[:n_train_split, :, :], y[:n_train_split, :, :]
                 test_X, test_y = X[n_train_split:, :], y[n_train_split:, :]
 
-                return train_X, train_y, test_X, test_y, dates
+                return train_X, train_y, test_X, test_y
             else:
-                return X, y, None, None, dates
+                return X, y, None, None
         else:
-            X, dates = list(), list()
+            X = list()
             in_start = 0
             for _ in range(len(data) - n_in + 1):
                 in_end = in_start + n_in
                 if in_end <= len(data):
                     X.append(data[in_start:in_end, :])
-                    # shift dates by lookahead to match it with the y
-                    dates.append(
-                        [t + timedelta(hours=self.ts_lookback) for t in df.index[in_start:in_end]]
-                    )
                 in_start += 1
             X = np.array(X)
-            dates = np.array(dates)
-        return X, None, None, None, dates
+        return X, None, None, None
 
     def convert_df_wavelet_input(self, data_df: pd.DataFrame, predict: str):
         if self.relevant:
@@ -232,66 +188,59 @@ def convert_df_wavelet_input_not_relevant(self, data_df: pd.DataFrame, predict:
         level = self.level
         rd = list()
         N = data_df.shape[0]
-        test_X, test_X_dates, test_y_dates, test_y = list(), list(), list(), list()
+        test_X = list()
 
         if self.is_training:
-            (_, test_y, _, _, test_y_dates) = self.dl_preprocess_data(
+            test_y = self.dl_preprocess_data(
                 data_df.iloc[-self.ts_lookback - self.ts_lookahead :],
                 predict=predict,
                 training=self.is_training,
-            )
+            )[1]
 
             assert test_y is not None
             test_y = test_y[[-1], :, :]
-            dates = test_y_dates[[-1], :]
 
             data_df = data_df.iloc[: -self.ts_lookahead]
+        else:
+            test_y = []
 
         wp5 = pywt.wavedec(data=data_df[predict], wavelet=self.wavelet, mode=self.mode, level=level)
         N = data_df.shape[0]
         for i in range(1, level + 1):
             rd.append(pywt.waverec(wp5[:-i] + [None] * i, wavelet=self.wavelet, mode=self.mode)[:N])
 
-        (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data(
-            data_df.iloc[-self.ts_lookback :], predict=predict
-        )
+        t_test_X = self.dl_preprocess_data(data_df.iloc[-self.ts_lookback :], predict=predict)[0]
 
         test_X.append(t_test_X[[-1], :, :])
-        test_X_dates.append(t_test_X_dates[[-1], :])
         wpt_df = data_df[[]].copy()
 
         for i in range(0, level):
             wpt_df[predict] = rd[i][:]
 
-            (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data(
-                wpt_df.iloc[-self.ts_lookback :], predict=predict
-            )
+            t_test_X = self.dl_preprocess_data(wpt_df.iloc[-self.ts_lookback :], predict=predict)[0]
 
             test_X.append(t_test_X[[-1], :, :])
-            test_X_dates.append(t_test_X_dates)
 
-        return test_X, test_y, test_X_dates, test_y_dates
+        return test_X, test_y
 
     def convert_df_wavelet_input_relevant(self, data_df: pd.DataFrame, predict: str):
         rd = list()
         test_X = list()
-        test_X, test_X_dates, test_y_dates, test_y = list(), list(), list(), list()
 
         if self.is_training:
-            (_, test_y, _, _, test_y_dates) = self.dl_preprocess_data(
+            test_y = self.dl_preprocess_data(
                 data_df.iloc[-self.ts_lookback - self.ts_lookahead :],
                 predict=predict,
                 training=self.is_training,
-            )
+            )[1]
 
             assert test_y is not None
             test_y = test_y[[-1], :, :]
-            test_y_dates = test_y_dates[[-1], :]
+        else:
+            test_y = []
 
         data_df = data_df.iloc[: -self.ts_lookahead]
-        (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data(
-            data_df.iloc[-self.ts_lookback :], predict=predict
-        )
+        t_test_X = self.dl_preprocess_data(data_df.iloc[-self.ts_lookback :], predict=predict)[0]
 
         data = data_df[predict]
         data = data.append(data_df[predict + "_forecast"].iloc[-self.ts_lookback :]).values
@@ -304,17 +253,13 @@ def convert_df_wavelet_input_relevant(self, data_df: pd.DataFrame, predict: str)
             )
 
         test_X.append(t_test_X[[-1], :, :])
-        test_X_dates.append(t_test_X_dates[[-1], :])
         wpt_df = data_df[[]].copy()
 
         for i in range(0, self.level):
             wpt_df[predict] = rd[i]
 
-            (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data(
-                wpt_df.iloc[-self.ts_lookback :], predict=predict
-            )
+            t_test_X = self.dl_preprocess_data(wpt_df.iloc[-self.ts_lookback :], predict=predict)[0]
 
             test_X.append(t_test_X[[-1], :, :])
-            test_X_dates.append(t_test_X_dates)
 
-        return test_X, test_y, test_X_dates, test_y_dates
+        return test_X, test_y
diff --git a/notebooks/deepmc/notebook_lib/train.py b/notebooks/deepmc/notebook_lib/train.py
index 4b9fa8b7..6a6f2242 100644
--- a/notebooks/deepmc/notebook_lib/train.py
+++ b/notebooks/deepmc/notebook_lib/train.py
@@ -14,8 +14,8 @@
 from torch import Tensor
 from torch.utils.data import DataLoader, TensorDataset
 
-from vibe_notebook.deepmc import utils
-from vibe_notebook.deepmc.preprocess import Preprocess
+from . import utils
+from .preprocess import Preprocess
 
 MODEL_SUFFIX = "deepmc."
 
@@ -35,7 +35,7 @@ def __init__(
         wavelet: str = "bior3.5",
         mode: str = "periodic",
         level: int = 5,
-        batch_size: int = 24,
+        batch_size: int = 256,
         relevant: bool = False,
     ):
         if relevant:
@@ -67,7 +67,6 @@ def train_model(
         start: int = 0,
         end: int = -1,
         epochs: int = 20,
-        reset_preprocess: bool = False,
     ):
         end = self.total_models if end == -1 else end
 
@@ -81,22 +80,12 @@ def train_model(
             input_order_df[out_feature] = out_feature_df
 
             # data preprocessing
-            (
-                train_scaler,
-                output_scaler,
-                train_df,
-                test_df,
-            ) = utils.get_split_scaled_data(
+            (train_scaler, output_scaler, train_df, test_df,) = utils.get_split_scaled_data(
                 data=input_order_df, out_feature=out_feature, split_ratio=0.92
             )
-            if reset_preprocess and os.path.exists(
-                self.data_export_path % (out_feature, self.relevant_text)
-            ):
-                os.remove(self.data_export_path % (out_feature, self.relevant_text))
 
             if os.path.exists(self.data_export_path % (out_feature, self.relevant_text)):
-                exp_path = self.data_export_path.replace("train_data.pkl", "train_data_dates.pkl")
-                with open(exp_path % (out_feature, self.relevant_text), "rb") as f:
+                with open(self.data_export_path % (out_feature, self.relevant_text), "rb") as f:
                     (
                         train_X,
                         train_y,
@@ -104,10 +93,6 @@ def train_model(
                         test_y,
                         train_scaler,
                         output_scaler,
-                        train_dates_X,
-                        train_dates_y,
-                        test_dates_X,
-                        test_dates_y,
                     ) = pickle.load(f)
 
                 self.preprocess = Preprocess(
@@ -143,10 +128,6 @@ def train_model(
                     train_y,
                     test_X,
                     test_y,
-                    train_dates_X,
-                    train_dates_y,
-                    test_dates_X,
-                    test_dates_y,
                 ) = self.preprocess.wavelet_transform_train(train_df, test_df, out_feature)
 
                 with open(self.data_export_path % (out_feature, self.relevant_text), "wb") as f:
@@ -155,25 +136,6 @@ def train_model(
                         f,
                     )
 
-                exp_path = self.data_export_path.replace("train_data.pkl", "train_data_dates.pkl")
-
-                with open(exp_path % (out_feature, self.relevant_text), "wb") as f:
-                    pickle.dump(
-                        [
-                            train_X,
-                            train_y,
-                            test_X,
-                            test_y,
-                            train_scaler,
-                            output_scaler,
-                            train_dates_X,
-                            train_dates_y,
-                            test_dates_X,
-                            test_dates_y,
-                        ],
-                        f,
-                    )
-
             self.train_models(
                 train_X=train_X,  # type: ignore
                 train_y=train_y,  # type: ignore
@@ -183,8 +145,6 @@ def train_model(
                 out_feature=out_feature,
                 start=start,
                 end=end,
-                train_dates_y=train_dates_y,  # type: ignore
-                test_dates_y=test_dates_y,  # type: ignore
             )
 
     def train_models(
@@ -197,8 +157,6 @@ def train_models(
         out_feature: str,
         start: int,
         end: int,
-        train_dates_y: List[str],
-        test_dates_y: List[str],
     ):
         first_channels = train_X[0].shape[2]
         rest_channels = train_X[1].shape[2]
@@ -251,6 +209,7 @@ def train_models(
                         dirpath=model_path,
                     ),
                 ],
+                num_processes=1,
             )
 
             t_obj.fit(m, train_loader, val_loader)
@@ -266,8 +225,6 @@ def train_models(
                 out_feature=out_feature,
                 model_index=i,
                 epochs=epochs,
-                train_dates_y=train_dates_y,
-                test_dates_y=test_dates_y,
             )
 
     def export_to_onnx(
@@ -292,24 +249,19 @@ def export_to_onnx(
         )
 
     def get_dataloader(
-        self,
-        gt: NDArray[Any],
-        target: NDArray[Any],
-        o_feature: str,
-        dates_mapped: NDArray[Any],
+        self, gt: NDArray[Any], target: NDArray[Any], o_feature: str
     ) -> Tuple[DataLoader[Any], List[Tensor]]:
-        dates_mapped = pd.to_datetime(dates_mapped, format="%Y-%m-%d %H:%M:%S").values
-        df = pd.DataFrame(list(zip(gt, dates_mapped)), columns=["data", "date"])
-        df.set_index("date", inplace=True)
-        o_x = self.preprocess.dl_preprocess_data(df, o_feature)[0][:, :, 0].astype(np.float32)
+        o_x = self.preprocess.dl_preprocess_data(pd.DataFrame(gt), o_feature)[0][:, :, 0].astype(
+            np.float32
+        )
 
-        df = pd.DataFrame(list(zip(target, dates_mapped)), columns=["data", "date"])
-        df.set_index("date", inplace=True)
-        o_y = self.preprocess.dl_preprocess_data(df, o_feature)[0][:, :, 0].astype(np.float32)
+        o_y = self.preprocess.dl_preprocess_data(pd.DataFrame(target), o_feature)[0][
+            :, :, 0
+        ].astype(np.float32)
 
         o_inputs = [torch.from_numpy(x.astype(np.float32)) for x in (o_x, o_y)]
         o_dataset = TensorDataset(*o_inputs)
-        o_loader = DataLoader(o_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True)
+        o_loader = DataLoader(o_dataset, batch_size=self.batch_size, shuffle=True)
         return o_loader, o_inputs
 
     def post_model(
@@ -322,8 +274,6 @@ def post_model(
         out_feature: str,
         model_index: int,
         epochs: int,
-        train_dates_y: List[str],
-        test_dates_y: List[str],
     ):
         m.eval()
 
@@ -338,17 +288,11 @@ def xf(a: List[NDArray[Any]]) -> List[Tensor]:
             os.mkdir(post_model_path)
 
         train_dataloader, _ = self.get_dataloader(
-            gt=train_y[:, model_index, 0],  # type: ignore
-            target=train_yhat,
-            o_feature=out_feature,
-            dates_mapped=train_dates_y[:, model_index],  # type: ignore
+            gt=train_y[:, model_index, 0], target=train_yhat, o_feature=out_feature  # type: ignore
         )
 
-        val_dataloader, _ = self.get_dataloader(
-            gt=test_y[:, model_index, 0],  # type: ignore
-            target=test_yhat,
-            o_feature=out_feature,
-            dates_mapped=test_dates_y[:, model_index],  # type: ignore
+        val_dataloader, val_inputs = self.get_dataloader(
+            gt=test_y[:, model_index, 0], target=test_yhat, o_feature=out_feature  # type: ignore
         )
 
         p_m = DeepMCPostTrain(first_in_features=self.total_models)
@@ -364,113 +308,9 @@ def xf(a: List[NDArray[Any]]) -> List[Tensor]:
                     dirpath=post_model_path,
                 ),
             ],
+            num_processes=1,
         )
 
         t_obj.fit(p_m, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
 
         self.export_to_onnx(file_path=post_model_path, model=p_m.deepmc, inputs=torch.rand((1, 24)))
-
-    def preprocess_data(
-        self,
-        input_df: pd.DataFrame,
-        out_path: str,
-        start: int = 0,
-        end: int = -1,
-        epochs: int = 20,
-        reset_preprocess: bool = False,
-    ):
-        end = self.total_models if end == -1 else end
-
-        for out_feature in self.out_features:
-            if not os.path.exists(self.path_to_station % out_feature):
-                os.makedirs(self.path_to_station % out_feature, exist_ok=True)
-
-            input_order_df = input_df[self.train_features].copy()
-            out_feature_df = input_order_df[out_feature]
-            input_order_df.drop(columns=[out_feature], inplace=True)
-            input_order_df[out_feature] = out_feature_df
-
-            # data preprocessing
-            (
-                train_scaler,
-                output_scaler,
-                train_df,
-                test_df,
-            ) = utils.get_split_scaled_data(
-                data=input_order_df, out_feature=out_feature, split_ratio=0.92
-            )
-            if reset_preprocess and os.path.exists(
-                self.data_export_path % (out_feature, self.relevant_text)
-            ):
-                os.remove(self.data_export_path % (out_feature, self.relevant_text))
-
-            if os.path.exists(self.data_export_path % (out_feature, self.relevant_text)):
-                with open(self.data_export_path % (out_feature, self.relevant_text), "rb") as f:
-                    (
-                        train_X,
-                        train_y,
-                        test_X,
-                        test_y,
-                        train_scaler,
-                        output_scaler,
-                    ) = pickle.load(f)
-
-                self.preprocess = Preprocess(
-                    train_scaler=train_scaler,
-                    output_scaler=output_scaler,
-                    is_training=True,
-                    is_validation=self.is_validation,
-                    ts_lookahead=self.ts_lookahead,
-                    ts_lookback=self.ts_lookback,
-                    chunk_size=self.chunk_size,
-                    wavelet=self.wavelet,
-                    mode=self.mode,
-                    level=self.level,
-                    relevant=self.relevant,
-                )
-            else:
-                self.preprocess = Preprocess(
-                    train_scaler=train_scaler,
-                    output_scaler=output_scaler,
-                    is_training=True,
-                    is_validation=self.is_validation,
-                    ts_lookahead=self.ts_lookahead,
-                    ts_lookback=self.ts_lookback,
-                    chunk_size=self.chunk_size,
-                    wavelet=self.wavelet,
-                    mode=self.mode,
-                    level=self.level,
-                    relevant=self.relevant,
-                )
-
-                (
-                    train_X,
-                    train_y,
-                    test_X,
-                    test_y,
-                    train_dates,
-                    test_dates,
-                ) = self.preprocess.wavelet_transform_train(train_df, test_df, out_feature)
-
-                with open(self.data_export_path % (out_feature, self.relevant_text), "wb") as f:
-                    pickle.dump(
-                        [train_X, train_y, test_X, test_y, train_scaler, output_scaler],
-                        f,
-                    )
-
-                exp_path = self.data_export_path.replace("train_data.pkl", "train_data_dates.pkl")
-
-                with open(exp_path % (out_feature, self.relevant_text), "wb") as f:
-                    pickle.dump(
-                        [
-                            train_X,
-                            train_y,
-                            test_X,
-                            test_y,
-                            train_scaler,
-                            output_scaler,
-                            train_dates,
-                            test_dates,
-                        ],
-                        f,
-                    )
diff --git a/notebooks/deepmc/notebook_lib/transformer_models_ts.py b/notebooks/deepmc/notebook_lib/transformer_models_ts.py
new file mode 100644
index 00000000..ba55aaca
--- /dev/null
+++ b/notebooks/deepmc/notebook_lib/transformer_models_ts.py
@@ -0,0 +1,367 @@
+import numpy as np
+import tensorflow as tf
+
+
+def get_angles(pos, i, d_model):
+    """Return pos scaled by the per-dimension rate 1 / 10000^(2 * (i // 2) / d_model)."""
+    return pos * (1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model)))
+
+
+def positional_encoding(position, d_model):
+    """Build the sinusoidal positional-encoding table.
+
+    Returns a float32 tensor of shape (1, position, d_model): even feature
+    columns hold sin(angle) and odd feature columns hold cos(angle).
+    """
+    positions = np.arange(position)[:, np.newaxis]
+    dims = np.arange(d_model)[np.newaxis, :]
+    table = get_angles(positions, dims, d_model)
+
+    table[:, 0::2] = np.sin(table[:, 0::2])  # even columns (2i)
+    table[:, 1::2] = np.cos(table[:, 1::2])  # odd columns (2i+1)
+
+    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)
+
+
+def create_padding_mask(seq):
+    """Return a float32 mask that is 1.0 where seq equals 0 (padding) and 0.0 elsewhere.
+
+    Two singleton axes are inserted so that, for a (batch_size, seq_len) input, the
+    (batch_size, 1, 1, seq_len) result can be broadcast against the attention logits.
+    """
+    return tf.cast(tf.math.equal(seq, 0), tf.float32)[:, tf.newaxis, tf.newaxis, :]
+
+
+def create_look_ahead_mask(size):
+    """Return a (size, size) mask with 1 strictly above the diagonal and 0 on or below it."""
+    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
+
+
+def scaled_dot_product_attention(q, k, v, mask=None):
+    """Compute scaled dot-product attention.
+
+    q, k, v must have matching leading dimensions, and k, v must have matching
+    penultimate dimension, i.e.: seq_len_k = seq_len_v. The mask has different
+    shapes depending on its type (padding or look ahead) but it must be
+    broadcastable for addition.
+
+    Args:
+        q: query shape == (..., seq_len_q, depth)
+        k: key shape == (..., seq_len_k, depth)
+        v: value shape == (..., seq_len_v, depth_v)
+        mask: Optional float tensor with shape broadcastable to
+            (..., seq_len_q, seq_len_k); entries equal to 1 mark positions to
+            suppress. Defaults to None, meaning no masking.
+
+    Returns:
+        Tuple (output, attention_weights) where output has shape
+        (..., seq_len_q, depth_v) and attention_weights has shape
+        (..., seq_len_q, seq_len_k).
+    """
+    # Raw similarity scores between every query and every key.
+    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
+
+    # Scale the scores by the square root of the key depth.
+    dk = tf.cast(tf.shape(k)[-1], tf.float32)
+    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
+
+    # Masked positions get a large negative logit so softmax drives them to ~0.
+    if mask is not None:
+        scaled_attention_logits += mask * -1e9
+
+    # Softmax over the last axis (seq_len_k) so the weights for each query sum to 1.
+    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
+    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)
+
+    return output, attention_weights
+
+
+def print_out(q, k, v):
+    """Run unmasked scaled dot-product attention and print the weights, then the output."""
+    attn_output, attn_weights = scaled_dot_product_attention(q, k, v, None)
+    # Emit each label immediately followed by its tensor.
+    for label, tensor in (("Attention weights are:", attn_weights), ("Output is:", attn_output)):
+        print(label, tensor, sep="\n")
+
+
+"""
+    - Q (query), K (key) and V (value) are split into multiple heads (num_heads)
+    - each tuple (q, k, v) is fed to scaled_dot_product_attention
+    - all attention outputs are concatenated
+"""
+
+
+class MultiHeadAttention(tf.keras.layers.Layer):
+    """Multi-head attention layer.
+
+    Q, K and V are linearly projected, split into num_heads heads, passed through
+    scaled_dot_product_attention, concatenated and projected once more.
+    """
+
+    def __init__(self, d_model, num_heads):
+        """Create the projections; d_model must be divisible by num_heads."""
+        super(MultiHeadAttention, self).__init__()
+        self.num_heads = num_heads
+        self.d_model = d_model
+
+        assert d_model % self.num_heads == 0
+
+        # Width of each individual head.
+        self.depth = d_model // self.num_heads
+
+        # Input projections for query, key and value, plus the output projection.
+        self.wq = tf.keras.layers.Dense(d_model)
+        self.wk = tf.keras.layers.Dense(d_model)
+        self.wv = tf.keras.layers.Dense(d_model)
+        self.dense = tf.keras.layers.Dense(d_model)
+
+    def split_heads(self, x, batch_size):
+        """Reshape x to (batch_size, seq_len, num_heads, depth) and move heads to axis 1."""
+        per_head = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
+        return tf.transpose(per_head, perm=[0, 2, 1, 3])
+
+    def call(self, v, k, q, mask):
+        """Attend q over (k, v); return (output, attention_weights).
+
+        Note the argument order: value, key, query, mask.
+        """
+        batch_size = tf.shape(q)[0]
+
+        # Project, then split: each becomes (batch_size, num_heads, seq_len_*, depth).
+        q_heads = self.split_heads(self.wq(q), batch_size)
+        k_heads = self.split_heads(self.wk(k), batch_size)
+        v_heads = self.split_heads(self.wv(v), batch_size)
+
+        attended, attention_weights = scaled_dot_product_attention(q_heads, k_heads, v_heads, mask)
+
+        # Back to (batch_size, seq_len_q, num_heads, depth), then merge heads into d_model.
+        attended = tf.transpose(attended, perm=[0, 2, 1, 3])
+        merged = tf.reshape(attended, (batch_size, -1, self.d_model))
+        return self.dense(merged), attention_weights
+
+
+def point_wise_feed_forward_network(d_model, dff):
+    """Return a two-layer MLP: Dense(dff, relu) followed by Dense(d_model)."""
+    # Shapes: (batch_size, seq_len, dff) after the hidden layer, then
+    # (batch_size, seq_len, d_model) after the output layer.
+    hidden = tf.keras.layers.Dense(dff, activation="relu")
+    projection = tf.keras.layers.Dense(d_model)
+    return tf.keras.Sequential([hidden, projection])
+
+
+class EncoderLayer(tf.keras.layers.Layer):
+    """Encoder block: self-attention and a feed-forward net, each with dropout + residual norm."""
+
+    def __init__(self, d_model, num_heads, dff, rate=0.1):
+        super(EncoderLayer, self).__init__()
+
+        self.mha = MultiHeadAttention(d_model, num_heads)
+        self.ffn = point_wise_feed_forward_network(d_model, dff)
+
+        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+        self.dropout1 = tf.keras.layers.Dropout(rate)
+        self.dropout2 = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, training, mask):
+        """Return the encoded sequence, shaped like x: (batch_size, input_seq_len, d_model)."""
+        # Sub-layer 1: self-attention (x serves as value, key and query).
+        attended, _ = self.mha(x, x, x, mask)
+        normed_attn = self.layernorm1(x + self.dropout1(attended, training=training))
+
+        # Sub-layer 2: position-wise feed-forward network.
+        fed_forward = self.dropout2(self.ffn(normed_attn), training=training)
+        return self.layernorm2(normed_attn + fed_forward)
+
+
+class DecoderLayer(tf.keras.layers.Layer):
+    """Decoder block: self-attention, encoder-decoder attention, then a feed-forward net.
+
+    Each sub-layer is followed by dropout, a residual connection and layer normalization.
+    """
+
+    def __init__(self, d_model, num_heads, dff, rate=0.1):
+        super(DecoderLayer, self).__init__()
+
+        self.mha1 = MultiHeadAttention(d_model, num_heads)
+        self.mha2 = MultiHeadAttention(d_model, num_heads)
+
+        self.ffn = point_wise_feed_forward_network(d_model, dff)
+
+        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+        self.dropout1 = tf.keras.layers.Dropout(rate)
+        self.dropout2 = tf.keras.layers.Dropout(rate)
+        self.dropout3 = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
+        """Return (output, self_attention_weights, encoder_decoder_attention_weights).
+
+        enc_output.shape == (batch_size, input_seq_len, d_model); the output is
+        (batch_size, target_seq_len, d_model).
+        """
+        self_attn, self_attn_weights = self.mha1(x, x, x, look_ahead_mask)
+        self_out = self.layernorm1(self.dropout1(self_attn, training=training) + x)
+
+        # Queries come from the decoder state; keys and values come from the encoder output.
+        cross_attn, cross_attn_weights = self.mha2(enc_output, enc_output, self_out, padding_mask)
+        cross_out = self.layernorm2(self.dropout2(cross_attn, training=training) + self_out)
+
+        ffn_out = self.dropout3(self.ffn(cross_out), training=training)
+        return self.layernorm3(ffn_out + cross_out), self_attn_weights, cross_attn_weights
+
+
+class Encoder(tf.keras.layers.Layer):
+    """Dense embedding + positional encoding followed by num_layers EncoderLayer blocks."""
+
+    def __init__(self, num_layers, d_model, num_heads, dff, maximum_position_encoding, rate=0.1):
+        super(Encoder, self).__init__()
+
+        self.d_model = d_model
+        self.num_layers = num_layers
+
+        # A Dense projection (not a token lookup) embeds each input step into d_model features.
+        self.embedding = tf.keras.layers.Dense(d_model, activation="relu")
+        self.pos_encoding = positional_encoding(maximum_position_encoding, self.d_model)
+
+        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
+
+        self.dropout = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, training, mask):
+        """Embed x, add positional encoding, apply dropout, then run every encoder layer.
+
+        Returns a tensor of shape (batch_size, input_seq_len, d_model).
+        """
+        seq_len = tf.shape(x)[1]
+        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+
+        hidden = self.embedding(x) * scale + self.pos_encoding[:, :seq_len, :]
+        hidden = self.dropout(hidden, training=training)
+        for layer in self.enc_layers:
+            hidden = layer(hidden, training, mask)
+        return hidden
+
+
+class Decoder(tf.keras.layers.Layer):
+    """Dense embedding + positional encoding followed by num_layers DecoderLayer blocks."""
+
+    def __init__(self, num_layers, d_model, num_heads, dff, maximum_position_encoding, rate=0.1):
+        super(Decoder, self).__init__()
+
+        self.d_model = d_model
+        self.num_layers = num_layers
+
+        self.embedding = tf.keras.layers.Dense(d_model, activation="relu")
+        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
+
+        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
+        self.dropout = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
+        """Decode x against enc_output; return (output, attention_weights).
+
+        attention_weights maps "decoder_layer{n}_block1" / "decoder_layer{n}_block2"
+        (n starting at 1) to the weights of each layer's two attention blocks.
+        """
+        seq_len = tf.shape(x)[1]
+        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+        hidden = self.embedding(x) * scale + self.pos_encoding[:, :seq_len, :]
+        hidden = self.dropout(hidden, training=training)
+        attention_weights = {}
+        for n, layer in enumerate(self.dec_layers, start=1):
+            outputs = layer(hidden, enc_output, training, look_ahead_mask, padding_mask)
+            hidden, block1, block2 = outputs
+            attention_weights["decoder_layer{}_block1".format(n)] = block1
+            attention_weights["decoder_layer{}_block2".format(n)] = block2
+        return hidden, attention_weights
+
+
+class Transformer(tf.keras.Model):
+    """Encoder-decoder model whose decoder output is projected by a final Dense layer."""
+
+    def __init__(
+        self, num_layers, d_model, num_heads, dff, target_vocab_size, pe_input, pe_target, rate=0.1
+    ):
+        super(Transformer, self).__init__()
+
+        # pe_input / pe_target are passed on as each side's maximum_position_encoding.
+        self.encoder = Encoder(num_layers, d_model, num_heads, dff, pe_input, rate)
+        self.decoder = Decoder(num_layers, d_model, num_heads, dff, pe_target, rate)
+
+        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+
+    def call(self, inp, tar, training, enc_padding_mask, look_ahead_mask, dec_padding_mask):
+        """Run inp through the encoder and tar through the decoder.
+
+        Returns (final_output, attention_weights) where final_output has shape
+        (batch_size, tar_seq_len, target_vocab_size).
+        """
+        # (batch_size, inp_seq_len, d_model)
+        encoded = self.encoder(inp, training, enc_padding_mask)
+        # decoded.shape == (batch_size, tar_seq_len, d_model)
+        decoder_args = (tar, encoded, training, look_ahead_mask, dec_padding_mask)
+        decoded, attention_weights = self.decoder(*decoder_args)
+        return self.final_layer(decoded), attention_weights
+
+
+class GLU(tf.keras.layers.Layer):
+    """Gated linear unit: linear_left(x) * sigmoid(linear_right(x)); input_channel is unused."""
+
+    def __init__(self, input_channel, output_channel):
+        super(GLU, self).__init__()
+        self.linear_left = tf.keras.layers.Dense(output_channel)
+        self.linear_right = tf.keras.layers.Dense(output_channel)
+
+    def call(self, x):
+        return self.linear_left(x) * tf.keras.activations.sigmoid(self.linear_right(x))
+
+
+class FFT(tf.keras.layers.Layer):
+    """Frequency-domain block: FFT, three GLU stages on real and imaginary parts, inverse FFT.
+
+    Args:
+        time_step: multiplied with order / output_channel to size the GLU layers.
+        order: multiplier for the nominal input width of the first GLU pair.
+        output_channel: multiplier for the output width of every GLU.
+    """
+
+    def __init__(self, time_step, order, output_channel):
+        super(FFT, self).__init__()
+        self.time_step = time_step
+        self.order = order
+        self.output_channel = output_channel
+
+        out_width = self.time_step * self.output_channel
+        # Layout: GLUs[2 * s] handles the real part and GLUs[2 * s + 1] the imaginary part
+        # of stage s. Only stage 0 is given time_step * order as its nominal input width;
+        # later stages consume the previous stage's output.
+        self.GLUs = []
+        for stage in range(3):
+            in_width = self.time_step * self.order if stage == 0 else out_width
+            self.GLUs.append(GLU(in_width, out_width))
+            self.GLUs.append(GLU(in_width, out_width))
+
+    def call(self, x):
+        """Transform x of shape (b, seq_len, units) along seq_len.
+
+        Returns a float32 tensor of shape (b, time_step * output_channel, units).
+        """
+        # tf.transpose replaces the Permute layers that used to be re-created on every call.
+        x = tf.transpose(x, perm=[0, 2, 1])  # (b, units, seq_len)
+        ffted = tf.signal.fft(tf.cast(x, dtype=tf.complex64))
+        real = tf.math.real(ffted)
+        img = tf.math.imag(ffted)
+        # Real and imaginary parts go through separate GLU stacks.
+        for stage in range(3):
+            real = self.GLUs[2 * stage](real)
+            img = self.GLUs[2 * stage + 1](img)
+
+        # Keep only the real part of the inverse transform (what casting complex64 to
+        # float32 did implicitly), then restore the (b, seq_len, units) axis order.
+        iffted = tf.signal.ifft(tf.dtypes.complex(real, img))
+        iffted = tf.math.real(iffted)
+        return tf.transpose(iffted, perm=[0, 2, 1])
diff --git a/notebooks/deepmc/notebook_lib/utils.py b/notebooks/deepmc/notebook_lib/utils.py
new file mode 100644
index 00000000..eacec1aa
--- /dev/null
+++ b/notebooks/deepmc/notebook_lib/utils.py
@@ -0,0 +1,104 @@
+from datetime import datetime, timedelta
+from typing import Any, Dict, List
+
+import numpy as np
+import pandas as pd
+from numpy._typing import NDArray
+from pandas.tseries.offsets import DateOffset
+from sklearn.preprocessing import StandardScaler
+
+
+def get_csv_data(
+    path: str,
+    date_attribute: str = "date",
+    columns_rename: Dict[str, str] = {},
+    frequency: str = "60min",
+):
+    """Read a CSV into a DataFrame indexed by date and averaged per frequency bin.
+
+    Args:
+        path: CSV file passed to pandas.read_csv.
+        date_attribute: column parsed with pandas.to_datetime and used as the index.
+        columns_rename: optional {old: new} column mapping; it is only read, never mutated.
+        frequency: pandas.Grouper frequency; rows falling in the same bin are averaged.
+
+    Returns:
+        DataFrame sorted by date with one row per bin; NaN rows produced by the grouping
+        are forward-filled and then backward-filled.
+    """
+    data_df = pd.read_csv(path)
+    data_df[date_attribute] = pd.to_datetime(data_df[date_attribute])
+
+    if columns_rename:
+        data_df.rename(columns=columns_rename, inplace=True)
+
+    # Index on the date column; pd.Grouper(freq=...) below groups on this index.
+    data_df = data_df.reset_index(drop=True).set_index(date_attribute).sort_index(ascending=True)
+
+    # Interpolate missing values, then drop rows that still contain NaN.
+    data_df = data_df.interpolate(method="from_derivatives")
+    assert data_df is not None, "Interpolate deleted all data"
+    data_df = data_df.dropna()
+    # ffill()/bfill() replace fillna(method=...), which pandas has deprecated.
+    data_df = data_df.ffill().bfill().groupby(pd.Grouper(freq=frequency)).mean()
+    return data_df.ffill().bfill()
+
+
+def hour_round(t: datetime):
+    """Round t to the nearest whole hour; minute >= 30 rounds up (seconds are ignored)."""
+    floored = t.replace(minute=0, second=0, microsecond=0)
+    bump = timedelta(hours=1) if t.minute >= 30 else timedelta(hours=0)
+    return floored + bump
+
+
+def get_split_scaled_data(data: pd.DataFrame, out_feature: str, split_ratio: float = 0.92):
+    """Split data by row order and standardize both parts with training-split statistics.
+
+    Returns (train_scaler, output_scaler, train_scale_df, test_scale_df). output_scaler is fit on
+    out_feature of the training rows only, so it matches train_scaler's scaling of that column.
+    """
+    split = int(split_ratio * data.shape[0])
+    train_data, test_data = data.iloc[:split], data.iloc[split:]
+
+    # Fit on the training rows only so the statistics agree with train_scaler's target column.
+    output_scaler = StandardScaler()
+    output_scaler.fit(np.expand_dims(train_data[out_feature].values, axis=1))  # type: ignore
+    train_scaler = StandardScaler()
+    train_values = train_scaler.fit_transform(train_data)
+    test_values = train_scaler.transform(test_data)
+    train_scale_df = pd.DataFrame(train_values, columns=train_data.columns, index=train_data.index)
+    test_scale_df = pd.DataFrame(test_values, columns=test_data.columns, index=test_data.index)
+    return train_scaler, output_scaler, train_scale_df, test_scale_df
+
+
+def shift_index(ds_df: pd.DataFrame, freq_minutes: int, num_indices: int, dateColumn: str = "date"):
+    """Return a new frame indexed (as dateColumn) by ds_df's index shifted by -num_indices steps."""
+    # Compute the shifted index up front so the caller's ds_df never gains a dateColumn column.
+    shifted = ds_df.index.shift(-num_indices, freq=DateOffset(minutes=freq_minutes))
+    return ds_df.reset_index(drop=True).assign(**{dateColumn: shifted}).set_index(dateColumn)
+
+
+def clean_relevant_data(
+    actual_df: pd.DataFrame,
+    forecast_df: pd.DataFrame,
+    out_variables: List[str],
+    freq_hours: int,
+    num_of_indices: int,
+):
+    """Join actual data with current and index-shifted forecasts, then keep out_variables.
+
+    Forecast columns are joined suffixed with "Current" and again, unsuffixed, after shift_index.
+    Missing values are interpolated and rows that still contain NaN are dropped.
+    """
+    current_ws_df = forecast_df.add_suffix("Current")
+    shifted_df = shift_index(forecast_df, freq_hours * 60, num_of_indices)
+    joined = actual_df.copy().join(current_ws_df).join(shifted_df)[out_variables]
+    joined = joined.interpolate(method="from_derivatives")
+    assert joined is not None, "Interpolate deleted all data"
+    return joined.dropna()
+
+
+def smooth(y: NDArray[Any], box_pts: int):
+    """Smooth y with a box_pts-wide moving-average kernel via np.convolve(..., mode="same")."""
+    kernel = np.ones(box_pts) / box_pts
+    return np.convolve(y, kernel, mode="same")
diff --git a/notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml b/notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml
deleted file mode 100644
index 834e34c9..00000000
--- a/notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: deepmc-pytorch-neighbors
-channels:
-  - pyg
-  - conda-forge
-  - defaults
-dependencies:
-  - python=3.9.*
-  - pip~=21.2.4
-  - pip:
-    - geopandas~=0.9.0
-    - einops~=0.6.0
-    - geopy~=2.4.1
-    - ipykernel~=6.17.1
-    - unfoldNd~=0.2.0
-    - pyWavelets~=1.3.0
-    - pydantic~=1.10.12
-    - matplotlib~=3.9.0
-    - ../../src/vibe_core
-    - ../../src/vibe_notebook
\ No newline at end of file
diff --git a/notebooks/deepmc_neighbors/gnn_forecast.ipynb b/notebooks/deepmc_neighbors/gnn_forecast.ipynb
deleted file mode 100644
index 75834edc..00000000
--- a/notebooks/deepmc_neighbors/gnn_forecast.ipynb
+++ /dev/null
@@ -1,643 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Micro Climate Predictions with Nearby Weather Stations\n",
-    "\n",
-    "It helps infer weather forecasts for stations that have no data or limited data by utilizing data of neighboring stations. The notebook demonstrates configuring inputs and training a model using neighboring weather stations data.\n",
-    "\n",
-    "This is an extension of the deepmc notebook [notebooks/deepmc/mc_forecast.ipynb](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/deepmc/mc_forecast.ipynb).\n",
-    "\n",
-    "Before running this notebook, let's build a micromamba environment. If you do not have micromamba installed, please follow the instructions from the [micromamba installation guide](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html).\n",
-    "\n",
-    "```bash\n",
-    "$ micromamba env create -f ./deepmc_neighbors_env.yaml\n",
-    "$ micromamba activate deepmc-pytorch-neighbors\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Install Packages**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "! pip install torch==1.12.1 --index-url https://download.pytorch.org/whl/cpu\n",
-    "! pip install torch-scatter==2.1.0 torch-sparse==0.6.15 torch-geometric==2.3.0 -f https://data.pyg.org/whl/torch-1.12.1%2Bcpu.html\n",
-    "! pip install torch-geometric-temporal~=0.54.0 onnxruntime~=1.15.0 pytorch-lightning~=1.8.0"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Notebook overview\n",
-    "In this notebook, we describe steps to generate forecast for weather variables at a specific station with no or limited data. We employ [Graphical Neural Network (GNNs)](https://pytorch-geometric.readthedocs.io/) for cross-learning from nearby weather stations by capturing spatial relationships. \n",
-    "\n",
-    "To illustrate this approach, we focus on three locations in Washington state, U.S.A., utilizing data accessible through [AGWeatherNet](https://weather.wsu.edu/). An example is shown the figure below. For instance, assuming that the Warden SW station has missing data, we look to neighboring stations (such as Royal Slope and Ringold) that provide relevant data. We consider the weather variables, temperature, humidity and wind_speed.\n",
-    "\n",
-    "\n",
-    "\n",
-    "Selecting appropriate neighboring stations is crucial for accurate predictions. When choosing neighboring weather stations, consider the following factors:\n",
-    "\n",
-    "- Elevation Similarity: In the current model the neighboring stations should be at a similar elevation to the target station. This ensures that altitude-related effects are consistent. Although, one can build a edge weight model which includes altitude differential to accommodate for the topography (this notebook does not cover that). \n",
-    "\n",
-    "- Spatial Proximity: The distance between neighboring stations should be small. Proximity often implies similar local weather patterns. For example, in the example, we chose stations with distance less than 25 km between them. In our experiments we noticed significant errors with distances greater than 25 Kms.\n",
-    "\n",
-    "**Graph Representation of Weather Stations for GNNs**\n",
-    "\n",
-    "Each weather station corresponds to a node in our graph. To capture the relationships between stations, we connect stations based on the distance between them. This graph does not change with time during inference. If a new station is available which can be helpful to increase accuracy, then the model can be dynamically updated by recomputing & retraining the GNN.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The following steps are required for model training and inference.\n",
-    "\n",
-    "**Step 1: Download AgWeatherNet data**\n",
-    "- Download historical weather data for the stations Royal Slope and Ringold from [AGWeatherNet]( https://weather.wsu.edu/) for the time range of interest (minimum 2 years of data).\n",
-    "- Clean downloaded historical data for considered variables temperature, humidity and wind_speed. \n",
-    "\n",
-    "Note: these two steps are not included in the notebook. See [sample data](sample_data.csv) for an example. \n",
-    "\n",
-    "**Step 2: Download forecast data**\n",
-    "- Download HRRR data for the stations Warden SW, Royal Slope and Ringold using the herbie_forecast workflow in FarmVibes.AI for the time range of interest (minimum 2 years of data).\n",
-    "- Clean downloaded HRRR data for considered variables temperature, humidity and wind_speed.\n",
-    "\n",
-    "**Step 3: Train DeepMC models**\n",
-    "- For stations Royal Slope and Ringold, train the DeepMC model using the notebook [notebooks/deepmc/mc_forecast.ipynb]( https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/deepmc/mc_forecast.ipynb). You will need to train separately for each station.\n",
-    "- The DeepMC inference results are weather forecasts for the next 24 hours for the stations Royal Slope and Ringold.\n",
-    "\n",
-    "**Step 4: Preparation for GNN model training**\n",
-    "- Create embeddings: Concatenate cleaned HRRR weather forecast data of station Warden SW and DeepMC inference results of station Royal Slope & Ringold.\n",
-    "- Create train and test splits from the embeddings.\n",
-    "- Train GNN model.\n",
-    "\n",
-    "**Step 5: Inference**\n",
-    "\n",
-    "Run the inference to infer weather forecasts for the Warden SW station.\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Notebook Setup\n",
-    "\n",
-    "Let's start by importing the required packages and defining some constants.\n",
-    "\n",
-    "### Imports"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import warnings\n",
-    "from datetime import datetime\n",
-    "\n",
-    "from notebook_lib.post_deepmc_inference import download_forecast_data\n",
-    "from notebook_lib.train import MC_Neighbors\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Constants\n",
-    "- ROOT_DIR - Root directory of DeepMC output.\n",
-    "- WEATHER_TYPE - temperature, humidity, or wind_speed.\n",
-    "- INFERENCE_STATION - Station having missing weather data.\n",
-    "- MODEL_TYPE - relevant or not-relevant"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DIR = \"\"\n",
-    "WEATHER_TYPE = \"temperature\"\n",
-    "INFERENCE_STATION = \"Warden_SW\"\n",
-    "MODEL_TYPE = \"relevant\"\n",
-    "ROOT_PATH = os.path.join(ROOT_DIR, WEATHER_TYPE)\n",
-    "\n",
-    "# Forecast data\n",
-    "infer_forecast_data_path = f\"{ROOT_PATH}/{INFERENCE_STATION}/{MODEL_TYPE}/forecast.csv\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 1: Download stations data.  \n",
-    "Here, we are taking the stations from [AGWeatherNet](https://weather.wsu.edu/). \n",
-    "\n",
-    "We are assuming that the station Warden_SW does not have the weather station data. We consider the stations Royal Slope and Ringold as neighboring weather stations having similar weather patterns, hence historical data download is required for these two stations. See [sample data](sample_data.csv) for an example."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Neighboring stations\n",
-    "# Coordinates are in (longitude, latitude)\n",
-    "neighbor_stations = [\n",
-    "    {\n",
-    "        \"name\": \"Warden_SW\",\n",
-    "        \"column_name\": \"temperature_forecast\",\n",
-    "        \"coordinates\": (-119.12, 46.93),\n",
-    "    },\n",
-    "    {\n",
-    "        \"name\": \"royal_slope\",\n",
-    "        \"column_name\": \"temperature\",\n",
-    "        \"coordinates\": (-119.32, 46.95),\n",
-    "    },\n",
-    "    {\n",
-    "        \"name\": \"ringold\",\n",
-    "        \"column_name\": \"temperature\",\n",
-    "        \"coordinates\": (-119.18, 46.48),\n",
-    "    },\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 2: Download Forecast data\n",
-    "\n",
-    "For the weather station Warden SW, download weather forecast data by submitting a request to the worker running in the background. The workflow uses the parameters below when processing requests; they can be overridden using the parameter argument.\n",
-    "\n",
-    "- fxx: [1, 25, 1] # start, stop, step\n",
-    "- search_text: \"TMP:2 m\"\n",
-    "- interval: 60 # in minutes\n",
-    "- weather_type: \"temperature\"\n",
-    "- multi_threads: 25"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "start_date = datetime(year=2021, month=7, day=30)\n",
-    "end_date = datetime(year=2023, month=8, day=2)\n",
-    "forecast_data = download_forecast_data([neighbor_stations[0]], start_date, end_date)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "forecast_data[\"Warden_SW\"].to_csv(infer_forecast_data_path)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Step 3: Train DeepMC models\n",
-    "\n",
-    "Complete the DeepMC model training using the notebook [notebooks/deepmc/mc_forecast.ipynb](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/deepmc/mc_forecast.ipynb) for weather stations Royal Slope and Ringold.\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Step 4: Train [Graph Neural Network (GNN)](https://pytorch-geometric.readthedocs.io/) model\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Step 4.1 Create embeddings\n",
-    "\n",
-    "The get_embeddings module does the following: \n",
-    "1. Run inference using the trained DeepMC models to obtain temperature forecasts for the Royal Slope and Ringold weather stations.\n",
-    "2. Pre-process inference results to create a lookback by transforming it to a 2D matrix.\n",
-    "3. Pre-process HRRR weather forecast to create a lookback by transforming it to a 2D matrix.\n",
-    "4. Create the embeddings by concatenating the pre-processed results. The embeddings are sorted by timestamp and station name."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "obj_neighbors = MC_Neighbors(root_dir=ROOT_PATH, learning_rate=0.0025, use_edge_weights=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_embeddings, test_embeddings = obj_neighbors.get_embeddings(\n",
-    "    INFERENCE_STATION,\n",
-    "    neighbor_stations,\n",
-    "    24,\n",
-    "    infer_forecast_data_path,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Step 4.2 Model training\n",
-    "\n",
-    "The GNN training script does the following:\n",
-    "\n",
-    "1. Creates Dataset that reads the input embeddings, creates a node for each timestamp, and creates edges connecting weather stations.\n",
-    "2. Creates BatchSampler to split data into batches for training and testing dataset.\n",
-    "3. Starts the model training using the PyTorch Lightning package."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "obj_neighbors.run_train(\n",
-    "    train_embeddings=train_embeddings,\n",
-    "    test_embeddings=test_embeddings,\n",
-    "    neighbor_stations=neighbor_stations,\n",
-    "    infer_station=INFERENCE_STATION,\n",
-    "    epochs=20,\n",
-    "    batch_size=24 * len(neighbor_stations),\n",
-    "    forecast_hours=24,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Run Inference to validate the trained model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pred_df = obj_neighbors.run_inference(\n",
-    "    embeddings=test_embeddings.copy(),\n",
-    "    neighbors_station=neighbor_stations,\n",
-    "    infer_station=INFERENCE_STATION,\n",
-    "    batch_size=len(neighbor_stations),\n",
-    "    forecast_hours=24,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- historical_data_path: it's a path to historical weather data downloaded and cleaned in Step 1.\n",
-    "- hrrr_data_path: it's a path to HRRR weather data downloaded and cleaned in Step 2."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "historical_data_path = \"\"\n",
-    "hrrr_data_path = \"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "obj_neighbors.view_plot(pred_df, historical_data_path, hrrr_data_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GNN temperature\n", - "RMSE: 3.98\n", - "MAE: 3.19\n", - "MAE%: 4.22%\n", - "\n", - "Hrrr temperature\n", - "RMSE: 4.64\n", - "MAE: 3.8\n", - "MAE%: 4.91%\n" - ] - } - ], - "source": [ - "obj_neighbors.view_performance(pred_df, historical_data_path, hrrr_data_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 5: Inference\n", - "For weather stations used in GNN model training, we will perform the steps below to get inference results.\n", - "\n", - "**5.1 Download data**\n", - "- Download historical weather data for the stations Royal Slope and Ringold from [AGWeatherNet]( https://weather.wsu.edu/) for the time range interested in.\n", - "\n", - "Note: To perform the inference for 24 hours with 60 minutes interval, the minimum data required for 528 hours, similarly If the data frequency is 15 minutes, the minimum number of data points required is 528*4 = 2112. These are the minimum number of data points need to be provided as input during the inference.\n", - "\n", - "**5.2 Preprocessing**\n", - "- For each weather station, historical and HRRR data are concatenated by timestamp.\n", - "- Data processing is done using Wavelet Transformation techniques. \n", - "- For each weather station, using the trained DeepMC model, we run the inference to find weather forecasts.\n", - "- Embeddings are created by combining HRRR data and the predicted weather forecasts.\n", - "\n", - "**5.3 Run GNN model inference**\n", - "\n", - "Finally, we plot the results and calculate KPIs." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.1 Download data\n", - "\n", - "Download AgWeatherNet data and clean it. 
See [sample data](sample_data.csv)." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# weather dataset filtered and model training limited to train features.\n", - "HISTORICAL_MODEL_TRAIN_FEATURES = [\"humidity\", \"wind_speed\", \"temperature\"]\n", - "\n", - "# Historical data aligned using INDEX variable\n", - "INDEX = \"date\"\n", - "\n", - "# weather dataset filtered and model training limited to train features.\n", - "FORECAST_MODEL_TRAIN_FEATURES = [\"humidity_forecast\", \"wind_speed_forecast\", \"temperature_forecast\"]\n", - "\n", - "# Models trained to predict out features\n", - "OUT_FEATURES = [\"temperature\"] # ['wind_speed' , 'temperature']" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# Get actual observations data for a station, '%s' is a place holder for station name.\n", - "file_path = f\"/%s/prediction.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# time range interested in\n", - "start_date = datetime(year=2022, month=7, day=1, hour=0, minute=0, second=0)\n", - "end_date = datetime(year=2022, month=8, day=15, hour=0, minute=0, second=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Execution status for temperature: done\n", - "Execution status for humidity: done\n", - "Execution status for u-component: done\n", - "Execution status for v-component: done\n" - ] - } - ], - "source": [ - "forecast_data = download_forecast_data(neighbor_stations, start_date, end_date)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.2 Preprocessing\n", - "\n", - "Steps to derive embeddings:\n", - "\n", - "- We perform wavelet transformation on selected weather variables (historical and forecast data). 
\n", - "- The preprocessed output is used as input to run the inference using the DeepMC trained model. The inference results are weather forecasts for neighboring stations.\n", - "- The DeepMC inference results are concatenated with HRRR forecast data to create embeddings." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "obj_neighbors = MC_Neighbors(root_dir=ROOT_PATH, learning_rate=0.0025, use_edge_weights=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "embeddings = obj_neighbors.get_embeddings_inference(\n", - " INFERENCE_STATION,\n", - " neighbor_stations,\n", - " 24,\n", - " infer_forecast_data_path,\n", - " OUT_FEATURES,\n", - " file_path,\n", - " forecast_data,\n", - " start_date,\n", - " end_date,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.3 Run inference\n", - "\n", - "The inference results are weather forecast for stations that are missing station data." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "pred_df = obj_neighbors.run_inference(\n", - " embeddings=embeddings.copy(),\n", - " neighbors_station=neighbor_stations,\n", - " infer_station=INFERENCE_STATION,\n", - " batch_size=len(neighbor_stations),\n", - " forecast_hours=24,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.4 Plot results and calculate KPIs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- historical_data_path: it's a path to historical weather data downloaded and cleaned in Step 5.1.\n", - "- hrrr_data_path: it's a path to hrr weather data downloaded and cleaned in Step 5.1." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "historical_data_path = \"\"\n", - "hrrr_data_path = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "obj_neighbors.view_plot(pred_df, historical_data_path, hrrr_data_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GNN temperature\n", - "RMSE: 3.05\n", - "MAE: 2.57\n", - "MAE%: 3.48%\n", - "\n", - "Hrrr temperature\n", - "RMSE: 3.72\n", - "MAE: 3.02\n", - "MAE%: 4.05%\n" - ] - } - ], - "source": [ - "obj_neighbors.view_performance(pred_df, historical_data_path, hrrr_data_path)" - ] - } - ], - "metadata": { - "description": "It helps to find weather forecasts for sensors that have no data by utilizing data of neighboring stations", - "disk_space": "", - "kernelspec": { - "display_name": "dev-vibes3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "name": "Micro Climate Predictions using Neighbor stations", - "running_time": "", - "tags": [ - "Weather", - "Model Training" - ] - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/deepmc_neighbors/graph.svg b/notebooks/deepmc_neighbors/graph.svg deleted file mode 100755 index 53ae5c23..00000000 --- a/notebooks/deepmc_neighbors/graph.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/notebooks/deepmc_neighbors/notebook_lib/base_dataset.py b/notebooks/deepmc_neighbors/notebook_lib/base_dataset.py deleted file mode 100644 index 82488fd7..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/base_dataset.py +++ /dev/null @@ -1,318 +0,0 @@ -from math import cos, sin -from typing import Any, Dict, List, Union - -import geopy.distance -import numpy as np -import pandas as pd -import torch -import torch.utils -import 
class GNNDataset:
    """Node and edge tensors for a small graph of weather stations.

    On construction the input frame is scaled and reshaped into a
    ``(time steps, stations, features)`` node tensor (``load_nodes``) and a
    static set of directed edges weighted by inverse geodesic distance is
    built (``load_edges``).

    Args:
        data: Frame with ``timestamp``, ``station`` and ``forecast_step``
            columns. Its first ``forecast_hours`` columns are scaled with
            ``scaler_input``; an optional ``labels`` column holds the targets.
        scaler_input: Scaler whose ``transform`` is applied to the first
            ``forecast_hours`` columns (so it must already be fitted).
        scaler_label: Scaler whose ``transform`` is applied to the column at
            ``label_column_index``.
        neighbor_station: Dict with a ``"stations"`` sequence of station names
            and a ``"long_lat"`` mapping from station name to a
            ``(longitude, latitude)`` pair.
        infer_station: Name of the station forecasts are inferred for. No edge
            originates from this station.
        forecast_hours: Number of leading input columns per row.
        label_column_index: Positional index of the label column in ``data``,
            or ``None`` when no labels are available.
        forecast_step: Stored as ``target_idx``.
        device_count: Number of devices batches are laid out for. The default
            is evaluated once, when the class is defined.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        # Quoted: only needed by type checkers, never evaluated at runtime.
        scaler_input: "StandardScaler",
        scaler_label: "StandardScaler",
        neighbor_station: Dict[str, Any],
        infer_station: str,
        forecast_hours: int,
        label_column_index: Union[int, None],
        forecast_step: int = 0,
        device_count: int = torch.cuda.device_count(),
    ):
        super().__init__()
        self.data = data
        self.forecast_step = forecast_step
        self.device_count = device_count
        self.scaler_input = scaler_input
        self.scaler_label = scaler_label
        self.neighbor_stations = neighbor_station
        self.stations_count = len(self.neighbor_stations["stations"])
        self.infer_station = infer_station
        self.forecast_hours = forecast_hours
        self.label_column_index = label_column_index
        self.load_nodes()
        self.load_edges()

    def load_node_labels(self, data: pd.DataFrame):
        """Move the ``labels`` column of ``data`` into ``self.node_labels``.

        When ``data`` has no ``labels`` column it is returned untouched and
        ``self.node_labels`` is not set. Otherwise the labels are stored as a
        float32 tensor of shape ``(time steps, node_num, 1)`` and the column is
        dropped from ``data`` in place.
        """
        if "labels" not in data.columns:
            return data

        # Trailing rows that do not fill a complete time step are discarded.
        num_steps = len(data) // self.node_num
        node_labels = data["labels"].to_numpy().reshape(-1)[: num_steps * self.node_num]

        self.node_labels = torch.from_numpy(
            node_labels.reshape(num_steps, self.node_num, 1).astype("float32")
        )
        data.drop(columns=["labels"], inplace=True)
        return data

    def load_nodes(self):
        """Build node names, timestamps and the node feature tensor.

        Sets ``node_names``, ``node_num``, ``timestamps``,
        ``infer_station_index``, ``node_feas``, ``node_fea_dim`` and
        ``node_data`` (float32, ``(time steps, node_num, node_fea_dim)``).
        """
        data = self.node_feature_selection(self.data)
        # Timestamps are made timezone-naive before being used as index.
        data["timestamp"] = [pd.Timestamp(a).replace(tzinfo=None) for a in data["timestamp"]]
        data = data.rename(columns={"station": "Node"})
        self.node_names = data["Node"].unique().astype(str)
        self.node_num = len(self.node_names)
        data.set_index(["timestamp", "Node"], inplace=True)
        data = self.load_node_labels(data)
        data.drop(columns=["forecast_step"], inplace=True)

        # Set node variables
        self.lookback_indices = list(range(self.forecast_hours))
        self.target_idx = self.forecast_step
        self.timestamps = data.index.get_level_values(0).unique()
        self.infer_station_index = next(
            (i for i, a in enumerate(self.node_names) if a == self.infer_station), None
        )
        self.node_feas = list(data.columns)
        self.node_fea_dim = len(self.node_feas)

        # Trailing rows that do not fill a complete time step are discarded.
        num_steps = len(data) // self.node_num
        node_vals = data.values.reshape(-1)[: num_steps * self.node_num * self.node_fea_dim]

        self.node_data = torch.from_numpy(
            node_vals.reshape(num_steps, self.node_num, self.node_fea_dim).astype("float32")
        )

        self.timestamps = self.timestamps[: self.node_data.shape[0]]

    def get_from_to_nodes(self, neighbor_stations: Dict[str, Any]):
        """Return parallel ``(from_node, to_node)`` lists of directed edges.

        Every station except ``self.infer_station`` gets an edge to every
        other station in ``neighbor_stations["stations"]``.
        """
        stations = neighbor_stations["stations"]
        from_node = []
        to_node = []
        for s in stations:
            if s == self.infer_station:
                continue
            for c in stations:
                if s != c:
                    from_node.append(s)
                    to_node.append(c)
        return from_node, to_node

    def get_edges(self, neighbor_stations: Dict[str, Any]):
        """Compute distance and direction components for every directed edge.

        Returns:
            Dict of equally long lists: ``from_node``, ``to_node``,
            ``distance`` (geodesic distance in km) and ``dir_x`` / ``dir_y``
            (direction components derived from the two coordinates).
        """
        from math import radians  # local: the module only imports cos and sin

        from_node, to_node = self.get_from_to_nodes(neighbor_stations)

        coords = neighbor_stations["long_lat"]
        distances = []
        turbine_dir_x = []
        turbine_dir_y = []

        for src, dst in zip(from_node, to_node):
            # Stored as (longitude, latitude); geodesic() expects (latitude, longitude).
            coord_1 = coords[src][::-1]
            coord_2 = coords[dst][::-1]
            distances.append(geopy.distance.geodesic(coord_1, coord_2).km)

            # math.cos/sin work in radians while the coordinates are in degrees.
            lat1, lon1 = (radians(v) for v in coord_1)
            lat2, lon2 = (radians(v) for v in coord_2)
            turbine_dir_x.append(cos(lat1) * sin(lon1 - lon2))
            turbine_dir_y.append(
                cos(lat2) * sin(lat1) - sin(lat2) * cos(lat1) * cos(lon1 - lon2)
            )

        data = {
            "from_node": from_node,
            "to_node": to_node,
            "distance": distances,
            "dir_x": turbine_dir_x,
            "dir_y": turbine_dir_y,
        }
        return data

    def load_edges(self):
        """Build the edge index and the edge feature tensor.

        Sets ``edge_names`` (sorted ``"src->dst"`` strings), ``edge_index``
        (LongTensor ``(edge_num, 2)`` of node ids), ``edge_feas``,
        ``edge_fea_dim``, ``edge_num`` and ``edge_data`` (float32,
        ``(edge_num, edge_fea_dim)``). The only edge feature is the inverse
        distance.
        """
        data = pd.DataFrame(self.get_edges(self.neighbor_stations))
        data["edge"] = data.apply(lambda x: "{}->{}".format(x["from_node"], x["to_node"]), axis=1)
        # NOTE(review): two stations with identical coordinates yield an infinite weight.
        data.loc[:, "distance"] = 1 / data.loc[:, "distance"]
        data.drop(columns=["from_node", "to_node"], inplace=True)
        edge_names = sorted(data["edge"].unique())
        node2id = dict(zip(self.node_names, range(len(self.node_names))))
        edge_index = [
            [node2id[src_node], node2id[tgt_node]]
            for src_node, tgt_node in [edge.split("->") for edge in edge_names]
        ]

        # edge_index follows the sorted edge names, so the feature rows must be
        # put in that same order; otherwise weights end up on the wrong edges.
        edge_df = data[["distance", "edge"]].set_index(["edge"]).loc[edge_names]
        self.edge_names = edge_names
        self.edge_feas = list(edge_df.columns)
        self.edge_index = torch.LongTensor(edge_index)
        self.edge_num = len(self.edge_names)

        self.edge_fea_dim = len(self.edge_feas)
        self.edge_data = torch.from_numpy(
            edge_df.values.reshape(
                self.edge_num,
                self.edge_fea_dim,
            ).astype("float32")
        )

    def node_feature_selection(self, df_node: pd.DataFrame):
        """Return a sorted, scaled copy of ``df_node``.

        Rows are ordered by timestamp, forecast step and station. The first
        ``forecast_hours`` columns are transformed with ``scaler_input`` and,
        when ``label_column_index`` is set, that column is transformed with
        ``scaler_label``.
        """
        df_node = df_node.sort_values(["timestamp", "forecast_step", "station"])
        scaled_input_array = self.scaler_input.transform(
            df_node.to_numpy()[:, 0 : self.forecast_hours]
        )
        df_node.iloc[:, 0 : self.forecast_hours] = scaled_input_array  # type: ignore

        if self.label_column_index is not None:
            scaled_label = self.scaler_label.transform(
                np.expand_dims(df_node.to_numpy()[:, self.label_column_index], axis=-1)
            )
            df_node.iloc[:, self.label_column_index] = scaled_label  # type: ignore
        return df_node


class BatchSampler(IterableDataset):  # type: ignore
    """Iterate over a ``GNNDataset`` in batches of time steps.

    Each yielded batch is a list
    ``[node_data, edge_index, edge_data, forecast_timestamps]`` followed by
    ``node_labels`` when the dataset has labels.

    Args:
        dataset: Source of node/edge tensors and timestamps.
        batch_size: Number of time steps per batch (the last batch may be smaller).
        lookahead_horizon: Stored on the sampler; not used by the methods below.
        lookback_horizon: Stored on the sampler; not used by the methods below.
        device: Stored on the sampler; not used by the methods below.
        random: Shuffle the time steps on every iteration when true.
        noise_parameters: Stored on the sampler; ``None`` means an empty dict.
        use_edge_weights: When true, ``edge_data`` is emitted as a flat vector
            with one weight per batched edge.
    """

    def __init__(
        self,
        dataset: GNNDataset,
        batch_size: int,
        lookahead_horizon: int,
        lookback_horizon: int,
        device: Union[str, torch.device],
        random: bool = True,
        noise_parameters: Union[Dict[str, Any], None] = None,
        use_edge_weights: bool = False,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.device_count = dataset.device_count
        self.random = random
        self.lookahead_horizon = lookahead_horizon
        self.lookback_horizon = lookback_horizon
        self.device = device
        # A fresh dict per instance instead of one shared mutable default.
        self.noise_parameters = {} if noise_parameters is None else noise_parameters
        self.use_edge_weights = use_edge_weights
        self.stations_count = dataset.stations_count

    def get_forecast_indices(self):
        """Return the indices of all time steps, shuffled when ``random`` is set."""
        forecast_indices = list(range(len(self.dataset.timestamps)))
        if self.random:
            np.random.seed()
            np.random.shuffle(forecast_indices)

        return forecast_indices

    def get_batch_edge_index(self, cur_batch_size: int, num_devices: int):
        """Set ``self.edge_index`` to a ``(cur_batch_size, 2, edge_num)`` tensor.

        Row 0 of every sample holds the source node ids and row 1 the target
        node ids. Within one device's share of the batch, sample ``i`` is
        shifted by ``i * node_num`` so that the samples form disjoint graphs;
        the same block is repeated for every device.

        NOTE(review): ``cur_batch_size`` must be divisible by the number of
        devices, otherwise the final reshape raises.
        """
        edge_num = self.dataset.edge_num
        node_num = self.dataset.node_num
        if num_devices == 0:
            num_devices = 1

        batch_size_each_device = cur_batch_size // num_devices

        # The dataset stores one (source, target) pair per row; transpose to
        # (2, edge_num) before tiling. A plain reshape would interleave
        # sources and targets.
        pairs = self.dataset.edge_index.reshape(edge_num, 2).t()
        edge_index = pairs.unsqueeze(0).repeat(batch_size_each_device, 1, 1)

        # Add offset to edge_index
        offset = torch.arange(0, batch_size_each_device * node_num, node_num).view(-1, 1, 1)
        edge_index = edge_index + offset
        self.edge_index = torch.cat(num_devices * [edge_index]).reshape(
            cur_batch_size, 2, edge_num
        )

    def get_batch_edge_data(self, cur_batch_size: int, num_devices: int):
        """Set ``self.edge_data`` to a ``(cur_batch_size, edge_fea_dim, edge_num)`` tensor.

        Every sample receives an identical copy of the dataset's edge
        features. Unlike node ids, edge features must not be shifted per
        sample.
        """
        edge_num = self.dataset.edge_num
        edge_fea_dim = self.dataset.edge_fea_dim
        if num_devices == 0:
            num_devices = 1
        batch_size_each_device = cur_batch_size // num_devices

        # (edge_num, edge_fea_dim) -> (edge_fea_dim, edge_num), then tile per sample.
        features = self.dataset.edge_data.reshape(edge_num, edge_fea_dim).t()
        edge_data = features.unsqueeze(0).repeat(batch_size_each_device, 1, 1)

        self.edge_data = torch.cat(num_devices * [edge_data]).reshape(
            cur_batch_size, edge_fea_dim, edge_num
        )

    def generate(self):
        """Yield one batch (see ``get_output``) per ``batch_size`` time steps."""
        total_forecast_indices = self.get_forecast_indices()
        # Ceiling division: a partial final batch is still emitted.
        num_batches = (len(total_forecast_indices) // (self.batch_size)) + (
            len(total_forecast_indices) % self.batch_size != 0
        )

        for batch_id in range(num_batches):
            batch_id_s = batch_id * self.batch_size
            batch_id_e = batch_id_s + self.batch_size
            forecast_indices = total_forecast_indices[batch_id_s:batch_id_e]
            cur_batch_size = len(forecast_indices)
            lookback_indices = forecast_indices

            # Collect meta data
            forecast_timestamps = [self.dataset.timestamps[i] for i in forecast_indices]

            # Collect node-level time series: (batch, node_num, 1, node_fea_dim)
            node_lookback = (
                self.dataset.node_data[lookback_indices]
                .reshape(cur_batch_size, 1, self.dataset.node_num, self.dataset.node_fea_dim)
                .transpose(1, 2)
                .contiguous()
            )

            if self.dataset.label_column_index is not None:
                # Collect node-level labels: (batch, node_num, 1, 1)
                node_lookback_labels = (
                    self.dataset.node_labels[lookback_indices]
                    .reshape(cur_batch_size, 1, self.dataset.node_num, 1)
                    .transpose(1, 2)
                    .contiguous()
                )
            else:
                node_lookback_labels = None

            self.get_batch_edge_index(cur_batch_size, self.device_count)
            self.get_batch_edge_data(cur_batch_size, self.device_count)

            batch = self.get_output(node_lookback, node_lookback_labels, forecast_timestamps)

            yield batch

    def get_output(
        self,
        node_lookback: Tensor,
        node_lookback_labels: Union[Tensor, None],
        forecast_timestamps: List[str],
    ):
        """Assemble the batch list from the prepared node and edge tensors.

        ``self.edge_index`` is flattened to shape ``(2, batch * edge_num)``
        and, when ``use_edge_weights`` is set, ``self.edge_data`` is flattened
        to a vector.

        Returns:
            ``[node_data, edge_index, edge_data, forecast_timestamps]`` plus
            ``node_labels`` as a fifth element when labels are given.
        """
        if self.use_edge_weights:
            self.edge_data = torch.squeeze(self.edge_data.reshape(-1, 1))

        self.edge_index = self.edge_index.permute(1, 0, 2).contiguous().view(2, -1)
        # node_lookahead not implemented
        # when we get it in the future, we will implement it
        batch = {}
        batch["node_data"] = node_lookback[:, :, :, :]
        batch["edge_index"] = self.edge_index
        batch["edge_data"] = self.edge_data
        batch["forecast_timestamps"] = forecast_timestamps

        if node_lookback_labels is not None:
            batch["node_labels"] = node_lookback_labels

        return list(batch.values())

    def __iter__(self):
        return iter(self.generate())
b/notebooks/deepmc_neighbors/notebook_lib/base_deepmc.py deleted file mode 100644 index 83129eaa..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/base_deepmc.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -from typing import Any, List - -import numpy as np -import onnxruntime -from numpy.typing import NDArray - -from vibe_notebook.deepmc.utils import transform_to_array - - -def inference_deepmc(model_path: str, data_x: NDArray[Any], inference_hours: int): - list_data_x = [] - for pred_idx in range(inference_hours): - model_onnx_path = os.path.join(model_path, f"model_{pred_idx}", "export.onnx") - session = onnxruntime.InferenceSession(model_onnx_path, None) - data_in = { - out.name: data_x[i].astype(np.float32) for i, out in enumerate(session.get_inputs()) - } - - result = session.run(None, input_feed=data_in)[0] - result = result.astype(np.float32) - result = transform_to_array(result, inference_hours) - result = result[..., 0] - list_data_x.append(result) - return list_data_x - - -def inference_deepmc_post( - model_path: str, - post_data_x: List[NDArray[Any]], -): - # Train Post-Processing Scaling Models - inshape = len(post_data_x) - mix_data_yhat = np.empty([post_data_x[0].shape[0], inshape, inshape]) - idx = 0 - - for pred_idx, train_yhat in enumerate(post_data_x): - post_model_onnx_path = os.path.join(model_path, f"model_{pred_idx}", "post", "export.onnx") - post_session = onnxruntime.InferenceSession(post_model_onnx_path, None) - data_in = { - out.name: train_yhat.astype(np.float32) - for i, out in enumerate(post_session.get_inputs()) - } - result = post_session.run(None, input_feed=data_in)[0] - mix_data_yhat[:, :, idx] = result - idx = idx + 1 - return mix_data_yhat diff --git a/notebooks/deepmc_neighbors/notebook_lib/base_model.py b/notebooks/deepmc_neighbors/notebook_lib/base_model.py deleted file mode 100644 index 78769485..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/base_model.py +++ /dev/null @@ -1,154 +0,0 @@ -from typing import Any, 
Dict, List, Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch import Tensor -from torch.nn import Conv1d, Linear -from torch.utils.data import TensorDataset -from torch_geometric_temporal.nn.recurrent import TGCN - -from .schema import BatchTGCNInputs - - -def get_batch(batch: Union[Tensor, List[Tensor], TensorDataset], use_edge_weights: bool): - if isinstance(batch, TensorDataset): - batch = batch[:] - node_data = batch[0] - edge_index = batch[1] - # used for training - # skipped during inference - if len(batch) == 5: - node_labels = batch[4] - else: - node_labels = torch.tensor([]) - - if use_edge_weights: - edge_data = batch[2] - else: - edge_data = torch.tensor([]) - return node_data, edge_index, edge_data, node_labels - - -class BaseModule(nn.Module): - def __init__(self, problem_params: Dict[str, Any]): - super().__init__() - self.batch_size = problem_params["batch_size"] - self.lookback_horizon = problem_params["lookback_horizon"] - self.lookahead_horizon = problem_params["lookahead_horizon"] - - # node - self.num_nodes = problem_params["node_num"] - self.node_in_fea_dim = problem_params["node_in_fea_dim"] - self.node_out_fea_dim = problem_params["node_out_fea_dim"] - self.node_input_dim = self.lookback_horizon * self.node_in_fea_dim - self.node_output_dim = self.lookahead_horizon * self.node_out_fea_dim - self.use_dropout = problem_params["use_dropout"] - - # edge - self.edge_in_fea_dim = problem_params["edge_in_fea_dim"] - self.edge_out_fea_dim = problem_params["edge_out_fea_dim"] - self.edge_input_dim = self.lookback_horizon * self.edge_in_fea_dim - self.edge_output_dim = self.lookahead_horizon * self.edge_out_fea_dim - - # Add day and hour embeddings - self.day_em_dim = problem_params["day_em_dim"] - self.hour_em_dim = problem_params["hour_em_dim"] - # 7 days - self.day_em = nn.Embedding(7, self.day_em_dim) - # 24 hours - self.hour_em = nn.Embedding(24, self.hour_em_dim) - - # GRU hidden him - 
self.hidden_dim = problem_params["hidden_dim"] - self.dropout = nn.Dropout2d(0.01) - - # linear layer - self.linear1_node = nn.Linear(self.hidden_dim, self.node_output_dim) - self.linear2_node = nn.Linear(self.node_in_fea_dim - 1, self.lookahead_horizon) - self.ar = nn.Linear(self.lookback_horizon, self.lookahead_horizon) - - # Multi-dimensional edge attribute to one dimension - self.edge_num = problem_params["edge_num"] - self.use_edge_weights = problem_params["use_edge_weights"] - self.linear_edge = nn.Linear(self.edge_in_fea_dim, 1) - - def weights_init(self, m: Union[Conv1d, Linear]): - if isinstance(m, Conv1d) or isinstance(m, Linear): - nn.init.xavier_uniform_(m.weight.data) - if m.bias is not None: - nn.init.zeros_(m.bias.data) - - def initialize_weights(self): - pass - - def forward(self, batch: Dict[str, Any]): - pass - - -class BatchTGCN(BaseModule): - def __init__( - self, - inputs: BatchTGCNInputs, - ): - super().__init__(inputs.dict()) - self.inputs = inputs.dict() - self.decoder_in_fea_dim = 2 - self.node_in_fea_dim = self.node_in_fea_dim - - self.tgcn_cell_encoder = TGCN(self.node_in_fea_dim, self.hidden_dim) - self.tgcn_cell_encoder1 = TGCN(self.node_in_fea_dim, self.hidden_dim) - - self.tgcn_cell_decoder = TGCN(self.decoder_in_fea_dim, self.hidden_dim) - self.tgcn_cell_decoder1 = TGCN(self.decoder_in_fea_dim, self.hidden_dim) - # stopping loop reference - self.get_batch = get_batch - self.dropout_encoder1 = nn.Dropout(0.05) - - def forward(self, inputs: Union[Tensor, List[Tensor]]): - node_data, edge_index, edge_data, _ = get_batch(inputs, self.use_edge_weights) - h = torch.empty - self.edge_index = edge_index # 2, num_edges - # Process edge - self.batch_size, self.num_nodes, _, _ = node_data.shape - hh, e = self.process(node_data, edge_data) - h = F.relu_(hh) - h = self.linear1_node(h) - h = h.reshape(self.batch_size, self.num_nodes, self.lookahead_horizon) # type: ignore - hh = hh.reshape(self.batch_size, self.num_nodes, self.hidden_dim) # type: 
ignore - return h, e, hh - - def get_hidden_embedding( - self, - horizon: int, - x: Tensor, - edge_weights: Union[Tensor, None], - ) -> Tuple[Tensor, Union[Tensor, None]]: - for i in range(horizon): - indices_lookback = torch.tensor(self.inputs["lookback_indices"]).to(x.device) - input = torch.index_select(x[:, :, i, :], 2, indices_lookback) - input = input.reshape(self.batch_size * self.num_nodes, -1) - h = self.tgcn_cell_encoder(input, self.edge_index, edge_weights) - h = F.relu(h) - h = self.dropout_encoder1(h) - return h, edge_weights - - def process( - self, - node_data: Tensor, - edge_data: Tensor, - ) -> Tuple[Tensor, Union[Tensor, None]]: - # Add hour and day embedding - horizon = self.lookback_horizon - x = node_data - - if self.use_dropout: - x = self.dropout(x) - - edge_weights = None - if self.use_edge_weights: - edge_weights = edge_data - - self.prev_input = x[:, :, -1, :horizon] - h, e = self.get_hidden_embedding(horizon, x, edge_weights) - return h, e diff --git a/notebooks/deepmc_neighbors/notebook_lib/base_modules.py b/notebooks/deepmc_neighbors/notebook_lib/base_modules.py deleted file mode 100644 index 34896c29..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/base_modules.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import List, Union - -import pytorch_lightning as pl -from notebook_lib.base_model import BatchTGCN -from torch import Tensor, nn -from torch.optim import Adagrad - -from .schema import BatchTGCNInputs - - -class BatchTGCNTrain(pl.LightningModule): - def __init__( - self, - inputs: BatchTGCNInputs, - learning_rate: float = 0.001, - ): - super().__init__() - self.gnn = BatchTGCN(inputs) - self.loss = nn.MSELoss() - self.learning_rate = learning_rate - - def forward(self, batch: Union[Tensor, List[Tensor]]): - y_hat, _, _ = self.gnn(batch) - return y_hat - - def configure_optimizers(self): - optimizer = Adagrad( - self.parameters(), - lr=self.learning_rate, - initial_accumulator_value=1e-6, - eps=1e-6, - weight_decay=1e-6, - ) 
- return optimizer - - def training_step(self, train_batch: Union[Tensor, List[Tensor]], _): - _, _, _, node_labels = self.gnn.get_batch(train_batch, self.gnn.use_edge_weights) - y = node_labels - y_hat, _, _ = self.gnn(train_batch) - loss = self.loss(y_hat, y.reshape(y_hat.shape)) - self.log("train_loss/total", loss, on_epoch=True, prog_bar=True) - return loss - - def validation_step(self, validation_batch: Union[Tensor, List[Tensor]], _): - _, _, _, node_labels = self.gnn.get_batch(validation_batch, self.gnn.use_edge_weights) - y = node_labels - y_hat, _, _ = self.gnn(validation_batch) - loss = self.loss(y_hat, y.reshape(y_hat.shape)) - self.log("val_loss/total", loss, on_epoch=True, prog_bar=True) - return loss diff --git a/notebooks/deepmc_neighbors/notebook_lib/data_utils.py b/notebooks/deepmc_neighbors/notebook_lib/data_utils.py deleted file mode 100644 index 85ce7231..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/data_utils.py +++ /dev/null @@ -1,241 +0,0 @@ -import os -import pickle -from typing import Any, Dict, List, Tuple, Union - -import numpy as np -import pandas as pd -import torch -from numpy.typing import NDArray -from sklearn.preprocessing import StandardScaler -from torch import Tensor -from torch.utils.data import DataLoader, TensorDataset - -from vibe_notebook.deepmc.utils import transform_to_array - -from .base_dataset import BatchSampler, GNNDataset -from .base_modules import BatchTGCNTrain - - -def build_scaler(train_embeddings: pd.DataFrame, forecast_hours: int) -> StandardScaler: - train_data_scaler = StandardScaler() - train_data_scaler.fit(train_embeddings.to_numpy()[:, :forecast_hours]) - return train_data_scaler - - -def build_scaler_label( - train_embeddings: pd.DataFrame, labels_column: str -) -> Tuple[StandardScaler, int]: - index = -1 - for i, column in enumerate(train_embeddings.columns): - if column == labels_column: - index = i - - if index == -1: - raise ValueError(f"Labels column '{labels_column}' not found") - - 
train_label_scaler = StandardScaler() - train_label_scaler.fit(np.expand_dims(train_embeddings.to_numpy()[:, index], axis=-1)) - return train_label_scaler, index - - -def get_batch_sample( - train_dataset: GNNDataset, - test_dataset: GNNDataset, - batch_size: int, - lookahead_horizon: int, - lookback_horizon: int, - device: torch.device, - use_edge_weights: bool, -) -> Tuple[BatchSampler, BatchSampler]: - train_sampler = BatchSampler( - dataset=train_dataset, - batch_size=batch_size, - lookahead_horizon=lookahead_horizon, - lookback_horizon=lookback_horizon, - device=device, - random=False, - use_edge_weights=use_edge_weights, - ) - - test_sampler = BatchSampler( - dataset=test_dataset, - batch_size=batch_size, - lookahead_horizon=lookahead_horizon, - lookback_horizon=lookback_horizon, - device=device, - random=False, - use_edge_weights=use_edge_weights, - ) - - return (train_sampler, test_sampler) - - -def train_test_dataset( - train_data: pd.DataFrame, - test_data: pd.DataFrame, - step: int, - neighbors_station: Dict[str, Any], - scaler_data: StandardScaler, - scaler_label: StandardScaler, - infer_station: str, - labels_column_index: int, -) -> Tuple[GNNDataset, GNNDataset]: - train_dataset = GNNDataset( - train_data, - forecast_step=step, - scaler_input=scaler_data, - scaler_label=scaler_label, - neighbor_station=neighbors_station, - forecast_hours=24, - infer_station=infer_station, - label_column_index=labels_column_index, - ) - - test_dataset = GNNDataset( - test_data, - forecast_step=step, - scaler_input=scaler_data, - scaler_label=scaler_label, - neighbor_station=neighbors_station, - forecast_hours=24, - infer_station=infer_station, - label_column_index=labels_column_index, - ) - - return (train_dataset, test_dataset) - - -def problem_params( - dataset: GNNDataset, - batch_size: int, - lookback_horizon: int, - lookahead_horizon: int, - use_edge_weights: bool, - use_dropout: bool, - hidden_dim: int, - forecast_hours: int, -) -> Dict[str, Any]: - 
problem_params = { - "lookback_horizon": lookback_horizon, - "lookahead_horizon": lookahead_horizon, - "node_num": dataset.node_num, - "node_in_fea_dim": dataset.node_fea_dim, - "node_out_fea_dim": 1, - "edge_in_fea_dim": dataset.edge_fea_dim, - "edge_out_fea_dim": 1, - "edge_num": dataset.edge_num, - "use_edge_weights": use_edge_weights, - "day_em_dim": 1, - "hour_em_dim": 1, - "period": 5, # for attention model - "batch_size": batch_size, - "use_dropout": use_dropout, - "hidden_dim": hidden_dim, - "device_count": torch.cuda.device_count(), - "lookback_indices": list(range(forecast_hours)), - } - - return problem_params - - -def export_to_onnx( - file_path: str, - model: BatchTGCNTrain, - inputs: DataLoader, # type: ignore - use_edge_weights: bool, - edge_num: int, - number_of_stations: int, -): - data = next(iter(inputs)) - node_data, edge_index, edge_data, _ = get_batch(data, use_edge_weights) - data = { - "node_data": node_data[:number_of_stations], - "edge_index": edge_index[:, : (edge_num * number_of_stations)], - "edge_data": edge_data[: (edge_num * number_of_stations)], - } - keys = list(data.keys()) - batch_axes = {keys[i]: {0: "batch_size"} for i in range(len(keys))} - onnx_output_path = os.path.join(file_path, "model_output.onnx") - if os.path.exists(onnx_output_path): - os.remove(onnx_output_path) - - # Export the model - torch.onnx.export( - model, - list(data.values()), # type: ignore - onnx_output_path, - input_names=list(batch_axes.keys()), - dynamic_axes=batch_axes, - opset_version=16, - ) - - -def write_to_file(output_file: str, data: List[Any]): - with open(output_file, "wb") as f: - pickle.dump(data, f) - - -def get_file(file_path: str) -> List[Any]: - if os.path.exists(file_path): - with open(file_path, "rb") as f: - return pickle.load(f) - else: - raise Exception(f"File {file_path} not found") - - -def get_batch(batch: Union[Tensor, List[Tensor], TensorDataset], use_edge_weights: bool): - if type(batch) == TensorDataset: - batch = batch[:] - 
node_data = batch[0] - edge_index = batch[1] - # considered for training - # skipped during inference - if len(batch) == 5: - node_labels = batch[4] - else: - node_labels = torch.tensor([]) - - if use_edge_weights: - edge_data = batch[2] - else: - edge_data = torch.tensor([]) - return node_data, edge_index, edge_data, node_labels - - -def smooth(y: List[float], box_pts: int): - box = np.ones(box_pts) / box_pts - y_smooth = np.convolve(y, box, mode="same") - return y_smooth - - -def get_split_data(split_data: NDArray[Any], timestamps: NDArray[Any], split_at_index: int): - split_by_index = [] - for i in range(split_at_index): - data_at_index = split_data[i::split_at_index][:, i] - timestamp_at_index = timestamps[i::split_at_index] - split_by_index.append( - pd.DataFrame(zip(timestamp_at_index, data_at_index), columns=["timestamp", "label"]) - ) - - split_data_df = pd.concat(split_by_index, axis=0, ignore_index=True) - split_data_df["timestamp"] = pd.to_datetime(split_data_df["timestamp"]) - split_data_df = split_data_df.sort_values(by="timestamp") - - return np.array(split_data_df["label"].values) - - -def preprocess_transform( - mix_data_yhat: NDArray[Any], - inference_hours: int, - dates_list: NDArray[Any], -): - init_start = 0 - data_list = [] - end = mix_data_yhat.shape[0] - for i in range(init_start, end, inference_hours): - for j in range(inference_hours): - data_list.append(mix_data_yhat[i, 0, j]) - - mix_data_yhat = transform_to_array(np.array(data_list))[: mix_data_yhat.shape[0]] - dates_list = dates_list[: mix_data_yhat.shape[0]] - return mix_data_yhat, dates_list diff --git a/notebooks/deepmc_neighbors/notebook_lib/embeddings.py b/notebooks/deepmc_neighbors/notebook_lib/embeddings.py deleted file mode 100644 index 012f8a4a..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/embeddings.py +++ /dev/null @@ -1,235 +0,0 @@ -import os -from datetime import datetime -from typing import Any, Dict, List - -import pandas as pd - -from .data_utils import 
get_file - - -def construct_neighbor_stations(stations: List[Dict[str, Any]]): - neighbors = {"stations": [], "coordinates": {}} - for station in stations: - neighbors["stations"].append(station["name"]) - neighbors["coordinates"][station["name"]] = station["coordinates"] - - return neighbors - - -def get_deepmc_post_results(root_path: str, stations: List[Dict[str, Any]], model_type: str): - predict_out = {} - for station in stations: - deepmc_post_path = os.path.join( - root_path, station["name"], model_type, "embeddings", "post_processed_results.pkl" - ) - ( - intermediate_test, - intermediate_train, - _, - _, - train_labels_station, - test_labels_station, - out_train_dates, - out_test_dates, - ) = get_file(deepmc_post_path) - predict_out[station["name"]] = ( - intermediate_train, - intermediate_test, - train_labels_station, - test_labels_station, - out_train_dates, - out_test_dates, - ) - - return predict_out - - -def get_date(stations: Dict[str, Any], data_index: int = -2, date_type: int = 0): - """Retrieves the start date and end date by comparing data of all stations. - :param stations: Dictionary with station name as key and values - with collection of station information used to generate embeddings. - - :param data_index: It defines position of data in array. - will use -2 for train, -1 for test, 1 for inference. - - :param date_type: 0 for start_date, -1 for end_date. - - return: date. 
- """ - station_name = next(iter(stations)) - station_values = stations[station_name] - date = datetime.strptime(station_values[data_index][date_type], "%Y-%m-%d %H:%M:%S") - for station_values in stations.values(): - try: - s_date = datetime.strptime(station_values[data_index][date_type], "%Y-%m-%d %H:%M:%S") - # for start date - if date_type == 0 and date < s_date: - date = s_date - # for end date - if date_type == -1 and date > s_date: - date = s_date - except Exception as e: - print(e) - return date - - -def create_embeddings( - stations: List[Dict[str, Any]], - inference_hours: int, - root_path: str, - model_type: str, -): - neighbor_stations = construct_neighbor_stations(stations) - predict_out = get_deepmc_post_results(root_path, stations, model_type) - - # get start date - train_start_date = get_date(predict_out, data_index=-2, date_type=0) - test_start_date = get_date(predict_out, data_index=-1, date_type=0) - - # get end date - train_end_date = get_date(predict_out, data_index=-2, date_type=-1) - test_end_date = get_date(predict_out, data_index=-1, date_type=-1) - - test_start_date = datetime.strptime( - test_start_date.strftime("%Y-%m-%d") + " " + train_start_date.strftime("%H:%M:%S"), - "%Y-%m-%d %H:%M:%S", - ) - - df_train_embeddings = process_embeddings( - predict_out=predict_out, - inference_hours=inference_hours, - neighbor_stations=neighbor_stations, - start_date=train_start_date, - end_date=train_end_date, - data_index=0, - label_index=2, - timestamp_index=4, - ) - - df_test_embeddings = process_embeddings( - predict_out=predict_out, - inference_hours=inference_hours, - neighbor_stations=neighbor_stations, - start_date=test_start_date, - end_date=test_end_date, - data_index=1, - label_index=3, - timestamp_index=5, - ) - - return df_train_embeddings, df_test_embeddings - - -def create_embeddings_inference( - stations: List[Dict[str, Any]], - inference_hours: int, - deepmc_post_results: Dict[str, Any], -): - neighbor_stations = 
construct_neighbor_stations(stations) - inference_start_date = get_date(deepmc_post_results, data_index=1, date_type=0) - inference_end_date = get_date(deepmc_post_results, data_index=1, date_type=-1) - - df_embeddings = get_inference_embeddings( - predict_out=deepmc_post_results, - inference_hours=inference_hours, - neighbor_stations=neighbor_stations, - start_date=inference_start_date, - end_date=inference_end_date, - ) - - return df_embeddings - - -def get_inference_embeddings( - predict_out: Dict[str, Any], - inference_hours: int, - neighbor_stations: Dict[str, Any], - start_date: datetime, - end_date: datetime, -): - embeddings = [] - for station in neighbor_stations["stations"]: - df = pd.DataFrame( - predict_out[station][0].reshape( - predict_out[station][0].shape[0], predict_out[station][0].shape[2] - ), - columns=list(range(inference_hours)), - ) - timestamps = predict_out[station][1] - - df["station"] = station - df["timestamp"] = timestamps - df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y-%m-%d %H:%M:%S") - - mask = (df["timestamp"] >= start_date) & (df["timestamp"] <= end_date) - df = df.loc[mask] - - df.reset_index(drop=True, inplace=True) - df["forecast_step"] = df.index - embeddings.append(df) - - df_embeddings = pd.concat(embeddings, axis=0) - df_embeddings.sort_values(by=["forecast_step", "station"], inplace=True) - return df_embeddings - - -def process_embeddings( - predict_out: Dict[str, Any], - inference_hours: int, - neighbor_stations: Dict[str, Any], - start_date: datetime, - end_date: datetime, - data_index: int, - label_index: int, - timestamp_index: int, -): - """ - Process embeddings for train or test data. - - :param predict_out: Dictionary with station name as key and values. It's output of deepmc post processing. - :param inference_hours: Number of hours to predict. - :param neighbor_stations: Dictionary with stations and coordinates. - :param start_date: Start date for embeddings. 
- :param end_date: End date for embeddings. - :param data_index: Index of train or test data in predict_out. The pickle file - generated by deepmc follows this index train=0, test=1 - :param label_index: Index of train or test labels in predict_out. The pickle file - generated by deepmc follows this index train=2, test=3 - :param timestamp_index: Index of train or test timestamps in predict_out. The pickle file - generated by deepmc follows this index train=4, test=5 - """ - embeddings = [] - for station in neighbor_stations["stations"]: - df = pd.DataFrame( - predict_out[station][data_index].reshape( - predict_out[station][data_index].shape[0], predict_out[station][data_index].shape[2] - ), - columns=list(range(inference_hours)), - ) - - labels = predict_out[station][label_index] - timestamps = predict_out[station][timestamp_index] - - df["station"] = station - if len(timestamps) < len(labels): - labels = labels[: len(timestamps)] - - df["labels"] = labels - - if len(timestamps) > len(labels): - timestamps = timestamps[: len(labels)] - df["timestamp"] = timestamps - - df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y-%m-%d %H:%M:%S") - - mask = (df["timestamp"] >= start_date) & (df["timestamp"] <= end_date) - df = df.loc[mask] - - df.reset_index(drop=True, inplace=True) - df["forecast_step"] = df.index - - embeddings.append(df) - - df_embeddings = pd.concat(embeddings, axis=0) - df_embeddings.sort_values(by=["forecast_step", "station"], inplace=True) - return df_embeddings diff --git a/notebooks/deepmc_neighbors/notebook_lib/post_deepmc.py b/notebooks/deepmc_neighbors/notebook_lib/post_deepmc.py deleted file mode 100644 index 77191ab3..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/post_deepmc.py +++ /dev/null @@ -1,271 +0,0 @@ -import os -import pickle -from typing import Any, Dict, List - -import numpy as np -import pandas as pd -from notebook_lib.base_deepmc import inference_deepmc, inference_deepmc_post -from notebook_lib.data_utils import 
get_file, preprocess_transform -from numpy.typing import NDArray -from sklearn.preprocessing import StandardScaler - -from vibe_notebook.deepmc.utils import get_csv_data, transform_to_array_3D - - -def write_embeddings_input( - embeddings_input_path: str, - data_scaler: StandardScaler, - mix_yhat: NDArray[Any], - mix_train_yhat: NDArray[Any], - mix_yc: NDArray[Any], - mix_train_yc: NDArray[Any], - train_y: NDArray[Any], - test_y: NDArray[Any], - train_dates_list: NDArray[Any], - test_dates_list: NDArray[Any], -): - if os.path.exists(embeddings_input_path): - os.remove(embeddings_input_path) - - p_path_dir = os.path.dirname(embeddings_input_path) - if not os.path.exists(p_path_dir): - os.makedirs(p_path_dir) - - # Inverse transform outputs, save results - with open( - embeddings_input_path, - "wb", - ) as f: - mix_yhat = np.expand_dims(np.array(data_scaler.inverse_transform(mix_yhat[:, :])), axis=1) - mix_yc = np.expand_dims(np.array(data_scaler.inverse_transform(mix_yc[:, 0, :])), axis=1) - mix_train_yhat = np.expand_dims( - np.array(data_scaler.inverse_transform(mix_train_yhat[:, :])), axis=1 - ) - mix_train_yc = np.expand_dims( - np.array(data_scaler.inverse_transform(mix_train_yc[:, 0, :])), axis=1 - ) - train_dates_list = train_dates_list[:, 0] - test_dates_list = test_dates_list[:, 0] - train_labels = np.array(data_scaler.inverse_transform(np.rollaxis(train_y, 2, 1)[:, 0, :])) - test_labels = np.array(data_scaler.inverse_transform(np.rollaxis(test_y, 2, 1)[:, 0, :])) - train_labels = train_labels[:, 0] - test_labels = test_labels[:, 0] - pickle.dump( - [ - mix_yhat, - mix_train_yhat, - mix_yc, - mix_train_yc, - train_labels, - test_labels, - train_dates_list, - test_dates_list, - ], - f, - ) - - return mix_yhat, mix_train_yhat, mix_yc, mix_train_yc, train_labels, test_labels - - -def get_date_range( - stations: List[Dict[str, Any]], infer_station_name: str, root_path: str, model_type: str -): - for station in stations: - if station["name"] != 
infer_station_name: - model_path = os.path.join(root_path, station["name"], model_type) - train_data_path = os.path.join(model_path, "train_data_dates.pkl") - ( - _, - _, - _, - _, - _, - _, - _, - train_dates_list, - _, - test_dates_list, - ) = get_file(train_data_path) - - return (train_dates_list, test_dates_list) - raise Exception("No station found to get date range") - - -def get_station_object(stations: List[Dict[str, Any]], infer_station_name: str): - station, column_name = None, None - for stations_dict in stations: - if stations_dict["name"] == infer_station_name: - station = stations_dict["name"] - column_name = stations_dict["column_name"] - return station, column_name - - raise Exception(f"No station found with name {infer_station_name}") - - -def dump_forecast_output( - train_df: pd.DataFrame, - test_df: pd.DataFrame, - model_path: str, - column_name: str, - train_dates_list: List[str], - test_dates_list: List[str], - inference_hours: int, -): - train_data = np.array(train_df[column_name].values) - test_data = np.array(test_df[column_name].values) - mix_train_yhat = transform_to_array_3D(train_data[:-inference_hours], inference_hours) - mix_train_y = transform_to_array_3D(train_data[inference_hours:], inference_hours) - mix_test_yhat = transform_to_array_3D(test_data[:-inference_hours], inference_hours) - mix_test_y = transform_to_array_3D(test_data[inference_hours:], inference_hours) - out_dir = os.path.join(model_path, "embeddings") - if not os.path.exists(out_dir): - os.makedirs(out_dir) - - out_path = os.path.join(out_dir, "post_processed_results.pkl") - - # Inverse transform outputs, save results - with open(out_path, "wb") as f: - train_labels = mix_train_y.squeeze() - test_labels = mix_test_y.squeeze() - train_labels = train_labels[:, 0] - test_labels = test_labels[:, 0] - - pickle.dump( - [ - mix_test_yhat, - mix_train_yhat, - mix_test_y, - mix_train_y, - train_labels, - test_labels, - train_dates_list, - test_dates_list, - ], - f, - ) - - -def 
embeddings_preprocess_forecast( - stations: List[Dict[str, Any]], - infer_station_name: str, - root_path: str, - input_data_path: str, - forecast_interval: int, - model_type: str, - column_name: str, -): - model_path = os.path.join(root_path, infer_station_name, model_type) - forecast_df = get_csv_data(input_data_path) - train_dates_list, test_dates_list = get_date_range( - stations, infer_station_name, root_path, model_type - ) - train_df = forecast_df[forecast_df.index.isin(train_dates_list[:, 0])] - test_df = forecast_df[forecast_df.index.isin(test_dates_list[:, 0])] - - train_dates_list = ( - train_df[forecast_interval:].index.strftime("%Y-%m-%d %H:%M:%S").tolist() # type: ignore - ) - test_dates_list = ( - test_df[forecast_interval:].index.strftime("%Y-%m-%d %H:%M:%S").tolist() # type: ignore - ) - - dump_forecast_output( - train_df, - test_df, - model_path, - column_name, - train_dates_list, - test_dates_list, - forecast_interval, - ) - - -def embeddings_preprocess_deepmc( - model_path: str, - inference_hours: int, -): - train_data_path = os.path.join(model_path, "train_data_dates.pkl") - ( - train_X, - train_y, - test_X, - test_y, - _, - output_scaler1, - _, - train_dates_list, - _, - test_dates_list, - ) = get_file(train_data_path) - - list_train_X = inference_deepmc(model_path, train_X, inference_hours) - list_test_X = inference_deepmc(model_path, test_X, inference_hours) - - # Train data deepmc inference Post-Processing - mix_train_yc = preprocess_post_deepmc_gt(list_train_X, train_y, inference_hours) - mix_train_yhat = inference_deepmc_post(model_path, list_train_X) - - # Test data deepmc inference Post-Processing - mix_yc = preprocess_post_deepmc_gt(list_test_X, test_y, inference_hours) - mix_yhat = inference_deepmc_post(model_path, list_test_X) - - mix_train_yhat, train_dates_list = preprocess_transform( - mix_train_yhat, inference_hours, train_dates_list - ) - mix_yhat, test_dates_list = preprocess_transform(mix_yhat, inference_hours, test_dates_list) 
- embeddings_input_path = os.path.join(model_path, "embeddings", "post_processed_results.pkl") - - # Inverse transform outputs, save results - write_embeddings_input( - embeddings_input_path, - output_scaler1, - mix_yhat, - mix_train_yhat, - mix_yc, - mix_train_yc, - train_y, - test_y, - train_dates_list, - test_dates_list, - ) - - -def preprocess_post_deepmc_gt( - post_data_x: List[NDArray[Any]], data_y: NDArray[Any], inference_hours: int -): - data_y = data_y[: data_y.shape[0] - inference_hours] - mix_data_gt = np.empty([data_y.shape[0], data_y.shape[1], len(post_data_x)]) - - idx = 0 - for _, _ in enumerate(post_data_x): - mix_data_gt[:, :, idx] = mix_data_gt[:, idx, :] - idx = idx + 1 - - return mix_data_gt - - -def initialize_embeddings_preprocessing( - infer_station_name: str, - stations: List[Dict[str, Any]], - root_path: str, - infer_forecast_data_path: str, - infer_interval: int, - model_type: str, -): - for station in stations: - model_path = os.path.join(root_path, station["name"], model_type) - if station["name"] == infer_station_name: - embeddings_preprocess_forecast( - stations, - infer_station_name, - root_path, - infer_forecast_data_path, - infer_interval, - model_type, - station["column_name"], - ) - else: - embeddings_preprocess_deepmc( - model_path, - inference_hours=24, - ) diff --git a/notebooks/deepmc_neighbors/notebook_lib/post_deepmc_inference.py b/notebooks/deepmc_neighbors/notebook_lib/post_deepmc_inference.py deleted file mode 100644 index eb6065ef..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/post_deepmc_inference.py +++ /dev/null @@ -1,268 +0,0 @@ -import os -from datetime import datetime -from typing import Any, Dict, List, Tuple - -import numpy as np -import pandas as pd -from notebook_lib.base_deepmc import inference_deepmc, inference_deepmc_post -from notebook_lib.data_utils import preprocess_transform -from numpy.typing import NDArray -from shapely import geometry -from sklearn.preprocessing import StandardScaler - -from 
vibe_notebook.deepmc import prediction, utils -from vibe_notebook.deepmc.forecast import Forecast -from vibe_notebook.deepmc.utils import get_csv_data, transform_to_array_3D - -HRRR_PARAMETERS = [ - {"weather_type": "temperature", "search_text": "TMP:2 m"}, - {"weather_type": "humidity", "search_text": "RH:2 m"}, - {"weather_type": "u-component", "search_text": "UGRD:10 m"}, - {"weather_type": "v-component", "search_text": "VGRD:10 m"}, -] - - -def get_date_range( - stations: List[Dict[str, Any]], - infer_station_name: str, - deepmc_inference_results: Dict[str, Any], -): - for station in stations: - if station["name"] != infer_station_name: - (_, dates_list, _, _) = deepmc_inference_results[station["name"]] - dates_list = np.squeeze(np.array(dates_list)[:, 0]) - dates_list = dates_list[:, 0] - return dates_list - - raise Exception("No station found to get date range") - - -def get_station_object(stations: List[Dict[str, Any]], infer_station_name: str): - station, column_name = None, None - for stations_dict in stations: - if stations_dict["name"] == infer_station_name: - station = stations_dict["name"] - column_name = stations_dict["column_name"] - return station, column_name - - if station is None: - raise Exception(f"No station found with name {infer_station_name}") - - -def embeddings_preprocess_forecast( - stations: List[Dict[str, Any]], - infer_station_name: str, - input_data_path: str, - forecast_interval: int, - deepmc_inference_results: Dict[str, Any], - column_name: str, -): - forecast_df = get_csv_data(input_data_path) - dates_list = get_date_range(stations, infer_station_name, deepmc_inference_results) - data_df = forecast_df[forecast_df.index.isin(dates_list)] - - dates_list = ( - data_df[forecast_interval:].index.strftime("%Y-%m-%d %H:%M:%S").tolist() # type: ignore - ) - - data_forecast = np.array(data_df[column_name].values) - data_forecast = transform_to_array_3D(data_forecast[:], forecast_interval) - - return data_forecast, dates_list - - -def 
embeddings_preprocess_deepmc( - model_path: str, - inference_hours: int, - deepmc_inference_results: Tuple[NDArray[Any], NDArray[Any], StandardScaler, StandardScaler], -): - (data_x, dates_list, _, output_scaler) = deepmc_inference_results - - deepmc_out = inference_deepmc(model_path, data_x, inference_hours) - - # Train Post-Processing Scaling Models - mix_yhat = inference_deepmc_post(model_path, deepmc_out) - mix_yhat, dates_list = preprocess_transform(mix_yhat, inference_hours, dates_list) - dates_list = np.squeeze(np.array(dates_list)[:, 0]) - dates_list = dates_list[:, 0] - dates_list = pd.to_datetime(dates_list).strftime("%Y-%m-%d %H:%M:%S") - mix_yhat = np.expand_dims(np.array(output_scaler.inverse_transform(mix_yhat[:, :])), axis=1) - return mix_yhat, dates_list - - -def inference_embeddings_preprocessing( - infer_station_name: str, - stations: List[Dict[str, Any]], - root_path: str, - infer_forecast_data_path: str, - infer_interval: int, - model_type: str, - deepmc_inference_results: Dict[str, Any], -): - process_out = {} - for station in stations: - model_path = os.path.join(root_path, station["name"], model_type) - if station["name"] == infer_station_name: - process_out[station["name"]] = embeddings_preprocess_forecast( - stations, - infer_station_name, - infer_forecast_data_path, - infer_interval, - deepmc_inference_results, - station["column_name"], - ) - else: - process_out[station["name"]] = embeddings_preprocess_deepmc( - model_path, - infer_interval, - deepmc_inference_results[station["name"]], - ) - return process_out - - -def download_forecast_data( - stations: List[Dict[str, Any]], - start_date: datetime, - end_date: datetime, -): - parameters = HRRR_PARAMETERS - hrrr_data_workflow = "data_ingestion/weather/herbie_forecast" - time_range = (start_date, end_date) - forecast_dataset = {} - - for station in stations: - # AGWeatherNet station - station_name = station["name"] - station_location = station["coordinates"] - station_geometry = 
geometry.Point(station_location) - - forecast_ = Forecast( - workflow_name=hrrr_data_workflow, - geometry=station_geometry, - time_range=time_range, - parameters=parameters, - ) - run_list = forecast_.submit_download_request() - - p_forecast_dataset = forecast_.get_downloaded_data(run_list=run_list, offset_hours=-8) - p_forecast_dataset = utils.convert_forecast_data(p_forecast_dataset) - forecast_dataset[station_name] = p_forecast_dataset - return forecast_dataset - - -def get_historical_data( - stations: List[Dict[str, Any]], - historical_data_path: str, - historical_dataset_features: List[str], - inference_station: str, -): - historical_datasets = {} - for station in stations: - if station["name"] != inference_station: - p = historical_data_path % station["name"] - historical_df = utils.get_csv_data(path=p, interpolate=False, fill_na=False) - historical_df = historical_df[historical_dataset_features] - - historical_datasets[station["name"]] = historical_df - - return historical_datasets - - -def concat_historical_forecast( - stations: List[Dict[str, Any]], - historical_data_path: str, - hrrr_datasets: Dict[str, pd.DataFrame], - start_date: datetime, - end_date: datetime, - inference_station: str, - historical_dataset_features: List[str] = ["humidity", "wind_speed", "temperature"], - forecast_dataset_features: List[str] = [ - "humidity_forecast", - "wind_speed_forecast", - "temperature_forecast", - ], - frequency_hour: int = 1, - number_of_hours: int = 24, - weather_inference_type: str = "temperature", -): - historical_datasets = get_historical_data( - stations, historical_data_path, historical_dataset_features, inference_station - ) - - dataset_variables = historical_dataset_features.copy() - dataset_variables.extend(forecast_dataset_features) - dataset_variables.sort() - - out_dataset = {} - for station, historical_df in historical_datasets.items(): - forecast_df = hrrr_datasets[station] - - input_df = utils.clean_relevant_data_using_hrrr( - 
actual_df=historical_df.copy(), - forecast_df=forecast_df.copy(), - out_variables=dataset_variables, - freq_hours=frequency_hour, - num_of_indices=number_of_hours, - start_date=start_date, - end_date=end_date, - ) - - input_df = input_df[dataset_variables] - input_df = input_df[input_df.columns] - out_feature_df = input_df[weather_inference_type] - input_df.drop(columns=[weather_inference_type], inplace=True) - input_df[weather_inference_type] = out_feature_df - out_dataset[station] = input_df - - return out_dataset - - -def run_deepmc_inference( - root_path: str, - model_type: str, - out_features: List[str], - stations: List[Dict[str, Any]], - historical_data_path: str, - hrrr_datasets: Dict[str, pd.DataFrame], - start_date: datetime, - end_date: datetime, - inference_station: str, - historical_dataset_features: List[str] = ["humidity", "wind_speed", "temperature"], - forecast_dataset_features: List[str] = [ - "humidity_forecast", - "wind_speed_forecast", - "temperature_forecast", - ], - frequency_hour: int = 1, - number_of_hours: int = 24, - weather_inference_type: str = "temperature", -): - historical_clean_dataset = concat_historical_forecast( - stations, - historical_data_path, - hrrr_datasets, - start_date, - end_date, - inference_station, - historical_dataset_features, - forecast_dataset_features, - frequency_hour, - number_of_hours, - weather_inference_type, - ) - - inference_output = {} - for station, clean_dataset in historical_clean_dataset.items(): - train_data_export_path = os.path.join(root_path, station, model_type, "train_data.pkl") - - weather_forecast = prediction.InferenceWeather( - root_path=root_path, - data_export_path=train_data_export_path, - station_name=station, - predicts=out_features, - relevant=True, - ) - - inference_output[station] = weather_forecast.deepmc_preprocess(clean_dataset, "temperature") - - return inference_output diff --git a/notebooks/deepmc_neighbors/notebook_lib/schema.py 
b/notebooks/deepmc_neighbors/notebook_lib/schema.py deleted file mode 100644 index a9868007..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/schema.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import List - -from pydantic import BaseModel - - -class BatchTGCNInputs(BaseModel): - lookback_horizon: int - lookahead_horizon: int - node_num: int - node_in_fea_dim: int - node_out_fea_dim: int - edge_in_fea_dim: int - edge_out_fea_dim: int - edge_num: int - use_edge_weights: bool - day_em_dim: int - hour_em_dim: int - period: int - batch_size: int - use_dropout: bool - hidden_dim: int - device_count: int - lookback_indices: List[int] diff --git a/notebooks/deepmc_neighbors/notebook_lib/train.py b/notebooks/deepmc_neighbors/notebook_lib/train.py deleted file mode 100644 index 84b84c9a..00000000 --- a/notebooks/deepmc_neighbors/notebook_lib/train.py +++ /dev/null @@ -1,516 +0,0 @@ -import os -import shutil -import warnings -from datetime import datetime -from typing import Any, Dict, List, Union - -import numpy as np -import onnxruntime -import pandas as pd -import pytorch_lightning as pl -import torch -from matplotlib import pyplot as plt -from notebook_lib.embeddings import create_embeddings, create_embeddings_inference -from notebook_lib.post_deepmc import initialize_embeddings_preprocessing -from notebook_lib.post_deepmc_inference import ( - inference_embeddings_preprocessing, - run_deepmc_inference, -) -from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint -from sklearn.preprocessing import StandardScaler -from torch.utils.data import DataLoader - -from vibe_notebook.deepmc.utils import calculate_KPI, get_csv_data - -from .base_dataset import BatchSampler, GNNDataset -from .base_modules import BatchTGCNInputs, BatchTGCNTrain -from .data_utils import ( - build_scaler, - build_scaler_label, - export_to_onnx, - get_batch, - get_batch_sample, - get_file, - get_split_data, - problem_params, - smooth, - train_test_dataset, - write_to_file, -) 
- - -class MC_Neighbors: - def __init__( - self, - root_dir: str, - hidden_dim: int = 528, - lookahead_horizon: int = 1, - lookback_horizon: int = 1, - learning_rate: float = 0.001, - use_dropout: bool = False, - use_edge_weights: bool = False, - device_type: str = "cpu", # cuda, cpu - labels_column: str = "labels", - weather_type: str = "temperature", - model_type: str = "relevant", - ): - """ - Initialize the MC_Neighbors. - - :param root_dir: Path to trained model and preprocessed files. - :param hidden_dim: Input dimension transforms it to linear layer. - :param lookahead_horizon: Number of hours to lookahead. - :param lookback_horizon: Number of hours to lookback. - :param learning_rate: The learning rate of the model. - :param use_dropout: True or False to use dropout layer for model training. - :param use_edge_weights: True or False. If True consider spatial distance - between stations for model training. - :param device_type: The device type of the model. - :param labels_column: The labels column of the dataset. - :param weather_type: Purpose of trained model. It can be temperature or wind_speed etc.,. - :param model_type: relevant or not-relevant. 
- """ - self.weather_type = weather_type - self.root_dir = root_dir - self.lookahead_horizon = lookahead_horizon - self.lookback_horizon = lookback_horizon - self.hidden_dim = hidden_dim - self.learning_rate = learning_rate - self.use_dropout = use_dropout - self.use_edge_weights = use_edge_weights - self.labels_column = labels_column - self.device = torch.device( - device_type if device_type == "cuda" and torch.cuda.is_available() else "cpu" - ) - self.model_type = model_type - - def gnn_output_dir(self, infer_station: str): - if self.use_edge_weights: - edge_weights = "edge_weights" - else: - edge_weights = "no_edge_weights" - return os.path.join( - self.root_dir, - infer_station, - self.model_type, - "gnn_models", - edge_weights, - ) - - def gnn_preprocess_file(self, infer_station: str): - output_dir = self.gnn_output_dir(infer_station) - return os.path.join(output_dir, "pre_process_data_export.json") - - def run_train( - self, - train_embeddings: pd.DataFrame, - test_embeddings: pd.DataFrame, - neighbor_stations: List[Dict[str, Any]], - infer_station: str, - epochs: int, - batch_size: int, - forecast_hours: int, - ) -> None: - self.output_dir = self.gnn_output_dir(infer_station) - stations = self.get_neighbor_stations(neighbor_stations) - scaler_data = build_scaler(train_embeddings.copy(), forecast_hours) - scaler_label, labels_column_index = build_scaler_label( - train_embeddings.copy(), self.labels_column - ) - data_export_path = self.gnn_preprocess_file(infer_station) - if not os.path.exists(data_export_path): - os.makedirs(os.path.dirname(data_export_path), exist_ok=True) - write_to_file(data_export_path, data=[scaler_data, scaler_label, labels_column_index]) - - self.initialize_train( - train_embeddings, - test_embeddings, - stations, - infer_station, - epochs, - batch_size, - forecast_hours, - scaler_data, - scaler_label, - labels_column_index, - ) - - def initialize_train( - self, - train_embeddings: pd.DataFrame, - test_embeddings: pd.DataFrame, - 
neighbors_station: Dict[str, Any], - infer_station: str, - epochs: int, - batch_size: int, - forecast_hours: int, - scaler_data: StandardScaler, - scaler_label: StandardScaler, - labels_column_index: int, - ): - for step in range(forecast_hours): - train_dataset, test_dataset = train_test_dataset( - train_data=train_embeddings, - test_data=test_embeddings, - step=step, - neighbors_station=neighbors_station, - scaler_data=scaler_data, - scaler_label=scaler_label, - infer_station=infer_station, - labels_column_index=labels_column_index, - ) - - train_sampler, test_sampler = get_batch_sample( - train_dataset=train_dataset, - test_dataset=test_dataset, - batch_size=batch_size, - lookahead_horizon=self.lookahead_horizon, - lookback_horizon=self.lookback_horizon, - device=self.device, - use_edge_weights=self.use_edge_weights, - ) - - inputs = BatchTGCNInputs( - **problem_params( - train_dataset, - batch_size, - self.lookback_horizon, - self.lookahead_horizon, - self.use_edge_weights, - self.use_dropout, - self.hidden_dim, - forecast_hours, - ) - ) - model = BatchTGCNTrain(inputs, self.learning_rate) - model.to(self.device) - self.train_model(model, epochs, train_sampler, test_sampler, step) - - def train_model( - self, - model: BatchTGCNTrain, - epochs: int, - train_sampler: BatchSampler, - test_sampler: BatchSampler, - forecast_step: int, - ): - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - - model_path = "{}/model_{}".format(self.output_dir, forecast_step) - - if os.path.exists(model_path): - shutil.rmtree(model_path, ignore_errors=True) - - os.makedirs(model_path, exist_ok=True) - - # batch_size is set to None to avoid batch size in dataloader - # batch_size is set when creating the sampler - train_loader = DataLoader(train_sampler, batch_size=None, collate_fn=lambda x: x) - val_loader = DataLoader(test_sampler, batch_size=None, collate_fn=lambda x: x) - - t_obj = pl.Trainer( - logger=True, - max_epochs=epochs, - callbacks=[ - 
LearningRateMonitor(), - ModelCheckpoint( - monitor="val_loss/total", - save_last=True, - dirpath=model_path, - ), - ], - ) - t_obj.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader) - export_to_onnx( - model_path, - model, - train_loader, - self.use_edge_weights, - train_sampler.dataset.edge_num, - train_sampler.dataset.stations_count, - ) - - def run_inference( - self, - embeddings: pd.DataFrame, - neighbors_station: List[Dict[str, Any]], - infer_station: str, - batch_size: int, - forecast_hours: int, - ): - self.output_dir = self.gnn_output_dir(infer_station) - stations = self.get_neighbor_stations(neighbors_station) - scaler_data, scaler_label, labels_column_index = get_file( - self.gnn_preprocess_file(infer_station) - ) - - pred_data = [] - for step in range(forecast_hours): - dataset, sampler = self.get_infer_inputs( - embeddings, - stations, - infer_station, - batch_size, - forecast_hours, - step, - None, - scaler_data, - scaler_label, - ) - loader = DataLoader(sampler, batch_size=None, collate_fn=lambda x: x) - for index, data in enumerate(loader): - onnx_file_path = "{}/model_{}/model_output.onnx".format(self.output_dir, step) - if data[0].shape[0] != batch_size: - warnings.warn( - f"""Data at step {step} batch index {index} is less than batch size. 
- It will be skipped from running inference.""" - ) - continue - if step == 0: - results = np.zeros((batch_size, forecast_hours)) - results[:, step] = self.inference(onnx_file_path, data)[ - :, dataset.infer_station_index - ].squeeze() - pred_data.append(results) - else: - pred_data[index][:, step] = self.inference(onnx_file_path, data)[ - :, dataset.infer_station_index - ].squeeze() - pred_data = np.concatenate(pred_data, axis=0) - pred_data = scaler_data.inverse_transform(pred_data) - timestamps = dataset.timestamps[: pred_data.shape[0]] - pred_data = get_split_data(pred_data, timestamps, forecast_hours) # type: ignore - pred_data_df = pd.DataFrame( - zip(pred_data, timestamps), columns=[self.weather_type, "timestamp"] - ) - return pred_data_df - - def get_historical_data(self, data_path: str): - historical_data_df = get_csv_data(data_path) - historical_data_df.reset_index(inplace=True) - historical_data_df.rename(columns={"date": "timestamp"}, inplace=True) - return historical_data_df - - def get_hrrr_data( - self, - data_path: str, - ): - df_node = pd.read_csv(data_path, parse_dates=["date"]) - df_node.rename(columns={"date": "timestamp"}, inplace=True) - return df_node - - def get_infer_inputs( - self, - embeddings: pd.DataFrame, - neighbors_station: Dict[str, Any], - infer_station: str, - batch_size: int, - forecast_hours: int, - step: int, - labels_column_index: Union[int, None], - scaler_data: StandardScaler, - scaler_label: StandardScaler, - ): - dataset = GNNDataset( - embeddings, - forecast_step=step, - scaler_input=scaler_data, - scaler_label=scaler_label, - neighbor_station=neighbors_station, - forecast_hours=forecast_hours, - infer_station=infer_station, - label_column_index=labels_column_index, - ) - - sampler = BatchSampler( - dataset=dataset, - batch_size=batch_size, - lookahead_horizon=self.lookahead_horizon, - lookback_horizon=self.lookback_horizon, - device=self.device, - random=False, - use_edge_weights=self.use_edge_weights, - ) - - return 
dataset, sampler - - def inference(self, onnx_file_path: str, data: torch.Tensor): - session = onnxruntime.InferenceSession(onnx_file_path, None) - node_data, edge_index, edge_data, _ = get_batch(data, self.use_edge_weights) - - inputs = { - "node_data": node_data.numpy(), - "edge_index": edge_index.numpy(), - "edge_data": edge_data.numpy(), - } - - inputs = {out.name: inputs[out.name] for i, out in enumerate(session.get_inputs())} - results = session.run(None, input_feed=inputs)[0] - return results - - def get_embeddings( - self, - inference_station: str, - neighbor_stations: List[Dict[str, Any]], - inference_hours: int, - infer_forecast_data_path: str, - ): - initialize_embeddings_preprocessing( - infer_station_name=inference_station, - stations=neighbor_stations, - root_path=self.root_dir, - infer_forecast_data_path=infer_forecast_data_path, - infer_interval=inference_hours, - model_type=self.model_type, - ) - - df_train_embeddings, df_test_embeddings = create_embeddings( - stations=neighbor_stations, - inference_hours=inference_hours, - root_path=self.root_dir, - model_type=self.model_type, - ) - - return df_train_embeddings, df_test_embeddings - - def get_neighbor_stations( - self, - neighbor_stations: List[Dict[str, Any]], - ): - stations_connection = {} - stations = [] - station_long_lat = {} - for station in neighbor_stations: - stations.append(station["name"]) - station_long_lat[station["name"]] = station["coordinates"] - - stations_connection["stations"] = stations - stations_connection["long_lat"] = station_long_lat - - return stations_connection - - def filter_data( - self, - df_inference: pd.DataFrame, - df_historical: pd.DataFrame, - df_forecast: pd.DataFrame, - ): - start_date = df_inference["timestamp"].min() - end_date = df_inference["timestamp"].max() - - df_historical = df_historical[df_historical.timestamp.between(start_date, end_date)] - df_historical = df_historical[["timestamp", self.weather_type]] - - df_inference = 
df_inference[df_inference.timestamp.between(start_date, end_date)] - df_inference = df_inference[["timestamp", self.weather_type]] - - df_forecast = df_forecast[df_forecast.timestamp.between(start_date, end_date)] - df_forecast.rename(columns={"temperature_forecast": self.weather_type}, inplace=True) - df_forecast = df_forecast[["timestamp", self.weather_type]] - - return df_inference, df_historical, df_forecast - - def view_plot( - self, - df_inference: pd.DataFrame, - historical_data_path: str, - hrrr_data_path: str, - ): - df_historical = self.get_historical_data(historical_data_path) - df_forecast = self.get_hrrr_data(hrrr_data_path) - - df_inference, df_historical, df_forecast = self.filter_data( - df_inference, df_historical, df_forecast - ) - - timestamps = df_inference["timestamp"] - y_hat = list(df_inference[self.weather_type].values) - y = list(df_historical[self.weather_type].values) - hrrr_data_y = list(df_forecast[self.weather_type].values) - - plt.figure(figsize=(18, 6)) - plt.plot(timestamps, smooth(y_hat, 2), label="Predict") - plt.plot(timestamps, y, label="Ground Truth") - plt.plot(timestamps, hrrr_data_y, label="HRRR", linestyle="--") - plt.title("Comparison Ground Truth Vs Inference Results Vs HRRR") - plt.legend() - - def view_performance( - self, - df_inference: pd.DataFrame, - historical_data_path: str, - hrrr_data_path: str, - ): - df_historical = self.get_historical_data(historical_data_path) - df_forecast = self.get_hrrr_data(hrrr_data_path) - - df_inference, df_historical, df_forecast = self.filter_data( - df_inference, df_historical, df_forecast - ) - - y_hat = list(df_inference[self.weather_type].values) - y = np.array(df_historical[self.weather_type].values) - hrrr_data_y = list(df_forecast[self.weather_type].values) - - print("GNN ", self.weather_type) - calculate_KPI(smooth(y_hat, 1), y) - print("") - print("Hrrr", self.weather_type) - calculate_KPI(smooth(hrrr_data_y, 1), y) - - def get_embeddings_inference( - self, - 
inference_station: str, - neighbor_stations: List[Dict[str, Any]], - inference_hours: int, - infer_forecast_data_path: str, - out_features: List[str], - historical_data_path: str, - hrrr_datasets: Dict[str, pd.DataFrame], - start_date: datetime, - end_date: datetime, - historical_dataset_featues: List[str] = ["humidity", "wind_speed", "temperature"], - forecast_dataset_features: List[str] = [ - "humidity_forecast", - "wind_speed_forecast", - "temperature_forecast", - ], - frequency_hour: int = 1, - number_of_hours: int = 24, - weather_inference_type: str = "temperature", - ): - deepmc_results = run_deepmc_inference( - self.root_dir, - self.model_type, - out_features, - neighbor_stations, - historical_data_path, - hrrr_datasets, - start_date, - end_date, - inference_station, - historical_dataset_featues, - forecast_dataset_features, - frequency_hour, - number_of_hours, - weather_inference_type, - ) - - deepmc_post_results = inference_embeddings_preprocessing( - infer_station_name=inference_station, - stations=neighbor_stations, - root_path=self.root_dir, - infer_forecast_data_path=infer_forecast_data_path, - infer_interval=inference_hours, - model_type=self.model_type, - deepmc_inference_results=deepmc_results, - ) - - df_embeddings = create_embeddings_inference( - stations=neighbor_stations, - inference_hours=inference_hours, - deepmc_post_results=deepmc_post_results, - ) - - return df_embeddings diff --git a/notebooks/deepmc_neighbors/sample_data.csv b/notebooks/deepmc_neighbors/sample_data.csv deleted file mode 100644 index b4646fd3..00000000 --- a/notebooks/deepmc_neighbors/sample_data.csv +++ /dev/null @@ -1,92 +0,0 @@ -date,temperature,humidity,wind_speed -2021-07-25 00:15:00,79.4,33.3,8.5 -2021-07-25 00:30:00,78.4,35.3,9.4 -2021-07-25 00:45:00,78.1,34.9,9.2 -2021-07-25 01:00:00,78.0,35.0,8.9 -2021-07-25 01:15:00,77.7,35.6,9.0 -2021-07-25 01:30:00,77.8,35.8,8.9 -2021-07-25 01:45:00,77.6,35.9,8.8 -2021-07-25 02:00:00,76.8,37.5,9.1 -2021-07-25 
02:15:00,75.4,39.8,9.4 -2021-07-25 02:30:00,74.5,41.6,8.0 -2021-07-25 02:45:00,74.4,41.9,7.3 -2021-07-25 03:00:00,74.6,41.5,6.0 -2021-07-25 03:15:00,73.8,43.1,5.0 -2021-07-25 03:30:00,73.4,43.7,5.9 -2021-07-25 03:45:00,73.4,42.8,6.3 -2021-07-25 04:00:00,73.5,42.2,4.9 -2021-07-25 04:15:00,72.4,44.5,5.2 -2021-07-25 04:30:00,73.9,40.8,7.1 -2021-07-25 04:45:00,74.1,40.7,7.2 -2021-07-25 05:00:00,73.9,41.6,7.2 -2021-07-25 05:15:00,73.6,42.5,7.2 -2021-07-25 05:30:00,73.7,42.6,6.9 -2021-07-25 05:45:00,74.0,42.3,6.9 -2021-07-25 06:00:00,74.2,42.5,6.3 -2021-07-25 06:15:00,74.4,42.5,7.1 -2021-07-25 06:30:00,75.1,41.1,6.9 -2021-07-25 06:45:00,76.1,39.9,5.7 -2021-07-25 07:00:00,76.9,40.2,4.6 -2021-07-25 07:15:00,76.7,44.9,3.9 -2021-07-25 07:30:00,76.8,46.7,3.1 -2021-07-25 07:45:00,77.0,45.6,3.5 -2021-07-25 08:00:00,77.7,44.3,3.4 -2021-07-25 08:15:00,78.5,44.1,3.9 -2021-07-25 08:30:00,79.0,44.0,3.9 -2021-07-25 08:45:00,79.9,42.2,3.4 -2021-07-25 09:00:00,81.0,43.7,3.8 -2021-07-25 09:15:00,81.5,44.2,5.0 -2021-07-25 09:30:00,81.8,42.4,6.3 -2021-07-25 09:45:00,82.5,42.7,6.9 -2021-07-25 10:00:00,82.8,40.9,7.3 -2021-07-25 10:15:00,83.2,38.5,7.0 -2021-07-25 10:30:00,83.7,36.1,5.8 -2021-07-25 10:45:00,84.4,35.2,5.2 -2021-07-25 11:00:00,86.0,31.6,4.8 -2021-07-25 11:15:00,86.5,29.0,5.2 -2021-07-25 11:30:00,87.6,26.1,6.2 -2021-07-25 11:45:00,87.9,26.2,6.5 -2021-07-25 12:00:00,88.0,25.9,6.0 -2021-07-25 12:15:00,88.5,27.0,5.7 -2021-07-25 12:30:00,89.2,25.6,5.0 -2021-07-25 12:45:00,89.7,24.0,4.6 -2021-07-25 13:00:00,90.4,23.0,4.7 -2021-07-25 13:15:00,91.6,21.7,5.2 -2021-07-25 13:30:00,91.5,20.5,5.8 -2021-07-25 13:45:00,91.7,21.1,5.5 -2021-07-25 14:00:00,93.4,20.4,4.9 -2021-07-25 14:15:00,94.3,18.2,4.4 -2021-07-25 14:30:00,93.4,18.4,4.7 -2021-07-25 14:45:00,94.4,17.4,4.0 -2021-07-25 15:00:00,94.6,17.8,4.3 -2021-07-25 15:15:00,93.9,19.1,5.4 -2021-07-25 15:30:00,93.4,18.9,6.5 -2021-07-25 15:45:00,93.6,18.1,5.2 -2021-07-25 16:00:00,93.8,18.3,4.7 -2021-07-25 16:15:00,93.9,17.9,4.5 -2021-07-25 
16:30:00,94.0,16.4,4.7 -2021-07-25 16:45:00,94.0,16.7,4.3 -2021-07-25 17:00:00,94.0,16.9,4.2 -2021-07-25 17:15:00,94.2,16.7,3.2 -2021-07-25 17:30:00,94.3,16.8,3.7 -2021-07-25 17:45:00,93.7,18.9,3.8 -2021-07-25 18:00:00,93.7,18.3,3.2 -2021-07-25 18:15:00,93.3,20.6,3.0 -2021-07-25 18:30:00,91.7,26.2,3.5 -2021-07-25 18:45:00,90.5,26.8,2.6 -2021-07-25 19:00:00,88.4,28.1,3.0 -2021-07-25 19:15:00,85.3,30.7,4.3 -2021-07-25 19:30:00,83.4,31.0,4.7 -2021-07-25 19:45:00,82.0,33.8,4.6 -2021-07-25 20:00:00,80.7,36.4,3.6 -2021-07-25 20:15:00,78.6,39.3,4.4 -2021-07-25 20:30:00,80.6,31.3,4.7 -2021-07-25 20:45:00,79.1,37.6,4.8 -2021-07-25 21:00:00,80.4,30.2,5.7 -2021-07-25 21:15:00,82.8,24.7,6.5 -2021-07-25 21:30:00,82.2,24.7,6.8 -2021-07-25 21:45:00,81.6,25.2,6.7 -2021-07-25 22:00:00,80.8,26.2,6.9 -2021-07-25 22:15:00,80.2,27.3,6.9 -2021-07-25 22:30:00,79.8,28.0,6.9 -2021-07-25 22:45:00,79.3,28.5,6.8 \ No newline at end of file diff --git a/notebooks/forest/download_alos_forest_map.ipynb b/notebooks/forest/download_alos_forest_map.ipynb index f3444d0a..687cd8c7 100644 --- a/notebooks/forest/download_alos_forest_map.ipynb +++ b/notebooks/forest/download_alos_forest_map.ipynb @@ -378,6 +378,7 @@ " \"Download ALOS Forest Map\",\n", " geometry=geom,\n", " time_range=time_range,\n", + " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", ")\n", "run.monitor()" ] diff --git a/notebooks/forest/forest_change_detection.ipynb b/notebooks/forest/forest_change_detection.ipynb index 1876d2af..b21c78a7 100644 --- a/notebooks/forest/forest_change_detection.ipynb +++ b/notebooks/forest/forest_change_detection.ipynb @@ -480,6 +480,7 @@ " geometry=geom,\n", " time_range=time_range,\n", " parameters={\n", + " \"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\",\n", " \"from_values\": [4, 3, 0, 2, 1],\n", " \"to_values\": [0, 0, 0, 1, 2],\n", " },\n", diff --git a/notebooks/sentinel/sentinel_spaceeye.ipynb b/notebooks/sentinel/sentinel_spaceeye.ipynb index e9c1e2a6..1d364ac4 100755 --- 
a/notebooks/sentinel/sentinel_spaceeye.ipynb +++ b/notebooks/sentinel/sentinel_spaceeye.ipynb @@ -137,6 +137,7 @@ " f\"Amazon {time_range[0].year}\",\n", " geometry=geom,\n", " time_range=time_range,\n", + " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", ")" ] }, @@ -232,6 +233,7 @@ " f\"Amazon {time_range[0].year}\",\n", " geometry=geom,\n", " time_range=time_range,\n", + " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", ")\n", "run2.monitor()" ] @@ -382,6 +384,7 @@ " f\"Amazon {tr[0].year}\",\n", " geometry=geom,\n", " time_range=tr,\n", + " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", " )\n", " for tr in time_ranges\n", "]\n", diff --git a/op_resources/average_model/pixel_average_model.onnx b/op_resources/average_model/pixel_average_model.onnx deleted file mode 100644 index 5b009f3b..00000000 --- a/op_resources/average_model/pixel_average_model.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d789bd1b4f6eb3ace534e9ffd74ea6aaf95f141c1b614211d5f6cfd0a329151d -size 186 diff --git a/op_resources/cdl_metadata/CDL_codes_names_colors.xls b/op_resources/cdl_metadata/CDL_codes_names_colors.xls deleted file mode 100644 index fd4a92ba..00000000 --- a/op_resources/cdl_metadata/CDL_codes_names_colors.xls +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd7f4adef7278f19db714da8cce766779d026cc65b858614ed4bc482de49ffe4 -size 70144 diff --git a/op_resources/cdl_metadata/us_continental.wkt b/op_resources/cdl_metadata/us_continental.wkt deleted file mode 100644 index e8133628..00000000 --- a/op_resources/cdl_metadata/us_continental.wkt +++ /dev/null @@ -1 +0,0 @@ -POLYGON((-94.81758 49.38905, -94.64 48.84, -94.32914 48.67074, -93.63087 48.60926, -92.61 48.45, -91.64 48.14, -90.83 48.27, -89.6 48.01, -89.272917 48.019808, -88.378114 48.302918, -87.439793 47.94, -86.461991 47.553338, -85.652363 47.220219, -84.87608 46.900083, -84.779238 
46.637102, -84.543749 46.538684, -84.6049 46.4396, -84.3367 46.40877, -84.14212 46.512226, -84.091851 46.275419, -83.890765 46.116927, -83.616131 46.116927, -83.469551 45.994686, -83.592851 45.816894, -82.550925 45.347517, -82.337763 44.44, -82.137642 43.571088, -82.43 42.98, -82.9 42.43, -83.12 42.08, -83.142 41.975681, -83.02981 41.832796, -82.690089 41.675105, -82.439278 41.675105, -81.277747 42.209026, -80.247448 42.3662, -78.939362 42.863611, -78.92 42.965, -79.01 43.27, -79.171674 43.466339, -78.72028 43.625089, -77.737885 43.629056, -76.820034 43.628784, -76.5 44.018459, -76.375 44.09631, -75.31821 44.81645, -74.867 45.00048, -73.34783 45.00738, -71.50506 45.0082, -71.405 45.255, -71.08482 45.30524, -70.66 45.46, -70.305 45.915, -69.99997 46.69307, -69.237216 47.447781, -68.905 47.185, -68.23444 47.35486, -67.79046 47.06636, -67.79134 45.70281, -67.13741 45.13753, -66.96466 44.8097, -68.03252 44.3252, -69.06 43.98, -70.11617 43.68405, -70.645476 43.090238, -70.81489 42.8653, -70.825 42.335, -70.495 41.805, -70.08 41.78, -70.185 42.145, -69.88497 41.92283, -69.96503 41.63717, -70.64 41.475, -71.12039 41.49445, -71.86 41.32, -72.295 41.27, -72.87643 41.22065, -73.71 40.931102, -72.24126 41.11948, -71.945 40.93, -73.345 40.63, -73.982 40.628, -73.952325 40.75075, -74.25671 40.47351, -73.96244 40.42763, -74.17838 39.70926, -74.90604 38.93954, -74.98041 39.1964, -75.20002 39.24845, -75.52805 39.4985, -75.32 38.96, -75.071835 38.782032, -75.05673 38.40412, -75.37747 38.01551, -75.94023 37.21689, -76.03127 37.2566, -75.72205 37.93705, -76.23287 38.319215, -76.35 39.15, -76.542725 38.717615, -76.32933 38.08326, -76.989998 38.239992, -76.30162 37.917945, -76.25874 36.9664, -75.9718 36.89726, -75.86804 36.55125, -75.72749 35.55074, -76.36318 34.80854, -77.397635 34.51201, -78.05496 33.92547, -78.55435 33.86133, -79.06067 33.49395, -79.20357 33.15839, -80.301325 32.509355, -80.86498 32.0333, -81.33629 31.44049, -81.49042 30.72999, -81.31371 30.03552, -80.98 29.18, 
-80.535585 28.47213, -80.53 28.04, -80.056539 26.88, -80.088015 26.205765, -80.13156 25.816775, -80.38103 25.20616, -80.68 25.08, -81.17213 25.20126, -81.33 25.64, -81.71 25.87, -82.24 26.73, -82.70515 27.49504, -82.85526 27.88624, -82.65 28.55, -82.93 29.1, -83.70959 29.93656, -84.1 30.09, -85.10882 29.63615, -85.28784 29.68612, -85.7731 30.15261, -86.4 30.4, -87.53036 30.27433, -88.41782 30.3849, -89.18049 30.31598, -89.593831 30.159994, -89.413735 29.89419, -89.43 29.48864, -89.21767 29.29108, -89.40823 29.15961, -89.77928 29.30714, -90.15463 29.11743, -90.880225 29.148535, -91.626785 29.677, -92.49906 29.5523, -93.22637 29.78375, -93.84842 29.71363, -94.69 29.48, -95.60026 28.73863, -96.59404 28.30748, -97.14 27.83, -97.37 27.38, -97.38 26.69, -97.33 26.21, -97.14 25.87, -97.53 25.84, -98.24 26.06, -99.02 26.37, -99.3 26.84, -99.52 27.54, -100.11 28.11, -100.45584 28.69612, -100.9576 29.38071, -101.6624 29.7793, -102.48 29.76, -103.11 28.97, -103.94 29.27, -104.45697 29.57196, -104.70575 30.12173, -105.03737 30.64402, -105.63159 31.08383, -106.1429 31.39995, -106.50759 31.75452, -108.24 31.754854, -108.24194 31.34222, -109.035 31.34194, -111.02361 31.33472, -113.30498 32.03914, -114.815 32.52528, -114.72139 32.72083, -115.99135 32.61239, -117.12776 32.53534, -117.295938 33.046225, -117.944 33.621236, -118.410602 33.740909, -118.519895 34.027782, -119.081 34.078, -119.438841 34.348477, -120.36778 34.44711, -120.62286 34.60855, -120.74433 35.15686, -121.71457 36.16153, -122.54747 37.55176, -122.51201 37.78339, -122.95319 38.11371, -123.7272 38.95166, -123.86517 39.76699, -124.39807 40.3132, -124.17886 41.14202, -124.2137 41.99964, -124.53284 42.76599, -124.14214 43.70838, -124.020535 44.615895, -123.89893 45.52341, -124.079635 46.86475, -124.39567 47.72017, -124.68721 48.184433, -124.566101 48.379715, -123.12 48.04, -122.58736 47.096, -122.34 47.36, -122.5 48.18, -122.84 49, -120 49, -117.03121 49, -116.04818 49, -113 49, -110.05 49, -107.05 49, -104.04826 
48.99986, -100.65 49, -97.22872 49.0007, -95.15907 49, -95.15609 49.38425, -94.81758 49.38905)) diff --git a/op_resources/cloud_models/NOTICE.md b/op_resources/cloud_models/NOTICE.md deleted file mode 100644 index f4d8bab6..00000000 --- a/op_resources/cloud_models/NOTICE.md +++ /dev/null @@ -1,8 +0,0 @@ -# NOTICE - -The models in this directory were converted to the ONNX format from the models -trained with data from the, and made available by, -[azavea cloud model](https://github.com/azavea/cloud-model). - -The dataset is licensed under the terms of the -[Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/). \ No newline at end of file diff --git a/op_resources/cloud_models/cloud_model1_cpu.onnx b/op_resources/cloud_models/cloud_model1_cpu.onnx deleted file mode 100644 index bd09b46d..00000000 --- a/op_resources/cloud_models/cloud_model1_cpu.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:45786f149f2cd0820846ad22336126a8a4d71d9e7d9cda02e0c3d58c3c5a1cd1 -size 56973507 diff --git a/op_resources/cloud_models/cloud_model2_cpu.onnx b/op_resources/cloud_models/cloud_model2_cpu.onnx deleted file mode 100644 index f85f74ea..00000000 --- a/op_resources/cloud_models/cloud_model2_cpu.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:45de808bfa418d9fe31f7f4bdd656acdd20101fdc9219b901415e0e007e2db06 -size 56973507 diff --git a/op_resources/cloud_models/cloud_model3_cpu.onnx b/op_resources/cloud_models/cloud_model3_cpu.onnx deleted file mode 100644 index cf9b2069..00000000 --- a/op_resources/cloud_models/cloud_model3_cpu.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ed62734bde0250af30e0085a37c1b81615c3ab286469d5a4b2709963153c329 -size 26225 diff --git a/op_resources/cloud_models/cloud_model4_cpu.onnx b/op_resources/cloud_models/cloud_model4_cpu.onnx deleted file mode 100644 index 9b3eed2c..00000000 --- 
a/op_resources/cloud_models/cloud_model4_cpu.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b5638e37571e320e3625c163df2e250330d2f3d79e03216fc92f7899bbc909f -size 29269 diff --git a/op_resources/cloud_models/cloud_model5_cpu.onnx b/op_resources/cloud_models/cloud_model5_cpu.onnx deleted file mode 100644 index 4526178a..00000000 --- a/op_resources/cloud_models/cloud_model5_cpu.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad93ee817eb724405760130ffe368c6d76932a0d6c6a6407de03f1abd7995229 -size 29269 diff --git a/op_resources/conservation_practices_models/terraces_grassed_waterways.onnx b/op_resources/conservation_practices_models/terraces_grassed_waterways.onnx deleted file mode 100644 index 49868bcd..00000000 --- a/op_resources/conservation_practices_models/terraces_grassed_waterways.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d8b275e771e6478a6bbc4a155adea7c55c9d1de7b7fb03fde4808efcece0f743 -size 97707605 diff --git a/op_resources/driveways_models/driveway.onnx b/op_resources/driveways_models/driveway.onnx deleted file mode 100644 index 29c0e554..00000000 --- a/op_resources/driveways_models/driveway.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c07b7695ad6cff83e11a59570c670640f2b9914ce174906ea926f33ccde2726b -size 106619744 diff --git a/op_resources/glad_tile_geometry/10d_tiles.geojson b/op_resources/glad_tile_geometry/10d_tiles.geojson deleted file mode 100644 index 28e3f75b..00000000 --- a/op_resources/glad_tile_geometry/10d_tiles.geojson +++ /dev/null @@ -1,510 +0,0 @@ -{ -"type": "FeatureCollection", -"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, -"features": [ -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -50.0, "LR_X": -170.0, "LR_Y": -60.0, "NAME": "50S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 
-60.0 ], [ -180.0, -50.0 ], [ -170.0, -50.0 ], [ -170.0, -60.0 ], [ -180.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -40.0, "LR_X": -170.0, "LR_Y": -50.0, "NAME": "40S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -50.0 ], [ -180.0, -40.0 ], [ -170.0, -40.0 ], [ -170.0, -50.0 ], [ -180.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -30.0, "LR_X": -170.0, "LR_Y": -40.0, "NAME": "30S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -40.0 ], [ -180.0, -30.0 ], [ -170.0, -30.0 ], [ -170.0, -40.0 ], [ -180.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -20.0, "LR_X": -170.0, "LR_Y": -30.0, "NAME": "20S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -30.0 ], [ -180.0, -20.0 ], [ -170.0, -20.0 ], [ -170.0, -30.0 ], [ -180.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -10.0, "LR_X": -170.0, "LR_Y": -20.0, "NAME": "10S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -20.0 ], [ -180.0, -10.0 ], [ -170.0, -10.0 ], [ -170.0, -20.0 ], [ -180.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 0.0, "LR_X": -170.0, "LR_Y": -10.0, "NAME": "00N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -10.0 ], [ -180.0, 0.0 ], [ -170.0, 0.0 ], [ -170.0, -10.0 ], [ -180.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 10.0, "LR_X": -170.0, "LR_Y": 0.0, "NAME": "10N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 0.0 ], [ -180.0, 10.0 ], [ -170.0, 10.0 ], [ -170.0, 0.0 ], [ -180.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 20.0, "LR_X": -170.0, "LR_Y": 10.0, "NAME": "20N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 10.0 ], [ -180.0, 20.0 ], [ -170.0, 20.0 ], [ -170.0, 10.0 ], [ -180.0, 10.0 ] 
] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 30.0, "LR_X": -170.0, "LR_Y": 20.0, "NAME": "30N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 20.0 ], [ -180.0, 30.0 ], [ -170.0, 30.0 ], [ -170.0, 20.0 ], [ -180.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 40.0, "LR_X": -170.0, "LR_Y": 30.0, "NAME": "40N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 30.0 ], [ -180.0, 40.0 ], [ -170.0, 40.0 ], [ -170.0, 30.0 ], [ -180.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 50.0, "LR_X": -170.0, "LR_Y": 40.0, "NAME": "50N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 40.0 ], [ -180.0, 50.0 ], [ -170.0, 50.0 ], [ -170.0, 40.0 ], [ -180.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 60.0, "LR_X": -170.0, "LR_Y": 50.0, "NAME": "60N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 50.0 ], [ -180.0, 60.0 ], [ -170.0, 60.0 ], [ -170.0, 50.0 ], [ -180.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 70.0, "LR_X": -170.0, "LR_Y": 60.0, "NAME": "70N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 60.0 ], [ -180.0, 70.0 ], [ -170.0, 70.0 ], [ -170.0, 60.0 ], [ -180.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 80.0, "LR_X": -170.0, "LR_Y": 70.0, "NAME": "80N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 70.0 ], [ -180.0, 80.0 ], [ -170.0, 80.0 ], [ -170.0, 70.0 ], [ -180.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -50.0, "LR_X": -160.0, "LR_Y": -60.0, "NAME": "50S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -60.0 ], [ -170.0, -50.0 ], [ -160.0, -50.0 ], [ -160.0, -60.0 ], [ -170.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -40.0, "LR_X": -160.0, "LR_Y": 
-50.0, "NAME": "40S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -50.0 ], [ -170.0, -40.0 ], [ -160.0, -40.0 ], [ -160.0, -50.0 ], [ -170.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -30.0, "LR_X": -160.0, "LR_Y": -40.0, "NAME": "30S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -40.0 ], [ -170.0, -30.0 ], [ -160.0, -30.0 ], [ -160.0, -40.0 ], [ -170.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -20.0, "LR_X": -160.0, "LR_Y": -30.0, "NAME": "20S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -30.0 ], [ -170.0, -20.0 ], [ -160.0, -20.0 ], [ -160.0, -30.0 ], [ -170.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -10.0, "LR_X": -160.0, "LR_Y": -20.0, "NAME": "10S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -20.0 ], [ -170.0, -10.0 ], [ -160.0, -10.0 ], [ -160.0, -20.0 ], [ -170.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 0.0, "LR_X": -160.0, "LR_Y": -10.0, "NAME": "00N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -10.0 ], [ -170.0, 0.0 ], [ -160.0, 0.0 ], [ -160.0, -10.0 ], [ -170.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 10.0, "LR_X": -160.0, "LR_Y": 0.0, "NAME": "10N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 0.0 ], [ -170.0, 10.0 ], [ -160.0, 10.0 ], [ -160.0, 0.0 ], [ -170.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 20.0, "LR_X": -160.0, "LR_Y": 10.0, "NAME": "20N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 10.0 ], [ -170.0, 20.0 ], [ -160.0, 20.0 ], [ -160.0, 10.0 ], [ -170.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 30.0, "LR_X": -160.0, "LR_Y": 20.0, "NAME": "30N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 
-170.0, 20.0 ], [ -170.0, 30.0 ], [ -160.0, 30.0 ], [ -160.0, 20.0 ], [ -170.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 40.0, "LR_X": -160.0, "LR_Y": 30.0, "NAME": "40N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 30.0 ], [ -170.0, 40.0 ], [ -160.0, 40.0 ], [ -160.0, 30.0 ], [ -170.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 50.0, "LR_X": -160.0, "LR_Y": 40.0, "NAME": "50N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 40.0 ], [ -170.0, 50.0 ], [ -160.0, 50.0 ], [ -160.0, 40.0 ], [ -170.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 60.0, "LR_X": -160.0, "LR_Y": 50.0, "NAME": "60N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 50.0 ], [ -170.0, 60.0 ], [ -160.0, 60.0 ], [ -160.0, 50.0 ], [ -170.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 70.0, "LR_X": -160.0, "LR_Y": 60.0, "NAME": "70N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 60.0 ], [ -170.0, 70.0 ], [ -160.0, 70.0 ], [ -160.0, 60.0 ], [ -170.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 80.0, "LR_X": -160.0, "LR_Y": 70.0, "NAME": "80N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 70.0 ], [ -170.0, 80.0 ], [ -160.0, 80.0 ], [ -160.0, 70.0 ], [ -170.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -50.0, "LR_X": -150.0, "LR_Y": -60.0, "NAME": "50S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -60.0 ], [ -160.0, -50.0 ], [ -150.0, -50.0 ], [ -150.0, -60.0 ], [ -160.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -40.0, "LR_X": -150.0, "LR_Y": -50.0, "NAME": "40S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -50.0 ], [ -160.0, -40.0 ], [ -150.0, -40.0 ], [ -150.0, -50.0 ], [ -160.0, -50.0 ] ] ] } }, 
-{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -30.0, "LR_X": -150.0, "LR_Y": -40.0, "NAME": "30S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -40.0 ], [ -160.0, -30.0 ], [ -150.0, -30.0 ], [ -150.0, -40.0 ], [ -160.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -20.0, "LR_X": -150.0, "LR_Y": -30.0, "NAME": "20S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -30.0 ], [ -160.0, -20.0 ], [ -150.0, -20.0 ], [ -150.0, -30.0 ], [ -160.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -10.0, "LR_X": -150.0, "LR_Y": -20.0, "NAME": "10S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -20.0 ], [ -160.0, -10.0 ], [ -150.0, -10.0 ], [ -150.0, -20.0 ], [ -160.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 0.0, "LR_X": -150.0, "LR_Y": -10.0, "NAME": "00N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -10.0 ], [ -160.0, 0.0 ], [ -150.0, 0.0 ], [ -150.0, -10.0 ], [ -160.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 10.0, "LR_X": -150.0, "LR_Y": 0.0, "NAME": "10N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 0.0 ], [ -160.0, 10.0 ], [ -150.0, 10.0 ], [ -150.0, 0.0 ], [ -160.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 20.0, "LR_X": -150.0, "LR_Y": 10.0, "NAME": "20N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 10.0 ], [ -160.0, 20.0 ], [ -150.0, 20.0 ], [ -150.0, 10.0 ], [ -160.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 30.0, "LR_X": -150.0, "LR_Y": 20.0, "NAME": "30N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 20.0 ], [ -160.0, 30.0 ], [ -150.0, 30.0 ], [ -150.0, 20.0 ], [ -160.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 40.0, "LR_X": -150.0, "LR_Y": 
30.0, "NAME": "40N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 30.0 ], [ -160.0, 40.0 ], [ -150.0, 40.0 ], [ -150.0, 30.0 ], [ -160.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 50.0, "LR_X": -150.0, "LR_Y": 40.0, "NAME": "50N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 40.0 ], [ -160.0, 50.0 ], [ -150.0, 50.0 ], [ -150.0, 40.0 ], [ -160.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 60.0, "LR_X": -150.0, "LR_Y": 50.0, "NAME": "60N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 50.0 ], [ -160.0, 60.0 ], [ -150.0, 60.0 ], [ -150.0, 50.0 ], [ -160.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 70.0, "LR_X": -150.0, "LR_Y": 60.0, "NAME": "70N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 60.0 ], [ -160.0, 70.0 ], [ -150.0, 70.0 ], [ -150.0, 60.0 ], [ -160.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 80.0, "LR_X": -150.0, "LR_Y": 70.0, "NAME": "80N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 70.0 ], [ -160.0, 80.0 ], [ -150.0, 80.0 ], [ -150.0, 70.0 ], [ -160.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -50.0, "LR_X": -140.0, "LR_Y": -60.0, "NAME": "50S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -60.0 ], [ -150.0, -50.0 ], [ -140.0, -50.0 ], [ -140.0, -60.0 ], [ -150.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -40.0, "LR_X": -140.0, "LR_Y": -50.0, "NAME": "40S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -50.0 ], [ -150.0, -40.0 ], [ -140.0, -40.0 ], [ -140.0, -50.0 ], [ -150.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -30.0, "LR_X": -140.0, "LR_Y": -40.0, "NAME": "30S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 
-40.0 ], [ -150.0, -30.0 ], [ -140.0, -30.0 ], [ -140.0, -40.0 ], [ -150.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -20.0, "LR_X": -140.0, "LR_Y": -30.0, "NAME": "20S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -30.0 ], [ -150.0, -20.0 ], [ -140.0, -20.0 ], [ -140.0, -30.0 ], [ -150.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -10.0, "LR_X": -140.0, "LR_Y": -20.0, "NAME": "10S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -20.0 ], [ -150.0, -10.0 ], [ -140.0, -10.0 ], [ -140.0, -20.0 ], [ -150.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 0.0, "LR_X": -140.0, "LR_Y": -10.0, "NAME": "00N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -10.0 ], [ -150.0, 0.0 ], [ -140.0, 0.0 ], [ -140.0, -10.0 ], [ -150.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 10.0, "LR_X": -140.0, "LR_Y": 0.0, "NAME": "10N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 0.0 ], [ -150.0, 10.0 ], [ -140.0, 10.0 ], [ -140.0, 0.0 ], [ -150.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 20.0, "LR_X": -140.0, "LR_Y": 10.0, "NAME": "20N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 10.0 ], [ -150.0, 20.0 ], [ -140.0, 20.0 ], [ -140.0, 10.0 ], [ -150.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 30.0, "LR_X": -140.0, "LR_Y": 20.0, "NAME": "30N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 20.0 ], [ -150.0, 30.0 ], [ -140.0, 30.0 ], [ -140.0, 20.0 ], [ -150.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 40.0, "LR_X": -140.0, "LR_Y": 30.0, "NAME": "40N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 30.0 ], [ -150.0, 40.0 ], [ -140.0, 40.0 ], [ -140.0, 30.0 ], [ -150.0, 30.0 ] ] ] } }, -{ 
"type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 50.0, "LR_X": -140.0, "LR_Y": 40.0, "NAME": "50N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 40.0 ], [ -150.0, 50.0 ], [ -140.0, 50.0 ], [ -140.0, 40.0 ], [ -150.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 60.0, "LR_X": -140.0, "LR_Y": 50.0, "NAME": "60N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 50.0 ], [ -150.0, 60.0 ], [ -140.0, 60.0 ], [ -140.0, 50.0 ], [ -150.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 70.0, "LR_X": -140.0, "LR_Y": 60.0, "NAME": "70N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 60.0 ], [ -150.0, 70.0 ], [ -140.0, 70.0 ], [ -140.0, 60.0 ], [ -150.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 80.0, "LR_X": -140.0, "LR_Y": 70.0, "NAME": "80N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 70.0 ], [ -150.0, 80.0 ], [ -140.0, 80.0 ], [ -140.0, 70.0 ], [ -150.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -50.0, "LR_X": -130.0, "LR_Y": -60.0, "NAME": "50S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -60.0 ], [ -140.0, -50.0 ], [ -130.0, -50.0 ], [ -130.0, -60.0 ], [ -140.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -40.0, "LR_X": -130.0, "LR_Y": -50.0, "NAME": "40S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -50.0 ], [ -140.0, -40.0 ], [ -130.0, -40.0 ], [ -130.0, -50.0 ], [ -140.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -30.0, "LR_X": -130.0, "LR_Y": -40.0, "NAME": "30S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -40.0 ], [ -140.0, -30.0 ], [ -130.0, -30.0 ], [ -130.0, -40.0 ], [ -140.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -20.0, "LR_X": -130.0, 
"LR_Y": -30.0, "NAME": "20S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -30.0 ], [ -140.0, -20.0 ], [ -130.0, -20.0 ], [ -130.0, -30.0 ], [ -140.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -10.0, "LR_X": -130.0, "LR_Y": -20.0, "NAME": "10S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -20.0 ], [ -140.0, -10.0 ], [ -130.0, -10.0 ], [ -130.0, -20.0 ], [ -140.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 0.0, "LR_X": -130.0, "LR_Y": -10.0, "NAME": "00N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -10.0 ], [ -140.0, 0.0 ], [ -130.0, 0.0 ], [ -130.0, -10.0 ], [ -140.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 10.0, "LR_X": -130.0, "LR_Y": 0.0, "NAME": "10N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 0.0 ], [ -140.0, 10.0 ], [ -130.0, 10.0 ], [ -130.0, 0.0 ], [ -140.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 20.0, "LR_X": -130.0, "LR_Y": 10.0, "NAME": "20N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 10.0 ], [ -140.0, 20.0 ], [ -130.0, 20.0 ], [ -130.0, 10.0 ], [ -140.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 30.0, "LR_X": -130.0, "LR_Y": 20.0, "NAME": "30N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 20.0 ], [ -140.0, 30.0 ], [ -130.0, 30.0 ], [ -130.0, 20.0 ], [ -140.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 40.0, "LR_X": -130.0, "LR_Y": 30.0, "NAME": "40N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 30.0 ], [ -140.0, 40.0 ], [ -130.0, 40.0 ], [ -130.0, 30.0 ], [ -140.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 50.0, "LR_X": -130.0, "LR_Y": 40.0, "NAME": "50N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 
40.0 ], [ -140.0, 50.0 ], [ -130.0, 50.0 ], [ -130.0, 40.0 ], [ -140.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 60.0, "LR_X": -130.0, "LR_Y": 50.0, "NAME": "60N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 50.0 ], [ -140.0, 60.0 ], [ -130.0, 60.0 ], [ -130.0, 50.0 ], [ -140.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 70.0, "LR_X": -130.0, "LR_Y": 60.0, "NAME": "70N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 60.0 ], [ -140.0, 70.0 ], [ -130.0, 70.0 ], [ -130.0, 60.0 ], [ -140.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 80.0, "LR_X": -130.0, "LR_Y": 70.0, "NAME": "80N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 70.0 ], [ -140.0, 80.0 ], [ -130.0, 80.0 ], [ -130.0, 70.0 ], [ -140.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -50.0, "LR_X": -120.0, "LR_Y": -60.0, "NAME": "50S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -60.0 ], [ -130.0, -50.0 ], [ -120.0, -50.0 ], [ -120.0, -60.0 ], [ -130.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -40.0, "LR_X": -120.0, "LR_Y": -50.0, "NAME": "40S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -50.0 ], [ -130.0, -40.0 ], [ -120.0, -40.0 ], [ -120.0, -50.0 ], [ -130.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -30.0, "LR_X": -120.0, "LR_Y": -40.0, "NAME": "30S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -40.0 ], [ -130.0, -30.0 ], [ -120.0, -30.0 ], [ -120.0, -40.0 ], [ -130.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -20.0, "LR_X": -120.0, "LR_Y": -30.0, "NAME": "20S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -30.0 ], [ -130.0, -20.0 ], [ -120.0, -20.0 ], [ -120.0, -30.0 ], [ -130.0, -30.0 ] ] 
] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -10.0, "LR_X": -120.0, "LR_Y": -20.0, "NAME": "10S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -20.0 ], [ -130.0, -10.0 ], [ -120.0, -10.0 ], [ -120.0, -20.0 ], [ -130.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 0.0, "LR_X": -120.0, "LR_Y": -10.0, "NAME": "00N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -10.0 ], [ -130.0, 0.0 ], [ -120.0, 0.0 ], [ -120.0, -10.0 ], [ -130.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 10.0, "LR_X": -120.0, "LR_Y": 0.0, "NAME": "10N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 0.0 ], [ -130.0, 10.0 ], [ -120.0, 10.0 ], [ -120.0, 0.0 ], [ -130.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 20.0, "LR_X": -120.0, "LR_Y": 10.0, "NAME": "20N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 10.0 ], [ -130.0, 20.0 ], [ -120.0, 20.0 ], [ -120.0, 10.0 ], [ -130.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 30.0, "LR_X": -120.0, "LR_Y": 20.0, "NAME": "30N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 20.0 ], [ -130.0, 30.0 ], [ -120.0, 30.0 ], [ -120.0, 20.0 ], [ -130.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 40.0, "LR_X": -120.0, "LR_Y": 30.0, "NAME": "40N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 30.0 ], [ -130.0, 40.0 ], [ -120.0, 40.0 ], [ -120.0, 30.0 ], [ -130.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 50.0, "LR_X": -120.0, "LR_Y": 40.0, "NAME": "50N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 40.0 ], [ -130.0, 50.0 ], [ -120.0, 50.0 ], [ -120.0, 40.0 ], [ -130.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 60.0, "LR_X": -120.0, "LR_Y": 50.0, 
"NAME": "60N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 50.0 ], [ -130.0, 60.0 ], [ -120.0, 60.0 ], [ -120.0, 50.0 ], [ -130.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 70.0, "LR_X": -120.0, "LR_Y": 60.0, "NAME": "70N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 60.0 ], [ -130.0, 70.0 ], [ -120.0, 70.0 ], [ -120.0, 60.0 ], [ -130.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 80.0, "LR_X": -120.0, "LR_Y": 70.0, "NAME": "80N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 70.0 ], [ -130.0, 80.0 ], [ -120.0, 80.0 ], [ -120.0, 70.0 ], [ -130.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -50.0, "LR_X": -110.0, "LR_Y": -60.0, "NAME": "50S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -60.0 ], [ -120.0, -50.0 ], [ -110.0, -50.0 ], [ -110.0, -60.0 ], [ -120.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -40.0, "LR_X": -110.0, "LR_Y": -50.0, "NAME": "40S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -50.0 ], [ -120.0, -40.0 ], [ -110.0, -40.0 ], [ -110.0, -50.0 ], [ -120.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -30.0, "LR_X": -110.0, "LR_Y": -40.0, "NAME": "30S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -40.0 ], [ -120.0, -30.0 ], [ -110.0, -30.0 ], [ -110.0, -40.0 ], [ -120.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -20.0, "LR_X": -110.0, "LR_Y": -30.0, "NAME": "20S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -30.0 ], [ -120.0, -20.0 ], [ -110.0, -20.0 ], [ -110.0, -30.0 ], [ -120.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -10.0, "LR_X": -110.0, "LR_Y": -20.0, "NAME": "10S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 
-120.0, -20.0 ], [ -120.0, -10.0 ], [ -110.0, -10.0 ], [ -110.0, -20.0 ], [ -120.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 0.0, "LR_X": -110.0, "LR_Y": -10.0, "NAME": "00N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -10.0 ], [ -120.0, 0.0 ], [ -110.0, 0.0 ], [ -110.0, -10.0 ], [ -120.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 10.0, "LR_X": -110.0, "LR_Y": 0.0, "NAME": "10N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 0.0 ], [ -120.0, 10.0 ], [ -110.0, 10.0 ], [ -110.0, 0.0 ], [ -120.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 20.0, "LR_X": -110.0, "LR_Y": 10.0, "NAME": "20N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 10.0 ], [ -120.0, 20.0 ], [ -110.0, 20.0 ], [ -110.0, 10.0 ], [ -120.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 30.0, "LR_X": -110.0, "LR_Y": 20.0, "NAME": "30N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 20.0 ], [ -120.0, 30.0 ], [ -110.0, 30.0 ], [ -110.0, 20.0 ], [ -120.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 40.0, "LR_X": -110.0, "LR_Y": 30.0, "NAME": "40N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 30.0 ], [ -120.0, 40.0 ], [ -110.0, 40.0 ], [ -110.0, 30.0 ], [ -120.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 50.0, "LR_X": -110.0, "LR_Y": 40.0, "NAME": "50N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 40.0 ], [ -120.0, 50.0 ], [ -110.0, 50.0 ], [ -110.0, 40.0 ], [ -120.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 60.0, "LR_X": -110.0, "LR_Y": 50.0, "NAME": "60N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 50.0 ], [ -120.0, 60.0 ], [ -110.0, 60.0 ], [ -110.0, 50.0 ], [ -120.0, 50.0 ] ] ] } }, -{ "type": 
"Feature", "properties": { "UL_X": -120.0, "UL_Y": 70.0, "LR_X": -110.0, "LR_Y": 60.0, "NAME": "70N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 60.0 ], [ -120.0, 70.0 ], [ -110.0, 70.0 ], [ -110.0, 60.0 ], [ -120.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 80.0, "LR_X": -110.0, "LR_Y": 70.0, "NAME": "80N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 70.0 ], [ -120.0, 80.0 ], [ -110.0, 80.0 ], [ -110.0, 70.0 ], [ -120.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -50.0, "LR_X": -100.0, "LR_Y": -60.0, "NAME": "50S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -60.0 ], [ -110.0, -50.0 ], [ -100.0, -50.0 ], [ -100.0, -60.0 ], [ -110.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -40.0, "LR_X": -100.0, "LR_Y": -50.0, "NAME": "40S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -50.0 ], [ -110.0, -40.0 ], [ -100.0, -40.0 ], [ -100.0, -50.0 ], [ -110.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -30.0, "LR_X": -100.0, "LR_Y": -40.0, "NAME": "30S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -40.0 ], [ -110.0, -30.0 ], [ -100.0, -30.0 ], [ -100.0, -40.0 ], [ -110.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -20.0, "LR_X": -100.0, "LR_Y": -30.0, "NAME": "20S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -30.0 ], [ -110.0, -20.0 ], [ -100.0, -20.0 ], [ -100.0, -30.0 ], [ -110.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -10.0, "LR_X": -100.0, "LR_Y": -20.0, "NAME": "10S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -20.0 ], [ -110.0, -10.0 ], [ -100.0, -10.0 ], [ -100.0, -20.0 ], [ -110.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 0.0, "LR_X": -100.0, 
"LR_Y": -10.0, "NAME": "00N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -10.0 ], [ -110.0, 0.0 ], [ -100.0, 0.0 ], [ -100.0, -10.0 ], [ -110.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 10.0, "LR_X": -100.0, "LR_Y": 0.0, "NAME": "10N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 0.0 ], [ -110.0, 10.0 ], [ -100.0, 10.0 ], [ -100.0, 0.0 ], [ -110.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 20.0, "LR_X": -100.0, "LR_Y": 10.0, "NAME": "20N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 10.0 ], [ -110.0, 20.0 ], [ -100.0, 20.0 ], [ -100.0, 10.0 ], [ -110.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 30.0, "LR_X": -100.0, "LR_Y": 20.0, "NAME": "30N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 20.0 ], [ -110.0, 30.0 ], [ -100.0, 30.0 ], [ -100.0, 20.0 ], [ -110.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 40.0, "LR_X": -100.0, "LR_Y": 30.0, "NAME": "40N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 30.0 ], [ -110.0, 40.0 ], [ -100.0, 40.0 ], [ -100.0, 30.0 ], [ -110.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 50.0, "LR_X": -100.0, "LR_Y": 40.0, "NAME": "50N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 40.0 ], [ -110.0, 50.0 ], [ -100.0, 50.0 ], [ -100.0, 40.0 ], [ -110.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 60.0, "LR_X": -100.0, "LR_Y": 50.0, "NAME": "60N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 50.0 ], [ -110.0, 60.0 ], [ -100.0, 60.0 ], [ -100.0, 50.0 ], [ -110.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 70.0, "LR_X": -100.0, "LR_Y": 60.0, "NAME": "70N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 60.0 ], [ 
-110.0, 70.0 ], [ -100.0, 70.0 ], [ -100.0, 60.0 ], [ -110.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 80.0, "LR_X": -100.0, "LR_Y": 70.0, "NAME": "80N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 70.0 ], [ -110.0, 80.0 ], [ -100.0, 80.0 ], [ -100.0, 70.0 ], [ -110.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -50.0, "LR_X": -90.0, "LR_Y": -60.0, "NAME": "50S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -60.0 ], [ -100.0, -50.0 ], [ -90.0, -50.0 ], [ -90.0, -60.0 ], [ -100.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -40.0, "LR_X": -90.0, "LR_Y": -50.0, "NAME": "40S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -50.0 ], [ -100.0, -40.0 ], [ -90.0, -40.0 ], [ -90.0, -50.0 ], [ -100.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -30.0, "LR_X": -90.0, "LR_Y": -40.0, "NAME": "30S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -40.0 ], [ -100.0, -30.0 ], [ -90.0, -30.0 ], [ -90.0, -40.0 ], [ -100.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -20.0, "LR_X": -90.0, "LR_Y": -30.0, "NAME": "20S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -30.0 ], [ -100.0, -20.0 ], [ -90.0, -20.0 ], [ -90.0, -30.0 ], [ -100.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -10.0, "LR_X": -90.0, "LR_Y": -20.0, "NAME": "10S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -20.0 ], [ -100.0, -10.0 ], [ -90.0, -10.0 ], [ -90.0, -20.0 ], [ -100.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 0.0, "LR_X": -90.0, "LR_Y": -10.0, "NAME": "00N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -10.0 ], [ -100.0, 0.0 ], [ -90.0, 0.0 ], [ -90.0, -10.0 ], [ -100.0, -10.0 ] ] ] } }, -{ "type": 
"Feature", "properties": { "UL_X": -100.0, "UL_Y": 10.0, "LR_X": -90.0, "LR_Y": 0.0, "NAME": "10N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 0.0 ], [ -100.0, 10.0 ], [ -90.0, 10.0 ], [ -90.0, 0.0 ], [ -100.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 20.0, "LR_X": -90.0, "LR_Y": 10.0, "NAME": "20N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 10.0 ], [ -100.0, 20.0 ], [ -90.0, 20.0 ], [ -90.0, 10.0 ], [ -100.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 30.0, "LR_X": -90.0, "LR_Y": 20.0, "NAME": "30N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 20.0 ], [ -100.0, 30.0 ], [ -90.0, 30.0 ], [ -90.0, 20.0 ], [ -100.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 40.0, "LR_X": -90.0, "LR_Y": 30.0, "NAME": "40N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 30.0 ], [ -100.0, 40.0 ], [ -90.0, 40.0 ], [ -90.0, 30.0 ], [ -100.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 50.0, "LR_X": -90.0, "LR_Y": 40.0, "NAME": "50N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 40.0 ], [ -100.0, 50.0 ], [ -90.0, 50.0 ], [ -90.0, 40.0 ], [ -100.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 60.0, "LR_X": -90.0, "LR_Y": 50.0, "NAME": "60N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 50.0 ], [ -100.0, 60.0 ], [ -90.0, 60.0 ], [ -90.0, 50.0 ], [ -100.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 70.0, "LR_X": -90.0, "LR_Y": 60.0, "NAME": "70N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 60.0 ], [ -100.0, 70.0 ], [ -90.0, 70.0 ], [ -90.0, 60.0 ], [ -100.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 80.0, "LR_X": -90.0, "LR_Y": 70.0, "NAME": "80N_100W" }, "geometry": { "type": 
"Polygon", "coordinates": [ [ [ -100.0, 70.0 ], [ -100.0, 80.0 ], [ -90.0, 80.0 ], [ -90.0, 70.0 ], [ -100.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -50.0, "LR_X": -80.0, "LR_Y": -60.0, "NAME": "50S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -60.0 ], [ -90.0, -50.0 ], [ -80.0, -50.0 ], [ -80.0, -60.0 ], [ -90.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -40.0, "LR_X": -80.0, "LR_Y": -50.0, "NAME": "40S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -50.0 ], [ -90.0, -40.0 ], [ -80.0, -40.0 ], [ -80.0, -50.0 ], [ -90.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -30.0, "LR_X": -80.0, "LR_Y": -40.0, "NAME": "30S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -40.0 ], [ -90.0, -30.0 ], [ -80.0, -30.0 ], [ -80.0, -40.0 ], [ -90.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -20.0, "LR_X": -80.0, "LR_Y": -30.0, "NAME": "20S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -30.0 ], [ -90.0, -20.0 ], [ -80.0, -20.0 ], [ -80.0, -30.0 ], [ -90.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -10.0, "LR_X": -80.0, "LR_Y": -20.0, "NAME": "10S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -20.0 ], [ -90.0, -10.0 ], [ -80.0, -10.0 ], [ -80.0, -20.0 ], [ -90.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 0.0, "LR_X": -80.0, "LR_Y": -10.0, "NAME": "00N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -10.0 ], [ -90.0, 0.0 ], [ -80.0, 0.0 ], [ -80.0, -10.0 ], [ -90.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 10.0, "LR_X": -80.0, "LR_Y": 0.0, "NAME": "10N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 0.0 ], [ -90.0, 10.0 ], [ -80.0, 10.0 ], [ -80.0, 0.0 ], [ -90.0, 0.0 ] ] ] } }, 
-{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 20.0, "LR_X": -80.0, "LR_Y": 10.0, "NAME": "20N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 10.0 ], [ -90.0, 20.0 ], [ -80.0, 20.0 ], [ -80.0, 10.0 ], [ -90.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 30.0, "LR_X": -80.0, "LR_Y": 20.0, "NAME": "30N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 20.0 ], [ -90.0, 30.0 ], [ -80.0, 30.0 ], [ -80.0, 20.0 ], [ -90.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 40.0, "LR_X": -80.0, "LR_Y": 30.0, "NAME": "40N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 30.0 ], [ -90.0, 40.0 ], [ -80.0, 40.0 ], [ -80.0, 30.0 ], [ -90.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 50.0, "LR_X": -80.0, "LR_Y": 40.0, "NAME": "50N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 40.0 ], [ -90.0, 50.0 ], [ -80.0, 50.0 ], [ -80.0, 40.0 ], [ -90.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 60.0, "LR_X": -80.0, "LR_Y": 50.0, "NAME": "60N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 50.0 ], [ -90.0, 60.0 ], [ -80.0, 60.0 ], [ -80.0, 50.0 ], [ -90.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 70.0, "LR_X": -80.0, "LR_Y": 60.0, "NAME": "70N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 60.0 ], [ -90.0, 70.0 ], [ -80.0, 70.0 ], [ -80.0, 60.0 ], [ -90.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 80.0, "LR_X": -80.0, "LR_Y": 70.0, "NAME": "80N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 70.0 ], [ -90.0, 80.0 ], [ -80.0, 80.0 ], [ -80.0, 70.0 ], [ -90.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -50.0, "LR_X": -70.0, "LR_Y": -60.0, "NAME": "50S_080W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -80.0, -60.0 ], [ -80.0, -50.0 ], [ -70.0, -50.0 ], [ -70.0, -60.0 ], [ -80.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -40.0, "LR_X": -70.0, "LR_Y": -50.0, "NAME": "40S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -50.0 ], [ -80.0, -40.0 ], [ -70.0, -40.0 ], [ -70.0, -50.0 ], [ -80.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -30.0, "LR_X": -70.0, "LR_Y": -40.0, "NAME": "30S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -40.0 ], [ -80.0, -30.0 ], [ -70.0, -30.0 ], [ -70.0, -40.0 ], [ -80.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -20.0, "LR_X": -70.0, "LR_Y": -30.0, "NAME": "20S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -30.0 ], [ -80.0, -20.0 ], [ -70.0, -20.0 ], [ -70.0, -30.0 ], [ -80.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -10.0, "LR_X": -70.0, "LR_Y": -20.0, "NAME": "10S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -20.0 ], [ -80.0, -10.0 ], [ -70.0, -10.0 ], [ -70.0, -20.0 ], [ -80.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 0.0, "LR_X": -70.0, "LR_Y": -10.0, "NAME": "00N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -10.0 ], [ -80.0, 0.0 ], [ -70.0, 0.0 ], [ -70.0, -10.0 ], [ -80.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 10.0, "LR_X": -70.0, "LR_Y": 0.0, "NAME": "10N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 0.0 ], [ -80.0, 10.0 ], [ -70.0, 10.0 ], [ -70.0, 0.0 ], [ -80.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 20.0, "LR_X": -70.0, "LR_Y": 10.0, "NAME": "20N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 10.0 ], [ -80.0, 20.0 ], [ -70.0, 20.0 ], [ -70.0, 10.0 ], [ -80.0, 10.0 ] ] ] } }, -{ "type": 
"Feature", "properties": { "UL_X": -80.0, "UL_Y": 30.0, "LR_X": -70.0, "LR_Y": 20.0, "NAME": "30N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 20.0 ], [ -80.0, 30.0 ], [ -70.0, 30.0 ], [ -70.0, 20.0 ], [ -80.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 40.0, "LR_X": -70.0, "LR_Y": 30.0, "NAME": "40N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 30.0 ], [ -80.0, 40.0 ], [ -70.0, 40.0 ], [ -70.0, 30.0 ], [ -80.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 50.0, "LR_X": -70.0, "LR_Y": 40.0, "NAME": "50N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 40.0 ], [ -80.0, 50.0 ], [ -70.0, 50.0 ], [ -70.0, 40.0 ], [ -80.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 60.0, "LR_X": -70.0, "LR_Y": 50.0, "NAME": "60N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 50.0 ], [ -80.0, 60.0 ], [ -70.0, 60.0 ], [ -70.0, 50.0 ], [ -80.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 70.0, "LR_X": -70.0, "LR_Y": 60.0, "NAME": "70N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 60.0 ], [ -80.0, 70.0 ], [ -70.0, 70.0 ], [ -70.0, 60.0 ], [ -80.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 80.0, "LR_X": -70.0, "LR_Y": 70.0, "NAME": "80N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 70.0 ], [ -80.0, 80.0 ], [ -70.0, 80.0 ], [ -70.0, 70.0 ], [ -80.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -50.0, "LR_X": -60.0, "LR_Y": -60.0, "NAME": "50S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -60.0 ], [ -70.0, -50.0 ], [ -60.0, -50.0 ], [ -60.0, -60.0 ], [ -70.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -40.0, "LR_X": -60.0, "LR_Y": -50.0, "NAME": "40S_070W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -70.0, -50.0 ], [ -70.0, -40.0 ], [ -60.0, -40.0 ], [ -60.0, -50.0 ], [ -70.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -30.0, "LR_X": -60.0, "LR_Y": -40.0, "NAME": "30S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -40.0 ], [ -70.0, -30.0 ], [ -60.0, -30.0 ], [ -60.0, -40.0 ], [ -70.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -20.0, "LR_X": -60.0, "LR_Y": -30.0, "NAME": "20S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -30.0 ], [ -70.0, -20.0 ], [ -60.0, -20.0 ], [ -60.0, -30.0 ], [ -70.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -10.0, "LR_X": -60.0, "LR_Y": -20.0, "NAME": "10S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -20.0 ], [ -70.0, -10.0 ], [ -60.0, -10.0 ], [ -60.0, -20.0 ], [ -70.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 0.0, "LR_X": -60.0, "LR_Y": -10.0, "NAME": "00N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -10.0 ], [ -70.0, 0.0 ], [ -60.0, 0.0 ], [ -60.0, -10.0 ], [ -70.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 10.0, "LR_X": -60.0, "LR_Y": 0.0, "NAME": "10N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 0.0 ], [ -70.0, 10.0 ], [ -60.0, 10.0 ], [ -60.0, 0.0 ], [ -70.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 20.0, "LR_X": -60.0, "LR_Y": 10.0, "NAME": "20N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 10.0 ], [ -70.0, 20.0 ], [ -60.0, 20.0 ], [ -60.0, 10.0 ], [ -70.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 30.0, "LR_X": -60.0, "LR_Y": 20.0, "NAME": "30N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 20.0 ], [ -70.0, 30.0 ], [ -60.0, 30.0 ], [ -60.0, 20.0 ], [ -70.0, 20.0 ] ] ] } }, -{ "type": "Feature", 
"properties": { "UL_X": -70.0, "UL_Y": 40.0, "LR_X": -60.0, "LR_Y": 30.0, "NAME": "40N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 30.0 ], [ -70.0, 40.0 ], [ -60.0, 40.0 ], [ -60.0, 30.0 ], [ -70.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 50.0, "LR_X": -60.0, "LR_Y": 40.0, "NAME": "50N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 40.0 ], [ -70.0, 50.0 ], [ -60.0, 50.0 ], [ -60.0, 40.0 ], [ -70.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 60.0, "LR_X": -60.0, "LR_Y": 50.0, "NAME": "60N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 50.0 ], [ -70.0, 60.0 ], [ -60.0, 60.0 ], [ -60.0, 50.0 ], [ -70.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 70.0, "LR_X": -60.0, "LR_Y": 60.0, "NAME": "70N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 60.0 ], [ -70.0, 70.0 ], [ -60.0, 70.0 ], [ -60.0, 60.0 ], [ -70.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 80.0, "LR_X": -60.0, "LR_Y": 70.0, "NAME": "80N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 70.0 ], [ -70.0, 80.0 ], [ -60.0, 80.0 ], [ -60.0, 70.0 ], [ -70.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -50.0, "LR_X": -50.0, "LR_Y": -60.0, "NAME": "50S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -60.0 ], [ -60.0, -50.0 ], [ -50.0, -50.0 ], [ -50.0, -60.0 ], [ -60.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -40.0, "LR_X": -50.0, "LR_Y": -50.0, "NAME": "40S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -50.0 ], [ -60.0, -40.0 ], [ -50.0, -40.0 ], [ -50.0, -50.0 ], [ -60.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -30.0, "LR_X": -50.0, "LR_Y": -40.0, "NAME": "30S_060W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -60.0, -40.0 ], [ -60.0, -30.0 ], [ -50.0, -30.0 ], [ -50.0, -40.0 ], [ -60.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -20.0, "LR_X": -50.0, "LR_Y": -30.0, "NAME": "20S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -30.0 ], [ -60.0, -20.0 ], [ -50.0, -20.0 ], [ -50.0, -30.0 ], [ -60.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -10.0, "LR_X": -50.0, "LR_Y": -20.0, "NAME": "10S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -20.0 ], [ -60.0, -10.0 ], [ -50.0, -10.0 ], [ -50.0, -20.0 ], [ -60.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 0.0, "LR_X": -50.0, "LR_Y": -10.0, "NAME": "00N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -10.0 ], [ -60.0, 0.0 ], [ -50.0, 0.0 ], [ -50.0, -10.0 ], [ -60.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 10.0, "LR_X": -50.0, "LR_Y": 0.0, "NAME": "10N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 0.0 ], [ -60.0, 10.0 ], [ -50.0, 10.0 ], [ -50.0, 0.0 ], [ -60.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 20.0, "LR_X": -50.0, "LR_Y": 10.0, "NAME": "20N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 10.0 ], [ -60.0, 20.0 ], [ -50.0, 20.0 ], [ -50.0, 10.0 ], [ -60.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 30.0, "LR_X": -50.0, "LR_Y": 20.0, "NAME": "30N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 20.0 ], [ -60.0, 30.0 ], [ -50.0, 30.0 ], [ -50.0, 20.0 ], [ -60.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 40.0, "LR_X": -50.0, "LR_Y": 30.0, "NAME": "40N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 30.0 ], [ -60.0, 40.0 ], [ -50.0, 40.0 ], [ -50.0, 30.0 ], [ -60.0, 30.0 ] ] ] } }, -{ "type": "Feature", 
"properties": { "UL_X": -60.0, "UL_Y": 50.0, "LR_X": -50.0, "LR_Y": 40.0, "NAME": "50N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 40.0 ], [ -60.0, 50.0 ], [ -50.0, 50.0 ], [ -50.0, 40.0 ], [ -60.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 60.0, "LR_X": -50.0, "LR_Y": 50.0, "NAME": "60N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 50.0 ], [ -60.0, 60.0 ], [ -50.0, 60.0 ], [ -50.0, 50.0 ], [ -60.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 70.0, "LR_X": -50.0, "LR_Y": 60.0, "NAME": "70N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 60.0 ], [ -60.0, 70.0 ], [ -50.0, 70.0 ], [ -50.0, 60.0 ], [ -60.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 80.0, "LR_X": -50.0, "LR_Y": 70.0, "NAME": "80N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 70.0 ], [ -60.0, 80.0 ], [ -50.0, 80.0 ], [ -50.0, 70.0 ], [ -60.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -50.0, "LR_X": -40.0, "LR_Y": -60.0, "NAME": "50S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -60.0 ], [ -50.0, -50.0 ], [ -40.0, -50.0 ], [ -40.0, -60.0 ], [ -50.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -40.0, "LR_X": -40.0, "LR_Y": -50.0, "NAME": "40S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -50.0 ], [ -50.0, -40.0 ], [ -40.0, -40.0 ], [ -40.0, -50.0 ], [ -50.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -30.0, "LR_X": -40.0, "LR_Y": -40.0, "NAME": "30S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -40.0 ], [ -50.0, -30.0 ], [ -40.0, -30.0 ], [ -40.0, -40.0 ], [ -50.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -20.0, "LR_X": -40.0, "LR_Y": -30.0, "NAME": "20S_050W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -50.0, -30.0 ], [ -50.0, -20.0 ], [ -40.0, -20.0 ], [ -40.0, -30.0 ], [ -50.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -10.0, "LR_X": -40.0, "LR_Y": -20.0, "NAME": "10S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -20.0 ], [ -50.0, -10.0 ], [ -40.0, -10.0 ], [ -40.0, -20.0 ], [ -50.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 0.0, "LR_X": -40.0, "LR_Y": -10.0, "NAME": "00N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -10.0 ], [ -50.0, 0.0 ], [ -40.0, 0.0 ], [ -40.0, -10.0 ], [ -50.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 10.0, "LR_X": -40.0, "LR_Y": 0.0, "NAME": "10N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 0.0 ], [ -50.0, 10.0 ], [ -40.0, 10.0 ], [ -40.0, 0.0 ], [ -50.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 20.0, "LR_X": -40.0, "LR_Y": 10.0, "NAME": "20N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 10.0 ], [ -50.0, 20.0 ], [ -40.0, 20.0 ], [ -40.0, 10.0 ], [ -50.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 30.0, "LR_X": -40.0, "LR_Y": 20.0, "NAME": "30N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 20.0 ], [ -50.0, 30.0 ], [ -40.0, 30.0 ], [ -40.0, 20.0 ], [ -50.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 40.0, "LR_X": -40.0, "LR_Y": 30.0, "NAME": "40N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 30.0 ], [ -50.0, 40.0 ], [ -40.0, 40.0 ], [ -40.0, 30.0 ], [ -50.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 50.0, "LR_X": -40.0, "LR_Y": 40.0, "NAME": "50N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 40.0 ], [ -50.0, 50.0 ], [ -40.0, 50.0 ], [ -40.0, 40.0 ], [ -50.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { 
"UL_X": -50.0, "UL_Y": 60.0, "LR_X": -40.0, "LR_Y": 50.0, "NAME": "60N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 50.0 ], [ -50.0, 60.0 ], [ -40.0, 60.0 ], [ -40.0, 50.0 ], [ -50.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 70.0, "LR_X": -40.0, "LR_Y": 60.0, "NAME": "70N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 60.0 ], [ -50.0, 70.0 ], [ -40.0, 70.0 ], [ -40.0, 60.0 ], [ -50.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 80.0, "LR_X": -40.0, "LR_Y": 70.0, "NAME": "80N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 70.0 ], [ -50.0, 80.0 ], [ -40.0, 80.0 ], [ -40.0, 70.0 ], [ -50.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -50.0, "LR_X": -30.0, "LR_Y": -60.0, "NAME": "50S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -60.0 ], [ -40.0, -50.0 ], [ -30.0, -50.0 ], [ -30.0, -60.0 ], [ -40.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -40.0, "LR_X": -30.0, "LR_Y": -50.0, "NAME": "40S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -50.0 ], [ -40.0, -40.0 ], [ -30.0, -40.0 ], [ -30.0, -50.0 ], [ -40.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -30.0, "LR_X": -30.0, "LR_Y": -40.0, "NAME": "30S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -40.0 ], [ -40.0, -30.0 ], [ -30.0, -30.0 ], [ -30.0, -40.0 ], [ -40.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -20.0, "LR_X": -30.0, "LR_Y": -30.0, "NAME": "20S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -30.0 ], [ -40.0, -20.0 ], [ -30.0, -20.0 ], [ -30.0, -30.0 ], [ -40.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -10.0, "LR_X": -30.0, "LR_Y": -20.0, "NAME": "10S_040W" }, "geometry": { "type": "Polygon", "coordinates": 
[ [ [ -40.0, -20.0 ], [ -40.0, -10.0 ], [ -30.0, -10.0 ], [ -30.0, -20.0 ], [ -40.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 0.0, "LR_X": -30.0, "LR_Y": -10.0, "NAME": "00N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -10.0 ], [ -40.0, 0.0 ], [ -30.0, 0.0 ], [ -30.0, -10.0 ], [ -40.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 10.0, "LR_X": -30.0, "LR_Y": 0.0, "NAME": "10N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 0.0 ], [ -40.0, 10.0 ], [ -30.0, 10.0 ], [ -30.0, 0.0 ], [ -40.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 20.0, "LR_X": -30.0, "LR_Y": 10.0, "NAME": "20N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 10.0 ], [ -40.0, 20.0 ], [ -30.0, 20.0 ], [ -30.0, 10.0 ], [ -40.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 30.0, "LR_X": -30.0, "LR_Y": 20.0, "NAME": "30N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 20.0 ], [ -40.0, 30.0 ], [ -30.0, 30.0 ], [ -30.0, 20.0 ], [ -40.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 40.0, "LR_X": -30.0, "LR_Y": 30.0, "NAME": "40N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 30.0 ], [ -40.0, 40.0 ], [ -30.0, 40.0 ], [ -30.0, 30.0 ], [ -40.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 50.0, "LR_X": -30.0, "LR_Y": 40.0, "NAME": "50N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 40.0 ], [ -40.0, 50.0 ], [ -30.0, 50.0 ], [ -30.0, 40.0 ], [ -40.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 60.0, "LR_X": -30.0, "LR_Y": 50.0, "NAME": "60N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 50.0 ], [ -40.0, 60.0 ], [ -30.0, 60.0 ], [ -30.0, 50.0 ], [ -40.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, 
"UL_Y": 70.0, "LR_X": -30.0, "LR_Y": 60.0, "NAME": "70N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 60.0 ], [ -40.0, 70.0 ], [ -30.0, 70.0 ], [ -30.0, 60.0 ], [ -40.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 80.0, "LR_X": -30.0, "LR_Y": 70.0, "NAME": "80N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 70.0 ], [ -40.0, 80.0 ], [ -30.0, 80.0 ], [ -30.0, 70.0 ], [ -40.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -50.0, "LR_X": -20.0, "LR_Y": -60.0, "NAME": "50S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -60.0 ], [ -30.0, -50.0 ], [ -20.0, -50.0 ], [ -20.0, -60.0 ], [ -30.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -40.0, "LR_X": -20.0, "LR_Y": -50.0, "NAME": "40S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -50.0 ], [ -30.0, -40.0 ], [ -20.0, -40.0 ], [ -20.0, -50.0 ], [ -30.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -30.0, "LR_X": -20.0, "LR_Y": -40.0, "NAME": "30S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -40.0 ], [ -30.0, -30.0 ], [ -20.0, -30.0 ], [ -20.0, -40.0 ], [ -30.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -20.0, "LR_X": -20.0, "LR_Y": -30.0, "NAME": "20S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -30.0 ], [ -30.0, -20.0 ], [ -20.0, -20.0 ], [ -20.0, -30.0 ], [ -30.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -10.0, "LR_X": -20.0, "LR_Y": -20.0, "NAME": "10S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -20.0 ], [ -30.0, -10.0 ], [ -20.0, -10.0 ], [ -20.0, -20.0 ], [ -30.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 0.0, "LR_X": -20.0, "LR_Y": -10.0, "NAME": "00N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 
-30.0, -10.0 ], [ -30.0, 0.0 ], [ -20.0, 0.0 ], [ -20.0, -10.0 ], [ -30.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 10.0, "LR_X": -20.0, "LR_Y": 0.0, "NAME": "10N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 0.0 ], [ -30.0, 10.0 ], [ -20.0, 10.0 ], [ -20.0, 0.0 ], [ -30.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 20.0, "LR_X": -20.0, "LR_Y": 10.0, "NAME": "20N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 10.0 ], [ -30.0, 20.0 ], [ -20.0, 20.0 ], [ -20.0, 10.0 ], [ -30.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 30.0, "LR_X": -20.0, "LR_Y": 20.0, "NAME": "30N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 20.0 ], [ -30.0, 30.0 ], [ -20.0, 30.0 ], [ -20.0, 20.0 ], [ -30.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 40.0, "LR_X": -20.0, "LR_Y": 30.0, "NAME": "40N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 30.0 ], [ -30.0, 40.0 ], [ -20.0, 40.0 ], [ -20.0, 30.0 ], [ -30.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 50.0, "LR_X": -20.0, "LR_Y": 40.0, "NAME": "50N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 40.0 ], [ -30.0, 50.0 ], [ -20.0, 50.0 ], [ -20.0, 40.0 ], [ -30.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 60.0, "LR_X": -20.0, "LR_Y": 50.0, "NAME": "60N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 50.0 ], [ -30.0, 60.0 ], [ -20.0, 60.0 ], [ -20.0, 50.0 ], [ -30.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 70.0, "LR_X": -20.0, "LR_Y": 60.0, "NAME": "70N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 60.0 ], [ -30.0, 70.0 ], [ -20.0, 70.0 ], [ -20.0, 60.0 ], [ -30.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 80.0, 
"LR_X": -20.0, "LR_Y": 70.0, "NAME": "80N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 70.0 ], [ -30.0, 80.0 ], [ -20.0, 80.0 ], [ -20.0, 70.0 ], [ -30.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -50.0, "LR_X": -10.0, "LR_Y": -60.0, "NAME": "50S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -60.0 ], [ -20.0, -50.0 ], [ -10.0, -50.0 ], [ -10.0, -60.0 ], [ -20.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -40.0, "LR_X": -10.0, "LR_Y": -50.0, "NAME": "40S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -50.0 ], [ -20.0, -40.0 ], [ -10.0, -40.0 ], [ -10.0, -50.0 ], [ -20.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -30.0, "LR_X": -10.0, "LR_Y": -40.0, "NAME": "30S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -40.0 ], [ -20.0, -30.0 ], [ -10.0, -30.0 ], [ -10.0, -40.0 ], [ -20.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -20.0, "LR_X": -10.0, "LR_Y": -30.0, "NAME": "20S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -30.0 ], [ -20.0, -20.0 ], [ -10.0, -20.0 ], [ -10.0, -30.0 ], [ -20.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -10.0, "LR_X": -10.0, "LR_Y": -20.0, "NAME": "10S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -20.0 ], [ -20.0, -10.0 ], [ -10.0, -10.0 ], [ -10.0, -20.0 ], [ -20.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 0.0, "LR_X": -10.0, "LR_Y": -10.0, "NAME": "00N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -10.0 ], [ -20.0, 0.0 ], [ -10.0, 0.0 ], [ -10.0, -10.0 ], [ -20.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 10.0, "LR_X": -10.0, "LR_Y": 0.0, "NAME": "10N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 0.0 ], [ 
-20.0, 10.0 ], [ -10.0, 10.0 ], [ -10.0, 0.0 ], [ -20.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 20.0, "LR_X": -10.0, "LR_Y": 10.0, "NAME": "20N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 10.0 ], [ -20.0, 20.0 ], [ -10.0, 20.0 ], [ -10.0, 10.0 ], [ -20.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 30.0, "LR_X": -10.0, "LR_Y": 20.0, "NAME": "30N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 20.0 ], [ -20.0, 30.0 ], [ -10.0, 30.0 ], [ -10.0, 20.0 ], [ -20.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 40.0, "LR_X": -10.0, "LR_Y": 30.0, "NAME": "40N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 30.0 ], [ -20.0, 40.0 ], [ -10.0, 40.0 ], [ -10.0, 30.0 ], [ -20.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 50.0, "LR_X": -10.0, "LR_Y": 40.0, "NAME": "50N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 40.0 ], [ -20.0, 50.0 ], [ -10.0, 50.0 ], [ -10.0, 40.0 ], [ -20.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 60.0, "LR_X": -10.0, "LR_Y": 50.0, "NAME": "60N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 50.0 ], [ -20.0, 60.0 ], [ -10.0, 60.0 ], [ -10.0, 50.0 ], [ -20.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 70.0, "LR_X": -10.0, "LR_Y": 60.0, "NAME": "70N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 60.0 ], [ -20.0, 70.0 ], [ -10.0, 70.0 ], [ -10.0, 60.0 ], [ -20.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 80.0, "LR_X": -10.0, "LR_Y": 70.0, "NAME": "80N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 70.0 ], [ -20.0, 80.0 ], [ -10.0, 80.0 ], [ -10.0, 70.0 ], [ -20.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -50.0, "LR_X": 0.0, 
"LR_Y": -60.0, "NAME": "50S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -60.0 ], [ -10.0, -50.0 ], [ 0.0, -50.0 ], [ 0.0, -60.0 ], [ -10.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -40.0, "LR_X": 0.0, "LR_Y": -50.0, "NAME": "40S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -50.0 ], [ -10.0, -40.0 ], [ 0.0, -40.0 ], [ 0.0, -50.0 ], [ -10.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -30.0, "LR_X": 0.0, "LR_Y": -40.0, "NAME": "30S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -40.0 ], [ -10.0, -30.0 ], [ 0.0, -30.0 ], [ 0.0, -40.0 ], [ -10.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -20.0, "LR_X": 0.0, "LR_Y": -30.0, "NAME": "20S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -30.0 ], [ -10.0, -20.0 ], [ 0.0, -20.0 ], [ 0.0, -30.0 ], [ -10.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -10.0, "LR_X": 0.0, "LR_Y": -20.0, "NAME": "10S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -20.0 ], [ -10.0, -10.0 ], [ 0.0, -10.0 ], [ 0.0, -20.0 ], [ -10.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 0.0, "LR_X": 0.0, "LR_Y": -10.0, "NAME": "00N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -10.0 ], [ -10.0, 0.0 ], [ 0.0, 0.0 ], [ 0.0, -10.0 ], [ -10.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 10.0, "LR_X": 0.0, "LR_Y": 0.0, "NAME": "10N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 0.0 ], [ -10.0, 10.0 ], [ 0.0, 10.0 ], [ 0.0, 0.0 ], [ -10.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 20.0, "LR_X": 0.0, "LR_Y": 10.0, "NAME": "20N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 10.0 ], [ -10.0, 20.0 ], [ 0.0, 20.0 ], [ 0.0, 10.0 ], [ -10.0, 10.0 ] ] 
] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 30.0, "LR_X": 0.0, "LR_Y": 20.0, "NAME": "30N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 20.0 ], [ -10.0, 30.0 ], [ 0.0, 30.0 ], [ 0.0, 20.0 ], [ -10.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 40.0, "LR_X": 0.0, "LR_Y": 30.0, "NAME": "40N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 30.0 ], [ -10.0, 40.0 ], [ 0.0, 40.0 ], [ 0.0, 30.0 ], [ -10.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 50.0, "LR_X": 0.0, "LR_Y": 40.0, "NAME": "50N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 40.0 ], [ -10.0, 50.0 ], [ 0.0, 50.0 ], [ 0.0, 40.0 ], [ -10.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 60.0, "LR_X": 0.0, "LR_Y": 50.0, "NAME": "60N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 50.0 ], [ -10.0, 60.0 ], [ 0.0, 60.0 ], [ 0.0, 50.0 ], [ -10.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 70.0, "LR_X": 0.0, "LR_Y": 60.0, "NAME": "70N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 60.0 ], [ -10.0, 70.0 ], [ 0.0, 70.0 ], [ 0.0, 60.0 ], [ -10.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 80.0, "LR_X": 0.0, "LR_Y": 70.0, "NAME": "80N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 70.0 ], [ -10.0, 80.0 ], [ 0.0, 80.0 ], [ 0.0, 70.0 ], [ -10.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -50.0, "LR_X": 10.0, "LR_Y": -60.0, "NAME": "50S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -60.0 ], [ 0.0, -50.0 ], [ 10.0, -50.0 ], [ 10.0, -60.0 ], [ 0.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -40.0, "LR_X": 10.0, "LR_Y": -50.0, "NAME": "40S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -50.0 ], [ 0.0, 
-40.0 ], [ 10.0, -40.0 ], [ 10.0, -50.0 ], [ 0.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -30.0, "LR_X": 10.0, "LR_Y": -40.0, "NAME": "30S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -40.0 ], [ 0.0, -30.0 ], [ 10.0, -30.0 ], [ 10.0, -40.0 ], [ 0.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -20.0, "LR_X": 10.0, "LR_Y": -30.0, "NAME": "20S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -30.0 ], [ 0.0, -20.0 ], [ 10.0, -20.0 ], [ 10.0, -30.0 ], [ 0.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -10.0, "LR_X": 10.0, "LR_Y": -20.0, "NAME": "10S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -20.0 ], [ 0.0, -10.0 ], [ 10.0, -10.0 ], [ 10.0, -20.0 ], [ 0.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 0.0, "LR_X": 10.0, "LR_Y": -10.0, "NAME": "00N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -10.0 ], [ 0.0, 0.0 ], [ 10.0, 0.0 ], [ 10.0, -10.0 ], [ 0.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 10.0, "LR_X": 10.0, "LR_Y": 0.0, "NAME": "10N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 0.0, 10.0 ], [ 10.0, 10.0 ], [ 10.0, 0.0 ], [ 0.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 20.0, "LR_X": 10.0, "LR_Y": 10.0, "NAME": "20N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 10.0 ], [ 0.0, 20.0 ], [ 10.0, 20.0 ], [ 10.0, 10.0 ], [ 0.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 30.0, "LR_X": 10.0, "LR_Y": 20.0, "NAME": "30N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 20.0 ], [ 0.0, 30.0 ], [ 10.0, 30.0 ], [ 10.0, 20.0 ], [ 0.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 40.0, "LR_X": 10.0, "LR_Y": 30.0, "NAME": "40N_000E" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ 0.0, 30.0 ], [ 0.0, 40.0 ], [ 10.0, 40.0 ], [ 10.0, 30.0 ], [ 0.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 50.0, "LR_X": 10.0, "LR_Y": 40.0, "NAME": "50N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 40.0 ], [ 0.0, 50.0 ], [ 10.0, 50.0 ], [ 10.0, 40.0 ], [ 0.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 60.0, "LR_X": 10.0, "LR_Y": 50.0, "NAME": "60N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 50.0 ], [ 0.0, 60.0 ], [ 10.0, 60.0 ], [ 10.0, 50.0 ], [ 0.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 70.0, "LR_X": 10.0, "LR_Y": 60.0, "NAME": "70N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 60.0 ], [ 0.0, 70.0 ], [ 10.0, 70.0 ], [ 10.0, 60.0 ], [ 0.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 80.0, "LR_X": 10.0, "LR_Y": 70.0, "NAME": "80N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 70.0 ], [ 0.0, 80.0 ], [ 10.0, 80.0 ], [ 10.0, 70.0 ], [ 0.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -50.0, "LR_X": 20.0, "LR_Y": -60.0, "NAME": "50S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -60.0 ], [ 10.0, -50.0 ], [ 20.0, -50.0 ], [ 20.0, -60.0 ], [ 10.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -40.0, "LR_X": 20.0, "LR_Y": -50.0, "NAME": "40S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -50.0 ], [ 10.0, -40.0 ], [ 20.0, -40.0 ], [ 20.0, -50.0 ], [ 10.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -30.0, "LR_X": 20.0, "LR_Y": -40.0, "NAME": "30S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -40.0 ], [ 10.0, -30.0 ], [ 20.0, -30.0 ], [ 20.0, -40.0 ], [ 10.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -20.0, "LR_X": 20.0, "LR_Y": -30.0, 
"NAME": "20S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -30.0 ], [ 10.0, -20.0 ], [ 20.0, -20.0 ], [ 20.0, -30.0 ], [ 10.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -10.0, "LR_X": 20.0, "LR_Y": -20.0, "NAME": "10S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -20.0 ], [ 10.0, -10.0 ], [ 20.0, -10.0 ], [ 20.0, -20.0 ], [ 10.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 0.0, "LR_X": 20.0, "LR_Y": -10.0, "NAME": "00N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -10.0 ], [ 10.0, 0.0 ], [ 20.0, 0.0 ], [ 20.0, -10.0 ], [ 10.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 10.0, "LR_X": 20.0, "LR_Y": 0.0, "NAME": "10N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 0.0 ], [ 10.0, 10.0 ], [ 20.0, 10.0 ], [ 20.0, 0.0 ], [ 10.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 20.0, "LR_X": 20.0, "LR_Y": 10.0, "NAME": "20N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 10.0 ], [ 10.0, 20.0 ], [ 20.0, 20.0 ], [ 20.0, 10.0 ], [ 10.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 30.0, "LR_X": 20.0, "LR_Y": 20.0, "NAME": "30N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 20.0 ], [ 10.0, 30.0 ], [ 20.0, 30.0 ], [ 20.0, 20.0 ], [ 10.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 40.0, "LR_X": 20.0, "LR_Y": 30.0, "NAME": "40N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 30.0 ], [ 10.0, 40.0 ], [ 20.0, 40.0 ], [ 20.0, 30.0 ], [ 10.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 50.0, "LR_X": 20.0, "LR_Y": 40.0, "NAME": "50N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 40.0 ], [ 10.0, 50.0 ], [ 20.0, 50.0 ], [ 20.0, 40.0 ], [ 10.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": 
{ "UL_X": 10.0, "UL_Y": 60.0, "LR_X": 20.0, "LR_Y": 50.0, "NAME": "60N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 50.0 ], [ 10.0, 60.0 ], [ 20.0, 60.0 ], [ 20.0, 50.0 ], [ 10.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 70.0, "LR_X": 20.0, "LR_Y": 60.0, "NAME": "70N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 60.0 ], [ 10.0, 70.0 ], [ 20.0, 70.0 ], [ 20.0, 60.0 ], [ 10.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 80.0, "LR_X": 20.0, "LR_Y": 70.0, "NAME": "80N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 70.0 ], [ 10.0, 80.0 ], [ 20.0, 80.0 ], [ 20.0, 70.0 ], [ 10.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -50.0, "LR_X": 30.0, "LR_Y": -60.0, "NAME": "50S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -60.0 ], [ 20.0, -50.0 ], [ 30.0, -50.0 ], [ 30.0, -60.0 ], [ 20.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -40.0, "LR_X": 30.0, "LR_Y": -50.0, "NAME": "40S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -50.0 ], [ 20.0, -40.0 ], [ 30.0, -40.0 ], [ 30.0, -50.0 ], [ 20.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -30.0, "LR_X": 30.0, "LR_Y": -40.0, "NAME": "30S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -40.0 ], [ 20.0, -30.0 ], [ 30.0, -30.0 ], [ 30.0, -40.0 ], [ 20.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -20.0, "LR_X": 30.0, "LR_Y": -30.0, "NAME": "20S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -30.0 ], [ 20.0, -20.0 ], [ 30.0, -20.0 ], [ 30.0, -30.0 ], [ 20.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -10.0, "LR_X": 30.0, "LR_Y": -20.0, "NAME": "10S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -20.0 ], [ 20.0, -10.0 ], [ 30.0, 
-10.0 ], [ 30.0, -20.0 ], [ 20.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 0.0, "LR_X": 30.0, "LR_Y": -10.0, "NAME": "00N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -10.0 ], [ 20.0, 0.0 ], [ 30.0, 0.0 ], [ 30.0, -10.0 ], [ 20.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 10.0, "LR_X": 30.0, "LR_Y": 0.0, "NAME": "10N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 0.0 ], [ 20.0, 10.0 ], [ 30.0, 10.0 ], [ 30.0, 0.0 ], [ 20.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 20.0, "LR_X": 30.0, "LR_Y": 10.0, "NAME": "20N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 10.0 ], [ 20.0, 20.0 ], [ 30.0, 20.0 ], [ 30.0, 10.0 ], [ 20.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 30.0, "LR_X": 30.0, "LR_Y": 20.0, "NAME": "30N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 20.0 ], [ 20.0, 30.0 ], [ 30.0, 30.0 ], [ 30.0, 20.0 ], [ 20.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 40.0, "LR_X": 30.0, "LR_Y": 30.0, "NAME": "40N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 30.0 ], [ 20.0, 40.0 ], [ 30.0, 40.0 ], [ 30.0, 30.0 ], [ 20.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 50.0, "LR_X": 30.0, "LR_Y": 40.0, "NAME": "50N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 40.0 ], [ 20.0, 50.0 ], [ 30.0, 50.0 ], [ 30.0, 40.0 ], [ 20.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 60.0, "LR_X": 30.0, "LR_Y": 50.0, "NAME": "60N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 50.0 ], [ 20.0, 60.0 ], [ 30.0, 60.0 ], [ 30.0, 50.0 ], [ 20.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 70.0, "LR_X": 30.0, "LR_Y": 60.0, "NAME": "70N_020E" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ 20.0, 60.0 ], [ 20.0, 70.0 ], [ 30.0, 70.0 ], [ 30.0, 60.0 ], [ 20.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 80.0, "LR_X": 30.0, "LR_Y": 70.0, "NAME": "80N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 70.0 ], [ 20.0, 80.0 ], [ 30.0, 80.0 ], [ 30.0, 70.0 ], [ 20.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -50.0, "LR_X": 40.0, "LR_Y": -60.0, "NAME": "50S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -60.0 ], [ 30.0, -50.0 ], [ 40.0, -50.0 ], [ 40.0, -60.0 ], [ 30.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -40.0, "LR_X": 40.0, "LR_Y": -50.0, "NAME": "40S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -50.0 ], [ 30.0, -40.0 ], [ 40.0, -40.0 ], [ 40.0, -50.0 ], [ 30.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -30.0, "LR_X": 40.0, "LR_Y": -40.0, "NAME": "30S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -40.0 ], [ 30.0, -30.0 ], [ 40.0, -30.0 ], [ 40.0, -40.0 ], [ 30.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -20.0, "LR_X": 40.0, "LR_Y": -30.0, "NAME": "20S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -30.0 ], [ 30.0, -20.0 ], [ 40.0, -20.0 ], [ 40.0, -30.0 ], [ 30.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -10.0, "LR_X": 40.0, "LR_Y": -20.0, "NAME": "10S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -20.0 ], [ 30.0, -10.0 ], [ 40.0, -10.0 ], [ 40.0, -20.0 ], [ 30.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 0.0, "LR_X": 40.0, "LR_Y": -10.0, "NAME": "00N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -10.0 ], [ 30.0, 0.0 ], [ 40.0, 0.0 ], [ 40.0, -10.0 ], [ 30.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 
10.0, "LR_X": 40.0, "LR_Y": 0.0, "NAME": "10N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 0.0 ], [ 30.0, 10.0 ], [ 40.0, 10.0 ], [ 40.0, 0.0 ], [ 30.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 20.0, "LR_X": 40.0, "LR_Y": 10.0, "NAME": "20N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 10.0 ], [ 30.0, 20.0 ], [ 40.0, 20.0 ], [ 40.0, 10.0 ], [ 30.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 30.0, "LR_X": 40.0, "LR_Y": 20.0, "NAME": "30N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 20.0 ], [ 30.0, 30.0 ], [ 40.0, 30.0 ], [ 40.0, 20.0 ], [ 30.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 40.0, "LR_X": 40.0, "LR_Y": 30.0, "NAME": "40N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 30.0 ], [ 30.0, 40.0 ], [ 40.0, 40.0 ], [ 40.0, 30.0 ], [ 30.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 50.0, "LR_X": 40.0, "LR_Y": 40.0, "NAME": "50N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 40.0 ], [ 30.0, 50.0 ], [ 40.0, 50.0 ], [ 40.0, 40.0 ], [ 30.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 60.0, "LR_X": 40.0, "LR_Y": 50.0, "NAME": "60N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 50.0 ], [ 30.0, 60.0 ], [ 40.0, 60.0 ], [ 40.0, 50.0 ], [ 30.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 70.0, "LR_X": 40.0, "LR_Y": 60.0, "NAME": "70N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 60.0 ], [ 30.0, 70.0 ], [ 40.0, 70.0 ], [ 40.0, 60.0 ], [ 30.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 80.0, "LR_X": 40.0, "LR_Y": 70.0, "NAME": "80N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 70.0 ], [ 30.0, 80.0 ], [ 40.0, 80.0 ], [ 40.0, 70.0 ], [ 30.0, 70.0 ] ] ] } }, -{ "type": 
"Feature", "properties": { "UL_X": 40.0, "UL_Y": -50.0, "LR_X": 50.0, "LR_Y": -60.0, "NAME": "50S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -60.0 ], [ 40.0, -50.0 ], [ 50.0, -50.0 ], [ 50.0, -60.0 ], [ 40.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -40.0, "LR_X": 50.0, "LR_Y": -50.0, "NAME": "40S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -50.0 ], [ 40.0, -40.0 ], [ 50.0, -40.0 ], [ 50.0, -50.0 ], [ 40.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -30.0, "LR_X": 50.0, "LR_Y": -40.0, "NAME": "30S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -40.0 ], [ 40.0, -30.0 ], [ 50.0, -30.0 ], [ 50.0, -40.0 ], [ 40.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -20.0, "LR_X": 50.0, "LR_Y": -30.0, "NAME": "20S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -30.0 ], [ 40.0, -20.0 ], [ 50.0, -20.0 ], [ 50.0, -30.0 ], [ 40.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -10.0, "LR_X": 50.0, "LR_Y": -20.0, "NAME": "10S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -20.0 ], [ 40.0, -10.0 ], [ 50.0, -10.0 ], [ 50.0, -20.0 ], [ 40.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 0.0, "LR_X": 50.0, "LR_Y": -10.0, "NAME": "00N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -10.0 ], [ 40.0, 0.0 ], [ 50.0, 0.0 ], [ 50.0, -10.0 ], [ 40.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 10.0, "LR_X": 50.0, "LR_Y": 0.0, "NAME": "10N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 0.0 ], [ 40.0, 10.0 ], [ 50.0, 10.0 ], [ 50.0, 0.0 ], [ 40.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 20.0, "LR_X": 50.0, "LR_Y": 10.0, "NAME": "20N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 10.0 ], [ 
40.0, 20.0 ], [ 50.0, 20.0 ], [ 50.0, 10.0 ], [ 40.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 30.0, "LR_X": 50.0, "LR_Y": 20.0, "NAME": "30N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 20.0 ], [ 40.0, 30.0 ], [ 50.0, 30.0 ], [ 50.0, 20.0 ], [ 40.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 40.0, "LR_X": 50.0, "LR_Y": 30.0, "NAME": "40N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 30.0 ], [ 40.0, 40.0 ], [ 50.0, 40.0 ], [ 50.0, 30.0 ], [ 40.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 50.0, "LR_X": 50.0, "LR_Y": 40.0, "NAME": "50N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 40.0 ], [ 40.0, 50.0 ], [ 50.0, 50.0 ], [ 50.0, 40.0 ], [ 40.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 60.0, "LR_X": 50.0, "LR_Y": 50.0, "NAME": "60N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 50.0 ], [ 40.0, 60.0 ], [ 50.0, 60.0 ], [ 50.0, 50.0 ], [ 40.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 70.0, "LR_X": 50.0, "LR_Y": 60.0, "NAME": "70N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 60.0 ], [ 40.0, 70.0 ], [ 50.0, 70.0 ], [ 50.0, 60.0 ], [ 40.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 80.0, "LR_X": 50.0, "LR_Y": 70.0, "NAME": "80N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 70.0 ], [ 40.0, 80.0 ], [ 50.0, 80.0 ], [ 50.0, 70.0 ], [ 40.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -50.0, "LR_X": 60.0, "LR_Y": -60.0, "NAME": "50S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -60.0 ], [ 50.0, -50.0 ], [ 60.0, -50.0 ], [ 60.0, -60.0 ], [ 50.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -40.0, "LR_X": 60.0, "LR_Y": -50.0, "NAME": "40S_050E" }, "geometry": 
{ "type": "Polygon", "coordinates": [ [ [ 50.0, -50.0 ], [ 50.0, -40.0 ], [ 60.0, -40.0 ], [ 60.0, -50.0 ], [ 50.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -30.0, "LR_X": 60.0, "LR_Y": -40.0, "NAME": "30S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -40.0 ], [ 50.0, -30.0 ], [ 60.0, -30.0 ], [ 60.0, -40.0 ], [ 50.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -20.0, "LR_X": 60.0, "LR_Y": -30.0, "NAME": "20S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -30.0 ], [ 50.0, -20.0 ], [ 60.0, -20.0 ], [ 60.0, -30.0 ], [ 50.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -10.0, "LR_X": 60.0, "LR_Y": -20.0, "NAME": "10S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -20.0 ], [ 50.0, -10.0 ], [ 60.0, -10.0 ], [ 60.0, -20.0 ], [ 50.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 0.0, "LR_X": 60.0, "LR_Y": -10.0, "NAME": "00N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -10.0 ], [ 50.0, 0.0 ], [ 60.0, 0.0 ], [ 60.0, -10.0 ], [ 50.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 10.0, "LR_X": 60.0, "LR_Y": 0.0, "NAME": "10N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 0.0 ], [ 50.0, 10.0 ], [ 60.0, 10.0 ], [ 60.0, 0.0 ], [ 50.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 20.0, "LR_X": 60.0, "LR_Y": 10.0, "NAME": "20N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 10.0 ], [ 50.0, 20.0 ], [ 60.0, 20.0 ], [ 60.0, 10.0 ], [ 50.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 30.0, "LR_X": 60.0, "LR_Y": 20.0, "NAME": "30N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 20.0 ], [ 50.0, 30.0 ], [ 60.0, 30.0 ], [ 60.0, 20.0 ], [ 50.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, 
"UL_Y": 40.0, "LR_X": 60.0, "LR_Y": 30.0, "NAME": "40N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 30.0 ], [ 50.0, 40.0 ], [ 60.0, 40.0 ], [ 60.0, 30.0 ], [ 50.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 50.0, "LR_X": 60.0, "LR_Y": 40.0, "NAME": "50N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 40.0 ], [ 50.0, 50.0 ], [ 60.0, 50.0 ], [ 60.0, 40.0 ], [ 50.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 60.0, "LR_X": 60.0, "LR_Y": 50.0, "NAME": "60N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 50.0 ], [ 50.0, 60.0 ], [ 60.0, 60.0 ], [ 60.0, 50.0 ], [ 50.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 70.0, "LR_X": 60.0, "LR_Y": 60.0, "NAME": "70N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 60.0 ], [ 50.0, 70.0 ], [ 60.0, 70.0 ], [ 60.0, 60.0 ], [ 50.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 80.0, "LR_X": 60.0, "LR_Y": 70.0, "NAME": "80N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 70.0 ], [ 50.0, 80.0 ], [ 60.0, 80.0 ], [ 60.0, 70.0 ], [ 50.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -50.0, "LR_X": 70.0, "LR_Y": -60.0, "NAME": "50S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -60.0 ], [ 60.0, -50.0 ], [ 70.0, -50.0 ], [ 70.0, -60.0 ], [ 60.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -40.0, "LR_X": 70.0, "LR_Y": -50.0, "NAME": "40S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -50.0 ], [ 60.0, -40.0 ], [ 70.0, -40.0 ], [ 70.0, -50.0 ], [ 60.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -30.0, "LR_X": 70.0, "LR_Y": -40.0, "NAME": "30S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -40.0 ], [ 60.0, -30.0 ], [ 70.0, -30.0 ], [ 70.0, -40.0 ], [ 60.0, 
-40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -20.0, "LR_X": 70.0, "LR_Y": -30.0, "NAME": "20S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -30.0 ], [ 60.0, -20.0 ], [ 70.0, -20.0 ], [ 70.0, -30.0 ], [ 60.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -10.0, "LR_X": 70.0, "LR_Y": -20.0, "NAME": "10S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -20.0 ], [ 60.0, -10.0 ], [ 70.0, -10.0 ], [ 70.0, -20.0 ], [ 60.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 0.0, "LR_X": 70.0, "LR_Y": -10.0, "NAME": "00N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -10.0 ], [ 60.0, 0.0 ], [ 70.0, 0.0 ], [ 70.0, -10.0 ], [ 60.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 10.0, "LR_X": 70.0, "LR_Y": 0.0, "NAME": "10N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 0.0 ], [ 60.0, 10.0 ], [ 70.0, 10.0 ], [ 70.0, 0.0 ], [ 60.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 20.0, "LR_X": 70.0, "LR_Y": 10.0, "NAME": "20N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 10.0 ], [ 60.0, 20.0 ], [ 70.0, 20.0 ], [ 70.0, 10.0 ], [ 60.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 30.0, "LR_X": 70.0, "LR_Y": 20.0, "NAME": "30N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 20.0 ], [ 60.0, 30.0 ], [ 70.0, 30.0 ], [ 70.0, 20.0 ], [ 60.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 40.0, "LR_X": 70.0, "LR_Y": 30.0, "NAME": "40N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 30.0 ], [ 60.0, 40.0 ], [ 70.0, 40.0 ], [ 70.0, 30.0 ], [ 60.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 50.0, "LR_X": 70.0, "LR_Y": 40.0, "NAME": "50N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 
40.0 ], [ 60.0, 50.0 ], [ 70.0, 50.0 ], [ 70.0, 40.0 ], [ 60.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 60.0, "LR_X": 70.0, "LR_Y": 50.0, "NAME": "60N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 50.0 ], [ 60.0, 60.0 ], [ 70.0, 60.0 ], [ 70.0, 50.0 ], [ 60.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 70.0, "LR_X": 70.0, "LR_Y": 60.0, "NAME": "70N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 60.0 ], [ 60.0, 70.0 ], [ 70.0, 70.0 ], [ 70.0, 60.0 ], [ 60.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 80.0, "LR_X": 70.0, "LR_Y": 70.0, "NAME": "80N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 70.0 ], [ 60.0, 80.0 ], [ 70.0, 80.0 ], [ 70.0, 70.0 ], [ 60.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -50.0, "LR_X": 80.0, "LR_Y": -60.0, "NAME": "50S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -60.0 ], [ 70.0, -50.0 ], [ 80.0, -50.0 ], [ 80.0, -60.0 ], [ 70.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -40.0, "LR_X": 80.0, "LR_Y": -50.0, "NAME": "40S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -50.0 ], [ 70.0, -40.0 ], [ 80.0, -40.0 ], [ 80.0, -50.0 ], [ 70.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -30.0, "LR_X": 80.0, "LR_Y": -40.0, "NAME": "30S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -40.0 ], [ 70.0, -30.0 ], [ 80.0, -30.0 ], [ 80.0, -40.0 ], [ 70.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -20.0, "LR_X": 80.0, "LR_Y": -30.0, "NAME": "20S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -30.0 ], [ 70.0, -20.0 ], [ 80.0, -20.0 ], [ 80.0, -30.0 ], [ 70.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -10.0, "LR_X": 80.0, "LR_Y": -20.0, 
"NAME": "10S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -20.0 ], [ 70.0, -10.0 ], [ 80.0, -10.0 ], [ 80.0, -20.0 ], [ 70.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 0.0, "LR_X": 80.0, "LR_Y": -10.0, "NAME": "00N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -10.0 ], [ 70.0, 0.0 ], [ 80.0, 0.0 ], [ 80.0, -10.0 ], [ 70.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 10.0, "LR_X": 80.0, "LR_Y": 0.0, "NAME": "10N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 0.0 ], [ 70.0, 10.0 ], [ 80.0, 10.0 ], [ 80.0, 0.0 ], [ 70.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 20.0, "LR_X": 80.0, "LR_Y": 10.0, "NAME": "20N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 10.0 ], [ 70.0, 20.0 ], [ 80.0, 20.0 ], [ 80.0, 10.0 ], [ 70.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 30.0, "LR_X": 80.0, "LR_Y": 20.0, "NAME": "30N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 20.0 ], [ 70.0, 30.0 ], [ 80.0, 30.0 ], [ 80.0, 20.0 ], [ 70.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 40.0, "LR_X": 80.0, "LR_Y": 30.0, "NAME": "40N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 30.0 ], [ 70.0, 40.0 ], [ 80.0, 40.0 ], [ 80.0, 30.0 ], [ 70.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 50.0, "LR_X": 80.0, "LR_Y": 40.0, "NAME": "50N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 40.0 ], [ 70.0, 50.0 ], [ 80.0, 50.0 ], [ 80.0, 40.0 ], [ 70.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 60.0, "LR_X": 80.0, "LR_Y": 50.0, "NAME": "60N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 50.0 ], [ 70.0, 60.0 ], [ 80.0, 60.0 ], [ 80.0, 50.0 ], [ 70.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { 
"UL_X": 70.0, "UL_Y": 70.0, "LR_X": 80.0, "LR_Y": 60.0, "NAME": "70N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 60.0 ], [ 70.0, 70.0 ], [ 80.0, 70.0 ], [ 80.0, 60.0 ], [ 70.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 80.0, "LR_X": 80.0, "LR_Y": 70.0, "NAME": "80N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 70.0 ], [ 70.0, 80.0 ], [ 80.0, 80.0 ], [ 80.0, 70.0 ], [ 70.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -50.0, "LR_X": 90.0, "LR_Y": -60.0, "NAME": "50S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -60.0 ], [ 80.0, -50.0 ], [ 90.0, -50.0 ], [ 90.0, -60.0 ], [ 80.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -40.0, "LR_X": 90.0, "LR_Y": -50.0, "NAME": "40S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -50.0 ], [ 80.0, -40.0 ], [ 90.0, -40.0 ], [ 90.0, -50.0 ], [ 80.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -30.0, "LR_X": 90.0, "LR_Y": -40.0, "NAME": "30S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -40.0 ], [ 80.0, -30.0 ], [ 90.0, -30.0 ], [ 90.0, -40.0 ], [ 80.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -20.0, "LR_X": 90.0, "LR_Y": -30.0, "NAME": "20S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -30.0 ], [ 80.0, -20.0 ], [ 90.0, -20.0 ], [ 90.0, -30.0 ], [ 80.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -10.0, "LR_X": 90.0, "LR_Y": -20.0, "NAME": "10S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -20.0 ], [ 80.0, -10.0 ], [ 90.0, -10.0 ], [ 90.0, -20.0 ], [ 80.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 0.0, "LR_X": 90.0, "LR_Y": -10.0, "NAME": "00N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -10.0 ], [ 80.0, 0.0 ], [ 90.0, 
0.0 ], [ 90.0, -10.0 ], [ 80.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 10.0, "LR_X": 90.0, "LR_Y": 0.0, "NAME": "10N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 0.0 ], [ 80.0, 10.0 ], [ 90.0, 10.0 ], [ 90.0, 0.0 ], [ 80.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 20.0, "LR_X": 90.0, "LR_Y": 10.0, "NAME": "20N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 10.0 ], [ 80.0, 20.0 ], [ 90.0, 20.0 ], [ 90.0, 10.0 ], [ 80.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 30.0, "LR_X": 90.0, "LR_Y": 20.0, "NAME": "30N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 20.0 ], [ 80.0, 30.0 ], [ 90.0, 30.0 ], [ 90.0, 20.0 ], [ 80.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 40.0, "LR_X": 90.0, "LR_Y": 30.0, "NAME": "40N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 30.0 ], [ 80.0, 40.0 ], [ 90.0, 40.0 ], [ 90.0, 30.0 ], [ 80.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 50.0, "LR_X": 90.0, "LR_Y": 40.0, "NAME": "50N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 40.0 ], [ 80.0, 50.0 ], [ 90.0, 50.0 ], [ 90.0, 40.0 ], [ 80.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 60.0, "LR_X": 90.0, "LR_Y": 50.0, "NAME": "60N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 50.0 ], [ 80.0, 60.0 ], [ 90.0, 60.0 ], [ 90.0, 50.0 ], [ 80.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 70.0, "LR_X": 90.0, "LR_Y": 60.0, "NAME": "70N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 60.0 ], [ 80.0, 70.0 ], [ 90.0, 70.0 ], [ 90.0, 60.0 ], [ 80.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 80.0, "LR_X": 90.0, "LR_Y": 70.0, "NAME": "80N_080E" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ 80.0, 70.0 ], [ 80.0, 80.0 ], [ 90.0, 80.0 ], [ 90.0, 70.0 ], [ 80.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -50.0, "LR_X": 100.0, "LR_Y": -60.0, "NAME": "50S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -60.0 ], [ 90.0, -50.0 ], [ 100.0, -50.0 ], [ 100.0, -60.0 ], [ 90.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -40.0, "LR_X": 100.0, "LR_Y": -50.0, "NAME": "40S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -50.0 ], [ 90.0, -40.0 ], [ 100.0, -40.0 ], [ 100.0, -50.0 ], [ 90.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -30.0, "LR_X": 100.0, "LR_Y": -40.0, "NAME": "30S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -40.0 ], [ 90.0, -30.0 ], [ 100.0, -30.0 ], [ 100.0, -40.0 ], [ 90.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -20.0, "LR_X": 100.0, "LR_Y": -30.0, "NAME": "20S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -30.0 ], [ 90.0, -20.0 ], [ 100.0, -20.0 ], [ 100.0, -30.0 ], [ 90.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -10.0, "LR_X": 100.0, "LR_Y": -20.0, "NAME": "10S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -20.0 ], [ 90.0, -10.0 ], [ 100.0, -10.0 ], [ 100.0, -20.0 ], [ 90.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 0.0, "LR_X": 100.0, "LR_Y": -10.0, "NAME": "00N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -10.0 ], [ 90.0, 0.0 ], [ 100.0, 0.0 ], [ 100.0, -10.0 ], [ 90.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 10.0, "LR_X": 100.0, "LR_Y": 0.0, "NAME": "10N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 0.0 ], [ 90.0, 10.0 ], [ 100.0, 10.0 ], [ 100.0, 0.0 ], [ 90.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 
90.0, "UL_Y": 20.0, "LR_X": 100.0, "LR_Y": 10.0, "NAME": "20N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 10.0 ], [ 90.0, 20.0 ], [ 100.0, 20.0 ], [ 100.0, 10.0 ], [ 90.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 30.0, "LR_X": 100.0, "LR_Y": 20.0, "NAME": "30N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 20.0 ], [ 90.0, 30.0 ], [ 100.0, 30.0 ], [ 100.0, 20.0 ], [ 90.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 40.0, "LR_X": 100.0, "LR_Y": 30.0, "NAME": "40N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 30.0 ], [ 90.0, 40.0 ], [ 100.0, 40.0 ], [ 100.0, 30.0 ], [ 90.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 50.0, "LR_X": 100.0, "LR_Y": 40.0, "NAME": "50N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 40.0 ], [ 90.0, 50.0 ], [ 100.0, 50.0 ], [ 100.0, 40.0 ], [ 90.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 60.0, "LR_X": 100.0, "LR_Y": 50.0, "NAME": "60N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 50.0 ], [ 90.0, 60.0 ], [ 100.0, 60.0 ], [ 100.0, 50.0 ], [ 90.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 70.0, "LR_X": 100.0, "LR_Y": 60.0, "NAME": "70N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 60.0 ], [ 90.0, 70.0 ], [ 100.0, 70.0 ], [ 100.0, 60.0 ], [ 90.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 80.0, "LR_X": 100.0, "LR_Y": 70.0, "NAME": "80N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 70.0 ], [ 90.0, 80.0 ], [ 100.0, 80.0 ], [ 100.0, 70.0 ], [ 90.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -50.0, "LR_X": 110.0, "LR_Y": -60.0, "NAME": "50S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -60.0 ], [ 100.0, -50.0 ], [ 110.0, -50.0 ], [ 
110.0, -60.0 ], [ 100.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -40.0, "LR_X": 110.0, "LR_Y": -50.0, "NAME": "40S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -50.0 ], [ 100.0, -40.0 ], [ 110.0, -40.0 ], [ 110.0, -50.0 ], [ 100.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -30.0, "LR_X": 110.0, "LR_Y": -40.0, "NAME": "30S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -40.0 ], [ 100.0, -30.0 ], [ 110.0, -30.0 ], [ 110.0, -40.0 ], [ 100.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -20.0, "LR_X": 110.0, "LR_Y": -30.0, "NAME": "20S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -30.0 ], [ 100.0, -20.0 ], [ 110.0, -20.0 ], [ 110.0, -30.0 ], [ 100.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -10.0, "LR_X": 110.0, "LR_Y": -20.0, "NAME": "10S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -20.0 ], [ 100.0, -10.0 ], [ 110.0, -10.0 ], [ 110.0, -20.0 ], [ 100.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 0.0, "LR_X": 110.0, "LR_Y": -10.0, "NAME": "00N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -10.0 ], [ 100.0, 0.0 ], [ 110.0, 0.0 ], [ 110.0, -10.0 ], [ 100.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 10.0, "LR_X": 110.0, "LR_Y": 0.0, "NAME": "10N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 0.0 ], [ 100.0, 10.0 ], [ 110.0, 10.0 ], [ 110.0, 0.0 ], [ 100.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 20.0, "LR_X": 110.0, "LR_Y": 10.0, "NAME": "20N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 10.0 ], [ 100.0, 20.0 ], [ 110.0, 20.0 ], [ 110.0, 10.0 ], [ 100.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 30.0, "LR_X": 110.0, "LR_Y": 
20.0, "NAME": "30N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 20.0 ], [ 100.0, 30.0 ], [ 110.0, 30.0 ], [ 110.0, 20.0 ], [ 100.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 40.0, "LR_X": 110.0, "LR_Y": 30.0, "NAME": "40N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 30.0 ], [ 100.0, 40.0 ], [ 110.0, 40.0 ], [ 110.0, 30.0 ], [ 100.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 50.0, "LR_X": 110.0, "LR_Y": 40.0, "NAME": "50N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 40.0 ], [ 100.0, 50.0 ], [ 110.0, 50.0 ], [ 110.0, 40.0 ], [ 100.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 60.0, "LR_X": 110.0, "LR_Y": 50.0, "NAME": "60N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 50.0 ], [ 100.0, 60.0 ], [ 110.0, 60.0 ], [ 110.0, 50.0 ], [ 100.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 70.0, "LR_X": 110.0, "LR_Y": 60.0, "NAME": "70N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 60.0 ], [ 100.0, 70.0 ], [ 110.0, 70.0 ], [ 110.0, 60.0 ], [ 100.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 80.0, "LR_X": 110.0, "LR_Y": 70.0, "NAME": "80N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 70.0 ], [ 100.0, 80.0 ], [ 110.0, 80.0 ], [ 110.0, 70.0 ], [ 100.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -50.0, "LR_X": 120.0, "LR_Y": -60.0, "NAME": "50S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -60.0 ], [ 110.0, -50.0 ], [ 120.0, -50.0 ], [ 120.0, -60.0 ], [ 110.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -40.0, "LR_X": 120.0, "LR_Y": -50.0, "NAME": "40S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -50.0 ], [ 110.0, -40.0 ], [ 120.0, -40.0 ], [ 120.0, -50.0 
], [ 110.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -30.0, "LR_X": 120.0, "LR_Y": -40.0, "NAME": "30S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -40.0 ], [ 110.0, -30.0 ], [ 120.0, -30.0 ], [ 120.0, -40.0 ], [ 110.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -20.0, "LR_X": 120.0, "LR_Y": -30.0, "NAME": "20S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -30.0 ], [ 110.0, -20.0 ], [ 120.0, -20.0 ], [ 120.0, -30.0 ], [ 110.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -10.0, "LR_X": 120.0, "LR_Y": -20.0, "NAME": "10S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -20.0 ], [ 110.0, -10.0 ], [ 120.0, -10.0 ], [ 120.0, -20.0 ], [ 110.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 0.0, "LR_X": 120.0, "LR_Y": -10.0, "NAME": "00N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -10.0 ], [ 110.0, 0.0 ], [ 120.0, 0.0 ], [ 120.0, -10.0 ], [ 110.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 10.0, "LR_X": 120.0, "LR_Y": 0.0, "NAME": "10N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 0.0 ], [ 110.0, 10.0 ], [ 120.0, 10.0 ], [ 120.0, 0.0 ], [ 110.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 20.0, "LR_X": 120.0, "LR_Y": 10.0, "NAME": "20N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 10.0 ], [ 110.0, 20.0 ], [ 120.0, 20.0 ], [ 120.0, 10.0 ], [ 110.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 30.0, "LR_X": 120.0, "LR_Y": 20.0, "NAME": "30N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 20.0 ], [ 110.0, 30.0 ], [ 120.0, 30.0 ], [ 120.0, 20.0 ], [ 110.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 40.0, "LR_X": 120.0, "LR_Y": 30.0, "NAME": 
"40N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 30.0 ], [ 110.0, 40.0 ], [ 120.0, 40.0 ], [ 120.0, 30.0 ], [ 110.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 50.0, "LR_X": 120.0, "LR_Y": 40.0, "NAME": "50N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 40.0 ], [ 110.0, 50.0 ], [ 120.0, 50.0 ], [ 120.0, 40.0 ], [ 110.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 60.0, "LR_X": 120.0, "LR_Y": 50.0, "NAME": "60N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 50.0 ], [ 110.0, 60.0 ], [ 120.0, 60.0 ], [ 120.0, 50.0 ], [ 110.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 70.0, "LR_X": 120.0, "LR_Y": 60.0, "NAME": "70N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 60.0 ], [ 110.0, 70.0 ], [ 120.0, 70.0 ], [ 120.0, 60.0 ], [ 110.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 80.0, "LR_X": 120.0, "LR_Y": 70.0, "NAME": "80N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 70.0 ], [ 110.0, 80.0 ], [ 120.0, 80.0 ], [ 120.0, 70.0 ], [ 110.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -50.0, "LR_X": 130.0, "LR_Y": -60.0, "NAME": "50S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -60.0 ], [ 120.0, -50.0 ], [ 130.0, -50.0 ], [ 130.0, -60.0 ], [ 120.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -40.0, "LR_X": 130.0, "LR_Y": -50.0, "NAME": "40S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -50.0 ], [ 120.0, -40.0 ], [ 130.0, -40.0 ], [ 130.0, -50.0 ], [ 120.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -30.0, "LR_X": 130.0, "LR_Y": -40.0, "NAME": "30S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -40.0 ], [ 120.0, -30.0 ], [ 130.0, -30.0 ], [ 130.0, -40.0 ], [ 
120.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -20.0, "LR_X": 130.0, "LR_Y": -30.0, "NAME": "20S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -30.0 ], [ 120.0, -20.0 ], [ 130.0, -20.0 ], [ 130.0, -30.0 ], [ 120.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -10.0, "LR_X": 130.0, "LR_Y": -20.0, "NAME": "10S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -20.0 ], [ 120.0, -10.0 ], [ 130.0, -10.0 ], [ 130.0, -20.0 ], [ 120.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 0.0, "LR_X": 130.0, "LR_Y": -10.0, "NAME": "00N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -10.0 ], [ 120.0, 0.0 ], [ 130.0, 0.0 ], [ 130.0, -10.0 ], [ 120.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 10.0, "LR_X": 130.0, "LR_Y": 0.0, "NAME": "10N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 0.0 ], [ 120.0, 10.0 ], [ 130.0, 10.0 ], [ 130.0, 0.0 ], [ 120.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 20.0, "LR_X": 130.0, "LR_Y": 10.0, "NAME": "20N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 10.0 ], [ 120.0, 20.0 ], [ 130.0, 20.0 ], [ 130.0, 10.0 ], [ 120.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 30.0, "LR_X": 130.0, "LR_Y": 20.0, "NAME": "30N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 20.0 ], [ 120.0, 30.0 ], [ 130.0, 30.0 ], [ 130.0, 20.0 ], [ 120.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 40.0, "LR_X": 130.0, "LR_Y": 30.0, "NAME": "40N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 30.0 ], [ 120.0, 40.0 ], [ 130.0, 40.0 ], [ 130.0, 30.0 ], [ 120.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 50.0, "LR_X": 130.0, "LR_Y": 40.0, "NAME": "50N_120E" }, 
"geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 40.0 ], [ 120.0, 50.0 ], [ 130.0, 50.0 ], [ 130.0, 40.0 ], [ 120.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 60.0, "LR_X": 130.0, "LR_Y": 50.0, "NAME": "60N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 50.0 ], [ 120.0, 60.0 ], [ 130.0, 60.0 ], [ 130.0, 50.0 ], [ 120.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 70.0, "LR_X": 130.0, "LR_Y": 60.0, "NAME": "70N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 60.0 ], [ 120.0, 70.0 ], [ 130.0, 70.0 ], [ 130.0, 60.0 ], [ 120.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 80.0, "LR_X": 130.0, "LR_Y": 70.0, "NAME": "80N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 70.0 ], [ 120.0, 80.0 ], [ 130.0, 80.0 ], [ 130.0, 70.0 ], [ 120.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -50.0, "LR_X": 140.0, "LR_Y": -60.0, "NAME": "50S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -60.0 ], [ 130.0, -50.0 ], [ 140.0, -50.0 ], [ 140.0, -60.0 ], [ 130.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -40.0, "LR_X": 140.0, "LR_Y": -50.0, "NAME": "40S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -50.0 ], [ 130.0, -40.0 ], [ 140.0, -40.0 ], [ 140.0, -50.0 ], [ 130.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -30.0, "LR_X": 140.0, "LR_Y": -40.0, "NAME": "30S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -40.0 ], [ 130.0, -30.0 ], [ 140.0, -30.0 ], [ 140.0, -40.0 ], [ 130.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -20.0, "LR_X": 140.0, "LR_Y": -30.0, "NAME": "20S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -30.0 ], [ 130.0, -20.0 ], [ 140.0, -20.0 ], [ 140.0, -30.0 ], [ 130.0, 
-30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -10.0, "LR_X": 140.0, "LR_Y": -20.0, "NAME": "10S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -20.0 ], [ 130.0, -10.0 ], [ 140.0, -10.0 ], [ 140.0, -20.0 ], [ 130.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 0.0, "LR_X": 140.0, "LR_Y": -10.0, "NAME": "00N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -10.0 ], [ 130.0, 0.0 ], [ 140.0, 0.0 ], [ 140.0, -10.0 ], [ 130.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 10.0, "LR_X": 140.0, "LR_Y": 0.0, "NAME": "10N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 0.0 ], [ 130.0, 10.0 ], [ 140.0, 10.0 ], [ 140.0, 0.0 ], [ 130.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 20.0, "LR_X": 140.0, "LR_Y": 10.0, "NAME": "20N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 10.0 ], [ 130.0, 20.0 ], [ 140.0, 20.0 ], [ 140.0, 10.0 ], [ 130.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 30.0, "LR_X": 140.0, "LR_Y": 20.0, "NAME": "30N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 20.0 ], [ 130.0, 30.0 ], [ 140.0, 30.0 ], [ 140.0, 20.0 ], [ 130.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 40.0, "LR_X": 140.0, "LR_Y": 30.0, "NAME": "40N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 30.0 ], [ 130.0, 40.0 ], [ 140.0, 40.0 ], [ 140.0, 30.0 ], [ 130.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 50.0, "LR_X": 140.0, "LR_Y": 40.0, "NAME": "50N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 40.0 ], [ 130.0, 50.0 ], [ 140.0, 50.0 ], [ 140.0, 40.0 ], [ 130.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 60.0, "LR_X": 140.0, "LR_Y": 50.0, "NAME": "60N_130E" }, "geometry": { 
"type": "Polygon", "coordinates": [ [ [ 130.0, 50.0 ], [ 130.0, 60.0 ], [ 140.0, 60.0 ], [ 140.0, 50.0 ], [ 130.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 70.0, "LR_X": 140.0, "LR_Y": 60.0, "NAME": "70N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 60.0 ], [ 130.0, 70.0 ], [ 140.0, 70.0 ], [ 140.0, 60.0 ], [ 130.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 80.0, "LR_X": 140.0, "LR_Y": 70.0, "NAME": "80N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 70.0 ], [ 130.0, 80.0 ], [ 140.0, 80.0 ], [ 140.0, 70.0 ], [ 130.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -50.0, "LR_X": 150.0, "LR_Y": -60.0, "NAME": "50S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -60.0 ], [ 140.0, -50.0 ], [ 150.0, -50.0 ], [ 150.0, -60.0 ], [ 140.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -40.0, "LR_X": 150.0, "LR_Y": -50.0, "NAME": "40S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -50.0 ], [ 140.0, -40.0 ], [ 150.0, -40.0 ], [ 150.0, -50.0 ], [ 140.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -30.0, "LR_X": 150.0, "LR_Y": -40.0, "NAME": "30S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -40.0 ], [ 140.0, -30.0 ], [ 150.0, -30.0 ], [ 150.0, -40.0 ], [ 140.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -20.0, "LR_X": 150.0, "LR_Y": -30.0, "NAME": "20S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -30.0 ], [ 140.0, -20.0 ], [ 150.0, -20.0 ], [ 150.0, -30.0 ], [ 140.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -10.0, "LR_X": 150.0, "LR_Y": -20.0, "NAME": "10S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -20.0 ], [ 140.0, -10.0 ], [ 150.0, -10.0 ], [ 150.0, -20.0 ], [ 140.0, -20.0 ] ] 
] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 0.0, "LR_X": 150.0, "LR_Y": -10.0, "NAME": "00N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -10.0 ], [ 140.0, 0.0 ], [ 150.0, 0.0 ], [ 150.0, -10.0 ], [ 140.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 10.0, "LR_X": 150.0, "LR_Y": 0.0, "NAME": "10N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 0.0 ], [ 140.0, 10.0 ], [ 150.0, 10.0 ], [ 150.0, 0.0 ], [ 140.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 20.0, "LR_X": 150.0, "LR_Y": 10.0, "NAME": "20N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 10.0 ], [ 140.0, 20.0 ], [ 150.0, 20.0 ], [ 150.0, 10.0 ], [ 140.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 30.0, "LR_X": 150.0, "LR_Y": 20.0, "NAME": "30N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 20.0 ], [ 140.0, 30.0 ], [ 150.0, 30.0 ], [ 150.0, 20.0 ], [ 140.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 40.0, "LR_X": 150.0, "LR_Y": 30.0, "NAME": "40N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 30.0 ], [ 140.0, 40.0 ], [ 150.0, 40.0 ], [ 150.0, 30.0 ], [ 140.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 50.0, "LR_X": 150.0, "LR_Y": 40.0, "NAME": "50N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 40.0 ], [ 140.0, 50.0 ], [ 150.0, 50.0 ], [ 150.0, 40.0 ], [ 140.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 60.0, "LR_X": 150.0, "LR_Y": 50.0, "NAME": "60N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 50.0 ], [ 140.0, 60.0 ], [ 150.0, 60.0 ], [ 150.0, 50.0 ], [ 140.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 70.0, "LR_X": 150.0, "LR_Y": 60.0, "NAME": "70N_140E" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ 140.0, 60.0 ], [ 140.0, 70.0 ], [ 150.0, 70.0 ], [ 150.0, 60.0 ], [ 140.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 80.0, "LR_X": 150.0, "LR_Y": 70.0, "NAME": "80N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 70.0 ], [ 140.0, 80.0 ], [ 150.0, 80.0 ], [ 150.0, 70.0 ], [ 140.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -50.0, "LR_X": 160.0, "LR_Y": -60.0, "NAME": "50S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -60.0 ], [ 150.0, -50.0 ], [ 160.0, -50.0 ], [ 160.0, -60.0 ], [ 150.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -40.0, "LR_X": 160.0, "LR_Y": -50.0, "NAME": "40S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -50.0 ], [ 150.0, -40.0 ], [ 160.0, -40.0 ], [ 160.0, -50.0 ], [ 150.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -30.0, "LR_X": 160.0, "LR_Y": -40.0, "NAME": "30S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -40.0 ], [ 150.0, -30.0 ], [ 160.0, -30.0 ], [ 160.0, -40.0 ], [ 150.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -20.0, "LR_X": 160.0, "LR_Y": -30.0, "NAME": "20S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -30.0 ], [ 150.0, -20.0 ], [ 160.0, -20.0 ], [ 160.0, -30.0 ], [ 150.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -10.0, "LR_X": 160.0, "LR_Y": -20.0, "NAME": "10S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -20.0 ], [ 150.0, -10.0 ], [ 160.0, -10.0 ], [ 160.0, -20.0 ], [ 150.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 0.0, "LR_X": 160.0, "LR_Y": -10.0, "NAME": "00N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -10.0 ], [ 150.0, 0.0 ], [ 160.0, 0.0 ], [ 160.0, -10.0 ], [ 150.0, -10.0 ] ] ] } }, -{ "type": 
"Feature", "properties": { "UL_X": 150.0, "UL_Y": 10.0, "LR_X": 160.0, "LR_Y": 0.0, "NAME": "10N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 0.0 ], [ 150.0, 10.0 ], [ 160.0, 10.0 ], [ 160.0, 0.0 ], [ 150.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 20.0, "LR_X": 160.0, "LR_Y": 10.0, "NAME": "20N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 10.0 ], [ 150.0, 20.0 ], [ 160.0, 20.0 ], [ 160.0, 10.0 ], [ 150.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 30.0, "LR_X": 160.0, "LR_Y": 20.0, "NAME": "30N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 20.0 ], [ 150.0, 30.0 ], [ 160.0, 30.0 ], [ 160.0, 20.0 ], [ 150.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 40.0, "LR_X": 160.0, "LR_Y": 30.0, "NAME": "40N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 30.0 ], [ 150.0, 40.0 ], [ 160.0, 40.0 ], [ 160.0, 30.0 ], [ 150.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 50.0, "LR_X": 160.0, "LR_Y": 40.0, "NAME": "50N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 40.0 ], [ 150.0, 50.0 ], [ 160.0, 50.0 ], [ 160.0, 40.0 ], [ 150.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 60.0, "LR_X": 160.0, "LR_Y": 50.0, "NAME": "60N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 50.0 ], [ 150.0, 60.0 ], [ 160.0, 60.0 ], [ 160.0, 50.0 ], [ 150.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 70.0, "LR_X": 160.0, "LR_Y": 60.0, "NAME": "70N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 60.0 ], [ 150.0, 70.0 ], [ 160.0, 70.0 ], [ 160.0, 60.0 ], [ 150.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 80.0, "LR_X": 160.0, "LR_Y": 70.0, "NAME": "80N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 
150.0, 70.0 ], [ 150.0, 80.0 ], [ 160.0, 80.0 ], [ 160.0, 70.0 ], [ 150.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -50.0, "LR_X": 170.0, "LR_Y": -60.0, "NAME": "50S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -60.0 ], [ 160.0, -50.0 ], [ 170.0, -50.0 ], [ 170.0, -60.0 ], [ 160.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -40.0, "LR_X": 170.0, "LR_Y": -50.0, "NAME": "40S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -50.0 ], [ 160.0, -40.0 ], [ 170.0, -40.0 ], [ 170.0, -50.0 ], [ 160.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -30.0, "LR_X": 170.0, "LR_Y": -40.0, "NAME": "30S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -40.0 ], [ 160.0, -30.0 ], [ 170.0, -30.0 ], [ 170.0, -40.0 ], [ 160.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -20.0, "LR_X": 170.0, "LR_Y": -30.0, "NAME": "20S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -30.0 ], [ 160.0, -20.0 ], [ 170.0, -20.0 ], [ 170.0, -30.0 ], [ 160.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -10.0, "LR_X": 170.0, "LR_Y": -20.0, "NAME": "10S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -20.0 ], [ 160.0, -10.0 ], [ 170.0, -10.0 ], [ 170.0, -20.0 ], [ 160.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 0.0, "LR_X": 170.0, "LR_Y": -10.0, "NAME": "00N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -10.0 ], [ 160.0, 0.0 ], [ 170.0, 0.0 ], [ 170.0, -10.0 ], [ 160.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 10.0, "LR_X": 170.0, "LR_Y": 0.0, "NAME": "10N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 0.0 ], [ 160.0, 10.0 ], [ 170.0, 10.0 ], [ 170.0, 0.0 ], [ 160.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": 
{ "UL_X": 160.0, "UL_Y": 20.0, "LR_X": 170.0, "LR_Y": 10.0, "NAME": "20N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 10.0 ], [ 160.0, 20.0 ], [ 170.0, 20.0 ], [ 170.0, 10.0 ], [ 160.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 30.0, "LR_X": 170.0, "LR_Y": 20.0, "NAME": "30N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 20.0 ], [ 160.0, 30.0 ], [ 170.0, 30.0 ], [ 170.0, 20.0 ], [ 160.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 40.0, "LR_X": 170.0, "LR_Y": 30.0, "NAME": "40N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 30.0 ], [ 160.0, 40.0 ], [ 170.0, 40.0 ], [ 170.0, 30.0 ], [ 160.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 50.0, "LR_X": 170.0, "LR_Y": 40.0, "NAME": "50N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 40.0 ], [ 160.0, 50.0 ], [ 170.0, 50.0 ], [ 170.0, 40.0 ], [ 160.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 60.0, "LR_X": 170.0, "LR_Y": 50.0, "NAME": "60N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 50.0 ], [ 160.0, 60.0 ], [ 170.0, 60.0 ], [ 170.0, 50.0 ], [ 160.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 70.0, "LR_X": 170.0, "LR_Y": 60.0, "NAME": "70N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 60.0 ], [ 160.0, 70.0 ], [ 170.0, 70.0 ], [ 170.0, 60.0 ], [ 160.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 80.0, "LR_X": 170.0, "LR_Y": 70.0, "NAME": "80N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 70.0 ], [ 160.0, 80.0 ], [ 170.0, 80.0 ], [ 170.0, 70.0 ], [ 160.0, 70.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -50.0, "LR_X": 180.0, "LR_Y": -60.0, "NAME": "50S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -60.0 ], [ 
170.0, -50.0 ], [ 180.0, -50.0 ], [ 180.0, -60.0 ], [ 170.0, -60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -40.0, "LR_X": 180.0, "LR_Y": -50.0, "NAME": "40S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -50.0 ], [ 170.0, -40.0 ], [ 180.0, -40.0 ], [ 180.0, -50.0 ], [ 170.0, -50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -30.0, "LR_X": 180.0, "LR_Y": -40.0, "NAME": "30S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -40.0 ], [ 170.0, -30.0 ], [ 180.0, -30.0 ], [ 180.0, -40.0 ], [ 170.0, -40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -20.0, "LR_X": 180.0, "LR_Y": -30.0, "NAME": "20S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -30.0 ], [ 170.0, -20.0 ], [ 180.0, -20.0 ], [ 180.0, -30.0 ], [ 170.0, -30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -10.0, "LR_X": 180.0, "LR_Y": -20.0, "NAME": "10S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -20.0 ], [ 170.0, -10.0 ], [ 180.0, -10.0 ], [ 180.0, -20.0 ], [ 170.0, -20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 0.0, "LR_X": 180.0, "LR_Y": -10.0, "NAME": "00N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -10.0 ], [ 170.0, 0.0 ], [ 180.0, 0.0 ], [ 180.0, -10.0 ], [ 170.0, -10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 10.0, "LR_X": 180.0, "LR_Y": 0.0, "NAME": "10N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 0.0 ], [ 170.0, 10.0 ], [ 180.0, 10.0 ], [ 180.0, 0.0 ], [ 170.0, 0.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 20.0, "LR_X": 180.0, "LR_Y": 10.0, "NAME": "20N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 10.0 ], [ 170.0, 20.0 ], [ 180.0, 20.0 ], [ 180.0, 10.0 ], [ 170.0, 10.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, 
"UL_Y": 30.0, "LR_X": 180.0, "LR_Y": 20.0, "NAME": "30N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 20.0 ], [ 170.0, 30.0 ], [ 180.0, 30.0 ], [ 180.0, 20.0 ], [ 170.0, 20.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 40.0, "LR_X": 180.0, "LR_Y": 30.0, "NAME": "40N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 30.0 ], [ 170.0, 40.0 ], [ 180.0, 40.0 ], [ 180.0, 30.0 ], [ 170.0, 30.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 50.0, "LR_X": 180.0, "LR_Y": 40.0, "NAME": "50N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 40.0 ], [ 170.0, 50.0 ], [ 180.0, 50.0 ], [ 180.0, 40.0 ], [ 170.0, 40.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 60.0, "LR_X": 180.0, "LR_Y": 50.0, "NAME": "60N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 50.0 ], [ 170.0, 60.0 ], [ 180.0, 60.0 ], [ 180.0, 50.0 ], [ 170.0, 50.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 70.0, "LR_X": 180.0, "LR_Y": 60.0, "NAME": "70N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 60.0 ], [ 170.0, 70.0 ], [ 180.0, 70.0 ], [ 180.0, 60.0 ], [ 170.0, 60.0 ] ] ] } }, -{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 80.0, "LR_X": 180.0, "LR_Y": 70.0, "NAME": "80N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 70.0 ], [ 170.0, 80.0 ], [ 180.0, 80.0 ], [ 180.0, 70.0 ], [ 170.0, 70.0 ] ] ] } } -] -} diff --git a/op_resources/glad_tile_geometry/NOTICE.md b/op_resources/glad_tile_geometry/NOTICE.md deleted file mode 100644 index 60fcb08e..00000000 --- a/op_resources/glad_tile_geometry/NOTICE.md +++ /dev/null @@ -1,9 +0,0 @@ -# NOTICE - -In this directory, the [`10d_tiles.geojson`](./10d_tiles.geojson) file was created using the -[10x10 degrees shape file](https://glad.umd.edu/users/Potapov/GLCLUC2020/10d_tiles.zip) -from the Global Land Cover and Land Use Change 
(GLAD) dataset. - -This worldwide dataset is freely accessible online and can be redistributed or -utilized without any restrictions, as long as the appropriate citation is given -in accordance with the Creative Commons Attribution License (CC BY). diff --git a/op_resources/sentinel_tile_geometry/NOTICE.md b/op_resources/sentinel_tile_geometry/NOTICE.md deleted file mode 100644 index 64eee1e3..00000000 --- a/op_resources/sentinel_tile_geometry/NOTICE.md +++ /dev/null @@ -1,7 +0,0 @@ -# NOTICE - -The kml file in this directory is Copernicus Sentinel data (2015) and was -published by the European Commission (Copernicus), ESA. - -Sentinel data is free, full and open for public use under EU law. For full details of use, refer to the -[Copernicus Sentinel Data Terms and Conditions](https://scihub.copernicus.eu/twiki/pub/SciHubWebPortal/TermsConditions/Sentinel_Data_Terms_and_Conditions.pdf) \ No newline at end of file diff --git a/op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml b/op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml deleted file mode 100644 index 1bacab76..00000000 --- a/op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ffcd58f1443f01f1c6f96baeef83a96239f86b50911729ed12195690ebc4de61 -size 108817408 diff --git a/op_resources/shadow_models/NOTICE.md b/op_resources/shadow_models/NOTICE.md deleted file mode 100644 index 33515751..00000000 --- a/op_resources/shadow_models/NOTICE.md +++ /dev/null @@ -1,12 +0,0 @@ -# NOTICE - -The model in this directory represents a fully convolutional model for -producing segmentation maps of cloud shadows in Sentinel-2 L2A imagery. 
- -The L2A cloud shadow dataset was built by merging annotations from two cloud mask datasets that also -included cloud shadow annotations: - - [Sentinel-2 Cloud Mask Catalogue](https://zenodo.org/record/4172871) and - - [Sentinel-2 KappaZeta Cloud and Cloud Shadow Masks](https://zenodo.org/record/5095024) - -Both datasets are available under the License -[Creative Commons Attribution 4.0 International](https://creativecommons.org/licenses/by/4.0/legalcode). \ No newline at end of file diff --git a/op_resources/shadow_models/shadow.onnx b/op_resources/shadow_models/shadow.onnx deleted file mode 100644 index ae6075d9..00000000 --- a/op_resources/shadow_models/shadow.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:88fae46b0afb6a83ccc27862fee312f8a562925b051b5a6dce2b5a91c81008e9 -size 52302553 diff --git a/op_resources/spaceeye_models/spaceeye.onnx b/op_resources/spaceeye_models/spaceeye.onnx deleted file mode 100644 index 8ca3eb9a..00000000 --- a/op_resources/spaceeye_models/spaceeye.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed8447392326bdeb562b2bbbdf61b3faf5cd56541739773a6f0f37a139c7c06b -size 211010 diff --git a/op_resources/spectral_extension_model/spectral_extension.onnx b/op_resources/spectral_extension_model/spectral_extension.onnx deleted file mode 100644 index 863f0602..00000000 --- a/op_resources/spectral_extension_model/spectral_extension.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46de812e5a0996e9a381e81905a9bef9363456b95f4839b6f031dd3d2bc16012 -size 530041 diff --git a/ops/admag/admag_seasonal_field.yaml b/ops/admag/admag_seasonal_field.yaml deleted file mode 100644 index 42501c91..00000000 --- a/ops/admag/admag_seasonal_field.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: admag_seasonal_field -inputs: - admag_input: ADMAgSeasonalFieldInput -output: - seasonal_field: SeasonalFieldInformation -parameters: - base_url: - 
client_id: - client_secret: - authority: - default_scope: -entrypoint: - file: admag_seasonal_field_op.py - callback_builder: ADMAgConnector -version: 2 -description: - short_description: Establishes the connection with ADMAg and fetches seasonal field information. diff --git a/ops/admag/admag_seasonal_field_op.py b/ops/admag/admag_seasonal_field_op.py deleted file mode 100644 index e2513fc8..00000000 --- a/ops/admag/admag_seasonal_field_op.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from typing import Any, Dict, Tuple - -from vibe_core.admag_client import ADMAgClient -from vibe_core.data import ( - ADMAgSeasonalFieldInput, - FertilizerInformation, - HarvestInformation, - OrganicAmendmentInformation, - SeasonalFieldInformation, - TillageInformation, - gen_guid, -) - -API_VERSION = "2023-11-01-preview" - - -class ADMAgConnector: - def __init__( - self, - base_url: str, - client_id: str, - client_secret: str, - authority: str, - default_scope: str, - ): - self.admag_client = ADMAgClient( - base_url=base_url, - api_version=API_VERSION, - client_id=client_id, - client_secret=client_secret, - authority=authority, - default_scope=default_scope, - ) - self.date_fmt = "%Y-%m-%dT%H:%M:%S%z" - - def get_field_entities( - self, admag_input: ADMAgSeasonalFieldInput - ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]: - seasonal_field_info: Dict[str, Any] = self.admag_client.get_seasonal_field( - admag_input.party_id, admag_input.seasonal_field_id - ) - - field_info = self.admag_client.get_field( - admag_input.party_id, seasonal_field_info["fieldId"] - ) - - season_info: Dict[str, Any] = self.admag_client.get_season(seasonal_field_info["seasonId"]) - - return ( - seasonal_field_info, - field_info, - season_info, - ) - - def get_harvests( - self, - party_id: str, - intersects_with_geometry: Dict[str, Any], - min_start_operation: str, - max_end_operation: str, - 
associated_resource: Dict[str, str], - ): - def check_harvest_properties(harvest: Dict[str, Any]) -> Dict[str, Any]: - if "gfsrt" not in harvest["properties"]: - raise ValueError( - "Harvest does not have gfsrt property. " - f"Please check harvest properties with id={harvest['id']} in Admag. " - "havest['properties']['gfsrt'] = True, means the crop is grain." - ) - - if "strawStoverHayRemoval" not in harvest["properties"]: - raise ValueError( - "Harvest does not have strawStoverHayRemoval property " - f"for entity with id={harvest['id']}. " - "Please check harvest properties in Admag. " - "strawStoverHayremoval is percentage of straw, " - "stover, and hay removed at harvest." - ) - - return harvest - - harvest_result = self.admag_client.get_harvest_info( - party_id, - intersects_with_geometry, - min_start_operation, - max_end_operation, - associated_resource, - ) - - [check_harvest_properties(harvest) for harvest in harvest_result["value"]] - - return [ - HarvestInformation( - is_grain=harvest["properties"]["gfsrt"] == "True", - start_date=harvest["operationStartDateTime"], - end_date=harvest["operationEndDateTime"], - crop_yield=harvest["totalYield"]["value"], - stray_stover_hay_removal=harvest["properties"]["strawStoverHayRemoval"], - ) - for harvest in harvest_result["value"] - ] - - def get_latest_harvest( - self, - operation_params: Dict[str, Any], - ) -> Dict[str, Any]: - harvest_result = self.admag_client.get_harvest_info(**operation_params) - if "value" in harvest_result and len(harvest_result["value"]) == 0: - raise ValueError(f"No harvest found with parameters: {operation_params}") - latest_harvest = max(harvest_result["value"], key=lambda x: x["operationEndDateTime"]) - return latest_harvest - - def get_fertilizers( - self, - party_id: str, - intersects_with_geometry: Dict[str, Any], - min_start_operation: str, - max_end_operation: str, - associated_resource: Dict[str, str], - ): - def check_fertilizer_properties(fertilizer: Dict[str, Any]): - if 
"totalNitrogen" not in fertilizer["properties"]: - raise ValueError( - "Fertilizer does not have totalNitrogen property. " - f"Please check ADMAg application with id={fertilizer['id']}. " - "totalNitrogen is the total amount of nitrogen applied (lbs N/acre)." - ) - - if "eep" not in fertilizer["properties"]: - raise ValueError( - "Fertilizer does not have eep property. " - f"Please check ADMAg application with id={fertilizer['id']}. " - "eep is the enhanced efficiency phosphorus." - ) - - possible_eeps = ["None", "Slow Release", "Nitrification Inhibitor"] - if fertilizer["properties"]["eep"] not in possible_eeps: - raise ValueError( - f"eep property of ADMAg application with id={fertilizer['id']} " - "is not one of the allowed values. " - f"Allowed values are {possible_eeps}" - ) - - fertilizer_result = self.admag_client.get_fertilizer_info( - party_id, - intersects_with_geometry, - min_start_operation, - max_end_operation, - associated_resource, - ) - - [check_fertilizer_properties(fertilizer) for fertilizer in fertilizer_result["value"]] - - return [ - FertilizerInformation( - start_date=fertilizer["operationStartDateTime"], - end_date=fertilizer["operationEndDateTime"], - application_type=fertilizer["name"], - total_nitrogen=fertilizer["properties"]["totalNitrogen"], - enhanced_efficiency_phosphorus=fertilizer["properties"]["eep"], - ) - for fertilizer in fertilizer_result["value"] - ] - - def get_first_planting( - self, - operation_params: Dict[str, Any], - ): - operation_result = self.admag_client.get_planting_info(**operation_params) - - if "value" in operation_result and len(operation_result["value"]) == 0: - raise ValueError(f"No planting found with parameters: {operation_params}") - obj_start = min(operation_result["value"], key=lambda x: x["operationStartDateTime"]) - return obj_start["operationStartDateTime"] - - def get_tillages( - self, - party_id: str, - intersects_with_geometry: Dict[str, Any], - min_start_operation: str, - max_end_operation: str, - 
associated_resource: Dict[str, str], - ): - tillage_result = self.admag_client.get_tillage_info( - party_id, - intersects_with_geometry, - min_start_operation, - max_end_operation, - associated_resource, - ) - - return [ - TillageInformation( - implement=tilage["name"], - start_date=tilage["operationStartDateTime"], - end_date=tilage["operationEndDateTime"], - ) - for tilage in tillage_result["value"] - ] - - def get_organic_amendments( - self, - party_id: str, - intersects_with_geometry: Dict[str, Any], - min_start_operation: str, - max_end_operation: str, - associated_resource: Dict[str, str], - ): - def check_organic_amendment_properties(organic_amendments: Dict[str, Any]): - if "type" not in organic_amendments["properties"]: - raise ValueError( - "Organic amendment does not have type property. " - f"Please check ADMAg application with id={organic_amendments['id']}. " - "Type is the type of organic amendment. Check Comet-Farm API documentation " - "for the list of allowed values." - ) - - if "amount" not in organic_amendments["properties"]: - raise ValueError( - "Organic amendment does not have amount property. " - f"Please check ADMAg application with id={organic_amendments['id']}. " - "Amount is the amount of organic amendment applied (tons/acre)." - ) - - if "percentN" not in organic_amendments["properties"]: - raise ValueError( - "Organic amendment does not have percentN property. " - f"Please check ADMAg application with id={organic_amendments['id']}. " - "percentN is the percent nitrogen in the organic amendment." - ) - - if "CNratio" not in organic_amendments["properties"]: - raise ValueError( - "Organic amendment does not have CNratio property. " - f"Please check ADMAg application with id={organic_amendments['id']}. " - "CNratio is the carbon nitrogen ratio of the organic amendment." 
- ) - - omad_result = self.admag_client.get_organic_amendments_info( - party_id, - intersects_with_geometry, - min_start_operation, - max_end_operation, - associated_resource, - ) - - [ - check_organic_amendment_properties(organic_amendment) - for organic_amendment in omad_result["value"] - ] - - return [ - OrganicAmendmentInformation( - start_date=omad["operationStartDateTime"], - end_date=omad["operationEndDateTime"], - organic_amendment_type=omad["properties"]["type"], - organic_amendment_amount=omad["properties"]["amount"], - organic_amendment_percent_nitrogen=omad["properties"]["percentN"], - organic_amendment_carbon_nitrogen_ratio=omad["properties"]["CNratio"], - ) - for omad in omad_result["value"] - ] - - def get_season_field_data( - self, - party_id: str, - seasonal_field_info: Dict[str, Any], - season_info: Dict[str, Any], - field_info: Dict[str, Any], - ) -> SeasonalFieldInformation: - associated_resource = {"type": "SeasonalField", "id": seasonal_field_info["id"]} - - operation_params = { - "party_id": party_id, - "intersects_with_geometry": seasonal_field_info["geometry"], - "min_start_operation": season_info["startDateTime"], - "max_end_operation": season_info["endDateTime"], - "associated_resource": associated_resource, - } - - latest_harvest = self.get_latest_harvest(operation_params) - - planting_start_time = self.get_first_planting(operation_params) - - return SeasonalFieldInformation( - id=gen_guid(), - time_range=( - datetime.strptime(planting_start_time, self.date_fmt), - datetime.strptime(latest_harvest["operationEndDateTime"], self.date_fmt), - ), - geometry=seasonal_field_info["geometry"], - assets=[], - crop_name=seasonal_field_info["name"], - crop_type=seasonal_field_info["description"], - fertilizers=self.get_fertilizers(**operation_params), - harvests=self.get_harvests(**operation_params), - tillages=self.get_tillages(**operation_params), - organic_amendments=self.get_organic_amendments(**operation_params), - 
properties=field_info["properties"], - ) - - def __call__(self): - def get_admag_seasonal_field( - admag_input: ADMAgSeasonalFieldInput, - ) -> Dict[str, SeasonalFieldInformation]: - seasonal_field_info, field_info, season_info = self.get_field_entities(admag_input) - seasonal_field = self.get_season_field_data( - admag_input.party_id, seasonal_field_info, season_info, field_info - ) - - return {"seasonal_field": seasonal_field} - - return get_admag_seasonal_field diff --git a/ops/admag/get_prescription.py b/ops/admag/get_prescription.py deleted file mode 100644 index 5bf9ff41..00000000 --- a/ops/admag/get_prescription.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict - -from vibe_core.admag_client import ADMAgClient -from vibe_core.data import ADMAgPrescription, ADMAgPrescriptionInput - -API_VERSION = "2023-11-01-preview" - - -class CallbackBuilder: - def __init__( - self, - base_url: str, - client_id: str, - client_secret: str, - authority: str, - default_scope: str, - ): - self.admag_client = ADMAgClient( - base_url=base_url, - api_version=API_VERSION, - client_id=client_id, - client_secret=client_secret, - authority=authority, - default_scope=default_scope, - ) - - def prescriptions(self, user_input: ADMAgPrescriptionInput) -> ADMAgPrescription: - response = self.admag_client.get_prescription( - user_input.party_id, user_input.prescription_id - ) - - prescription = ADMAgPrescription(**response) - - return prescription - - def __call__(self): - def prescriptions_init( - prescription_without_geom_input: ADMAgPrescriptionInput, - ) -> Dict[str, ADMAgPrescription]: - out_prescriptions = self.prescriptions(prescription_without_geom_input) - return {"prescription_with_geom": out_prescriptions} - - return prescriptions_init diff --git a/ops/admag/get_prescription.yaml b/ops/admag/get_prescription.yaml deleted file mode 100644 index 64e3affe..00000000 --- 
a/ops/admag/get_prescription.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: get_prescription -inputs: - prescription_without_geom_input: ADMAgPrescriptionInput -output: - prescription_with_geom: ADMAgPrescription -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: -entrypoint: - file: get_prescription.py - callback_builder: CallbackBuilder -description: - short_description: - Get prescription using ADMAg API. \ No newline at end of file diff --git a/ops/admag/list_prescriptions.py b/ops/admag/list_prescriptions.py deleted file mode 100644 index cb89004f..00000000 --- a/ops/admag/list_prescriptions.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, Dict, List, Tuple - -from vibe_core.admag_client import ADMAgClient -from vibe_core.data import ADMAgPrescriptionInput, ADMAgSeasonalFieldInput - -API_VERSION = "2023-11-01-preview" - - -class CallbackBuilder: - def __init__( - self, - base_url: str, - client_id: str, - client_secret: str, - authority: str, - default_scope: str, - ): - self.admag_client = ADMAgClient( - base_url=base_url, - api_version=API_VERSION, - client_id=client_id, - client_secret=client_secret, - authority=authority, - default_scope=default_scope, - ) - - def get_prescriptions( - self, party_id: str, field_info: Dict[str, str], intersect_geometry: Dict[str, Any] - ) -> List[ADMAgPrescriptionInput]: - response = self.admag_client.get_prescription_map_id( - party_id=party_id, - field_id=field_info["fieldId"], - crop_id=field_info["cropId"], - ) - - prescription_map_id = None - for p_map in response["value"]: - if "properties" in p_map and "seasonal_field_id" in p_map["properties"]: - if p_map["properties"]["seasonal_field_id"] == field_info["seasonal_field_id"]: - prescription_map_id = p_map["id"] - break - - if not prescription_map_id: - raise ValueError("Prescription map not found") - - response = self.admag_client.get_prescriptions( - 
party_id, prescription_map_id, geometry=intersect_geometry - ) - - prescriptions = [] - - for value in response["value"]: - prescriptions.append( - ADMAgPrescriptionInput( - prescription_id=value["id"], - party_id=value["partyId"], - ) - ) - - return prescriptions - - def get_field_info( - self, party_id: str, seasonal_field_id: str - ) -> Tuple[Dict[str, str], Dict[str, Any]]: - response = self.admag_client.get_seasonal_field(party_id, seasonal_field_id) - field_info = { - "fieldId": response["fieldId"], - "cropId": response["cropId"], - "seasonId": response["seasonId"], - "createdDateTime": response["createdDateTime"], - "modifiedDateTime": response["modifiedDateTime"], - "seasonal_field_id": seasonal_field_id, - } - geometry = response["geometry"] - return field_info, geometry - - def prescriptions(self, user_input: ADMAgSeasonalFieldInput) -> List[ADMAgPrescriptionInput]: - field_info, geometry = self.get_field_info( - user_input.party_id, user_input.seasonal_field_id - ) - - list_prescriptions = self.get_prescriptions( - user_input.party_id, field_info, intersect_geometry=geometry - ) - return list_prescriptions - - def __call__(self): - def prescriptions_init( - admag_input: ADMAgSeasonalFieldInput, - ) -> Dict[str, List[ADMAgPrescriptionInput]]: - out_prescriptions = self.prescriptions(admag_input) - return {"prescriptions": out_prescriptions} - - return prescriptions_init diff --git a/ops/admag/list_prescriptions.yaml b/ops/admag/list_prescriptions.yaml deleted file mode 100644 index dfc33027..00000000 --- a/ops/admag/list_prescriptions.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: get_prescription_map -inputs: - admag_input: ADMAgSeasonalFieldInput -output: - prescriptions: List[ADMAgPrescriptionInput] -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: -entrypoint: - callback_builder: CallbackBuilder - file: list_prescriptions.py -description: - short_description: List available prescriptions using prescription map. 
\ No newline at end of file diff --git a/ops/admag/prescriptions.py b/ops/admag/prescriptions.py deleted file mode 100644 index 2917d62b..00000000 --- a/ops/admag/prescriptions.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple - -import pandas as pd -from geopandas.geodataframe import GeoDataFrame -from shapely.geometry import shape - -from vibe_core.admag_client import ADMAgClient -from vibe_core.data import ( - ADMAgPrescription, - ADMAgSeasonalFieldInput, - AssetVibe, - GeometryCollection, - gen_guid, - gen_hash_id, -) - -API_VERSION = "2023-11-01-preview" -DATE_FORMAT = "%Y-%m-%dT%H:%M:%S%z" - - -class CallbackBuilder: - def __init__( - self, - base_url: str, - client_id: str, - client_secret: str, - authority: str, - default_scope: str, - ): - self.temp_dir = TemporaryDirectory() - - self.admag_client = ADMAgClient( - base_url=base_url, - api_version=API_VERSION, - client_id=client_id, - client_secret=client_secret, - authority=authority, - default_scope=default_scope, - ) - - def get_prescriptions(self, prescriptions: List[ADMAgPrescription]) -> AssetVibe: - if not prescriptions: - raise ValueError("No prescriptions found") - - measures = [item.measurements for item in prescriptions] - geometry = [shape(item.geometry) for item in prescriptions] - df = pd.DataFrame(measures) - - for column in df.columns: - df[column] = df[column].apply(lambda x: x["value"]) # type: ignore - - df["geometry"] = geometry - - df = GeoDataFrame(data=df, geometry="geometry") # type: ignore - out_path = f"{self.temp_dir.name}/prescription.geojson" - df.to_file(out_path, driver="GeoJSON") - asset_vibe = AssetVibe(reference=out_path, type="application/json", id=gen_guid()) - return asset_vibe - - def get_field_info( - self, party_id: str, seasonal_field_id: str - ) -> Tuple[Dict[str, str], Dict[str, Any]]: - 
response = self.admag_client.get_seasonal_field(party_id, seasonal_field_id) - field_info = { - "fieldId": response["fieldId"], - "cropId": response["cropId"], - "seasonId": response["seasonId"], - "createdDateTime": response["createdDateTime"], - "modifiedDateTime": response["modifiedDateTime"], - } - geometry = response["geometry"] - return field_info, geometry - - def prescriptions( - self, user_input: ADMAgSeasonalFieldInput, prescriptions: List[ADMAgPrescription] - ) -> GeometryCollection: - field_info, geometry = self.get_field_info( - user_input.party_id, user_input.seasonal_field_id - ) - asset_vibe = self.get_prescriptions(prescriptions) - - time_range = ( - datetime.strptime(prescriptions[0].createdDateTime, DATE_FORMAT), - datetime.strptime(prescriptions[0].modifiedDateTime, DATE_FORMAT), - ) - return GeometryCollection( - id=gen_hash_id("heatmap_nutrients", geometry, time_range), - time_range=time_range, - geometry=geometry, - assets=[asset_vibe], - ) - - def __call__(self): - def prescriptions_init( - admag_input: ADMAgSeasonalFieldInput, - prescriptions_with_geom_input: List[ADMAgPrescription], - ) -> Dict[str, GeometryCollection]: - out_prescriptions = self.prescriptions(admag_input, prescriptions_with_geom_input) - return {"response": out_prescriptions} - - return prescriptions_init - - def __del__(self): - if self.temp_dir: - self.temp_dir.cleanup() diff --git a/ops/admag/prescriptions.yaml b/ops/admag/prescriptions.yaml deleted file mode 100644 index b3f224ba..00000000 --- a/ops/admag/prescriptions.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: prescriptions -inputs: - admag_input: ADMAgSeasonalFieldInput - prescriptions_with_geom_input: List[ADMAgPrescription] -output: - response: GeometryCollection -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: -entrypoint: - file: prescriptions.py - callback_builder: CallbackBuilder -version: 2 -description: - short_description: Downloads boundary and prescriptions linked to 
seasonal field from ADMAg data source. diff --git a/ops/admag/test_admag.py b/ops/admag/test_admag.py deleted file mode 100644 index 770e3bfa..00000000 --- a/ops/admag/test_admag.py +++ /dev/null @@ -1,1013 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import copy -import json -import os -from typing import Any, Dict, List, cast -from unittest.mock import MagicMock, Mock, patch - -import geopandas as gpd -import pytest -from shapely import geometry as shpg - -from vibe_core.admag_client import ADMAgClient -from vibe_core.data import ( - ADMAgPrescription, - ADMAgPrescriptionInput, - ADMAgSeasonalFieldInput, - AssetVibe, -) -from vibe_dev.mock_utils import Request -from vibe_dev.testing.op_tester import OpTester - -HERE = os.path.dirname(os.path.abspath(__file__)) -ADMAG_SEASONAL_FIELD_OP = os.path.join(HERE, "admag_seasonal_field.yaml") - - -@pytest.fixture -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -def admag_client(get_token: MagicMock): - return ADMAgClient( - base_url="fake_url", - api_version="fake_admag_version", - client_id="fake_client_id", - client_secret="fake_client_secret", - authority="fake_authority", - default_scope="fake_scope", - ) - - -@pytest.fixture -def fake_get_response_without_next_link() -> Dict[str, Any]: - return { - "value": [ - { - "fake_key": "fake_value", - }, - ], - } - - -@pytest.fixture -def fake_get_response_with_next_link() -> Dict[str, Any]: - return { - "value": [ - { - "fake_key": "fake_value", - }, - ], - "nextLink": "http://fake-url", - } - - -@pytest.fixture -def fake_input_data() -> ADMAgSeasonalFieldInput: - return ADMAgSeasonalFieldInput( - party_id="fake-party-id", - seasonal_field_id="fake-seasonal-field-id", - ) - - -@pytest.fixture -def fake_prescription_input_data() -> ADMAgPrescriptionInput: - return ADMAgPrescriptionInput( - party_id="fake-party-id", - prescription_id="fake-prescription-id", - ) - - -@patch.object(ADMAgClient, 
"_request") -def test_admag_client_get_limit_requests( - _request: MagicMock, - monkeypatch: pytest.MonkeyPatch, - admag_client: ADMAgClient, - fake_get_response_with_next_link: Dict[str, Any], - fake_get_response_without_next_link: Dict[str, Any], -): - fake_response_different_link = fake_get_response_with_next_link.copy() - fake_response_different_link.update({"nextLink": "different_fake_link"}) - fake_response_another_link = fake_get_response_with_next_link.copy() - fake_response_another_link.update({"nextLink": "another_fake_link"}) - - monkeypatch.setattr(ADMAgClient, "NEXT_PAGES_LIMIT", 1) - _request.side_effect = [ - fake_get_response_with_next_link, - fake_response_different_link, - fake_get_response_without_next_link, - ] - - with pytest.raises(RuntimeError): - admag_client._get("fake_url") - - -@patch.object(ADMAgClient, "_request") -def test_admag_client_get_repeated_link( - _request: MagicMock, - admag_client: ADMAgClient, - fake_get_response_with_next_link: Dict[str, Any], - fake_get_response_without_next_link: Dict[str, Any], -): - _request.side_effect = [ - fake_get_response_with_next_link, - fake_get_response_with_next_link, - fake_get_response_without_next_link, - ] - - with pytest.raises(RuntimeError): - admag_client._get("fake_url") - - -@patch.object(ADMAgClient, "_request") -def test_admag_client_get_follow_link( - _request: MagicMock, - admag_client: ADMAgClient, - fake_get_response_with_next_link: Dict[str, Any], - fake_get_response_without_next_link: Dict[str, Any], -): - fake_response_different_link = fake_get_response_with_next_link.copy() - fake_response_different_link.update({"nextLink": "different_fake_link"}) - _request.side_effect = [ - fake_get_response_with_next_link, - fake_response_different_link, - fake_get_response_without_next_link, - ] - - result = admag_client._get("fake_url") - assert len(result["value"]) == 3 - - -def test_admag_client_creation(admag_client: ADMAgClient): - assert admag_client.header() == { - 
"Authorization": "Bearer my_fake_token", - "Content-Type": "application/merge-patch+json", - } - - -@pytest.fixture -def seasonal_field_info(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: - return { - "partyId": "fake-party-id", - "farmId": "fake-farm-id", - "fieldId": "fake-field-id", - "seasonId": "fake-season-id", - "cropId": "fake-crop-id", - "id": "fake-seasonal-field-id", - "eTag": "fake-etag", - "status": "Active", - "createdDateTime": "2001-01-01T00:00:00Z", - "modifiedDateTime": "2001-01-01T00:00:00Z", - "name": "fake-seasonal-field-name", - "description": "fake-description", - "geometry": vibe_geometry_dict, - "properties": { - "plantingDateTime": "2001-01-01T00:00:00Z", - }, - } - - -@patch("vibe_core.admag_client.ADMAgClient._get") -def test_get_seasonal_field( - _get: MagicMock, seasonal_field_info: Dict[str, Any], admag_client: ADMAgClient -): - _get.return_value = seasonal_field_info - seasonal_field_result = admag_client.get_seasonal_field( - party_id="fake-party-id", - seasonal_field_id="fake-seasonal-field-id", - ) - assert seasonal_field_result - assert "name" in seasonal_field_result - assert "description" in seasonal_field_result - assert "geometry" in seasonal_field_result - - -@pytest.fixture -def season_info() -> Dict[str, Any]: - return { - "startDateTime": "2001-01-01T00:00:00Z", - "endDateTime": "2001-12-31T00:00:00Z", - "year": 2001, - "id": "fake-season-id", - "eTag": "fake-etag", - "status": "Active", - "createdDateTime": "2001-01-01T00:00:00Z", - "modifiedDateTime": "2001-01-01T00:00:00Z", - "name": "fake-season-name", - } - - -@patch("vibe_core.admag_client.ADMAgClient._get") -def test_get_season(_get: MagicMock, season_info: Dict[str, Any], admag_client: ADMAgClient): - _get.return_value = season_info - season_result = admag_client.get_season( - season_id="fake-season-id", - ) - assert season_result - assert "startDateTime" in season_result - assert "endDateTime" in season_result - assert "year" in season_result - - 
-@pytest.fixture -def field_info(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: - return { - "partyId": "fake-party-id", - "farmId": "fake-farm-id", - "geometry": vibe_geometry_dict, - "eTag": "fake-etag", - "id": "fake-field-id", - "status": "Active", - "createdDateTime": "2001-01-01T00:00:00Z", - "modifiedDateTime": "2001-01-01T00:00:00Z", - "name": "fake-field-name", - "description": "Fake description", - "properties": { - "pre_1980": "Lowland Non-Irrigate...Pre 1980s)", - "crp_type": "None", - "crp_start": "", - "crp_end": "", - "year_1980_2000": "Irrigated: Continuous Hay", - "year_1980_2000_tillage": "Intensive Tillage", - }, - } - - -@pytest.fixture -def prescription_geom_input() -> List[ADMAgPrescription]: - prescription = { - "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", - "prescriptionMapId": "831989c4-c15a-4fc5-837b-4c0289d53010", - "productCode": "1635", - "productName": "Nutrient", - "type": "Nutrient", - "measurements": { - "N": {"value": 47.1}, - "P": {"value": 34.99769206227461}, - "pH": {"value": 4.978131831743143}, - "C": {"value": 0.046408031802193}, - }, - "id": "880094d0-1c48-4d7c-b0d3-f7477a937473", - "eTag": "24009696-0000-0100-0000-65fb20540000", - "status": "Active", - "createdDateTime": "2024-03-20T17:43:48Z", - "modifiedDateTime": "2024-03-20T17:43:48Z", - "source": "IOT device", - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-117.03642546099948, 47.044663835752566], - [-117.05642546099949, 47.044663835752566], - [-117.05642546099949, 47.02466383575257], - [-117.03642546099948, 47.02466383575257], - [-117.03642546099948, 47.044663835752566], - ] - ], - }, - "name": "Nitrogen Nutrient", - "description": "", - "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", - "modifiedBy": "f8c6c349-b484-4863-af76-d10eee669306", - } - - return [ADMAgPrescription(**prescription)] - - -@patch("vibe_core.admag_client.ADMAgClient._get") -def test_get_field(_get: MagicMock, field_info: Dict[str, Any], admag_client: 
ADMAgClient): - _get.return_value = field_info - field_result = admag_client.get_field( - party_id="fake-party-id", - field_id="fake-field-id", - ) - assert field_result - assert "properties" in field_result - properties = field_result["properties"] - assert "pre_1980" in properties - assert "crp_type" in properties - assert "crp_start" in properties - assert "crp_end" in properties - assert "year_1980_2000" in properties - assert "year_1980_2000_tillage" in properties - - -@pytest.fixture -def harvest_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: - return { - "value": [ - { - "geometry": vibe_geometry_dict, - "attachmentsLink": "https://fake-attachment.bla", - "createdDateTime": "2021-12-10T00:18:33Z", - "eTag": "5500c45e-0000-0100-0000-61b29cd90000", - "partyId": "fake-party-id", - "id": "fake-harvest-id", - "modifiedDateTime": "2021-12-10T00:18:33Z", - "operationEndDateTime": "2001-09-05T00:00:00Z", - "operationStartDateTime": "2001-09-05T00:00:00Z", - "properties": {"gfsrt": "True", "strawStoverHayRemoval": "0"}, - "source": "Farming", - "status": "Active", - "totalYield": {"unit": "tons", "value": 39.0}, - }, - ] - } - - -@pytest.fixture -def planting_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: - return { - "value": [ - { - "partyId": "fake-party-id", - "id": "fake-id", - "source": "Manual", - "name": "Planting data for North Farm", - "description": "some description", - "status": "Active", - "operationStartDateTime": "2021-02-25T16:57:04Z", - "operationEndDateTime": "2021-02-27T10:13:06Z", - "operationModifiedDateTime": "2021-02-28T10:14:12Z", - "avgPlantingRate": {"unit": "seedsperacre", "value": 30}, - "area": {"unit": "acre", "value": 30}, - "totalMaterial": {"unit": "seeds", "value": 758814}, - "avgMaterial": {"unit": "seedsperacre", "value": 25293}, - "plantingProductDetails": [ - { - "productName": "VAR1", - "area": {"unit": "acre", "value": 20}, - "totalMaterial": {"unit": "seeds", "value": 389214}, - "avgMaterial": 
{"unit": "seedsperacre", "value": 19460}, - } - ], - "properties": {"Region": "Europe", "CountyCode": 123}, - "createdDateTime": "2022-05-11T07:00:10.2750191Z", - "modifiedDateTime": "2022-05-11T07:00:10.2750191Z", - "eTag": "cb00a3ac-0000-0100-0000-601d21ec0000", - }, - ] - } - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("vibe_core.admag_client.ADMAgClient._post") -def test_get_harvest_info( - _post: MagicMock, - get_token: MagicMock, - harvest_result: Dict[str, Any], - admag_client: ADMAgClient, - vibe_geometry_dict: Dict[str, Any], -): - _post.return_value = harvest_result - harvest_result = admag_client.get_harvest_info( - party_id="fake-party-id", - intersects_with_geometry=vibe_geometry_dict, - min_start_operation="2001-01-01T00:00:00Z", - max_end_operation="2001-01-01T00:00:00Z", - associated_resource={"type": "SeasonalField", "id": "fake-seasonal-field-id"}, - ) - assert "value" in harvest_result - harvest_list = harvest_result["value"] - assert len(harvest_result) > 0 - harvest_entry = harvest_list[0] - assert "operationStartDateTime" in harvest_entry - assert "operationEndDateTime" in harvest_entry - assert "properties" in harvest_entry - harvest_properties = harvest_entry["properties"] - assert "gfsrt" in harvest_properties - assert "strawStoverHayRemoval" in harvest_properties - assert "totalYield" in harvest_entry - harvest_yield = harvest_entry["totalYield"] - assert "value" in harvest_yield - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("vibe_core.admag_client.ADMAgClient.get_field") -@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") -@patch("vibe_core.admag_client.ADMAgClient.get_season") -@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") -@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") -@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") 
-@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") -def test_admag_incomplete_fertilizer( - get_organic_amendments_info: MagicMock, - get_tillage_info: MagicMock, - get_fertilizer_info: MagicMock, - get_harvest_info: MagicMock, - get_season: MagicMock, - get_seasonal_field: MagicMock, - get_field: MagicMock, - get_token: MagicMock, - seasonal_field_info: Dict[str, Any], - field_info: Dict[str, Any], - season_info: Dict[str, Any], - harvest_result: Dict[str, Any], - fertilizer_result: Dict[str, Any], - tillage_result: Dict[str, Any], - omad_result: Dict[str, Any], - fake_input_data: ADMAgSeasonalFieldInput, -): - get_seasonal_field.return_value = seasonal_field_info - get_field.return_value = field_info - get_season.return_value = season_info - get_harvest_info.return_value = harvest_result - get_tillage_info.return_value = tillage_result - get_organic_amendments_info.return_value = omad_result - - fertilizer_missing_total_N = copy.deepcopy(fertilizer_result) - fertilizer_missing_total_N["value"][0]["properties"].pop("totalNitrogen") - get_fertilizer_info.return_value = fertilizer_missing_total_N - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - fertilizer_missing_eep = copy.deepcopy(fertilizer_result) - fertilizer_missing_eep["value"][0]["properties"].pop("eep") - get_fertilizer_info.return_value = fertilizer_missing_eep - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - fertilizer_wrong_eep = copy.deepcopy(fertilizer_result) - fertilizer_wrong_eep["value"][0]["properties"]["eep"] = "fake-eep" - get_fertilizer_info.return_value = fertilizer_wrong_eep - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("vibe_core.admag_client.ADMAgClient.get_field") 
-@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") -@patch("vibe_core.admag_client.ADMAgClient.get_season") -@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") -@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") -@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") -@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") -def test_admag_incomplete_harvest( - get_organic_amendments_info: MagicMock, - get_tillage_info: MagicMock, - get_fertilizer_info: MagicMock, - get_harvest_info: MagicMock, - get_season: MagicMock, - get_seasonal_field: MagicMock, - get_field: MagicMock, - _: MagicMock, - seasonal_field_info: Dict[str, Any], - field_info: Dict[str, Any], - season_info: Dict[str, Any], - harvest_result: Dict[str, Any], - fertilizer_result: Dict[str, Any], - tillage_result: Dict[str, Any], - omad_result: Dict[str, Any], - fake_input_data: ADMAgSeasonalFieldInput, -): - get_seasonal_field.return_value = seasonal_field_info - get_field.return_value = field_info - get_season.return_value = season_info - get_fertilizer_info.return_value = fertilizer_result - get_tillage_info.return_value = tillage_result - get_organic_amendments_info.return_value = omad_result - - # Don't remove code, it may required for different crop - # harvest_missing_gfsrt = copy.deepcopy(harvest_result) - # harvest_missing_gfsrt["value"][0]["properties"].pop("gfsrt") - - # get_harvest_info.return_value = harvest_missing_gfsrt - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - # Don't remove code, it may required for different crop - # harvest_missing_straw_stover_hay_removal = copy.deepcopy(harvest_result) - # harvest_missing_straw_stover_hay_removal["value"][0]["properties"].pop( - # "strawStoverHayRemoval" - # ) - # get_harvest_info.return_value = harvest_missing_straw_stover_hay_removal - - with pytest.raises(ValueError): - 
OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("vibe_core.admag_client.ADMAgClient.get_field") -@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") -@patch("vibe_core.admag_client.ADMAgClient.get_season") -@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") -@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") -@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") -@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") -def test_admag_incomplete_organic_amendments( - get_organic_amendments_info: MagicMock, - get_tillage_info: MagicMock, - get_fertilizer_info: MagicMock, - get_harvest_info: MagicMock, - get_season: MagicMock, - get_seasonal_field: MagicMock, - get_field: MagicMock, - _: MagicMock, - seasonal_field_info: Dict[str, Any], - field_info: Dict[str, Any], - season_info: Dict[str, Any], - harvest_result: Dict[str, Any], - fertilizer_result: Dict[str, Any], - tillage_result: Dict[str, Any], - omad_result: Dict[str, Any], - fake_input_data: ADMAgSeasonalFieldInput, -): - get_seasonal_field.return_value = seasonal_field_info - get_field.return_value = field_info - get_season.return_value = season_info - get_harvest_info.return_value = harvest_result - get_fertilizer_info.return_value = fertilizer_result - get_tillage_info.return_value = tillage_result - - organic_amendments_missing_type = copy.deepcopy(omad_result) - organic_amendments_missing_type["value"][0]["properties"].pop("type") - get_organic_amendments_info.return_value = organic_amendments_missing_type - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - organic_amendments_missing_amount = copy.deepcopy(omad_result) - organic_amendments_missing_amount["value"][0]["properties"].pop("amount") - get_organic_amendments_info.return_value = organic_amendments_missing_amount - - 
with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - organic_amendments_missing_percentN = copy.deepcopy(omad_result) - organic_amendments_missing_percentN["value"][0]["properties"].pop("percentN") - get_organic_amendments_info.return_value = organic_amendments_missing_percentN - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - organic_amendments_missing_CNratio = copy.deepcopy(omad_result) - organic_amendments_missing_CNratio["value"][0]["properties"].pop("CNratio") - get_organic_amendments_info.return_value = organic_amendments_missing_CNratio - - with pytest.raises(ValueError): - OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - - -@pytest.fixture -def fertilizer_result() -> Dict[str, Any]: - return { - "value": [ - { - "totalMaterial": {"unit": "tons/ac", "value": 5.0}, - "operationStartDateTime": "2000-01-01T00:00:00Z", - "operationEndDateTime": "2000-01-01T00:00:00Z", - "attachmentsLink": "http://fake-url.com/attachments", - "partyId": "fake-party-id", - "id": "fake-fertilizer-id", - "eTag": "fake-etag", - "createdDateTime": "2021-12-10T00:03:37Z", - "modifiedDateTime": "2021-12-10T00:03:37Z", - "source": "Fertilizer", - "name": "Ammonium Nitrate (34-0-0)", - "properties": { - "eep": "None", - "totalNitrogen": 4.0, - "method": "Surface Band / Sidedress", - }, - } - ], - "nextLink": "https://fake-next-link.com", - } - - -@patch("vibe_core.admag_client.ADMAgClient._post") -def test_get_fertilizer_info( - _post: MagicMock, - fertilizer_result: Dict[str, Any], - admag_client: ADMAgClient, - vibe_geometry_dict: Dict[str, Any], -): - _post.return_value = fertilizer_result - fertilizer_result = admag_client.get_fertilizer_info( - party_id="fake-party-id", - intersects_with_geometry=vibe_geometry_dict, - min_start_operation="2001-01-01T00:00:00Z", - max_end_operation="2001-01-01T00:00:00Z", - associated_resource={"type": "SeasonalField", 
"id": "fake-seasonal_field-id"}, - ) - assert "value" in fertilizer_result - fertilizer_list = fertilizer_result["value"] - assert len(fertilizer_result) > 0 - fertilizer_entry = fertilizer_list[0] - assert "operationStartDateTime" in fertilizer_entry - assert "operationEndDateTime" in fertilizer_entry - assert "name" in fertilizer_entry - fertilizer_properties = fertilizer_entry["properties"] - assert "totalNitrogen" in fertilizer_properties - assert "eep" in fertilizer_properties - - -@pytest.fixture -def tillage_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: - return { - "value": [ - { - "geometry": vibe_geometry_dict, - "attachmentsLink": "fake-attachment-link", - "createdDateTime": "2021-12-10T00:18:33Z", - "eTag": "fake-etag", - "partyId": "fake-party-id", - "id": "fake-tillage-id", - "modifiedDateTime": "2021-12-10T00:18:33Z", - "name": "Fake Tillage", - "operationEndDateTime": "2001-01-01T00:00:00Z", - "operationStartDateTime": "2001-01-01T00:00:00Z", - "source": "fake-source", - "status": "Active", - }, - ] - } - - -@patch("vibe_core.admag_client.ADMAgClient._post") -def test_get_tillage_info( - _post: MagicMock, - tillage_result: Dict[str, Any], - admag_client: ADMAgClient, - vibe_geometry_dict: Dict[str, Any], -): - _post.return_value = tillage_result - tillage_result = admag_client.get_tillage_info( - party_id="fake-party-id", - intersects_with_geometry=vibe_geometry_dict, - min_start_operation="2001-01-01T00:00:00Z", - max_end_operation="2001-01-01T00:00:00Z", - associated_resource={"type": "SeasonalField", "id": "fake-seasonal_field-id"}, - ) - assert "value" in tillage_result - tillage_list = tillage_result["value"] - assert len(tillage_result) > 0 - tillage_entry = tillage_list[0] - assert "operationStartDateTime" in tillage_entry - assert "operationEndDateTime" in tillage_entry - assert "name" in tillage_entry - - -@pytest.fixture -def omad_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: - return { - "value": [ - { - 
"geometry": vibe_geometry_dict, - "attachmentsLink": "fake-attachment-link", - "createdDateTime": "2021-12-10T00:18:33Z", - "eTag": "fake-etag", - "partyId": "fake-party-id", - "id": "fake-tillage-id", - "modifiedDateTime": "2021-12-10T00:18:33Z", - "name": "Fake Tillage", - "operationEndDateTime": "2001-01-01T00:00:00Z", - "operationStartDateTime": "2001-01-01T00:00:00Z", - "source": "fake-source", - "status": "Active", - "properties": { - "type": "fake-omad-tyoe", - "amount": "100", - "percentN": "200", - "CNratio": "0.05", - }, - }, - ] - } - - -@patch("vibe_core.admag_client.ADMAgClient._post") -def test_get_organic_amendments_info( - _post: MagicMock, - omad_result: Dict[str, Any], - admag_client: ADMAgClient, - vibe_geometry_dict: Dict[str, Any], -): - _post.return_value = omad_result - omad_result = admag_client.get_organic_amendments_info( - party_id="fake-party-id", - intersects_with_geometry=vibe_geometry_dict, - min_start_operation="2001-01-01T00:00:00Z", - max_end_operation="2001-01-01T00:00:00Z", - associated_resource={"type": "SeasonalField", "id": "fake-seasonal_field-id"}, - ) - assert "value" in omad_result - omad_list = omad_result["value"] - assert len(omad_result) > 0 - omad_entry = omad_list[0] - assert "operationStartDateTime" in omad_entry - assert "operationEndDateTime" in omad_entry - assert "properties" in omad_entry - omad_properties = omad_entry["properties"] - assert "type" in omad_properties - assert "amount" in omad_properties - assert "percentN" in omad_properties - assert "CNratio" in omad_properties - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("vibe_core.admag_client.ADMAgClient.get_field") -@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") -@patch("vibe_core.admag_client.ADMAgClient.get_season") -@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") -@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") 
-@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") -@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") -@patch("vibe_core.admag_client.ADMAgClient.get_planting_info") -def test_admag_op( - get_planting_info: MagicMock, - get_organic_amendments_info: MagicMock, - get_tillage_info: MagicMock, - get_fertilizer_info: MagicMock, - get_harvest_info: MagicMock, - get_season: MagicMock, - get_seasonal_field: MagicMock, - get_field: MagicMock, - get_token: MagicMock, - seasonal_field_info: Dict[str, Any], - field_info: Dict[str, Any], - season_info: Dict[str, Any], - harvest_result: Dict[str, Any], - fertilizer_result: Dict[str, Any], - tillage_result: Dict[str, Any], - omad_result: Dict[str, Any], - planting_result: Dict[str, Any], - fake_input_data: ADMAgSeasonalFieldInput, -): - get_seasonal_field.return_value = seasonal_field_info - get_field.return_value = field_info - get_season.return_value = season_info - get_harvest_info.return_value = harvest_result - get_fertilizer_info.return_value = fertilizer_result - get_tillage_info.return_value = tillage_result - get_organic_amendments_info.return_value = omad_result - get_planting_info.return_value = planting_result - - output_data = OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) - assert output_data - - -@pytest.fixture -def vibe_geometry_dict() -> Dict[str, Any]: - farm_boundary = { - "type": "FeatureCollection", - "name": "small_block_new_new", - "crs": { - "type": "name", - "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}, - }, - "features": [ - { - "type": "Feature", - "properties": {"id": 1}, - "geometry": { - "type": "MultiPolygon", - "coordinates": [ - [ - [ - [-117.046717186923388, 47.036308491044693], - [-117.04260145498948, 47.036329968998508], - [-117.042643698734992, 47.034569687054848], - [-117.046686589954575, 47.034558181995273], - [-117.046717186923388, 47.036308491044693], - ] - ] - ], - }, - } - ], - } - data_frame = 
gpd.read_file(json.dumps(farm_boundary), driver="GeoJSON") - - if not data_frame.empty: - geometry = shpg.mapping(data_frame["geometry"][0]) # type: ignore - return geometry - else: - raise Exception("No geometry found in farm boundary") - - -@pytest.fixture -def admag_prescriptions() -> Request: - data = { - "value": [ - { - "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", - "prescriptionMapId": "831989c4-c15a-4fc5-837b-4c0289d53010", - "productCode": "1635", - "productName": "Nutrient", - "type": "Nutrient", - "measurements": { - "N": {"value": 47.1}, - "P": {"value": 34.99769206227461}, - "pH": {"value": 4.978131831743143}, - "C": {"value": 0.046408031802193}, - }, - "id": "880094d0-1c48-4d7c-b0d3-f7477a937473", - "eTag": "24009696-0000-0100-0000-65fb20540000", - "status": "Active", - "createdDateTime": "2024-03-20T17:43:48Z", - "modifiedDateTime": "2024-03-20T17:43:48Z", - "source": "IOT device", - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-117.03642546099948, 47.044663835752566], - [-117.05642546099949, 47.044663835752566], - [-117.05642546099949, 47.02466383575257], - [-117.03642546099948, 47.02466383575257], - [-117.03642546099948, 47.044663835752566], - ] - ], - }, - "name": "Nitrogen Nutrient", - "description": "", - "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", - "modifiedBy": "f8c6c349-b484-4863-af76-d10eee669306", - } - ] - } - data = Request(**{"text": json.dumps(data)}) - return data - - -@pytest.fixture -def admag_prescriptions_dict() -> Request: - data = { - "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", - "prescriptionMapId": "831989c4-c15a-4fc5-837b-4c0289d53010", - "productCode": "1635", - "productName": "Nutrient", - "type": "Nutrient", - "measurements": { - "N": {"value": 47.1}, - "P": {"value": 34.99769206227461}, - "pH": {"value": 4.978131831743143}, - "C": {"value": 0.046408031802193}, - }, - "id": "880094d0-1c48-4d7c-b0d3-f7477a937473", - "eTag": "24009696-0000-0100-0000-65fb20540000", - "status": 
"Active", - "createdDateTime": "2024-03-20T17:43:48Z", - "modifiedDateTime": "2024-03-20T17:43:48Z", - "source": "IOT device", - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-117.03642546099948, 47.044663835752566], - [-117.05642546099949, 47.044663835752566], - [-117.05642546099949, 47.02466383575257], - [-117.03642546099948, 47.02466383575257], - [-117.03642546099948, 47.044663835752566], - ] - ], - }, - "name": "Nitrogen Nutrient", - "description": "", - "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", - "modifiedBy": "f8c6c349-b484-4863-af76-d10eee669306", - } - - data = Request(**{"text": json.dumps(data)}) - return data - - -@pytest.fixture -def admag_get_field_info() -> Request: - data = { - "fieldId": "63c94ae9-b0b6-46b7-8e65-311b9b44191f", - "cropId": "ae600a8a-3011-4d7c-8146-1f039ba619d0", - "seasonId": "ae600a8a-3011-4d7c-8146-1f039ba619d0", - "createdDateTime": "2021-03-21T01:37:06Z", - "modifiedDateTime": "2021-03-21T01:37:06Z", - "seasonal_field_id": "", - } - - data = Request(**{"text": json.dumps(data)}) - return data - - -@pytest.fixture -def admag_get_prescription_map_id() -> Request: - data = { - "value": [ - { - "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", - "type": "Soil Nutrient Map", - "seasonId": "ae600a8a-3011-4d7c-8146-1f039ba619d0-16", - "cropId": "d4c8427b-4540-4c05-82f6-27c771e48b7c", - "fieldId": "04b1d9f6-7444-4df5-b468-9a4e4c96314e-16", - "id": "831989c4-c15a-4fc5-837b-4c0289d53050", - "eTag": "8400e17b-0000-0100-0000-660075240000", - "status": "Active", - "createdDateTime": "2024-03-21T14:48:27Z", - "modifiedDateTime": "2024-03-24T18:47:00Z", - "source": "IOT devices", - "name": "Prescription test Map", - "description": "Farmbeats Agriculture research", - "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", - "modifiedBy": "255a13c4-c1e0-4ac9-9e60-5139b3f8e0a3", - "properties": {"seasonal_field_id": "fake-seasonal-field-id"}, - } - ] - } - data = Request(**{"text": json.dumps(data)}) - return data - - 
-@pytest.fixture -def admag_seasonal_field_info(seasonal_field_info: Dict[str, Any]) -> Request: - data = Request(**{"text": json.dumps(seasonal_field_info)}) - return data - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("requests.Session.request") -def test_prescriptions( - session_mock: Mock, - _: MagicMock, - admag_prescriptions: str, - admag_seasonal_field_info: str, - fake_input_data: ADMAgSeasonalFieldInput, - prescription_geom_input: List[ADMAgPrescription], -): - session_mock.side_effect = [ - admag_seasonal_field_info, - admag_prescriptions, - ] - parameters = { - "base_url": "base_url", - "client_id": "client_id", - "client_secret": "client_secret", - "authority": "authority", - "default_scope": "default_scope", - } - CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prescriptions.yaml") - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - output_data = op_.run( - admag_input=fake_input_data, - prescriptions_with_geom_input=prescription_geom_input, # type: ignore - ) - assets = cast(List[AssetVibe], output_data["response"].assets) # type: ignore - assert len(assets[0].path_or_url) > 0 - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("requests.Session.request") -def test_list_prescriptions( - session_mock: Mock, - _: MagicMock, - admag_prescriptions: str, - admag_get_prescription_map_id: str, - admag_seasonal_field_info: str, - fake_input_data: ADMAgSeasonalFieldInput, -): - session_mock.side_effect = [ - admag_seasonal_field_info, - admag_get_prescription_map_id, - admag_prescriptions, - ] - parameters = { - "base_url": "base_url", - "client_id": "client_id", - "client_secret": "client_secret", - "authority": "authority", - "default_scope": "default_scope", - } - CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "list_prescriptions.yaml" - ) - op_ = OpTester(CONFIG_PATH) - 
op_.update_parameters(parameters) - output_data = op_.run(admag_input=fake_input_data) - assert "prescriptions" in output_data - - -@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") -@patch("requests.Session.request") -def test_get_prescriptions( - session_mock: Mock, - _: MagicMock, - admag_prescriptions_dict: str, - fake_prescription_input_data: ADMAgPrescriptionInput, -): - session_mock.side_effect = [ - admag_prescriptions_dict, - ] - parameters = { - "base_url": "base_url", - "client_id": "client_id", - "client_secret": "client_secret", - "authority": "authority", - "default_scope": "default_scope", - } - CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "get_prescription.yaml") - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - output_data = op_.run(prescription_without_geom_input=fake_prescription_input_data) - assert "prescription_with_geom" in output_data diff --git a/ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml b/ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml deleted file mode 100644 index 1d76494d..00000000 --- a/ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: aggregate_statistics_timeseries -inputs: - stats: List[DataSummaryStatistics] -output: - timeseries: List[TimeSeries] -parameters: - masked_thr: .8 -entrypoint: - file: aggregate_timeseries.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - masked_thr -description: - short_description: Aggregates list of summary statistics into a timeseries. \ No newline at end of file diff --git a/ops/aggregate_statistics_timeseries/aggregate_timeseries.py b/ops/aggregate_statistics_timeseries/aggregate_timeseries.py deleted file mode 100644 index ef2e245b..00000000 --- a/ops/aggregate_statistics_timeseries/aggregate_timeseries.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Dict, List, cast - -import pandas as pd - -from vibe_core.data import AssetVibe, DataSummaryStatistics, TimeSeries, gen_guid - - -class CallbackBuilder: - def __init__(self, masked_thr: float): - self.tmp_dir = TemporaryDirectory() - self.masked_thr = masked_thr - - def __call__(self): - def callback(stats: List[DataSummaryStatistics]) -> Dict[str, List[TimeSeries]]: - df = pd.concat( - cast( - List[pd.DataFrame], - [ - pd.read_csv(s.assets[0].url, index_col="date", parse_dates=True) - for s in stats - ], - ) - ) - assert df is not None, "DataFrame is None, that should not happen" - # Filter out items above threshold - df = cast(pd.DataFrame, df[df["masked_ratio"] <= self.masked_thr]) # type: ignore - if df.empty: - raise RuntimeError( - f"No available data with less than {self.masked_thr:.1%} masked data" - ) - df.sort_index(inplace=True) - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") - df.to_csv(filepath) - min_date = df.index.min().to_pydatetime() # type: ignore - max_date = df.index.max().to_pydatetime() # type: ignore - timeseries = TimeSeries( - gen_guid(), - time_range=(min_date, max_date), # type: ignore - geometry=stats[0].geometry, - assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], - ) - - return {"timeseries": [timeseries]} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/carbon_local/test_whatif.py b/ops/carbon_local/test_whatif.py deleted file mode 100644 index e8e45af3..00000000 --- a/ops/carbon_local/test_whatif.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from typing import List -from unittest.mock import Mock, patch - -import pytest -from pyngrok.exception import PyngrokError - -from vibe_core.data import CarbonOffsetInfo, SeasonalFieldInformation -from vibe_dev.testing.op_tester import OpTester - - -@pytest.fixture -def baseline_information(): - field_info = [ - { - "id": "25e96fa0-9cf8-4b31-ac9e-24e30c37aeaf", - "time_range": [ - datetime(year=2020, month=2, day=15), - datetime(year=2023, month=9, day=15), - ], - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-87.414918, 37.463251], - [-87.399025, 37.470226], - [-87.393499, 37.472569], - [-87.39827, 37.479898], - [-87.405993, 37.478046], - [-87.407538, 37.47761], - [-87.408122, 37.477501], - [-87.408636, 37.477092], - [-87.409048, 37.476602], - [-87.414918, 37.463251], - ] - ], - }, - "assets": [], - "crop_name": "Alfalfa", - "crop_type": "annual", - "properties": { - "pre_1980": "Lowland Non-Irrigated (Pre 1980s)", - "crp_type": "None", - "crp_start": "", - "crp_end": "", - "year_1980_2000": "Irrigated: Continuous Hay", - "year_1980_2000_tillage": "Intensive Tillage", - }, - "fertilizers": [], - "harvests": [ - { - "is_grain": True, - "start_date": "2000-09-05T00:00:00Z", - "end_date": "2000-09-05T00:00:00Z", - "crop_yield": 39.0, - "stray_stover_hay_removal": "0", - }, - ], - "tillages": [ - { - "start_date": "2000-01-01T00:00:00Z", - "end_date": "2000-01-01T00:00:00Z", - "implement": "Reduced Tillage", - } - ], - "organic_amendments": [], - } - ] - - fi = [SeasonalFieldInformation(**item) for item in field_info] - return fi - - -@pytest.fixture -def scenario_information(): - field_info = [ - { - "id": "0e16be1a-eb0f-4b55-a69c-4fa79af8f406", - "time_range": [ - datetime(year=2023, month=2, day=15), - datetime(year=2025, month=9, day=15), - ], - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-87.414918, 37.463251], - [-87.399025, 37.470226], - [-87.393499, 37.472569], - [-87.39827, 
37.479898], - [-87.405993, 37.478046], - [-87.407538, 37.47761], - [-87.408122, 37.477501], - [-87.408636, 37.477092], - [-87.409048, 37.476602], - [-87.414918, 37.463251], - ] - ], - }, - "assets": [], - "crop_name": "Barley", - "crop_type": "annual", - "properties": { - "pre_1980": "Lowland Non-Irrigated (Pre 1980s)", - "crp_type": "None", - "crp_start": "", - "crp_end": "", - "year_1980_2000": "Irrigated: Continuous Hay", - "year_1980_2000_tillage": "Intensive Tillage", - }, - "fertilizers": [], - "harvests": [ - { - "is_grain": True, - "start_date": "2023-11-11T00:00:00Z", - "end_date": "2023-11-11T00:00:00Z", - "crop_yield": 30.0, - "stray_stover_hay_removal": "0", - } - ], - "tillages": [ - { - "start_date": "2023-01-01T00:00:00Z", - "end_date": "2023-01-01T00:00:00Z", - "implement": "Zero Soil Disturbance", - } - ], - "organic_amendments": [], - } - ] - - fi = [SeasonalFieldInformation(**item) for item in field_info] - return fi - - -@pytest.fixture -def fake_comet_error(): - return { - "Errors": { - "ModelRun": { - "@name": "sdk_int1", - "Error": { - "@index": "0", - "@message": "You entered 200 in tag OMADPercentN for " - "CropYear: 2000 and CropScenario: Current " - ".Percent Nitrogen needs to between 0 and 100", - }, - } - } - } - - -@pytest.fixture -def fake_comet_response(): - return { - "Day": { - "@cometEmailId": "fake-email", - "@CFARMVersion": "appengine cometfarm v0-10 build 3.2.8472.37261 (03/13/2023)", - "Cropland": { - "ModelRun": { - "@name": "sdk_int1", - "Scenario": [ - { - "@name": "scenario: 17/03/2023 16:00:01", - "Carbon": { - "SoilCarbon": "1234.4321", - "BiomassBurningCarbon": "0", - "SoilCarbonStock2000": "1234.4321", - "SoilCarbonStockBegin": "1234.4321", - "SoilCarbonStockEnd": "1234.4321", - }, - "CO2": { - "LimingCO2": "0", - "UreaFertilizationCO2": "0", - "DrainedOrganicSoilsCO2": "0", - }, - "N2O": { - "SoilN2O": "1234.4321", - "SoilN2O_Direct": "1234.4321", - "SoilN2O_Indirect_Volatilization": "1234.4321", - 
"SoilN2O_Indirect_Leaching": "1234.4321", - "WetlandRiceCultivationN2O": "0", - "BiomassBurningN2O": "0", - "DrainedOrganicSoilsN2O": "0", - }, - "CH4": { - "SoilCH4": "0", - "WetlandRiceCultivationCH4": "0", - "BiomassBurningCH4": "0", - }, - } - ], - } - }, - } - } - - -@patch("http.server.HTTPServer.server_bind") -@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start_ngrok") -@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start") -@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.shutdown") -@patch("vibe_lib.comet_farm.comet_requester.CometRequester.get_comet_raw_output") -@patch("vibe_lib.comet_farm.comet_requester.CometRequester.parse_comet_response") -def test_whatif_request( - parse_comet_response: Mock, - _: Mock, - __: Mock, - ___: Mock, - ____: Mock, - _____: Mock, - baseline_information: List[SeasonalFieldInformation], - scenario_information: List[SeasonalFieldInformation], - fake_comet_response: str, -): - CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "whatif_comet_local_op.yaml" - ) - parse_comet_response.return_value = fake_comet_response - parameters = { - "comet_support_email": "fake_email", - "ngrok_token": "fake_ngrok", - } - - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - - output_data = op_.run( - # pyright misidentifies types here - baseline_seasonal_fields=baseline_information, # type: ignore - scenario_seasonal_fields=scenario_information, # type: ignore - ) - - assert "carbon_output" in output_data - assert isinstance(output_data["carbon_output"], CarbonOffsetInfo) - assert "Mg Co2e/year" in output_data["carbon_output"].carbon - - -@patch("http.server.HTTPServer.server_bind") -@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start_ngrok") -@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start") -@patch("vibe_lib.comet_farm.comet_requester.CometRequester.get_comet_raw_output") 
-@patch("vibe_lib.comet_farm.comet_requester.CometRequester.parse_comet_response") -def test_whatif_request_comet_error( - parse_comet_response: Mock, - _: Mock, - __: Mock, - ___: Mock, - ____: Mock, - baseline_information: List[SeasonalFieldInformation], - scenario_information: List[SeasonalFieldInformation], - fake_comet_error: str, -): - CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "whatif_comet_local_op.yaml" - ) - parse_comet_response.return_value = fake_comet_error - parameters = { - "comet_support_email": "fake_email", - "ngrok_token": "fake_ngrok", - } - - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - - with pytest.raises(RuntimeError): - op_.run( - # pyright misidentifies types here - baseline_seasonal_fields=baseline_information, # type: ignore - scenario_seasonal_fields=scenario_information, # type: ignore - ) - - -@patch("pyngrok.ngrok.set_auth_token") -def test_whatif_start_ngrok_error( - set_auth_token: Mock, - baseline_information: List[SeasonalFieldInformation], - scenario_information: List[SeasonalFieldInformation], -): - CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "whatif_comet_local_op.yaml" - ) - set_auth_token.side_effect = PyngrokError("Fake Error") - parameters = { - "comet_support_email": "fake_email", - "ngrok_token": "fake_ngrok", - } - - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - - with pytest.raises(Exception): - op_.run( - # pyright misidentifies types here - baseline_seasonal_fields=baseline_information, # type: ignore - scenario_seasonal_fields=scenario_information, # type: ignore - ) diff --git a/ops/carbon_local/whatif_comet_local.py b/ops/carbon_local/whatif_comet_local.py deleted file mode 100644 index d6bfd337..00000000 --- a/ops/carbon_local/whatif_comet_local.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import xml.etree.ElementTree as ET -from datetime import datetime, timezone -from typing import Any, Dict, List - -from pyngrok import ngrok -from pyproj import Geod -from shapely.geometry import shape - -from vibe_core.data import ( - CarbonOffsetInfo, - FertilizerInformation, - HarvestInformation, - OrganicAmendmentInformation, - SeasonalFieldInformation, - TillageInformation, - gen_guid, -) -from vibe_lib.comet_farm.comet_requester import CometRequester, CometServerParameters -from vibe_lib.comet_farm.comet_server import HTTP_SERVER_HOST, HTTP_SERVER_PORT - -WEBHOOK_URL = f"http://{HTTP_SERVER_HOST}:{HTTP_SERVER_PORT}" - - -class SeasonalFieldConverter: - def get_location(self, geojson: Dict[str, Any]): - """ - calculate area and center point of polygon - """ - s = shape(geojson) - - location = (s.centroid.x, s.centroid.y) # type: ignore - - geod = Geod("+a=6378137 +f=0.0033528106647475126") - area_in_acres = geod.geometry_area_perimeter(s)[0] * 0.000247105 - - return (area_in_acres, location) - - def format_datetime(self, date: str) -> str: - date_obj = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ") - return date_obj.strftime("%m/%d/%Y") - - def _add_historical(self, historical_data: Dict[str, Any], cropland: ET.Element): - ET.SubElement(cropland, "Pre-1980").text = historical_data["pre_1980"] - ET.SubElement(cropland, "CRP").text = historical_data["crp_type"] - ET.SubElement(cropland, "CRPStartYear").text = historical_data["crp_start"] - ET.SubElement(cropland, "CRPEndYear").text = historical_data["crp_end"] - ET.SubElement(cropland, "CRPType").text = historical_data["crp_type"] - ET.SubElement(cropland, "Year1980-2000").text = historical_data["year_1980_2000"] - ET.SubElement(cropland, "Year1980-2000_Tillage").text = historical_data[ - "year_1980_2000_tillage" - ] - - def _add_harvest_information(self, harvest_data: HarvestInformation, harvest_list: ET.Element): - if isinstance(harvest_data, dict): - harvest_data = HarvestInformation(**harvest_data) - 
harvest = ET.SubElement(harvest_list, "HarvestEvent") - - ET.SubElement(harvest, "HarvestDate").text = self.format_datetime(harvest_data.end_date) - ET.SubElement(harvest, "Grain").text = "Yes" if harvest_data.is_grain else "No" - ET.SubElement(harvest, "yield").text = str(harvest_data.crop_yield) - ET.SubElement(harvest, "StrawStoverHayRemoval").text = str( - harvest_data.stray_stover_hay_removal - ) - - def _add_tillage_information(self, tillage_data: TillageInformation, tillage_list: ET.Element): - if isinstance(tillage_data, dict): - tillage_data = TillageInformation(**tillage_data) - tillage = ET.SubElement(tillage_list, "TillageEvent") - ET.SubElement(tillage, "TillageDate").text = self.format_datetime(tillage_data.end_date) - ET.SubElement(tillage, "TillageType").text = tillage_data.implement - - def _add_fertilization_information( - self, fertilizer_data: FertilizerInformation, fertilization_list: ET.Element - ): - if isinstance(fertilizer_data, dict): - fertilizer_data = FertilizerInformation(**fertilizer_data) - fertilizer = ET.SubElement(fertilization_list, "NApplicationEvent") - fertilizer_date = self.format_datetime(fertilizer_data.end_date) - ET.SubElement(fertilizer, "NApplicationDate").text = fertilizer_date - ET.SubElement(fertilizer, "NApplicationType").text = fertilizer_data.application_type - ET.SubElement(fertilizer, "NApplicationAmount").text = str(fertilizer_data.total_nitrogen) - ET.SubElement(fertilizer, "NApplicationMethod").text = "Surface Band / Sidedress" - ET.SubElement(fertilizer, "EEP").text = fertilizer_data.enhanced_efficiency_phosphorus - - def _add_organic_amendmentes_information( - self, omad_data: OrganicAmendmentInformation, omad_list: ET.Element - ): - if isinstance(omad_data, dict): - # Same restriction of previous method - omad_data = OrganicAmendmentInformation(**omad_data) - omadevent = ET.SubElement(omad_list, "OMADApplicationEvent") - ET.SubElement(omadevent, "OMADApplicationDate").text = self.format_datetime( - 
omad_data.end_date - ) - ET.SubElement(omadevent, "OMADType").text = omad_data.organic_amendment_type - ET.SubElement(omadevent, "OMADAmount").text = str(omad_data.organic_amendment_amount) - ET.SubElement(omadevent, "OMADPercentN").text = str( - omad_data.organic_amendment_percent_nitrogen - ) - ET.SubElement(omadevent, "OMADCNRatio").text = str( - omad_data.organic_amendment_carbon_nitrogen_ratio - ) - - def _add_seasonal_field( - self, seasonal_field: SeasonalFieldInformation, year: ET.Element, crop_number: int - ): - crop = ET.SubElement(year, "Crop") - # According to COMET documentation crop numbers - # can be only 1, 2 or -1 if cover - crop_number = crop_number + 1 - crop_number = min(crop_number, 2) - crop.attrib["CropNumber"] = ( - "-1" if "cover" in seasonal_field.crop_type.lower() else str(crop_number) - ) - ET.SubElement(crop, "CropName").text = seasonal_field.crop_name - # We assume SeasonalField.time_range = (plantingDate, lastHarvestDate) - ET.SubElement(crop, "PlantingDate").text = seasonal_field.time_range[0].strftime("%m/%d/%Y") - ET.SubElement(crop, "ContinueFromPreviousYear").text = "N" - - harvest_list = ET.SubElement(crop, "HarvestList") - [ - self._add_harvest_information(harvest_data, harvest_list) - for harvest_data in seasonal_field.harvests - ] - ET.SubElement(crop, "GrazingList") - - tillage_list = ET.SubElement(crop, "TillageList") - [ - self._add_tillage_information(tillage_data, tillage_list) - for tillage_data in seasonal_field.tillages - ] - - fertilizer_list = ET.SubElement(crop, "NApplicationList") - [ - self._add_fertilization_information(fertilizer_data, fertilizer_list) - for fertilizer_data in seasonal_field.fertilizers - ] - - omad_application_list = ET.SubElement(crop, "OMADApplicationList") - [ - self._add_organic_amendmentes_information(omad_data, omad_application_list) - for omad_data in seasonal_field.organic_amendments - ] - - ET.SubElement(crop, "IrrigationList") - - pass - - def _add_scenario(self, seasonal_fields: 
List[SeasonalFieldInformation], scenario: ET.Element): - min_year = min(seasonal_fields, key=lambda x: x.time_range[0].year).time_range[0].year - max_year = max(seasonal_fields, key=lambda x: x.time_range[0].year).time_range[0].year - - for crop_year in list(range(min_year, max_year + 1)): - if any(s.time_range[0].year == crop_year for s in seasonal_fields): - year_element = ET.SubElement(scenario, "CropYear") - year_element.attrib["Year"] = str(crop_year) - for crop_number, seasonal_field in enumerate( - filter(lambda s: s.time_range[0].year == crop_year, seasonal_fields) - ): - self._add_seasonal_field(seasonal_field, year_element, crop_number) - - def build_comet_request( - self, - support_email: str, - baseline_seasonal_fields: List[SeasonalFieldInformation], - scenario_seasonal_fields: List[SeasonalFieldInformation], - ) -> str: - root = ET.fromstring("") - tree = ET.ElementTree(root) - root.attrib["cometEmailId"] = support_email - - cropland = ET.SubElement(root, "Cropland") - cropland.attrib["name"] = "sdk_int1" - - # Baseline field - baseline_field = baseline_seasonal_fields[0] - - # cropland elements - farm_location = self.get_location(baseline_field.geometry) - - geom = ET.SubElement(cropland, "GEOM") - geom.attrib["SRID"] = "4326" - geom.attrib["AREA"] = str(farm_location[0]) - geom.text = f"POINT({farm_location[1][0]} {farm_location[1][1]})" - - self._add_historical(baseline_field.properties, cropland) - - scenario = ET.SubElement(cropland, "CropScenario") - scenario.attrib["Name"] = "Current" - self._add_scenario(seasonal_fields=baseline_seasonal_fields, scenario=scenario) - - scenario = ET.SubElement(cropland, "CropScenario") - scenario.attrib["Name"] = "scenario: " + datetime.now().strftime("%d/%m/%Y %H:%M:%S") - self._add_scenario(seasonal_fields=scenario_seasonal_fields, scenario=scenario) - - return ET.tostring(tree.getroot(), encoding="unicode") - - -class CallbackBuilder: - def __init__(self, comet_url: str, comet_support_email: str, 
ngrok_token: str): - self.cometRequest = CometServerParameters( - url=comet_url, - webhook=WEBHOOK_URL, - supportEmail=comet_support_email, - ngrokToken=ngrok_token, - ) - - self.comet_requester = CometRequester(self.cometRequest) - - self.start_date = datetime.now(timezone.utc) - self.end_date = datetime.now(timezone.utc) - - def get_carbon_offset( - self, - baseline_seasonal_fields: List[SeasonalFieldInformation], - scenario_seasonal_fields: List[SeasonalFieldInformation], - ) -> Dict[str, CarbonOffsetInfo]: - converter = SeasonalFieldConverter() - xml_str = converter.build_comet_request( - self.cometRequest.supportEmail, baseline_seasonal_fields, scenario_seasonal_fields - ) - - comet_response = self.comet_requester.run_comet_request(xml_str) - - obj_carbon = CarbonOffsetInfo( - id=gen_guid(), - geometry=scenario_seasonal_fields[-1].geometry, - time_range=( - baseline_seasonal_fields[0].time_range[0], - scenario_seasonal_fields[-1].time_range[1], - ), - assets=[], - carbon=comet_response, - ) - - return {"carbon_output": obj_carbon} - - def __call__(self): - return self.get_carbon_offset - - def __del__(self): - try: - ngrok.kill() - except Exception: - pass diff --git a/ops/carbon_local/whatif_comet_local_op.yaml b/ops/carbon_local/whatif_comet_local_op.yaml deleted file mode 100644 index 6a2bb593..00000000 --- a/ops/carbon_local/whatif_comet_local_op.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: whatif_comet_op -inputs: - baseline_seasonal_fields: List[SeasonalFieldInformation] - scenario_seasonal_fields: List[SeasonalFieldInformation] -output: - carbon_output: CarbonOffsetInfo -parameters: - comet_url: "https://comet-farm.com/ApiMain/AddToQueue" - comet_support_email: - ngrok_token: -entrypoint: - file: whatif_comet_local.py - callback_builder: CallbackBuilder -version: 2 -description: - short_description: - Computes the offset amount of carbon that would be sequestered in a seasonal field using the - baseline (historical) and scenario (time range interested in) 
information. diff --git a/ops/chunk_raster/chunk_raster.py b/ops/chunk_raster/chunk_raster.py deleted file mode 100644 index 9c51873e..00000000 --- a/ops/chunk_raster/chunk_raster.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from typing import Any, Dict, List, Tuple, Union - -import numpy as np -import pyproj -import rioxarray -import xarray as xr -from numpy.typing import NDArray -from rasterio.windows import Window, bounds -from shapely import geometry as shpg -from shapely.geometry import mapping -from shapely.ops import transform - -from vibe_core.data import ChunkLimits, Raster, RasterChunk, RasterSequence, gen_guid -from vibe_lib.spaceeye.dataset import get_read_intervals, get_write_intervals - -PosChunk = Tuple[int, int] - - -def get_geometry(limits: ChunkLimits, ref: xr.DataArray) -> Dict[str, Any]: - """ - return geojson with the geometry of the particular chunk - """ - p = shpg.box(*bounds(Window(*limits), ref.rio.transform())) # type: ignore - - # convert polygon to lat lon - if ref.rio.crs is not None and str(ref.rio.crs) != "EPSG:4326": - crs = str(ref.rio.crs) - origin = pyproj.CRS(crs) - dest = pyproj.CRS("EPSG:4326") - project = pyproj.Transformer.from_crs(origin, dest, always_xy=True).transform - return mapping(transform(project, p)) - else: - return mapping(p) - - -def make_chunk( - pos: PosChunk, - size: Tuple[int, int], - limits: ChunkLimits, - write_rel_limits: ChunkLimits, - rasters: List[Raster], -) -> RasterChunk: - chunk_id = hashlib.sha256( - (f"chunk-{str(limits)}" + "".join(i.id for i in rasters)).encode() - ).hexdigest() - - # instead of using the geometry of the rasters, using the computed geometry of - # the specific chunk - geom = get_geometry( - limits, # type: ignore - rioxarray.open_rasterio(rasters[0].raster_asset.path_or_url), # type: ignore - ) - - time_range = [rasters[0].time_range[0], rasters[-1].time_range[0]] - res = RasterChunk.clone_from( 
- rasters[0], - id=chunk_id, - assets=[], - time_range=time_range, - geometry=geom, - limits=limits, - chunk_pos=pos, - num_chunks=size, - write_rel_limits=write_rel_limits, - ) - return res - - -def meshgrid_1d_array( - y: NDArray[np.int_], x: NDArray[np.int_] -) -> Tuple[NDArray[np.int_], NDArray[np.int_]]: - return tuple(i.reshape(-1) for i in np.meshgrid(y, x, indexing="ij")) - - -def get_limits( - start_col: NDArray[np.int_], - start_row: NDArray[np.int_], - width: NDArray[np.int_], - height: NDArray[np.int_], -) -> List[ChunkLimits]: - Y, X = meshgrid_1d_array(start_row, start_col) - H, W = meshgrid_1d_array(height, width) - return [tuple(i) for i in np.stack((X, Y, W, H)).T.tolist()] - - -def make_chunks( - shape: Tuple[int, ...], step_y: int, step_x: int, rasters: List[Raster] -) -> List[RasterChunk]: - if len(shape) == 2 or len(shape) == 3: - # assuming the spatial dimensions are the last two - end_y, end_x = shape[-2:] - else: - raise ValueError(f"Chunk assumes rasters have dimension 2 or 3, but {len(shape)} found") - - start_abs_read_y, end_abs_read_y = get_read_intervals(end_y, step_y, step_y, 0) - start_abs_read_x, end_abs_read_x = get_read_intervals(end_x, step_x, step_x, 0) - _, rel_write_y = get_write_intervals(end_y, step_y, step_y, 0) - _, rel_write_x = get_write_intervals(end_x, step_x, step_x, 0) - start_rel_write_y, end_rel_write_y = rel_write_y - start_rel_write_x, end_rel_write_x = rel_write_x - - size = (len(start_abs_read_y), len(start_abs_read_x)) - abs_read_limits = get_limits( - start_abs_read_x, - start_abs_read_y, - end_abs_read_x - start_abs_read_x, - end_abs_read_y - start_abs_read_y, - ) - rel_write_limits = get_limits( - start_rel_write_x, - start_rel_write_y, - end_rel_write_x - start_rel_write_x, - end_rel_write_y - start_rel_write_y, - ) - Y, X = meshgrid_1d_array(np.arange(size[0]), np.arange(size[1])) - positions = [tuple(i) for i in np.stack((Y, X)).T.tolist()] - - res = [] - for position, read_limits, write_limits in 
zip(positions, abs_read_limits, rel_write_limits): - res.append(make_chunk(position, size, read_limits, write_limits, rasters)) - - return res - - -class CallbackBuilder: - def __init__(self, step_y: int, step_x: int): - self.step_y = step_y - self.step_x = step_x - - def __call__(self): - def chunk_callback( - rasters: Union[List[Raster], RasterSequence], - ) -> Dict[str, List[RasterChunk]]: - # the latest raster is the reference for shape and for (later) to warp all images - if isinstance(rasters, RasterSequence): - rasters = [ - Raster.clone_from(rasters, gen_guid(), assets=[i]) - for i in rasters.get_ordered_assets() # type: ignore - ] - else: - rasters = sorted(rasters, key=lambda x: x.time_range[0], reverse=True) - - ref = rasters[0] - - shape = rioxarray.open_rasterio(ref.raster_asset.path_or_url).shape # type: ignore - - chunks = make_chunks(shape, self.step_y, self.step_x, rasters) - - return {"chunk_series": chunks} - - return chunk_callback diff --git a/ops/chunk_raster/chunk_raster.yaml b/ops/chunk_raster/chunk_raster.yaml deleted file mode 100644 index b87219f7..00000000 --- a/ops/chunk_raster/chunk_raster.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: chunk_raster -inputs: - rasters: List[Raster] -output: - chunk_series: List[RasterChunk] -parameters: - step_y: 1000 - step_x: 1000 -dependencies: - parameters: - - step_y - - step_x -entrypoint: - file: chunk_raster.py - callback_builder: CallbackBuilder -description: - short_description: Splits input rasters into a series of chunks. 
\ No newline at end of file diff --git a/ops/chunk_raster/chunk_sequence_raster.yaml b/ops/chunk_raster/chunk_sequence_raster.yaml deleted file mode 100644 index 7b6e1c32..00000000 --- a/ops/chunk_raster/chunk_sequence_raster.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: chunk_sequence_raster -inputs: - rasters: RasterSequence -output: - chunk_series: List[RasterChunk] -parameters: - step_y: 1000 - step_x: 1000 -dependencies: - parameters: - - step_y - - step_x -entrypoint: - file: chunk_raster.py - callback_builder: CallbackBuilder -description: - short_description: Splits input rasters into a series of chunks. \ No newline at end of file diff --git a/ops/clip_raster/clip_raster.py b/ops/clip_raster/clip_raster.py deleted file mode 100644 index c9762a18..00000000 --- a/ops/clip_raster/clip_raster.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import os -from tempfile import TemporaryDirectory -from typing import Dict, TypeVar, cast - -import rioxarray as rio -import xarray as xr -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, DataVibe, Raster, gen_guid, gen_hash_id - -LOGGER = logging.getLogger(__name__) -T = TypeVar("T", bound=Raster) - - -class CallbackBuilder: - def __init__(self, hard_clip: bool): - self.tmp_dir = TemporaryDirectory() - self.hard_clip = hard_clip - - def __call__(self): - def operator_callback(input_item: DataVibe, raster: T) -> Dict[str, T]: - ref_geometry = shpg.shape(input_item.geometry) - - raster_shpg = shpg.shape(raster.geometry) - if raster_shpg.intersects(ref_geometry): - intersecting_geometry = raster_shpg.intersection(ref_geometry) - - if not self.hard_clip: - out_raster = type(raster).clone_from( - raster, - id=gen_hash_id( - f"{raster.id}_soft_clip", intersecting_geometry, raster.time_range - ), - geometry=shpg.mapping(intersecting_geometry), - assets=raster.assets, - ) - else: - da = cast(xr.DataArray, 
rio.open_rasterio(raster.raster_asset.path_or_url)) - fpath = os.path.join(self.tmp_dir.name, "clip.tif") - da.rio.clip( - [intersecting_geometry], crs="EPSG:4326", from_disk=True - ).rio.to_raster(fpath) - new_raster_asset = AssetVibe(reference=fpath, type="image/tiff", id=gen_guid()) - assets = raster.assets.copy() - assets.remove(raster.raster_asset) - assets.append(new_raster_asset) - out_raster = type(raster).clone_from( - raster, - id=gen_hash_id( - f"{raster.id}_hard_clip", intersecting_geometry, raster.time_range - ), - geometry=shpg.mapping(intersecting_geometry), - assets=assets, - ) - - return {"clipped_raster": out_raster} - else: - raise ValueError( - "Input reference geometry does not intersect with raster geometry." - ) - - return operator_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/clip_raster/clip_raster.yaml b/ops/clip_raster/clip_raster.yaml deleted file mode 100644 index 350e616d..00000000 --- a/ops/clip_raster/clip_raster.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: clip_raster -inputs: - input_item: DataVibe - raster: Raster -output: - clipped_raster: "@INHERIT(raster)" -parameters: - hard_clip: false -entrypoint: - file: clip_raster.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - hard_clip -description: - short_description: clips the input raster based on the provided referente geometry. - parameters: - hard_clip: > - if true, keeps only data inside the intersection of reference and input geometries, soft clip - otherwise. diff --git a/ops/combine_chunks/combine_chunks.py b/ops/combine_chunks/combine_chunks.py deleted file mode 100644 index cf5955e4..00000000 --- a/ops/combine_chunks/combine_chunks.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -import logging -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple - -import geopandas as gpd -import rasterio -from rasterio.windows import Window -from shapely import geometry as shpg - -from vibe_core.data import ChunkLimits, RasterChunk -from vibe_core.data.core_types import AssetVibe, BBox, gen_guid -from vibe_core.data.rasters import Raster -from vibe_lib.raster import FLOAT_COMPRESSION_KWARGS, INT_COMPRESSION_KWARGS - -LOGGER = logging.getLogger(__name__) - - -def get_abs_write_limits( - read_abs_limits: ChunkLimits, write_rel_limits: ChunkLimits -) -> ChunkLimits: - return ( - read_abs_limits[0] + write_rel_limits[0], - read_abs_limits[1] + write_rel_limits[1], - write_rel_limits[2], - write_rel_limits[3], - ) - - -def get_structure_and_meta( - chunks: List[RasterChunk], -) -> Tuple[Dict[Tuple[int, int], Any], Dict[str, Any]]: - cs = {} - for c in chunks: - cs[(c.chunk_pos)] = dict( - chunk=c, write_limits=get_abs_write_limits(c.limits, c.write_rel_limits) - ) - with rasterio.open(cs[(0, 0)]["chunk"].raster_asset.path_or_url) as src: - meta = src.meta - ncol, nrow = cs[(0, 0)]["chunk"].num_chunks - meta["width"] = ( - cs[(ncol - 1, nrow - 1)]["write_limits"][0] + cs[(ncol - 1, nrow - 1)]["write_limits"][2] - ) - meta["height"] = ( - cs[(ncol - 1, nrow - 1)]["write_limits"][1] + cs[(ncol - 1, nrow - 1)]["write_limits"][3] - ) - meta["mode"] = "w" - if meta["dtype"].lower().find("float") >= 0: - meta.update(FLOAT_COMPRESSION_KWARGS) - else: - meta.update(INT_COMPRESSION_KWARGS) - return cs, meta - - -def get_combined_tif_and_bounds( - cs: Dict[Tuple[int, int], Any], - meta: Dict[str, Any], - path: str, -) -> Tuple[str, BBox]: - fname = "combined_image.tif" - path = os.path.join(path, fname) - with rasterio.open(path, **meta) as dst: - bounds = dst.bounds - for v in cs.values(): - c = v["chunk"] - write_limits = v["write_limits"] - window_out = Window(*write_limits) - window_in = 
Window(*c.write_rel_limits) - with rasterio.open(c.raster_asset.path_or_url) as src: - arr = src.read(window=window_in) - dst.write(arr, window=window_out) - return path, bounds - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def combine_chunks_callback(chunks: List[RasterChunk]) -> Dict[str, Raster]: - cs, meta = get_structure_and_meta(chunks) - - path, bounds = get_combined_tif_and_bounds(cs, meta, self.tmp_dir.name) - - asset = AssetVibe(reference=path, type=mimetypes.types_map[".tif"], id=gen_guid()) - res_id = hashlib.sha256("".join(i.id for i in chunks).encode()).hexdigest() - proj_geom = shpg.box(*bounds) - proj_crs = meta.get("crs") - if proj_crs is not None: - geom = gpd.GeoSeries(proj_geom, crs=proj_crs).to_crs("epsg:4326").iloc[0] - else: - LOGGER.warning( - "Could not find projected coordinate system for combined raster," - " using geometry as is" - ) - geom = proj_geom - res = Raster( - id=res_id, - time_range=chunks[0].time_range, - geometry=shpg.mapping(geom), - assets=[asset], - bands=chunks[0].bands, - ) - - return {"raster": res} - - return combine_chunks_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/combine_chunks/combine_chunks.yaml b/ops/combine_chunks/combine_chunks.yaml deleted file mode 100644 index 60c91225..00000000 --- a/ops/combine_chunks/combine_chunks.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: combine_chunks -inputs: - chunks: List[RasterChunk] -output: - raster: Raster -parameters: -entrypoint: - file: combine_chunks.py - callback_builder: CallbackBuilder -version: 2 -description: - short_description: Combines series of chunks into a final raster. diff --git a/ops/compute_cloud_prob/compute_cloud_prob.py b/ops/compute_cloud_prob/compute_cloud_prob.py deleted file mode 100644 index ff4a6fe8..00000000 --- a/ops/compute_cloud_prob/compute_cloud_prob.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict - -import numpy as np -import onnxruntime as ort -from numpy.typing import NDArray -from rasterio.enums import Resampling - -from vibe_core.data import ( - AssetVibe, - S2ProcessingLevel, - Sentinel2CloudProbability, - Sentinel2Raster, - gen_guid, -) -from vibe_lib.raster import DEFAULT_NODATA, resample_raster -from vibe_lib.spaceeye.chip import ChipDataset, Dims, InMemoryReader, get_loader, predict_chips -from vibe_lib.spaceeye.utils import verify_processing_level - - -def softmax(x: NDArray[Any]) -> NDArray[Any]: - """Compute softmax values for each sets of scores in x.""" - x = np.exp(x - np.max(x, axis=1, keepdims=True)) - return x / x.sum(axis=1) - - -def post_process( - chip_data: NDArray[Any], chip_mask: NDArray[Any], model_out: NDArray[Any] -) -> NDArray[Any]: - """ - After prediction, we set nodata (all zeros) regions as 100% cloud - """ - nodata_mask = chip_mask.any(axis=1, keepdims=True) - model_prob = softmax(model_out)[:, 1:] - model_prob[nodata_mask] = 1 - return model_prob - - -class CallbackBuilder: - def __init__( - self, - downsampling: int, - root_dir: str, - model_path: str, - window_size: int, - overlap: float, - batch_size: int, - num_workers: int, - in_memory: bool, - ): - self.downsampling = downsampling - self.root_dir = root_dir - self.model_path = model_path - self.window_size = window_size - self.overlap = overlap - self.batch_size = batch_size - self.num_workers = num_workers - self.in_memory = in_memory - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def compute_cloud_prob( - sentinel_raster: Sentinel2Raster, - ) -> Dict[str, Sentinel2CloudProbability]: - verify_processing_level((sentinel_raster,), S2ProcessingLevel.L2A, prefix="Cloud model") - - if self.downsampling < 1: - raise ValueError( - f"Downsampling must be equal or larger than 1, found {self.downsampling}" - ) - model_path = 
os.path.join(self.root_dir, self.model_path) - model = ort.InferenceSession(model_path) - chip_size = self.window_size - step_size = int(chip_size * (1 - self.overlap)) - dataset = ChipDataset( - [sentinel_raster], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(step_size, step_size, 1), - downsampling=self.downsampling, - nodata=DEFAULT_NODATA, - reader=InMemoryReader(self.downsampling) if self.in_memory else None, - ) - - dataloader = get_loader( - dataset, self.batch_size, self.num_workers if not self.in_memory else 0 - ) - pred_filepaths = predict_chips( - model, - dataloader, - self.tmp_dir.name, - skip_nodata=True, - post_process=post_process, - ) - assert ( - len(pred_filepaths) == 1 - ), f"Expected one prediction file, found: {len(pred_filepaths)}" - mask_filepath = resample_raster( - pred_filepaths[0], - self.tmp_dir.name, - dataset.width, - dataset.height, - dataset.transform, - Resampling.bilinear, - ) - asset = AssetVibe(reference=mask_filepath, type="image/tiff", id=gen_guid()) - - cloud_mask = Sentinel2CloudProbability.clone_from( - sentinel_raster, id=gen_guid(), assets=[asset] - ) - - return {"cloud_probability": cloud_mask} - - return compute_cloud_prob - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_cloud_prob/compute_cloud_prob.yaml b/ops/compute_cloud_prob/compute_cloud_prob.yaml deleted file mode 100644 index c7cc6b06..00000000 --- a/ops/compute_cloud_prob/compute_cloud_prob.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: compute_cloud_prob -inputs: - sentinel_raster: Sentinel2Raster -output: - cloud_probability: Sentinel2CloudProbability -parameters: - downsampling: 1 - root_dir: /opt/terravibes/ops/resources/cloud_models - model_path: cloud_model2_cpu.onnx - window_size: 512 - overlap: .25 - batch_size: 1 - num_workers: 0 - in_memory: false -entrypoint: - file: compute_cloud_prob.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_path - - downsampling - - window_size - - overlap 
-description: - short_description: Computes cloud probabilities using a convolutional segmentation model for L2A. \ No newline at end of file diff --git a/ops/compute_cloud_water_mask/compute_cloud_water_mask.py b/ops/compute_cloud_water_mask/compute_cloud_water_mask.py deleted file mode 100644 index f687e000..00000000 --- a/ops/compute_cloud_water_mask/compute_cloud_water_mask.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Dict - -import numpy as np -import rioxarray as rio -import xarray as xr - -from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid -from vibe_lib.raster import load_raster_match - -# QA_PIXEL mask for cloud cover -CLOUD_DILATED_CLOUD_BIT = 6 - - -class CallbackBuilder: - def __init__(self, ndvi_threshold: float): - # Create temporary directory to store our new data, which will be transfered to our storage - # automatically when the op is run in a workflow - self.tmp_dir = TemporaryDirectory() - # Define the parameters - self.ndvi_threshold = ndvi_threshold - - def __call__(self): - def callback(landsat_raster: LandsatRaster, ndvi_raster: Raster) -> Dict[str, Raster]: - # Get QA band from the Landsat raster - qa_pixel = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)[ - landsat_raster.bands["qa_pixel"] - ] - qa_pixel = qa_pixel.astype(np.uint16) - - # Calculate the cloud mask - cloud_mask = (qa_pixel & (1 << CLOUD_DILATED_CLOUD_BIT)) > 0 - # Assign pixels without cloud contamination as 1 and nan for pixels with cloud - cloud_mask = xr.where(cloud_mask > 0, 1, np.nan) - - # Retrieve ndvi layer - ndvi = load_raster_match(ndvi_raster, landsat_raster)[0] - - # Assign pixel value of water bodies as nan and rest as 1 - ndvi_mask = xr.where(ndvi > self.ndvi_threshold, 1, np.nan) - - # Merge cloud and ndvi mask - cloud_water_mask = cloud_mask * ndvi_mask - - # Save final mask - filepath = 
os.path.join(self.tmp_dir.name, "cloud_water_mask.tif") - cloud_water_mask.rio.to_raster(filepath) - cwm_asset = AssetVibe(reference=filepath, type="image/tiff", id=gen_guid()) - - return { - "cloud_water_mask": Raster.clone_from( - landsat_raster, - id=gen_guid(), - assets=[cwm_asset], - bands={"cloud_water_mask": 0}, - ), - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_cloud_water_mask/compute_cloud_water_mask.yaml b/ops/compute_cloud_water_mask/compute_cloud_water_mask.yaml deleted file mode 100644 index a1cead6a..00000000 --- a/ops/compute_cloud_water_mask/compute_cloud_water_mask.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: compute_cloud_water_mask -inputs: - landsat_raster: LandsatRaster - ndvi_raster: Raster -output: - cloud_water_mask: Raster -parameters: - ndvi_threshold: 0.0 -entrypoint: - file: compute_cloud_water_mask.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - ndvi_threshold -description: - short_description: Merges landsat cloud mask and NDVI-based mask to produce a cloud water mask. - long_description: NDVI index with less than or equal to 0 values are used to identify water bodies, and qa_pixel band - is used to identify cloudy areas. Both of these are masked out. The clould_water_mask has a value of 1 or 0 per pixel; - 1 meaning the pixel is free of clouds and water bodies, and 0 meaning the pixel is contaminated with either cloud or - waterbodies, or both. - parameters: - ndvi_threshold: NDVI threshold for excluding water bodies - (everything under this threshold is assumed to be water). \ No newline at end of file diff --git a/ops/compute_conservation_practice/compute_conservation_practice.py b/ops/compute_conservation_practice/compute_conservation_practice.py deleted file mode 100644 index 4865f11e..00000000 --- a/ops/compute_conservation_practice/compute_conservation_practice.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict - -import numpy as np -import onnxruntime as ort -import rasterio -from numpy.typing import NDArray -from rasterio import Affine -from rasterio.enums import Resampling - -from vibe_core.data import AssetVibe, gen_guid -from vibe_core.data.rasters import Raster -from vibe_lib.raster import DEFAULT_NODATA, resample_raster -from vibe_lib.spaceeye.chip import Dims, StackOnChannelsChipDataset, get_loader, predict_chips - - -def post_process(_: NDArray[Any], __: NDArray[Any], model_out: NDArray[Any]) -> NDArray[Any]: - """ - After prediction, we transform probabilities into classes via argmax - """ - model_classes = np.argmax(model_out, axis=1, keepdims=True) - return model_classes - - -def get_meta(in_path: str, width: int, height: int, transform: Affine) -> Dict[str, Any]: - with rasterio.open(in_path) as src: - kwargs = src.meta.copy() - kwargs.update( - { - "nodata": 0, - "width": width, - "height": height, - "transform": transform, - } - ) - return kwargs - - -class CallbackBuilder: - def __init__( - self, - downsampling: int, - root_dir: str, - model_path: str, - window_size: int, - overlap: float, - batch_size: int, - num_workers: int, - ): - self.downsampling = downsampling - self.root_dir = root_dir - self.model_path = model_path - self.window_size = window_size - self.overlap = overlap - self.batch_size = batch_size - self.num_workers = num_workers - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def compute_conservation_practice( - elevation_gradient: Raster, average_elevation: Raster - ) -> Dict[str, Raster]: - if self.downsampling < 1: - raise ValueError( - f"Downsampling must be equal or larger than 1, found {self.downsampling}" - ) - model_path = os.path.join(self.root_dir, self.model_path) - model = ort.InferenceSession(model_path) - chip_size = self.window_size - step_size = int(chip_size * (1 - self.overlap)) - - dataset 
= StackOnChannelsChipDataset( - [[elevation_gradient], [average_elevation]], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(step_size, step_size, 1), - downsampling=self.downsampling, - nodata=DEFAULT_NODATA, - ) - - dataloader = get_loader(dataset, self.batch_size, self.num_workers) - - pred_filepaths = predict_chips( - model, - dataloader, - self.tmp_dir.name, - skip_nodata=False, - post_process=post_process, - ) - assert ( - len(pred_filepaths) == 1 - ), f"Expected one prediction file, found: {len(pred_filepaths)}" - out_filepath = resample_raster( - pred_filepaths[0], - self.tmp_dir.name, - dataset.width, - dataset.height, - dataset.transform, - Resampling.nearest, - ) - asset = AssetVibe(reference=out_filepath, type="image/tiff", id=gen_guid()) - pred = Raster.clone_from(elevation_gradient, id=gen_guid(), assets=[asset]) - - return {"output_raster": pred} - - return compute_conservation_practice - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_conservation_practice/compute_conservation_practice.yaml b/ops/compute_conservation_practice/compute_conservation_practice.yaml deleted file mode 100644 index 35ecafa1..00000000 --- a/ops/compute_conservation_practice/compute_conservation_practice.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Compute terraces and grassed waterways classes on pixel level -name: compute_conservation_practice -inputs: - elevation_gradient: Raster - average_elevation: Raster -output: - output_raster: Raster -parameters: - downsampling: 2 - root_dir: /opt/terravibes/ops/resources/conservation_practices_models - model_path: terraces_grassed_waterways.onnx - window_size: 512 - overlap: .25 - batch_size: 1 - num_workers: 4 -entrypoint: - file: compute_conservation_practice.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_path - - downsampling - - root_dir - - model_path - - window_size - - overlap - - batch_size - - num_workers -description: - short_description: Classifies pixels in 
either terraces or grassed waterways using a CNN model. \ No newline at end of file diff --git a/ops/compute_evaporative_fraction/compute_evaporative_fraction.py b/ops/compute_evaporative_fraction/compute_evaporative_fraction.py deleted file mode 100644 index 7adb7f70..00000000 --- a/ops/compute_evaporative_fraction/compute_evaporative_fraction.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, cast - -import numpy as np -import xarray as xr -from numpy.typing import NDArray -from scipy import ndimage - -from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid -from vibe_lib.raster import load_raster, load_raster_match - -# DEFINE CONSTANTS -# source: Senay et. al (2013) -K1 = 0.35 -K2 = 0.7 -LP = 0.65 -# Set threshold of minimum pixel size -PIXEL_SIZE_THRESHOLD = 9 - - -class CallbackBuilder: - def __init__(self, ndvi_hot_threshold: float): - self.tmp_dir = TemporaryDirectory() - self.ndvi_hot_threshold = ndvi_hot_threshold - - def __call__(self): - def calculate_hot_pixels( - lst_elev_m: xr.DataArray, ndvi_hot_mask: NDArray[Any] - ) -> NDArray[Any]: - # Calculate percentile value of lst_elev - lst_elev_p90 = np.nanpercentile(lst_elev_m, 90) - lst_elev_p95 = np.nanpercentile(lst_elev_m, 95) - - lst_hot_mask = np.where(lst_elev_m > lst_elev_p90, lst_elev_m, np.nan) - lst_hot_mask = np.where(lst_hot_mask < lst_elev_p95, lst_hot_mask, np.nan) - - ndvi_hot_mask = np.where(ndvi_hot_mask > self.ndvi_hot_threshold, ndvi_hot_mask, np.nan) - ndvi_hot_mask = np.where(ndvi_hot_mask > 0, 1, np.nan) - - hot_pixels = lst_hot_mask * ndvi_hot_mask - return hot_pixels - - def calculate_cold_pixels( - lst_elev_m: xr.DataArray, ndvi_cold_mask: NDArray[Any] - ) -> NDArray[Any]: - # Calculate percentile value of lst_elev - lst_elev_p02 = np.nanpercentile(lst_elev_m, 2) - lst_elev_p04 = np.nanpercentile(lst_elev_m, 4) - - 
lst_cold_mask = np.where(lst_elev_m > lst_elev_p02, lst_elev_m, np.nan) - lst_cold_mask = np.where(lst_cold_mask < lst_elev_p04, lst_cold_mask, np.nan) - - ndvi_cold_mask = np.where(ndvi_cold_mask > 0, 1, np.nan) - - cold_pixels = lst_cold_mask * ndvi_cold_mask - return cold_pixels - - def calculate_evap_frxn( - etrf: xr.DataArray, lst: xr.DataArray, hot_pixel_value: float, cold_pixel_value: float - ) -> NDArray[Any]: - etf_nom = hot_pixel_value - lst - etf_dom = hot_pixel_value - cold_pixel_value - etf = etf_nom / etf_dom - evap_frxn = etrf * etf - evap_frxn = np.where(evap_frxn < 0, 0, evap_frxn) - return evap_frxn - - def main_processing( - landsat_raster: LandsatRaster, - dem_raster: Raster, - ndvi_raster: Raster, - cloud_water_mask_raster: Raster, - ) -> xr.DataArray: - lst = load_raster(landsat_raster, bands=["lwir11"])[0] - lst = (lst * 0.00341802) + 149 - - dem = load_raster_match(dem_raster, landsat_raster)[0] - ndvi = load_raster_match(ndvi_raster, landsat_raster)[0] - - lst_elev = lst + (0.0065 * dem) - cloud_water_mask = load_raster_match(cloud_water_mask_raster, landsat_raster)[0] - - lst_elev_m = lst_elev * cloud_water_mask - ndvi_m = ndvi * cloud_water_mask - - # Calculate percentile value of ndvi - ndvi_p01 = np.nanpercentile(ndvi_m, 1) - ndvi_p90 = np.nanpercentile(ndvi_m, 90) - ndvi_p95 = np.nanpercentile(ndvi_m, 95) - - # Define ndvi_hot_mask and ndvi_cold_mask here - ndvi_hot_mask = np.where(ndvi_m < ndvi_p01, ndvi_m, np.nan) - ndvi_hot_mask = np.where(ndvi_hot_mask > self.ndvi_hot_threshold, ndvi_hot_mask, np.nan) - ndvi_hot_mask = np.where(ndvi_hot_mask > 0, 1, np.nan) - - ndvi_cold_mask = np.where(ndvi_m > ndvi_p90, ndvi_m, np.nan) - ndvi_cold_mask = np.where(ndvi_cold_mask < ndvi_p95, ndvi_cold_mask, np.nan) - ndvi_cold_mask = np.where(ndvi_cold_mask > 0, 1, np.nan) - - hot_pixels = calculate_hot_pixels(lst_elev_m, ndvi_hot_mask) - cold_pixels = calculate_cold_pixels(lst_elev_m, ndvi_cold_mask) - - hot_pixels_binary = (hot_pixels > 
0).astype(int) - labels, _ = ndimage.label(hot_pixels_binary) # type: ignore - sizes = np.bincount(labels.ravel()) - mask_sizes = sizes > PIXEL_SIZE_THRESHOLD - hot_pixels[~mask_sizes[labels]] = 0 # type: ignore - hot_pixels = np.where(hot_pixels > 0, hot_pixels, np.nan) - hot_pixel_value = cast(float, np.nanmedian(hot_pixels)) - - cold_pixels_binary = (cold_pixels > 0).astype(int) - labels, _ = ndimage.label(cold_pixels_binary) # type: ignore - sizes = np.bincount(labels.ravel()) - mask_sizes = sizes > PIXEL_SIZE_THRESHOLD - cold_pixels[~mask_sizes[labels]] = 0 # type: ignore - cold_pixels = np.where(cold_pixels > 0, cold_pixels, np.nan) - cold_pixel_value = cast(float, np.nanmin(cold_pixels)) - - etrf = ndvi * K1 - etrf = etrf / K2 - etrf = etrf + LP - - evap_frxn = calculate_evap_frxn(etrf, lst, hot_pixel_value, cold_pixel_value) - - evap_frxn_xr = xr.DataArray( - evap_frxn, dims=cloud_water_mask.dims, coords=cloud_water_mask.coords - ) - - return evap_frxn_xr - - def callback( - landsat_raster: LandsatRaster, - dem_raster: Raster, - ndvi_raster: Raster, - cloud_water_mask_raster: Raster, - ) -> Dict[str, Raster]: - evap_frxn_xr_result = main_processing( - landsat_raster, dem_raster, ndvi_raster, cloud_water_mask_raster - ) - - filepath = os.path.join(self.tmp_dir.name, "evaporative_fraction.tif") - evap_frxn_xr_result.rio.to_raster(filepath) - etrf_asset = AssetVibe(reference=filepath, type="image/tiff", id=gen_guid()) - - return { - "evaporative_fraction": Raster.clone_from( - landsat_raster, - id=gen_guid(), - assets=[etrf_asset], - bands={"evaporative_fraction": 0}, - ) - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_evaporative_fraction/compute_evaporative_fraction.yaml b/ops/compute_evaporative_fraction/compute_evaporative_fraction.yaml deleted file mode 100644 index 0ed716be..00000000 --- a/ops/compute_evaporative_fraction/compute_evaporative_fraction.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: 
compute_evaporative_fraction -inputs: - landsat_raster: LandsatRaster - dem_raster: Raster - ndvi_raster: Raster - cloud_water_mask_raster: Raster -output: - evaporative_fraction: Raster -parameters: - ndvi_hot_threshold: 0.02 -entrypoint: - file: compute_evaporative_fraction.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - ndvi_hot_threshold -description: - short_description: Computes evaporative fraction layer based on the percentile values of lst_dem (created by - treating land surface temperature with dem) and ndvi layers. The source of constants used is "Senay, G.B.; Bohms, S.; Singh, R.K.; - Gowda, P.H.; Velpuri, N.M.; Alemu, H.; Verdin, J.P. Operational Evapotranspiration Mapping Using Remote Sensing - and Weather Datasets - A New Parameterization for the SSEB Approach. JAWRA J. Am. Water Resour. Assoc. 2013, 49, 577–591. - The land surface elevation data source are 10m USGS DEM, and 30m Copernicus DEM; but Copernicus DEM is set as default - source in the workflow. - parameters: - ndvi_hot_threshold: Pixels with ndvi values of this threshold will not be considered for hot pixel selection. diff --git a/ops/compute_fcover/compute_fcover.yaml b/ops/compute_fcover/compute_fcover.yaml deleted file mode 100644 index 992d11ae..00000000 --- a/ops/compute_fcover/compute_fcover.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: compute_fcover -inputs: - raster: Raster - angles: Raster -output: - fcover: Raster -parameters: -entrypoint: - file: fcover.py - callback_builder: CallbackBuilder diff --git a/ops/compute_fcover/fcover.py b/ops/compute_fcover/fcover.py deleted file mode 100644 index 9766a9a3..00000000 --- a/ops/compute_fcover/fcover.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -""" -FCOVER computation using a neural network as described in -https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf -https://github.com/senbox-org/s2tbx/blob/master/s2tbx-biophysical/src/main/java/org/esa/s2tbx/biophysical -https://www.sciencedirect.com/science/article/pii/S0034425710002853 -https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/fcover/ - -Following implementation from Sentinel-2 Toolbox -https://github.com/senbox-org/s2tbx/blob/master/s2tbx-biophysical/src/main/java/org/esa/s2tbx/biophysical/BiophysicalOp.java - -Normalization params and weights from Sentinel-2 Toolbox for L2A -https://github.com/senbox-org/s2tbx/tree/master/s2tbx-biophysical/src/main/resources/auxdata/3_0/S2A/FCOVER -""" - -from tempfile import TemporaryDirectory -from typing import Any, Dict, cast, overload - -import numpy as np -import xarray as xr -from numpy.typing import NDArray -from rasterio.warp import Resampling - -from vibe_core.data import Raster, gen_guid -from vibe_lib.raster import get_cmap, json_to_asset, load_raster, save_raster_to_asset - -BANDS = ["B03", "B04", "B05", "B06", "B07", "B8A", "B11", "B12"] - -# Normalization params: min - max for each band -BANDS_NORM = np.array( - ( - (0, 0.23901527463861838), - (0, 0.29172736471507876), - (0, 0.32652671459255694), - (0.008717364330310326, 0.5938903910368211), - (0.019693160430621366, 0.7466909927207045), - (0.026217828282102625, 0.7582393779705984), - (0.018931934894415213, 0.4929337190581187), - (0, 0.4877499217101771), - ) -) -ANGLES_NORM = np.array( - ( - (0.979624800125421, 0.9999999999691099), - (0.342108564072183, 0.9274847491748729), - (-0.9999999986740542, 0.9999999998869543), - ) -) - -DENORMALIZATION = np.array((0.0001143371095669865, 0.9994883064311412)) - -# NN Weights -# Layer 1: 5 hidden neurons -# 5 x 11 matrix -W1 = np.array( - ( - ( - -0.09299549787532572, - 0.03711751310275837, - 0.35917948087916934, - -2.0327599053936245, - -0.3004739931440174, - 5.081364269387806, - 
-0.5509229514856009, - -1.8459014400791363, - 0.04210879716286216, - -0.1433820536680042, - -0.0919637992244123, - ), - ( - 0.17782538722557306, - -0.3793824396587722, - -0.18316058499587165, - -0.8546862528226032, - -0.07553090207841909, - 2.1968612305059834, - -0.1734580018542482, - -0.89158072360678, - 0.017977829778812265, - 0.19161704265110313, - -0.020341567456493917, - ), - ( - -0.8964833683739212, - -0.6038768961220443, - -0.5995953059405849, - -0.15212446911598965, - 0.3889544003539062, - 1.9871015442471918, - -0.9746781245763875, - -0.28459612830995773, - -0.7195016395928718, - 0.4628341672035696, - 1.652035259226453, - ), - ( - -0.15296262636768043, - 0.17628558201043018, - 0.11212126329600514, - 1.5711153194443364, - 0.5209619736717268, - -3.068192837466073, - 0.1483332044127799, - 1.2331177561153577, - -0.02091226761957991, - -0.23041694611129848, - 0.0031568086031440803, - ), - ( - 1.7234228895153363, - -2.906528582039084, - -1.3938598383149996, - -1.6262956756929428, - 0.3326361580291295, - -0.8862583674506147, - -0.2185426118098439, - 0.5660635905206617, - -0.09949171171933309, - -0.35271418843339297, - 0.06514559686105968, - ), - ) -) -B1 = np.array( - ( - -1.886007283361096, - -0.02498619641898423, - 0.29510485628465327, - 0.0029300996499639458, - -3.359449911074414, - ) -) -# Layer 2: 1 output neuron -# 1 x 5 matrix -W2 = np.array( - ( - 0.21418510066217855, - 2.354410480678047, - 0.039929632100371135, - 1.5480571230482811, - -0.11310020940549115, - ) -) - -B2 = -0.15076057408085747 - - -def fcover_fun(raster: xr.DataArray, angles: xr.DataArray) -> xr.DataArray: - # Normalize bands - norm_bands = normalize(raster, BANDS_NORM[:, :1, None], BANDS_NORM[:, 1:, None]) - # Normalize angles before upsampling - zen_norm = normalize( - cast(xr.DataArray, np.cos(np.deg2rad(angles[[0, 2]]))), - ANGLES_NORM[:2, :1, None], - ANGLES_NORM[:2, 1:, None], - ) - rel_az_norm = cast( - xr.DataArray, - normalize( - np.cos(np.deg2rad(angles[3] - angles[1])), - 
ANGLES_NORM[2, :1, None], - ANGLES_NORM[2, 1:, None], - ), - ).expand_dims("band") - norm_angles = xr.concat((zen_norm, rel_az_norm), dim="band") - # Upsample angles to the same resolution as the band data - norm_angles = norm_angles.rio.reproject_match(norm_bands, resampling=Resampling.bilinear) - full_data = xr.concat((norm_bands, norm_angles), dim="band").to_numpy() - layer1 = np.tanh(W1.dot(full_data.transpose((1, 0, 2))) + B1[:, None, None]) - layer2 = np.tanh(W2.dot(layer1.transpose(1, 0, 2)) + B2) - fcover = denormalize(layer2, DENORMALIZATION[0], DENORMALIZATION[1])[None] - fcover = raster[:1].copy(data=fcover) # Copy metadata - return fcover - - -@overload -def normalize(unnormalized: NDArray[Any], min: NDArray[Any], max: NDArray[Any]) -> NDArray[Any]: ... - - -@overload -def normalize(unnormalized: xr.DataArray, min: NDArray[Any], max: NDArray[Any]) -> xr.DataArray: ... - - -def normalize(unnormalized: Any, min: NDArray[Any], max: NDArray[Any]): - return 2 * (unnormalized - min) / (np.subtract(max, min)) - 1 - - -@overload -def denormalize(normalized: NDArray[Any], min: NDArray[Any], max: NDArray[Any]) -> NDArray[Any]: ... - - -@overload -def denormalize(normalized: xr.DataArray, min: NDArray[Any], max: NDArray[Any]) -> xr.DataArray: ... 
- - -def denormalize(normalized: Any, min: NDArray[Any], max: NDArray[Any]): - return 0.5 * (normalized + 1) * (np.subtract(max, min)) + min - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def fcover_callback(raster: Raster, angles: Raster) -> Dict[str, Raster]: - r = load_raster(raster, bands=BANDS, use_geometry=True) * raster.scale + raster.offset - a = load_raster(angles, use_geometry=True) - fcover = fcover_fun(r, a) - asset = save_raster_to_asset(fcover, self.tmp_dir.name) - vis_dict = { - "bands": [0], - "colormap": get_cmap("viridis"), - "range": (0, 1), - } - out_raster = Raster.clone_from( - raster, - id=gen_guid(), - assets=[asset, json_to_asset(vis_dict, self.tmp_dir.name)], - bands={"fcover": 0}, - ) - return {"fcover": out_raster} - - return fcover_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_ghg_fluxes/compute_ghg_fluxes.py b/ops/compute_ghg_fluxes/compute_ghg_fluxes.py deleted file mode 100644 index 137665a4..00000000 --- a/ops/compute_ghg_fluxes/compute_ghg_fluxes.py +++ /dev/null @@ -1,1046 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from copy import copy -from dataclasses import asdict, dataclass -from enum import Enum, IntEnum, auto -from typing import Dict, List, Optional, Tuple, Union - -import geopandas -from pyproj import Geod -from shapely import geometry as shpg - -from vibe_core.data import GHGFlux, GHGProtocolVibe, gen_hash_id - - -@dataclass(frozen=True, unsafe_hash=True) -class Fertilizer: - source: str - details: str - co2: float - n2o: float - nitrogen_ratio: float - unit: str - - -@dataclass -class GHG: - name: str - details: str - factor: float - - -@dataclass -class EmissionFactor: - value: float - unit: str - details: Optional[str] = None - - -class FertilizerType(Enum): - SYNTHETIC = auto() - UREA = auto() - LIMESTONE_CALCITE = auto() - LIMESTONE_DOLOMITE = auto() - GYPSUM = auto() - MANURE = auto() - MANURE_BIRDS = auto() - ORGANIC_COMPOUND = auto() - GENERIC_ORGANIC_COMPOUND = auto() - FILTER_CAKE = auto() - VINASSE = auto() - - -class CropType(Enum): - SOYBEAN = auto() - CORN = auto() - BEANS = auto() - RICE = auto() - WHEAT = auto() - SUGARCANE = auto() - SUGARCANE_WITH_BURNING = auto() - COTTON = auto() - GREEN_MANURE_LEGUMES = auto() - GREEN_MANURE_GRASSES = auto() - GREEN_MANURE = auto() - - -class Biome(Enum): - US_FOREST = 1 - BRAZIL_AMAZON_FOREST = 2 - BRAZIL_AMAZON_SAVANNA = 3 - BRAZIL_CERRADO = 4 - BRAZIL_PANTANAL = 5 - BRAZIL_CAATINGA = 6 - BRAZIL_MATA_ATLANTICA = 7 - BRAZIL_PAMPA = 8 - - -class CurrentLandUse(Enum): - CONVENTIONAL_CROPS = auto() - DIRECT_SEEDING = auto() - SUGARCANE_WITH_BURNING = auto() - SUGARCANE_WITHOUT_BURNING = auto() - - -class PreviousLandUse(Enum): - CONVENTIONAL_CROPS = auto() - DIRECT_SEEDING = auto() - NATIVE = auto() - SUGARCANE_WITH_BURNING = auto() - SUGARCANE_WITHOUT_BURNING = auto() - - -BIOME_TO_CARBON_STOCK = { - Biome.US_FOREST: 88.39, # Source: EPA - Biome.BRAZIL_AMAZON_FOREST: 573.16, - Biome.BRAZIL_AMAZON_SAVANNA: 86.38, - Biome.BRAZIL_CERRADO: 115.92, - Biome.BRAZIL_PANTANAL: 150.52, - Biome.BRAZIL_CAATINGA: 159.57, - 
Biome.BRAZIL_MATA_ATLANTICA: 468.5, # average value - Biome.BRAZIL_PAMPA: 92.10, -} - - -GLOBAL_HEATING_POTENTIAL_GHG = { - "CO2": GHG("CO2", "Carbon dioxide", 1.0), - "N2O": GHG("N2O", "Nitrous oxide", 298.0), - "CH4": GHG("CH4", "Methane", 25.0), -} - -GHG_CONVERSION = { - "C_CO2": 3.66, - "CO_CO2": 1.57, - "N-N2O_N2O": 1.57, - "NOX_N2O": 0.96, -} - -WORLD_GASOLINE_MIXTURE = 1 - 0.1 -GASOLINE_MIXTURES = { # % gasoline - "Argentina": 1 - 0.05, - "Australia": 1 - 0.1, - "Brazil": 1 - 0.27, - "Canada": 1 - 0.05, - "China": 1 - 0.1, - "Colombia": 1 - 0.1, - "Costa Rica": 1 - 0.07, - "India": 1 - 0.2, - "Jamica": 1 - 0.1, - "Malawi": 1 - 0.1, - "Mexico": 1 - 0.6, - "New Zealand": 1 - 0.1, - "Pakistan": 1 - 0.1, - "Paraguay": 1 - 0.24, - "Peru": 1 - 0.08, - "Philippines": 1 - 0.1, - "Thailand": 1 - 0.2, - "Vietnam": 1 - 0.05, - "Austria": 1 - 0.1, - "Denmark": 1 - 0.05, - "Finland": 1 - 0.1, - "France": 1 - 0.1, - "Germany": 1 - 0.1, - "Ireland": 1 - 0.04, - "Netherlands": 1 - 0.15, - "Romania": 1 - 0.04, - "Sweden": 1 - 0.1, - "United States of America": 1 - 0.1, - "World": WORLD_GASOLINE_MIXTURE, -} - -# Emission factors {{{ - -FERTILIZER_SYNTHETIC = Fertilizer( - "Synthetic", "Except urea", 0.0, 0.01130, 0.0, "kg N2O/kg applied nitrogen" -) -FERTILIZER_UREA = Fertilizer("Urea", "", 0.73300, 0.00880, 45.0 / 100, "kg N2O/kg applied nitrogen") -FERTILIZER_LIMESTONE_CALCITE = Fertilizer( - "Limestone", "Calcite", 0.44000, 0, 0, "kg CO2/kg limestone" -) -FERTILIZER_LIMESTONE_DOLOMITE = Fertilizer( - "Limestone", "Dolomite", 0.47667, 0, 0, "kg CO2/kg limestone" -) -FERTILIZER_GYPSUM = Fertilizer("Agricultural Gypsum", "", 0.40000, 0, 0, "kg CO2/kg gypsum") -FERTILIZER_MANURE = Fertilizer( - "Manure", "Bovine, horse, pig, sheep", 0, 0.00020, 1.6 / 100, "kg N2O/kg manure" -) -FERTILIZER_MANURE_BIRDS = Fertilizer("Manure", "Birds", 0, 0.00038, 3.0 / 100, "kg N2O/kg manure") -FERTILIZER_ORGANIC_COMPOUND = Fertilizer( - "Organic compound", "", 0, 0.000176, 1.4 / 100, "kg 
N2O/kg manure" -) -FERTILIZER_GENERIC_ORGANIC = Fertilizer( - "Generic organic fertilizer", - "", - 0, - 0.000226285714285714, - 1.8 / 100, - "kg N2O/kg manure", -) -FERTILIZER_FILTER_CAKE = Fertilizer("Filter cake", "", 0, 2.35723, 1.4 / 100, "kg N2O/hectare-year") -FERTILIZER_VINASSE = Fertilizer("Vinasse", "", 0, 0.00001, 0.0313 / 100, "kg N2O/filter") - -C_N2O_FLOW_RATE = 0.0075 # kg N2O/kg N applied -C_FRAC_GAS_F = 0.1 # Fraction of N2O emitted as gas -C_FRAC_LEACH = 0.3 # Fraction of N leached -C_N2O_VOLATILIZATION = 0.02 # kg N2O/kg N applied -N2O_RESIDUE = 0.20 # Ratio -N2O_ATMOSPHERIC_VOLATIZATION_RATE = 0.01 # kg N2O-N/kg N -N2O_SOIL_LOSS = 0.0188571428571429 # N2O tonnes / ha / year -CO2EQ_SOIL_EMISSIONS = 73.3333333333 # CO2eq tonnes / ha -- tropical / subtropical - -FOREST_TO_CROPLAND_CARBON_STOCK = 88.39 # tonnes CO2 / ha -- reference: EPA -# https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references - -HIGH_CLAY_CONTENT_EMISSION_FACTOR = 0.16 # tonnes CO2 / ha year -LOW_CLAY_CONTENT_EMISSION_FACTOR = 0.92 # tonnes CO2 / ha year -CLAY_CONTENT_THRESHOLD = 0.6 -FOREST_STR = "forest" - -RESIDUES = { - CropType.SOYBEAN: EmissionFactor(0.000243624857142857, "kg N2O/kg product"), - CropType.CORN: EmissionFactor(0.000162963428571429, "kg N2O/kg product"), - CropType.BEANS: EmissionFactor(0.000346297285714286, "kg N2O/kg product"), - CropType.RICE: EmissionFactor(0.00011484, "kg N2O/kg product"), - CropType.WHEAT: EmissionFactor(0.000177728571428571, "kg N2O/kg product"), - CropType.SUGARCANE: EmissionFactor(0.0000170657142857143, "kg N2O/kg product"), - CropType.SUGARCANE_WITH_BURNING: EmissionFactor(0.00000341314285714286, "kg N2O/kg product"), - CropType.COTTON: EmissionFactor(0.000361428571428571, "kg N2O/kg product"), - CropType.GREEN_MANURE_LEGUMES: EmissionFactor(0.000382380952380952, "kg N2O/kg product"), - CropType.GREEN_MANURE_GRASSES: EmissionFactor(0.000158015873015873, "kg N2O/kg product"), - 
CropType.GREEN_MANURE: EmissionFactor(0.000247761904761905, "kg N2O/kg product"), -} - -ENERGY_FACTORS_BY_COUNTRY = { # {{{ - "Albania": 0.003095364, - "Algeria": 0.159542831, - "Angola": 0.065773567, - "Argentina": 0.098421175, - "Armenia": 0.029916277, - "Australia": 0.236261887, - "Austria": 0.045215264, - "Azerbaijan": 0.12282867, - "Bahrain": 0.184169045, - "Bangladesh": 0.162124582, - "Belarus": 0.083745465, - "Belgium": 0.060356361, - "Benin": 0.200827188, - "Bolivia": 0.108945513, - "Bosnia & Herzegovina": 0.214942416, - "Brazil": 0.017763401, - "Brunei Darussalam": 0.209197436, - "Bulgaria": 0.128374791, - "Cameroon": 0.067222554, - "Canada": 0.046323264, - "Chile": 0.10327114, - "China": 0.205740504, - "Chinese Taipei": 0.175834586, - "Colombia": 0.048550178, - "Congo Dem. Rep.": 0.000812663, - "Costa Rica": 0.011027675, - "Cote d'Ivoire": 0.118112301, - "Croatia": 0.078498393, - "Cuba": 0.208176358, - "Cyprus": 0.206163455, - "Czech Republic": 0.142448081, - "Denmark": 0.083861473, - "Dominican Republic": 0.163578361, - "Ecuador": 0.080231471, - "Egypt": 0.128953112, - "El Salvador": 0.088302753, - "Eritrea": 0.186029433, - "Estonia": 0.194967281, - "Ethiopia": 0.032771621, - "Finland": 0.056897185, - "France": 0.024888727, - "Gabon": 0.089232088, - "Georgia": 0.035633834, - "Germany": 0.119247392, - "Ghana": 0.051669336, - "Greece": 0.200104523, - "Guatemala": 0.096702916, - "Haiti": 0.15155357, - "Honduras": 0.095377471, - "Hong Kong (China)": 0.211343355, - "Hungary": 0.08367062, - "Iceland": 0.000117448, - "India": 0.263539434, - "Indonesia": 0.206556241, - "Iran Islamic Rep.": 0.174499668, - "Iraq": 0.18949185, - "Ireland": 0.128871203, - "Israel": 0.192482591, - "Italy": 0.107035847, - "Jamaica": 0.150815254, - "Japan": 0.114874393, - "Jordan": 0.160811796, - "Kazakhstan": 0.132976177, - "Kenya": 0.109399904, - "Korea, Dem Rep. of": 0.138185411, - "Korea, Rep. 
of": 0.137862623, - "Kuwait": 0.24088064, - "Kyrgyzstan": 0.02242761, - "Latvia": 0.042401553, - "Lebanon": 0.198518255, - "Libya": 0.241484223, - "Lithuania": 0.030789353, - "Luxembourg": 0.106447222, - "Malaysia": 0.179675413, - "Malta": 0.235565038, - "Mexico": 0.126030291, - "Moldova": 0.110827257, - "Morocco": 0.176849154, - "Mozambique": 0.000139414, - "Myanmar": 0.054249536, - "Namibia": 0.06562166, - "Nepal": 0.001203039, - "Netherlands": 0.103732345, - "Netherlands Antilles": 0.195810829, - "New Zealand": 0.046121331, - "Nicaragua": 0.140110007, - "Nigeria": 0.115158373, - "Norway": 0.004788776, - "Oman": 0.233369148, - "Pakistan": 0.126789742, - "Panama": 0.08370217, - "Paraguay": 0, - "Peru": 0.065456928, - "Philippines": 0.132450182, - "Poland": 0.177334846, - "Portugal": 0.102001926, - "Qatar": 0.136840465, - "Romania": 0.114776003, - "Russia": 0.087920908, - "Saudi Arabia": 0.209753403, - "Senegal": 0.170202733, - "Serbia & Montenegro": 0.188450468, - "Singapore": 0.143723334, - "Slovak Republic": 0.061415332, - "Slovenia": 0.087538925, - "South Africa": 0.256475657, - "Spain": 0.082763168, - "Sri Lanka": 0.127450138, - "Sudan": 0.098500037, - "Sweden": 0.011948395, - "Switzerland": 0.011061718, - "Syria": 0.177526918, - "Tajikistan": 0.008095713, - "Tanzania United Rep.": 0.077898522, - "Thailand": 0.142206509, - "The former Yugoslav Republic of Macedonia": 0.196643297, - "Togo": 0.055835278, - "Trinidad & Tobago": 0.19910965, - "Tunisia": 0.149048022, - "Turkey": 0.132940333, - "Turkmenistan": 0.218678315, - "Ukraine": 0.103585535, - "United Arab Emirates": 0.174855004, - "United Kingdom": 0.124511777, - "United States": 0.14076309, - "Uruguay": 0.070130528, - "Uzbekistan": 0.127828824, - "Venezuela": 0.055011591, - "Vietnam": 0.106396642, - "Yemen": 0.174635675, - "Zambia": 0.000899308, - "Zimbabwe": 0.171449399, - "Africa": 0.178111, - "Asia": 0.206365, - "Central and Eastern Europe": 0.093903, - "China (including Hong Kong)": 0.205811, - "Former 
USSR": 0.096388889, - "Latin America": 0.048475, - "Middle East": 0.19113, - "Rest of Europe": 0.107222222, -} # }}} - - -# }}} Emission factors - - -class Scope(IntEnum): - SCOPE_1 = 1 - SCOPE_2 = 2 - SCOPE_3 = 3 - - -@dataclass -class Emissions: - scope: Scope - source: str - co2: float = 0.0 - n2o: float = 0.0 - ch4: float = 0.0 - - CO2_CO2EQ = GHG("CO2", "Carbon dioxide", 1.0) - N2O_CO2EQ = GHG("N2O", "Nitrous oxide", 298.0) - CH4_CO2EQ = GHG("CH4", "Methane", 25.0) - - @property - def total(self): - # co2 equivalent - return self.co2 + self.n2o * self.N2O_CO2EQ.factor + self.ch4 * self.CH4_CO2EQ.factor - - def __add__(self, other: "Emissions") -> "Emissions": - return Emissions( - scope=self.scope, - source=self.source + " / " + other.source, - co2=self.co2 + other.co2, - n2o=self.n2o + other.n2o, - ch4=self.ch4 + other.ch4, - ) - - def __rmul__(self, scalar: float) -> "Emissions": - return Emissions( - scope=self.scope, - source=self.source, - co2=self.co2 * scalar, - n2o=self.n2o * scalar, - ch4=self.ch4 * scalar, - ) - - -class FuelType(Enum): - DIESEL = 1 - DIESEL_B2 = 2 - DIESEL_B5 = 3 - DIESEL_B6 = 4 - DIESEL_B7 = 5 - DIESEL_B8 = 6 - DIESEL_B9 = 7 - DIESEL_B10 = 8 - GASOLINE = 9 - BIODIESEL = 10 - ETHANOL_ANHYDROUS = 11 - ETHANOL_HYDRATED = 12 - - -FUEL_COMPOSITION = { # 1 - Diesel = Biodiesel - FuelType.DIESEL: 1.0, - FuelType.DIESEL_B2: 0.98, - FuelType.DIESEL_B5: 0.95, - FuelType.DIESEL_B6: 0.94, - FuelType.DIESEL_B7: 0.93, - FuelType.DIESEL_B8: 0.92, - FuelType.DIESEL_B9: 0.91, - FuelType.DIESEL_B10: 0.9, -} - -AVERAGE_FUEL_CONSUMPTION = 20 # liters per hour -FUEL_EMISSION_FACTORS: Dict[FuelType, Emissions] = { - k: v * Emissions(Scope.SCOPE_1, k.name, co2=0.002681, n2o=0.00000002, ch4=0.00000030) - for k, v in FUEL_COMPOSITION.items() - if k != FuelType.GASOLINE -} -FUEL_EMISSION_FACTORS[FuelType.GASOLINE] = Emissions( - Scope.SCOPE_1, "Gasoline", co2=0.002212, n2o=0.0, ch4=0.0 -) -FUEL_EMISSION_FACTORS[FuelType.ETHANOL_ANHYDROUS] = Emissions( - 
Scope.SCOPE_1, "Ethanol anhydrous", co2=0.001526, n2o=0.0, ch4=0.0 -) -FUEL_EMISSION_FACTORS[FuelType.ETHANOL_HYDRATED] = Emissions( - Scope.SCOPE_1, "Ethanol hydrated", co2=0.001457, n2o=0.0, ch4=0.0 -) -FUEL_EMISSION_FACTORS[FuelType.BIODIESEL] = Emissions( - Scope.SCOPE_1, "Biodiesel", co2=0.002499, n2o=0.0, ch4=0.0 -) - -BURNING_EMISSION_FACTORS = { - CropType.BEANS: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Beans)", - co2=GHG_CONVERSION["CO_CO2"] * 0.0734272, - n2o=0.000288464 + GHG_CONVERSION["NOX_N2O"] * 0.0104259131428571, - ch4=0.00349653333333333, - ), - CropType.CORN: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Corn)", - co2=GHG_CONVERSION["CO_CO2"] * 0.078583792, - n2o=0.000123488816 + GHG_CONVERSION["NOX_N2O"] * 0.00446323863542857, - ch4=0.00374208533333333, - ), - CropType.COTTON: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Cotton)", - co2=GHG_CONVERSION["CO_CO2"] * 0.10773, - n2o=0.000355509 + GHG_CONVERSION["NOX_N2O"] * 0.012849111, - ch4=0.00513, - ), - CropType.RICE: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Rice)", - co2=GHG_CONVERSION["CO_CO2"] * 0.04873344, - n2o=0.000053606784 + GHG_CONVERSION["NOX_N2O"] * 0.001937502336, - ch4=0.00232064, - ), - CropType.SOYBEAN: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Soybeans)", - co2=GHG_CONVERSION["CO_CO2"] * 0.0975744, - n2o=0.000383328 + GHG_CONVERSION["NOX_N2O"] * 0.0138545691428571, - ch4=0.0046464, - ), - CropType.SUGARCANE: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Sugarcane)", - co2=GHG_CONVERSION["CO_CO2"] * 0.00793636844, - n2o=0.0000186425657631827 + GHG_CONVERSION["NOX_N2O"] * 0.000673795591155031, - ch4=0.000377922306666667, - ), - CropType.WHEAT: Emissions( - Scope.SCOPE_1, - "Biomass Burning (Wheat)", - co2=GHG_CONVERSION["CO_CO2"] * 0.058212, - n2o=0.0000548856 + GHG_CONVERSION["NOX_N2O"] * 0.0019837224, - ch4=0.002772, - ), -} - -GREEN_MANURE_CAPTURE_FACTOR = -1.835 # tonnes of CO2 per hectare - - -def geometry_to_country_name( - polygon: Union[ - 
shpg.Polygon, - shpg.MultiPolygon, - shpg.Point, - shpg.LineString, - shpg.LinearRing, - shpg.MultiLineString, - shpg.GeometryCollection, - ], -) -> str: - # Use geopandas "naturalearth_lowres" dataset - df = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres")) # type: ignore - df = df[df.geometry.intersects(polygon)] - - assert df is not None, "There is not intersection between the geometry, and any country" - if len(df) == 0: - return "World" - return df.iloc[0]["name"] - - -def get_land_use_change_factor( - previous_land_use: PreviousLandUse, - current_land_use: CurrentLandUse, - biome: Biome, - high_clay_content: bool, -): - if previous_land_use.name == current_land_use.name: - return 0.0 - if previous_land_use == PreviousLandUse.DIRECT_SEEDING: - if current_land_use == CurrentLandUse.CONVENTIONAL_CROPS: - return 0.9167 - elif previous_land_use == PreviousLandUse.CONVENTIONAL_CROPS: - if current_land_use == CurrentLandUse.SUGARCANE_WITH_BURNING: - return -2.09 - elif current_land_use == CurrentLandUse.DIRECT_SEEDING: - return -1.52 - elif previous_land_use == PreviousLandUse.NATIVE: - if current_land_use == CurrentLandUse.CONVENTIONAL_CROPS and high_clay_content: - return 0.1613 - elif current_land_use == CurrentLandUse.CONVENTIONAL_CROPS and not high_clay_content: - return 0.9167 - elif current_land_use == CurrentLandUse.SUGARCANE_WITH_BURNING: - return 3.1203 - elif current_land_use == CurrentLandUse.DIRECT_SEEDING: - if biome == Biome.BRAZIL_CERRADO: - return -0.44 - elif biome == Biome.BRAZIL_AMAZON_SAVANNA or biome == Biome.BRAZIL_AMAZON_FOREST: - return 0.88 - return 0.0 # we don't know what this is, so we return 0 - - -class CropEmission: - """General calculation method for emissions from a crop type. 
- - Computation should be correct for the following crops: - - wheat - - corn - - cotton - - soybeans - - :param crop_type: Crop type - :param cultivation_area: Cultivation area in hectares - """ - - def __init__(self, crop_type: CropType, cultivation_area: float): - self.cultivation_area = cultivation_area / 1000.0 - self.crop_type = crop_type - - if crop_type not in [ - CropType.WHEAT, - CropType.CORN, - CropType.COTTON, - CropType.SOYBEAN, - ]: - raise ValueError("Crop type not supported") - - def fuel_emissions( - self, - fuel_consumptions: List[Tuple[FuelType, float]], - scope: Scope = Scope.SCOPE_1, - desc: str = "", - gasoline_mixture: float = WORLD_GASOLINE_MIXTURE, - ) -> Emissions: - emissions = Emissions(scope, desc) - for fuel_type, fuel_consumption in fuel_consumptions: - tmp = copy(FUEL_EMISSION_FACTORS[fuel_type]) - tmp.scope = scope - emissions += fuel_consumption * tmp - if "DIESEL" in fuel_type.name: - emissions += ( - fuel_consumption - * (1 - FUEL_COMPOSITION[fuel_type]) - * FUEL_EMISSION_FACTORS[FuelType.BIODIESEL] - ) - elif "GASOLINE" in fuel_type.name: - emissions += ( - fuel_consumption - * (1 - gasoline_mixture) - * FUEL_EMISSION_FACTORS[FuelType.ETHANOL_ANHYDROUS] - ) - return emissions - - def biomass_burning_emissions( - self, average_yield: float, burn_area: float, scope: Scope = Scope.SCOPE_1 - ) -> Emissions: - tmp = copy(BURNING_EMISSION_FACTORS[self.crop_type]) - tmp.scope = scope - return average_yield * burn_area * tmp - - def initial_carbon_stock(self, biome: str = "", previous_land_use: str = "") -> Emissions: - if biome.upper() not in Biome.__members__ or "native" not in previous_land_use.lower(): - return Emissions(Scope.SCOPE_1, "Initial carbon stock") - stock = BIOME_TO_CARBON_STOCK[Biome[biome.upper()]] - return Emissions( - Scope.SCOPE_1, - "Initial carbon stock", - co2=(stock * self.cultivation_area * 1000), - ) - - def carbon_capture( - self, - cultivation_area: float, - green_manure_amount: float = 0.0, - 
green_manure_grass_amount: float = 0.0, - freen_fertilizer_legumes_amount: float = 0.0, - ) -> Emissions: - total_capture = ( - cultivation_area - * GREEN_MANURE_CAPTURE_FACTOR - * any( - ( - green_manure_amount, - green_manure_grass_amount, - freen_fertilizer_legumes_amount, - ) - ) - ) - return Emissions( - Scope.SCOPE_1, - "Carbon captured by Green Manure", - co2=total_capture, - ) - - def land_use_emissions( - self, - biome: str = "", - previous_land_use: str = "", - cultivation_area: float = 0.0, - current_land_use: str = "", - clay_content: float = 0.0, - ) -> Emissions: - try: - previous = PreviousLandUse[previous_land_use.upper()] - except Exception: - for land_use in PreviousLandUse: - if previous_land_use.upper() in land_use.name: - previous = land_use - break - raise ValueError( - f"Previous land use {previous_land_use} not supported. " - f"Supported values: {PreviousLandUse.__members__}" - ) - try: - current = CurrentLandUse[current_land_use.upper()] - except Exception: - for land_use in CurrentLandUse: - if current_land_use.upper() in land_use.name: - current = land_use - break - raise ValueError( - f"Current land use {current_land_use} not supported. 
" - f"Supported values: {CurrentLandUse.__members__}" - ) - return ( - cultivation_area - * get_land_use_change_factor( - previous, - current, - Biome[biome.upper()], - clay_content > CLAY_CONTENT_THRESHOLD, - ) - * Emissions(Scope.SCOPE_1, "Land use change", co2=1.0) - ) - - def fertilizer_emissions( - self, - average_yield: float = 0.0, - urea_amount: float = 0.0, - gypsum_amount: float = 0.0, - limestone_calcite_amount: float = 0.0, - limestone_dolomite_amount: float = 0.0, - synthetic_fertilizer_amount: float = 0.0, - synthetic_fertilizer_nitrogen_ratio: float = 0.0, - manure_amount: float = 0.0, - manure_birds_amount: float = 0.0, - organic_compound_amount: float = 0.0, - organic_other_amount: float = 0.0, - green_manure_amount: float = 0.0, - green_manure_grass_amount: float = 0.0, - green_manure_legumes_amount: float = 0.0, - soil_management_area: float = 0.0, - ) -> Dict[str, Emissions]: - leached_rate = C_N2O_FLOW_RATE * GHG_CONVERSION["N-N2O_N2O"] * C_FRAC_LEACH - return { - "Urea": Emissions( # ✅ - scope=Scope.SCOPE_1, - source="Fertilizer emissions, urea", - co2=FERTILIZER_UREA.co2 * urea_amount * self.cultivation_area, - n2o=FERTILIZER_UREA.n2o - * (urea_amount * FERTILIZER_UREA.nitrogen_ratio) - * self.cultivation_area, - ), - "Liming, gypsum": ( - Emissions( # ✅ - scope=Scope.SCOPE_1, - source="Fertilizer emissions, gypsum", - co2=gypsum_amount * FERTILIZER_GYPSUM.co2 * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Fertilizer emissions, limestone, calcite", - co2=limestone_calcite_amount - * FERTILIZER_LIMESTONE_CALCITE.co2 - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Fertilizer emissions, limestone, dolomite", - co2=limestone_dolomite_amount - * FERTILIZER_LIMESTONE_DOLOMITE.co2 - * self.cultivation_area, - ) - ), - "Synthetic nitrogen fertilizer": Emissions( # ✅ - scope=Scope.SCOPE_1, - source="Fertilizer emissions, synthetic nitrogen fertilizer", - n2o=FERTILIZER_SYNTHETIC.n2o - * 
(synthetic_fertilizer_amount * synthetic_fertilizer_nitrogen_ratio) - * self.cultivation_area, - ), - "Organic fertilizers": ( - Emissions( # ✅ - scope=Scope.SCOPE_1, - source="Fertilizer emissions, manure", - n2o=manure_amount * FERTILIZER_MANURE.n2o * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Fertilizer emissions, bird manure", - n2o=manure_birds_amount * FERTILIZER_MANURE_BIRDS.n2o * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Fertilizer emissions, organic fertilizer", - n2o=organic_compound_amount - * FERTILIZER_ORGANIC_COMPOUND.n2o - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Fertilizer emissions, organic others", - n2o=organic_other_amount - * FERTILIZER_GENERIC_ORGANIC.n2o - * self.cultivation_area, - ) - ), - "Leaching / Surface runoff": ( - Emissions( # ✅ - scope=Scope.SCOPE_1, - source="Flow emissions, surface runoff, urea", - n2o=(urea_amount * FERTILIZER_UREA.nitrogen_ratio) - * leached_rate - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Flow emissions, surface runoff, synthetic fertilizer", - n2o=(synthetic_fertilizer_amount * synthetic_fertilizer_nitrogen_ratio) - * leached_rate - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Flow emissions, surface runoff, organic fertilizer", - n2o=(organic_compound_amount * FERTILIZER_ORGANIC_COMPOUND.nitrogen_ratio) - * leached_rate - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Flow emissions, surface runoff, manure", - n2o=(manure_amount * FERTILIZER_MANURE.nitrogen_ratio) - * leached_rate - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Flow emissions, surface runoff, manure, bird", - n2o=(manure_birds_amount * FERTILIZER_MANURE_BIRDS.nitrogen_ratio) - * leached_rate - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Flow emissions, surface runoff, organic, other", 
- n2o=(organic_other_amount * FERTILIZER_GENERIC_ORGANIC.nitrogen_ratio) - * leached_rate - * self.cultivation_area, - ) - ), - "Atmospheric emissions, N2O": ( - Emissions( - scope=Scope.SCOPE_1, - source="Atmospheric emissions, N2O, Urea", - n2o=urea_amount - * FERTILIZER_UREA.nitrogen_ratio - * C_FRAC_GAS_F - * N2O_ATMOSPHERIC_VOLATIZATION_RATE - * GHG_CONVERSION["N-N2O_N2O"] - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Atmospheric emissions, N2O, Synthetic nitrogen fertilizer", - n2o=synthetic_fertilizer_amount - * synthetic_fertilizer_nitrogen_ratio - * C_FRAC_GAS_F - * N2O_ATMOSPHERIC_VOLATIZATION_RATE - * GHG_CONVERSION["N-N2O_N2O"] - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Atmospheric emissions, N2O, Organic fertilizer", - n2o=organic_compound_amount - * FERTILIZER_ORGANIC_COMPOUND.nitrogen_ratio - * C_FRAC_GAS_F - * C_N2O_VOLATILIZATION - * GHG_CONVERSION["N-N2O_N2O"] - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Atmospheric emissions, N2O, Manure", - n2o=manure_amount - * FERTILIZER_MANURE.nitrogen_ratio - * C_FRAC_GAS_F - * C_N2O_VOLATILIZATION - * GHG_CONVERSION["N-N2O_N2O"] - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Atmospheric emissions, N2O, Manure, Birds", - n2o=manure_birds_amount - * FERTILIZER_MANURE_BIRDS.nitrogen_ratio - * C_FRAC_GAS_F - * C_N2O_VOLATILIZATION - * GHG_CONVERSION["N-N2O_N2O"] - * self.cultivation_area, - ) - + Emissions( - scope=Scope.SCOPE_1, - source="Atmospheric emissions, N2O, Organic, other", - n2o=organic_other_amount - * FERTILIZER_GENERIC_ORGANIC.nitrogen_ratio - * C_FRAC_GAS_F - * C_N2O_VOLATILIZATION - * GHG_CONVERSION["N-N2O_N2O"] - * self.cultivation_area, - ) - ), - "Residue decomposition": ( - Emissions( - scope=Scope.SCOPE_1, - source="Residue decomposition", - n2o=( - (average_yield * RESIDUES[self.crop_type].value) - + (green_manure_amount / 1000 * 
RESIDUES[CropType.GREEN_MANURE].value) - + ( - green_manure_grass_amount - / 1000 - * RESIDUES[CropType.GREEN_MANURE_GRASSES].value - ) - + ( - green_manure_legumes_amount - / 1000 - * RESIDUES[CropType.GREEN_MANURE_LEGUMES].value - ) - ) - * 10, - ) - ), - "Soil management": ( - Emissions( - scope=Scope.SCOPE_1, - source="Soil management", - co2=soil_management_area * CO2EQ_SOIL_EMISSIONS, - ) - ), - } - - -class CallbackBuilder: - def __init__( - self, - crop_type: str, - ): - if crop_type.upper() not in CropType.__members__: - raise ValueError(f"Unsupported crop type: {crop_type}") - self.crop_type = CropType[crop_type.upper()] - - def __call__(self): - def emissions_callback(ghg: GHGProtocolVibe) -> Dict[str, List[GHGFlux]]: - geometry = shpg.shape(ghg.geometry) - country_name = geometry_to_country_name(geometry) # type: ignore - gasoline_mixture = GASOLINE_MIXTURES.get(country_name, GASOLINE_MIXTURES["World"]) - - if ghg.cultivation_area: - area_ha = ghg.cultivation_area - else: - geod = Geod(ellps="WGS84") - area = abs(geod.geometry_area_perimeter(geometry)[0]) # in m^2 - area_ha = area / 10000 # in ha - - fuel_consumptions = [] - if ghg.diesel_amount != 0: - if ghg.diesel_type is None: - raise ValueError("Diesel amount is not zero, but diesel type is not specified") - fuel_consumptions.append( - ( - FuelType[ghg.diesel_type.upper()], - ghg.diesel_amount, - ) - ) - if ghg.gasoline_amount != 0: - fuel_consumptions.append( - ( - FuelType.GASOLINE, - ghg.gasoline_amount * gasoline_mixture if ghg.gasoline_amount else 0.0, - # The above can be done because all equations are linear - ) - ) - - if not ghg.total_yield: - raise ValueError("Total yield is not specified") - - crop_emission = CropEmission(self.crop_type, area_ha) - internal_operations_emissions = crop_emission.fuel_emissions( - fuel_consumptions, - Scope.SCOPE_1, - "Internal operations", - gasoline_mixture, - ) - transport_emissions = crop_emission.fuel_emissions( - [ - ( - FuelType[ - 
ghg.transport_diesel_type.upper() - if ghg.transport_diesel_type - else "DIESEL" - ], - ghg.transport_diesel_amount if ghg.transport_diesel_amount else 0.0, - ) - ], - Scope.SCOPE_3, - "Transportation", - gasoline_mixture, - ) - fertilizer_parameters = dict( - average_yield=ghg.total_yield / area_ha, - urea_amount=ghg.urea_amount if ghg.urea_amount else 0, - gypsum_amount=ghg.gypsum_amount if ghg.gypsum_amount else 0, - limestone_calcite_amount=ghg.limestone_calcite_amount, - limestone_dolomite_amount=ghg.limestone_dolomite_amount, - synthetic_fertilizer_amount=ghg.synthetic_fertilizer_amount, - synthetic_fertilizer_nitrogen_ratio=ghg.synthetic_fertilizer_nitrogen_ratio, - manure_amount=ghg.manure_amount, - manure_birds_amount=ghg.manure_birds_amount, - organic_compound_amount=ghg.organic_compound_amount, - organic_other_amount=ghg.organic_other_amount, - green_manure_amount=ghg.green_manure_amount, - green_manure_grass_amount=ghg.green_manure_grass_amount, - green_manure_legumes_amount=ghg.green_manure_legumes_amount, - soil_management_area=ghg.soil_management_area - if ghg.soil_management_area - else area_ha, - ) - fertilizer_parameters = { - k: v if v is not None else 0.0 for k, v in fertilizer_parameters.items() - } - - fertilizer_emissions = crop_emission.fertilizer_emissions(**fertilizer_parameters) - initial_carbon_stock = crop_emission.initial_carbon_stock( - ghg.biome, ghg.previous_land_use - ) - biomass_burning_emissions = crop_emission.biomass_burning_emissions( - average_yield=ghg.total_yield / area_ha, - burn_area=ghg.burn_area if ghg.burn_area else 0.0, - ) - carbon_capture = crop_emission.carbon_capture( - area_ha, - ghg.green_manure_amount if ghg.green_manure_amount else 0.0, - ghg.green_manure_grass_amount if ghg.green_manure_grass_amount else 0.0, - ghg.green_manure_legumes_amount if ghg.green_manure_legumes_amount else 0.0, - ) - land_use_emissions = crop_emission.land_use_emissions( - ghg.biome, - ghg.previous_land_use, - area_ha, - 
ghg.current_land_use, - ghg.soil_clay_content if ghg.soil_clay_content else 0.0, - ) - - emissions = ( - [internal_operations_emissions] - + [e for e in fertilizer_emissions.values()] - + [initial_carbon_stock] - + [transport_emissions] - + [biomass_burning_emissions] - + [carbon_capture] - + [land_use_emissions] - ) - return { - "fluxes": [ - GHGFlux( - id=gen_hash_id( - f"ghg_{e.scope}_{e.source}_{asdict(ghg)}", - ghg.geometry, - ghg.time_range, - ), - time_range=ghg.time_range, - geometry=ghg.geometry, - scope=str(e.scope.value), - value=e.total, - description=e.source, - assets=[], - ) - for e in emissions - ] - } - - return emissions_callback diff --git a/ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml b/ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml deleted file mode 100644 index 11954e47..00000000 --- a/ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: compute_ghg_fluxes -inputs: - ghg: GHGProtocolVibe -output: - fluxes: List[GHGFlux] -parameters: - crop_type: "" -entrypoint: - file: compute_ghg_fluxes.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - crop_type -description: - short_description: Computes Green House Gas emission fluxes based on emission factors based on IPCC methodology. diff --git a/ops/compute_ghg_fluxes/test_ghg_fluxes.py b/ops/compute_ghg_fluxes/test_ghg_fluxes.py deleted file mode 100644 index 73264f64..00000000 --- a/ops/compute_ghg_fluxes/test_ghg_fluxes.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from math import isclose -from typing import Dict, List, cast - -import pytest -from shapely import geometry as shpg - -from vibe_core.data import GHGFlux, GHGProtocolVibe -from vibe_dev.testing.op_tester import OpTester - -YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_ghg_fluxes.yaml") - - -@pytest.fixture -def fake_ghg() -> GHGProtocolVibe: - return GHGProtocolVibe( - id="fake_id", - time_range=(datetime(2020, 1, 1), datetime(2021, 1, 1)), - geometry=shpg.mapping(shpg.box(-43.793839, -20.668953, -43.784183, -20.657266)), - assets=[], - cultivation_area=10, - total_yield=50, # average = 5 - soil_texture_class="sand", - soil_clay_content=0.1, - previous_land_use="native", - current_land_use="conventional_crops", - practice_adoption_period=10, - burn_area=4, - soil_management_area=2, - synthetic_fertilizer_amount=100, - synthetic_fertilizer_nitrogen_ratio=10 / 100.0, - urea_amount=3, - limestone_calcite_amount=11, - limestone_dolomite_amount=22, - gypsum_amount=33, - organic_compound_amount=44, - manure_amount=55, - manure_birds_amount=66, - organic_other_amount=77, - diesel_amount=10, - gasoline_amount=666, - ethanol_amount=42, - biome="BRAZIL_AMAZON_SAVANNA", - transport_diesel_type="DIESEL_B10", - transport_diesel_amount=790, - green_manure_amount=22, - green_manure_grass_amount=33, - green_manure_legumes_amount=44, - ) - - -def test_ghg_fluxes(fake_ghg: GHGProtocolVibe): - op_tester = OpTester(YAML_PATH) - parameters = {"crop_type": "cotton"} - op_tester.update_parameters(parameters) - - output = cast(Dict[str, List[GHGFlux]], op_tester.run(ghg=fake_ghg)) - assert output - - fluxes = {e.description: e.value for e in output["fluxes"]} - - assert isclose(fluxes["Fertilizer emissions, urea"], 0.06, abs_tol=0.01), fluxes[ - "Fertilizer emissions, urea" - ] - - gypsum = [v for k, v in fluxes.items() if ", gypsum" in k][0] # type: ignore - assert isclose(gypsum, 0.29, abs_tol=0.01), gypsum - 
- assert isclose( - fluxes["Fertilizer emissions, synthetic nitrogen fertilizer"], 0.34, abs_tol=0.01 - ), fluxes["Fertilizer emissions, synthetic nitrogen fertilizer"] - - s = "Fertilizer emissions, manure" - f = [v for k, v in fluxes.items() if s in k][0] # type: ignore - assert isclose(f, 0.18, abs_tol=0.01), (s, f) - - flow = [v for k, v in fluxes.items() if "Flow emissions" in k][0] # type: ignore - assert isclose(flow, 0.17, abs_tol=0.001), flow - - atmospheric = [v for k, v in fluxes.items() if "Atmospheric emissions" in k][0] # type: ignore - assert isclose(atmospheric, 0.098, abs_tol=0.001), atmospheric - - residue = [v for k, v in fluxes.items() if "Residue decomposition" in k][0] # type: ignore - assert isclose(residue, 5.4672, abs_tol=0.001), residue - - assert isclose(fluxes["Soil management"], 146.67, abs_tol=0.1), fluxes["Soil management"] - - s = "Internal operations" - internal = [v for k, v in fluxes.items() if s in k][0] # type: ignore - assert isclose(internal, 1.3027, abs_tol=0.001), (s, internal) - - s = "Initial carbon stock" - assert isclose(fluxes[s], 863.76, abs_tol=1), fluxes[s] - - s = "Transportation / DIESEL_B10 / Biodiesel" - assert isclose(fluxes[s], 2.1131, abs_tol=0.01), fluxes[s] - - s = "Biomass Burning (Cotton)" - assert isclose(fluxes[s], 81.58, abs_tol=0.1), fluxes[s] - - s = "Carbon captured by Green Manure" - assert isclose(fluxes[s], -18.35, abs_tol=0.1), fluxes[s] - - s = "Land use change" - assert isclose(fluxes[s], 9.167, abs_tol=0.1), fluxes[s] diff --git a/ops/compute_illuminance/compute_illuminance.py b/ops/compute_illuminance/compute_illuminance.py deleted file mode 100644 index e36a78a5..00000000 --- a/ops/compute_illuminance/compute_illuminance.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -# pyright: reportUnknownMemberType=false -from typing import Dict, List, Union, cast - -import rasterio - -from vibe_core.data import RasterIlluminance, Sentinel2CloudMask, Sentinel2Raster -from vibe_lib.spaceeye.illumination import MIN_CLEAR_RATIO, masked_average_illuminance -from vibe_lib.spaceeye.utils import QUANTIFICATION_VALUE - - -def compute_illuminance(item: Sentinel2Raster, cloud_mask: Sentinel2CloudMask): - """ - Compute illuminance values one band at a time to save memory - """ - data_filepath = item.raster_asset.url - mask_filepath = cloud_mask.raster_asset.url - illuminance: List[float] = [] - with rasterio.open(mask_filepath) as src: - mask = src.read(1).astype(bool) - if mask.mean() < MIN_CLEAR_RATIO: - return None - with rasterio.open(data_filepath) as src: - # rasterio indexes bands starting with 1 - for i in range(1, cast(int, src.count + 1)): - x = src.read(i) / QUANTIFICATION_VALUE - illuminance.append(float(masked_average_illuminance(x, mask))) - - return RasterIlluminance.clone_from(item, id=item.id, assets=[], illuminance=illuminance) - - -class CallbackBuilder: - def __init__(self, num_workers: int): - self.num_workers = num_workers - - def __call__(self): - def callback( - rasters: List[Sentinel2Raster], cloud_masks: List[Sentinel2CloudMask] - ) -> Dict[str, List[RasterIlluminance]]: - results = [compute_illuminance(item, mask) for item, mask in zip(rasters, cloud_masks)] - results = cast(List[Union[RasterIlluminance, None]], results) - results = [r for r in results if r is not None] - - return {"illuminance": results} - - return callback diff --git a/ops/compute_illuminance/compute_illuminance.yaml b/ops/compute_illuminance/compute_illuminance.yaml deleted file mode 100644 index de302175..00000000 --- a/ops/compute_illuminance/compute_illuminance.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: compute_illuminance -inputs: - rasters: List[Sentinel2Raster] - cloud_masks: List[Sentinel2CloudMask] -output: - illuminance: List[RasterIlluminance] 
-parameters: - num_workers: 6 -entrypoint: - file: compute_illuminance.py - callback_builder: CallbackBuilder diff --git a/ops/compute_index/compute_index.yaml b/ops/compute_index/compute_index.yaml deleted file mode 100644 index 54a822a2..00000000 --- a/ops/compute_index/compute_index.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: compute_index -inputs: - raster: Raster -output: - index: Raster -parameters: - index: ndvi -entrypoint: - file: index.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - index -description: - short_description: Computes `index` over the input raster. \ No newline at end of file diff --git a/ops/compute_index/index.py b/ops/compute_index/index.py deleted file mode 100644 index f9851bac..00000000 --- a/ops/compute_index/index.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from collections import defaultdict -from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, List, cast - -import numpy as np -import spyndex -import xarray as xr -from scipy.ndimage import gaussian_filter -from sklearn.neighbors import NearestNeighbors - -from vibe_core.data import Raster -from vibe_lib.raster import ( - RGBA, - compute_index, - get_cmap, - interpolated_cmap_from_colors, - json_to_asset, - load_raster, - save_raster_from_ref, -) - -NDVI_CMAP_INTERVALS: List[float] = [ - -1.0, - -0.2, - 0.0, - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 1.0, -] -NDVI_CMAP_COLORS: List[RGBA] = [ - RGBA(0, 0, 255, 255), - RGBA(0, 0, 38, 255), - RGBA(166, 0, 38, 255), - RGBA(214, 48, 38, 255), - RGBA(242, 110, 66, 255), - RGBA(252, 173, 97, 255), - RGBA(252, 224, 140, 255), - RGBA(255, 255, 191, 255), - RGBA(217, 240, 140, 255), - RGBA(166, 217, 107, 255), - RGBA(102, 189, 99, 255), - RGBA(26, 153, 79, 255), - RGBA(0, 102, 54, 255), -] - - -def compute_ndre(bands: xr.DataArray) -> xr.DataArray: - """ - Normalized difference red edge index - """ - re, 
nir = bands - ndre: xr.DataArray = (nir - re) / (nir + re) - ndre.rio.write_nodata(100, encoded=True, inplace=True) - return ndre - - -def compute_pri(bands: xr.DataArray) -> xr.DataArray: - """ - Photochemical reflectance index - """ - re, nir = bands - pri: xr.DataArray = (re) / (nir + re) - pri.rio.write_nodata(100, encoded=True, inplace=True) - return pri - - -def compute_reci(bands: xr.DataArray) -> xr.DataArray: - """ - Red-Edge Chlorophyll Vegetation Index - """ - re, nir = bands - reci: xr.DataArray = (nir / re) - 1 - reci.rio.write_nodata(100, encoded=True, inplace=True) - return reci - - -def compute_methane(bands: xr.DataArray, neighbors: int = 6, sigma: float = 1.8) -> xr.DataArray: - b12 = bands[-1].to_masked_array() - m = b12.mask - b12 = b12.filled(b12.mean()) - other_bands = bands[:-1].to_masked_array() - m = m | other_bands.mask.any(axis=0) - other_bands = other_bands.filled(other_bands.mean()) - b12 = gaussian_filter(b12, sigma).squeeze() - b12_f = b12.flatten() - other_bands = gaussian_filter(other_bands, sigma) - x = other_bands.reshape(other_bands.shape[0], -1).T - nn = NearestNeighbors(n_neighbors=neighbors).fit(x) - ref_b12_values = np.median( - b12_f[nn.kneighbors(x, return_distance=False)], # type: ignore - axis=1, - ).reshape(b12.shape) - index = (b12 - ref_b12_values) / ref_b12_values - methane_xr = bands[0].astype(np.float32).copy(data=np.ma.masked_array(index, mask=m)) - return methane_xr - - -def default_vis(): - return { - "colormap": interpolated_cmap_from_colors(NDVI_CMAP_COLORS, NDVI_CMAP_INTERVALS), - "range": (-1, 1), - } - - -class CallbackBuilder: - custom_indices: Dict[str, Callable[..., xr.DataArray]] = { - "methane": compute_methane, - "ndre": compute_ndre, - "pri": compute_pri, - "reci": compute_reci, - } - custom_index_bands: Dict[str, List[str]] = { - "methane": ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B12"], - "ndre": ["RE1", "N"], - "pri": ["R", "N"], - "reci": ["RE1", "N"], - } - 
index_vis: Dict[str, Dict[str, Any]] = defaultdict( - default_vis, {"methane": {"colormap": get_cmap("gray"), "range": (-0.2, 0.2)}} - ) - - def __init__(self, index: str): - # the indices ndvi, evi, msevi and ndmi are now computed with spyndex - if ( - index not in spyndex.indices - and index.upper() not in spyndex.indices - and index not in self.custom_indices - ): - raise ValueError( - f"Operation compute_index called with unknown index {index}. " - f"Available indices are {list(spyndex.indices) + list(self.custom_indices.keys())}." - ) - self.tmp_dir = TemporaryDirectory() - if index in self.custom_indices.keys(): - self.name = index - self.index_fn = self.custom_indices[index] - else: - self.name = {i.upper(): i for i in spyndex.indices}[index.upper()] - - def check_raster_bands(self, raster: Raster, bands: List[str]) -> None: - if not set(bands).issubset(set(raster.bands)): - raise ValueError( - f"Raster does not contain bands {bands} needed to compute index {self.name}. " - f"Bands in input raster are: {', '.join(raster.bands.keys())}." - ) - - def check_constants(self, constants: Dict[str, Any]) -> None: - unsupported_constants = [] - for k, v in constants.items(): - if v is None or not isinstance(v, (int, float)): - unsupported_constants.append(k) - - if unsupported_constants: - raise ValueError( - f"Index {self.name} still not supported. " - "Spyndex does not define a default int or float value " - f"for constants {unsupported_constants}." 
- ) - - def __call__(self): - def index_callback(raster: Raster) -> Dict[str, Raster]: - output_dir = self.tmp_dir.name - - # compute index using spyndex - if self.name in spyndex.indices: - bands_spyndex = list(set(spyndex.indices[self.name].bands) - set(spyndex.constants)) - # TODO allow user to use different values for the constants - const_spyndex = { - i: spyndex.constants[i].default - for i in set(spyndex.indices[self.name].bands).intersection( - set(spyndex.constants) - ) - } - self.check_constants(const_spyndex) - self.check_raster_bands(raster, bands_spyndex) - raster_da = load_raster( - raster, bands=cast(List[str], bands_spyndex), use_geometry=True - ) - # Convert to reflectance values, add minimum value to avoid division by zero - raster_da = (raster_da.astype(np.float32) * raster.scale + raster.offset).clip( - min=1e-6 - ) - params = {j: raster_da[i] for i, j in enumerate(bands_spyndex)} - params.update(const_spyndex) - idx = spyndex.computeIndex(index=self.name, params=params) - index_raster = save_raster_from_ref(idx, output_dir, raster) - index_raster.bands = {self.name: 0} - else: - self.check_raster_bands(raster, self.custom_index_bands[self.name]) - index_raster = compute_index( - raster, - self.custom_index_bands[self.name], - self.index_fn, - self.name, - output_dir, - ) - - vis_dict = {"bands": [0], **self.index_vis[self.name]} - index_raster.assets.append(json_to_asset(vis_dict, output_dir)) - - return {"index": index_raster} - - return index_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_index/test_index.py b/ops/compute_index/test_index.py deleted file mode 100644 index 30eb4c72..00000000 --- a/ops/compute_index/test_index.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import List, Tuple, cast - -import numpy as np -import pytest -import rioxarray as rio -import spyndex -import xarray as xr -from index import compute_methane, compute_ndre, compute_reci -from shapely import geometry as shpg - -from vibe_core.data import Raster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.raster import save_raster_to_asset - -YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_index.yaml") - - -# code originally on index.py. now we are using spyndex -def compute_ndvi(bands: xr.DataArray) -> xr.DataArray: - red, nir = bands - ndvi: xr.DataArray = (nir - red) / (nir + red) - ndvi.rio.write_nodata(100, encoded=True, inplace=True) - return ndvi - - -# code originally on index.py. now we are using spyndex -def compute_evi(bands: xr.DataArray) -> xr.DataArray: - blue, red, nir = bands - evi: xr.DataArray = 2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1) - evi.rio.write_nodata(100, encoded=True, inplace=True) - return evi - - -# code originally on index.py. now we are using spyndex -def compute_msavi(bands: xr.DataArray) -> xr.DataArray: - """ - Modified Soil Adjusted Vegetation Index. - This is technically MSAVI_2 which is frequently used as MSAVI - """ - red, nir = bands - disc = (2 * nir + 1) ** 2 - 8 * (nir - red) - msavi: xr.DataArray = (2 * nir + 1 - disc**0.5) / 2.0 - msavi.rio.write_nodata(100, encoded=True, inplace=True) - return msavi - - -# code originally on index.py. 
now we are using spyndex -def compute_ndmi(bands: xr.DataArray) -> xr.DataArray: - """ - Normalized Difference Moisture Index - """ - nir, swir16 = bands - ndmi: xr.DataArray = (nir - swir16) / (nir + swir16) - ndmi.rio.write_nodata(100, encoded=True, inplace=True) - return ndmi - - -def compute_ndwi(bands: xr.DataArray) -> xr.DataArray: - g, n = bands - return spyndex.indices.NDWI.compute(G=g, N=n) - - -def compute_lswi(bands: xr.DataArray) -> xr.DataArray: - n, s1 = bands - return spyndex.indices.LSWI.compute(N=n, S1=s1) - - -def compute_nbr(bands: xr.DataArray) -> xr.DataArray: - n, s2 = bands - return spyndex.indices.NBR.compute(N=n, S2=s2) - - -true_index_fn = { - "ndvi": compute_ndvi, - "evi": compute_evi, - "msavi": compute_msavi, - "ndmi": compute_ndmi, - "ndwi": compute_ndwi, - "methane": compute_methane, - "ndre": compute_ndre, - "reci": compute_reci, - "LSWI": compute_lswi, - "NBR": compute_nbr, -} - - -def create_fake_raster( - tmp_dir_name: str, bands: List[str], y: int, x: int -) -> Tuple[Raster, xr.DataArray]: - nbands = len(bands) - fake_data = np.random.random((nbands, y, x)).astype(np.float32) - fake_da = xr.DataArray( - fake_data, - coords={"bands": np.arange(nbands), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, - dims=["bands", "y", "x"], - ) - fake_da.rio.write_crs("epsg:4326", inplace=True) - - asset = save_raster_to_asset(fake_da, tmp_dir_name) - - return ( - Raster( - id="fake_id", - time_range=(datetime(2023, 1, 1), datetime(2023, 1, 1)), - geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), - assets=[asset], - bands={j: i for i, j in enumerate(bands)}, - ), - fake_da, - ) - - -@pytest.fixture -def tmp_dir(): - _tmp_dir = TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -@pytest.mark.parametrize( - "bands, index, should_fail", - [ - (["R", "N"], "ndvi", False), - (["B", "R", "N"], "evi", False), - (["R", "N"], "msavi", False), - (["N", "S1"], "ndmi", False), - (["RE1", "N"], "ndre", False), - (["RE1", "N"], 
"reci", False), - ( - ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B12"], - "methane", - False, - ), - (["G", "N"], "ndwi", False), - (["N"], "LSWI", True), - (["N", "S1"], "LSWI", False), - (["N", "S2"], "NBR", False), - ], -) -def test_op(bands: List[str], index: str, should_fail: bool, tmp_dir: str): - raster, da = create_fake_raster(tmp_dir, bands, 20, 20) - op_tester = OpTester(YAML_PATH) - parameters = {"index": index} - op_tester.update_parameters(parameters) - try: - output = cast(Raster, op_tester.run(raster=raster)["index"]) - except ValueError as e: - if not should_fail: - raise ValueError(f"this should not have failed. {e}") from e - return - output_array = rio.open_rasterio(output.raster_asset.path_or_url).values # type: ignore - true_array = true_index_fn[index](da).values - assert np.all(np.isclose(output_array, true_array)) # type: ignore diff --git a/ops/compute_irrigation_probability/compute_irrigation_probability.py b/ops/compute_irrigation_probability/compute_irrigation_probability.py deleted file mode 100644 index 04f0f9fb..00000000 --- a/ops/compute_irrigation_probability/compute_irrigation_probability.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Dict - -import numpy as np -import xarray as xr -from sklearn.linear_model import LogisticRegression -from sklearn.preprocessing import StandardScaler - -from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid -from vibe_lib.raster import load_raster_match - - -# Define a function for ngi, egi, and lst data treatment -def preprocess_raster_values(raster: xr.DataArray): - raster_values = raster.values.ravel() - - # Handle NaN and Inf values - raster_values[np.isnan(raster_values)] = -9999 - raster_values[np.isinf(raster_values)] = -9999 - - # Replace -9999 with 0 - raster_values = np.where(raster_values == -9999, 0, raster_values) - - return raster_values - - -class CallbackBuilder: - def __init__(self, coef_ngi: float, coef_egi: float, coef_lst: float, intercept: float): - # Create temporary directory to store our new data, which will be transfered to our storage - # automatically when the op is run in a workflow - self.tmp_dir = TemporaryDirectory() - - # Set Parameters - self.coef_ngi = coef_ngi - self.coef_egi = coef_egi - self.coef_lst = coef_lst - self.intercept = intercept - - def __call__(self): - def callback( - landsat_raster: LandsatRaster, - ngi: Raster, - egi: Raster, - lst: Raster, - cloud_water_mask_raster: Raster, - ) -> Dict[str, Raster]: - # Get cloud water mask layer - cloud_water_mask = load_raster_match(cloud_water_mask_raster, landsat_raster)[0] - - # Get ngi, egi, and lst layers - ngi1 = load_raster_match(ngi, landsat_raster)[0] - egi1 = load_raster_match(egi, landsat_raster)[0] - lst1 = load_raster_match(lst, landsat_raster)[0] - - ngi_values = preprocess_raster_values(ngi1) - egi_values = preprocess_raster_values(egi1) - lst_values = preprocess_raster_values(lst1) - - # Reduce dimension - x = np.stack((ngi_values, egi_values, lst_values), axis=1) - x = x.astype(float) - - # Apply scaler - scaler = StandardScaler() - x_scaled = scaler.fit_transform(x) - - 
# Create a logistic regression model - model = LogisticRegression() - - # Set the coefficients and intercept - coef_ = np.array([[self.coef_ngi, self.coef_ngi, self.coef_lst]]) - intercept_ = [self.intercept] - classes_ = np.array(["1", "2"]) - - # Assign the coefficients and intercept to the model - model.coef_ = coef_ - model.intercept_ = intercept_ - model.classes_ = classes_ - - # Make predictions using the model - predicted_labels = model.predict_proba(x_scaled)[:, 0] - - # Assign shape - predicted_labels = predicted_labels.reshape(cloud_water_mask.shape) - - # Treat the result with cloud water mask - predicted_labels = predicted_labels * cloud_water_mask - - # Create a new DataArray with predicted_labels and the same dimensions as ngi - predicted_labels_xr = xr.DataArray( - predicted_labels, - dims=cloud_water_mask.dims, - coords=cloud_water_mask.coords, - ) - - # Save the DataArray to a raster file - filepath = os.path.join(self.tmp_dir.name, "irrigation_probability.tif") - predicted_labels_xr.rio.to_raster(filepath) - irr_prob_asset = AssetVibe(reference=filepath, type="image/tiff", id=gen_guid()) - return { - "irrigation_probability": Raster.clone_from( - landsat_raster, - id=gen_guid(), - assets=[irr_prob_asset], - bands={"irrigation_probability": 0}, - ) - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_irrigation_probability/compute_irrigation_probability.yaml b/ops/compute_irrigation_probability/compute_irrigation_probability.yaml deleted file mode 100644 index 6857f14b..00000000 --- a/ops/compute_irrigation_probability/compute_irrigation_probability.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: compute_irrigation_probability -inputs: - landsat_raster: LandsatRaster - cloud_water_mask_raster: Raster - ngi: Raster - egi: Raster - lst: Raster -output: - irrigation_probability: Raster -parameters: - coef_ngi: -0.50604148 - coef_egi: -0.93103156 - coef_lst: -0.14612046 - intercept: 1.99036986 -entrypoint: - file: 
compute_irrigation_probability.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - coef_ngi - - coef_egi - - coef_lst - - intercept -description: - short_description: Computes irrigation probability values for each pixel in raster using optimized logistic regression - model with ngi, egi, and lst rasters as input diff --git a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py deleted file mode 100644 index ad7d2c38..00000000 --- a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Dict - -import rioxarray as rio - -from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid -from vibe_lib.raster import load_raster, load_raster_match - -# Scale and Offset Constants of LST and Rest of the Landsat Bands -SCALE_LST = 0.00341802 -OFFSET_LST = 149 -SCALE_BAND = 0.0000275 -OFFSET_BAND = 0.2 - - -class CallbackBuilder: - def __init__(self): - # Create temporary directory to store our new data, which will be transfered to our storage - # automatically when the op is run in a workflow - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback( - landsat_raster: LandsatRaster, - ndvi_raster: Raster, - evaporative_fraction: Raster, - cloud_water_mask_raster: Raster, - ) -> Dict[str, Raster]: - # LAYERS PREPARATION - lst = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)[ - landsat_raster.bands["lwir11"] - ] - - # Apply scale and offset value to the band lst band - lst = load_raster(landsat_raster, bands=["lwir11"])[0] - lst = (lst * SCALE_LST) + OFFSET_LST - - # Apply scale and offset value to the band lst band - green = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)[ - landsat_raster.bands["green"] - ] - green = (green * SCALE_BAND) - OFFSET_BAND - - # Apply scale and 
offset value to the band lst band - nir = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)[ - landsat_raster.bands["nir"] - ] - nir = (nir * SCALE_BAND) - OFFSET_BAND - - # Get ndvi index - ndvi = load_raster_match(ndvi_raster, landsat_raster)[0] - - # Get evaporative fraction raster - evap_fraxn = load_raster_match(evaporative_fraction, landsat_raster)[0] - - # Get cloud water mask raster - cloud_water_mask = load_raster_match(cloud_water_mask_raster, landsat_raster)[0] - - # Calculate Green Index - gi = nir / green - - # Calculate ngi layer from Green Index and ndvi index - ngi = ndvi * gi - - # Calculate egi layer from Green Index and evaporative fraction layer - egi = evap_fraxn / gi - - # Apply cloud water mask to ngi, egi, and lst layers - ngi = ngi * cloud_water_mask - egi = egi * cloud_water_mask - lst = lst * cloud_water_mask - - # Save the DataArray to a raster file - filepath = os.path.join(self.tmp_dir.name, "ngi.tif") - ngi.rio.to_raster(filepath) - ngi_asset = AssetVibe(reference=filepath, type="image/tiff", id=gen_guid()) - - filepath1 = os.path.join(self.tmp_dir.name, "egi.tif") - egi.rio.to_raster(filepath1) - egi_asset = AssetVibe(reference=filepath1, type="image/tiff", id=gen_guid()) - - filepath2 = os.path.join(self.tmp_dir.name, "lst.tif") - lst.rio.to_raster(filepath2) - lst_asset = AssetVibe(reference=filepath2, type="image/tiff", id=gen_guid()) - - return { - "ngi": Raster.clone_from( - landsat_raster, id=gen_guid(), assets=[ngi_asset], bands={"ngi": 0} - ), - "egi": Raster.clone_from( - landsat_raster, id=gen_guid(), assets=[egi_asset], bands={"egi": 0} - ), - "lst": Raster.clone_from( - landsat_raster, id=gen_guid(), assets=[lst_asset], bands={"lst": 0} - ), - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml deleted file mode 100644 index 40b6631f..00000000 --- 
a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: compute_ngi_egi_layers -inputs: - landsat_raster: LandsatRaster - ndvi_raster: Raster - cloud_water_mask_raster: Raster - evaporative_fraction: Raster -output: - ngi: Raster - egi: Raster - lst: Raster -entrypoint: - file: compute_ngi_egi_layers.py - callback_builder: CallbackBuilder -dependencies: -parameters: -description: - short_description: Computes NGI, EGI, and LST layers from landsat bands, ndvi layer, cloud water mask layer and evaporative fraction layer diff --git a/ops/compute_onnx/compute_onnx.py b/ops/compute_onnx/compute_onnx.py deleted file mode 100644 index 25d4fe66..00000000 --- a/ops/compute_onnx/compute_onnx.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Dict, List, Optional, Union - -import onnxruntime as ort -from rasterio.enums import Resampling - -from vibe_core.data import AssetVibe, Raster, gen_guid -from vibe_core.data.rasters import RasterChunk, RasterSequence -from vibe_lib.raster import resample_raster -from vibe_lib.spaceeye.chip import Dims, StackOnChannelsChipDataset, get_loader, predict_chips - -ROOT_DIR = "/mnt/onnx_resources/" - - -class CallbackBuilder: - def __init__( - self, - model_file: str, - window_size: int, - overlap: float, - batch_size: int, - num_workers: int, - nodata: Union[float, int], - skip_nodata: bool, - resampling: str = "bilinear", - root_dir: str = ROOT_DIR, - downsampling: int = 1, - ): - self.tmp_dir = TemporaryDirectory() - self.downsampling = downsampling - if model_file is None or not os.path.exists(os.path.join(root_dir, model_file)): - raise ValueError(f"Model file '{model_file}' does not exist.") - self.root_dir = root_dir - self.model_file = model_file - self.window_size = window_size - self.overlap = overlap - self.batch_size = batch_size - self.num_workers = num_workers 
- self.nodata = nodata - self.skip_nodata = skip_nodata - self.resampling = Resampling[resampling] - - def __call__(self): - def compute_onnx( - input_raster: Union[Raster, RasterSequence, List[Raster]], - chunk: Optional[RasterChunk] = None, - ) -> Dict[str, Union[Raster, RasterChunk]]: - if self.downsampling < 1: - raise ValueError( - f"Downsampling must be equal or larger than 1, found {self.downsampling}" - ) - - if isinstance(input_raster, RasterSequence): - input = [ - Raster.clone_from(input_raster, gen_guid(), assets=[i]) - for i in input_raster.get_ordered_assets() - ] - elif isinstance(input_raster, list): - input = input_raster - else: - input = [input_raster] - - model_path = os.path.join(self.root_dir, self.model_file) - model = ort.InferenceSession(model_path) - chip_size = self.window_size - step_size = int(chip_size * (1 - self.overlap)) - dataset = StackOnChannelsChipDataset( - [[i] for i in input], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(step_size, step_size, 1), - downsampling=self.downsampling, - nodata=self.nodata, - geometry_or_chunk=chunk, - ) - - dataloader = get_loader(dataset, self.batch_size, self.num_workers) - pred_filepaths = predict_chips( - model, dataloader, self.tmp_dir.name, skip_nodata=self.skip_nodata - ) - assert ( - len(pred_filepaths) == 1 - ), f"Expected one prediction file, found: {len(pred_filepaths)}" - pred_filepath = resample_raster( - pred_filepaths[0], - self.tmp_dir.name, - dataset.width, - dataset.height, - dataset.transform, - self.resampling, - ) - asset = AssetVibe(reference=pred_filepath, type="image/tiff", id=gen_guid()) - if chunk is None: - res = Raster.clone_from(input[0], id=gen_guid(), assets=[asset]) - else: - res = RasterChunk.clone_from( - chunk, id=gen_guid(), geometry=chunk.geometry, assets=[asset] - ) - - return {"output_raster": res} - - return compute_onnx - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_onnx/compute_onnx.yaml 
b/ops/compute_onnx/compute_onnx.yaml deleted file mode 100644 index fe5a792f..00000000 --- a/ops/compute_onnx/compute_onnx.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: compute_onnx -inputs: - input_raster: Raster -output: - output_raster: Raster -parameters: - downsampling: 1 - model_file: - window_size: 512 - overlap: .25 - batch_size: 1 - num_workers: 0 - nodata: 100 - skip_nodata: true - resampling: bilinear -entrypoint: - file: compute_onnx.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_file - - downsampling - - window_size - - overlap - - resampling - - skip_nodata diff --git a/ops/compute_onnx/compute_onnx_from_chunks.yaml b/ops/compute_onnx/compute_onnx_from_chunks.yaml deleted file mode 100644 index db19a2d7..00000000 --- a/ops/compute_onnx/compute_onnx_from_chunks.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: compute_onnx_from_chunks -inputs: - input_raster: RasterSequence - chunk: RasterChunk -output: - output_raster: RasterChunk -parameters: - model_file: - window_size: 512 - overlap: .0 - batch_size: 1 - num_workers: 0 - nodata: 100 - skip_nodata: True -entrypoint: - file: compute_onnx.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_file - - window_size - - overlap - - skip_nodata -description: - short_description: Runs the onnx model across chunks of the input rasters. 
\ No newline at end of file diff --git a/ops/compute_onnx/compute_onnx_from_sequence.yaml b/ops/compute_onnx/compute_onnx_from_sequence.yaml deleted file mode 100644 index fd4cd194..00000000 --- a/ops/compute_onnx/compute_onnx_from_sequence.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: compute_onnx_from_sequence -inputs: - input_raster: RasterSequence -output: - output_raster: Raster -parameters: - downsampling: 1 - model_file: - window_size: 512 - overlap: .0 - batch_size: 1 - num_workers: 0 - nodata: 100 - skip_nodata: True - resampling: bilinear -entrypoint: - file: compute_onnx.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_file - - downsampling - - window_size - - overlap - - resampling - - skip_nodata -description: - short_description: Processes a sequence of rasters with an ONNX model. \ No newline at end of file diff --git a/ops/compute_onnx/test_compute_onnx.py b/ops/compute_onnx/test_compute_onnx.py deleted file mode 100644 index 148571f7..00000000 --- a/ops/compute_onnx/test_compute_onnx.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -import os -from datetime import datetime, timedelta -from tempfile import TemporaryDirectory -from typing import List, Tuple, Union, cast - -import numpy as np -import pytest -import rioxarray -import torch -import xarray as xr -from numpy.typing import NDArray -from shapely import geometry as shpg -from torch import nn -from torch.nn.parameter import Parameter - -from vibe_core.data import AssetVibe, Raster -from vibe_core.data.core_types import gen_guid -from vibe_core.data.rasters import RasterSequence -from vibe_dev.testing.op_tester import OpTester - -YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_onnx.yaml") -YAML_FLIST_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "compute_onnx_from_sequence.yaml" -) -PY_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_onnx.py") - - -class IdentityNetwork(nn.Module): - def __init__(self, channels: int): - super(IdentityNetwork, self).__init__() - self.c1 = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=1, bias=False) - eye = np.eye(channels).reshape((channels, channels, 1, 1)).astype(np.float32) - self.c1.weight = Parameter(torch.from_numpy(eye)) - - def forward(self, x: torch.Tensor): - return self.c1(x) - - -class DummyCloud(nn.Module): - def __init__(self, channels: int, kernel_size: int = 3): - super(DummyCloud, self).__init__() - self.c1 = nn.Conv2d( - in_channels=channels, - out_channels=1, - kernel_size=kernel_size, - padding=(kernel_size - 1) // 2, - bias=False, - ) - w = np.ones((1, channels, kernel_size, kernel_size)).astype(np.float32) - self.c1.weight = Parameter(torch.from_numpy(w)) - self.p = nn.Sigmoid() - - def forward(self, x: torch.Tensor): - return self.p(self.c1(x)) - - -def create_onnx_model(nn: nn.Module, tmp_dir_name: str, channels: int) -> str: - dims = (1, channels, 3, 3) # any value for batch size, y, x should work here - data = np.random.random(dims).astype(np.float32) - - name = 
f"{nn.__class__.__name__}.onnx" - - torch.onnx.export( - nn, - torch.Tensor(data), - os.path.join(tmp_dir_name, name), - input_names=["in"], - output_names=["out"], - dynamic_axes={"in": {0: "batch", 2: "y", 3: "x"}, "out": {0: "batch", 2: "y", 3: "x"}}, - ) - - return name - - -def create_fake_raster( - tmp_dir_name: str, bands: int, y: int, x: int, delta: int = 0 -) -> Tuple[Raster, NDArray[np.float32]]: - fake_data = np.random.random((bands, y, x)).astype(np.float32) - fake_da = xr.DataArray( - fake_data, - coords={"bands": np.arange(bands), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, - dims=["bands", "y", "x"], - ) - path = os.path.join(tmp_dir_name, f"{gen_guid()}.tif") - fake_da.rio.to_raster(path) - - asset = AssetVibe( - reference=path, - type=mimetypes.types_map[".tif"], - id="fake_asset", - ) - - d = datetime(2022, 1, 1) + timedelta(days=delta) - - return ( - Raster( - id="fake_id", - time_range=(d, d), - geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), - assets=[asset], - bands={str(i): i for i in range(bands)}, - ), - fake_data, - ) - - -@pytest.fixture -def tmp_dir(): - _tmp_dir = TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -@pytest.mark.parametrize( - "bands, y, x", - [ - ([3, 2, 1], 512, 512), - ([2, 2, 2], 1024, 1024), - ([1], 514, 513), - (3, 512, 512), - (2, 1024, 1024), - ], -) -def test_op(bands: Union[int, List[int]], y: int, x: int, tmp_dir: str): - model_class_list = [IdentityNetwork, DummyCloud] - channels = np.sum(bands).astype(int) - - model_list = [m(channels) for m in model_class_list] - onnx_list = [create_onnx_model(m, tmp_dir, channels) for m in model_list] - if isinstance(bands, list): - yaml = YAML_FLIST_PATH - rasters = [] - arrays = [] - for i, n in enumerate(bands): - raster, array = create_fake_raster(tmp_dir, n, y, x, delta=i) - rasters.append(raster) - arrays.append(array) - raster = RasterSequence.clone_from(rasters[0], gen_guid(), []) - for r in rasters: - raster.add_item(r) - array 
= np.concatenate(arrays, axis=0) - else: - yaml = YAML_PATH - raster, array = create_fake_raster(tmp_dir, bands, y, x) - - op_tester = OpTester(yaml) - for model, onnx in zip(model_list, onnx_list): - parameters = {"root_dir": tmp_dir, "model_file": onnx, "overlap": 0.1} - op_tester.update_parameters(parameters) - output_data = cast(Raster, op_tester.run(input_raster=raster)["output_raster"]) - output_array = rioxarray.open_rasterio(output_data.raster_asset.path_or_url).values # type: ignore - true_array = model.forward(torch.from_numpy(array)).detach().numpy() - assert np.all(np.isclose(output_array, true_array)) # type: ignore diff --git a/ops/compute_onnx/test_compute_onnx_chunk.py b/ops/compute_onnx/test_compute_onnx_chunk.py deleted file mode 100644 index f487073c..00000000 --- a/ops/compute_onnx/test_compute_onnx_chunk.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timedelta -from pathlib import Path -from typing import List, Tuple, cast - -import numpy as np -import pytest -import rioxarray -import torch -import xarray as xr -from numpy.typing import NDArray -from shapely import geometry as shpg -from torch import nn - -from vibe_core.data import DataVibe, Raster, RasterChunk, RasterSequence -from vibe_core.data.core_types import gen_guid -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.raster import save_raster_to_asset - -N_SAMPLES = 100 -STEP_Y = 3 -STEP_X = 3 -WINDOW_SIZE = 3 - -HERE = os.path.dirname(os.path.abspath(__file__)) -CHUNK_RASTER_YAML = os.path.join(HERE, "..", "chunk_raster", "chunk_raster.yaml") -LIST_TO_SEQ_YAML = os.path.join(HERE, "..", "list_to_sequence", "list_to_sequence.yaml") -COMPUTE_ONNX_YAML = os.path.join(HERE, "compute_onnx_from_chunks.yaml") -COMBINE_CHUNKS_YAML = os.path.join(HERE, "..", "combine_chunks", "combine_chunks.yaml") - - -class TestModel(nn.Module): - __test__ = False - - def __init__(self, n: 
int): - super(TestModel, self).__init__() - self.n = n - A = np.stack((np.arange(n), np.ones(n))).T - self.A = torch.from_numpy(A) - self.ATAinv = torch.from_numpy(np.linalg.inv(A.T @ A)) - - def forward(self, x: torch.Tensor): - x = torch.squeeze(x) - B = torch.reshape(x, (self.n, -1)) - ATB = torch.sum(self.A.reshape(self.n, 2, 1) * B.reshape(self.n, 1, -1), dim=0) - beta_hat = (self.ATAinv @ ATB)[0, :] - alpha = (self.ATAinv @ ATB)[1, :] - return torch.stack((beta_hat.reshape(x.shape[1:]), alpha.reshape(x.shape[1:])))[ - None, :, :, : - ] - - -def create_list_fake_raster( - tmp_dir_name: str, t: int, y: int, x: int -) -> Tuple[List[Raster], NDArray[np.float32]]: - def fake_cube(sx: int, sy: int, sz: int): - res = [] - for i in range(sy * sx): - h = i / (sy * sx - 1) - res.append(np.linspace(0, h * (sz - 1), sz)) - - res = np.stack(res) - return res.reshape((sy, sx, -1)).transpose((2, 0, 1)) - - sf = fake_cube(x, y, t) - res = [] - for i in range(t): - fake_da = xr.DataArray( - sf[i : i + 1, :, :], - coords={"bands": [0], "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, - dims=["bands", "y", "x"], - ) - asset = save_raster_to_asset(fake_da, tmp_dir_name) - - d = datetime(2022, 1, 1) + timedelta(days=i) - res.append( - Raster( - id=gen_guid(), - time_range=(d, d), - geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), - bands={"band": 0}, - assets=[asset], - ) - ) - - return res, sf - - -@pytest.mark.parametrize("y, x", [(6, 6), (3, 3), (6, 3), (3, 6), (8, 3), (8, 8), (10, 12)]) -def test_op(y: int, x: int, tmp_path: Path): - raster_list, input_model = create_list_fake_raster(str(tmp_path.absolute()), N_SAMPLES, y, x) - model = TestModel(N_SAMPLES) - model_path = os.path.join(str(tmp_path.absolute()), "model.onnx") - dummy = np.random.random((1, N_SAMPLES, STEP_Y, STEP_X)).astype(np.float32) - torch.onnx.export( - model, - torch.from_numpy(dummy), - model_path, - input_names=["in"], - output_names=["out"], - dynamic_axes={"in": {0: "batch", 2: "y", 3: 
"x"}, "out": {0: "batch", 2: "y", 3: "x"}}, - ) - - chunk_raster_op = OpTester(CHUNK_RASTER_YAML) - chunk_raster_op.update_parameters({"step_y": STEP_Y, "step_x": STEP_X}) - chunked_rasters = cast( - List[RasterChunk], - # pyright misidentifies types here - chunk_raster_op.run(rasters=cast(List[DataVibe], raster_list))[ # type: ignore - "chunk_series" - ], - ) - - list_to_raster_op = OpTester(LIST_TO_SEQ_YAML) - raster_seq = cast( - RasterSequence, - # pyright misidentifies types here - list_to_raster_op.run(list_rasters=cast(List[DataVibe], raster_list))[ # type: ignore - "rasters_seq" - ], - ) - - out_chunks = [] - ops = [] - for chunk in chunked_rasters: - compute_onnx_op = OpTester(COMPUTE_ONNX_YAML) - compute_onnx_op.update_parameters( - { - "root_dir": HERE, - "model_file": model_path, - "window_size": WINDOW_SIZE, - "downsampling": 1, - "overlap": 0, - } - ) - ops.append(compute_onnx_op) - out_chunks.append( - cast( - RasterChunk, - compute_onnx_op.run(input_raster=cast(DataVibe, raster_seq), chunk=chunk)[ - "output_raster" - ], - ) - ) - - combine_chunks_op = OpTester(COMBINE_CHUNKS_YAML) - output_data = cast(Raster, combine_chunks_op.run(chunks=out_chunks)["raster"]) - output_array = np.squeeze( - rioxarray.open_rasterio(output_data.raster_asset.path_or_url).values # type: ignore - ) - - pred_torch = model.forward(torch.from_numpy(input_model[None, :, :, :].astype(np.float32))) - pred = np.squeeze(pred_torch.detach().numpy()) - - assert np.all(np.isclose(output_array, pred)) diff --git a/ops/compute_pixel_count/compute_pixel_count.py b/ops/compute_pixel_count/compute_pixel_count.py deleted file mode 100644 index a6910254..00000000 --- a/ops/compute_pixel_count/compute_pixel_count.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict - -import numpy as np -import rasterio -from numpy._typing import NDArray -from rasterio.mask import mask -from shapely import geometry as shpg - -from vibe_core.data import Raster, RasterPixelCount, gen_guid -from vibe_core.data.core_types import AssetVibe, BaseGeometry - -UNIQUE_VALUES_COLUMN = "unique_values" -COUNTS_COLUMN = "counts" - - -def read_data(raster: Raster, geom: BaseGeometry) -> NDArray[Any]: - with rasterio.open(raster.raster_asset.path_or_url) as src: - raw_data, _ = mask( - src, - [geom], - crop=True, - filled=False, - ) - - # We are counting the number of pixels - # for all the raster bands - return raw_data.compressed() # type: ignore - - -def calculate_unique_values(data: NDArray[Any]) -> NDArray[Any]: - unique_values, counts = np.unique(data, return_counts=True) - return np.column_stack((unique_values, counts)) - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback(raster: Raster) -> Dict[str, RasterPixelCount]: - data = read_data(raster, shpg.shape(raster.geometry)) - stack_data = calculate_unique_values(data) - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") - - # Save the data to a CSV file - np.savetxt( - filepath, - stack_data, - delimiter=",", - fmt="%d", - header=f"{UNIQUE_VALUES_COLUMN},{COUNTS_COLUMN}", - comments="", - ) - - raster_pixel_count = RasterPixelCount.clone_from( - raster, - id="pixel_count_" + raster.id, - assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], - ) - - return {"pixel_count": raster_pixel_count} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_pixel_count/compute_pixel_count.yaml b/ops/compute_pixel_count/compute_pixel_count.yaml deleted file mode 100644 index 82b290b3..00000000 --- a/ops/compute_pixel_count/compute_pixel_count.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: 
compute_pixels_count -inputs: - raster: Raster -output: - pixel_count: RasterPixelCount -parameters: -entrypoint: - file: compute_pixel_count.py - callback_builder: CallbackBuilder -description: - short_description: Counts the pixel values in the input raster. - long_description: - Receives a raster and returns a RasterPixelCount which - stores an asset with the count of pixel values in the raster. - sources: - raster: Input raster. - sinks: - pixel_count: Counts of pixel values. diff --git a/ops/compute_pixel_count/test_compute_pixel_count.py b/ops/compute_pixel_count/test_compute_pixel_count.py deleted file mode 100644 index 5016dc5f..00000000 --- a/ops/compute_pixel_count/test_compute_pixel_count.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import cast - -import numpy as np -import pandas as pd -import pytest -import shapely.geometry as shpg -import xarray as xr -from compute_pixel_count import COUNTS_COLUMN, UNIQUE_VALUES_COLUMN - -from vibe_core.data import Raster, RasterPixelCount -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.raster import save_raster_to_asset - -NBANDS = 3 -FAKE_RASTER_DATA = np.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]]).astype(np.float32) -CONFIG_PATH = os.path.join(os.path.dirname(__file__), "compute_pixel_count.yaml") - -EXPECTED_UNIQUE_VALUES = [0, 1, 2] -# We are using 3 bands, so we expect 9 counts for each unique value -EXPECTED_COUNTS = [9, 9, 9] - - -@pytest.fixture -def fake_raster(tmpdir: str): - x = 3 - y = 3 - - fake_data = FAKE_RASTER_DATA - fake_data = [fake_data] * NBANDS - - fake_da = xr.DataArray( - fake_data, - coords={"bands": np.arange(NBANDS), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, - dims=["bands", "y", "x"], - ) - - fake_da.rio.write_crs("epsg:4326", inplace=True) - asset = save_raster_to_asset(fake_da, tmpdir) - - return Raster( - id="fake_id", - 
time_range=(datetime(2023, 1, 1), datetime(2023, 1, 1)), - geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), - assets=[asset], - bands={j: i for i, j in enumerate(["B1", "B2", "B3"])}, - ) - - -def test_compute_pixel_count(fake_raster: Raster): - op = OpTester(CONFIG_PATH) - - output = op.run(raster=fake_raster) - assert output - assert "pixel_count" in output - - pixel_count = cast(RasterPixelCount, output["pixel_count"]) - assert len(pixel_count.assets) == 1 - - asset_path = pixel_count.assets[0].path_or_url - assert os.path.exists(asset_path) - - # Read the CSV file - df = pd.read_csv(asset_path) - - # Check the columns - assert UNIQUE_VALUES_COLUMN in df.columns # type: ignore - assert COUNTS_COLUMN in df.columns # type: ignore - - # Check the values - assert np.array_equal(df[UNIQUE_VALUES_COLUMN].values, EXPECTED_UNIQUE_VALUES) # type: ignore - assert np.array_equal(df[COUNTS_COLUMN].values, EXPECTED_COUNTS) # type: ignore diff --git a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py deleted file mode 100644 index e83fdacd..00000000 --- a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from tempfile import TemporaryDirectory -from typing import Any, Dict, List - -import torch -import torch.nn.functional as F -from numpy.typing import NDArray - -from vibe_core.data import Raster, gen_guid -from vibe_lib.raster import ( - RGBA, - interpolated_cmap_from_colors, - json_to_asset, - load_raster, - load_raster_match, - save_raster_to_asset, -) - -CMAP_INTERVALS: List[float] = [0.0, 4000.0] - -CMAP_COLORS: List[RGBA] = [ - RGBA(0, 0, 0, 255), - RGBA(255, 255, 255, 255), -] - -LOGGER = logging.getLogger(__name__) - - -def run_average_elevation( - dem: NDArray[Any], cdl: NDArray[Any], window_size: int = 41 -) -> NDArray[Any]: - kernel = torch.ones((1, 1, window_size, window_size)) - padding = (window_size - 1) // 2 - eps = 1e-9 - - dem_torch = torch.from_numpy(dem).to(kernel) - cdl_torch = torch.from_numpy(cdl).to(kernel) - - # Downscale - downscale = 4 - dem_torch = F.interpolate( - dem_torch.unsqueeze(0), - (dem_torch.shape[1] // downscale, dem_torch.shape[2] // downscale), - mode="bilinear", - ).squeeze(0) - - cdl_torch = F.interpolate( - cdl_torch.unsqueeze(0), - (cdl_torch.shape[1] // downscale, cdl_torch.shape[2] // downscale), - mode="nearest", - ).squeeze(0) - - # DEM z-scores - cdl_elevation = torch.zeros_like(dem_torch).to(kernel) - - mean_elev = F.conv2d( - F.pad( - dem_torch.unsqueeze(0).to(kernel), - (padding, padding, padding, padding), - mode="replicate", - ), - kernel, - bias=None, - stride=1, - padding=0, - ).squeeze(0) / (window_size**2) - - std_elev = F.conv2d( - F.pad( - (dem_torch - mean_elev).unsqueeze(0).to(kernel) ** 2, - (padding, padding, padding, padding), - mode="replicate", - ), - kernel, - bias=None, - stride=1, - padding=0, - ).squeeze(0) / (window_size**2 - 1) - - # Compute Z-scores of per-class means (wrt statistics of the whole window) - z_elevation = (dem_torch - mean_elev) / (std_elev + eps) - - # Compute elevation mean per-class in overlapping windows - unique_cdl_labels = torch.unique(cdl_torch) - 
for i in unique_cdl_labels: - label_mask = cdl_torch == i - masked_elev = z_elevation * label_mask - elev_sum = F.conv2d( - masked_elev.unsqueeze(0), kernel, bias=None, stride=1, padding=padding - ).squeeze(0) - label_count = F.conv2d( - label_mask.unsqueeze(0).to(kernel), kernel, bias=None, stride=1, padding=padding - ).squeeze(0) - cdl_elevation[label_mask] = elev_sum[label_mask] / label_count[label_mask] - - # Upsample to original resolution - cdl_elevation = F.interpolate( - cdl_elevation.unsqueeze(0), (dem.shape[1], dem.shape[2]), mode="bilinear" - ).squeeze(0) - - return cdl_elevation.numpy() - - -class CallbackBuilder: - def __init__( - self, - window_size: int, - ): - self.tmp_dir = TemporaryDirectory() - self.window_size = window_size - - def __call__(self): - def operator_callback( - input_dem_raster: Raster, input_cluster_raster: Raster - ) -> Dict[str, Raster]: - dem_da = load_raster_match( - input_dem_raster, - match_raster=input_cluster_raster, - ) - cluster_da = load_raster(input_cluster_raster, use_geometry=True) - - average_elevation_da: NDArray[Any] = run_average_elevation( - dem_da.to_numpy(), cluster_da.to_numpy(), self.window_size - ) - - vis_dict: Dict[str, Any] = { - "bands": [0], - "colormap": interpolated_cmap_from_colors(CMAP_COLORS, CMAP_INTERVALS), - "range": (0, 4000), - } - - asset = save_raster_to_asset( - dem_da[:1].copy(data=average_elevation_da), self.tmp_dir.name - ) - out_raster = Raster.clone_from( - src=input_dem_raster, - id=gen_guid(), - assets=[ - asset, - json_to_asset(vis_dict, self.tmp_dir.name), - ], - ) - - return {"output_raster": out_raster} - - return operator_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml deleted file mode 100644 index 8edfb18d..00000000 --- 
a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: compute_raster_class_windowed_average -inputs: - input_dem_raster: Raster - input_cluster_raster: Raster -output: - output_raster: Raster -parameters: - window_size: 41 -dependencies: - parameters: - - window_size -entrypoint: - file: compute_raster_class_windowed_average.py - callback_builder: CallbackBuilder -description: - short_description: - Computes average elevation per-class in overlapping windows, combining cluster and elevation tiles. diff --git a/ops/compute_raster_cluster/compute_raster_cluster.py b/ops/compute_raster_cluster/compute_raster_cluster.py deleted file mode 100644 index 1537d990..00000000 --- a/ops/compute_raster_cluster/compute_raster_cluster.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -from tempfile import TemporaryDirectory -from typing import Any, Dict - -from numpy.typing import NDArray - -from vibe_core.data import CategoricalRaster, Raster, gen_guid -from vibe_lib import overlap_clustering -from vibe_lib.raster import get_categorical_cmap, json_to_asset, load_raster, save_raster_to_asset - -INT8_MAX_VALUE = 255 - -LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__( - self, - clustering_method: str, - number_classes: int, - half_side_length: int, - number_iterations: int, - stride: int, - warmup_steps: int, - warmup_half_side_length: int, - window: int, - ): - self.tmp_dir = TemporaryDirectory() - self.clustering_method = clustering_method - self.number_classes = number_classes - self.half_side_length = half_side_length - self.number_iterations = number_iterations - self.stride = stride - self.warmup_steps = warmup_steps - self.warmup_half_side_length = warmup_half_side_length - self.window = window - - def __call__(self): - def operator_callback(input_raster: Raster) -> Dict[str, Raster]: - src_xa = 
load_raster(input_raster, use_geometry=True) - src_data: NDArray[Any] = src_xa.to_numpy() - - if src_xa.dtype == "uint8": # overlap clustering requires a float numpy array - src_data = src_data / float(INT8_MAX_VALUE) - - p: NDArray[Any] = overlap_clustering.run_clustering( - src_data, - number_classes=self.number_classes, - half_side_length=self.half_side_length, - number_iterations=self.number_iterations, - stride=self.stride, - warmup_steps=self.warmup_steps, - warmup_half_side_length=self.warmup_half_side_length, - window=self.window, - ) - - vis_dict: Dict[str, Any] = { - "bands": [0], - "colormap": get_categorical_cmap("tab10", self.number_classes), - "range": (0, self.number_classes - 1), - } - - out_raster = CategoricalRaster( - id=gen_guid(), - geometry=input_raster.geometry, - time_range=input_raster.time_range, - assets=[ - save_raster_to_asset(src_xa[0].copy(data=p), self.tmp_dir.name), - json_to_asset(vis_dict, self.tmp_dir.name), - ], - bands={"cluster": 0}, - categories=[f"cluster{i}" for i in range(self.number_classes)], - ) - - return {"output_raster": out_raster} - - return operator_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_raster_cluster/compute_raster_cluster.yaml b/ops/compute_raster_cluster/compute_raster_cluster.yaml deleted file mode 100644 index 8e6ed4bc..00000000 --- a/ops/compute_raster_cluster/compute_raster_cluster.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: compute_raster_cluster -inputs: - input_raster: Raster -output: - output_raster: Raster -parameters: - clustering_method: "overlap_clustering" - number_classes: 4 # we keep this number of clusters low as we want to distinguish between crop and non-crop - half_side_length: 21 # we keep this number of pixels low as we are looking for local differences - number_iterations: 2 # during tests, this number provided a good balance between speed and good results - stride: 8 # instead of calculating the cluster on each pixel, we skip X strides and 
interpolate the result - warmup_steps: 0 # we keep this parameter zero as we don't want to run a larger cluster at the beginning - warmup_half_side_length: 127 # size of the window for the initial larger clustering process. ignored when warmup_steps = 0 - window: 1024 -dependencies: - parameters: - - clustering_method - - number_classes - - half_side_length - - number_iterations - - stride - - warmup_steps - - warmup_half_side_length - - window -entrypoint: - file: compute_raster_cluster.py - callback_builder: CallbackBuilder -description: - short_description: Computes local clusters using an overlap clustering method. \ No newline at end of file diff --git a/ops/compute_raster_gradient/compute_raster_gradient.py b/ops/compute_raster_gradient/compute_raster_gradient.py deleted file mode 100644 index d1e35255..00000000 --- a/ops/compute_raster_gradient/compute_raster_gradient.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List - -import numpy as np -import rasterio - -from vibe_core.data import AssetVibe, Raster, gen_guid, gen_hash_id -from vibe_lib.raster import ( - RGBA, - compute_sobel_gradient, - include_raster_overviews, - interpolated_cmap_from_colors, - json_to_asset, -) - -GRADIENT_CMAP_INTERVALS: List[float] = [0.0, 100.0, 200.0] - -GRADIENT_CMAP_COLORS: List[RGBA] = [ - RGBA(255, 237, 160, 255), - RGBA(254, 178, 76, 255), - RGBA(240, 59, 32, 255), -] - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def operator_callback(input_raster: Raster) -> Dict[str, Raster]: - input_band_mapping = input_raster.bands - output_band_mapping = {} - output_bands = [] - uid = gen_guid() - - out_path = os.path.join(self.tmp_dir.name, f"{gen_guid()}.tif") - - # Open the original raster and go through the layers computing the gradient. 
- with rasterio.open(input_raster.raster_asset.url) as src: - out_meta = src.meta - for band_name in input_band_mapping.keys(): - output_bands.insert( - input_band_mapping[band_name], - compute_sobel_gradient(src.read(input_band_mapping[band_name] + 1)), - ) - - # Create a new raster to save the gradient layers. - with rasterio.open(out_path, "w", **out_meta) as dst: - dst.write(np.stack(output_bands, axis=0)) - - # Update output bands name. - output_band_mapping = {f"{k}_gradient": v for k, v in input_band_mapping.items()} - - vis_dict: Dict[str, Any] = { - "bands": [0], - "colormap": interpolated_cmap_from_colors( - GRADIENT_CMAP_COLORS, GRADIENT_CMAP_INTERVALS - ), - "range": (0, 200), - } - - asset = AssetVibe(reference=out_path, type=mimetypes.types_map[".tif"], id=uid) - include_raster_overviews(asset.local_path) - out_raster = Raster.clone_from( - input_raster, - id=gen_hash_id( - f"{input_raster.id}_compute_raster_gradient", - input_raster.geometry, - input_raster.time_range, - ), - assets=[asset, json_to_asset(vis_dict, self.tmp_dir.name)], - bands=output_band_mapping, - ) - - return {"output_raster": out_raster} - - return operator_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_raster_gradient/compute_raster_gradient.yaml b/ops/compute_raster_gradient/compute_raster_gradient.yaml deleted file mode 100644 index 49f803dc..00000000 --- a/ops/compute_raster_gradient/compute_raster_gradient.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: compute_raster_gradient -inputs: - input_raster: Raster -output: - output_raster: Raster -parameters: -entrypoint: - file: compute_raster_gradient.py - callback_builder: CallbackBuilder -description: - short_description: Computes the gradient of each band of the input raster with a Sobel operator. 
\ No newline at end of file diff --git a/ops/compute_shadow_prob/compute_shadow_prob.py b/ops/compute_shadow_prob/compute_shadow_prob.py deleted file mode 100644 index 3576693b..00000000 --- a/ops/compute_shadow_prob/compute_shadow_prob.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict - -import numpy as np -import onnxruntime as ort -from numpy.typing import NDArray -from rasterio.enums import Resampling - -from vibe_core.data import ( - AssetVibe, - S2ProcessingLevel, - Sentinel2CloudProbability, - Sentinel2Raster, - gen_guid, -) -from vibe_lib.raster import DEFAULT_NODATA, resample_raster -from vibe_lib.spaceeye.chip import ChipDataset, Dims, InMemoryReader, get_loader, predict_chips -from vibe_lib.spaceeye.utils import verify_processing_level - - -def pre_process(scale: float): - def fun(chip_data: NDArray[Any], _): - return chip_data * scale - - return fun - - -def post_process( - chip_data: NDArray[Any], chip_mask: NDArray[Any], model_out: NDArray[Any] -) -> NDArray[Any]: - """ - After prediction, we set nodata (all zeros) regions as 100% cloud - """ - nodata_mask = chip_mask.any(axis=1, keepdims=True) - model_prob = 1 / (1 + np.exp(-model_out)) - model_prob[nodata_mask] = 1 - return model_prob - - -class CallbackBuilder: - def __init__( - self, - downsampling: int, - root_dir: str, - model_path: str, - window_size: int, - overlap: float, - batch_size: int, - num_workers: int, - in_memory: bool, - ): - self.downsampling = downsampling - self.root_dir = root_dir - self.model_path = model_path - self.window_size = window_size - self.overlap = overlap - self.batch_size = batch_size - self.num_workers = num_workers - self.in_memory = in_memory - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def compute_shadow_prob( - sentinel_raster: Sentinel2Raster, - ) -> Dict[str, Sentinel2CloudProbability]: - 
verify_processing_level((sentinel_raster,), S2ProcessingLevel.L2A, "FPN Shadow model") - - if self.downsampling < 1: - raise ValueError( - f"Downsampling must be equal or larger than 1, found {self.downsampling}" - ) - model_path = os.path.join(self.root_dir, self.model_path) - model = ort.InferenceSession(model_path) - chip_size = self.window_size - step_size = int(chip_size * (1 - self.overlap)) - dataset = ChipDataset( - [sentinel_raster], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(step_size, step_size, 1), - downsampling=self.downsampling, - nodata=DEFAULT_NODATA, - reader=InMemoryReader(self.downsampling) if self.in_memory else None, - ) - - dataloader = get_loader( - dataset, self.batch_size, self.num_workers if not self.in_memory else 0 - ) - pred_filepaths = predict_chips( - model, - dataloader, - self.tmp_dir.name, - skip_nodata=True, - pre_process=pre_process(sentinel_raster.scale), - post_process=post_process, - ) - assert ( - len(pred_filepaths) == 1 - ), f"Expected one prediction file, found: {len(pred_filepaths)}" - mask_filepath = resample_raster( - pred_filepaths[0], - self.tmp_dir.name, - dataset.width, - dataset.height, - dataset.transform, - Resampling.bilinear, - ) - asset = AssetVibe(reference=mask_filepath, type="image/tiff", id=gen_guid()) - - shadow_mask = Sentinel2CloudProbability.clone_from( - sentinel_raster, id=gen_guid(), assets=[asset] - ) - - return {"shadow_probability": shadow_mask} - - return compute_shadow_prob - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/compute_shadow_prob/compute_shadow_prob.yaml b/ops/compute_shadow_prob/compute_shadow_prob.yaml deleted file mode 100644 index 851f6201..00000000 --- a/ops/compute_shadow_prob/compute_shadow_prob.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: compute_shadow_prob -inputs: - sentinel_raster: Sentinel2Raster -output: - shadow_probability: Sentinel2CloudProbability -parameters: - downsampling: 1 - root_dir: /opt/terravibes/ops/resources/shadow_models 
- model_path: shadow.onnx - window_size: 512 - overlap: .25 - batch_size: 1 - num_workers: 0 - in_memory: false -entrypoint: - file: compute_shadow_prob.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_path - - downsampling - - window_size - - overlap -description: - short_description: Computes shadow probabilities using a convolutional segmentation model for L2A. \ No newline at end of file diff --git a/ops/create_raster_sequence/create_raster_sequence.py b/ops/create_raster_sequence/create_raster_sequence.py deleted file mode 100644 index 14d681ea..00000000 --- a/ops/create_raster_sequence/create_raster_sequence.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from typing import Any, Dict, List, Tuple, Union - -from shapely import geometry as shpg -from shapely import ops as shpo - -from vibe_core.data import Raster, gen_guid -from vibe_core.data.rasters import RasterSequence - - -def get_proper_order(seq: Union[List[Raster], RasterSequence]) -> List[Raster]: - if isinstance(seq, RasterSequence): - return [Raster.clone_from(seq, gen_guid(), assets=[i]) for i in seq.get_ordered_assets()] # type: ignore - else: - return sorted(seq, key=lambda r: r.time_range[0]) - - -def get_timerange(list1: List[Raster], list2: List[Raster]) -> Tuple[datetime, datetime]: - dates = sorted([t for list in [list1, list2] for r in list for t in r.time_range]) - return dates[0], dates[-1] - - -def get_geom(list1: List[Raster], list2: List[Raster]) -> Dict[str, Any]: - geoms = [r.geometry for list in [list1, list2] for r in list] - return shpg.mapping(shpo.unary_union([shpg.shape(i) for i in geoms])) - - -class CallbackBuilder: - def __call__(self): - def create_raster_sequence( - rasters1: Union[List[Raster], RasterSequence], - rasters2: Union[List[Raster], RasterSequence], - ) -> Dict[str, RasterSequence]: - list1 = get_proper_order(rasters1) - list2 = 
get_proper_order(rasters2) - - time_range = get_timerange(list1, list2) - geom = get_geom(list1, list2) - - res = RasterSequence( - gen_guid(), - time_range=time_range, - geometry=geom, - assets=[], - bands=dict(), - ) - for r in list1: - res.add_item(r) - for r in list2: - res.add_item(r) - return {"sequence": res} - - return create_raster_sequence diff --git a/ops/create_raster_sequence/create_raster_sequence.yaml b/ops/create_raster_sequence/create_raster_sequence.yaml deleted file mode 100644 index d92d5392..00000000 --- a/ops/create_raster_sequence/create_raster_sequence.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: create_raster_sequence -inputs: - rasters1: List[Raster] - rasters2: List[Raster] -output: - sequence: RasterSequence -parameters: -entrypoint: - file: create_raster_sequence.py - callback_builder: CallbackBuilder -description: - short_description: Create a raster sequence from two lists of rasters. - long_description: The op will create a single sequence that combines rasters from two input lists. - inputs: - rasters1: First list of rasters. - rasters2: Second list of rasters. - output: - sequence: Combined raster sequence. \ No newline at end of file diff --git a/ops/create_raster_sequence/create_raster_sequence_from_sequence_list.yaml b/ops/create_raster_sequence/create_raster_sequence_from_sequence_list.yaml deleted file mode 100644 index 6a871fb6..00000000 --- a/ops/create_raster_sequence/create_raster_sequence_from_sequence_list.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: create_raster_sequence -inputs: - rasters1: RasterSequence - rasters2: List[Raster] -output: - sequence: RasterSequence -parameters: -entrypoint: - file: create_raster_sequence.py - callback_builder: CallbackBuilder -description: - short_description: Create a raster sequence from a raster sequence and a list of rasters. - long_description: The op will create a single sequence that combines rasters from the input sequence and the input list. - inputs: - rasters1: Raster sequence. 
- rasters2: List of rasters. - output: - sequence: Combined raster sequence. \ No newline at end of file diff --git a/ops/datavibe_filter/datavibe_filter.py b/ops/datavibe_filter/datavibe_filter.py deleted file mode 100644 index 96bdaa44..00000000 --- a/ops/datavibe_filter/datavibe_filter.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from functools import partial -from typing import Dict - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import DataVibe -from vibe_core.data.core_types import gen_hash_id - - -def datavibe_filter(input_item: DataVibe, filter_out: str) -> Dict[str, DataVibe]: - geometry = input_item.geometry - time_range = input_item.time_range - if filter_out in ("all", "geometry"): - bbox = [0.0, -90.0, 360.0, 90.0] - polygon: Polygon = box(*bbox, ccw=True) - geometry = mapping(polygon) # dummy geometry - if filter_out in ("all", "time_range"): - time_range = (datetime(2022, 1, 1), datetime(2022, 1, 1)) # dummy dates - return { - "output_item": DataVibe.clone_from( - input_item, - id=gen_hash_id("datavibe_filter", geometry=geometry, time_range=time_range), - geometry=geometry, - time_range=time_range, - assets=[], - ) - } - - -def callback_builder(filter_out: str): - filter_out_options = ["all", "time_range", "geometry"] - if filter_out not in filter_out_options: - raise ValueError( - f"Invalid filter_out parameter: {filter_out}. 
" - f"Valid values are: {', '.join(filter_out_options)}" - ) - return partial(datavibe_filter, filter_out=filter_out) diff --git a/ops/datavibe_filter/datavibe_filter.yaml b/ops/datavibe_filter/datavibe_filter.yaml deleted file mode 100644 index 63c59ed5..00000000 --- a/ops/datavibe_filter/datavibe_filter.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: datavibe_filter -inputs: - input_item: DataVibe -output: - output_item: DataVibe -parameters: - filter_out: all # can be "all", "time_range" or "geometry" -entrypoint: - file: datavibe_filter.py - callback_builder: callback_builder -description: - short_description: Filters out time range and/or geometry information from the input item. \ No newline at end of file diff --git a/ops/detect_driveway/detect_driveway.py b/ops/detect_driveway/detect_driveway.py deleted file mode 100644 index cb56cf65..00000000 --- a/ops/detect_driveway/detect_driveway.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Optional, Tuple, cast - -import geopandas as gpd -import numpy as np -import rasterio -from numpy.typing import NDArray -from rasterio.features import shapes -from rasterio.mask import mask -from rasterio.transform import Affine -from scipy.ndimage import convolve -from shapely import geometry as shpg -from shapely import ops as shpo -from shapely.geometry.base import BaseGeometry -from skimage.measure import label, regionprops -from skimage.transform import rotate - -from vibe_core.data import CategoricalRaster, DataVibe, Raster -from vibe_core.data.core_types import AssetVibe, GeometryCollection, gen_guid -from vibe_lib.raster import MaskedArrayType - - -def read_raster(filepath: str, geometry: BaseGeometry) -> Tuple[MaskedArrayType, Affine]: - with rasterio.open(filepath) as src: - return mask(src, [geometry], crop=True, filled=False) - - -def get_kernels(kernel_size: Tuple[int, int], n_kernels: int) -> List[NDArray[Any]]: - y, x = kernel_size - k_max = max(kernel_size) - - base_kernel = np.zeros((k_max, k_max)) - off_y = (k_max - y) // 2 - off_x = (k_max - x) // 2 - base_kernel[off_y : k_max - off_y, off_x : k_max - off_x] = 1 - - angles = np.linspace(0, 180, n_kernels + 1)[:-1] - return [rotate(base_kernel, a, order=0) for a in angles] - - -def can_park(mask: NDArray[Any], car_size: Tuple[int, int], n_kernels: int, thr: float): - mask = mask.astype(np.float32) - kernels = get_kernels(car_size, n_kernels) - for kernel in kernels: - ks = kernel.sum() - if np.any(convolve(mask, kernel, mode="constant") / ks >= thr): - return True - return False - - -class DrivewayDetector: - def __init__( - self, - img_filepath: str, - pred_filepath: str, - road_df: gpd.GeoDataFrame, - min_region_area: float, - ndvi_thr: float, - car_size: Tuple[int, int], - num_kernels: int, - car_thr: float, - ) -> None: - self.img_filepath = img_filepath - self.pred_filepath = pred_filepath - - with 
rasterio.open(img_filepath) as src: - pixel_area = src.res[0] * src.res[1] - self.raster_geom = shpg.box(*src.bounds) - self.raster_crs = src.crs - self.min_area = min_region_area / pixel_area - - self.road_df = cast(gpd.GeoDataFrame, road_df.to_crs(self.raster_crs)) - - self.ndvi_thr = ndvi_thr - self.car_size = car_size - self.num_kernels = num_kernels - self.car_thr = car_thr - - def _get_region_near_road( - self, pred_mask: MaskedArrayType, tr: Affine - ) -> Optional[NDArray[np.bool_]]: - pred_labels = label(pred_mask.filled(0)) - pred_regions = sorted( - [p for p in regionprops(pred_labels) if p.area > self.min_area], - key=lambda x: self.road_df.geometry.distance(shpg.Point(tr * x.centroid[::-1])).min(), - ) - if not pred_regions: - # No region that is large enough - return None - - region = pred_regions[0] # Get region closest to the road - mask = pred_labels == region.label - return mask - - def detect(self, geom: BaseGeometry) -> Optional[BaseGeometry]: - bands, tr = read_raster(self.img_filepath, geom) - pred_mask = read_raster(self.pred_filepath, geom)[0][0] > 0 - - red, nir = bands[[0, 3]] - ndvi = (nir - red) / (nir + red) - not_green = (ndvi < self.ndvi_thr).filled(0) - - region_mask = self._get_region_near_road(pred_mask, tr) - if region_mask is None: - # Not region large enough - return None - - region_mask = not_green * region_mask - region_labels = label(region_mask) - - # Find regions where we could fit a car - dw_regions = [ - p - for p in regionprops(region_labels) - if can_park(p.image, self.car_size, self.num_kernels, self.car_thr) - ] - if not dw_regions: - # No region that can fit a car - return None - # Estimate total region of the driveway - dw_mask = np.sum([region_labels == p.label for p in dw_regions], axis=0).astype(bool) - dw_geom = shpo.unary_union( - [ - shpg.shape(s).convex_hull - for s, _ in shapes( - dw_mask.astype(np.uint8), mask=dw_mask, connectivity=8, transform=tr - ) - ] - ) - return dw_geom - - -class CallbackBuilder: - 
def __init__( - self, - min_region_area: float, - ndvi_thr: float, - car_size: Tuple[int, int], - num_kernels: int, - car_thr: float, - ): - self.min_region_area = min_region_area - self.ndvi_thr = ndvi_thr - self.car_size = car_size - self.num_kernels = num_kernels - self.car_thr = car_thr - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback( - input_raster: Raster, - segmentation_raster: CategoricalRaster, - property_boundaries: GeometryCollection, - roads: GeometryCollection, - ) -> Dict[str, DataVibe]: - road_df = cast(gpd.GeoDataFrame, gpd.read_file(roads.assets[0].url)) - detector = DrivewayDetector( - input_raster.raster_asset.url, - segmentation_raster.raster_asset.url, - road_df=road_df, - min_region_area=self.min_region_area, - ndvi_thr=self.ndvi_thr, - car_size=self.car_size, - num_kernels=self.num_kernels, - car_thr=self.car_thr, - ) - properties_df = cast( - gpd.GeoDataFrame, - gpd.read_file(property_boundaries.assets[0].url).to_crs(detector.raster_crs), # type: ignore - ) - properties_df = properties_df[properties_df.intersects(detector.raster_geom)] - driveway = [] - dw_geoms = [] - assert properties_df is not None, "There are no intersections with properties" - for _, row in properties_df.iterrows(): - geom = row.geometry.buffer(0) - dw_geom = detector.detect(geom) - is_dw = dw_geom is not None - driveway.append(is_dw) - if is_dw: - dw_geoms.append(dw_geom) # type: ignore - full_df = properties_df[driveway].copy() # type: ignore - dw_df = full_df.copy() - dw_df["geometry"] = dw_geoms # type: ignore - out = {} - for out_name, df in zip(("properties_with_driveways", "driveways"), (full_df, dw_df)): - asset_id = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{asset_id}.geojson") - df.to_file(filepath, driver="GeoJSON") # type: ignore - asset = AssetVibe(reference=filepath, type="application/geo+json", id=asset_id) - out[out_name] = DataVibe.clone_from(input_raster, id=gen_guid(), assets=[asset]) - - return out - - 
return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/detect_driveway/detect_driveway.yaml b/ops/detect_driveway/detect_driveway.yaml deleted file mode 100644 index 5f5192cb..00000000 --- a/ops/detect_driveway/detect_driveway.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: detect_driveway -inputs: - input_raster: Raster - segmentation_raster: CategoricalRaster - property_boundaries: GeometryCollection - roads: GeometryCollection -output: - properties_with_driveways: GeometryCollection - driveways: GeometryCollection -parameters: - min_region_area: 8 - ndvi_thr: .3 - car_size: [20, 8] - num_kernels: 8 - car_thr: .95 -entrypoint: - file: detect_driveway.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - min_region_area - - ndvi_thr - - car_size - - num_kernels - - car_thr -description: - short_description: - Detects driveways in the front of each house, using the input image, segmentation map, - road geometry, and input property boundaries. \ No newline at end of file diff --git a/ops/detect_outliers/detect_outliers.py b/ops/detect_outliers/detect_outliers.py deleted file mode 100644 index c35bffa5..00000000 --- a/ops/detect_outliers/detect_outliers.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, List, Sequence, Tuple, Union, cast - -import numpy as np -import pandas as pd -import xarray as xr -from numpy.typing import NDArray -from sklearn.preprocessing import StandardScaler - -from vibe_core.data import CategoricalRaster, Raster, TimeSeries, gen_guid -from vibe_lib.gaussian_mixture import ( - cluster_data, - mixture_log_likelihood, - train_mixture_with_component_search, -) -from vibe_lib.raster import ( - get_categorical_cmap, - get_cmap, - json_to_asset, - load_raster, - save_raster_to_asset, -) -from vibe_lib.timeseries import save_timeseries_to_asset - - -def compute_outliers( - curves: NDArray[Any], preprocessing: StandardScaler, thr: float, max_components: int -) -> Tuple[NDArray[np.int32], NDArray[np.float32], NDArray[np.int32], NDArray[Any]]: - x = preprocessing.fit_transform(curves) # Preprocess data - - mix = train_mixture_with_component_search(x, max_components=max_components) - labels = cluster_data(x, mix) # Assign labels - labels = labels.astype(np.int32) - # TODO: How to compute the threshold? 
Use fixed for now - likelihood = mixture_log_likelihood(x, mix) - outliers = likelihood < thr - likelihood = likelihood.astype(np.float32) - outliers = cast(NDArray[np.int32], outliers.astype(np.int32)) - # Recover means in the NDVI space - mix_means = cast(NDArray[Any], preprocessing.inverse_transform(mix.means_)) - - return labels, likelihood, outliers, mix_means - - -def save_mixture_means( - mix_means: NDArray[Any], - output_dir: str, - geom: Dict[str, Any], - date_list: Sequence[datetime], -) -> TimeSeries: - # Save timeseries output - df = pd.DataFrame(date_list, columns=["date"]) - for i, m in enumerate(mix_means): - df[f"component{i}"] = m - - df.set_index("date", drop=True, inplace=True) - - return TimeSeries( - id=gen_guid(), - geometry=geom, - time_range=(date_list[0], date_list[-1]), - assets=[save_timeseries_to_asset(df, output_dir)], - ) - - -def unpack_data(rasters: Sequence[Raster]) -> Tuple[NDArray[np.float32], xr.DataArray]: - # Sort rasters according to date - rasters = sorted(rasters, key=lambda x: x.time_range[0]) - # Load one raster to get metadata we need - band_data = load_raster(rasters[0], use_geometry=True) - - # Get band data and compress masked data into a stack of timeseries - curves = ( - np.stack( - [band_data.to_masked_array().compressed()] - + [ - load_raster(r, use_geometry=True).to_masked_array().compressed() - for r in rasters[1:] - ] - ) - .astype(np.float32) - .T - ) - return curves, band_data - - -def pack_rasters( - labels: NDArray[np.int32], - likelihood: NDArray[np.float32], - outliers: NDArray[np.int32], - geom: Dict[str, Any], - date_list: Sequence[datetime], - threshold: float, - output_dir: str, - reshape_fun: Callable[[NDArray[Any]], xr.DataArray], -): - output: Dict[str, List[Any]] = {} - time_range = (date_list[0], date_list[-1]) - - # Save likelihood raster - vis_dict = { - "bands": [0], - "colormap": get_cmap("viridis"), - "range": (max(threshold, float(likelihood.min())), float(likelihood.max())), - } - heatmap = 
Raster( - id=gen_guid(), - geometry=geom, - time_range=time_range, - assets=[ - save_raster_to_asset(reshape_fun(likelihood), output_dir), - json_to_asset(vis_dict, output_dir), - ], - bands={"likelihood": 0}, - ) - output["heatmap"] = [heatmap] - - # Save categorical rasters - classes = np.unique(labels) - num_classes = classes.shape[0] - vis_dict = { - "bands": [0], - "colormap": get_categorical_cmap("tab10", num_classes), - "range": (0, num_classes - 1), - } - output["segmentation"] = [ - CategoricalRaster( - id=gen_guid(), - geometry=geom, - time_range=time_range, - assets=[ - save_raster_to_asset(reshape_fun(labels), output_dir), - json_to_asset(vis_dict, output_dir), - ], - bands={"labels": 0}, - categories=[f"component{i}" for i in range(num_classes)], - ) - ] - vis_dict = { - "bands": [0], - "colormap": get_categorical_cmap("tab10", 2), - "range": (0, 1), - } - output["outliers"] = [ - CategoricalRaster( - id=gen_guid(), - geometry=geom, - time_range=time_range, - assets=[ - save_raster_to_asset(reshape_fun(outliers), output_dir), - json_to_asset(vis_dict, output_dir), - ], - bands={"labels": 0}, - categories=["normal", "outlier"], - ) - ] - return output - - -def pack_data( - labels: NDArray[np.int32], - likelihood: NDArray[np.float32], - outliers: NDArray[np.int32], - mix_means: NDArray[np.float32], - geom: Dict[str, Any], - date_list: Sequence[datetime], - threshold: float, - output_dir: str, - reshape_fun: Callable[[NDArray[Any]], xr.DataArray], -): - output = pack_rasters( - labels, likelihood, outliers, geom, date_list, threshold, output_dir, reshape_fun - ) - output["mixture_means"] = [save_mixture_means(mix_means, output_dir, geom, date_list)] - return output - - -class CallbackBuilder: - def __init__(self, threshold: float): - self.tmp_dir = TemporaryDirectory() - self.threshold = threshold - # TODO: Customize preprocessing - self.preprocessing = StandardScaler() - - def __call__(self): - def outliers_callback(rasters: List[Raster]) -> Dict[str, 
List[Union[Raster, TimeSeries]]]: - curves, band_data = unpack_data(rasters) - - # Get metadata - geom = rasters[0].geometry - date_list = [r.time_range[0] for r in rasters] - - # Helper function to obtain masked array from 1D array - def reshape_to_geom(values: NDArray[Any]) -> xr.DataArray: - data = np.ma.masked_all(band_data.shape, values.dtype) - data.mask = band_data.isnull() - data.data[~data.mask] = values - data.fill_value = band_data.rio.encoded_nodata # Unused value - data = band_data.copy(data=data.filled()) - data.rio.update_encoding({"dtype": str(values.dtype)}, inplace=True) - return data - - # Gaussian mixtures modeling - labels, likelihood, outliers, mix_means = compute_outliers( - curves, - self.preprocessing, - self.threshold, - max_components=1, # Assume only one component - ) - - # Pack data - output = pack_data( - labels, - likelihood, - outliers, - mix_means, - geom, - date_list, - self.threshold, - self.tmp_dir.name, - reshape_to_geom, - ) - - return output - - return outliers_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/detect_outliers/detect_outliers.yaml b/ops/detect_outliers/detect_outliers.yaml deleted file mode 100644 index a91e5bbf..00000000 --- a/ops/detect_outliers/detect_outliers.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: detect_outliers -inputs: - rasters: List[Raster] -output: - segmentation: List[CategoricalRaster] - heatmap: List[Raster] - outliers: List[CategoricalRaster] - mixture_means: List[TimeSeries] -parameters: - threshold: -60 -entrypoint: - file: detect_outliers.py - callback_builder: CallbackBuilder -description: - short_description: Fits a single-component Gaussian Mixture Model (GMM) over input rasters - to detect outliers according to the threshold parameter. 
diff --git a/ops/download_airbus/download_airbus.py b/ops/download_airbus/download_airbus.py deleted file mode 100644 index f7bc601a..00000000 --- a/ops/download_airbus/download_airbus.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import re -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, Dict, List - -from shapely import geometry as shpg - -from vibe_core.data import AirbusProduct, AirbusRaster, AssetVibe, gen_guid -from vibe_lib.airbus import IMAGE_FORMAT, AirBusAPI, Constellation -from vibe_lib.geometry import norm_intersection -from vibe_lib.raster import json_to_asset - - -def convert_product(product: Dict[str, Any], out_dir: str) -> AirbusRaster: - dt = datetime.fromisoformat(product["acquisitionDate"].replace("Z", "+00:00")) - filepath = product.pop("filepath") - geom = product.pop("geometry") - - asset = AssetVibe( - reference=filepath, - type=IMAGE_FORMAT, - id=gen_guid(), - ) - vis_asset = json_to_asset({"bands": list(range(3))}, out_dir) - # Get actual bounds from the raster - return AirbusRaster( - id=gen_guid(), - time_range=(dt, dt), - geometry=geom, - assets=[asset, vis_asset], - bands={k: v for v, k in enumerate(("red", "green", "blue", "nir"))}, - acquisition_id=product.pop("acquisitionIdentifier"), - extra_info=product, - ) - - -class CallbackBuilder: - def __init__( - self, - api_key: str, - projected_crs: bool, - iou_threshold: float, - delay: float, - timeout: float, - ): - self.api_key = api_key - self.projected_crs = projected_crs - self.iou_thr = iou_threshold - self.delay = delay - self.timeout = timeout - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_product(api: AirBusAPI, product: AirbusProduct) -> AirbusRaster: - geom = shpg.shape(product.geometry) - owned = api.query_owned(geom, product.acquisition_id) - owned = sorted( - owned, - key=lambda o: norm_intersection(geom, shpg.shape(o["geometry"])), 
- reverse=True, - ) - if ( - not owned - or norm_intersection(geom, shpg.shape(owned[0]["geometry"])) < self.iou_thr - ): - # We need to purchase the product - # We choose the envelope to avoid having images with a lot of nodata in the library - order = api.place_order([product.extra_info["id"]], geom.envelope) - order = api.block_until_order_delivered(order["id"]) - product_id = re.findall( - r"items/(.*)/", order["deliveries"][0]["_links"]["download"]["href"] - )[0] - owned = api.get_product_by_id(product_id) - else: - owned = owned[0] - product_id = owned["id"] - owned["filepath"] = api.download_product(product_id, self.tmp_dir.name) - return convert_product(owned, self.tmp_dir.name) - - def download_products( - airbus_products: List[AirbusProduct], - ) -> Dict[str, List[AirbusRaster]]: - api = AirBusAPI( - self.api_key, - self.projected_crs, - [c for c in Constellation], - self.delay, - self.timeout, - ) - return {"downloaded_products": [download_product(api, p) for p in airbus_products]} - - return download_products diff --git a/ops/download_airbus/download_airbus.yaml b/ops/download_airbus/download_airbus.yaml deleted file mode 100644 index 266f8e24..00000000 --- a/ops/download_airbus/download_airbus.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: download_airbus -inputs: - airbus_products: List[AirbusProduct] -output: - downloaded_products: List[AirbusRaster] -parameters: - api_key: "@SECRET(eywa-secrets, msr-airbus-api)" - projected_crs: true - iou_threshold: .95 - delay: 60 - timeout: 1200 -entrypoint: - file: download_airbus.py - callback_builder: CallbackBuilder -description: - short_description: Downloads the AirBus imagery from the listed product. \ No newline at end of file diff --git a/ops/download_alos/download_alos.py b/ops/download_alos/download_alos.py deleted file mode 100644 index 66227133..00000000 --- a/ops/download_alos/download_alos.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Dict - -import planetary_computer as pc - -from vibe_core.data import AlosProduct, AssetVibe, CategoricalRaster, gen_guid, gen_hash_id -from vibe_lib.planetary_computer import AlosForestCollection - - -class CallbackBuilder: - def __init__(self, pc_key: str): - self.tmp_dir = TemporaryDirectory() - pc.set_subscription_key(pc_key) - - def __call__(self): - def callback(product: AlosProduct) -> Dict[str, CategoricalRaster]: - collection = AlosForestCollection() - item = collection.query_by_id(product.id) - if not item: - raise Exception(f"Product {product.id} not found in ALOS Forest collection") - assets = collection.download_item(item, os.path.join(self.tmp_dir.name, product.id)) - if not assets: - raise Exception(f"No assets found for product {product.id}") - assets = [AssetVibe(reference=a, type="image/tiff", id=gen_guid()) for a in assets] - return { - "raster": CategoricalRaster.clone_from( - product, - id=gen_hash_id( - f"{product.id}_download_alos_product", - product.geometry, - product.time_range, - ), - assets=assets, - bands={"forest_non_forest": 0}, - categories=AlosForestCollection.categories, - ) - } - - return callback diff --git a/ops/download_alos/download_alos.yaml b/ops/download_alos/download_alos.yaml deleted file mode 100644 index 01c699e2..00000000 --- a/ops/download_alos/download_alos.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: download_alos -inputs: - product: AlosProduct -output: - raster: CategoricalRaster -parameters: - pc_key: -entrypoint: - file: download_alos.py - callback_builder: CallbackBuilder -description: - short_description: Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map. - long_description: - The op will download an ALOS forest/non-forest classification map and return it as a raster. - inputs: - product: Product with the tile metadata to be downloaded. 
- output: - raster: Downloaded ALOS forest/non-forest classification map as a raster. - parameters: - pc_key: Planetary computer API key. diff --git a/ops/download_alos/test_download_alos.py b/ops/download_alos/test_download_alos.py deleted file mode 100644 index 3ae53dc8..00000000 --- a/ops/download_alos/test_download_alos.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from typing import cast -from unittest.mock import MagicMock, patch - -import pytest -from pystac import Asset, Item - -from vibe_core.data import AlosProduct, Raster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import AlosForestCollection - -FAKE_TIME_RANGE = ( - datetime(2020, 11, 1, tzinfo=timezone.utc), - datetime(2020, 11, 2, tzinfo=timezone.utc), -) - - -@pytest.fixture -def alos_product(): - return AlosProduct( - id="N15W087_20_FNF", - geometry={ - "type": "Polygon", - "coordinates": [ - [ - [-86.773827, 14.575498], - [-86.770459, 14.579301], - [-86.764283, 14.575102], - [-86.769591, 14.567595], - [-86.773827, 14.575498], - ] - ], - }, - time_range=FAKE_TIME_RANGE, - assets=[], - ) - - -def fake_items(): - assets = {"N15W087_20_FNF": Asset(href="fake_href", media_type="image/tiff")} - return Item( - id="N15W087_20_FNF", - geometry=None, - bbox=None, - datetime=None, - properties={ - "start_datetime": FAKE_TIME_RANGE[0].isoformat() + "Z", - "end_datetime": FAKE_TIME_RANGE[1].isoformat() + "Z", - }, - assets=assets, - ) - - -@patch.object(AlosForestCollection, "download_item") -@patch.object(AlosForestCollection, "query_by_id") -@patch("vibe_lib.planetary_computer.get_available_collections") -def test_alos_download( - get_collections: MagicMock, - query_by_id: MagicMock, - download_item: MagicMock, - alos_product: AlosProduct, -): - get_collections.return_value = [AlosForestCollection.collection] - query_by_id.return_value = fake_items() - 
download_item.side_effect = lambda item, _: [item.assets[item.id].href] - - config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_alos.yaml") - - op = OpTester(config_path) - output_data = op.run(product=alos_product) - assert output_data - assert "raster" in output_data - - output_raster = cast(Raster, output_data["raster"]) - assert len(output_raster.assets) == 1 - assert output_raster.assets[0].type == "image/tiff" - assert output_raster.assets[0].path_or_url == "fake_href" - assert output_raster.bands == {"forest_non_forest": 0} - assert output_raster.time_range == FAKE_TIME_RANGE - assert output_raster.geometry == alos_product.geometry diff --git a/ops/download_ambient_weather/download_ambient_weather.py b/ops/download_ambient_weather/download_ambient_weather.py deleted file mode 100644 index 8353af4a..00000000 --- a/ops/download_ambient_weather/download_ambient_weather.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import mimetypes -import os -import time -from datetime import timedelta -from random import randint -from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, Final, List, cast - -import pandas as pd -from ambient_api.ambientapi import AmbientAPI, AmbientWeatherStation -from shapely.geometry import shape - -from vibe_core.data import AssetVibe, DataVibe, gen_guid, gen_hash_id -from vibe_core.data.weather import WeatherVibe - -# Ambient Weather Station API endpoint -ENDPOINT: Final[str] = "https://api.ambientweather.net/v1" - -# time to sleep between API calls to avoid rate limits -ONE_SECOND: Final[int] = 1 - -# in minutes -SKIP_DATA_FOR_PERIOD = 360 - -# data points -MAX_FETCH = 288 - -# data points -MIN_FETCH = 2 - -# allows failed -FAILED_COUNT = 25 - -LOGGER = logging.getLogger(__name__) - - -def get_weather( - user_input: DataVibe, - output_dir: str, - api_key: str, - app_key: str, - limit: int, - feed_interval: int, -) -> WeatherVibe: - """Gets the Ambient Weather Station data at the location and time specified - - Args: - user_input: Specifies location and time for data query - output_dir: directory in which to save data - api_key: API key used to access Ambient Weather Station API - app_key: App key used to access Ambient Weather Station API - limit: Number of data points to be downloaded from ambient service - Returns: - Weather data at specified location and time - Raises: - RuntimeError: if API service, devices, or data is unreachable - """ - api = AmbientAPI( - AMBIENT_ENDPOINT=ENDPOINT, - AMBIENT_API_KEY=api_key, - AMBIENT_APPLICATION_KEY=app_key, - ) - - devices = call_ambient_api(api.get_devices) - assert devices is not None, "No devices found" - device = get_device(devices, user_input.geometry) - - # create a closure to simplify retries - def get_data() -> List[Dict[str, Any]]: - out = device.get_data(end_date=end_date, limit=delta) - assert out is not None, "No data found" - return out - - start_date = 
user_input.time_range[0] - end_date = user_input.time_range[1] - - delta = end_date - start_date - delta_sec = (delta.seconds // 60) // feed_interval - - if delta.days > 0: - delta = delta_sec + delta.days * 24 * 60 // feed_interval - else: - delta = delta_sec - - out = [] - - # split request into chunks if number of data points is greater than MAX_FETCH - if limit > MAX_FETCH or delta > MAX_FETCH: - limit = max(limit, delta) - lnt = 0 - failed_count = 0 - - # for lnt in range(0, limit, MAX_FETCH): - while end_date > start_date: - try: - if (limit - lnt) < MAX_FETCH: - delta = limit - lnt - else: - delta = MAX_FETCH - - time.sleep(ONE_SECOND) - out.extend(cast(List[Any], call_ambient_api(get_data))) - end_date -= timedelta(minutes=delta * feed_interval) - lnt += MAX_FETCH - failed_count = 0 - except Exception: - # skip from weation station malfunction by every 60 minutes - end_date -= timedelta(minutes=SKIP_DATA_FOR_PERIOD) - start_date -= timedelta(minutes=SKIP_DATA_FOR_PERIOD) - lnt += SKIP_DATA_FOR_PERIOD // feed_interval - failed_count += 1 - - # stop execution if not able to access api 25 times continuously - if failed_count > FAILED_COUNT: - raise RuntimeError("Weather station not responding.") - else: - if limit > 0: - delta = limit - else: - delta = MIN_FETCH if delta == 0 else delta - - out = call_ambient_api(get_data) - - file_path = os.path.join(output_dir, "weather.csv") - pd.DataFrame(out).to_csv(file_path) - - asset = AssetVibe(reference=file_path, type=mimetypes.types_map[".csv"], id=gen_guid()) - return WeatherVibe( - gen_hash_id( - f"AmbientWeather_{device.mac_address}", - user_input.geometry, - user_input.time_range, - ), - user_input.time_range, - user_input.geometry, - [asset], - ) - - -# In the following, pyright fails to detect that we are raising an exception -def get_device( - devices: List[AmbientWeatherStation], geometry: Dict[str, Any] -) -> AmbientWeatherStation: # type: ignore - """Returns a weather device within the bounding box - - 
Args: - devices: list of weather stations in this subscription - geometry: location of interest - - Returns: - A device within the region - - Raises: - RuntimteError if no matching device is found - """ - search_area = shape(geometry) - for device in devices: - try: - device_loc = shape(device.info["coords"]["geo"]) # type: ignore - except KeyError: - LOGGER.error("Device info did not contain geolocation for device {}".format(device)) - continue - if device_loc.within(search_area): - return device - - log_and_raise_error("No devices found in given geometry {}".format(search_area)) - - -def log_and_raise_error(message: str): - LOGGER.error(message) - raise RuntimeError(message) - - -def call_ambient_api( - api_call: Callable[[], List[Any]], max_attempts: int = 3, backoff: int = ONE_SECOND -): - """Call the given function with retries. - - Args: - api_call: function to call - max_attempts: tries to make before quitting - backoff: seconds to wait before first retry. Wait increases between each call. 
- - Returns: - result of function call - - Raises: - RuntimeError if function does not return a non-empty result after max_attempts calls - """ - # use 1 based counting - for attempt in range(1, max_attempts + 1): - result = api_call() - if result: - return result - else: - LOGGER.warning( - f"Ambient Weather API call {api_call.__name__} " - f"failed on try {attempt}/{max_attempts}" - ) - if attempt < max_attempts: - time.sleep(backoff + randint(0, 10)) - backoff *= randint(2, 5) - log_and_raise_error("Could not get data from Ambient Weather API") - - -class CallbackBuilder: - def __init__(self, api_key: str, app_key: str, limit: int, feed_interval: int): - """ - Args: - api_key: API key used to access Ambient Weather Station API - app_key: App key used to access Ambient Weather Station API - limit: Number of data points to be downloaded from ambient service - """ - self.temp_dir = TemporaryDirectory() - self.api_key = api_key - self.app_key = app_key - self.limit = limit - self.feed_interval = feed_interval - - def __call__(self): - def get_weather_data(user_input: List[DataVibe]) -> Dict[str, WeatherVibe]: - measured_weather = get_weather( - user_input[0], - output_dir=self.temp_dir.name, - api_key=self.api_key, - app_key=self.app_key, - limit=self.limit, - feed_interval=self.feed_interval, - ) - return {"weather": measured_weather} - - return get_weather_data - - def __del__(self): - self.temp_dir.cleanup() diff --git a/ops/download_ambient_weather/download_ambient_weather.yaml b/ops/download_ambient_weather/download_ambient_weather.yaml deleted file mode 100644 index 8d0f6efb..00000000 --- a/ops/download_ambient_weather/download_ambient_weather.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: download_ambient_weather -inputs: - user_input: List[DataVibe] -output: - weather: WeatherVibe -parameters: - api_key: "@SECRET(eywa-secrets, ambient-api-key)" - app_key: "@SECRET(eywa-secrets, ambient-app-key)" - limit: -1 - # in minutes - feed_interval: 5 -entrypoint: - 
callback_builder: CallbackBuilder - file: download_ambient_weather.py -dependencies: - parameters: - - limit -description: - short_description: - Connects to the Ambient Weather REST API and requests weather data for the input time range - from stations within input geometry. \ No newline at end of file diff --git a/ops/download_bing_basemap/download_bing_basemap.py b/ops/download_bing_basemap/download_bing_basemap.py deleted file mode 100644 index ac90533f..00000000 --- a/ops/download_bing_basemap/download_bing_basemap.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -import os -from tempfile import TemporaryDirectory -from typing import Dict - -import rasterio -from rasterio.transform import from_bounds - -from vibe_core.data import AssetVibe, BBox, Raster, gen_guid -from vibe_core.data.products import BingMapsProduct -from vibe_lib.bing_maps import BingMapsCollection - - -def build_raster_asset(tile_path: str, tile_bbox: BBox, output_path: str): - """Build a GeoTIFF raster asset from a tile downloaded from BingMaps.""" - with rasterio.open(tile_path) as src: - img = src.read() - - transform = from_bounds(*tile_bbox, img.shape[2], img.shape[1]) - - with rasterio.open( - output_path, - "w", - driver="GTiff", - height=img.shape[1], - width=img.shape[2], - count=3, - dtype=img.dtype, - crs="EPSG:4326", - transform=transform, - ) as dst: - dst.write(img) - - -class CallbackBuilder: - def __init__(self, api_key: str): - if not api_key: - raise ValueError("BingMaps API key was not provided.") - - self.collection = BingMapsCollection(api_key) - self.tmp_dir = TemporaryDirectory() - - def download_basemap(self, product: BingMapsProduct) -> AssetVibe: - img_id = gen_guid() - tile_path = os.path.join(self.tmp_dir.name, f"{img_id}.jpeg") - raster_path = os.path.join(self.tmp_dir.name, f"{img_id}.tiff") - - try: - self.collection.download_tile(product.url, tile_path) - except (RuntimeError, 
ValueError) as e: - raise type(e)( - f"Failed to download tile {product.id} at zoom level {product.zoom_level}. {e}" - ) from e - - build_raster_asset(tile_path, product.bbox, raster_path) - asset = AssetVibe( - reference=raster_path, - type="image/tiff", - id=gen_guid(), - ) - return asset - - def __call__(self): - def download_bing_basemap( - input_product: BingMapsProduct, - ) -> Dict[str, Raster]: - asset = self.download_basemap(input_product) - - basemap = Raster.clone_from( - input_product, - id=hashlib.sha256(f"downloaded_basemap_{input_product.id}".encode()).hexdigest(), - assets=[asset], - bands={"red": 0, "green": 1, "blue": 2}, - ) - - return {"basemap": basemap} - - return download_bing_basemap - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_bing_basemap/download_bing_basemap.yaml b/ops/download_bing_basemap/download_bing_basemap.yaml deleted file mode 100644 index 3cc0e25f..00000000 --- a/ops/download_bing_basemap/download_bing_basemap.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: download_bing_basemap -inputs: - input_product: BingMapsProduct -output: - basemap: Raster -parameters: - api_key: -entrypoint: - file: download_bing_basemap.py - callback_builder: CallbackBuilder -dependencies: -description: - short_description: - Downloads a basemap tile represented by a BingMapsProduct using BingMapsAPI. - long_description: - The op will download a basemap tile and return it as a raster. - inputs: - input_product: Product with the tile metadata to be downloaded. - output: - basemap: Downloaded basemap as a raster. - parameters: - api_key: Bing Maps API key. Required to run the workflow. diff --git a/ops/download_bing_basemap/test_download_bing_basemap.py b/ops/download_bing_basemap/test_download_bing_basemap.py deleted file mode 100644 index 9c506b59..00000000 --- a/ops/download_bing_basemap/test_download_bing_basemap.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from unittest.mock import MagicMock, patch - -import numpy as np -from PIL import Image -from shapely.geometry import Polygon, mapping - -from vibe_core.data import Raster -from vibe_core.data.products import BingMapsProduct -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.bing_maps import BingMapsCollection - -FAKE_GEOMETRY = Polygon( - [ - (46.998848, -118.940490), - (46.998848, -118.876148), - (47.013422, -118.876148), - (47.013422, -118.940490), - ] -) -FAKE_TIME_RANGE = (datetime.now(), datetime.now()) - - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_bing_basemap.yaml") - - -def create_blank_jpeg(_: str, out_path: str): - data = np.zeros((100, 100, 3), dtype=np.uint8) - img = Image.fromarray(data) - img.save(out_path) - - -@patch.object( - BingMapsCollection, - "download_tile", - side_effect=create_blank_jpeg, -) -@patch.object( - BingMapsCollection, - "get_download_url_and_subdomains", - return_value=("fake_download_url_{subdomain}_{quadkey}_{api_key}", ["fake_subdomain"]), -) -def test_op(_: MagicMock, __: MagicMock): - input_product = BingMapsProduct( - id="fake_product", - time_range=FAKE_TIME_RANGE, - geometry=mapping(FAKE_GEOMETRY), # type: ignore - assets=[], - url="fake_url", - zoom_level=1, - imagery_set="Aerial", - map_layer="Basemap", - orientation=0.0, - ) - - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({"api_key": "fake_api_key"}) - output_data = op_tester.run(**{"input_product": input_product}) - - # Get op result - output_name = "basemap" - assert output_name in output_data - output_basemap = output_data[output_name] - assert isinstance(output_basemap, Raster) - assert len(output_basemap.assets) == 1 diff --git a/ops/download_cdl_data/download_cdl.py b/ops/download_cdl_data/download_cdl.py deleted file mode 100644 index 02d1e1da..00000000 --- a/ops/download_cdl_data/download_cdl.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 
Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict -from zipfile import ZipFile - -import numpy as np -import pandas as pd - -from vibe_core.data import AssetVibe, CategoricalRaster, gen_guid -from vibe_core.data.products import CDL_DOWNLOAD_URL, CDLProduct -from vibe_core.file_downloader import download_file -from vibe_lib.raster import ( - INT_COMPRESSION_KWARGS, - compress_raster, - json_to_asset, - step_cmap_from_colors, -) - - -def download_cdl_tif(cdl_product: CDLProduct, out_path: str) -> None: - """Download the CDL zip and decompress the .tif file and recompress it to out_path""" - cdl_year = cdl_product.time_range[0].year - - with TemporaryDirectory() as tmp: - zip_path = os.path.join(tmp, f"cdl_{cdl_year}.zip") - product_url = CDL_DOWNLOAD_URL.format(cdl_year) - download_file(product_url, zip_path) - - with ZipFile(zip_path) as zf: - zip_member = [f for f in zf.filelist if f.filename.endswith(".tif")][0] - # Trick to extract file without the whole directory tree - # https://stackoverflow.com/questions/4917284/ - zip_member.filename = os.path.basename(zip_member.filename) - file_path = zf.extract(zip_member, path=tmp) - compress_raster(file_path, out_path, **INT_COMPRESSION_KWARGS) - - -class CallbackBuilder: - MIN_CLASS_IDX: int = 0 - MAX_CLASS_IDX: int = 255 - - def __init__(self, metadata_path: str): - self.tmp_dir = TemporaryDirectory() - self.df = pd.read_excel(metadata_path, header=3, index_col=0).dropna(axis=1) - cmap = self.df[["Erdas_Red", "Erdas_Green", "Erdas_Blue"]].values.astype(float) - # Add alpha value - self.cmap = np.concatenate((cmap, cmap.sum(axis=1)[:, None] > 0), axis=1) - - def __call__(self): - def cdl_callback(input_product: CDLProduct) -> Dict[str, CategoricalRaster]: - """ - This op receives a CDLProduct (probably from list_cdl_products op) and - downloads the zipped CDL map. 
It decompress the .tif file from it and yields - a CategoricalRaster with references to that asset - """ - - out_id = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{out_id}.tif") - - download_cdl_tif(input_product, filepath) - - new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".tif"], id=out_id) - - vis_dict: Dict[str, Any] = { - "bands": [0], - "colormap": step_cmap_from_colors( - self.cmap, range(self.MIN_CLASS_IDX + 1, self.MAX_CLASS_IDX + 1) - ), - "range": (self.MIN_CLASS_IDX, self.MAX_CLASS_IDX), - } - - raster = CategoricalRaster.clone_from( - input_product, - id=gen_guid(), - assets=[new_asset, json_to_asset(vis_dict, self.tmp_dir.name)], - bands={"categories": 0}, - categories=self.df["Class_Names"].tolist(), - ) - - return {"cdl_raster": raster} - - return cdl_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_cdl_data/download_cdl.yaml b/ops/download_cdl_data/download_cdl.yaml deleted file mode 100644 index f020cd8d..00000000 --- a/ops/download_cdl_data/download_cdl.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_cdl -inputs: - input_product: CDLProduct -output: - cdl_raster: CategoricalRaster -parameters: - metadata_path: /opt/terravibes/ops/resources/cdl_metadata/CDL_codes_names_colors.xls -entrypoint: - file: download_cdl.py - callback_builder: CallbackBuilder -description: - short_description: Downloads a CategoricalRaster from a CDLProduct. \ No newline at end of file diff --git a/ops/download_cdl_data/download_cdl_data.py b/ops/download_cdl_data/download_cdl_data.py deleted file mode 100644 index 1bf4c98f..00000000 --- a/ops/download_cdl_data/download_cdl_data.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -import os -import xml.etree.ElementTree as ET -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple, cast - -import geopandas as gpd -import numpy as np -import pandas as pd -import requests -import shapely.geometry as shpg -from rasterio.merge import merge -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import AssetVibe, CategoricalRaster, DataVibe, gen_guid -from vibe_lib.raster import json_to_asset, step_cmap_from_colors - -SERVICE_URL = "https://nassgeodata.gmu.edu/axis2/services/CDLService/GetCDLFile" -CDL_CRS = "epsg:5070" -# Maximum area per request is 2M square km, 2e11 seems to work better -MAX_AREA = 1e11 - - -def download_file(url: str, out_path: str) -> None: - with requests.get(url, stream=True) as r: - r.raise_for_status() - with open(out_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - -def split_geometry(geom: BaseGeometry, max_area: float) -> List[BaseGeometry]: - if geom.area < max_area: - # Done - return [geom] - - # Split it! 
- x0, y0, x1, y1 = cast(Tuple[int, int, int, int], geom.bounds) - if (x1 - x0) > (y1 - y0): - # Split along width - b1 = shpg.box(x0, y0, (x0 + x1) / 2, y1) - b2 = shpg.box((x0 + x1) / 2, y0, x1, y1) - else: - # Split along height - b1 = shpg.box(x0, y0, x1, (y0 + y1) / 2) - b2 = shpg.box(x0, (y0 + y1) / 2, x1, y1) - return split_geometry(b1, max_area) + split_geometry(b2, max_area) - - -def get_cdl_url(geom: BaseGeometry, dt: datetime) -> str: - formatted_bbox = ",".join([f"{b:.1f}" for b in geom.bounds]) - payload = {"year": str(dt.year), "bbox": formatted_bbox} - r = requests.get(SERVICE_URL, params=payload) - r.raise_for_status() - e = ET.fromstring(r.text) - tif_url = list(e)[0].text - if tif_url is None: - raise ValueError(f"URL is missing from response {r.text}") - return tif_url - - -def save_cdl_tif(geom: BaseGeometry, dt: datetime, out_path: str) -> None: - split_geoms = [g for g in split_geometry(geom, MAX_AREA) if g.intersects(geom)] - with TemporaryDirectory() as tmp: - split_paths = [os.path.join(tmp, f"{i}.tif") for i in range(len(split_geoms))] - for g, p in zip(split_geoms, split_paths): - tif_url = get_cdl_url(g, dt) - download_file(tif_url, p) - if len(split_geoms) > 1: - # Merge all parts into a single tiff - merge(split_paths, bounds=geom.bounds, dst_path=out_path) - else: - os.rename(split_paths[0], out_path) - - -class CallbackBuilder: - MIN_CLASS_IDX: int = 0 - MAX_CLASS_IDX: int = 255 - - def __init__(self, metadata_url: str): - self.tmp_dir = TemporaryDirectory() - self.df = pd.read_excel(metadata_url, header=3, index_col=0).dropna(axis=1) - cmap = self.df[["Erdas_Red", "Erdas_Green", "Erdas_Blue"]].values.astype(float) - # Add alpha value - self.cmap = np.concatenate((cmap, cmap.sum(axis=1)[:, None] > 0), axis=1) - - def __call__(self): - def cdl_callback(input_data: DataVibe) -> CategoricalRaster: - proj_geom: BaseGeometry = ( - gpd.GeoSeries(shpg.shape(input_data.geometry), crs="epsg:4326") - .to_crs(CDL_CRS) - .iloc[0] - ) - # We are 
taking the year in the middle point of the time range for now - dt = datetime.fromtimestamp(sum(d.timestamp() for d in input_data.time_range) / 2) - out_id = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{out_id}.tif") - save_cdl_tif(proj_geom, dt, filepath) - new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".tif"], id=out_id) - - vis_dict: Dict[str, Any] = { - "bands": [0], - "colormap": step_cmap_from_colors( - self.cmap, range(self.MIN_CLASS_IDX + 1, self.MAX_CLASS_IDX + 1) - ), - "range": (self.MIN_CLASS_IDX, self.MAX_CLASS_IDX), - } - - raster = CategoricalRaster.clone_from( - input_data, - id=gen_guid(), - assets=[new_asset, json_to_asset(vis_dict, self.tmp_dir.name)], - bands={"categories": 0}, - categories=self.df["Class_Names"].tolist(), - ) - - return raster - - def cdl_callback_list(input_data: List[DataVibe]) -> Dict[str, List[CategoricalRaster]]: - return {"cdl_rasters": [cdl_callback(input_datum) for input_datum in input_data]} - - return cdl_callback_list - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_cdl_data/download_cdl_data.yaml b/ops/download_cdl_data/download_cdl_data.yaml deleted file mode 100644 index e9baebe8..00000000 --- a/ops/download_cdl_data/download_cdl_data.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: download_cdl_data -inputs: - input_items: List[DataVibe] -output: - cdl_rasters: List[CategoricalRaster] -parameters: - metadata_url: https://www.nass.usda.gov/Research_and_Science/Cropland/docs/CDL_codes_names_colors.xls -entrypoint: - file: download_cdl_data.py - callback_builder: CallbackBuilder diff --git a/ops/download_chirps/download_chirps.py b/ops/download_chirps/download_chirps.py deleted file mode 100644 index 37369df0..00000000 --- a/ops/download_chirps/download_chirps.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import os -import re -from tempfile import TemporaryDirectory -from typing import Dict, Optional - -from vibe_core.data import AssetVibe, gen_hash_id -from vibe_core.data.core_types import gen_guid -from vibe_core.data.products import ChirpsProduct -from vibe_core.file_downloader import download_file - -LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_product( - chirps_product: ChirpsProduct, - ) -> Dict[str, Optional[ChirpsProduct]]: - fname = re.search("chirps-.*cog", chirps_product.url) - if fname is not None: - fname = fname.group() - else: - raise ValueError(f"URL for chirps product has no COG. url: {chirps_product.url}") - fpath = os.path.join(self.tmp_dir.name, fname) - download_file(chirps_product.url, fpath) - - asset = AssetVibe(reference=fpath, type="image/tiff", id=gen_guid()) - - downloaded_product = ChirpsProduct.clone_from( - chirps_product, - id=gen_hash_id(fname, chirps_product.geometry, chirps_product.time_range), - assets=[asset], - ) - - return {"downloaded_product": downloaded_product} - - return download_product - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_chirps/download_chirps.yaml b/ops/download_chirps/download_chirps.yaml deleted file mode 100644 index ef1bd9b0..00000000 --- a/ops/download_chirps/download_chirps.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: download_chirps -inputs: - chirps_product: ChirpsProduct -output: - downloaded_product: ChirpsProduct -parameters: -entrypoint: - file: download_chirps.py - callback_builder: CallbackBuilder -description: - short_description: Downloads accumulated precipitation data from listed products. 
\ No newline at end of file diff --git a/ops/download_climatology_lab/download_climatology_lab.py b/ops/download_climatology_lab/download_climatology_lab.py deleted file mode 100644 index 6bfcf227..00000000 --- a/ops/download_climatology_lab/download_climatology_lab.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict - -from vibe_core.data import AssetVibe, gen_guid, gen_hash_id -from vibe_core.data.products import ClimatologyLabProduct -from vibe_core.file_downloader import download_file - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_climatology_lab( - input_product: ClimatologyLabProduct, - ) -> Dict[str, ClimatologyLabProduct]: - asset_id = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{asset_id}.nc") - download_file(input_product.url, filepath) - new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".nc"], id=asset_id) - - product = ClimatologyLabProduct.clone_from( - input_product, - id=gen_hash_id( - f"{input_product.id}_downloaded", - input_product.geometry, - input_product.time_range, - ), - assets=[new_asset], - ) - - return {"downloaded_product": product} - - return download_climatology_lab - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_climatology_lab/download_climatology_lab.yaml b/ops/download_climatology_lab/download_climatology_lab.yaml deleted file mode 100644 index 161df8a5..00000000 --- a/ops/download_climatology_lab/download_climatology_lab.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: download_climatology_lab -inputs: - input_product: ClimatologyLabProduct -output: - downloaded_product: ClimatologyLabProduct -parameters: -entrypoint: - file: download_climatology_lab.py - callback_builder: CallbackBuilder -description: - short_description: - Downloads Climatology Lab 
weather products (TerraClimate and GridMET) defined by the input product. - inputs: - input_product: Input Climatology Lab product. - output: - downloaded_product: Downloaded product with desired variable. diff --git a/ops/download_climatology_lab/test_download_climatology_lab.py b/ops/download_climatology_lab/test_download_climatology_lab.py deleted file mode 100644 index 6d8b9a02..00000000 --- a/ops/download_climatology_lab/test_download_climatology_lab.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from unittest.mock import MagicMock, patch - -from shapely.geometry import Point, mapping - -from vibe_core.data import ClimatologyLabProduct -from vibe_dev.testing.op_tester import OpTester - -FAKE_GEOMETRY = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) -FAKE_TIME_RANGE = ( - datetime(year=2019, month=1, day=1, tzinfo=timezone.utc), - datetime(year=2019, month=12, day=31, tzinfo=timezone.utc), -) - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "download_climatology_lab.yaml" -) - - -@patch("vibe_core.file_downloader.download_file") -def test_op(_: MagicMock): - input_product = ClimatologyLabProduct( - id="fake_product", - time_range=FAKE_TIME_RANGE, - geometry=mapping(FAKE_GEOMETRY), # type: ignore - assets=[], - url="fake_href", - variable="fake_variable", - ) - - op_tester = OpTester(CONFIG_PATH) - output_data = op_tester.run(**{"input_product": input_product}) - - # Get op result - output_name = "downloaded_product" - assert output_name in output_data - output_raster = output_data[output_name] - assert isinstance(output_raster, ClimatologyLabProduct) - assert len(output_raster.assets) == 1 diff --git a/ops/download_dem/download_dem.py b/ops/download_dem/download_dem.py deleted file mode 100644 index e37990e0..00000000 --- a/ops/download_dem/download_dem.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Microsoft 
Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, cast - -import planetary_computer as pc - -from vibe_core.data import AssetVibe, DemProduct, DemRaster, gen_guid, gen_hash_id -from vibe_lib.planetary_computer import validate_dem_provider -from vibe_lib.raster import RGBA, interpolated_cmap_from_colors, json_to_asset - -ELEVATION_CMAP_INTERVALS: List[float] = [0.0, 4000.0] - -ELEVATION_CMAP_COLORS: List[RGBA] = [ - RGBA(0, 0, 0, 255), - RGBA(255, 255, 255, 255), -] - - -class CallbackBuilder: - def __init__(self, api_key: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - def __call__(self): - def op(input_product: DemProduct) -> Dict[str, DemRaster]: - pc.set_subscription_key(self.api_key) - collection = validate_dem_provider( - input_product.provider.upper(), input_product.resolution - ) - item = collection.query_by_id(input_product.tile_id) - assets = collection.download_item( - item, os.path.join(self.tmp_dir.name, input_product.id) - ) - assets = [ - AssetVibe(reference=a, type=cast(str, mimetypes.guess_type(a)[0]), id=gen_guid()) - for a in assets - ] - vis_dict: Dict[str, Any] = { - "bands": [0], - "colormap": interpolated_cmap_from_colors( - ELEVATION_CMAP_COLORS, ELEVATION_CMAP_INTERVALS - ), - "range": (0, 4000), - } - assets.append(json_to_asset(vis_dict, self.tmp_dir.name)) - - downloaded_product = DemRaster( - id=gen_hash_id( - f"{input_product.id}_download_dem_product", - input_product.geometry, - input_product.time_range, - ), - time_range=input_product.time_range, - geometry=input_product.geometry, - assets=assets, - bands={"elevation": 0}, - tile_id=input_product.tile_id, - resolution=input_product.resolution, - provider=input_product.provider, - ) - - return {"downloaded_product": downloaded_product} - - return op - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_dem/download_dem.yaml 
b/ops/download_dem/download_dem.yaml deleted file mode 100644 index ee9378c8..00000000 --- a/ops/download_dem/download_dem.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_dem -inputs: - input_product: DemProduct -output: - downloaded_product: DemRaster -parameters: - api_key: "" -entrypoint: - file: download_dem.py - callback_builder: CallbackBuilder -description: - short_description: Downloads digital elevation map raster given a DemProduct. \ No newline at end of file diff --git a/ops/download_dem/test_download_dem.py b/ops/download_dem/test_download_dem.py deleted file mode 100644 index 6663847c..00000000 --- a/ops/download_dem/test_download_dem.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from unittest.mock import MagicMock, patch - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import DemProduct -from vibe_core.data.rasters import DemRaster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import USGS3DEPCollection - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_dem.yaml") - - -@patch( - "vibe_lib.planetary_computer.get_available_collections", - return_value=[USGS3DEPCollection.collection], -) -@patch.object(USGS3DEPCollection, "query_by_id") -@patch( - "vibe_lib.planetary_computer.USGS3DEPCollection.download_item", return_value=["/tmp/test.tif"] -) -def test_op(_: MagicMock, __: MagicMock, ___: MagicMock): - latitude = 44.0005556 - longitude = -97.0005556 - buffer = 0.1 - bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] - polygon: Polygon = box(*bbox, ccw=True) - start_date = datetime(year=2021, month=2, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) - - output = DemProduct( - id=str("n44w098-13"), - time_range=( - start_date, - end_date, - ), - 
geometry=mapping(polygon), - assets=[], - tile_id=str("n44w098-13"), - resolution=10, - provider=str("USGS3DEP"), - ) - - output_data = OpTester(CONFIG_PATH).run(input_product=output) - - # Get op result - output_name = "downloaded_product" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, DemRaster) diff --git a/ops/download_era5/download_era5.py b/ops/download_era5/download_era5.py deleted file mode 100644 index 75d46395..00000000 --- a/ops/download_era5/download_era5.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict, Optional, cast - -import cdsapi -import fsspec -import planetary_computer as pc -import xarray as xr - -from vibe_core.data import AssetVibe, Era5Product, gen_guid, gen_hash_id -from vibe_lib.planetary_computer import Era5Collection - -LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__(self, api_key: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - def __call__(self): - def download_product( - era5_product: Era5Product, - ) -> Dict[str, Optional[Era5Product]]: - if era5_product.item_id != "": - pc.set_subscription_key(self.api_key) - collection = Era5Collection() - item = collection.query_by_id(era5_product.item_id) - - # Only downloading the asset corresponding to the requested variable. - # In addition, the requested asset is a zarr, which is a directory structure, - # so it not possible to use download_asset. 
- signed_item = pc.sign(item) - asset = signed_item.assets[era5_product.var] - ds = xr.open_dataset(asset.href, **asset.extra_fields["xarray:open_kwargs"]) - else: - if self.api_key == "": - raise ValueError( - "api_key not supplied for CDS (registration " - "in https://cds.climate.copernicus.eu/user/register)" - ) - if len(era5_product.cds_request) != 1: - raise ValueError(f"Invalid number of CDS requests {era5_product.cds_request}") - dataset, request = next((k, v) for k, v in era5_product.cds_request.items()) - c = cdsapi.Client(url="https://cds.climate.copernicus.eu/api/v2", key=self.api_key) - r = c.retrieve(dataset, request) - if r is None: - raise ValueError(f"CDS request {era5_product.cds_request} returned None") - with fsspec.open(r.location) as f: - ds = xr.open_dataset(f, engine="scipy") # type: ignore - - path = os.path.join(self.tmp_dir.name, f"{era5_product.id}.nc") - ds.to_netcdf(path) - vibe_asset = AssetVibe( - reference=path, type=cast(str, mimetypes.guess_type(path)[0]), id=gen_guid() - ) - downloaded_product = Era5Product.clone_from( - era5_product, - id=gen_hash_id( - f"{era5_product.id}_downloaded", era5_product.geometry, era5_product.time_range - ), - assets=[vibe_asset], - ) - - return {"downloaded_product": downloaded_product} - - return download_product - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_era5/download_era5.yaml b/ops/download_era5/download_era5.yaml deleted file mode 100644 index ef72a78a..00000000 --- a/ops/download_era5/download_era5.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_era5 -inputs: - era5_product: Era5Product -output: - downloaded_product: Era5Product -parameters: - api_key: "" -entrypoint: - file: download_era5.py - callback_builder: CallbackBuilder -description: - short_description: Downloads requested property from ERA5 products. 
\ No newline at end of file diff --git a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py deleted file mode 100644 index 29f958bd..00000000 --- a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict, cast - -import planetary_computer as pc - -from vibe_core.data import AssetVibe, CategoricalRaster, gen_guid, gen_hash_id -from vibe_core.data.products import EsriLandUseLandCoverProduct -from vibe_lib.planetary_computer import EsriLandUseLandCoverCollection -from vibe_lib.raster import json_to_asset - - -class CallbackBuilder: - def __init__(self, api_key: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - def __call__(self): - def op(input_product: EsriLandUseLandCoverProduct) -> Dict[str, CategoricalRaster]: - pc.set_subscription_key(self.api_key) - collection = EsriLandUseLandCoverCollection() - item = collection.query_by_id(input_product.id) - assets = collection.download_item( - item, os.path.join(self.tmp_dir.name, input_product.id) - ) - vibe_assets = [ - AssetVibe(reference=a, type=cast(str, mimetypes.guess_type(a)[0]), id=gen_guid()) - for a in assets - ] - vis_asset = json_to_asset({"bands": list(range(1))}, self.tmp_dir.name) - vibe_assets.append(vis_asset) - downloaded_product = CategoricalRaster( - id=gen_hash_id( - f"{input_product.id}_download_esri_landuse_landcover_product", - input_product.geometry, - input_product.time_range, - ), - time_range=input_product.time_range, - geometry=input_product.geometry, - assets=vibe_assets, - bands={"data": 0}, - categories=EsriLandUseLandCoverCollection.categories, - ) - - return {"downloaded_product": downloaded_product} - - return op - - def __del__(self): - self.tmp_dir.cleanup() 
diff --git a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml deleted file mode 100644 index d7439c1f..00000000 --- a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_esri_landuse_landcover -inputs: - input_product: EsriLandUseLandCoverProduct -output: - downloaded_product: CategoricalRaster -parameters: - api_key: "" -entrypoint: - file: download_esri_landuse_landcover.py - callback_builder: CallbackBuilder -description: - short_description: Downloads ESRI 10m Land Use/Land Cover (9-class) raster from EsriLandUseLandCoverProduct. \ No newline at end of file diff --git a/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py b/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py deleted file mode 100644 index 9068a0cf..00000000 --- a/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from unittest.mock import MagicMock, patch - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import CategoricalRaster -from vibe_core.data.core_types import DataVibe -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import EsriLandUseLandCoverCollection - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "download_esri_landuse_landcover.yaml" -) - - -@patch( - "vibe_lib.planetary_computer.get_available_collections", - return_value=[EsriLandUseLandCoverCollection.collection], -) -@patch.object(EsriLandUseLandCoverCollection, "query_by_id") -@patch.object( - EsriLandUseLandCoverCollection, - "download_item", - return_value=["/tmp/test_esri_landuse_landcover.tif"], -) -def test_op(_: MagicMock, __: MagicMock, ___: MagicMock): - latitude = 42.21422 - longitude = -93.22890 - buffer = 0.001 - bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] - polygon: Polygon = box(*bbox, ccw=True) - start_date = datetime(year=2017, month=1, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2018, month=1, day=1, tzinfo=timezone.utc) - - input: DataVibe = DataVibe( - id=str("47P-2017"), - time_range=( - start_date, - end_date, - ), - geometry=mapping(polygon), # type: ignore - assets=[], - ) - - output_data = OpTester(CONFIG_PATH).run(**{"input_product": input}) - - # Get op result - output_name = "downloaded_product" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, CategoricalRaster) diff --git a/ops/download_from_ref/download_from_ref.py b/ops/download_from_ref/download_from_ref.py deleted file mode 100644 index d63cae43..00000000 --- a/ops/download_from_ref/download_from_ref.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -import mimetypes -import os -import pathlib -import shutil -from dataclasses import fields -from tempfile import TemporaryDirectory -from typing import Any, Dict, Type, cast, get_origin - -from vibe_core.data import ( - AssetVibe, - DataVibe, - ExternalReference, - data_registry, - gen_hash_id, -) -from vibe_core.file_downloader import download_file -from vibe_core.uri import is_local, local_uri_to_path, uri_to_filename - -CHUNK_SIZE_BYTES = 1024 * 1024 - - -def hash_file(filepath: str, chunk_size: int = CHUNK_SIZE_BYTES) -> str: - h = hashlib.sha256() - with open(filepath, "rb") as f: - while True: - b = f.read(chunk_size) - if not b: - break - h.update(b) - return h.hexdigest() - - -def get_empty_type(t: Any): - o = get_origin(t) - if o is not None: - return o() - return t() - - -def get_empty_fields(data_type: Type[DataVibe]) -> Dict[str, Any]: - base_fields = [f for f in fields(DataVibe) if f.init] - init_fields = [f for f in fields(data_type) if f.init and f not in base_fields] - return {f.name: get_empty_type(f.type) for f in init_fields} - - -def add_mime_type(extension: str): - if extension == ".geojson": - mimetypes.add_type("application/json", ".geojson") - - -class CallbackBuilder: - def __init__(self, out_type: str): - self.tmp_dir = TemporaryDirectory() - self.out_type = cast(Type[DataVibe], data_registry.retrieve(out_type)) - - def __call__(self): - def callback(input_ref: ExternalReference) -> Dict[str, DataVibe]: - # Download the file - out_path = os.path.join(self.tmp_dir.name, uri_to_filename(input_ref.url)) - if is_local(input_ref.url): - shutil.copy(local_uri_to_path(input_ref.url), out_path) - else: - download_file(input_ref.url, out_path) - - file_extension = pathlib.Path(out_path).suffix - if file_extension not in mimetypes.types_map.keys(): - add_mime_type(file_extension) - - # Create asset and Raster - asset_id = hash_file(out_path) - asset = AssetVibe( - reference=out_path, type=mimetypes.guess_type(out_path)[0], 
id=asset_id - ) - out = self.out_type.clone_from( - input_ref, - id=gen_hash_id(asset_id, input_ref.geometry, input_ref.time_range), - assets=[asset], - **get_empty_fields(self.out_type), - ) - return {"downloaded": out} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_from_ref/download_geometry_from_ref.yaml b/ops/download_from_ref/download_geometry_from_ref.yaml deleted file mode 100644 index 019e7c1a..00000000 --- a/ops/download_from_ref/download_geometry_from_ref.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_geometry_from_ref -inputs: - input_ref: ExternalReference -output: - downloaded: GeometryCollection -parameters: - out_type: GeometryCollection -entrypoint: - file: download_from_ref.py - callback_builder: CallbackBuilder -description: - short_description: Downloads geometries provided in the reference and generates a GeometryCollection. \ No newline at end of file diff --git a/ops/download_from_ref/download_raster_from_ref.yaml b/ops/download_from_ref/download_raster_from_ref.yaml deleted file mode 100644 index 1c08000b..00000000 --- a/ops/download_from_ref/download_raster_from_ref.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_raster_from_ref -inputs: - input_ref: ExternalReference -output: - downloaded: Raster -parameters: - out_type: Raster -entrypoint: - file: download_from_ref.py - callback_builder: CallbackBuilder -description: - short_description: Downloads the raster from the input reference's url. \ No newline at end of file diff --git a/ops/download_from_smb/download_rasters_from_smb.py b/ops/download_from_smb/download_rasters_from_smb.py deleted file mode 100644 index 12ede0b0..00000000 --- a/ops/download_from_smb/download_rasters_from_smb.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -from pathlib import Path -from tempfile import TemporaryDirectory -from typing import Dict, List - -from smb.SMBConnection import SMBConnection - -from vibe_core.data import AssetVibe, DataVibe, Raster, gen_guid, gen_hash_id - - -def download_all_files( - server_name: str, - server_ip: str, - server_port: int, - username: str, - password: str, - share_name: str, - directory_path: str, - output_dir: Path, -) -> List[AssetVibe]: - """Download all files under directory_path on the SMB share and return a list of AssetVibes.""" - # Establish a connection with the server - conn = SMBConnection( - username, - password, - "FarmVibes_SMB_Downloader", - server_name, - use_ntlm_v2=True, - is_direct_tcp=True, - ) - conn.connect(server_ip, server_port) - - # Collect all files in the directory as assets - asset_list = [] - attributes = conn.getAttributes(share_name, directory_path) - - # Convert path to unix style - directory_path = directory_path.replace("\\", "/") - path = Path(directory_path) - if attributes.isDirectory: - crawl_directory(conn, share_name, path, asset_list, output_dir) - else: - download_asset(conn, share_name, path, asset_list, output_dir) - return asset_list - - -def download_asset( - conn: SMBConnection, - share_name: str, - filepath: Path, - asset_list: List[AssetVibe], - output_dir: Path, -): - # Compute the output path - if filepath.is_absolute(): - filepath = filepath.relative_to("/") - output_path = output_dir.joinpath(filepath) - output_path.parent.mkdir(parents=True, exist_ok=True) - - # Create an Asset type from the file - with open(output_path, "wb") as asset_file: - conn.retrieveFile(share_name, str(filepath), asset_file) - asset = AssetVibe( - reference=asset_file.name, - type=mimetypes.guess_type(asset_file.name)[0], - id=gen_guid(), - ) - asset_list.append(asset) - - -def crawl_directory( - conn: SMBConnection, - share_name: str, - dir_path: Path, - asset_list: List[AssetVibe], - output_dir: Path, -): - """Recursively 
search through the file system starting at directory - and download all files.""" - files = conn.listPath(share_name, str(dir_path)) - for file in files: - if file.filename not in [".", ".."]: - filepath = dir_path.joinpath(file.filename) - if file.isDirectory: - # Open subfolder - crawl_directory(conn, share_name, filepath, asset_list, output_dir) - else: - # Download the file if it is an image - mimetype = mimetypes.guess_type(str(filepath))[0] - if mimetype and mimetype.startswith("image"): - download_asset(conn, share_name, filepath, asset_list, output_dir) - - -class CallbackBuilder: - def __init__( - self, - server_name: str, - server_ip: str, - server_port: int, - username: str, - password: str, - share_name: str, - directory_path: str, - bands: List[str], - ): - self.server_name = server_name - self.server_ip = server_ip - self.server_port = server_port - self.username = username - self.password = password - self.share_name = share_name - self.directory_path = directory_path - self.bands = bands - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download(user_input: DataVibe) -> Dict[str, List[Raster]]: - raster_assets = download_all_files( - self.server_name, - self.server_ip, - self.server_port, - self.username, - self.password, - self.share_name, - self.directory_path, - Path(self.tmp_dir.name), - ) - bands = {name: index for index, name in enumerate(self.bands)} - return { - "rasters": [ - Raster.clone_from( - user_input, - id=gen_hash_id(asset.id, user_input.geometry, user_input.time_range), - assets=[asset], - bands=bands, - ) - for asset in raster_assets - ] - } - - return download - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_from_smb/download_rasters_from_smb.yaml b/ops/download_from_smb/download_rasters_from_smb.yaml deleted file mode 100644 index 6062db1d..00000000 --- a/ops/download_from_smb/download_rasters_from_smb.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: download_rasters_from_smb -inputs: - 
user_input: DataVibe -output: - rasters: List[Raster] -parameters: - server_name: - server_ip: "@SECRET(eywa-secrets, smb-server-ip)" - server_port: 445 - username: "@SECRET(eywa-secrets, smb-username)" - password: "@SECRET(eywa-secrets, smb-password)" - share_name: - directory_path: "/" - bands: ["red", "green", "blue"] -entrypoint: - file: download_rasters_from_smb.py - callback_builder: CallbackBuilder -dependecies: - parameters: - - server_name - - share_name -description: - short_description: - Downloads rasters from an SMB share. - parameters: - server_name: The name of the SMB server - server_ip: The IP address of the SMB server - server_port: The port to connect to on the SMB server - username: Username used to connect to server - password: Password to access server - share_name: Name of file share - directory_path: Path to directory containing rasters - bands: Ordered list of bands within the rasters diff --git a/ops/download_gedi_product/download_gedi_product.py b/ops/download_gedi_product/download_gedi_product.py deleted file mode 100644 index 6e8eead8..00000000 --- a/ops/download_gedi_product/download_gedi_product.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import os -from tempfile import TemporaryDirectory -from typing import Dict - -from vibe_core.data import AssetVibe, GEDIProduct, gen_guid -from vibe_core.file_downloader import download_file -from vibe_lib.earthdata import EarthDataAPI - -LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__(self, token: str): - self.token = token - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback(gedi_product: GEDIProduct) -> Dict[str, GEDIProduct]: - api = EarthDataAPI(gedi_product.processing_level) - LOGGER.info(f"Querying EarthData API for product {gedi_product.product_name}") - items = api.query(id=gedi_product.product_name) - if len(items) != 1: - raise RuntimeError( - f"Query for GEDI product {gedi_product.product_name} " - "returned {len(items)} items, expected one item" - ) - url = items[0]["links"][0]["href"] - asset_guid = gen_guid() - out_path = os.path.join(self.tmp_dir.name, f"{asset_guid}") - h5_path = f"{out_path}.h5" - headers = {"Authorization": f"Bearer {self.token}", "Content-Type": "application/json"} - LOGGER.info(f"Downloading data from {url}") - download_file(url, h5_path, headers=headers) - asset = AssetVibe(reference=h5_path, type="application/x-hdf5", id=asset_guid) - dl_product = GEDIProduct.clone_from(gedi_product, id=gen_guid(), assets=[asset]) - return {"downloaded_product": dl_product} - - return callback diff --git a/ops/download_gedi_product/download_gedi_product.yaml b/ops/download_gedi_product/download_gedi_product.yaml deleted file mode 100644 index ffe893f8..00000000 --- a/ops/download_gedi_product/download_gedi_product.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_gedi_product -inputs: - gedi_product: GEDIProduct -output: - downloaded_product: GEDIProduct -parameters: - token: "@SECRET(eywa-secrets, earthdata-token)" -entrypoint: - file: download_gedi_product.py - callback_builder: CallbackBuilder -description: - short_description: Downloads GEDI products. 
\ No newline at end of file diff --git a/ops/download_gedi_product/test_download_gedi_product.py b/ops/download_gedi_product/test_download_gedi_product.py deleted file mode 100644 index 73a371bc..00000000 --- a/ops/download_gedi_product/test_download_gedi_product.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import Any, cast -from unittest.mock import Mock, patch - -import h5py -import numpy as np -from shapely import geometry as shpg - -from vibe_core import file_downloader -from vibe_core.data import GEDIProduct -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.earthdata import EarthDataAPI - -HERE = os.path.dirname(os.path.abspath(__file__)) -CONFIG_PATH = os.path.join(HERE, "download_gedi_product.yaml") - -NUM_POINTS = 10 -BEAMS = [ - "BEAM0000", - "BEAM0001", - "BEAM0010", - "BEAM0011", - "BEAM0101", - "BEAM0110", - "BEAM1000", - "BEAM1011", -] -L2B = "GEDI02_B.002" - - -def fake_download(_: str, h5_path: str, **kwargs: Any): - beam_value = 0 - with h5py.File(h5_path, "w") as f: - for b in BEAMS: - beam_value = int(b.replace("BEAM", ""), 2) - f.create_dataset(f"{b}/geolocation/lon_lowestmode", data=np.arange(NUM_POINTS)) - f.create_dataset( - f"{b}/geolocation/lat_lowestmode", data=np.arange(NUM_POINTS) + NUM_POINTS - ) - f.create_dataset(f"{b}/beam", data=beam_value * np.ones(NUM_POINTS)) - f.create_dataset(f"{b}/rh100", data=np.linspace(0, 1, NUM_POINTS) + beam_value) - - -@patch.object(file_downloader, "download_file") -@patch.object(EarthDataAPI, "query") -def test_op(query: Mock, download: Mock): - query.return_value = [{"links": [{"href": "mock_link"}]}] - download.side_effect = fake_download - now = datetime.now() - geom = shpg.box(0, 0, 1, 1) - x = GEDIProduct( - id="1", - time_range=(now, now), - geometry=shpg.mapping(geom), - assets=[], - product_name="fake_product", - start_orbit=0, - stop_orbit=0, - 
processing_level="whatever", - ) - op_tester = OpTester(CONFIG_PATH) - test_token = "test-token" - op_tester.update_parameters({"token": test_token}) - out = op_tester.run(gedi_product=x) - query.assert_called_once_with(id=x.product_name) - download.assert_called_once() - # Make sure we used the token - assert download.call_args.kwargs["headers"]["Authorization"] == f"Bearer {test_token}" - assert "downloaded_product" in out - dl_product = cast(GEDIProduct, out["downloaded_product"]) - assert dl_product.geometry == x.geometry - assert dl_product.time_range == x.time_range diff --git a/ops/download_glad_data/download_glad.py b/ops/download_glad_data/download_glad.py deleted file mode 100644 index e567d445..00000000 --- a/ops/download_glad_data/download_glad.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict - -from vibe_core.data import AssetVibe, CategoricalRaster, gen_hash_id -from vibe_core.data.core_types import gen_guid -from vibe_core.data.products import GLADProduct -from vibe_core.file_downloader import download_file - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_product(glad_product: GLADProduct) -> Dict[str, CategoricalRaster]: - fname = f"{glad_product.tile_name}_{glad_product.time_range[0].year}.tif" - fpath = os.path.join(self.tmp_dir.name, fname) - download_file(glad_product.url, fpath) - - asset = AssetVibe(reference=fpath, type=mimetypes.types_map[".tif"], id=gen_guid()) - - downloaded_product = CategoricalRaster.clone_from( - glad_product, - id=gen_hash_id(fname, glad_product.geometry, glad_product.time_range), - assets=[asset], - bands={"forest_extent": 0}, - categories=["Non-Forest", "Forest"], - ) - - return {"downloaded_product": downloaded_product} - - return download_product - - def __del__(self): - 
self.tmp_dir.cleanup() diff --git a/ops/download_glad_data/download_glad.yaml b/ops/download_glad_data/download_glad.yaml deleted file mode 100644 index 502c90bc..00000000 --- a/ops/download_glad_data/download_glad.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: download_glad -inputs: - glad_product: GLADProduct -output: - downloaded_product: Raster -parameters: -entrypoint: - file: download_glad.py - callback_builder: CallbackBuilder -description: - short_description: Downloads a GLADProduct \ No newline at end of file diff --git a/ops/download_glad_data/test_download_glad_product.py b/ops/download_glad_data/test_download_glad_product.py deleted file mode 100644 index dcb6211a..00000000 --- a/ops/download_glad_data/test_download_glad_product.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import cast -from unittest.mock import Mock, patch - -import pytest -from shapely import geometry as shpg - -from vibe_core import file_downloader -from vibe_core.data import CategoricalRaster, GLADProduct -from vibe_dev.testing.op_tester import OpTester - - -@pytest.fixture -def glad_product(): - return GLADProduct( - id="test_id", - geometry=shpg.mapping(shpg.box(-115.0, 45.0, -105.0, 55.0)), - time_range=(datetime(2020, 1, 1), datetime(2020, 1, 2)), - url="https://test.com/test.tif", - assets=[], - ) - - -@patch.object(file_downloader, "download_file") -def test_download_glad_product(download: Mock, glad_product: GLADProduct): - config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_glad.yaml") - - op_tester = OpTester(config_path) - out = op_tester.run(glad_product=glad_product) - assert out - assert "downloaded_product" in out - downloaded_product: CategoricalRaster = cast(CategoricalRaster, out["downloaded_product"]) - assert len(downloaded_product.assets) > 0 - asset = downloaded_product.assets[0] - assert asset.path_or_url.endswith( - 
f"{glad_product.tile_name}_{glad_product.time_range[0].year}.tif" - ) diff --git a/ops/download_gnatsgo/download_gnatsgo.yaml b/ops/download_gnatsgo/download_gnatsgo.yaml deleted file mode 100644 index 3c8b95a0..00000000 --- a/ops/download_gnatsgo/download_gnatsgo.yaml +++ /dev/null @@ -1,60 +0,0 @@ -name: download_gnatsgo -inputs: - gnatsgo_product: GNATSGOProduct -output: - downloaded_raster: GNATSGORaster -parameters: - api_key: "" - variable: -dependencies: - parameters: - - variable -entrypoint: - file: download_gnatsgo_raster.py - callback_builder: CallbackBuilder -description: - short_description: Downloads the raster asset for 'variable' given a GNATSGO product. - parameters: - api_key: Optional Planetary Computer API key. - variable: >- - Options are: - aws{DEPTH} - Available water storage estimate (AWS) for the DEPTH zone. - soc{DEPTH} - Soil organic carbon stock estimate (SOC) for the DEPTH zone. - tk{DEPTH}a - Thickness of soil components used in the DEPTH zone for the AWS calculation. - tk{DEPTH}s - Thickness of soil components used in the DEPTH zone for the SOC calculation. - mukey - Map unit key, a unique identifier of a record for matching with gNATSGO tables. - droughty - Drought vulnerability estimate. - nccpi3all - National Commodity Crop Productivity Index that has the highest value among Corn - and Soybeans, Small Grains, or Cotton for major earthy components. - nccpi3corn - National Commodity Crop Productivity Index for Corn for major earthy - components. - nccpi3cot - National Commodity Crop Productivity Index for Cotton for major earthy - components. - nccpi3sg - National Commodity Crop Productivity Index for Small Grains for major earthy - components. - nccpi3soy - National Commodity Crop Productivity Index for Soy for major earthy components. - pctearthmc - National Commodity Crop Productivity Index map unit percent earthy is the map - unit summed comppct_r for major earthy components. 
- pwsl1pomu - Potential Wetland Soil Landscapes (PWSL). - rootznaws - Root zone (commodity crop) available water storage estimate (RZAWS). - rootznemc - Root zone depth is the depth within the soil profile that commodity crop (cc) - roots can effectively extract water and nutrients for growth. - musumcpct - Sum of the comppct_r (SSURGO component table) values for all listed components - in the map unit. - musumcpcta - Sum of the comppct_r (SSURGO component table) values used in the available - water storage calculation for the map unit. - musumcpcts - Sum of the comppct_r (SSURGO component table) values used in the soil organic - carbon calculation for the map unit. gNATSGO has properties available for multiple soil - depths. You may exchange DEPTH in the variable names above for any of the following (all - measured in cm): - 0_5 - 0_20 - 0_30 - 5_20 - 0_100 - 0_150 - 0_999 - 20_50 - 50_100 - 100_150 - 150_999 diff --git a/ops/download_gnatsgo/download_gnatsgo_raster.py b/ops/download_gnatsgo/download_gnatsgo_raster.py deleted file mode 100644 index bb25eefc..00000000 --- a/ops/download_gnatsgo/download_gnatsgo_raster.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Dict, Optional - -import planetary_computer as pc - -from vibe_core.data import GNATSGOProduct, gen_hash_id -from vibe_core.data.core_types import AssetVibe, gen_guid -from vibe_core.data.rasters import GNATSGORaster -from vibe_lib.planetary_computer import GNATSGOCollection -from vibe_lib.raster import FLOAT_COMPRESSION_KWARGS, compress_raster - - -def download_asset(input_product: GNATSGOProduct, variable: str, dir_path: str) -> AssetVibe: - """ - Downloads the raster asset of the selected variable and compresses it - """ - collection = GNATSGOCollection() - item = collection.query_by_id(input_product.id) - - uncompressed_asset_path = collection.download_asset(item.assets[variable], dir_path) - - asset_id = gen_guid() - asset_path = os.path.join(dir_path, f"{asset_id}.tif") - - compress_raster(uncompressed_asset_path, asset_path, **FLOAT_COMPRESSION_KWARGS) - - return AssetVibe(reference=asset_path, type="image/tiff", id=asset_id) - - -class CallbackBuilder: - def __init__(self, api_key: str, variable: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - if variable not in GNATSGOCollection.asset_keys: - raise ValueError( - f"Requested variable '{variable}' not valid. 
" - f"Valid values are {', '.join(GNATSGOCollection.asset_keys)}" - ) - self.variable = variable - - def __call__(self): - def download_gnatsgo_raster( - gnatsgo_product: GNATSGOProduct, - ) -> Dict[str, Optional[GNATSGORaster]]: - pc.set_subscription_key(self.api_key) - - asset = download_asset(gnatsgo_product, self.variable, self.tmp_dir.name) - - downloaded_raster = GNATSGORaster.clone_from( - gnatsgo_product, - id=gen_hash_id( - f"{gnatsgo_product.id}_{self.variable}_downloaded_gnatsgo_product", - gnatsgo_product.geometry, - gnatsgo_product.time_range, - ), - assets=[asset], - bands={self.variable: 0}, - variable=self.variable, - ) - - return {"downloaded_raster": downloaded_raster} - - return download_gnatsgo_raster - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_gnatsgo/test_download_gnatsgo.py b/ops/download_gnatsgo/test_download_gnatsgo.py deleted file mode 100644 index d36c8422..00000000 --- a/ops/download_gnatsgo/test_download_gnatsgo.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from unittest.mock import MagicMock, patch - -import pytest -from pystac import Asset, Item -from shapely.geometry import Point, mapping - -from vibe_core.data import GNATSGOProduct -from vibe_core.data.rasters import GNATSGORaster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import GNATSGOCollection - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_gnatsgo.yaml") -INVALID_VARIABLE = "🙅" -FAKE_DATE = datetime(year=2020, month=7, day=1, tzinfo=timezone.utc) - - -def fake_item(): - assets = {f"{var}": Asset(href=f"fake_href_{var}") for var in GNATSGOCollection.asset_keys} - return Item( - id="fake_id", # type: ignore - geometry=None, - bbox=None, - datetime=None, - properties={ - "start_datetime": FAKE_DATE.isoformat() + "Z", - "end_datetime": FAKE_DATE.isoformat() + "Z", - }, - assets=assets, - ) - - -@pytest.mark.parametrize("variable", GNATSGOCollection.asset_keys) -@patch("vibe_lib.raster.compress_raster") -@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["gnatsgo-rasters"]) -@patch.object(GNATSGOCollection, "download_asset") -@patch.object(GNATSGOCollection, "query_by_id") -def test_op(query: MagicMock, download: MagicMock, _: MagicMock, __: MagicMock, variable: str): - queried_item = fake_item() - query.return_value = queried_item - download.return_value = "/tmp/test.tif" - - polygon = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) - - input_product = GNATSGOProduct( - id="conus_101445_2236065_265285_2072225", - time_range=(FAKE_DATE, FAKE_DATE), - geometry=mapping(polygon), # type: ignore - assets=[], - ) - - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({"variable": variable}) - output_data = op_tester.run(**{"gnatsgo_product": input_product}) - - # Get op result - output_name = "downloaded_raster" - assert output_name in output_data - output_raster = output_data[output_name] - 
assert isinstance(output_raster, GNATSGORaster) - assert output_raster.variable == variable - assert len(output_raster.bands) == 1 - assert download.call_args.args[0] == queried_item.assets[variable] - - -def test_op_fails_invalid_variable(): - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({"variable": INVALID_VARIABLE}) - with pytest.raises(ValueError): - op_tester.run(input_item=[]) diff --git a/ops/download_hansen/download_hansen.py b/ops/download_hansen/download_hansen.py deleted file mode 100644 index 058e8199..00000000 --- a/ops/download_hansen/download_hansen.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict - -from vibe_core.data import AssetVibe, HansenProduct -from vibe_core.data.core_types import gen_guid, gen_hash_id -from vibe_core.data.rasters import Raster -from vibe_core.file_downloader import download_file - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_product(hansen_product: HansenProduct) -> Dict[str, Raster]: - fname = ( - f"hansen_{hansen_product.layer_name}_{hansen_product.tile_name}_" - f"{hansen_product.last_year}.tif" - ) - fpath = os.path.join(self.tmp_dir.name, fname) - download_file(hansen_product.asset_url, fpath) - - asset = AssetVibe(reference=fpath, type=mimetypes.types_map[".tif"], id=gen_guid()) - downloaded_product = Raster.clone_from( - hansen_product, - id=gen_hash_id( - f"{hansen_product.id}_downloaded_hansen_product", - hansen_product.geometry, - hansen_product.time_range, - ), - assets=[asset], - bands={hansen_product.layer_name: 0}, - ) - - return {"raster": downloaded_product} - - return download_product - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_hansen/download_hansen.yaml b/ops/download_hansen/download_hansen.yaml deleted file mode 100644 
index 5b106f26..00000000 --- a/ops/download_hansen/download_hansen.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: download_hansen -inputs: - hansen_product: HansenProduct -output: - raster: Raster -parameters: -entrypoint: - file: download_hansen.py - callback_builder: CallbackBuilder -description: - short_description: Downloads Global Forest Change (Hansen) data. - long_description: - The op will download a Global Forest Change (Hansen) product and return it - as a raster. The dataset is available at 30m resolution and is updated - annually. The data includes information on forest cover, loss, and gain. - Full dataset details can be found at - https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. - sources: - hansen_product: Product with the tile metadata to be downloaded. - sinks: - raster: Downloaded Global Forest Change (Hansen) data as a raster. diff --git a/ops/download_hansen/test_download_hansen_product.py b/ops/download_hansen/test_download_hansen_product.py deleted file mode 100644 index 20ecc5fa..00000000 --- a/ops/download_hansen/test_download_hansen_product.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from typing import List, cast -from unittest.mock import Mock, patch - -import pytest -from shapely import geometry as shpg - -from vibe_core import file_downloader -from vibe_core.data import HansenProduct, Raster -from vibe_dev.testing.op_tester import OpTester - - -@pytest.fixture -def hansen_products(): - return [ - HansenProduct( - id="test_id", - geometry=shpg.mapping(shpg.box(-115.0, 45.0, -105.0, 55.0)), - time_range=(datetime(2000, 1, 1), datetime(2022, 1, 2)), - asset_url=( - f"https://storage.googleapis.com/earthenginepartners-hansen/" - f"GFC-2022-v1.10/Hansen_GFC-2022-v1.10_{asset_key}_00N_000E.tif" - ), - assets=[], - ) - for asset_key in HansenProduct.asset_keys - ] - - -@patch.object(file_downloader, "download_file") -def test_download_hansen_product(download: Mock, hansen_products: List[HansenProduct]): - config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_hansen.yaml") - - op_tester = OpTester(config_path) - - for hansen_product in hansen_products: - out = op_tester.run(hansen_product=hansen_product) - assert out - - raster = cast(Raster, out["raster"]) - - assert raster - assert len(raster.assets) == 1 - assert raster.bands == {hansen_product.layer_name: 0} - - assert raster.time_range == hansen_product.time_range - assert raster.geometry == hansen_product.geometry diff --git a/ops/download_herbie/download_herbie.py b/ops/download_herbie/download_herbie.py deleted file mode 100644 index 0bc06504..00000000 --- a/ops/download_herbie/download_herbie.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import timedelta -from tempfile import TemporaryDirectory -from typing import Dict, Optional - -import rasterio -from herbie import Herbie - -from vibe_core.data import AssetVibe, Grib -from vibe_core.data.core_types import gen_guid -from vibe_core.data.products import HerbieProduct - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_herbie( - herbie_product: HerbieProduct, - ) -> Dict[str, Optional[Grib]]: - H = Herbie( - herbie_product.time_range[0].replace(tzinfo=None), - fxx=herbie_product.lead_time_hours, - model=herbie_product.model, - product=herbie_product.product, - ) - grib_path = H.download(herbie_product.search_text) - asset = AssetVibe(reference=str(grib_path), type="application/x-grib", id=gen_guid()) - with rasterio.open(grib_path) as f: - t = herbie_product.time_range[0] + timedelta(hours=herbie_product.lead_time_hours) - forecast = Grib.clone_from( - herbie_product, - time_range=(t, t), - id=gen_guid(), - assets=[asset], - meta={"lead_time": str(herbie_product.lead_time_hours)}, - bands={ - f.tags(i)["GRIB_ELEMENT"]: i - 1 # type: ignore - for i in range(1, f.count + 1) # type: ignore - }, - ) - - return {"forecast": forecast} - - return download_herbie - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_herbie/download_herbie.yaml b/ops/download_herbie/download_herbie.yaml deleted file mode 100644 index 7f8ad5e1..00000000 --- a/ops/download_herbie/download_herbie.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_herbie -inputs: - herbie_product: HerbieProduct -output: - forecast: Grib -parameters: -entrypoint: - file: download_herbie.py - callback_builder: CallbackBuilder -description: - short_description: - Download herbie grib files. 
\ No newline at end of file diff --git a/ops/download_herbie/forecast_range_split.py b/ops/download_herbie/forecast_range_split.py deleted file mode 100644 index 2d1df602..00000000 --- a/ops/download_herbie/forecast_range_split.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict, List - -import pandas as pd - -from vibe_core.data import DataVibe, gen_hash_id - - -class CallbackBuilder: - def __init__( - self, - forecast_lead_times: List[int], - weather_type: str, - ): - self.weather_type = weather_type - self.frequency = forecast_lead_times[1] - forecast_lead_times[0] - - def get_forecast_weather(self, user_input: DataVibe) -> List[DataVibe]: - dates = pd.date_range( - user_input.time_range[0], user_input.time_range[1], freq=f"{str(self.frequency)}H" - ) - - forecasts = [ - DataVibe( - gen_hash_id( - name=self.weather_type, - geometry=user_input.geometry, - time_range=(date, date), - ), - (date, date), - user_input.geometry, - [], - ) - for date in dates - ] - - return forecasts - - def __call__(self): - def range_split_initialize(user_input: List[DataVibe]) -> Dict[str, List[DataVibe]]: - download_period = self.get_forecast_weather(user_input[0]) - return {"download_period": download_period} - - return range_split_initialize diff --git a/ops/download_herbie/forecast_range_split.yaml b/ops/download_herbie/forecast_range_split.yaml deleted file mode 100644 index 0707b6f1..00000000 --- a/ops/download_herbie/forecast_range_split.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: forecast_range_split -inputs: - user_input: List[DataVibe] -output: - download_period: List[DataVibe] -parameters: - # [frequency_hour, number of hours, increment by] - forecast_lead_times: [1, 25, 1] - weather_type: "temperature" -entrypoint: - callback_builder: CallbackBuilder - file: forecast_range_split.py -dependencies: - parameters: - - weather_type - - forecast_lead_times -description: - short_description: 
Splits input time range according to frequency and number of hours in lead time. \ No newline at end of file diff --git a/ops/download_herbie/forecast_weather.py b/ops/download_herbie/forecast_weather.py deleted file mode 100644 index 219c3b50..00000000 --- a/ops/download_herbie/forecast_weather.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -import shutil -import tempfile -import warnings -from datetime import datetime -from socket import error as SocketError -from tempfile import TemporaryDirectory -from typing import Dict, List, Tuple - -import numpy as np -import pandas as pd -from herbie import FastHerbie - -from vibe_core.data import AssetVibe, DataVibe, gen_guid -from vibe_core.data.weather import WeatherVibe - -warnings.filterwarnings("ignore") - -INDEX_COLUMN = "date" - - -class CallbackBuilder: - def __init__( - self, - model: str, - overwrite: bool, - product: str, - forecast_lead_times: List[int], - search_text: str, - weather_type: str, - ): - self.temp_dir = TemporaryDirectory() - self.model = model - self.overwrite = overwrite - self.product = product - self.forecast_lead_times = range( - forecast_lead_times[0], forecast_lead_times[1], forecast_lead_times[2] - ) - self.frequency = forecast_lead_times[1] - forecast_lead_times[0] - self.search_text = search_text - self.weather_type = weather_type - - def ping_herbie_source(self, date: datetime, coordinates: Tuple[float, float]): - # initialize temporary directory - tmp_dir = tempfile.mkdtemp() - out_ = np.empty(0) - try: - # download forecast data - fh = FastHerbie( - [date], - model=self.model, - product=self.product, - fxx=self.forecast_lead_times, - save_dir=tmp_dir, - overwrite=self.overwrite, - ) - fh.download(searchString=self.search_text) - - # filter records nearest to coordinates - ds = fh.xarray(searchString=self.search_text) - - out_key = [key for key in ds.keys() if key != "gribfile_projection"] - out_ = 
ds.herbie.nearest_points(coordinates)[out_key[0]].values[0] - - if len(out_) < self.frequency: - out_ = np.empty(0) - - del ds - del fh - except EOFError: - # This error raises due to missing data. - # ignore this error to continue download. - pass - except SocketError: - pass - except Exception: - raise - - finally: - # clear temporary directory - shutil.rmtree(tmp_dir, ignore_errors=True) - return out_ - - def get_forecast_weather(self, user_input: DataVibe) -> WeatherVibe: - start_date = user_input.time_range[0].replace(tzinfo=None) - end_date = user_input.time_range[1].replace(tzinfo=None) - coords = tuple(user_input.geometry["coordinates"]) - dates = pd.date_range(start_date, end_date, freq=f"{str(self.frequency)}H") - - forecasts = [] - for date in dates: - out_ = self.ping_herbie_source(date=date, coordinates=coords) - if len(out_) > 0: - forecasts.append([date] + list(out_)) - - df = pd.DataFrame( - data=forecasts, - columns=[INDEX_COLUMN] + [f"step {x}" for x in self.forecast_lead_times], - ) - - # df = self.clean_forecast_data(forecast_df=df, start_date=start_date, end_date=end_date) - out_path = os.path.join(self.temp_dir.name, f"{self.weather_type}.csv") - df.to_csv(out_path, index=False) - asset = AssetVibe(reference=out_path, type="text/csv", id=gen_guid()) - return WeatherVibe( - gen_guid(), - user_input.time_range, - user_input.geometry, - [asset], - ) - - def __call__(self): - def weather_initialize(user_input: DataVibe) -> Dict[str, WeatherVibe]: - weather_forecast = self.get_forecast_weather(user_input) - return {"weather_forecast": weather_forecast} - - return weather_initialize - - def __del__(self): - self.temp_dir.cleanup() diff --git a/ops/download_herbie/forecast_weather.yaml b/ops/download_herbie/forecast_weather.yaml deleted file mode 100644 index e2c12a10..00000000 --- a/ops/download_herbie/forecast_weather.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: download_forecast_weather -inputs: - user_input: DataVibe -output: - weather_forecast: 
WeatherVibe -parameters: - model: "hrrr" - overwrite: False - product: "prs" - # [frequency_hour, number of hours, increment by] - forecast_lead_times: [1, 25, 1] - search_text: "TMP:2 m" - weather_type: "temperature" -entrypoint: - callback_builder: CallbackBuilder - file: forecast_weather.py -dependencies: - parameters: - - search_text - - weather_type - - forecast_lead_times -description: - short_description: Downloads forecast observations with Herbie. \ No newline at end of file diff --git a/ops/download_landsat_from_pc/download_landsat_from_pc.yaml b/ops/download_landsat_from_pc/download_landsat_from_pc.yaml deleted file mode 100644 index 011a1371..00000000 --- a/ops/download_landsat_from_pc/download_landsat_from_pc.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_landsat_from_pc -inputs: - landsat_product: LandsatProduct -output: - downloaded_product: LandsatProduct -parameters: - api_key: "" -entrypoint: - file: download_landsat_pc.py - callback_builder: CallbackBuilder -description: - short_description: Downloads LANDSAT tile bands from product. \ No newline at end of file diff --git a/ops/download_landsat_from_pc/download_landsat_pc.py b/ops/download_landsat_from_pc/download_landsat_pc.py deleted file mode 100644 index d5e7feea..00000000 --- a/ops/download_landsat_from_pc/download_landsat_pc.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from tempfile import TemporaryDirectory -from typing import Dict, Optional - -import planetary_computer as pc - -from vibe_core.data import LandsatProduct, gen_hash_id -from vibe_lib.planetary_computer import LandsatCollection - -LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__(self, api_key: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - def __call__(self): - def download_product( - landsat_product: LandsatProduct, - ) -> Dict[str, Optional[LandsatProduct]]: - pc.set_subscription_key(self.api_key) - collection = LandsatCollection() - item = collection.query_by_id(landsat_product.tile_id) - - downloaded_product = LandsatProduct.clone_from( - landsat_product, - id=gen_hash_id( - f"{landsat_product.id}_download_landsat_product", - landsat_product.geometry, - landsat_product.time_range, - ), - assets=[], - ) - - for k in collection.asset_keys: - try: - asset_path = collection.download_asset(item.assets[k], self.tmp_dir.name) - downloaded_product.add_downloaded_band(k, asset_path) - except KeyError as e: - LOGGER.warning(f"No band {k} found. Original exception {e}") - - return {"downloaded_product": downloaded_product} - - return download_product - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_modis_sr/download_modis_sr.py b/ops/download_modis_sr/download_modis_sr.py deleted file mode 100644 index bc2d9c7e..00000000 --- a/ops/download_modis_sr/download_modis_sr.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from tempfile import TemporaryDirectory -from typing import Dict, Optional - -import numpy as np -import planetary_computer as pc -import rioxarray as rio -import xarray as xr - -from vibe_core.data import ModisProduct, ModisRaster, gen_guid -from vibe_lib.planetary_computer import Modis8DaySRCollection -from vibe_lib.raster import save_raster_to_asset - -MODIS_SPYNDEX: Dict[str, str] = { - "sur_refl_b01": "R", - "sur_refl_b02": "N", - "sur_refl_b03": "B", - "sur_refl_b04": "G", - "sur_refl_b06": "S1", - "sur_refl_b07": "S2", -} - - -class CallbackBuilder: - def __init__(self, qa_mask_value: int, pc_key: Optional[str]): - self.tmp_dir = TemporaryDirectory() - self.qa_mask_value = qa_mask_value - pc.set_subscription_key(pc_key) # type: ignore - - def __call__(self): - def callback(product: ModisProduct) -> Dict[str, ModisRaster]: - col = Modis8DaySRCollection(product.resolution) - items = col.query( - roi=product.bbox, - time_range=product.time_range, - ids=[product.id], - ) - assert len(items) == 1 - item = items[0] - bands = sorted([k for k in item.assets if k.find("sur_refl") >= 0]) - tifs = [col.download_asset(item.assets[k], self.tmp_dir.name) for k in bands] - da = ( - xr.open_mfdataset(tifs, engine="rasterio", combine="nested", concat_dim="bands") - .to_array() - .squeeze() - ) - - if self.qa_mask_value: - if np.any([b.find("sur_refl_state_") >= 0 for b in bands]): - idx = next( - filter(lambda b: b[1].find("sur_refl_state_") >= 0, enumerate(bands)) - )[0] - qa_pixel = rio.open_rasterio(tifs[idx]).squeeze().values.astype(int) # type: ignore - mask = np.logical_not(np.bitwise_and(qa_pixel, self.qa_mask_value)) - del qa_pixel - da = da.where(mask) - else: - raise ValueError("sur_refl_state not found") - - asset = save_raster_to_asset(da, self.tmp_dir.name) - - band_idx = {name: idx for idx, name in enumerate(bands)} - # Add Spyndex aliases to available bands - for k, v in MODIS_SPYNDEX.items(): - if k in bands: - band_idx[v] = band_idx[k] - - return { - 
"raster": ModisRaster.clone_from( - product, - id=gen_guid(), - assets=[asset], - bands=band_idx, - ) - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_modis_sr/download_modis_sr.yaml b/ops/download_modis_sr/download_modis_sr.yaml deleted file mode 100644 index 681ec0fb..00000000 --- a/ops/download_modis_sr/download_modis_sr.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: download_modis_sr -inputs: - product: ModisProduct -output: - raster: Raster -parameters: - pc_key: - qa_mask_value: 1024 -entrypoint: - file: download_modis_sr.py - callback_builder: CallbackBuilder -version: 2 -description: - short_description: Downloads MODIS surface reflectance rasters. \ No newline at end of file diff --git a/ops/download_modis_vegetation/download_modis_vegetation.py b/ops/download_modis_vegetation/download_modis_vegetation.py deleted file mode 100644 index e4b7f7ec..00000000 --- a/ops/download_modis_vegetation/download_modis_vegetation.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from tempfile import TemporaryDirectory -from typing import Dict, Optional - -import planetary_computer as pc - -from vibe_core.data import AssetVibe, ModisProduct, Raster, gen_guid -from vibe_lib.planetary_computer import Modis16DayVICollection - -VALID_INDICES = ("evi", "ndvi") - - -class CallbackBuilder: - def __init__(self, index: str, pc_key: Optional[str]): - self.tmp_dir = TemporaryDirectory() - if index not in VALID_INDICES: - raise ValueError(f"Expected index to be one of {VALID_INDICES}, got '{index}'.") - self.index = index - pc.set_subscription_key(pc_key) # type: ignore - - def __call__(self): - def callback(product: ModisProduct) -> Dict[str, Raster]: - col = Modis16DayVICollection(product.resolution) - items = col.query( - roi=product.bbox, - time_range=product.time_range, - ids=[product.id], - ) - assert len(items) == 1 - item = items[0] - assets = [v for k, v in item.assets.items() if self.index.upper() in k] - assert len(assets) == 1 - asset = assets[0] - assets = [ - AssetVibe( - reference=col.download_asset(asset, self.tmp_dir.name), - type="image/tiff", - id=gen_guid(), - ) - ] - return { - "index": Raster.clone_from( - product, id=gen_guid(), assets=assets, bands={self.index: 0} - ) - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_modis_vegetation/download_modis_vegetation.yaml b/ops/download_modis_vegetation/download_modis_vegetation.yaml deleted file mode 100644 index c5c664e0..00000000 --- a/ops/download_modis_vegetation/download_modis_vegetation.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: download_modis_vegetation -inputs: - product: ModisProduct -output: - index: Raster -parameters: - index: ndvi - pc_key: -dependencies: - parameters: - - index -entrypoint: - file: download_modis_vegetation.py - callback_builder: CallbackBuilder -description: - short_description: Downloads selected index raster from Modis product. 
\ No newline at end of file diff --git a/ops/download_modis_vegetation/test_download_modis_vegetation.py b/ops/download_modis_vegetation/test_download_modis_vegetation.py deleted file mode 100644 index 46508c0a..00000000 --- a/ops/download_modis_vegetation/test_download_modis_vegetation.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from unittest.mock import MagicMock, patch - -import pytest -from pystac import Asset, Item -from shapely import geometry as shpg - -from vibe_core.data import ModisProduct, Raster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import Modis16DayVICollection - -HERE = os.path.dirname(os.path.abspath(__file__)) -INDICES = ("ndvi", "evi") -FAKE_TIME_RANGE = (datetime(2020, 11, 1), datetime(2020, 11, 2)) -INVALID_INDEX = "🙅" - - -def fake_items(resolution: int): - assets = { - f"250m_16_days_{index.upper()}": Asset(href=f"fake_href_{resolution}_{index}") - for index in INDICES - } - return [ - Item( - id=f"{resolution}m-id", # type: ignore - geometry=None, - bbox=None, - datetime=None, - properties={ - "start_datetime": FAKE_TIME_RANGE[0].isoformat() + "Z", - "end_datetime": FAKE_TIME_RANGE[1].isoformat() + "Z", - }, - assets=assets, - ) - ] - - -@pytest.mark.parametrize("resolution", (250, 500)) -@pytest.mark.parametrize("index", ("ndvi", "evi")) -@patch("vibe_lib.planetary_computer.get_available_collections") -@patch.object(Modis16DayVICollection, "download_asset") -@patch.object(Modis16DayVICollection, "query") -def test_op( - query: MagicMock, - download_asset: MagicMock, - get_collections: MagicMock, - index: str, - resolution: int, -): - get_collections.return_value = list(Modis16DayVICollection.collections.values()) - items = fake_items(resolution) - query.return_value = items - download_asset.side_effect = lambda asset, path: asset.href - - geom = shpg.Point(1, 1).buffer(0.01, cap_style=3) - 
time_range = (datetime(2022, 11, 1), datetime(2022, 11, 2)) - x = ModisProduct( - id="1", time_range=time_range, geometry=shpg.mapping(geom), resolution=resolution, assets=[] - ) - - op_tester = OpTester(os.path.join(HERE, "download_modis_vegetation.yaml")) - op_tester.update_parameters({"index": index}) - o = op_tester.run(product=x) - - query.assert_called_once_with(roi=x.bbox, time_range=x.time_range, ids=[x.id]) - download_asset.assert_called_once() - assert isinstance(o["index"], Raster) - assert o["index"].raster_asset.local_path == f"fake_href_{resolution}_{index}" - - -def test_op_fails_invalid_index(): - op_tester = OpTester(os.path.join(HERE, "download_modis_vegetation.yaml")) - op_tester.update_parameters({"index": INVALID_INDEX}) - with pytest.raises(ValueError): - op_tester.run(product=None) # type: ignore diff --git a/ops/download_naip/download_naip.py b/ops/download_naip/download_naip.py deleted file mode 100644 index b14c3e29..00000000 --- a/ops/download_naip/download_naip.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict, cast - -import planetary_computer as pc - -from vibe_core.data import AssetVibe, NaipProduct, NaipRaster, gen_guid, gen_hash_id -from vibe_lib.planetary_computer import NaipCollection -from vibe_lib.raster import json_to_asset - - -class CallbackBuilder: - def __init__(self, api_key: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - def __call__(self): - def op(input_product: NaipProduct) -> Dict[str, NaipRaster]: - pc.set_subscription_key(self.api_key) - collection = NaipCollection() - item = collection.query_by_id(input_product.tile_id) - assets = collection.download_item( - item, os.path.join(self.tmp_dir.name, input_product.id) - ) - vibe_assets = [ - AssetVibe(reference=a, type=cast(str, mimetypes.guess_type(a)[0]), id=gen_guid()) - for a in assets - ] - vis_asset = json_to_asset({"bands": list(range(3))}, self.tmp_dir.name) - vibe_assets.append(vis_asset) - downloaded_product = NaipRaster( - id=gen_hash_id( - f"{input_product.id}_download_naip_product", - input_product.geometry, - input_product.time_range, - ), - time_range=input_product.time_range, - geometry=input_product.geometry, - assets=vibe_assets, - bands={k: v for v, k in enumerate(("red", "green", "blue", "nir"))}, - tile_id=input_product.tile_id, - year=input_product.year, - resolution=input_product.resolution, - ) - - return {"downloaded_product": downloaded_product} - - return op - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_naip/download_naip.yaml b/ops/download_naip/download_naip.yaml deleted file mode 100644 index 626794da..00000000 --- a/ops/download_naip/download_naip.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_naip -inputs: - input_product: NaipProduct -output: - downloaded_product: NaipRaster -parameters: - api_key: "" -entrypoint: - file: download_naip.py - callback_builder: CallbackBuilder -description: - short_description: Downloads 
Naip raster from Naip product. \ No newline at end of file diff --git a/ops/download_naip/test_download_naip.py b/ops/download_naip/test_download_naip.py deleted file mode 100644 index 48131738..00000000 --- a/ops/download_naip/test_download_naip.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from unittest.mock import MagicMock, patch - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import NaipProduct -from vibe_core.data.rasters import NaipRaster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import NaipCollection - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_naip.yaml") - - -@patch( - "vibe_lib.planetary_computer.get_available_collections", - return_value=[NaipCollection.collection], -) -@patch.object(NaipCollection, "query_by_id") -@patch.object(NaipCollection, "download_item", return_value=["/tmp/test.tif"]) -def test_op(_: MagicMock, __: MagicMock, ___: MagicMock): - latitude = 42.21422 - longitude = -93.22890 - buffer = 0.001 - bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] - polygon: Polygon = box(*bbox, ccw=True) - start_date = datetime(year=2018, month=2, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) - - output: NaipProduct = NaipProduct( - id=str("ia_m_4209355_nw_15_060_20190730_20191105"), - time_range=( - start_date, - end_date, - ), - geometry=mapping(polygon), # type: ignore - assets=[], - tile_id=str("ia_m_4209355_nw_15_060_20190730_20191105"), - resolution=0.6, - year=2019, - ) - - output_data = OpTester(CONFIG_PATH).run(**{"input_product": output}) - - # Get op result - output_name = "downloaded_product" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, NaipRaster) diff --git 
a/ops/download_road_geometries/download_road_geometries.py b/ops/download_road_geometries/download_road_geometries.py deleted file mode 100644 index a1142c31..00000000 --- a/ops/download_road_geometries/download_road_geometries.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Dict, cast - -import geopandas as gpd -import osmnx as ox -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, GeometryCollection -from vibe_core.data.core_types import AssetVibe, gen_guid -from vibe_lib.geometry import wgs_to_utm - - -def get_road_geometries(geom: shpg.Polygon, network_type: str) -> gpd.GeoDataFrame: - graph = ox.graph_from_polygon( - geom, network_type=network_type, truncate_by_edge=True, retain_all=True - ) - df_edges = cast(gpd.GeoDataFrame, ox.graph_to_gdfs(graph, nodes=False)) - df_edges = cast(gpd.GeoDataFrame, df_edges[df_edges.intersects(geom)]) - # Encode Metadata as strings to avoid lists - for k in df_edges.columns: - if k == "geometry": - continue - df_edges[k] = df_edges[k].apply( # type: ignore - lambda x: ",".join([str(i) for i in x]) if isinstance(x, list) else str(x) - ) - return cast(gpd.GeoDataFrame, df_edges) - - -class CallbackBuilder: - def __init__(self, network_type: str, buffer_size: float): - self.network_type = network_type - self.buffer_size = buffer_size - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback(input_region: DataVibe) -> Dict[str, GeometryCollection]: - geom = shpg.box(*input_region.bbox) - crs = "epsg:4326" - proj_crs = f"epsg:{wgs_to_utm(geom)}" - buffered_geom = ( - gpd.GeoSeries(geom, crs=crs) - .to_crs(proj_crs) - .buffer(self.buffer_size) - .to_crs(crs=crs) - .iloc[0] - .envelope - ) - df = get_road_geometries(buffered_geom, self.network_type) - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.gpkg") - df.to_file(filepath, 
driver="GPKG") - asset = AssetVibe(reference=filepath, type="application/geopackage+sqlite3", id=guid) - - out = GeometryCollection.clone_from(input_region, id=gen_guid(), assets=[asset]) - - return {"roads": out} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_road_geometries/download_road_geometries.yaml b/ops/download_road_geometries/download_road_geometries.yaml deleted file mode 100644 index ebb4595a..00000000 --- a/ops/download_road_geometries/download_road_geometries.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: download_road_geometries -inputs: - input_region: DataVibe -output: - roads: GeometryCollection -parameters: - network_type: all_private - buffer_size: 100 # In meters -entrypoint: - file: download_road_geometries.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - network_type - - buffer_size -description: - short_description: Downloads road geometry for input region from Open Street Maps. \ No newline at end of file diff --git a/ops/download_sentinel1/download_sentinel1.yaml b/ops/download_sentinel1/download_sentinel1.yaml deleted file mode 100644 index 0a822e70..00000000 --- a/ops/download_sentinel1/download_sentinel1.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: download_sentinel1 -inputs: - sentinel_product: Sentinel1Product -output: - downloaded_product: Sentinel1Raster -parameters: - api_key: "" - block_size: 2048 - num_workers: 20 - timeout_s: 120 -entrypoint: - file: download_sentinel1_rtc.py - callback_builder: CallbackBuilder -description: - short_description: Downloads the Sentinel-1 RTC product bands. - long_description: - The op will read the bands from the Planetary Computer and stack them into a single 2 band TIFF - file. - inputs: - sentinel_product: Product to be downloaded. - output: - sentinel_products: Downloaded product with an asset that contains both Sentinel-1 RTC bands. - parameters: - api_key: Planetary Computer API key. 
- block_size: Size of blocks that are read by each thread - num_workers: How many threads to use when reading data from the Planetary Computer blobs. - timeout_s: Maximum time, in seconds, before a band reading operation times out. diff --git a/ops/download_sentinel1/download_sentinel1_rtc.py b/ops/download_sentinel1/download_sentinel1_rtc.py deleted file mode 100644 index 46c351fd..00000000 --- a/ops/download_sentinel1/download_sentinel1_rtc.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import os -from concurrent.futures import TimeoutError -from tempfile import TemporaryDirectory -from typing import Dict - -import planetary_computer as pc -import rasterio -from pystac import Item -from rasterio.enums import Resampling -from rasterio.windows import Window - -from vibe_core.data import AssetVibe, Sentinel1Product, Sentinel1Raster, gen_guid -from vibe_lib.planetary_computer import Sentinel1RTCCollection -from vibe_lib.raster import FLOAT_COMPRESSION_KWARGS, get_profile_from_ref, serial_stack_bands - -LOGGER = logging.getLogger(__name__) - - -def read_block(raster_url: str, win: Window): - with rasterio.open(raster_url) as src: - return src.read(window=win), win - - -class CallbackBuilder: - def __init__(self, api_key: str, num_workers: int, block_size: int, timeout_s: float): - self.api_key = api_key - self.num_workers = num_workers - self.block_size = block_size - self.timeout_s = timeout_s - self.tmp_dir = TemporaryDirectory() - - def stack_bands(self, col: Sentinel1RTCCollection, item: Item) -> AssetVibe: - asset_guid = gen_guid() - out_path = os.path.join(self.tmp_dir.name, f"{asset_guid}.tif") - LOGGER.debug(f"Downloading Sentinel-1 RTC bands for product {item.id}") - band_hrefs = col.download_item(item, os.path.join(self.tmp_dir.name, item.id)) - LOGGER.debug(f"Done downloading Sentinel-1 RTC bands for product {item.id}") - kwargs = get_profile_from_ref( - band_hrefs[0], 
count=len(band_hrefs), **FLOAT_COMPRESSION_KWARGS - ) - LOGGER.debug(f"Stacking Sentinel-1 RTC bands for product {item.id}") - serial_stack_bands( - band_hrefs, - out_path, - (self.block_size, self.block_size), - Resampling.bilinear, - **kwargs, - ) - LOGGER.debug(f"Done stacking Sentinel-1 RTC bands for product {item.id}") - return AssetVibe(reference=out_path, type="image/tiff", id=asset_guid) - - def __call__(self): - def callback(sentinel_product: Sentinel1Product) -> Dict[str, Sentinel1Raster]: - pc.set_subscription_key(self.api_key) - col = Sentinel1RTCCollection() - item = pc.sign(col.query_by_id(sentinel_product.id)) - try: - asset = self.stack_bands(col, item) - except TimeoutError as e: - raise TimeoutError( - f"Timeout while stacking bands for products {sentinel_product.product_name}" - ) from e - raster = Sentinel1Raster.clone_from( - sentinel_product, - sentinel_product.id, - assets=[asset], - bands={k.upper(): i for i, k in enumerate(col.asset_keys)}, - tile_id="", - ) - return {"downloaded_product": raster} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_sentinel1/test_download_sentinel1_rtc.py b/ops/download_sentinel1/test_download_sentinel1_rtc.py deleted file mode 100644 index 5648e94e..00000000 --- a/ops/download_sentinel1/test_download_sentinel1_rtc.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from pathlib import Path -from unittest.mock import Mock, patch - -import numpy as np -import planetary_computer as pc -import pytest -import rasterio -from pystac import Asset, Item -from shapely import geometry as shpg - -from vibe_core.data import Sentinel1Product, Sentinel1Raster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import Sentinel1RTCCollection - -HERE = os.path.dirname(os.path.abspath(__file__)) -CONFIG_PATH = os.path.join(HERE, "download_sentinel1.yaml") - -IMG_SIZE = 100 - - -@pytest.fixture -def fake_item(tmp_path: Path): - assets = {} - for i, band in enumerate(("vh", "vv"), start=1): - band_path = str(tmp_path / f"{band}.tif") - with rasterio.open( - band_path, - "w", - driver="GTiff", - count=1, - width=IMG_SIZE, - height=IMG_SIZE, - dtype="float32", - nodata=0, - ) as dst: - dst.write(i * np.ones((1, IMG_SIZE, IMG_SIZE))) - assets[band] = Asset(href=band_path) - - return Item( - id="1", - geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), - bbox=None, - datetime=datetime.now(), - properties={}, - assets=assets, - ) - - -@patch.object(pc, "sign") -@patch.object(Sentinel1RTCCollection, "download_item") -@patch.object(Sentinel1RTCCollection, "query_by_id") -@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["sentinel-1-rtc"]) -def test_op( - collections_mock: Mock, query_mock: Mock, download_mock: Mock, sign_mock: Mock, fake_item: Item -): - query_mock.return_value = fake_item - download_mock.return_value = [fake_item.assets["vh"].href, fake_item.assets["vv"].href] - sign_mock.side_effect = lambda x: x - geom = shpg.box(0, 0, 1, 1) - fake_input = Sentinel1Product( - id="1", - time_range=(datetime.now(), datetime.now()), - geometry=shpg.mapping(geom), - assets=[], - product_name="product_name", - orbit_number=0, - relative_orbit_number=0, - orbit_direction="", - platform="", - extra_info={}, - sensor_mode="", - polarisation_mode="", - ) - - 
op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({"num_workers": 1}) - out = op_tester.run(sentinel_product=fake_input) - key = "downloaded_product" - assert key in out - product = out[key] - assert isinstance(product, Sentinel1Raster) - assert product.time_range == fake_input.time_range - assert product.geometry == fake_input.geometry - with rasterio.open(product.raster_asset.local_path) as src: - profile = src.profile - ar = src.read() - assert profile["dtype"] == "float32" - assert profile["nodata"] == 0.0 - assert ar.shape == (2, IMG_SIZE, IMG_SIZE) diff --git a/ops/download_sentinel1_grd/download_sentinel1_grd.py b/ops/download_sentinel1_grd/download_sentinel1_grd.py deleted file mode 100644 index 66f16fa1..00000000 --- a/ops/download_sentinel1_grd/download_sentinel1_grd.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import os -import shutil -import time -from tempfile import TemporaryDirectory -from typing import Final, cast - -import planetary_computer as pc -from requests import RequestException - -from vibe_core.data import DownloadedSentinel1Product, Sentinel1Product -from vibe_core.file_downloader import download_file -from vibe_lib.planetary_computer import ( - get_complete_s1_prefix, - get_sentinel1_container_client, - get_sentinel1_scene_files, -) - -RETRY_WAIT: Final[int] = 10 -MAX_RETRIES: Final[int] = 5 -LOGGER: Final[logging.Logger] = logging.getLogger(__name__) -READ_TIMEOUT_S: Final[int] = 90 -MAX_CONCURRENCY: Final[int] = 3 - - -def download_from_blob(item: Sentinel1Product, save_path: str) -> str: - container_client = get_sentinel1_container_client() - scene_files = get_sentinel1_scene_files(item) - LOGGER.debug(f"Obtained {len(scene_files)} scene files for product '{item.product_name}'") - - if not scene_files: - # No scene files found! - raise RuntimeError( - f"Failed to download sentinel 1 product {item.product_name}, no scene files found." 
- ) - - blob_prefix = get_complete_s1_prefix(scene_files) - LOGGER.debug(f"Obtained blob prefix '{blob_prefix}' for product name '{item.product_name}'") - product_name = blob_prefix.split("/")[-1] - - zip_name = os.path.join(save_path, product_name) - base_dir = f"{zip_name}.SAFE" - - LOGGER.debug(f"Downloading scene files for product '{item.product_name}'") - for blob in scene_files: - out_path = os.path.join(base_dir, os.path.relpath(cast(str, blob.name), blob_prefix)) - save_dir = os.path.dirname(out_path) - os.makedirs(save_dir, exist_ok=True) - for retry in range(MAX_RETRIES): - try: - url = container_client.get_blob_client(blob.name).url - download_file(url, out_path) - break - except RequestException as e: - LOGGER.warning( - f"Exception {e} downloading from blob {blob.name}." - f" Retrying after {RETRY_WAIT}s ({retry+1}/{MAX_RETRIES})." - ) - time.sleep(RETRY_WAIT) - else: - raise RuntimeError(f"Failed asset {blob.name} after {MAX_RETRIES} retries.") - LOGGER.debug(f"Making zip archive '{zip_name}' for root dir '{save_path}'") - zip_path = shutil.make_archive( - zip_name, "zip", root_dir=save_path, base_dir=f"{product_name}.SAFE" - ) - return zip_path - - -class CallbackBuilder: - def __init__(self, api_key: str): - self.tmp_dir = TemporaryDirectory() - self.api_key = api_key - - def __call__(self): - def download_sentinel1_from_pc(sentinel_product: Sentinel1Product): - pc.set_subscription_key(self.api_key) - save_path = os.path.join(self.tmp_dir.name, sentinel_product.id) - zip_path = download_from_blob(sentinel_product, save_path) - new_item = DownloadedSentinel1Product.clone_from( - sentinel_product, sentinel_product.id, assets=[] - ) - new_item.add_zip_asset(zip_path) - return {"downloaded_product": new_item} - - return download_sentinel1_from_pc - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/download_sentinel1_grd/download_sentinel1_grd.yaml b/ops/download_sentinel1_grd/download_sentinel1_grd.yaml deleted file mode 100644 index 
7cfc47cb..00000000 --- a/ops/download_sentinel1_grd/download_sentinel1_grd.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: download_sentinel_1_grd -inputs: - sentinel_product: Sentinel1Product -output: - downloaded_product: DownloadedSentinel1Product -parameters: - api_key: "" -entrypoint: - file: download_sentinel1_grd.py - callback_builder: CallbackBuilder -description: - short_description: Downloads Sentinel-1 GRD products. \ No newline at end of file diff --git a/ops/download_sentinel1_grd/test_download_sentinel1.py b/ops/download_sentinel1_grd/test_download_sentinel1.py deleted file mode 100644 index 798f36c1..00000000 --- a/ops/download_sentinel1_grd/test_download_sentinel1.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from unittest.mock import Mock, patch -from zipfile import ZipFile - -import pytest -from shapely import geometry as shpg - -from vibe_core.data import DownloadedSentinel1Product, Sentinel1Product -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import generate_sentinel1_blob_path - -HERE = os.path.dirname(os.path.abspath(__file__)) -CONFIG_PATH = os.path.join(HERE, "download_sentinel1_grd.yaml") -FULL_PRODUCT_NAME = "S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0" - - -class MockBlob: - def __init__(self, name: str): - self.name = name - - def __getitem__(self, key: str): - return getattr(self, key) - - -def fake_download(_, file_path: str): - with open(os.path.join(file_path), "w") as f: - f.write("🌎") - - -@pytest.mark.parametrize("product_name", ("complete", "incomplete")) -@patch("vibe_core.file_downloader.download_file") -@patch("vibe_lib.planetary_computer.get_sentinel1_scene_name") -@patch("vibe_lib.planetary_computer.get_sentinel1_scene_files") -@patch("vibe_lib.planetary_computer.get_sentinel1_container_client") -def test_op( - get_s1_client: Mock, - s1_scene_files: Mock, - 
s1_scene_name: Mock, - download_file: Mock, - product_name: str, -): - s1_scene_name.return_value = FULL_PRODUCT_NAME - download_file.side_effect = fake_download - geom = shpg.box(0, 0, 1, 1) - fake_input = Sentinel1Product( - id="1", - time_range=(datetime.now(), datetime.now()), - geometry=shpg.mapping(geom), - assets=[], - product_name=FULL_PRODUCT_NAME, - orbit_number=0, - relative_orbit_number=0, - orbit_direction="", - platform="", - extra_info={}, - sensor_mode="", - polarisation_mode="", - ) - blob_path = generate_sentinel1_blob_path(fake_input) - s1_scene_files.return_value = [ - MockBlob(f"{blob_path}/fake.txt"), - MockBlob(f"{blob_path}/fake_dir/fake2.txt"), - ] - op_tester = OpTester(CONFIG_PATH) - if product_name == "incomplete": - fake_input.product_name = FULL_PRODUCT_NAME[:-4] - out = op_tester.run(sentinel_product=fake_input) - key = "downloaded_product" - assert key in out - product = out[key] - assert isinstance(product, DownloadedSentinel1Product) - zip_path = product.get_zip_asset().local_path - assert os.path.basename(zip_path) == f"{FULL_PRODUCT_NAME}.zip" - base_dir = f"{FULL_PRODUCT_NAME}.SAFE" - with ZipFile(zip_path) as zf: - il = zf.infolist() - assert len(il) == 4 - assert f"{base_dir}/" == il[0].filename - assert f"{base_dir}/fake_dir/" == il[1].filename - assert f"{base_dir}/fake.txt" == il[2].filename - assert f"{base_dir}/fake_dir/fake2.txt" == il[3].filename - with zf.open(il[2]) as f: - content = f.read() - with zf.open(il[3]) as f: - content2 = f.read() - assert content.decode("utf-8") == content2.decode("utf-8") == "🌎" diff --git a/ops/download_sentinel2_from_pc/download_s2_pc.py b/ops/download_sentinel2_from_pc/download_s2_pc.py deleted file mode 100644 index f47fa23a..00000000 --- a/ops/download_sentinel2_from_pc/download_s2_pc.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
LOGGER = logging.getLogger(__name__)

# Matches the "_Nxxxx_" token (four digits) embedded in Sentinel-2 product ids.
_BASELINE_TOKEN = r"_N[\d]{4}_"


def get_partial_id(product_id: str) -> str:
    """
    Reduce a product id to the part used for matching products.

    The "_Nxxxx_" token is collapsed to a single underscore and the last
    underscore-separated field is dropped. An id without any underscore
    yields an empty string.
    """
    without_baseline = re.sub(_BASELINE_TOKEN, "_", product_id)
    head, _, _ = without_baseline.rpartition("_")
    return head


class CallbackBuilder:
    """Builds the op callback that downloads the assets of one Sentinel-2 product."""

    def __init__(self, api_key: str):
        # Every downloaded file lives here until the builder is destroyed
        self.tmp_dir = TemporaryDirectory()
        self.api_key = api_key

    def __call__(self):
        def download_product(
            sentinel_product: Sentinel2Product,
        ) -> Dict[str, Optional[DownloadedSentinel2Product]]:
            """
            Find the catalog item matching `sentinel_product`, download its bands and,
            when the blob exists, its cloud mask GML.

            Raises RuntimeError when no catalog item matches the product name.
            """
            pc.set_subscription_key(self.api_key)
            collection = Sentinel2Collection()
            wanted_id = get_partial_id(sentinel_product.product_name)
            candidates = [
                found
                for found in collection.query(
                    roi=sentinel_product.bbox, time_range=sentinel_product.time_range
                )
                if get_partial_id(found.id) == wanted_id
            ]
            if not candidates:
                raise RuntimeError(
                    f"Could not find matches for sentinel 2 product "
                    f"{sentinel_product.product_name}"
                )
            if len(candidates) > 1:
                # Several candidates: order by discriminator date, newest first
                candidates.sort(key=lambda c: discriminator_date(c.id), reverse=True)
                LOGGER.warning(
                    f"Found {len(candidates)} > 1 matches for product "
                    f"{sentinel_product.product_name}: {', '.join([m.id for m in candidates])}. "
                    f"Picking newest one ({candidates[0].id})."
                )
            chosen = candidates[0]

            result = DownloadedSentinel2Product.clone_from(
                sentinel_product, sentinel_product.id, []
            )
            out_dir = self.tmp_dir.name

            # Adding bands (this is where the actual download happens)
            for band in collection.asset_keys:
                result.add_downloaded_band(
                    band, collection.download_asset(chosen.assets[band], out_dir)
                )

            # Adding cloud mask
            gml_local = os.path.join(out_dir, f"{gen_guid()}.gml")
            gml_remote = collection.get_cloud_mask(chosen)
            if not BlobClient.from_blob_url(gml_remote).exists():
                LOGGER.warning(
                    f"GML file is not available for product {sentinel_product.product_name}"
                )
            else:
                download_file(gml_remote, gml_local)
                result.add_downloaded_cloudmask(gml_local)

            return {"downloaded_product": result}

        return download_product

    def __del__(self):
        self.tmp_dir.cleanup()
LOGGER = logging.getLogger(__name__)

MAX_RETRIES = 5
RETRY_WAIT_S = 10


def _call_with_retries(action, describe: str, failure: str):
    """
    Run `action()` up to MAX_RETRIES times and return its first successful result.

    Between failed attempts the function sleeps RETRY_WAIT_S seconds. No sleep
    happens after the last attempt, since nothing is left to wait for.

    :param action: Zero-argument callable to execute.
    :param describe: Fragment inserted in the warning logged on each failure.
    :param failure: Message of the RuntimeError raised once all attempts failed.
    :raises RuntimeError: When every attempt raised; chained to the last exception.
    """
    last_error = None
    for retry in range(MAX_RETRIES):
        try:
            return action()
        except Exception as e:
            last_error = e
            if retry + 1 < MAX_RETRIES:
                LOGGER.warning(
                    f"Exception {e} {describe}."
                    f" Retrying after {RETRY_WAIT_S}s ({retry+1}/{MAX_RETRIES})"
                )
                time.sleep(RETRY_WAIT_S)
            else:
                LOGGER.warning(
                    f"Exception {e} {describe}."
                    f" No retries left ({retry+1}/{MAX_RETRIES})"
                )
    raise RuntimeError(failure) from last_error


class SoilGridsWS:
    """Client for one SoilGrids map, accessed through its WCS endpoint."""

    # Map key -> [description, WCS endpoint url]
    MAPS: Final[Dict[str, List[str]]] = {
        "wrb": [
            "World Reference Base classes and probabilites",
            "https://maps.isric.org/mapserv?map=/map/wrb.map",
        ],
        "bdod": ["Bulk density", "https://maps.isric.org/mapserv?map=/map/bdod.map"],
        "cec": [
            "Cation exchange capacity at ph 7",
            "https://maps.isric.org/mapserv?map=/map/cec.map",
        ],
        "cfvo": ["Coarse fragments volumetric", "https://maps.isric.org/mapserv?map=/map/cfvo.map"],
        "clay": ["Clay content", "https://maps.isric.org/mapserv?map=/map/clay.map"],
        "nitrogen": ["Nitrogen", "https://maps.isric.org/mapserv?map=/map/nitrogen.map"],
        "phh2o": ["Soil pH in H2O", "https://maps.isric.org/mapserv?map=/map/phh2o.map"],
        "sand": ["Sand content", "https://maps.isric.org/mapserv?map=/map/sand.map"],
        "silt": ["Silt content", "https://maps.isric.org/mapserv?map=/map/silt.map"],
        "soc": ["Soil organic carbon content", "https://maps.isric.org/mapserv?map=/map/soc.map"],
        "ocs": ["Soil organic carbon stock", "https://maps.isric.org/mapserv?map=/map/ocs.map"],
        "ocd": ["Organic carbon densities", "https://maps.isric.org/mapserv?map=/map/ocd.map"],
    }

    def __init__(self, map: str):
        """
        Resolve the endpoint of `map` and open the WCS connection (with retries).

        :raises ValueError: If `map` is not a key of MAPS.
        :raises RuntimeError: If the service could not be reached after MAX_RETRIES attempts.
        """
        self.map = map
        try:
            _, self.url = self.MAPS[map]
        except KeyError:
            raise ValueError(
                f"Map {map} cannot be found. "
                f"The maps available are: all {' '.join(self.MAPS.keys())}."
            )
        self.wcs = _call_with_retries(
            lambda: WebCoverageService(self.url, version="2.0.1"),  # type: ignore
            f"requesting from {self.url}",
            f"Failed request to {self.url} after {MAX_RETRIES} retries.",
        )

    def get_ids(self) -> List[str]:
        """Return the coverage identifiers listed in the service contents."""
        return list(self.wcs.contents)  # type: ignore

    def download_id(self, id: str, tmpdir: str, input_item: DataVibe) -> Raster:
        """
        Download coverage `id` over the bounding box of `input_item` into `tmpdir`.

        :raises ValueError: If `id` is not offered by the service.
        :raises RuntimeError: If the download failed MAX_RETRIES times.
        """
        available = self.get_ids()
        if id not in available:
            raise ValueError(
                f"Identifier {id} not found in {self.url}. Identifiers available"
                f" are: {' '.join(available)}"
            )
        bbox = input_item.bbox
        subsets = [("long", bbox[0], bbox[2]), ("lat", bbox[1], bbox[3])]

        def fetch():
            # The whole request/write/wrap sequence is retried as one unit
            response = self.wcs.getCoverage(  # type: ignore
                identifier=[id],
                subsets=subsets,
                SUBSETTINGCRS="http://www.opengis.net/def/crs/EPSG/0/4326",
                OUTPUTCRS="http://www.opengis.net/def/crs/EPSG/0/4326",
                format="image/tiff",
            )
            fpath = os.path.join(tmpdir, f"{id}_{gen_guid()}.tif")
            with open(fpath, "wb") as file:
                file.write(response.read())
            vibe_asset = AssetVibe(
                reference=fpath, type=cast(str, mimetypes.guess_type(fpath)[0]), id=gen_guid()
            )
            return Raster(
                id=gen_hash_id(
                    f"soilgrids_{self.map}_{id}",
                    input_item.geometry,
                    (datetime(2022, 1, 1), datetime(2022, 1, 1)),  # dummy date
                ),
                time_range=input_item.time_range,
                geometry=input_item.geometry,
                assets=[vibe_asset],
                bands={f"{self.map}:{id}": 0},
            )

        return _call_with_retries(
            fetch,
            f"downloading {id} from {self.url}",
            f"Failed request for {id} in {self.url} after {MAX_RETRIES} retries.",
        )


class CallbackBuilder:
    """Builds the op callback that downloads one SoilGrids layer for the input geometry."""

    def __init__(self, map: str, identifier: str):
        self.tmp_dir = TemporaryDirectory()
        self.map = map
        self.identifier = identifier

    def __call__(self):
        def download_soilgrids(
            input_item: DataVibe,
        ) -> Dict[str, Raster]:
            sg = SoilGridsWS(self.map)
            res = sg.download_id(self.identifier, self.tmp_dir.name, input_item)
            return {"downloaded_raster": res}

        return download_soilgrids

    def __del__(self):
        self.tmp_dir.cleanup()
LOGGER = logging.getLogger(__name__)

# Position in this list is the pixel value written to the cloud mask
CLOUD_CATEGORIES = ["NO-CLOUD", "OPAQUE", "CIRRUS", "OTHER"]

# Sentinel-2 band name -> band alias used by spyndex
SENTINEL2_SPYNDEX: Dict[str, str] = {
    "B01": "A",
    "B02": "B",
    "B03": "G",
    "B04": "R",
    "B05": "RE1",
    "B06": "RE2",
    "B07": "RE3",
    "B08": "N",
    "B8A": "N2",
    "B09": "WV",
    "B11": "S1",
    "B12": "S2",
}


def get_partial_id(product_id: str) -> str:
    """
    Reduce a product id to the part used for matching products: the "_Nxxxx_"
    token is collapsed to "_" and the last underscore-separated field is dropped.
    """
    without_baseline = re.sub(r"_N[\d]{4}_", "_", product_id)
    return without_baseline.rpartition("_")[0]


def rasterize_clouds(cloud_gml_ref: str, ref_file: str, out_path: str, product_name: str) -> None:
    """
    Rasterize cloud shapes and save compressed tiff file.

    The output grid is taken from `ref_file`. When the GML blob does not exist,
    or reading it raises ValueError, an all-zero mask is written.
    """
    with open_raster_from_ref(ref_file) as src:
        meta = src.meta
    meta.update(
        {"nodata": 100, "count": 1, "driver": "GTiff", "dtype": "uint8", **INT_COMPRESSION_KWARGS}
    )
    mask = np.zeros((meta["height"], meta["width"]))
    other_value = CLOUD_CATEGORIES.index("OTHER")
    try:
        # The file might not exist, in this case we generate empty cloud masks (no clouds)
        if not BlobClient.from_blob_url(cloud_gml_ref).exists():
            LOGGER.debug(
                f"Cloud GML file is not available for product {product_name}, generating empty mask"
            )
        else:
            shapes = gpd.read_file(cloud_gml_ref, WRITE_GFS="NO")
            known_types = {name: CLOUD_CATEGORIES.index(name) for name in ("OPAQUE", "CIRRUS")}
            # Mask types outside of the known ones end up as "OTHER"
            pixel_values = (
                shapes["maskType"]  # type: ignore
                .map(known_types)  # type: ignore
                .fillna(other_value)
            )
            rasterize(
                zip(shapes["geometry"], pixel_values),  # type: ignore
                out=mask,
                transform=meta["transform"],
            )
    except ValueError:
        # Empty file means no clouds
        LOGGER.debug(
            "ValueError when opening cloud GML file. Assuming there are no clouds and ignoring",
            exc_info=True,
        )
    with open_raster_from_ref(out_path, "w", **meta) as dst:
        dst.write(mask, 1)


class CallbackBuilder:
    """Builds the op callback that downloads, stacks and cloud-masks one Sentinel-2 product."""

    def __init__(self, api_key: str, num_workers: int, block_size: int, timeout_s: float):
        self.tmp_dir = TemporaryDirectory()
        self.api_key = api_key
        # NOTE(review): num_workers and timeout_s are stored but not read in this file
        self.num_workers = num_workers
        self.block_size = block_size
        self.timeout_s = timeout_s

    def __call__(self):
        def callback(
            sentinel_product: Sentinel2Product,
        ) -> Dict[str, Union[Sentinel2Raster, Sentinel2CloudMask]]:
            """
            Download the bands of the matching catalog item, stack them into one
            tiff and rasterize the cloud mask on the same grid.

            Raises RuntimeError when no catalog item matches the product name.
            """
            product_name = sentinel_product.product_name
            work_dir = self.tmp_dir.name

            pc.set_subscription_key(self.api_key)
            collection = Sentinel2Collection()
            wanted_id = get_partial_id(product_name)
            candidates = [
                found
                for found in collection.query(
                    roi=sentinel_product.bbox, time_range=sentinel_product.time_range
                )
                if get_partial_id(found.id) == wanted_id
            ]
            if not candidates:
                raise RuntimeError(
                    f"Could not find matches for sentinel 2 product "
                    f"{sentinel_product.product_name}"
                )
            if len(candidates) > 1:
                candidates.sort(key=lambda c: discriminator_date(c.id), reverse=True)
                LOGGER.warning(
                    f"Found {len(candidates)} > 1 matches for product "
                    f"{sentinel_product.product_name}: {', '.join([m.id for m in candidates])}. "
                    f"Picking newest one ({candidates[0].id})."
                )

            item = pc.sign(candidates[0])
            LOGGER.debug(
                f"Downloading Sentinel-2 bands for product {sentinel_product.product_name}"
            )
            band_hrefs = collection.download_item(item, os.path.join(work_dir, product_name))
            LOGGER.debug(
                f"Done downloading Sentinel-2 bands for product {sentinel_product.product_name}"
            )
            # The B02 band is the reference for the profile of the stacked file
            reference_href = band_hrefs[collection.asset_keys.index("B02")]
            tiff_args = get_profile_from_ref(
                reference_href,
                count=len(band_hrefs),
                nodata=0,
                **INT_COMPRESSION_KWARGS,
            )
            bands_id = gen_guid()
            bands_path = os.path.join(work_dir, f"{bands_id}.tif")
            LOGGER.debug(f"Stacking Sentinel-2 bands for product {sentinel_product.product_name}")
            serial_stack_bands(
                band_hrefs,
                bands_path,
                block_size=(self.block_size, self.block_size),
                resampling=Resampling.bilinear,
                **tiff_args,
            )
            LOGGER.debug(f"Done stacking bands for product {sentinel_product.product_name}")

            # Adding cloud mask
            mask_id = gen_guid()
            mask_path = os.path.join(work_dir, f"{mask_id}.tif")
            rasterize_clouds(collection.get_cloud_mask(item), bands_path, mask_path, product_name)

            band_idx = {name: position for position, name in enumerate(collection.asset_keys)}
            # Add band aliases for spyndex
            band_idx.update({alias: band_idx[band] for band, alias in SENTINEL2_SPYNDEX.items()})

            def hashed_id(prefix: str) -> str:
                return hashlib.sha256(f"{prefix} {sentinel_product.id}".encode()).hexdigest()

            bands_raster = Sentinel2Raster.clone_from(
                sentinel_product,
                bands=band_idx,
                id=hashed_id("stacked bands"),
                assets=[AssetVibe(reference=bands_path, type="image/tiff", id=bands_id)],
            )
            cloud_raster = Sentinel2CloudMask.clone_from(
                sentinel_product,
                bands={"cloud": 0},
                categories=CLOUD_CATEGORIES,
                id=hashed_id("clouds"),
                assets=[AssetVibe(reference=mask_path, type="image/tiff", id=mask_id)],
            )
            return {"raster": bands_raster, "cloud": cloud_raster}

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
a/ops/download_stack_sentinel2/download_stack_sentinel2.yaml b/ops/download_stack_sentinel2/download_stack_sentinel2.yaml deleted file mode 100644 index 5ff52288..00000000 --- a/ops/download_stack_sentinel2/download_stack_sentinel2.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: download_stack_sentinel2 -inputs: - sentinel_product: Sentinel2Product -output: - raster: Sentinel2Raster - cloud: Sentinel2CloudMask -parameters: - api_key: "" - block_size: 2048 - num_workers: 20 - timeout_s: 120 -entrypoint: - file: download_stack_s2.py - callback_builder: CallbackBuilder -description: - short_description: Downloads and preprocesses Sentinel-2 products. - long_description: - The op will read the bands from the Planetary Computer, resample them to 10m resolution, and - stack them into a single 12-band TIFF file. A cloud mask obtained from quality indicators is - also generated at 10m resolution. - inputs: - sentinel_product: Product to be downloaded. - output: - raster: - Downloaded product with an asset that contains 12 Sentinel-2 L2A bands at 10m resolution. - cloud: Cloud mask at 10m resolution - parameters: - api_key: Planetary Computer API key. - block_size: Size of blocks that are read by each thread - num_workers: - Number of threads used when reading and resampling data from the Planetary Computer blobs. - timeout_s: Maximum time, in seconds, before a band reading operation times out. diff --git a/ops/download_usda_soils/download_usda_soils.py b/ops/download_usda_soils/download_usda_soils.py deleted file mode 100644 index 9c52bc02..00000000 --- a/ops/download_usda_soils/download_usda_soils.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
class CallbackBuilder:
    """Builds the op callback that downloads the USDA soil classes raster."""

    def __init__(self, url: str, zip_file: str, tiff_file: str, meta_file: str):
        self.url = url
        self.zip_file = zip_file
        self.tiff_file = tiff_file
        self.meta_file = meta_file
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def download_usda_soils(
            input_item: DataVibe,
        ) -> Dict[str, Optional[CategoricalRaster]]:
            """
            Download the archive at `url`, extract the raster and its class table,
            and wrap them in a CategoricalRaster whose geometry is the raster bounds.
            """
            work_dir = self.tmp_dir.name
            archive_path = os.path.join(work_dir, self.zip_file)
            tiff_path = os.path.join(work_dir, self.tiff_file)
            table_path = os.path.join(work_dir, self.meta_file)

            download_file(self.url, archive_path)
            # Copy the two members we need out of the archive
            with zipfile.ZipFile(archive_path) as archive:
                for member, target in ((self.tiff_file, tiff_path), (self.meta_file, table_path)):
                    with open(target, "wb") as sink:
                        sink.write(archive.read(member))

            raster_asset = AssetVibe(reference=tiff_path, type="image/tiff", id=gen_guid())

            with rasterio.open(tiff_path) as ds:
                raster_geometry = mapping(shpg.box(*ds.bounds))

            table = pd.read_table(table_path, index_col=0)
            labels = table["SOIL_ORDER"] + ":" + table["SUBORDER"]  # type: ignore
            # Invert the table: "order:suborder" label -> value of the index column
            label_to_code = {label: code for code, label in labels.to_dict().items()}

            raster_id = gen_hash_id(
                "usda_soil",
                raster_geometry,
                (datetime(2015, 1, 1), datetime(2015, 12, 31)),  # dummy dates
            )
            downloaded_raster = CategoricalRaster.clone_from(
                input_item,
                id=raster_id,
                assets=[raster_asset, json_to_asset(label_to_code, work_dir)],
                time_range=input_item.time_range,
                geometry=raster_geometry,
                bands={"soil_order:suborder": 0},
                categories=list(label_to_code.keys()),
            )
            return {"downloaded_raster": downloaded_raster}

        return download_usda_soils

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Builds the op callback that averages five cloud probability rasters."""

    def __init__(self) -> None:
        # Holds the ensemble raster written by the callback
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def ensemble_cloud_prob(
            cloud1: Sentinel2CloudProbability,
            cloud2: Sentinel2CloudProbability,
            cloud3: Sentinel2CloudProbability,
            cloud4: Sentinel2CloudProbability,
            cloud5: Sentinel2CloudProbability,
        ) -> Dict[str, Sentinel2CloudProbability]:
            """
            Concatenate the five rasters along the "band" dimension and take the
            mean over it. The output is cloned from `cloud1` with a new id and asset.
            """
            ar = [load_raster(c) for c in (cloud1, cloud2, cloud3, cloud4, cloud5)]
            ar = xr.concat(ar, dim="band").mean(dim="band")
            asset = save_raster_to_asset(ar, self.tmp_dir.name)
            return {
                "cloud_probability": Sentinel2CloudProbability.clone_from(
                    cloud1, id=gen_guid(), assets=[asset]
                )
            }

        return ensemble_cloud_prob

    def __del__(self):
        # Explicit cleanup, as done by the other op builders, instead of relying on
        # the implicit finalizer of TemporaryDirectory (which emits a ResourceWarning)
        self.tmp_dir.cleanup()
# Licensed under the MIT License.

# Pre-computed regression weights per supported index. "coefficients" holds one
# weight per polynomial feature of degree 0..3 of the index value.
SUPPORTED_INDICES: Dict[str, Dict[str, NDArray[np.float32]]] = {
    "ndvi": {
        "coefficients": np.array([[0.0, 0.28480232, 0.8144678, 0.63961434]], dtype=np.float32),
        "intercept": np.array([-0.10434419], dtype=np.float32),
    },
}


def calibrate(model: Pipeline, index: xr.DataArray):
    """
    Calibrate non-masked values, clip to [0, 1] and copy over the geodata from original array

    When every value is masked there is nothing to predict, so the model is not
    called (scikit-learn rejects inputs with zero samples) and the returned array
    is a fully masked copy of the input.
    """
    index_masked = index.to_masked_array()
    index_compressed = index_masked.compressed()
    calibrated_masked = index_masked.copy()
    if index_compressed.size > 0:
        # The model expects a 2D (samples, features) input with a single feature
        calibrated = model.predict(index_compressed[:, None]).reshape(-1).clip(0, 1)  # type: ignore
        calibrated_masked.data[~calibrated_masked.mask] = calibrated
    return index.copy(data=calibrated_masked)


class CallbackBuilder:
    """Builds the op callback that estimates canopy cover from index rasters."""

    def __init__(self, index: str):
        self.tmp_dir = TemporaryDirectory()
        if index not in SUPPORTED_INDICES:
            raise ValueError(f"Operation estimate_canopy called with unsupported index {index}")
        self.index = index

    def _build_model(self) -> Pipeline:
        """Create the polynomial regression pipeline and copy the stored weights into it."""
        model = make_pipeline(PolynomialFeatures(degree=3), Ridge())
        # Fitting on a dummy sample only sets up the feature expansion for 1 input feature
        model[0].fit(np.zeros((1, 1)))
        model[1].coef_ = SUPPORTED_INDICES[self.index]["coefficients"].copy()  # type: ignore
        model[1].intercept_ = SUPPORTED_INDICES[self.index]["intercept"].copy()  # type: ignore
        return model

    def __call__(self):
        def calibration_callback(index_raster: Raster, model: Pipeline) -> Raster:
            output_dir = self.tmp_dir.name

            index = load_raster(index_raster, use_geometry=True)
            calibrated = calibrate(model, index)

            vis_dict: Dict[str, Any] = {
                "bands": [0],
                "colormap": get_cmap("viridis"),
                "range": (0, 1),
            }
            calibrated_raster = save_raster_from_ref(
                calibrated, output_dir, ref_raster=index_raster
            )
            calibrated_raster.assets.append(json_to_asset(vis_dict, output_dir))
            return calibrated_raster

        def calibration_callback_list(indices: List[Raster]) -> Dict[str, List[Raster]]:
            # The weights are fixed, so one model serves every raster of the list
            model = self._build_model()
            return {
                "estimated_canopy_cover": [calibration_callback(index, model) for index in indices]
            }

        return calibration_callback_list

    def __del__(self):
        self.tmp_dir.cleanup()
# Names of the beam groups read from the GEDI hdf5 file
BEAMS = [
    "BEAM0000",
    "BEAM0001",
    "BEAM0010",
    "BEAM0011",
    "BEAM0101",
    "BEAM0110",
    "BEAM1000",
    "BEAM1011",
]
L2B = "GEDI02_B.002"
LOGGER = logging.getLogger(__name__)


def extract_dataset(filepath: str, geometry: BaseGeometry, check_quality: bool):
    """
    Collect beam id and rh100 of every shot that falls within `geometry`.

    :param filepath: Path to the hdf5 file.
    :param geometry: Region of interest, in the lon/lat coordinates of the file.
    :param check_quality: When true, keep only shots whose l2b_quality_flag is non-zero.
    :return: GeoDataFrame with columns geometry, beam and rh100 (crs epsg:4326),
        or None when no shot is selected.
    """
    lon_min, lat_min, lon_max, lat_max = cast(BBox, geometry.bounds)
    d: Dict[str, List[Any]] = defaultdict(list)
    with h5py.File(filepath) as h5:
        for b in BEAMS:
            lon = cast(h5py.Dataset, h5.get(f"{b}/geolocation/lon_lowestmode"))[()]
            lat = cast(h5py.Dataset, h5.get(f"{b}/geolocation/lat_lowestmode"))[()]
            # Cheap bounding box filter before the exact point-in-geometry test
            bbox_mask = (lon_min <= lon) & (lon <= lon_max) & (lat_min <= lat) & (lat <= lat_max)
            if not bbox_mask.any():
                continue
            bbox_idx = np.where(bbox_mask)[0]
            pts = gpd.points_from_xy(lon[bbox_idx], lat[bbox_idx])
            within = pts.within(geometry)
            if not within.any():
                continue
            # within_idx indexes into pts, idx indexes into the datasets of the file
            within_idx = np.where(within)[0]
            idx = bbox_idx[within_idx]

            if check_quality:
                # Filter data by quality flag: 1 = good, 0 = bad
                qual = cast(h5py.Dataset, h5.get(f"{b}/l2b_quality_flag"))[idx].astype(bool)
                if not qual.any():
                    continue
                within_idx = within_idx[qual]
                idx = idx[qual]

            d["geometry"].extend(cast(GeometryArray, pts[within_idx]))
            d["beam"].extend(cast(h5py.Dataset, h5.get(f"{b}/beam"))[idx])
            d["rh100"].extend(cast(h5py.Dataset, h5.get(f"{b}/rh100"))[idx])
    if not d or any(not v for v in d.values()):
        return None
    df = gpd.GeoDataFrame(d, crs="epsg:4326")  # type: ignore
    return df


class CallbackBuilder:
    """Builds the op callback that saves the rh100 shots inside a region as a GPKG asset."""

    def __init__(self, check_quality: bool):
        self.tmp_dir = TemporaryDirectory()
        self.check_quality = check_quality

    def __call__(self):
        def callback(gedi_product: GEDIProduct, roi: DataVibe) -> Dict[str, GeometryCollection]:
            """
            Extract the shots of `gedi_product` within `roi`. The output has no
            assets when no shot is left after filtering.

            Raises ValueError when the product is not of processing level L2B.
            """
            if gedi_product.processing_level != L2B:
                raise ValueError(
                    f"Processing level must be {L2B}, found {gedi_product.processing_level}"
                )
            h5_path = gedi_product.assets[0].local_path
            geom = shpg.shape(roi.geometry)
            asset_guid = gen_guid()
            LOGGER.info(f"Extracting data from hdf5 file {h5_path}")
            df = extract_dataset(h5_path, geom, self.check_quality)
            if df is not None:
                asset_path = os.path.join(self.tmp_dir.name, f"{asset_guid}.gpkg")
                LOGGER.info(f"Saving data to {asset_path}")
                df.to_file(asset_path, driver="GPKG")
                LOGGER.info("All done! Creating GeometryCollection")

                assets = [
                    AssetVibe(
                        reference=asset_path, type="application/geopackage+sqlite3", id=asset_guid
                    )
                ]
            else:
                LOGGER.info(
                    f"No data available in product {gedi_product.product_name} after filtering, "
                    "creating assetless output"
                )
                assets = []
            rh100 = GeometryCollection.clone_from(
                gedi_product, geometry=roi.geometry, id=gen_guid(), assets=assets
            )
            return {"rh100": rh100}

        return callback

    def __del__(self):
        # Explicit cleanup, as done by the other op builders, instead of relying on
        # the implicit finalizer of TemporaryDirectory (which emits a ResourceWarning)
        self.tmp_dir.cleanup()
diff --git a/ops/extract_gedi_rh100/test_extract_gedi_rh100.py b/ops/extract_gedi_rh100/test_extract_gedi_rh100.py deleted file mode 100644 index 8af3add8..00000000 --- a/ops/extract_gedi_rh100/test_extract_gedi_rh100.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from pathlib import Path -from typing import cast - -import geopandas as gpd -import h5py -import numpy as np -import pytest -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, DataVibe, GEDIProduct, GeometryCollection -from vibe_dev.testing.op_tester import OpTester - -HERE = os.path.dirname(os.path.abspath(__file__)) -CONFIG_PATH = os.path.join(HERE, "extract_gedi_rh100.yaml") - -NUM_POINTS = 10 -BEAMS = [ - "BEAM0000", - "BEAM0001", - "BEAM0010", - "BEAM0011", - "BEAM0101", - "BEAM0110", - "BEAM1000", - "BEAM1011", -] -L2B = "GEDI02_B.002" - - -@pytest.fixture -def fake_asset(tmp_path: Path): - beam_value = 0 - filepath = os.path.join(tmp_path.absolute(), "fake.h5") - with h5py.File(filepath, "w") as f: - for b in BEAMS: - beam_value = int(b.replace("BEAM", ""), 2) - f.create_dataset(f"{b}/geolocation/lon_lowestmode", data=np.linspace(0, 2, NUM_POINTS)) - f.create_dataset(f"{b}/geolocation/lat_lowestmode", data=np.linspace(0, 2, NUM_POINTS)) - f.create_dataset(f"{b}/beam", data=beam_value * np.ones(NUM_POINTS)) - f.create_dataset(f"{b}/rh100", data=np.linspace(0, 1, NUM_POINTS) + beam_value) - fake_qual = np.ones(NUM_POINTS) - fake_qual[0] = 0 - f.create_dataset(f"{b}/l2b_quality_flag", data=fake_qual) - return filepath - - -@pytest.mark.parametrize("check_quality", (True, False)) -def test_op(check_quality: bool, fake_asset: str): - now = datetime.now() - x = GEDIProduct( - id="1", - time_range=(now, now), - geometry=shpg.mapping(shpg.box(0, 0, 2, 2)), - product_name="fake_product", - start_orbit=0, - stop_orbit=0, - processing_level=L2B, - 
assets=[AssetVibe(reference=fake_asset, type="application/x-hdf5", id="fake-id")], - ) - geom = shpg.box(-1, -1, 1, 1) - roi = DataVibe(id="2", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({"check_quality": check_quality}) - out = op_tester.run(gedi_product=x, roi=roi) - assert "rh100" in out - rh100 = cast(GeometryCollection, out["rh100"]) - assert rh100.geometry == roi.geometry - assert rh100.time_range == x.time_range - - df = gpd.read_file(rh100.assets[0].url) - quality_offset = int(check_quality) - num_points = NUM_POINTS // 2 - quality_offset - assert df.shape[0] == len(BEAMS) * num_points - assert all(isinstance(g, shpg.Point) for g in df.geometry) - assert np.allclose( - df["rh100"], # type: ignore - np.concatenate( - [ - np.linspace(0, 1, NUM_POINTS)[quality_offset : num_points + quality_offset] - + int(b.replace("BEAM", ""), 2) - for b in BEAMS - ] - ), - ) - - # Op breaks with wrong processing level - x.processing_level = "invalid" - with pytest.raises(ValueError): - op_tester.run(gedi_product=x, roi=roi) diff --git a/ops/extract_protein_sequence/extract_protein_sequence.py b/ops/extract_protein_sequence/extract_protein_sequence.py deleted file mode 100644 index 41a4ebb8..00000000 --- a/ops/extract_protein_sequence/extract_protein_sequence.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Dict, List - -import pandas as pd -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, FoodVibe, ProteinSequence, gen_guid - - -def append_nones(length: int, list_: List[str]): - """ - Appends Nones to list to get length of list equal to `length`. 
- If list is too long raise AttributeError - """ - diff_len = length - len(list_) - if diff_len < 0: - raise AttributeError("Length error list is too long.") - return list_ + [" 0"] * diff_len - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def protein_sequence_callback( - food_item: FoodVibe, - ) -> Dict[str, ProteinSequence]: - protein_list = append_nones(3, food_item.fasta_sequence) - - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") - - df = pd.DataFrame(protein_list, columns=["protein_list"]) - df.to_csv(filepath, index=False) - - protein_sequence = ProteinSequence( - gen_guid(), - time_range=(datetime.now(), datetime.now()), # these are just placeholders - geometry=shpg.mapping(shpg.Point(0, 0)), # this location is a placeholder - assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], - ) - - return {"protein_sequence": protein_sequence} - - return protein_sequence_callback diff --git a/ops/extract_protein_sequence/extract_protein_sequence.yaml b/ops/extract_protein_sequence/extract_protein_sequence.yaml deleted file mode 100644 index abbcbc0a..00000000 --- a/ops/extract_protein_sequence/extract_protein_sequence.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: extract_protein_sequence -inputs: - food_item: FoodVibe -output: - protein_sequence: ProteinSequence -parameters: -entrypoint: - file: extract_protein_sequence.py - callback_builder: CallbackBuilder diff --git a/ops/get_angles/get_angles.py b/ops/get_angles/get_angles.py deleted file mode 100644 index 78709d39..00000000 --- a/ops/get_angles/get_angles.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import io -import mimetypes -import os -import xml.etree.ElementTree as ET -from datetime import datetime, timedelta -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Sequence, Tuple, cast -from xml.etree.ElementTree import Element, ElementTree - -import numpy as np -import planetary_computer as pc -import requests -import rioxarray as rio # noqa: F401 -import xarray as xr -from numpy.typing import NDArray -from pystac.item import Item -from pystac_client import Client -from rasterio.warp import Resampling -from rioxarray.merge import merge_arrays -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import AssetVibe, Raster, gen_guid -from vibe_lib.raster import get_crs - -CATALOG_URL = "https://planetarycomputer.microsoft.com/api/stac/v1" -COLLECTION = "sentinel-2-l2a" -DATE_FORMAT = "%Y-%m-%d" - -BBox = Tuple[float, float, float, float] -Angles = Tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray] - - -def query_catalog(roi: BBox, time_range: Tuple[datetime, datetime]): - """ - Query the planetary computer for items that intersect with the desired RoI in the time range - """ - catalog = Client.open(CATALOG_URL) - search = catalog.search( - collections=[COLLECTION], - bbox=roi, - datetime="/".join(i.strftime(DATE_FORMAT) for i in time_range), - ) - items = list(search.get_items()) - return items - - -def get_catalog_items(raster: Raster, tol: timedelta = timedelta(days=5)) -> List[Item]: - """ - Get sentinel2 tiles that intersect with the raster geometry - within a tolerance of the raster datetime - """ - geom = shpg.shape(raster.geometry) - roi = cast(BBox, geom.bounds) - raster_dt = raster.time_range[0] - time_range = (raster_dt - tol, raster_dt + tol) - items = query_catalog(roi, time_range) - # Filter items by closest date - dates = list(set(cast(datetime, item.datetime) for item in items)) - date_distance = cast(NDArray[Any], [abs(raster_dt - d).total_seconds() 
for d in dates]) - closest_date = dates[np.argmin(date_distance)] - items = [item for item in items if item.datetime == closest_date] - - # Return items necessary to cover all the spatial extent of the raster - return filter_necessary_items(geom, items) - - -def filter_necessary_items(poly: BaseGeometry, items: Sequence[Item]) -> List[Item]: - """ - Greedily filter the items so that only a subset necessary to cover all the raster spatial extent - is returned - """ - - def area_func(item: Item) -> float: - bbox = item.bbox - assert bbox is not None - return -shpg.box(*bbox, ccw=True).intersection(poly).area - - sorted_items = sorted(items, key=area_func) - - # Get item with largest intersection - item = sorted_items[0] - assert item - assert item.bbox is not None - item_box = shpg.box(*item.bbox, ccw=True) - if poly.within(item_box): - return [item] - return [item] + filter_necessary_items(poly - item_box, sorted_items[1:]) - - -def get_xml_data(item: Item) -> ElementTree: - """ - Get granule metadata XML from the planetary computer STAC item - """ - href = item.assets["granule-metadata"].href - signed_href = pc.sign(href) - response = requests.get(signed_href) - return ET.parse(io.BytesIO(response.content)) - - -def parse_grid_params(tree: ElementTree) -> Tuple[float, float, float, float, str]: - """ - Parse center grid coordinates and grid resolution from the metadata XML - """ - res = 10 - height, width = [ - int(cast(str, v.text)) - for node in tree.iter("Size") - if node.attrib["resolution"] == str(res) - for tag in ("NROWS", "NCOLS") - for v in node.iter(tag) - ] - xmin, ymax = [ - int(cast(str, v.text)) - for node in tree.iter("Geoposition") - if node.attrib["resolution"] == str(res) - for tag in ("ULX", "ULY") - for v in node.iter(tag) - ] - - xc = xmin + res * width / 2 - yc = ymax - res * height / 2 - res_x = float(cast(str, next(tree.iter("COL_STEP")).text)) - res_y = -float(cast(str, next(tree.iter("ROW_STEP")).text)) - crs = cast(str, 
next(tree.iter("HORIZONTAL_CS_CODE")).text) - return xc, yc, res_x, res_y, crs - - -def parse_angle_grids(node: Element) -> NDArray[Any]: - """ - Parse zenith and azimuth grids from XML node - Returns array of shape 2 (zenith, azimuth) x H x W - """ - angles = ( - np.array( - [ - [ - [cast(str, line.text).split(" ") for line in mat.iter("VALUES")] - for mat in node.iter(za) - ] - for za in ["Zenith", "Azimuth"] - ] - ) - .astype(float) - .squeeze() # Get rid of the singleton dimension from node.iter(za) - ) - return angles - - -def get_view_angles(tree: ElementTree) -> Tuple[NDArray[Any], NDArray[Any]]: - """ - Parse view angles from XML tree, join per-band detector grids, then average over bands - """ - grid_list = [ - [ - parse_angle_grids(node) - for node in tree.iter("Viewing_Incidence_Angles_Grids") - if node.attrib["bandId"] == str(bi) - ] - for bi in range(13) - ] - # Band indices x Detector ID x Zenith or Azimuth x H x W - partial_grids = np.array(grid_list) - # Join partial grids from all detectors - n = np.nan_to_num(partial_grids).sum(axis=1) - d = np.isfinite(partial_grids).sum(axis=1) - angles = n / d - # Get the average from all bands - view_zenith_mean, view_azimuth_mean = angles.mean(axis=0) - return view_zenith_mean, view_azimuth_mean - - -def get_sun_angles(tree: ElementTree) -> Tuple[NDArray[Any], NDArray[Any]]: - """ - Parse sun angles from XML tree - """ - node = next(tree.iter("Sun_Angles_Grid")) - sun_zenith, sun_azimuth = parse_angle_grids((node)) - return sun_zenith, sun_azimuth - - -def to_georeferenced_array( - angle_grid: NDArray[Any], center: Tuple[float, float], resolution: Tuple[float, float], crs: str -) -> xr.DataArray: - """""" - height, width = angle_grid.shape - grid_x, grid_y = ( - np.linspace(c - (dim - 1) / 2 * res, c + (dim - 1) / 2 * res, dim) - for c, res, dim in zip(center, resolution, (width, height)) - ) - - array = xr.DataArray(angle_grid[None], {"band": [1], "y": grid_y, "x": grid_x}) - array.rio.set_crs(crs) - return 
array - - -def get_angles_from_item( - item: Item, -) -> Angles: - """ - Get georeferenced view and sun angle grids by querying planetary computer, - parsing the metadata XML for grid coordinates and values, and joining per-band view grids. - Returns mean view zenith, mean view azimuth, sun zenith, and sun azimuth grids, respectively. - """ - tree = get_xml_data(item) - xc, yc, res_x, res_y, crs = parse_grid_params(tree) - angles = (*get_view_angles(tree), *get_sun_angles(tree)) - # get geospatial grid for these arrays - return cast( - Angles, - tuple( - to_georeferenced_array(angle_grid, (xc, yc), (res_x, res_y), crs) - for angle_grid in angles - ), - ) - - -def get_angles(raster: Raster, tol: timedelta = timedelta(days=5)) -> Angles: - """ - Fetch view and sun angle grids, according to the raster geometry and time range. - Time range is assumed to be one value. The closest visit is used in case there is no samples - for the exact date. In case the geometry spans multiple tiles, the angle grids will be merged. - Grids are reprojected to native tif CRS and clipped according to the geometry. - Angle grid resolution is kept at 5000m. - Returns mean view zenith, mean view azimuth, sun zenith, and sun azimuth grids, respectively. 
- """ - geom = shpg.shape(raster.geometry) - items = get_catalog_items(raster, tol) - items = filter_necessary_items(geom, items) - angles_list = zip(*(get_angles_from_item(item) for item in items)) - - raster_crs = get_crs(raster) - return cast( - Angles, - tuple( - merge_arrays( - [ - ang.rio.reproject(raster_crs, resampling=Resampling.bilinear, nodata=np.nan) - for ang in angles - ] - ).rio.clip([geom], crs="epsg:4326", all_touched=True) - for angles in angles_list - ), - ) - - -class CallbackBuilder: - def __init__(self, tolerance: int): - self.tmp_dir = TemporaryDirectory() - self.tolerance = timedelta(days=tolerance) - - def __call__(self): - def fcover_callback(raster: Raster) -> Dict[str, Raster]: - angles = xr.concat(get_angles(raster, tol=self.tolerance), dim="band") - uid = gen_guid() - out_path = os.path.join(self.tmp_dir.name, f"{uid}.tif") - angles.rio.to_raster(out_path) - asset = AssetVibe(reference=out_path, type=mimetypes.types_map[".tif"], id=uid) - out_raster = Raster.clone_from( - raster, - id=gen_guid(), - assets=[asset], - bands={ - k: v - for v, k in enumerate( - ["view_zenith", "view_azimuth", "sun_zenith", "sun_azimuth"] - ) - }, - ) - return {"angles": out_raster} - - return fcover_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/get_angles/get_angles.yaml b/ops/get_angles/get_angles.yaml deleted file mode 100644 index 756f75b4..00000000 --- a/ops/get_angles/get_angles.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: get_angles -inputs: - raster: Raster -output: - angles: Raster -parameters: - tolerance: 5 -entrypoint: - file: get_angles.py - callback_builder: CallbackBuilder diff --git a/ops/gfs_download/gfs_download.py b/ops/gfs_download/gfs_download.py deleted file mode 100644 index c228a027..00000000 --- a/ops/gfs_download/gfs_download.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Dict, List - -from azure.core.exceptions import ResourceNotFoundError -from azure.storage.blob import ContainerClient - -from vibe_core.data import AssetVibe, GfsForecast, gen_forecast_time_hash_id, gen_guid -from vibe_lib.gfs_blob_utils import blob_url_from_offset, get_sas_uri - -LOGGER = logging.getLogger(__name__) - - -def get_noaa_data(time: GfsForecast, output_dir: str, sas_token: str) -> GfsForecast: - """Get the global forecast for the given input time data - - Args: - time: GfsForecast containing forecast publish time and desired forecast time - output_dir: directory in which to save the grib file - sas_token: token used to access Azure blob storage - - Returns: - GfsForecast containing global forecast for the specified time - - Raises: - azure.core.exceptions.ResourceNotFoundError if forecast file cannot be found - """ - container_client: ContainerClient = ContainerClient.from_container_url(get_sas_uri(sas_token)) - publish_time = datetime.fromisoformat(time.publish_time) - forecast_time = time.time_range[0] - forecast_offset = (forecast_time - publish_time).seconds // 3600 - - blob_url = blob_url_from_offset(publish_time, forecast_offset) - grib_file = "{date}T{cycle_runtime:02}-f{offset:03}.grib".format( - date=publish_time.date().isoformat(), - cycle_runtime=publish_time.hour, - offset=forecast_offset, - ) - - file_path = os.path.join(output_dir, grib_file) - - try: - with open(file_path, "wb") as blob_file: - blob_file.write(container_client.download_blob(blob_url).readall()) - except ResourceNotFoundError as e: - # the specified forecast date has no publications - LOGGER.exception("Failed to download blob {}".format(blob_url)) - raise e - - return GfsForecast( - id=gen_forecast_time_hash_id( - "GlobalForecast", time.geometry, publish_time, time.time_range - ), - time_range=time.time_range, - geometry=time.geometry, - 
assets=[grib_to_asset(file_path)], - publish_time=time.publish_time, - ) - - -def grib_to_asset(file_path: str) -> AssetVibe: - """Convert the given file to an VibeAsset""" - return AssetVibe(reference=file_path, type=None, id=gen_guid()) - - -class CallbackBuilder: - def __init__(self, sas_token: str): - self.sas_token = sas_token - self.temp_dir = TemporaryDirectory() - - def __call__(self): - def get_weather_forecast(time: List[GfsForecast]) -> Dict[str, List[GfsForecast]]: - global_forecast = get_noaa_data(time[0], self.temp_dir.name, self.sas_token) - return {"global_forecast": [global_forecast]} - - return get_weather_forecast - - def __del__(self): - self.temp_dir.cleanup() diff --git a/ops/gfs_download/gfs_download.yaml b/ops/gfs_download/gfs_download.yaml deleted file mode 100644 index 1b291fd2..00000000 --- a/ops/gfs_download/gfs_download.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: gfs_download -inputs: - time: List[GfsForecast] -output: - global_forecast: List[GfsForecast] -parameters: - sas_token: "@SECRET(eywa-secrets, noaa-gfs-sas)" -entrypoint: - callback_builder: CallbackBuilder - file: gfs_download.py -description: - short_description: Downloads the global forecast for the given input time. \ No newline at end of file diff --git a/ops/gfs_preprocess/gfs_preprocess.py b/ops/gfs_preprocess/gfs_preprocess.py deleted file mode 100644 index d1e9904b..00000000 --- a/ops/gfs_preprocess/gfs_preprocess.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from datetime import datetime, time, timedelta, timezone -from typing import Any, Dict, List, Tuple, Union - -from azure.storage.blob import ContainerClient -from shapely.geometry import Point, mapping - -from vibe_core.data import DataVibe, GfsForecast, gen_forecast_time_hash_id, gen_hash_id -from vibe_lib.gfs_blob_utils import blob_url_from_offset, get_sas_uri - -LOGGER = logging.getLogger(__name__) - -# Geometry pointing to Null Island -NULL_ISLAND: Dict[str, Any] = mapping(Point(0, 0)) - -# The number of hours between model cycle runtimes for GFS data -CC_GAP: int = 6 - - -def datetime_to_query_date( - user_input: DataVibe, sas_token: str -) -> Tuple[datetime, Tuple[datetime, datetime]]: - """Gets the most relevant model date and forecast hour of product for the given day and time - - Input: - user_input: EwyaData representing the day and hour of interest - sas_token: token used to access Azure blob storage - - Output: - published_datetime: datetime representing the publish date and - time of the most relevant forecast data - forecast_datetime: datetime representing the date and time reflected in the forecast - """ - container_client: ContainerClient = ContainerClient.from_container_url(get_sas_uri(sas_token)) - # get the forecast for the beginning of the time range in UTC - input_utc = user_input.time_range[0].astimezone(timezone.utc) - now_utc = datetime.now(tz=timezone.utc) - - if input_utc > now_utc: - # forecast is for a future time; get the latest data - publish_date = now_utc - else: - # forecast is for a past time; fetch old forecasts - publish_date = input_utc - - # modify time to be one of 00, 06, 12, 18 hours - time_utc = publish_date.time() - query_hour = (time_utc.hour // CC_GAP) * CC_GAP - - published_datetime = datetime.combine( - publish_date.date(), time.min.replace(hour=query_hour), tzinfo=timezone.utc - ) - - # compute the difference between the forecast publish time and the target forecast time - forecast_offset = 
int((input_utc - published_datetime).total_seconds() // 3600) - - # Find the most relevant blob - blob_found = False - valid_duration = 1 - while not blob_found: - blob_url = blob_url_from_offset(published_datetime, forecast_offset) - blob_client = container_client.get_blob_client(blob=blob_url) - if blob_client.exists(): - blob_found = True - else: - # Try the previous cycle runtime - published_datetime -= timedelta(hours=CC_GAP) - forecast_offset += CC_GAP - if forecast_offset > 120 and forecast_offset <= 384: - valid_duration = 3 - # forecasts this far into the future are made with 3 hour granularity - forecast_offset -= forecast_offset % 3 - elif forecast_offset > 384: - # forecasts are not made this far out - LOGGER.exception( - "Could not find valid forecast for time {}".format(input_utc.isoformat) - ) - raise RuntimeError("Forecast not found") - - forecast_datetime = published_datetime + timedelta(hours=forecast_offset) - forecast_end = forecast_datetime + timedelta(hours=valid_duration) - return published_datetime, (forecast_datetime, forecast_end) - - -class CallbackBuilder: - def __init__(self, sas_token: str): - self.sas_token = sas_token - - def __call__(self): - def preprocess_input( - user_input: List[DataVibe], - ) -> Dict[str, List[Union[GfsForecast, DataVibe]]]: - publish_time, time_valid = datetime_to_query_date(user_input[0], self.sas_token) - location = user_input[0].geometry - time_data = GfsForecast( - id=gen_forecast_time_hash_id( - "forecast_time", NULL_ISLAND, publish_time, time_valid - ), - time_range=time_valid, - geometry=NULL_ISLAND, - assets=[], - publish_time=publish_time.isoformat(), - ) - location_data = DataVibe( - gen_hash_id("forecast_location", location, time_valid), time_valid, location, [] - ) - - return {"time": [time_data], "location": [location_data]} - - return preprocess_input diff --git a/ops/gfs_preprocess/gfs_preprocess.yaml b/ops/gfs_preprocess/gfs_preprocess.yaml deleted file mode 100644 index 912a8df8..00000000 --- 
a/ops/gfs_preprocess/gfs_preprocess.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: gfs_preprocess -inputs: - user_input: List[DataVibe] -output: - time: List[GfsForecast] - location: List[DataVibe] -parameters: - sas_token: "@SECRET(eywa-secrets, noaa-gfs-sas)" -entrypoint: - callback_builder: CallbackBuilder - file: gfs_preprocess.py -description: - short_description: - Gets the most relevant model date and forecast hour of product for the given input day, time and location. \ No newline at end of file diff --git a/ops/group_rasters_by_geometries/group_rasters_by_geometries.py b/ops/group_rasters_by_geometries/group_rasters_by_geometries.py deleted file mode 100644 index 1337e3d7..00000000 --- a/ops/group_rasters_by_geometries/group_rasters_by_geometries.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from functools import partial -from typing import Dict, List - -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, Raster, RasterSequence -from vibe_lib.geometry import is_approx_equal - - -def callback( - rasters: List[Raster], group_by: List[DataVibe], threshold: float -) -> Dict[str, List[RasterSequence]]: - ref_bands = rasters[0].bands - if not all(r.bands == ref_bands for r in rasters): - raise ValueError("Expected to group rasters with the same bands") - sequences: List[RasterSequence] = [] - for g in group_by: - matching_rasters: List[Raster] = [] - geom_g = shpg.shape(g.geometry) - for r in rasters: - geom_r = shpg.shape(r.geometry) - if is_approx_equal(geom_r, geom_g, threshold=threshold): - matching_rasters.append(r) - matching_rasters = sorted(matching_rasters, key=lambda x: x.id) - t = [r.time_range[0] for r in matching_rasters] - seq = RasterSequence( - id=hashlib.sha256("".join([r.id for r in matching_rasters]).encode()).hexdigest(), - time_range=(min(t), max(t)), - geometry=g.geometry, - assets=[], - bands=ref_bands, - ) - for r in matching_rasters: - 
seq.add_item(r) - sequences.append(seq) - return {"raster_groups": sequences} - - -def callback_builder(geom_threshold: float): - return partial(callback, threshold=geom_threshold) diff --git a/ops/group_rasters_by_geometries/group_rasters_by_geometries.yaml b/ops/group_rasters_by_geometries/group_rasters_by_geometries.yaml deleted file mode 100644 index 38d26323..00000000 --- a/ops/group_rasters_by_geometries/group_rasters_by_geometries.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: group_rasters_by_geometries -inputs: - rasters: List[Raster] - group_by: List[DataVibe] -output: - raster_groups: List[RasterSequence] -parameters: - geom_threshold: .99 -entrypoint: - file: group_rasters_by_geometries.py - callback_builder: callback_builder -dependencies: - parameters: - - geom_threshold -description: - short_description: Groups input rasters that are contained in the geometry of a reference raster. \ No newline at end of file diff --git a/ops/group_rasters_by_time/group_rasters_by_time.py b/ops/group_rasters_by_time/group_rasters_by_time.py deleted file mode 100644 index ee0da933..00000000 --- a/ops/group_rasters_by_time/group_rasters_by_time.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from functools import partial -from itertools import groupby -from typing import Dict, List - -from vibe_core.data import Raster, RasterSequence -from vibe_core.data.core_types import gen_guid - - -def callback(rasters: List[Raster], criterion: str) -> Dict[str, List[RasterSequence]]: - key_func = { - "day_of_year": lambda x: x.time_range[0].timetuple().tm_yday, - "week": lambda x: x.time_range[0].isocalendar()[1], - "month": lambda x: x.time_range[0].month, - "year": lambda x: x.time_range[0].year, - "month_and_year": lambda x: (x.time_range[0].year, x.time_range[0].month), - } - criterion_func = key_func.get(criterion) - if criterion_func is None: - raise ValueError(f"Invalid group criterion {criterion}") - - res = [] - for key, group in groupby(sorted(rasters, key=criterion_func), criterion_func): - group = list(group) - if isinstance(key, list): - key = "_".join([str(k) for k in key]) - - raster_seq = RasterSequence.clone_from(group[0], f"group_{key}_{gen_guid()}", []) - for r in group: - raster_seq.add_item(r) - res.append(raster_seq) - - return {"raster_groups": res} - - -def callback_builder(criterion: str): - return partial(callback, criterion=criterion) diff --git a/ops/group_rasters_by_time/group_rasters_by_time.yaml b/ops/group_rasters_by_time/group_rasters_by_time.yaml deleted file mode 100644 index 040e7f8a..00000000 --- a/ops/group_rasters_by_time/group_rasters_by_time.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: group_rasters_by_time -inputs: - rasters: List[Raster] -output: - raster_groups: List[RasterSequence] -parameters: - criterion: month -entrypoint: - file: group_rasters_by_time.py - callback_builder: callback_builder -dependencies: - parameters: - - criterion -description: - short_description: This op groups rasters in time according to 'criterion'. - parameters: - criterion: Criterion to group rasters among "day_of_year", "week", "month", "year", and "month_and_year". 
diff --git a/ops/group_rasters_by_time/test_group_rasters_by_time.py b/ops/group_rasters_by_time/test_group_rasters_by_time.py deleted file mode 100644 index 57cda8d2..00000000 --- a/ops/group_rasters_by_time/test_group_rasters_by_time.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from datetime import datetime, timedelta -from typing import List, cast - -import pytest -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import Raster, RasterSequence -from vibe_core.data.core_types import AssetVibe, BaseVibe, gen_guid -from vibe_dev.testing.op_tester import OpTester - -START_DATE = datetime(2022, 1, 1) -NDAYS = 730 # 2 years -EXPECTED = [("day_of_year", 365), ("week", 52), ("month", 12), ("year", 2), ("month_and_year", 24)] - -YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "group_rasters_by_time.yaml") - - -@pytest.mark.parametrize("criterion, expected", EXPECTED) -def test_op(criterion: str, expected: int): - op_tester = OpTester(YAML_PATH) - op_tester.update_parameters({"criterion": criterion}) - - latitude = 42.0 - longitude = 42.0 - buffer = 0.0042 - bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] - polygon: Polygon = box(*bbox, ccw=True) - - fake_asset = AssetVibe(reference="", type=mimetypes.types_map[".tif"], id="fake_asset") - - rasters = [ - Raster( - id=gen_guid(), - time_range=(START_DATE + timedelta(i), START_DATE + timedelta(i)), - geometry=mapping(polygon), - assets=[fake_asset], - bands={}, - ) - for i in range(NDAYS) - ] - - res = cast( - List[RasterSequence], op_tester.run(rasters=cast(List[BaseVibe], rasters))["raster_groups"] - ) - assert len(res) == expected diff --git a/ops/group_sentinel1_orbits/group_sentinel1_orbits.py b/ops/group_sentinel1_orbits/group_sentinel1_orbits.py deleted file mode 100644 index 735d1021..00000000 --- 
a/ops/group_sentinel1_orbits/group_sentinel1_orbits.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from collections import defaultdict -from typing import Dict, List, Tuple - -from shapely import geometry as shpg -from shapely.ops import unary_union - -from vibe_core.data import Sentinel1Raster, Sentinel1RasterOrbitGroup - - -def make_orbit_group( - items: List[Sentinel1Raster], -) -> Sentinel1RasterOrbitGroup: - # Make sure we are ordered by time make things consistent for the id hash - rasters = sorted(items, key=lambda x: x.time_range[0]) - # Id depends on all component ids - group_id = hashlib.sha256("".join(i.id for i in rasters).encode()).hexdigest() - geom = shpg.mapping(unary_union([shpg.shape(r.geometry) for r in rasters])) - dates = [r.time_range[0] for r in rasters] - time_range = (min(dates), max(dates)) - group = Sentinel1RasterOrbitGroup.clone_from( - rasters[0], id=group_id, assets=[], time_range=time_range, geometry=geom - ) - for r in rasters: - group.add_raster(r) - - return group - - -def callback_builder(): - def group_by_orbit( - rasters: List[Sentinel1Raster], - ) -> Dict[str, List[Sentinel1RasterOrbitGroup]]: - same_orbits: Dict[Tuple[int, str], List[Sentinel1Raster]] = defaultdict(list) - for item in rasters: - orbit_key = (item.orbit_number, item.tile_id) - same_orbits[orbit_key].append(item) - - groups = [make_orbit_group(v) for v in same_orbits.values()] - - return {"raster_groups": groups} - - return group_by_orbit diff --git a/ops/group_sentinel1_orbits/group_sentinel1_orbits.yaml b/ops/group_sentinel1_orbits/group_sentinel1_orbits.yaml deleted file mode 100644 index 30db5283..00000000 --- a/ops/group_sentinel1_orbits/group_sentinel1_orbits.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: group_sentinel1_orbits -inputs: - rasters: List[Sentinel1Raster] -output: - raster_groups: List[Sentinel1RasterOrbitGroup] -parameters: -entrypoint: - file: 
def make_orbit_group(
    items: TileData,
) -> Tuple[Sentinel2RasterOrbitGroup, Sentinel2CloudMaskOrbitGroup]:
    """Merge (raster, cloud mask) pairs of one orbit/tile into two orbit groups.

    Pairs are ordered by the discriminator date parsed from the raster product
    name. Each group id is the SHA-256 of its concatenated member ids, both
    groups share the union of the raster geometries, and each group is cloned
    from its last member (so the remaining fields, including the time range,
    come from that member).
    """
    ordered_pairs = sorted(items, key=lambda pair: discriminator_date(pair[0].product_name))
    raster_tuple, mask_tuple = zip(*ordered_pairs)
    rasters = cast(List[Sentinel2Raster], list(raster_tuple))
    masks = cast(List[Sentinel2CloudMask], list(mask_tuple))

    def digest_of_ids(members) -> str:
        # Id depends on every member id, in chronological order
        return hashlib.sha256("".join(member.id for member in members).encode()).hexdigest()

    raster_group_id = digest_of_ids(rasters)
    cloud_group_id = digest_of_ids(masks)

    merged_geometry = shpg.mapping(unary_union([shpg.shape(r.geometry) for r in rasters]))

    raster_group = Sentinel2RasterOrbitGroup.clone_from(
        rasters[-1], id=raster_group_id, assets=[], geometry=merged_geometry
    )
    for raster in rasters:
        raster_group.add_raster(raster)

    mask_group = Sentinel2CloudMaskOrbitGroup.clone_from(
        masks[-1], id=cloud_group_id, assets=[], geometry=merged_geometry
    )
    for cloud_mask in masks:
        mask_group.add_raster(cloud_mask)

    return raster_group, mask_group


def callback_builder():
    """Return the op callback that groups Sentinel-2 rasters and masks by orbit and tile."""

    def group_by_orbit(
        rasters: List[Sentinel2Raster],
        masks: List[Sentinel2CloudMask],
    ) -> Dict[str, Union[List[Sentinel2RasterOrbitGroup], List[Sentinel2CloudMaskOrbitGroup]]]:
        # Bucket each raster, together with its matching cloud mask, by (orbit number, tile id)
        buckets: Dict[Tuple[int, str], TileData] = defaultdict(list)
        for raster in rasters:
            key = (raster.orbit_number, raster.tile_id)
            matching_mask = find_s2_product(raster.product_name, masks)
            buckets[key].append((raster, matching_mask))

        grouped = [make_orbit_group(pairs) for pairs in buckets.values()]
        raster_side, mask_side = zip(*grouped)

        return {
            "raster_groups": cast(List[Sentinel2RasterOrbitGroup], list(raster_side)),
            "mask_groups": cast(List[Sentinel2CloudMaskOrbitGroup], list(mask_side)),
        }

    return group_by_orbit
\ No newline at end of file diff --git a/ops/group_tile_sequence/group_s1_tile_sequence.yaml b/ops/group_tile_sequence/group_s1_tile_sequence.yaml deleted file mode 100644 index 75f08dc6..00000000 --- a/ops/group_tile_sequence/group_s1_tile_sequence.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: group_s1_tile_sequence -inputs: - rasters: List[Sentinel1Raster] - input_data: List[DataVibe] -output: - tile_sequences: List[Sentinel1RasterTileSequence] -parameters: - tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml - duration: 48 - overlap: 0.5 -entrypoint: - file: group_tile_sequence.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - duration - - overlap -description: - short_description: Groups Sentinel-1 tiles into time windows of defined duration. \ No newline at end of file diff --git a/ops/group_tile_sequence/group_s2_tile_sequence.yaml b/ops/group_tile_sequence/group_s2_tile_sequence.yaml deleted file mode 100644 index decf7eeb..00000000 --- a/ops/group_tile_sequence/group_s2_tile_sequence.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: group_s2_tile_sequence -inputs: - rasters: List[Sentinel2Raster] - input_data: List[DataVibe] -output: - tile_sequences: List[Sentinel2RasterTileSequence] -parameters: - tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml - duration: 48 - overlap: 0.5 -entrypoint: - file: group_tile_sequence.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - duration - - overlap -description: - short_description: Groups Sentinel-2 tiles into time windows of defined duration. 
\ No newline at end of file diff --git a/ops/group_tile_sequence/group_s2cloudmask_tile_sequence.yaml b/ops/group_tile_sequence/group_s2cloudmask_tile_sequence.yaml deleted file mode 100644 index b26c983f..00000000 --- a/ops/group_tile_sequence/group_s2cloudmask_tile_sequence.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: group_s2cloudmask_tile_sequence -inputs: - rasters: List[Sentinel2CloudMask] - input_data: List[DataVibe] -output: - tile_sequences: List[Sentinel2CloudMaskTileSequence] -parameters: - tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml - duration: 48 - overlap: 0.5 -entrypoint: - file: group_tile_sequence.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - duration - - overlap -description: - short_description: Groups Sentinel-2 cloud masks into time windows of defined duration. \ No newline at end of file diff --git a/ops/group_tile_sequence/group_tile_sequence.py b/ops/group_tile_sequence/group_tile_sequence.py deleted file mode 100644 index ca59ea41..00000000 --- a/ops/group_tile_sequence/group_tile_sequence.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
def gen_sequence_id(
    items: ListTileData,
    geom: BaseGeometry,
    read_time_range: TimeRange,
    write_time_range: TimeRange,
):
    """Generate the id for a Tile Sequence, considering all rasters in the sequence

    The id is the SHA-256 hex digest of, in order: every item id, the WKT of
    `geom`, and the ISO-formatted bounds of the read and then the write time
    range.
    """
    digest = hashlib.sha256()
    for item in items:
        digest.update(item.id.encode())
    digest.update(geom.wkt.encode())
    for bounds in (read_time_range, write_time_range):
        for instant in bounds:
            digest.update(instant.isoformat().encode())
    return digest.hexdigest()
def make_tile_sequence(
    items: ListTileData,
    seq_geom: BaseGeometry,
    read_time_range: TimeRange,
    write_time_range: TimeRange,
    ref_item: TileData,
) -> TileSequenceData:
    """Create a TileSequenceData from the list of rasters and a sequence geometry

    The sequence class is looked up in `Tile2Sequence` from the type of
    `ref_item`, and the new object is cloned from `ref_item` with the
    product/orbit/platform fields blanked out. `items` may be empty, in which
    case the sequence simply has no members.
    """
    sequence_cls = Tile2Sequence[type(ref_item)]

    # Chronological order keeps the generated id independent of input order
    ordered = sorted(items, key=lambda raster: raster.time_range[0])

    overrides = dict(
        id=gen_sequence_id(ordered, seq_geom, read_time_range, write_time_range),
        assets=[],
        geometry=shpg.mapping(seq_geom),
        time_range=read_time_range,
        write_time_range=write_time_range,
        product_name="",
        orbit_number=-1,
        relative_orbit_number=-1,
        orbit_direction="",
        platform="",
    )
    sequence = sequence_cls.clone_from(ref_item, **overrides)

    for raster in ordered:
        sequence.add_item(raster)

    return sequence
class CallbackBuilder:
    """Builds the op callback that groups tile rasters into time-window sequences.

    Args:
        tile_geometry: Path of the file with the tile geometries; it is read
            with `geopandas.read_file` and must expose a "Name" column holding
            tile ids.
        duration: Length of each window; must be larger than 0.
        overlap: Fraction of `duration` used as the step between consecutive
            windows (`step = int(duration * overlap)`); must be in (0, 1].

    Raises:
        ValueError: If `duration` or `overlap` is outside its valid range.
    """

    def __init__(self, tile_geometry: str, duration: int, overlap: float):
        if duration <= 0:
            raise ValueError(f"Duration must be larger than 0, found {duration}")
        # `overlap` is multiplied by `duration` to obtain the window step, so 0
        # (a zero step) is rejected while 1 (step == duration) is accepted.
        if overlap <= 0 or overlap > 1:
            raise ValueError(f"Overlap value must be in range (0, 1], found {overlap}")
        self.tile_geometry = tile_geometry
        self.duration = duration
        self.overlap = overlap

    def __call__(self):
        def group_by_tile_geom(
            rasters: ListTileData, input_data: List[DataVibe]
        ) -> Dict[str, List[TileSequenceData]]:
            # List the tiles for which we have products
            tile_ids = set(p.tile_id for p in rasters)

            # Read tile geometry and filter for those that we have products
            # Make fiona read the file: https://gis.stackexchange.com/questions/114066/
            for driver in KML_DRIVER_NAMES:
                fiona.drvsupport.supported_drivers[driver] = "rw"  # type: ignore
            tile_dfs = gpd.read_file(self.tile_geometry)
            # Filter only tiles for which we have products
            tile_dfs = cast(
                gpd.GeoDataFrame,
                tile_dfs[tile_dfs["Name"].isin(tile_ids)],  # type: ignore
            )

            # Group rasters by tile_id and geometry
            sequences, sequences_geom, sequences_time_range = group_rasters(
                rasters, input_data, tile_dfs
            )

            # Create TileSequenceData for each group
            # NOTE(review): int() truncates, so a small duration combined with a
            # small overlap yields step == 0 -- confirm the interval helpers
            # tolerate that or reject it in __init__.
            step = int(self.duration * self.overlap)
            grouped_sequences = [
                group
                for k in sequences.keys()
                for group in make_chip_sequences(
                    sequences[k],
                    sequences_geom[k],
                    sequences_time_range[k],
                    self.duration,
                    step,
                )
            ]

            return {"tile_sequences": grouped_sequences}

        return group_by_tile_geom
- -from tempfile import TemporaryDirectory -from typing import Any, Dict, Optional, cast - -import geopandas as gpd -import numpy as np -import rasterio -from geopandas.geodataframe import GeoDataFrame, GeoSeries -from pyproj.crs import crs -from rasterio.features import sieve -from rasterio.io import DatasetReader -from rasterio.mask import mask -from shapely.geometry import shape -from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder, StandardScaler - -from vibe_core.data import DataVibe, gen_hash_id -from vibe_core.data.core_types import GeometryCollection -from vibe_core.data.rasters import Raster -from vibe_lib.shapefile import write_shapefile - - -class CallbackBuilder: - def __init__( - self, - attribute_name: str, - buffer: int, - bins: int, - simplify: str, - tolerance: float, - data_scale: bool, - max_depth: int, - n_estimators: int, - random_state: int, - ): - self.temp_dir = TemporaryDirectory() - self.attribute_name = attribute_name - self.buffer = buffer - self.bins = bins - self.simplify = simplify - self.tolerance = tolerance - self.data_scale = data_scale - self.max_depth = max_depth - self.n_estimators = n_estimators - self.random_state = random_state - - def create_heatmap(self, raster: Raster, samples: GeometryCollection) -> DataVibe: - # Read and filter GeoDataFrame using input attribute name - samples_df = cast( - gpd.GeoDataFrame, - gpd.read_file(samples.assets[0].url), - ) - samples_df = cast(GeoDataFrame, samples_df[["geometry", self.attribute_name]]) - assert samples_df.crs, "samples dataframe has no CRS" - - # Train Model - model, le, scaler = self.train_classifier( - raster_path=raster.raster_asset.url, - samples=cast(GeoDataFrame, samples_df), - ) - - # Predict - assetVibe = self.predict_classifier( - model=model, - raster_path=raster.raster_asset.url, - label_encoder=le, - scaler=scaler, - farm_boundary=samples.geometry, - 
def predict_classifier(
    self,
    model: RandomForestClassifier,
    raster_path: str,
    label_encoder: LabelEncoder,
    scaler: Optional[StandardScaler],
    farm_boundary: Dict[str, Any],
    samples_crs: crs.CRS,
):
    """Classify every pixel inside the farm boundary and export the result.

    Args:
        model: Fitted classifier used to predict one label per pixel.
        raster_path: Path/URL of the raster opened with rasterio.
        label_encoder: Encoder whose `inverse_transform` maps predicted labels
            back to the original values.
        scaler: Optional scaler applied to the pixel values before prediction.
        farm_boundary: Geometry mapping of the boundary, expressed in `samples_crs`.
        samples_crs: CRS of `farm_boundary`; it is reprojected to the raster CRS.

    Returns:
        Whatever `write_shapefile` returns for the classified, masked array.
    """
    # Read input raster and clip it to farm boundary
    with rasterio.open(raster_path) as src:
        # Reproject the boundary into the raster CRS before clipping
        p = GeoSeries([shape(farm_boundary)], crs=samples_crs).to_crs(src.crs)[0]
        index_out, tr = mask(src, [p], crop=True, nodata=0)
        # Local name; inside this function it shadows the imported `crs` module
        crs = src.crs
        # A pixel is valid when any band is non-zero
        mask1 = (index_out != 0).any(axis=0)
        # Only the first band is used as the feature
        index_out = index_out[0]

    # NaN and +inf are zeroed (the comparison does not match -inf)
    index_out[np.isnan(index_out)] = 0
    index_out[index_out == np.inf] = 0
    # One sample per pixel, one feature per sample
    s = index_out.reshape(-1, 1)

    # scale indexes
    if scaler is not None:
        s = scaler.transform(s)

    # predict and perform inverse transform
    ck = model.predict(s)
    # NOTE(review): `self.bins` is passed as sieve's second argument, which
    # presumably is the minimum region size -- confirm that is intended.
    ck = sieve(ck.reshape(index_out.shape).astype(np.int32), self.bins, mask=mask1)
    ck = label_encoder.inverse_transform(ck.reshape(-1))
    out_ = ck.reshape(index_out.shape)  # type: ignore
    # Zero out everything outside the valid-pixel mask
    out = out_ * mask1.astype(np.int32)

    asset = write_shapefile(
        out,
        crs,
        tr,
        mask1,
        self.temp_dir.name,
        self.simplify,
        self.tolerance,
        "cluster",
    )
    return asset
def train_classifier(
    self,
    raster_path: str,
    samples: GeoDataFrame,
):
    """Fit a random forest on raster values sampled around each sample point.

    Args:
        raster_path: Path/URL of the raster the training pixels are read from.
        samples: Sample geometries plus the attribute column; reprojected to
            the raster CRS and buffered by `self.buffer` before sampling.

    Returns:
        Tuple `(model, label_encoder, scaler)` where `label_encoder` and
        `scaler` are the objects returned by `get_train_data`.
    """
    # The dataset is only needed while sampling, so it is closed as soon as the
    # training arrays have been extracted (previously it was never closed).
    with rasterio.open(raster_path, "r") as raster_obj:
        # create grid from sample distance
        samples = cast(GeoDataFrame, samples.to_crs(raster_obj.crs))  # type: ignore
        samples["geometry"] = cast(GeoSeries, samples["geometry"]).buffer(
            self.buffer, cap_style=3
        )

        x, y, le, scaler = self.get_train_data(samples=samples, raster=raster_obj)

    # train model
    # NOTE(review): the split is not seeded with self.random_state, so the
    # training subset differs between runs -- confirm that is intended.
    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.2)
    model = RandomForestClassifier(
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        random_state=self.random_state,
    )
    model.fit(x_train, y_train)
    return model, le, scaler
parameters: - - attribute_name - - buffer - - bins - - simplify - - tolerance - - data_scale - - max_depth - - n_estimators - - random_state -description: - short_description: Generate heatmap for nutrients using satellite or spaceEye imagery. diff --git a/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py b/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py deleted file mode 100644 index 65d7ea6d..00000000 --- a/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple, cast - -import geopandas as gpd -import numpy as np -import rasterio -from geopandas import GeoDataFrame -from numpy.typing import NDArray -from rasterio import Affine, features -from rasterio.crs import CRS -from rasterio.enums import MergeAlg -from rasterio.mask import mask -from shapely.geometry import Polygon, shape - -from vibe_core.data import DataVibe, gen_hash_id -from vibe_core.data.core_types import AssetVibe, GeometryCollection -from vibe_core.data.rasters import Raster -from vibe_lib.geometry import create_mesh_grid -from vibe_lib.heatmap_neighbor import ( - run_cluster_overlap, - run_kriging_model, - run_nearest_neighbor, -) -from vibe_lib.raster import write_to_raster -from vibe_lib.shapefile import write_shapefile - - -class CallbackBuilder: - def __init__( - self, - attribute_name: str, - simplify: str, - tolerance: float, - algorithm: str, - resolution: int, - bins: int, - ): - self.temp_shapefile_dir = TemporaryDirectory() - self.temp_tiff_dir = TemporaryDirectory() - self.attribute_name = attribute_name - self.simplify = simplify - self.tolerance = tolerance - self.algorithm = algorithm - self.resolution = resolution - self.bins = bins - - def create_heatmap( - self, - raster: Raster, - samples: GeometryCollection, - samples_boundary: GeometryCollection, - ) -> 
def run_algorithm(
    self,
    samples_df: GeoDataFrame,
    samples_boundary_df: GeoDataFrame,
    geo_locations: GeoDataFrame,
) -> GeoDataFrame:
    """Dispatch to the interpolation routine selected by `self.algorithm`.

    Supported values are "cluster overlap", "nearest neighbor" and
    "kriging neighbor"; only the first one uses `samples_boundary_df`.

    Raises:
        RuntimeError: If `self.algorithm` is none of the supported values.
    """
    algorithm = self.algorithm

    if algorithm == "cluster overlap":
        return run_cluster_overlap(
            attribute_name=self.attribute_name,
            reduced_samples=samples_df,
            minimum_sample_polygons=samples_boundary_df,
            geo_locations=geo_locations,
        )

    if algorithm == "nearest neighbor":
        return run_nearest_neighbor(
            attribute_name=self.attribute_name,
            reduced_samples=samples_df,
            geo_locations=geo_locations,
        )

    if algorithm == "kriging neighbor":
        return run_kriging_model(
            attribute_name=self.attribute_name,
            reduced_samples=samples_df,
            geo_locations=geo_locations,
        )

    raise RuntimeError(f"Unknown algorithm: {self.algorithm}")
def group_to_nearest(self, raster_output: NDArray[Any], raster_mask: NDArray[Any]):
    """Quantize a raster into `self.bins` levels, each set to its bin mean.

    Args:
        raster_output: Array of values. It is modified in place: every
            non-positive cell is overwritten with the mean of the positive cells.
        raster_mask: Boolean array selecting the cells used to compute the
            histogram bin edges.

    Returns:
        A new float array with the shape of `raster_output` in which every
        cell holds the mean of all cells that fall in the same bin.
    """
    raster_output[raster_output <= 0] = raster_output[raster_output > 0].mean()

    # Bin edges come from the masked cells only; the lowest edge is lowered to
    # -1 so that cells equal to the minimum land in bin 0 after searchsorted.
    intervals = np.histogram(raster_output[raster_mask], bins=self.bins)[1]
    intervals[0] = -1
    index = np.searchsorted(intervals, raster_output) - 1
    out_grouped_raster = np.zeros(raster_output.shape)

    for i in range(len(intervals)):
        members = index == i
        # Skip empty bins: averaging an empty selection only produced a
        # RuntimeWarning and a NaN that was assigned to zero cells.
        if members.any():
            out_grouped_raster[members] = raster_output[members].mean()

    return out_grouped_raster
(row.geometry, row[self.attribute_name]), axis=1 - ) - if not nutrients_df.empty: - shapes = tuple(nutrients_df["shapes"].values) # type: ignore - # rasterize shapes - out, raster_vibe = self.rasterize_heatmap(shapes, ar, tr, mask1) - shape_vibe = self.export_to_shapeFile(out, o_raster.crs, tr, mask1) - - vibes = [shape_vibe, raster_vibe] - return vibes - - raise RuntimeError("Model didn't identified nutrient locations") - - raise RuntimeError("No farm boundary found") - - def export_to_shapeFile( - self, - data: NDArray[Any], - crs: CRS, - tr: Affine, - mask1: NDArray[Any], - ): - asset = write_shapefile( - data, - crs, - tr, - mask1, - self.temp_shapefile_dir.name, - self.simplify, - self.tolerance, - "cluster", - ) - return asset - - def __call__(self): - def create_heatmap_init( - raster: Raster, - samples: GeometryCollection, - samples_boundary: GeometryCollection, - ) -> Dict[str, DataVibe]: - out_vibe = self.create_heatmap(raster, samples, samples_boundary) - return {"result": out_vibe} - - return create_heatmap_init - - def __del__(self): - self.temp_shapefile_dir.cleanup() - self.temp_tiff_dir.cleanup() diff --git a/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.yaml b/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.yaml deleted file mode 100644 index 17f247a3..00000000 --- a/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: soil_sample_heatmap_using_neighbors -inputs: - raster: Raster - samples: GeometryCollection - samples_boundary: GeometryCollection -output: - result: DataVibe -parameters: - attribute_name: "C" - simplify: "simplify" - tolerance: 1.0 - algorithm: "nearest neighbor" - resolution: 5 # in meters - bins: 5 -entrypoint: - callback_builder: CallbackBuilder - file: soil_sample_heatmap_using_neighbors.py -dependencies: - parameters: - - attribute_name - - simplify - - tolerance - - algorithm - - resolution - - bins -description: - short_description: Generate heatmap for nutrients 
using satellite or spaceEye imagery. diff --git a/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py b/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py deleted file mode 100644 index 3defa642..00000000 --- a/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -import time -from datetime import datetime -from typing import Any, Dict, Union, cast - -import geopandas as gpd -import pytest -from shapely import geometry as shpg -from shapely.geometry import MultiPolygon, Polygon - -from vibe_core.client import FarmvibesAiClient, get_default_vibe_client -from vibe_core.data import ADMAgSeasonalFieldInput, DataVibe, ExternalReferenceList -from vibe_core.data.core_types import GeometryCollection -from vibe_core.data.rasters import Raster -from vibe_dev.testing.op_tester import OpTester - -FAKE_TIME_RANGE = (datetime(2022, 6, 30), datetime(2022, 7, 2)) -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "soil_sample_heatmap_using_neighbors.yaml", -) - - -@pytest.fixture -def vibe_client(): - return get_default_vibe_client() - - -@pytest.fixture -def vibe_geometry_dict() -> Dict[str, Any]: - farm_boundary = "op_resources/heatmap_sensor/long_block_boundary_4326.geojson" - data_frame = gpd.read_file(farm_boundary) - geometry = shpg.mapping(data_frame["geometry"][0]) # type: ignore - return geometry - - -@pytest.fixture -def vibe_geometry_shapely() -> Union[MultiPolygon, Polygon]: - farm_boundary = "op_resources/heatmap_sensor/long_block_boundary_4326.geojson" - data_frame = gpd.read_file(farm_boundary) - if not data_frame.empty: - geometry = data_frame["geometry"][0] # type: ignore - return cast(MultiPolygon, geometry) - - raise RuntimeError("Geometry is None") - - -@pytest.fixture -def download_sentinel_cluster( - vibe_client: FarmvibesAiClient, vibe_geometry_shapely: 
Union[MultiPolygon, Polygon] -) -> Raster: - run = vibe_client.run( - workflow="data_ingestion/sentinel2/preprocess_s2", - name="sentinel2_example", - geometry=vibe_geometry_shapely, - time_range=(datetime(2022, 6, 30), datetime(2022, 7, 2)), - ) - - while run.status == "running" or run.status == "pending": - continue - time.sleep(5) - if run.status == "done": - obj: Raster = run.output["raster"][0] # type: ignore - return obj - - raise RuntimeError("Download Raster request failed") - - -@pytest.fixture -def download_index_cluster( - vibe_client: FarmvibesAiClient, download_sentinel_cluster: Raster, index: str -) -> Raster: - parameters = {"index": index} - - run = vibe_client.run( - workflow="data_processing/index/index", - name="EVI_example", - input_data=download_sentinel_cluster, - parameters=parameters, - ) - - while run.status == "running" or run.status == "pending": - continue - time.sleep(5) - if run.status == "done": - obj: Raster = run.output["index_raster"][0] # type: ignore - return obj - - raise RuntimeError("Download Raster request failed") - - -@pytest.fixture -def download_samples_cluster( - vibe_client: FarmvibesAiClient, vibe_geometry_dict: Dict[str, Any] -) -> GeometryCollection: - geojson_url = "" - url_hash = str(hash(geojson_url)) - now = datetime.now() - - inputs = ExternalReferenceList( - id=url_hash, - time_range=(now, now), - geometry=vibe_geometry_dict, - assets=[], - urls=[geojson_url], - ) - run = vibe_client.run( - workflow="data_ingestion/user_data/ingest_geometry", - name="geometry_example", - input_data=inputs, - ) - - while run.status == "running" or run.status == "pending": - continue - time.sleep(5) - if run.status == "done": - obj: GeometryCollection = run.output["geometry"][0] # type: ignore - return obj - - raise RuntimeError("Download samples cluster request failed - ") - - -@pytest.fixture -def download_samples_boundary( - vibe_client: FarmvibesAiClient, vibe_geometry_dict: Dict[str, Any] -) -> GeometryCollection: - 
geojson_url = "" - url_hash = str(hash(geojson_url)) - now = datetime.now() - - inputs = ExternalReferenceList( - id=url_hash, - time_range=(now, now), - geometry=vibe_geometry_dict, - assets=[], - urls=[geojson_url], - ) - run = vibe_client.run( - workflow="data_ingestion/user_data/ingest_geometry", - name="geometry_example", - input_data=inputs, - ) - - while run.status == "running" or run.status == "pending": - continue - time.sleep(5) - if run.status == "done": - obj: GeometryCollection = run.output["geometry"][0] # type: ignore - return obj - - raise RuntimeError("Download samples boundary request failed - ") - - -@pytest.fixture -def data_vibe(vibe_geometry_dict: Dict[str, Any]): - id = str(hash("test_minimums_samples_heatmap")) - return DataVibe(id, FAKE_TIME_RANGE, vibe_geometry_dict, []) - - -@pytest.mark.skip(reason="Dependent on the cluster") -def test_heatmap_c( - download_sentinel_cluster: Raster, - download_samples_cluster: GeometryCollection, - download_samples_boundary: GeometryCollection, -): - op_ = OpTester(CONFIG_PATH) - parameters = {"attribute_name": "C", "simplify": "simplify", "tolerance": 1.0} - op_.update_parameters(parameters) - output_data = op_.run( - raster=download_sentinel_cluster, - samples=download_samples_cluster, - samples_boundary=download_samples_boundary, - ) - - # Get op result - assert "result" in output_data - - -@pytest.fixture -def prescriptions(vibe_client: FarmvibesAiClient): - parameters = { - "base_url": "base_url", - "client_id": "client_id", - "client_secret": "client_secret", - "authority": "authority", - "default_scope": "default_scope", - } - sample_inputs = ADMAgSeasonalFieldInput( - party_id="a460c833-7b96-4905-92ed-f19800b87185", - seasonal_field_id="7db1a756-b898-4ecb-8608-bc2476f242a9", - ) - inputs = {"admag_input": sample_inputs} - run = vibe_client.run( - workflow="data_ingestion/admag/prescriptions", - name="prescriptions_example", - input_data=inputs, # type: ignore - parameters=parameters, - ) - - 
while run.status == "running" or run.status == "pending": - continue - - if run.status == "done": - obj = cast(GeometryCollection, run.output["response"][0]) # type: ignore - return obj - raise RuntimeError("Fetch prescriptions failed - ") diff --git a/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py b/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py deleted file mode 100644 index cbeb89f0..00000000 --- a/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import Any, Dict, Union, cast - -import geopandas as gpd -import pytest -from shapely import geometry as shpg -from shapely.geometry import MultiPolygon, Polygon - -from vibe_core.client import FarmvibesAiClient, get_default_vibe_client -from vibe_core.data import ADMAgSeasonalFieldInput, ExternalReferenceList -from vibe_core.data.core_types import GeometryCollection -from vibe_core.data.rasters import Raster -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "soil_sample_heatmap_using_classification.yaml", -) - - -@pytest.fixture -def vibe_client(): - return get_default_vibe_client() - - -@pytest.fixture -def vibe_geometry_dict() -> Dict[str, Any]: - farm_boundary = "op_resources/heatmap_sensor/sensor_farm_boundary.geojson" - data_frame = gpd.read_file(farm_boundary) - geometry = shpg.mapping(data_frame["geometry"][0]) # type: ignore - return geometry - - -@pytest.fixture -def vibe_geometry_shapely() -> Union[MultiPolygon, Polygon]: - farm_boundary = "op_resources/heatmap_sensor/sensor_farm_boundary.geojson" - data_frame = gpd.read_file(farm_boundary) - if not data_frame.empty: - geometry = data_frame["geometry"][0] # type: ignore - return cast(MultiPolygon, geometry) - - raise RuntimeError("Geometry is None") - - 
-@pytest.fixture -def download_sentinel_cluster( - vibe_client: FarmvibesAiClient, vibe_geometry_shapely: Union[MultiPolygon, Polygon] -) -> Raster: - run = vibe_client.run( - workflow="data_ingestion/sentinel2/preprocess_s2", - name="sentinel2_example", - geometry=vibe_geometry_shapely, - time_range=(datetime(2022, 6, 30), datetime(2022, 7, 2)), - ) - - while run is None or run.status == "running" or run.status == "pending": - continue - - if run.status == "done": - obj: Raster = run.output["raster"][0] # type: ignore - return obj - - raise RuntimeError("Download Raster request failed") - - -@pytest.fixture -def download_index_cluster( - vibe_client: FarmvibesAiClient, download_sentinel_cluster: Raster, index: str -) -> Raster: - parameters = {"index": index} - - run = vibe_client.run( - workflow="data_processing/index/index", - name="EVI_example", - input_data=download_sentinel_cluster, - parameters=parameters, - ) - - while run.status == "running" or run.status == "pending": - continue - - if run.status == "done": - obj: Raster = run.output["index_raster"][0] # type: ignore - return obj - - raise RuntimeError("Download Raster request failed") - - -@pytest.fixture -def download_samples_cluster( - vibe_client: FarmvibesAiClient, vibe_geometry_dict: Dict[str, Any] -) -> GeometryCollection: - geojson_url = "" - url_hash = str(hash(geojson_url)) - now = datetime.now() - - inputs = ExternalReferenceList( - id=url_hash, - time_range=(now, now), - geometry=vibe_geometry_dict, - assets=[], - urls=[geojson_url], - ) - run = vibe_client.run( - workflow="data_ingestion/user_data/ingest_geometry", - name="geometry_example", - input_data=inputs, - ) - - while run is None or run.status == "running" or run.status == "pending": - continue - - if run.status == "done": - obj: GeometryCollection = run.output["geometry"][0] # type: ignore - return obj - - raise RuntimeError("Download samples request failed - ") - - -@pytest.mark.skip(reason="Dependent on the cluster") 
-@pytest.mark.parametrize("index", ["evi"]) -def test_heatmap_c(download_index_cluster: Raster, download_samples_cluster: GeometryCollection): - op_ = OpTester(CONFIG_PATH) - parameters = { - "attribute_name": "C", - "buffer": 3, - "bins": 4, - "simplify": "simplify", - "tolerance": 1.0, - "data_scale": False, - "max_depth": 50, - "n_estimators": 25, - "random_state": 100, - } - op_.update_parameters(parameters) - output_data = op_.run(raster=download_index_cluster, samples=download_samples_cluster) - - # Get op result - assert "result" in output_data - - -@pytest.mark.skip(reason="Dependent on the cluster") -@pytest.mark.parametrize("index", ["evi"]) -def test_heatmap_n(download_index_cluster: Raster, download_samples_cluster: GeometryCollection): - op_ = OpTester(CONFIG_PATH) - parameters = { - "attribute_name": "N", - "buffer": 10, - "bins": 4, - "simplify": "simplify", - "tolerance": 1.0, - "data_scale": True, - "max_depth": 50, - "n_estimators": 25, - "random_state": 100, - } - op_.update_parameters(parameters) - output_data = op_.run(raster=download_index_cluster, samples=download_samples_cluster) - - # Get op result - assert "result" in output_data - - -@pytest.mark.skip(reason="Dependent on the cluster") -@pytest.mark.parametrize("index", ["pri"]) -def test_heatmap_ph(download_index_cluster: Raster, download_samples_cluster: GeometryCollection): - op_ = OpTester(CONFIG_PATH) - parameters = { - "attribute_name": "pH", - "buffer": 10, - "bins": 4, - "simplify": "simplify", - "tolerance": 1.0, - "data_scale": False, - "max_depth": 50, - "n_estimators": 25, - "random_state": 100, - } - op_.update_parameters(parameters) - output_data = op_.run(raster=download_index_cluster, samples=download_samples_cluster) - - # Get op result - assert "result" in output_data - - -@pytest.mark.skip(reason="Dependent on the cluster") -@pytest.mark.parametrize("index", ["evi"]) -def test_heatmap_p(download_index_cluster: Raster, download_samples_cluster: GeometryCollection): - 
parameters = { - "attribute_name": "P", - "buffer": 3, - "bins": 4, - "simplify": "simplify", - "tolerance": 1.0, - "data_scale": True, - "max_depth": 50, - "n_estimators": 25, - "random_state": 100, - } - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - output_data = op_.run(raster=download_index_cluster, samples=download_samples_cluster) - - # Get op result - assert "result" in output_data - - -@pytest.fixture -def prescriptions(vibe_client: FarmvibesAiClient): - parameters = { - "base_url": "base_url", - "client_id": "client_id", - "client_secret": "client_secret", - "authority": "authority", - "default_scope": "default_scope", - } - - sample_inputs = ADMAgSeasonalFieldInput( - party_id="a460c833-7b96-4905-92ed-f19800b87185", - seasonal_field_id="7db1a756-b898-4ecb-8608-bc2476f242a9", - ) - - inputs = {"admag_input": sample_inputs} - - run = vibe_client.run( - workflow="data_ingestion/admag/prescriptions", - name="prescriptions_example", - input_data=inputs, # type: ignore - parameters=parameters, - ) - - while run.status == "running" or run.status == "pending": - continue - - if run.status == "done": - obj = cast(GeometryCollection, run.output["response"][0]) # type: ignore - return obj - - raise RuntimeError("Fetch prescriptions failed - ") - - -@pytest.mark.skip(reason="Dependent on the cluster") -@pytest.mark.parametrize("index", ["evi"]) -def test_heatmap_p_admag(download_index_cluster: Raster, prescriptions: GeometryCollection): - parameters = { - "attribute_name": "P", - "buffer": 3, - "bins": 4, - "simplify": "simplify", - "tolerance": 1.0, - "data_scale": True, - "max_depth": 50, - "n_estimators": 25, - "random_state": 100, - } - op_ = OpTester(CONFIG_PATH) - op_.update_parameters(parameters) - output_data = op_.run(raster=download_index_cluster, samples=prescriptions) - - # Get op result - assert "result" in output_data diff --git a/ops/helloworld/helloworld.py b/ops/helloworld/helloworld.py deleted file mode 100644 index 
d4ea8aa1..00000000 --- a/ops/helloworld/helloworld.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Dict, List, Union - -import geopandas as gpd -import matplotlib.pyplot as plt -import numpy as np -import rasterio -from PIL import Image, ImageDraw, ImageFont -from rasterio.features import rasterize -from rasterio.transform import from_bounds -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, Raster, gen_guid -from vibe_core.data.core_types import AssetVibe -from vibe_lib.raster import INT_COMPRESSION_KWARGS - -FONT_PATHS = [ - "DejaVuSans.ttf", - "/opt/conda/fonts/DejaVuSans.ttf", - "/usr/share/fonts/TTF/DejaVuSans.ttf", - "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", - "/usr/share/fonts/dejavu-sans-fonts/DejaVuSans.ttf", -] - - -def load_default_font(): - font_set = False - for font in FONT_PATHS: - try: - ImageDraw.ImageDraw.font = ImageFont.truetype(font, 14) # type: ignore - font_set = True - break - except OSError: - pass - if not font_set: - # We failed to load the font, raise an error - raise ValueError("Failed to load font for helloworld op") - - -def get_geoms(g: Union[shpg.Polygon, shpg.MultiPolygon]) -> List[shpg.Polygon]: - """ - Map MultiPolygons and Polygons into list of Polygons - """ - if isinstance(g, shpg.MultiPolygon): - return list(g.geoms) - return [g] - - -class CallbackBuilder: - msg = "HELLO WORLD" - - def __init__(self, width: int, height: int) -> None: - self.width = width - self.height = height - self.tmp_dir = TemporaryDirectory() - load_default_font() - - def __call__(self): - def hello(user_input: DataVibe) -> Dict[str, Raster]: - geom = shpg.shape(user_input.geometry) - df = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) # type: ignore - # Find out which geometries intersect with the input geometry - # Some countries have several polygons, let's split 
MultiPolygons into Polygons - # So we don't have regions that are far away being highlighted - country_geoms = [gg for g in df.geometry.tolist() for gg in get_geoms(g)] - yes_geom = [(g, 1) for g in country_geoms if g.intersects(geom)] - no_geom = [(g, 2) for g in country_geoms if not g.intersects(geom)] - tr = from_bounds(-180, -90, 180, 90, self.width, self.height) - # Generate RGBA image using tab10 (blue, orange, and green) - ar = ( - plt.cm.tab10( # type: ignore - rasterize( - yes_geom + no_geom + [(geom.boundary, 3)], - out_shape=(self.height, self.width), - transform=tr, # type: ignore - ) - ) - * 255 - ).astype(np.uint8) - - # Let's write a nice message 🙂 - img = Image.fromarray(ar) - img_d = ImageDraw.Draw(img) - offset = (self.width - img_d.getfont().getbbox(self.msg)[3]) // 2 - img_d.text((offset, 10), "HELLO WORLD", fill=(255, 255, 255)) - # Get image into CHW array and pick RGB bands - ar = np.array(img).transpose((2, 0, 1))[:3] - - # Write image to tiff file with the correct CRS and transform - meta = { - "driver": "GTiff", - "dtype": "uint8", - "width": self.width, - "height": self.height, - "count": 3, - "crs": "epsg:4326", - "transform": tr, - } - raster_guid = gen_guid() - out_path = os.path.join(self.tmp_dir.name, f"{raster_guid}.tif") - with rasterio.open(out_path, "w", **meta, **INT_COMPRESSION_KWARGS) as dst: - dst.write(ar) - asset = AssetVibe(out_path, "image/tiff", raster_guid) - # Let's use the geometry and date from the input - return { - "raster": Raster.clone_from( - user_input, - id=gen_guid(), - assets=[asset], - bands={"red": 0, "blue": 1, "green": 2}, - ) - } - - return hello - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/helloworld/helloworld.yaml b/ops/helloworld/helloworld.yaml deleted file mode 100644 index 023af4d4..00000000 --- a/ops/helloworld/helloworld.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: helloworld -inputs: - user_input: DataVibe -output: - raster: Raster -parameters: - width: 512 - height: 256 
-entrypoint: - file: helloworld.py - callback_builder: CallbackBuilder -description: - short_description: Test op that generates an image of the Earth with countries that intersect with the - input geometry highlighted in orange. diff --git a/ops/linear_trend/linear_trend.py b/ops/linear_trend/linear_trend.py deleted file mode 100644 index 2ff2dd34..00000000 --- a/ops/linear_trend/linear_trend.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from tempfile import TemporaryDirectory -from typing import Dict, List, Tuple - -import numpy as np -import xarray as xr -from numpy.typing import NDArray - -from vibe_core.data import RasterChunk -from vibe_core.data.rasters import Raster -from vibe_lib.raster import read_chunk_series, save_raster_to_asset - - -def fit_model_in_bulk(da: xr.Dataset) -> Tuple[NDArray[np.float64], NDArray[np.float64]]: - B, A, ATAinv, beta_hat, trend = linear_fit_in_bulk(da) - - test_stat = compute_test_statistics(da, B, A, ATAinv, beta_hat) - - return trend, test_stat - - -def compute_test_statistics( - da: xr.Dataset, - B: NDArray[np.float64], - A: NDArray[np.float64], - ATAinv: NDArray[np.float64], - beta_hat: NDArray[np.float64], -): - # estimating test statistic for the trend - n = np.sum(np.logical_not(np.isnan(B)).astype(int), axis=0) - gamma = ATAinv[0, 0] - sig_hat2 = np.nansum((B - A @ beta_hat) ** 2, axis=0) / (n - 2) - maskout = np.logical_or(np.isnan(sig_hat2), sig_hat2 == 0) - test_stat = beta_hat[0, :] / np.sqrt(np.where(np.logical_not(maskout), sig_hat2, 1.0) * gamma) - - # make sure we have at least two points to store trend - test_stat = np.where(n > 1, test_stat, np.nan) - - test_stat = np.where(np.logical_not(maskout), test_stat, np.nan) - - test_stat = test_stat.reshape(da.shape[1:]) - return test_stat - - -def linear_fit_in_bulk( - da: xr.Dataset, -) -> Tuple[ - NDArray[np.float64], - NDArray[np.float64], - NDArray[np.float64], - 
NDArray[np.float64], - NDArray[np.float64], -]: - # fitting a linear model in bulk - n = da.shape[0] - B = da.values.reshape((n, -1)) - t = da.time.values - if type(t[0]) is not np.datetime64: - t = np.array(list(map(lambda x: x.to_datetime64(), da.time.values))) - t = (t - np.min(t)) / np.timedelta64(1, "D") - A = np.stack((t, np.ones_like(t))).T - ATAinv = np.linalg.inv(A.T @ A) - - # this is just A.T@B, but avoing issues with nan, so that even if - # one pixel/band has a nan in a given time we still estimate the trend - # by ignoring the particular time (also in test statistic estimation) - ATB = np.nansum(A.reshape(n, 2, 1) * B.reshape(n, 1, -1), axis=0) - - beta_hat = ATAinv @ ATB - trend = beta_hat[0, :] - - # make sure we have at least two points to store trend - trend = np.where(n > 1, trend, np.nan) - - trend = trend.reshape(da.shape[1:]) - return B, A, ATAinv, beta_hat, trend - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def linear_trend_callback( - series: RasterChunk, rasters: List[Raster] - ) -> Dict[str, RasterChunk]: - da = read_chunk_series(series.limits, rasters) - - trend, test_stat = fit_model_in_bulk(da) - - # store results - coords = {k: v for k, v in da.coords.items() if k != "time" and k != "band"} - data = np.concatenate((trend, test_stat)) - res = xr.DataArray(data=data, dims=list(da.dims)[1:], coords=coords, attrs=da.attrs) - asset = save_raster_to_asset(res, self.tmp_dir.name) - bands: Dict[str, int] = {} - for k, v in series.bands.items(): - bands[f"trend_{k}"] = int(v) - bands[f"test_stat_{k}"] = int(v) + len(series.bands) - res = RasterChunk( - id=hashlib.sha256(f"linear_trend-{series.id}".encode()).hexdigest(), - time_range=series.time_range, - geometry=series.geometry, - assets=[asset], - bands=bands, - chunk_pos=series.chunk_pos, - num_chunks=series.num_chunks, - limits=series.limits, - write_rel_limits=series.write_rel_limits, - ) - - return {"trend": res} - - 
return linear_trend_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/linear_trend/linear_trend.yaml b/ops/linear_trend/linear_trend.yaml deleted file mode 100644 index d3b466c2..00000000 --- a/ops/linear_trend/linear_trend.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: linear_trend -inputs: - series: RasterChunk - rasters: List[Raster] -output: - trend: RasterChunk -parameters: -entrypoint: - file: linear_trend.py - callback_builder: CallbackBuilder -description: - short_description: Computes the pixel-wise linear trend across rasters. \ No newline at end of file diff --git a/ops/linear_trend/test_linear_trend.py b/ops/linear_trend/test_linear_trend.py deleted file mode 100644 index c38db869..00000000 --- a/ops/linear_trend/test_linear_trend.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import numpy as np -import xarray as xr -from linear_trend import fit_model_in_bulk -from pandas import Timedelta, Timestamp - - -def _one_test_fit_model_in_bulk(sy: int, sx: int, sz: int): - TOL = 1e-10 - t = [Timestamp(2001, 1, 1) + Timedelta(days=d) for d in range(sz)] # type: ignore - - fake_rasters = [] - true_trend = [] - for i in range(sy * sx): - h = i / (sy * sx - 1) - true_trend.append(h) - fake_rasters.append(np.linspace(0, h * (sz - 1), sz)) - fake_rasters = np.stack(fake_rasters).reshape((sy, sx, sz)).transpose((2, 0, 1)) - true_trend = np.array(true_trend).reshape((sy, sx)) - - da = xr.DataArray(data=fake_rasters, dims=["time", "y", "x"], coords={"time": t}) - - trend_hat, _ = fit_model_in_bulk(da) # type: ignore - - assert np.max(np.abs(trend_hat - true_trend)) < TOL - - -def test_fit_model_in_bulk(): - s = [32, 64, 128] - for sy in s: - for sx in s: - for sz in s: - _one_test_fit_model_in_bulk(sy, sx, sz) diff --git a/ops/list_airbus_products/list_airbus.py b/ops/list_airbus_products/list_airbus.py deleted file mode 100644 index b5f4a373..00000000 --- 
a/ops/list_airbus_products/list_airbus.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, Dict, List - -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import AirbusProduct, DataVibe, gen_guid -from vibe_lib.airbus import AirBusAPI, Constellation - - -def convert_product(product: Dict[str, Any], geom: BaseGeometry) -> AirbusProduct: - dt = datetime.fromisoformat(product["acquisitionDate"].replace("Z", "+00:00")) - # This is the geometry for the whole product - product["product_geometry"] = product.pop("geometry") - - # Get actual bounds from the raster - return AirbusProduct( - id=gen_guid(), - time_range=(dt, dt), - geometry=shpg.mapping(geom), - assets=[], - acquisition_id=product.pop("acquisitionIdentifier"), - extra_info=product, - ) - - -class CallbackBuilder: - def __init__( - self, - api_key: str, - constellations: List[str], - max_cloud_cover: int, - ): - self.api_key = api_key - self.constellations = [Constellation(c) for c in constellations] - self.max_cloud_cover = max_cloud_cover - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def download_products( - input_item: DataVibe, - ) -> Dict[str, List[AirbusProduct]]: - api = AirBusAPI( - self.api_key, - projected_crs=False, - constellations=self.constellations, - ) - geom = shpg.shape(input_item.geometry) - - search_results = api.query( - geom, input_item.time_range, self.max_cloud_cover, my_workspace=False - ) - - return {"airbus_products": [convert_product(p, geom) for p in search_results]} - - return download_products diff --git a/ops/list_airbus_products/list_airbus_products.yaml b/ops/list_airbus_products/list_airbus_products.yaml deleted file mode 100644 index b9578b58..00000000 --- a/ops/list_airbus_products/list_airbus_products.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: 
list_airbus_products -inputs: - input_item: DataVibe -output: - airbus_products: List[AirbusProduct] -parameters: - api_key: "@SECRET(eywa-secrets, msr-airbus-api)" - constellations: ["PHR", "SPOT"] - max_cloud_cover: 10 -entrypoint: - file: list_airbus.py - callback_builder: CallbackBuilder -description: - short_description: Lists available AirBus products for the input geometry and time range. \ No newline at end of file diff --git a/ops/list_alos_products/list_alos_products.py b/ops/list_alos_products/list_alos_products.py deleted file mode 100644 index 436aa871..00000000 --- a/ops/list_alos_products/list_alos_products.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, Dict, List, cast - -from dateutil.parser import parse -from pystac import Item -from shapely import geometry as shpg - -from vibe_core.data import AlosProduct, DataVibe -from vibe_lib.planetary_computer import AlosForestCollection - - -class CallbackBuilder: - def __init__(self): - pass - - def validate_item(self, item: Item): - if item.geometry is None: - raise ValueError(f"Item {item.id} is missing geometry field") - if not isinstance(item.geometry, dict): - raise ValueError(f"Item {item.id} geometry is not a dict") - - def convert_product(self, item: Item) -> AlosProduct: - self.validate_item(item) - time_range = tuple(parse(item.properties[k]) for k in ("start_datetime", "end_datetime")) - geometry = cast(Dict[str, Any], item.geometry) - return AlosProduct(id=item.id, geometry=geometry, time_range=time_range, assets=[]) - - def __call__(self): - def callback(input_data: DataVibe) -> Dict[str, List[AlosProduct]]: - collection = AlosForestCollection() - items = collection.query( - geometry=shpg.shape(input_data.geometry), time_range=input_data.time_range - ) - - if not items: - raise ValueError( - f"No items found for geometry {input_data.geometry} " - f"and time range {input_data.time_range}" - ) - - return 
{"alos_products": [self.convert_product(i) for i in items]} - - return callback diff --git a/ops/list_alos_products/list_alos_products.yaml b/ops/list_alos_products/list_alos_products.yaml deleted file mode 100644 index bd2fd36c..00000000 --- a/ops/list_alos_products/list_alos_products.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: list_alos_products -inputs: - input_data: DataVibe -output: - alos_products: List[AlosProduct] -parameters: -entrypoint: - file: list_alos_products.py - callback_builder: CallbackBuilder -description: - short_description: Lists ALOS forest products for input geometry and time range. \ No newline at end of file diff --git a/ops/list_alos_products/test_alos_list.py b/ops/list_alos_products/test_alos_list.py deleted file mode 100644 index fefb920b..00000000 --- a/ops/list_alos_products/test_alos_list.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import Any, Dict, Tuple -from unittest.mock import Mock, patch - -import pytest -from pystac import Item - -from vibe_core.data import AlosProduct, DataVibe -from vibe_dev.testing.op_tester import OpTester - - -@pytest.fixture -def geometry(): - return { - "type": "Polygon", - "coordinates": [ - [ - [-86.773827, 14.575498], - [-86.770459, 14.579301], - [-86.764283, 14.575102], - [-86.769591, 14.567595], - [-86.773827, 14.575498], - ] - ], - } - - -@pytest.fixture -def time_range(): - return (datetime(2019, 1, 1), datetime(2020, 1, 1)) - - -@pytest.fixture -def data_vibe(geometry: Dict[str, Any], time_range: Tuple[datetime, datetime]): - return DataVibe( - id=str("test_id"), - time_range=time_range, - geometry=geometry, - assets=[], - ) - - -def expected_items(geometry: Dict[str, Any], time_range: Tuple[datetime, datetime]): - bbox = [-87.0, 14.0, -86.0, 15.0] - first_item = Item( - id="N15W087_20_FNF", - geometry=geometry, - bbox=bbox, - datetime=time_range[0], - properties={ - 
"start_datetime": time_range[0].strftime("%Y-%m-%d"), - "end_datetime": time_range[0].strftime("%Y-%m-%d"), - }, - ) - second_item = Item( - id="N15W087_19_FNF", - geometry=geometry, - bbox=bbox, - datetime=time_range[1], - properties={ - "start_datetime": time_range[1].strftime("%Y-%m-%d"), - "end_datetime": time_range[1].strftime("%Y-%m-%d"), - }, - ) - return [first_item, second_item] - - -@patch("vibe_lib.planetary_computer.AlosForestCollection.query") -def test_alos_list(query: Mock, data_vibe: DataVibe): - mock_items = expected_items(geometry=data_vibe.geometry, time_range=data_vibe.time_range) - query.return_value = mock_items - config_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "list_alos_products.yaml" - ) - - op = OpTester(config_path) - output_data = op.run(**{"input_data": data_vibe}) - assert output_data - - assert "alos_products" in output_data - products = output_data["alos_products"] - - # Check variable products is a list of AlosProduct - assert isinstance(products, list) - assert len(products) == len(mock_items) - for item, product in zip(mock_items, products): - assert isinstance(product, AlosProduct) - assert product.id == item.id - assert product.geometry == item.geometry - assert product.time_range == (item.datetime, item.datetime) - assert product.assets == [] diff --git a/ops/list_bing_maps/list_bing_maps.py b/ops/list_bing_maps/list_bing_maps.py deleted file mode 100644 index d37da255..00000000 --- a/ops/list_bing_maps/list_bing_maps.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -from datetime import datetime -from typing import Dict, List, Optional - -from pystac.item import Item - -from vibe_core.data import DataVibe -from vibe_core.data.products import BingMapsProduct -from vibe_lib.bing_maps import MAX_ZOOM_LEVEL, MIN_ZOOM_LEVEL, BingMapsCollection - - -class CallbackBuilder: - def __init__( - self, - api_key: str, - zoom_level: int, - imagery_set: str, - map_layer: str, - orientation: Optional[float], - ): - if not api_key: - raise ValueError("BingMaps API key was not provided.") - if imagery_set != "Aerial": - raise ValueError("Only imagery set 'Aerial' is supported.") - if map_layer != "Basemap": - raise ValueError("Only map layer 'Basemap' is supported.") - if orientation is not None: - raise ValueError("Setting an orientation is currently not supported.") - if zoom_level < MIN_ZOOM_LEVEL or zoom_level > MAX_ZOOM_LEVEL: - raise ValueError( - f"Zoom level must be within [{MIN_ZOOM_LEVEL}, {MAX_ZOOM_LEVEL}]. " - f"Found {zoom_level}." - ) - - self.collection = BingMapsCollection(api_key) - self.zoom_level = zoom_level - self.imagery_set = imagery_set - self.map_layer = map_layer - self.orientation = 0.0 if orientation is None else orientation - - def convert_product(self, item: Item) -> BingMapsProduct: - assert item.geometry is not None, "input Item has no geometry" - - product = BingMapsProduct( - id=hashlib.sha256( - (f"bingmaps-{item.id}-{self.imagery_set}-{self.map_layer}").encode() - ).hexdigest(), - time_range=(datetime.now(), datetime.now()), - geometry=item.geometry, - assets=[], - url=item.properties["url"], - zoom_level=self.zoom_level, - imagery_set=self.imagery_set, - map_layer=self.map_layer, - orientation=self.orientation, - ) - return product - - def __call__(self): - def list_bing_maps( - user_input: DataVibe, - ) -> Dict[str, List[BingMapsProduct]]: - items = self.collection.query_tiles(user_input.bbox, self.zoom_level) - - if not items: - raise RuntimeError("No products found for input geometry 
and zoom level.") - - products = [self.convert_product(item) for item in items] - return {"products": products} - - return list_bing_maps diff --git a/ops/list_bing_maps/list_bing_maps.yaml b/ops/list_bing_maps/list_bing_maps.yaml deleted file mode 100644 index d97eb456..00000000 --- a/ops/list_bing_maps/list_bing_maps.yaml +++ /dev/null @@ -1,44 +0,0 @@ -name: list_bing_maps -inputs: - user_input: DataVibe -output: - products: List[BingMapsProduct] -parameters: - api_key: - zoom_level: 10 - imagery_set: Aerial - map_layer: Basemap - orientation: null -dependencies: - parameters: - - zoom_level - - imagery_set - - map_layer - - orientation -entrypoint: - file: list_bing_maps.py - callback_builder: CallbackBuilder -description: - short_description: - Lists BingMaps basemap tile products intersecting the input geometry for a given `zoom_level`. - inputs: - user_input: Geometry of interest. - output: - products: Listed products. - parameters: - api_key: Required BingMaps API key. - zoom_level: >- - Zoom level of interest, ranging from 0 to 20. For instance, a zoom level of 1 corresponds to - a resolution of 78271.52 m/pixel, a zoom level of 10 corresponds to 152.9 m/pixel, and a zoom - level of 19 corresponds to 0.3 m/pixel. For more information on zoom levels and their - corresponding scale and resolution, please refer to the BingMaps API documentation - at https://learn.microsoft.com/en-us/bingmaps/articles/understanding-scale-and-resolution - imagery_set: >- - The type of imagery. Currently only supporting 'Aerial'. - map_layer: >- - The display layer that renders on top of the imagery set. Currently only supporting 'Basemap'. - orientation: >- - The orientation of the viewport to use for the imagery metadata. - Currently unused and set to None. 
- - diff --git a/ops/list_bing_maps/test_list_bing_maps.py b/ops/list_bing_maps/test_list_bing_maps.py deleted file mode 100644 index 4c0699af..00000000 --- a/ops/list_bing_maps/test_list_bing_maps.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import List, Optional, cast -from unittest.mock import MagicMock, patch - -import pytest -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import DataVibe -from vibe_core.data.products import BingMapsProduct -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.bing_maps import BingMapsCollection - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_bing_maps.yaml") -FAKE_TIME_RANGE = (datetime.now(), datetime.now()) - -# Geometries -WORLD_GEOMETRY = box(-180, -90, 180, 90) -WESTERN_HEMISPHERE_GEOMETRY = box(-180, -90, -0.00001, 90) -EASTERN_HEMISPHERE_GEOMETRY = box(0.00001, -90, 180, 90) -NORTHERN_HEMISPHERE_GEOMETRY = box(-180, 0.00001, 180, 90) -SOUTHERN_HEMISPHERE_GEOMETRY = box(-180, -90, 180, -0.00001) -QUARTER_WORLD_CENTERED_GEOMETRY = box(-89.99999, -44.99999, 89.99999, 44.99999) - -FIELD_GEOMETRY = Polygon( - [ - (-118.940490, 46.998848), - (-118.876148, 46.998848), - (-118.876148, 47.013422), - (-118.940490, 47.013422), - ] -) - - -@pytest.mark.parametrize( - "input_geometry, zoom_level, num_tiles", - [ # Whole world geometry - (WORLD_GEOMETRY, zoom_level, n_tiles) - for zoom_level, n_tiles in [(1, 4), (2, 16), (3, 64), (5, 1024), (7, 16384)] - ] - + [ # Half world geometries - (geom, zoom_level, n_tiles) - for geom in [ - WESTERN_HEMISPHERE_GEOMETRY, - EASTERN_HEMISPHERE_GEOMETRY, - NORTHERN_HEMISPHERE_GEOMETRY, - SOUTHERN_HEMISPHERE_GEOMETRY, - ] - for zoom_level, n_tiles in [(1, 2), (2, 8), (3, 32), (5, 512), (7, 8192)] - ] - + [ # Quarter world geometry - (QUARTER_WORLD_CENTERED_GEOMETRY, zoom_level, n_tiles) - for zoom_level, 
n_tiles in [(1, 4), (2, 4), (3, 16), (5, 160), (7, 2304)] - ] - + [ # Small field geometry - (FIELD_GEOMETRY, zoom_level, n_tiles) - for zoom_level, n_tiles in [ - (1, 1), - (10, 1), - (12, 2), - (14, 8), - (15, 21), - (18, 816), - ] - ], -) -@patch.object( - BingMapsCollection, - "get_download_url_and_subdomains", - return_value=( - "fake_download_url_{subdomain}_{quadkey}_{api_key}", - ["fake_subdomain"], - ), -) -@patch("vibe_lib.bing_maps.tile_is_available", return_value=True) -def test_list_bing_maps( - _: MagicMock, - __: MagicMock, - input_geometry: Polygon, - zoom_level: int, - num_tiles: int, -): - user_input = DataVibe("user_input", FAKE_TIME_RANGE, mapping(input_geometry), []) - - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters( - { - "api_key": "valid_fake_api_key", - "zoom_level": zoom_level, - "imagery_set": "Aerial", - "map_layer": "Basemap", - "orientation": None, - } - ) - output_data = op_tester.run(user_input=user_input) - - # Get op result - output_name = "products" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, list) - assert len(cast(List[BingMapsProduct], output_data["products"])) == num_tiles - - -@pytest.mark.parametrize( - "zoom_level, api_key, imagery_set, map_layer, orientation", - [ - # Invalid api_key - (10, "", "Aerial", "Basemap", None), - (10, None, "Aerial", "Basemap", None), - # Invalid zoom_level - (0, "valid_fake_api_key", "Aerial", "Basemap", None), - (21, "valid_fake_api_key", "Aerial", "Basemap", None), - # Invalid imagery_set - (10, "valid_fake_api_key", "invalid_imagery_set", "Basemap", None), - # Invalid map_layer - (10, "valid_fake_api_key", "Aerial", "invalid_map_layer", None), - # Invalid orientation - (10, "valid_fake_api_key", "Aerial", "Basemap", -1), - (10, "valid_fake_api_key", "Aerial", "Basemap", 180), - (10, "valid_fake_api_key", "Aerial", "Basemap", 380), - ], -) -def test_invalid_parameters( - zoom_level: int, - api_key: str, 
- imagery_set: str, - map_layer: str, - orientation: Optional[float], -): - user_input = DataVibe("user_input", FAKE_TIME_RANGE, mapping(FIELD_GEOMETRY), []) - - op_tester = OpTester(CONFIG_PATH) - - op_tester.update_parameters( - { - "api_key": api_key, - "zoom_level": zoom_level, - "imagery_set": imagery_set, - "map_layer": map_layer, - "orientation": orientation, - } - ) - with pytest.raises(ValueError): - op_tester.run(user_input=user_input) diff --git a/ops/list_cdl_products/list_cdl_products.py b/ops/list_cdl_products/list_cdl_products.py deleted file mode 100644 index d8b18d3c..00000000 --- a/ops/list_cdl_products/list_cdl_products.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -# This op receives a date range and geometry and list the respective CDL products -from datetime import datetime -from typing import Dict, List - -from shapely import geometry as shpg -from shapely import wkt - -from vibe_core.data import DataVibe -from vibe_core.data.core_types import gen_hash_id -from vibe_core.data.products import CDL_DOWNLOAD_URL, CDLProduct -from vibe_core.file_downloader import verify_url - - -def check_cdl_for_year(year: int) -> bool: - """Verify if there is a CDL file available for that year""" - url = CDL_DOWNLOAD_URL.format(year) - return verify_url(url) - - -class CallbackBuilder: - def __init__(self, cdl_geometry_wkt: str): - with open(cdl_geometry_wkt, "r") as wkt_file: - self.cdl_geometry = wkt.load(wkt_file) - - def convert_product(self, year: int) -> CDLProduct: - """Given the year, builds the CDLProduct""" - - start_date = datetime(year, 1, 1) - end_date = datetime(year, 12, 31) - time_range = (start_date, end_date) - - cdl_geom = shpg.mapping(self.cdl_geometry) - - product = CDLProduct( - id=gen_hash_id(f"cdl_product_{year}", cdl_geom, time_range), - time_range=time_range, - geometry=cdl_geom, - assets=[], - ) - - return product - - def __call__(self): - def 
list_cdl_products(input_item: DataVibe) -> Dict[str, List[CDLProduct]]: - """List all years for the input time range and create a product for each of them""" - - # Verify if input geometry intersects with cdl geometry - input_geom = shpg.shape(input_item.geometry) - if input_geom.intersects(self.cdl_geometry): - # List all years - start_date, end_date = input_item.time_range - input_years = range(start_date.year, end_date.year + 1) - - # Create a product for each year that has a CDL map available - products = [ - self.convert_product(year) for year in input_years if check_cdl_for_year(year) - ] - else: - raise ValueError( - "Input geometry does not intersect with CDL coverage area (continental US)." - ) - - return {"cdl_products": products} - - return list_cdl_products diff --git a/ops/list_cdl_products/list_cdl_products.yaml b/ops/list_cdl_products/list_cdl_products.yaml deleted file mode 100644 index c7643a0c..00000000 --- a/ops/list_cdl_products/list_cdl_products.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: list_cdl_products -inputs: - input_item: DataVibe -output: - cdl_products: List[CDLProduct] -parameters: - cdl_geometry_wkt: /opt/terravibes/ops/resources/cdl_metadata/us_continental.wkt -entrypoint: - file: list_cdl_products.py - callback_builder: CallbackBuilder -description: - short_description: - Lists all years for the input time range and creates a product for each of them to be downloaded. \ No newline at end of file diff --git a/ops/list_chirps/list_chirps.py b/ops/list_chirps/list_chirps.py deleted file mode 100644 index 53c0c98e..00000000 --- a/ops/list_chirps/list_chirps.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -from calendar import monthrange -from datetime import datetime, timedelta, timezone -from typing import Any, Dict, List, Optional, Tuple, Union - -import pytz -import rasterio -import requests -from dateutil.parser import isoparse -from dateutil.relativedelta import relativedelta -from pystac import MediaType -from pystac.asset import Asset -from pystac.item import Item -from shapely import geometry as shpg -from shapely.geometry import Polygon, mapping - -from vibe_core.data import ChirpsProduct, DataVibe -from vibe_core.data.core_types import BBox - - -class ChirpsCollection: - INI = datetime(1981, 1, 1, tzinfo=timezone.utc) # first day Chirps is available - VALID_FREQ = {"daily", "monthly"} - VALID_RES = {"p05", "p25"} - - def __init__(self, freq: str, res: str): - if freq not in self.VALID_FREQ: - raise ValueError( - f"Invalid Chirps frequency {freq} - valid options are {','.join(self.VALID_FREQ)}" - ) - if res not in self.VALID_RES: - raise ValueError( - f"Invalid Chirps resolution {res} - valid options are {','.join(self.VALID_RES)}" - ) - if freq == "monthly" and res != "p05": - raise ValueError("Monthly Chirps is only available on p05 resolution") - - self.freq = freq - self.res = res - self.end = self.get_latest_chirps() - # all bbox are the same, so we pick from the latest file - self.bbox, self.footprint = self.get_bbox_and_footprint(self.end) - self.var = "precipitation" - - def url(self, year: int) -> str: - if self.freq == "monthly": - return "https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_monthly/cogs/" - else: - return ( - f"https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/" - f"cogs/{self.res}/{year}/" - ) - - def fname(self, date: datetime) -> str: - if self.freq == "monthly": - return f"chirps-v2.0.{date.year}.{date.month:02}.cog" - else: - return f"chirps-v2.0.{date.year}.{date.month:02}.{date.day:02}.cog" - - def get_latest_chirps(self) -> datetime: - ini = self.INI - end = datetime( - datetime.today().year, - 
datetime.today().month, - datetime.today().day, - tzinfo=timezone.utc, - ) - date = end - for year in range(end.year, ini.year - 1, -1): - text = requests.get(self.url(year)).text - while date >= datetime(year, 1, 1, tzinfo=timezone.utc): - if text.find(self.fname(date)) > 0: - return date - if self.freq == "daily": - date -= timedelta(days=1) - else: - date -= relativedelta(months=1) - date = date.replace(day=monthrange(date.year, date.month)[1]) - date = datetime(year - 1, 12, 31, tzinfo=timezone.utc) - raise ValueError("no Chirps file found") # this point should never be reached - - def get_bbox_and_footprint(self, date: datetime) -> Tuple[BBox, Polygon]: - url = self.url(date.year) + self.fname(date) - with rasterio.open(url) as ds: - bounds = ds.bounds - bbox = (bounds.left, bounds.bottom, bounds.right, bounds.top) - footprint = shpg.box(*bounds) - return (bbox, footprint) - - def get_chirps_list( - self, time_range: Tuple[datetime, datetime] - ) -> List[Tuple[datetime, str, str]]: - tr = [dt.astimezone(pytz.timezone("UTC")) for dt in time_range] - end_range = ( - tr[1] - if self.freq == "daily" - else tr[1].replace(day=monthrange(tr[1].year, tr[1].month)[1]) - ) - if ( - time_range[1].timestamp() < self.INI.timestamp() - or time_range[0].timestamp() > self.end.timestamp() - ): - raise ValueError( - f"Invalid time range {time_range[0].isoformat()} - " - f"{time_range[1].isoformat()} - valid values are in the range" - f"{self.INI.isoformat()} - {self.end.isoformat()}" - ) - ini = tr[0] if tr[0] >= self.INI else self.INI - end = end_range if end_range <= self.end else self.end - date = end - res = [] - while date >= ini: - url = self.url(date.year) + self.fname(date) - fname = self.fname(date) - res.append((date, url, fname)) - if self.freq == "daily": - date -= timedelta(days=1) - else: - date -= relativedelta(months=1) - date = date.replace(day=monthrange(date.year, date.month)[1]) - return res - - def _get_id(self, fname: str) -> str: - return 
hashlib.sha256(f"{self.res}_{fname}".encode()).hexdigest() - - def query( - self, - roi: Optional[BBox] = None, - time_range: Optional[Tuple[datetime, datetime]] = None, - ids: Optional[List[str]] = None, - ) -> List[Item]: - if roi is not None: - pgon = shpg.box(*roi) - if not pgon.intersects(self.footprint): - return [] - ini = time_range[0] if time_range is not None else self.INI - end = time_range[1] if time_range is not None else self.end - chirpsl = self.get_chirps_list((ini, end)) - res = [] - for date, url, fname in chirpsl: - id = self._get_id(fname) - if ids is not None and id not in ids: - continue - item = self._create_item(date, url, id) - res.append(item) - return res - - def _create_item(self, date: datetime, url: str, id: str) -> Item: - item = Item( - id=id, - geometry=mapping(self.footprint), - bbox=[self.bbox[i] for i in range(4)], - datetime=date, - properties={}, - ) - asset = Asset(href=url, media_type=MediaType.COG) - item.add_asset(self.var, asset) - return item - - def query_by_id(self, id: Union[str, List[str]]) -> List[Item]: - if isinstance(id, str): - ids = [id] - else: - ids = id - res = [] - for date, url, fname in self.get_chirps_list((self.INI, self.end)): - id = self._get_id(fname) - if id in ids: - item = self._create_item(date, url, id) - res.append(item) - return res - - -def convert_product(item: Dict[str, Any], freq: str) -> ChirpsProduct: - date = isoparse(item["properties"]["datetime"]).replace( - hour=0, minute=0, second=0, microsecond=0 - ) - if freq == "daily": - time_range = (date, date) - else: - time_range = (date.replace(day=1), date) - url = item["assets"]["precipitation"]["href"] - output = ChirpsProduct( - id=item["id"], - time_range=time_range, - geometry=item["geometry"], - assets=[], - url=url, - ) - return output - - -class CallbackBuilder: - def __init__(self, freq: str, res: str): - self.freq = freq - self.res = res - - def __call__(self): - def list_chirps( - input_item: DataVibe, - ) -> Dict[str, 
List[ChirpsProduct]]: - collection = ChirpsCollection(self.freq, self.res) - items = collection.query(roi=input_item.bbox, time_range=input_item.time_range) - - products = [convert_product(item.to_dict(), freq=self.freq) for item in items] - - if not products: - raise RuntimeError( - f"No product found for time range {input_item.time_range} " - f"and geometry {input_item.geometry}" - ) - return {"chirps_products": products} - - return list_chirps diff --git a/ops/list_chirps/list_chirps.yaml b/ops/list_chirps/list_chirps.yaml deleted file mode 100644 index c191aa1c..00000000 --- a/ops/list_chirps/list_chirps.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: list_chips -inputs: - input_item: DataVibe -output: - chirps_products: List[ChirpsProduct] -parameters: - freq: daily - res: p05 -dependencies: - parameters: - - freq - - res -entrypoint: - file: list_chirps.py - callback_builder: CallbackBuilder -description: - short_description: - Lists products from the CHIRPS dataset with desired frequency and resolution - for input geometry and time range. - parameters: - freq: daily or monthly frequencies - res: p05 for 0.05 degree resolution or p25 for 0.25 degree resolution, - p25 is only available daily \ No newline at end of file diff --git a/ops/list_climatology_lab/list_climatology_lab.py b/ops/list_climatology_lab/list_climatology_lab.py deleted file mode 100644 index d1cd5d72..00000000 --- a/ops/list_climatology_lab/list_climatology_lab.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from typing import Dict, List - -from pystac.item import Item - -from vibe_core.data import DataVibe -from vibe_core.data.products import ClimatologyLabProduct -from vibe_lib.climatology_lab import ( - ClimatologyLabCollection, - GridMETCollection, - TerraClimateCollection, -) - - -class CallbackBuilder: - collection: ClimatologyLabCollection - - def __init__(self, variable: str): - if variable not in self.collection.asset_keys: - raise ValueError( - f"Requested variable '{variable}' not valid.\n" - f"Available properties: {', '.join(self.collection.asset_keys)}" - ) - self.variable = variable - - def convert_product(self, item: Item) -> ClimatologyLabProduct: - assert item.geometry is not None, "input Item has no geometry" - assert item.datetime is not None, "input Item has no datetime" - time_range = (datetime(item.datetime.year, 1, 1), datetime(item.datetime.year, 12, 31)) - - product = ClimatologyLabProduct( - id=item.id, - time_range=time_range, - geometry=item.geometry, - assets=[], - url=item.properties["url"], - variable=item.properties["variable"], - ) - return product - - def __call__(self): - def list_climatology_lab( - input_item: DataVibe, - ) -> Dict[str, List[ClimatologyLabProduct]]: - items = self.collection.query(variable=self.variable, time_range=input_item.time_range) - - if not items: - raise RuntimeError(f"No products found for time range {input_item.time_range}") - - products = [self.convert_product(item) for item in items] - return {"products": products} - - return list_climatology_lab - - -class CallbackBuilderGridMET(CallbackBuilder): - collection = GridMETCollection() - - -class CallbackBuilderTerraClimate(CallbackBuilder): - collection = TerraClimateCollection() diff --git a/ops/list_climatology_lab/list_gridmet.yaml b/ops/list_climatology_lab/list_gridmet.yaml deleted file mode 100644 index 0b23049e..00000000 --- a/ops/list_climatology_lab/list_gridmet.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: 
list_gridmet -inputs: - input_item: DataVibe -output: - products: List[ClimatologyLabProduct] -parameters: - variable: pr -dependencies: - parameters: - - variable -entrypoint: - file: list_climatology_lab.py - callback_builder: CallbackBuilderGridMET -description: - short_description: - Lists GridMET products of `variable` from years intersecting with input time range. - inputs: - input_item: Time range of interest. - output: - products: Listed products. - parameters: - variable: >- - Options are: - bi - Burning Index - erc - Energy Release Component - etr - Daily reference evapotranspiration (alfafa, units = mm) - fm100 - Fuel Moisture (100-hr, units = %) - fm1000 - Fuel Moisture (1000-hr, units = %) - pet - Potential evapotranspiration (reference grass evapotranspiration, units = mm) - pr - Precipitation amount (daily total, units = mm) - rmax - Maximum relative humidity (units = %) - rmin - Minimum relative humidity (units = %) - sph - Specific humididy (units = kg/kg) - srad - Downward surface shortwave radiation (units = W/m^2) - th - Wind direction (degrees clockwise from North) - tmmn - Minimum temperature (units = K) - tmmx - Maximum temperature (units = K) - vpd - Vapor Pressure Deficit (units = kPa) - vs - Wind speed at 10m (units = m/s) diff --git a/ops/list_climatology_lab/list_terraclimate.yaml b/ops/list_climatology_lab/list_terraclimate.yaml deleted file mode 100644 index 5cba8935..00000000 --- a/ops/list_climatology_lab/list_terraclimate.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: list_terraclimate -inputs: - input_item: DataVibe -output: - products: List[ClimatologyLabProduct] -parameters: - variable: tmax -dependencies: - parameters: - - variable -entrypoint: - file: list_climatology_lab.py - callback_builder: CallbackBuilderTerraClimate -description: - short_description: - Lists TerraClimate products of `variable` from years intersecting with input time range. - inputs: - input_item: Time range of interest. - output: - products: Listed products. 
- parameters: - variable: >- - Options are: - aet - Actual Evapotranspiration (monthly total, units = mm) - def - Climate Water Deficit (monthly total, units = mm) - pet - Potential evapotranspiration (monthly total, units = mm) - ppt - Precipitation (monthly total, units = mm) - q - Runoff (monthly total, units = mm) - soil - Soil Moisture (total column at end of month, units = mm) - srad - Downward surface shortwave radiation (units = W/m2) - swe - Snow water equivalent (at end of month, units = mm) - tmax - Max Temperature (average for month, units = C) - tmin - Min Temperature (average for month, units = C) - vap - Vapor pressure (average for month, units = kPa) - ws - Wind speed (average for month, units = m/s) - vpd - Vapor Pressure Deficit (average for month, units = kPa) - PDSI - Palmer Drought Severity Index (at end of month, units = unitless) diff --git a/ops/list_climatology_lab/test_list_climatology_lab.py b/ops/list_climatology_lab/test_list_climatology_lab.py deleted file mode 100644 index fcf09677..00000000 --- a/ops/list_climatology_lab/test_list_climatology_lab.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import List, cast -from unittest.mock import MagicMock, patch - -import pytest -from shapely.geometry import Point, mapping - -from vibe_core.data import DataVibe -from vibe_core.data.products import ClimatologyLabProduct -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.climatology_lab import ( - ClimatologyLabCollection, - GridMETCollection, - TerraClimateCollection, -) - -TERRACLIMATE_CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "list_terraclimate.yaml" -) -GRIDMET_CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_gridmet.yaml") - -FAKE_GEOMETRY = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) -FAKE_TIME_RANGE = ( - datetime(year=2019, month=1, day=1, tzinfo=timezone.utc), - datetime(year=2020, month=12, day=31, tzinfo=timezone.utc), -) - -INVALID_VARIABLE = "🙅" - - -@pytest.mark.parametrize( - "config_path, variable", - [ - (p, v) - for p, c in [ - (TERRACLIMATE_CONFIG_PATH, TerraClimateCollection), - (GRIDMET_CONFIG_PATH, GridMETCollection), - ] - for v in c.asset_keys - ], -) -@patch.object(ClimatologyLabCollection, "check_url_variable_year", return_value=True) -def test_gridmet_op(_: MagicMock, config_path: str, variable: str): - input_item = DataVibe("input_item", FAKE_TIME_RANGE, mapping(FAKE_GEOMETRY), []) - - op_tester = OpTester(config_path) - op_tester.update_parameters({"variable": variable}) - output_data = op_tester.run(input_item=input_item) - - # Get op result - output_name = "products" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, list) - assert len(cast(List[ClimatologyLabProduct], output_data["products"])) == 2 - - -@pytest.mark.parametrize("config_path", [TERRACLIMATE_CONFIG_PATH, GRIDMET_CONFIG_PATH]) -def test_op_fails_invalid_variable(config_path: str): - op_tester = OpTester(config_path) - op_tester.update_parameters({"variable": 
INVALID_VARIABLE}) - with pytest.raises(ValueError): - op_tester.run(input_item=[]) diff --git a/ops/list_dem_products/list_dem_products.py b/ops/list_dem_products/list_dem_products.py deleted file mode 100644 index d5020ecb..00000000 --- a/ops/list_dem_products/list_dem_products.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -# This operator receives a region and obtains the digital elevation model -# items associated with the input region. The collection 3dep-seamless -# only covers CONUS (continental us) and contains tiles with distinct -# spatial resolutions (10 and 30 meters). This operator returns a list of -# DemProduct. -from functools import partial -from typing import Any, Dict, List - -from dateutil.parser import isoparse -from shapely import geometry as shpg -from shapely import ops as shpo - -from vibe_core.data import DataVibe, DemProduct -from vibe_lib.planetary_computer import validate_dem_provider - - -def convert_product(item: Dict[str, Any], provider: str) -> DemProduct: - date = isoparse(item["properties"]["datetime"]) - output = DemProduct( - id=str(item["id"]), - time_range=(date, date), - geometry=item["geometry"], - assets=[], - tile_id=str(item["id"]), - resolution=int(item["properties"]["gsd"]), - provider=provider, - ) - - return output - - -def list_dem_products( - input_items: List[DataVibe], resolution: int, provider: str -) -> Dict[str, List[DemProduct]]: - collection = validate_dem_provider(provider.upper(), resolution) - - geom = shpo.unary_union([shpg.shape(i.geometry) for i in input_items]) - items = collection.query(geometry=geom) - - products = [ - convert_product(item.to_dict(), provider) - for item in items - if item.properties["gsd"] == resolution - ] - - if not products: - raise RuntimeError("No product found on provider '{provider}' for geometry {geom}") - - return {"dem_products": products} - - -def callback_builder(resolution: int, provider: str): - return 
partial(list_dem_products, resolution=resolution, provider=provider) diff --git a/ops/list_dem_products/list_dem_products.yaml b/ops/list_dem_products/list_dem_products.yaml deleted file mode 100644 index 78863278..00000000 --- a/ops/list_dem_products/list_dem_products.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: list_dem_products -inputs: - input_items: List[DataVibe] -output: - dem_products: List[DemProduct] -parameters: - resolution: 10 - provider: "USGS3Dep" -entrypoint: - file: list_dem_products.py - callback_builder: callback_builder -dependencies: - parameters: - - resolution - - provider -description: - short_description: Lists digital elevation map tiles that intersect with the input geometry and time range. \ No newline at end of file diff --git a/ops/list_dem_products/test_list_dem_products.py b/ops/list_dem_products/test_list_dem_products.py deleted file mode 100644 index bfb3b280..00000000 --- a/ops/list_dem_products/test_list_dem_products.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import List, cast - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import DataVibe, DemProduct -from vibe_core.data.core_types import BaseVibe -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_dem_products.yaml") - - -def test_op(): - latitude = 44.0005556 - longitude = -97.0005556 - buffer = 0.1 - bbox = [ - longitude - buffer, - latitude - buffer, - longitude + buffer, - latitude + buffer, - ] - polygon: Polygon = box(*bbox, ccw=True) - start_date = datetime(year=2018, month=2, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) - input_items = [DataVibe("input_item", (start_date, end_date), mapping(polygon), [])] - - output_data = OpTester(CONFIG_PATH).run(input_items=cast(List[BaseVibe], input_items)) - - # Get op result - output_name = "dem_products" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, list) - assert len(cast(List[DemProduct], output_data["dem_products"])) == 4 diff --git a/ops/list_era5/list_era5.py b/ops/list_era5/list_era5.py deleted file mode 100644 index 7d4972d8..00000000 --- a/ops/list_era5/list_era5.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from functools import partial -from typing import Any, Dict, List - -from dateutil.parser import isoparse -from shapely import geometry as shpg -from shapely.geometry import mapping - -from vibe_core.data import DataVibe, Era5Product -from vibe_lib.planetary_computer import Era5Collection - -VARS = { - "msl": "air_pressure_at_mean_sea_level", - "2t": "air_temperature_at_2_metres", - "mx2t": "air_temperature_at_2_metres_1hour_Maximum", - "mn2t": "air_temperature_at_2_metres_1hour_Minimum", - "2d": "dew_point_temperature_at_2_metres", - "100u": "eastward_wind_at_100_metres", - "10u": "eastward_wind_at_10_metres", - "ssrd": "integral_wrt_time_of_surface_direct_downwelling" - "_shortwave_flux_in_air_1hour_Accumulation", - "100v": "northward_wind_at_100_metres", - "10v": "northward_wind_at_10_metres", - "tp": "precipitation_amount_1hour_Accumulation", - "sst": "sea_surface_temperature", - "sp": "surface_air_pressure", -} - - -def convert_product(item: Dict[str, Any], var: str) -> Era5Product: - start_datetime = isoparse(item["properties"]["start_datetime"]) - end_datetime = isoparse(item["properties"]["end_datetime"]) - x_extend = item["properties"]["cube:dimensions"]["lon"]["extent"] - y_extend = item["properties"]["cube:dimensions"]["lat"]["extent"] - geometry = mapping(shpg.box(x_extend[0], y_extend[0], x_extend[1], y_extend[1])) - - output = Era5Product( - id=f"{item['id']}_{var}", - time_range=(start_datetime, end_datetime), - geometry=geometry, - assets=[], - item_id=str(item["id"]), - var=VARS[var], - ) - - return output - - -def list_era5(input_item: DataVibe, variable: str) -> Dict[str, List[Era5Product]]: - if variable not in VARS.keys(): - raise ValueError( - f"Requested variable '{variable}' not valid. 
" - f"Valid values are {', '.join(VARS.keys())}" - ) - collection = Era5Collection() - items = collection.query(roi=input_item.bbox, time_range=input_item.time_range) - items = filter(lambda item: VARS[variable] in item.assets.keys(), items) - products = [convert_product(item.to_dict(), variable) for item in items] - if not products: - raise RuntimeError( - f"No product found for time range {input_item.time_range} " - f"and geometry {input_item.geometry}" - ) - return {"era5_products": products} - - -def callback_builder(variable: str): - return partial(list_era5, variable=variable) diff --git a/ops/list_era5/list_era5.yaml b/ops/list_era5/list_era5.yaml deleted file mode 100644 index f6a52080..00000000 --- a/ops/list_era5/list_era5.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: list_era5 -inputs: - input_item: DataVibe -output: - era5_products: List[Era5Product] -parameters: - variable: -dependencies: - parameters: - - variable -entrypoint: - file: list_era5.py - callback_builder: callback_builder -description: - short_description: - Lists ERA5 products for input geometry and time range. - parameters: - variable: >- - Options are: - sp - Surface pressure - sst - Sea surface temperature - 10u - 10 meter U wind component - 2t - 2 meter temperature - 100u - 100 meter U wind component - 10v - 10 meter V wind component - 100v - 100 meter V wind component - msl - Mean sea level pressure - 2d - 2 meter dewpoint temperature - tp - Total precipitation - mx2t - Maximum temperature at 2 meters since previous post-processing - mn2t - Minimum temperature at 2 meters since previous post-processing - ssrd - Surface solar radiation downwards diff --git a/ops/list_era5/list_era5_cds.py b/ops/list_era5/list_era5_cds.py deleted file mode 100644 index 5a6d0fb9..00000000 --- a/ops/list_era5/list_era5_cds.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -from datetime import datetime -from functools import partial -from typing import Dict, List - -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, Era5Product - -VARS = { - "msl": "mean_sea_level_pressure", - "2t": "2m_temperature", - "2d": "2m_dewpoint_temperature", - "100u": "100m_u_component_of_wind", - "10u": "10m_u_component_of_wind", - "ssrd": "surface_solar_radiation_downwards", - "100v": "100m_v_component_of_wind", - "10v": "10m_v_component_of_wind", - "tp": "total_precipitation", - "sst": "sea_surface_temperature", - "sp": "surface_pressure", -} - - -def list_era5(input_item: DataVibe, variable: str) -> Dict[str, List[Era5Product]]: - # Currently only listing the era5 variable that we have on PC in the monthly - # aggregates (instead of hourly). This should speedup statistics computation - # (and addition to save these assets in our cache). We may add the much richer - # set of variables available on CDS (all Era5 variables, Wildfire reanalysis, etc) - if variable not in VARS.keys(): - raise ValueError( - f"Requested variable '{variable}' not valid. 
" - f"Valid values are {', '.join(VARS.keys())}" - ) - - year_ini = input_item.time_range[0].year - year_end = input_item.time_range[1].year - - dataset = "reanalysis-era5-single-levels-monthly-means" - request = { - "format": "netcdf", - "variable": [VARS[variable]], - "product_type": "monthly_averaged_reanalysis", - "time": "00:00", - "month": [f"{i:02d}" for i in range(1, 13)], - "year": [f"{i}" for i in range(year_ini, year_end + 1)], - } - - res = Era5Product( - id=hashlib.sha256((dataset + str(request)).encode()).hexdigest(), - time_range=(datetime(year_ini, 1, 1), datetime(year_end, 12, 31)), - geometry=shpg.mapping(shpg.box(-180, -90, 180, 90)), - assets=[], - item_id="", - var=VARS[variable], - cds_request={dataset: request}, - ) - - return {"era5_products": [res]} - - -def callback_builder(variable: str): - return partial(list_era5, variable=variable) diff --git a/ops/list_era5/list_era5_cds.yaml b/ops/list_era5/list_era5_cds.yaml deleted file mode 100644 index c81f152a..00000000 --- a/ops/list_era5/list_era5_cds.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: list_era5_cds -inputs: - input_item: DataVibe -output: - era5_products: List[Era5Product] -parameters: - variable: -dependencies: - parameters: - - variable -entrypoint: - file: list_era5_cds.py - callback_builder: callback_builder -description: - short_description: Lists monthly ERA5 products for the input time range and geometry. 
- parameters: - variable: >- - Monthly variables, options are: - sp - Surface pressure - sst - Sea surface temperature - 10u - 10 meter U wind component - 2t - 2 meter temperature - 100u - 100 meter U wind component - 10v - 10 meter V wind component - 100v - 100 meter V wind component - msl - Mean sea level pressure - 2d - 2 meter dewpoint temperature - tp - Total precipitation - ssrd - Surface solar radiation downwards diff --git a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py b/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py deleted file mode 100644 index d66c81d7..00000000 --- a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, Dict, List, cast - -from dateutil.parser import isoparse -from shapely.geometry import shape - -from vibe_core.data import BBox, DataVibe, EsriLandUseLandCoverProduct -from vibe_lib.planetary_computer import EsriLandUseLandCoverCollection - - -def convert_product(item: Dict[str, Any]) -> EsriLandUseLandCoverProduct: - start_date = isoparse(item["properties"]["start_datetime"]) - end_date = isoparse(item["properties"]["end_datetime"]) - output = EsriLandUseLandCoverProduct( - id=str(item["id"]), - time_range=(start_date, end_date), - geometry=item["geometry"], - assets=[], - ) - - return output - - -def list_products(input_item: DataVibe) -> Dict[str, List[EsriLandUseLandCoverProduct]]: - collection = EsriLandUseLandCoverCollection() - input_geometry = shape(input_item.geometry) - time_range = input_item.time_range - bbox = cast(BBox, input_geometry.bounds) - items = collection.query(roi=bbox, time_range=time_range) - products = [convert_product(item.to_dict()) for item in items] - - if not products: - raise RuntimeError( - f"No product found for time range {input_item.time_range} " - f"and geometry {input_item.geometry}" - ) - - return {"listed_products": 
products} - - -def callback_builder(): - return list_products diff --git a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml b/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml deleted file mode 100644 index 489a4baa..00000000 --- a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: list_esri_landuse_landcover -inputs: - input_item: DataVibe -output: - listed_products: List[EsriLandUseLandCoverProduct] -parameters: -entrypoint: - file: list_esri_landuse_landcover.py - callback_builder: callback_builder -description: - short_description: Lists ESRI 10m Land Use/Land Cover (9-class) tiles that intersect with input geometry and time range. \ No newline at end of file diff --git a/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py b/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py deleted file mode 100644 index b56ba7a2..00000000 --- a/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import List, cast - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import DataVibe -from vibe_core.data.products import EsriLandUseLandCoverProduct -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "list_esri_landuse_landcover.yaml" -) - - -def test_op(): - latitude = 42.21422 - longitude = -93.22890 - buffer = 0.001 - bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] - polygon: Polygon = box(*bbox, ccw=True) - start_date = datetime(year=2017, month=1, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2017, month=12, day=31, tzinfo=timezone.utc) - input_item = DataVibe("input_item", (start_date, end_date), mapping(polygon), []) - - output_data = OpTester(CONFIG_PATH).run(input_item=input_item) - - # Get op result - output_name = "listed_products" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, list) - assert len(cast(List[EsriLandUseLandCoverProduct], output_data["listed_products"])) == 1 diff --git a/ops/list_gedi_products/list_gedi_products.py b/ops/list_gedi_products/list_gedi_products.py deleted file mode 100644 index bf6ee014..00000000 --- a/ops/list_gedi_products/list_gedi_products.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from typing import Any, Dict, List - -from dateutil.parser import parse as parse_date -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, GEDIProduct -from vibe_lib.earthdata import EarthDataAPI - -LOGGER = logging.getLogger(__name__) - - -def parse_poly(poly_str: str) -> shpg.Polygon: - coords = poly_str.split(" ") - return shpg.Polygon([(float(c2), float(c1)) for c1, c2 in zip(coords[::2], coords[1::2])]) - - -def convert_product(item: Dict[str, Any]) -> GEDIProduct: - geoms = [parse_poly(pp) for p in item["polygons"] for pp in p] - product_id = item["producer_granule_id"] - if not geoms: - raise RuntimeError(f"Failed to parse geometry from GEDI Product {product_id}") - if len(geoms) > 1: - geom = shpg.MultiPolygon(geoms) - else: - geom = geoms[0] - time_range = tuple(parse_date(item[k]) for k in ("time_start", "time_end")) - orbits = item["orbit_calculated_spatial_domains"][0] - concept_id = item["collection_concept_id"] - processing_level = [k for k, v in EarthDataAPI.concept_ids.items() if v == concept_id] - if len(processing_level) == 0: - raise RuntimeError(f"Failed to parse concept id {concept_id} from product {product_id}") - processing_level = processing_level[0] - return GEDIProduct( - id=product_id, - geometry=shpg.mapping(geom), - time_range=time_range, - product_name=product_id, - start_orbit=int(orbits["start_orbit_number"]), - stop_orbit=int(orbits["stop_orbit_number"]), - processing_level=processing_level, - assets=[], - ) - - -def callback_builder(processing_level: str): - if processing_level not in EarthDataAPI.concept_ids: - valid_levels = ", ".join([f"'{i}'" for i in EarthDataAPI.concept_ids]) - raise ValueError(f"Parameters processing_level must be one of {valid_levels}") - - def callback(input_data: DataVibe) -> Dict[str, List[GEDIProduct]]: - api = EarthDataAPI(processing_level) - geom = shpg.shape(input_data.geometry) - time_range = input_data.time_range - LOGGER.info( - f"Querying EarthData API 
for {processing_level=}, " - f"geometry={shpg.mapping(geom)}, {time_range=}" - ) - items = api.query(geometry=geom, time_range=time_range) - if not items: - raise RuntimeError( - f"Query returned no items for time range {time_range} " - f"and geometry {shpg.mapping(geom)}" - ) - LOGGER.info(f"EarthData API returned {len(items)} items. Converting to DataVibe") - products = [convert_product(i) for i in items] - return {"gedi_products": products} - - return callback diff --git a/ops/list_gedi_products/list_gedi_products.yaml b/ops/list_gedi_products/list_gedi_products.yaml deleted file mode 100644 index 9c64580e..00000000 --- a/ops/list_gedi_products/list_gedi_products.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: list_gedi_products -inputs: - input_data: DataVibe -output: - gedi_products: List[GEDIProduct] -parameters: - processing_level: GEDI02_B.002 -entrypoint: - file: list_gedi_products.py - callback_builder: callback_builder -dependencies: - parameters: - - processing_level -description: - short_description: Lists GEDI Products from NASA's EarthData API. 
\ No newline at end of file diff --git a/ops/list_gedi_products/mock_items.json b/ops/list_gedi_products/mock_items.json deleted file mode 100644 index 1027a149..00000000 --- a/ops/list_gedi_products/mock_items.json +++ /dev/null @@ -1 +0,0 @@ -[{"producer_granule_id": "GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5", "time_start": "2021-01-03T02:28:16.000Z", "updated": "2021-09-16T13:33:58.248Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11669", "stop_orbit_number": "11669"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479671297", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-03T04:01:09.000Z", "id": "G2109010485-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "147.575", "browse_flag": true, "polygons": [["-30.3626576 -91.2648483 -27.6439792 -88.3897615 -24.8347156 -85.6623114 -21.9673825 -83.0671991 -19.0514493 -80.5838395 -16.095074 -78.1946169 -13.1049797 -75.8827098 -10.0913178 -73.6294082 -7.0568958 -71.422786 -4.0091158 -69.2472455 -0.9533188 -67.0897964 -0.2950898 -66.6261359 -0.2331909 -66.6864572 -0.8909761 -67.1498522 -3.9464484 -69.307554 -6.9941031 -71.4829217 -10.0280734 -73.6896102 -13.0417733 -75.9426575 -16.0305779 -78.2549303 -18.9859929 -80.6442171 -21.9008479 -83.1275515 -24.7669563 -85.7225227 -27.5748973 -88.4497367 -30.3318958 -91.3028519 -30.3626576 -91.2648483"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.03/GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.03/GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5", "time_start": 
"2021-01-04T14:04:38.000Z", "updated": "2021-09-15T14:44:45.312Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11692", "stop_orbit_number": "11692"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479801384", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-04T15:37:31.000Z", "id": "G2109313701-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "226.445", "browse_flag": true, "polygons": [["0.5505065 -81.5625675 -2.5086918 -79.4085405 -5.5611308 -77.2421343 -8.6029156 -75.0510247 -11.6259659 -72.8201955 -14.6277405 -70.5371334 -17.5991496 -68.1844306 -20.5338042 -65.7461622 -23.423009 -63.2043847 -26.2586328 -60.540614 -29.0303387 -57.7342285 -31.7271874 -54.7628274 -34.1212741 -51.86561 -34.0555803 -51.8563839 -31.6748997 -54.679901 -28.9801891 -57.6519074 -26.2103994 -60.4590336 -23.3764628 -63.1236187 -20.4887803 -65.6664175 -17.5554686 -68.1055773 -14.5850585 -70.4589025 -11.5839693 -72.7421956 -8.5619262 -74.9742452 -5.5203527 -77.1653332 -2.4687335 -79.332593 0.5910624 -81.4855825 0.5505065 -81.5625675"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.04/GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.25/GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.04/GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5", "time_start": 
"2021-01-07T00:54:22.000Z", "updated": "2021-09-16T13:37:31.816Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11730", "stop_orbit_number": "11730"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479633671", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-07T02:27:15.000Z", "id": "G2108920939-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "126.573", "browse_flag": true, "polygons": [["-30.1461259 -90.9640426 -27.4215507 -88.102234 -24.606821 -85.3862242 -21.7348391 -82.8002922 -18.814933 -80.3247498 -15.8556248 -77.9418952 -12.8638124 -75.6347507 -9.8470872 -73.3867106 -6.8113281 -71.1833118 -3.7627547 -69.0099236 -0.7068341 -66.8531798 -0.2296719 -66.5171417 -0.168137 -66.5766535 -0.6451851 -66.9127594 -3.700955 -69.0695501 -6.7492633 -71.2429883 -9.7844186 -73.4465855 -12.8005944 -75.6946778 -15.7910674 -78.0022638 -18.7494616 -80.3851673 -21.6684271 -82.8606086 -24.5391962 -85.4464038 -27.3524956 -88.1622766 -30.1154458 -91.0020509 -30.1461259 -90.9640426"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.07/GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.07/GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5", "time_start": 
"2021-01-08T12:30:45.000Z", "updated": "2021-09-15T14:44:51.018Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11753", "stop_orbit_number": "11753"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479642220", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-08T14:03:38.000Z", "id": "G2108936022-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "228.481", "browse_flag": true, "polygons": [["0.3636339 -81.368489 -2.6948222 -79.2131182 -5.7477117 -77.0464724 -8.7890118 -74.8544509 -11.8139481 -72.6232694 -14.8152193 -70.3379173 -17.7870662 -67.983062 -20.7213548 -65.5421638 -23.6120414 -62.9982185 -26.445649 -60.3281273 -29.2154254 -57.5144296 -31.9098845 -54.5345779 -34.3245843 -51.5987621 -34.2589016 -51.5895276 -31.8573652 -54.4517723 -29.1654292 -57.4318291 -26.3975764 -60.2462847 -23.5658728 -62.9177241 -20.6765899 -65.4622016 -17.7433957 -67.9039735 -14.7727599 -70.259492 -11.7722016 -72.5454193 -8.7483977 -74.7775267 -5.7075006 -76.9700498 -2.6550172 -79.1369356 0.4037688 -81.291898 0.3636339 -81.368489"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.08/GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.08/GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5", "time_start": 
"2021-01-10T23:20:25.000Z", "updated": "2021-09-16T13:48:44.353Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11791", "stop_orbit_number": "11791"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479659128", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-11T00:53:18.000Z", "id": "G2108978487-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "323.92", "browse_flag": true, "polygons": [["-30.1241135 -90.8142166 -27.3988696 -87.9532901 -24.5834775 -85.2382504 -21.7107944 -82.6531393 -18.7904635 -80.1781036 -15.8303809 -77.7956122 -12.8376188 -75.4880686 -9.8199741 -73.2400657 -6.7832843 -71.0364172 -3.7340327 -68.8628696 -0.6778065 -66.7054872 -0.1361488 -66.3240182 -0.0740696 -66.3841249 -0.6157131 -66.765595 -3.6717973 -68.9230187 -6.7209337 -71.0962995 -9.7571196 -73.300345 -12.7745146 -75.5482063 -15.7660982 -77.856108 -18.7253112 -80.238563 -21.644579 -82.7135771 -24.5159226 -85.298641 -27.3301816 -88.0133733 -30.0934383 -90.8522218 -30.1241135 -90.8142166"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.10/GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.10/GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}] \ No newline at end of file diff --git a/ops/list_gedi_products/test_list_gedi_products.py 
b/ops/list_gedi_products/test_list_gedi_products.py deleted file mode 100644 index f73a56d7..00000000 --- a/ops/list_gedi_products/test_list_gedi_products.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -import os -from datetime import datetime -from typing import Any, Dict, List, cast -from unittest.mock import Mock, patch - -import pytest -from dateutil.parser import parse as parse_date -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, GEDIProduct -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.earthdata import EarthDataAPI - -HERE = os.path.dirname(os.path.abspath(__file__)) -CONFIG_PATH = os.path.join(HERE, "list_gedi_products.yaml") - - -@pytest.fixture -def mock_items(): - with open(os.path.join(HERE, "mock_items.json")) as f: - return json.load(f) - - -def compare_product_with_json(product: GEDIProduct, item: Dict[str, Any]): - assert product.product_name == item["producer_granule_id"] - assert isinstance(shpg.shape(product.geometry), shpg.Polygon) - assert product.time_range[0] == parse_date(item["time_start"]) - assert product.start_orbit == int( - item["orbit_calculated_spatial_domains"][0]["start_orbit_number"] - ) - - -@patch.object(EarthDataAPI, "query") -def test_op(query: Mock, mock_items: List[Dict[str, Any]]): - query.return_value = mock_items - now = datetime.now() - geom = shpg.box(0, 0, 1, 1) - x = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) - out = OpTester(CONFIG_PATH).run(input_data=x) - assert "gedi_products" in out - products = cast(List[GEDIProduct], out["gedi_products"]) - assert len(products) == 5 - for p, i in zip(products, mock_items): - compare_product_with_json(p, i) diff --git a/ops/list_glad_products/list_glad_products.py b/ops/list_glad_products/list_glad_products.py deleted file mode 100644 index 4f439f6b..00000000 --- a/ops/list_glad_products/list_glad_products.py +++ 
/dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -import itertools -from datetime import datetime -from typing import Dict, List, cast - -import geopandas as gpd - -from vibe_core.data import DataVibe, GLADProduct -from vibe_lib import glad - - -class CallbackBuilder: - def __init__(self, tile_geometry: str): - self.tiles_gdf: gpd.GeoDataFrame = cast(gpd.GeoDataFrame, gpd.read_file(tile_geometry)) - - def __call__(self): - def list_glad_products(input_item: DataVibe) -> Dict[str, List[GLADProduct]]: - geom_tiles = glad.intersecting_tiles(self.tiles_gdf, input_item.geometry) - years_range = range(input_item.time_range[0].year, input_item.time_range[1].year + 1) - intersection_years = itertools.product(geom_tiles, years_range) - - out_glad_products = [ - GLADProduct.clone_from( - input_item, - id=hashlib.sha256((f"glad-product-{tile_name}-{year}").encode()).hexdigest(), - assets=[], - time_range=(datetime(year, 1, 1), datetime(year, 12, 31)), - geometry=glad.get_tile_geometry(self.tiles_gdf, tile_name), - url=glad.GLAD_DOWNLOAD_URL.format(year=year, tile_name=tile_name), - ) - for tile_name, year in intersection_years - if glad.check_glad_for_year(tile_name, year) - ] - if len(out_glad_products) == 0: - raise RuntimeError( - f"No Glad products found for time range {input_item.time_range}" - f" and geometry {input_item.geometry}" - ) - - return {"glad_products": out_glad_products} - - return list_glad_products diff --git a/ops/list_glad_products/list_glad_products.yaml b/ops/list_glad_products/list_glad_products.yaml deleted file mode 100644 index 4037cda0..00000000 --- a/ops/list_glad_products/list_glad_products.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: list_glad_products -inputs: - input_item: DataVibe -output: - glad_products: List[GLADProduct] -parameters: - tile_geometry: /opt/terravibes/ops/resources/glad_tile_geometry/10d_tiles.geojson -entrypoint: - file: list_glad_products.py - 
callback_builder: CallbackBuilder -description: - short_description: Lists Global Land Analysis (GLAD) forest products that intersect the user-provided geometry/time range. - long_description: | - Lists forest products from The Global Land Analysis and Discovery (GLAD) - laboratory in the Department of Geographical Sciences at the University of - Maryland. This op lists the 10x10 degree tiles that intersect the user geometry - for each year in the user-provided time range (if the tile is available for - that year). \ No newline at end of file diff --git a/ops/list_glad_products/test_glad_list.py b/ops/list_glad_products/test_glad_list.py deleted file mode 100644 index 83567682..00000000 --- a/ops/list_glad_products/test_glad_list.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import itertools -import os -from datetime import datetime -from typing import List, Tuple, cast -from unittest.mock import Mock, patch - -import pytest - -from vibe_core import file_downloader -from vibe_core.data import DataVibe -from vibe_core.data.products import GLADProduct -from vibe_core.utils import ensure_list -from vibe_dev.testing.op_tester import OpTester - -VALID_GLAD_YEARS = [2000, 2020] -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_glad_products.yaml") -TILES_MAPPING = { - "northwest": ["50N_110W", "50N_120W", "60N_110W", "60N_120W"], - "northeast": ["50N_060E", "50N_070E", "60N_060E", "60N_070E"], - "southwest": ["10S_060W", "10S_070W", "20S_060W", "20S_070W"], - "southeast": ["10S_010E", "00N_010E", "10S_020E", "00N_020E"], -} - -MOCK_TILES = { - "50N_110W", - "50N_120W", - "60N_110W", - "60N_120W", - "50N_060E", - "50N_070E", - "60N_060E", - "60N_070E", - "10S_060W", - "10S_070W", - "20S_060W", - "20S_070W", - "10S_010E", - "00N_010E", - "10S_020E", - "00N_020E", -} - - -def custom_datavibe( - coordinates: List[List[float]], - time_range: Tuple[datetime, datetime] = 
(datetime(2000, 1, 1), datetime(2023, 1, 1)), -) -> DataVibe: - return DataVibe( - id=str("test_id"), - time_range=time_range, - geometry={ - "type": "Polygon", - "coordinates": [coordinates], - }, - assets=[], - ) - - -TEST_DATAVIBES = { - "northwest": custom_datavibe( - [ - [-115.0, 55.0], - [-105.0, 55.0], - [-105.0, 45.0], - [-115.0, 45.0], - ] - ), - "northeast": custom_datavibe( - [ - [75.0, 55.0], - [65.0, 55.0], - [65.0, 45.0], - [75.0, 45.0], - ] - ), - "southwest": custom_datavibe( - [ - [-65.0, -15.0], - [-55.0, -15.0], - [-55.0, -25.0], - [-65.0, -25.0], - ] - ), - "southeast": custom_datavibe( - [ - [15.0, -5.0], - [25.0, -5.0], - [25.0, -15.0], - [15.0, -15.0], - ] - ), -} - - -def mock_verify(url: str): - # URLs are of the form: - # https://glad.umd.edu/users/Potapov/GLCLUC2020/Forest_extent_2000/00N_000E.tif - return url[-12:-4] in MOCK_TILES and int(url[-17:-13]) in VALID_GLAD_YEARS - - -@patch.object(file_downloader, "verify_url") -@pytest.mark.parametrize( - "test_datavibe, expected_tiles", - [ - (TEST_DATAVIBES["northwest"], TILES_MAPPING["northwest"]), - (TEST_DATAVIBES["northeast"], TILES_MAPPING["northeast"]), - (TEST_DATAVIBES["southwest"], TILES_MAPPING["southwest"]), - (TEST_DATAVIBES["southeast"], TILES_MAPPING["southeast"]), - ], -) -def test_glad_list(verify: Mock, test_datavibe: DataVibe, expected_tiles: List[str]): - verify.side_effect = mock_verify - op = OpTester(CONFIG_PATH) - output_data = op.run(**{"input_item": test_datavibe}) - assert output_data - assert "glad_products" in output_data - - products: List[GLADProduct] = cast(List[GLADProduct], ensure_list(output_data["glad_products"])) - expected_combinations = set(itertools.product(expected_tiles, VALID_GLAD_YEARS)) - - actual_combinations = set((p.tile_name, p.time_range[0].year) for p in products) - - assert expected_combinations == actual_combinations - verify.reset_mock() - - -@patch.object(file_downloader, "verify_url") -def test_glad_list_same_tiles(verify: Mock): - 
verify.side_effect = mock_verify - - # Create datavibe_1 - test_data_vibe_1 = custom_datavibe( - [ - [15.0, -5.0], - [15.1, -5.0], - [15.1, -5.1 + 0.1], # not the same geom - [15.0, -5.1], - ], - time_range=(datetime(2020, 1, 1), datetime(2020, 1, 1)), - ) - - test_data_vibe_2 = custom_datavibe( - [ - [15.0, -5.0], - [15.1, -5.0], - [15.1, -5.1], - [15.0, -5.1], - ], - time_range=(datetime(2020, 1, 1), datetime(2020, 1, 1)), - ) - - op = OpTester(CONFIG_PATH) - output_1 = op.run(**{"input_item": test_data_vibe_1}) - output_2 = op.run(**{"input_item": test_data_vibe_2}) - - products: List[GLADProduct] = [] - for output in [output_1, output_2]: - assert output - assert "glad_products" in output - assert isinstance(output["glad_products"], list) - assert len(output["glad_products"]) > 0 - - products.append(cast(GLADProduct, output["glad_products"][0])) - - assert products[0].id == products[1].id - assert products[0].time_range == products[1].time_range - assert products[0].geometry == products[1].geometry - assert products[0].assets == products[1].assets - assert products[0].url == products[1].url diff --git a/ops/list_gnatsgo_products/list_gnatsgo_products.py b/ops/list_gnatsgo_products/list_gnatsgo_products.py deleted file mode 100644 index ff40cdb5..00000000 --- a/ops/list_gnatsgo_products/list_gnatsgo_products.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Dict, List - -from pystac import Item - -from vibe_core.data import DataVibe, GNATSGOProduct -from vibe_lib.planetary_computer import GNATSGOCollection - - -def convert_product(item: Item) -> GNATSGOProduct: - assert item.geometry is not None, "Input item has no geometry" - assert item.datetime is not None, "Input item has no datetime" - - output = GNATSGOProduct( - id=item.id, - time_range=(item.datetime, item.datetime), - geometry=item.geometry, - assets=[], - ) - return output - - -def callback_builder(): - def callback(input_item: DataVibe) -> Dict[str, List[GNATSGOProduct]]: - collection = GNATSGOCollection() - items = collection.query(roi=input_item.bbox) - products = [convert_product(item) for item in items] - if not products: - raise RuntimeError( - f"No product found for geometry {input_item.geometry}. " - f"Please, make sure the geometry is within Continental USA" - ) - return {"gnatsgo_products": products} - - return callback diff --git a/ops/list_gnatsgo_products/list_gnatsgo_products.yaml b/ops/list_gnatsgo_products/list_gnatsgo_products.yaml deleted file mode 100644 index a0231a99..00000000 --- a/ops/list_gnatsgo_products/list_gnatsgo_products.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: list_gnatsgo_products -inputs: - input_item: DataVibe -output: - gnatsgo_products: List[GNATSGOProduct] -parameters: -entrypoint: - file: list_gnatsgo_products.py - callback_builder: callback_builder -description: - short_description: - Lists gNATSGO products from Planetary Computer that intersect with input geometry. diff --git a/ops/list_gnatsgo_products/test_list_gnatsgo_products.py b/ops/list_gnatsgo_products/test_list_gnatsgo_products.py deleted file mode 100644 index e5ff53eb..00000000 --- a/ops/list_gnatsgo_products/test_list_gnatsgo_products.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import List, cast -from unittest.mock import MagicMock, patch - -import pytest -from pystac import Asset, Item -from shapely.geometry import Point, mapping - -from vibe_core.data import DataVibe, GNATSGOProduct -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import GNATSGOCollection - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_gnatsgo_products.yaml") - -VALID_GEOMETRY = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) -INVALID_GEOMETRY = Point(-47.06966, -22.81709).buffer(0.1, cap_style=3) -FAKE_DATE = datetime(year=2020, month=7, day=1, tzinfo=timezone.utc) - - -def fake_items(): - assets = {f"{var}": Asset(href=f"fake_href_{var}") for var in GNATSGOCollection.asset_keys} - return [ - Item( - id="fake_id", # type: ignore - geometry=mapping(VALID_GEOMETRY), - bbox=VALID_GEOMETRY.bounds, # type: ignore - datetime=FAKE_DATE, - properties={}, - assets=assets, - ) - ] - - -@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["gnatsgo-rasters"]) -@patch.object(GNATSGOCollection, "query") -def test_op(query: MagicMock, _: MagicMock): - query.return_value = fake_items() - - input_item = DataVibe("input_item", (FAKE_DATE, FAKE_DATE), VALID_GEOMETRY, []) # type: ignore - - op_tester = OpTester(CONFIG_PATH) - out = op_tester.run(input_item=input_item) - - assert query.call_args.kwargs["roi"] == VALID_GEOMETRY.bounds - - assert "gnatsgo_products" in out - products = cast(List[GNATSGOProduct], out["gnatsgo_products"]) - assert isinstance(products, list) - assert len(products) == 1 - - -@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["gnatsgo-rasters"]) -@patch.object(GNATSGOCollection, "query") -def test_op_fails_invalid_geometry(query: MagicMock, _: MagicMock): - query.return_value = [] - input_item = DataVibe("input_item", (FAKE_DATE, FAKE_DATE), mapping(INVALID_GEOMETRY), []) - - 
op_tester = OpTester(CONFIG_PATH) - with pytest.raises(RuntimeError): - op_tester.run(input_item=input_item) diff --git a/ops/list_hansen_products/list_hansen_products.py b/ops/list_hansen_products/list_hansen_products.py deleted file mode 100644 index e42fa62d..00000000 --- a/ops/list_hansen_products/list_hansen_products.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from datetime import datetime -from typing import Dict, List, cast -from urllib.parse import urljoin - -import geopandas as gpd - -from vibe_core.data import DataVibe, HansenProduct -from vibe_core.file_downloader import verify_url -from vibe_lib import glad - -DATASET_START_YEAR = 2000 - - -class CallbackBuilder: - def __init__( - self, - layer_name: str, - tile_geometry: str, - tiles_folder_url: str, - ): - self.layer_name = layer_name - self.tiles_gdf: gpd.GeoDataFrame = cast(gpd.GeoDataFrame, gpd.read_file(tile_geometry)) - # Base urls are expected to be in the format: - # 'https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/' - self.tiles_folder_url = tiles_folder_url - - # Make sure folder url ends with a slash - self.tiles_folder_url = ( - self.tiles_folder_url - if self.tiles_folder_url.endswith("/") - else f"{self.tiles_folder_url}/" - ) - - self.final_year = HansenProduct.extract_last_year(self.tiles_folder_url) - self.version = HansenProduct.extract_version(self.tiles_folder_url) - - # Create an asset template for the products, this will be used to check if the tif files are - # compatible to 'https://storage.googleapis.com/.../Hansen_GFC-2022-v1.10_50N_000E.tif' - template = f"Hansen_GFC-{self.final_year}-{self.version}_{{asset_key}}_{{tile_name}}.tif" - self.asset_template = urljoin(self.tiles_folder_url, template) - - def is_product_available(self, layer_name: str, tile_name: str) -> bool: - return verify_url(self.asset_template.format(asset_key=layer_name, tile_name=tile_name)) - - def 
validate_time_range(self, input_item: DataVibe): - start_year = input_item.time_range[0].year - if start_year != DATASET_START_YEAR: - raise ValueError( - f"Start year must be {DATASET_START_YEAR} for Hansen dataset " - f"version {self.version}-{self.final_year}, received {start_year}" - ) - - end_year = input_item.time_range[1].year - if end_year > self.final_year: - raise ValueError( - f"End year must be <= {self.final_year} for Hansen dataset " - f"version {self.version}-{self.final_year}, received {end_year}" - ) - - def __call__(self): - def list_hansen_products(input_item: DataVibe) -> Dict[str, List[HansenProduct]]: - self.validate_time_range(input_item) - geom_tiles = glad.intersecting_tiles(self.tiles_gdf, input_item.geometry) - - first_year = input_item.time_range[0].year - last_year = input_item.time_range[1].year - - out_hansen_products = [ - HansenProduct.clone_from( - input_item, - id=hashlib.sha256( - ( - f"hansen-product-{self.layer_name}-{tile_name}" - f"{first_year}-{last_year}-{self.version}" - ).encode() - ).hexdigest(), - assets=[], - time_range=(datetime(first_year, 1, 1), datetime(last_year, 12, 31)), - geometry=glad.get_tile_geometry(self.tiles_gdf, tile_name), - asset_url=self.asset_template.format( - asset_key=self.layer_name, tile_name=tile_name - ), - ) - for tile_name in geom_tiles - if self.is_product_available(self.layer_name, tile_name) - ] - - if len(out_hansen_products) == 0: - raise RuntimeError( - f"No Hansen products found for time range {input_item.time_range}" - f" and geometry {input_item.geometry}" - ) - - return {"hansen_products": out_hansen_products} - - return list_hansen_products diff --git a/ops/list_hansen_products/list_hansen_products.yaml b/ops/list_hansen_products/list_hansen_products.yaml deleted file mode 100644 index 7d6ca9b6..00000000 --- a/ops/list_hansen_products/list_hansen_products.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: list_hansen_products -inputs: - input_item: DataVibe -output: - hansen_products: 
List[HansenProduct] -parameters: - layer_name: - tile_geometry: /opt/terravibes/ops/resources/glad_tile_geometry/10d_tiles.geojson - tiles_folder_url: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ -entrypoint: - file: list_hansen_products.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - layer_name -description: - short_description: Lists Global Forest Change (Hansen) products that intersect the user-provided geometry/time range. - long_description: - The op will list Global Forest Change (Hansen) products that intersect the - user-provided geometry/time range. The dataset is available at 30m - resolution and is updated annually. The data contains information on forest - cover, loss, and gain. Full dataset details can be found at - https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. - sources: - input_item: User-provided geometry and time range. - sinks: - hansen_products: List of Global Forest Change (Hansen) products that intersect the user-provided geometry/time range. - parameters: - tiles_folder_url: - URL to the Global Forest Change (Hansen) dataset. It specifies the dataset - version and is used to download the data. The default value is - https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ - with the dataset version GFC-2022-v1.10. - layer_name: - Name of the Global Forest Change (Hansen) layer. Can be any of the following names - 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'. - tile_geometry: - Path to the GeoJSON file containing the tile geometries. It is used to filter - the tiles that intersect the user-provided geometry/time range. As the Hansen dataset - uses the same tiling system as the GLAD dataset, the default value is the GLAD tile geometry. 
\ No newline at end of file diff --git a/ops/list_hansen_products/test_hansen_list.py b/ops/list_hansen_products/test_hansen_list.py deleted file mode 100644 index c088135c..00000000 --- a/ops/list_hansen_products/test_hansen_list.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import itertools -import os -from datetime import datetime -from typing import List -from unittest.mock import Mock, patch - -import pytest - -from vibe_core import file_downloader -from vibe_core.data import DataVibe -from vibe_core.data.products import HansenProduct -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_hansen_products.yaml") -DEFAULT_DATASET_FINAL_YEAR = 2022 -DEFAULT_DATASET_FOLDER = "https://storage.googleapis.com/earthenginepartners-hansen/" -DEFAULT_DATASET_VERSION = "v1.10" - -EXPECTED_TILES = { - "northwest": ["50N_110W", "50N_120W", "60N_110W", "60N_120W"], - "northeast": ["50N_060E", "50N_070E", "60N_060E", "60N_070E"], - "southwest": ["10S_060W", "10S_070W", "20S_060W", "20S_070W"], - "southeast": ["10S_010E", "00N_010E", "10S_020E", "00N_020E"], -} - -MOCK_TILES = set([tile_name for tile_list in EXPECTED_TILES.values() for tile_name in tile_list]) - - -def create_fake_datavibe(coordinates: List[List[float]]) -> DataVibe: - return DataVibe( - id=str("test_id"), - time_range=(datetime(2000, 1, 1), datetime(2022, 1, 1)), - geometry={ - "type": "Polygon", - "coordinates": [coordinates], - }, - assets=[], - ) - - -MOCK_INPUT_DICT = { - "northwest": create_fake_datavibe( - [ - [-115.0, 55.0], - [-105.0, 55.0], - [-105.0, 45.0], - [-115.0, 45.0], - ] - ), - "northeast": create_fake_datavibe( - [ - [75.0, 55.0], - [65.0, 55.0], - [65.0, 45.0], - [75.0, 45.0], - ] - ), - "southwest": create_fake_datavibe( - [ - [-65.0, -15.0], - [-55.0, -15.0], - [-55.0, -25.0], - [-65.0, -25.0], - ] - ), - "southeast": create_fake_datavibe( - [ - 
[15.0, -5.0], - [25.0, -5.0], - [25.0, -15.0], - [15.0, -15.0], - ] - ), -} - - -@patch.object(file_downloader, "verify_url") -@pytest.mark.parametrize( - "test_datavibe, expected_tiles, layer_name", - [ - (MOCK_INPUT_DICT[location], EXPECTED_TILES[location], asset_key) - for location, asset_key in itertools.product( - ["northwest", "northeast", "southwest", "southeast"], HansenProduct.asset_keys - ) - ], -) -def test_hansen_list( - verify: Mock, test_datavibe: DataVibe, expected_tiles: List[str], layer_name: str -): - # URLs are of the form: - # https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/Hansen_GFC-2022-v1.10_treecover2000_20N_090W.tif - def mock_verify(url: str): - return ( - url[-12:-4] in MOCK_TILES - and int(url.split("/")[-2].split("-")[1]) == DEFAULT_DATASET_FINAL_YEAR - ) - - verify.side_effect = mock_verify - op = OpTester(CONFIG_PATH) - op.update_parameters({"layer_name": layer_name}) - - output_data = op.run(input_item=test_datavibe) - assert output_data - assert "hansen_products" in output_data - - tiles = set([product.tile_name for product in output_data["hansen_products"]]) # type: ignore - assert all( - [ - product.layer_name == layer_name - for product in output_data["hansen_products"] # type: ignore - ] - ) - assert tiles == set(expected_tiles), f"Expected {expected_tiles}, got {tiles}" - assert all( - [ - product.last_year == DEFAULT_DATASET_FINAL_YEAR - for product in output_data["hansen_products"] # type: ignore - ] - ) - assert all( - [ - product.version == DEFAULT_DATASET_VERSION - for product in output_data["hansen_products"] # type: ignore - ] - ) - - for product in output_data["hansen_products"]: # type: ignore - expected_url = ( - f"{DEFAULT_DATASET_FOLDER}Hansen_GFC-2022-v1.10_{layer_name}_{product.tile_name}.tif" - ) - assert set(product.asset_url) == set(expected_url) - - -def test_hansen_invalid_years(): - op = OpTester(CONFIG_PATH) - test_datavibe = MOCK_INPUT_DICT["northwest"] - test_datavibe.time_range 
= (datetime(1999, 1, 1), datetime(2022, 1, 1)) - with pytest.raises(ValueError): - op.run(input_item=test_datavibe) - - test_datavibe.time_range = (datetime(2000, 1, 1), datetime(2023, 1, 1)) - with pytest.raises(ValueError): - op.run(input_item=test_datavibe) diff --git a/ops/list_herbie/list_herbie.py b/ops/list_herbie/list_herbie.py deleted file mode 100644 index b05f84e6..00000000 --- a/ops/list_herbie/list_herbie.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from datetime import datetime -from typing import Dict, List, Optional - -import numpy as np -import pandas as pd -from herbie import Herbie_latest - -from vibe_core.data import DataVibe, HerbieProduct - -N = 6 # latest file within the last N*frequecy hours - - -class CallbackBuilder: - def __init__( - self, - model: str, - product: str, - frequency: int, - search_text: str, - forecast_lead_times: Optional[List[int]] = None, - forecast_start_date: Optional[str] = None, - ): - if forecast_lead_times is not None and forecast_start_date is not None: - raise ValueError( - "You cannot specify 'forecast_lead_times' and" - " 'forecast_start_date' at the same time." 
- ) - self.model = model - self.product = product - self.frequency = frequency - self.forecast_lead_times = forecast_lead_times - self.search_text = search_text - self.forecast_start_date = forecast_start_date - - def _get_list(self, input_item: DataVibe): - start = input_item.time_range[0].replace(tzinfo=None) - end = input_item.time_range[1].replace(tzinfo=None) - if self.forecast_lead_times is None: - if self.forecast_start_date is None: - H = Herbie_latest(n=N, freq=f"{self.frequency}H", model=self.model) - latest = H.date.to_pydatetime() - else: - latest = datetime.strptime(self.forecast_start_date, "%Y-%m-%d %H:%M") - if end > latest or self.forecast_start_date is not None: - plist = [(t, 0) for t in pd.date_range(start, latest, freq=f"{self.frequency}H")] - r = len(pd.date_range(start, end, freq=f"{self.frequency}H")) - last = plist[-1][0] - plist += [ - (last, int(lead)) - for lead in (np.arange(1, r - len(plist) + 1) * self.frequency) - ] - else: - plist = [(t, 0) for t in pd.date_range(start, end, freq=f"{self.frequency}H")] - else: - plist = [ - (t, lead) - for t in pd.date_range(start, end, freq=f"{self.frequency}H") - for lead in range( - self.forecast_lead_times[0], - self.forecast_lead_times[1], - self.forecast_lead_times[2], - ) - ] - - return plist - - def __call__(self): - def list_herbie( - input_item: DataVibe, - ) -> Dict[str, List[HerbieProduct]]: - plist = self._get_list(input_item) - - products = [ - HerbieProduct.clone_from( - input_item, - hashlib.sha256( - ( - f"{self.model}-{self.product}-" - f"{lead}-{self.search_text}-" - f"{str(input_item.geometry)}-{str(t)}" - ).encode() - ).hexdigest(), - assets=[], - time_range=( - t.tz_localize(input_item.time_range[0].tzinfo), - t.tz_localize(input_item.time_range[0].tzinfo), - ), - model=self.model, - product=self.product, - lead_time_hours=lead, - search_text=self.search_text, - ) - for t, lead in plist - ] - return {"product": products} - - return list_herbie diff --git 
a/ops/list_herbie/list_herbie.yaml b/ops/list_herbie/list_herbie.yaml deleted file mode 100644 index c94a96a2..00000000 --- a/ops/list_herbie/list_herbie.yaml +++ /dev/null @@ -1,52 +0,0 @@ -name: list_herbie -inputs: - input_item: DataVibe -output: - product: List[HerbieProduct] -parameters: - model: "hrrr" - product: "prs" - frequency: 1 - forecast_lead_times: - forecast_start_date: - search_text: "TMP:2 m" -entrypoint: - callback_builder: CallbackBuilder - file: list_herbie.py -dependencies: - parameters: - - model - - product - - frequency - - forecast_lead_times - - search_text -description: - short_description: - Lists herbie products. - parameters: - model: - Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types - 'hrrr' HRRR contiguous United States model - 'hrrrak' HRRR Alaska model (alias 'alaska') - 'rap' RAP model - 'gfs' Global Forecast System (atmosphere) - 'gfs_wave' Global Forecast System (wave) - 'rrfs' Rapid Refresh Forecast System prototype - for more information see https://herbie.readthedocs.io/en/latest/user_guide/model_info.html - product: - Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), - subh (subhourly fields)). Not specifying this will use the first product in model template file. - frequency: frequency in hours of the forecast - forecast_lead_times: - Forecast lead time in the format [start_time, end_time, increment] (in hours). If this parameter is - None, then this op lists analysis (zero lead time) up to the latest analysis available, and from - that point it lists forecasts with progressively increasing lead times. - forecast_start_date: - latest datetime (in the format "%Y-%m-%d %H:%M") for which analysis (zero lead time) are listed. - After this datetime, this op lists forecasts with progressively increasing lead times. This parameter - must be set to None if 'forecast_lead_times' is used. 
- search_text: - It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer - of the file required instead of complete file. - For more information on search_text refer to below url. - https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html \ No newline at end of file diff --git a/ops/list_landsat_products_pc/list_landsat_pc.py b/ops/list_landsat_products_pc/list_landsat_pc.py deleted file mode 100644 index a00c0568..00000000 --- a/ops/list_landsat_products_pc/list_landsat_pc.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, Dict, List - -from dateutil.parser import isoparse - -from vibe_core.data import DataVibe, LandsatProduct -from vibe_lib.planetary_computer import LandsatCollection - - -def convert_product(item: Dict[str, Any]) -> LandsatProduct: - date = isoparse(item["properties"]["datetime"]) - output = LandsatProduct( - id=str(item["id"]), - time_range=(date, date), - geometry=item["geometry"], - assets=[], - tile_id=str(item["id"]), - ) - - return output - - -def callback_builder(): - def list_landsat_products( - input_item: DataVibe, - ) -> Dict[str, List[LandsatProduct]]: - collection = LandsatCollection() - items = collection.query(roi=input_item.bbox, time_range=input_item.time_range) - - products = [convert_product(item.to_dict()) for item in items] - - if not products: - raise RuntimeError( - f"No product found for time range {input_item.time_range} " - f"and geometry {input_item.geometry}" - ) - return {"landsat_products": products} - - return list_landsat_products diff --git a/ops/list_landsat_products_pc/list_landsat_products_pc.yaml b/ops/list_landsat_products_pc/list_landsat_products_pc.yaml deleted file mode 100644 index f441f7ac..00000000 --- a/ops/list_landsat_products_pc/list_landsat_products_pc.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: list_landsat_products_pc -inputs: - input_item: 
DataVibe -output: - landsat_products: List[LandsatProduct] -parameters: -entrypoint: - file: list_landsat_pc.py - callback_builder: callback_builder -description: - short_description: Lists LANDSAT tiles that intersect with the input geometry and time range. \ No newline at end of file diff --git a/ops/list_modis_sr/list_modis_sr.py b/ops/list_modis_sr/list_modis_sr.py deleted file mode 100644 index 299aad09..00000000 --- a/ops/list_modis_sr/list_modis_sr.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict, List - -from dateutil.parser import parse -from pystac import Item -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, ModisProduct -from vibe_lib.planetary_computer import Modis8DaySRCollection - - -def convert_product(item: Item, resolution: int) -> ModisProduct: - time_range = tuple(parse(item.properties[k]) for k in ("start_datetime", "end_datetime")) - assert item.geometry is not None, f"Item {item.id} is missing geometry field" - return ModisProduct( - id=item.id, geometry=item.geometry, time_range=time_range, assets=[], resolution=resolution - ) - - -def callback_builder(resolution: int): - available_res = Modis8DaySRCollection.collections.keys() - if resolution not in available_res: - raise ValueError(f"Valid resolutions are {available_res}, got {resolution}.") - - def callback(input_data: List[DataVibe]) -> Dict[str, List[ModisProduct]]: - collection = Modis8DaySRCollection(resolution) - items: Dict[str, Item] = {} - for input_datum in input_data: - input_geom = shpg.shape(input_datum.geometry) - datum_items = collection.query(geometry=input_geom, time_range=input_datum.time_range) - for i in datum_items: - items[i.id] = i - return {"modis_products": [convert_product(i, resolution) for i in items.values()]} - - return callback diff --git a/ops/list_modis_sr/list_modis_sr.yaml b/ops/list_modis_sr/list_modis_sr.yaml deleted file mode 100644 index 
fa1b7956..00000000 --- a/ops/list_modis_sr/list_modis_sr.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: list_modis_sr -inputs: - input_data: List[DataVibe] -output: - modis_products: List[ModisProduct] -parameters: - resolution: 250 -dependencies: - parameters: - - resolution -entrypoint: - file: list_modis_sr.py - callback_builder: callback_builder -description: - short_description: - Lists MODIS 8-day surface reflectance rasters intersecting - with the input geometry and time range for desired resolution. \ No newline at end of file diff --git a/ops/list_modis_vegetation/list_modis_vegetation.py b/ops/list_modis_vegetation/list_modis_vegetation.py deleted file mode 100644 index 6c504f95..00000000 --- a/ops/list_modis_vegetation/list_modis_vegetation.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict, List - -from dateutil.parser import parse -from pystac import Item -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, ModisProduct -from vibe_lib.planetary_computer import Modis16DayVICollection - - -def convert_product(item: Item, resolution: int) -> ModisProduct: - time_range = tuple(parse(item.properties[k]) for k in ("start_datetime", "end_datetime")) - assert item.geometry is not None, f"Item {item.id} is missing geometry field" - return ModisProduct( - id=item.id, geometry=item.geometry, time_range=time_range, assets=[], resolution=resolution - ) - - -def callback_builder(resolution: int): - available_res = Modis16DayVICollection.collections.keys() - if resolution not in available_res: - raise ValueError(f"Valid resolutions are {available_res}, got {resolution}.") - - def callback(input_data: List[DataVibe]) -> Dict[str, List[ModisProduct]]: - collection = Modis16DayVICollection(resolution) - items: Dict[str, Item] = {} - for input_datum in input_data: - input_geom = shpg.shape(input_datum.geometry) - datum_items = collection.query(geometry=input_geom, 
time_range=input_datum.time_range) - for i in datum_items: - items[i.id] = i - return {"modis_products": [convert_product(i, resolution) for i in items.values()]} - - return callback diff --git a/ops/list_modis_vegetation/list_modis_vegetation.yaml b/ops/list_modis_vegetation/list_modis_vegetation.yaml deleted file mode 100644 index d10af781..00000000 --- a/ops/list_modis_vegetation/list_modis_vegetation.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: list_modis_vegetation -inputs: - input_data: List[DataVibe] -output: - modis_products: List[ModisProduct] -parameters: - resolution: 250 -dependencies: - parameters: - - resolution -entrypoint: - file: list_modis_vegetation.py - callback_builder: callback_builder -description: - short_description: Lists MODIS vegetation products for input geometry, time range and resolution. \ No newline at end of file diff --git a/ops/list_modis_vegetation/test_list_modis_vegetation.py b/ops/list_modis_vegetation/test_list_modis_vegetation.py deleted file mode 100644 index 1062023a..00000000 --- a/ops/list_modis_vegetation/test_list_modis_vegetation.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from unittest.mock import MagicMock, patch - -import pytest -from pystac import Item -from shapely import geometry as shpg - -from vibe_core.data import DataVibe -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import Modis16DayVICollection - -HERE = os.path.dirname(os.path.abspath(__file__)) - -FAKE_TIME_RANGE = (datetime(2020, 11, 1), datetime(2020, 11, 2)) -FAKE_GEOM = shpg.mapping(shpg.box(0, 0, 2, 2)) -INVALID_RESOLUTION = 100 - - -def fake_items(resolution: int): - return [ - Item( - id=f"{resolution}m-id", # type: ignore - geometry=FAKE_GEOM, - bbox=None, - datetime=None, - properties={ - "start_datetime": FAKE_TIME_RANGE[0].isoformat() + "Z", - "end_datetime": FAKE_TIME_RANGE[1].isoformat() + "Z", - }, - ) - ] - - -@pytest.mark.parametrize("resolution", (250, 500)) -@patch("vibe_lib.planetary_computer.get_available_collections") -@patch.object(Modis16DayVICollection, "query") -def test_op(query: MagicMock, get_collections: MagicMock, resolution: int): - query.return_value = fake_items(resolution) - get_collections.return_value = list(Modis16DayVICollection.collections.values()) - - geom1 = shpg.Point(1, 1).buffer(0.1, cap_style=3) - geom2 = shpg.Point(2, 2).buffer(0.1, cap_style=3) - time_range = (datetime(2022, 11, 1), datetime(2022, 11, 16)) - x1 = DataVibe(id="1", time_range=time_range, geometry=shpg.mapping(geom1), assets=[]) - x2 = DataVibe(id="2", time_range=time_range, geometry=shpg.mapping(geom2), assets=[]) - op_tester = OpTester(os.path.join(HERE, "list_modis_vegetation.yaml")) - op_tester.update_parameters({"resolution": resolution}) - o1 = op_tester.run(input_data=[x1]) - query.assert_called_with(geometry=geom1, time_range=x1.time_range) - get_collections.assert_called_once() - o2 = op_tester.run(input_data=[x2]) - query.assert_called_with(geometry=geom2, time_range=x2.time_range) - assert get_collections.call_count == 2 - o3 = op_tester.run(input_data=[x1, x2]) - 
assert get_collections.call_count == 3 - assert query.call_count == 4 - products = o1["modis_products"] - assert isinstance(products, list) - assert len(products) == 1 - product = products[0] - assert isinstance(product, DataVibe) - assert product.id == f"{resolution}m-id" - assert product.time_range == tuple(t.astimezone() for t in FAKE_TIME_RANGE) - assert product.geometry == FAKE_GEOM - assert o1 == o2 == o3 - - -def test_op_fails_invalid_res(): - op_tester = OpTester(os.path.join(HERE, "list_modis_vegetation.yaml")) - op_tester.update_parameters({"resolution": INVALID_RESOLUTION}) - with pytest.raises(ValueError): - op_tester.run(input_data=[]) diff --git a/ops/list_naip_products/list_naip_products.py b/ops/list_naip_products/list_naip_products.py deleted file mode 100644 index 5125d7a9..00000000 --- a/ops/list_naip_products/list_naip_products.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -# This operator receives a region and a date range and obtains the respective -# NAIP items, returning a list of NaipProduct. 
-from typing import Any, Dict, List, Tuple, cast - -from dateutil.parser import isoparse -from shapely.geometry import shape - -from vibe_core.data import DataVibe, NaipProduct -from vibe_lib.planetary_computer import NaipCollection - - -def convert_product(item: Dict[str, Any]) -> NaipProduct: - date = isoparse(item["properties"]["datetime"]) - output = NaipProduct( - id=str(item["id"]), - time_range=(date, date), - geometry=item["geometry"], - assets=[], - tile_id=str(item["id"]), - resolution=float(item["properties"]["gsd"]), - year=int(item["properties"]["naip:year"]), - ) - - return output - - -def list_naip_products(input_item: DataVibe) -> Dict[str, List[NaipProduct]]: - collection = NaipCollection() - input_geometry = shape(input_item.geometry) - time_range = input_item.time_range - bbox = cast(Tuple[Any, Any, Any, Any], input_geometry.bounds) - items = collection.query(roi=bbox, time_range=time_range) - products = [convert_product(item.to_dict()) for item in items] - - if not products: - raise RuntimeError( - f"No product found for time range {input_item.time_range} " - f"and geometry {input_item.geometry}" - ) - - return {"naip_products": products} - - -def callback_builder(): - return list_naip_products diff --git a/ops/list_naip_products/list_naip_products.yaml b/ops/list_naip_products/list_naip_products.yaml deleted file mode 100644 index 876f168d..00000000 --- a/ops/list_naip_products/list_naip_products.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: list_naip_products -inputs: - input_item: DataVibe -output: - naip_products: List[NaipProduct] -parameters: -entrypoint: - file: list_naip_products.py - callback_builder: callback_builder -description: - short_description: Lists Naip tiles that intersect with input geometry and time range. 
\ No newline at end of file diff --git a/ops/list_naip_products/test_list_naip_products.py b/ops/list_naip_products/test_list_naip_products.py deleted file mode 100644 index 3f8764b5..00000000 --- a/ops/list_naip_products/test_list_naip_products.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from typing import List, cast - -from shapely.geometry import Polygon, box, mapping - -from vibe_core.data import DataVibe, DemProduct -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_naip_products.yaml") - - -def test_op(): - latitude = 42.21422 - longitude = -93.22890 - buffer = 0.001 - bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] - polygon: Polygon = box(*bbox, ccw=True) - start_date = datetime(year=2018, month=2, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) - input_item = DataVibe("input_item", (start_date, end_date), mapping(polygon), []) - - output_data = OpTester(CONFIG_PATH).run(input_item=input_item) - - # Get op result - output_name = "naip_products" - assert output_name in output_data - output_product = output_data[output_name] - assert isinstance(output_product, list) - assert len(cast(List[DemProduct], output_data["naip_products"])) == 1 diff --git a/ops/list_sentinel1_products/list_sentinel1_products_pc.py b/ops/list_sentinel1_products/list_sentinel1_products_pc.py deleted file mode 100644 index 70377f96..00000000 --- a/ops/list_sentinel1_products/list_sentinel1_products_pc.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from typing import Dict, List - -import planetary_computer as pc -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, Sentinel1Product -from vibe_lib.planetary_computer import ( - Sentinel1GRDCollection, - Sentinel1RTCCollection, - convert_to_s1_product, -) - -LOGGER = logging.getLogger(__name__) -COLLECTIONS = {"grd": Sentinel1GRDCollection, "rtc": Sentinel1RTCCollection} - - -def callback_builder(pc_key: str, collection: str): - collection = collection.lower() - if collection not in COLLECTIONS: - col_names = ", ".join(f"'{c}'" for c in COLLECTIONS) - raise ValueError( - f"Invalid Sentinel-1 collection '{collection}', expected one of {col_names}" - ) - - def list_sentinel1_products(input_item: DataVibe) -> Dict[str, List[Sentinel1Product]]: - pc.set_subscription_key(pc_key) - - input_range = input_item.time_range - input_geom = shpg.shape(input_item.geometry) - - col = COLLECTIONS[collection]() - items = col.query(geometry=input_geom, time_range=input_range) - LOGGER.debug(f"Planetary Computer query returned {len(items)} STAC items") - products = [convert_to_s1_product(item) for item in items] - if not products: - raise RuntimeError( - f"No product found for time range {input_range} and " - f"and geometry {input_item.geometry}" - ) - return {"sentinel_products": products} - - return list_sentinel1_products diff --git a/ops/list_sentinel1_products/list_sentinel1_products_pc.yaml b/ops/list_sentinel1_products/list_sentinel1_products_pc.yaml deleted file mode 100644 index d1c3137c..00000000 --- a/ops/list_sentinel1_products/list_sentinel1_products_pc.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# List sentinel 1 products from the Planetary Computer. 
-name: list_sentinel_1_products_pc -inputs: - input_item: DataVibe -output: - sentinel_products: List[Sentinel1Product] -parameters: - pc_key: - collection: RTC -entrypoint: - file: list_sentinel1_products_pc.py - callback_builder: callback_builder -description: - short_description: List Sentinel-1 GRD or RTC products given geometry and time range. - long_description: - The op will query the Planetary Computer for all products that intersect with the input geometry - and are in the input time range. - inputs: - input_item: Input geometry and time range. - output: - sentinel_products: Sentinel-1 products available. - parameters: - pc_key: Planetary Computer API key. -dependencies: - parameters: - - collection diff --git a/ops/list_sentinel1_products/sample_pc_output.json b/ops/list_sentinel1_products/sample_pc_output.json deleted file mode 100644 index 94392656..00000000 --- a/ops/list_sentinel1_products/sample_pc_output.json +++ /dev/null @@ -1 +0,0 @@ -[{"type": "Feature", "stac_version": "1.0.0", "id": "S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD", "properties": {"datetime": "2020-05-08T14:13:07.609978Z", "platform": "SENTINEL-1B", "s1:shape": [25505, 20178], "end_datetime": "2020-05-08 14:13:22.734955+00:00", "constellation": "Sentinel-1", "s1:resolution": "high", "s1:datatake_id": "167133", "start_datetime": "2020-05-08 14:12:52.485002+00:00", "s1:orbit_source": "RESORB", "s1:slice_number": "7", "s1:total_slices": "7", "sar:looks_range": 5, "sat:orbit_state": "descending", "sar:product_type": "GRD", "sar:looks_azimuth": 1, "sar:polarizations": ["VV", "VH"], "sar:frequency_band": "C", "sat:absolute_orbit": 21491, "sat:relative_orbit": 115, "s1:processing_level": "1", "sar:instrument_mode": "IW", "sar:center_frequency": 5.405, "sar:resolution_range": 20, "s1:product_timeliness": "Fast-24h", "sar:resolution_azimuth": 22, "sar:pixel_spacing_range": 10, "sar:observation_direction": "right", "sar:pixel_spacing_azimuth": 10, 
"sar:looks_equivalent_number": 4.4, "s1:instrument_configuration_ID": "1", "sat:platform_international_designator": "2016-025A"}, "geometry": {"type": "Polygon", "coordinates": [[[-119.138582, 47.4179232], [-119.6290381, 47.4820619], [-119.9631034, 47.5244576], [-120.4712379, 47.586948], [-120.8119799, 47.6275103], [-121.3015743, 47.6839415], [-121.6379534, 47.721425], [-121.9638494, 47.7567655], [-122.4156312, 47.8041106], [-122.4564718, 47.624352], [-122.4992538, 47.4447036], [-122.538563, 47.2646975], [-122.6222922, 46.9052187], [-122.6659226, 46.7256663], [-122.7032974, 46.5454623], [-122.7460242, 46.3658217], [-122.7881959, 46.1861214], [-122.8040405, 46.1164173], [-122.3681793, 46.0690417], [-122.0458206, 46.0328638], [-121.5577475, 45.9762723], [-121.2321514, 45.9372979], [-120.7495731, 45.8777187], [-120.4216433, 45.8360035], [-120.0941551, 45.7933433], [-119.6196888, 45.7297424], [-119.5988331, 45.7992956], [-119.5475278, 45.9790858], [-119.5025768, 46.159743], [-119.4464383, 46.3389037], [-119.3933146, 46.5184848], [-119.3443344, 46.6986317], [-119.2915808, 46.8782849], [-119.2417411, 47.0583384], [-119.138582, 47.4179232]]]}, "links": [{"rel": "collection", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "parent", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "root", "href": "https://planetarycomputer.microsoft.com/api/stac/v1", "type": "application/json", "title": "Microsoft Planetary Computer STAC API"}, {"rel": "self", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD", "type": "application/geo+json"}, {"rel": "license", "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"}, {"rel": "preview", "href": 
"https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD", "type": "text/html", "title": "Map of item"}], "assets": {"vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/measurement/iw-vh.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VH: vertical transmit, horizontal receive", "description": "Amplitude of signal transmitted with vertical polarization and received with horizontal polarization with radiometric terrain correction applied.", "roles": ["data"]}, "vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/measurement/iw-vv.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VV: vertical transmit, vertical receive", "description": "Amplitude of signal transmitted with vertical polarization and received with vertical polarization with radiometric terrain correction applied.", "roles": ["data"]}, "thumbnail": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/preview/quick-look.png", "type": "image/png", "title": "Preview Image", "description": "An averaged, decimated preview image in PNG format. Single polarisation products are represented with a grey scale image. 
Dual polarisation products are represented by a single composite colour image in RGB with the red channel (R) representing the co-polarisation VV or HH), the green channel (G) represents the cross-polarisation (VH or HV) and the blue channel (B) represents the ratio of the cross an co-polarisations.", "roles": ["thumbnail"]}, "safe-manifest": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/manifest.safe", "type": "application/xml", "title": "Manifest File", "description": "General product metadata in XML format. Contains a high-level textual description of the product and references to all of product's components, the product metadata, including the product identification and the resource references, and references to the physical location of each component file contained in the product.", "roles": ["metadata"]}, "schema-noise-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/noise-iw-vh.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-noise-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/noise-iw-vv.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-product-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/iw-vh.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, 
Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-product-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/iw-vv.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-calibration-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/calibration-iw-vh.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "schema-calibration-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/calibration-iw-vv.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "tilejson": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "application/json", "title": "TileJSON with default rendering", "roles": ["tiles"]}, "rendered_preview": {"href": 
"https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "image/png", "title": "Rendered preview", "rel": "preview", "roles": ["overview"]}}, "bbox": [-122.80404053, 45.72974239, -119.13858201, 47.80411064], "stac_extensions": ["https://stac-extensions.github.io/sar/v1.0.0/schema.json", "https://stac-extensions.github.io/sat/v1.0.0/schema.json", "https://stac-extensions.github.io/eo/v1.0.0/schema.json"], "collection": "sentinel-1-grd"}, {"type": "Feature", "stac_version": "1.0.0", "id": "S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49", "properties": {"datetime": "2020-05-05T01:53:10.759983Z", "platform": "SENTINEL-1B", "s1:shape": [25998, 16696], "end_datetime": "2020-05-05 01:53:23.259048+00:00", "constellation": "Sentinel-1", "s1:resolution": "high", "s1:datatake_id": "166729", "start_datetime": "2020-05-05 01:52:58.260917+00:00", "s1:orbit_source": "RESORB", "s1:slice_number": "12", "s1:total_slices": "19", "sar:looks_range": 5, "sat:orbit_state": "ascending", "sar:product_type": "GRD", "sar:looks_azimuth": 1, "sar:polarizations": ["VV", "VH"], "sar:frequency_band": "C", "sat:absolute_orbit": 21440, "sat:relative_orbit": 64, "s1:processing_level": "1", "sar:instrument_mode": "IW", "sar:center_frequency": 5.405, "sar:resolution_range": 20, "s1:product_timeliness": "Fast-24h", "sar:resolution_azimuth": 22, "sar:pixel_spacing_range": 10, "sar:observation_direction": "right", "sar:pixel_spacing_azimuth": 10, "sar:looks_equivalent_number": 4.4, "s1:instrument_configuration_ID": "1", "sat:platform_international_designator": "2016-025A"}, "geometry": {"type": "Polygon", "coordinates": [[[-121.6939507, 45.4834836], [-121.2754269, 45.5398515], [-120.949968, 45.5825238], [-120.4593687, 45.6449796], [-120.1313604, 45.6854882], 
[-119.8026649, 45.7250812], [-119.3082196, 45.7827593], [-118.975129, 45.8203419], [-118.5406649, 45.8678048], [-118.5868532, 46.0469466], [-118.6229127, 46.2272559], [-118.6621848, 46.4072244], [-118.7004307, 46.5872996], [-118.7817933, 46.9469381], [-118.8214057, 47.1268714], [-118.8617592, 47.3067269], [-118.8773102, 47.3659107], [-119.3232525, 47.3187693], [-119.6604197, 47.281891], [-120.0036295, 47.2432888], [-120.4923616, 47.1864189], [-120.8394691, 47.1447147], [-121.3351084, 47.083203], [-121.678541, 47.0392535], [-122.1324173, 46.9794471], [-122.1136801, 46.9202853], [-122.0548751, 46.7414745], [-122.0097466, 46.5608568], [-121.9460919, 46.3827264], [-121.898986, 46.2023906], [-121.8458561, 46.0228714], [-121.8037581, 45.8418801], [-121.7493476, 45.6625688], [-121.6939507, 45.4834836]]]}, "links": [{"rel": "collection", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "parent", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "root", "href": "https://planetarycomputer.microsoft.com/api/stac/v1", "type": "application/json", "title": "Microsoft Planetary Computer STAC API"}, {"rel": "self", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49", "type": "application/geo+json"}, {"rel": "license", "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"}, {"rel": "preview", "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49", "type": "text/html", "title": "Map of item"}], "assets": {"vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/measurement/iw-vh.tiff", 
"type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VH: vertical transmit, horizontal receive", "description": "Amplitude of signal transmitted with vertical polarization and received with horizontal polarization with radiometric terrain correction applied.", "roles": ["data"]}, "vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/measurement/iw-vv.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VV: vertical transmit, vertical receive", "description": "Amplitude of signal transmitted with vertical polarization and received with vertical polarization with radiometric terrain correction applied.", "roles": ["data"]}, "thumbnail": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/preview/quick-look.png", "type": "image/png", "title": "Preview Image", "description": "An averaged, decimated preview image in PNG format. Single polarisation products are represented with a grey scale image. Dual polarisation products are represented by a single composite colour image in RGB with the red channel (R) representing the co-polarisation VV or HH), the green channel (G) represents the cross-polarisation (VH or HV) and the blue channel (B) represents the ratio of the cross an co-polarisations.", "roles": ["thumbnail"]}, "safe-manifest": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/manifest.safe", "type": "application/xml", "title": "Manifest File", "description": "General product metadata in XML format. 
Contains a high-level textual description of the product and references to all of product's components, the product metadata, including the product identification and the resource references, and references to the physical location of each component file contained in the product.", "roles": ["metadata"]}, "schema-noise-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/noise-iw-vh.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-noise-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/noise-iw-vv.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-product-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/iw-vh.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-product-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/iw-vv.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-calibration-vh": {"href": 
"https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/calibration-iw-vh.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "schema-calibration-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/calibration-iw-vv.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "tilejson": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "application/json", "title": "TileJSON with default rendering", "roles": ["tiles"]}, "rendered_preview": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "image/png", "title": "Rendered preview", "rel": "preview", "roles": ["overview"]}}, "bbox": [-122.13241725, 45.48348356, -118.54066489, 47.3659107], "stac_extensions": ["https://stac-extensions.github.io/sar/v1.0.0/schema.json", "https://stac-extensions.github.io/sat/v1.0.0/schema.json", 
"https://stac-extensions.github.io/eo/v1.0.0/schema.json"], "collection": "sentinel-1-grd"}, {"type": "Feature", "stac_version": "1.0.0", "id": "S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93", "properties": {"datetime": "2020-05-03T14:04:57.359203Z", "platform": "SENTINEL-1B", "s1:shape": [25653, 17974], "end_datetime": "2020-05-03 14:05:10.836563+00:00", "constellation": "Sentinel-1", "s1:resolution": "high", "s1:datatake_id": "166547", "start_datetime": "2020-05-03 14:04:43.881843+00:00", "s1:orbit_source": "RESORB", "s1:slice_number": "10", "s1:total_slices": "10", "sar:looks_range": 5, "sat:orbit_state": "descending", "sar:product_type": "GRD", "sar:looks_azimuth": 1, "sar:polarizations": ["VV", "VH"], "sar:frequency_band": "C", "sat:absolute_orbit": 21418, "sat:relative_orbit": 42, "s1:processing_level": "1", "sar:instrument_mode": "IW", "sar:center_frequency": 5.405, "sar:resolution_range": 20, "s1:product_timeliness": "Fast-24h", "sar:resolution_azimuth": 22, "sar:pixel_spacing_range": 10, "sar:observation_direction": "right", "sar:pixel_spacing_azimuth": 10, "sar:looks_equivalent_number": 4.4, "s1:instrument_configuration_ID": "1", "sat:platform_international_designator": "2016-025A"}, "geometry": {"type": "Polygon", "coordinates": [[[-117.1764847, 47.1004535], [-117.6650564, 47.1646324], [-117.9970043, 47.2069601], [-118.3302859, 47.2484249], [-118.8303154, 47.3087035], [-119.3312506, 47.3667752], [-119.6699184, 47.4047223], [-120.0065821, 47.4414051], [-120.435373, 47.4865971], [-120.4887182, 47.3081555], [-120.5207375, 47.1274403], [-120.5578334, 46.9472389], [-120.5998539, 46.7675515], [-120.6408996, 46.5877646], [-120.6790306, 46.4076721], [-120.7273938, 46.2286521], [-120.7722768, 46.0492732], [-120.7820398, 45.9964776], [-120.3608571, 45.9506475], [-120.0272661, 45.9131756], [-119.6976008, 45.8751337], [-119.2090772, 45.8169084], [-118.8874917, 45.7773694], [-118.4078398, 45.7166092], [-117.9232097, 45.653044], [-117.6113535, 
45.6109694], [-117.592719, 45.6630222], [-117.5411059, 45.8427277], [-117.4955474, 46.0232608], [-117.4441308, 46.2030132], [-117.384136, 46.381627], [-117.3291147, 46.5609233], [-117.2799694, 46.7410179], [-117.1764847, 47.1004535]]]}, "links": [{"rel": "collection", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "parent", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "root", "href": "https://planetarycomputer.microsoft.com/api/stac/v1", "type": "application/json", "title": "Microsoft Planetary Computer STAC API"}, {"rel": "self", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93", "type": "application/geo+json"}, {"rel": "license", "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"}, {"rel": "preview", "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93", "type": "text/html", "title": "Map of item"}], "assets": {"vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/measurement/iw-vh.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VH: vertical transmit, horizontal receive", "description": "Amplitude of signal transmitted with vertical polarization and received with horizontal polarization with radiometric terrain correction applied.", "roles": ["data"]}, "vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/measurement/iw-vv.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VV: vertical 
transmit, vertical receive", "description": "Amplitude of signal transmitted with vertical polarization and received with vertical polarization with radiometric terrain correction applied.", "roles": ["data"]}, "thumbnail": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/preview/quick-look.png", "type": "image/png", "title": "Preview Image", "description": "An averaged, decimated preview image in PNG format. Single polarisation products are represented with a grey scale image. Dual polarisation products are represented by a single composite colour image in RGB with the red channel (R) representing the co-polarisation VV or HH), the green channel (G) represents the cross-polarisation (VH or HV) and the blue channel (B) represents the ratio of the cross an co-polarisations.", "roles": ["thumbnail"]}, "safe-manifest": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/manifest.safe", "type": "application/xml", "title": "Manifest File", "description": "General product metadata in XML format. 
Contains a high-level textual description of the product and references to all of product's components, the product metadata, including the product identification and the resource references, and references to the physical location of each component file contained in the product.", "roles": ["metadata"]}, "schema-noise-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/noise-iw-vh.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-noise-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/noise-iw-vv.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-product-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/iw-vh.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-product-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/iw-vv.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-calibration-vh": {"href": 
"https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/calibration-iw-vh.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "schema-calibration-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/calibration-iw-vv.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "tilejson": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "application/json", "title": "TileJSON with default rendering", "roles": ["tiles"]}, "rendered_preview": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "image/png", "title": "Rendered preview", "rel": "preview", "roles": ["overview"]}}, "bbox": [-120.78203976, 45.61096937, -117.17648474, 47.48659707], "stac_extensions": ["https://stac-extensions.github.io/sar/v1.0.0/schema.json", "https://stac-extensions.github.io/sat/v1.0.0/schema.json", 
"https://stac-extensions.github.io/eo/v1.0.0/schema.json"], "collection": "sentinel-1-grd"}] \ No newline at end of file diff --git a/ops/list_sentinel1_products/test_list_sentinel1.py b/ops/list_sentinel1_products/test_list_sentinel1.py deleted file mode 100644 index c4f02cc7..00000000 --- a/ops/list_sentinel1_products/test_list_sentinel1.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -import os -from datetime import datetime, timezone -from typing import List -from unittest.mock import Mock, patch - -import pytest -from pystac import Item -from shapely import geometry as shpg - -from vibe_core.data import DataVibe, Sentinel1Product -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.planetary_computer import Sentinel1GRDCollection, Sentinel1RTCCollection - -HERE = os.path.dirname(os.path.abspath(__file__)) -CONFIG_PATH_PC = os.path.join(HERE, "list_sentinel1_products_pc.yaml") - - -@pytest.fixture -def fake_items_pc(): - filepath = os.path.join(HERE, "sample_pc_output.json") - with open(filepath) as f: - out = json.load(f) - return [Item.from_dict(i) for i in out] - - -@pytest.fixture -def input_data(): - polygon_coords = [ - (-118.8415739999999943, 46.7963099999999983), - (-118.6759440000000012, 46.7963099999999983), - (-118.6759440000000012, 46.9169079999999994), - (-118.8415739999999943, 46.9169079999999994), - (-118.8415739999999943, 46.7963099999999983), - ] - - geom = shpg.Polygon(polygon_coords) - start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) - return DataVibe("input_test_data", (start_date, end_date), shpg.mapping(geom), []) - - -def compare_product_with_stac(product: Sentinel1Product, stac_item: Item): - assert product.geometry == stac_item.geometry - assert product.id == stac_item.id - assert product.time_range[0] == stac_item.datetime - - 
-@patch("vibe_lib.planetary_computer.get_available_collections") -@patch.object(Sentinel1GRDCollection, "query") -def test_list_pc( - query: Mock, get_collections: Mock, fake_items_pc: List[Item], input_data: DataVibe -): - query.return_value = fake_items_pc - get_collections.return_value = [Sentinel1GRDCollection.collection] - - op_tester = OpTester(CONFIG_PATH_PC) - op_tester.update_parameters({"collection": "grd"}) - output_data = op_tester.run(input_item=input_data) - - # Get op result - output_name = "sentinel_products" - assert output_name in output_data - products = output_data[output_name] - assert isinstance(products, list) - assert len(products) == 3 - get_collections.assert_called_once() - query.assert_called_once_with( - geometry=shpg.shape(input_data.geometry), time_range=input_data.time_range - ) - for p, i in zip(products, fake_items_pc): - assert isinstance(p, Sentinel1Product) - compare_product_with_stac(p, i) - - -@patch("vibe_lib.planetary_computer.get_available_collections") -@patch.object(Sentinel1RTCCollection, "query") -def test_list_rtc( - query: Mock, get_collections: Mock, fake_items_pc: List[Item], input_data: DataVibe -): - query.return_value = fake_items_pc - get_collections.return_value = [Sentinel1RTCCollection.collection] - - op_tester = OpTester(CONFIG_PATH_PC) - output_data = op_tester.run(input_item=input_data) - - # Get op result - output_name = "sentinel_products" - assert output_name in output_data - products = output_data[output_name] - assert isinstance(products, list) - assert len(products) == 3 - get_collections.assert_called_once() - query.assert_called_once_with( - geometry=shpg.shape(input_data.geometry), time_range=input_data.time_range - ) - for p, i in zip(products, fake_items_pc): - assert isinstance(p, Sentinel1Product) - compare_product_with_stac(p, i) diff --git a/ops/list_sentinel2_products/list_s2_pc.py b/ops/list_sentinel2_products/list_s2_pc.py deleted file mode 100644 index 78213abb..00000000 --- 
a/ops/list_sentinel2_products/list_s2_pc.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from concurrent.futures import ThreadPoolExecutor -from typing import Dict, List - -from vibe_core.data import DataVibe, Sentinel2Product -from vibe_lib.planetary_computer import Sentinel2Collection, convert_to_s2_product - - -def callback_builder(num_workers: int): - def list_sentinel_2_products( - input_item: DataVibe, - ) -> Dict[str, List[Sentinel2Product]]: - collection = Sentinel2Collection() - items = collection.query(roi=input_item.bbox, time_range=input_item.time_range) - - # We convert products in parallel otherwise this becomes a huge - # bottleneck due to needing to fetch the absolute orbit from the SAFE file - with ThreadPoolExecutor(max_workers=num_workers) as executor: - products = list(executor.map(convert_to_s2_product, items)) - - if not products: - raise RuntimeError( - f"No product found for time range {input_item.time_range} " - f"and geometry {input_item.geometry}" - ) - return {"sentinel_products": products} - - return list_sentinel_2_products diff --git a/ops/list_sentinel2_products/list_sentinel2_products_pc.yaml b/ops/list_sentinel2_products/list_sentinel2_products_pc.yaml deleted file mode 100644 index a3b4ac6b..00000000 --- a/ops/list_sentinel2_products/list_sentinel2_products_pc.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: list_sentinel2_products_pc -inputs: - input_item: DataVibe -output: - sentinel_products: List[Sentinel2Product] -parameters: - num_workers: 24 -entrypoint: - file: list_s2_pc.py - callback_builder: callback_builder -description: - short_description: Lists Sentinel-2 products that intersect with input geometry and time range. 
\ No newline at end of file diff --git a/ops/list_to_sequence/list_to_sequence.py b/ops/list_to_sequence/list_to_sequence.py deleted file mode 100644 index 60753170..00000000 --- a/ops/list_to_sequence/list_to_sequence.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from datetime import datetime -from typing import Any, Dict, List, Tuple - -from shapely.geometry import mapping, shape -from shapely.ops import unary_union - -from vibe_core.data import Raster -from vibe_core.data.rasters import RasterSequence - - -def time_range_union(list_rasters: List[Raster]) -> Tuple[datetime, datetime]: - return ( - min([r.time_range[0] for r in list_rasters]), - max([r.time_range[1] for r in list_rasters]), - ) - - -def geometry_union(list_rasters: List[Raster]) -> Dict[str, Any]: - return mapping(unary_union([shape(r.geometry) for r in list_rasters])) - - -def callback_builder(): - def callback(list_rasters: List[Raster]) -> Dict[str, RasterSequence]: - res = RasterSequence.clone_from( - list_rasters[0], - id=hashlib.sha256( - ("sequence" + "".join(r.id for r in list_rasters)).encode() - ).hexdigest(), - time_range=time_range_union(list_rasters), - geometry=geometry_union(list_rasters), - assets=[], - ) - for r in list_rasters: - res.add_item(r) - - return {"rasters_seq": res} - - return callback diff --git a/ops/list_to_sequence/list_to_sequence.yaml b/ops/list_to_sequence/list_to_sequence.yaml deleted file mode 100644 index a7aacdfb..00000000 --- a/ops/list_to_sequence/list_to_sequence.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: list_to_sequence -inputs: - list_rasters: List[Raster] -output: - rasters_seq: RasterSequence -parameters: -dependencies: -entrypoint: - file: list_to_sequence.py - callback_builder: callback_builder -description: - short_description: Combines a list of Rasters into a RasterSequence. 
- long_description: > - The union of the geometries and time ranges of the input rasters are stored as the metadata of - the output RasterSequence. \ No newline at end of file diff --git a/ops/list_to_sequence/test_list_to_sequence.py b/ops/list_to_sequence/test_list_to_sequence.py deleted file mode 100644 index 64d9f1dc..00000000 --- a/ops/list_to_sequence/test_list_to_sequence.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from datetime import datetime, timezone -from typing import List, Tuple - -import pytest -from shapely.geometry import Polygon, box, mapping, shape - -from vibe_core.data import AssetVibe, Raster, RasterSequence, gen_guid -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_to_sequence.yaml") - -# Geometries -WORLD_GEOM = box(-90, -180, 90, 180) -WESTERN_HEMS_GEOM = box(-90, -180, 90, 0.0) -EASTERN_HEMS_GEOM = box(-90, 0.0, 90, 180) -NORTHERN_HEMS_GEOM = box(0.0, -180, 90, 180) -SOUTHERN_HEMS_GEOM = box(-90, -180, 0.0, 180) -NW_REGION_GEOM = box(0.0, -180, 90, 0.0) -FAKE_GEOMETRY = box(-5.0, -5.0, -1.0, -1.0) # SW - -# Time ranges -FAKE_TIME_RANGE = (datetime.now(tz=timezone.utc), datetime.now(tz=timezone.utc)) -TR_1900s = ( - datetime(1900, 1, 1, tzinfo=timezone.utc), - datetime(1999, 12, 31, tzinfo=timezone.utc), -) -TR_1990s = ( - datetime(1990, 1, 1, tzinfo=timezone.utc), - datetime(1999, 12, 31, tzinfo=timezone.utc), -) -TR_2000s = ( - datetime(2000, 1, 1, tzinfo=timezone.utc), - datetime(2009, 12, 31, tzinfo=timezone.utc), -) -TR_1900s_2000s = ( - datetime(1900, 1, 1, tzinfo=timezone.utc), - datetime(2009, 12, 31, tzinfo=timezone.utc), -) - - -def create_raster(geometry: Polygon, time_range: Tuple[datetime, datetime]) -> Raster: - return Raster( - id=gen_guid(), - time_range=time_range, - geometry=mapping(geometry), - assets=[AssetVibe(reference="", 
type=mimetypes.types_map[".tif"], id=gen_guid())], - bands={}, - ) - - -@pytest.mark.parametrize( - "input_geometry_list, input_time_range_list, expected_geometry", - [ - ([NORTHERN_HEMS_GEOM, SOUTHERN_HEMS_GEOM], [FAKE_TIME_RANGE] * 2, WORLD_GEOM), - ([WESTERN_HEMS_GEOM, EASTERN_HEMS_GEOM], [FAKE_TIME_RANGE] * 2, WORLD_GEOM), - ([WESTERN_HEMS_GEOM, NW_REGION_GEOM], [FAKE_TIME_RANGE] * 2, WESTERN_HEMS_GEOM), - ([FAKE_GEOMETRY], [FAKE_TIME_RANGE], FAKE_GEOMETRY), - ], -) -def test_geometry_combination( - input_geometry_list: List[Polygon], - input_time_range_list: List[Tuple[datetime, datetime]], - expected_geometry: Polygon, -): - rasters = [ - create_raster(geometry, tr) - for geometry, tr in zip(input_geometry_list, input_time_range_list) - ] - - op_tester = OpTester(CONFIG_PATH) - output_data = op_tester.run(list_rasters=rasters) # type: ignore - - # Get op result - output_name = "rasters_seq" - assert output_name in output_data - output_seq = output_data[output_name] - assert type(output_seq) is RasterSequence - assert len(output_seq.asset_geometry) == len(rasters) - assert expected_geometry.equals(shape(output_seq.geometry)) - - -@pytest.mark.parametrize( - "input_time_range_list, expected_time_range", - [ - ([TR_1900s, TR_2000s], TR_1900s_2000s), - ([TR_1900s, TR_1990s], TR_1900s), - ([FAKE_TIME_RANGE], FAKE_TIME_RANGE), - ], -) -def test_time_range_combination( - input_time_range_list: List[Tuple[datetime, datetime]], - expected_time_range: Tuple[datetime, datetime], -): - rasters = [create_raster(FAKE_GEOMETRY, time_range) for time_range in input_time_range_list] - - op_tester = OpTester(CONFIG_PATH) - output_data = op_tester.run(list_rasters=rasters) # type: ignore - - # Get op result - output_name = "rasters_seq" - assert output_name in output_data - output_seq = output_data[output_name] - assert type(output_seq) is RasterSequence - assert len(output_seq.asset_time_range) == len(rasters) - assert output_seq.time_range == expected_time_range diff --git 
a/ops/match_raster_to_ref/match_raster_to_ref.py b/ops/match_raster_to_ref/match_raster_to_ref.py deleted file mode 100644 index 6fda0506..00000000 --- a/ops/match_raster_to_ref/match_raster_to_ref.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -from tempfile import TemporaryDirectory -from typing import Dict - -from rasterio.enums import Resampling - -from vibe_core.data import Raster, gen_guid -from vibe_lib.raster import load_raster_match, save_raster_to_asset - -LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__(self, resampling: str): - self.tmp_dir = TemporaryDirectory() - self.resampling: Resampling = getattr(Resampling, resampling) - - def __call__(self): - def operator_callback(raster: Raster, ref_raster: Raster) -> Dict[str, Raster]: - raster_ar = load_raster_match( - raster, match_raster=ref_raster, resampling=self.resampling - ) - asset = save_raster_to_asset(raster_ar, self.tmp_dir.name) - assets = [asset] - try: - assets.append(raster.visualization_asset) - except ValueError as e: - LOGGER.warning(f"Visualization asset not found {e}") - - out_raster = Raster.clone_from( - src=raster, - id=gen_guid(), - geometry=ref_raster.geometry, - assets=assets, - ) - - return {"output_raster": out_raster} - - return operator_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/match_raster_to_ref/match_raster_to_ref.yaml b/ops/match_raster_to_ref/match_raster_to_ref.yaml deleted file mode 100644 index 54abd4ba..00000000 --- a/ops/match_raster_to_ref/match_raster_to_ref.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# This operator reprojects the input `raster` to the `ref_raster` CRS. -# It also resamples and clips the input `raster` to obtain the same -# extension and number of rows/columns. The output `output_raster` -# has the same number of bands as the input `raster`. 
-name: match_raster_to_ref -inputs: - raster: Raster - ref_raster: Raster -output: - output_raster: Raster -parameters: - resampling: bilinear -entrypoint: - file: match_raster_to_ref.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - resampling -description: - short_description: - Resamples the input `raster` to match the grid of `ref_raster`. diff --git a/ops/merge_cloud_masks/merge_cloud_masks.py b/ops/merge_cloud_masks/merge_cloud_masks.py deleted file mode 100644 index c4f79ebf..00000000 --- a/ops/merge_cloud_masks/merge_cloud_masks.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import datetime -import gc -import mimetypes -import os -from itertools import chain -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple, cast - -import numpy as np -from numpy.typing import NDArray -from osgeo import gdal, gdalconst -from skimage.measure import label, regionprops -from skimage.morphology import binary_dilation, disk - -from vibe_core.data import AssetVibe, Sentinel2CloudMask, Sentinel2CloudProbability, gen_guid -from vibe_lib.raster import load_raster_from_url -from vibe_lib.spaceeye.utils import find_s2_product - -TileData = List[Tuple[Sentinel2CloudMask, Sentinel2CloudProbability]] - - -def write_tiff( - x: NDArray[Any], - tiff_file: str, - ref_file: str, - gdal_type: int = gdalconst.GDT_Float32, - predictor: int = 3, -): - """ - USAGE: write_tiff(array, tiff_file, ref_file) - Use predictor=3 for float types and predictor=2 for integer types. - """ - gtiff_flags = [ - "COMPRESS=ZSTD", # also LZW and DEFLATE works well - "ZSTD_LEVEL=9", # should be between 1-22, and 22 is highest compression. 
- # 9 is default and gets essentially the same compression-rate - "PREDICTOR=%d" % predictor, # default is 1, use 2 for ints, and 3 for floats - "TILED=YES", # so that we can read sub-arrays efficiently - "BIGTIFF=YES", # in case resulting file is >4GB - ] - - assert x.ndim == 2 or x.ndim == 3 - if x.ndim == 3: - nx, ny, nbands = x.shape - else: - nx, ny = x.shape - nbands = 1 - - if not os.path.exists(ref_file): - raise (FileNotFoundError("<%s> doesn't exist" % ref_file)) - ds = gdal.Open(ref_file) - if (ds.RasterYSize != nx) and (ds.RasterXSize != ny): - print("Size mismatch between reference file and input array") - print("x: %s, ref_file: %d, %d" % (x.shape, ds.RasterYSize, ds.RasterXSize)) - - outDrv = gdal.GetDriverByName("GTiff") - out = outDrv.Create(tiff_file, ny, nx, nbands, gdal_type, gtiff_flags) - out.SetProjection(ds.GetProjection()) - out.SetGeoTransform(ds.GetGeoTransform()) - if x.ndim == 3: - for i in range(nbands): - out.GetRasterBand(i + 1).WriteArray(x[:, :, i]) - else: - out.GetRasterBand(1).WriteArray(x) - out.FlushCache() - del out # guarantee the flush - del ds - - -def read_s2_bands( - tif_file: str, bands: List[int], transpose: bool = False, dtype: type = np.uint16 -) -> NDArray[Any]: - """ - USAGE: x = read_s2_bands(s2_file, [2,3,4]) - The command above reads in the RGB bands of the sentinel-2 tif file. 
- """ - ds = gdal.Open(tif_file) - nb = ds.RasterCount - nx = ds.RasterYSize - ny = ds.RasterXSize - for i in bands: - if i >= nb: - print("Band %d does not exist, only %d bands in %s" % (i, nb, tif_file)) - assert i < nb - if not transpose: - x = np.zeros((len(bands), nx, ny), dtype=dtype) - for i, b in enumerate(bands): - band = ds.GetRasterBand(b + 1) - x[i, :, :] = band.ReadAsArray() - else: - x = np.zeros((nx, ny, len(bands)), dtype=dtype) - for i, b in enumerate(bands): - band = ds.GetRasterBand(b + 1) - x[:, :, i] = band.ReadAsArray() - return x - - -def compute_missing_mask(s2_file: str, dilation: int = 1): - # TCI is no longer explicitly stored - bands_10m = read_s2_bands(s2_file, [1, 2, 3, 7]) - - # A dicey proposition, but it seems like 0 == NO_DATA in all bands. - missing_mask = np.min(bands_10m, axis=0) == 0 - - # Takes lots of memory, free up fast - del bands_10m - - # Try hard to free it up - gc.collect() - - # Compute missing mask using binary dilation - if dilation > 1 and np.max(missing_mask) == 0: - selem = disk(dilation) - missing_mask = binary_dilation(missing_mask, selem) - - return missing_mask - - -def kill_labels_(clabel: NDArray[Any], min_area: int) -> List[Any]: - """ - USAGE: kill_list = kill_labels(clabel, min_area) - Make a list of regions with area below min_area and return the list of regions. - """ - props = regionprops(clabel) - kill_list = [] - for p in props: - if p.area < min_area: - kill_list.append(p.label) - return kill_list - - -def remove_small_components(cmask: NDArray[Any], min_area: int = 400): - """ - USAGE: new_mask = remove_small_components(cmask, min_area=400) - First removes small connected cloud components, then fill in small - connected holes in clouds to make for a smoother cloud mask. 
- """ - assert cmask.ndim == 2 - cm2_comp = label(cmask) # remove small clouds - tmp = cmask.copy() - - kill_list = kill_labels_(cm2_comp, min_area) # type: ignore - small_clouds = np.isin(cm2_comp, kill_list) # type: ignore - - tmp[small_clouds] = False - cm2_inv = label(~tmp) # fill small holes in clouds - kill_list = kill_labels_(cm2_inv, min_area) # type: ignore - small_cloud_holes = np.isin(cm2_inv, kill_list) # type: ignore - tmp[small_cloud_holes] = True - - return tmp - - -def shift_arr( - cloud_probs: List[str], - cloud_masks: List[str], - T: int, - w2: int, - cm1_arr: List[NDArray[Any]], - cm2_arr: List[NDArray[Any]], - min_prob: float, -) -> Tuple[List[NDArray[Any]], List[NDArray[Any]]]: - """ - USAGE: cm1_arr, cm2_arr = shift_arr(s2_files, T, w2, cm1_arr, cm2_arr, min_prob) - Remove the first mask in the cm1_arr and cm2_arr and read the next masks in. - This is used to maintain a window (in time) of cloud-masks without having to read - in masks that have already been read in. - """ - c1_new = [cm1_arr[i + 1] for i in range(2 * T)] - c2_new = [cm2_arr[i + 1] for i in range(2 * T)] - - cm1, cm2 = load_cloud_masks(cloud_probs[w2], cloud_masks[w2], min_prob) - c1_new.append(cm1) - c2_new.append(cm2) - - return c1_new, c2_new - - -def compute_mask_with_missing_clouds( - cm1_arr: List[NDArray[Any]], - cm2_arr: List[NDArray[Any]], - idx: int, - max_extra_cloud: float, - min_area: int, - dilation: int, -) -> NDArray[Any]: - cm1 = np.dstack(cm1_arr) - cm2 = np.dstack(cm2_arr) - x = np.sum(np.logical_and(cm2, np.logical_not(cm1)), axis=2) - suspect = np.logical_and(x > max_extra_cloud, cm2[:, :, idx]) - suspect = np.logical_and(suspect, np.logical_not(cm1[:, :, idx])) - - new_mask = cm2[:, :, idx].copy() - new_mask[suspect] = cm1[suspect, idx] # i.e. 
= False - - new_mask = remove_small_components(new_mask, min_area=min_area) - old_mask = cm1[:, :, idx] - # don't switch off clouds in original built in mask - new_mask = np.logical_or(old_mask, new_mask) - - if dilation > 1: - selem = disk(dilation) - new_mask = binary_dilation(new_mask, selem) - - return new_mask - - -def fill_missing_pixels(ref_file: str, new_mask: NDArray[Any], tmp_dir: str) -> str: - """ - Since part of the region may be outside the footprint of the orbit - we need to handle missing pixels in some way. Here we choose to - simply mark them as clouds and let the reconstruction algorithm - handle it. We detect missing pixels by looking for TCI pixels where - the RGB bands are all zero. - """ - - # Add missing pixels as clouds - out_file = os.path.join(tmp_dir, f"{gen_guid()}.tif") - write_tiff( - new_mask.astype(np.uint8), out_file, ref_file, gdal_type=gdalconst.GDT_Byte, predictor=2 - ) - - return out_file - - -def load_cloud_masks( - cloudless_prob_path: str, l1c_cloud_path: str, min_prob: float -) -> Tuple[NDArray[Any], NDArray[Any]]: - cmask = load_raster_from_url(l1c_cloud_path).to_numpy()[0] - # Open it and fill masked values as clouds - cprob = load_raster_from_url(cloudless_prob_path).to_masked_array()[0] - cmask[cprob.mask] = 1.0 - cprob = cprob.filled(1.0) - cprob_thr = cprob > min_prob - - return cmask, cprob_thr - - -def cloud_masks_for_time_window( - cloudless_files: List[str], mask_files: List[str], min_prob: float -) -> Tuple[List[NDArray[Any]], List[NDArray[Any]]]: - """ - Populate temporal window of cloud masks - """ - - cm1_arr: List[NDArray[Any]] = [] - cm2_arr: List[NDArray[Any]] = [] - for prob, mask in zip(cloudless_files, mask_files): - cm1, cm2 = load_cloud_masks(prob, mask, min_prob) - cm1_arr.append(cm1) - cm2_arr.append(cm2) - - return cm1_arr, cm2_arr - - -# This script should take as input only the cloud masks. 
-def clean_clouds_for_tile( - probs_files: List[str], - mask_files: List[str], - out_dir: str, - T: int, - min_prob: float, - min_area: int, - max_extra_cloud: int, - dilation: int, -) -> List[str]: - """ - USAGE: clean_clouds_for_tile(tile, start, end, save=True, T=10, min_prob=0.7, - min_area=400, max_extra_cloud=5) reads in all the cloud masks in the directory - and cleans it based on two rules. - 1. If in a time window of length 2*T+1 there are max_extra_cloud pixels that - became cloudy in the s2cloudless mask and were not in the built in cloud - mask, then we back off to the built in mask. - 2. We remove connected cloud components with less than min_area pixels and - fill in holes in clouds with less than min_area pixels. - Finally we take the union of these cloud pixels and the built in cloud mask and - write it to a file named cloud_mask_merged.ny. - """ - - # Window of cloud masks to process - window_start = 0 - window_end = 2 * T + 1 - - selected_probs_files = probs_files[window_start:window_end] - selected_mask_files = mask_files[window_start:window_end] - - cm1_arr, cm2_arr = cloud_masks_for_time_window( - selected_probs_files, selected_mask_files, min_prob - ) - - N = len(probs_files) - saved_masks: List[str] = [] - for i in range(N): - if i + T > window_end and window_end < N: - cm1_arr, cm2_arr = shift_arr( - probs_files, mask_files, T, window_end, cm1_arr, cm2_arr, min_prob - ) - gc.collect() - window_start += 1 - window_end += 1 - idx = i - window_start - new_mask = compute_mask_with_missing_clouds( - cm1_arr, cm2_arr, idx, max_extra_cloud, min_area, dilation - ) - saved_masks.append(fill_missing_pixels(mask_files[i], new_mask, out_dir)) - gc.collect() - - return saved_masks - - -def prepare_tile_data( - items: TileData, -) -> Tuple[List[str], List[str]]: - date_list: List[datetime.datetime] = [] - cloud_masks: List[str] = [] - cloud_probs: List[str] = [] - for mask, prob in items: - cloud_probs.append(prob.raster_asset.local_path) - 
cloud_masks.append(mask.raster_asset.local_path) - date_list.append(mask.time_range[0]) - - ind = np.argsort(cast(NDArray[Any], date_list)) - out_cloud_probs = [cloud_probs[i] for i in ind] - out_cloud_masks = [cloud_masks[i] for i in ind] - - return out_cloud_probs, out_cloud_masks - - -class CallbackBuilder: - def __init__( - self, - num_workers: int, - window_size: int, - cloud_prob_threshold: float, - min_area: int, - max_extra_cloud: int, - dilation: int, - ): - self.num_workers = num_workers - self.tmp_dir = TemporaryDirectory() - self.window_size = window_size - self.threshold = cloud_prob_threshold - self.min_area = min_area - self.max_extra_cloud = max_extra_cloud - self.dilation = dilation - - def __call__(self): - def compute_cloud_prob( - masks: List[Sentinel2CloudMask], - cloud_probabilities: List[Sentinel2CloudProbability], - ) -> Dict[str, List[Sentinel2CloudMask]]: - def process_single_tile(items: TileData) -> List[Sentinel2CloudMask]: - items = sorted(items, key=lambda x: x[0].time_range[0]) - probs_files, mask_files = prepare_tile_data(items) - - out_files = clean_clouds_for_tile( - probs_files, - mask_files, - self.tmp_dir.name, - T=self.window_size, - min_prob=self.threshold, - min_area=self.min_area, - max_extra_cloud=self.max_extra_cloud, - dilation=self.dilation, - ) - - # Generating output items - output_items: List[Sentinel2CloudMask] = [] - for ( - mask, - _, - ), new_asset in zip(items, out_files): - merged_cloud = AssetVibe( - reference=new_asset, type=mimetypes.types_map[".tif"], id=gen_guid() - ) - new_mask = Sentinel2CloudMask.clone_from(mask, gen_guid(), [merged_cloud]) - output_items.append(new_mask) - - return output_items - - # Grouping by tile_id - tile_dict: Dict[str, TileData] = {} - - for mask in masks: - tile_id = mask.tile_id - prob = find_s2_product(mask.product_name, cloud_probabilities) - if tile_id in tile_dict: - tile_dict[tile_id].append((mask, prob)) - else: - tile_dict[tile_id] = [(mask, prob)] - - results = 
[process_single_tile(tile) for tile in tile_dict.values()] - results = cast(List[List[Sentinel2CloudMask]], results) - - consolidated_result = [result for result in chain(*results)] - - return {"merged_cloud_masks": consolidated_result} - - return compute_cloud_prob - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/merge_cloud_masks/merge_cloud_masks.yaml b/ops/merge_cloud_masks/merge_cloud_masks.yaml deleted file mode 100644 index 359508b4..00000000 --- a/ops/merge_cloud_masks/merge_cloud_masks.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: merge_cloud_masks -inputs: - masks: List[Sentinel2CloudMask] - cloud_probabilities: List[Sentinel2CloudProbability] -output: - merged_cloud_masks: List[Sentinel2CloudMask] -parameters: - num_workers: 1 - window_size: 10 - cloud_prob_threshold: 0.5 - min_area: 400 - max_extra_cloud: 5 - dilation: 1 -entrypoint: - file: merge_cloud_masks.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - window_size - - cloud_prob_threshold - - min_area - - max_extra_cloud - - dilation diff --git a/ops/merge_cloud_masks/merge_cloud_masks_simple.py b/ops/merge_cloud_masks/merge_cloud_masks_simple.py deleted file mode 100644 index 7ea8bc96..00000000 --- a/ops/merge_cloud_masks/merge_cloud_masks_simple.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple - -import numpy as np -import rasterio -from numpy.typing import NDArray -from skimage.measure import label, regionprops -from skimage.morphology import binary_dilation, disk - -from vibe_core.data import AssetVibe, Sentinel2CloudMask, Sentinel2CloudProbability, gen_guid -from vibe_lib.raster import INT_COMPRESSION_KWARGS - -TileData = List[Tuple[Sentinel2CloudMask, Sentinel2CloudProbability]] - - -def kill_labels(clabel: NDArray[Any], min_area: int) -> List[Any]: - """ - USAGE: kill_list = kill_labels(clabel, min_area) - Make a list of regions with area below min_area and return the list of regions. - """ - props = regionprops(clabel) - kill_list = [] - for p in props: - if p.area < min_area: - kill_list.append(p.label) - return kill_list - - -def remove_small_components(cmask: NDArray[Any], min_area: int): - """ - USAGE: new_mask = remove_small_components(cmask, min_area=400) - First removes small connected cloud components, then fill in small - connected holes in clouds to make for a smoother cloud mask. 
- """ - # Get cloud components - cloud_comp = label(cmask) - # Mark small components - kill_list = kill_labels(cloud_comp, min_area) # type: ignore - small_clouds = np.isin(cloud_comp, kill_list) # type: ignore - # Remove them - cmask[small_clouds] = False - - # Do the same for small components of clear sky - holes_comp = label(~cmask) - kill_list = kill_labels(holes_comp, min_area) # type: ignore - small_cloud_holes = np.isin(holes_comp, kill_list) # type: ignore - cmask[small_cloud_holes] = True - - return cmask - - -def merge_masks( - product_mask: Sentinel2CloudMask, - cloud_probability: Sentinel2CloudProbability, - shadow_probability: Sentinel2CloudProbability, - cloud_threshold: float, - shadow_threshold: float, - closing_size: int, - min_area: int, -) -> Tuple[NDArray[np.uint8], Dict[str, Any]]: - with rasterio.open(cloud_probability.raster_asset.url) as src: - meta = src.meta - cloud_p = src.read(1) > cloud_threshold - with rasterio.open(shadow_probability.raster_asset.url) as src: - shadow_p = src.read(1) > shadow_threshold - with rasterio.open(product_mask.raster_asset.url) as src: - cloud_m = src.read(1).astype(bool) - # Do the most conservative thing we can, and pick cloud if any model classifies as cloud/shadow - merged = cloud_p | shadow_p | cloud_m - # Remove small holes and keep a buffer - merged = binary_dilation(merged, disk(closing_size)).astype(np.uint8) - if min_area > 0: - merged = remove_small_components(merged, min_area) - meta["dtype"] = "uint8" - return merged[None], meta - - -class CallbackBuilder: - def __init__( - self, - cloud_prob_threshold: float, - shadow_prob_threshold: float, - closing_size: int, - min_area: int, - ): - self.tmp_dir = TemporaryDirectory() - self.cloud_threshold = cloud_prob_threshold - self.shadow_threshold = shadow_prob_threshold - self.closing_size = closing_size - self.min_area = min_area - - def __call__(self): - def compute_cloud_prob( - product_mask: Sentinel2CloudMask, - cloud_probability: 
Sentinel2CloudProbability, - shadow_probability: Sentinel2CloudProbability, - ) -> Dict[str, Sentinel2CloudMask]: - merged, meta = merge_masks( - product_mask, - cloud_probability, - shadow_probability, - self.cloud_threshold, - self.shadow_threshold, - self.closing_size, - self.min_area, - ) - id = gen_guid() - out_path = os.path.join(self.tmp_dir.name, f"{id}.tif") - with rasterio.open(out_path, "w", **meta, **INT_COMPRESSION_KWARGS) as dst: - dst.write(merged) - return { - "merged_cloud_mask": Sentinel2CloudMask.clone_from( - cloud_probability, - id=gen_guid(), - bands={"cloud": 0}, - categories=["Clear", "Cloud"], - assets=[AssetVibe(id=id, type="image/tiff", reference=out_path)], - ) - } - - return compute_cloud_prob - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/merge_cloud_masks/merge_cloud_masks_simple.yaml b/ops/merge_cloud_masks/merge_cloud_masks_simple.yaml deleted file mode 100644 index ca9eeecb..00000000 --- a/ops/merge_cloud_masks/merge_cloud_masks_simple.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: merge_cloud_masks_simple -inputs: - product_mask: Sentinel2CloudMask - cloud_probability: Sentinel2CloudProbability - shadow_probability: Sentinel2CloudProbability -output: - merged_cloud_mask: Sentinel2CloudMask -parameters: - cloud_prob_threshold: 0.3 - shadow_prob_threshold: 0.2 - closing_size: 5 - min_area: 0 -entrypoint: - file: merge_cloud_masks_simple.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - cloud_prob_threshold - - shadow_prob_threshold - - closing_size - - min_area -description: - short_description: Merges cloud, shadow and product cloud masks into a single mask. \ No newline at end of file diff --git a/ops/merge_geometries/merge_geometries.py b/ops/merge_geometries/merge_geometries.py deleted file mode 100644 index ab32034f..00000000 --- a/ops/merge_geometries/merge_geometries.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -from enum import auto -from typing import Dict, List, TypeVar - -from shapely import geometry as shpg -from shapely import ops as shpo -from strenum import StrEnum - -from vibe_core.data import DataVibe - -T = TypeVar("T", bound=DataVibe) - - -class MergeMethod(StrEnum): - union = auto() - intersection = auto() - - -def callback_builder(method: str): - try: - merge_method = MergeMethod[method] - except KeyError: - avail_methods = ", ".join([i.name for i in MergeMethod]) - raise ValueError( - f"Invalid merge method parameter {method}. Available methods are {avail_methods}" - ) - - def callback(items: List[T]) -> Dict[str, T]: - item_type = type(items[0]) - - if merge_method == MergeMethod.union: - merge_geom = shpg.mapping(shpo.unary_union([shpg.shape(i.geometry) for i in items])) - else: - merge_geom = shpg.shape(items[0].geometry) - for i in items: - merge_geom = merge_geom.intersection(shpg.shape(i.geometry)) - merge_geom = shpg.mapping(merge_geom) - merge_id = hashlib.sha256( - "".join([f"merge geometries method={merge_method}"] + [i.id for i in items]).encode() - ).hexdigest() - return { - "merged": item_type.clone_from(items[0], id=merge_id, assets=[], geometry=merge_geom) - } - - return callback diff --git a/ops/merge_geometries/merge_geometries.yaml b/ops/merge_geometries/merge_geometries.yaml deleted file mode 100644 index 38035c29..00000000 --- a/ops/merge_geometries/merge_geometries.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: merge_geometries -inputs: - items: List[DataVibe] -output: - # merged: "@INHERIT(items)" - merged: DataVibe -entrypoint: - file: merge_geometries.py - callback_builder: callback_builder -parameters: - method: union -dependencies: - parameters: - - method -description: - short_description: Create item with merged geometry from item list. - long_description: - The op will merge the items' geometries according to the chosen method and copy all other - metadata from the first item in the list. 
- inputs: - items: Input items. - output: - merged: Item that contains the merged geometry. - parameters: - method: How to merge the geometry, available methods are 'union' and 'intersection'. diff --git a/ops/merge_geometries/test_merge_geometries.py b/ops/merge_geometries/test_merge_geometries.py deleted file mode 100644 index c8f44c0b..00000000 --- a/ops/merge_geometries/test_merge_geometries.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime - -from shapely import geometry as shpg - -from vibe_core.data import DataVibe -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "merge_geometries.yaml") - - -def test_op(): - geoms = [shpg.box(0, 0, 1, 1), shpg.box(0, 0, 2, 2)] - items = [ - DataVibe( - id=f"{i}", - geometry=shpg.mapping(g), - time_range=(datetime.now(), datetime.now()), - assets=[], - ) - for i, g in enumerate(geoms) - ] - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({"method": "union"}) - out = op_tester.run(items=items) # type: ignore - assert "merged" in out - out_vibe = out["merged"] - assert isinstance(out_vibe, DataVibe) - assert shpg.shape(out_vibe.geometry).equals(geoms[-1]) - assert out_vibe.time_range == items[0].time_range - - op_tester.update_parameters({"method": "intersection"}) - out = op_tester.run(items=items) # type: ignore - assert "merged" in out - out_vibe = out["merged"] - assert isinstance(out_vibe, DataVibe) - assert shpg.shape(out_vibe.geometry).equals(geoms[0]) - assert out_vibe.time_range == items[0].time_range diff --git a/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py b/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py deleted file mode 100644 index abc0361e..00000000 --- a/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Microsoft 
Corporation. -# Licensed under the MIT License. - -import hashlib -from typing import Dict - -from vibe_core.data import DataVibe - - -def callback_builder(): - def callback(geometry: DataVibe, time_range: DataVibe) -> Dict[str, DataVibe]: - id = hashlib.sha256( - f"merge geometry and time range {geometry.id}{time_range.id}".encode() - ).hexdigest() - return { - "merged": DataVibe( - id=id, geometry=geometry.geometry, time_range=time_range.time_range, assets=[] - ) - } - - return callback diff --git a/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.yaml b/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.yaml deleted file mode 100644 index 0e282805..00000000 --- a/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: merge_geometry_and_time_range -inputs: - geometry: DataVibe - time_range: DataVibe -output: - merged: DataVibe -entrypoint: - file: merge_geometry_and_time_range.py - callback_builder: callback_builder -parameters: -description: - short_description: - Create item that contains the geometry from one item and the time range from another. - long_description: The op will create and assetless DataVibe with the copied information. - inputs: - geometry: Item from which the geometry will be copied. - time_range: Item from which the time range will be copied. - output: - merged: Item with geometry from one item and time range from another. diff --git a/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py b/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py deleted file mode 100644 index 158b5453..00000000 --- a/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime - -from shapely import geometry as shpg - -from vibe_core.data import DataVibe -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "merge_geometry_and_time_range.yaml" -) - - -def test_op(): - vibe1 = DataVibe( - id="1", - geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), - time_range=(datetime(2020, 1, 1), datetime(2020, 2, 2)), - assets=[], - ) - vibe2 = DataVibe( - id="2", - geometry=shpg.mapping(shpg.box(0, 0, 2, 2)), - time_range=(datetime(2021, 1, 1), datetime(2021, 2, 2)), - assets=[], - ) - op_tester = OpTester(CONFIG_PATH) - out = op_tester.run(geometry=vibe1, time_range=vibe2) - assert "merged" in out - out_vibe = out["merged"] - assert isinstance(out_vibe, DataVibe) - assert out_vibe.geometry == vibe1.geometry - assert out_vibe.time_range == vibe2.time_range - - out = op_tester.run(geometry=vibe2, time_range=vibe1) - assert "merged" in out - out_vibe = out["merged"] - assert isinstance(out_vibe, DataVibe) - assert out_vibe.geometry == vibe2.geometry - assert out_vibe.time_range == vibe1.time_range diff --git a/ops/merge_rasters/merge_rasters.py b/ops/merge_rasters/merge_rasters.py deleted file mode 100644 index 0d385fa1..00000000 --- a/ops/merge_rasters/merge_rasters.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import mimetypes -import os -from collections import defaultdict -from tempfile import TemporaryDirectory -from typing import Any, Dict, Optional, Set, Tuple, cast - -import geopandas as gpd -import numpy as np -import rasterio -from rasterio.enums import Resampling -from rasterio.merge import merge -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, Raster, RasterSequence, gen_guid -from vibe_lib.raster import FLOAT_COMPRESSION_KWARGS, INT_COMPRESSION_KWARGS - -FIELDS = ("crs", "dtype", "count") -RESOLUTION_METHODS = { - "equal": None, - "average": lambda resolutions: tuple(np.mean(resolutions, axis=0)), - "lowest": lambda resolutions: tuple(np.min(resolutions, axis=0)), - "highest": lambda resolutions: tuple(np.max(resolutions, axis=0)), -} -LOGGER = logging.getLogger(__name__) - - -def get_resolution( - raster_sequence: RasterSequence, resolution_method: str -) -> Optional[Tuple[float, float]]: - resolutions = [] - for r in raster_sequence.get_ordered_assets(): - with rasterio.open(r.url) as src: - resolutions.append((src.res[0], src.res[1])) - - if resolution_method == "equal": - if len(set(resolutions)) > 1: - raise ValueError( - "Found multiple resolutions when merging RasterSequence, " - "but expected all resolutions to be equal." - ) - return None - elif resolution_method in ["average", "lowest", "highest"]: - if len(set(resolutions)) > 1: - LOGGER.warning( - "Found multiple resolutions when merging RasterSequence, " - f"using the {resolution_method} of {len(resolutions)} resolutions." - ) - return cast(Tuple[float, float], RESOLUTION_METHODS[resolution_method](resolutions)) - else: - raise ValueError( - f"Expected resolution method to be in {list(RESOLUTION_METHODS.keys())}. " - f"Found {resolution_method}." 
- ) - - -def merge_rasters( - raster_sequence: RasterSequence, output_dir: str, resampling: Resampling, resolution: str -) -> Dict[str, Raster]: - out_id = gen_guid() - file_path = os.path.join(output_dir, f"{out_id}.tif") - # All rasters should have the same CRS - assets_meta: Dict[str, Set[Any]] = defaultdict(set) - for r in raster_sequence.get_ordered_assets(): - with rasterio.open(r.url) as src: - for field in FIELDS: - assets_meta[field].add(src.meta[field]) - for field, field_set in assets_meta.items(): - if len(field_set) > 1: - raise ValueError( - f"Expected all rasters in RasterSequence to have the same '{field}', " - f"found {field_set}" - ) - crs = assets_meta["crs"].pop() - dtype = assets_meta["dtype"].pop() - - compression_kwargs = ( - INT_COMPRESSION_KWARGS if np.issubdtype(dtype, np.integer) else FLOAT_COMPRESSION_KWARGS - ) - if not (np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.floating)): - ValueError(f"Expected raster with int or float subtype, found {dtype}") - - bounds = tuple( - gpd.GeoSeries(shpg.shape(raster_sequence.geometry), crs="epsg:4326") - .to_crs(crs) - .bounds.iloc[0] - ) - - merge( - [i.url for i in raster_sequence.get_ordered_assets()], - bounds=bounds, - res=get_resolution(raster_sequence, resolution), - resampling=resampling, - dst_path=file_path, - dst_kwds=compression_kwargs, - ) - - if not os.path.exists(file_path): - raise FileNotFoundError(f"Merged raster not found in {file_path}.") - - asset = AssetVibe(reference=file_path, type=mimetypes.types_map[".tif"], id=out_id) - product = Raster.clone_from(raster_sequence, id=gen_guid(), assets=[asset]) - return {"raster": product} - - -class CallbackBuilder: - def __init__(self, resampling: str, resolution: str): - self.tmp_dir = TemporaryDirectory() - self.resampling = Resampling[resampling] - self.resolution = resolution - - def __call__(self): - def callback(raster_sequence: RasterSequence): - return merge_rasters( - raster_sequence, - 
output_dir=self.tmp_dir.name, - resampling=self.resampling, - resolution=self.resolution, - ) - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/merge_rasters/merge_rasters.yaml b/ops/merge_rasters/merge_rasters.yaml deleted file mode 100644 index 4b95cdbf..00000000 --- a/ops/merge_rasters/merge_rasters.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Merge rasters in a sequence to a single raster -# All rasters in the sequence should have the same CRS and dtype -name: merge_rasters -inputs: - raster_sequence: RasterSequence -output: - raster: Raster -parameters: - resampling: bilinear - resolution: equal -entrypoint: - file: merge_rasters.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - resampling - - resolution -description: - short_description: Merges rasters in a sequence to a single raster. - parameters: - resampling: - Resampling method used to reproject the rasters to a common CRS. - resolution: >- - Determines how the resolution of the output raster is defined. One of 'equal' (breaks if the - resolution of the sequence rasters are not the same), 'lowest' (uses the lowest resolution - among rasters), 'highest' (uses the highest resolution among rasters), or 'average' (averages - the resolution of all rasters in the sequence). \ No newline at end of file diff --git a/ops/merge_sentinel1_orbits/merge_sentinel1.py b/ops/merge_sentinel1_orbits/merge_sentinel1.py deleted file mode 100644 index 8d588b4e..00000000 --- a/ops/merge_sentinel1_orbits/merge_sentinel1.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple - -import geopandas as gpd -import rasterio -from rasterio.enums import Resampling -from rasterio.merge import merge -from rasterio.vrt import WarpedVRT -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, Sentinel1Raster, Sentinel1RasterOrbitGroup, gen_guid -from vibe_lib.raster import FLOAT_COMPRESSION_KWARGS, tile_to_utm - - -def merge_rasters( - filepaths: List[str], - bounds: Tuple[float, float, float, float], - resampling: Resampling, - out_path: str, - **kwargs: Any, -): - src = [] - vrt = [] - try: - src = [rasterio.open(i) for i in filepaths] - vrt = [WarpedVRT(i, **kwargs) for i in src] - dst_kwds = FLOAT_COMPRESSION_KWARGS - dst_kwds["driver"] = "GTiff" - dst_kwds.update({"blockxsize": 512, "blockysize": 512}) - return merge( - vrt, bounds=bounds, resampling=resampling, dst_path=out_path, dst_kwds=dst_kwds - ) - finally: - for i in src + vrt: - i.close() # type:ignore - - -def process_orbit( - orbit_group: Sentinel1RasterOrbitGroup, output_dir: str, resampling: Resampling -) -> Sentinel1Raster: - out_id = gen_guid() - filepath = os.path.join(output_dir, f"{out_id}.tif") - geom = orbit_group.geometry - tile_id = orbit_group.tile_id - crs = f"epsg:{tile_to_utm(tile_id)}" - bounds = tuple( - gpd.GeoSeries(shpg.shape(geom), crs="epsg:4326").to_crs(crs).bounds.round().iloc[0] - ) - merge_rasters( - [i.url for i in orbit_group.get_ordered_assets()], - bounds=bounds, - resampling=resampling, - out_path=filepath, - crs=crs, - ) - - asset = AssetVibe(reference=filepath, type="image/tiff", id=out_id) - product = Sentinel1Raster.clone_from(orbit_group, id=gen_guid(), assets=[asset]) - return product - - -class CallbackBuilder: - def __init__(self, resampling: str): - self.tmp_dir = TemporaryDirectory() - self.resampling = Resampling[resampling] - - def __call__(self): - def callback( - raster_group: Sentinel1RasterOrbitGroup, - ) -> Dict[str, 
Sentinel1Raster]: - return { - "merged_product": process_orbit(raster_group, self.tmp_dir.name, self.resampling) - } - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/merge_sentinel1_orbits/merge_sentinel1_orbits.yaml b/ops/merge_sentinel1_orbits/merge_sentinel1_orbits.yaml deleted file mode 100644 index 4641c48d..00000000 --- a/ops/merge_sentinel1_orbits/merge_sentinel1_orbits.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: merge_sentinel1_orbits -inputs: - raster_group: Sentinel1RasterOrbitGroup -output: - merged_product: Sentinel1Raster -parameters: - resampling: bilinear -entrypoint: - file: merge_sentinel1.py - callback_builder: CallbackBuilder -description: - short_description: - Merge items from the same absolute orbit into the appropriate MGRS (Sentinel-2 tiling system) - tile. - long_description: - The op will merge the items by reprojecting the data, if necessary, to the appropriate CRS and - then merging them using the order of the assets in the input as priority. - inputs: - raster_group: Rasters from the same orbit that will be merged. - output: - merged: Raster containing merged data. - parameters: - resampling: - How to resample the input data. See - https://rasterio.readthedocs.io/en/latest/api/rasterio.enums.html#rasterio.enums.Resampling - for more information on available sampling methods. diff --git a/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py b/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py deleted file mode 100644 index 244771fc..00000000 --- a/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict, List, Union - -from rasterio.merge import merge - -from vibe_core.data import ( - AssetVibe, - Sentinel2CloudMask, - Sentinel2CloudMaskOrbitGroup, - Sentinel2Raster, - Sentinel2RasterOrbitGroup, - gen_guid, -) -from vibe_core.uri import uri_to_filename - - -def merge_rasters(path_list: List[str], dst_dir: str) -> str: - filename = uri_to_filename(path_list[0]) - dst_path = os.path.join(dst_dir, filename) - # Rasterio is merging by keeping the first pixel while GDAL was keeping the - # last. There seems to be no advantage to either, but the new behavior is - # different. - merge(path_list, dst_path=dst_path, dst_kwds={"zstd_level": 9, "predictor": 2}) - return dst_path - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def merge_orbits( - raster_group: Sentinel2RasterOrbitGroup, mask_group: Sentinel2CloudMaskOrbitGroup - ) -> Dict[str, Union[Sentinel2Raster, Sentinel2CloudMask]]: - raster_list = [a.url for a in raster_group.get_ordered_assets()] - mask_list = [a.url for a in mask_group.get_ordered_assets()] - - if len(raster_list) > 1: - merged_img = merge_rasters(raster_list, self.tmp_dir.name) - merged_cloud = merge_rasters(mask_list, self.tmp_dir.name) - - raster_asset = AssetVibe( - reference=merged_img, type=mimetypes.types_map[".tif"], id=gen_guid() - ) - mask_asset = AssetVibe( - reference=merged_cloud, type=mimetypes.types_map[".tif"], id=gen_guid() - ) - else: - raster_asset = raster_group.get_ordered_assets()[0] - mask_asset = mask_group.get_ordered_assets()[0] - - # Update item geometry - new_raster = Sentinel2Raster.clone_from( - raster_group, - id=gen_guid(), - assets=[raster_asset], - ) - - new_mask = Sentinel2CloudMask.clone_from( - mask_group, - id=gen_guid(), - assets=[mask_asset], - ) - - return {"output_raster": new_raster, "output_mask": new_mask} - - return merge_orbits - - def 
__del__(self): - self.tmp_dir.cleanup() diff --git a/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.yaml b/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.yaml deleted file mode 100644 index 3716d66d..00000000 --- a/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: merge_sentinel2_orbits -inputs: - raster_group: Sentinel2RasterOrbitGroup - mask_group: Sentinel2CloudMaskOrbitGroup -output: - output_raster: Sentinel2Raster - output_mask: Sentinel2CloudMask -parameters: -entrypoint: - file: merge_sentinel2_orbits.py - callback_builder: CallbackBuilder -description: - short_description: Combines raster files grouped by group_sentinel2_orbits into a single raster. \ No newline at end of file diff --git a/ops/minimum_samples/find_soil_sample_locations.py b/ops/minimum_samples/find_soil_sample_locations.py deleted file mode 100644 index ecad43a0..00000000 --- a/ops/minimum_samples/find_soil_sample_locations.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple, cast - -import numpy as np -import rasterio -from geopandas import GeoDataFrame, GeoSeries, clip -from numpy._typing import NDArray -from rasterio.features import shapes, sieve -from rasterio.mask import mask -from shapely import geometry as shpg -from shapely.geometry import shape -from shapely.validation import make_valid -from sklearn.mixture import GaussianMixture - -from vibe_core.data import DataVibe, gen_hash_id -from vibe_core.data.core_types import AssetVibe, gen_guid -from vibe_core.data.rasters import Raster -from vibe_lib.archive import create_flat_archive - - -class CallbackBuilder: - def __init__(self, n_clusters: int, sieve_size: int): - self.temp_dir = [] - self.n_clusters = n_clusters - self.random_state = 45 - self.sieve_size = sieve_size - - def find_minimum_samples(self, raster: Raster, user_input: DataVibe) -> DataVibe: - self.geometry_mask = GeoSeries([shape(user_input.geometry)], crs="EPSG:4326") - # read input files - with rasterio.open(raster.raster_asset.url, "r") as r_obj: - p = self.geometry_mask.to_crs(r_obj.crs)[0] - ar, tr = mask(r_obj, [p], crop=True, nodata=0) - self.raster_crs = r_obj.crs - self.tr = tr - x = ar[0] - - asset_vibes = self.get_samples(x) - return DataVibe( - gen_hash_id("heatmap_nutrients", raster.geometry, raster.time_range), - raster.time_range, - raster.geometry, - asset_vibes, - ) - - def get_samples(self, x: NDArray[Any]) -> List[AssetVibe]: - model = self.train_model(x) - geo_clusters, geo_locations = self.inference(model=model, input=x) - asset_vibes = [] - asset_vibes.append(self.write_samples(geo_clusters, "geo_cluster_boundaries")) - asset_vibes.append(self.write_samples(geo_locations, "geo_sample_locations")) - return asset_vibes - - def train_model( - self, - input: NDArray[Any], - ): - x_ = input.reshape(-1, 1) - x_ = np.nan_to_num(x_) - model = GaussianMixture( - n_components=self.n_clusters, 
covariance_type="full", random_state=self.random_state - ) - model.fit(x_) - return model - - def inference( - self, - model: GaussianMixture, - input: NDArray[Any], - ) -> Tuple[GeoDataFrame, GeoDataFrame]: - # convert input to 2D array - x_ = input.reshape(-1, 1) - x_ = np.nan_to_num(x_) - - # predict clusters - d = model.predict(x_) - blocks = d.reshape(input.shape) - - # group small clusters - blocks = sieve(blocks.astype(np.uint8), self.sieve_size) - - # converting clusters generated to a GeoDataFrame - out = [] - for segment in range(self.n_clusters): - polygons = (blocks == segment).astype(np.uint8) - geoms = [ - make_valid(shpg.shape(s)) - for s, _ in shapes(polygons, mask=polygons, transform=self.tr) - ] - out.extend(geoms) - - if len(out) > 0: - # get lat lon of center of each polygon, the center will be inside the polygon - gdf = GeoDataFrame(data=out, columns=["geometry"], crs=self.raster_crs) # type: ignore - gdf = cast(GeoDataFrame, gdf.to_crs("EPSG:4326")) - gdf = cast(GeoDataFrame, clip(gdf, self.geometry_mask, keep_geom_type=True)) - - if gdf is not None and not gdf.empty: - gdf_locations = gdf.geometry.representative_point() - return (gdf, gdf_locations) - - raise RuntimeError("No samples found") - - def write_samples(self, geo_df: GeoDataFrame, geo_type: str) -> AssetVibe: - temp_d = TemporaryDirectory() - output_path = os.path.join(temp_d.name, f"minimum_samples_location_{geo_df.shape[0]}.shp") - geo_df.to_file(output_path) - self.temp_dir.append(temp_d) - - # Create zip archive containing all output - archive_path = create_flat_archive(temp_d.name, geo_type) - return AssetVibe(reference=archive_path, type="application/zip", id=gen_guid()) - - def __call__(self): - def find_minimum_samples_init(raster: Raster, user_input: DataVibe) -> Dict[str, DataVibe]: - out_vibe = self.find_minimum_samples(raster, user_input) - return {"locations": out_vibe} - - return find_minimum_samples_init - - def __del__(self): - for temp_d in self.temp_dir: - 
temp_d.cleanup() diff --git a/ops/minimum_samples/find_soil_sample_locations.yaml b/ops/minimum_samples/find_soil_sample_locations.yaml deleted file mode 100644 index 018c9beb..00000000 --- a/ops/minimum_samples/find_soil_sample_locations.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: find_soil_samples -inputs: - raster: Raster - user_input: DataVibe -output: - locations: DataVibe -parameters: - n_clusters: 1 - sieve_size: 1 -entrypoint: - callback_builder: CallbackBuilder - file: find_soil_sample_locations.py -dependencies: - parameters: - - n_clusters - - sieve_size -description: - short_description: - Find minimum soil sample locations by grouping indices values that are derived from - satellite or spaceEye imagery bands. diff --git a/ops/minimum_samples/test_soil_sample_heatmap.py b/ops/minimum_samples/test_soil_sample_heatmap.py deleted file mode 100644 index aec1323a..00000000 --- a/ops/minimum_samples/test_soil_sample_heatmap.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -import time -from datetime import datetime -from typing import Any, Dict, Union, cast - -import geopandas as gpd -import pytest -from shapely import geometry as shpg -from shapely.geometry import MultiPolygon, Polygon - -from vibe_core.client import FarmvibesAiClient, get_default_vibe_client -from vibe_core.data import DataVibe -from vibe_core.data.rasters import Raster -from vibe_dev.testing.op_tester import OpTester - -FAKE_TIME_RANGE = (datetime(2022, 6, 30), datetime(2022, 7, 2)) -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "find_soil_sample_locations.yaml" -) - - -@pytest.fixture -def vibe_client(): - return get_default_vibe_client() - - -@pytest.fixture -def vibe_geometry_dict() -> Dict[str, Any]: - farm_boundary = "op_resources/nutrients/long_block_boundary.geojson" - data_frame = gpd.read_file(farm_boundary, crs="EPSG:32611").to_crs("EPSG:4326") # type: ignore - geometry = shpg.mapping(data_frame["geometry"][0]) # type: ignore - return geometry - - -@pytest.fixture -def vibe_geometry_shapely() -> Union[MultiPolygon, Polygon]: - farm_boundary = "op_resources/heatmap_sensor/sensor_farm_boundary.geojson" - data_frame = gpd.read_file(farm_boundary) - if not data_frame.empty: - geometry = data_frame["geometry"][0] # type: ignore - return cast(MultiPolygon, geometry) - - raise RuntimeError("Geometry is None") - - -@pytest.fixture -def download_sentinel_cluster( - vibe_client: FarmvibesAiClient, vibe_geometry_shapely: Union[MultiPolygon, Polygon] -) -> Raster: - run = vibe_client.run( - workflow="data_ingestion/sentinel2/preprocess_s2", - name="sentinel2_example", - geometry=vibe_geometry_shapely, - time_range=FAKE_TIME_RANGE, - ) - - while run is None or run.status == "running" or run.status == "pending": - continue - time.sleep(5) - if run.status == "done": - obj: Raster = run.output["raster"][0] # type: ignore - return obj - - raise RuntimeError("Download Raster request failed") - - -@pytest.fixture -def 
download_index_cluster( - vibe_client: FarmvibesAiClient, download_sentinel_cluster: Raster, index: str -) -> Raster: - parameters = {"index": index} - - run = vibe_client.run( - workflow="data_processing/index/index", - name="EVI_example", - input_data=download_sentinel_cluster, - parameters=parameters, - ) - - while run.status == "running" or run.status == "pending": - continue - time.sleep(5) - if run.status == "done": - obj: Raster = run.output["index_raster"][0] # type: ignore - return obj - - raise RuntimeError("Download Raster request failed") - - -@pytest.fixture -def data_vibe(vibe_geometry_dict: Dict[str, Any]): - id = str(hash("test_minimums_samples")) - return DataVibe(id, FAKE_TIME_RANGE, vibe_geometry_dict, []) - - -@pytest.mark.skip(reason="Dependent on the cluster") -@pytest.mark.parametrize("index", ["evi"]) -def test_minimum_samples(download_index_cluster: Raster, data_vibe: DataVibe): - op_ = OpTester(CONFIG_PATH) - parameters = { - "n_clusters": 5, - "sieve_size": 2, - } - op_.update_parameters(parameters) - output_data = op_.run(raster=download_index_cluster, user_input=data_vibe) - - # Get op result - assert "locations" in output_data diff --git a/ops/ordinal_trend_test/ordinal_trend_test.py b/ops/ordinal_trend_test/ordinal_trend_test.py deleted file mode 100644 index 29b9c2dd..00000000 --- a/ops/ordinal_trend_test/ordinal_trend_test.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime as dt -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple - -import numpy as np -import pandas as pd -from numpy._typing import NDArray -from scipy.stats import norm - -from vibe_core.data import AssetVibe, OrdinalTrendTest, RasterPixelCount, gen_guid - -NODATA = None -DATE_FORMAT = "%Y/%m/%d" - - -def cochran_armitage_trend_test(contingency_table: NDArray[Any]) -> Tuple[float, float]: - contingency_table = np.array(contingency_table) - - row_sums = np.sum(contingency_table, axis=1) - column_sums = np.sum(contingency_table, axis=0) - total = np.sum(row_sums) - - row_weights = np.arange(contingency_table.shape[0]) - column_weights = np.arange(contingency_table.shape[1]) - - # Expected value - col_inner = np.inner(column_weights, column_sums) - row_inner = np.inner(row_weights, row_sums) - expected = col_inner * row_inner / total - - # Statistics - statistic = np.inner(row_weights, np.inner(contingency_table, column_weights)) - - # Theorical background can be found here: - # https://real-statistics.com/chi-square-and-f-distributions/cochran-armitage-test/ - # https://doi.org/10.1002/0471249688.ch5 - variance_numerator = np.inner(row_weights**2, row_sums) - row_inner**2 / total - variance_numerator *= np.inner(column_weights**2, column_sums) - col_inner**2 / total - variance = variance_numerator / (total - 1) - - z_score = (statistic - expected) / np.sqrt(variance) - p_value = 2 * norm.cdf(-np.abs(z_score)) - - return float(p_value), float(z_score) - - -def load_contingency_table(pixel_counts: List[RasterPixelCount]) -> pd.DataFrame: - columns = [] - for pixel_count in pixel_counts: - columns.append(np.loadtxt(pixel_count.assets[0].path_or_url, delimiter=",", skiprows=1)) - - # Return the unique values for the existing pixels - unique_values = np.unique(np.concatenate(columns, axis=0)[:, 0]) - contingency_table = pd.DataFrame(index=unique_values) - - for pixel_count, column in 
zip(pixel_counts, columns): - contingency_table[pixel_count.id] = pd.Series(column[:, 1], index=column[:, 0]) - - return contingency_table.fillna(0) - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback(pixel_count: List[RasterPixelCount]) -> Dict[str, OrdinalTrendTest]: - if len(pixel_count) < 2: - raise ValueError("Ordinal trend test requires at least pixel count from 2 rasters.") - - # Order the pixel counts by using the first date in time_range - pixel_count = sorted(pixel_count, key=lambda x: x.time_range[0]) - - time_ranges = [ - f"{dt.strftime(r.time_range[0], DATE_FORMAT)}-" - f"{dt.strftime(r.time_range[1], DATE_FORMAT)}" - for r in pixel_count - ] - - # Calculate the min and max dates for the rasters - min_date = min([r.time_range[0] for r in pixel_count]) - max_date = max([r.time_range[1] for r in pixel_count]) - - contingency_table = load_contingency_table(pixel_count) - p_value, z_score = cochran_armitage_trend_test(contingency_table.values) - - contingency_table.index.name = "category" - contingency_table.columns = time_ranges # type: ignore - - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") - contingency_table.to_csv(filepath) - - ordinal_trend_result = OrdinalTrendTest( - gen_guid(), - time_range=(min_date, max_date), - geometry=pixel_count[0].geometry, - assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], - p_value=p_value, - z_score=z_score, - ) - - return {"ordinal_trend_result": ordinal_trend_result} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/ordinal_trend_test/ordinal_trend_test.yaml b/ops/ordinal_trend_test/ordinal_trend_test.yaml deleted file mode 100644 index abfded46..00000000 --- a/ops/ordinal_trend_test/ordinal_trend_test.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: ordinal_trend_test -inputs: - pixel_count: List[RasterPixelCount] -output: - ordinal_trend_result: OrdinalTrendTest 
-parameters: -entrypoint: - file: ordinal_trend_test.py - callback_builder: CallbackBuilder -description: - short_description: Detects increase/decrease trends over a list of Rasters. - long_description: - Performs a cochran-armitage trend test over a list of rasters. The test - determines if there is an increasing/decreasing trend in the pixel levels - over the list of rasters. For instance, if the ordinal raster represents the - presence of forest in a given area (e.g., 0 - Non-forest, 1- Forest, 2- - Dense Forest), the test will determine if the forest is increasing or - decreasing over the sequence of rasters. The null hypothesis is that there - is no trend in the pixel levels over the list of rasters. The alternative - hypothesis is that there is a trend in the pixel levels over the list of - rasters. The test returns a p-value and a z-score. If the p-value is less - than some significance level, the null hypothesis is rejected and the - alternative hypothesis is accepted. If the z-score is positive, the trend - is increasing. If the z-score is negative, the trend is decreasing. diff --git a/ops/ordinal_trend_test/test_ordinal_trend.py b/ops/ordinal_trend_test/test_ordinal_trend.py deleted file mode 100644 index b7ea1658..00000000 --- a/ops/ordinal_trend_test/test_ordinal_trend.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, cast - -import numpy as np -import pytest -from numpy._typing import NDArray -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, OrdinalTrendTest, RasterPixelCount -from vibe_dev.testing.op_tester import OpTester - -SIGNIFICANCE_LEVEL = 0.05 -CONFIG_PATH = os.path.join(os.path.dirname(__file__), "ordinal_trend_test.yaml") -CSV_HEADER = "unique_values,counts" - - -@pytest.fixture -def tmp_dir(): - _tmp_dir = TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -def fake_raster_pixel_count( - tmp_dir: str, pixel_id: str, fake_stack_data: NDArray[Any] -) -> RasterPixelCount: - file_path = os.path.join(tmp_dir, f"{pixel_id}.csv") - time_range = (datetime(2023, 1, 1), datetime(2023, 12, 31)) - np.savetxt(file_path, fake_stack_data, delimiter=",", fmt="%d", comments="", header=CSV_HEADER) - - return RasterPixelCount( - id=pixel_id, - time_range=time_range, - geometry=shpg.mapping(shpg.box(0, 0, 0, 0)), - assets=[AssetVibe(reference=file_path, type="text/csv", id="fake_asset_id")], - ) - - -@pytest.fixture -def fake_pixel_count0(tmp_dir: str) -> RasterPixelCount: - stack_data = np.column_stack(([0, 1, 2], [3, 3, 3])) - return fake_raster_pixel_count(tmp_dir, "pixel_id_0", stack_data) - - -@pytest.fixture -def fake_pixel_count1(tmp_dir: str) -> RasterPixelCount: - stack_data = np.column_stack(([0, 1, 2], [3, 3, 3])) - return fake_raster_pixel_count(tmp_dir, "pixel_id_1", stack_data) - - -@pytest.fixture -def fake_pixel_count2(tmp_dir: str) -> RasterPixelCount: - stack_data = np.column_stack(([0, 1, 2], [0, 1, 8])) - return fake_raster_pixel_count(tmp_dir, "pixel_id_2", stack_data) - - -def test_ordinal_trend_no_change( - fake_pixel_count0: RasterPixelCount, fake_pixel_count1: RasterPixelCount -): - op = OpTester(CONFIG_PATH) - output = op.run(pixel_count=[fake_pixel_count0, fake_pixel_count1]) - assert output - assert 
"ordinal_trend_result" in output - - ordinal_trend_result = output["ordinal_trend_result"] - ordinal_trend_result = cast(OrdinalTrendTest, ordinal_trend_result) - assert ordinal_trend_result.p_value == 1 - assert ordinal_trend_result.z_score == 0 - - -def test_ordinal_trend_increase( - fake_pixel_count0: RasterPixelCount, fake_pixel_count2: RasterPixelCount -): - op = OpTester(CONFIG_PATH) - output = op.run(pixel_count=[fake_pixel_count0, fake_pixel_count2]) - assert output - assert "ordinal_trend_result" in output - - ordinal_trend_result = output["ordinal_trend_result"] - ordinal_trend_result = cast(OrdinalTrendTest, ordinal_trend_result) - assert ordinal_trend_result.p_value < SIGNIFICANCE_LEVEL - assert ordinal_trend_result.z_score > 0 - - -def test_ordinal_trend_decrease( - fake_pixel_count2: RasterPixelCount, fake_pixel_count0: RasterPixelCount -): - op = OpTester(CONFIG_PATH) - output = op.run(pixel_count=[fake_pixel_count2, fake_pixel_count0]) - assert output - assert "ordinal_trend_result" in output - - ordinal_trend_result = output["ordinal_trend_result"] - ordinal_trend_result = cast(OrdinalTrendTest, ordinal_trend_result) - assert ordinal_trend_result.p_value < SIGNIFICANCE_LEVEL - assert ordinal_trend_result.z_score < 0 diff --git a/ops/pair_intersecting_rasters/pair_intersecting_rasters.py b/ops/pair_intersecting_rasters/pair_intersecting_rasters.py deleted file mode 100644 index b26291ec..00000000 --- a/ops/pair_intersecting_rasters/pair_intersecting_rasters.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Dict, List, Union - -from shapely import geometry as shpg - -from vibe_core.data import Raster - - -def callback( - rasters1: List[Raster], rasters2: List[Raster] -) -> Dict[str, Union[List[Raster], List[Raster]]]: - paired_rasters1 = [] - paired_rasters2 = [] - for r1 in rasters1: - geom_n = shpg.shape(r1.geometry) - for r2 in rasters2: - geom_d = shpg.shape(r2.geometry) - if geom_n.intersects(geom_d): - paired_rasters1.append(r1) - paired_rasters2.append(r2) - - if not paired_rasters1: - raise ValueError("No intersecting rasters could be paired") - return {"paired_rasters1": paired_rasters1, "paired_rasters2": paired_rasters2} - - -def callback_builder(): - return callback diff --git a/ops/pair_intersecting_rasters/pair_intersecting_rasters.yaml b/ops/pair_intersecting_rasters/pair_intersecting_rasters.yaml deleted file mode 100644 index a5f94ec1..00000000 --- a/ops/pair_intersecting_rasters/pair_intersecting_rasters.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: pair_intersecting_rasters -inputs: - rasters1: List[Raster] - rasters2: List[Raster] -output: - paired_rasters1: "@INHERIT(rasters1)" - paired_rasters2: "@INHERIT(rasters2)" -parameters: -entrypoint: - file: pair_intersecting_rasters.py - callback_builder: callback_builder -description: - short_description: Creates pairs of rasters with intersecting geometries between two input lists of Raster. \ No newline at end of file diff --git a/ops/price_airbus_products/price_airbus.py b/ops/price_airbus_products/price_airbus.py deleted file mode 100644 index 67ee4610..00000000 --- a/ops/price_airbus_products/price_airbus.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Dict, List - -from shapely import geometry as shpg -from shapely.ops import unary_union - -from vibe_core.data import AirbusPrice, AirbusProduct, gen_guid -from vibe_lib.airbus import AirBusAPI, Constellation -from vibe_lib.geometry import norm_intersection - -AMOUNT_UNIT = "kB" - - -class CallbackBuilder: - def __init__(self, api_key: str, projected_crs: bool, iou_threshold: float): - self.api_key = api_key - self.projected_crs = projected_crs - self.iou_thr = iou_threshold - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def price_product(api: AirBusAPI, product: AirbusProduct) -> float: - geom = shpg.shape(product.geometry) - owned = api.query_owned(geom, product.acquisition_id) - owned = sorted( - owned, - key=lambda o: norm_intersection(geom, shpg.shape(o["geometry"])), - reverse=True, - ) - if ( - not owned - or norm_intersection(geom, shpg.shape(owned[0]["geometry"])) < self.iou_thr - ): - # We choose the envelope to avoid having images with a lot of nodata in the library - quote = api.get_price([product.extra_info["id"]], geom.envelope)["price"] - if quote["amountUnit"] != AMOUNT_UNIT: - raise ValueError(f"Expected amount in kB, got {quote['amountUnit']}") - return quote["amount"] - return 0 # We already have it so price is 0 - - def price_products( - airbus_products: List[AirbusProduct], - ) -> Dict[str, AirbusPrice]: - api = AirBusAPI(self.api_key, self.projected_crs, [c for c in Constellation]) - total_price = sum(price_product(api, p) for p in airbus_products) - print(total_price) - date = datetime.now() - geom = unary_union([shpg.shape(p.geometry) for p in airbus_products]) - return { - "products_price": AirbusPrice( - id=gen_guid(), - time_range=(date, date), - geometry=shpg.mapping(geom), - assets=[], - price=total_price, - ) - } - - return price_products diff --git a/ops/price_airbus_products/price_airbus_products.yaml 
b/ops/price_airbus_products/price_airbus_products.yaml deleted file mode 100644 index 74269a09..00000000 --- a/ops/price_airbus_products/price_airbus_products.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: price_airbus_products -inputs: - airbus_products: List[AirbusProduct] -output: - products_price: AirbusPrice -parameters: - api_key: "@SECRET(eywa-secrets, msr-airbus-api)" - projected_crs: true - iou_threshold: .95 -entrypoint: - file: price_airbus.py - callback_builder: CallbackBuilder -description: - short_description: - Calculates the aggregate price (in kB) for selected AirBus images, - discounting images already in the user's library. \ No newline at end of file diff --git a/ops/protlearn/protlearn.py b/ops/protlearn/protlearn.py deleted file mode 100644 index e4932873..00000000 --- a/ops/protlearn/protlearn.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Dict, List, Union, cast - -import pandas as pd -from protlearn.features import aaindex1 -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, FoodFeatures, FoodVibe, ProteinSequence, gen_guid - -PROTLEARN_FEAT_LIST: List[str] = [ - "JOND750102_2nd", - "GEOR030105_1st", - "JOND920102_2nd", - "HOPA770101_1st", - "WERD780102_2nd", - "FUKS010109_1st", -] - -NUTRITIONAL_INFORMATION: List[str] = [ - "Dietary Fiber", - "Magnesium", - "Potassium", - "Manganese", - "Zinc", - "Iron", - "Copper", - "Protein", - "TRP", - "THR", - "ILE", - "LEU", - "LYS", - "MET", - "CYS", - "PHE", - "TYR", - "VAL", - "ARG", - "HIS", -] - -PROTEIN_INFORMATION: List[str] = ["1st family", "2nd family", "3rd family", "Food group"] - -FOOD_GROUP_ID: Dict[str, int] = { - "Cereal & cereal products": 1, - "Roots & tubers": 2, - "Legumes & oilseeds": 3, - "Oil byproducts": 4, - "Fish & fish products": 5, - "Animal products": 6, - "Milk products": 7, - "Fruits 
& vegetable products": 8, - "Others": 9, - "Plant based ": 10, - "Mixed food (animal + cereal product)": 11, - "Mixed food (plant based)": 12, - "Mixed food (cereal + legume)": 13, - "Mixed food (cereal + animal product)": 14, -} - -PROTEIN_FAMILY_ID: Dict[str, int] = { - "": 0, - "GLOBULIN": 1, - "ALBUMIN": 2, - "ALBUMINS": 2, - "OVALBUMIN": 3, - "OVOTRANSFERRIN": 4, - "OVOMUCOID": 5, - "CASEIN": 6, - "GLYCININ": 7, - "CONGLYCININ": 8, - "GLUTELIN": 9, - "GLIADINS": 10, - "ZEIN": 11, - "PROLAMIN": 12, - "MYOSIN": 13, - "MYOGLOBIN": 14, - "PATATIN": 15, - "LECTIN": 16, - "LEGUMIN": 17, - "OTHER": 18, -} - - -def encode_str(id_dict: Dict[str, int], val: Union[str, str]): - if not val.strip(): - return 0 - - try: - encoded_id = id_dict[val] - except KeyError: - encoded_id = 18 - - return encoded_id - - -def filter_protlearn_shap(protlearn_feats: pd.DataFrame): - return protlearn_feats.filter(PROTLEARN_FEAT_LIST) - - -def extracting_protlearn(aminoacids1: str, aminoacids2: str, aminoacids3: str): - """ - Reads in the aminoacid sequences from the fasta files - Returns a dataframe with the Aaindex features obtained using protlearn package - """ - aminoacids1 = aminoacids1[aminoacids1.rindex(" ") + 1 :] - - aaind1, inds1 = aaindex1(aminoacids1, standardize="zscore") # type: ignore - first = pd.DataFrame(aaind1, columns=inds1) # type: ignore - first = first.add_suffix("_1st") - aminoacids2 = aminoacids2[aminoacids2.rindex(" ") + 1 :] - - try: - aaind2, inds2 = aaindex1(aminoacids2, standardize="zscore") # type: ignore - except ValueError: - aaind2 = 0 - second = pd.DataFrame(aaind2, index=range(1), columns=inds1) # type: ignore - second = second.add_suffix("_2nd") - aminoacids3 = aminoacids3[aminoacids3.rindex(" ") + 1 :] - - try: - aaind3, indes3 = aaindex1(aminoacids3, standardize="zscore") # type: ignore - except ValueError: - aaind3 = 0 - third = pd.DataFrame(aaind3, index=range(1), columns=inds1) # type: ignore - third = third.add_suffix("_3rd") - aaindex_feats = 
pd.concat([first, second, third], axis=1) - return aaindex_feats - - -def read_protein(protein_df: pd.DataFrame): - protein_list = protein_df["protein_list"] - assert protein_list is not None, "Protein list column is missing" - - fasta_sequence0 = str(protein_list[0]) - - try: - fasta_sequence1 = str(protein_list[1]) - except KeyError: - fasta_sequence1 = " " - - try: - fasta_sequence2 = str(protein_list[2]) - except KeyError: - fasta_sequence2 = " " - - return fasta_sequence0, fasta_sequence1, fasta_sequence2 - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def protlearn_callback( - food_item: FoodVibe, protein_sequence: ProteinSequence - ) -> Dict[str, FoodFeatures]: - protein_df = cast( - pd.DataFrame, pd.read_csv(protein_sequence.assets[0].path_or_url, index_col=0) - ).reset_index() - - fasta_sequence0, fasta_sequence1, fasta_sequence2 = read_protein(protein_df) - - aaindex_feats = extracting_protlearn( - fasta_sequence0, - fasta_sequence1, - fasta_sequence2, - ) - - nutritional_data = [ - food_item.dietary_fiber, - food_item.magnesium, - food_item.potassium, - food_item.manganese, - food_item.zinc, - food_item.iron, - food_item.copper, - food_item.protein, - food_item.trp, - food_item.thr, - food_item.ile, - food_item.leu, - food_item.lys, - food_item.met, - food_item.cys, - food_item.phe, - food_item.tyr, - food_item.val, - food_item.arg, - food_item.his, - ] - - protein_family_food_type = [ - encode_str(PROTEIN_FAMILY_ID, food_item.protein_families[0]), - encode_str(PROTEIN_FAMILY_ID, food_item.protein_families[1]), - encode_str(PROTEIN_FAMILY_ID, food_item.protein_families[2]), - encode_str(FOOD_GROUP_ID, food_item.food_group), - ] - - nutritional_data_df = pd.DataFrame(nutritional_data, index=NUTRITIONAL_INFORMATION) - protein_family_df = pd.DataFrame(protein_family_food_type, index=PROTEIN_INFORMATION) - - protlearn_df = filter_protlearn_shap(aaindex_feats) - - df = 
pd.concat([nutritional_data_df.T, protlearn_df, protein_family_df.T], axis=1) - - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") - df.to_csv(filepath, index=False) - - food_features = FoodFeatures( - gen_guid(), - time_range=(datetime.now(), datetime.now()), # these are just placeholders - geometry=shpg.mapping(shpg.Point(0, 0)), # this location is a placeholder - assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], - ) - - return {"food_features": food_features} - - return protlearn_callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/protlearn/protlearn.yaml b/ops/protlearn/protlearn.yaml deleted file mode 100644 index 7979a200..00000000 --- a/ops/protlearn/protlearn.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: protlearn -inputs: - food_item: FoodVibe - protein_sequence: ProteinSequence -output: - food_features: FoodFeatures -parameters: -entrypoint: - file: protlearn.py - callback_builder: CallbackBuilder diff --git a/ops/read_grib_forecast/read_grib_forecast.py b/ops/read_grib_forecast/read_grib_forecast.py deleted file mode 100644 index ca5595a9..00000000 --- a/ops/read_grib_forecast/read_grib_forecast.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict, List - -import xarray as xr -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, DataVibe, GfsForecast, gen_forecast_time_hash_id, gen_guid - - -def parse_grib_file(grib_file: str, lat: float, lon: float, output_dir: str) -> AssetVibe: - """Extracts the local data from a global forecast. 
- - Args: - grib_file: the path to the grib file for the given time of interest - lat: the latitude of the forecast [-90, 90] - lon: the longitude of the forecast [-180, 180] - output_dir: directory in which to save csv data for this forecast - - Returns: - VibeAsset containging the forecast for the time and location specified - """ - # GFS stores longitude in a range from 0-360 - # compute unsigned value from [-180,180] scale - gfs_lon = (lon + 360) % 360 - - keys = {"typeOfLevel": "surface"} - if not grib_file.endswith("f000.grib"): - keys["stepType"] = "instant" - - ds = xr.load_dataset(grib_file, engine="cfgrib", filter_by_keys=keys) - forecast = ds.sel(latitude=lat, longitude=gfs_lon, method="nearest") - - data_file = "{file}_{lat}_{lon}.csv".format(file=grib_file[:-5], lat=lat, lon=lon) - - file_path = os.path.join(output_dir, data_file) - - with open(file_path, "w") as forecast_file: - forecast_file.write(forecast.to_pandas().to_csv()) # type: ignore - - return AssetVibe(reference=file_path, type=mimetypes.types_map[".csv"], id=gen_guid()) - - -class CallbackBuilder: - def __init__(self): - self.temp_dir = TemporaryDirectory() - - def __call__(self): - def read_forecast( - location: List[DataVibe], global_forecast: List[GfsForecast] - ) -> Dict[str, List[GfsForecast]]: - loc = location[0] - forecast_data = global_forecast[0] - # wkt format is (lon, lat) - lon, lat = shpg.shape(loc.geometry).centroid.coords[0] - grib_file = forecast_data.assets[0].local_path - forecast_asset = parse_grib_file( - grib_file=grib_file, lat=lat, lon=lon, output_dir=self.temp_dir.name - ) - - local_forecast = GfsForecast( - id=gen_forecast_time_hash_id( - "local_forecast", loc.geometry, forecast_data.publish_time, loc.time_range - ), - geometry=loc.geometry, - time_range=loc.time_range, - assets=[forecast_asset], - publish_time=forecast_data.publish_time, - ) - - output = {"local_forecast": [local_forecast]} - return output - - return read_forecast - - def __del__(self): - 
self.temp_dir.cleanup() diff --git a/ops/read_grib_forecast/read_grib_forecast.yaml b/ops/read_grib_forecast/read_grib_forecast.yaml deleted file mode 100644 index 00d80260..00000000 --- a/ops/read_grib_forecast/read_grib_forecast.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: read_forecast -inputs: - location: List[DataVibe] - global_forecast: List[GfsForecast] -output: - local_forecast: List[GfsForecast] -parameters: -entrypoint: - callback_builder: CallbackBuilder - file: read_grib_forecast.py -description: - short_description: Extracts the local data from a global forecast. \ No newline at end of file diff --git a/ops/recode_raster/recode_raster.py b/ops/recode_raster/recode_raster.py deleted file mode 100644 index 522611b8..00000000 --- a/ops/recode_raster/recode_raster.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from tempfile import TemporaryDirectory -from typing import Dict, List - -import numpy as np - -from vibe_core.data import Raster -from vibe_lib.raster import load_raster, save_raster_from_ref - - -class CallbackBuilder: - def __init__(self, from_values: List[float], to_values: List[float]): - self.tmp_dir = TemporaryDirectory() - - if len(from_values) != len(to_values): - raise ValueError( - f"'from_values' and 'to_values' must have the same length. " - f"Got {len(from_values)} and {len(to_values)}, respectively." 
- ) - - self.recode_map = dict(zip(from_values, to_values)) - - def __call__(self): - def callback(raster: Raster) -> Dict[str, Raster]: - data_ar = load_raster(raster) - - # Return the same pixel value if it is not in the recode map - transformed_ar = data_ar.copy( - data=np.vectorize(lambda x: self.recode_map.get(x, x))(data_ar) - ) - transformed_raster = save_raster_from_ref(transformed_ar, self.tmp_dir.name, raster) - - return {"recoded_raster": transformed_raster} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/recode_raster/recode_raster.yaml b/ops/recode_raster/recode_raster.yaml deleted file mode 100644 index b1ff2b78..00000000 --- a/ops/recode_raster/recode_raster.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: recode_raster -inputs: - raster: Raster -output: - recoded_raster: Raster -parameters: - from_values: - to_values: -entrypoint: - file: recode_raster.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - from_values - - to_values -description: - short_description: Recodes values of the input raster. - long_description: Receives a raster and two lists of values to recode the raster values. The first - list contains the values to recode from and the second list contains the values to recode to. - The lists must have the same length. For example, if the input raster has values (1, 2, 3, 11, 12) - and the from_values list is [1, 2, 3] and the to_values list is [3, 4, 5], the output raster - will have values (3, 4, 5, 11, 12). Observe the raster values that are not in the from_values list - remain unchanged. - sources: - raster: Raster to recode. - sinks: - recoded: Recoded raster. - parameters: - from_values: List of values to recode from. - to_values: List of values to recode to. 
\ No newline at end of file diff --git a/ops/recode_raster/test_recode_raster.py b/ops/recode_raster/test_recode_raster.py deleted file mode 100644 index 07645d6f..00000000 --- a/ops/recode_raster/test_recode_raster.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import cast - -import numpy as np -import pytest -import xarray as xr -from shapely import geometry as shpg - -from vibe_core.data import Raster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.raster import load_raster, save_raster_to_asset - -CONFIG_PATH = os.path.join(os.path.dirname(__file__), "recode_raster.yaml") - - -@pytest.fixture -def tmp_dir(): - _tmp_dir = TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -@pytest.fixture -def fake_raster(tmp_dir: str): - nbands = 3 - x = 128 - y = 128 - - fake_data = np.random.randint(0, 4, size=(nbands, y, x)).astype(np.float32) - fake_da = xr.DataArray( - fake_data, - coords={"bands": np.arange(nbands), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, - dims=["bands", "y", "x"], - ) - fake_da.rio.write_crs("epsg:4326", inplace=True) - - asset = save_raster_to_asset(fake_da, tmp_dir) - return Raster( - id="fake_id", - time_range=(datetime(2023, 1, 1), datetime(2023, 1, 1)), - geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), - assets=[asset], - bands={j: i for i, j in enumerate(["B1", "B2", "B3"])}, - ) - - -def test_recode_raster(fake_raster: Raster): - op = OpTester(CONFIG_PATH) - parameters = { - "from_values": [0, 1, 2, 3], - "to_values": [4, 5, 6, 7], - } - - op.update_parameters(parameters) - output = op.run(raster=fake_raster) - assert output - - raster = cast(Raster, output["recoded_raster"]) - raster_data = load_raster(raster) - fake_raster_data = load_raster(fake_raster) - - # Assert that the recoded raster has the same shape as the original - 
assert raster_data.shape == fake_raster_data.shape - # Assert fake_raster_data - raster values is always 4 - assert np.all(raster_data - fake_raster_data == 4) - - -def test_recode_not_mapped_values(fake_raster: Raster): - op = OpTester(CONFIG_PATH) - - parameters = { - "from_values": [10, 11, 12, 13], - "to_values": [-1, -2, -3, -4], - } - - op.update_parameters(parameters) - output = op.run(raster=fake_raster) - assert output - - raster = cast(Raster, output["recoded_raster"]) - raster_data = load_raster(raster) - fake_raster_data = load_raster(fake_raster) - - # Assert that the recoded raster has the same shape as the original - assert raster_data.shape == fake_raster_data.shape - - # Assert fake_raster_data and raster_data are the same - assert np.all(raster_data == fake_raster_data) - - # Assert raster_data has no negative values - assert np.all(raster_data >= 0) - - -def test_recode_raster_different_lengths(fake_raster: Raster): - op = OpTester(CONFIG_PATH) - parameters = { - "from_values": [0, 1, 2], - "to_values": [4, 5, 6, 7], - } - - op.update_parameters(parameters) - with pytest.raises(ValueError): - op.run(raster=fake_raster) diff --git a/ops/remove_clouds/remove_clouds.py b/ops/remove_clouds/remove_clouds.py deleted file mode 100644 index 6813c48c..00000000 --- a/ops/remove_clouds/remove_clouds.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -# pyright: reportUnknownMemberType=false -import logging -import os -from abc import abstractmethod -from datetime import datetime, timedelta -from tempfile import TemporaryDirectory -from typing import Dict, Optional, Union, cast - -import geopandas as gpd -import numpy as np -import onnxruntime as ort -import torch -import torch.nn as nn -from shapely import geometry as shpg -from torch.utils.data import DataLoader - -from vibe_core.data import AssetVibe, gen_guid -from vibe_core.data.sentinel import ( - S2ProcessingLevel, - Sentinel1RasterTileSequence, - Sentinel2CloudMaskTileSequence, - Sentinel2RasterTileSequence, - SpaceEyeRasterSequence, -) -from vibe_lib.raster import INT_COMPRESSION_KWARGS, compress_raster, write_window_to_file -from vibe_lib.spaceeye.dataset import Dims, SpaceEyeReader -from vibe_lib.spaceeye.illumination import add_illuminance -from vibe_lib.spaceeye.interpolation import DampedInterpolation -from vibe_lib.spaceeye.utils import QUANTIFICATION_VALUE, SPACEEYE_TO_SPYNDEX_BAND_NAMES - -S1_NUM_BANDS = 2 -S2_NUM_BANDS = 10 -L1C_BAND_INDICES = [1, 2, 3, 4, 5, 6, 7, 8, 11, 12] -L2A_BAND_INDICES = [1, 2, 3, 4, 5, 6, 7, 8, 10, 11] -FILENAME_TEMPLATE = "preds_{}.tif" - -LOGGER = logging.getLogger(__name__) - - -def get_filename(date: datetime) -> str: - return FILENAME_TEMPLATE.format(date.strftime("%Y%m%d")) - - -def remove_clouds( - model: Union[ort.InferenceSession, nn.Module], - dataset: SpaceEyeReader, - out_dir: str, - num_workers: int, -) -> SpaceEyeRasterSequence: - # TODO: Add meta to write_info dict - meta = { - "driver": "GTiff", - "height": dataset.height, - "width": dataset.width, - "count": S2_NUM_BANDS, - "crs": dataset.crs, - "dtype": "uint16", - "transform": dataset.transform, - "nodata": 0, - } - # Use batch size 1 - dataloader = DataLoader(dataset, collate_fn=lambda x: x, num_workers=num_workers) - total_chips = len(dataloader) - start_datetime = dataset.time_range[0] - for chip_idx, batch in enumerate(dataloader): - 
chip_data, write_info = batch[0] - t1, t2 = ( - (start_datetime + timedelta(days=t)).strftime("%Y-%m-%d") - for t in write_info["write_times"] - ) - write_window = write_info["write_window"] - (r1, r2), (c1, c2) = write_window.toranges() - LOGGER.info( - f"Running model for {t1}:{t2}, extent {r1}:{r2}, {c1}:{c2} " - f"({chip_idx + 1}/{total_chips})" - ) - inputs = {k: v[None] for k, v in chip_data.items() if k != "illuminance"} - with torch.inference_mode(): - if isinstance(model, nn.Module): - inputs = {k: torch.from_numpy(v) for k, v in inputs.items()} - s2 = cast(nn.Module, model)(inputs).numpy() - else: - s2 = cast(ort.InferenceSession, model).run(None, inputs)[0] - s2 = s2[0, :] - # Put illumination back - s2 = (add_illuminance(s2, chip_data["illuminance"]) * QUANTIFICATION_VALUE).astype( - np.uint16 - ) - chip_times, chip_rows, chip_cols = write_info["chip_slices"] - for write_t, chip_t in zip(range(*write_info["write_times"]), range(*chip_times)): - date = start_datetime + timedelta(days=write_t) - filename = get_filename(date) - filepath = os.path.join(out_dir, filename) - write_window_to_file( - s2[:, chip_t, slice(*chip_rows), slice(*chip_cols)], - None, - write_window, - filepath, - meta, - ) - - # Create a SpaceEyeRasterSequence with the sequence metadata - ref_sequence = dataset.s2_items - geom = shpg.mapping(gpd.GeoSeries(dataset.roi, crs=dataset.crs).to_crs("epsg:4326").iloc[0]) - spaceeye_sequence = SpaceEyeRasterSequence.clone_from( - ref_sequence, - assets=[], - id=gen_guid(), - geometry=geom, - time_range=dataset.time_range, - bands={name: idx for idx, name in enumerate(SPACEEYE_TO_SPYNDEX_BAND_NAMES.values())}, - ) - - geom = shpg.shape(geom) - - # Add each raster asset to the sequence - for time_idx in range(dataset.time_length): - date = start_datetime + timedelta(days=time_idx) - filename = get_filename(date) - filepath = os.path.join(out_dir, filename) - # Skip file if no predictions were made (not enough data) - if not 
os.path.exists(filepath): - continue - guid = gen_guid() - out_path = os.path.join(out_dir, f"{guid}.tif") - LOGGER.info(f"Compressing raster for {date.strftime('%Y-%m-%d')}") - compress_raster(filepath, out_path, **INT_COMPRESSION_KWARGS) - asset = AssetVibe(reference=out_path, type="image/tiff", id=guid) - spaceeye_sequence.add_asset(asset, (date, date), geom) - - return spaceeye_sequence - - -class CallbackBuilder: - def __init__( - self, - duration: int, - window_size: int, - spatial_overlap: float, - min_clear_ratio: float, - normalize_illuminance: bool, - num_workers: int, - ): - self.duration = duration - self.window_size = window_size - self.spatial_overlap = spatial_overlap - self.min_clear_ratio = min_clear_ratio - self.normalize_illuminance = normalize_illuminance - self.num_workers = num_workers - self.tmp_dir = TemporaryDirectory() - - def get_dataset( - self, - s1_products: Optional[Sentinel1RasterTileSequence], - s2_products: Sentinel2RasterTileSequence, - cloud_masks: Sentinel2CloudMaskTileSequence, - ) -> SpaceEyeReader: - s2_bands = ( - L1C_BAND_INDICES - if s2_products.processing_level == S2ProcessingLevel.L1C - else L2A_BAND_INDICES - ) - sequence_geom = shpg.shape(s2_products.geometry) - sequence_time_range = s2_products.time_range - dataset = SpaceEyeReader( - s1_items=s1_products, - s2_items=s2_products, - cloud_masks=cloud_masks, - time_range=sequence_time_range, - geometry=sequence_geom, - chip_size=Dims(width=self.window_size, height=self.window_size, time=self.duration), - overlap=(self.spatial_overlap, self.spatial_overlap, 0), - s2_bands=s2_bands, - min_clear_ratio=self.min_clear_ratio, - normalize_illuminance=self.normalize_illuminance, - ) - return dataset - - @abstractmethod - def get_model(self) -> Union[ort.InferenceSession, nn.Module]: - raise NotImplementedError - - def __call__(self): - def callback( - s2_products: Sentinel2RasterTileSequence, - cloud_masks: Sentinel2CloudMaskTileSequence, - s1_products: 
Optional[Sentinel1RasterTileSequence] = None, - ) -> Dict[str, SpaceEyeRasterSequence]: - if not s2_products.assets or (s1_products is not None and not s1_products.assets): - s1_str = ( - "" if s1_products is None else f"Sentinel-1: {len(s1_products.assets)} assets" - ) - LOGGER.warning( - "Received empty input sequence, output will be empty sequence. " - f"Sentinel-2: {len(s2_products.assets)} assets, {s1_str}" - ) - spaceeye_sequence = SpaceEyeRasterSequence.clone_from( - s2_products, - assets=[], - id=gen_guid(), - bands={ - name: idx - for idx, name in enumerate(SPACEEYE_TO_SPYNDEX_BAND_NAMES.values()) - }, - ) - return {"spaceeye_sequence": spaceeye_sequence} - model = self.get_model() - dataset = self.get_dataset(s1_products, s2_products, cloud_masks) - spaceeye_sequence = remove_clouds(model, dataset, self.tmp_dir.name, self.num_workers) - - return {"spaceeye_sequence": spaceeye_sequence} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() - - -class NNCallbackBuilder(CallbackBuilder): - def __init__( - self, - model_path: str, - duration: int, - window_size: int, - spatial_overlap: float, - min_clear_ratio: float, - normalize_illuminance: bool, - num_workers: int, - ): - super().__init__( - duration, - window_size, - spatial_overlap, - min_clear_ratio, - normalize_illuminance, - num_workers, - ) - self.model_path = model_path - - def get_model(self) -> ort.InferenceSession: - return ort.InferenceSession(self.model_path) - - -class InterpolationCallbackBuilder(CallbackBuilder): - def __init__( - self, - duration: int, - window_size: int, - spatial_overlap: float, - min_clear_ratio: float, - normalize_illuminance: bool, - num_workers: int, - damping_factor: float, - tolerance: float, - max_iterations: int, - check_interval: int, - ): - super().__init__( - duration, - window_size, - spatial_overlap, - min_clear_ratio, - normalize_illuminance, - num_workers, - ) - self.damping_factor = damping_factor - self.tol = tolerance - self.max_iter = 
max_iterations - self.check_interval = check_interval - - def get_model(self): - return DampedInterpolation( - S2_NUM_BANDS, - self.duration, - damping_factor=self.damping_factor, - tol=self.tol, - max_iter=self.max_iter, - check_interval=self.check_interval, - ) diff --git a/ops/remove_clouds/remove_clouds.yaml b/ops/remove_clouds/remove_clouds.yaml deleted file mode 100644 index 2e0bf717..00000000 --- a/ops/remove_clouds/remove_clouds.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: remove_clouds -inputs: - s1_products: Sentinel1RasterTileSequence - s2_products: Sentinel2RasterTileSequence - cloud_masks: Sentinel2CloudMaskTileSequence -output: - spaceeye_sequence: SpaceEyeRasterSequence -parameters: - model_path: /opt/terravibes/ops/resources/spaceeye_models/spaceeye.onnx - duration: 48 - window_size: 448 - spatial_overlap: 0.5 - min_clear_ratio: 0.1 - normalize_illuminance: True - num_workers: 0 -entrypoint: - file: remove_clouds.py - callback_builder: NNCallbackBuilder -dependencies: - parameters: - - duration - - window_size - - spatial_overlap - - min_clear_ratio - - normalize_illuminance -description: - short_description: - Runs SpaceEye to remove clouds in input rasters. 
\ No newline at end of file diff --git a/ops/remove_clouds/remove_clouds_interpolation.yaml b/ops/remove_clouds/remove_clouds_interpolation.yaml deleted file mode 100644 index 4acbf60b..00000000 --- a/ops/remove_clouds/remove_clouds_interpolation.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: remove_clouds_interpolation -inputs: - s2_products: Sentinel2RasterTileSequence - cloud_masks: Sentinel2CloudMaskTileSequence -output: - spaceeye_sequence: SpaceEyeRasterSequence -parameters: - duration: 48 - window_size: 448 - spatial_overlap: 0.5 - min_clear_ratio: 0.1 - normalize_illuminance: True - num_workers: 0 - damping_factor: 0.1 - tolerance: .001 - max_iterations: 200 - check_interval: 5 -entrypoint: - file: remove_clouds.py - callback_builder: InterpolationCallbackBuilder -dependencies: - parameters: - - duration - - window_size - - spatial_overlap - - min_clear_ratio - - normalize_illuminance - - damping_factor - - tolerance - - max_iterations - - check_interval -description: - short_description: - Runs the interpolation version of SpaceEye to remove clouds in input rasters. \ No newline at end of file diff --git a/ops/remove_clouds/test_remove_clouds.py b/ops/remove_clouds/test_remove_clouds.py deleted file mode 100644 index b66178c6..00000000 --- a/ops/remove_clouds/test_remove_clouds.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import Any, Dict - -from shapely import geometry as shpg - -from vibe_core.data.sentinel import ( - Sentinel1RasterTileSequence, - Sentinel2CloudMaskTileSequence, - Sentinel2RasterTileSequence, -) -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH_NN = os.path.join(os.path.dirname(os.path.abspath(__file__)), "remove_clouds.yaml") - -CONFIG_PATH_INTERP = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "remove_clouds_interpolation.yaml" -) - - -def test_remove_clouds_empty_sequence(): - polygon: Dict[str, Any] = shpg.mapping(shpg.box(0, 0, 1, 1)) # type: ignore - start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) - s1 = Sentinel1RasterTileSequence( - id="s1", - time_range=(start_date, end_date), - geometry=polygon, - assets=[], - product_name="", - orbit_number=0, - relative_orbit_number=0, - orbit_direction="", - platform="", - extra_info={}, - sensor_mode="", - polarisation_mode="", - bands={}, - tile_id="", - write_time_range=(start_date, end_date), - ) - s2 = Sentinel2RasterTileSequence.clone_from(s1, id="s2", assets=[], processing_level="") - cloud = Sentinel2CloudMaskTileSequence.clone_from(s2, id="cloud", assets=[], categories=[]) - - nn_out = OpTester(CONFIG_PATH_NN).run(s1_products=s1, s2_products=s2, cloud_masks=cloud) - assert not nn_out["spaceeye_sequence"].assets # type: ignore - interp_out = OpTester(CONFIG_PATH_INTERP).run(s2_products=s2, cloud_masks=cloud) - assert not interp_out["spaceeye_sequence"].assets # type: ignore diff --git a/ops/segment_anything/automatic_segmentation.yaml b/ops/segment_anything/automatic_segmentation.yaml deleted file mode 100644 index 58ad0f32..00000000 --- a/ops/segment_anything/automatic_segmentation.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: automatic_segmentation -inputs: - input_raster: Raster -output: - segmented_chips: 
List[SamMaskRaster] -parameters: - model_type: vit_b - band_names: null - band_scaling: null - band_offset: null - spatial_overlap: 0.0 - points_per_side: 16 - n_crop_layers: 0 - crop_overlap_ratio: 0.0 - crop_n_points_downscale_factor: 1 - pred_iou_thresh: 0.88 - stability_score_thresh: 0.95 - stability_score_offset: 1.0 - points_per_batch: 16 - num_workers: 0 - in_memory: True -entrypoint: - file: sam_inference.py - callback_builder: AutomaticSegmentationCallbackBuilder -dependencies: - parameters: - - model_type - - band_names - - band_scaling - - band_offset - - spatial_overlap - - points_per_side - - n_crop_layers - - crop_overlap_ratio - - crop_n_points_downscale_factor - - pred_iou_thresh - - stability_score_thresh - - stability_score_offset -description: - short_description: Runs a SAM automatic segmentation inference over the input raster, generating masks for each chip. - parameters: - model_type: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. - band_names: Name of raster bands that should be selected to compose the 3-channel images expected by SAM. If not provided, will try to use ["R", "G", "B"]. If only a single band name is provided, will replicate it through all three channels. - band_scaling: A list of floats to scale each band by to the range of [0.0, 1.0]. If not provided, will default to the raster scaling parameter. If a list with a single value is provided, will use it for all three bands. - band_offset: A list of floats to offset each band by. If not provided, will default to the raster offset value. If a list with a single value is provided, will use it for all three bands. - spatial_overlap: Percentage of spatial overlap between chips in the range of [0.0, 1.0). 
- points_per_side: The number of points to be sampled along one side of the chip to be prompts. The total number of points is points_per_side**2. - n_crop_layers: If >0, mask prediction will be run again on crops of the image. Sets the number of layers to run, where each layer has 2**i_layer number of image crops. - crop_overlap_ratio: Sets the degree to which crops overlap. In the first crop layer, crops will overlap by this fraction of the chip length. Later layers with more crops scale down this overlap. - crop_n_points_downscale_factor: The number of points-per-side sampled in layer n is scaled down by crop_n_points_downscale_factor**n. - pred_iou_thresh: A filtering threshold in [0,1] over the model's predicted mask quality/score. - stability_score_thresh: A filtering threshold in [0,1], using the stability of the mask under changes to the cutoff used to binarize the model's mask predictions. - stability_score_offset: The amount to shift the cutoff when calculated the stability score. - points_per_batch: Number of points to process in a single batch. - num_workers: Number of workers to use for parallel processing. - in_memory: Whether to load the whole raster in memory when running predictions. Uses more memory (~4GB/worker) but speeds up inference for fast models. 
diff --git a/ops/segment_anything/prompt_segmentation.yaml b/ops/segment_anything/prompt_segmentation.yaml deleted file mode 100644 index a30111bc..00000000 --- a/ops/segment_anything/prompt_segmentation.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: prompt_segmentation -inputs: - input_raster: Raster - input_prompts: GeometryCollection -output: - segmentation_mask: CategoricalRaster -parameters: - model_type: vit_b - band_names: null - band_scaling: null - band_offset: null - spatial_overlap: 0.0 - points_per_batch: 64 - num_workers: 0 - in_memory: True -entrypoint: - file: sam_inference.py - callback_builder: PromptCallbackBuilder -dependencies: - parameters: - - model_type - - band_names - - band_scaling - - band_offset - - spatial_overlap -description: - short_description: Runs SAM over the input raster with points and bounding boxes as prompts. - parameters: - model_type: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. - band_names: Name of raster bands that should be selected to compose the 3-channel images expected by SAM. If not provided, will try to use ["R", "G", "B"]. If only a single band name is provided, will replicate it through all three channels. - band_scaling: A list of floats to scale each band by to the range of [0.0, 1.0] or [0.0, 255.0]. If not provided, will default to the raster scaling parameter. If a list with a single value is provided, will use it for all three bands. - band_offset: A list of floats to offset each band by. If not provided, will default to the raster offset value. If a list with a single value is provided, will use it for all three bands. - spatial_overlap: Percentage of spatial overlap between chips in the range of [0.0, 1.0). 
- points_per_batch: Number of points to process in a single batch. - num_workers: Number of workers to use for parallel processing. - in_memory: Whether to load the whole raster in memory when running predictions. Uses more memory (~4GB/worker) but speeds up inference for fast models. diff --git a/ops/segment_anything/sam_inference.py b/ops/segment_anything/sam_inference.py deleted file mode 100644 index 2c749a4f..00000000 --- a/ops/segment_anything/sam_inference.py +++ /dev/null @@ -1,564 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import os -from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, List, Optional, Tuple, cast - -import numpy as np -import onnxruntime as ort -import torch -from numpy.typing import NDArray -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry -from torch.utils.data import DataLoader -from torchvision.transforms.functional import resize - -from vibe_core.data import ( - AssetVibe, - BBox, - CategoricalRaster, - ChipWindow, - GeometryCollection, - Raster, - SamMaskRaster, - gen_guid, -) -from vibe_lib.raster import INT_COMPRESSION_KWARGS, write_window_to_file -from vibe_lib.segment_anything import ( - BACKGROUND_VALUE, - MASK_LOGIT_THRESHOLD, - SAM_CHIP_SIZE, - Prompt, - batch_prompt_encoder_preprocess, - build_chip_preprocessing_operation, - build_point_grid, - calculate_stability_score, - extract_img_embeddings_from_chip, - generate_crop_boxes, - get_mask_within_bbox, - get_normalized_prompts_within_chip, - mask_encoder_preprocess, - mask_to_bbox, - preprocess_geometry_collection, - prompt_encoder_preprocess, - translate_bbox, - uncrop_masks, -) -from vibe_lib.spaceeye.chip import ( - ChipDataset, - ChipDataType, - Dims, - InMemoryReader, - Window, - get_loader, - write_prediction_to_file, -) - -BASE_MODEL_PATH = "/mnt/onnx_resources/{model_type}_{model_part}.onnx" -SAM_MODEL_TYPES = ["vit_h", "vit_l", "vit_b"] - - 
-LOGGER = logging.getLogger(__name__) - - -class CallbackBuilder: - def __init__( - self, - model_type: str, - spatial_overlap: float, - num_workers: int, - in_memory: bool, - band_names: Optional[List[str]], - band_scaling: Optional[List[float]], - band_offset: Optional[List[float]], - ): - self.model_type = model_type - self.spatial_overlap = spatial_overlap - self.num_workers = num_workers - self.in_memory = in_memory - self.tmp_dir = TemporaryDirectory() - self.window_size = SAM_CHIP_SIZE - self.band_names = band_names - self.band_scaling = band_scaling - self.band_offset = band_offset - - def get_model(self) -> Tuple[ort.InferenceSession, ort.InferenceSession]: - if self.model_type not in SAM_MODEL_TYPES: - raise ValueError( - f"Unknown model type: '{self.model_type}'. Expected one of {SAM_MODEL_TYPES}" - ) - - encoder_path = BASE_MODEL_PATH.format(model_type=self.model_type, model_part="encoder") - decoder_path = BASE_MODEL_PATH.format(model_type=self.model_type, model_part="decoder") - - if not os.path.exists(encoder_path) or not os.path.exists(decoder_path): - raise ValueError( - f"Model files not found for model type: '{self.model_type}'. " - f"Refer to the troubleshooting section of FarmVibes.AI documentation " - f"for instructions on how to import the model files to the cluster." 
- ) - - encoder = ort.InferenceSession(encoder_path) - LOGGER.info(f"Loaded encoder model from {encoder_path}") - decoder = ort.InferenceSession(decoder_path) - LOGGER.info(f"Loaded decoder model from {decoder_path}") - return encoder, decoder - - def get_chip_dataloader( - self, - raster: Raster, - geometry: BaseGeometry, - ) -> DataLoader[ChipDataType]: - chip_size = self.window_size - step_size = int(chip_size * (1 - self.spatial_overlap)) - dataset = ChipDataset( - [raster], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(step_size, step_size, 1), - nodata=BACKGROUND_VALUE, - geometry_or_chunk=geometry, - reader=InMemoryReader(downsampling=1) if self.in_memory else None, - ) - - dataloader = get_loader( - dataset, batch_size=1, num_workers=self.num_workers if not self.in_memory else 0 - ) - - return dataloader - - def __del__(self): - self.tmp_dir.cleanup() - - -class PromptCallbackBuilder(CallbackBuilder): - img_preprocessing_operation: Callable[[NDArray[Any]], NDArray[Any]] - - def __init__( - self, - model_type: str, - spatial_overlap: float, - points_per_batch: int, - num_workers: int, - in_memory: bool, - band_names: Optional[List[str]], - band_scaling: Optional[List[float]], - band_offset: Optional[List[float]], - ): - super().__init__( - model_type, - spatial_overlap, - num_workers, - in_memory, - band_names, - band_scaling, - band_offset, - ) - self.points_per_batch = points_per_batch - - def get_mask_for_prompt_group( - self, - prompt_group: List[Prompt], - chip_data: NDArray[Any], - decoder_session: ort.InferenceSession, - img_embedding: NDArray[Any], - ) -> NDArray[Any]: - prompt_group_mask = np.zeros((1, 1, *chip_data.shape[-2:]), dtype=bool) - for i in range(0, len(prompt_group), self.points_per_batch): - prompt_batch, prompt_label = prompt_encoder_preprocess( - prompt_group[i : i + self.points_per_batch] - ) - mask_prompt, has_mask_prompt = mask_encoder_preprocess() - - ort_inputs = { - "image_embeddings": img_embedding, - 
"point_coords": prompt_batch, - "point_labels": prompt_label, - "mask_input": mask_prompt, - "has_mask_input": has_mask_prompt, - "orig_im_size": np.array([self.window_size, self.window_size], dtype=np.float32), - } - - predicted_mask, _, _ = decoder_session.run(None, ort_inputs) - predicted_mask = predicted_mask > MASK_LOGIT_THRESHOLD - prompt_group_mask = np.logical_or(prompt_group_mask, predicted_mask) - - # Only include in the mask, pixels within the prompted bounding box - prompt_group_mask = get_mask_within_bbox(prompt_group_mask, prompt_group) - - return prompt_group_mask - - def generate_masks_from_points( - self, - dataloader: DataLoader[ChipDataType], - encoder_session: ort.InferenceSession, - decoder_session: ort.InferenceSession, - input_prompts: Dict[int, List[Prompt]], - ) -> List[str]: - filepaths: List[str] = [] - dataset = cast(ChipDataset, dataloader.dataset) - get_filename = dataset.get_filename - for batch_idx, batch in enumerate(dataloader): - chip_data, chip_mask, write_info_list = batch - output_chip_mask = np.zeros((1, len(input_prompts), *chip_data.shape[-2:]), dtype=bool) - - prompts_in_chip = get_normalized_prompts_within_chip( - input_prompts, dataset.read_windows[batch_idx][0], dataset.offset - ) - - if prompts_in_chip: - LOGGER.info(f"Running model for batch ({batch_idx + 1}/{len(dataloader)})") - - img_embedding = extract_img_embeddings_from_chip( - chip_data, self.img_preprocessing_operation, encoder_session - ) - - for prompt_id, prompt_group in prompts_in_chip.items(): - prompt_group_mask = self.get_mask_for_prompt_group( - prompt_group, chip_data, decoder_session, img_embedding - ) - output_chip_mask[0, prompt_id] = np.logical_or( - output_chip_mask[0, prompt_id], prompt_group_mask[0, 0] - ) - - else: - LOGGER.info( - "Skipping batch with no prompt intersection " - f"({batch_idx + 1}/{len(dataloader)})" - ) - - write_prediction_to_file( - output_chip_mask.astype(np.uint8), - chip_mask, - write_info_list, - self.tmp_dir.name, - 
filepaths, - get_filename, - ) - - return filepaths - - def __call__(self): - def callback( - input_raster: Raster, - input_prompts: GeometryCollection, - ) -> Dict[str, CategoricalRaster]: - geometry = shpg.shape(input_raster.geometry) - dataloader = self.get_chip_dataloader(input_raster, geometry) - - processed_prompts, prompt_id_map = preprocess_geometry_collection( - input_prompts, cast(ChipDataset, dataloader.dataset), geometry - ) - - self.img_preprocessing_operation = build_chip_preprocessing_operation( - input_raster, self.band_names, self.band_scaling, self.band_offset - ) - - encoder_session, decoder_session = self.get_model() - - mask_filepaths = self.generate_masks_from_points( - dataloader, - encoder_session, - decoder_session, - processed_prompts, - ) - - asset = AssetVibe(reference=mask_filepaths[0], type="image/tiff", id=gen_guid()) - segmentation_mask = CategoricalRaster.clone_from( - input_raster, - id=gen_guid(), - assets=[asset], - bands={ - f"mask_prompt_{prompt_id_map[prompt_id]}": prompt_id - for prompt_id in processed_prompts.keys() - }, - categories=["background", "foreground"], - ) - - return {"segmentation_mask": segmentation_mask} - - return callback - - -class AutomaticSegmentationCallbackBuilder(PromptCallbackBuilder): - def __init__( - self, - model_type: str, - spatial_overlap: float, - points_per_side: int, - n_crop_layers: int, - crop_overlap_ratio: float, - crop_n_points_downscale_factor: int, - pred_iou_thresh: float, - stability_score_thresh: float, - stability_score_offset: float, - points_per_batch: int, - num_workers: int, - in_memory: bool, - band_names: Optional[List[str]], - band_scaling: Optional[List[float]], - band_offset: Optional[List[float]], - ): - super().__init__( - model_type, - spatial_overlap, - points_per_batch, - num_workers, - in_memory, - band_names, - band_scaling, - band_offset, - ) - self.points_per_side = points_per_side - self.n_crop_layers = n_crop_layers - self.crop_overlap_ratio = crop_overlap_ratio 
- self.crop_n_points_downscale_factor = crop_n_points_downscale_factor - self.pred_iou_thresh = pred_iou_thresh - self.stability_score_thresh = stability_score_thresh - self.stability_score_offset = stability_score_offset - self.validate_parameters() - - def validate_parameters(self): - if not isinstance(self.points_per_side, int) or self.points_per_side < 1: - raise ValueError( - f"'points_per_side' must be a positive integer. Got {self.points_per_side}." - ) - if not isinstance(self.n_crop_layers, int) or self.n_crop_layers < 0: - raise ValueError( - f"'n_crop_layers' must be a non-negative integer. Got {self.n_crop_layers}." - ) - if self.crop_overlap_ratio < 0 or self.crop_overlap_ratio >= 1: - raise ValueError( - "'crop_overlap_ratio' must be a float in the range [0, 1). " - f"Got {self.crop_overlap_ratio}." - ) - if ( - not isinstance(self.crop_n_points_downscale_factor, int) - or self.crop_n_points_downscale_factor < 1 - ): - raise ValueError( - "'crop_n_points_downscale_factor' must be a positive integer. " - f"Got {self.crop_n_points_downscale_factor}." - ) - if self.pred_iou_thresh <= 0 or self.pred_iou_thresh >= 1: - raise ValueError( - "'pred_iou_thresh' must be a float in the range (0, 1). " - f"Got {self.pred_iou_thresh}." - ) - if self.stability_score_thresh <= 0 or self.stability_score_thresh > 1: - raise ValueError( - "'stability_score_thresh' must be a float in the range (0, 1]. " - f"Got {self.stability_score_thresh}." 
- ) - - def point_grid_inference( - self, - prompts: List[Prompt], - img_embedding: NDArray[Any], - decoder_session: ort.InferenceSession, - ) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]: - mask, mask_scores, mask_bbox = [], [], [] - mask_prompt, has_mask_prompt = mask_encoder_preprocess() - for i in range(0, len(prompts), self.points_per_batch): - LOGGER.info( - f"Processing points {i}-{min(i + self.points_per_batch, len(prompts))} " - f"out of {len(prompts)}" - ) - batch = [[p] for p in prompts[i : i + self.points_per_batch]] - prompt_batch, prompt_label = batch_prompt_encoder_preprocess(batch) - ort_inputs = { - "image_embeddings": img_embedding, - "point_coords": prompt_batch, - "point_labels": prompt_label, - "mask_input": mask_prompt, - "has_mask_input": has_mask_prompt, - "orig_im_size": np.array([self.window_size, self.window_size], dtype=np.float32), - } - pred_mask, pred_scores, _ = decoder_session.run(None, ort_inputs) - - # Filter by the mask quality score provided by SAM - if self.pred_iou_thresh > 0: - keep_masks = (pred_scores > self.pred_iou_thresh).reshape(-1) - pred_mask = pred_mask[keep_masks] - pred_scores = pred_scores[keep_masks] - - # Filter by Stability Score - if self.stability_score_thresh > 0: - stability_score = calculate_stability_score( - pred_mask, MASK_LOGIT_THRESHOLD, self.stability_score_offset - ) - keep_masks = (stability_score > self.stability_score_thresh).reshape(-1) - pred_mask = pred_mask[keep_masks] - pred_scores = pred_scores[keep_masks] - - if pred_mask.shape[0] > 0: - # Binarize mask given logit threshold - pred_mask = pred_mask > MASK_LOGIT_THRESHOLD - mask.append(pred_mask) - mask_scores.append(pred_scores.reshape(-1)) - mask_bbox.append(mask_to_bbox(pred_mask)) - - mask = np.concatenate(mask, axis=0) - mask_scores = np.concatenate(mask_scores, axis=0) - mask_bbox = np.concatenate(mask_bbox, axis=0) - return mask, mask_scores, mask_bbox - - def process_crop( - self, - chip_data: NDArray[Any], - crop_box: BBox, - 
layer_idx: int, - encoder_session: ort.InferenceSession, - decoder_session: ort.InferenceSession, - ) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]: - # Get crop and resize - x0, y0, x1, y1 = crop_box - cropped_im = chip_data[:, :, y0:y1, x0:x1] - - if layer_idx > 0: # Resize to chip size if not the first layer - cropped_im = cast( - torch.Tensor, - resize(torch.from_numpy(cropped_im), size=[self.window_size]), - ).numpy() - - # Get crop embeddings - crop_img_embedding = extract_img_embeddings_from_chip( - cropped_im, self.img_preprocessing_operation, encoder_session - ) - - # Build point grid for crop - points_per_side_for_layer = int( - self.points_per_side / (self.crop_n_points_downscale_factor**layer_idx) - ) - prompts = build_point_grid(points_per_side_for_layer, self.window_size) - - # Build mask - mask, mask_scores, mask_bbox = self.point_grid_inference( - prompts, crop_img_embedding, decoder_session - ) - - if layer_idx > 0: # Resize mask to crop size if not the first layer - mask, mask_bbox = uncrop_masks(mask, mask_bbox, crop_box, self.window_size) - - # Return to the original image frame - mask_bbox = translate_bbox(mask_bbox, x_offset=crop_box[0], y_offset=crop_box[1]) - - return mask, mask_scores, mask_bbox - - def generate_masks_from_grid( - self, - dataloader: DataLoader[ChipDataType], - encoder_session: ort.InferenceSession, - decoder_session: ort.InferenceSession, - ) -> Tuple[List[str], List[NDArray[Any]], List[NDArray[Any]], List[ChipWindow]]: - filepaths: List[str] = [] - scores: List[NDArray[Any]] = [] - boxes: List[NDArray[Any]] = [] - chip_windows: List[ChipWindow] = [] - - file_id = gen_guid() - dataset = cast(ChipDataset, dataloader.dataset) - - # Generate smaller crops within each chip (if n_crop_layers > 0) - crop_boxes, layer_idxs = generate_crop_boxes( - self.window_size, self.n_crop_layers, self.crop_overlap_ratio - ) - - for batch_idx, batch in enumerate(dataloader): - LOGGER.info(f"Processing batch {batch_idx + 
1}/{len(dataloader)}") - chip_data, chip_mask, write_info_list = batch - read_window = dataset.read_windows[batch_idx][0] - - crop_masks, crop_scores, crop_bbox = [], [], [] - - # Generate masks for each crop within chip - for crop_idx, (crop_box, layer_idx) in enumerate(zip(crop_boxes, layer_idxs)): - LOGGER.info( - f"Processing crop {crop_idx + 1}/{len(crop_boxes)} from layer idx {layer_idx}" - ) - mask, mask_scores, mask_bbox = self.process_crop( - chip_data, crop_box, layer_idx, encoder_session, decoder_session - ) - crop_masks.append(mask) - crop_scores.append(mask_scores) - crop_bbox.append(mask_bbox) - - crop_masks = np.concatenate(crop_masks, axis=0) - crop_scores = np.concatenate(crop_scores, axis=0) - crop_bbox = np.concatenate(crop_bbox, axis=0) - - # Translate crop_box in relation to input raster - crop_bbox = translate_bbox( - crop_bbox, x_offset=read_window.col_off, y_offset=read_window.row_off - ) - - # Write chip to file - if crop_masks.shape[0] > 0: - LOGGER.info(f"Writing masks to file {batch_idx + 1}/{len(dataloader)}") - filename = os.path.join(self.tmp_dir.name, f"{file_id}_{batch_idx}.tif") - meta = cast(Dict[str, Any], write_info_list[0]["meta"]) - meta.update({**INT_COMPRESSION_KWARGS}) - - write_window = ( - int(read_window.col_off - dataset.offset.width), - int(read_window.row_off - dataset.offset.height), - int(read_window.width), - int(read_window.height), - ) - - write_window_to_file( - crop_masks.squeeze(axis=1), - chip_mask.any(axis=(0, 1)), - Window(*write_window), # type: ignore - filename, - meta, - ) - filepaths.append(filename) - scores.append(crop_scores) - boxes.append(crop_bbox) - chip_windows.append(write_window) - else: - LOGGER.info(f"No masks to write from batch {batch_idx + 1}/{len(dataloader)}") - - return filepaths, scores, boxes, chip_windows - - def __call__(self): - def callback( - input_raster: Raster, - ) -> Dict[str, List[SamMaskRaster]]: - geometry = shpg.shape(input_raster.geometry) - dataloader = 
self.get_chip_dataloader(input_raster, geometry) - - self.img_preprocessing_operation = build_chip_preprocessing_operation( - input_raster, self.band_names, self.band_scaling, self.band_offset - ) - - encoder_session, decoder_session = self.get_model() - - chip_filepaths, mask_scores, mask_boxes, chip_windows = self.generate_masks_from_grid( - dataloader, - encoder_session, - decoder_session, - ) - - rasters: List[SamMaskRaster] = [] - for path, scores, boxes, window in zip( - chip_filepaths, mask_scores, mask_boxes, chip_windows - ): - asset = AssetVibe(reference=path, type="image/tiff", id=gen_guid()) - segmented_chip = SamMaskRaster.clone_from( - input_raster, - id=gen_guid(), - assets=[asset], - bands={f"mask_{i}": i for i in range(scores.shape[0])}, - categories=["background", "foreground"], - mask_score=scores.tolist(), - mask_bbox=boxes.tolist(), - chip_window=window, - ) - rasters.append(segmented_chip) - - return {"segmented_chips": rasters} - - return callback diff --git a/ops/segment_anything/test_sam_inference.py b/ops/segment_anything/test_sam_inference.py deleted file mode 100644 index d99a85b4..00000000 --- a/ops/segment_anything/test_sam_inference.py +++ /dev/null @@ -1,474 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import geopandas as gpd -import numpy as np -import pandas as pd -import pytest -import rioxarray as rio -import xarray as xr -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, GeometryCollection -from vibe_core.data.core_types import gen_guid -from vibe_core.data.rasters import CategoricalRaster, Raster, SamMaskRaster -from vibe_core.data.sentinel import Sentinel2Raster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.raster import save_raster_to_asset - -CONFIG_PATH_PROMPT = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "prompt_segmentation.yaml" -) - -CONFIG_PATH_AUTOSEG = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "automatic_segmentation.yaml" -) - -DEFAULT_AUTOSEG_PARAMETERS = { - "points_per_side": 2, - "spatial_overlap": 0.0, - "pred_iou_thresh": 0.88, - "stability_score_thresh": 0.95, - "n_crop_layers": 0, - "crop_overlap_ratio": 0.0, - "crop_n_points_downscale_factor": 1, -} - -# Minimum threshold just to make sure the threshold won't remove any masks -MIN_THRESHOLD = 0.00001 -BAND_NAMES = {"s2": ["R", "G", "B"], "basemap": ["red", "green", "blue"]} - - -def edit_autoseg_parameters(key: str, value: Union[int, float]) -> Dict[str, Union[int, float]]: - new_params = DEFAULT_AUTOSEG_PARAMETERS.copy() - new_params[key] = value - return new_params - - -def create_base_raster( - tmp_dir_name: str, - raster_size: int = 2048, - type: str = "s2", - cells_per_side: int = 2, -) -> Union[Sentinel2Raster, Raster]: - now = datetime.now() - geom = shpg.mapping(shpg.box(0, 0, raster_size, raster_size)) - - n_channels = 12 if type == "s2" else 3 - raster_dim = (n_channels, raster_size, raster_size) # enough for two chips/side - - # Create a checkboard pattern - cell_size = raster_size // cells_per_side - row, col = np.indices((raster_size, 
raster_size)) - pattern_2d = (row // cell_size % 2) ^ (col // cell_size % 2) - fake_data = 10000.0 * np.repeat(pattern_2d[np.newaxis, :, :], n_channels, axis=0) - - fake_da = xr.DataArray( - fake_data, - coords={ - "bands": np.arange(raster_dim[0]), - "x": np.linspace(0, 1, raster_dim[1]), - "y": np.linspace(0, 1, raster_dim[2]), - }, - dims=["bands", "y", "x"], - ) - fake_da.rio.write_crs("epsg:4326", inplace=True) - - asset = save_raster_to_asset(fake_da, tmp_dir_name) - - if type == "s2": - raster = Sentinel2Raster( - id="s2", - time_range=(now, now), - geometry=geom, - assets=[asset], - bands={ - **{b: idx for idx, b in enumerate(BAND_NAMES[type])}, - **{str(idx): idx for idx in range(3, raster_dim[0])}, - }, - tile_id="", - processing_level="", - product_name="", - orbit_number=0, - relative_orbit_number=0, - orbit_direction="", - platform="", - extra_info={}, - ) - else: - raster = Raster( - id="basemap", - time_range=(now, now), - geometry=geom, - assets=[asset], - bands={ - **{b: idx for idx, b in enumerate(BAND_NAMES[type])}, - **{str(idx): idx for idx in range(3, raster_dim[0])}, - }, - ) - - return raster - - -def create_geometry_collection( - prompt_list: List[Union[shpg.Point, shpg.Polygon]], - label: List[int], - prompt_id: List[int], - geom: Dict[str, Any], - time_range: Tuple[datetime, datetime], - tmp_dir_name: str, - column_names: List[str] = ["geometry", "label", "prompt_id"], -): - df = pd.DataFrame( - {col_name: info for col_name, info in zip(column_names, [prompt_list, label, prompt_id])} - ) - gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326") # type: ignore - path = os.path.join(tmp_dir_name, "fake_gdf.geojson") - gdf.to_file(path, driver="GeoJSON") - - asset = AssetVibe(reference=path, type="application/json", id=gen_guid()) - geom_collection = GeometryCollection( - id=gen_guid(), geometry=geom, time_range=time_range, assets=[asset] - ) - return geom_collection - - -@pytest.fixture -def tmp_dir(): - _tmp_dir = 
TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -@pytest.mark.parametrize( - "prompt_list, label, prompt_id, expected_exception", - [ - ( - [shpg.MultiPoint([[1, 1], [2, 2]])], - [1], - [0], - "Expected each geometry to be a shapely Point or Polygon", - ), - ( - [shpg.Point(4000, 4000)], # outside of the raster - [1], - [0], - "Expected all prompts to be contained within the ROI of input_geometry", - ), - ([shpg.Point(1, 1)], [1], [5.5], "Expected prompt_ids as integers or strings"), - ( - [shpg.Point(1, 1), shpg.Point(2, 2)], - ["a", 5.5], - [0, 1], - "Expected labels to be integers, with 0 or 1 values", - ), - ( - [shpg.box(1, 1, 2, 2), shpg.box(2, 2, 3, 3)], - [1, 1], - [0, 0], - "Expected at most one bounding box per prompt", - ), - ], -) -def test_invalid_prompt_format( - prompt_list: List[Union[shpg.Point, shpg.Polygon]], - label: List[int], - prompt_id: List[int], - expected_exception: Optional[str], - tmp_dir: str, -): - raster = create_base_raster(tmp_dir) - geom_collection = create_geometry_collection( - prompt_list=prompt_list, - label=label, - prompt_id=prompt_id, - geom=raster.geometry, - time_range=raster.time_range, - tmp_dir_name=tmp_dir, - ) - - with pytest.raises(ValueError, match=expected_exception): - OpTester(CONFIG_PATH_PROMPT).run(input_raster=raster, input_prompts=geom_collection) - - -def test_invalid_geometry_collection(tmp_dir: str): - raster = create_base_raster(tmp_dir) - geom_collection = create_geometry_collection( - prompt_list=[shpg.Point(5, 5)], - label=[1], - prompt_id=[0], - geom=raster.geometry, - time_range=raster.time_range, - tmp_dir_name=tmp_dir, - column_names=["geometry", "label", "wrong_column_name"], - ) - - with pytest.raises(ValueError): - OpTester(CONFIG_PATH_PROMPT).run(input_raster=raster, input_prompts=geom_collection) - - -# Points expressed as fraction of the raster size for easier conversion to pixel coordinates -@pytest.mark.parametrize( - "raster_type, raster_size, spatial_overlap, 
prompt_list, label, prompt_id, expected_mask_area", - [ - ( # One point per quadrant as separate prompts - "s2", - 2048, - 0.0, - [ - shpg.Point(0.25, 0.25), # top-left quadrant - shpg.Point(0.75, 0.25), # top-right quadrant - shpg.Point(0.25, 0.75), # bottom-left quadrant - shpg.Point(0.75, 0.75), # bottom-right quadrant - ], - [1, 1, 1, 1], - [0, 1, 2, 3], - 1024 * 1024, # one quadrant, 1/4 of the raster area - ), - ( # One prompt with 2 points on the top-left and bottom-right quadrants - "basemap", - 2048, - 0.0, - [ - shpg.Point(0.25, 0.25), - shpg.Point(0.75, 0.25), - shpg.Point(0.25, 0.75), - shpg.Point(0.75, 0.75), - ], - [1, 0, 0, 1], - [0, 0, 0, 0], - 2 * 1024 * 1024, # two quadrant, 1/2 of the raster area - ), - ( # Four points per quadrant, each quadrant as separate prompt - "s2", - 2048, - 0.0, - [shpg.Point(0.125 + i * 0.25, 0.125 + j * 0.25) for i in range(4) for j in range(4)], - [1] * 16, - [2 * (i // 2) + (j // 2) for i in range(4) for j in range(4)], - 1024 * 1024, # one quadrant, 1/4 of the raster area - ), - ( # Four points per quadrant, single prompt (top-left, bottom-right), 50% of overlap - "basemap", - 2048, - 0.0, - [shpg.Point(0.125 + i * 0.25, 0.125 + j * 0.25) for i in range(4) for j in range(4)], - [1, 1, 0, 0] * 2 + [0, 0, 1, 1] * 2, - [1] * 16, - 2 * 1024 * 1024, # two quadrant, 1/2 of the raster area - ), - ( # Bbox of half of a quadrant centered in the first quadrant, single prompt, no overlap - "s2", - 2048, - 0.0, - [shpg.box(0.125, 0.125, 0.375, 0.375)], - [1], - [0], - 512 * 512, # half quadrant, 1/8 of the raster area - ), - ( # Same Bbox as above with a centered foreground point, single prompt, no overlap - "basemap", - 2048, - 0.0, - [shpg.box(0.125, 0.125, 0.375, 0.375), shpg.Point(0.25, 0.25)], - [1, 1], - [0, 0], - 512 * 512, # half quadrant, 1/8 of the raster area - ), - ], -) -def test_segmentation_mask( - raster_type: str, - raster_size: int, - spatial_overlap: float, - prompt_list: List[Union[shpg.Point, 
shpg.Polygon]], - label: List[int], - prompt_id: List[int], - expected_mask_area: int, - tmp_dir: str, -): - raster = create_base_raster(tmp_dir, raster_size, raster_type) - geom_collection = create_geometry_collection( - prompt_list=prompt_list, - label=label, - prompt_id=prompt_id, - geom=raster.geometry, - time_range=raster.time_range, - tmp_dir_name=tmp_dir, - ) - - op_tester = OpTester(CONFIG_PATH_PROMPT) - op_tester.update_parameters( - {"spatial_overlap": spatial_overlap, "band_names": BAND_NAMES[raster_type]} - ) - output = op_tester.run(input_raster=raster, input_prompts=geom_collection) - - assert "segmentation_mask" in output - - mask_raster = cast(CategoricalRaster, output["segmentation_mask"]) - assert len(mask_raster.bands) == len(np.unique(prompt_id)) - - mask = rio.open_rasterio(mask_raster.assets[0].path_or_url).values # type: ignore - assert mask.shape == (len(np.unique(prompt_id)), 2048, 2048) - - for idx, _ in enumerate(np.unique(prompt_id)): - assert ( - np.abs( - np.sum(mask[idx, :, :]) - expected_mask_area # type: ignore - ) - <= 0.05 * expected_mask_area - ), "Mask area is not within 5 percent of the expected area" - - -@pytest.mark.parametrize( - "raster_type, checkboard_cells_per_side, points_per_side, spatial_overlap, " - "pred_iou_thresh, stability_score_thresh, n_crop_layers, n_expected_masks", - [ - ( - "s2", - 2, - 2, - DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"], - MIN_THRESHOLD, - MIN_THRESHOLD, - DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"], - 16, - ), # 2x2 raster, 4 chips, 4 masks/chip (2pps**2) = 16 masks - ( - "basemap", - 2, - 2, - DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"], - MIN_THRESHOLD, - MIN_THRESHOLD, - 1, - 80, # 16 masks for crop layer 0 + 4*16 for the next layer - ), # Same as above, but with an additional crop layer - ( - "s2", - 2, - 2, - 0.5, - MIN_THRESHOLD, - MIN_THRESHOLD, - DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"], - 36, # SAM removes a few due to low quality and stability scores - ), # 2x2 raster, 9 
chips (due to overlap), 4 masks/chip (2pps**2) = 36 masks - ( - "basemap", - 2, - 2, - 0.5, - DEFAULT_AUTOSEG_PARAMETERS["pred_iou_thresh"], - DEFAULT_AUTOSEG_PARAMETERS["stability_score_thresh"], - DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"], - 31, # SAM removes a few due to low quality and stability scores - ), # Same as above, but with filtered masks - ( - "s2", - 4, - 4, - DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"], - MIN_THRESHOLD, - MIN_THRESHOLD, - DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"], - 64, # Without the IoU quality and stability score filtering, we expect all 64 masks - ), # 4x4 raster, 4 chips, 16 masks/chip (4pps**2) = 64 masks - ( - "basemap", - 4, - 4, - DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"], - DEFAULT_AUTOSEG_PARAMETERS["pred_iou_thresh"], - DEFAULT_AUTOSEG_PARAMETERS["stability_score_thresh"], - DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"], - 36, # SAM removes a few due to low quality and stability scores - ), # Same as above, but with filtered masks - ], -) -def test_automatic_segmentation_mask( - raster_type: str, - checkboard_cells_per_side: int, - points_per_side: int, - spatial_overlap: float, - pred_iou_thresh: float, - stability_score_thresh: float, - n_crop_layers: int, - n_expected_masks: int, - tmp_dir: str, -): - raster_size = 2048 - raster = create_base_raster(tmp_dir, raster_size, raster_type, checkboard_cells_per_side) - - op_tester = OpTester(CONFIG_PATH_AUTOSEG) - op_tester.update_parameters( - { - "points_per_side": points_per_side, - "spatial_overlap": spatial_overlap, - "n_crop_layers": n_crop_layers, - "pred_iou_thresh": pred_iou_thresh, - "stability_score_thresh": stability_score_thresh, - "band_names": BAND_NAMES[raster_type], - } - ) - output = op_tester.run(input_raster=raster) - - assert "segmented_chips" in output - - segmented_chips = cast(List[SamMaskRaster], output["segmented_chips"]) - step_size = 1024 * (1 - spatial_overlap) - n_expected_rasters = (1 + (raster_size - 1024) / step_size) ** 2 - assert 
len(segmented_chips) == n_expected_rasters, ( - "Unexpected number of output rasters. " - f"Got {len(segmented_chips)}, expected {n_expected_rasters}." - ) - - n_masks = 0 - mask_areas = [] - for chip in segmented_chips: - mask = cast(xr.Dataset, rio.open_rasterio(chip.assets[0].path_or_url)).values - mask_areas.extend(np.sum(mask, axis=(1, 2)).reshape(-1).tolist()) # type: ignore - n_masks += mask.shape[0] - - assert ( - n_masks == n_expected_masks - ), f"Unexpected number of output masks. Got {n_masks}, expected {n_expected_masks}." - - -@pytest.mark.parametrize( - "param_key, invalid_value", - [ - ("points_per_side", 0), - ("points_per_side", 1.5), - ("n_crop_layers", -1), - ("n_crop_layers", 1.5), - ("crop_overlap_ratio", -1), - ("crop_overlap_ratio", 1.5), - ("crop_n_points_downscale_factor", 0), - ("crop_n_points_downscale_factor", 1.5), - ("pred_iou_thresh", 0), - ("pred_iou_thresh", 1), - ("stability_score_thresh", 0), - ("stability_score_thresh", 1.5), - ("band_names", ["Cyan", "Magenta", "Yellow"]), - ("band_names", ["R", "G", "B", "N", "N2"]), - ("band_names", ["R", "G"]), - ("band_scaling", [1.0, 1.0]), - ("band_offset", [1.0, 1.0]), - ], -) -def test_invalid_autoseg_params( - param_key: str, - invalid_value: Union[int, float], - tmp_dir: str, -): - raster = create_base_raster(tmp_dir) - op_tester = OpTester(CONFIG_PATH_AUTOSEG) - op_tester.update_parameters(edit_autoseg_parameters(param_key, invalid_value)) - with pytest.raises(ValueError): - op_tester.run(input_raster=raster) diff --git a/ops/segment_anything_combine_masks/combine_sam_masks.py b/ops/segment_anything_combine_masks/combine_sam_masks.py deleted file mode 100644 index 238ed20c..00000000 --- a/ops/segment_anything_combine_masks/combine_sam_masks.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Dict, List, Tuple - -import numpy as np -import rasterio -import torch -from torchvision.ops.boxes import batched_nms, box_area - -from vibe_core.data import AssetVibe, BBox, CategoricalRaster, ChipWindow, SamMaskRaster, gen_guid - - -def touch_chip_boundaries(bbox: BBox, chip_window: ChipWindow) -> bool: - return ( - bbox[0] <= chip_window[0] # col_offset - or bbox[1] <= chip_window[1] # row_offset - or bbox[2] >= chip_window[0] + chip_window[2] # col_offset + width - or bbox[3] >= chip_window[1] + chip_window[3] # row_offset + height - ) - - -def is_contained_by_others(current_bbox: BBox, other_boxes: List[BBox], eps: int = 5) -> bool: - for bbox in other_boxes: - if ( - current_bbox[0] >= bbox[0] - eps - and current_bbox[1] >= bbox[1] - eps - and current_bbox[2] <= bbox[2] + eps - and current_bbox[3] <= bbox[3] + eps - ): - return True - return False - - -# - ☑️ Filter masks that touch crop boundaries, but do not touch chip boundaries -# - ❌ NMS of all masks within a crop. 
I don't think this makes much sense anymore -# - ☑️ NMS for all crops within a chip -# - ❓ Remove small disconnected regions and holdes in a mask, then NMS again -# - ☑️ NMS masks from different chips -def select_masks( - boxes: List[List[BBox]], - scores: List[List[float]], - chip_windows: List[ChipWindow], - chip_nms_thr: float, - mask_nms_thr: float, -) -> List[List[int]]: - # NMS within each chip (using SAM prediction scores) - kept_idx = [] - for chip_boxes, chip_scores in zip(boxes, scores): - keep_by_nms = batched_nms( - boxes=torch.from_numpy(np.array(chip_boxes)).to(torch.float32), - scores=torch.from_numpy(np.array(chip_scores)).to(torch.float32), - idxs=torch.zeros(len(chip_boxes)), - iou_threshold=chip_nms_thr, - ) - kept_idx.append(keep_by_nms.numpy().tolist()) - - # NMS across chips (prefering smaller masks) - idx_map = [ - (cidx, idx) for cidx, chip_idxs in enumerate(kept_idx) for idx in range(len(chip_idxs)) - ] - - kept_boxes = np.array( - [ - boxes[chip_idx][to_keep_idx] - for chip_idx in range(len(kept_idx)) - for to_keep_idx in kept_idx[chip_idx] - ] - ) - - # As in SAM, prefer smaller masks - area_scores = 1 / box_area(torch.from_numpy(kept_boxes)) - - keep_by_nms = batched_nms( - boxes=torch.from_numpy(kept_boxes), - scores=area_scores, - idxs=torch.zeros(kept_boxes.shape[0]), - iou_threshold=mask_nms_thr, - ) - - idx_map = [idx_map[idx] for idx in keep_by_nms.numpy().tolist()] - filtered_mask_idxs = [[] for _ in range(len(boxes))] - for cidx, idx in idx_map: - filtered_mask_idxs[cidx].append(kept_idx[cidx][idx]) - - # Removing masks that touch their chip boundary and are contained within other masks - mask_idx_to_keep = [[] for _ in range(len(boxes))] - for chip_idx, mask_idxs in enumerate(filtered_mask_idxs): - if mask_idxs: - other_boxes = [ - boxes[cidx][idx] - for cidx in range(len(boxes)) - for idx in filtered_mask_idxs[cidx] - if cidx != chip_idx - ] - for idx in mask_idxs: - if not ( - touch_chip_boundaries(boxes[chip_idx][idx], 
chip_windows[chip_idx]) - and is_contained_by_others(boxes[chip_idx][idx], other_boxes) - ): - mask_idx_to_keep[chip_idx].append(idx) - return mask_idx_to_keep - - -def merge_masks( - masks: List[SamMaskRaster], mask_idx_to_keep: List[List[int]], tmp_dir: str -) -> Tuple[AssetVibe, int]: - n_masks = sum([len(idxs) for idxs in mask_idx_to_keep]) - with rasterio.open(masks[0].assets[0].path_or_url) as src: - out_meta = src.meta - out_meta["count"] = n_masks - - out_path = os.path.join(tmp_dir, f"{gen_guid()}.tif") - band_idx_to_write = 1 - with rasterio.open(out_path, "w", **out_meta) as dst: - for raster, idxs in zip(masks, mask_idx_to_keep): - if idxs: - with rasterio.open(raster.assets[0].path_or_url) as src: - for i in idxs: - dst.write(src.read(i + 1), band_idx_to_write) - band_idx_to_write += 1 - - asset = AssetVibe(reference=out_path, type="image/tiff", id=gen_guid()) - return asset, n_masks - - -class CallbackBuilder: - def __init__(self, chip_nms_thr: float, mask_nms_thr: float): - self.tmp_dir = TemporaryDirectory() - - if chip_nms_thr <= 0 or chip_nms_thr >= 1: - raise ValueError(f"'chip_nms_thr' must be between 0 and 1. Got {chip_nms_thr}") - if mask_nms_thr <= 0 or mask_nms_thr >= 1: - raise ValueError(f"'mask_nms_thr' must be between 0 and 1. 
Got {mask_nms_thr}") - - self.chip_nms_thr = chip_nms_thr - self.mask_nms_thr = mask_nms_thr - - def __call__(self): - def callback(input_masks: List[SamMaskRaster]) -> Dict[str, CategoricalRaster]: - mask_scores = [m.mask_score for m in input_masks] - mask_bboxes = [m.mask_bbox for m in input_masks] - chip_windows = [m.chip_window for m in input_masks] - - mask_idx_to_keep = select_masks( - mask_bboxes, mask_scores, chip_windows, self.chip_nms_thr, self.mask_nms_thr - ) - - asset, n_masks = merge_masks(input_masks, mask_idx_to_keep, self.tmp_dir.name) - segmentation_mask = CategoricalRaster.clone_from( - input_masks[0], - id=gen_guid(), - assets=[asset], - bands={f"mask_{i}": i for i in range(n_masks)}, - categories=["background", "foreground"], - ) - return {"output_mask": segmentation_mask} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/segment_anything_combine_masks/combine_sam_masks.yaml b/ops/segment_anything_combine_masks/combine_sam_masks.yaml deleted file mode 100644 index bd491aaf..00000000 --- a/ops/segment_anything_combine_masks/combine_sam_masks.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: combine_sam_masks -inputs: - input_masks: List[SamMaskRaster] -output: - output_mask: CategoricalRaster -parameters: - chip_nms_thr: 0.7 - mask_nms_thr: 0.5 -entrypoint: - file: combine_sam_masks.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - chip_nms_thr - - mask_nms_thr -description: - short_description: - Process intermediary segmentation masks, filtering out duplicates and combining into final mask raster. - parameters: - chip_nms_thr: - The box IoU cutoff used by non-maximal suppression to filter duplicate masks within a chip. - mask_nms_thr: - The box IoU cutoff used by non-maximal suppression to filter duplicate masks between different chips. 
\ No newline at end of file diff --git a/ops/segment_anything_combine_masks/test_combine_sam_masks.py b/ops/segment_anything_combine_masks/test_combine_sam_masks.py deleted file mode 100644 index 43f76bb2..00000000 --- a/ops/segment_anything_combine_masks/test_combine_sam_masks.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from tempfile import TemporaryDirectory -from typing import List, Tuple, Union, cast - -import numpy as np -import pytest -import xarray as xr -from shapely import geometry as shpg - -from vibe_core.data.core_types import gen_guid -from vibe_core.data.rasters import CategoricalRaster, SamMaskRaster -from vibe_dev.testing.op_tester import OpTester -from vibe_lib.raster import save_raster_to_asset - -CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "combine_sam_masks.yaml") - -DEFAULT_BBOXES = [ - (0, 0, 1024, 1024), - (1024, 0, 2048, 1024), - (0, 1024, 1024, 2048), - (1024, 1024, 2048, 2048), -] - - -def create_segmented_raster( - tmp_dir_name: str, - mask_bbox: Tuple[int, int, int, int], - mask_score: float = 1.0, - raster_size: int = 2048, -) -> SamMaskRaster: - now = datetime.now() - geom = shpg.mapping(shpg.box(0, 0, raster_size, raster_size)) - - raster_dim = (1, raster_size, raster_size) - - fake_data = np.zeros(raster_dim, dtype=np.uint8) - fake_data[0, mask_bbox[1] : mask_bbox[3], mask_bbox[0] : mask_bbox[2]] = 1 - - fake_da = xr.DataArray( - fake_data, - coords={ - "bands": np.arange(raster_dim[0]), - "x": np.linspace(0, 1, raster_dim[1]), - "y": np.linspace(0, 1, raster_dim[2]), - }, - dims=["bands", "y", "x"], - ) - fake_da.rio.write_crs("epsg:4326", inplace=True) - - asset = save_raster_to_asset(fake_da, tmp_dir_name) - - return SamMaskRaster( - id=gen_guid(), - time_range=(now, now), - geometry=geom, - assets=[asset], - bands={"mask": 0}, - categories=["background", "foreground"], - 
mask_score=[mask_score], - mask_bbox=[tuple([float(c) for c in mask_bbox])], # type: ignore - chip_window=(0.0, 0.0, float(raster_size), float(raster_size)), - ) - - -@pytest.fixture -def tmp_dir(): - _tmp_dir = TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -@pytest.mark.parametrize( - "param_key, invalid_value", - [(p, v) for p in ["chip_nms_thr", "mask_nms_thr"] for v in [-1, 0, 1, 1.5]], -) -def test_invalid_params( - param_key: str, - invalid_value: Union[int, float], - tmp_dir: str, -): - raster = create_segmented_raster(tmp_dir, mask_bbox=(0, 0, 1024, 1024)) - op_tester = OpTester(CONFIG_PATH) - op_tester.update_parameters({param_key: invalid_value}) - with pytest.raises(ValueError): - op_tester.run(input_masks=[raster]) - - -# Points expressed as fraction of the raster size for easier conversion to pixel coordinates -@pytest.mark.parametrize( - "bbox_list, chip_nms_thr, mask_nms_thr, n_expected_masks", - [ - ( - DEFAULT_BBOXES, - 0.7, - 0.5, - 4, # No overlapping masks, so expect the same number - ), - ( - DEFAULT_BBOXES + [(10, 10, 1014, 1014)], - 0.7, - 0.5, - 4, # One mask is completely contained in another - ), - ( # Overlapping with top two masks, but with an area slightly larger than a chip - DEFAULT_BBOXES + [(500, 0, 1550, 1024)], - 0.7, - 0.5, # threshold of 0.5 IoU won't suppress the new box - 5, # Overlapping with two masks, but IoU won't pass the threshold so we will keep it - ), - ( # Overlapping with top two masks, but with an area slightly larger than a chip - DEFAULT_BBOXES + [(500, 0, 1550, 1024)], - 0.7, - 0.3, # lowering the threshold so it will be suppressed (we prefer smaller masks) - 4, - ), - ], -) -def test_segmentation_mask( - bbox_list: List[Tuple[int, int, int, int]], - chip_nms_thr: float, - mask_nms_thr: float, - n_expected_masks: int, - tmp_dir: str, -): - input_masks = [create_segmented_raster(tmp_dir, mask_bbox=box) for box in bbox_list] - - op_tester = OpTester(CONFIG_PATH) - 
op_tester.update_parameters({"chip_nms_thr": chip_nms_thr, "mask_nms_thr": mask_nms_thr}) - output = op_tester.run(input_masks=input_masks) # type: ignore - - assert "output_mask" in output - - mask_raster = cast(CategoricalRaster, output["output_mask"]) - assert len(mask_raster.bands) == n_expected_masks diff --git a/ops/segment_driveway/segment_driveway.py b/ops/segment_driveway/segment_driveway.py deleted file mode 100644 index e5374a5d..00000000 --- a/ops/segment_driveway/segment_driveway.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, Tuple - -import numpy as np -import onnxruntime as ort -import rasterio -import torch -import torch.nn.functional as F -from numpy.typing import NDArray -from rasterio.enums import Resampling -from rasterio.windows import Window -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, gen_guid -from vibe_core.data.rasters import CategoricalRaster, Raster -from vibe_lib.raster import resample_raster -from vibe_lib.spaceeye.chip import ChipDataset, Dims, get_loader, predict_chips - - -def reader( - raster: Raster, window: Window, out_shape: Tuple[int, int] -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - with rasterio.open(raster.raster_asset.url) as src: - x = src.read(window=window, out_shape=out_shape, indexes=[4, 1, 2]) - mask = x == src.nodata - x[mask] = 0 - return x, mask - - -def contrast_enhance(img: NDArray[Any], low: float = 2, high: float = 98) -> NDArray[np.float32]: - img_min, img_max = np.nanpercentile(img, (low, high), axis=(-1, -2), keepdims=True) - return np.clip((img.astype(np.float32) - img_min) / (img_max - img_min), 0, 1) - - -def pre_process(size: Tuple[int, int]) -> Callable[[NDArray[Any], NDArray[Any]], NDArray[Any]]: - """ - Preprocess data by normalizing and picking a few bands - """ - - def fn(chip_data: NDArray[Any], _) 
-> NDArray[np.float32]: - x = F.interpolate(torch.from_numpy(chip_data), size=size, mode="bilinear").numpy() - x = contrast_enhance(x).astype(np.float32) - return x - - return fn - - -def post_process( - size: Tuple[int, int], -) -> Callable[[NDArray[Any], NDArray[Any], NDArray[Any]], NDArray[Any]]: - """ - Get most probable class - """ - - def fn(_, __: NDArray[Any], model_out: NDArray[Any]) -> NDArray[Any]: - x = F.interpolate(torch.from_numpy(model_out), size=size, mode="bilinear").numpy() - return x.argmax(axis=1, keepdims=True).astype(np.uint8) - - return fn - - -class CallbackBuilder: - def __init__( - self, - downsampling: int, - root_dir: str, - model_path: str, - window_size: int, - model_size: int, - overlap: float, - batch_size: int, - num_workers: int, - ): - self.downsampling = downsampling - self.root_dir = root_dir - self.model_path = model_path - self.window_size = window_size - self.model_size = model_size - self.overlap = overlap - self.batch_size = batch_size - self.num_workers = num_workers - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback( - input_raster: Raster, - ) -> Dict[str, CategoricalRaster]: - if self.downsampling < 1: - raise ValueError( - f"Downsampling must be equal or larger than 1, found {self.downsampling}" - ) - model_path = os.path.join(self.root_dir, self.model_path) - model = ort.InferenceSession(model_path) - chip_size = self.window_size - step_size = int(chip_size * (1 - self.overlap)) - dataset = ChipDataset( - [input_raster], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(step_size, step_size, 1), - downsampling=self.downsampling, - nodata=255, - geometry_or_chunk=shpg.shape(input_raster.geometry), - reader=reader, - dtype="uint8", - ) - - dataloader = get_loader(dataset, self.batch_size, self.num_workers) - pred_filepaths = predict_chips( - model, - dataloader, - self.tmp_dir.name, - skip_nodata=False, - pre_process=pre_process((self.model_size, self.model_size)), - 
post_process=post_process((self.window_size, self.window_size)), - ) - assert ( - len(pred_filepaths) == 1 - ), f"Expected one prediction file, found: {len(pred_filepaths)}" - mask_filepath = resample_raster( - pred_filepaths[0], - self.tmp_dir.name, - dataset.width, - dataset.height, - dataset.transform, - Resampling.nearest, - nodata=255, - ) - asset = AssetVibe(reference=mask_filepath, type="image/tiff", id=gen_guid()) - out = CategoricalRaster.clone_from( - input_raster, - id=gen_guid(), - assets=[asset], - categories=["Background", "Driveway", "Unknown"], - ) - - return {"segmentation_raster": out} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/segment_driveway/segment_driveway.yaml b/ops/segment_driveway/segment_driveway.yaml deleted file mode 100644 index 4c44d2e2..00000000 --- a/ops/segment_driveway/segment_driveway.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: segment_driveway -inputs: - input_raster: Raster -output: - segmentation_raster: CategoricalRaster -parameters: - downsampling: 1 - root_dir: /opt/terravibes/ops/resources/driveway_models - model_path: driveway.onnx - window_size: 128 - model_size: 512 - overlap: .25 - batch_size: 1 - num_workers: 0 -entrypoint: - file: segment_driveway.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - model_path - - downsampling - - window_size - - overlap -description: - short_description: Segments the front of houses in the input raster using a machine learning model. \ No newline at end of file diff --git a/ops/select_necessary_coverage_items/filter_items.py b/ops/select_necessary_coverage_items/filter_items.py deleted file mode 100644 index 44f4e722..00000000 --- a/ops/select_necessary_coverage_items/filter_items.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -""" -Selects a (locally?) minimum subset of items that covers the desired input geometry -(if suchs subset exists) for each timestamp. 
-Discards items for a timestamp if the geometry cannot be covered at that time. -Assumes items are sparse in time (time range is one moment in time) -""" - -from collections import defaultdict -from typing import Dict, List, Optional, TypeVar - -from shapely import geometry as shpg -from shapely import ops as shpo -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import DataVibe -from vibe_lib.geometry import is_approx_within, norm_intersection - -T = TypeVar("T", bound=DataVibe, covariant=True) - - -def can_cover(geom: BaseGeometry, items: List[T], threshold: float) -> bool: - item_geoms = [shpg.shape(p.geometry) for p in items] - return is_approx_within(geom, shpo.unary_union(item_geoms), threshold) - - -def intersect_area(x: DataVibe, geom: BaseGeometry) -> float: - return shpg.shape(x.geometry).intersection(geom).area - - -def filter_necessary_items( - geom: BaseGeometry, items: List[T], threshold: float, min_area: Optional[float] = None -) -> List[T]: - """ - Greedily filter the items so that only a subset necessary to cover all - the geometry's spatial extent is returned - """ - if min_area is None: - min_area = (1 - threshold) * geom.area - if not items: # No more items left, can't cover the geometry - return [] - sorted_items = sorted(items, key=lambda x: intersect_area(x, geom), reverse=True) - # Get item with largest intersection - item = sorted_items[0] - item_geom = shpg.shape(item.geometry) - if is_approx_within(geom, item_geom, threshold): - return [item] - if norm_intersection(geom, item_geom) < (1 - threshold): - # Can't make more progress, so we give up - return [] - remaining_geom = geom - item_geom - if remaining_geom.area < min_area: - # We covered enough of the area, so we stop now - return [item] - return [item] + filter_necessary_items(remaining_geom, sorted_items[1:], threshold, min_area) - - -def callback_builder( - min_cover: float, within_threshold: float, max_items: Optional[int], group_attribute: str -): - if not 0 
< min_cover < 1: - raise ValueError(f"{min_cover=} must be between 0 and 1") - if not 0 < within_threshold < 1: - raise ValueError(f"{within_threshold=} must be between 0 and 1") - if min_cover > within_threshold: - raise ValueError(f"{min_cover=} cannot be larger than {within_threshold}") - min_cover = min(min_cover, within_threshold) - - def filter_items(bounds_item: DataVibe, items: List[T]) -> Dict[str, T]: - input_geometry = shpg.shape(bounds_item.geometry) - item_groups = defaultdict(list) - for p in items: - item_groups[getattr(p, group_attribute)].append(p) - item_groups = [ - sorted(item_group, key=lambda x: intersect_area(x, input_geometry), reverse=True)[ - :max_items - ] - for item_group in item_groups.values() - ] - filtered_items = { - item.id: item - for item_group in item_groups - if can_cover( - input_geometry, - item_group, - min_cover, - ) - for item in filter_necessary_items(input_geometry, item_group, within_threshold) - } - if not filtered_items: - raise RuntimeError(f"No product group can cover input geometry {bounds_item.geometry}") - return filtered_items - - def callback(bounds_items: List[DataVibe], items: List[T]) -> Dict[str, List[T]]: - filtered_items = {} - for bounds_item in bounds_items: - filtered_items.update(filter_items(bounds_item, items)) - - return {"filtered_items": [v for v in filtered_items.values()]} - - return callback diff --git a/ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml b/ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml deleted file mode 100644 index 4f1fc03b..00000000 --- a/ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: select_necessary_coverage_items -inputs: - bounds_items: List[DataVibe] - items: List[DataVibe] -output: - # @INHERIT will make the corresponding port to inherit the type of the port which feeds the - # input in parentheses. 
- filtered_items: "@INHERIT(items)" -entrypoint: - file: filter_items.py - callback_builder: callback_builder -parameters: - # Minimum amount of overlap to consider a group, should not be higher than within_threshold - # Between 0 and 1 - min_cover: .99 - # Intersection threshold to consider a geometry is within another - # Between 0 and 1 - within_threshold: .99 - max_items: null # null to consider all items - # Attribute used to group items - group_attribute: "time_range" -dependencies: - parameters: - - min_cover - - within_threshold - - max_items - - group_attribute -description: - short_description: Select items necessary to spatially cover the geometry of the bounds items. - long_description: - The op will be group items according to the chosen attribute and then select the necessary items - from each group, provided the group is able to cover the bounds. Items are selected - independently for each of the bounds items and then deduplicated in the end. - inputs: - bounds_items: Items whose geometries should be covered. - items: Items that will be selected. - output: - filtered_items: Items necessary to cover the geometries. - parameters: - min_cover: Minimum amount of cover required for a group to be used. - within_threshold: - Threshold of relative interesection are for a geoemtry to be contained by another. - max_items: - Maximum number of items per group that can be used to cover a geometry. `None` for no limit. - group_attribute: Which attribute should be used to group the items. diff --git a/ops/select_necessary_coverage_items/test_filter.py b/ops/select_necessary_coverage_items/test_filter.py deleted file mode 100644 index ca3d664c..00000000 --- a/ops/select_necessary_coverage_items/test_filter.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import List, cast - -from shapely import affinity as shpa -from shapely import geometry as shpg - -from vibe_core.data import DataVibe -from vibe_core.data.core_types import BaseVibe -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "select_necessary_coverage_items.yaml" -) - - -def test_op(): - bounds = shpg.Point(10, 10).buffer(5) - bounds = [bounds, shpa.translate(bounds, -6, 6)] - geom = shpg.Point(10, 10).buffer(10) - input_geoms = [ - shpa.translate(geom, -7, 0), - shpa.translate(geom, 8, 0), - shpa.translate(geom, 0, 8), - shpa.translate(geom, 5, 5), - ] - - start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) - bounds_vibe = [DataVibe("bounds", (start_date, end_date), shpg.mapping(b), []) for b in bounds] - input_vibe = [ - DataVibe(f"input{i}", (start_date, end_date), shpg.mapping(g), []) - for i, g in enumerate(input_geoms) - ] - inputs = [bounds_vibe[:1], bounds_vibe[1:2], bounds_vibe] - expected_out = [input_vibe[:2], [input_vibe[0], input_vibe[2]], input_vibe[:3]] - - for inp, out in zip(inputs, expected_out): - output_vibe = OpTester(CONFIG_PATH).run( - bounds_items=cast(BaseVibe, inp), items=cast(List[BaseVibe], input_vibe) - ) - - # Get op result - output_name = "filtered_items" - assert output_name in output_vibe - items = output_vibe[output_name] - assert isinstance(items, list) - assert len(items) == len(out) - assert items == out diff --git a/ops/select_sequence/select_sequence.py b/ops/select_sequence/select_sequence.py deleted file mode 100644 index af91056c..00000000 --- a/ops/select_sequence/select_sequence.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from functools import partial -from typing import Dict, List, Union - -import numpy as np -from shapely.geometry import mapping - -from vibe_core.data import Raster, RasterSequence -from vibe_core.data.core_types import gen_guid - - -def callback( - rasters: Union[RasterSequence, List[Raster]], num: int, criterion: str -) -> Dict[str, RasterSequence]: - if isinstance(rasters, RasterSequence): - rasters = [ - Raster.clone_from( - rasters, - gen_guid(), - assets=[i], - geometry=mapping(rasters.asset_geometry[i.id]), - time_range=rasters.asset_time_range[i.id], - ) - for i in rasters.get_ordered_assets() - ] - - if len(rasters) < num: - raise ValueError( - f"The raster sequence has fewer entries ({len(rasters)}) than requested ({num})" - ) - - if criterion == "first": - idxs = np.arange(num) - elif criterion == "last": - idxs = np.arange(len(rasters) - num, len(rasters)) - elif criterion == "regular": - idxs = np.round(np.linspace(0, len(rasters) - 1, num)).astype(int) - else: - raise ValueError( - f"Invalid selection criterion {criterion}. " - f"Valid criteria are 'first', 'last' and 'regular'" - ) - - selected_rasters = [rasters[i] for i in idxs] - - res = RasterSequence.clone_from(rasters[0], f"select_{criterion}_{gen_guid()}", []) - - for r in selected_rasters: - res.add_item(r) - - return {"sequence": res} - - -def callback_builder(num: int, criterion: str): - return partial(callback, num=num, criterion=criterion) diff --git a/ops/select_sequence/select_sequence.yaml b/ops/select_sequence/select_sequence.yaml deleted file mode 100644 index 7743af66..00000000 --- a/ops/select_sequence/select_sequence.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# This op selects "num" entries from a Raster sequence so that the output sequence has a fixed length. 
-# It can be used to guarantee that the number of elements in a sequence down in the workflow is -# honored -name: select_sequence -inputs: - rasters: RasterSequence -output: - sequence: RasterSequence -parameters: - num: 2 - # criterion is used to select which rasters in the input will be used in the output - # it can be "regular" for regularly spaced selection, "first" to select the "num" first - # rasters, or "last" to select the "num" last rasters - criterion: first -entrypoint: - file: select_sequence.py - callback_builder: callback_builder -dependecies: - parameters: - - num - - criterion \ No newline at end of file diff --git a/ops/select_sequence/select_sequence_from_list.yaml b/ops/select_sequence/select_sequence_from_list.yaml deleted file mode 100644 index a11f1ed9..00000000 --- a/ops/select_sequence/select_sequence_from_list.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# This op selects "num" entries from a Raster list so that the output sequence has a fixed length. -# It can be used to guarantee that the number of elements in a sequence down in the workflow is -# honored -name: select_sequence_from_list -inputs: - rasters: List[Raster] -output: - sequence: RasterSequence -parameters: - num: 37 - criterion: regular -entrypoint: - file: select_sequence.py - callback_builder: callback_builder -dependecies: - parameters: - - num - - criterion -description: - short_description: Selects "num" entries from a Raster list so that the output sequence has a fixed length. - parameters: - num: Number of rasters to select among sequence. - criterion: - Used to select which rasters in the input will be used in the output. - It can be "regular" for regularly spaced selection, "first" to select - the "num" first, rasters, or "last" to select the "num" last rasters. 
\ No newline at end of file diff --git a/ops/split_sequence/split_sequence.py b/ops/split_sequence/split_sequence.py deleted file mode 100644 index 80696380..00000000 --- a/ops/split_sequence/split_sequence.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict, List - -from vibe_core.data.core_types import gen_guid -from vibe_core.data.sentinel import ListTileData, Sequence2Tile, TileSequenceData - - -def callback_builder(): - """Op that splits a list of multiple TileSequence back to a list of Rasters""" - - def split_sequences( - sequences: List[TileSequenceData], - ) -> Dict[str, ListTileData]: - rasters = [ - Sequence2Tile[type(sequence)].clone_from( - sequence, - id=gen_guid(), - assets=[asset], - time_range=sequence.asset_time_range[asset.id], - ) - for sequence in sequences - for asset in sequence.get_ordered_assets() - ] - return {"rasters": rasters} - - return split_sequences diff --git a/ops/split_sequence/split_spaceeye_sequence.yaml b/ops/split_sequence/split_spaceeye_sequence.yaml deleted file mode 100644 index 37f1af9f..00000000 --- a/ops/split_sequence/split_spaceeye_sequence.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: split_spaceeye_sequence -inputs: - sequences: List[SpaceEyeRasterSequence] -output: - rasters: List[SpaceEyeRaster] -parameters: -entrypoint: - file: split_sequence.py - callback_builder: callback_builder -description: - short_description: Splits a list of multiple TileSequence back to a list of Rasters. \ No newline at end of file diff --git a/ops/split_sequence/test_split_sequence.py b/ops/split_sequence/test_split_sequence.py deleted file mode 100644 index a2370c7c..00000000 --- a/ops/split_sequence/test_split_sequence.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import Any, Dict - -from shapely import geometry as shpg - -from vibe_core.data.sentinel import SpaceEyeRasterSequence -from vibe_dev.testing.op_tester import OpTester - -CONFIG_PATH = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "split_spaceeye_sequence.yaml" -) - - -def test_split_empty_sequence(): - polygon: Dict[str, Any] = shpg.mapping(shpg.box(0, 0, 1, 1)) # type: ignore - start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) - seq = SpaceEyeRasterSequence( - id="s1", - time_range=(start_date, end_date), - geometry=polygon, - assets=[], - product_name="", - orbit_number=0, - relative_orbit_number=0, - orbit_direction="", - platform="", - extra_info={}, - tile_id="", - processing_level="", - bands={}, - write_time_range=(start_date, end_date), - ) - out = OpTester(CONFIG_PATH).run(sequences=[seq]) - assert not out["rasters"] diff --git a/ops/stack_landsat/stack_landsat.py b/ops/stack_landsat/stack_landsat.py deleted file mode 100644 index aa331a4c..00000000 --- a/ops/stack_landsat/stack_landsat.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from tempfile import TemporaryDirectory -from typing import Dict, Tuple - -import numpy as np -import rioxarray as rio -import xarray as xr - -from vibe_core.data import AssetVibe, LandsatProduct, gen_hash_id -from vibe_core.data.rasters import LandsatRaster -from vibe_lib.raster import save_raster_to_asset - -LANDSAT_SPYNDEX: Dict[str, str] = { - "blue": "B", - "green": "G", - "red": "R", - "nir08": "N", - "swir16": "S1", - "swir22": "S2", -} - - -def stack_landsat( - input: LandsatProduct, - tmp_folder: str, - qa_mask: int, -) -> Tuple[AssetVibe, Dict[str, int]]: - bands2stack = list(input.asset_map.keys()) - band_filepaths = [input.get_downloaded_band(band).path_or_url for band in bands2stack] - - band_idx = {k: v for v, k in enumerate(bands2stack)} - band_idx["nir"] = band_idx["nir08"] - # Add band aliases for spyndex - for k in LANDSAT_SPYNDEX.keys(): - band_idx[LANDSAT_SPYNDEX[k]] = band_idx[k] - - da = ( - xr.open_mfdataset(band_filepaths, engine="rasterio", combine="nested", concat_dim="bands") - .to_array() - .squeeze() - ) - - if qa_mask: - try: - qa_pixel = ( - rio.open_rasterio(input.get_downloaded_band("qa_pixel").path_or_url) - .squeeze() # type: ignore - .values.astype(int) - ) - mask = np.bitwise_and(qa_pixel, qa_mask) - del qa_pixel - da = da.where(mask) - except Exception as e: - raise ValueError(f"qa_pixel not found {e}") - - asset = save_raster_to_asset(da, tmp_folder) - return asset, band_idx - - -class CallbackBuilder: - def __init__(self, qa_mask_value: int): - self.tmp_dir = TemporaryDirectory() - self.qa_mask = qa_mask_value - - def __call__(self): - def process_landsat( - landsat_product: LandsatProduct, - ) -> Dict[str, LandsatRaster]: - img_asset, band_idx = stack_landsat(landsat_product, self.tmp_dir.name, self.qa_mask) - - bands = LandsatRaster.clone_from( - landsat_product, - id=gen_hash_id( - f"{landsat_product.tile_id}_stacked_landsat", - landsat_product.geometry, - landsat_product.time_range, - ), - assets=[img_asset], - 
bands=band_idx, - ) - - return {"landsat_raster": bands} - - return process_landsat - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/stack_landsat/stack_landsat.yaml b/ops/stack_landsat/stack_landsat.yaml deleted file mode 100644 index dfeed763..00000000 --- a/ops/stack_landsat/stack_landsat.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: stack_landsat_bands -inputs: - landsat_product: LandsatProduct -output: - landsat_raster: LandsatRaster -parameters: - qa_mask_value: 64 -dependencies: - parameters: - - qa_mask_value -entrypoint: - file: stack_landsat.py - callback_builder: CallbackBuilder -description: - short_description: Stacks downloaded bands into a single raster. \ No newline at end of file diff --git a/ops/stack_sentinel2_bands/stack_sentinel2_bands.py b/ops/stack_sentinel2_bands/stack_sentinel2_bands.py deleted file mode 100644 index 265e4e58..00000000 --- a/ops/stack_sentinel2_bands/stack_sentinel2_bands.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import mimetypes -import os -from tempfile import TemporaryDirectory -from typing import Dict, List, Sequence, Tuple, Union - -import geopandas as gpd -import numpy as np -from rasterio.features import rasterize -from rasterio.vrt import WarpedVRT -from rasterio.warp import Resampling - -from vibe_core.data import ( - AssetVibe, - DownloadedSentinel2Product, - Sentinel2CloudMask, - Sentinel2Raster, - gen_guid, -) -from vibe_lib.raster import INT_COMPRESSION_KWARGS, open_raster_from_ref - -BAND_ORDER: List[str] = [ - "B01", - "B02", - "B03", - "B04", - "B05", - "B06", - "B07", - "B08", - "B8A", - "B09", - "B10", - "B11", - "B12", -] - -CLOUD_CATEGORIES = ["NO-CLOUD", "OPAQUE", "CIRRUS", "OTHER"] -LOGGER = logging.getLogger(__name__) - - -def save_stacked_raster(band_filepaths: Sequence[str], ref_filepath: str, out_path: str) -> None: - """ - Save raster by stacking all bands. 
- Reprojects all bands to match the reference band file provided - """ - with open_raster_from_ref(ref_filepath) as src: - meta = src.meta - out_meta = meta.copy() - out_meta.update( - { - "count": len(band_filepaths), - "driver": "GTiff", - "nodata": 0, - **INT_COMPRESSION_KWARGS, - } - ) - - vrt_options = { - "resampling": Resampling.bilinear, - "crs": meta["crs"], - "transform": meta["transform"], - "height": meta["height"], - "width": meta["width"], - } - - with open_raster_from_ref(out_path, "w", **out_meta) as dst: - for i, path in enumerate(band_filepaths): - with open_raster_from_ref(path) as src: - with WarpedVRT(src, **vrt_options) as vrt: - data = vrt.read(1) - dst.write(data, i + 1) - - -def rasterize_clouds(item: DownloadedSentinel2Product, ref_file: str, out_path: str) -> None: - """ - Rasterize cloud shapes and save compressed tiff file. - """ - with open_raster_from_ref(ref_file) as src: - meta = src.meta - meta.update({"nodata": 100, "driver": "GTiff", "dtype": "uint8", **INT_COMPRESSION_KWARGS}) - out = np.zeros((meta["height"], meta["width"])) - try: - gml_path = item.get_downloaded_cloudmask().path_or_url - df = gpd.read_file(gml_path, WRITE_GFS="NO") - cloud_map = { - "OPAQUE": CLOUD_CATEGORIES.index("OPAQUE"), - "CIRRUS": CLOUD_CATEGORIES.index("CIRRUS"), - } - values = ( - df["maskType"].map(cloud_map).fillna(CLOUD_CATEGORIES.index("OTHER")) # type: ignore - ) - rasterize( - ((g, v) for g, v in zip(df["geometry"], values)), # type: ignore - out=out, - transform=meta["transform"], - ) - except ValueError: - # Empty file means no clouds - LOGGER.debug( - "ValueError when opening cloud GML file. 
Assuming there are no clouds and ignoring.", - exc_info=True, - ) - pass - except KeyError: - LOGGER.warning(f"No cloudmask available on downloaded product {item.product_name}") - with open_raster_from_ref(out_path, "w", **meta) as dst: - dst.write(out, 1) - - -def process_s2( - item: DownloadedSentinel2Product, output_file_name: str, tmp_folder: str -) -> Tuple[str, str, List[str]]: - output_img_path = os.path.join(tmp_folder, output_file_name) - output_cloud_path = os.path.join(tmp_folder, "cloudmask.tif") - - # Make sure bands are in order - valid_bands = [b for b in BAND_ORDER if b in item.asset_map] - band_filepaths = [item.get_downloaded_band(b).path_or_url for b in valid_bands] - ref_filepath = band_filepaths[BAND_ORDER.index("B02")] - save_stacked_raster(band_filepaths, ref_filepath, output_img_path) - - # Generate cloud mask - rasterize_clouds(item, ref_filepath, output_cloud_path) - - return output_img_path, output_cloud_path, valid_bands - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def process_sentinel_2( - input_item: DownloadedSentinel2Product, - ) -> Dict[str, Union[Sentinel2Raster, Sentinel2CloudMask]]: - ref_name: str = input_item.product_name - output_file_name = ref_name + ".tif" - tmp_dir = os.path.join(self.tmp_dir.name, ref_name) - os.makedirs(tmp_dir) - - img, cloud, valid_bands = process_s2(input_item, output_file_name, tmp_dir) - - img_asset = AssetVibe(reference=img, type=mimetypes.types_map[".tif"], id=gen_guid()) - cloud_asset = AssetVibe( - reference=cloud, type=mimetypes.types_map[".tif"], id=gen_guid() - ) - - bands = Sentinel2Raster.clone_from( - input_item, - bands={name: idx for idx, name in enumerate(valid_bands)}, - id=ref_name, - assets=[img_asset], - ) - - cloud = Sentinel2CloudMask.clone_from( - input_item, - bands={"cloud": 0}, - categories=CLOUD_CATEGORIES, - id=ref_name, - assets=[cloud_asset], - ) - - return {"sentinel2_raster": bands, 
"sentinel2_cloud_mask": cloud} - - return process_sentinel_2 - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml b/ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml deleted file mode 100644 index 44827892..00000000 --- a/ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: stack_sentinel2_bands -inputs: - input_item: DownloadedSentinel2Product -output: - sentinel2_raster: Sentinel2Raster - sentinel2_cloud_mask: Sentinel2CloudMask -parameters: -entrypoint: - file: stack_sentinel2_bands.py - callback_builder: CallbackBuilder -description: - short_description: - Creates a raster with bands stacked in the correct order and - a cloud mask raster with therasterized cloud shapes. \ No newline at end of file diff --git a/ops/summarize_raster/raster_summary.py b/ops/summarize_raster/raster_summary.py deleted file mode 100644 index c741ef0a..00000000 --- a/ops/summarize_raster/raster_summary.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from tempfile import TemporaryDirectory -from typing import Any, Dict, Optional - -import pandas as pd -from shapely import geometry as shpg - -from vibe_core.data import DataSummaryStatistics, DataVibe, Raster, gen_guid -from vibe_core.data.core_types import AssetVibe -from vibe_lib.raster import load_raster_from_url - - -def summarize_raster( - raster: Raster, mask: Optional[Raster], geometry: Dict[str, Any] -) -> Dict[str, float]: - geom = shpg.shape(geometry).intersection(shpg.shape(raster.geometry)) - data_ar = load_raster_from_url(raster.raster_asset.url, geometry=geom, geometry_crs="epsg:4326") - data_ma = data_ar.to_masked_array() - if mask is not None: - mask_ma = load_raster_from_url( - mask.raster_asset.url, - crs=data_ar.rio.crs, - geometry=geom, - geometry_crs="epsg:4326", - ).to_masked_array() - # Update mask - data_ma.mask = data_ma.mask | (mask_ma.data > 0 & ~mask_ma.mask) - masked_ratio = mask_ma.mean() - else: - masked_ratio = 0.0 - return { - "mean": data_ma.mean(), - "std": data_ma.std(), - "min": data_ma.min(), - "max": data_ma.max(), - "masked_ratio": masked_ratio, - } - - -class CallbackBuilder: - def __init__(self): - self.tmp_dir = TemporaryDirectory() - - def __call__(self): - def callback( - raster: Raster, input_geometry: DataVibe, mask: Optional[Raster] = None - ) -> Dict[str, DataSummaryStatistics]: - geom = input_geometry.geometry - stats = summarize_raster(raster, mask, geom) - guid = gen_guid() - filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") - pd.DataFrame(stats, index=pd.Index([raster.time_range[0]], name="date")).to_csv( - filepath - ) - summary = DataSummaryStatistics.clone_from( - raster, - geometry=geom, - id=gen_guid(), - assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], - ) - return {"summary": summary} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/summarize_raster/summarize_masked_raster.yaml b/ops/summarize_raster/summarize_masked_raster.yaml 
deleted file mode 100644 index eec19835..00000000 --- a/ops/summarize_raster/summarize_masked_raster.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: summarize_masked_raster -inputs: - raster: Raster - mask: Raster - input_geometry: DataVibe -output: - summary: DataSummaryStatistics -parameters: -entrypoint: - file: raster_summary.py - callback_builder: CallbackBuilder -description: - short_description: - Computes the mean, standard deviation, maximum, and minimum values - across non-masked regions of the raster. \ No newline at end of file diff --git a/ops/summarize_raster/summarize_raster.yaml b/ops/summarize_raster/summarize_raster.yaml deleted file mode 100644 index be220e94..00000000 --- a/ops/summarize_raster/summarize_raster.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: summarize_raster -inputs: - raster: Raster - input_geometry: DataVibe -output: - summary: DataSummaryStatistics -parameters: -entrypoint: - file: raster_summary.py - callback_builder: CallbackBuilder -description: - short_description: - Computes the mean, standard deviation, maximum, and minimum values across the whole raster. \ No newline at end of file diff --git a/ops/threshold_raster/threshold_raster.py b/ops/threshold_raster/threshold_raster.py deleted file mode 100644 index 91f84b19..00000000 --- a/ops/threshold_raster/threshold_raster.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from tempfile import TemporaryDirectory -from typing import Dict, Optional, cast - -import numpy as np - -from vibe_core.data import Raster -from vibe_lib.raster import MaskedArrayType, load_raster, save_raster_from_ref - - -class CallbackBuilder: - def __init__(self, threshold: Optional[float]): - self.tmp_dir = TemporaryDirectory() - if threshold is None: - raise ValueError( - "Threshold must not be None. " - "Did you forget to overwrite the value on the workflow definition?" 
- ) - self.threshold = threshold - - def __call__(self): - def callback(raster: Raster) -> Dict[str, Raster]: - data_ar = load_raster(raster) - # Make a mess to keep the mask intact - data_ma = data_ar.to_masked_array() - thr_ma = cast(MaskedArrayType, (data_ma > self.threshold).astype("float32")) - thr_ar = data_ar.copy(data=thr_ma.filled(np.nan)) - # Save it as uint8 instead of the original dtype - thr_ar.rio.update_encoding({"dtype": "uint8"}, inplace=True) - thr_raster = save_raster_from_ref(thr_ar, self.tmp_dir.name, raster) - return {"thresholded": thr_raster} - - return callback - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/threshold_raster/threshold_raster.yaml b/ops/threshold_raster/threshold_raster.yaml deleted file mode 100644 index 411df772..00000000 --- a/ops/threshold_raster/threshold_raster.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: threshold_raster -inputs: - raster: Raster -output: - thresholded: Raster -parameters: - threshold: null -entrypoint: - file: threshold_raster.py - callback_builder: CallbackBuilder -dependencies: - parameters: - - threshold -description: - short_description: Thresholds values of the input raster if higher than the threshold parameter. \ No newline at end of file diff --git a/ops/tile_sentinel1/tile_sentinel1.py b/ops/tile_sentinel1/tile_sentinel1.py deleted file mode 100644 index 8e14ca55..00000000 --- a/ops/tile_sentinel1/tile_sentinel1.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import hashlib -import logging -from tempfile import TemporaryDirectory -from typing import Dict, List, Union, cast, overload - -import fiona -import geopandas as gpd -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import ( - DownloadedSentinel1Product, - Sentinel1Raster, - Sentinel2Product, - TiledSentinel1Product, -) - -LOGGER = logging.getLogger(__name__) -S1List = Union[List[DownloadedSentinel1Product], List[Sentinel1Raster]] -TiledList = Union[List[TiledSentinel1Product], List[Sentinel1Raster]] -KML_DRIVER_NAMES = "kml KML libkml LIBKML".split() - - -@overload -def prepare_items( - s1_products: List[DownloadedSentinel1Product], tiles_df: gpd.GeoDataFrame -) -> List[TiledSentinel1Product]: ... - - -@overload -def prepare_items( - s1_products: List[Sentinel1Raster], tiles_df: gpd.GeoDataFrame -) -> List[Sentinel1Raster]: ... - - -def prepare_items( - s1_products: S1List, - tiles_df: gpd.GeoDataFrame, -) -> TiledList: - processing_items = [] - for s1_item in s1_products: - s1_geom = shpg.shape(s1_item.geometry) - intersecting_df = cast(gpd.GeoDataFrame, tiles_df[tiles_df.intersects(s1_geom)]) - for _, intersecting_tile in intersecting_df.iterrows(): - geom = cast(BaseGeometry, intersecting_tile["geometry"]).buffer(0) - tile_id = cast(str, intersecting_tile["Name"]) - id = hashlib.sha256((s1_item.id + tile_id).encode()).hexdigest() - out_type = ( - TiledSentinel1Product - if isinstance(s1_item, DownloadedSentinel1Product) - else Sentinel1Raster - ) - tiled_s1 = out_type.clone_from( - s1_item, - id=id, - assets=s1_item.assets, - geometry=shpg.mapping(geom), - tile_id=tile_id, - ) - processing_items.append(tiled_s1) - return processing_items - - -class CallbackBuilder: - def __init__(self, tile_geometry: str): - self.tmp_dir = TemporaryDirectory() - self.tile_geometry = tile_geometry - - def __call__(self): - def preprocess_items( - sentinel1_products: S1List, - sentinel2_products: 
List[Sentinel2Product], - ) -> Dict[str, TiledList]: - tile_ids = set(p.tile_id for p in sentinel2_products) - # Make fiona read the file: https://gis.stackexchange.com/questions/114066/ - for driver in KML_DRIVER_NAMES: - fiona.drvsupport.supported_drivers[driver] = "rw" # type: ignore - - df = gpd.read_file(self.tile_geometry) - # Filter only tiles for which we have products - df = cast(gpd.GeoDataFrame, df[df["Name"].isin(tile_ids)]) # type: ignore - - # Prepare items for preprocessing with the s1 item, target geometry and tile id - processing_items = prepare_items(sentinel1_products, df) - - return {"tiled_products": processing_items} - - return preprocess_items - - def __del__(self): - self.tmp_dir.cleanup() diff --git a/ops/tile_sentinel1/tile_sentinel1.yaml b/ops/tile_sentinel1/tile_sentinel1.yaml deleted file mode 100644 index d13d84df..00000000 --- a/ops/tile_sentinel1/tile_sentinel1.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: tile_sentinel1 -inputs: - sentinel1_products: List[DownloadedSentinel1Product] - sentinel2_products: List[Sentinel2Product] -output: - tiled_products: List[TiledSentinel1Product] -parameters: - tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml -entrypoint: - file: tile_sentinel1.py - callback_builder: CallbackBuilder -description: - short_description: Match Sentinel-1 products that intersect with Sentinel-2 tiles. - long_description: - The op will generate an item for each Sentinel-1 product x Sentinel-2 tile combination if both - intersect. This op only handles metadata, no asset is changed. - inputs: - sentinel1_products: Sentinel-1 products that will be tiled. - sentinel2_products: Sentinel-2 products from which the tiles will extracted. - output: - tiled_products: Sentinel-1 products with the added tiling metadata. - parameters: - tile_geometry: Path to the resource containing Sentinel-2 tile geometries. 
diff --git a/ops/tile_sentinel1/tile_sentinel1_rtc.yaml b/ops/tile_sentinel1/tile_sentinel1_rtc.yaml deleted file mode 100644 index 3e93f9a4..00000000 --- a/ops/tile_sentinel1/tile_sentinel1_rtc.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: tile_sentinel1 -inputs: - sentinel1_products: List[Sentinel1Raster] - sentinel2_products: List[Sentinel2Product] -output: - tiled_products: List[Sentinel1Raster] -parameters: - tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml -entrypoint: - file: tile_sentinel1.py - callback_builder: CallbackBuilder -description: - short_description: Match Sentinel-1 products that intersect with Sentinel-2 tiles. - long_description: - The op will generate an item for each Sentinel-1 product x Sentinel-2 tile combination if both - intersect. This op only handles metadata, no asset is changed. - inputs: - sentinel1_products: Sentinel-1 products that will be tiled. - sentinel2_products: Sentinel-2 products from which the tiles will extracted. - output: - tiled_products: Sentinel-1 products with the added tiling metadata. - parameters: - tile_geometry: Path to the resource containing Sentinel-2 tile geometries. diff --git a/ops/unpack_refs/unpack_refs.py b/ops/unpack_refs/unpack_refs.py deleted file mode 100644 index 33c1db1a..00000000 --- a/ops/unpack_refs/unpack_refs.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Dict, List - -from vibe_core.data import ExternalReference, ExternalReferenceList, gen_guid - - -def callback_builder(): - def callback( - input_refs: List[ExternalReferenceList], - ) -> Dict[str, List[ExternalReference]]: - return { - "ref_list": [ - ExternalReference.clone_from(refs, id=gen_guid(), url=url, assets=[]) - for refs in input_refs - for url in refs.urls - ] - } - - return callback diff --git a/ops/unpack_refs/unpack_refs.yaml b/ops/unpack_refs/unpack_refs.yaml deleted file mode 100644 index f2ad548e..00000000 --- a/ops/unpack_refs/unpack_refs.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: unpack_refs -inputs: - input_refs: List[ExternalReferenceList] -output: - ref_list: List[ExternalReference] -parameters: -entrypoint: - file: unpack_refs.py - callback_builder: callback_builder -description: - short_description: Unpacks the urls from the list of external references. \ No newline at end of file diff --git a/ops/weed_detection/weed_detection.py b/ops/weed_detection/weed_detection.py deleted file mode 100644 index 1434abc1..00000000 --- a/ops/weed_detection/weed_detection.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from dataclasses import dataclass -from enum import auto -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Tuple, Union - -import geopandas as gpd -import numpy as np -import rasterio -from numpy.typing import NDArray -from rasterio.crs import CRS -from rasterio.features import geometry_mask, shapes, sieve -from rasterio.mask import mask -from rasterio.transform import Affine -from shapely import geometry as shpg -from sklearn.mixture import GaussianMixture -from strenum import StrEnum - -from vibe_core.data import DataVibe -from vibe_core.data.core_types import AssetVibe, gen_guid -from vibe_core.data.rasters import Raster -from vibe_lib.archive import create_flat_archive - - -class SimplifyBy(StrEnum): - simplify = auto() - convex = auto() - none = auto() - - -@dataclass -class OpenedRaster: - """Load a raster for training and prediction - - Attributes: - pixels: 1D array of selected data points - shape: shape of the input raster - alpha_mask: boolean values indicating which pixels were selected from the input raster - transform: affine transform of the input raster - crs: coordinate reference system of the input raster - """ - - def __init__( - self, - raster: Raster, - buffer: int, - no_data: Union[int, None], - alpha_index: int, - bands: List[int], - ): - with rasterio.open(raster.raster_asset.url) as src: - projected_geo = ( - gpd.GeoSeries(shpg.shape(raster.geometry), crs="epsg:4326").to_crs(src.crs).iloc[0] - ) - - if no_data is None: - no_data = src.nodata - ar, self.tr = mask(src, [projected_geo], crop=True, nodata=no_data) - self.input_crs = src.crs - - self.buffer_mask = geometry_mask( - [projected_geo.buffer(buffer)], ar.shape[1:], self.tr, invert=True - ) - - # Create an alpha mask - if alpha_index >= 0: - self._alpha_mask = ar[alpha_index].astype(bool) - else: # no alpha band - self._alpha_mask = np.ones(ar.shape[1:], dtype=bool) - - if not bands: - bands = [i for i in range(ar.shape[0]) if i != alpha_index] 
- self.pixels = ar[bands] - - self.input_shape = ar.shape - - @property - def shape(self) -> Tuple[int]: - return self.input_shape - - @property - def crs(self) -> CRS: - return self.input_crs - - @property - def transform(self) -> Affine: - return self.tr - - @property - def training_data(self) -> NDArray[Any]: - mask = self.buffer_mask & self.alpha_mask - return self.pixels[:, mask] - - @property - def prediction_data(self) -> NDArray[Any]: - return self.pixels[:, self.alpha_mask] - - @property - def alpha_mask(self) -> NDArray[Any]: - return self._alpha_mask - - -def train_model(open_raster: OpenedRaster, samples: int, clusters: int) -> GaussianMixture: - training_data = open_raster.training_data - idx = np.random.choice(training_data.shape[1], samples) - xy = training_data[:, idx].T - - gmm = GaussianMixture(n_components=clusters, covariance_type="full") - gmm.fit(xy) - - return gmm - - -def predict( - open_raster: OpenedRaster, - sieve_size: int, - clusters: int, - simplify: SimplifyBy, - tolerance: float, - model: GaussianMixture, - output_dir: str, -) -> AssetVibe: - prediction_data = open_raster.prediction_data - classes = model.predict(prediction_data.reshape(prediction_data.shape[0], -1).T) - result = np.zeros(open_raster.shape[1:], dtype=np.uint8) - result[open_raster.alpha_mask] = classes - result = sieve(result, sieve_size) - - file_num = 0 - for segment in range(clusters): - cluster = (result == segment).astype(np.uint8) - - df_shapes = gpd.GeoSeries( - [shpg.shape(s) for s, _ in shapes(cluster, mask=cluster, transform=open_raster.tr)], - crs=open_raster.crs, - ) # type: ignore - - if df_shapes.empty: - # Model could not converge with all requested clusters - continue - - cluster_path = os.path.join(output_dir, f"cluster{file_num}") - file_num += 1 - - if simplify == SimplifyBy.simplify: - df_shapes.simplify(tolerance).to_file(cluster_path) - elif simplify == SimplifyBy.convex: - df_shapes.convex_hull.to_file(cluster_path) - elif simplify == 
SimplifyBy.none: - df_shapes.to_file(cluster_path) # type: ignore - - # Create zip archive containing all output - archive_path = create_flat_archive(output_dir, "result") - return AssetVibe(reference=archive_path, type="application/zip", id=gen_guid()) - - -class CallbackBuilder: - def __init__( - self, - buffer: int, - no_data: Union[int, None], - clusters: int, - sieve_size: int, - simplify: str, - tolerance: float, - samples: int, - bands: List[int], - alpha_index: int, - ): - self.temp_dir = TemporaryDirectory() - self.buffer = buffer - self.no_data = no_data - self.clusters = clusters - self.sieve_size = sieve_size - self.simplify = SimplifyBy(simplify.lower()) - self.tolerance = tolerance - self.samples = samples - self.bands = bands - self.alpha_index = alpha_index - - def __call__(self): - def detect_weeds( - raster: Raster, - ) -> Dict[str, DataVibe]: - open_raster = OpenedRaster( - raster=raster, - buffer=self.buffer, - no_data=self.no_data, - alpha_index=self.alpha_index, - bands=self.bands, - ) - - model = train_model( - open_raster=open_raster, - samples=self.samples, - clusters=self.clusters, - ) - - prediction = predict( - open_raster=open_raster, - sieve_size=self.sieve_size, - clusters=self.clusters, - simplify=self.simplify, - tolerance=self.tolerance, - model=model, - output_dir=self.temp_dir.name, - ) - - result = DataVibe( - id=gen_guid(), - time_range=raster.time_range, - geometry=raster.geometry, - assets=[prediction], - ) - return {"result": result} - - return detect_weeds - - def __del__(self): - self.temp_dir.cleanup() diff --git a/ops/weed_detection/weed_detection.yaml b/ops/weed_detection/weed_detection.yaml deleted file mode 100644 index ea1f3cc7..00000000 --- a/ops/weed_detection/weed_detection.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: weed_detection -inputs: - raster: Raster -output: - result: DataVibe -parameters: - buffer: -50 - no_data: - clusters: 4 - sieve_size: 2000 - simplify: "simplify" - tolerance: 0.25 - samples: 100000 - 
bands: [] - alpha_index: -1 -entrypoint: - callback_builder: CallbackBuilder - file: weed_detection.py -dependencies: - parameters: - - buffer - - no_data - - clusters - - sieve_size - - simplify - - tolerance - - samples - - bands - - alpha_index -version: 2 -description: - short_description: - Trains a Gaussian Mixture Model (GMM), cluster all images pixels, and convert clustered - regions into polygons. \ No newline at end of file diff --git a/pyrightconfig.json b/pyrightconfig.json deleted file mode 100644 index f60bffd4..00000000 --- a/pyrightconfig.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "ignore": [ - "ops/run_landcover_model", - ], - "reportMissingParameterType": "error", - "reportInvalidTypeVarUse": "error", - "reportMissingTypeArgument": "error", - "reportMissingImports": "warning", - "typeCheckingMode": "basic", - "useLibraryCodeForTypes": true, -} \ No newline at end of file diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 44effbb2..00000000 --- a/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -addopts = -k 'not benchmark' -pythonpath = src -filterwarnings = ignore:.*fields may not start with an underscore.* diff --git a/resources/docker/Dockerfile-api_orchestrator b/resources/docker/Dockerfile-api_orchestrator deleted file mode 100644 index cc9bca7b..00000000 --- a/resources/docker/Dockerfile-api_orchestrator +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -FROM mcr.microsoft.com/farmai/terravibes/services-base:12380 - -COPY src /app/src -COPY workflows /app/workflows -COPY ops /app/ops - -RUN /opt/conda/bin/pip install /app/src/vibe_core && \ - /opt/conda/bin/pip install /app/src/vibe_common && \ - /opt/conda/bin/pip install /app/src/vibe_server - -RUN rm -rf /app/src - -RUN find /app -type d -name __pycache__ | xargs rm -rf - -EXPOSE 3000 3500 50001 - -WORKDIR /app diff --git a/resources/docker/Dockerfile-cache b/resources/docker/Dockerfile-cache deleted file mode 100644 index bb975c3d..00000000 --- a/resources/docker/Dockerfile-cache +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -FROM mcr.microsoft.com/farmai/terravibes/services-base:12380 - -COPY src /app/src -COPY workflows /app/workflows -COPY ops /app/ops - -RUN /opt/conda/bin/pip install /app/src/vibe_core && \ - /opt/conda/bin/pip install /app/src/vibe_common && \ - /opt/conda/bin/pip install /app/src/vibe_agent - -RUN rm -rf /app/src - -RUN find /app -type d -name __pycache__ | xargs rm -rf - -EXPOSE 3000 3500 50001 - -WORKDIR /app diff --git a/resources/docker/Dockerfile-dev b/resources/docker/Dockerfile-dev deleted file mode 100644 index 7567c838..00000000 --- a/resources/docker/Dockerfile-dev +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -ARG BASE_IMAGE -FROM $BASE_IMAGE -COPY resources/envs/dev.yaml /tmp/dev.yaml - -RUN tdnf update -y || echo "Not updating anything..." 
&& \ - tdnf install -y sudo azure-cli - -RUN micromamba install -f /tmp/dev.yaml - -RUN az extension add --system --name azure-devops - -RUN mkdir -p /opt/terravibes/ops/resources - -RUN chmod 777 /opt/terravibes/ops/resources - -EXPOSE 3000 3500 50001 diff --git a/resources/docker/Dockerfile-devcontainer b/resources/docker/Dockerfile-devcontainer deleted file mode 100644 index a1259414..00000000 --- a/resources/docker/Dockerfile-devcontainer +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -FROM mcr.microsoft.com/farmai/terravibes/worker-base:12380 - -ARG USERNAME=vscode -ARG USER_UID=1000 -ARG USER_GID=$USER_UID -ARG DOCKER_GID=998 -ENV DOCKER_BUILDKIT=1 -ENV PATH="/opt/conda/bin:${PATH}" -ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" - -USER root - -RUN tdnf upgrade -y && \ - tdnf install -y moby-cli moby-compose moby-engine moby-buildx \ - ca-certificates unzip iptables which dos2unix git-lfs \ - icu icu-devel build-essential pkg-config zsh sudo azure-cli && \ - rm -rf /var/cache/tdnf/* - -RUN groupadd --gid $USER_GID $USERNAME && \ - useradd --uid $USER_UID --gid $USER_GID -m $USERNAME -s /usr/bin/zsh && \ - touch /etc/sudoers.d/$USERNAME && \ - zsh -c "echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME" && \ - cat /etc/sudoers.d/$USERNAME && \ - chmod 0440 /etc/sudoers.d/$USERNAME && \ - mkdir /home/$USERNAME/.vscode-server && \ - chown -R $USERNAME:$USERNAME /home/$USERNAME - -RUN groupadd --gid $DOCKER_GID docker || echo ; \ - usermod -aG $(grep docker /etc/group | cut -d : -f 1) $USERNAME - -COPY ./docker-in-docker-install.sh /tmp/ -RUN dos2unix /tmp/docker-in-docker-install.sh && \ - /bin/bash /tmp/docker-in-docker-install.sh - -USER $USERNAME -WORKDIR /home/$USERNAME - -ENTRYPOINT ["/usr/local/share/docker-init.sh"] -VOLUME [ "/var/lib/docker" ] -CMD ["sleep", "infinity"] diff --git a/resources/docker/Dockerfile-services-base b/resources/docker/Dockerfile-services-base 
deleted file mode 100644 index f0368c21..00000000 --- a/resources/docker/Dockerfile-services-base +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS builder - - -RUN tdnf update -y || echo "Not updating anything..." &&\ - tdnf install -y tar ca-certificates && tdnf clean all - -WORKDIR /usr/local - -RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \ - ln -s /usr/local/bin/micromamba /usr/local/bin/conda && \ - chmod +x /usr/local/bin/micromamba - -COPY resources/envs/rest-api_orchestrator.yml /tmp/rest-api.yml -COPY resources/envs/services-requirements.txt /tmp/services-requirements.txt - -RUN conda env create -p /opt/conda -f /tmp/rest-api.yml && \ - conda clean --all --yes - -ENV CONDA_PREFIX=/opt/conda -RUN /opt/conda/bin/pip install uv && \ - /opt/conda/bin/uv pip install -r /tmp/services-requirements.txt && \ - /opt/conda/bin/uv cache clean && \ - rm -r /root/.cache/pip && \ - rm -rf /root/.mamba && \ - rm -rf /opt/*conda/pkgs && \ - rm -rf /app/docker && \ - find / -type d -name __pycache__ | xargs rm -rf && \ - rm /tmp/rest-api.yml && \ - rm /tmp/services-requirements.txt && \ - rm -fr /var/cache/tdnf && \ - rm -fr /var/cache/conda - -ENV PATH "/opt/conda/bin:${PATH}" diff --git a/resources/docker/Dockerfile-worker b/resources/docker/Dockerfile-worker deleted file mode 100644 index bfaca499..00000000 --- a/resources/docker/Dockerfile-worker +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -FROM mcr.microsoft.com/farmai/terravibes/worker-base:12380 - -COPY src /app/src -COPY workflows /app/workflows -COPY ops /app/ops - -COPY op_resources/ /opt/terravibes/ops/resources/ - -RUN /opt/conda/bin/pip install /app/src/vibe_core && \ - /opt/conda/bin/pip install /app/src/vibe_common && \ - /opt/conda/bin/pip install /app/src/vibe_agent && \ - /opt/conda/bin/pip install /app/src/vibe_lib - -RUN rm -rf /app/src - -EXPOSE 3000 3500 50001 - -ENV PYTHONPATH "/app" - -WORKDIR /app - -SHELL ["conda", "run", "--no-capture-output", "-p", "/opt/conda", "/bin/bash", "-c"] diff --git a/resources/docker/Dockerfile-worker-base b/resources/docker/Dockerfile-worker-base deleted file mode 100644 index 7e95ddd7..00000000 --- a/resources/docker/Dockerfile-worker-base +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS builder - -RUN tdnf update -y || echo "Not updating anything..." && tdnf install -y \ - libibverbs \ - librdmacm \ - iproute \ - build-essential \ - git \ - wget \ - tar \ - python3-pip \ - freefont \ - procps \ - ca-certificates \ - util-linux \ - dejavu-sans-fonts \ - fontconfig && tdnf clean all # fontconfig used to be for SNAP, but it doesnt hurt leaving it - -FROM builder AS builder1 - -# The line below was for SNAP, but it doesn't hurt to have it -ENV JAVA_OPTS=-Djava.awt.headless=true -ENV CONDA_PREFIX=/opt/conda - -WORKDIR /usr/local - -RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \ - ln -s /usr/local/bin/micromamba /usr/local/bin/conda && \ - chmod +x /usr/local/bin/micromamba - -RUN conda shell init -s bash - -COPY resources/envs/worker.yml /tmp -COPY resources/envs/worker-requirements.txt /tmp - -RUN conda env create -p /opt/conda -f /tmp/worker.yml && \ - conda clean --all --yes - -RUN /opt/conda/bin/pip install uv && \ - /opt/conda/bin/pip install torch==2.1.0 --index-url 
https://download.pytorch.org/whl/cpu && \ - /opt/conda/bin/pip install torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cpu && \ - /opt/conda/bin/uv pip install -r /tmp/worker-requirements.txt && \ - /opt/conda/bin/uv cache clean && \ - rm -r /root/.cache/pip && \ - rm -rf /root/.mamba && \ - rm -rf /opt/*conda/pkgs && \ - rm -rf /app/docker && \ - find / -type d -name __pycache__ | xargs rm -rf && \ - rm /tmp/worker.yml && \ - rm /tmp/worker-requirements.txt && \ - rm -fr /var/cache/tdnf && \ - rm -fr /var/cache/conda - -ENV PATH "/opt/conda/bin:${PATH}" - -# compilation is necessary the first time aquacrop is imported and this needs writing permission -RUN /opt/conda/bin/pip install aquacrop==2.2.3 --no-deps && \ - python -c "import aquacrop" diff --git a/resources/docker/docker-in-docker-install.sh b/resources/docker/docker-in-docker-install.sh deleted file mode 100755 index 7489faba..00000000 --- a/resources/docker/docker-in-docker-install.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/sh -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# This is based on https://github.com/devcontainers/features/blob/main/src/docker-in-docker/install.sh -# We had to copy this because, at the time of writing, devcontainers didn't have tooling to run -# features on top of CBL-Mariner2/Azure Linux. - -if [ -f "/usr/local/share/docker-init.sh" ]; then - echo "/usr/local/share/docker-init.sh already exists, exiting." 
- exit 0 -fi - -tee /usr/local/share/docker-init.sh > /dev/null \ -<< 'EOF' -dockerd_start="AZURE_DNS_AUTO_DETECTION=${AZURE_DNS_AUTO_DETECTION} DOCKER_DEFAULT_ADDRESS_POOL=${DOCKER_DEFAULT_ADDRESS_POOL} $(cat << 'INNEREOF' - # explicitly remove dockerd and containerd PID file to ensure that it can start properly if it was stopped uncleanly - # ie: docker kill - find /run /var/run -iname 'docker*.pid' -delete || : - find /run /var/run -iname 'container*.pid' -delete || : - - ## Dind wrapper script from docker team, adapted to a function - # Maintained: https://github.com/moby/moby/blob/master/hack/dind - - export container=docker - - if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security; then - mount -t securityfs none /sys/kernel/security || { - echo >&2 'Could not mount /sys/kernel/security.' - echo >&2 'AppArmor detection and --privileged mode might break.' - } - fi - - # Mount /tmp (conditionally) - if ! mountpoint -q /tmp; then - mount -t tmpfs none /tmp - fi - - # cgroup v2: enable nesting - if [ -f /sys/fs/cgroup/cgroup.controllers ]; then - # move the processes from the root group to the /init group, - # otherwise writing subtree_control fails with EBUSY. - # An error during moving non-existent process (i.e., "cat") is ignored. - mkdir -p /sys/fs/cgroup/init - xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || : - # enable controllers - sed -e 's/ / +/g' -e 's/^/+/' < /sys/fs/cgroup/cgroup.controllers \ - > /sys/fs/cgroup/cgroup.subtree_control - fi - ## Dind wrapper over. - - # Handle DNS - set +e - cat /etc/resolv.conf | grep -i 'internal.cloudapp.net' - if [ $? -eq 0 ] && [ "${AZURE_DNS_AUTO_DETECTION}" = "true" ] - then - echo "Setting dockerd Azure DNS." - CUSTOMDNS="--dns 168.63.129.16" - else - echo "Not setting dockerd DNS manually." 
- CUSTOMDNS="" - fi - - set -e - - if [ -z "$DOCKER_DEFAULT_ADDRESS_POOL" ] - then - DEFAULT_ADDRESS_POOL="" - else - DEFAULT_ADDRESS_POOL="--default-address-pool $DOCKER_DEFAULT_ADDRESS_POOL" - fi - - # Start docker/moby engine - ( dockerd $CUSTOMDNS $DEFAULT_ADDRESS_POOL > /tmp/dockerd.log 2>&1 ) & -INNEREOF -)" - -# Start using sudo if not invoked as root -if [ "$(id -u)" -ne 0 ]; then - sudo /bin/sh -c "${dockerd_start}" -else - eval "${dockerd_start}" -fi - -set +e - -# Execute whatever commands were passed in (if any). This allows us -# to set this script to ENTRYPOINT while still executing the default CMD. -exec "$@" -EOF - -chmod +x /usr/local/share/docker-init.sh - -echo 'docker-in-docker script completed' diff --git a/resources/documentation_generation/generate_datatype_hierarchy_diagram.py b/resources/documentation_generation/generate_datatype_hierarchy_diagram.py deleted file mode 100644 index 5f670e39..00000000 --- a/resources/documentation_generation/generate_datatype_hierarchy_diagram.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -import subprocess -from typing import List - -from jinja2 import Template - -HERE = os.path.dirname(os.path.abspath(__file__)) -PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) -DOC_DIR = os.path.abspath( - os.path.join(PROJECT_DIR, "docs", "source", "docfiles", "markdown", "data_types_diagram") -) -DATA_TYPES_PATH = os.path.abspath( - os.path.join(PROJECT_DIR, "src", "vibe_core", "vibe_core", "data") -) -TEMPLATE_PATH = os.path.abspath(os.path.join(HERE, "templates", "datatype_hierarchy_template.md")) - - -def render_template( - mermaid_diagram: str, - output_path: str, - template_path: str, -): - """Load and render template given a data source""" - - with open(template_path) as f: - t = Template(f.read()) - - rendered_template = t.render(mermaid_diagram=mermaid_diagram) - - if not os.path.exists(os.path.dirname(output_path)): - os.makedirs(os.path.dirname(output_path)) - - with open(output_path, "w") as f: - 
f.write(rendered_template) - - -def list_modules(module_path: str) -> List[str]: - """List all modules in module_path""" - - paths = [] - for root, dirs, files in os.walk(module_path): - for file in files: - if file.endswith(".py") and not file.startswith("__"): - paths.append(os.path.join(root, file)) - - return paths - - -def build_data_type_diagrams(data_module_paths: List[str]): - for path in data_module_paths: - module_name = path.split("/")[-1].split(".")[0] - subprocess.run( - [ - "pyreverse", - "-my", - "-A", - "-k", - "-o", - "mmd", - "-p", - f"{module_name}", - path, - ], - check=True, - ) - - with open(f"classes_{module_name}.mmd") as f: - mmd = f.read() - render_template(mmd, os.path.join(DOC_DIR, f"{module_name}_hierarchy.md"), TEMPLATE_PATH) - - # Delete the generated mmd file with subprocess.run - subprocess.run(["rm", f"classes_{module_name}.mmd"], check=True) - - -def main(): - data_module_paths = list_modules(DATA_TYPES_PATH) - build_data_type_diagrams(data_module_paths) - - -if __name__ == "__main__": - main() diff --git a/resources/documentation_generation/generate_notebook_list.py b/resources/documentation_generation/generate_notebook_list.py deleted file mode 100644 index 1c544309..00000000 --- a/resources/documentation_generation/generate_notebook_list.py +++ /dev/null @@ -1,160 +0,0 @@ -import json -import os -from dataclasses import dataclass -from math import inf -from typing import Dict, List, Optional, Tuple - -from jinja2 import Template - -HERE = os.path.dirname(os.path.abspath(__file__)) -PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) -NOTEBOOK_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "notebooks")) -LIST_TEMPLATE_PATH = os.path.abspath(os.path.join(HERE, "templates", "list_notebook_template.md")) -DOC_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "docs", "source", "docfiles", "markdown")) -OUTPUT_PATH = os.path.abspath(os.path.join(DOC_DIR, "NOTEBOOK_LIST.md")) -GITHUB_URL = 
"https://github.com/microsoft/farmvibes-ai/blob/main" -PRIVATE_TAG = "private" - - -@dataclass -class NotebookDataSource: - name: str - description: str - tags: List[Tuple[int, str]] - repo_path: str - disk_time_req: str - - -def render_template( - data_source: List[NotebookDataSource], - tag_data_source: List[Tuple[Tuple[int, str], List[NotebookDataSource]]], - output_path: str, -): - """Load and render template given a data source""" - - with open(LIST_TEMPLATE_PATH) as f: - t = Template(f.read()) - - rendered_template = t.render( - data_source=data_source, - tag_data_source=tag_data_source, - ) - - with open(output_path, "w") as f: - f.write(rendered_template) - - -def format_disk_time_req(disk_space: str, running_time: str) -> str: - """Format the disk space and running time requirements""" - output_str = "({}{}{})" if disk_space or running_time else "{}{}{}" - sep = ", " if disk_space and running_time else "" - return output_str.format(disk_space, sep, running_time) - - -def parse_nb_metadata(nb_path: str) -> Optional[NotebookDataSource]: - """Parse the ipynb to extract its metadata""" - with open(nb_path) as f: - nb_json = json.load(f) - - try: - nb_metadata = nb_json["metadata"] - except KeyError: - raise KeyError(f"Notebook {nb_path} has no metadata") - - # Parse tag order - nb_tags = [] - try: - tags = nb_metadata["tags"] - except KeyError: - raise KeyError(f"Notebook {nb_path} with metadata {nb_metadata} has no tags") - - for tag in tags: - tag_components = tag.split("_") - if len(tag_components) == 2: - tag_order = int(tag_components[0]) - tag_name = tag_components[-1] - else: - tag_order = inf - tag_name = tag_components[-1] - - if tag_name == PRIVATE_TAG: - return None - nb_tags.append((tag_order, tag_name)) - - nb_name = nb_metadata["name"] - nb_description = nb_metadata["description"] - nb_repo_path = f"{GITHUB_URL}{nb_path.split(PROJECT_DIR)[-1]}" - nb_disk_time_req = format_disk_time_req(nb_metadata["disk_space"], nb_metadata["running_time"]) - - 
return NotebookDataSource( - name=nb_name, - description=nb_description, - tags=nb_tags, - repo_path=nb_repo_path, - disk_time_req=nb_disk_time_req, - ) - - -def list_notebooks() -> List[str]: - """Iterate over NOTEBOOK_DIR and retrieve all ipynb paths""" - notebook_list: List[str] = [] - - for folder, _, nb_files in os.walk(NOTEBOOK_DIR): - for nb_file in nb_files: - if nb_file.endswith(".ipynb"): - nb_path = os.path.abspath(os.path.join(folder, nb_file)) - notebook_list.append(nb_path) - - return notebook_list - - -def sort_tags( - tag_data_source: Dict[Tuple[int, str], List[NotebookDataSource]] -) -> List[Tuple[Tuple[int, str], List[NotebookDataSource]]]: - """Sort tags by tag order and then by name""" - sorted_tags_ds = [] - for tag_tuple, nb_data_source_list in tag_data_source.items(): - sorted_nb_data_source_list = sorted(nb_data_source_list, key=lambda x: x.name) - sorted_tags_ds.append((tag_tuple, sorted_nb_data_source_list)) - sorted_tags_ds = sorted(sorted_tags_ds, key=lambda x: x[0]) - return sorted_tags_ds - - -def build_notebook_list(): - """Build the notebook list page""" - data_source: List[NotebookDataSource] = [] - tag_data_source: Dict[Tuple[str, int], List[NotebookDataSource]] = {} - - # List notebooks in NOTEBOOK_DIR - notebook_list = list_notebooks() - - # For each notebook, parse the json metadata and get attributes - for notebook_path in notebook_list: - notebook_data_source = parse_nb_metadata(notebook_path) - - if notebook_data_source: - # Add notebook to data source - data_source.append(notebook_data_source) - - # Add notebook to tag list - for tag_tuple in notebook_data_source.tags: - if tag_tuple not in tag_data_source: - tag_data_source[tag_tuple] = [] - tag_data_source[tag_tuple].append(notebook_data_source) - - # Sort data source by name - data_source = sorted(data_source, key=lambda x: x.name) - - # Sort tag data source by tag order and name - sorted_tags_ds = sort_tags(tag_data_source) - - # Render template - 
render_template(data_source, sorted_tags_ds, OUTPUT_PATH) - - -def main(): - build_notebook_list() - - -if __name__ == "__main__": - main() diff --git a/resources/documentation_generation/generate_workflow_list.py b/resources/documentation_generation/generate_workflow_list.py deleted file mode 100644 index 0f5509cf..00000000 --- a/resources/documentation_generation/generate_workflow_list.py +++ /dev/null @@ -1,158 +0,0 @@ -import os -from dataclasses import dataclass -from typing import Dict, List, Union - -import yaml -from jinja2 import Template - -from vibe_core.client import FarmvibesAiClient -from vibe_core.datamodel import TaskDescription -from vibe_server.workflow.spec_parser import WorkflowParser - -HERE = os.path.dirname(os.path.abspath(__file__)) -PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) -DOC_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "docs", "source", "docfiles", "markdown")) -WORKFLOW_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "workflows")) - -WF_LIST_TEMPLATE_PATH = os.path.abspath( - os.path.join(HERE, "templates", "list_workflow_template.md") -) -WF_LIST_OUTPUT_PATH = os.path.abspath(os.path.join(DOC_DIR, "WORKFLOW_LIST.md")) - -WF_YAML_TEMPLATE_PATH = os.path.abspath( - os.path.join(HERE, "templates", "workflow_yaml_template.md") -) -WF_YAML_OUTPUT_DIR = os.path.abspath(os.path.join(DOC_DIR, "workflow_yaml")) - -WF_CATEGORY_LIST = ["data_ingestion", "data_processing", "farm_ai", "forest_ai", "ml"] - - -@dataclass -class WorkflowInformation: - name: str - description: Union[str, TaskDescription] - markdown_link: str - yaml: str - mermaid_diagram: str - - -@dataclass -class TemplateDataSource: - category: str - wf_list: List[WorkflowInformation] - - -def format_wf_name(full_wf_name: str, category: str): - return full_wf_name.split(f"{category}/")[-1] - - -def render_template( - data_source: Union[List[TemplateDataSource], WorkflowInformation], - output_path: str, - template_path: str, -): - """Load and render template 
given a data source""" - - with open(template_path) as f: - t = Template(f.read()) - - rendered_template = t.render(data_source=data_source) - - if not os.path.exists(os.path.dirname(output_path)): - os.makedirs(os.path.dirname(output_path)) - - with open(output_path, "w") as f: - f.write(rendered_template) - - -def list_exposed_workflows() -> Dict[str, List[str]]: - """Call the REST API to list the workflows""" - - workflow_list = FarmvibesAiClient("http://localhost:1108/").list_workflows() - - category_dict = { - cat: [wf_name for wf_name in workflow_list if wf_name.startswith(cat)] - for cat in WF_CATEGORY_LIST - } - return category_dict - - -def parse_wf_yamls(category: str, wf_list: List[str]) -> List[WorkflowInformation]: - """Parse the wf yaml files to extract short description""" - parsedList = [] - - client = FarmvibesAiClient("http://localhost:1108/") - - for wf_name in wf_list: - wf_yaml = client.get_workflow_yaml(wf_name) - yaml_dict = yaml.safe_load(wf_yaml) - wf_spec = WorkflowParser.parse_dict(yaml_dict) - - wf_md_link = os.path.relpath( - path=os.path.join(WF_YAML_OUTPUT_DIR, f"{wf_name}.md"), start=DOC_DIR - ) - - wf_name = format_wf_name(wf_name, category) - - parsedList.append( - WorkflowInformation( - name=wf_name, - description=wf_spec.description.short_description, - markdown_link=wf_md_link, - yaml=wf_yaml, - mermaid_diagram="", - ) - ) - - return sorted(parsedList, key=lambda x: x.name) - - -def build_workflow_list(): - """Build the worflow list page from the client""" - data_source: List[TemplateDataSource] = [] - - # List workflows in the REST API - wf_per_category = list_exposed_workflows() - - # For each workflow, parse the yaml and get description - for category, wf_list in wf_per_category.items(): - data_source.append( - TemplateDataSource(category=category, wf_list=parse_wf_yamls(category, wf_list)) - ) - - render_template(data_source, WF_LIST_OUTPUT_PATH, WF_LIST_TEMPLATE_PATH) - - -def build_workflow_yamls(): - """Build the workflow 
yaml pages from the client""" - client = FarmvibesAiClient("http://localhost:1108/") - - for wf_name in client.list_workflows(): - wf_yaml = client.get_workflow_yaml(wf_name) - yaml_dict = yaml.safe_load(wf_yaml) - wf_spec = WorkflowParser.parse_dict(yaml_dict) - - description = client.describe_workflow(wf_name)["description"] - - wf_yaml_output_path = os.path.join(WF_YAML_OUTPUT_DIR, f"{wf_name}.md") - if not os.path.exists(os.path.dirname(wf_yaml_output_path)): - os.makedirs(os.path.dirname(wf_yaml_output_path)) - - data_source = WorkflowInformation( - name=wf_name, - description=description, - markdown_link="", - yaml=wf_yaml, - mermaid_diagram=wf_spec.to_mermaid(), - ) - - render_template(data_source, wf_yaml_output_path, WF_YAML_TEMPLATE_PATH) - - -def main(): - build_workflow_list() - build_workflow_yamls() - - -if __name__ == "__main__": - main() diff --git a/resources/documentation_generation/templates/datatype_hierarchy_template.md b/resources/documentation_generation/templates/datatype_hierarchy_template.md deleted file mode 100644 index 1804638b..00000000 --- a/resources/documentation_generation/templates/datatype_hierarchy_template.md +++ /dev/null @@ -1,6 +0,0 @@ - -
- -{{mermaid_diagram}} - -
diff --git a/resources/documentation_generation/templates/list_notebook_template.md b/resources/documentation_generation/templates/list_notebook_template.md deleted file mode 100644 index 4b5965a7..00000000 --- a/resources/documentation_generation/templates/list_notebook_template.md +++ /dev/null @@ -1,37 +0,0 @@ -# Notebooks - -We present a complete list of the notebooks available in FarmVibes.AI with a short summary for each of them. Besides their description, we also include the expected disk space and running time required per notebook, considering the recommended VM size. - -
- ---------------- - - -## Summary - -We organize available notebooks in the following topics: - -{% for tag_tuple, nb_list in tag_data_source -%} - -
- {{tag_tuple[1]}} - -{% for nb in nb_list %}- [`{{nb.name}}` 📓]({{nb.repo_path}}) - -{% endfor %} -
-{% endfor %} - - - -
- ---------------- - - -## Notebooks description - -{% for nb in data_source %}- [`{{nb.name}}` 📓]({{nb.repo_path}}) {%if nb.disk_time_req %} {{nb.disk_time_req}} {% endif %}: {{nb.description}} - -{% endfor %} - diff --git a/resources/documentation_generation/templates/list_workflow_template.md b/resources/documentation_generation/templates/list_workflow_template.md deleted file mode 100644 index e3270602..00000000 --- a/resources/documentation_generation/templates/list_workflow_template.md +++ /dev/null @@ -1,23 +0,0 @@ -# Workflow List - -We group FarmVibes.AI workflows in the following categories: - -- **Data Ingestion**: workflows that download and preprocess data from a particular source, preparing data to be the starting point for most of the other workflows in the platform. -This includes raw data sources (e.g., Sentinel 1 and 2, LandSat, CropDataLayer) as well as the SpaceEye cloud-removal model; -- **Data Processing**: workflows that transform data into different data types (e.g., computing NDVI/MSAVI/Methane indexes, aggregating mean/max/min statistics of rasters, timeseries aggregation); -- **FarmAI**: composed workflows (data ingestion + processing) whose outputs enable FarmAI scenarios (e.g., predicting conservation practices, estimating soil carbon sequestration, identifying methane leakage); -- **ForestAI**: composed workflows (data ingestion + processing) whose outputs enable ForestAI scenarios (e.g., detecting forest change, estimating forest extent); -- **ML**: machine learning-related workflows to train, evaluate, and infer models within the FarmVibes.AI platform (e.g., dataset creation, inference); - -Below is a list of all available workflows within the FarmVibes.AI platform. For each of them, we provide a brief description and a link to the corresponding documentation page. 
- ---------- - -{% for elem in data_source -%} - -## {{elem.category}} - -{% for wf in elem.wf_list %}- [`{{wf.name}}` 📄]({{wf.markdown_link}}): {{wf.description}} - -{% endfor %} -{% endfor %} diff --git a/resources/documentation_generation/templates/workflow_yaml_template.md b/resources/documentation_generation/templates/workflow_yaml_template.md deleted file mode 100644 index 731da38f..00000000 --- a/resources/documentation_generation/templates/workflow_yaml_template.md +++ /dev/null @@ -1,51 +0,0 @@ -# {{data_source.name}} - -{{data_source.description.short_description}} {{data_source.description.long_description}} - -```{mermaid} - {{data_source.mermaid_diagram}} -``` - -## Sources - -{% for source_name, source_desc in data_source.description.inputs.items() -%} - -- **{{source_name}}**: {{source_desc}} - -{% endfor -%} - -## Sinks - -{% for sink_name, sink_desc in data_source.description.outputs.items() -%} - -- **{{sink_name}}**: {{sink_desc}} - -{% endfor -%} - -{% if data_source.description.parameters -%} -## Parameters - -{% for param_name, param_desc in data_source.description.parameters.items() -%} - -- **{{param_name}}**: {% if param_desc is string %}{{param_desc}}{% else %}{{param_desc[0]}}{% endif %} - -{% endfor -%} -{% endif -%} - -{% if data_source.description.task_descriptions -%} -## Tasks - -{% for task_name, task_desc in data_source.description.task_descriptions.items() -%} - -- **{{task_name}}**: {{task_desc}} - -{% endfor -%} -{% endif -%} - -## Workflow Yaml - -```yaml - -{{data_source.yaml}} - -``` diff --git a/resources/envs/dev.yaml b/resources/envs/dev.yaml deleted file mode 100644 index 2ceafa63..00000000 --- a/resources/envs/dev.yaml +++ /dev/null @@ -1,10 +0,0 @@ -channels: - - conda-forge -dependencies: - - ruff - - pytest - - pytest-azurepipelines - - pytest-cov - - nodejs - - Jinja2~=3.1.4 - - cryptography>=42.0.0 diff --git a/resources/envs/rest-api_orchestrator.yml b/resources/envs/rest-api_orchestrator.yml deleted file mode 
100644 index a425e437..00000000 --- a/resources/envs/rest-api_orchestrator.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: base -channels: - - conda-forge -dependencies: - - python=3.11.* - - pip - - wheel - - curl diff --git a/resources/envs/services-requirements.txt b/resources/envs/services-requirements.txt deleted file mode 100644 index 4852290f..00000000 --- a/resources/envs/services-requirements.txt +++ /dev/null @@ -1,15 +0,0 @@ -grpcio~=1.53.0 -dapr~=1.13.0 -dapr-ext-grpc~=1.12.0 -fastapi-versioning~=0.10.0 -pystac[validation]~=1.1.0 -strenum~=0.4.7 -requests~=2.32.0 -uvicorn~=0.13.4 -pyyaml~=6.0.1 -debugpy~=1.8.1 -shapely>=1.7.1 -fastapi~=0.97.0 -fastapi_utils~=0.2.1 -pydantic~=1.8.2 -cryptography>=42.0.0 diff --git a/resources/envs/worker-requirements.txt b/resources/envs/worker-requirements.txt deleted file mode 100644 index b0a04891..00000000 --- a/resources/envs/worker-requirements.txt +++ /dev/null @@ -1,68 +0,0 @@ -adlfs~=2022.10.0 -ambient-api==1.5.6 -azure-cosmos~=4.2.0 -azure-identity~=1.14.0 -azure-keyvault>=4.1.0 -azure-storage-blob>=12.5.0 -cdsapi==0.5.1 -pooch<1.5.0 -cfgrib~=0.9.10.4 -jsonschema -cryptography>=42.0.0 -dapr~=1.13.0 -dapr-ext-grpc~=1.12.0 -dask[dataframe]==2024.4.1 -debugpy~=1.8.1 -einops==0.4.1 -fastapi~=0.97.0 -fastapi-versioning~=0.10.0 -fastapi_utils~=0.2.1 -fiona~=1.8.0 -folium~=0.12.0 -fonttools~=4.43.0 -fsspec~=2024.3.1 -grpcio~=1.53.0 -h5py~=3.10.0 -herbie-data~=2022.9.0.post1 -importlib-resources==5.2.2 -ipython~=8.10 -Jinja2~=3.1.4 -matplotlib~=3.8.0 -msal~=1.22.0 -netcdf4==1.6.5 -onnxruntime~=1.17.1 -orjson~=3.9.15 -osmnx~=1.2.2 -owslib~=0.30.0 -pandas~=2.2.1 -pebble~=4.6.3 -pillow~=10.2.0 -pint~=0.23 -planetary-computer~=0.4.5 -protlearn==0.0.3 -pydantic~=1.8.2 -pydap==3.2.2 -pyngrok~=7.1.2 -pysmb==1.2.9.1 -pystac~=1.6.0 -pystac-client~=0.3.2 -PyYAML~=6.0.1 -rasterio~=1.2 -retrying~=1.3.3 -rio-cogeo~=3.4.1 -rioxarray~=0.15.1 -s2cloudless~=1.5.0 -scikit-gstat~=1.0.12 -scikit-image~=0.22.0 -scikit-learn~=1.1.0 -shapely>=1.7.1 
-spyndex==0.4.0 -strenum~=0.4.7 -timezonefinder==6.2.0 -tqdm~=4.66.3 -typing-extensions~=4.7.1 -uvicorn~=0.14.0 -xarray~=2022.11.0 -xlrd~=2.0.1 -xmltodict==0.13.0 -zarr~=2.13.3 diff --git a/resources/envs/worker.yml b/resources/envs/worker.yml deleted file mode 100644 index e382dc24..00000000 --- a/resources/envs/worker.yml +++ /dev/null @@ -1,11 +0,0 @@ -channels: - - pytorch - - conda-forge -dependencies: - - python=3.11.* - - libkml - - eccodes - - python-eccodes - - numpy - - wheel - - pip diff --git a/resources/vm/setup_farmvibes_ai_vm.sh b/resources/vm/setup_farmvibes_ai_vm.sh index 21d014ca..a6ba85de 100755 --- a/resources/vm/setup_farmvibes_ai_vm.sh +++ b/resources/vm/setup_farmvibes_ai_vm.sh @@ -1,7 +1,4 @@ #!/bin/bash -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - # Update apt sudo apt update @@ -48,8 +45,3 @@ fi # Run docker without sudo sudo usermod -aG docker $DOCKER_USER - -# Run git-lfs install to restore large files -sudo apt install git-lfs -y -git lfs install -git lfs pull \ No newline at end of file diff --git a/scripts/export_sam_models.py b/scripts/export_sam_models.py index 2f2aaf4d..dbd8f26c 100644 --- a/scripts/export_sam_models.py +++ b/scripts/export_sam_models.py @@ -263,16 +263,5 @@ def main(): add_to_cluster(exported_paths, args.cluster) - -def dev(): - model_type = "vit_b" - out_path = "/mnt/onnx_resources/" - with TemporaryDirectory() as tmp_dir: - model_url = MODELS[model_type].url - downloaded_path = download_file(model_url, os.path.join(tmp_dir, f"{model_type}.pth")) - export_model(model_type, downloaded_path, out_path) - - - if __name__ == "__main__": main() diff --git a/scripts/local-k8s-diagnostics.sh b/scripts/local-k8s-diagnostics.sh deleted file mode 100644 index 9c0c25cd..00000000 --- a/scripts/local-k8s-diagnostics.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/sh -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- - -PATH=$PATH:~/.config/farmvibes-ai - -echo "kubectl location:" -which kubectl - -echo "Cluster pods:" -kubectl get pods -kubectl get pods -o yaml - -echo "Docker images:" -docker images - -echo "REST API description:" -kubectl describe deployment terravibes-rest-api - -echo "Orchestrator description:" -kubectl describe deployment terravibes-orchestrator - -echo "Worker description:" -kubectl describe deployment terravibes-worker - -echo "Cache description:" -kubectl describe deployment terravibes-cache - -echo "REST API logs:" -kubectl logs -l app=terravibes-rest-api --all-containers=true --tail=-1 - -echo "Orchestrator logs:" -kubectl logs -l app=terravibes-orchestrator --all-containers=true --tail=-1 - -echo "Worker logs:" -kubectl logs -l app=terravibes-worker --max-log-requests=8 --all-containers=true --tail=-1 - -echo "Cache logs:" -kubectl logs -l app=terravibes-cache --all-containers=true --tail=-1 - -echo "Data Ops logs:" -kubectl logs -l app=terravibes-data-ops --all-containers=true --tail=-1 - -echo "Kubernetes logs:" -docker ps | egrep 'k3d-farmvibes-ai-.*-0' | awk '{ print $1 }' | xargs docker logs diff --git a/scripts/setup_python_develop_env.sh b/scripts/setup_python_develop_env.sh deleted file mode 100644 index 0cb9c36e..00000000 --- a/scripts/setup_python_develop_env.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - - -SCRIPTFILE=$(readlink -f "$0") -SCRIPTPATH=$(dirname "$SCRIPTFILE") -ROOTDIR=$(realpath $SCRIPTPATH/..) 
-DEV_ENV_FILE=$ROOTDIR/resources/envs/dev.yaml - -conda env update -f $DEV_ENV_FILE - -# Installing internal packages -terravibes_packages="vibe_core vibe_common vibe_agent vibe_server vibe_lib vibe_dev" -for package in $terravibes_packages; do - echo Installing package $package - pip install -e $ROOTDIR/src/$package -done \ No newline at end of file diff --git a/src/tests/__init__.py b/src/tests/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/tests/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/tests/benchmark/test_spaceeye_ops.py b/src/tests/benchmark/test_spaceeye_ops.py deleted file mode 100644 index 0dbc68f8..00000000 --- a/src/tests/benchmark/test_spaceeye_ops.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -import time -from typing import List, cast - -import pytest - -from vibe_core.data import BaseVibeDict, DataVibe -from vibe_core.testing.comparison import assert_all_close -from vibe_dev.testing.op_tester import OpTester, ReferenceRetriever - -HERE = os.path.dirname(os.path.abspath(__file__)) -OPS_DIR = os.path.join(HERE, "..", "..", "..", "ops") -FILES_DIR = "/tmp/op_references/" -TEST_OPS = [ - "compute_cloud_prob", - "compute_sentinel_shadow", - "download_sentinel_1", - "download_sentinel_2_from_gcp", - "download_sentinel_2_from_pc", - "filter_items", - "list_sentinel_1_products", - "list_sentinel_2_L1C", - "list_sentinel_2_L2A", - "merge_cloud_masks", - "merge_sentinel1_orbits", - "merge_sentinel_orbits", - "preprocess_sentinel1", - "preprocess_sentinel2", -] -OP_YAML_DIR = { - "list_sentinel_2_L1C": "list_sentinel_2_products", - "list_sentinel_2_L2A": "list_sentinel_2_products", -} - - -@pytest.fixture -def reference_retriever(): - return ReferenceRetriever(FILES_DIR) - - -@pytest.fixture -def op_tester(request: pytest.FixtureRequest): - op_name: str = request.param 
# type: ignore - op_dir = OP_YAML_DIR.get(op_name, op_name) - op_config_path = os.path.join(OPS_DIR, op_dir, f"{op_name}.yaml") - return OpTester(op_config_path) - - -@pytest.fixture -def test_data(request: pytest.FixtureRequest, reference_retriever: ReferenceRetriever): - op_name = request.param # type: ignore - return reference_retriever.retrieve(op_name) - - -@pytest.mark.parametrize("op_tester,test_data", [(t, t) for t in TEST_OPS], indirect=True) -def test_op_outputs(op_tester: OpTester, test_data: List[List[BaseVibeDict]]): - for input_data, expected_output in test_data: - start = time.time() - op_output = op_tester.run(**input_data) - end = time.time() - for name, out in op_output.items(): - expected = expected_output[name] - if isinstance(expected, list): - sort_expected = sorted(expected, key=lambda x: x.time_range[0]) - sort_out = sorted(cast(List[DataVibe], out), key=lambda x: x.time_range[0]) - for o1, o2 in zip(sort_expected, sort_out): - assert_all_close(o1, o2) - else: - assert isinstance(out, DataVibe) - assert_all_close(expected, out) - print(f"Spent {end - start}s on op: {op_tester.op.name}") diff --git a/src/tests/conftest.py b/src/tests/conftest.py deleted file mode 100644 index d447dc1f..00000000 --- a/src/tests/conftest.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import pytest - -from vibe_dev.testing import anyio_backend # type: ignore # noqa -from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir, fake_workflows_dir # noqa -from vibe_dev.testing.storage_fixtures import * # type: ignore # noqa: F403, F401 -from vibe_dev.testing.storage_fixtures import TEST_STORAGE # noqa: F401 -from vibe_dev.testing.utils import WorkflowTestHelper -from vibe_dev.testing.workflow_fixtures import SimpleStrData, workflow_run_config # noqa - - -@pytest.fixture(scope="session") -def workflow_test_helper(): - return WorkflowTestHelper() diff --git a/src/tests/test_notebooks.py b/src/tests/test_notebooks.py deleted file mode 100644 index 259cc4cf..00000000 --- a/src/tests/test_notebooks.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -import os -from typing import List - -import pytest - -HERE = os.path.dirname(os.path.abspath(__file__)) -PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) -NOTEBOOK_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "notebooks")) -WIKI_URL = ( - "https://dev.azure.com/ResearchForIndustries/EYWA/_wiki/wikis/EYWA.wiki/214/Notebook-Metadata" -) - - -def list_notebooks() -> List[str]: - notebook_list: List[str] = [] - - for folder, _, nb_files in os.walk(NOTEBOOK_DIR): - for nb_file in nb_files: - if nb_file.endswith(".ipynb"): - nb_path = os.path.abspath(os.path.join(folder, nb_file)) - notebook_list.append(nb_path) - - return notebook_list - - -@pytest.mark.parametrize("notebook_path", list_notebooks()) -def test_workflows_description(notebook_path: str): - """Test that all notebooks have name, description and tags metadata""" - with open(notebook_path) as f: - nb_json = json.load(f) - - nb_metadata = nb_json["metadata"] - assert "name" in nb_metadata, f"Missing 'name' metadata, refer to {WIKI_URL}" - assert "description" in nb_metadata, f"Missing 'description' metadata, refer to {WIKI_URL}" - assert "disk_space" in 
nb_metadata, f"Missing disk space requirements, refer to {WIKI_URL}" - assert "running_time" in nb_metadata, f"Missing expected running time, refer to {WIKI_URL}" - assert "tags" in nb_metadata, f"Missing tags, refer to {WIKI_URL}" - assert len(nb_metadata["tags"]) > 0, f"Tag list is empty, refer to {WIKI_URL}" diff --git a/src/tests/test_op_workflows_integration.py b/src/tests/test_op_workflows_integration.py deleted file mode 100644 index 14a63f7d..00000000 --- a/src/tests/test_op_workflows_integration.py +++ /dev/null @@ -1,315 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -import shutil -import tempfile -from dataclasses import asdict -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -import pytest -import yaml - -from vibe_agent.ops import OperationFactoryConfig -from vibe_agent.storage import LocalFileAssetManagerConfig, LocalStorageConfig -from vibe_common.secret_provider import AzureSecretProviderConfig -from vibe_core.data.core_types import BaseVibe, DataVibe, OpIOType -from vibe_core.data.utils import StacConverter, get_base_type, serialize_stac -from vibe_dev.local_runner import LocalWorkflowRunner -from vibe_dev.testing.fake_workflows_fixtures import FakeType, get_fake_workflow_path -from vibe_server.workflow import list_workflows -from vibe_server.workflow.description_validator import WorkflowDescriptionValidator -from vibe_server.workflow.runner import ( - NoOpStateChange, - WorkflowCallback, - WorkflowChange, - WorkflowRunner, -) -from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler -from vibe_server.workflow.spec_parser import WorkflowParser, get_workflow_dir -from vibe_server.workflow.spec_parser import parse_edge_string as pes -from vibe_server.workflow.workflow import Workflow, load_workflow_by_name - -HERE = os.path.dirname(os.path.abspath(__file__)) - - -def serialize(base: BaseVibe): - return 
serialize_stac(StacConverter().to_stac_item(base)) # type: ignore - - -def gen_local_runner( - storage_spec: Any, - workflow_path: str, - fake_ops_path: str, - workflows_path: str, - callback: WorkflowCallback = NoOpStateChange, -) -> WorkflowRunner: - factory_spec = OperationFactoryConfig(storage_spec, AzureSecretProviderConfig()) - workflow = Workflow.build(workflow_path, fake_ops_path, workflows_path) - io_mapper = WorkflowIOHandler(workflow) - return LocalWorkflowRunner.build( - workflow, - factory_spec=factory_spec, - io_mapper=io_mapper, - update_state_callback=callback, - max_tries=5, - ) - - -def build_workflow_runner( - tmp_path: Path, - workflow_path: str, - fake_ops_path: str, - workflows_path: str, - callback: WorkflowCallback = NoOpStateChange, -) -> WorkflowRunner: - tmp_asset_path = os.path.join(str(tmp_path), "assets") - storage_spec = LocalStorageConfig( - local_path=str(tmp_path), asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) - ) - return gen_local_runner( - storage_spec, workflow_path, fake_ops_path, workflows_path, callback=callback - ) - - -@pytest.mark.parametrize("workflow_name", list_workflows()) -def test_workflows_load(workflow_name: str): - workflow = load_workflow_by_name(workflow_name) - assert not workflow.has_cycle() - - -@pytest.mark.parametrize( - "workflow_name", [wf_name for wf_name in list_workflows() if not wf_name.startswith("private/")] -) -def test_workflows_description(workflow_name: str): - workflow_dir = get_workflow_dir() - workflow_path = os.path.join(workflow_dir, f"{workflow_name}.yaml") - workflow_spec = WorkflowParser.parse(workflow_path) - WorkflowDescriptionValidator.validate(workflow_spec) - - -@pytest.mark.parametrize("workflow_name", list_workflows()) -def test_list_workflows_schema_generation(workflow_name: str): - workflow = load_workflow_by_name(workflow_name) - ret: Dict[str, Any] = { - k: get_base_type(v).schema() - for k, v in workflow.inputs_spec.items() # type: ignore - } - assert ret - - 
-def strip_edges_and_nodes_from_workflow( - tmp_path: Path, - workflow_path: str, - fake_ops_path: str, - workflows_path: str, - strip_sinks: bool = False, - tasks_to_keep: int = 1, - del_edges: bool = False, -) -> WorkflowRunner: - base = WorkflowParser.parse(workflow_path, fake_ops_path, workflows_path) - - if len(base.tasks) > tasks_to_keep: - must_exist = [t for i, t in enumerate(base.tasks.keys()) if i < tasks_to_keep] - base.tasks = {m: base.tasks[m] for m in must_exist} - base.sinks = {e.origin: e.origin for e in base.edges if pes(e.origin)[0] in must_exist} - base.edges = [] - base.sources = {k: v for i, (k, v) in enumerate(base.sources.items()) if i < 1} - - if strip_sinks: - base.sinks = {} - - if del_edges: - base.edges = [] # type: ignore - - tasks = {k: v.to_dict() for k, v in base.tasks.items()} - base = asdict(base) - base["tasks"] = tasks - - tmp = tempfile.NamedTemporaryFile("w", delete=False) - yaml.dump(base, tmp) # type: ignore - tmp.close() - - try: - return build_workflow_runner(tmp_path, tmp.name, fake_ops_path, workflows_path) - finally: - os.unlink(tmp.name) - - -def test_no_sinks_workflow( - tmp_path: Path, - fake_ops_dir: str, - fake_workflows_dir: str, -): - with pytest.raises(ValueError): - strip_edges_and_nodes_from_workflow( - tmp_path, - get_fake_workflow_path("nested_workflow"), - fake_ops_dir, - fake_workflows_dir, - True, - ) - - -def test_degenerate_workflow(tmp_path: Path, fake_ops_dir: str, fake_workflows_dir: str): - with pytest.raises(ValueError): - # For the reader that might be asking what is going on here, - # we will end up with a two-node workflow that only has a - # single source. The idea of supporting "single" operation - # workflows is that all operations are sources and sinks. - # So, if that's not the case, then edges are required. 
- strip_edges_and_nodes_from_workflow( - tmp_path, - get_fake_workflow_path("nested_workflow"), - fake_ops_dir, - fake_workflows_dir, - tasks_to_keep=2, - del_edges=True, - ) - - -@pytest.mark.anyio -async def test_arbitrary_input( - tmp_path: Path, - fake_ops_dir: str, - fake_workflows_dir: str, -): - runner = build_workflow_runner( - tmp_path, get_fake_workflow_path("str_input"), fake_ops_dir, fake_workflows_dir - ) - user_input = FakeType("fake workflow execution") - out = await runner.run({k: serialize(user_input) for k in runner.workflow.inputs_spec}) - for outname in runner.workflow.output_spec: - assert outname in out - - -@pytest.mark.parametrize("workflow_name", ["nested_workflow", "workflow_inception"]) -@pytest.mark.anyio -async def test_composable_workflow( - workflow_name: str, - tmp_path: Path, - fake_ops_dir: str, - fake_workflows_dir: str, -): - user_input = FakeType("fake workflow execution") - - runner = build_workflow_runner( - tmp_path, get_fake_workflow_path(workflow_name), fake_ops_dir, fake_workflows_dir - ) - out = await runner.run({k: serialize(user_input) for k in runner.workflow.inputs_spec}) - for outname in runner.workflow.output_spec: - assert outname in out - - -@pytest.mark.anyio -async def test_ordered_times_in_workflow( - tmp_path: Path, - fake_ops_dir: str, - fake_workflows_dir: str, -): - state: Dict[str, Tuple[WorkflowChange, datetime]] = {} - - runner = build_workflow_runner( - tmp_path, get_fake_workflow_path("nested_workflow"), fake_ops_dir, fake_workflows_dir - ) - await runner.run({k: serialize(FakeType("test")) for k in runner.workflow.inputs_spec}) - - previous = None - for task in (t for t in state.keys() if t.startswith("t")): - if previous is None: - previous = state[task] - continue - assert previous[-1] < state[task][-1] - - -@pytest.mark.anyio -async def test_fan_out_single_element(tmp_path: Path, fake_ops_dir: str, fake_workflows_dir: str): - spec = WorkflowParser.parse( - get_fake_workflow_path("fan_out_and_in"), 
fake_ops_dir, fake_workflows_dir - ) - tmp_asset_path = os.path.join(str(tmp_path), "assets") - storage_spec = LocalStorageConfig( - local_path=str(tmp_path), asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) - ) - factory_spec = OperationFactoryConfig(storage_spec, AzureSecretProviderConfig()) - for num_items in (1, 5): - spec.tasks["to_list"].parameters["num_items"] = num_items - workflow = Workflow(spec) - io_mapper = WorkflowIOHandler(workflow) - runner = LocalWorkflowRunner.build( - workflow, - io_mapper=io_mapper, - factory_spec=factory_spec, - ) - converter = StacConverter() - x = DataVibe( - "input", - time_range=(datetime.now(), datetime.now()), - geometry={"type": "Point", "coordinates": [0.0, 0.0]}, - assets=[], - ) - out = await runner.run({"input": serialize_stac(converter.to_stac_item(x))}) - shutil.rmtree(tmp_path) # Delete the cache - assert all(len(o) == num_items for o in out.values()) - - -@pytest.mark.anyio -async def test_gather_not_parallel(tmp_path: Path, fake_ops_dir: str, fake_workflows_dir: str): - runner = build_workflow_runner( - tmp_path, get_fake_workflow_path("item_gather"), fake_ops_dir, fake_workflows_dir - ) - converter = StacConverter() - x = DataVibe( - "input", - time_range=(datetime.now(), datetime.now()), - geometry={"type": "Point", "coordinates": [0.0, 0.0]}, - assets=[], - ) - out = await runner.run( - {k: serialize_stac(converter.to_stac_item(x)) for k in runner.workflow.inputs_spec} - ) - assert len(out) == 1 - - -# TODO: Restore "remote" storage_spec after fixing CosmosDB permissions -@pytest.mark.parametrize("storage_spec", ["local"], indirect=True) -@pytest.mark.anyio -async def test_op_run_race_condition(storage_spec: Any, fake_ops_dir: str, fake_workflows_dir: str): - runner = gen_local_runner( - storage_spec, get_fake_workflow_path("workflow_inception"), fake_ops_dir, fake_workflows_dir - ) - user_input = FakeType("fake workflow execution") - await runner.run({k: serialize(user_input) for k in 
runner.workflow.inputs_spec}) - - -@pytest.mark.parametrize("edges", [None, []]) -def test_parser_loads_workflow_with_no_edges( - edges: List[Optional[List[Any]]], fake_ops_dir: str, fake_workflows_dir: str -) -> None: - workflow_dict = WorkflowParser._load_workflow(get_fake_workflow_path("fan_out_and_in")) - workflow_dict["edges"] = edges - WorkflowParser.parse_dict(workflow_dict, fake_ops_dir, fake_workflows_dir) - - -@pytest.mark.anyio -async def test_running_workflow_with_basevibe_edges( - tmp_path: Path, - fake_ops_dir: str, # noqa - fake_workflows_dir: str, # noqa - SimpleStrData: Any, -): - data = StacConverter().to_stac_item(SimpleStrData("🍔")) # type: ignore - wf_input: OpIOType = {"input": serialize_stac(data)} - - tmp_asset_path = os.path.join(str(tmp_path), "assets") - storage_spec = LocalStorageConfig( - local_path=str(tmp_path), asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) - ) - - runner = gen_local_runner( - storage_spec, get_fake_workflow_path("base_base"), fake_ops_dir, fake_workflows_dir - ) - out = await runner.run(wf_input) - assert out diff --git a/src/tests/test_ops_building.py b/src/tests/test_ops_building.py deleted file mode 100644 index 602a299c..00000000 --- a/src/tests/test_ops_building.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from pathlib import Path -from typing import List - -import pytest - -from vibe_agent.ops import OperationFactory -from vibe_agent.storage.asset_management import LocalFileAssetManager -from vibe_common.constants import DEFAULT_OPS_DIR -from vibe_common.secret_provider import AzureSecretProvider -from vibe_dev.testing.op_tester import FakeStorage - - -@pytest.fixture -def fake_storage(tmp_path: Path) -> FakeStorage: - asset_manager = LocalFileAssetManager(str(tmp_path)) - storage = FakeStorage(asset_manager) - return storage - - -def test_all_ops_pass_sanity_check(fake_storage: FakeStorage): - not_sane = [FileNotFoundError, RuntimeError] - factory = OperationFactory(fake_storage, AzureSecretProvider()) - failures: List[str] = [] - for dirpath, _, filenames in os.walk(DEFAULT_OPS_DIR): - for fn in filenames: - if not fn.endswith(".yaml"): - continue - path = os.path.join(dirpath, fn) - try: - factory.build(path) - except Exception as e: - if any([isinstance(e, n) for n in not_sane]): - failures.append(fn) - print(f"Failed to build op {fn} due to {type(e)}: {e}") - assert not failures, f"Failed to build the following op(s): {', '.join(failures)}" diff --git a/src/tests/test_rest_api.py b/src/tests/test_rest_api.py deleted file mode 100644 index cb8fd2ca..00000000 --- a/src/tests/test_rest_api.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from dataclasses import asdict -from typing import Any, Dict, List, Optional, Tuple, Union, cast -from unittest.mock import MagicMock, patch -from uuid import uuid4 as uuid - -import pytest -import requests -from fastapi.testclient import TestClient - -from vibe_common.constants import CONTROL_STATUS_PUBSUB, WORKFLOW_REQUEST_PUBSUB_TOPIC -from vibe_common.messaging import WorkflowCancellationMessage -from vibe_common.statestore import StateStore -from vibe_core.data.core_types import InnerIOType -from vibe_core.data.utils import StacConverter, deserialize_stac -from vibe_core.datamodel import RunConfig, RunConfigInput, RunDetails, RunStatus -from vibe_server.href_handler import BlobHrefHandler, LocalHrefHandler -from vibe_server.server import TerravibesAPI, TerravibesProvider -from vibe_server.workflow.input_handler import build_args_for_workflow -from vibe_server.workflow.workflow import load_workflow_by_name - - -@pytest.fixture -def request_client(): - href_handler = LocalHrefHandler("/tmp") - terravibes_app = TerravibesAPI(href_handler) - client = TestClient(terravibes_app.versioned_wrapper) - yield client - - -@pytest.fixture -def request_client_with_blob(): - href_handler = BlobHrefHandler() - terravibes_app = TerravibesAPI(href_handler) - client = TestClient(terravibes_app.versioned_wrapper) - yield client - - -def test_list_workflows(request_client: requests.Session): - url = "/v0/workflows" - response = request_client.get(url) - - assert response.status_code == 200 - assert isinstance(response.json(), list) - assert len(response.json()) > 0 - - for wfname in response.json(): - response = request_client.get(f"{url}/{wfname}") - assert response.status_code == 200, (wfname, response.text) - assert isinstance(response.json(), dict) - fields = "name inputs outputs parameters description" - for k in response.json(): - assert k in fields - - -def test_get_workflow_schema(request_client: requests.Session): - url = "/v0/workflows" - response = 
request_client.get(url) - workflow = response.json()[0] - url = f"{url}/{workflow}" - response = request_client.get(url).json() - assert isinstance(response, dict) - assert all(k in response for k in ("name", "inputs", "outputs", "parameters", "description")) - assert isinstance(response["name"], str) - assert isinstance(response["inputs"], dict) - assert isinstance(response["outputs"], dict) - assert isinstance(response["parameters"], dict) - assert isinstance(response["description"], dict) - assert sorted(response["parameters"]) == sorted(response["description"]["parameters"]) - - -def test_generate_api_documentation_page(request_client: requests.Session): - response = request_client.get("/v0/docs") - assert response.status_code == 200 - openapi_json = request_client.get("/v0/openapi.json") - assert openapi_json.status_code == 200 - - -@pytest.mark.parametrize("params", [None, {"param1": "new_param"}]) -@patch("vibe_server.server.send", return_value="OK") -@patch.object(StateStore, "transaction") -@patch.object(StateStore, "retrieve", side_effect=lambda _: []) -@patch.object(StateStore, "retrieve_bulk", side_effect=lambda _: []) -def test_workflow_submission( - retrieve_bulk: MagicMock, - retrieve: MagicMock, - transaction: MagicMock, - send: MagicMock, - workflow_run_config: Dict[str, Any], - params: Dict[str, Any], - request_client: requests.Session, -): - workflow_run_config["parameters"] = params - response = request_client.post("/v0/runs", json=workflow_run_config) - send.assert_called() - assert send.call_args[0][0].content.parameters == params - - assert response.status_code == 201 - assert len(transaction.call_args.args[0]) == 2 - id = response.json()["id"] - assert transaction.call_args.args[0][0]["value"][0] == id - submitted_config = asdict(transaction.call_args.args[0][1]["value"]) - # Add some tasks here - tasks = ["task1", "task2", "task3"] - submitted_config["tasks"] = tasks - retrieve_bulk.side_effect = [[submitted_config], [asdict(RunDetails()) 
for _ in tasks]] - response = request_client.get(f"/v0/runs/{id}") - assert response.json()["details"]["status"] == RunStatus.pending - retrieved_task_details = response.json()["task_details"] - assert len(retrieved_task_details) == len(tasks) - assert all(retrieved_task_details[t]["status"] == RunStatus.pending for t in tasks) - - retrieve_bulk.side_effect = lambda _: [ # type: ignore - asdict(transaction.call_args.args[0][1]["value"]) - ] - response = request_client.get(f"/v0/runs/?ids={id}") - assert response.status_code == 200 - assert len(response.json()) == 1 - - -@patch.object(StateStore, "retrieve", side_effect=lambda _: []) -def test_no_workflow_runs(_, request_client: requests.Session): - response = request_client.get("/v0/runs") - assert response.status_code == 200 - assert len(response.json()) == 0 - - -def test_invalid_workflow_submission( - workflow_run_config: Dict[str, Any], request_client: requests.Session -): - workflow_run_config["workflow"] = "invalid workflow" - response = request_client.post("/v0/runs", json=workflow_run_config) - assert response.status_code == 400 - - -def test_missing_field_workflow_submission( - workflow_run_config: Dict[str, Any], request_client: requests.Session -): - del workflow_run_config["user_input"] - response = request_client.post("/v0/runs", json=workflow_run_config) - assert response.status_code == 422 - assert response.json()["detail"][0]["type"] == "type_error" - - -@patch.object(TerravibesProvider, "submit_work", side_effect=Exception("sorry")) -@patch.object(TerravibesProvider, "update_run_state") -@patch.object(TerravibesProvider, "list_runs_from_store", return_value=[]) -def test_submit_local_workflows_with_broken_work_submission( - _, __: Any, ___: Any, workflow_run_config: Dict[str, Any], request_client: requests.Session -): - response = request_client.post("/v0/runs", json=workflow_run_config) - assert response.status_code == 500, response - - -@patch("vibe_server.server.send", return_value="OK") 
-@patch.object(TerravibesProvider, "submit_work") -@patch.object(StateStore, "transaction") -@patch.object(StateStore, "retrieve", side_effect=lambda _: []) -@patch.object(StateStore, "retrieve_bulk") -def test_workflow_submission_and_cancellation( - retrieve_bulk: MagicMock, - retrieve: MagicMock, - transaction: MagicMock, - _: MagicMock, - send: MagicMock, - workflow_run_config: Dict[str, Any], - request_client: requests.Session, -): - response = request_client.post("/v0/runs", json=workflow_run_config) - assert response.status_code == 201 - assert len(transaction.call_args.args[0]) == 2 - id = response.json()["id"] - assert transaction.call_args.args[0][0]["value"][0] == id - - response = request_client.post(f"/v0/runs/{id}/cancel") - assert response.status_code == 202 - assert len(transaction.call_args.args[0]) == 2 - message = send.call_args.args[0] - assert isinstance(message, WorkflowCancellationMessage) - assert str(message.run_id) == id - - send.assert_called_with( - message, "rest-api", CONTROL_STATUS_PUBSUB, WORKFLOW_REQUEST_PUBSUB_TOPIC - ) - - -@pytest.mark.parametrize("params", [None, {"param1": "new_param"}]) -@patch.object(TerravibesProvider, "submit_work") -@patch.object(TerravibesProvider, "update_run_state") -@patch.object(StateStore, "retrieve") -@patch.object(StateStore, "retrieve_bulk", side_effect=lambda _: []) -def test_workflow_resubmission( - retrieve_bulk: MagicMock, - retrieve: MagicMock, - update_run_state: MagicMock, - submit_work: MagicMock, - params: Optional[Dict[str, Any]], - workflow_run_config: Dict[str, Any], - request_client: requests.Session, -): - submitted_runs: List[RunConfig] = [] - first_run = {} - - def submit_work_effect(run: RunConfig): - nonlocal submitted_runs - submitted_runs.append(run) - - def update_run_state_effect(run_ids: List[str], new_run: RunConfig): - nonlocal first_run - first_run = asdict(new_run) - - submit_work.side_effect = submit_work_effect - update_run_state.side_effect = update_run_state_effect - 
- workflow_run_config["parameters"] = params - response = request_client.post("/v0/runs", json=workflow_run_config) - assert response.status_code == 201 - - retrieve.side_effect = [first_run, []] - response = request_client.post(f"/v0/runs/{uuid()}/resubmit") - - assert response.status_code == 201 - r1, r2 = submitted_runs - for p in ("workflow", "user_input", "parameters", "name"): - assert getattr(r1, p) == getattr(r2, p) - assert r1.id != r2.id - - -@patch.object(StateStore, "retrieve") -def test_resubmission_of_missing_run(retrieve: MagicMock, request_client: requests.Session): - def retrieve_effect(_): - raise KeyError() - - retrieve.side_effect = retrieve_effect - response = request_client.post(f"/v0/runs/{uuid()}/resubmit") - assert response.status_code == 404 - - -@patch.object(StateStore, "retrieve") -def test_cancelling_missing_run(retrieve: MagicMock, request_client: requests.Session): - def retrieve_effect(_): - raise KeyError() - - retrieve.side_effect = retrieve_effect - - response = request_client.post(f"/v0/runs/{uuid()}/cancel") - assert response.status_code == 404 - - -def test_getting_schema_of_missing_workflow(request_client: requests.Session): - response = request_client.get("/v0/workflows/i-don't-exist") - assert response.status_code == 404 - - -def test_build_args_for_workflow_generates_valid_output(workflow_run_config: Dict[str, Any]): - run_config = RunConfigInput(**workflow_run_config) - inputs = load_workflow_by_name(cast(str, run_config.workflow)).inputs_spec - out = build_args_for_workflow(run_config.user_input, list(inputs)) - - def genitems(values: Union[InnerIOType, List[InnerIOType]]): - if isinstance(values, list): - for e in values: - yield deserialize_stac(e) - else: - yield deserialize_stac(values) - - converter = StacConverter() - for v in genitems([v for v in out.values()]): - assert converter.from_stac_item(v) is not None - - -@pytest.mark.parametrize( - "fields_exceptions", - [ - ([], None), - (["user_input.geojson"], None), 
- (["user_input.geojson", "workflow"], None), - (["user_input.doesnt_exist"], KeyError), - (["something_else.doesnt_exist"], KeyError), - ], -) -def test_summarize_runs( - workflow_run_config: Dict[str, Any], fields_exceptions: Tuple[List[str], Optional[Exception]] -): - href_handler = LocalHrefHandler("/tmp") - provider = TerravibesProvider(href_handler) - fields, exception = fields_exceptions - run_config = RunConfig( - **workflow_run_config, - id=uuid(), - details=RunDetails(), - task_details={}, - spatio_temporal_json=None, - ) - if exception is not None: - with pytest.raises(exception): # type: ignore - provider.summarize_runs([run_config], fields) - else: - summary = provider.summarize_runs([run_config], fields) - print(summary) - if fields: - for field in fields: - if "doesnt" not in field: - assert field in summary[0] - - -@pytest.mark.parametrize("blob_df", [(True, type(None)), (False, int)]) -def test_system_metrics( - request_client: requests.Session, - request_client_with_blob: requests.Session, - blob_df: Tuple[bool, Any], -): - blob, df_type = blob_df - if blob: - response = request_client_with_blob.get("/v0/system-metrics") - else: - response = request_client.get("/v0/system-metrics") - - assert response.status_code == 200 - - metrics = response.json() - for field in "load_avg cpu_usage free_mem used_mem total_mem disk_free".split(): - assert field in metrics - - assert all(isinstance(v, float) for v in metrics["load_avg"]) - assert isinstance(metrics["cpu_usage"], float) - assert isinstance(metrics["free_mem"], int) - assert isinstance(metrics["used_mem"], int) - assert isinstance(metrics["total_mem"], int) - assert isinstance(metrics["disk_free"], df_type) diff --git a/src/tests/test_rest_api_client_integration.py b/src/tests/test_rest_api_client_integration.py deleted file mode 100644 index 329d39d2..00000000 --- a/src/tests/test_rest_api_client_integration.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -from dataclasses import asdict -from datetime import datetime -from os.path import join as j -from typing import Any, Dict, Optional -from unittest.mock import MagicMock, patch -from uuid import UUID - -import pytest -from fastapi.testclient import TestClient -from shapely.geometry import Polygon - -from vibe_common.statestore import StateStore -from vibe_core.client import FarmvibesAiClient -from vibe_core.data import ADMAgSeasonalFieldInput -from vibe_core.datamodel import RunDetails -from vibe_server.href_handler import LocalHrefHandler -from vibe_server.orchestrator import WorkflowStateUpdate -from vibe_server.server import TerravibesAPI, TerravibesProvider -from vibe_server.workflow.runner import WorkflowChange -from vibe_server.workflow.spec_parser import WorkflowParser, get_workflow_dir - - -@pytest.fixture -def rest_client(): - href_handler = LocalHrefHandler("/tmp") - terravibes_app = TerravibesAPI(href_handler) - client = TestClient(terravibes_app.versioned_wrapper) - url_string = str(client.base_url) - rest_client = FarmvibesAiClient(url_string) - rest_client.session = client - rest_client.session.headers.update(rest_client.default_headers) - yield rest_client - - -@pytest.fixture -def the_polygon(): - return Polygon( - [ - [-88.068487, 37.058836], - [-88.036059, 37.048687], - [-88.012895, 37.068984], - [-88.026622, 37.085711], - [-88.062482, 37.081461], - [-88.068487, 37.058836], - ] - ) - - -@patch("vibe_server.server.list_existing_workflows") -@patch("vibe_server.server.TerravibesProvider.list_workflows") -@pytest.mark.anyio -async def test_list_workflows( - list_workflows: MagicMock, - list_existing_workflows: MagicMock, - rest_client: FarmvibesAiClient, -): - list_workflows.return_value = list_existing_workflows.return_value = "a/b c".split() - workflows = rest_client.list_workflows() # type: ignore - assert workflows - assert len(workflows) == len(await list_workflows()) - - -@patch.object(StateStore, 
"retrieve", side_effect=lambda _: []) -def test_empty_list_runs(_, rest_client: FarmvibesAiClient): - runs = rest_client.list_runs() - assert not runs - - -@pytest.mark.parametrize("workflow", ["helloworld", j(get_workflow_dir(), "helloworld.yaml")]) -@pytest.mark.parametrize("params", [None, {}, {"param1": 1}]) -@patch.object(TerravibesProvider, "submit_work") -@patch.object(StateStore, "transaction") -@patch.object(StateStore, "retrieve") -@patch.object(StateStore, "retrieve_bulk") -@patch("vibe_server.server.list_existing_workflows") -@patch("vibe_server.server.build_args_for_workflow") -@patch("vibe_server.server.validate_workflow_input") -def test_submit_run( - validate: MagicMock, - build_args: MagicMock, - list_existing_workflows: MagicMock, - retrieve_bulk: MagicMock, - retrieve: MagicMock, - transaction: MagicMock, - _: MagicMock, - rest_client: FarmvibesAiClient, - the_polygon: Polygon, - params: Optional[Dict[str, Any]], - workflow: str, - fake_ops_dir: str, -): - first_retrieve_call = True - - def retrieve_side_effect(_): - nonlocal first_retrieve_call - if first_retrieve_call: - first_retrieve_call = False - return [] - return asdict(transaction.call_args.args[0][1]["value"]) - - def bulk_side_effect(_): - return [retrieve_side_effect(_)] - - retrieve.side_effect = retrieve_side_effect - retrieve_bulk.side_effect = bulk_side_effect - - list_existing_workflows.return_value = ["a/b", "c", "helloworld"] - with patch("vibe_server.workflow.spec_parser.DEFAULT_OPS_DIR", fake_ops_dir): - run = rest_client.run( - (workflow if "yaml" not in workflow else WorkflowParser._load_workflow(workflow)), - "test-run", - geometry=the_polygon, - time_range=(datetime(2021, 2, 1), datetime(2021, 2, 2)), - parameters=params, - ) - assert run - assert run.parameters == params - build_args.assert_called() - validate.assert_called() - - -@patch.object(TerravibesProvider, "submit_work") -@patch.object(StateStore, "transaction") -@patch.object(StateStore, "retrieve") 
-@patch.object(StateStore, "retrieve_bulk") -def test_submit_base_vibe_run( - retrieve_bulk: MagicMock, - retrieve: MagicMock, - transaction: MagicMock, - _: MagicMock, - rest_client: FarmvibesAiClient, -): - party_id = "fake-party-id" - seasonal_field_id = "fake-seasonal-field-id" - input_data = ADMAgSeasonalFieldInput( - party_id=party_id, - seasonal_field_id=seasonal_field_id, - ) - - first_retrieve_call = True - - def retrieve_side_effect(_): - nonlocal first_retrieve_call - if first_retrieve_call: - first_retrieve_call = False - return [] - return asdict(transaction.call_args.args[0][1]["value"]) - - def bulk_side_effect(_): - return [retrieve_side_effect(_)] - - retrieve.side_effect = retrieve_side_effect - retrieve_bulk.side_effect = bulk_side_effect - - run = rest_client.run( - "data_ingestion/admag/admag_seasonal_field", - "whatever", - input_data=input_data, - ) - assert run - - -@pytest.mark.parametrize("workflow", ["helloworld", j(get_workflow_dir(), "helloworld.yaml")]) -@pytest.mark.parametrize("params", [None, {}, {"param1": 1}]) -@patch.object(TerravibesProvider, "submit_work") -@patch.object(StateStore, "transaction") -@patch.object(StateStore, "retrieve") -@patch.object(StateStore, "retrieve_bulk") -@patch("vibe_common.statestore.StateStore.store") -@patch("vibe_server.server.list_existing_workflows") -@patch("vibe_server.server.build_args_for_workflow") -@patch("vibe_server.server.validate_workflow_input") -@pytest.mark.anyio -async def test_monitor_run_with_none_datetime_fields( - validate: MagicMock, - build_args: MagicMock, - list_existing_workflows: MagicMock, - store: MagicMock, - retrieve_bulk: MagicMock, - retrieve: MagicMock, - transaction: MagicMock, - _: MagicMock, - rest_client: FarmvibesAiClient, - the_polygon: Polygon, - params: Optional[Dict[str, Any]], - workflow: str, - fake_ops_dir: str, -): - first_retrieve_call = True - run_config: Optional[Dict[str, Any]] = None - - def store_side_effect(_: Any, obj: Any): - nonlocal 
run_config - run_config = obj - - def retrieve_side_effect(_): - nonlocal first_retrieve_call, run_config - if first_retrieve_call: - first_retrieve_call = False - return [] - - if run_config is None: - run_config = asdict(transaction.call_args.args[0][1]["value"]) - if not run_config["task_details"]: - run_config["task_details"]["hello"] = asdict(RunDetails()) - return run_config - - def bulk_side_effect(_): - return [retrieve_side_effect(_)] - - store.side_effect = store_side_effect - retrieve.side_effect = retrieve_side_effect - retrieve_bulk.side_effect = bulk_side_effect - - list_existing_workflows.return_value = ["a/b", "c", "helloworld"] - with patch("vibe_server.workflow.spec_parser.DEFAULT_OPS_DIR", fake_ops_dir): - run = rest_client.run( - (workflow if "yaml" not in workflow else WorkflowParser._load_workflow(workflow)), - "test-run", - geometry=the_polygon, - time_range=(datetime(2021, 2, 1), datetime(2021, 2, 2)), - parameters=params, - ) - assert run - assert run.parameters == params - build_args.assert_called() - validate.assert_called() - - updater = WorkflowStateUpdate(UUID(run.id)) - await updater(WorkflowChange.WORKFLOW_STARTED, tasks=["hello"]) - - assert run.task_details - - await updater(WorkflowChange.WORKFLOW_FINISHED) - run.monitor(1, 0) - - -def test_system_metrics(rest_client: FarmvibesAiClient): - metrics = rest_client.get_system_metrics() - assert metrics - assert metrics["disk_free"] is not None diff --git a/src/tests/test_subprocess_client.py b/src/tests/test_subprocess_client.py deleted file mode 100644 index dd2966cb..00000000 --- a/src/tests/test_subprocess_client.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from datetime import datetime, timezone -from typing import Tuple -from unittest.mock import Mock, patch - -import pytest -from shapely.geometry import Polygon - -from vibe_core.datamodel import RunStatus -from vibe_dev.client.subprocess_client import SubprocessClient, get_default_subprocess_client - -HERE = os.path.dirname(os.path.abspath(__file__)) - - -@pytest.fixture -def input_polygon() -> Polygon: - polygon_coords = [ - (-88.062073563448919, 37.081397673802059), - (-88.026349330507315, 37.085463858128762), - (-88.026349330507315, 37.085463858128762), - (-88.012445388773259, 37.069230099135126), - (-88.035931592028305, 37.048441375086092), - (-88.068120429075847, 37.058833638440767), - (-88.062073563448919, 37.081397673802059), - ] - - return Polygon(polygon_coords) - - -@pytest.fixture -def workflow_name() -> str: - return "helloworld" - - -@pytest.fixture -def workflow_path() -> str: - return os.path.join(HERE, "..", "..", "workflows", "helloworld.yaml") - - -@pytest.fixture -def time_range() -> Tuple[datetime, datetime]: - return ( - datetime(year=2021, month=2, day=1, tzinfo=timezone.utc), - datetime(year=2021, month=2, day=11, tzinfo=timezone.utc), - ) - - -@patch("vibe_agent.worker.Worker.is_workflow_complete", return_value=False) -@pytest.mark.anyio -async def test_local_client_with_workflow_name( - _: Mock, - input_polygon: Polygon, - workflow_name: str, - tmp_path: str, - time_range: Tuple[datetime, datetime], - capsys, # type: ignore -): - client: SubprocessClient = get_default_subprocess_client(tmp_path) - with capsys.disabled(): - output = await client.run(workflow_name, input_polygon, time_range) - assert output.status == RunStatus.done - - -@patch("vibe_agent.worker.Worker.is_workflow_complete", return_value=False) -@pytest.mark.anyio -async def test_local_client_with_workflow_path( - _: Mock, - input_polygon: Polygon, - workflow_path: str, - tmp_path: str, - time_range: Tuple[datetime, datetime], - capsys, # type: ignore -): - 
client: SubprocessClient = get_default_subprocess_client(tmp_path) - with capsys.disabled(): - output = await client.run(workflow_path, input_polygon, time_range) - assert output.status == RunStatus.done diff --git a/src/tests/workflows_integration/__init__.py b/src/tests/workflows_integration/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/tests/workflows_integration/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/tests/workflows_integration/test_helloworld_integration.py b/src/tests/workflows_integration/test_helloworld_integration.py deleted file mode 100644 index 2681f8b2..00000000 --- a/src/tests/workflows_integration/test_helloworld_integration.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from typing import Any, Dict, List, Optional - -import pytest -from hydra_zen import MISSING, builds, instantiate -from shapely.geometry import Polygon, mapping - -from vibe_agent.storage import Storage -from vibe_common.input_handlers import gen_stac_item_from_bounds -from vibe_common.schemas import CacheInfo, ItemDict, OpRunId - -HERE = os.path.dirname(os.path.abspath(__file__)) -WORKFLOW_PATH = os.path.join(HERE, "..", "..", "..", "workflows", "helloworld.yaml") - - -class DipatchedStorage(Storage): - def __init__(self, original: Storage): - self.original = original - - def retrieve_output_from_input_if_exists(self, *args: Any): - ret = self.original.retrieve_output_from_input_if_exists(*args) - assert ret is not None - return ret - - async def retrieve_output_from_input_if_exists_async( - self, cache_info: CacheInfo, **kwargs: Any - ) -> Optional[ItemDict]: - ret = await self.original.retrieve_output_from_input_if_exists_async(cache_info, **kwargs) - assert ret is not None - return ret - - def store(self, *args: Any): - return 
self.original.store(*args) - - def __getattr__(self, name: str): - return getattr(self.original, name) - - def remove(self, op_run_id: OpRunId): - self.original.remove(op_run_id) - - -PatchedStorageConfig = builds( - DipatchedStorage, - original=MISSING, - zen_dataclass={ - "module": "tests.workflows_integration.test_helloworld_integration", - "cls_name": "PatchedStorageConfig", - }, -) - - -@pytest.fixture -def helloworld_input() -> Dict[str, Any]: - polygon_coords = [ - (-88.062073563448919, 37.081397673802059), - (-88.026349330507315, 37.085463858128762), - (-88.026349330507315, 37.085463858128762), - (-88.012445388773259, 37.069230099135126), - (-88.035931592028305, 37.048441375086092), - (-88.068120429075847, 37.058833638440767), - (-88.062073563448919, 37.081397673802059), - ] - polygon: Dict[str, Any] = mapping(Polygon(polygon_coords)) # type: ignore - start_date = datetime(year=2021, month=2, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) - - return gen_stac_item_from_bounds(polygon, start_date, end_date) - - -# TODO: add "remote" to the list of storage_spec -@pytest.mark.parametrize("storage_spec", ["local"], indirect=True) -@pytest.mark.anyio -async def test_helloworld_workflow( - storage_spec: Any, - helloworld_input: List[Dict[str, Any]], - workflow_test_helper, # type: ignore -): - runner = workflow_test_helper.gen_workflow(WORKFLOW_PATH, storage_spec) - result = await runner.run({k: helloworld_input for k in runner.workflow.inputs_spec}) - - workflow_test_helper.verify_workflow_result(WORKFLOW_PATH, result) - - -# TODO: add "remote" to the list of storage_spec -@pytest.mark.parametrize("storage_spec", ["local"], indirect=True) -@pytest.mark.anyio -async def test_helloworld_cache( - storage_spec: Any, - helloworld_input: List[Dict[str, Any]], - workflow_test_helper, # type: ignore - tmpdir: str, -): - runner = workflow_test_helper.gen_workflow(WORKFLOW_PATH, storage_spec) - - result_first_run = await 
runner.run({k: helloworld_input for k in runner.workflow.inputs_spec}) - workflow_test_helper.verify_workflow_result(WORKFLOW_PATH, result_first_run) - - runner = workflow_test_helper.gen_workflow( - WORKFLOW_PATH, PatchedStorageConfig(original=instantiate(storage_spec)) - ) - result_second_run = await runner.run({k: helloworld_input for k in runner.workflow.inputs_spec}) - - workflow_test_helper.verify_workflow_result(WORKFLOW_PATH, result_second_run) - - # Need to improve this test to be agnostic to the order of elements in the list - assert result_first_run.keys() == result_second_run.keys() - for k in result_first_run.keys(): - out1 = result_first_run[k] - out2 = result_second_run[k] - assert len(out1) == len(out2) - assert out1["id"] == out2["id"] - assert out1["assets"].keys() == out2["assets"].keys() diff --git a/src/tests_local_cluster/expected.tif b/src/tests_local_cluster/expected.tif deleted file mode 100644 index 569a8316..00000000 --- a/src/tests_local_cluster/expected.tif +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:098c3513c360b6d7dd69a6c99d08ec7e099853af02658f8e0d5f0ff545d9f160 -size 7300 diff --git a/src/tests_local_cluster/test_cluster_integration.py b/src/tests_local_cluster/test_cluster_integration.py deleted file mode 100644 index 06bdc88a..00000000 --- a/src/tests_local_cluster/test_cluster_integration.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import codecs -import getpass -import os -import shutil -from datetime import datetime, timezone -from typing import Callable, Union - -import numpy as np -import pytest -import rasterio -from shapely.geometry import Polygon - -from vibe_core.cli.helper import execute_cmd -from vibe_core.cli.local import find_redis_master -from vibe_core.cli.osartifacts import OSArtifacts -from vibe_core.cli.wrappers import KubectlWrapper -from vibe_core.client import FarmvibesAiClient, VibeWorkflowRun, get_default_vibe_client -from vibe_core.datamodel import RunStatus - -HOME = os.path.expanduser("~") -DEFAULT_FARMVIBES_CACHE_DATA_DIR = os.path.join( - os.path.join(HOME, ".cache", "farmvibes-ai"), "data" -) - -DELETE_KEY_WITH_PREFIX_CMD = 'redis-cli -a {password} KEYS "{key_prefix}" 2> /dev/null | xargs redis-cli -a {password} DEL 2> /dev/null' # noqa - -RUN_KEY_PREFIX = "run:*" -OP_KEY_PREFIX = "op:*" -ASSET_KEY_PREFIX = "asset:*" - - -class KubectlRedisWrapper(KubectlWrapper): - def __init__(self): - self.cluster_name = os.environ.get( - "FARMVIBES_AI_CLUSTER_NAME", - f"farmvibes-ai-{getpass.getuser()}", - ) - super().__init__(os_artifacts=OSArtifacts(), cluster_name=self.cluster_name) - - def delete_keys_with_prefix(self, prefix: str) -> Union[str, None]: - result = self.get_secret("redis", ".data.redis-password", self.cluster_name) - redis_password = codecs.decode(result.encode(), "base64").decode() - master_pod, redis_master, kind = find_redis_master(self) - bash_command = DELETE_KEY_WITH_PREFIX_CMD.format(password=redis_password, key_prefix=prefix) - cmd = [self.os_artifacts.kubectl, "exec", master_pod, "--", "bash", "-c", bash_command] - - retries = 3 - output = None - - for _ in range(retries): - try: - output = execute_cmd(cmd, censor_command=True) - break - except ValueError: - continue - - return output - - -def clear_cache_and_cache_metadata(): - if os.path.exists(DEFAULT_FARMVIBES_CACHE_DATA_DIR): - shutil.rmtree(DEFAULT_FARMVIBES_CACHE_DATA_DIR) - - 
redis_via_kubectl = KubectlRedisWrapper() - redis_via_kubectl.delete_keys_with_prefix(RUN_KEY_PREFIX) - redis_via_kubectl.delete_keys_with_prefix(OP_KEY_PREFIX) - redis_via_kubectl.delete_keys_with_prefix(ASSET_KEY_PREFIX) - - -def ensure_equal_output_images(expected_path: str, actual_path: str): - with rasterio.open(expected_path) as src: - expected_ar = ( - src.read() - ) # Actually read the data. This is a numpy array with shape (bands, height, width) - expected_profile = src.profile # Metadata about geolocation, compression, and tiling (dict) - with rasterio.open(actual_path) as src: - actual_ar = src.read() - actual_profile = src.profile - assert np.allclose(expected_ar, actual_ar) - assert all(expected_profile[k] == actual_profile[k] for k in expected_profile) - - -def num_files_in_cache(): - num_files = 0 - for dirpath, dirs, files in os.walk(DEFAULT_FARMVIBES_CACHE_DATA_DIR): - num_files += len(files) - return num_files - - -@pytest.fixture -def helloworld_workflow_fixture(): - clear_cache_and_cache_metadata() - - def run_helloworld_workflow(): - polygon_coords = [ - (-88.062073563448919, 37.081397673802059), - (-88.026349330507315, 37.085463858128762), - (-88.026349330507315, 37.085463858128762), - (-88.012445388773259, 37.069230099135126), - (-88.035931592028305, 37.048441375086092), - (-88.068120429075847, 37.058833638440767), - (-88.062073563448919, 37.081397673802059), - ] - polygon = Polygon(polygon_coords) - start_date = datetime(year=2021, month=2, day=1, tzinfo=timezone.utc) - end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) - client: FarmvibesAiClient = get_default_vibe_client() - - run = client.run( - "helloworld", - "test_hello", - geometry=polygon, - time_range=(start_date, end_date), - ) - - run.block_until_complete(30) - return run - - return run_helloworld_workflow - - -def test_helloworld_once(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): - run = helloworld_workflow_fixture() - - assert run.status == 
RunStatus.done, f"Workflow did not finish successfully. {run.task_details}" - assert run.output is not None, "Workflow did not produce output" - - ensure_equal_output_images( - os.path.join(os.path.dirname(__file__), "expected.tif"), - run.output["raster"][0].assets[0].local_path, # type: ignore - ) - - -def test_helloworld_workflow_twice(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): - # when run twice result should be cached and output should be the same file - - run1 = helloworld_workflow_fixture() - assert ( - run1.status == RunStatus.done - ), f"Workflow did not finish successfully. {run1.task_details}" - assert run1.output is not None, "Workflow did not produce output" - run1_raster_path = run1.output["raster"][0].assets[0].local_path # type: ignore - - run2 = helloworld_workflow_fixture() - assert ( - run2.status == RunStatus.done - ), f"Workflow did not finish successfully. {run2.task_details}" - assert run2.output is not None, "Workflow did not produce output" - run2_raster_path = run2.output["raster"][0].assets[0].local_path # type: ignore - - assert run1_raster_path == run2_raster_path - - -def test_run_helloworld_once_delete(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): - run = helloworld_workflow_fixture() - assert run.status == RunStatus.done, f"Workflow did not finish successfully. {run.task_details}" - assert run.output is not None, "Workflow did not produce output" - assert os.path.exists(run.output["raster"][0].assets[0].local_path) # type: ignore - - run.delete() - run.block_until_deleted(20) - assert ( - run.status == RunStatus.deleted - ), f"Workflow was not deleted successfully. {run.task_details}" - assert 0 == num_files_in_cache() - - -def test_run_helloworld_twice_delete(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): - run1 = helloworld_workflow_fixture() - assert ( - run1.status == RunStatus.done - ), f"Workflow did not finish successfully. 
{run1.task_details}" - assert run1.output is not None, "Workflow did not produce output" - - run2 = helloworld_workflow_fixture() - assert ( - run2.status == RunStatus.done - ), f"Workflow did not finish successfully. {run2.task_details}" - - num_files_in_cache_before_delete = num_files_in_cache() - - run1.delete() - run1.block_until_deleted(20) - - assert ( - run1.status == RunStatus.deleted - ), f"Workflow was not deleted successfully. {run1.task_details}" - - assert num_files_in_cache_before_delete == num_files_in_cache() - assert os.path.exists(run2.output["raster"][0].assets[0].local_path) # type: ignore diff --git a/src/vibe_agent/setup.py b/src/vibe_agent/setup.py deleted file mode 100644 index 35d93eaa..00000000 --- a/src/vibe_agent/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from setuptools import find_packages, setup - -setup( - name="vibe_agent", - version="0.0.1", - author="Microsoft", - author_email="terravibes@microsoft.com", - description="TerraVibes Geospatial Platform Package - vibe package.", - license="Proprietary", - keywords="terravibes geospatial", - packages=find_packages(exclude=["tests*"]), - install_requires=[ - "aiorwlock~=1.3.0", - "azure-cosmos~=4.5.0", - "pystac~=1.6.0", - "azure-identity~=1.14.0", - "azure-storage-blob>=12.5.0", - "httpx~=0.24.1", - "shapely>=1.7.1", - "PyYAML~=6.0.1", - "pebble~=4.6.3", - "grpcio~=1.53.0", - "dapr==1.13.0", - "dapr-ext-grpc~=1.12.0", - "redis~=4.6.0", - "hiredis~=2.2.0", - "vibe-core", - "vibe-common", - ], - entry_points={ - "console_scripts": [ - "vibe-worker = vibe_agent.launch_worker:main", - "vibe-cache = vibe_agent.launch_cache:main", - "vibe-data-ops = vibe_agent.launch_data_ops:main", - ] - }, -) diff --git a/src/vibe_agent/tests/conftest.py b/src/vibe_agent/tests/conftest.py deleted file mode 100644 index cd899eec..00000000 --- a/src/vibe_agent/tests/conftest.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Microsoft 
Corporation. -# Licensed under the MIT License. - -# flake8: noqa -import os -import uuid -from tempfile import TemporaryDirectory -from typing import Any, Dict - -import pytest - -from vibe_agent.ops import OperationFactoryConfig -from vibe_agent.worker import Worker -from vibe_dev.testing.storage_fixtures import * # type: ignore # noqa: F403, F401 -from vibe_dev.testing import anyio_backend # type: ignore # noqa -from vibe_dev.testing.workflow_fixtures import ( - SimpleStrData, - simple_op_spec, - workflow_execution_message, -) # type: ignore # noqa - -FILE_CONTENTS = "SAMPLE FILE CONTENTS FOR TESTING PURPOSES" - - -@pytest.fixture(scope="module") -def file_contents(): - return FILE_CONTENTS - - -@pytest.fixture(scope="module") -def local_file(file_contents: str): - with TemporaryDirectory() as tmp_dir: - filename = f"{uuid.uuid4()}.txt" - filepath = os.path.join(tmp_dir, filename) - with open(os.path.join(tmp_dir, filename), "w") as f: - f.write(file_contents) - yield filepath - - -@pytest.fixture -def local_file_ref(request: pytest.FixtureRequest, local_file: str): - ref_type: str = request.param # type: ignore - if ref_type == "uri": - return f"file://{local_file}" - elif ref_type == "path": - return local_file - else: - raise ValueError(f"Invalid reference type {ref_type}") - - -@pytest.fixture -def op_yaml() -> Dict[str, Any]: - return { - "name": "fake", - "inputs": { - "user_data": "List[DataVibe]", - }, - "output": { - "processed_data": "List[DataVibe]", - }, - "parameters": { - "fake_param": 1, - "fake_another_param": {"fake_nested": 2, "fake_nested_too": 3}, - }, - "entrypoint": {"file": "op.py", "callback_builder": "callback_builder"}, - } - - -@pytest.fixture -def op_foo() -> str: - foo_str: str = """ -def print_args(user_data): - return user_data - -def callback_builder(**kw): - return print_args - """ - return foo_str - - -@pytest.fixture -def non_existing_file(request: pytest.FixtureRequest): - location = request.param # type:ignore - if location 
== "local": - return "/nodir/nodir2/does_not_exist.txt" - raise ValueError(f"Expected 'local' or 'remote' request, got {location}") diff --git a/src/vibe_agent/tests/ops/test_dependencies_integration.py b/src/vibe_agent/tests/ops/test_dependencies_integration.py deleted file mode 100644 index 5dfbaf4d..00000000 --- a/src/vibe_agent/tests/ops/test_dependencies_integration.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import pytest - -from vibe_agent.ops import EntryPointDict, OperationDependencyResolver, OperationSpec -from vibe_core.data import DataVibe, TypeDictVibe -from vibe_core.datamodel import TaskDescription - - -@pytest.fixture -def operation_spec(): - return OperationSpec( - name="fake", - inputs_spec=TypeDictVibe({"vibe_input": DataVibe}), # type: ignore - output_spec=TypeDictVibe({"processed_data": DataVibe}), - parameters={}, - entrypoint=EntryPointDict(file="fake.py", callback_builder="fake_callback"), - root_folder="/tmp", - description=TaskDescription(), - ) - - -def test_resolver_empty_dependency(operation_spec: OperationSpec): - resolver = OperationDependencyResolver() - empty_dependency = resolver.resolve(operation_spec) - - assert len(empty_dependency) == 0 - - -def test_resolver_valid_dependency(operation_spec: OperationSpec): - operation_spec.parameters = {"param": 1, "another_param": "test"} - operation_spec.dependencies = {"parameters": ["param", "another_param"]} - - resolver = OperationDependencyResolver() - dependencies = resolver.resolve(operation_spec) - target_dependencoes = {"parameters": operation_spec.parameters} - - assert target_dependencoes == dependencies - - -def test_resolver_valid_partial_dependency(operation_spec: OperationSpec): - operation_spec.parameters = {"param": 1, "another_param": "test"} - operation_spec.dependencies = {"parameters": ["another_param"]} - - resolver = OperationDependencyResolver() - dependencies = resolver.resolve(operation_spec) - 
target_dependencies = {"parameters": {"another_param": "test"}} - - assert target_dependencies == dependencies - - -def test_resolver_invalid_dependency(operation_spec: OperationSpec): - operation_spec.parameters = {"param": 1, "another_param": "test"} - operation_spec.dependencies = {"parameters": ["unexisting_param"]} - - resolver = OperationDependencyResolver() - with pytest.raises(ValueError): - resolver.resolve(operation_spec) diff --git a/src/vibe_agent/tests/ops/test_op_cache_builder.py b/src/vibe_agent/tests/ops/test_op_cache_builder.py deleted file mode 100644 index 34079f46..00000000 --- a/src/vibe_agent/tests/ops/test_op_cache_builder.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import datetime -import random -from dataclasses import dataclass -from typing import Any, Dict, List, Union - -from pystac import Item -from pytest import fixture -from shapely.geometry import Polygon, mapping - -from vibe_common.schemas import CacheInfo, ItemDict, OpResolvedDependencies -from vibe_core.data.core_types import BaseVibe - - -@dataclass -class TestVibe(BaseVibe): - a: int - b: str - - -@fixture -def item_dict(): - num_items = 5 - polygon_coords = [ - (-88.062073563448919, 37.081397673802059), - (-88.026349330507315, 37.085463858128762), - (-88.026349330507315, 37.085463858128762), - (-88.012445388773259, 37.069230099135126), - ] - polygon: Dict[str, Any] = mapping(Polygon(polygon_coords)) # type: ignore - timestamp = datetime.datetime.now(datetime.timezone.utc) - items = [ - Item(id=str(i), geometry=polygon, datetime=timestamp, properties={}, bbox=None) - for i in range(num_items) - ] - single_item = Item( - id=str(num_items), geometry=polygon, datetime=timestamp, properties={}, bbox=None - ) - base_items = [TestVibe(i, f"{i}") for i in range(num_items)] - single_base = TestVibe(num_items, f"{num_items}") - - output_dict = { - "list_input": items, - "single_input": single_item, - "list_base": 
base_items, - "single_base": single_base, - } - - return output_dict - - -@fixture -def item_dict_hashes() -> Dict[str, Union[str, List[str], Dict[str, Any]]]: - return { - "vibe_source_items": { - "list_input": ["0", "1", "2", "3", "4"], - "single_input": "5", - "list_base": [ - "371c8cb9ac0a9f7d31fd0ab9d1e59efe3a5d98854e86b6bfa3207ccf4e6dfbf6", - "3d15b923441e57a7b3f9dcc93f43d8b41620b3dba7d5c4be78bf0b2a597006d2", - "c5e1ca033cc639402b7352606e8a00676636287f437739a1c773440df76d2799", - "cf3b5755718f90ffe7cdf7b27bd41da19158ea4d1fefdc7aca188bc9dcac7f19", - "eab1e3a83e5b227da228fefdf633ce9a05b12dcdb59d6739f7d1dddeb51d712f", - ], - "single_base": "66756d10b406f729019b8a049f02e293b7f7e0e3b22f613f4c7024f732e5ee11", - }, - "vibe_op_parameters": {"parameters": {"dep": 1, "another_dep": "bla"}}, - "vibe_op_version": "1", - "vibe_op_hash": "5daf389eaad4c50533c2b1ace0b6f551f1a3b9236ec35f1fa3e5a5ab11b68a32", - } - - -@fixture -def op_dependencies(): - return {"parameters": {"dep": 1, "another_dep": "bla"}} - - -def test_stable_hashes( - item_dict: ItemDict, - op_dependencies: OpResolvedDependencies, - item_dict_hashes: Dict[str, Union[str, List[str], Dict[str, Any]]], -): - cache_info = CacheInfo("test_op", "1.0", item_dict, op_dependencies) - storage_dict = cache_info.as_storage_dict() - for k, v in item_dict_hashes.items(): - assert storage_dict[k] == v - - -def test_cache_builder(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): - version = "1.3" - cache_info = CacheInfo("test_op", version, item_dict, op_dependencies) - - assert cache_info.version == version[0] - - for k, v in item_dict.items(): - if isinstance(v, list): - target_ids = sorted(CacheInfo._compute_or_extract_id(v)) - for target_id, input_id in zip(target_ids, cache_info.ids[k]): - assert target_id == input_id - else: - assert cache_info.ids[k] == CacheInfo._compute_or_extract_id(v) - - -def test_cache_builder_hash(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): - version = "1.3" - 
cache_info = CacheInfo("test_op", version, item_dict, op_dependencies) - cache_info_repeat = CacheInfo("test_op", version[0], item_dict, op_dependencies) - - assert cache_info.hash == cache_info_repeat.hash - - -def test_hash_order_invariances(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): - version = "1.3" - cache_info = CacheInfo("test_op", version, item_dict, op_dependencies) - - # Shufling input ids - random.shuffle(item_dict["list_input"]) # type: ignore - random.shuffle(item_dict["list_base"]) # type: ignore - cache_info_shuffled = CacheInfo("test_op", version, item_dict, op_dependencies) - - assert cache_info.hash == cache_info_shuffled.hash - - -def test_hash_version_dependency(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): - cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - cache_info_repeat = CacheInfo("test_op", "2.5", item_dict, op_dependencies) - - assert cache_info.hash != cache_info_repeat.hash - - -def test_hash_source_id_dependency_single( - item_dict: ItemDict, op_dependencies: OpResolvedDependencies -): - cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - item_dict["single_input"].id = str(10) # type: ignore - cache_info2 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - item_dict["single_base"].a = 2 # type: ignore - cache_info3 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - - assert cache_info.hash != cache_info2.hash - assert cache_info.hash != cache_info3.hash - assert cache_info2.hash != cache_info3.hash - - -def test_hash_source_id_dependency_list( - item_dict: ItemDict, op_dependencies: OpResolvedDependencies -): - cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - item_dict["list_input"][-1].id = str(10) # type: ignore - cache_info2 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - item_dict["list_base"][-1].b = str(10) # type: ignore - cache_info3 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - - 
assert cache_info.hash != cache_info2.hash - assert cache_info.hash != cache_info3.hash - assert cache_info2.hash != cache_info3.hash - - -def test_hash_source_name_dependency(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): - cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - i = item_dict.pop("list_input") - item_dict["different_name_input"] = i - cache_info_repeat = CacheInfo("test_op", "1.3", item_dict, op_dependencies) - - assert cache_info.hash != cache_info_repeat.hash - - -def test_hash_parameter_dependency(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): - op_version = "1.3" - cache_info = CacheInfo("test_op", op_version, item_dict, op_dependencies) - op_dependencies["parameters"]["dep"] = 2 - cache_info_repeat = CacheInfo("test_op", op_version, item_dict, op_dependencies) - - assert cache_info.hash != cache_info_repeat.hash - - -def test_hash_gen_basevibe(): - x = CacheInfo._compute_or_extract_id(TestVibe(1, "1")) - y = CacheInfo._compute_or_extract_id(TestVibe(2, "1")) - z = CacheInfo._compute_or_extract_id(TestVibe(1, "2")) - assert x != y - assert x != z - assert y != z diff --git a/src/vibe_agent/tests/ops/test_op_parser.py b/src/vibe_agent/tests/ops/test_op_parser.py deleted file mode 100644 index c67805fa..00000000 --- a/src/vibe_agent/tests/ops/test_op_parser.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from typing import Any, Dict - -from vibe_agent.ops import OperationParser, OperationSpec -from vibe_core.file_utils import write_yaml - - -def compare_spec_yaml(spec: OperationSpec, op_yaml: Dict[str, Any], root_folder: str): - assert spec.dependencies == op_yaml.get("dependencies", {}) - assert spec.version == op_yaml.get("version", "1.0") - assert spec.parameters == op_yaml["parameters"] - assert spec.name == op_yaml["name"] - assert spec.root_folder == root_folder - assert spec.entrypoint["file"] == op_yaml["entrypoint"]["file"] - assert spec.entrypoint["callback_builder"] == op_yaml["entrypoint"]["callback_builder"] - assert op_yaml["inputs"].keys() == spec.inputs_spec.keys() - - -def test_parser_only_required(tmpdir: str, op_yaml: Dict[str, Any]): - op_yaml_file = os.path.join(tmpdir, "fake.yaml") - write_yaml(op_yaml_file, op_yaml) - spec = OperationParser().parse(op_yaml_file) - compare_spec_yaml(spec, op_yaml, tmpdir) - - -def test_parser_version(tmpdir: str, op_yaml: Dict[str, Any]): - op_yaml_file = os.path.join(tmpdir, "fake.yaml") - op_yaml["version"] = "2.5" - write_yaml(op_yaml_file, op_yaml) - spec = OperationParser().parse(op_yaml_file) - compare_spec_yaml(spec, op_yaml, tmpdir) - - -def test_parser_dependencies(tmpdir: str, op_yaml: Dict[str, Any]): - op_yaml_file = os.path.join(tmpdir, "fake.yaml") - op_yaml["dependencies"] = {"parameters": ["fake_param"]} - write_yaml(op_yaml_file, op_yaml) - spec = OperationParser().parse(op_yaml_file) - compare_spec_yaml(spec, op_yaml, tmpdir) - - -def test_parser_empty_fields(tmpdir: str, op_yaml: Dict[str, Any]): - op_yaml_file = os.path.join(tmpdir, "fake.yaml") - op_yaml["dependencies"] = None - op_yaml["version"] = None - op_yaml["parameters"] = None - write_yaml(op_yaml_file, op_yaml) - spec = OperationParser().parse(op_yaml_file) - assert spec.parameters == {} - assert spec.dependencies == {} - assert spec.version == "1.0" diff --git a/src/vibe_agent/tests/ops/test_operation.py 
b/src/vibe_agent/tests/ops/test_operation.py deleted file mode 100644 index 1ff91519..00000000 --- a/src/vibe_agent/tests/ops/test_operation.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime -from typing import Any, Callable -from unittest.mock import MagicMock, patch - -import pytest -from shapely import geometry as shpg - -from vibe_agent.ops import Operation, OperationFactory -from vibe_agent.ops_helper import OpIOConverter -from vibe_agent.storage.local_storage import LocalResourceExistsError -from vibe_common.schemas import CacheInfo, OperationParser -from vibe_core.data import DataVibe -from vibe_core.data.utils import StacConverter -from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir # type: ignore # noqa -from vibe_dev.testing.op_tester import FakeStorage - - -@patch.object(OperationFactory, "resolve_secrets") -def test_callback_output_mismatch_fails(resolve_secrets: MagicMock, fake_ops_dir: str): # noqa - op_spec = OperationParser().parse(os.path.join(fake_ops_dir, "fake/item_item.yaml")) - resolve_secrets.return_value = op_spec.parameters - factory = OperationFactory(None, None) # type: ignore - op = factory.build(op_spec) - - now = datetime.now() - x = DataVibe( - id="1", time_range=(now, now), geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), assets=[] - ) - op._call_validate_op(user_data=x) # type: ignore - - def mock_callback(callback: Callable[..., Any]): - def fun(*args: Any, **kwargs: Any): - return {"wrong": None, **callback(*args, **kwargs)} - - return fun - - op.callback = mock_callback(op.callback) # type: ignore - with pytest.raises(RuntimeError): - op._call_validate_op(user_data=x) # type: ignore - - -@patch.object(Operation, "_call_validate_op") -@patch.object(FakeStorage, "retrieve_output_from_input_if_exists") -@patch.object(OpIOConverter, "serialize_output") -@patch.object(OpIOConverter, "deserialize_input") 
-@patch.object(OperationFactory, "resolve_secrets") -def test_op_cache_check_before_callback( - resolve_secrets: MagicMock, - deserialize_input: MagicMock, - serialize_output: MagicMock, - retrieve_cache: MagicMock, - call_validate: MagicMock, - fake_ops_dir: str, # noqa -): - deserialize_input.return_value = {"stac": 1} - serialize_output.side_effect = lambda x: x - cached_output = {"cached_before": "no callback 😊"} - retrieve_cache.return_value = cached_output - op_spec = OperationParser().parse(os.path.join(fake_ops_dir, "fake/item_item.yaml")) - resolve_secrets.return_value = op_spec.parameters - factory = OperationFactory(FakeStorage(None), None) # type:ignore - op = factory.build(op_spec) - cache_info = CacheInfo("test-op", "1.0", {}, {}) - object.__setattr__(cache_info, "hash", "cache_before") - out = op.run(None, cache_info) # type:ignore - assert out == cached_output - deserialize_input.assert_called_once() - serialize_output.assert_called_once() - retrieve_cache.assert_called_once() - call_validate.assert_not_called() - - -@patch.object(FakeStorage, "store") -@patch.object(StacConverter, "from_stac_item") -@patch.object(Operation, "_call_validate_op") -@patch.object(FakeStorage, "retrieve_output_from_input_if_exists") -@patch.object(OpIOConverter, "serialize_output") -@patch.object(OpIOConverter, "deserialize_input") -@patch.object(OperationFactory, "resolve_secrets") -def test_op_cache_check_after_callback( - resolve_secrets: MagicMock, - deserialize_input: MagicMock, - serialize_output: MagicMock, - retrieve_cache: MagicMock, - call_validate: MagicMock, - from_stac_item: MagicMock, - store: MagicMock, - fake_ops_dir: str, # noqa -): - deserialize_input.return_value = {"stac": 1} - serialize_output.side_effect = lambda x: x - cached_output = {"cached_after": "yes callback 😔"} - retrieve_cache.side_effect = [None, cached_output] - call_validate.return_value = {"out": "repeated callback output"} - from_stac_item.side_effect = lambda x: x - 
store.side_effect = LocalResourceExistsError() - op_spec = OperationParser().parse(os.path.join(fake_ops_dir, "fake/item_item.yaml")) - resolve_secrets.return_value = op_spec.parameters - factory = OperationFactory(FakeStorage(None), None) # type:ignore - op = factory.build(op_spec) - cache_info = CacheInfo("test-op", "1.0", {}, {}) - object.__setattr__(cache_info, "hash", "cache_before") - out = op.run(None, cache_info) # type:ignore - assert out == cached_output - deserialize_input.assert_called_once() - serialize_output.assert_called_once() - # Cache retrieval should be called once before the callback, and then again after - assert retrieve_cache.call_count == 2 - call_validate.assert_called_once() diff --git a/src/vibe_agent/tests/test_asset_vibe.py b/src/vibe_agent/tests/test_asset_vibe.py deleted file mode 100644 index 0c563446..00000000 --- a/src/vibe_agent/tests/test_asset_vibe.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -from pathlib import Path - -import pytest - -from vibe_agent.storage.asset_management import AssetManager -from vibe_core.data import AssetVibe - -CONTENT = "FAKE CONTENT FILE" -EXTENSION = ".txt" -ID = "FAKE_FILE" -FNAME = f"{ID}{EXTENSION}" - - -@pytest.fixture -def local_file(tmp_path: Path) -> str: - with open(tmp_path / FNAME, "w") as f: - f.write(CONTENT) - - assert Path.exists(tmp_path / FNAME) - return (tmp_path / FNAME).as_posix() - - -@pytest.fixture -def remote_file(local_file: str, blob_asset_manager: AssetManager) -> str: - blob_asset_manager.store(ID, local_file) - assert blob_asset_manager.exists(ID) - return blob_asset_manager.retrieve(ID) - - -def test_local_asset(local_file: str): - local_asset = AssetVibe(reference=local_file, type=mimetypes.types_map[EXTENSION], id=ID) - - # file is local, then local path must be equal to passed reference - assert local_asset.local_path == local_file - - # Local urls are assigned with file:// prefix - assert local_asset.url == f"file://{local_file}" diff --git a/src/vibe_agent/tests/test_cache_metadata_store.py b/src/vibe_agent/tests/test_cache_metadata_store.py deleted file mode 100644 index 4586dc52..00000000 --- a/src/vibe_agent/tests/test_cache_metadata_store.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -import uuid -from dataclasses import asdict -from datetime import datetime -from typing import Any, Dict, Set, Tuple -from unittest.mock import AsyncMock, Mock, call, patch - -import pytest - -from vibe_agent.cache_metadata_store import RedisCacheMetadataStore -from vibe_agent.data_ops import DataOpsManager -from vibe_agent.storage import asset_management -from vibe_agent.storage.storage import Storage -from vibe_common.schemas import CacheInfo, OpRunId -from vibe_core.data.core_types import OpIOType -from vibe_core.datamodel import RunConfig, RunDetails, RunStatus, SpatioTemporalJson - - -class FakeOpRunResult: - def __init__(self, op_name: str, fake_asset_ids: Set[str]): - self.cache_info = CacheInfo(op_name, "1.0", {}, {}) - self.asset_ids = fake_asset_ids - - def get_output(self) -> OpIOType: - return {self.cache_info.name: {"assets": {asset_id: {} for asset_id in self.asset_ids}}} - - def get_op_run_id(self) -> OpRunId: - return OpRunId(self.cache_info.name, self.cache_info.hash) - - -@pytest.fixture -def no_asset_op_run(): - return FakeOpRunResult("no_asset_op", set()) - - -@pytest.fixture -def op_1_run(): - return FakeOpRunResult("op_1_run", {"asset-1", "asset-2"}) - - -@pytest.fixture -def op_2_run(): - return FakeOpRunResult("op_2_run", {"asset-2", "asset-3"}) - - -@pytest.fixture -def run_config() -> Dict[str, Any]: - run_config = asdict( - RunConfig( - name="fake", - workflow="fake", - parameters=None, - user_input=SpatioTemporalJson( - datetime.now(), - datetime.now(), - {}, - ), - id=uuid.uuid4(), - details=RunDetails(), - task_details={}, - spatio_temporal_json=None, - output="", - ) - ) - return run_config - - -class AsyncFakeRedis: - def __init__(self): - self.data = {} - - async def sadd(self, key: str, *values: str): - if key not in self.data: - self.data[key] = set() - self.data[key].update(values) - - async def srem(self, key: str, *values: str): - if key in self.data: - self.data[key].difference_update(values) - # Redis does 
not allow empty sets - if not self.data[key]: - del self.data[key] - - async def smembers(self, key: str): - return self.data.get(key, set()) - - async def scard(self, key: str): - return len(self.data.get(key, set())) - - async def sismember(self, key: str, value: str): - return value in self.data.get(key, set()) - - def pipeline(self, transaction: bool = True): - return AsyncFakeRedisPipeline(self) - - async def close(self): - pass - - -class AsyncFakeRedisPipeline: - def __init__(self, redis_client: AsyncFakeRedis): - self.redis_client = redis_client - self.commands = [] - - def __getattr__(self, name: str): - def method(*args: Any, **kwargs: Any): - command = (name, args, kwargs) - self.commands.append(command) - - return method - - async def execute(self): - coroutines = [] - for command in self.commands: - name, args, kwargs = command - method = getattr(self.redis_client, name) - coro = method(*args, **kwargs) - coroutines.append(coro) - results = await asyncio.gather(*coroutines) - return results - - -def get_mocked_data_ops() -> Tuple[DataOpsManager, AsyncFakeRedis, Mock]: - with patch("vibe_agent.cache_metadata_store.retrieve_dapr_secret"): - redis_client_mock = AsyncFakeRedis() - - storage_mock = Mock(spec=Storage) - storage_mock.asset_manager = Mock(spec=asset_management.AssetManager) - - metadata_store = RedisCacheMetadataStore() - metadata_store._get_redis_client = AsyncMock(return_value=redis_client_mock) - - do_manager = DataOpsManager(storage_mock, metadata_store=metadata_store) - do_manager._init_locks() - return do_manager, redis_client_mock, storage_mock - - -def assert_op_in_fake_redis(redis_client: AsyncFakeRedis, run_id: str, fake_op: FakeOpRunResult): - run_ops_key = RedisCacheMetadataStore._run_ops_key_format.format(run_id=run_id) - op_runs_key = RedisCacheMetadataStore._op_runs_key_format.format( - op_name=fake_op.cache_info.name, op_hash=fake_op.cache_info.hash - ) - op_ref = RedisCacheMetadataStore._op_ref_format.format( - 
op_name=fake_op.cache_info.name, op_hash=fake_op.cache_info.hash - ) - op_assets_key = RedisCacheMetadataStore._op_assets_key_format.format( - op_name=fake_op.cache_info.name, op_hash=fake_op.cache_info.hash - ) - assert redis_client.data[run_ops_key] == {op_ref} - assert run_id in redis_client.data[op_runs_key] - - if fake_op.asset_ids: - assert redis_client.data[op_assets_key] == fake_op.asset_ids - - for asset_id in fake_op.asset_ids: - asset_op_key = RedisCacheMetadataStore._asset_ops_key_format.format(asset_id=asset_id) - assert op_ref in redis_client.data[asset_op_key] - - -@pytest.mark.anyio -async def test_store_references_with_empty_asset_list(no_asset_op_run: FakeOpRunResult): - do_manager, redis_client_mock, _ = get_mocked_data_ops() - await do_manager.add_references( - "fake-run", no_asset_op_run.get_op_run_id(), no_asset_op_run.get_output() - ) - - assert_op_in_fake_redis(redis_client_mock, "fake-run", no_asset_op_run) - - -@pytest.mark.anyio -async def test_store_references_simple(op_1_run: FakeOpRunResult): - do_manager, redis_client_mock, _ = get_mocked_data_ops() - await do_manager.add_references("fake-run", op_1_run.get_op_run_id(), op_1_run.get_output()) - assert len(redis_client_mock.data) == 3 + len(op_1_run.asset_ids) - assert_op_in_fake_redis(redis_client_mock, "fake-run", op_1_run) - - -@pytest.mark.anyio -async def test_store_references_two_wfs_shared_op(op_1_run: FakeOpRunResult): - do_manager, redis_client_mock, _ = get_mocked_data_ops() - await do_manager.add_references("fake-run-1", op_1_run.get_op_run_id(), op_1_run.get_output()) - await do_manager.add_references("fake-run-2", op_1_run.get_op_run_id(), op_1_run.get_output()) - - assert len(redis_client_mock.data) == 4 + len(op_1_run.asset_ids) - - assert_op_in_fake_redis(redis_client_mock, "fake-run-1", op_1_run) - assert_op_in_fake_redis(redis_client_mock, "fake-run-2", op_1_run) - - -@pytest.mark.anyio -async def test_store_references_two_wfs_shared_asset( - op_1_run: 
FakeOpRunResult, - op_2_run: FakeOpRunResult, -): - do_manager, redis_client_mock, _ = get_mocked_data_ops() - await do_manager.add_references("fake-run-1", op_1_run.get_op_run_id(), op_1_run.get_output()) - await do_manager.add_references("fake-run-2", op_2_run.get_op_run_id(), op_2_run.get_output()) - - assert len(redis_client_mock.data) == 6 + len(op_1_run.asset_ids) + len(op_2_run.asset_ids) - 1 - - assert_op_in_fake_redis(redis_client_mock, "fake-run-1", op_1_run) - assert_op_in_fake_redis(redis_client_mock, "fake-run-2", op_2_run) - - -@patch("vibe_common.statestore.StateStore.retrieve") -@pytest.mark.anyio -async def test_delete_invalid_workflow_run(ss_retrieve_mock: Mock, run_config: Dict[str, Any]): - do_manager, _, _ = get_mocked_data_ops() - invalid_delete_statuses = [ - RunStatus.pending, - RunStatus.queued, - RunStatus.running, - RunStatus.deleting, - RunStatus.deleted, - ] - - for status in invalid_delete_statuses: - run_config["details"]["status"] = status - ss_retrieve_mock.return_value = run_config - result = await do_manager.delete_workflow_run("fake-run") - assert not result - - -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_delete_workflow_run_no_assets( - ss_store_mock: Mock, - ss_retrieve_mock: Mock, - no_asset_op_run: FakeOpRunResult, - run_config: Dict[str, Any], -): - do_manager, redis_client_mock, storage_mock = get_mocked_data_ops() - await do_manager.add_references( - "fake-run", no_asset_op_run.get_op_run_id(), no_asset_op_run.get_output() - ) - - run_config["details"]["status"] = RunStatus.done - ss_retrieve_mock.return_value = run_config - await do_manager.delete_workflow_run("fake-run") - - assert ss_store_mock.call_count == 2 - rc1 = ss_store_mock.call_args_list[0][0][1] - assert rc1.details.status == RunStatus.deleting - rc2 = ss_store_mock.call_args_list[1][0][1] - assert rc2.details.status == RunStatus.deleted - - 
storage_mock.asset_manager.remove.assert_not_called() - storage_mock.remove.assert_called_once_with(no_asset_op_run.get_op_run_id()) - - assert len(redis_client_mock.data) == 0 - - -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_delete_workflow_run_simple( - ss_store_mock: Mock, - ss_retrieve_mock: Mock, - op_1_run: FakeOpRunResult, - run_config: Dict[str, Any], -): - do_manager, redis_client_mock, storage_mock = get_mocked_data_ops() - await do_manager.add_references("fake-run", op_1_run.get_op_run_id(), op_1_run.get_output()) - - run_config["details"]["status"] = RunStatus.done - ss_retrieve_mock.return_value = run_config - await do_manager.delete_workflow_run("fake-run") - - assert ss_store_mock.call_count == 2 - rc1 = ss_store_mock.call_args_list[0][0][1] - assert rc1.details.status == RunStatus.deleting - rc2 = ss_store_mock.call_args_list[1][0][1] - assert rc2.details.status == RunStatus.deleted - - calls = [call(asset_id) for asset_id in op_1_run.asset_ids] - storage_mock.asset_manager.remove.assert_has_calls(calls, any_order=True) - storage_mock.remove.assert_called_once_with(op_1_run.get_op_run_id()) - - assert len(redis_client_mock.data) == 0 - - -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_delete_workflow_run_overlapping_op_and_asset( - ss_store_mock: Mock, - ss_retrieve_mock: Mock, - op_1_run: FakeOpRunResult, - op_2_run: FakeOpRunResult, - run_config: Dict[str, Any], -): - do_manager, redis_client_mock, storage_mock = get_mocked_data_ops() - await do_manager.add_references("fake-run-1", op_1_run.get_op_run_id(), op_1_run.get_output()) - await do_manager.add_references("fake-run-1", op_2_run.get_op_run_id(), op_2_run.get_output()) - await do_manager.add_references("fake-run-2", op_1_run.get_op_run_id(), op_1_run.get_output()) - - run_config["details"]["status"] = 
RunStatus.done - ss_retrieve_mock.return_value = run_config - await do_manager.delete_workflow_run("fake-run-1") - - storage_mock.asset_manager.remove.assert_called_once_with("asset-3") - storage_mock.remove.assert_called_once_with(op_2_run.get_op_run_id()) - - assert_op_in_fake_redis(redis_client_mock, "fake-run-2", op_1_run) - assert len(redis_client_mock.data) == 3 + len(op_1_run.asset_ids) diff --git a/src/vibe_agent/tests/test_local_asset_manager.py b/src/vibe_agent/tests/test_local_asset_manager.py deleted file mode 100644 index 6bca971c..00000000 --- a/src/vibe_agent/tests/test_local_asset_manager.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from tempfile import TemporaryDirectory -from unittest.mock import MagicMock, Mock, patch - -import pytest -import requests - -from vibe_agent.storage.asset_management import LocalFileAssetManager - - -@pytest.fixture -def manager(tmpdir: str): - return LocalFileAssetManager(tmpdir) - - -@patch("os.makedirs") -@patch("shutil.copyfile") -def test_store_add_file(shutil_mock: Mock, makedir_mock: Mock, manager: LocalFileAssetManager): - guid = "123456" - file_path = os.path.join("fake", "file", "path") - manager.exists = MagicMock(return_value=False) - - actual_return = manager.store(guid, file_path) - - target_folder = os.path.join(manager.root_path, guid) - target_file = os.path.join(target_folder, os.path.basename(file_path)) - makedir_mock.assert_called_once_with(target_folder) - shutil_mock.assert_called_once_with(file_path, target_file) - assert actual_return == target_file - - -@patch("os.makedirs") -@patch("shutil.copyfile") -def test_store_exists(shutil_mock: Mock, makedir_mock: Mock, manager: LocalFileAssetManager): - guid = "123456" - file_path = os.path.join("fake", "file", "path") - manager.exists = MagicMock(return_value=True) - return_value = "fake_return_path" - manager.retrieve = MagicMock(return_value=return_value) - - 
actual_return = manager.store(guid, file_path) - - makedir_mock.assert_not_called() - shutil_mock.assert_not_called() - assert actual_return == return_value - - -def test_remove(manager: LocalFileAssetManager): - guid = "123456" - manager.exists = MagicMock(return_value=True) - - with patch("shutil.rmtree") as shutil_mock: - manager.remove(guid) - shutil_mock.assert_called_once_with(os.path.join(manager.root_path, guid)) - - -@patch("shutil.rmtree") -def test_remove_not_exists(shutil_mock: Mock, manager: LocalFileAssetManager): - guid = "123456" - manager.exists = MagicMock(return_value=False) - - manager.remove(guid) - - shutil_mock.assert_not_called() - - -@patch("os.path.exists") -@patch("os.listdir") -def test_retrieve(listdir_mock: Mock, exists_mock: Mock): - with TemporaryDirectory() as tmp_dir: - guid = "123456" - file_name = os.path.join("fake_file") - manager = LocalFileAssetManager(tmp_dir) - manager.exists = MagicMock(return_value=False) - listdir_mock.return_value = [file_name] - exists_mock.return_value = True - - ret = manager.retrieve(guid) - - listdir_mock.assert_called_once_with(os.path.join(tmp_dir, guid)) - assert ret == os.path.join(tmp_dir, guid, file_name) - - -@patch("os.path.exists") -def test_exists(exists_mock: Mock): - with TemporaryDirectory() as tmp_dir: - guid = "123456" - manager = LocalFileAssetManager(tmp_dir) - manager.exists(guid) - exists_mock.assert_called_once_with(os.path.join(tmp_dir, guid)) - - -@pytest.mark.parametrize("local_file_ref", ["path", "uri"], indirect=True) -def test_store_local(manager: LocalFileAssetManager, local_file_ref: str): - asset_guid = "123456" - assert not manager.exists(asset_guid) - manager.store(asset_guid, local_file_ref) - assert manager.exists(asset_guid) - assert os.path.exists(manager.retrieve(asset_guid)) - - -@pytest.mark.parametrize("non_existing_file", ["local"], indirect=True) -def test_asset_does_not_exist_on_fail(manager: LocalFileAssetManager, non_existing_file: str): - asset_guid = 
"123456" - assert not manager.exists(asset_guid) - with pytest.raises((FileNotFoundError, requests.exceptions.HTTPError)): - manager.store(asset_guid, non_existing_file) - assert not manager.exists(asset_guid) - with pytest.raises(ValueError): - manager.retrieve(asset_guid) diff --git a/src/vibe_agent/tests/test_storage.py b/src/vibe_agent/tests/test_storage.py deleted file mode 100644 index 273521c8..00000000 --- a/src/vibe_agent/tests/test_storage.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from datetime import datetime, timezone -from typing import Any, Dict -from unittest.mock import MagicMock, patch - -import pytest -from azure.cosmos.exceptions import CosmosHttpResponseError -from pystac import Asset, Item -from shapely import geometry as shpg -from shapely.geometry import Polygon, mapping - -from vibe_agent.storage.remote_storage import CosmosStorage -from vibe_agent.storage.storage import AssetCopyHandler, ItemDict -from vibe_common.schemas import CacheInfo -from vibe_core.data import DataVibe -from vibe_core.data.utils import StacConverter -from vibe_dev.testing.storage_fixtures import * # type: ignore # noqa: F403, F401 - - -@pytest.fixture -def item_dict() -> ItemDict: - num_items = 5 - polygon_coords = [ - (-88.062073563448919, 37.081397673802059), - (-88.026349330507315, 37.085463858128762), - (-88.026349330507315, 37.085463858128762), - (-88.012445388773259, 37.069230099135126), - ] - polygon: Dict[str, Any] = mapping(Polygon(polygon_coords)) # type: ignore - timestamp = datetime.now(timezone.utc) - - def create_item(i: int): - id = str(i) - new_item = Item(id=id, geometry=polygon, datetime=timestamp, properties={}, bbox=None) - asset = Asset(href=os.path.join("/", "fake", id)) - new_item.add_asset(key=id, asset=asset) - - return new_item - - items = [create_item(i) for i in range(num_items)] - - single_item = create_item(num_items) - - output_dict: ItemDict = 
{"list_input": items, "single_input": single_item} - - return output_dict - - -@patch("vibe_agent.storage.asset_management.AssetManager") -def test_asset_handler_filename(mock_manager: MagicMock, item_dict: ItemDict): - expected_href = "changed!" - mock_manager.store.return_value = expected_href - asset_handler = AssetCopyHandler(mock_manager) - new_items = asset_handler.copy_assets(item_dict) - - for items in new_items.values(): - if isinstance(items, list): - for i in items: - for a in i.get_assets().values(): - assert a.href == expected_href - else: - for a in items.get_assets().values(): - assert a.href == expected_href - - -@patch("vibe_agent.storage.CosmosStorage._store_data") -def test_cosmos_storage_split(mock_handle: MagicMock): - fake_exception = CosmosHttpResponseError(status_code=413) - mock_handle.side_effect = [fake_exception, fake_exception, None] - items = { - "test_data": [ - DataVibe( - id=f"{i}", - time_range=(datetime.utcnow(), datetime.utcnow()), - geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), - assets=[], - ) - for i in range(10) - ] - } - converter = StacConverter() - # `DataVibe` inherits from `BaseVibe` so the below should work fine, but - # pyright/pylance don't like it. 
- test_items: ItemDict = {k: converter.to_stac_item(v) for k, v in items.items()} # type: ignore - storage = CosmosStorage( - key="", - asset_manager=None, # type: ignore - stac_container_name="", - cosmos_database_name="", - cosmos_url="", - ) - cache_info = CacheInfo("test_op", "1.0", {}, {}) - storage.store("test_run", test_items, cache_info) - assert mock_handle.call_count == 3 - assert len(mock_handle.call_args_list[0].args[2][0]["items"]) == 10 - assert len(mock_handle.call_args_list[1].args[2][0]["items"]) == 5 - assert len(mock_handle.call_args_list[2].args[2][0]["items"]) == 3 diff --git a/src/vibe_agent/tests/test_uri_handling.py b/src/vibe_agent/tests/test_uri_handling.py deleted file mode 100644 index c644824b..00000000 --- a/src/vibe_agent/tests/test_uri_handling.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from pathlib import Path - -import pytest -from azure.storage.blob import ContainerClient - -from vibe_agent.storage.file_upload import upload_to_blob -from vibe_core.uri import is_local, local_uri_to_path, uri_to_filename - - -@pytest.fixture -def filename(local_file: str): - return os.path.basename(local_file) - - -@pytest.mark.parametrize("local_file_ref", ["path", "uri"], indirect=True) -def test_filename_from_local_file(filename: str, local_file_ref: str): - assert is_local(local_file_ref) - assert uri_to_filename(local_file_ref) == filename - - -@pytest.fixture(scope="module") -def remote_file(source_container: ContainerClient, local_file: str): - filename = os.path.basename(local_file) - blob = source_container.get_blob_client(filename) - upload_to_blob(local_file, blob, overwrite=True) - return blob - - -def test_local_uri_to_path(): - abs_path = "/abs/path/to/file" - assert is_local(abs_path) - assert local_uri_to_path(abs_path) == abs_path - assert local_uri_to_path(Path(abs_path).as_uri()) == abs_path - rel_path = "rel/path/to/file" - assert is_local(rel_path) - 
assert local_uri_to_path(rel_path) == rel_path - abs_from_rel = local_uri_to_path(Path(rel_path).absolute().as_uri()) - assert abs_from_rel == os.path.abspath(rel_path) - assert os.path.relpath(abs_from_rel, ".") == rel_path diff --git a/src/vibe_agent/vibe_agent/__init__.py b/src/vibe_agent/vibe_agent/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_agent/vibe_agent/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_agent/vibe_agent/agent_config.py b/src/vibe_agent/vibe_agent/agent_config.py deleted file mode 100644 index 3a3f1d69..00000000 --- a/src/vibe_agent/vibe_agent/agent_config.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import os - -import debugpy -from hydra_zen import builds - -from vibe_agent.storage.asset_management import BlobAssetManagerConfig -from vibe_agent.storage.local_storage import ( - LocalFileAssetManagerConfig, - LocalStorageConfig, -) -from vibe_agent.storage.remote_storage import CosmosStorageConfig -from vibe_common.constants import ( - DEFAULT_ASSET_PATH, - DEFAULT_CATALOG_PATH, - DEFAULT_SECRET_STORE_NAME, -) -from vibe_common.secret_provider import DaprSecretConfig - - -def setup_debug(activate: bool = False, port: int = 5678): - if not activate: - return - - debugpy.listen(port) - logging.info(f"Debugger enabled and listening on port {port}") - - -DebugConfig = builds(setup_debug, populate_full_signature=True) - -local_storage = LocalStorageConfig( - local_path=DEFAULT_CATALOG_PATH, - asset_manager=LocalFileAssetManagerConfig(DEFAULT_ASSET_PATH), -) - -stac_cosmos_uri = DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - secret_name=os.environ["STAC_COSMOS_URI_SECRET"], - key_name=os.environ["STAC_COSMOS_URI_SECRET"], -) - -stac_cosmos_key = DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - 
secret_name=os.environ["STAC_COSMOS_CONNECTION_KEY_SECRET"], - key_name=os.environ["STAC_COSMOS_CONNECTION_KEY_SECRET"], -) - -stac_cosmos_db = DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - secret_name=os.environ["STAC_COSMOS_DATABASE_NAME_SECRET"], - key_name=os.environ["STAC_COSMOS_DATABASE_NAME_SECRET"], -) - -stac_cosmos_container = DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - secret_name=os.environ["STAC_CONTAINER_NAME_SECRET"], - key_name=os.environ["STAC_CONTAINER_NAME_SECRET"], -) - -try: - storage_account_url = DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - secret_name=os.environ["BLOB_STORAGE_ACCOUNT_URL"], - key_name=os.environ["BLOB_STORAGE_ACCOUNT_URL"], - ) -except Exception: - storage_account_url = "" - -try: - storage_account_connection_string = DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - secret_name=os.environ["BLOB_STORAGE_ACCOUNT_CONNECTION_STRING"], - key_name=os.environ["BLOB_STORAGE_ACCOUNT_CONNECTION_STRING"], - ) -except Exception: - storage_account_connection_string = "" - - -aks_asset_manager = BlobAssetManagerConfig( - storage_account_url=storage_account_url, - storage_account_connection_string=storage_account_connection_string, - asset_container_name=os.environ["BLOB_CONTAINER_NAME"], - credential=None, - max_upload_concurrency=6, -) - -aks_cosmos_config = CosmosStorageConfig( - key=stac_cosmos_key, - asset_manager=aks_asset_manager, - stac_container_name=stac_cosmos_container, - cosmos_database_name=stac_cosmos_db, - cosmos_url=stac_cosmos_uri, -) diff --git a/src/vibe_agent/vibe_agent/cache.py b/src/vibe_agent/vibe_agent/cache.py deleted file mode 100644 index 7a49ece1..00000000 --- a/src/vibe_agent/vibe_agent/cache.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -import logging -import os -from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor -from typing import List, Optional, cast - -from cloudevents.sdk.event import v1 -from dapr.conf import settings -from dapr.ext.grpc import App, TopicEventResponse -from hydra_zen import builds -from opentelemetry import trace - -from vibe_common.constants import CACHE_PUBSUB_TOPIC, CONTROL_STATUS_PUBSUB, STATUS_PUBSUB_TOPIC -from vibe_common.dapr import dapr_ready -from vibe_common.messaging import ( - ExecuteRequestContent, - ExecuteRequestMessage, - WorkMessage, - WorkMessageBuilder, - accept_or_fail_event, - event_to_work_message, - extract_message_header_from_event, - send, -) -from vibe_common.schemas import CacheInfo, OperationSpec, OpRunId -from vibe_common.telemetry import ( - add_span_attributes, - add_trace, - get_current_trace_parent, - setup_telemetry, - update_telemetry_context, -) -from vibe_core.data.core_types import OpIOType -from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging - -from .cache_metadata_store_client import CacheMetadataStoreClient -from .ops import OperationDependencyResolver -from .ops_helper import OpIOConverter -from .storage.storage import Storage, StorageConfig -from .worker import WorkerMessenger - - -def get_cache_info( - dependency_resolver: OperationDependencyResolver, - input_items: OpIOType, - op_config: OperationSpec, - traceparent: str, -) -> CacheInfo: - # We need traceparent here as abstract event loop mess up the opentelemetry context - update_telemetry_context(traceparent) - - with trace.get_tracer(__name__).start_as_current_span("get_cache_info"): - dependencies = dependency_resolver.resolve(op_config) - stac = OpIOConverter.deserialize_input(input_items) - cache_info = CacheInfo(op_config.name, op_config.version, stac, dependencies) - return cache_info - - -class Cache: - pubsubname: str - pre_control_topic: str - otel_service_name: str - - def __init__( 
- self, - storage: Storage, - port: int = settings.GRPC_APP_PORT, - pubsubname: str = CONTROL_STATUS_PUBSUB, - cache_topic: str = CACHE_PUBSUB_TOPIC, - status_topic: str = STATUS_PUBSUB_TOPIC, - logdir: Optional[str] = None, - max_log_file_bytes: int = MAX_LOG_FILE_BYTES, - log_backup_count: int = LOG_BACKUP_COUNT, - loglevel: Optional[str] = None, - otel_service_name: str = "", - running_on_azure: bool = False, - ): - self.storage = storage - self.pubsubname = pubsubname - self.cache_topic = cache_topic - self.port = port - self.dependency_resolver = OperationDependencyResolver() - self.messenger = WorkerMessenger(pubsubname, status_topic) - self.metadata_store = CacheMetadataStoreClient() - self.logdir = logdir - self.loglevel = loglevel - self.otel_service_name = otel_service_name - self.max_log_file_bytes = max_log_file_bytes - self.log_backup_count = log_backup_count - self.executor = ThreadPoolExecutor() if running_on_azure else ProcessPoolExecutor() - self.running_on_azure = running_on_azure - logging.debug(f"Running on azure? 
{self.running_on_azure}") - logging.debug(f"Pool type: {type(self.executor)}") - - def retrieve_possible_output( - self, cache_info: CacheInfo, exec: Executor, traceparent: str - ) -> Optional[OpIOType]: - possible_output = self.storage.retrieve_output_from_input_if_exists(cache_info) - # We need traceparent here as abstract event loop mess up the opentelemetry context - update_telemetry_context(traceparent) - - with trace.get_tracer(__name__).start_as_current_span("retrieve_possible_output"): - if possible_output: - logging.info(f"Cache hit with hash {cache_info.hash} in op {cache_info.name}") - return OpIOConverter.serialize_output(possible_output) - logging.info(f"Cache miss with hash {cache_info.hash} in op {cache_info.name}") - return None - - @add_trace - def run_new_op(self, message: WorkMessage): - content = cast(ExecuteRequestContent, message.content) - add_span_attributes({"op_name": str(content.operation_spec.name)}) - send( - message, - self.__class__.__name__.lower(), - self.pubsubname, - content.operation_spec.image_name, - ) - - msg = ( - f"Sending new operation to worker. " - f"Op: {content.operation_spec.name}, " - f"Params: {content.operation_spec.parameters}, " - f"Input: {content.operation_spec.inputs_spec}" - ) - - logging.info(msg) - - def fetch_work(self, event: v1.Event) -> TopicEventResponse: - @add_trace - def success_callback(message: WorkMessage) -> TopicEventResponse: - add_span_attributes({"run_id": str(message.header.run_id)}) - content = cast(ExecuteRequestContent, message.content) - op_config = cast(OperationSpec, content.operation_spec) - recursion_msg = f"Recursion error for op {op_config.name} - restarting pod." 
- try: - try: - cache_info = get_cache_info( - self.dependency_resolver, - content.input, - op_config, - get_current_trace_parent(), - ) - except RecursionError as e: - logging.error(f"{recursion_msg} {e}") - os._exit(1) - except Exception as e: - raise RuntimeError( - f"Failed to get cache info for op {op_config.name} with exception " - f"{type(e)}:{e}" - ) from e - possible_output = self.retrieve_possible_output( - cache_info, self.executor, get_current_trace_parent() - ) - - async def async_closure(): - if possible_output is not None: - await self.metadata_store.add_refs( - str(message.run_id), - OpRunId(name=cache_info.name, hash=cache_info.hash), - possible_output, - ) - logging.info(f"Cache hit for op {op_config.name}") - await self.messenger.send_ack_reply(message) - await self.messenger.send_success_reply( - message, possible_output, cache_info - ) - else: - self.run_new_op( - WorkMessageBuilder.add_cache_info_to_execute_request( - cast(ExecuteRequestMessage, message), cache_info - ) - ) - - asyncio.run(async_closure()) - except RecursionError as e: - logging.error(f"{recursion_msg} {e}") - os._exit(1) - - logging.debug(f"Removing message for run_id {message.header.run_id} from queue") - return TopicEventResponse("success") - - @add_trace - def failure_callback(event: v1.Event, e: Exception, tb: List[str]) -> TopicEventResponse: - message = event_to_work_message(event) - content = cast(ExecuteRequestContent, message.content) - op_config = cast(OperationSpec, content.operation_spec) - log_text = f"Failure callback for op {op_config.name}, Exception {e}, Traceback {tb}" - logging.info(log_text) - # Send failure reply to orchestrator so we don't get our workflow stuck - asyncio.run(self.messenger.send_failure_reply(event.id, e, tb)) - return TopicEventResponse("drop") - - update_telemetry_context(extract_message_header_from_event(event).current_trace_parent) - - with trace.get_tracer(__name__).start_as_current_span("fetch_work"): - return 
accept_or_fail_event(event, success_callback, failure_callback) # type: ignore - - def run(self): - self.app = App() - - appname = f"terravibes-{self.__class__.__name__.lower()}" - configure_logging( - default_level=self.loglevel, - appname=appname, - logdir=self.logdir, - max_log_file_bytes=self.max_log_file_bytes, - log_backup_count=self.log_backup_count, - logfile=f"{appname}.log", - ) - - if self.otel_service_name: - setup_telemetry(appname, self.otel_service_name) - - @self.app.subscribe(self.pubsubname, self.cache_topic) - def fetch_work(event: v1.Event) -> TopicEventResponse: - return self.fetch_work(event) - - self.start_service() - - @dapr_ready - def start_service(self): - logging.info(f"Starting cache listening on port {self.port}") - self.app.run(self.port) - - -CacheConfig = builds( - Cache, - storage=StorageConfig, - port=settings.GRPC_APP_PORT, - pubsubname=CONTROL_STATUS_PUBSUB, - cache_topic=CACHE_PUBSUB_TOPIC, - status_topic=STATUS_PUBSUB_TOPIC, - logdir=None, - max_log_file_bytes=MAX_LOG_FILE_BYTES, - log_backup_count=LOG_BACKUP_COUNT, - loglevel=None, - otel_service_name="", - running_on_azure=False, -) diff --git a/src/vibe_agent/vibe_agent/cache_metadata_store.py b/src/vibe_agent/vibe_agent/cache_metadata_store.py deleted file mode 100644 index cf8565ca..00000000 --- a/src/vibe_agent/vibe_agent/cache_metadata_store.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from typing import Dict, Protocol, Set - -from hydra_zen import builds -from redis.asyncio import Redis -from redis.asyncio.retry import Retry as RedisRetry -from redis.backoff import DEFAULT_BASE, DEFAULT_CAP, ExponentialBackoff # type: ignore -from redis.exceptions import BusyLoadingError, ConnectionError, TimeoutError - -from vibe_common.schemas import OpRunId -from vibe_common.secret_provider import retrieve_dapr_secret - - -class CacheMetadataStoreProtocol(Protocol): - """ - Protocol for a cache metadata store. This store is used to store and retrieve metadata about - the relationships of the data (i.e. workflow runs, operation runs and assets) in the cache. - """ - - async def store_references(self, run_id: str, op_run_id: OpRunId, assets: Set[str]) -> None: ... - - async def get_run_ops(self, run_id: str) -> Set[OpRunId]: ... - - async def get_op_workflow_runs(self, op_ref: OpRunId) -> Set[str]: ... - - async def get_op_assets(self, op_ref: OpRunId) -> Set[str]: ... - - async def get_assets_refs(self, asset_ids: Set[str]) -> Dict[str, Set[OpRunId]]: ... - - async def remove_workflow_op_refs( - self, workflow_run_id: str, op_run_ref: OpRunId - ) -> Set[str]: ... - - async def remove_op_asset_refs(self, op_run_ref: OpRunId, asset_ids: Set[str]) -> None: ... - - -class RedisCacheMetadataStore(CacheMetadataStoreProtocol): - """ - Redis implementation of the cache metadata store. - """ - - # TODO: pass redis service name, namespace, and port through Terraform... 
- _redis_host = "redis-master.default.svc.cluster.local" - _redis_port = 6379 - _key_delimiter = ":" - _run_ops_key_format = "run:{run_id}:ops" - _op_runs_key_format = "op:{op_name}:{op_hash}:runs" - _op_assets_key_format = "op:{op_name}:{op_hash}:assets" - _asset_ops_key_format = "asset:{asset_id}:ops" - _op_ref_format = "{op_name}:{op_hash}" - - def __init__(self): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.redis_password = retrieve_dapr_secret("kubernetes", "redis", "redis-password") - - async def _get_redis_client(self): - self.logger.debug( - f"Creating Redis client with host {self._redis_host} and port {self._redis_port}" - ) - retry = RedisRetry(ExponentialBackoff(cap=DEFAULT_CAP, base=DEFAULT_BASE), 3) - redis_client = Redis( - host=self._redis_host, - port=self._redis_port, - db=0, - password=self.redis_password, - decode_responses=True, - retry=retry, - retry_on_error=[ConnectionError, TimeoutError, BusyLoadingError], - ) # type: ignore - response = await redis_client.ping() - self.logger.debug(f"Created redis client - ping response: {response}") - return redis_client - - def _op_run_id_to_op_ref_str(self, op_run_id: OpRunId) -> str: - return self._op_ref_format.format(op_name=op_run_id.name, op_hash=op_run_id.hash) - - def _str_to_op_run_id(self, op_run_ref_str: str) -> OpRunId: - op_name, op_hash = op_run_ref_str.split(self._key_delimiter) - return OpRunId(name=op_name, hash=op_hash) - - async def store_references(self, run_id: str, op_run_id: OpRunId, assets: Set[str]) -> None: - # TODO: is a new client needed for every operation or can we intiate in init and reuse? 
- redis_client = await self._get_redis_client() - - try: - pipe = redis_client.pipeline(transaction=True) - - run_ops_key = self._run_ops_key_format.format(run_id=run_id) - op_ref = self._op_run_id_to_op_ref_str(op_run_id) - pipe.sadd(run_ops_key, op_ref) - - op_runs_key = self._op_runs_key_format.format( - op_name=op_run_id.name, op_hash=op_run_id.hash - ) - pipe.sadd(op_runs_key, run_id) - - if assets: - op_assets_key = self._op_assets_key_format.format( - op_name=op_run_id.name, op_hash=op_run_id.hash - ) - pipe.sadd(op_assets_key, *assets) - - for asset_id in assets: - asset_ops_key = self._asset_ops_key_format.format(asset_id=asset_id) - pipe.sadd(asset_ops_key, op_ref) - - await pipe.execute() - self.logger.debug( - f"Transaction complete for storing references for run id {run_id} " - f"(op name {op_run_id.name}, op hash {op_run_id.hash})." - ) - finally: - await redis_client.close() - - async def get_run_ops(self, run_id: str) -> Set[OpRunId]: - """ - Given a workflow run_id, return the set of op run references associated with that workflow - run as strings in the format "{op_name}:{op_hash}". - - :param run_id: The workflow run id - - :return: The set of op runs associated with the workflow run in the format - "{op_name}:{op_hash}" - """ - redis_client = await self._get_redis_client() - try: - run_ops_key = self._run_ops_key_format.format(run_id=run_id) - run_ops = await redis_client.smembers(run_ops_key) - return {self._str_to_op_run_id(o) for o in run_ops} - finally: - await redis_client.close() - - async def get_op_workflow_runs(self, op_run_id: OpRunId) -> Set[str]: - """ - Given an op run reference, return the set of workflow run ids associated with the op run. 
- - :param op_ref: The op run reference - - :return: The set of workflow run ids associated with the op run - """ - redis_client = await self._get_redis_client() - try: - op_runs_key = self._op_runs_key_format.format( - op_name=op_run_id.name, op_hash=op_run_id.hash - ) - return await redis_client.smembers(op_runs_key) - finally: - await redis_client.close() - - async def get_op_assets(self, op_ref: OpRunId) -> Set[str]: - """ - Given an op run reference, return the set of asset ids associated with the op run. - - :param op_ref: The op run reference - - :return: The set of asset ids associated with the op run - """ - redis_client = await self._get_redis_client() - try: - op_assets_key = self._op_assets_key_format.format( - op_name=op_ref.name, op_hash=op_ref.hash - ) - return await redis_client.smembers(op_assets_key) - finally: - await redis_client.close() - - async def get_assets_refs(self, asset_ids: Set[str]) -> Dict[str, Set[OpRunId]]: - """ - Given a list of asset ids, return the set of op run references associated with each asset. - - :param op_ref: The list of asset ids - - :return: A dictionary mapping asset ids to the set of op run references associated with - each asset - """ - redis_client = await self._get_redis_client() - - try: - pipe = redis_client.pipeline(transaction=False) - asset_ids_list = list(asset_ids) - - for asset_id in asset_ids_list: - asset_ops_key = self._asset_ops_key_format.format(asset_id=asset_id) - pipe.smembers(asset_ops_key) - - assets_smembers_result = await pipe.execute() - - results = {} - - for asset_id, asset_smembers in zip(asset_ids_list, assets_smembers_result): - results[asset_id] = [self._str_to_op_run_id(o) for o in asset_smembers] - - return results - finally: - await redis_client.close() - - async def remove_workflow_op_refs(self, workflow_run_id: str, op_run_ref: OpRunId) -> None: - """ - Removes the references between a workflow run and op run. 
- - :param workflow_run_id: The workflow run id - :param op_ref: The op run reference - """ - redis_client = await self._get_redis_client() - try: - pipe = redis_client.pipeline(transaction=True) - run_ops_key = self._run_ops_key_format.format(run_id=workflow_run_id) - op_ref = self._op_ref_format.format(op_name=op_run_ref.name, op_hash=op_run_ref.hash) - pipe.srem(run_ops_key, op_ref) - - op_runs_key = self._op_runs_key_format.format( - op_name=op_run_ref.name, op_hash=op_run_ref.hash - ) - pipe.srem(op_runs_key, workflow_run_id) - - await pipe.execute() - # TODO: check response for number of members removed and emit warning if not 1 - finally: - await redis_client.close() - - async def remove_op_asset_refs(self, op_run_id: OpRunId, asset_ids: Set[str]) -> None: - # TODO: the following commands could likely be more efficiently performed by invoking a Lua - # script that retrieves the op run, iterates through all of the assets ids and removes the - # asset --> op references and then deletes the op key as well - redis_client = await self._get_redis_client() - try: - pipe = redis_client.pipeline(transaction=True) - op_assets_key = self._op_assets_key_format.format( - op_name=op_run_id.name, op_hash=op_run_id.hash - ) - - for asset_id in asset_ids: - pipe.srem(op_assets_key, asset_id) - - asset_ops_key = self._asset_ops_key_format.format(asset_id=asset_id) - op_run_ref = self._op_ref_format.format( - op_name=op_run_id.name, op_hash=op_run_id.hash - ) - pipe.srem(asset_ops_key, op_run_ref) - - await pipe.execute() - # TODO: check response for number removed and emit warning if doesn't make sense - finally: - await redis_client.close() - - -CacheMetadataStoreProtocolConfig = builds( - CacheMetadataStoreProtocol, -) - -RedisCacheMetadataStoreConfig = builds( - RedisCacheMetadataStore, - builds_bases=(CacheMetadataStoreProtocolConfig,), - # config={"redis_url": getenv("REDIS_URL", "redis://localhost:6379")} -) diff --git 
a/src/vibe_agent/vibe_agent/cache_metadata_store_client.py b/src/vibe_agent/vibe_agent/cache_metadata_store_client.py deleted file mode 100644 index 9dca0ec2..00000000 --- a/src/vibe_agent/vibe_agent/cache_metadata_store_client.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging - -from vibe_common.constants import DATA_OPS_INVOKE_URL_TEMPLATE -from vibe_common.schemas import OpRunId, OpRunIdDict -from vibe_common.telemetry import get_current_trace_parent -from vibe_common.vibe_dapr_client import VibeDaprClient -from vibe_core.data.core_types import OpIOType - - -class CacheMetadataStoreClient: - def __init__(self): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.logger.debug("Initializing CacheMetadataStoreClient") - self.vibe_dapr_client = VibeDaprClient() - - async def add_refs( - self, - run_id: str, - op_run_id: OpRunId, - output: OpIOType, - ) -> None: - self.logger.debug( - f"Adding refs for run {run_id} with op name = {op_run_id.name} " - f"op hash = {op_run_id.hash}" - ) - - # Under load, Pydantic is having issues serializing the OpRunId dataclass object - op_run_id_dict = OpRunIdDict(name=op_run_id.name, hash=op_run_id.hash) - response = await self.vibe_dapr_client.post( - url=DATA_OPS_INVOKE_URL_TEMPLATE.format("add_refs", run_id), - data={ - "op_run_id_dict": self.vibe_dapr_client.obj_json(op_run_id_dict), - "output": self.vibe_dapr_client.obj_json(output), - }, - traceparent=get_current_trace_parent(), - ) - - assert response.ok, "Failed to add refs, but underlying method didn't capture it" diff --git a/src/vibe_agent/vibe_agent/data_ops.py b/src/vibe_agent/vibe_agent/data_ops.py deleted file mode 100644 index 789615ff..00000000 --- a/src/vibe_agent/vibe_agent/data_ops.py +++ /dev/null @@ -1,362 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -import logging -from typing import List, Optional, Set, cast - -from aiorwlock import RWLock -from cloudevents.sdk.event import v1 -from dapr.conf import settings -from fastapi import Request -from hydra_zen import builds -from opentelemetry import trace - -from vibe_agent.cache_metadata_store import ( - CacheMetadataStoreProtocol, - CacheMetadataStoreProtocolConfig, -) -from vibe_agent.storage.storage import Storage, StorageConfig -from vibe_common.constants import ( - CONTROL_STATUS_PUBSUB, - STATUS_PUBSUB_TOPIC, - TRACEPARENT_HEADER_KEY, - WORKFLOW_REQUEST_PUBSUB_TOPIC, -) -from vibe_common.dapr import dapr_ready -from vibe_common.dropdapr import App, TopicEventResponse, TopicEventResponseStatus -from vibe_common.messaging import ( - ExecuteReplyContent, - MessageType, - WorkMessage, - accept_or_fail_event_async, - extract_message_header_from_event, - run_id_from_traceparent, -) -from vibe_common.schemas import OpRunId, OpRunIdDict -from vibe_common.statestore import StateStore -from vibe_common.telemetry import add_trace, setup_telemetry, update_telemetry_context -from vibe_core.data.core_types import OpIOType -from vibe_core.datamodel import RunConfig, RunStatus -from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging -from vibe_core.utils import ensure_list - - -class DataOpsManager: - """ - The DataOpsManager is responsible for managing metadata about the system's cached data and - coordinating data operations. - - Assumptions this code makes: - - Once a workflow run is complete, its metadata (i.e. `RunConfig` in StateStore) and cached - data in Storage is immutable outside of the DataOpsManager. - - Once a op run is complete its cached data (i.e. metadata/catalog) and assets in Storage - are immutable. - - Notes about locks: - - The way metadata_store_lock essentially serializes all requests to the metadata store - whether they be add ref or delete ref requests. 
To make this more efficient in the future, - we can create a lock that allows many add ref requests to go through at a time but only one - delete ref request to execute at a time. - """ - - app: App - metadata_store_lock: RWLock - otel_service_name: str - statestore_lock: asyncio.Lock - - user_deletion_reason = "Deletion requested by user" - - def __init__( - self, - storage: Storage, - metadata_store: CacheMetadataStoreProtocol, - pubsubname: str = CONTROL_STATUS_PUBSUB, - status_topic: str = STATUS_PUBSUB_TOPIC, - delete_workflow_topic: str = WORKFLOW_REQUEST_PUBSUB_TOPIC, - port: int = settings.HTTP_APP_PORT, - logdir: Optional[str] = None, - max_log_file_bytes: int = MAX_LOG_FILE_BYTES, - log_backup_count: int = LOG_BACKUP_COUNT, - loglevel: Optional[str] = None, - otel_service_name: str = "", - ): - self.app = App() - self.port = port - self.pubsubname = pubsubname - self.status_topic = status_topic - self.delete_workflow_topic = delete_workflow_topic - self.storage = storage - self.metadata_store = metadata_store - self.statestore = StateStore() - self.logdir = logdir - self.max_log_file_bytes = max_log_file_bytes - self.log_backup_count = log_backup_count - self.loglevel = loglevel - self.otel_service_name = otel_service_name - - self._setup_routes() - - def _init_locks(self): - logging.debug("Creating locks") - self.metadata_store_lock = RWLock(fast=True) - self.statestore_lock = asyncio.Lock() - - def _setup_routes(self): - @self.app.startup() - def startup(): - # locks have to be be created on the app's (uvicorn's) event loop - self._init_locks() - - @self.app.subscribe_async(self.pubsubname, self.status_topic) - async def fetch_work(event: v1.Event) -> TopicEventResponse: - return await self.fetch_work(self.status_topic, event) - - @self.app.subscribe_async(self.pubsubname, self.delete_workflow_topic) - async def manage_workflow(event: v1.Event): - await self.handle_workflow_event(self.delete_workflow_topic, event) - - 
@self.app.method(name="add_refs/{run_id}") - async def add_refs( - request: Request, run_id: str, op_run_id_dict: OpRunIdDict, output: OpIOType - ) -> TopicEventResponse: - try: - traceparent = request.headers.get(TRACEPARENT_HEADER_KEY) - if traceparent: - update_telemetry_context(traceparent) - else: - logging.warning("No traceparent found in request headers.") - - with trace.get_tracer(__name__).start_as_current_span("add_refs"): - await self.add_references(run_id, OpRunId(**op_run_id_dict), output) - return TopicEventResponseStatus.success - except Exception as e: - logging.error( - f"Error adding references from service invocation for run id {run_id}: {e}" - ) - return TopicEventResponseStatus.drop - - async def fetch_work(self, channel: str, event: v1.Event) -> TopicEventResponse: - @add_trace - async def success_callback(message: WorkMessage) -> TopicEventResponse: - if not message.is_valid_for_channel(channel): - logging.warning( - f"Received invalid message {message} for channel {channel}. Dropping it." - ) - return TopicEventResponseStatus.drop - - if message.header.type == MessageType.execute_reply: - content = cast(ExecuteReplyContent, message.content) - logging.debug( - f"Received execute reply for run id {message.run_id} " - f"(op name {content.cache_info.name}, op hash {content.cache_info.hash})." 
- ) - - run_id = str(message.run_id) - op_run_id = OpRunId(content.cache_info.name, content.cache_info.hash) - await self.add_references(run_id, op_run_id, content.output) - - return TopicEventResponseStatus.success - - @add_trace - async def failure_callback( - event: v1.Event, e: Exception, traceback: List[str] - ) -> TopicEventResponse: - run_id = str(run_id_from_traceparent(event.id)) - logging.error(f"Failed to add references for run id {run_id}: {e}") - return TopicEventResponseStatus.drop - - update_telemetry_context(extract_message_header_from_event(event).current_trace_parent) - with trace.get_tracer(__name__).start_as_current_span("fetch_work"): - return await accept_or_fail_event_async(event, success_callback, failure_callback) - - async def handle_workflow_event(self, channel: str, event: v1.Event): - async def success_callback(message: WorkMessage) -> TopicEventResponse: - if not message.is_valid_for_channel(channel): - logging.warning( - f"Received invalid message {message} for channel {channel}. Dropping it." - ) - return TopicEventResponseStatus.drop - - if message.header.type == MessageType.workflow_deletion_request: - logging.debug(f"Received deletion request for run id {message.run_id}.") - - run_id = str(message.run_id) - await self.delete_workflow_run(run_id) - - return TopicEventResponseStatus.success - - async def failure_callback( - event: v1.Event, e: Exception, traceback: List[str] - ) -> TopicEventResponse: - run_id = str(run_id_from_traceparent(event.id)) - logging.error(f"Failed to delete run id {run_id}: {e}") - return TopicEventResponseStatus.drop - - return await accept_or_fail_event_async(event, success_callback, failure_callback) - - def get_asset_ids(self, output: OpIOType) -> Set[str]: - """ - Given op output as a OpIOTypes, returns the set of asset ids that are referenced in the - output. 
- - :param output: The op output as OpIOType - - :return: The set of asset ids referenced in the output - """ - # TODO: this should probably be moved into vibe_core.utils - asset_ids: Set[str] = set() - for output_item in output.values(): - output_item_list = ensure_list(output_item) - for i in output_item_list: - asset_ids.update(i["assets"].keys()) - return asset_ids - - async def add_references(self, run_id: str, op_run_id: OpRunId, output: OpIOType) -> None: - # many requests to add references can be processed simultaneously assuming Redis SADD used - async with self.metadata_store_lock.reader_lock: - try: - asset_ids = self.get_asset_ids(output) - await self.metadata_store.store_references(run_id, op_run_id, asset_ids) - logging.info( - f"Successfully added references for run id {run_id} " - f"(op name {op_run_id.name}, op hash {op_run_id.hash})." - ) - except Exception: - logging.exception( - f"Failed to add references for run id {run_id} " - f"(op name {op_run_id.name}, op hash {op_run_id.hash})." - ) - raise - - def _can_delete(self, run_config: RunConfig) -> bool: - can_delete = RunStatus.finished(run_config.details.status) - - if not can_delete: - if run_config.details.status == RunStatus.deleting: - logging.warning( - f"Run {run_config.id} is already being deleted. Will not process request." - ) - elif run_config.details.status == RunStatus.deleted: - logging.warning( - f"Run {run_config.id} has already been deleted. Will not process request." - ) - else: - logging.warning( - f"Cannot delete run {run_config.id} with status {run_config.details.status}." - ) - - return can_delete - - async def _init_delete(self, run_id: str) -> bool: - async with self.statestore_lock: # type: ignore - # Using an async lock to ensure two deletion requests for the same workflow run don't - # get processed at the same time. - # The data ops manager will only delete a workflow if it is in a finished status. 
- # The assumption is once the workflow is finished, the RunConfig will not change in the - # statestore (i.e. the status will not change) outside of the Data Ops Manager so it is - # sufficient to use asyncio lock in the Data Ops manager. - run_data = await self.statestore.retrieve(str(run_id)) - run_config = RunConfig(**run_data) - - if not self._can_delete(run_config): - return False - - run_config.details.status = RunStatus.deleting - run_config.details.reason = self.user_deletion_reason - await self.statestore.store(run_id, run_config) - return True - - async def _finalize_delete(self, run_id: str) -> None: - async with self.statestore_lock: # type: ignore - run_data = await self.statestore.retrieve(str(run_id)) - run_config = RunConfig(**run_data) - run_config.details.status = RunStatus.deleted - run_config.set_output({}) - await self.statestore.store(run_id, run_config) - - async def delete_op_run(self, op_run: OpRunId) -> None: - # TODO: the following two calls may be able to be combined into one call to a Lua script - # (need to learn more about Lua scripts) - op_asset_ids = await self.metadata_store.get_op_assets(op_run) - assets_to_ops = await self.metadata_store.get_assets_refs(op_asset_ids) - - for asset_id in op_asset_ids: - asset_ops = assets_to_ops[asset_id] - - if op_run not in asset_ops: - logging.warning( - f"Inconsistent state in metadata store: asset {asset_id} does not contain " - f"reference to {op_run}." - ) - continue - - if len(asset_ops) == 1: - # TODO: aiofiles or ?? - logging.debug(f"Removing asset {asset_id} from storage.") - self.storage.asset_manager.remove(asset_id) - - # TODO: aiofiles or ?? 
- logging.debug(f"Removing op run catalog {op_run} from storage.") - self.storage.remove(op_run) - await self.metadata_store.remove_op_asset_refs(op_run, op_asset_ids) - - async def delete_workflow_run(self, run_id: str) -> bool: - if not await self._init_delete(run_id): - return False - - op_runs = await self.metadata_store.get_run_ops(run_id) - - for op_run in op_runs: - # (re)grabbing write lock for each op so as not to starve other requests due to delete - async with self.metadata_store_lock.writer_lock: - op_wf_run_ids = await self.metadata_store.get_op_workflow_runs(op_run) - - if run_id not in op_wf_run_ids: - logging.warning( - f"Inconsistent state in metadata store: op {op_run} does not contain " - f"reference to workflow run {run_id}." - ) - elif len(op_wf_run_ids) == 1: - await self.delete_op_run(op_run) - - await self.metadata_store.remove_workflow_op_refs(run_id, op_run) - - await self._finalize_delete(run_id) - return True - - async def run(self): - appname = "terravibes-data-ops" - configure_logging( - default_level=self.loglevel, - appname=appname, - logdir=self.logdir, - max_log_file_bytes=self.max_log_file_bytes, - log_backup_count=self.log_backup_count, - logfile=f"{appname}.log", - ) - - if self.otel_service_name: - setup_telemetry(appname, self.otel_service_name) - - await self.start_service() - - @dapr_ready - async def start_service(self): - logging.info(f"Starting data ops manager listening on port {self.port}") - await self.app.run_async(self.port) - - -DataOpsConfig = builds( - DataOpsManager, - port=settings.GRPC_APP_PORT, - pubsubname=CONTROL_STATUS_PUBSUB, - status_topic=STATUS_PUBSUB_TOPIC, - metadata_store=CacheMetadataStoreProtocolConfig, - storage=StorageConfig, - logdir=None, - max_log_file_bytes=MAX_LOG_FILE_BYTES, - log_backup_count=LOG_BACKUP_COUNT, - loglevel=None, - otel_service_name="", -) diff --git a/src/vibe_agent/vibe_agent/launch_cache.py b/src/vibe_agent/vibe_agent/launch_cache.py deleted file mode 100644 index 
3336b546..00000000 --- a/src/vibe_agent/vibe_agent/launch_cache.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import asyncio -from typing import Any - -import hydra -from hydra.core.config_store import ConfigStore -from hydra_zen import instantiate, make_config - -from vibe_agent.agent_config import DebugConfig, aks_cosmos_config, local_storage -from vibe_agent.cache import CacheConfig - -local_cache = CacheConfig(storage=local_storage, running_on_azure=False) -aks_cache = CacheConfig(storage=aks_cosmos_config, running_on_azure=True) - -LocalCacheConfig = make_config(impl=local_cache) -AksCacheConfig = make_config(impl=aks_cache) - -CacheLaunchConfig = make_config( - "cache", - debug=DebugConfig(), - hydra_defaults=["_self_", {"cache": "local"}], -) - - -# Register cache config with hydra's config store -cs = ConfigStore.instance() -cs.store(group="cache", name="local", node=LocalCacheConfig()) -cs.store(group="cache", name="aks", node=AksCacheConfig()) -cs.store(name="vibe_cache", node=CacheLaunchConfig) - - -@hydra.main(config_path=None, version_base=None, config_name="vibe_cache") -def main(cfg: Any): - cache_obj = instantiate(cfg) - asyncio.run(cache_obj.cache.impl.run()) diff --git a/src/vibe_agent/vibe_agent/launch_data_ops.py b/src/vibe_agent/vibe_agent/launch_data_ops.py deleted file mode 100644 index 7d126cad..00000000 --- a/src/vibe_agent/vibe_agent/launch_data_ops.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -from typing import Any - -import hydra -from hydra.core.config_store import ConfigStore -from hydra_zen import instantiate, make_config - -from vibe_agent.agent_config import DebugConfig, aks_cosmos_config, local_storage -from vibe_agent.cache_metadata_store import RedisCacheMetadataStoreConfig -from vibe_agent.data_ops import DataOpsConfig - -# Create instiatiatable configs for CacheMetadataStoreProtocol -redis_cache_metadata_store_config = RedisCacheMetadataStoreConfig() - -# create two DataOpsConfigs: one to build DataOpsManager with local storage and another for -# to build DataOpsManager with AKS/Cosmos storage -local_data_ops_config = DataOpsConfig( - metadata_store=redis_cache_metadata_store_config, storage=local_storage -) -aks_data_ops_config = DataOpsConfig( - metadata_store=redis_cache_metadata_store_config, storage=aks_cosmos_config -) - -# two configs each with one field, impl, one set to the DataOpsConfig for local storage, the -# other for AKS/Cosmos -LocalDataOpsConfig = make_config(impl=local_data_ops_config) -AksDataOpsConfig = make_config(impl=aks_data_ops_config) - -# launching the data ops service has two parts that need to be configured: -# 1. whether or not we are debugging the service -# 2. 
should the DataOpsManager be referencing local storage or a AKS/Cosmos storage -# - by default, it will use the "local" entry in the "data_ops" group in the ConfigStore as the -# default config for the data_ops field -DataOpsLaunchConfig = make_config( - "data_ops", - debug=DebugConfig(), - hydra_defaults=["_self_", {"data_ops": "local"}], -) - -# Register configs config with hydra's config store -cs = ConfigStore.instance() -cs.store(group="data_ops", name="local", node=LocalDataOpsConfig) -cs.store(group="data_ops", name="aks", node=AksDataOpsConfig) -cs.store(name="vibe_data_ops", node=DataOpsLaunchConfig) - - -# The @hydra_main decorator in Hydra resolves all missing configurations from the top-level -# configuration using entries in the config store. If a configuration value is missing, Hydra -# will search the config store for a matching key and use the value stored in the config store -# if one is found. -@hydra.main(config_path=None, version_base=None, config_name="vibe_data_ops") -def main(cfg: Any): - data_ops_launch_config_obj = instantiate(cfg) - asyncio.run(data_ops_launch_config_obj.data_ops.impl.run()) diff --git a/src/vibe_agent/vibe_agent/launch_worker.py b/src/vibe_agent/vibe_agent/launch_worker.py deleted file mode 100644 index 54ad3385..00000000 --- a/src/vibe_agent/vibe_agent/launch_worker.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -import signal -from multiprocessing import set_start_method -from typing import Any - -import hydra -from hydra.core.config_store import ConfigStore -from hydra_zen import instantiate, make_config - -from vibe_agent.agent_config import DebugConfig, aks_cosmos_config, local_storage -from vibe_agent.ops import OperationFactoryConfig -from vibe_common.secret_provider import DaprSecretProviderConfig - -from .worker import WorkerConfig - -local_worker = WorkerConfig( - factory_spec=OperationFactoryConfig(local_storage, DaprSecretProviderConfig()), -) -aks_worker = WorkerConfig( - factory_spec=OperationFactoryConfig(aks_cosmos_config, DaprSecretProviderConfig()), -) - -LocalWorkerConfig = make_config(impl=local_worker) -AksWorkerConfig = make_config(impl=aks_worker) - -WorkerLaunchConfig = make_config( - "worker", - debug=DebugConfig(), - hydra_defaults=["_self_", {"worker": "local"}], -) - -cs = ConfigStore.instance() -cs.store(group="worker", name="local", node=LocalWorkerConfig()) -cs.store(group="worker", name="aks", node=AksWorkerConfig()) -cs.store(name="vibe_worker", node=WorkerLaunchConfig) - - -@hydra.main(config_path=None, version_base=None, config_name="vibe_worker") -def main(cfg: Any): - set_start_method("forkserver") - worker_obj = instantiate(cfg) - signal.signal(signal.SIGTERM, worker_obj.worker.impl.pre_stop_hook) - asyncio.run(worker_obj.worker.impl.run()) diff --git a/src/vibe_agent/vibe_agent/ops.py b/src/vibe_agent/vibe_agent/ops.py deleted file mode 100644 index 3be691e4..00000000 --- a/src/vibe_agent/vibe_agent/ops.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import importlib.util -import inspect -import logging -import os -from importlib.abc import Loader -from typing import Any, Callable, Dict, List, Optional, Union - -from azure.cosmos.exceptions import CosmosResourceExistsError -from hydra_zen import builds - -from vibe_agent.ops_helper import OpIOConverter -from vibe_agent.storage.local_storage import LocalResourceExistsError -from vibe_common.schemas import ( - CacheInfo, - EntryPointDict, - ItemDict, - OperationParser, - OperationSpec, - OpResolvedDependencies, -) -from vibe_common.secret_provider import SecretProvider, SecretProviderConfig -from vibe_core import data -from vibe_core.data.core_types import BaseVibeDict, InnerIOType, OpIOType, TypeDictVibe - -from .storage import Storage, StorageConfig - - -class Operation: - name: str - callback: Callable[..., BaseVibeDict] - storage: Storage - converter: data.StacConverter - inputs_spec: TypeDictVibe - output_spec: TypeDictVibe - version: str - - def __init__( - self, - name: str, - callback: Callable[..., BaseVibeDict], - storage: Storage, - converter: data.StacConverter, - inputs_spec: TypeDictVibe, - output_spec: TypeDictVibe, - version: str, - ): - self.name = name - self.callback = callback - self.storage = storage - self.converter = converter - self.inputs_spec = inputs_spec - self.output_spec = output_spec - self.version = version - self.logger = logging.getLogger(self.__class__.__name__) - - intersection = set(inputs_spec.keys()).intersection(output_spec.keys()) - if intersection: - raise ValueError( - f"Operation {name} has input and output with conflicting names {intersection}" - ) - - def _fetch_from_cache(self, cache_info: CacheInfo) -> Optional[OpIOType]: - """ - Try to fetch output from the cache, returns `None` if no output is found - """ - items = self.storage.retrieve_output_from_input_if_exists(cache_info) - if items is not None: - items = OpIOConverter.serialize_output(items) - return items - - def _call_validate_op(self, **kwargs: 
InnerIOType) -> ItemDict: - results = self.callback(**kwargs) - result_keys = set(results) - output_keys = set(self.output_spec) - if result_keys != output_keys: - raise RuntimeError( - f"Invalid output obtained during execution of op '{self.name}'. " - f"Expected output keys {output_keys}, but callback returned {result_keys}" - ) - try: - return {k: self.converter.to_stac_item(v) for k, v in results.items()} - except AttributeError: - raise ValueError( - f"Expected a dict-like as return value of operation {self.name}, found " - f"{type(results)}" - ) - - # Run will run the operation, loading the data from the catalog - def run(self, input_items: OpIOType, cache_info: CacheInfo) -> OpIOType: - stac_items = OpIOConverter.deserialize_input(input_items) - op_hash = cache_info.hash - items_out = self._fetch_from_cache(cache_info) - if items_out is not None: - self.logger.warning( - f"Cache hit for op {self.name} with cache hash {op_hash} before computation, " - "probably due to a repeated message." 
- ) - return items_out - - self.logger.info(f"Running op {self.name} for cache hash {op_hash}") - run_id = data.gen_guid() - retrieved_items = self.storage.retrieve(stac_items) - self.logger.info(f"Retrieved input for op {self.name}") - items = {k: self.converter.from_stac_item(v) for k, v in retrieved_items.items()} - self.logger.info(f"Running callback for op {self.name}") - stac_results = self._call_validate_op(**items) - self.logger.info(f"Callback finished for op {self.name}") - - try: - items_out = self.storage.store(run_id, stac_results, cache_info) - self.logger.info(f"Output stored for op {self.name}") - except (LocalResourceExistsError, CosmosResourceExistsError): - # If two instances of the same op with the same input start running at the same time - # We'll have a race condition where they'll both run, and try to store into the cache - # This will instead retrieve the output from the op that wrote their results first - items_out = self._fetch_from_cache(cache_info) - if items_out is not None: - self.logger.warning( - f"Cache hit after computing op {self.name} with cache hash {op_hash}, " - "probably due to a race condition." 
- ) - return items_out - raise # We couldn't write and we can't read, so we break - - return OpIOConverter.serialize_output(items_out) - - -class CallableBuilder: - def __init__(self): - self.logger = logging.getLogger(self.__class__.__name__) - - def _resolve_callable( - self, op_root_folder: str, filename: str, callback_builder_name: str - ) -> Any: - modname = os.path.splitext(filename)[0] - path = os.path.join(op_root_folder, filename) - self.logger.debug( - f"Loading module spec for {modname} from path {path} " - f"with callback {callback_builder_name}" - ) - spec = importlib.util.spec_from_file_location(modname, path) - assert spec is not None - self.logger.debug(f"Loading module {modname} from spec") - module = importlib.util.module_from_spec(spec) - assert isinstance(spec.loader, Loader) - self.logger.debug(f"Executing module {modname}") - spec.loader.exec_module(module) - self.logger.debug(f"Getting callback {callback_builder_name} from module {modname}") - callback_builder = getattr(module, callback_builder_name) - - return callback_builder - - def build( - self, - op_root_folder: str, - entrypoint: EntryPointDict, - parameters: Dict[str, Any], - ) -> Callable[[Any], Any]: - self.logger.debug(f"Building callable builder for {entrypoint}") - callable_builder = self._resolve_callable( - op_root_folder, - entrypoint["file"], - entrypoint["callback_builder"], - ) - self.logger.debug(f"Building callable from {callable_builder}") - callable = callable_builder(**parameters) - if inspect.isclass(callable_builder): - callable = callable() - self.logger.debug(f"Built callable {callable}") - return callable - - -class OperationDependencyResolver: - def __init__(self): - self._resolver_map = {"parameters": self._resolve_params} - - def resolve(self, op_spec: OperationSpec) -> OpResolvedDependencies: - output: OpResolvedDependencies = {} - for item, dependencies_list in op_spec.dependencies.items(): - try: - output[item] = self._resolver_map[item](op_spec, 
dependencies_list) - except Exception as e: - raise ValueError( - f"Dependency {item}: {dependencies_list} could not be resolved" - ) from e - return output - - def _resolve_params(self, op_spec: OperationSpec, params_to_resolve: List[str]): - return {param_name: op_spec.parameters[param_name] for param_name in params_to_resolve} - - -class OperationFactory: - converter: data.StacConverter - storage: Storage - secret_provider: SecretProvider - callable_builder: CallableBuilder - dependency_resolver: OperationDependencyResolver - - def __init__(self, storage: Storage, secret_provider: SecretProvider): - self.storage = storage - self.converter = data.StacConverter() - self.callable_builder = CallableBuilder() - self.secret_provider = secret_provider - - self.dependency_resolver = OperationDependencyResolver() - - def build(self, op_definition: Union[str, OperationSpec]) -> Operation: - if isinstance(op_definition, str): - return self._build_impl(OperationParser.parse(op_definition)) - return self._build_impl(op_definition) - - def resolve_secrets(self, parameters: Dict[str, Any]) -> Dict[str, Any]: - return {k: self.secret_provider.resolve(v) for k, v in parameters.items()} - - def _build_impl(self, op_config: OperationSpec) -> Operation: - parameters = self.resolve_secrets(op_config.parameters) - callable = self.callable_builder.build( - op_config.root_folder, op_config.entrypoint, parameters - ) - - return Operation( - op_config.name, - callable, - self.storage, - self.converter, - op_config.inputs_spec, - op_config.output_spec, - op_config.version, - ) - - -OperationFactoryConfig = builds( - OperationFactory, - storage=StorageConfig, - secret_provider=SecretProviderConfig, - zen_dataclass={"module": "vibe_agent.ops", "cls_name": "OperationFactoryConfig"}, -) diff --git a/src/vibe_agent/vibe_agent/ops_helper.py b/src/vibe_agent/vibe_agent/ops_helper.py deleted file mode 100644 index eac939a9..00000000 --- a/src/vibe_agent/vibe_agent/ops_helper.py +++ /dev/null @@ 
-1,17 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from vibe_core.data.core_types import OpIOType -from vibe_core.data.utils import deserialize_stac, serialize_stac - -from .storage import ItemDict - - -class OpIOConverter: - @staticmethod - def serialize_output(output: ItemDict) -> OpIOType: - return {k: serialize_stac(v) for k, v in output.items()} - - @staticmethod - def deserialize_input(input_items: OpIOType) -> ItemDict: - return {k: deserialize_stac(v) for k, v in input_items.items()} diff --git a/src/vibe_agent/vibe_agent/storage/__init__.py b/src/vibe_agent/vibe_agent/storage/__init__.py deleted file mode 100644 index a366373d..00000000 --- a/src/vibe_agent/vibe_agent/storage/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from .asset_management import BlobAssetManagerConfig, LocalFileAssetManagerConfig -from .local_storage import LocalStorage, LocalStorageConfig -from .remote_storage import CosmosStorage, CosmosStorageConfig -from .storage import ItemDict, Storage, StorageConfig diff --git a/src/vibe_agent/vibe_agent/storage/asset_management.py b/src/vibe_agent/vibe_agent/storage/asset_management.py deleted file mode 100644 index 821d2880..00000000 --- a/src/vibe_agent/vibe_agent/storage/asset_management.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import os -import shutil -from abc import ABC, abstractmethod -from dataclasses import dataclass -from functools import lru_cache -from typing import Any, List, Optional - -from azure.core.credentials import TokenCredential -from azure.identity import DefaultAzureCredential -from azure.storage.blob import BlobClient, BlobProperties, BlobServiceClient, ContainerClient -from hydra_zen import MISSING, builds - -from vibe_common.constants import DEFAULT_BLOB_ASSET_MANAGER_CONTAINER -from vibe_common.tokens import BlobTokenManagerConnectionString, BlobTokenManagerCredentialed -from vibe_core.file_downloader import download_file -from vibe_core.uri import is_local, local_uri_to_path, uri_to_filename - -from .file_upload import local_upload, remote_upload - -CACHE_SIZE = 100 - - -class AssetManager(ABC): - @abstractmethod - def store(self, asset_guid: str, file_path: str) -> str: - raise NotImplementedError - - @abstractmethod - def retrieve(self, asset_guid: str) -> str: - raise NotImplementedError - - @abstractmethod - def exists(self, asset_guid: str) -> bool: - raise NotImplementedError - - @abstractmethod - def remove(self, asset_guid: str) -> None: - raise NotImplementedError - - -class LocalFileAssetManager(AssetManager): - def __init__(self, local_storage_path: str): - self.logger = logging.getLogger(self.__class__.__name__) - self.root_path = local_storage_path - - def store(self, asset_guid: str, src_file_ref: str) -> str: - if self.exists(asset_guid): - self.logger.info(f"Attempted to write repeated entry {asset_guid}.") - return self.retrieve(asset_guid) - - dst_asset_dir = self._gen_path(asset_guid) - try: - os.makedirs(dst_asset_dir) - filename = uri_to_filename(src_file_ref) - dst_filename = os.path.join(dst_asset_dir, filename) - if is_local(src_file_ref): - shutil.copyfile(local_uri_to_path(src_file_ref), dst_filename) - else: - download_file(src_file_ref, dst_filename) - except Exception: - self.logger.exception(f"Exception when 
storing asset {src_file_ref}/{asset_guid}.") - # Clean up asset directory - try: - shutil.rmtree(dst_asset_dir) - except Exception: - self.logger.exception( - "Exception when cleaning up directory after failing to " - f"store asset with ID {asset_guid}" - ) - raise - raise - return dst_filename - - def retrieve(self, asset_guid: str) -> str: - asset_path = self._gen_path(asset_guid) - if not os.path.exists(asset_path): - msg = f"File with ID {asset_guid} does not exist." - self.logger.error(msg) - raise ValueError(msg) - files_in_asset_folder = os.listdir(asset_path) - - if len(files_in_asset_folder) != 1: - msg = f"Inconsistent content found for asset ID {asset_guid}" - self.logger.error(msg) - raise ValueError(msg) - - file_name = files_in_asset_folder[0] - return os.path.join(asset_path, file_name) - - def exists(self, asset_guid: str) -> bool: - return os.path.exists(self._gen_path(asset_guid)) - - def _gen_path(self, guid: str) -> str: - return os.path.join(self.root_path, guid) - - def remove(self, asset_guid: str) -> None: - if not self.exists(asset_guid): - self.logger.info(f"Asked to remove inexistent file {asset_guid}.") - return - - asset_folder = self._gen_path(asset_guid) - - try: - shutil.rmtree(asset_folder) - except Exception: - msg = f"Could not remove asset with ID {asset_guid}" - self.logger.exception(msg) - raise ValueError(msg) - - -# ATTENTION: if the blob container associated with the assets is modified (through a write or -# delete) operation, then we should invalidate the cache of this function by calling its -# cache_clear() method. 
-@lru_cache(maxsize=CACHE_SIZE) -def cached_blob_list_by_prefix(client: ContainerClient, guid: str) -> List[BlobProperties]: - return list(client.list_blobs(name_starts_with=guid)) - - -class BlobServiceProvider(ABC): - @abstractmethod - def get_client(self) -> BlobServiceClient: - raise NotImplementedError - - -class BlobServiceProviderWithCredentials(BlobServiceProvider): - def __init__( - self, - storage_account_url: str, - credential: Optional[TokenCredential] = None, - ): - self.credential = DefaultAzureCredential() if credential is None else credential - self.client = BlobServiceClient(storage_account_url, self.credential) - - def get_client(self) -> BlobServiceClient: - return self.client - - -class BlobServiceProviderWithConnectionString(BlobServiceProvider): - def __init__(self, connection_string: str): - self.client = BlobServiceClient.from_connection_string(connection_string) - - def get_client(self) -> BlobServiceClient: - return self.client - - -class BlobAssetManager(AssetManager): - blob_delimiter = "/" - - def __init__( - self, - storage_account_url: str = "", - storage_account_connection_string: str = "", - asset_container_name: str = DEFAULT_BLOB_ASSET_MANAGER_CONTAINER, - credential: Optional[TokenCredential] = None, - max_upload_concurrency: int = 6, - ): - self.logger = logging.getLogger(self.__class__.__name__) - # Create a blob client, authenticated. 
- self.credential = DefaultAzureCredential() if credential is None else credential - if storage_account_url: - self.client = BlobServiceProviderWithCredentials( - storage_account_url=storage_account_url, credential=self.credential - ).get_client() - self.blob_token_manager = BlobTokenManagerCredentialed(credential=self.credential) - elif storage_account_connection_string: - self.client = BlobServiceProviderWithConnectionString( - connection_string=storage_account_connection_string - ).get_client() - self.blob_token_manager = BlobTokenManagerConnectionString( - connection_string=storage_account_connection_string - ) - else: - msg = ( - "Could not get a blob manager since neither storage account " - "url nor connection string were provided" - ) - self.logger.exception(msg) - raise ValueError(msg) - - self.container_name = asset_container_name - self.container = self._retrieve_container() - self.max_upload_concurrency = max_upload_concurrency - - def _retrieve_container(self): - container = self.client.get_container_client(self.container_name) - if not container.exists(): - container.create_container() - - return container - - @staticmethod - def _join(*args: str): - return BlobAssetManager.blob_delimiter.join(args) - - def _list(self, guid: str) -> List[BlobProperties]: - listed_blob = cached_blob_list_by_prefix(self.container, guid) - if len(listed_blob) > 1: - ValueError(f"Encountered more than one asset with id {guid}") - - return listed_blob - - def _local_upload(self, file_path: str, blob_client: BlobClient): - # At this point, we expect a valid local path was passed to the file_path - # which can be something like "file:///path/to/file" or "/path/to/file". 
- local_upload(file_path, blob_client, max_concurrency=self.max_upload_concurrency) - - def store(self, asset_guid: str, file_ref: str) -> str: - if self.exists(asset_guid): - self.logger.debug(f"Attempted to write repeated entry {asset_guid}.") - blob_property = self._list(asset_guid)[0] - blob_client = self.container.get_blob_client(blob_property.name) - return blob_client.url - - filename = uri_to_filename(file_ref) - blob_name = self._join(asset_guid, filename) - blob_client = self.container.get_blob_client(blob_name) - - if is_local(file_ref): - upload = self._local_upload - else: - upload = remote_upload - - try: - upload(file_ref, blob_client) - except Exception: - self.logger.exception(f"Exception when storing asset {file_ref}/ ID {asset_guid}.") - raise - - # Clear cache as we know we have modified the blob content - cached_blob_list_by_prefix.cache_clear() - - return blob_client.url - - def retrieve(self, asset_guid: str) -> str: - # Obtains a SAS token for file and creates a URL for it. - if not self.exists(asset_guid): - msg = f"File with ID {asset_guid} does not exist." 
- self.logger.error(msg) - raise ValueError(msg) - - blob_property = self._list(asset_guid)[0] - blob_client = self.container.get_blob_client(blob_property.name) - - return self.blob_token_manager.sign_url(blob_client.url) - - def exists(self, asset_guid: str) -> bool: - listed_blob = self._list(asset_guid) - return len(listed_blob) == 1 - - def remove(self, asset_guid: str) -> None: - if not self.exists(asset_guid): - self.logger.debug(f"Asked to remove inexistent file {asset_guid}.") - return - - blob_property = self._list(asset_guid)[0] - try: - self.container.delete_blob(blob_property.name) - except Exception: - msg = f"Could not remove asset with ID {asset_guid}" - self.logger.exception(msg) - raise ValueError(msg) - - cached_blob_list_by_prefix.cache_clear() - - -AssetManagerConfig = builds( - AssetManager, - zen_dataclass={ - "module": "vibe_agent.storage.asset_management", - "cls_name": "AssetManagerConfig", - }, -) - - -@dataclass -class BlobAssetManagerConfig(AssetManagerConfig): - _target_: str = "vibe_agent.storage.asset_management.BlobAssetManager" - storage_account_url: Any = MISSING - storage_account_connection_string: Any = MISSING - asset_container_name: Any = MISSING - credential: Any = MISSING - max_upload_concurrency: Any = 6 - - -LocalFileAssetManagerConfig = builds( - LocalFileAssetManager, - populate_full_signature=True, - builds_bases=(AssetManagerConfig,), - zen_dataclass={ - "module": "vibe_agent.storage.asset_management", - "cls_name": "LocalFileAssetManagerConfig", - }, -) diff --git a/src/vibe_agent/vibe_agent/storage/file_upload.py b/src/vibe_agent/vibe_agent/storage/file_upload.py deleted file mode 100644 index 07ee46ae..00000000 --- a/src/vibe_agent/vibe_agent/storage/file_upload.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 

from typing import Any

from azure.storage.blob import BlobClient

from vibe_core.uri import is_local, local_uri_to_path


def upload_to_blob(file_path: str, blob_client: BlobClient, *args: Any, **kwargs: Any):
    """
    Upload `file_path` to the blob behind `blob_client`.

    Local references go through `local_upload`, everything else through
    `remote_upload`. Extra positional and keyword arguments are forwarded unchanged.
    """
    upload = local_upload if is_local(file_path) else remote_upload
    upload(file_path, blob_client, *args, **kwargs)


def local_upload(file_path: str, blob_client: BlobClient, *args: Any, **kwargs: Any):
    """Read a local file and upload its bytes with `blob_client.upload_blob`."""
    # At this point, we expect a valid local path was passed to the file_path
    # which can be something like "file:///path/to/file" or "/path/to/file".
    file_path = local_uri_to_path(file_path)
    with open(file_path, "rb") as data:
        # `data` is passed positionally: the previous `data=data, *args` form bound
        # `data` twice (TypeError) as soon as any positional extra was forwarded.
        blob_client.upload_blob(data, *args, **kwargs)


def remote_upload(file_path: str, blob_client: BlobClient, *args: Any, **kwargs: Any):
    """Ask the blob service to copy the content found at the URL `file_path`."""
    blob_client.upload_blob_from_url(file_path, *args, **kwargs)
# diff --git a/src/vibe_agent/vibe_agent/storage/local_storage.py b/src/vibe_agent/vibe_agent/storage/local_storage.py
# deleted file mode 100644
# index ec567777..00000000
# --- a/src/vibe_agent/vibe_agent/storage/local_storage.py
# +++ /dev/null
# @@ -1,196 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import asyncio
import logging
import os
import shutil
from concurrent.futures import Executor
from typing import Any, Dict, List, Optional, Union, cast

from hydra_zen import MISSING, builds
from pystac.catalog import Catalog, CatalogType
from pystac.collection import Collection, Extent
from pystac.item import Item
from pystac.stac_io import DefaultStacIO

from vibe_common.schemas import CacheInfo, OpRunId
from vibe_core.utils import ensure_list

from .asset_management import LocalFileAssetManagerConfig
from .storage import AssetManager, ItemDict, Storage, StorageConfig


class LocalStacIO(DefaultStacIO):
    """STAC IO that always deserializes with `preserve_dict=False`."""

    def stac_object_from_dict(
        self,
        d: Dict[str, Any],
        href: Optional[str] = None,
        root: Optional[Catalog] = None,
        preserve_dict: bool = False,
    ) -> Any:
        # The caller-supplied `preserve_dict` is deliberately ignored and False is forwarded.
        return super().stac_object_from_dict(d, href, root, False)


class LocalResourceExistsError(RuntimeError):
    """Raised by `LocalStorage.store` when the target catalog path already exists."""


class LocalStorage(Storage):
    """
    This class implements the Storage abstract class.
    """

    IS_SINGULAR_FIELD = "terravibe_is_singular"
    COLLECTION_TYPE = CatalogType.SELF_CONTAINED
    CATALOG_TYPE = CatalogType.RELATIVE_PUBLISHED

    def __init__(self, local_path: str, asset_manager: AssetManager):
        """
        Initializer expects a directory path where catalogs can be stored
        """
        super().__init__(asset_manager)
        self.path = local_path
        self.logger = logging.getLogger(self.__class__.__name__)
        self.stac_io = LocalStacIO()

    def _retrieve_items(self, catalog: Catalog) -> ItemDict:
        """Map each collection id in `catalog` to its items (one item when flagged singular)."""
        output: ItemDict = {}
        for c in catalog.get_collections():
            output[c.id] = list(c.get_items())
            if c.extra_fields[self.IS_SINGULAR_FIELD]:  # type: ignore
                # Singular outputs were stored as a one-element collection; unwrap it.
                output[c.id] = cast(List[Item], output[c.id])[0]
        return output

    def _create_output_collection(
        self, output_name: str, items: Union[Item, List[Item]]
    ) -> Collection:
        """Build a collection holding `items`, recording whether a single item was given."""
        extra_info: Dict[str, bool] = {self.IS_SINGULAR_FIELD: not isinstance(items, list)}
        output_items = ensure_list(items)
        extent = Extent.from_items(output_items)
        description = f"Stores op output {output_name} for a unique op run."
        output_collection = Collection(
            id=output_name,
            description=description,
            extent=extent,
            catalog_type=self.COLLECTION_TYPE,
            extra_fields=extra_info,
        )
        output_collection.add_items(output_items)

        return output_collection

    def retrieve_output_from_input_if_exists(
        self,
        cache_info: CacheInfo,
    ) -> Optional[ItemDict]:
        """
        Method to help users to skip computation if the result of the previous outputs from input
        and operator combo has been memo-ized as a catalog in the TerraVibes storage system
        """
        catalog_path = self.get_catalog_path(cache_info.hash, cache_info.name)
        if os.path.exists(catalog_path):
            catalog = Catalog.from_file(
                os.path.join(catalog_path, Catalog.DEFAULT_FILE_NAME), stac_io=self.stac_io
            )
            return self._retrieve_items(catalog)

        return None

    async def retrieve_output_from_input_if_exists_async(
        self, cache_info: CacheInfo, **kwargs: Any
    ):
        """
        Run `retrieve_output_from_input_if_exists` in an executor and await the result.

        The executor is taken from the optional `executor` keyword argument; when it
        is absent the event loop's default executor is used instead of failing with
        a KeyError.
        """
        executor = cast(Optional[Executor], kwargs.get("executor"))
        return await asyncio.get_running_loop().run_in_executor(
            executor, self.retrieve_output_from_input_if_exists, cache_info
        )

    def create_run_collection(
        self,
        run_id: str,
        catalog_path: str,
        items: ItemDict,
        extra_info: Dict[str, Any],
    ) -> Catalog:
        """Build a run catalog at `catalog_path` with one child collection per output."""
        description = f"Collection of outputs of run id {run_id}."
        run_catalog = Catalog(
            id=run_id,
            description=description,
            href=catalog_path,
            catalog_type=self.CATALOG_TYPE,
            extra_fields=extra_info,
        )
        for output_name, output_items in items.items():
            output_collection = self._create_output_collection(output_name, output_items)
            run_catalog.add_child(output_collection)

        return run_catalog

    def get_catalog_path(self, op_hash: str, op_name: str) -> str:
        """
        Each catalog has a directory and json file where the corresponding assets and files are
        stored/indexed
        """
        return os.path.join(self.path, op_name, op_hash)

    def _catalog_cleanup(self, catalog: Catalog):
        """Normalize the catalog hrefs against its own href and make asset hrefs relative."""
        catalog_path = catalog.get_self_href()
        assert catalog_path is not None, f"Catalog {catalog.id} does not have an href."
        catalog.normalize_hrefs(catalog_path)
        catalog.make_all_asset_hrefs_relative()

    def store(self, run_id: str, items_to_store: ItemDict, cache_info: CacheInfo) -> ItemDict:
        """
        Method to store a given list of items to current TerraVibes storage STAC catalog
        This method must be atomic -- that is all of it happens or none of it happens
        This method must be consistent -- that is the assets/items referenced by catalogs must be in
        storage & vice-versa
        This method must be isolated -- applications should be able to call multiple store
        operations simultaneously and safely
        This method must be durable -- all changes must be available across crashes unless there
        is a catastrophic failure
        This method must be performant -- it should support 1000s/100s/10s of
        assets/catalogs/workflows being updated simultaneously

        Raises:
            LocalResourceExistsError: if a catalog already exists for this op name and hash.
        """
        catalog_path = self.get_catalog_path(cache_info.hash, cache_info.name)
        # Fail before touching the asset store: previously assets were copied first and
        # the existence check only happened right before saving the catalog.
        if os.path.exists(catalog_path):
            raise LocalResourceExistsError(
                f"Op output already exists in storage for {cache_info.name} with id {run_id}."
            )

        items_to_store = self.asset_handler.copy_assets(items_to_store)
        catalog = self.create_run_collection(
            run_id, catalog_path, items_to_store, cache_info.as_storage_dict()
        )
        self._catalog_cleanup(catalog)
        catalog.save(stac_io=self.stac_io)

        return items_to_store

    def remove(self, op_run_id: OpRunId):
        """Delete the catalog directory of `op_run_id`; a missing catalog is only logged."""
        catalog_path = self.get_catalog_path(op_run_id.hash, op_run_id.name)

        if not os.path.exists(catalog_path):
            self.logger.info(
                f"Asked to remove nonexistent catalog with op name {op_run_id.name} and hash "
                f"{op_run_id.hash}."
            )
            return

        try:
            shutil.rmtree(catalog_path)
        except OSError:
            self.logger.exception(f"Error removing catalog for op run {op_run_id}.")
            raise


LocalStorageConfig = builds(
    LocalStorage,
    local_path=MISSING,
    asset_manager=LocalFileAssetManagerConfig(MISSING),
    builds_bases=(StorageConfig,),
    zen_dataclass={
        "module": "vibe_agent.storage.local_storage",
        "cls_name": "LocalStorageConfig",
    },
)
# diff --git a/src/vibe_agent/vibe_agent/storage/remote_storage.py b/src/vibe_agent/vibe_agent/storage/remote_storage.py
# deleted file mode 100644
# index b94dfd4e..00000000
# --- a/src/vibe_agent/vibe_agent/storage/remote_storage.py
# +++ /dev/null
# @@ -1,301 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import logging
from dataclasses import asdict, dataclass, fields
from functools import lru_cache
from hashlib import sha256
from math import ceil
from typing import Any, Dict, List, Optional, cast

from azure.cosmos import ContainerProxy, CosmosClient, PartitionKey
from azure.cosmos.aio import (
    ContainerProxy as AsyncContainerProxy,
)
from azure.cosmos.aio import (
    CosmosClient as AsyncCosmosClient,
)
from azure.cosmos.exceptions import CosmosHttpResponseError, CosmosResourceNotFoundError
from azure.storage.blob import BlobLeaseClient
from hydra_zen import MISSING
from pystac.item import Item

from vibe_common.constants import (
    DEFAULT_COSMOS_DATABASE_NAME,
    DEFAULT_COSMOS_URI,
    DEFAULT_STAC_COSMOS_CONTAINER,
)
from vibe_common.schemas import CacheInfo, OpRunId
from vibe_core.utils import ensure_list

from .asset_management import AssetManager, BlobAssetManagerConfig
from .storage import ItemDict, Storage, StorageConfig

LeaseDict = Dict[str, BlobLeaseClient]


@dataclass
class CosmosData:
    """Fields shared by every document this module writes: document id and op name."""

    id: str
    op_name: str


@dataclass
class ItemList(CosmosData):
    """One partition of the serialized items of a single op output."""

    output_name: str
    items: List[Dict[str, Any]]
    type: str = "item_list"


@dataclass
class RunInfo(CosmosData):
    """Run document: cache info plus the ids of the `ItemList` documents of the run."""

    run_id: str
    cache_info: Dict[str, Any]
    items: List[str]
    singular_items: List[str]
    type: str = "run_info"


class CosmosStorage(Storage):
    """Storage implementation that keeps run and item documents in a Cosmos DB container."""

    PARTITION_KEY = "/op_name"
    LIST_MIN_SIZE: int = 1
    # https://docs.microsoft.com/en-us/rest/api/cosmos-db/http-status-codes-for-cosmosdb
    entity_too_large_status_code: int = 413

    def __init__(
        self,
        key: str,
        asset_manager: AssetManager,
        stac_container_name: str = DEFAULT_STAC_COSMOS_CONTAINER,
        cosmos_database_name: str = DEFAULT_COSMOS_DATABASE_NAME,
        cosmos_url: str = DEFAULT_COSMOS_URI,
        list_max_size: int = 1024,
    ):
        """Record connection settings; clients are only created on first use."""
        super().__init__(asset_manager)
        self.key = key
        self.cosmos_url = cosmos_url
        self.cosmos_database_name = cosmos_database_name
        self.stac_container_name = stac_container_name
        self.container_proxy_async = None
        self.list_max_size = list_max_size
        self.logger = logging.getLogger(self.__class__.__name__)

    @property
    @lru_cache
    def container_proxy(self):
        """Synchronous container client; database and container are created if missing."""
        cosmos_client = CosmosClient(self.cosmos_url, self.key)
        db = cosmos_client.create_database_if_not_exists(id=self.cosmos_database_name)
        return db.create_container_if_not_exists(
            self.stac_container_name, partition_key=PartitionKey(self.PARTITION_KEY)
        )

    def _convert_items(self, items: ItemDict):
        """
        Serialize every output to a list of item dicts.

        Returns:
            A tuple `(converted_items, singular_items)` where `singular_items` lists
            the output names that were given as a single `Item` rather than a list.
        """
        converted_items: Dict[str, List[Dict[str, Any]]] = {}
        singular_items: List[str] = []
        for key, item in items.items():
            if isinstance(item, Item):
                singular_items.append(key)
            item = ensure_list(item)
            converted_items[key] = [i.to_dict() for i in item]
        return converted_items, singular_items

    def _build_item_list_id(self, ids: List[str], output_name: str, run_hash: str):
        """
        Return the SHA-256 hex digest of the item ids followed by run hash and output name.

        The caller's `ids` list is left untouched (it used to be extended in place).
        """
        return sha256("".join([*ids, run_hash, output_name]).encode()).hexdigest()

    def _build_items_to_store(
        self,
        op_name: str,
        run_hash: str,
        item_dict: Dict[str, List[Dict[str, Any]]],
        list_size: int,
    ):
        """
        Split every output into `ItemList` partitions of at most `list_size` items.

        Returns:
            A tuple `(item_lists, partition_ids)` in matching order.
        """
        output: List[ItemList] = []
        id_list: List[str] = []
        for output_name, items in item_dict.items():
            items = ensure_list(items)
            num_items = len(items)
            num_partitions = ceil(num_items / list_size)
            for i in range(num_partitions):
                offset = i * list_size
                last_item = min(offset + list_size, num_items)
                partitioned_items = items[offset:last_item]
                items_ids = [item["id"] for item in partitioned_items]
                partition_id = self._build_item_list_id(items_ids, output_name, run_hash)
                id_list.append(partition_id)
                output.append(ItemList(partition_id, op_name, output_name, partitioned_items))
        return output, id_list

    def _store_data(
        self, op_name: str, run_to_store: Dict[str, Any], items_to_store: List[Dict[str, Any]]
    ):
        """
        Create the item documents and then the run document.

        If any creation fails, the item documents created so far are deleted and the
        original exception is re-raised.
        """
        container = self._get_container()
        stored_items: List[str] = []
        try:
            for i in items_to_store:
                container.create_item(body=i)
                stored_items.append(i["id"])
            container.create_item(body=run_to_store)
        except Exception:
            # rolling back
            for i in stored_items:
                try:
                    container.delete_item(i, op_name)
                except Exception:
                    # Keep rolling back the remaining items and make sure the original
                    # failure (re-raised below) is not replaced by a rollback error.
                    self.logger.exception(f"Could not roll back item {i} of op {op_name}")
            raise

    def store(self, run_id: str, items: ItemDict, cache_info: CacheInfo) -> ItemDict:
        """
        Copy the assets and store the items of a run.

        When Cosmos rejects a document as too large, the items are re-partitioned
        into lists half the size of the largest one and the store is retried, down
        to lists of `LIST_MIN_SIZE` items.

        Raises:
            CosmosHttpResponseError: for any error other than "entity too large".
            RuntimeError: if the items could not be stored at any list size.
        """
        items = self.asset_handler.copy_assets(items)
        dict_items, singular_items = self._convert_items(items)
        extra_fields = cache_info.as_storage_dict()
        run_hash = extra_fields[self.HASH_FIELD]
        list_size = self.list_max_size
        e: Exception = RuntimeError("No tries to store have been made")
        items_lists: List[ItemList] = []
        # `>=` so that lists of LIST_MIN_SIZE items are attempted as well.
        while list_size >= self.LIST_MIN_SIZE:
            try:
                items_lists, items_id_list = self._build_items_to_store(
                    cache_info.name, run_hash, dict_items, list_size
                )
                run_to_store = asdict(
                    RunInfo(
                        run_hash,
                        cache_info.name,
                        run_id,
                        extra_fields,
                        items_id_list,
                        singular_items,
                    )
                )
                items_to_store = [asdict(items_list) for items_list in items_lists]
                self._store_data(cache_info.name, run_to_store, items_to_store)
                return items
            except CosmosHttpResponseError as er:
                try:
                    status_code = int(er.status_code)  # type: ignore
                except TypeError:
                    raise er  # Couldn't get the status code, so just break
                if status_code != self.entity_too_large_status_code:
                    # We are only handling EntityTooLarge
                    raise
                e = er
                # `default=0` covers a run without any item list.
                largest = max((len(i.items) for i in items_lists), default=0)
                next_size = ceil(largest / 2)
                if next_size >= list_size:
                    # The lists cannot shrink any further; retrying would loop forever.
                    break
                list_size = next_size
        raise RuntimeError(
            f"Could not store items. Tried from {self.list_max_size} "
            f"to {self.LIST_MIN_SIZE} sized lists"
        ) from e

    def _get_container(self) -> ContainerProxy:
        """Return the synchronous container client."""
        return self.container_proxy

    def _get_container_async(self) -> AsyncContainerProxy:
        """Return the asynchronous container client, creating it on first use."""
        if self.container_proxy_async is None:
            cosmos_client_async = AsyncCosmosClient(self.cosmos_url, self.key)
            db = cosmos_client_async.get_database_client(self.cosmos_database_name)
            self.container_proxy_async = db.get_container_client(self.stac_container_name)
        return self.container_proxy_async

    def _get_run_info(
        self, op_name: str, op_run_hash: str, container: ContainerProxy
    ) -> Optional[RunInfo]:
        """Read the run document, or return None when it does not exist."""
        try:
            retrieved_item = cast(Dict[str, Any], container.read_item(op_run_hash, op_name))
        except CosmosResourceNotFoundError:
            return None
        # Keep only the keys that are RunInfo fields; other keys of the document are dropped.
        run_info_fields = [f.name for f in fields(RunInfo)]
        run_info_dict = {k: v for k, v in retrieved_item.items() if k in run_info_fields}
        return RunInfo(**run_info_dict)

    async def _get_run_info_async(
        self, op_name: str, op_run_hash: str, container: AsyncContainerProxy
    ) -> Optional[RunInfo]:
        """Asynchronous counterpart of `_get_run_info`."""
        try:
            retrieved_item = await container.read_item(op_run_hash, op_name)
        except CosmosResourceNotFoundError:
            return None
        run_info_fields = [f.name for f in fields(RunInfo)]
        run_info_dict = {k: v for k, v in retrieved_item.items() if k in run_info_fields}
        return RunInfo(**run_info_dict)

    def process_items(self, run_info: RunInfo, retrieved_items: List[Dict[str, Any]]):
        """
        Rebuild the output dictionary of a run from its retrieved item-list documents.

        Partitions of the same output are concatenated in the order received; outputs
        listed in `run_info.singular_items` are returned as a single `Item`.
        """
        item_list_fields = [f.name for f in fields(ItemList)]
        items_dict: Dict[str, List[Dict[str, Any]]] = {}
        for i in retrieved_items:
            items_info_dict = {k: v for k, v in i.items() if k in item_list_fields}
            items_list = ItemList(**items_info_dict)
            output_name = items_list.output_name
            dict_list = items_dict.get(output_name, [])
            dict_list += items_list.items
            items_dict[output_name] = dict_list

        singular_input = run_info.singular_items
        retrieved_stac: ItemDict = {}

        for output_name, output_values in items_dict.items():
            converted_items = [Item.from_dict(ov, preserve_dict=False) for ov in output_values]
            if output_name in singular_input:
                retrieved_stac[output_name] = converted_items[0]
            else:
                retrieved_stac[output_name] = converted_items
        return retrieved_stac

    def _retrieve_items(self, run_info: RunInfo, container: ContainerProxy):
        """Read every item-list document of the run and rebuild its outputs."""
        retrieved_items = [container.read_item(i, run_info.op_name) for i in run_info.items]
        return self.process_items(run_info, retrieved_items)

    async def _retrieve_items_async(self, run_info: RunInfo, container: AsyncContainerProxy):
        """Asynchronous counterpart of `_retrieve_items`; documents are read one at a time."""
        retrieved_items = [await container.read_item(i, run_info.op_name) for i in run_info.items]
        return self.process_items(run_info, retrieved_items)

    def retrieve_output_from_input_if_exists(self, cache_info: CacheInfo) -> Optional[ItemDict]:
        """Return the stored outputs for `cache_info`, or None when no run document exists."""
        container = self._get_container()
        run_info = self._get_run_info(cache_info.name, cache_info.hash, container)
        if run_info is None:
            return None

        return self._retrieve_items(run_info, container)

    async def retrieve_output_from_input_if_exists_async(
        self, cache_info: CacheInfo, **kwargs: Any
    ) -> Optional[ItemDict]:
        """Asynchronous counterpart of `retrieve_output_from_input_if_exists`."""
        container = self._get_container_async()

        run_info = await self._get_run_info_async(cache_info.name, cache_info.hash, container)
        if run_info is None:
            return None

        return await self._retrieve_items_async(run_info, container)

    def remove(self, op_run_id: OpRunId):
        """
        Delete the item-list documents of a run and then its run document.

        Documents that are already missing only produce a warning.
        """
        container = self._get_container()
        run_info = self._get_run_info(op_run_id.name, op_run_id.hash, container)
        if run_info is None:
            return None

        for i in run_info.items:
            try:
                container.delete_item(i, run_info.op_name)
            except CosmosResourceNotFoundError as er:
                self.logger.warning(
                    f"The item {i} that is a part of {op_run_id} does not exist in the "
                    f"Cosmos DB container: {er}"
                )

        try:
            container.delete_item(op_run_id.hash, op_run_id.name)
        except CosmosResourceNotFoundError as er:
            self.logger.warning(
                f"The item {op_run_id} does not exist in the Cosmos DB container: {er}"
            )


# Having to manually create Cosmos configuration so we can retrieve its
# key using a secret provider.
@dataclass
class CosmosStorageConfig(StorageConfig):
    _target_: str = "vibe_agent.storage.remote_storage.CosmosStorage"
    key: Any = MISSING
    asset_manager: BlobAssetManagerConfig = MISSING
    stac_container_name: Any = MISSING
    cosmos_database_name: Any = MISSING
    cosmos_url: Any = MISSING
# diff --git a/src/vibe_agent/vibe_agent/storage/storage.py b/src/vibe_agent/vibe_agent/storage/storage.py
# deleted file mode 100644
# index 3be0bd2c..00000000
# --- a/src/vibe_agent/vibe_agent/storage/storage.py
# +++ /dev/null
# @@ -1,115 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
Storage module for TerraVibes. Helps store, index, retrieve, and catalog geospatial knowledge that
an instance of TerraVibes contains.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional

from hydra_zen import builds
from pystac.asset import Asset

from vibe_common.schemas import CacheInfo, ItemDict, OpRunId
from vibe_core.utils import ensure_list

from .asset_management import AssetManager, AssetManagerConfig


class AssetCopyHandler:
    """Copies the assets referenced by items into an asset manager."""

    def __init__(self, asset_manager: AssetManager):
        self.asset_manager = asset_manager

    def _copy_asset(self, guid: str, asset: Asset):
        """Store one asset and point its href at the location returned by the manager."""
        file_path = asset.get_absolute_href()
        assert file_path is not None
        asset.href = self.asset_manager.store(guid, file_path)

    def _copy_prepared_assets(self, assets_to_copy: Dict[str, Asset]):
        """Copy every asset; on failure remove the ones already copied and re-raise."""
        copied_assets: List[str] = []
        try:
            for guid, asset in assets_to_copy.items():
                self._copy_asset(guid, asset)
                copied_assets.append(guid)
        except Exception:
            for f in copied_assets:
                self.asset_manager.remove(f)
            raise

    def _prepare_assets(self, items: ItemDict):
        """Collect the assets of all items into one dict keyed by asset key."""
        assets: Dict[str, Asset] = {}
        for item in items.values():
            item = ensure_list(item)
            for i in item:
                assets.update(i.assets)
        return assets

    def copy_assets(self, items: ItemDict):
        """Copy the assets of `items` and return the same `items` object (hrefs updated)."""
        assets = self._prepare_assets(items)
        self._copy_prepared_assets(assets)

        return items


class Storage(ABC):
    """
    The TerraVibes storage class contains abstract methods that have to be implemented. The abstract
    methods are "store", "retrieve_output_from_input_if_exists",
    "retrieve_output_from_input_if_exists_async", and "remove". The retrieval ones help retrieve
    data by querying with the inputs that generated the output that the user is looking for.
    These methods are mandatory when implementing a storage class in TerraVibes. "retrieve" is
    implemented here on top of the asset manager.
    """

    asset_manager: AssetManager
    # Annotation renamed to match the attribute actually assigned in __init__.
    asset_handler: AssetCopyHandler
    HASH_FIELD: str = "vibe_op_hash"

    def __init__(self, asset_manager: AssetManager):
        self.asset_manager = asset_manager
        self.asset_handler = AssetCopyHandler(asset_manager)

    @abstractmethod
    def store(self, run_id: str, items: ItemDict, cache_info: CacheInfo) -> ItemDict:
        raise NotImplementedError

    def retrieve(self, input_items: ItemDict) -> ItemDict:
        """
        Method to retrieve a list of items from the current TerraVibes storage STAC catalog
        """
        for possible_item_list in input_items.values():
            items = ensure_list(possible_item_list)
            for item in items:
                for guid, asset in item.assets.items():
                    # Each asset href is replaced, in place, by what the manager returns.
                    asset.href = self.asset_manager.retrieve(guid)

        return input_items

    @abstractmethod
    def retrieve_output_from_input_if_exists(self, cache_info: CacheInfo) -> Optional[ItemDict]:
        raise NotImplementedError

    @abstractmethod
    async def retrieve_output_from_input_if_exists_async(
        self, cache_info: CacheInfo, **kwargs: Any
    ) -> Optional[ItemDict]:
        raise NotImplementedError

    @abstractmethod
    def remove(self, op_run_id: OpRunId):
        """
        Method to delete a STAC catalog from storage. Note: this does not remove the assets
        referenced by a STAC catalog.
        """
        raise NotImplementedError


StorageConfig = builds(
    Storage,
    asset_manager=AssetManagerConfig,
    zen_dataclass={
        "module": "vibe_agent.storage.storage",
        "cls_name": "StorageConfig",
    },
)
# diff --git a/src/vibe_agent/vibe_agent/worker.py b/src/vibe_agent/vibe_agent/worker.py
# deleted file mode 100644
# index 0a374e65..00000000
# --- a/src/vibe_agent/vibe_agent/worker.py
# +++ /dev/null
# @@ -1,527 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import asyncio
import concurrent.futures
import json
import logging
import os
import resource
import signal
import sys
import threading
import time
import traceback
from multiprocessing.context import ForkServerContext
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from uuid import UUID

import pebble.concurrent
from cloudevents.sdk.event import v1
from dapr.conf import settings
from dapr.ext.grpc import App, TopicEventResponse
from hydra_zen import MISSING, builds, instantiate
from opentelemetry import trace
from pebble import ProcessFuture
from pebble.common import ProcessExpired

from vibe_common.constants import CONTROL_STATUS_PUBSUB, STATUS_PUBSUB_TOPIC
from vibe_common.dapr import dapr_ready
from vibe_common.messaging import (
    CacheInfoExecuteRequestContent,
    CacheInfoExecuteRequestMessage,
    WorkMessage,
    WorkMessageBuilder,
    accept_or_fail_event,
    extract_message_header_from_event,
    send_async,
)
from vibe_common.schemas import CacheInfo
from vibe_common.statestore import StateStore
from vibe_common.telemetry import (
    add_span_attributes,
    add_trace,
    setup_telemetry,
    update_telemetry_context,
)
from vibe_core.data.core_types import OpIOType
from vibe_core.datamodel import RunConfig, RunStatus
from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging
from vibe_core.utils import get_input_ids

from .ops import OperationFactoryConfig, OperationSpec

-MESSAGING_RETRY_INTERVAL_S = 1 -TERMINATION_GRACE_PERIOD_S = 5 -MAX_OP_EXECUTION_TIME_S = 60 * 60 * 3 - - -class ShuttingDownException(Exception): - pass - - -class OpSignalHandler: - def __init__(self, logger: logging.Logger): - self.logger = logger - self.resource_description = { - "ru_utime": "User time", - "ru_stime": "System time", - "ru_maxrss": "Max. Resident Set Size", - "ru_ixrss": "Shared Memory Size", - "ru_idrss": "Unshared Memory Size", - "ru_isrss": "Stack Size", - "ru_inblock": "Block inputs", - "ru_oublock": "Block outputs", - } - - def parse_resources_usage(self, rusages: List[resource.struct_rusage]): - return { - resource: { - "description": description, - "value": sum([getattr(rusage, resource) for rusage in rusages]), - } - for resource, description in self.resource_description.items() - } - - def build_log_message(self, signum: int, child_pid: Optional[Tuple[int, int]]) -> str: - resource_usages = [resource.getrusage(resource.RUSAGE_SELF)] - - if signum == signal.SIGTERM: - msgs_list = ["Terminating op gracefully with SIGTERM."] - else: - msgs_list = [ - f"Received signal when executing op (signal {signal.Signals(signum).name}).", - ] - - if child_pid: - pid, exit_code = child_pid - msgs_list.append(f" Child pid = {pid} exit code = {exit_code >> 8},") - resource_usages.append(resource.getrusage(resource.RUSAGE_CHILDREN)) - - msgs_list.append(f"Op resources = {self.parse_resources_usage(resource_usages)}") - - return " ".join(msgs_list) - - def get_log_function(self, child_pid: Optional[Tuple[int, int]]): - if child_pid: - _, exit_code = child_pid - if not os.WIFEXITED(exit_code): - return self.logger.error - - return self.logger.info - - def log(self, signum: int, _: Any): - child_pid = None - try: - child_pid = os.waitpid(-1, os.WNOHANG) - except ChildProcessError: - # That's OK. 
There is no child process - pass - - message = self.build_log_message(signum, child_pid) - log_function = self.get_log_function(child_pid) - log_function(message) - - -@pebble.concurrent.process(daemon=False, context=ForkServerContext()) -# This must not be a daemonic process. Otherwise, we won't be able to run ops -# that start children. -def run_op( - factory_spec: OperationFactoryConfig, # type: ignore - spec: OperationSpec, - input: OpIOType, - cache_info: CacheInfo, -) -> Union[OpIOType, traceback.TracebackException]: - logger = logging.getLogger(f"{__name__}.run_op") - logger.info(f"Building op {spec.name} to process input {get_input_ids(input)}") - - op_signal_handler = OpSignalHandler(logger) - - for sign in (signal.SIGINT, signal.SIGTERM, signal.SIGCHLD): - signal.signal(sign, op_signal_handler.log) - - try: - factory = instantiate(factory_spec) - return factory.build(spec).run(input, cache_info) - except Exception as e: - return traceback.TracebackException.from_exception(e) - - -class WorkerMessenger: - pubsubname: str - status_topic: str - logger: logging.Logger - - def __init__( - self, pubsubname: str = CONTROL_STATUS_PUBSUB, status_topic: str = STATUS_PUBSUB_TOPIC - ): - self.pubsubname = pubsubname - self.status_topic = status_topic - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - async def send(self, message: WorkMessage) -> None: - tries: int = 0 - sent = False - while True: - try: - sent = await send_async(message, "worker", self.pubsubname, self.status_topic) - except Exception: - pass - if sent: - break - tries += 1 - # We did some work, now we have to report what happened to the op - # If we are shutting down, we have TERMINATION_GRACE_PERIOD_S to try before exiting. - # Otherwise, it seems to make sense to keep retrying until we succeed. - self.logger.warn( - f"Failed to send {message} after {tries} attempts. " - f"Sleeping for {MESSAGING_RETRY_INTERVAL_S}s before retrying." 
- ) - await asyncio.sleep(MESSAGING_RETRY_INTERVAL_S) - - async def send_ack_reply(self, origin: WorkMessage) -> None: - await self.send(WorkMessageBuilder.build_ack_reply(origin.id)) - self.logger.debug(msg=f"Sent ACK for {origin.id}") - - @add_trace - async def send_success_reply( - self, - origin: WorkMessage, - out: OpIOType, - cache_info: Optional[CacheInfo] = None, - ) -> None: - if cache_info is None and not isinstance(origin, CacheInfoExecuteRequestMessage): - raise ValueError( - "cache_info must be provided if origin is not a CacheInfoExecuteRequestMessage" - ) - if not cache_info: - content = cast(CacheInfoExecuteRequestContent, origin.content) - cache_info = CacheInfo( - name=content.cache_info.name, - version=content.cache_info.version, - ids=content.cache_info.ids, - parameters=content.cache_info.parameters, - ) - await self.send(WorkMessageBuilder.build_execute_reply(origin.id, cache_info, out)) - self.logger.debug(msg=f"Sent success response for {origin.id}") - - async def send_failure_reply(self, traceparent: str, e: Exception, tb: List[str]) -> None: - assert type(e) is not None, "`send_failure_reply` called without an exception to handle" - reply = WorkMessageBuilder.build_error( - traceparent, - str(type(e)), - str(e), - tb, - ) - await self.send(reply) - self.logger.debug(f"Sent failure response for {traceparent}") - - -class Worker: - app: App - max_tries: int - pubsubname: str - status_topic: str - control_topic: str - current_message: Optional[WorkMessage] = None - shutting_down: bool = False - child_monitoring_period_s: int = 10 - termination_grace_period_s: int = 2 - state_store: StateStore - current_child: Optional[ProcessFuture] = None - factory_spec: OperationFactoryConfig # type: ignore - otel_service_name: str - - def __init__( - self, - termination_grace_period_s: int, - control_topic: str, - max_tries: int, - factory_spec: OperationFactoryConfig, # type: ignore - port: int = settings.HTTP_APP_PORT, - pubsubname: str = 
CONTROL_STATUS_PUBSUB, - status_topic: str = STATUS_PUBSUB_TOPIC, - logdir: Optional[str] = None, - max_log_file_bytes: int = MAX_LOG_FILE_BYTES, - log_backup_count: int = LOG_BACKUP_COUNT, - loglevel: Optional[str] = None, - otel_service_name: str = "", - **kwargs: Dict[str, Any], - ): - self.pubsubname = pubsubname - self.termination_grace_period_s = termination_grace_period_s - self.control_topic = control_topic - self.status_topic = status_topic - self.port = port - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.logdir: Optional[str] = logdir - self.loglevel = loglevel - self.max_log_file_bytes = max_log_file_bytes - self.log_backup_count = log_backup_count - self.otel_service_name = otel_service_name - - self.app = App() - self.messenger = WorkerMessenger(pubsubname, status_topic) - self.current_message = None - self.shutdown_lock = threading.Lock() - self.work_lock = threading.Lock() - self.max_tries = max_tries - self.factory_spec = factory_spec - self.statestore = StateStore() - self.name = self.__class__.__name__ - self._setup_routes_and_events() - - def _terminate_child(self): - if self.current_child is not None: - try: - self.current_child.cancel() - except Exception: - self.logger.info( - f"Failed to terminate child {self.current_child}, " - "probably because it terminated already" - ) - - def _setup_routes_and_events(self): - @self.app.subscribe(self.pubsubname, self.control_topic) - def fetch_work(event: v1.Event) -> TopicEventResponse: - return self.fetch_work(self.control_topic, event) - - @self.app.method(name="shutdown") - def shutdown() -> TopicEventResponse: - self.logger.info("Initiating shutdown sequence") - self.pre_stop_hook(signal.SIGTERM, None) - return TopicEventResponse("retry") - - def pre_stop_hook(self, signum: int, _: Any): - self.shutdown_lock.acquire() - if self.shutting_down: - self.logger.warning( - f"Shutdown requested while already shutting down. Ignoring. 
(signal: {signum})" - ) - self.shutdown_lock.release() - return - self.shutting_down = True - try: - if self.current_message is not None: - self._terminate_child() - finally: - if self.app._server is not None: - self.app._server.stop(None) - self.shutdown_lock.release() - - def run(self): - appname = "terravibes-worker" - configure_logging( - default_level=self.loglevel, - appname=appname, - logdir=self.logdir, - max_log_file_bytes=self.max_log_file_bytes, - log_backup_count=self.log_backup_count, - ) - if self.otel_service_name: - setup_telemetry(appname, self.otel_service_name) - self.start_service() - - @dapr_ready - def start_service(self): - self.logger.info(f"Starting worker listening on port {self.port}") - while not self.shutting_down: - # For some reason, the FastAPI lifecycle shutdown action is - # executing without us intending for it to run. We add this loop - # here to bring the server up if we haven't explicitly initiated the - # shutdown routine. - self.app.run(self.port) - time.sleep(1) - - @add_trace - def run_op_from_message(self, message: WorkMessage, timeout_s: float): - try: - self.current_message = message - content = cast(CacheInfoExecuteRequestContent, message.content) - out = self.run_op_with_retry(content, message.run_id, timeout_s) - asyncio.run(self.messenger.send_success_reply(message, out)) - except ShuttingDownException: - # We are shutting down. Don't send a reply. Another worker will pick - # this up. - raise - except Exception as e: - _, _, tb = sys.exc_info() - asyncio.run(self.messenger.send_failure_reply(message.id, e, traceback.format_tb(tb))) - raise - finally: - self.current_message = None - - def is_workflow_complete(self, message: WorkMessage) -> bool: - try: - run = asyncio.run(self.statestore.retrieve(str(message.run_id))) - except KeyError: - self.logger.warn( - f"Run {message.run_id} not found in statestore. Assuming it's not complete." 
- ) - return False - if not isinstance(run, dict): - run = json.loads(run) - runconfig = RunConfig(**run) - return RunStatus.finished(runconfig.details.status) - - def fetch_work(self, channel: str, event: v1.Event) -> TopicEventResponse: - @add_trace - def success_callback(message: WorkMessage) -> TopicEventResponse: - add_span_attributes({"run_id": str(message.run_id)}) - if not message.is_valid_for_channel(channel): - self.logger.warning( - f"Received invalid message {message} for channel {channel}. Dropping it." - ) - return TopicEventResponse("drop") - if self.is_workflow_complete(message): - self.logger.warning( - f"Rejecting event with id {event.id} for completed/failed/cancelled " - f"workflow {message.run_id}." - ) - return TopicEventResponse("drop") - - if self.shutting_down: - self.logger.info(f"Shutdown in progress. Rejecting event {event.id}") - return TopicEventResponse("retry") - - if not self.work_lock.acquire(blocking=False): - self.logger.info(f"Worker busy. Rejecting new work event {event.id}") - return TopicEventResponse("retry") - try: - asyncio.run(self.messenger.send_ack_reply(message)) - self.run_op_from_message(message, MAX_OP_EXECUTION_TIME_S) - return TopicEventResponse("success") - except ShuttingDownException: - return TopicEventResponse("retry") - except Exception: - self.logger.exception(f"Failed to run op for event {event.id}") - raise - finally: - self.work_lock.release() - - @add_trace - def failure_callback(event: v1.Event, e: Exception, tb: List[str]) -> TopicEventResponse: - asyncio.run(self.messenger.send_failure_reply(event.id, e, tb)) - return TopicEventResponse("drop") - - update_telemetry_context(extract_message_header_from_event(event).current_trace_parent) - return accept_or_fail_event(event, success_callback, failure_callback) # type: ignore - - def get_future_result( - self, child: ProcessFuture, monitoring_period_s: int, timeout_s: float - ) -> Any: - start_time = time.time() - while time.time() - start_time < 
timeout_s: - try: - ret = child.result(monitoring_period_s) - return ret - except concurrent.futures.TimeoutError: - assert self.current_message is not None, ( - "There's a correctness issue in the worker code. " - "`current_message` should not be `None`." - ) - if self.is_workflow_complete(self.current_message): - self.logger.info( - f"Workflow {self.current_message.run_id} is complete. " - "Terminating child process." - ) - child.cancel() - raise RuntimeError( - "Workflow was completed/failed/cancelled while running op. " - "Terminating child process." - ) - if self.shutting_down: - self.logger.info("Shutdown process initiated. Terminating child process.") - child.cancel() - raise ShuttingDownException() - continue - except concurrent.futures.CancelledError: - if self.shutting_down: - raise ShuttingDownException() - self.logger.warn( - f"Child process was cancelled while running op {self.current_message}. " - "But we're not shutting down. This is unexpected." - ) - raise - except Exception as e: - self.logger.exception(f"Child process failed with exception {e}") - return traceback.TracebackException.from_exception(e) - raise TimeoutError(f"Op execution took longer than the allowed {timeout_s} seconds.") - - @add_trace - def try_run_op( - self, spec: OperationSpec, content: CacheInfoExecuteRequestContent, inner_timeout: float - ) -> Union[OpIOType, traceback.TracebackException]: - trace.get_current_span().set_attribute("op_name", str(spec.name)) - self.current_child = cast( - ProcessFuture, - run_op(self.factory_spec, spec, content.input, content.cache_info), # type: ignore - ) - ret = self.get_future_result( - self.current_child, self.child_monitoring_period_s, inner_timeout - ) - - return ret - - @add_trace - def run_op_with_retry( - self, content: CacheInfoExecuteRequestContent, run_id: UUID, timeout_s: float - ) -> OpIOType: - spec = cast(OperationSpec, content.operation_spec) - ret: Union[traceback.TracebackException, OpIOType] = traceback.TracebackException( 
- RuntimeError, RuntimeError(f"Couldn't run op {spec} at all (run id: {run_id})"), None - ) - self.logger.info( - f"Will try to execute op {spec} with input {get_input_ids(content.input)} " - f"for at most {self.max_tries} tries in child process." - ) - final_time = time.time() + timeout_s - for i in range(self.max_tries): - inner_timeout = final_time - time.time() - if self.shutting_down: - self.logger.info( - "Stopping execution of op because the shutdown process has been initiated." - ) - raise ShuttingDownException() - try: - ret = self.try_run_op(spec, content, inner_timeout) - if not isinstance(ret, traceback.TracebackException): - self.logger.debug(f"Op {spec} ran successfully on try {i+1} (run id: {run_id})") - break - self.logger.error( - f"Failed to run op {spec} with input {get_input_ids(content.input)} " - f"in subprocess. (try {i+1}/{self.max_tries}) {''.join(ret.format())}" - ) - except ProcessExpired: - self.logger.exception(f"pebble child process failed on try {i+1}/{self.max_tries}") - except TimeoutError as e: - msg = ( - f"Op execution timed out on try {i+1}/{self.max_tries}. " - f"Total time allowed: {timeout_s} seconds. " - f"Last try was allowed to run for {inner_timeout} seconds." 
- ) - self.logger.exception(msg) - raise RuntimeError(msg) from e - self.current_child = None - if isinstance(ret, traceback.TracebackException): - raise RuntimeError("".join(ret.format())) - return ret - - -WorkerConfig = builds( - Worker, - port=settings.GRPC_APP_PORT, - pubsubname=CONTROL_STATUS_PUBSUB, - control_topic=MISSING, - status_topic=STATUS_PUBSUB_TOPIC, - max_tries=5, - termination_grace_period_s=TERMINATION_GRACE_PERIOD_S, - factory_spec=OperationFactoryConfig, - zen_partial=False, - hydra_recursive=False, - logdir=None, - max_log_file_bytes=MAX_LOG_FILE_BYTES, - log_backup_count=LOG_BACKUP_COUNT, - loglevel=None, - otel_service_name="", -) diff --git a/src/vibe_common/setup.py b/src/vibe_common/setup.py deleted file mode 100644 index 185a55fb..00000000 --- a/src/vibe_common/setup.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from setuptools import find_packages, setup - -setup( - name="vibe-common", - version="0.0.1", - author="Microsoft", - author_email="terravibes@microsoft.com", - description="TerraVibes Geospatial Platform Package - vibe common package.", - license="Proprietary", - keywords="terravibes geospatial", - packages=find_packages(exclude=["tests*"]), - python_requires="~=3.8", - install_requires=[ - "aiohttp~=3.9.0", - "aiohttp-retry~=2.8.3", - "azure-keyvault>=4.1.0", - "jsonschema~=4.6", - "requests~=2.32.0", - "cloudevents~=1.2", - "grpcio~=1.53.0", - "dapr~=1.13.0", - "fastapi_utils~=0.2.1", - "pyyaml~=6.0.1", - "vibe_core", - "debugpy", - "azure-identity~=1.14.0", - "azure-storage-blob>=12.5.0", - "uvicorn~=0.13.4", - "uvloop~=0.17.0", - "fastapi~=0.109.1", - "httptools~=0.6.0", - "gunicorn~=21.2.0", - "opentelemetry-api~=1.20.0", - "opentelemetry-sdk~=1.20.0", - "opentelemetry-exporter-otlp~=1.20.0", - "opentelemetry-instrumentation~=0.41b0", - ], -) diff --git a/src/vibe_common/tests/conftest.py b/src/vibe_common/tests/conftest.py deleted file mode 100644 index 
440204e3..00000000 --- a/src/vibe_common/tests/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from vibe_dev.testing import anyio_backend -from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir, fake_workflows_dir -from vibe_dev.testing.workflow_fixtures import ( - SimpleStrData, - SimpleStrDataType, - simple_op_spec, - workflow_execution_message, -) - -__all__ = [ - "SimpleStrDataType", - "SimpleStrData", - "workflow_execution_message", - "simple_op_spec", - "fake_ops_dir", - "fake_workflows_dir", - "anyio_backend", -] diff --git a/src/vibe_common/tests/test_input_handlers.py b/src/vibe_common/tests/test_input_handlers.py deleted file mode 100644 index b8f57d05..00000000 --- a/src/vibe_common/tests/test_input_handlers.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime, timedelta, timezone -from typing import Any, Dict -from unittest.mock import Mock, patch - -import pytest - -from vibe_common.input_handlers import gen_stac_item_from_bounds, handle_non_collection - - -def test_with_feature_geojson(): - start_date = datetime.now(timezone.utc) - end_date = start_date - timedelta(days=6 * 30) - - test_feature: Dict[str, Any] = { - "type": "Feature", - "properties": {"Name": "some_name"}, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-118.675944, 46.916908], - [-118.675944, 46.79631], - [-118.841574, 46.79631], - [-118.841574, 46.916908], - [-118.675944, 46.916908], - ] - ], - }, - } - - item = handle_non_collection(test_feature, start_date, end_date) - - assert item["properties"]["start_datetime"] == start_date.isoformat() - assert item["properties"]["end_datetime"] == end_date.isoformat() - assert item["geometry"] == test_feature["geometry"] - - -def test_with_geometry_geojson(): - start_date = datetime.now(timezone.utc) - end_date = start_date - timedelta(days=6 * 30) - - 
test_geometry: Dict[str, Any] = { - "type": "Polygon", - "name": "some_name", - "coordinates": [ - [ - [-85.34557342529297, 37.441882193395124], - [-85.18661499023436, 37.441882193395124], - [-85.18661499023436, 37.53804390907164], - [-85.34557342529297, 37.53804390907164], - [-85.34557342529297, 37.441882193395124], - ] - ], - } - - item = handle_non_collection(test_geometry, start_date, end_date) - - assert item["properties"]["start_datetime"] == start_date.isoformat() - assert item["properties"]["end_datetime"] == end_date.isoformat() - assert item["geometry"] == test_geometry - - -@patch("vibe_common.input_handlers.handle_non_collection") -def test_with_feature_collection_geojson(mock_handle: Mock): - start_date = datetime.now(timezone.utc) - end_date = start_date - timedelta(days=6 * 30) - - test_feature = { - "type": "Feature", - "properties": {"Name": "some_name"}, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-118.675944, 46.916908], - [-118.675944, 46.79631], - [-118.841574, 46.79631], - [-118.841574, 46.916908], - [-118.675944, 46.916908], - ] - ], - }, - } - - test_collection: Dict[str, Any] = { - "type": "FeatureCollection", - "name": "some_name", - "features": [test_feature], - } - - gen_stac_item_from_bounds(test_collection, start_date, end_date) - - mock_handle.assert_called_once_with(test_feature, start_date, end_date) - - test_collection["features"].append(test_feature) - with pytest.raises(ValueError): - gen_stac_item_from_bounds(test_collection, start_date, end_date) diff --git a/src/vibe_common/tests/test_messaging.py b/src/vibe_common/tests/test_messaging.py deleted file mode 100644 index 6445c0a9..00000000 --- a/src/vibe_common/tests/test_messaging.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import json -import sys -import traceback as tb -from typing import Callable, List, Type, cast -from unittest.mock import patch - -import pytest -from dapr.conf import settings - -from vibe_common.constants import PUBSUB_URL_TEMPLATE -from vibe_common.messaging import ( - ErrorContent, - ExecuteReplyContent, - ExecuteRequestContent, - MessageHeader, - MessageType, - OperationSpec, - OpStatusType, - WorkMessage, - build_work_message, - decode, - encode, - operation_spec_serializer, - run_id_from_traceparent, - send, -) -from vibe_common.schemas import CacheInfo -from vibe_core.data import TypeDictVibe -from vibe_dev.testing.workflow_fixtures import SimpleStrDataType - - -@pytest.fixture -def message_header(traceparent: str) -> MessageHeader: - header = MessageHeader( - type=MessageType.execute_request, run_id=run_id_from_traceparent(traceparent) - ) - return header - - -@pytest.fixture -def traceparent(workflow_execution_message: WorkMessage) -> str: - return workflow_execution_message.id - - -@pytest.fixture -def execute_request_content( - simple_op_spec: OperationSpec, SimpleStrData: Type[SimpleStrDataType] -) -> ExecuteRequestContent: - data = SimpleStrData("some fake data") - content = ExecuteRequestContent( - input=TypeDictVibe({"user_input": {"data": data}}), # type: ignore - operation_spec=simple_op_spec, - ) - return content - - -def test_workflow_message_construction(workflow_execution_message: ExecuteRequestContent): - assert workflow_execution_message - - -def test_execute_request_message_construction( - message_header: MessageHeader, traceparent: str, execute_request_content: ExecuteRequestContent -): - build_work_message( - header=message_header, - content=execute_request_content, - traceparent=traceparent, # type: ignore - ) - - -def test_execute_reply_message_construction(message_header: MessageHeader, traceparent: str): - content = ExecuteReplyContent( - cache_info=CacheInfo("test_op", "1.0", {}, {}), status=OpStatusType.done, output={} - ) - 
message_header.type = MessageType.execute_reply - build_work_message(header=message_header, content=content, traceparent=traceparent) - - -def test_error_message_construction(message_header: MessageHeader, traceparent: str): - try: - 1 / 0 # type: ignore - except ZeroDivisionError: - e, value, traceback = sys.exc_info() - content = ErrorContent( - status=OpStatusType.failed, - ename=e.__name__, # type: ignore - evalue=str(e), # type: ignore - traceback=tb.format_tb(traceback), # type: ignore - ) - message_header.type = MessageType.error - build_work_message(header=message_header, content=content, traceparent=traceparent) - - -@patch("requests.post") -def test_send_work_message(post: Callable[..., None], workflow_execution_message: WorkMessage): - send(workflow_execution_message, "test", "fake", "fake") - post.assert_called_with( - PUBSUB_URL_TEMPLATE.format( - cast(str, settings.DAPR_RUNTIME_HOST), - cast(str, settings.DAPR_HTTP_PORT), - "fake", - "fake", - ), - json=workflow_execution_message.to_cloud_event("test"), - headers={ - "Content-Type": "application/cloudevents+json", - "traceparent": workflow_execution_message.id, - }, - ) - - -def test_operation_spec_serializer(execute_request_content: ExecuteRequestContent): - spec = execute_request_content.operation_spec - assert spec is not None - out = operation_spec_serializer(spec) - type_mapper = { - "plain_input": "SimpleStrDataType", - "list_input": "List[SimpleStrDataType]", - "terravibes_input": "DataVibe", - "terravibes_list": "List[DataVibe]", - } - for k, v in type_mapper.items(): - assert out["inputs_spec"][k] == v - spec.inputs_spec["nested_list_input"] = List[List[SimpleStrDataType]] # type: ignore - with pytest.raises(ValueError): - operation_spec_serializer(spec) - - -def test_encoder_decoder(): - messages = [ - "1, 2, 3, 4", - "🤩😱🤷‍🤔🍎😜♾️🍔🤭😒😵‍", - json.dumps( - { - "+♾️": float("+inf"), - "-♾️": float("-inf"), - "🦇👨": [float("nan") for _ in range(20)], - } - ), - ] - - for message in messages: - 
assert message == decode(encode(message)) - - -def test_refuse_to_encode_message_with_invalid_values(workflow_execution_message: WorkMessage): - invalid_values = (float("nan"), float("inf"), float("-inf")) - - for value in invalid_values: - content = cast(ExecuteRequestContent, workflow_execution_message.content) - content.input["plain_input"]["data"] = [{"a": value}] # type: ignore - with pytest.raises(ValueError): - workflow_execution_message.to_cloud_event("test") diff --git a/src/vibe_common/tests/test_statestore.py b/src/vibe_common/tests/test_statestore.py deleted file mode 100644 index ef004cee..00000000 --- a/src/vibe_common/tests/test_statestore.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any - -import pytest - -from vibe_common.statestore import StateStore - - -class MockResponse: - def __init__(self, content: Any): - self._content = content - - async def json(self, loads: Any, **kwargs: Any) -> Any: - return loads(self._content, **kwargs) - - -@pytest.mark.anyio -async def test_store_fails_with_invalid_input(): - store = StateStore() - for value in [float(x) for x in "inf -inf nan".split()]: - with pytest.raises(ValueError): - await store.store("key", value) diff --git a/src/vibe_common/tests/test_vibe_dapr_client.py b/src/vibe_common/tests/test_vibe_dapr_client.py deleted file mode 100644 index b84866b8..00000000 --- a/src/vibe_common/tests/test_vibe_dapr_client.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from typing import Any - -import pytest - -from vibe_common.vibe_dapr_client import VibeDaprClient -from vibe_core.datamodel import Message, SpatioTemporalJson - - -class MockResponse: - def __init__(self, content: Any): - self._content = content - - async def json(self, loads: Any, **kwargs: Any) -> Any: - return loads(self._content, **kwargs) - - -def test_state_store_dumps_dataclass(): - client = VibeDaprClient() - assert client._dumps(Message(message="hi", id=None, location=None)) - - -def test_state_store_fails_to_dump_pydantic_model_with_invalid_values(): - client = VibeDaprClient() - with pytest.raises(ValueError): - client._dumps( - SpatioTemporalJson( - start_date=datetime.now(), - end_date=datetime.now(), - geojson={"location": float("nan")}, - ) - ) - - -def test_state_store_float_serialized_as_str(): - lat = -52.6324171000924 - lon = -7.241144827812494 - test_input = SpatioTemporalJson( - start_date=datetime.now(), - end_date=datetime.now(), - geojson={"coordinates": [lat, lon]}, - ) - client = VibeDaprClient() - test_input_json = client.obj_json(test_input) - assert test_input_json["geojson"]["coordinates"][0] == repr(lat) - assert test_input_json["geojson"]["coordinates"][1] == repr(lon) - - -@pytest.mark.anyio -async def test_state_store_response_deserialize_floats(): - lat = -52.6324171000924 - lon = -7.241144827812494 - - test_response = MockResponse( - str.encode('{{"geojson": {{"coordinates": ["{0}", "{1}"]}}}}'.format(lat, lon)) - ) - - client = VibeDaprClient() - test_response_json = await client.response_json(test_response) # type: ignore - assert isinstance(test_response_json["geojson"]["coordinates"][0], float) - assert isinstance(test_response_json["geojson"]["coordinates"][1], float) - assert test_response_json["geojson"]["coordinates"][0] == lat - assert test_response_json["geojson"]["coordinates"][1] == lon diff --git a/src/vibe_common/vibe_common/__init__.py b/src/vibe_common/vibe_common/__init__.py 
deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_common/vibe_common/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_common/vibe_common/constants.py b/src/vibe_common/vibe_common/constants.py deleted file mode 100644 index 3ea3828a..00000000 --- a/src/vibe_common/vibe_common/constants.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from typing import Dict, Final, List, Tuple, cast - -from dapr.conf import settings - -from vibe_core.cli.local import DATA_SUFFIX - -HeaderDict = Dict[str, str] -WorkReply = Tuple[str, int, HeaderDict] - -DEFAULT_STORE_PATH: Final[str] = os.environ.get( - "DEFAULT_STORE_PATH", os.path.join("/mnt", DATA_SUFFIX) -) -DEFAULT_CATALOG_PATH: Final[str] = os.environ.get( - "DEFAULT_CATALOG_PATH", os.path.join(DEFAULT_STORE_PATH, "stac") -) -DEFAULT_ASSET_PATH: Final[str] = os.environ.get( - "DEFAULT_ASSET_PATH", os.path.join(DEFAULT_STORE_PATH, "assets") -) -DEFAULT_BLOB_ASSET_MANAGER_CONTAINER: Final[str] = "assets" -DEFAULT_COSMOS_DATABASE_NAME: Final[str] = "prod-catalog" -DEFAULT_STAC_COSMOS_CONTAINER: Final[str] = "prod-stac" -DEFAULT_COSMOS_KEY_VAULT_KEY_NAME: Final[str] = "stac-cosmos-write-key" -DEFAULT_COSMOS_URI: Final[str] = "" -DEFAULT_SECRET_STORE_NAME: Final[str] = "azurekeyvault" - -CONTROL_STATUS_PUBSUB: Final[str] = "control-pubsub" -CONTROL_PUBSUB_TOPIC: Final[str] = "commands" -CACHE_PUBSUB_TOPIC: Final[str] = "cache-commands" -STATUS_PUBSUB_TOPIC: Final[str] = "updates" - -TRACEPARENT_VERSION: Final[str] = "00" -TRACEPARENT_FLAGS: Final[int] = 1 - -TRACE_FORMAT: Final[str] = "032x" -SPAN_FORMAT: Final[str] = "016x" -FLAGS_FORMAT: Final[str] = "02x" - -TRACEPARENT_STRING = ( - f"{TRACEPARENT_VERSION}-{{trace_id:{TRACE_FORMAT}}}" - f"-{{parent_id:{SPAN_FORMAT}}}-{{trace_flags:{FLAGS_FORMAT}}}" -) -TRACEPARENT_HEADER_KEY: Final[str] = 
"Traceparent" - -WORKFLOW_ARTIFACTS_PUBSUB_TOPIC: Final[str] = "workflow-artifacts-commands" -WORKFLOW_REQUEST_PUBSUB_TOPIC: Final[str] = "workflow_execution_request" -STATE_URL_PATH = "/v1.0/state" -STATE_URL_TEMPLATE: Final[str] = ( - f"http://{settings.DAPR_RUNTIME_HOST}:{settings.DAPR_HTTP_PORT}{STATE_URL_PATH}" "/{}/{}" -) -PUBSUB_URL_TEMPLATE: Final[str] = "http://{}:{}/v1.0/publish/{}/{}" -PUBSUB_WORKFLOW_URL: Final[str] = PUBSUB_URL_TEMPLATE.format( - cast(str, settings.DAPR_RUNTIME_HOST), - cast(str, settings.DAPR_HTTP_PORT), - CONTROL_STATUS_PUBSUB, - WORKFLOW_REQUEST_PUBSUB_TOPIC, -) -SERVICE_INVOCACATION_URL_PATH = "/v1.0/invoke" -DATA_OPS_INVOKE_URL_TEMPLATE: Final[str] = ( - f"http://{settings.DAPR_RUNTIME_HOST}:{settings.DAPR_HTTP_PORT}" - f"{SERVICE_INVOCACATION_URL_PATH}/terravibes-data-ops/method/" - "{}/{}" -) - -RUNS_KEY: Final[str] = "runs" -ALLOWED_ORIGINS: Final[List[str]] = [ - o - for o in os.getenv( - "ALLOWED_ORIGINS", - "http://localhost:3000," - "http://localhost," - "http://127.0.0.1:8080," - "http://127.0.0.1:3000,", - ).split(",") - if o -] - -MAX_PARALLEL_REQUESTS: Final[int] = 8 - -HERE = os.path.dirname(os.path.abspath(__file__)) -DEFAULT_OPS_DIR = os.path.abspath(os.path.join(HERE, "..", "..", "..", "ops")) -if not os.path.exists(DEFAULT_OPS_DIR): - DEFAULT_OPS_DIR = os.path.join("/", "app", "ops") diff --git a/src/vibe_common/vibe_common/dapr.py b/src/vibe_common/vibe_common/dapr.py deleted file mode 100644 index 31ecbf62..00000000 --- a/src/vibe_common/vibe_common/dapr.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -import logging -from functools import partial, wraps -from typing import Any, Callable, overload - -from aiohttp import ClientResponse -from dapr.clients import DaprClient -from dapr.conf import settings - -from vibe_common.constants import SERVICE_INVOCACATION_URL_PATH, STATE_URL_PATH - -MAX_TIMEOUT_TRIES = 3 -DAPR_WAIT_TIME_S = 90 - - -def dapr_ready_decorator( - func: Callable[..., Any], dapr_wait_time_s: int = DAPR_WAIT_TIME_S -) -> Callable[..., Any]: - if asyncio.iscoroutinefunction(func): - - @wraps(func) - async def wrapper(*args: Any, **kwargs: Any): # type: ignore - with DaprClient() as dapr_client: - logger = logging.getLogger(f"{__name__}.wait_dapr") - logger.info(f"Waiting {dapr_wait_time_s} seconds for dapr to be ready") - try: - dapr_client.wait(dapr_wait_time_s) - except Exception: - logger.exception("dapr is not ready") - raise - logger.info("dapr is ready.") - return await func(*args, **kwargs) - else: - - @wraps(func) - def wrapper(*args: Any, **kwargs: Any): - with DaprClient() as dapr_client: - logger = logging.getLogger(f"{__name__}.wait_dapr") - logger.info(f"Waiting {dapr_wait_time_s} seconds for dapr to be ready") - try: - dapr_client.wait(dapr_wait_time_s) - except Exception: - logger.exception("dapr is not ready") - raise - logger.info("dapr is ready.") - return func(*args, **kwargs) - - return wrapper - - -@overload -def dapr_ready(func: None = None, *, dapr_wait_time_s: int = DAPR_WAIT_TIME_S) -> Any: ... - - -@overload -def dapr_ready(func: Callable[..., Any]) -> Callable[..., Any]: ... 
- - -def dapr_ready(func: Any = None, *, dapr_wait_time_s: int = DAPR_WAIT_TIME_S) -> Any: - if func is None: - return partial(dapr_ready_decorator, dapr_wait_time_s=dapr_wait_time_s) - else: - return dapr_ready_decorator(func, dapr_wait_time_s=dapr_wait_time_s) - - -def process_dapr_state_response(response: ClientResponse) -> ClientResponse: - if not response.ok: - if response.status == 400: - raise RuntimeError("State store is not configured") - elif response.status == 404: - raise KeyError(f"Key specified in {response.url} not found") - if response.request_info.method == "GET" and response.status == 204: - # https://docs.dapr.io/reference/api/state_api/#http-response-1 - raise KeyError(f"Key specified in {response.url} not found") - return response - - -async def process_dapr_service_invocation_response( - response: ClientResponse, -) -> ClientResponse: - if not response.ok: - if response.status == 400: - raise RuntimeError("Method name not given for service invocation.") - elif response.status == 403: - raise RuntimeError(f"Invocation forbidden by access control for {response.url}") - elif response.status == 500: - content = await response.read() - raise RuntimeError(f"Response 500 for {response.url} -- response body: {content}") - return response - - -async def process_dapr_response(response: ClientResponse) -> ClientResponse: - if response.url.host != settings.DAPR_RUNTIME_HOST: - logging.warning("This url is not a response from Dapr: {response.url.host}") - return response - - if response.url.path.startswith(STATE_URL_PATH): - return process_dapr_state_response(response) - elif response.url.path.startswith(SERVICE_INVOCACATION_URL_PATH): - return await process_dapr_service_invocation_response(response) - else: - logging.warning( - "We only handle Dapr responses for state management and service invocation. 
" - "Response URL = {response.url}" - ) - return response - - -async def handle_aiohttp_timeout(response: ClientResponse) -> ClientResponse: - logger = logging.getLogger(f"{__name__}.handle_aiohttp_timeout") - tries: int = 0 - while True: - try: - await response.read() - return await process_dapr_response(response) - except asyncio.TimeoutError: - tries += 1 - logger.warning( - f"Timeout interacting with Dapr via HTTP, " - f"retrying ({tries}/{MAX_TIMEOUT_TRIES})" - ) - if tries >= MAX_TIMEOUT_TRIES: - raise diff --git a/src/vibe_common/vibe_common/dropdapr.py b/src/vibe_common/vibe_common/dropdapr.py deleted file mode 100644 index 09d749ed..00000000 --- a/src/vibe_common/vibe_common/dropdapr.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -""" -dropdapr - A drop-in replacement for dapr-ext-grpc subscribe using FastAPI. -""" - -from typing import Any, Awaitable, Callable, Coroutine, Dict, List, Optional, TypedDict, Union - -import uvicorn -from cloudevents.sdk.event import v1 -from fastapi import FastAPI # type: ignore -from pydantic import BaseConfig - -BaseConfig.arbitrary_types_allowed = True - - -class TopicEventResponse(Dict[str, str]): - def __getattr__(self, attr: str): - if attr == "status": - return self["status"] - - def __init__(self, *args: Any, **kwargs: Dict[Any, Any]): - if len(args) == 1 and isinstance(args[0], str): - super().__init__({"status": args[0].upper()}) - else: - super().__init__(*args, **kwargs) - - -class TopicEventResponseStatus: - success: TopicEventResponse = TopicEventResponse({"status": "SUCCESS"}) - retry: TopicEventResponse = TopicEventResponse({"status": "RETRY"}) - drop: TopicEventResponse = TopicEventResponse({"status": "DROP"}) - - -class DaprSubscription(TypedDict): - pubsubname: str - topic: str - route: str - metadata: Optional[Dict[str, str]] - - -class App: - def __init__(self): - self.app = FastAPI() - self.subscriptions: List[DaprSubscription] = [] - 
self.server: Optional[uvicorn.Server] = None - - self.app.add_api_route( - "/", - self.index, # type: ignore - methods=["GET"], - response_model=Dict, - ) - - self.app.add_api_route( - "/dapr/subscribe", - lambda: self.subscriptions, # type: ignore - methods=["GET"], - response_model=Any, - ) - - def index(self): - return { - "detail": "This server only works with dapr. Please don't make requests to it.", - "subscriptions": self.subscriptions, - } - - def add_subscription( - self, - handler: Callable[..., Union[TopicEventResponse, Coroutine[Any, Any, Any]]], - pubsub: str, - topic: str, - metadata: Optional[Dict[str, str]] = {}, - ): - event_handler_route = f"/events/{pubsub}/{topic}" - self.app.add_api_route( - event_handler_route, - handler, # type: ignore - methods=["POST"], - response_model=Any, - ) - - self.subscriptions.append( - { - "pubsubname": pubsub, - "topic": topic, - "route": event_handler_route, - "metadata": metadata, - } - ) - - def subscribe_async(self, pubsub: str, topic: str, metadata: Optional[Dict[str, str]] = {}): - def decorator(func: Callable[[v1.Event], Awaitable[Any]]): - async def event_wrapper(request: Dict[str, Any]): - event = v1.Event() - event.SetEventType(request["type"]) - event.SetEventID(request["id"]) - event.SetSource(request["source"]) - try: - event.SetData(request["data"]) - except KeyError: - event.SetData(request["data_base64"]) - event.SetContentType(request["datacontenttype"]) - try: - return await func(event) - except RuntimeError: - return TopicEventResponseStatus.retry - except Exception: - return TopicEventResponseStatus.drop - - self.add_subscription(event_wrapper, pubsub, topic, metadata) - - return decorator - - def subscribe(self, pubsub: str, topic: str, metadata: Optional[Dict[str, str]] = {}): - def decorator(func: Callable[[v1.Event], Any]): - def event_wrapper(request: Dict[str, Any]): - event = v1.Event() - event.SetEventType(request["type"]) - event.SetEventID(request["id"]) - 
event.SetSource(request["source"]) - try: - event.SetData(request["data"]) - except KeyError: - event.SetData(request["data_base64"]) - event.SetContentType(request["datacontenttype"]) - try: - return func(event) - except RuntimeError: - return TopicEventResponseStatus.retry - except Exception: - return TopicEventResponseStatus.drop - - self.add_subscription(event_wrapper, pubsub, topic, metadata) - - return decorator - - def method(self, name: str): - def decorator(func): # type: ignore - route = f"/{name}" - self.app.add_api_route( - route, - func, - methods=["GET", "POST"], - response_model=Any, - ) - - return decorator - - def startup(self): - def decorator(func: Callable[[], None]): - self.app.add_event_handler("startup", func) - - return decorator - - def shutdown(self): - def decorator(func): # type: ignore - self.app.add_event_handler("shutdown", func) - - return decorator - - def health(self, endpoint: str = "/health"): - def decorator(func): # type: ignore - self.app.add_api_route( - endpoint, - func, - methods=["GET"], - response_model=Any, - ) - - return decorator - - def run( - self, - port: int, - limit_concurrency: Optional[int] = None, - ): - config = uvicorn.Config( - self.app, - host="127.0.0.1", - port=port, - log_config=None, - limit_concurrency=limit_concurrency, - ) - self.server = uvicorn.Server(config) - self.server.run() # type: ignore - - async def run_async( - self, - port: int, - limit_concurrency: Optional[int] = None, - workers: int = 1, - ): - config = uvicorn.Config( - self.app, - host="127.0.0.1", - port=port, - log_config=None, - limit_concurrency=limit_concurrency, - loop="uvloop", - workers=workers, - ) - self.server = uvicorn.Server(config) - await self.server.serve() diff --git a/src/vibe_common/vibe_common/input_handlers.py b/src/vibe_common/vibe_common/input_handlers.py deleted file mode 100644 index 710b29cd..00000000 --- a/src/vibe_common/vibe_common/input_handlers.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 
Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from typing import Any, Dict - -from vibe_core.data import DataVibe, StacConverter, gen_hash_id - -# Checking geojson dict and extracting geometry -VALID_GEOMETRIES = ["Polygon", "MultiPolygon"] -INVALID_GEOMETRIES = [ - "Point", - "LineString", - "MultiPoint", - "MultiLineString", - "GeometryCollection", -] - - -def handle_non_collection( - geojson_dict: Dict[str, Any], start_date: datetime, end_date: datetime -) -> Dict[str, Any]: - geotype = geojson_dict["type"] - - if geotype == "Feature": - geometry = geojson_dict["geometry"] - elif geotype in VALID_GEOMETRIES: - geometry = geojson_dict - elif geotype == "FeatureCollection": - raise ValueError("Feature collection not supported here.") - elif geotype in INVALID_GEOMETRIES: - raise ValueError( - f"Invalid geometry {geotype}. Input geometry must be Polygon or MultiPolygon." - ) - else: - raise ValueError(f"Invalid geojson type {geotype}.") - - converter = StacConverter() - time_range = (start_date, end_date) - data = DataVibe( - id=gen_hash_id("input", geometry, time_range), - time_range=time_range, - geometry=geometry, - assets=[], - ) - stac_item = converter.to_stac_item(data) - - return stac_item.to_dict(include_self_link=False) - - -def gen_stac_item_from_bounds( - geojson_dict: Dict[str, Any], start_date: datetime, end_date: datetime -) -> Dict[str, Any]: - geotype = geojson_dict["type"] - - if geotype == "FeatureCollection": - if len(geoms := geojson_dict["features"]) > 1: - raise ValueError( - f"Only one feature is currently supported as input to a workflow, found " - f"{len(geoms)} features in feature collection" - ) - return handle_non_collection(geoms[0], start_date, end_date) - - return handle_non_collection(geojson_dict, start_date, end_date) diff --git a/src/vibe_common/vibe_common/messaging.py b/src/vibe_common/vibe_common/messaging.py deleted file mode 100644 index e7025b5f..00000000 --- 
a/src/vibe_common/vibe_common/messaging.py +++ /dev/null @@ -1,651 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -import logging -import sys -import traceback -from dataclasses import asdict -from datetime import datetime -from enum import auto -from random import getrandbits -from typing import ( - Any, - Awaitable, - Callable, - Dict, - Final, - List, - Literal, - Optional, - Set, - Type, - Union, - cast, - get_args, - get_type_hints, - overload, -) -from uuid import UUID - -import aiohttp -import requests -from cloudevents.sdk.event import v1 -from dapr.clients.grpc._response import TopicEventResponse -from dapr.conf import settings -from fastapi_utils.enums import StrEnum -from pydantic import BaseModel as PyBaseModel -from pydantic import Field, ValidationError, validator -from pystac.item import Item - -import vibe_common.telemetry as telemetry -from vibe_core.data.core_types import OpIOType -from vibe_core.data.utils import get_base_type, is_container_type, serialize_stac -from vibe_core.datamodel import decode, encode -from vibe_core.utils import get_input_ids - -from .constants import ( - CACHE_PUBSUB_TOPIC, - CONTROL_PUBSUB_TOPIC, - PUBSUB_URL_TEMPLATE, - STATUS_PUBSUB_TOPIC, - TRACEPARENT_FLAGS, - TRACEPARENT_STRING, - WORKFLOW_REQUEST_PUBSUB_TOPIC, -) -from .dropdapr import TopicEventResponse as HttpTopicEventResponse -from .schemas import CacheInfo, OperationSpec - -CLOUDEVENTS_JSON: Final[str] = "application/cloudevents+json" -OCTET_STREAM: Final[str] = "application/octet-stream" -MAXIMUM_MESSAGE_SIZE: Final[int] = 256 * 1024 - -MessageContent = Union[ - "AckContent", - "CacheInfoExecuteRequestContent", - "ExecuteRequestContent", - "ExecuteReplyContent", - "ErrorContent", - "WorkflowExecutionContent", - "EvictedReplyContent", - "WorkflowCancellationContent", - "WorkflowDeletionContent", -] -ValidVersion = Literal["1.0"] - - -class OpStatusType(StrEnum): - done = auto() - failed = auto() - - -class 
MessageType(StrEnum): - ack = auto() - cache_info_execute_request = auto() - error = auto() - execute_request = auto() - execute_reply = auto() - evicted_reply = auto() - workflow_execution_request = auto() - workflow_cancellation_request = auto() - workflow_deletion_request = auto() - - -class BaseModel(PyBaseModel): - class Config: - json_encoders = {Item: serialize_stac} - - -class MessageHeader(BaseModel): - type: MessageType - run_id: UUID - id: str = "" - parent_id: str = "" - current_trace_parent: str = "" - version: ValidVersion = "1.0" - created_at: datetime = Field(default_factory=datetime.now) - - @validator("id", always=True) - def set_id(cls, value: str, values: Dict[str, Any]): - return value or gen_traceparent(values["run_id"]) - - -class ExecuteRequestContent(BaseModel): - input: OpIOType - operation_spec: OperationSpec - - def __str__(self): - return ( - f"{self.__class__.__name__}" - f"(operation_spec={self.operation_spec}, " - f"input={get_input_ids(self.input)})" - ) - - -class CacheInfoExecuteRequestContent(ExecuteRequestContent): - cache_info: CacheInfo - - def __str__(self): - return ( - f"{self.__class__.__name__}" - f"(operation_spec={self.operation_spec}, " - f"input={get_input_ids(self.input)}, " - f"cache_info={self.cache_info})" - ) - - -class ExecuteReplyContent(BaseModel): - cache_info: CacheInfo - status: OpStatusType - output: OpIOType - - -class AckContent(BaseModel): - pass - - -class EvictedReplyContent(BaseModel): - pass - - -class ErrorContent(BaseModel): - status: OpStatusType - ename: str - evalue: str - traceback: List[str] - - -class WorkflowExecutionContent(BaseModel): - input: OpIOType - workflow: Dict[str, Any] - parameters: Optional[Dict[str, Any]] - - def __str__(self): - return ( - f"{self.__class__.__name__}(workflow={self.workflow}, parameters={self.parameters}, " - f"input={get_input_ids(self.input)})" - ) - - -class WorkflowCancellationContent(BaseModel): - pass - - -class WorkflowDeletionContent(BaseModel): - 
pass - - -class BaseMessage(BaseModel): - header: MessageHeader - content: MessageContent - _supported_channels: Set[str] - - class Config: - # VibeType is not JSON serializable, so we need to convert - # it to string, and convert it back when we receive the - # message - json_encoders = {OperationSpec: lambda x: operation_spec_serializer(x)} # type: ignore - - def is_valid_for_channel(self, channel: str): - return channel in self._supported_channels - - @property - def id(self): - return self.header.id - - @property - def parent_id(self): - return self.header.parent_id - - @property - def run_id(self): - return self.header.run_id - - @property - def current_trace_parent(self): - return self.header.current_trace_parent - - def update_current_trace_parent(self): - self.header.current_trace_parent = telemetry.get_current_trace_parent() - - @validator("content") - def validate_content(cls, value: MessageContent, values: Dict[str, MessageHeader]): - type: MessageType = values["header"].type - if not isinstance(value, MESSAGE_TYPE_TO_CONTENT_TYPE[type]): - raise ValueError( - f"Message of type {type} doesn't specify content of correct type " - f"({MESSAGE_TYPE_TO_CONTENT_TYPE[type]})" - ) - - if isinstance(value, ExecuteRequestContent) and value.operation_spec is None: - raise ValueError("Operation execution content requires an operation_spec") - return value - - def to_cloud_event(self, source: str) -> Dict[str, Any]: - """Converts this message to a CloudEvents 1.0 dict representation. - - Params: - source: str - From the spec: The "source" is the context in which the - occurrence happened. We should use the name of the TerraVibes - component that created this message. 
- - For details, please see the specification at - https://github.com/cloudevents/spec/blob/v1.0/spec.md - """ - - return { - "specversion": "1.0", - "datacontenttype": CLOUDEVENTS_JSON, - "type": f"ai.terravibes.work.{self.header.type}", - "source": source, - "data": encode(self.json(allow_nan=False)), - "time": datetime.now().isoformat(timespec="seconds") + "Z", # RFC3339 time - "subject": f"{self.header.type}-{self.header.id}", - "id": self.id, - "traceparent": self.id, - "traceid": self.id, - } - - -class CacheInfoExecuteRequestMessage(BaseMessage): - _supported_channels: Set[str] = {CONTROL_PUBSUB_TOPIC} - content: ExecuteRequestContent - - -class ExecuteRequestMessage(BaseMessage): - _supported_channels: Set[str] = {CACHE_PUBSUB_TOPIC} - content: ExecuteRequestContent - - -class ExecuteReplyMessage(BaseMessage): - _supported_channels: Set[str] = {STATUS_PUBSUB_TOPIC} - content: ExecuteReplyContent - - -class EvictedReplyMessage(BaseMessage): - _supported_channels: Set[str] = {STATUS_PUBSUB_TOPIC} - content: EvictedReplyContent - - -class ErrorMessage(BaseMessage): - _supported_channels: Set[str] = {STATUS_PUBSUB_TOPIC} - content: ErrorContent - - -class WorkflowDeletionMessage(BaseMessage): - _supported_channels: Set[str] = {WORKFLOW_REQUEST_PUBSUB_TOPIC} - content: WorkflowDeletionContent - - -class WorkflowExecutionMessage(BaseMessage): - _supported_channels: Set[str] = {WORKFLOW_REQUEST_PUBSUB_TOPIC} - content: WorkflowExecutionContent - - -class WorkflowCancellationMessage(BaseMessage): - _supported_channels: Set[str] = {WORKFLOW_REQUEST_PUBSUB_TOPIC} - content: WorkflowCancellationContent - - -class AckMessage(BaseMessage): - _supported_channels: Set[str] = {STATUS_PUBSUB_TOPIC} - content: AckContent - - -WorkMessage = Union[ - AckMessage, - CacheInfoExecuteRequestMessage, - ExecuteRequestMessage, - ExecuteReplyMessage, - EvictedReplyMessage, - ErrorMessage, - WorkflowExecutionMessage, - WorkflowCancellationMessage, - WorkflowDeletionMessage, -] - - 
-class WorkMessageBuilder: - @staticmethod - def build_execute_request( - run_id: UUID, - traceparent: str, - op_spec: OperationSpec, - input: OpIOType, - ) -> WorkMessage: - header = MessageHeader( - type=MessageType.execute_request, - run_id=run_id, - parent_id=traceparent, - ) - content = ExecuteRequestContent(input=input, operation_spec=op_spec) - return ExecuteRequestMessage(header=header, content=content) - - @staticmethod - def add_cache_info_to_execute_request( - execute_request_message: ExecuteRequestMessage, cache_info: CacheInfo - ) -> WorkMessage: - header = execute_request_message.header - header.type = MessageType.cache_info_execute_request - content = CacheInfoExecuteRequestContent( - input=execute_request_message.content.input, - operation_spec=execute_request_message.content.operation_spec, - cache_info=cache_info, - ) - return CacheInfoExecuteRequestMessage(header=header, content=content) - - @staticmethod - def build_workflow_request( - run_id: UUID, - workflow: Dict[str, Any], - parameters: Optional[Dict[str, Any]], - input: OpIOType, - ) -> WorkMessage: - header = MessageHeader(type=MessageType.workflow_execution_request, run_id=run_id) - content = WorkflowExecutionContent(input=input, workflow=workflow, parameters=parameters) - return WorkflowExecutionMessage(header=header, content=content) - - @staticmethod - def build_workflow_cancellation(run_id: UUID) -> WorkMessage: - header = MessageHeader(type=MessageType.workflow_cancellation_request, run_id=run_id) - content = WorkflowCancellationContent() - return WorkflowCancellationMessage(header=header, content=content) - - @staticmethod - def build_workflow_deletion(run_id: UUID) -> WorkMessage: - header = MessageHeader(type=MessageType.workflow_deletion_request, run_id=run_id) - content = WorkflowDeletionContent() - return WorkflowDeletionMessage(header=header, content=content) - - @staticmethod - def build_execute_reply( - traceparent: str, cache_info: CacheInfo, output: OpIOType - ) -> 
WorkMessage: - run_id = run_id_from_traceparent(traceparent) - header = MessageHeader(type=MessageType.execute_reply, run_id=run_id, parent_id=traceparent) - content = ExecuteReplyContent( - cache_info=cache_info, status=OpStatusType.done, output=output - ) - return ExecuteReplyMessage(header=header, content=content) - - @staticmethod - def build_error(traceparent: str, ename: str, evalue: str, traceback: List[str]) -> WorkMessage: - run_id = run_id_from_traceparent(traceparent) - header = MessageHeader(type=MessageType.error, run_id=run_id, parent_id=traceparent) - content = ErrorContent( - status=OpStatusType.failed, ename=ename, evalue=evalue, traceback=traceback - ) - return ErrorMessage(header=header, content=content) - - @staticmethod - def build_evicted_reply(traceparent: str) -> WorkMessage: - run_id = run_id_from_traceparent(traceparent) - header = MessageHeader(type=MessageType.evicted_reply, run_id=run_id, parent_id=traceparent) - content = EvictedReplyContent() - return EvictedReplyMessage(header=header, content=content) - - @staticmethod - def build_ack_reply(traceparent: str) -> WorkMessage: - run_id = run_id_from_traceparent(traceparent) - header = MessageHeader(type=MessageType.ack, run_id=run_id, parent_id=traceparent) - content = AckContent() - return AckMessage(header=header, content=content) - - -MESSAGE_TYPE_TO_CONTENT_TYPE: Dict[MessageType, Type[MessageContent]] = { - MessageType.ack: AckContent, - MessageType.cache_info_execute_request: CacheInfoExecuteRequestContent, - MessageType.error: ErrorContent, - MessageType.evicted_reply: EvictedReplyContent, - MessageType.execute_reply: ExecuteReplyContent, - MessageType.execute_request: ExecuteRequestContent, - MessageType.workflow_execution_request: WorkflowExecutionContent, - MessageType.workflow_cancellation_request: WorkflowCancellationContent, - MessageType.workflow_deletion_request: WorkflowDeletionContent, -} - - -def build_work_message( - header: MessageHeader, content: MessageContent, 
traceparent: Optional[str] = None -) -> WorkMessage: - error = None - for cls in get_args(WorkMessage): - try: - ret = cls(header=header, content=content) - if traceparent is not None: - ret.header.parent_id = traceparent - return ret - except ValidationError as e: - error = e - assert error is not None - raise error - - -def extract_event_data(event: v1.Event) -> Dict[str, Any]: - logger = logging.getLogger(f"{__name__}.extract_event_data") - if not isinstance(event.data, (bytes, str)): - logger.error("Received data is not a byte stream nor a string.") - raise ValueError("Unable to decode event data {event.data}") - try: - # dapr tries to encode our already-encoded string - data = json.loads(decode(json.loads(event.data))) - except json.decoder.JSONDecodeError: - data = json.loads( - decode(event.data if isinstance(event.data, str) else event.data.decode()) - ) - - return data - - -def event_to_work_message(event: v1.Event) -> WorkMessage: - data = extract_event_data(event) - header = MessageHeader(**data["header"]) - content = MESSAGE_TYPE_TO_CONTENT_TYPE[header.type](**data["content"]) - return build_work_message(header, content) - - -def extract_message_header_from_event(event: v1.Event) -> MessageHeader: - extracted_data = extract_event_data(event) - return MessageHeader(**extracted_data["header"]) - - -def send(message: WorkMessage, source: str, pubsubname: str, topic: str) -> bool: - message.update_current_trace_parent() - logger = logging.getLogger(f"{__name__}.send") - try: - logger.debug( - f"Sending message with header {message.header} from " - f"{source} to pubsub {pubsubname}, topic {topic}" - ) - response = requests.post( - PUBSUB_URL_TEMPLATE.format( - cast(str, settings.DAPR_RUNTIME_HOST), - cast(str, settings.DAPR_HTTP_PORT), - pubsubname, - topic, - ), - json=message.to_cloud_event(source), - headers={ - "Content-Type": CLOUDEVENTS_JSON, - "traceparent": message.id, - }, - ) - request_body_length = ( - len(response.request.body) if 
response.request and response.request.body else 0 - ) - logger.debug( - f"Last request to pubsub {pubsubname} topic {topic} had " - f"status code {response.status_code} and body length {request_body_length} bytes" - ) - if request_body_length > MAXIMUM_MESSAGE_SIZE: - logger.warning( - f"Last request to pubsub {pubsubname} topic {topic} exceeded " - f"maximum safe message size of {MAXIMUM_MESSAGE_SIZE} bytes. " - f"The message might have been dropped by the message broker." - ) - return response.ok - except Exception: - logger.exception( - f"Failed to send payload {message} from {source} to pubsub {pubsubname}, topic {topic}" - ) - raise - - -async def send_async(message: WorkMessage, source: str, pubsubname: str, topic: str) -> bool: - message.update_current_trace_parent() - logger = logging.getLogger(f"{__name__}.send_async") - try: - logger.debug( - f"Sending async message with header {message.header} from " - f"{source} to pubsub {pubsubname}, topic {topic}" - ) - async with aiohttp.ClientSession() as session: - payload = message.to_cloud_event(source) - async with await session.post( - PUBSUB_URL_TEMPLATE.format( - cast(str, settings.DAPR_RUNTIME_HOST), - cast(str, settings.DAPR_HTTP_PORT), - pubsubname, - topic, - ), - json=payload, - headers={ - "Content-Type": CLOUDEVENTS_JSON, - "traceparent": message.id, - }, - ) as response: - request_body_length = json.dumps(payload).encode("utf-8").__len__() - logger.debug( - f"Last request to pubsub {pubsubname} topic {topic} had " - f"status code {response.status} and body length {request_body_length} bytes" - ) - if request_body_length > MAXIMUM_MESSAGE_SIZE: - logger.warning( - f"Last request to pubsub {pubsubname} topic {topic} exceeded " - f"maximum safe message size of {MAXIMUM_MESSAGE_SIZE} bytes. " - f"The message might have been dropped by the message broker." 
- ) - return response.ok - except Exception: - logger.exception( - f"Failed to send payload {message} from {source} to pubsub {pubsubname}, topic {topic}" - ) - raise - - -def operation_spec_serializer(spec: OperationSpec) -> Dict[str, Any]: - opdict = asdict(spec) - for field in "inputs_spec output_spec".split(): - if field not in opdict: - continue - for k, v in opdict[field].items(): - if is_container_type(v): - base = get_base_type(v) - v = f"List[{base.__name__}]" - else: - v = get_base_type(v).__name__ - opdict[field][k] = str(v) - return opdict - - -def gen_traceparent(run_id: UUID) -> str: - """Generates a unique identifier that can be used as W3C traceparent header. - - See https://www.w3.org/TR/trace-context/#examples-of-http-traceparent-headers for examples. - """ - trace_id = int(run_id.hex, 16) - parent_id = getrandbits(64) - - return TRACEPARENT_STRING.format( - trace_id=trace_id, parent_id=parent_id, trace_flags=TRACEPARENT_FLAGS - ) - - -def run_id_from_traceparent(traceparent: str) -> UUID: - """Given the contents of a TerraVibes traceparent header, extracts a run_id from it.""" - - return UUID(traceparent.split("-")[1]) - - -@overload -def accept_or_fail_event( - event: v1.Event, - success_callback: Callable[[WorkMessage], HttpTopicEventResponse], - failure_callback: Callable[[v1.Event, Exception, List[str]], HttpTopicEventResponse], -) -> HttpTopicEventResponse: ... - - -@overload -def accept_or_fail_event( - event: v1.Event, - success_callback: Callable[[WorkMessage], TopicEventResponse], - failure_callback: Callable[[v1.Event, Exception, List[str]], TopicEventResponse], -) -> TopicEventResponse: ... 
- - -def accept_or_fail_event( - event: v1.Event, - success_callback: Callable[[WorkMessage], Union[HttpTopicEventResponse, TopicEventResponse]], - failure_callback: Callable[ - [v1.Event, Exception, List[str]], Union[HttpTopicEventResponse, TopicEventResponse] - ], -): - logger = logging.getLogger(f"{__name__}.accept_or_fail_event") - try: - message = event_to_work_message(event) - logger.info(f"Received message: header={message.header}") - return success_callback(message) - except Exception as e: - _, _, exc_traceback = sys.exc_info() - logger.exception(f"Failed to process event with id {event.id}") - try: - return failure_callback(event, e, traceback.format_tb(exc_traceback)) - except Exception: - logger.error(f"Unable to parse traceparent. Discarding event with id {event.id}") - - ResponseType = get_type_hints(success_callback).get("return", HttpTopicEventResponse) - return ResponseType("drop") - - -@overload -async def accept_or_fail_event_async( - event: v1.Event, - success_callback: Callable[[WorkMessage], Awaitable[HttpTopicEventResponse]], - failure_callback: Callable[[v1.Event, Exception, List[str]], Awaitable[HttpTopicEventResponse]], -) -> HttpTopicEventResponse: ... - - -@overload -async def accept_or_fail_event_async( - event: v1.Event, - success_callback: Callable[[WorkMessage], Awaitable[TopicEventResponse]], - failure_callback: Callable[[v1.Event, Exception, List[str]], Awaitable[TopicEventResponse]], -) -> TopicEventResponse: ... 
- - -async def accept_or_fail_event_async( - event: v1.Event, - success_callback: Callable[ - [WorkMessage], Awaitable[Union[HttpTopicEventResponse, TopicEventResponse]] - ], - failure_callback: Callable[ - [v1.Event, Exception, List[str]], - Awaitable[Union[HttpTopicEventResponse, TopicEventResponse]], - ], -): - logger = logging.getLogger(f"{__name__}.accept_or_fail_event_async") - try: - message = event_to_work_message(event) - logger.info(f"Received message: header={message.header}") - return await success_callback(message) - except Exception as e: - _, _, exc_traceback = sys.exc_info() - logger.exception(f"Failed to process event with id {event.id}") - try: - return await failure_callback(event, e, traceback.format_tb(exc_traceback)) - except Exception: - logger.error(f"Unable to parse traceparent. Discarding event with id {event.id}") - - ResponseType = get_type_hints(success_callback).get("return", HttpTopicEventResponse) - return ResponseType("drop") diff --git a/src/vibe_common/vibe_common/schemas.py b/src/vibe_common/vibe_common/schemas.py deleted file mode 100644 index afc20070..00000000 --- a/src/vibe_common/vibe_common/schemas.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from copy import deepcopy -from dataclasses import field, fields -from hashlib import sha256 -from itertools import chain -from typing import Any, ClassVar, Dict, List, Optional, Union, cast - -import yaml -from pydantic.dataclasses import dataclass -from pystac.item import Item -from typing_extensions import TypedDict # Required to avoid pydantic error - -from vibe_core.data.core_types import BaseVibe, TypeDictVibe, TypeParser -from vibe_core.datamodel import TaskDescription -from vibe_core.utils import rename_keys - -from .constants import CONTROL_PUBSUB_TOPIC - -ItemDict = Dict[str, Union[Item, List[Item]]] -CacheIdDict = Dict[str, Union[str, List[str]]] -OpDependencies = Dict[str, List[str]] -OpResolvedDependencies = Dict[str, Dict[str, Any]] - - -class EntryPointDict(TypedDict): - file: str - callback_builder: str - - -@dataclass -class OperationSpec: - name: str - root_folder: str - inputs_spec: TypeDictVibe - output_spec: TypeDictVibe - entrypoint: EntryPointDict - description: TaskDescription - dependencies: OpDependencies = field(default_factory=dict) - parameters: Dict[str, Any] = field(default_factory=dict) - default_parameters: Dict[str, Any] = field(default_factory=dict) - version: str = "1.0" - image_name: str = CONTROL_PUBSUB_TOPIC - - def __hash__(self): - return hash(self.name) - - -def update_parameters(parameters: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: - for k, v in override.items(): - if isinstance(v, dict): - parameters[k] = update_parameters(parameters.get(k, {}), cast(Dict[str, Any], v)) - else: - if k not in parameters: - raise ValueError(f"Tried to overwrite non-existent parameter {k}.") - parameters[k] = v - return parameters - - -class OperationParser: - required_fields: List[str] = "name inputs output parameters entrypoint".split() - default_version: str = "1.0" - - @classmethod - def parse( - cls, - op_definition_path: str, - parameters_override: Optional[Dict[str, Any]] = None, - ) -> OperationSpec: 
- op_config = cls._load_config(op_definition_path) - op_root_folder = os.path.dirname(op_definition_path) - - p = op_config.get("parameters", {}) - default_params: Dict[str, Any] = {} if p is None else p - - inputs = cls._parse_iospec(op_config["inputs"]) - output = cls._parse_iospec(op_config["output"]) - dependencies: OpDependencies = op_config.get("dependencies", {}) - version: str = op_config.get("version", cls.default_version) - version = str(version) if version is not None else version - - params = deepcopy(default_params) - if parameters_override is not None: - params = update_parameters(params, parameters_override) - - description = op_config.get("description", {}) - description = {} if description is None else description - description = rename_keys(description, {"output": "outputs"}) - description = TaskDescription(**description) - - return OperationSpec( - name=op_config["name"], - inputs_spec=inputs, - output_spec=output, - entrypoint=EntryPointDict( - file=op_config["entrypoint"]["file"], - callback_builder=op_config["entrypoint"]["callback_builder"], - ), - parameters=params, - default_parameters=default_params, - root_folder=op_root_folder, - dependencies=dependencies if dependencies is not None else {}, - version=version if version is not None else cls.default_version, - description=description, - ) - - @classmethod - def _parse_iospec(cls, iospec: Dict[str, str]) -> TypeDictVibe: - return TypeDictVibe({k: TypeParser.parse(v) for k, v in iospec.items()}) - - @staticmethod - def _load_config(path: str): - with open(path, "r") as stream: - data = yaml.safe_load(stream) - - for opfield in OperationParser.required_fields: - if opfield not in data: - raise ValueError(f"Operation config {path} is missing required field {opfield}") - - return data - - -@dataclass(frozen=True) -class OpRunId: - name: str - hash: str - - -class OpRunIdDict(TypedDict): - name: str - hash: str - - -@dataclass(init=False) -class CacheInfo: - name: str - version: str - hash: str 
= field(init=False) - parameters: OpResolvedDependencies = field(init=False) - ids: Dict[str, Union[str, List[str]]] = field(init=False) - - FIELD_TO_STORAGE: ClassVar[Dict[str, str]] = { - "version": "vibe_op_version", - "name": "vibe_op_name", - "hash": "vibe_op_hash", - "ids": "vibe_source_items", - "parameters": "vibe_op_parameters", - } - - def __init__( - self, - name: str, - version: str = "1.0", - sources: Optional[ItemDict] = None, - parameters: OpResolvedDependencies = {}, - **kwargs: Dict[str, Any], - ): - self.name = name - self.version = version.split(".")[0] - - if sources is not None: - kwargs["sources"] = sources - kwargs["parameters"] = self.parameters = parameters - - if "ids" not in kwargs: - if "sources" not in kwargs: - raise ValueError("CacheInfo missing both `ids` and `sources` fields.") - self.ids = self._populate_ids(cast(ItemDict, kwargs["sources"])) - else: - self.ids = kwargs["ids"] - - if "hash" in kwargs: - self.hash = cast(str, kwargs["hash"]) - else: - if "parameters" not in kwargs: - raise ValueError("CacheInfo missing required parameter `parameters`") - self.hash = sha256( - "".join( - [ - self._join_mapping(self.ids), - self._join_mapping(cast(OpResolvedDependencies, kwargs["parameters"])), - self.version, - ] - ).encode() - ).hexdigest() - - def as_storage_dict(self): - return { - self.FIELD_TO_STORAGE[f.name]: getattr(self, f.name) - for f in fields(self) # type: ignore - } - - @classmethod - def _compute_or_extract_id( - cls, thing: Union[Item, BaseVibe, List[Item], List[BaseVibe]] - ) -> Union[List[str], str]: - if isinstance(thing, list): - return [cast(str, cls._compute_or_extract_id(e)) for e in thing] - return thing.hash_id if hasattr(thing, "hash_id") else thing.id # type: ignore - - @classmethod - def _join(cls, thing: Union[Any, List[Any]]) -> str: - # TODO: this join might lead to collisions, but we're keeping it for now - # to avoid breaking existing caches - return "".join([str(i) for i in thing]) if 
isinstance(thing, list) else str(thing) - - @classmethod - def _join_mapping(cls, mapping: Union[CacheIdDict, OpResolvedDependencies]) -> str: - return "".join( - chain.from_iterable( - [ - (k, cls._join_mapping(v) if isinstance(v, dict) else cls._join(v)) - for k, v in sorted(mapping.items(), key=lambda e: e[0]) - ] - ) - ) - - @classmethod - def _populate_ids(cls, inputs: ItemDict) -> CacheIdDict: - return { - k: cast(List[str], sorted([cls._compute_or_extract_id(e) for e in v])) - if isinstance(v, list) - else cls._compute_or_extract_id(v) - for k, v in inputs.items() - } diff --git a/src/vibe_common/vibe_common/secret_provider.py b/src/vibe_common/vibe_common/secret_provider.py deleted file mode 100644 index d48f1374..00000000 --- a/src/vibe_common/vibe_common/secret_provider.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import re -import time -from abc import ABC, abstractmethod -from typing import Any, Optional - -from azure.core.credentials import TokenCredential -from azure.core.exceptions import ResourceNotFoundError, ServiceRequestError -from azure.identity import DefaultAzureCredential -from azure.keyvault.secrets import KeyVaultSecret, SecretClient -from dapr.clients import DaprClient -from hydra_zen import builds - -from vibe_common.dapr import dapr_ready - -CONNECTION_REFUSED_SUBSTRING = "connect: connection refused" -DAPR_WAIT_TIME_S = 30 - - -class SecretProvider(ABC): - def __init__(self): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.expression = re.compile(r"^@SECRET\(([^,]*?), ([^,]*?)\)") - - def is_secret(self, value: str) -> bool: - return self.expression.match(value) is not None - - @abstractmethod - def _resolve_impl(self, value: Any) -> str: - raise NotImplementedError - - def resolve(self, value: Any) -> str: - if not (isinstance(value, str) and self.is_secret(value)): - return value - - return self._resolve_impl(value) - 
- -class DaprSecretProvider(SecretProvider): - def _resolve_impl(self, value: Any) -> str: - while True: - _, secret_name = self.expression.findall(value)[0] - try: - # dapr´s local file and local env vars secret stores do not allow - # live update, that is, any update to a secret would require the - # worker to be redeployed. So, we are using kubernetes secret store. - # Even though Kubernetes supports multiple keys in a secret, secrets - # added to the Kubernetes secret store via FarmVibes have a single - # key whose name is the same as the secret name. - return retrieve_dapr_secret("kubernetes", secret_name, secret_name) - except Exception as e: - stre = str(e) - if CONNECTION_REFUSED_SUBSTRING in stre: - self.logger.info( - "dapr sidecar temporarily unavailable, " - f"will retry to resolve secret {value}" - ) - # No need for exponential backoffs here. This is the k8s - # cluster misbehaving and it will return (hopefully - # quickly) - time.sleep(DAPR_WAIT_TIME_S) - continue - raise ValueError( - f"Could not retrive secret {secret_name} from Dapr.\n Error message {stre}" - ) - - -class AzureSecretProvider(SecretProvider): - def __init__(self, credential: Optional[TokenCredential] = None): - super().__init__() - self.__credential = credential - - @property - def credential(self): - if self.__credential is None: - self.__credential = DefaultAzureCredential() - - return self.__credential - - def retrieve_from_keyvault(self, keyvault_name: str, secret_name: str) -> KeyVaultSecret: - try: - secret_client = SecretClient( - vault_url=f"https://{keyvault_name}.vault.azure.net/", credential=self.credential - ) - secret = secret_client.get_secret(secret_name) - except ResourceNotFoundError as e: - raise ValueError(f"Could not retrieve secret {secret_name}.\n Error message {str(e)}") - except ServiceRequestError as e: - raise ValueError(f"Invalid keyvault {keyvault_name}.\n Error message {str(e)}") - - return secret - - def _resolve_impl(self, value: Any) -> str: - 
keyvault_name, secret_name = self.expression.findall(value)[0] - secret = self.retrieve_from_keyvault(keyvault_name, secret_name) - - assert secret.value is not None - - return secret.value - - -@dapr_ready(dapr_wait_time_s=DAPR_WAIT_TIME_S) -def retrieve_dapr_secret( - store_name: str, - secret_name: str, - key_name: str, -) -> str: - """ - Using Dapr, retrieve a secret from a given secret store. - - Args: - store_name: The name of the secret store from which to fetch the secret - secret_name: The name of the secret to fetch - key_name: The name of the key in the secret to fetch (Note: For secret stores that have - multiple key-value pairs in a secret this would be the key to fetch. If the secret store - supports only one key-value pair, this argument is the same as the `secret_name`.) - - Returns: - The secret value - """ - logger = logging.getLogger(f"{__name__}.retrieve_dapr_secret") - with DaprClient() as dapr_client: - key = dapr_client.get_secret(store_name, secret_name).secret[key_name] - logger.info(f"Retrieving secret {secret_name} from store {store_name}") - return key - - -def retrieve_keyvault_secret( - keyvault_name: str, secret_name: str, cred: Optional[TokenCredential] = None -): - cred = cred or DefaultAzureCredential() - kv = SecretClient(keyvault_name, credential=cred) - key = kv.get_secret(secret_name).value - if key is None: - raise ValueError( - f"Could not find cosmos key with name {secret_name} on vault {keyvault_name}" - ) - return key - - -DaprSecretConfig = builds( - retrieve_dapr_secret, - populate_full_signature=True, - zen_dataclass={ - "module": "vibe_common.secret_provider", - "cls_name": "DaprSecretConfig", - }, -) - -KeyVaultSecretConfig = builds( - retrieve_keyvault_secret, - populate_full_signature=True, - zen_dataclass={ - "module": "vibe_common.secret_provider", - "cls_name": "KeyVaultSecretConfig", - }, -) - -SecretProviderConfig = builds( - SecretProvider, - populate_full_signature=True, - zen_dataclass={ - "module": 
"vibe_common.secret_provider", - "cls_name": "SecretProviderConfig", - }, -) - -DaprSecretProviderConfig = builds( - DaprSecretProvider, - populate_full_signature=True, - builds_bases=(SecretProviderConfig,), - zen_dataclass={ - "module": "vibe_common.secret_provider", - "cls_name": "DaprSecretProviderConfig", - }, -) - -AzureSecretProviderConfig = builds( - AzureSecretProvider, - populate_full_signature=True, - builds_bases=(SecretProviderConfig,), - zen_dataclass={ - "module": "vibe_common.secret_provider", - "cls_name": "AzureSecretProviderConfig", - }, -) diff --git a/src/vibe_common/vibe_common/statestore.py b/src/vibe_common/vibe_common/statestore.py deleted file mode 100644 index 778eae0a..00000000 --- a/src/vibe_common/vibe_common/statestore.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -# -*- coding: utf-8 -*- - -import logging -from typing import Any, List, Optional, Protocol, TypedDict - -from vibe_common.constants import STATE_URL_TEMPLATE -from vibe_common.vibe_dapr_client import VibeDaprClient - -LOGGER = logging.getLogger(__name__) -STATE_STORE = "statestore" -METADATA = {"partitionKey": "eywa"} - - -class TransactionOperation(TypedDict): - key: str - operation: str - value: Optional[Any] - - -class StateStoreProtocol(Protocol): - async def retrieve(self, key: str, traceparent: Optional[str] = None) -> Any: ... - - async def retrieve_bulk( - self, keys: List[str], parallelism: int = 2, traceparent: Optional[str] = None - ) -> List[Any]: ... - - async def store(self, key: str, obj: Any, traceparent: Optional[str] = None) -> bool: ... - - async def transaction( - self, operations: List[TransactionOperation], traceparent: Optional[str] = None - ) -> bool: ... 
- - -class StateStore(StateStoreProtocol): - def __init__( - self, - state_store: str = STATE_STORE, - partition_key: str = METADATA["partitionKey"], - ): - self.vibe_dapr_client = VibeDaprClient() - self.state_store: str = state_store - self.partition_key: str = partition_key - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - async def retrieve(self, key: str, traceparent: Optional[str] = None) -> Any: - try: - response = await self.vibe_dapr_client.get( - STATE_URL_TEMPLATE.format(self.state_store, key), - traceparent=traceparent, - params={"metadata.partitionKey": METADATA["partitionKey"]}, - ) - - return await self.vibe_dapr_client.response_json(response) - except KeyError as e: - raise KeyError(f"Key {key} not found") from e - - async def retrieve_bulk( - self, keys: List[str], parallelism: int = 8, traceparent: Optional[str] = None - ) -> List[Any]: - """Retrieves keys in bulk. - - This only exists because our UI needs to display details about all - workflows, and retrieving in bulk saves on round trips to the state - store. 
- """ - - response = await self.vibe_dapr_client.post( - url=STATE_URL_TEMPLATE.format(self.state_store, "bulk"), - data={ - "keys": keys, - "parallelism": parallelism, - }, - traceparent=traceparent, - params={"metadata.partitionKey": METADATA["partitionKey"]}, - ) - - states = await self.vibe_dapr_client.response_json(response) - - if len(states) != len(keys): - keyset = set(keys) - for state in states: - keyset.remove(state[0]) - raise KeyError(f"Failed to retrieve keys {keyset} from state store.") - return [state["data"] for state in states] - - async def store(self, key: str, obj: Any, traceparent: Optional[str] = None) -> None: - response = await self.vibe_dapr_client.post( - STATE_URL_TEMPLATE.format(self.state_store, ""), - data=[ - { - "key": key, - "value": self.vibe_dapr_client.obj_json(obj), - "metadata": {"partitionKey": self.partition_key}, - } - ], - traceparent=traceparent, - ) - assert response.ok, "Failed to store state, but underlying method didn't capture it" - - async def transaction( - self, operations: List[TransactionOperation], traceparent: Optional[str] = None - ) -> None: - queries = [ - { - "operation": o["operation"], - "request": { - "key": o["key"], - "value": self.vibe_dapr_client.obj_json(o["value"]), - }, - } - for o in operations - ] - await self.vibe_dapr_client.post( - url=STATE_URL_TEMPLATE.format(self.state_store, "transaction"), - data={ - "operations": queries, - "metadata": {"partitionKey": self.partition_key}, - }, - traceparent=traceparent, - ) diff --git a/src/vibe_common/vibe_common/telemetry.py b/src/vibe_common/vibe_common/telemetry.py deleted file mode 100644 index e2ae147e..00000000 --- a/src/vibe_common/vibe_common/telemetry.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import inspect -import logging -from functools import wraps -from typing import Any, Callable, Dict - -from opentelemetry import trace -from opentelemetry.context import attach -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter -from opentelemetry.propagate import extract -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.trace.span import INVALID_SPAN - -from vibe_common.constants import TRACEPARENT_STRING - -LOGGER = logging.getLogger(__name__) - - -def setup_telemetry(service_name: str, exporter_endpoint: str): - resource = Resource(attributes={"service.name": service_name}) - provider = TracerProvider(resource=resource) - - # Create an OTLP exporter instance - # The insecure=True flag is used here because we're running the - # service locally (from the k8s cluster perspective) without - # Transport Layer Security (TLS). - otlp_exporter = OTLPSpanExporter(endpoint=exporter_endpoint, insecure=True) - - provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) - - # Sets the global default tracer provider - trace.set_tracer_provider(provider) - - -def get_current_trace_parent() -> str: - span = trace.get_current_span() - - if span == INVALID_SPAN: - LOGGER.warning("No current span found. 
Returning empty traceparent.") - - trace_id = span.get_span_context().trace_id - span_id = span.get_span_context().span_id - trace_flags = span.get_span_context().trace_flags - return TRACEPARENT_STRING.format(trace_id=trace_id, parent_id=span_id, trace_flags=trace_flags) - - -def add_span_attributes(attributes: Dict[str, Any]): - current_span = trace.get_current_span() - for k, v in attributes.items(): - current_span.set_attribute(k, v) - - -def update_telemetry_context(trace_parent: str): - """Updates the current telemetry context with the trace parent""" - attach(extract({"traceparent": trace_parent})) - - -def add_trace(func: Callable[..., Any]): - if inspect.iscoroutinefunction(func): - return _add_trace_async(func) - else: - return _add_trace_sync(func) - - -def _add_trace_sync(func: Callable[..., Any]): - @wraps(func) - def wrapper(*args, **kwargs): # type: ignore - tracer = trace.get_tracer(__name__) - with tracer.start_as_current_span(func.__name__): - return func(*args, **kwargs) - - return wrapper - - -def _add_trace_async(func: Callable[..., Any]): - @wraps(func) - async def wrapper(*args, **kwargs): # type: ignore - tracer = trace.get_tracer(__name__) - with tracer.start_as_current_span(func.__name__): - return await func(*args, **kwargs) - - return wrapper diff --git a/src/vibe_common/vibe_common/tokens.py b/src/vibe_common/vibe_common/tokens.py deleted file mode 100644 index 905ad863..00000000 --- a/src/vibe_common/vibe_common/tokens.py +++ /dev/null @@ -1,237 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from abc import ABC, abstractmethod -from datetime import datetime, timedelta -from typing import Dict, Optional, Union, cast -from urllib.parse import urljoin, urlparse - -from azure.core.credentials import TokenCredential -from azure.identity import DefaultAzureCredential -from azure.storage.blob import ( - BlobClient, - BlobSasPermissions, - BlobServiceClient, - UserDelegationKey, - generate_blob_sas, -) - - -class StorageUserKey(ABC): - @abstractmethod - def is_valid(self) -> bool: - raise NotImplementedError("Subclass needs to implement this") - - @abstractmethod - def get_access_key(self) -> Union[UserDelegationKey, str]: - raise NotImplementedError("Subclass needs to implement this") - - -class StorageUserKeyCredentialed(StorageUserKey): - delegation_key: UserDelegationKey - key_expiration: Optional[datetime] - sas_expiration: timedelta - - def __init__( - self, - url: str, - sas_expiration: timedelta, - key_lease_time: timedelta, - credential: Optional[TokenCredential] = None, - ): - self.sas_expiration = sas_expiration - self.key_lease_time = key_lease_time - self.credential = DefaultAzureCredential() if credential is None else credential - self.storage_url = self._get_storage_url(url) - self.client = None - - # Update expiration and delegation keys - self._generate() - - def _get_storage_url(self, url: str) -> str: - return urlparse(url.rstrip("/")).netloc - - def _get_client(self): - if not self.client: - self.client = BlobServiceClient(self.storage_url, self.credential) - - return self.client - - def is_valid(self) -> bool: - if not self.key_expiration: - return False - return datetime.utcnow() + self.sas_expiration < self.key_expiration - - def _generate(self): - self.key_expiration = datetime.utcnow() + self.key_lease_time - client = self._get_client() - self.delegation_key = client.get_user_delegation_key(datetime.utcnow(), self.key_expiration) - - def get_access_key(self) -> Union[UserDelegationKey, str]: - if not self.is_valid(): 
- self._generate() - return self.delegation_key - - -class StorageUserKeyConnectionString(StorageUserKey): - def __init__( - self, - sas_expiration: timedelta, - key_lease_time: timedelta, - connection_string: str, - ): - self.connection_string = connection_string - self.client = None - - def _get_client(self): - if not self.client: - self.client = BlobServiceClient.from_connection_string(self.connection_string) - - return self.client - - def is_valid(self) -> bool: - return True - - def get_access_key(self) -> Union[UserDelegationKey, str]: - client = self._get_client() - return client.credential.account_key - - -class BlobTokenManager(ABC): - sas_expiration_days: int - lease_time_multiplier: int - user_key_cache: Dict[str, StorageUserKey] = {} - - def __init__( - self, - sas_expiration_days: int = 1, - lease_time_ratio: int = 2, - ): - self.logger = logging.getLogger(self.__class__.__name__) - self.sas_expiration = timedelta(days=sas_expiration_days) - self.lease_time_ratio = lease_time_ratio - self.key_lease_time = self.lease_time_ratio * self.sas_expiration - - @abstractmethod - def _get_storage_user_key( - self, - url: str, - sas_expiration: timedelta, - key_lease_time: timedelta, - ) -> StorageUserKey: - raise NotImplementedError("Subclass needs to implement this") - - def _get_user_key(self, url: str, account_name: str) -> StorageUserKey: - if account_name not in self.user_key_cache: - self.logger.debug(f"Creating a new user key for account {account_name}") - storage_user_key = self._get_storage_user_key( - url, self.sas_expiration, self.key_lease_time - ) - - self.user_key_cache[account_name] = storage_user_key - - return self.user_key_cache[account_name] - - @abstractmethod - def _get_token(self, blob_client: BlobClient): - raise NotImplementedError("Subclass needs to implement this") - - def sign_url(self, url: str) -> str: - blob_client = BlobClient.from_blob_url(blob_url=url) - sas_token = self._get_token(blob_client) - return f"{urljoin(url, 
urlparse(url).path)}?{sas_token}" - - -class BlobTokenManagerCredentialed(BlobTokenManager): - def __init__( - self, - sas_expiration_days: int = 1, - lease_time_ratio: int = 2, - credential: Optional[TokenCredential] = None, - ): - super().__init__(sas_expiration_days, lease_time_ratio) - self.credential = DefaultAzureCredential() if credential is None else credential - - def _get_storage_user_key( - self, - url: str, - sas_expiration: timedelta, - key_lease_time: timedelta, - ) -> StorageUserKey: - return StorageUserKeyCredentialed( - url, - sas_expiration, - key_lease_time, - credential=self.credential, - ) - - def _get_token( - self, - blob_client: BlobClient, - ): - account_name: str = cast(str, blob_client.account_name) - container_name: str = blob_client.container_name - blob_name: str = blob_client.blob_name - - start = datetime.utcnow() - end = start + self.sas_expiration - user_delegation_key = cast( - UserDelegationKey, self._get_user_key(blob_client.url, account_name).get_access_key() - ) - - sas_token = generate_blob_sas( - account_name=account_name, - container_name=container_name, - user_delegation_key=user_delegation_key, - blob_name=blob_name, - permission=BlobSasPermissions(read=True), - start=start, - expiry=end, - ) - return sas_token - - -class BlobTokenManagerConnectionString(BlobTokenManager): - def __init__( - self, - connection_string: str, - sas_expiration_days: int = 1, - lease_time_ratio: int = 2, - ): - super().__init__(sas_expiration_days, lease_time_ratio) - self.connection_string = connection_string - - def _get_storage_user_key( - self, - url: str, - sas_expiration: timedelta, - key_lease_time: timedelta, - ) -> StorageUserKey: - return StorageUserKeyConnectionString( - sas_expiration, - key_lease_time, - self.connection_string, - ) - - def _get_token( - self, - blob_client: BlobClient, - ): - account_name: str = cast(str, blob_client.account_name) - container_name: str = blob_client.container_name - blob_name: str = 
blob_client.blob_name - - start = datetime.utcnow() - end = start + self.sas_expiration - account_key = cast(str, self._get_user_key(blob_client.url, account_name).get_access_key()) - sas_token = generate_blob_sas( - account_name=account_name, - container_name=container_name, - account_key=account_key, - blob_name=blob_name, - permission=BlobSasPermissions(read=True), - start=start, - expiry=end, - ) - return sas_token diff --git a/src/vibe_common/vibe_common/vibe_dapr_client.py b/src/vibe_common/vibe_common/vibe_dapr_client.py deleted file mode 100644 index 9e2aca5e..00000000 --- a/src/vibe_common/vibe_common/vibe_dapr_client.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -import logging -from functools import partial -from typing import Any, Mapping, Optional - -from aiohttp import ClientResponse, ClientSession -from aiohttp_retry import ExponentialRetry, RetryClient - -from vibe_common.constants import TRACEPARENT_HEADER_KEY -from vibe_common.dapr import handle_aiohttp_timeout, process_dapr_response -from vibe_core.data.json_converter import dump_to_json - -MAX_SESSION_ATTEMPTS = 10 -MAX_TIMEOUT_S = 30 -MAX_DIRECT_INVOKE_TRIES = 3 - -METADATA = {"partitionKey": "eywa"} - -""" -This is an implementation of a Dapr HTTP client that currently support Dapr service invocation -and state management through HTTP. 
-""" - - -class VibeDaprClient: - def __init__(self): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - def _build_client(self) -> RetryClient: - session = ClientSession() - retry_options = ExponentialRetry( - attempts=MAX_SESSION_ATTEMPTS, - max_timeout=MAX_TIMEOUT_S, - statuses={400, 500, 502, 503, 504}, - ) - retry_client = RetryClient(client_session=session, retry_options=retry_options) - return retry_client - - async def get( - self, - url: str, - traceparent: Optional[str], - params: Optional[Mapping[str, str]] = None, - ) -> ClientResponse: - async with self._build_client() as session: - try: - response = await session.get( - url, headers={"traceparent": traceparent} if traceparent else {}, params=params - ) - await handle_aiohttp_timeout(response) - return await process_dapr_response(response) - except KeyError: - raise - except Exception: - self.logger.exception(f"Failed to process request for {url}") - raise RuntimeError(f"dapr failed to process request for {url}") - - async def post( - self, - url: str, - data: Any, - traceparent: Optional[str], - params: Optional[Mapping[str, str]] = None, - ) -> ClientResponse: - if url.endswith("/"): - url = url[:-1] - - tries: int = 0 - - while True: - async with self._build_client() as session: - try: - headers = {"Content-Type": "application/json"} - if traceparent: - headers[TRACEPARENT_HEADER_KEY] = traceparent - response = await session.post( - url, - data=self._dumps(data), - headers=headers, - params=params, - ) - await handle_aiohttp_timeout(response) - return await process_dapr_response(response) - except RuntimeError as e: - if "ERR_DIRECT_INVOKE" in str(e): - tries += 1 - self.logger.warning( - f"ERR_DIRECT_INVOKE raised by Dapr, " - f"retrying ({tries}/{MAX_DIRECT_INVOKE_TRIES})" - ) - if tries >= MAX_DIRECT_INVOKE_TRIES: - self.logger.exception(f"Failed to process request for {url}") - raise - except Exception: - self.logger.exception(f"Failed to process request for {url}") - 
raise RuntimeError(f"dapr failed to process request for {url}") - - def obj_json(self, obj: Any, **kwargs: Any) -> Any: - """JSON representation of object `obj` encoding floats as strings. - - Unfortunately Dapr's JSON deserializer clips floating point precision - so floats are encoded as strings - - Args: - obj: the object to be converted - kwargs: optional keyword arguments passed to `_dumps` - - Returns: - Object `obj` represented as JSON - """ - return json.loads(self._dumps(obj, **kwargs), parse_float=lambda f_as_s: f_as_s) - - async def response_json(self, response: ClientResponse) -> Any: - """Loads a JSON from a `ClientResponse`. - - Because floats are encoded as strings before being sent to Dapr due to the truncation that - occurs in the Dapr sidecar when using its HTTP API, this method decodes any string that - can be parsed as a float into a Python float. - - Args: - response: The `ClientResponse` object with our data - - Returns: - The JSON of our response, with floats correctly decoded as floats - """ - return await response.json(loads=partial(json.loads, object_hook=_decode)) - - def _dumps(self, obj: Any, **kwargs: Any) -> str: - return dump_to_json(obj, **kwargs) - - -def _decode(obj: Any) -> Any: - """Returns the given decoded JSON object with all string values that can be parsed as floats as - Python floats. 
- - This function covers all possible valid JSON objects as valid JSON values are strings, objects - (Python dict), arrays (Python list), numbers (Python int/float), or the literals true (Python - True), false (Python False), or null (Python None)): - https://www.rfc-editor.org/rfc/rfc8259#section-3 - - Args: - obj: A decoded JSON object - - Returns: - The same decoded JSON object with all string values that can be parsed as floats as floats - """ - if isinstance(obj, str): - try: - return float(obj) - except ValueError: - return obj - elif isinstance(obj, dict): - return {k: _decode(v) for k, v in obj.items()} - elif isinstance(obj, list): - return [_decode(v) for v in obj] - else: - return obj diff --git a/src/vibe_common/vibe_common/workflow/__init__.py b/src/vibe_common/vibe_common/workflow/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_common/vibe_common/workflow/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_core/pyproject.toml b/src/vibe_core/pyproject.toml index 9b588886..cdad7708 100644 --- a/src/vibe_core/pyproject.toml +++ b/src/vibe_core/pyproject.toml @@ -18,9 +18,9 @@ vibe_core = ["terraform/*.tf"] [project] name = "vibe-core" -version = "0.0.1" +version ="2024.05.27" authors = [ - { name="Microsoft FarmVibes.AI Team", email="terravibes@microsoft.com" }, + { name="Microsoft FarmVibes.AI Team", email="eywa-devs@microsoft.com" }, ] description = "FarmVibes.AI Geospatial Platform Package - vibe core package." 
license = {text = "MIT"} @@ -44,7 +44,7 @@ dependencies = [ "pydantic~=1.10.0", "strenum~=0.4.7", "shapely>=1.7.1", - "requests~=2.32.0", + "requests~=2.31.0", "pystac~=1.6.0", "hydra-zen~=0.10", "rich~=13.7.1", @@ -57,8 +57,3 @@ dependencies = [ [project.scripts] farmvibes-ai = "vibe_core.cli.main:main" - -[project.optional-dependencies] -test = [ - "orjson~=3.9.15", -] \ No newline at end of file diff --git a/src/vibe_core/setup.py b/src/vibe_core/setup.py index fa5a68c9..7f1a1763 100644 --- a/src/vibe_core/setup.py +++ b/src/vibe_core/setup.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - from setuptools import setup if __name__ == "__main__": diff --git a/src/vibe_core/tests/test_register.py b/src/vibe_core/tests/test_register.py index e02d938a..b2b9f094 100644 --- a/src/vibe_core/tests/test_register.py +++ b/src/vibe_core/tests/test_register.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - from vibe_core.data import DataVibe, data_registry diff --git a/src/vibe_core/tests/test_stac_converter.py b/src/vibe_core/tests/test_stac_converter.py deleted file mode 100644 index 4321d8c0..00000000 --- a/src/vibe_core/tests/test_stac_converter.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -# pyright: reportUnknownMemberType=false - -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Tuple - -import pytest -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import AssetVibe, DataVibe, Raster, StacConverter - - -@dataclass -class DateVibe(DataVibe): - date_field: datetime - int_field: int - str_field: str - other_field: Any - other_list: List[str] - date_list: List[datetime] - date_dict: Dict[str, datetime] - date_tuple: Tuple[datetime, datetime, datetime] - mixed_tuple: Tuple[int, datetime] - var_tuple: Tuple[datetime, ...] - nested_list: List[List[datetime]] - dict_list: Dict[str, List[datetime]] - super_nest: Dict[Any, List[Dict[Any, Dict[Any, Tuple[datetime, ...]]]]] - super_nest_no: Dict[Any, List[Dict[Any, Dict[Any, Tuple[Any, ...]]]]] - - -@dataclass -class ShapeVibe(DataVibe): - shape: BaseGeometry - shape_dict: Dict[str, BaseGeometry] - - -@pytest.fixture -def converter() -> StacConverter: - return StacConverter() - - -def test_conversion_roundtrip(converter: StacConverter, tmp_path: Path): - asset_path = tmp_path.as_posix() - now = datetime.now() - geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) - terravibes_data = DataVibe(id="assetless", time_range=(now, now), geometry=geom, assets=[]) - # Assetless DataVibe conversion - assert converter.from_stac_item(converter.to_stac_item(terravibes_data)) == terravibes_data - mimefull = AssetVibe(reference=asset_path, type="image/tiff", id="mimefull") - terravibes_data.assets.append(mimefull) - # Conversion with asset that has mimetype - assert converter.from_stac_item(converter.to_stac_item(terravibes_data)) == terravibes_data - mimeless = AssetVibe(reference=asset_path, type=None, id="mimeless") - # Conversion with asset that has no mimetype - terravibes_data.assets.append(mimeless) - assert 
converter.from_stac_item(converter.to_stac_item(terravibes_data)) == terravibes_data - - -def test_conversion_raster(converter: StacConverter, tmp_path: Path): - asset_path = tmp_path.as_posix() - now = datetime.now() - geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) - tiff_asset = AssetVibe(reference=asset_path, type="image/tiff", id="tiff_asset") - json_asset = AssetVibe(reference=asset_path, type="application/json", id="json_asset") - raster = Raster( - id="extra_info_test", - time_range=(now, now), - geometry=geom, - assets=[tiff_asset, json_asset], - bands={"B1": 0, "B2": 1, "B3": 2}, - ) - converted = converter.from_stac_item(converter.to_stac_item(raster)) - assert isinstance(converted, Raster) - assert converted == raster - assert raster.raster_asset == converted.raster_asset - assert raster.visualization_asset == converted.visualization_asset - - -def test_datetime_field_serialization(converter: StacConverter): - now = datetime.now() - geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) - test_vibe = DateVibe( - "assetless", - (now, now), - geom, - [], - now, - 1, - "1", - None, - ["1", "2"], - [datetime.now() for _ in range(2)], - {f"{i}": datetime.now() for i in range(3)}, - (datetime.now(), datetime.now(), datetime.now()), - (1, datetime.now()), - tuple(datetime.now() for _ in range(4)), - [[datetime.now()]], - {"1": [datetime.now() for _ in range(2)], "2": [datetime.now() for _ in range(3)]}, - {0: [{0: {0: (datetime.now(),)}}]}, - {0: [{0: {0: ("NO",)}}]}, - ) - forward = converter.to_stac_item(test_vibe) - assert forward.properties["date_field"] == now.isoformat() - round_trip = converter.from_stac_item(forward) - assert test_vibe == round_trip - - -def test_geom_field_serialization(converter: StacConverter): - now = datetime.now() - geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) - test_vibe = ShapeVibe( - "assetless", - (now, now), - geom, - [], - shpg.box(0, 0, 2, 2), - {f"{i}": shpg.box(0, 0, i, i) for i in 
range(1, 5)}, - ) - forward = converter.to_stac_item(test_vibe) - assert forward.properties["shape"] == { - "type": "Polygon", - "coordinates": (((2.0, 0.0), (2.0, 2.0), (0.0, 2.0), (0.0, 0.0), (2.0, 0.0)),), - } - round_trip = converter.from_stac_item(forward) - assert test_vibe == round_trip diff --git a/src/vibe_core/tests/test_type_serialization.py b/src/vibe_core/tests/test_type_serialization.py deleted file mode 100644 index c4019fe4..00000000 --- a/src/vibe_core/tests/test_type_serialization.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import inspect -import typing -from datetime import datetime -from unittest.mock import MagicMock, patch - -import orjson -import pytest - -import vibe_core.data -from vibe_core.data.utils import StacConverter, deserialize_stac, serialize_stac - -BASIC_MOCK_VALUES = { - int: 42, - float: 42.0, - str: "mock_str", - bool: True, - datetime: datetime.now(), -} - -DATAVIBES_MOCK_FIELDS = { - "id": "mock_id", - "time_range": (datetime.now(), datetime.now()), - "geometry": {"type": "Point", "coordinates": [0, 0]}, - "assets": [], -} - -FARMVIBES_DATA_CLASSES = [ - getattr(vibe_core.data, name) - for name in dir(vibe_core.data) - if inspect.isclass(getattr(vibe_core.data, name)) - and issubclass(getattr(vibe_core.data, name), vibe_core.data.DataVibe) -] - - -def is_optional(t: type) -> bool: - return typing.get_origin(t) is typing.Union and type(None) in typing.get_args(t) # type: ignore - - -def create_mock_instance(cls: type) -> typing.Any: - if cls in BASIC_MOCK_VALUES: - return BASIC_MOCK_VALUES[cls] # type: ignore - - args = {} - params = { - **inspect.signature(cls.__init__).parameters, - **inspect.signature(cls.__new__).parameters, - } - for name, param in params.items(): - if name in ["self", "args", "kwargs", "_cls"]: - continue - elif name in DATAVIBES_MOCK_FIELDS: - args[name] = DATAVIBES_MOCK_FIELDS[name] - else: - args[name] = 
create_mock_value(param.annotation) - return cls(**args) - - -def create_mock_value(tp: type) -> typing.Any: - # Handle basic types with random or default values - if tp in BASIC_MOCK_VALUES: - return BASIC_MOCK_VALUES[tp] # type: ignore - elif tp is list or getattr(tp, "__origin__", None) is list: - return [] - elif tp is tuple or getattr(tp, "__origin__", None) is tuple: - # Create an empty tuple or a tuple with mock values if types are specified - return tuple(create_mock_value(arg) for arg in getattr(tp, "__args__", [])) - elif tp is dict or getattr(tp, "__origin__", None) is dict: - return {} - elif tp is typing.Any: - return None - elif is_optional(tp): - # check which type is optional and create a mock value for it - return create_mock_value(tp.__args__[0]) # type: ignore - elif inspect.isclass(tp): - # Recursively create instances for complex types - return create_mock_instance(tp) - - raise NotImplementedError(f"Mocking not implemented for type: {tp}") - - -@patch.object(vibe_core.data.HansenProduct, "validate_url", return_value=True) -@pytest.mark.parametrize("cls", FARMVIBES_DATA_CLASSES) -def test_serialization_deserialization( - _: MagicMock, - cls: type, -): - converter = StacConverter() - - mock_instance = create_mock_instance(cls) - stac_item = converter.to_stac_item(mock_instance) - - json_instance = orjson.loads(orjson.dumps(serialize_stac(stac_item))) - deserialized_stac_item = deserialize_stac(json_instance) - deserialized = converter.from_stac_item(deserialized_stac_item) - assert mock_instance == deserialized - - deserialized = converter.from_stac_item(stac_item) - assert mock_instance == deserialized diff --git a/src/vibe_core/tests/test_utils.py b/src/vibe_core/tests/test_utils.py index 331dbbc0..c2301ec4 100644 --- a/src/vibe_core/tests/test_utils.py +++ b/src/vibe_core/tests/test_utils.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- from dataclasses import dataclass from datetime import datetime from typing import Any, Dict, List, Tuple, cast diff --git a/src/vibe_core/vibe_core/__init__.py b/src/vibe_core/vibe_core/__init__.py index 3cd867d2..ffeb1cd5 100644 --- a/src/vibe_core/vibe_core/__init__.py +++ b/src/vibe_core/vibe_core/__init__.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Core types and functions, as well as constants used throughout FarmVibes.AI.""" from .client import Client, FarmvibesAiClient diff --git a/src/vibe_core/vibe_core/admag_client.py b/src/vibe_core/vibe_core/admag_client.py index 2d84d701..62c4695f 100644 --- a/src/vibe_core/vibe_core/admag_client.py +++ b/src/vibe_core/vibe_core/admag_client.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Data types and supporting functions for interacting with Azure Data Manager for Agriculture.""" import json diff --git a/src/vibe_core/vibe_core/cli/__init__.py b/src/vibe_core/vibe_core/cli/__init__.py index b7c52582..e69de29b 100644 --- a/src/vibe_core/vibe_core/cli/__init__.py +++ b/src/vibe_core/vibe_core/cli/__init__.py @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_core/vibe_core/cli/constants.py b/src/vibe_core/vibe_core/cli/constants.py index 080a1f83..39e49f34 100644 --- a/src/vibe_core/vibe_core/cli/constants.py +++ b/src/vibe_core/vibe_core/cli/constants.py @@ -1,8 +1,5 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- DEFAULT_IMAGE_PREFIX = "farmai/terravibes/" -DEFAULT_IMAGE_TAG = "dev" +DEFAULT_IMAGE_TAG = "2024.05.27" DEFAULT_REGISTRY_PATH = "mcr.microsoft.com" LOCAL_SERVICE_URL_PATH_FILE = "service_url" @@ -14,5 +11,5 @@ # Local constants ONNX_SUBDIR = "onnx_resources" FARMVIBES_AI_LOG_LEVEL = "DEBUG" -REDIS_IMAGE_TAG = "7.4.1-debian-12-r2" -RABBITMQ_IMAGE_TAG = "4.0.4-debian-12-r1" +REDIS_IMAGE_TAG = "7.0.4-debian-11-r11" +RABBITMQ_IMAGE_TAG = "3.10.8-debian-11-r4" diff --git a/src/vibe_core/vibe_core/cli/help_descriptions.py b/src/vibe_core/vibe_core/cli/help_descriptions.py index 7ce72327..9d671d71 100644 --- a/src/vibe_core/vibe_core/cli/help_descriptions.py +++ b/src/vibe_core/vibe_core/cli/help_descriptions.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - ADD_ONNX_HELP = """\ Adds an Onnx model to the TerraVibes cluster. After being added, one can use this \ model in the ops compute_onnx and compute_onnx_from_sequence (by setting the parameter model_file \ diff --git a/src/vibe_core/vibe_core/cli/helper.py b/src/vibe_core/vibe_core/cli/helper.py index f2d6b02e..51f7f6b9 100644 --- a/src/vibe_core/vibe_core/cli/helper.py +++ b/src/vibe_core/vibe_core/cli/helper.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import locale import os import socket diff --git a/src/vibe_core/vibe_core/cli/local.py b/src/vibe_core/vibe_core/cli/local.py index 2677df45..1f607653 100644 --- a/src/vibe_core/vibe_core/cli/local.py +++ b/src/vibe_core/vibe_core/cli/local.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- import argparse import codecs import os @@ -16,7 +13,7 @@ LOCAL_SERVICE_URL_PATH_FILE, ONNX_SUBDIR, ) -from vibe_core.cli.helper import verify_to_proceed +from vibe_core.cli.helper import log_should_be_logged_in, verify_to_proceed from vibe_core.cli.logging import log from vibe_core.cli.osartifacts import InstallType, OSArtifacts from vibe_core.cli.wrappers import ( @@ -295,28 +292,25 @@ def setup( k3d.os_artifacts.check_dependencies(InstallType.ALL) az = AzureCliWrapper(k3d.os_artifacts, "") log( - f"Username and password not provided for {registry}, requesting from Azure CLI", + f"Username and password not provided for {registry}, inferring from Azure CLI", level="warning", ) - password = az.request_registry_token(registry) - if password: + try: + az.get_subscription_info() # Needed for confirming subscription + except Exception as e: + log_should_be_logged_in(e) + return False + + username, password = az.infer_registry_credentials(registry) + + if username and password: log(f"Creating Docker credentials for registry {registry}") try: kubectl.delete_secret("acrtoken") except Exception: pass - if not username: - username = "00000000-0000-0000-0000-000000000000" kubectl.create_docker_token("acrtoken", registry, username, password) - else: - if registry.endswith(AZURE_CR_DOMAIN): - log( - "No registry username and password were provided, and I was unable to " - "get an ACR token. Aborting installation.", - level="error", - ) - return False if not worker_replicas: log( diff --git a/src/vibe_core/vibe_core/cli/logging.py b/src/vibe_core/vibe_core/cli/logging.py index e6a44a95..b5ef0c63 100644 --- a/src/vibe_core/vibe_core/cli/logging.py +++ b/src/vibe_core/vibe_core/cli/logging.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- import logging import logging.handlers import pathlib diff --git a/src/vibe_core/vibe_core/cli/main.py b/src/vibe_core/vibe_core/cli/main.py index 5b889e4f..92031069 100644 --- a/src/vibe_core/vibe_core/cli/main.py +++ b/src/vibe_core/vibe_core/cli/main.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import argparse import sys diff --git a/src/vibe_core/vibe_core/cli/osartifacts.py b/src/vibe_core/vibe_core/cli/osartifacts.py index eafaa883..c5f2bc87 100644 --- a/src/vibe_core/vibe_core/cli/osartifacts.py +++ b/src/vibe_core/vibe_core/cli/osartifacts.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import os import pathlib import platform diff --git a/src/vibe_core/vibe_core/cli/parsers.py b/src/vibe_core/vibe_core/cli/parsers.py index 13fb0f30..c10ff46e 100644 --- a/src/vibe_core/vibe_core/cli/parsers.py +++ b/src/vibe_core/vibe_core/cli/parsers.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import argparse import getpass import os diff --git a/src/vibe_core/vibe_core/cli/remote.py b/src/vibe_core/vibe_core/cli/remote.py index 5ea44d0c..8318ce5a 100644 --- a/src/vibe_core/vibe_core/cli/remote.py +++ b/src/vibe_core/vibe_core/cli/remote.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import argparse import os from typing import Optional @@ -194,10 +191,11 @@ def setup_or_upgrade( if registry_path and registry_path.endswith(AZURE_CR_DOMAIN): if not registry_username or not registry_password: - try: - registry_username = "00000000-0000-0000-0000-000000000000" - registry_password = az.request_registry_token(registry_path) + ( + registry_username, + registry_password, + ) = az.infer_registry_credentials(registry_path) except Exception: log( f"Couldn't infer registry credentials for {registry_path}. 
" diff --git a/src/vibe_core/vibe_core/cli/wrappers.py b/src/vibe_core/vibe_core/cli/wrappers.py index 6deaf4a6..89f21a2b 100644 --- a/src/vibe_core/vibe_core/cli/wrappers.py +++ b/src/vibe_core/vibe_core/cli/wrappers.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import hashlib import json import os @@ -954,27 +951,35 @@ def verify_enough_cores_available( if required > available: raise ValueError(f"{cpu_type} has {available} CPUs. We need {required}.") - def request_registry_token(self, registry: str) -> str: - """Requests an access token for a given registry using the az CLI. - - Args: - registry: the name of the registry under Azure we want to connect to. - """ - log(f"Getting token credentials for {registry}") + def infer_registry_credentials(self, registry: str) -> Tuple[str, str]: + log(f"Inferring credentials for {registry}") registry = registry.replace(".azurecr.io", "") # FIXME: This only works for Azure Public self.refresh_az_creds() - token_command = [ + username_command = [ + self.os_artifacts.az, + "acr", + "credential", + "show", + "-n", + registry, + "--query", + "username", + ] + password_command = [ self.os_artifacts.az, "acr", - "login", + "credential", + "show", "-n", registry, - "--expose-token", + "--query", + "passwords[0].value", ] - error = f"Unable to get credentials for {registry}" - output = json.loads(execute_cmd(token_command, True, True, error, censor_output=True)) - return output["accessToken"] if "accessToken" in output else "" + error = f"Unable to infer credentials for {registry}" + username = json.loads(execute_cmd(username_command, True, True, error, censor_output=True)) + password = json.loads(execute_cmd(password_command, True, True, error, censor_output=True)) + return username, password def get_storage_account_list(self): cmd = [ @@ -1384,24 +1389,16 @@ def get_secret(self, name: str, key: str, cluster_name: str = ""): ) return json.loads(result) - def create_docker_token(self, 
token_name: str, registry: str, username: str, token: str): - """Add a secret to the kubernetes cluster. - - Args: - token_name: The name of the token to be added to the cluster - registry: The (Azure Container) registry this token is for - username: The user name to use to connect to the registry - token: The token to use. - """ + def create_docker_token(self, token: str, registry: str, username: str, password: str): cmd = [ self.os_artifacts.kubectl, "create", "secret", "docker-registry", - token_name, + token, f"--docker-server={registry}", f"--docker-username={username}", - f"--docker-password={token}", + f"--docker-password={password}", f"--docker-email={username}", ] execute_cmd( diff --git a/src/vibe_core/vibe_core/client.py b/src/vibe_core/vibe_core/client.py index f6c2497f..ec33bd3b 100644 --- a/src/vibe_core/vibe_core/client.py +++ b/src/vibe_core/vibe_core/client.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """FarmVibes.AI client. This module provides a client for the FarmVibes.AI service, which allows users to interact with the @@ -832,7 +829,7 @@ def _block_until_status( status_options = " or ".join(block_until_statuses) raise RuntimeError( f"Timeout of {timeout_s}s reached while waiting for the workflow to have a " - f"status of {status_options}. Workflow is currently in status {self.status}." + f"status of {status_options}." ) return self diff --git a/src/vibe_core/vibe_core/data/__init__.py b/src/vibe_core/vibe_core/data/__init__.py index ecacf110..5a1e78da 100644 --- a/src/vibe_core/vibe_core/data/__init__.py +++ b/src/vibe_core/vibe_core/data/__init__.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- """Core data model for FarmVibes.AI.""" from .airbus import AirbusPrice, AirbusProduct, AirbusRaster diff --git a/src/vibe_core/vibe_core/data/airbus.py b/src/vibe_core/vibe_core/data/airbus.py index 046a8d4f..e3e618d8 100644 --- a/src/vibe_core/vibe_core/data/airbus.py +++ b/src/vibe_core/vibe_core/data/airbus.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """AirBus data types.""" from dataclasses import dataclass diff --git a/src/vibe_core/vibe_core/data/core_types.py b/src/vibe_core/vibe_core/data/core_types.py index 231fbed3..55271080 100644 --- a/src/vibe_core/vibe_core/data/core_types.py +++ b/src/vibe_core/vibe_core/data/core_types.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Core data classes, functions, and constants of FarmVibes.AI.""" import hashlib @@ -18,6 +15,7 @@ ClassVar, Dict, List, + NamedTuple, Optional, Tuple, Type, @@ -53,8 +51,20 @@ """Type alias for a time range, as a tuple of two `datetime` objects (start, end).""" -ChipWindow = Tuple[float, float, float, float] -"""Type alias representing a raster chip window, as (col_offset, row_offset, width, height).""" +class ChipWindow(NamedTuple): + """Represent a window of a raster chip. + + Attributes: + col_offset: The column offset of the window with relation to the raster chip. + row_offset: The row offset of the window with relation to the raster chip. + width: The width of the window. + height: The height of the window. + """ + + col_offset: float + row_offset: float + width: float + height: float def gen_guid(): diff --git a/src/vibe_core/vibe_core/data/data_registry.py b/src/vibe_core/vibe_core/data/data_registry.py index eabc995a..a55c1fcd 100644 --- a/src/vibe_core/vibe_core/data/data_registry.py +++ b/src/vibe_core/vibe_core/data/data_registry.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- """Data registry types and functions used in FarmVibes.AI.""" import warnings diff --git a/src/vibe_core/vibe_core/data/farm.py b/src/vibe_core/vibe_core/data/farm.py index e0c63aae..c5c46881 100644 --- a/src/vibe_core/vibe_core/data/farm.py +++ b/src/vibe_core/vibe_core/data/farm.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Azure Data Manager for Agriculture (ADMA) data types.""" from dataclasses import dataclass diff --git a/src/vibe_core/vibe_core/data/json_converter.py b/src/vibe_core/vibe_core/data/json_converter.py index 9032f58f..4729dfde 100644 --- a/src/vibe_core/vibe_core/data/json_converter.py +++ b/src/vibe_core/vibe_core/data/json_converter.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """JSON serialization/deserialization utilities.""" import json diff --git a/src/vibe_core/vibe_core/data/products.py b/src/vibe_core/vibe_core/data/products.py index 3683ba33..231bc5ec 100644 --- a/src/vibe_core/vibe_core/data/products.py +++ b/src/vibe_core/vibe_core/data/products.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Data type and functions definitions related to the products supported in FarmVibes.AI.""" import mimetypes diff --git a/src/vibe_core/vibe_core/data/rasters.py b/src/vibe_core/vibe_core/data/rasters.py index d4ee84ca..131f55f1 100644 --- a/src/vibe_core/vibe_core/data/rasters.py +++ b/src/vibe_core/vibe_core/data/rasters.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- """Data types, constants, and supporting functions for manipulating rasters in FarmVibes.AI.""" from dataclasses import dataclass, field diff --git a/src/vibe_core/vibe_core/data/sentinel.py b/src/vibe_core/vibe_core/data/sentinel.py index 4b7459fe..7f7bc40e 100644 --- a/src/vibe_core/vibe_core/data/sentinel.py +++ b/src/vibe_core/vibe_core/data/sentinel.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Data types and supporting functions for Sentinel data in FarmVibes.AI.""" import mimetypes diff --git a/src/vibe_core/vibe_core/data/utils.py b/src/vibe_core/vibe_core/data/utils.py index ba32aa73..b6453e5f 100644 --- a/src/vibe_core/vibe_core/data/utils.py +++ b/src/vibe_core/vibe_core/data/utils.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Utilities for interacting with STAC items and serialization/deserialization.""" import json @@ -148,33 +145,18 @@ def convert_field( The converted field value. 
""" t_origin = get_origin(field_type) - if t_origin: - t_args = get_args(field_type) - if t_origin is list and len(t_args) == 1: - return [self.convert_field(f, t_args[0], converter) for f in field_value] - if t_origin is dict and t_args: - return { - k: self.convert_field(v, t_args[1], converter) for k, v in field_value.items() - } - if t_origin is tuple and t_args: - if len(t_args) == 2 and t_args[1] == ...: - return tuple(self.convert_field(f, t_args[0], converter) for f in field_value) - return tuple( - self.convert_field(f, ta, converter) if ta is datetime else f - for f, ta in zip(field_value, t_args) - ) - else: - for t in field_type.mro(): - if t in self.field_converters: - return converter(field_value, t) - elif t is list: - return [self.convert_field(f, type(f), converter) for f in field_value] - elif t is dict: - return { - k: self.convert_field(v, type(v), converter) for k, v in field_value.items() - } - elif t is tuple: - return tuple(self.convert_field(f, type(f), converter) for f in field_value) + t_args = get_args(field_type) + if t_origin is list and len(t_args) == 1: + return [self.convert_field(f, t_args[0], converter) for f in field_value] + if t_origin is dict and t_args: + return {k: self.convert_field(v, t_args[1], converter) for k, v in field_value.items()} + if t_origin is tuple and t_args: + if len(t_args) == 2 and t_args[1] == ...: + return tuple(self.convert_field(f, t_args[0], converter) for f in field_value) + return tuple( + self.convert_field(f, ta, converter) if ta is datetime else f + for f, ta in zip(field_value, t_args) + ) return converter(field_value, field_type) def serialize_fields( @@ -509,7 +491,7 @@ def get_base_type(vibetype: DataVibeType) -> Type[BaseVibe]: if not (is_container_type(vibetype) or isinstance(vibetype, type)): raise ValueError(f"Argument {vibetype} is not a type") if isinstance(vibetype, type): - return cast(Type[T], vibetype) # type: ignore + return cast(Type[T], vibetype) levels = 1 tmp = 
get_args(vibetype) while tmp is not None and is_container_type(tmp[0]): diff --git a/src/vibe_core/vibe_core/data/weather.py b/src/vibe_core/vibe_core/data/weather.py index df2b8d30..e2fe5296 100644 --- a/src/vibe_core/vibe_core/data/weather.py +++ b/src/vibe_core/vibe_core/data/weather.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Weather data types and function definitions.""" import hashlib diff --git a/src/vibe_core/vibe_core/datamodel.py b/src/vibe_core/vibe_core/datamodel.py index b9c0a0d9..3fd863fa 100644 --- a/src/vibe_core/vibe_core/datamodel.py +++ b/src/vibe_core/vibe_core/datamodel.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Data model classes definition used throughout FarmVibes.AI.""" import codecs diff --git a/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py b/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py index 5211ed1c..b52ecc17 100644 --- a/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py +++ b/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - import logging from datetime import datetime, timezone diff --git a/src/vibe_core/vibe_core/file_downloader.py b/src/vibe_core/vibe_core/file_downloader.py index 9816fc21..651d7f92 100644 --- a/src/vibe_core/vibe_core/file_downloader.py +++ b/src/vibe_core/vibe_core/file_downloader.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """File downloader utility methods and classes.""" import logging diff --git a/src/vibe_core/vibe_core/file_utils.py b/src/vibe_core/vibe_core/file_utils.py index c190450c..e5569388 100644 --- a/src/vibe_core/vibe_core/file_utils.py +++ b/src/vibe_core/vibe_core/file_utils.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- """Utility functions for working with files.""" import os diff --git a/src/vibe_core/vibe_core/logconfig.py b/src/vibe_core/vibe_core/logconfig.py index 96c7a318..1e2eaa50 100644 --- a/src/vibe_core/vibe_core/logconfig.py +++ b/src/vibe_core/vibe_core/logconfig.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Utility functions for configuring logging.""" import json diff --git a/src/vibe_core/vibe_core/monitor.py b/src/vibe_core/vibe_core/monitor.py index 7c8ef56d..1c7010c4 100644 --- a/src/vibe_core/vibe_core/monitor.py +++ b/src/vibe_core/vibe_core/monitor.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Classes and functions definition for monitoring workflow runs.""" from collections import Counter diff --git a/src/vibe_core/vibe_core/terraform/__init__.py b/src/vibe_core/vibe_core/terraform/__init__.py index b7c52582..e69de29b 100644 --- a/src/vibe_core/vibe_core/terraform/__init__.py +++ b/src/vibe_core/vibe_core/terraform/__init__.py @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_core/vibe_core/terraform/aks/__init__.py b/src/vibe_core/vibe_core/terraform/aks/__init__.py index b7c52582..e69de29b 100644 --- a/src/vibe_core/vibe_core/terraform/aks/__init__.py +++ b/src/vibe_core/vibe_core/terraform/aks/__init__.py @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_core/vibe_core/terraform/aks/main.tf b/src/vibe_core/vibe_core/terraform/aks/main.tf index f3f11a1d..68b7dbc3 100644 --- a/src/vibe_core/vibe_core/terraform/aks/main.tf +++ b/src/vibe_core/vibe_core/terraform/aks/main.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- terraform { required_version = ">=0.12" } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py b/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py index b7c52582..e69de29b 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py +++ b/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf index e096b5d0..0c9bca85 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "azurerm_log_analytics_workspace" "analyticsworkspace" { name = "${var.prefix}-analytics-workspace-${resource.random_string.name_suffix.result}" count = var.enable_telemetry ? 1 : 0 diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf index 76b51151..309f552a 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "azurerm_cosmosdb_account" "cosmos" { name = "${var.prefix}-cosmos-${resource.random_string.name_suffix.result}" location = var.location diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf index 10d56e0b..bb087fd1 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf @@ -1,4 +1 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- data "azurerm_client_config" "current" {} \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf index 747456f8..8ac3a46c 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - data "http" "ip" { url = "https://ipv4.icanhazip.com" } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf index a7bec902..f2f33f0b 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - locals { default_node_pool_max_count = 3 } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf index 879112f3..44a54738 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - output "kubernetes_config_path" { value = local_file.kubeconfig.filename diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf index 6877ac1b..826f447e 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- terraform { required_version = ">=0.12" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf index 81161cba..3227a708 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "azurerm_public_ip" "publicip" { name = "${var.prefix}-${substr(sha256(var.resource_group_name), 0, 6)}-ip" resource_group_name = azurerm_kubernetes_cluster.kubernetes.node_resource_group diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf index 019c2a32..c1b4455e 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "random_string" "name_suffix" { length = 5 special = false diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf index abde1fb7..425d2c8e 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- data "azurerm_resource_group" "resourcegroup" { name = var.resource_group_name } \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf index 8675cc90..d11b0fea 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "azurerm_storage_account" "storageaccount" { name = "storage${resource.random_string.name_suffix.result}" resource_group_name = var.resource_group_name @@ -21,7 +18,7 @@ resource "azurerm_storage_account" "storageaccount" { network_rules, ] } - + } resource "azurerm_storage_container" "userfiles" { diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf index 8f1cbbcd..da7cd61d 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - variable "location" { description = "Location of the resources." } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf index 474ede52..3b4c9666 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- resource "azurerm_network_security_group" "aks-nsg" { name = "${var.prefix}-nsg" location = var.location diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf index f6e345b2..cfe11999 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "helm_release" "letsencrypt" { name = "cert-manager" repository = "https://charts.jetstack.io" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf index b1c23dfa..7c4a1c1e 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "kubernetes_namespace" "kubernetesdaprnamespace" { metadata { name = "dapr-system" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf index 87195161..5aee9b92 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - resource "kubernetes_namespace" "kubernetesnamespace" { count = var.namespace == "default" ? 0 : 1 metadata { diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf deleted file mode 100644 index 22fa2964..00000000 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -resource "kubernetes_config_map" "otel" { - count = var.enable_telemetry ? 1 : 0 - metadata { - name = "otel-collector-config" - labels = { - app = "opentelemetry" - component = "otel-collector-conf" - } - } - - data = { - "otel-collector-config.yaml" = < WorkflowCallback: - async def callback(change: WorkflowChange, **kwargs: Any): - if change == WorkflowChange.WORKFLOW_STARTED: - self._status = RunStatus.running - elif change == WorkflowChange.WORKFLOW_FINISHED: - self._status = RunStatus.done - elif change == WorkflowChange.WORKFLOW_FAILED: - self._status = RunStatus.failed - self._reason = kwargs["reason"] - - return callback - - @property - def status(self) -> str: - if self._status == RunStatus.failed: - return f"{self._status}: {self._reason}" - return self._status - - @property - def output(self) -> BaseVibeDict: - return self._output - - -class SubprocessClient(Client): - """ - LocalWorkflowRunner wrapper that runs the workflow and retrieves results as DataVibe. 
- """ - - def __init__( - self, - factory_spec: OperationFactoryConfig, - raise_exception: bool, - ): - self.factory_spec = factory_spec - self.converter = StacConverter() - self.storage: Storage = instantiate(factory_spec.storage) - self.raise_exception = raise_exception - - def _deserialize_to_datavibe(self, workflow_output: OpIOType) -> BaseVibeDict: - stac_items = {k: deserialize_stac(v) for k, v in workflow_output.items()} - retrieved = self.storage.retrieve(stac_items) - vibe_data = {k: self.converter.from_stac_item(v) for k, v in retrieved.items()} - return vibe_data - - async def run( - self, workflow_name: str, geometry: BaseGeometry, time_range: Tuple[datetime, datetime] - ) -> WorkflowRun: - output = SubprocessWorkflowRun() - callback = output._workflow_callback() - if workflow_name in self.list_workflows(): - # Load workflow by it's name - workflow = load_workflow_by_name(workflow_name) - else: - # Assume it's the path to a YAML file - workflow = Workflow.build(workflow_name) - - runner = LocalWorkflowRunner.build( - workflow, - io_mapper=WorkflowIOHandler(workflow), - factory_spec=self.factory_spec, - update_state_callback=callback, - ) - - stac_item_dict = gen_stac_item_from_bounds( - shpg.mapping(geometry), # type: ignore - *time_range, - ) - input_spec = cast(OpIOType, {k: stac_item_dict for k in runner.workflow.inputs_spec}) - try: - runner_output = await runner.run(input_spec) - output._output = self._deserialize_to_datavibe(runner_output) - await callback(WorkflowChange.WORKFLOW_FINISHED) - except Exception as e: - await callback(WorkflowChange.WORKFLOW_FAILED, reason=str(e)) - if self.raise_exception: - raise - return output - - def list_workflows(self) -> List[str]: - return list_workflows() - - -def get_default_subprocess_client(cache_dir: str) -> SubprocessClient: - tmp_asset_path = os.path.join(cache_dir, "assets") - storage_config = LocalStorageConfig( - local_path=cache_dir, asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) - ) - 
factory_spec = OperationFactoryConfig(storage_config, AzureSecretProviderConfig()) - return SubprocessClient(factory_spec, False) diff --git a/src/vibe_dev/vibe_dev/local_runner.py b/src/vibe_dev/vibe_dev/local_runner.py deleted file mode 100644 index 067cf81a..00000000 --- a/src/vibe_dev/vibe_dev/local_runner.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import cast -from uuid import UUID - -from vibe_agent.ops import OperationDependencyResolver, OperationFactoryConfig, OpIOType -from vibe_agent.ops_helper import OpIOConverter -from vibe_agent.worker import Worker -from vibe_common.messaging import ( - CacheInfoExecuteRequestContent, - ExecuteRequestMessage, - WorkMessageBuilder, -) -from vibe_common.schemas import CacheInfo -from vibe_server.workflow.runner.runner import ( - NoOpStateChange, - WorkflowCallback, - WorkflowChange, - WorkflowRunner, -) -from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler -from vibe_server.workflow.workflow import GraphNodeType, Workflow - -MAX_OP_EXECUTION_TIME_S = 60 * 60 * 3 - - -class LocalWorkflowRunner(WorkflowRunner): - timeout_s: float = 1 # in seconds - - def __init__( - self, - workflow: Workflow, - io_mapper: WorkflowIOHandler, - factory_spec: OperationFactoryConfig, - update_state_callback: WorkflowCallback = NoOpStateChange, - max_tries: int = 1, - ): - super().__init__(workflow, io_mapper, update_state_callback) - self.runner = Worker( - termination_grace_period_s=int(self.timeout_s), - control_topic="", - max_tries=max_tries, - factory_spec=factory_spec, - ) - - self.dependency_resolver = OperationDependencyResolver() - - async def _run_op_impl( - self, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int - ) -> OpIOType: - try: - message = WorkMessageBuilder.build_execute_request(run_id, "", op.spec, input) - self.runner.current_message = message - stac = OpIOConverter.deserialize_input(input) - 
dependencies = self.dependency_resolver.resolve(op.spec) - message = WorkMessageBuilder.add_cache_info_to_execute_request( - cast(ExecuteRequestMessage, message), - CacheInfo(op.spec.name, op.spec.version, stac, dependencies), - ) - content = message.content - assert isinstance(content, CacheInfoExecuteRequestContent) - await self._report_state_change( - WorkflowChange.SUBTASK_RUNNING, task=op.name, subtask_idx=subtask_idx - ) - out = self.runner.run_op_with_retry(content, run_id, MAX_OP_EXECUTION_TIME_S) - await self._report_state_change( - WorkflowChange.SUBTASK_FINISHED, task=op.name, subtask_idx=subtask_idx - ) - return out - except Exception as e: - self.logger.exception(f"Failed to run operation {op.name}") - await self._report_state_change( - WorkflowChange.SUBTASK_FAILED, task=op.name, subtask_idx=subtask_idx, reason=str(e) - ) - raise - finally: - self.runner.current_message = None diff --git a/src/vibe_dev/vibe_dev/mock_utils.py b/src/vibe_dev/vibe_dev/mock_utils.py deleted file mode 100644 index 8dc511e4..00000000 --- a/src/vibe_dev/vibe_dev/mock_utils.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict - -from pydantic import BaseModel - - -class Request(BaseModel): - """Mock Request class for testing purposes.""" - - text: str - """Represents the response of the request.""" - - def raise_for_status(self) -> Dict[str, int]: - """Mock raise_for_status method. - - return: A dictionary with a success code. - """ - - return {"success": 200} diff --git a/src/vibe_dev/vibe_dev/testing/__init__.py b/src/vibe_dev/vibe_dev/testing/__init__.py deleted file mode 100644 index 4863cccf..00000000 --- a/src/vibe_dev/vibe_dev/testing/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import pytest - - -@pytest.fixture -def anyio_backend(): - return "asyncio" diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml deleted file mode 100644 index d3f48d83..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: base_base -inputs: - user_data: SimpleStrDataType -output: - processed_data: SimpleStrDataType -parameters: -entrypoint: - file: base_op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py deleted file mode 100644 index 84787cc3..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from dataclasses import asdict -from typing import Any, List, Union - -from vibe_core.data.core_types import BaseVibe - - -def callback(user_data: Union[BaseVibe, List[BaseVibe]]): - if isinstance(user_data, list): - return {"processed_data": [d.__class__(**asdict(d)) for d in user_data]} - return {"processed_data": user_data.__class__(**asdict(user_data))} - - -def callback_builder(**kw: Any): # type: ignore - return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml deleted file mode 100644 index 3bf3a395..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: item_inheritance -inputs: - user_data: DataVibe -output: - processed_data: "@INHERIT(user_data)" -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml deleted file 
mode 100644 index e861cccb..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: item_item -inputs: - user_data: DataVibe -output: - processed_data: DataVibe -parameters: -entrypoint: - file: vibe_op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml deleted file mode 100644 index 80642036..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: item_list -inputs: - user_data: DataVibe -output: - processed_data: List[DataVibe] -parameters: - num_items: 1 -entrypoint: - file: to_list_op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml deleted file mode 100644 index f7463cea..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: list_and_item_inputs -inputs: - list_data: List[DataVibe] - item_data: DataVibe -output: - processed_data: DataVibe -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml deleted file mode 100644 index 636515e9..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: list_inheritance -inputs: - user_data: List[DataVibe] -output: - processed_data: "@INHERIT(user_data)" -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml 
b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml deleted file mode 100644 index 3f51dfa0..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: list_item -inputs: - user_data: List[DataVibe] -output: - processed_data: DataVibe -parameters: -entrypoint: - file: to_item_op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml deleted file mode 100644 index c07efbf7..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: list_list -inputs: - user_data: List[DataVibe] -output: - processed_data: List[DataVibe] -parameters: -entrypoint: - file: vibe_op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml deleted file mode 100644 index 0bad0196..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: missing_inheritance -inputs: - user_data: DataVibe -output: - processed_data: "@INHERIT(whatever)" -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml deleted file mode 100644 index 3876b511..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: nested_parameters -inputs: - user_data: DataVibe -output: - processed_data: DataVibe -parameters: - overwrite: kept - nested: - overwrite: kept nested - keep: kept nested -entrypoint: - file: vibe_op.py - callback_builder: callback_builder -description: - parameters: - 
overwrite: param named overwrite - nested: - overwrite: nested overwrite - keep: nested keep diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py deleted file mode 100644 index 0e59efc6..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime -from typing import Any - -from vibe_core.data import DataVibe - - -def print_args(user_data: Any): - try: - now = datetime.now() - user_data.data = "Processed " + user_data.data - print(user_data.data) - return { - "processed_data": [ - DataVibe( - user_data.data, - (now, now), - { - "type": "Point", - "coordinates": [0.0, 0.0], - "properties": {"name": user_data.data}, - }, - [], - ) - ] - } - except Exception: - return {"processed_data": user_data} - - -def callback_builder(**kw: Any): - return print_args diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml deleted file mode 100644 index 6b76cb39..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: raster -inputs: - user_data: Raster -output: - processed_data: Raster -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml deleted file mode 100644 index a3ba7dff..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: raster_list -inputs: - user_data: List[Raster] -output: - processed_data: List[Raster] -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml 
b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml deleted file mode 100644 index 01b743ec..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: simple_parameter -inputs: - user_data: DataVibe -output: - processed_data: DataVibe -parameters: - overwrite: kept - keep: kept -entrypoint: - file: vibe_op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml deleted file mode 100644 index 4b8fbb2d..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: str_list -inputs: - user_data: FakeType -output: - processed_data: List[DataVibe] -parameters: - fake_param: 1 - fake_another_param: - fake_nested: 2 - fake_nested_too: 3 -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml deleted file mode 100644 index 49c779de..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: timeseries -inputs: - user_data: TimeSeries -output: - processed_data: TimeSeries -parameters: -entrypoint: - file: op.py - callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py deleted file mode 100644 index 1cd6c821..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, List - -from vibe_core.data import DataVibe - - -def callback(user_data: List[DataVibe]): - return {"processed_data": DataVibe.clone_from(user_data[0], id=user_data[0].id, assets=[])} - - -def callback_builder(**kw: Any): - return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py deleted file mode 100644 index 7460d81f..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any - -from vibe_core.data import DataVibe - - -def callback_builder(**kw: Any): - num_items = kw.get("num_items", 1) - - def callback(user_data: DataVibe): - return { - "processed_data": [ - DataVibe.clone_from(user_data, id=f"{user_data.id}_{i}", assets=[]) - for i in range(num_items) - ] - } - - return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py deleted file mode 100644 index 39159f9a..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, List, Union - -from vibe_core.data import DataVibe - - -def callback(user_data: Union[DataVibe, List[DataVibe]]): - if isinstance(user_data, list): - return {"processed_data": [DataVibe.clone_from(d, id=d.id, assets=[]) for d in user_data]} - return {"processed_data": DataVibe.clone_from(user_data, id=user_data.id, assets=[])} - - -def callback_builder(**kw: Any): - return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml deleted file mode 100644 index b011e2a5..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: bad_sink -tasks: - task: - op: item_item - op_dir: fake -edges: -sources: - input: - - task.user_data -sinks: - output: task.bad_sink \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml deleted file mode 100644 index fe52a855..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: bad_source -tasks: - task: - op: item_item - op_dir: fake -edges: -sources: - input: - - task.bad_source -sinks: - output: task.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml deleted file mode 100644 index e6e0101f..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: base_base -tasks: - task1: - op: base_base - op_dir: fake - task2: - op: base_base - op_dir: fake -edges: - - origin: task1.processed_data - destination: [task2.user_data] -sources: - input: - - task1.user_data -sinks: - output: task2.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml 
b/src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml deleted file mode 100644 index 5a436f2a..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: fake_custom_indices_structure -sources: - user_input: - - s2.input -sinks: - s2: s2.nested2 - ndvi: ndvi.output - evi: evi.output - ndmi: ndmi.output - ndre: ndre.output - reci: reci.output -tasks: - s2: - workflow: workflow_inception - ndvi: - workflow: list_list - evi: - workflow: list_list - ndmi: - workflow: list_list - ndre: - workflow: list_list - reci: - workflow: list_list -edges: - - origin: s2.nested2 - destination: - - ndvi.input - - evi.input - - ndmi.input - - ndre.input - - reci.input diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml deleted file mode 100644 index e121b0e2..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: fan_out_and_in -tasks: - to_list: - op: item_list - op_dir: fake - scatter: - op: item_list - op_dir: fake - parallel: - op: list_item - op_dir: fake - gather: - op: list_list - op_dir: fake -edges: - - origin: to_list.processed_data - destination: - - scatter.user_data - - origin: scatter.processed_data - destination: - - parallel.user_data - - origin: parallel.processed_data - destination: - - gather.user_data -sources: - input: - - to_list.user_data -sinks: - scatter: scatter.processed_data - parallel: parallel.processed_data - gather: gather.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml deleted file mode 100644 index c21dde82..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: gather_and_parallel -tasks: - list: - op: list_list - 
op_dir: fake - scatter: - op: item_item - op_dir: fake - parallel: - op: item_item - op_dir: fake - two_types: - op: list_and_item_inputs - op_dir: fake - still_parallel: - op: item_item - op_dir: fake -edges: - - origin: list.processed_data - destination: - - scatter.user_data - - origin: scatter.processed_data - destination: - - parallel.user_data - - two_types.list_data # gather edge - - origin: parallel.processed_data - destination: - - two_types.item_data # parallel edge - - origin: two_types.processed_data - destination: - - still_parallel.user_data -sources: - input: - - list.user_data -sinks: - still_parallel: still_parallel.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml deleted file mode 100644 index 19129e57..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: gather_and_parallel_input_gather_output -tasks: - list: - op: list_list - op_dir: fake - scatter: - op: item_item - op_dir: fake - parallel: - op: item_item - op_dir: fake - two_types: - op: list_and_item_inputs - op_dir: fake - gather: - op: list_list - op_dir: fake -edges: - - origin: list.processed_data - destination: - - scatter.user_data - - origin: scatter.processed_data - destination: - - parallel.user_data - - two_types.list_data # gather edge - - origin: parallel.processed_data - destination: - - two_types.item_data # parallel edge - - origin: two_types.processed_data - destination: - - gather.user_data -sources: - inputs: - - list.user_data -sinks: - gather: gather.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml deleted file mode 100644 index 0661b4cd..00000000 --- 
a/src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: incompatible_source -tasks: - raster: - op: raster - op_dir: fake - timeseries: - op: timeseries - op_dir: fake -sources: - input: - - raster.user_data - - timeseries.user_data -sinks: - raster: raster.processed_data - datavibe: timeseries.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml deleted file mode 100644 index 3cceb1df..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: inheritance -tasks: - list: - op: item_list - op_dir: fake - inherit_list: - op: list_inheritance - op_dir: fake - item: - op: list_item - op_dir: fake - inherit_item: - op: item_inheritance - op_dir: fake -edges: - - origin: list.processed_data - destination: - - inherit_list.user_data - - origin: item.processed_data - destination: - - inherit_item.user_data -sources: - input: - - list.user_data - - item.user_data -sinks: - list: inherit_list.processed_data - item: inherit_item.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml deleted file mode 100644 index 4a5501ec..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: inheritance_after_fan_out -tasks: - list: - op: list_list - op_dir: fake - scatter_inherit: - op: item_inheritance - op_dir: fake - item: - op: item_item - op_dir: fake -edges: - - origin: list.processed_data - destination: - - scatter_inherit.user_data - - origin: scatter_inherit.processed_data - destination: - - item.user_data -sources: - input: - - list.user_data -sinks: - output: item.processed_data diff --git 
a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml deleted file mode 100644 index 5305230f..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: inheritance_before_fan_out -tasks: - list: - op: list_list - op_dir: fake - inherit_list: - op: list_inheritance - op_dir: fake - scatter: - op: item_item - op_dir: fake -edges: - - origin: list.processed_data - destination: - - inherit_list.user_data - - origin: inherit_list.processed_data - destination: - - scatter.user_data -sources: - input: - - list.user_data -sinks: - scatter: scatter.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml deleted file mode 100644 index c2ffcc50..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: inheritance_from_source -tasks: - raster: - op: raster - op_dir: fake - inherit_raster: - op: item_inheritance - op_dir: fake - inherit_source: - op: item_inheritance - op_dir: fake -edges: - - origin: raster.processed_data - destination: - - inherit_raster.user_data -sources: - input: - - raster.user_data - - inherit_source.user_data -sinks: - raster: inherit_raster.processed_data - source: inherit_source.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml deleted file mode 100644 index 0353505d..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: item_gather -tasks: - item: - op: item_item - op_dir: fake - list: - op: list_list - op_dir: fake -edges: - - origin: item.processed_data - destination: - - list.user_data -sources: 
- input: - - item.user_data -sinks: - output: list.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml deleted file mode 100644 index a1d656cf..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: item_item -tasks: - task: - op: item_item - op_dir: fake -sources: - input: - - task.user_data -sinks: - output: task.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml deleted file mode 100644 index 9e217cc5..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: list_list -tasks: - task: - op: list_list - op_dir: fake -sources: - input: - - task.user_data -sinks: - output: task.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml deleted file mode 100644 index d6a68b9d..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: missing_edge -tasks: - task1: - op: item_item - op_dir: fake - task2: - op: item_item - op_dir: fake - missing_input: - op: item_item - op_dir: fake -edges: - - origin: task1.processed_data - destination: - - task2.user_data -sources: - input: - - task1.user_data -sinks: - second: task2.processed_data - missing: missing_input.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml deleted file mode 100644 index 0e0da03a..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: missing_inheritance -tasks: - list: - op: item_list - op_dir: fake - 
bad_inherit: - op: missing_inheritance - op_dir: fake -edges: - - origin: list.processed_data - destination: - - bad_inherit.user_data -sources: - input: - - list.user_data -sinks: - bad: bad_inherit.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml deleted file mode 100644 index 81a4431b..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: nested_fan_out -tasks: - to_list: - op: str_list - op_dir: fake - scatter: - op: item_list - op_dir: fake - nested_scatter: - op: item_list - op_dir: fake - parallel: - op: list_item - op_dir: fake - gather: - op: list_list - op_dir: fake -edges: - - origin: to_list.processed_data - destination: - - scatter.user_data - - origin: scatter.processed_data - destination: - - nested_scatter.user_data - - origin: nested_scatter.processed_data - destination: - - parallel.user_data - - origin: parallel.processed_data - destination: - - gather.user_data -sources: - input: - - to_list.user_data -sinks: - scatter: scatter.processed_data - nested_scatter: nested_scatter.processed_data - parallel: parallel.processed_data - gather: gather.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml deleted file mode 100644 index f6d484f4..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: nested_task_params -tasks: - parameterizable: - op: str_list - op_dir: fake - parameters: - fake_another_param: - fake_nested_too: 4 -edges: -sources: - input: - - parameterizable.user_data -sinks: - output: parameterizable.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml deleted file
mode 100644 index 6a07ed0d..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: nested_workflow -tasks: - str_list: - op: str_list - op_dir: fake - nested1: - workflow: list_list - nested2: - workflow: list_list -edges: - - origin: str_list.processed_data - destination: - - nested1.input - - origin: nested1.output - destination: - - nested2.input -sources: - input: - - str_list.user_data -sinks: - str: str_list.processed_data - nested2: nested2.output \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml deleted file mode 100644 index e9be6dc9..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: resolve_nested_params -parameters: - new: overwritten - new_nested: overwritten nested -tasks: - simple: - op: simple_parameter - op_dir: fake - parameters: - overwrite: "@from(new)" - nested: - workflow: resolve_params - parameters: - new: "@from(new)" - new_nested: "@from(new_nested)" -edges: -sources: - input: - - simple.user_data - - nested.input -sinks: - simple: simple.processed_data - nested: nested.nested \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml deleted file mode 100644 index c3fc269d..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: resolve_nested_params_default -parameters: - new: - nested_new: - nested_new_nested: -tasks: - simple: - op: simple_parameter - op_dir: fake - parameters: - overwrite: "@from(new)" - nested: - workflow: resolve_params - parameters: - new: "@from(nested_new)" - new_nested: "@from(nested_new_nested)" -edges: -sources: - input: - 
- simple.user_data - - nested.input -sinks: - simple: simple.processed_data - nested: nested.nested diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml deleted file mode 100644 index 3756e91c..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: resolve_nested_params_multiple_default -parameters: - new: - new_nested: -tasks: - simple: - op: simple_parameter - op_dir: fake - parameters: - overwrite: "@from(new)" - nested: - workflow: resolve_params - parameters: - new: "@from(new)" - new_nested: "@from(new_nested)" -edges: -sources: - input: - - simple.user_data - - nested.input -sinks: - simple: simple.processed_data - nested: nested.nested diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml deleted file mode 100644 index 7d1a5cd6..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: resolve_params -parameters: - new: overwritten - new_nested: overwritten nested -tasks: - simple: - op: simple_parameter - op_dir: fake - parameters: - overwrite: "@from(new)" - nested: - op: nested_parameters - op_dir: fake - parameters: - overwrite: "@from(new)" - nested: - overwrite: "@from(new_nested)" -edges: -sources: - input: - - simple.user_data - - nested.user_data -sinks: - simple: simple.processed_data - nested: nested.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml deleted file mode 100644 index 7998ce2a..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: single_and_parallel -tasks: - list: - op: 
list_list - op_dir: fake - scatter: - op: item_item - op_dir: fake - parallel: - op: item_item - op_dir: fake - two_types: - op: list_and_item_inputs - op_dir: fake - still_parallel: - op: item_item - op_dir: fake -edges: - - origin: list.processed_data - destination: - - scatter.user_data - - two_types.list_data # single edge - - origin: scatter.processed_data - destination: - - parallel.user_data - - origin: parallel.processed_data - destination: - - two_types.item_data # parallel edge - - origin: two_types.processed_data - destination: - - still_parallel.user_data -sources: - input: - - list.user_data -sinks: - still_parallel: still_parallel.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml deleted file mode 100644 index 4b78c1c7..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Invalid workflow where an input port is defined as source and destination of an edge -name: source_and_destination -sources: - input: - - task1.user_data - - task2.user_data -sinks: - output: task2.processed_data -tasks: - task1: - op: list_list - op_dir: fake - task2: - op: list_list - op_dir: fake -edges: - - origin: task1.processed_data - destination: - - task2.user_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml deleted file mode 100644 index 5e811a82..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: specific_source -tasks: - raster: - op: raster - op_dir: fake - datavibe: - op: item_item - op_dir: fake -sources: - input: - - raster.user_data - - datavibe.user_data -sinks: - raster: raster.processed_data - datavibe: datavibe.processed_data \ No newline at end of file diff --git 
a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml deleted file mode 100644 index 2764bb31..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: specific_source_item_list -tasks: - raster: - op: raster - op_dir: fake - datavibe: - op: list_list - op_dir: fake -sources: - input: - - raster.user_data - - datavibe.user_data -sinks: - raster: raster.processed_data - datavibe: datavibe.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml deleted file mode 100644 index 018d8cb8..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: specific_source_list_list -tasks: - raster: - op: raster_list - op_dir: fake - datavibe: - op: list_list - op_dir: fake -sources: - input: - - raster.user_data - - datavibe.user_data -sinks: - raster: raster.processed_data - datavibe: datavibe.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml deleted file mode 100644 index a1c294a2..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: str_input -tasks: - str: - op: str_list - op_dir: fake -edges: -sources: - input: - - str.user_data -sinks: - output: str.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml deleted file mode 100644 index 39dae4c7..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: task_params 
-tasks: - parameterizable: - op: str_list - op_dir: fake - parameters: - fake_param: 3 -edges: -sources: - input: - - parameterizable.user_data -sinks: - output: parameterizable.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml deleted file mode 100644 index 51267057..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: three_ops -tasks: - first: - op: item_item - op_dir: fake - second: - op: item_item - op_dir: fake - third: - op: item_item - op_dir: fake -edges: - - origin: first.processed_data - destination: second.user_data - - origin: second.processed_data - destination: third.user_data -sources: - input: - - first.user_data -sinks: - output: third.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml deleted file mode 100644 index 205ca0ae..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: two_level_inheritance -tasks: - ancestor: - op: item_item - op_dir: fake - direct_inherit: - op: item_inheritance - op_dir: fake - indirect_inherit: - op: item_inheritance - op_dir: fake -edges: - - origin: ancestor.processed_data - destination: - - direct_inherit.user_data - - origin: direct_inherit.processed_data - destination: - - indirect_inherit.user_data -sources: - input: - - ancestor.user_data -sinks: - direct: direct_inherit.processed_data - indirect: indirect_inherit.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml deleted file mode 100644 index 67ca94dd..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml +++ 
/dev/null @@ -1,13 +0,0 @@ -name: unknown_task_params -tasks: - parameterizable: - op: str_list - op_dir: fake - parameters: - new_param: foo -edges: -sources: - input: - - parameterizable.user_data -sinks: - output: parameterizable.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml deleted file mode 100644 index f0d3ac2d..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: workflow_inception -tasks: - nested: - workflow: nested_workflow - nested2: - workflow: list_list -edges: - - origin: nested.str - destination: - - nested2.input -sources: - input: - - nested.input -sinks: - nested2: nested2.output diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py b/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py deleted file mode 100644 index 9e7d63e9..00000000 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from dataclasses import dataclass - -import pytest - -from vibe_core.data.core_types import BaseVibe - -HERE = os.path.dirname(os.path.abspath(__file__)) -WORKFLOWS_DIR = os.path.join(HERE, "fake_workflows") -OPS_DIR = os.path.join(HERE, "fake_ops") - - -@dataclass -class FakeType(BaseVibe): - data: str - - -def get_fake_workflow_path(workflow_name: str): - return os.path.join(WORKFLOWS_DIR, f"{workflow_name}.yaml") - - -@pytest.fixture -def fake_workflow_path(request: pytest.FixtureRequest): - workflow_name = request.param # type:ignore - return get_fake_workflow_path(workflow_name) - - -@pytest.fixture -def fake_ops_dir() -> str: - return OPS_DIR - - -@pytest.fixture -def fake_workflows_dir() -> str: - return WORKFLOWS_DIR diff --git a/src/vibe_dev/vibe_dev/testing/op_tester.py b/src/vibe_dev/vibe_dev/testing/op_tester.py deleted file mode 100644 index 7c247985..00000000 --- a/src/vibe_dev/vibe_dev/testing/op_tester.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import json -import logging -import os -import shutil -from copy import deepcopy -from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, List, Optional, Union - -from azure.identity import AzureCliCredential -from hydra_zen import builds -from pystac.item import Item - -from vibe_agent.ops import ( - BaseVibeDict, - Operation, - OperationFactory, - OperationFactoryConfig, - OperationSpec, - OpIOType, - OpResolvedDependencies, - TypeDictVibe, -) -from vibe_agent.ops_helper import OpIOConverter -from vibe_agent.storage import Storage -from vibe_agent.storage.asset_management import LocalFileAssetManager -from vibe_agent.storage.storage import ItemDict, ensure_list -from vibe_common.schemas import CacheInfo, OperationParser, OpRunId -from vibe_common.secret_provider import AzureSecretProvider, SecretProvider -from vibe_core import data -from vibe_core.data.core_types import BaseVibe -from vibe_core.data.json_converter import DataclassJSONEncoder -from vibe_core.data.utils import deserialize_stac, serialize_stac - -LOGGER = logging.getLogger(__name__) -REFERENCE_FILENAME = "reference.json" - - -class FakeStorage(Storage): - def store(self, items: List[Item]) -> List[Item]: - return items - - def retrieve(self, input_item_dicts: List[Item]) -> List[Item]: - return input_item_dicts - - def retrieve_output_from_input_if_exists(self, input_item: Item) -> Optional[Item]: - return input_item - - async def retrieve_output_from_input_if_exists_async( - self, cache_info: CacheInfo, **kwargs: Any - ) -> Optional[ItemDict]: - raise NotImplementedError - - def remove(self, op_run_id: OpRunId): - return None - - -class OpTester: - def __init__(self, path_to_config: str): - self._tmp_dir = TemporaryDirectory() - self.asset_manager = LocalFileAssetManager(self._tmp_dir.name) - self.fake_storage = FakeStorage(self.asset_manager) - self.spec = OperationParser.parse(path_to_config) - - def run(self, **input_dict: Union[BaseVibe, List[BaseVibe]]) -> 
BaseVibeDict: - self.op = OperationFactory( - self.fake_storage, AzureSecretProvider(credential=AzureCliCredential()) - ).build(self.spec) - return self.op.callback(**input_dict) - - def update_parameters(self, parameters: Dict[str, Any]): - self.spec.parameters.update(parameters) - - def __del__(self): - try: - self._tmp_dir.cleanup() - except (AttributeError, FileNotFoundError): - LOGGER.info(f"Unable to clean temporary directory {self._tmp_dir}") - - -class ReferenceSaver(Operation): - storage: Storage - - def __init__( - self, - name: str, - callback: Callable[..., BaseVibeDict], - storage: Storage, - converter: data.StacConverter, - inputs_spec: TypeDictVibe, - output_spec: TypeDictVibe, - version: str, - dependencies: OpResolvedDependencies, - save_dir: str, - ): - self.root_dir = save_dir - self.dependencies = dependencies - super().__init__(name, callback, storage, converter, inputs_spec, output_spec, version) - - def _get_ref_path(self) -> str: - return os.path.join(self.root_dir, self.name, REFERENCE_FILENAME) - - def _get_reference(self) -> List[Any]: - ref_path = self._get_ref_path() - if os.path.exists(ref_path): - with open(ref_path) as f: - return json.load(f) - return [] - - def _update_reference(self, stac_inputs: ItemDict, stac_outputs: ItemDict): - ref = self._get_reference() - serialized = [ - {k: serialize_stac(v) for k, v in s.items()} for s in (stac_inputs, stac_outputs) - ] - ref.append(serialized) - with open(self._get_ref_path(), "w") as f: - json.dump(ref, f, cls=DataclassJSONEncoder) - - def save_items(self, items: ItemDict) -> ItemDict: - save_items = deepcopy(items) - for item_list in save_items.values(): - item_list = ensure_list(item_list) - for item in item_list: - for k, v in item.assets.items(): - rel_path = os.path.join(self.name, k) - abs_path = os.path.join(self.root_dir, rel_path) - filepath = self.storage.asset_manager.retrieve(k) - try: - os.makedirs(abs_path) - shutil.copy(filepath, abs_path) - except FileExistsError: - # 
File exists so we don't copy again - pass - v.href = os.path.join(rel_path, os.path.basename(filepath)) - - return save_items - - def save_inputs_and_outputs(self, input_items: ItemDict, output_items: ItemDict): - save_inputs = self.save_items(input_items) - save_outputs = self.save_items(output_items) - self._update_reference(save_inputs, save_outputs) - - def run(self, input_items: OpIOType) -> OpIOType: - stac_inputs = OpIOConverter.deserialize_input(input_items) - cache_info = CacheInfo(self.name, self.version, stac_inputs, self.dependencies) - items_out = super().run(input_items, cache_info) - stac_outputs = {k: deserialize_stac(v) for k, v in items_out.items()} - # Create directory for the op if necessary - os.makedirs(os.path.join(self.root_dir, self.name), exist_ok=True) - self.save_inputs_and_outputs(stac_inputs, stac_outputs) - return items_out - - -class ReferenceSaverFactory(OperationFactory): - storage: Storage - save_dir: str - - def __init__(self, storage: Storage, secret_provider: SecretProvider, save_dir: str): - super().__init__(storage, secret_provider) - self.save_dir = save_dir - - def _build_impl(self, op_config: OperationSpec) -> ReferenceSaver: - parameters = self.resolve_secrets(op_config.parameters) - dependencies = self.dependency_resolver.resolve(op_config) - callable = self.callable_builder.build( - op_config.root_folder, op_config.entrypoint, parameters - ) - return ReferenceSaver( - op_config.name, - callable, - self.storage, - self.converter, - op_config.inputs_spec, - op_config.output_spec, - op_config.version, - dependencies, - self.save_dir, - ) - - -ReferenceSaverFactoryConfig = builds( - ReferenceSaverFactory, - save_dir=str, - builds_bases=(OperationFactoryConfig,), -) - - -class ReferenceRetriever: - def __init__(self, root_dir: str): - self.root_dir = root_dir - self.converter = data.StacConverter() - - def retrieve_assets(self, items: Union[List[Item], Item]): - item: Item - for item in ensure_list(items): - for asset in 
item.assets.values(): - asset.href = os.path.join(self.root_dir, asset.href) - - def retrieve_op_io(self, item_dict: OpIOType) -> ItemDict: - stac_items = {k: deserialize_stac(v) for k, v in item_dict.items()} - for items in stac_items.values(): - self.retrieve_assets(items) - return stac_items - - def to_terravibes(self, item_dict: ItemDict) -> BaseVibeDict: - return {k: self.converter.from_stac_item(v) for k, v in item_dict.items()} - - def retrieve(self, op_name: str) -> List[List[BaseVibeDict]]: - op_dir = os.path.join(self.root_dir, op_name) - with open(os.path.join(op_dir, REFERENCE_FILENAME)) as f: - pairs = json.load(f) - stac_pairs = [[self.retrieve_op_io(i) for i in pair] for pair in pairs] - output_pairs = [[self.to_terravibes(i) for i in pair] for pair in stac_pairs] - return output_pairs diff --git a/src/vibe_dev/vibe_dev/testing/storage_fixtures.py b/src/vibe_dev/vibe_dev/testing/storage_fixtures.py deleted file mode 100644 index 2b29d2d2..00000000 --- a/src/vibe_dev/vibe_dev/testing/storage_fixtures.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -import uuid -from typing import cast - -import pytest -from azure.cosmos import CosmosClient -from azure.identity import AzureCliCredential -from azure.storage.blob import BlobServiceClient -from hydra_zen import instantiate - -from vibe_agent.storage import ( - BlobAssetManagerConfig, - CosmosStorage, - CosmosStorageConfig, - LocalFileAssetManagerConfig, - LocalStorageConfig, -) -from vibe_common.secret_provider import KeyVaultSecretConfig - -TEST_STORAGE = "https://eywadevtest.blob.core.windows.net" -REMOTE_FILE_CONTAINER = "testdata" -DUMMY_COSMOS_URI = "https://terravibes-db.documents.azure.com:443/" - - -@pytest.fixture(autouse=True, scope="session") -def stac_container() -> str: - stac_container_name: str = "stac" + str(uuid.uuid4())[0:6] - return stac_container_name - - -@pytest.fixture(autouse=True, scope="session") -def asset_container() -> str: - asset_name: str = "asset" + str(uuid.uuid4())[0:6] - return asset_name - - -@pytest.fixture(scope="session") -def storage_spec( - request: pytest.FixtureRequest, - tmp_path_factory: pytest.TempPathFactory, - stac_container: str, - asset_container: str, -): - storage_type: str = request.param # type: ignore - if storage_type == "local": - tmp_path = tmp_path_factory.mktemp("testdir") - tmp_asset_path = os.path.join(tmp_path, "assets") - yield LocalStorageConfig( - local_path=tmp_path, asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) - ) - elif storage_type == "remote": - cosmos_asset_container = asset_container + "cosmos" - key_config = KeyVaultSecretConfig( - "https://eywa-secrets.vault.azure.net/", "stac-cosmos-write-key", AzureCliCredential() - ) - key = instantiate(key_config) - test_db_name = "test-db" - config = CosmosStorageConfig( - key=key, - asset_manager=BlobAssetManagerConfig( - storage_account_url=TEST_STORAGE, - storage_account_connection_string="", - asset_container_name=cosmos_asset_container, - credential=AzureCliCredential(), - ), - cosmos_database_name=test_db_name, - 
stac_container_name=stac_container, - cosmos_url=DUMMY_COSMOS_URI, - ) - cast(CosmosStorage, instantiate(config)).container_proxy - yield config - cred = AzureCliCredential() - client = BlobServiceClient(TEST_STORAGE, credential=cred) - asset_client = client.get_container_client(cosmos_asset_container) - cosmos_client = CosmosClient(config.cosmos_url, key) - db = cosmos_client.get_database_client(test_db_name) - db.delete_container(stac_container) - if asset_client.exists(): - asset_client.delete_container() - else: - raise ValueError(f"Invalid storage setup {storage_type}") diff --git a/src/vibe_dev/vibe_dev/testing/utils.py b/src/vibe_dev/vibe_dev/testing/utils.py deleted file mode 100644 index 17634ce7..00000000 --- a/src/vibe_dev/vibe_dev/testing/utils.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import List -from unittest import TestCase - -import yaml -from azure.identity import AzureCliCredential - -from vibe_agent.ops import OperationFactoryConfig, OpIOType -from vibe_agent.storage import StorageConfig -from vibe_common.secret_provider import AzureSecretProviderConfig -from vibe_server.workflow.runner import WorkflowRunner -from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler -from vibe_server.workflow.workflow import Workflow - -from ..local_runner import LocalWorkflowRunner - - -class WorkflowTestHelper: - @staticmethod - def get_groundtruth_for_workflow(workflow_path: str) -> List[str]: - with open(workflow_path) as yaml_file: - workflow_def = yaml.safe_load(yaml_file) - - return workflow_def["sinks"] - - @staticmethod - def verify_workflow_result(workflow_path: str, result: OpIOType): - case = TestCase() - expected_output_names = WorkflowTestHelper.get_groundtruth_for_workflow(workflow_path) - - assert len(expected_output_names) == len(result.keys()) - case.assertCountEqual(result.keys(), expected_output_names) - for value in result.values(): - assert 
isinstance(value, dict) or isinstance(value, list) - assert len(result) > 0 - - @staticmethod - def gen_workflow( - workflow_path: str, - storage_spec: StorageConfig, - ) -> WorkflowRunner: - factory_spec = OperationFactoryConfig( - storage_spec, AzureSecretProviderConfig(credential=AzureCliCredential()) - ) - workflow = Workflow.build(workflow_path) - io_mapper = WorkflowIOHandler(workflow) - runner = LocalWorkflowRunner.build( - io_mapper=io_mapper, - factory_spec=factory_spec, - workflow=workflow, - ) - runner.runner.is_workflow = lambda *args, **kwargs: False # type: ignore - - return runner diff --git a/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py b/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py deleted file mode 100644 index 1e1e1a98..00000000 --- a/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import uuid -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Type - -import pytest -import yaml -from shapely import geometry as shpg - -from vibe_common.messaging import ( - MessageHeader, - MessageType, - WorkflowExecutionContent, - WorkMessage, - build_work_message, -) -from vibe_common.schemas import EntryPointDict, OperationSpec -from vibe_core.data import DataVibe, TypeDictVibe -from vibe_core.data.core_types import BaseVibe -from vibe_core.datamodel import TaskDescription - -from .fake_workflows_fixtures import get_fake_workflow_path - - -@dataclass -class SimpleStrDataType(BaseVibe): - data: str - - -@pytest.fixture -def SimpleStrData() -> Type[SimpleStrDataType]: - # A fixture that creates a type. Should this be in snake_case, or in CamelCase? - # I went with CamelCase, as there is no way to make this pretty. 
- return SimpleStrDataType - - -@pytest.fixture -def workflow_execution_message(SimpleStrData: Type[SimpleStrDataType]) -> WorkMessage: - with open(get_fake_workflow_path("item_gather")) as f: - wf_dict = yaml.safe_load(f) - - header = MessageHeader( - type=MessageType.workflow_execution_request, - run_id=uuid.uuid4(), - ) - data = SimpleStrData("some fake data") - content = WorkflowExecutionContent( - name="fake_item_gather", - input={ - "plain_input": {"data": data}, - }, - workflow=wf_dict, - ) - return build_work_message(header=header, content=content) - - -@pytest.fixture -def simple_op_spec(SimpleStrData: Type[SimpleStrDataType], tmp_path: Path) -> OperationSpec: - with open(tmp_path / "fake.py", "w") as fp: - fp.write( - """ -from datetime import datetime -from vibe_core.data import DataVibe -from vibe_dev.testing.workflow_fixtures import SimpleStrDataType as SimpleStrData -def fake_callback(*args, **kwargs): - def callback(**kwargs): - out = { - "processed_data": DataVibe( - id="🍔", - time_range=(datetime.now(), datetime.now()), - geometry={ - "type": "Point", - "coordinates": [0.0, 0.0], - "properties": { - "name": "🤭" - } - }, - assets=[] - ), - "simple_str": SimpleStrData("🍔") - } - return out - return callback - - """ - ) - return OperationSpec( - name="fake", - inputs_spec=TypeDictVibe( - { # type: ignore - "plain_input": SimpleStrData, - "list_input": List[SimpleStrData], - "terravibes_input": DataVibe, - "terravibes_list": List[DataVibe], - } - ), - output_spec=TypeDictVibe({"processed_data": DataVibe, "simple_str": SimpleStrData}), - parameters={}, - entrypoint=EntryPointDict( - {"file": "fake.py", "callback_builder": "fake_callback"} # type: ignore - ), - root_folder=str(tmp_path), - description=TaskDescription(), - ) - - -@pytest.fixture -def workflow_run_config() -> Dict[str, Any]: - return { - "name": "fake workflow run", - "user_input": { - "start_date": "2021-02-02T00:00:00", - "end_date": "2021-02-09T00:00:00", - "geojson": { - "type": 
"FeatureCollection", - "features": [ - { - "type": "Feature", - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-88.068487, 37.058836], - [-88.036059, 37.048687], - [-88.012895, 37.068984], - [-88.026622, 37.085711], - [-88.062482, 37.081461], - [-88.068487, 37.058836], - ] - ], - }, - } - ], - }, - }, - "workflow": "helloworld", - "parameters": None, - } - - -COORDS = (-55, -6) -TIME_RANGE = (datetime.now(), datetime.now()) -THE_DATAVIBE = DataVibe( - id="1", - time_range=TIME_RANGE, - geometry=shpg.mapping(shpg.Point(*COORDS).buffer(0.05, cap_style=3)), - assets=[], -) diff --git a/src/vibe_lib/setup.py b/src/vibe_lib/setup.py deleted file mode 100644 index 42c7f0bc..00000000 --- a/src/vibe_lib/setup.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from setuptools import find_packages, setup - -setup( - name="vibe_lib", - version="0.0.1", - author="Microsoft", - author_email="terravibes@microsoft.com", - description="TerraVibes Geospatial Platform Package - vibe lib.", - license="Proprietary", - keywords="terravibes geospatial", - packages=find_packages(exclude=["tests*"]), - python_requires="~=3.8", - install_requires=["numpy", "geopandas", "rasterio~=1.2"], -) diff --git a/src/vibe_lib/tests/test_airbus_api.py b/src/vibe_lib/tests/test_airbus_api.py deleted file mode 100644 index cdcab08a..00000000 --- a/src/vibe_lib/tests/test_airbus_api.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Dict -from unittest.mock import Mock, patch - -import pytest - -from vibe_lib.airbus import AirBusAPI, Constellation - - -@pytest.fixture(scope="module") -def api(): - with patch("vibe_lib.airbus.AirBusAPI._get_api_key") as mock_key: - with patch("vibe_lib.airbus.AirBusAPI._authenticate") as mock_token: - mock_key.return_value = "mock_api_key" - mock_token.return_value = "mock_token" - yield AirBusAPI("mock_filepath", False, [Constellation.PHR], 0.1, 0.4) - - -@pytest.fixture -def ordered_status(): - return {"id": "0", "status": "ordered"} - - -@pytest.fixture -def delivered_status(): - return {"id": "0", "status": "delivered"} - - -@pytest.fixture -def unkown_status(): - return {"id": "0", "status": "unknown"} - - -@patch("vibe_lib.airbus.AirBusAPI.get_order_by_id") -def test_ok_order(mock_handle: Mock, api: AirBusAPI, delivered_status: Dict[str, str]): - mock_handle.return_value = delivered_status - api.block_until_order_delivered("order_id") - mock_handle.assert_called_once_with("order_id") - - -@patch("vibe_lib.airbus.AirBusAPI.get_order_by_id") -def test_unexpected_order_status(mock_handle: Mock, api: AirBusAPI, unkown_status: Dict[str, str]): - mock_handle.return_value = unkown_status - with pytest.raises(ValueError): - api.block_until_order_delivered("order_id") - - -@patch("vibe_lib.airbus.AirBusAPI.get_order_by_id") -def test_timeout(mock_handle: Mock, api: AirBusAPI, ordered_status: Dict[str, str]): - mock_handle.return_value = ordered_status - with pytest.raises(RuntimeError): - api.block_until_order_delivered("order_id") - assert mock_handle.call_count == 5 diff --git a/src/vibe_lib/tests/test_earthdata.py b/src/vibe_lib/tests/test_earthdata.py deleted file mode 100644 index 39a3cc99..00000000 --- a/src/vibe_lib/tests/test_earthdata.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import math -from datetime import datetime -from typing import Any, Optional, Tuple, cast -from unittest.mock import Mock, patch - -import pytest -import requests -from shapely import geometry as shpg - -from vibe_core.data.core_types import BBox -from vibe_lib.earthdata import EarthDataAPI, format_geometry - -FMT_BOX = "2.0,1.0,2.0,3.0,0.0,3.0,0.0,1.0,2.0,1.0" -PROCESSING_LEVEL = "GEDI02_B.002" - - -@pytest.fixture -def test_box(): - return shpg.box(0, 1, 2, 3) - - -def fake_responses(num_items: int, page_size: int): - def foo(*args: Any, **kwargs: Any): - nonlocal num_items - num_return = min(num_items, page_size) - num_items = num_items - num_return - return {"feed": {"entry": [None for _ in range(num_return)]}} - - return foo - - -def test_format_geometry(test_box: shpg.Polygon): - fmt_geoms = format_geometry(test_box) - assert len(fmt_geoms) == 1 - assert fmt_geoms[0] == FMT_BOX - - -def test_format_cw_geometry(test_box: shpg.Polygon): - # Make sure we orient geometry properly (counter-clockwise) - test_geom = shpg.polygon.orient(test_box, sign=-1) - fmt_cw = format_geometry(test_geom)[0] - assert fmt_cw == FMT_BOX - - -def test_format_multipoly(test_box: shpg.Polygon): - test_geom = cast(shpg.MultiPolygon, test_box.union(shpg.box(10, 10, 11, 11))) - fmt_geoms = format_geometry(test_geom) - assert len(fmt_geoms) == 2 - assert fmt_geoms[0] == FMT_BOX - - -def test_api_wrapper_base_payload(): - api = EarthDataAPI(PROCESSING_LEVEL) - payload = api._get_payload(geometry=None, bbox=None, time_range=None, id=None) - assert len(payload) == 3 - assert payload["provider"] == api.provider - assert payload["concept_id"] == api.concept_ids[PROCESSING_LEVEL] - assert payload["page_size"] == api.page_size - - -@pytest.mark.parametrize("id", (None, "test_id")) -@pytest.mark.parametrize("time_range", (None, (datetime.now(), datetime.now()))) -@pytest.mark.parametrize("bbox", (None, (0, 0, 1, 1))) -@pytest.mark.parametrize("geometry", (None, shpg.box(0, 0, 1, 1))) -def 
test_api_wrapper_payload_keys( - geometry: Optional[shpg.Polygon], - bbox: Optional[BBox], - time_range: Optional[Tuple[datetime, datetime]], - id: Optional[str], -): - api = EarthDataAPI(PROCESSING_LEVEL) - payload = api._get_payload(geometry=geometry, bbox=bbox, time_range=time_range, id=id) - if geometry is not None: - assert "polygon[]" in payload - assert "options[polygon][or]" in payload - if bbox is not None: - assert "bounding_box" in payload - if time_range is not None: - assert "temporal" in payload - if id is not None: - assert "producer_granule_id" in payload - - -@pytest.mark.parametrize("num_items", (1, 2000, 2001, 9000)) -@patch.object(requests, "post") -def test_api_wrapper_paging(post: Mock, num_items: int): - api = EarthDataAPI(PROCESSING_LEVEL) - response_mock = Mock() - response_mock.configure_mock(**{"json.side_effect": fake_responses(num_items, api.page_size)}) - post.return_value = response_mock - api.query() - expected_calls = math.ceil((num_items + 1) / api.page_size) - assert post.call_count == expected_calls - for i, call_args in enumerate(post.call_args_list, 1): - assert call_args[1]["data"]["pageNum"] == i - - -@patch.object(requests, "post") -def test_api_wrapper_max_pages(post: Mock): - api = EarthDataAPI(PROCESSING_LEVEL) - response_mock = Mock() - response_mock.configure_mock( - **{"json.side_effect": fake_responses(api.max_items, api.page_size)} - ) - post.return_value = response_mock - api.query() - - response_mock.configure_mock( - **{"json.side_effect": fake_responses(api.max_items + api.page_size, api.page_size)} - ) - - with pytest.raises(RuntimeError): - api.query() diff --git a/src/vibe_lib/tests/test_predict_chips.py b/src/vibe_lib/tests/test_predict_chips.py deleted file mode 100644 index 6d3db524..00000000 --- a/src/vibe_lib/tests/test_predict_chips.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from pathlib import Path -from unittest.mock import MagicMock, patch - -import numpy as np -import pytest -import rasterio -from rasterio.windows import Window -from shapely import geometry as shpg - -from vibe_core.data import AssetVibe, Raster -from vibe_lib.spaceeye import chip - -RASTER_SIZE = 256 -RASTER_BANDS = 2 - - -class MockDataset: - spatial_size: int = 256 - channels: int = 3 - nodata: int = 100 - - def __init__(self, start_idx: int, length: int): - self.start_idx = start_idx - self.length = length - self.get_filename = None - self.zeros = np.zeros((MockDataset.channels, self.spatial_size, self.spatial_size)) - self.ones = np.ones((MockDataset.channels, self.spatial_size, self.spatial_size)) - - def __getitem__(self, idx: int): - if idx < self.start_idx: - return self.ones, self.zeros, {} - return self.zeros, self.ones, {} - - def __len__(self): - return self.length - - -@pytest.fixture() -def test_raster(tmp_path: Path): - geom = shpg.mapping(shpg.box(0, 0, 1, 1)) - now = datetime.now() - filepath = tmp_path / "test_raster.tif" - with rasterio.open( - filepath, - "w", - driver="GTiff", - width=RASTER_SIZE, - height=RASTER_SIZE, - count=RASTER_BANDS, - dtype="float32", - nodata=-1, - ) as dst: - dst.write(np.arange(RASTER_SIZE**2 * RASTER_BANDS).reshape(2, RASTER_SIZE, RASTER_SIZE)) - asset = AssetVibe(reference=str(filepath), type="image/tiff", id="asset_id") - return Raster(id="1", geometry=geom, time_range=(now, now), bands={}, assets=[asset]) - - -@pytest.mark.parametrize("start_idx, length", ((0, 5), (1, 5), (5, 5), (0, 100), (50, 100))) -@patch.object(chip, "write_prediction_to_file") -def test_skip_nodata(write_patch: MagicMock, start_idx: int, length: int): - dataset = MockDataset(start_idx, length) - loader = chip.get_loader(dataset, 1, num_workers=0) # type: ignore - model = MagicMock() - model.run.return_value = 10 * np.ones((1, 5, dataset.spatial_size, dataset.spatial_size)) - chip.predict_chips(model, 
loader, "anything", skip_nodata=True) - assert model.run.call_count == max(start_idx, 1) - - -@pytest.mark.filterwarnings("ignore: Dataset has no geotransform") -@pytest.mark.parametrize("downsampling", (1, 2, 8)) -def test_in_memory_reader(downsampling: int, test_raster: Raster): - out_shape = (16, 16) - reader = chip.InMemoryReader(downsampling) - reader._cache_raster = MagicMock(side_effect=reader._cache_raster) - for offset in (0, 0, 1, 2): - win = Window( - offset * downsampling, # type: ignore - 0, - *(o * downsampling for o in out_shape), - ) - x, m = reader(test_raster, win, out_shape=out_shape) - assert x.shape[1:] == out_shape - assert m.shape[1:] == out_shape - x, m = reader(test_raster, win, out_shape=out_shape) - reader._cache_raster.assert_called_once() - assert reader.rasters[test_raster.id]["data"].shape == ( - RASTER_BANDS, - RASTER_SIZE // downsampling, - RASTER_SIZE // downsampling, - ) diff --git a/src/vibe_lib/tests/test_raster_chipping.py b/src/vibe_lib/tests/test_raster_chipping.py deleted file mode 100644 index ba8444fe..00000000 --- a/src/vibe_lib/tests/test_raster_chipping.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from tempfile import TemporaryDirectory - -import numpy as np -import pytest -import xarray as xr -from shapely import geometry as shpg - -from vibe_core.data import Raster -from vibe_lib.raster import save_raster_to_asset -from vibe_lib.spaceeye.chip import ChipDataset -from vibe_lib.spaceeye.dataset import Dims, get_read_intervals, get_write_intervals - -RASTER_SIZE = 256 - - -@pytest.mark.parametrize("dim_size", [500, 10000]) -@pytest.mark.parametrize("chip_ratio", [1, 2, 10, 100]) -@pytest.mark.parametrize("step_ratio", [0.3, 0.5, 1.0]) -@pytest.mark.parametrize("offset", [0, 5000]) -def test_read_intervals(dim_size: int, chip_ratio: int, step_ratio: int, offset: int): - chip_size = dim_size // chip_ratio - step = int(step_ratio * chip_size) - read_start, read_end = get_read_intervals(dim_size, chip_size, step, offset) - assert np.all(read_end > read_start) - # No empty space in reads - assert np.all(read_start[1:] <= read_end[:-1]) - # All windows have the correct size - assert np.all((read_end - read_start) == chip_size) - # Don't make the step larger when adjusting - assert np.all((read_start[1:] - read_start[:-1]) <= step) - # Cover the whole thing - assert read_start[0] == offset - assert read_end[-1] == dim_size + offset - - -@pytest.mark.parametrize("dim_size", [500, 10000]) -@pytest.mark.parametrize("chip_ratio", [1, 2, 10, 100]) -@pytest.mark.parametrize("step_ratio", [0.3, 0.5, 1.0]) -@pytest.mark.parametrize("offset", [0, 5000]) -def test_write_intervals(dim_size: int, chip_ratio: int, step_ratio: int, offset: int): - chip_size = dim_size // chip_ratio - step = int(step_ratio * chip_size) - read_start, read_end = get_read_intervals(dim_size, chip_size, step, offset) - (write_start, write_end), (chip_start, chip_end) = get_write_intervals( - dim_size, chip_size, step, offset - ) - assert np.all(write_end > write_start) - # Chip and window sizes are the same - assert np.allclose(write_end - write_start, chip_end - 
chip_start) - # No empty space and no intersection in writes - assert np.all(write_start[1:] == write_end[:-1]) - # Don't try to write where we didn't read - assert np.all(write_start >= read_start) - assert np.all(write_end <= read_end) - # Cover the whole thing - assert write_start[0] == offset - assert write_end[-1] == dim_size + offset - - -def test_chip_size_too_large(): - dim_size = 447 - chip_size = 448 - step = 0 - offset = 0 - with pytest.raises(ValueError): - get_read_intervals(dim_size, chip_size, step, offset) - with pytest.raises(ValueError): - get_write_intervals(dim_size, chip_size, step, offset) - - -@pytest.fixture -def tmp_dir_name(): - _tmp_dir = TemporaryDirectory() - yield _tmp_dir.name - _tmp_dir.cleanup() - - -@pytest.fixture() -def test_raster(tmp_dir_name: str): - geom = shpg.mapping(shpg.box(0, 0, RASTER_SIZE, RASTER_SIZE)) - now = datetime.now() - raster_dim = (1, RASTER_SIZE, RASTER_SIZE) - - fake_data = np.zeros(raster_dim).astype(np.float32) - fake_da = xr.DataArray( - fake_data, - coords={ - "bands": np.arange(raster_dim[0]), - "x": np.linspace(0, 1, raster_dim[1]), - "y": np.linspace(0, 1, raster_dim[2]), - }, - dims=["bands", "y", "x"], - ) - fake_da.rio.write_crs("epsg:4326", inplace=True) - - asset = save_raster_to_asset(fake_da, tmp_dir_name) - return Raster(id="1", geometry=geom, time_range=(now, now), bands={}, assets=[asset]) - - -def test_window_smaller_than_chip(test_raster: Raster): - chip_size = RASTER_SIZE // 2 - - # window of size 0.25 * RASTER_SIZE, while chip is 0.5 * RASTER_SIZE - # RoI will need to be adjusted to match chip size - roi_geometry = shpg.Polygon(shpg.box(0.25, 0.25, 0.5, 0.5)) - - dataset = ChipDataset( - rasters=[test_raster], - chip_size=Dims(chip_size, chip_size, 1), - step_size=Dims(chip_size, chip_size, 1), - geometry_or_chunk=roi_geometry, - ) - - assert (dataset.raster_width, dataset.raster_height) == (RASTER_SIZE, RASTER_SIZE) - assert (dataset.width, dataset.height) == (chip_size, chip_size) - 
assert (dataset.roi_window.width, dataset.roi_window.height) == (chip_size, chip_size) diff --git a/src/vibe_lib/vibe_lib/__init__.py b/src/vibe_lib/vibe_lib/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_lib/vibe_lib/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_lib/vibe_lib/airbus.py b/src/vibe_lib/vibe_lib/airbus.py deleted file mode 100644 index e099e385..00000000 --- a/src/vibe_lib/vibe_lib/airbus.py +++ /dev/null @@ -1,237 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -import os -import time -from datetime import datetime -from enum import auto -from typing import Any, Dict, List, Sequence, Tuple -from zipfile import ZipFile - -import requests -from fastapi_utils.enums import StrEnum -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry - -from vibe_core.file_downloader import download_file - -from .geometry import wgs_to_utm - -DEFAULT_DELAY = 60 -DEFAULT_TIMEOUT = 1200 -IMAGE_FORMAT = "image/jp2" -LIVING_ATLAS_PROCESSING_LEVEL = "SENSOR" -PRODUCT_TYPE = "pansharpened" -RADIOMETRIC_PROCESSING = "DISPLAY" - - -class Constellation(StrEnum): - SPOT = auto() - PHR = auto() - PNEO = auto() - - -class GeometryRelation(StrEnum): - intersects = auto() - contains = auto() - - -class OrderStatus(StrEnum): - ordered = auto() - delivered = auto() - - -class AirBusAPI: - authentication_url: str = ( - "https://authenticate.foundation.api.oneatlas.airbus.com/" - "auth/realms/IDP/protocol/openid-connect/token" - ) - search_url: str = "https://search.foundation.api.oneatlas.airbus.com/api/v2/opensearch" - price_url: str = "https://data.api.oneatlas.airbus.com/api/v1/prices" - order_url: str = "https://data.api.oneatlas.airbus.com/api/v1/orders" - item_url: str = "https://access.foundation.api.oneatlas.airbus.com/api/v1/items" - - def __init__( - self, - api_key: str, - 
projected_crs: bool, - constellations: List[Constellation], - delay: float = DEFAULT_DELAY, - timeout: float = DEFAULT_TIMEOUT, - ): - self.api_key = api_key - self.token = self._authenticate() - self.projected_crs = projected_crs - self.constellations = constellations - self.delay = delay # in seconds - self.timeout = timeout - - @staticmethod - def _get_api_key(api_key_filepath: str) -> str: - with open(api_key_filepath) as f: - return f.read().strip() - - def _get(self, url: str, **kwargs: Any) -> Dict[str, Any]: - response = requests.get(url, **kwargs) - response.raise_for_status() - return json.loads(response.text) - - def _post(self, url: str, **kwargs: Any) -> Dict[str, Any]: - response = requests.post(url, **kwargs) - response.raise_for_status() - return json.loads(response.text) - - def _authenticate(self): - headers = {"Content-Type": "application/x-www-form-urlencoded"} - data = [ - ("apikey", self.api_key), - ("grant_type", "api_key"), - ("client_id", "IDP"), - ] - response = self._post(self.authentication_url, headers=headers, data=data) - return response["access_token"] - - def _get_workspace_id(self) -> str: - headers = {"Authorization": f"Bearer {self.token}", "Cache-Control": "no-cache"} - response = self._get("https://data.api.oneatlas.airbus.com/api/v1/me", headers=headers) - return response["contract"]["workspaceId"] - - def _search(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]: - headers = { - "Authorization": f"Bearer {self.token}", - "Cache-Control": "no-cache", - "Content-Type": "application/json", - } - - response = self._post(self.search_url, headers=headers, json=payload) - products = [{**r["properties"], "geometry": r["geometry"]} for r in response["features"]] - return products - - def query( - self, - geometry: BaseGeometry, - date_range: Tuple[datetime, datetime], - max_cloud_cover: int, - my_workspace: bool = False, - ) -> List[Dict[str, Any]]: - """ - Only get results that contain all the geometry (instead of intersecting) 
- - constellations - PNEO 0.3m (Neo Pléiades) - SPOT 1.5m - PHR 0.5m (Pléiades) - - Cloud cover values used for filtering are for the whole product - irrespective of the given geometry 😢 - """ - - formatted_date = ",".join( - [dt.astimezone().isoformat().replace("+00:00", "Z") for dt in date_range] - ) - payload: Dict[str, str] = { - "geometry": shpg.mapping(geometry), - "acquisitionDate": f"[{formatted_date}]", - "constellation": ",".join(self.constellations), - "cloudCover": f"[0,{max_cloud_cover:d}]", - "relation": GeometryRelation.intersects if my_workspace else GeometryRelation.contains, - } - if my_workspace: - payload["workspace"] = self._get_workspace_id() - else: - payload["processingLevel"] = LIVING_ATLAS_PROCESSING_LEVEL - - return self._search(payload) - - def query_owned(self, geometry: BaseGeometry, acquisition_id: str) -> List[Dict[str, Any]]: - """ - Query workspace for owned products that match the reference product - """ - payload: Dict[str, str] = { - "acquisitionIdentifier": acquisition_id, - "geometry": shpg.mapping(geometry), - "relation": GeometryRelation.intersects, - "workspace": self._get_workspace_id(), - } - return self._search(payload) - - def get_product_by_id(self, product_id: str) -> Dict[str, Any]: - payload: Dict[str, str] = {"id": product_id} - return self._search(payload)[0] - - def _get_order_params( - self, product_ids: Sequence[str], roi: BaseGeometry - ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - headers = { - "Authorization": f"Bearer {self.token}", - "Cache-Control": "no-cache", - "Content-Type": "application/json", - } - epsg_code = wgs_to_utm(roi) if self.projected_crs else "4326" - payload = { - "kind": "order.data.gb.product", - "products": [ - { - "crsCode": f"urn:ogc:def:crs:EPSG::{epsg_code}", - "productType": PRODUCT_TYPE, - "radiometricProcessing": RADIOMETRIC_PROCESSING, - "aoi": shpg.mapping(roi), - "id": pid, - "imageFormat": IMAGE_FORMAT, - } - for pid in product_ids - ], - } - return headers, payload - - def 
get_price(self, product_ids: Sequence[str], roi: BaseGeometry) -> Dict[str, Any]: - headers, payload = self._get_order_params(product_ids, roi) - - response = self._post(self.price_url, headers=headers, json=payload) - return response - - def place_order(self, product_ids: Sequence[str], roi: BaseGeometry) -> Dict[str, Any]: - headers, payload = self._get_order_params(product_ids, roi) - - response = self._post(self.order_url, headers=headers, json=payload) - return response - - def get_order_by_id(self, order_id: str) -> Dict[str, Any]: - headers = {"Authorization": f"Bearer {self.token}"} - return self._get(f"{self.order_url}/{order_id}", headers=headers) - - def block_until_order_delivered(self, order_id: str) -> Dict[str, Any]: - start = time.time() - order = self.get_order_by_id(order_id) - while order["status"] != OrderStatus.delivered: - try: - OrderStatus(order["status"]) - except ValueError: - raise ValueError( - f"Received unexpected status {order['status']} from order {order_id}" - ) - waiting_time = time.time() - start - if waiting_time > self.timeout: - raise RuntimeError( - f"Timed out after {waiting_time:.1f}s waiting for order {order_id}" - ) - time.sleep(self.delay) - order = self.get_order_by_id(order_id) - return order - - def download_product(self, product_id: Sequence[str], out_dir: str) -> str: - headers = {"Authorization": f"Bearer {self.token}"} - - download_url = f"{self.item_url}/{product_id}/download" - zip_path = os.path.join(out_dir, f"{product_id}.zip") - - download_file(download_url, zip_path, headers=headers) - with ZipFile(zip_path) as zf: - zip_member = [f for f in zf.filelist if f.filename.endswith(".JP2")][0] - # Trick to extract file without the whole directory tree - # https://stackoverflow.com/questions/4917284/ - zip_member.filename = os.path.basename(zip_member.filename) - filepath = zf.extract(zip_member, path=out_dir) - - return filepath diff --git a/src/vibe_lib/vibe_lib/archive.py b/src/vibe_lib/vibe_lib/archive.py 
deleted file mode 100644 index bbf71844..00000000 --- a/src/vibe_lib/vibe_lib/archive.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -import shutil - - -def create_flat_archive(directory_path: str, archive_name: str) -> str: - """Create a flat file directory zip archive containing all files under the given directory. - Traverses subdirectories to find all files. - - Args: - directory_path: directory to archive - archive_name: name to give the archive (without .zip extension) - - Returns: - Path to zipped archive containing all files at the root level - """ - files_to_move = [] - for dirpath, _, filenames in os.walk(directory_path): - for filename in filenames: - filepath = os.path.join(dirpath, filename) - files_to_move.append(filepath) - - archive_dir = os.path.join(directory_path, archive_name) - os.mkdir(archive_dir) - for file in files_to_move: - shutil.move(file, archive_dir) - - archive_path = os.path.join(directory_path, archive_name) - return shutil.make_archive(archive_path, "zip", archive_dir) diff --git a/src/vibe_lib/vibe_lib/bing_maps.py b/src/vibe_lib/vibe_lib/bing_maps.py deleted file mode 100644 index 18ea2a3e..00000000 --- a/src/vibe_lib/vibe_lib/bing_maps.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -""" -BingMaps API interface and auxiliary method to query tiles, download basemaps, -and manipulate between lat-lon coordinates and tile x-y coordinates. 
Part of the code -is adapted from the following source: -https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system -""" - -import logging -from datetime import datetime -from typing import Dict, List, Tuple, cast - -import numpy as np -import requests -import shapely.geometry as shpg -from pystac.item import Item - -from vibe_core.data import BBox - -MIN_LATITUDE = -85.05112878 -MAX_LATITUDE = 85.05112878 -MIN_LONGITUDE = -180 -MAX_LONGITUDE = 180 -MIN_ZOOM_LEVEL = 1 -MAX_ZOOM_LEVEL = 20 -NO_TILE_AVALABILITY_KEY, NO_TILE_AVAILABILITY_VALUE = "X-VE-Tile-Info", "no-tile" -LOGGER = logging.getLogger(__name__) - - -def tile_xy_from_latlon(lat: float, lon: float, zoom_level: int) -> Tuple[int, int]: - """ - Get the tile x-y coordinates given a lat/lon pair and a zoom level. - """ - # Clip lat/lon to the valid range - lat = min(max(lat, MIN_LATITUDE), MAX_LATITUDE) - lon = min(max(lon, MIN_LONGITUDE), MAX_LONGITUDE) - - # Compute the world map size in pixels for a zoom level - map_size = 256 * (2**zoom_level) - - # Calculate x-y coordinates from the lat/lon (x-y are float values - # representing positions as ratio of the map size) - x = (lon + 180) / 360 - sin_lat = np.sin(lat * np.pi / 180) - y = 0.5 - np.log((1 + sin_lat) / (1 - sin_lat)) / (4 * np.pi) - - # Transform x-y coordinates to pixel positions and clip to a valid range - pixel_x = min(max(x * map_size, 0), map_size - 1) - pixel_y = min(max(y * map_size, 0), map_size - 1) - - # As each tile is 256x256 pixels, get tile x-y coordinates from pixel coordinates - tile_x = int(np.floor(pixel_x / 256)) - tile_y = int(np.floor(pixel_y / 256)) - - return tile_x, tile_y - - -def latlon_from_tile_xy(tile_x: int, tile_y: int, zoom_level: int) -> Tuple[float, float]: - """ - Given a tile x-y coordinates and a zoom level, return the lat/lon pair of the - tile's upper-left corner. 
- """ - - # Compute the world map size in pixels for a zoom level - map_size = 256 * (2**zoom_level) - - # Get upper-left corner pixel coordinates for the tile - pixel_x = tile_x * 256 - pixel_y = tile_y * 256 - - # Calculate x-y coordinates from pixel coordinates (x-y are float values - # representing positions as ratio of the map size) - x = min(max(pixel_x, 0), map_size - 1) / map_size - 0.5 - y = 0.5 - min(max(pixel_y, 0), map_size - 1) / map_size - - # Convert x-y coordinates to lat/lon - lat = 90 - 360 * np.arctan(np.exp(-y * 2 * np.pi)) / np.pi - lon = 360 * x - - return lat, lon - - -def tiles_from_bbox(bbox: BBox, zoom_level: int) -> List[Tuple[int, int]]: - """ - Get a list of tile x-y coordinates for all tiles covering the given bounding box - for a given zoom level. - """ - lon_bottom_left, lat_bottom_left, lon_top_right, lat_top_right = bbox - - # Get tile x-y coordinates for the bottom-left and top-right corners of the bbox - tile_x_bottom_left, tile_y_bottom_left = tile_xy_from_latlon( - lat_bottom_left, lon_bottom_left, zoom_level - ) - - # Do the same for the top-right corner of the bbox - tile_x_top_right, tile_y_top_right = tile_xy_from_latlon( - lat_top_right, lon_top_right, zoom_level - ) - - tiles = [ - (tile_x, tile_y) - for tile_x in range(tile_x_bottom_left, tile_x_top_right + 1) - for tile_y in range( - tile_y_top_right, tile_y_bottom_left + 1 - ) # top-right to bottom-left instead because y-axis is inverted - ] - return tiles - - -def quadkey_from_tile_xy(tile_x: int, tile_y: int, zoom_level: int) -> str: - """ - Build the quadkey string that uniquely identifies a tile with x-y coordinates - for a given zoom level. 
- - For more information, please refer to the 'Tile Coordinates and Quadkeys' section of - https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system - """ - quadkey = "" - for i in range(zoom_level, 0, -1): - digit = 0 - mask = 1 << (i - 1) - if tile_x & mask: - digit += 1 - if tile_y & mask: - digit += 2 - quadkey += str(digit) - return quadkey - - -def get_geometry_for_tile(tile_x: int, tile_y: int, zoom_level: int) -> shpg.Polygon: - """ - Get the geometry of the tile with x-y coordinates for a given zoom level. - """ - # Max lat, min lon because it is the upper-left corner of the tile - max_lat, min_lon = latlon_from_tile_xy(tile_x, tile_y, zoom_level) - # Min lat, max lon because it is the bottom-right corner of the tile - # (computed as the upper-left of x+1, y+1) - min_lat, max_lon = latlon_from_tile_xy(tile_x + 1, tile_y + 1, zoom_level) - bbox = shpg.box(min_lon, min_lat, max_lon, max_lat) - return bbox - - -def tile_is_available(url: str) -> bool: - """ - Make a request to BingMaps API to verify if tile represented by url is available for download. - """ - with requests.get(url, stream=True) as r: - try: - r.raise_for_status() - headers = cast(Dict[str, str], r.headers) - return (NO_TILE_AVALABILITY_KEY not in headers) or ( - headers[NO_TILE_AVALABILITY_KEY] != NO_TILE_AVAILABILITY_VALUE - ) - except requests.HTTPError: - error_details = r.json()["errorDetails"] - raise ValueError("Error when verifying tile availablity: " + "\n".join(error_details)) - - -class BingMapsCollection: - """ - BingMaps collection interface to query tiles and download basemaps. 
- Reference: https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system - """ - - METADATA_URL: str = ( - "http://dev.virtualearth.net/REST/V1/Imagery/Metadata/Aerial" - "?output=json&include=ImageryProviders&key={BING_MAPS_API_KEY}" - ) - - def __init__(self, api_key: str): - if not api_key: - raise ValueError("No API key provided.") - self.api_key = api_key - self.tile_download_url, self.subdomains = self.get_download_url_and_subdomains() - - def get_download_url_and_subdomains(self) -> Tuple[str, List[str]]: - """Fetch the download URL and subdomains using BingMaps API.""" - try: - with requests.get(self.METADATA_URL.format(BING_MAPS_API_KEY=self.api_key)) as r: - r.raise_for_status() - metadata = r.json() - url = metadata["resourceSets"][0]["resources"][0]["imageUrl"] - subdomains = metadata["resourceSets"][0]["resources"][0]["imageUrlSubdomains"] - return url, subdomains - except (requests.HTTPError, requests.ConnectionError) as e: - raise ValueError("Error when retrieving Bing Maps metadata.") from e - - def query_tiles(self, roi: BBox, zoom_level: int) -> List[Item]: - """Query the collection for tiles that intersect with the given bounding box.""" - tiles = tiles_from_bbox(roi, zoom_level) - - items = [] - for subdomain_idx, tile in enumerate(tiles): - tile_x, tile_y = tile - subdomain = self.subdomains[subdomain_idx % len(self.subdomains)] - - quadkey = quadkey_from_tile_xy(tile_x, tile_y, zoom_level) - url = self.tile_download_url.format( - quadkey=quadkey, - api_key=self.api_key, - subdomain=subdomain, - ) - if tile_is_available(url): - geometry = get_geometry_for_tile(tile_x, tile_y, zoom_level) - item = Item( - id=quadkey, - geometry=shpg.mapping(geometry), - bbox=list(geometry.bounds), - datetime=datetime.now(), - properties={"url": url}, - ) - items.append(item) - else: - LOGGER.info( - f"Tile {quadkey} (x {tile_x}, y {tile_y}, ZL {zoom_level}) " - "is not available for download. Skipping it." 
- ) - return items - - def download_tile(self, url: str, out_path: str): - """Download a tile from the given URL.""" - with requests.get(url, stream=True) as r: - try: - r.raise_for_status() - with open(out_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - except requests.HTTPError: - error_details = r.json()["errorDetails"] - raise ValueError("Error when downloading basemap: " + "\n".join(error_details)) diff --git a/src/vibe_lib/vibe_lib/climatology_lab.py b/src/vibe_lib/vibe_lib/climatology_lab.py deleted file mode 100644 index a3d64029..00000000 --- a/src/vibe_lib/vibe_lib/climatology_lab.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import hashlib -from datetime import datetime -from typing import List, Tuple - -import shapely.geometry as shpg -from pystac.item import Item - -from vibe_core.file_downloader import verify_url - - -class ClimatologyLabCollection: - asset_keys: List[str] - download_url: str - geometry_box: Tuple[float, float, float, float] - - def check_url_variable_year(self, variable: str, year: int) -> bool: - url = self.download_url.format(variable, year) - return verify_url(url) - - def query(self, variable: str, time_range: Tuple[datetime, datetime]) -> List[Item]: - start_date, end_date = time_range - year_range = range(start_date.year, end_date.year + 1) - - items = [ - self._create_item(variable, year) - for year in year_range - if self.check_url_variable_year(variable, year) - ] - return items - - def _create_item(self, variable: str, year: int) -> Item: - url = self.download_url.format(variable, year) - - item = Item( - id=hashlib.sha256(f"{variable}_{year}".encode()).hexdigest(), - geometry=shpg.mapping(shpg.box(*self.geometry_box)), - bbox=self.geometry_box, # type: ignore - datetime=datetime(year, 1, 1), - properties={"variable": variable, "url": url}, - ) - - return item - - -class TerraClimateCollection(ClimatologyLabCollection): - 
asset_keys: List[str] = [ - "aet", - "def", - "pet", - "ppt", - "q", - "soil", - "srad", - "swe", - "tmax", - "tmin", - "vap", - "ws", - "vpd", - "PDSI", - ] - - download_url = "https://climate.northwestknowledge.net/TERRACLIMATE-DATA/TerraClimate_{}_{}.nc" - geometry_box = (-180, -90, 180, 90) - - -class GridMETCollection(ClimatologyLabCollection): - asset_keys: List[str] = [ - "bi", - "erc", - "etr", - "fm1000", - "fm100", - "pet", - "pr", - "rmax", - "rmin", - "sph", - "srad", - "th", - "tmmn", - "tmmx", - "vpd", - "vs", - ] - - download_url = "https://www.northwestknowledge.net/metdata/data/{}_{}.nc" - geometry_box = ( - -124.76666663333334, - 25.066666666666666, - -67.05833330000002, - 49.400000000000006, - ) # Geometry for contiguous US (from gridMET products) diff --git a/src/vibe_lib/vibe_lib/comet_farm/__init__.py b/src/vibe_lib/vibe_lib/comet_farm/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_lib/vibe_lib/comet_farm/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_model.py b/src/vibe_lib/vibe_lib/comet_farm/comet_model.py deleted file mode 100644 index b9c9968d..00000000 --- a/src/vibe_lib/vibe_lib/comet_farm/comet_model.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, Dict, List, Union - -from pydantic import BaseModel, Field - - -class MapUnit(BaseModel): - id: str = Field(alias="@id") - area: Union[None, str] = Field(alias="@area") - year: Union[None, str] = Field(alias="Year") - inputCrop: Union[None, str] = Field(alias="InputCrop") - irrigated: Union[None, str] = Field(alias="Irrigated") - agcprd: Union[None, str] - abgdefac: Union[None, str] - accrste_1_: Union[None, str] - crpval: Union[None, str] - rain: Union[None, str] - cgrain: Union[None, str] - cinput: Union[None, str] - eupacc_1_: Union[None, str] - fertot_1_1_: Union[None, str] - fertac_1_: Union[None, str] - irrtot: Union[None, str] - metabe_1_1_: Union[None, str] - metabe_2_1_: Union[None, str] - nfixac: Union[None, str] - omadae_1_: Union[None, str] - petann: Union[None, str] - stdede_1_: Union[None, str] - struce_1_1_: Union[None, str] - struce_2_1_: Union[None, str] - tnetmn_1_: Union[None, str] - tminrl_1_: Union[None, str] - gromin_1_: Union[None, str] - somse_1_: Union[None, str] - somsc: Union[None, str] - strmac_2_: Union[None, str] - volpac: Union[None, str] - aagdefac: Union[None, str] - accrst: Union[None, str] - aglivc: Union[None, str] - bgdefac: Union[None, str] - bglivcm: Union[None, str] - crmvst: Union[None, str] - crootc: Union[None, str] - fbrchc: Union[None, str] - frootcm: Union[None, str] - metabc_1_: Union[None, str] - metabc_2_: Union[None, str] - omadac: Union[None, str] - rlwodc: Union[None, str] - stdedc: Union[None, str] - strmac_1_: Union[None, str] - strmac_6_: Union[None, str] - strucc_1_: Union[None, str] - n2oflux: Union[None, str] - annppt: Union[None, str] - noflux: Union[None, str] - - class Config: - allow_population_by_field_name = True - - -class CarbonResponse(BaseModel): - soilCarbon: str = Field(alias="SoilCarbon") - biomassBurningCarbon: str = Field(alias="BiomassBurningCarbon") - soilCarbonStock2000: str = Field(alias="SoilCarbonStock2000") - soilCarbonStockBegin: str = 
Field(alias="SoilCarbonStockBegin") - soilCarbonStockEnd: str = Field(alias="SoilCarbonStockEnd") - - class Config: - allow_population_by_field_name = True - - -class Co2Response(BaseModel): - limingCO2: str = Field(alias="LimingCO2") - ureaFertilizationCO2: str = Field(alias="UreaFertilizationCO2") - drainedOrganicSoilsCO2: str = Field(alias="DrainedOrganicSoilsCO2") - - class Config: - allow_population_by_field_name = True - - -class N2OResponse(BaseModel): - soilN2O: str = Field(alias="SoilN2O") - soilN2O_Direct: str = Field(alias="SoilN2O_Direct") - soilN2O_Indirect_Volatilization: str = Field(alias="SoilN2O_Indirect_Volatilization") - soilN2O_Indirect_Leaching: str = Field(alias="SoilN2O_Indirect_Leaching") - wetlandRiceCultivationN2O: str = Field(alias="WetlandRiceCultivationN2O") - biomassBurningN2O: str = Field(alias="BiomassBurningN2O") - drainedOrganicSoilsN2O: str = Field(alias="DrainedOrganicSoilsN2O") - - class Config: - allow_population_by_field_name = True - - -class CH4Response(BaseModel): - soilCH4: str = Field(alias="SoilCH4") - wetlandRiceCultivationCH4: str = Field(alias="WetlandRiceCultivationCH4") - biomassBurningCH4: str = Field(alias="BiomassBurningCH4") - - class Config: - allow_population_by_field_name = True - - -class CometOutput(BaseModel): - name: str = Field(alias="@name") - carbon: CarbonResponse = Field(alias="Carbon") - co2: Co2Response = Field(alias="CO2") - n20: N2OResponse = Field(alias="N2O") - ch4: CH4Response = Field(alias="CH4") - - class Config: - allow_population_by_field_name = True - - -class ScenarioMapUnit(BaseModel): - name: str = Field(alias="@name") - mapUnit: Union[List[MapUnit], MapUnit] = Field(alias="MapUnit") - - class Config: - allow_population_by_field_name = True - - -class ModelRunChild(BaseModel): - name: str = Field(alias="@name") - scenario: List[Union[ScenarioMapUnit, CometOutput]] = Field(alias="Scenario") - - class Config: - allow_population_by_field_name = True - - -class ModelRun(BaseModel): - 
modelRun: ModelRunChild = Field(alias="ModelRun") - - class Config: - allow_population_by_field_name = True - - -class CometDay(BaseModel): - cometEmailID: str = Field(alias="@cometEmailId") - cFARMVersion: str = Field(alias="@CFARMVersion") - cropland: ModelRun = Field(alias="Cropland") - - class Config: - allow_population_by_field_name = True - - -class CometResponse(BaseModel): - day: CometDay = Field(alias="Day") - - class Config: - allow_population_by_field_name = True - - -class CarbonOffset(BaseModel): - id: str - data: Dict[str, Any] diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py b/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py deleted file mode 100644 index 7794814d..00000000 --- a/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import json -from queue import Queue -from typing import Any, Dict, Optional - -import xmltodict -from pyngrok import ngrok - -from vibe_lib.comet_farm.comet_model import CometOutput, CometResponse -from vibe_lib.comet_farm.comet_server import CometHTTPServer, CometServerParameters - -TIMEOUT_IN_SECONDS = 120 - - -class CometRequester: - def __init__(self, comet_request: CometServerParameters): - self.comet_request = comet_request - - def get_comet_raw_output(self, queue: "Queue[str]") -> str: - return queue.get(timeout=TIMEOUT_IN_SECONDS * 60) - - def parse_comet_response(self, raw_comet_response: str) -> Dict[str, Any]: - comet_xml = xmltodict.parse(raw_comet_response) - comet_json = json.loads(json.dumps(comet_xml)) - return comet_json - - def run_comet_request(self, request_str: str) -> str: - queue: "Queue[str]" = Queue() - server = CometHTTPServer(queue, self.comet_request, request_str) - comet_response = "" - try: - server.start() - comet_response = self.get_comet_raw_output(queue) - comet_json = self.parse_comet_response(comet_response) - - carbon_offset: Optional[str] = None - # deriving 
the carbon offset - cr = CometResponse(**comet_json) - cLand = cr.day.cropland - for scenario in cLand.modelRun.scenario: - if type(scenario) == CometOutput and "scenario" in scenario.name: - co = CometOutput(**scenario.dict()) - carbon_offset = co.carbon.soilCarbon + " Mg Co2e/year" - break - - if carbon_offset is None: - raise RuntimeError("Missing carbon offset from COMET-Farm API") - - return carbon_offset - except Exception as err: - raise RuntimeError( - f"Error when building comet response. Comet Response: {comet_response}" - ) from err - finally: - server.shutdown() - try: - ngrok.kill() - except Exception: - pass diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_server.py b/src/vibe_lib/vibe_lib/comet_farm/comet_server.py deleted file mode 100644 index 05ed1aec..00000000 --- a/src/vibe_lib/vibe_lib/comet_farm/comet_server.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import io -import logging -import os -import traceback -import uuid -from http import HTTPStatus -from http.server import BaseHTTPRequestHandler, HTTPServer -from queue import Queue -from tempfile import TemporaryDirectory -from threading import Thread -from typing import Any, Optional, cast - -import requests -from pydantic.main import BaseModel -from pyngrok import conf, ngrok - -HTTP_SERVER_PORT: int = 1108 -HTTP_SERVER_HOST: str = "0.0.0.0" - - -class CometServerParameters(BaseModel): - url: str - webhook: str - ngrokToken: str - supportEmail: str - - -class CometHTTPServer(Thread): - def __init__( - self, outqueue: "Queue[str]", comet_request: CometServerParameters, request_str: str - ): - def handler(*args: Any, **kwargs: Any): - return CometHTTPRequestHandler(outqueue, *args, **kwargs) - - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.outqueue = outqueue - self.comet_request = comet_request - self.ngrok_token = comet_request.ngrokToken - self.server = HTTPServer((HTTP_SERVER_HOST, 
HTTP_SERVER_PORT), handler) - self.tunnel: Optional[Any] = None - self.tmpdir = TemporaryDirectory() - self.ngrok_config = conf.get_default() - self.ngrok_config.ngrok_path = os.path.join(self.tmpdir.name, "ngrok") # type: ignore - self.started_server = False - self.request_str = request_str - - super().__init__() - - def start_ngrok(self): - ngrok.set_auth_token(self.ngrok_token, self.ngrok_config) - self.tunnel = ngrok.connect(HTTP_SERVER_PORT, bind_tls=True) - self.comet_request.webhook = self.tunnel.public_url - - def submit_job(self, xml_string: str, reference_id: str = ""): - xml_file = io.StringIO(xml_string) - postUrl = self.comet_request.url - webhookUrl = self.comet_request.webhook + "/" + reference_id - - payload = { - "LastCropland": "-1", - "FirstCropland": "-1", - "email": self.comet_request.supportEmail, - "url": webhookUrl, - "LastDaycentInput": "0", - "FirstDaycentInput": "0", - } - - files = {"file": ("file.xml", xml_file, "application/xml")} - headers = {} - - self.logger.info(f"Submitting {payload} to COMET-Farm API") - r = requests.request("POST", postUrl, headers=headers, data=payload, files=files) - - # raise exception on error - r.raise_for_status() - - return r.text - - def run(self): - try: - self.start_ngrok() - request_id = str(uuid.uuid4()) - self.submit_job(self.request_str, reference_id=request_id) - self.started_server = True - self.server.serve_forever() - except Exception: - self.outqueue.put(f"Failed to submit job to COMET-Farm API: {traceback.format_exc()}") - raise - - def shutdown(self): - if self.started_server: - self.server.shutdown() - if self.tunnel is not None: - ngrok.disconnect(self.tunnel.public_url) - self.tmpdir.cleanup() - - -class CometHTTPRequestHandler(BaseHTTPRequestHandler): - def __init__(self, outqueue: "Queue[str]", *args: Any, **kwargs: Any): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.outqueue = outqueue - super().__init__(*args, **kwargs) - - def _send_ok(self): - 
self.send_response(HTTPStatus.OK) - self.send_header("Content-Type", "text/plain") - self.end_headers() - self.wfile.write(b"OK") - - def do_POST(self): - content_len_str = cast(str, self.headers.get("Content-Length")) - content_len = int(content_len_str, 0) - post_body = self.rfile.read(content_len).decode("utf-8") - self.logger.info(f"Received data {post_body} from COMET-Farm API") - self.outqueue.put(post_body) - self._send_ok() - - def do_GET(self): - self._send_ok() diff --git a/src/vibe_lib/vibe_lib/deepmc/encoder.py b/src/vibe_lib/vibe_lib/deepmc/encoder.py deleted file mode 100644 index 6af9f5c5..00000000 --- a/src/vibe_lib/vibe_lib/deepmc/encoder.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Optional - -from torch import Tensor, nn - -from .helpers import point_wise_feed_forward_network, positional_encoding -from .transform import MultiHeadAttention - - -class EncoderLayer(nn.Module): - def __init__(self, d_model: int, num_heads: int, d_ff: int, rate: float): - super().__init__() - self.mha = MultiHeadAttention(d_model, num_heads) - self.ffn = point_wise_feed_forward_network( - in_features=d_model, out_features=d_model, d_ff=d_ff - ) - self.layernorm1 = nn.LayerNorm(d_model, eps=1e-6) - self.layernorm2 = nn.LayerNorm(d_model, eps=1e-6) - - self.dropout1 = nn.Dropout(rate) - self.dropout2 = nn.Dropout(rate) - - def forward(self, x: Tensor, mask: Tensor): - attn_output = self.mha(x, x, x, mask) # (batch_size, input_seq_len, d_model) - attn_output = self.dropout1(attn_output) - out1 = self.layernorm1(x + attn_output) # (batch_size, input_seq_len, d_model) - - ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model) - ffn_output = self.dropout2(ffn_output) - out2 = self.layernorm2(out1 + ffn_output) # (batch_size, input_seq_len, d_model) - - return out2 - - -class Encoder(nn.Sequential): - def __init__( - self, - in_features: int, - num_layers: int, - d_model: int, - 
num_heads: int, - d_ff: int, - max_seq_len: int, - dropout: float = 0.1, - ): - super().__init__() - - self.d_model = d_model - self.num_layers = num_layers - self.embedding = nn.Sequential(nn.Linear(in_features, self.d_model), nn.ReLU()) - self.pos_encoding = positional_encoding(max_seq_len, d_model) - - self.enc_layers = nn.ModuleList( - [EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)] - ) - - self.dropout = nn.Dropout(dropout) - - def forward(self, x: Tensor, mask: Optional[Tensor] = None): - seq_len = x.size(1) - - # adding embedding and position encoding. - x = self.embedding(x) # (batch_size, input_seq_len, d_model) - x = x * self.d_model**0.5 - x = x + self.pos_encoding[:, :seq_len, :] - - x = self.dropout(x) - - for enc_layer in self.enc_layers: - x = enc_layer(x, mask) - - return x # (batch_size, input_seq_len, d_model) diff --git a/src/vibe_lib/vibe_lib/deepmc/helpers.py b/src/vibe_lib/vibe_lib/deepmc/helpers.py deleted file mode 100644 index 9c4147b5..00000000 --- a/src/vibe_lib/vibe_lib/deepmc/helpers.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, Optional - -import numpy as np -import torch -import torch.nn as nn -from numpy._typing import NDArray -from torch import Tensor -from torch.nn import Sequential - - -def get_angles(pos: NDArray[Any], i: NDArray[Any], d_model: int): - angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model)) - return pos * angle_rates - - -def positional_encoding(position: int, d_model: int) -> Tensor: - angle_rads = get_angles( - np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model - ) - - # apply sin to even indices in the array; 2i - angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2]) - - # apply cos to odd indices in the array; 2i+1 - angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2]) - - pos_encoding = angle_rads[np.newaxis, ...] 
- - return torch.tensor(pos_encoding, dtype=torch.float32) - - -def attn( - q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, mask: Optional[torch.Tensor] = None -) -> Tensor: - sim = torch.einsum("b i d, b j d -> b i j", q, k) - - if mask is not None: - max_neg_value = -torch.finfo(sim.dtype).max - sim.masked_fill_(~mask, max_neg_value) - - attn = sim.softmax(dim=-1) - out = torch.einsum("b i j, b j d -> b i d", attn, v) - return out - - -def point_wise_feed_forward_network(in_features: int, out_features: int, d_ff: int) -> Sequential: - return Sequential( - nn.Linear(in_features, d_ff), - nn.ReLU(), - nn.Linear(d_ff, out_features), - ) diff --git a/src/vibe_lib/vibe_lib/deepmc/locally_connected.py b/src/vibe_lib/vibe_lib/deepmc/locally_connected.py deleted file mode 100644 index c7ea5e0b..00000000 --- a/src/vibe_lib/vibe_lib/deepmc/locally_connected.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Optional, Tuple, Union - -import torch -from torch import nn -from torch.nn import functional as F -from torch.nn.parameter import Parameter -from torch.types import _dtype - - -class LocallyConnected1d(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - seq_len: int, - kernel_size: int, - stride: int = 1, - padding: Union[int, Tuple[int, int]] = 0, - bias: bool = True, - device: Optional[str] = None, - dtype: Optional[_dtype] = None, - ): - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.seq_len = seq_len - self.kernel_size = kernel_size - self.stride = stride - self.padding = (padding, padding) if isinstance(padding, int) else padding - out_seq_len = (seq_len + sum(self.padding) - (kernel_size - 1) - 1) // stride + 1 - self.weight = Parameter( - torch.empty( - (in_channels, out_channels, kernel_size, out_seq_len), # type: ignore - device=device, - dtype=dtype, # type: ignore - ) - ) - - if bias: - self.bias = 
Parameter((torch.empty(out_channels, out_seq_len))) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - # Do normal initialization for now, but can use something smarter - nn.init.normal_(self.weight, std=0.1) - if self.bias is not None: - nn.init.normal_(self.bias, std=0.1) - - def forward(self, x: torch.Tensor): - x = F.pad(x, self.padding) - x = x.unfold(-1, self.kernel_size, self.stride) - x = torch.einsum("b i l k, i o k l -> bol", x, self.weight) - if self.bias is not None: - x = x + self.bias - return x diff --git a/src/vibe_lib/vibe_lib/deepmc/models.py b/src/vibe_lib/vibe_lib/deepmc/models.py deleted file mode 100644 index 13e71de4..00000000 --- a/src/vibe_lib/vibe_lib/deepmc/models.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, List, Tuple, Union - -import torch -import torch.nn.functional as F -from einops.layers.torch import Rearrange -from torch import nn - -from .encoder import Encoder -from .locally_connected import LocallyConnected1d - - -class MyLSTM(nn.LSTM): - def forward(self, *args: Any, **kwargs: Any): - return super().forward(*args, **kwargs)[0] - - -class DeepMCModel(nn.Module): - def __init__( - self, - first_channels: int, # 3 - rest_channels: int, # 1 - first_encoder_channels: int, # 3 - rest_encoder_channels: Tuple[int, int, int], # [4, 8, 16] - sequence_length: int, # 24 - kernel_size: int, # 2 - num_inputs: int, # 6 - encoder_layers: int = 2, - encoder_features: int = 4, - encoder_heads: int = 4, - encoder_ff_features: int = 16, - encoder_dropout: float = 0.1, - decoder_features: Tuple[int, int] = (20, 16), - dropout: float = 0.2, - batch_first: bool = True, - return_sequence: bool = True, - ): - super(DeepMCModel, self).__init__() - self.return_sequence = return_sequence - self.num_inputs = num_inputs - out_seq_len = sequence_length - kernel_size + 1 - self.encoders = nn.ModuleList( - [ - 
nn.Sequential( - Rearrange("b l d -> b d l"), - LocallyConnected1d( - in_channels=first_channels, - out_channels=first_encoder_channels, - seq_len=sequence_length, - kernel_size=kernel_size, - ), - nn.BatchNorm1d(first_encoder_channels), - Rearrange("b d l -> b l d"), - Encoder( - in_features=first_encoder_channels, - num_layers=encoder_layers, - d_model=encoder_features, - num_heads=encoder_heads, - d_ff=encoder_ff_features, - max_seq_len=out_seq_len, - dropout=encoder_dropout, - ), - nn.Flatten(), - ) - ] - ) - - re1, re2, re3 = rest_encoder_channels - for _ in range(num_inputs - 1): - self.encoders.append( - nn.Sequential( - Rearrange("b l d -> b d l"), - LocallyConnected1d( - in_channels=rest_channels, - out_channels=re1, - seq_len=sequence_length, - kernel_size=kernel_size, - ), - nn.ReLU(), - nn.BatchNorm1d(re1), - LocallyConnected1d( - in_channels=re1, - out_channels=re2, - seq_len=out_seq_len, - kernel_size=kernel_size, - ), - nn.ReLU(), - nn.BatchNorm1d(re2), - Rearrange("b d l -> b l d"), - MyLSTM( - input_size=re2, - hidden_size=re3, - num_layers=1, - batch_first=batch_first, - dropout=dropout, - ), - # nn.ReLU(), # Do ReLU outside the model - ) - ) - - dec_input_features = out_seq_len * encoder_features + (self.num_inputs - 1) * re3 - df1, df2 = decoder_features - self.decoder = nn.Sequential( - nn.BatchNorm1d(dec_input_features), - Rearrange("b d -> b 1 d"), - MyLSTM( - input_size=dec_input_features, - hidden_size=df1, - batch_first=batch_first, - dropout=dropout, - ), - Rearrange("b 1 d -> b d"), - nn.ReLU(), - nn.BatchNorm1d(df1), - nn.Linear(df1, df2), - nn.ReLU(), - nn.Linear(df2, 1), - ) - - def forward(self, x: Union[torch.Tensor, List[torch.Tensor]]): - sliced_encoders = nn.ModuleList(list(self.encoders)[1:]) - x = [self.encoders[0](x[0])] + [ - F.relu(encoder(xi)[:, -1]) for encoder, xi in zip(sliced_encoders, x[1:]) - ] - x = torch.cat(x, dim=1) - x = self.decoder(x) - return x diff --git a/src/vibe_lib/vibe_lib/deepmc/time.py 
b/src/vibe_lib/vibe_lib/deepmc/time.py deleted file mode 100644 index 8c529265..00000000 --- a/src/vibe_lib/vibe_lib/deepmc/time.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from torch import Tensor, nn - - -class TimeDistributed(nn.Module): - def __init__(self, module: nn.Module, batch_first: bool = False): - super().__init__() - self.module = module - self.batch_first = batch_first - - def forward(self, x: Tensor): - if len(x.size()) <= 2: - return self.module(x) - - # Squash samples and timesteps into a single axis - # COMMENT: Can use rearrange here :) - x_reshape = x.contiguous().view(-1, x.size(-1)) # (samples * timesteps, input_size) - y = self.module(x_reshape) - - # We have to reshape Y - if self.batch_first: - y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) - else: - y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) - - return y diff --git a/src/vibe_lib/vibe_lib/deepmc/transform.py b/src/vibe_lib/vibe_lib/deepmc/transform.py deleted file mode 100644 index cc6f05d0..00000000 --- a/src/vibe_lib/vibe_lib/deepmc/transform.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import torch.nn as nn -from einops import rearrange -from torch import Tensor - -from .helpers import attn - - -class MultiHeadAttention(nn.Module): - def __init__(self, d_model: int, num_heads: int): - super().__init__() - self.num_heads = num_heads - self.d_model = d_model - - assert d_model % self.num_heads == 0 - - self.d_head = d_model // self.num_heads - self.scale = self.d_head**-0.5 - - self.wq = nn.Linear(d_model, d_model) - self.wk = nn.Linear(d_model, d_model) - self.wv = nn.Linear(d_model, d_model) - - self.dense = nn.Linear(d_model, d_model) - - def forward(self, v: Tensor, k: Tensor, q: Tensor, mask: Tensor): - # (batch_size, seq_len, d_model) - q = self.wq(q) - k = self.wq(k) - v = self.wq(v) - - # (batch_size, num_heads, seq_len_q, depth) - q, k, v = (rearrange(x, "b l (h d) -> (b h) l d", h=self.num_heads) for x in (q, k, v)) - - q *= self.scale - # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth) - # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k) - scaled_attention = attn(q, k, v, mask) - - concat_attention = rearrange(scaled_attention, "(b h) l d -> b l (h d)", h=self.num_heads) - - output = self.dense(concat_attention) # (batch_size, seq_len_q, d_model) - - return output diff --git a/src/vibe_lib/vibe_lib/earthdata.py b/src/vibe_lib/vibe_lib/earthdata.py deleted file mode 100644 index 362b35f8..00000000 --- a/src/vibe_lib/vibe_lib/earthdata.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -""" -Interact with NASA's EarthData platform's API -""" - -import logging -import math -from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Union - -import requests -from requests.exceptions import HTTPError -from shapely import geometry as shpg -from shapely import ops as shpo -from shapely.geometry.base import BaseGeometry - -from vibe_core.data.core_types import BBox - - -def format_geometry(geometry: Union[shpg.Polygon, shpg.MultiPolygon]) -> List[str]: - def format_poly(poly: shpg.Polygon): - # Make sure it is a 2D geometry, and buffer 0 to make it more well-behaved - # Orient to have the exterior go counter-clockwise - poly = shpg.polygon.orient(shpo.transform(lambda *args: args[:2], poly.buffer(0))) - assert poly.exterior is not None - return ",".join(str(c) for p in poly.exterior.coords for c in p) - - if isinstance(geometry, shpg.MultiPolygon): - geoms = [format_poly(p) for p in geometry.geoms] - else: - geoms = [format_poly(geometry)] - return geoms - - -class EarthDataAPI: - url: str = "https://cmr.earthdata.nasa.gov/search/granules.json" - concept_ids: Dict[str, str] = { - "GEDI01_B.002": "C1908344278-LPDAAC_ECS", - "GEDI02_A.002": "C1908348134-LPDAAC_ECS", - "GEDI02_B.002": "C1908350066-LPDAAC_ECS", - } - provider: str = "LPDAAC_ECS" - page_size: int = 2000 - max_items: int = 1_000_000 - - def __init__(self, processing_level: str): - self.processing_level = processing_level - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - def _get_payload( - self, - *, - geometry: Optional[BaseGeometry], - bbox: Optional[BBox], - time_range: Optional[Tuple[datetime, datetime]], - id: Optional[str], - ): - """ - Build query parameters - """ - # Format time range - payload: Dict[str, Any] = { - "provider": self.provider, - "concept_id": self.concept_ids[self.processing_level], - "page_size": self.page_size, - } - if time_range is not None: - fmt_tr = ",".join( - (t.astimezone().isoformat().replace("+00:00", 
"Z") for t in time_range) - ) - payload["temporal"] = fmt_tr - # Format spatial query - if geometry is not None: - assert isinstance(geometry, (shpg.Polygon, shpg.MultiPolygon)) - # Set option to get data that intersects with any of the geometries - payload.update({"polygon[]": format_geometry(geometry), "options[polygon][or]": "true"}) - if bbox is not None: - payload["bounding_box"] = ",".join(str(i) for i in bbox) - if id is not None: - payload["producer_granule_id"] = id - return payload - - def query( - self, - *, - geometry: Optional[BaseGeometry] = None, - bbox: Optional[BBox] = None, - time_range: Optional[Tuple[datetime, datetime]] = None, - id: Optional[str] = None, - ) -> List[Dict[str, Any]]: - items = [] - max_pages = math.ceil(self.max_items / self.page_size) - # Go to max_pages + 1 in case we have the maximum number of items possible - # In practice we'll accept up to page_size - 1 extra items - for page_num in range(1, max_pages + 2): - payload = self._get_payload(geometry=geometry, bbox=bbox, time_range=time_range, id=id) - payload["pageNum"] = page_num - response = requests.post(self.url, data=payload) - try: - response.raise_for_status() - except HTTPError as e: - error_message = response.text - msg = f"{e}. {error_message}" - raise HTTPError(msg, response=e.response) - page_items = response.json()["feed"]["entry"] - num_items = len(page_items) - self.logger.debug(f"Found {num_items} granules on page {page_num}") - items.extend(page_items) - if num_items < self.page_size: - return items - raise RuntimeError("Went through the maximum number of pages and did not return") diff --git a/src/vibe_lib/vibe_lib/gaussian_mixture.py b/src/vibe_lib/vibe_lib/gaussian_mixture.py deleted file mode 100644 index e9b81a96..00000000 --- a/src/vibe_lib/vibe_lib/gaussian_mixture.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, Tuple - -import numpy as np -from numpy.typing import NDArray -from sklearn.mixture import GaussianMixture - - -def low_rank_precision( - cov: NDArray[Any], thr: float -) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]: - """ - Compute (pseudo?)inverse of low-rank approximation of covariance matrix. - Approximation is computed by using considering only - the top eigenvalues so that total energy is around thr. - """ - w, v = np.linalg.eigh(cov) - wi = 1 / w - mask = np.cumsum(w[::-1] / w.sum())[::-1] < thr - wi[~mask] = 0 - precision = v @ (wi * v.T) - return precision, w, mask - - -def component_log_likelihood( - x: NDArray[Any], mix: GaussianMixture, idx: int, thr: float = 0.99 -) -> NDArray[Any]: - """ - Pass in the curves (N, T), mixture object, and component index - Output is size N containing the log-likelihood of each curve under the component - Does the normalization part make sense? Should check with someone smarter - """ - - x = x - mix.means_[idx] # type: ignore - cov = mix.covariances_[idx] # type: ignore - # Invert covariance matrix but erasing bad eigenvalues - precision, w, mask = low_rank_precision(cov, thr) # type: ignore - # Numerator - n = (x * (precision @ x.T).T).sum(axis=1) - # Denominator - # We compute the denominator considering only the kept eigenvalues - d = mask.sum() * np.log(2 * np.pi) + np.sum(np.log(w[mask])) # type: ignore - return -(n + d) / 2 - - -def mixture_log_likelihood( - x: NDArray[Any], mix: GaussianMixture, thr: float = 0.99 -) -> NDArray[Any]: - """ - Compute the mixture log-likelihood (max of each component log-likelihood) - """ - return np.stack( - [component_log_likelihood(x, mix, i, thr) for i in range(mix.n_components)] # type: ignore - ).max(axis=0) - - -def cluster_data(x: NDArray[Any], mix: GaussianMixture, thr: float = 0.99) -> NDArray[Any]: - """ - Assign data to cluster with maximum likelihood - """ - return np.argmax( - [component_log_likelihood(x, mix, i, thr) for i in 
range(mix.n_components)], # type: ignore - axis=0, - ) - - -def train_mixture_with_component_search( - x: NDArray[Any], max_components: int = 10, thr: float = 0.2 -) -> GaussianMixture: - """ - Train mixture of gaussians with stopping criterion to try and figure out how - many components should be used - """ - - base_mixture = GaussianMixture(n_components=1).fit(x) - base_ll = mixture_log_likelihood(x, base_mixture).mean() - mixture = base_mixture - ll = base_ll - for n in range(2, max_components + 1): - new_mixture = GaussianMixture(n_components=n).fit(x) - new_ll = mixture_log_likelihood(x, new_mixture).mean() - if (new_ll - ll) < np.abs(thr * base_ll): - return mixture - mixture = new_mixture - ll = new_ll - return mixture diff --git a/src/vibe_lib/vibe_lib/geometry.py b/src/vibe_lib/vibe_lib/geometry.py deleted file mode 100644 index a2efd7a6..00000000 --- a/src/vibe_lib/vibe_lib/geometry.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from enum import auto -from functools import reduce -from operator import add -from typing import Any, Dict, List, cast - -import geopandas as gpd -import numpy as np -from geopandas import GeoDataFrame -from shapely import geometry as shpg -from shapely.geometry import Point, Polygon -from shapely.geometry.base import BaseGeometry -from strenum import StrEnum - -FEATURE = "feature" -FEATURE_COLLECTION = "featurecollection" - - -class SimplifyBy(StrEnum): - simplify = auto() - convex = auto() - none = auto() - - -def geojson_to_wkt(json: Dict[str, Any]) -> List[str]: - "Recursively extracts WKTs from geojson features" - - if "type" not in json: - return [] - - if json["type"].lower() == FEATURE: - return [shpg.shape(json["geometry"]).wkt] - - if json["type"].lower() == FEATURE_COLLECTION: - return reduce(add, [geojson_to_wkt(f) for f in json["features"]]) - - raise ValueError("Unable to parse GeoJSON input") - - -def norm_intersection(g1: BaseGeometry, g2: BaseGeometry) -> float: - """ - Compute normalized intersection area between two geometries - Area(G1 ∩ G2) / Area(G1) - """ - return g1.intersection(g2).area / g1.area - - -def is_approx_within(small_geom: BaseGeometry, big_geom: BaseGeometry, threshold: float) -> bool: - """ - Maybe not within, but close enough - """ - return norm_intersection(small_geom, big_geom) > threshold - - -def is_approx_equal(geom1: BaseGeometry, geom2: BaseGeometry, threshold: float) -> bool: - return is_approx_within(geom1, geom2, threshold) and is_approx_within(geom2, geom1, threshold) - - -def wgs_to_utm(geometry: BaseGeometry) -> str: - """ - Compute UTM sector for a geometry in WGS84 (EPSG:4326) - """ - c = cast(Point, geometry.centroid) - lon, lat = c.x, c.y - assert abs(lon) < 180.0 and abs(lat) < 90.0 - utm_band = str(int(lon + 180 + 6) // 6).zfill(2) - if lat >= 0: - epsg_code = "326" + utm_band - else: - epsg_code = "327" + utm_band - return epsg_code - - -def create_mesh_grid(boundary: Polygon, resolution: int, 
raster_crs: int = 32611) -> GeoDataFrame: - boundary_df = gpd.GeoDataFrame(geometry=[boundary], crs=4326).to_crs(raster_crs) # type: ignore - - if boundary_df is not None and not boundary_df.empty and boundary_df.bounds is not None: - # Extract the bounds of the polygon - xmin, ymin, xmax, ymax = list(boundary_df.bounds.itertuples(index=False, name=None))[0] - - # Calculate the number of points in each dimension - num_x = int((xmax - xmin) / resolution) + 1 - num_y = int((ymax - ymin) / resolution) + 1 - - # Generate the coordinate arrays - x = np.linspace(xmin, xmax, num_x) - y = np.linspace(ymin, ymax, num_y) - - # Create the mesh grid - x_, y_ = np.meshgrid(x, y) - - g_df = gpd.GeoDataFrame( - geometry=gpd.points_from_xy(x_.flatten(), y_.flatten()), crs=raster_crs - ).to_crs(4326) # type: ignore - if g_df is not None and not g_df.empty: - intersecting_locations = cast(GeoDataFrame, g_df[g_df.intersects(boundary)]) # type: ignore - return intersecting_locations - - raise Exception("Unable to create mesh grid") diff --git a/src/vibe_lib/vibe_lib/gfs_blob_utils.py b/src/vibe_lib/vibe_lib/gfs_blob_utils.py deleted file mode 100644 index 04d56e2b..00000000 --- a/src/vibe_lib/vibe_lib/gfs_blob_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime - -""" -Utilities for dealing with NOAA GFS data in Azure Blob Store -""" - -# Blob container URI for GFS data -NOAA_BLOB_URI = "https://noaagfs.blob.core.windows.net/gfs" - - -def get_sas_uri(sas_token: str) -> str: - return "{uri}?{sas}".format(uri=NOAA_BLOB_URI, sas=sas_token) - - -def blob_url_from_offset(publish_date: datetime, offset: int) -> str: - date_str = publish_date.date().isoformat().replace("-", "") - hour_str = str(publish_date.hour).rjust(2, "0") - offset_str = str(offset).rjust(3, "0") - return "gfs.{date}/{hour}/atmos/gfs.t{hour}z.pgrb2.0p25.f{offset}".format( - date=date_str, hour=hour_str, offset=offset_str - ) diff --git a/src/vibe_lib/vibe_lib/glad.py b/src/vibe_lib/vibe_lib/glad.py deleted file mode 100644 index 735feef7..00000000 --- a/src/vibe_lib/vibe_lib/glad.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, Dict, Iterable, List - -import geopandas as gpd -from shapely import geometry as shpg - -from vibe_core.file_downloader import verify_url - -GLAD_DOWNLOAD_URL = ( - "https://glad.umd.edu/users/Potapov/GLCLUC2020/Forest_extent_{year}/{tile_name}.tif" -) - - -def check_glad_for_year(tile_name: str, year: int) -> bool: - """Verify if there is a GLAD file available for that year""" - url = GLAD_DOWNLOAD_URL.format(year=year, tile_name=tile_name) - return verify_url(url) - - -def get_tile_geometry(tiles_gdf: gpd.GeoDataFrame, tile_name: str) -> Dict[str, Any]: - selected_tile = tiles_gdf[tiles_gdf["NAME"] == tile_name] - if not isinstance(selected_tile, gpd.GeoDataFrame) or "geometry" not in selected_tile.columns: - raise RuntimeError(f"Tile {tile_name} not found in GLAD/Hansen tiles shapefile.") - - selected_geometries = selected_tile["geometry"] - - if not isinstance(selected_geometries, Iterable): - raise RuntimeError( - "Failed to load the GLAD/Hansen tiles shapefile. 'geometry' field is not iterable." 
- ) - - if len(selected_geometries) != 1: - raise RuntimeError( - f"Failed to load the GLAD/Hansen tiles shapefile. " - f"Expected 1 geometry for tile {tile_name}, found {len(selected_geometries)}." - ) - - return shpg.mapping(selected_geometries.iloc[0]) - - -def intersecting_tiles(tiles_gdf: gpd.GeoDataFrame, user_polygon: Dict[str, Any]) -> List[str]: - user_gdf = gpd.GeoDataFrame({"geometry": [shpg.shape(user_polygon)]}) - intersection = gpd.overlay(user_gdf, tiles_gdf, how="intersection") - - name_intersections = intersection["NAME"] - - if not isinstance(name_intersections, Iterable): - raise RuntimeError( - "Failed to load the GLAD/Hansen tiles shapefile. 'NAME' field is not iterable." - ) - - return [str(name) for name in name_intersections] diff --git a/src/vibe_lib/vibe_lib/heatmap_neighbor.py b/src/vibe_lib/vibe_lib/heatmap_neighbor.py deleted file mode 100644 index 84b0d6b0..00000000 --- a/src/vibe_lib/vibe_lib/heatmap_neighbor.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import cast - -import geopandas as gpd -import numpy as np -import pandas as pd -import skgstat as skg -from geopandas import GeoDataFrame -from skgstat import OrdinaryKriging -from sklearn.neighbors import NearestNeighbors - - -def run_cluster_overlap( - attribute_name: str, - reduced_samples: GeoDataFrame, - minimum_sample_polygons: GeoDataFrame, - geo_locations: GeoDataFrame, -) -> GeoDataFrame: - # perform spatial join between minimum sample locations and polygons - df_overlap = gpd.sjoin(reduced_samples, minimum_sample_polygons) - df_overlap.rename( - columns={ - "index_right": "index_overlap", - "geometry": "geometry_overlap", - }, - inplace=True, - ) - df_overlap = df_overlap[["index_overlap", f"{attribute_name}", "geometry_overlap"]] - # perform spatial join between geolocation points and minimum sample polygons - geo_locations = gpd.sjoin(geo_locations, minimum_sample_polygons) - geo_locations.rename( - columns={ - "index_right": "index_geo_locations", - }, - inplace=True, - ) - # assign nutrient values to geolocation points - out = pd.merge( - df_overlap, - geo_locations, - how="right", - left_on="index_overlap", - right_on="index_geo_locations", - ) - out = out[~out.isna().any(axis=1)] - out = GeoDataFrame(out[[attribute_name, "geometry"]], geometry="geometry", crs=4326) # type: ignore - return out - - -def run_nearest_neighbor( - attribute_name: str, - reduced_samples: GeoDataFrame, - geo_locations: GeoDataFrame, -) -> GeoDataFrame: - # preprocess data - - x_ = np.array([reduced_samples.geometry.x, reduced_samples.geometry.y]).T - y_ = reduced_samples[attribute_name].values - reduced_samples.drop(columns=["geometry"], inplace=True) - # train nearest neighbor model - neigh = NearestNeighbors(n_neighbors=1) - neigh.fit(x_, y=y_) - # inference nearest neighbor - locations = np.array([geo_locations.geometry.x, geo_locations.geometry.y]).T - _, geo_locations["index_nearest"] = neigh.kneighbors(locations) - # assign nutrient values to 
geolocation points - geo_locations = cast( - GeoDataFrame, - geo_locations.merge(reduced_samples, left_on="index_nearest", right_index=True), - ) - geo_locations = cast(GeoDataFrame, geo_locations[[attribute_name, "geometry"]]) - return geo_locations - - -def run_kriging_model( - attribute_name: str, - reduced_samples: GeoDataFrame, - geo_locations: GeoDataFrame, -) -> GeoDataFrame: - # preprocess data - x_ = np.array([reduced_samples.geometry.x, reduced_samples.geometry.y]).T - y_ = reduced_samples[attribute_name].values - # train Variogram using gaussian model - V = skg.Variogram(x_, y_, model="gaussian", fit_method="trf") - # train Ordinary Kriging model - ok = OrdinaryKriging(V, min_points=1, max_points=2, mode="exact") - # inference Ordinary Krigging - out_k = ok.transform(geo_locations.geometry.x, geo_locations.geometry.y) - geo_locations[attribute_name] = out_k - return geo_locations diff --git a/src/vibe_lib/vibe_lib/overlap_clustering.py b/src/vibe_lib/vibe_lib/overlap_clustering.py deleted file mode 100644 index 283898fe..00000000 --- a/src/vibe_lib/vibe_lib/overlap_clustering.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import math -from typing import Any - -import numpy as np -import torch as T -from numpy.typing import NDArray -from torch.nn.functional import avg_pool2d, interpolate - -POSTERIOR_SMOOTHING = 0.001 - -LOGGER = logging.getLogger(__name__) - - -# compute 2D average pooling of data in squares of side 2*half_side_length+1 -def compute_local_average(data: T.Tensor, half_side_length: int, stride: int = 1): - if half_side_length == 0: - return data - w, h = data.shape[-2:] - mean = avg_pool2d( - data.reshape(-1, 1, w, h), - 2 * half_side_length + 1, - stride=stride, - padding=half_side_length, - count_include_pad=False, - ) - - # if pooling was strided (for speedup), upsample to original raster size - if stride > 1: - mean = interpolate(mean, size=(w, h), mode="bilinear", align_corners=False) - return mean.view(data.shape) - - -# compute mean and variance in local windows of data in each cluster c weighted by q[c] -def compute_weighted_average_and_variance( - data: T.Tensor, - weights: T.Tensor, - half_side_length: int, - stride: int = 1, - var_min: float = 0.0001, - mq_min: float = 0.000001, -): - # compute probability normalization constants per class - mq = compute_local_average(weights, half_side_length, stride) - mq.clamp(min=mq_min) - - # instantiate data and data**2 weighted by weights[c] for each c - # future todo: investigate whether replacing einsum by broadcast ops gives a speedup - weighted = T.einsum("zij,cij->czij", data, weights) # class,channel,x,y - weighted_sq = T.einsum("zij,cij->czij", data**2, weights) - - # mean = E_[x~weights[c]] data[x] - # var = E_x (data[x]^2) - (E_x data[x])^2 - mean = compute_local_average(weighted, half_side_length, stride) / mq.unsqueeze(1) - var = compute_local_average(weighted_sq, half_side_length, stride) / mq.unsqueeze(1) - mean**2 - var = var.clamp(min=var_min) - - return mean, var - - -# batched log-pdf of a diagonal Gaussian -def lp_gaussian( - data: T.Tensor, mean: T.Tensor, var: T.Tensor, 
half_side_length: int, stride: int = 1 -): - m0 = -compute_local_average(1 / var, half_side_length, stride) - m1 = compute_local_average(2 * mean / var, half_side_length, stride) - m2 = -compute_local_average(mean**2 / var, half_side_length, stride) - L = compute_local_average(T.log(var), half_side_length, stride) - return (m0 * data**2 + m1 * data + m2 - 1 * L).sum(1) / 2 - - -# batched posterior over components in a Gaussian mixture -def gaussian_mixture_posterior( - data: T.Tensor, - prior: T.Tensor, - mean: T.Tensor, - var: T.Tensor, - half_side_length: int, - stride: int = 1, -): - # compute unnormalized log-pdf - lp = lp_gaussian(data, mean, var, half_side_length, stride) - - # posterior proportional to density*prior - p = lp.softmax(0) * prior - p /= p.sum(0) - p += POSTERIOR_SMOOTHING - p /= p.sum(0) - - return p - - -# one iteration of EM algorithm for Gaussian mixture -def perform_iteration_expectation_maximization( - data: T.Tensor, p: T.Tensor, half_side_length: int, stride: int = 1 -): - # M step: compute optimal GMM parameters in each raster window - prior = compute_local_average(p, half_side_length, stride) - mean, var = compute_weighted_average_and_variance(data, p, half_side_length, stride) - - # E step: recompute posteriors - p_new = gaussian_mixture_posterior(data, prior, mean, var, half_side_length, stride) - - return p_new, mean, var, prior - - -# run EM algorithm for Gaussian mixture -def run_clustering( - image: NDArray[Any], - number_classes: int, - half_side_length: int, - number_iterations: int, - stride: int, - warmup_steps: int, - warmup_half_side_length: int, - window: int, -) -> NDArray[Any]: - _, x_size, y_size = image.shape - result = np.zeros(shape=(x_size, y_size), dtype="uint8") - - for row in range(math.ceil(x_size / window)): - for col in range(math.ceil(y_size / window)): - xmin = row * window - xmax = (row + 1) * window - if xmax > x_size: - xmax = x_size - ymin = col * window - ymax = (col + 1) * window - if ymax > y_size: - 
ymax = y_size - - partial_image = image[:, xmin:xmax, ymin:ymax] - - logging.info( - f"Computing clusters for row: {row}, col: {col}, [{xmin}, {xmax}, {ymin}, {ymax}]" - ) - - with T.inference_mode(): - # convert image to Torch object - data = T.as_tensor(partial_image) - - # randomly initialize posterior matrix - p = T.rand((number_classes,) + partial_image.shape[1:]) - p /= p.sum(0) - - # EM - for i in range(number_iterations): - p.mean().item() # trigger synchronization - p, _, _, _ = perform_iteration_expectation_maximization( - data, - p, - warmup_half_side_length if i < warmup_steps else half_side_length, - stride, - ) - - # return np.argmax(p.numpy(), axis=0) - result[xmin:xmax, ymin:ymax] = np.argmax(p.numpy(), axis=0) - return result diff --git a/src/vibe_lib/vibe_lib/planetary_computer.py b/src/vibe_lib/vibe_lib/planetary_computer.py deleted file mode 100644 index 8ad62691..00000000 --- a/src/vibe_lib/vibe_lib/planetary_computer.py +++ /dev/null @@ -1,509 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -""" -Planetary computer model for TerraVibes. Helps query and download items and assets. 
-""" - -import io -import logging -import os -import re -import time -import xml.etree.ElementTree as ET -from datetime import datetime -from itertools import product -from typing import Any, Dict, List, Optional, Tuple -from urllib.parse import urljoin - -import planetary_computer as pc -import requests -from azure.storage.blob import BlobProperties, ContainerClient -from planetary_computer.sas import get_token -from pystac.asset import Asset -from pystac.item import Item -from pystac_client import Client -from requests.exceptions import RequestException -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry - -from vibe_core.data import S2ProcessingLevel, Sentinel1Product, Sentinel2Product -from vibe_core.data.core_types import BBox -from vibe_core.file_downloader import download_file - -CATALOG_URL = "https://planetarycomputer.microsoft.com/api/stac/v1" -DATE_FORMAT = "%Y-%m-%d" -RETRY_WAIT = 10 -MAX_RETRIES = 5 - -# https://sentinel.esa.int/web/sentinel/user-guides/sentinel-1-sar/naming-conventions -MODE_SLICE = slice(4, 6) -POLARIZATION_SLICE = slice(14, 16) -YEAR_SLICE = slice(17, 21) -MONTH_SLICE = slice(21, 23) -DAY_SLICE = slice(23, 25) -LOGGER = logging.getLogger(__name__) - - -class PlanetaryComputerCollection: - collection: str = "" - filename_regex: str = r".*/(.*\.\w{3,4})(?:\?|$)" - asset_keys: List[str] = ["image"] - - def __init__(self): - self.logger = logging.getLogger(self.__class__.__name__) - self.available_collections = get_available_collections() - - if self.collection not in self.available_collections: - message = ( - f"Invalid collection '{self.collection}'. " - f"Available collections: {self.available_collections}" - ) - self.logger.error(message) - raise ValueError(message) - - def query_by_id(self, id: str) -> Item: - items = query_catalog_by_ids([self.collection], [id]) - if not items: - message = f"There is no item with id {id} on collection {self.collection}." 
- self.logger.error(message) - raise KeyError(message) - return items[0] - - def query( - self, - geometry: Optional[BaseGeometry] = None, - roi: Optional[BBox] = None, - time_range: Optional[Tuple[datetime, datetime]] = None, - ids: Optional[List[str]] = None, - query: Optional[Dict[str, Any]] = None, - ) -> List[Item]: - return query_catalog( - [self.collection], - geometry=geometry, - roi=roi, - time_range=time_range, - ids=ids, - query=query, - ) - - def download_asset(self, asset: Asset, out_path: str) -> str: - """ - Download asset from the planetary computer and save it into the desired path. - If the output path is a directory, try to infer the filename from the asset href. - """ - if os.path.isdir(out_path): - # Resolve name from href - match = re.match(self.filename_regex, asset.href) - if match is None: - raise ValueError(f"Unable to parse filename from asset href: {asset.href}") - filename = match.groups()[0] - out_path = os.path.join(out_path, filename) - for retry in range(MAX_RETRIES): - href = pc.sign(asset.href) - try: - download_file(href, out_path) - return out_path - except RequestException as e: - LOGGER.warning( - f"Exception {e} downloading from {href}." - f" Retrying after {RETRY_WAIT}s ({retry+1}/{MAX_RETRIES})." - ) - time.sleep(RETRY_WAIT) - raise RuntimeError(f"Failed asset {asset.href} after {MAX_RETRIES} retries.") - - def download_item(self, item: Item, out_dir: str): - """ - Download assets from planetary computer. 
- """ - os.makedirs(out_dir) - asset_paths: List[str] = [] - for k in self.asset_keys: - asset_paths.append(self.download_asset(item.assets[k], out_dir)) - return asset_paths - - -class Sentinel2Collection(PlanetaryComputerCollection): - collection = "sentinel-2-l2a" - filename_regex = r".*/(.*\.\w{3,4})(?:\?|$)" - asset_keys: List[str] = [ - "B01", - "B02", - "B03", - "B04", - "B05", - "B06", - "B07", - "B08", - "B8A", - "B09", - "B11", - "B12", - ] - - def get_cloud_mask(self, item: Item) -> str: - return pc.sign(urljoin(item.assets["granule-metadata"].href, "QI_DATA/MSK_CLOUDS_B00.gml")) - - -class Sentinel1GRDCollection(PlanetaryComputerCollection): - collection: str = "sentinel-1-grd" - - -class Sentinel1RTCCollection(PlanetaryComputerCollection): - collection: str = "sentinel-1-rtc" - asset_keys: List[str] = ["vh", "vv"] - - -class USGS3DEPCollection(PlanetaryComputerCollection): - collection = "3dep-seamless" - asset_keys: List[str] = ["data"] - - -class CopernicusDEMCollection(PlanetaryComputerCollection): - collection = "cop-dem-glo-30" - asset_keys: List[str] = ["data"] - - -class NaipCollection(PlanetaryComputerCollection): - collection = "naip" - asset_keys: List[str] = ["image"] - - -class LandsatCollection(PlanetaryComputerCollection): - collection = "landsat-c2-l2" - asset_keys: List[str] = [ - "qa", - "red", - "blue", - "drad", - "emis", - "emsd", - "trad", - "urad", - "atran", - "cdist", - "green", - "nir08", - "swir16", - "swir22", - "qa_pixel", - "qa_radsat", - "lwir11", - ] - - -class Era5Collection(PlanetaryComputerCollection): - collection = "era5-pds" - asset_keys: List[str] = [ - "msl", - "2t", - "mx2t", - "mn2t", - "2d", - "100u", - "10u", - "ssrd", - "100v", - "10v", - "t0", - "sst", - "sp", - ] - - -class Modis8DaySRCollection(PlanetaryComputerCollection): - """ - MODIS Surface Reflectance generated every 8 days. - Available resolutions are 250m and 500m. 
- https://planetarycomputer.microsoft.com/dataset/modis-09Q1-061 - https://planetarycomputer.microsoft.com/dataset/modis-09A1-061 - """ - - collections: Dict[int, str] = {250: "modis-09Q1-061", 500: "modis-09A1-061"} - - def __init__(self, resolution: int): - if resolution not in self.collections: - raise ValueError( - f"Expected resolution to be one of {list(self.collections)}, got {resolution}." - ) - self.collection = self.collections[resolution] - super().__init__() - - -class Modis16DayVICollection(PlanetaryComputerCollection): - """ - MODIS Vegetation Indices generated every 16 days. - Pixels are chosen from all acquisitions in the 16-day period. - Available resolutions are 250m and 500m. - https://planetarycomputer.microsoft.com/dataset/modis-13Q1-061 - """ - - collections: Dict[int, str] = {250: "modis-13Q1-061", 500: "modis-13A1-061"} - - def __init__(self, resolution: int): - if resolution not in self.collections: - raise ValueError( - f"Expected resolution to be one of {list(self.collections)}, got {resolution}." - ) - self.collection = self.collections[resolution] - super().__init__() - - -class AlosForestCollection(PlanetaryComputerCollection): - """ - ALOS Forest/Non-Forest Classification is derived from the ALOS PALSAR Annual - Mosaic, and classifies the pixels to detect forest cover. 
- """ - - collection = "alos-fnf-mosaic" - asset_keys: List[str] = ["C"] - categories: List[str] = [ - "No data", - "Forest (>90% canopy cover)", - "Forest (10-90% canopy cover)", - "Non-forest", - "Water", - ] - - -class GNATSGOCollection(PlanetaryComputerCollection): - collection = "gnatsgo-rasters" - depth_variables = ["aws{}", "soc{}", "tk{}a", "tk{}s"] - soil_depths = [ - "0_5", - "0_20", - "0_30", - "5_20", - "0_100", - "0_150", - "0_999", - "20_50", - "50_100", - "100_150", - "150_999", - ] - - soil_assets = [d.format(v) for (d, v) in product(depth_variables, soil_depths)] - - additional_assets = [ - "mukey", - "droughty", - "nccpi3sg", - "musumcpct", - "nccpi3all", - "nccpi3cot", - "nccpi3soy", - "pwsl1pomu", - "rootznaws", - "rootznemc", - "musumcpcta", - "musumcpcts", - "nccpi3corn", - "pctearthmc", - ] - - asset_keys: List[str] = soil_assets + additional_assets - - -class EsriLandUseLandCoverCollection(PlanetaryComputerCollection): - collection = "io-lulc-9-class" - asset_keys: List[str] = ["data"] - categories: List[str] = [ - "No Data", - "Water", - "Trees", - "Flooded vegetation", - "Crops", - "Built area", - "Bare ground", - "Snow/ice", - "Clouds", - "Rangeland", - ] - - -def query_catalog( - collections: List[str], - geometry: Optional[BaseGeometry] = None, - roi: Optional[BBox] = None, - time_range: Optional[Tuple[datetime, datetime]] = None, - ids: Optional[List[str]] = None, - query: Optional[Dict[str, Any]] = None, -) -> List[Item]: - """ - Query the planetary computer for items that intersect with the desired RoI in the time range - """ - catalog = Client.open(CATALOG_URL) - datetime = ( - "/".join(i.strftime(DATE_FORMAT) for i in time_range) if time_range is not None else None - ) - search = catalog.search( - collections=collections, - intersects=shpg.mapping(geometry) if geometry is not None else None, - bbox=roi, - datetime=datetime, - ids=ids, - query=query, - ) - - items = [item for item in list(search.get_items())] - return items - - -def 
query_catalog_by_ids(collections: List[str], ids: List[str]) -> List[Item]: - """ - Query the planetary computer for items given a list of ids - """ - catalog = Client.open(CATALOG_URL) - search = catalog.search(collections=collections, ids=ids) - items = [item for item in list(search.get_items())] - return items - - -def get_available_collections() -> List[str]: - cat = Client.open(CATALOG_URL) - return [collection.id for collection in cat.get_collections()] - - -def map_sentinel_product_args(item: Item) -> Dict[str, Any]: - props = item.properties - kwargs = { - "geometry": item.geometry, - "time_range": (item.datetime, item.datetime), - "relative_orbit_number": props["sat:relative_orbit"], - "orbit_direction": props["sat:orbit_state"], - "platform": props["platform"].upper().replace("SENTINEL-", ""), - "extra_info": {}, - "assets": [], - } - return kwargs - - -def map_s1_product_args(item: Item) -> Dict[str, Any]: - kwargs = map_sentinel_product_args(item) - props = item.properties - kwargs.update( - { - "id": item.id, - "product_name": item.id, # Name without the unique identifier - "orbit_number": props["sat:absolute_orbit"], - "sensor_mode": props["sar:instrument_mode"], - "polarisation_mode": " ".join(props["sar:polarizations"]), - } - ) - return kwargs - - -def convert_to_s1_product(item: Item) -> Sentinel1Product: - kwargs = map_s1_product_args(item) - return Sentinel1Product(**kwargs) - - -def convert_to_s2_product(item: Item) -> Sentinel2Product: - kwargs = map_sentinel_product_args(item) - props = item.properties - product_name = props["s2:product_uri"].replace(".SAFE", "") - kwargs.update( - { - "id": product_name, - "product_name": product_name, - "orbit_number": get_absolute_orbit(item), - "tile_id": props["s2:mgrs_tile"], - "processing_level": S2ProcessingLevel.L2A, - } - ) - return Sentinel2Product(**kwargs) - - -def get_absolute_orbit(item: Item) -> int: - href = item.assets["safe-manifest"].href - signed_href = pc.sign(href) - response = 
requests.get(signed_href) - tree = ET.parse(io.BytesIO(response.content)) - orbit_element = [e for e in tree.iter() if "orbitNumber" in e.tag] - if not orbit_element: - raise RuntimeError( - f"Could not find orbit element when parsing manifest XML for item {item.id}" - ) - orbit = orbit_element[0].text - assert orbit is not None - return int(orbit) - - -def get_sentinel1_scene_name(item: Sentinel1Product) -> str: - collection = Sentinel1GRDCollection() - stac_item = collection.query_by_id(item.product_name) - scene_name = stac_item.assets["safe-manifest"].href.split("/")[-2] - return scene_name - - -# From example in: -# https://nbviewer.org/github/microsoft/AIforEarthDataSets/blob/main/data/sentinel-1-grd.ipynb -def generate_sentinel1_blob_path(item: Sentinel1Product) -> str: - scene_name = get_sentinel1_scene_name(item) - root = "GRD" - mode = scene_name[MODE_SLICE] - polarization = scene_name[POLARIZATION_SLICE] # "DV", for example, is "dual VV/VH" - year = scene_name[YEAR_SLICE] - month = scene_name[MONTH_SLICE].lstrip("0") - day = scene_name[DAY_SLICE].lstrip("0") - - azure_scene_prefix = "/".join([root, year, month, day, mode, polarization, scene_name]) - - return azure_scene_prefix - - -def get_sentinel1_container_client() -> ContainerClient: - storage_account_name = "sentinel1euwest" - container_name = "s1-grd" - - storage_account_url = "https://" + storage_account_name + ".blob.core.windows.net/" - - token = get_token(storage_account_name, container_name).token - container_client = ContainerClient( - account_url=storage_account_url, container_name=container_name, credential=token - ) - return container_client - - -def get_sentinel1_scene_files(item: Sentinel1Product) -> List[BlobProperties]: - blob_prefix = generate_sentinel1_blob_path(item) - - container_client = get_sentinel1_container_client() - blob_generator = container_client.list_blobs(name_starts_with=blob_prefix) - return list(blob_generator) - - -def get_complete_s1_prefix(scene_files: 
List[BlobProperties]) -> str: - prefixes = {"/".join(f["name"].split("/")[:7]) for f in scene_files} - if len(prefixes) > 1: - base_pref = next(iter(prefixes))[:-5] - raise RuntimeError(f"Found multiple prefixes matching '{base_pref}': {prefixes}") - prefix = next(iter(prefixes)) - return prefix - - -def validate_dem_provider(name: str, resolution: int) -> PlanetaryComputerCollection: - valid_providers = { - "USGS3DEP": { - "class": USGS3DEPCollection, - "resolutions": [10, 30], - }, - "COPERNICUSDEM30": { - "class": CopernicusDEMCollection, - "resolutions": [30], - }, - } - if name in valid_providers: - if resolution in valid_providers[name]["resolutions"]: - return valid_providers[name]["class"]() - else: - raise RuntimeError( - f"Wrong resolution for dem provider {name}. " - f"Valid resolution(s) is/are {valid_providers[name]['resolutions']}" - ) - else: - raise RuntimeError( - f"Invalid DEM parameter 'provider': {name}. " - f"Valid providers are {', '.join(valid_providers.keys())}" - ) diff --git a/src/vibe_lib/vibe_lib/raster.py b/src/vibe_lib/vibe_lib/raster.py deleted file mode 100644 index 80ec6f51..00000000 --- a/src/vibe_lib/vibe_lib/raster.py +++ /dev/null @@ -1,700 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import json -import logging -import mimetypes -import os -import shutil -import tempfile -from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - List, - NamedTuple, - Optional, - Sequence, - Tuple, - Union, - cast, -) - -import matplotlib.pyplot as plt -import numpy as np -import rasterio -import rioxarray as rio -import scipy.ndimage -import xarray as xr -from matplotlib.colors import LinearSegmentedColormap, to_rgba_array -from numpy.lib.stride_tricks import as_strided -from numpy.typing import NDArray -from rasterio import Affine -from rasterio.crs import CRS -from rasterio.enums import Resampling -from rasterio.io import DatasetWriter -from rasterio.vrt import WarpedVRT -from rasterio.warp import reproject -from rasterio.windows import Window -from rio_cogeo.cogeo import cog_translate, cog_validate -from rio_cogeo.profiles import cog_profiles - -from vibe_core.data import AssetVibe, CategoricalRaster, Raster, gen_guid -from vibe_core.data.rasters import ChunkLimits - -if TYPE_CHECKING: - MaskedArrayType = np.ma.MaskedArray[Any, np.dtype[Any]] -else: - MaskedArrayType = np.ma.MaskedArray - -LOGGER = logging.getLogger(__name__) -# https://kokoalberti.com/articles/geotiff-compression-optimization-guide/ -COMPRESSION_KWARGS: Dict[str, Any] = { - "tiled": True, - "compress": "ZSTD", - "zstd_level": 9, -} - -FLOAT_COMPRESSION_KWARGS: Dict[str, Any] = {**COMPRESSION_KWARGS, "predictor": 3} - -INT_COMPRESSION_KWARGS: Dict[str, Any] = {**COMPRESSION_KWARGS, "predictor": 2} - -DEFAULT_NODATA = 100 - - -class RGBA(NamedTuple): - """ - Int RGBA - """ - - red: int - green: int - blue: int - alpha: int - - -class FRGB(NamedTuple): - """ - Float RGB - """ - - red: float - green: float - blue: float - - -class FRGBA(FRGB): - """ - Float RGBA - """ - - alpha: float - - -def get_crs(raster: Raster) -> CRS: - with rasterio.open(raster.raster_asset.url) as src: - crs = src.crs - return 
crs - - -def open_raster(raster: Raster, *args: Any, **kwargs: Any) -> rasterio.DatasetReader: - return open_raster_from_ref(raster.raster_asset.url, *args, **kwargs) - - -def open_raster_from_ref(raster_ref: str, *args: Any, **kwargs: Any) -> rasterio.DatasetReader: - return rasterio.open(raster_ref, *args, **kwargs) # type: ignore - - -def load_raster_from_url( - raster_url: str, - band_indices: Optional[Sequence[int]] = None, - crs: Optional[Any] = None, - transform: Optional[rasterio.Affine] = None, - shape: Optional[Tuple[int, int]] = None, - resampling: Resampling = Resampling.nearest, - geometry: Optional[Any] = None, - geometry_crs: Optional[Any] = None, - dtype: Optional[Any] = None, -) -> xr.DataArray: - with rasterio.open(raster_url) as src: - if crs or transform or shape: - if shape: - height, width = shape - if not transform: - # Fix bug from rasterio https://github.com/rasterio/rasterio/issues/2346 - scale_x, scale_y = src.meta["width"] / width, src.meta["height"] / height - transform = src.transform * Affine.scale(scale_x, scale_y) - else: - height, width = None, None - dtype = dtype if dtype is not None else src.meta["dtype"] - src = WarpedVRT( - src, - crs=crs, - transform=transform, - height=height, - width=width, - resampling=resampling, - dtype=dtype, - ) - with src: - data = rio.open_rasterio(src, masked=True) - if band_indices: # Read only the desired bands - data = data[band_indices] - if geometry: - data = data.rio.clip([geometry], crs=geometry_crs, all_touched=True, from_disk=True) - return data - - -def load_raster( - raster: Raster, - bands: Optional[Sequence[Union[int, str]]] = None, - use_geometry: bool = False, - crs: Optional[Any] = None, - transform: Optional[rasterio.Affine] = None, - shape: Optional[Tuple[int, int]] = None, - resampling: Resampling = Resampling.nearest, -) -> xr.DataArray: - """ - Open file and read desired raster bands. - Bands may be specified as integers (band indices from the TIFF) or strings (band names). 
- Band names are mapped to indices by looking up the Raster metadata. - If desired CRS, transform, and/or shape are defined, the raster will be lazily resampled using - rasterio's WarpedVRT according to the chosen resampling algorithm. - Finally, if `use_geometry` is True, the transformed raster will be clipped to the geometry - in the Raster. - """ - raster_url = raster.raster_asset.url - if bands: - # Map band names to indices if necessary - band_indices = [raster.bands[b] if isinstance(b, str) else b for b in bands] - else: - band_indices = None - if use_geometry: - geometry = raster.geometry - geometry_crs = "epsg:4326" - else: - geometry = None - geometry_crs = None - data = load_raster_from_url( - raster_url, - band_indices, - crs=crs, - transform=transform, - shape=shape, - resampling=resampling, - geometry=geometry, - geometry_crs=geometry_crs, - ) - return data - - -def load_raster_match( - raster: Raster, - match_raster: Raster, - bands: Optional[Sequence[Union[int, str]]] = None, - use_geometry: bool = False, - resampling: Resampling = Resampling.nearest, -) -> xr.DataArray: - """ - Load a resampled raster that matches the `match_raster`'s CRS, shape, and transform. - """ - match_file = match_raster.raster_asset.url - with rasterio.open(match_file) as ref: - meta = ref.meta - return load_raster( - raster, - bands, - use_geometry=use_geometry, - crs=meta["crs"], - transform=meta["transform"], - shape=(meta["height"], meta["width"]), - resampling=resampling, - ) - - -def get_profile_from_ref(ref_filepath: str, **kwargs: int) -> Dict[str, Any]: - """ - Get the TIFF profile from a reference file and update it with the given kwargs. 
- """ - with rasterio.open(ref_filepath) as src: - profile = src.profile - # We'll store all bands in the same file - profile.update(kwargs) - return profile - - -def check_valid_cog_raster(output_path: str): - is_valid, errors, warnings = cog_validate(output_path, strict=False) - if not is_valid and errors: - message = f"Raster is not a valid COG. Errors: {errors}" - LOGGER.warning(message) - return - if is_valid and warnings: - message = f"Raster is valid COG, but there are the following warnings {warnings}" - LOGGER.info(message) - return - if is_valid: - LOGGER.info(f"{output_path} is a valid COG Raster. No Warnings") - - -def save_raster_to_path(array: xr.DataArray, output_path: str) -> None: - """ - Save raster to file - """ - dtype = array.encoding.get("dtype", str(array.dtype)) - if np.issubdtype(dtype, np.floating): - predictor = 3 - else: - # For integers - predictor = 2 - - array.rio.to_raster(output_path, tiled=True, compress="ZSTD", zstd_level=9, predictor=predictor) - - -def save_raster_to_asset(array: xr.DataArray, output_dir: str) -> AssetVibe: - """ - Save raster to file and return the corresponding asset - """ - out_id = gen_guid() - filepath = os.path.join(output_dir, f"{out_id}.tif") - save_raster_to_path(array, filepath) - new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".tif"], id=out_id) - return new_asset - - -def save_raster_from_ref(array: xr.DataArray, output_dir: str, ref_raster: Raster) -> Raster: - """ - Save raster to file and create a Raster type by copying metadata from a reference raster. 
- """ - new_asset = save_raster_to_asset(array, output_dir) - # Instantiate Raster by copying metadata from reference raster - return Raster.clone_from(ref_raster, id=gen_guid(), assets=[new_asset]) - - -def get_cmap(cmap_name: str) -> List[RGBA]: - color_map = plt.get_cmap(cmap_name.lower()) - return [RGBA(*color_map(i)) for i in range(256)] # type: ignore - - -def get_categorical_cmap(cmap_name: str, num_classes: int) -> List[RGBA]: - colors = plt.get_cmap(cmap_name).colors # type: ignore - intervals = np.linspace(0, 255, num_classes + 1).round().astype(int)[1:-1] - return step_cmap_from_colors(colors, intervals) - - -def step_cmap_from_colors( - colors: Union[Sequence[Union[FRGB, FRGBA]], NDArray[Any]], - intervals: Union[Sequence[int], NDArray[Any]], -) -> List[RGBA]: - interval_array = np.asarray(intervals) - idx = interval_array.shape - (np.arange(256) < interval_array[:, None]).sum(axis=0) - # Get RGBA values - rgba = to_rgba_array(np.asarray(colors)[idx]) - # Convert to RGBA in range 0 - 255 - rgba = np.round(255 * rgba).astype(int).tolist() - rgba = [RGBA(*c) for c in rgba] - return rgba - - -def interpolated_cmap_from_colors(colors: Sequence[RGBA], intervals: Sequence[float]) -> List[RGBA]: - colors = np.asarray(colors) / 255 # type: ignore - intervals = np.asarray(intervals) # type: ignore - imin, imax = intervals.min(), intervals.max() # type: ignore - norm_int = (intervals - imin) / (imax - imin) - ndvi_cmap = LinearSegmentedColormap.from_list( - "interpolated_cmap", - [(i, c) for (i, c) in zip(norm_int, colors)], # type: ignore - ) - rgba = np.round(ndvi_cmap(np.linspace(0, 1, 256)) * 255).astype(int).tolist() # type: ignore - return [RGBA(*c) for c in rgba] - - -def json_to_asset(json_dict: Dict[str, Any], output_dir: str) -> AssetVibe: - uid = gen_guid() - filepath = os.path.join(output_dir, f"{uid}.json") - with open(filepath, "w") as f: - json.dump(json_dict, f) - return AssetVibe(reference=filepath, type=mimetypes.types_map[".json"], id=uid) - - 
-def load_vis_dict(raster: Raster) -> Dict[str, Any]: - local_path = raster.visualization_asset.local_path - with open(local_path) as f: - vis_dict = json.load(f) - vis_dict["colormap"] = {i: c for i, c in enumerate(vis_dict["colormap"])} - if isinstance(raster, CategoricalRaster): - vis_dict["labels"] = raster.categories - # Position ticks in the middle of the class section - ticks = np.linspace(0, 255, len(raster.categories) + 1) - ticks = as_strided( - ticks, - shape=(len(raster.categories), 2), - strides=(ticks.strides[0], ticks.strides[0]), - writeable=False, - ) - ticks = ticks.mean(axis=1) # type: ignore - vis_dict["ticks"] = ticks - else: - num_ticks = 5 - vis_dict["ticks"] = np.linspace(0, 255, num_ticks) - vis_dict["labels"] = np.linspace( - vis_dict["range"][0], vis_dict["range"][1], num_ticks - ).round(1) - return vis_dict - - -def compute_index( - raster: Raster, - bands: Optional[Sequence[Union[int, str]]], - index_fun: Callable[[xr.DataArray], xr.DataArray], - index_name: str, - output_dir: str, -) -> Raster: - """ - Open raster, load specified bands, compute index, save a 1-band raster with indices. - bands can be a sequence of integers (direct band indices) or strings (band names). 
- """ - bands_array = load_raster(raster, bands, use_geometry=True) - # Convert to reflectance values, add minimum value to avoid division by zero - bands_array = (bands_array.astype(np.float32) * raster.scale + raster.offset).clip(min=1e-6) - index_array = index_fun(bands_array) - - index_raster = save_raster_from_ref(index_array, output_dir, raster) - index_raster.bands = {index_name: 0} - return index_raster - - -def compute_sobel_gradient(x: NDArray[Any]) -> NDArray[Any]: - """Use a Sobel filter to compute the magnitude of the gradient in input - - Args: - x (np.array): Input image (height, width) - Returns: - grad_mag (np.array): Gradient magnitude of input - """ - if len(x.shape) > 2: - x = np.squeeze(x) - - if len(x.shape) != 2: - raise ValueError( - "Invalid NumPy array. Valid arrays have two dimensions or more dimensions of " - "length 1. E.g. (100, 100) or (1, 100, 100) or (1, 1, 100, 100)" - ) - - grad_y: NDArray[Any] = cast(NDArray[Any], scipy.ndimage.sobel(x, axis=1)) - grad_x: NDArray[Any] = cast(NDArray[Any], scipy.ndimage.sobel(x, axis=0)) - - return np.sqrt(grad_x**2 + grad_y**2) - - -def tile_to_utm(tile_id: str) -> str: - """ - Get EPSG for a sentinel 2 tile - """ - utm_band = tile_id[:2] - is_north = tile_id[2] > "M" - epsg_code = f"32{'6' if is_north else '7'}{utm_band}" - return epsg_code - - -def write_window_to_file( - data_ar: NDArray[Any], - mask_ar: Optional[NDArray[Any]], - write_window: Window, - filepath: str, - meta: Dict[str, Any], -) -> None: - """Helper function to write a window of data to file. - - The function will create the file if it does not exist or will open it in - `r+` mode if it does. The data array will then be written in the window. 
- """ - if mask_ar is not None: - data_ar[:, mask_ar] = meta["nodata"] - if os.path.exists(filepath): - kwargs = {"mode": "r+"} - else: - kwargs = { - "mode": "w", - **meta, - } - kwargs["count"] = data_ar.shape[0] - with rasterio.open(filepath, **kwargs) as dst: - dst.write(data_ar, window=write_window) - - -def read_chunk_series(limits: ChunkLimits, rasters: List[Raster]) -> xr.Dataset: - rasters = sorted(rasters, key=lambda x: x.time_range[0], reverse=True) - ref_path = rasters[0].raster_asset.path_or_url - - with rasterio.open(ref_path) as src: - meta = src.meta - - vrt_options = { - "resampling": Resampling.bilinear, - "crs": meta["crs"], - "transform": meta["transform"], - "height": meta["height"], - "width": meta["width"], - } - - col_off, row_off, width, height = limits - s0 = row_off - e0 = row_off + height - s1 = col_off - e1 = col_off + width - res = [] - time = [] - for raster in rasters: - asset = raster.raster_asset - t = raster.time_range[0] - path = asset.path_or_url - time.append(t) - with rasterio.open(path) as src: - with WarpedVRT(src, **vrt_options) as vrt: - res.append(rio.open_rasterio(vrt, masked=True)[:, s0:e0, s1:e1]) - return xr.concat(res, xr.DataArray(time, name="time", dims="time")) - - -def get_meta( - in_path: str, - width: int, - height: int, - transform: Affine, - nodata: Optional[Union[int, float]] = None, -) -> Dict[str, Any]: - """ - Get input metadata from input raster and adjust width, height, and transform - """ - with rasterio.open(in_path) as src: - kwargs = src.meta.copy() - if nodata is not None: - kwargs["nodata"] = nodata - compression_kwargs = ( - INT_COMPRESSION_KWARGS - if np.issubdtype(src.meta["dtype"], np.integer) - else FLOAT_COMPRESSION_KWARGS - ) - kwargs.update( - { - "width": width, - "height": height, - "transform": transform, - "BIGTIFF": "IF_SAFER", - **compression_kwargs, - } - ) - return kwargs - - -def resample_raster( - in_path: str, - out_dir: str, - width: int, - height: int, - transform: Affine, - 
resampling: Resampling, - nodata: Optional[Union[int, float]] = None, -) -> str: - """ - Compress file and resample (if necessary) to the desired resolution - """ - kwargs = get_meta(in_path, width, height, transform, nodata) - out_path = os.path.join(out_dir, f"{gen_guid()}.tif") - with rasterio.open(in_path) as src: - with rasterio.open(out_path, "w", **kwargs) as dst: - for i in range(1, src.count + 1): - if width != src.width or height != src.height: - reproject( - source=rasterio.band(src, i), - destination=rasterio.band(dst, i), - src_transform=src.transform, - src_crs=src.crs, - dst_transform=transform, - dst_crs=src.crs, - resampling=resampling, - ) - else: - dst.write(src.read(i), i) - - return out_path - - -def compress_raster( - src_path: str, dst_path: str, num_threads: Union[int, str] = "all_cpus", **kwargs: Any -) -> None: - """Load a tif raster and save it in compressed format""" - with rasterio.open(src_path) as src: - with rasterio.open(dst_path, "w", **src.meta, **kwargs, num_threads=num_threads) as dst: - for _, win in src.block_windows(): - dst.write(src.read(window=win), window=win) - - -def include_raster_overviews(src_path: str): - """Convert image to COG.""" - - with tempfile.TemporaryDirectory() as tmp_dir: - tmpfile_name = os.path.join(tmp_dir, "tmp_file.tif") - # Format creation option (see gdalwarp `-co` option) - output_profile = cog_profiles.get("deflate") - output_profile.update(dict(BIGTIFF="IF_SAFER")) - - # Dataset Open option (see gdalwarp `-oo` option) - config = dict( - GDAL_NUM_THREADS="ALL_CPUS", - GDAL_TIFF_OVR_BLOCKSIZE="128", - ) - - LOGGER.info("Starting raster COG translation") - cog_translate( - src_path, - tmpfile_name, - output_profile, - config=config, - in_memory=False, - quiet=True, - ) - - LOGGER.info("Finished raster COG translation") - shutil.move(tmpfile_name, src_path) - - -def get_windows(width: int, height: int, win_width: int, win_height: int): - """ - Returns non-overlapping windows that cover the raster - 
""" - wins = [] - for start_r in range(0, height, win_height): - for start_c in range(0, width, win_width): - end_c = min(start_c + win_width, width) - end_r = min(start_r + win_height, height) - wins.append(Window.from_slices(rows=(start_r, end_r), cols=(start_c, end_c))) - return wins - - -def parallel_stack_bands( - raster_refs: Sequence[str], - out_path: str, - num_workers: int, - block_size: Tuple[int, int], - resampling: Resampling, - timeout_s: float = 120.0, - **kwargs: Any, -): - """ - Stack bands by reading different band files and writing them into a single file. - All bands are resampled to the output CRS and affine transform. - - Arguments: - raster_refs: sequence of references for the files containing band data - out_path: output filepath - num_workers: number of threads used to read data - block_size: size of the block (width, height) that is read by each thread - resampling: rasterio resampling method used to resample band data - timeout_s: timeout in seconds for each band read operation (default: 120) - **kwargs: other keyword arguments will be used to create the output raster. 
- Should include things like driver, height, width, transform, crs - """ - - def read_block(raster_url: str, win: Window): - LOGGER.debug(f"Reading block {win} from {raster_url}") - with rasterio.open(raster_url) as src: - with WarpedVRT( - src, - crs=kwargs["crs"], - width=kwargs["width"], - height=kwargs["height"], - transform=kwargs["transform"], - resampling=resampling, - ) as vrt: - win_data = vrt.read(window=win) - LOGGER.debug(f"Done reading block {win} from {raster_url}") - return win_data, win - - def write_bands(raster_ref: str, wins: List[Window], band_idx: List[int], dst: DatasetWriter): - with ThreadPoolExecutor(max_workers=num_workers) as pool: - futures = [pool.submit(read_block, raster_ref, win) for win in wins] - for future in as_completed(futures, timeout=timeout_s): - try: - ar, w = future.result() - LOGGER.debug(f"Writing block {w}, bands {band_idx}, to {out_path}") - dst.write(ar, band_idx, window=w) - LOGGER.debug(f"Done writing block {w}, bands {band_idx}, to {out_path}") - except Exception as e: - LOGGER.exception(f"Exception while processing block from {raster_ref}: {e}") - raise e - - wins = [w for w in get_windows(kwargs["width"], kwargs["height"], *block_size)] - with rasterio.open(out_path, "w", **kwargs, num_threads="all_cpus") as dst: - offset = 1 - for raster_ref in raster_refs: - with rasterio.open(raster_ref) as src: - band_idx = [i + offset for i in range(src.count)] - try: - write_bands(raster_ref, wins, band_idx, dst) - offset = band_idx[-1] + 1 - except TimeoutError: - msg = f"Timeout while reading raster data from {raster_ref}" - LOGGER.exception(msg) - raise TimeoutError(msg) - - -def serial_stack_bands( - raster_refs: Sequence[str], - out_path: str, - block_size: Tuple[int, int], - resampling: Resampling, - **kwargs: Any, -): - def read_block(raster_ref: str, win: Window): - LOGGER.debug(f"Reading block {win} from {raster_ref}") - with rasterio.open(raster_ref) as src: - with WarpedVRT( - src, - crs=kwargs["crs"], - 
width=kwargs["width"], - height=kwargs["height"], - transform=kwargs["transform"], - resampling=resampling, - ) as vrt: - win_data = vrt.read(window=win) - LOGGER.debug(f"Done reading block {win} from {raster_ref}") - return win_data - - def write_bands(raster_ref: str, wins: List[Window], band_idx: List[int], dst: DatasetWriter): - for w in wins: - try: - ar = read_block(raster_ref, w) - LOGGER.debug(f"Writing block {w}, bands {band_idx}, to {out_path}") - dst.write(ar, band_idx, window=w) - LOGGER.debug(f"Done writing block {w}, bands {band_idx}, to {out_path}") - except Exception as e: - LOGGER.exception(f"Exception while processing block from {raster_ref}: {e}") - raise e - - with rasterio.open(out_path, "w", **kwargs, num_threads="all_cpus") as dst: - offset = 1 - wins = [w for w in get_windows(kwargs["width"], kwargs["height"], *block_size)] - for raster_ref in raster_refs: - with rasterio.open(raster_ref) as src: - band_idx = [i + offset for i in range(src.count)] - write_bands(raster_ref, wins, band_idx, dst) - offset = band_idx[-1] + 1 - - -def write_to_raster(data: NDArray[Any], tr: Affine, raster_path: str, raster_crs: CRS) -> AssetVibe: - with rasterio.open( - raster_path, - "w", - driver="GTiff", - transform=tr, - dtype=rasterio.float32, - count=1, - width=data.shape[1], - height=data.shape[0], - crs=raster_crs, - ) as dst: - dst.write(data, indexes=1) - return AssetVibe(reference=raster_path, type="image/tiff", id=gen_guid()) diff --git a/src/vibe_lib/vibe_lib/segment_anything.py b/src/vibe_lib/vibe_lib/segment_anything.py deleted file mode 100644 index 2f405cd5..00000000 --- a/src/vibe_lib/vibe_lib/segment_anything.py +++ /dev/null @@ -1,657 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -from itertools import product -from math import ceil -from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast - -import geopandas as gpd -import numpy as np -import onnxruntime as ort -import shapely.geometry as shpg -import torch -from geopandas import GeoDataFrame -from numpy.typing import NDArray -from rasterio import Affine -from rasterio.windows import Window -from shapely.geometry.base import BaseGeometry -from torchvision.transforms.functional import resize - -from vibe_core.data import GeometryCollection, Raster -from vibe_core.data.core_types import BBox, Point -from vibe_lib.spaceeye.chip import ChipDataset, Dims - -LOGGER = logging.getLogger(__name__) - -SAM_CHIP_SIZE = 1024 -SAM_PIXEL_RGB_MEAN = [123.675, 116.28, 103.53] -SAM_PIXEL_RGB_STD = [58.395, 57.12, 57.375] -BACKGROUND_VALUE = 0 -MASK_LOGIT_THRESHOLD = 0.0 - -Prompt = Tuple[Union[Point, BBox], int] - - -# -# PROMPT VALIDATION and PREPROCESSING METHODS -# - - -def is_valid_prompt(prompt: List[Prompt], n_original_fg_pnts: int) -> bool: - """Check if prompt is valid for SAM inference. - - Valid prompts within a chip: - - Prompt contains at least one foreground point (with or without bbox). - - Prompt contains bbox without foreground points in the original prompt group/id. - - Args: - prompt: List of prompts. - n_original_fg_pnts: Number of original foreground points in the prompt group/id. - - Returns: - True if prompt is valid, False otherwise. - """ - if prompt: - pts_in_chip = [p for p in prompt if len(p[0]) == 2] - bbox_in_chip = [p for p in prompt if len(p[0]) == 4] - return (1 in [p[1] for p in pts_in_chip]) or ( - len(bbox_in_chip) > 0 and n_original_fg_pnts == 0 - ) - return False - - -def validate_prompt_geometry_collection(prompt_df: GeoDataFrame, roi: shpg.Polygon): - """Validate a GeoDataFrame representing a geometry collection with points or bbox as prompts. - - Args: - prompt_df: GeoDataFrame with columns 'prompt_id', 'label', and 'geometry'. 
- roi: Polygon representing the region of interest. - - Raises: - ValueError: If prompts are invalid. - """ - if not all(col in prompt_df.columns for col in ["prompt_id", "label", "geometry"]): - raise ValueError( - "Geometry collection must have columns 'prompt_id', 'label', and 'geometry'. " - f"Columns found: {prompt_df.columns}" - ) - - if not prompt_df.geometry.apply(lambda g: isinstance(g, (shpg.Point, shpg.Polygon))).all(): - prompt_types = list( - set( - [ - type(g) - for g in prompt_df.geometry - if not (isinstance(g, (shpg.Point, shpg.Polygon))) - ] - ) - ) - raise ValueError( - f"Expected each geometry to be a shapely Point or Polygon. Found: {prompt_types}" - ) - - prompts_within_roi = prompt_df.geometry.within(roi) - if not prompts_within_roi.all(): - prompts_outside_roi = prompt_df.geometry[~prompts_within_roi] - coords = [ - (p.x, p.y) if isinstance(p, shpg.Point) else p.bounds for p in prompts_outside_roi - ] - raise ValueError( - "Expected all prompts to be contained within the ROI of input_geometry. Prompts " - f"outside of ROI: {coords}" - ) - - if not prompt_df.prompt_id.apply(lambda i: isinstance(i, (int, str))).all(): - prompts = [i for i in prompt_df.prompt_id if not isinstance(i, (int, str))] - raise ValueError(f"Expected prompt_ids as integers or strings. Found: {prompts}") - - if not prompt_df.label.apply(lambda i: isinstance(i, int) and i in (0, 1)).all(): - raise ValueError( - "Expected labels to be integers, with 0 or 1 values. " - f"Found: {[i for i in prompt_df.label if not isinstance(i, int) or i not in (0, 1)]}" - ) - - for prompt_id, group in prompt_df.groupby("prompt_id"): - nbbox = sum([isinstance(g, shpg.Polygon) for g in group.geometry]) - if nbbox > 1: - raise ValueError( - "Expected at most one bounding box per prompt. 
" - f"Found {nbbox} for prompt_id '{prompt_id}'" - ) - - -def adjust_bounding_box(prompts: List[Prompt]) -> List[Prompt]: - """Adjust bounding box coordinates to contain all foreground points in the prompt - - Args: - prompts: List of prompts. - - Returns: - Adjusted list of prompts. - """ - bbox = [p for p in prompts if len(p[0]) == 4] - foreground_points = [point for point, label in prompts if len(point) == 2 and label == 1] - if not bbox or not foreground_points: - return prompts - - bbox_coords, bbox_label = bbox[0] - xmin, ymin, xmax, ymax = cast(BBox, bbox_coords) - - x_pts, y_pts = zip(*foreground_points) - - xmin, xmax = np.min([xmin, np.min(x_pts)]), np.max([xmax, np.max(x_pts)]) - ymin, ymax = np.min([ymin, np.min(y_pts)]), np.max([ymax, np.max(y_pts)]) - - adjusted_prompts = [cast(Prompt, ((xmin, ymin, xmax, ymax), bbox_label))] + [ - p for p in prompts if len(p[0]) == 2 - ] - - return adjusted_prompts - - -def convert_coords_to_pixel_position( - geometry: Union[shpg.Point, shpg.Polygon], transform: Affine -) -> Union[Point, BBox]: - """Convert point/bbox coordinates to pixel position. - - If bounding box, returns the pixel positions as a tuple of (xmin, ymin, xmax, ymax), - as expected by SAM. - - Args: - geometry: Point or Polygon geometry. - transform: Affine transformation matrix. - - Returns: - Coordinates in pixel position. - - Raises: - ValueError: If geometry is not a Point or Polygon. 
- """ - - if isinstance(geometry, shpg.Point): - return ~transform * (geometry.x, geometry.y) # type: ignore - elif isinstance(geometry, shpg.Polygon): - bounds = geometry.bounds - pixel_pos = ~transform * bounds[:2] + ~transform * bounds[2:] # type: ignore - xmin, xmax = sorted(pixel_pos[::2]) - ymin, ymax = sorted(pixel_pos[1::2]) - return (xmin, ymin, xmax, ymax) - else: - raise ValueError(f"Invalid prompt geometry: {geometry}") - - -def preprocess_geometry_collection( - geometry_collection: GeometryCollection, - dataset: ChipDataset, - roi_geometry: BaseGeometry, -) -> Tuple[Dict[int, List[Prompt]], Dict[int, str]]: - """Preprocess input geometry collection. - - Args: - geometry_collection: Geometry collection with prompts. - dataset: ChipDataset object. - roi_geometry: Region of interest geometry. - Returns: - Tuple of prompts and prompt mapping. - """ - prompt_df = cast( - gpd.GeoDataFrame, - gpd.read_file(geometry_collection.assets[0].path_or_url).to_crs(dataset.meta["crs"]), # type: ignore - ) - # Assert GeoDataFrame format and field values - roi_polygon = cast( - shpg.Polygon, - gpd.GeoSeries(roi_geometry, crs="epsg:4326") # type: ignore - .to_crs(dataset.crs) - .iloc[0] - .envelope, - ) - try: - validate_prompt_geometry_collection(prompt_df, roi_polygon) - except ValueError as e: - raise ValueError(f"Failed to parse prompts for segmentation. 
{e}") from e - - # Group by prompt_id and build tuple of transformed points and label pairs - groups = prompt_df.groupby("prompt_id") - grouped_prompts = groups.apply( - lambda x: [ - (convert_coords_to_pixel_position(geometry, dataset.transform), label) - for geometry, label in zip(x.geometry, x.label) - ] - ) - grouped_prompts = cast(Dict[Union[int, str], List[Prompt]], grouped_prompts.to_dict()) - - # Adjust bounding box to cover all points within the same prompt - grouped_prompts = { - prompt_id: adjust_bounding_box(prompts) for prompt_id, prompts in grouped_prompts.items() - } - - # Remapping prompt_ids to 0, 1, 2, ... - prompt_dict = { - new_id: cast(List[Prompt], grouped_prompts[prompt_id]) - for new_id, prompt_id in enumerate(grouped_prompts.keys()) - } - prompt_mapping = { - new_id: str(prompt_id) for new_id, prompt_id in enumerate(grouped_prompts.keys()) - } - return prompt_dict, prompt_mapping - - -def get_normalized_prompts_within_chip( - prompts: Dict[int, List[Prompt]], read_window: Window, geometry_offset: Dims -) -> Dict[int, List[Prompt]]: - """Filter and normalize prompts within chip. - - Output prompts will include only prompts within the chip with normalized coordinates relative - to the chip read window. - - Args: - prompts: Dictionary of prompts. - read_window: Chip read window. - geometry_offset: Chip geometry offset. - Returns: - Dictionary of normalized prompts. 
- """ - col_min = read_window.col_off - geometry_offset.width - col_max = col_min + read_window.width - - row_min = read_window.row_off - geometry_offset.height - row_max = row_min + read_window.height - - normalized_prompts = {} - for prompt_id, prompt in prompts.items(): - new_prompt, n_foreground_points = [], 0 - for coords, lb in prompt: - if len(coords) == 2: # Point - n_foreground_points += lb - x, y = cast(Point, coords) - if (col_min <= x <= col_max) and (row_min <= y <= row_max): - new_prompt.append(((x - col_min, y - row_min), lb)) - elif len(coords) == 4: # Bounding box - xmin, ymin, xmax, ymax = cast(BBox, coords) - if xmin < col_max and xmax > col_min and ymin < row_max and ymax > row_min: - xmin = max(xmin, col_min) - col_min - ymin = max(ymin, row_min) - row_min - xmax = min(xmax, col_max) - col_min - ymax = min(ymax, row_max) - row_min - new_prompt.append(((xmin, ymin, xmax, ymax), lb)) - else: - raise ValueError( - "Invalid prompt format. Expected either a point or a bounding box." - f"Got the following prompt instead: {prompt}" - ) - - if is_valid_prompt(new_prompt, n_foreground_points): - normalized_prompts[prompt_id] = new_prompt - - return normalized_prompts - - -# -# AUTOMATIC SEGMENTATION METHODS -# - - -def build_point_grid(points_per_side: int, img_size: int) -> List[Prompt]: - """Build a grid of points within the image. - - The grid is composed of points spaced evenly across the image, with a total number of points - equal to points_per_side**2. - - Args: - points_per_side: Number of points per side. - img_size: Image size. - - Returns: - List of points forming the grid. 
- """ - offset = img_size / (2 * points_per_side) - points_one_side = np.linspace(offset, img_size - offset, points_per_side) - grid_points = [cast(Prompt, ((x, y), 1)) for x, y in product(points_one_side, points_one_side)] - return grid_points - - -def generate_crop_boxes( - chip_size: int, n_layers: int, overlap_ratio: float = 0.0 -) -> Tuple[List[BBox], List[int]]: - """Generate a list of crop boxes of different sizes. - - Each layer has (2**i)**2 boxes for the ith layer. - - Args: - chip_size: Size of the chip. - n_layers: Number of layers. - overlap_ratio: Overlap ratio between crops. - Returns: - Tuple of crop boxes and associated layer indices. - """ - crop_boxes, layer_idxs = [], [] - - # Original chip - crop_boxes.append([0, 0, chip_size, chip_size]) - layer_idxs.append(0) - - def crop_len(orig_len: int, n_crops: int, overlap: int) -> int: - return int(ceil((overlap * (n_crops - 1) + orig_len) / n_crops)) - - for i_layer in range(n_layers): - n_crops_per_side = 2 ** (i_layer + 1) - overlap = int(overlap_ratio * chip_size * (2 / n_crops_per_side)) - - crop_w = crop_len(chip_size, n_crops_per_side, overlap) - crop_h = crop_len(chip_size, n_crops_per_side, overlap) - - crop_box_x0 = [int((crop_w - overlap) * i) for i in range(n_crops_per_side)] - crop_box_y0 = [int((crop_h - overlap) * i) for i in range(n_crops_per_side)] - - # Crops in XYWH format - for x0, y0 in product(crop_box_x0, crop_box_y0): - box = (x0, y0, min(x0 + crop_w, chip_size), min(y0 + crop_h, chip_size)) - crop_boxes.append(box) - layer_idxs.append(i_layer + 1) - - return crop_boxes, layer_idxs - - -def mask_to_bbox(mask: NDArray[Any]) -> NDArray[Any]: - """Build the bounding box of a binary mask. - - Args: - mask: Binary mask. - Returns: - Bounding box coordinates (col_min, row_min, col_max, row_max) of the mask. 
- """ - bbox = [] - for m in np.squeeze(mask, axis=1): - rows = np.any(m, axis=1) - cols = np.any(m, axis=0) - rmin, rmax = np.where(rows)[0][[0, -1]] - cmin, cmax = np.where(cols)[0][[0, -1]] - bbox.append([cmin, rmin, cmax, rmax]) - return np.array(bbox, dtype=np.float32) - - -def translate_bbox(mask_bbox: NDArray[Any], x_offset: float, y_offset: float) -> NDArray[Any]: - """Translate a mask bounding box by an offset. - - Args: - mask_bbox: Mask bounding box. - x_offset: X offset. - y_offset: Y offset. - Returns: - Translated bounding box. - """ - offset = [[x_offset, y_offset, x_offset, y_offset]] - return mask_bbox + offset - - -def uncrop_masks( - mask: NDArray[Any], mask_bbox: NDArray[Any], crop_box: BBox, chip_size: int -) -> Tuple[NDArray[Any], NDArray[Any]]: - """Translate and scale a mask from a crop to the original chip size. - - Args: - mask: Binary mask. - mask_bbox: Bounding box of the mask. - crop_box: Crop box. - chip_size: Chip size. - Returns: - Tuple of translated mask and bounding box numpy arrays. - """ - x0, y0, x1, y1 = map(int, crop_box) - crop_width = x1 - x0 - crop_height = y1 - y0 - resized_mask = cast( - torch.Tensor, - resize(torch.from_numpy(mask), size=[crop_height, crop_width]), - ) - pad_x, pad_y = chip_size - crop_width, chip_size - crop_height - pad = (x0, pad_x - x0, y0, pad_y - y0) - - mask = torch.nn.functional.pad(resized_mask, pad, value=0).numpy() - - scale_x, scale_y = crop_width / chip_size, crop_height / chip_size - mask_bbox = mask_bbox.astype(np.float64) * np.array([scale_y, scale_x, scale_y, scale_x]) - return mask, np.round(mask_bbox).astype(np.float32) - - -def calculate_stability_score( - masks: NDArray[Any], mask_threshold: float, threshold_offset: float -) -> NDArray[Any]: - """Compute the stability score for a batch of masks. - - The stability score is the IoU between the binary masks obtained by thresholding - the predicted mask logits at high and low values. - - Args: - masks: Mask logits. 
- mask_threshold: Mask threshold. - threshold_offset: Threshold offset. - - Returns: - Stability score. - """ - intersections = np.sum(masks > (mask_threshold + threshold_offset), axis=(2, 3)) - unions = np.sum(masks > (mask_threshold - threshold_offset), axis=(2, 3)) - return intersections / unions - - -# -# ENCODER/DECODER PREPROCESSING -# - - -def build_chip_preprocessing_operation( - raster: Raster, - band_names: Optional[List[str]], - band_scaling: Optional[List[float]], - band_offset: Optional[List[float]], -) -> Callable[[NDArray[Any]], NDArray[Any]]: - if band_names: - if len(band_names) == 1: - LOGGER.info( - "Got only a single band name. " - "Will replicate it to build a 3-channeled chip for SAM." - ) - band_names = band_names * 3 - elif len(band_names) != 3: - raise ValueError( - f"Invalid number of bands. Expected one or three band names. Got {band_names}" - ) - else: - LOGGER.info("No bands selected. Using ['R', 'G', 'B']") - band_names = ["R", "G", "B"] - - if not all([b in raster.bands for b in band_names]): - raise ValueError( - f"Band not found in input raster. Expected band names {band_names} " - f"to be among raster bands {list(raster.bands.keys())}" - ) - band_idx = [raster.bands[b] for b in band_names] - - if band_scaling: - if len(band_scaling) == 1: - LOGGER.info("Got a single scaling parameter. Will use it for all bands.") - band_scaling = band_scaling * 3 - elif len(band_scaling) != len(band_names): - raise ValueError(f"Expected one or three scaling parameters. Got {band_scaling}") - else: - band_scaling = [float(raster.scale)] * 3 - scale = np.array(band_scaling).reshape(1, 3, 1, 1) - - if band_offset: - if len(band_offset) == 1: - LOGGER.info("Got a single offset parameter. Will use it for all bands.") - band_offset = band_offset * 3 - elif len(band_offset) != len(band_names): - raise ValueError(f"Expected one or three offset parameters. 
Got {band_offset}") - else: - band_offset = [float(raster.offset)] * 3 - offset = np.array(band_offset).reshape(1, 3, 1, 1) - - def preprocessing_operation(chip: NDArray[Any]) -> NDArray[Any]: - normalized_chip = chip[:, band_idx, :, :] * scale + offset - if np.min(normalized_chip) < 0 or np.max(normalized_chip) > 1: - LOGGER.warning( - "Chip values are outside the expected range [0, 1] after scaling and offset. " - f"Found max of {np.max(normalized_chip)} and min of {np.min(normalized_chip)}." - "Will clip to [0, 1] and normalize to [0, 255]. Please, verify the band_scaling " - "and band_offset parameters of the workflow." - ) - normalized_chip = np.clip(normalized_chip, 0, 1) - normalized_chip = normalized_chip * 255.0 - return normalized_chip.astype(np.float32) - - return preprocessing_operation - - -def img_encoder_preprocess( - chip: NDArray[Any], preprocessing_operation: Callable[[NDArray[Any]], NDArray[Any]] -) -> NDArray[Any]: - """Preprocesses the input chip for the image encoder model. - - Args: - chip: Input chip. - preprocessing_operation: Preprocessing function (depending on the chip type). - - Returns: - Preprocessed chip. - """ - processed_chip = preprocessing_operation(chip) - input_tensor = torch.from_numpy(processed_chip.clip(0, 255)) - - # Normalizing input tensor by subtracting pixel mean and dividing by pixel std - pixel_mean = torch.Tensor(SAM_PIXEL_RGB_MEAN).view(-1, 1, 1) - pixel_std = torch.Tensor(SAM_PIXEL_RGB_STD).view(-1, 1, 1) - x = (input_tensor - pixel_mean) / pixel_std - return x.numpy() - - -def prompt_encoder_preprocess( - prompt: List[Prompt], -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - """Preprocesses the input prompt to the expected decoder format. - - Args: - prompt: List of prompts. - - Returns: - Tuple of preprocessed coordinates and labels. 
- """ - point_prompt = [p for p in prompt if len(p[0]) == 2] - bbox_prompt = [p for p in prompt if len(p[0]) == 4] - - if point_prompt: - coords, labels = zip(*point_prompt) - point_batch, point_label = np.array(coords), np.array(labels) - else: - point_batch, point_label = None, None - - if bbox_prompt: - coords, _ = zip(*bbox_prompt) - bbox_batch = np.array(coords).reshape(2, 2) - bbox_label = np.array([2, 3]) - else: # Padding with dummy bbox - bbox_batch = np.array([[0.0, 0.0]]) - bbox_label = np.array([-1]) - - onnx_coord = ( - np.concatenate([point_batch, bbox_batch], axis=0)[None, :, :].astype(np.float32) - if point_batch is not None - else bbox_batch[None, :, :].astype(np.float32) - ) - onnx_label = ( - np.concatenate([point_label, bbox_label], axis=0)[None, :].astype(np.float32) - if point_label is not None - else bbox_label[None, :].astype(np.float32) - ) - - return onnx_coord, onnx_label - - -def batch_prompt_encoder_preprocess( - prompt_group: List[List[Prompt]], -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - """Preprocesses a batch of prompts for the encoder model. - - Args: - prompt_group: List of prompt groups. - Returns: - Tuple of preprocessed coordinates and labels. - """ - processed_prompts = [prompt_encoder_preprocess(p) for p in prompt_group] - - onnx_coord = np.concatenate([p[0] for p in processed_prompts], axis=0) - onnx_label = np.concatenate([p[1] for p in processed_prompts], axis=0) - - return onnx_coord, onnx_label - - -def mask_encoder_preprocess( - input_mask: Optional[NDArray[Any]] = None, -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - """Preprocess the input mask for the encoder model. - - Args: - input_mask: Input mask. - Returns: - Tuple of preprocessed mask and has_mask inputs. 
- """ - if not input_mask: - onnx_mask_input = np.zeros((1, 1, 256, 256), dtype=np.float32) - onnx_has_mask_input = np.zeros(1, dtype=np.float32) - return onnx_mask_input, onnx_has_mask_input - - # TODO: Implement mask preprocessing if passed as argument - # input_mask = ... - return input_mask, np.ones(1, dtype=np.float32) - - -# -# POSTPROCESSING -# - - -def get_mask_within_bbox(mask: NDArray[Any], prompt: List[Prompt]) -> NDArray[Any]: - """Filter input mask pixels only for those within the bounding box of the prompt (if any). - - Args: - mask: Input mask. - prompt: List of prompts. - Returns: - Mask filtered within the bounding box of the prompt. - """ - bbox = [coords for coords, _ in prompt if len(coords) == 4] - if bbox: - xmin, ymin, xmax, ymax = cast(BBox, bbox[0]) - bbox_mask = np.full(mask.shape, False) - bbox_mask[ - 0, 0, int(round(ymin)) : int(round(ymax)), int(round(xmin)) : int(round(xmax)) - ] = True - return np.logical_and(mask, bbox_mask) - return mask - - -# -# ONNX RUNTIME METHODS -# - - -def extract_img_embeddings_from_chip( - chip_data: NDArray[Any], - preprocessing_operation: Callable[[NDArray[Any]], NDArray[Any]], - encoder: ort.InferenceSession, -) -> NDArray[Any]: - """Extract image embeddings from a chip using the encoder model. - - Args: - chip_data: Input chip data. - preprocessing_operation: Preprocessing operation for the chip. - encoder: ONNX encoder model. - Returns: - Image embeddings. - """ - model_input = img_encoder_preprocess(chip_data, preprocessing_operation) - model_output = encoder.run(None, {encoder.get_inputs()[0].name: model_input})[0] - return model_output diff --git a/src/vibe_lib/vibe_lib/shapefile.py b/src/vibe_lib/vibe_lib/shapefile.py deleted file mode 100644 index 6a50a954..00000000 --- a/src/vibe_lib/vibe_lib/shapefile.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from typing import Any - -import geopandas as gpd -import numpy as np -from numpy.typing import NDArray -from rasterio import Affine -from rasterio.crs import CRS -from rasterio.features import shapes -from shapely import geometry as shpg - -from vibe_core.data.core_types import AssetVibe, gen_guid -from vibe_lib.archive import create_flat_archive -from vibe_lib.geometry import SimplifyBy - - -def write_shapefile( - data: NDArray[Any], - input_crs: CRS, - tr: Affine, - mask1: NDArray[Any], - path: str, - simplify: str, - tolerance: float, - file_name: str, - output_crs: int = 4326, -) -> AssetVibe: - clusters = np.unique(data) - data1 = data * mask1.astype(np.uint16) - - for segment in clusters: - cluster = data1 == segment - df_shapes = gpd.GeoSeries( - [shpg.shape(s) for s, _ in shapes(data1.astype(np.uint16), mask=cluster, transform=tr)], - crs=input_crs, - ) # type: ignore - cluster_path = os.path.join(path, f"{file_name}{segment}.shp") - - if simplify == SimplifyBy.simplify: - df_shapes.simplify(tolerance).to_crs(output_crs).to_file(cluster_path) - elif simplify == SimplifyBy.convex: - df_shapes.convex_hull.to_file(cluster_path) - else: - df_shapes.to_file(cluster_path) - - # Create zip archive containing all output - archive_path = create_flat_archive(path, "result") - return AssetVibe(reference=archive_path, type="application/zip", id=gen_guid()) diff --git a/src/vibe_lib/vibe_lib/spaceeye/__init__.py b/src/vibe_lib/vibe_lib/spaceeye/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_lib/vibe_lib/spaceeye/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_lib/vibe_lib/spaceeye/chip.py b/src/vibe_lib/vibe_lib/spaceeye/chip.py deleted file mode 100644 index 1297bdb8..00000000 --- a/src/vibe_lib/vibe_lib/spaceeye/chip.py +++ /dev/null @@ -1,431 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -""" -This module contains code for running a pytorch module in chips extracted from -rasters. Chips are read from disk before inference and predictions are written -to disk as they are computed. -""" - -import logging -import os -from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast, overload - -import geopandas as gpd -import numpy as np -import onnxruntime as ort -import rasterio -from numpy.typing import NDArray -from rasterio import Affine -from rasterio.mask import raster_geometry_mask -from rasterio.windows import Window -from rasterio.windows import bounds as window_bounds -from rasterio.windows import transform as window_transform -from shapely import geometry as shpg -from shapely.geometry.base import BaseGeometry -from torch.utils.data import DataLoader, Dataset - -from vibe_core.data import Raster -from vibe_core.data.rasters import RasterChunk - -from ..raster import MaskedArrayType, write_window_to_file -from .dataset import Dims, get_read_windows, get_write_windows - -LOGGER = logging.getLogger(__name__) -T = TypeVar("T", bound=Raster) - -ChipDataType = Tuple[NDArray[Any], NDArray[Any], Dict[str, Any]] - -EPS = 1e-6 - - -def affine_all_close(tr1: Affine, tr2: Affine, rel_tol: float = EPS) -> bool: - return all(abs((a - b) / (a + b + EPS)) < rel_tol for a, b in zip(tr1, tr2)) - - -class InMemoryReader: - def __init__(self, downsampling: int): - self.rasters = {} - self.downsampling = downsampling - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - def _cache_raster(self, raster: Raster): - """ - Read the whole raster and keep it in memory for subsequent windows - """ - - self.logger.debug(f"Loading raster id={raster.id} into memory") - # Read the whole raster and keep it in memory - with rasterio.open(raster.raster_asset.url) as src: - ds_shape = (src.height // self.downsampling, src.width // self.downsampling) - raster_data = src.read(out_shape=ds_shape) - self.rasters[raster.id] = { - 
"data": raster_data, - "meta": src.meta, - } - self.logger.debug( - f"Loaded raster id={raster.id} into memory as array of shape " - f"{raster_data.shape} and dtype {raster_data.dtype}" - ) - - def _adjust_window(self, window: Window): - """Adjust window to downsampled raster""" - win = Window(*(i // self.downsampling for i in window.flatten())) - return win - - def _read_data_from_cache(self, raster: Raster, window: Window): - if raster.id not in self.rasters: - self._cache_raster(raster) - # Adjust window to downsampled raster - win = self._adjust_window(window) - i, j = win.toslices() - raster_cache = self.rasters[raster.id] - x = raster_cache["data"][:, i, j] - return x.astype(np.float32), x == raster_cache["meta"]["nodata"] - - def __call__(self, raster: Raster, window: Window, out_shape: Tuple[int, int]): - win_data, win_mask = self._read_data_from_cache(raster, window) - if win_data.shape[1:] != out_shape: - raise ValueError( - f"Requested output shape {out_shape}, got {win_data.shape[1:]} " - f"for downsampling {self.downsampling}" - ) - return win_data, win_mask - - -class ChipDataset(Dataset[ChipDataType]): - """ - Pytorch dataset that load chips of data for model inference. - - This dataset can be used with a pytorch DataLoader to load data as needed and - avoid loading the whole raster into memory. Will optionally downsample the - input to reduce computation requirements. 
- """ - - def __init__( - self, - rasters: List[T], - chip_size: Dims, - step_size: Dims, - downsampling: int = 1, - nodata: Optional[float] = None, - geometry_or_chunk: Optional[Union[BaseGeometry, RasterChunk]] = None, - reader: Optional[ - Callable[[T, Window, Tuple[int, int]], Tuple[NDArray[Any], NDArray[Any]]] - ] = None, - dtype: str = "float32", - ): - self.rasters = rasters - self.chip_size = Dims(*chip_size) - self.step_size = Dims(*step_size) - self.downsampling = downsampling - self.read_chip = Dims( - chip_size.width * downsampling, chip_size.height * downsampling, chip_size.time - ) - self.read_step = Dims( - step_size.width * downsampling, step_size.height * downsampling, step_size.time - ) - self.reader = reader if reader is not None else self._default_reader - - self._read_meta(rasters[0].raster_asset.url, geometry_or_chunk, nodata) - - self.out_width = self.width // self.downsampling - self.out_height = self.height // self.downsampling - self.out_transform = self.transform * Affine.scale(self.downsampling, self.downsampling) - - self.read_windows = get_read_windows( - self.width, self.height, len(self.rasters), self.read_chip, self.read_step, self.offset - ) - self.write_windows, self.chip_slices = get_write_windows( - self.out_width, self.out_height, len(self.rasters), self.chip_size, self.step_size - ) - - self.meta = { - "driver": "GTiff", - "height": self.out_height, - "width": self.out_width, - "crs": self.crs, - "dtype": dtype, - "transform": self.out_transform, - "nodata": self.nodata, - } - - def _adjust_roi_window(self, window: Window) -> Window: - width = self.chip_size.width - height = self.chip_size.height - if window.width >= width and window.height >= height: - return window - width = max(window.width, width) - height = max(window.height, height) - LOGGER.warning( - f"RoI has dimensions {window.width, window.height} and chip size is {self.chip_size}," - f" adjusting to {width, height}" - ) - diff_w = width - window.width - dw = diff_w 
// 2 - diff_h = height - window.height - dh = diff_h // 2 - - hs, ws = window.toranges() - min_w = max(ws[0] - dw, 0) - max_w = min(ws[1] + diff_w - dw, self.raster_width) - min_h = max(hs[0] - dh, 0) - max_h = min(hs[1] + diff_h - dh, self.raster_height) - - new_win = Window.from_slices((min_h, max_h), (min_w, max_w)) - LOGGER.info(f"Adjusting from {window} to {new_win}") - return new_win - - def __len__(self): - return len(self.read_windows) - - def _read_meta( - self, - url: str, - geometry_or_chunk: Optional[Union[BaseGeometry, RasterChunk]] = None, - nodata: Optional[float] = None, - ): - with rasterio.open(url) as src: - self.crs = src.crs - self.raster_width: int = src.width - self.raster_height: int = src.height - self.nodata = src.nodata if nodata is None else nodata - if geometry_or_chunk and isinstance(geometry_or_chunk, BaseGeometry): - # Compute envelope in native CRS to avoid nodata - box = cast( - shpg.Polygon, - gpd.GeoSeries(geometry_or_chunk, crs="epsg:4326") - .to_crs(self.crs) - .iloc[0] - .envelope, - ) - window = cast( - Window, raster_geometry_mask(src, [box], all_touched=True, crop=True)[2] - ) - # Adjust window to make sure it is not too small - window = self._adjust_roi_window(window) - # Compute the transform with the adjusted window - self.transform: Affine = window_transform(window, src.transform) - self.roi_window = window - self.width: int = window.width - self.height: int = window.height - self.offset = Dims(window.col_off, window.row_off, 0) - elif geometry_or_chunk and isinstance(geometry_or_chunk, RasterChunk): - col_off, row_off, width, height = geometry_or_chunk.limits - self.transform: Affine = src.transform - self.width: int = width - self.height: int = height - self.offset = Dims(col_off, row_off, 0) - self.roi_window = Window(*geometry_or_chunk.limits) # type:ignore - box = window_bounds(self.roi_window, self.transform) - else: - box = shpg.box(*src.bounds) - self.transform: Affine = src.transform - self.width: int = 
src.width - self.height: int = src.height - self.offset = Dims(0, 0, 0) - self.roi_window = Window(0, 0, src.width, src.height) # type:ignore - self.roi = box - - @staticmethod - def _default_reader( - raster: Raster, window: Window, out_shape: Tuple[int, int] - ) -> Tuple[NDArray[np.float32], NDArray[np.bool_]]: - with rasterio.open(raster.raster_asset.url) as src: - x = src.read(window=window, out_shape=out_shape, masked=True).astype(np.float32) - x = cast(MaskedArrayType, x) - return x.data, np.ma.getmaskarray(x) - - def __getitem__(self, idx: int) -> ChipDataType: - read_window, read_times = self.read_windows[idx] - write_window, write_times = self.write_windows[idx] - chip_slices = self.chip_slices[idx] - # Squeeze to remove singleton dimension if time chip_size is 1 - data = [ - self.reader(self.rasters[i], read_window, self.chip_size[:2]) - for i in range(*read_times) - ] - data, mask = (np.squeeze(np.stack(x)) for x in zip(*data)) - - write_info = { - "write_window": write_window, - "write_times": write_times, - "chip_slices": chip_slices, - "meta": self.meta, - } - return data, mask, write_info - - def get_filename(self, idx: int): - return f"pred_{idx}.tif" - - -class StackOnChannelsChipDataset(ChipDataset): - def __init__( - self, - rasters: List[List[T]], - chip_size: Dims, - step_size: Dims, - downsampling: int = 1, - nodata: Optional[float] = None, - geometry_or_chunk: Optional[Union[BaseGeometry, RasterChunk]] = None, - reader: Optional[ - Callable[[T, Window, Tuple[int, int]], Tuple[NDArray[Any], NDArray[Any]]] - ] = None, - ): - super().__init__( - rasters[0], chip_size, step_size, downsampling, nodata, geometry_or_chunk, reader - ) - self.datasets = [ - ChipDataset(r, chip_size, step_size, downsampling, nodata, geometry_or_chunk, reader) - for r in rasters - ] - for attr in ("width", "height", "crs", "transform"): - for d in self.datasets: - ref_attr = getattr(self, attr) - comp_attr = getattr(d, attr) - if (attr == "transform" and not 
affine_all_close(ref_attr, comp_attr)) or ( - attr != "transform" and ref_attr != comp_attr - ): - raise ValueError( - f"Expected '{attr}' to be the same for all datasets, found " - f"{ref_attr} != {comp_attr}" - ) - - def __getitem__(self, idx: int) -> ChipDataType: - # Convert sequence of tuples to tuple of sequences - # (d, i), (d, i), (d, i) -> (d, d, d), (i, i, i) - chip_data, chip_mask, chip_info = zip(*(d[idx] for d in self.datasets)) - chip_data = cast(List[NDArray[Any]], chip_data) - chip_mask = cast(List[NDArray[Any]], chip_mask) - chip_info = cast(List[Dict[str, str]], chip_info) - assert all( - chip_info[0][k] == c[k] - for c in chip_info - for k in ("write_window", "write_times", "chip_slices") - ) - chip_data = np.concatenate([c[None] if c.ndim == 2 else c for c in chip_data]) - chip_mask = np.concatenate([c[None] if c.ndim == 2 else c for c in chip_mask]) - return chip_data, chip_mask, chip_info[0] - - -def custom_collate( - samples: List[ChipDataType], -) -> Tuple[NDArray[Any], NDArray[Any], List[Dict[str, Any]]]: - """Custom function for joining samples from `ChipDataset` into a batch""" - chip_data, chip_mask, write_info = zip(*samples) - chip_data = cast(List[NDArray[Any]], chip_data) - chip_mask = cast(List[NDArray[Any]], chip_mask) - write_info = cast(List[Dict[str, Any]], write_info) - return collate_data(chip_data), collate_data(chip_mask), write_info - - -@overload -def collate_data(data: List[NDArray[Any]]) -> NDArray[Any]: ... - - -@overload -def collate_data(data: Dict[Any, NDArray[Any]]) -> Dict[Any, NDArray[Any]]: ... - - -@overload -def collate_data(data: NDArray[Any]) -> NDArray[Any]: ... 
- - -def collate_data( - data: Union[List[NDArray[Any]], Dict[Any, NDArray[Any]], NDArray[Any]], -) -> Union[Dict[Any, NDArray[Any]], NDArray[Any]]: - if isinstance(data, dict): - return {k: collate_data(v) for k, v in data.items()} - if isinstance(data, (list, tuple)): - if isinstance(data[0], np.ndarray): - return np.stack(data) - if isinstance(data, np.ndarray): - return data - - raise ValueError(f"Invalid type {type(data)} for collate function.") - - -def get_loader( - dataset: ChipDataset, - batch_size: int, - num_workers: int = 1, - collate_fn: Callable[ - [List[ChipDataType]], Tuple[NDArray[Any], NDArray[Any], List[Dict[str, Any]]] - ] = custom_collate, -): - return DataLoader( - dataset, - batch_size=batch_size, - collate_fn=collate_fn, # type: ignore - num_workers=num_workers, - ) - - -def predict_chips( - model: ort.InferenceSession, - dataloader: DataLoader[ChipDataType], - out_dir: str, - skip_nodata: bool, - pre_process: Callable[[NDArray[Any], NDArray[Any]], NDArray[Any]] = lambda x, _: x, - post_process: Callable[[NDArray[Any], NDArray[Any], NDArray[Any]], NDArray[Any]] = lambda *x: x[ - -1 - ], -) -> List[str]: - """ - Function to extract chips, compute model predictions, and save to disk. - - Optionally accepts `pre_process` and `post_process` functions which are - called before and after model predictions, respectively. 
- """ - filepaths: List[str] = [] - dataset = cast(ChipDataset, dataloader.dataset) - get_filename = dataset.get_filename - out_shape: Optional[Tuple[int, ...]] = None - for batch_idx, batch in enumerate(dataloader): - LOGGER.info(f"Running model for batch ({batch_idx + 1}/{len(dataloader)})") - chip_data, chip_mask, write_info_list = batch - if skip_nodata and chip_mask.all(): - if out_shape is None: - # Run the model to get the output shape - model_inputs = pre_process(chip_data, chip_mask) - out_shape = model.run(None, {model.get_inputs()[0].name: model_inputs})[0].shape[1:] - LOGGER.info(f"Skipping batch of nodata ({batch_idx+1})") - assert out_shape is not None - model_out = dataset.nodata * np.ones((chip_data.shape[0], *out_shape)) - else: - model_inputs = pre_process(chip_data, chip_mask) - model_out = model.run(None, {model.get_inputs()[0].name: model_inputs})[0] - out_shape = model_out.shape[1:] # ignore batch size - post_out = post_process(chip_data, chip_mask, model_out) - write_prediction_to_file( - post_out, chip_mask, write_info_list, out_dir, filepaths, get_filename - ) - return filepaths - - -def write_prediction_to_file( - chip_data: NDArray[Any], - chip_mask: NDArray[Any], - write_info_list: List[Dict[str, Any]], - out_dir: str, - filepaths: List[str], - get_filename: Callable[[int], str], -): - for out, mask, write_info in zip(chip_data, chip_mask, write_info_list): - if out.ndim == 3: - out = out[None] # Create singleton time dimension if necessary - if mask.ndim == 3: - mask = mask[None] - chip_times, chip_rows, chip_cols = write_info["chip_slices"] - for write_t, chip_t in zip(range(*write_info["write_times"]), range(*chip_times)): - filename = get_filename(write_t) - filepath = os.path.join(out_dir, filename) - if filepath not in filepaths: - filepaths.append(filepath) - write_window_to_file( - out[chip_t, :, slice(*chip_rows), slice(*chip_cols)], - mask[chip_t, :, slice(*chip_rows), slice(*chip_cols)].any(axis=0), - 
write_info["write_window"], - filepath, - write_info["meta"], - ) diff --git a/src/vibe_lib/vibe_lib/spaceeye/dataset.py b/src/vibe_lib/vibe_lib/spaceeye/dataset.py deleted file mode 100644 index 3284c461..00000000 --- a/src/vibe_lib/vibe_lib/spaceeye/dataset.py +++ /dev/null @@ -1,530 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -""" -Module for generating inputs for the SpaceEye model. - -It includes code for splitting the RoI into chips of adequate size, loading and -normalizing Sentinel 1 and 2 data, doing illuminance normalization, and -generating the windows for writing predictions to file. - -The main idea is that we only load the necessary data to perform inference, and -write predictions to disk as they are done, to avoid loading the whole thing -into memory. -""" - -import logging -from datetime import datetime, timedelta -from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, TypeVar, Union, cast - -import geopandas as gpd -import numpy as np -import rasterio -from numpy.typing import NDArray -from rasterio import Affine -from rasterio.mask import raster_geometry_mask -from rasterio.windows import Window -from rasterio.windows import transform as window_transform -from shapely.geometry.base import BaseGeometry -from torch.utils.data import Dataset - -from vibe_core.data import Sentinel1Raster, Sentinel2Raster -from vibe_core.data.core_types import AssetVibe -from vibe_core.data.sentinel import ( - Sentinel1RasterTileSequence, - Sentinel2CloudMaskTileSequence, - Sentinel2RasterTileSequence, -) - -from .illumination import interpolate_illuminance, masked_average_illuminance -from .utils import QUANTIFICATION_VALUE - -EPS = 1e-10 -LOGGER = logging.getLogger(__name__) - - -class Dims(NamedTuple): - width: int - height: int - time: int - - -Interval = Tuple[int, int] - -DatasetReturnType = Tuple[ - Dict[str, NDArray[Any]], - Dict[str, Union[Window, Interval, Tuple[Interval, Interval, Interval]]], -] 
- -TileSequenceData = Union[ - Sentinel1RasterTileSequence, - Sentinel2RasterTileSequence, - Sentinel2CloudMaskTileSequence, -] - -T = TypeVar("T", Sentinel1Raster, Sentinel2Raster) -NDArrayInt = NDArray[np.int_] - - -def get_read_intervals( - dim_size: int, chip_size: int, step: int, offset: int -) -> Tuple[NDArrayInt, NDArrayInt]: - """ - Divide total dim size in intervals by using an approximate step - Actual step is computed by rounding the step so that the number of windows - is the rounded number of windows with the desired step - """ - if dim_size < chip_size: - raise ValueError( - f"{dim_size=} cannot be smaller than {chip_size=}. " - "Please consider reducing the step/chip size or increasing the input geometry." - ) - - # Effects of using round versus ceil for determining step size: - # With round: - # This number of blocks should have the step be at most 1.5x the original step - # Which should only happen when the chip size is quite big compared to the dimension size - # With ceil: step size should be at most the chosen step - num_blocks = int(np.ceil((dim_size - chip_size) / step)) + 1 - # Make sure we capture the whole area if dim_size is barely larger - if dim_size > chip_size: - num_blocks = max(num_blocks, 2) - start = np.round(np.linspace(0, dim_size - chip_size, num_blocks)).astype(int) - end = np.clip(start + chip_size, 0, dim_size) - assert end[-1] == dim_size, f"{end[-1]=} != {dim_size}" - return start + offset, end + offset - - -def get_write_intervals( - dim_size: int, chip_size: int, step: int, offset: int -) -> Tuple[Tuple[NDArrayInt, NDArrayInt], Tuple[NDArrayInt, NDArrayInt]]: - """ - Divide total dim size in non-overlapping intervals which divide the overlap - sections according to proximity to the center of the interval - """ - read_start, read_end = get_read_intervals(dim_size, chip_size, step, offset) - edges = np.concatenate((read_start[:1], (read_end[:-1] + read_start[1:]) // 2, read_end[-1:])) - write_start = edges[:-1].astype(int) 
- write_end = edges[1:].astype(int) - chip_start = write_start - read_start - chip_end = write_end - read_start - return (write_start, write_end), (chip_start, chip_end) - - -def get_read_windows( - width: int, height: int, time_length: int, chip_size: Dims, step: Dims, offset: Dims -) -> List[Tuple[Window, Interval]]: - """ - Generate read windows for a tensor with width, height, and time_length. - The windows are generated according to chip_size, step and offset (for all three dimensions). - The offset is used to start the first read window in the RoI boundary. - """ - return [ - ( - Window.from_slices(rows, cols), - time, - ) - for time in zip(*get_read_intervals(time_length, chip_size.time, step.time, offset.time)) - for rows in zip(*get_read_intervals(height, chip_size.height, step.height, offset.height)) - for cols in zip(*get_read_intervals(width, chip_size.width, step.width, offset.width)) - ] - - -def get_write_windows( - width: int, height: int, time_length: int, chip_size: Dims, step: Dims -) -> Tuple[List[Tuple[Window, Interval]], List[Tuple[Interval, Interval, Interval]]]: - """ - Generate write windows for a tensor with width, height, and time_length. - The windows are generated according to chip_size and step (for all three dimensions). 
- """ - col_intervals, chip_col_intervals = get_write_intervals(width, chip_size.width, step.width, 0) - row_intervals, chip_row_intervals = get_write_intervals( - height, chip_size.height, step.height, 0 - ) - time_intervals, chip_time_intervals = get_write_intervals( - time_length, chip_size.time, step.time, 0 - ) - return ( - [ - ( - Window.from_slices(rows, cols), - time, - ) - for time in zip(*time_intervals) - for rows in zip(*row_intervals) - for cols in zip(*col_intervals) - ], - [ - (chip_time, chip_rows, chip_cols) - for chip_time in zip(*chip_time_intervals) - for chip_rows in zip(*chip_row_intervals) - for chip_cols in zip(*chip_col_intervals) - ], - ) - - -def adjust_dim( - window_dim: float, window_ranges: Tuple[float, float], chip_dim: float, raster_bounds: float -) -> Tuple[float, float]: - """ - Adjust a window's dimension (width or height) to make sure the window reaches the chip size - while still within the raster bounds. - - Args: - chip_dim: The chip dimension (width or height). - window_dim: The window dimension (width or height). - window_ranges: The window ranges (start, end). - raster_bounds: The raster dimension (width or height). - - Returns: - The adjusted window ranges. - """ - diff = chip_dim - window_dim - offset = diff // 2 - - offset_low = offset if window_ranges[0] - offset >= 0 else window_ranges[0] - offset_high = diff - offset_low - if offset_high + window_ranges[1] > raster_bounds: - offset_high = raster_bounds - window_ranges[1] - offset_low = diff - offset_high - - min_dim = max(window_ranges[0] - offset_low, 0) - max_dim = window_ranges[1] + offset_high - - return min_dim, max_dim - - -class SpaceEyeReader(Dataset[DatasetReturnType]): - """Dataset that lazily reads chips from sentinel 1 and 2 rasters. - The dataset computes the necessary chips to cover the whole RoI according to - chip size and overlap, and generates input data, as well as write windows - for each chip. 
- It also includes preprocessing steps such as input standardization, - discarding very cloud days illuminance normalization - Input data is a daily tensor with padding on non-available days. - """ - - def __init__( - self, - s1_items: Optional[Sentinel1RasterTileSequence], - s2_items: Sentinel2RasterTileSequence, - cloud_masks: Sentinel2CloudMaskTileSequence, - time_range: Tuple[datetime, datetime], - geometry: BaseGeometry, - chip_size: Dims, - overlap: Tuple[float, float, float], - s2_bands: List[int], - min_clear_ratio: float, - normalize_illuminance: bool, - ): - self.s1_items = s1_items - self.s2_items = s2_items - self.cloud_masks = cloud_masks - ref_item = s2_items.assets[0] - self.time_range = time_range - self.geometry = geometry - self.chip_size = chip_size - self.min_clear_ratio = min_clear_ratio - if any((o < 0) or (o >= 1) for o in overlap): - raise ValueError(f"Overlap values must be in range [0, 1), found {overlap}") - self.overlap = overlap - self.step = Dims(*(int(s * (1 - o)) for s, o in zip(chip_size, overlap))) - self.s2_bands = s2_bands - self.normalize_illuminance = normalize_illuminance - self.time_length = (self.time_range[1] - self.time_range[0]).days + 1 - if self.time_length != self.chip_size.time: - raise ValueError( - f"Expected time length = {self.time_length} to be the same as " - f"chip size = {self.chip_size.time}" - ) - self.write_range = s2_items.write_time_range - self.write_indices = ( - (self.write_range[0] - self.time_range[0]).days, - (self.write_range[1] - self.time_range[0]).days + 1, - ) - - with rasterio.open(ref_item.url) as src: - # Assuming all products are from the same tile for now - self.crs = src.crs - self.raster_width: int = src.width - self.raster_height: int = src.height - # Compute envelope in native CRS to avoid nodata - box = gpd.GeoSeries(geometry, crs="epsg:4326").to_crs(self.crs).iloc[0].envelope - window = cast(Window, raster_geometry_mask(src, [box], all_touched=True, crop=True)[2]) - # Adjust window 
to make sure it is not too small - window = self._adjust_roi_window(window) - # Compute the transform with the adjusted window - self.transform: Affine = window_transform(window, src.transform) - self.width: int = window.width - self.height: int = window.height - self.roi = box - self.offset = Dims(window.col_off, window.row_off, 0) - self.roi_window = window - read_windows = get_read_windows( - self.width, self.height, self.time_length, self.chip_size, self.step, self.offset - ) - write_windows, chip_slices = get_write_windows( - self.width, self.height, self.time_length, self.chip_size, self.step - ) - assert all(i == write_windows[0][1] for _, i in write_windows) - assert all(i == chip_slices[0][0] for i, _, _ in chip_slices) - # Overwrite time indices by what we get from the input sequence - write_windows = [(w, self.write_indices) for w, _ in write_windows] - chip_slices = [(self.write_indices, h, w) for _, h, w in chip_slices] - - assert len(read_windows) == len(write_windows) == len(chip_slices) - self.s1_indices = self._get_indices(self.s1_items) if self.s1_items is not None else None - self.s2_indices = self._get_s2_indices(self.s2_items, self.cloud_masks) - - # Filter out windows without any cloud-free data - valid_idx = [idx for idx in self.s2_indices if idx != -1] - - if valid_idx: - self.read_windows = cast(List[Tuple[Window, Interval]], read_windows) - self.write_windows = cast(List[Tuple[Window, Interval]], write_windows) - self.chip_slices = cast(List[Tuple[Interval, Interval, Interval]], chip_slices) - else: - self.read_windows, self.write_windows, self.chip_slices = [], [], [] - assert len(self.read_windows) == len(self.write_windows) == len(self.chip_slices) - - self.illuminance = self._get_illumination_array() - - def _adjust_roi_window(self, window: Window) -> Window: - width = self.chip_size.width - height = self.chip_size.height - if window.width >= width and window.height >= height: - return window - width = max(window.width, width) - height 
= max(window.height, height) - LOGGER.warning( - f"RoI has dimensions {window.width, window.height} and chip size is {self.chip_size}," - f" adjusting to {width, height}" - ) - - hs, ws = window.toranges() - - min_h, max_h = adjust_dim(window.height, hs, height, self.raster_height) - min_w, max_w = adjust_dim(window.width, ws, width, self.raster_width) - - new_win = Window.from_slices((min_h, max_h), (min_w, max_w)) - LOGGER.info(f"Adjusting from {window} to {new_win}") - return new_win - - def _get_indices(self, sequence: TileSequenceData) -> List[int]: - """ - Get timestep indices for each asset in the sequence. - Assuming daily predictions here. Not supporting multiple day intervals. - For a generic timestep we would need to treat possible collisions, i.e., - multiple products on the same timestep index. This is not currently treated here. - """ - asset_list = sequence.get_ordered_assets() - start = sequence.asset_time_range[asset_list[0].id][0] - return [(sequence.asset_time_range[a.id][0] - start).days for a in asset_list] - - def _get_clear_ratio(self, cloud_mask_asset: AssetVibe) -> int: - mask = self._read_cloud_mask( - cloud_mask_asset, - np.zeros(1, dtype=bool), - self.roi_window, - ) - return (mask == 1).mean() - - def _get_s2_indices( - self, - s2_sequence: Sentinel2RasterTileSequence, - cloud_mask_sequence: Sentinel2CloudMaskTileSequence, - ) -> List[int]: - """ - Get indices and remove items that have too much cloud cover. To do so, - we consider that each asset in the same (ordered) position in s2_sequence - and cloud_mask_sequence is associated. 
- """ - indices = self._get_indices(s2_sequence) - return [ - index if self._get_clear_ratio(cloudmask_item) > self.min_clear_ratio else -1 - for index, cloudmask_item in zip(indices, cloud_mask_sequence.get_ordered_assets()) - ] - - def _get_illumination_array(self) -> NDArray[np.float32]: - """ - Compute the illumance array for each available product in the RoI - The illuminance for days where there is no data (or not enough cloudless - data) is obtained through interpolation - """ - if not self.normalize_illuminance: - return np.ones((len(self.s2_bands), self.time_length, 1, 1), dtype=np.float32) - illuminance = np.zeros((len(self.s2_bands), self.time_length, 1, 1), dtype=np.float32) - mask_ar = np.zeros((1, self.time_length, 1, 1), dtype=np.float32) - for s2_asset, cloud_mask_asset, index in zip( - self.s2_items.get_ordered_assets(), - self.cloud_masks.get_ordered_assets(), - self.s2_indices, - ): - if 0 <= index < self.time_length: - x, m = self._read_s2(s2_asset, self.roi_window, cloud_mask_asset) - m = m == 1 - clear_ratio = m.mean() - if clear_ratio < self.min_clear_ratio: - LOGGER.warning( - "Discarding sentinel data for illumination computation with date " - f"{self.s2_items.asset_time_range[s2_asset.id][0]} (index {index}) because " - f"clear_ratio {clear_ratio:.1%} < threshold {self.min_clear_ratio:.1%}" - ) - continue - illum_ar = masked_average_illuminance(x, m.astype(np.float32)) - illuminance[:, index] = illum_ar - mask_ar[:, index] = 1 - if mask_ar.sum() == 0: - LOGGER.warning("No cloudless day available for illuminance calculation.") - return np.ones((len(self.s2_bands), self.time_length, 1, 1), dtype=np.float32) - return interpolate_illuminance(illuminance, mask_ar) - - @staticmethod - def _read_data( - file_ref: str, window: Window, bands: Optional[List[int]] = None - ) -> NDArray[Any]: - """ - Read a window of data from a file - """ - offset_bands = [b + 1 for b in bands] if bands else None - with rasterio.open(file_ref) as src: - return 
src.read(indexes=offset_bands, window=window) - - def _read_s2( - self, - s2_asset: AssetVibe, - window: Window, - cloud_mask_asset: AssetVibe, - ) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - """ - Read a window sentinel 2 data and the associated cloud mask - """ - # Read s2 data - s2_data = self._read_data(s2_asset.url, window, self.s2_bands) - nodata = s2_data.sum(axis=0, keepdims=True) == 0 - s2_data = s2_data.astype(np.float32) / QUANTIFICATION_VALUE - cloud_mask = self._read_cloud_mask(cloud_mask_asset, nodata, window) - return s2_data, cloud_mask - - def _read_cloud_mask( - self, cloud_mask_asset: AssetVibe, nodata: NDArray[np.bool8], window: Window - ) -> NDArray[np.float32]: - """ - Read a cloud mask and change the binary mask to the format expected by the model - """ - # Read cloud mask - cloud_mask = self._read_data(cloud_mask_asset.url, window, [0]) - # Use this masking for now for compatibility purposes - # TODO: Change the model to receive a binary mask for Sentinel2 as well - cloud_mask[cloud_mask == 1] = 2 - cloud_mask[cloud_mask == 0] = 1 - # Add nodata as cloud - cloud_mask[nodata] = 2 - return cloud_mask.astype(np.float32) - - def _read_s1( - self, s1_asset: AssetVibe, window: Window, _ - ) -> Tuple[NDArray[np.float32], NDArray[np.bool8]]: - filepath = s1_asset.url - s1 = self._read_data(filepath, window, None) - s1_available = np.sum(np.abs(s1), axis=0) > 0 - s1 = (s1 + 20.0) / 40.0 - s1[:, ~s1_available] = 0.0 # just to make it match the images that are completely missing. - return s1, s1_available - - def _get_data_array( - self, - items: List[AssetVibe], - mask_items: List[Optional[AssetVibe]], - indices: List[int], - read_times: Interval, - read_window: Window, - read_callback: Callable[ - [AssetVibe, Window, Optional[AssetVibe]], Tuple[NDArray[np.float32], NDArray[Any]] - ], - ) -> Tuple[NDArray[np.float32], NDArray[Any]]: - """ - Get data array which will be used as input to the network. 
- This is done by selecting data inside the time range of the input - and inserting it in the correct time index - """ - x = None - mask = None - # Closed at beginning, open at ending - read_start, read_end = read_times - for item, mask_item, index in zip(items, mask_items, indices): - if read_start <= index < read_end: - chip_data, chip_mask = read_callback(item, read_window, mask_item) - if x is None: - x = np.zeros( - ( - chip_data.shape[0], - self.chip_size.time, - self.chip_size.height, - self.chip_size.width, - ), - dtype=np.float32, - ) - if mask is None: - mask = np.zeros( - (1, self.chip_size.time, self.chip_size.height, self.chip_size.width), - dtype=chip_mask.dtype, - ) - x[:, index - read_start] = chip_data - mask[:, index - read_start] = chip_mask - if x is None or mask is None: - start_time = (self.time_range[0] + timedelta(days=int(read_start))).isoformat() - end_time = (self.time_range[0] + timedelta(days=int(read_end))).isoformat() - raise RuntimeError( - f"Could not find any cloud-free data from dates {start_time} to {end_time}" - ) - return x, mask - - def __getitem__(self, idx: int) -> DatasetReturnType: - # Tensors are C x T x H x W - read_window, read_times = self.read_windows[idx] - - s2_data, s2_mask = self._get_data_array( - self.s2_items.get_ordered_assets(), - self.cloud_masks.get_ordered_assets(), # type: ignore - self.s2_indices, - read_times, - read_window, - self._read_s2, # type: ignore - ) - # Get data on where to write in the file - write_window, write_times = self.write_windows[idx] - # Which part of the predictions will be written - chip_slices = self.chip_slices[idx] - # Illuminance values for the chip - chip_illuminance = self.illuminance[:, read_times[0] : read_times[1]] - - # Data we feed into the network - chip_data = { - "S2": s2_data / (chip_illuminance + np.float32(EPS)), - "cloud_label": s2_mask, - "illuminance": chip_illuminance, - } - if self.s1_items is not None: - s1_sorted_assets = self.s1_items.get_ordered_assets() 
- # Read data - s1_data, s1_mask = self._get_data_array( - s1_sorted_assets, - [None for _ in range(len(s1_sorted_assets))], - cast(List[int], self.s1_indices), - read_times, - read_window, - self._read_s1, - ) - chip_data.update({"S1": s1_data, "S1_mask": s1_mask}) - # Information for writing in the files - write_info = { - "write_window": write_window, - "write_times": write_times, - "chip_slices": chip_slices, - } - - return chip_data, write_info - - def __len__(self) -> int: - return len(self.read_windows) diff --git a/src/vibe_lib/vibe_lib/spaceeye/illumination.py b/src/vibe_lib/vibe_lib/spaceeye/illumination.py deleted file mode 100644 index a2b17d7c..00000000 --- a/src/vibe_lib/vibe_lib/spaceeye/illumination.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -""" -Methods for computing, normalizing and interpolation illuminance of -multispectral raster timeseries. -""" - -from typing import Tuple - -import numpy as np -from numpy.typing import NDArray - -EPS = 1e-10 -MIN_CLEAR_RATIO = 0.01 -MIN_OVERLAP = 0.01 -DEFAULT_LAMBDA_T = 0.5 -SPATIAL_AXES = (-2, -1) - - -def extract_illuminance( - x: NDArray[np.float32], mask: NDArray[np.float32] -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - numerator = (x * mask).sum(axis=SPATIAL_AXES, keepdims=True) - denominator = mask.sum(axis=SPATIAL_AXES, keepdims=True) - illuminance = numerator / (denominator + EPS) - albedo = x / (illuminance + EPS) - return albedo, illuminance - - -def extract_illuminance_simple( - x: NDArray[np.float32], mask: NDArray[np.float32] -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - illuminance = masked_average_illuminance(x, mask) - illuminance_mask = (mask.mean(axis=SPATIAL_AXES, keepdims=True) > MIN_CLEAR_RATIO).astype( - np.float32 - ) - interp_illuminance = interpolate_illuminance(illuminance, illuminance_mask) - x /= interp_illuminance + EPS # Modify inplace to save memory - return x, interp_illuminance - - 
-def masked_average_illuminance( - x: NDArray[np.float32], mask: NDArray[np.float32] -) -> NDArray[np.float32]: - # x: C x T x H x W - # mask: 1 x T x H x W - # output: C x T x 1 x 1 - numerator = (x * mask).sum(axis=SPATIAL_AXES, keepdims=True) - denominator = mask.sum(axis=SPATIAL_AXES, keepdims=True) - illuminance = numerator / (denominator + EPS) - return illuminance - - -def extract_illuminance_relative( - x: NDArray[np.float32], mask: NDArray[np.float32] -) -> Tuple[NDArray[np.float32], NDArray[np.float32]]: - illuminance_mask = (mask.mean(axis=SPATIAL_AXES, keepdims=True) > MIN_CLEAR_RATIO).astype( - np.float32 - ) - - # Relevant inputs for which we have data - # We'll interpolate the rest - available = np.squeeze(illuminance_mask).astype(bool) - x_s = x[:, available] - mask_s = mask[:, available] - - # find the anchor image - clear_percentage = mask_s.sum(axis=0).mean(axis=SPATIAL_AXES) - t_anchor = np.argmax(clear_percentage) - - # compute the anchor illuminance - anchor_x = x_s[:, t_anchor : t_anchor + 1] - anchor_mask = mask_s[:, t_anchor : t_anchor + 1] - anchor_illuminance = masked_average_illuminance(anchor_x, anchor_mask) - - # Compute relative illuminance - ratio_mask = ((mask_s + anchor_mask) == 2.0).astype(np.float32) - # Fall back to the old method if there is not enough overlap - overlap_mask = ratio_mask.mean(axis=(0, *SPATIAL_AXES)) > MIN_OVERLAP - _, i_old = extract_illuminance(x_s[:, ~overlap_mask], mask_s[:, ~overlap_mask]) - # New method for the rest - relative_illuminance = masked_average_illuminance( - x_s[:, overlap_mask], ratio_mask[:, overlap_mask] - ) / (masked_average_illuminance(anchor_x, ratio_mask[:, overlap_mask]) + EPS) - # Compute final illuminance - i_new = anchor_illuminance * relative_illuminance - - available_idx = np.where(available)[0] - illuminance = np.zeros((*x.shape[:2], 1, 1), dtype=np.float32) - illuminance[:, available_idx[~overlap_mask]] = i_old - illuminance[:, available_idx[overlap_mask]] = i_new - 
interp_illuminance = interpolate_illuminance(illuminance, illuminance_mask) - x /= interp_illuminance + EPS # Modify inplace to save memory - return x, interp_illuminance - - -def add_illuminance( - albedo: NDArray[np.float32], illuminance: NDArray[np.float32] -) -> NDArray[np.float32]: - return albedo * illuminance - - -def interpolate_illuminance( - illuminance: NDArray[np.float32], mask: NDArray[np.float32], lambda_t: float = DEFAULT_LAMBDA_T -) -> NDArray[np.float32]: - C, T, _, _ = illuminance.shape - t_tensor = np.arange(T, dtype=np.float32) - delta_t_matrix = np.abs(t_tensor[None] - t_tensor[:, None]) - weight = np.exp(-lambda_t * delta_t_matrix) - illuminance_sum = (weight @ illuminance.reshape((C, T, -1))).reshape(illuminance.shape) - mask_sum = (weight @ mask.reshape((1, T, -1))).reshape(mask.shape) - weighted_illuminance = illuminance_sum / (mask_sum + EPS) - return weighted_illuminance * (1 - mask) + illuminance * mask diff --git a/src/vibe_lib/vibe_lib/spaceeye/interpolation.py b/src/vibe_lib/vibe_lib/spaceeye/interpolation.py deleted file mode 100644 index cb9fc195..00000000 --- a/src/vibe_lib/vibe_lib/spaceeye/interpolation.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Dict - -import torch -import torch.nn as nn -from einops import rearrange, repeat - -EPS = 1e-6 - - -def generate_delta_matrix(dim: int): - """ - The matrix returned calculates discrete forward differences (discrete derivative). - delta * x returns a matrix with elements x[t+1]-x[t] with the last entry being 0. - - The matrix returned looks in general like this: - delta = [ [-1, 1, 0, ..., 0, 0], - [ 0, -1, 1, ..., 0, 0], - ... 
- [ 0, 0, 0, ..., -1, 1], - [ 0, 0, 0, ..., 0, 0]] - """ - d = torch.zeros((dim, dim), dtype=torch.float32) - i = torch.arange(dim - 1) - d[i, i] = -1 - d[i, i + 1] = 1 - return d - - -def masked_time_average(x: torch.Tensor, m: torch.Tensor): - n = (x * m).sum(dim=2, keepdim=True) - d = m.sum(dim=2, keepdim=True) - return n / (d + EPS) - - -class DampedInterpolation(nn.Module): - """ - This algorithm implements interpolation through minimizing an object function, namely: - - F(X) = sum_t || (X_t - S2_t) .* M_t ||_F^2 + alpha sum_t ||X_{t+1}-X_t||_F^2 - = || (X - S2) .* M ||_F^2 + alpha || Delta * X ||_F^2 - - The gradient is - F'(X) = 2 * M**2 .* (X-S2) + 2 * alpha * (Delta^T @ Delta) @ X - We use || F'(X) ||_F^2 / (nb*nt*nx*ny) as a stoppping criteria for the algorithm. - Note that M**2=M when M represents a 0/1 cloud-mask. - In the case of cloud-probabilities it's more complex. - - Using algorithm from SpaceEye paper: - X <== (I+alpha*Delta^T*Delta)^{-1} ((M.*S2)-(1-M).*X) - - Note that S2, X and M here are assumed to me (nb*nt) x (nx*ny) matrices, while the illumination - calculation is done on nb x nt x nx x ny tensors. (Of course we just use different views of the - same tensors). 
- - """ - - def __init__( - self, - num_bands: int, - time_window: int, - damping_factor: float = 0.1, - tol: float = 1e-3, - max_iter: int = 200, - check_interval: int = 5, - ): - super().__init__() - self.num_bands = num_bands - self.time_window = time_window - self.damping_factor = damping_factor - self.tol = tol - self.max_iter = max_iter - self.check_interval = check_interval - assert self.damping_factor > 0 - d = generate_delta_matrix(self.time_window) - self.delta = torch.kron(torch.eye(self.num_bands), d) - self.w: torch.Tensor = torch.linalg.inv( - torch.eye(self.time_window) + damping_factor * (d.T @ d) - ) - - def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor: - s2, m = inputs["S2"], inputs["cloud_label"] == 1 - x = s2.clone() - m = m.to(x) - m_: torch.Tensor = 1 - m - pixel_avg = masked_time_average(x, m) - x = x * m + pixel_avg * m_ - b, c, _, h, _ = s2.shape - s2 = rearrange(s2, "b c t h w -> t (b c h w)").contiguous() - x = rearrange(x, "b c t h w -> t (b c h w)").contiguous() - m = repeat(m, "b 1 t h w -> t (b c h w)", c=c).contiguous() - m_ = repeat(m_, "b 1 t h w -> t (b c h w)", c=c).contiguous() - f = self.w @ (m * s2) - for i in range(self.max_iter): - x1 = f + self.w @ (m_ * x) - if not (i % self.check_interval) and ( - (x1 - x).abs().mean() / (x1.abs().mean() + EPS) < self.tol - ): - return rearrange(x1, "t (b c h w) -> b c t h w", b=b, c=c, h=h) - x = x1 - return rearrange(x, "t (b c h w) -> b c t h w", b=b, c=c, h=h) diff --git a/src/vibe_lib/vibe_lib/spaceeye/utils.py b/src/vibe_lib/vibe_lib/spaceeye/utils.py deleted file mode 100644 index a87fd0c9..00000000 --- a/src/vibe_lib/vibe_lib/spaceeye/utils.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Dict, List, Sequence, TypeVar - -from vibe_core.data import S2ProcessingLevel, Sentinel2Product - -T = TypeVar("T", bound=Sentinel2Product) - -QUANTIFICATION_VALUE = 10000 -SPACEEYE_TO_SPYNDEX_BAND_NAMES: Dict[str, str] = { - "B02": "B", - "B03": "G", - "B04": "R", - "B05": "RE1", - "B06": "RE2", - "B07": "RE3", - "B08": "N", - "B8A": "N2", - "B11": "S1", - "B12": "S2", -} - - -def find_s2_product(product_name: str, products: List[T]) -> T: - for product in products: - if product.product_name == product_name: - return product - raise ValueError(f"Could not find product with product name {product_name}.") - - -def verify_processing_level( - items: Sequence[Sentinel2Product], processing_level: S2ProcessingLevel, prefix: str = "" -): - invalid = set( - [item.processing_level for item in items if item.processing_level != processing_level] - ) - if invalid: - raise ValueError( - f"{prefix} {'e' if prefix else 'E'}xpected items with processing level " - f"{processing_level}. Found items with processing level: {','.join(invalid)}" - ) diff --git a/src/vibe_lib/vibe_lib/stats.py b/src/vibe_lib/vibe_lib/stats.py deleted file mode 100644 index c3a276ef..00000000 --- a/src/vibe_lib/vibe_lib/stats.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from datetime import datetime -from typing import Any, Dict, List, NamedTuple, Sequence, cast - -import geopandas as gpd -import pandas as pd -import rasterio -from pandas.core.frame import DataFrame -from rasterstats import zonal_stats -from shapely.geometry import shape -from shapely.geometry.base import BaseGeometry - - -class Stats(NamedTuple): - date: datetime - min: float - max: float - mean: float - - -def calculate_zonal_stats( - raster_paths: Sequence[str], raster_dates: Sequence[datetime], geo_dict: Dict[str, Any] -) -> List[Stats]: - """For each raster in a list of rasters, calculates min, max, and mean - values of the pixels overlapping or intersecting a geojson geometry. - This function assumes geometry represents a single non multi geometry. - """ - - # Convert geometry to raster CRS - with rasterio.open(raster_paths[0]) as src: # type: ignore - crs = src.crs # type: ignore - geom: BaseGeometry = ( - gpd.GeoSeries(shape(geo_dict), crs="epsg:4326").to_crs(crs).iloc[0] # type: ignore - ) - - result: List[Stats] = [] - - for raster_path, raster_date in zip(raster_paths, raster_dates): - stats = zonal_stats(geom, raster_path) - - raster_stats = Stats( - raster_date, - cast(float, stats[0]["min"]), - cast(float, stats[0]["max"]), - cast(float, stats[0]["mean"]), - ) - - result.append(raster_stats) - - return result - - -def convert_zonal_stats_to_timeseries(stats: Sequence[Stats]) -> DataFrame: - df = pd.DataFrame(stats) - df.set_index("date", drop=True, inplace=True) # type: ignore - - return df diff --git a/src/vibe_lib/vibe_lib/timeseries.py b/src/vibe_lib/vibe_lib/timeseries.py deleted file mode 100644 index 4aa364e2..00000000 --- a/src/vibe_lib/vibe_lib/timeseries.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import mimetypes -import os - -import pandas as pd - -from vibe_core.data import AssetVibe, gen_guid - - -def save_timeseries_to_asset(timeseries: pd.DataFrame, output_dir: str) -> AssetVibe: - """ - Save dataframe to CSV file and return corresponding asset - """ - out_id = gen_guid() - filepath = os.path.join(output_dir, f"{out_id}.csv") - timeseries.to_csv(filepath) - new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".csv"], id=out_id) - return new_asset diff --git a/src/vibe_notebook/setup.py b/src/vibe_notebook/setup.py index 2b723afe..7ca8570f 100644 --- a/src/vibe_notebook/setup.py +++ b/src/vibe_notebook/setup.py @@ -1,13 +1,10 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - from setuptools import find_packages, setup setup( name="vibe_notebook", version="0.0.1", author="Microsoft", - author_email="terravibes@microsoft.com", + author_email="eywa-devs@microsoft.com", packages=find_packages(), description="Shared notebook library for FarmVibes.AI notebooks.", install_requires=[ diff --git a/src/vibe_notebook/vibe_notebook/__init__.py b/src/vibe_notebook/vibe_notebook/__init__.py index b4ba6ddd..bcac4a86 100644 --- a/src/vibe_notebook/vibe_notebook/__init__.py +++ b/src/vibe_notebook/vibe_notebook/__init__.py @@ -1,4 +1 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Shared notebook library for FarmVibes.AI notebooks.""" diff --git a/src/vibe_notebook/vibe_notebook/deepmc/__init__.py b/src/vibe_notebook/vibe_notebook/deepmc/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_notebook/vibe_notebook/deepmc/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- diff --git a/src/vibe_notebook/vibe_notebook/deepmc/forecast.py b/src/vibe_notebook/vibe_notebook/deepmc/forecast.py deleted file mode 100644 index a8742496..00000000 --- a/src/vibe_notebook/vibe_notebook/deepmc/forecast.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime, timedelta -from typing import Any, Dict, List, Tuple, cast - -import numpy as np -import pandas as pd -from IPython.display import clear_output -from shapely.geometry import Point - -from vibe_core.client import FarmvibesAiClient, get_default_vibe_client -from vibe_core.datamodel import RunConfig, RunConfigUser, SpatioTemporalJson - - -class Forecast: - def __init__( - self, - workflow_name: str, - geometry: Point, - time_range: Tuple[datetime, datetime], - parameters: List[Dict[str, str]], - date_column: str = "date", - ): - self.client: FarmvibesAiClient = get_default_vibe_client() - self.workflow_name = workflow_name - self.geometry = geometry - self.parameters = parameters - self.time_range = time_range - self.date_column = date_column - - def submit_download_request(self): - """ - Submit request to worker to download forecast data - """ - run_metadata_list = [] - runs = [] - for parameter in self.parameters: - run_name = f"forecast_{parameter['weather_type']}" - run = self.client.run( - workflow=self.workflow_name, - name=run_name, - geometry=self.geometry, - time_range=self.time_range, - parameters=parameter, - ) - - run_metadata_list.append( - { - "id": run.id, - "weather_type": parameter["weather_type"], - } - ) - runs.append(run) - - self.client.monitor(runs, 5) - - return run_metadata_list - - def get_run_status(self, run_list: List[Dict[str, str]]): - clear_output(wait=True) - out = [] - for run_item in run_list: - o = self.client.describe_run(run_item["id"]) - print(f"Execution status for {run_item['weather_type']}: {o.details.status}") - - if o.details.status == "done": - out.append(o) - 
else: - raise Exception( - f"Execution status for {run_item['weather_type']}: {o.details.status}" - ) - - return out - - def get_all_assets(self, details: RunConfigUser): - asset_files = [] - output = details.output["weather_forecast"] - record: Dict[str, Any] - for record in cast(List[Dict[str, Any]], output): - for value in record["assets"].values(): - asset_files.append(value["href"]) - df_assets = [pd.read_csv(f, index_col=False) for f in asset_files] - df_out = pd.concat(df_assets) - df_out = self.clean_forecast_data(forecast_df=df_out, run_details=details) - return df_out - - def get_downloaded_data(self, run_list: List[Dict[str, str]], offset_hours: int = 0): - """ - check the download status. If status is done, fetch the downloaded data - """ - forecast_dataset = pd.DataFrame() - out = self.get_run_status(run_list) - for detail in out: - df = self.get_all_assets(detail) - - # Offset from UTC to specified timezone - df.index = df.index + pd.offsets.Hour(offset_hours) - - if not df.empty: - forecast_dataset = pd.concat([forecast_dataset, df], axis=1) - - return forecast_dataset - - def clean_forecast_data( - self, - forecast_df: pd.DataFrame, - run_details: RunConfig, - ): - df = forecast_df[self.date_column] - assert isinstance(run_details.user_input, SpatioTemporalJson) - start_date: datetime = run_details.user_input.start_date - end_date: datetime = run_details.user_input.end_date - - # derive forecast data - forecast_df.drop(columns=[self.date_column], inplace=True) - a = forecast_df.values.tolist() - o = pd.DataFrame([a]) - o = o.T - - df_date = pd.DataFrame( - data=pd.date_range(start_date, end_date + timedelta(days=1), freq="h"), - columns=[self.date_column], - ) - - # derive hours - hours = [f"{str(i)}:00:00" for i in range(24)] - list_hours = [hours for _ in range(forecast_df.shape[0])] - - assert run_details.parameters is not None, "Parameters are not defined" - # transform forecast data with date and time - df = pd.DataFrame( - data={ - 
self.date_column: df.values, - "time": list_hours, - run_details.parameters["weather_type"]: o[0], - } - ) - df = df.explode(column=["time", run_details.parameters["weather_type"]]) - df[self.date_column] = df[self.date_column].astype(str) + " " + df["time"] - df[self.date_column] = pd.to_datetime(df[self.date_column].values) - - df.drop(columns=["time"], inplace=True) - df = pd.merge(df_date, df, how="left", left_on=self.date_column, right_on=self.date_column) - - df.reset_index() - df.set_index(self.date_column, inplace=True) - df.sort_index(ascending=True, inplace=True) - df[run_details.parameters["weather_type"]] = df[ - run_details.parameters["weather_type"] - ].values.astype(np.float32) - - # rename columns with suffix forecast - df.rename( - columns={ - run_details.parameters[ - "weather_type" - ]: f"{run_details.parameters['weather_type']}_forecast" - }, - inplace=True, - ) - - # interpolate to derive missing data - df = df.interpolate(method="from_derivatives") - assert df is not None, "Interpolation deleted all data" - df = df.dropna() - return df diff --git a/src/vibe_notebook/vibe_notebook/deepmc/utils.py b/src/vibe_notebook/vibe_notebook/deepmc/utils.py deleted file mode 100644 index 57d51068..00000000 --- a/src/vibe_notebook/vibe_notebook/deepmc/utils.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from datetime import datetime, timedelta -from typing import Any, Dict, List - -import numpy as np -import pandas as pd -from numpy._typing import NDArray -from pandas.tseries.offsets import DateOffset -from sklearn.metrics import mean_absolute_error, mean_squared_error -from sklearn.preprocessing import StandardScaler - - -def get_csv_data( - path: str, - date_attribute: str = "date", - columns_rename: Dict[str, str] = {}, - frequency: str = "60min", - interpolate: bool = True, - fill_na: bool = True, -): - """ - Read data from CSV file using Pandas python package. 
- """ - - data_df = pd.read_csv(path) - data_df[date_attribute] = pd.to_datetime(data_df[date_attribute]) - - if columns_rename: - data_df.rename(columns=columns_rename, inplace=True) - - # apply index on date - data_df.reset_index(drop=True, inplace=True) - data_df.set_index(date_attribute, inplace=True) - data_df.sort_index(ascending=True, inplace=True) - - if interpolate: - # interpolate to derive missing data - data_df = data_df.interpolate(method="from_derivatives") - assert data_df is not None, "Interpolate deleted all data" - data_df = data_df.dropna() - - if fill_na: - # Group rows by frequency, requires date attribute indexed to execute this - data_df = data_df.fillna(method="ffill") # type: ignore - data_df = data_df.fillna(method="bfill") - data_df = data_df.groupby(pd.Grouper(freq=frequency)).mean() - data_df = data_df.fillna(method="ffill") - data_df = data_df.fillna(method="bfill") - else: - data_df = data_df.groupby(pd.Grouper(freq=frequency)).mean() - - return data_df - - -def hour_round(t: datetime): - # Rounds to nearest hour by adding a timedelta hour if minute >= 30 - return t.replace(second=0, microsecond=0, minute=0, hour=t.hour) + timedelta( - hours=t.minute // 30 - ) - - -def get_split_scaled_data(data: pd.DataFrame, out_feature: str, split_ratio: float = 0.92): - split = int(split_ratio * data.shape[0]) - - train_data = data.iloc[:split] - test_data = data.iloc[split:] - - output_scaler = StandardScaler() - output_scaler.fit_transform(np.expand_dims(data[out_feature].values, axis=1)) # type: ignore - - train_scaler = StandardScaler() - train_scale_df = pd.DataFrame( - train_scaler.fit_transform(train_data), - columns=train_data.columns, - index=train_data.index, - ) - test_scale_df = pd.DataFrame( - train_scaler.transform(test_data), - columns=test_data.columns, - index=test_data.index, - ) - - return train_scaler, output_scaler, train_scale_df, test_scale_df - - -def shift_index(ds_df: pd.DataFrame, freq_minutes: int, num_indices: int, 
dateColumn: str = "date"): - ds_df[dateColumn] = ds_df.index.shift(-num_indices, freq=DateOffset(minutes=freq_minutes)) - ds_df = ds_df.reset_index(drop=True) - ds_df = ds_df.set_index(dateColumn) - return ds_df - - -def clean_relevant_data( - actual_df: pd.DataFrame, - forecast_df: pd.DataFrame, - out_variables: List[str], - freq_hours: int, - num_of_indices: int, -): - base_data_df = actual_df.copy() - current_ws_df = forecast_df.add_suffix("Current") - base_data_df = base_data_df.join(current_ws_df) - shift_forecast_df = shift_index(forecast_df, freq_hours * 60, num_of_indices) - base_data_df = base_data_df.join(shift_forecast_df) - - base_data_df = base_data_df[out_variables] - base_data_df = base_data_df.interpolate(method="from_derivatives") - assert base_data_df is not None, "Interpolate deleted all data" - base_data_df = base_data_df.dropna() - return base_data_df - - -def smooth(y: List[float], box_pts: int): - box = np.ones(box_pts) / box_pts - y_smooth = np.convolve(y, box, mode="same") - return y_smooth - - -def clean_relevant_data_using_hrrr( - actual_df: pd.DataFrame, - forecast_df: pd.DataFrame, - out_variables: List[str], - freq_hours: int, - num_of_indices: int, - start_date: datetime, - end_date: datetime, -): - forecast_df = forecast_df.loc[ - (forecast_df.index >= start_date) & (forecast_df.index <= end_date) - ] - actual_df = actual_df.loc[(actual_df.index >= start_date) & (actual_df.index <= end_date)] - - for col in actual_df.columns: - sub_df = actual_df[actual_df[col].isna()] - if col + "_forecast" in forecast_df.columns: - actual_df.loc[actual_df.index.isin(sub_df.index.values), col] = forecast_df[ - forecast_df.index.isin(sub_df.index.values) - ][col + "_forecast"] - - base_data_df = actual_df.copy() - current_ws_df = forecast_df.add_suffix("Current") - base_data_df = base_data_df.join(current_ws_df) - shift_forecast_df = shift_index(forecast_df, freq_hours * 60, num_of_indices) - base_data_df = base_data_df.join(shift_forecast_df) - - 
base_data_df = base_data_df[out_variables] - base_data_df = base_data_df.interpolate(method="from_derivatives") - assert base_data_df is not None, "Interpolate deleted all data" - base_data_df = base_data_df.dropna() - return base_data_df - - -def calculate_KPI(y: NDArray[Any], yhat: NDArray[Any]): - mae = float(mean_absolute_error(y, yhat)) - rmse = float(mean_squared_error(y, yhat, squared=False)) - print(f"RMSE: {round(rmse, 2)}") - print(f"MAE: {round(mae, 2)}") - print(f"MAE%: {round(100*sum(abs(y-yhat))/sum(y),2)}%") - - -def convert_forecast_data(data: pd.DataFrame): - # Temperature - # convert kelvin to celsius - # convert celsius to Fahrenheit - data["temperature_forecast"] = data["temperature_forecast"].apply( - lambda x: ((x - 273.15) * 9 / 5) + 32 - ) - - # wind_speed - # multiplying with 2.23 to convert wind speed from m/sec to mph - data["wind_speed_forecast"] = data.apply( - lambda x: np.sqrt( - np.square(x["u-component_forecast"]) + np.square(x["v-component_forecast"]) - ) - * 2.23, - axis=1, - ) - data.drop(columns=["u-component_forecast", "v-component_forecast"], inplace=True) - return data - - -def transform_to_array_3D(data: NDArray[Any], inference_hours: int = 24) -> NDArray[Any]: - X = transform_to_array(data, inference_hours) - X = X.reshape(X.shape[0], 1, X.shape[1]) - return X - - -def transform_to_array(data: NDArray[Any], inference_hours: int = 24) -> NDArray[Any]: - data = np.array(data) - X = [] - for in_start in range(len(data)): - in_end = in_start + inference_hours - if in_end <= (len(data)): - X.append(data[in_start:in_end]) - else: - break - - X = np.array(X) - # skip rows not in loop - X = X[: data.shape[0] - inference_hours] - return X diff --git a/src/vibe_notebook/vibe_notebook/plot.py b/src/vibe_notebook/vibe_notebook/plot.py index 2dcf72c1..cfe93cba 100644 --- a/src/vibe_notebook/vibe_notebook/plot.py +++ b/src/vibe_notebook/vibe_notebook/plot.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - """Auxiliary methods for plotting and visualizing data in notebooks.""" import io diff --git a/src/vibe_notebook/vibe_notebook/raster.py b/src/vibe_notebook/vibe_notebook/raster.py index 1e36d4f5..7eee0f71 100644 --- a/src/vibe_notebook/vibe_notebook/raster.py +++ b/src/vibe_notebook/vibe_notebook/raster.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Raster data processing utilities.""" from typing import Any, List, Optional diff --git a/src/vibe_notebook/vibe_notebook/utils.py b/src/vibe_notebook/vibe_notebook/utils.py index c2c8b815..971d8751 100644 --- a/src/vibe_notebook/vibe_notebook/utils.py +++ b/src/vibe_notebook/vibe_notebook/utils.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - """Additional utility methods used in the notebooks.""" import os diff --git a/src/vibe_server/setup.py b/src/vibe_server/setup.py deleted file mode 100644 index 184ac0fd..00000000 --- a/src/vibe_server/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from setuptools import find_packages, setup - -setup( - name="vibe_server", - version="0.0.1", - author="Microsoft", - author_email="terravibes@microsoft.com", - description="TerraVibes Geospatial Platform Package - server package.", - license="Proprietary", - keywords="terravibes geospatial", - packages=find_packages(exclude=["tests*"]), - python_requires="~=3.8", - install_requires=[ - "vibe-core", - "vibe-common", - "httpx~=0.24.1", - "fastapi_utils~=0.2.1", - "grpcio~=1.53.0", - "dapr==1.13.0", - "dapr-ext-grpc~=1.12.0", - "cloudevents~=1.2", - "fastapi~=0.109.1", - "fastapi-versioning~=0.10.0", - "requests~=2.32.0", - "starlette~=0.36.2", - "uvicorn~=0.13.4", - "urllib3~=1.26.8", - "psutil~=5.9.0", - ], - entry_points={ - "console_scripts": [ - "vibe-orchestrator = vibe_server.orchestrator:main_sync", - "vibe-server = vibe_server.server:main_sync", - "vibe-sniffer = vibe_server.sniffer:main", - ] - }, -) diff --git a/src/vibe_server/tests/conftest.py b/src/vibe_server/tests/conftest.py deleted file mode 100644 index 3c5bf8af..00000000 --- a/src/vibe_server/tests/conftest.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from dataclasses import asdict -from typing import Any, Dict - -import pytest - -from vibe_common.messaging import WorkMessage -from vibe_core.datamodel import RunConfig, RunDetails, RunStatus, SpatioTemporalJson -from vibe_dev.testing import anyio_backend -from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir, fake_workflows_dir -from vibe_dev.testing.workflow_fixtures import ( - SimpleStrData, - SimpleStrDataType, - workflow_execution_message, - workflow_run_config, -) - - -@pytest.fixture -def run_config(workflow_execution_message: WorkMessage) -> Dict[str, Any]: - run_id = workflow_execution_message.header.run_id - spatio_temporal_json = { - "end_date": "2019-02-03T00:00:00", - "geojson": { - "features": [ - { - "geometry": { - "coordinates": [ - [ - [-88.068487, 37.058836], - [-88.036059, 37.048687], - [-88.012895, 37.068984], - [-88.026622, 37.085711], - [-88.062482, 37.081461], - [-88.068487, 37.058836], - ] - ], - "type": "Polygon", - }, - "type": "Feature", - } - ], - "type": "FeatureCollection", - }, - "start_date": "2019-02-02T00:00:00", - } - - run_config = asdict( - RunConfig( - name="fake", - workflow="fake", - parameters=None, - user_input=SpatioTemporalJson(**spatio_temporal_json), - id=run_id, - details=RunDetails( - status=RunStatus.running, start_time=None, end_time=None, reason=None - ), - task_details={}, - spatio_temporal_json=None, - output="", - ) - ) - return run_config - - -__all__ = [ - "SimpleStrData", - "SimpleStrDataType", - "workflow_execution_message", - "fake_ops_dir", - "fake_workflows_dir", - "workflow_run_config", - "anyio_backend", - "run_config", -] diff --git a/src/vibe_server/tests/test_graph.py b/src/vibe_server/tests/test_graph.py deleted file mode 100644 index 34ce87ba..00000000 --- a/src/vibe_server/tests/test_graph.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import random -from typing import Dict, List - -import pytest - -from vibe_server.workflow.graph import Graph - - -class SomeGraph(Graph[int, int]): - def __init__(self, data: Dict[int, List[int]]): - super().__init__() - - for k in data: - self.add_node(k) - for k, v in data.items(): - for d in v: - self.add_edge(k, d, 1) - - -@pytest.fixture -def loopy_graph() -> SomeGraph: - return SomeGraph( - { - 0: [1, 2, 3], - 1: [2, 3, 4], - 2: [3], - 3: [4], - 4: [3], - 5: [2], - } - ) - - -@pytest.fixture -def a_normal_graph() -> SomeGraph: - # topological sort: [0], [1, 4], [2, 5, 6, 7], [3] - # graph: - # /-> 7 - # /-> 6 - # /-> 4 -> 5 - # 0 -> 1 -> 2 -> 3 - # \-------/ / - # \---------/ - # - return SomeGraph( - { - 0: [1, 2, 3, 4], - 1: [2, 3], - 2: [3], - 3: [], - 4: [5, 6, 7], - 5: [], - 6: [], - 7: [], - } - ) - - -@pytest.fixture -def a_simple_graph() -> SomeGraph: - # /-> 🔙 \ - # 🌎 -> 🎶 --> 🔚 \-> ✅ - # - return SomeGraph( - { - int.from_bytes("🌎".encode("utf-8"), "little"): [ - int.from_bytes("🎶".encode("utf-8"), "little") - ], - int.from_bytes("🎶".encode("utf-8"), "little"): [ - int.from_bytes("🔙".encode("utf-8"), "little"), - int.from_bytes("🔚".encode("utf-8"), "little"), - ], - int.from_bytes("🔙".encode("utf-8"), "little"): [ - int.from_bytes("✅".encode("utf-8"), "little") - ], - int.from_bytes("🔚".encode("utf-8"), "little"): [ - int.from_bytes("✅".encode("utf-8"), "little") - ], - } - ) - - -@pytest.fixture -def empty_graph() -> SomeGraph: - return SomeGraph({}) - - -def test_topological_sort_on_empty_graph(empty_graph: SomeGraph): - assert list(empty_graph.topological_sort()) == [] - - -def test_cycle_detection_on_empty_graph(empty_graph: SomeGraph): - assert not empty_graph.has_cycle() - - -def test_loopy_graph_has_cycle(loopy_graph: SomeGraph): - assert loopy_graph.has_cycle() - - -def test_topological_sort_on_a_loopy_graph(loopy_graph: SomeGraph): - with pytest.raises(ValueError): - loopy_graph.topological_sort() - - -def 
test_topological_sort_on_a_normal_graph(a_normal_graph: SomeGraph): - sort = list(a_normal_graph.topological_sort()) - assert sort[0] == [0] - assert sort[1] == [1, 4] - assert sort[2] == [2, 5, 6, 7] - assert sort[3] == [3] - - -@pytest.mark.filterwarnings("ignore::UserWarning") -def test_topological_sort_on_a_simple_graph(a_simple_graph: SomeGraph): - sort = list(a_simple_graph.topological_sort()) - assert sort[0] == [int.from_bytes("🌎".encode("utf-8"), "little")] - assert sort[1] == [int.from_bytes("🎶".encode("utf-8"), "little")] - assert set(sort[2]) == set( - [ - int.from_bytes("🔙".encode("utf-8"), "little"), - int.from_bytes("🔚".encode("utf-8"), "little"), - ] - ) - assert sort[3] == [int.from_bytes("✅".encode("utf-8"), "little")] - - -def test_topological_sort_on_random_graphs(): - with pytest.warns(UserWarning): - for _ in range(42): - a = random.randint(-999999, 999999) - b = random.randint(-999999, 999999) - c = random.randint(-999999, 999999) - graph = SomeGraph({a: [b, c], b: [c]}) - sort = list(graph.topological_sort()) - assert len(sort) == 3 - assert sort[0] == [a] - assert sort[1] == [b] - assert sort[2] == [c] - - -def test_relabel_normal_graph(a_normal_graph: SomeGraph): - edge1 = (1, 2, 1) - a_normal_graph.relabel(edge1, 2) - assert 2 in a_normal_graph.neighbors(1) - assert (2, 2) in a_normal_graph.adjacency_list[1] - assert (2, 1) not in a_normal_graph.adjacency_list[1] - - -def test_no_relabel_missing_edge(a_normal_graph: SomeGraph): - edge = (3, 4, 1) - with pytest.raises(KeyError): - a_normal_graph.relabel(edge, 2) diff --git a/src/vibe_server/tests/test_href_handler.py b/src/vibe_server/tests/test_href_handler.py deleted file mode 100644 index abc8ef39..00000000 --- a/src/vibe_server/tests/test_href_handler.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import datetime -import os -from pathlib import Path -from typing import Any, Dict, List, cast - -import pytest -from pystac import Asset, Item - -from vibe_core.data.utils import serialize_stac -from vibe_core.datamodel import RunConfigInput, RunConfigUser -from vibe_server.href_handler import LocalHrefHandler -from vibe_server.server import TerravibesProvider - - -@pytest.fixture -def fake_op_name() -> str: - return "fake.fake" - - -@pytest.fixture -def fake_asset_name() -> str: - return "fake_asset" - - -@pytest.fixture -def one_item_one_asset(fake_asset_name: str) -> Item: - asset = Asset(href="../../../assets/asdf/test.txt") - item = Item( - id="fake_id", - geometry={}, - bbox=[], - datetime=datetime.datetime.utcnow(), - properties={}, - ) - item.add_asset(key=fake_asset_name, asset=asset) - return item - - -def test_local_href_handler_parse_item(one_item_one_asset: Item, tmp_path: Path): - local_href_handler = LocalHrefHandler(tmp_path) - new_item = local_href_handler._parse_item(one_item_one_asset) - for _, v in new_item.get_assets().items(): - p = Path(v.href) - assert p.absolute - - -def test_local_href_handler_update_asset(tmp_path: Path): - local_href_handler = LocalHrefHandler(tmp_path) - - asset = Asset(href="../../../assets/asdf/test.txt") - local_href_handler._update_asset(asset) - p = tmp_path / "asdf" / "test.txt" - assert asset.href == str(p) - assert os.path.isabs(asset.href) - - asset = Asset(href=".././/../assets/asdf/test.txt") - local_href_handler._update_asset(asset) - p = tmp_path / "asdf" / "test.txt" - assert asset.href == str(p) - - asset = Asset(href="../../assets/asdf/blah/../test.txt") - local_href_handler._update_asset(asset) - p = tmp_path / "asdf" / "test.txt" - assert asset.href == str(p) - assert ".." 
not in asset.href - - asset = Asset(href="/test.txt") - local_href_handler._update_asset(asset) - p = tmp_path / "test.txt" - assert asset.href == str(p) - - -@pytest.fixture -def run_config_with_output( - one_item_one_asset: Item, fake_op_name: str, workflow_run_config: Dict[str, Any] -) -> RunConfigUser: - provider = TerravibesProvider(LocalHrefHandler("/tmp")) - _, run_config = provider.create_new_run(RunConfigInput(**workflow_run_config), []) - run_config.set_output({fake_op_name: [serialize_stac(one_item_one_asset)]}) - return RunConfigUser.from_runconfig(run_config) - - -def test_href_handler_handle( - run_config_with_output: RunConfigUser, fake_op_name: str, fake_asset_name: str, tmp_path: Path -): - local_href_handler = LocalHrefHandler(tmp_path) - - original_item = cast(List[Dict[str, Any]], run_config_with_output.output[fake_op_name])[0] - original_href = original_item["assets"][fake_asset_name]["href"] - original_path = str( - local_href_handler.assets_dir / Path(original_href).parent.name / Path(original_href).name - ) - - local_href_handler.handle(run_config_with_output) - - parsed_item = cast(List[Dict[str, Any]], run_config_with_output.output[fake_op_name])[0] - parsed_path = parsed_item["assets"][fake_asset_name]["href"] - - assert parsed_path == original_path diff --git a/src/vibe_server/tests/test_op_parallelism.py b/src/vibe_server/tests/test_op_parallelism.py deleted file mode 100644 index 75259b4d..00000000 --- a/src/vibe_server/tests/test_op_parallelism.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, Awaitable, Callable, Dict, List, NamedTuple, cast -from unittest.mock import MagicMock, patch -from uuid import UUID, uuid4 - -import pytest - -from vibe_core.data.core_types import DataVibe, OpIOType -from vibe_server.workflow.runner.runner import OpParallelism -from vibe_server.workflow.workflow import EdgeLabel, EdgeType, GraphNodeType, InputFanOut - - -class OpSpecMock: - def __init__(self, inputs: OpIOType): - self.inputs_spec = { - name: List[DataVibe] if isinstance(data, list) else DataVibe - for name, data in inputs.items() - } - - -class NodeMock(NamedTuple): - name: str - spec: OpSpecMock - - -@pytest.fixture -def merge_input() -> List[OpIOType]: - return [{"something": [{"int": i}]} for i in range(10)] - - -@pytest.fixture -def exploder_input() -> OpIOType: - return {"to": [{"something": i} for i in range(10)], "other": {"another": "thing"}} - - -def test_parallelism_merges(merge_input: List[Dict[str, Any]]): - the_edge = EdgeLabel("from", "to", EdgeType.scatter) - none = cast(Callable[[GraphNodeType, OpIOType, UUID, int], Awaitable[OpIOType]], None) - parallelism = OpParallelism([the_edge], cast(GraphNodeType, None), none) - out = parallelism.fan_in(merge_input) - assert len(out) == 1 - assert "something" in out - assert len(out["something"]) == 10 - - -def test_parallelism_explodes_inputs(exploder_input: OpIOType): - op_mock = cast(GraphNodeType, NodeMock("mock", OpSpecMock(exploder_input))) - the_edge = EdgeLabel("from", "to", EdgeType.scatter) - none = cast(Callable[[GraphNodeType, OpIOType, UUID, int], Awaitable[OpIOType]], None) - parallelism = OpParallelism([the_edge], op_mock, none) - exploded_inputs = list(parallelism.fan_out(exploder_input)) - assert len(exploded_inputs) == 10 - - -@pytest.mark.anyio -async def test_parallelism_runs(exploder_input: OpIOType): - async def run_task(_: GraphNodeType, input: OpIOType, __: UUID, ___: int) -> OpIOType: - return {"out_" + k: v for k, v in input.items()} - - op_mock = 
cast(GraphNodeType, NodeMock("mock", OpSpecMock(exploder_input))) - the_edge = EdgeLabel("from", "to", EdgeType.scatter) - parallelism = OpParallelism([the_edge], op_mock, run_task) - out = parallelism.fan_in(await parallelism.run(exploder_input, uuid4())) - - assert "out_to" in out - assert "out_other" in out - assert len(out["out_to"]) == len(out["out_other"]) == 10 - - -@pytest.mark.anyio -async def test_parallelism_fails(exploder_input: OpIOType): - async def run_task(_: GraphNodeType, input: OpIOType, __: UUID, ___: int) -> OpIOType: - raise RuntimeError(":-(") - - op_mock = cast(GraphNodeType, NodeMock("mock", OpSpecMock(exploder_input))) - the_edge = EdgeLabel("from", "to", EdgeType.scatter) - parallelism = OpParallelism([the_edge], op_mock, run_task) - - with pytest.raises(RuntimeError): - await parallelism.run(exploder_input, uuid4()) - - -@patch.object(OpParallelism, "fan_out") -@patch("pydantic.fields.ModelField.validate", side_effect=lambda *args, **_: (args[1], None)) -@pytest.mark.anyio -async def test_parallelism_input_fan_out(_: MagicMock, fan_out: MagicMock): - run_task = MagicMock() - node = InputFanOut("test", DataVibe) - parallelism = OpParallelism([], GraphNodeType("test", node), run_task) - with patch.object(OpParallelism, "fan_in") as fan_in: - outputs = await parallelism.run(cast(OpIOType, {node.input_port: "👍"}), uuid4()) - fan_in.assert_not_called() - fan_out.assert_not_called() - run_task.assert_not_called() - assert parallelism.fan_in(outputs) == {node.output_port: "👍"} diff --git a/src/vibe_server/tests/test_orchestrator.py b/src/vibe_server/tests/test_orchestrator.py deleted file mode 100644 index 52c47381..00000000 --- a/src/vibe_server/tests/test_orchestrator.py +++ /dev/null @@ -1,433 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from asyncio.queues import Queue -from dataclasses import asdict -from datetime import datetime -from typing import Any, Dict, Optional, Tuple, cast -from unittest.mock import AsyncMock, Mock, patch -from uuid import UUID -from uuid import uuid4 as uuid - -import pytest -from cloudevents.sdk.event import v1 - -from vibe_common.constants import STATUS_PUBSUB_TOPIC, WORKFLOW_REQUEST_PUBSUB_TOPIC -from vibe_common.dropdapr import TopicEventResponseStatus -from vibe_common.messaging import ( - ErrorContent, - ExecuteReplyContent, - MessageHeader, - MessageType, - OpStatusType, - WorkflowExecutionContent, - WorkflowExecutionMessage, - WorkMessage, - WorkMessageBuilder, - build_work_message, - encode, - gen_traceparent, -) -from vibe_common.schemas import CacheInfo -from vibe_common.statestore import StateStore -from vibe_core.data.core_types import OpIOType -from vibe_core.data.json_converter import dump_to_json -from vibe_core.data.utils import StacConverter, is_container_type, serialize_stac -from vibe_core.datamodel import RunConfig, RunDetails, RunStatus, SpatioTemporalJson -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path # noqa -from vibe_dev.testing.workflow_fixtures import THE_DATAVIBE -from vibe_server.orchestrator import Orchestrator, WorkflowRunManager -from vibe_server.workflow.runner import WorkflowChange -from vibe_server.workflow.runner.remote_runner import RemoteWorkflowRunner -from vibe_server.workflow.spec_parser import WorkflowParser -from vibe_server.workflow.workflow import GraphNodeType, Workflow - - -def make_test_message( - workflow_name: str, - params: Optional[Dict[str, Any]], - fake_ops_dir: str, # noqa - fake_workflows_dir: str, # noqa -) -> WorkflowExecutionMessage: - header = MessageHeader( - type=MessageType.workflow_execution_request, - run_id=uuid(), - ) - workflow_dict = asdict( - WorkflowParser.parse( - get_fake_workflow_path(workflow_name), - ops_dir=fake_ops_dir, - workflows_dir=fake_workflows_dir, - 
parameters_override=params, - ) - ) - content = WorkflowExecutionContent( - input={}, - workflow=workflow_dict, - parameters=params, - ) - return cast(WorkflowExecutionMessage, build_work_message(header, content)) - - -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_orchestrator_add_output(store: Mock, retrieve: Mock, run_config: Dict[str, Any]): - retrieve.side_effect = lambda _: run_config - output = cast(OpIOType, {"some-op": {"data": "fake"}}) - statestore = StateStore() - await WorkflowRunManager.add_output_to_run(run_config["id"], output, statestore) - run_config["output"] = encode(dump_to_json(output)) - store.assert_called_with(run_config["id"], RunConfig(**run_config)) - - -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_orchestrator_fail_workflow(store: Mock, retrieve: Mock, run_config: Dict[str, Any]): - retrieve.side_effect = lambda _: run_config - orchestrator = Orchestrator() - reason = "fake reason" - await orchestrator.fail_workflow(run_config["id"], reason) - run_config["details"]["status"] = RunStatus.failed - run_config["details"]["reason"] = reason - assert store.mock_calls[0][1][1].details.status == RunStatus.failed - assert store.mock_calls[0][1][1].details.reason == reason - - -def to_cloud_event(msg: WorkMessage) -> v1.Event: - ce = v1.Event() - msgdict = msg.to_cloud_event("test") - for key in msgdict: - if hasattr(ce, key): - try: - setattr(ce, key, msgdict[key]) - except Exception: - pass - ce.data = ce.data.encode("ascii") # type: ignore - return ce - - -def test_run_config_fails_on_invalid_inputs(): - rc = RunConfig( - name="name", - workflow="fake", - parameters=None, - user_input=SpatioTemporalJson( - datetime.now(), - datetime.now(), - {}, - ), - id=uuid(), - details=RunDetails(status=RunStatus.pending, start_time=None, end_time=None, reason=None), - 
task_details={}, - spatio_temporal_json=None, - ) - for value in float("nan"), float("inf"), float("-inf"): - with pytest.raises(ValueError): - rc.set_output({"a": value}) # type: ignore - - -@pytest.mark.anyio -async def test_orchestrator_update_response(): - reply_content = ExecuteReplyContent( - cache_info=CacheInfo("test_op", "1.0", {}, {}), status=OpStatusType.done, output={} - ) - header = MessageHeader(type=MessageType.execute_reply, run_id=uuid()) - reply = build_work_message(header=header, content=reply_content) - orchestrator = Orchestrator() - orchestrator.inqueues[str(header.run_id)] = Queue() - topic_reply = await orchestrator.handle_update_workflow_status( - STATUS_PUBSUB_TOPIC, to_cloud_event(reply) - ) - assert topic_reply.status == TopicEventResponseStatus.success["status"] - - -@pytest.mark.anyio -async def test_orchestrator_update_error_response(): - reply_content = ErrorContent(status=OpStatusType.failed, ename="", evalue="", traceback=[]) - header = MessageHeader(type=MessageType.error, run_id=uuid()) - reply = build_work_message(header=header, content=reply_content) - orchestrator = Orchestrator() - orchestrator.inqueues[str(header.run_id)] = Queue() - topic_reply = await orchestrator.handle_update_workflow_status( - STATUS_PUBSUB_TOPIC, to_cloud_event(reply) - ) - assert topic_reply.status == TopicEventResponseStatus.success["status"] - - -@pytest.mark.anyio -async def test_orchestrator_update_response_fails_as_message_not_in_queue(): - orchestrator = Orchestrator() - ack_reply = WorkMessageBuilder.build_ack_reply(gen_traceparent(uuid())) - topic_reply = await orchestrator.handle_update_workflow_status( - STATUS_PUBSUB_TOPIC, to_cloud_event(ack_reply) - ) - assert topic_reply.status == TopicEventResponseStatus.drop["status"] - - -@pytest.mark.anyio -async def test_orchestrator_update_response_fails_with_invalid_message( - workflow_execution_message: WorkMessage, -): - orchestrator = Orchestrator() - topic_reply = await 
orchestrator.handle_update_workflow_status( - STATUS_PUBSUB_TOPIC, to_cloud_event(workflow_execution_message) - ) - assert topic_reply.status == TopicEventResponseStatus.drop["status"] - - -@pytest.mark.anyio -async def test_orchestrator_workflow_submission_rejects(): - request = WorkMessageBuilder.build_error(gen_traceparent(uuid()), "", "", []) - orchestrator = Orchestrator() - topic_reply = await orchestrator.handle_manage_workflow_event( - WORKFLOW_REQUEST_PUBSUB_TOPIC, to_cloud_event(request) - ) - assert topic_reply.status == TopicEventResponseStatus.drop["status"] - - -@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning") -@pytest.mark.anyio -async def test_orchestrator_workflow_submission_accepts(): - spec = WorkflowParser._load_workflow(get_fake_workflow_path("item_gather")) - request = WorkMessageBuilder.build_workflow_request(uuid(), spec, {}, {}) - orchestrator = Orchestrator() - reply = await orchestrator.handle_manage_workflow_event( - WORKFLOW_REQUEST_PUBSUB_TOPIC, to_cloud_event(request) - ) - assert reply.status == TopicEventResponseStatus.success["status"] - - -@patch("vibe_common.statestore.StateStore.retrieve_bulk") -@patch("vibe_common.statestore.StateStore.retrieve") -@pytest.mark.anyio -async def test_orchestrator_startup_sees_no_runs(retrieve: Mock, retrieve_bulk: Mock): - retrieve.return_value = [] - retrieve_bulk.return_value = [] - orchestrator = Orchestrator() - assert await orchestrator.get_unfinished_workflows() == [] - retrieve_bulk.assert_called_once_with([]) - - -@patch("vibe_common.statestore.StateStore.retrieve") -@pytest.mark.anyio -async def test_orchestrator_startup_dapr_not_stared(retrieve: Mock): - retrieve.side_effect = Exception("Random error when retrieving runs") - with pytest.raises(RuntimeError): - orchestrator = Orchestrator() - await orchestrator._resume_workflows() - - -@patch("vibe_common.statestore.StateStore.retrieve_bulk") -@patch("vibe_common.statestore.StateStore.retrieve") 
-@pytest.mark.anyio -async def test_orchestrator_startup_sees_no_unfinished_runs( - retrieve: Mock, retrieve_bulk: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = [run_config["id"]] - run_config["details"]["status"] = RunStatus.done - retrieve_bulk.return_value = [run_config] - orchestrator = Orchestrator() - assert await orchestrator.get_unfinished_workflows() == [] - retrieve_bulk.assert_called_once_with([run_config["id"]]) - - -@patch("vibe_common.statestore.StateStore.retrieve_bulk") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@patch("vibe_server.workflow.runner.task_io_handler.WorkflowIOHandler.map_output") -@patch("vibe_server.workflow.runner.task_io_handler.TaskIOHandler.retrieve_sinks") -@patch("vibe_server.workflow.runner.remote_runner.RemoteWorkflowRunner._run_ops") -@pytest.mark.anyio -async def test_orchestrator_startup_sees_unfinished_runs( - _run_ops: AsyncMock, - retrieve_sinks: Mock, - map_output: Mock, - store: Mock, - retrieve: Mock, - retrieve_bulk: Mock, - run_config: Dict[str, Any], - fake_ops_dir: str, - fake_workflows_dir: str, -): - first = True - - def retrieve_fun(_: str): - nonlocal first - if first: - first = False - return run_config["id"] - return run_config - - _run_ops.return_value = None - retrieve_sinks.return_value = None - map_output.return_value = None - retrieve.side_effect = retrieve_fun - retrieve_bulk.return_value = [run_config, run_config, run_config] - build_return_value = Workflow.build( - get_fake_workflow_path("single_and_parallel"), fake_ops_dir, fake_workflows_dir - ) - - with patch("vibe_server.workflow.workflow.Workflow.build", return_value=build_return_value): - orchestrator = Orchestrator() - await orchestrator._resume_workflows() - retrieve_bulk.assert_called_once_with(run_config["id"]) - _run_ops.assert_called() - - -@patch("vibe_server.orchestrator.WorkflowStateUpdate.__call__") -@pytest.mark.anyio -async def 
test_orchestrator_cancel_run( - state_update: Mock, - fake_ops_dir: str, # noqa - fake_workflows_dir: str, # noqa -): - workflow = Workflow.build( - get_fake_workflow_path("str_input"), - fake_ops_dir, - fake_workflows_dir, - ) - - message = WorkMessageBuilder.build_workflow_request( - uuid(), - asdict(workflow.workflow_spec), - None, - {k: [{}] for k in workflow.inputs_spec}, - ) - - cancellation = WorkMessageBuilder.build_workflow_cancellation(message.run_id) - orchestrator = Orchestrator(ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - await orchestrator.manage_workflow(message) - assert len(orchestrator._workflow_management_tasks.values()) == 1 - wf = list(orchestrator._workflow_management_tasks.values())[0] - - await orchestrator.manage_workflow(cancellation) - await wf.task - assert wf.is_cancelled - assert wf.runner - assert wf.runner.is_cancelled - state_update.assert_any_call(WorkflowChange.WORKFLOW_CANCELLED) - - -@pytest.mark.parametrize("params", [None, {"new": "from_message"}]) -@pytest.mark.anyio -async def test_build_workflow_with_params( - fake_ops_dir: str, # noqa - fake_workflows_dir: str, # noqa - params: Optional[Dict[str, Any]], -): - msg = make_test_message("resolve_params", params, fake_ops_dir, fake_workflows_dir) - manager = WorkflowRunManager( - None, # type: ignore - msg, - 1, # type: ignore - "", - "", - "", - fake_ops_dir, - fake_workflows_dir, - ) - workflow, _ = manager.build_workflow({"input": None}) # type: ignore - expected = workflow.workflow_spec.default_parameters["new"] if params is None else params["new"] - assert workflow.workflow_spec.parameters["new"] == expected - - -@pytest.mark.parametrize( - "wf_params", [("resolve_params", {"made_up": 1}), ("list_list", {"any": "!"})] -) -@patch("vibe_server.orchestrator.update_workflow") -@pytest.mark.anyio -async def test_build_workflow_invalid_params_update_status( - update: Mock, - wf_params: Tuple[str, Dict[str, Any]], - fake_ops_dir: str, # noqa - fake_workflows_dir: 
str, # noqa -): - msg = make_test_message( - wf_params[0], {}, fake_ops_dir=fake_ops_dir, fake_workflows_dir=fake_workflows_dir - ) - msg.content.parameters = wf_params[1] - manager = WorkflowRunManager( - {}, - msg, - 1, # type: ignore - "", - "", - "", - fake_ops_dir, - fake_workflows_dir, # type: ignore - ) - with pytest.raises(ValueError): - await manager.task - update.assert_called_once() - run_id, _, status, _ = update.call_args[0] - assert run_id == str(msg.header.run_id) - assert status == RunStatus.failed - - -@patch.object(RemoteWorkflowRunner, "_build_and_process_request", autospec=True) -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_run_workflow_that_will_fail( - store: Mock, - retrieve: Mock, - bpr: Mock, - fake_ops_dir: str, # noqa - fake_workflows_dir: str, # noqa - run_config: Dict[str, Any], -): - converter = StacConverter() - - workflow = Workflow.build( - get_fake_workflow_path("custom_indices_structure"), - fake_ops_dir, - fake_workflows_dir, - ) - - message = WorkMessageBuilder.build_workflow_request( - uuid(), - asdict(workflow.workflow_spec), - None, - {k: serialize_stac(converter.to_stac_item([THE_DATAVIBE])) for k in workflow.inputs_spec}, - ) - - def mock_build_and_process_request( - self: Any, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int - ) -> OpIOType: - self._handle_ack_message(op.name, subtask_idx) - if op.name.startswith("ndvi"): - raise RuntimeError("Received unsupported message error. Aborting execution.") - return { - k: serialize_stac( - converter.to_stac_item( - # This should work just fine, as `DataVibe` inherits from `BaseVibe`, - # but pyright doesn't like it. 
I think the issue pyright is having - # is because we use `__init_subclass__` in a dataclass, and it is - # getting confused - [THE_DATAVIBE] if is_container_type(v) else THE_DATAVIBE # type: ignore - ) - ) - for k, v in op.spec.output_spec.items() - } - - def store_side_effect(key: str, obj: Any, _: Optional[str] = None): # type: ignore - nonlocal run_config - run_config = obj - - def retrieve_side_effect(key: str, _: Optional[str] = None): # type: ignore - return run_config - - store.side_effect = store_side_effect - retrieve.side_effect = retrieve_side_effect - bpr.side_effect = mock_build_and_process_request - - orchestrator = Orchestrator(ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - - with pytest.raises(RuntimeError): - await orchestrator.manage_workflow(message) - wf = list(orchestrator._workflow_management_tasks.values())[0] - await wf.task - - assert run_config["details"]["status"] == RunStatus.failed diff --git a/src/vibe_server/tests/test_parameter_resolver.py b/src/vibe_server/tests/test_parameter_resolver.py deleted file mode 100644 index 4b9a6a69..00000000 --- a/src/vibe_server/tests/test_parameter_resolver.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os - -from vibe_common.schemas import OperationParser -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path -from vibe_server.workflow.parameter import Parameter, ParameterResolver -from vibe_server.workflow.spec_parser import WorkflowParser - - -def test_parameter_defaults_from_child(): - p_root = Parameter("root", "root", None, None, None) - p_child = Parameter("child", "task", "@from(root)", 0, "child description") - p_root.add_child(p_child) - assert p_root.default == p_child.default - assert p_root.description == p_child.description - p_root._default = "set default" - assert p_root.default == p_root._default - assert p_root.description == p_child.description - p_root._description = "set desc" - assert p_root.default == p_root._default - assert p_root.description == p_root._description - p_root._default = None - assert p_root.default == p_child.default - assert p_root.description == p_root._description - - -def test_parameter_two_children(): - p_root = Parameter("root", "root", None, None, None) - p_child = Parameter("child", "task", "@from(root)", 0, "child1 description") - p_child2 = Parameter("child2", "task2", "@from(root)", 1, "child2 description") - p_root.add_child(p_child) - p_root.add_child(p_child2) - assert p_root.default == (p_child.default, p_child2.default) - assert p_root.description == (p_child.description, p_child2.description) - - -def test_parameter_two_children_same_definition(): - p_root = Parameter("root", "root", None, None, None) - p_child = Parameter("child", "task", "@from(root)", 0, "child description") - p_child2 = Parameter("child2", "task2", "@from(root)", 0, "child description") - p_root.add_child(p_child) - p_root.add_child(p_child2) - assert p_root.default == p_child.default == p_child2.default - assert p_root.description == p_child.description == p_child2.description - - -def test_parameter_children_handle_none(): - p_root = Parameter("root", "root", None, None, None) - p_child = 
Parameter("child", "task", "@from(root)", 0, "child1 description") - p_child2 = Parameter("child2", "task2", "@from(root)", None, None) - p_root.add_child(p_child) - p_root.add_child(p_child2) - # For parameters, we don't discard None! - assert p_root.default == (p_child.default, p_child2.default) - # For descriptions, we ignore None from child2 - assert p_root.description == p_child.description - p_child3 = Parameter("child", "task", "@from(root)", 2, "child3 description") - p_root.add_child(p_child3) - assert p_root.default == (p_child.default, p_child2.default, p_child3.default) - assert p_root.description == (p_child.description, p_child3.description) - - -def test_get_op_params(fake_ops_dir: str): - resolver = ParameterResolver("", "") - op_spec = OperationParser.parse( - os.path.join(fake_ops_dir, "fake", "simple_parameter.yaml"), {"overwrite": "over"} - ) - params = {p.name: p for p in resolver._get_op_params(op_spec, "task")} - assert len(params) == 2 - assert params["keep"]._value == "kept" - assert params["keep"].default == "kept" - assert params["keep"].description is None - - assert params["overwrite"]._value == "over" - assert params["overwrite"].default == "kept" - assert params["overwrite"].description is None - - -def test_get_op_params_nested(fake_ops_dir: str): - resolver = ParameterResolver("", "") - op_spec = OperationParser.parse( - os.path.join(fake_ops_dir, "fake", "nested_parameters.yaml"), - {"nested": {"overwrite": "over nested"}}, - ) - params = {p.name: p for p in resolver._get_op_params(op_spec, "task")} - assert len(params) == 3 - param = params["overwrite"] - assert param._value == param.default == "kept" - assert param.description == "param named overwrite" - - param = params["nested.overwrite"] - assert param._value == "over nested" - assert param.default == "kept nested" - assert param.description == "nested overwrite" - - -def test_resolve_params(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = 
get_fake_workflow_path("resolve_nested_params_multiple_default") - wf_spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - resolver = ParameterResolver(fake_workflows_dir, fake_ops_dir) - params = resolver.resolve(wf_spec) - assert len(params) == 2 - param = params["new"] - assert param.default == ("kept", "overwritten") - assert param._value is None - assert len(param.childs) == 2 - assert sorted([p.name for p in param.childs]) == ["new", "overwrite"] - - param = params["new_nested"] - assert param.default == "overwritten nested" - assert param._value is None - assert len(param.childs) == 1 - assert param.description == "nested overwrite" - - -def test_resolve_only_description(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - wf_spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - resolver = ParameterResolver(fake_workflows_dir, fake_ops_dir) - params = resolver.resolve(wf_spec) - param = params["new_nested"] - # We don't get default from child - assert param.default == param._value == "overwritten nested" - # But we do get description - assert param._description is None - assert param.description == "nested overwrite" diff --git a/src/vibe_server/tests/test_remote_workflow_runner.py b/src/vibe_server/tests/test_remote_workflow_runner.py deleted file mode 100644 index 9d4d8f6b..00000000 --- a/src/vibe_server/tests/test_remote_workflow_runner.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -import sys -import traceback -from asyncio.queues import Queue -from datetime import datetime, timezone -from typing import Any, Optional, Tuple, cast -from unittest.mock import AsyncMock, patch - -import pydantic -import pytest -from shapely.geometry import Polygon, mapping - -from vibe_common.input_handlers import gen_stac_item_from_bounds -from vibe_common.messaging import ( - ErrorContent, - ExecuteReplyContent, - ExecuteRequestContent, - MessageHeader, - MessageType, - OpStatusType, - WorkMessage, - build_work_message, -) -from vibe_common.schemas import CacheInfo, EntryPointDict, OperationSpec -from vibe_core.data import TypeDictVibe -from vibe_core.data.core_types import OpIOType -from vibe_core.data.utils import is_vibe_list -from vibe_core.datamodel import TaskDescription -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path -from vibe_server.workflow.runner.remote_runner import ( - MessageRouter, - RemoteWorkflowRunner, - WorkMessageBuilder, -) -from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler -from vibe_server.workflow.workflow import Workflow - -HERE = os.path.dirname(os.path.abspath(__file__)) - - -class FakeMessage(str): - def __init__(self, s: str): - self.parent_id = "" - self.msg = s - - def __str__(self): - return self.msg - - -@pytest.fixture -def time_range() -> Tuple[datetime, datetime]: - return ( - datetime(year=2021, month=2, day=1, tzinfo=timezone.utc), - datetime(year=2021, month=2, day=11, tzinfo=timezone.utc), - ) - - -@pytest.fixture -def input_polygon() -> Polygon: - polygon_coords = [ - (-88.062073563448919, 37.081397673802059), - (-88.026349330507315, 37.085463858128762), - (-88.026349330507315, 37.085463858128762), - (-88.012445388773259, 37.069230099135126), - (-88.035931592028305, 37.048441375086092), - (-88.068120429075847, 37.058833638440767), - (-88.062073563448919, 37.081397673802059), - ] - - return Polygon(polygon_coords) - - -@pytest.fixture -def 
helloworld_input(input_polygon: Polygon, time_range: Tuple[datetime, datetime]): - return gen_stac_item_from_bounds(mapping(input_polygon), time_range[0], time_range[1]) - - -def test_work_message_builder_fails(workflow_execution_message: WorkMessage): - if hasattr(pydantic, "error_wrappers"): - ValidationError = pydantic.error_wrappers.ValidationError # type: ignore - else: - ValidationError = pydantic.ValidationError # type: ignore - with pytest.raises(ValidationError): - WorkMessageBuilder.build_execute_request( - workflow_execution_message.header.run_id, - "", - None, # type: ignore - {}, - ) - - -def test_work_message_builder_succeeds_with_op_spec(workflow_execution_message: WorkMessage): - message = WorkMessageBuilder.build_execute_request( - workflow_execution_message.header.run_id, - "", - OperationSpec( - name="fake", - root_folder="/tmp", - inputs_spec=TypeDictVibe({}), - output_spec=TypeDictVibe({}), - entrypoint=EntryPointDict(file="op.py", callback_builder="whatever"), - description=TaskDescription(), - ), - {}, - ) - assert cast(ExecuteRequestContent, message.content).operation_spec - - -@pytest.mark.anyio -async def test_message_router_put(): - inqueue = Queue() - handler = MessageRouter(inqueue) - item = FakeMessage("some really cool item") - await inqueue.put(item) - assert await handler.get("") == item - - -@pytest.mark.anyio -async def test_message_router_len(): - inqueue = Queue() - handler = MessageRouter(inqueue) - assert len(handler) == 0 - for i in range(10): - await inqueue.put(FakeMessage(f"{i}")) - assert len(handler) == 10 - handler.should_stop = True - - -def build_reply( - parent_header: MessageHeader, op: Optional[OperationSpec] = None, failure: bool = False -) -> WorkMessage: - if op is None: - output = {} - else: - output = { - k: ([{"a": 1}] if is_vibe_list(op.output_spec[k]) else {"a": 1}) for k in op.output_spec - } - if failure: - try: - 1 / 0 # type: ignore - except ZeroDivisionError: - ename, evalue, tb = sys.exc_info() - 
content = ErrorContent( - status=OpStatusType.failed, - ename=str(ename), # type: ignore - evalue=str(evalue), # type: ignore - traceback=traceback.format_tb(tb), # type: ignore - ) - else: - content = ExecuteReplyContent( - cache_info=CacheInfo("test_op", "1.0", {}, {}), - status=OpStatusType.done, - output=output, # type: ignore - ) - header = MessageHeader( - type=MessageType.error if failure else MessageType.execute_reply, - run_id=parent_header.run_id, - parent_id=parent_header.id, - ) - return build_work_message(header=header, content=content) - - -async def workflow_callback(change, **kwargs): # type: ignore - print(change, kwargs) # type: ignore - - -@patch("vibe_server.workflow.runner.remote_runner.send_async") -@pytest.mark.anyio -async def test_remote_workflow_runner_runs( - send_async: AsyncMock, - fake_ops_dir: str, - fake_workflows_dir: str, - helloworld_input: OpIOType, - workflow_execution_message: WorkMessage, -): - inqueue: "Queue[WorkMessage]" = Queue() - handler = MessageRouter(inqueue) - workflow = Workflow.build(get_fake_workflow_path("str_input"), fake_ops_dir, fake_workflows_dir) - io_mapper = WorkflowIOHandler(workflow) - runner = RemoteWorkflowRunner( - handler, - workflow, - workflow_execution_message.id, - pubsubname="", - source="", - topic="", - io_mapper=io_mapper, - update_state_callback=workflow_callback, - ) - - async def patched_send(item: WorkMessage, *args: Any) -> None: - reply = build_reply( - parent_header=item.header, op=cast(ExecuteRequestContent, item.content).operation_spec - ) - await inqueue.put(reply) - - send_async.side_effect = patched_send - - await runner.run( - {k: helloworld_input for k in runner.workflow.inputs_spec}, - workflow_execution_message.header.run_id, - ) - - -@patch("vibe_server.workflow.runner.remote_runner.send_async") -@pytest.mark.anyio -async def test_remote_workflow_runner_fails( - send_async: AsyncMock, - fake_ops_dir: str, - fake_workflows_dir: str, - helloworld_input: OpIOType, - 
workflow_execution_message: WorkMessage, -): - inqueue: "Queue[WorkMessage]" = Queue() - handler = MessageRouter(inqueue) - workflow = Workflow.build(get_fake_workflow_path("str_input"), fake_ops_dir, fake_workflows_dir) - io_mapper = WorkflowIOHandler(workflow) - runner = RemoteWorkflowRunner( - handler, - workflow, - workflow_execution_message.id, - pubsubname="", - source="", - topic="", - io_mapper=io_mapper, - update_state_callback=workflow_callback, - ) - - async def patched_send(item: WorkMessage, *args: Any) -> None: - reply = build_reply(item.header, None, True) - await inqueue.put(reply) - - send_async.side_effect = patched_send - - with pytest.raises(RuntimeError): - await runner.run( - {k: helloworld_input for k in runner.workflow.inputs_spec}, - workflow_execution_message.header.run_id, - ) diff --git a/src/vibe_server/tests/test_workflow.py b/src/vibe_server/tests/test_workflow.py deleted file mode 100644 index ce3abf06..00000000 --- a/src/vibe_server/tests/test_workflow.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -from typing import List - -import pytest - -from vibe_core.data.core_types import DataVibe -from vibe_core.data.rasters import Raster -from vibe_core.data.utils import is_vibe_list -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path -from vibe_server.workflow.spec_parser import WorkflowParser, WorkflowSpec, WorkflowSpecEdge -from vibe_server.workflow.workflow import EdgeType, Workflow - -HERE = os.path.dirname(os.path.abspath(__file__)) - - -def test_workflow_parameters( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("task_params"), fake_ops_dir, fake_workflows_dir - ) - assert workflow["parameterizable"].parameters["fake_param"] == 3 # type: ignore - assert workflow["parameterizable"].parameters["fake_another_param"] == { # type: ignore - "fake_nested": 2, - "fake_nested_too": 3, - } - - -def test_workflow_nested_parameters( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("nested_task_params"), fake_ops_dir, fake_workflows_dir - ) - assert workflow["parameterizable"].parameters["fake_param"] == 1 # type: ignore - assert workflow["parameterizable"].parameters["fake_another_param"] == { # type: ignore - "fake_nested": 2, - "fake_nested_too": 4, - } - - -def test_workflow_unknown_parameter( - fake_ops_dir: str, - fake_workflows_dir: str, -): - with pytest.raises(ValueError): - Workflow.build( - get_fake_workflow_path("unknown_task_params"), fake_ops_dir, fake_workflows_dir - ) - - -def test_misconfigured_workflow( - fake_ops_dir: str, - fake_workflows_dir: str, -): - with pytest.raises(ValueError): - Workflow.build(get_fake_workflow_path("missing_edge"), fake_ops_dir, fake_workflows_dir) - - -def test_fan_out_fan_in( - fake_ops_dir: str, - fake_workflows_dir: str, -): - # Tests whether we support workflows with nodes - # from List[DataVibe] <-> [DataVibe] - Workflow.build(get_fake_workflow_path("fan_out_and_in"), 
fake_ops_dir, fake_workflows_dir) - - -def test_nested_fan_out_fails( - fake_ops_dir: str, - fake_workflows_dir: str, -): - with pytest.raises(ValueError): - Workflow.build(get_fake_workflow_path("nested_fan_out"), fake_ops_dir, fake_workflows_dir) - - -@pytest.mark.parametrize( - "workflow_name", - ["single_and_parallel", "gather_and_parallel", "gather_and_parallel_input_gather_output"], -) -def test_parallelism_two_edge_types( - workflow_name: str, - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow_path = get_fake_workflow_path(workflow_name) - - workflow_spec: WorkflowSpec = WorkflowParser.parse( - workflow_path, fake_ops_dir, fake_workflows_dir - ) - workflow = Workflow(workflow_spec) - edge = workflow.edges_from(workflow.index["two_types"])[0] - correct_type = EdgeType.gather if "gather_output" in workflow_name else EdgeType.parallel - assert edge[-1].type == correct_type - - -def test_gather_not_parallel( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("item_gather"), fake_ops_dir, fake_workflows_dir - ) - assert workflow.edges_from(workflow.index["item"])[0][-1].type == EdgeType.gather - - -def test_loading_inheritance_works( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("inheritance"), fake_ops_dir, fake_workflows_dir - ) - assert not is_vibe_list(workflow["inherit_item"].output_spec["processed_data"]) - assert is_vibe_list(workflow["inherit_list"].output_spec["processed_data"]) - - -def test_loading_missing_inheritance_fails( - fake_ops_dir: str, - fake_workflows_dir: str, -): - with pytest.raises(ValueError): - Workflow.build( - get_fake_workflow_path("missing_inheritance"), fake_ops_dir, fake_workflows_dir - ) - - -def test_loading_multi_level_inheritance_works( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("two_level_inheritance"), fake_ops_dir, fake_workflows_dir - 
) - assert workflow["direct_inherit"].output_spec["processed_data"] is DataVibe - assert workflow["indirect_inherit"].output_spec["processed_data"] is DataVibe - - -def test_inheritance_before_fanout( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("inheritance_before_fan_out"), fake_ops_dir, fake_workflows_dir - ) - - assert workflow["inherit_list"].output_spec["processed_data"] is List[DataVibe] - assert list(workflow.edges_from(workflow.index["inherit_list"]))[0][-1].type == EdgeType.scatter - - -def test_inheritance_after_fanout( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("inheritance_after_fan_out"), fake_ops_dir, fake_workflows_dir - ) - - assert workflow["scatter_inherit"].output_spec["processed_data"] is DataVibe - assert list(workflow.edges_from(workflow.index["list"]))[0][-1].type == EdgeType.scatter - assert ( - list(workflow.edges_from(workflow.index["scatter_inherit"]))[0][-1].type - == EdgeType.parallel - ) - - -def test_inheritance_source( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow = Workflow.build( - get_fake_workflow_path("inheritance_from_source"), fake_ops_dir, fake_workflows_dir - ) - - assert workflow["inherit_raster"].output_spec["processed_data"] is Raster - assert workflow["inherit_source"].output_spec["processed_data"] is DataVibe - - -def test_cycle_disconnected_components_detection( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow_path = get_fake_workflow_path("three_ops") - - workflow_spec: WorkflowSpec = WorkflowParser.parse( - workflow_path, fake_ops_dir, fake_workflows_dir - ) - for origin, destination in zip( - ("second.processed_data", "third.processed_data", "third.processed_data"), - ("first.user_data", "second.user_data", "third.user_data"), - ): - edge: WorkflowSpecEdge = WorkflowSpecEdge(origin=origin, destination=[destination]) - workflow_spec.edges.append(edge) - - with 
pytest.raises(ValueError): - Workflow(workflow_spec) - - workflow_spec.edges.pop() - - -def test_parameter_resolution( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow_path = get_fake_workflow_path("resolve_params") - - workflow = Workflow.build(workflow_path, fake_ops_dir, fake_workflows_dir) - assert workflow["simple"].parameters["keep"] == "kept" - assert workflow["simple"].parameters["overwrite"] == "overwritten" - assert workflow["nested"].parameters["overwrite"] == "overwritten" - assert workflow["nested"].parameters["nested"]["keep"] == "kept nested" - assert workflow["nested"].parameters["nested"]["overwrite"] == "overwritten nested" - - -def test_nested_workflow_parameter_resolution( - fake_ops_dir: str, - fake_workflows_dir: str, -): - workflow_path = get_fake_workflow_path("resolve_nested_params") - - workflow = Workflow.build(workflow_path, fake_ops_dir, fake_workflows_dir) - assert workflow["simple"].parameters["keep"] == "kept" - assert workflow["simple"].parameters["overwrite"] == "overwritten" - assert workflow["nested.simple"].parameters["overwrite"] == "overwritten" - assert workflow["nested.nested"].parameters["overwrite"] == "overwritten" - assert workflow["nested.nested"].parameters["nested"]["keep"] == "kept nested" - assert workflow["nested.nested"].parameters["nested"]["overwrite"] == "overwritten nested" - - -def test_workflow_parameter_resolution_default_values(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = get_fake_workflow_path("resolve_nested_params_default") - - workflow = Workflow.build(workflow_path, fake_ops_dir, fake_workflows_dir) - assert workflow["simple"].parameters["keep"] == "kept" - # Default value for the op in 'overwrite' is "kept" - assert workflow["simple"].parameters["overwrite"] == "kept" - assert workflow["nested.simple"].parameters["overwrite"] == "overwritten" - assert workflow["nested.nested"].parameters["overwrite"] == "overwritten" - assert 
workflow["nested.nested"].parameters["nested"]["keep"] == "kept nested" - # Default value for the op in 'overwrite' is kept, - # but default for the workflow containing it is 'overwritten nested' - assert workflow["nested.nested"].parameters["nested"]["overwrite"] == "overwritten nested" - - -@pytest.mark.parametrize("invalid", ["", "inexistent"]) -def test_workflow_parameter_resolution_invalid_ref( - fake_ops_dir: str, fake_workflows_dir: str, invalid: str -): - workflow_path = get_fake_workflow_path("resolve_params") - - spec = WorkflowParser.parse(workflow_path, fake_ops_dir, fake_workflows_dir) - spec.tasks["simple"].parameters["overwrite"] = f"@from({invalid})" - with pytest.raises(ValueError): - Workflow(spec) - - -def test_workflow_missing_source(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = get_fake_workflow_path("bad_source") - with pytest.raises(ValueError): - Workflow.build( - workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir - ) - - -def test_workflow_missing_sink(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = get_fake_workflow_path("bad_sink") - with pytest.raises(ValueError): - Workflow.build( - workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir - ) - - -def test_most_specific_source_type(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = get_fake_workflow_path("specific_source") - wf = Workflow.build( - workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir - ) - assert wf.inputs_spec["input"] is Raster - - -def test_item_list_source_type(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = get_fake_workflow_path("specific_source_item_list") - wf = Workflow.build( - workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir - ) - assert wf.inputs_spec["input"] is Raster - - -def test_list_list_source_type(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = 
get_fake_workflow_path("specific_source_list_list") - wf = Workflow.build( - workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir - ) - assert wf.inputs_spec["input"] is List[Raster] - - -def test_incompatible_sources_fails(fake_ops_dir: str, fake_workflows_dir: str): - workflow_path = get_fake_workflow_path("incompatible_source") - with pytest.raises(ValueError): - Workflow.build( - workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir - ) diff --git a/src/vibe_server/tests/test_workflow_input_handler.py b/src/vibe_server/tests/test_workflow_input_handler.py deleted file mode 100644 index 4c69db14..00000000 --- a/src/vibe_server/tests/test_workflow_input_handler.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import copy -from dataclasses import dataclass -from datetime import datetime -from typing import Any, Dict, List, cast -from unittest.mock import MagicMock, patch - -import pytest -from shapely import geometry as shpg - -from vibe_common.input_handlers import gen_stac_item_from_bounds -from vibe_core.data.core_types import BaseVibe, DataVibe, OpIOType -from vibe_core.data.rasters import CategoricalRaster, Raster -from vibe_core.data.utils import StacConverter, serialize_stac -from vibe_core.datamodel import SpatioTemporalJson -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path -from vibe_server.workflow.input_handler import ( - build_args_for_workflow, - patch_workflow_sources, - validate_workflow_input, -) -from vibe_server.workflow.spec_parser import WorkflowParser -from vibe_server.workflow.workflow import Workflow - - -@pytest.fixture -def dummy_input(): - return {"dummy": 0, "another": "1"} - - -def test_build_workflow_args_spatiotemporal_input(): - geom = shpg.box(0, 0, 1, 1) - geojson = {"type": "Feature", "geometry": shpg.mapping(geom)} - start_date = datetime(2020, 1, 2) - end_date = datetime(2020, 1, 3) - 
user_input = SpatioTemporalJson(start_date, end_date, geojson) - args = build_args_for_workflow(user_input, ["one_input"]) - assert args == {"one_input": gen_stac_item_from_bounds(geojson, start_date, end_date)} - with pytest.raises(ValueError): - build_args_for_workflow(user_input, ["1", "2"]) - - -def test_build_workflow_args_single_source(dummy_input: Dict[str, Any]): - args = build_args_for_workflow(dummy_input, ["one_input"]) - assert args == {"one_input": dummy_input} - args = build_args_for_workflow({"one_input": dummy_input}, ["one_input"]) - assert args == {"one_input": dummy_input} - - -def test_build_workflow_args_multi_source(dummy_input: Dict[str, Any]): - inputs = ["1", "2"] - matching_input = {k: dummy_input for k in inputs} - args = build_args_for_workflow(matching_input, inputs) - assert args == matching_input - - -def test_build_workflow_args_missing_key_fails(dummy_input: Dict[str, Any]): - inputs = ["1", "2"] - with pytest.raises(ValueError): - build_args_for_workflow(dummy_input, inputs) - - -def test_build_workflow_args_wrong_key_fails(dummy_input: Dict[str, Any]): - inputs = ["1", "2"] - bad_input = {k: dummy_input for k in ["1", "3"]} - with pytest.raises(ValueError): - build_args_for_workflow(bad_input, inputs) - - -def test_validate_wf_item_input(): - inputs_spec: Any = {"input": DataVibe} - converter = StacConverter() - geom = shpg.box(0, 0, 1, 1) - now = datetime.now() - x = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) - serial = serialize_stac(converter.to_stac_item(x)) - validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) - - -def test_validate_wf_input_subtype(): - inputs_spec: Any = {"input": Raster} - converter = StacConverter() - geom = shpg.box(0, 0, 1, 1) - now = datetime.now() - x = Raster(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[], bands={}) - serial = serialize_stac(converter.to_stac_item(x)) - validate_workflow_input(cast(OpIOType, {"input": 
serial}), inputs_spec) - - # More specific types are fine - x = CategoricalRaster.clone_from(x, id="2", assets=[], categories=[]) - serial = serialize_stac(converter.to_stac_item(x)) - validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) - - # More generic types are not - x = DataVibe.clone_from(x, id="3", assets=[]) - serial = serialize_stac(converter.to_stac_item(x)) - with pytest.raises(ValueError): - validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) - - -def test_validate_wf_list_input(): - inputs_spec: Any = {"input": List[DataVibe]} - converter = StacConverter() - geom = shpg.box(0, 0, 1, 1) - now = datetime.now() - x = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) - serial = serialize_stac(converter.to_stac_item(x)) - validate_workflow_input(cast(OpIOType, {"input": [serial]}), inputs_spec) - # Item is ok as well (will be converted to one item list) - validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) - - -def test_validate_wf_base_input(): - @dataclass - class A(BaseVibe): - a: int - - inputs_spec: Any = {"input": List[A]} - input = serialize_stac(StacConverter().to_stac_item(A(a=1))) - other_input = copy.deepcopy(input) - del other_input["properties"]["a"] - other_input["properties"]["b"] = 1 - - validate_workflow_input({"input": input}, inputs_spec) - validate_workflow_input({"input": [input]}, inputs_spec) - - with pytest.raises(ValueError): - validate_workflow_input({"input": other_input}, inputs_spec) - - with pytest.raises(ValueError): - validate_workflow_input({"input": [other_input]}, inputs_spec) - - inputs_spec: Any = {"input": A} - validate_workflow_input({"input": input}, inputs_spec) - - -def test_validate_wf_multi_source_input(): - inputs_spec: Any = {"input1": DataVibe, "input2": Raster} - converter = StacConverter() - geom = shpg.box(0, 0, 1, 1) - now = datetime.now() - x1 = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), 
assets=[]) - s1 = serialize_stac(converter.to_stac_item(x1)) - x2 = Raster.clone_from(x1, id="1", assets=[], bands={}) - s2 = serialize_stac(converter.to_stac_item(x2)) - x3 = CategoricalRaster.clone_from(x2, id="1", assets=[], categories=[]) - s3 = serialize_stac(converter.to_stac_item(x3)) - - validate_workflow_input({"input1": s1, "input2": s2}, inputs_spec) - validate_workflow_input({"input1": s1, "input2": s3}, inputs_spec) - validate_workflow_input({"input1": s3, "input2": s2}, inputs_spec) - - with pytest.raises(ValueError): - validate_workflow_input({"input1": s1, "input2": s1}, inputs_spec) - - -def test_workflow_source_patch(fake_ops_dir: str, fake_workflows_dir: str): - workflow = Workflow.build(get_fake_workflow_path("item_item"), fake_ops_dir, fake_workflows_dir) - assert workflow.inputs_spec == {"input": DataVibe} - assert len(workflow.nodes) == 1 - assert len(workflow.edges) == 0 - old_source = workflow.source_mappings["input"][0] - patch_workflow_sources({"input": []}, workflow) - # We support list in the input - assert workflow.inputs_spec == {"input": List[DataVibe]} - # We add one fan-out node - assert len(workflow.nodes) == 2 - # We add one edge from fan-out node to actual node - assert len(workflow.edges) == 1 - # Our new edge should be from our node to the former source port - edge = workflow.edges_from(workflow.index["input_fanout"])[0] - destination = f"{edge[1].name}.{edge[2][1]}" - assert destination == old_source - - -def test_workflow_source_patch_multiedge(fake_ops_dir: str, fake_workflows_dir: str): - workflow = Workflow.build( - get_fake_workflow_path("specific_source"), fake_ops_dir, fake_workflows_dir - ) - assert workflow.inputs_spec == {"input": Raster} - assert len(workflow.nodes) == 2 - assert len(workflow.edges) == 0 - old_sources = [s for s in workflow.source_mappings["input"]] - patch_workflow_sources({"input": []}, workflow) - # We support list in the input - assert workflow.inputs_spec == {"input": List[Raster]} - # We add 
one fan-out node - assert len(workflow.nodes) == 3 - # We add one edge from fan-out node to each input port in the source (2) - assert len(workflow.edges) == 2 - # Each new edge should be from our node to a former source port - edges = workflow.edges_from(workflow.index["input_fanout"]) - destinations = [f"{edge[1].name}.{edge[2][1]}" for edge in edges] - assert sorted(destinations) == sorted(old_sources) - - -def test_workflow_source_patch_fails_nested_fanout(fake_ops_dir: str, fake_workflows_dir: str): - workflow = Workflow.build( - get_fake_workflow_path("fan_out_and_in"), fake_ops_dir, fake_workflows_dir - ) - with pytest.raises(ValueError): - patch_workflow_sources({"input": []}, workflow) - - -@patch("vibe_server.workflow.input_handler.fan_out_workflow_source") -def test_workflow_source_patch_list_source( - patch_mock: MagicMock, fake_ops_dir: str, fake_workflows_dir: str -): - workflow = Workflow.build(get_fake_workflow_path("list_list"), fake_ops_dir, fake_workflows_dir) - patch_workflow_sources({"input": []}, workflow) - # Put something that is not a list - patch_workflow_sources({"input": 0}, workflow) # type: ignore - patch_mock.assert_not_called() - - -def test_workflow_multi_source_patch(fake_ops_dir: str, fake_workflows_dir: str): - wf_dict = { - "name": "test", - "sources": { - "input1": ["t1.input"], - "input2": ["t2.input"], - "input3": ["t3.input"], - }, - "sinks": { - "output1": "t1.output", - "output2": "t2.gather", - "output3": "t3.raster", - }, - "tasks": { - "t1": {"workflow": "item_gather"}, - "t2": {"workflow": "fan_out_and_in"}, - "t3": {"workflow": "specific_source"}, - }, - } - spec = WorkflowParser.parse_dict( - wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir - ) - workflow = Workflow(spec) - unpatched_nodes = len(workflow.nodes) - unpatched_edges = len(workflow.edges) - assert workflow.inputs_spec == {"input1": DataVibe, "input2": DataVibe, "input3": Raster} - with pytest.raises(ValueError): - 
patch_workflow_sources({"input1": [], "input2": [], "input3": []}, workflow) - # We patched the first and last ones, but reverted the second one - assert workflow.inputs_spec == { - "input1": List[DataVibe], - "input2": DataVibe, - "input3": List[Raster], - } - assert len(workflow.nodes) == unpatched_nodes + 2 - # t3 has a source that maps to two ports so it will create two edges - assert len(workflow.edges) == unpatched_edges + 3 diff --git a/src/vibe_server/tests/test_workflow_parser.py b/src/vibe_server/tests/test_workflow_parser.py deleted file mode 100644 index 4c39149e..00000000 --- a/src/vibe_server/tests/test_workflow_parser.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from dataclasses import asdict - -import pytest -import yaml - -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path -from vibe_server.workflow.spec_parser import WorkflowParser - - -@pytest.mark.parametrize("missing_field", WorkflowParser.required_fields) -def test_parser_fails_missing_field(missing_field: str, fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - with open(wf_path) as f: - wf_dict = yaml.safe_load(f) - del wf_dict[missing_field] - with pytest.raises(ValueError): - WorkflowParser.parse_dict(wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - - -def test_parser_fails_unknown_wf_field(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - with open(wf_path) as f: - wf_dict = yaml.safe_load(f) - wf_dict["unknown"] = "🤦‍♂️" - - with pytest.raises(ValueError): - WorkflowParser.parse_dict(wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - - -def test_parser_fails_unknown_task_field(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - with open(wf_path) as f: - wf_dict = yaml.safe_load(f) - - 
wf_dict["tasks"]["simple"]["unknown"] = "🤦‍♂" - - with pytest.raises(ValueError): - WorkflowParser.parse_dict(wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - - -def test_parser_fills_optional_fields(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("list_list") - with open(wf_path) as f: - wf_dict = yaml.safe_load(f) - for field in WorkflowParser.optional_fields: - wf_dict[field] = None - spec = WorkflowParser.parse_dict( - wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir - ) - assert getattr(spec, field) is not None - del wf_dict[field] - - -def test_parser_parameter_override(fake_ops_dir: str, fake_workflows_dir: str): - spec = WorkflowParser.parse( - get_fake_workflow_path("resolve_params"), - ops_dir=fake_ops_dir, - workflows_dir=fake_workflows_dir, - parameters_override={"new": "override"}, - ) - assert spec.parameters["new"] == "override" - - -def test_parser_parameter_override_yaml_dict(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - with open(wf_path) as f: - wf_dict = yaml.safe_load(f) - spec = WorkflowParser.parse_dict( - wf_dict, - ops_dir=fake_ops_dir, - workflows_dir=fake_workflows_dir, - parameters_override={"new": "override"}, - ) - assert spec.parameters["new"] == "override" - - -def test_parser_parameter_override_spec_dict(fake_ops_dir: str, fake_workflows_dir: str): - spec = WorkflowParser.parse( - get_fake_workflow_path("resolve_params"), - ops_dir=fake_ops_dir, - workflows_dir=fake_workflows_dir, - ) - spec = WorkflowParser.parse_dict( - asdict(spec), - ops_dir=fake_ops_dir, - workflows_dir=fake_workflows_dir, - parameters_override={"new": "override"}, - ) - assert spec.parameters["new"] == "override" diff --git a/src/vibe_server/tests/test_workflow_runner.py b/src/vibe_server/tests/test_workflow_runner.py deleted file mode 100644 index 066c16e6..00000000 --- a/src/vibe_server/tests/test_workflow_runner.py +++ /dev/null @@ -1,65 
+0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import Any, List -from uuid import UUID, uuid4 - -import pytest - -from vibe_common.messaging import OpIOType -from vibe_core.data.utils import StacConverter, is_container_type, serialize_stac -from vibe_dev.testing.fake_workflows_fixtures import ( # noqa - fake_ops_dir, - fake_workflows_dir, - get_fake_workflow_path, -) -from vibe_dev.testing.workflow_fixtures import THE_DATAVIBE -from vibe_server.workflow.runner.runner import WorkflowRunner -from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler -from vibe_server.workflow.workflow import GraphNodeType, Workflow - - -class MockWorkflowRunner(WorkflowRunner): - def __init__(self, fail_list: List[str], *args: Any, **kwargs: Any): - self.fail_list = fail_list - super().__init__(*args, **kwargs) - - async def _run_op_impl( - self, op: GraphNodeType, input: OpIOType, run_id: UUID, _: int - ) -> OpIOType: - for fail in self.fail_list: - if op.name.startswith(fail): - raise RuntimeError(f"Failed op {op} because it was in the fail list") - converter = StacConverter() - return { - k: serialize_stac( - converter.to_stac_item( - [THE_DATAVIBE] if is_container_type(v) else THE_DATAVIBE # type: ignore - ) - ) - for k, v in op.spec.output_spec.items() - } - - -@pytest.mark.anyio -async def test_one_failure_in_sink_fails_workflow( - fake_ops_dir: str, # noqa - fake_workflows_dir: str, # noqa -): - workflow = Workflow.build( - get_fake_workflow_path("custom_indices_structure"), - fake_ops_dir, - fake_workflows_dir, - ) - - data = StacConverter().to_stac_item([THE_DATAVIBE]) - wf_input: OpIOType = {"user_input": serialize_stac(data)} - - runner = MockWorkflowRunner( - fail_list=["ndvi"], - workflow=workflow, - io_mapper=WorkflowIOHandler(workflow), - ) - - with pytest.raises(RuntimeError): - await runner.run(wf_input, uuid4()) diff --git a/src/vibe_server/tests/test_workflow_spec_validator.py 
b/src/vibe_server/tests/test_workflow_spec_validator.py deleted file mode 100644 index 159df489..00000000 --- a/src/vibe_server/tests/test_workflow_spec_validator.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import pytest - -from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path -from vibe_server.workflow.spec_parser import WorkflowParser -from vibe_server.workflow.spec_validator import WorkflowSpecValidator - - -def test_validator_fails_unused_parameter(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - wf_dict = WorkflowParser._load_workflow(wf_path) - spec = WorkflowParser.parse_dict( - wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir - ) - WorkflowSpecValidator.validate(spec) - # Add unused param - wf_dict["parameters"]["unused"] = None - spec = WorkflowParser.parse_dict( - wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir - ) - with pytest.raises(ValueError): - WorkflowSpecValidator._validate_parameter_references(spec) - - -def test_validator_fails_bad_ref(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_params") - wf_dict = WorkflowParser._load_workflow(wf_path) - # Add invalid ref - wf_dict["tasks"]["nested"]["parameters"]["overwrite"] = "@from(unexistent)" - spec = WorkflowParser.parse_dict(wf_dict, fake_ops_dir, workflows_dir=fake_workflows_dir) - with pytest.raises(ValueError): - WorkflowSpecValidator._validate_parameter_references(spec) - - -def test_validator_fails_multiple_defaults(fake_ops_dir: str, fake_workflows_dir: str): - wf_path = get_fake_workflow_path("resolve_nested_params_multiple_default") - spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - with pytest.raises(ValueError): - WorkflowSpecValidator.validate(spec) - - -def test_validator_fails_source_and_destination(fake_ops_dir: str, 
fake_workflows_dir: str): - wf_path = get_fake_workflow_path("source_and_destination") - spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) - with pytest.raises(ValueError): - WorkflowSpecValidator.validate(spec) diff --git a/src/vibe_server/tests/test_workflow_state.py b/src/vibe_server/tests/test_workflow_state.py deleted file mode 100644 index 393067d4..00000000 --- a/src/vibe_server/tests/test_workflow_state.py +++ /dev/null @@ -1,420 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from collections import Counter -from dataclasses import asdict -from datetime import datetime -from typing import Any, Dict, List, Tuple -from unittest.mock import Mock, patch - -import pytest - -from vibe_core.datamodel import RunDetails, RunStatus -from vibe_server.orchestrator import WorkflowStateUpdate -from vibe_server.workflow.runner import WorkflowChange - -MOCK_SUBMISSION_TIME = datetime(2020, 1, 2, 3, 4, 5, 6) - - -async def setup_updater(run_config: Dict[str, Any], tasks: List[str]): - with patch.object(WorkflowStateUpdate, "_init_cache", autospec=True) as mock_method: - deets = asdict(RunDetails()) - deets["submission_time"] = MOCK_SUBMISSION_TIME - - def mock_fun(self): # type:ignore - self.wf_cache["details"] = deets - self._cache_init = True - - mock_method.side_effect = mock_fun - updater = WorkflowStateUpdate(run_config["id"]) - await updater(WorkflowChange.WORKFLOW_STARTED, tasks=tasks) - return updater - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_workflow_started( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - transaction_ops = transaction.mock_calls[0][1][0] - # We update all tasks 
+ workflow - assert len(transaction_ops) == len(tasks) + 1 - assert transaction_ops[-1]["key"] == str(updater.run_id) - wf_cache = updater._get_cache(None, None)[0] - assert wf_cache["status"] == RunStatus.pending - assert wf_cache["submission_time"] == MOCK_SUBMISSION_TIME - assert wf_cache["start_time"] is not None - for t_op, task in zip(transaction_ops, tasks): - assert task in updater.task_cache - cache = updater._get_cache(task, None)[0] - assert t_op["key"] == f"{updater.run_id}-{task}" - assert cache["status"] == RunStatus.pending - assert cache["subtasks"] is None - assert cache["submission_time"] is None - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_workflow_finished( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - await updater(WorkflowChange.WORKFLOW_FINISHED) - transaction_ops = transaction.mock_calls[1][1][0] - # We only update the workflow - assert len(transaction_ops) == 1 - assert transaction_ops[0]["key"] == str(updater.run_id) - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_task_started( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - task_start = "task1" - num_subtasks = 4 - await updater(WorkflowChange.TASK_STARTED, task=task_start, num_subtasks=num_subtasks) - transaction_ops = transaction.mock_calls[1][1][0] - # We update the task, not the workflow (still pending) - assert len(transaction_ops) == 1 - assert transaction_ops[0]["key"] == 
f"{updater.run_id}-{task_start}" - cache = updater._get_cache(task_start, None)[0] - assert cache["status"] == RunStatus.pending - assert len(cache["subtasks"]) == num_subtasks - assert all([s["status"] == RunStatus.pending for s in cache["subtasks"]]) - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_propagate_up( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - assert updater._get_cache(None, None)[0]["submission_time"] == MOCK_SUBMISSION_TIME - assert updater._get_cache(None, None)[0]["start_time"] is not None - task_start = "task1" - num_subtasks = 4 - await updater(WorkflowChange.TASK_STARTED, task=task_start, num_subtasks=num_subtasks) - transaction.reset_mock() - await updater(WorkflowChange.SUBTASK_QUEUED, task=task_start, subtask_idx=0) - transaction_ops = transaction.mock_calls[0][1][0] - # We update the task and workflow to queued - assert len(transaction_ops) == 2 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}" - assert transaction_ops[1]["key"] == f"{updater.run_id}" - - assert updater._get_cache(None, None)[0]["status"] == RunStatus.queued - assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.queued - assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.queued - # Check that submission time was properly updated - subtask_submission = updater._get_cache(task_start, 0)[0]["submission_time"] - assert subtask_submission is not None - assert updater._get_cache(task_start, None)[0]["submission_time"] == subtask_submission - - # A different subtask should still be pending - assert updater._get_cache(task_start, 1)[0]["status"] == RunStatus.pending - - # Let's queue another subtask from the same task - await 
updater(WorkflowChange.SUBTASK_QUEUED, task=task_start, subtask_idx=1) - transaction_ops = transaction.mock_calls[-1][1][0] - # We only update the task since the workflow is already queued - assert len(transaction_ops) == 1 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}" - assert updater._get_cache(task_start, 1)[0]["status"] == RunStatus.queued - - # Let's start the other task and queue a subtask - other_task = "task2" - await updater(WorkflowChange.TASK_STARTED, task=other_task, num_subtasks=1) - transaction_ops = transaction.mock_calls[-1][1][0] - assert len(transaction_ops) == 1 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}" - await updater(WorkflowChange.SUBTASK_QUEUED, task=other_task, subtask_idx=0) - transaction_ops = transaction.mock_calls[-1][1][0] - assert len(transaction_ops) == 1 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}" - - # Let's start a subtask on the first task - await updater(WorkflowChange.SUBTASK_RUNNING, task=task_start, subtask_idx=0) - transaction_ops = transaction.mock_calls[-1][1][0] - # We should update the task and the workflow to running here - assert len(transaction_ops) == 2 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}" - assert transaction_ops[1]["key"] == f"{updater.run_id}" - assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.running - assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.running - assert updater._get_cache(None, None)[0]["status"] == RunStatus.running - # The start times should match - subtask_start = updater._get_cache(task_start, 0)[0]["start_time"] - assert updater._get_cache(task_start, None)[0]["start_time"] == subtask_start - - # Let's finish the first subtask - await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=0) - transaction_ops = transaction.mock_calls[-1][1][0] - # We should update the task and the workflow back to queued - assert 
len(transaction_ops) == 2 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}" - assert transaction_ops[1]["key"] == f"{updater.run_id}" - assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.done - assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.queued - assert updater._get_cache(None, None)[0]["status"] == RunStatus.queued - # The task should have an end time, but the task and workflow should not be updated - assert updater._get_cache(task_start, 0)[0]["end_time"] is not None - assert updater._get_cache(task_start, None)[0]["end_time"] is None - assert updater._get_cache(None, None)[0]["end_time"] is None - - # If we start the subtask for the other task, the workflow should be running - await updater(WorkflowChange.SUBTASK_RUNNING, task=other_task, subtask_idx=0) - transaction_ops = transaction.mock_calls[-1][1][0] - assert len(transaction_ops) == 2 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}" - assert transaction_ops[1]["key"] == f"{updater.run_id}" - assert updater._get_cache(other_task, 0)[0]["status"] == RunStatus.running - assert updater._get_cache(other_task, None)[0]["status"] == RunStatus.running - assert updater._get_cache(None, None)[0]["status"] == RunStatus.running - - # Completing the only subtask should set the task to finished and workflow back to queued - await updater(WorkflowChange.SUBTASK_FINISHED, task=other_task, subtask_idx=0) - transaction_ops = transaction.mock_calls[-1][1][0] - assert len(transaction_ops) == 2 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}" - assert transaction_ops[1]["key"] == f"{updater.run_id}" - assert updater._get_cache(other_task, 0)[0]["status"] == RunStatus.done - assert updater._get_cache(other_task, None)[0]["status"] == RunStatus.done - assert updater._get_cache(None, None)[0]["status"] == RunStatus.queued - # The task should have an end time, but the workflow should not be updated - subtask_end = 
updater._get_cache(other_task, 0)[0]["end_time"] - assert subtask_end is not None - assert updater._get_cache(other_task, None)[0]["end_time"] == subtask_end - assert updater._get_cache(None, None)[0]["end_time"] is None - - # Complete all subtasks for the first task - for subtask_idx in range(num_subtasks): - await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=subtask_idx) - assert updater._get_cache(task_start, subtask_idx)[0]["status"] == RunStatus.done - # The task should be finished and the workflow should NOT - assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.done - assert updater._get_cache(None, None)[0]["status"] != RunStatus.done - # Check end time for the task - subtask_end = updater._get_cache(task_start, 3)[0]["end_time"] - assert subtask_end is not None - assert updater._get_cache(task_start, None)[0]["end_time"] == subtask_end - assert updater._get_cache(None, None)[0]["end_time"] is None - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_workflow_cancel( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - task_start = tasks[0] - num_subtasks = 4 - finished_subtask = 2 - await updater(WorkflowChange.TASK_STARTED, task=task_start, num_subtasks=num_subtasks) - await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=finished_subtask) - transaction.reset_mock() - await updater(WorkflowChange.WORKFLOW_CANCELLED) - transaction_ops = transaction.mock_calls[0][1][0] - # We update the workflow and all tasks - assert len(transaction_ops) == 3 - for t_op, task in zip(transaction_ops, tasks): - assert t_op["key"] == f"{updater.run_id}-{task}" - assert transaction_ops[-1]["key"] == str(updater.run_id) - 
assert updater._get_cache(None, None)[0]["status"] == RunStatus.cancelled - assert updater._get_cache(None, None)[0]["reason"] == updater.user_request_reason - for task, task_cache in updater.task_cache.items(): - assert task in tasks - assert task_cache["status"] == RunStatus.cancelled - assert task_cache["reason"] == updater.user_request_reason - subtasks = updater._get_cache(task_start, None)[0]["subtasks"] - # We should have cancelled all subtasks except the one that finished - for i, subtask in enumerate(subtasks): - if i == finished_subtask: - assert subtask["status"] == RunStatus.done - else: - assert subtask["status"] == RunStatus.cancelled - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_no_update_if_done( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - task_start = tasks[0] - num_subtasks = 4 - canceled_subtask = 0 - finished_subtask = 2 - await updater(WorkflowChange.TASK_STARTED, task=task_start, num_subtasks=num_subtasks) - await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=finished_subtask) - await updater(WorkflowChange.WORKFLOW_CANCELLED) - transaction.reset_mock() - - # We should not update anything if we try to update a finished task - # Either if it's marked as `done` - await updater(WorkflowChange.SUBTASK_RUNNING, task=task_start, subtask_idx=finished_subtask) - transaction.assert_not_called() - # Or if it's marked as `cancelled` - await updater(WorkflowChange.SUBTASK_RUNNING, task=task_start, subtask_idx=canceled_subtask) - transaction.assert_not_called() - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") 
-@pytest.mark.anyio -async def test_workflow_fail( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2"] - updater = await setup_updater(run_config, tasks) - task_start = tasks[0] - num_subtasks = 1 - finished_subtask = 0 - await updater(WorkflowChange.WORKFLOW_STARTED, tasks=tasks) - await updater(WorkflowChange.TASK_STARTED, task=task_start, num_subtasks=num_subtasks) - await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=finished_subtask) - transaction.reset_mock() - failure_reason = "Something went wrong 💀" - await updater(WorkflowChange.WORKFLOW_FAILED, reason=failure_reason) - transaction_ops = transaction.mock_calls[0][1][0] - # We update the workflow and one task - assert len(transaction_ops) == 2 - assert transaction_ops[0]["key"] == f"{updater.run_id}-{tasks[1]}" - assert transaction_ops[-1]["key"] == str(updater.run_id) - assert updater._get_cache(None, None)[0]["status"] == RunStatus.failed - # We should have the reason of failure here - assert updater._get_cache(None, None)[0]["reason"] == failure_reason - # The first task should be done - assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.done - assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.done - # The second task should be cancelled - assert updater._get_cache(tasks[1], None)[0]["status"] == RunStatus.cancelled - # We should have the cancellation reason for workflow failure here - assert updater._get_cache(tasks[1], None)[0]["reason"] == updater.workflow_failure_reason - - -@patch("vibe_common.statestore.StateStore.transaction") -@patch("vibe_common.statestore.StateStore.retrieve") -@patch("vibe_common.statestore.StateStore.store") -@pytest.mark.anyio -async def test_subtask_fail( - store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any] -): - retrieve.return_value = run_config - tasks = ["task1", "task2", "task3", "task4"] - 
updater = await setup_updater(run_config, tasks) - # Task with several subtasks - await updater(WorkflowChange.TASK_STARTED, task=tasks[0], num_subtasks=3) - # Task with a single subtask - await updater(WorkflowChange.TASK_STARTED, task=tasks[1], num_subtasks=1) - # Task with no subtasks - # Task with single subtask that's done - await updater(WorkflowChange.TASK_STARTED, task=tasks[3], num_subtasks=1) - await updater(WorkflowChange.SUBTASK_FINISHED, task=tasks[3], subtask_idx=0) - # First task has a subtask that is done, and one that is not - await updater(WorkflowChange.SUBTASK_FINISHED, task=tasks[0], subtask_idx=0) - await updater(WorkflowChange.SUBTASK_QUEUED, task=tasks[0], subtask_idx=1) - transaction.reset_mock() - # The last subtask fails - failure_reason = "Something went wrong 💀" - await updater( - WorkflowChange.SUBTASK_FAILED, task=tasks[0], subtask_idx=2, reason=failure_reason - ) - transaction_ops = transaction.mock_calls[0][1][0] - expected_cancel_reason = f"Cancelled because task '{tasks[0]}' (subtask 2) failed" - # We update the workflow and three tasks - assert len(transaction_ops) == 4 - for t_op, task in zip(transaction_ops, tasks[:-1]): - assert t_op["key"] == f"{updater.run_id}-{task}" - assert transaction_ops[-1]["key"] == str(updater.run_id) - # Workflow is marked as failed - assert updater._get_cache(None, None)[0]["status"] == RunStatus.failed - # We should have the reason of failure here - assert updater._get_cache(None, None)[0]["reason"] == failure_reason - # The first task should be failed - assert updater._get_cache(tasks[0], None)[0]["status"] == RunStatus.failed - assert updater._get_cache(tasks[0], None)[0]["reason"] == failure_reason - # Last subtask should be failed - assert updater._get_cache(tasks[0], 2)[0]["status"] == RunStatus.failed - assert updater._get_cache(tasks[0], 2)[0]["reason"] == failure_reason - # The first subtask should be done still - assert updater._get_cache(tasks[0], 0)[0]["status"] == RunStatus.done - # 
The second subtask should be cancelled - assert updater._get_cache(tasks[0], 1)[0]["status"] == RunStatus.cancelled - assert updater._get_cache(tasks[0], 1)[0]["reason"] == expected_cancel_reason - # Other unfinished tasks should be cancelled - for task in tasks[1:-1]: - assert updater._get_cache(task, None)[0]["status"] == RunStatus.cancelled - # Last task should be done - assert updater._get_cache(tasks[-1], None)[0]["status"] == RunStatus.done - - -@patch.object(WorkflowStateUpdate, "commit_cache_for") -@pytest.mark.anyio -async def test_workflow_state_update_subtasks(commit: Mock, run_config: Dict[str, Any]): - op_name = "fake-op" - updater = await setup_updater(run_config, [op_name]) - await updater(WorkflowChange.TASK_STARTED, task=op_name, num_subtasks=3) - subtasks = updater.task_cache[op_name]["subtasks"] - assert len(subtasks) == 3 - assert all(r["status"] == RunStatus.pending for r in subtasks) - RunDetails(**subtasks[0]) - - def compare(values: Tuple[int, int, int, int]): - counts = Counter([r["status"] for r in subtasks]) - return all( - counts[k] == v - for k, v in zip( - (RunStatus.pending, RunStatus.queued, RunStatus.running, RunStatus.done), values - ) - ) - - await updater(WorkflowChange.SUBTASK_QUEUED, task=op_name, subtask_idx=0) - assert subtasks[0]["status"] == RunStatus.queued - RunDetails(**subtasks[0]) - compare((2, 1, 0, 0)) - await updater(WorkflowChange.SUBTASK_QUEUED, task=op_name, subtask_idx=2) - assert subtasks[2]["status"] == RunStatus.queued - compare((1, 2, 0, 0)) - await updater(WorkflowChange.SUBTASK_RUNNING, task=op_name, subtask_idx=0) - assert subtasks[0]["status"] == RunStatus.running - RunDetails(**subtasks[0]) - compare((1, 1, 1, 0)) - await updater(WorkflowChange.SUBTASK_RUNNING, task=op_name, subtask_idx=2) - assert subtasks[2]["status"] == RunStatus.running - compare((1, 0, 2, 0)) - await updater(WorkflowChange.SUBTASK_FINISHED, task=op_name, subtask_idx=2) - assert subtasks[2]["status"] == RunStatus.done - 
RunDetails(**subtasks[2]) - compare((1, 0, 1, 1)) - await updater(WorkflowChange.SUBTASK_QUEUED, task=op_name, subtask_idx=1) - assert subtasks[1]["status"] == RunStatus.queued - compare((0, 1, 1, 1)) diff --git a/src/vibe_server/vibe_server/__init__.py b/src/vibe_server/vibe_server/__init__.py deleted file mode 100644 index b7c52582..00000000 --- a/src/vibe_server/vibe_server/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - diff --git a/src/vibe_server/vibe_server/href_handler.py b/src/vibe_server/vibe_server/href_handler.py deleted file mode 100644 index bdc99c88..00000000 --- a/src/vibe_server/vibe_server/href_handler.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from abc import ABC, abstractmethod -from pathlib import Path -from typing import List, Optional, Union - -from azure.core.credentials import TokenCredential -from pystac import Asset, Item - -from vibe_common.messaging import OpIOType -from vibe_common.tokens import BlobTokenManagerConnectionString, BlobTokenManagerCredentialed -from vibe_core.data.utils import deserialize_stac, serialize_stac -from vibe_core.datamodel import RunConfigUser -from vibe_core.utils import ensure_list - - -class HrefHandler(ABC): - @abstractmethod - def _update_asset(self, asset: Asset): - raise NotImplementedError - - def _parse_item(self, item: Item): - assets = item.get_assets() - for asset in assets: - self._update_asset(assets[asset]) - return item - - def _parse_items(self, obj: Union[Item, List[Item]]) -> Union[Item, List[Item]]: - if isinstance(obj, Item): - return self._parse_item(obj) - else: - return [self._parse_item(item) for item in obj] - - def _run(self, out: OpIOType) -> OpIOType: - result = {} - for key in out: - items = deserialize_stac(out[key]) - items = ensure_list(items) - for item in items: - item.clear_links() - result[key] = serialize_stac(self._parse_items(items)) 
- return result - - def handle(self, original_response: RunConfigUser) -> RunConfigUser: - original_response.output = self._run(original_response.output) - return original_response - - -class LocalHrefHandler(HrefHandler): - def __init__(self, assets_dir: Union["str", Path]): - super().__init__() - self.assets_dir = assets_dir if isinstance(assets_dir, Path) else Path(assets_dir) - - def _update_asset(self, asset: Asset): - asset_href_path = Path(asset.href).resolve() - parent_name = asset_href_path.parent.name - asset_name = asset_href_path.name - - asset.href = str(self.assets_dir / Path(parent_name) / asset_name) - - -class BlobHrefHandler(HrefHandler): - def __init__( - self, credential: Optional[TokenCredential] = None, connection_string: Optional[str] = None - ): - super().__init__() - if connection_string is not None: - self.manager = BlobTokenManagerConnectionString(connection_string=connection_string) - else: - self.manager = BlobTokenManagerCredentialed(credential=credential) - - def _update_asset(self, asset: Asset): - asset.href = self.manager.sign_url(asset.href) diff --git a/src/vibe_server/vibe_server/orchestrator.py b/src/vibe_server/vibe_server/orchestrator.py deleted file mode 100755 index 8480a0b7..00000000 --- a/src/vibe_server/vibe_server/orchestrator.py +++ /dev/null @@ -1,846 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import asyncio -import asyncio.queues -import logging -from argparse import ArgumentParser -from copy import copy -from dataclasses import asdict -from datetime import datetime -from functools import partial -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, cast -from uuid import UUID - -import debugpy -from cloudevents.sdk.event import v1 -from dapr.conf import settings -from opentelemetry import trace - -from vibe_common.constants import ( - CACHE_PUBSUB_TOPIC, - CONTROL_STATUS_PUBSUB, - DEFAULT_OPS_DIR, - RUNS_KEY, - STATUS_PUBSUB_TOPIC, - WORKFLOW_REQUEST_PUBSUB_TOPIC, -) -from vibe_common.dapr import dapr_ready -from vibe_common.dropdapr import App, TopicEventResponse -from vibe_common.messaging import ( - OpIOType, - WorkflowCancellationMessage, - WorkflowDeletionMessage, - WorkflowExecutionMessage, - WorkMessage, - WorkMessageBuilder, - accept_or_fail_event_async, - extract_message_header_from_event, - run_id_from_traceparent, -) -from vibe_common.statestore import StateStore, TransactionOperation -from vibe_common.telemetry import add_trace, setup_telemetry, update_telemetry_context -from vibe_core.datamodel import RunConfig, RunDetails, RunStatus -from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging - -from .workflow import workflow_from_input -from .workflow.input_handler import build_args_for_workflow, patch_workflow_sources -from .workflow.runner.remote_runner import MessageRouter, RemoteWorkflowRunner -from .workflow.runner.runner import WorkflowCallback, WorkflowChange, WorkflowRunner -from .workflow.runner.task_io_handler import WorkflowIOHandler -from .workflow.spec_parser import WorkflowParser -from .workflow.workflow import Workflow, get_workflow_dir - -Updates = Tuple[bool, List[str]] - - -class WorkflowStateUpdate(WorkflowCallback): - """Keeps track of the state of a workflow and its tasks. 
- The state is stored in the statestore and updated based on the events received - from the workflow runner. The workflow and task states are updated in the statestore with - different keys in order to avoid upserting a large amount of data with every update. - - In general, the state of a task is defined based on the status of its subtasks. - A task is marked as a status when at least one of its subtasks is marked as that status - in the following other of priority: - 1. failed. - 2. running. - 3. queued. - 4. pending. - 5. done. - Whenever an update to a subtask happens, we propagate it up and update statuses as necessary. - The analogous is defined for workflow w.r.t tasks. - - Cancellation and failure events are also propagated down. - This means that when a workflow is cancelled, all tasks are updated and cancelled as well - (unless already done). The analogous happens for tasks and subtasks. - For failures, we propagate the cancelled state down and the failed state up. - """ - - user_request_reason = "Cancellation requested by user" - workflow_failure_reason = "Cancelled due to failure during workflow execution" - - def __init__(self, workflowRunId: UUID): - self.run_id = workflowRunId - self.wf_cache: Dict[str, Any] = {} - self.task_cache: Dict[str, Any] = {} - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.statestore = StateStore() - self.update_lock = asyncio.Lock() - # Cache "empty" RunDetails because creating it triggers the big bad bug - self.pending_run = asdict(RunDetails()) - self.wf_change_to_update = { - WorkflowChange.WORKFLOW_STARTED: self.create_workflow, - WorkflowChange.WORKFLOW_FINISHED: self.complete_workflow, - WorkflowChange.WORKFLOW_CANCELLED: self.cancel_workflow, - WorkflowChange.WORKFLOW_FAILED: self.fail_workflow, - WorkflowChange.TASK_STARTED: self.create_subtasks, - WorkflowChange.SUBTASK_QUEUED: self.queue_subtask, - WorkflowChange.SUBTASK_RUNNING: self.execute_subtask, - 
WorkflowChange.SUBTASK_FINISHED: self.complete_subtask, - WorkflowChange.SUBTASK_FAILED: self.fail_subtask, - WorkflowChange.SUBTASK_PENDING: self.pend_subtask, - } - self._cache_init = False - - async def _init_cache(self): - # TODO: We could also load task cache here in case we want to resume a workflow - cache = await self.statestore.retrieve(str(self.run_id)) - self.wf_cache["details"] = cache["details"] - self._cache_init = True - - def create_workflow(self, tasks: List[str]) -> Updates: - # Workflow start time is set when we start running the graph - self.wf_cache["details"]["start_time"] = datetime.now() - self.wf_cache["tasks"] = tasks - for t in tasks: - self.task_cache[t] = copy(self.pending_run) - return True, tasks - - def complete_workflow(self) -> Updates: - return self._update_finish_change(None, None, cancelled=False, reason=""), [] - - def cancel_workflow(self) -> Updates: - fun = partial(self._update_finish_change, cancelled=True, reason=self.user_request_reason) - return self._propagate_down(fun) - - def fail_workflow(self, reason: str) -> Updates: - wf_updated = self._update_failure_change(None, None, reason=reason) - if not wf_updated: - # We won't cancel the workflow because it is already finished - return False, [] - fun = partial( - self._update_finish_change, - cancelled=True, - reason=self.workflow_failure_reason, - ) - _, updated_tasks = self._propagate_down(fun) - return wf_updated, updated_tasks - - def create_subtasks(self, task: str, num_subtasks: int) -> Updates: - cache, name = self._get_cache(task, None) - cache["subtasks"] = [copy(self.pending_run) for _ in range(num_subtasks)] - self.logger.info(f"Created {num_subtasks} subtasks for {name}. 
(run id: {self.run_id})") - return False, [task] - - def queue_subtask(self, task: str, subtask_idx: int) -> Updates: - return self._propagate_up(self._update_queued_change, task, subtask_idx) - - def execute_subtask(self, task: str, subtask_idx: int) -> Updates: - return self._propagate_up(self._update_start_change, task, subtask_idx) - - def complete_subtask(self, task: str, subtask_idx: int) -> Updates: - fun = partial(self._update_finish_change, cancelled=False, reason="") - return self._propagate_up(fun, task, subtask_idx) - - def fail_subtask(self, task: str, subtask_idx: int, reason: str) -> Updates: - fail_fun = partial(self._update_failure_change, reason=reason) - subtask_updated = fail_fun(task, subtask_idx, reason=reason) - task_updated = fail_fun(task, None, reason=reason) - wf_updated_up = fail_fun(None, None, reason=reason) - updated_tasks_up = [task] if (task_updated or subtask_updated) else [] - cancel_fun = partial( - self._update_finish_change, - cancelled=True, - reason=f"Cancelled because task '{task}' (subtask {subtask_idx}) failed", - ) - wf_updated_down, updated_tasks_down = self._propagate_down(cancel_fun) - wf_updated = wf_updated_up or wf_updated_down - updated_tasks = updated_tasks_up + [ - i for i in updated_tasks_down if i not in updated_tasks_up - ] - return wf_updated, updated_tasks - - def pend_subtask(self, task: str, subtask_idx: int) -> Updates: - return self._propagate_up(self._update_pending_change, task, subtask_idx) - - def _combine_children_status(self, children_status: Set[RunStatus]) -> RunStatus: - for status in (RunStatus.running, RunStatus.queued, RunStatus.pending): - if status in children_status: - new_status = status - break - else: - if children_status != {RunStatus.done}: - raise ValueError(f"Unknown status combination: {children_status}") - new_status = RunStatus.done - return new_status - - def _combine_children_time( - self, children_start: List[Optional[datetime]], children_end: List[Optional[datetime]] - ) -> 
Tuple[Optional[datetime], Optional[datetime]]: - children_start = [i for i in children_start if i is not None] - if not children_start: - start_time = None - else: - start_time = min(cast(List[datetime], children_start)) - if any(i is None for i in children_end): - end_time = None - else: - end_time = max(cast(List[datetime], children_end)) - return start_time, end_time - - def _update_task_status(self, task: str) -> bool: - cache, _ = self._get_cache(task, None) - if cache["subtasks"] is None: - raise RuntimeError(f"Tried to update status of task {task} before creating subtasks") - subtask_status = {i["status"] for i in cache["subtasks"]} - new_status = self._combine_children_status(subtask_status) - if new_status != cache["status"]: - cache["status"] = new_status - cache["submission_time"], _ = self._combine_children_time( - [i["submission_time"] for i in cache["subtasks"]], - [None], - ) - cache["start_time"], cache["end_time"] = self._combine_children_time( - [i["start_time"] for i in cache["subtasks"]], - [i["end_time"] for i in cache["subtasks"]], - ) - return True - return False - - def _update_workflow_status(self) -> bool: - cache, _ = self._get_cache(None, None) - task_status = {i["status"] for i in self.task_cache.values()} - new_status = self._combine_children_status(task_status) - if new_status == RunStatus.done: - # We don't set it to done here because we still need to store the output - # We only set to done when complete_workflow is called by the orchestrator - new_status = RunStatus.running - if new_status != cache["status"]: - cache["status"] = new_status - return True - return False - - def _propagate_up( - self, fun: Callable[[Optional[str], Optional[int]], bool], task: str, subtask_idx: int - ) -> Updates: - subtask_updated = fun(task, subtask_idx) - if not subtask_updated: - return False, [] - task_updated = self._update_task_status(task) - if not task_updated: - return False, [task] - return self._update_workflow_status(), [task] - - def 
_propagate_down(self, fun: Callable[[Optional[str], Optional[int]], bool]) -> Updates: - wf_updated = fun(None, None) - updated_tasks = [] - for task, task_cache in self.task_cache.items(): - task_updated = fun(task, None) - if task_cache["subtasks"] is not None: - subtask_updated = any([fun(task, i) for i in range(len(task_cache["subtasks"]))]) - else: - subtask_updated = False - if task_updated or subtask_updated: - updated_tasks.append(task) - return wf_updated, updated_tasks - - def _get_cache( - self, task: Optional[str], subtask_idx: Optional[int] - ) -> Tuple[Dict[str, Any], str]: - if task is None: - return self.wf_cache["details"], "workflow" - if subtask_idx is None: - return self.task_cache[task], f"task {task}" - subtasks_cache = self.task_cache[task]["subtasks"] - if subtasks_cache is None: - raise ValueError( - f"Tried to update subtask {subtask_idx} for {task} before creating subtasks" - ) - return ( - self.task_cache[task]["subtasks"][subtask_idx], - f"task {task} (subtask {subtask_idx})", - ) - - def _update_pending_change(self, task: Optional[str], subtask_idx: Optional[int]) -> bool: - cache, name = self._get_cache(task, subtask_idx) - if RunStatus.finished(cache["status"]): - return False - cache["status"] = RunStatus.pending - self.logger.info(f"Changed {name} status to {RunStatus.pending}. (run id: {self.run_id})") - return True - - def _update_queued_change(self, task: Optional[str], subtask_idx: Optional[int]) -> bool: - cache, name = self._get_cache(task, subtask_idx) - if RunStatus.finished(cache["status"]): - return False - if cache["submission_time"] is None: - cache["submission_time"] = datetime.now() - cache["status"] = RunStatus.queued - self.logger.info(f"Changed {name} status to {RunStatus.queued}. 
(run id: {self.run_id})") - return True - - def _update_start_change(self, task: Optional[str], subtask_idx: Optional[int]) -> bool: - cache, name = self._get_cache(task, subtask_idx) - if RunStatus.finished(cache["status"]) or cache["status"] == RunStatus.running: - return False - if cache["start_time"] is None: - cache["start_time"] = datetime.now() - cache["status"] = RunStatus.running - self.logger.info(f"Changed {name} status to {RunStatus.running}. (run id: {self.run_id})") - return True - - def _update_finish_change( - self, task: Optional[str], subtask_idx: Optional[int], cancelled: bool, reason: str - ) -> bool: - cache, name = self._get_cache(task, subtask_idx) - if RunStatus.finished(cache["status"]): - return False - status = RunStatus.cancelled if cancelled else RunStatus.done - for missing in ("submission_time", "start_time"): - if cache[missing] is None: - cache[missing] = datetime.now() - if not cancelled: - self.logger.warning( - f"Marking {name} as finished, " - f"but it didn't have a {missing} set. (run id: {self.run_id})" - ) - cache["end_time"] = datetime.now() - cache["status"] = status - if cancelled: - cache["reason"] = reason - self.logger.info(f"Changed {name} status to {status}. (run id: {self.run_id})") - return True - - def _update_failure_change( - self, task: Optional[str], subtask_idx: Optional[int], reason: str - ) -> bool: - cache, name = self._get_cache(task, subtask_idx) - if RunStatus.finished(cache["status"]): - return False - if cache["start_time"] is None: - self.logger.error( - f"Marking {name} as failed, " - f"but it didn't have a start time set. (run id: {self.run_id})" - ) - cache["start_time"] = datetime.now() - cache["end_time"] = datetime.now() - cache["status"] = RunStatus.failed - cache["reason"] = reason - self.logger.info(f"Changed {name} status to {RunStatus.failed}. 
(run id: {self.run_id})") - return True - - def update_cache_for(self, change: WorkflowChange, **kwargs: Any) -> Updates: - update_fun = self.wf_change_to_update[change] - return update_fun(**kwargs) - - async def commit_cache_for(self, update_workflow: bool, tasks: List[str]) -> None: - # We are not deserializing run data into a RunConfig object because this breaks *something* - # We do not deserialize the cache into RunDetails for the same reason - operations = [ - TransactionOperation( - key=f"{self.run_id}-{t}", operation="upsert", value=self.task_cache[t] - ) - for t in tasks - ] - if update_workflow: - wf_data = await self.statestore.retrieve(str(self.run_id)) - wf_data["tasks"] = self.wf_cache["tasks"] - wf_data["details"] = self.wf_cache["details"] - operations.append( - TransactionOperation(key=str(self.run_id), operation="upsert", value=wf_data) - ) - - await self.statestore.transaction(operations) - - async def __call__(self, change: WorkflowChange, **kwargs: Any) -> None: - async with self.update_lock: - # Since we parallelize op execution, there might be a race condition - # on updating the overall status of a given workflow run. 
Locking - # here serializes status updates and guarantees we won't overwrite - # previously-written updates - if not self._cache_init: - await self._init_cache() - update_workflow, tasks_to_update = self.update_cache_for(change, **kwargs) - if update_workflow or tasks_to_update: - await self.commit_cache_for(update_workflow, tasks_to_update) - - -class WorkflowRunManager: - inqueues: Dict[str, "asyncio.queues.Queue[WorkMessage]"] - runner: Optional[WorkflowRunner] - - def __init__( - self, - inqueues: Dict[str, "asyncio.queues.Queue[WorkMessage]"], - message: WorkflowExecutionMessage, - pubsubname: str, - source: str, - topic: str, - ops_dir: str = DEFAULT_OPS_DIR, - workflows_dir: str = get_workflow_dir(), - *args: Any, - **kwargs: Dict[str, Any], - ): - self.message = message - self.inqueues = inqueues - self.statestore = StateStore() - self.runner = None - self.name = str(message.run_id) - self.is_cancelled = False - self.ops_dir = ops_dir - self.workflows_dir = workflows_dir - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.task = asyncio.create_task(self.start_managing()) - self.pubsubname = pubsubname - self.topic = topic - self.source = source - - def done_callback(task: Any) -> None: - self.task = None - try: - maybe_exception = task.exception() - if maybe_exception is not None: - self.logger.warning( - f"Task {task} for workflow run {self.name} failed " - f"with exception {maybe_exception}" - ) - except (asyncio.CancelledError, asyncio.InvalidStateError): - pass - - self.task.add_done_callback(done_callback) - - def build_workflow(self, input_items: OpIOType): - content = self.message.content - spec = WorkflowParser.parse_dict( - content.workflow, - ops_dir=self.ops_dir, - parameters_override=content.parameters, - ) - workflow = Workflow(spec) - patch_workflow_sources(input_items, workflow) - io_mapper = WorkflowIOHandler(workflow) - return workflow, io_mapper - - async def start_managing(self) -> None: - content = 
self.message.content - input_items = content.input - run_id = self.message.run_id - self.inqueues[str(run_id)] = asyncio.queues.Queue() - try: - workflow, io_mapper = self.build_workflow(input_items) - except Exception: - await update_workflow( - str(run_id), - self.statestore, - RunStatus.failed, - f"Failed to build workflow {content.workflow}" - f" with parameters: {content.parameters}", - ) - raise - router = MessageRouter(self.inqueues[str(run_id)]) - self.runner = RemoteWorkflowRunner( - traceid=self.message.id, - message_router=router, - workflow=workflow, - io_mapper=io_mapper, - update_state_callback=WorkflowStateUpdate(run_id), - pubsubname=self.pubsubname, - source=self.source, - topic=self.topic, - ) - self.runner.is_cancelled = self.is_cancelled - output = await self.runner.run(input_items, run_id) - router.should_stop = True - if router.task is not None: - await router.task - if not self.is_cancelled: - await self.add_output(output) - self.logger.debug( - f"Updated statestore with output for workflow run {self.message.run_id}" - ) - await self.runner.update_state(WorkflowChange.WORKFLOW_FINISHED) - self.logger.debug(f"Marked workflow run {self.message.run_id} as done") - - async def add_output(self, output: OpIOType) -> None: - await self.add_output_to_run(str(self.message.run_id), output, self.statestore) - - @staticmethod - async def add_output_to_run(run_id: str, output: OpIOType, statestore: StateStore) -> None: - run_data = await statestore.retrieve(run_id) - run_config = RunConfig(**run_data) - run_config.set_output(output) - await statestore.store(run_id, run_config) - - async def cancel(self): - self.is_cancelled = True - if self.runner is not None: - await self.runner.cancel() - - -async def update_workflow( - run_id: str, - statestore: StateStore, - new_status: RunStatus, - reason: Optional[str] = None, - dont_update: Callable[[RunStatus], bool] = RunStatus.finished, -) -> None: - run_data = await statestore.retrieve(run_id) - run_config = 
RunConfig(**run_data) - if dont_update(run_config.details.status): - return - run_config.details.status = new_status - run_config.details.reason = reason if reason else "" - if new_status in {RunStatus.failed}: - run_config.details.start_time = run_config.details.end_time = datetime.now() - await statestore.store(run_id, run_config) - - -class Orchestrator: - app: App - inqueues: Dict[str, "asyncio.queues.Queue[WorkMessage]"] - pubsubname: str - cache_topic: str - new_workflow_topic: str - _workflow_management_tasks: Dict[UUID, WorkflowRunManager] - ops_dir: str - workflows_dir: str - - # TODO: We need some way of reloading orchestrator state to make it robust - # to crashes - - def __init__( - self, - pubsubname: str = CONTROL_STATUS_PUBSUB, - cache_topic: str = CACHE_PUBSUB_TOPIC, - status_topic: str = STATUS_PUBSUB_TOPIC, - new_workflow_topic: str = WORKFLOW_REQUEST_PUBSUB_TOPIC, - port: int = settings.GRPC_APP_PORT, - ops_dir: str = DEFAULT_OPS_DIR, - workflows_dir: str = get_workflow_dir(), - ): - self.app = App() - self.port = port - self.pubsubname = pubsubname - self.cache_topic = cache_topic - self.status_topic = status_topic - self.new_workflow_topic = new_workflow_topic - self.inqueues = {} - self.statestore = StateStore() - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self._workflow_management_tasks: Dict[UUID, WorkflowRunManager] = {} - self.ops_dir = ops_dir - self.workflows_dir = workflows_dir - - @self.app.subscribe_async(self.pubsubname, self.status_topic) - async def update(event: v1.Event): - await self.handle_update_workflow_status(self.status_topic, event) - - @self.app.subscribe_async(self.pubsubname, self.new_workflow_topic) - async def manage_workflow(event: v1.Event): - await self.handle_manage_workflow_event(self.new_workflow_topic, event) - - async def handle_update_workflow_status(self, channel: str, event: v1.Event): - async def success_callback(message: WorkMessage) -> TopicEventResponse: - if not 
message.is_valid_for_channel(channel): - self.logger.error( - f"Received unsupported message {message} for channel {channel}. Dropping it." - ) - return TopicEventResponse("drop") - if str(message.run_id) not in self.inqueues: - self.logger.info( - f"Received message {message}, but the run it references" - " is not being managed. Dropping it." - ) - return TopicEventResponse("drop") - await self.inqueues[str(message.run_id)].put(message) - return TopicEventResponse("success") - - return await accept_or_fail_event_async(event, success_callback, self._failure_callback) - - async def handle_manage_workflow_event(self, channel: str, event: v1.Event): - update_telemetry_context(extract_message_header_from_event(event).current_trace_parent) - - @add_trace - async def success_callback(message: WorkMessage) -> TopicEventResponse: - try: - if not message.is_valid_for_channel(channel): - self.logger.error(f"Received unsupported message {message}. Dropping it.") - return TopicEventResponse("drop") - await self.manage_workflow(message) - return TopicEventResponse("success") - except Exception as e: - await self.fail_workflow(str(message.run_id), f"{e.__class__.__name__}: {e}") - self.logger.exception( - f"Failed to submit workflow {message.run_id} " - f"from event {event.id} for execution" - ) - return TopicEventResponse("drop") - - with trace.get_tracer(__name__).start_as_current_span("handle_manage_workflow_event"): - return await accept_or_fail_event_async(event, success_callback, self._failure_callback) - - @add_trace - async def handle_workflow_execution_message(self, message: WorkflowExecutionMessage): - wf = WorkflowRunManager( - self.inqueues, - message, - pubsubname=self.pubsubname, - source="orchestrator", - topic=self.cache_topic, - ops_dir=self.ops_dir, - workflows_dir=self.workflows_dir, - ) - self._workflow_management_tasks[message.run_id] = wf - - def wf_done_callback(task: "asyncio.Future[Any]") -> None: - self.logger.info(f"Workflow run {message.run_id} 
finished. Freeing up space.") - self.inqueues.pop(str(message.run_id)) - self._workflow_management_tasks.pop(message.run_id) - try: - maybe_exception = task.exception() - if maybe_exception is not None: - self.logger.warning( - f"Workflow run {message.run_id} failed with exception {maybe_exception}" - ) - except (asyncio.CancelledError, asyncio.InvalidStateError): - pass - - wf.task.add_done_callback(wf_done_callback) - - async def handle_workflow_cancellation_message(self, message: WorkflowCancellationMessage): - if message.run_id in self._workflow_management_tasks: - wf = self._workflow_management_tasks[message.run_id] - if not wf.task.done(): - await wf.cancel() - else: - self.logger.warning( - f"Tried to cancel a workflow run from message {message}, " - f"but the workflow has already finished. (run id: {message.run_id})" - ) - else: - # We don't know this workflow run. Either this completed execution - # already, or it doesn't exist. Log and ignore. - self.logger.warning( - f"Tried to cancel a workflow run from message {message}, " - f"but the run doesn't exist. (run id: {message.run_id})" - ) - - async def handle_workflow_deletion_message(self, message: WorkflowDeletionMessage): - # deletion of a workflow run is handled by the data ops service - pass - - async def manage_workflow(self, message: WorkMessage) -> None: - message_handler_map = { - WorkflowExecutionMessage: self.handle_workflow_execution_message, - WorkflowCancellationMessage: self.handle_workflow_cancellation_message, - WorkflowDeletionMessage: self.handle_workflow_deletion_message, - } - handled = False - for type in message_handler_map: - if isinstance(message, type): - handled = True - await message_handler_map[type](message) - break - if not handled: - self.logger.error(f"Unable to process message {message}. 
Ignoring.") - - async def update_workflow_if_not_finished(self, run_id: str, reason: str, status: RunStatus): - await update_workflow(run_id, self.statestore, status, reason) - - async def fail_workflow(self, run_id: str, reason: str): - await self.update_workflow_if_not_finished(run_id, reason, RunStatus.failed) - - async def _failure_callback( - self, event: v1.Event, e: Exception, traceback: List[str] - ) -> TopicEventResponse: - run_id = str(run_id_from_traceparent(event.id)) - await self.fail_workflow( - run_id, f"{e.__class__.__name__}: {str(e)}\n" + "\n".join(traceback) - ) - self.logger.info(f"Marked workflow {run_id} from event {event.id} failed") - return TopicEventResponse("drop") - - @dapr_ready - async def run(self): - async def shutdown_callback(task: Any): - try: - maybe_exception = task.exception() - if maybe_exception is not None: - self.logger.warning(f"Server task failed with exception {maybe_exception}.") - except (asyncio.CancelledError, asyncio.InvalidStateError): - pass - - self.logger.info(f"Starting orchestrator listening on port {self.port}") - server_task = asyncio.create_task(self.app.run_async(self.port)) - server_task.add_done_callback(shutdown_callback) - resume_call = self._resume_workflows() - await asyncio.gather(server_task, resume_call) - - async def get_unfinished_workflows(self) -> List[RunConfig]: - keys = [] - try: - keys = await self.statestore.retrieve(RUNS_KEY) - except KeyError: - await self.statestore.store(RUNS_KEY, []) - - all_runs = cast( - List[RunConfig], [RunConfig(**r) for r in await self.statestore.retrieve_bulk(keys)] - ) - return [r for r in all_runs if not RunStatus.finished(r.details.status)] - - def run_config_to_workflow_message(self, run: RunConfig) -> WorkflowExecutionMessage: - workflow = workflow_from_input(run.workflow) - inputs_spec = workflow.inputs_spec - user_input = build_args_for_workflow(run.user_input, list(inputs_spec)) - message = WorkMessageBuilder.build_workflow_request( - run.id, 
asdict(workflow.workflow_spec), run.parameters, user_input - ) - - update_telemetry_context(message.current_trace_parent) - with trace.get_tracer(__name__).start_as_current_span("re-submit-workflow"): - return cast(WorkflowExecutionMessage, message) - - async def _resume_workflows(self): - self.logger.debug("Searching for unfinished workflow runs") - try: - runs = await self.get_unfinished_workflows() - except Exception: - raise RuntimeError( - "Failed to fetch list of unfinished workflow runs. Aborting Execution." - ) - self.logger.debug(f"Found {len(runs)} unfinished workflow run(s)") - - unfinished_tasks = [] - for run in runs: - self.logger.debug(f"Resuming workflow run {run.id}") - try: - message = self.run_config_to_workflow_message(run) - self.logger.debug(f"Created workflow execution message for run id {run.id}") - unfinished_tasks.append( - asyncio.create_task(self.handle_workflow_execution_message(message)) - ) - except Exception: - self.logger.exception(f"Failed to resume execution for workflow run {run.id}") - raise - - await asyncio.gather(*unfinished_tasks) - - -async def main(): - parser = ArgumentParser(description="TerraVibes 🌎 Orchestrator") - parser.add_argument( - "--pubsubname", - type=str, - default=CONTROL_STATUS_PUBSUB, - help="The name of the publish subscribe component to use", - ) - parser.add_argument( - "--cache-topic", - type=str, - default=CACHE_PUBSUB_TOPIC, - help="The name of the topic to use to send control messages", - ) - parser.add_argument( - "--status-topic", - type=str, - default=STATUS_PUBSUB_TOPIC, - help="The name of the topic to use to receive status messages", - ) - parser.add_argument( - "--workflow-topic", - type=str, - default=WORKFLOW_REQUEST_PUBSUB_TOPIC, - help="The name of the topic to use to receive workflow execution requests", - ) - parser.add_argument( - "--port", - type=int, - default=int(settings.GRPC_APP_PORT), - help="The port to use to listen for HTTP requests from dapr", - ) - parser.add_argument( - 
"--debug", action="store_true", default=False, help="Whether to enable remote debugging" - ) - parser.add_argument( - "--debugger-port", - type=int, - default=5678, - help="The port on which to listen to the debugger", - ) - parser.add_argument( - "--otel-service-name", - type=str, - help="The name of the service to use for OpenTelemetry collector", - default="", - ) - parser.add_argument( - "--logdir", - type=str, - help="The directory on which to save logs", - default="", - ) - parser.add_argument( - "--max-log-file-bytes", - type=int, - help="The maximum number of bytes for a log file", - default=MAX_LOG_FILE_BYTES, - ) - parser.add_argument( - "--log-backup-count", - type=int, - help="The number of log files to keep", - required=False, - default=LOG_BACKUP_COUNT, - ) - parser.add_argument( - "--loglevel", - type=str, - help="The default log level to use", - default="INFO", - ) - options = parser.parse_args() - - appname = "terravibes-orchestrator" - configure_logging( - appname=appname, - logdir=options.logdir if options.logdir else None, - max_log_file_bytes=options.max_log_file_bytes, - log_backup_count=options.log_backup_count, - logfile=f"{appname}.log", - default_level=options.loglevel, - ) - - if options.otel_service_name: - setup_telemetry(appname, options.otel_service_name) - - if options.debug: - debugpy.listen(options.debugger_port) # type: ignore - logging.info(f"Debugger enabled and listening on port {options.debugger_port}") - - orchestrator = Orchestrator( - pubsubname=options.pubsubname, - cache_topic=options.cache_topic, - status_topic=options.status_topic, - new_workflow_topic=options.workflow_topic, - port=options.port, - ) - await orchestrator.run() - - -def main_sync(): - asyncio.run(main()) - - -if __name__ == "__main__": - main_sync() diff --git a/src/vibe_server/vibe_server/server.py b/src/vibe_server/vibe_server/server.py deleted file mode 100644 index 75dbe2a5..00000000 --- a/src/vibe_server/vibe_server/server.py +++ /dev/null @@ -1,903 
+0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import asyncio -import logging -import os -from argparse import ArgumentParser, Namespace -from dataclasses import asdict -from datetime import datetime -from enum import auto -from typing import ( - Any, - Dict, - Final, - List, - Optional, - Tuple, - Union, - _type_repr, # type: ignore - cast, -) -from uuid import UUID, uuid4 - -import debugpy -import psutil -import pydantic -import requests -import uvicorn -import yaml -from dapr.conf import settings -from fastapi import Body, FastAPI, Path, Query, status -from fastapi.encoders import jsonable_encoder -from fastapi.responses import JSONResponse -from fastapi_versioning import VersionedFastAPI, version -from hydra_zen import instantiate -from opentelemetry import trace -from starlette.middleware.cors import CORSMiddleware -from strenum import StrEnum - -from vibe_common.constants import ( - ALLOWED_ORIGINS, - CONTROL_STATUS_PUBSUB, - DEFAULT_SECRET_STORE_NAME, - RUNS_KEY, - WORKFLOW_REQUEST_PUBSUB_TOPIC, -) -from vibe_common.dapr import dapr_ready -from vibe_common.messaging import WorkMessageBuilder, send -from vibe_common.secret_provider import DaprSecretConfig -from vibe_common.statestore import StateStore, TransactionOperation -from vibe_common.telemetry import ( - add_span_attributes, - add_trace, - setup_telemetry, - update_telemetry_context, -) -from vibe_core.datamodel import ( - SUMMARY_DEFAULT_FIELDS, - Message, - MetricsDict, - RunConfig, - RunConfigInput, - RunConfigUser, - RunDetails, - RunStatus, - SpatioTemporalJson, -) -from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging - -from .href_handler import BlobHrefHandler, HrefHandler, LocalHrefHandler -from .workflow import get_workflow_path, workflow_from_input -from .workflow import list_workflows as list_existing_workflows -from .workflow.input_handler import ( - build_args_for_workflow, - patch_workflow_sources, - 
validate_workflow_input, -) -from .workflow.parameter import ParameterResolver -from .workflow.workflow import Workflow - -RUN_CONFIG_SUBMISSION_EXAMPLE: Final[Dict[str, Any]] = { - "name": "example workflow run for sample region", - "workflow": "helloworld", - "parameters": {}, - "user_input": { - "start_date": "2021-02-02T00:00:00Z", - "end_date": "2021-08-02T00:00:00Z", - "geojson": { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-88.068487, 37.058836], - [-88.036059, 37.048687], - [-88.012895, 37.068984], - [-88.026622, 37.085711], - [-88.062482, 37.081461], - [-88.068487, 37.058836], - ] - ], - }, - } - ], - }, - }, -} -MOUNT_DIR: Final[str] = "/mnt" -RunList = Union[List[str], List[Dict[str, Any]], JSONResponse] -WorkflowList = Union[List[str], Dict[str, Any], JSONResponse] -CreateRunResponse = Union[Dict[str, Union[UUID, str]], JSONResponse] - - -class WorkflowReturnFormat(StrEnum): - description = auto() - yaml = auto() - - -class TerravibesProvider: - state_store: StateStore - logger: logging.Logger - href_handler: HrefHandler - - def __init__(self, href_handler: HrefHandler): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.state_store = StateStore() - self.href_handler = href_handler - - @add_trace - def summarize_runs(self, runs: List[RunConfig], fields: List[str] = SUMMARY_DEFAULT_FIELDS): - """Summarizes a list of runs given a list of fields. - - Supports accessing members deeper in the object by using dots to separate levels. - For example, to extract the "status" member from "details", use "details.status". - """ - - summarized_runs = [{k: v for k, v in asdict(e).items() if k in fields} for e in runs] - for field in fields: - if "." 
not in field: - continue - for i, src in enumerate([asdict(r) for r in runs]): - prefixes, suffix = field.rsplit(".", maxsplit=1) - obj = src - for prefix in prefixes.split("."): - try: - obj = obj[prefix] - summarized_runs[i].update({field: obj[suffix]}) - except TypeError as e: - # We are trying to get a subfield from a field that - # didn't exist in the first place. `obj` is None, so we - # won't be able to get it here - raise KeyError( - f"Workflow run with id {runs[i].id} does not have field {field}" - ) from e - return summarized_runs - - @add_trace - def system_metrics(self) -> MetricsDict: - """Returns a dict of system metrics.""" - - load_avg: Tuple[float, float, float] = psutil.getloadavg() - cpu_usage: float = psutil.cpu_percent() - mem = psutil.virtual_memory() - - df: Optional[int] - if isinstance(self.href_handler, BlobHrefHandler): - df = None - else: - df = psutil.disk_usage(MOUNT_DIR).free - - return MetricsDict( - load_avg=load_avg, - cpu_usage=cpu_usage, - free_mem=mem.free, - used_mem=mem.used, - total_mem=mem.total, - disk_free=df, - ) - - async def root(self) -> Message: - return Message(message="REST API server is running") - - @add_trace - async def list_workflows( - self, - workflow: Optional[str] = None, - return_format: str = WorkflowReturnFormat.description, - ) -> WorkflowList: - if not workflow: - return [i for i in list_existing_workflows() if "private" not in i] - try: - if return_format == WorkflowReturnFormat.description: - wf = Workflow.build(get_workflow_path(workflow)) - wf_spec = wf.workflow_spec - param_resolver = ParameterResolver(wf_spec.workflows_dir, wf_spec.ops_dir) - parameters = param_resolver.resolve(wf_spec) - param_defaults = {k: v.default for k, v in parameters.items()} - param_descriptions = {k: v.description for k, v in parameters.items()} - description = wf.workflow_spec.description - description.parameters = param_descriptions # type: ignore - return { - "name": wf.name, - "inputs": {k: _type_repr(v) for k, v in 
wf.inputs_spec.items()}, - "outputs": {k: _type_repr(v) for k, v in wf.output_spec.items()}, - "parameters": param_defaults, - "description": asdict(wf.workflow_spec.description), - } - elif return_format == WorkflowReturnFormat.yaml: - with open(get_workflow_path(workflow)) as f: - yaml_content = yaml.safe_load(f) - return yaml_content - else: - return JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content=asdict(Message(f"Invalid return format: {return_format}")), - ) - except FileNotFoundError: - return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content=asdict(Message(f'Workflow "{workflow}" not found')), - ) - except Exception as e: - return JSONResponse( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - content=asdict(Message(f"Internal server error: {str(e)}")), - ) - - @add_trace - async def list_runs( - self, - ids: Optional[List[UUID]], - page: Optional[int], - items: Optional[int], - fields: Optional[List[str]], - ) -> RunList: - def paginate( - things: List[Any], page: Optional[int] = 0, items: Optional[int] = 0 - ) -> List[Any]: - if items is None or items <= 0: - return things - if page is None or page <= 0: - page = 0 - return things[items * page : items * (page + 1)] - - ret: Union[List[str], List[Dict[str, Any]]] = [] - try: - if ids is None: - all_ids = await self.list_runs_from_store() - if fields is None: - return all_ids - - ret = self.summarize_runs(await self.get_bulk_runs_by_id(all_ids), fields) - else: - ids = cast(List[Any], ids) - if not all([isinstance(i, UUID) for i in ids]): - return JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content=asdict(Message("Provided ids must be UUIDs")), - ) - if fields is None: - ret = self.summarize_runs(await self.get_bulk_runs_by_id(ids)) - else: - ret = self.summarize_runs(await self.get_bulk_runs_by_id(ids), fields) - - return paginate(ret, page, items) - except (KeyError, IndexError): - reason = f"Failed to get id(s) {ids}" - self.logger.debug(reason) - 
return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, content=asdict(Message(reason)) - ) - - async def describe_run( - self, - run_id: UUID = Path(..., title="The ID of the workflow execution to get."), - ): - try: - run = (await self.get_bulk_runs_by_id([run_id]))[0] - run_config_user = RunConfigUser.from_runconfig(run) - return jsonable_encoder(self.href_handler.handle(run_config_user)) - except KeyError: - return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content=asdict(Message(f'Workflow execution "{run_id}" not found')), - ) - - @add_trace - async def cancel_run( - self, - run_id: UUID = Path(..., title="The ID of the workflow run to cancel."), - ) -> JSONResponse: - try: - await self.state_store.retrieve(str(run_id)) - except KeyError: - return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content=asdict(Message(f"Workflow execution {run_id} not found")), - ) - - message = WorkMessageBuilder.build_workflow_cancellation(run_id) - - response = send( - message, - "rest-api", - CONTROL_STATUS_PUBSUB, - WORKFLOW_REQUEST_PUBSUB_TOPIC, - ) - - if not response: - raise RuntimeError("Failed to submit workflow cancellation request.") - self.logger.debug(f"Successfully posted workflow cancellation request for run {run_id}") - - return JSONResponse( - status_code=status.HTTP_202_ACCEPTED, - content=asdict(Message(f"Requested cancellation of workflow run {run_id}")), - ) - - @add_trace - async def delete_run( - self, - run_id: UUID = Path(..., title="The ID of the workflow run to delete."), - ) -> JSONResponse: - try: - run_data = await self.state_store.retrieve(str(run_id)) - except KeyError: - return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content=asdict(Message(f"Workflow execution {run_id} not found")), - ) - - run_config = RunConfig(**run_data) - - if not RunStatus.finished(run_config.details.status): - return JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content=asdict(Message("Cannot delete an 
unfinished workflow run.")), - ) - - message = WorkMessageBuilder.build_workflow_deletion(run_id) - - response = send( - message, - "rest-api", - CONTROL_STATUS_PUBSUB, - WORKFLOW_REQUEST_PUBSUB_TOPIC, - ) - - if not response: - raise RuntimeError("Failed to submit workflow deletion request.") - self.logger.debug(f"Successfully posted workflow deletion request for run {run_id}") - - return JSONResponse( - status_code=status.HTTP_200_OK, - content=asdict(Message(f"Requested deletion of workflow run {run_id}")), - ) - - async def create_run(self, runConfig: RunConfigInput) -> CreateRunResponse: - response: JSONResponse - try: - if ( - isinstance(runConfig.workflow, str) - and runConfig.workflow not in list_existing_workflows() - ): - raise ValueError(f'Workflow "{runConfig.workflow}" unknown') - - workflow = workflow_from_input(runConfig.workflow) - inputs_spec = workflow.inputs_spec - # Build and validate inputs - user_input = build_args_for_workflow(runConfig.user_input, list(inputs_spec)) - # Validate workflow inputs and potentially patch workflow for input fan-out - validate_workflow_input(user_input, inputs_spec) - patch_workflow_sources(user_input, workflow) - - run_ids: List[str] = await self.list_runs_from_store() - new_id, new_run = self.create_new_run(runConfig, run_ids) - add_span_attributes({"run_id": new_id}) - - if new_id is None: - raise RuntimeError("Failed to create new run id") - await self.update_run_state(run_ids, new_run) - - # Update run id with parsed workflow and user input - new_run.workflow = asdict(workflow.workflow_spec) - new_run.user_input = user_input - self.submit_work(new_run) - - response = JSONResponse( - status_code=status.HTTP_201_CREATED, - content=asdict( - Message( - id=new_id, - location=f"/runs/{new_id}", - message="Workflow created and queued for execution", - ) - ), - ) - except ( - ValueError, - pydantic.ValidationError, - requests.exceptions.RequestException, - ) as e: - self.logger.exception("Failed to submit workflow to 
worker") - response = JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content=asdict( - Message(f"Unable to run workflow with provided parameters. {str(e)}") - ), - ) - except FileNotFoundError as e: - self.logger.exception("Failed to submit workflow") - response = JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content=asdict(Message(f"Unable to find workflow with name {str(e)}.")), - ) - except Exception as e: - self.logger.exception("Failed to update workflow state") - response = JSONResponse( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - content=asdict( - Message(f"Unable to run workflow with provided parameters. {str(e)}") - ), - ) - return response - - @add_trace - async def resubmit_run(self, run_id: UUID) -> CreateRunResponse: - try: - run = await self.state_store.retrieve(str(run_id)) - except KeyError: - return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content=asdict(Message(f"Workflow run {run_id} not found")), - ) - run_config = RunConfigInput( - **{ - k: v - for k, v in run.items() - if k in ("name", "workflow", "parameters", "user_input") - } - ) - return await self.create_run(run_config) - - def create_new_run(self, workflow: RunConfigInput, run_ids: List[str]): - new_id = str(uuid4()) - - workflow_data = {k: v for k, v in asdict(workflow).items() if k != "user_input"} - workflow_data["id"] = new_id - workflow_data["details"] = RunDetails() # type: ignore - # Set workflow submission time - workflow_data["details"].submission_time = datetime.utcnow() - workflow_data["task_details"] = {} - workflow_data["user_input"] = workflow.user_input - if isinstance(workflow.user_input, SpatioTemporalJson): - workflow_data["spatio_temporal_json"] = workflow.user_input - else: - workflow_data["spatio_temporal_json"] = None - - new_run = RunConfig(**workflow_data) - run_ids.append(new_id) - - return new_id, new_run - - @add_trace - async def update_run_state(self, run_ids: List[str], new_run: RunConfig): - await 
self.state_store.transaction( - [ - cast( - TransactionOperation, - { - "key": RUNS_KEY, - "operation": "upsert", - "value": run_ids, - }, - ), - cast( - TransactionOperation, - { - "key": str(new_run.id), - "operation": "upsert", - "value": new_run, - }, - ), - ] - ) - - @add_trace - async def list_runs_from_store(self) -> List[str]: - try: - return await self.state_store.retrieve(RUNS_KEY) - except KeyError: - # No workflows exist yet, ignore the failure - return [] - - @add_trace - async def get_bulk_runs_by_id(self, run_ids: Union[List[str], List[UUID]]) -> List[RunConfig]: - run_data = await self.state_store.retrieve_bulk([str(id) for id in run_ids]) - run_id_to_data = {r["id"]: r for r in run_data} - run_task_ids = [(r["id"], task) for r in run_data for task in r.get("tasks", [])] - task_data = await self.state_store.retrieve_bulk([f"{i[0]}-{i[1]}" for i in run_task_ids]) - for run_task_id, task_datum in zip(run_task_ids, task_data): - run_id, task_name = run_task_id - run_datum = run_id_to_data[run_id] - run_datum["task_details"][task_name] = task_datum - runs = [RunConfig(**cast(Dict[str, Any], data)) for data in run_data] - return runs - - def submit_work(self, new_run: RunConfig): - assert isinstance(new_run.workflow, dict) - assert isinstance(new_run.user_input, dict) - message = WorkMessageBuilder.build_workflow_request( - new_run.id, new_run.workflow, new_run.parameters, new_run.user_input - ) - - tracer = trace.get_tracer(__name__) - update_telemetry_context(message.id) - - with tracer.start_as_current_span("submit-workflow"): - response = send( - message, - "rest-api", - CONTROL_STATUS_PUBSUB, - WORKFLOW_REQUEST_PUBSUB_TOPIC, - ) - - if not response: - raise RuntimeError("Failed to submit workflow for processing.") - self.logger.debug(f"Successfully posted workflow message for run {new_run.id}") - - -class TerravibesAPI(FastAPI): - uvicorn_config: uvicorn.Config - terravibes: TerravibesProvider - - def __init__( - self, - href_handler: HrefHandler, - 
allowed_origins: List[str] = ALLOWED_ORIGINS, - host: str = "127.0.0.1", - port: int = 8000, - reload: bool = False, - debug: bool = False, - *args: Any, - **kwargs: Any, - ): - super().__init__(*args, **kwargs) - - self.terravibes = TerravibesProvider(href_handler) - - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.logger.info(f"TerraVibes server: using {allowed_origins} as allowed origins") - self.description = """# TerraVibes REST API - - TerraVibes is the execution engine of the FarmVibes platform, a - containerized, distributed system that can run machine learning models at scale. - TerraVibes uses Kubernetes for container orchestration and supports a variety of - machine learning frameworks, as well as various data sources. - - With TerraVibes, farmers can run geospatial ingestion and machine learning models - in the cloud or on-premises, depending on their needs. The platform is - designed to be highly scalable and flexible, so userscan start with a - small deployment and scale up as needed. - - ### Endpoints - - - `GET /`: Root endpoint - - `GET /system-metrics`: Get system metrics - - ## Workflows - - The base computation unit users interact with is a workflow. A workflow is a - collection of tasks that are arranged in a computational graph. Each task - represents a single operation, and the graph represents the dependencies - between the tasks. For example, a workflow might have a task that downloads - satellite imagery, a task that runs a machine learning model on the imagery, - and a task that uploads the results to a cloud storage bucket. The tasks are - executed in parallel, and the results of each task are passed to the next task - in the graph. - - ### Endpoints - - - `GET /workflows`: List all workflows - - `GET /workflows/{workflow_name}`: Get a workflow by name, either as - JSON description, or YAML graph implementation - - ## Runs - - Every time a workflow is executed, the API creates a new run. 
A run is a - specific instance of a workflow, and it is uniquely identified by a run ID. - The run ID is a UUID, and it is returned to the user when the workflow is - submitted. The run ID can be used to query the status of the workflow, and it - can be used to cancel the workflow. - - ### Endpoints - - - `GET /runs`: Lists all the workflow runs currently in the system. - - `GET /runs/{run_id}`: Get information of a specific run. - - `POST /runs`: Submit a new workflow run. - - `POST /runs/{run_id}/cancel`: Cancel a workflow run. - """ - - self.openapi_tags = [ - { - "name": "workflows", - "description": ( - "Operations on workflows, including listing, describing, " - "and obtaining workflow definition YAMLs." - ), - "externalDocs": { - "description": "FarmVibes.AI Workflow Documentation", - "url": ( - "https://github.com/microsoft/farmvibes-ai/blob/main/documentation/" - "WORKFLOWS.md" - ), - }, - }, - { - "name": "runs", - "description": ( - "Operations on workflow runs, including submitting, listing, " - "describing, and cancelling runs.", - ), - }, - ] - - @self.get("/") - @version(0) - async def terravibes_root() -> Message: - """Root endpoint.""" - return await self.terravibes.root() - - @self.get("/system-metrics") - @version(0) - async def terravibes_metrics() -> MetricsDict: - """Get system metrics, including CPU usage, memory usage, and storage disk space.""" - return self.terravibes.system_metrics() - - @self.get("/workflows", tags=["workflows"], response_model=None) - @version(0) - async def terravibes_list_workflows() -> WorkflowList: - """List all workflows available in FarmVibes.AI.""" - return await self.terravibes.list_workflows() - - @self.get("/workflows/{workflow:path}", tags=["workflows"]) - @version(0) - async def terravibes_describe_workflow( - workflow: str = Path( - ..., title="Workflow name", description="The name of the workflow to be described." 
- ), - return_format: str = Query( - "description", - title="Return format", - description="The format to return the workflow in [description, yaml].", - ), - ): - """Get a workflow by name, either as JSON description, or YAML graph implementation.""" - return await self.terravibes.list_workflows(workflow, return_format) - - @self.get("/runs", tags=["runs"], response_model=None) - @version(0) - async def terravibes_list_runs( - ids: Optional[List[UUID]] = Query( - None, - description=( - "The list of run IDs to retrieve. If not provided, all runs are returned." - ), - ), - page: Optional[int] = Query(0, description="The page number to retrieve."), - items: Optional[int] = Query(0, description="The number of items per page."), - fields: Optional[List[str]] = Query( - None, - description=( - "Fields to return alongside each run id. " - "If not provided, only run ids are returned." - ), - ), - ) -> RunList: - """List all the workflow runs currently in the system.""" - return await self.terravibes.list_runs(ids, page, items, fields) - - @self.get("/runs/{run_id}", tags=["runs"]) - @version(0) - async def terravibes_describe_run( - run_id: UUID = Path( - ..., - title="Run ID", - description="The ID of the workflow execution to get.", - ), - ): - """Get information of a specific run.""" - return await self.terravibes.describe_run(run_id) - - @self.post("/runs/{run_id}/cancel", tags=["runs"]) - @version(0) - async def terravibes_cancel_run( - run_id: UUID = Path( - ..., - title="Run ID", - description="The ID of the workflow run to cancel.", - ), - ) -> JSONResponse: - """Cancel a workflow run.""" - return await self.terravibes.cancel_run(run_id) - - @self.delete("/runs/{run_id}", tags=["runs"]) - @version(0) - async def terravibes_delete_run( - run_id: UUID = Path( - ..., - title="Run ID", - description="The ID of the workflow run to delete.", - ), - ) -> JSONResponse: - """Delete data associated with a workflow run (if not shared by other runs). 
- - For a detailed overview on how data is managed in FarmVibes.AI, please refer to the - [documentation](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/CACHE.html). - """ - return await self.terravibes.delete_run(run_id) - - @self.post("/runs/{run_id}/resubmit", tags=["runs"], response_model=None) - @version(0) - async def terravibes_resubmit_run( - run_id: UUID = Path( - ..., - title="Run ID", - description="The ID of the workflow run to resubmit.", - ), - ) -> CreateRunResponse: - """Resubmit a workflow run.""" - return await self.terravibes.resubmit_run(run_id) - - @self.post("/runs", tags=["workflows", "runs"], response_model=None) - @version(0) - async def terravibes_create_run( - runConfig: RunConfigInput = Body( - default=None, - example=RUN_CONFIG_SUBMISSION_EXAMPLE, - description="The configuration and inputs of the workflow run to submit.", - ), - ) -> CreateRunResponse: - """Submit a new workflow run.""" - return await self.terravibes.create_run(runConfig) - - self.versioned_wrapper = VersionedFastAPI( - self, version_format="{major}", prefix_format="/v{major}" - ) - self.versioned_wrapper.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials="*" not in allowed_origins, - allow_methods=["*"], - allow_headers=["*"], - ) - self.uvicorn_config = uvicorn.Config( - app=self.versioned_wrapper, - host=host, - port=port, - reload=reload, - debug=debug, - log_config=None, - ) - - async def run(self): - server = uvicorn.Server(self.uvicorn_config) - await server.serve() - - -def build_href_handler(options: Namespace) -> HrefHandler: - logger = logging.getLogger(f"{__name__}.build_href_handler") - if options.terravibes_host_assets_dir: - return LocalHrefHandler(options.terravibes_host_assets_dir) - else: - try: - storage_account_connection_string = instantiate( - DaprSecretConfig( - store_name=DEFAULT_SECRET_STORE_NAME, - secret_name=os.environ["BLOB_STORAGE_ACCOUNT_CONNECTION_STRING"], - 
key_name=os.environ["BLOB_STORAGE_ACCOUNT_CONNECTION_STRING"], - ) - ) - except Exception: - storage_account_connection_string = "" - logger.exception( - "Failed to load blob storage account connection string from Dapr secret store. " - "Expect describing runs to fail due to an inability to resolve asset hrefs." - ) - return BlobHrefHandler( - connection_string=storage_account_connection_string, - ) - - -async def main() -> None: - parser = ArgumentParser(description="TerraVibes 🌎 REST API Server") - parser.add_argument("--host", type=str, default="0.0.0.0", help="IP address to listen on") - parser.add_argument( - "--port", - type=int, - default=int(settings.HTTP_APP_PORT), - help="Port to listen on", - ) - parser.add_argument( - "--debug", - default=False, - action="store_true", - help="Whether to enable debug support", - ) - parser.add_argument( - "--reload", - default=False, - action="store_true", - help="Whether to reload the server on file change", - ) - parser.add_argument( - "--debugger-port", - type=int, - default=5678, - help="The port on which to listen to the debugger", - ) - parser.add_argument( - "--terravibes-host-assets-dir", - type=str, - help="The asset directory on the host", - default="", - ) - parser.add_argument( - "--otel-service-name", - type=str, - help="The name of the service to use for OpenTelemetry collector", - default="", - ) - parser.add_argument( - "--logdir", - type=str, - help="The directory on which to save logs", - default="", - ) - parser.add_argument( - "--max-log-file-bytes", - type=int, - help="The maximum number of bytes for a log file", - default=MAX_LOG_FILE_BYTES, - ) - parser.add_argument( - "--log-backup-count", - type=int, - help="The number of log files to keep", - required=False, - default=LOG_BACKUP_COUNT, - ) - parser.add_argument( - "--loglevel", - type=str, - help="The default log level to use", - default="INFO", - ) - - options = parser.parse_args() - - appname = "terravibes-rest-api" - configure_logging( - 
appname=appname, - logdir=options.logdir if options.logdir else None, - max_log_file_bytes=options.max_log_file_bytes, - log_backup_count=options.log_backup_count, - logfile=f"{appname}.log", - default_level=options.loglevel, - ) - - if options.otel_service_name: - setup_telemetry(appname, options.otel_service_name) - - if options.debug: - debugpy.listen(options.debugger_port) # type: ignore - logging.info(f"Debugger enabled and listening on port {options.debugger_port}") - - terravibes_api = TerravibesAPI( - href_handler=build_href_handler(options), - allowed_origins=ALLOWED_ORIGINS, - host=options.host, - port=options.port, - reload=options.reload, - debug=options.debug, - title="TerraVibes 🌎 Spatial API", - description="Low-code planetary analytics with powerful operators", - ) - - await start_service(terravibes_api) - - -@dapr_ready -async def start_service(terravibes_api: TerravibesAPI) -> None: - await terravibes_api.run() - - -def main_sync(): - asyncio.run(main()) - - -if __name__ == "__main__": - main_sync() diff --git a/src/vibe_server/vibe_server/sniffer.py b/src/vibe_server/vibe_server/sniffer.py deleted file mode 100644 index 75d6d664..00000000 --- a/src/vibe_server/vibe_server/sniffer.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import argparse -import logging -from typing import List - -from cloudevents.sdk.event import v1 -from dapr.conf import settings -from dapr.ext.grpc import App - -from vibe_common.constants import ( - CACHE_PUBSUB_TOPIC, - CONTROL_PUBSUB_TOPIC, - CONTROL_STATUS_PUBSUB, - STATUS_PUBSUB_TOPIC, -) -from vibe_common.dapr import dapr_ready -from vibe_common.messaging import event_to_work_message -from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging - - -class Sniffer: - app: App - topics: List[str] - - def __init__(self, pubsub: str, topics: List[str], port: int = settings.GRPC_APP_PORT): - self.app = App() - self.port = port - self.pubsub = pubsub - self.topics = topics - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - self.logger.info(f"Will subscribe to topics {topics}") - for topic in self.topics: - - @self.app.subscribe(self.pubsub, topic) - def log(event: v1.Event) -> None: - self.log(event, topic) - - def log(self, event: v1.Event, topic: str) -> None: - try: - message = event_to_work_message(event) - except Exception: - raise RuntimeError(f"Failed to decode event with id {event.id}") - self.logger.info(f"{event.source} => {topic}: {message}") - - @dapr_ready - def run(self): - self.app.run(self.port) - - -def main(): - parser = argparse.ArgumentParser( - "vibe-sniffer", description="Sniffs TerraVibes queues and logs them" - ) - - parser.add_argument( - "--logdir", - type=str, - default=None, - help=( - "Path to which to save logs " - "(if specified, duplicate messages will be persisted for all services)" - ), - ) - parser.add_argument( - "--max-log-file-bytes", - type=int, - help="The maximum number of bytes for a log file", - default=MAX_LOG_FILE_BYTES, - ) - parser.add_argument( - "--log-backup-count", - type=int, - help="The number of log files to keep", - required=False, - default=LOG_BACKUP_COUNT, - ) - parser.add_argument( - "--port", - type=int, - default=3000, - help="Port to listen on 
for dapr connections", - ) - parser.add_argument( - "--pubsub", - type=str, - default=CONTROL_STATUS_PUBSUB, - help="dapr pubsub to connect to", - ) - parser.add_argument( - "--topics", - nargs="+", - default=[CONTROL_PUBSUB_TOPIC, CACHE_PUBSUB_TOPIC, STATUS_PUBSUB_TOPIC], - help="the topics to sniff", - ) - args = parser.parse_args() - - configure_logging( - logdir=None if args.logdir is None else args.logdir, - max_log_file_bytes=args.max_log_file_bytes if args.max_log_file_bytes else None, - log_backup_count=args.log_backup_count if args.log_backup_count else None, - appname="sniffer", - ) - - sniffer = Sniffer( - pubsub=args.pubsub, - topics=args.topics, - port=args.port, - ) - sniffer.run() - - -if __name__ == "__main__": - main() diff --git a/src/vibe_server/vibe_server/workflow/__init__.py b/src/vibe_server/vibe_server/workflow/__init__.py deleted file mode 100644 index b2f600bf..00000000 --- a/src/vibe_server/vibe_server/workflow/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -import re -from typing import Any, Dict, List, Union - -from ..workflow.spec_parser import WorkflowParser, get_workflow_dir -from ..workflow.workflow import Workflow - - -def get_workflow_path(name: str, base: str = get_workflow_dir()) -> str: - return os.path.join(base, name) + ".yaml" - - -def workflow_from_input(input: Union[str, Dict[str, Any]]) -> Workflow: - workflow: Workflow - if isinstance(input, str): - workflow = Workflow.build(get_workflow_path(input)) - else: - workflow = Workflow(WorkflowParser.parse_dict(input)) - return workflow - - -def list_workflows() -> List[str]: - "Returns a list of workflows to be loaded by `load_workflow_by_name`" - - workflow_dir = get_workflow_dir() - if not os.path.exists(workflow_dir): - return [] - - workflows: List[str] = [] - for dirpath, _, filenames in os.walk(workflow_dir): - for filename in filenames: - if filename.endswith(".yaml"): - workflows.append( - re.sub( - # Both patterns here are guaranteed to be present - # in the input string. We don't want them. - f"{workflow_dir}/|\\.yaml", - "", - os.path.join(dirpath, filename), - ) - ) - return workflows diff --git a/src/vibe_server/vibe_server/workflow/description_validator.py b/src/vibe_server/vibe_server/workflow/description_validator.py deleted file mode 100644 index bcff4b70..00000000 --- a/src/vibe_server/vibe_server/workflow/description_validator.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, Dict, List, Tuple, Union - -from .parameter import ParameterResolver -from .spec_parser import WorkflowSpec - - -def unpack_description(description: Union[str, Tuple[str], None]) -> str: - if isinstance(description, tuple): - return description[0] - else: - return "" if description is None else description - - -class WorkflowDescriptionValidator: - @classmethod - def _validate_node_against_description( - cls, - node_name: str, - node_type: str, - description: Dict[str, str], - workflow_name: str, - ): - if node_name not in description or not description[node_name]: - raise ValueError( - f"{node_type} {node_name} in workflow {workflow_name} is missing a description." - ) - - @classmethod - def _validate_description_against_nodes( - cls, desc_nodes: List[str], node_type: str, nodes: Dict[str, Any], workflow_name: str - ): - for name in desc_nodes: - if name not in nodes: - raise ValueError( - f"{name} in the workflow description does not match " - f"any {node_type} in workflow {workflow_name}" - ) - - @classmethod - def _validate_sources(cls, spec: WorkflowSpec): - for source_name in spec.sources.keys(): - cls._validate_node_against_description( - source_name, "Source", spec.description.inputs, spec.name - ) - - @classmethod - def _validate_sinks(cls, spec: WorkflowSpec): - for sink_name in spec.sinks.keys(): - cls._validate_node_against_description( - sink_name, "Sink", spec.description.outputs, spec.name - ) - - @classmethod - def _validate_parameters(cls, workflow_spec: WorkflowSpec): - param_resolver = ParameterResolver(workflow_spec.workflows_dir, workflow_spec.ops_dir) - parameters = param_resolver.resolve(workflow_spec) - param_descriptions = {k: unpack_description(v.description) for k, v in parameters.items()} - - for param_name in workflow_spec.parameters.keys(): - cls._validate_node_against_description( - param_name, "Parameter", param_descriptions, workflow_spec.name - ) - - @classmethod - def _validate_tasks(cls, workflow_spec: 
WorkflowSpec): - for task_name in workflow_spec.tasks.keys(): - cls._validate_node_against_description( - task_name, "Task", workflow_spec.description.task_descriptions, workflow_spec.name - ) - - @classmethod - def _validate_description(cls, spec: WorkflowSpec): - desc = spec.description - if not desc.short_description: - raise ValueError(f"Short description is missing in workflow {spec.name}.") - - # Make sure every node in the description matches to a source/sink/parameter - for desc_nodes, node_type, node in [ - (desc.inputs, "sources", spec.sources), - (desc.outputs, "sinks", spec.sinks), - (desc.parameters, "parameters", spec.parameters), - ]: - cls._validate_description_against_nodes(desc_nodes.keys(), node_type, node, spec.name) - - @classmethod - def validate(cls, workflow_spec: WorkflowSpec): - cls._validate_sources(workflow_spec) - cls._validate_sinks(workflow_spec) - cls._validate_parameters(workflow_spec) - cls._validate_tasks(workflow_spec) - cls._validate_description(workflow_spec) diff --git a/src/vibe_server/vibe_server/workflow/graph.py b/src/vibe_server/vibe_server/workflow/graph.py deleted file mode 100644 index 1edd98da..00000000 --- a/src/vibe_server/vibe_server/workflow/graph.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - - -from collections import defaultdict -from enum import IntEnum -from typing import Callable, Dict, Generic, Iterable, Iterator, List, Set, Tuple, TypeVar -from warnings import warn - -T = TypeVar("T") -V = TypeVar("V") -Edge = Tuple[T, T, V] -Adjacency = Set[Tuple[T, V]] - - -class VisitStatus(IntEnum): - new = 0 - visiting = 1 - visited = 2 - - -class Graph(Generic[T, V]): - adjacency_list: Dict[T, Adjacency[T, V]] - - def __init__(self): - self.adjacency_list = {} - - def add_node(self, node: T): - if node in self.adjacency_list: - warn(f"Trying to add already existing node {node} to graph. 
Ignoring.") - else: - self.adjacency_list[node] = set() - - def add_edge(self, origin: T, destination: T, label: V): - if origin not in self.adjacency_list: - warn(f"Tried to add edge from {origin} to {destination}, but {origin} not in graph") - self.add_node(origin) - if destination not in self.adjacency_list: - warn( - f"Tried to add edge from {origin} to {destination}, but {destination} not in graph" - ) - self.add_node(destination) - self.adjacency_list[origin].add((destination, label)) - - def relabel(self, edge: Edge[T, V], new_label: V): - """Changes an existing edge's label to `new_label`.""" - self.adjacency_list[edge[0]].remove((edge[1], edge[2])) - self.adjacency_list[edge[0]].add((edge[1], new_label)) - - @property - def nodes(self) -> List[T]: - return list(self.adjacency_list.keys()) - - @property - def edges(self) -> List[Edge[T, V]]: - return [ - (origin, destination[0], destination[1]) - for origin, destinations in self.adjacency_list.items() - for destination in destinations - ] - - def neighbors(self, vertex: T) -> Set[T]: - return set(e[0] for e in self.adjacency_list[vertex]) - - def edges_from(self, vertex: T) -> Iterable[Edge[T, V]]: - return [(vertex, *dst) for dst in self.adjacency_list[vertex]] - - def zero_in_degree_nodes(self) -> Iterable[T]: - in_degrees: Dict[T, int] = {k: 0 for k in self.adjacency_list} - for destinations in self.adjacency_list.values(): - for destination in destinations: - in_degrees[destination[0]] += 1 - return [k for k, v in in_degrees.items() if v == 0] - - def _dfs_impl( - self, - vertex: T, - visited: Dict[T, Tuple[VisitStatus, int]], - level: int = 0, - visit: Callable[[int, T, VisitStatus], None] = lambda i, v, s: None, - ) -> None: - if len(visited) == 0: - for v in self.nodes: - visited[v] = (VisitStatus.new, 0) - - if visited[vertex][0] == VisitStatus.visited and level < visited[vertex][1]: - return - - visit(level, vertex, VisitStatus.visiting) - for neighbor in self.neighbors(vertex): - try: - if 
visited[neighbor][0] == VisitStatus.visiting: - raise ValueError(f"Graph has a cycle with at least node {neighbor}") - elif visited[neighbor][0] == VisitStatus.new or (level + 1 > visited[neighbor][1]): - # Haven't visited this, or need to revisit at a higher level - self._dfs_impl(neighbor, visited, level + 1, visit) - except KeyError: - # We just reached a node we didn't even know existed - # This is probably a terminal node - warn(f"Found node {neighbor}, but it is not in the list of nodes.") - self._dfs_impl(neighbor, visited, level + 1, visit) - - visit(level, vertex, VisitStatus.visited) - - def has_cycle(self) -> bool: - try: - self.topological_sort() - return False - except ValueError as e: - if "cycle" in str(e): - return True - raise - - def topological_sort(self) -> Iterable[List[T]]: - """Performs topological sort in a graph. - - Returns an iterable for all connected components. Raises exception if - the graph has a cycle. - """ - visited: Dict[T, Tuple[VisitStatus, int]] = {k: (VisitStatus.new, 0) for k in self.nodes} - - def visit(level: int, vertex: T, status: VisitStatus): - visited[vertex] = status, level - - for source in self.zero_in_degree_nodes(): - assert visited[source][0] == VisitStatus.new, f"Visited source {source} more than once" - visit(0, source, VisitStatus.visiting) - for neighbor in self.neighbors(source): - self._dfs_impl(neighbor, level=1, visit=visit, visited=visited) - visit(0, source, VisitStatus.visited) - if not all([v[0] == VisitStatus.visited for v in visited.values()]): - raise ValueError( - "Not all nodes visited in topological sort. This indicates " - "disconnected components in the graph." 
- ) - - ordering: Dict[int, List[T]] = defaultdict(list) - for node, (_, level) in visited.items(): - ordering[level].append(node) - return (ordering[k] for k in sorted(ordering.keys())) - - def __iter__(self) -> Iterator[List[T]]: - return (v for v in self.topological_sort()) diff --git a/src/vibe_server/vibe_server/workflow/input_handler.py b/src/vibe_server/vibe_server/workflow/input_handler.py deleted file mode 100644 index 6f5e2d3b..00000000 --- a/src/vibe_server/vibe_server/workflow/input_handler.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -from typing import ( - Any, - Dict, - List, - Union, - _type_repr, # type: ignore - cast, -) - -from vibe_common.input_handlers import gen_stac_item_from_bounds -from vibe_core.data.core_types import DataVibeType, InnerIOType, OpIOType, TypeDictVibe -from vibe_core.data.utils import StacConverter, deserialize_stac, get_base_type, is_container_type -from vibe_core.datamodel import SpatioTemporalJson -from vibe_core.utils import ensure_list - -from .workflow import EdgeLabel, EdgeType, GraphNodeType, InputFanOut, Workflow, parse_edge_string - -LOGGER = logging.getLogger(__name__) - - -def add_node(workflow: Workflow, node: GraphNodeType): - workflow.index[node.name] = node - workflow.add_node(node) - - def rollback(): - del workflow.adjacency_list[node] - del workflow.index[node.name] - - return rollback - - -def source_to_edge(workflow: Workflow, fan_node: GraphNodeType, source: str, destination: str): - output_port = cast(InputFanOut, fan_node.spec).output_port - workflow._add_workflow_edge_to_graph(f"{fan_node.name}.{output_port}", destination) - node_name, port_name = parse_edge_string(destination, maxsplit=-1) - workflow.sources[workflow.index[node_name]].remove(port_name) - if not workflow.sources[workflow.index[node_name]]: - del workflow._sources[workflow.index[node_name]] - workflow.source_mappings[source].remove(destination) - - def 
rollback(): - workflow._sources[workflow.index[node_name]].append(port_name) - workflow.source_mappings[source].append(f"{node_name}.{port_name}") - - return rollback - - -def add_fan_source(workflow: Workflow, node: GraphNodeType, source: str): - input_port = cast(InputFanOut, node.spec).input_port - workflow._sources[workflow.index[node.name]] = [input_port] - workflow.source_mappings[source].append(f"{node.name}.{input_port}") - - def rollback(): - del workflow._sources[node] - workflow.source_mappings[source].remove(f"{node.name}.{input_port}") - - return rollback - - -def recompute_parallelism(workflow: Workflow): - for edge in workflow.edges: - new_label = EdgeLabel(*edge[-1][:-1], EdgeType.single) - workflow.relabel(edge, new_label) - fanout, fanin = workflow._find_fan_out_fan_in_edges() - workflow._update_edges(fanout, fanin) - - -def rollback_parallelism(workflow: Workflow): - def rollback(): - recompute_parallelism(workflow) - - return rollback - - -def fan_out_workflow_source(workflow: Workflow, source: str): - rollback_list = [] - try: - op_name = f"{source}_fanout" - fan_node = GraphNodeType(op_name, spec=InputFanOut(op_name, workflow.inputs_spec[source])) - rollback_list.append(add_node(workflow, fan_node)) - destinations = workflow.source_mappings[source].copy() - for destination in destinations: - rollback_list.insert(0, source_to_edge(workflow, fan_node, source, destination)) - rollback_list.insert(0, add_fan_source(workflow, fan_node, source)) - rollback_list.append(rollback_parallelism(workflow)) - recompute_parallelism(workflow) - except Exception: - # Something went wrong, let's rollback all changes to the workflow! 
- for foo in rollback_list: - foo() - raise - - -def build_args_for_workflow( - user_input: Union[List[Any], Dict[str, Any], SpatioTemporalJson], wf_inputs: List[str] -) -> OpIOType: - """ - Get user input and transform it into a dict where the keys match the workflow sources - """ - # If all the keys match, there is nothing to do - if isinstance(user_input, dict) and set(wf_inputs) == set(user_input.keys()): - return user_input - # Check if there is only one source. If that's the case, assign input to it, otherwise break - if len(wf_inputs) > 1: - raise ValueError( - "User input does not specify workflow sources and workflow has multiple sources: " - f"{', '.join(wf_inputs)}. A dictionary with matching keys is required." - ) - # Check if it's a spatiotemporal json (geom + time range) - # If that's the case we generate a DataVibe with that info - if isinstance(user_input, SpatioTemporalJson): - user_input = gen_stac_item_from_bounds( - user_input.geojson, # type: ignore - user_input.start_date, # type: ignore - user_input.end_date, # type: ignore - ) - return {wf_inputs[0]: user_input} - - -def validate_workflow_input(user_input: OpIOType, inputs_spec: TypeDictVibe): - """ - Validate workflow input by making sure user input types match the respective source types - """ - for source_name, source_type in inputs_spec.items(): - source_input = user_input[source_name] - validate_vibe_types(source_input, source_type, source_name) - - -def validate_vibe_types(source_input: InnerIOType, source_type: DataVibeType, source_name: str): - # If it's a DataVibe, we deserialize and check if the types are compatible - base_type = get_base_type(source_type) - try: - vibe_input = StacConverter().from_stac_item(deserialize_stac(source_input)) - except Exception: - raise ValueError( - "Failed to convert inputs to workflow source " - f"{source_name} of type {_type_repr(source_type)}" - ) - source_types = set(type(i) for i in ensure_list(vibe_input)) - bad_types = [t for t in 
source_types if not issubclass(t, base_type)] - if bad_types: - raise ValueError( - f"Workflow source {source_name} expects inputs of type {source_type}, " - f"found incompatible types: {', '.join(_type_repr(t) for t in bad_types)}" - ) - - -def patch_workflow_source(source_input: InnerIOType, workflow: Workflow, source_name: str): - # Check if input is list and type is not list - # If that's the case, try to patch the workflow with a source fan-out node - # An element in a list source is fine because we make a one element list - # in the runner automatically - source_type = workflow.inputs_spec[source_name] - if isinstance(source_input, list) and not is_container_type(source_type): - LOGGER.info(f"Input for source {source_name} is a list, trying to patch workflow") - try: - fan_out_workflow_source(workflow, source_name) # patch is done in-place - except ValueError: - raise ValueError( - f"Found list of inputs for workflow source '{source_name}' " - f"which does not support lists" - ) - - -def patch_workflow_sources(user_input: OpIOType, workflow: Workflow): - bad_sources = [] - for source_name in workflow.inputs_spec: - source_input = user_input[source_name] - try: - patch_workflow_source(source_input, workflow, source_name) - except ValueError: - bad_sources.append(source_name) - if bad_sources: - raise ValueError( - f"Found list of inputs for workflow sources {bad_sources} that do not support lists" - ) diff --git a/src/vibe_server/vibe_server/workflow/parameter.py b/src/vibe_server/vibe_server/workflow/parameter.py deleted file mode 100644 index 7a0478d9..00000000 --- a/src/vibe_server/vibe_server/workflow/parameter.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from collections import defaultdict -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -from .spec_parser import ( - OperationSpec, - TaskType, - WorkflowSpec, - WorkflowSpecNode, - get_parameter_reference, -) - - -class ParameterResolver: - def __init__(self, workflows_dir: str, ops_dir: str): - self.workflows_dir = workflows_dir - self.ops_dir = ops_dir - - def resolve(self, workflow_spec: WorkflowSpec) -> Dict[str, "Parameter"]: - wf_params = self._get_wf_params(workflow_spec, "root") - return {p.name: p for p in wf_params} - - def _get_wf_params(self, workflow_spec: WorkflowSpec, task_name: str): - wf_params: List[Parameter] = [] - for k, v in workflow_spec.parameters.items(): - default = workflow_spec.default_parameters[k] - descriptions = workflow_spec.description.parameters - desc = descriptions.get(k) if descriptions is not None else None - wf_params.append( - Parameter(name=k, task=task_name, value=v, default=default, description=desc) - ) - # Get references from tasks - refs: Dict[str, List[Parameter]] = defaultdict(list) - for task_name, node in workflow_spec.tasks.items(): - for task_param in self._get_node_params(node): - ref = task_param.reference - if ref is not None: - refs[ref].append(task_param) - for wf_param in wf_params: - for ref_param in refs[wf_param.name]: - wf_param.add_child(ref_param) - return wf_params - - def _get_op_params(self, op_spec: OperationSpec, task_name: str) -> List["Parameter"]: - def foo( - params: Dict[str, Any], - descriptions: Optional[Dict[str, Any]], - defaults: Dict[str, Any], - prefix: str = "", - ): - for k, v in params.items(): - desc = descriptions.get(k) if descriptions is not None else None - default = defaults[k] - if isinstance(v, dict): - assert isinstance(desc, dict) or desc is None - assert isinstance(default, dict) - for p in foo(v, desc, default, prefix=k): - yield p - else: - assert isinstance(desc, str) or desc is None - name = f"{prefix}.{k}" if prefix else k - yield Parameter( - 
name=name, task=task_name, value=v, default=default, description=desc - ) - - return [ - p - for p in foo( - op_spec.parameters, op_spec.description.parameters, op_spec.default_parameters - ) - ] - - def _get_node_params(self, node: WorkflowSpecNode): - task = node.load(ops_base_dir=self.ops_dir, workflow_dir=self.workflows_dir) - if node.type == TaskType.op: - return self._get_op_params(cast(OperationSpec, task), node.task) - return self._get_wf_params(cast(WorkflowSpec, task), node.task) - - -class Parameter: - def __init__( - self, - name: str, - task: str, - value: Any, - default: Any, - description: Optional[Union[str, Dict[str, str]]], - ) -> None: - self.name = name - self.task = task - self._value = value - self._default = default - self._description = description - self.childs: List["Parameter"] = [] - - def add_child(self, child: "Parameter"): - self.childs.append(child) - - def _resolve(self, attr: str, private_attr: str): - # If our attribute is None and we have childs, lets get the default value from them - if getattr(self, private_attr) is None and self.childs: - attrs = [] - for p in self.childs: - p_attr = getattr(p, attr) - if not isinstance(p_attr, tuple): - p_attr = (p_attr,) - for i in p_attr: - if i not in attrs: - attrs.append(i) - if len(attrs) == 1: - return attrs[0] - return tuple(attrs) - return getattr(self, private_attr) - - @property - def default(self) -> Any: - return self._resolve("default", "_default") - - @property - def description(self) -> Union[str, Tuple[str], None]: - descriptions = self._resolve("description", "_description") - # Discard `None` from children and adjust accordingly - if isinstance(descriptions, tuple): - descriptions = tuple(d for d in descriptions if d is not None) - if not descriptions: # Empty set, return None - return None - if len(descriptions) == 1: - return descriptions[0] - return descriptions - - @property - def reference(self) -> Optional[str]: - return get_parameter_reference(self._value, self.task) 
diff --git a/src/vibe_server/vibe_server/workflow/runner/__init__.py b/src/vibe_server/vibe_server/workflow/runner/__init__.py deleted file mode 100644 index 44601725..00000000 --- a/src/vibe_server/vibe_server/workflow/runner/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from .remote_runner import RemoteWorkflowRunner -from .runner import NoOpStateChange, WorkflowCallback, WorkflowChange, WorkflowRunner - -__all__ = [ # type: ignore - NoOpStateChange, - RemoteWorkflowRunner, - WorkflowCallback, - WorkflowChange, - WorkflowRunner, -] diff --git a/src/vibe_server/vibe_server/workflow/runner/remote_runner.py b/src/vibe_server/vibe_server/workflow/runner/remote_runner.py deleted file mode 100644 index 51ee3efb..00000000 --- a/src/vibe_server/vibe_server/workflow/runner/remote_runner.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import asyncio -import asyncio.queues -import logging -from collections import defaultdict -from typing import Any, Dict, List, NoReturn, Optional, TypeVar, cast -from uuid import UUID - -from vibe_common.messaging import ( - ErrorContent, - ExecuteReplyContent, - ExecuteRequestMessage, - MessageType, - OperationSpec, - WorkMessage, - WorkMessageBuilder, - send_async, -) -from vibe_common.telemetry import add_span_attributes, add_trace -from vibe_core.data.core_types import OpIOType - -from ..workflow import GraphNodeType, Workflow -from .runner import ( - CancelledOpError, - NoOpStateChange, - WorkflowCallback, - WorkflowChange, - WorkflowRunner, -) - -SLEEP_S = 0.2 -RAISE_STR = "raise" -T = TypeVar("T") - - -class MessageRouter: - def __init__(self, inqueue: "asyncio.queues.Queue[WorkMessage]"): - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.message_map: Dict[str, "asyncio.queues.Queue[WorkMessage]"] = defaultdict( - asyncio.queues.Queue - ) - self.inqueue = inqueue - 
self.should_stop = False - self.task = asyncio.create_task(self.route_messages()) - - def done_callback(task: Any) -> None: - self.task = None - try: - maybe_exception = task.exception() - if maybe_exception is not None: - self.logger.warning( - f"MessageRouter task {task} encountered an exception: {maybe_exception}" - ) - except (asyncio.CancelledError, asyncio.InvalidStateError): - pass - - self.task.add_done_callback(done_callback) - - async def route_messages(self): - while not self.should_stop: - try: - msg = await asyncio.wait_for(self.inqueue.get(), timeout=SLEEP_S) - self.message_map[msg.parent_id].put_nowait(msg) - self.inqueue.task_done() - except asyncio.TimeoutError: - pass - - async def get(self, request_id: str, block: bool = True) -> WorkMessage: - if block: - msg = await self.message_map[request_id].get() - else: - msg = self.message_map[request_id].get_nowait() - return msg - - def task_done(self, request_id: str) -> None: - try: - self.message_map[request_id].task_done() - except ValueError: - self.logger.exception( - "task_done() called more times than there were items in the queue. " - "This indicates a correctness issue and should be fixed. I'm ignoring " - "it for now, though." 
- ) - - def clear(self) -> None: - for queue in self.message_map.values(): - while not queue.empty(): - try: - queue.get_nowait() - queue.task_done() - except asyncio.QueueEmpty: - pass - - def __len__(self) -> int: - return sum([q.qsize() for q in self.message_map.values()]) + self.inqueue.qsize() - - def __del__(self): - if self.task and not self.task.done(): - self.task.cancel() - self.task = None - - -class RemoteWorkflowRunner(WorkflowRunner): - def __init__( - self, - message_router: "MessageRouter", - workflow: Workflow, - traceid: str, - update_state_callback: WorkflowCallback = NoOpStateChange, - pubsubname: Optional[str] = None, - source: Optional[str] = None, - topic: Optional[str] = None, - **kwargs: Any, - ): - super().__init__( - workflow=workflow, - update_state_callback=update_state_callback, - **kwargs, - ) - self.topic = topic - self.source = source - self.pubsubname = pubsubname - - self.message_router = message_router - self.traceid = traceid - self.id_queue_map: Dict[str, "asyncio.queues.Queue[WorkMessage]"] = {} - - def _handle_failure(self, request: ExecuteRequestMessage, reply: WorkMessage) -> NoReturn: - content = cast(ErrorContent, reply.content) - root_idx = content.evalue.rfind(RAISE_STR) - root_idx = root_idx + len(RAISE_STR) if root_idx != -1 else 0 - evalue = content.evalue[root_idx:] - error = f"{content.ename}: {evalue}" - self.logger.info( - f"Operation {reply.id} failed with error {error}. (run id {reply.run_id})." - f"Traceback: {content.traceback}" - ) - raise RuntimeError( - f"Failed to run op {request.content.operation_spec.name} in workflow run id " - f"{reply.run_id} for input with message id {request.id}. Error description: {error}." 
- ) - - async def _handle_ack_message(self, op_name: str, subtask_idx: int) -> None: - await self._report_state_change( - WorkflowChange.SUBTASK_RUNNING, task=op_name, subtask_idx=subtask_idx - ) - - def _process_reply(self, request: WorkMessage, reply: WorkMessage) -> OpIOType: - assert ( - reply.header.type != MessageType.execute_request - ), f"Received invalid message {reply.id}" - assert ( - reply.header.parent_id - ), f"Received invalid reply {reply.id} with empty parent_id. (run id {reply.run_id})" - if reply.header.type == MessageType.error: - self._handle_failure(cast(ExecuteRequestMessage, request), reply) - else: - content = cast(ExecuteReplyContent, reply.content) - self.logger.debug( - f"Received execute reply for run id {reply.run_id} " - f"(op name {content.cache_info.name}, op hash {content.cache_info.hash})." - ) - return content.output - - async def _build_and_process_request( - self, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int - ) -> OpIOType: - op_spec: OperationSpec = op.spec - request: ExecuteRequestMessage = cast( - ExecuteRequestMessage, - WorkMessageBuilder.build_execute_request( - run_id, - self.traceid, - op_spec, - input, - ), - ) - - failure_msg: str = ( - f"Failed to run op {op_spec.name} (subtask {subtask_idx})" - f"with execution request id {request.id}, run id {run_id}." 
- ) - if all([e is not None for e in (self.source, self.pubsubname, self.topic)]): - await send_async(request, self.source, self.pubsubname, self.topic) # type: ignore - - while True: - if self.is_cancelled: - raise CancelledOpError() - - try: - reply = await self._wait_for_reply(request) - except CancelledOpError: - raise - except Exception as e: - raise RuntimeError(failure_msg) from e - - if reply.header.type == MessageType.ack: - await self._handle_ack_message(op.name, subtask_idx) - continue - elif reply.header.type in (MessageType.execute_reply, MessageType.error): - try: - return self._process_reply(request, reply) - finally: - self.message_router.task_done(request.id) - else: - raise RuntimeError(f"Received unsupported message {reply}. Aborting execution.") - - async def _wait_for_reply(self, request: ExecuteRequestMessage) -> WorkMessage: - while True: - try: - return await self.message_router.get(request.id, block=False) - except asyncio.QueueEmpty: - await asyncio.sleep(SLEEP_S) - if self.is_cancelled: - raise CancelledOpError() - - @add_trace - async def _run_op_impl( - self, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int - ) -> OpIOType: - try: - add_span_attributes({"op_name": op.spec.name}) - return await self._build_and_process_request(op, input, run_id, subtask_idx) - except CancelledOpError: - self.logger.debug( - f"Did not try to run operation {op.name} for parent event {self.traceid}" - " because the workflow was cancelled" - ) - raise - - @add_trace - async def _run_ops(self, ops: List[GraphNodeType], run_id: UUID): - add_span_attributes({"workflow_id": str(run_id)}) - await super()._run_ops(ops, run_id) - if len(self.message_router): - self.logger.warning( - f"Finishing workflow level {ops} execution with messages still in queue " - f"(run id: {run_id})." 
- ) - self.message_router.clear() - - def __del__(self): - self.message_router.should_stop = True diff --git a/src/vibe_server/vibe_server/workflow/runner/runner.py b/src/vibe_server/vibe_server/workflow/runner/runner.py deleted file mode 100644 index 9443cf2a..00000000 --- a/src/vibe_server/vibe_server/workflow/runner/runner.py +++ /dev/null @@ -1,328 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import asyncio -import gc -import logging -from abc import ABC, abstractmethod -from collections import defaultdict -from enum import auto -from typing import Any, Awaitable, Callable, Dict, Iterable, List, Protocol, Set, Tuple, cast -from uuid import UUID, uuid4 - -from fastapi_utils.enums import StrEnum - -from vibe_core.data.core_types import OpIOType -from vibe_core.data.utils import is_vibe_list -from vibe_core.utils import ensure_list - -from ..workflow import DESTINATION, LABEL, EdgeLabel, EdgeType, GraphNodeType, InputFanOut, Workflow -from .task_io_handler import TaskIOHandler, WorkflowIOHandler - - -class CancelledOpError(Exception): - pass - - -class WorkflowCallback(Protocol): - async def __call__(self, change: "WorkflowChange", **kwargs: Any) -> None: - pass - - -async def NoOpStateChange(change: "WorkflowChange", **kwargs: Any) -> None: - return None - - -class WorkflowChange(StrEnum): - WORKFLOW_STARTED = cast("WorkflowChange", auto()) - WORKFLOW_FINISHED = cast("WorkflowChange", auto()) - WORKFLOW_FAILED = cast("WorkflowChange", auto()) - WORKFLOW_CANCELLED = cast("WorkflowChange", auto()) - TASK_STARTED = cast("WorkflowChange", auto()) - SUBTASK_QUEUED = cast("WorkflowChange", auto()) - SUBTASK_RUNNING = cast("WorkflowChange", auto()) - SUBTASK_FINISHED = cast("WorkflowChange", auto()) - SUBTASK_FAILED = cast("WorkflowChange", auto()) - SUBTASK_PENDING = cast("WorkflowChange", auto()) - - -class OpParallelism: - parallel_edges: Set[EdgeType] = {EdgeType.parallel, EdgeType.scatter} - - def __init__( - self, - 
in_edges: List[EdgeLabel], - op: GraphNodeType, - run_task: Callable[[GraphNodeType, OpIOType, UUID, int], Awaitable[OpIOType]], - update_state_callback: WorkflowCallback = NoOpStateChange, - ): - self.op = op - self.in_edges = in_edges - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - self.run_task = run_task - self.update_state = update_state_callback - - def is_parallel(self, edge: EdgeLabel) -> bool: - return edge.type in self.parallel_edges - - def fan_in(self, inputs: List[OpIOType]) -> OpIOType: - if any(self.is_parallel(edge) for edge in self.in_edges): - # Op is running in parallel so we collate the outputs - outputs: OpIOType = {k: [] for k in inputs[0]} - for input in inputs: - for key, value in outputs.items(): - cast(List[Dict[str, Any]], value).append(cast(Dict[str, Any], input[key])) - return outputs - # Op is single, so we just return the output - if len(inputs) > 1: - raise RuntimeError(f"Expected a single input in the list, found {len(inputs)}") - return inputs[0] - - @staticmethod - def align(**kwargs: Any) -> Iterable[Tuple[Any, ...]]: - input_lens = {n: len(arg) for n, arg in kwargs.items() if len(arg) != 1} - lens = set(input_lens.values()) - if len(lens) > 1: - error_str = ", ".join(f"'{k}': {v}" for k, v in input_lens.items()) - raise ValueError(f"Unable to pair sequences of different sizes - {error_str}") - for i in range(1 if len(lens) == 0 else max(lens)): - yield tuple((arg[i] if len(arg) > 1 else arg[0]) for arg in kwargs.values()) - - def fan_out(self, op_input: OpIOType) -> Iterable[Tuple[OpIOType, ...]]: - parallel = {edge.dstport for edge in self.in_edges if self.is_parallel(edge)} - try: - aligned = self.align( - **{k: ([vv for vv in v] if k in parallel else [v]) for k, v in op_input.items()} - ) - for input in aligned: - yield tuple( - cast(OpIOType, ensure_list(i)) - if is_vibe_list(self.op.spec.inputs_spec[name]) - else i - for i, name in zip(input, op_input) - ) - except ValueError as e: - raise 
ValueError(f"Unable to fan-out input for op {self.op.name}: {e}") from e - - async def run(self, op_input: OpIOType, run_id: UUID) -> List[OpIOType]: - if isinstance(self.op.spec, InputFanOut): - self.logger.info(f"Bypassing input fan-out node {self.op.name}") - await self.update_state(WorkflowChange.TASK_STARTED, task=self.op.name, num_subtasks=1) - await self.update_state( - WorkflowChange.SUBTASK_FINISHED, task=self.op.name, subtask_idx=0 - ) - return [{self.op.spec.output_port: op_input[self.op.spec.input_port]}] - inputs: List[OpIOType] = [ - {k: v for k, v in zip(op_input.keys(), input)} for input in self.fan_out(op_input) - ] - await self.update_state( - WorkflowChange.TASK_STARTED, task=self.op.name, num_subtasks=len(inputs) - ) - self.logger.info( - f"Will run op {self.op.name} with {len(inputs)} different input(s). " - f"(run id: {run_id})" - ) - - async def sub_run(args: Tuple[int, OpIOType]) -> OpIOType: - idx, input = args - try: - self.logger.debug( - f"Executing task {idx + 1}/{len(inputs)} of op {self.op.name}. " - f"(run id: {run_id})" - ) - await self.update_state( - WorkflowChange.SUBTASK_QUEUED, task=self.op.name, subtask_idx=idx - ) - ret = await self.run_task(self.op, input, run_id, idx) - self.logger.debug( - f"Successfully executed task {idx + 1}/{len(inputs)} of op {self.op.name}. " - f"(run id: {run_id})" - ) - await self.update_state( - WorkflowChange.SUBTASK_FINISHED, task=self.op.name, subtask_idx=idx - ) - return ret - except Exception as e: - self.logger.exception( - f"Failed to execute task {idx + 1}/{len(inputs)} of op {self.op.name}. 
" - f"(run id: {run_id})" - ) - await self.update_state( - WorkflowChange.SUBTASK_FAILED, - task=self.op.name, - subtask_idx=idx, - reason=f"{e.__class__.__name__}: {e}", - ) - raise - - results = await asyncio.gather(*[sub_run(args) for args in enumerate(inputs)]) - return results - - -class WorkflowRunner(ABC): - workflow: Workflow - update_state: WorkflowCallback - logger: logging.Logger - io_mapper: WorkflowIOHandler - io_handler: TaskIOHandler - is_cancelled: bool - - def __init__( - self, - workflow: Workflow, - io_mapper: WorkflowIOHandler, - update_state_callback: WorkflowCallback = NoOpStateChange, - **_: Any, - ): - self.workflow = workflow - self.update_state = update_state_callback - self.io_mapper = io_mapper - self.is_cancelled = False - - self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") - - async def cancel(self): - await self._report_state_change(WorkflowChange.WORKFLOW_CANCELLED) - self.is_cancelled = True - - @abstractmethod - async def _run_op_impl( - self, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int - ) -> OpIOType: - raise NotImplementedError - - async def _run_graph_impl(self, input: OpIOType, run_id: UUID) -> OpIOType: - self.io_handler.add_sources(input) - for ops in self.workflow: - self.logger.info(f"Will run ops {ops} in parallel. 
(run id: {run_id})") - await self._run_ops(ops, run_id) - if not self.is_cancelled: - return self.io_handler.retrieve_sinks() - - # Workflow was cancelled - return {} - - async def _run_ops(self, ops: List[GraphNodeType], run_id: UUID): - try: - op_parallelism = {} - tasks: List[Tuple[GraphNodeType, "asyncio.Task[List[OpIOType]]"]] = [] - for op in ops: - op_parallelism[op.name] = OpParallelism( - [e[LABEL] for e in self.workflow.edges if e[DESTINATION] == op], - op, - self._run_op_impl, - update_state_callback=self.update_state, - ) - task = asyncio.create_task( - self._submit_op(op, run_id, op_parallelism[op.name]), name=op.name - ) - tasks.append((op, task)) - await self._monitor_futures(tasks, run_id, op_parallelism) - for _, task in tasks: - if not task.done(): - task.cancel() - del tasks - finally: - # The garbage collector seems to be a bit lazy, so we need to force it to collect - # anything that's been leftover from previous executions - collected = gc.collect() - self.logger.debug( - f"Garbage collector collected {collected} objects after running ops {ops} " - f"in run {run_id}." 
- ) - - async def _monitor_futures( - self, - tasks: List[Tuple[GraphNodeType, "asyncio.Task[List[OpIOType]]"]], - run_id: UUID, - op_parallelism: Dict[str, OpParallelism], - ): - op_outputs: Dict[GraphNodeType, List[OpIOType]] = defaultdict(list) - gather = asyncio.gather(*[t[1] for t in tasks], return_exceptions=True) - await gather - for op, task in tasks: - op_name = task.get_name() - try: - assert op_name is not None - result = await task - if isinstance(result, Exception): - raise result - op_outputs[op].extend(result) - except CancelledOpError: - return - except Exception as e: - gather.cancel() - await self._fail_workflow(e, run_id) - raise - - for op, op_result in op_outputs.items(): - self.io_handler.add_result(op, op_parallelism[op.name].fan_in(op_result)) - - async def _fail_workflow(self, e: Exception, run_id: UUID): - self.logger.exception(f"Failed to run workflow {self.workflow.name}. (run id: {run_id})") - await self._report_state_change(WorkflowChange.WORKFLOW_FAILED, reason=str(e)) - - @classmethod - def build( - cls, - workflow: Workflow, - **kwargs: Any, - ) -> "WorkflowRunner": - return cls(workflow, **kwargs) - - async def _submit_op( - self, - op: GraphNodeType, - run_id: UUID, - parallelism: OpParallelism, - ) -> List[OpIOType]: - if self.is_cancelled: - # Exit early, as this run has been cancelled - return [{}] - input = self.io_handler.retrieve_input(op) - try: - return await parallelism.run(input, run_id) - except CancelledOpError: - return [{}] - except Exception as e: - await self._fail_workflow(e, run_id) - raise - - async def _run_graph(self, input: OpIOType, run_id: UUID) -> OpIOType: - self.logger.debug(f"Starting execution of workflow {self.workflow.name} (run id: {run_id})") - tasks = [task.name for level in self.workflow for task in level] - await self._report_state_change(WorkflowChange.WORKFLOW_STARTED, tasks=tasks) - output = self._run_graph_impl(input, run_id) - # Mark workflow as cancelled if needed - # Do not mark workflow 
as done, as it will be marked as such after the outputs are updated - # in the statestore - if self.is_cancelled: - await self._report_state_change(WorkflowChange.WORKFLOW_CANCELLED) - self.logger.debug(f"Finished execution of workflow {self.workflow.name} (run id: {run_id})") - - return await output - - async def run(self, input_items: OpIOType, run_id: UUID = uuid4()) -> OpIOType: - try: - # Initializing task IO handler for this specific run. - self.io_handler = TaskIOHandler(self.workflow) - output = await self._run_graph(self.io_mapper.map_input(input_items), run_id) - return self.io_mapper.map_output(output) if not self.is_cancelled else {} - except Exception as e: - self.logger.exception(f"Failed to run workflow {self.workflow.name} (run id: {run_id})") - await self._report_state_change(WorkflowChange.WORKFLOW_FAILED, reason=str(e)) - raise - finally: - del self.io_handler - - async def _report_state_change( - self, - change: WorkflowChange, - **kwargs: Any, - ) -> None: - try: - await self.update_state(change, **kwargs) - except Exception: - logging.exception( - f"Failed to update workflow/operation state with change {change}. Ignoring." - ) diff --git a/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py b/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py deleted file mode 100644 index de1a8a2a..00000000 --- a/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from copy import copy -from typing import Dict, List - -from vibe_core.data.core_types import InnerIOType, OpIOType - -from ..workflow import GraphNodeType, Workflow - - -class TaskIOHandler: - IoMapType = Dict[GraphNodeType, Dict[str, List[InnerIOType]]] - input_map: IoMapType - output_map: IoMapType - source_map: IoMapType - sink_map: IoMapType - - @staticmethod - def _update_dict(task: GraphNodeType, input_name: str, d: IoMapType, value: List[InnerIOType]): - if task in d: - d[task][input_name] = value - else: - d[task] = {input_name: value} - - def _attach_input_port(self, node: GraphNodeType, input_port: str, io: List[InnerIOType]): - self.input_map.setdefault(node, {}) - node_inputs = self.input_map[node] - if input_port in node_inputs: - raise ValueError( - f"Tried to attach input port {node.name}.{input_port} but it is already attached" - ) - node_inputs[input_port] = io - - def _parse_workflow(self, workflow: Workflow): - io: List[InnerIOType] - for origin, destination, label in workflow.edges: - if origin in self.output_map and label.srcport in self.output_map[origin]: - io = self.output_map[origin][label.srcport] - else: - io = [] - self._update_dict(origin, label.srcport, self.output_map, io) - self._attach_input_port(destination, label.dstport, io) - - for sink, ports in workflow.sinks.items(): - for port in ports: - if sink in self.output_map and port in self.output_map[sink]: - # sink already exists as input to another task - io = self.output_map[sink][port] - else: - # new output that is a sink only - io = [] - self._update_dict(sink, port, self.output_map, io) - self._update_dict(sink, port, self.sink_map, io) - - for source, ports in workflow.sources.items(): - for port in ports: - io = [] - self._attach_input_port(source, port, io) - self._update_dict(source, port, self.source_map, io) - - def __init__(self, workflow: Workflow): - self.input_map = {} - self.output_map = {} - self.sink_map = {} - self.source_map = {} - 
self._parse_workflow(workflow) - - def add_result(self, task: GraphNodeType, value: OpIOType): - for output_name, result in value.items(): - # Calling `get` here may create a new dict/list but, if it is new, - # it won't be consumed by any other task, or sink - io = self.output_map.get(task, {}).get(output_name, []) - if len(io) != 0: - raise RuntimeError(f"Repeated write to task '{task}' output '{output_name}'.") - io.append(result) - - def retrieve_input(self, task: GraphNodeType) -> OpIOType: - input_dict: OpIOType = {} - for kw_name, input_value in self.input_map[task].items(): - input_dict[kw_name] = copy(input_value[0]) - - return input_dict - - def add_sources(self, values: OpIOType): - if len(values) != sum([len(t) for t in self.source_map.values()]): - raise ValueError("Tried to add different number of values to workflow") - - for task, ports in self.source_map.items(): - for port in ports: - key = task.name + "." + port - try: - value = values.pop(key) - ports[port].append(value) - except KeyError: - raise ValueError(f"Unable to find source {key} for running workflow") - - if values: - raise ValueError(f"Tried to add unknown values {values.keys()} to workflow") - - def retrieve_sinks(self) -> OpIOType: - output_dict: OpIOType = {} - for task, sink_outputs in self.sink_map.items(): - for task_output_name, sink_output in sink_outputs.items(): - output_dict[task.name + "." 
+ task_output_name] = copy(sink_output[0]) - - return output_dict - - def __del__(self): - for mapping in (self.input_map, self.output_map, self.sink_map, self.source_map): - for ports in mapping.values(): - for port in ports: - try: - ports[port].pop() - except IndexError: - break - del self.input_map - del self.output_map - del self.sink_map - del self.source_map - - -class WorkflowIOHandler: - def __init__(self, workflow: Workflow): - self.workflow = workflow - - def map_input(self, input_items: OpIOType) -> OpIOType: - return { - node: input_items[key] - for key, nodes in self.workflow.source_mappings.items() - for node in nodes - } - - def map_output(self, output_items: OpIOType) -> OpIOType: - return {key: output_items[value] for key, value in self.workflow.sink_mappings.items()} diff --git a/src/vibe_server/vibe_server/workflow/spec_parser.py b/src/vibe_server/vibe_server/workflow/spec_parser.py deleted file mode 100644 index 4cc634ac..00000000 --- a/src/vibe_server/vibe_server/workflow/spec_parser.py +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import os -import re -from copy import deepcopy -from dataclasses import dataclass -from enum import auto -from re import Pattern -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast - -import yaml -from fastapi_utils.enums import StrEnum - -from vibe_common.constants import DEFAULT_OPS_DIR -from vibe_common.schemas import OperationParser, OperationSpec, update_parameters -from vibe_core.datamodel import TaskDescription -from vibe_core.utils import ( - MermaidVerticesMap, - build_mermaid_edge, - draw_mermaid_diagram, - rename_keys, -) - -HERE = os.path.dirname(os.path.abspath(__file__)) -DEV_WORKFLOW_DIR = os.path.abspath(os.path.join(HERE, "..", "..", "..", "..", "workflows")) -RUN_WORKFLOW_DIR = os.path.join("/", "app", "workflows") - -PARAM_PATTERN: "Pattern[str]" = re.compile(r"@from\((.*)\)") - - -def get_workflow_dir() -> str: - if os.path.exists(DEV_WORKFLOW_DIR): - return DEV_WORKFLOW_DIR - else: - return RUN_WORKFLOW_DIR - - -def get_parameter_reference(param: Any, task_name: str) -> Optional[str]: - if isinstance(param, str) and (match := re.match(PARAM_PATTERN, param)): - param_msg = f"task '{task_name}', parameter '{param}'" - if len(g := match.groups()) > 1: - raise ValueError(f"Failed to parse parameter reference '{param}' in {param_msg}") - ref_name = g[0] - if not ref_name: - raise ValueError(f"Found empty parameter reference in {param_msg}") - return ref_name - return None - - -def split_task_name_port(edge: str) -> Tuple[str, str]: - spllited_edge = edge.split(".") - return tuple(spllited_edge[0:1] + spllited_edge[-1:]) - - -SpecNodeType = Union[OperationSpec, "WorkflowSpec"] - - -class TaskType(StrEnum): - op = auto() - workflow = auto() - - -@dataclass -class WorkflowSpecEdge: - origin: str - destination: List[str] - - -@dataclass -class WorkflowSpecNode: - task: str - type: TaskType - parameters: Dict[str, Any] - op_dir: Optional[str] # only exists when this is an op - parent: str # the workflow that gave rise to this 
- - def load( - self, ops_base_dir: str = DEFAULT_OPS_DIR, workflow_dir: str = get_workflow_dir() - ) -> SpecNodeType: - if self.type == TaskType.op: - return self._load_op(ops_base_dir) - else: - return self._load_workflow(ops_base_dir, workflow_dir) - - def _load_op(self, ops_base_dir: str) -> OperationSpec: - assert isinstance(self.op_dir, str) - return OperationParser.parse( - os.path.abspath(os.path.join(ops_base_dir, self.op_dir, f"{self.task}.yaml")), - self.parameters, - ) - - def _load_workflow(self, ops_base_dir: str, workflow_dir: str) -> "WorkflowSpec": - return WorkflowParser.parse( - os.path.abspath(os.path.join(workflow_dir, f"{self.task}.yaml")), - ops_base_dir, - workflow_dir, - self.parameters, - ) - - def to_dict(self) -> Dict[str, Any]: - ret = { - ("op" if self.type == TaskType.op else "workflow"): self.task, - "parameters": self.parameters, - } - if self.op_dir is not None: - ret["op_dir"] = self.op_dir - return ret - - -@dataclass -class WorkflowSpec: - name: str - sources: Dict[str, List[str]] - sinks: Dict[str, str] - tasks: Dict[str, WorkflowSpecNode] - edges: List[WorkflowSpecEdge] - parameters: Dict[str, Any] - default_parameters: Dict[str, Any] - description: TaskDescription - ops_dir: str - workflows_dir: str - - def __post_init__(self): - for i, e in enumerate((e for e in self.edges)): - if isinstance(e, dict): - self.edges[i] = WorkflowSpecEdge(**e) - for k, v in zip(self.tasks.keys(), (v for v in self.tasks.values())): - if isinstance(v, dict): - self.tasks[k] = WorkflowSpecNode(**v) - if isinstance(self.description, dict): - self.description = TaskDescription(**self.description) - for task_name, node_spec in self.tasks.items(): - if task_name in self.description.task_descriptions: - continue - spec = node_spec.load(self.ops_dir, self.workflows_dir) - if isinstance(spec.description, dict): - spec.description = TaskDescription(**spec.description) - self.description.task_descriptions[task_name] = spec.description.short_description - - 
def _build_vertices_map(self) -> MermaidVerticesMap: - vertices = MermaidVerticesMap(sources={}, sinks={}, tasks={}) - # Create a dictionary to map sources, sinks, and tasks to vertex ids - for i, source in enumerate(self.sources.keys()): - vertices.sources[source] = f"inp{i+1}>{source}]" - for i, sink in enumerate(self.sinks.keys()): - vertices.sinks[sink] = f"out{i+1}>{sink}]" - for i, task in enumerate(self.tasks.keys()): - vertices.tasks[task] = f"tsk{i+1}" + "{{" + task + "}}" - return vertices - - def to_mermaid(self) -> str: - vertices_map: MermaidVerticesMap = self._build_vertices_map() - - # Create edges between tasks - edges = [ - build_mermaid_edge( - split_task_name_port(edge.origin), - split_task_name_port(destination), - vertices_map.tasks, - vertices_map.tasks, - ) - for edge in self.edges - for destination in edge.destination - ] - - # Create edges between sources and tasks - edges += [ - build_mermaid_edge( - (source_name, ""), - split_task_name_port(source_port), - vertices_map.sources, - vertices_map.tasks, - ) - for source_name, source_ports in self.sources.items() - for source_port in source_ports - ] - - # Create edges between tasks and sinks - edges += [ - build_mermaid_edge( - split_task_name_port(sink_port), - (sink_name, ""), - vertices_map.tasks, - vertices_map.sinks, - ) - for sink_name, sink_port in self.sinks.items() - ] - - return draw_mermaid_diagram(vertices_map, edges) - - -class WorkflowParser: - required_fields: List[str] = "name sources sinks tasks".split() - optional_fields: List[str] = "parameters default_parameters edges description".split() - op_spec_fields: List[str] = "op parameters op_dir".split() - wf_spec_fields: List[str] = "workflow parameters".split() - - @classmethod - def _load_workflow(cls, yamlpath: str) -> Dict[str, Any]: - with open(yamlpath) as fp: - data = yaml.safe_load(fp) - - return data - - @classmethod - def _parse_nodespec( - cls, nodespec: Dict[str, Union[str, Dict[str, Any]]], workflow_name: str, 
task_name: str - ) -> WorkflowSpecNode: - if "workflow" in nodespec: - type = TaskType.workflow - possible_fields = cls.wf_spec_fields - elif "op" in nodespec: - type = TaskType.op - possible_fields = cls.op_spec_fields - else: - raise ValueError(f"Task specification is missing fields 'op' or 'workflow': {nodespec}") - - task = nodespec[type] - check_config_fields(nodespec, possible_fields, "Task", task_name) - - # Check field types - if not isinstance(task, str): - raise TypeError(f"'{type}' field of task {task_name} is not a string") - if "parameters" in nodespec and not isinstance(nodespec["parameters"], dict): - raise TypeError(f"'parameters' field of task {task_name} is not a dictionary") - if "op_dir" in nodespec and not isinstance(nodespec["op_dir"], str): - raise TypeError(f"'op_dir' field of task {task_name} is not a dictionary") - - return WorkflowSpecNode( - task=task, - type=type, - parameters=cast(Dict[str, Any], nodespec.get("parameters", {})), - op_dir=cast(str, nodespec.get("op_dir", task)), - parent=workflow_name, - ) - - @classmethod - def _parse_edgespec(cls, edgespec: Dict[str, Union[str, List[str]]]) -> WorkflowSpecEdge: - return WorkflowSpecEdge( - origin=cast(str, edgespec["origin"]), - destination=cast(List[str], edgespec["destination"]), - ) - - @classmethod - def _workflow_spec_from_yaml_dict( - cls, - workflow_dict: Dict[str, Any], - ops_dir: str, - workflows_dir: str, - parameters: Dict[str, Any], - default_parameters: Dict[str, Any], - ): - workflow_name = workflow_dict.get("name", "UNAMED") - for field in cls.required_fields: - if field not in workflow_dict: - raise ValueError( - f"Workflow specification '{workflow_name}' is missing required field '{field}'" - ) - check_config_fields( - workflow_dict, cls.required_fields + cls.optional_fields, "Workflow", workflow_name - ) - try: - edges: Optional[List[Dict[str, Union[str, List[str]]]]] = workflow_dict.get("edges", []) - if edges is None: - edges = [] - if not isinstance(edges, list): 
- raise TypeError(f"Expected edges to be a list, found {type(edges)}") - yaml_description: Dict[str, Any] = workflow_dict.get("description", {}) - if yaml_description is None: - yaml_description = {} - yaml_description = rename_keys( - yaml_description, {"sources": "inputs", "sinks": "outputs"} - ) - description: TaskDescription = TaskDescription( - **{k: v for k, v in yaml_description.items() if v is not None} - ) - return WorkflowSpec( - name=workflow_dict["name"], - sources=workflow_dict["sources"], - sinks=workflow_dict["sinks"], - tasks={ - k: cls._parse_nodespec(v, workflow_name, k) - for k, v in workflow_dict["tasks"].items() - }, - edges=[cls._parse_edgespec(e) for e in edges], - parameters=parameters, - default_parameters=default_parameters, - description=description, - ops_dir=ops_dir, - workflows_dir=workflows_dir, - ) - except KeyError as e: - raise ValueError(f"Workflow spec {workflow_dict} is missing field {e}") from e - - @classmethod - def parse_dict( - cls, - workflow_dict: Dict[str, Any], - ops_dir: str = DEFAULT_OPS_DIR, - workflows_dir: str = get_workflow_dir(), - parameters_override: Optional[Dict[str, Any]] = None, - ) -> "WorkflowSpec": - params = workflow_dict.get("parameters", {}) - if params is None: - params = {} - workflow_dict["default_parameters"] = deepcopy(params) - if parameters_override is not None: - params = update_parameters(params, parameters_override) - workflow_dict["parameters"] = params - try: - # workflow_dict is a WorkflowSpec that was serialized to a dict - return WorkflowSpec(**workflow_dict) - except TypeError: - # workflow_dict was loaded from a YAML - return cls._workflow_spec_from_yaml_dict( - workflow_dict, - ops_dir, - workflows_dir, - workflow_dict["parameters"], - workflow_dict["default_parameters"], - ) - - @classmethod - def parse( - cls, - workflow_name: str, - ops_dir: str = DEFAULT_OPS_DIR, - workflows_dir: str = get_workflow_dir(), - parameters_override: Optional[Dict[str, Any]] = None, - ) -> 
"WorkflowSpec": - data = cls._load_workflow(workflow_name) - return cls.parse_dict( - data, - ops_dir, - workflows_dir, - parameters_override, - ) - - -def parse_edge_string(edge_string: str, maxsplit: int = 1) -> Tuple[str, str]: - return ( - ".".join(edge_string.split(".", maxsplit=maxsplit)[:-1]), - edge_string.split(".", maxsplit=maxsplit)[-1], - ) - - -def check_config_fields( - fields: Iterable[str], accepted_fields: List[str], config_type: str, config_name: str -): - bad_fields = [field for field in fields if field not in accepted_fields] - if bad_fields: - bad_fields_str = ", ".join([f"'{field}'" for field in bad_fields]) - s = "s" if len(bad_fields) > 1 else "" - raise ValueError( - f"{config_type} spec '{config_name}' contains unknown field{s} {bad_fields_str}" - ) - - -def flat_params(params: Dict[str, Any]): - for param in params.values(): - if isinstance(param, dict): - yield from flat_params(param) - else: - yield param diff --git a/src/vibe_server/vibe_server/workflow/spec_validator.py b/src/vibe_server/vibe_server/workflow/spec_validator.py deleted file mode 100644 index d8a812f6..00000000 --- a/src/vibe_server/vibe_server/workflow/spec_validator.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import warnings -from typing import List - -from vibe_common.schemas import OperationSpec - -from .parameter import ParameterResolver -from .spec_parser import ( - SpecNodeType, - WorkflowSpec, - flat_params, - get_parameter_reference, - parse_edge_string, -) - - -class WorkflowSpecValidator: - @classmethod - def _validate_node_exists(cls, spec: WorkflowSpec, nodename: str, type: str) -> bool: - if nodename not in spec.tasks: - raise ValueError( - f"Workflow {spec.name} specifies node {nodename} as {type}, but it doesn't exist" - ) - return True - - @classmethod - def _validate_sources(cls, spec: WorkflowSpec) -> bool: - mapping_error = "Sources field must be a mapping between strings and lists of strings" - if not isinstance(spec.sources, dict): - raise ValueError(mapping_error) - else: - for k, v in spec.sources.items(): - if not (isinstance(k, str) and isinstance(v, list)): - raise ValueError(mapping_error) - - if len(spec.sources) == 0: - raise ValueError(f"There must be at least one source in workflow spec {spec.name}.") - - for source_name, source_ports in spec.sources.items(): - if len(source_ports) == 0: - raise ValueError( - f"Source {source_name} must be associated with at least " - f"one task input in workflow spec {spec.name}." - ) - - return cls._validate_node_list( - spec, [e for v in spec.sources.values() for e in v], "source" - ) - - @classmethod - def _validate_sinks(cls, spec: WorkflowSpec) -> bool: - mapping_error = "Sinks field must be a mapping of strings" - if not isinstance(spec.sinks, dict): - raise ValueError(mapping_error) - else: - for k, v in spec.sinks.items(): - if not (isinstance(k, str) and isinstance(v, str)): - raise ValueError(mapping_error) - - if len(spec.sinks) == 0: - warnings.warn( - f"Workflow {spec.name} has no sinks. Is it being used for side-effects only?" 
- ) - - return cls._validate_node_list(spec, [v for v in spec.sinks.values()], "sink") - - @classmethod - def _validate_node_list(cls, spec: WorkflowSpec, ref: List[str], type: str) -> bool: - for thing in ref: - nodename, _ = parse_edge_string(thing) - cls._validate_node_exists(spec, nodename, type) - return True - - @classmethod - def _port_exists(cls, port: str, node: SpecNodeType) -> bool: - if isinstance(node, OperationSpec): - return port in node.inputs_spec or port in node.output_spec - return port in node.sources or port in node.sinks - - @classmethod - def _validate_workflow_without_edges(cls, workflow_spec: WorkflowSpec) -> bool: - sink_nodes = set([parse_edge_string(s)[0] for s in workflow_spec.sinks.values()]) - source_nodes = set( - [parse_edge_string(ss)[0] for s in workflow_spec.sources.values() for ss in s] - ) - task_nodes = workflow_spec.tasks - if not len(task_nodes) == len(sink_nodes) == len(source_nodes): - raise ValueError( - "The number of sink and source nodes should equal the number of tasks " - "when defining a workflow without edges." 
- ) - # "Single"-operation workflows aren't required to have edges - workflow_spec.edges = [] - return True - - @classmethod - def _validate_edges(cls, workflow_spec: WorkflowSpec) -> bool: - if not workflow_spec.edges: - cls._validate_workflow_without_edges(workflow_spec) - if not isinstance(workflow_spec.edges, list): - raise TypeError(f"Edges of workflow {workflow_spec.name} are not in a list.") - source_ports = [port for source in workflow_spec.sources.values() for port in source] - for edge in workflow_spec.edges: - if not isinstance(edge.destination, list): - raise TypeError(f"Destination of edge {edge} is not a list") - for source in source_ports: - if source in edge.destination: - raise ValueError( - f"Source {source} is also a destination of edge " - f"{edge.origin} -> {source}" - ) - cls._validate_node_list(workflow_spec, [edge.origin], "edge origin") - cls._validate_node_list(workflow_spec, edge.destination, "edge destination") - return True - - @classmethod - def _validate_parameter_references(cls, workflow_spec: WorkflowSpec): - """ - Validate that all defined workflow parameters are used in tasks and that all parameter - references exist - """ - - param_references = { - get_parameter_reference(v, task_name) - for task_name, task in workflow_spec.tasks.items() - for v in flat_params(task.parameters) - } - param_references.discard(None) - bad_params = [param for param in workflow_spec.parameters if param not in param_references] - bad_references = {ref for ref in param_references if ref not in workflow_spec.parameters} - if not (bad_params or bad_references): - return - error_msg = [] - for msg, bad_stuff in zip( - ( - "Workflow parameter{s} {bad_stuff_str} {is_are} not mapped to any task parameters", - "Task parameters reference undefined workflow parameter{s} {bad_stuff_str}", - ), - (bad_params, bad_references), - ): - if bad_stuff: - bad_stuff_str = ", ".join([f"'{i}'" for i in bad_stuff]) - s = "s" if len(bad_stuff) > 1 else "" - is_are = "are" if 
len(bad_stuff) > 1 else "is" - error_msg.append(msg.format(bad_stuff_str=bad_stuff_str, s=s, is_are=is_are)) - raise ValueError(". ".join(error_msg)) - - @classmethod - def _validate_parameter_defaults(cls, workflow_spec: WorkflowSpec): - resolver = ParameterResolver(workflow_spec.workflows_dir, workflow_spec.ops_dir) - params = resolver.resolve(workflow_spec) - bad_params = [k for k, v in params.items() if isinstance(v.default, tuple)] - if bad_params: - param_names = ", ".join([f"'{p}'" for p in bad_params]) - s = "s" if len(bad_params) > 1 else "" - s_ = "" if len(bad_params) > 1 else "s" - raise ValueError( - f"Workflow parameter{s} {param_names} map{s_} to task parameters with different " - "default values. Please define a default value in the workflow." - ) - - @classmethod - def _validate_parameters(cls, workflow_spec: WorkflowSpec): - cls._validate_parameter_references(workflow_spec) - cls._validate_parameter_defaults(workflow_spec) - - @classmethod - def validate(cls, workflow_spec: WorkflowSpec) -> WorkflowSpec: - cls._validate_sources(workflow_spec) - cls._validate_sinks(workflow_spec) - cls._validate_edges(workflow_spec) - cls._validate_parameters(workflow_spec) - - for task in workflow_spec.tasks.values(): - spec = task.load(workflow_spec.ops_dir, workflow_spec.workflows_dir) - if isinstance(spec, WorkflowSpec): - if spec.name == workflow_spec.name: - raise ValueError( - f"Recursive definition of workflow {workflow_spec.name} is not supported." - ) - cls.validate(spec) - - return workflow_spec diff --git a/src/vibe_server/vibe_server/workflow/workflow.py b/src/vibe_server/vibe_server/workflow/workflow.py deleted file mode 100644 index a4398b89..00000000 --- a/src/vibe_server/vibe_server/workflow/workflow.py +++ /dev/null @@ -1,637 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import logging -import os -import re -from collections import defaultdict -from copy import deepcopy -from enum import IntEnum -from re import Pattern -from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Type, TypeVar, cast - -from vibe_common.constants import DEFAULT_OPS_DIR -from vibe_common.schemas import EntryPointDict, OperationSpec -from vibe_core.data.core_types import BaseVibe, DataVibeType, TypeDictVibe, UnresolvedDataVibe -from vibe_core.data.utils import ( - get_base_type, - get_most_specific_type, - is_container_type, - is_vibe_list, -) -from vibe_core.datamodel import TaskDescription - -from . import get_workflow_dir -from .graph import Edge, Graph -from .spec_parser import ( - SpecNodeType, - WorkflowParser, - WorkflowSpec, - WorkflowSpecEdge, - WorkflowSpecNode, - get_parameter_reference, - parse_edge_string, -) -from .spec_validator import WorkflowSpecValidator - -ORIGIN = 0 -DESTINATION = 1 -LABEL = 2 -T = TypeVar("T", bound=BaseVibe) - - -class InputFanOut(OperationSpec): - input_port: str = "input" - output_port: str = "output" - - def __init__(self, name: str, data_type: DataVibeType): - if not is_container_type(data_type): - data_type = List[data_type] # type: ignore - inputs_spec = TypeDictVibe({self.input_port: data_type}) - output_spec = TypeDictVibe({self.output_port: data_type}) - ed: EntryPointDict = {"file": "", "callback_builder": ""} - td = TaskDescription() - super().__init__(name, "", inputs_spec, output_spec, ed, td, {}, {}, {}) - - -class GraphNodeType(NamedTuple): - name: str - spec: OperationSpec - - -class EdgeType(IntEnum): - single = 0 - parallel = 1 - scatter = 2 - gather = 3 - - -class EdgeLabel(NamedTuple): - srcport: str - dstport: str - type: EdgeType - - def __hash__(self): - return hash(self.srcport) * hash(self.dstport) - - -class WorkflowEdge(Edge[GraphNodeType, EdgeLabel]): - def __str__(self): - src, dst, label = self - return f"{src.name}.{label.srcport} -> {dst.name}.{label.dstport} 
({label.type.name})" - - -class Workflow(Graph[GraphNodeType, EdgeLabel]): - param_pattern: "Pattern[str]" = re.compile(r"@from\((.*)\)") - logger: logging.Logger - workflow_spec: WorkflowSpec - index: Dict[str, GraphNodeType] - _sinks: Dict[GraphNodeType, List[str]] - _sources: Dict[GraphNodeType, List[str]] - - def __init__(self, workflow_spec: WorkflowSpec, resolve: bool = True): - """Instantiate workflow from a workflow specification. - Given a workflow specification, instantiate all tasks, recursively instantiating workflows, - and connect all nodes. - When `resolve = False`, do not resolve types and edge labels. - This is necessary when instantiating inner workflows in order to resolve everything - when the whole graph is in place. - """ - super().__init__() - - self.logger = logging.getLogger(f"{__name__}.Workflow") - self.workflow_spec = workflow_spec - - self._build_index() - - self.source_mappings = {k: [i for i in v] for k, v in self.workflow_spec.sources.items()} - self._sources = defaultdict(list) - for sources in self.source_mappings.values(): - for source in sources: - name, port = parse_edge_string(source, maxsplit=-1) - self._sources[self.index[name]].append(port) - - self.sink_mappings = {k: v for k, v in self.workflow_spec.sinks.items()} - self._sinks = defaultdict(list) - for sink in self.sink_mappings.values(): - name, port = parse_edge_string(sink, maxsplit=-1) - self._sinks[self.index[name]].append(port) - - if resolve: - self.resolve_types() - self.validate() - - fanout, fanin = self._find_fan_out_fan_in_edges() - self._update_edges(fanout, fanin) - - def _ensure_same_container( - self, input_type: DataVibeType, ref_type: DataVibeType - ) -> DataVibeType: - """ - Ensure the input type has (doesn't have) a container if the reference - type has (does not have) one - """ - base_type = get_base_type(input_type) - if is_vibe_list(ref_type): - return cast(Type[List[BaseVibe]], List[base_type]) - return base_type - - def 
_resolve_types_for_node(self, node: GraphNodeType): - """ - Resolve types for all output ports in node - """ - for port_name in node.spec.output_spec: - self._resolve_port_type(node, port_name) - - def _resolve_port_type(self, node: GraphNodeType, port_name: str): - """ - Resolve port type and update the op spec, if necessary. - This method assumes that the referred port already has a resolved type - This is the case for our current implementation because we traverse the - graph in topological order - """ - port_type = node.spec.output_spec[port_name] - if not isinstance(port_type, UnresolvedDataVibe): - # Nothing to resolve - return - - origin_port = port_type.__name__ - origin_str = f"{node.name}.{origin_port}" - port_str = f"{node.name}.{port_name}" - try: - origin_type = node.spec.inputs_spec[origin_port] - except KeyError: - raise ValueError( - f"Could not infer type of '{port_str}': " - f"'{origin_port}' is not an input port for '{node.name}'" - ) - if origin_port in self.sources.get(node, []): - # There is no one to get the type from because we refer to a source port. 
- # We get it from the input port for now, could try something smarter - self.logger.debug( - f"Inferring type of {port_str} directly from referenced " - f"input port {origin_str} because it is a source port" - ) - node.spec.output_spec[port_name] = origin_type - return - - # Let's get the type from what connects to the origin port - source, _, label = self.edge_to(node, origin_port) - source_port = label.srcport - source_type = source.spec.output_spec[source_port] - - if isinstance(source_type, UnresolvedDataVibe): - raise RuntimeError( - f"Unresolved type on previous level port {source.name}.{source_port}" - ) - - node.spec.output_spec[port_name] = self._ensure_same_container(source_type, origin_type) - - def resolve_types(self): - for nodes in self.topological_sort(): - for node in nodes: - self._resolve_types_for_node(node) - - def validate(self) -> bool: - if self.has_cycle(): - try: - self.topological_sort() - except ValueError as e: - raise ValueError( - f"Workflows should be Directed Acyclic Graphs, " - f"but workflow {self.workflow_spec.name} has a cycle" - ) from e - self._validate_edges_io() - self._validate_all_inputs_connected() - self._validate_sinks_exist() - # We verify compatibility of ports associated to a source when building the inputs spec - # Calling it here acts as validation of the workflow sources - self.inputs_spec - return True - - @property - def ops_dir(self) -> str: - return self.workflow_spec.ops_dir - - @property - def workflow_dir(self) -> str: - return self.workflow_spec.workflows_dir - - def _get_type_for(self, port_str: str) -> DataVibeType: - name, port = parse_edge_string(port_str, maxsplit=-1) - op = self.index[name].spec - try: - return op.inputs_spec[port] - except KeyError: - return op.output_spec[port] - - def _remove_label_from_edge( - self, edges: Iterable[Edge[GraphNodeType, EdgeLabel]] - ) -> Set[Tuple[GraphNodeType, GraphNodeType]]: - return {e[:-1] for e in edges} - - def _find_fan_out_fan_in_edges(self) -> 
Tuple[Set[Edge[GraphNodeType, EdgeLabel]], ...]: - fanout = set() - fanin = set() - for edge in self.edges: - source, destination, label = edge - srctype = source.spec.output_spec[label.srcport] - dsttype = destination.spec.inputs_spec[label.dstport] - if isinstance(srctype, UnresolvedDataVibe): - raise RuntimeError( - f"Unresolved type found on edge {edge}, when finding fan-out/in edges" - ) - if is_vibe_list(srctype) == is_vibe_list(dsttype): - continue - if is_vibe_list(srctype) and not is_vibe_list(dsttype): - fanout.add(edge) - elif is_vibe_list(dsttype) and not is_vibe_list(srctype): - fanin.add(edge) - else: - raise RuntimeError( - f"srctype {srctype} and dsttype {dsttype} are different " - f"but are not of the expected types List -> DataVibe " - "or DataVibe -> List" - ) - return fanout, fanin - - def _update_edges( - self, - fanout: Set[Edge[GraphNodeType, EdgeLabel]], - fanin: Set[Edge[GraphNodeType, EdgeLabel]], - ): - op_parallelism = {} - for edge in fanin: - self.relabel(edge, EdgeLabel(*edge[LABEL][:-1], EdgeType.gather)) - for edge in fanout: - self.relabel(edge, EdgeLabel(*edge[LABEL][:-1], EdgeType.scatter)) - - for root in self.sources: - self.propagate_labels(root, 0, op_parallelism) - for task, v in op_parallelism.items(): - if v < 0: - raise ValueError(f"Fan-in without parallelism at input of {task.name}") - if v > 1: - # This should never happen because we break during propagation - raise RuntimeError(f"Nested fan-out at input of {task.name}") - - def propagate_labels( - self, root: GraphNodeType, parallelism_level: int, op_parallelism: Dict[GraphNodeType, int] - ): - """Propagate parallelism labels across the graph. - - We update labels according to the parallelism level of previous edges along a path - (single -> parallel if parallelism_level > 0). - - Our parallelization strategy involves parallelizing ops if *any* of the incoming edges is - parallel. 
If there are both parallel and singular edges in the same op, the parallel edges - distribute items into several instances of the op, while all the data flowing into singular - edges is replicated as is to all op instances. - Due to this strategy, we keep track of the maximum parallelism level of all input ports - in an op, and propagate that into the next level. This means that in some paths the - algorithm might temporarily assign wrong parallelism levels to edges (even < 0), but they - will be overwritten to the correct level after the most parallel path is traversed. - """ - for source, neighbor, label in self.edges_from(root): - edge = WorkflowEdge((source, neighbor, label)) - label_type = label.type - neighbor_parallelism_level = parallelism_level - if label_type == EdgeType.parallel: - return - elif label_type == EdgeType.single: - if neighbor_parallelism_level > 0: - label_type = EdgeType.parallel - elif label_type == EdgeType.scatter: - if neighbor_parallelism_level > 0: - raise ValueError(f"Nested fan-out found at edge {edge} is unsupported") - neighbor_parallelism_level += 1 - elif label_type == EdgeType.gather: - # If we are not parallel, gather will just make a list of a single element - neighbor_parallelism_level = max(0, neighbor_parallelism_level - 1) - else: - raise RuntimeError(f"Found unknown label type in edge {edge}") - if neighbor in op_parallelism: - neighbor_parallelism_level = max( - neighbor_parallelism_level, op_parallelism[neighbor] - ) - op_parallelism[neighbor] = neighbor_parallelism_level - self.relabel((source, neighbor, label), EdgeLabel(*label[:-1], label_type)) - self.propagate_labels(neighbor, neighbor_parallelism_level, op_parallelism) - - def prefix_node(self, node: GraphNodeType, prefix: str) -> GraphNodeType: - return GraphNodeType(name=f"{prefix}.{node.name}", spec=node.spec) - - def merge_inner_workflow(self, inner_workflow: "Workflow", prefix: str): - inner_index = { - f"{prefix}.{k}": self.prefix_node(v, prefix) for k, v 
in inner_workflow.index.items() - } - # Add nodes to the graph - for v in inner_index.values(): - self.add_node(v) - # Update our index - self.index.update(inner_index) - # Add edges - for edge in inner_workflow.edges: - origin, destination, label = edge - self.add_edge( - inner_index[f"{prefix}.{origin.name}"], - inner_index[f"{prefix}.{destination.name}"], - label, - ) - - def _load_inner_workflow(self, workflow: WorkflowSpec, taskname: str) -> None: - wf = Workflow(workflow, resolve=False) - spec = wf.workflow_spec - self.workflow_spec.edges = list( - self._update_workflow_spec_edges(self.workflow_spec.edges, spec, taskname) - ) - self.workflow_spec.sources = dict( - self._update_workflow_spec_sources(self.workflow_spec.sources, spec, taskname) - ) - self.workflow_spec.sinks = dict( - self._update_workflow_spec_sinks(self.workflow_spec.sinks, spec, taskname) - ) - self.merge_inner_workflow(wf, taskname) - - def _add_workflow_edge_to_graph(self, origin: str, destination: str) -> None: - origin, srcport = parse_edge_string(origin, -1) - destination, dstport = parse_edge_string(destination, -1) - try: - if srcport not in self.index[origin].spec.output_spec: - raise ValueError(f"Port {srcport} could not be found as output of op {origin}") - if dstport not in self.index[destination].spec.inputs_spec: - raise ValueError(f"Port {dstport} could not be found as input of op {destination}") - self.add_edge( - self.index[origin], - self.index[destination], - EdgeLabel(srcport, dstport, EdgeType.single), - ) - except KeyError as e: - raise ValueError( - f"Tried to connect port {srcport} from op {origin} to " - f"port {dstport} of op {destination}, but {str(e)} does " - "not exist in the workflow graph." 
- ) - - def _resolve_parameters(self, task: SpecNodeType): - wf_params = self.workflow_spec.parameters - - def resolve(parameters: Dict[str, Any], default: Dict[str, Any]): - new_params = deepcopy(parameters) - for k, v in parameters.items(): - if isinstance(v, dict): - new_params[k] = resolve(parameters[k], default[k]) - ref_name = get_parameter_reference(v, task.name) - if ref_name is not None: - if wf_params is None or ref_name not in wf_params: - raise ValueError( - f"Could not find parameter '{ref_name}' in workflow '{self.name}'" - f" to substitute in task '{task.name}'" - ) - override = wf_params[ref_name] - # Keep default parameter if override is not defined - new_params[k] = default[k] if override is None else override - return new_params - - task.parameters = resolve(task.parameters, task.default_parameters) - - def _build_index(self) -> Dict[str, GraphNodeType]: - self.index: Dict[str, GraphNodeType] = {} - - for k, t in self.workflow_spec.tasks.items(): - task = t.load(self.ops_dir, self.workflow_dir) - self._resolve_parameters(task) - if isinstance(task, WorkflowSpec): - self._load_inner_workflow(task, k) - else: - self.index[k] = GraphNodeType(k, task) - self.add_node(self.index[k]) - for edge in self.workflow_spec.edges: - for destination in edge.destination: - self._add_workflow_edge_to_graph(edge.origin, destination) - - return self.index - - def _update_workflow_spec_sources( - self, - sources: Dict[str, List[str]], - included_workflow_spec: WorkflowSpec, - prefix: str, - ) -> Iterable[Tuple[str, List[str]]]: - for sourcename, targets in sources.items(): - tmp = [] - for target in targets: - target_task, target_source_name = parse_edge_string(target, -1) - if target_task != prefix: - tmp.append(target) - else: - if target_source_name not in included_workflow_spec.sources: - raise ValueError( - f"Could not find source '{target_source_name}' " - f"in inner workflow '{prefix}'" - ) - tmp.extend( - [ - f"{prefix}.{t}" - for t in 
included_workflow_spec.sources[target_source_name] - ] - ) - yield sourcename, tmp - - def _update_workflow_spec_sinks( - self, - sinks: Dict[str, str], - included_workflow_spec: WorkflowSpec, - prefix: str, - ) -> Iterable[Tuple[str, str]]: - for name, real_sink in sinks.items(): - sink_task, sink_name = parse_edge_string(real_sink, -1) - if sink_task != prefix: - yield name, real_sink - else: - if sink_name not in included_workflow_spec.sinks: - raise ValueError( - f"Could not find sink '{sink_name}' in inner workflow '{prefix}'" - ) - yield name, f"{prefix}.{included_workflow_spec.sinks[sink_name]}" - - def _update_workflow_spec_edges( - self, edges: List[WorkflowSpecEdge], included_workflow_spec: WorkflowSpec, prefix: str - ) -> Iterable[WorkflowSpecEdge]: - for edge in edges: - tmp = self._update_edge_destinations(edge, included_workflow_spec, prefix) - yield self._update_edge_origin(tmp, included_workflow_spec, prefix) - - def _update_edge_destinations( - self, edge: WorkflowSpecEdge, included_workflow_spec: WorkflowSpec, prefix: str - ) -> WorkflowSpecEdge: - new_edge = WorkflowSpecEdge(edge.origin, []) - for destination in edge.destination: - matched = False - for source, targets in included_workflow_spec.sources.items(): - sourcename = f"{prefix}.{source}" - if destination == sourcename: - new_edge.destination.extend( - [f"{prefix}.{target}" for target in targets], - ) - # Mask the match - matched = True - # If we match one source, we won't match others, so we're done - break - if not matched: - # We don't have any matches, let's put it back in the list - new_edge.destination.append(destination) - return new_edge - - def _update_edge_origin( - self, edge: WorkflowSpecEdge, included_workflow_spec: WorkflowSpec, prefix: str - ) -> WorkflowSpecEdge: - for spec_name, real_name in included_workflow_spec.sinks.items(): - if f"{prefix}.{spec_name}" == edge.origin: - edge.origin = f"{prefix}.{real_name}" - # We updated the edge, our work is done - return edge - 
return edge - - def _validate_all_inputs_connected(self): - inputs = { - f"{name}.{port}": False - for name, node in self.index.items() - for port in node.spec.inputs_spec - } - - for node, ports in self.sources.items(): - for port in ports: - key = f"{node.name}.{port}" - if key not in inputs: - raise ValueError(f"'{key}' not in inputs dictionary") - inputs[key] = True - - for _, destination, label in self.edges: - key = f"{destination.name}.{label.dstport}" - if key not in inputs: - raise ValueError(f"'{key}' not in inputs dictionary") - inputs[key] = True - - missing: List[str] = [] - for key, value in inputs.items(): - if not value: - missing.append(f"'{key}'") - - if missing: - s = "s" if len(missing) > 1 else "" - raise ValueError( - f"Operation{s} port{s} {','.join(missing)} missing inputs. " - "All tasks in a workflow must have all their inputs filled" - ) - return True - - def _validate_edges_io(self): - def check_compatible_io(edge: WorkflowEdge) -> None: - origin, destination, label = edge - origin_type = get_base_type(origin.spec.output_spec[label.srcport]) - destination_type = get_base_type(destination.spec.inputs_spec[label.dstport]) - if not issubclass(origin_type, destination_type): - raise ValueError( - "Incompatible types for edge " - f'"{origin.name}.{label.srcport}" ({origin_type.__name__})' - f' -> "{destination.name}.{label.dstport}" ({destination_type.__name__})' - ) - - for edge in self.edges: - check_compatible_io(edge) - - def _validate_sinks_exist(self): - for node, ports in self.sinks.items(): - for port in ports: - if port not in node.spec.output_spec: - raise ValueError(f"'{node.name}.{port}' not in op output spec") - - def __getitem__(self, op_name: str) -> OperationSpec: - for op in self.nodes: - if op.name == op_name: - return op.spec - raise KeyError(f"op {op_name} does not exist") - - @property - def name(self): - return self.workflow_spec.name - - @property - def inputs_spec(self) -> TypeDictVibe: - spec = {} - for k, v in 
self.source_mappings.items(): - try: - spec[k] = get_most_specific_type([self._get_type_for(i) for i in v]) - except ValueError as e: - raise ValueError(f"Workflow source '{k}' contains incompatible types. {e}") - return TypeDictVibe(spec) - - @property - def output_spec(self): - return TypeDictVibe({k: self._get_type_for(v) for k, v in self.sink_mappings.items()}) - - @property - def sources(self) -> Dict[GraphNodeType, List[str]]: - return {k: v for k, v in self._sources.items()} - - @property - def sinks(self) -> Dict[GraphNodeType, List[str]]: - return {k: v for k, v in self._sinks.items()} - - @property - def edges(self) -> List[WorkflowEdge]: - return [WorkflowEdge(e) for e in super().edges] - - def edges_from(self, node: GraphNodeType) -> List[WorkflowEdge]: - return [WorkflowEdge(e) for e in super().edges_from(node)] - - def edge_to(self, node: GraphNodeType, port_name: str): - edges = [e for e in self.edges if e[LABEL].dstport == port_name and e[DESTINATION] is node] - port_str = f"'{node.name}.{port_name}'" - if not edges: - raise ValueError(f"{port_str} is not a destination of any port") - if len(edges) > 1: - # Something went very wrong if we are here - raise RuntimeError(f"Found multiple edges with '{port_str}' as destination") - return edges[0] - - def get_node(self, op_name: str) -> WorkflowSpecNode: - return self.workflow_spec.tasks[op_name] - - def get_op_dir(self, op_name: str) -> Optional[str]: - return self.workflow_spec.tasks[op_name].op_dir - - def get_op_parameter(self, op_name: str) -> Optional[Dict[str, Any]]: - return self.workflow_spec.tasks[op_name].parameters - - @classmethod - def build( - cls, - workflow_path: str, - ops_base_dir: str = DEFAULT_OPS_DIR, - workflow_base_dir: str = get_workflow_dir(), - parameters_override: Optional[Dict[str, Any]] = None, - ) -> "Workflow": - spec = WorkflowParser.parse( - workflow_path, - ops_base_dir, - workflow_base_dir, - parameters_override, - ) - WorkflowSpecValidator.validate(spec) - return 
Workflow(spec) - - -def load_workflow_by_name( - name: str, - ops_dir: str = DEFAULT_OPS_DIR, - workflow_dir: str = get_workflow_dir(), -) -> Workflow: - """Loads a workflow in the format returned by `list_workflows()`""" - - return Workflow.build( - os.path.join(workflow_dir, f"{name}.yaml"), - ops_base_dir=ops_dir, - workflow_base_dir=workflow_dir, - ) diff --git a/workflows/data_ingestion/admag/admag_seasonal_field.yaml b/workflows/data_ingestion/admag/admag_seasonal_field.yaml deleted file mode 100644 index 7f7d7454..00000000 --- a/workflows/data_ingestion/admag/admag_seasonal_field.yaml +++ /dev/null @@ -1,53 +0,0 @@ -name: admag_seasonal_field -sources: - admag_input: - - admag_seasonal_field.admag_input -sinks: - seasonal_field: admag_seasonal_field.seasonal_field -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: -tasks: - admag_seasonal_field: - op: admag_seasonal_field - op_dir: admag - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" -description: - short_description: - Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data - Manager for Agriculture). - long_description: - The workflow creates a DataVibe subclass SeasonalFieldInformation that - contains farm-related operations (e.g., fertilization, harvest, tillage, - planting, crop name). - sources: - admag_input: Unique identifiers for ADMAg seasonal field, and party. - sinks: - seasonal_field: - Crop SeasonalFieldInformation which contains SeasonalFieldInformation that - contains farm-related operations (e.g., fertilization, harvest, tillage, - planting, crop name). - parameters: - base_url: - Azure Data Manager for Agriculture host. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - client_id: - Azure Data Manager for Agriculture client id. 
Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - client_secret: - Azure Data Manager for Agriculture client secret. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - authority: - Azure Data Manager for Agriculture authority. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - default_scope: - Azure Data Manager for Agriculture default scope. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. diff --git a/workflows/data_ingestion/admag/prescriptions.yaml b/workflows/data_ingestion/admag/prescriptions.yaml deleted file mode 100644 index 5acf812b..00000000 --- a/workflows/data_ingestion/admag/prescriptions.yaml +++ /dev/null @@ -1,75 +0,0 @@ -name: admag_prescritpions -sources: - admag_input: - - list_prescriptions.admag_input - - admag_prescriptions.admag_input -sinks: - response: admag_prescriptions.response -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: -tasks: - list_prescriptions: - op: list_prescriptions - op_dir: admag - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" - get_prescription: - op: get_prescription - op_dir: admag - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" - admag_prescriptions: - op: prescriptions - op_dir: admag - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" -edges: - - origin: list_prescriptions.prescriptions - destination: - - get_prescription.prescription_without_geom_input - - origin: get_prescription.prescription_with_geom - destination: - - 
admag_prescriptions.prescriptions_with_geom_input -description: - short_description: Fetches prescriptions using ADMAg (Microsoft Azure Data Manager for Agriculture). - long_description: - The workflow fetches prescriptions (sensor samples) linked to prescription_map_id. Each sensor sample has - nutrient and location information (Nitrogen, Carbon, Phosphorus, pH, Latitude, Longitude, etc.). The Latitude & - Longitude are used to create a point geometry. Geometry and nutrient information are transformed to GeoJSON. The GeoJSON - is stored as an asset in farmvibes-ai. - sources: - admag_input: Required inputs to access ADMAg resources, party_id and prescription_map_id that help fetch prescriptions. - sinks: - response: Prescriptions received from ADMAg. - parameters: - base_url: - URL to access the registered app. Refer to this URL to create the required resources for ADMAg. - https://learn.microsoft.com/en-us/azure/data-manager-for-agri/quickstart-install-data-manager-for-agriculture - - client_id: - Value that uniquely identifies the registered application in the Microsoft identity platform. Visit - https://learn.microsoft.com/en-us/azure/data-manager-for-agri/quickstart-install-data-manager-for-agriculture - to register the app. - client_secret: - Sometimes called an application password, a client secret is a string value your app can use in place of a certificate - to identify itself. - authority: - The endpoint URIs for your app are generated automatically when you register or configure your app.
It is used by - client to obtain authorization from the resource owner - default_scope: URL for default azure OAuth2 permissions diff --git a/workflows/data_ingestion/airbus/airbus_download.yaml b/workflows/data_ingestion/airbus/airbus_download.yaml deleted file mode 100644 index 14da8a3c..00000000 --- a/workflows/data_ingestion/airbus/airbus_download.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: airbus_download -sources: - user_input: - - list.input_item -sinks: - raster: download.downloaded_products -parameters: - api_key: -tasks: - list: - op: list_airbus_products - parameters: - api_key: "@from(api_key)" - download: - op: download_airbus - parameters: - api_key: "@from(api_key)" -edges: - - origin: list.airbus_products - destination: - - download.airbus_products -description: - short_description: Downloads available AirBus imagery for the input geometry and time range. - long_description: - The workflow will check available imagery, using the AirBus API, that contains the input - geometry and inside the input time range. Matching images will be purchased (if they are not - already in the user's library) and downloaded. This workflow requires an AirBus API key. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: AirBus raster. - parameters: - api_key: AirBus API key. Required to run the workflow. 
diff --git a/workflows/data_ingestion/airbus/airbus_price.yaml b/workflows/data_ingestion/airbus/airbus_price.yaml deleted file mode 100644 index ea2162ce..00000000 --- a/workflows/data_ingestion/airbus/airbus_price.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: airbus_price -sources: - user_input: - - list.input_item -sinks: - price: price.products_price -parameters: - api_key: -tasks: - list: - op: list_airbus_products - parameters: - api_key: "@from(api_key)" - price: - op: price_airbus_products - parameters: - api_key: "@from(api_key)" -edges: - - origin: list.airbus_products - destination: - - price.airbus_products -description: - short_description: Prices available AirBus imagery for the input geometry and time range. - long_description: - The workflow will check available imagery, using the AirBus API, that contains the input - geometry inside the input time range. The aggregate price (in kB) for matching images will be - computed, discounting images already in the user's library. This workflow requires an AirBus API - key. - sources: - user_input: Time range and geometry of interest. - sinks: - price: Price for all matching imagery. - parameters: - api_key: AirBus API key. Required to run the workflow. diff --git a/workflows/data_ingestion/alos/alos_forest_extent_download.yaml b/workflows/data_ingestion/alos/alos_forest_extent_download.yaml deleted file mode 100644 index 2035be5a..00000000 --- a/workflows/data_ingestion/alos/alos_forest_extent_download.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: alos_forest_extent_download -sources: - user_input: - - list.input_data -sinks: - downloaded_product: download.raster -parameters: - pc_key: -tasks: - list: - op: list_alos_products - download: - op: download_alos - parameters: - pc_key: "@from(pc_key)" -edges: - - origin: list.alos_products - destination: - - download.product -description: - short_description: Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map. 
- long_description: - The workflow lists all ALOS forest/non-forest classification products that intersect with the input - geometry and time range (available range 2015-2020), then downloads the data for - each of them. The data will be returned in the form of rasters. - sources: - user_input: Geometry of interest for which to download the ALOS forest/non-forest classification map. - sinks: - downloaded_product: Downloaded ALOS forest/non-forest classification map. \ No newline at end of file diff --git a/workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml b/workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml deleted file mode 100644 index ccb94db1..00000000 --- a/workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: alos_forest_extent_download_merge -sources: - user_input: - - alos_forest_extent_download.user_input -sinks: - merged_raster: merge.raster - categorical_raster: alos_forest_extent_download.downloaded_product -parameters: - pc_key: -tasks: - alos_forest_extent_download: - workflow: data_ingestion/alos/alos_forest_extent_download - parameters: - pc_key: "@from(pc_key)" - group_rasters_by_time: - op: group_rasters_by_time - parameters: - criterion: "year" - merge: - op: merge_rasters -edges: - - origin: alos_forest_extent_download.downloaded_product - destination: - - group_rasters_by_time.rasters - - origin: group_rasters_by_time.raster_groups - destination: - - merge.raster_sequence -description: - short_description: - Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map and merges it into a single raster. - long_description: - The workflow lists the ALOS forest/non-forest classification products that intersect with the input - geometry and time range (available range 2015-2020), and downloads the filtered products. The - workflow processes the downloaded products and merges them into a single raster.
- sources: - user_input: Geometry of interest for which to download the ALOS forest/non-forest classification map. - sinks: - merged_raster: ALOS forest/non-forest classification products converted to raster and merged. - categorical_raster: ALOS forest/non-forest classification products that intersect with the input geometry & time range. - parameters: - pc_key: Planetary computer API key. \ No newline at end of file diff --git a/workflows/data_ingestion/bing/basemap_download.yaml b/workflows/data_ingestion/bing/basemap_download.yaml deleted file mode 100644 index f4d04c4f..00000000 --- a/workflows/data_ingestion/bing/basemap_download.yaml +++ /dev/null @@ -1,35 +0,0 @@ -name: basemap_download -sources: - input_geometry: - - list.user_input -sinks: - basemaps: download.basemap -parameters: - api_key: - zoom_level: -tasks: - list: - op: list_bing_maps - parameters: - api_key: "@from(api_key)" - zoom_level: "@from(zoom_level)" - download: - op: download_bing_basemap - parameters: - api_key: "@from(api_key)" -edges: - - origin: list.products - destination: - - download.input_product -description: - short_description: - Downloads Bing Maps basemaps. - long_description: - The workflow will list all tiles intersecting with the input geometry for a given zoom level - and download a basemap for each of them using Bing Maps API. The basemap tiles will be returned - as individual rasters. - sources: - input_geometry: Geometry of interest for which to download the basemap tiles. - sinks: - basemaps: Downloaded basemaps. 
- diff --git a/workflows/data_ingestion/bing/basemap_download_merge.yaml b/workflows/data_ingestion/bing/basemap_download_merge.yaml deleted file mode 100644 index 84a92fc8..00000000 --- a/workflows/data_ingestion/bing/basemap_download_merge.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: basemap_download_merge -sources: - input_geometry: - - basemap_download.input_geometry -sinks: - merged_basemap: merge.raster -parameters: - api_key: - zoom_level: - merge_resolution: highest -tasks: - basemap_download: - workflow: data_ingestion/bing/basemap_download - parameters: - api_key: "@from(api_key)" - zoom_level: "@from(zoom_level)" - to_sequence: - op: list_to_sequence - merge: - op: merge_rasters - parameters: - resolution: "@from(merge_resolution)" -edges: - - origin: basemap_download.basemaps - destination: - - to_sequence.list_rasters - - origin: to_sequence.rasters_seq - destination: - - merge.raster_sequence -description: - short_description: - Downloads Bing Maps basemap tiles and merges them into a single raster. - long_description: - The workflow will list all tiles intersecting with the input geometry for a given zoom level, - and download a basemap for each of them using Bing Maps API. The basemaps will be merged into - a single raster with the union of the geometries of all tiles. - sources: - input_geometry: Geometry of interest for which to download the basemap tiles. - sinks: - merged_basemap: Merged basemap raster. 
- diff --git a/workflows/data_ingestion/cdl/download_cdl.yaml b/workflows/data_ingestion/cdl/download_cdl.yaml deleted file mode 100644 index e04fd1e3..00000000 --- a/workflows/data_ingestion/cdl/download_cdl.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: download_cdl -sources: - user_input: - - list_cdl.input_item -sinks: - raster: download_cdl.cdl_raster -tasks: - list_cdl: - op: list_cdl_products - download_cdl: - op: download_cdl - op_dir: download_cdl_data -edges: - - origin: list_cdl.cdl_products - destination: - - download_cdl.input_product -description: - short_description: Downloads crop classes maps in the continental USA for the input time range. - long_description: - The workflow will download crop-specific land cover maps from the USDA Cropland Data Layer, - available for the continental United States. The input geometry must intersect with the coverage - area. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: CDL land cover raster. diff --git a/workflows/data_ingestion/dem/download_dem.yaml b/workflows/data_ingestion/dem/download_dem.yaml deleted file mode 100644 index 083198bc..00000000 --- a/workflows/data_ingestion/dem/download_dem.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: download_dem -sources: - user_input: - - list.input_items -sinks: - raster: download.downloaded_product -parameters: - pc_key: - resolution: 10 - provider: "USGS3DEP" -tasks: - list: - op: list_dem_products - parameters: - resolution: "@from(resolution)" - provider: "@from(provider)" - download: - op: download_dem - parameters: - api_key: "@from(pc_key)" -edges: - - origin: list.dem_products - destination: - - download.input_product -description: - short_description: Downloads digital elevation map tiles that intersect with the input geometry and time range. 
- long_description: - The workflow will download digital elevation maps from the USGS 3DEP datasets (available - for the United States at 10 and 30 meters) or Copernicus DEM GLO-30 (globally at 30 meters) - through the Planetary Computer. For more information, see https://planetarycomputer.microsoft.com/dataset/3dep-seamless - and https://planetarycomputer.microsoft.com/dataset/cop-dem-glo-30 . - sources: - user_input: Time range and geometry of interest. - sinks: - raster: DEM raster. - parameters: - pc_key: Optional Planetary Computer API key. - resolution: Spatial resolution of the DEM. 10m and 30m are available. - provider: Provider of the DEM. "USGS3DEP" and "CopernicusDEM30" are available. diff --git a/workflows/data_ingestion/gedi/download_gedi.yaml b/workflows/data_ingestion/gedi/download_gedi.yaml deleted file mode 100644 index d11dbd08..00000000 --- a/workflows/data_ingestion/gedi/download_gedi.yaml +++ /dev/null @@ -1,35 +0,0 @@ -name: download_gedi -sources: - user_input: - - list.input_data -sinks: - product: download.downloaded_product -parameters: - earthdata_token: - processing_level: -tasks: - list: - op: list_gedi_products - parameters: - processing_level: "@from(processing_level)" - download: - op: download_gedi_product - parameters: - token: "@from(earthdata_token)" -edges: - - origin: list.gedi_products - destination: - - download.gedi_product -description: - short_description: Downloads GEDI products for the input region and time range. - long_description: - The workflow downloads Global Ecosystem Dynamics Investigation (GEDI) products at the desired - processing level using NASA's EarthData API. This workflow requires an EarthData API token. - sources: - user_input: Time range and geometry of interest. - sinks: - product: GEDI products. - parameters: - earthdata_token: API token for the EarthData platform. Required to run the workflow. - processing_level: - GEDI product processing level. One of 'GEDI01_B.002', 'GEDI02_A.002', 'GEDI02_B.002'. 
diff --git a/workflows/data_ingestion/gedi/download_gedi_rh100.yaml b/workflows/data_ingestion/gedi/download_gedi_rh100.yaml deleted file mode 100644 index 260a221f..00000000 --- a/workflows/data_ingestion/gedi/download_gedi_rh100.yaml +++ /dev/null @@ -1,35 +0,0 @@ -name: download_gedi_rh100 -sources: - user_input: - - download.user_input - - extract.roi -sinks: - rh100: extract.rh100 -parameters: - earthdata_token: - check_quality: -tasks: - download: - workflow: data_ingestion/gedi/download_gedi - parameters: - earthdata_token: "@from(earthdata_token)" - extract: - op: extract_gedi_rh100 - parameters: - check_quality: "@from(check_quality)" -edges: - - origin: download.product - destination: - - extract.gedi_product -description: - short_description: Downloads L2B GEDI products and extracts RH100 variables. - long_description: - The workflow will download the products for the input region and time range, and then extract - RH100 variables for each of the beam shots. Each value is geolocated according to the lowest - mode latitude and longitude values. - sources: - user_input: Time range and geometry of interest. - sinks: - rh100: Points in EPSG:4326 with their associated RH100 values. - parameters: - check_quality: Whether to filter points according to the quality flag. 
diff --git a/workflows/data_ingestion/glad/glad_forest_extent_download.yaml b/workflows/data_ingestion/glad/glad_forest_extent_download.yaml deleted file mode 100644 index 5d36f8ef..00000000 --- a/workflows/data_ingestion/glad/glad_forest_extent_download.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: glad_forest_extent_download -sources: - input_item: - - list.input_item -sinks: - downloaded_product: download.downloaded_product -parameters: -tasks: - list: - op: list_glad_products - download: - op: download_glad - op_dir: download_glad_data -edges: - - origin: list.glad_products - destination: - - download.glad_product -description: - short_description: - Downloads Global Land Analysis (GLAD) forest extent data. - long_description: - The workflow will list all GLAD forest extent products that intersect with the input geometry - and download the data for each of them. The data will be returned as rasters. - sources: - input_item: Geometry of interest for which to download the GLAD forest extent data. - sinks: - downloaded_product: Downloaded GLAD forest extent product. 
diff --git a/workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml b/workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml deleted file mode 100644 index da52f9d8..00000000 --- a/workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: glad_forest_extent_download_merge -sources: - input_item: - - glad_forest_extent_download.input_item -parameters: -sinks: - merged_product: merge.raster - categorical_raster: glad_forest_extent_download.downloaded_product -tasks: - glad_forest_extent_download: - workflow: data_ingestion/glad/glad_forest_extent_download - group_rasters_by_time: - op: group_rasters_by_time - parameters: - criterion: "year" - merge: - op: merge_rasters -edges: - - origin: glad_forest_extent_download.downloaded_product - destination: - - group_rasters_by_time.rasters - - origin: group_rasters_by_time.raster_groups - destination: - - merge.raster_sequence -description: - short_description: - Downloads the tiles from Global Land Analysis (GLAD) forest data that intersect with the user input geometry and time range, and merges them into a single raster. - long_description: - The workflow lists the GLAD forest products that intersect with the input - geometry and time range, and downloads the filtered products. The downloaded - products are merged into a single raster and classified. The result tiles have - pixel values categorized into two classes - 0 (non-forest) and 1 (forest). - This workflow uses the same forest definition as the Food and Agriculture - Organization of the United Nations (FAO). - sources: - input_item: Geometry of interest for which to download the GLAD forest extent data. - sinks: - merged_product: Merged GLAD forest extent product to geometry of interest. - categorical_raster: Raster with the GLAD forest extent data. 
- - \ No newline at end of file diff --git a/workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml b/workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml deleted file mode 100644 index 3f6884f7..00000000 --- a/workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml +++ /dev/null @@ -1,79 +0,0 @@ -name: download_gnatsgo -sources: - user_input: - - list.input_item -sinks: - raster: download.downloaded_raster -parameters: - pc_key: - variable: soc0_5 -tasks: - list: - op: list_gnatsgo_products - download: - op: download_gnatsgo - parameters: - api_key: "@from(pc_key)" - variable: "@from(variable)" -edges: - - origin: list.gnatsgo_products - destination: - - download.gnatsgo_product -description: - short_description: - Downloads gNATSGO raster data that intersect with the input geometry and time range. - long_description: - This workflow lists and downloads raster products of the gNATSGO dataset from Planetary Computer. - Input geometry must fall within the continental USA, whereas input time range can be arbitrary (all - gNATSGO assets are from 2020-07-01). For more information on the available properties, see - https://planetarycomputer.microsoft.com/dataset/gnatsgo-rasters. - sources: - user_input: Geometry of interest (arbitrary time range). - sinks: - raster: Raster with desired property. - parameters: - pc_key: Optional Planetary Computer API key. - variable: >- - Options are: - aws{DEPTH} - Available water storage estimate (AWS) for the DEPTH zone. - soc{DEPTH} - Soil organic carbon stock estimate (SOC) for the DEPTH zone. - tk{DEPTH}a - Thickness of soil components used in the DEPTH zone for the AWS calculation. - tk{DEPTH}s - Thickness of soil components used in the DEPTH zone for the SOC calculation. - mukey - Map unit key, a unique identifier of a record for matching with gNATSGO tables. - droughty - Drought vulnerability estimate.
- nccpi3all - National Commodity Crop Productivity Index that has the highest value among Corn - and Soybeans, Small Grains, or Cotton for major earthy components. - nccpi3corn - National Commodity Crop Productivity Index for Corn for major earthy - components. - nccpi3cot - National Commodity Crop Productivity Index for Cotton for major earthy - components. - nccpi3sg - National Commodity Crop Productivity Index for Small Grains for major earthy - components. - nccpi3soy - National Commodity Crop Productivity Index for Soy for major earthy components. - pctearthmc - National Commodity Crop Productivity Index map unit percent earthy is the map - unit summed comppct_r for major earthy components. - pwsl1pomu - Potential Wetland Soil Landscapes (PWSL). - rootznaws - Root zone (commodity crop) available water storage estimate (RZAWS). - rootznemc - Root zone depth is the depth within the soil profile that commodity crop (cc) - roots can effectively extract water and nutrients for growth. - musumcpct - Sum of the comppct_r (SSURGO component table) values for all listed components - in the map unit. - musumcpcta - Sum of the comppct_r (SSURGO component table) values used in the available - water storage calculation for the map unit. - musumcpcts - Sum of the comppct_r (SSURGO component table) values used in the soil organic - carbon calculation for the map unit. - - gNATSGO has properties available for multiple soil - depths. 
You may exchange DEPTH in the variable names above for any of the following (all - measured in cm): - 0_5 - 0_20 - 0_30 - 5_20 - 0_100 - 0_150 - 0_999 - 20_50 - 50_100 - 100_150 - 150_999 diff --git a/workflows/data_ingestion/hansen/hansen_forest_change_download.yaml b/workflows/data_ingestion/hansen/hansen_forest_change_download.yaml deleted file mode 100644 index b682d005..00000000 --- a/workflows/data_ingestion/hansen/hansen_forest_change_download.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: glad_forest_change_download -sources: - input_item: - - list.input_item -sinks: - merged_raster: merge.raster - downloaded_raster: download.raster -parameters: - layer_name: - tiles_folder_url: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ -tasks: - list: - op: list_hansen_products - parameters: - tiles_folder_url: "@from(tiles_folder_url)" - layer_name: "@from(layer_name)" - download: - op: download_hansen - group: - op: group_rasters_by_time - parameters: - criterion: "year" - merge: - op: merge_rasters -edges: - - origin: list.hansen_products - destination: - - download.hansen_product - - origin: download.raster - destination: - - group.rasters - - origin: group.raster_groups - destination: - - merge.raster_sequence -description: - short_description: Downloads and merges Global Forest Change (Hansen) rasters that intersect the user-provided geometry/time range. - long_description: - The workflow lists Global Forest Change (Hansen) products that intersect the - user-provided geometry/time range, downloads the data for each of them, and - merges the rasters. The dataset is available at 30m resolution and is - updated annually. The data contains information on forest cover, loss, and - gain. The default dataset version is GFC-2022-v1.10 and is passed to the - workflow as the parameter tiles_folder_url. For the default version, the - dataset is available from 2000 to 2022. 
Dataset details can be found at - https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. - sources: - input_item: User-provided geometry and time range. - sinks: - merged_raster: Merged Global Forest Change (Hansen) data as a raster. - downloaded_raster: Individual Global Forest Change (Hansen) rasters prior to the merge operation. - parameters: - tiles_folder_url: - URL to the Global Forest Change (Hansen) dataset. It specifies the dataset - version and is used to download the data. - layer_name: - Name of the Global Forest Change (Hansen) layer. Can be any of the following names - 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'. \ No newline at end of file diff --git a/workflows/data_ingestion/landsat/preprocess_landsat.yaml b/workflows/data_ingestion/landsat/preprocess_landsat.yaml deleted file mode 100644 index 303f96f3..00000000 --- a/workflows/data_ingestion/landsat/preprocess_landsat.yaml +++ /dev/null @@ -1,43 +0,0 @@ -name: preprocess_landsat -sources: - user_input: - - list.input_item -sinks: - raster: stack.landsat_raster -parameters: - pc_key: - qa_mask_value: 64 -tasks: - list: - op: list_landsat_products_pc - download: - op: download_landsat_from_pc - parameters: - api_key: "@from(pc_key)" - stack: - op: stack_landsat - parameters: - qa_mask_value: "@from(qa_mask_value)" -edges: - - origin: list.landsat_products - destination: - - download.landsat_product - - origin: download.downloaded_product - destination: - - stack.landsat_product -description: - short_description: - Downloads and preprocesses LANDSAT tiles that intersect with the input geometry and time range. - long_description: - The workflow will download the tile bands from the Planetary Computer and stack them into a - single raster at 30m resolution. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: LANDSAT rasters at 30m resolution. - parameters: - pc_key: Optional Planetary Computer API key. 
- qa_mask_value: - Bitmask indicating which pixels are to be included. See documentation for each bit in - https://www.usgs.gov/media/images/landsat-collection-2-pixel-quality-assessment-bit-index - For example, the default value 64 (i.e. 1<<6) corresponds to "Clear" pixels diff --git a/workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml b/workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml deleted file mode 100644 index bdda6ae0..00000000 --- a/workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: download_modis_surface_reflectance -sources: - user_input: - - list.input_data -sinks: - raster: download.raster -parameters: - pc_key: - resolution_m: -tasks: - list: - op: list_modis_sr - parameters: - resolution: "@from(resolution_m)" - download: - op: download_modis_sr - parameters: - pc_key: "@from(pc_key)" -edges: - - origin: list.modis_products - destination: - - download.product -description: - short_description: - Downloads MODIS 8-day surface reflectance rasters that intersect with the input geometry and - time range. - long_description: - The workflow will download MODIS raster images either at 250m or 500m resolution. The products - are available at an 8-day interval and pixel values are selected based on low clouds, low view - angle, and highest index value. Notice that only bands 1, 2 and quality control are available - at 250m. - For more information, see - https://planetarycomputer.microsoft.com/dataset/modis-09Q1-061 - https://planetarycomputer.microsoft.com/dataset/modis-09A1-061 - sources: - user_input: Time range and geometry of interest. - sinks: - raster: Products containing MODIS reflectance bands and data. - parameters: - pc_key: Optional Planetary Computer API key. - resolution_m: Product resolution, in meters. Either 250 or 500.
diff --git a/workflows/data_ingestion/modis/download_modis_vegetation_index.yaml b/workflows/data_ingestion/modis/download_modis_vegetation_index.yaml deleted file mode 100644 index cb513533..00000000 --- a/workflows/data_ingestion/modis/download_modis_vegetation_index.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: download_modis_vegetation_index -sources: - user_input: - - list.input_data -sinks: - index: download.index -parameters: - index: - pc_key: - resolution_m: -tasks: - list: - op: list_modis_vegetation - parameters: - resolution: "@from(resolution_m)" - download: - op: download_modis_vegetation - parameters: - pc_key: "@from(pc_key)" - index: "@from(index)" -edges: - - origin: list.modis_products - destination: - - download.product -description: - short_description: - Downloads MODIS 16-day vegetation index products that intersect with the input geometry and time - range. - long_description: - The workflow will download products at the chosen index and resolution. The products are - available at a 16-day interval and pixel values are selected based on low clouds, low view - angle, and highest index value. Vegetation index values range from (-2000 to 10000). - For more information, see https://planetarycomputer.microsoft.com/dataset/modis-13Q1-061 - and https://lpdaac.usgs.gov/products/mod13a1v061/ . - sources: - user_input: Time range and geometry of interest. - sinks: - index: Products containing the chosen index at the chosen resolution. - parameters: - index: Vegetation index that should be downloaded. Either 'evi' or 'ndvi'. - pc_key: Optional Planetary Computer API key. - resolution_m: Product resolution, in meters. Either 250 or 500. 
diff --git a/workflows/data_ingestion/naip/download_naip.yaml b/workflows/data_ingestion/naip/download_naip.yaml deleted file mode 100644 index cafaf1b1..00000000 --- a/workflows/data_ingestion/naip/download_naip.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: download_naip -sources: - user_input: - - list.input_item -sinks: - raster: download.downloaded_product -parameters: - pc_key: -tasks: - list: - op: list_naip_products - download: - op: download_naip - parameters: - api_key: "@from(pc_key)" -edges: - - origin: list.naip_products - destination: - - download.input_product -description: - short_description: Downloads NAIP tiles that intersect with the input geometry and time range. - long_description: - sources: - user_input: Time range and geometry of interest. - sinks: - raster: NAIP tiles. - parameters: - pc_key: Optional Planetary Computer API key. diff --git a/workflows/data_ingestion/osm_road_geometries.yaml b/workflows/data_ingestion/osm_road_geometries.yaml deleted file mode 100644 index b2f97f8b..00000000 --- a/workflows/data_ingestion/osm_road_geometries.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: osm_road_geometries -sources: - user_input: - - download.input_region -sinks: - roads: download.roads -parameters: - network_type: - buffer_size: -tasks: - download: - op: download_road_geometries - parameters: - network_type: "@from(network_type)" - buffer_size: "@from(buffer_size)" -description: - short_description: Downloads road geometry for input region from Open Street Maps. - long_description: - The workflow downloads information from Open Street Maps for the target region and generates - geometries for roads that intercept the input region bounding box. - sources: - user_input: List of external references. - sinks: - roads: Geometry collection with road geometries that intercept the input region bounding box. - parameters: - network_type: >- - Type of roads that will be selected. One of: - - 'drive_service': get drivable streets, including service roads. 
- - 'walk': get all streets and paths that pedestrians can use (this network type ignores - one-way directionality). - - 'bike': get all streets and paths that cyclists can use. - - 'all': download all non-private OSM streets and paths (this is the default network type - unless you specify a different one). - - 'all_private': download all OSM streets and paths, including private-access ones. - - 'drive': get drivable public streets (but not service roads). - For more information see https://osmnx.readthedocs.io/en/stable/index.html. - buffer_size: Size of buffer, in meters, to search for nodes in OSM. diff --git a/workflows/data_ingestion/sentinel1/preprocess_s1.yaml b/workflows/data_ingestion/sentinel1/preprocess_s1.yaml deleted file mode 100644 index fa718d17..00000000 --- a/workflows/data_ingestion/sentinel1/preprocess_s1.yaml +++ /dev/null @@ -1,75 +0,0 @@ -name: preprocess_s1_rtc -sources: - user_input: - - merge_geom_tr.time_range - s2_products: - - union.items - - filter.bounds_items - - tile.sentinel2_products -sinks: - raster: merge.merged_product -parameters: - pc_key: - min_cover: .4 - dl_timeout: -tasks: - union: - op: merge_geometries - merge_geom_tr: - op: merge_geometry_and_time_range - list: - op: list_sentinel1_products_pc - op_dir: list_sentinel1_products - filter: - op: select_necessary_coverage_items - parameters: - min_cover: "@from(min_cover)" - group_attribute: orbit_number - download: - op: download_sentinel1 - parameters: - api_key: "@from(pc_key)" - timeout_s: "@from(dl_timeout)" - tile: - op: tile_sentinel1_rtc - op_dir: tile_sentinel1 - group: - op: group_sentinel1_orbits - merge: - op: merge_sentinel1_orbits -edges: - - origin: union.merged - destination: - - merge_geom_tr.geometry - - origin: merge_geom_tr.merged - destination: - - list.input_item - - origin: list.sentinel_products - destination: - - filter.items - - origin: filter.filtered_items - destination: - - download.sentinel_product - - origin: download.downloaded_product - 
destination: - - tile.sentinel1_products - - origin: tile.tiled_products - destination: - - group.rasters - - origin: group.raster_groups - destination: - - merge.raster_group -description: - short_description: - Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 - products in the input time range. - long_description: - The workflow fetches Sentinel-1 tiles that intersects with the Sentinel-2 products, downloads - and preprocesses them, and produces Sentinel-1 rasters in the Sentinel-2 tiling system. - sources: - user_input: Time range of interest. - s2_products: Sentinel-2 products whose geometries are used to select Sentinel-1 tiles. - sinks: - raster: Sentinel-1 rasters in the Sentinel-2 tiling system. - parameters: - pc_key: Planetary Computer API key. diff --git a/workflows/data_ingestion/sentinel2/cloud_ensemble.yaml b/workflows/data_ingestion/sentinel2/cloud_ensemble.yaml deleted file mode 100644 index 004e34bc..00000000 --- a/workflows/data_ingestion/sentinel2/cloud_ensemble.yaml +++ /dev/null @@ -1,60 +0,0 @@ -name: cloud_ensemble -sources: - sentinel_raster: - - cloud1.sentinel_raster - - cloud2.sentinel_raster - - cloud3.sentinel_raster - - cloud4.sentinel_raster - - cloud5.sentinel_raster -sinks: - cloud_probability: ensemble.cloud_probability -tasks: - cloud1: - op: compute_cloud_prob - parameters: - model_path: cloud_model1_cpu.onnx - cloud2: - op: compute_cloud_prob - parameters: - model_path: cloud_model2_cpu.onnx - cloud3: - op: compute_cloud_prob - parameters: - model_path: cloud_model3_cpu.onnx - cloud4: - op: compute_cloud_prob - parameters: - model_path: cloud_model4_cpu.onnx - cloud5: - op: compute_cloud_prob - parameters: - model_path: cloud_model5_cpu.onnx - ensemble: - op: ensemble_cloud_prob -edges: - - origin: cloud1.cloud_probability - destination: - - ensemble.cloud1 - - origin: cloud2.cloud_probability - destination: - - ensemble.cloud2 - - origin: cloud3.cloud_probability - destination: - - 
ensemble.cloud3 - - origin: cloud4.cloud_probability - destination: - - ensemble.cloud4 - - origin: cloud5.cloud_probability - destination: - - ensemble.cloud5 -description: - short_description: - Computes the cloud probability of a Sentinel-2 L2A raster using an ensemble of five cloud - segmentation models. - long_description: - The workflow computes cloud probabilities for each model independently, and averages them to - obtain a single probability map. - sources: - sentinel_raster: Sentinel-2 L2A raster. - sinks: - cloud_probability: Cloud probability map. diff --git a/workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml b/workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml deleted file mode 100644 index 0a1c8452..00000000 --- a/workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml +++ /dev/null @@ -1,63 +0,0 @@ -name: improve_cloud_mask -sources: - s2_raster: - - cloud.sentinel_raster - - shadow.sentinel_raster - product_mask: - - merge.product_mask -sinks: - mask: merge.merged_cloud_mask -parameters: - cloud_thr: - shadow_thr: - in_memory: - cloud_model: - shadow_model: -tasks: - cloud: - op: compute_cloud_prob - parameters: - in_memory: "@from(in_memory)" - model_path: "@from(cloud_model)" - shadow: - op: compute_shadow_prob - parameters: - in_memory: "@from(in_memory)" - model_path: "@from(shadow_model)" - merge: - op: merge_cloud_masks_simple - op_dir: merge_cloud_masks - parameters: - cloud_prob_threshold: "@from(cloud_thr)" - shadow_prob_threshold: "@from(shadow_thr)" -edges: - - origin: cloud.cloud_probability - destination: - - merge.cloud_probability - - origin: shadow.shadow_probability - destination: - - merge.shadow_probability -description: - short_description: - Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by - machine learning segmentation models. 
- long_description: - This workflow computes cloud and shadow probabilities using segmentation models, thresholds - them, and merges the models' masks with the product mask. - sources: - s2_raster: Sentinel-2 L2A raster. - product_mask: Cloud mask obtained from the product's quality indicators. - sinks: - mask: Improved cloud mask. - parameters: - cloud_thr: Confidence threshold to assign a pixel as cloud. - shadow_thr: Confidence threshold to assign a pixel as shadow. - in_memory: - Whether to load the whole raster in memory when running predictions. Uses more memory - (~4GB/worker) but speeds up inference for fast models. - cloud_model: - ONNX file for the cloud model. Available models are 'cloud_model{idx}_cpu.onnx' with idx ∈ {1, - 2} being FPN-based models, which are more accurate but slower, and idx ∈ {3, 4, 5} being - cheaplab models, which are less accurate but faster. - shadow_model: - ONNX file for the shadow model. 'shadow.onnx' is the only currently available model. diff --git a/workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml b/workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml deleted file mode 100644 index 5460e8e7..00000000 --- a/workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: improve_cloud_mask_ensemble -sources: - s2_raster: - - cloud.sentinel_raster - - shadow.sentinel_raster - product_mask: - - merge.product_mask -sinks: - mask: merge.merged_cloud_mask -parameters: - cloud_thr: - shadow_thr: -tasks: - cloud: - workflow: data_ingestion/sentinel2/cloud_ensemble - shadow: - op: compute_shadow_prob - merge: - op: merge_cloud_masks_simple - op_dir: merge_cloud_masks - parameters: - cloud_prob_threshold: "@from(cloud_thr)" - shadow_prob_threshold: "@from(shadow_thr)" -edges: - - origin: cloud.cloud_probability - destination: - - merge.cloud_probability - - origin: shadow.shadow_probability - destination: - - merge.shadow_probability -description: - 
short_description: - Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by - an ensemble of machine learning segmentation models. - long_description: - This workflow computes cloud and shadow probabilities using and ensemble of segmentation models, - thresholds them, and merges the models' masks with the product mask. - sources: - s2_raster: Sentinel-2 L2A raster. - product_mask: Cloud mask obtained from the product's quality indicators. - sinks: - mask: Improved cloud mask. - parameters: - cloud_thr: Confidence threshold to assign a pixel as cloud. - shadow_thr: Confidence threshold to assign a pixel as shadow. diff --git a/workflows/data_ingestion/sentinel2/preprocess_s2.yaml b/workflows/data_ingestion/sentinel2/preprocess_s2.yaml deleted file mode 100644 index 251a698f..00000000 --- a/workflows/data_ingestion/sentinel2/preprocess_s2.yaml +++ /dev/null @@ -1,66 +0,0 @@ -name: preprocess_s2 -sources: - user_input: - - list.input_item - - filter.bounds_items -sinks: - raster: merge.output_raster - mask: merge.output_mask -parameters: - min_tile_cover: - max_tiles_per_time: - pc_key: - dl_timeout: -tasks: - list: - op: list_sentinel2_products_pc - op_dir: list_sentinel2_products - filter: - op: select_necessary_coverage_items - parameters: - min_cover: "@from(min_tile_cover)" - max_items: "@from(max_tiles_per_time)" - download: - op: download_stack_sentinel2 - parameters: - api_key: "@from(pc_key)" - timeout_s: "@from(dl_timeout)" - group: - op: group_sentinel2_orbits - merge: - op: merge_sentinel2_orbits -edges: - - origin: list.sentinel_products - destination: - - filter.items - - origin: filter.filtered_items - destination: - - download.sentinel_product - - origin: download.raster - destination: - - group.rasters - - origin: download.cloud - destination: - - group.masks - - origin: group.raster_groups - destination: - - merge.raster_group - - origin: group.mask_groups - destination: - - merge.mask_group -description: - 
short_description: - Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range. - long_description: - This workflow selects a minimum set of tiles that covers the input geometry, downloads - Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single - multi-band raster at 10m resolution. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. - mask: Cloud mask at 10m resolution from the product's quality indicators. - parameters: - min_tile_cover: Minimum RoI coverage to consider a set of tiles sufficient. - max_tiles_per_time: Maximum number of tiles used to cover the RoI in each date. - pc_key: Optional Planetary Computer API key. diff --git a/workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml b/workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml deleted file mode 100644 index 7c3e6d59..00000000 --- a/workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml +++ /dev/null @@ -1,46 +0,0 @@ -name: preprocess_s2_ensemble_masks -sources: - user_input: - - s2.user_input -sinks: - raster: s2.raster - mask: cloud.mask -parameters: - min_tile_cover: - max_tiles_per_time: - cloud_thr: - shadow_thr: - pc_key: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2 - parameters: - min_tile_cover: "@from(min_tile_cover)" - max_tiles_per_time: "@from(max_tiles_per_time)" - pc_key: "@from(pc_key)" - cloud: - workflow: data_ingestion/sentinel2/improve_cloud_mask_ensemble - parameters: - cloud_thr: "@from(cloud_thr)" - shadow_thr: "@from(shadow_thr)" -edges: - - origin: s2.raster - destination: - - cloud.s2_raster - - origin: s2.mask - destination: - - cloud.product_mask -description: - short_description: - Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and - computes improved cloud masks using an ensemble of cloud and 
shadow segmentation models. - long_description: - This workflow selects a minimum set of tiles that covers the input geometry, downloads - Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single - multi-band raster at 10m resolution. It then improves cloud masks by merging the product mask - with cloud and shadow masks computed using an ensemble of cloud and shadow segmentation models. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. - mask: Cloud masks at 10m resolution. diff --git a/workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml b/workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml deleted file mode 100644 index 1f8bb126..00000000 --- a/workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: preprocess_s2_improved_masks -sources: - user_input: - - s2.user_input -sinks: - raster: s2.raster - mask: cloud.mask -parameters: - min_tile_cover: - max_tiles_per_time: - cloud_thr: - shadow_thr: - in_memory: - cloud_model: - shadow_model: - pc_key: - dl_timeout: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2 - parameters: - min_tile_cover: "@from(min_tile_cover)" - max_tiles_per_time: "@from(max_tiles_per_time)" - pc_key: "@from(pc_key)" - dl_timeout: "@from(dl_timeout)" - cloud: - workflow: data_ingestion/sentinel2/improve_cloud_mask - parameters: - cloud_thr: "@from(cloud_thr)" - shadow_thr: "@from(shadow_thr)" - in_memory: "@from(in_memory)" - cloud_model: "@from(cloud_model)" - shadow_model: "@from(shadow_model)" -edges: - - origin: s2.raster - destination: - - cloud.s2_raster - - origin: s2.mask - destination: - - cloud.product_mask -description: - short_description: - Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and - computes improved cloud masks using cloud and shadow segmentation 
models. - long_description: - This workflow selects a minimum set of tiles that covers the input geometry, downloads - Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single - multi-band raster at 10m resolution. It then improves cloud masks by merging the product mask - with cloud and shadow masks computed using cloud and shadow segmentation models. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. - mask: Cloud masks at 10m resolution. diff --git a/workflows/data_ingestion/soil/soilgrids.yaml b/workflows/data_ingestion/soil/soilgrids.yaml deleted file mode 100644 index 6296b365..00000000 --- a/workflows/data_ingestion/soil/soilgrids.yaml +++ /dev/null @@ -1,53 +0,0 @@ -name: soilgrids -sources: - input_item: - - download_soilgrids.input_item -sinks: - downloaded_raster: download_soilgrids.downloaded_raster -parameters: - map: wrb - identifier: MostProbable -tasks: - download_soilgrids: - op: download_soilgrids - parameters: - map: "@from(map)" - identifier: "@from(identifier)" -edges: -description: - short_description: - Downloads digital soil mapping information from SoilGrids for the input geometry. - long_description: >- - The workflow downloads a raster containing the map and identifiers for the input geometry. - SoilGrids is a system for digital soil mapping based on global compilation of soil profile data - and environmental layers. - sources: - input_item: Input geometry. - sinks: - downloaded_raster: Raster with the map and identifiers requested. - parameters: - map: >- - Map to download. 
Options: - - wrb - World Reference Base classes and probabilites - - bdod - Bulk density - kg/dm^3 - - cec - Cation exchange capacity at ph 7 - cmol(c)/kg - - cfvo - Coarse fragments volumetric) - cm3/100cm3 (vol%) - - clay - Clay content - g/100g (%) - - nitrogen - Nitrogen - g/kg - - phh2o - Soil pH in H2O - pH - - sand - Sand content - g/100g (%) - - silt - Silt content - g/100g (%) - - soc - Soil organic carbon content - g/kg - - ocs - Soil organic carbon stock - kg/m^3 - - ocd - Organic carbon densities - kg/m^3 - identifier: >- - Variable identifier to be downloaded. Depends on map. - - wrb: Acrisols, Albeluvisols, Alisols, Andosols, Arenosols, Calcisols, Cambisols, - Chernozems, Cryosols, Durisols, Ferralsols, Fluvisols, Gleysols, Gypsisols, Histosols, - Kastanozems, Leptosols, Lixisols, Luvisols, MostProbable, Nitisols, Phaeozems, Planosols, - Plinthosols, Podzols, Regosols, Solonchaks, Solonetz, Stagnosols, Umbrisols, Vertisols. - - Other identifiers follow the nomenclature defined in the - [link=https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean]SoilGrids - documentation page: - https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean[/]. diff --git a/workflows/data_ingestion/soil/usda.yaml b/workflows/data_ingestion/soil/usda.yaml deleted file mode 100644 index e2e1e51c..00000000 --- a/workflows/data_ingestion/soil/usda.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: usda_soils -sources: - input_item: - - datavibe_filter.input_item -sinks: - downloaded_raster: download_usda_soils.downloaded_raster -parameters: - ignore: all -tasks: - datavibe_filter: - op: datavibe_filter - parameters: - filter_out: "@from(ignore)" - download_usda_soils: - op: download_usda_soils -edges: - - origin: datavibe_filter.output_item - destination: - - download_usda_soils.input_item -description: - short_description: Downloads USDA soil classification raster. 
- long_description: - The workflow will download a global raster with USDA soil classes at 1/30 degree resolution. - sources: - input_item: Dummy input. - sinks: - downloaded_raster: Raster with USDA soil classes. - parameters: - ignore: Selection of each field of input item should be ignored (among "time_range", "geometry", or "all" for both of them). \ No newline at end of file diff --git a/workflows/data_ingestion/spaceeye/spaceeye.yaml b/workflows/data_ingestion/spaceeye/spaceeye.yaml deleted file mode 100644 index 0aa2defb..00000000 --- a/workflows/data_ingestion/spaceeye/spaceeye.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: spaceeye -sources: - user_input: - - preprocess.user_input - - spaceeye.input_data -sinks: - raster: spaceeye.raster -parameters: - duration: - time_overlap: - min_tile_cover: - max_tiles_per_time: - cloud_thr: - shadow_thr: - pc_key: - s2_timeout: -tasks: - preprocess: - workflow: data_ingestion/spaceeye/spaceeye_preprocess - parameters: - min_tile_cover: "@from(min_tile_cover)" - max_tiles_per_time: "@from(max_tiles_per_time)" - cloud_thr: "@from(cloud_thr)" - shadow_thr: "@from(shadow_thr)" - pc_key: "@from(pc_key)" - s2_timeout: "@from(s2_timeout)" - spaceeye: - workflow: data_ingestion/spaceeye/spaceeye_inference - parameters: - duration: "@from(duration)" - time_overlap: "@from(time_overlap)" -edges: - - origin: preprocess.s2_raster - destination: - - spaceeye.s2_rasters - - origin: preprocess.s1_raster - destination: - - spaceeye.s1_rasters - - origin: preprocess.cloud_mask - destination: - - spaceeye.cloud_rasters -description: - short_description: - Runs the SpaceEye cloud removal pipeline, yielding daily cloud-free images for the input - geometry and time range. - long_description: >- - The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time - range, preprocesses them, computes cloud masks, and runs SpaceEye inference in a sliding window - on the retrieved tiles. 
This workflow can be reused as a preprocess step in many applications - that require cloud-free Sentinel-2 data. For more information about SpaceEye, read the paper: - https://arxiv.org/abs/2106.08408. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: Cloud-free rasters. - parameters: diff --git a/workflows/data_ingestion/spaceeye/spaceeye_inference.yaml b/workflows/data_ingestion/spaceeye/spaceeye_inference.yaml deleted file mode 100644 index cdd4006e..00000000 --- a/workflows/data_ingestion/spaceeye/spaceeye_inference.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: spaceeye_inference -sources: - input_data: - - group_s1.input_data - - group_s2.input_data - - group_mask.input_data - s1_rasters: - - group_s1.rasters - s2_rasters: - - group_s2.rasters - cloud_rasters: - - group_mask.rasters -sinks: - raster: split.rasters -parameters: - duration: 48 - time_overlap: 0.5 -tasks: - group_s1: - op: group_s1_tile_sequence - op_dir: group_tile_sequence - parameters: - duration: "@from(duration)" - overlap: "@from(time_overlap)" - group_s2: - op: group_s2_tile_sequence - op_dir: group_tile_sequence - parameters: - duration: "@from(duration)" - overlap: "@from(time_overlap)" - group_mask: - op: group_s2cloudmask_tile_sequence - op_dir: group_tile_sequence - parameters: - duration: "@from(duration)" - overlap: "@from(time_overlap)" - spaceeye: - op: remove_clouds - parameters: - duration: "@from(duration)" - split: - op: split_spaceeye_sequence - op_dir: split_sequence -edges: - - origin: group_s1.tile_sequences - destination: - - spaceeye.s1_products - - origin: group_s2.tile_sequences - destination: - - spaceeye.s2_products - - origin: group_mask.tile_sequences - destination: - - spaceeye.cloud_masks - - origin: spaceeye.spaceeye_sequence - destination: - - split.sequences -description: - short_description: - Performs SpaceEye inference to generate daily cloud-free images given Sentinel data and cloud - masks. 
- long_description: >- - The workflow will group input Sentinel-1, Sentinel-2, and cloud mask rasters into - spatio-temporal windows and perform inference of each window. The windows will then be merged - into rasters for the RoI. More information about SpaceEye available in the paper: - https://arxiv.org/abs/2106.08408. - sources: - input_data: - Time range and region of interest. Will determine the spatio-temporal windows and region for - the output rasters. - s1_rasters: Sentinel-1 rasters tiled to the Sentinel-2 grid. - s2_rasters: Sentinel-2 tile rasters for the input time range. - cloud_rasters: Cloud masks for each of the Sentinel-2 tiles. - sinks: - raster: Cloud-free rasters for the input time range and region of interest. - parameters: - duration: - Time window, in days, considered in the inference. Controls the amount of temporal context for - inpainting clouds. Larger windows require more compute and memory. - time_overlap: - Overlap ratio of each temporal window. Controls the temporal step between windows as a - fraction of the window size. 
diff --git a/workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml b/workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml deleted file mode 100644 index 1a71dcfd..00000000 --- a/workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml +++ /dev/null @@ -1,62 +0,0 @@ -name: spaceeye_interpolation -sources: - user_input: - - preprocess.user_input - - spaceeye.input_data -sinks: - raster: spaceeye.raster -parameters: - duration: - time_overlap: - min_tile_cover: - max_tiles_per_time: - cloud_thr: - shadow_thr: - pc_key: -tasks: - preprocess: - workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks - parameters: - min_tile_cover: "@from(min_tile_cover)" - max_tiles_per_time: "@from(max_tiles_per_time)" - cloud_thr: "@from(cloud_thr)" - shadow_thr: "@from(shadow_thr)" - pc_key: "@from(pc_key)" - spaceeye: - workflow: data_ingestion/spaceeye/spaceeye_interpolation_inference - parameters: - duration: "@from(duration)" - time_overlap: "@from(time_overlap)" -edges: - - origin: preprocess.raster - destination: - - spaceeye.s2_rasters - - origin: preprocess.mask - destination: - - spaceeye.cloud_rasters -description: - short_description: - Runs the SpaceEye cloud removal pipeline using an interpolation-based algorithm, yielding daily - cloud-free images for the input geometry and time range. - long_description: >- - The workflow fetches Sentinel-2 tiles that cover the input geometry and time range, preprocesses - them, computes cloud masks, and runs SpaceEye inference in a sliding window on the retrieved - tiles. This workflow can be reused as a preprocess step in many applications that require - cloud-free Sentinel-2 data. For more information about SpaceEye, read the - [link=https://arxiv.org/abs/2106.08408]paper: https://arxiv.org/abs/2106.08408[/link]. - sources: - user_input: Time range and geometry of interest. - sinks: - raster: Cloud-free rasters. - parameters: - duration: - Time window, in days, considered in the inference. 
Controls the amount of temporal context for - inpainting clouds. Larger windows require more compute and memory. - time_overlap: - Overlap ratio of each temporal window. Controls the temporal step between windows as a - fraction of the window size. - min_tile_cover: Minimum RoI coverage to consider a set of tiles sufficient. - max_tiles_per_time: Maximum number of tiles used to cover the RoI in each date. - cloud_thr: Confidence threshold to assign a pixel as cloud. - shadow_thr: Confidence threshold to assign a pixel as shadow. - pc_key: Optional Planetary Computer API key. diff --git a/workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml b/workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml deleted file mode 100644 index c3aaa9c2..00000000 --- a/workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml +++ /dev/null @@ -1,68 +0,0 @@ -name: spaceeye_interpolation_inference -sources: - input_data: - - group_s2.input_data - - group_mask.input_data - s2_rasters: - - group_s2.rasters - cloud_rasters: - - group_mask.rasters -sinks: - raster: split.rasters -parameters: - duration: 48 - time_overlap: 0.5 -tasks: - group_s2: - op: group_s2_tile_sequence - op_dir: group_tile_sequence - parameters: - duration: "@from(duration)" - overlap: "@from(time_overlap)" - group_mask: - op: group_s2cloudmask_tile_sequence - op_dir: group_tile_sequence - parameters: - duration: "@from(duration)" - overlap: "@from(time_overlap)" - spaceeye: - op: remove_clouds_interpolation - op_dir: remove_clouds - parameters: - duration: "@from(duration)" - split: - op: split_spaceeye_sequence - op_dir: split_sequence -edges: - - origin: group_s2.tile_sequences - destination: - - spaceeye.s2_products - - origin: group_mask.tile_sequences - destination: - - spaceeye.cloud_masks - - origin: spaceeye.spaceeye_sequence - destination: - - split.sequences -description: - short_description: - Performs temporal damped interpolation to generate daily 
cloud-free images given Sentinel-2 data - and cloud masks. - long_description: >- - The workflow will group input Sentinel-2 and cloud mask rasters into spatio-temporal windows and - perform inference of each window. The windows will then be merged into rasters for the RoI. More - information about SpaceEye available in the paper: https://arxiv.org/abs/2106.08408. - sources: - input_data: - Time range and region of interest. Will determine the spatio-temporal windows and region for - the output rasters. - s2_rasters: Sentinel-2 tile rasters for the input time range. - cloud_rasters: Cloud masks for each of the Sentinel-2 tiles. - sinks: - raster: Cloud-free rasters for the input time range and region of interest. - parameters: - duration: - Time window, in days, considered in the inference. Controls the amount of temporal context for - inpainting clouds. Larger windows require more compute and memory. - time_overlap: - Overlap ratio of each temporal window. Controls the temporal step between windows as a - fraction of the window size. 
diff --git a/workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml b/workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml deleted file mode 100644 index 6ea55cfc..00000000 --- a/workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: spaceeye_preprocess_rtc -sources: - user_input: - - s2.user_input - - s1.user_input -sinks: - s2_raster: s2.raster - s1_raster: s1.raster - cloud_mask: s2.mask -parameters: - min_tile_cover: .4 - max_tiles_per_time: - cloud_thr: - shadow_thr: - pc_key: - s1_timeout: - s2_timeout: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks - parameters: - min_tile_cover: "@from(min_tile_cover)" - max_tiles_per_time: "@from(max_tiles_per_time)" - cloud_thr: "@from(cloud_thr)" - shadow_thr: "@from(shadow_thr)" - pc_key: "@from(pc_key)" - in_memory: true - dl_timeout: "@from(s2_timeout)" - s1: - workflow: data_ingestion/sentinel1/preprocess_s1 - parameters: - pc_key: "@from(pc_key)" - dl_timeout: "@from(s1_timeout)" -edges: - - origin: s2.raster - destination: - - s1.s2_products -description: - short_description: Runs the SpaceEye preprocessing pipeline. - long_description: - The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time - range and preprocesses them. It also computes improved cloud masks using cloud and shadow - segmentation models. - sources: - user_input: Time range and geometry of interest. - sinks: - s2_raster: Sentinel-2 rasters. - s1_raster: Sentinel-1 rasters. - cloud_mask: Cloud and cloud shadow mask. 
diff --git a/workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml b/workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml deleted file mode 100644 index bb56c7c9..00000000 --- a/workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: spaceeye_preprocess_ensemble -sources: - user_input: - - s2.user_input - - s1.user_input -sinks: - s2_raster: s2.raster - s1_raster: s1.raster - cloud_mask: s2.mask -parameters: - pc_key: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2_ensemble_masks - parameters: - pc_key: "@from(pc_key)" - s1: - workflow: data_ingestion/sentinel1/preprocess_s1 - parameters: - pc_key: "@from(pc_key)" -edges: - - origin: s2.raster - destination: - - s1.s2_products -description: - short_description: - Runs the SpaceEye preprocessing pipeline with an ensemble of cloud segmentation models. - long_description: - The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time - range and preprocesses them, it also computes improved cloud masks using cloud and shadow - segmentation models. Cloud probabilities are computed with an ensemble of five models. - sources: - user_input: Time range and geometry of interest. - sinks: - s2_raster: Sentinel-2 rasters. - s1_raster: Sentinel-1 rasters. - cloud_mask: Cloud and cloud shadow mask. - parameters: - pc_key: Planetary Computer API key. 
diff --git a/workflows/data_ingestion/user_data/ingest_geometry.yaml b/workflows/data_ingestion/user_data/ingest_geometry.yaml deleted file mode 100644 index 966a670a..00000000 --- a/workflows/data_ingestion/user_data/ingest_geometry.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: ingest_geometry -sources: - user_input: - - unpack.input_refs -sinks: - geometry: download.downloaded -tasks: - unpack: - op: unpack_refs - download: - op: download_geometry_from_ref - op_dir: download_from_ref -edges: - - origin: unpack.ref_list - destination: - - download.input_ref -description: - short_description: - Adds user geometries into the cluster storage, allowing for them to be used on workflows. - long_description: - The workflow downloads geometries provided in the references and generates GeometryCollection - objects with local assets that can be used in other operations. - sources: - user_input: List of external references. - sinks: - geometry: GeometryCollections with downloaded assets. diff --git a/workflows/data_ingestion/user_data/ingest_raster.yaml b/workflows/data_ingestion/user_data/ingest_raster.yaml deleted file mode 100644 index 13842bb9..00000000 --- a/workflows/data_ingestion/user_data/ingest_raster.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: ingest_raster -sources: - user_input: - - unpack.input_refs -sinks: - raster: download.downloaded -tasks: - unpack: - op: unpack_refs - download: - op: download_raster_from_ref - op_dir: download_from_ref -edges: - - origin: unpack.ref_list - destination: - - download.input_ref -description: - short_description: - Adds user rasters into the cluster storage, allowing for them to be used on workflows. - long_description: - The workflow downloads rasters provided in the references and generates Raster objects with - local assets that can be used in other operations. - sources: - user_input: List of external references. - sinks: - raster: Rasters with downloaded assets. 
diff --git a/workflows/data_ingestion/user_data/ingest_smb.yaml b/workflows/data_ingestion/user_data/ingest_smb.yaml deleted file mode 100644 index 20dffe39..00000000 --- a/workflows/data_ingestion/user_data/ingest_smb.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: ingest_smb -sources: - user_input: - - download.user_input -sinks: - rasters: download.rasters -parameters: - server_name: - server_ip: - server_port: 445 - username: - password: - share_name: - directory_path: "/" - bands: ["red", "green", "blue"] -tasks: - download: - op: download_rasters_from_smb - op_dir: download_from_smb - parameters: - server_name: "@from(server_name)" - server_ip: "@from(server_ip)" - server_port: "@from(server_port)" - username: "@from(username)" - password: "@from(password)" - share_name: "@from(share_name)" - directory_path: "@from(directory_path)" - bands: "@from(bands)" -edges: -description: - short_description: - Adds user rasters into the cluster storage from an SMB share, allowing for them to be - used on workflows. - long_description: - The workflow downloads rasters from the provided SMB share and generates Raster objects with - local assets that can be used in other operations. - sources: - user_input: - DataVibe containing the time range and geometry metadata of the set rasters - to be downloaded. - sinks: - rasters: Rasters with downloaded assets. 
diff --git a/workflows/data_ingestion/weather/download_chirps.yaml b/workflows/data_ingestion/weather/download_chirps.yaml deleted file mode 100755 index fb0793b9..00000000 --- a/workflows/data_ingestion/weather/download_chirps.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: chirps -sources: - user_input: - - list_chirps.input_item -sinks: - product: download_chirps.downloaded_product -parameters: - freq: daily - res: p05 -tasks: - list_chirps: - op: list_chirps - parameters: - freq: "@from(freq)" - res: "@from(res)" - download_chirps: - op: download_chirps -edges: - - origin: list_chirps.chirps_products - destination: - - download_chirps.chirps_product -description: - short_description: Downloads accumulated precipitation data from the CHIRPS dataset. - long_description: - sources: - user_input: Time range and geometry of interest. - sinks: - product: TIFF file containing accumulated precipitation. - parameters: - freq: daily or monthly frequencies - res: p05 for 0.05 degree resolution or p25 for 0.25 degree resolution, - p25 is only available daily diff --git a/workflows/data_ingestion/weather/download_era5.yaml b/workflows/data_ingestion/weather/download_era5.yaml deleted file mode 100644 index e5f6df16..00000000 --- a/workflows/data_ingestion/weather/download_era5.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: download_era5 -sources: - user_input: - - list.input_item -sinks: - downloaded_product: download.downloaded_product -parameters: - pc_key: - variable: 2t -tasks: - list: - op: list_era5 - parameters: - variable: "@from(variable)" - download: - op: download_era5 - parameters: - api_key: "@from(pc_key)" -edges: - - origin: list.era5_products - destination: - - download.era5_product -description: - short_description: Hourly estimated weather variables. - long_description: - Hourly weather variables obtained from combining observations and numerical model runs to - estimate the state of the atmosphere. - sources: - user_input: Time range and geometry of interest. 
- sinks: - downloaded_product: 30km resolution weather variables. - parameters: - pc_key: Optional Planetary Computer API key. - variable: >- - Options are: - 2t - 2 meter temperature (default) - 100u - 100 meter U wind component - 100v - 100 meter V wind component - 10u - 10 meter U wind component - 10v - 10 meter V wind component - 2d - 2 meter dewpoint temperature - mn2t - Minimum temperature at 2 meters since previous post-processing - msl - Mean sea level pressure - mx2t - Maximum temperature at 2 meters since previous post-processing - sp - Surface pressure - ssrd - Surface solar radiation downwards - sst - Sea surface temperature - tp - Total precipitation diff --git a/workflows/data_ingestion/weather/download_era5_monthly.yaml b/workflows/data_ingestion/weather/download_era5_monthly.yaml deleted file mode 100644 index b180015a..00000000 --- a/workflows/data_ingestion/weather/download_era5_monthly.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: download_era5_monthly -sources: - user_input: - - list.input_item -sinks: - downloaded_product: download.downloaded_product -parameters: - cds_api_key: - variable: 2t -tasks: - list: - op: list_era5_cds - op_dir: list_era5 - parameters: - variable: "@from(variable)" - download: - op: download_era5 - parameters: - api_key: "@from(cds_api_key)" -edges: - - origin: list.era5_products - destination: - - download.era5_product -description: - short_description: Monthly estimated weather variables. - long_description: - Monthly weather variables obtained from combining observations and numerical model runs to - estimate the state of the atmosphere. - sources: - user_input: Time range and geometry of interest. - sinks: - downloaded_product: 30km resolution weather variables. 
- parameters: - cds_api_key: api key for Copernicus CDS (https://cds.climate.copernicus.eu/user/register) - variable: >- - Options are: - 2t - 2 meter temperature (default) - 100u - 100 meter U wind component - 100v - 100 meter V wind component - 10u - 10 meter U wind component - 10v - 10 meter V wind component - 2d - 2 meter dewpoint temperature - msl - Mean sea level pressure - sp - Surface pressure - ssrd - Surface solar radiation downwards - sst - Sea surface temperature - tp - Total precipitation diff --git a/workflows/data_ingestion/weather/download_gridmet.yaml b/workflows/data_ingestion/weather/download_gridmet.yaml deleted file mode 100644 index ca85319f..00000000 --- a/workflows/data_ingestion/weather/download_gridmet.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: download_gridmet -sources: - user_input: - - list.input_item -sinks: - downloaded_product: download.downloaded_product -parameters: - variable: pet -tasks: - list: - op: list_gridmet - op_dir: list_climatology_lab - parameters: - variable: "@from(variable)" - download: - op: download_climatology_lab -edges: - - origin: list.products - destination: - - download.input_product -description: - short_description: Daily surface meteorological properties from GridMET. - long_description: >- - The workflow downloads weather and hydrological data for the input time range. Data is - available for the contiguous US and southern British Columbia surfaces from 1979-present, with a - daily temporal resolution and a ~4-km (1/24th degree) spatial resolution. - sources: - user_input: Time range of interest. - sinks: - downloaded_product: Downloaded variable for each year in the input time range. 
- parameters: - variable: >- - Options are: - bi - Burning Index - erc - Energy Release Component - etr - Daily reference evapotranspiration (alfafa, units = mm) - fm100 - Fuel Moisture (100-hr, units = %) - fm1000 - Fuel Moisture (1000-hr, units = %) - pet - Potential evapotranspiration (reference grass evapotranspiration, units = mm) - pr - Precipitation amount (daily total, units = mm) - rmax - Maximum relative humidity (units = %) - rmin - Minimum relative humidity (units = %) - sph - Specific humididy (units = kg/kg) - srad - Downward surface shortwave radiation (units = W/m^2) - th - Wind direction (degrees clockwise from North) - tmmn - Minimum temperature (units = K) - tmmx - Maximum temperature (units = K) - vpd - Vapor Pressure Deficit (units = kPa) - vs - Wind speed at 10m (units = m/s) diff --git a/workflows/data_ingestion/weather/download_herbie.yaml b/workflows/data_ingestion/weather/download_herbie.yaml deleted file mode 100755 index 71154bb4..00000000 --- a/workflows/data_ingestion/weather/download_herbie.yaml +++ /dev/null @@ -1,70 +0,0 @@ -name: download_herbie -sources: - user_input: - - list_herbie.input_item -sinks: - forecast: download_herbie.forecast -parameters: - model: "hrrr" - product: - frequency: 1 - forecast_lead_times: - forecast_start_date: - search_text: ":TMP:2 m" -tasks: - list_herbie: - op: list_herbie - parameters: - model: "@from(model)" - product: "@from(product)" - frequency: "@from(frequency)" - forecast_lead_times: "@from(forecast_lead_times)" - forecast_start_date: "@from(forecast_start_date)" - search_text: "@from(search_text)" - download_herbie: - op: download_herbie -edges: - - origin: list_herbie.product - destination: - - download_herbie.herbie_product -description: - short_description: Downloads forecast data for provided location & time range using herbie python package. 
- long_description: - Herbie is a python package that downloads recent and archived numerical weather prediction (NWP) model - outputs from different cloud archive sources. Its most popular capability is to download HRRR model data. - NWP data in GRIB2 format can be read with xarray+cfgrib. Model data Herbie can retrieve includes the High - Resolution Rapid Refresh (HRRR), Rapid Refresh (RAP), Global Forecast System (GFS), National Blend of Models (NBM), - Rapid Refresh Forecast System - Prototype (RRFS), and ECMWF open data forecast products (ECMWF). - sources: - user_input: Time range and geometry of interest. - sinks: - forecast: Grib file with the requested forecast. - parameters: - model: - Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types - 'hrrr' HRRR contiguous United States model - 'hrrrak' HRRR Alaska model (alias 'alaska') - 'rap' RAP model - 'gfs' Global Forecast System (atmosphere) - 'gfs_wave' Global Forecast System (wave) - 'rrfs' Rapid Refresh Forecast System prototype - for more information see https://herbie.readthedocs.io/en/latest/user_guide/model_info.html - product: - Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), - subh (subhourly fields)). Not specifying this will use the first product in model template file. - frequency: frequency in hours of the forecast - forecast_lead_times: - Forecast lead time in the format [start_time, end_time, increment] (in hours). This parameter can - be None, and in this case see parameter 'forecast_start_date' for more details. You cannot specify - 'forecast_lead_times' and 'forecast_start_date' at the same time. - forecast_start_date: - latest datetime (in the format "%Y-%m-%d %H:%M") for which analysis (zero lead time) are retrieved. - After this datetime, forecasts with progressively increasing lead times are retrieved. 
If this parameter - is set to None and 'forecast_lead_times' is also set to None, then the workflow returns analysis - (zero lead time) up to the latest analysis available, and from that point it returns forecasts with - progressively increasing lead times. - search_text: - It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer - of the file required instead of complete file. - For more information on search_text refer to below url. - https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html diff --git a/workflows/data_ingestion/weather/download_terraclimate.yaml b/workflows/data_ingestion/weather/download_terraclimate.yaml deleted file mode 100644 index dcb2bfa3..00000000 --- a/workflows/data_ingestion/weather/download_terraclimate.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: download_terraclimate -sources: - user_input: - - list.input_item -sinks: - downloaded_product: download.downloaded_product -parameters: - variable: tmax -tasks: - list: - op: list_terraclimate - op_dir: list_climatology_lab - parameters: - variable: "@from(variable)" - download: - op: download_climatology_lab -edges: - - origin: list.products - destination: - - download.input_product -description: - short_description: Monthly climate and hydroclimate properties from TerraClimate. - long_description: >- - The workflow downloads weather and hydrological data for the input time range. Data is - available for global terrestrial surfaces from 1958-present, with a monthly temporal resolution - and a ~4-km (1/24th degree) spatial resolution. - sources: - user_input: Time range of interest. - sinks: - downloaded_product: Downloaded variable for each year in the input time range. 
- parameters: - variable: >- - Options are: - aet - Actual Evapotranspiration (monthly total, units = mm) - def - Climate Water Deficit (monthly total, units = mm) - pet - Potential evapotranspiration (monthly total, units = mm) - ppt - Precipitation (monthly total, units = mm) - q - Runoff (monthly total, units = mm) - soil - Soil Moisture (total column at end of month, units = mm) - srad - Downward surface shortwave radiation (units = W/m2) - swe - Snow water equivalent (at end of month, units = mm) - tmax - Max Temperature (average for month, units = C) - tmin - Min Temperature (average for month, units = C) - vap - Vapor pressure (average for month, units = kPa) - ws - Wind speed (average for month, units = m/s) - vpd - Vapor Pressure Deficit (average for month, units = kPa) - PDSI - Palmer Drought Severity Index (at end of month, units = unitless) diff --git a/workflows/data_ingestion/weather/get_ambient_weather.yaml b/workflows/data_ingestion/weather/get_ambient_weather.yaml deleted file mode 100644 index 790f1a92..00000000 --- a/workflows/data_ingestion/weather/get_ambient_weather.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: get_ambient_weather -sources: - user_input: - - get_weather.user_input -sinks: - weather: get_weather.weather -parameters: - api_key: - app_key: - limit: -1 # No limit - feed_interval: -tasks: - get_weather: - op: download_ambient_weather - op_dir: download_ambient_weather - parameters: - api_key: "@from(api_key)" - app_key: "@from(app_key)" - limit: "@from(limit)" - feed_interval: "@from(feed_interval)" -edges: -description: - short_description: Downloads weather data from an Ambient Weather station. - long_description: - The workflow connects to the Ambient Weather REST API and requests data for the input time - range. The input geometry will be used to find a device inside the region. If not devices are - found in the geometry, the workflow will fail. Connection to the API requires an API key and an - App key. 
- sources: - user_input: Time range and geometry of interest. - sinks: - weather: Weather data from the station. - parameters: - api_key: Ambient Weather API key. - app_key: Ambient Weather App key. - limit: Maximum number of data points. If -1, do not limit. - feed_interval: Interval between samples. Defined by the weather station. diff --git a/workflows/data_ingestion/weather/get_forecast.yaml b/workflows/data_ingestion/weather/get_forecast.yaml deleted file mode 100644 index 578d7f92..00000000 --- a/workflows/data_ingestion/weather/get_forecast.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: get_forecast -sources: - user_input: - - preprocessing.user_input -sinks: - forecast: read_forecast.local_forecast -parameters: - noaa_gfs_token: -tasks: - preprocessing: - op: gfs_preprocess - op_dir: gfs_preprocess - parameters: - sas_token: "@from(noaa_gfs_token)" - gfs_download: - op: gfs_download - op_dir: gfs_download - parameters: - sas_token: "@from(noaa_gfs_token)" - read_forecast: - op: read_grib_forecast - op_dir: read_grib_forecast -edges: - - origin: preprocessing.time - destination: - - gfs_download.time - - origin: preprocessing.location - destination: - - read_forecast.location - - origin: gfs_download.global_forecast - destination: - - read_forecast.global_forecast -description: - short_description: - Downloads weather forecast data from NOAA Global Forecast System (GFS) for the input time range. - long_description: - The workflow downloads global forecast data from the Planetary Computer with 13km resolution - between grid points. The workflow requires a SAS token to access the blob storage, which can be - found at https://planetarycomputer.microsoft.com/dataset/storage/noaa-gfs. - sources: - user_input: Time range and geometry of interest. - sinks: - forecast: Weather forecast data. - parameters: - noaa_gfs_token: SAS token to access blob storage. 
diff --git a/workflows/data_ingestion/weather/herbie_forecast.yaml b/workflows/data_ingestion/weather/herbie_forecast.yaml deleted file mode 100644 index 4257c1c7..00000000 --- a/workflows/data_ingestion/weather/herbie_forecast.yaml +++ /dev/null @@ -1,74 +0,0 @@ -name: forecast_weather -sources: - user_input: - - forecast_range.user_input -sinks: - weather_forecast: forecast_download.weather_forecast - forecast_range: forecast_range.download_period -parameters: - forecast_lead_times: - search_text: - weather_type: - model: - overwrite: - product: -tasks: - forecast_range: - op: forecast_range_split - op_dir: download_herbie - parameters: - forecast_lead_times: "@from(forecast_lead_times)" - weather_type: "@from(weather_type)" - forecast_download: - op: forecast_weather - op_dir: download_herbie - parameters: - model: "@from(model)" - overwrite: "@from(overwrite)" - product: "@from(product)" - forecast_lead_times: "@from(forecast_lead_times)" - search_text: "@from(search_text)" - weather_type: "@from(weather_type)" -edges: - - origin: forecast_range.download_period - destination: - - forecast_download.user_input -description: - short_description: Downloads forecast observations for provided location & time range using herbie python package. - long_description: - Herbie is a python package that downloads recent and archived numerical weather prediction (NWP) model - outputs from different cloud archive sources. Its most popular capability is to download HRRR model data. - NWP data in GRIB2 format can be read with xarray+cfgrib. Model data Herbie can retrieve includes the High - Resolution Rapid Refresh (HRRR), Rapid Refresh (RAP), Global Forecast System (GFS), National Blend of Models (NBM), - Rapid Refresh Forecast System - Prototype (RRFS), and ECMWF open data forecast products (ECMWF). - sources: - user_input: Time range and geometry of interest. - sinks: - weather_forecast: Downloaded Forecast observations, cleaned, interpolated and mapped to each hour. 
- forecast_range: Time range of forecast observations. - parameters: - model: - Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types - 'hrrr' HRRR contiguous United States model - 'hrrrak' HRRR Alaska model (alias 'alaska') - 'rap' RAP model - 'gfs' Global Forecast System (atmosphere) - 'gfs_wave' Global Forecast System (wave) - 'rrfs' Rapid Refresh Forecast System prototype - overwrite: If true, look for GRIB2 file even if local copy exists. If false, use the local copy - product: - Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), - subh (subhourly fields)). Not specifying this will use the first product in model template file. - forecast_lead_times: - Help to define forecast lead time in hours. Accept the input in range format. - Example - (1, 25, 1) - For more information refer below url. - https://blaylockbk.github.io/Herbie/_build/html/reference_guide/_autosummary/herbie.archive.Herbie.html - search_text: - It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer - of the file required instead of complete file. - For more information on search_text refer to below url. - https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html - weather_type: - It's a user preferred text to represent weather parameter type (temperature, humidity, wind_speed etc). - This is used as column name for the output returned by operator. 
diff --git a/workflows/data_processing/chunk_onnx/chunk_onnx.yaml b/workflows/data_processing/chunk_onnx/chunk_onnx.yaml deleted file mode 100644 index 4d30c2ef..00000000 --- a/workflows/data_processing/chunk_onnx/chunk_onnx.yaml +++ /dev/null @@ -1,55 +0,0 @@ -name: chunk_onnx -sources: - rasters: - - chunk_raster.rasters - - list_to_sequence.list_rasters -sinks: - raster: combine_chunks.raster -parameters: - model_file: - step: 100 -tasks: - chunk_raster: - op: chunk_raster - parameters: - step_y: "@from(step)" - step_x: "@from(step)" - list_to_sequence: - op: list_to_sequence - compute_onnx: - op: compute_onnx_from_chunks - op_dir: compute_onnx - parameters: - model_file: "@from(model_file)" - window_size: "@from(step)" - combine_chunks: - op: combine_chunks -edges: - - origin: chunk_raster.chunk_series - destination: - - compute_onnx.chunk - - origin: list_to_sequence.rasters_seq - destination: - - compute_onnx.input_raster - - origin: compute_onnx.output_raster - destination: - - combine_chunks.chunks -description: - short_description: Runs an Onnx model over all rasters in the input to produce a single raster. - long_description: >- - This workflow is intended to apply an Onnx model over all rasters in the input to produce - a single raster output. This can be used, for instance, to compute time-series analysis of - a list of rasters that span multiple times. The analysis can be any computation that can - be expressed as an Onnx model (for an example, see notebooks/crop_cycles/crop_cycles.ipynb). - In order to run the model in parallel (and avoid running out of memory if the list of rasters - is large), the input rasters are divided spatially into chunks (that span all times). The - Onnx model is applied to these chunks and then combined back to produce the final output. - sources: - rasters: Input rasters. - sinks: - raster: Result of the Onnx model run. 
- parameters: - model_file: - An Onnx model which needs to be deployed with "farmvibes-ai local add-onnx" command. - step: - Size of the chunk in pixels. diff --git a/workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml b/workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml deleted file mode 100644 index 6aac04e7..00000000 --- a/workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml +++ /dev/null @@ -1,51 +0,0 @@ -name: chunk_onnx_sequence -sources: - rasters: - - chunk_raster.rasters - - compute_onnx.input_raster -sinks: - raster: combine_chunks.raster -parameters: - model_file: - step: 100 -tasks: - chunk_raster: - op: chunk_sequence_raster - op_dir: chunk_raster - parameters: - step_y: "@from(step)" - step_x: "@from(step)" - compute_onnx: - op: compute_onnx_from_chunks - op_dir: compute_onnx - parameters: - model_file: "@from(model_file)" - window_size: "@from(step)" - combine_chunks: - op: combine_chunks -edges: - - origin: chunk_raster.chunk_series - destination: - - compute_onnx.chunk - - origin: compute_onnx.output_raster - destination: - - combine_chunks.chunks -description: - short_description: Runs an Onnx model over all rasters in the input to produce a single raster. - long_description: >- - This workflow is intended to run an Onnx model on all input rasters to produce - a single raster output. This can be used, for instance, to compute time-series analysis of - a list of rasters that span multiple times. The analysis can be any computation that can - be expressed as an Onnx model (for an example, see notebooks/crop_cycles/crop_cycles.ipynb). - In order to run the model in parallel (and avoid running out of memory if the list of rasters - is large), the input rasters are divided spatially into chunks (that span all times). The - Onnx model is applied to these chunks and then combined back to produce the final output. - sources: - rasters: Input rasters. - sinks: - raster: Result of the Onnx model run. 
- parameters: - model_file: - An Onnx model which needs to be deployed with "farmvibes-ai local add-onnx" command. - step: - Size of the chunk in pixels. diff --git a/workflows/data_processing/clip/clip.yaml b/workflows/data_processing/clip/clip.yaml deleted file mode 100644 index db0b467b..00000000 --- a/workflows/data_processing/clip/clip.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: clip -sources: - raster: - - clip_raster.raster - input_geometry: - - clip_raster.input_item -sinks: - clipped_raster: clip_raster.clipped_raster -parameters: - hard_clip: false -tasks: - clip_raster: - op: clip_raster - parameters: - hard_clip: "@from(hard_clip)" -edges: -description: - short_description: Performs a clip on an input raster based on a provided reference geometry. - long_description: - The workflow outputs a new raster copied from the input raster with its geometry metadata as the - intersection between the input raster's geometry and the provided reference geometry. If the - parameter hard_clip is set to true, then only data in the intersection is kept in output. The - workflow raises an error if there is no intersection between both geometries. - sources: - raster: Input raster to be clipped. - input_geometry: Reference geometry. - sinks: - clipped_raster: Clipped raster with the reference geometry. 
- parameters: - hard_clip: > - if true, keeps only data inside the intersection of reference and input geometries, soft clip - otherwise diff --git a/workflows/data_processing/gradient/raster_gradient.yaml b/workflows/data_processing/gradient/raster_gradient.yaml deleted file mode 100644 index 62be413b..00000000 --- a/workflows/data_processing/gradient/raster_gradient.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: raster_gradient -sources: - raster: - - gradient.input_raster -sinks: - gradient: gradient.output_raster -tasks: - gradient: - op: compute_raster_gradient -edges: -description: - short_description: Computes the gradient of each band of the input raster with a Sobel operator. - long_description: - sources: - raster: Input raster. - sinks: - gradient: Raster with the gradients. - parameters: diff --git a/workflows/data_processing/heatmap/classification.yaml b/workflows/data_processing/heatmap/classification.yaml deleted file mode 100644 index c2be97ca..00000000 --- a/workflows/data_processing/heatmap/classification.yaml +++ /dev/null @@ -1,108 +0,0 @@ -name: heatmap_intermediate -sources: - input_raster: - - compute_index.raster - samples: - - soil_sample_heatmap.samples -sinks: - result: soil_sample_heatmap.result -parameters: - attribute_name: - buffer: - index: - bins: - simplify: - tolerance: - data_scale: - max_depth: - n_estimators: - random_state: -tasks: - compute_index: - workflow: data_processing/index/index - parameters: - index: "@from(index)" - soil_sample_heatmap: - op: soil_sample_heatmap_using_classification - op_dir: heatmap_sensor - parameters: - attribute_name: "@from(attribute_name)" - buffer: "@from(buffer)" - bins: "@from(bins)" - simplify: "@from(simplify)" - tolerance: "@from(tolerance)" - data_scale: "@from(data_scale)" - max_depth: "@from(max_depth)" - n_estimators: "@from(n_estimators)" - random_state: "@from(random_state)" -edges: - - origin: compute_index.index_raster - destination: - - soil_sample_heatmap.raster -description: - 
short_description: > - Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain - nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. - The inference operation predicts nutrients in soil for the chosen farm boundary. - long_description: |- - The workflow generates a heatmap for selected nutrient. It relies on sample soil data that - contain information of nutrients. The quantity of samples define the accuracy of the heat map - generation. During the research performed testing with samples spaced at 200 feet, 100 feet and - 50 feet. The 50 feet sample spaced distance provided results matching to the ground truth. - Generating heatmaps with this approach reduces the number of samples. It utilizes the logic - below behind the scenes to generate heatmap. - - Read the sentinel raster provided. - - Sensor samples needs to be uploaded into prescriptions entity in Azure - data manager for Agriculture (ADMAg). ADMAg is having hierarchy to hold - information of Party, Field, Seasons, Crop etc. Prior to - uploading prescriptions, it is required to build hierarchy and - a `prescription_map_id`. All prescriptions uploaded to ADMAg are - related to farm hierarchy through `prescription_map_id`. Please refer to - https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/ for - more information on ADMAg. - - Compute indices using the spyndex python package. - - Clip the satellite imagery & sensor samples using farm boundary. - - Perform spatial interpolation to find raster pixels within the offset distance - from sample location and assign the value of nutrients to group of pixels. - - Classify the data based on number of bins. - - Train the model using Random Forest classifier. - - Predict the nutrients using the satellite imagery. - - Generate a shape file using the predicted outputs. - sources: - input_raster: Input raster for index computation. 
- samples: External references to sensor samples for nutrients. - sinks: - result: Zip file containing cluster geometries. - parameters: - attribute_name: - Nutrient property name in sensor samples geojson file. For example - CARBON (C), Nitrogen (N), Phosphorus (P) etc., - buffer: Offset distance from sample to perform interpolate operations with raster. - index: Type of index to be used to generate heatmap. For example - evi, pri etc., - bins: - Possible number of groups used to move value to nearest group using [numpy - histogram](https://numpy.org/doc/stable/reference/generated/numpy.histogram.html) - and to pre-process the data to support model training with classification . - simplify: - Replace small polygons in input with value of their largest neighbor - after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. - tolerance: - All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) - will be no more than tolerance distance from the original. It has the same units - as the coordinate reference system of the GeoSeries. For example, using tolerance=100 - in a projected CRS with meters as units means a distance of 100 meters in reality. - data_scale: - Accepts True or False. Default is False. On True, it scale data using - [StandardScalar] (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) - from scikit-learn package. It Standardize features by removing the mean and - scaling to unit variance. - max_depth: - The maximum depth of the tree. If None, then nodes are expanded until - all leaves are pure or until all leaves contain less than min_samples_split - samples. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) - n_estimators: The number of trees in the forest. 
For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) - random_state: - Controls both the randomness of the bootstrapping of the samples - used when building trees (if bootstrap=True) and the sampling of the features - to consider when looking for the best split at each node (if max_features < - n_features). For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) diff --git a/workflows/data_processing/index/index.yaml b/workflows/data_processing/index/index.yaml deleted file mode 100644 index 0b2d2b7f..00000000 --- a/workflows/data_processing/index/index.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: index -sources: - raster: - - compute_index.raster -sinks: - index_raster: compute_index.index -parameters: - index: ndvi -tasks: - compute_index: - op: compute_index - parameters: - index: "@from(index)" -edges: -description: - short_description: Computes an index from the bands of an input raster. - long_description: >- - In addition to the indices 'ndvi', 'evi', 'msavi', 'ndre', 'reci', 'ndmi', 'methane' and 'pri' - all indices in https://github.com/awesome-spectral-indices/awesome-spectral-indices are - available (depending on the bands available on the corresponding satellite product). - sources: - raster: Input raster. - sinks: - index_raster: Single-band raster with the computed index. - parameters: - index: - The choice of index to be computed ('ndvi', 'evi', 'msavi', 'ndre', 'reci', 'ndmi', - 'methane', 'pri' or any of the awesome-spectral-indices). 
diff --git a/workflows/data_processing/linear_trend/chunked_linear_trend.yaml b/workflows/data_processing/linear_trend/chunked_linear_trend.yaml deleted file mode 100644 index 42afd082..00000000 --- a/workflows/data_processing/linear_trend/chunked_linear_trend.yaml +++ /dev/null @@ -1,43 +0,0 @@ -name: chunked_linear_trend -sources: - input_rasters: - - chunk_raster.rasters - - linear_trend.rasters -sinks: - linear_trend_raster: combine_chunks.raster -parameters: - # steps used to divide the rasters into chunks - # units are grid points - chunk_step_y: - chunk_step_x: -tasks: - chunk_raster: - op: chunk_raster - parameters: - step_y: "@from(chunk_step_y)" - step_x: "@from(chunk_step_x)" - linear_trend: - op: linear_trend - combine_chunks: - op: combine_chunks -edges: - - origin: chunk_raster.chunk_series - destination: - - linear_trend.series - - origin: linear_trend.trend - destination: - - combine_chunks.chunks -description: - short_description: Computes the pixel-wise linear trend of a list of rasters (e.g. NDVI). - long_description: - The workflow computes the linear trend over chunks of data, combining them into the final - raster. - sources: - input_rasters: List of rasters to compute linear trend. - sinks: - linear_trend_raster: Raster with the trend and the test statistics. - parameters: - chunk_step_y: - steps used to divide the rasters into chunks in the y direction (units are grid points). - chunk_step_x: - steps used to divide the rasters into chunks in the x direction (units are grid points). 
diff --git a/workflows/data_processing/merge/match_merge_to_ref.yaml b/workflows/data_processing/merge/match_merge_to_ref.yaml deleted file mode 100644 index 9957539a..00000000 --- a/workflows/data_processing/merge/match_merge_to_ref.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: match_merge_to_ref -sources: - rasters: - - pair.rasters2 - ref_rasters: - - pair.rasters1 - - group.group_by -sinks: - match_rasters: merge.raster -parameters: - resampling: bilinear -tasks: - pair: - op: pair_intersecting_rasters - match: - op: match_raster_to_ref - parameters: - resampling: "@from(resampling)" - group: - op: group_rasters_by_geometries - merge: - op: merge_rasters - parameters: - resampling: "@from(resampling)" -edges: - - origin: pair.paired_rasters1 - destination: - - match.ref_raster - - origin: pair.paired_rasters2 - destination: - - match.raster - - origin: match.output_raster - destination: - - group.rasters - - origin: group.raster_groups - destination: - - merge.raster_sequence -description: - short_description: Resamples input rasters to the reference rasters' grid. - long_description: - The workflow will produce input and reference raster pairs with intersecting geometries. For - each pair, the input raster is resampled to match the reference raster's grid. Afterwards, all - resampled rasters are groupped if they are contained in a reference raster geometry, and each - raster group is matched into single raster. The output should contain the information available - in the input rasters, gridded according to the reference rasters. - sources: - rasters: Input rasters that will be resampled. - ref_rasters: Reference rasters. - sinks: - match_rasters: Rasters with information from the input rasters on the reference grid. - parameters: - resampling: >- - Type of resampling when reprojecting the rasters. 
See - [link=https://rasterio.readthedocs.io/en/latest/api/rasterio.enums.html#rasterio.enums.Resampling] - rasterio documentation: - https://rasterio.readthedocs.io/en/latest/api/rasterio.enums.html#rasterio.enums.Resampling[/] - for all available resampling options. diff --git a/workflows/data_processing/outlier/detect_outlier.yaml b/workflows/data_processing/outlier/detect_outlier.yaml deleted file mode 100644 index ac7070dd..00000000 --- a/workflows/data_processing/outlier/detect_outlier.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: detect_outlier -sources: - rasters: - - outlier.rasters -sinks: - segmentation: outlier.segmentation - heatmap: outlier.heatmap - outliers: outlier.outliers - mixture_means: outlier.mixture_means -parameters: - threshold: -tasks: - outlier: - op: detect_outliers - parameters: - threshold: "@from(threshold)" -edges: -description: - short_description: - Fits a single-component Gaussian Mixture Model (GMM) over input data to detect outliers - according to the threshold parameter. - long_description: - The workflow outputs segmentation and outlier maps based on the threshold parameter and the - likelihood of each sample belonging to the GMM component. It also yields heatmaps of the - likelihood, and the mean of GMM's component. - sources: - rasters: Input rasters. - sinks: - segmentation: - Segmentation maps based on the likelihood of each sample belonging to the GMM's - single-component. - heatmap: Likelihood maps. - outliers: Outlier maps based on the thresholded likelihood map. - mixture_means: Mean of the GMM. - parameters: - threshold: Likelihood threshold value to consider a sample as an outlier. 
diff --git a/workflows/data_processing/threshold/threshold_raster.yaml b/workflows/data_processing/threshold/threshold_raster.yaml deleted file mode 100644 index ef47fcb5..00000000 --- a/workflows/data_processing/threshold/threshold_raster.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: threshold_raster -sources: - raster: - - threshold_task.raster -sinks: - thresholded_raster: threshold_task.thresholded -parameters: - threshold: -tasks: - threshold_task: - op: threshold_raster - parameters: - threshold: "@from(threshold)" -edges: -description: - short_description: Thresholds values of the input raster if higher than the threshold parameter. - long_description: - sources: - raster: Input raster. - sinks: - thresholded_raster: Thresholded raster. - parameters: - threshold: Threshold value. diff --git a/workflows/data_processing/timeseries/timeseries_aggregation.yaml b/workflows/data_processing/timeseries/timeseries_aggregation.yaml deleted file mode 100644 index 0048eecf..00000000 --- a/workflows/data_processing/timeseries/timeseries_aggregation.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: timeseries_aggregation -sources: - raster: - - summary.raster - input_geometry: - - summary.input_geometry -sinks: - timeseries: timeseries.timeseries -tasks: - summary: - op: summarize_raster - timeseries: - op: aggregate_statistics_timeseries -edges: - - origin: summary.summary - destination: - - timeseries.stats -description: - short_description: - Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster - and aggregates them into a timeseries. - long_description: - sources: - raster: Input raster. - input_geometry: Geometry of interest. - sinks: - timeseries: Aggregated statistics of the raster. 
diff --git a/workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml b/workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml deleted file mode 100644 index ff55bbb4..00000000 --- a/workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: timeseries_masked_aggregation -sources: - raster: - - masked_summary.raster - mask: - - masked_summary.mask - input_geometry: - - masked_summary.input_geometry -sinks: - timeseries: timeseries.timeseries -parameters: - timeseries_masked_thr: -tasks: - masked_summary: - op: summarize_masked_raster - op_dir: summarize_raster - timeseries: - op: aggregate_statistics_timeseries - parameters: - masked_thr: "@from(timeseries_masked_thr)" -edges: - - origin: masked_summary.summary - destination: - - timeseries.stats -description: - short_description: - Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster - considered by the mask and aggregates them into a timeseries. - long_description: - sources: - raster: Input raster. - mask: Mask of the regions to be considered during summarization; - input_geometry: Geometry of interest. - sinks: - timeseries: Aggregated statistics of the raster considered by the mask. - parameters: - timeseries_masked_thr: - Threshold of the maximum ratio of masked content allowed in a raster. The statistics of - rasters with masked content above the threshold (e.g., heavily clouded) are not included in - the timeseries. 
diff --git a/workflows/farm_ai/agriculture/canopy_cover.yaml b/workflows/farm_ai/agriculture/canopy_cover.yaml deleted file mode 100644 index 71b1f9c6..00000000 --- a/workflows/farm_ai/agriculture/canopy_cover.yaml +++ /dev/null @@ -1,50 +0,0 @@ -name: canopy_cover -sources: - user_input: - - ndvi_summary.user_input - - canopy_summary_timeseries.input_geometry -sinks: - ndvi: ndvi_summary.compute_ndvi.compute_index.index - estimated_canopy_cover: canopy.estimated_canopy_cover - ndvi_timeseries: ndvi_summary.timeseries - canopy_timeseries: canopy_summary_timeseries.timeseries -parameters: - pc_key: -tasks: - ndvi_summary: - workflow: farm_ai/agriculture/ndvi_summary - parameters: - pc_key: "@from(pc_key)" - canopy: - op: estimate_canopy_cover - canopy_summary_timeseries: - workflow: data_processing/timeseries/timeseries_masked_aggregation -edges: - - origin: ndvi_summary.compute_ndvi.compute_index.index - destination: - - canopy.indices - - origin: canopy.estimated_canopy_cover - destination: - - canopy_summary_timeseries.raster - # We need to use the full name if this is not a sink 😭 - - origin: ndvi_summary.s2.cloud.merge.merged_cloud_mask - destination: - - canopy_summary_timeseries.mask -description: - short_description: Estimates pixel-wise canopy cover for a region and date. - long_description: - The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API, and - computes the NDVI for each available tile and date. It applies a linear regressor trained with - polynomial features (up to the 3rd degree) on top of the index raster to estimate canopy cover. - The coeficients and intercept of the regressor were obtained beforehand using as ground-truth - masked/annotated drone imagery, and are used for inference in this workflow. - sources: - user_input: Time range and geometry of interest. - sinks: - ndvi: NDVI raster. 
- estimated_canopy_cover: Raster with pixel-wise canopy cover estimation; - ndvi_timeseries: - Aggregated NDVI statistics of the retrieved tiles within the input geometry and time range. - canopy_timeseries: Aggregated canopy cover statistics. - parameters: - pc_key: Optional Planetary Computer API key. diff --git a/workflows/farm_ai/agriculture/change_detection.yaml b/workflows/farm_ai/agriculture/change_detection.yaml deleted file mode 100644 index 89491b94..00000000 --- a/workflows/farm_ai/agriculture/change_detection.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: change_detection -sources: - user_input: - - spaceeye.user_input - - summary_timeseries.input_geometry -sinks: - spaceeye_raster: spaceeye.raster - index: ndvi.index_raster - timeseries: summary_timeseries.timeseries - segmentation: outliers.segmentation - heatmap: outliers.heatmap - outliers: outliers.outliers - mixture_means: outliers.mixture_means -parameters: - pc_key: -tasks: - spaceeye: - workflow: data_ingestion/spaceeye/spaceeye - parameters: - pc_key: "@from(pc_key)" - ndvi: - workflow: data_processing/index/index - parameters: - index: ndvi - summary_timeseries: - workflow: data_processing/timeseries/timeseries_aggregation - outliers: - workflow: data_processing/outlier/detect_outlier -edges: - - origin: spaceeye.raster - destination: - - ndvi.raster - - origin: ndvi.index_raster - destination: - - summary_timeseries.raster - - outliers.rasters -description: - short_description: Identifies changes/outliers over NDVI across dates. - long_description: - The workflow generates SpaceEye imagery for the input region and time range and computes NDVI - raster for each date. It aggregates NDVI statistics (mean, standard deviation, maximum and - minimum) in time and detects outliers across dates with a single-component Gaussian Mixture - Model (GMM). - sources: - user_input: Time range and geometry of interest. - sinks: - spaceeye_raster: SpaceEye cloud-free rasters. - index: NDVI rasters. 
- timeseries: Aggregated NDVI statistics over the time range. - segmentation: - Segmentation maps based on the likelihood of each sample belonging to the GMM's - single-component. - heatmap: Likelihood maps. - outliers: Outlier maps. - mixture_means: Means of the GMM. - parameters: - pc_key: PlanetaryComputer API key. diff --git a/workflows/farm_ai/agriculture/emergence_summary.yaml b/workflows/farm_ai/agriculture/emergence_summary.yaml deleted file mode 100644 index 1356019b..00000000 --- a/workflows/farm_ai/agriculture/emergence_summary.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: emergence_summary -sources: - user_input: - - s2.user_input - - summary_timeseries.input_geometry -sinks: - timeseries: summary_timeseries.timeseries -parameters: - pc_key: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks - parameters: - max_tiles_per_time: 1 - pc_key: "@from(pc_key)" - msavi: - workflow: data_processing/index/index - parameters: - index: msavi - emergence: - workflow: data_processing/threshold/threshold_raster - parameters: - threshold: 0.2 - summary_timeseries: - workflow: data_processing/timeseries/timeseries_masked_aggregation -edges: - - origin: s2.raster - destination: - - msavi.raster - - origin: msavi.index_raster - destination: - - emergence.raster - - origin: emergence.thresholded_raster - destination: - - summary_timeseries.raster - - origin: s2.mask - destination: - - summary_timeseries.mask -description: - short_description: - Calculates emergence statistics using thresholded MSAVI (mean, standard deviation, maximum and - minimum) for the input geometry and time range. - long_description: - The workflow retrieves Sentinel2 products with Planetary Computer (PC) API, forwards them to a - cloud detection model and combines the predicted cloud mask to the mask provided by PC. 
It - computes the MSAVI for each available tile and date, thresholds them above a certain value and - summarizes each with the mean, standard deviation, maximum and minimum values for the regions - not obscured by clouds. Finally, it outputs a timeseries with such statistics for all available - dates, filtering out heavily-clouded tiles. - sources: - user_input: Time range and geometry of interest. - sinks: - timeseries: - Aggregated emergence statistics of the retrieved tiles within the input geometry and time - range. - parameters: - pc_key: Optional Planetary Computer API key. diff --git a/workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml b/workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml deleted file mode 100644 index 50a054d6..00000000 --- a/workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: green_house_gas_fluxes -sources: - user_input: - - ghg.ghg -sinks: - fluxes: ghg.fluxes -parameters: - crop_type: "corn" -tasks: - ghg: - op: compute_ghg_fluxes - parameters: - crop_type: "@from(crop_type)" -edges: -description: - short_description: Computes Green House Fluxes for a region and date range - long_description: >- - The workflow follows the GHG Protocol guidelines published for Brazil - (which are based on IPCC reports) to compute Green House Gas emission - fluxes (sequestration versus emissions) for a given crop. - sources: - user_input: The user-provided inputs for GHG computation. - sinks: - fluxes: The computed fluxes for the given area and date range considering the user input data. - parameters: - crop_type: >- - The type of the crop to compute GHG emissions. - Supported crops are 'wheat', 'corn', 'cotton', and 'soybeans'. 
diff --git a/workflows/farm_ai/agriculture/heatmap_using_classification.yaml b/workflows/farm_ai/agriculture/heatmap_using_classification.yaml deleted file mode 100644 index 0c7ca7f7..00000000 --- a/workflows/farm_ai/agriculture/heatmap_using_classification.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: heatmap_using_classification -sources: - input_samples: - - download_samples.user_input - input_raster: - - soil_sample_heatmap_classification.input_raster -sinks: - result: soil_sample_heatmap_classification.result -parameters: - attribute_name: - buffer: - index: - bins: - simplify: - tolerance: - data_scale: - max_depth: - n_estimators: - random_state: -tasks: - download_samples: - workflow: data_ingestion/user_data/ingest_geometry - soil_sample_heatmap_classification: - workflow: data_processing/heatmap/classification - parameters: - attribute_name: "@from(attribute_name)" - buffer: "@from(buffer)" - index: "@from(index)" - bins: "@from(bins)" - simplify: "@from(simplify)" - tolerance: "@from(tolerance)" - data_scale: "@from(data_scale)" - max_depth: "@from(max_depth)" - n_estimators: "@from(n_estimators)" - random_state: "@from(random_state)" -edges: - - origin: download_samples.geometry - destination: - - soil_sample_heatmap_classification.samples -description: - short_description: The workflow generates a nutrient heatmap for samples provided by user by downloading the samples from user input. - long_description: The samples provided are related with farm boundary and have required nutrient information to create a heatmap. - sources: - input_raster: Input raster for index computation. - input_samples: External references to sensor samples for nutrients. - sinks: - result: Zip file containing cluster geometries. 
- parameters: # Utilize parameters configured in workflow data_processing/heatmap/classification.yaml - diff --git a/workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml b/workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml deleted file mode 100644 index a6ede80d..00000000 --- a/workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml +++ /dev/null @@ -1,71 +0,0 @@ -name: heatmap_using_classification_admag -sources: - admag_input: - - prescriptions.admag_input - input_raster: - - soil_sample_heatmap_classification.input_raster -sinks: - result: soil_sample_heatmap_classification.result -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: - attribute_name: - buffer: - index: - bins: - simplify: - tolerance: - data_scale: - max_depth: - n_estimators: - random_state: -tasks: - prescriptions: - workflow: data_ingestion/admag/prescriptions - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" - soil_sample_heatmap_classification: - workflow: data_processing/heatmap/classification - parameters: - attribute_name: "@from(attribute_name)" - buffer: "@from(buffer)" - index: "@from(index)" - bins: "@from(bins)" - simplify: "@from(simplify)" - tolerance: "@from(tolerance)" - data_scale: "@from(data_scale)" - max_depth: "@from(max_depth)" - n_estimators: "@from(n_estimators)" - random_state: "@from(random_state)" -edges: - - origin: prescriptions.response - destination: - - soil_sample_heatmap_classification.samples -description: - short_description: This workflow integrate the ADMAG API to download prescriptions and generate heatmap. - long_description: The prescriptions are related with farm boundary and the nutrient information. Each prescription represent a sensor sample at a location within a farm boundary. 
- - sources: - input_raster: Input raster for index computation. - admag_input: Required inputs to download prescriptions from admag. - sinks: - result: Zip file containing cluster geometries. - parameters: - base_url: URL to access the registered app - client_id: - Value uniquely identifies registered application in the Microsoft identity platform. Visit url - https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app to register the app. - client_secret: - Sometimes called an application password, a client secret is a string value your app can use in place of a certificate - to identity itself. - authority: - The endpoint URIs for your app are generated automatically when you register or configure your app. It is used by - client to obtain authorization from the resource owner - default_scope: URL for default azure OAuth2 permissions diff --git a/workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml b/workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml deleted file mode 100644 index 630a00cf..00000000 --- a/workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml +++ /dev/null @@ -1,68 +0,0 @@ -name: heatmap_using_neighboring_data_points -sources: - input_raster: - - soil_sample_heatmap.raster - input_samples: - - download_samples.user_input - input_sample_clusters: - - download_sample_clusters.user_input -sinks: - result: soil_sample_heatmap.result -parameters: - attribute_name: - simplify: - tolerance: - algorithm: - resolution: - bins: -tasks: - download_samples: - workflow: data_ingestion/user_data/ingest_geometry - download_sample_clusters: - workflow: data_ingestion/user_data/ingest_geometry - soil_sample_heatmap: - op: soil_sample_heatmap_using_neighbors - op_dir: heatmap_sensor - parameters: - attribute_name: "@from(attribute_name)" - simplify: "@from(simplify)" - tolerance: "@from(tolerance)" - algorithm: "@from(algorithm)" - resolution: "@from(resolution)" - bins: 
"@from(bins)" -edges: - - origin: download_samples.geometry - destination: - - soil_sample_heatmap.samples - - origin: download_sample_clusters.geometry - destination: - - soil_sample_heatmap.samples_boundary -description: - short_description: - Creates heatmap using the neighbors by performing spatial interpolation operations. It utilizes soil information collected - at optimal sensor/sample locations and downloaded sentinel satellite imagery. - long_description: - The optimal location of nutrient samples are identified using workflow . - The quantity of samples defines the accuracy of the heatmap generation. During the research performed testing on a 100 acre farm using sample count - of approximately 20, 80, 130, 600. The research concluded that a sample count of 20 provided decent results, also accuracy of nutrient - information improved with increase in sample count. - sources: - input_raster: Sentinel-2 raster. - input_samples: Sensor samples with nutrient information. - input_sample_clusters: Clusters boundaries of sensor samples locations. - sinks: - result: Zip file containing heatmap output as shape files. - parameters: - attribute_name: "Nutrient property name in sensor samples geojson file. For example: CARBON (C), Nitrogen (N), Phosphorus (P) etc.," - simplify: Replace small polygons in input with value of their largest neighbor after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. - tolerance: - All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) will be no more - than tolerance distance from the original. It has the same units as the coordinate reference system of the GeoSeries. For example, using - tolerance=100 in a projected CRS with meters as units means a distance of 100 meters in reality. - algorithm: Algorithm used to identify nearest neighbors. Accepts 'cluster overlap' or 'nearest neighbor' or 'kriging neighbor'. 
- resolution: - Defines the output resolution as the ratio of input raster resolution. For example, if resolution is 5, the output - heatmap is 5 times coarser than input raster. - bins: - it defines the number of equal-width bins in the given range.Refer to this article to learn more about bins - https://numpy.org/doc/stable/reference/generated/numpy.histogram.html diff --git a/workflows/farm_ai/agriculture/methane_index.yaml b/workflows/farm_ai/agriculture/methane_index.yaml deleted file mode 100644 index afcf2ab7..00000000 --- a/workflows/farm_ai/agriculture/methane_index.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: methane_index -sources: - user_input: - - s2.user_input - - clip.input_geometry -sinks: - index: methane.index_raster - s2_raster: s2.raster - cloud_mask: s2.mask -parameters: - pc_key: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks - parameters: - pc_key: "@from(pc_key)" - clip: - workflow: data_processing/clip/clip - methane: - workflow: data_processing/index/index - parameters: - index: methane -edges: - - origin: s2.raster - destination: - - clip.raster - - origin: clip.clipped_raster - destination: - - methane.raster -description: - short_description: Computes methane index from ultra emitters for a region and date range. - long_description: - The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API and - crop the rasters for the region defined in user_input. All bands are normalized and an - anti-aliasing guassian filter is applied to smooth and remove potential artifacts. An - unsupervised K-Nearest Neighbor is applied to identify bands similar to band 12, and the index - is computed by the difference between band 12 to the pixel-wise median of top K similar bands. - sources: - user_input: Time range and geometry of interest. - sinks: - index: Methane index raster. - s2_raster: Sentinel-2 raster. - cloud_mask: Cloud mask. - parameters: - pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/agriculture/ndvi_summary.yaml b/workflows/farm_ai/agriculture/ndvi_summary.yaml deleted file mode 100644 index ad85b65a..00000000 --- a/workflows/farm_ai/agriculture/ndvi_summary.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: ndvi_summary -sources: - user_input: - - s2.user_input - - summary_timeseries.input_geometry -sinks: - timeseries: summary_timeseries.timeseries -parameters: - pc_key: -tasks: - s2: - workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks - parameters: - max_tiles_per_time: 1 - pc_key: "@from(pc_key)" - compute_ndvi: - workflow: data_processing/index/index - summary_timeseries: - workflow: data_processing/timeseries/timeseries_masked_aggregation -edges: - - origin: s2.raster - destination: - - compute_ndvi.raster - - origin: compute_ndvi.index_raster - destination: - - summary_timeseries.raster - - origin: s2.mask - destination: - - summary_timeseries.mask -description: - short_description: - Calculates NDVI statistics (mean, standard deviation, maximum and minimum) for the input - geometry and time range. - long_description: - The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API, - forwards them to a cloud detection model and combines the predicted cloud mask to the mask - obtained from the product. The workflow computes the NDVI for each available tile and date, - summarizing each with the mean, standard deviation, maximum and minimum values for the regions - not obscured by clouds. Finally, it outputs a timeseries with such statistics for all available - dates, ignoring heavily-clouded tiles. - sources: - user_input: Time range and geometry of interest. - sinks: - timeseries: - Aggregated NDVI statistics of the retrieved tiles within the input geometry and time range. - parameters: - pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/agriculture/weed_detection.yaml b/workflows/farm_ai/agriculture/weed_detection.yaml deleted file mode 100644 index 72a9a947..00000000 --- a/workflows/farm_ai/agriculture/weed_detection.yaml +++ /dev/null @@ -1,67 +0,0 @@ -name: weed_detection -sources: - user_input: - - download_raster.user_input -sinks: - result: weed_detection.result -parameters: - buffer: - no_data: - clusters: - sieve_size: - simplify: - tolerance: - samples: - bands: - alpha_index: -tasks: - download_raster: - workflow: data_ingestion/user_data/ingest_raster - weed_detection: - op: weed_detection - parameters: - buffer: "@from(buffer)" - no_data: "@from(no_data)" - clusters: "@from(clusters)" - sieve_size: "@from(sieve_size)" - simplify: "@from(simplify)" - tolerance: "@from(tolerance)" - samples: "@from(samples)" - bands: "@from(bands)" - alpha_index: "@from(alpha_index)" -edges: - - origin: download_raster.raster - destination: - - weed_detection.raster -description: - short_description: Generates shape files for similarly colored regions in the input raster. - long_description: - The workflow retrieves a remote raster and trains a Gaussian Mixture Model (GMM) over a subset - of the input data with a fixed number of components. The GMM is then used to cluster all images - pixels. Clustered regions are converted to polygons with a minimum size threshold. These - polygons are then simplified to smooth their borders. All polygons of a given cluster are - written to a single shapefile. All files are then compressed and returned as a single zip - archive. - sources: - user_input: External references to raster data. - sinks: - result: Zip file containing cluster geometries. - parameters: - buffer: - Buffer size, in projected CRS, to apply to the input geometry before sampling training points. - A negative number can be used to avoid sampling unwanted regions if the geometry is not very - precise. - no_data: Value to use as nodata when reading the raster. 
Uses the raster's internal nodata value - if not provided. - clusters: Number of clusters to use when segmenting the image. - sieve_size: - Area of the minimum connected region. Smaller regions will have their class assigned to the - largest adjancent region. - simplify: - Method used to simplify the geometries. Accepts 'none', for no simplification, 'simplify', for - tolerance-based simplification, and 'convex', for returning the convex hull. - tolerance: - Tolerance for simplifcation algorithm. Only applicable if simplification method is 'simplify'. - samples: Number os samples to use during training. - bands: List of band indices to use during training and inference. - alpha_index: Positive index of alpha band, if used to filter out nodata values. diff --git a/workflows/farm_ai/carbon_local/admag_carbon_integration.yaml b/workflows/farm_ai/carbon_local/admag_carbon_integration.yaml deleted file mode 100644 index 9ba80151..00000000 --- a/workflows/farm_ai/carbon_local/admag_carbon_integration.yaml +++ /dev/null @@ -1,94 +0,0 @@ -name: admag_carbon_integration -sources: - baseline_admag_input: - - baseline_seasonal_field_list.admag_input - scenario_admag_input: - - scenario_seasonal_field_list.admag_input -sinks: - carbon_output: admag_carbon.carbon_output -parameters: - base_url: - client_id: - client_secret: - authority: - default_scope: - comet_support_email: - ngrok_token: -tasks: - baseline_seasonal_field_list: - workflow: data_ingestion/admag/admag_seasonal_field - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" - scenario_seasonal_field_list: - workflow: data_ingestion/admag/admag_seasonal_field - parameters: - base_url: "@from(base_url)" - client_id: "@from(client_id)" - client_secret: "@from(client_secret)" - authority: "@from(authority)" - default_scope: "@from(default_scope)" - admag_carbon: - workflow: 
farm_ai/carbon_local/carbon_whatif - parameters: - comet_support_email: "@from(comet_support_email)" - ngrok_token: "@from(ngrok_token)" -edges: - - origin: baseline_seasonal_field_list.seasonal_field - destination: - - admag_carbon.baseline_seasonal_fields - - origin: scenario_seasonal_field_list.seasonal_field - destination: - - admag_carbon.scenario_seasonal_fields -description: - short_description: - Computes the offset amount of carbon that would be sequestered in a seasonal - field using Microsoft Azure Data Manager for Agriculture (ADMAg) data. - long_description: - Derives carbon sequestration information. Microsoft Azure Data Manager for - Agriculture (ADMAg) and the COMET-Farm API are used to obtain - farming data and evaluate carbon offset. ADMAg is - capable of describing important farming activities such as fertilization, - tillage, and organic amendments applications, all of which are represented in - the data manager. FarmVibes.AI retrieves this information from the data manager - and builds SeasonalFieldInformation FarmVibes.AI objects. These objects are then - used to call the COMET-Farm API and evaluate Carbon Offset Information. - sources: - baseline_admag_input: - List of ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation - objects for baseline COMET-Farm API Carbon offset evaluation. - scenario_admag_input: - List of ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation - objects for scenarios COMET-Farm API Carbon offset evaluation. - sinks: - carbon_output: Carbon sequestration received for scenario information provided as input. - parameters: - comet_support_email: - Comet support email. The email used to register for a COMET account. The - requests are forwarded to comet with this email reference. This email is - used by comet to share the information back to you for failed requests. - ngrok_token: - NGROK session token. 
A token that FarmVibes uses to create a web_hook url - that is shared with Comet in a request when running the workflow. Comet - can use this link to send back a response to FarmVibes. NGROK is a - service that creates temporary urls for local servers. To use NGROK, - FarmVibes needs to get a token from this website, - https://dashboard.ngrok.com/. - base_url: - Azure Data Manager for Agriculture host. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - client_id: - Azure Data Manager for Agriculture client id. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - client_secret: - Azure Data Manager for Agriculture client secret. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - authority: - Azure Data Manager for Agriculture authority. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. - default_scope: - Azure Data Manager for Agriculture default scope. Please visit https://aka.ms/farmvibesDMA to check how - to get these credentials. 
\ No newline at end of file diff --git a/workflows/farm_ai/carbon_local/carbon_whatif.yaml b/workflows/farm_ai/carbon_local/carbon_whatif.yaml deleted file mode 100644 index 3a8fe012..00000000 --- a/workflows/farm_ai/carbon_local/carbon_whatif.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: carbon_whatif -sources: - baseline_seasonal_fields: - - comet_task.baseline_seasonal_fields - scenario_seasonal_fields: - - comet_task.scenario_seasonal_fields -sinks: - carbon_output: comet_task.carbon_output -parameters: - comet_support_email: - ngrok_token: -tasks: - comet_task: - op: whatif_comet_local_op - op_dir: carbon_local - parameters: - comet_support_email: "@from(comet_support_email)" - ngrok_token: "@from(ngrok_token)" -description: - short_description: - Computes the offset amount of carbon that would be sequestered in a seasonal field using the - baseline (historical) and scenario (time range interested in) information. - long_description: - To derive amount of carbon, it relies on seasonal information information provided for both baseline and - scenario. The baseline represents historical information of farm practices used during each season that - includes fertilizers, tillage, harvest and organic amendment. Minimum 2 years of baseline information required - to execute the workflow. The scenario represents future farm practices planning to do during each season that - includes fertilizers, tillage, harvest and organic amendment. For the scenario information provided, the workflow - compute the offset amount of carbon that would be sequestrated in a seasonal field. Minimum 2years of baseline - information required to execute the workflow. The requests received by workflow are forwarded to comet api. - To know more information of comet refer to https://gitlab.com/comet-api/api-docs/-/tree/master/. 
- To understand the enumerations and information accepted by comet refer to - https://gitlab.com/comet-api/api-docs/-/blob/master/COMET-Farm_API_File_Specification.xlsx - The request submitted get executed with in 5 minutes to max 2 hours. If response not received from comet within - this time period, check comet_support_email for information on failed requests, if no emails received check - status of requests by contacting to this support email address of comet "appnrel@colostate.edu". - For public use comet limits 50 requests each day. If more requests need to send contact support email address. - sources: - baseline_seasonal_fields: - List of seasonal fields that holds the historical information of farm practices such as fertilizers, - tillage, harvest and organic amendment. - scenario_seasonal_fields: - List of seasonal fields that holds the future information of farm practices such as fertilizers, - tillage, harvest and organic amendment. - sinks: - carbon_output: Carbon sequestration received for scenario information provided as input. - parameters: - comet_support_email: - COMET-Farm API Registered email. The requests are forwarded to comet with this email reference. - This email used by comet to share the information back to you for failed requests. - ngrok_token: - NGROK session token. FarmVibes generate web_hook url and shared url with comet along the request to receive the - response from comet. It's publicly accessible url and it's unique for each session. The url gets destroyed - once the session ends. 
To start the ngrok session a token, it is generated from this url https://dashboard.ngrok.com/ diff --git a/workflows/farm_ai/land_cover_mapping/conservation_practices.yaml b/workflows/farm_ai/land_cover_mapping/conservation_practices.yaml deleted file mode 100644 index f3b4b92c..00000000 --- a/workflows/farm_ai/land_cover_mapping/conservation_practices.yaml +++ /dev/null @@ -1,92 +0,0 @@ -name: conservation_practices -sources: - user_input: - - naip.user_input -sinks: - dem_raster: dem.raster - naip_raster: naip.raster - dem_gradient: gradient.gradient - cluster: cluster.output_raster - average_elevation: avg_elev.output_raster - practices: practice.output_raster -parameters: - clustering_iterations: - pc_key: -tasks: - naip: - workflow: data_ingestion/naip/download_naip - parameters: - pc_key: "@from(pc_key)" - cluster: - op: compute_raster_cluster - parameters: - number_iterations: "@from(clustering_iterations)" - dem: - workflow: data_ingestion/dem/download_dem - parameters: - pc_key: "@from(pc_key)" - gradient: - workflow: data_processing/gradient/raster_gradient - match_grad: - workflow: data_processing/merge/match_merge_to_ref - match_elev: - workflow: data_processing/merge/match_merge_to_ref - avg_elev: - op: compute_raster_class_windowed_average - practice: - op: compute_conservation_practice -edges: - - origin: naip.raster - destination: - - dem.user_input - - cluster.input_raster - - match_elev.ref_rasters - - match_grad.ref_rasters - - origin: dem.raster - destination: - - gradient.raster - - match_elev.rasters - - origin: gradient.gradient - destination: - - match_grad.rasters - - origin: cluster.output_raster - destination: - - avg_elev.input_cluster_raster - - origin: match_elev.match_rasters - destination: - - avg_elev.input_dem_raster # This is not a DemRaster anymore! 
- - origin: avg_elev.output_raster - destination: - - practice.average_elevation - - origin: match_grad.match_rasters - destination: - - practice.elevation_gradient -description: - short_description: - Identifies conservation practices (terraces and grassed waterways) using elevation data. - long_description: - The workflow classifies pixels in terraces or grassed waterways. It starts downloading NAIP and - USGS 3DEP tiles. Then, it computes the elevation gradient using a Sobel filter. And it computes - local clusters using an overlap clustering method. Then, it combines cluster and elevation tiles - to compute the average elevation per cluster. Finally, it uses a CNN model to classify pixels in - either terraces or grassed waterways. - sources: - user_input: Time range and geometry of interest. - sinks: - dem_raster: USGS 3DEP tiles that overlap the NAIP tiles that overlap the area of interest. - naip_raster: NAIP tiles that overlap the area of interest. - dem_gradient: - A copy of the USGS 3DEP tiles where the pixel values are the gradient computed using the Sobel - filter. - cluster: - A copy of the NAIP tiles with one band representing the output of the overlap clustering - method. Each pixel has a value between one and four. - average_elevation: - A combination of the dem_gradient and cluster sinks, where each pixel value is the average - elevation of all pixels that fall in the same cluster. - practices: - A copy of the NAIP tile with one band where each pixel value refers to a conservation practice - (0 = none, 1 = terraces, 2 = grassed waterways). - parameters: - clustering_iterations: The number of iterations used in the overlap clustering method. - pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml b/workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml deleted file mode 100644 index a7d6788f..00000000 --- a/workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: landsat_ndvi_trend -sources: - user_input: - - landsat.user_input -sinks: - ndvi: trend.ndvi_raster - linear_trend: trend.linear_trend -parameters: - pc_key: -tasks: - landsat: - workflow: data_ingestion/landsat/preprocess_landsat - parameters: - pc_key: "@from(pc_key)" - trend: - workflow: farm_ai/land_degradation/ndvi_linear_trend -edges: - - origin: landsat.raster - destination: - - trend.raster -description: - short_description: - Estimates a linear trend over NDVI computer over LANDSAT tiles that intersect with the input - geometry and time range. - long_description: - The workflow downloads LANDSAT data, compute NDVI over them, and estimate a linear trend over - chunks of data, combining them into a final trend raster. - sources: - user_input: Time range and geometry of interest. - sinks: - ndvi: NDVI rasters. - linear_trend: Raster with the trend and the test statistics. - parameters: - pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml b/workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml deleted file mode 100755 index 3d418b44..00000000 --- a/workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: ndvi_linear_trend -sources: - raster: - - ndvi.raster -sinks: - ndvi_raster: ndvi.index_raster - linear_trend: chunked_linear_trend.linear_trend_raster -tasks: - ndvi: - workflow: data_processing/index/index - parameters: - index: ndvi - chunked_linear_trend: - workflow: data_processing/linear_trend/chunked_linear_trend - parameters: - chunk_step_y: 512 - chunk_step_x: 512 -edges: - - origin: ndvi.index_raster - destination: - - chunked_linear_trend.input_rasters -description: - short_description: Computes the pixel-wise NDVI linear trend over the input raster. - long_description: - The workflow computes the NDVI from the input raster, calculates the linear trend over chunks of - data, combining them into the final raster. - sources: - raster: Input raster. - sinks: - ndvi_raster: NDVI raster. - linear_trend: Raster with the trend and the test statistics. 
diff --git a/workflows/farm_ai/segmentation/auto_segment_basemap.yaml b/workflows/farm_ai/segmentation/auto_segment_basemap.yaml deleted file mode 100644 index e9b6fe9e..00000000 --- a/workflows/farm_ai/segmentation/auto_segment_basemap.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: auto_segment_basemap -sources: - user_input: - - basemap_download.input_geometry - - basemap_automatic_segmentation.input_geometry -sinks: - basemap: basemap_download.merged_basemap - segmentation_mask: basemap_automatic_segmentation.segmentation_mask -parameters: - bingmaps_api_key: - basemap_zoom_level: 14 - model_type: vit_b - spatial_overlap: 0.5 - points_per_side: 16 - n_crop_layers: 0 - crop_overlap_ratio: 0.0 - crop_n_points_downscale_factor: 1 - pred_iou_thresh: 0.88 - stability_score_thresh: 0.95 - stability_score_offset: 1.0 - points_per_batch: 16 - num_workers: 0 - in_memory: True - chip_nms_thr: 0.7 - mask_nms_thr: 0.5 -tasks: - basemap_download: - workflow: data_ingestion/bing/basemap_download_merge - parameters: - api_key: "@from(bingmaps_api_key)" - zoom_level: "@from(basemap_zoom_level)" - basemap_automatic_segmentation: - workflow: ml/segment_anything/automatic_segmentation - parameters: - model_type: "@from(model_type)" - band_names: ["red", "green", "blue"] - band_scaling: null - band_offset: null - spatial_overlap: "@from(spatial_overlap)" - points_per_side: "@from(points_per_side)" - n_crop_layers: "@from(n_crop_layers)" - crop_overlap_ratio: "@from(crop_overlap_ratio)" - crop_n_points_downscale_factor: "@from(crop_n_points_downscale_factor)" - pred_iou_thresh: "@from(pred_iou_thresh)" - stability_score_thresh: "@from(stability_score_thresh)" - stability_score_offset: "@from(stability_score_offset)" - points_per_batch: "@from(points_per_batch)" - num_workers: "@from(num_workers)" - in_memory: "@from(in_memory)" - chip_nms_thr: "@from(chip_nms_thr)" - mask_nms_thr: "@from(mask_nms_thr)" -edges: - - origin: basemap_download.merged_basemap - destination: - - 
basemap_automatic_segmentation.input_raster -description: - short_description: >- - Downloads basemap with BingMaps API and runs Segment Anything Model (SAM) automatic segmentation over - them. - long_description: >- - The workflow lists and downloads basemaps tiles with BingMaps API, and merges them into a - single raster. The raster is then split into chips of 1024x1024 pixels with an overlap defined - by `spatial_overlap`. Each chip is processed by SAM's image encoder, and a point grid is defined - within each chip, with each point being used as a prompt for the segmentation. Each point is - used to generate a mask, and the masks are combined using multiple non-maximal suppression - steps to generate the final segmentation mask. Before running the workflow, make sure the model - has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. - The script will download the desired model weights from SAM repository, export the image encoder - and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the - [FarmVibes.AI - troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) - page in the documentation. - sources: - user_input: Time range and geometry of interest. - sinks: - basemap: Merged basemap used as input to the segmentation. - segmentation_mask: Output segmentation masks. 
diff --git a/workflows/farm_ai/segmentation/auto_segment_s2.yaml b/workflows/farm_ai/segmentation/auto_segment_s2.yaml deleted file mode 100644 index 2e8a7f25..00000000 --- a/workflows/farm_ai/segmentation/auto_segment_s2.yaml +++ /dev/null @@ -1,75 +0,0 @@ -name: auto_segment_s2 -sources: - user_input: - - preprocess_s2.user_input - - s2_automatic_segmentation.input_geometry -sinks: - s2_raster: preprocess_s2.raster - segmentation_mask: s2_automatic_segmentation.segmentation_mask -parameters: - pc_key: - model_type: vit_b - spatial_overlap: 0.5 - points_per_side: 16 - n_crop_layers: 0 - crop_overlap_ratio: 0.0 - crop_n_points_downscale_factor: 1 - pred_iou_thresh: 0.88 - stability_score_thresh: 0.95 - stability_score_offset: 1.0 - points_per_batch: 16 - num_workers: 0 - in_memory: True - chip_nms_thr: 0.7 - mask_nms_thr: 0.5 -tasks: - preprocess_s2: - workflow: data_ingestion/sentinel2/preprocess_s2 - parameters: - pc_key: "@from(pc_key)" - s2_automatic_segmentation: - workflow: ml/segment_anything/automatic_segmentation - parameters: - model_type: "@from(model_type)" - band_names: ["R", "G", "B"] - band_scaling: null - band_offset: null - spatial_overlap: "@from(spatial_overlap)" - points_per_side: "@from(points_per_side)" - n_crop_layers: "@from(n_crop_layers)" - crop_overlap_ratio: "@from(crop_overlap_ratio)" - crop_n_points_downscale_factor: "@from(crop_n_points_downscale_factor)" - pred_iou_thresh: "@from(pred_iou_thresh)" - stability_score_thresh: "@from(stability_score_thresh)" - stability_score_offset: "@from(stability_score_offset)" - points_per_batch: "@from(points_per_batch)" - num_workers: "@from(num_workers)" - in_memory: "@from(in_memory)" - chip_nms_thr: "@from(chip_nms_thr)" - mask_nms_thr: "@from(mask_nms_thr)" -edges: - - origin: preprocess_s2.raster - destination: - - s2_automatic_segmentation.input_raster -description: - short_description: >- - Downloads Sentinel-2 imagery and runs Segment Anything Model (SAM) automatic segmentation over - 
them. - long_description: >- - The workflow retrieves the relevant Sentinel-2 products with the Planetary Computer (PC) API, - and splits the input rasters into chips of 1024x1024 pixels with an overlap defined by - `spatial_overlap`. Each chip is processed by SAM's image encoder, and a point grid is defined - within each chip, with each point being used as a prompt for the segmentation. Each point is - used to generate a mask, and the masks are combined using multiple non-maximal suppression - steps to generate the final segmentation mask. Before running the workflow, make sure the model - has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. - The script will download the desired model weights from SAM repository, export the image encoder - and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the - [FarmVibes.AI - troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) - page in the documentation. - sources: - user_input: Time range and geometry of interest. - sinks: - s2_raster: Sentinel-2 rasters used as input for the segmentation. - segmentation_mask: Output segmentation masks. 
diff --git a/workflows/farm_ai/segmentation/segment_basemap.yaml b/workflows/farm_ai/segmentation/segment_basemap.yaml deleted file mode 100644 index ce5778af..00000000 --- a/workflows/farm_ai/segmentation/segment_basemap.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: segment_basemap -sources: - user_input: - - basemap_download.input_geometry - - basemap_segmentation.input_geometry - prompts: - - basemap_segmentation.input_prompts -sinks: - basemap: basemap_download.merged_basemap - segmentation_mask: basemap_segmentation.segmentation_mask -parameters: - bingmaps_api_key: - basemap_zoom_level: 14 - model_type: vit_b - spatial_overlap: 0.5 -tasks: - basemap_download: - workflow: data_ingestion/bing/basemap_download_merge - parameters: - api_key: "@from(bingmaps_api_key)" - zoom_level: "@from(basemap_zoom_level)" - basemap_segmentation: - workflow: ml/segment_anything/prompt_segmentation - parameters: - model_type: "@from(model_type)" - band_names: ["red", "green", "blue"] - band_scaling: null - band_offset: null - spatial_overlap: "@from(spatial_overlap)" -edges: - - origin: basemap_download.merged_basemap - destination: - - basemap_segmentation.input_raster -description: - short_description: >- - Downloads basemap with BingMaps API and runs Segment Anything Model (SAM) over them with points and/or - bounding boxes as prompts. - long_description: >- - The workflow lists and downloads basemaps tiles with BingMaps API, and merges them into a - single raster. The raster is then split into chips of 1024x1024 pixels with an overlap defined - by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, - followed by prompt encoder and mask decoder. Before running the workflow, make sure the model - has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. 
- The script will download the desired model weights from SAM repository, export the image encoder - and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the - [FarmVibes.AI - troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) - page in the documentation. - sources: - user_input: Time range and geometry of interest. - prompts: >- - ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with - coordinates, label (foreground/background) and prompt id (in case the raster contains - multiple entities that should be segmented in a single workflow run). - sinks: - basemap: Merged basemap used as input to the segmentation. - segmentation_mask: Output segmentation masks. diff --git a/workflows/farm_ai/segmentation/segment_s2.yaml b/workflows/farm_ai/segmentation/segment_s2.yaml deleted file mode 100644 index 5980d2bd..00000000 --- a/workflows/farm_ai/segmentation/segment_s2.yaml +++ /dev/null @@ -1,55 +0,0 @@ -name: segment_s2 -sources: - user_input: - - preprocess_s2.user_input - - s2_segmentation.input_geometry - prompts: - - s2_segmentation.input_prompts -sinks: - s2_raster: preprocess_s2.raster - segmentation_mask: s2_segmentation.segmentation_mask -parameters: - model_type: vit_b - spatial_overlap: 0.5 - pc_key: -tasks: - preprocess_s2: - workflow: data_ingestion/sentinel2/preprocess_s2 - parameters: - pc_key: "@from(pc_key)" - s2_segmentation: - workflow: ml/segment_anything/prompt_segmentation - parameters: - model_type: "@from(model_type)" - band_names: ["R", "G", "B"] - band_scaling: null - band_offset: null - spatial_overlap: "@from(spatial_overlap)" -edges: - - origin: preprocess_s2.raster - destination: - - s2_segmentation.input_raster -description: - short_description: >- - Downloads Sentinel-2 imagery and runs Segment Anything Model (SAM) over them with points and/or - bounding boxes as prompts. 
- long_description: >- - The workflow retrieves the relevant Sentinel-2 products with the Planetary Computer (PC) API, - and splits the input rasters into chips of 1024x1024 pixels with an overlap defined by - `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, - followed by prompt encoder and mask decoder. Before running the workflow, make sure the model - has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. - The script will download the desired model weights from SAM repository, export the image encoder - and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the - [FarmVibes.AI - troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) - page in the documentation. - sources: - user_input: Time range and geometry of interest. - prompts: >- - ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with - coordinates, label (foreground/background) and prompt id (in case, the raster contains - multiple entities that should be segmented in a single workflow run). - sinks: - s2_raster: Sentinel-2 rasters used as input for the segmentation. - segmentation_mask: Output segmentation masks. 
diff --git a/workflows/farm_ai/sensor/optimal_locations.yaml b/workflows/farm_ai/sensor/optimal_locations.yaml deleted file mode 100644 index c104deff..00000000 --- a/workflows/farm_ai/sensor/optimal_locations.yaml +++ /dev/null @@ -1,58 +0,0 @@ -name: optimal_locations -sources: - user_input: - - find_samples.user_input - input_raster: - - compute_index.raster -sinks: - result: find_samples.locations -parameters: - n_clusters: - sieve_size: - index: -tasks: - compute_index: - workflow: data_processing/index/index - parameters: - index: "@from(index)" - find_samples: - op: find_soil_sample_locations - op_dir: minimum_samples - parameters: - n_clusters: "@from(n_clusters)" - sieve_size: "@from(sieve_size)" -edges: - - origin: compute_index.index_raster - destination: - - find_samples.raster -description: - short_description: Identify optimal locations by performing clustering operation using Gaussian Mixture model on computed raster indices. - long_description: |- - The clustering operation separate computed raster indices values into n groups of equal variance, each group assigned a location and that location is considered as a - optimal locations. The sample locations generated provide information of latitude and longitude. The optimal location can be utilized to install sensors and collect - soil information. The index parameter used as input to run the computed index workflow internally using the input raster submitted. The selection of index parameter varies - based on requirement. The workflow supports all the indices supported by spyndex library (https://github.com/awesome-spectral-indices/awesome-spectral-indices#vegetation). - Below provided various indices that are used to identify optimal locations and generated a nutrients heatmap. - Enhanced Vegetation Index (EVI) - EVI is designed to minimize the influence of soil brightness and atmospheric conditions on vegetation assessment. It is calculated - using the red, blue, and near-infrared (NIR) bands. 
EVI is particularly useful for monitoring vegetation in regions with high canopy cover and in areas where atmospheric - interference is significant. This indices also used in notebook (notebooks/heatmaps/nutrients_using_neighbors.ipynb) that derive nutrient information for Carbon, Nitrogen, - and Phosphorus. - Photochemical Reflectance Index (PRI) - It is a vegetation index used to assess the light-use efficiency of plants in terms of photosynthesis and their response to - changes in light conditions, particularly variations in the blue and red parts of the electromagnetic spectrum. This index also used in notebook - (notebooks/heatmaps/nutrients_using_neighbors.ipynb) that derive nutrient information for pH. - The number of sample locations generated depend on input parameters submitted. Tune n_clusters and sieve_size parameters to generate more or less location data points. - For a 100 acre farm, - - 20 sample locations are generated using n_clusters=5 and sieve_size=10. - - 30 sample locations are generated using n_clusters=5 and sieve_size=20. - - 80 sample locations are generated using n_clusters=5 and sieve_size=5. - - 130 sample locations are generated using n_clusters=8 and sieve_size=5. - - sources: - input_raster: List of computed raster indices generated using the sentinel 2 satellite imagery. - user_input: DataVibe with time range information. - sinks: - result: Zip file containing sample locations in a shape file (.shp) format. - parameters: - n_clusters: number of clusters used to generate sample locations. - sieve_size: Group the nearest neighbor pixel values. - index: Index used to generate sample locations. 
diff --git a/workflows/farm_ai/water/irrigation_classification.yaml b/workflows/farm_ai/water/irrigation_classification.yaml deleted file mode 100644 index 462c230c..00000000 --- a/workflows/farm_ai/water/irrigation_classification.yaml +++ /dev/null @@ -1,140 +0,0 @@ -name: irrigation_classification -sources: - user_input: - - landsat.user_input - - merge_geom_time_range.time_range -sinks: - landsat_bands: landsat.raster - ndvi: ndvi.index - cloud_water_mask: cloud_water_mask.cloud_water_mask - dem: match_dem.match_rasters - evaporative_fraction: evaporative_fraction.evaporative_fraction - ngi: ngi_egi_layers.ngi - egi: ngi_egi_layers.egi - lst: ngi_egi_layers.lst - irrigation_probability: irrigation_probability.irrigation_probability -parameters: - ndvi_threshold: 0.0 - ndvi_hot_threshold: 0.02 - coef_ngi: -0.50604148 - coef_egi: -0.93103156 - coef_lst: -0.14612046 - intercept: 1.99036986 - dem_resolution: 30 - dem_provider: CopernicusDEM30 - pc_key: -tasks: - landsat: - workflow: data_ingestion/landsat/preprocess_landsat - parameters: - pc_key: "@from(pc_key)" - ndvi: - op: compute_index - merge_geom: - op: merge_geometries - merge_geom_time_range: - op: merge_geometry_and_time_range - cloud_water_mask: - op: compute_cloud_water_mask - parameters: - ndvi_threshold: "@from(ndvi_threshold)" - dem: - workflow: data_ingestion/dem/download_dem - parameters: - resolution: "@from(dem_resolution)" - provider: "@from(dem_provider)" - match_dem: - workflow: data_processing/merge/match_merge_to_ref - evaporative_fraction: - op: compute_evaporative_fraction - parameters: - ndvi_hot_threshold: "@from(ndvi_hot_threshold)" - ngi_egi_layers: - op: compute_ngi_egi_layers - irrigation_probability: - op: compute_irrigation_probability - parameters: - coef_ngi: "@from(coef_ngi)" - coef_egi: "@from(coef_egi)" - coef_lst: "@from(coef_lst)" - intercept: "@from(intercept)" -edges: - - origin: landsat.raster - destination: - - merge_geom.items - - ndvi.raster - - 
cloud_water_mask.landsat_raster - - match_dem.ref_rasters - - evaporative_fraction.landsat_raster - - ngi_egi_layers.landsat_raster - - irrigation_probability.landsat_raster - - origin: ndvi.index - destination: - - cloud_water_mask.ndvi_raster - - evaporative_fraction.ndvi_raster - - ngi_egi_layers.ndvi_raster - - origin: merge_geom.merged - destination: - - merge_geom_time_range.geometry - - origin: merge_geom_time_range.merged - destination: - - dem.user_input - - origin: dem.raster - destination: - - match_dem.rasters - - origin: match_dem.match_rasters - destination: - - evaporative_fraction.dem_raster - - origin: evaporative_fraction.evaporative_fraction - destination: - - ngi_egi_layers.evaporative_fraction - - origin: cloud_water_mask.cloud_water_mask - destination: - - evaporative_fraction.cloud_water_mask_raster - - ngi_egi_layers.cloud_water_mask_raster - - irrigation_probability.cloud_water_mask_raster - - origin: ngi_egi_layers.ngi - destination: - - irrigation_probability.ngi - - origin: ngi_egi_layers.egi - destination: - - irrigation_probability.egi - - origin: ngi_egi_layers.lst - destination: - - irrigation_probability.lst -description: - short_description: Develops 30m pixel-wise irrigation probability map. - long_description: - The workflow retrieves LANDSAT 8 Surface Reflectance (SR) image tile and land surface elevation DEM data, - and runs four ops to compute irrigation probability map. The land surface elevation data source are 10m - USGS DEM, or 30m Copernicus DEM; but Copernicus DEM is set as the default source in the workflow. Landsat Op - compute_cloud_water_mask utilizes the qa_pixel band of image and NDVI index to generate mask of cloud cover and - water bodies. Op compute_evaporative_fraction utilizes NDVI index, land surface temperature (LST), green and - near infra-red bands, and DEM data to estimate evaporative flux (ETRF). 
Op compute_ngi_egi_layers utilizes NDVI index, - ETRF estimates, green and near infra-red bands to generate NGI and EGI irrigation layers. Lastly op - compute_irrigation_probability uses NGI and EGI layers along with LST band; and applies optimized logistic regression - model to compute 30m pixel-wise irrigation probability map. The coeficients and intercept of the model were obtained - beforehand using as ground-truth data from Nebraska state, USA for the year 2015. - sources: - user_input: Time range and geometry of interest. - sinks: - landsat_bands: Raster of Landsat bands. - ndvi: NDVI raster. - cloud_water_mask: Mask of cloud cover and water bodies. - dem: DEM raster. Options are CopernicusDEM30 and USGS3DEP. - evaporative_fraction: Raster with estimates of evaporative fraction flux. - ngi: Raster of NGI irrigation layer. - egi: Raster of EGI irrigation layer. - lst: Raster of land surface temperature. - irrigation_probability: Raster of irrigation probability map in 30m resolution. - parameters: - ndvi_threshold: NDVI index threshold value for masking water bodies. - ndvi_hot_threshold: Maximum NDVI index threshold value for selecting hot pixel. - coef_ngi: Coefficient of NGI layer in optimized logistic regression model. - coef_egi: Coefficient of EGI layer in optimized logistic regression model. - coef_lst: Coefficient of land surface temperature band in optimized logistic regression model. - intercept: Intercept value of optimized logistic regression model. - pc_key: Optional Planetary Computer API key. 
- - - diff --git a/workflows/forest_ai/deforestation/alos_trend_detection.yaml b/workflows/forest_ai/deforestation/alos_trend_detection.yaml deleted file mode 100644 index 5aaa6d92..00000000 --- a/workflows/forest_ai/deforestation/alos_trend_detection.yaml +++ /dev/null @@ -1,61 +0,0 @@ -name: alos_trend_detection -sources: - user_input: - - alos_forest_extent_download_merge.user_input - - ordinal_trend_detection.input_geometry -sinks: - merged_raster: alos_forest_extent_download_merge.merged_raster - categorical_raster: alos_forest_extent_download_merge.categorical_raster - recoded_raster: ordinal_trend_detection.recoded_raster - clipped_raster: ordinal_trend_detection.clipped_raster - trend_test_result: ordinal_trend_detection.trend_test_result -parameters: - pc_key: - from_values: [4, 3, 0, 2, 1] - to_values: [0, 0, 0, 1, 1] -tasks: - alos_forest_extent_download_merge: - workflow: data_ingestion/alos/alos_forest_extent_download_merge - parameters: - pc_key: "@from(pc_key)" - ordinal_trend_detection: - workflow: forest_ai/deforestation/ordinal_trend_detection - parameters: - from_values: "@from(from_values)" - to_values: "@from(to_values)" -edges: - - origin: alos_forest_extent_download_merge.merged_raster - destination: - - ordinal_trend_detection.raster -description: - short_description: - Detects increase/decrease trends in forest pixel levels over the user-input geometry and time range for the ALOS forest map. - long_description: - This workflow combines the alos_forest_extent_download_merge and - ordinal_trend_detection workflows to detect increase/decrease trends in the - forest pixel levels over the user-provided geometry and time range for the - ALOS forest map. The ALOS PALSAR 2.1 Forest/Non-Forest Maps are downloaded - in the alos_forest_extent_download_merge workflow. 
Then the - ordinal_trend_detection workflow clips the ordinal raster to the - user-provided geometry and time range and determines if there is an - increasing or decreasing trend in the forest pixel levels over them. - alos_trend_detection uses the Cochran-Armitage test to detect trends in the - forest levels over the years. The null hypothesis is that there is no trend - in the pixel levels over the list of rasters. The alternative hypothesis is - that there is a trend in the forest pixel levels over the list of rasters - (one for each year). It returns a p-value and a z-score. If the p-value is - less than some significance level, the null hypothesis is rejected and the - alternative hypothesis is accepted. If the z-score is positive, the trend is - increasing. If the z-score is negative, the trend is decreasing. - sources: - user_input: Time range and geometry of interest. - sinks: - merged_raster: Merged raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range. - categorical_raster: Categorical raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range before the merge operation. - recoded_raster: Recoded raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range. - clipped_raster: Clipped ordinal raster for the user-provided geometry and time range. - trend_test_result: Cochran-armitage test results composed of p-value and z-score. - parameters: - pc_key: Planetary Computer API key. - from_values: Values to recode from. - to_values: Values to recode to. 
\ No newline at end of file diff --git a/workflows/forest_ai/deforestation/ordinal_trend_detection.yaml b/workflows/forest_ai/deforestation/ordinal_trend_detection.yaml deleted file mode 100644 index 821fee96..00000000 --- a/workflows/forest_ai/deforestation/ordinal_trend_detection.yaml +++ /dev/null @@ -1,62 +0,0 @@ -name: ordinal_trend_detection -sources: - raster: - - recode_raster.raster - input_geometry: - - clip.input_geometry -sinks: - recoded_raster: recode_raster.recoded_raster - trend_test_result: trend_test.ordinal_trend_result - clipped_raster: clip.clipped_raster -parameters: - from_values: [] - to_values: [] -tasks: - recode_raster: - op: recode_raster - parameters: - from_values: "@from(from_values)" - to_values: "@from(to_values)" - clip: - workflow: data_processing/clip/clip - compute_pixel_count: - op: compute_pixel_count - trend_test: - op: ordinal_trend_test -edges: - - origin: recode_raster.recoded_raster - destination: - - clip.raster - - origin: clip.clipped_raster - destination: - - compute_pixel_count.raster - - origin: compute_pixel_count.pixel_count - destination: - - trend_test.pixel_count -description: - short_description: - Detects increase/decrease trends in the pixel levels over the user-input geometry and time range. - long_description: - This workflow prepares rasters to perform the Cochran-Armitage trend test - over a user-provided geometry and time range. Initially, it recodes the - input raster according to the 'from_values' and 'to_values' parameters. For - example, if the original raster has values (2, 1, 3, 4, 5) and the default - values of 'from_values' and 'to_values' are respectively [1, 2, 3, 4, 5] and - [6, 7, 8, 9, 10], the recoded raster will have values (7, 6, 8, 9, 10). The - workflow then clips the user-provided geometries and computes an ordinal - raster. It also counts each unique pixel present in the recoded rasters to - create a pixel frequency contingency table. 
This data is used to determine - if there is an increasing or decreasing trend in pixel levels. The - Cochran-Armitage test is a non-parametric test used to ascertain this trend. - The null hypothesis assumes no trend in pixel levels, while the alternative - hypothesis assumes a trend exists. The test returns a p-value and a z-score. - If the p-value is less than some significance level, the null hypothesis is - rejected in favor of the alternative. A positive z-score indicates an - increasing trend, while a negative one indicates a decreasing trend. - sources: - raster: Raster to be processed and tested for trends. - input_geometry: Reference geometry. - sinks: - recoded_raster: Recoded raster for the user-provided geometry and time range. - trend_test_result: Cochran-armitage test results composed of p-value and z-score. - clipped_raster: Clipped ordinal raster for the user-provided geometry and time range. \ No newline at end of file diff --git a/workflows/helloworld.yaml b/workflows/helloworld.yaml deleted file mode 100644 index eef79e9d..00000000 --- a/workflows/helloworld.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: helloworld -sources: - user_input: - - hello.user_input -sinks: - raster: hello.raster -tasks: - hello: - op: helloworld -description: - short_description: Hello world! - long_description: - Small test workflow that generates an image of the Earth with countries that intersect with the - input geometry highlighted in orange. - sources: - user_input: Input geometry. - sinks: - raster: Raster with highlighted countries. 
diff --git a/workflows/ml/crop_segmentation.yaml b/workflows/ml/crop_segmentation.yaml deleted file mode 100644 index 58319f6a..00000000 --- a/workflows/ml/crop_segmentation.yaml +++ /dev/null @@ -1,58 +0,0 @@ -name: crop_segmentation -sources: - user_input: - - spaceeye.user_input -sinks: - segmentation: inference.output_raster -parameters: - pc_key: - model_file: - model_bands: 37 -tasks: - spaceeye: - workflow: data_ingestion/spaceeye/spaceeye_interpolation - parameters: - pc_key: "@from(pc_key)" - ndvi: - workflow: data_processing/index/index - parameters: - index: ndvi - group: - op: select_sequence_from_list - op_dir: select_sequence - parameters: - num: "@from(model_bands)" - criterion: regular - inference: - op: compute_onnx_from_sequence - op_dir: compute_onnx - parameters: - model_file: "@from(model_file)" - window_size: 256 - overlap: .25 - num_workers: 4 -edges: - - origin: spaceeye.raster - destination: - - ndvi.raster - - origin: ndvi.index_raster - destination: - - group.rasters - - origin: group.sequence - destination: - - inference.input_raster -description: - short_description: - Runs a crop segmentation model based on NDVI from SpaceEye imagery along the year. - long_description: - The workflow generates SpaceEye cloud-free data for the input region and time range and computes - NDVI over those. NDVI values sampled regularly along the year are stacked as bands and used as - input to the crop segmentation model. - sources: - user_input: Time range and geometry of interest. - sinks: - segmentation: Crop segmentation map at 10m resolution. - parameters: - pc_key: Optional Planetary Computer API key. - model_file: Path to the ONNX file containing the model architecture and weights. - model_bands: Number of NDVI bands to stack as the model input. 
diff --git a/workflows/ml/dataset_generation/datagen_crop_segmentation.yaml b/workflows/ml/dataset_generation/datagen_crop_segmentation.yaml deleted file mode 100644 index d92da847..00000000 --- a/workflows/ml/dataset_generation/datagen_crop_segmentation.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: datagen_crop_segmentation -sources: - user_input: - - spaceeye.user_input - - cdl.user_input -sinks: - ndvi: ndvi.index_raster - cdl: cdl.raster -parameters: - pc_key: -tasks: - spaceeye: - workflow: data_ingestion/spaceeye/spaceeye_interpolation - parameters: - pc_key: "@from(pc_key)" - ndvi: - workflow: data_processing/index/index - parameters: - index: ndvi - cdl: - workflow: data_ingestion/cdl/download_cdl -edges: - - origin: spaceeye.raster - destination: - - ndvi.raster -description: - short_description: - Generates a dataset for crop segmentation, based on NDVI raster and Crop Data Layer (CDL) maps. - long_description: - The workflow generates SpaceEye cloud-free data for the input region and time range and computes - NDVI over those. It also downloads CDL maps for the years comprised in the time range. - sources: - user_input: Time range and geometry of interest. - sinks: - ndvi: NDVI rasters. - cdl: CDL map for the years comprised in the input time range. - parameters: - pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/ml/driveway_detection.yaml b/workflows/ml/driveway_detection.yaml deleted file mode 100644 index 1b45f18f..00000000 --- a/workflows/ml/driveway_detection.yaml +++ /dev/null @@ -1,62 +0,0 @@ -name: driveway_detection -sources: - input_raster: - - segment.input_raster - - detect.input_raster - - osm.user_input - property_boundaries: - - detect.property_boundaries -sinks: - properties: detect.properties_with_driveways - driveways: detect.driveways -parameters: - min_region_area: - ndvi_thr: - car_size: - num_kernels: - car_thr: -tasks: - segment: - op: segment_driveway - osm: - workflow: data_ingestion/osm_road_geometries - parameters: - network_type: drive_service - buffer_size: 100 - detect: - op: detect_driveway - parameters: - min_region_area: "@from(min_region_area)" - ndvi_thr: "@from(ndvi_thr)" - car_size: "@from(car_size)" - num_kernels: "@from(num_kernels)" - car_thr: "@from(car_thr)" -edges: - - origin: segment.segmentation_raster - destination: - - detect.segmentation_raster - - origin: osm.roads - destination: - - detect.roads -description: - short_description: Detects driveways in front of houses. - long_description: - The workflow downloads road geometry from Open Street Maps and segments the front of houses in - the input image using a machine learning model. It then uses the input image, segmentation map, - road geometry, and input property boundaries to detect the presence of driveways in the front of - each house. - sources: - input_raster: Aerial imagery of the region of interest with RBG + NIR bands. - property_boundaries: Property boundary information for the region of interest. - sinks: - properties: Boundaries of properties that contain a driveway. - driveways: Regions of each property boundary where a driveway was detected. - parameters: - min_region_area: - Minimum contiguous region that will be considered as a potential driveway, in meters. 
- ndvi_thr: Only areas under this NDVI threshold will be considered for driveways. - car_size: Expected size of a car, in pixels, defined as [height, width]. - num_kernels: Number of rotated kernels to try to fit a car inside a potential driveway region. - car_thr: - Ratio of pixels of a kernel that have to be inside a region in order to consider it a parkable - spot. diff --git a/workflows/ml/segment_anything/automatic_segmentation.yaml b/workflows/ml/segment_anything/automatic_segmentation.yaml deleted file mode 100644 index 54c76a56..00000000 --- a/workflows/ml/segment_anything/automatic_segmentation.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: automatic_segmentation -sources: - input_raster: - - clip.raster - input_geometry: - - clip.input_geometry -sinks: - segmentation_mask: combine_masks.output_mask -parameters: - model_type: vit_b - band_names: null - band_scaling: null - band_offset: null - spatial_overlap: 0.5 - points_per_side: 16 - n_crop_layers: 0 - crop_overlap_ratio: 0.0 - crop_n_points_downscale_factor: 1 - pred_iou_thresh: 0.88 - stability_score_thresh: 0.95 - stability_score_offset: 1.0 - points_per_batch: 16 - num_workers: 0 - in_memory: True - chip_nms_thr: 0.7 - mask_nms_thr: 0.5 -tasks: - clip: - workflow: data_processing/clip/clip - sam_inference: - op: automatic_segmentation - op_dir: segment_anything - parameters: - model_type: "@from(model_type)" - band_names: "@from(band_names)" - band_scaling: "@from(band_scaling)" - band_offset: "@from(band_offset)" - spatial_overlap: "@from(spatial_overlap)" - points_per_side: "@from(points_per_side)" - n_crop_layers: "@from(n_crop_layers)" - crop_overlap_ratio: "@from(crop_overlap_ratio)" - crop_n_points_downscale_factor: "@from(crop_n_points_downscale_factor)" - pred_iou_thresh: "@from(pred_iou_thresh)" - stability_score_thresh: "@from(stability_score_thresh)" - stability_score_offset: "@from(stability_score_offset)" - points_per_batch: "@from(points_per_batch)" - num_workers: "@from(num_workers)" - 
in_memory: "@from(in_memory)" - combine_masks: - op: combine_sam_masks - op_dir: segment_anything_combine_masks - parameters: - chip_nms_thr: "@from(chip_nms_thr)" - mask_nms_thr: "@from(mask_nms_thr)" -edges: - - origin: clip.clipped_raster - destination: - - sam_inference.input_raster - - origin: sam_inference.segmented_chips - destination: - - combine_masks.input_masks -description: - short_description: >- - Runs a Segment Anything Model (SAM) automatic segmentation over input rasters. - long_description: >- - The workflow splits the input rasters into chips of 1024x1024 pixels with an overlap - defined by `spatial_overlap`. Each chip is processed by SAM's image encoder, and a point grid - is defined within each chip, with each point being used as a prompt for the segmentation. - Each point is used to generate a mask, and the masks are combined using multiple non-maximal - suppression steps to generate the final segmentation mask. Before running the workflow, make - sure the model has been imported into the cluster by running - `scripts/export_prompt_segmentation_models.py`. The script will download the desired model - weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add - them to the cluster. For more information, refer to the - [FarmVibes.AI - troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) - page in the documentation. - sources: - input_raster: Rasters used as input for the segmentation. - input_geometry: Geometry of interest within the raster for the segmentation. - sinks: - segmentation_mask: Output segmentation masks. 
diff --git a/workflows/ml/segment_anything/prompt_segmentation.yaml b/workflows/ml/segment_anything/prompt_segmentation.yaml deleted file mode 100644 index 03d587e8..00000000 --- a/workflows/ml/segment_anything/prompt_segmentation.yaml +++ /dev/null @@ -1,61 +0,0 @@ -name: prompt_segmentation -sources: - input_raster: - - clip.raster - input_geometry: - - clip.input_geometry - input_prompts: - - ingest_points.user_input -sinks: - segmentation_mask: sam_inference.segmentation_mask -parameters: - model_type: vit_b - band_names: null - band_scaling: null - band_offset: null - spatial_overlap: 0.5 -tasks: - ingest_points: - workflow: data_ingestion/user_data/ingest_geometry - clip: - workflow: data_processing/clip/clip - sam_inference: - op: prompt_segmentation - op_dir: segment_anything - parameters: - model_type: "@from(model_type)" - band_names: "@from(band_names)" - band_scaling: "@from(band_scaling)" - band_offset: "@from(band_offset)" - spatial_overlap: "@from(spatial_overlap)" -edges: - - origin: ingest_points.geometry - destination: - - sam_inference.input_prompts - - origin: clip.clipped_raster - destination: - - sam_inference.input_raster -description: - short_description: >- - Runs Segment Anything Model (SAM) over input rasters with points and/or bounding boxes - as prompts. - long_description: >- - The workflow splits the input input rasters into chips of 1024x1024 pixels with an overlap - defined by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image - encoder, followed by prompt encoder and mask decoder. Before running the workflow, make sure - the model has been imported into the cluster by running - `scripts/export_prompt_segmentation_models.py`. The script will download the desired model - weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add - them to the cluster. 
For more information, refer to the - [FarmVibes.AI - troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) - page in the documentation. - sources: - input_geometry: Geometry of interest within the raster for the segmentation. - input_raster: Rasters used as input for the segmentation. - input_prompts: >- - ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with - coordinates, label (foreground/background) and prompt id (in case, the raster contains - multiple entities that should be segmented in a single workflow run). - sinks: - segmentation_mask: Output segmentation masks. diff --git a/workflows/ml/spectral_extension.yaml b/workflows/ml/spectral_extension.yaml deleted file mode 100644 index d80eca76..00000000 --- a/workflows/ml/spectral_extension.yaml +++ /dev/null @@ -1,68 +0,0 @@ -name: spectral_extension -sources: - raster: - - ingest_raster.input_ref -sinks: - s2_rasters: s2.raster - matched_raster: match.output_raster - extended_raster: compute_onnx.output_raster -parameters: - resampling: nearest -tasks: - ingest_raster: - op: download_raster_from_ref - op_dir: download_from_ref - s2: - workflow: data_ingestion/sentinel2/preprocess_s2 - select: - op: select_sequence_from_list - op_dir: select_sequence - parameters: - num: 1 - criterion: first - match: - op: match_raster_to_ref - parameters: - resampling: "@from(resampling)" - sequence: - op: create_raster_sequence - compute_onnx: - op: compute_onnx_from_sequence - op_dir: compute_onnx - parameters: - model_file: /opt/terravibes/ops/resources/spectral_extension_model/spectral_extension.onnx - nodata: 0 -edges: - - origin: ingest_raster.downloaded - destination: - - s2.user_input - - match.ref_raster - - sequence.rasters1 - - origin: s2.raster - destination: - - select.rasters - - origin: select.sequence - destination: - - match.raster - - origin: match.output_raster - destination: - - sequence.rasters2 - - origin: sequence.sequence - 
destination: - - compute_onnx.input_raster -description: - short_description: Generates high-resolution Sentinel-2 bands by combining UAV and Sentinel-2 data. - long_description: - The workflow will download a user-specified UAV raster, download and resample the corresponding - Sentinel-2 raster, and run the spectral extension model to generate 8 Sentinel-2 bands - at 0.125m resolution. - The input raster should contain three bands (RGB) at 0.125m/px resolution in the range 0-255. - sources: - raster: - The UAV input raster with three bands (red, green, blue, in this order) at 0.125m resolution. - sinks: - s2_rasters: The original Sentinel-2 raster used in the spectral extension. - matched_raster: Sentinel-2 data resampled to the UAV raster's grid (low-resolution). - extended_raster: The generated raster, containing 8 of the 12 Sentinel-2 bands. - parameters: - resampling: Resampling to use when reprojecting the Sentinel-2 data into the UAV raster's grid. \ No newline at end of file