diff --git a/.deprecated_files b/.deprecated_files index af2a4cf8..1fc91256 100644 --- a/.deprecated_files +++ b/.deprecated_files @@ -10,6 +10,7 @@ .github/workflows/check_mandatory_and_static_files.yaml .github/workflows/dev_cd.yaml .github/workflows/unit_and_int_tests.yaml +.github/workflows/cd.yaml scripts/check_mandatory_and_static_files.py scripts/update_static_files.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 89ff58f8..c49da2b5 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -25,12 +25,16 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, + "editor.codeActionsOnSave": { + "source.organizeImports": true + }, "editor.formatOnSave": true, "editor.renderWhitespace": "all", "editor.rulers": [ 88 ], - "editor.defaultFormatter": "ms-python.black-formatter", + "ruff.organizeImports": true, + "editor.defaultFormatter": "charliermarsh.ruff", "licenser.license": "Custom", "licenser.customHeaderFile": "/workspace/.devcontainer/license_header.txt" }, @@ -52,7 +56,6 @@ "visualstudioexptteam.vscodeintellicode", "ymotongpoo.licenser", "charliermarsh.ruff", - "ms-python.black-formatter", "ms-python.mypy-type-checker" ] } diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml deleted file mode 100644 index b621db7b..00000000 --- a/.github/workflows/cd.yaml +++ /dev/null @@ -1,120 +0,0 @@ -name: CD - -on: - release: - types: [published] - # trigger only on new release - -jobs: - verify_version: - runs-on: ubuntu-latest - outputs: - # export to be used in other jobs - version: ${{ steps.get_version_tag.outputs.version }} - steps: - - uses: actions/checkout@v3 - name: Check out code - - - uses: actions/setup-python@v4 - name: Set up Python 3.9 - with: - python-version: "3.9" - - - id: get_version_tag - name: Get version tag - run: | - TAG_VER="${GITHUB_REF##*/}" - # set as output: - echo "version: ${TAG_VER}" - echo "version=${TAG_VER}" >> $GITHUB_OUTPUT - - - id: verify_semantic_tag_format - name: Verify tag format - # format must be compatible with semantic versioning - run: | - SEMVER_REGEX="^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" - if echo "${{ steps.get_version_tag.outputs.version }}" | grep -Eq "$SEMVER_REGEX"; then - echo "Tag format is valid" - else - echo "Invalid tag format: ${{ steps.get_version_tag.outputs.version }}" - exit 1 - fi - - - id: verify_package_version - name: Verify package version vs tag version - # package version must be same with tag version - run: | - PKG_VER="$(grep -oP 'version = \"\K[^\"]+' pyproject.toml)" - echo "Package version is $PKG_VER" >&2 - echo "Tag version is ${{ steps.get_version_tag.outputs.version }}" >&2 - if [ "$PKG_VER" != "${{ steps.get_version_tag.outputs.version }}" ]; then - echo "Package version and tag name mismatch." 
>&2 - exit 1 - fi - - push_to_docker_hub: - runs-on: ubuntu-latest - needs: verify_version - steps: - - uses: actions/checkout@v3 - name: Check out code - - - uses: docker/setup-qemu-action@v2.0.0 - name: Set up QEMU - - - uses: docker/setup-buildx-action@v2.5.0 - name: Set up Docker Buildx - - - uses: docker/login-action@v2.1.0 - name: Login to DockerHub - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - uses: docker/build-push-action@v4.0.0 - name: Build and push - id: docker_build - with: - push: true - platforms: linux/amd64,linux/arm64/v8 - tags: "ghga/${{ github.event.repository.name }}:${{ needs.verify_version.outputs.version }}" - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master - with: - image-ref: "docker.io/ghga/${{ github.event.repository.name }}:${{ needs.verify_version.outputs.version }}" - format: "table" - exit-code: "1" - ignore-unfixed: true - vuln-type: "os,library" - severity: "CRITICAL,HIGH" - - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} - - # Please uncomment and adapt the DEPLOYMENT_CONFIG_REPO to trigger automatic - # updates of helm charts: - update_deployment_repo: - runs-on: ubuntu-latest - needs: - - verify_version - - push_to_docker_hub - env: - DEPLOYMENT_CONFIG_REPO: ghga-de/helm - steps: - - name: trigger update in deployment repo - run: | - # access token needs to be of format: : - curl -X POST \ - "https://api.github.com/repos/${DEPLOYMENT_CONFIG_REPO}/dispatches" \ - -H 'Accept: application/vnd.github.everest-preview+json' \ - -u '${{ secrets.DEPLOYMENT_UPDATE_TOKEN }}' \ - --data '{ - "event_type": "new_app_version", - "client_payload": { - "deploy_filename": "${{ github.event.repository.name }}", - "app_name": "${{ github.event.repository.name }}", - "context": "${{ needs.verify_version.outputs.version }}", - "new_image_tag": "${{ needs.verify_version.outputs.version }}" - } - }' diff --git a/.github/workflows/ci_release.yaml b/.github/workflows/ci_release.yaml new file mode 100644 index 00000000..d8547b40 --- /dev/null +++ b/.github/workflows/ci_release.yaml @@ -0,0 +1,15 @@ +name: CI on release + +on: + release: + types: [published] + +jobs: + push_to_docker_hub: + runs-on: ubuntu-latest + steps: + - uses: ghga-de/gh-action-ci@v1 + with: + tag: ${{ github.event.release.tag_name }} + dockerhub_username: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub_token: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/ci_workflow_dispatch.yaml b/.github/workflows/ci_workflow_dispatch.yaml new file mode 100644 index 00000000..1452f7ec --- /dev/null +++ b/.github/workflows/ci_workflow_dispatch.yaml @@ -0,0 +1,30 @@ +name: Build on PR or dispatch + +on: + workflow_dispatch: + pull_request: + types: + - opened + - synchronize + - reopened + - labeled + +jobs: + fetch-tag: + runs-on: ubuntu-latest + if: ( github.event_name == 'workflow_dispatch' || github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'build') ) || ( github.event.action == 'labeled' && github.event.label.name == 'build' ) + steps: + - id: fetch-tag + uses: ghga-de/gh-action-fetch-tag@v1 + outputs: + latest_tag: ${{ steps.fetch-tag.outputs.latest_tag }} + + push_to_docker_hub: + needs: fetch-tag + runs-on: ubuntu-latest + steps: + - uses: ghga-de/gh-action-ci@v1 + with: + tag: ${{ needs.fetch-tag.outputs.latest_tag }}-${{ github.sha }} + dockerhub_username: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub_token: ${{ secrets.DOCKERHUB_TOKEN }} diff
--git a/.github/workflows/static_code_analysis.yaml b/.github/workflows/static_code_analysis.yaml index 401f4289..39b9bad4 100644 --- a/.github/workflows/static_code_analysis.yaml +++ b/.github/workflows/static_code_analysis.yaml @@ -16,10 +16,9 @@ jobs: env: SKIP: no-commit-to-branch - name: ruff - uses: chartboost/ruff-action@v1 - - name: black run: | - black --check . + ruff check --output-format=github . + ruff format --check . - name: mypy run: | mypy . diff --git a/.static_files b/.static_files index 5f035a96..1f129ea4 100644 --- a/.static_files +++ b/.static_files @@ -30,12 +30,13 @@ scripts/list_outdated_dependencies.py scripts/README.md .github/workflows/check_config_docs.yaml +.github/workflows/check_openapi_spec.yaml +.github/workflows/check_readme.yaml .github/workflows/check_template_files.yaml +.github/workflows/ci_release.yaml +.github/workflows/ci_workflow_dispatch.yaml .github/workflows/static_code_analysis.yaml .github/workflows/tests.yaml -.github/workflows/check_openapi_spec.yaml -.github/workflows/check_readme.yaml -.github/workflows/cd.yaml example_data/README.md diff --git a/pyproject.toml b/pyproject.toml index dbf12fe4..5d1b5eb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "linkml==1.6.1", "linkml-runtime==1.6.0", "linkml-validator==0.4.5", + "schemapack>=1.0.0, <2.0.0" ] [project.license] diff --git a/requirements-dev-common.in b/requirements-dev-common.in index 1c677cd3..c756c9ad 100644 --- a/requirements-dev-common.in +++ b/requirements-dev-common.in @@ -13,8 +13,6 @@ mypy-extensions>=1.0.0 ruff>=0.0.290 -black>=23.1.0 - click>=8.1.0 typer>=0.7.0 diff --git a/requirements-dev.txt b/requirements-dev.txt index 97c91940..6b54f120 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --generate-hashes --output-file=/workspace/requirements-dev.txt /tmp/tmph8bxvet7/pyproject.toml /workspace/requirements-dev.in +# pip-compile --generate-hashes --output-file=/workspace/requirements-dev.txt /tmp/tmpwflr3qrs/pyproject.toml /workspace/requirements-dev.in # aiokafka==0.8.0 \ --hash=sha256:021e9f0027ca63c6c04daccfdd0e985f7a56d51bd0d43f482f674a58fada52f5 \ @@ -648,6 +648,10 @@ idna==3.4 \ # jsonschema # requests # rfc3986 +immutabledict==3.0.0 \ + --hash=sha256:034bacc6c6872707c4ec0ea9515de6bbe0dcf0fcabd97ae19fd4e4c338f05798 \ + --hash=sha256:5a23cd369a6187f76a8c29d7d687980b092538eb9800e58964603f1b973c56fe + # via schemapack importlib-metadata==6.8.0 \ --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ --hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743 @@ -704,15 +708,16 @@ jsonpointer==2.4 \ # via # jsonpatch # jsonschema -jsonschema[format]==4.19.1 \ --hash=sha256:cd5f1f9ed9444e554b38ba003af06c0a8c2868131e56bfbef0550fb450c0330e \ --hash=sha256:ec84cc37cfa703ef7cd4928db24f9cb31428a5d0fa77747b8b51a847458e0bbf +jsonschema[format]==4.20.0 \ + --hash=sha256:4f614fd46d8d61258610998997743ec5492a648b33cf478c1ddc23ed4598a5fa \ + --hash=sha256:ed6231f0429ecf966f5bc8dfef245998220549cbbcf140f913b7464c52c3b6b3 # via # ghga-event-schemas # hexkit # jsonschema # linkml # linkml-runtime + # schemapack jsonschema-specifications==2023.7.1 \ --hash=sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1 \ --hash=sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb @@ -929,6 +934,7 @@ pydantic[email]==2.4.2 \ # linkml-runtime #
linkml-validator # pydantic-settings + # schemapack pydantic-core==2.10.1 \ --hash=sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e \ --hash=sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33 \ @@ -1040,7 +1046,9 @@ pydantic-core==2.10.1 \ pydantic-settings==2.0.3 \ --hash=sha256:962dc3672495aad6ae96a4390fac7e593591e144625e5112d359f8f67fb75945 \ --hash=sha256:ddd907b066622bd67603b75e2ff791875540dc485b7307c4fffc015719da8625 - # via hexkit + # via + # hexkit + # schemapack pyjsg==0.11.10 \ --hash=sha256:10af60ff42219be7e85bf7f11c19b648715b0b29eb2ddbd269e87069a7c3f26d \ --hash=sha256:4bd6e3ff2833fa2b395bbe803a2d72a5f0bab5b7285bccd0da1a1bc0aee88bfa @@ -1195,47 +1203,57 @@ python-dotenv==1.0.0 \ pytrie==0.4.0 \ --hash=sha256:8f4488f402d3465993fb6b6efa09866849ed8cda7903b50647b7d0342b805379 # via curies -pyyaml==6.0 \ - --hash=sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf \ - --hash=sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293 \ - --hash=sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b \ - --hash=sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57 \ - --hash=sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b \ - --hash=sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4 \ - --hash=sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07 \ - --hash=sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba \ - --hash=sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9 \ - --hash=sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287 \ - --hash=sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513 \ - --hash=sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0 \ - --hash=sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782 \ - --hash=sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0 \ - --hash=sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92 \ - --hash=sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f \ - --hash=sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2 \ - --hash=sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc \ - --hash=sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1 \ - --hash=sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c \ - --hash=sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86 \ - --hash=sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4 \ - --hash=sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c \ - --hash=sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34 \ - --hash=sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b \ - --hash=sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d \ - --hash=sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c \ - --hash=sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb \ - --hash=sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7 \ - --hash=sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737 \ - --hash=sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3 \ - --hash=sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d \ - 
--hash=sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358 \ - --hash=sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53 \ - --hash=sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78 \ - --hash=sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803 \ - --hash=sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a \ - --hash=sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f \ - --hash=sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174 \ - --hash=sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5 +pyyaml==6.0.1 \ + --hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \ + --hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \ + --hash=sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df \ + --hash=sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741 \ + --hash=sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206 \ + --hash=sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27 \ + --hash=sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595 \ + --hash=sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62 \ + --hash=sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98 \ + --hash=sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696 \ + --hash=sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290 \ + --hash=sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9 \ + --hash=sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d \ + --hash=sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6 \ + --hash=sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867 \ + --hash=sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47 \ + --hash=sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486 \ + --hash=sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6 \ + --hash=sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3 \ + --hash=sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007 \ + --hash=sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938 \ + --hash=sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0 \ + --hash=sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c \ + --hash=sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735 \ + --hash=sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d \ + --hash=sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28 \ + --hash=sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4 \ + --hash=sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba \ + --hash=sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8 \ + --hash=sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5 \ + --hash=sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd \ + --hash=sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3 \ + --hash=sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0 \ + --hash=sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515 \ + 
--hash=sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c \ + --hash=sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c \ + --hash=sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924 \ + --hash=sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34 \ + --hash=sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43 \ + --hash=sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859 \ + --hash=sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673 \ + --hash=sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54 \ + --hash=sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a \ + --hash=sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b \ + --hash=sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab \ + --hash=sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa \ + --hash=sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c \ + --hash=sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585 \ + --hash=sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d \ + --hash=sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f # via # hexkit # json-flattener @@ -1245,6 +1263,7 @@ pyyaml==6.0 \ # pre-commit # prefixcommons # prefixmaps + # schemapack # uvicorn rdflib==7.0.0 \ --hash=sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd \ @@ -1457,6 +1476,10 @@ ruff==0.0.292 \ --hash=sha256:f27282bedfd04d4c3492e5c3398360c9d86a295be00eccc63914438b4ac8a83c \ --hash=sha256:f4476f1243af2d8c29da5f235c13dca52177117935e1f9393f9d90f9833f69e4 # via -r /workspace/requirements-dev-common.in +schemapack==1.0.0 \ + --hash=sha256:2f62d7bac01c7d703f5bea095020a4911b7b8329547d47acb1bc0f53a9b42bdb \ + --hash=sha256:3146b19b4b512732b52976c1f435bae6fc88aa0a1ed065021f037f8dd1f51b9d + # via metldata (pyproject.toml) shexjsg==0.8.2 \ --hash=sha256:3b0d8432dd313bee9e1343382c5e02e9908dd941a7dd7342bf8c0200fe523766 \ --hash=sha256:f17a629fc577fa344382bdee143cd9ff86588537f9f811f66cea6f63cdbcd0b6 @@ -1549,7 +1572,7 @@ stringcase==1.2.0 \ # via # -r /workspace/requirements-dev-common.in # linkml-validator -testcontainers[kafka,mongo]==3.7.1 \ +testcontainers[mongo]==3.7.1 \ --hash=sha256:7f48cef4bf0ccd78f1a4534d4b701a003a3bace851f24eae58a32f9e3f0aeba0 # via # hexkit diff --git a/requirements.txt b/requirements.txt index 75461ba8..11a381f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --constraint=/workspace/requirements-dev.txt --generate-hashes --output-file=/workspace/requirements.txt /tmp/tmph8bxvet7/pyproject.toml +# pip-compile --constraint=/workspace/requirements-dev.txt --generate-hashes --output-file=/workspace/requirements.txt /tmp/tmpwflr3qrs/pyproject.toml # aiokafka==0.8.0 \ --hash=sha256:021e9f0027ca63c6c04daccfdd0e985f7a56d51bd0d43f482f674a58fada52f5 \ @@ -582,6 +582,12 @@ idna==3.4 \ # jsonschema # requests # rfc3986 +immutabledict==3.0.0 \ + --hash=sha256:034bacc6c6872707c4ec0ea9515de6bbe0dcf0fcabd97ae19fd4e4c338f05798 \ + --hash=sha256:5a23cd369a6187f76a8c29d7d687980b092538eb9800e58964603f1b973c56fe + # via + # -c /workspace/requirements-dev.txt + # schemapack importlib-metadata==6.8.0 \ --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ 
--hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743 @@ -653,9 +659,9 @@ jsonpointer==2.4 \ # -c /workspace/requirements-dev.txt # jsonpatch # jsonschema -jsonschema[format]==4.19.1 \ - --hash=sha256:cd5f1f9ed9444e554b38ba003af06c0a8c2868131e56bfbef0550fb450c0330e \ - --hash=sha256:ec84cc37cfa703ef7cd4928db24f9cb31428a5d0fa77747b8b51a847458e0bbf +jsonschema[format]==4.20.0 \ + --hash=sha256:4f614fd46d8d61258610998997743ec5492a648b33cf478c1ddc23ed4598a5fa \ + --hash=sha256:ed6231f0429ecf966f5bc8dfef245998220549cbbcf140f913b7464c52c3b6b3 # via # -c /workspace/requirements-dev.txt # ghga-event-schemas @@ -663,6 +669,7 @@ jsonschema[format]==4.19.1 \ # jsonschema # linkml # linkml-runtime + # schemapack jsonschema-specifications==2023.7.1 \ --hash=sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1 \ --hash=sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb @@ -843,6 +850,7 @@ pydantic[email]==2.4.2 \ # linkml-runtime # linkml-validator # pydantic-settings + # schemapack pydantic-core==2.10.1 \ --hash=sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e \ --hash=sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33 \ @@ -959,6 +967,7 @@ pydantic-settings==2.0.3 \ # via # -c /workspace/requirements-dev.txt # hexkit + # schemapack pyjsg==0.11.10 \ --hash=sha256:10af60ff42219be7e85bf7f11c19b648715b0b29eb2ddbd269e87069a7c3f26d \ --hash=sha256:4bd6e3ff2833fa2b395bbe803a2d72a5f0bab5b7285bccd0da1a1bc0aee88bfa @@ -1101,47 +1110,57 @@ pytrie==0.4.0 \ # via # -c /workspace/requirements-dev.txt # curies -pyyaml==6.0 \ - --hash=sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf \ - --hash=sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293 \ - --hash=sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b \ - --hash=sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57 \ - --hash=sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b \ - --hash=sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4 \ - --hash=sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07 \ - --hash=sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba \ - --hash=sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9 \ - --hash=sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287 \ - --hash=sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513 \ - --hash=sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0 \ - --hash=sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782 \ - --hash=sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0 \ - --hash=sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92 \ - --hash=sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f \ - --hash=sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2 \ - --hash=sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc \ - --hash=sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1 \ - --hash=sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c \ - --hash=sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86 \ - --hash=sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4 \ - --hash=sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c \ - 
--hash=sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34 \ - --hash=sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b \ - --hash=sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d \ - --hash=sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c \ - --hash=sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb \ - --hash=sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7 \ - --hash=sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737 \ - --hash=sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3 \ - --hash=sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d \ - --hash=sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358 \ - --hash=sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53 \ - --hash=sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78 \ - --hash=sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803 \ - --hash=sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a \ - --hash=sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f \ - --hash=sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174 \ - --hash=sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5 +pyyaml==6.0.1 \ + --hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \ + --hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \ + --hash=sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df \ + --hash=sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741 \ + --hash=sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206 \ + --hash=sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27 \ + --hash=sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595 \ + --hash=sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62 \ + --hash=sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98 \ + --hash=sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696 \ + --hash=sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290 \ + --hash=sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9 \ + --hash=sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d \ + --hash=sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6 \ + --hash=sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867 \ + --hash=sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47 \ + --hash=sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486 \ + --hash=sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6 \ + --hash=sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3 \ + --hash=sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007 \ + --hash=sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938 \ + --hash=sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0 \ + --hash=sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c \ + --hash=sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735 \ + --hash=sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d \ + 
--hash=sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28 \ + --hash=sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4 \ + --hash=sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba \ + --hash=sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8 \ + --hash=sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5 \ + --hash=sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd \ + --hash=sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3 \ + --hash=sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0 \ + --hash=sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515 \ + --hash=sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c \ + --hash=sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c \ + --hash=sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924 \ + --hash=sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34 \ + --hash=sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43 \ + --hash=sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859 \ + --hash=sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673 \ + --hash=sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54 \ + --hash=sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a \ + --hash=sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b \ + --hash=sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab \ + --hash=sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa \ + --hash=sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c \ + --hash=sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585 \ + --hash=sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d \ + --hash=sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f # via # -c /workspace/requirements-dev.txt # hexkit @@ -1150,6 +1169,7 @@ pyyaml==6.0 \ # linkml-runtime # prefixcommons # prefixmaps + # schemapack # uvicorn rdflib==7.0.0 \ --hash=sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd \ @@ -1357,6 +1377,12 @@ ruamel-yaml-clib==0.2.8 \ # via # -c /workspace/requirements-dev.txt # ruamel-yaml +schemapack==1.0.0 \ + --hash=sha256:2f62d7bac01c7d703f5bea095020a4911b7b8329547d47acb1bc0f53a9b42bdb \ + --hash=sha256:3146b19b4b512732b52976c1f435bae6fc88aa0a1ed065021f037f8dd1f51b9d + # via + # -c /workspace/requirements-dev.txt + # metldata (pyproject.toml) shexjsg==0.8.2 \ --hash=sha256:3b0d8432dd313bee9e1343382c5e02e9908dd941a7dd7342bf8c0200fe523766 \ --hash=sha256:f17a629fc577fa344382bdee143cd9ff86588537f9f811f66cea6f63cdbcd0b6 diff --git a/scripts/list_outdated_dependencies.py b/scripts/list_outdated_dependencies.py index 1aa15a13..db91fe05 100755 --- a/scripts/list_outdated_dependencies.py +++ b/scripts/list_outdated_dependencies.py @@ -51,7 +51,7 @@ def get_main_deps_pyproject(modified_pyproject: dict[str, Any]) -> list[Requirem def get_optional_deps_pyproject( - modified_pyproject: dict[str, Any] + modified_pyproject: dict[str, Any], ) -> list[Requirement]: """Get a list of the optional dependencies from pyproject.toml""" diff --git a/src/metldata/schemapack_/__init__.py b/src/metldata/schemapack_/__init__.py new file mode 100644 index 00000000..2792d042 --- /dev/null +++ 
b/src/metldata/schemapack_/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Re-implementation based on schemapack.""" diff --git a/src/metldata/schemapack_/builtin_transformations/__init__.py b/src/metldata/schemapack_/builtin_transformations/__init__.py new file mode 100644 index 00000000..0d36155a --- /dev/null +++ b/src/metldata/schemapack_/builtin_transformations/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Built-in transformations""" diff --git a/src/metldata/schemapack_/builtin_transformations/null/__init__.py b/src/metldata/schemapack_/builtin_transformations/null/__init__.py new file mode 100644 index 00000000..598799bf --- /dev/null +++ b/src/metldata/schemapack_/builtin_transformations/null/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""A Null transformer that returns the input model and data unchanged. Useful e.g. 
for +testing.""" + +from metldata.schemapack_.builtin_transformations.null.main import ( # noqa: F401 + NULL_TRANSFORMATION, +) diff --git a/src/metldata/schemapack_/builtin_transformations/null/config.py b/src/metldata/schemapack_/builtin_transformations/null/config.py new file mode 100644 index 00000000..80ab6019 --- /dev/null +++ b/src/metldata/schemapack_/builtin_transformations/null/config.py @@ -0,0 +1,23 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Configuration for the transformation.""" + +from pydantic import BaseModel + + +class NullConfig(BaseModel): + """No Parameters required for this transformation.""" diff --git a/src/metldata/schemapack_/builtin_transformations/null/main.py b/src/metldata/schemapack_/builtin_transformations/null/main.py new file mode 100644 index 00000000..d932b5ef --- /dev/null +++ b/src/metldata/schemapack_/builtin_transformations/null/main.py @@ -0,0 +1,61 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Contains the transformation definition.""" + +from schemapack.spec.datapack import DataPack +from schemapack.spec.schemapack import SchemaPack + +from metldata.schemapack_.builtin_transformations.null.config import NullConfig +from metldata.schemapack_.transform.base import ( + DataTransformer, + TransformationDefinition, +) + + +def null_model_assumptions(model: SchemaPack, config: NullConfig): + """No assumptions made.""" + return + + +def null_transform_model(model: SchemaPack, config: NullConfig) -> SchemaPack: + """The model is returned unchanged.""" + return model + + +class NullTransformer(DataTransformer[NullConfig]): + """A Null transformer that returns the input model and data unchanged. Useful e.g. + for testing.""" + + def transform(self, data: DataPack) -> DataPack: + """Transforms data. + + Args: + data: The data as DataPack to be transformed. + + Raises: + DataTransformationError: + if the transformation fails. 
+ """ + return data + + +NULL_TRANSFORMATION = TransformationDefinition( + config_cls=NullConfig, + check_model_assumptions=lambda schemapack, config: None, + transform_model=lambda schemapack, config: schemapack, + data_transformer_factory=NullTransformer, +) diff --git a/src/metldata/schemapack_/transform/__init__.py b/src/metldata/schemapack_/transform/__init__.py new file mode 100644 index 00000000..6b39eff7 --- /dev/null +++ b/src/metldata/schemapack_/transform/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Handles transformations of the source events to generate artifacts.""" diff --git a/src/metldata/schemapack_/transform/base.py b/src/metldata/schemapack_/transform/base.py new file mode 100644 index 00000000..11fe0353 --- /dev/null +++ b/src/metldata/schemapack_/transform/base.py @@ -0,0 +1,242 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Models to describe transformations and workflows.""" + +from abc import ABC, abstractmethod +from collections import defaultdict +from dataclasses import dataclass +from graphlib import CycleError, TopologicalSorter +from typing import Callable, Generic, Optional, TypeVar + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + create_model, + field_validator, + model_validator, +) +from schemapack.spec.datapack import DataPack +from schemapack.spec.schemapack import SchemaPack + + +class ModelAssumptionError(RuntimeError): + """Raised when assumptions made by transformation step about a model are not met.""" + + +class ModelTransformationError(RuntimeError): + """Raised when a transformation failed when applied to the schemapack-based model.""" + + +class DataTransformationError(RuntimeError): + """Raised when a transformation failed when applied to data in datapack-format.""" + + +Config = TypeVar("Config", bound=BaseModel) + + +class DataTransformer(ABC, Generic[Config]): + """A base class for a data transformer.""" + + def __init__( + self, + *, + config: Config, + original_model: SchemaPack, + transformed_model: SchemaPack, + ): + """Initialize the transformer with config params, the original model, and the + transformed model. 
+ """ + self._config = config + self._original_model = original_model + self._transformed_model = transformed_model + + @abstractmethod + def transform(self, data: DataPack) -> DataPack: + """Transforms data. + + Args: + data: The data as DataPack to be transformed. + + Raises: + DataTransformationError: + if the transformation fails. + """ + ... + + +@dataclass(frozen=True) +class TransformationDefinition(Generic[Config]): + """A model for describing a transformation.""" + + config_cls: type[Config] = Field( + ..., description="The config model of the transformation." + ) + check_model_assumptions: Callable[[SchemaPack, Config], None] = Field( + ..., + description=( + "A function that checks the assumptions made about the input model." + " Raises a ModelAssumptionError if the assumptions are not met." + ), + ) + transform_model: Callable[[SchemaPack, Config], SchemaPack] = Field( + ..., + description=( + "A function to transform the model. Raises a" + + " ModelTransformationError if the transformation fails." + ), + ) + data_transformer_factory: type[DataTransformer] = Field( + ..., + description=( + "A class for transforming data. Raises a DataTransformationError" + " if the transformation fails." + ), + ) + + +class WorkflowConfig(BaseModel, ABC): + """A base class for workflow configs.""" + + +class WorkflowStepBase(BaseModel, ABC): + """A base class for workflow steps.""" + + model_config = ConfigDict(frozen=True) + description: str = Field(..., description="A description of the step.") + input: Optional[str] = Field( + ..., + description=( + "The name of the workflow step from which the output is used as input" + " for this step. If this is the first step, set to None." + ), + ) + + +class WorkflowStep(WorkflowStepBase): + """A single step in a transformation workflow.""" + + transformation_definition: TransformationDefinition = Field( + ..., + description="The transformation to be executed in this step.", + ) + + +class WorkflowDefinition(BaseModel): + """A definition of a transformation workflow.""" + + model_config = ConfigDict(frozen=True) + description: str = Field(..., description="A description of the workflow.") + steps: dict[str, WorkflowStep] = Field( + ..., + description=( + "A dictionary of workflow steps. The keys are the names of the steps, and" + + " the values are the workflow steps themselves." + ), + ) + artifacts: dict[str, str] = Field( + ..., + description=( + "A dictionary of artifacts that are output by this workflow." + + " The keys are the names of the artifacts, and the values are the names" + + " of the workflow steps that output them." + ), + ) + + # pylint: disable=no-self-argument + @field_validator("steps", mode="after") + def validate_step_references( + cls, steps: dict[str, WorkflowStep] + ) -> dict[str, WorkflowStep]: + """Validate that workflow steps reference other existing steps as input. + There should be exactly one step with input=None. + """ + step_with_no_input_found = False + + for step_name, step in steps.items(): + if step.input is None: + if step_with_no_input_found: + raise ValueError( + "There should be exactly one step with input=None. But multiple" + + " were found." + ) + step_with_no_input_found = True + continue + if step.input not in steps: + raise ValueError( + f"Step {step.input} referenced in step {step_name} is not defined." + ) + + if not step_with_no_input_found: + raise ValueError( + "There should be exactly one step with input=None but none was found." 
+ ) + + return steps + + @model_validator(mode="after") + def validate_artifact_references(cls, values): + """Validate that artifacts reference existing workflow steps.""" + steps = values.steps + if steps is None: + raise ValueError("Steps are undefined.") + artifacts = values.artifacts + if artifacts is None: + raise ValueError("Artifacts are undefined.") + + for artifact_name, step_name in artifacts.items(): + if step_name not in steps: + raise ValueError( + f"Step {step_name} referenced in artifact {artifact_name} is not defined." + ) + + return values + + @property + def config_cls(self) -> type[WorkflowConfig]: + """Get a config model containing the config requirements from all workflow + steps. + """ + step_configs = { + step_name: (step.transformation_definition.config_cls, ...) + for step_name, step in self.steps.items() + } + + config_cls = create_model( # type: ignore + "SpecificWorkflowConfig", + **step_configs, + __base__=WorkflowConfig, + ) + + return config_cls + + @property + def step_order(self) -> list[str]: + """Get a list of step names in the order in which the steps should be executed.""" + # create graph from steps; register every step as a node so that even + # input-less steps appear in the topological ordering + graph: dict[str, set[str]] = defaultdict(set[str]) + for step_name, step in self.steps.items(): + graph[step_name].update({step.input} if step.input else set()) + + # sort with TopologicalSorter + topological_sorter = TopologicalSorter(graph) + try: + return list(topological_sorter.static_order()) + except CycleError as exc: + raise RuntimeError("Step definitions imply a circular dependency.") from exc diff --git a/src/metldata/schemapack_/transform/handling.py b/src/metldata/schemapack_/transform/handling.py new file mode 100644 index 00000000..2d1a5b7b --- /dev/null +++ b/src/metldata/schemapack_/transform/handling.py @@ -0,0 +1,254 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Logic for handling transformations.""" + +from pydantic import BaseModel, ConfigDict, ValidationError +from schemapack.spec.datapack import DataPack +from schemapack.spec.schemapack import SchemaPack +from schemapack.validation import SchemaPackValidator + +from metldata.schemapack_.transform.base import ( + Config, + TransformationDefinition, + WorkflowConfig, + WorkflowDefinition, + WorkflowStep, + WorkflowStepBase, +) + + +class WorkflowConfigMismatchError(RuntimeError): + """Raised when the provided workflow config does not match the config class of the + workflow definition. + """ + + def __init__( + self, workflow_definition: WorkflowDefinition, workflow_config: BaseModel + ): + """Initialize the error with the workflow definition and the config.""" + message = ( + f"The config {workflow_config} is not an instance of the config class " + f"{workflow_definition.config_cls} of the workflow definition " + f"{workflow_definition}."
+ ) + super().__init__(message) + + +class TransformationHandler: + """Used for executing transformations described in a TransformationDefinition.""" + + def __init__( + self, + transformation_definition: TransformationDefinition[Config], + transformation_config: Config, + original_model: SchemaPack, + ): + """Initialize the TransformationHandler by checking the assumptions made on the + original model and transforming the model as described in the transformation + definition. The transformed model is available at the `transformed_model` + attribute. + + Raises: + ModelAssumptionError: + if the assumptions made on the original model are not met. + """ + self._definition = transformation_definition + self._config = transformation_config + self._original_model = original_model + + self._definition.check_model_assumptions(self._original_model, self._config) + self.transformed_model = self._definition.transform_model( + self._original_model, self._config + ) + self._data_transformer = self._definition.data_transformer_factory( + config=self._config, + original_model=self._original_model, + transformed_model=self.transformed_model, + ) + + self._original_data_validator = SchemaPackValidator( + schemapack=self._original_model + ) + self._transformed_data_validator = SchemaPackValidator( + schemapack=self.transformed_model + ) + + def transform_data(self, data: DataPack) -> DataPack: + """Transforms data using the transformation definition. Validates the + original data against the original model and the transformed data + against the transformed model. + + Args: + data: The data to be transformed. + + Raises: + schemapack.exceptions.ValidationError: + If validation of input data or transformed data fails against the + original or transformed model, respectively. + DataTransformationError: + if the transformation fails. + """ + self._original_data_validator.validate(datapack=data) + transformed_data = self._data_transformer.transform(data=data) + self._transformed_data_validator.validate(datapack=transformed_data) + + return transformed_data + + +class ResolvedWorkflowStep(WorkflowStepBase): + """A resolved workflow step contains a transformation handler.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + transformation_handler: TransformationHandler + + +class ResolvedWorkflow(WorkflowDefinition): + """A resolved workflow contains a list of resolved workflow steps.""" + + steps: dict[str, ResolvedWorkflowStep] # type: ignore + workflow_config: WorkflowConfig + + +def check_workflow_config( + *, workflow_definition: WorkflowDefinition, workflow_config: WorkflowConfig +): + """Checks that the given config is compatible with the config class of the + workflow definition. + + Raises: + WorkflowConfigMismatchError: + """ + try: + workflow_definition.config_cls.model_validate(workflow_config.model_dump()) + except ValidationError as error: + raise WorkflowConfigMismatchError( + workflow_definition=workflow_definition, workflow_config=workflow_config + ) from error + + +def resolve_workflow_step( + *, + workflow_step: WorkflowStep, + step_name: str, + workflow_definition: WorkflowDefinition, + workflow_config: WorkflowConfig, + original_model: SchemaPack, +) -> ResolvedWorkflowStep: + """Translates a workflow step given a workflow definition and a workflow config + into a resolved workflow step.
+ """ + check_workflow_config( + workflow_definition=workflow_definition, workflow_config=workflow_config + ) + + transformation_config: BaseModel = getattr(workflow_config, step_name) + transformation_handler = TransformationHandler( + transformation_definition=workflow_step.transformation_definition, + transformation_config=transformation_config, + original_model=original_model, + ) + return ResolvedWorkflowStep( + transformation_handler=transformation_handler, + input=workflow_step.input, + description=workflow_step.description, + ) + + +def resolve_workflow( + workflow_definition: WorkflowDefinition, + original_model: SchemaPack, + workflow_config: WorkflowConfig, +) -> ResolvedWorkflow: + """Translates a workflow definition given an input model and a workflow config into + a resolved workflow. + """ + check_workflow_config( + workflow_definition=workflow_definition, workflow_config=workflow_config + ) + + resolved_steps: dict[str, ResolvedWorkflowStep] = {} + for step_name in workflow_definition.step_order: + workflow_step = workflow_definition.steps[step_name] + input_model = ( + original_model + if workflow_step.input is None + else resolved_steps[ + workflow_step.input + ].transformation_handler.transformed_model + ) + + resolved_steps[step_name] = resolve_workflow_step( + workflow_step=workflow_step, + step_name=step_name, + workflow_definition=workflow_definition, + workflow_config=workflow_config, + original_model=input_model, + ) + + return ResolvedWorkflow( + steps=resolved_steps, + workflow_config=workflow_config, + description=workflow_definition.description, + artifacts=workflow_definition.artifacts, + ) + + +def get_model_artifacts_from_resolved_workflow(resolved_workflow: ResolvedWorkflow): + """Returns a dictionary of models for artifacts produced by resolved workflow.""" + return { + artifact_name: resolved_workflow.steps[ + step_name + ].transformation_handler.transformed_model + for artifact_name, step_name in resolved_workflow.artifacts.items() + } + + +class WorkflowHandler: + """Used for executing workflows described in a WorkflowDefinition.""" + + def __init__( + self, + workflow_definition: WorkflowDefinition, + workflow_config: WorkflowConfig, + original_model: SchemaPack, + ): + """Initialize the WorkflowHandler with a workflow deinition, a matching + config, and a model. The workflow definition is translated into a + resolved workflow. 
+ """ + self._resolved_workflow = resolve_workflow( + workflow_definition=workflow_definition, + original_model=original_model, + workflow_config=workflow_config, + ) + + self.artifact_models = get_model_artifacts_from_resolved_workflow( + self._resolved_workflow + ) + + def run(self, *, data: DataPack) -> dict[str, DataPack]: + """Run the workflow definition on data to generate artifacts.""" + transformed_data: dict[str, DataPack] = {} + for step_name in self._resolved_workflow.step_order: + step = self._resolved_workflow.steps[step_name] + input_data = data if step.input is None else transformed_data[step.input] + transformed_data[step_name] = step.transformation_handler.transform_data( + input_data + ) + + return { + artifact_name: transformed_data[step_name] + for artifact_name, step_name in self._resolved_workflow.artifacts.items() + } diff --git a/tests/schemapack_/__init__.py b/tests/schemapack_/__init__.py new file mode 100644 index 00000000..56d58dec --- /dev/null +++ b/tests/schemapack_/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Testing the re-implementation based on schemapack.""" diff --git a/tests/schemapack_/fixtures/__init__.py b/tests/schemapack_/fixtures/__init__.py new file mode 100644 index 00000000..b8a4adb5 --- /dev/null +++ b/tests/schemapack_/fixtures/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Fixtures that are used in both integration and unit tests""" diff --git a/tests/schemapack_/fixtures/data.py b/tests/schemapack_/fixtures/data.py new file mode 100644 index 00000000..7074529c --- /dev/null +++ b/tests/schemapack_/fixtures/data.py @@ -0,0 +1,34 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Valid and invalid metadata examples using the minimal model.""" + +from schemapack.load import load_datapack +from schemapack.spec.datapack import DataPack + +from tests.schemapack_.fixtures.utils import BASE_DIR + +EXAMPLE_DATA_DIR = BASE_DIR / "example_data" + + +def _get_example_data(name: str) -> DataPack: + """Get example metadata.""" + + return load_datapack(EXAMPLE_DATA_DIR / f"{name}.datapack.yaml") + + +VALID_MINIMAL_DATA = _get_example_data("valid_minimal") +INVALID_MINIMAL_DATA = _get_example_data("invalid_minimal") diff --git a/tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml b/tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml new file mode 100644 index 00000000..14e3a2e7 --- /dev/null +++ b/tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml @@ -0,0 +1,19 @@ +# Missing a content property (size) that is required by the content schema: +datapack: 0.1.0 +resources: + File: + example_file_a: + content: + alias: example_file_a + filename: example_file_a.fastq + format: FASTQ + checksum: 1a5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92b8 + # missing size property + Dataset: + example_dataset: + content: + alias: example_dataset + dac_contact: dac@example.org + relations: + files: + - example_file_a diff --git a/tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml b/tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml new file mode 100644 index 00000000..21cca892 --- /dev/null +++ b/tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml @@ -0,0 +1,40 @@ +datapack: 0.1.0 +resources: + File: + example_file_a: + content: + alias: example_file_a + filename: example_file_a.fastq + format: FASTQ + checksum: 1a5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92b8 + size: 12321 + example_file_b: + content: + alias: example_file_b + filename: example_file_b.fastq + format: FASTQ + checksum: 2b5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92c9 + size: 12314 + example_file_c: + content: + alias: example_file_c + filename: example_file_c.fastq + format: FASTQ + checksum: a9c24870071da03f78515e6197048f3a2172e90e597e9250cd01a0cb8f0986ed + size: 12123 + Dataset: + example_dataset_1: + content: + alias: example_dataset_1 + dac_contact: dac@example.org + relations: + files: + - example_file_a + - example_file_b + example_dataset_2: + content: + alias: example_dataset_2 + dac_contact: dac@example.org + relations: + files: + - example_file_c diff --git a/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml b/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml new file mode 100644 index 00000000..d79663d6 --- /dev/null +++ b/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml @@ -0,0 +1,49 @@ +# a simple schemapack with the content schemas being embedded +schemapack: 0.1.0 +classes: + File: + id: + from_content: alias + content: + "$schema": "http://json-schema.org/draft-07/schema#" + additionalProperties: false + description: A file is an object that contains information generated from a process, + either an Experiment or an
diff --git a/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml b/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml
new file mode 100644
index 00000000..d79663d6
--- /dev/null
+++ b/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml
@@ -0,0 +1,49 @@
+# a simple schemapack with the content schemas being embedded
+schemapack: 0.1.0
+classes:
+  File:
+    id:
+      from_content: alias
+    content:
+      "$schema": "http://json-schema.org/draft-07/schema#"
+      additionalProperties: false
+      description: A file is an object that contains information generated from a process,
+        either an Experiment or an Analysis.
+      properties:
+        alias:
+          type: string
+        checksum:
+          type: string
+        filename:
+          type: string
+        format:
+          type: string
+        size:
+          type: integer
+      required:
+        - alias
+        - filename
+        - format
+        - checksum
+        - size
+      type: object
+  Dataset:
+    id:
+      from_content: alias
+    content:
+      "$schema": "http://json-schema.org/draft-07/schema#"
+      additionalProperties: false
+      description: A dataset that is a collection of files.
+      properties:
+        alias:
+          type: string
+        dac_contact:
+          type: string
+      required:
+        - alias
+      type: object
+
+    relations:
+      files:
+        to: File
+        cardinality: many_to_many
diff --git a/tests/schemapack_/fixtures/models.py b/tests/schemapack_/fixtures/models.py
new file mode 100644
index 00000000..dd9b514d
--- /dev/null
+++ b/tests/schemapack_/fixtures/models.py
@@ -0,0 +1,35 @@
+# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
+# for the German Human Genome-Phenome Archive (GHGA)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Example models."""
+
+from schemapack.load import load_schemapack
+from schemapack.spec.schemapack import SchemaPack
+
+from tests.schemapack_.fixtures.utils import BASE_DIR
+
+EXAMPLE_MODEL_DIR = BASE_DIR / "example_models"
+VALID_MINIMAL_MODEL_EXAMPLE_PATH = EXAMPLE_MODEL_DIR / "minimal.schemapack.yaml"
+
+
+def _get_example_model(name: str) -> SchemaPack:
+    """Get example model."""
+
+    return load_schemapack(EXAMPLE_MODEL_DIR / f"{name}.schemapack.yaml")
+
+
+VALID_MINIMAL_MODEL = _get_example_model("minimal")
+VALID_MODELS = [VALID_MINIMAL_MODEL]
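The `from_content: alias` setting in the model above is what keeps the datapack fixtures consistent: each resource key is expected to match the `alias` inside its content. A sketch of that invariant using plain PyYAML (independent of the schemapack API; the loop is illustrative, not part of this diff):

```python
from pathlib import Path

import yaml

datapack_path = Path("tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml")
raw = yaml.safe_load(datapack_path.read_text(encoding="utf-8"))

# With `id: {from_content: alias}`, resource IDs are drawn from the content's
# "alias" property; the example fixtures keep the two in sync explicitly:
for class_name, resources in raw["resources"].items():
    for resource_id, resource in resources.items():
        assert resource["content"]["alias"] == resource_id, (class_name, resource_id)
```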
+ +"""Utils for Fixture handling""" + +from pathlib import Path +from typing import Any + +import yaml + +BASE_DIR = Path(__file__).parent.resolve() + + +def read_yaml(path: Path) -> dict[str, Any]: + """Read a YAML file.""" + + with open(path, encoding="utf-8") as file: + return yaml.safe_load(file) diff --git a/tests/schemapack_/transform/__init__.py b/tests/schemapack_/transform/__init__.py new file mode 100644 index 00000000..46489375 --- /dev/null +++ b/tests/schemapack_/transform/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Test the transform sub-package.""" diff --git a/tests/schemapack_/transform/test_base.py b/tests/schemapack_/transform/test_base.py new file mode 100644 index 00000000..523bc82a --- /dev/null +++ b/tests/schemapack_/transform/test_base.py @@ -0,0 +1,221 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +"""Test the base module.""" + + +import pytest +from pydantic import ValidationError + +from metldata.schemapack_.builtin_transformations.null import NULL_TRANSFORMATION +from metldata.schemapack_.transform.base import ( + WorkflowDefinition, + WorkflowStep, +) + + +def test_workflow_definition_invalid_step_refs(): + """Test that an invalid step reference raises an error.""" + with pytest.raises(ValidationError): + WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step1": WorkflowStep( + description="A dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step2": WorkflowStep( + description="Another dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input="non_existing_step", + ), + }, + artifacts={ + "step1_output": "step1", + "step2_output": "step2", + }, + ) + + +def test_workflow_definition_invalid_multiple_first_steps(): + """Test that specifying multiple steps without input raises an exception.""" + with pytest.raises(ValidationError): + WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step1": WorkflowStep( + description="A dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step2": WorkflowStep( + description="Another dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + }, + artifacts={ + "step1_output": "step1", + "step2_output": "step2", + }, + ) + + +def test_workflow_definition_invalid_artifacts(): + """Test that artifacts referencing non-existing steps raise an exception.""" + with pytest.raises(ValidationError): + WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step1": WorkflowStep( + description="A dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step2": WorkflowStep( + description="Another dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input="step1", + ), + }, + artifacts={ + "step1_output": "non_existing_step", + "step2_output": "step2", + }, + ) + + +def test_workflow_definition_step_order_happy(): + """Test that the step order is correctly inferred from the workflow definition.""" + + workflow_definition = WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step3": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input="step2", + ), + "step2": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input="step1", + ), + "step1": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step4": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input="step2", + ), + }, + artifacts={ + "output3": "step3", + "output4": "step4", + }, + ) + + assert workflow_definition.step_order in ( + [ + "step1", + "step2", + "step3", + "step4", + ], + [ + "step1", + "step2", + "step4", + "step3", + ], + ) + + +def test_workflow_definition_step_order_circular(): + """Test that initialization of a WorkflowDefinition with a circularly dependent + steps fails.""" + + workflow_definition = WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step1": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step2": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input="step4", + ), + "step3": WorkflowStep( + description="A test 
step.", + transformation_definition=NULL_TRANSFORMATION, + input="step2", + ), + "step4": WorkflowStep( + description="A test step.", + transformation_definition=NULL_TRANSFORMATION, + input="step3", + ), + }, + artifacts={ + "output3": "step3", + "output4": "step4", + }, + ) + + with pytest.raises(RuntimeError): + _ = workflow_definition.step_order + + +def test_workflow_definition_config_cls(): + """Test that the config_cls of the WorkflowDefinition generates a concatenated + config class correctly.""" + + null_workflow = WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step1": WorkflowStep( + description="A dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step2": WorkflowStep( + description="Another dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input="step1", + ), + }, + artifacts={ + "step1_output": "step1", + "step2_output": "step2", + }, + ) + + config_fields = null_workflow.config_cls.model_fields + + assert "step1" in config_fields + assert "step2" in config_fields + assert ( + config_fields["step1"].annotation + == config_fields["step2"].annotation + == NULL_TRANSFORMATION.config_cls + ) diff --git a/tests/schemapack_/transform/test_handling.py b/tests/schemapack_/transform/test_handling.py new file mode 100644 index 00000000..e4248df2 --- /dev/null +++ b/tests/schemapack_/transform/test_handling.py @@ -0,0 +1,189 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Test the handling module. 
diff --git a/tests/schemapack_/transform/test_handling.py b/tests/schemapack_/transform/test_handling.py
new file mode 100644
index 00000000..e4248df2
--- /dev/null
+++ b/tests/schemapack_/transform/test_handling.py
@@ -0,0 +1,189 @@
+# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
+# for the German Human Genome-Phenome Archive (GHGA)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Test the handling module. Only edge cases that are not covered by tests
+with builtin transformations are tested here."""
+
+import pytest
+import schemapack.exceptions
+from schemapack.spec.datapack import DataPack
+from schemapack.spec.schemapack import SchemaPack
+
+from metldata.schemapack_.builtin_transformations.null import NULL_TRANSFORMATION
+from metldata.schemapack_.builtin_transformations.null.config import NullConfig
+from metldata.schemapack_.transform.base import (
+    DataTransformer,
+    ModelAssumptionError,
+    ModelTransformationError,
+    TransformationDefinition,
+    WorkflowDefinition,
+    WorkflowStep,
+)
+from metldata.schemapack_.transform.handling import (
+    TransformationHandler,
+    WorkflowHandler,
+)
+from tests.schemapack_.fixtures.data import INVALID_MINIMAL_DATA, VALID_MINIMAL_DATA
+from tests.schemapack_.fixtures.models import VALID_MINIMAL_MODEL
+
+
+def test_transformation_handler_happy():
+    """Test the happy path of using a TransformationHandler."""
+
+    transformation_handler = TransformationHandler(
+        transformation_definition=NULL_TRANSFORMATION,
+        transformation_config=NullConfig(),
+        original_model=VALID_MINIMAL_MODEL,
+    )
+
+    # Since the null transformation was used, compare with the input:
+    assert transformation_handler.transformed_model == VALID_MINIMAL_MODEL
+
+    transformed_data = transformation_handler.transform_data(VALID_MINIMAL_DATA)
+
+    # Since the null transformation was used, compare with the input:
+    assert transformed_data == VALID_MINIMAL_DATA
+
+
+def test_transformation_handler_assumption_error():
+    """Test using the TransformationHandler when model assumptions are not met."""
+
+    # Make the transformation definition always raise a ModelAssumptionError:
+    def always_failing_assumptions(model: SchemaPack, config: NullConfig):
+        """A function that always raises a ModelAssumptionError."""
+        raise ModelAssumptionError
+
+    transformation = TransformationDefinition[NullConfig](
+        config_cls=NULL_TRANSFORMATION.config_cls,
+        check_model_assumptions=always_failing_assumptions,
+        transform_model=NULL_TRANSFORMATION.transform_model,
+        data_transformer_factory=NULL_TRANSFORMATION.data_transformer_factory,
+    )
+
+    with pytest.raises(ModelAssumptionError):
+        _ = TransformationHandler(
+            transformation_definition=transformation,
+            transformation_config=NullConfig(),
+            original_model=VALID_MINIMAL_MODEL,
+        )
+
+
+def test_transformation_handler_model_transformation_error():
+    """Test using the TransformationHandler when model transformation fails."""
+
+    # Make the transformation definition always raise a ModelTransformationError:
+    def always_failing_transformation(original_model: SchemaPack, config: NullConfig):
+        """A function that always raises a ModelTransformationError."""
+        raise ModelTransformationError
+
+    transformation = TransformationDefinition[NullConfig](
+        config_cls=NULL_TRANSFORMATION.config_cls,
+        check_model_assumptions=NULL_TRANSFORMATION.check_model_assumptions,
+        transform_model=always_failing_transformation,
+        data_transformer_factory=NULL_TRANSFORMATION.data_transformer_factory,
+    )
+    with pytest.raises(ModelTransformationError):
+        _ = TransformationHandler(
+            transformation_definition=transformation,
+            transformation_config=NullConfig(),
+            original_model=VALID_MINIMAL_MODEL,
+        )
+
+
+def test_transformation_handler_input_data_invalid():
+    """Test the TransformationHandler when used with input data that is not valid
+    against the model."""
+
+    transformation_handler = TransformationHandler(
+        transformation_definition=NULL_TRANSFORMATION,
+        transformation_config=NullConfig(),
+        original_model=VALID_MINIMAL_MODEL,
+    )
+
+    with pytest.raises(schemapack.exceptions.ValidationError):
+        _ = transformation_handler.transform_data(INVALID_MINIMAL_DATA)
+
+
+def test_transformation_handler_transformed_data_invalid():
+    """Test the TransformationHandler when the transformed data fails validation
+    against the transformed model."""
+
+    class AlwaysInvalidTransformer(DataTransformer[NullConfig]):
+        """A transformer that always returns the same invalid data."""
+
+        def transform(self, data: DataPack) -> DataPack:
+            """Transforms data.
+
+            Args:
+                data: The data as DataPack to be transformed.
+
+            Raises:
+                DataTransformationError:
+                    if the transformation fails.
+            """
+            return INVALID_MINIMAL_DATA
+
+    transformation = TransformationDefinition[NullConfig](
+        config_cls=NULL_TRANSFORMATION.config_cls,
+        check_model_assumptions=NULL_TRANSFORMATION.check_model_assumptions,
+        transform_model=NULL_TRANSFORMATION.transform_model,
+        data_transformer_factory=AlwaysInvalidTransformer,
+    )
+
+    transformation_handler = TransformationHandler(
+        transformation_definition=transformation,
+        transformation_config=NullConfig(),
+        original_model=VALID_MINIMAL_MODEL,
+    )
+
+    with pytest.raises(schemapack.exceptions.ValidationError):
+        _ = transformation_handler.transform_data(VALID_MINIMAL_DATA)
+
+
+def test_workflow_handler_happy():
+    """Test the happy path of using a WorkflowHandler."""
+    null_workflow = WorkflowDefinition(
+        description="A workflow for testing.",
+        steps={
+            "step1": WorkflowStep(
+                description="A dummy step.",
+                transformation_definition=NULL_TRANSFORMATION,
+                input=None,
+            ),
+            "step2": WorkflowStep(
+                description="Another dummy step.",
+                transformation_definition=NULL_TRANSFORMATION,
+                input="step1",
+            ),
+        },
+        artifacts={
+            "step1_output": "step1",
+            "step2_output": "step2",
+        },
+    )
+
+    workflow_handler = WorkflowHandler(
+        workflow_definition=null_workflow,
+        workflow_config=null_workflow.config_cls.model_validate(
+            {"step1": {}, "step2": {}}
+        ),
+        original_model=VALID_MINIMAL_MODEL,
+    )
+
+    artifacts = workflow_handler.run(data=VALID_MINIMAL_DATA)
+
+    # Since a null workflow was used, compare to the input:
+    assert artifacts["step1_output"] == artifacts["step2_output"] == VALID_MINIMAL_DATA
diff --git a/tests/submission_registry/test_models.py b/tests/submission_registry/test_models.py
index 0b72badd..12e7a6cc 100644
--- a/tests/submission_registry/test_models.py
+++ b/tests/submission_registry/test_models.py
@@ -38,7 +38,8 @@ def test_submission_current_status():
         id="testsubmission001",
         status_history=(
             StatusChange(
-                timestamp=now_as_utc(), new_status=SubmissionStatus.IN_REVIEW  # second
+                timestamp=now_as_utc(),
+                new_status=SubmissionStatus.IN_REVIEW,  # second
             ),
             StatusChange(
                 timestamp=now_as_utc() + timedelta(days=10),  # third
diff --git a/tests/transform/test_base.py b/tests/transform/test_base.py
index a3612c96..93175919 100644
--- a/tests/transform/test_base.py
+++ b/tests/transform/test_base.py
@@ -168,7 +168,8 @@ def test_workflow_definition_step_order_happy():
 
 
 def test_workflow_definition_step_order_circular():
-    """Test that the step order is correctly inferred from the workflow definition."""
+    """Test that accessing the step order of a workflow definition with circularly
+    dependent steps fails."""
 
     workflow_definition = WorkflowDefinition(
         description="A workflow for testing.",