From 6e8e372469956f601d10dbccf8b6d9219223053f Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com> Date: Tue, 12 Mar 2024 10:05:49 -0500 Subject: [PATCH 01/14] Split build-push workflows (#2499) --- .github/workflows/build_awshelper.yaml | 21 +++++++++++++ .github/workflows/build_python3.10.yaml | 23 ++++++++++++++ .github/workflows/build_python3.9.yaml | 23 ++++++++++++++ .github/workflows/image_build_push.yaml | 42 ------------------------- 4 files changed, 67 insertions(+), 42 deletions(-) create mode 100644 .github/workflows/build_awshelper.yaml create mode 100644 .github/workflows/build_python3.10.yaml create mode 100644 .github/workflows/build_python3.9.yaml delete mode 100644 .github/workflows/image_build_push.yaml diff --git a/.github/workflows/build_awshelper.yaml b/.github/workflows/build_awshelper.yaml new file mode 100644 index 0000000000..f42a136109 --- /dev/null +++ b/.github/workflows/build_awshelper.yaml @@ -0,0 +1,21 @@ +name: Build awshelper image + +on: + push: + paths: + - .github/workflows/build_awshelper.yaml + - Docker/awshelper/** + +jobs: + awshelper: + name: Build and push + uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master + with: + DOCKERFILE_LOCATION: "./Docker/awshelper/Dockerfile" + OVERRIDE_REPO_NAME: "awshelper" + secrets: + ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} + ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} + QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }} + QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + diff --git a/.github/workflows/build_python3.10.yaml b/.github/workflows/build_python3.10.yaml new file mode 100644 index 0000000000..993da14680 --- /dev/null +++ b/.github/workflows/build_python3.10.yaml @@ -0,0 +1,23 @@ +name: Build Python 3.10 image + +on: + push: + paths: + - .github/workflows/build_python3.10.yaml + - Docker/python-nginx/python3.10-buster/** + +jobs: + python_3-10: + name: Build and push + uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master + with: + DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.10-buster/Dockerfile" + DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.10-buster" + OVERRIDE_REPO_NAME: "python" + OVERRIDE_TAG_NAME: "python3.10-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)" + secrets: + ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} + ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} + QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }} + QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + diff --git a/.github/workflows/build_python3.9.yaml b/.github/workflows/build_python3.9.yaml new file mode 100644 index 0000000000..5bc8bc4629 --- /dev/null +++ b/.github/workflows/build_python3.9.yaml @@ -0,0 +1,23 @@ +name: Build Python 3.9 image + +on: + push: + paths: + - .github/workflows/build_python3.9.yaml + - Docker/python-nginx/python3.9-buster/** + +jobs: + python_3-9: + name: Build and push + uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master + with: + DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.9-buster/Dockerfile" + DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.9-buster" + OVERRIDE_REPO_NAME: "python" + OVERRIDE_TAG_NAME: "python3.9-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)" + secrets: + ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} + ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} + QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }} + QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + diff 
--git a/.github/workflows/image_build_push.yaml b/.github/workflows/image_build_push.yaml deleted file mode 100644 index d5bfea351d..0000000000 --- a/.github/workflows/image_build_push.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: Build Python Base Images - -on: push - -jobs: - python_3-9: - name: Python 3.9 - uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master - with: - DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.9-buster/Dockerfile" - DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.9-buster" - OVERRIDE_REPO_NAME: "python" - OVERRIDE_TAG_NAME: "python3.9-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)" - secrets: - ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} - ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} - QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }} - QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} - python_3-10: - name: Python 3.10 - uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master - with: - DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.10-buster/Dockerfile" - DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.10-buster" - OVERRIDE_REPO_NAME: "python" - OVERRIDE_TAG_NAME: "python3.10-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)" - secrets: - ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} - ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} - QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }} - QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} - awshelper: - name: AwsHelper - uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master - with: - DOCKERFILE_LOCATION: "./Docker/awshelper/Dockerfile" - OVERRIDE_REPO_NAME: "awshelper" - secrets: - ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} - ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} - QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }} - QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} From b2195b1ae6f9d802d346629d126e9d3ba408eccd Mon Sep 17 00:00:00 2001 From: Pauline <4224001+paulineribeyre@users.noreply.github.com> Date: Tue, 12 Mar 2024 10:07:34 -0500 Subject: [PATCH 02/14] fix wf naming --- .github/workflows/build_awshelper.yaml | 2 +- .github/workflows/build_python3.10.yaml | 2 +- .github/workflows/build_python3.9.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_awshelper.yaml b/.github/workflows/build_awshelper.yaml index f42a136109..3d2da5393e 100644 --- a/.github/workflows/build_awshelper.yaml +++ b/.github/workflows/build_awshelper.yaml @@ -8,7 +8,7 @@ on: jobs: awshelper: - name: Build and push + name: awshelper uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master with: DOCKERFILE_LOCATION: "./Docker/awshelper/Dockerfile" diff --git a/.github/workflows/build_python3.10.yaml b/.github/workflows/build_python3.10.yaml index 993da14680..80d2d76232 100644 --- a/.github/workflows/build_python3.10.yaml +++ b/.github/workflows/build_python3.10.yaml @@ -8,7 +8,7 @@ on: jobs: python_3-10: - name: Build and push + name: Python 3.10 uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master with: DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.10-buster/Dockerfile" diff --git a/.github/workflows/build_python3.9.yaml b/.github/workflows/build_python3.9.yaml index 5bc8bc4629..540e0d4eca 100644 --- a/.github/workflows/build_python3.9.yaml +++ b/.github/workflows/build_python3.9.yaml @@ -8,7 +8,7 @@ on: jobs: python_3-9: - name: Build and push + name: Python 3.9 uses: 
uc-cdis/.github/.github/workflows/image_build_push.yaml@master with: DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.9-buster/Dockerfile" From b6031e029db84ab0190d2a263c16b418b113482d Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com> Date: Tue, 12 Mar 2024 12:03:41 -0500 Subject: [PATCH 03/14] Build awshelper workflow: always build (#2501) --- .github/workflows/build_awshelper.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_awshelper.yaml b/.github/workflows/build_awshelper.yaml index 3d2da5393e..36b5745dbd 100644 --- a/.github/workflows/build_awshelper.yaml +++ b/.github/workflows/build_awshelper.yaml @@ -1,10 +1,8 @@ name: Build awshelper image -on: - push: - paths: - - .github/workflows/build_awshelper.yaml - - Docker/awshelper/** +# Always build this image because it contains all the cloud-automation files. +# Some jobs depend on arbitrary files and we need to test them with updated awshelper images. +on: push jobs: awshelper: From 24492c2d6868ce49a474617544b575a38697d0af Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:23:59 -0400 Subject: [PATCH 04/14] Adding a gen3 db namespace to the temp files so they don't overlap (#2502) --- gen3/bin/kube-setup-revproxy.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gen3/bin/kube-setup-revproxy.sh b/gen3/bin/kube-setup-revproxy.sh index 5db9850a18..fd30b478b3 100644 --- a/gen3/bin/kube-setup-revproxy.sh +++ b/gen3/bin/kube-setup-revproxy.sh @@ -114,8 +114,8 @@ done if g3k_manifest_lookup .argo.argo_server_service_url 2> /dev/null; then argo_server_service_url=$(g3k_manifest_lookup .argo.argo_server_service_url) - g3k_kv_filter "${scriptDir}/gen3.nginx.conf/argo-server.conf" SERVICE_URL "${argo_server_service_url}" > /tmp/argo-server-with-url.conf - filePath="/tmp/argo-server-with-url.conf" + g3k_kv_filter "${scriptDir}/gen3.nginx.conf/argo-server.conf" SERVICE_URL "${argo_server_service_url}" > /tmp/argo-server-with-url$(gen3 db namespace).conf + filePath="/tmp/argo-server-with-url$(gen3 db namespace).conf" if [[ -f "$filePath" ]]; then confFileList+=("--from-file" "$filePath") fi From e979669cd92cf1ecad69f2bc2837a8fed35e2926 Mon Sep 17 00:00:00 2001 From: Mingfei Shao <2475897+mfshao@users.noreply.github.com> Date: Wed, 13 Mar 2024 12:08:30 -0500 Subject: [PATCH 05/14] HP-1310 feat: updated related studies logic (#2498) * feat: updated related studies logic * update --- .../healdata/heal-cedar-data-ingest.py | 174 +++++++++++------- 1 file changed, 107 insertions(+), 67 deletions(-) diff --git a/files/scripts/healdata/heal-cedar-data-ingest.py b/files/scripts/healdata/heal-cedar-data-ingest.py index c54f9d5aa0..e0c4b3c463 100644 --- a/files/scripts/healdata/heal-cedar-data-ingest.py +++ b/files/scripts/healdata/heal-cedar-data-ingest.py @@ -13,7 +13,7 @@ "study_metadata.study_type.study_subject_type": "Subject Type", "study_metadata.human_subject_applicability.gender_applicability": "Gender", "study_metadata.human_subject_applicability.age_applicability": "Age", - "research_program": "Research Program" + "research_program": "Research Program", } # Defines how to handle special cases for values in filters @@ -33,7 +33,7 @@ "Gender Queer": "Genderqueer/gender nonconforming/neither exclusively male nor female", "Intersex": "Genderqueer/gender nonconforming/neither exclusively male nor female", "Intersexed": "Genderqueer/gender nonconforming/neither 
exclusively male nor female", - "Buisness Development": "Business Development" + "Buisness Development": "Business Development", } # Defines field that we don't want to include in the filters @@ -54,24 +54,25 @@ def is_valid_uuid(uuid_to_test, version=4): """ Check if uuid_to_test is a valid UUID. - + Parameters ---------- uuid_to_test : str version : {1, 2, 3, 4} - + Returns ------- `True` if uuid_to_test is a valid UUID, otherwise `False`. - + """ - + try: uuid_obj = UUID(uuid_to_test, version=version) except ValueError: return False return str(uuid_obj) == uuid_to_test + def update_filter_metadata(metadata_to_update): filter_metadata = [] for metadata_field_key, filter_field_key in FILTER_FIELD_MAPPINGS.items(): @@ -83,20 +84,21 @@ def update_filter_metadata(metadata_to_update): print(filter_field_values) raise TypeError("Neither a string nor a list") for filter_field_value in filter_field_values: - if (metadata_field_key, filter_field_value) in OMITTED_VALUES_MAPPING.items(): + if ( + metadata_field_key, + filter_field_value, + ) in OMITTED_VALUES_MAPPING.items(): continue if filter_field_value in SPECIAL_VALUE_MAPPINGS: filter_field_value = SPECIAL_VALUE_MAPPINGS[filter_field_value] - filter_metadata.append({"key": filter_field_key, "value": filter_field_value}) + filter_metadata.append( + {"key": filter_field_key, "value": filter_field_value} + ) filter_metadata = pydash.uniq(filter_metadata) metadata_to_update["advSearchFilters"] = filter_metadata # Retain these from existing tags save_tags = ["Data Repository"] - tags = [ - tag - for tag in metadata_to_update["tags"] - if tag["category"] in save_tags - ] + tags = [tag for tag in metadata_to_update["tags"] if tag["category"] in save_tags] # Add any new tags from advSearchFilters for f in metadata_to_update["advSearchFilters"]: if f["key"] == "Gender": @@ -111,25 +113,30 @@ def update_filter_metadata(metadata_to_update): def get_client_token(client_id: str, client_secret: str): try: token_url = f"http://revproxy-service/user/oauth2/token" - headers = {'Content-Type': 'application/x-www-form-urlencoded'} - params = {'grant_type': 'client_credentials'} - data = 'scope=openid user data' + headers = {"Content-Type": "application/x-www-form-urlencoded"} + params = {"grant_type": "client_credentials"} + data = "scope=openid user data" token_result = requests.post( - token_url, params=params, headers=headers, data=data, + token_url, + params=params, + headers=headers, + data=data, auth=(client_id, client_secret), ) - token = token_result.json()["access_token"] + token = token_result.json()["access_token"] except: raise Exception("Could not get token") return token -def get_related_studies(serial_num, hostname): +def get_related_studies(serial_num, guid, hostname): related_study_result = [] if serial_num: - mds = requests.get(f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000") + mds = requests.get( + f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000" + ) if mds.status_code == 200: related_study_metadata = mds.json() @@ -137,15 +144,22 @@ def get_related_studies(serial_num, hostname): related_study_metadata_key, related_study_metadata_value, ) in related_study_metadata.items(): + if related_study_metadata_key == guid or ( + related_study_metadata_value["_guid_type"] != "discovery_metadata" + and related_study_metadata_value["_guid_type"] + != "unregistered_discovery_metadata" + ): + # do nothing for self, or for 
archived studies + continue title = ( - related_study_metadata_value.get( - "gen3_discovery", {} - ) + related_study_metadata_value.get("gen3_discovery", {}) .get("study_metadata", {}) .get("minimal_info", {}) .get("study_name", "") ) - link = f"https://{hostname}/portal/discovery/{related_study_metadata_key}/" + link = ( + f"https://{hostname}/portal/discovery/{related_study_metadata_key}/" + ) related_study_result.append({"title": title, "link": link}) return related_study_result @@ -180,7 +194,7 @@ def get_related_studies(serial_num, hostname): print("Getting CEDAR client access token") access_token = get_client_token(client_id, client_secret) -token_header = {"Authorization": 'bearer ' + access_token} +token_header = {"Authorization": "bearer " + access_token} limit = 10 offset = 0 @@ -192,16 +206,21 @@ def get_related_studies(serial_num, hostname): print("Directory ID is not in UUID format!") sys.exit(1) -while((limit + offset <= total)): +while limit + offset <= total: # Get the metadata from cedar to register print("Querying CEDAR...") - cedar = requests.get(f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}", headers=token_header) + cedar = requests.get( + f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}", + headers=token_header, + ) # If we get metadata back now register with MDS if cedar.status_code == 200: metadata_return = cedar.json() if "metadata" not in metadata_return: - print("Got 200 from CEDAR wrapper but no metadata in body, something is not right!") + print( + "Got 200 from CEDAR wrapper but no metadata in body, something is not right!" + ) sys.exit(1) total = metadata_return["metadata"]["totalCount"] @@ -209,13 +228,17 @@ def get_related_studies(serial_num, hostname): print(f"Successfully got {returned_records} record(s) from CEDAR directory") for cedar_record in metadata_return["metadata"]["records"]: # get the appl id from cedar for querying in our MDS - cedar_appl_id = pydash.get(cedar_record, "metadata_location.nih_application_id") + cedar_appl_id = pydash.get( + cedar_record, "metadata_location.nih_application_id" + ) if cedar_appl_id is None: print("This record doesn't have appl_id, skipping...") continue # Get the metadata record for the nih_application_id - mds = requests.get(f"http://revproxy-service/mds/metadata?gen3_discovery.study_metadata.metadata_location.nih_application_id={cedar_appl_id}&data=true") + mds = requests.get( + f"http://revproxy-service/mds/metadata?gen3_discovery.study_metadata.metadata_location.nih_application_id={cedar_appl_id}&data=true" + ) if mds.status_code == 200: mds_res = mds.json() @@ -234,9 +257,13 @@ def get_related_studies(serial_num, hostname): if mds_res["_guid_type"] == "discovery_metadata": print("Metadata is already registered. Updating MDS record") elif mds_res["_guid_type"] == "unregistered_discovery_metadata": - print("Metadata has not been registered. Registering it in MDS record") + print( + "Metadata has not been registered. 
Registering it in MDS record" + ) else: - print(f"This metadata data record has a special GUID type \"{mds_res['_guid_type']}\" and will be skipped") + print( + f"This metadata data record has a special GUID type \"{mds_res['_guid_type']}\" and will be skipped" + ) continue if "clinicaltrials_gov" in cedar_record: @@ -244,21 +271,27 @@ def get_related_studies(serial_num, hostname): del cedar_record["clinicaltrials_gov"] # some special handing for this field, because its parent will be deleted before we merging the CEDAR and MDS SLMD to avoid duplicated values - cedar_record_other_study_websites = cedar_record.get("metadata_location", {}).get("other_study_websites", []) + cedar_record_other_study_websites = cedar_record.get( + "metadata_location", {} + ).get("other_study_websites", []) del cedar_record["metadata_location"] mds_res["gen3_discovery"]["study_metadata"].update(cedar_record) - mds_res["gen3_discovery"]["study_metadata"]["metadata_location"]["other_study_websites"] = cedar_record_other_study_websites + mds_res["gen3_discovery"]["study_metadata"]["metadata_location"][ + "other_study_websites" + ] = cedar_record_other_study_websites # setup citations - doi_citation = mds_res["gen3_discovery"]["study_metadata"].get("doi_citation", "") - mds_res["gen3_discovery"]["study_metadata"]["citation"]["heal_platform_citation"] = doi_citation - + doi_citation = mds_res["gen3_discovery"]["study_metadata"].get( + "doi_citation", "" + ) + mds_res["gen3_discovery"]["study_metadata"]["citation"][ + "heal_platform_citation" + ] = doi_citation # setup repository_study_link data_repositories = ( - mds_res - .get("gen3_discovery", {}) + mds_res.get("gen3_discovery", {}) .get("study_metadata", {}) .get("metadata_location", {}) .get("data_repositories", []) @@ -275,8 +308,13 @@ def get_related_studies(serial_num, hostname): repository_study_link = REPOSITORY_STUDY_ID_LINK_TEMPLATE[ repository["repository_name"] ].replace("", repository["repository_study_ID"]) - repository.update({"repository_study_link": repository_study_link}) - if repository_citation_additional_text not in repository_citation: + repository.update( + {"repository_study_link": repository_study_link} + ) + if ( + repository_citation_additional_text + not in repository_citation + ): repository_citation += repository_citation_additional_text if len(data_repositories): data_repositories[0] = { @@ -284,36 +322,28 @@ def get_related_studies(serial_num, hostname): "repository_citation": repository_citation, } - mds_res["gen3_discovery"]["study_metadata"][ - "metadata_location" - ]["data_repositories"] = copy.deepcopy(data_repositories) - - + mds_res["gen3_discovery"]["study_metadata"]["metadata_location"][ + "data_repositories" + ] = copy.deepcopy(data_repositories) # set up related studies serial_num = None try: serial_num = ( - mds_res - .get("nih_reporter", {}) + mds_res.get("nih_reporter", {}) .get("project_num_split", {}) .get("serial_num", None) ) except Exception: - print(f"Unable to get serial number for study") - - if serial_num == None: - print(f"Unable to get serial number for study") + print("Unable to get serial number for study") - related_study_result = get_related_studies(serial_num, hostname) - existing_related_study_result = mds_res.get("related_studies", []) - for related_study in related_study_result: - if related_study not in existing_related_study_result: - existing_related_study_result.append(copy.deepcopy(related_study)) - mds_res["gen3_discovery"][ - "related_studies" - ] = copy.deepcopy(existing_related_study_result) + 
if serial_num is None: + print("Unable to get serial number for study") + related_study_result = get_related_studies( + serial_num, mds_record_guid, hostname + ) + mds_res["gen3_discovery"]["related_studies"] = copy.deepcopy(related_study_result) # merge data from cedar that is not study level metadata into a level higher deleted_keys = [] @@ -324,29 +354,39 @@ def get_related_studies(serial_num, hostname): for key in deleted_keys: del mds_res["gen3_discovery"]["study_metadata"][key] - mds_discovery_data_body = update_filter_metadata(mds_res["gen3_discovery"]) + mds_discovery_data_body = update_filter_metadata( + mds_res["gen3_discovery"] + ) mds_cedar_register_data_body["gen3_discovery"] = mds_discovery_data_body if mds_clinical_trials: - mds_cedar_register_data_body["clinicaltrials_gov"] = {**mds_cedar_register_data_body.get("clinicaltrials_gov", {}), **mds_clinical_trials} + mds_cedar_register_data_body["clinicaltrials_gov"] = { + **mds_cedar_register_data_body.get("clinicaltrials_gov", {}), + **mds_clinical_trials, + } mds_cedar_register_data_body["_guid_type"] = "discovery_metadata" print(f"Metadata {mds_record_guid} is now being registered.") - mds_put = requests.put(f"http://revproxy-service/mds/metadata/{mds_record_guid}", + mds_put = requests.put( + f"http://revproxy-service/mds/metadata/{mds_record_guid}", headers=token_header, - json = mds_cedar_register_data_body + json=mds_cedar_register_data_body, ) if mds_put.status_code == 200: print(f"Successfully registered: {mds_record_guid}") else: - print(f"Failed to register: {mds_record_guid}. Might not be MDS admin") + print( + f"Failed to register: {mds_record_guid}. Might not be MDS admin" + ) print(f"Status from MDS: {mds_put.status_code}") else: print(f"Failed to get information from MDS: {mds.status_code}") - + else: - print(f"Failed to get information from CEDAR wrapper service: {cedar.status_code}") + print( + f"Failed to get information from CEDAR wrapper service: {cedar.status_code}" + ) if offset + limit == total: break From ec6510ff37b03662497ac5e651b36d70f4101e68 Mon Sep 17 00:00:00 2001 From: Jian <52763034+tianj7@users.noreply.github.com> Date: Wed, 13 Mar 2024 16:30:06 -0500 Subject: [PATCH 06/14] add alt text to maintenance page images (#2500) --- files/dashboard/maintenance-page/index.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/files/dashboard/maintenance-page/index.html b/files/dashboard/maintenance-page/index.html index a3e34479b7..fac49e64e1 100644 --- a/files/dashboard/maintenance-page/index.html +++ b/files/dashboard/maintenance-page/index.html @@ -16,7 +16,7 @@ @@ -27,12 +27,12 @@
[hunk bodies garbled in extraction: the page's HTML markup was stripped, leaving only the visible text. Recovered context lines from the second hunk:]
        This site is under maintenance...
        Please check back later.
[the stray "- +" that follows is the before/after pair for the changed <img> element; its only recoverable change is the added alt text:]
- + A shiba dog looking into the distance From 5b75af3a39cc8ebcfeed9c75a4fe55f0db004ae1 Mon Sep 17 00:00:00 2001 From: emalinowski Date: Mon, 18 Mar 2024 10:41:44 -0600 Subject: [PATCH 07/14] Update ingress.yaml (#2506) --- kube/services/ingress/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/ingress/ingress.yaml b/kube/services/ingress/ingress.yaml index 3f1f312592..1db08e8ef6 100644 --- a/kube/services/ingress/ingress.yaml +++ b/kube/services/ingress/ingress.yaml @@ -11,7 +11,7 @@ metadata: alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]' alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=600 alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' - alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-Res-FIPS-2023-04 + alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-FIPS-2023-04 spec: ingressClassName: alb rules: From bff3a57818d24f416a3a518bebfe956e281bad80 Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com> Date: Tue, 19 Mar 2024 09:53:47 -0500 Subject: [PATCH 08/14] MIDRC-543 Let Hatchery assume role (#2504) --- gen3/bin/kube-setup-hatchery.sh | 31 ++++++++++++++++++++++++-- kube/services/jobs/ecr-access-job.yaml | 2 -- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/gen3/bin/kube-setup-hatchery.sh b/gen3/bin/kube-setup-hatchery.sh index 5454d1e248..bdcff8ed0b 100644 --- a/gen3/bin/kube-setup-hatchery.sh +++ b/gen3/bin/kube-setup-hatchery.sh @@ -76,15 +76,38 @@ else exists_or_create_gen3_license_table "$TARGET_TABLE" fi +# if `nextflow-global.imagebuilder-reader-role-arn` is set in hatchery config, allow hatchery +# to assume the configured role +imagebuilderRoleArn=$(g3kubectl get configmap manifest-hatchery -o jsonpath={.data.nextflow-global} | jq -r '."imagebuilder-reader-role-arn"') +assumeImageBuilderRolePolicyBlock="" +if [ -z "$imagebuilderRoleArn" ]; then + gen3_log_info "No 'nexftlow-global.imagebuilder-reader-role-arn' in Hatchery configuration, not granting AssumeRole" +else + gen3_log_info "Found 'nexftlow-global.imagebuilder-reader-role-arn' in Hatchery configuration, granting AssumeRole" + assumeImageBuilderRolePolicyBlock=$( cat < /dev/null 2>&1; then roleName="$(gen3 api safe-name hatchery-sa)" gen3 awsrole create $roleName $saName @@ -176,7 +204,6 @@ if ! g3kubectl get sa "$saName" -o json | jq -e '.metadata.annotations | ."eks.a # create the new version gen3_aws_run aws iam create-policy-version --policy-arn "$policyArn" --policy-document "$policy" --set-as-default fi - gen3_log_info "Attaching policy '${policyName}' to role '${roleName}'" gen3 awsrole attach-policy ${policyArn} --role-name ${roleName} --force-aws-cli || exit 1 gen3 awsrole attach-policy "arn:aws:iam::aws:policy/AWSResourceAccessManagerFullAccess" --role-name ${roleName} --force-aws-cli || exit 1 diff --git a/kube/services/jobs/ecr-access-job.yaml b/kube/services/jobs/ecr-access-job.yaml index 11979a1232..89bb49d6d2 100644 --- a/kube/services/jobs/ecr-access-job.yaml +++ b/kube/services/jobs/ecr-access-job.yaml @@ -65,8 +65,6 @@ spec: args: - "-c" - | - set -e - cd cloud-automation/files/scripts/ echo Installing requirements... 
pip3 install -r ecr-access-job-requirements.txt From 6d67d747679ed73edf55fc3484297732f59e4000 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Wed, 20 Mar 2024 10:10:41 -0400 Subject: [PATCH 09/14] Feat/scaling va workflows (#2507) * Raising total parallelism to 13, to enable stress tests in va-testing. This should be merged with a value of 10, to allow 5 WFs in each env * Bumping the parallelism for Argo up to 5 --- kube/services/argo/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml index 2b46ced0f3..23dda4a5ad 100644 --- a/kube/services/argo/values.yaml +++ b/kube/services/argo/values.yaml @@ -1,6 +1,6 @@ controller: - parallelism: 8 - namespaceParallelism: 3 + parallelism: 10 + namespaceParallelism: 5 metricsConfig: # -- Enables prometheus metrics server enabled: true From 31e6e49015d84782e1c5687175dc11e36bfae4f8 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Wed, 20 Mar 2024 15:41:29 -0400 Subject: [PATCH 10/14] Creating a new type of DB dump that grabs stuff for va-testing (#2508) * Creating a new type of DB dump that grabs stuff for va-testing * Missed changes to dbdump script * Changing job name --- gen3/bin/dbbackup.sh | 13 ++- .../jobs/psql-db-dump-va-testing-job.yaml | 80 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 kube/services/jobs/psql-db-dump-va-testing-job.yaml diff --git a/gen3/bin/dbbackup.sh b/gen3/bin/dbbackup.sh index 29f267221d..eb9611a907 100644 --- a/gen3/bin/dbbackup.sh +++ b/gen3/bin/dbbackup.sh @@ -173,6 +173,10 @@ db_restore() { gen3 job run psql-db-prep-restore } +va_testing_db_dump() { + gen3 job run psql-db-dump-va-testing +} + # main function to determine whether dump or restore main() { @@ -191,8 +195,15 @@ main() { create_s3_bucket db_restore ;; + va-dump) + gen3_log_info "Running a va-testing DB dump..." + create_policy + create_service_account_and_role + create_s3_bucket + va_testing_db_dump + ;; *) - echo "Invalid command. Usage: gen3 dbbackup [dump|restore]" + echo "Invalid command. 
Usage: gen3 dbbackup [dump|restore|va-dump]" return 1 ;; esac diff --git a/kube/services/jobs/psql-db-dump-va-testing-job.yaml b/kube/services/jobs/psql-db-dump-va-testing-job.yaml new file mode 100644 index 0000000000..8a8037e166 --- /dev/null +++ b/kube/services/jobs/psql-db-dump-va-testing-job.yaml @@ -0,0 +1,80 @@ +--- +# NOTE: This job was created specifically to dump all the databases in va-testing, in preparation for a move to second cluster +# If you aren't doing that, this probably is not the job you're looking for +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-dump-va-testing +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: dbbackup-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: "devterm" + - name: GEN3_HOME + value: /home/ubuntu/cloud-automation + command: ["/bin/bash"] + args: + - "-c" + - | + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + account_id=$(aws sts get-caller-identity --query "Account" --output text) + default_bucket_name="gen3-db-backups-${account_id}" + default_databases=("fence" "indexd" "sheepdog" "peregrine" "arborist" "argo" "atlas" "metadata" "ohdsi" "omop-data" "wts") + s3_dir="va-testing-$(date +"%Y-%m-%d-%H-%M-%S")" + databases=("${default_databases[@]}") + bucket_name=$default_bucket_name + + for database in "${databases[@]}"; do + gen3_log_info "Starting database backup for ${database}" + gen3 db backup "${database}" > "${database}.sql" + + if [ $? -eq 0 ] && [ -f "${database}.sql" ]; then + gen3_log_info "Uploading backup file ${database}.sql to s3://${bucket_name}/${s3_dir}/${database}.sql" + aws s3 cp "${database}.sql" "s3://${bucket_name}/${s3_dir}/${database}.sql" + + if [ $? 
-eq 0 ]; then + gen3_log_info "Successfully uploaded ${database}.sql to S3" + else + gen3_log_err "Failed to upload ${database}.sql to S3" + fi + gen3_log_info "Deleting temporary backup file ${database}.sql" + rm -f "${database}.sql" + else + gen3_log_err "Backup operation failed for ${database}" + rm -f "${database}.sql" + fi + done + sleep 600 + restartPolicy: Never From 6de65e70a7065789f6250ad05e94f816bf8eeeaf Mon Sep 17 00:00:00 2001 From: Michael Lukowski Date: Wed, 20 Mar 2024 15:32:38 -0500 Subject: [PATCH 11/14] add whitelist for qdr staging (#2509) --- files/squid_whitelist/web_whitelist | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index c191b2e8c1..afacba9e4c 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -165,3 +165,5 @@ www.rabbitmq.com www.uniprot.org vpodc.org yahoo.com +idp.stage.qdr.org +stage.qdr.org \ No newline at end of file From cde8a9666a53fe9c2345f4562f202a701e9a172a Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Mon, 25 Mar 2024 11:26:58 -0400 Subject: [PATCH 12/14] Cronjobs aren't beta (#2511) --- .../services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml b/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml index 74d7fc9a4d..93eaf7652d 100644 --- a/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml +++ b/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml @@ -1,5 +1,5 @@ --- -apiVersion: batch/v1beta1 +apiVersion: batch/v1 kind: CronJob metadata: name: fence-cleanup-expired-ga4gh-info From 79f305a7741a221a6a6e0236c08e0f610cc589f0 Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com> Date: Thu, 28 Mar 2024 15:13:38 -0500 Subject: [PATCH 13/14] MIDRC-672 Fix ECR access job role name conflict (#2515) --- gen3/bin/iam-serviceaccount.sh | 25 +++++++++++++++-------- gen3/bin/kube-setup-ecr-access-cronjob.sh | 4 ++-- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/gen3/bin/iam-serviceaccount.sh b/gen3/bin/iam-serviceaccount.sh index 0c5a8bba3c..1ea055f662 100644 --- a/gen3/bin/iam-serviceaccount.sh +++ b/gen3/bin/iam-serviceaccount.sh @@ -115,7 +115,7 @@ EOF # @return the resulting json from awscli ## function create_role(){ - local role_name="${vpc_name}-${SERVICE_ACCOUNT_NAME}-role" + local role_name="${1}" if [[ ${#role_name} -gt 63 ]]; then role_name=$(echo "$role_name" | head -c63) gen3_log_warning "Role name has been truncated, due to amazon role name 64 character limit. 
New role name is $role_name" @@ -123,8 +123,8 @@ function create_role(){ local assume_role_policy_path="$(create_assume_role_policy)" gen3_log_info "Entering create_role" - gen3_log_info " ${role_name}" - gen3_log_info " ${assume_role_policy_path}" + gen3_log_info " Role: ${role_name}" + gen3_log_info " Policy path: ${assume_role_policy_path}" local role_json role_json=$(aws iam create-role \ @@ -156,8 +156,8 @@ function add_policy_to_role(){ local role_name="${2}" gen3_log_info "Entering add_policy_to_role" - gen3_log_info " ${policy}" - gen3_log_info " ${role_name}" + gen3_log_info " Policy: ${policy}" + gen3_log_info " Role: ${role_name}" local result if [[ ${policy} =~ arn:aws:iam::aws:policy/[a-zA-Z0-9]+ ]] @@ -198,8 +198,8 @@ function create_role_with_policy() { local role_name="${2}" gen3_log_info "Entering create_role_with_policy" - gen3_log_info " ${policy}" - gen3_log_info " ${role_name}" + gen3_log_info " Policy: ${policy}" + gen3_log_info " Role: ${role_name}" local created_role_json created_role_json="$(create_role ${role_name})" || return $? @@ -357,7 +357,10 @@ function main() { local policy_validation local policy_source - local role_name="${vpc_name}-${SERVICE_ACCOUNT_NAME}-role" + local role_name=$ROLE_NAME + if [ -z "${role_name}" ]; then + role_name="${vpc_name}-${SERVICE_ACCOUNT_NAME}-role" + fi if [ -z ${NAMESPACE_SCRIPT} ]; then @@ -481,6 +484,12 @@ while getopts "$OPTSPEC" optchar; do ACTION="c" SERVICE_ACCOUNT_NAME=${OPTARG#*=} ;; + role-name) + ROLE_NAME="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 )) + ;; + role-name=*) + ROLE_NAME=${OPTARG#*=} + ;; list) ACTION="l" SERVICE_ACCOUNT_NAME="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 )) diff --git a/gen3/bin/kube-setup-ecr-access-cronjob.sh b/gen3/bin/kube-setup-ecr-access-cronjob.sh index d23afc862c..5c645ad35d 100644 --- a/gen3/bin/kube-setup-ecr-access-cronjob.sh +++ b/gen3/bin/kube-setup-ecr-access-cronjob.sh @@ -38,8 +38,8 @@ setup_ecr_access_job() { ] } EOM - local role_name - if ! role_name="$(gen3 iam-serviceaccount -c "${saName}" -p $tempFile)" || [[ -z "$role_name" ]]; then + local safe_role_name=$(gen3 api safe-name ${saName}-role | head -c63) + if ! role_name="$(gen3 iam-serviceaccount -c "${saName}" -p $tempFile --role-name $safe_role_name)" || [[ -z "$role_name" ]]; then gen3_log_err "Failed to create iam service account" rm $tempFile return 1 From 797fdf3fcd4f2ce8d66582a6e7891bfbba5bffe1 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:59:34 -0400 Subject: [PATCH 14/14] Adding awslabs.github.io to the squid whitelist (#2516) --- files/squid_whitelist/web_whitelist | 1 + 1 file changed, 1 insertion(+) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index afacba9e4c..6896314abb 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -7,6 +7,7 @@ achecker.ca apache.github.io api.epigraphdb.org api.monqcle.com +awslabs.github.io biodata-integration-tests.net marketing.biorender.com clinicaltrials.gov
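
Editor's note: for reference, a minimal sketch of how the --role-name option added in PATCH 13/14 is meant to be driven by a caller such as gen3/bin/kube-setup-ecr-access-cronjob.sh (shown above). The service-account name and policy file below are illustrative stand-ins, not values taken from the patch; the gen3 subcommands and the truncation pipeline are quoted from the diff itself.

    # hypothetical inputs -- the real caller derives these itself
    saName="ecr-access-job-sa"
    policyFile="/tmp/ecr-access-policy.json"

    # "gen3 api safe-name" namespaces the role name for the commons/namespace;
    # head -c63 keeps it under AWS IAM's 64-character role-name limit,
    # which is the naming conflict PATCH 13/14 fixes
    safe_role_name=$(gen3 api safe-name ${saName}-role | head -c63)

    # the new flag pins the role name instead of falling back to the old
    # "${vpc_name}-${SERVICE_ACCOUNT_NAME}-role" default
    gen3 iam-serviceaccount -c "${saName}" -p "$policyFile" --role-name "$safe_role_name"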