From 6e8e372469956f601d10dbccf8b6d9219223053f Mon Sep 17 00:00:00 2001
From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com>
Date: Tue, 12 Mar 2024 10:05:49 -0500
Subject: [PATCH 01/14] Split build-push workflows (#2499)
---
.github/workflows/build_awshelper.yaml | 21 +++++++++++++
.github/workflows/build_python3.10.yaml | 23 ++++++++++++++
.github/workflows/build_python3.9.yaml | 23 ++++++++++++++
.github/workflows/image_build_push.yaml | 42 -------------------------
4 files changed, 67 insertions(+), 42 deletions(-)
create mode 100644 .github/workflows/build_awshelper.yaml
create mode 100644 .github/workflows/build_python3.10.yaml
create mode 100644 .github/workflows/build_python3.9.yaml
delete mode 100644 .github/workflows/image_build_push.yaml
diff --git a/.github/workflows/build_awshelper.yaml b/.github/workflows/build_awshelper.yaml
new file mode 100644
index 0000000000..f42a136109
--- /dev/null
+++ b/.github/workflows/build_awshelper.yaml
@@ -0,0 +1,21 @@
+name: Build awshelper image
+
+on:
+ push:
+ paths:
+ - .github/workflows/build_awshelper.yaml
+ - Docker/awshelper/**
+
+jobs:
+ awshelper:
+ name: Build and push
+ uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
+ with:
+ DOCKERFILE_LOCATION: "./Docker/awshelper/Dockerfile"
+ OVERRIDE_REPO_NAME: "awshelper"
+ secrets:
+ ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
+ ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
+ QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
+ QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
+
diff --git a/.github/workflows/build_python3.10.yaml b/.github/workflows/build_python3.10.yaml
new file mode 100644
index 0000000000..993da14680
--- /dev/null
+++ b/.github/workflows/build_python3.10.yaml
@@ -0,0 +1,23 @@
+name: Build Python 3.10 image
+
+on:
+ push:
+ paths:
+ - .github/workflows/build_python3.10.yaml
+ - Docker/python-nginx/python3.10-buster/**
+
+jobs:
+ python_3-10:
+ name: Build and push
+ uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
+ with:
+ DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.10-buster/Dockerfile"
+ DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.10-buster"
+ OVERRIDE_REPO_NAME: "python"
+ OVERRIDE_TAG_NAME: "python3.10-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)"
+ secrets:
+ ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
+ ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
+ QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
+ QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
+
diff --git a/.github/workflows/build_python3.9.yaml b/.github/workflows/build_python3.9.yaml
new file mode 100644
index 0000000000..5bc8bc4629
--- /dev/null
+++ b/.github/workflows/build_python3.9.yaml
@@ -0,0 +1,23 @@
+name: Build Python 3.9 image
+
+on:
+ push:
+ paths:
+ - .github/workflows/build_python3.9.yaml
+ - Docker/python-nginx/python3.9-buster/**
+
+jobs:
+ python_3-9:
+ name: Build and push
+ uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
+ with:
+ DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.9-buster/Dockerfile"
+ DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.9-buster"
+ OVERRIDE_REPO_NAME: "python"
+ OVERRIDE_TAG_NAME: "python3.9-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)"
+ secrets:
+ ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
+ ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
+ QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
+ QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
+
diff --git a/.github/workflows/image_build_push.yaml b/.github/workflows/image_build_push.yaml
deleted file mode 100644
index d5bfea351d..0000000000
--- a/.github/workflows/image_build_push.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-name: Build Python Base Images
-
-on: push
-
-jobs:
- python_3-9:
- name: Python 3.9
- uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
- with:
- DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.9-buster/Dockerfile"
- DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.9-buster"
- OVERRIDE_REPO_NAME: "python"
- OVERRIDE_TAG_NAME: "python3.9-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)"
- secrets:
- ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
- ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
- QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
- QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
- python_3-10:
- name: Python 3.10
- uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
- with:
- DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.10-buster/Dockerfile"
- DOCKERFILE_BUILD_CONTEXT: "./Docker/python-nginx/python3.10-buster"
- OVERRIDE_REPO_NAME: "python"
- OVERRIDE_TAG_NAME: "python3.10-buster-$(echo ${GITHUB_REF#refs/*/} | tr / _)"
- secrets:
- ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
- ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
- QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
- QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
- awshelper:
- name: AwsHelper
- uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
- with:
- DOCKERFILE_LOCATION: "./Docker/awshelper/Dockerfile"
- OVERRIDE_REPO_NAME: "awshelper"
- secrets:
- ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
- ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
- QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
- QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
From b2195b1ae6f9d802d346629d126e9d3ba408eccd Mon Sep 17 00:00:00 2001
From: Pauline <4224001+paulineribeyre@users.noreply.github.com>
Date: Tue, 12 Mar 2024 10:07:34 -0500
Subject: [PATCH 02/14] fix wf naming
---
.github/workflows/build_awshelper.yaml | 2 +-
.github/workflows/build_python3.10.yaml | 2 +-
.github/workflows/build_python3.9.yaml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/build_awshelper.yaml b/.github/workflows/build_awshelper.yaml
index f42a136109..3d2da5393e 100644
--- a/.github/workflows/build_awshelper.yaml
+++ b/.github/workflows/build_awshelper.yaml
@@ -8,7 +8,7 @@ on:
jobs:
awshelper:
- name: Build and push
+ name: awshelper
uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
with:
DOCKERFILE_LOCATION: "./Docker/awshelper/Dockerfile"
diff --git a/.github/workflows/build_python3.10.yaml b/.github/workflows/build_python3.10.yaml
index 993da14680..80d2d76232 100644
--- a/.github/workflows/build_python3.10.yaml
+++ b/.github/workflows/build_python3.10.yaml
@@ -8,7 +8,7 @@ on:
jobs:
python_3-10:
- name: Build and push
+ name: Python 3.10
uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
with:
DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.10-buster/Dockerfile"
diff --git a/.github/workflows/build_python3.9.yaml b/.github/workflows/build_python3.9.yaml
index 5bc8bc4629..540e0d4eca 100644
--- a/.github/workflows/build_python3.9.yaml
+++ b/.github/workflows/build_python3.9.yaml
@@ -8,7 +8,7 @@ on:
jobs:
python_3-9:
- name: Build and push
+ name: Python 3.9
uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
with:
DOCKERFILE_LOCATION: "./Docker/python-nginx/python3.9-buster/Dockerfile"
From b6031e029db84ab0190d2a263c16b418b113482d Mon Sep 17 00:00:00 2001
From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com>
Date: Tue, 12 Mar 2024 12:03:41 -0500
Subject: [PATCH 03/14] Build awshelper workflow: always build (#2501)
---
.github/workflows/build_awshelper.yaml | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/build_awshelper.yaml b/.github/workflows/build_awshelper.yaml
index 3d2da5393e..36b5745dbd 100644
--- a/.github/workflows/build_awshelper.yaml
+++ b/.github/workflows/build_awshelper.yaml
@@ -1,10 +1,8 @@
name: Build awshelper image
-on:
- push:
- paths:
- - .github/workflows/build_awshelper.yaml
- - Docker/awshelper/**
+# Always build this image because it contains all the cloud-automation files.
+# Some jobs depend on arbitrary files and we need to test them with updated awshelper images.
+on: push
jobs:
awshelper:
From 24492c2d6868ce49a474617544b575a38697d0af Mon Sep 17 00:00:00 2001
From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com>
Date: Tue, 12 Mar 2024 16:23:59 -0400
Subject: [PATCH 04/14] Adding a gen3 db namespace to the temp files so they
don't overlap (#2502)
---
gen3/bin/kube-setup-revproxy.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gen3/bin/kube-setup-revproxy.sh b/gen3/bin/kube-setup-revproxy.sh
index 5db9850a18..fd30b478b3 100644
--- a/gen3/bin/kube-setup-revproxy.sh
+++ b/gen3/bin/kube-setup-revproxy.sh
@@ -114,8 +114,8 @@ done
if g3k_manifest_lookup .argo.argo_server_service_url 2> /dev/null; then
argo_server_service_url=$(g3k_manifest_lookup .argo.argo_server_service_url)
- g3k_kv_filter "${scriptDir}/gen3.nginx.conf/argo-server.conf" SERVICE_URL "${argo_server_service_url}" > /tmp/argo-server-with-url.conf
- filePath="/tmp/argo-server-with-url.conf"
+ g3k_kv_filter "${scriptDir}/gen3.nginx.conf/argo-server.conf" SERVICE_URL "${argo_server_service_url}" > /tmp/argo-server-with-url$(gen3 db namespace).conf
+ filePath="/tmp/argo-server-with-url$(gen3 db namespace).conf"
if [[ -f "$filePath" ]]; then
confFileList+=("--from-file" "$filePath")
fi
From e979669cd92cf1ecad69f2bc2837a8fed35e2926 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <2475897+mfshao@users.noreply.github.com>
Date: Wed, 13 Mar 2024 12:08:30 -0500
Subject: [PATCH 05/14] HP-1310 feat: updated related studies logic (#2498)
* feat: updated related studies logic
* update
---
.../healdata/heal-cedar-data-ingest.py | 174 +++++++++++-------
1 file changed, 107 insertions(+), 67 deletions(-)
diff --git a/files/scripts/healdata/heal-cedar-data-ingest.py b/files/scripts/healdata/heal-cedar-data-ingest.py
index c54f9d5aa0..e0c4b3c463 100644
--- a/files/scripts/healdata/heal-cedar-data-ingest.py
+++ b/files/scripts/healdata/heal-cedar-data-ingest.py
@@ -13,7 +13,7 @@
"study_metadata.study_type.study_subject_type": "Subject Type",
"study_metadata.human_subject_applicability.gender_applicability": "Gender",
"study_metadata.human_subject_applicability.age_applicability": "Age",
- "research_program": "Research Program"
+ "research_program": "Research Program",
}
# Defines how to handle special cases for values in filters
@@ -33,7 +33,7 @@
"Gender Queer": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Intersex": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Intersexed": "Genderqueer/gender nonconforming/neither exclusively male nor female",
- "Buisness Development": "Business Development"
+ "Buisness Development": "Business Development",
}
# Defines field that we don't want to include in the filters
@@ -54,24 +54,25 @@
def is_valid_uuid(uuid_to_test, version=4):
"""
Check if uuid_to_test is a valid UUID.
-
+
Parameters
----------
uuid_to_test : str
version : {1, 2, 3, 4}
-
+
Returns
-------
`True` if uuid_to_test is a valid UUID, otherwise `False`.
-
+
"""
-
+
try:
uuid_obj = UUID(uuid_to_test, version=version)
except ValueError:
return False
return str(uuid_obj) == uuid_to_test
+
def update_filter_metadata(metadata_to_update):
filter_metadata = []
for metadata_field_key, filter_field_key in FILTER_FIELD_MAPPINGS.items():
@@ -83,20 +84,21 @@ def update_filter_metadata(metadata_to_update):
print(filter_field_values)
raise TypeError("Neither a string nor a list")
for filter_field_value in filter_field_values:
- if (metadata_field_key, filter_field_value) in OMITTED_VALUES_MAPPING.items():
+ if (
+ metadata_field_key,
+ filter_field_value,
+ ) in OMITTED_VALUES_MAPPING.items():
continue
if filter_field_value in SPECIAL_VALUE_MAPPINGS:
filter_field_value = SPECIAL_VALUE_MAPPINGS[filter_field_value]
- filter_metadata.append({"key": filter_field_key, "value": filter_field_value})
+ filter_metadata.append(
+ {"key": filter_field_key, "value": filter_field_value}
+ )
filter_metadata = pydash.uniq(filter_metadata)
metadata_to_update["advSearchFilters"] = filter_metadata
# Retain these from existing tags
save_tags = ["Data Repository"]
- tags = [
- tag
- for tag in metadata_to_update["tags"]
- if tag["category"] in save_tags
- ]
+ tags = [tag for tag in metadata_to_update["tags"] if tag["category"] in save_tags]
# Add any new tags from advSearchFilters
for f in metadata_to_update["advSearchFilters"]:
if f["key"] == "Gender":
@@ -111,25 +113,30 @@ def update_filter_metadata(metadata_to_update):
def get_client_token(client_id: str, client_secret: str):
try:
token_url = f"http://revproxy-service/user/oauth2/token"
- headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- params = {'grant_type': 'client_credentials'}
- data = 'scope=openid user data'
+ headers = {"Content-Type": "application/x-www-form-urlencoded"}
+ params = {"grant_type": "client_credentials"}
+ data = "scope=openid user data"
token_result = requests.post(
- token_url, params=params, headers=headers, data=data,
+ token_url,
+ params=params,
+ headers=headers,
+ data=data,
auth=(client_id, client_secret),
)
- token = token_result.json()["access_token"]
+ token = token_result.json()["access_token"]
except:
raise Exception("Could not get token")
return token
-def get_related_studies(serial_num, hostname):
+def get_related_studies(serial_num, guid, hostname):
related_study_result = []
if serial_num:
- mds = requests.get(f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000")
+ mds = requests.get(
+ f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000"
+ )
if mds.status_code == 200:
related_study_metadata = mds.json()
@@ -137,15 +144,22 @@ def get_related_studies(serial_num, hostname):
related_study_metadata_key,
related_study_metadata_value,
) in related_study_metadata.items():
+ if related_study_metadata_key == guid or (
+ related_study_metadata_value["_guid_type"] != "discovery_metadata"
+ and related_study_metadata_value["_guid_type"]
+ != "unregistered_discovery_metadata"
+ ):
+ # do nothing for self, or for archived studies
+ continue
title = (
- related_study_metadata_value.get(
- "gen3_discovery", {}
- )
+ related_study_metadata_value.get("gen3_discovery", {})
.get("study_metadata", {})
.get("minimal_info", {})
.get("study_name", "")
)
- link = f"https://{hostname}/portal/discovery/{related_study_metadata_key}/"
+ link = (
+ f"https://{hostname}/portal/discovery/{related_study_metadata_key}/"
+ )
related_study_result.append({"title": title, "link": link})
return related_study_result
@@ -180,7 +194,7 @@ def get_related_studies(serial_num, hostname):
print("Getting CEDAR client access token")
access_token = get_client_token(client_id, client_secret)
-token_header = {"Authorization": 'bearer ' + access_token}
+token_header = {"Authorization": "bearer " + access_token}
limit = 10
offset = 0
@@ -192,16 +206,21 @@ def get_related_studies(serial_num, hostname):
print("Directory ID is not in UUID format!")
sys.exit(1)
-while((limit + offset <= total)):
+while limit + offset <= total:
# Get the metadata from cedar to register
print("Querying CEDAR...")
- cedar = requests.get(f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}", headers=token_header)
+ cedar = requests.get(
+ f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}",
+ headers=token_header,
+ )
# If we get metadata back now register with MDS
if cedar.status_code == 200:
metadata_return = cedar.json()
if "metadata" not in metadata_return:
- print("Got 200 from CEDAR wrapper but no metadata in body, something is not right!")
+ print(
+ "Got 200 from CEDAR wrapper but no metadata in body, something is not right!"
+ )
sys.exit(1)
total = metadata_return["metadata"]["totalCount"]
@@ -209,13 +228,17 @@ def get_related_studies(serial_num, hostname):
print(f"Successfully got {returned_records} record(s) from CEDAR directory")
for cedar_record in metadata_return["metadata"]["records"]:
# get the appl id from cedar for querying in our MDS
- cedar_appl_id = pydash.get(cedar_record, "metadata_location.nih_application_id")
+ cedar_appl_id = pydash.get(
+ cedar_record, "metadata_location.nih_application_id"
+ )
if cedar_appl_id is None:
print("This record doesn't have appl_id, skipping...")
continue
# Get the metadata record for the nih_application_id
- mds = requests.get(f"http://revproxy-service/mds/metadata?gen3_discovery.study_metadata.metadata_location.nih_application_id={cedar_appl_id}&data=true")
+ mds = requests.get(
+ f"http://revproxy-service/mds/metadata?gen3_discovery.study_metadata.metadata_location.nih_application_id={cedar_appl_id}&data=true"
+ )
if mds.status_code == 200:
mds_res = mds.json()
@@ -234,9 +257,13 @@ def get_related_studies(serial_num, hostname):
if mds_res["_guid_type"] == "discovery_metadata":
print("Metadata is already registered. Updating MDS record")
elif mds_res["_guid_type"] == "unregistered_discovery_metadata":
- print("Metadata has not been registered. Registering it in MDS record")
+ print(
+ "Metadata has not been registered. Registering it in MDS record"
+ )
else:
- print(f"This metadata data record has a special GUID type \"{mds_res['_guid_type']}\" and will be skipped")
+ print(
+ f"This metadata data record has a special GUID type \"{mds_res['_guid_type']}\" and will be skipped"
+ )
continue
if "clinicaltrials_gov" in cedar_record:
@@ -244,21 +271,27 @@ def get_related_studies(serial_num, hostname):
del cedar_record["clinicaltrials_gov"]
# some special handing for this field, because its parent will be deleted before we merging the CEDAR and MDS SLMD to avoid duplicated values
- cedar_record_other_study_websites = cedar_record.get("metadata_location", {}).get("other_study_websites", [])
+ cedar_record_other_study_websites = cedar_record.get(
+ "metadata_location", {}
+ ).get("other_study_websites", [])
del cedar_record["metadata_location"]
mds_res["gen3_discovery"]["study_metadata"].update(cedar_record)
- mds_res["gen3_discovery"]["study_metadata"]["metadata_location"]["other_study_websites"] = cedar_record_other_study_websites
+ mds_res["gen3_discovery"]["study_metadata"]["metadata_location"][
+ "other_study_websites"
+ ] = cedar_record_other_study_websites
# setup citations
- doi_citation = mds_res["gen3_discovery"]["study_metadata"].get("doi_citation", "")
- mds_res["gen3_discovery"]["study_metadata"]["citation"]["heal_platform_citation"] = doi_citation
-
+ doi_citation = mds_res["gen3_discovery"]["study_metadata"].get(
+ "doi_citation", ""
+ )
+ mds_res["gen3_discovery"]["study_metadata"]["citation"][
+ "heal_platform_citation"
+ ] = doi_citation
# setup repository_study_link
data_repositories = (
- mds_res
- .get("gen3_discovery", {})
+ mds_res.get("gen3_discovery", {})
.get("study_metadata", {})
.get("metadata_location", {})
.get("data_repositories", [])
@@ -275,8 +308,13 @@ def get_related_studies(serial_num, hostname):
repository_study_link = REPOSITORY_STUDY_ID_LINK_TEMPLATE[
repository["repository_name"]
].replace("This site is under maintenance...
Please check back later.