Skip to content

Commit

Permalink
Merge pull request #21 from Sage-Bionetworks-Workflows/add-patch-release
Browse files Browse the repository at this point in the history
[GEN-863] Add patch release
  • Loading branch information
thomasyu888 authored Sep 13, 2024
2 parents e16bee8 + 264bf13 commit bc01b10
Show file tree
Hide file tree
Showing 10 changed files with 741 additions and 0 deletions.
20 changes: 20 additions & 0 deletions modules/compare_releases.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Compares two GENIE releases given two synapse ids
// Runs compare_patch.py from the patch release container and emits whatever
// the script prints as the process's stdout output.
process compare_releases {
container "$params.patch_release_docker"
// Makes the SYNAPSE_AUTH_TOKEN Nextflow secret available to the task
secret 'SYNAPSE_AUTH_TOKEN'

input:
// Not referenced in the script below; it only lets a caller pass an upstream
// output so that this process runs after that step.
val previous
// Synapse id handed to compare_patch.py as --original_synid
val release_synid
// Synapse id handed to compare_patch.py as --new_synid
val new_release_synid

output:
stdout

script:
"""
python3 /patch_release/compare_patch.py \
--original_synid $release_synid \
--new_synid $new_release_synid
"""
}
31 changes: 31 additions & 0 deletions modules/create_dashboard_html.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Create dashboard html
// Runs R/dashboard_markdown_generator.R from /root/Genie in the main pipeline
// container for the given release and emits the script's stdout.
process create_dashboard_html {
// Echo the task's stdout to the Nextflow console
debug true
container "$params.main_pipeline_docker"
// Makes the SYNAPSE_AUTH_TOKEN Nextflow secret available to the task
secret 'SYNAPSE_AUTH_TOKEN'

input:
// Not referenced in the script below; it only lets a caller pass an upstream
// output so that this process runs after that step.
val previous
// Release name, passed as the first argument to the R script
val release
// When false, the R script is additionally given --staging
val production

output:
stdout
// path "data_guide.pdf"

script:
// The two branches differ only in the trailing --staging flag.
if (production) {
"""
cd /root/Genie
Rscript ./R/dashboard_markdown_generator.R $release \
--template_path ./templates/dashboardTemplate.Rmd
"""
} else {
"""
cd /root/Genie
Rscript ./R/dashboard_markdown_generator.R $release \
--template_path ./templates/dashboardTemplate.Rmd \
--staging
"""
}
}
33 changes: 33 additions & 0 deletions modules/patch_release.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Patch release
// Runs /patch_release/patch.py from the patch release container and emits
// whatever the script prints as the process's stdout output.
process patch_release {
    container "$params.patch_release_docker"
    // Makes the SYNAPSE_AUTH_TOKEN Nextflow secret available to the task
    secret 'SYNAPSE_AUTH_TOKEN'

    input:
    // The three Synapse ids are forwarded to patch.py as positional arguments,
    // in this order.
    val release_synid
    val new_release_synid
    val retracted_sample_synid
    // When true, patch.py is additionally given --production
    val production

    output:
    stdout

    script:
    if (production) {
        """
        python3 /patch_release/patch.py \
            $release_synid \
            $new_release_synid \
            $retracted_sample_synid \
            --production
        """
    }
    else {
        // The last argument must not be followed by a line continuation:
        // there is nothing after it to continue onto.
        """
        python3 /patch_release/patch.py \
            $release_synid \
            $new_release_synid \
            $retracted_sample_synid
        """
    }
}
10 changes: 10 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ profiles {
main_release_utils_docker = "sagebionetworks/main-genie-release-utils"
find_maf_artifacts_docker = "sagebionetworks/genie-artifact-finder"
create_data_guide_docker = "sagebionetworks/genie-data-guide"
patch_release_docker = "sagebionetworks/genie-patch-main-release"
}
}
aws_prod {
Expand All @@ -38,10 +39,18 @@ profiles {
memory = 16.GB
cpus = 4
}
withName: patch_release {
memory = 16.GB
cpus = 4
}
withName: create_consortium_release {
memory = 32.GB
cpus = 4
}
withName: create_dashboard_html {
memory = 32.GB
cpus = 4
}
withName: create_public_release {
memory = 16.GB
cpus = 4
Expand All @@ -57,6 +66,7 @@ profiles {
main_release_utils_docker = "sagebionetworks/main-genie-release-utils"
find_maf_artifacts_docker = "sagebionetworks/genie-artifact-finder"
create_data_guide_docker = "sagebionetworks/genie-data-guide"
patch_release_docker = "sagebionetworks/genie-patch-main-release"
}
}
}
44 changes: 44 additions & 0 deletions nextflow_schema_patch_release.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/Sage-Bionetworks-Workflows/nf-genie/master/nextflow_schema.json",
"title": "Sage-Bionetworks-Workflows/nf-genie pipeline parameters",
"description": "Nextflow pipeline for main GENIE processing",
"type": "object",
"definitions": {
"patch_release": {
"title": "patch_release",
"type": "object",
"description": "Patch release configurations",
"default": "",
"properties": {
"release_synid": {
"type": "string",
"description": "Existing consortium release synapse folder id"
},
"new_release_synid": {
"type": "string",
"description": "New consortium release synapse folder id"
},
"retracted_sample_synid": {
"type": "string",
"description": "samples_to_retract.csv of 3rd consortium release"
},
"release": {
"type": "string",
"description": "Release name. E.g: 13.1-consortium",
"default": "TEST.consortium",
"pattern": "\\d+[.]\\d+-(consortium)$"
},
"project_id": {
"type": "string",
"description": "Synapse GENIE internal projects."
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/patch_release"
}
]
}
41 changes: 41 additions & 0 deletions patch_release_main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env nextflow
// Ensure DSL2
nextflow.enable.dsl = 2

// IMPORT MODULES
include { patch_release } from './modules/patch_release'
include { create_data_guide } from './modules/create_data_guide'
include { create_dashboard_html } from './modules/create_dashboard_html'
include { compare_releases } from './modules/compare_releases'

// Default values for the workflow parameters; the trailing comments record
// which release each default Synapse id refers to.
params.release_synid = "syn53170398" // 15.4-consortium
params.new_release_synid = "syn62069187" // 15.6-consortium (in staging)
params.retracted_sample_synid = "syn54082015" // 16.3-consortium samples_to_retract.csv
params.release = "15.6-consortium"
// project_id = "syn7208886"
params.project_id = "syn22033066" // staging project
// Derive the production flag from the project id. Only the two project ids
// below are accepted; any other value stops the run with exit status 1 and the
// message shown.
if (params.project_id == "syn22033066") {
is_production = false
} else if (params.project_id == "syn3380222") {
is_production = true
} else {
exit 1, "project_id must be syn22033066 or syn3380222"
}

// Entry workflow: run patch_release, then create_dashboard_html and
// create_data_guide, and, outside production, compare_releases.
workflow {
// Wrap each parameter in a value channel so it can be passed to the processes
ch_release_synid = Channel.value(params.release_synid)
ch_new_release_synid = Channel.value(params.new_release_synid)
ch_retracted_sample_synid = Channel.value(params.retracted_sample_synid)
ch_release = Channel.value(params.release)
ch_project_id = Channel.value(params.project_id)
patch_release(ch_release_synid, ch_new_release_synid, ch_retracted_sample_synid, is_production)
// patch_release.out is passed as the first argument so the downstream step
// runs after the patch; create_dashboard_html does not use the value itself
// (presumably the same holds for create_data_guide -- confirm in its module).
create_dashboard_html(patch_release.out, ch_release, is_production)
create_data_guide(patch_release.out, ch_release, ch_project_id)
// This syn55146141 is hard coded because the ch_release used will
// definitely be different from ch_new_release_synid because that is the patch.
// TODO: we will want to implement a different comparison report to look at diffs
// This current comparison looks at similarities and is good for the staging pipeline.
if (!is_production) {
compare_releases(create_data_guide.out, "syn55146141", ch_new_release_synid)
}
}
5 changes: 5 additions & 0 deletions scripts/patch_release/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Base image: sagebionetworks/genie, pinned to the version-16.4.0 tag.
FROM sagebionetworks/genie:version-16.4.0

# The scripts live in /patch_release; the Nextflow modules call them by
# absolute path (e.g. /patch_release/patch.py).
WORKDIR /patch_release

# Copy the entire build context into the working directory.
COPY . .
6 changes: 6 additions & 0 deletions scripts/patch_release/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Public Patch Release

All sample / patient retractions have to occur within 3 months of the public release. If problems are found with older public releases, we will not patch them, but will add information to the release notes. Patch releases are not meant to resolve data issues; they exist only to remove samples whose consent has been retracted.

1. Create another consortium release
1. Generate the data guide, dashboard html, and release notes
84 changes: 84 additions & 0 deletions scripts/patch_release/compare_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
The command that was run:
python patch.py syn53170398 syn62069187 syn54082015
In lieu of unit or integration tests, the result of the above command is
checked by comparing 15.5-consortium (syn55146141) with 15.6-consortium
(staging, syn62069187) to confirm that they are the same:
python compare_patch.py --original_synid syn55146141 --new_synid syn62069187
"""
import argparse

import synapseclient
import synapseutils as synu


def _get_file_dict(syn: synapseclient.Synapse, synid: str) -> dict:
    """Map every file name found under a Synapse ID to its entity.

    Walks the hierarchy under ``synid`` and fetches each file's entity
    without downloading the file itself.

    Args:
        syn (synapseclient.Synapse): A Synapse client object.
        synid (str): The Synapse ID to walk for files.

    Returns:
        dict: A dictionary mapping file names to the entity returned by
            ``syn.get`` for that file.
    """
    file_entities = {}
    for _, _, files in synu.walk(syn, synid):
        # Entries are keyed by file name only, so a later file with the same
        # name (e.g. in another subfolder) replaces the earlier entry.
        for name, file_synid in files:
            file_entities[name] = syn.get(file_synid, downloadFile=False)
    return file_entities


def compare_releases(original_synid: str, new_synid: str) -> None:
    """Print the differences between two folders that should hold identical files.

    Logs in to Synapse, collects the files under both Synapse IDs and prints:
    the number of files in each folder, every file name that is present in
    only one of them, and every file present in both whose MD5s differ.

    Args:
        original_synid (str): The Synapse ID of the original release.
        new_synid (str): The Synapse ID of the new release.

    Returns:
        None: The comparison is only reported on stdout.
    """
    # Log in to Synapse
    syn = synapseclient.login()

    # File name -> entity for each release
    original_files = _get_file_dict(syn, original_synid)
    new_files = _get_file_dict(syn, new_synid)

    # Report the file counts so a mismatch is visible at a glance
    print("Number of files in old folder: ", len(original_files))
    print("Number of files in new folder: ", len(new_files))

    for filename in new_files:
        if filename not in original_files:
            print("File not found in old folder: ", filename)

    for filename, original_ent in original_files.items():
        new_ent = new_files.get(filename)
        if new_ent is None:
            print("File not found in new folder: ", filename)
        elif original_ent.md5 != new_ent.md5:
            # Same name in both folders but different content
            print("Files are different: ", filename)

def main(argv=None):
    """Parse the command line and compare the two given Synapse releases.

    Both ``--original_synid`` and ``--new_synid`` are required; argparse exits
    with a usage error when either is missing instead of passing ``None`` on
    to the comparison.

    Args:
        argv (list[str] | None): Arguments to parse. When None, argparse
            reads them from ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Compare two Synapse releases.')
    parser.add_argument(
        '--original_synid',
        type=str,
        required=True,
        help='The Synapse ID of the original release',
    )
    parser.add_argument(
        '--new_synid',
        type=str,
        required=True,
        help='The Synapse ID of the new release',
    )

    args = parser.parse_args(argv)

    compare_releases(args.original_synid, args.new_synid)


if __name__ == "__main__":
    main()
Loading

0 comments on commit bc01b10

Please sign in to comment.