Merge pull request #424 from apriltuesday/EVA-3531

Add labels to Nextflow processes for Slurm migration
EBIvariation · Apr 22, 2024 · 130f8cd
2 parents 1e1afdb + 9ff6514
commit 130f8cd
Show file tree

Hide file tree

Showing 6 changed files with 58 additions and 2 deletions.
diff --git a/pipelines/annotation_pipeline.nf b/pipelines/annotation_pipeline.nf
@@ -98,6 +98,8 @@ workflow {
  * Download ClinVar data, using the most recent XML dump.
  */
 process downloadClinvar {
+    label 'small_mem'
+
     output:
     path "clinvar.xml.gz", emit: clinvarXml
 
@@ -112,6 +114,9 @@ process downloadClinvar {
  * Download the Open Targets JSON schema.
  */
 process downloadJsonSchema {
+    label 'short_time'
+    label 'small_mem'
+
     output:
     path "opentargets-${params.schema}.json", emit: jsonSchema
 
@@ -222,6 +227,9 @@ process runStructural {
  * Unite results of consequence mapping.
  */
 process combineConsequences {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path consequencesSnp
     path consequencesRepeat
@@ -327,6 +335,8 @@ process generateAnnotatedXml {
  * Count number of RCV records in ClinVar.
  */
 process countClinvarRecords {
+    label 'small_mem'
+
     input:
     path clinvarXml
 
@@ -373,6 +383,9 @@ process generateEvidence {
  * Concatenate evidence strings into a single file.
  */
 process collectEvidenceStrings {
+    label 'short_time'
+    label 'small_mem'
+
     publishDir "${batchRoot}/evidence_strings",
         overwrite: true,
         mode: "copy",
@@ -394,6 +407,9 @@ process collectEvidenceStrings {
  * Aggregate counts into a single file and print the report.
  */
 process collectCounts {
+    label 'short_time'
+    label 'small_mem'
+
     publishDir "${batchRoot}/logs",
         overwrite: true,
         mode: "copy",
@@ -415,6 +431,9 @@ process collectCounts {
  * Check that the generated evidence strings do not contain any duplicated evidence strings.
  */
 process checkDuplicates {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path evidenceStrings
 

diff --git a/pipelines/export_curation_spreadsheet.nf b/pipelines/export_curation_spreadsheet.nf
@@ -55,6 +55,9 @@ workflow {
  * Extract the relevant columns from the input CSV.
  */
 process exportTable {
+    label 'short_time'
+    label 'small_mem'
+
     publishDir "${curationRoot}",
         overwrite: true,
         mode: "copy",
@@ -79,6 +82,9 @@ process exportTable {
  * Strip header from existing mappings file.
  */
  process stripMappingsHeader {
+    label 'short_time'
+    label 'small_mem'
+
     output:
     path "previous_mappings.tsv", emit: previousMappings
 
@@ -92,6 +98,9 @@ process exportTable {
  * Concatenate finished automated and manual mappings into a single file.
  */
 process combineManualAndAutomated {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path finishedMappings
 
@@ -111,6 +120,9 @@ process combineManualAndAutomated {
  * latest curation iteration.
  */
 process mergeWithLatestMappings {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path newMappings
     path previousMappings
@@ -134,6 +146,8 @@ process mergeWithLatestMappings {
  * Prepare the table for EFO import.
  */
 process createEfoTable {
+    label 'short_time'
+    label 'small_mem'
     publishDir "${curationRoot}",
         overwrite: true,
         mode: "copy",
@@ -157,6 +171,8 @@ process createEfoTable {
  * Generate ZOOMA feedback.
  */
 process generateZoomaFeedback {
+    label 'short_time'
+    label 'small_mem'
     publishDir "${curationRoot}",
         overwrite: true,
         mode: "copy",
@@ -184,6 +200,9 @@ process generateZoomaFeedback {
  * Check there are no complete duplicates in the final mappings file.
  */
 process checkDuplicates {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path newMappings
 
@@ -201,6 +220,8 @@ process checkDuplicates {
  * Add generated date and target ontology to header of final mappings file.
  */
 process addMappingsHeader {
+    label 'short_time'
+    label 'small_mem'
     publishDir "${curationRoot}",
         overwrite: true,
         mode: "copy",
@@ -226,6 +247,9 @@ process addMappingsHeader {
  * Update the symbolic links pointing to the location of the most recent curation result and ZOOMA feedback dataset.
  */
 process updateLinks {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path finalMappings
     path zoomaFeedback

diff --git a/pipelines/generate_curation_spreadsheet.nf b/pipelines/generate_curation_spreadsheet.nf
@@ -59,6 +59,8 @@ workflow {
  * Download ClinVar data, using the most recent XML dump.
  */
 process downloadClinvar {
+    label 'small_mem'
+
     output:
     path "clinvar.xml.gz", emit: clinvarXml
 
@@ -91,6 +93,9 @@ process parseTraits {
  * Split parsed traits into multiple chunks.
  */
 process splitTraits {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path parsedTraits
 
@@ -136,6 +141,8 @@ process processTraits {
  * Aggregate automated trait mappings into a single file.
  */
 process collectAutomatedMappings {
+    label 'short_time'
+    label 'small_mem'
     publishDir "${curationRoot}",
         overwrite: true,
         mode: "copy",
@@ -158,6 +165,9 @@ process collectAutomatedMappings {
  * Aggregate traits for manual curation into a single file.
  */
 process collectCurationTraits {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     path "curation_traits_*.tsv"
 

diff --git a/pipelines/utils.nf b/pipelines/utils.nf
@@ -2,6 +2,9 @@
  * Extract target ontology from mappings file header. Defaults to EFO if missing.
  */
 process getTargetOntology {
+    label 'short_time'
+    label 'small_mem'
+
     input:
     val mappingsFile
 

diff --git a/tests/output_generation/evaluation/test_ols_utils.py b/tests/output_generation/evaluation/test_ols_utils.py
@@ -9,5 +9,5 @@ def test_fetch_eval_data():
 
 def test_fetch_eval_data_include_neighbors():
     expected = ('MONDO:0004975', False, {'MONDO:0004975'},
-                {'EFO:0005815', 'MONDO:0001627'}, {'MONDO:0100087', 'EFO:1001870'})
+                {'EFO:0005815', 'MONDO:0001627'}, {'MONDO:0100087', 'MONDO:0014265', 'EFO:1001870'})
     assert fetch_eval_data(db_iden=('MONDO', 'MONDO:0004975'), include_neighbors=True) == expected
diff --git a/tests/output_generation/test_consequence_type.py b/tests/output_generation/test_consequence_type.py
@@ -44,4 +44,4 @@ def test_repeat_expansion_so_term():
 
 def test_get_so_accession_dict():
     results = get_so_accession_dict(page_size=100)
-    assert len(results) == 217
+    assert len(results) == 222