From 0209a4615f3c10449bb77b4136405a113734a7a8 Mon Sep 17 00:00:00 2001 From: ericjove <123645716+ericjove@users.noreply.github.com> Date: Fri, 6 Oct 2023 09:04:30 -0400 Subject: [PATCH 01/16] JIRA: PGAPX-1175 Added new --prefix command line option. --- scripts/pgap.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/scripts/pgap.py b/scripts/pgap.py index 65eb7b9..d018c95 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -892,6 +892,56 @@ def create_simple_input_yaml_file(fasta_location, genus_species, output_filename return os.path.abspath(output_filename) +def validate_prefix(prefix): + + """ + Validates the given prefix to ensure it can be used in a filename on Linux, macOS, and Windows. + + Exits the program with an error message if the prefix is not valid. + + Valid Prefix: + - Contains only alphanumeric characters, underscores, or hyphens. + e.g., "my_prefix", "prefix123", "123_prefix", "prefix-123" + + Invalid Prefix: + - Contains any characters other than alphanumeric characters, underscores, or hyphens. + e.g., "my prefix", "prefix#", "prefix@", "prefix!" + + Note: This function is compatible with Linux, macOS, and Windows filenames. + """ + if not re.match("^[a-zA-Z0-9_\-]+$", prefix): + sys.exit(f"The provided prefix '{prefix}' is invalid. A valid prefix should only contain alphanumeric characters, underscores, and hyphens.") + return True + +def apply_prefix_to_output_dir(output_dir, prefix): + """ + Removes the default prefix "annot" and adds the given prefix to each file in the specified directory. + + Parameters: + - output_dir (str): The path of the directory containing the files to rename. + - prefix (str): The prefix to add to each file name. 
+ + Returns: + - None + """ + if not os.path.exists(output_dir): + print(f"The directory {output_dir} does not exist.") + return + + for filename in os.listdir(output_dir): + file_path = os.path.join(output_dir, filename) + if os.path.isfile(file_path): + # Remove existing 'annot' prefix if present + new_filename = filename + if filename.startswith("annot"): + new_filename = filename[5:] + + # Add the new prefix + new_file_path = os.path.join(output_dir, prefix + new_filename) + + # Rename the file + os.rename(file_path, new_file_path) + def main(): parser = argparse.ArgumentParser(description="Input must be provided as:\n" @@ -958,6 +1008,8 @@ def main(): #help='Set a maximum time for pipeline to run, format is D:H:M:S, H:M:S, or M:S, or S (default: %(default)s)') parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode, for scripts') + parser.add_argument('--prefix', type=str, + help='Set the prefix for output files (default: "annot")') parser.add_argument('--no-self-update', action='store_true', dest='no_self_up', help='Do not attempt to update this script') @@ -971,6 +1023,10 @@ def main(): args = parser.parse_args() + # Ensure that user provided prefix is valid. + if args.prefix: + validate_prefix(args.prefix) + # const storing the initial working directory. # Please do not modify this variable's value. 
ORIGINAL_WORKSPACE = os.getcwd() @@ -1054,6 +1110,10 @@ def main(): os.remove(submol_modified) remove_empty_files(outputdir) + + if args.prefix: + apply_prefix_to_output_dir(outputdir, args.prefix) + except (Exception, KeyboardInterrupt) as exc: if args.debug: raise From af468ae34c82dc88399aeedfd8f12f1e87052367 Mon Sep 17 00:00:00 2001 From: ericjove <123645716+ericjove@users.noreply.github.com> Date: Fri, 6 Oct 2023 09:16:46 -0400 Subject: [PATCH 02/16] JIRA: PGAPX-1175 Removed erroneous --- scripts/pgap.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/pgap.py b/scripts/pgap.py index d018c95..761677c 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -1110,7 +1110,6 @@ def main(): os.remove(submol_modified) remove_empty_files(outputdir) - if args.prefix: apply_prefix_to_output_dir(outputdir, args.prefix) From 1b874287c38b67ef5fc47cde2c436b39ae8ac00a Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Tue, 28 Nov 2023 13:32:05 -0500 Subject: [PATCH 03/16] Organism name: genus or any level below genus; JIRA: PGAPX-1197 --- scripts/pgap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pgap.py b/scripts/pgap.py index 761677c..60ae899 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -954,7 +954,7 @@ def main(): parser.add_argument('-g', '--genome', type=str, help='Path to genomic fasta') - parser.add_argument('-s', '--organism', type=str, help='Binomial name') + parser.add_argument('-s', '--organism', type=str, help='Organism name: genus or any level below genus') parser.add_argument('input', nargs='?', help=argparse.SUPPRESS) From aeeace979246305e7efdc9299d37181b615990de Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Thu, 30 Nov 2023 11:12:48 -0500 Subject: [PATCH 04/16] changed help to Organism name: genus, genus species, or more specific and known to NCBI Taxonomy, see https://github.com/ncbi/pgap/wiki/Input-Files#taxonomy-information for more information; JIRA: PGAPX-1197 --- scripts/pgap.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pgap.py b/scripts/pgap.py index 60ae899..662fdd7 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -954,7 +954,7 @@ def main(): parser.add_argument('-g', '--genome', type=str, help='Path to genomic fasta') - parser.add_argument('-s', '--organism', type=str, help='Organism name: genus or any level below genus') + parser.add_argument('-s', '--organism', type=str, help='Organism name: genus, genus species, or more specific and known to NCBI Taxonomy, see https://github.com/ncbi/pgap/wiki/Input-Files#taxonomy-information for more information') parser.add_argument('input', nargs='?', help=argparse.SUPPRESS) From bd17caff0120150fbcfe4c00623ee7d31ca4f7fc Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Mon, 4 Dec 2023 13:07:32 -0500 Subject: [PATCH 05/16] panfs->VAST; JIRA: PGAPX-1226 --- bacterial_trna/trnascan_wnode.cwl | 2 +- progs/gencode2trnamodel.cwl | 2 +- wf_common.cwl | 20 +++++++++++++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bacterial_trna/trnascan_wnode.cwl b/bacterial_trna/trnascan_wnode.cwl index 8446fe8..f6db487 100644 --- a/bacterial_trna/trnascan_wnode.cwl +++ b/bacterial_trna/trnascan_wnode.cwl @@ -40,7 +40,7 @@ inputs: prefix: -g binary: type: string? 
- default: /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/ThirdParty/tRNAscan-SE/production/bin/tRNAscan-SE + default: /netmnt/vast01/gp/ThirdParty/tRNAscan-SE/production/bin/tRNAscan-SE inputBinding: prefix: -tRNAscan taxid: diff --git a/progs/gencode2trnamodel.cwl b/progs/gencode2trnamodel.cwl index b6eaa90..385bcae 100644 --- a/progs/gencode2trnamodel.cwl +++ b/progs/gencode2trnamodel.cwl @@ -26,7 +26,7 @@ inputs: # genetic_codes = "ystmito"; # } expression: | - ${ var gc = inputs.gencode; var gc2 = ""; if( gc == 4 ) { gc2="othmito"; } else if ( gc==6 ) { gc2="cilnuc"; } else if ( gc == 9) { gc2 = "echdmito" } else if ( gc == 5) { gc2 = "invmito" } else if ( gc == 2) { gc2 = "vertmito" } else if ( gc == 3) { gc2 = "ystmito" } ; if ( gc2 != "" ) { return { "output": "/panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/ThirdParty/tRNAscan-SE/production/lib/tRNAscan-SE/gcode/gcode."+gc2 }; } else { return { "output": null }; } } + ${ var gc = inputs.gencode; var gc2 = ""; if( gc == 4 ) { gc2="othmito"; } else if ( gc==6 ) { gc2="cilnuc"; } else if ( gc == 9) { gc2 = "echdmito" } else if ( gc == 5) { gc2 = "invmito" } else if ( gc == 2) { gc2 = "vertmito" } else if ( gc == 3) { gc2 = "ystmito" } ; if ( gc2 != "" ) { return { "output": "/netmnt/vast01/gp/ThirdParty/tRNAscan-SE/production/lib/tRNAscan-SE/gcode/gcode."+gc2 }; } else { return { "output": null }; } } outputs: output: string? 
diff --git a/wf_common.cwl b/wf_common.cwl index 3832b59..fedba6b 100755 --- a/wf_common.cwl +++ b/wf_common.cwl @@ -120,6 +120,7 @@ steps: - 23s_model_path - AntiFamLib - all_order_specific_blastdb_file + - amr_finder_plus_database - asn2pas_xsl - identification_db_dir - CDDdata2 @@ -398,7 +399,7 @@ steps: Generate_23S_rRNA_Annotation_annotation: bacterial_noncoding/annotations_23s Post_process_CMsearch_annotations_annots_5S: bacterial_noncoding/annotations_5s genemark_path: - default: /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/ThirdParty/GeneMark/ + default: /netmnt/vast01/gp/ThirdParty/GeneMark/ thresholds: passdata/thresholds out: [lds2,seqids,proteins, aligns, annotation, out_hmm_params, outseqs, prot_ids, models1] @@ -472,7 +473,7 @@ steps: wp_hashes: passdata/wp_hashes taxon_db: passdata/taxon_db genemark_path: - default: /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/ThirdParty/GeneMark/ + default: /netmnt/vast01/gp/ThirdParty/GeneMark/ out: - id: Find_Best_Evidence_Alignments_aligns - id: Run_GeneMark_Post_models @@ -561,8 +562,18 @@ steps: # # tasktype coded, input/output matches # # application not coded # ############################################### - # # AMR plane is for later stages skipping + # # AMR plane # ############################################### + AMR_naming: + run: amr_naming/wf_amr_naming.cwl + in: + annotation: bacterial_annot_4/out_annotation + # aka Bacterial_Annot_Filter/out_annotation + database: passdata/amr_finder_plus_database + passdata: passdata/taxon_db + taxid: taxid + out: [amr_report] + bacterial_orthology_conditional: run: bacterial_orthology/wf_bacterial_orthology_conditional.cwl in: @@ -1062,4 +1073,7 @@ outputs: checkm_results: type: File outputSource: checkm/checkm_results + amr_report: + type: File + outputSource: AMR_naming/amr_report From 7569d6ed7b62a2f0f600c12f6579b69d0073bb7d Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Mon, 4 Dec 2023 14:50:44 -0500 Subject: [PATCH 06/16] previous commit accidentally grabbed 
changes for AMR that are not ready yet; JIRA: PGAPX-1226 --- wf_common.cwl | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) mode change 100755 => 100644 wf_common.cwl diff --git a/wf_common.cwl b/wf_common.cwl old mode 100755 new mode 100644 index fedba6b..598d5a7 --- a/wf_common.cwl +++ b/wf_common.cwl @@ -120,7 +120,6 @@ steps: - 23s_model_path - AntiFamLib - all_order_specific_blastdb_file - - amr_finder_plus_database - asn2pas_xsl - identification_db_dir - CDDdata2 @@ -562,18 +561,8 @@ steps: # # tasktype coded, input/output matches # # application not coded # ############################################### - # # AMR plane + # # AMR plane is for later stages skipping # ############################################### - AMR_naming: - run: amr_naming/wf_amr_naming.cwl - in: - annotation: bacterial_annot_4/out_annotation - # aka Bacterial_Annot_Filter/out_annotation - database: passdata/amr_finder_plus_database - passdata: passdata/taxon_db - taxid: taxid - out: [amr_report] - bacterial_orthology_conditional: run: bacterial_orthology/wf_bacterial_orthology_conditional.cwl in: @@ -1073,7 +1062,4 @@ outputs: checkm_results: type: File outputSource: checkm/checkm_results - amr_report: - type: File - outputSource: AMR_naming/amr_report From 3b8acd25a515b365b39d258fbb14a69cbb5a743a Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Tue, 19 Dec 2023 10:36:13 -0500 Subject: [PATCH 07/16] remove clade_assign; JIRA: PGAPX-1227 --- clade_assign/align_sort.cwl | 28 ---------- clade_assign/assign_clade_bacteria.cwl | 65 ---------------------- clade_assign/gpx_make_outputs.cwl | 28 ---------- clade_assign/gpx_qsubmit.cwl | 52 ----------------- clade_assign/tblastn_wnode.cwl | 49 ---------------- clade_assign/wf_assign_clade.cwl | 40 ------------- clade_assign/wf_clade_assign.cwl | 44 --------------- clade_assign/wf_find_marker_alignments.cwl | 44 --------------- 8 files changed, 350 deletions(-) delete mode 100644 clade_assign/align_sort.cwl delete mode 
100644 clade_assign/assign_clade_bacteria.cwl delete mode 100644 clade_assign/gpx_make_outputs.cwl delete mode 100644 clade_assign/gpx_qsubmit.cwl delete mode 100644 clade_assign/tblastn_wnode.cwl delete mode 100644 clade_assign/wf_assign_clade.cwl delete mode 100644 clade_assign/wf_clade_assign.cwl delete mode 100755 clade_assign/wf_find_marker_alignments.cwl diff --git a/clade_assign/align_sort.cwl b/clade_assign/align_sort.cwl deleted file mode 100644 index f8edbff..0000000 --- a/clade_assign/align_sort.cwl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool - -label: "Assign Clade, align_sort" - - -baseCommand: align_sort -arguments: [ -ifmt, seq-align-set, -k, "query,subject", -nogenbank ] - -inputs: - hits: - type: File - inputBinding: - prefix: -input - output: - type: string? - default: sorted-aligns.asn - inputBinding: - prefix: -o - - -outputs: - sorted_aligns: - type: File - outputBinding: - glob: $(inputs.output) diff --git a/clade_assign/assign_clade_bacteria.cwl b/clade_assign/assign_clade_bacteria.cwl deleted file mode 100644 index ee8eaea..0000000 --- a/clade_assign/assign_clade_bacteria.cwl +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool - -label: "Assign Clade, assign_clade_bacteria" - - -baseCommand: assign_clade_bacteria -arguments: [ -comp_based_stats, "F", -lower-threshold, "0.004", -matrix, BLOSUM80, -min-markers, "17", -release-id, "0", -seg, "22 2.2 2.5", -soft_masking, "true", -task, tblastn, -threshold, "18", -upper-threshold, "0.01", -word_size, "6", -nogenbank ] - -inputs: - conffile: - type: File? 
- default: - class: File - location: /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/etc/bact/ncbi.ini - inputBinding: - prefix: -conffile - assembly_id: - type: string - inputBinding: - prefix: -assembly-taxid - sorted_aligns: - type: File - inputBinding: - prefix: -hits - asn_cache: - type: Directory - inputBinding: - prefix: -asn-cache - valueFrom: $(inputs.asn_cache.basename),$(inputs.CladeMarkers_asn_cache.basename) - CladeMarkers_asn_cache: - type: Directory - ani: - type: File - inputBinding: - prefix: -ani -# clade_tree: -# type: File -# inputBinding: -# prefix: -clade-tree -# clade_tree_manifest: -# type: File? -# default: -# class: File -# location: ../input/dummy.mft -# inputBinding: -# prefix: -clade-tree-manifest - reference_set: - type: File - inputBinding: - prefix: -reference-set - output: - type: string? - default: clade_assignment.xml - inputBinding: - prefix: -o - -outputs: - clade_assignment: - type: File - outputBinding: - glob: $(inputs.output) - diff --git a/clade_assign/gpx_make_outputs.cwl b/clade_assign/gpx_make_outputs.cwl deleted file mode 100644 index f2599d7..0000000 --- a/clade_assign/gpx_make_outputs.cwl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool - -label: "Find Marker Alignments, gather" - - -baseCommand: gpx_make_outputs -arguments: [ -unzip, '*', -num-partitions, "1" ] - -inputs: - input_path: - type: Directory - inputBinding: - prefix: -input-path - output_name: - type: string? 
- default: "blast.#.asn" - inputBinding: - prefix: -output - -outputs: - blast_align: - type: File - outputBinding: - #glob: $(inputs.output_name) - glob: blast.*.asn diff --git a/clade_assign/gpx_qsubmit.cwl b/clade_assign/gpx_qsubmit.cwl deleted file mode 100644 index 69bd13a..0000000 --- a/clade_assign/gpx_qsubmit.cwl +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool - -label: "Find Marker Alignments, scatter" - - -requirements: - - class: InitialWorkDirRequirement - listing: - - entry: $(inputs.asn_cache) - writable: False - - entry: $(inputs.CladeMarkers_asn_cache) - writable: False - - entry: $(inputs.blastdb_dir) - writable: False - -baseCommand: gpx_qsubmit -arguments: [ -affinity, "subject", -max-batch-length, "10000", -nogenbank ] - -inputs: - asn_cache: - type: Directory - inputBinding: - prefix: -asn-cache - valueFrom: $(inputs.asn_cache.basename),$(inputs.CladeMarkers_asn_cache.basename) - CladeMarkers_asn_cache: - type: Directory - seqids: - type: File - inputBinding: - prefix: -ids - blastdb_dir: - type: Directory - blastdb: - type: string? - default: blastdb - inputBinding: - prefix: -db - valueFrom: $(inputs.blastdb_dir.path)/$(inputs.blastdb) - output: - type: string? 
- default: jobs.xml - inputBinding: - prefix: -output - -outputs: - jobs: - type: File - outputBinding: - glob: $(inputs.output) diff --git a/clade_assign/tblastn_wnode.cwl b/clade_assign/tblastn_wnode.cwl deleted file mode 100644 index 2f40c70..0000000 --- a/clade_assign/tblastn_wnode.cwl +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool - -label: Find Marker Alignments, execute" - - -requirements: - - class: InitialWorkDirRequirement - listing: - - entry: $(inputs.asn_cache) - writable: False - - entry: $(inputs.CladeMarkers_asn_cache) - writable: False - - entry: $(inputs.blastdb_dir) - writable: False - -baseCommand: tblastn_wnode - -arguments: [ -backlog, "1", -comp_based_stats, "F", -db_gencode, "4", -delay, "0", -evalue, "0.001", -matrix, BLOSUM80, -max-jobs, "1", -seg, "22 2.2 2.5", -soft_masking, "true", -threshold, "18", -word_size, "6", -nogenbank ] - -inputs: - asn_cache: - type: Directory - inputBinding: - prefix: -asn-cache - valueFrom: $(inputs.asn_cache.basename),$(inputs.CladeMarkers_asn_cache.basename) - CladeMarkers_asn_cache: - type: Directory - input_jobs: - type: File? - default: - class: File - location: jobs.xml - inputBinding: - prefix: -input-jobs - output_dir: - type: string? 
- default: output - inputBinding: - prefix: -O - -outputs: - outdir: - type: Directory - outputBinding: - glob: $(inputs.output_dir) - diff --git a/clade_assign/wf_assign_clade.cwl b/clade_assign/wf_assign_clade.cwl deleted file mode 100644 index 19d6c5a..0000000 --- a/clade_assign/wf_assign_clade.cwl +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: Workflow - -label: "Assign Clade" - -inputs: - asn_cache: Directory - CladeMarkers_asn_cache: Directory - assembly_id: string - hits: File - ani: File -# clade_tree: File - reference_set: File - -outputs: - clade_assignment: - type: File - outputSource: assign_clade_bacteria/clade_assignment - -steps: - align_sort: - run: align_sort.cwl - in: - hits: hits - out: [ sorted_aligns ] - - assign_clade_bacteria: - run: assign_clade_bacteria.cwl - in: - sorted_aligns: align_sort/sorted_aligns - assembly_id: assembly_id - asn_cache: asn_cache - CladeMarkers_asn_cache: CladeMarkers_asn_cache - ani: ani -# clade_tree: clade_tree - reference_set: reference_set - out: [ clade_assignment ] - diff --git a/clade_assign/wf_clade_assign.cwl b/clade_assign/wf_clade_assign.cwl deleted file mode 100644 index c1295e3..0000000 --- a/clade_assign/wf_clade_assign.cwl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: Workflow - -label: "Assign Clade plane complete workflow" - -requirements: - - class: SubworkflowFeatureRequirement - -inputs: - asn_cache: Directory - CladeMarkers_asn_cache: Directory - seqids: File - blastdb_dir: Directory - assembly_id: string - ani: File - reference_set: File - -outputs: - clade_assignment: - type: File - outputSource: wf_assign_clade/clade_assignment - -steps: - wf_assign_clade: - run: wf_assign_clade.cwl - in: - hits: wf_find_marker_alignments/blast_align - assembly_id: assembly_id - asn_cache: asn_cache - CladeMarkers_asn_cache: CladeMarkers_asn_cache - ani: ani - reference_set: reference_set - out: [ clade_assignment ] - - 
wf_find_marker_alignments: - run: wf_find_marker_alignments.cwl - in: - asn_cache: asn_cache - CladeMarkers_asn_cache: CladeMarkers_asn_cache - seqids: seqids - blastdb_dir: blastdb_dir - out: [blast_align] diff --git a/clade_assign/wf_find_marker_alignments.cwl b/clade_assign/wf_find_marker_alignments.cwl deleted file mode 100755 index 5cc801b..0000000 --- a/clade_assign/wf_find_marker_alignments.cwl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: Workflow - -label: "Find Marker Alignments" - -inputs: - asn_cache: Directory - CladeMarkers_asn_cache: Directory - seqids: File - blastdb_dir: Directory - -outputs: - blast_align: - type: File - outputSource: gpx_make_outputs/blast_align - -steps: - gpx_qsubmit: - run: gpx_qsubmit.cwl - in: - asn_cache: asn_cache - CladeMarkers_asn_cache: CladeMarkers_asn_cache - seqids: seqids - blastdb_dir: blastdb_dir - out: [jobs] - - tblastn_wnode: - run: tblastn_wnode.cwl - in: - asn_cache: asn_cache - CladeMarkers_asn_cache: CladeMarkers_asn_cache - input_jobs: gpx_qsubmit/jobs - blastdb_dir: blastdb_dir - out: [outdir] - - gpx_make_outputs: - run: gpx_make_outputs.cwl - in: - input_path: tblastn_wnode/outdir - out: [blast_align] - - From f5d66effb5c3d9871b61d21868ae87e2de088332 Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Wed, 24 Jan 2024 06:12:44 -0500 Subject: [PATCH 08/16] step renaming that does not involve file renaming; JIRA: PGAPX-1206 --- .../test_preserve_annot_markup/test.cwl | 10 ++--- .../unit_tests/test_univ_prot_stats/test.cwl | 4 +- protein_alignment/cat.cwl | 4 +- protein_alignment/wf_align_filter.cwl | 4 +- protein_alignment/wf_protein_alignment.cwl | 4 +- wf_common.cwl | 42 +++++++++---------- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/progs/unit_tests/test_preserve_annot_markup/test.cwl b/progs/unit_tests/test_preserve_annot_markup/test.cwl index d8ea225..b4ed8d1 100755 --- a/progs/unit_tests/test_preserve_annot_markup/test.cwl +++ 
b/progs/unit_tests/test_preserve_annot_markup/test.cwl @@ -44,7 +44,7 @@ steps: Final_Bacterial_Package_asn_cleanup: # TESTED as part of "last couple of nodes" test run: progs/asn_cleanup.cwl in: - # inp_annotation: bacterial_annot_4/out_annotation + # inp_annotation: bacterial_annot_2nd_pass/out_annotation # inp_annotation: bacterial_annot_4_out_annotation_bypass # , this bypass does not work: SQD-4522 # using oroginal input from official buildrun template (that is from fam_report output) inp_annotation: fam_report_bypass @@ -177,10 +177,10 @@ steps: in: annot_request_id: default: -1 # this is dummy annot_request_id - # hmm_search: bacterial_annot_3/Search_Naming_HMMs_hmm_hits # Search Naming HMMs bacterial_annot 3 - hmm_search: bacterial_annot_3_Search_Naming_HMMs_hmm_hits_bypass # for bacterial_annot_3/Search_Naming_HMMs_hmm_hits # Search Naming HMMs bacterial_annot 3 - # hmm_search_proteins: bacterial_annot_3/Run_GeneMark_Post_models # genemark models - hmm_search_proteins: bacterial_annot_3_Run_GeneMark_Post_models_bypass # for bacterial_annot_3/Run_GeneMark_Post_models # genemark models + # hmm_search: bacterial_annot_misc/Search_Naming_HMMs_hmm_hits # Search Naming HMMs bacterial_annot 3 + hmm_search: bacterial_annot_3_Search_Naming_HMMs_hmm_hits_bypass # for bacterial_annot_misc/Search_Naming_HMMs_hmm_hits # Search Naming HMMs bacterial_annot 3 + # hmm_search_proteins: bacterial_annot_misc/Run_GeneMark_Post_models # genemark models + hmm_search_proteins: bacterial_annot_3_Run_GeneMark_Post_models_bypass # for bacterial_annot_misc/Run_GeneMark_Post_models # genemark models input: Final_Bacterial_Package_final_bact_asn/outfull univ_prot_xml: univ_prot_xml # /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/home/badrazat/local-install/2018-05-17/third-party/data/BacterialPipeline/uniColl/ver-3.2/universal.xml val_res_den_xml: val_res_den_xml # /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/home/badrazat/local-install/2018-05-17/etc/validation-results.xml diff --git 
a/progs/unit_tests/test_univ_prot_stats/test.cwl b/progs/unit_tests/test_univ_prot_stats/test.cwl index c9c2154..e51e711 100755 --- a/progs/unit_tests/test_univ_prot_stats/test.cwl +++ b/progs/unit_tests/test_univ_prot_stats/test.cwl @@ -19,8 +19,8 @@ steps: in: annot_request_id: default: -1 # this is dummy annot_request_id - hmm_search: hmm_hits # bacterial_annot_3_Search_Naming_HMMs_hmm_hits_bypass # for bacterial_annot_3/Search_Naming_HMMs_hmm_hits # Search Naming HMMs bacterial_annot 3 - hmm_search_proteins: hmm_search_proteins # bacterial_annot_3_Run_GeneMark_Post_models_bypass # for bacterial_annot_3/Run_GeneMark_Post_models # genemark models + hmm_search: hmm_hits # bacterial_annot_3_Search_Naming_HMMs_hmm_hits_bypass # for bacterial_annot_misc/Search_Naming_HMMs_hmm_hits # Search Naming HMMs bacterial_annot 3 + hmm_search_proteins: hmm_search_proteins # bacterial_annot_3_Run_GeneMark_Post_models_bypass # for bacterial_annot_misc/Run_GeneMark_Post_models # genemark models input: outfull # Final_Bacterial_Package_final_bact_asn/outfull univ_prot_xml: univ_prot_xml # /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/home/badrazat/local-install/2018-05-17/third-party/data/BacterialPipeline/uniColl/ver-3.2/universal.xml val_res_den_xml: val_res_den_xml # /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/home/badrazat/local-install/2018-05-17/etc/validation-results.xml diff --git a/protein_alignment/cat.cwl b/protein_alignment/cat.cwl index 9587ef4..c8a4bca 100644 --- a/protein_alignment/cat.cwl +++ b/protein_alignment/cat.cwl @@ -9,11 +9,11 @@ baseCommand: cat stdout: out.asn inputs: - file_in_1: + file_in_1st: type: File inputBinding: position: 1 - file_in_2: + file_in_2nd: type: File inputBinding: position: 2 diff --git a/protein_alignment/wf_align_filter.cwl b/protein_alignment/wf_align_filter.cwl index 3acbe75..14753b9 100644 --- a/protein_alignment/wf_align_filter.cwl +++ b/protein_alignment/wf_align_filter.cwl @@ -23,8 +23,8 @@ steps: cat: run: cat.cwl in: - file_in_1: 
blast_full - file_in_2: prosplign + file_in_1st: blast_full + file_in_2nd: prosplign out: [ file_out ] align_filter: diff --git a/protein_alignment/wf_protein_alignment.cwl b/protein_alignment/wf_protein_alignment.cwl index 35dc9f9..6cb6922 100755 --- a/protein_alignment/wf_protein_alignment.cwl +++ b/protein_alignment/wf_protein_alignment.cwl @@ -63,8 +63,8 @@ steps: cat: run: cat.cwl in: - file_in_1: Seed_Protein_Alignments/blast_align - file_in_2: Seed_Search_Compartments/blast_align + file_in_1st: Seed_Protein_Alignments/blast_align + file_in_2nd: Seed_Search_Compartments/blast_align out: [ file_out ] Sort_Seed_Hits: diff --git a/wf_common.cwl b/wf_common.cwl index 598d5a7..3c2d628 100644 --- a/wf_common.cwl +++ b/wf_common.cwl @@ -402,7 +402,7 @@ steps: thresholds: passdata/thresholds out: [lds2,seqids,proteins, aligns, annotation, out_hmm_params, outseqs, prot_ids, models1] - spurious_annot_1: # PLANE + spurious_annot_prelim: # PLANE run: spurious_annot/wf_spurious_annot_pass1.cwl in: Extract_ORF_Proteins_proteins: bacterial_annot/proteins @@ -413,14 +413,14 @@ steps: scatter_gather_nchunks: scatter_gather_nchunks out: [AntiFam_tainted_proteins_I___oseqids] - bacterial_annot_2: # PLANE + bacterial_annot_1st_pass: # PLANE run: bacterial_annot/wf_bacterial_annot_pass2.cwl in: lds2: bacterial_annot/lds2 proteins: bacterial_annot/proteins prot_ids_A: bacterial_annot/seqids prot_ids_B1: bacterial_annot/prot_ids - prot_ids_B2: spurious_annot_1/AntiFam_tainted_proteins_I___oseqids + prot_ids_B2: spurious_annot_prelim/AntiFam_tainted_proteins_I___oseqids identification_db_dir: passdata/identification_db_dir blastdb: Get_Proteins/selected_blastdb annotation: bacterial_annot/outseqs @@ -447,11 +447,11 @@ steps: taxid: taxid tax_sql_file: passdata/taxon_db gc_assembly: genomic_source/gencoll_asn - compartments: bacterial_annot_2/aligns + compartments: bacterial_annot_1st_pass/aligns all_prots: Get_Proteins/all_prots out: [align, align_non_match] - bacterial_annot_3: + 
bacterial_annot_misc: run: bacterial_annot/wf_bacterial_annot_pass3.cwl in: AntiFamLib: passdata/AntiFamLib @@ -484,35 +484,35 @@ steps: - id: Name_by_WPs_names - id: PGAP_plus_ab_initio_annotation - spurious_annot_2: + spurious_annot_final: run: spurious_annot/wf_spurious_annot_pass2.cwl in: - Extract_Model_Proteins_proteins: bacterial_annot_3/Extract_Model_Proteins_proteins - Extract_Model_Proteins_seqids: bacterial_annot_3/Extract_Model_Proteins_seqids - Extract_Model_Proteins_lds2: bacterial_annot_3/Extract_Model_Proteins_lds2 + Extract_Model_Proteins_proteins: bacterial_annot_misc/Extract_Model_Proteins_proteins + Extract_Model_Proteins_seqids: bacterial_annot_misc/Extract_Model_Proteins_seqids + Extract_Model_Proteins_lds2: bacterial_annot_misc/Extract_Model_Proteins_lds2 AntiFamLib: passdata/AntiFamLib sequence_cache: genomic_source/asncache scatter_gather_nchunks: scatter_gather_nchunks - input_models: bacterial_annot_3/PGAP_plus_ab_initio_annotation + input_models: bacterial_annot_misc/PGAP_plus_ab_initio_annotation out: - AntiFam_tainted_proteins___oseqids - Good_AntiFam_filtered_annotations_out - Good_AntiFam_filtered_proteins_output - bacterial_annot_4: + bacterial_annot_2nd_pass: run: bacterial_annot/wf_bacterial_annot_pass4.cwl in: - lds2: bacterial_annot_3/Extract_Model_Proteins_lds2 - proteins: bacterial_annot_3/Extract_Model_Proteins_proteins - annotation: spurious_annot_2/Good_AntiFam_filtered_annotations_out - Good_AntiFam_filtered_proteins_gilist: spurious_annot_2/Good_AntiFam_filtered_proteins_output + lds2: bacterial_annot_misc/Extract_Model_Proteins_lds2 + proteins: bacterial_annot_misc/Extract_Model_Proteins_proteins + annotation: spurious_annot_final/Good_AntiFam_filtered_annotations_out + Good_AntiFam_filtered_proteins_gilist: spurious_annot_final/Good_AntiFam_filtered_proteins_output sequence_cache: genomic_source/asncache uniColl_cache: passdata/uniColl_cache identification_db_dir: passdata/identification_db_dir naming_sqlite: 
passdata/naming_sqlite - hmm_assignments: bacterial_annot_3/Assign_Naming_HMM_to_Proteins_assignments - wp_assignments: bacterial_annot_3/Name_by_WPs_names - Extract_Model_Proteins_prot_ids: bacterial_annot_3/Extract_Model_Proteins_seqids + hmm_assignments: bacterial_annot_misc/Assign_Naming_HMM_to_Proteins_assignments + wp_assignments: bacterial_annot_misc/Name_by_WPs_names + Extract_Model_Proteins_prot_ids: bacterial_annot_misc/Extract_Model_Proteins_seqids CDDdata: passdata/CDDdata CDDdata2: passdata/CDDdata2 thresholds: passdata/thresholds @@ -584,7 +584,7 @@ steps: Add_Locus_Tags: run: progs/add_locus_tags.cwl in: - input: bacterial_annot_4/out_annotation + input: bacterial_annot_2nd_pass/out_annotation locus_tag_prefix: locus_tag_prefix dbname: dbname out: [output] @@ -866,8 +866,8 @@ steps: in: annot_request_id: default: -1 # this is dummy annot_request_id - hmm_search: bacterial_annot_3/Search_Naming_HMMs_hmm_hits - hmm_search_proteins: bacterial_annot_3/PGAP_plus_ab_initio_annotation + hmm_search: bacterial_annot_misc/Search_Naming_HMMs_hmm_hits + hmm_search_proteins: bacterial_annot_misc/PGAP_plus_ab_initio_annotation input: Final_Bacterial_Package_final_bact_asn/outfull univ_prot_xml: passdata/univ_prot_xml val_res_den_xml: passdata/val_res_den_xml From 85e9bef0fe674bd68ddd883fdec17d1f9735bb85 Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Thu, 25 Jan 2024 11:21:38 -0500 Subject: [PATCH 09/16] rename files and eponymous steps and references; JIRA: PGAPX-1206 --- .../{gpx_qsubmit_1.cwl => gpx_qsubmit_seqids.cwl} | 0 .../{tblastn_wnode_1.cwl => tblastn_wnode_db.cwl} | 0 protein_alignment/wf_protein_alignment.cwl | 2 +- protein_alignment/{wf_seed_1.cwl => wf_seed_seqids.cwl} | 4 ++-- 4 files changed, 3 insertions(+), 3 deletions(-) rename protein_alignment/{gpx_qsubmit_1.cwl => gpx_qsubmit_seqids.cwl} (100%) rename protein_alignment/{tblastn_wnode_1.cwl => tblastn_wnode_db.cwl} (100%) rename protein_alignment/{wf_seed_1.cwl => wf_seed_seqids.cwl} 
(92%) diff --git a/protein_alignment/gpx_qsubmit_1.cwl b/protein_alignment/gpx_qsubmit_seqids.cwl similarity index 100% rename from protein_alignment/gpx_qsubmit_1.cwl rename to protein_alignment/gpx_qsubmit_seqids.cwl diff --git a/protein_alignment/tblastn_wnode_1.cwl b/protein_alignment/tblastn_wnode_db.cwl similarity index 100% rename from protein_alignment/tblastn_wnode_1.cwl rename to protein_alignment/tblastn_wnode_db.cwl diff --git a/protein_alignment/wf_protein_alignment.cwl b/protein_alignment/wf_protein_alignment.cwl index 6cb6922..506fd00 100755 --- a/protein_alignment/wf_protein_alignment.cwl +++ b/protein_alignment/wf_protein_alignment.cwl @@ -51,7 +51,7 @@ steps: out: [ blast_align ] Seed_Protein_Alignments: - run: wf_seed_1.cwl + run: wf_seed_seqids.cwl in: db_gencode: Compute_Gencode_int/value asn_cache: asn_cache diff --git a/protein_alignment/wf_seed_1.cwl b/protein_alignment/wf_seed_seqids.cwl similarity index 92% rename from protein_alignment/wf_seed_1.cwl rename to protein_alignment/wf_seed_seqids.cwl index 1655ac4..9e257d9 100644 --- a/protein_alignment/wf_seed_1.cwl +++ b/protein_alignment/wf_seed_seqids.cwl @@ -19,7 +19,7 @@ outputs: steps: gpx_qsubmit: - run: gpx_qsubmit_1.cwl + run: gpx_qsubmit_seqids.cwl in: asn_cache: asn_cache uniColl_asn_cache: uniColl_asn_cache @@ -28,7 +28,7 @@ steps: out: [jobs] tblastn_wnode: - run: tblastn_wnode_1.cwl + run: tblastn_wnode_db.cwl in: db_gencode: db_gencode asn_cache: asn_cache From 11bf2dc3db609d00941571dbc56f6da0653dd53b Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Mon, 29 Jan 2024 10:49:05 -0500 Subject: [PATCH 10/16] (1) hook up pgap.cwl with 4 diagnostic exception queries that we had in wf_common.cwl workflow (2) add modified default exception XML queries to input.yaml file generation for the case when user defaulted to command line specs for metadata. 
Modification includes addition of exempting GENERIC_BadSubmissionAuthorName; JIRA: PGAPX-1246 --- pgap.cwl | 9 +++++++++ scripts/pgap.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/pgap.cwl b/pgap.cwl index 82bc3fb..4272564 100755 --- a/pgap.cwl +++ b/pgap.cwl @@ -36,6 +36,15 @@ inputs: default: true uuid_in: type: File? + xpath_fail_initial_asndisc: + type: string? + xpath_fail_initial_asnvalidate: + type: string? + xpath_fail_final_asndisc: + type: string? + xpath_fail_final_asnvalidate: + type: string? + outputs: calls: outputSource: vecscreen/calls diff --git a/scripts/pgap.py b/scripts/pgap.py index 662fdd7..0590910 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -183,7 +183,11 @@ def __init__(self, params, local_input, pipeline): self.submol = self.create_submolfile(submol, params.ani_output, params.ani_hr_output, params.args.auto_correct_tax) else: self.submol = None - self.yaml = self.create_inputfile(local_input) + add_std_validation_exemptions = False + args = self.params.args + if not args.input and args.genome and args.organism: + add_std_validation_exemptions = True + self.yaml = self.create_inputfile(local_input, add_std_validation_exemptions) if self.params.docker_type in ['singularity', 'apptainer']: self.make_singularity_cmd() elif self.params.docker_type == 'podman': @@ -368,7 +372,7 @@ def create_submolfile(self, local_submol, ani_output, ani_hr_output, auto_correc return yaml - def create_inputfile(self, local_input): + def create_inputfile(self, local_input, add_std_validation_exemptions): with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml", prefix="pgap_input_", @@ -398,6 +402,46 @@ def create_inputfile(self, local_input): if os.path.exists(uuidfile) and os.stat(uuidfile).st_size != 0: fOut.write(u'make_uuid: false\n') fOut.write(u'uuid_in: { class: File, location: /pgap/output/uuid.txt }\n') + if add_std_validation_exemptions: + fOut.write(f""" 
+xpath_fail_initial_asnvalidate: > + //*[ + ( @severity="ERROR" or @severity="REJECT" ) + and not(contains(@code, "GENERIC_MissingPubRequirement")) + and not(contains(@code, "GENERIC_BadSubmissionAuthorName")) + and not(contains(@code, "SEQ_DESCR_ChromosomeLocation")) + and not(contains(@code, "SEQ_DESCR_MissingLineage")) + and not(contains(@code, "SEQ_DESCR_NoTaxonID")) + and not(contains(@code, "SEQ_DESCR_OrganismIsUndefinedSpecies")) + and not(contains(@code, "SEQ_DESCR_StrainWithEnvironSample")) + and not(contains(@code, "SEQ_DESCR_BacteriaMissingSourceQualifier")) + and not(contains(@code, "SEQ_DESCR_UnwantedCompleteFlag")) + and not(contains(@code, "SEQ_FEAT_BadCharInAuthorLastName")) + and not(contains(@code, "SEQ_FEAT_ShortIntron")) + and not(contains(@code, "SEQ_INST_InternalNsInSeqRaw")) + and not(contains(@code, "SEQ_INST_ProteinsHaveGeneralID")) + and not(contains(@code, "SEQ_PKG_NucProtProblem")) + and not(contains(@code, "SEQ_PKG_ComponentMissingTitle")) + ] +xpath_fail_final_asnvalidate: > + //*[( @severity="ERROR" or @severity="REJECT" ) + and not(contains(@code, "GENERIC_MissingPubRequirement")) + and not(contains(@code, "GENERIC_BadSubmissionAuthorName")) + and not(contains(@code, "SEQ_DESCR_ChromosomeLocation")) + and not(contains(@code, "SEQ_DESCR_MissingLineage")) + and not(contains(@code, "SEQ_DESCR_NoTaxonID")) + and not(contains(@code, "SEQ_DESCR_OrganismIsUndefinedSpecies")) + and not(contains(@code, "SEQ_DESCR_StrainWithEnvironSample")) + and not(contains(@code, "SEQ_DESCR_BacteriaMissingSourceQualifier")) + and not(contains(@code, "SEQ_DESCR_UnwantedCompleteFlag")) + and not(contains(@code, "SEQ_FEAT_BadCharInAuthorLastName")) + and not(contains(@code, "SEQ_FEAT_ShortIntron")) + and not(contains(@code, "SEQ_INST_InternalNsInSeqRaw")) + and not(contains(@code, "SEQ_INST_ProteinsHaveGeneralID")) + and not(contains(@code, "SEQ_PKG_ComponentMissingTitle")) + and not(contains(@code, "SEQ_PKG_NucProtProblem")) + ] +""") fOut.flush() return yaml 
From c431db4e6a1860d6b55be2dc86aae02c77efe089 Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Mon, 29 Jan 2024 11:56:38 -0500 Subject: [PATCH 11/16] added a comment in Python script to explain that now we are committed to last name 'Lastname' and, similarly, to first name setting as default: they are coded in asn_validate tool; JIRA: PGAPX-1246 --- scripts/pgap.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/pgap.py b/scripts/pgap.py index 0590910..6a5b323 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -354,6 +354,10 @@ def create_submolfile(self, local_submol, ani_output, ani_hr_output, auto_correc if has_authors == False: fOut.write(u'authors:\n') fOut.write(u' - author:\n') + # + # note: do not change these defaults, they are coded now + # in standard diagnostics asnvalidate tool, that's how GenBank detects that users did not provide correct names + # fOut.write(u" first_name: 'Firstname'\n") fOut.write(u" last_name: 'Lastname'\n") if has_contact_info == False: From cfd5aa88a03ec84835db9903fabe32da384df66e Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Mon, 29 Jan 2024 12:37:53 -0500 Subject: [PATCH 12/16] pass the 4 validation exemption strings from pgap input to wf_common input; JIRA: PGAPX-1246 --- pgap.cwl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pgap.cwl b/pgap.cwl index 4272564..963194c 100755 --- a/pgap.cwl +++ b/pgap.cwl @@ -194,6 +194,10 @@ steps: make_uuid: make_uuid uuid_in: uuid_in blast_hits_cache_data: blast_hits_cache_data + xpath_fail_initial_asndisc: xpath_fail_initial_asndisc + xpath_fail_initial_asnvalidate: xpath_fail_initial_asnvalidate + xpath_fail_final_asndisc: xpath_fail_final_asndisc + xpath_fail_final_asnvalidate: xpath_fail_final_asnvalidate out: [gbent, gbk, gff, nucleotide_fasta, protein_fasta, cds_nucleotide_fasta, cds_protein_fasta, sqn, initial_asndisc_error_diag, initial_asnval_error_diag, final_asndisc_error_diag, final_asnval_error_diag, checkm_raw, checkm_results] run: 
wf_common.cwl Generate_Annotation_Reports_gff_enhanced: From 304b59e5f9947bfa5d4ca45f154089898cd8d0b2 Mon Sep 17 00:00:00 2001 From: Azat Badretdin <38532187+azat-badretdin@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:27:59 -0500 Subject: [PATCH 13/16] args.input is created already by this time, do not check for it. JIRA: PGAPX-1246 --- scripts/pgap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pgap.py b/scripts/pgap.py index 6a5b323..4493d97 100755 --- a/scripts/pgap.py +++ b/scripts/pgap.py @@ -185,7 +185,7 @@ def __init__(self, params, local_input, pipeline): self.submol = None add_std_validation_exemptions = False args = self.params.args - if not args.input and args.genome and args.organism: + if args.genome and args.organism: add_std_validation_exemptions = True self.yaml = self.create_inputfile(local_input, add_std_validation_exemptions) if self.params.docker_type in ['singularity', 'apptainer']: From 83d7e319d694bc2caa02a604b637a15aae944a0c Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Sun, 31 Mar 2024 07:21:40 -0400 Subject: [PATCH 14/16] do not use blast_hits_cache in orthology graph; JIRA: PGAPX-1283 --- wf_common.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/wf_common.cwl b/wf_common.cwl index 3c2d628..da3472b 100644 --- a/wf_common.cwl +++ b/wf_common.cwl @@ -574,7 +574,6 @@ steps: asn_cache: source: [passdata/uniColl_nuc_cache, genomic_source/asncache] linkMerge: merge_flattened - blast_hits_cache: blast_hits_cache_data_split_dir/blast_hits_cache genus_list: genus_list_file2ints/values blastdb: default: [blastdb] From 5d73f9d29542235613311ce7d9432bbbaa10d0d5 Mon Sep 17 00:00:00 2001 From: George Coulouris Date: Mon, 15 Apr 2024 10:10:21 -0400 Subject: [PATCH 15/16] PGAPX-1268 force platform to linux/amd64 for apple silicon --- scripts/pgap.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/pgap.py b/scripts/pgap.py index 4493d97..e82db79 100755 --- 
a/scripts/pgap.py +++ b/scripts/pgap.py @@ -219,6 +219,8 @@ def __init__(self, params, local_input, pipeline): def make_docker_cmd(self): self.cmd = [self.params.docker_cmd, 'run', '-i', '--rm' ] + self.cmd.extend(['--platform', 'linux/amd64']) + if self.params.docker_user_remap: self.cmd.extend(['--user', str(os.getuid()) + ":" + str(os.getgid())]) self.cmd.extend([ @@ -531,11 +533,13 @@ def launch(self): for line in fIn: f.write(line) f.write("--- End YAML Input ---\n\n") - # Show runtime parameters in the log - f.write("--- Start Runtime Report ---\n") - self.record_runtime(f) - f.write("\n--- End Runtime Report ---\n\n") - f.flush() + + if platform.system() != "Darwin": + # Show runtime parameters in the log + f.write("--- Start Runtime Report ---\n") + self.record_runtime(f) + f.write("\n--- End Runtime Report ---\n\n") + try: proc = subprocess.Popen(self.cmd, stdout=f, stderr=subprocess.STDOUT) proc.wait() From a1851f7b930a08bb100e81329b24d0aaa7a644e8 Mon Sep 17 00:00:00 2001 From: "Badretdin, Azat" Date: Tue, 16 Apr 2024 18:40:35 -0400 Subject: [PATCH 16/16] switch to ncbi_crisper 1.03; JIRA: PGAPX-1293 --- bacterial_mobile_elem/ncbi_crisper_wnode.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bacterial_mobile_elem/ncbi_crisper_wnode.cwl b/bacterial_mobile_elem/ncbi_crisper_wnode.cwl index d8e53f1..c0b4a96 100644 --- a/bacterial_mobile_elem/ncbi_crisper_wnode.cwl +++ b/bacterial_mobile_elem/ncbi_crisper_wnode.cwl @@ -16,7 +16,7 @@ inputs: prefix: -input-jobs crisper_path: type: string? - default: /opt/crispr/1.02/bin/ + default: /opt/crispr/1.03/bin/ inputBinding: prefix: -ncbi-crisper-path output_dir: