Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/test' into prod
Browse files Browse the repository at this point in the history
  • Loading branch information
azat-badretdin committed Nov 8, 2019
2 parents 33d7112 + 5342bee commit 84048d9
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 52 deletions.
22 changes: 18 additions & 4 deletions bacterial_kmer/wf_bacterial_kmer.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ inputs:
gc_cache: File
kmer_cache_sqlite: File
ref_assembly_taxid: int
ref_assembly_id: int
ANI_cutoff: File
kmer_reference_assemblies: File
tax_synon: File
taxon_db: File
gcextract2_sqlite: File
outputs:
Identify_Top_N_ANI_annot:
type: File
Expand Down Expand Up @@ -146,6 +149,7 @@ steps:
in:
top_distances: Identify_Top_N/top_distances
ref_assembly_taxid: ref_assembly_taxid
ref_assembly_id: ref_assembly_id
out: [tax_report, gc_id_list]
Build_Kmer_Tree:
label: Build Kmer Tree
Expand All @@ -166,13 +170,21 @@ steps:
run: ../task_types/tt_gcaccess_from_list.cwl
in:
gc_id_list: Extract_Top_Assemblies/gc_id_list
gc_cache: gc_cache
out: [gencoll_asn]
Extract_Input_GenColl_IDs:
label: Extract Input GenColl IDs
doc: Input is input ASN.1 file for our target assembly
run: ../progs/gc_extract_ids.cwl
in:
input: gencoll_asn
out: [output]
Assembly_Assembly_BLASTn:
label: Assembly Assembly BLASTn
doc: This is rather standard blast
run: ../task_types/tt_assm_assm_blastn_wnode.cwl
in:
queries_gc_id_list: List_sqlite/keys
queries_gc_id_list: Extract_Input_GenColl_IDs/output
subjects_gc_id_list: Extract_Top_Assemblies/gc_id_list
# this will break here
ref_gencoll_asn: Get_Top_Assemblies_GenColl_ASN/gencoll_asn
Expand All @@ -184,7 +196,7 @@ steps:
gc_seq_cache: gc_seq_cache
gc_cache: gc_cache
compart:
default: "true"
default: true
evalue:
default: 0.0001
gapextend:
Expand All @@ -200,11 +212,11 @@ steps:
merge_engine:
default: "tree-merger"
soft_masking:
default: "true"
default: 'true'
task:
default: megablast
use_common_components:
default: "true"
default: true
window_size:
default: 150
word_size:
Expand All @@ -223,4 +235,6 @@ steps:
blast_align: Assembly_Assembly_BLASTn/blast_align
ref_assembly_taxid: ref_assembly_taxid
tax_synon: tax_synon
gcextract2_sqlite: gcextract2_sqlite
taxon_db: taxon_db
out: [top,annot]
8 changes: 7 additions & 1 deletion expr/supplemental_data_split_dir.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,11 @@ expression: |
break;
case 'TaxSynon.tsv':
r['tax_synon'] = l[i];
break; }
break;
case 'GCExtract2.sqlite':
r['gcextract2_sqlite'] = l[i];
break;
}
}
return r;
}
Expand All @@ -133,6 +137,8 @@ outputs:
defline_cleanup_rules:
# defline_cleanup_rules # ${GP_HOME}/etc/product_rules.prt
type: File
gcextract2_sqlite:
type: File
gene_master_ini:
type: File
genemark_path:
Expand Down
10 changes: 9 additions & 1 deletion progs/ani_top_identification.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ inputs:
query_assembly:
type: File
inputBinding:
prefix: -ANI_cutoff
prefix: -query-assembly
ref_assembly_id:
type: int?
inputBinding:
Expand All @@ -45,6 +45,14 @@ inputs:
type: File
inputBinding:
prefix: -tax-syn-table
taxon_db:
type: File
inputBinding:
prefix: -taxon-db
gcextract2_sqlite:
type: File
inputBinding:
prefix: -gcextract2-sqlite
o:
type: string?
default: ani-tax-report.xml
Expand Down
31 changes: 25 additions & 6 deletions progs/assm_assm_blastn_create_jobs.cwl
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
#!/usr/bin/env cwl-runner
label: "assm_assm_blastn_create_jobs"
class: CommandLineTool
baseCommand: submit_kmer_compare
baseCommand: assm_assm_blastn_create_jobs
cwlVersion: v1.0

requirements:
- class: InlineJavascriptRequirement
- class: InitialWorkDirRequirement
listing:
- entryname: q.mft
entry: |-
${
var blob = '# q.mft created for assm_assm_blastn_create_jobs from input "queries_gc_id_list" File\n';
if(inputs.queries_gc_id_list != null) { blob += inputs.queries_gc_id_list.path + '\n'; }
return blob;
}
- entryname: t.mft
entry: |-
${
var blob = '# t.mft created for assm_assm_blastn_create_jobs from input "subjects_gc_id_list" File\n';
if(inputs.subjects_gc_id_list != null) { blob += inputs.subjects_gc_id_list.path + '\n'; }
return blob;
}
arguments: [ -query-assemblies-manifest, q.mft, -target-assemblies-manifest, t.mft ]
# ~/gpipe-debug-bin/assm_assm_blastn_create_jobs -affinity-bin 10
# -query-assemblies-manifest inp/query_ids.mft -target-assemblies-manifest inp/subject_ids.mft -output inp/jobs.xml
#
inputs:
affinity_bin:
type: int?
inputBinding:
prefix: -affinity-bin
queries_gc_id_list:
type: File?
inputBinding:
prefix: -query-assemblies
subjects_gc_id_list:
type: File?
inputBinding:
prefix: -target-assemblies
output_xml_file_name:
type: string?
default: jobs.xml
Expand Down
35 changes: 24 additions & 11 deletions progs/assm_assm_blastn_wnode.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,36 @@ cwlVersion: v1.0
label: "assm_assm_blastn_wnode"

class: CommandLineTool
#
# You might need something like this:
#
# requirements:
# - class: InitialWorkDirRequirement
# listing:
# - entry: $(inputs.asn_cache)
# writable: True
# - entry: $(inputs.blastdb_dir)
# writable: False
requirements:
- class: InlineJavascriptRequirement
- class: InitialWorkDirRequirement
listing:

- entryname: queries-and-targets.mft
entry: |-
${
var blob = '# queries-and-targets.mft created for assm_assm_blastn_wnode from input "target_set" File\n';
if(inputs.target_set != null) {
for(var i=0; i<inputs.target_set.length; i++) {
blob += inputs.target_set[i].path + '\n';
}
}
return blob;
}

baseCommand: assm_assm_blastn_wnode
inputs:
input_jobs:
type: File?
inputBinding:
prefix: -input-jobs
prefix: -input-jobs
target_set:
type: File[]
target_set_manifest:
type: string?
default: queries-and-targets.mft
inputBinding:
prefix: -target-set-manifest
asn_cache:
type: Directory[]
inputBinding:
Expand Down
21 changes: 21 additions & 0 deletions progs/gc_extract_ids.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env cwl-runner
# CommandLineTool wrapper around the gc_extract_ids executable.
# It passes one input file via -input and an output file name via -output,
# then collects the file with that name as the tool's single output.
cwlVersion: v1.0
class: CommandLineTool
label: "gc_extract_ids"
baseCommand: gc_extract_ids

inputs:
  # File handed to the executable through the -input option.
  input:
    type: File
    inputBinding:
      prefix: -input
  # Name given to the executable through -output; optional, falls back to
  # gencoll.ids when the caller does not supply one.
  output_name:
    type: string?
    default: gencoll.ids
    inputBinding:
      prefix: -output

outputs:
  # The file whose name matches output_name, globbed from the working directory.
  output:
    type: File
    outputBinding:
      glob: $(inputs.output_name)

4 changes: 4 additions & 0 deletions progs/gc_get_assembly.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ inputs:
type: File?
inputBinding:
prefix: -release_id_list
gc_cache:
type: File
inputBinding:
prefix: -gc-cache
outputs:
gencoll_asn:
type: File
Expand Down
67 changes: 67 additions & 0 deletions progs/gpx_qsubmit-xml.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
cwlVersion: v1.0
label: "gpx_qsubmit"
doc: >
  This tool is a gpx_qsubmit wrapper specialized for the case when there is
  an XML input (-xml-jobs)

class: CommandLineTool
requirements:
  - class: InlineJavascriptRequirement
  - class: InitialWorkDirRequirement
    # Stage the first two asn_cache directories (read-only, which is the
    # default for Directory objects listed directly) into the working directory.
    # asn_cache is optional, so fall back to an empty list instead of
    # dereferencing null; slice() also copes with zero or one directory
    # without producing an undefined or duplicated entry.
    listing: |-
      ${
        var caches = inputs.asn_cache || [];
        return caches.slice(0, 2);
      }

baseCommand: gpx_qsubmit
inputs:
  # Passed as -affinity; defaults to "subject".
  affinity:
    type: string?
    default: subject
    inputBinding:
      prefix: -affinity
  # Optional list of cache directories, joined with "," after -asn-cache.
  asn_cache:
    type: Directory[]?
    inputBinding:
      prefix: -asn-cache
      itemSeparator: ","
  batch_size:
    type: int?
    inputBinding:
      prefix: -batch-size
  max_batch_length:
    type: int?
    inputBinding:
      prefix: -max-batch-length
  # Accepted for interface compatibility but intentionally left unbound:
  # the original binding was disabled ("prefix: -nogenbank", commented out
  # "as a hail mary"), so no inputBinding is declared here.
  nogenbank:
    type: boolean?
  NxM_threshold:
    type: int?
    inputBinding:
      prefix: -NxM-threshold
  overlap:
    type: int?
    inputBinding:
      prefix: -overlap
  subseq_size:
    type: int?
    inputBinding:
      prefix: -subseq-size
  # XML job description handed to the executable via -xml-jobs.
  xml_jobs:
    type: File?
    inputBinding:
      prefix: -xml-jobs
  # Name of the file written via -o; also used to glob the "jobs" output.
  output_xml_jobs:
    type: string
    default: jobs.xml
    inputBinding:
      prefix: -o

outputs:
  # The file named by output_xml_jobs, collected from the working directory.
  jobs:
    type: File
    outputBinding:
      glob: $(inputs.output_xml_jobs)
27 changes: 4 additions & 23 deletions task_types/tt_ani_top_n.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,16 @@ inputs:
ANI_cutoff: File
asn_cache: Directory
ref_assembly_taxid: int
taxon_db: File
tax_synon: File
gcextract2_sqlite: File
outputs:
top:
type: File
outputSource: ani_top_identification/top
annot:
type: File
outputSource: ani_top_identification/annot
#/panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/system/2018-03-13.build2663/bin/ani_top_identification \
# -ANI_cutoff \
# /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/system/2018-03-13.build2663/third-party/data/BacterialPipeline/ANI_cutoff/ANI_cutoff.xml \
# -N \
# 25 \
# -asn-cache \
# /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/sequence_cache,/panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe_id_cache/full_id_cache \
# -input-manifest \
# /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/ani_top_n.455674852/inp/assm_aligns.mft \
# -min-gap \
# 10000 \
# -min-region \
# 1000 \
# -o \
# /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/ani_top_n.455674852/out/ani-tax-report.xml \
# -o-annot \
# /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/ani_top_n.455674852/out/annot.asn \
# -query-assembly \
# /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/gc_create_from_sequences.455674892/out/gencoll.asn \
# -ref-assembly-id \
# 0 \
# -ref-assembly-taxid \
# 243273

steps:
ani_top_identification:
Expand All @@ -58,4 +37,6 @@ steps:
default: 0
ref_assembly_taxid: ref_assembly_taxid
tax_synon: tax_synon
taxon_db: taxon_db
gcextract2_sqlite: gcextract2_sqlite
out: [annot, top]
5 changes: 4 additions & 1 deletion task_types/tt_assm_assm_blastn_wnode.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ steps:
subjects_gc_id_list: subjects_gc_id_list
out: [output]
gpx_qsubmit:
run: ../progs/gpx_qsubmit.cwl
run: ../progs/gpx_qsubmit-xml.cwl
in:
affinity: affinity
asn_cache:
Expand All @@ -55,6 +55,9 @@ steps:
assm_assm_blastn_wnode:
run: ../progs/assm_assm_blastn_wnode.cwl
in:
target_set:
source: [gencoll_asn, ref_gencoll_asn]
linkMerge: merge_flattened
asn_cache:
source: [asn_cache, gc_seq_cache]
linkMerge: merge_flattened
Expand Down
2 changes: 2 additions & 0 deletions task_types/tt_gcaccess_from_list.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ label: "gcaccess_from_list"
class: Workflow # task type
inputs:
gc_id_list: File
gc_cache: File
outputs:
gencoll_asn:
type: File
Expand All @@ -15,6 +16,7 @@ steps:
mode:
default: AllSequences
release_id_list: gc_id_list
gc_cache: gc_cache
out: [gencoll_asn]
# this is for the future we might need this in general case
# gc_get_molecules:
Expand Down
Loading

0 comments on commit 84048d9

Please sign in to comment.