From a78d1cc5855fe21e4118339c1b8ce5484aba6117 Mon Sep 17 00:00:00 2001
From: "Badretdin, Azat" <badrazat@ncbi.nlm.nih.gov>
Date: Thu, 31 Oct 2019 12:01:51 -0400
Subject: [PATCH 1/6] add splitting of gcextract2 sqlite; JIRA: PGAPX-584

---
 expr/supplemental_data_split_dir.cwl | 8 +++++++-
 wf_common.cwl                        | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/expr/supplemental_data_split_dir.cwl b/expr/supplemental_data_split_dir.cwl
index 12eab2e..8d4cc11 100644
--- a/expr/supplemental_data_split_dir.cwl
+++ b/expr/supplemental_data_split_dir.cwl
@@ -107,7 +107,11 @@ expression: |
           break;
         case 'TaxSynon.tsv':
           r['tax_synon'] = l[i];
-          break;      }
+          break;    
+        case 'GCExtract2.sqlite':
+          r['gcextract2_sqlite'] = l[i];
+          break;    
+		}
     }
     return r;
   }
@@ -133,6 +137,8 @@ outputs:
   defline_cleanup_rules:
     # defline_cleanup_rules # ${GP_HOME}/etc/product_rules.prt
     type: File
+  gcextract2_sqlite:
+	type: File
   gene_master_ini:
     type: File
   genemark_path:
diff --git a/wf_common.cwl b/wf_common.cwl
index ff78825..a488516 100755
--- a/wf_common.cwl
+++ b/wf_common.cwl
@@ -112,6 +112,7 @@ steps:
       - CDDdata2
       - CDDdata
       - defline_cleanup_rules
+	  - gcextract2_sqlite
       - gene_master_ini
       - genemark_path
       - hmm_path

From a627cfd1cabf095bc5cddcbb1aa257af13d6343b Mon Sep 17 00:00:00 2001
From: "Badretdin, Azat" <badrazat@ncbi.nlm.nih.gov>
Date: Thu, 31 Oct 2019 12:18:11 -0400
Subject: [PATCH 2/6] pass new resource GCExtract2 to the kmer plane; JIRA:
 PGAPX-584

---
 bacterial_kmer/wf_bacterial_kmer.cwl | 2 ++
 progs/ani_top_identification.cwl     | 4 ++++
 task_types/tt_ani_top_n.cwl          | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/bacterial_kmer/wf_bacterial_kmer.cwl b/bacterial_kmer/wf_bacterial_kmer.cwl
index a9f7dba..32a3de1 100644
--- a/bacterial_kmer/wf_bacterial_kmer.cwl
+++ b/bacterial_kmer/wf_bacterial_kmer.cwl
@@ -17,6 +17,7 @@ inputs:
     ANI_cutoff: File
     kmer_reference_assemblies: File
     tax_synon: File
+	gcextract2_sqlite: File
 outputs:
     Identify_Top_N_ANI_annot:
         type: File
@@ -223,4 +224,5 @@ steps:
         blast_align: Assembly_Assembly_BLASTn/blast_align
         ref_assembly_taxid: ref_assembly_taxid
         tax_synon: tax_synon
+		gcextract2_sqlite: gcextract2_sqlite
     out: [top,annot]
diff --git a/progs/ani_top_identification.cwl b/progs/ani_top_identification.cwl
index 3e5c704..ea2f1ae 100644
--- a/progs/ani_top_identification.cwl
+++ b/progs/ani_top_identification.cwl
@@ -45,6 +45,10 @@ inputs:
         type: File
         inputBinding:
             prefix: -tax-syn-table
+	  gcextract2_sqlite:
+	    type: File
+        inputBinding:
+            prefix: -gcextract2-sqlite
       o:
         type: string?
         default: ani-tax-report.xml
diff --git a/task_types/tt_ani_top_n.cwl b/task_types/tt_ani_top_n.cwl
index cab5cb9..b339553 100644
--- a/task_types/tt_ani_top_n.cwl
+++ b/task_types/tt_ani_top_n.cwl
@@ -9,6 +9,7 @@ inputs:
   asn_cache: Directory
   ref_assembly_taxid: int
   tax_synon: File
+  gcextract2_sqlite: File
 outputs:
   top:
     type: File
@@ -58,4 +59,5 @@ steps:
         default: 0
       ref_assembly_taxid: ref_assembly_taxid
       tax_synon: tax_synon
+	  gcextract2_sqlite: gcextract2_sqlite
     out: [annot, top]

From b9046809e78bc99dffde40f9aae8dde21651edec Mon Sep 17 00:00:00 2001
From: "Badretdin, Azat" <badrazat@ncbi.nlm.nih.gov>
Date: Fri, 1 Nov 2019 10:06:30 -0400
Subject: [PATCH 3/6] successful testing, CWL part is done; more testing needed
 to make sure actual output is correct; JIRA: PGAPX-242

---
 bacterial_kmer/wf_bacterial_kmer.cwl     | 22 +++++---
 expr/supplemental_data_split_dir.cwl     |  4 +-
 progs/ani_top_identification.cwl         | 10 ++--
 progs/assm_assm_blastn_create_jobs.cwl   | 31 ++++++++---
 progs/assm_assm_blastn_wnode.cwl         | 35 +++++++++----
 progs/gc_extract_ids.cwl                 | 21 ++++++++
 progs/gc_get_assembly.cwl                |  4 ++
 progs/gpx_qsubmit-xml.cwl                | 67 ++++++++++++++++++++++++
 task_types/tt_ani_top_n.cwl              | 27 ++--------
 task_types/tt_assm_assm_blastn_wnode.cwl |  5 +-
 task_types/tt_gcaccess_from_list.cwl     |  2 +
 11 files changed, 175 insertions(+), 53 deletions(-)
 create mode 100644 progs/gc_extract_ids.cwl
 create mode 100644 progs/gpx_qsubmit-xml.cwl

diff --git a/bacterial_kmer/wf_bacterial_kmer.cwl b/bacterial_kmer/wf_bacterial_kmer.cwl
index 32a3de1..80e72ae 100644
--- a/bacterial_kmer/wf_bacterial_kmer.cwl
+++ b/bacterial_kmer/wf_bacterial_kmer.cwl
@@ -17,7 +17,8 @@ inputs:
     ANI_cutoff: File
     kmer_reference_assemblies: File
     tax_synon: File
-	gcextract2_sqlite: File
+    taxon_db: File
+    gcextract2_sqlite: File
 outputs:
     Identify_Top_N_ANI_annot:
         type: File
@@ -167,13 +168,21 @@ steps:
     run: ../task_types/tt_gcaccess_from_list.cwl
     in:
       gc_id_list: Extract_Top_Assemblies/gc_id_list
+      gc_cache: gc_cache
     out: [gencoll_asn]
+  Extract_Input_GenColl_IDs:
+    label: Extract Input GenColl IDs
+    doc: Input is input ASN.1 file for our target assembly
+    run: ../progs/gc_extract_ids.cwl
+    in:
+      input: gencoll_asn
+    out: [output]
   Assembly_Assembly_BLASTn:
     label: Assembly Assembly BLASTn
     doc: This is rather standard blast
     run: ../task_types/tt_assm_assm_blastn_wnode.cwl
     in:
-      queries_gc_id_list: List_sqlite/keys
+      queries_gc_id_list: Extract_Input_GenColl_IDs/output
       subjects_gc_id_list: Extract_Top_Assemblies/gc_id_list
       # this will brea here
       ref_gencoll_asn: Get_Top_Assemblies_GenColl_ASN/gencoll_asn
@@ -185,7 +194,7 @@ steps:
       gc_seq_cache: gc_seq_cache
       gc_cache: gc_cache
       compart: 
-        default: "true"
+        default: true
       evalue: 
         default: 0.0001
       gapextend: 
@@ -201,11 +210,11 @@ steps:
       merge_engine: 
         default: "tree-merger"
       soft_masking:  
-        default: "true"
+        default: 'true'
       task:  
         default: megablast
       use_common_components:  
-        default: "true"
+        default: true
       window_size:  
         default: 150
       word_size:  
@@ -224,5 +233,6 @@ steps:
         blast_align: Assembly_Assembly_BLASTn/blast_align
         ref_assembly_taxid: ref_assembly_taxid
         tax_synon: tax_synon
-		gcextract2_sqlite: gcextract2_sqlite
+        gcextract2_sqlite: gcextract2_sqlite
+        taxon_db: taxon_db
     out: [top,annot]
diff --git a/expr/supplemental_data_split_dir.cwl b/expr/supplemental_data_split_dir.cwl
index 8d4cc11..183c00b 100644
--- a/expr/supplemental_data_split_dir.cwl
+++ b/expr/supplemental_data_split_dir.cwl
@@ -111,7 +111,7 @@ expression: |
         case 'GCExtract2.sqlite':
           r['gcextract2_sqlite'] = l[i];
           break;    
-		}
+        }
     }
     return r;
   }
@@ -138,7 +138,7 @@ outputs:
     # defline_cleanup_rules # ${GP_HOME}/etc/product_rules.prt
     type: File
   gcextract2_sqlite:
-	type: File
+    type: File
   gene_master_ini:
     type: File
   genemark_path:
diff --git a/progs/ani_top_identification.cwl b/progs/ani_top_identification.cwl
index ea2f1ae..be3834a 100644
--- a/progs/ani_top_identification.cwl
+++ b/progs/ani_top_identification.cwl
@@ -32,7 +32,7 @@ inputs:
       query_assembly: 
         type: File
         inputBinding:
-            prefix: -ANI_cutoff
+            prefix: -query-assembly
       ref_assembly_id: 
         type: int?
         inputBinding:
@@ -45,8 +45,12 @@ inputs:
         type: File
         inputBinding:
             prefix: -tax-syn-table
-	  gcextract2_sqlite:
-	    type: File
+      taxon_db:
+        type: File
+        inputBinding:
+            prefix: -taxon-db
+      gcextract2_sqlite:
+        type: File
         inputBinding:
             prefix: -gcextract2-sqlite
       o:
diff --git a/progs/assm_assm_blastn_create_jobs.cwl b/progs/assm_assm_blastn_create_jobs.cwl
index 23148aa..9422f6c 100644
--- a/progs/assm_assm_blastn_create_jobs.cwl
+++ b/progs/assm_assm_blastn_create_jobs.cwl
@@ -1,20 +1,39 @@
 #!/usr/bin/env cwl-runner
 label: "assm_assm_blastn_create_jobs"
 class: CommandLineTool
-baseCommand: submit_kmer_compare
+baseCommand: assm_assm_blastn_create_jobs
 cwlVersion: v1.0
-
+requirements:
+  - class: InlineJavascriptRequirement
+  - class: InitialWorkDirRequirement
+    listing:
+     - entryname: q.mft
+       entry: |- 
+        ${
+          var blob = '# q.mft created for assm_assm_blastn_create_jobs from input "queries_gc_id_list" File\n'; 
+          if(inputs.queries_gc_id_list != null) { blob += inputs.queries_gc_id_list.path + '\n'; } 
+          return blob; 
+        }
+     - entryname: t.mft
+       entry: |- 
+        ${
+          var blob = '# t.mft created for assm_assm_blastn_create_jobs from input "subjects_gc_id_list" File\n'; 
+          if(inputs.subjects_gc_id_list != null) { blob += inputs.subjects_gc_id_list.path + '\n'; } 
+          return blob; 
+        }
+arguments: [ -query-assemblies-manifest, q.mft, -target-assemblies-manifest, t.mft ]
+# ~/gpipe-debug-bin/assm_assm_blastn_create_jobs -affinity-bin 10 
+# -query-assemblies-manifest inp/query_ids.mft -target-assemblies-manifest inp/subject_ids.mft -output inp/jobs.xml
+#
 inputs:
     affinity_bin: 
         type: int?
+        inputBinding:
+          prefix: -affinity-bin
     queries_gc_id_list:
         type: File?
-        inputBinding:
-            prefix: -query-assemblies
     subjects_gc_id_list: 
         type: File?
-        inputBinding:
-            prefix: -target-assemblies
     output_xml_file_name:
         type: string?
         default: jobs.xml
diff --git a/progs/assm_assm_blastn_wnode.cwl b/progs/assm_assm_blastn_wnode.cwl
index aacd4ca..7bb814e 100644
--- a/progs/assm_assm_blastn_wnode.cwl
+++ b/progs/assm_assm_blastn_wnode.cwl
@@ -2,23 +2,36 @@ cwlVersion: v1.0
 label: "assm_assm_blastn_wnode"
 
 class: CommandLineTool
-#
-# You might need something like this:
-#
-# requirements:
-#  - class: InitialWorkDirRequirement
-#    listing:
-#      - entry: $(inputs.asn_cache)
-#        writable: True
-#      - entry: $(inputs.blastdb_dir)
-#        writable: False
+requirements:
+   - class: InlineJavascriptRequirement
+   - class: InitialWorkDirRequirement
+     listing:
+
+       - entryname: queries-and-targets.mft
+         entry: |- 
+          ${
+            var blob = '# queries-and-targets.mft created for assm_assm_blastn_wnode from input "target_set" File\n'; 
+            if(inputs.target_set != null) { 
+              for(var i=0; i<inputs.target_set.length; i++) {
+                blob += inputs.target_set[i].path + '\n'; 
+              }
+            } 
+            return blob; 
+          }
 
 baseCommand: assm_assm_blastn_wnode
 inputs:
   input_jobs:
     type: File?
     inputBinding:
-        prefix: -input-jobs 
+        prefix: -input-jobs
+  target_set:
+    type: File[]
+  target_set_manifest:
+    type: string?
+    default: queries-and-targets.mft
+    inputBinding:
+      prefix: -target-set-manifest
   asn_cache:
     type: Directory[]
     inputBinding:
diff --git a/progs/gc_extract_ids.cwl b/progs/gc_extract_ids.cwl
new file mode 100644
index 0000000..bb06216
--- /dev/null
+++ b/progs/gc_extract_ids.cwl
@@ -0,0 +1,21 @@
+#!/usr/bin/env cwl-runner
+# CommandLineTool wrapper for gc_extract_ids: passes one File via -input and
+# collects the file named by output_name from the working directory.
+cwlVersion: v1.0
+class: CommandLineTool
+label: 'gc_extract_ids'
+baseCommand: gc_extract_ids
+inputs:
+  input:
+    type: File
+    inputBinding: {prefix: -input}
+  # Name handed to -output; falls back to gencoll.ids when not supplied.
+  output_name:
+    type: string?
+    default: gencoll.ids
+    inputBinding: {prefix: -output}
+outputs:
+  # Located by globbing for the same name that was passed to -output.
+  output:
+    type: File
+    outputBinding: {glob: $(inputs.output_name)}
diff --git a/progs/gc_get_assembly.cwl b/progs/gc_get_assembly.cwl
index 6ceb03d..97b1d80 100644
--- a/progs/gc_get_assembly.cwl
+++ b/progs/gc_get_assembly.cwl
@@ -17,6 +17,10 @@ inputs:
         type: File?
         inputBinding:
           prefix: -release_id_list
+    gc_cache:
+        type: File
+        inputBinding:
+          prefix:  -gc-cache
 outputs: 
     gencoll_asn:
         type: File
diff --git a/progs/gpx_qsubmit-xml.cwl b/progs/gpx_qsubmit-xml.cwl
new file mode 100644
index 0000000..1c1c898
--- /dev/null
+++ b/progs/gpx_qsubmit-xml.cwl
@@ -0,0 +1,67 @@
+cwlVersion: v1.0
+label: "gpx_qsubmit"
+doc: >
+    This tool is specialized for the case when there is an XML input
+
+class: CommandLineTool
+requirements:
+  - class: InlineJavascriptRequirement
+  - class: InitialWorkDirRequirement
+    # Stage (read-only) at most the first two asn_cache directories; asn_cache
+    # is optional, so a null value stages nothing instead of crashing.
+    listing: |-
+      ${ if (inputs.asn_cache == null) { return []; }
+         return inputs.asn_cache.slice(0, 2); }
+
+baseCommand: gpx_qsubmit
+inputs:
+  affinity:
+    type: string?
+    default: subject
+    inputBinding:
+      prefix: -affinity
+  asn_cache:
+    type: Directory[]?
+    inputBinding:
+      prefix: -asn-cache
+      itemSeparator: ","  # all directories are joined into one argument
+  batch_size:
+    type: int?
+    inputBinding:
+      prefix: -batch-size
+  max_batch_length:
+    type: int?
+    inputBinding:
+      prefix: -max-batch-length
+  nogenbank:
+    type: boolean?
+    # No inputBinding: the "-nogenbank" prefix was intentionally disabled
+    # (commented out as an experiment), so this flag never reaches the command.
+  NxM_threshold:
+    type: int?
+    inputBinding:
+      prefix: -NxM-threshold
+  overlap:
+    type: int?
+    inputBinding:
+      prefix: -overlap
+  subseq_size:
+    type: int?
+    inputBinding:
+      prefix: -subseq-size
+  xml_jobs:
+    type: File?
+    inputBinding:
+      prefix: -xml-jobs
+  output_xml_jobs:
+    type: string
+    default: jobs.xml
+    inputBinding:
+      prefix: -o
+
+# jobs is collected by globbing for the file name that was passed to -o.
+outputs:
+  jobs:
+    type: File
+    outputBinding:
+      glob: $(inputs.output_xml_jobs)
diff --git a/task_types/tt_ani_top_n.cwl b/task_types/tt_ani_top_n.cwl
index b339553..b6416f1 100644
--- a/task_types/tt_ani_top_n.cwl
+++ b/task_types/tt_ani_top_n.cwl
@@ -8,6 +8,7 @@ inputs:
   ANI_cutoff: File
   asn_cache: Directory
   ref_assembly_taxid: int
+  taxon_db: File
   tax_synon: File
   gcextract2_sqlite: File
 outputs:
@@ -17,29 +18,6 @@ outputs:
   annot:
     type: File
     outputSource: ani_top_identification/annot
-#/panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/system/2018-03-13.build2663/bin/ani_top_identification \
-#    -ANI_cutoff \
-#    /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/system/2018-03-13.build2663/third-party/data/BacterialPipeline/ANI_cutoff/ANI_cutoff.xml \
-#    -N \
-#    25 \
-#    -asn-cache \
-#    /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/sequence_cache,/panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe_id_cache/full_id_cache \
-#    -input-manifest \
-#    /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/ani_top_n.455674852/inp/assm_aligns.mft \
-#    -min-gap \
-#    10000 \
-#    -min-region \
-#    1000 \
-#    -o \
-#    /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/ani_top_n.455674852/out/ani-tax-report.xml \
-#    -o-annot \
-#    /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/ani_top_n.455674852/out/annot.asn \
-#    -query-assembly \
-#    /panfs/pan1.be-md.ncbi.nlm.nih.gov/gpipe/bacterial_pipeline/data56/Mycoplasma_genitalium_G37/Mycoplasma_genitalium_External_PGAP.4585524/4829637/gc_create_from_sequences.455674892/out/gencoll.asn \
-#    -ref-assembly-id \
-#    0 \
-#    -ref-assembly-taxid \
-#    243273
     
 steps:
   ani_top_identification:
@@ -59,5 +37,6 @@ steps:
         default: 0
       ref_assembly_taxid: ref_assembly_taxid
       tax_synon: tax_synon
-	  gcextract2_sqlite: gcextract2_sqlite
+      taxon_db: taxon_db
+      gcextract2_sqlite: gcextract2_sqlite
     out: [annot, top]
diff --git a/task_types/tt_assm_assm_blastn_wnode.cwl b/task_types/tt_assm_assm_blastn_wnode.cwl
index 929321b..5e097f1 100644
--- a/task_types/tt_assm_assm_blastn_wnode.cwl
+++ b/task_types/tt_assm_assm_blastn_wnode.cwl
@@ -43,7 +43,7 @@ steps:
         subjects_gc_id_list: subjects_gc_id_list
     out: [output]
   gpx_qsubmit:
-    run: ../progs/gpx_qsubmit.cwl
+    run: ../progs/gpx_qsubmit-xml.cwl
     in:
       affinity: affinity
       asn_cache: 
@@ -55,6 +55,9 @@ steps:
   assm_assm_blastn_wnode:
     run: ../progs/assm_assm_blastn_wnode.cwl
     in:
+      target_set:
+        source: [gencoll_asn, ref_gencoll_asn]
+        linkMerge: merge_flattened
       asn_cache: 
         source: [asn_cache, gc_seq_cache]
         linkMerge: merge_flattened
diff --git a/task_types/tt_gcaccess_from_list.cwl b/task_types/tt_gcaccess_from_list.cwl
index 2df55e2..012fd55 100644
--- a/task_types/tt_gcaccess_from_list.cwl
+++ b/task_types/tt_gcaccess_from_list.cwl
@@ -4,6 +4,7 @@ label: "gcaccess_from_list"
 class: Workflow # task type
 inputs:
   gc_id_list: File
+  gc_cache: File
 outputs:
   gencoll_asn:
     type: File
@@ -15,6 +16,7 @@ steps:
       mode: 
         default: AllSequences
       release_id_list: gc_id_list
+      gc_cache: gc_cache
     out: [gencoll_asn]
 # this is for the future  we might need this in general case    
 #  gc_get_molecules:

From ecc4c4c6130a3d12edbb9ad65a3a96242c67c71e Mon Sep 17 00:00:00 2001
From: "Badretdin, Azat" <badrazat@ncbi.nlm.nih.gov>
Date: Fri, 1 Nov 2019 10:06:54 -0400
Subject: [PATCH 4/6] fixing tab; JIRA: PGAPX-242

---
 wf_common.cwl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wf_common.cwl b/wf_common.cwl
index a488516..d3cf4aa 100755
--- a/wf_common.cwl
+++ b/wf_common.cwl
@@ -112,7 +112,7 @@ steps:
       - CDDdata2
       - CDDdata
       - defline_cleanup_rules
-	  - gcextract2_sqlite
+      - gcextract2_sqlite
       - gene_master_ini
       - genemark_path
       - hmm_path

From 4d7f3f8e6ba8f4789d1fa5d1b25acda6c710e41c Mon Sep 17 00:00:00 2001
From: "Badretdin, Azat" <badrazat@ncbi.nlm.nih.gov>
Date: Tue, 5 Nov 2019 08:32:26 -0500
Subject: [PATCH 5/6] added proc_annot_stats as output of the workflow for
 Pathogen Detect; JIRA: PGAPX-593

---
 wf_common.cwl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/wf_common.cwl b/wf_common.cwl
index d3cf4aa..2f06c07 100755
--- a/wf_common.cwl
+++ b/wf_common.cwl
@@ -778,7 +778,7 @@ steps:
       val_res_den_xml:  passdata/val_res_den_xml
       it:
         default: true
-    out:
+    out:  
       - id: var_proc_annot_stats_xml
       - id: var_proc_annot_details_xml
   Validate_Annotation_xsltproc_asnvalidate:
@@ -925,5 +925,6 @@ outputs:
   sqn:
     type: File
     outputSource:  add_checksum_sqn/output
-    
-
+  proc_annot_stats: 
+    type: File
+    outputSource:  Validate_Annotation_proc_annot_stats/var_proc_annot_stats_xml

From f438561b673ae2f7198019f2f1bdcf115da5a482 Mon Sep 17 00:00:00 2001
From: "Badretdin, Azat" <badrazat@ncbi.nlm.nih.gov>
Date: Thu, 7 Nov 2019 12:22:00 -0500
Subject: [PATCH 6/6] pass ref_assembly_id from the top; JIRA: PGAPX-602

---
 bacterial_kmer/wf_bacterial_kmer.cwl | 2 ++
 task_types/tt_kmer_top_n_extract.cwl | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/bacterial_kmer/wf_bacterial_kmer.cwl b/bacterial_kmer/wf_bacterial_kmer.cwl
index 80e72ae..9a161e8 100644
--- a/bacterial_kmer/wf_bacterial_kmer.cwl
+++ b/bacterial_kmer/wf_bacterial_kmer.cwl
@@ -14,6 +14,7 @@ inputs:
     gc_cache: File
     kmer_cache_sqlite: File
     ref_assembly_taxid: int
+    ref_assembly_id: int
     ANI_cutoff: File
     kmer_reference_assemblies: File
     tax_synon: File
@@ -148,6 +149,7 @@ steps:
     in:
       top_distances: Identify_Top_N/top_distances
       ref_assembly_taxid: ref_assembly_taxid
+      ref_assembly_id: ref_assembly_id
     out: [tax_report, gc_id_list]
   Build_Kmer_Tree:
     label: Build Kmer Tree
diff --git a/task_types/tt_kmer_top_n_extract.cwl b/task_types/tt_kmer_top_n_extract.cwl
index bc5fb3e..b2a406a 100644
--- a/task_types/tt_kmer_top_n_extract.cwl
+++ b/task_types/tt_kmer_top_n_extract.cwl
@@ -5,6 +5,7 @@ class: Workflow # task type
 inputs:
   top_distances: File
   ref_assembly_taxid: int
+  ref_assembly_id: int
 outputs:
   tax_report:
     type: File
@@ -17,8 +18,7 @@ steps:
     run: ../progs/kmer_top_n_extract.cwl
     in:
       input: top_distances
-      ref_assembly_id: 
-        default: 0 # because input is FASTA, and original value is ${GP_gencoll_release}
+      ref_assembly_id:  ref_assembly_id
       ref_assembly_taxid: ref_assembly_taxid
       threshold: 
         default: 0.1