-
Notifications
You must be signed in to change notification settings - Fork 3
/
tasks.wdl
600 lines (532 loc) · 21.9 KB
/
tasks.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
version 1.0
import "./structs.wdl"
# * task compute_one_pop_cms2_components
task compute_one_pop_cms2_components {
  meta {
    description: "Compute one-pop CMS2 component scores assuming selection in a given pop"
  }

  # Inputs:
  #   hapsets                       - non-empty list of hapset files to score
  #   sel_pop                       - population assumed to be under selection (its pop_id is passed to the script)
  #   component_computation_params  - struct serialized to JSON and handed to the script
  input {
    Array[File]+ hapsets
    Pop sel_pop
    ComponentComputationParams component_computation_params
  }

  # Scripts shipped alongside this WDL.  misc_utils is not named in the command
  # (hence the lint suppression); presumably it is imported by the main script -- confirm.
  File script = "./compute_cms2_components.py"
  File misc_utils = "./misc_utils.py"  # !UnusedDeclaration

  command <<<
    set -ex -o pipefail

    # All interpolated arguments are double-quoted, matching compute_two_pop_cms2_components,
    # so that paths or pop ids containing whitespace are not word-split by the shell.
    python3 "~{script}" --hapsets "@~{write_lines(hapsets)}" \
      --sel-pop "~{sel_pop.pop_id}" --components ihs nsl ihh12 delihh derFreq iSAFE \
      --component-computation-params "~{write_json(component_computation_params)}" \
      --checkpoint-file "checkpoint.tar"
  >>>

  output {
    # Per-hapset result files, collected by filename suffix from the working directory.
    Array[File]+ replicaInfos = glob("*.replicaInfo.json")
    Array[File]+ ihs = glob("*.ihs.out")
    Array[File]+ nsl = glob("*.nsl.out")
    Array[File]+ ihh12 = glob("*.ihh12.out")
    Array[File]+ delihh = glob("*.delihh.out")
    Array[File]+ derFreq = glob("*.derFreq.tsv")
    Array[File]+ iSAFE = glob("*.iSAFE.out")

    Array[File]+ hapset_vcf = glob("*.vcf.gz")  # for debugging
    Array[File]+ hapset_sample_case_txt = glob("*.case.txt")  # for debugging
    Array[File]+ hapset_sample_cont_txt = glob("*.cont.txt")  # for debugging

    # Echo of the input, so downstream steps can tell which pop these scores are for.
    Pop sel_pop_used = sel_pop

    # True iff every component produced exactly one file per input hapset.
    Boolean sanity_check = ((length(replicaInfos) == length(hapsets)) &&
                            (length(ihs) == length(hapsets)) &&
                            (length(nsl) == length(hapsets)) &&
                            (length(ihh12) == length(hapsets)) &&
                            (length(delihh) == length(hapsets)) &&
                            (length(derFreq) == length(hapsets)) &&
                            (length(iSAFE) == length(hapsets)))
    # An empty array assigned to a non-empty ("+") array type; presumably intended to make
    # the task fail when sanity_check is false -- confirm against the execution engine.
    Array[Int]+ sanity_check_assert = if sanity_check then [1] else []
  }

  runtime {
    #docker: "quay.io/broad_cms_ci/cms@sha256:fc4825edda550ef203c917adb0b149cbcc82f0eeae34b516a02afaaab0eceac6"  # selscan=1.3.0a09
    docker: "quay.io/broad_cms_ci/cms:cms2-docker-component-stats-aced0918ac0afd34f7cbb3031e3b044ac7e686cc"  # selscan=1.3.0a09
    preemptible: 3
    memory: "16 GB"
    cpu: 1
    disks: "local-disk 50 HDD"
    # Same file name as passed to --checkpoint-file in the command.
    checkpointFile: "checkpoint.tar"  # !UnknownRuntimeKey
  }
}
# * task compute_two_pop_cms2_components
task compute_two_pop_cms2_components {
  meta {
    description: "Compute cross-pop comparison CMS2 component scores"
  }

  # ** inputs
  #   hapsets  - non-empty list of hapset files to score
  #   sel_pop  - population passed to the script as --sel-pop
  #   alt_pop  - population passed to the script as --alt-pop
  input {
    Array[File]+ hapsets
    Pop sel_pop
    Pop alt_pop
  }

  # The command names only `script`; misc_utils is declared without being referenced there.
  File script = "./compute_cms2_components.py"
  File misc_utils = "./misc_utils.py"  # !UnusedDeclaration

  # ** command
  command <<<
    set -ex -o pipefail

    python3 "~{script}" \
      --hapsets "@~{write_lines(hapsets)}" \
      --sel-pop "~{sel_pop.pop_id}" \
      --alt-pop "~{alt_pop.pop_id}" \
      --components xpehh fst delDAF \
      --checkpoint-file checkpoint.tar
  >>>

  # ** outputs
  output {
    # Result files are gathered from the working directory by suffix.
    Array[File]+ replicaInfos = glob("*.replicaInfo.json")
    Array[File]+ xpehh = glob("*.xpehh.out")
    #Array[File]+ xpehh_log = glob("*.xpehh.log")
    Array[File]+ fst_and_delDAF = glob("*.fst_and_delDAF.tsv")

    # The pop pair these scores were computed for, echoed from the inputs.
    Pop sel_pop_used = sel_pop
    Pop alt_pop_used = alt_pop

    # True iff each enabled output kind has one file per input hapset
    # (the xpehh_log comparison is disabled together with that output).
    Boolean sanity_check = (
      (length(replicaInfos) == length(hapsets)) &&
      (length(xpehh) == length(hapsets)) &&
      #(length(xpehh_log) == length(hapsets)) &&
      (length(fst_and_delDAF) == length(hapsets))
    )
    Array[Int]+ sanity_check_assert = if sanity_check then [1] else []
  }

  # ** runtime
  runtime {
    # docker: "quay.io/broad_cms_ci/cms@sha256:fc4825edda550ef203c917adb0b149cbcc82f0eeae34b516a02afaaab0eceac6"  # selscan=1.3.0a09
    docker: "quay.io/broad_cms_ci/cms:cms2-docker-component-stats-aced0918ac0afd34f7cbb3031e3b044ac7e686cc"  # selscan=1.3.0a09
    cpu: 8
    memory: "8 GB"
    disks: "local-disk 50 HDD"
    preemptible: 3
    checkpointFile: "checkpoint.tar"  # !UnknownRuntimeKey
  }
}
# * task compute_one_pop_bin_stats_for_normalization
task compute_one_pop_bin_stats_for_normalization {
  meta {
    description: "Compute the means and stds of component scores on neutral sims, for the purpose of normalization"
    email: "ilya_shl@alum.mit.edu"
  }

  # Inputs:
  #   out_fnames_prefix  - prefix for every file this task writes
  #   sel_pop            - echoed to the output unchanged; not used by the command
  #   *_out              - per-component score files fed to `norm`
  #   n_bins_*           - bin counts passed to `norm --bins` for the matching component
  input {
    String out_fnames_prefix
    Pop sel_pop

    Array[File]+ ihs_out
    Array[File]+ delihh_out
    Array[File]+ nsl_out
    Array[File]+ ihh12_out

    Int n_bins_ihs
    Int n_bins_nsl
    Int n_bins_delihh
  }

  # ihh12 bin count is fixed here rather than exposed as an input.
  Int n_bins_ihh12 = 1

  command <<<
    set -ex -o pipefail

    # One `norm` call per component; each only saves bin statistics (--only-save-bins)
    # together with a log file.

    norm --ihs --bins ~{n_bins_ihs} \
      --files "@~{write_lines(ihs_out)}" \
      --save-bins "~{out_fnames_prefix}.norm_bins_ihs.dat" \
      --only-save-bins \
      --log "~{out_fnames_prefix}.norm_bins_ihs.log"

    # The delihh files are processed with norm's --ihs mode.
    norm --ihs --bins ~{n_bins_delihh} \
      --files "@~{write_lines(delihh_out)}" \
      --save-bins "~{out_fnames_prefix}.norm_bins_delihh.dat" \
      --only-save-bins \
      --log "~{out_fnames_prefix}.norm_bins_delihh.log"

    norm --nsl --bins ~{n_bins_nsl} \
      --files "@~{write_lines(nsl_out)}" \
      --save-bins "~{out_fnames_prefix}.norm_bins_nsl.dat" \
      --only-save-bins \
      --log "~{out_fnames_prefix}.norm_bins_nsl.log"

    norm --ihh12 --bins ~{n_bins_ihh12} \
      --files "@~{write_lines(ihh12_out)}" \
      --save-bins "~{out_fnames_prefix}.norm_bins_ihh12.dat" \
      --only-save-bins \
      --log "~{out_fnames_prefix}.norm_bins_ihh12.log"
  >>>

  output {
    # Saved bin statistics, one file per component.
    File norm_bins_ihs = "~{out_fnames_prefix}.norm_bins_ihs.dat"
    File norm_bins_nsl = "~{out_fnames_prefix}.norm_bins_nsl.dat"
    File norm_bins_ihh12 = "~{out_fnames_prefix}.norm_bins_ihh12.dat"
    File norm_bins_delihh = "~{out_fnames_prefix}.norm_bins_delihh.dat"

    # Matching `norm` logs.
    File norm_bins_ihs_log = "~{out_fnames_prefix}.norm_bins_ihs.log"
    File norm_bins_nsl_log = "~{out_fnames_prefix}.norm_bins_nsl.log"
    File norm_bins_ihh12_log = "~{out_fnames_prefix}.norm_bins_ihh12.log"
    File norm_bins_delihh_log = "~{out_fnames_prefix}.norm_bins_delihh.log"

    Pop sel_pop_used = sel_pop
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:cms2-docker-component-stats-aced0918ac0afd34f7cbb3031e3b044ac7e686cc"  # selscan=1.3.0a09
    #docker: "quay.io/broad_cms_ci/cms@sha256:fc4825edda550ef203c917adb0b149cbcc82f0eeae34b516a02afaaab0eceac6"  # selscan=1.3.0a09
    cpu: 1
    memory: "8 GB"
    disks: "local-disk 50 HDD"
    preemptible: 2
  }
}
# * task compute_two_pop_bin_stats_for_normalization
task compute_two_pop_bin_stats_for_normalization {
  meta {
    description: "Compute the means and stds of component scores on neutral sims, for a pop pair, for the purpose of normalization"
  }

  # Inputs:
  #   out_fnames_prefix  - prefix for every file this task writes
  #   sel_pop, alt_pop   - the pop pair; their pop_ids are embedded in the output file names
  #   xpehh_out          - xpehh score files fed to `norm --xpehh`
  input {
    String out_fnames_prefix
    Pop sel_pop
    Pop alt_pop
    Array[File]+ xpehh_out
  }

  # xpehh bin count is fixed here rather than exposed as an input.
  Int n_bins_xpehh = 1

  # Output names for the (sel_pop, alt_pop) orientation.
  String norm_bins_xpehh_fname = "${out_fnames_prefix}__selpop_${sel_pop.pop_id}__altpop_${alt_pop.pop_id}.norm_bins_xpehh.dat"
  # The log must not share a path with the bins file above: both are written by the same
  # `norm` call and both are declared as task outputs, so it carries a .log extension.
  String norm_bins_xpehh_log_fname = "${out_fnames_prefix}__selpop_${sel_pop.pop_id}__altpop_${alt_pop.pop_id}.norm_bins_xpehh.log"

  # Output names for the flipped (alt_pop, sel_pop) orientation.
  String norm_bins_flip_pops_xpehh_fname = "${out_fnames_prefix}__selpop_${alt_pop.pop_id}__altpop_${sel_pop.pop_id}.norm_bins_xpehh.dat"
  String norm_bins_flip_pops_xpehh_log_fname =
    "${out_fnames_prefix}__selpop_${alt_pop.pop_id}__altpop_${sel_pop.pop_id}.norm_bins_xpehh.log"

  command <<<
    set -ex -o pipefail

    # Bin statistics for the pops as given.
    norm --xpehh --bins ~{n_bins_xpehh} --files "@~{write_lines(xpehh_out)}" --save-bins "~{norm_bins_xpehh_fname}" --only-save-bins \
      --log "~{norm_bins_xpehh_log_fname}"

    # Same input files, with --xpehh-flip-pops, written under the swapped-pop names.
    norm --xpehh --xpehh-flip-pops --bins ~{n_bins_xpehh} --files "@~{write_lines(xpehh_out)}" \
      --save-bins "~{norm_bins_flip_pops_xpehh_fname}" \
      --only-save-bins \
      --log "~{norm_bins_flip_pops_xpehh_log_fname}"
  >>>

  output {
    File norm_bins_xpehh = norm_bins_xpehh_fname
    File norm_bins_xpehh_log = norm_bins_xpehh_log_fname
    File norm_bins_flip_pops_xpehh = norm_bins_flip_pops_xpehh_fname
    File norm_bins_flip_pops_xpehh_log = norm_bins_flip_pops_xpehh_log_fname

    # Pop pair for the unflipped outputs ...
    Pop sel_pop_used = sel_pop
    Pop alt_pop_used = alt_pop
    # ... and the swapped pair that the flip-pops outputs correspond to.
    Pop flip_pops_sel_pop_used = alt_pop
    Pop flip_pops_alt_pop_used = sel_pop
  }

  runtime {
    #docker: "quay.io/broad_cms_ci/cms@sha256:fc4825edda550ef203c917adb0b149cbcc82f0eeae34b516a02afaaab0eceac6"  # selscan=1.3.0a09
    docker: "quay.io/broad_cms_ci/cms:cms2-docker-component-stats-aced0918ac0afd34f7cbb3031e3b044ac7e686cc"  # selscan=1.3.0a09
    preemptible: 2
    memory: "8 GB"
    cpu: 1
    disks: "local-disk 50 HDD"
  }
}
# * task normalize_and_collate_block
task normalize_and_collate_block {
  meta {
    description: "Normalize raw scores to neutral sims, and collate component scores into one table."
  }

  # The whole request is carried by one struct, which is serialized to JSON for the script.
  input {
    NormalizeAndCollateBlockInput inp
  }

  File normalize_and_collate_script = "./norm_and_collate_block.py"

  command <<<
    set -ex -o pipefail

    python3 "~{normalize_and_collate_script}" \
      --input-json "~{write_json(inp)}"
  >>>

  output {
    # Files are picked up from the working directory by their suffixes.
    Array[File]+ replica_info = glob("*.normed_and_collated.replicaInfo.json")
    Array[File]+ normed_collated_stats = glob("*.normed_and_collated.tsv")

    # Echo of the sel_pop field of the input struct.
    Pop sel_pop_used = inp.sel_pop
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:cms2-docker-component-stats-aced0918ac0afd34f7cbb3031e3b044ac7e686cc"  # selscan=1.3.0a09
    #docker: "quay.io/broad_cms_ci/cms@sha256:fc4825edda550ef203c917adb0b149cbcc82f0eeae34b516a02afaaab0eceac6"  # selscan=1.3.0a09
    cpu: 1
    memory: "1 GB"
    disks: "local-disk 10 HDD"
    preemptible: 1
  }
}
# Input bundle for task collate_stats_and_metadata_for_sel_sims_block, which serializes
# it to JSON for its script.
struct collate_stats_and_metadata_for_all_sel_sims_input {
  # Prefix used by the task to build its output file names.
  String out_fnames_prefix

  # Non-empty file lists; presumably the outputs of normalize_and_collate_block
  # (normed/collated tables and their replica infos) -- confirm against the calling workflow.
  Array[File]+ sel_normed_and_collated
  Array[File]+ replica_infos
}
task collate_stats_and_metadata_for_sel_sims_block {
  meta {
    description: "Collate component stats and metadata for a block of selection sims"
  }

  input {
    collate_stats_and_metadata_for_all_sel_sims_input inp
  }

  File collate_stats_and_metadata_for_sel_sims_block_script = "./collate_stats_and_metadata_for_sel_sims_block.py"

  # Value handed to the script's --max-hapset-id-len option.
  Int max_hapset_id_len = 256

  # Both output names are derived from the prefix carried in the input struct.
  String hapsets_component_stats_h5_fname = "~{inp.out_fnames_prefix}.all_component_stats.h5"
  String hapsets_metadata_tsv_gz_fname = "~{inp.out_fnames_prefix}.hapsets_metadata.tsv.gz"

  command <<<
    set -ex -o pipefail

    python3 "~{collate_stats_and_metadata_for_sel_sims_block_script}" \
      --input-json "~{write_json(inp)}" \
      --max-hapset-id-len ~{max_hapset_id_len} \
      --hapsets-component-stats-h5-fname "~{hapsets_component_stats_h5_fname}" \
      --hapsets-metadata-tsv-gz-fname "~{hapsets_metadata_tsv_gz_fname}"
  >>>

  output {
    File hapsets_component_stats_h5 = hapsets_component_stats_h5_fname
    File hapsets_metadata_tsv_gz = hapsets_metadata_tsv_gz_fname
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:cms2-docker-component-stats-aced0918ac0afd34f7cbb3031e3b044ac7e686cc"
    cpu: 1
    memory: "4 GB"
    disks: "local-disk 25 HDD"
    preemptible: 1
  }
}
# * task create_tar_gz
task create_tar_gz {
  meta {
    description: "Combine files into a tar file"
    email: "ilya_shl@alum.mit.edu"
  }

  # Inputs:
  #   files         - non-empty list of files to put in the archive
  #   out_basename  - archive name without the ".tar.gz" extension (default "out")
  input {
    Array[File]+ files
    String out_basename = "out"
  }

  String out_fname_tar_gz = out_basename + ".tar.gz"

  command <<<
    set -ex -o pipefail

    # The archive name and every member path are double-quoted so that names containing
    # whitespace are not word-split by the shell.  The separator below closes one quoted
    # path and opens the next; `files` is declared non-empty, so at least one path is present.
    tar cvfz "~{out_fname_tar_gz}" "~{sep='" "' files}"
  >>>

  output {
    File out_tar_gz = out_fname_tar_gz
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:wget-1544c1d7a6fbb36a7f0cfebf7aa332a6e52e767d"
    memory: "500 MB"
    cpu: 1
    disks: "local-disk 1 HDD"
  }
}
task construct_pops_info_for_1KG {
  meta {
    description: "Constructs a PopsInfo struct for 1KG populations"
  }

  parameter_meta {
    # ** inputs
    superpop_to_representative_pop_json: "(File) map from superpop to the pop used to represent it in model-fitting simulations"
    empirical_regions_bed: "(File) empirical regions to fetch. Column 5 (score), if present, is interpreted as the name of the putatively selected population. The same region may be listed multiple times to test for selection in multiple populations."

    # ** outputs
    pops_info: "(PopsInfo) a PopsInfo struct giving info for 1KG pops"
  }

  input {
    File superpop_to_representative_pop_json = "gs://fc-21baddbc-5142-4983-a26e-7d85a72c830b/resources/superpop-to-representative-pop.json"
    File empirical_regions_bed
  }

  File construct_pops_info_for_1KG_script = "./construct_pops_info_for_1KG.py"

  # Name of the JSON file the script is asked to write; parsed again in the output section.
  String pops_info_fname = "pops_info.1KG.json"

  command <<<
    set -ex -o pipefail

    # Creates a hapsets/ directory; nothing else in this command refers to it.
    mkdir "${PWD}/hapsets"

    python3 "~{construct_pops_info_for_1KG_script}" \
      --superpop-to-representative-pop-json "~{superpop_to_representative_pop_json}" \
      --empirical-regions-bed "~{empirical_regions_bed}" \
      --out-pops-info-json "~{pops_info_fname}"
  >>>

  output {
    # The struct is taken from the "pops_info" key of the JSON written by the script.
    PopsInfo pops_info = read_json(pops_info_fname)["pops_info"]  # !UnverifiedStruct
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms@sha256:c8727e20ba0bc058c5c5596c4fad1ee23bc20c59f4f337ed62edb10e3a646010"  # selscan=1.3.0a09 with tabix
    cpu: 1
    memory: "16 GB"
    disks: "local-disk 256 HDD"
    preemptible: 1
  }
}
task fetch_empirical_hapsets_from_1KG {
  meta {
    description: "Fetches empirical hapsets for specified regions from 1KG, converts to hapset format"
  }

  parameter_meta {
    # ** inputs
    pops_info: "(PopsInfo) information about 1KG pops, including the outgroups to compare with each pop"
    empirical_regions_bed: "(File) empirical regions to fetch. Column 5 (score), if present, is interpreted as the name of the putatively selected population. The same region may be listed multiple times to test for selection in multiple populations."
    sel_pop_id: "(String?) if not given, assume empirical_regions_bed are neutral, else take only regions with selection in this pop"
    # add: metadata to attach to all regions
    genetic_maps_tar_gz: "(File) genetic maps"
    superpop_to_representative_pop_json: "(File) map from superpop to the pop used to represent it in model-fitting simulations"

    # ** outputs
    empirical_hapsets: "(Array[File]) for each empirical region, a .tar.gz file containing one tped for each pop, and a *.replicaInfo.json file describing the hapset"
  }

  # Note: pops_info is declared as an input but is not referenced in the command below.
  input {
    PopsInfo pops_info
    String? sel_pop_id
    File empirical_regions_bed
    String out_fnames_prefix
    File genetic_maps_tar_gz = "gs://fc-21baddbc-5142-4983-a26e-7d85a72c830b/genetic_maps/hg19_maps.tar.gz"
    File superpop_to_representative_pop_json = "gs://fc-21baddbc-5142-4983-a26e-7d85a72c830b/resources/superpop-to-representative-pop.json"
  }

  File fetch_empirical_hapsets_script = "./fetch_empirical_hapsets.py"

  command <<<
    set -ex -o pipefail

    # Directory handed to the script as --tmp-dir; the output glob reads hapsets from it.
    mkdir "${PWD}/hapsets"

    # The --sel-pop option is emitted only when sel_pop_id is supplied.
    python3 "~{fetch_empirical_hapsets_script}" \
      --empirical-regions-bed "~{empirical_regions_bed}" \
      --genetic-maps-tar-gz "~{genetic_maps_tar_gz}" \
      --superpop-to-representative-pop-json "~{superpop_to_representative_pop_json}" \
      --out-fnames-prefix "~{out_fnames_prefix}" \
      ~{"--sel-pop " + sel_pop_id} \
      --tmp-dir "${PWD}/hapsets"

    # Report disk usage after the run.
    df -h
  >>>

  output {
    Array[File]+ empirical_hapsets = glob("hapsets/*.hapset.tar.gz")
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms@sha256:c8727e20ba0bc058c5c5596c4fad1ee23bc20c59f4f337ed62edb10e3a646010"  # selscan=1.3.0a09 with tabix
    cpu: 1
    memory: "16 GB"
    disks: "local-disk 256 HDD"
    preemptible: 1
  }
}
# task construct_empirical_neutral_regions_list_gazave14 {
# meta {
# description: "Construct a .bed file representing neutral empirical regions based on Gazave 2014 paper."
# }
# }
task call_neutral_region_explorer {
  meta {
    description: "Calls Neutral Region Explorer webserver"
  }

  parameter_meta {
    # ** inputs
    # ** outputs
  }

  # Inputs:
  #   nre_params         - struct serialized to JSON and passed to the script as --nre-params
  #   out_fnames_prefix  - prefix for every file this task writes (default "nre")
  input {
    NeutralRegionExplorerParams nre_params
    String out_fnames_prefix = "nre"
  }

  File fetch_neutral_regions_nre_script = "./fetch_neutral_regions_nre.py"

  # Names of the files the script is asked to write, all derived from the prefix.
  String neutral_regions_tsv_fname = "~{out_fnames_prefix}.neutral_regions.tsv"
  String neutral_regions_bed_fname = "~{out_fnames_prefix}.neutral_regions.bed"
  String nre_submitted_form_html_fname = "~{out_fnames_prefix}.submitted_form.html"
  String nre_results_html_fname = "~{out_fnames_prefix}.nre_results.html"
  String nre_results_url_fname = "~{out_fnames_prefix}.nre_results.url.txt"

  command <<<
    set -ex -o pipefail

    python3 "~{fetch_neutral_regions_nre_script}" \
      --nre-params "~{write_json(nre_params)}" \
      --neutral-regions-tsv "~{neutral_regions_tsv_fname}" \
      --neutral-regions-bed "~{neutral_regions_bed_fname}" \
      --nre-submitted-form-html "~{nre_submitted_form_html_fname}" \
      --nre-results-html "~{nre_results_html_fname}" \
      --nre-results-url "~{nre_results_url_fname}"
  >>>

  output {
    File neutral_regions_tsv = neutral_regions_tsv_fname
    File neutral_regions_bed = neutral_regions_bed_fname
    File nre_submitted_form_html = nre_submitted_form_html_fname
    File nre_results_html = nre_results_html_fname

    # The URL is returned as a string read back from the file the script wrote.
    String nre_results_url = read_string(nre_results_url_fname)
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:webdriver-6dba18313775d137217c8a45c4bdd53d6b4e4441"
    cpu: 1
    memory: "4 GB"
    disks: "local-disk 32 HDD"
    preemptible: 1
  }
}
task fetch_file_from_url {
  meta {
    description: "Downloads a file from a given URL. Using an output of this task as inputs, instead of using URLs directly, can improve reproducibility in case the contents of a URL changes or the URL becomes inaccessible, since call caching would save a copy of the file."
  }

  parameter_meta {
    # ** inputs
    url: "(String) the URL from which to download a file"
    out_fname: "(String?) name of output file (defaults to basename(url))"
    sha256: "(String?) if given, fail unless the sha256 checksum of the downloaded file matches this value"
    file_metadata: "(Map[String,String]) arbitrary metadata (such as description) to associate with this file"
    wget_flags: "(String) wget options to use for downloading, such as timeout and number of retries"

    # ** outputs
    file: "(File) the downloaded file"
    url_used: "(String) url from which the file was fetched"
    file_lastmod: "(String) the last-modified time of the downloaded file, as seconds since the epoch (output of stat -c %Y)"
    file_size: "(Int) size of the downloaded file in bytes"
    out_file_metadata: "(Map[String,String]) any metadata specified as input, copied to the output"
    wget_cmd_used: "(String) the full wget command used to download the file"
  }

  input {
    String url
    String? out_fname
    String? sha256
    Map[String,String] file_metadata = {}
    String wget_flags = " -T 300 -t 20 -S "
  }

  # Output name: explicit out_fname if given, otherwise the last path component of the URL.
  String out_fname_here = select_first([out_fname, basename(url), "unknown_filename"])
  String out_lastmod_fname = out_fname_here + ".lastmod.txt"

  # Reported verbatim as the wget_cmd_used output.  The command section runs the same
  # invocation, but with the file name and URL passed as quoted shell words.
  String wget_cmd_here = "wget -O " + out_fname_here + " " + wget_flags + " " + url

  # Empty string stands for "no checksum requested".
  String sha256_here = select_first([sha256, ""])

  command <<<
    set -ex -o pipefail

    # The URL and output name are double-quoted so that shell metacharacters common in
    # URLs (such as '&' and ';' in query strings) are not interpreted by the shell.
    # wget_flags stays unquoted on purpose: it holds several options that must be split.
    wget -O "~{out_fname_here}" ~{wget_flags} "~{url}"

    if [ -n "~{sha256_here}" ]
    then
      # Checksum-file line format: digest, two spaces, file name.
      echo "~{sha256_here}  ~{out_fname_here}" > "~{out_fname_here}.sha256"
      sha256sum -c "~{out_fname_here}.sha256"
    fi

    # save last-modified
    stat -c '%Y' "~{out_fname_here}" > "~{out_lastmod_fname}"
  >>>

  output {
    File file = out_fname_here
    String url_used = url
    String file_lastmod = read_string(out_lastmod_fname)
    Int file_size = round(size(file))
    Map[String,String] out_file_metadata = file_metadata
    String wget_cmd_used = wget_cmd_here
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:wget-1544c1d7a6fbb36a7f0cfebf7aa332a6e52e767d"
    memory: "4 GB"
    cpu: 1
    disks: "local-disk 32 HDD"
    preemptible: 1
  }
}
task compute_intervals_stats {
  meta {
    description: "Compute summary stats for a set of genomic intervals files"
  }

  parameter_meta {
    # ** inputs
    intervals_files: "(Array[File]+) genomic intervals files (.bed or .gff3)"

    # ** outputs
    intervals_report_html: "(File) An HTML report of stats about the intervals"
  }

  input {
    Array[File]+ intervals_files
    File? metadata_json
    # By default the report is named after the first intervals file.
    String intervals_report_html_fname = basename(intervals_files[0]) + ".stats.html"
  }

  File compute_intervals_stats_script = "./compute_intervals_stats.py"

  command <<<
    set -ex -o pipefail

    # The --metadata-json option is emitted only when metadata_json is supplied.
    python3 "~{compute_intervals_stats_script}" \
      --intervals-files "@~{write_lines(intervals_files)}" \
      ~{'--metadata-json ' + metadata_json} \
      --intervals-report-html "~{intervals_report_html_fname}"
  >>>

  output {
    File intervals_report_html = intervals_report_html_fname
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:common-tools-2b4d477113c453dc9e957c002f6665be20fd56fd"
    cpu: 1
    memory: "4 GB"
    disks: "local-disk 32 HDD"
    preemptible: 1
  }
}
task construct_neutral_regions_list {
  meta {
    description: "Constructs a list of likely-neutral genomic regions."
  }

  parameter_meta {
    # ** inputs

    # ** outputs
    neutral_regions_bed: "(File) likely-neutral regions"
  }

  # Both structs are serialized to JSON and passed to the script as files.
  input {
    EmpiricalNeutralRegionsParams empirical_neutral_regions_params
    GenomicFeaturesForFindingEmpiricalNeutralRegions genomic_features_for_finding_empirical_neutral_regions
    String neutral_regions_bed_fname = "neutral_regions.bed"
  }

  File construct_neutral_regions_list_script = "./construct_neutral_regions_list.py"

  # Written once here so that the same JSON file can be both passed to the script and
  # returned as an output.
  File empirical_neutral_regions_params_json = write_json(empirical_neutral_regions_params)

  command <<<
    set -ex -o pipefail

    python3 "~{construct_neutral_regions_list_script}" \
      --empirical-neutral-regions-params "~{empirical_neutral_regions_params_json}" \
      --genomic-features-for-finding-empirical-neutral-regions "~{write_json(genomic_features_for_finding_empirical_neutral_regions)}" \
      --neutral-regions-bed "~{neutral_regions_bed_fname}"
  >>>

  output {
    File neutral_regions_bed = neutral_regions_bed_fname

    # Every .bed file in the working directory; the pattern also matches
    # neutral_regions_bed_fname when it keeps a .bed extension.
    Array[File] aux_beds = glob("*.bed")

    # The parameters actually used, echoed both as a struct and as the JSON file.
    EmpiricalNeutralRegionsParams empirical_neutral_regions_params_used = empirical_neutral_regions_params
    File empirical_neutral_regions_params_used_json = empirical_neutral_regions_params_json
  }

  runtime {
    docker: "quay.io/broad_cms_ci/cms:common-tools-2b4d477113c453dc9e957c002f6665be20fd56fd"
    cpu: 1
    memory: "4 GB"
    disks: "local-disk 32 HDD"
    preemptible: 1
  }
}