-
Notifications
You must be signed in to change notification settings - Fork 0
/
new-celltag.smk
127 lines (105 loc) · 4.45 KB
/
new-celltag.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
configfile: "celltag-config.yaml"
# Aggregate target rule: it has no action of its own and only requests the
# marker file ".smk_markers/all.done".
# NOTE(review): the rule that creates this marker is not visible in this part
# of the workflow - confirm it exists elsewhere.
rule all:
    input:
        ".smk_markers/all.done",
# Creates (touches) the marker file ".smk_markers/start_over.marker".
# The rule has no inputs and runs no command; touch() makes Snakemake create
# the empty file itself.
rule start_over:
    output:
        touch(".smk_markers/start_over.marker"),
# Clones the morris-lab/BiddyetalWorkflow repository from GitHub into
# src/celltag/BiddyetalWorkflow.
# The whole checkout is declared as a single directory() output, so Snakemake
# tracks the directory rather than the individual files inside it.
rule clone_workflow:
    output:
        out_dir=directory("src/celltag/BiddyetalWorkflow"),
    shell:
        "git clone https://github.com/morris-lab/BiddyetalWorkflow {output.out_dir}"
# Decompresses the gzipped filtered barcode list of one sample into the
# interim directory.
#
# input.barcodes_10X:
#     <raw_dir>/count/<original name>/outs/filtered_feature_bc_matrix/barcodes.tsv.gz
#     where <original name> is looked up in config["samples_orig_names"] using
#     the {sample} wildcard.
# output.tsv:
#     <interim_dir>/{sample}/10X_filtered_barcodes.tsv
#
# Shell notes:
#   * options are placed before the file operand, so the command does not
#     depend on GNU-style argument permutation;
#   * --to-stdout already leaves the .gz file untouched, so --keep (which
#     older gzip releases do not know) is not needed;
#   * ":q" shell-quotes the paths, so directories from the config that contain
#     spaces or shell metacharacters do not break the command.
rule unzip_10X_barcode_tsv:
    input:
        barcodes_10X=lambda wildcards: f"{config['raw_dir']}/count/{config['samples_orig_names'][wildcards.sample]}/outs/filtered_feature_bc_matrix/barcodes.tsv.gz",
    output:
        tsv=expand(
            "{output_dir}/{sample}/10X_filtered_barcodes.tsv",
            output_dir=config["interim_dir"],
            allow_missing=True,
        ),
    shell:
        "gunzip --to-stdout {input.barcodes_10X:q} > {output.tsv:q}"
# Setup note: the R package installation reported "also installing the dependencies" ‘GenomeInfoDbData’, ‘GenomeInfoDb’, ‘GenomicRanges’, ‘Biostrings’, ‘BiocGenerics’, ‘S4Vectors’, ‘IRanges’, ‘XVector’, ‘zlibbioc’, ‘BiocParallel’, ‘Rhtslib’.
# The corresponding conda/mamba packages can be installed with:
# mamba install bioconductor-genomeinfodb bioconductor-genomeinfodbdata bioconductor-genomicranges bioconductor-biostrings bioconductor-biocgenerics bioconductor-s4vectors bioconductor-iranges bioconductor-xvector bioconductor-zlibbioc bioconductor-biocparallel bioconductor-rhtslib
def _celltag_source_bam(wildcards):
    # Path of the sample's position-sorted BAM below the raw data directory;
    # the directory name is the sample's original name from the config.
    original_name = config['samples_orig_names'][wildcards.sample]
    return f"{config['raw_dir']}/count/{original_name}/outs/gex_possorted_bam.bam"


def _celltag_filtered_barcodes(wildcards):
    # Same path pattern as the output of rule unzip_10X_barcode_tsv.
    return f"{config['interim_dir']}/{wildcards.sample}/10X_filtered_barcodes.tsv"


# Runs src/celltag/1.1_create_celltag_object.R for one sample.
#
# input.bam:          position-sorted BAM of the sample (raw directory).
# input.barcodes_tsv: decompressed filtered barcode list (interim directory).
# output.bam_obj:         <interim_dir>/{sample}/celltag/bam_obj_pre_collapsing.rds
# output.collapsing_file: <interim_dir>/{sample}/celltag/collapsing/for-collapsing.txt
#                         (the path that rule collapse reads as its input).
# NOTE(review): what the R script computes is not visible here; the rule name
# suggests CellTag filtering plus preparation of the collapsing input - confirm
# against the script.
rule filter_celltags_and_prepare_collapsing:
    input:
        bam=_celltag_source_bam,
        barcodes_tsv=_celltag_filtered_barcodes,
    output:
        bam_obj=expand(
            "{output_dir}/{sample}/celltag/bam_obj_pre_collapsing.rds",
            output_dir=config["interim_dir"],
            allow_missing=True,
        ),
        collapsing_file=expand(
            "{output_dir}/{sample}/celltag/collapsing/for-collapsing.txt",
            output_dir=config["interim_dir"],
            allow_missing=True,
        ),
    script:
        "src/celltag/1.1_create_celltag_object.R"
# Runs starcode on the per-sample collapsing file and stores its standard
# output as the collapsing result.
#
# input.collapsing_file:
#     <interim_dir>/{sample}/celltag/collapsing/for-collapsing.txt
# output.collapsing_result:
#     <interim_dir>/{sample}/celltag/collapsing/collapsing_result.txt
#
# starcode is taken from the conda environment envs/starcode.yaml and is called
# with "-s" and "--print-clusters" (see the starcode documentation for their
# exact meaning).
# ":q" shell-quotes both paths, so an interim directory from the config that
# contains spaces or shell metacharacters does not break the redirect.
rule collapse:
    input:
        collapsing_file=lambda wildcards: f"{config['interim_dir']}/{wildcards.sample}/celltag/collapsing/for-collapsing.txt",
    output:
        collapsing_result=expand(
            "{output_dir}/{sample}/celltag/collapsing/collapsing_result.txt",
            output_dir=config["interim_dir"],
            allow_missing=True,
        ),
    conda:
        "envs/starcode.yaml"
    shell:
        "starcode -s --print-clusters {input.collapsing_file:q} > {output.collapsing_result:q}"
# Runs src/celltag/1.2_.R for one sample.
#
# input.bam_obj:           <interim_dir>/{sample}/celltag/bam_obj_pre_collapsing.rds
# input.collapsing_result: <interim_dir>/{sample}/celltag/collapsing/collapsing_result.txt
# input.whitelist:         CellTag whitelist CSV inside the BiddyetalWorkflow checkout.
# output.bam_obj:          <interim_dir>/{sample}/celltag/bam_obj_post_collapsing.rds
# output.metric_plots_*:   three SVG files below <output_dir>/{sample}/celltag/
#                          (pre filtering, post whitelist, post metric filtering).
#
# NOTE(review): the whitelist lives inside the directory that rule
# clone_workflow produces, but no rule declares the CSV itself as an output, so
# Snakemake cannot derive it on its own - confirm that the repository is cloned
# before this rule is requested.
# NOTE(review): what the R script computes is not visible here; the rule name
# suggests building the collapsed matrix and filtering it - confirm against the
# script.
rule collapsed_matrix_and_filtering:
    input:
        bam_obj=lambda wildcards: f"{config['interim_dir']}/{wildcards.sample}/celltag/bam_obj_pre_collapsing.rds",
        collapsing_result=lambda wildcards: f"{config['interim_dir']}/{wildcards.sample}/celltag/collapsing/collapsing_result.txt",
        # whitelist=f"{config['raw_dir']}/barcodes_reverse_complementary.csv",
        # Plain string: the path has no placeholders, so no f-string is needed.
        whitelist="src/celltag/BiddyetalWorkflow/whitelist/V3.CellTag.Whitelist.csv",
    output:
        bam_obj=expand(
            "{output_dir}/{sample}/celltag/bam_obj_post_collapsing.rds",
            output_dir=config["interim_dir"],
            allow_missing=True,
        ),
        metric_plots_pre=expand(
            "{output_dir}/{sample}/celltag/metric_plots_pre_filtering.svg",
            output_dir=config["output_dir"],
            allow_missing=True,
        ),
        metric_plots_post_whitelist=expand(
            "{output_dir}/{sample}/celltag/metric_plots_post_whitelist.svg",
            output_dir=config["output_dir"],
            allow_missing=True,
        ),
        metric_plots_post_metric_filtering=expand(
            "{output_dir}/{sample}/celltag/metric_plots_post_metric_filtering.svg",
            output_dir=config["output_dir"],
            allow_missing=True,
        ),
    script:
        "src/celltag/1.2_.R"
def _post_collapsing_object(wildcards):
    # Same path pattern as output.bam_obj of rule collapsed_matrix_and_filtering.
    return f"{config['interim_dir']}/{wildcards.sample}/celltag/bam_obj_post_collapsing.rds"


# Runs src/celltag/1.3_.R for one sample.
#
# input.bam_obj:     post-collapsing object from the interim directory.
# output.bam_obj:    <output_dir>/{sample}/celltag/bam_obj.rds
# output.clones_csv: <output_dir>/{sample}/celltag/clones.csv
# NOTE(review): what the R script computes is not visible here; the rule name
# suggests clone calling - confirm against the script.
rule clone_calling:
    input:
        bam_obj=_post_collapsing_object,
    output:
        bam_obj=expand(
            "{output_dir}/{sample}/celltag/bam_obj.rds",
            output_dir=config["output_dir"],
            allow_missing=True,
        ),
        clones_csv=expand(
            "{output_dir}/{sample}/celltag/clones.csv",
            output_dir=config["output_dir"],
            allow_missing=True,
        ),
    script:
        "src/celltag/1.3_.R"