forked from mmendeville/Tumor-only-mutation-calling-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ConsensusCalling.sh
179 lines (118 loc) · 4.83 KB
/
ConsensusCalling.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/bin/bash
###############################################
# Date: 29 May 2018
# Input: deduplicated, coordinate-sorted BAM files (open question: should they also be recalibrated?)
#
# DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # ????
# Set permissions for MPS-###/runfolder/
###############################################
# GLOBAL SETTINGS
# Provide directory with bam files. select bam files, based on extension (string: recal.bam / coordsorted.bam)
# number of threads to use: THREADS=
# mutation target bed file: TARGET_REGIONS_BED='/net/nfs/PAT/home/matias/data/manifests/BCNHL_Seq_v2/BCNHLv2_allExons.bed'
# minimal Variant Allele Frequency: minVAF=
# Mutation Callers to use: VarScan2, LoFreq, VarDict
# Combining VCFs yes/no. what overlap?
###############################################
# GENERAL SETTINGS PER STEP
# Variant Callers general settings
# similar output - names/fields
# SORT VCF
# Annotation general settings:
# Variant Discrimination general settings:
# use minVAF
# remove variants outside of target regions
###############################################
###############################################
# DEPENDENCIES & PREDEFINED VARIABLES
###############################################
# modules & variables
#java="/ccagc/lib/java/jre1.8.0_25/bin/java"
# Load the java, bwa and samtools modules, one `module load` call per module.
for required_module in java bwa samtools; do
  module load "${required_module}"
done
#fastqc="/net/nfs/PAT/lib/FastQC/0.11.2/fastqc"

# Manifest directory (note the trailing slash) and the all-exons target BED
# file that lives directly inside it.
manifest='/net/nfs/PAT/home/matias/data/manifests/BCNHL_Seq_v2/'
target_bed="${manifest}BCNHLv2_allExons.bed"

# hg19 genome FASTA under the BWAIndex directory; the commented-out line is the
# same file at an alternative /ccagc location.
#ref="/ccagc/data/ref/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/genome.fa"
ref='/net/nfs/PAT/data/ref/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/genome.fa'
#dbsnp="/ccagc/data/ref/gatk_bundle/2.8/hg19/dbsnp_138.hg19.excluding_sites_after_129.vcf"
#dbsnp="/net/nfs/PAT/data/ref/gatk_bundle/2.8/hg19/dbsnp_138.hg19.excluding_sites_after_129.vcf"
############################################
# OPTIONAL STEP:
# Select aligned reads from bam file that overlap with capture target regions
# for quicker processing (for somatic mutation analysis only)
# Downside: SNPs outside of the target regions (introns) are not called/evaluated.
# If this step is skipped, the output of each calling tool is still restricted to mutations in the regions of interest.
#if [ ! -e log/targetRegionFilter ]
#then
# ./code/targetRegionFilter.sh && touch log/targetRegionFilter
#else
#echo "log/targetRegionFilter already exists: skipping targetRegionFilter"
#fi
############################################
#if [ ! -e log/LoFreq_bam_indelQ ]
#then
#./code/LoFreq_bam_indelQ.sh && touch log/LoFreq_bam_indelQ
#else
#echo "log/LoFreq_bam_indelQ already exists: skipping LoFreq_bam_indelQ"
#fi
############################################
# LoFreq: Variant Calling
#if [ ! -e log/LoFreq_VariantCalling ]
#then
# ./code/LoFreq_VariantCalling.sh && touch log/LoFreq_VariantCalling
#else
# echo "log/LoFreq_variantCalling already exists: skipping LoFreq_variantCalling"
#fi
############################################
# LoFreq: Blacklist filter
#if [ ! -e log/LoFreq_BlacklistFilter ]
#then
#./code/LoFreq_BlacklistFilter.sh && touch log/LoFreq_BlacklistFilter
#else
# echo "log/LoFreq_BlacklistFilter already exists: skipping LoFreq_BlacklistFilter"
#fi
############################################
# VarScan: Variant Calling
#if [ ! -e log/VarScan_VariantCalling ]
#then
#./code/VarScan_VariantCalling.sh && touch log/VarScan_VariantCalling
#else
# echo "log/VarScan_VariantCalling already exists: skipping VarScan_VariantCalling"
#fi
############################################
# VarScan: Blacklist filter
#if [ ! -e log/VarScan_BlacklistFilter ]
#then
# ./code/VarScan_BlacklistFilter.sh && touch log/VarScan_BlacklistFilter
#else
# echo "log/VarScan_BlacklistFilter already exists: skipping VarScan_BlacklistFilter"
#fi
############################################
# Consensus calling: intersect variant calls
# Run ./code/Intersect_VariantCalls.sh at most once. The marker file
# log/Intersect_VariantCalls acts as a "step already completed" checkpoint and
# is written only after the step script exits with status 0.
if [[ -e log/Intersect_VariantCalls ]]; then
  echo "log/Intersect_VariantCalls already exists: skipping Intersect_VariantCalls"
else
  # Create the marker directory before doing any work: without it `touch`
  # fails and a successful run would never be recorded.
  mkdir -p log || { echo "cannot create log/ directory" >&2; exit 1; }
  if ./code/Intersect_VariantCalls.sh; then
    touch log/Intersect_VariantCalls
  else
    step_status=$?
    # Stop here instead of falling through: anything that follows in this
    # script would otherwise run although this step did not complete.
    echo "Intersect_VariantCalls failed (exit status ${step_status}): aborting" >&2
    exit "${step_status}"
  fi
fi
############################################
# Annotation
# Run ./code/Annotate_IntersectCalls.sh at most once. The marker file
# log/Annotate_IntersectCalls acts as a "step already completed" checkpoint and
# is written only after the step script exits with status 0.
if [[ -e log/Annotate_IntersectCalls ]]; then
  echo "log/Annotate_IntersectCalls already exists: skipping Annotate_IntersectCalls"
else
  # Create the marker directory before doing any work: without it `touch`
  # fails and a successful run would never be recorded.
  mkdir -p log || { echo "cannot create log/ directory" >&2; exit 1; }
  if ./code/Annotate_IntersectCalls.sh; then
    touch log/Annotate_IntersectCalls
  else
    step_status=$?
    # Stop here instead of falling through: anything that follows in this
    # script would otherwise run although this step did not complete.
    echo "Annotate_IntersectCalls failed (exit status ${step_status}): aborting" >&2
    exit "${step_status}"
  fi
fi
############################################
# Separate SNPs and Somatic variants from VCF file:
# Run ./code/LoFreq_VariantDiscrimination.sh at most once. The marker file
# log/LoFreq_VariantDiscrimination acts as a "step already completed"
# checkpoint and is written only after the step script exits with status 0.
if [[ -e log/LoFreq_VariantDiscrimination ]]; then
  echo "log/LoFreq_VariantDiscrimination already exists: skipping LoFreq_VariantDiscrimination"
else
  # Create the marker directory before doing any work: without it `touch`
  # fails and a successful run would never be recorded.
  mkdir -p log || { echo "cannot create log/ directory" >&2; exit 1; }
  if ./code/LoFreq_VariantDiscrimination.sh; then
    touch log/LoFreq_VariantDiscrimination
  else
    step_status=$?
    # Stop here instead of falling through: anything that follows in this
    # script would otherwise run although this step did not complete.
    echo "LoFreq_VariantDiscrimination failed (exit status ${step_status}): aborting" >&2
    exit "${step_status}"
  fi
fi
############################################
# COMBINE VCF FILES FROM DIFFERENT CALLERS
############################################