-
Notifications
You must be signed in to change notification settings - Fork 1
/
call_variants.sh
executable file
·83 lines (58 loc) · 2.48 KB
/
call_variants.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
#$ -cwd
#$ -r y
#$ -j y
#$ -l h_data=30G
#$ -l h_rt=24:00:00
#$ -l highp
#$ -t 1-11
########################### environment #####################################
. /u/home/c/christac/miniconda3/etc/profile.d/conda.sh
conda activate exome
ID_NUM=$((SGE_TASK_ID+40))
mkdir -p gatk_output
mkdir -p gatk_output/bams
reference="../hg38/broad/Homo_sapiens_assembly38.fasta"
hapmap="/u/home/c/christac/project-nzaitlen/hg38/broad/hapmap_3.3.hg38.vcf.gz"
omni="/u/home/c/christac/project-nzaitlen/hg38/broad/1000G_omni2.5.hg38.vcf.gz"
thousand_genomes="/u/home/c/christac/project-nzaitlen/hg38/broad/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
dbsnp="/u/home/c/christac/project-nzaitlen/hg38/broad/Homo_sapiens_assembly38.dbsnp138.vcf"
mills="/u/home/c/christac/project-nzaitlen/hg38/broad/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
indels="/u/home/c/christac/project-nzaitlen/hg38/broad/Homo_sapiens_assembly38.known_indels.vcf.gz"
primary_targets="SeqCapEZ_Exome_v3.0_Design_Annotation_files/primary_targets.interval_list"
# ########################### base recalibration #####################################
# recalibrates for indel identification
gatk BaseRecalibrator \
-R $reference \
-I "aligned/"$ID_NUM".readgroup.bam" \
-O "gatk_output/bams/"$ID_NUM"_recal.table" \
--known-sites $indels \
--known-sites $mills \
--known-sites $dbsnp
gatk ApplyBQSR \
-R $reference \
-I "aligned/"$ID_NUM".readgroup.bam" \
-bqsr "gatk_output/bams/"$ID_NUM"_recal.table" \
-O "gatk_output/bams/"$ID_NUM"_recal_reads.bam" \
gatk BaseRecalibrator \
-R $reference \
-I "gatk_output/bams/"$ID_NUM"_recal_reads.bam" \
-O "gatk_output/bams/"$ID_NUM"_post_recal.table" \
--known-sites $indels \
--known-sites $mills \
--known-sites $dbsnp
# plots that help to analyze success of base recalibration
gatk AnalyzeCovariates \
-before "gatk_output/bams/"$ID_NUM"_recal.table" \
-after "gatk_output/bams/"$ID_NUM"_post_recal.table" \
-plots "gatk_output/bams/"$ID_NUM"_recalibration_plots.pdf"
############################# variant calling #########################################
# calls sample snps and indels in gvcf form
gatk HaplotypeCaller \
-R $reference \
-D $dbsnp \
-I "gatk_output/bams/"$ID_NUM"_recal_reads.bam" \
-ERC GVCF \
-ip 100 \
-L $primary_targets \
-O "gatk_output/"$ID_NUM"_variants.gvcf"