-
Notifications
You must be signed in to change notification settings - Fork 1
/
MASTERSCRIPT_MosaiC-All.sh
executable file
·165 lines (142 loc) · 7.15 KB
/
MASTERSCRIPT_MosaiC-All.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/bin/bash
# MasterScript: Variant calling step
# 1. Mutect2 and FilterMutect2: Parents,Probands and Siblings (if available)
# 2. MosaicHunter: Parents,Probands and Siblings (if available)
# 3. MosaicForecast on Mutect2 variant callset, followed by Filter
# UniAdelaide-HPC friendly
# Date: 9th June 2023
#
#######################################
# Print the help text for this master script to stdout.
# Globals:   none
# Arguments: none ($0 is interpolated into the usage line)
# Outputs:   the multi-line help banner, followed by one blank line
#######################################
usage() {
  # Unquoted here-document: $0 is expanded, while \$ keeps the column
  # names ($BAMdir, ...) literal in the printed text.
  cat <<USAGE_TEXT
#MasterScript: Variant calling steps, which includes
# 1. Mutect2: Parents and Probands and Siblings (if available)
# 2. MosaicHunter: Parents and Probands and Siblings (if available)
# 3. MosaicForecast on Mutect2 variant callset
#
#
# Usage $0 -s /path/to/sampleID.list -o /path/to/output_folder -c /path/to/config_file | [ - h | --help ]
#
# Options
#-s <file> REQUIRED: A file e.g. sampleID.list (one header row and then tab-delimited columns \$BAMdir,\$ProbandID,\$Gender,\$Mother,\$Father)
#-o <directory> REQUIRED: Output directory (all variant calls for all samples will output into a single directory)
#-c <file> REQUIRED: Configuration File (This file sets paths and defaults relevant to your system e.g. see: config/Mosaic-All.config)
#
# -h or --help Prints this message. Or if you got one of the options above wrong you'll be reading this too!
#
# Original: Nandini Sandran, 9/6/2023
# Modified: (Date; Name; Description)
# See: https://github.com/ngs1810/MosaiC-All for history and new updates.
#

USAGE_TEXT
}
## Set Variables ##
# Consume the command line one option at a time. Each value-taking option
# reads its value from the following word, and that word is shifted away
# together with the flag. Parsing stops at the first empty argument.
while [[ -n "$1" ]]; do
  case "$1" in
    -s)
      SAMPLELIST=$2
      shift
      ;;
    -o)
      OUTDIR=$2
      shift
      ;;
    -c)
      CONFIG_FILE=$2
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      # Anything unrecognised: show the help text and fail.
      usage
      exit 1
      ;;
  esac
  shift
done
## Define directories and variables, and validate the required inputs
# If the script lacks any requirement then fail immediately; directories
# that are merely missing are created.

# -s: the sample list must be given and must be a readable file.
if [[ -z "$SAMPLELIST" ]]; then
  usage
  echo "## ERROR: You need to provide a sample list"
  echo "#-s REQUIRED sampleID.list (one header row and then tab-delimited columns \$BAMdir,\$ProbandID,\$Gender,\$Mother,\$Father)"
  exit 1
fi
if [[ ! -r "$SAMPLELIST" ]]; then
  echo "## ERROR: Sample list does not exist or is not readable: $SAMPLELIST" >&2
  exit 1
fi

# -c: the config file must be given and readable before it is sourced,
# otherwise every path used later in the script would silently be empty.
if [[ -z "$CONFIG_FILE" ]]; then
  usage
  echo "## ERROR: You need to provide a config file. Check the config/Mosaic-All.config file for an example."
  echo "#-c <file> REQUIRED: Configuration File (This file sets paths and defaults relevant to your system e.g. see: config/Mosaic-All.config)"
  exit 1
fi
if [[ ! -r "$CONFIG_FILE" ]]; then
  echo "## ERROR: Config file does not exist or is not readable: $CONFIG_FILE" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$CONFIG_FILE"

# -o: checked after sourcing so that an OUTDIR set by the config file (if
# any) is still honoured. An empty OUTDIR would send logs to "/".
if [[ -z "$OUTDIR" ]]; then
  usage
  echo "## ERROR: You need to provide an output directory"
  echo "#-o <directory> REQUIRED: Output directory (all variant calls for all samples will output into a single directory)"
  exit 1
fi

# LOGDIR is not set anywhere in this script; presumably it comes from the
# config file. This script does not write to it, so an unusable LOGDIR is
# reported but is not fatal here.
if [[ -z "$LOGDIR" ]]; then
  echo "## WARN: LOGDIR is empty after loading $CONFIG_FILE; no Slurm log directory was created" >&2
elif [[ ! -d "$LOGDIR" ]]; then
  if mkdir -p "$LOGDIR"; then
    echo "## INFO: Slurm log files will be placed in this location $LOGDIR"
  else
    echo "## WARN: could not create Slurm log directory $LOGDIR" >&2
  fi
fi

# All per-sample logs are appended under OUTDIR, so it must exist.
if [[ ! -d "${OUTDIR}" ]]; then
  if ! mkdir -p "${OUTDIR}"; then
    echo "## ERROR: could not create output directory ${OUTDIR}" >&2
    exit 1
  fi
  echo "## INFO: output directory created, you'll find all of the outputs and log files in here: ${OUTDIR}" >> "$OUTDIR/Mosaic-All.pipeline.log"
fi
# Read every data row of the sample list into the SAMPLEID array; the
# first line of the file is the header and is dropped.
readarray -t SAMPLEID < <(sed '1d' "$SAMPLELIST")
# Environment modules: unload whatever is currently loaded, then load the
# BCFtools module.
module purge
module load BCFtools/1.17-GCC-11.2.0
# Iteration for variant calling starts here.
# For every row of the sample list this submits, via sbatch:
#   1. MosaicHunter for the proband (trio or single mode) and each parent
#   2. Mutect2 -> FilterMutect2 and Mutect2 -> MosaicForecast steps 1-3
#      for every family member named in the row
# Globals read by the helpers: SCRIPTDIR, OUTDIR, CONFIG_FILE, PON_A, PON_B.

#######################################
# Check whether a sample is listed in a Panel of Normals (PON) VCF.
# Arguments: $1 - sample ID; $2 - path to the PON VCF
# Returns:   0 when `bcftools query -l` prints a line equal to $1
#######################################
pon_contains_sample() {
  # -F -x: literal, whole-line match, so that "S1" does not match "S10".
  bcftools query -l "$2" | grep -Fx -- "$1" > /dev/null
}

#######################################
# Choose a PON that does not contain the given sample.
# Globals:   PON_A, PON_B, OUTDIR, CONFIG_FILE (all read)
# Arguments: $1 - sample ID
# Outputs:   the chosen PON path on stdout; progress messages are appended
#            to $OUTDIR/<sample>.pipeline.log
# Returns:   0 if a PON was chosen, 1 if the sample is in both PONs
#######################################
select_pon() {
  local sample=$1
  local log="$OUTDIR/$sample.pipeline.log"

  if ! pon_contains_sample "$sample" "$PON_A"; then
    echo "## INFO: $sample is not present in $PON_A. So, let's do Mutect2 on this sample using $PON_A" >> "$log"
    printf '%s\n' "$PON_A"
    return 0
  fi

  echo "## WARN: $sample is present in $PON_A. Checking for this sample in $PON_B." >> "$log"
  if ! pon_contains_sample "$sample" "$PON_B"; then
    echo "## INFO: $sample is not present in $PON_B. So, let's do Mutect2 on this sample using $PON_B" >> "$log"
    printf '%s\n' "$PON_B"
    return 0
  fi

  echo "## WARN: $sample present in both Panel of Normal VCFs, you will need to provide another one and set this in $CONFIG_FILE. Mutect2 was not performed for this sample." >> "$log"
  return 1
}

#######################################
# Submit a job with sbatch and report its job ID.
# Arguments: all arguments are passed to sbatch unchanged
# Outputs:   the job ID (last word of sbatch's last non-empty output line)
# Returns:   1 if sbatch fails or prints nothing usable
#######################################
submit_job() {
  local reply job_id
  reply=$(sbatch "$@") || return 1
  job_id=$(awk 'NF { id = $NF } END { print id }' <<< "$reply")
  [[ -n "$job_id" ]] || return 1
  printf '%s\n' "$job_id"
}

#######################################
# Submit Mutect2 and everything that depends on it for one sample:
# FilterMutect2 (once) and MosaicForecast steps 1 -> 2 -> 3.
# Globals:   SCRIPTDIR, OUTDIR, CONFIG_FILE (read)
# Arguments: $1 - sample ID; $2 - BAM directory; $3 - PON to use
# Returns:   1 if a job that later jobs depend on could not be submitted
#######################################
submit_mutect2_chain() {
  local sample=$1 bamdir=$2 pon=$3
  local log="$OUTDIR/$sample.pipeline.log"
  local mutect2_id mf1_id mf2_id

  if ! mutect2_id=$(submit_job "$SCRIPTDIR/scripts/Mutect2.singlemode.sh" \
    -b "$bamdir" -s "$sample" -c "$CONFIG_FILE" -o "$OUTDIR" -p "$pon"); then
    echo "## ERROR: Mutect2 submission failed for $sample; dependent jobs were not submitted." | tee -a "$log" >&2
    return 1
  fi

  # FilterMutect2 depends on Mutect2. It is submitted exactly once; a second
  # identical submission would write the same outputs concurrently.
  sbatch --export=ALL "--dependency=afterok:${mutect2_id}" \
    "$SCRIPTDIR/scripts/Mutect2.FilterMutect2.sh" -s "$sample" -v "$OUTDIR" -c "$CONFIG_FILE"

  # MosaicForecast (step1) depends on Mutect2
  if ! mf1_id=$(submit_job --export=ALL "--dependency=afterok:${mutect2_id}" \
    "$SCRIPTDIR/scripts/MF1_ProcessInput.sh" -s "$sample" -b "$bamdir" -o "$OUTDIR" -c "$CONFIG_FILE"); then
    echo "## ERROR: MosaicForecast step1 submission failed for $sample." | tee -a "$log" >&2
    return 1
  fi

  # MosaicForecast (step2) depends on MosaicForecast (step1)
  if ! mf2_id=$(submit_job --export=ALL "--dependency=afterok:${mf1_id}" \
    "$SCRIPTDIR/scripts/MF2_Extractreadlevel-singularity.sh" -b "$bamdir" -s "$sample" -c "$CONFIG_FILE" -o "$OUTDIR"); then
    echo "## ERROR: MosaicForecast step2 submission failed for $sample." | tee -a "$log" >&2
    return 1
  fi

  # MosaicForecast (step3) depends on MosaicForecast (step2); nothing
  # depends on step3, so its job ID is not kept.
  if ! submit_job --export=ALL "--dependency=afterok:${mf2_id}" \
    "$SCRIPTDIR/scripts/MF3.GenotypePredictions-singularity.sh" -s "$sample" -c "$CONFIG_FILE" -o "$OUTDIR" > /dev/null; then
    echo "## ERROR: MosaicForecast step3 submission failed for $sample." | tee -a "$log" >&2
    return 1
  fi
}

#######################################
# Submit every job for one row of the sample list.
# Globals:   SCRIPTDIR, OUTDIR, CONFIG_FILE (read)
# Arguments: $1 - one row: BAMdir ProbandID Gender [Mother] [Father]
# Returns:   0 (rows without a proband ID are skipped)
#######################################
process_sample_row() {
  local row=${1%$'\r'} # tolerate CRLF line endings
  local bamdir proband gender mother father member pon

  # Fields are split on runs of spaces/tabs (same as the awk default used
  # previously), so an empty middle column shifts the later columns left.
  IFS=$' \t' read -r bamdir proband gender mother father _ <<< "$row"

  if [[ -z "$proband" ]]; then
    [[ "$row" =~ [^[:space:]] ]] && echo "## WARN: skipping malformed sample row: $row" >&2
    return 0
  fi

  echo "Pipeline for $proband, $mother, $father in $bamdir" >> "$OUTDIR/$proband.pipeline.log"

  #1.MosaicHunter
  # Proband: trio mode only when both parents are listed, else single mode.
  if [[ -n "$mother" && -n "$father" ]]; then
    sbatch "$SCRIPTDIR/scripts/MosaicHunter_WES_Trio.sh" -s "$proband" -b "$bamdir" -d "$OUTDIR" -g "$gender" -f "$father" -m "$mother" -c "$CONFIG_FILE"
  else
    sbatch "$SCRIPTDIR/scripts/MosaicHunter_WES_Singlemode.sh" -s "$proband" -b "$bamdir" -d "$OUTDIR" -g "$gender" -c "$CONFIG_FILE"
  fi
  # Parents: single mode for whichever parent is listed.
  if [[ -n "$mother" ]]; then
    sbatch "$SCRIPTDIR/scripts/MosaicHunter_WES_Singlemode.sh" -s "$mother" -b "$bamdir" -d "$OUTDIR" -g "F" -c "$CONFIG_FILE"
  fi
  if [[ -n "$father" ]]; then
    sbatch "$SCRIPTDIR/scripts/MosaicHunter_WES_Singlemode.sh" -s "$father" -b "$bamdir" -d "$OUTDIR" -g "M" -c "$CONFIG_FILE"
  fi

  #2.Mutect2 and MosaicForecast
  for member in "$proband" "$mother" "$father"; do
    # A missing parent must not be submitted as an empty sample ID.
    [[ -n "$member" ]] || continue
    # The PON is chosen afresh for every member; a member present in both
    # PONs is skipped instead of inheriting the previous member's PON.
    pon=$(select_pon "$member") || continue
    submit_mutect2_chain "$member" "$bamdir" "$pon"
  done
  return 0
}

# A dedicated loop variable keeps the SAMPLEID array itself intact.
for sample_row in "${SAMPLEID[@]}"; do
  process_sample_row "$sample_row"
done