Skip to content

Commit

Permalink
Merge pull request #22 from fischuu/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
fischuu authored Mar 14, 2021
2 parents b8cbea3 + 02257d0 commit 638738b
Show file tree
Hide file tree
Showing 14 changed files with 916 additions and 102 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@
##### #####
################################################################################

0.5.15: * Report adjusted
0.5.14: * Bugfix: Rule FinalMockVsRef_alignment didn't have resource allocations
0.5.13: * Statistics on final mock reference added
0.5.12: * Coverage for finalMock alignments is now also calculated
0.5.11: * Input rule all cleaned up a bit to account for temporary output
0.5.10: * Mapping stats for final mock vs reference genome added
0.5.9 : * Bugfix - Final report crashed if no casecontrol was given in the samplelist
0.5.8 : * Final mock reference was not declared in output
0.5.7 : * Dependencies between the rules improved
0.5.6 : * Bugfix - wrong path for reference based vcf fixed
0.5.5 : * Bugfix - wrong reference vcf address in module 6
0.5.4 : * Variant calling for final mock added
0.5.3 : * Rule all simplified
0.5.2 : * Script to refine the mock reference added
0.5.1 : * samtools index set to csi instead of bai
0.4.1 : * Bugfix - Missing outputfile script added
0.4 : Release version
0.3.16: * Several other small path typos
Expand Down
24 changes: 11 additions & 13 deletions GBS-pipeline.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ import os
##### GBS-snakemake pipeline #####
##### Daniel Fischer (daniel.fischer@luke.fi)
##### Natural Resources Institute Finland (Luke)
##### This pipeline is build upon the the GBS-SNP-CROP pipeline
##### Version: 0.4.1
version = "0.4.1"
##### This pipeline is built upon the GBS-SNP-CROP pipeline:
##### https://github.com/halelab/GBS-SNP-CROP
##### Version: 0.5.15
version = "0.5.15"

##### set minimum snakemake version #####
min_version("5.24")
min_version("6.0")

##### Sample sheets #####

Expand All @@ -30,6 +31,7 @@ wildcard_constraints:
config["genome-bwa-index"] = config["genome"]+".bwt"
config["genome-star-index"] = config["project-folder"]+"/references/STAR2.7.3a"
config["report-script"] = config["pipeline-folder"]+"/scripts/workflow-report.Rmd"
config["refinement-script"] = config["pipeline-folder"]+"/scripts/refineMockReference.R"
config["adapter"]=config["pipeline-folder"]+"/adapter.fa"

##### Singularity container #####
Expand Down Expand Up @@ -79,18 +81,10 @@ print("#########################################################################

rule all:
input:
# OUTPUT: PREPARATION MODULE
expand("%s/FASTQ/CONCATENATED/{samples}_R1_001.merged.fastq.gz" % (config["project-folder"]), samples=samples),
expand("%s/FASTQ/CONCATENATED/{samples}_R2_001.merged.fastq.gz" % (config["project-folder"]), samples=samples),
# # QC OF RAW AND CONCATENATED FILES
# QC OF RAW AND CONCATENATED FILES
"%s/QC/RAW/multiqc_R1/" % (config["project-folder"]),
"%s/QC/CONCATENATED/multiqc_R1/" % (config["project-folder"]),
"%s/QC/TRIMMED/multiqc_R1/" % (config["project-folder"]),
# OUTPUT STEP 2
expand("%s/FASTQ/TRIMMED/{samples}.R1.fq.gz" % (config["project-folder"]), samples=samples),
expand("%s/FASTQ/TRIMMED/{samples}.R2.fq.gz" % (config["project-folder"]), samples=samples),
# OUTPUT STEP 2b
expand("%s/FASTQ/SUBSTITUTED/{samples}.R1.fq.gz" % (config["project-folder"]), samples=samples),
# OUTPUT STEP 4
"%s/FASTQ/TRIMMED/GSC.MR.Genome.fa" % (config["project-folder"]),
"%s/BAM/Mockref/mockToRef.sam.flagstat" % (config["project-folder"]),
Expand Down Expand Up @@ -119,9 +113,13 @@ rule all:
# OUTPUT STEP 9
"%s/BAM/mockVariantsToReference/mockVariantsToReference.bam" % (config["project-folder"]),
# Quality check
expand("%s/BAM/alignments_finalMock/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
"%s/MockReference/MockReference.fa" % (config["project-folder"]),
"%s/VCF/FinalSetVariants_finalMock.vcf" % (config["project-folder"]),
"%s/finalReport.html" % (config["project-folder"])



### setup report #####

report: "report/workflow.rst"
Expand Down
12 changes: 7 additions & 5 deletions GBS-pipeline_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,25 @@
project-folder: "/scratch/project_2001746/Example" # Absolute path to the project root folder
pipeline-folder: "/scratch/project_2001746/Pipeline-GBS" # Absolute path to the pipeline folder
pipeline-config: "/scratch/project_2001746/Pipeline-GBS/GBS-pipeline_config-example.yaml" # Absolute path to the pipeline configuration file
#report-script: "/scratch/project_2001746/Pipeline-GBS/scripts/workflow-report.Rmd" # Absolute path to the report scripts (THIS OPTION SHOULD BE OBSOLETE)
barcodes: "/scratch/project_2001746/Example/barcodesID.txt" # Absolute path to the barcodes file (MAKE THIS OPTIONAL)
rawsamples: "/scratch/project_2001746/Example/rawsamples" # Absolute path to the raw samples file (MAKE THIS OPTIONAL)
genome: "/scratch/project_2001746/Example/references/Vulpes_vulpes.VulVul2.2.dna.toplevel.fa" # Absolute path to an existing reference genome, leave empty "" if not available
#genome-bwa-index: "/scratch/project_2001746/Example/references/Vulpes_vulpes.VulVul2.2.dna.toplevel.fa.bwt" # Absolute path to the bwa index (THIS OPTION SHOULD BE OBSOLETE)
#genome-star-index: "/scratch/project_2001746/Example/references/STAR2.7.3a" # Absolute path to the star index (THIS OPTION SHOULD BE OBSOLETE)
refsample: "" # Id of ref sample used to build mock reference (NOT SURE, IF USED, CHECK THIS)
local-scratch: $LOCAL_SCRATCH # Path for fast local storage for tmp files (Here, a system variable is used. Check with your server configs)
#adapter: "/scratch/project_2001746/Pipeline-GBS/adapter.fa" # Path for the adapter sequences, used by Trimmomatic (MAKE THIS OPTIONAL)
tmpdir: "/scratch/project_2001746/tmp"

# Parameters for the GBS-SNP-CROP tools
################################################################################
enz1: "AATTC" # First restriction enzyme sequence
enz2: "GCATG" # Second restriction enzyme sequence
libname: "AllReads" #
libtype: "PE" # LIbrary type, paired-end (PE) or single-end (SE)
libtype: "PE" # Library type, paired-end (PE) or single-end (SE)

# Filter parameters for the mock reference refinement
################################################################################
mockref:
TotalReadCoverage: 25 # How many overall reads need to be on a cluster to remain in mock reference?
minSampleCoverage: 3 # How many different samples need to have reads on a cluster to remain in mock reference?

# Tools specific settings
################################################################################
Expand Down
89 changes: 88 additions & 1 deletion GBS-pipeline_server-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,17 @@ IndexClustersSamtools:
time: 02:00:00
job-name: IndexMockRefClusters_Samtools
mem-per-cpu: 25000


IndexFinalMockBWA:
time: 02:00:00
job-name: IndexFinalMockBWA
mem-per-cpu: 25000

IndexFinalMockSamtools:
time: 02:00:00
job-name: IndexFinalMockSamtools
mem-per-cpu: 25000

# Module 1 - QC:
################################################################################
fastqc_quality_control_raw_data:
Expand Down Expand Up @@ -130,10 +140,26 @@ MockVsRef_AlignmentStats:
time: 05:00:00
job-name: MockVsRef_AlignmentStats

FinalMockVsRef_alignment:
time: 05:00:00
cpus-per-task: 20
job-name: FinalMockVsRef_alignment

FinalMockVsRef_samtools_SamToSortedBam:
time: 05:00:00
job-name: FinalMockVsRef_samtools_SamToSortedBam

FinalMockVsRef_AlignmentStats:
time: 05:00:00
job-name: FinalMockVsRef_AlignmentStats

MockVsRef_SortedBamToMpileup:
time: 05:00:00
job-name: MockVsRef_SortedBamToMpileup

refine_mock_reference:
job-name: refine_mock_reference

# Module 4 - Read alignment
################################################################################
RefGenome_AlignReads:
Expand Down Expand Up @@ -200,6 +226,30 @@ MockRefClusters_AlignmentStats:
cpus-per-task: 20
mem-per-cpu: 5000

FinalMockRef_AlignReads:
time: 01:00:00
job-name: FinalMockRef_AlignReads
cpus-per-task: 20
mem-per-cpu: 5000

FinalMockRef_SamToSortedBam:
time: 01:00:00
job-name: FinalMockRef_SamToSortedBam
cpus-per-task: 20
mem-per-cpu: 5000

FinalMockRef_SortedBamToMpileup:
time: 01:00:00
job-name: FinalMockRef_SortedBamToMpileup
cpus-per-task: 20
mem-per-cpu: 5000

finalMockRef_AlignmentStats:
time: 01:00:00
job-name: FinalMockRef_AlignmentStats
cpus-per-task: 20
mem-per-cpu: 5000

# Module 5 - Call Variants
################################################################################

Expand Down Expand Up @@ -231,6 +281,9 @@ cut_verticalRef:
cut_verticalRef_reference:
job-name: cut_verticalRef_reference

cut_verticalRef_finalMock:
job-name: cut_verticalRef_finalMock

create_MasterMatrix_parallel:
time: 0-12:00:00
job-name: create_MasterMatrix_parallel
Expand All @@ -243,6 +296,13 @@ create_MasterMatrix_parallel_reference:
mem-per-cpu: 64000
nvme: 20

create_MasterMatrix_parallel_finalMock:
time: 0-12:00:00
job-name: create_MasterMatrix_parallel_finalMock
mem-per-cpu: 64000
nvme: 20


aggregate_MasterMatrix:
job-name: aggregate_MasterMatrix
time: 0-12:00:00
Expand All @@ -254,6 +314,12 @@ aggregate_MasterMatrix_reference:
time: 0-12:00:00
mem-per-cpu: 48000
nvme: 20

aggregate_MasterMatrix_finalMock:
job-name: aggregate_MasterMatrix_finalMock
time: 0-12:00:00
mem-per-cpu: 48000
nvme: 20

FilterVariants:
time: 1-12:00:00
Expand All @@ -265,6 +331,11 @@ FilterVariants_reference:
job-name: filterVariants_reference
mem-per-cpu: 30000

FilterVariants_finalMock:
time: 1-12:00:00
job-name: filterVariants_finalMock
mem-per-cpu: 30000

CreateVCF:
time: 01:00:00
job-name: CreateVCF
Expand All @@ -275,6 +346,22 @@ CreateVCF_reference:
job-name: CreateVCF_reference
mem-per-cpu: 30000

CreateVCF_finalMock:
time: 01:00:00
job-name: CreateVCF_finalMock
mem-per-cpu: 30000

ParseMpileup_createCountFiles_finalMock:
time: 05:00:00
job-name: createCountFiles_finalmock
mem-per-cpu: 16000

create_verticalRef_finalMock:
time: 01:00:00
job-name: create_verticalRef_finalMock
mem-per-cpu: 16000
nvme: 20

# Module 6 - Postprocessing
################################################################################
Ref_getVariantFlanking:
Expand Down
36 changes: 36 additions & 0 deletions rules/Module0-PreparationsAndIndexing
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,39 @@ rule IndexClustersSamtools:
shell:"""
samtools faidx {input}
"""

rule IndexFinalMockBWA:
    """
    Index the final Mock Reference Genome (BWA).

    Runs `bwa index -a bwtsw` on <project-folder>/MockReference/MockReference.fa.
    Only the `.bwt` file is declared as output of this rule.
    The tool's stdout and stderr are captured in the declared log file.
    """
    input:
        "%s/MockReference/MockReference.fa" % (config["project-folder"])
    output:
        "%s/MockReference/MockReference.fa.bwt" % (config["project-folder"])
    log:
        "%s/logs/BWA/IndexFinalMock.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/BWA/IndexFinalMock.benchmark.tsv" % (config["project-folder"])
    conda:"envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    # The log file was declared but never written; redirect the indexer's
    # messages there so failures can be diagnosed from the log.
    shell:"""
        bwa index -a bwtsw {input} > {log} 2>&1
    """

rule IndexFinalMockSamtools:
    """
    Index the final Mock Reference Genome (samtools).

    Runs `samtools faidx` on <project-folder>/MockReference/MockReference.fa.
    samtools writes the index next to the FASTA as `MockReference.fa.fai`,
    whereas this rule declares `MockReference.fai` as its output. The index is
    therefore copied to the declared path so the rule's output actually exists,
    while the `.fa.fai` file stays in place.
    The tool's stdout and stderr are captured in the declared log file.
    """
    input:
        "%s/MockReference/MockReference.fa" % (config["project-folder"])
    output:
        "%s/MockReference/MockReference.fai" % (config["project-folder"])
    log:
        "%s/logs/Samtools/IndexFinalMockSam.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/Samtools/IndexFinalMockSam.benchmark.tsv" % (config["project-folder"])
    conda:"envs/samtools.yaml"
    singularity: config["singularity"]["gbs"]
    # NOTE(review): if no downstream rule needs `MockReference.fai`, consider
    # declaring `MockReference.fa.fai` as the output instead and dropping the copy.
    shell:"""
        samtools faidx {input} > {log} 2>&1
        cp {input}.fai {output} 2>> {log}
    """
Loading

0 comments on commit 638738b

Please sign in to comment.