Skip to content

Commit

Permalink
define the ref in config.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
r1cheu committed Nov 18, 2024
1 parent 57c1518 commit d8a7c8c
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 14 deletions.
1 change: 1 addition & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
samples: config/samples.tsv
units: config/units.tsv
ref: IRGSP-1.0_genome.fasta

sif_path: /data9/home/rlchen/project/snp_gatk/snpcalling.sif
10 changes: 5 additions & 5 deletions workflow/rules/calling.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ rule call_variants:
input:
bam=get_sample_bams,
bam_idx=get_sample_bams_idx,
ref="resources/IRGSP-1.0_genome.fasta",
fai="resources/IRGSP-1.0_genome.fasta.fai",
idx="resources/IRGSP-1.0_genome.dict",
ref=get_ref_path(),
fai=get_ref_path() + ".fai",
idx=get_ref_name() + ".dict",
output:
gvcf=protected("results/called/gvcfs/{sample}.g.vcf.gz"),
log:
Expand Down Expand Up @@ -56,8 +56,8 @@ rule genomics_db_import:
rule joint_calling:
input:
db="results/called/genome_db/{chrom}_db",
ref="resources/IRGSP-1.0_genome.fasta",
idx="resources/IRGSP-1.0_genome.dict",
ref=get_ref_path(),
idx=get_ref_name() + ".dict",
output:
vcf=protected("results/vcf/all.{chrom}.vcf.gz"),
log:
Expand Down
9 changes: 8 additions & 1 deletion workflow/rules/common.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import pandas as pd
from snakemake.utils import validate
from snakemake.utils import min_version
Expand All @@ -7,13 +9,18 @@ min_version("5.18.0")
configfile: "config/config.yaml"
samples = pd.read_table(config["samples"]).set_index("sample", drop=False)
units = pd.read_table(config["units"], dtype=str).set_index("sample", drop=False)
ref_path = Path(f"resources/{config['ref']}")

def get_ref_path(wildcards):
return str(ref_path)

def get_ref_name(wildcards):
return str(ref_path.with_suffix(""))

def is_single_end(sample):
"""Return True if sample-unit is single end."""
return pd.isnull(units.loc[sample, "fq2"])


def get_fastq(wildcards):
"""Get fastq files of given sample."""
fastqs = units.loc[wildcards.sample, ["fq1", "fq2"]].dropna()
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/filter.smk
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ rule select_indel:
rule filter_snp:
input:
vcf="results/all.snp.vcf.gz",
ref="resources/IRGSP-1.0_genome.fasta"
ref=get_ref_path(),
output:
temp("results/all.snp.filter.vcf.gz")
log:
Expand All @@ -75,7 +75,7 @@ rule filter_snp:
rule filter_indel:
input:
vcf="results/all.indel.vcf.gz",
ref="resources/IRGSP-1.0_genome.fasta"
ref=get_ref_path(),
output:
temp("results/all.indel.filter.vcf.gz")
log:
Expand Down
14 changes: 8 additions & 6 deletions workflow/rules/ref.smk
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
rule genome_faidx:
input:
"resources/IRGSP-1.0_genome.fasta"
get_ref_path()
output:
"resources/IRGSP-1.0_genome.fasta.fai"
get_ref_path() + ".fai"
log:
"logs/samtools/create_faidx.log"
cache: True
shell:
"samtools faidx {input} > {output} {log}"

rule genome_dict:
input:
"resources/IRGSP-1.0_genome.fasta"
get_ref_path()
output:
"resources/IRGSP-1.0_genome.dict"
get_ref_name() + ".dict"
log:
"logs/samtools/create_dict.log"
cache: True
Expand All @@ -20,9 +22,9 @@ rule genome_dict:

rule bwa_index:
input:
"resources/IRGSP-1.0_genome.fasta"
get_ref_path()
output:
multiext("resources/IRGSP-1.0_genome.fasta", ".amb", ".ann", ".bwt", ".pac", ".sa")
multiext(get_ref_path(), ".amb", ".ann", ".bwt", ".pac", ".sa")
log:
"logs/bwa_index.log"
resources:
Expand Down

0 comments on commit d8a7c8c

Please sign in to comment.