Skip to content

Commit

Permalink
Update stitch (#6832)
Browse files Browse the repository at this point in the history
* Update stitch

* Add bam test and sort snapshot

* Set R version for repeatability

* Add htslib to environment for conda

* Fix environment

* Add rsync to environment

* Fix linting

* Update path
  • Loading branch information
LouisLeNezet authored Oct 25, 2024
1 parent 7face45 commit 55e4111
Show file tree
Hide file tree
Showing 11 changed files with 382 additions and 279 deletions.
3 changes: 3 additions & 0 deletions modules/nf-core/stitch/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::r-base=4.3.1
- conda-forge::rsync=3.2.7
- bioconda::r-stitch=1.6.10
- bioconda::htslib=1.18
4 changes: 2 additions & 2 deletions modules/nf-core/stitch/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ process STITCH {
'biocontainers/r-stitch:1.6.10--r43h06b5641_0' }"

input:
tuple val(meta) , path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), val(chromosome_name), val(K), val(nGen)
tuple val(meta2), path(collected_crams), path(collected_crais), path(cramlist)
tuple val(meta), path(collected_crams), path(collected_crais), path(cramlist)
tuple val(meta2), path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), val(chromosome_name), val(K), val(nGen)
tuple val(meta3), path(fasta), path(fasta_fai)
val seed

Expand Down
36 changes: 18 additions & 18 deletions modules/nf-core/stitch/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ tools:
identifier: biotools:stitch-snijderlab
input:
- - meta:
type: map
description: |
Groovy Map containing information about the set of samples
e.g. `[ id:'test' ]`
- collected_crams:
type: file
description: List of sorted BAM/CRAM/SAM files
pattern: "*.{bam,cram,sam}"
- collected_crais:
type: file
description: List of BAM/CRAM/SAM index files
pattern: "*.{bai,crai,sai}"
- cramlist:
type: file
description: |
Text file with the path to the cram files to use in imputation, one per line. Since the cram files are staged to the working directory for the process, this file should just contain the file names without any pre-pending path.
pattern: "*.txt"
- - meta2:
type: map
description: |
Groovy Map containing information about the set of positions to run the imputation over
Expand Down Expand Up @@ -55,24 +73,6 @@ input:
description: Number of generations since founding of the population to use for
imputation. Refer to the documentation for the `--nGen` argument of STITCH
for more information.
- - meta2:
type: map
description: |
Groovy Map containing information about the set of samples
e.g. `[ id:'test' ]`
- collected_crams:
type: file
description: List of sorted BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- collected_crais:
type: file
description: List of BAM/CRAM/SAM index files
pattern: "*.{bai,crai,sai}"
- cramlist:
type: file
description: |
Text file with the path to the cram files to use in imputation, one per line. Since the cram files are staged to the working directory for the process, this file should just contain the file names without any pre-pending path.
pattern: "*.txt"
- - meta3:
type: map
description: |
Expand Down
178 changes: 178 additions & 0 deletions modules/nf-core/stitch/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Shared fixtures for the STITCH tests below.
//
// Every value here except `seed` is a String holding a fragment of Groovy source, not a
// resolved file object: the fragments are spliced through `$name` interpolation into the
// triple-quoted `process` scripts of each test.
//
// `pathbam` and `pathgenome` are intentionally unterminated prefixes. Each one opens a
// `file(` call and a single-quoted path; every use site appends the relative file name,
// the closing quote and `, checkIfExists: true)` to complete the expression.
def pathbam = "file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/"
def pathgenome = "file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/"
// positions and essential parameters
// `stitch_params` is a seven-element tuple. The two-stage test destructures it as
// (meta, positions, target, rdata, chromosome_name, K, nGen); here `target` and `rdata`
// are passed as empty lists.
def posfile = "${pathgenome}dbsnp_138.hg38.first_10_biallelic_sites.tsv', checkIfExists: true)"
def stitch_params = "[ [ id: 'test_positions' ], $posfile, [], [], 'chr21', 2, 1 ]"

// sequencing data in cram format
// `crams_val` / `crais_val` are list literals of two `file(...)` calls each (test and test2);
// `reads_cram` wraps them as [ meta, crams, crais ].
def crams_val = "[${pathbam}cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), ${pathbam}cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true)]"
def crais_val = "[${pathbam}cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true), ${pathbam}cram/test2.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)]"
def reads_cram = "[ [ id: 'test_reads' ], $crams_val, $crais_val ]"

// sequencing data in bam format
// Same layout as the CRAM fixtures above, pointing at the BAM files and their .bai indexes.
def bams_val = "[${pathbam}bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), ${pathbam}bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true)]"
def bais_val = "[${pathbam}bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), ${pathbam}bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)]"
def reads_bam = "[ [ id:'test_reads' ], $bams_val, $bais_val ]"

// reference genome
// [ meta, fasta, fasta index ] for the chr21 test genome.
def reference = "[[ id:'test_reference' ], ${pathgenome}genome.fasta', checkIfExists: true), ${pathgenome}genome.fasta.fai', checkIfExists: true)]"

// for reproducibility
// Interpolated as `input[3]` in every test except "test_no_seed", which passes `[]` instead.
def seed = 1

// nf-test suite for the STITCH process defined in ../main.nf.
//
// All four tests populate the same four process inputs:
//   input[0]  reads tuple [ meta, alignment files, index files ] combined with a list file
//   input[1]  positions / STITCH parameters tuple
//   input[2]  reference tuple [ meta, fasta, fai ]
//   input[3]  seed value
// and make the same assertions: the process succeeded, and a snapshot of output file NAMES
// (not contents) plus the versions channel matches the stored snapshot.
nextflow_process {
name "Test Process STITCH"
script "../main.nf"
process "STITCH"

tag "modules"
tag "modules_nfcore"
tag "stitch"

// CRAM input with no seed: input[3] is an empty list rather than `seed`.
test("test_no_seed") {
when {
// `filelist` collects the last path component of each CRAM into a single sorted,
// newline-separated file named cramlist.txt, which is then appended to the reads tuple.
process {
"""
filelist = Channel.fromPath( $crams_val )
.map { it[-1] as String } // get only filename
.collectFile( name: "cramlist.txt", newLine: true, sort: true )
input[0] = Channel.of( $reads_cram ).combine( filelist )
input[1] = $stitch_params
input[2] = $reference
input[3] = []
"""
}
}

then {
// For `input`, `rdata` and `plots`, element [1] of each emission is treated as a
// directory: its entries are listed, sorted, and reduced to their names.
// For `vcf`, element [1] is a single file and only its name is recorded.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.input.collect{ file(it[1]).listFiles().sort().name },
process.out.rdata.collect{ file(it[1]).listFiles().sort().name },
process.out.plots.collect{ file(it[1]).listFiles().sort().name },
process.out.vcf.collect{ file(it[1]).name },
process.out.versions
).match() }
)
}
}

// CRAM input with the fixed seed; otherwise identical to "test_no_seed".
test("test_with_seed") {
when {
process {
"""
filelist = Channel.fromPath( $crams_val )
.map { it[-1] as String } // get only filename
.collectFile( name: "cramlist.txt", newLine: true, sort: true )
input[0] = Channel.of( $reads_cram ).combine( filelist )
input[1] = $stitch_params
input[2] = $reference
input[3] = $seed
"""
}
}

then {
// Same name-only snapshot as the other tests in this suite.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.input.collect{ file(it[1]).listFiles().sort().name },
process.out.rdata.collect{ file(it[1]).listFiles().sort().name },
process.out.plots.collect{ file(it[1]).listFiles().sort().name },
process.out.vcf.collect{ file(it[1]).name },
process.out.versions
).match() }
)
}
}

// Runs STITCH twice: a first pass (in `setup`) whose `input` and `rdata` outputs are fed
// back into a second pass (in `when`) in place of the reads and reference.
test("test_two_stage_imputation") {
setup {
// First pass, aliased STITCH_GENERATE_INPUTS and run with its own config file.
// NOTE(review): the config contents are not visible here; judging by the file name it
// restricts STITCH to input generation — confirm against stitch_generate_input.config.
run ("STITCH", alias: "STITCH_GENERATE_INPUTS") {
script "../main.nf"
config "./stitch_generate_input.config"
process {
"""
filelist = Channel.fromPath( $crams_val )
.map { it[-1] as String } // get only filename
.collectFile( name: "cramlist.txt", newLine: true, sort: true )
input[0] = Channel.of( $reads_cram ).combine( filelist )
input[1] = $stitch_params
input[2] = $reference
input[3] = $seed
"""
}
}
}

when {
// Second pass, run with a different config file (see NOTE above regarding configs).
config "./stitch_impute_only.config"
// `ch_input_2step` keeps only the positions file from `stitch_params`, combines it with
// the first pass's `input` and `rdata` outputs joined together, and rebuilds the
// seven-element parameter tuple using the first pass's meta. The trailing
// "chr21", 2, 1 repeat the chromosome, K and nGen values from `stitch_params`.
// input[0] and input[2] are placeholders: a meta map with a null id and empty lists.
process {
"""
ch_input_2step = Channel.of( $stitch_params )
.map {
meta, positions, target, rdata, chromosome_name, K, nGen ->
[ meta, positions ]
}
.combine(
STITCH_GENERATE_INPUTS.out.input
.join ( STITCH_GENERATE_INPUTS.out.rdata )
)
.map {
meta, positions, metaT, target, rdata ->
[ metaT, positions, target, rdata, "chr21", 2, 1 ]
}
input[0] = [[id: null], [], [], []]
input[1] = ch_input_2step
input[2] = [[id: null], [], []]
input[3] = $seed
"""
}
}

then {
// Same name-only snapshot as the other tests in this suite.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.input.collect{ file(it[1]).listFiles().sort().name },
process.out.rdata.collect{ file(it[1]).listFiles().sort().name },
process.out.plots.collect{ file(it[1]).listFiles().sort().name },
process.out.vcf.collect{ file(it[1]).name },
process.out.versions
).match() }
)
}
}

// BAM input with the fixed seed. The list file is still named cramlist.txt even though
// it lists BAM file names.
test("test_with_bam") {
when {
process {
"""
filelist = Channel.fromPath( $bams_val )
.map { it[-1] as String } // get only filename
.collectFile( name: "cramlist.txt", newLine: true, sort: true )
input[0] = Channel.of( $reads_bam ).combine( filelist )
input[1] = $stitch_params
input[2] = $reference
input[3] = $seed
"""
}
}

then {
// Same name-only snapshot as the other tests in this suite.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.input.collect{ file(it[1]).listFiles().sort().name },
process.out.rdata.collect{ file(it[1]).listFiles().sort().name },
process.out.plots.collect{ file(it[1]).listFiles().sort().name },
process.out.vcf.collect{ file(it[1]).name },
process.out.versions
).match() }
)
}
}

}
Loading

0 comments on commit 55e4111

Please sign in to comment.