From 128eef6132020e1cf4946800a490cf9f4029db24 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 17 Oct 2024 15:08:53 +0100 Subject: [PATCH 1/3] Set split prefix only for large genomes --- variant_to_realignment.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/variant_to_realignment.nf b/variant_to_realignment.nf index c4ff400..690986e 100755 --- a/variant_to_realignment.nf +++ b/variant_to_realignment.nf @@ -138,6 +138,10 @@ process alignWithMinimap { script: + split_prefix = "" + if (file(params.newgenome).size() > 4294967296){ + split_prefix = " --split-prefix prefix " + } if (flanklength < 500) """ # Options used by the 'sr' preset with some modifications: @@ -147,7 +151,7 @@ process alignWithMinimap { # --secondary=yes -N 2 --> allow up to 2 secondary alignments # -y option will take the comment from the fasta entry and output it # the awk script will convert this comment in valid SAM tag - minimap2 -k21 -w11 --sr --frag=yes -A2 -B5 -O6,16 --end-bonus 20 -E2,1 -r50 -p.5 -z 800,200\ + minimap2 $split_prefix -k21 -w11 --sr --frag=yes -A2 -B5 -O6,16 --end-bonus 20 -E2,1 -r50 -p.5 -z 800,200\ -f1000,5000 -n2 -m20 -s40 -g200 -2K50m --heap-sort=yes --secondary=yes -N 2 -y \ -a genome.fa variant_read1.fa variant_read2.fa | \ awk -F '\\t' 'BEGIN{OFS="\\t"}{if(!/^@/){\$NF="vr:Z:"\$NF}; print \$0;}' | \ @@ -155,7 +159,7 @@ process alignWithMinimap { """ else """ - minimap2 -k19 -w19 -A2 -B5 -O6,16 --end-bonus 20 -E3,1 -s200 -z200 -N50 --min-occ-floor=100 \ + minimap2 $split_prefix -k19 -w19 -A2 -B5 -O6,16 --end-bonus 20 -E3,1 -s200 -z200 -N50 --min-occ-floor=100 \ --secondary=yes -N 2 -y \ -a genome.fa variant_read1.fa variant_read2.fa | \ awk -F '\\t' 'BEGIN{OFS="\\t"}{if(!/^@/){\$NF="vr:Z:"\$NF}; print \$0;}' | \ From 0daa42854bbe6ae0fc54e1a32d817a0b4482f2f2 Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 28 Oct 2024 18:00:55 +0000 Subject: [PATCH 2/3] Set the index to be larger than the genome to ensure that the index won't be split. 
--- variant_to_realignment.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_to_realignment.nf b/variant_to_realignment.nf index 690986e..9f4ac7b 100755 --- a/variant_to_realignment.nf +++ b/variant_to_realignment.nf @@ -138,9 +138,9 @@ process alignWithMinimap { script: - split_prefix = "" - if (file(params.newgenome).size() > 4294967296){ - split_prefix = " --split-prefix prefix " + $index_size = "" + if (file(params.newgenome).size() > 4000000000){ + index_size = " -I " + file(params.newgenome).size() * 1.1 } if (flanklength < 500) """ @@ -151,7 +151,7 @@ process alignWithMinimap { # --secondary=yes -N 2 --> allow up to 2 secondary alignments # -y option will take the comment from the fasta entry and output it # the awk script will convert this comment in valid SAM tag - minimap2 $split_prefix -k21 -w11 --sr --frag=yes -A2 -B5 -O6,16 --end-bonus 20 -E2,1 -r50 -p.5 -z 800,200\ + minimap2 $index_size -k21 -w11 --sr --frag=yes -A2 -B5 -O6,16 --end-bonus 20 -E2,1 -r50 -p.5 -z 800,200\ -f1000,5000 -n2 -m20 -s40 -g200 -2K50m --heap-sort=yes --secondary=yes -N 2 -y \ -a genome.fa variant_read1.fa variant_read2.fa | \ awk -F '\\t' 'BEGIN{OFS="\\t"}{if(!/^@/){\$NF="vr:Z:"\$NF}; print \$0;}' | \ @@ -159,7 +159,7 @@ process alignWithMinimap { """ else """ - minimap2 $split_prefix -k19 -w19 -A2 -B5 -O6,16 --end-bonus 20 -E3,1 -s200 -z200 -N50 --min-occ-floor=100 \ + minimap2 $index_size -k19 -w19 -A2 -B5 -O6,16 --end-bonus 20 -E3,1 -s200 -z200 -N50 --min-occ-floor=100 \ --secondary=yes -N 2 -y \ -a genome.fa variant_read1.fa variant_read2.fa | \ awk -F '\\t' 'BEGIN{OFS="\\t"}{if(!/^@/){\$NF="vr:Z:"\$NF}; print \$0;}' | \ From 8bdcc96e1d99cd1d0de186b32e99576e8d5f1528 Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 28 Oct 2024 21:21:33 +0000 Subject: [PATCH 3/3] fix typo --- variant_to_realignment.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_to_realignment.nf b/variant_to_realignment.nf index 
9f4ac7b..84d1911 100755 --- a/variant_to_realignment.nf +++ b/variant_to_realignment.nf @@ -138,7 +138,7 @@ process alignWithMinimap { script: - $index_size = "" + index_size = "" if (file(params.newgenome).size() > 4000000000){ index_size = " -I " + file(params.newgenome).size() * 1.1 }