From 745e4a5aba0a273de2da7216a1b07a9fb29d5e6b Mon Sep 17 00:00:00 2001 From: Shaun Jackman Date: Thu, 7 Jun 2018 00:19:29 +0200 Subject: [PATCH 1/5] Add .github/ISSUE_TEMPLATE.rb (#234) --- .github/ISSUE_TEMPLATE.rb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.rb diff --git a/.github/ISSUE_TEMPLATE.rb b/.github/ISSUE_TEMPLATE.rb new file mode 100644 index 000000000..5b6b744ff --- /dev/null +++ b/.github/ISSUE_TEMPLATE.rb @@ -0,0 +1,22 @@ +# Please report + +- [ ] version of ABySS with `abyss-pe version` +- [ ] distribution of Linux with `lsb_release -d` + +# Assembly error + +- [ ] complete `abyss-pe` command line +- [ ] last 20 lines of the output of `abyss-pe` +- [ ] number of sequenced bases +- [ ] estimated genome size and ploidy +- [ ] estimated sequencing depth of coverage + +# Build error + +Consider installing ABySS using [Linuxbrew](https://linuxbrew.sh) on Linux or [Homebrew](https://brew.sh) on macOS with `brew install abyss`, or using [Bioconda](https://bioconda.github.io) with `conda install abyss`. + +- [ ] Have you tried installing ABySS using Brew or Bioconda? +- [ ] version of GCC or compiler with `gcc --version` +- [ ] complete `./configure` command line +- [ ] last 20 lines of the output of `./configure` +- [ ] last 20 lines of the output of `make` From 9aceb97de2925b20e30786999fe978a48a8b9f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladimir=20Nikoli=C4=87?= Date: Fri, 17 Aug 2018 18:46:54 +0200 Subject: [PATCH 2/5] Add closed gap file output (#247) * Add closed gap file output * Add check for whether gap filepath is present * Add requested changes --- Sealer/sealer.cc | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/Sealer/sealer.cc b/Sealer/sealer.cc index 8b7b3e7ef..288492d95 100644 --- a/Sealer/sealer.cc +++ b/Sealer/sealer.cc @@ -110,6 +110,7 @@ static const char USAGE_MESSAGE[] = " -s, --search-mem=N mem limit for graph searches; multiply by the\n" " number of threads (-j) to get the total mem used\n" " for graph traversal [500M]\n" +" -g, --gap-file=FILE write sealed gaps to FILE\n" " -t, --trace-file=FILE write graph search stats to FILE\n" " -v, --verbose display verbose output\n" " --help display this help and exit\n" @@ -204,6 +205,9 @@ namespace opt { /** Output file for graph search stats */ static string tracefilePath; + /** Output file for sealed gaps */ + static string gapfilePath; + /** Mask bases not in flanks */ static int mask = 0; @@ -236,7 +240,7 @@ struct Counters { size_t skipped; }; -static const char shortopts[] = "S:L:b:B:d:ef:F:G:i:Ij:k:lm:M:no:P:q:r:s:t:v"; +static const char shortopts[] = "S:L:b:B:d:ef:F:G:g:i:Ij:k:lm:M:no:P:q:r:s:t:v"; enum { OPT_HELP = 1, OPT_VERSION }; @@ -273,6 +277,7 @@ static const struct option longopts[] = { { "read-name", required_argument, NULL, 'r' }, { "search-mem", required_argument, NULL, 's' }, { "trace-file", required_argument, NULL, 't' }, + { "gap-file", required_argument, NULL, 'g' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, OPT_HELP }, { "version", no_argument, NULL, OPT_VERSION }, @@ -572,7 +577,8 @@ void kRun(const ConnectPairsParams& params, map > &flanks, unsigned &gapsclosed, ofstream &logStream, - ofstream &traceStream) + ofstream &traceStream, + ofstream &gapStream) { map >::iterator read1_it; map::iterator read2_it; @@ -615,6 +621,13 @@ void kRun(const ConnectPairsParams& params, uniqueGapsClosed++; if (gapsclosed % 100 == 0) printLog(logStream, IntToString(gapsclosed) + " gaps closed so far\n"); + + if (!opt::gapfilePath.empty()) { + gapStream << ">" << read1.id.substr(0,read1.id.length()-2) + << "_" << read2_it->second.gapStart() << "-" << read2_it->second.gapEnd() + << " LN:i:" << tempSeq.length() << '\n'; + gapStream << tempSeq << '\n'; + } } } if (success) { @@ -771,6 +784,8 @@ int main(int argc, char** argv) opt::searchMem = SIToBytes(arg); break; case 't': arg >> opt::tracefilePath; break; + case 'g': + arg >> opt::gapfilePath; break; case 'v': opt::verbose++; break; case OPT_HELP: @@ -874,6 +889,13 @@ int main(int argc, char** argv) assert_good(traceStream, opt::tracefilePath); } + ofstream gapStream; + if (!opt::gapfilePath.empty()) { + gapStream.open(opt::gapfilePath.c_str()); + assert(gapStream.is_open()); + assert_good(gapStream, opt::gapfilePath); + } + string logOutputPath(opt::outputPrefix); logOutputPath.append("_log.txt"); ofstream logStream(logOutputPath.c_str()); @@ -1020,7 +1042,7 @@ int main(int argc, char** argv) temp = "Starting K run with k = " + IntToString(opt::k) + "\n"; printLog(logStream, temp); - kRun(params, opt::k, g, allmerged, flanks, gapsclosed, logStream, traceStream); + kRun(params, opt::k, g, allmerged, flanks, gapsclosed, logStream, traceStream, gapStream); temp = "k" + IntToString(opt::k) + " run complete\n" + "Total gaps closed so far = " + IntToString(gapsclosed) + "\n\n"; @@ -1073,5 +1095,10 @@ int main(int argc, char** argv) traceStream.close(); } + if (!opt::gapfilePath.empty()) { + assert_good(gapStream, opt::gapfilePath); + gapStream.close(); + } + return 0; } From 8992a58663f1d0717032bee59adaad5d1ec2d097 Mon Sep 17 00:00:00 2001 From: Ben Vandervalk Date: Mon, 20 Aug 2018 14:24:19 -0700 Subject: [PATCH 3/5] abyss-pe: test if `zsh` supports `-o pipefail` and if not, fallback to using `bash` as SHELL closes #240 --- bin/abyss-pe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/abyss-pe b/bin/abyss-pe index 481f9ccbe..e469003cd 100755 --- a/bin/abyss-pe +++ b/bin/abyss-pe @@ -4,7 +4,7 @@ # Anthony Raymond . SHELL=bash -e -o pipefail -ifneq ($(shell command -v zsh),) +ifeq ($(shell zsh -e -o pipefail -c 'true' 2>/dev/null; echo $$?), 0) # Set pipefail to ensure that all commands of a pipe succeed. SHELL=zsh -e -o pipefail # Report run time and memory usage with zsh. From 895d88fbe71f5d20eac52a397c82509184da27bc Mon Sep 17 00:00:00 2001 From: Ben Vandervalk Date: Mon, 20 Aug 2018 15:11:41 -0700 Subject: [PATCH 4/5] abyss-pe: time all assembly commands when user sets `time=1` Previously, only a subset of the commands were being timed. --- bin/abyss-pe | 106 +++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/bin/abyss-pe b/bin/abyss-pe index e469003cd..aedf144df 100755 --- a/bin/abyss-pe +++ b/bin/abyss-pe @@ -530,45 +530,45 @@ endif ifdef B %-1.fa: - abyss-bloom-dbg $(abyssopt) $(ABYSS_OPTIONS) $(in) $(se) > $@ + $(gtime) abyss-bloom-dbg $(abyssopt) $(ABYSS_OPTIONS) $(in) $(se) > $@ else ifdef K ifdef np %-1.fa: - $(mpirun) -np $(np) abyss-paired-dbg-mpi $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa $(in) $(se) + $(gtime) $(mpirun) -np $(np) abyss-paired-dbg-mpi $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa $(in) $(se) else %-1.fa %-1.$g: - abyss-paired-dbg $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa -g $*-1.$g $(in) $(se) + $(gtime) abyss-paired-dbg $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa -g $*-1.$g $(in) $(se) endif else ifdef np %-1.fa: - $(mpirun) -np $(np) ABYSS-P $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se) + $(gtime) $(mpirun) -np $(np) ABYSS-P $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se) else %-1.fa: - ABYSS $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se) + $(gtime) ABYSS $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se) endif # Find overlapping contigs %-1.$g: %-1.fa - AdjList $(alopt) --$g $< >$@ + $(gtime) AdjList $(alopt) --$g $< >$@ # Remove shim contigs %-2.$g1 %-1.path: %-1.$g %-1.fa - abyss-filtergraph $v --$g $(fgopt) $(FILTERGRAPH_OPTIONS) -k$k -g $*-2.$g1 $^ >$*-1.path + $(gtime) abyss-filtergraph $v --$g $(fgopt) $(FILTERGRAPH_OPTIONS) -k$k -g $*-2.$g1 $^ >$*-1.path %-2.fa %-2.$g: %-1.fa %-2.$g1 %-1.path - MergeContigs --$g $(mcopt) -g $*-2.$g -o $*-2.fa $^ + $(gtime) MergeContigs --$g $(mcopt) -g $*-2.$g -o $*-2.fa $^ # Pop bubbles %-2.path %-3.$g: %-2.fa %-2.$g - PopBubbles $v --$g -j$j -k$k $(SS) $(pbopt) $(POPBUBBLES_OPTIONS) -g $*-3.$g $^ >$*-2.path + $(gtime) PopBubbles $v --$g -j$j -k$k $(SS) $(pbopt) $(POPBUBBLES_OPTIONS) -g $*-3.$g $^ >$*-2.path %-3.fa: %-2.fa %-2.$g %-2.path - MergeContigs $(mcopt) -o $@ $^ + $(gtime) MergeContigs $(mcopt) -o $@ $^ awk '!/^>/ {x[">" $$1]=1; next} {getline s} $$1 in x {print $$0 "\n" s}' \ $*-2.path $*-1.fa >$*-indel.fa @@ -581,27 +581,27 @@ endif # Estimate distances between unitigs %-3.sam.gz %-3.hist: $(name)-3.fa - $(align) $(mapopt) $(strip $($*)) $< \ + $(gtime) $(align) $(mapopt) $(strip $($*)) $< \ |$(fixmate) $(fmopt) -h $*-3.hist \ |sort -snk3 -k4 \ |$(gzip) >$*-3.sam.gz %-3.bam %-3.hist: $(name)-3.fa - $(align) $(mapopt) $(strip $($*)) $< \ + $(gtime) $(align) $(mapopt) $(strip $($*)) $< \ |$(fixmate) $(fmopt) -h $*-3.hist \ |sort -snk3 -k4 \ |samtools view -Sb - -o $*-3.bam %-3.dist: %-3.sam.gz %-3.hist gunzip -c $< \ - |$(DistanceEst) $(deopt) -o $@ $*-3.hist + |$(gtime) $(DistanceEst) $(deopt) -o $@ $*-3.hist %-3.dist: %-3.bam %-3.hist - samtools view -h $< \ + $(gtime) samtools view -h $< \ |$(DistanceEst) $(deopt) -o $@ $*-3.hist %-3.dist: $(name)-3.fa - $(align) $(mapopt) $(strip $($*)) $< \ + $(gtime) $(align) $(mapopt) $(strip $($*)) $< \ |$(fixmate) $(fmopt) -h $*-3.hist \ |sort -snk3 -k4 \ |$(DistanceEst) $(deopt) -o $@ $*-3.hist @@ -610,25 +610,25 @@ dist=$(addsuffix -3.dist, $(pe)) ifneq ($(name)-3.dist, $(dist)) $(name)-3.dist: $(name)-3.fa $(dist) - abyss-todot $v --dist -e $^ >$@ + $(gtime) abyss-todot $v --dist -e $^ >$@ $(name)-3.bam: $(addsuffix -3.bam, $(pe)) - samtools merge -r $@ $^ + $(gtime) samtools merge -r $@ $^ endif # Find overlaps between contigs %-4.fa %-4.$g: %-3.fa %-3.$g %-3.dist - Overlap $v --$g $(SS) $(OVERLAP_OPTIONS) -k$k -g $*-4.$g -o $*-4.fa $^ + $(gtime) Overlap $v --$g $(SS) $(OVERLAP_OPTIONS) -k$k -g $*-4.$g -o $*-4.fa $^ # Assemble contigs %-4.path1: %-4.$g %-3.dist - SimpleGraph $v $(sgopt) $(SIMPLEGRAPH_OPTIONS) -j$j -k$k -o $@ $^ + $(gtime) SimpleGraph $v $(sgopt) $(SIMPLEGRAPH_OPTIONS) -j$j -k$k -o $@ $^ %-4.path2: %-4.path1 %-3.fa.fai %-4.fa.fai cat $*-3.fa.fai $*-4.fa.fai \ - |MergePaths $(mpopt) $(MERGEPATHS_OPTIONS) -o $@ - $< + |$(gtime) MergePaths $(mpopt) $(MERGEPATHS_OPTIONS) -o $@ - $< %-4.path3: %-4.$g %-4.path2 PathOverlap --assemble $(poopt) $(SS) $^ >$@ @@ -637,10 +637,10 @@ ifndef cs %-5.path %-5.fa %-5.$g: %-3.fa %-4.fa %-4.$g %-4.path3 cat $(wordlist 1, 2, $^) \ - |PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -o $*-5.path -s $*-5.fa -g $*-5.$g - $(wordlist 3, 4, $^) + |$(gtime) PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -o $*-5.path -s $*-5.fa -g $*-5.$g - $(wordlist 3, 4, $^) %-6.fa: %-3.fa %-4.fa %-5.fa %-5.$g %-5.path - cat $(wordlist 1, 3, $^) |MergeContigs $(mcopt) -o $@ - $(wordlist 4, 5, $^) + cat $(wordlist 1, 3, $^) |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 4, 5, $^) else @@ -649,18 +649,18 @@ else ln -sf $*-4.path3 $*-5.path %-cs.fa: %-3.fa %-4.fa %-4.$g %-4.path3 - cat $(wordlist 1, 2, $^) |MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^) + cat $(wordlist 1, 2, $^) |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^) # Convert colour-space sequence to nucleotides %-6.fa: %-cs.fa - KAligner $v --seq -m -j$j -l$l $(in) $(se) $< \ + $(gtime) KAligner $v --seq -m -j$j -l$l $(in) $(se) $< \ |Consensus $v -o $@ $< endif %-6.$g: %-5.$g %-5.path - PathOverlap --overlap $(poopt) --$g $^ >$@ + $(gtime) PathOverlap --overlap $(poopt) --$g $^ >$@ %-contigs.fa: %-6.fa ln -sf $< $@ @@ -671,27 +671,27 @@ endif # Estimate distances between contigs %-6.sam.gz %-6.hist: $(name)-6.fa - $(align) $(mapopt) $(strip $($*)) $< \ + $(gtime) $(align) $(mapopt) $(strip $($*)) $< \ |$(fixmate) $(fmopt) -h $*-6.hist \ |sort -snk3 -k4 \ |$(gzip) >$*-6.sam.gz %-6.bam %-6.hist: $(name)-6.fa - $(align) $(mapopt) $(strip $($*)) $< \ + $(gtime) $(align) $(mapopt) $(strip $($*)) $< \ |$(fixmate) $(fmopt) -h $*-6.hist \ |sort -snk3 -k4 \ |samtools view -Sb - -o $*-6.bam %-6.dist.dot: %-6.sam.gz %-6.hist gunzip -c $< \ - |$(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist + |$(gtime) $(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist %-6.dist.dot: %-6.bam %-6.hist samtools view -h $< \ - |$(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist + |$(gtime) $(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist %-6.dist.dot: $(name)-6.fa - $(align) $(mapopt) $(strip $($*)) $< \ + $(gtime) $(align) $(mapopt) $(strip $($*)) $< \ |$(fixmate) $(fmopt) -h $*-6.hist \ |sort -snk3 -k4 \ |$(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist @@ -699,17 +699,17 @@ endif # Scaffold %-6.path: $(name)-6.$g $(addsuffix -6.dist.dot, $(mp)) - abyss-scaffold $(scopt) -s$S -n$N -g $@.dot $(SCAFFOLD_OPTIONS) $^ >$@ + $(gtime) abyss-scaffold $(scopt) -s$S -n$N -g $@.dot $(SCAFFOLD_OPTIONS) $^ >$@ %-7.path %-7.$g %-7.fa: %-6.fa %-6.$g %-6.path - PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-7.fa -g $*-7.$g -o $*-7.path $^ + $(gtime) PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-7.fa -g $*-7.$g -o $*-7.path $^ %-8.fa: %-6.fa %-7.fa %-7.$g %-7.path cat $(wordlist 1, 2, $^) \ - |MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^) + |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^) %-8.$g: %-7.$g %-7.path - PathOverlap --overlap $(poopt) --$g $^ >$@ + $(gtime) PathOverlap --overlap $(poopt) --$g $^ >$@ # Scaffold using linked reads ifdef lr @@ -790,16 +790,16 @@ arcs_z=500 # Create a graph of linked contigs using ARCS. %.arcs.dist.gv: %.lr.sortn.sam.gz gunzip -c $< \ - | arcs $v -c$(arcs_c) -d$(arcs_d) -e$(arcs_e) -l$(arcs_l) -m$(arcs_m) -r$(arcs_r) -s$(arcs_s) -z$(arcs_z) \ + |$(gtime) arcs $v -c$(arcs_c) -d$(arcs_d) -e$(arcs_e) -l$(arcs_l) -m$(arcs_m) -r$(arcs_r) -s$(arcs_s) -z$(arcs_z) \ -g $*.arcs.dist.gv --tsv=$*.arcs.tsv --barcode-counts=$*.arcs.barcode-counts.tsv /dev/stdin # Scaffold using ARCS and abyss-scaffold. %.arcs.path: %.arcs.dist.gv - abyss-scaffold $(scopt) -s$(lr_s) -n$(lr_n) -g $@.dot $(LR_SCAFFOLD_OPTIONS) $< >$@ + $(gtime) abyss-scaffold $(scopt) -s$(lr_s) -n$(lr_n) -g $@.dot $(LR_SCAFFOLD_OPTIONS) $< >$@ # Create the FASTA file of ARCS scaffolds. %.arcs.fa: %.fa %.arcs.path - MergeContigs $(mcopt) -o $@ $^ + $(gtime) MergeContigs $(mcopt) -o $@ $^ %-scaffolds.fa: %-8.tigmint.arcs.fa ln -sf $< $@ @@ -818,7 +818,7 @@ endif sealer_ks?=-k90 -k80 -k70 -k60 -k50 -k40 -k30 %-8_scaffold.fa: %-8.fa - abyss-sealer -v -j$j --print-flanks -o$*-8 -S$< $(sealer_ks) $(SEALER_OPTIONS) $(in) $(se) + $(gtime) abyss-sealer -v -j$j --print-flanks -o$*-8 -S$< $(sealer_ks) $(SEALER_OPTIONS) $(in) $(se) %-scaffolds-sealed.fa: %-8_scaffold.fa ln -s $< $@ @@ -834,28 +834,28 @@ sealer_ks?=-k90 -k80 -k70 -k60 -k50 -k40 -k30 # Transcriptome assisted scaffolding %.fa.bwt: %.fa - bwa index $< + $(gtime) bwa index $< %-8.sam.gz: $(name)-8.fa.bwt - bwa mem -a -t$j -S -P -k$l $(name)-8.fa $(strip $($*)) \ + $(gtime) bwa mem -a -t$j -S -P -k$l $(name)-8.fa $(strip $($*)) \ |$(gzip) >$@ %-8.dist.dot: %-8.sam.gz - abyss-longseqdist -k$k $(LONGSEQDIST_OPTIONS) $< \ + $(gtime) abyss-longseqdist -k$k $(LONGSEQDIST_OPTIONS) $< \ |grep -v "l=" >$@ %-8.path: $(name)-8.$g $(addsuffix -8.dist.dot, $(long)) - abyss-scaffold $(scopt) -s$S -n1 -g $@.$g $(SCAFFOLD_OPTIONS) $^ >$@ + $(gtime) abyss-scaffold $(scopt) -s$S -n1 -g $@.$g $(SCAFFOLD_OPTIONS) $^ >$@ %-9.path %-9.$g %-9.fa: %-8.fa %-8.$g %-8.path - PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-9.fa -g $*-9.$g -o $*-9.path $^ + $(gtime) PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-9.fa -g $*-9.$g -o $*-9.path $^ %-10.fa: %-8.fa %-9.fa %-9.$g %-9.path cat $(wordlist 1, 2, $^) \ - |MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^) + |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^) %-10.$g: %-9.$g %-9.path - PathOverlap --overlap $(poopt) --$g $^ >$@ + $(gtime) PathOverlap --overlap $(poopt) --$g $^ >$@ %-long-scaffs.fa: %-10.fa ln -sf $< $@ @@ -881,11 +881,11 @@ endif endif $(name)-unitigs.bam: %.bam: %.fa - $(align) $v -j$j -l$l $(ALIGNER_OPTIONS) $(se) $< \ + $(gtime) $(align) $v -j$j -l$l $(ALIGNER_OPTIONS) $(se) $< \ |samtools view -Su - |samtools sort -o - - >$@ $(name)-contigs.bam $(name)-scaffolds.bam: %.bam: %.fa - $(align) $v -j$j -l$l $(ALIGNER_OPTIONS) \ + $(gtime) $(align) $v -j$j -l$l $(ALIGNER_OPTIONS) \ $(call map, deref, $(sort $(lib) $(pe) $(mp))) $< \ |$(fixmate) $v $(FIXMATE_OPTIONS) \ |sort -snk3 -k4 \ @@ -894,14 +894,14 @@ $(name)-contigs.bam $(name)-scaffolds.bam: %.bam: %.fa # Align the variants to the assembly %-variants.bam: %.fa.bwt - bwa bwasw -t$j $*.fa <(cat $(name)-bubbles.fa $(name)-indel.fa) \ + $(gtime) bwa bwasw -t$j $*.fa <(cat $(name)-bubbles.fa $(name)-indel.fa) \ |samtools view -Su - |samtools sort -o - - >$@ %-variants.vcf.gz: %.fa %-variants.bam - samtools mpileup -Buf $^ |bcftools view -vp1 - |bgzip >$@ + $(gtime) samtools mpileup -Buf $^ |bcftools view -vp1 - |bgzip >$@ %.gz.tbi: %.gz - tabix -pvcf $< + $(gtime) tabix -pvcf $< # Calculate assembly contiguity statistics @@ -938,17 +938,17 @@ $(name)-stats.tab: # Create an AGP file and FASTA file of scaftigs from scaffolds %.agp %-agp.fa: %.fa - abyss-fatoagp $(FATOAGP_OPTIONS) -f $*-agp.fa $< >$*.agp + $(gtime) abyss-fatoagp $(FATOAGP_OPTIONS) -f $*-agp.fa $< >$*.agp # Align the contigs to the reference %-$(ref).sam.gz: %.fa - bwa bwasw $(bwaswopt) $(BWASW_OPTIONS) $($(ref)) $< |$(gzip) >$@ + $(gtime) bwa bwasw $(bwaswopt) $(BWASW_OPTIONS) $($(ref)) $< |$(gzip) >$@ # Find breakpoints in the alignments %.break: %.sam.gz - abyss-samtobreak $(SAMTOBREAK_OPTIONS) $< >$@ + $(gtime) abyss-samtobreak $(SAMTOBREAK_OPTIONS) $< >$@ # Report ABySS configuration variable(s) and value(s) currently set. From f18bd36bbc601d8b6911ddee94ac4109d36e088f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladimir=20Nikoli=C4=87?= Date: Fri, 24 Aug 2018 19:07:09 +0200 Subject: [PATCH 5/5] Parallelize gap sealing (#249) --- Sealer/sealer.cc | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/Sealer/sealer.cc b/Sealer/sealer.cc index 288492d95..a4c827f4b 100644 --- a/Sealer/sealer.cc +++ b/Sealer/sealer.cc @@ -420,8 +420,8 @@ template string merge(const Graph& g, unsigned k, const Gap& gap, - FastaRecord &read1, - FastaRecord &read2, + const FastaRecord &read1, + const FastaRecord &read2, const ConnectPairsParams& params, Counters& g_count, ofstream& traceStream) @@ -583,7 +583,6 @@ void kRun(const ConnectPairsParams& params, map >::iterator read1_it; map::iterator read2_it; unsigned uniqueGapsClosed = 0; - bool success; Counters g_count; g_count.noStartOrGoalKmer = 0; @@ -603,38 +602,50 @@ void kRun(const ConnectPairsParams& params, printLog(logStream, "Flanks inserted into k run = " + IntToString(flanks.size()) + "\n"); - for (read1_it = flanks.begin(); read1_it != flanks.end();) { - success = false; + int counter = 0; + vector >::iterator> flanks_closed; +#pragma omp parallel private(read1_it, read2_it) firstprivate(counter) + for (read1_it = flanks.begin(); read1_it != flanks.end(); ++read1_it) { FastaRecord read1 = read1_it->first; - for (read2_it = flanks[read1].begin(); read2_it != flanks[read1].end(); read2_it++) { + bool success = false; + for (read2_it = flanks[read1].begin(); read2_it != flanks[read1].end(); ++read2_it, ++counter) { +#if _OPENMP + if (counter % omp_get_num_threads() != omp_get_thread_num()) + continue; +#endif FastaRecord read2 = read2_it->first; int startposition = read2_it->second.gapStart(); string tempSeq = merge(g, k, read2_it->second, read1, read2, params, g_count, traceStream); if (!tempSeq.empty()) { success = true; +#pragma omp critical (allmerged) allmerged[read1.id.substr(0,read1.id.length()-2)][startposition] = ClosedGap(read2_it->second, tempSeq); -//#pragma omp atomic - gapsclosed++; -//#pragma omp atomic - uniqueGapsClosed++; - if (gapsclosed % 100 == 0) +#pragma omp atomic + ++uniqueGapsClosed; +#pragma omp critical (gapsclosed) + if (++gapsclosed % 100 == 0) printLog(logStream, IntToString(gapsclosed) + " gaps closed so far\n"); - if (!opt::gapfilePath.empty()) { + if (!opt::gapfilePath.empty()) +#pragma omp critical (gapStream) gapStream << ">" << read1.id.substr(0,read1.id.length()-2) << "_" << read2_it->second.gapStart() << "-" << read2_it->second.gapEnd() - << " LN:i:" << tempSeq.length() << '\n'; - gapStream << tempSeq << '\n'; - } + << " LN:i:" << tempSeq.length() << '\n' + << tempSeq << '\n'; } } if (success) { - flanks.erase(read1_it++); +#pragma omp critical (flanks_closed) + flanks_closed.push_back(read1_it); } - else - read1_it++; + } + + for (vector >::iterator>::iterator it = flanks_closed.begin(); + it != flanks_closed.end(); ++it) { + if (flanks.count((*it)->first) > 0) + flanks.erase(*it); } printLog(logStream, IntToString(uniqueGapsClosed) + " unique gaps closed for k" + IntToString(k) + "\n");