Skip to content

Commit

Permalink
Check genome size param for span=auto (#57)
Browse files Browse the repository at this point in the history
* Set G=0 by default and check value in tigmint-make

* Bump up version numbers to 1.2.4

* Fix typo

* Remove and gitignore compiled long-to-linked-pe
  • Loading branch information
janetxinli authored Jun 25, 2021
1 parent 3f25f2a commit 9374380
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ xml-patch-make/
__pycache__
.pytest_cache
.vscode
src/long-to-linked-pe

# BWA
*.fa.amb
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ tigmint-make tigmint-long draft=myassembly reads=myreads span=auto G=gsize dist=

+ `draft`: Name of the draft assembly, `myassembly.fa`
+ `reads`: Name of the reads, `myreads.fq.gz`
+ `G`: Haploid genome size of the draft assembly organism. Used to calculate `span` parameter automatically. Can be given as an integer or in scientific notation (e.g. '3e9' for human)
+ `G`: Haploid genome size of the draft assembly organism. Required to calculate the `span` parameter automatically (i.e. when `span=auto`). Can be given as an integer or in scientific notation (e.g. '3e9' for human) [0]
+ `span=20`: Threshold for the number of spanning molecules. Set `span=auto` to select the span parameter automatically (currently only recommended for `tigmint-long`)
+ `cut=500`: Cut length for long reads (`tigmint-long` only)
+ `longmap=ont`: Long read platform; `ont` for Oxford Nanopore Technologies (ONT) long reads, `pb` for PacBio long reads (`tigmint-long` only)
Expand Down
2 changes: 1 addition & 1 deletion bin/tigmint-cut
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def get_span(filename):

def main():
parser = argparse.ArgumentParser(description="Find misassembled regions in assembly using Chromium molecule extents")
parser.add_argument("--version", action="version", version="tigmint-cut 1.2.3")
parser.add_argument("--version", action="version", version="tigmint-cut 1.2.4")
parser.add_argument("fasta", type=str, help="Reference genome fasta file (must have FAI index generated)")
parser.add_argument("bed", type=str, help="Sorted bed file of molecule extents")
parser.add_argument("-o", "--fastaout", type=str, help="The output FASTA file.", required=True)
Expand Down
30 changes: 19 additions & 11 deletions bin/tigmint-make
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ endif
ref=ref

# Haploid size of the reference genome, for calculating span, NG50 and NGA50
G=-1
G=0

# Minimap2 long read map parameter
longmap=ont
Expand Down Expand Up @@ -114,7 +114,7 @@ endif

.DELETE_ON_ERROR:
.SECONDARY:
.PHONY: help version all tigmint tigmint-long arcs metrics draft_metrics tigmint_metrics arcs_metrics
.PHONY: help version all tigmint tigmint-long arcs metrics draft_metrics tigmint_metrics arcs_metrics check_span_g

help:
@echo 'Tigmint: Correct misassemblies using linked or long reads'
Expand All @@ -123,12 +123,12 @@ help:
@echo 'For more information see https://bcgsc.github.io/tigmint/'

version:
@echo "Tigmint 1.2.3"
@echo "Tigmint 1.2.4"
@echo "Written by Shaun Jackman @sjackman."

all: tigmint arcs
ifneq ($(ref), ref)
ifneq ($(G), -1)
ifneq ($G, 0)
all: metrics
endif
endif
Expand Down Expand Up @@ -196,19 +196,27 @@ $(draft).%.sortbx.bam: %.fq.gz $(draft).fa.bwt
$(draft).%.cut$(cut).sortbx.bam: %.cut$(cut).fa.gz $(draft).fa
$(gtime) minimap2 -y -t$t -ax map-$(longmap) --secondary=no $(draft).fa $< | samtools view -b -u -F4 | samtools sort -@$t -tBX -T$$(mktemp -u -t $@.XXXXXX) -o $@

# Check that G is set if span=auto
check_span_g:
ifeq ($(span), auto)
ifeq ($G, 0)
$(error Must set genome size parameter (G) to calculate span automatically)
endif
endif

# Segment long reads from gzipped fasta file, optionally calculating tigmint-long parameters.
$(reads).cut$(cut).fq.gz: $(longreads)
$(reads).cut$(cut).fq.gz: $(longreads) check_span_g
ifeq ($(span), auto)
ifeq ($(dist), auto)
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$(G) -s -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$G -s -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
else
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$(G) -s -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$G -s -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
endif
else
ifeq ($(dist), auto)
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
else
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) | $(gzip) > $@
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) | $(gzip) > $@
endif
endif

Expand All @@ -229,7 +237,7 @@ $(reads).tigmint-long.params.tsv: $(longreads)
$(bin)/tigmint_estimate_dist.py - -n $(dist_sample) -o $@'

# Create molecule extents BED using cut long reads
$(draft).$(reads).cut$(cut).molecule.size$(minsize).bed: $(longreads) $(draft).fa $(reads).tigmint-long.params.tsv
$(draft).$(reads).cut$(cut).molecule.size$(minsize).bed: $(longreads) $(draft).fa $(reads).tigmint-long.params.tsv check_span_g
ifeq ($(dist), auto)
$(gtime) $(bin)/../src/long-to-linked-pe -l $(cut) -m$(minsize) -g$G -s -b $(reads).barcode-multiplicity.tsv --bx -t$t --fasta -f $(reads).tigmint-long.params.tsv $< | \
minimap2 -y -t$t -x map-$(longmap) --secondary=no $(draft).fa - | \
Expand Down Expand Up @@ -326,7 +334,7 @@ endif
%.abyss-fac.tsv: %.fa
abyss-fac -G$G -t500 $< >$@

ifneq ($(G), -1)
ifneq ($G, 0)
abyss_samtobreak=abyss-samtobreak -l500 -G$G
else
abyss_samtobreak=abyss-samtobreak -l500
Expand Down
2 changes: 1 addition & 1 deletion bin/tigmint_estimate_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def main():
required=False)
parser.add_argument("-v", "--version",
action="version",
version="tigmint_estimate_dist.py 1.2.3")
version="tigmint_estimate_dist.py 1.2.4")

args = parser.parse_args()

Expand Down
2 changes: 1 addition & 1 deletion bin/tigmint_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def parse_arguments(self):
"Read a SAM/BAM file and output a TSV file. "
"The SAM/BAM file must be sorted by BX tag and then by position.")
parser.add_argument(
'--version', action='version', version='tigmint-molecule 1.2.3')
'--version', action='version', version='tigmint-molecule 1.2.4')
parser.add_argument(
metavar="BAM", dest="in_bam_filename",
help="Input BAM file sorted by BX tag then position, - for stdin")
Expand Down
2 changes: 1 addition & 1 deletion bin/tigmint_molecule_paf.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def parse_arguments(self):
parser = argparse.ArgumentParser(
description="Group linked reads simulated from long reads into molecules. "
"Read a PAF file and output a BED file.")
parser.add_argument('--version', action='version', version='tigmint_molecule_paf.py 1.2.3')
parser.add_argument('--version', action='version', version='tigmint_molecule_paf.py 1.2.4')
parser.add_argument(metavar="PAF", dest="PAF", help="Input PAF file, - for stdin")
parser.add_argument("-o", "--output", dest="out_molecules_filename",
help="Output molecule BED file [stdout]",
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="tigmint",
version="1.2.3",
version="1.2.4",
author="Shaun Jackman",
author_email="sjackman@gmail.com",
description="Correct misassemblies using linked or long reads",
Expand Down
2 changes: 1 addition & 1 deletion src/long-to-linked-pe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <vector>

const static std::string PROGNAME = "long-to-linked-pe";
const static std::string VERSION = "v1.2.3";
const static std::string VERSION = "v1.2.4";
const static size_t MAX_THREADS = 6;

static void
Expand Down

0 comments on commit 9374380

Please sign in to comment.