From a4079c5a95106f3f34e7133d047ac86831f4398e Mon Sep 17 00:00:00 2001 From: lcoombe Date: Wed, 26 Sep 2018 15:40:12 -0700 Subject: [PATCH 1/2] Add --median option to DistanceEst (#256) --- Common/PMF.h | 6 +++++- DistanceEst/DistanceEst.cpp | 33 ++++++++++++++++++++++++++++++++- bin/abyss-pe | 2 +- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/Common/PMF.h b/Common/PMF.h index 85b4f0fbb..2a2f93d6f 100644 --- a/Common/PMF.h +++ b/Common/PMF.h @@ -15,7 +15,7 @@ class PMF public: /** Construct a PMF from a histogram. */ PMF(const Histogram& h) - : m_dist(h.maximum() + 1), m_mean(h.mean()), m_stdDev(h.sd()) + : m_dist(h.maximum() + 1), m_mean(h.mean()), m_stdDev(h.sd()), m_median(h.median()) { unsigned count = h.size(); m_minp = (double)1 / count; @@ -44,6 +44,9 @@ class PMF return m_dist.size() - 1; } + /** Return the median of this distribution. */ + int median() const { return m_median; } + /** Return the mean of this distribution. */ double mean() const { return m_mean; } @@ -60,6 +63,7 @@ class PMF double m_mean; double m_stdDev; double m_minp; + int m_median; }; namespace std { diff --git a/DistanceEst/DistanceEst.cpp b/DistanceEst/DistanceEst.cpp index bbcd8a921..412b00867 100644 --- a/DistanceEst/DistanceEst.cpp +++ b/DistanceEst/DistanceEst.cpp @@ -59,6 +59,8 @@ static const char USAGE_MESSAGE[] = " -o, --out=FILE write result to FILE\n" " --mle use the MLE [default]\n" " (maximum likelihood estimator)\n" +" --median use the difference of the population median\n" +" and the sample median\n" " --mean use the difference of the population mean\n" " and the sample mean\n" " --dist output the graph in dist format [default]\n" @@ -78,7 +80,7 @@ static const char USAGE_MESSAGE[] = "Report bugs to <" PACKAGE_BUGREPORT ">.\n"; /** Which estimator to use. See opt::method. */ -enum { MLE, MEAN }; +enum { MLE, MEAN, MEDIAN }; namespace opt { string db; @@ -131,6 +133,7 @@ static const struct option longopts[] = { { "mind", required_argument, NULL, OPT_MIND }, { "maxd", required_argument, NULL, OPT_MAXD }, { "mle", no_argument, &opt::method, MLE }, + { "median", no_argument, &opt::method, MEDIAN }, { "mean", no_argument, &opt::method, MEAN }, { "kmer", required_argument, NULL, 'k' }, { "npairs", required_argument, NULL, 'n' }, @@ -175,6 +178,29 @@ static int estimateDistanceUsingMean( return d; } +/** Estimate the distance between two contigs using the difference of + * the population median and the sample median. + * @param numPairs [out] the number of pairs that agree with the + * expected distribution + * @return the estimated distance + */ +static int estimateDistanceUsingMedian( + const std::vector& samples, const PMF& pmf, + unsigned& numPairs) +{ + Histogram h(samples.begin(), samples.end()); + int d = (int)round(pmf.median() - h.median()); + // Count the number of samples that agree with the distribution. + unsigned n = 0; + for (Histogram::const_iterator it = h.begin(); + it != h.end(); ++it) + if (pmf[it->first + d] > pmf.minProbability()) + n += it->second; + + numPairs = n; + return d; +} + /** Global variable to track a recommended minAlign parameter */ unsigned g_recMA; @@ -257,6 +283,11 @@ static int estimateDistance(unsigned len0, unsigned len1, // and the sample mean. return estimateDistanceUsingMean( fragmentSizes, pmf, numPairs); + case MEDIAN: + // Use the difference of the population median + // and the sample median. + return estimateDistanceUsingMedian( + fragmentSizes, pmf, numPairs); default: assert(false); abort(); diff --git a/bin/abyss-pe b/bin/abyss-pe index 88d3f5a04..d8e3ee758 100755 --- a/bin/abyss-pe +++ b/bin/abyss-pe @@ -342,7 +342,7 @@ SCAFFOLD_DE_OPTIONS?=$(DISTANCEEST_OPTIONS) $(foreach i,$(mp),$(eval $i_l?=$L)) $(foreach i,$(mp),$(eval $i_s?=$(SCAFFOLD_DE_S))) $(foreach i,$(mp),$(eval $i_n?=$(SCAFFOLD_DE_N))) -override scaffold_deopt=$v $(dbopt) --dot --mean -j$j -k$k $(SCAFFOLD_DE_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de) +override scaffold_deopt=$v $(dbopt) --dot --median -j$j -k$k $(SCAFFOLD_DE_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de) scopt += $v $(dbopt) $(SS) -k$k ifdef G scopt += -G$G From 116d2e3d045de3b479f55bc5a7792dc3b4d6fbae Mon Sep 17 00:00:00 2001 From: Shaun Jackman Date: Fri, 5 Oct 2018 16:11:57 -0700 Subject: [PATCH 2/2] Dockerfile: Add a user for OpenMPI (#257) OpenMPI does not like to be run as root. --- Dockerfile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index a7fa6abb4..fc56e1c23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,24 @@ -FROM ubuntu:latest +FROM ubuntu:18.04 MAINTAINER Shaun Jackman RUN apt-get update \ && apt-get install -y --no-install-recommends \ - bsdmainutils libgomp1 make openmpi-bin ssh + bsdmainutils libgomp1 make openmpi-bin ssh sudo \ + && useradd -m -s /bin/bash abyss \ + && echo 'abyss ALL=(ALL) NOPASSWD:ALL' >>/etc/sudoers ADD . /tmp/abyss RUN apt-get install -y --no-install-recommends \ automake g++ libboost-dev libopenmpi-dev libsparsehash-dev \ && cd /tmp/abyss \ && ./autogen.sh \ && mkdir build && cd build \ - && ../configure --with-mpi=/usr/lib/openmpi \ + && ../configure --with-mpi=/usr/lib/x86_64-linux-gnu/openmpi \ && make install-strip \ && rm -rf /tmp/abyss \ && apt-get autoremove -y binutils \ automake g++ libboost-dev libopenmpi-dev libsparsehash-dev -ENV SHELL=/bin/bash +USER abyss +WORKDIR /home/abyss +ENV SHELL=/bin/bash USER=abyss ENTRYPOINT ["abyss-pe"] CMD ["help"]