Skip to content

Commit

Permalink
Merge branch 'master' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben Vandervalk committed Oct 24, 2018
2 parents 4811239 + 116d2e3 commit eb1f29a
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 7 deletions.
6 changes: 5 additions & 1 deletion Common/PMF.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class PMF
public:
/** Construct a PMF from a histogram. */
PMF(const Histogram& h)
: m_dist(h.maximum() + 1), m_mean(h.mean()), m_stdDev(h.sd())
: m_dist(h.maximum() + 1), m_mean(h.mean()), m_stdDev(h.sd()), m_median(h.median())
{
unsigned count = h.size();
m_minp = (double)1 / count;
Expand Down Expand Up @@ -44,6 +44,9 @@ class PMF
return m_dist.size() - 1;
}

/**
 * Return the median of this distribution, as taken from the
 * histogram this PMF was constructed from.
 */
int median() const
{
	return m_median;
}

/**
 * Return the mean of this distribution, as taken from the
 * histogram this PMF was constructed from.
 */
double mean() const
{
	return m_mean;
}

Expand All @@ -60,6 +63,7 @@ class PMF
double m_mean;
double m_stdDev;
double m_minp;
int m_median;
};

namespace std {
Expand Down
33 changes: 32 additions & 1 deletion DistanceEst/DistanceEst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ static const char USAGE_MESSAGE[] =
" -o, --out=FILE write result to FILE\n"
" --mle use the MLE [default]\n"
" (maximum likelihood estimator)\n"
" --median use the difference of the population median\n"
" and the sample median\n"
" --mean use the difference of the population mean\n"
" and the sample mean\n"
" --dist output the graph in dist format [default]\n"
Expand All @@ -78,7 +80,7 @@ static const char USAGE_MESSAGE[] =
"Report bugs to <" PACKAGE_BUGREPORT ">.\n";

/** Which estimator to use. See opt::method. */
enum { MLE, MEAN };
enum { MLE, MEAN, MEDIAN };

namespace opt {
string db;
Expand Down Expand Up @@ -131,6 +133,7 @@ static const struct option longopts[] = {
{ "mind", required_argument, NULL, OPT_MIND },
{ "maxd", required_argument, NULL, OPT_MAXD },
{ "mle", no_argument, &opt::method, MLE },
{ "median", no_argument, &opt::method, MEDIAN },
{ "mean", no_argument, &opt::method, MEAN },
{ "kmer", required_argument, NULL, 'k' },
{ "npairs", required_argument, NULL, 'n' },
Expand Down Expand Up @@ -175,6 +178,29 @@ static int estimateDistanceUsingMean(
return d;
}

/** Estimate the distance between two contigs using the difference of
* the population median and the sample median.
* @param numPairs [out] the number of pairs that agree with the
* expected distribution
* @return the estimated distance
*/
static int estimateDistanceUsingMedian(
const std::vector<int>& samples, const PMF& pmf,
unsigned& numPairs)
{
Histogram h(samples.begin(), samples.end());
int d = (int)round(pmf.median() - h.median());
// Count the number of samples that agree with the distribution.
unsigned n = 0;
for (Histogram::const_iterator it = h.begin();
it != h.end(); ++it)
if (pmf[it->first + d] > pmf.minProbability())
n += it->second;

numPairs = n;
return d;
}

/** Global variable to track a recommended minAlign parameter */
unsigned g_recMA;

Expand Down Expand Up @@ -257,6 +283,11 @@ static int estimateDistance(unsigned len0, unsigned len1,
// and the sample mean.
return estimateDistanceUsingMean(
fragmentSizes, pmf, numPairs);
case MEDIAN:
// Use the difference of the population median
// and the sample median.
return estimateDistanceUsingMedian(
fragmentSizes, pmf, numPairs);
default:
assert(false);
abort();
Expand Down
12 changes: 8 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
FROM ubuntu:latest
FROM ubuntu:18.04
MAINTAINER Shaun Jackman <sjackman@gmail.com>

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bsdmainutils libgomp1 make openmpi-bin ssh
bsdmainutils libgomp1 make openmpi-bin ssh sudo \
&& useradd -m -s /bin/bash abyss \
&& echo 'abyss ALL=(ALL) NOPASSWD:ALL' >>/etc/sudoers
ADD . /tmp/abyss
RUN apt-get install -y --no-install-recommends \
automake g++ libboost-dev libopenmpi-dev libsparsehash-dev \
&& cd /tmp/abyss \
&& ./autogen.sh \
&& mkdir build && cd build \
&& ../configure --with-mpi=/usr/lib/openmpi \
&& ../configure --with-mpi=/usr/lib/x86_64-linux-gnu/openmpi \
&& make install-strip \
&& rm -rf /tmp/abyss \
&& apt-get autoremove -y binutils \
automake g++ libboost-dev libopenmpi-dev libsparsehash-dev
ENV SHELL=/bin/bash
USER abyss
WORKDIR /home/abyss
ENV SHELL=/bin/bash USER=abyss
ENTRYPOINT ["abyss-pe"]
CMD ["help"]
2 changes: 1 addition & 1 deletion bin/abyss-pe
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ SCAFFOLD_DE_OPTIONS?=$(DISTANCEEST_OPTIONS)
$(foreach i,$(mp),$(eval $i_l?=$L))
$(foreach i,$(mp),$(eval $i_s?=$(SCAFFOLD_DE_S)))
$(foreach i,$(mp),$(eval $i_n?=$(SCAFFOLD_DE_N)))
override scaffold_deopt=$v $(dbopt) --dot --mean -j$j -k$k $(SCAFFOLD_DE_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de)
override scaffold_deopt=$v $(dbopt) --dot --median -j$j -k$k $(SCAFFOLD_DE_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de)
scopt += $v $(dbopt) $(SS) -k$k
ifdef G
scopt += -G$G
Expand Down

0 comments on commit eb1f29a

Please sign in to comment.