Skip to content

Commit

Permalink
added run-time hardware detection and check - issue #136 and #138
Browse files Browse the repository at this point in the history
  • Loading branch information
flouris authored and flouris committed May 15, 2017
1 parent dba4e07 commit 33e5e35
Show file tree
Hide file tree
Showing 13 changed files with 361 additions and 101 deletions.
34 changes: 34 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,40 @@
All notable changes to `libpll` will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## [0.3.0] - 2017-05-15
### Added
- Run-time detection of cpu features
- Vectorized (AVX) computation of 20-state transition probability matrices
- Faster tip-inner kernels for 20-state models
- Improved AVX vectorization of derivatives
- Faster PHYLIP parser
- Vectorized scaling for 20-state and arbitrary-state models
- AVX2 vectorizations for partials, likelihood and derivatives
- Unweighted parsimony functions including SSE, AVX and AVX2 vectorizations
- Randomized stepwise addition
- Portable functions for parsing trees from a C-string
- Optional per-rate category scalers to prevent numerical underflows on large
trees
- Setting of identity matrix if all exponentiations of eigenvalues multiplied
by branch length and rate are approximately equal to one
- Re-entrant cross-platform pseudo-random number generator
- Wrapper tree structures
- Custom exporting of tree structures using a callback function
- Support for median category rates in discrete gamma model

### Fixed
- Derivatives computation
- Parsing of branch lengths in newick trees
- Invariant sites computation
- Multiplication of log-likelihood with pattern weight after scaling term
- Added destructors for eliminating memory leaks when tree parsing fails
- Sumtable computation when having multiple substitution matrices
- Ascertainment bias computation
- Per-site log-likelihood computation
- Uninitialized values in testing framework



## [0.2.0] - 2016-09-09
### Added
- Methods for ascertainment bias correction (Lewis, Felsenstein, Stamatakis)
Expand Down
80 changes: 50 additions & 30 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.63])
AC_INIT([libpll], [0.2.0], [Tomas.Flouri@h-its.org])
AC_INIT([libpll], [0.3.0], [Tomas.Flouri@h-its.org])
AM_INIT_AUTOMAKE([subdir-objects])
AC_LANG([C])
AC_CONFIG_SRCDIR([src/pll.c])
Expand Down Expand Up @@ -52,37 +52,39 @@ AC_CHECK_FUNCS([asprintf memcpy memset posix_memalign])

have_avx2=no
have_avx=no
have_sse=no
have_sse3=no
have_ps2pdf=no

AX_EXT

if test "x${ax_cv_have_fma3_ext}" = "xyes"; then
have_avx2=yes
fi

if test "x${ax_cv_have_avx_ext}" = "xyes"; then
have_avx=yes
fi

if test "x${ax_cv_have_sse3_ext}" = "xyes"; then
have_sse3=yes
fi

AC_ARG_ENABLE(avx2, AS_HELP_STRING([--disable-avx2], [Build without AVX2/FMA support]))
AS_IF([test "x${ax_cv_have_fma3_ext}" = "xyes"], [
have_avx2=yes
])

AC_ARG_ENABLE(avx, AS_HELP_STRING([--disable-avx], [Build without AVX support]))
AS_IF([test "x${ax_cv_have_avx_ext}" = "xyes"], [
have_avx=yes
])

AC_ARG_ENABLE(sse,i AS_HELP_STRING([--disable-sse],[Build without SSE support]))
AS_IF([test "x${ax_cv_have_sse3_ext}" = "xyes"], [
have_sse3=yes
])
# Compile-time detection of processor features - now disabled
#AX_EXT
#
#if test "x${ax_cv_have_fma3_ext}" = "xyes"; then
# have_avx2=yes
#fi
#
#if test "x${ax_cv_have_avx_ext}" = "xyes"; then
# have_avx=yes
#fi
#
#if test "x${ax_cv_have_sse3_ext}" = "xyes"; then
# have_sse3=yes
#fi
#
#AC_ARG_ENABLE(avx2, AS_HELP_STRING([--disable-avx2], [Build without AVX2/FMA support]))
#AS_IF([test "x${ax_cv_have_fma3_ext}" = "xyes"], [
# have_avx2=yes
#])
#
#AC_ARG_ENABLE(avx, AS_HELP_STRING([--disable-avx], [Build without AVX support]))
#AS_IF([test "x${ax_cv_have_avx_ext}" = "xyes"], [
# have_avx=yes
#])
#
#AC_ARG_ENABLE(sse, AS_HELP_STRING([--disable-sse],[Build without SSE support]))
#AS_IF([test "x${ax_cv_have_sse3_ext}" = "xyes"], [
# have_sse3=yes
#])
#

AC_ARG_ENABLE(pdfman, AS_HELP_STRING([--disable-pdfman], [Disable PDF manual creation]))
AS_IF([test "x$enable_pdfman" != "xno"], [
Expand All @@ -94,6 +96,24 @@ AS_IF([test "x$enable_pdfman" != "xno"], [
fi
])

AC_ARG_ENABLE(sse, AS_HELP_STRING([--disable-sse], [Build without SSE support]))
AS_IF([test "x$enable_sse" != "xno"], [
have_sse3=yes
AC_DEFINE([HAVE_SSE3], [1], [Define to 1 to support Streaming SIMD Extensions 3])
])

AC_ARG_ENABLE(avx, AS_HELP_STRING([--disable-avx], [Build without AVX support]))
AS_IF([test "x$enable_avx" != "xno"], [
have_avx=yes
AC_DEFINE([HAVE_AVX], [1], [Define to 1 to support Advanced Vector Extensions])
])

AC_ARG_ENABLE(avx2, AS_HELP_STRING([--disable-avx2], [Build without AVX2/FMA support]))
AS_IF([test "x$enable_avx2" != "xno"], [
have_avx2=yes
AC_DEFINE([HAVE_AVX2], [1], [Define to 1 to support Advanced Vector Extensions 2])
])

AM_CONDITIONAL(HAVE_AVX2, test "x${have_avx2}" = "xyes")
AM_CONDITIONAL(HAVE_AVX, test "x${have_avx}" = "xyes")
AM_CONDITIONAL(HAVE_SSE3, test "x${have_sse3}" = "xyes")
Expand Down
10 changes: 10 additions & 0 deletions man/libpll.3
Original file line number Diff line number Diff line change
Expand Up @@ -576,5 +576,15 @@ bug releases may not be mentioned):
.TP
.BR v0.2.0\~ "released September 9th, 2016"
First public release.
.TP
.BR v0.3.0\~ "released May 15th, 2017"
Added faster vectorizations for 20-state and arbitrary-state models, unweighted
parsimony functions, randomized stepwise addition, portable functions for
parsing trees from C-strings, per-rate category scalers for preventing
numerical underflows. Modified newick exporting function to accept callbacks
for custom printing. Fixed derivatives computation, parsing of branch lengths,
invariant sites computation, log-likelihood computation for cases where we have
scaling and patterns, ascertainment bias computation, per-site log-likelihood
computation, memory leaks. Added run-time detection of hardware.
.RE
.LP
4 changes: 3 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ lex_rtree.l \
fast_parsimony.c \
stepwise.c \
random.c \
phylip.c
phylip.c \
hardware.c \
init.c

libpll_la_CFLAGS = $(AM_CFLAGS)

Expand Down
18 changes: 9 additions & 9 deletions src/core_derivatives.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ PLL_EXPORT int pll_core_update_sumtable_ii(unsigned int states,
const double * t_freqs;

#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
return pll_core_update_sumtable_ii_sse(states,
sites,
Expand All @@ -164,7 +164,7 @@ PLL_EXPORT int pll_core_update_sumtable_ii(unsigned int states,
}
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
return pll_core_update_sumtable_ii_avx(states,
sites,
Expand All @@ -181,7 +181,7 @@ PLL_EXPORT int pll_core_update_sumtable_ii(unsigned int states,
}
#endif
#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
return pll_core_update_sumtable_ii_avx2(states,
sites,
Expand Down Expand Up @@ -299,7 +299,7 @@ PLL_EXPORT int pll_core_update_sumtable_ti(unsigned int states,
unsigned int states_padded = states;

#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
return pll_core_update_sumtable_ti_sse(states,
sites,
Expand All @@ -316,7 +316,7 @@ PLL_EXPORT int pll_core_update_sumtable_ti(unsigned int states,
}
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
return pll_core_update_sumtable_ti_avx(states,
sites,
Expand All @@ -334,7 +334,7 @@ PLL_EXPORT int pll_core_update_sumtable_ti(unsigned int states,
}
#endif
#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
return pll_core_update_sumtable_ti_avx2(states,
sites,
Expand Down Expand Up @@ -530,14 +530,14 @@ PLL_EXPORT int pll_core_likelihood_derivatives(unsigned int states,

// SSE3 vectorization is missing as of now
#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
states_padded = (states+1) & 0xFFFFFFFE;
}
#endif

#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
states_padded = (states+3) & 0xFFFFFFFC;

Expand All @@ -558,7 +558,7 @@ PLL_EXPORT int pll_core_likelihood_derivatives(unsigned int states,
else
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
states_padded = (states+3) & 0xFFFFFFFC;

Expand Down
24 changes: 12 additions & 12 deletions src/core_likelihood.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ PLL_EXPORT double pll_core_root_loglikelihood(unsigned int states,
unsigned int states_padded = states;

#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -85,7 +85,7 @@ PLL_EXPORT double pll_core_root_loglikelihood(unsigned int states,
}
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -122,7 +122,7 @@ PLL_EXPORT double pll_core_root_loglikelihood(unsigned int states,
}
#endif
#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -241,7 +241,7 @@ double pll_core_edge_loglikelihood_ti_4x4(unsigned int sites,
unsigned int states_padded = states;

#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
return pll_core_edge_loglikelihood_ti_4x4_sse(sites,
rate_cats,
Expand All @@ -260,7 +260,7 @@ double pll_core_edge_loglikelihood_ti_4x4(unsigned int sites,
}
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
return pll_core_edge_loglikelihood_ti_4x4_avx(sites,
rate_cats,
Expand All @@ -279,7 +279,7 @@ double pll_core_edge_loglikelihood_ti_4x4(unsigned int sites,
}
#endif
#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
return pll_core_edge_loglikelihood_ti_4x4_avx(sites,
rate_cats,
Expand Down Expand Up @@ -445,7 +445,7 @@ double pll_core_edge_loglikelihood_ti(unsigned int states,
unsigned int states_padded = states;

#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -488,7 +488,7 @@ double pll_core_edge_loglikelihood_ti(unsigned int states,
}
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -549,7 +549,7 @@ double pll_core_edge_loglikelihood_ti(unsigned int states,
}
#endif
#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -705,7 +705,7 @@ double pll_core_edge_loglikelihood_ii(unsigned int states,
unsigned int states_padded = states;

#ifdef HAVE_SSE3
if (attrib & PLL_ATTRIB_ARCH_SSE)
if (attrib & PLL_ATTRIB_ARCH_SSE && PLL_STAT(sse3_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -749,7 +749,7 @@ double pll_core_edge_loglikelihood_ii(unsigned int states,
}
#endif
#ifdef HAVE_AVX
if (attrib & PLL_ATTRIB_ARCH_AVX)
if (attrib & PLL_ATTRIB_ARCH_AVX && PLL_STAT(avx_present))
{
if (states == 4)
{
Expand Down Expand Up @@ -793,7 +793,7 @@ double pll_core_edge_loglikelihood_ii(unsigned int states,
}
#endif
#ifdef HAVE_AVX2
if (attrib & PLL_ATTRIB_ARCH_AVX2)
if (attrib & PLL_ATTRIB_ARCH_AVX2 && PLL_STAT(avx2_present))
{
if (states == 4)
{
Expand Down
Loading

0 comments on commit 33e5e35

Please sign in to comment.