Skip to content

Commit

Permalink
added check for x86intrin.h (fixed #137), changed to builtin function…
Browse files Browse the repository at this point in the history
…s for cpu detection (fixes #138 #140 and #141)
  • Loading branch information
flouris authored and flouris committed May 16, 2017
1 parent d53249a commit a7cf248
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 110 deletions.
3 changes: 1 addition & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ fast_parsimony.c \
stepwise.c \
random.c \
phylip.c \
hardware.c \
init.c
hardware.c

libpll_la_CFLAGS = $(AM_CFLAGS)

Expand Down
110 changes: 46 additions & 64 deletions src/hardware.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,101 +21,83 @@

#include "pll.h"

/* cpuid(f1, f2, a, b, c, d): executes the x86 "cpuid" instruction with f1
   loaded into EAX and f2 loaded into ECX, and stores the resulting
   EAX/EBX/ECX/EDX values in a, b, c and d respectively. The macro is only
   defined when __PPC__ is not defined.
   NOTE: the expansion already ends in ';', so a call written as
   `cpuid(...);` expands to an extra empty statement. */
#ifndef __PPC__
#define cpuid(f1, f2, a, b, c, d) \
__asm__ __volatile__ ("cpuid" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
: "a" (f1), "c" (f2));
#endif

/* Resets the pll_hardware record to zero and then fills in one *_present
   flag per CPU feature.
   NOTE(review): this span looks like a rendered diff hunk without +/- markers.
   The cpuid()-based statements that write through `pll_hardware->` and the
   __builtin_cpu_supports()-based statements that write `pll_hardware.` are
   most likely the removed and the added version of the same body -- confirm
   against the repository before treating this as one compilable function. */
static void cpu_features_detect()
{
unsigned int a,b,c,d;

memset(pll_hardware,0,sizeof(pll_hardware_t));

#ifdef __PPC__
pll_hardware->altivec_present = 1;
#else

/* leaf 0: the low byte of EAX is used as the highest leaf to query */
cpuid(0,0,a,b,c,d);
unsigned int maxlevel = a & 0xff;

if (maxlevel >= 1)
{
/* leaf 1: feature bits are picked out of EDX (d) and ECX (c) */
cpuid(1,0,a,b,c,d);
pll_hardware->mmx_present = (d >> 23) & 1;
pll_hardware->sse_present = (d >> 25) & 1;
pll_hardware->sse2_present = (d >> 26) & 1;
pll_hardware->sse3_present = (c >> 0) & 1;
pll_hardware->ssse3_present = (c >> 9) & 1;
pll_hardware->sse41_present = (c >> 19) & 1;
pll_hardware->sse42_present = (c >> 20) & 1;
pll_hardware->popcnt_present = (c >> 23) & 1;
pll_hardware->avx_present = (c >> 28) & 1;

if (maxlevel >= 7)
{
/* leaf 7, sub-leaf 0: AVX2 is read from bit 5 of EBX (b) */
cpuid(7,0,a,b,c,d);
pll_hardware->avx2_present = (b >> 5) & 1;
}
}
memset(&pll_hardware,0,sizeof(pll_hardware_t));

/* mark the record as probed before the per-feature flags are filled in */
pll_hardware.init = 1;
#if defined(__PPC__)
pll_hardware.altivec_present = __builtin_cpu_supports("altivec");
#elif defined(__x86_64__) || defined(__i386__)
/* each flag stores the value returned by the compiler builtin for the
   named feature; on other architectures all flags stay zero */
pll_hardware.mmx_present = __builtin_cpu_supports("mmx");
pll_hardware.sse_present = __builtin_cpu_supports("sse");
pll_hardware.sse2_present = __builtin_cpu_supports("sse2");
pll_hardware.sse3_present = __builtin_cpu_supports("sse3");
pll_hardware.ssse3_present = __builtin_cpu_supports("ssse3");
pll_hardware.sse41_present = __builtin_cpu_supports("sse4.1");
pll_hardware.sse42_present = __builtin_cpu_supports("sse4.2");
pll_hardware.popcnt_present = __builtin_cpu_supports("popcnt");
pll_hardware.avx_present = __builtin_cpu_supports("avx");
pll_hardware.avx2_present = __builtin_cpu_supports("avx2");
#endif
}

/* Writes one line to stderr: "CPU features:" followed by the name of every
   feature whose *_present flag is non-zero, then a newline.
   NOTE(review): each test appears twice, once through `pll_hardware->` and
   once through `pll_hardware.`; this is presumably the removed/added pair of
   a rendered diff rather than intentional duplication -- confirm against the
   repository. */
static void cpu_features_show()
{
if (!pll_hardware)
{
/* TODO: Add proper error control after we figure out
cross-platform compatibility */
return;
}

fprintf(stderr, "CPU features:");
if (pll_hardware->altivec_present)
if (pll_hardware.altivec_present)
fprintf(stderr, " altivec");
if (pll_hardware->mmx_present)
if (pll_hardware.mmx_present)
fprintf(stderr, " mmx");
if (pll_hardware->sse_present)
if (pll_hardware.sse_present)
fprintf(stderr, " sse");
if (pll_hardware->sse2_present)
if (pll_hardware.sse2_present)
fprintf(stderr, " sse2");
if (pll_hardware->sse3_present)
if (pll_hardware.sse3_present)
fprintf(stderr, " sse3");
if (pll_hardware->ssse3_present)
if (pll_hardware.ssse3_present)
fprintf(stderr, " ssse3");
if (pll_hardware->sse41_present)
if (pll_hardware.sse41_present)
fprintf(stderr, " sse4.1");
if (pll_hardware->sse42_present)
if (pll_hardware.sse42_present)
fprintf(stderr, " sse4.2");
if (pll_hardware->popcnt_present)
if (pll_hardware.popcnt_present)
fprintf(stderr, " popcnt");
if (pll_hardware->avx_present)
if (pll_hardware.avx_present)
fprintf(stderr, " avx");
if (pll_hardware->avx2_present)
if (pll_hardware.avx2_present)
fprintf(stderr, " avx2");
fprintf(stderr, "\n");
}

/* Runs cpu_features_detect() and returns PLL_SUCCESS.
   The allocation branch below returns PLL_FAILURE, with pll_errno set to
   PLL_ERROR_MEM_ALLOC and a message in pll_errmsg, when calloc() fails.
   NOTE(review): that branch treats pll_hardware as a pointer and is
   presumably the removed side of a rendered diff -- confirm against the
   repository. */
PLL_EXPORT int pll_hardware_probe()
{
/* probe cpu features */
if (!pll_hardware)
{
if (!(pll_hardware = (pll_hardware_t *)calloc(1,sizeof(pll_hardware_t))))
{
pll_errno = PLL_ERROR_MEM_ALLOC;
snprintf(pll_errmsg, 200, "Unable to allocate enough memory.");
return PLL_FAILURE;
}
}
cpu_features_detect();

return PLL_SUCCESS;
}

/* Prints the detected CPU features via cpu_features_show(), running
   pll_hardware_probe() first when the hardware record has not been
   initialised yet (pll_hardware.init is zero). */
PLL_EXPORT void pll_hardware_dump()
{
  /* probe lazily: only when nothing has filled in the record so far */
  if (pll_hardware.init == 0)
  {
    pll_hardware_probe();
  }

  cpu_features_show();
}

/* Marks the hardware record as initialised and sets every feature flag to 1,
   regardless of what the running CPU reports. */
PLL_EXPORT void pll_hardware_ignore()
{
  /* flag the record as initialised */
  pll_hardware.init = 1;

  /* MMX / SSE family */
  pll_hardware.mmx_present    = 1;
  pll_hardware.sse_present    = 1;
  pll_hardware.sse2_present   = 1;
  pll_hardware.sse3_present   = 1;
  pll_hardware.ssse3_present  = 1;
  pll_hardware.sse41_present  = 1;
  pll_hardware.sse42_present  = 1;

  /* AVX family */
  pll_hardware.avx_present    = 1;
  pll_hardware.avx2_present   = 1;

  /* remaining flags */
  pll_hardware.popcnt_present  = 1;
  pll_hardware.altivec_present = 1;
}
34 changes: 0 additions & 34 deletions src/init.c

This file was deleted.

2 changes: 1 addition & 1 deletion src/pll.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
__thread int pll_errno;
__thread char pll_errmsg[200] = {0};

pll_hardware_t * pll_hardware = NULL;
pll_hardware_t pll_hardware = {0,0,0,0,0,0,0,0,0,0,0,0};

static void dealloc_partition_data(pll_partition_t * partition);

Expand Down
18 changes: 9 additions & 9 deletions src/pll.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <x86intrin.h>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_X86INTRIN_H
#include <x86intrin.h>
#endif

/* platform specific */

#if (!defined(__APPLE__) && !defined(__WIN32__) && !defined(__WIN64__))
Expand All @@ -49,7 +52,8 @@
#define PLL_MIN(a,b) ((a) < (b) ? (a) : (b))
#define PLL_MAX(a,b) ((a) > (b) ? (a) : (b))
#define PLL_SWAP(x,y) do { __typeof__ (x) _t = x; x = y; y = _t; } while(0)
#define PLL_STAT(x) (pll_hardware && pll_hardware->x)
#define PLL_STAT(x) ((pll_hardware.init || pll_hardware_probe()) \
&& pll_hardware.x)

/* constants */

Expand Down Expand Up @@ -163,6 +167,7 @@

typedef struct pll_hardware_s
{
int init;
/* cpu features */
int altivec_present;
int mmx_present;
Expand Down Expand Up @@ -451,7 +456,7 @@ struct pll_random_data

PLL_EXPORT extern __thread int pll_errno;
PLL_EXPORT extern __thread char pll_errmsg[200];
PLL_EXPORT extern pll_hardware_t * pll_hardware;
PLL_EXPORT extern pll_hardware_t pll_hardware;

PLL_EXPORT extern const unsigned int pll_map_bin[256];
PLL_EXPORT extern const unsigned int pll_map_nt[256];
Expand Down Expand Up @@ -1875,12 +1880,7 @@ PLL_EXPORT int pll_hardware_probe(void);

PLL_EXPORT void pll_hardware_dump();

/* functions in init.c */

PLL_EXPORT void pll_init(void) __attribute__((constructor));

PLL_EXPORT void pll_fini(void) __attribute__((destructor));

PLL_EXPORT void pll_hardware_ignore();

#ifdef __cplusplus
} /* extern "C" */
Expand Down

0 comments on commit a7cf248

Please sign in to comment.