diff --git a/src/Makefile.am b/src/Makefile.am
index 23c16ed..977595b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -37,8 +37,7 @@ fast_parsimony.c \
stepwise.c \
random.c \
phylip.c \
-hardware.c \
-init.c
+hardware.c
libpll_la_CFLAGS = $(AM_CFLAGS)
diff --git a/src/hardware.c b/src/hardware.c
index 49d414a..c02b5cf 100644
--- a/src/hardware.c
+++ b/src/hardware.c
@@ -21,79 +21,51 @@
#include "pll.h"
-#ifndef __PPC__
-#define cpuid(f1, f2, a, b, c, d) \
- __asm__ __volatile__ ("cpuid" \
- : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
- : "a" (f1), "c" (f2));
-#endif
-
+/* Detect the CPU features of the running machine and store them in the
+   global pll_hardware structure.
+
+   The flags are gathered in a local copy and published afterwards, and
+   pll_hardware.init is raised as the very last step. PLL_STAT tests
+   pll_hardware.init before reading a flag, so raising it before the flags
+   are filled in (or zeroing the global while re-probing) would let a
+   concurrent reader see an "initialized" structure whose flags are all 0.
+   NOTE(review): this only fixes the store order; it is not a memory
+   barrier. Callers that need a strict guarantee should call
+   pll_hardware_probe() once before spawning threads. */
static void cpu_features_detect()
{
- unsigned int a,b,c,d;
-
- memset(pll_hardware,0,sizeof(pll_hardware_t));
-
-#ifdef __PPC__
- pll_hardware->altivec_present = 1;
-#else
-
- cpuid(0,0,a,b,c,d);
- unsigned int maxlevel = a & 0xff;
-
- if (maxlevel >= 1)
- {
- cpuid(1,0,a,b,c,d);
- pll_hardware->mmx_present = (d >> 23) & 1;
- pll_hardware->sse_present = (d >> 25) & 1;
- pll_hardware->sse2_present = (d >> 26) & 1;
- pll_hardware->sse3_present = (c >> 0) & 1;
- pll_hardware->ssse3_present = (c >> 9) & 1;
- pll_hardware->sse41_present = (c >> 19) & 1;
- pll_hardware->sse42_present = (c >> 20) & 1;
- pll_hardware->popcnt_present = (c >> 23) & 1;
- pll_hardware->avx_present = (c >> 28) & 1;
-
- if (maxlevel >= 7)
- {
- cpuid(7,0,a,b,c,d);
- pll_hardware->avx2_present = (b >> 5) & 1;
- }
- }
+  pll_hardware_t detected;
+
+  memset(&detected,0,sizeof(pll_hardware_t));
+
+  /* on any other architecture all flags stay 0 */
+#if defined(__PPC__)
+  detected.altivec_present = __builtin_cpu_supports("altivec");
+#elif defined(__x86_64__) || defined(__i386__)
+  detected.mmx_present    = __builtin_cpu_supports("mmx");
+  detected.sse_present    = __builtin_cpu_supports("sse");
+  detected.sse2_present   = __builtin_cpu_supports("sse2");
+  detected.sse3_present   = __builtin_cpu_supports("sse3");
+  detected.ssse3_present  = __builtin_cpu_supports("ssse3");
+  detected.sse41_present  = __builtin_cpu_supports("sse4.1");
+  detected.sse42_present  = __builtin_cpu_supports("sse4.2");
+  detected.popcnt_present = __builtin_cpu_supports("popcnt");
+  detected.avx_present    = __builtin_cpu_supports("avx");
+  detected.avx2_present   = __builtin_cpu_supports("avx2");
#endif
+
+  /* keep the current init state while the flags are copied over, then mark
+     the structure as initialized once every flag is in place */
+  detected.init = pll_hardware.init;
+  pll_hardware = detected;
+  pll_hardware.init = 1;
}
+/* Write one line to stderr that starts with "CPU features:" and lists every
+   feature whose flag in the global pll_hardware structure is non-zero.
+   This function does not probe the hardware itself; pll_hardware_dump()
+   makes sure the flags were filled in before calling it. */
static void cpu_features_show()
{
- if (!pll_hardware)
- {
- /* TODO: Add proper error control after we figure out
- cross-platform compatibility */
- return;
- }
-
fprintf(stderr, "CPU features:");
- if (pll_hardware->altivec_present)
+ if (pll_hardware.altivec_present)
fprintf(stderr, " altivec");
- if (pll_hardware->mmx_present)
+ if (pll_hardware.mmx_present)
fprintf(stderr, " mmx");
- if (pll_hardware->sse_present)
+ if (pll_hardware.sse_present)
fprintf(stderr, " sse");
- if (pll_hardware->sse2_present)
+ if (pll_hardware.sse2_present)
fprintf(stderr, " sse2");
- if (pll_hardware->sse3_present)
+ if (pll_hardware.sse3_present)
fprintf(stderr, " sse3");
- if (pll_hardware->ssse3_present)
+ if (pll_hardware.ssse3_present)
fprintf(stderr, " ssse3");
- if (pll_hardware->sse41_present)
+ if (pll_hardware.sse41_present)
fprintf(stderr, " sse4.1");
- if (pll_hardware->sse42_present)
+ if (pll_hardware.sse42_present)
fprintf(stderr, " sse4.2");
- if (pll_hardware->popcnt_present)
+ if (pll_hardware.popcnt_present)
fprintf(stderr, " popcnt");
- if (pll_hardware->avx_present)
+ if (pll_hardware.avx_present)
fprintf(stderr, " avx");
- if (pll_hardware->avx2_present)
+ if (pll_hardware.avx2_present)
fprintf(stderr, " avx2");
fprintf(stderr, "\n");
}
@@ -101,15 +73,6 @@ static void cpu_features_show()
PLL_EXPORT int pll_hardware_probe()
{
/* probe cpu features */
- if (!pll_hardware)
- {
- if (!(pll_hardware = (pll_hardware_t *)calloc(1,sizeof(pll_hardware_t))))
- {
- pll_errno = PLL_ERROR_MEM_ALLOC;
- snprintf(pll_errmsg, 200, "Unable to allocate enough memory.");
- return PLL_FAILURE;
- }
- }
cpu_features_detect();
return PLL_SUCCESS;
@@ -117,5 +80,24 @@ PLL_EXPORT int pll_hardware_probe()
+/* Print the CPU feature flags to stderr. If pll_hardware.init is still zero
+   the hardware is probed first, so the function can be called without any
+   prior initialization. */
PLL_EXPORT void pll_hardware_dump()
{
+ if (!pll_hardware.init)
+ pll_hardware_probe();
+
cpu_features_show();
}
+
+/* Mark every CPU feature as present without querying the processor.
+   pll_hardware.init is set as well, so PLL_STAT and pll_hardware_dump() will
+   not trigger a probe afterwards; an explicit pll_hardware_probe() call
+   still replaces these values with the detected ones. */
+PLL_EXPORT void pll_hardware_ignore()
+{
+  pll_hardware_t * hw = &pll_hardware;
+
+  hw->init = 1;
+
+  /* PowerPC */
+  hw->altivec_present = 1;
+
+  /* x86 */
+  hw->mmx_present = 1;
+  hw->sse_present = 1;
+  hw->sse2_present = 1;
+  hw->sse3_present = 1;
+  hw->ssse3_present = 1;
+  hw->sse41_present = 1;
+  hw->sse42_present = 1;
+  hw->popcnt_present = 1;
+  hw->avx_present = 1;
+  hw->avx2_present = 1;
+}
diff --git a/src/init.c b/src/init.c
deleted file mode 100644
index 90d5bd3..0000000
--- a/src/init.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- Copyright (C) 2017 Tomas Flouri
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as
- published by the Free Software Foundation, either version 3 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see .
-
- Contact: Tomas Flouri ,
- Exelixis Lab, Heidelberg Instutute for Theoretical Studies
- Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
-*/
-
-#include "pll.h"
-
-PLL_EXPORT void pll_init()
-{
- pll_hardware_probe();
-}
-
-PLL_EXPORT void pll_fini()
-{
- if (pll_hardware)
- free(pll_hardware);
- pll_hardware = NULL;
-}
diff --git a/src/pll.c b/src/pll.c
index 0944761..299d0cd 100644
--- a/src/pll.c
+++ b/src/pll.c
@@ -24,7 +24,7 @@
__thread int pll_errno;
__thread char pll_errmsg[200] = {0};
-pll_hardware_t * pll_hardware = NULL;
+/* Global CPU feature flags. Everything, including the init field, starts
+   out as zero; {0} zero-initializes all members, so the initializer does
+   not have to track the number of fields in pll_hardware_t. */
+pll_hardware_t pll_hardware = {0};
static void dealloc_partition_data(pll_partition_t * partition);
diff --git a/src/pll.h b/src/pll.h
index fe38a76..90d0f32 100644
--- a/src/pll.h
+++ b/src/pll.h
@@ -26,12 +26,15 @@
#include
#include
#include
-#include
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
+#ifdef HAVE_X86INTRIN_H
+#include <x86intrin.h>
+#endif
+
/* platform specific */
#if (!defined(__APPLE__) && !defined(__WIN32__) && !defined(__WIN64__))
@@ -49,7 +52,8 @@
#define PLL_MIN(a,b) ((a) < (b) ? (a) : (b))
#define PLL_MAX(a,b) ((a) > (b) ? (a) : (b))
#define PLL_SWAP(x,y) do { __typeof__ (x) _t = x; x = y; y = _t; } while(0)
-#define PLL_STAT(x) (pll_hardware && pll_hardware->x)
+/* Evaluates to 1 if field `x` of the global pll_hardware structure is
+   non-zero and to 0 otherwise; `x` must be the name of a pll_hardware_t
+   member. If pll_hardware.init is zero, pll_hardware_probe() is called
+   first (so the macro may have a side effect), and the result is 0 if that
+   call returns zero. */
+#define PLL_STAT(x) ((pll_hardware.init || pll_hardware_probe()) \
+ && pll_hardware.x)
/* constants */
@@ -163,6 +167,7 @@
typedef struct pll_hardware_s
{
+ int init;
/* cpu features */
int altivec_present;
int mmx_present;
@@ -451,7 +456,7 @@ struct pll_random_data
PLL_EXPORT extern __thread int pll_errno;
PLL_EXPORT extern __thread char pll_errmsg[200];
-PLL_EXPORT extern pll_hardware_t * pll_hardware;
+PLL_EXPORT extern pll_hardware_t pll_hardware;
PLL_EXPORT extern const unsigned int pll_map_bin[256];
PLL_EXPORT extern const unsigned int pll_map_nt[256];
@@ -1875,12 +1880,7 @@ PLL_EXPORT int pll_hardware_probe(void);
PLL_EXPORT void pll_hardware_dump();
-/* functions in init.c */
-
-PLL_EXPORT void pll_init(void) __attribute__((constructor));
-
-PLL_EXPORT void pll_fini(void) __attribute__((destructor));
-
+/* Mark all CPU features as present without probing the hardware.
+   Declared with (void) so that C callers get a real prototype and passing
+   arguments by mistake is diagnosed at compile time. */
+PLL_EXPORT void pll_hardware_ignore(void);
#ifdef __cplusplus
} /* extern "C" */