From a7cf248b7f59c724082133a998d938934370bc19 Mon Sep 17 00:00:00 2001 From: flouris Date: Tue, 16 May 2017 18:03:09 +0200 Subject: [PATCH] added check for x86intrin.h (fixed #137), changed to builtin functions for cpu detection (fixes #138 #140 and #141) --- src/Makefile.am | 3 +- src/hardware.c | 110 ++++++++++++++++++++---------------------------- src/init.c | 34 --------------- src/pll.c | 2 +- src/pll.h | 18 ++++---- 5 files changed, 57 insertions(+), 110 deletions(-) delete mode 100644 src/init.c diff --git a/src/Makefile.am b/src/Makefile.am index 23c16ed..977595b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -37,8 +37,7 @@ fast_parsimony.c \ stepwise.c \ random.c \ phylip.c \ -hardware.c \ -init.c +hardware.c libpll_la_CFLAGS = $(AM_CFLAGS) diff --git a/src/hardware.c b/src/hardware.c index 49d414a..c02b5cf 100644 --- a/src/hardware.c +++ b/src/hardware.c @@ -21,79 +21,51 @@ #include "pll.h" -#ifndef __PPC__ -#define cpuid(f1, f2, a, b, c, d) \ - __asm__ __volatile__ ("cpuid" \ - : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ - : "a" (f1), "c" (f2)); -#endif - static void cpu_features_detect() { - unsigned int a,b,c,d; - - memset(pll_hardware,0,sizeof(pll_hardware_t)); - -#ifdef __PPC__ - pll_hardware->altivec_present = 1; -#else - - cpuid(0,0,a,b,c,d); - unsigned int maxlevel = a & 0xff; - - if (maxlevel >= 1) - { - cpuid(1,0,a,b,c,d); - pll_hardware->mmx_present = (d >> 23) & 1; - pll_hardware->sse_present = (d >> 25) & 1; - pll_hardware->sse2_present = (d >> 26) & 1; - pll_hardware->sse3_present = (c >> 0) & 1; - pll_hardware->ssse3_present = (c >> 9) & 1; - pll_hardware->sse41_present = (c >> 19) & 1; - pll_hardware->sse42_present = (c >> 20) & 1; - pll_hardware->popcnt_present = (c >> 23) & 1; - pll_hardware->avx_present = (c >> 28) & 1; - - if (maxlevel >= 7) - { - cpuid(7,0,a,b,c,d); - pll_hardware->avx2_present = (b >> 5) & 1; - } - } + memset(&pll_hardware,0,sizeof(pll_hardware_t)); + + pll_hardware.init = 1; +#if defined(__PPC__) + 
pll_hardware.altivec_present = __builtin_cpu_supports("altivec"); +#elif defined(__x86_64__) || defined(__i386__) + pll_hardware.mmx_present = __builtin_cpu_supports("mmx"); + pll_hardware.sse_present = __builtin_cpu_supports("sse"); + pll_hardware.sse2_present = __builtin_cpu_supports("sse2"); + pll_hardware.sse3_present = __builtin_cpu_supports("sse3"); + pll_hardware.ssse3_present = __builtin_cpu_supports("ssse3"); + pll_hardware.sse41_present = __builtin_cpu_supports("sse4.1"); + pll_hardware.sse42_present = __builtin_cpu_supports("sse4.2"); + pll_hardware.popcnt_present = __builtin_cpu_supports("popcnt"); + pll_hardware.avx_present = __builtin_cpu_supports("avx"); + pll_hardware.avx2_present = __builtin_cpu_supports("avx2"); #endif } static void cpu_features_show() { - if (!pll_hardware) - { - /* TODO: Add proper error control after we figure out - cross-platform compatibility */ - return; - } - fprintf(stderr, "CPU features:"); - if (pll_hardware->altivec_present) + if (pll_hardware.altivec_present) fprintf(stderr, " altivec"); - if (pll_hardware->mmx_present) + if (pll_hardware.mmx_present) fprintf(stderr, " mmx"); - if (pll_hardware->sse_present) + if (pll_hardware.sse_present) fprintf(stderr, " sse"); - if (pll_hardware->sse2_present) + if (pll_hardware.sse2_present) fprintf(stderr, " sse2"); - if (pll_hardware->sse3_present) + if (pll_hardware.sse3_present) fprintf(stderr, " sse3"); - if (pll_hardware->ssse3_present) + if (pll_hardware.ssse3_present) fprintf(stderr, " ssse3"); - if (pll_hardware->sse41_present) + if (pll_hardware.sse41_present) fprintf(stderr, " sse4.1"); - if (pll_hardware->sse42_present) + if (pll_hardware.sse42_present) fprintf(stderr, " sse4.2"); - if (pll_hardware->popcnt_present) + if (pll_hardware.popcnt_present) fprintf(stderr, " popcnt"); - if (pll_hardware->avx_present) + if (pll_hardware.avx_present) fprintf(stderr, " avx"); - if (pll_hardware->avx2_present) + if (pll_hardware.avx2_present) fprintf(stderr, " avx2"); 
fprintf(stderr, "\n"); } @@ -101,15 +73,6 @@ static void cpu_features_show() PLL_EXPORT int pll_hardware_probe() { /* probe cpu features */ - if (!pll_hardware) - { - if (!(pll_hardware = (pll_hardware_t *)calloc(1,sizeof(pll_hardware_t)))) - { - pll_errno = PLL_ERROR_MEM_ALLOC; - snprintf(pll_errmsg, 200, "Unable to allocate enough memory."); - return PLL_FAILURE; - } - } cpu_features_detect(); return PLL_SUCCESS; @@ -117,5 +80,24 @@ PLL_EXPORT int pll_hardware_probe() PLL_EXPORT void pll_hardware_dump() { + if (!pll_hardware.init) + pll_hardware_probe(); + cpu_features_show(); } + +PLL_EXPORT void pll_hardware_ignore() +{ + pll_hardware.init = 1; + pll_hardware.altivec_present = 1; + pll_hardware.mmx_present = 1; + pll_hardware.sse_present = 1; + pll_hardware.sse2_present = 1; + pll_hardware.sse3_present = 1; + pll_hardware.ssse3_present = 1; + pll_hardware.sse41_present = 1; + pll_hardware.sse42_present = 1; + pll_hardware.popcnt_present = 1; + pll_hardware.avx_present = 1; + pll_hardware.avx2_present = 1; +} diff --git a/src/init.c b/src/init.c deleted file mode 100644 index 90d5bd3..0000000 --- a/src/init.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - Copyright (C) 2017 Tomas Flouri - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as - published by the Free Software Foundation, either version 3 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>.
- - Contact: Tomas Flouri <Tomas.Flouri@h-its.org>, - Exelixis Lab, Heidelberg Instutute for Theoretical Studies - Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany -*/ - -#include "pll.h" - -PLL_EXPORT void pll_init() -{ - pll_hardware_probe(); -} - -PLL_EXPORT void pll_fini() -{ - if (pll_hardware) - free(pll_hardware); - pll_hardware = NULL; -} diff --git a/src/pll.c b/src/pll.c index 0944761..299d0cd 100644 --- a/src/pll.c +++ b/src/pll.c @@ -24,7 +24,7 @@ __thread int pll_errno; __thread char pll_errmsg[200] = {0}; -pll_hardware_t * pll_hardware = NULL; +pll_hardware_t pll_hardware = {0,0,0,0,0,0,0,0,0,0,0,0}; static void dealloc_partition_data(pll_partition_t * partition); diff --git a/src/pll.h b/src/pll.h index fe38a76..90d0f32 100644 --- a/src/pll.h +++ b/src/pll.h @@ -26,12 +26,15 @@ #include #include #include -#include <x86intrin.h> #ifdef HAVE_CONFIG_H #include "config.h" #endif +#ifdef HAVE_X86INTRIN_H +#include <x86intrin.h> +#endif + /* platform specific */ #if (!defined(__APPLE__) && !defined(__WIN32__) && !defined(__WIN64__)) @@ -49,7 +52,8 @@ #define PLL_MIN(a,b) ((a) < (b) ? (a) : (b)) #define PLL_MAX(a,b) ((a) > (b) ? 
(a) : (b)) #define PLL_SWAP(x,y) do { __typeof__ (x) _t = x; x = y; y = _t; } while(0) -#define PLL_STAT(x) (pll_hardware && pll_hardware->x) +#define PLL_STAT(x) ((pll_hardware.init || pll_hardware_probe()) \ + && pll_hardware.x) /* constants */ @@ -163,6 +167,7 @@ typedef struct pll_hardware_s { + int init; /* cpu features */ int altivec_present; int mmx_present; @@ -451,7 +456,7 @@ struct pll_random_data PLL_EXPORT extern __thread int pll_errno; PLL_EXPORT extern __thread char pll_errmsg[200]; -PLL_EXPORT extern pll_hardware_t * pll_hardware; +PLL_EXPORT extern pll_hardware_t pll_hardware; PLL_EXPORT extern const unsigned int pll_map_bin[256]; PLL_EXPORT extern const unsigned int pll_map_nt[256]; @@ -1875,12 +1880,7 @@ PLL_EXPORT int pll_hardware_probe(void); PLL_EXPORT void pll_hardware_dump(); -/* functions in init.c */ - -PLL_EXPORT void pll_init(void) __attribute__((constructor)); - -PLL_EXPORT void pll_fini(void) __attribute__((destructor)); - +PLL_EXPORT void pll_hardware_ignore(); #ifdef __cplusplus } /* extern "C" */