From 84637d141951a981d791c82f9021387122515b87 Mon Sep 17 00:00:00 2001 From: Guillaume Piolat Date: Tue, 22 Oct 2024 15:29:15 +0200 Subject: [PATCH] Function that should have been pure, by cet. Edit contributing guidelines. --- CONTRIBUTING.md | 13 ++++++++++++- source/inteli/avxintrin.d | 9 +++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e18b2d6..40b79d0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,14 +1,25 @@ Advice: - **GODBOLT EVERYTHING YOU COMMIT** + * Use `godbolt-template.d` and modify it as you wish + * GDC (version 12 or later) with -mavx -mavx2 (the template doesn't build without -mavx) + * LDC (version 1.24 or later) with -mtriple arm64, -O2, -O0, -mattr=+avx2, etc. -- Do intrinsics **one by one**, not all at once. This is very detailed work, it's not possible nor desirable to go fast while writing intrinsics. Please don't. + +- Do intrinsics **one by one**, not all at once. This is **very** detailed work, it's not possible nor desirable to go fast while writing intrinsics. + * Please don't go fast. + * Please make small PRs because there is a lot of context to communicate. + * Get pre-approval before working on something big. - Add PERF comment anywhere you feel that something could be done faster in a supported combination: DMD D_SIMD, LDC x86_64, LDC arm64, LDC x86, GDC x86_64, with or without optimizations, with or without instruction support... * If this is supposed returns a SIMD literal, does it inline? * Can this be faster in -O0? * If instruction support is not there, is the alternative path fast? +- Later instruction sets are allowed to use intrinsics from the same or earlier instruction sets. + +- Keep in mind all intrinsics should work, whatever the compiler and flags, with the same semantics. This is the main appeal of the library. 
+ To be merged a PR: diff --git a/source/inteli/avxintrin.d b/source/inteli/avxintrin.d index d9a8969..00207df 100644 --- a/source/inteli/avxintrin.d +++ b/source/inteli/avxintrin.d @@ -4,6 +4,7 @@ * * Copyright: Guillaume Piolat 2022. * Johan Engelen 2022. +* cet 2024. * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) */ module inteli.avxintrin; @@ -3300,7 +3301,7 @@ unittest } /// Broadcast 64-bit integer `a` to all elements of the return value. -__m256i _mm256_set1_epi64x (long a) +__m256i _mm256_set1_epi64x (long a) pure { return cast(__m256i)(long4(a)); } @@ -3459,7 +3460,7 @@ unittest } /// Set packed `__m256` vector with the supplied values. -__m256 _mm256_setr_m128 (__m128 lo, __m128 hi) +__m256 _mm256_setr_m128 (__m128 lo, __m128 hi) pure { return _mm256_set_m128(hi, lo); } @@ -3473,7 +3474,7 @@ unittest } /// Set packed `__m256d` vector with the supplied values. -__m256d _mm256_setr_m128d (__m128d lo, __m128d hi) +__m256d _mm256_setr_m128d (__m128d lo, __m128d hi) pure { return _mm256_set_m128d(hi, lo); } @@ -3487,7 +3488,7 @@ unittest } /// Set packed `__m256i` vector with the supplied values. -__m256i _mm256_setr_m128i (__m128i lo, __m128i hi) +__m256i _mm256_setr_m128i (__m128i lo, __m128i hi) pure { return _mm256_set_m128i(hi, lo); }