diff --git a/MathterConfig.cmake.in b/MathterConfig.cmake.in index ee0580c..1e294ee 100644 --- a/MathterConfig.cmake.in +++ b/MathterConfig.cmake.in @@ -7,7 +7,7 @@ set_and_check(Mathter_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@") include(CMakeFindDependencyMacro) if (${MATHTER_ENABLE_SIMD}) - find_dependency(xsimd) + find_dependency(xsimd 13) endif() include("${CMAKE_CURRENT_LIST_DIR}/MathterTargets.cmake") diff --git a/benchmark/main.cpp b/benchmark/main.cpp index 3dda25f..937cb01 100644 --- a/benchmark/main.cpp +++ b/benchmark/main.cpp @@ -20,31 +20,50 @@ using namespace std; using namespace mathter; +void PrintArch(std::string_view name, unsigned supported) { + std::cout << name << (supported ? "YES" : "NO") << std::endl; +} + + +template +void PrintVectorType(std::string_view name) { + std::cout << " " << name << ": " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; +} + + void DisplayArchitectureInfo() { #if MATHTER_ENABLE_SIMD std::cout << "Available on CPU: " << std::endl; const auto architectures = xsimd::available_architectures(); - std::cout << " sse2: " << (architectures.sse2 ? "YES" : "NO") << std::endl; - std::cout << " sse3: " << (architectures.sse3 ? "YES" : "NO") << std::endl; - std::cout << " ssse3: " << (architectures.ssse3 ? "YES" : "NO") << std::endl; - std::cout << " sse4_1: " << (architectures.sse4_1 ? "YES" : "NO") << std::endl; - std::cout << " sse4_2: " << (architectures.sse4_2 ? "YES" : "NO") << std::endl; - std::cout << " sse4a: " << (architectures.sse4a ? "YES" : "NO") << std::endl; - std::cout << " fma3_sse: " << (architectures.fma3_sse ? "YES" : "NO") << std::endl; - std::cout << " fma4: " << (architectures.fma4 ? "YES" : "NO") << std::endl; - std::cout << " xop: " << (architectures.xop ? "YES" : "NO") << std::endl; - std::cout << " avx: " << (architectures.avx ? "YES" : "NO") << std::endl; - std::cout << " fma3_avx: " << (architectures.fma3_avx ? "YES" : "NO") << std::endl; - std::cout << " avx2: " << (architectures.avx2 ? "YES" : "NO") << std::endl; - std::cout << " fma3_avx2: " << (architectures.fma3_avx2 ? "YES" : "NO") << std::endl; - std::cout << " avx512f: " << (architectures.avx512f ? "YES" : "NO") << std::endl; - std::cout << " avx512cd: " << (architectures.avx512cd ? "YES" : "NO") << std::endl; - std::cout << " avx512dq: " << (architectures.avx512dq ? "YES" : "NO") << std::endl; - std::cout << " avx512bw: " << (architectures.avx512bw ? "YES" : "NO") << std::endl; - std::cout << " neon: " << (architectures.neon ? "YES" : "NO") << std::endl; - std::cout << " neon64: " << (architectures.neon64 ? "YES" : "NO") << std::endl; - std::cout << " sve: " << (architectures.sve ? "YES" : "NO") << std::endl; - std::cout << std::endl; + + PrintArch("sse2", architectures.sse2); + PrintArch("sse3", architectures.sse3); + PrintArch("ssse3", architectures.ssse3); + PrintArch("sse4_1", architectures.sse4_1); + PrintArch("sse4_2", architectures.sse4_2); + PrintArch("fma3_sse42", architectures.fma3_sse42); + PrintArch("fma4", architectures.fma4); + PrintArch("avx", architectures.avx); + PrintArch("fma3_avx", architectures.fma3_avx); + PrintArch("avx2", architectures.avx2); + PrintArch("avxvnni", architectures.avxvnni); + PrintArch("fma3_avx2", architectures.fma3_avx2); + PrintArch("avx512f", architectures.avx512f); + PrintArch("avx512cd", architectures.avx512cd); + PrintArch("avx512dq", architectures.avx512dq); + PrintArch("avx512bw", architectures.avx512bw); + PrintArch("avx512er", architectures.avx512er); + PrintArch("avx512pf", architectures.avx512pf); + PrintArch("avx512ifma", architectures.avx512ifma); + PrintArch("avx512vbmi", architectures.avx512vbmi); + PrintArch("avx512vnni_bw", architectures.avx512vnni_bw); + PrintArch("avx512vnni_vbmi", architectures.avx512vnni_vbmi); + PrintArch("neon", architectures.neon); + PrintArch("neon64", architectures.neon64); + PrintArch("i8mm_neon64", architectures.i8mm_neon64); + PrintArch("sve", architectures.sve); + PrintArch("rvv", architectures.rvv); + PrintArch("wasm", architectures.wasm); std::cout << "Enabled in build: " << std::endl; xsimd::all_architectures::for_each([](const auto& arch) { @@ -63,12 +82,24 @@ int main(int argc, char* argv[]) { DisplayArchitectureInfo(); std::cout << "SIMD support:" << std::endl; - std::cout << " float.2: " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; - std::cout << " float.3: " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; - std::cout << " float.4: " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; - std::cout << " double.2: " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; - std::cout << " double.3: " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; - std::cout << " double.4: " << (IsBatched() ? "YES" : "NO") << " - " << sizeof(Vector) << " bytes" << std::endl; + PrintVectorType("float2"); + PrintVectorType("float3"); + PrintVectorType("float4"); + PrintVectorType("double2"); + PrintVectorType("double3"); + PrintVectorType("double4"); + PrintVectorType, 2>("c_float2"); + PrintVectorType, 3>("c_float3"); + PrintVectorType, 4>("c_float4"); + PrintVectorType, 2>("c_double2"); + PrintVectorType, 3>("c_double3"); + PrintVectorType, 4>("c_double4"); + PrintVectorType("i32_2"); + PrintVectorType("i32_3"); + PrintVectorType("i32_4"); + PrintVectorType("i64_2"); + PrintVectorType("i64_3"); + PrintVectorType("i64_4"); std::cout << std::endl; int ret = Catch::Session().run(argc, argv); diff --git a/conanfile.txt b/conanfile.txt index 65e94c3..c43852a 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -1,6 +1,6 @@ -[requires] -catch2/3.5.3 -xsimd/11.1.0 +[requires] +catch2/3.7.0 +xsimd/13.0.0 [generators] CMakeDeps diff --git a/include/Mathter/CMakeLists.txt b/include/Mathter/CMakeLists.txt index 54620de..5e59ed8 100644 --- a/include/Mathter/CMakeLists.txt +++ b/include/Mathter/CMakeLists.txt @@ -67,6 +67,11 @@ target_sources(Mathter "Vector/SIMDUtil.hpp" "Vector/Swizzle.hpp" "Vector/Vector.hpp" + # Swizzles + "Vector/SwizzleInc/Swizzle1.hpp.inc" + "Vector/SwizzleInc/Swizzle2.hpp.inc" + "Vector/SwizzleInc/Swizzle3.hpp.inc" + "Vector/SwizzleInc/Swizzle4.hpp.inc" ) set_target_properties(Mathter PROPERTIES VERIFY_INTERFACE_HEADER_SETS ON) @@ -83,7 +88,7 @@ endif() if (${MATHTER_ENABLE_SIMD}) message("Vectorization: ON (using XSimd).") - find_package(xsimd REQUIRED) + find_package(xsimd 13 REQUIRED) target_link_libraries(Mathter INTERFACE xsimd) target_compile_definitions(Mathter INTERFACE MATHTER_ENABLE_SIMD=1) else() diff --git a/include/Mathter/Vector/OperationUtil.hpp b/include/Mathter/Vector/OperationUtil.hpp index 6e33e7f..6e46131 100644 --- a/include/Mathter/Vector/OperationUtil.hpp +++ b/include/Mathter/Vector/OperationUtil.hpp @@ -29,7 +29,7 @@ Batch FillMasked(Batch batch, Element value) { } else { const auto fillers = Batch{ value }; - const auto mask = xsimd::make_batch_bool_constant(); + const auto mask = xsimd::make_batch_bool_constant(); return xsimd::select(mask, batch, fillers); } #else @@ -52,7 +52,7 @@ Batch FillMaskedWithFirst(Batch batch) { using UintBatch = xsimd::batch; - const auto mask = xsimd::make_batch_constant(); + const auto mask = xsimd::make_batch_constant(); return xsimd::swizzle(batch, mask); #else return batch; diff --git a/include/Mathter/Vector/Swizzle.hpp b/include/Mathter/Vector/Swizzle.hpp index 07571cf..b7d6a5f 100644 --- a/include/Mathter/Vector/Swizzle.hpp +++ b/include/Mathter/Vector/Swizzle.hpp @@ -200,7 +200,7 @@ typename Swizzle::TargetStorage Swizzle; const auto sourceBatch = SourceBatch::load_aligned(array.data()); - const auto mask = xsimd::make_batch_constant>(); + const auto mask = xsimd::make_batch_constant>(); const auto linearizedBatch = xsimd::swizzle(sourceBatch, mask); alignas(GetStorageAlignment()) SourceStorage linearized; @@ -226,7 +226,7 @@ auto Swizzle::Delinearize(const TargetStorage& targe alignas(GetStorageAlignment()) SourceStorage targetPadded; std::copy(target.begin(), target.end(), targetPadded.begin()); const auto targetBatch = SourceBatch::load_aligned(targetPadded.data()); - const auto mask = xsimd::make_batch_constant>(); + const auto mask = xsimd::make_batch_constant>(); const auto delinearizedBatch = xsimd::swizzle(targetBatch, mask); alignas(GetStorageAlignment()) SourceStorage delinearized; @@ -250,7 +250,7 @@ auto Swizzle::Blend(const SourceStorage& old, const if constexpr (isSourceBatched && sizeof...(Indices) <= Dim) { const auto oldBatch = SourceBatch::load_unaligned(old.data()); const auto freshBatch = SourceBatch::load_unaligned(fresh.data()); - const auto mask = xsimd::make_batch_bool_constant>(); + const auto mask = xsimd::make_batch_bool_constant>(); const auto blendedBatch = xsimd::select(mask, freshBatch, oldBatch); alignas(GetStorageAlignment()) SourceStorage blended;