Skip to content

Commit

Permalink
Merge pull request #50 from petiaccja/bump-dependencies
Browse files Browse the repository at this point in the history
bump xsimd and catch2
  • Loading branch information
petiaccja authored Sep 16, 2024
2 parents 4a607da + 0c6b32a commit 76f3a53
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 37 deletions.
2 changes: 1 addition & 1 deletion MathterConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set_and_check(Mathter_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")

include(CMakeFindDependencyMacro)
if (${MATHTER_ENABLE_SIMD})
find_dependency(xsimd)
find_dependency(xsimd 13)
endif()

include("${CMAKE_CURRENT_LIST_DIR}/MathterTargets.cmake")
Expand Down
85 changes: 58 additions & 27 deletions benchmark/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,50 @@ using namespace std;
using namespace mathter;


/// Prints one line of the CPU-capability report in the form " <name>: YES|NO".
///
/// @param name       Label of the instruction-set architecture (e.g. "sse2").
/// @param supported  Non-zero if the architecture is reported as available, zero otherwise.
void PrintArch(std::string_view name, unsigned supported) {
    // Use the same " <label>: <value>" layout as PrintVectorType so that the
    // label and the flag do not run together (previously printed "sse2YES").
    const char* const verdict = (supported != 0) ? "YES" : "NO";
    std::cout << " " << name << ": " << verdict << std::endl;
}


template <class T, int Dim>
void PrintVectorType(std::string_view name) {
std::cout << " " << name << ": " << (IsBatched<T, Dim, false>() ? "YES" : "NO") << " - " << sizeof(Vector<T, Dim, false>) << " bytes" << std::endl;
}


void DisplayArchitectureInfo() {
#if MATHTER_ENABLE_SIMD
std::cout << "Available on CPU: " << std::endl;
const auto architectures = xsimd::available_architectures();
std::cout << " sse2: " << (architectures.sse2 ? "YES" : "NO") << std::endl;
std::cout << " sse3: " << (architectures.sse3 ? "YES" : "NO") << std::endl;
std::cout << " ssse3: " << (architectures.ssse3 ? "YES" : "NO") << std::endl;
std::cout << " sse4_1: " << (architectures.sse4_1 ? "YES" : "NO") << std::endl;
std::cout << " sse4_2: " << (architectures.sse4_2 ? "YES" : "NO") << std::endl;
std::cout << " sse4a: " << (architectures.sse4a ? "YES" : "NO") << std::endl;
std::cout << " fma3_sse: " << (architectures.fma3_sse ? "YES" : "NO") << std::endl;
std::cout << " fma4: " << (architectures.fma4 ? "YES" : "NO") << std::endl;
std::cout << " xop: " << (architectures.xop ? "YES" : "NO") << std::endl;
std::cout << " avx: " << (architectures.avx ? "YES" : "NO") << std::endl;
std::cout << " fma3_avx: " << (architectures.fma3_avx ? "YES" : "NO") << std::endl;
std::cout << " avx2: " << (architectures.avx2 ? "YES" : "NO") << std::endl;
std::cout << " fma3_avx2: " << (architectures.fma3_avx2 ? "YES" : "NO") << std::endl;
std::cout << " avx512f: " << (architectures.avx512f ? "YES" : "NO") << std::endl;
std::cout << " avx512cd: " << (architectures.avx512cd ? "YES" : "NO") << std::endl;
std::cout << " avx512dq: " << (architectures.avx512dq ? "YES" : "NO") << std::endl;
std::cout << " avx512bw: " << (architectures.avx512bw ? "YES" : "NO") << std::endl;
std::cout << " neon: " << (architectures.neon ? "YES" : "NO") << std::endl;
std::cout << " neon64: " << (architectures.neon64 ? "YES" : "NO") << std::endl;
std::cout << " sve: " << (architectures.sve ? "YES" : "NO") << std::endl;
std::cout << std::endl;

PrintArch("sse2", architectures.sse2);
PrintArch("sse3", architectures.sse3);
PrintArch("ssse3", architectures.ssse3);
PrintArch("sse4_1", architectures.sse4_1);
PrintArch("sse4_2", architectures.sse4_2);
PrintArch("fma3_sse42", architectures.fma3_sse42);
PrintArch("fma4", architectures.fma4);
PrintArch("avx", architectures.avx);
PrintArch("fma3_avx", architectures.fma3_avx);
PrintArch("avx2", architectures.avx2);
PrintArch("avxvnni", architectures.avxvnni);
PrintArch("fma3_avx2", architectures.fma3_avx2);
PrintArch("avx512f", architectures.avx512f);
PrintArch("avx512cd", architectures.avx512cd);
PrintArch("avx512dq", architectures.avx512dq);
PrintArch("avx512bw", architectures.avx512bw);
PrintArch("avx512er", architectures.avx512er);
PrintArch("avx512pf", architectures.avx512pf);
PrintArch("avx512ifma", architectures.avx512ifma);
PrintArch("avx512vbmi", architectures.avx512vbmi);
PrintArch("avx512vnni_bw", architectures.avx512vnni_bw);
PrintArch("avx512vnni_vbmi", architectures.avx512vnni_vbmi);
PrintArch("neon", architectures.neon);
PrintArch("neon64", architectures.neon64);
PrintArch("i8mm_neon64", architectures.i8mm_neon64);
PrintArch("sve", architectures.sve);
PrintArch("rvv", architectures.rvv);
PrintArch("wasm", architectures.wasm);

std::cout << "Enabled in build: " << std::endl;
xsimd::all_architectures::for_each([](const auto& arch) {
Expand All @@ -63,12 +82,24 @@ int main(int argc, char* argv[]) {
DisplayArchitectureInfo();

std::cout << "SIMD support:" << std::endl;
std::cout << " float.2: " << (IsBatched<float, 2, false>() ? "YES" : "NO") << " - " << sizeof(Vector<float, 2, false>) << " bytes" << std::endl;
std::cout << " float.3: " << (IsBatched<float, 3, false>() ? "YES" : "NO") << " - " << sizeof(Vector<float, 3, false>) << " bytes" << std::endl;
std::cout << " float.4: " << (IsBatched<float, 4, false>() ? "YES" : "NO") << " - " << sizeof(Vector<float, 4, false>) << " bytes" << std::endl;
std::cout << " double.2: " << (IsBatched<double, 2, false>() ? "YES" : "NO") << " - " << sizeof(Vector<double, 2, false>) << " bytes" << std::endl;
std::cout << " double.3: " << (IsBatched<double, 3, false>() ? "YES" : "NO") << " - " << sizeof(Vector<double, 3, false>) << " bytes" << std::endl;
std::cout << " double.4: " << (IsBatched<double, 4, false>() ? "YES" : "NO") << " - " << sizeof(Vector<double, 4, false>) << " bytes" << std::endl;
PrintVectorType<float, 2>("float2");
PrintVectorType<float, 3>("float3");
PrintVectorType<float, 4>("float4");
PrintVectorType<double, 2>("double2");
PrintVectorType<double, 3>("double3");
PrintVectorType<double, 4>("double4");
PrintVectorType<std::complex<float>, 2>("c_float2");
PrintVectorType<std::complex<float>, 3>("c_float3");
PrintVectorType<std::complex<float>, 4>("c_float4");
PrintVectorType<std::complex<double>, 2>("c_double2");
PrintVectorType<std::complex<double>, 3>("c_double3");
PrintVectorType<std::complex<double>, 4>("c_double4");
PrintVectorType<int32_t, 2>("i32_2");
PrintVectorType<int32_t, 3>("i32_3");
PrintVectorType<int32_t, 4>("i32_4");
PrintVectorType<int64_t, 2>("i64_2");
PrintVectorType<int64_t, 3>("i64_3");
PrintVectorType<int64_t, 4>("i64_4");
std::cout << std::endl;

int ret = Catch::Session().run(argc, argv);
Expand Down
6 changes: 3 additions & 3 deletions conanfile.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[requires]
catch2/3.5.3
xsimd/11.1.0
[requires]
catch2/3.7.0
xsimd/13.0.0

[generators]
CMakeDeps
Expand Down
7 changes: 6 additions & 1 deletion include/Mathter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ target_sources(Mathter
"Vector/SIMDUtil.hpp"
"Vector/Swizzle.hpp"
"Vector/Vector.hpp"
# Swizzles
"Vector/SwizzleInc/Swizzle1.hpp.inc"
"Vector/SwizzleInc/Swizzle2.hpp.inc"
"Vector/SwizzleInc/Swizzle3.hpp.inc"
"Vector/SwizzleInc/Swizzle4.hpp.inc"
)

set_target_properties(Mathter PROPERTIES VERIFY_INTERFACE_HEADER_SETS ON)
Expand All @@ -83,7 +88,7 @@ endif()

if (${MATHTER_ENABLE_SIMD})
message("Vectorization: ON (using XSimd).")
find_package(xsimd REQUIRED)
find_package(xsimd 13 REQUIRED)
target_link_libraries(Mathter INTERFACE xsimd)
target_compile_definitions(Mathter INTERFACE MATHTER_ENABLE_SIMD=1)
else()
Expand Down
4 changes: 2 additions & 2 deletions include/Mathter/Vector/OperationUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Batch FillMasked(Batch batch, Element value) {
}
else {
const auto fillers = Batch{ value };
const auto mask = xsimd::make_batch_bool_constant<Batch, MaskGenerator>();
const auto mask = xsimd::make_batch_bool_constant<typename Batch::value_type, typename Batch::arch_type, MaskGenerator>();
return xsimd::select(mask, batch, fillers);
}
#else
Expand All @@ -52,7 +52,7 @@ Batch FillMaskedWithFirst(Batch batch) {

using UintBatch = xsimd::batch<Uint, typename Batch::arch_type>;

const auto mask = xsimd::make_batch_constant<UintBatch, Generator>();
const auto mask = xsimd::make_batch_constant<typename UintBatch::value_type, typename UintBatch::arch_type, Generator>();
return xsimd::swizzle(batch, mask);
#else
return batch;
Expand Down
6 changes: 3 additions & 3 deletions include/Mathter/Vector/Swizzle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ typename Swizzle<T, Dim, Packed, Indices...>::TargetStorage Swizzle<T, Dim, Pack
using SourceIntBatch = xsimd::batch<Integer, typename SourceBatch::arch_type>;

const auto sourceBatch = SourceBatch::load_aligned(array.data());
const auto mask = xsimd::make_batch_constant<SourceIntBatch, impl::LinearizationGenerator<Indices...>>();
const auto mask = xsimd::make_batch_constant<typename SourceIntBatch::value_type, typename SourceIntBatch::arch_type, impl::LinearizationGenerator<Indices...>>();
const auto linearizedBatch = xsimd::swizzle(sourceBatch, mask);

alignas(GetStorageAlignment<T, Dim, Packed>()) SourceStorage linearized;
Expand All @@ -226,7 +226,7 @@ auto Swizzle<T, Dim, Packed, Indices...>::Delinearize(const TargetStorage& targe
alignas(GetStorageAlignment<T, Dim, Packed>()) SourceStorage targetPadded;
std::copy(target.begin(), target.end(), targetPadded.begin());
const auto targetBatch = SourceBatch::load_aligned(targetPadded.data());
const auto mask = xsimd::make_batch_constant<SourceIntBatch, impl::DelinearizationGenerator<Indices...>>();
const auto mask = xsimd::make_batch_constant<typename SourceIntBatch::value_type, typename SourceIntBatch::arch_type, impl::DelinearizationGenerator<Indices...>>();
const auto delinearizedBatch = xsimd::swizzle(targetBatch, mask);

alignas(GetStorageAlignment<T, Dim, Packed>()) SourceStorage delinearized;
Expand All @@ -250,7 +250,7 @@ auto Swizzle<T, Dim, Packed, Indices...>::Blend(const SourceStorage& old, const
if constexpr (isSourceBatched && sizeof...(Indices) <= Dim) {
const auto oldBatch = SourceBatch::load_unaligned(old.data());
const auto freshBatch = SourceBatch::load_unaligned(fresh.data());
const auto mask = xsimd::make_batch_bool_constant<SourceBatch, impl::BlendGenerator<Indices...>>();
const auto mask = xsimd::make_batch_bool_constant<typename SourceBatch::value_type, typename SourceBatch::arch_type, impl::BlendGenerator<Indices...>>();
const auto blendedBatch = xsimd::select(mask, freshBatch, oldBatch);

alignas(GetStorageAlignment<T, Dim, Packed>()) SourceStorage blended;
Expand Down

0 comments on commit 76f3a53

Please sign in to comment.