Skip to content

Commit

Permalink
removing movemask from platform (#2302)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #2302

stepping stone for simd::contains.

We want to do fewer things in the simd platform if possible, so we are moving out non-essential things.

Differential Revision: D63388617
  • Loading branch information
DenisYaroshevskiy authored and facebook-github-bot committed Sep 26, 2024
1 parent 70d9018 commit cd61173
Show file tree
Hide file tree
Showing 13 changed files with 171 additions and 79 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,8 @@ if (BUILD_TESTS OR BUILD_BENCHMARKS)
DIRECTORY algorithm/simd/detail/test/
TEST algorithm_simd_detail_simd_any_of_test SOURCES SimdAnyOfTest.cpp
TEST algorithm_simd_detail_simd_for_each_test SOURCES SimdForEachTest.cpp
TEST algorithm_simd_detail_simd_for_each_test SOURCES SimdForEachTest.cpp
TEST algorithm_simd_detail_ignore_test SOURCES IgnoreTest.cpp
TEST algorithm_simd_detail_unroll_utils_test SOURCES UnrollUtilsTest.cpp
# disabled until C++20
# TEST algorithm_simd_detail_simd_traits_test SOURCES TraitsTest.cpp
Expand Down
2 changes: 1 addition & 1 deletion folly/algorithm/simd/FindFixed.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ std::optional<std::size_t> findSplitFirstRegister(

template <typename Scalar, typename Reg>
std::optional<std::size_t> firstTrue(Reg reg) {
auto [bits, bitsPerElement] = folly::movemask<Scalar>(reg);
auto [bits, bitsPerElement] = folly::simd::movemask<Scalar>(reg);
if (bits) {
return std::countr_zero(bits) / bitsPerElement();
}
Expand Down
21 changes: 16 additions & 5 deletions folly/algorithm/simd/Movemask.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
FOLLY_PUSH_WARNING
FOLLY_GCC_DISABLE_WARNING("-Wignored-attributes")

namespace folly {
namespace folly::simd {

/*
* This is a low level utility used for simd search algorithms.
Expand All @@ -43,7 +43,7 @@ namespace folly {
* for both x86 and arm.
*
* Interface looks like this:
* folly::movemask<-scalar type->(nativeRegister)
* folly::simd::movemask<-scalar type->(nativeRegister)
* -> std::pair<Bits, BitsPerElement>;
*
* Bits - unsigned integral, containing the bitmask (first is lowest bit).
Expand All @@ -53,7 +53,7 @@ namespace folly {
*
* std::optional<std::uint32_t> firstTrueUint16(auto simdRegister) {
* auto [bits, bitsPerElement] =
* folly::movemask<std::uint16_t>(simdRegister);
* folly::simd::movemask<std::uint16_t>(simdRegister);
* if (!bits) {
* return std::nullopt;
* }
Expand All @@ -71,7 +71,11 @@ template <typename Scalar, typename Reg>
auto movemask(Reg reg) {
std::integral_constant<std::uint32_t, sizeof(Scalar) == 2 ? 2 : 1>
bitsPerElement;
auto mmask = static_cast<std::uint32_t>([&] {

using uint_t = std::
conditional_t<std::is_same_v<Reg, __m128i>, std::uint16_t, std::uint32_t>;

auto mmask = static_cast<uint_t>([&] {
if constexpr (std::is_same_v<Reg, __m128i>) {
if constexpr (sizeof(Scalar) <= 2) {
return _mm_movemask_epi8(reg);
Expand Down Expand Up @@ -142,6 +146,13 @@ auto movemask(Reg reg) {

#endif

} // namespace folly
#if !FOLLY_X64 && !FOLLY_AARCH64

// Fallback overload for platforms that are neither x86-64 nor AArch64 (this
// declaration sits under `#if !FOLLY_X64 && !FOLLY_AARCH64`): it is deleted,
// so any attempt to call movemask there is rejected at compile time.
template <typename Scalar, typename Reg>
void movemask(Reg) = delete;

#endif

} // namespace folly::simd

FOLLY_POP_WARNING
13 changes: 11 additions & 2 deletions folly/algorithm/simd/detail/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")

oncall("fbcode_entropy_wardens_folly")

cpp_library(
name = "ignore",
headers = ["Ignore.h"],
exported_deps = [
"//folly/lang:bits",
],
)

cpp_library(
name = "simd_any_of",
headers = ["SimdAnyOf.h"],
Expand All @@ -19,7 +27,7 @@ cpp_library(
name = "simd_char_platform",
headers = ["SimdCharPlatform.h"],
exported_deps = [
":simd_for_each",
":ignore",
"//folly:portability",
"//folly/algorithm/simd:movemask",
"//folly/lang:bits",
Expand All @@ -30,9 +38,10 @@ cpp_library(
name = "simd_for_each",
headers = ["SimdForEach.h"],
exported_deps = [
":ignore",
":unroll_utils",
"//folly:c_portability",
"//folly:traits",
"//folly/algorithm/simd/detail:unroll_utils",
],
)

Expand Down
61 changes: 61 additions & 0 deletions folly/algorithm/simd/detail/Ignore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <folly/lang/Bits.h>

#include <type_traits>

namespace folly::simd::detail {

/**
* ignore(_none/_extrema)
*
* Tag types for handling the tails.
* ignore_none indicates that the whole register is used.
* ignore_extrema.first, .last show how many elements are out of the data.
*
* For example 3 elements, starting from the second for an 8 element register
* will be ignore_extrema{.first = 1, .last = 4}
*/

// Tag for a partially used register: counts of elements that are out of the
// data at each end of the register.
struct ignore_extrema {
  int first{0}; // elements to ignore at the beginning of the register
  int last{0}; // elements to ignore at the end of the register
};

// Tag indicating that the whole register is used.
struct ignore_none {};

/*
* NOTE: for ignore none we don't clear anything, even if some bits are not
* doing anything. We expect mmask to only have zeroes in masked out elements.
*
* Maybe we need to revisit that at some point.
*/
/*
 * Zeroes the bits of `mmask.first` that belong to ignored elements.
 *
 * Cardinal - presumably the number of elements in the register (callers pass
 *            Platform::kCardinal).
 * mmask    - {bits, bits-per-element tag} pair; only `first` is modified.
 * ignore   - for ignore_extrema, `first` elements are cleared from the low end
 *            and `last` elements from the high end. For any other tag type
 *            (e.g. ignore_none) the mask is left untouched.
 */
template <int Cardinal, typename Uint, typename BitsPerElement, typename Ignore>
void mmaskClearIgnored(std::pair<Uint, BitsPerElement>& mmask, Ignore ignore) {
  if constexpr (std::is_same_v<Ignore, ignore_extrema>) {
    constexpr BitsPerElement kBitsPerElement{};

    // Bits of Uint above the Cardinal elements never map to an element, so
    // they are always cleared together with the ignored trailing elements.
    constexpr int kUnusedTopBits =
        sizeof(Uint) * 8 - Cardinal * kBitsPerElement;

    Uint& bits = mmask.first;
    bits = set_rzero(bits, ignore.first * kBitsPerElement);
    bits = set_lzero(bits, ignore.last * kBitsPerElement + kUnusedTopBits);
  }
}

} // namespace folly::simd::detail
49 changes: 7 additions & 42 deletions folly/algorithm/simd/detail/SimdCharPlatform.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

#include <folly/Portability.h>
#include <folly/algorithm/simd/Movemask.h>
#include <folly/algorithm/simd/detail/SimdForEach.h>
#include <folly/algorithm/simd/detail/Ignore.h>
#include <folly/algorithm/simd/detail/SimdCharPlatform.h>
#include <folly/lang/Bits.h>

#include <array>
Expand Down Expand Up @@ -68,47 +69,16 @@ namespace simd::detail {
* - le_unsigned(reg_t, char) - by lane less than or equal to char.
*
* logical ops:
* - movemask - take a bitmask
* - any(logical_t, ignore) - return true if any of the lanes are true
* - logical_or(logical_t, logical_t) - by lane logical or
*
* mmask ops:
* - clear(mmask, ignore) - sets ignored bits to 0
*
*/

#if FOLLY_X64 || FOLLY_AARCH64

template <typename Platform>
struct SimdCharPlatformCommon : Platform {
using logical_t = typename Platform::logical_t;
using movemask_result_t =
decltype(folly::movemask<std::uint8_t>(logical_t{}));
using mmask_t = typename movemask_result_t::first_type;
static constexpr std::uint32_t kMmaskBitsPerElement =
typename movemask_result_t::second_type{}();

template <typename Uint>
FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static Uint setLowerNBits(int n) {
if (sizeof(Uint) == 8 && n == 64) {
return static_cast<Uint>(-1);
}
return static_cast<Uint>((std::uint64_t{1} << n) - 1);
}

FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static mmask_t clear(
mmask_t mmask, ignore_extrema ignore) {
mmask_t clearFirst =
~setLowerNBits<mmask_t>(ignore.first * kMmaskBitsPerElement);
mmask_t clearLast = setLowerNBits<mmask_t>(
(Platform::kCardinal - ignore.last) * kMmaskBitsPerElement);
return mmask & clearFirst & clearLast;
}

FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static mmask_t clear(
mmask_t mmask, ignore_none) {
return mmask;
}

// These are aligned loads but there is no point in generating
// aligned load instructions, so we call loadu.
Expand All @@ -122,18 +92,13 @@ struct SimdCharPlatformCommon : Platform {
return Platform::unsafeLoadu(ptr, ignore_none{});
}

FOLLY_ALWAYS_INLINE
static mmask_t movemask(logical_t log) {
return folly::movemask<std::uint8_t>(log).first;
}

using Platform::any;

FOLLY_ALWAYS_INLINE
static bool any(typename Platform::logical_t log, ignore_extrema ignore) {
auto mmask = movemask(log);
mmask = clear(mmask, ignore);
return mmask;
auto mmask = movemask<std::uint8_t>(log);
mmaskClearIgnored<Platform::kCardinal>(mmask, ignore);
return mmask.first;
}

static auto toArray(typename Platform::reg_t x) {
Expand Down Expand Up @@ -186,7 +151,7 @@ struct SimdCharSse2PlatformSpecific {

FOLLY_ALWAYS_INLINE
static bool any(logical_t log, ignore_none) {
return folly::movemask<std::uint8_t>(log).first;
return movemask<std::uint8_t>(log).first;
}
};

Expand Down Expand Up @@ -234,7 +199,7 @@ struct SimdCharAvx2PlatformSpecific {

FOLLY_ALWAYS_INLINE
static bool any(logical_t log, ignore_none) {
return folly::movemask<std::uint8_t>(log).first;
return simd::movemask<std::uint8_t>(log).first;
}
};

Expand Down
18 changes: 1 addition & 17 deletions folly/algorithm/simd/detail/SimdForEach.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <folly/CPortability.h>
#include <folly/Traits.h>
#include <folly/algorithm/simd/detail/Ignore.h>
#include <folly/algorithm/simd/detail/UnrollUtils.h>

#include <array>
Expand All @@ -35,23 +36,6 @@ namespace simd::detail {
// to mess that up.
//

/**
* ignore(_none/_extrema)
*
* Tag types for handling the tails.
* ignore_none indicates that the whole register is used.
* ignore_extrema.first, .last show how many elements are out of the data.
*
* For example 3 elements, starting from the second for an 8 element register
* will be ignore_extrema{.first = 1, .last = 4}
*/
struct ignore_extrema {
int first = 0;
int last = 0;
};

struct ignore_none {};

/**
* simdForEachAligning<unrolling>(cardinal, f, l, delegate);
*
Expand Down
9 changes: 9 additions & 0 deletions folly/algorithm/simd/detail/test/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ cpp_unittest(
],
)

cpp_unittest(
name = "ignore_test",
srcs = ["IgnoreTest.cpp"],
deps = [
"//folly/algorithm/simd/detail:ignore",
"//folly/portability:gtest",
],
)

cpp_unittest(
name = "traits_test",
srcs = ["TraitsTest.cpp"],
Expand Down
45 changes: 45 additions & 0 deletions folly/algorithm/simd/detail/test/IgnoreTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <folly/algorithm/simd/detail/Ignore.h>

#include <cstdint>

#include <folly/portability/GTest.h>

namespace folly::simd::detail {

struct IgnoreTest : ::testing::Test {};

// Smoke test for mmaskClearIgnored on an 8-bit mask with 2 bits per element.
TEST_F(IgnoreTest, MaskClearIgnored) {
  auto mmask =
      std::pair{std::uint8_t{0xff}, std::integral_constant<std::uint32_t, 2>{}};

  // Mostly relying on the underlying folly bit-clearing helpers working
  // correctly; the simd any-of tests also cover a lot of cases.
  // This is just the bare minimum smoke test.

  // ignore_none must leave the mask untouched.
  mmaskClearIgnored<4>(mmask, ignore_none{});
  EXPECT_EQ(0xff, mmask.first);

  // 4 elements x 2 bits: drop the first element (low 2 bits) and the last two
  // elements (top 4 bits).
  mmaskClearIgnored<4>(mmask, ignore_extrema{1, 2});
  EXPECT_EQ(0b0000'1100, mmask.first);

  // Continues from 0b0000'1100: with 2 elements the top 4 bits are always
  // ignored, plus 2 more bits for the last element.
  mmaskClearIgnored<2>(mmask, ignore_extrema{0, 1});
  EXPECT_EQ(0b0000'0000, mmask.first);

  // Same call on a fresh all-ones mask: the bits of the one remaining element
  // must survive, which the chained case above cannot show because those bits
  // were already zero.
  auto fresh =
      std::pair{std::uint8_t{0xff}, std::integral_constant<std::uint32_t, 2>{}};
  mmaskClearIgnored<2>(fresh, ignore_extrema{0, 1});
  EXPECT_EQ(0b0000'0011, fresh.first);
}

} // namespace folly::simd::detail
2 changes: 1 addition & 1 deletion folly/algorithm/simd/detail/test/SimdAnyOfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ TEST(SimdAnyOfSimple, Ignore) {
buffer.fill(' ');
for (auto& c : buffer) {
c = 'a';
anySpacesTest({&c, 1}, false);
ASSERT_NO_FATAL_FAILURE(anySpacesTest({&c, 1}, false));
c = ' ';
}
}
Expand Down
4 changes: 2 additions & 2 deletions folly/algorithm/simd/test/MovemaskTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ void allOneTrueTests() {
std::array<T, N> arr;
arr.fill(kFalse);

ASSERT_EQ(0, folly::movemask<T>(loadReg<Reg>(arr)).first);
ASSERT_EQ(0, folly::simd::movemask<T>(loadReg<Reg>(arr)).first);

for (std::size_t i = 0; i != N; ++i) {
arr[i] = kTrue;
auto [bits, bitsPerElement] = folly::movemask<T>(loadReg<Reg>(arr));
auto [bits, bitsPerElement] = folly::simd::movemask<T>(loadReg<Reg>(arr));
std::uint64_t oneElement = safeShift(1, bitsPerElement()) - 1;
std::uint64_t expectedBits = safeShift(oneElement, i * bitsPerElement());

Expand Down
2 changes: 2 additions & 0 deletions folly/detail/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ cpp_library(
exported_deps = [
"//folly:portability",
"//folly:range",
"//folly/algorithm/simd:movemask",
"//folly/algorithm/simd/detail:ignore",
"//folly/algorithm/simd/detail:simd_char_platform",
"//folly/algorithm/simd/detail:simd_for_each",
"//folly/lang:bits",
Expand Down
Loading

0 comments on commit cd61173

Please sign in to comment.