diff --git a/folly/BUCK b/folly/BUCK
index a402d10ff39..da8afc4b29b 100644
--- a/folly/BUCK
+++ b/folly/BUCK
@@ -771,6 +771,9 @@ cpp_library(
         "ovr_config//cpu:x86_64": [
             "memset.S",
         ],
+        "ovr_config//os:linux-arm64": [
+            "memset_select_aarch64.cpp",
+        ],
     }),
     auto_headers = AutoHeaders.NONE,
     headers = [],
@@ -808,6 +811,9 @@ cpp_library(
         "ovr_config//cpu:x86_64": [
             "memset.S",
         ],
+        "ovr_config//os:linux-arm64": [
+            "memset_select_aarch64.cpp",
+        ],
     }),
     auto_headers = AutoHeaders.NONE,
     headers = [],
diff --git a/folly/external/aor/BUCK b/folly/external/aor/BUCK
index 7cdd45a9882..0692ef31a16 100644
--- a/folly/external/aor/BUCK
+++ b/folly/external/aor/BUCK
@@ -7,7 +7,9 @@ cpp_library(
     srcs = [
         "memcpy-advsimd.S",
         "memcpy-armv8.S",
+        "memcpy-mops.S",
         "memcpy_sve.S",
+        "memmove-mops.S",
     ],
     headers = [
         "asmdefs.h",
@@ -22,7 +24,9 @@ cpp_library(
     srcs = [
         "memcpy-advsimd.S",
         "memcpy-armv8.S",
+        "memcpy-mops.S",
         "memcpy_sve.S",
+        "memmove-mops.S",
     ],
     headers = [
         "asmdefs.h",
@@ -41,6 +45,7 @@ cpp_library(
     name = "memset_aarch64",
     srcs = [
         "memset-advsimd.S",
+        "memset-mops.S",
     ],
     headers = [
         "asmdefs.h",
@@ -54,6 +59,7 @@ cpp_library(
     name = "memset_aarch64-use",
     srcs = [
         "memset-advsimd.S",
+        "memset-mops.S",
     ],
     headers = [
         "asmdefs.h",
diff --git a/folly/external/aor/memcpy-mops.S b/folly/external/aor/memcpy-mops.S
new file mode 100644
index 00000000000..6d2c04d5e57
--- /dev/null
+++ b/folly/external/aor/memcpy-mops.S
@@ -0,0 +1,21 @@
+/*
+ * memcpy using MOPS extension.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+ENTRY (__folly_memcpy_aarch64_mops)
+	PTR_ARG (0)
+	PTR_ARG (1)
+	SIZE_ARG (2)
+
+	mov	x3, x0	/* copy via x3 (written back below) so x0 is unchanged at ret */
+	.inst	0x19010443	/* cpyfp [x3]!, [x1]!, x2! */
+	.inst	0x19410443	/* cpyfm [x3]!, [x1]!, x2! */
+	.inst	0x19810443	/* cpyfe [x3]!, [x1]!, x2!
 */
+	ret
+
+END (__folly_memcpy_aarch64_mops)
diff --git a/folly/external/aor/memmove-mops.S b/folly/external/aor/memmove-mops.S
new file mode 100644
index 00000000000..0218d1f3b81
--- /dev/null
+++ b/folly/external/aor/memmove-mops.S
@@ -0,0 +1,21 @@
+/*
+ * memmove using MOPS extension.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+ENTRY (__folly_memmove_aarch64_mops)
+	PTR_ARG (0)
+	PTR_ARG (1)
+	SIZE_ARG (2)
+
+	mov	x3, x0	/* copy via x3 (written back below) so x0 is unchanged at ret */
+	.inst	0x1d010443	/* cpyp [x3]!, [x1]!, x2! */
+	.inst	0x1d410443	/* cpym [x3]!, [x1]!, x2! */
+	.inst	0x1d810443	/* cpye [x3]!, [x1]!, x2! */
+	ret
+
+END (__folly_memmove_aarch64_mops)
diff --git a/folly/external/aor/memset-advsimd.S b/folly/external/aor/memset-advsimd.S
index efaf341d34f..4e5b3903a17 100644
--- a/folly/external/aor/memset-advsimd.S
+++ b/folly/external/aor/memset-advsimd.S
@@ -21,7 +21,7 @@
 #define dstend	x4
 #define zva_val	x5
 
-ENTRY (__folly_memset)
+ENTRY (__folly_memset_aarch64_simd)
 	PTR_ARG (0)
 	SIZE_ARG (2)
 
@@ -113,9 +113,4 @@ L(no_zva_loop):
 	stp	q0, q0, [dstend, -32]
 	ret
 
-END (__folly_memset)
-
-#ifdef FOLLY_MEMSET_IS_MEMSET
-.weak memset
-memset = __folly_memset
-#endif
+END (__folly_memset_aarch64_simd)
diff --git a/folly/external/aor/memset-mops.S b/folly/external/aor/memset-mops.S
new file mode 100644
index 00000000000..3bde32e4f35
--- /dev/null
+++ b/folly/external/aor/memset-mops.S
@@ -0,0 +1,20 @@
+/*
+ * memset using MOPS extension.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+ENTRY (__folly_memset_aarch64_mops)
+	PTR_ARG (0)
+	SIZE_ARG (2)
+
+	mov	x3, x0	/* set via x3 (written back below) so x0 is unchanged at ret */
+	.inst	0x19c10443	/* setp [x3]!, x2!, x1 */
+	.inst	0x19c14443	/* setm [x3]!, x2!, x1 */
+	.inst	0x19c18443	/* sete [x3]!, x2!, x1 */
+	ret
+
+END (__folly_memset_aarch64_mops)
diff --git a/folly/memcpy_select_aarch64.cpp b/folly/memcpy_select_aarch64.cpp
index d5884e5a837..56295046135 100644
--- a/folly/memcpy_select_aarch64.cpp
+++ b/folly/memcpy_select_aarch64.cpp
@@ -56,18 +56,40 @@
 
 #include <asm/hwcap.h> // @manual
 
+#if defined(__has_include)
+#if __has_include(<sys/ifunc.h>)
+#include <sys/ifunc.h>
+#endif
+#endif
+
+#if !defined(HWCAP2_MOPS) // fallback when the headers above do not define it
+#define HWCAP2_MOPS (1UL << 43)
+#endif
+
 extern "C" {
 
 void* __folly_memcpy_aarch64(void* dst, const void* src, std::size_t size);
+void* __folly_memcpy_aarch64_mops(void* dst, const void* src, std::size_t size);
 void* __folly_memcpy_aarch64_simd(void* dst, const void* src, std::size_t size);
 void* __folly_memcpy_aarch64_sve(void* dst, const void* src, std::size_t size);
 
 void* __folly_memmove_aarch64(void* dst, const void* src, std::size_t len);
+void* __folly_memmove_aarch64_mops(void* dst, const void* src, std::size_t len);
 void* __folly_memmove_aarch64_simd(void* dst, const void* src, std::size_t len);
 void* __folly_memmove_aarch64_sve(void* dst, const void* src, std::size_t len);
 
+[[gnu::no_sanitize_address]] // presumably runs before ASan init; confirm
 decltype(&__folly_memcpy_aarch64) __folly_detail_memcpy_resolve(
-    uint64_t hwcaps) {
+    uint64_t hwcaps, const void* arg2) {
+#if defined(_IFUNC_ARG_HWCAP) // only inspect arg2 when its layout is declared
+  if ((hwcaps & _IFUNC_ARG_HWCAP) && arg2 != nullptr) {
+    const __ifunc_arg_t* args = reinterpret_cast<const __ifunc_arg_t*>(arg2);
+    if (args->_hwcap2 & HWCAP2_MOPS) { // MOPS is checked before SVE/SIMD
+      return __folly_memcpy_aarch64_mops;
+    }
+  }
+#endif
+
   if (hwcaps & HWCAP_SVE) {
     return __folly_memcpy_aarch64_sve;
   }
@@ -79,8 +101,18 @@ decltype(&__folly_memcpy_aarch64) __folly_detail_memcpy_resolve(
   return __folly_memcpy_aarch64;
 }
 
+[[gnu::no_sanitize_address]] // presumably runs before ASan init; confirm
 decltype(&__folly_memmove_aarch64) __folly_detail_memmove_resolve(
-    uint64_t hwcaps) {
+    uint64_t hwcaps, const void* arg2) {
+#if defined(_IFUNC_ARG_HWCAP) // only inspect arg2 when its layout is declared
+  if ((hwcaps & _IFUNC_ARG_HWCAP) && arg2 != nullptr) {
+    const __ifunc_arg_t* args = reinterpret_cast<const __ifunc_arg_t*>(arg2);
+    if (args->_hwcap2 & HWCAP2_MOPS) { // MOPS is checked before SVE/SIMD
+      return __folly_memmove_aarch64_mops;
+    }
+  }
+#endif
+
   if (hwcaps & HWCAP_SVE) {
     return __folly_memmove_aarch64_sve;
   }
diff --git a/folly/memset_select_aarch64.cpp b/folly/memset_select_aarch64.cpp
new file mode 100644
index 00000000000..6c3ada080d4
--- /dev/null
+++ b/folly/memset_select_aarch64.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * How on earth does this work?
+ *
+ * See memcpy_select_aarch64.cpp for a full discussion.
+ */
+
+#include <cstddef>
+#include <cstdint>
+
+#if defined(__linux__) && defined(__aarch64__)
+
+#include <asm/hwcap.h> // @manual
+
+#if defined(__has_include)
+#if __has_include(<sys/ifunc.h>)
+#include <sys/ifunc.h>
+#endif
+#endif
+
+#if !defined(HWCAP2_MOPS) // fallback when the headers above do not define it
+#define HWCAP2_MOPS (1UL << 43)
+#endif
+
+extern "C" {
+
+void* __folly_memset_aarch64_mops(void* dest, int ch, std::size_t count);
+void* __folly_memset_aarch64_simd(void* dest, int ch, std::size_t count);
+
+[[gnu::no_sanitize_address]] // presumably runs before ASan init; confirm
+decltype(&__folly_memset_aarch64_simd) __folly_detail_memset_resolve(
+    uint64_t hwcaps, const void* arg2) {
+#if defined(_IFUNC_ARG_HWCAP) // only inspect arg2 when its layout is declared
+  if ((hwcaps & _IFUNC_ARG_HWCAP) && arg2 != nullptr) {
+    const __ifunc_arg_t* args = reinterpret_cast<const __ifunc_arg_t*>(arg2);
+    if (args->_hwcap2 & HWCAP2_MOPS) { // MOPS variant wins when advertised
+      return __folly_memset_aarch64_mops;
+    }
+  }
+#endif
+
+  return __folly_memset_aarch64_simd; // default: Advanced SIMD variant
+}
+
+[[gnu::ifunc("__folly_detail_memset_resolve")]] // bound via the resolver above
+void* __folly_memset(void* dest, int ch, std::size_t count);
+
+#ifdef FOLLY_MEMSET_IS_MEMSET
+
+[[gnu::weak, gnu::alias("__folly_memset")]] // weak: other definitions win
+void* memset(void* dest, int ch, std::size_t count);
+
+#endif
+
+} // extern "C"
+
+#endif // defined(__linux__) && defined(__aarch64__)