From e836b85785cc1a1c95596135155bc9378454e3db Mon Sep 17 00:00:00 2001 From: Luigi Pinca Date: Fri, 10 May 2024 17:58:17 +0200 Subject: [PATCH] [deps] Update is_utf8 to version 1.3.2 --- deps/is_utf8/CMakeLists.txt | 12 +++++-- deps/is_utf8/src/is_utf8.cpp | 61 +++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/deps/is_utf8/CMakeLists.txt b/deps/is_utf8/CMakeLists.txt index 97c20a7..ab9e040 100644 --- a/deps/is_utf8/CMakeLists.txt +++ b/deps/is_utf8/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.15) project(is_utf8 DESCRIPTION "Fast UTF-8 Validation" LANGUAGES CXX - VERSION 1.3.1 + VERSION 1.3.2 ) include(GNUInstallDirs) @@ -20,12 +20,16 @@ if (NOT CMAKE_BUILD_TYPE) endif() endif() -set(CMAKE_CXX_STANDARD 14) +# We compile tools, tests, etc. with C++ 11. Override yourself if you need on a +# target. +set(IS_UTF8_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for is_utf8") + +set(CMAKE_CXX_STANDARD ${IS_UTF8_CXX_STANDARD}) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_MACOSX_RPATH OFF) -set(IS_UTF8_LIB_VERSION "1.3.1" CACHE STRING "is_utf8 library version") +set(IS_UTF8_LIB_VERSION "1.3.2" CACHE STRING "is_utf8 library version") set(IS_UTF8_LIB_SOVERSION "1" CACHE STRING "is_utf8 library soversion") set(IS_UTF8_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) @@ -40,6 +44,8 @@ endif(BUILD_TESTING) add_subdirectory(benchmarks) + +message(STATUS "Compiling using the C++ standard:" ${CMAKE_CXX_STANDARD}) # ---- Install rules ---- add_library(is_utf8::is_utf8 ALIAS is_utf8) diff --git a/deps/is_utf8/src/is_utf8.cpp b/deps/is_utf8/src/is_utf8.cpp index 2a7fe22..72ed218 100644 --- a/deps/is_utf8/src/is_utf8.cpp +++ b/deps/is_utf8/src/is_utf8.cpp @@ -872,8 +872,13 @@ template std::string toBinaryString(T b) { #ifndef IS_UTF8_IMPLEMENTATION_ARM64 #define IS_UTF8_IMPLEMENTATION_ARM64 (IS_UTF8_IS_ARM64) #endif -#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 \ - IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64 + +#if IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64 +#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 1 +#else +#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 0 +#endif + #if IS_UTF8_IMPLEMENTATION_ARM64 @@ -1116,8 +1121,9 @@ template > struct base_u8 { return *this_cast; } - is_utf8_really_inline Mask operator==(const simd8 other) const { - return vceqq_u8(*this, other); + friend is_utf8_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return vceqq_u8(lhs, rhs); } template @@ -2172,7 +2178,7 @@ namespace icelake {} // namespace icelake // We should not get warnings while including yet we do // under some versions of GCC. // If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnigns. +// it is a GCC issue, we want to ignore these warnings. IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized) #endif @@ -2342,8 +2348,11 @@ IS_UTF8_POP_DISABLE_WARNINGS #endif // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this // next line, see https://github.com/simdutf/simdutf/issues/1247 -#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL \ - ((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__)) +#if ((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__)) +#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 0 +#endif #if IS_UTF8_IMPLEMENTATION_HASWELL @@ -2398,7 +2407,7 @@ class implementation final : public is_utf8_internals::implementation { // We should not get warnings while including yet we do // under some versions of GCC. // If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnigns. +// it is a GCC issue, we want to ignore these warnings. IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized) #endif @@ -2539,8 +2548,9 @@ struct base8 : base> { is_utf8_really_inline T last() const { return _mm256_extract_epi8(*this, 31); } - is_utf8_really_inline Mask operator==(const simd8 other) const { - return _mm256_cmpeq_epi8(*this, other); + friend is_utf8_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return _mm256_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base::value); @@ -2965,8 +2975,9 @@ struct base16 : base> { is_utf8_really_inline base16(const Pointer *ptr) : base16(_mm256_loadu_si256(reinterpret_cast(ptr))) {} - is_utf8_really_inline Mask operator==(const simd16 other) const { - return _mm256_cmpeq_epi16(*this, other); + friend is_utf8_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return _mm256_cmpeq_epi16(lhs, rhs); } /// the size of vector in bytes @@ -3340,9 +3351,11 @@ IS_UTF8_UNTARGET_REGION #endif -#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE \ - (IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && \ - __PCLMUL__) +#if IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && __PCLMUL__ +#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 1 +#else +#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 0 +#endif #if IS_UTF8_IMPLEMENTATION_WESTMERE @@ -3395,7 +3408,7 @@ class implementation final : public is_utf8_internals::implementation { // We should not get warnings while including yet we do // under some versions of GCC. // If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnigns. +// it is a GCC issue, we want to ignore these warnings. IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized) #endif @@ -3517,8 +3530,9 @@ struct base8 : base> { is_utf8_really_inline base8() : base>() {} is_utf8_really_inline base8(const __m128i _value) : base>(_value) {} - is_utf8_really_inline Mask operator==(const simd8 other) const { - return _mm_cmpeq_epi8(*this, other); + friend is_utf8_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return _mm_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base>::value); @@ -4032,8 +4046,9 @@ struct base16 : base> { is_utf8_really_inline base16(const Pointer *ptr) : base16(_mm_loadu_si128(reinterpret_cast(ptr))) {} - is_utf8_really_inline Mask operator==(const simd16 other) const { - return _mm_cmpeq_epi16(*this, other); + friend is_utf8_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return _mm_cmpeq_epi16(lhs, rhs); } static const int SIZE = sizeof(base>::value); @@ -4407,7 +4422,11 @@ IS_UTF8_UNTARGET_REGION #endif #endif -#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK (IS_UTF8_IMPLEMENTATION_FALLBACK) +#if IS_UTF8_IMPLEMENTATION_FALLBACK +#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 1 +#else +#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 0 +#endif #if IS_UTF8_IMPLEMENTATION_FALLBACK