From 6d4504fcdb9dc2e98169ea739fd14c243766da2d Mon Sep 17 00:00:00 2001 From: Alexandr Shelepin <57798122+graveart@users.noreply.github.com> Date: Sat, 29 Jul 2023 15:41:16 +0300 Subject: [PATCH] Update to version v3.18.0 --- bindings/consts.go | 2 +- changelog.md | 35 + cjson/decoder.go | 4 +- cpp_src/CMakeLists.txt | 18 +- cpp_src/client/cororeindexer.cc | 5 +- cpp_src/client/itemimpl.cc | 6 +- cpp_src/client/rpcclient.cc | 3 + .../reindexer_server/contrib/Dockerfile.deb | 2 +- cpp_src/cmd/reindexer_server/main.cc | 4 + cpp_src/cmd/reindexer_tool/reindexer_tool.cc | 3 + cpp_src/core/cjson/baseencoder.cc | 15 +- cpp_src/core/cjson/baseencoder.h | 4 +- cpp_src/core/cjson/cjsondecoder.cc | 2 +- cpp_src/core/cjson/cjsonmodifier.cc | 4 +- cpp_src/core/cjson/cjsontools.cc | 2 +- cpp_src/core/cjson/cjsontools.h | 2 +- cpp_src/core/cjson/jsondecoder.cc | 2 +- cpp_src/core/cjson/msgpackdecoder.cc | 2 +- cpp_src/core/cjson/protobufdecoder.cc | 4 +- cpp_src/core/cjson/uuid_recoders.h | 4 +- cpp_src/core/comparator.cc | 33 +- cpp_src/core/expressiontree.h | 2 +- cpp_src/core/ft/ft_fast/dataholder.cc | 12 +- cpp_src/core/ft/ft_fast/dataprocessor.cc | 2 +- cpp_src/core/ft/ftsetcashe.h | 8 +- cpp_src/core/ft/idrelset.h | 18 +- cpp_src/core/ft/numtotext.cc | 60 +- cpp_src/core/idsetcache.h | 13 +- cpp_src/core/index/index.h | 2 +- cpp_src/core/index/indexiterator.h | 8 +- cpp_src/core/index/indextext/fastindextext.cc | 56 +- cpp_src/core/index/indextext/fieldsgetter.h | 15 +- cpp_src/core/index/indextext/indextext.cc | 40 +- cpp_src/core/index/indextext/indextext.h | 4 +- cpp_src/core/index/indexunordered.cc | 4 - cpp_src/core/index/indexunordered.h | 2 +- cpp_src/core/index/payload_map.h | 18 +- cpp_src/core/itemimpl.cc | 10 +- cpp_src/core/itemmodifier.cc | 31 +- cpp_src/core/joincache.h | 17 +- cpp_src/core/keyvalue/variant.cc | 14 +- cpp_src/core/lrucache.cc | 42 +- cpp_src/core/lrucache.h | 27 +- cpp_src/core/namespace/asyncstorage.h | 27 +- 
cpp_src/core/namespace/namespace.cc | 19 +- cpp_src/core/namespace/namespace.h | 3 +- cpp_src/core/namespace/namespaceimpl.cc | 16 +- cpp_src/core/namespace/namespaceimpl.h | 18 +- cpp_src/core/nsselecter/btreeindexiterator.h | 10 +- .../core/nsselecter/btreeindexiteratorimpl.h | 90 +- cpp_src/core/nsselecter/explaincalc.cc | 34 +- cpp_src/core/nsselecter/explaincalc.h | 38 +- cpp_src/core/nsselecter/nsselecter.cc | 453 ++-- cpp_src/core/nsselecter/nsselecter.h | 4 + cpp_src/core/nsselecter/querypreprocessor.cc | 75 +- cpp_src/core/nsselecter/querypreprocessor.h | 6 +- cpp_src/core/nsselecter/selectiterator.cc | 34 +- cpp_src/core/nsselecter/selectiterator.h | 27 +- .../nsselecter/selectiteratorcontainer.cc | 33 +- .../core/nsselecter/selectiteratorcontainer.h | 2 +- cpp_src/core/nsselecter/substitutionhelpers.h | 77 +- cpp_src/core/payload/fieldsset.cc | 8 +- cpp_src/core/payload/fieldsset.h | 82 +- cpp_src/core/payload/payloadfieldvalue.cc | 78 +- cpp_src/core/payload/payloadfieldvalue.h | 84 +- cpp_src/core/payload/payloadiface.cc | 56 +- cpp_src/core/payload/payloadiface.h | 40 +- cpp_src/core/query/sql/sqlencoder.cc | 2 +- cpp_src/core/query/sql/sqlsuggester.cc | 3 +- cpp_src/core/querycache.h | 15 +- cpp_src/core/queryresults/queryresults.h | 2 +- cpp_src/core/querystat.h | 52 +- cpp_src/core/rdxcontext.cc | 3 + cpp_src/core/rdxcontext.h | 2 +- cpp_src/core/reindexer.cc | 6 +- cpp_src/core/reindexerimpl.cc | 11 +- cpp_src/core/reindexerimpl.h | 3 +- .../core/selectfunc/functions/highlight.cc | 2 +- cpp_src/core/selectfunc/functions/snippet.cc | 2 +- cpp_src/core/selectkeyresult.h | 8 + cpp_src/estl/h_vector.h | 4 +- cpp_src/estl/multihash_map.h | 15 +- cpp_src/estl/mutex.h | 6 +- .../fixtures/api_tv_simple_comparators.cc | 511 +++++ .../fixtures/api_tv_simple_comparators.h | 71 + cpp_src/gtests/bench/reindexer_bench.cc | 9 + cpp_src/gtests/tests/API/base_tests.cc | 5 +- .../fixtures/fuzzing/random_generator.cc | 21 + 
.../tests/fixtures/fuzzing/random_generator.h | 2 + cpp_src/gtests/tests/fixtures/queries_api.cc | 179 ++ cpp_src/gtests/tests/fixtures/queries_api.h | 137 +- .../gtests/tests/fixtures/queries_verifier.h | 273 ++- .../gtests/tests/fixtures/reindexertestapi.h | 30 +- cpp_src/gtests/tests/fixtures/ttl_index_api.h | 7 +- cpp_src/gtests/tests/fuzzing/fuzzing.cc | 12 +- cpp_src/gtests/tests/unit/namespace_test.cc | 12 +- cpp_src/gtests/tests/unit/queries_test.cc | 5 + cpp_src/net/cproto/serverconnection.cc | 4 + cpp_src/readme.md | 2 +- cpp_src/server/CMakeLists.txt | 2 +- cpp_src/server/contrib/server.md | 2 +- cpp_src/server/contrib/server.yml | 2 +- cpp_src/server/httpserver.cc | 8 +- cpp_src/tools/cpucheck.cc | 79 + cpp_src/tools/cpucheck.h | 8 + cpp_src/tools/customhash.cc | 38 +- cpp_src/tools/customhash.h | 28 +- cpp_src/tools/logginglongqueries.cc | 138 +- cpp_src/tools/logginglongqueries.h | 74 +- cpp_src/tools/stringstools.cc | 146 +- cpp_src/tools/stringstools.h | 52 +- .../vendor/picohttpparser/picohttpparser.c | 8 +- dsl/dsl.go | 218 +- query.go | 7 + readme.md | 2 +- reindexer.go | 2 +- reindexer_impl.go | 173 +- test/composite_indexes_test.go | 185 +- test/dsl_test.go | 1944 +++++++++++++++++ test/eq_and_set_test.go | 24 +- test/huge_items_test.go | 183 ++ test/query_test.go | 3 +- 122 files changed, 5449 insertions(+), 1168 deletions(-) create mode 100644 cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc create mode 100644 cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h create mode 100644 cpp_src/tools/cpucheck.cc create mode 100644 cpp_src/tools/cpucheck.h create mode 100644 test/dsl_test.go diff --git a/bindings/consts.go b/bindings/consts.go index 248192d40..d676bf95f 100644 --- a/bindings/consts.go +++ b/bindings/consts.go @@ -2,7 +2,7 @@ package bindings const CInt32Max = int(^uint32(0) >> 1) -const ReindexerVersion = "v3.17.0" +const ReindexerVersion = "v3.18.0" // public go consts from type_consts.h and reindexer_ctypes.h const 
( diff --git a/changelog.md b/changelog.md index a33b10e00..725d77331 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,38 @@ +# Version 3.18.0 (29.07.2023) +## Core +- [fea] Increased max indexes count for each namespace up to 255 user-defined indexes (previously it was 63) +- [fea] Added more info to the [slow logger's](readme.md#slow-actions-logging) output: mutexes timing for transactions and basic `explain` info for `select`-queries +- [fea] Improved logic of the cost evaluation for the btree indexes usage in situations, when background indexes ordering was not completed (right after write operations). Expecting more optimal execution plan in those cases +- [fix] Changed logic of the `ALLSET` operator. Now `ALLSET` condition returns `false` for empty values sets and the result behavior is similar to MongoDB [$all](https://www.mongodb.com/docs/manual/reference/operator/query/all/) +- [fix] Fixed automatic conversion for numeric strings with leading or trailing spaces (i.e. ' 1234' or '1234 ') into integers/floats in `WHERE`/`ORDER BY` +- [fix] Allowed non-unique values in forced sort (`ORDER BY (id,4,2,2,5)`). If forced sort order contains same values on the different positions (i.e. `ORDER BY (id,1,2,1,5)`), then the first occurrence of the value will be used for sorting +- [fix] Added limits for the large values sets in the composite indexes substitution algorithm, introduced in v3.15.0 (due to performance issues in some cases). If the result size of the set is exceeding corresponding limit, reindexer will try to find another composite index or skip the substitution + +## Go connector +- [fea] Added support for JOINs and brackets into [JSON DSL wrapper](dsl/dsl.go) + +## Build +- [fea] Added support and deploy for Debian 12 (bookworm). Debian 10 (buster) build was deprecated +- [fea] Enabled SSE4.2 for the default reindexer's builds and for the prebuilt packages. 
SSE may still be disabled by passing `-DENABLE_SSE=OFF` to `cmake` command + +## Face +- [fea] Changed the scale window icon for textareas +- [fea] Added the background color to the Close icon in the search history on the Namespace page +- [fea] Improved the buttons' behavior on the Query builder page +- [fea] Added the database name size limit. +- [fea] Improved the drop-down section behavior on the Query builder page +- [fea] Added new proc settings to the Index config +- [fix] Fixed the columns' settings resetting after the Perfstats page reloading +- [fix] Removed the double requests on the Perfstats page +- [fix] Fixed the JSON Paths tooltip description +- [fix] Fixed the pie chart position in Safari +- [fix] Fixed the popup window size for the long text +- [fix] Fixed the bottom padding on the statistics legend window +- [fix] Fixed the modal window to inform about disabled memory statistics +- [fix] Fixed the filter removal +- [fix] Fixed the filter result page when the filter is removed +- [fix] Fixed the redirect to the wrong page after all items were removed + # Version 3.17.0 (06.07.2023) ## Core - [fea] Optimized namespaces' locks for queries to the system namespaces, containing explicit list of names (for example, `SELECT * FROM #memstats WHERE "name" IN ('ns1', 'nsx', 'ns19')` now requires shared locks for the listed namespaces only) diff --git a/cjson/decoder.go b/cjson/decoder.go index 7c823549f..3cc9bd183 100644 --- a/cjson/decoder.go +++ b/cjson/decoder.go @@ -26,6 +26,8 @@ type Decoder struct { logger Logger } +const MaxIndexes = 256 + func fieldByTag(t reflect.Type, tag string) (result reflect.StructField, ok bool) { if t.Kind() == reflect.Ptr { t = t.Elem() @@ -674,7 +676,7 @@ func (dec *Decoder) DecodeCPtr(cptr uintptr, dest interface{}) (err error) { } }() - fieldsoutcnt := make([]int, 64, 64) + fieldsoutcnt := make([]int, MaxIndexes) ctagsPath := make([]int, 0, 8) dec.decodeValue(pl, ser, reflect.ValueOf(dest), fieldsoutcnt, ctagsPath) diff 
--git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index 6daee0518..754f49e93 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -24,6 +24,7 @@ option (ENABLE_TCMALLOC "Enable tcmalloc extensions" ON) option (ENABLE_JEMALLOC "Enable jemalloc extensions" ON) option (ENABLE_ROCKSDB "Enable rocksdb storage" ON) option (ENABLE_GRPC "Enable GRPC service" OFF) +option (ENABLE_SSE "Enable SSE instructions" ON) if (NOT GRPC_PACKAGE_PROVIDER) set (GRPC_PACKAGE_PROVIDER "CONFIG") @@ -35,7 +36,7 @@ else() option (LINK_RESOURCES "Link web resources as binary data" ON) endif() -set (REINDEXER_VERSION_DEFAULT "3.17.0") +set (REINDEXER_VERSION_DEFAULT "3.18.0") if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") @@ -52,12 +53,13 @@ include (TargetArch) target_architecture(COMPILER_TARGET_ARCH) # Configure compile options -string( REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") if (NOT ${COMPILER_TARGET_ARCH} STREQUAL "e2k") string(REPLACE "-g" "-g1" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") else() string(REPLACE "-g" "-g0" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") - string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() if (${COMPILER_TARGET_ARCH} STREQUAL "e2k") @@ -224,6 +226,16 @@ else () endif () list(APPEND SRCS ${CONTEXT_ASM_SRCS}) +if (ENABLE_SSE) + if (NOT MSVC AND NOT APPLE AND (${COMPILER_TARGET_ARCH} STREQUAL "x86_64" OR ${COMPILER_TARGET_ARCH} STREQUAL "i386")) + add_definitions(-DREINDEXER_WITH_SSE=1) + message ("Building with SSE support...") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -mpopcnt") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 
-msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -mpopcnt") + else () + message ("SSE compiler flags were disabled for the current platform") + endif () +endif () include_directories(${REINDEXER_SOURCE_PATH}) include_directories(${REINDEXER_SOURCE_PATH}/vendor) diff --git a/cpp_src/client/cororeindexer.cc b/cpp_src/client/cororeindexer.cc index 9ab1f1674..4cb747cf5 100644 --- a/cpp_src/client/cororeindexer.cc +++ b/cpp_src/client/cororeindexer.cc @@ -1,11 +1,14 @@ #include "client/cororeindexer.h" #include "client/cororpcclient.h" +#include "tools/cpucheck.h" #include "tools/logger.h" namespace reindexer { namespace client { -CoroReindexer::CoroReindexer(const ReindexerConfig& config) : impl_(new CoroRPCClient(config)), owner_(true), ctx_() {} +CoroReindexer::CoroReindexer(const ReindexerConfig& config) : impl_(new CoroRPCClient(config)), owner_(true), ctx_() { + reindexer::CheckRequiredSSESupport(); +} CoroReindexer::~CoroReindexer() { if (owner_) { delete impl_; diff --git a/cpp_src/client/itemimpl.cc b/cpp_src/client/itemimpl.cc index 0a2c04055..98f32d0a1 100644 --- a/cpp_src/client/itemimpl.cc +++ b/cpp_src/client/itemimpl.cc @@ -54,7 +54,7 @@ void ItemImpl::FromCJSON(std::string_view slice) { throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); } tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, {Variant(p_string(&tupleData_))}); + pl.Set(0, Variant(p_string(&tupleData_))); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { @@ -88,7 +88,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { if (err.ok()) { // Put tuple to field[0] tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, {Variant(p_string(&tupleData_))}); + pl.Set(0, Variant(p_string(&tupleData_))); ser_ = WrSerializer(); } return err; @@ -102,7 +102,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = decoder.Decode(buf, pl, ser_, offset); if 
(err.ok()) { tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, {Variant(p_string(&tupleData_))}); + pl.Set(0, Variant(p_string(&tupleData_))); } return err; } diff --git a/cpp_src/client/rpcclient.cc b/cpp_src/client/rpcclient.cc index e3cde6532..29b6f5ccc 100644 --- a/cpp_src/client/rpcclient.cc +++ b/cpp_src/client/rpcclient.cc @@ -4,6 +4,7 @@ #include "client/itemimpl.h" #include "core/namespacedef.h" #include "gason/gason.h" +#include "tools/cpucheck.h" #include "tools/errors.h" #include "tools/logger.h" #include "vendor/gason/gason.h" @@ -14,6 +15,8 @@ namespace client { using reindexer::net::cproto::RPCAnswer; RPCClient::RPCClient(const ReindexerConfig& config) : workers_(config.WorkerThreads), config_(config), updatesConn_(nullptr) { + reindexer::CheckRequiredSSESupport(); + if (config_.ConnectTimeout > config_.RequestTimeout) { config_.RequestTimeout = config_.ConnectTimeout; } diff --git a/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb b/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb index fd90203e0..d64e65dc2 100644 --- a/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb +++ b/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb @@ -19,7 +19,7 @@ RUN cd /src && \ FROM debian:stable-slim COPY --from=build /usr/local /usr/local COPY --from=build /entrypoint.sh /entrypoint.sh -RUN apt update -y && apt install -y libleveldb1d libunwind8 libjemalloc2 libgrpc++1 && rm -rf /var/lib/apt +RUN apt update -y && apt install -y libleveldb1d libunwind8 libjemalloc2 libgrpc++1.51 && rm -rf /var/lib/apt ENV RX_DATABASE /db ENV RX_CORELOG stdout diff --git a/cpp_src/cmd/reindexer_server/main.cc b/cpp_src/cmd/reindexer_server/main.cc index 6d2535cf3..4f3c4dc01 100644 --- a/cpp_src/cmd/reindexer_server/main.cc +++ b/cpp_src/cmd/reindexer_server/main.cc @@ -2,9 +2,13 @@ #include "debug/backtrace.h" #include "server/server.h" #include "spdlog/spdlog.h" +#include "tools/cpucheck.h" int main(int argc, char* argv[]) { 
reindexer::debug::backtrace_init(); + + reindexer::CheckRequiredSSESupport(); + reindexer_server::Server svc(reindexer_server::ServerMode::Standalone); auto err = svc.InitFromCLI(argc, argv); if (!err.ok()) { diff --git a/cpp_src/cmd/reindexer_tool/reindexer_tool.cc b/cpp_src/cmd/reindexer_tool/reindexer_tool.cc index ee0802227..cd5229406 100644 --- a/cpp_src/cmd/reindexer_tool/reindexer_tool.cc +++ b/cpp_src/cmd/reindexer_tool/reindexer_tool.cc @@ -7,6 +7,7 @@ #include "debug/backtrace.h" #include "reindexer_version.h" #include "repair_tool.h" +#include "tools/cpucheck.h" #include "tools/logger.h" #include "tools/stringstools.h" @@ -42,6 +43,8 @@ int main(int argc, char* argv[]) { using namespace reindexer_tool; reindexer::debug::backtrace_init(); + reindexer::CheckRequiredSSESupport(); + args::ArgumentParser parser("Reindexer client tool"); args::HelpFlag help(parser, "help", "show this message", {'h', "help"}); diff --git a/cpp_src/core/cjson/baseencoder.cc b/cpp_src/core/cjson/baseencoder.cc index f3f0aa6c1..2314aa3eb 100644 --- a/cpp_src/core/cjson/baseencoder.cc +++ b/cpp_src/core/cjson/baseencoder.cc @@ -14,10 +14,7 @@ namespace reindexer { template -BaseEncoder::BaseEncoder(const TagsMatcher* tagsMatcher, const FieldsSet* filter) : tagsMatcher_(tagsMatcher), filter_(filter) { - static_assert(std::numeric_limits::digits >= maxIndexes, - "objectScalarIndexes_ needs to provide 'maxIndexes' bits or more"); -} +BaseEncoder::BaseEncoder(const TagsMatcher* tagsMatcher, const FieldsSet* filter) : tagsMatcher_(tagsMatcher), filter_(filter) {} template void BaseEncoder::Encode(std::string_view tuple, Builder& builder, IAdditionalDatasource* ds) { @@ -45,7 +42,7 @@ void BaseEncoder::Encode(ConstPayload& pl, Builder& builder, IAdditiona return; } - objectScalarIndexes_ = 0; + objectScalarIndexes_.reset(); std::fill_n(std::begin(fieldsoutcnt_), pl.NumFields(), 0); builder.SetTagsMatcher(tagsMatcher_); if constexpr (kWithTagsPathTracking) { @@ -75,7 +72,7 @@ const 
TagsLengths& BaseEncoder::GetTagsMeasures(ConstPayload& pl, IEnco [[maybe_unused]] const ctag beginTag = rdser.GetCTag(); assertrx(beginTag.Type() == TAG_OBJECT); - tagsLengths_.reserve(maxIndexes); + tagsLengths_.reserve(kMaxIndexes); tagsLengths_.push_back(StartObject); while (collectTagsSizes(pl, rdser)) { @@ -149,14 +146,14 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& // get field from indexed field if (tagField >= 0) { if (!pl) throw Error(errParams, "Trying to encode index field %d without payload", tagField); - if ((objectScalarIndexes_ & (1ULL << tagField)) && (tagType != TAG_ARRAY)) { + if (objectScalarIndexes_.test(tagField) && (tagType != TAG_ARRAY)) { std::string fieldName; if (tagName && tagsMatcher_) { fieldName = tagsMatcher_->tag2name(tagName); } throw Error(errParams, "Non-array field '%s' [%d] from '%s' can only be encoded once.", fieldName, tagField, pl->Type().Name()); } - objectScalarIndexes_ |= (1ULL << tagField); + objectScalarIndexes_.set(tagField); assertrx(tagField < pl->NumFields()); int* cnt = &fieldsoutcnt_[tagField]; switch (tagType) { @@ -220,7 +217,7 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& break; } case TAG_OBJECT: { - objectScalarIndexes_ = 0; + objectScalarIndexes_.reset(); if (visible) { auto objNode = builder.Object(tagName); while (encode(pl, rdser, objNode, true)) diff --git a/cpp_src/core/cjson/baseencoder.h b/cpp_src/core/cjson/baseencoder.h index 9c42cd4d6..e08c66feb 100644 --- a/cpp_src/core/cjson/baseencoder.h +++ b/cpp_src/core/cjson/baseencoder.h @@ -63,13 +63,13 @@ class BaseEncoder { std::string_view getPlTuple(ConstPayload &pl); const TagsMatcher *tagsMatcher_; - int fieldsoutcnt_[maxIndexes]; + int fieldsoutcnt_[kMaxIndexes]; const FieldsSet *filter_; WrSerializer tmpPlTuple_; TagsPath curTagsPath_; IndexedTagsPathInternalT indexedTagsPath_; TagsLengths tagsLengths_; - uint64_t objectScalarIndexes_ = 0; + std::bitset objectScalarIndexes_; }; using 
JsonEncoder = BaseEncoder; diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index 87772dc04..9db833d12 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -81,7 +81,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs throw Error(errLogic, "Error parsing cjson field '%s' - got value in the nested array, but expected scalar %s", fieldRef.Name(), fieldType.Name()); } else { - pl.Set(field, {cjsonValueToVariant(tagType, rdser, fieldType)}, true); + pl.Set(field, cjsonValueToVariant(tagType, rdser, fieldType), true); fieldType.EvaluateOneOf( [&](OneOf) { wrser.PutCTag(ctag{TAG_VARINT, tagName, field}); diff --git a/cpp_src/core/cjson/cjsonmodifier.cc b/cpp_src/core/cjson/cjsonmodifier.cc index 1a8832e3c..d549ce1a8 100644 --- a/cpp_src/core/cjson/cjsonmodifier.cc +++ b/cpp_src/core/cjson/cjsonmodifier.cc @@ -36,10 +36,10 @@ class CJsonModifier::Context { TagsPath jsonPath; IndexedTagsPath currObjPath; FieldModifyMode mode; - const Payload *payload = nullptr; bool fieldUpdated = false; bool updateArrayElements = false; - std::array fieldsArrayOffsets; + const Payload *payload = nullptr; + std::array fieldsArrayOffsets; private: bool isForAllItems_ = false; diff --git a/cpp_src/core/cjson/cjsontools.cc b/cpp_src/core/cjson/cjsontools.cc index d19a4154a..5669b0c87 100644 --- a/cpp_src/core/cjson/cjsontools.cc +++ b/cpp_src/core/cjson/cjsontools.cc @@ -92,7 +92,7 @@ void copyCJsonValue(TagType tagType, Serializer &rdser, WrSerializer &wrser) { } } -void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets) { +void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets) { const auto field = tag.Field(); const bool embeddedField = (field < 0); switch (tag.Type()) { diff --git a/cpp_src/core/cjson/cjsontools.h b/cpp_src/core/cjson/cjsontools.h index b3c1054a9..490633b36 100644 --- a/cpp_src/core/cjson/cjsontools.h +++ 
b/cpp_src/core/cjson/cjsontools.h @@ -14,7 +14,7 @@ void putCJsonRef(TagType tagType, int tagName, int tagField, const VariantArray void putCJsonValue(TagType tagType, int tagName, const VariantArray &values, WrSerializer &wrser); [[nodiscard]] TagType kvType2Tag(KeyValueType kvType) noexcept; -void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets = nullptr); +void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets = nullptr); [[nodiscard]] Variant cjsonValueToVariant(TagType tag, Serializer &rdser, KeyValueType dstType); } // namespace reindexer diff --git a/cpp_src/core/cjson/jsondecoder.cc b/cpp_src/core/cjson/jsondecoder.cc index 1af48e7ea..bc9ab94d6 100644 --- a/cpp_src/core/cjson/jsondecoder.cc +++ b/cpp_src/core/cjson/jsondecoder.cc @@ -77,7 +77,7 @@ void JsonDecoder::decodeJsonObject(Payload &pl, CJsonBuilder &builder, const gas f.Name(), f.Type().Name()); } Variant v = jsonValue2Variant(elem.value, f.Type(), f.Name()); - pl.Set(field, {v}, true); + pl.Set(field, v, true); builder.Ref(tagName, v, field); } break; } diff --git a/cpp_src/core/cjson/msgpackdecoder.cc b/cpp_src/core/cjson/msgpackdecoder.cc index b19ad5a58..47586b469 100644 --- a/cpp_src/core/cjson/msgpackdecoder.cc +++ b/cpp_src/core/cjson/msgpackdecoder.cc @@ -18,7 +18,7 @@ void MsgPackDecoder::setValue(Payload& pl, CJsonBuilder& builder, const T& value throw Error(errLogic, "Error parsing msgpack field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); } Variant val(value); - pl.Set(field, {val}, true); + pl.Set(field, val, true); builder.Ref(tagName, val, field); } else { builder.Put(tagName, value); diff --git a/cpp_src/core/cjson/protobufdecoder.cc b/cpp_src/core/cjson/protobufdecoder.cc index ac80b7171..7067d7a8b 100644 --- a/cpp_src/core/cjson/protobufdecoder.cc +++ b/cpp_src/core/cjson/protobufdecoder.cc @@ -51,7 +51,7 @@ void ProtobufDecoder::setValue(Payload& pl, CJsonBuilder& builder, ProtobufValue int field = 
tm_.tags2field(tagsPath_.data(), tagsPath_.size()); auto value = item.value.convert(item.itemType); if (field > 0) { - pl.Set(field, {value}, true); + pl.Set(field, value, true); if (item.isArray) { arraysStorage_.UpdateArraySize(item.tagName, field); } else { @@ -76,7 +76,7 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro if (packed) { int count = 0; while (!parser.IsEof()) { - pl.Set(field, {parser.ReadArrayItem(item.itemType)}, true); + pl.Set(field, parser.ReadArrayItem(item.itemType), true); ++count; } builder.ArrayRef(item.tagName, field, count); diff --git a/cpp_src/core/cjson/uuid_recoders.h b/cpp_src/core/cjson/uuid_recoders.h index 24c4f1db3..fbc0d60fe 100644 --- a/cpp_src/core/cjson/uuid_recoders.h +++ b/cpp_src/core/cjson/uuid_recoders.h @@ -57,7 +57,7 @@ class RecoderStringToUuidArray : public Recoder { void Recode(Serializer &, WrSerializer &) const override final { assertrx(0); } void Recode(Serializer &rdser, Payload &pl, int tagName, WrSerializer &wrser) override final { if (fromNotArrayField_) { - pl.Set(field_, {Variant{rdser.GetStrUuid()}}, true); + pl.Set(field_, Variant{rdser.GetStrUuid()}, true); wrser.PutCTag(ctag{TAG_ARRAY, tagName, field_}); wrser.PutVarUint(1); } else { @@ -98,7 +98,7 @@ class RecoderStringToUuid : public Recoder { [[nodiscard]] bool Match(const TagsPath &) const noexcept override final { return false; } void Recode(Serializer &, WrSerializer &) const override final { assertrx(0); } void Recode(Serializer &rdser, Payload &pl, int tagName, WrSerializer &wrser) override final { - pl.Set(field_, {Variant{rdser.GetStrUuid()}}, true); + pl.Set(field_, Variant{rdser.GetStrUuid()}, true); wrser.PutCTag(ctag{TAG_UUID, tagName, field_}); } diff --git a/cpp_src/core/comparator.cc b/cpp_src/core/comparator.cc index bb43376b5..08d001339 100644 --- a/cpp_src/core/comparator.cc +++ b/cpp_src/core/comparator.cc @@ -14,12 +14,33 @@ Comparator::Comparator(CondType cond, KeyValueType type, const 
VariantArray &val cmpGeom(distinct), cmpUuid(distinct) { if (type.Is()) assertrx(fields_.size() > 0); - if (cond_ == CondEq && values.size() != 1) cond_ = CondSet; - if (cond_ == CondAllSet && values.size() == 1) cond_ = CondEq; - if (cond_ == CondDWithin) { - cmpGeom.SetValues(values); - } else { - setValues(values); + switch (cond) { + case CondEq: + if (values.size() != 1) { + cond_ = CondSet; + } + setValues(values); + break; + case CondSet: + case CondAllSet: + if (values.size() == 1) { + cond_ = CondEq; + } + [[fallthrough]]; + case CondLt: + case CondLe: + case CondGt: + case CondGe: + case CondLike: + case CondRange: + setValues(values); + break; + case CondDWithin: + cmpGeom.SetValues(values); + break; + case CondEmpty: + case CondAny: + break; } } diff --git a/cpp_src/core/expressiontree.h b/cpp_src/core/expressiontree.h index e87287156..6f4147452 100644 --- a/cpp_src/core/expressiontree.h +++ b/cpp_src/core/expressiontree.h @@ -466,7 +466,7 @@ class ExpressionTree { void EncloseInBracket(size_t from, size_t to, OperationType op, Args&&... 
args) { assertrx(to > from); assertrx(to <= container_.size()); - for (unsigned b : activeBrackets_) { + for (unsigned& b : activeBrackets_) { assertrx(b < container_.size()); if (b >= from) ++b; } diff --git a/cpp_src/core/ft/ft_fast/dataholder.cc b/cpp_src/core/ft/ft_fast/dataholder.cc index 3f76c0716..694eab40b 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.cc +++ b/cpp_src/core/ft/ft_fast/dataholder.cc @@ -190,14 +190,12 @@ template IDataHolder::MergeData DataHolder::Select(FtDSLQuery&& dsl, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, FtMergeStatuses::Statuses&& mergeStatuses, FtUseExternStatuses useExternSt, const RdxContext& rdxCtx) { - switch (useExternSt) { - case FtUseExternStatuses::No: - return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process( - std::move(dsl), inTransaction, std::move(mergeStatuses), rdxCtx); - case FtUseExternStatuses::Yes: - return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process( - std::move(dsl), inTransaction, std::move(mergeStatuses), rdxCtx); + if (useExternSt == FtUseExternStatuses::No) { + return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process( + std::move(dsl), inTransaction, std::move(mergeStatuses), rdxCtx); } + return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process( + std::move(dsl), inTransaction, std::move(mergeStatuses), rdxCtx); } template class DataHolder; template class DataHolder; diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.cc b/cpp_src/core/ft/ft_fast/dataprocessor.cc index 391c116e4..5a7f50b75 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.cc +++ b/cpp_src/core/ft/ft_fast/dataprocessor.cc @@ -147,7 +147,7 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { auto &cfg = holder_.cfg_; auto &vdocsTexts = holder_.vdocsTexts; auto &vdocs = holder_.vdocs_; - int fieldscount = fieldSize_; + const int fieldscount = fieldSize_; size_t offset = holder_.vdocsOffset_; // build words map parallel in 
maxIndexWorkers threads auto worker = [this, &ctxs, &vdocsTexts, offset, maxIndexWorkers, fieldscount, &cfg, &vdocs](int i) { diff --git a/cpp_src/core/ft/ftsetcashe.h b/cpp_src/core/ft/ftsetcashe.h index 2963bd283..810a06ccd 100644 --- a/cpp_src/core/ft/ftsetcashe.h +++ b/cpp_src/core/ft/ftsetcashe.h @@ -7,11 +7,11 @@ namespace reindexer { struct FtIdSetCacheVal { - FtIdSetCacheVal() : ids(make_intrusive>()) {} - FtIdSetCacheVal(IdSet::Ptr i) noexcept : ids(std::move(i)) {} - FtIdSetCacheVal(IdSet::Ptr i, FtCtx::Data::Ptr c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} + FtIdSetCacheVal() = default; + FtIdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} + FtIdSetCacheVal(IdSet::Ptr&& i, FtCtx::Data::Ptr&& c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} - size_t Size() const noexcept { return ids ? sizeof(*ids.get()) + ids->heap_size() : 0; } + size_t Size() const noexcept { return ids ? (sizeof(*ids.get()) + ids->heap_size()) : 0; } IdSet::Ptr ids; FtCtx::Data::Ptr ctx; diff --git a/cpp_src/core/ft/idrelset.h b/cpp_src/core/ft/idrelset.h index 8400fb5e8..05ea5f8b7 100644 --- a/cpp_src/core/ft/idrelset.h +++ b/cpp_src/core/ft/idrelset.h @@ -12,6 +12,7 @@ namespace reindexer { typedef uint32_t VDocIdType; +static constexpr int kMaxFtCompositeFields = 63; // the position of the word in the document (the index of the word in the field (pos), the field in which the word field was // encountered (field) @@ -39,12 +40,13 @@ class IdRelType { struct PosType { static const int posBits = 24; PosType() = default; - PosType(int pos, int field) : fpos(pos | (field << posBits)) {} + PosType(int pos, int field) noexcept : fpos(pos | (field << posBits)) {} int pos() const noexcept { return fpos & ((1 << posBits) - 1); } int field() const noexcept { return fpos >> posBits; } bool operator<(PosType other) const noexcept { return fpos < other.fpos; } bool operator==(PosType other) const noexcept { return fpos == other.fpos; } - unsigned fpos; + + uint32_t 
fpos; }; template @@ -89,12 +91,15 @@ class IdRelType { } void Add(int pos, int field) { + assertrx_throw(0 <= field && field <= kMaxFtCompositeFields); pos_.emplace_back(pos, field); addField(field); } void Add(PosType p) { + const auto field = p.field(); + assertrx_throw(0 <= field && field <= kMaxFtCompositeFields); pos_.emplace_back(p); - addField(p.field()); + addField(field); } void SortAndUnique() { boost::sort::pdqsort(pos_.begin(), pos_.end()); @@ -117,12 +122,7 @@ class IdRelType { size_t HeapSize() const noexcept { return heapSize(pos_); } private: - static constexpr int maxField = 63; - - void addField(int field) noexcept { - assertrx(0 <= field && field <= maxField); - usedFieldsMask_ |= (uint64_t(1) << field); - } + void addField(int field) noexcept { usedFieldsMask_ |= (uint64_t(1) << field); } template size_t heapSize(const T& p) const noexcept { diff --git a/cpp_src/core/ft/numtotext.cc b/cpp_src/core/ft/numtotext.cc index 2dd89b226..fc91c1848 100644 --- a/cpp_src/core/ft/numtotext.cc +++ b/cpp_src/core/ft/numtotext.cc @@ -1,37 +1,33 @@ #include "numtotext.h" -#include -#include #include -#include +#include +#include #include "tools/errors.h" namespace reindexer { -using std::string; -using std::vector; -using std::pair; - -const string units[] = {"", "один", "два", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; -const string unitsNominat[] = {"", "одна", "две"}; -const string tens[] = {"", "одиннадцать", "двенадцать", "тринадцать", "четырнадцать", - "пятнадцать", "шестнадцать", "семнадцать", "восемнадцать", "девятнадцать"}; -const string decades[] = {"", "десять", "двадцать", "тридцать", "сорок", - "пятьдесят", "шестьдесят", "семьдесят", "восемьдесят", "девяносто"}; -const string hundreads[] = {"", "сто", "двести", "триста", "четыреста", "пятьсот", "шестьсот", "семьсот", "восемьсот", "девятьсот"}; -const string thousands[] = {"тысяча", "тысячи", "тысяч"}; -const string millions[] = {"миллион", "миллиона", "миллионов"}; -const 
string billions[] = {"миллиард", "миллиарда", "миллиардов"}; -const string trillions[] = {"триллион", "триллиона", "триллионов"}; -const string quadrillion[] = {"квадриллион", "квадриллиона", "квадриллионов"}; -const string quintillion[] = {"квинтиллион", "квинтиллиона", "квинтиллионов"}; -const string sextillion[] = {"секстиллион", "секстиллиона", "секстиллионов"}; -const string septillion[] = {"септиллион", "септиллиона", "септиллионов"}; +constexpr std::string_view units[] = {"", "один", "два", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; +constexpr std::string_view unitsNominat[] = {"", "одна", "две"}; +constexpr std::string_view tens[] = {"", "одиннадцать", "двенадцать", "тринадцать", "четырнадцать", + "пятнадцать", "шестнадцать", "семнадцать", "восемнадцать", "девятнадцать"}; +constexpr std::string_view decades[] = {"", "десять", "двадцать", "тридцать", "сорок", + "пятьдесят", "шестьдесят", "семьдесят", "восемьдесят", "девяносто"}; +constexpr std::string_view hundreads[] = {"", "сто", "двести", "триста", "четыреста", + "пятьсот", "шестьсот", "семьсот", "восемьсот", "девятьсот"}; +constexpr std::string_view thousands[] = {"тысяча", "тысячи", "тысяч"}; +constexpr std::string_view millions[] = {"миллион", "миллиона", "миллионов"}; +constexpr std::string_view billions[] = {"миллиард", "миллиарда", "миллиардов"}; +constexpr std::string_view trillions[] = {"триллион", "триллиона", "триллионов"}; +constexpr std::string_view quadrillion[] = {"квадриллион", "квадриллиона", "квадриллионов"}; +constexpr std::string_view quintillion[] = {"квинтиллион", "квинтиллиона", "квинтиллионов"}; +constexpr std::string_view sextillion[] = {"секстиллион", "секстиллиона", "секстиллионов"}; +constexpr std::string_view septillion[] = {"септиллион", "септиллиона", "септиллионов"}; enum Numorders : int { Thousands, Millions, Billions, Trillions, Quadrillion, Quintillion, Sextillion, Septillion }; -const string& getNumorder(int numorder, int i) { +static std::string_view 
getNumorder(int numorder, int i) { switch (numorder) { case Thousands: return thousands[i]; @@ -53,13 +49,14 @@ const string& getNumorder(int numorder, int i) { throw Error(errParams, "Incorrect order [%s]: too big", numorder); } -int ansiCharacterToDigit(char ch) { return static_cast(ch - 48); } +RX_ALWAYS_INLINE int ansiCharacterToDigit(char ch) noexcept { return static_cast(ch - 48); } -vector getOrders(std::string_view str) { +static std::vector getOrders(std::string_view str) { std::string numStr(str); std::reverse(numStr.begin(), numStr.end()); int numChars = numStr.length(); std::vector orders; + orders.reserve(numChars / 3); for (int i = 0; i < numChars; i += 3) { std::string tempString; if (i <= numChars - 3) { @@ -78,12 +75,12 @@ vector getOrders(std::string_view str) { break; } } - orders.push_back(tempString); + orders.emplace_back(std::move(tempString)); } return orders; } -vector getDecimal(const string& str, int i) { +static std::vector getDecimal(const std::string& str, int i) { std::vector words; int v = std::stoi(str); if (v < 10) { @@ -102,7 +99,7 @@ vector getDecimal(const string& str, int i) { return words; } -string getNumOrders(int i, int num) { +static std::string getNumOrders(int i, int num) { std::string orders; if (i > 0) { if (num % 10 > 4 || (num % 100 > 10 && num % 100 < 20) || num % 10 == 0) { @@ -116,7 +113,7 @@ string getNumOrders(int i, int num) { return orders; } -vector formTextString(const string& str, int i) { +static std::vector formTextString(const std::string& str, int i) { std::vector words; int strlen = str.length(); if (strlen == 3) { @@ -141,8 +138,8 @@ vector formTextString(const string& str, int i) { return words; } -vector& NumToText::convert(std::string_view str, std::vector& output) { - output.clear(); +std::vector& NumToText::convert(std::string_view str, std::vector& output) { + output.resize(0); if ((str.length() == 1) && (str[0] == '0')) { output = {"ноль"}; return output; @@ -159,4 +156,5 @@ vector& 
NumToText::convert(std::string_view str, std::vectorsize() * sizeof(VariantArray::value_type); } + size_t Size() const noexcept { return sizeof(IdSetCacheKey) + keys->size() * sizeof(VariantArray::value_type); } const VariantArray *keys; CondType cond; @@ -57,9 +58,9 @@ T &operator<<(T &os, const IdSetCacheKey &k) { } struct IdSetCacheVal { - IdSetCacheVal() : ids(nullptr) {} - IdSetCacheVal(const IdSet::Ptr &i) : ids(i) {} - size_t Size() const { return ids ? sizeof(*ids.get()) + ids->heap_size() : 0; } + IdSetCacheVal() = default; + IdSetCacheVal(IdSet::Ptr &&i) noexcept : ids(std::move(i)) {} + size_t Size() const noexcept { return ids ? (sizeof(*ids.get()) + ids->heap_size()) : 0; } IdSet::Ptr ids; }; @@ -84,7 +85,7 @@ struct hash_idset_cache_key { class IdSetCache : public LRUCache { public: - void ClearSorted(const std::bitset<64> &s) { + void ClearSorted(const std::bitset &s) { if (s.any()) { Clear([&s](const IdSetCacheKey &k) { return s.test(k.sort); }); } diff --git a/cpp_src/core/index/index.h b/cpp_src/core/index/index.h index 53d2403e8..136a97fdf 100644 --- a/cpp_src/core/index/index.h +++ b/cpp_src/core/index/index.h @@ -120,7 +120,7 @@ class Index { } virtual bool HoldsStrings() const noexcept = 0; virtual void ClearCache() {} - virtual void ClearCache(const std::bitset<64>&) {} + virtual void ClearCache(const std::bitset&) {} virtual bool IsBuilt() const noexcept { return isBuilt_; } virtual void MarkBuilt() noexcept { isBuilt_ = true; } virtual void EnableUpdatesCountingMode(bool) noexcept {} diff --git a/cpp_src/core/index/indexiterator.h b/cpp_src/core/index/indexiterator.h index 920ae2f45..2ed2fcb0b 100644 --- a/cpp_src/core/index/indexiterator.h +++ b/cpp_src/core/index/indexiterator.h @@ -9,11 +9,11 @@ class IndexIteratorBase { public: virtual ~IndexIteratorBase() = default; virtual void Start(bool reverse) = 0; - virtual IdType Value() const = 0; - virtual bool Next() = 0; - virtual void ExcludeLastSet() = 0; + virtual IdType Value() const 
noexcept = 0; + virtual bool Next() noexcept = 0; + virtual void ExcludeLastSet() noexcept = 0; virtual size_t GetMaxIterations(size_t limitIters) noexcept = 0; - virtual void SetMaxIterations(size_t iters) = 0; + virtual void SetMaxIterations(size_t iters) noexcept = 0; }; class IndexIterator : public intrusive_atomic_rc_wrapper { diff --git a/cpp_src/core/index/indextext/fastindextext.cc b/cpp_src/core/index/indextext/fastindextext.cc index 7a99fd770..90e050e9c 100644 --- a/cpp_src/core/index/indextext/fastindextext.cc +++ b/cpp_src/core/index/indextext/fastindextext.cc @@ -46,7 +46,7 @@ void FastIndexText::initHolder(FtFastConfig &cfg) { template Variant FastIndexText::Upsert(const Variant &key, IdType id, bool &clearCache) { - if (key.Type().Is()) { + if (rx_unlikely(key.Type().Is())) { if (this->empty_ids_.Unsorted().Add(id, IdSet::Auto, 0)) { this->isBuilt_ = false; } @@ -78,7 +78,7 @@ Variant FastIndexText::Upsert(const Variant &key, IdType id, bool &clearCache template void FastIndexText::Delete(const Variant &key, IdType id, StringsHolder &strHolder, bool &clearCache) { int delcnt = 0; - if (key.Type().Is()) { + if (rx_unlikely(key.Type().Is())) { delcnt = this->empty_ids_.Unsorted().Erase(id); assertrx(delcnt); this->isBuilt_ = false; @@ -140,7 +140,6 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr // convert vids(uniq documents id) to ids (real ids) IdSet::Ptr mergedIds = make_intrusive>(); auto &holder = *this->holder_; - auto &vdocs = holder.vdocs_; if (mergeData.empty()) { return mergedIds; @@ -148,49 +147,46 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr int cnt = 0; const double scalingFactor = mergeData.maxRank > 255 ? 
255.0 / mergeData.maxRank : 1.0; int minRelevancy = GetConfig()->minRelevancy * 100 * scalingFactor; + size_t releventDocs = 0; for (auto &vid : mergeData) { - assertrx(vid.id < int(vdocs.size())); - if (!vdocs[vid.id].keyEntry) { + auto &vdoc = holder.vdocs_[vid.id]; + if (!vdoc.keyEntry) { continue; } vid.proc *= scalingFactor; if (vid.proc <= minRelevancy) break; - cnt += vdocs[vid.id].keyEntry->Sorted(0).size(); + cnt += vdoc.keyEntry->Sorted(0).size(); + ++releventDocs; } mergedIds->reserve(cnt); fctx->Reserve(cnt); - for (auto &vid : mergeData) { - auto id = vid.id; - assertrx(id < IdType(vdocs.size())); - - if (!vdocs[id].keyEntry) { + for (size_t i = 0; i < releventDocs; ++i) { + auto &vid = mergeData[i]; + auto &vdoc = holder.vdocs_[vid.id]; + if (!vdoc.keyEntry) { continue; } - assertrx(!vdocs[id].keyEntry->Unsorted().empty()); - if (vid.proc <= minRelevancy) break; + assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); + auto ebegin = vdoc.keyEntry->Sorted(0).begin(); + auto eend = vdoc.keyEntry->Sorted(0).end(); if (useExternSt == FtUseExternStatuses::No) { if (vid.areaIndex == std::numeric_limits::max()) { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc); + fctx->Add(ebegin, eend, vid.proc); } else { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc, - std::move(mergeData.vectorAreas[vid.areaIndex])); + fctx->Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); } - mergedIds->Append(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), IdSet::Unordered); - } else if (useExternSt == FtUseExternStatuses::Yes) { + mergedIds->Append(ebegin, eend, IdSet::Unordered); + } else { if (vid.areaIndex == std::numeric_limits::max()) { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc, statuses.rowIds); + fctx->Add(ebegin, eend, vid.proc, statuses.rowIds); } else { - 
fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc, statuses.rowIds, - std::move(mergeData.vectorAreas[vid.areaIndex])); + fctx->Add(ebegin, eend, vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); } - mergedIds->Append(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), statuses.rowIds, - IdSet::Unordered); - } else { - assertrx_throw(false); + mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); } } - if (GetConfig()->logLevel >= LogInfo) { + if (rx_unlikely(GetConfig()->logLevel >= LogInfo)) { logPrintf(LogInfo, "Total merge out: %d ids", mergedIds->size()); std::string str; @@ -209,7 +205,7 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr } logPrintf(LogInfo, "Relevancy(%d): %s", fctx->GetSize(), str); } - assertrx(mergedIds->size() == fctx->GetSize()); + assertrx_throw(mergedIds->size() == fctx->GetSize()); return mergedIds; } template @@ -242,8 +238,8 @@ void FastIndexText::commitFulltextImpl() { } } } - auto tm2 = high_resolution_clock::now(); - if (GetConfig()->logLevel >= LogInfo) { + if (rx_unlikely(GetConfig()->logLevel >= LogInfo)) { + auto tm2 = high_resolution_clock::now(); logPrintf(LogInfo, "FastIndexText::Commit elapsed %d ms total [ build vdocs %d ms, process data %d ms ]", duration_cast(tm2 - tm0).count(), duration_cast(tm1 - tm0).count(), duration_cast(tm2 - tm1).count()); @@ -294,7 +290,7 @@ void FastIndexText::buildVdocs(Container &data) { vdocs.push_back({doc->second.get(), {}, {}}); #endif - if (GetConfig()->logLevel <= LogInfo) { + if (rx_unlikely(GetConfig()->logLevel <= LogInfo)) { for (auto &f : vdocsTexts.back()) this->holder_->szCnt += f.first.length(); } } diff --git a/cpp_src/core/index/indextext/fieldsgetter.h b/cpp_src/core/index/indextext/fieldsgetter.h index 89f17ca1b..791932b27 100644 --- a/cpp_src/core/index/indextext/fieldsgetter.h +++ b/cpp_src/core/index/indextext/fieldsgetter.h @@ -1,4 
+1,5 @@ #pragma once +#include "core/ft/usingcontainer.h" #include "core/index/payload_map.h" #include "core/payload/fieldsset.h" #include "vendor/utf8cpp/utf8.h" @@ -9,7 +10,6 @@ class FieldsGetter { public: FieldsGetter(const FieldsSet &fields, const PayloadType &plt, KeyValueType type) : fields_(fields), plt_(plt), type_(type) {} - RVector, 8> getDocFields(const key_string &doc, std::vector> &) { if (!utf8::is_valid(doc->cbegin(), doc->cend())) throw Error(errParams, "Invalid UTF8 string in FullText index"); @@ -19,8 +19,8 @@ class FieldsGetter { VariantArray krefs; // Specific implemetation for composite index - - RVector, 8> getDocFields(const PayloadValue &doc, std::vector> &strsBuf) { + RVector, 8> getDocFields(const PayloadValue &doc, + std::vector> &strsBuf) { ConstPayload pl(plt_, doc); uint32_t fieldPos = 0; @@ -29,7 +29,7 @@ class FieldsGetter { RVector, 8> ret; for (auto field : fields_) { - krefs.resize(0); + krefs.clear(); bool fieldFromCjson = (field == IndexValueType::SetByJsonPath); if (fieldFromCjson) { assertrx(tagsPathIdx < fields_.getTagsPathsLength()); @@ -39,12 +39,13 @@ class FieldsGetter { } for (const Variant &kref : krefs) { if (!kref.Type().Is()) { - strsBuf.emplace_back(std::unique_ptr(new std::string(kref.As()))); - ret.emplace_back(*strsBuf.back().get(), fieldPos); + auto &str = strsBuf.emplace_back(std::make_unique(kref.As())); + ret.emplace_back(*str, fieldPos); } else { const std::string_view stringRef(kref); - if (!utf8::is_valid(stringRef.data(), stringRef.data() + stringRef.size())) + if (rx_likely(!utf8::is_valid(stringRef.data(), stringRef.data() + stringRef.size()))) { throw Error(errParams, "Invalid UTF8 string in FullTextindex"); + } ret.emplace_back(stringRef, fieldPos); } } diff --git a/cpp_src/core/index/indextext/indextext.cc b/cpp_src/core/index/indextext/indextext.cc index 2b6ec60df..7ca8e5f8b 100644 --- a/cpp_src/core/index/indextext/indextext.cc +++ b/cpp_src/core/index/indextext/indextext.cc @@ -23,11 +23,18 @@ 
void IndexText::initSearchers() { auto fieldIdx = this->fields_[i]; if (fieldIdx == IndexValueType::SetByJsonPath) { assertrx(jsonPathIdx < this->fields_.getJsonPathsLength()); - ftFields_.insert({this->fields_.getJsonPath(jsonPathIdx++), i}); + ftFields_.emplace(this->fields_.getJsonPath(jsonPathIdx++), i); } else { - ftFields_.insert({this->payloadType_->Field(fieldIdx).Name(), i}); + ftFields_.emplace(this->payloadType_->Field(fieldIdx).Name(), i); } } + if (rx_unlikely(ftFields_.size() != this->fields_.size())) { + throw Error(errParams, "Composite fulltext index '%s' contains duplicated fields", this->name_); + } + if (rx_unlikely(ftFields_.size() > kMaxFtCompositeFields)) { + throw Error(errParams, "Unable to create composite fulltext '%s' index with %d fields. Fileds count limit is %d", this->name_, + ftFields_.size(), kMaxFtCompositeFields); + } } } @@ -51,7 +58,7 @@ void IndexText::SetOpts(const IndexOpts &opts) { template FtCtx::Ptr IndexText::prepareFtCtx(const BaseFunctionCtx::Ptr &ctx) { FtCtx::Ptr ftctx = reindexer::reinterpret_pointer_cast(ctx); - if (!ftctx) { + if (rx_unlikely(!ftctx)) { throw Error(errParams, "Full text index (%s) may not be used without context", Index::Name()); } ftctx->PrepareAreas(ftFields_, this->name_); @@ -76,7 +83,7 @@ template SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType condition, SortType, Index::SelectOpts opts, const BaseFunctionCtx::Ptr &ctx, const RdxContext &rdxCtx) { const auto indexWard(rdxCtx.BeforeIndexWork()); - if (keys.size() < 1 || (condition != CondEq && condition != CondSet)) { + if (rx_unlikely(keys.size() < 1 || (condition != CondEq && condition != CondSet))) { throw Error(errParams, "Full text index (%s) support only EQ or SET condition with 1 or 2 parameter", Index::Name()); } @@ -86,10 +93,12 @@ SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType cond IdSetCacheKey ckey{keys, condition, 0}; auto cache_ft = cache_ft_->Get(ckey); if (cache_ft.valid) { - 
if (!cache_ft.val.ids->size() || (ftctx->NeedArea() && !cache_ft.val.ctx->need_area_)) { + if (!cache_ft.val.ids) { + needPutCache = true; + } else if (ftctx->NeedArea() && (!cache_ft.val.ctx || !cache_ft.val.ctx->need_area_)) { needPutCache = true; } else { - return resultFromCache(keys, cache_ft, ftctx); + return resultFromCache(keys, std::move(cache_ft), ftctx); } } return doSelectKey(keys, needPutCache ? std::optional{std::move(ckey)} : std::nullopt, std::move(mergeStatuses), @@ -97,16 +106,17 @@ SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType cond } template -SelectKeyResults IndexText::resultFromCache(const VariantArray &keys, const FtIdSetCache::Iterator &it, FtCtx::Ptr &ftctx) { +SelectKeyResults IndexText::resultFromCache(const VariantArray &keys, FtIdSetCache::Iterator &&it, FtCtx::Ptr &ftctx) { if (rx_unlikely(cfg_->logLevel >= LogInfo)) { logPrintf(LogInfo, "Get search results for '%s' in '%s' from cache", keys[0].As(), this->payloadType_ ? this->payloadType_->Name() : ""); } - SelectKeyResult res; - res.emplace_back(it.val.ids); - SelectKeyResults r(std::move(res)); + SelectKeyResults r; + auto &res = r.emplace_back(); + res.emplace_back(std::move(it.val.ids)); + assertrx(it.val.ctx); - ftctx->SetData(it.val.ctx); + ftctx->SetData(std::move(it.val.ctx)); return r; } @@ -123,7 +133,7 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, const std:: FtDSLQuery dsl(this->ftFields_, this->cfg_->stopWords, this->cfg_->extraWordSymbols); dsl.parse(keys[0].As()); - auto mergedIds = Select(ftctx, std::move(dsl), inTransaction, std::move(mergeStatuses), useExternSt, rdxCtx); + IdSet::Ptr mergedIds = Select(ftctx, std::move(dsl), inTransaction, std::move(mergeStatuses), useExternSt, rdxCtx); SelectKeyResult res; if (mergedIds) { bool need_put = (useExternSt == FtUseExternStatuses::No) && ckey.has_value(); @@ -148,10 +158,10 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, const std:: 
d->area_[area.second].Commit(); } } - cache_ft_->Put(*ckey, FtIdSetCacheVal{mergedIds, std::move(d)}); + cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(d)}); } - res.push_back(SingleSelectKeyResult(std::move(mergedIds))); + res.emplace_back(std::move(mergedIds)); } return SelectKeyResults(std::move(res)); } @@ -160,7 +170,7 @@ template SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType condition, Index::SelectOpts opts, const BaseFunctionCtx::Ptr &ctx, FtPreselectT &&preselect, const RdxContext &rdxCtx) { const auto indexWard(rdxCtx.BeforeIndexWork()); - if (keys.size() < 1 || (condition != CondEq && condition != CondSet)) { + if (rx_unlikely(keys.size() < 1 || (condition != CondEq && condition != CondSet))) { throw Error(errParams, "Full text index (%s) support only EQ or SET condition with 1 or 2 parameter", Index::Name()); } return doSelectKey(keys, std::nullopt, std::move(preselect), FtUseExternStatuses::Yes, opts.inTransaction, prepareFtCtx(ctx), rdxCtx); diff --git a/cpp_src/core/index/indextext/indextext.h b/cpp_src/core/index/indextext/indextext.h index 4ce974fb8..d09c61a26 100644 --- a/cpp_src/core/index/indextext/indextext.h +++ b/cpp_src/core/index/indextext/indextext.h @@ -49,7 +49,7 @@ class IndexText : public IndexUnordered { Base::ClearCache(); cache_ft_.reset(); } - void ClearCache(const std::bitset<64>& s) override { Base::ClearCache(s); } + void ClearCache(const std::bitset& s) override { Base::ClearCache(s); } void MarkBuilt() noexcept override { assertrx(0); } bool IsFulltext() const noexcept override { return true; } @@ -60,7 +60,7 @@ class IndexText : public IndexUnordered { FtCtx::Ptr prepareFtCtx(const BaseFunctionCtx::Ptr&); SelectKeyResults doSelectKey(const VariantArray& keys, const std::optional&, FtMergeStatuses&&, FtUseExternStatuses useExternSt, bool inTransaction, FtCtx::Ptr, const RdxContext&); - SelectKeyResults resultFromCache(const VariantArray& keys, const FtIdSetCache::Iterator&, 
FtCtx::Ptr&); + SelectKeyResults resultFromCache(const VariantArray& keys, FtIdSetCache::Iterator&&, FtCtx::Ptr&); void build(const RdxContext& rdxCtx); void initSearchers(); diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index e2b0a4f6b..6b1d5869e 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -251,10 +251,6 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray &keys, CondType break; // Get set of keys or single key case CondEq: - if (keys.size() == 0) { - throw Error(errParams, "Condition EQ must have at least 1 argument, but provided 0"); - } - [[fallthrough]]; case CondSet: { struct { T *i_map; diff --git a/cpp_src/core/index/indexunordered.h b/cpp_src/core/index/indexunordered.h index 61cc94c54..05cc89d95 100644 --- a/cpp_src/core/index/indexunordered.h +++ b/cpp_src/core/index/indexunordered.h @@ -41,7 +41,7 @@ class IndexUnordered : public IndexStore> { void SetSortedIdxCount(int sortedIdxCount) override; bool HoldsStrings() const noexcept override; void ClearCache() override { cache_.reset(); } - void ClearCache(const std::bitset<64> &s) override { + void ClearCache(const std::bitset &s) override { if (cache_) cache_->ClearSorted(s); } void Dump(std::ostream &os, std::string_view step = " ", std::string_view offset = "") const override { dump(os, step, offset); } diff --git a/cpp_src/core/index/payload_map.h b/cpp_src/core/index/payload_map.h index 7f8c242b8..91132989f 100644 --- a/cpp_src/core/index/payload_map.h +++ b/cpp_src/core/index/payload_map.h @@ -173,13 +173,27 @@ class unordered_payload_map std::pair insert(const std::pair &v) { PayloadValueWithHash key(v.first, payloadType_, fields_); - auto res = base_hash_map::insert(std::make_pair(std::move(key), v.second)); + auto res = base_hash_map::emplate(std::move(key), v.second); if (res.second) add_ref(res.first->first); return res; } std::pair insert(std::pair &&v) { PayloadValueWithHash 
key(std::move(v.first), payloadType_, fields_); - auto res = base_hash_map::insert(std::make_pair(std::move(key), std::move(v.second))); + auto res = base_hash_map::emplace(std::move(key), std::move(v.second)); + if (res.second) this->add_ref(res.first->first); + return res; + } + template + std::pair emplace(const PayloadValue &pl, V &&v) { + PayloadValueWithHash key(pl, payloadType_, fields_); + auto res = base_hash_map::emplace(std::move(key), std::forward(v)); + if (res.second) this->add_ref(res.first->first); + return res; + } + template + std::pair emplace(PayloadValue &&pl, V &&v) { + PayloadValueWithHash key(std::move(pl), payloadType_, fields_); + auto res = base_hash_map::emplace(std::move(key), std::forward(v)); if (res.second) this->add_ref(res.first->first); return res; } diff --git a/cpp_src/core/itemimpl.cc b/cpp_src/core/itemimpl.cc index 1902039d7..e80161acc 100644 --- a/cpp_src/core/itemimpl.cc +++ b/cpp_src/core/itemimpl.cc @@ -73,7 +73,7 @@ void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray & } tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } void ItemImpl::SetField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev) { @@ -94,7 +94,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = msgPackDecoder_->Decode(buf, pl, ser_, offset); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } return err; } @@ -109,7 +109,7 @@ Error ItemImpl::FromProtobuf(std::string_view buf) { Error err = decoder.Decode(buf, pl, ser_); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } 
return err; } @@ -166,7 +166,7 @@ void ItemImpl::FromCJSON(std::string_view slice, bool pkOnly, Recoder *recoder) if (!rdser.Eof()) throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { @@ -217,7 +217,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { // Put tuple to field[0] tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); return err; } diff --git a/cpp_src/core/itemmodifier.cc b/cpp_src/core/itemmodifier.cc index f117a4d66..80746f090 100644 --- a/cpp_src/core/itemmodifier.cc +++ b/cpp_src/core/itemmodifier.cc @@ -287,9 +287,12 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var auto strHolder = ns_.StrHolder(ctx); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; h_vector needUpdateCompIndexes(ns_.indexes_.compositeIndexesSize(), false); - for (int i = ns_.indexes_.firstCompositePos(); i < ns_.indexes_.totalSize(); ++i) { - const auto &fields = ns_.indexes_[i]->Fields(); - const auto idxId = i - ns_.indexes_.firstCompositePos(); + const auto firstCompositePos = ns_.indexes_.firstCompositePos(); + const auto totalIndexes = ns_.indexes_.totalSize(); + for (int i = firstCompositePos; i < totalIndexes; ++i) { + auto &compositeIdx = ns_.indexes_[i]; + const auto &fields = compositeIdx->Fields(); + const auto idxId = i - firstCompositePos; for (const auto f : fields) { if (f == IndexValueType::SetByJsonPath) continue; if (f == field.index()) { @@ -307,16 +310,17 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var if (!needUpdateCompIndexes[idxId]) continue; } bool 
needClearCache{false}; - ns_.indexes_[i]->Delete(Variant(ns_.items_[itemId]), itemId, *strHolder, needClearCache); - if (needClearCache && ns_.indexes_[i]->IsOrdered()) indexesCacheCleaner.Add(ns_.indexes_[i]->SortId()); + compositeIdx->Delete(Variant(ns_.items_[itemId]), itemId, *strHolder, needClearCache); + if (needClearCache && compositeIdx->IsOrdered()) indexesCacheCleaner.Add(compositeIdx->SortId()); } const auto insertItemIntoCompositeIndexes = [&] { - for (int i = ns_.indexes_.firstCompositePos(); i < ns_.indexes_.totalSize(); ++i) { - if (!needUpdateCompIndexes[i - ns_.indexes_.firstCompositePos()]) continue; + for (int i = firstCompositePos; i < totalIndexes; ++i) { + if (!needUpdateCompIndexes[i - firstCompositePos]) continue; bool needClearCache{false}; - ns_.indexes_[i]->Upsert(Variant(ns_.items_[itemId]), itemId, needClearCache); - if (needClearCache && ns_.indexes_[i]->IsOrdered()) indexesCacheCleaner.Add(ns_.indexes_[i]->SortId()); + auto &compositeIdx = ns_.indexes_[i]; + compositeIdx->Upsert(Variant(ns_.items_[itemId]), itemId, needClearCache); + if (needClearCache && compositeIdx->IsOrdered()) indexesCacheCleaner.Add(compositeIdx->SortId()); } }; @@ -330,7 +334,8 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var Variant oldTupleValue = item.GetField(0); oldTupleValue.EnsureHold(); bool needClearCache{false}; - ns_.indexes_[0]->Delete(oldTupleValue, itemId, *strHolder, needClearCache); + auto &tupleIdx = ns_.indexes_[0]; + tupleIdx->Delete(oldTupleValue, itemId, *strHolder, needClearCache); Variant tupleValue; std::exception_ptr exception; try { @@ -338,9 +343,9 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var } catch (...) 
{ exception = std::current_exception(); } - tupleValue = ns_.indexes_[0]->Upsert(item.GetField(0), itemId, needClearCache); - if (needClearCache && ns_.indexes_[0]->IsOrdered()) indexesCacheCleaner.Add(ns_.indexes_[0]->SortId()); - pl.Set(0, {std::move(tupleValue)}); + tupleValue = tupleIdx->Upsert(item.GetField(0), itemId, needClearCache); + if (needClearCache && tupleIdx->IsOrdered()) indexesCacheCleaner.Add(tupleIdx->SortId()); + pl.Set(0, std::move(tupleValue)); ns_.tagsMatcher_.try_merge(item.tagsMatcher()); if (exception) { std::rethrow_exception(exception); diff --git a/cpp_src/core/joincache.h b/cpp_src/core/joincache.h index def18e5fc..8ec4030f2 100644 --- a/cpp_src/core/joincache.h +++ b/cpp_src/core/joincache.h @@ -9,12 +9,11 @@ namespace reindexer { struct JoinCacheKey { - JoinCacheKey() {} - JoinCacheKey(const JoinCacheKey &other) { - if (this != &other) { - buf_ = other.buf_; - } - } + JoinCacheKey() = default; + JoinCacheKey(JoinCacheKey &&other) = default; + JoinCacheKey(const JoinCacheKey &other) = default; + JoinCacheKey &operator=(JoinCacheKey &&other) = default; + JoinCacheKey &operator=(const JoinCacheKey &other) = delete; void SetData(const Query &q) { WrSerializer ser; q.Serialize(ser, (SkipJoinQueries | SkipMergeQueries)); @@ -28,7 +27,7 @@ struct JoinCacheKey { buf_.reserve(buf_.size() + ser.Len()); buf_.insert(buf_.end(), ser.Buf(), ser.Buf() + ser.Len()); } - size_t Size() const { return sizeof(JoinCacheKey) + (buf_.is_hdata() ? 0 : buf_.size()); } + size_t Size() const noexcept { return sizeof(JoinCacheKey) + (buf_.is_hdata() ? 0 : buf_.size()); } h_vector buf_; }; @@ -48,8 +47,8 @@ struct hash_join_cache_key { struct JoinPreResult; struct JoinCacheVal { - JoinCacheVal() {} - size_t Size() const { return ids_ ? sizeof(*ids_.get()) + ids_->heap_size() : 0; } + JoinCacheVal() = default; + size_t Size() const noexcept { return ids_ ? 
(sizeof(*ids_.get()) + ids_->heap_size()) : 0; } IdSet::Ptr ids_; bool matchedAtLeastOnce = false; bool inited = false; diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index 6232e7567..3b3947c60 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -223,10 +223,14 @@ std::string Variant::As(const PayloadType &pt, const FieldsSet &fie template std::optional tryParseAs(std::string_view str) noexcept { - const auto end = str.data() + str.size(); + auto begin = str.data(); + const auto end = begin + str.size(); + while (begin != end && std::isspace(*begin)) { + ++begin; + } T res; - auto [ptr, err] = std::from_chars(str.data(), end, res); - if (ptr == str.data() || err == std::errc::invalid_argument || err == std::errc::result_out_of_range) { + auto [ptr, err] = std::from_chars(begin, end, res); + if (ptr == begin || err == std::errc::invalid_argument || err == std::errc::result_out_of_range) { return std::nullopt; } for (; ptr != end; ++ptr) { @@ -607,7 +611,7 @@ void Variant::convertToComposite(const PayloadType *payloadType, const FieldsSet for (auto field : *fields) { if (field != IndexValueType::SetByJsonPath) { - pl.Set(field, {ser.GetVariant()}); + pl.Set(field, ser.GetVariant()); } else { // TODO: will have to implement SetByJsonPath in PayloadIFace // or this "mixed" composite queries (by ordinary indexes + indexes @@ -684,6 +688,7 @@ void Variant::Dump(T &os) const { template void Variant::Dump(WrSerializer &) const; template void Variant::Dump(std::ostream &) const; +template void Variant::Dump(std::stringstream &) const; template void VariantArray::Dump(T &os) const { @@ -697,6 +702,7 @@ void VariantArray::Dump(T &os) const { template void VariantArray::Dump(WrSerializer &) const; template void VariantArray::Dump(std::ostream &) const; +template void VariantArray::Dump(std::stringstream &) const; VariantArray::VariantArray(Point p) noexcept { emplace_back(p.X()); diff --git 
a/cpp_src/core/lrucache.cc b/cpp_src/core/lrucache.cc index 304e134c5..7f04bbf32 100644 --- a/cpp_src/core/lrucache.cc +++ b/cpp_src/core/lrucache.cc @@ -13,16 +13,17 @@ const int kMaxHitCountToCache = 1024; template typename LRUCache::Iterator LRUCache::Get(const K &key) { - if (cacheSizeLimit_ == 0) return Iterator(); + if (rx_unlikely(cacheSizeLimit_ == 0)) return Iterator(); - std::lock_guard lk(lock_); + std::lock_guard lk(lock_); - auto it = items_.find(key); - if (it == items_.end()) { - it = items_.emplace(key, Entry{}).first; + auto [it, emplaced] = items_.try_emplace(key); + if (emplaced) { totalCacheSize_ += kElemSizeOverhead + sizeof(Entry) + key.Size(); it->second.lruPos = lru_.insert(lru_.end(), &it->first); - if (!eraseLRU()) return Iterator(); + if (rx_unlikely(!eraseLRU())) { + return Iterator(); + } } else if (std::next(it->second.lruPos) != lru_.end()) { lru_.splice(lru_.end(), lru_, it->second.lruPos, std::next(it->second.lruPos)); it->second.lruPos = std::prev(lru_.end()); @@ -40,9 +41,9 @@ typename LRUCache::Iterator LRUCache::Get( template void LRUCache::Put(const K &key, V &&v) { - if (cacheSizeLimit_ == 0) return; + if (rx_unlikely(cacheSizeLimit_ == 0)) return; - std::lock_guard lk(lock_); + std::lock_guard lk(lock_); auto it = items_.find(key); if (it == items_.end()) return; @@ -55,7 +56,7 @@ void LRUCache::Put(const K &key, V &&v) { eraseLRU(); - if (eraseCount_ && putCount_ * 16 > getCount_) { + if (rx_unlikely(putCount_ * 16 > getCount_ && eraseCount_)) { logPrintf(LogWarning, "IdSetCache::eraseLRU () cache invalidates too fast eraseCount=%d,putCount=%d,getCount=%d", eraseCount_, putCount_, eraseCount_); eraseCount_ = 0; @@ -66,29 +67,29 @@ void LRUCache::Put(const K &key, V &&v) { } template -bool LRUCache::eraseLRU() { +RX_ALWAYS_INLINE bool LRUCache::eraseLRU() { typename LRUList::iterator it = lru_.begin(); while (totalCacheSize_ > cacheSizeLimit_) { // just to save us if totalCacheSize_ >0 and lru is empty // someone can make bad 
key or val with wrong size - if (lru_.empty()) { + // TODO: Probably we should remove this logic, since there is no access to sizes outside of the lrucache + if (rx_unlikely(lru_.empty())) { clearAll(); logPrintf(LogError, "IdSetCache::eraseLRU () Cache restarted because wrong cache size totalCacheSize_=%d", totalCacheSize_); return false; } auto mIt = items_.find(**it); - assertrx(mIt != items_.end()); + assertrx_throw(mIt != items_.end()); - size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); + const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); - if (oldSize > totalCacheSize_) { + if (rx_unlikely(oldSize > totalCacheSize_)) { clearAll(); logPrintf(LogError, "IdSetCache::eraseLRU () Cache restarted because wrong cache size totalCacheSize_=%d,oldSize=%d", totalCacheSize_, oldSize); return false; } - totalCacheSize_ = totalCacheSize_ - oldSize; items_.erase(mIt); it = lru_.erase(it); @@ -97,15 +98,10 @@ bool LRUCache::eraseLRU() { return !lru_.empty(); } -template -bool LRUCache::Clear() { - std::lock_guard lk(lock_); - return clearAll(); -} template bool LRUCache::clearAll() { - bool res = !items_.empty(); + const bool res = !items_.empty(); totalCacheSize_ = 0; std::unordered_map().swap(items_); LRUList().swap(lru_); @@ -117,8 +113,9 @@ bool LRUCache::clearAll() { template LRUCacheMemStat LRUCache::GetMemStat() { - std::lock_guard lk(lock_); LRUCacheMemStat ret; + + std::lock_guard lk(lock_); ret.totalSize = totalCacheSize_; ret.itemsCount = items_.size(); // for (auto &item : items_) { @@ -133,6 +130,5 @@ template class LRUCache; template class LRUCache; template class LRUCache; -template class LRUCache; } // namespace reindexer diff --git a/cpp_src/core/lrucache.h b/cpp_src/core/lrucache.h index 902a88554..8b07b7e1e 100644 --- a/cpp_src/core/lrucache.h +++ b/cpp_src/core/lrucache.h @@ -16,14 +16,14 @@ template class LRUCache { public: using Key = K; - LRUCache(size_t 
sizeLimit = kDefaultCacheSizeLimit, int hitCount = kDefaultHitCountToCache) + LRUCache(size_t sizeLimit = kDefaultCacheSizeLimit, int hitCount = kDefaultHitCountToCache) noexcept : totalCacheSize_(0), cacheSizeLimit_(sizeLimit), hitCountToCache_(hitCount) {} struct Iterator { Iterator(bool k = false, const V &v = V()) : valid(k), val(v) {} Iterator(const Iterator &other) = delete; Iterator &operator=(const Iterator &other) = delete; - Iterator(Iterator &&other) : valid(other.valid), val(std::move(other.val)) { other.valid = false; } - Iterator &operator=(Iterator &&other) { + Iterator(Iterator &&other) noexcept : valid(other.valid), val(std::move(other.val)) { other.valid = false; } + Iterator &operator=(Iterator &&other) noexcept { if (this != &other) { valid = other.valid; val = std::move(other.val); @@ -41,7 +41,10 @@ class LRUCache { LRUCacheMemStat GetMemStat(); - bool Clear(); + bool Clear() { + std::lock_guard lk(lock_); + return clearAll(); + } template void Dump(T &os, std::string_view step, std::string_view offset) const { @@ -75,7 +78,7 @@ class LRUCache { template void Clear(const F &cond) { - std::lock_guard lock{lock_}; + std::lock_guard lock(lock_); for (auto it = lru_.begin(); it != lru_.end();) { if (!cond(**it)) { ++it; @@ -84,7 +87,7 @@ class LRUCache { auto mIt = items_.find(**it); assertrx(mIt != items_.end()); const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); - if (oldSize > totalCacheSize_) { + if (rx_unlikely(oldSize > totalCacheSize_)) { clearAll(); return; } @@ -96,12 +99,7 @@ class LRUCache { } protected: - bool eraseLRU(); - - bool clearAll(); - typedef std::list LRUList; - struct Entry { V val; typename LRUList::iterator lruPos; @@ -112,14 +110,17 @@ class LRUCache { } }; + bool eraseLRU(); + bool clearAll(); + std::unordered_map items_; LRUList lru_; mutable std::mutex lock_; size_t totalCacheSize_; - size_t cacheSizeLimit_; + const size_t cacheSizeLimit_; int hitCountToCache_; - int 
getCount_ = 0, putCount_ = 0, eraseCount_ = 0; + uint64_t getCount_ = 0, putCount_ = 0, eraseCount_ = 0; }; } // namespace reindexer diff --git a/cpp_src/core/namespace/asyncstorage.h b/cpp_src/core/namespace/asyncstorage.h index 50e0df6a4..aefb8948e 100644 --- a/cpp_src/core/namespace/asyncstorage.h +++ b/cpp_src/core/namespace/asyncstorage.h @@ -4,6 +4,7 @@ #include #include "core/storage/idatastorage.h" #include "estl/h_vector.h" +#include "estl/mutex.h" #include "tools/assertrx.h" #include "tools/flagguard.h" @@ -39,6 +40,7 @@ class AsyncStorage { using AdviceGuardT = CounterGuardAIRL32; using ClockT = std::chrono::system_clock; using TimepointT = ClockT::time_point; + using Mutex = MarkedMutex; struct Status { bool isEnabled = false; @@ -47,8 +49,7 @@ class AsyncStorage { class Cursor { public: - Cursor(std::unique_lock&& lck, std::unique_ptr&& c) noexcept - : lck_(std::move(lck)), c_(std::move(c)) { + Cursor(std::unique_lock&& lck, std::unique_ptr&& c) noexcept : lck_(std::move(lck)), c_(std::move(c)) { assertrx(lck_.owns_lock()); assertrx(c_); } @@ -58,24 +59,20 @@ class AsyncStorage { // NOTE: Cursor owns unique storage lock. I.e. nobody is able to read stroage or write into it, while cursor exists. // Currently the only place, where it matter is EnumMeta method. However, we should to consider switching to shared_mutex, if // the number of such concurrent Cursors will grow. 
- std::unique_lock lck_; + std::unique_lock lck_; std::unique_ptr c_; }; class FullLockT { public: - FullLockT(std::mutex& flushMtx, std::mutex& updatesMtx) : flushLck_(flushMtx), storageLck_(updatesMtx) {} - ~FullLockT() { - // Specify unlock order - storageLck_.unlock(); - flushLck_.unlock(); - } - bool OwnsThisFlushMutex(std::mutex& mtx) const noexcept { return flushLck_.owns_lock() && flushLck_.mutex() == &mtx; } - bool OwnsThisStorageMutex(std::mutex& mtx) const noexcept { return storageLck_.owns_lock() && storageLck_.mutex() == &mtx; } + using MutexType = Mutex; + FullLockT(Mutex& flushMtx, Mutex& updatesMtx) : flushLck_(flushMtx), storageLck_(updatesMtx) {} + bool OwnsThisFlushMutex(Mutex& mtx) const noexcept { return flushLck_.owns_lock() && flushLck_.mutex() == &mtx; } + bool OwnsThisStorageMutex(Mutex& mtx) const noexcept { return storageLck_.owns_lock() && storageLck_.mutex() == &mtx; } private: - std::unique_lock flushLck_; - std::unique_lock storageLck_; + std::unique_lock flushLck_; + std::unique_lock storageLck_; }; AsyncStorage() = default; @@ -267,8 +264,8 @@ class AsyncStorage { // storageMtx_ locks shared_ptr storage_; std::string path_; - mutable std::mutex storageMtx_; - mutable std::mutex flushMtx_; + mutable Mutex storageMtx_; + mutable Mutex flushMtx_; bool isCopiedNsStorage_ = false; h_vector recycled_; std::atomic batchingAdvices_ = {0}; diff --git a/cpp_src/core/namespace/namespace.cc b/cpp_src/core/namespace/namespace.cc index 4e6bbb35e..cfb3d53f1 100644 --- a/cpp_src/core/namespace/namespace.cc +++ b/cpp_src/core/namespace/namespace.cc @@ -16,13 +16,15 @@ void Namespace::CommitTransaction(Transaction& tx, QueryResults& result, const R txStatsCounter_.Count(tx); } bool wasCopied = false; // NOLINT(*deadcode.DeadStores) - QueryStatCalculator statCalculator( - long_actions::Logger{tx, longTxLoggingParams_.load(std::memory_order_relaxed), wasCopied}); + auto params = longTxLoggingParams_.load(std::memory_order_relaxed); + 
QueryStatCalculator statCalculator(long_actions::Logger{tx, params, wasCopied}, params.thresholdUs >= 0); PerfStatCalculatorMT txCommitCalc(commitStatsCounter_, enablePerfCounters); if (needNamespaceCopy(nsl, tx)) { PerfStatCalculatorMT calc(nsl->updatePerfCounter_, enablePerfCounters); - contexted_unique_lock lck(clonerMtx_, &ctx); + + auto lck = statCalculator.CreateLock(clonerMtx_, &ctx); + nsl = ns_; if (needNamespaceCopy(nsl, tx)) { PerfStatCalculatorMT nsCopyCalc(copyStatsCounter_, enablePerfCounters); @@ -32,12 +34,13 @@ void Namespace::CommitTransaction(Transaction& tx, QueryResults& result, const R hasCopy_.store(true, std::memory_order_release); CounterGuardAIR32 cg(nsl->cancelCommitCnt_); try { - auto rlck = nsl->rLock(ctx); - auto storageLock = nsl->storage_.FullLock(); + auto rlck = statCalculator.CreateLock(*nsl, &NamespaceImpl::rLock, ctx); + auto storageLock = statCalculator.CreateLock(nsl->storage_, &AsyncStorage::FullLock); + cg.Reset(); nsCopy_.reset(new NamespaceImpl(*nsl, storageLock)); nsCopyCalc.HitManualy(); - nsCopy_->CommitTransaction(tx, result, NsContext(ctx).NoLock()); + nsCopy_->CommitTransaction(tx, result, NsContext(ctx).NoLock(), statCalculator); if (nsCopy_->lastUpdateTime_) { nsCopy_->lastUpdateTime_ -= nsCopy_->config_.optimizationTimeout * 2; nsCopy_->optimizeIndexes(NsContext(ctx).NoLock()); @@ -64,11 +67,11 @@ void Namespace::CommitTransaction(Transaction& tx, QueryResults& result, const R } nsl = ns_; lck.unlock(); - nsl->storage_.TryForceFlush(); + statCalculator.LogFlushDuration(nsl->storage_, &AsyncStorage::TryForceFlush); return; } } - handleInvalidation(NamespaceImpl::CommitTransaction)(tx, result, NsContext(ctx)); + handleInvalidation(NamespaceImpl::CommitTransaction)(tx, result, NsContext(ctx), statCalculator); } NamespacePerfStat Namespace::GetPerfStat(const RdxContext& ctx) { diff --git a/cpp_src/core/namespace/namespace.h b/cpp_src/core/namespace/namespace.h index 56cb37c2b..8430b1e70 100644 --- 
a/cpp_src/core/namespace/namespace.h +++ b/cpp_src/core/namespace/namespace.h @@ -220,7 +220,7 @@ class Namespace { std::shared_ptr ns_; std::unique_ptr nsCopy_; std::atomic hasCopy_ = {false}; - using Mutex = MarkedMutex; + using Mutex = MarkedMutex; mutable Mutex clonerMtx_; mutable spinlock nsPtrSpinlock_; std::atomic startCopyPolicyTxSize_; @@ -235,4 +235,3 @@ class Namespace { #undef handleInvalidation } // namespace reindexer - diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index ec4df2b24..1aa11048f 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -12,6 +12,7 @@ #include "core/itemmodifier.h" #include "core/nsselecter/nsselecter.h" #include "core/payload/payloadiface.h" +#include "core/querystat.h" #include "core/rdxcontext.h" #include "core/selectfunc/functionexecutor.h" #include "itemsloader.h" @@ -974,9 +975,9 @@ void NamespaceImpl::addIndex(const IndexDef& indexDef) { } const int idxNo = payloadType_->NumFields(); - if (idxNo >= maxIndexes) { + if (idxNo >= kMaxIndexes) { throw Error(errConflict, "Cannot add index '%s.%s'. Too many non-composite indexes. 
%d non-composite indexes are allowed only", - name_, indexName, maxIndexes - 1); + name_, indexName, kMaxIndexes - 1); } const JsonPaths& jsonPaths = indexDef.jsonPaths_; RollBack_addIndex rollbacker{*this}; @@ -1005,7 +1006,7 @@ void NamespaceImpl::addIndex(const IndexDef& indexDef) { rollbacker.SetOldPayloadType(std::move(oldPlType)); tagsMatcher_.UpdatePayloadType(payloadType_); rollbacker.NeedResetPayloadTypeInTagsMatcher(); - newIndex->SetFields(FieldsSet{idxNo}); + newIndex->SetFields(FieldsSet(idxNo)); newIndex->UpdatePayloadType(payloadType_); FieldsSet changedFields{0, idxNo}; @@ -1591,13 +1592,14 @@ Transaction NamespaceImpl::NewTransaction(const RdxContext& ctx) { return Transaction(name_, payloadType_, tagsMatcher_, pkFields(), schema_); } -void NamespaceImpl::CommitTransaction(Transaction& tx, QueryResults& result, NsContext ctx) { +void NamespaceImpl::CommitTransaction(Transaction& tx, QueryResults& result, NsContext ctx, + QueryStatCalculator& queryStatCalculator) { logPrintf(LogTrace, "[repl:%s]:%d CommitTransaction start", name_, serverId_); Locker::WLockT wlck; if (!ctx.noLock) { PerfStatCalculatorMT calc(updatePerfCounter_, enablePerfCounters_); CounterGuardAIR32 cg(cancelCommitCnt_); - wlck = wLock(ctx.rdxContext); + wlck = queryStatCalculator.CreateLock(*this, &NamespaceImpl::wLock, ctx.rdxContext); cg.Reset(); calc.LockHit(); } @@ -1640,7 +1642,7 @@ void NamespaceImpl::CommitTransaction(Transaction& tx, QueryResults& result, NsC processWalRecord(commitWrec, ctx.rdxContext); logPrintf(LogTrace, "[repl:%s]:%d CommitTransaction end", name_, serverId_); - tryForceFlush(std::move(wlck)); + queryStatCalculator.LogFlushDuration(*this, &NamespaceImpl::tryForceFlush, std::move(wlck)); } void NamespaceImpl::doUpsert(ItemImpl* ritem, IdType id, bool doUpdate) { @@ -2091,7 +2093,7 @@ NamespaceMemStat NamespaceImpl::GetMemStat(const RdxContext& ctx) { if (storageStatus.isEnabled) { if (storageStatus.err.ok()) { ret.storageStatus = "OK"sv; - } else if 
(checkIfEndsWith("No space left on device"sv, storageStatus.err.what(), true)) { + } else if (checkIfEndsWith("No space left on device"sv, storageStatus.err.what())) { ret.storageStatus = "NO SPACE LEFT"sv; } else { ret.storageStatus = storageStatus.err.what(); diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index 9bcacae9e..fbb0ef6ca 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -53,6 +53,14 @@ class ItemComparator; class SortExpression; class ProtobufSchema; class QueryResults; + +namespace long_actions { +template +struct Logger; +} +template class> +class QueryStatCalculator; + namespace SortExprFuncs { struct DistanceBetweenJoinedIndexesSameNs; } // namespace SortExprFuncs @@ -86,12 +94,15 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f class RollBack_updateItems; class IndexesCacheCleaner { public: - explicit IndexesCacheCleaner(NamespaceImpl &ns) : ns_{ns} {} + explicit IndexesCacheCleaner(NamespaceImpl &ns) noexcept : ns_{ns} {} IndexesCacheCleaner(const IndexesCacheCleaner &) = delete; IndexesCacheCleaner(IndexesCacheCleaner &&) = delete; IndexesCacheCleaner &operator=(const IndexesCacheCleaner &) = delete; IndexesCacheCleaner &operator=(IndexesCacheCleaner &&) = delete; void Add(SortType s) { + if (rx_unlikely(s >= sorts_.size())) { + throw Error(errLogic, "Index sort type overflow: %d. 
Limit is %d", s, sorts_.size() - 1); + } if (s > 0) { sorts_.set(s); } @@ -100,7 +111,7 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f private: NamespaceImpl &ns_; - std::bitset<64> sorts_; + std::bitset sorts_; }; friend class NsSelecter; @@ -229,7 +240,8 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f void CloseStorage(const RdxContext &); Transaction NewTransaction(const RdxContext &ctx); - void CommitTransaction(Transaction &tx, QueryResults &result, NsContext ctx); + void CommitTransaction(Transaction &tx, QueryResults &result, NsContext ctx, + QueryStatCalculator &queryStatCalculator); Item NewItem(const NsContext &ctx); void ToPool(ItemImpl *item); diff --git a/cpp_src/core/nsselecter/btreeindexiterator.h b/cpp_src/core/nsselecter/btreeindexiterator.h index b859f4d55..a805955a8 100644 --- a/cpp_src/core/nsselecter/btreeindexiterator.h +++ b/cpp_src/core/nsselecter/btreeindexiterator.h @@ -13,7 +13,7 @@ class BtreeIndexIterator final : public IndexIterator { explicit BtreeIndexIterator(const T& idxMap) : idxMap_(idxMap), first_(idxMap.begin()), last_(idxMap.end()) {} BtreeIndexIterator(const T& idxMap, const typename T::iterator& first, const typename T::iterator& last) : idxMap_(idxMap), first_(first), last_(last) {} - ~BtreeIndexIterator() override final {} + ~BtreeIndexIterator() override final = default; void Start(bool reverse) final override { if (reverse) { @@ -26,7 +26,7 @@ class BtreeIndexIterator final : public IndexIterator { impl_->shiftIdsetToBegin(); } - bool Next() final override { + bool Next() noexcept final override { assertrx(impl_); if (impl_->isOver()) { return impl_->finishIteration(); @@ -41,12 +41,12 @@ class BtreeIndexIterator final : public IndexIterator { return true; } - void ExcludeLastSet() override { + void ExcludeLastSet() noexcept override { assertrx(impl_); impl_->shiftToNextIdset(); } - IdType Value() const override final { + IdType Value() const noexcept 
override final { assertrx(impl_); return impl_->getValue(); } @@ -54,7 +54,7 @@ class BtreeIndexIterator final : public IndexIterator { if (cachedIters_ != std::numeric_limits::max()) return cachedIters_; return BtreeIndexForwardIteratorImpl(idxMap_, first_, last_).getMaxIterations(limitIters); } - void SetMaxIterations(size_t iters) final { cachedIters_ = iters; } + void SetMaxIterations(size_t iters) noexcept final { cachedIters_ = iters; } private: std::shared_ptr> impl_; diff --git a/cpp_src/core/nsselecter/btreeindexiteratorimpl.h b/cpp_src/core/nsselecter/btreeindexiteratorimpl.h index c303fb66e..e2ce9c82b 100644 --- a/cpp_src/core/nsselecter/btreeindexiteratorimpl.h +++ b/cpp_src/core/nsselecter/btreeindexiteratorimpl.h @@ -15,14 +15,14 @@ class BtreeIndexIteratorImpl { public: enum class IdsetType { Plain = 0, Btree }; - explicit BtreeIndexIteratorImpl(const T& idxMap) : idxMap_(idxMap){}; - virtual ~BtreeIndexIteratorImpl(){}; + explicit BtreeIndexIteratorImpl(const T& idxMap) : idxMap_(idxMap) {} + virtual ~BtreeIndexIteratorImpl() = default; - virtual bool isOver() const = 0; - virtual void shiftToBegin() = 0; - virtual void next() = 0; + virtual bool isOver() const noexcept = 0; + virtual void shiftToBegin() noexcept = 0; + virtual void next() noexcept = 0; - bool shiftToNextIdset() { + bool shiftToNextIdset() noexcept { if (isOver()) return false; for (next(); !isOver() && getCurrentIdsetSize() == 0;) { next(); @@ -54,7 +54,7 @@ class BtreeIndexIteratorImpl { break; } } - bool isIdsetOver() const { + bool isIdsetOver() const noexcept { switch (currentIdsetType_) { case IdsetType::Btree: return isBtreeIdsetOver(); @@ -64,7 +64,7 @@ class BtreeIndexIteratorImpl { std::abort(); } } - void updateCurrentValue() { + void updateCurrentValue() noexcept { switch (currentIdsetType_) { case IdsetType::Btree: currVal_ = getBtreeIdsetCurrentValue(); @@ -76,13 +76,13 @@ class BtreeIndexIteratorImpl { std::abort(); } } - bool finishIteration() { + bool 
finishIteration() noexcept { currVal_ = INT_MAX; return false; } template - void detectCurrentIdsetType(const TIdSet& idset) { + void detectCurrentIdsetType(const TIdSet& idset) noexcept { if (std::is_same() && !idset.IsCommited()) { currentIdsetType_ = IdsetType::Btree; } else { @@ -90,8 +90,8 @@ class BtreeIndexIteratorImpl { } } - size_t getSize() const { return idxMap_.size(); } - size_t getCurrentIdsetSize() const { + size_t getSize() const noexcept { return idxMap_.size(); } + size_t getCurrentIdsetSize() const noexcept { switch (currentIdsetType_) { case IdsetType::Btree: return getBtreeIdsetSize(); @@ -138,9 +138,9 @@ class BtreeIndexForwardIteratorImpl : public BtreeIndexIteratorImpl { this->idxMapItEnd_ = last; this->idxMapIt_ = this->idxMapItBegin_; } - ~BtreeIndexForwardIteratorImpl() override {} + ~BtreeIndexForwardIteratorImpl() override = default; - void shiftToBegin() override { + void shiftToBegin() noexcept override { this->idxMapIt_ = this->idxMapItBegin_; if (this->getSize() > 0) { this->detectCurrentIdsetType(this->idxMapIt_->second.Unsorted()); @@ -148,33 +148,39 @@ class BtreeIndexForwardIteratorImpl : public BtreeIndexIteratorImpl { } } - void next() override { + void next() noexcept override { ++this->idxMapIt_; if (!isOver()) { this->detectCurrentIdsetType(this->idxMapIt_->second.Unsorted()); } } - void shiftPlainIdsetToNext() override { + void shiftPlainIdsetToNext() noexcept override { const auto& idset = this->idxMapIt_->second.Unsorted(); for (; it_ != idset.end() && *it_ <= this->currVal_; ++it_) { } } - void shiftBtreeIdsetToNext() override { + void shiftBtreeIdsetToNext() noexcept override { const IdSet& sortedIdset = static_cast(this->idxMapIt_->second.Unsorted()); for (; itset_ != sortedIdset.set_->end() && *itset_ <= this->currVal_; ++itset_) { } } - bool isOver() const override { return this->idxMapIt_ == this->idxMapItEnd_; } - void shiftPlainIdsetToBegin() override { it_ = this->idxMapIt_->second.Unsorted().begin(); } - void 
shiftBtreeIdsetToBegin() override { itset_ = static_cast(this->idxMapIt_->second.Unsorted()).set_->begin(); } - bool isPlainIdsetOver() const override { return it_ == this->idxMapIt_->second.Unsorted().end(); } - bool isBtreeIdsetOver() const override { return itset_ == static_cast(this->idxMapIt_->second.Unsorted()).set_->end(); } - IdType getPlainIdsetCurrentValue() const override { return *it_; } - IdType getBtreeIdsetCurrentValue() const override { return *itset_; } - size_t getPlainIdsetSize() const override { return this->idxMapIt_->second.Unsorted().size(); } - size_t getBtreeIdsetSize() const override { return static_cast(this->idxMapIt_->second.Unsorted()).set_->size(); } + bool isOver() const noexcept override { return this->idxMapIt_ == this->idxMapItEnd_; } + void shiftPlainIdsetToBegin() noexcept override { it_ = this->idxMapIt_->second.Unsorted().begin(); } + void shiftBtreeIdsetToBegin() noexcept override { + itset_ = static_cast(this->idxMapIt_->second.Unsorted()).set_->begin(); + } + bool isPlainIdsetOver() const noexcept override { return it_ == this->idxMapIt_->second.Unsorted().end(); } + bool isBtreeIdsetOver() const noexcept override { + return itset_ == static_cast(this->idxMapIt_->second.Unsorted()).set_->end(); + } + IdType getPlainIdsetCurrentValue() const noexcept override { return *it_; } + IdType getBtreeIdsetCurrentValue() const noexcept override { return *itset_; } + size_t getPlainIdsetSize() const noexcept override { return this->idxMapIt_->second.Unsorted().size(); } + size_t getBtreeIdsetSize() const noexcept override { + return static_cast(this->idxMapIt_->second.Unsorted()).set_->size(); + } size_t getMaxIterations(size_t limitIters) noexcept { size_t cnt = 0; for (auto it = idxMapItBegin_; cnt < limitIters && it != idxMapItEnd_; ++it) { @@ -219,9 +225,9 @@ class BtreeIndexReverseIteratorImpl : public BtreeIndexIteratorImpl { idxMapRit_ = idxMapRitBegin_; } - ~BtreeIndexReverseIteratorImpl() override {} + 
~BtreeIndexReverseIteratorImpl() override = default; - void shiftToBegin() override { + void shiftToBegin() noexcept override { this->idxMapRit_ = this->idxMapRitBegin_; if (this->getSize() > 0) { this->detectCurrentIdsetType(this->idxMapRit_->second.Unsorted()); @@ -229,36 +235,40 @@ class BtreeIndexReverseIteratorImpl : public BtreeIndexIteratorImpl { } } - void shiftPlainIdsetToNext() override { + void shiftPlainIdsetToNext() noexcept override { const auto& idset = this->idxMapRit_->second.Unsorted(); for (; rit_ != idset.rend() && *rit_ >= this->currVal_; ++rit_) { } } - void shiftBtreeIdsetToNext() override { + void shiftBtreeIdsetToNext() noexcept override { const IdSet& sortedIdset = static_cast(this->idxMapRit_->second.Unsorted()); for (; ritset_ != sortedIdset.set_->rend() && *ritset_ >= this->currVal_; ++ritset_) { } } - void next() override { + void next() noexcept override { ++this->idxMapRit_; if (!isOver()) { this->detectCurrentIdsetType(this->idxMapRit_->second.Unsorted()); } } - bool isOver() const override { return idxMapRit_ == idxMapRitEnd_; } - void shiftPlainIdsetToBegin() override { rit_ = this->idxMapRit_->second.Unsorted().rbegin(); } - void shiftBtreeIdsetToBegin() override { ritset_ = static_cast(this->idxMapRit_->second.Unsorted()).set_->rbegin(); } - bool isPlainIdsetOver() const override { return rit_ == this->idxMapRit_->second.Unsorted().rend(); } - bool isBtreeIdsetOver() const override { + bool isOver() const noexcept override { return idxMapRit_ == idxMapRitEnd_; } + void shiftPlainIdsetToBegin() noexcept override { rit_ = this->idxMapRit_->second.Unsorted().rbegin(); } + void shiftBtreeIdsetToBegin() noexcept override { + ritset_ = static_cast(this->idxMapRit_->second.Unsorted()).set_->rbegin(); + } + bool isPlainIdsetOver() const noexcept override { return rit_ == this->idxMapRit_->second.Unsorted().rend(); } + bool isBtreeIdsetOver() const noexcept override { return ritset_ == 
static_cast(this->idxMapRit_->second.Unsorted()).set_->rend(); } - IdType getPlainIdsetCurrentValue() const override { return *rit_; } - IdType getBtreeIdsetCurrentValue() const override { return *ritset_; } - size_t getPlainIdsetSize() const override { return this->idxMapRit_->second.Unsorted().size(); } - size_t getBtreeIdsetSize() const override { return static_cast(this->idxMapRit_->second.Unsorted()).set_->size(); } + IdType getPlainIdsetCurrentValue() const noexcept override { return *rit_; } + IdType getBtreeIdsetCurrentValue() const noexcept override { return *ritset_; } + size_t getPlainIdsetSize() const noexcept override { return this->idxMapRit_->second.Unsorted().size(); } + size_t getBtreeIdsetSize() const noexcept override { + return static_cast(this->idxMapRit_->second.Unsorted()).set_->size(); + } private: union { diff --git a/cpp_src/core/nsselecter/explaincalc.cc b/cpp_src/core/nsselecter/explaincalc.cc index 71b74fdf4..86cd6c7d0 100644 --- a/cpp_src/core/nsselecter/explaincalc.cc +++ b/cpp_src/core/nsselecter/explaincalc.cc @@ -46,7 +46,7 @@ void ExplainCalc::LogDump(int logLevel) { } } -static const char *joinTypeName(JoinType type) { +constexpr inline const char *joinTypeName(JoinType type) noexcept { switch (type) { case JoinType::InnerJoin: return "inner_join "; @@ -61,7 +61,7 @@ static const char *joinTypeName(JoinType type) { } } -static const char *opName(OpType op, bool first = true) { +constexpr inline const char *opName(OpType op, bool first = true) { switch (op) { case OpAnd: return first ? 
"" : "and "; @@ -70,7 +70,7 @@ static const char *opName(OpType op, bool first = true) { case OpNot: return "not "; default: - abort(); + throw Error(errLogic, "Unexpected op type: %d", int(op)); } } @@ -207,50 +207,50 @@ std::string SelectIteratorContainer::explainJSON(const_iterator begin, const_ite return name.str(); } -ExplainCalc::Duration ExplainCalc::lap() { +ExplainCalc::Duration ExplainCalc::lap() noexcept { auto now = Clock::now(); Duration d = now - last_point_; last_point_ = now; return d; } -int ExplainCalc::To_us(const ExplainCalc::Duration &d) { return duration_cast(d).count(); } +int ExplainCalc::To_us(const ExplainCalc::Duration &d) noexcept { return duration_cast(d).count(); } -void reindexer::ExplainCalc::StartTiming() { +void reindexer::ExplainCalc::StartTiming() noexcept { if (enabled_) lap(); } -void reindexer::ExplainCalc::StopTiming() { +void reindexer::ExplainCalc::StopTiming() noexcept { if (enabled_) total_ = prepare_ + select_ + postprocess_ + loop_; } -void reindexer::ExplainCalc::AddPrepareTime() { +void reindexer::ExplainCalc::AddPrepareTime() noexcept { if (enabled_) prepare_ += lap(); } -void reindexer::ExplainCalc::AddSelectTime() { +void reindexer::ExplainCalc::AddSelectTime() noexcept { if (enabled_) select_ += lap(); } -void reindexer::ExplainCalc::AddPostprocessTime() { +void reindexer::ExplainCalc::AddPostprocessTime() noexcept { if (enabled_) postprocess_ += lap(); } -void reindexer::ExplainCalc::AddLoopTime() { +void reindexer::ExplainCalc::AddLoopTime() noexcept { if (enabled_) loop_ += lap(); } -void reindexer::ExplainCalc::StartSort() { +void reindexer::ExplainCalc::StartSort() noexcept { if (enabled_) sort_start_point_ = Clock::now(); } -void reindexer::ExplainCalc::StopSort() { +void reindexer::ExplainCalc::StopSort() noexcept { if (enabled_) sort_ = Clock::now() - sort_start_point_; } -void reindexer::ExplainCalc::AddIterations(int iters) { iters_ += iters; } -void reindexer::ExplainCalc::PutSortIndex(std::string_view 
index) { sortIndex_ = index; } -void ExplainCalc::PutSelectors(SelectIteratorContainer *qres) { selectors_ = qres; } -void ExplainCalc::PutJoinedSelectors(JoinedSelectors *jselectors) { jselectors_ = jselectors; } +void reindexer::ExplainCalc::AddIterations(int iters) noexcept { iters_ += iters; } +void reindexer::ExplainCalc::PutSortIndex(std::string_view index) noexcept { sortIndex_ = index; } +void ExplainCalc::PutSelectors(SelectIteratorContainer *qres) noexcept { selectors_ = qres; } +void ExplainCalc::PutJoinedSelectors(JoinedSelectors *jselectors) noexcept { jselectors_ = jselectors; } } // namespace reindexer diff --git a/cpp_src/core/nsselecter/explaincalc.h b/cpp_src/core/nsselecter/explaincalc.h index dd49b1efb..34cccd5f9 100644 --- a/cpp_src/core/nsselecter/explaincalc.h +++ b/cpp_src/core/nsselecter/explaincalc.h @@ -22,34 +22,42 @@ class ExplainCalc { typedef Clock::time_point time_point; public: + ExplainCalc() = default; ExplainCalc(bool enable) noexcept : enabled_(enable) {} - void StartTiming(); - void StopTiming(); + void StartTiming() noexcept; + void StopTiming() noexcept; - void AddPrepareTime(); - void AddSelectTime(); - void AddPostprocessTime(); - void AddLoopTime(); - void AddIterations(int iters); - void StartSort(); - void StopSort(); + void AddPrepareTime() noexcept; + void AddSelectTime() noexcept; + void AddPostprocessTime() noexcept; + void AddLoopTime() noexcept; + void AddIterations(int iters) noexcept; + void StartSort() noexcept; + void StopSort() noexcept; void PutCount(int cnt) noexcept { count_ = cnt; } - void PutSortIndex(std::string_view index); - void PutSelectors(SelectIteratorContainer *qres); - void PutJoinedSelectors(JoinedSelectors *jselectors); + void PutSortIndex(std::string_view index) noexcept; + void PutSelectors(SelectIteratorContainer *qres) noexcept; + void PutJoinedSelectors(JoinedSelectors *jselectors) noexcept; void SetSortOptimization(bool enable) noexcept { sortOptimization_ = enable; } void LogDump(int 
logLevel); std::string GetJSON(); + Duration Total() const noexcept { return total_; } + Duration Prepare() const noexcept { return prepare_; } + Duration Indexes() const noexcept { return select_; } + Duration Postprocess() const noexcept { return postprocess_; } + Duration Loop() const noexcept { return loop_; } + Duration Sort() const noexcept { return sort_; } + size_t Iterations() const noexcept { return iters_; } - static int To_us(const Duration &d); + static int To_us(const Duration &d) noexcept; bool IsEnabled() const noexcept { return enabled_; } private: - Duration lap(); + Duration lap() noexcept; static const char *JoinTypeName(JoinType jtype); time_point last_point_, sort_start_point_; @@ -65,7 +73,7 @@ class ExplainCalc { int iters_ = 0; int count_ = 0; bool sortOptimization_ = false; - const bool enabled_; + bool enabled_ = false; }; } // namespace reindexer diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index 33be61412..bdb1cf571 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -3,7 +3,6 @@ #include "core/queryresults/joinresults.h" #include "crashqueryreporter.h" #include "estl/multihash_map.h" -#include "explaincalc.h" #include "itemcomparator.h" #include "qresexplainholder.h" #include "querypreprocessor.h" @@ -27,7 +26,8 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte const_cast(&ctx.query)->debugLevel = ns_->config_.logLevel; } - ExplainCalc explain(ctx.query.explain_ || ctx.query.debugLevel >= LogInfo); + auto &explain = ctx.explain; + explain = ExplainCalc(ctx.query.explain_ || ctx.query.debugLevel >= LogInfo); ActiveQueryScope queryScope(ctx, ns_->optimizationState_, explain, ns_->locker_.IsReadOnly(), ns_->strHolder_.get()); explain.StartTiming(); @@ -248,7 +248,7 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte if (ctx.sortingContext.isOptimizationEnabled()) { auto it = 
ns_->indexes_[ctx.sortingContext.uncommitedIndex]->CreateIterator(); it->SetMaxIterations(ns_->items_.size()); - scan.emplace_back(it); + scan.emplace_back(std::move(it)); maxIterations = ns_->items_.size(); } else { // special case - no idset in query @@ -586,60 +586,107 @@ struct RelaxedHasher { }; class ForcedSortMap { - using MultiMap = MultiHashMap; - struct SingleTypeMap { +public: + using mapped_type = size_t; + +private: + using MultiMap = MultiHashMap; + struct SingleTypeMap : tsl::hopscotch_sc_map { KeyValueType type_; - fast_hash_map map_; }; using DataType = std::variant; + class Iterator : private std::variant { + using Base = std::variant; + + public: + using Base::Base; + const auto *operator->() const { + return std::visit(overloaded{[](MultiMap::Iterator it) { return it.operator->(); }, + [](SingleTypeMap::const_iterator it) { return it.operator->(); }}, + static_cast(*this)); + } + const auto &operator*() const { + return std::visit(overloaded{[](MultiMap::Iterator it) -> const auto &{ return *it; + } + , [](SingleTypeMap::const_iterator it) -> const auto & { return *it; }}, + static_cast(*this)); +} +}; // namespace reindexer public: - ForcedSortMap(Variant k, size_t v, size_t size) - : data_{k.Type().Is() || k.Type().Is() || k.Type().IsNumeric() - ? 
DataType{MultiMap{size}} - : DataType{SingleTypeMap{k.Type(), {}}}} { - std::visit(overloaded{[&](MultiMap &m) { m.insert(std::move(k), v); }, [&](SingleTypeMap &m) { m.map_.emplace(std::move(k), v); }}, - data_); - } - bool insert(Variant k, size_t v) { - return std::visit(overloaded{[&](MultiMap &m) { return m.insert(std::move(k), v); }, - [&](SingleTypeMap &m) { - if (!m.type_.IsSame(k.Type())) { - throw Error{errQueryExec, "Items of different types in forced sort list"}; - } - return m.map_.emplace(std::move(k), v).second; - }}, - data_); - } - bool contain(const Variant &k) const { - return std::visit(overloaded{[&k](const MultiMap &m) { return m.find(k) != m.cend(); }, - [&k](const SingleTypeMap &m) { - if (!m.type_.IsSame(k.Type())) { - throw Error{errQueryExec, "Items of different types in forced sort list"}; - } - return m.map_.find(k) != m.map_.end(); - }}, - data_); - } - size_t get(const Variant &k) const { - return std::visit(overloaded{[&k](const MultiMap &m) { - const auto it = m.find(k); - assertrx_throw(it != m.cend()); - return it->second; - }, - [&k](const SingleTypeMap &m) { - if (!m.type_.IsSame(k.Type())) { - throw Error{errQueryExec, "Items of different types in forced sort list"}; - } - const auto it = m.map_.find(k); - assertrx_throw(it != m.map_.end()); - return it->second; - }}, - data_); +ForcedSortMap(Variant k, mapped_type v, size_t size) + : data_{k.Type().Is() || k.Type().Is() || k.Type().IsNumeric() + ? 
DataType{MultiMap{size}} + : DataType{SingleTypeMap{{}, k.Type()}}} { + std::visit(overloaded{[&](MultiMap &m) { m.insert(std::move(k), v); }, [&](SingleTypeMap &m) { m.emplace(std::move(k), v); }}, data_); +} +std::pair emplace(Variant k, mapped_type v) & { + return std::visit(overloaded{[&](MultiMap &m) { + const auto [iter, success] = m.insert(std::move(k), v); + return std::make_pair(Iterator{iter}, success); + }, + [&](SingleTypeMap &m) { + if (!m.type_.IsSame(k.Type())) { + throw Error{errQueryExec, "Items of different types in forced sort list"}; + } + const auto [iter, success] = m.emplace(std::move(k), v); + return std::make_pair(Iterator{iter}, success); + }}, + data_); +} +bool contain(const Variant &k) const { + return std::visit(overloaded{[&k](const MultiMap &m) { return m.find(k) != m.cend(); }, + [&k](const SingleTypeMap &m) { + if (!m.type_.IsSame(k.Type())) { + throw Error{errQueryExec, "Items of different types in forced sort list"}; + } + return m.find(k) != m.end(); + }}, + data_); +} +mapped_type get(const Variant &k) const { + return std::visit(overloaded{[&k](const MultiMap &m) { + const auto it = m.find(k); + assertrx_throw(it != m.cend()); + return it->second; + }, + [&k](const SingleTypeMap &m) { + if (!m.type_.IsSame(k.Type())) { + throw Error{errQueryExec, "Items of different types in forced sort list"}; + } + const auto it = m.find(k); + assertrx_throw(it != m.end()); + return it->second; + }}, + data_); +} + +private: +DataType data_; +} +; + +template +class ForcedMapInserter { +public: + ForcedMapInserter(Map &m) noexcept : map_{m} {} + template + void Insert(V &&value) { + if (const auto [iter, success] = map_.emplace(std::forward(value), cost_); success) { + ++cost_; + } else if (iter->second != cost_ - 1) { + static constexpr auto errMsg = "Forced sort value '%s' is dublicated. 
Deduplicated by the first occurrence."; + if constexpr (std::is_same_v) { + logPrintf(LogInfo, errMsg, value.template As()); + } else { + logPrintf(LogInfo, errMsg, Variant{std::forward(value)}.template As()); + } + } } private: - DataType data_; + Map &map_; + typename Map::mapped_type cost_ = 1; }; template @@ -653,14 +700,9 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl &ns, It begin, It end, const It if (idx < ns.indexes_.firstCompositePos()) { // implementation for regular indexes fast_hash_map sortMap; - ItemRefVector::difference_type cost = 0; - for (auto value : forcedSortOrder) { - value.convert(fieldType); - if (!sortMap.emplace(std::move(value), cost).second) { - // NOLINTNEXTLINE(bugprone-use-after-move) - throw Error(errQueryExec, "Value '%s' used twice in forced sorting", value.As()); - } - cost++; + ForcedMapInserter inserter{sortMap}; + for (const auto &value : forcedSortOrder) { + inserter.Insert(value.convert(fieldType)); } VariantArray keyRefs; @@ -720,13 +762,10 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl &ns, It begin, It end, const It const auto &payloadType = ns.payloadType_; const FieldsSet &fields = ns.indexes_[idx]->Fields(); unordered_payload_map sortMap(0, payloadType, fields); - ItemRefVector::difference_type cost = 0; + ForcedMapInserter inserter{sortMap}; for (auto value : forcedSortOrder) { value.convert(fieldType, &payloadType, &fields); - if (!sortMap.insert({static_cast(value), cost}).second) { - throw Error(errQueryExec, "Value '%s' used twice in forced sorting", value.As()); - } - cost++; + inserter.Insert(static_cast(value)); } const auto boundary = std::stable_partition(begin, end, [&](const ItemRef &itemRef) { @@ -770,11 +809,9 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl &ns, It begin, It end, const It } } else { ForcedSortMap sortMap{forcedSortOrder[0], 0, forcedSortOrder.size()}; + ForcedMapInserter inserter{sortMap}; for (size_t i = 1, s = forcedSortOrder.size(); i < s; ++i) { - const auto &value = 
forcedSortOrder[i]; - if (!sortMap.insert(value, i)) { - throw Error(errQueryExec, "Value '%s' used twice in forced sorting", value.As()); - } + inserter.Insert(forcedSortOrder[i]); } VariantArray keyRefs; @@ -1406,100 +1443,240 @@ void NsSelecter::prepareSortingContext(SortingEntries &sortBy, SelectCtx &ctx, b ctx.sortingContext.exprResults.resize(ctx.sortingContext.expressions.size()); } -bool NsSelecter::isSortOptimizatonEffective(const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { - if (qentries.Size() == 0) { - return true; +enum class CostCountingPolicy : bool { Any, ExceptTargetSortIdxSeq }; + +template +class CostCalculator { +public: + CostCalculator(size_t _totalCost) noexcept : totalCost_(_totalCost) {} + void BeginSequence() noexcept { + isInSequence_ = true; + hasInappositeEntries_ = false; + onlyTargetSortIdxInSequence_ = true; + curCost_ = 0; } - if (qentries.Size() == 1 && qentries.HoldsOrReferTo(0)) { - const auto &qe = qentries.Get(0); - if (qe.idxNo == ctx.sortingContext.uncommitedIndex && SelectIteratorContainer::IsExpectingOrderedResults(qe)) { - return true; + void EndSequence() noexcept { + if (isInSequence_ && !hasInappositeEntries_) { + if constexpr (countingPolicy == CostCountingPolicy::Any) { + totalCost_ = std::min(curCost_, totalCost_); + } else if (!onlyTargetSortIdxInSequence_) { + totalCost_ = std::min(curCost_, totalCost_); + } } + isInSequence_ = false; + onlyTargetSortIdxInSequence_ = true; + curCost_ = 0; } - - size_t costNormal = ns_->items_.size() - ns_->free_.size(); - enum { SortIndexNotFound = 0, SortIndexFound, SortIndexHasUnorderedConditions } sortIndexSearchState = SortIndexNotFound; - - qentries.ExecuteAppropriateForEach( - Skip{}, - [this, &ctx, &rdxCtx, &costNormal, &sortIndexSearchState](const QueryEntry &qe) { - if (qe.idxNo < 0) return; - if (qe.idxNo == ctx.sortingContext.uncommitedIndex) { - if (sortIndexSearchState == SortIndexNotFound && 
!SelectIteratorContainer::IsExpectingOrderedResults(qe)) { - sortIndexSearchState = SortIndexHasUnorderedConditions; + bool IsInOrSequence() const noexcept { return isInSequence_; } + void Add(const SelectKeyResults &results, bool isTargetSortIndex) noexcept { + if constexpr (countingPolicy == CostCountingPolicy::ExceptTargetSortIdxSeq) { + if (!isInSequence_ && isTargetSortIndex) { + return; + } + } + onlyTargetSortIdxInSequence_ = onlyTargetSortIdxInSequence_ && isTargetSortIndex; + Add(results); + } + void Add(const SelectKeyResults &results) noexcept { + for (const SelectKeyResult &res : results) { + if (res.comparators_.empty()) { + if (isInSequence_) { + curCost_ += res.GetMaxIterations(totalCost_); } else { - sortIndexSearchState = SortIndexFound; + totalCost_ = std::min(totalCost_, res.GetMaxIterations(totalCost_)); } - return; + } else { + hasInappositeEntries_ = true; + break; } - if (costNormal == 0) return; - - auto &index = ns_->indexes_[qe.idxNo]; - if (IsFullText(index->Type())) return; - - Index::SelectOpts opts; - opts.disableIdSetCache = 1; - opts.itemsCountInNamespace = ns_->items_.size() - ns_->free_.size(); - opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; - opts.inTransaction = ctx.inTransaction; - - try { - SelectKeyResults reslts = index->SelectKey(qe.values, qe.condition, 0, opts, nullptr, rdxCtx); - for (const SelectKeyResult &res : reslts) { - if (res.comparators_.empty()) { - costNormal = std::min(costNormal, res.GetMaxIterations(costNormal)); - } + } + } + size_t TotalCost() const noexcept { return totalCost_; } + void MarkInapposite() noexcept { hasInappositeEntries_ = true; } + bool OnNewEntry(const QueryEntries &qentries, size_t i, size_t next) { + const OpType op = qentries.GetOperation(i); + switch (op) { + case OpAnd: { + EndSequence(); + if (next != qentries.Size() && qentries.GetOperation(next) == OpOr) { + BeginSequence(); } - } catch (const Error &) { + return true; } - }); + case OpOr: { + if 
(hasInappositeEntries_) { + return false; + } + if (next != qentries.Size() && qentries.GetOperation(next) == OpOr) { + BeginSequence(); + } + return true; + } + case OpNot: { + if (next != qentries.Size() && qentries.GetOperation(next) == OpOr) { + BeginSequence(); + } + hasInappositeEntries_ = true; + return false; + } + } + throw Error(errLogic, "Unexpected op value: %d", int(op)); + } + +private: + bool isInSequence_ = false; + bool onlyTargetSortIdxInSequence_ = true; + bool hasInappositeEntries_ = false; + size_t curCost_ = 0; + size_t totalCost_ = std::numeric_limits::max(); +}; + +size_t NsSelecter::calculateNormalCost(const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { + const size_t totalItemsCount = ns_->items_.size() - ns_->free_.size(); + CostCalculator costCalculator(totalItemsCount); + enum { SortIndexNotFound = 0, SortIndexFound, SortIndexHasUnorderedConditions } sortIndexSearchState = SortIndexNotFound; + for (size_t next, i = 0, sz = qentries.Size(); i != sz; i = next) { + next = qentries.Next(i); + const bool calculateEntry = costCalculator.OnNewEntry(qentries, i, next); + qentries.InvokeAppropriate( + i, Skip{}, [&costCalculator](const QueryEntriesBracket &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const JoinQueryEntry &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const BetweenFieldsQueryEntry &) { costCalculator.MarkInapposite(); }, + [&](const QueryEntry &qe) { + if (qe.idxNo < 0) { + costCalculator.MarkInapposite(); + return; + } + if (qe.idxNo == ctx.sortingContext.uncommitedIndex) { + if (sortIndexSearchState == SortIndexNotFound) { + const bool isExpectingIdSet = + qentries.GetOperation(i) == OpAnd && (next == sz || qentries.GetOperation(next) != OpOr); + if (isExpectingIdSet && !SelectIteratorContainer::IsExpectingOrderedResults(qe)) { + sortIndexSearchState = SortIndexHasUnorderedConditions; + return; + } else { + sortIndexSearchState = SortIndexFound; + } + } + if 
(!costCalculator.IsInOrSequence()) { + // Count cost only for the OR-sequences with mixed indexes: 'ANY_IDX OR TARGET_SORT_IDX', + // 'TARGET_SORT_IDX OR ANY_IDX1 OR ANY_IDX2', etc. + return; + } + } + + if (!calculateEntry || costCalculator.TotalCost() == 0 || sortIndexSearchState == SortIndexHasUnorderedConditions) { + return; + } + + auto &index = ns_->indexes_[qe.idxNo]; + if (IsFullText(index->Type())) { + costCalculator.MarkInapposite(); + return; + } + + Index::SelectOpts opts; + opts.disableIdSetCache = 1; + opts.itemsCountInNamespace = totalItemsCount; + opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; + opts.inTransaction = ctx.inTransaction; + + try { + SelectKeyResults reslts = index->SelectKey(qe.values, qe.condition, 0, opts, nullptr, rdxCtx); + costCalculator.Add(reslts, qe.idxNo == ctx.sortingContext.uncommitedIndex); + } catch (const Error &) { + costCalculator.MarkInapposite(); + } + }); + } + costCalculator.EndSequence(); if (sortIndexSearchState == SortIndexHasUnorderedConditions) { - return false; + return 0; + } + return costCalculator.TotalCost(); +} + +size_t NsSelecter::calculateOptimizedCost(size_t costNormal, const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { + // 'costOptimized == costNormal + 1' reduces internal iterations count for the tree in the res.GetMaxIterations() call + CostCalculator costCalculator(costNormal + 1); + for (size_t next, i = 0, sz = qentries.Size(); i != sz; i = next) { + next = qentries.Next(i); + if (!costCalculator.OnNewEntry(qentries, i, next)) { + continue; + } + qentries.InvokeAppropriate( + i, Skip{}, [&costCalculator](const QueryEntriesBracket &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const JoinQueryEntry &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const BetweenFieldsQueryEntry &) { costCalculator.MarkInapposite(); }, + [&](const QueryEntry &qe) { + if (qe.idxNo < 0 || qe.idxNo != ctx.sortingContext.uncommitedIndex) { + 
costCalculator.MarkInapposite(); + return; + } + + Index::SelectOpts opts; + opts.itemsCountInNamespace = ns_->items_.size() - ns_->free_.size(); + opts.disableIdSetCache = 1; + opts.unbuiltSortOrders = 1; + opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; + opts.inTransaction = ctx.inTransaction; + + try { + SelectKeyResults reslts = ns_->indexes_[qe.idxNo]->SelectKey(qe.values, qe.condition, 0, opts, nullptr, rdxCtx); + costCalculator.Add(reslts); + } catch (const Error &) { + costCalculator.MarkInapposite(); + } + }); + } + costCalculator.EndSequence(); + return costCalculator.TotalCost(); +} + +bool NsSelecter::isSortOptimizatonEffective(const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { + if (qentries.Size() == 0) { + return true; } - if (costNormal == 0) { + if (qentries.Size() == 1 && qentries.HoldsOrReferTo(0)) { + const auto &qe = qentries.Get(0); + if (qe.idxNo == ctx.sortingContext.uncommitedIndex) { + return SelectIteratorContainer::IsExpectingOrderedResults(qe); + } + } + + const size_t expectedMaxIterationsNormal = calculateNormalCost(qentries, ctx, rdxCtx); + if (expectedMaxIterationsNormal == 0) { return false; } - size_t costOptimized = ns_->items_.size() - ns_->free_.size(); - costNormal = size_t(double(costNormal) * log2(costNormal)); - if (costNormal < costOptimized) { - costOptimized = costNormal + 1; - qentries.ExecuteAppropriateForEach(Skip{}, - [this, &ctx, &rdxCtx, &costOptimized](const QueryEntry &qe) { - if (qe.idxNo < 0 || qe.idxNo != ctx.sortingContext.uncommitedIndex) return; - - Index::SelectOpts opts; - opts.itemsCountInNamespace = ns_->items_.size() - ns_->free_.size(); - opts.disableIdSetCache = 1; - opts.unbuiltSortOrders = 1; - opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; - opts.inTransaction = ctx.inTransaction; - - try { - SelectKeyResults reslts = ns_->indexes_[qe.idxNo]->SelectKey(qe.values, qe.condition, 0, - opts, nullptr, rdxCtx); - for (const SelectKeyResult &res 
: reslts) { - if (res.comparators_.empty()) { - costOptimized = std::min(costOptimized, res.GetMaxIterations(costOptimized)); - } - } - } catch (const Error &) { - } - }); - } else { + const size_t totalItemsCount = ns_->items_.size() - ns_->free_.size(); + const size_t costNormal = size_t(double(expectedMaxIterationsNormal) * log2(expectedMaxIterationsNormal)); + if (costNormal >= totalItemsCount) { + // Check if it's more effective to iterate over all the items via btree, than select and sort ids via the most effective index return true; } - if (costNormal < costOptimized && !ctx.isForceAll && ctx.query.HasLimit()) { + size_t costOptimized = calculateOptimizedCost(costNormal, qentries, ctx, rdxCtx); + if (costNormal >= costOptimized) { + return true; // If max iterations count with btree indexes is better than with any other condition (including sort overhead) + } + if (expectedMaxIterationsNormal <= 150) { + return false; // If there is very good filtering condition (case for the issues #1489) + } + if (ctx.isForceAll || ctx.query.HasLimit()) { + if (expectedMaxIterationsNormal < 2000) { + return false; // Skip attempt to check limit if there is good enough unordered filtering condition + } + } + if (!ctx.isForceAll && ctx.query.HasLimit()) { // If optimization will be disabled, selecter will must to iterate over all the results, ignoring limit // Experimental value. It was chosen during debugging request from issue #1402. // TODO: It's possible to evaluate this multiplier, based on the query conditions, but the only way to avoid corner cases is to // allow user to hint this optimization. - constexpr static unsigned kLimitMultiplier = 20; + const size_t limitMultiplier = std::max(size_t(20), size_t(totalItemsCount / expectedMaxIterationsNormal) * 4); const auto offset = ctx.query.HasOffset() ? 
ctx.query.start : 1; - costOptimized = kLimitMultiplier * (ctx.query.count + offset); + costOptimized = limitMultiplier * (ctx.query.count + offset); } - return costOptimized <= costNormal; } diff --git a/cpp_src/core/nsselecter/nsselecter.h b/cpp_src/core/nsselecter/nsselecter.h index bb4867eea..4b247ed6f 100644 --- a/cpp_src/core/nsselecter/nsselecter.h +++ b/cpp_src/core/nsselecter/nsselecter.h @@ -1,6 +1,7 @@ #pragma once #include "aggregator.h" #include "core/index/index.h" +#include "explaincalc.h" #include "joinedselector.h" #include "sortingcontext.h" @@ -28,6 +29,7 @@ struct SelectCtx { IsFTQuery isFtQuery = IsFTQuery::NotSet; const Query *parentQuery = nullptr; + ExplainCalc explain; bool requiresCrashTracking = false; }; @@ -88,6 +90,8 @@ class NsSelecter { template void sortResults(LoopCtx &sctx, It begin, It end, const SortingOptions &sortingOptions, const joins::NamespaceResults *); + size_t calculateNormalCost(const QueryEntries &qe, SelectCtx &ctx, const RdxContext &rdxCtx); + size_t calculateOptimizedCost(size_t costNormal, const QueryEntries &qe, SelectCtx &ctx, const RdxContext &rdxCtx); bool isSortOptimizatonEffective(const QueryEntries &qe, SelectCtx &ctx, const RdxContext &rdxCtx); static bool validateField(StrictMode strictMode, std::string_view name, std::string_view nsName, const TagsMatcher &tagsMatcher); void checkStrictModeAgg(StrictMode strictMode, const std::string &name, const std::string &nsName, diff --git a/cpp_src/core/nsselecter/querypreprocessor.cc b/cpp_src/core/nsselecter/querypreprocessor.cc index 94c40aa0f..19b14ba07 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.cc +++ b/cpp_src/core/nsselecter/querypreprocessor.cc @@ -191,10 +191,9 @@ void QueryPreprocessor::InitIndexNumbers() { }); } -size_t QueryPreprocessor::lookupQueryIndexes(size_t dst, const size_t srcBegin, const size_t srcEnd) { +size_t QueryPreprocessor::lookupQueryIndexes(uint16_t dst, uint16_t srcBegin, uint16_t srcEnd) { assertrx(dst <= srcBegin); - 
h_vector iidx(maxIndexes); - std::fill(iidx.begin(), iidx.begin() + maxIndexes, -1); + h_vector iidx(kMaxIndexes, uint16_t(0)); size_t merged = 0; for (size_t src = srcBegin, nextSrc; src < srcEnd; src = nextSrc) { nextSrc = Next(src); @@ -207,23 +206,24 @@ size_t QueryPreprocessor::lookupQueryIndexes(size_t dst, const size_t srcBegin, return true; }, [&](QueryEntry &entry) { - const bool isIndexField = (entry.idxNo != IndexValueType::SetByJsonPath); + const bool isIndexField = (entry.idxNo >= 0); if (isIndexField) { // try merge entries with AND opetator if ((GetOperation(src) == OpAnd) && (nextSrc >= srcEnd || GetOperation(nextSrc) != OpOr)) { - if (static_cast(entry.idxNo) >= iidx.size()) { + if (size_t(entry.idxNo) >= iidx.size()) { const auto oldSize = iidx.size(); - iidx.resize(entry.idxNo + 1); - std::fill(iidx.begin() + oldSize, iidx.begin() + iidx.size(), -1); + iidx.resize(size_t(entry.idxNo) + 1); + std::fill(iidx.begin() + oldSize, iidx.begin() + iidx.size(), 0); } auto &iidxRef = iidx[entry.idxNo]; - if (iidxRef >= 0 && !ns_.indexes_[entry.idxNo]->Opts().IsArray()) { - if (mergeQueryEntries(iidxRef, src)) { + if (iidxRef > 0 && !ns_.indexes_[entry.idxNo]->Opts().IsArray()) { + if (mergeQueryEntries(iidxRef - 1, src)) { ++merged; return false; } } else { - iidxRef = dst; + assertrx_throw(dst < std::numeric_limits::max() - 1); + iidxRef = dst + 1; } } } @@ -291,29 +291,32 @@ const std::vector *QueryPreprocessor::getCompositeIndex(int field) const { return nullptr; } -static void createCompositeKeyValues(const h_vector, 4> &values, const PayloadType &plType, Payload *pl, +static void createCompositeKeyValues(const h_vector, 4> &values, const PayloadType &plType, Payload &pl, VariantArray &ret, unsigned n) { - PayloadValue d(plType.TotalSize()); - Payload pl1(plType, d); - if (!pl) pl = &pl1; - - assertrx(n < values.size()); const auto &v = values[n]; for (auto it = v.second.cbegin(), end = v.second.cend(); it != end; ++it) { - pl->Set(v.first, {*it}); + 
pl.Set(v.first, *it); if (n + 1 < values.size()) { createCompositeKeyValues(values, plType, pl, ret, n + 1); } else { - PayloadValue pv(*pl->Value()); + PayloadValue pv(*(pl.Value())); pv.Clone(); - ret.push_back(Variant(std::move(pv))); + ret.emplace_back(std::move(pv)); } } } +static void createCompositeKeyValues(const h_vector, 4> &values, const PayloadType &plType, + VariantArray &ret) { + PayloadValue d(plType.TotalSize()); + Payload pl(plType, d); + createCompositeKeyValues(values, plType, pl, ret, 0); +} + size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const size_t to) { using composite_substitution_helpers::CompositeSearcher; using composite_substitution_helpers::EntriesRanges; + using composite_substitution_helpers::CompositeValuesCountLimits; size_t deleted = 0; CompositeSearcher searcher(ns_); @@ -344,25 +347,46 @@ size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const si } EntriesRanges deleteRanges; - for (auto resIdx = searcher.GetResult(); resIdx >= 0; resIdx = searcher.RemoveAndGetNext(resIdx)) { + h_vector, 4> values; + auto resIdx = searcher.GetResult(); + while (resIdx >= 0) { auto &res = searcher[resIdx]; - h_vector, 4> values; + values.clear(); + uint32_t resultSetSize = 0; + uint32_t maxSetSize = 0; for (auto i : res.entries) { auto &qe = Get(i); - if (!res.fields.contains(qe.idxNo)) { + if (rx_unlikely(!res.fields.contains(qe.idxNo))) { throw Error(errLogic, "Error during composite index's fields substitution (this should not happen)"); } - if (qe.condition == CondEq && qe.values.size() == 0) { + if (rx_unlikely(qe.condition == CondEq && qe.values.size() == 0)) { throw Error(errParams, "Condition EQ must have at least 1 argument, but provided 0"); } + maxSetSize = std::max(maxSetSize, qe.values.size()); + resultSetSize = (resultSetSize == 0) ? 
qe.values.size() : (resultSetSize * qe.values.size()); + } + static const CompositeValuesCountLimits kCompositeSetLimits; + if (resultSetSize != maxSetSize) { + // Do not perform substitution if result set size becoms larger than initial indexes set size + // and this size is greater than limit + // TODO: This is potential customization point for the user's hints system + if (resultSetSize > kCompositeSetLimits[res.entries.size()]) { + resIdx = searcher.RemoveUnusedAndGetNext(resIdx); + continue; + } + } + for (auto i : res.entries) { + auto &qe = Get(i); + const auto idxKeyType = ns_.indexes_[qe.idxNo]->KeyType(); for (auto &v : qe.values) { - v.convert(ns_.indexes_[qe.idxNo]->KeyType()); + v.convert(idxKeyType); } values.emplace_back(qe.idxNo, std::move(qe.values)); } { QueryEntry ce(CondSet, ns_.indexes_[res.idx]->Name(), res.idx); - createCompositeKeyValues(values, ns_.payloadType_, nullptr, ce.values, 0); + ce.values.reserve(resultSetSize); + createCompositeKeyValues(values, ns_.payloadType_, ce.values); if (ce.values.size() == 1) { ce.condition = CondEq; } @@ -371,6 +395,7 @@ size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const si container_[first].SetValue(std::move(ce)); } deleteRanges.Add(span(res.entries.data() + 1, res.entries.size() - 1)); + resIdx = searcher.RemoveUsedAndGetNext(resIdx); } for (auto rit = deleteRanges.rbegin(); rit != deleteRanges.rend(); ++rit) { Erase(rit->From(), rit->To()); diff --git a/cpp_src/core/nsselecter/querypreprocessor.h b/cpp_src/core/nsselecter/querypreprocessor.h index bf67045b5..31d3dbd29 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.h +++ b/cpp_src/core/nsselecter/querypreprocessor.h @@ -17,7 +17,9 @@ class QueryPreprocessor : private QueryEntries { QueryPreprocessor(QueryEntries &&, NamespaceImpl *, const SelectCtx &); const QueryEntries &GetQueryEntries() const noexcept { return *this; } bool LookupQueryIndexes() { - const size_t merged = lookupQueryIndexes(0, 0, container_.size() - 
queryEntryAddedByForcedSortOptimization_); + const unsigned lookupEnd = queryEntryAddedByForcedSortOptimization_ ? container_.size() - 1 : container_.size(); + assertrx_throw(lookupEnd <= uint32_t(std::numeric_limits::max() - 1)); + const size_t merged = lookupQueryIndexes(0, 0, lookupEnd); if (queryEntryAddedByForcedSortOptimization_) { container_[container_.size() - merged - 1] = std::move(container_.back()); } @@ -79,7 +81,7 @@ class QueryPreprocessor : private QueryEntries { [[nodiscard]] SortingEntries detectOptimalSortOrder() const; bool forcedStage() const noexcept { return evaluationsCount_ == (desc_ ? 1 : 0); } - size_t lookupQueryIndexes(size_t dst, size_t srcBegin, size_t srcEnd); + size_t lookupQueryIndexes(uint16_t dst, uint16_t srcBegin, uint16_t srcEnd); size_t substituteCompositeIndexes(size_t from, size_t to); bool mergeQueryEntries(size_t lhs, size_t rhs); const std::vector *getCompositeIndex(int field) const; diff --git a/cpp_src/core/nsselecter/selectiterator.cc b/cpp_src/core/nsselecter/selectiterator.cc index 9aca62d7d..9bc60dd4d 100644 --- a/cpp_src/core/nsselecter/selectiterator.cc +++ b/cpp_src/core/nsselecter/selectiterator.cc @@ -77,7 +77,7 @@ void SelectIterator::Start(bool reverse, int maxIterations) { } // Generic next implementation -bool SelectIterator::nextFwd(IdType minHint) { +bool SelectIterator::nextFwd(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; int minVal = INT_MAX; for (auto it = begin(); it != end(); it++) { @@ -112,7 +112,7 @@ bool SelectIterator::nextFwd(IdType minHint) { return lastVal_ != INT_MAX; } -bool SelectIterator::nextRev(IdType maxHint) { +bool SelectIterator::nextRev(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; int maxVal = INT_MIN; @@ -145,7 +145,7 @@ bool SelectIterator::nextRev(IdType maxHint) { } // Single idset next implementation -bool SelectIterator::nextFwdSingleIdset(IdType minHint) { +bool SelectIterator::nextFwdSingleIdset(IdType minHint) 
noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; auto it = begin(); if (it->useBtree_) { @@ -167,7 +167,7 @@ bool SelectIterator::nextFwdSingleIdset(IdType minHint) { return !(lastVal_ == INT_MAX); } -bool SelectIterator::nextRevSingleIdset(IdType maxHint) { +bool SelectIterator::nextRevSingleIdset(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; auto it = begin(); @@ -185,8 +185,10 @@ bool SelectIterator::nextRevSingleIdset(IdType maxHint) { return !(lastVal_ == INT_MIN); } +bool SelectIterator::nextUnbuiltSortOrders() noexcept { return begin()->indexForwardIter_->Next(); } + // Single range next implementation -bool SelectIterator::nextFwdSingleRange(IdType minHint) { +bool SelectIterator::nextFwdSingleRange(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; if (lastVal_ < begin()->rBegin_) lastVal_ = begin()->rBegin_ - 1; @@ -196,7 +198,7 @@ bool SelectIterator::nextFwdSingleRange(IdType minHint) { return (lastVal_ != INT_MAX); } -bool SelectIterator::nextRevSingleRange(IdType maxHint) { +bool SelectIterator::nextRevSingleRange(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; if (lastVal_ > begin()->rrBegin_) lastVal_ = begin()->rrBegin_ + 1; @@ -207,7 +209,7 @@ bool SelectIterator::nextRevSingleRange(IdType maxHint) { } // Unsorted next implementation -bool SelectIterator::nextUnsorted() { +bool SelectIterator::nextUnsorted() noexcept { if (lastIt_ == end()) { return false; } else if (lastIt_->it_ == lastIt_->end_) { @@ -230,8 +232,6 @@ bool SelectIterator::nextUnsorted() { return false; } -bool SelectIterator::nextUnbuiltSortOrders() { return begin()->indexForwardIter_->Next(); } - void SelectIterator::ExcludeLastSet(const PayloadValue &value, IdType rowId, IdType properRowId) { for (auto &comp : comparators_) comp.ExcludeDistinct(value, properRowId); if (type_ == UnbuiltSortOrdersIndex) { @@ -270,8 +270,12 @@ void SelectIterator::AppendAndBind(SelectKeyResult &other, const 
PayloadType &ty } double SelectIterator::Cost(int expectedIterations) const noexcept { - if (type_ == UnbuiltSortOrdersIndex) return -1; - if (forcedFirst_) return -GetMaxIterations(); + if (type_ == UnbuiltSortOrdersIndex) { + return -1; + } + if (forcedFirst_) { + return -GetMaxIterations(); + } double result{0.0}; if (!comparators_.empty()) { const auto jsonPathComparators = @@ -279,7 +283,13 @@ double SelectIterator::Cost(int expectedIterations) const noexcept { // Comparatos with non index fields must have much higher cost, than comparators with index fields result = jsonPathComparators ? (8 * double(expectedIterations) + jsonPathComparators + 1) : (double(expectedIterations) + 1); } - result += static_cast(distinct ? 1 : GetMaxIterations()) * size(); + if (distinct) { + result += size(); + } else if (type_ != SingleIdSetWithDeferedSort && type_ != RevSingleIdSetWithDeferedSort && !deferedExplicitSort) { + result += static_cast(GetMaxIterations()) * size(); + } else { + result += static_cast(CostWithDefferedSort(size(), GetMaxIterations(), expectedIterations)); + } return isNotOperation_ ? expectedIterations + result : result; } diff --git a/cpp_src/core/nsselecter/selectiterator.h b/cpp_src/core/nsselecter/selectiterator.h index 63a51f45a..0bba0546c 100644 --- a/cpp_src/core/nsselecter/selectiterator.h +++ b/cpp_src/core/nsselecter/selectiterator.h @@ -32,11 +32,11 @@ class SelectIterator : public SelectKeyResult { void Start(bool reverse, int maxIterations); /// Signalizes if iteration is over. /// @return true if iteration is done. - inline bool End() const noexcept { return lastVal_ == (isReverse_ ? INT_MIN : INT_MAX) && !comparators_.size(); } + RX_ALWAYS_INLINE bool End() const noexcept { return lastVal_ == (isReverse_ ? INT_MIN : INT_MAX) && !comparators_.size(); } /// Iterates to a next item of result. /// @param minHint - rowId value to start from. /// @return true if operation succeed. 
- inline bool Next(IdType minHint) { + RX_ALWAYS_INLINE bool Next(IdType minHint) { bool res = false; switch (type_) { case Forward: @@ -73,7 +73,7 @@ class SelectIterator : public SelectKeyResult { } /// Sets Unsorted iteration mode - inline void SetUnsorted() noexcept { isUnsorted = true; } + RX_ALWAYS_INLINE void SetUnsorted() noexcept { isUnsorted = true; } /// Current rowId IdType Val() const noexcept; @@ -92,12 +92,13 @@ class SelectIterator : public SelectKeyResult { /// Uses each comparator to compare with pl. /// @param pl - PayloadValue to be compared. /// @param rowId - rowId. - inline bool TryCompare(const PayloadValue &pl, int rowId) noexcept { - for (auto &cmp : comparators_) + RX_ALWAYS_INLINE bool TryCompare(const PayloadValue &pl, int rowId) { + for (auto &cmp : comparators_) { if (cmp.Compare(pl, rowId)) { matchedCount_++; return true; } + } return false; } /// @return amonut of matched items @@ -139,14 +140,14 @@ class SelectIterator : public SelectKeyResult { // Iterates to a next item of result // depending on iterator type starting // from minHint which is the least rowId. 
- bool nextFwd(IdType minHint); - bool nextRev(IdType minHint); - bool nextFwdSingleRange(IdType minHint); - bool nextFwdSingleIdset(IdType minHint); - bool nextRevSingleRange(IdType minHint); - bool nextRevSingleIdset(IdType minHint); - bool nextUnbuiltSortOrders(); - bool nextUnsorted(); + bool nextFwd(IdType minHint) noexcept; + bool nextRev(IdType minHint) noexcept; + bool nextFwdSingleRange(IdType minHint) noexcept; + bool nextFwdSingleIdset(IdType minHint) noexcept; + bool nextRevSingleRange(IdType minHint) noexcept; + bool nextRevSingleIdset(IdType minHint) noexcept; + bool nextUnbuiltSortOrders() noexcept; + bool nextUnsorted() noexcept; /// Performs ID sets merge and sort in case, when this sort was defered earlier and still effective with current maxIterations value bool applyDeferedSort(int maxIterations) { diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.cc b/cpp_src/core/nsselecter/selectiteratorcontainer.cc index 311a78b7d..48ee368ab 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.cc +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.cc @@ -1,4 +1,5 @@ #include "selectiteratorcontainer.h" +#include #include #include "core/namespace/namespaceimpl.h" #include "core/rdxcontext.h" @@ -15,9 +16,7 @@ void SelectIteratorContainer::SortByCost(int expectedIterations) { indexes.resize(container_.size()); costs.resize(container_.size()); } - for (size_t i = 0; i < container_.size(); ++i) { - indexes[i] = i; - } + std::iota(indexes.begin(), indexes.begin() + container_.size(), 0); sortByCost(indexes, costs, 0, container_.size(), expectedIterations); for (size_t i = 0; i < container_.size(); ++i) { if (indexes[i] != i) { @@ -299,7 +298,14 @@ void SelectIteratorContainer::processJoinEntry(const JoinQueryEntry &jqe, OpType } void SelectIteratorContainer::processQueryEntryResults(SelectKeyResults &selectResults, OpType op, const NamespaceImpl &ns, - const QueryEntry &qe, bool isIndexFt, bool isIndexSparse, bool nonIndexField) { + const 
QueryEntry &qe, bool isIndexFt, bool isIndexSparse, bool nonIndexField, + std::optional nextOp) { + if (selectResults.empty()) { + if (op == OpAnd) { + Append(OpAnd, AlwaysFalse{}); + } + return; + } for (SelectKeyResult &res : selectResults) { switch (op) { case OpOr: { @@ -334,8 +340,9 @@ void SelectIteratorContainer::processQueryEntryResults(SelectKeyResults &selectR SelectIterator &lastAppended = lastAppendedIt->Value(); lastAppended.Bind(ns.payloadType_, qe.idxNo); lastAppended.SetNotOperationFlag(op == OpNot); - const int cur = op == OpNot ? ns.items_.size() - lastAppended.GetMaxIterations() : lastAppended.GetMaxIterations(); - if (lastAppended.comparators_.empty()) { + const auto maxIterations = lastAppended.GetMaxIterations(); + const int cur = op == OpNot ? ns.items_.size() - maxIterations : maxIterations; + if (lastAppended.comparators_.empty() && (!nextOp.has_value() || nextOp.value() != OpOr)) { if (cur && cur < maxIterations_) maxIterations_ = cur; if (!cur) wasZeroIterations_ = true; } @@ -512,7 +519,11 @@ bool SelectIteratorContainer::prepareIteratorsForSelectLoop(QueryPreprocessor &q selectResults = processQueryEntry(qe, enableSortIndexOptimize, ns, sortId, isQueryFt, selectFnc, isIndexFt, isIndexSparse, ftCtx, qPreproc, rdxCtx); } - processQueryEntryResults(selectResults, op, ns, qe, isIndexFt, isIndexSparse, nonIndexField); + std::optional nextOp; + if (next != end) { + nextOp = queries.GetOperation(next); + } + processQueryEntryResults(selectResults, op, ns, qe, isIndexFt, isIndexSparse, nonIndexField, nextOp); if (op != OpOr) { for (auto &ep : equalPositions) { const auto lastPosition = ep.queryEntriesPositions.back(); @@ -631,7 +642,13 @@ IdType SelectIteratorContainer::next(const_iterator it, IdType from) { return from; }, [from](const JoinSelectIterator &) { return from; }, [from](const FieldsComparator &) { return from; }, - [from](const AlwaysFalse &) { return from; }); + [](const AlwaysFalse &) { + if constexpr (reverse) { + return 
std::numeric_limits::lowest(); + } else { + return std::numeric_limits::max(); + } + }); } template diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.h b/cpp_src/core/nsselecter/selectiteratorcontainer.h index 556a70cdc..6b07c7275 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.h +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.h @@ -104,7 +104,7 @@ class SelectIteratorContainer void processField(FieldsComparator &, std::string_view field, int idxNo, const NamespaceImpl &ns) const; void processJoinEntry(const JoinQueryEntry &, OpType); void processQueryEntryResults(SelectKeyResults &selectResults, OpType, const NamespaceImpl &ns, const QueryEntry &qe, bool isIndexFt, - bool isIndexSparse, bool nonIndexField); + bool isIndexSparse, bool nonIndexField, std::optional nextOp); struct EqualPositions { h_vector queryEntriesPositions; size_t positionToInsertIterator = 0; diff --git a/cpp_src/core/nsselecter/substitutionhelpers.h b/cpp_src/core/nsselecter/substitutionhelpers.h index 8450a6b6c..c7ade04ff 100644 --- a/cpp_src/core/nsselecter/substitutionhelpers.h +++ b/cpp_src/core/nsselecter/substitutionhelpers.h @@ -7,19 +7,35 @@ namespace reindexer { namespace composite_substitution_helpers { +class CompositeValuesCountLimits { +public: + uint32_t operator[](uint32_t fieldsCount) const noexcept { + if (rx_unlikely(fieldsCount >= limits_.size())) { + return kMaxValuesCount; + } + return limits_[fieldsCount]; + } + +private: + constexpr static uint32_t kMaxValuesCount = 4000; + + std::array limits_ = {0, 0, 300, 1000, 2000, 4000}; +}; + class CompositeSearcher { public: struct IndexData { - IndexData(int field, int _idx, unsigned entry) : fields{field}, idx{_idx}, entries{entry} {} + IndexData(int field, int _idx, uint16_t entry) : fields(field), idx(_idx), entries{entry} {} IndexesFieldsSet fields; int idx; - h_vector entries; + h_vector entries; }; CompositeSearcher(const NamespaceImpl &ns) noexcept : ns_(ns) {} void Add(int field, const 
std::vector &composites, unsigned entry) { + assertrx_throw(entry < std::numeric_limits::max()); for (auto composite : composites) { const auto idxType = ns_.indexes_[composite]->Type(); if (idxType != IndexCompositeBTree && idxType != IndexCompositeHash) { @@ -46,9 +62,10 @@ class CompositeSearcher { auto &data = d_[i]; const auto &idxFields = ns_.indexes_[data.idx]->Fields(); // If all of the composite fields were found in query - if (data.fields.size() == idxFields.size() && idxFields.contains(data.fields)) { - if (data.fields.size() > maxSize) { - maxSize = data.fields.size(); + const auto dfCnt = data.fields.count(); + if (dfCnt == idxFields.size() && idxFields.contains(data.fields)) { + if (dfCnt > maxSize) { + maxSize = dfCnt; res = i; } } else { @@ -57,22 +74,32 @@ class CompositeSearcher { } return res; } - int RemoveAndGetNext(unsigned curId) noexcept { + int RemoveUnusedAndGetNext(uint16_t curId) noexcept { + if (unsigned(curId) + 1 != d_.size()) { + std::swap(d_[curId], d_.back()); + } + d_.pop_back(); + return GetResult(); + } + int RemoveUsedAndGetNext(uint16_t curId) noexcept { int res = -1; unsigned deleted = 1; unsigned maxSize = 0; - if (curId + 1 != d_.size()) { + if (unsigned(curId) + 1 != d_.size()) { std::swap(d_[curId], d_.back()); } const auto &cur = d_.back(); - for (unsigned i = 0; i < d_.size() - deleted; ++i) { + for (unsigned i = 0, sz = d_.size(); i < sz - deleted; ++i) { auto &data = d_[i]; if (haveIntersection(data.entries, cur.entries)) { - std::swap(data, d_[d_.size() - ++deleted]); + std::swap(data, d_[sz - ++deleted]); --i; - } else if (data.fields.size() > maxSize) { - res = i; - maxSize = data.fields.size(); + } else { + const auto dfCnt = data.fields.count(); + if (dfCnt > maxSize) { + res = i; + maxSize = dfCnt; + } } } while (deleted--) { @@ -80,16 +107,16 @@ class CompositeSearcher { } return res; } - const IndexData &operator[](unsigned i) const noexcept { return d_[i]; } + const IndexData &operator[](uint16_t i) const 
noexcept { return d_[i]; } private: - void remove(unsigned i) noexcept { - if (i + 1 != d_.size()) { + void remove(uint16_t i) noexcept { + if (unsigned(i) + 1 != d_.size()) { std::swap(d_[i], d_.back()); } d_.pop_back(); } - static bool haveIntersection(const h_vector &lEntries, const h_vector &rEntries) noexcept { + static bool haveIntersection(const h_vector &lEntries, const h_vector &rEntries) noexcept { for (auto lit = lEntries.begin(), rit = rEntries.begin(); lit != lEntries.end() && rit != rEntries.end();) { if (*lit < *rit) { ++lit; @@ -102,20 +129,20 @@ class CompositeSearcher { return false; } - h_vector d_; + h_vector d_; const NamespaceImpl &ns_; }; // EntriesRange - query entries range. [from; to) class EntriesRange { public: - EntriesRange(unsigned from, unsigned to) : from_(from), to_(to) { + EntriesRange(uint16_t from, uint16_t to) : from_(from), to_(to) { if (to_ <= from_) { throw Error(errLogic, "Unexpected range boarders during indexes substitution: [%u,%u)", from_, to_); } } - unsigned From() const noexcept { return from_; } - unsigned To() const noexcept { return to_; } + uint16_t From() const noexcept { return from_; } + uint16_t To() const noexcept { return to_; } void ExtendRight() noexcept { ++to_; } void ExtendLeft() { if (!from_) { @@ -130,11 +157,11 @@ class EntriesRange { } return false; } - unsigned Size() const noexcept { return to_ - from_; } + uint16_t Size() const noexcept { return to_ - from_; } private: - unsigned from_; - unsigned to_; + uint16_t from_; + uint16_t to_; }; // EntriesRanges - contains ordered vector of entries ranges. 
Ranges can not intercept with each other @@ -145,7 +172,7 @@ class EntriesRanges : h_vector { Base::const_reverse_iterator rbegin() const noexcept { return Base::rbegin(); } Base::const_reverse_iterator rend() const noexcept { return Base::rend(); } - void Add(span entries) { + void Add(span entries) { for (auto entry : entries) { auto insertionPos = Base::end(); bool wasMerged = false; @@ -188,7 +215,7 @@ class EntriesRanges : h_vector { } } if (!wasMerged) { - Base::insert(insertionPos, EntriesRange{entry, entry + 1}); + Base::insert(insertionPos, EntriesRange{entry, uint16_t(entry + 1)}); } } } diff --git a/cpp_src/core/payload/fieldsset.cc b/cpp_src/core/payload/fieldsset.cc index 453b7e33d..80a6b85c6 100644 --- a/cpp_src/core/payload/fieldsset.cc +++ b/cpp_src/core/payload/fieldsset.cc @@ -4,15 +4,17 @@ namespace reindexer { [[noreturn]] void IndexesFieldsSet::throwMaxValueError(int f) { - static_assert(std::numeric_limits::digits >= maxIndexes, "mask_ needs to provide 'maxIndexes' bits or more"); - throw Error(errLogic, "Can not push_back(%d) to IndexesFieldsSet. Value must be in scope [-1,%d]", f, maxIndexes); + throw Error(errLogic, "Can not push_back(%d) to IndexesFieldsSet. Value must be in scope [-1,%d]", f, kMaxIndexes - 1); } FieldsSet::FieldsSet(const TagsMatcher &tagsMatcher, const h_vector &fields) : mask_(0) { - static_assert(std::numeric_limits::digits >= maxIndexes, "mask_ needs to provide 'maxIndexes' bits or more"); for (const std::string &str : fields) { tagsPaths_.emplace_back(tagsMatcher.path2tag(str)); } } +void FieldsSet::throwMaxValueError(int f) { + throw Error(errLogic, "Can not push_back(%d) to FieldsSet. 
Value must be in scope [-1,%d]", f, kMaxIndexes - 1); +} + } // namespace reindexer diff --git a/cpp_src/core/payload/fieldsset.h b/cpp_src/core/payload/fieldsset.h index 6fa45d96a..b04f62035 100644 --- a/cpp_src/core/payload/fieldsset.h +++ b/cpp_src/core/payload/fieldsset.h @@ -10,38 +10,35 @@ namespace reindexer { class TagsMatcher; -static constexpr int maxIndexes = 64; +static constexpr int kMaxIndexes = 256; // 'tuple'-index always occupies 1 slot -using base_fields_set = h_vector; +using base_fields_set = h_vector; +static_assert(std::numeric_limits::max() >= kMaxIndexes, + "base_fields_set must be able to store any indexed field number"); +static_assert(std::numeric_limits::min() <= SetByJsonPath, + "base_fields_set must be able to store non-indexed fields"); +static_assert(sizeof(std::bitset) == 32, "Expecting no overhead from std::bitset"); using FieldsPath = std::variant; class IndexesFieldsSet { public: IndexesFieldsSet() noexcept = default; - IndexesFieldsSet(std::initializer_list l) { - for (auto i : l) { - push_back(i); - } - } - bool contains(int f) const noexcept { return f >= 0 && f <= maxIndexes && (mask_ & (1ULL << f)); } + IndexesFieldsSet(int f) { push_back(f); } + bool contains(int f) const noexcept { return f >= 0 && f < kMaxIndexes && mask_.test(unsigned(f)); } void push_back(int f) { if (f < 0) return; - if (f > maxIndexes) { + if (f >= kMaxIndexes) { throwMaxValueError(f); } - if (!contains(f)) { - mask_ |= 1ULL << f; - ++count_; - } + mask_.set(unsigned(f)); } - uint64_t mask() const noexcept { return mask_; } - unsigned size() const noexcept { return count_; } + const std::bitset &mask() const &noexcept { return mask_; } + const std::bitset &mask() const && = delete; + unsigned count() const noexcept { return mask_.count(); } private: [[noreturn]] void throwMaxValueError(int f); - - uint64_t mask_ = 0; - unsigned count_ = 0; + std::bitset mask_; }; class FieldsSet : protected base_fields_set { @@ -53,13 +50,14 @@ class FieldsSet : 
protected base_fields_set { using base_fields_set::empty; using base_fields_set::operator[]; FieldsSet(const TagsMatcher &, const h_vector &fields); - FieldsSet(std::initializer_list l) : mask_(0) { + FieldsSet(int f) { push_back(f); } + FieldsSet(std::initializer_list l) { for (auto f : l) push_back(f); } - FieldsSet(std::initializer_list l) : mask_(0) { + FieldsSet(std::initializer_list l) { for (const TagsPath &tagsPath : l) push_back(tagsPath); } - FieldsSet(std::initializer_list l) : mask_(0) { + FieldsSet(std::initializer_list l) { for (const IndexedTagsPath &tagsPath : l) push_back(tagsPath); } FieldsSet() = default; @@ -108,38 +106,42 @@ class FieldsSet : protected base_fields_set { } void push_back(int f) { - if (f == IndexValueType::SetByJsonPath) return; - assertrx(f < maxIndexes); + if (f < 0) return; + if (f >= kMaxIndexes) { + throwMaxValueError(f); + } if (!contains(f)) { - mask_ |= 1ULL << f; + mask_.set(unsigned(f)); base_fields_set::push_back(f); } } void push_front(int f) { - if (f == IndexValueType::SetByJsonPath) return; - assertrx(f < maxIndexes); + if (f < 0) return; + if (f >= kMaxIndexes) { + throwMaxValueError(f); + } if (!contains(f)) { - mask_ |= 1ULL << f; + mask_.set(unsigned(f)); base_fields_set::insert(begin(), f); } } void erase(int f) { - bool byJsonPath = (f == IndexValueType::SetByJsonPath); + const bool byJsonPath = (f < 0); if (byJsonPath || contains(f)) { auto it = std::find(begin(), end(), f); assertrx(it != end()); base_fields_set::erase(it); - if (!byJsonPath) mask_ &= ~(1ULL << f); + if (!byJsonPath) mask_.reset(unsigned(f)); } } - bool contains(int f) const noexcept { return mask_ & (1ULL << f); } - bool contains(const FieldsSet &f) const noexcept { return mask_ && ((mask_ & f.mask_) == f.mask_); } + bool contains(int f) const noexcept { return f >= 0 && f < kMaxIndexes && mask_.test(unsigned(f)); } + bool contains(const FieldsSet &f) const noexcept { return (mask_ & f.mask_) == f.mask_; } bool contains(std::string_view 
jsonPath) const noexcept { return std::find(jsonPaths_.begin(), jsonPaths_.end(), jsonPath) != jsonPaths_.end(); } - bool contains(const IndexesFieldsSet &f) const noexcept { return mask_ && ((mask_ & f.mask()) == f.mask()); } + bool contains(const IndexesFieldsSet &f) const noexcept { return (mask_ & f.mask()) == f.mask(); } bool contains(const TagsPath &tagsPath) const noexcept { for (const FieldsPath &path : tagsPaths_) { if (path.index() == 0) { @@ -187,7 +189,7 @@ class FieldsSet : protected base_fields_set { base_fields_set::clear(); tagsPaths_.clear(); jsonPaths_.clear(); - mask_ = 0; + mask_.reset(); } size_t getTagsPathsLength() const noexcept { return tagsPaths_.size(); } @@ -197,9 +199,12 @@ class FieldsSet : protected base_fields_set { assertrx(idx < tagsPaths_.size()); return (tagsPaths_[idx].index() == 1); } - const TagsPath &getTagsPath(size_t idx) const { return std::get(tagsPaths_[idx]); } - const IndexedTagsPath &getIndexedTagsPath(size_t idx) const { return std::get(tagsPaths_[idx]); } - const std::string &getJsonPath(size_t idx) const { return jsonPaths_[idx]; } + const TagsPath &getTagsPath(size_t idx) const & { return std::get(tagsPaths_[idx]); } + const TagsPath &getTagsPath(size_t idx) const && = delete; + const IndexedTagsPath &getIndexedTagsPath(size_t idx) const & { return std::get(tagsPaths_[idx]); } + const IndexedTagsPath &getIndexedTagsPath(size_t idx) const && = delete; + const std::string &getJsonPath(size_t idx) const &noexcept { return jsonPaths_[idx]; } + const std::string &getJsonPath(size_t idx) const && = delete; bool operator==(const FieldsSet &f) const noexcept { return (mask_ == f.mask_) && (tagsPaths_ == f.tagsPaths_) && (jsonPaths_ == jsonPaths_); @@ -214,7 +219,7 @@ class FieldsSet : protected base_fields_set { if (it != b) os << ", "; os << *it; } - os << "], mask: " << std::bitset<64>{mask_} << ", tagsPaths: ["; + os << "], mask: " << mask_ << ", tagsPaths: ["; for (auto b = tagsPaths_.cbegin(), it = b, e = 
tagsPaths_.cend(); it != e; ++it) { if (it != b) os << ", "; std::visit(fieldsPathDumper, *it); @@ -236,8 +241,9 @@ class FieldsSet : protected base_fields_set { } return (i == count); } + [[noreturn]] void throwMaxValueError(int f); - uint64_t mask_ = 0; + std::bitset mask_; h_vector tagsPaths_; /// Json paths to non indexed fields. /// Necessary only for composite full text diff --git a/cpp_src/core/payload/payloadfieldvalue.cc b/cpp_src/core/payload/payloadfieldvalue.cc index 8a0ccf296..e9a0d6a06 100644 --- a/cpp_src/core/payload/payloadfieldvalue.cc +++ b/cpp_src/core/payload/payloadfieldvalue.cc @@ -1,82 +1,10 @@ #include "payloadfieldvalue.h" -#include "core/keyvalue/p_string.h" -#include "core/keyvalue/uuid.h" -#include "estl/one_of.h" -#include "tools/stringstools.h" namespace reindexer { -void PayloadFieldValue::Set(Variant kv) { - t_.Type().EvaluateOneOf(overloaded{[&kv](KeyValueType::Int64) { - if (kv.Type().Is()) kv.convert(KeyValueType::Int64{}); - }, - [&kv](KeyValueType::Int) { - if (kv.Type().Is()) kv.convert(KeyValueType::Int{}); - }, - [&kv](KeyValueType::Uuid) { - if (kv.Type().Is()) kv.convert(KeyValueType::Uuid{}); - }, - [](OneOf) noexcept {}}); - if (!kv.Type().IsSame(t_.Type())) { - throw Error(errLogic, "PayloadFieldValue::Set field '%s' type mismatch. 
passed '%s', expected '%s'\n", t_.Name(), kv.Type().Name(), - t_.Type().Name()); - } - - t_.Type().EvaluateOneOf([&](KeyValueType::Int) noexcept { *reinterpret_cast(p_) = int(kv); }, - [&](KeyValueType::Bool) noexcept { *reinterpret_cast(p_) = bool(kv); }, - [&](KeyValueType::Int64) noexcept { *reinterpret_cast(p_) = int64_t(kv); }, - [&](KeyValueType::Double) noexcept { *reinterpret_cast(p_) = double(kv); }, - [&](KeyValueType::String) noexcept { *reinterpret_cast(p_) = p_string(kv); }, - [&](KeyValueType::Uuid) noexcept { *reinterpret_cast(p_) = Uuid{kv}; }, - [](OneOf) noexcept { - assertrx(0); - abort(); - }); -} - -Variant PayloadFieldValue::Get(bool enableHold) const { - return t_.Type().EvaluateOneOf( - [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int64) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::String) { return Variant(*reinterpret_cast(p_), enableHold); }, - [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, - [](OneOf) noexcept -> Variant { - assertrx(0); - abort(); - }); -} -size_t PayloadFieldValue::Hash() const noexcept { - return t_.Type().EvaluateOneOf( - [&](KeyValueType::Bool) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int64) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Double) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::String) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Uuid) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [](OneOf) noexcept -> size_t { - assertrx(0); - abort(); - }); -} - -bool PayloadFieldValue::IsEQ(const PayloadFieldValue &o) const { - if 
(!t_.Type().IsSame(o.t_.Type())) return false; - return t_.Type().EvaluateOneOf( - [&](KeyValueType::Bool) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::Int) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::Int64) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::Double) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::String) { - return collateCompare(*reinterpret_cast(p_), *reinterpret_cast(o.p_), CollateOpts()) == 0; - }, - [&](KeyValueType::Uuid) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [](OneOf) noexcept -> bool { - assertrx(0); - abort(); - }); +void PayloadFieldValue::throwSetTypeMissmatch(const Variant& kv) { + throw Error(errLogic, "PayloadFieldValue::Set field '%s' type mismatch. passed '%s', expected '%s'\n", t_.Name(), kv.Type().Name(), + t_.Type().Name()); } } // namespace reindexer diff --git a/cpp_src/core/payload/payloadfieldvalue.h b/cpp_src/core/payload/payloadfieldvalue.h index 7ab1bf76f..011b01ee8 100644 --- a/cpp_src/core/payload/payloadfieldvalue.h +++ b/cpp_src/core/payload/payloadfieldvalue.h @@ -1,7 +1,11 @@ #pragma once +#include "core/keyvalue/p_string.h" +#include "core/keyvalue/uuid.h" #include "core/keyvalue/variant.h" +#include "estl/one_of.h" #include "payloadfieldtype.h" +#include "tools/stringstools.h" namespace reindexer { @@ -15,15 +19,87 @@ class PayloadFieldValue { // Construct object PayloadFieldValue(const PayloadFieldType &t, uint8_t *v) noexcept : t_(t), p_(v) {} // Single value operations - void Set(Variant kv); - Variant Get(bool enableHold = false) const; - size_t Hash() const noexcept; - bool IsEQ(const PayloadFieldValue &o) const; + void Set(Variant kv) { + t_.Type().EvaluateOneOf(overloaded{[&kv](KeyValueType::Int64) { + if (kv.Type().Is()) kv.convert(KeyValueType::Int64{}); + }, + [&kv](KeyValueType::Int) { + if 
(kv.Type().Is()) kv.convert(KeyValueType::Int{}); + }, + [&kv](KeyValueType::Uuid) { + if (kv.Type().Is()) kv.convert(KeyValueType::Uuid{}); + }, + [](OneOf) noexcept {}}); + if (!kv.Type().IsSame(t_.Type())) { + throwSetTypeMissmatch(kv); + } + + t_.Type().EvaluateOneOf( + [&](KeyValueType::Int) noexcept { *reinterpret_cast(p_) = int(kv); }, + [&](KeyValueType::Bool) noexcept { *reinterpret_cast(p_) = bool(kv); }, + [&](KeyValueType::Int64) noexcept { *reinterpret_cast(p_) = int64_t(kv); }, + [&](KeyValueType::Double) noexcept { *reinterpret_cast(p_) = double(kv); }, + [&](KeyValueType::String) noexcept { *reinterpret_cast(p_) = p_string(kv); }, + [&](KeyValueType::Uuid) noexcept { *reinterpret_cast(p_) = Uuid{kv}; }, + [](OneOf) noexcept { + assertrx(0); + abort(); + }); + } + Variant Get(bool enableHold = false) const { + return t_.Type().EvaluateOneOf( + [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int64) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::String) { return Variant(*reinterpret_cast(p_), enableHold); }, + [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, + [](OneOf) noexcept -> Variant { + assertrx(0); + abort(); + }); + } + size_t Hash() const noexcept { + return t_.Type().EvaluateOneOf( + [&](KeyValueType::Bool) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int64) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Double) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::String) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Uuid) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + 
[](OneOf) noexcept -> size_t { + assertrx(0); + abort(); + }); + } + bool IsEQ(const PayloadFieldValue &o) const { + if (!t_.Type().IsSame(o.t_.Type())) return false; + return t_.Type().EvaluateOneOf( + [&](KeyValueType::Bool) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [&](KeyValueType::Int) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [&](KeyValueType::Int64) noexcept { + return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); + }, + [&](KeyValueType::Double) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [&](KeyValueType::String) { + return collateCompare(*reinterpret_cast(p_), *reinterpret_cast(o.p_), + SortingPrioritiesTable()) == 0; + }, + [&](KeyValueType::Uuid) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [](OneOf) noexcept -> bool { + assertrx(0); + abort(); + }); + } // Type of value, not owning const PayloadFieldType &t_; // Value data, not owning uint8_t *p_; + +private: + [[noreturn]] void throwSetTypeMissmatch(const Variant &kv); }; } // namespace reindexer diff --git a/cpp_src/core/payload/payloadiface.cc b/cpp_src/core/payload/payloadiface.cc index 7a2b82422..6fe4f9e8a 100644 --- a/cpp_src/core/payload/payloadiface.cc +++ b/cpp_src/core/payload/payloadiface.cc @@ -105,7 +105,7 @@ VariantArray PayloadIface::GetIndexedArrayData(const IndexedTagsPath &tagsPat if (tagsPath.empty()) { throw Error(errParams, "GetIndexedArrayData(): tagsPath shouldn't be empty!"); } - if (field < 0 || field >= maxIndexes) { + if (field < 0 || field >= kMaxIndexes) { throw Error(errParams, "GetIndexedArrayData(): field must be a valid index number"); } VariantArray values; @@ -121,37 +121,6 @@ VariantArray PayloadIface::GetIndexedArrayData(const IndexedTagsPath &tagsPat return values; } -// Set element or array by field index -template -template ::value>::type *> -void PayloadIface::Set(std::string_view field, const VariantArray &keys, bool append) { - return 
Set(t_.FieldByName(field), keys, append); -} - -template -template ::value>::type *> -void PayloadIface::Set(int field, const VariantArray &keys, bool append) { - const auto size = keys.size(); - if (!t_.Field(field).IsArray() && size >= 1) { - Field(field).Set(keys[0]); - return; - } - - if (keys.IsNullValue()) { - ResizeArray(field, 0, append); - return; - } - - int pos = ResizeArray(field, size, append); - auto const *const arr = reinterpret_cast(Field(field).p_); - const auto elemSize = t_.Field(field).ElemSizeof(); - - for (const Variant &kv : keys) { - PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + (pos++) * elemSize); - pv.Set(kv); - } -} - template template ::value>::type *> void PayloadIface::SetSingleElement(int field, const Variant &key) { @@ -222,11 +191,6 @@ size_t PayloadIface::RealSize() const { return sz; } -template -PayloadFieldValue PayloadIface::Field(int field) const noexcept { - return PayloadFieldValue(t_.Field(field), v_->Ptr() + t_.Field(field).Offset()); -} - // Serialize field values template void PayloadIface::SerializeFields(WrSerializer &ser, const FieldsSet &fields) const { @@ -491,6 +455,24 @@ void PayloadIface::copyOrMoveStrings(int field, StrHolder &dest, bool copy) { } } +template +template ::value>::type *> +void PayloadIface::setArray(int field, const VariantArray &keys, bool append) { + if (keys.IsNullValue()) { + ResizeArray(field, 0, append); + return; + } + + int pos = ResizeArray(field, keys.size(), append); + auto const *const arr = reinterpret_cast(Field(field).p_); + const auto elemSize = t_.Field(field).ElemSizeof(); + + for (const Variant &kv : keys) { + PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + (pos++) * elemSize); + pv.Set(kv); + } +} + template void PayloadIface::MoveStrings(int field, StringsHolder &dest) { copyOrMoveStrings(field, dest, false); diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index 313043bd5..39569e4c3 100644 --- 
a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -54,7 +54,29 @@ class PayloadIface { // Set element or array by field index template ::value>::type * = nullptr> - void Set(int field, const VariantArray &keys, bool append = false); + void Set(int field, const VariantArray &keys, bool append = false) { + if (!t_.Field(field).IsArray() && keys.size() >= 1) { + Field(field).Set(keys[0]); + } else { + setArray(field, keys, append); + } + } + template ::value>::type * = nullptr> + void Set(int field, const Variant &key, bool append = false) { + if (t_.Field(field).IsArray()) { + Set(field, VariantArray{key}, append); + return; + } + Field(field).Set(key); + } + template ::value>::type * = nullptr> + void Set(int field, Variant &&key, bool append = false) { + if (t_.Field(field).IsArray()) { + Set(field, VariantArray{std::move(key)}, append); + return; + } + Field(field).Set(std::move(key)); + } // Set non-array element by field index template ::value>::type * = nullptr> @@ -62,7 +84,17 @@ class PayloadIface { // Set element or array by field index template ::value>::type * = nullptr> - void Set(std::string_view field, const VariantArray &keys, bool append = false); + void Set(std::string_view field, const VariantArray &keys, bool append = false) { + return Set(t_.FieldByName(field), keys, append); + } + template ::value>::type * = nullptr> + void Set(std::string_view field, const Variant &key, bool append = false) { + return Set(t_.FieldByName(field), key, append); + } + template ::value>::type * = nullptr> + void Set(std::string_view field, Variant &&key, bool append = false) { + return Set(t_.FieldByName(field), std::move(key), append); + } // Set element or array by field index and element index template ::value>::type * = nullptr> @@ -110,7 +142,7 @@ class PayloadIface { const h_vector &collateOpts) const; // Get PayloadFieldValue by field index - PayloadFieldValue Field(int field) const noexcept; + PayloadFieldValue Field(int 
field) const noexcept { return PayloadFieldValue(t_.Field(field), v_->Ptr() + t_.Field(field).Offset()); } // Add refs to strings - make payload value complete self holding void AddRefStrings() noexcept; @@ -135,6 +167,8 @@ class PayloadIface { T CopyWithRemovedFields(PayloadType t); template void copyOrMoveStrings(int field, StrHolder &dest, bool copy); + template ::value>::type * = nullptr> + void setArray(int field, const VariantArray &keys, bool append); // Array of elements types , not owning const PayloadTypeImpl &t_; diff --git a/cpp_src/core/query/sql/sqlencoder.cc b/cpp_src/core/query/sql/sqlencoder.cc index 996dfb347..4ed0e0bd5 100644 --- a/cpp_src/core/query/sql/sqlencoder.cc +++ b/cpp_src/core/query/sql/sqlencoder.cc @@ -261,7 +261,7 @@ WrSerializer &SQLEncoder::GetSQL(WrSerializer &ser, bool stripArgs) const { return ser; } -const char *opNames[] = {"-", "OR", "AND", "AND NOT"}; +static const char *opNames[] = {"-", "OR", "AND", "AND NOT"}; void SQLEncoder::dumpWhereEntries(QueryEntries::const_iterator from, QueryEntries::const_iterator to, WrSerializer &ser, bool stripArgs) const { diff --git a/cpp_src/core/query/sql/sqlsuggester.cc b/cpp_src/core/query/sql/sqlsuggester.cc index c1a623eb1..c8b2cc8da 100644 --- a/cpp_src/core/query/sql/sqlsuggester.cc +++ b/cpp_src/core/query/sql/sqlsuggester.cc @@ -90,7 +90,8 @@ void SQLSuggester::getMatchingFieldsNames(const std::string &token, std::vector< auto dotPos = token.find('.'); for (auto &idx : namespaces[0].indexes) { if (idx.name_ == "#pk" || idx.name_ == "-tuple") continue; - if (isBlank(token) || checkIfStartsWith(token, idx.name_, dotPos != std::string::npos)) { + if (isBlank(token) || (dotPos != std::string::npos ? 
checkIfStartsWith(token, idx.name_) + : checkIfStartsWith(token, idx.name_))) { if (dotPos == std::string::npos) { variants.push_back(idx.name_); } else { diff --git a/cpp_src/core/querycache.h b/cpp_src/core/querycache.h index 15b9204c5..4d98bd1d2 100644 --- a/cpp_src/core/querycache.h +++ b/cpp_src/core/querycache.h @@ -19,13 +19,10 @@ struct QueryTotalCountCacheVal { struct QueryCacheKey { QueryCacheKey() = default; - QueryCacheKey(const QueryCacheKey& other) : buf(other.buf) {} - QueryCacheKey& operator=(const QueryCacheKey& other) { - if (this != &other) { - buf = other.buf; - } - return *this; - } + QueryCacheKey(QueryCacheKey&& other) = default; + QueryCacheKey(const QueryCacheKey& other) = default; + QueryCacheKey& operator=(QueryCacheKey&& other) = default; + QueryCacheKey& operator=(const QueryCacheKey& other) = delete; QueryCacheKey(const Query& q) { WrSerializer ser; q.Serialize(ser, (SkipJoinQueries | SkipMergeQueries | SkipLimitOffset)); @@ -39,13 +36,13 @@ struct QueryCacheKey { }; struct EqQueryCacheKey { - bool operator()(const QueryCacheKey& lhs, const QueryCacheKey& rhs) const { + bool operator()(const QueryCacheKey& lhs, const QueryCacheKey& rhs) const noexcept { return (lhs.buf.size() == rhs.buf.size()) && (memcmp(lhs.buf.data(), rhs.buf.data(), lhs.buf.size()) == 0); } }; struct HashQueryCacheKey { - size_t operator()(const QueryCacheKey& q) const { + size_t operator()(const QueryCacheKey& q) const noexcept { uint64_t hash[2]; MurmurHash3_x64_128(q.buf.data(), q.buf.size(), 0, &hash); return hash[0]; diff --git a/cpp_src/core/queryresults/queryresults.h b/cpp_src/core/queryresults/queryresults.h index 0e46e83e5..74b3ffe9a 100644 --- a/cpp_src/core/queryresults/queryresults.h +++ b/cpp_src/core/queryresults/queryresults.h @@ -100,7 +100,7 @@ class QueryResults { struct Context; // precalc context size - static constexpr int kSizeofContext = 208; // sizeof(PayloadType) + sizeof(TagsMatcher) + sizeof(FieldsSet) + sizeof(shared_ptr); + static 
constexpr int kSizeofContext = 264; // sizeof(PayloadType) + sizeof(TagsMatcher) + sizeof(FieldsSet) + sizeof(shared_ptr); // Order of storing contexts for namespaces: // [0] - main NS context diff --git a/cpp_src/core/querystat.h b/cpp_src/core/querystat.h index de3bbfdec..4d80138c2 100644 --- a/cpp_src/core/querystat.h +++ b/cpp_src/core/querystat.h @@ -1,6 +1,7 @@ #pragma once #include +#include "core/nsselecter/explaincalc.h" #include "estl/fast_hash_map.h" #include "namespace/namespacestat.h" #include "perfstatcounter.h" @@ -57,7 +58,7 @@ class QueryStatCalculator { if (enable_) tmStart = std::chrono::high_resolution_clock::now(); } - QueryStatCalculator(Logger logger) : enable_(true), logger_(std::move(logger)) { + QueryStatCalculator(Logger logger, bool enable = true) : enable_(enable), logger_(std::move(logger)) { if (enable_) tmStart = std::chrono::high_resolution_clock::now(); } ~QueryStatCalculator() { @@ -77,12 +78,57 @@ class QueryStatCalculator { } } + template + auto LogDuration(Type& var, Method method, Args&&... args) { + return exec([&var, &method](Args&&... aa) { return (var.*method)(std::forward(aa)...); }, std::forward(args)...); + } + + template + auto LogFlushDuration(Type& var, Method method, Args&&... args) { + return LogDuration(var, method, std::forward(args)...); + } + + template + auto CreateLock(Type& var, Method method, Args&&... args) { + return LogDuration(args)...))::MutexType::mark)>( + var, method, std::forward(args)...); + } + + template