Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
fix name + tests (#587)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dronplane committed Mar 20, 2024
1 parent be29e85 commit 932b08b
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 68 deletions.
60 changes: 28 additions & 32 deletions core/analysis/multi_delimited_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ irs::analysis::analyzer::ptr Make(MultiDelimitedAnalyser::Options&& opts) {
return std::make_unique<MultiDelimitedTokenStreamGeneric>(std::move(opts));
}

constexpr std::string_view kDelimiterParamName{"delimiter"};
constexpr std::string_view kDelimiterParamName{"delimiters"};

bool ParseVpackOptions(VPackSlice slice,
MultiDelimitedAnalyser::Options& options) {
Expand All @@ -418,46 +418,42 @@ bool ParseVpackOptions(VPackSlice slice,
"Slice for multi_delimited_token_stream is not an object or string");
return false;
}
auto delim_array_slice = slice.get(kDelimiterParamName);
if (!delim_array_slice.isArray()) {
IRS_LOG_WARN(
absl::StrCat("Invalid type or missing '", kDelimiterParamName,
"' (array expected) for multi_delimited_token_stream from "
"VPack arguments"));
return false;
}

if (auto delim_array_slice = slice.get(kDelimiterParamName);
!delim_array_slice.isNone()) {
if (!delim_array_slice.isArray()) {
IRS_LOG_WARN(
absl::StrCat("Invalid type '", kDelimiterParamName,
"' (array expected) for multi_delimited_token_stream from "
"VPack arguments"));
for (auto delim : VPackArrayIterator(delim_array_slice)) {
if (!delim.isString()) {
IRS_LOG_WARN(absl::StrCat(
"Invalid type in '", kDelimiterParamName,
"' (string expected) for multi_delimited_token_stream from "
"VPack arguments"));
return false;
}
auto view = ViewCast<byte_type>(delim.stringView());

for (auto delim : VPackArrayIterator(delim_array_slice)) {
if (!delim.isString()) {
IRS_LOG_WARN(absl::StrCat(
"Invalid type in '", kDelimiterParamName,
"' (string expected) for multi_delimited_token_stream from "
"VPack arguments"));
return false;
}
auto view = ViewCast<byte_type>(delim.stringView());
if (view.empty()) {
IRS_LOG_ERROR("Delimiter list contains an empty string.");
return false;
}

if (view.empty()) {
IRS_LOG_ERROR("Delimiter list contains an empty string.");
for (const auto& known : options.delimiters) {
if (view.starts_with(known) || known.starts_with(view)) {
IRS_LOG_ERROR(
absl::StrCat("Some delimiters are a prefix of others. See `",
ViewCast<char>(bytes_view{known}), "` and `",
delim.stringView(), "`"));
return false;
}

for (const auto& known : options.delimiters) {
if (view.starts_with(known) || known.starts_with(view)) {
IRS_LOG_ERROR(
absl::StrCat("Some delimiters are a prefix of others. See `",
ViewCast<char>(bytes_view{known}), "` and `",
delim.stringView(), "`"));
return false;
}
}

options.delimiters.emplace_back(view);
}
}

options.delimiters.emplace_back(view);
}
return true;
}

Expand Down
121 changes: 85 additions & 36 deletions tests/analysis/multi_delimited_token_stream_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#include "analysis/multi_delimited_token_stream.hpp"
#include "gtest/gtest.h"
#include "tests_config.hpp"
#include "velocypack/Parser.h"

using namespace arangodb::velocypack;
using namespace irs::analysis;

namespace {

Expand All @@ -30,6 +34,9 @@ irs::bstring operator""_b(const char* ptr, std::size_t size) {
}

class multi_delimited_token_stream_tests : public ::testing::Test {
public:
static void SetUpTestCase() { MultiDelimitedAnalyser::init(); }

virtual void SetUp() {
// Code here will be called immediately after the constructor (right before
// each test).
Expand All @@ -48,15 +55,12 @@ class multi_delimited_token_stream_tests : public ::testing::Test {
// -----------------------------------------------------------------------------

// Compile-time check that the type name exposed through
// irs::type<MultiDelimitedAnalyser>::name() is "multi_delimiter".
TEST_F(multi_delimited_token_stream_tests, consts) {
static_assert("multi_delimiter" ==
irs::type<irs::analysis::MultiDelimitedAnalyser>::name());
static_assert("multi_delimiter" == irs::type<MultiDelimitedAnalyser>::name());
}

TEST_F(multi_delimited_token_stream_tests, test_delimiter) {
auto stream =
irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"a"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"a"_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("baccaad"));

Expand All @@ -83,10 +87,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter) {
}

TEST_F(multi_delimited_token_stream_tests, test_delimiter_empty_match) {
auto stream =
irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"."_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"."_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset(".."));

Expand All @@ -97,10 +99,9 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_empty_match) {
}

TEST_F(multi_delimited_token_stream_tests, test_delimiter_3) {
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
{.delimiters = {";"_b, ","_b, "|"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream =
MultiDelimitedAnalyser::Make({.delimiters = {";"_b, ","_b, "|"_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("a;b||c|d,ff"));

Expand Down Expand Up @@ -133,10 +134,9 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_3) {
}

TEST_F(multi_delimited_token_stream_tests, test_delimiter_5) {
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
auto stream = MultiDelimitedAnalyser::Make(
{.delimiters = {";"_b, ","_b, "|"_b, "."_b, ":"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("a:b||c.d,ff."));

Expand Down Expand Up @@ -169,10 +169,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_5) {
}

TEST_F(multi_delimited_token_stream_tests, test_delimiter_single_long) {
auto stream =
irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("foobarfoobazbarfoobar"));

Expand All @@ -197,9 +195,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_single_long) {
}

TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
auto stream = irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("foobar"));

Expand All @@ -216,10 +213,9 @@ TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
}

TEST_F(multi_delimited_token_stream_tests, multi_words) {
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
{.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream =
MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("fooxyzbarbazz"));

Expand All @@ -240,10 +236,9 @@ TEST_F(multi_delimited_token_stream_tests, multi_words) {
}

TEST_F(multi_delimited_token_stream_tests, multi_words_2) {
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
{.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream =
MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("foobarbaz"));

Expand All @@ -254,10 +249,9 @@ TEST_F(multi_delimited_token_stream_tests, multi_words_2) {
}

TEST_F(multi_delimited_token_stream_tests, trick_matching_1) {
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
{.delimiters = {"foo"_b, "ffa"_b}});
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
stream->type());
auto stream =
MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "ffa"_b}});
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());

ASSERT_TRUE(stream->reset("abcffoobar"));

Expand All @@ -276,3 +270,58 @@ TEST_F(multi_delimited_token_stream_tests, trick_matching_1) {
ASSERT_EQ(offset->end, 10);
ASSERT_FALSE(stream->next());
}

// Exercises construction and normalization of the "multi_delimiter" analyzer
// through the analyzer registry using VPack-encoded arguments:
//   * an unknown key ("delimiter") must be rejected,
//   * a non-array "delimiters" value must be rejected,
//   * a valid "delimiters" array must yield a working token stream,
//   * normalize() must succeed and emit an object with a "delimiters" array.
TEST_F(multi_delimited_token_stream_tests, construct) {
  // Converts a JSON document into the raw VPack bytes (held in a std::string)
  // that analyzers::get / analyzers::normalize take as their argument blob.
  const auto to_vpack = [](const std::string& json) {
    const auto builder = Parser::fromJson(json);
    const auto slice = builder->slice();
    return std::string(slice.startAs<char>(), slice.byteSize());
  };

  // wrong name
  {
    const auto args = to_vpack(R"({"delimiter":["a", "b"]})");
    auto stream = analyzers::get(
      "multi_delimiter", irs::type<irs::text_format::vpack>::get(), args);
    ASSERT_EQ(nullptr, stream);
  }

  // wrong type
  {
    const auto args = to_vpack(R"({"delimiters":1})");
    auto stream = analyzers::get(
      "multi_delimiter", irs::type<irs::text_format::vpack>::get(), args);
    ASSERT_EQ(nullptr, stream);
  }

  // valid definition: "aib" split on "a" and "b" leaves the single token "i"
  {
    const auto args = to_vpack(R"({"delimiters":["a", "b"]})");
    auto stream = analyzers::get(
      "multi_delimiter", irs::type<irs::text_format::vpack>::get(), args);
    ASSERT_NE(nullptr, stream);
    ASSERT_TRUE(stream->reset("aib"));
    ASSERT_TRUE(stream->next());
    auto* term = irs::get<irs::term_attribute>(*stream);
    // Fail the test instead of dereferencing a null attribute pointer.
    ASSERT_NE(nullptr, term);
    ASSERT_EQ("i", irs::ViewCast<char>(term->value));
    ASSERT_FALSE(stream->next());
  }

  // normalization keeps all delimiters
  {
    const auto args = to_vpack(R"({"delimiters":["a", "b", "c", "d"]})");
    std::string actual;
    // normalize() reports success via its return value; check it before
    // interpreting `actual` as a VPack slice.
    ASSERT_TRUE(analyzers::normalize(actual, "multi_delimiter",
                                     irs::type<irs::text_format::vpack>::get(),
                                     args));

    const Slice slice(reinterpret_cast<const uint8_t*>(actual.data()));
    ASSERT_TRUE(slice.isObject());
    const auto delimiters = slice.get("delimiters");
    ASSERT_TRUE(delimiters.isArray());
    ASSERT_EQ(4, delimiters.length());
  }
}

0 comments on commit 932b08b

Please sign in to comment.