diff --git a/core/analysis/multi_delimited_token_stream.cpp b/core/analysis/multi_delimited_token_stream.cpp index 6628e0dfa..79eb0d2b2 100644 --- a/core/analysis/multi_delimited_token_stream.cpp +++ b/core/analysis/multi_delimited_token_stream.cpp @@ -257,11 +257,13 @@ class multi_delimited_token_stream_generic final nfa.Properties(EXPECTED_NFA_PROPERTIES, true)); #endif + fst::drawFst(nfa, std::cout); automaton dfa; fst::DeterminizeStar(nfa, &dfa); std::cout << "number of states (dfa) = " << nfa.NumStates() << std::endl; fst::Minimize(&dfa); + fst::drawFst(dfa, std::cout); std::cout << "number of states = " << dfa.NumStates() << std::endl; @@ -325,6 +327,8 @@ irs::analysis::analyzer::ptr make_single_char( irs::analysis::analyzer::ptr make( multi_delimited_token_stream::options&& opts) { + return std::make_unique( + std::move(opts)); const bool single_character_case = std::all_of(opts.delimiters.begin(), opts.delimiters.end(), [](const auto& delim) { return delim.size() == 1; }); @@ -334,8 +338,7 @@ irs::analysis::analyzer::ptr make( return std::make_unique( std::move(opts)); } else { - return std::make_unique( - std::move(opts)); + } } diff --git a/external/kaldi/src/fstext/determinize-star-inl.h b/external/kaldi/src/fstext/determinize-star-inl.h index 5e489ca1c..a15eba275 100644 --- a/external/kaldi/src/fstext/determinize-star-inl.h +++ b/external/kaldi/src/fstext/determinize-star-inl.h @@ -583,10 +583,19 @@ template class DeterminizerStar { closed_subset_.clear(); fsa::RangeLabel label; + std::cout << "ALL ELEMS SIZE = " << all_elems_.size() << std::endl; + std::vector brackets; for (auto& e : all_elems_) { const auto& bound = e.bound; const bool is_max = e.IsMax(); + if (!is_max) { + brackets.push_back(true); + } else { + assert(!brackets.empty()); + brackets.pop_back(); + } + if (!is_max) { if (label.ilabel != fst::kNoLabel && label.min != bound) { label.max = bound - 1; diff --git a/tests/analysis/multi_delimited_token_stream_tests.cpp b/tests/analysis/multi_delimited_token_stream_tests.cpp index 60e9a7d1f..1355416ca 100644 --- a/tests/analysis/multi_delimited_token_stream_tests.cpp +++ b/tests/analysis/multi_delimited_token_stream_tests.cpp @@ -155,7 +155,7 @@ TEST_F(multi_delimited_token_stream_tests, no_delimiter) { TEST_F(multi_delimited_token_stream_tests, multi_words) { auto stream = irs::analysis::multi_delimited_token_stream::make( //{.delimiters = {"foo"_b, "bar"_b, "baz"_b}}); - {.delimiters = {"fab1"_b, "goo2"_b, "puh3"_b}}); + {.delimiters = {"f"_b, "g"_b, "h"_b, "j"_b}}); ASSERT_EQ(irs::type::id(), stream->type());