Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Intermediate state.
Browse files: browse the repository at this point in the history
  • Loading branch information
maierlars committed Dec 6, 2023
1 parent 5d29df1 commit 044738a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
7 changes: 5 additions & 2 deletions core/analysis/multi_delimited_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,13 @@ class multi_delimited_token_stream_generic final
nfa.Properties(EXPECTED_NFA_PROPERTIES, true));
#endif

fst::drawFst(nfa, std::cout);
automaton dfa;
fst::DeterminizeStar(nfa, &dfa);
std::cout << "number of states (dfa) = " << nfa.NumStates() << std::endl;

fst::Minimize(&dfa);
fst::drawFst(dfa, std::cout);

std::cout << "number of states = " << dfa.NumStates() << std::endl;

Expand Down Expand Up @@ -325,6 +327,8 @@ irs::analysis::analyzer::ptr make_single_char(

irs::analysis::analyzer::ptr make(
multi_delimited_token_stream::options&& opts) {
return std::make_unique<multi_delimited_token_stream_generic>(
std::move(opts));
const bool single_character_case =
std::all_of(opts.delimiters.begin(), opts.delimiters.end(),
[](const auto& delim) { return delim.size() == 1; });
Expand All @@ -334,8 +338,7 @@ irs::analysis::analyzer::ptr make(
return std::make_unique<multi_delimited_token_stream_single>(
std::move(opts));
} else {
return std::make_unique<multi_delimited_token_stream_generic>(
std::move(opts));

}
}

Expand Down
9 changes: 9 additions & 0 deletions external/kaldi/src/fstext/determinize-star-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,19 @@ template<class F> class DeterminizerStar {
closed_subset_.clear();
fsa::RangeLabel label;

std::cout << "ALL ELEMS SIZE = " << all_elems_.size() << std::endl;
std::vector<bool> brackets;
for (auto& e : all_elems_) {
const auto& bound = e.bound;
const bool is_max = e.IsMax();

if (!is_max) {
brackets.push_back(true);
} else {
assert(!brackets.empty());
brackets.pop_back();
}

if (!is_max) {
if (label.ilabel != fst::kNoLabel && label.min != bound) {
label.max = bound - 1;
Expand Down
2 changes: 1 addition & 1 deletion tests/analysis/multi_delimited_token_stream_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
TEST_F(multi_delimited_token_stream_tests, multi_words) {
auto stream = irs::analysis::multi_delimited_token_stream::make(
//{.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
{.delimiters = {"fab1"_b, "goo2"_b, "puh3"_b}});
{.delimiters = {"f"_b, "g"_b, "h"_b, "j"_b}});
ASSERT_EQ(irs::type<irs::analysis::multi_delimited_token_stream>::id(),
stream->type());

Expand Down

0 comments on commit 044738a

Please sign in to comment.