Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Clean up.
Browse files Browse the repository at this point in the history
  • Loading branch information
maierlars committed Dec 7, 2023
1 parent a9e1bec commit 257689c
Showing 1 changed file with 0 additions and 80 deletions.
80 changes: 0 additions & 80 deletions core/analysis/multi_delimited_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,86 +154,6 @@ class multi_delimited_token_stream_generic_single_chars final
std::array<bool, CHAR_MAX + 1> bytes_;
};

// TODO move to automaton_utils
//
// Builds an automaton with one chain of states per entry of `strings`.
//
// Layout, as established by the calls below:
//   * state 0 is the start state, state 1 is the single final state;
//   * every non-empty string `str` gets `str.length()` consecutive states
//     beginning at `first_state`; state `first_state + k` means "the last k
//     bytes read equal str[0, k)";
//   * state 0 has one arc labelled 0 into each chain's first state
//     (NOTE(review): label 0 is presumably epsilon - confirm against
//     range_label / the automaton implementation).
//
// Transitions for state `first_state + i`, for each byte value c in
// [1, UCHAR_MAX]:
//   * c == str[i]: advance to the next state of the chain, or to the final
//     state 1 when this was the last byte of `str`;
//   * c occurs somewhere else in `str`: go to the state of the longest prefix
//     of `str` that is a suffix of the text matched so far including c;
//   * c does not occur in `str`: go back to state 0. Consecutive such byte
//     values are merged into a single range arc.
automaton make_strings(const std::vector<bstring>& strings) {
  automaton a;
  a.SetStart(a.AddState());
  a.SetFinal(a.AddState(), true);

  for (const auto& str : strings) {
    // An empty string would add no states, so the arc emitted at the end of
    // this loop body would target a state that does not belong to it (or does
    // not exist at all). Skip it.
    if (str.empty()) {
      continue;
    }

    const auto length = static_cast<int>(str.length());

    // byte value -> every position at which it occurs in `str`
    std::unordered_multimap<byte_type, int> positions;
    for (int i = 0; i < length; i++) {
      positions.emplace(str[i], i);
    }

    auto first_state = a.NumStates();
    a.AddStates(str.length());

    for (int i = 0; i < length; i++) {
      auto expected = str[i];
      // first byte value of the currently open run of "reset" bytes, or -1
      int last_no_match = -1;

      auto current_state = int32_t{i + first_state};

      // Emits the pending reset arc [last_no_match, upper] -> state 0, if any.
      const auto flush_reset_range = [&](int upper) {
        if (last_no_match != -1) {
          a.EmplaceArc(current_state,
                       range_label::fromRange(last_no_match, upper), 0);
          last_no_match = -1;
        }
      };

      // Label 0 is not enumerated here; it is the label used for the arcs
      // leaving the start state (see the end of the outer loop).
      for (int c = 1; c <= UCHAR_MAX; c++) {
        if (c == expected) {
          flush_reset_range(c - 1);
          // add forward edge
          a.EmplaceArc(current_state, range_label::fromRange(c),
                       i == length - 1 ? 1 : (current_state + 1));

        } else if (const auto [first, last] =
                     positions.equal_range(static_cast<byte_type>(c));
                   first != last) {
          flush_reset_range(c - 1);

          // The matched text is str[0, i) followed by c. Find the longest
          // prefix of `str` that is a suffix of it. Such a prefix must end
          // with c, so only prefixes ending at an occurrence of c qualify.
          //
          // equal_range is used instead of find + forward scan because find
          // may return any element of the group of equal keys.
          int best = 0;
          for (auto iter = first; iter != last; ++iter) {
            // prefix str[0, prefix_len) ends with the occurrence of c
            const int prefix_len = iter->second + 1;
            // prefix_len == i + 1 would require str[i] == c, which is the
            // forward-edge case handled above; anything longer cannot fit.
            if (prefix_len <= best || prefix_len > i) {
              continue;
            }
            // The last byte is already known to be c on both sides; compare
            // the remaining prefix_len - 1 bytes against the tail of str[0, i).
            const auto head = static_cast<size_t>(prefix_len - 1);
            const bytes_view prefix_head{str.data(), head};
            const bytes_view matched_tail{str.data() + (i - (prefix_len - 1)),
                                          head};
            if (prefix_head == matched_tail) {
              best = prefix_len;
            }
          }

          // best == 0 restarts this string's chain from its first state.
          a.EmplaceArc(current_state, range_label::fromRange(c),
                       first_state + best);

        } else if (last_no_match == -1) {
          last_no_match = c;
        }
      }

      flush_reset_range(UCHAR_MAX);
    }

    // Connect the start state to this string's chain.
    a.EmplaceArc(0, range_label::fromRange(0), first_state);
  }
  return a;
}

struct TrieNode {
explicit TrieNode(int32_t stateId, int32_t depth)
: state_id(stateId), depth(depth) {}
Expand Down

0 comments on commit 257689c

Please sign in to comment.