forked from rarten/ooz
-
Notifications
You must be signed in to change notification settings - Fork 8
/
path_rep.cpp
132 lines (124 loc) · 3.47 KB
/
path_rep.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#include "path_rep.h"
#include "util.h"
/* The blocks in the tail section of the index bundle are generated through an
* alternating-phase algorithm where in the first phase a set of strings are built
* to serve as bases for the second phase where they are combined with suffixes
* to form the full outputs of the algorithm.
*
* u32 words control the generation and strings are null-terminated UTF-8.
* A zero word toggles the phase, input starts with a 0 to indicate the base phase.
*
* In the base phase a non-zero word inserts a new string into the base set or
* inserts a the concatenation of the base string and the following input.
*
* In the generation phase a non-zero word references a string in the base set
* and outputs the concatenation of the base string and the following input.
*
* The index words are one-based where 1 refers to the first string.
*
* A zero can toggle back to the generation phase in which all base strings shall
* be removed as if starting from scratch.
*
* The template section can be empty (two zero words after each other), typically
* done to emit a single string as-is.
*/
std::vector<std::string> generate_paths(void const* spec_data, size_t spec_size) {
reader r(spec_data, spec_size);
bool base_phase = false;
std::vector<std::string> bases;
std::vector<std::string> results;
while (r.n_) {
uint32_t cmd;
if (!r.read(cmd)) {
abort();
}
if (cmd == 0) {
base_phase = !base_phase;
if (base_phase) {
bases.clear();
}
}
if (cmd != 0) {
std::string fragment;
if (!r.read(fragment)) {
abort();
}
// the input is one-indexed
size_t index = cmd - 1;
if (index < bases.size()) {
// Back-ref to existing string, concatenate and insert/emit.
std::string full = bases[index] + fragment;
if (base_phase) {
bases.push_back(full);
}
else {
results.push_back(full);
}
}
else {
// New string, insert or emit as-is.
if (base_phase) {
bases.push_back(fragment);
}
else {
results.push_back(fragment);
}
}
}
}
return results;
}
void explain_paths(void const* spec_data, size_t spec_size) {
reader r(spec_data, spec_size);
bool base_phase = false;
std::vector<std::string> bases;
std::vector<std::string> results;
while (r.n_) {
uint32_t cmd;
if (!r.read(cmd)) {
abort();
}
fprintf(stderr, "Command %08u\n", cmd);
if (cmd == 0) {
base_phase = !base_phase;
if (base_phase) {
fprintf(stderr, "Entering template phase\n");
bases.clear();
}
else {
fprintf(stderr, "Entering generation phase\n");
}
}
if (cmd != 0) {
std::string fragment;
if (!r.read(fragment)) {
abort();
}
// the input is one-indexed
size_t index = cmd - 1;
if (index < bases.size()) {
// Back-ref to existing string, concatenate and insert/emit.
std::string full = bases[index] + fragment;
if (base_phase) {
bases.push_back(full);
fprintf(stderr, "Add new reference %zu \"%s\" + \"%s\"\n", bases.size(), bases[index].c_str(), fragment.c_str());
}
else {
results.push_back(full);
fprintf(stderr, "Generating string \"%s\" + \"%s\"\n", bases[index].c_str(), fragment.c_str());
}
}
else {
// New string, insert or emit as-is.
if (base_phase) {
bases.push_back(fragment);
fprintf(stderr, "Add new reference %zu \"%s\"\n", bases.size(), fragment.c_str());
}
else {
results.push_back(fragment);
fprintf(stderr, "Generate string \"%s\"\n", fragment.c_str());
}
}
}
}
}