-
Notifications
You must be signed in to change notification settings - Fork 29
/
Alphabet.h
197 lines (170 loc) · 4.05 KB
/
Alphabet.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#ifndef _ALPHABET_
#define _ALPHABET_
#include "MyLib.h"
#include "Hash_map.hpp"
#include "IO.h"
/*
This class maps feature strings to integer IDs (and back).
IDs are assigned consecutively, starting from 0.
*/
/**
 * Bidirectional dictionary between strings and dense integer IDs.
 *
 * IDs are handed out consecutively starting from 0 in the order in which
 * new strings are first seen. Once the dictionary is marked as fixed
 * (see set_fixed_flag()), unknown strings are no longer added and map to -1.
 *
 * Based on the "quark" class by Naoaki Okazaki.
 */
class basic_quark {
protected:
    typedef hash_map<std::string, int> StringToId;
    typedef std::vector<std::string> IdToString;

    StringToId m_string_to_id;  // string -> ID
    IdToString m_id_to_string;  // ID -> string; index is the ID
    bool m_b_fixed;             // when true, unknown strings are not added
    int m_size;                 // number of associations stored

    /**
     * Shared implementation of operator[] and from_string().
     * Returns the ID of str; if str is unknown it is added with the next
     * free ID, unless the dictionary is fixed, in which case -1 is returned.
     */
    int lookup_or_add(const std::string& str)
    {
        StringToId::const_iterator it = m_string_to_id.find(str);
        if (it != m_string_to_id.end()) {
            return it->second;
        }
        if (m_b_fixed) {
            return -1;
        }
        const int newid = m_size;
        m_id_to_string.push_back(str);
        m_string_to_id.insert(std::pair<std::string, int>(str, newid));
        ++m_size;
        return newid;
    }

public:
    /**
     * Construct an empty, non-fixed dictionary.
     */
    basic_quark()
    {
        clear();
    }

    /**
     * Destruct.
     */
    virtual ~basic_quark()
    {
    }

    /**
     * Map a string to its associated ID.
     * If the string-to-integer association does not exist and the
     * dictionary is not fixed, a new ID is allocated.
     * @param str String value.
     * @return Associated ID for the string value, or -1 if the string is
     *         unknown and the dictionary is fixed.
     */
    int operator[](const std::string& str)
    {
        return lookup_or_add(str);
    }

    /**
     * Convert ID value into the associated string value.
     * @param qid ID.
     * @param def Default value if the ID was out of range.
     * @return String value associated with the ID, or def when qid is out
     *         of range. The result is a reference: when def is returned and
     *         the caller relied on the defaulted argument, the reference is
     *         only valid until the end of the calling full-expression, so
     *         copy it rather than storing the reference.
     */
    const std::string& from_id(const int& qid, const std::string& def = "") const
    {
        // Bounds-check against the vector itself so that a stale m_size can
        // never lead to an out-of-range access.
        if (qid < 0 || m_size <= qid
            || m_id_to_string.size() <= static_cast<size_t>(qid)) {
            return def;
        }
        return m_id_to_string[qid];
    }

    /**
     * Convert string value into the associated ID value.
     * Behaves exactly like operator[]: an unknown string is added with a
     * fresh ID unless the dictionary is fixed.
     * @param str String value.
     * @return Associated ID, or -1 if the string is unknown and the
     *         dictionary is fixed.
     */
    int from_string(const std::string& str)
    {
        return lookup_or_add(str);
    }

    /**
     * Remove every association and reset the fixed flag to false.
     */
    void clear()
    {
        m_string_to_id.clear();
        m_id_to_string.clear();
        m_b_fixed = false;
        m_size = 0;
    }

    /**
     * Set whether the dictionary is fixed.
     * @param bfixed true to stop allocating IDs for unknown strings.
     */
    void set_fixed_flag(bool bfixed)
    {
        m_b_fixed = bfixed;
    }

    /**
     * Get the number of string-to-id associations.
     * @return The number of association.
     */
    size_t size() const
    {
        return m_size;
    }

    /**
     * Replace the contents with a dictionary read from a text stream.
     * Expected layout: a first line holding the entry count, followed by
     * one "<string> <id>" line per entry with IDs in increasing order
     * from 0 (the layout produced by write()).
     * The fixed flag is reset to false (it is not stored in this format).
     * @param inf Input stream positioned at the count line.
     */
    void read(std::ifstream &inf)
    {
        clear();
        std::string line;
        my_getline(inf, line);
        chomp(line);
        const int count = atoi(line.c_str());
        std::vector<std::string> featids;
        for (int i = 0; i < count; ++i) {
            my_getline(inf, line);
            // NOTE(review): split_bychars is assumed to split on its default
            // separator (presumably whitespace) -- confirm against MyLib.h.
            featids.clear();
            split_bychars(line, featids);
            // A well-formed line has both the string and its ID.
            assert(featids.size() >= 2);
            if (featids.size() < 2) {
                break;  // malformed/truncated input: keep what was read so far
            }
            assert(atoi(featids[1].c_str()) == i);
            m_string_to_id[featids[0]] = i;
            // Keep the reverse table in step so from_id()/write() stay valid.
            m_id_to_string.push_back(featids[0]);
            ++m_size;
        }
    }

    /**
     * Write the dictionary as text: the entry count on the first line, then
     * one "<string> <id>" line per entry in ID order.
     * NOTE(review): strings containing the separator cannot be round-tripped
     * through this format.
     * @param outf Output stream.
     */
    void write(std::ofstream &outf) const
    {
        outf << m_size << '\n';
        for (int i = 0; i < m_size; ++i) {
            // The separator is required so that read() can split the line.
            outf << m_id_to_string[i] << ' ' << i << '\n';
        }
    }

    /**
     * Replace the contents with a dictionary read from a binary stream in
     * the layout produced by writeModel(): entry count, fixed flag, then a
     * (string, id) pair per entry.
     * @param inf Input stream.
     */
    void loadModel(LStream &inf)
    {
        clear();
        std::string tmp_string;
        int ID;
        int count = 0;
        ReadBinary(inf, count);
        ReadBinary(inf, m_b_fixed);
        for (int i = 0; i < count; ++i) {
            ReadString(inf, tmp_string);
            ReadBinary(inf, ID);
            m_string_to_id[tmp_string] = i;
            m_id_to_string.push_back(tmp_string);
            assert(ID == i);
        }
        // Derive the size from what was actually loaded so that a negative
        // count in the stream cannot leave m_size inconsistent.
        m_size = static_cast<int>(m_id_to_string.size());
    }

    /**
     * Write the dictionary to a binary stream: entry count, fixed flag,
     * then a (string, id) pair per entry in ID order.
     * @param outf Output stream.
     */
    void writeModel(LStream &outf) const
    {
        WriteBinary(outf, m_size);
        WriteBinary(outf, m_b_fixed);
        for (int i = 0; i < m_size; ++i) {
            WriteString(outf, m_id_to_string[i]);
            WriteBinary(outf, i);
        }
    }
};
// Alphabet is an alias for basic_quark.
typedef basic_quark Alphabet;
#endif