-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathlaia-netout
executable file
·280 lines (255 loc) · 9.48 KB
/
laia-netout
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#!/usr/bin/env th
require 'laia'
-- Batcher that supplies the images to the network. It is created before the
-- parser so that it can register its own command-line options below.
local batcher = laia.RandomBatcher()

-- Command-line interface of the tool.
local parser = laia.argparse(){
  name = 'laia-netout',
  -- The two halves are concatenated verbatim, so the first one must end
  -- with a space (otherwise the help reads "givenmodel").
  description = 'Computes network outputs in diverse formats for a given ' ..
    'model and image list.'
}

-- Register laia.Version options
laia.Version():registerOptions(parser)
-- Register laia.log options.
laia.log.registerOptions(parser)
-- Register cudnn options, only if available.
if cudnn then cudnn.registerOptions(parser, true) end
-- Register batcher options.
batcher:registerOptions(parser)

-- Positional arguments.
parser:argument('checkpoint', 'File containing the trained checkpoint/model.')
parser:argument('image_list', 'File containing the list of images to decode.')
parser:argument(
  'output', 'Output file ("-" for standard output), or output directory ' ..
    'when --output_format is "htk".')

-- Optional arguments.
parser:option(
  '--seed -s', 'Seed for random numbers generation.',
  0, laia.toint)
parser:option(
  '--gpu', 'If gpu>0, uses the specified GPU, otherwise uses the CPU.',
  1, laia.toint)
parser:option(
  '--auto_width_factor', 'If true, sets the width factor for the batchers ' ..
    'automatically, from the size of the pooling layers.',
  false, laia.toboolean)
  :argname('<bool>')
parser:option(
  '--batch_size -b', 'Batch size', 16, laia.toint)
  :ge(1)
parser:option(
  '--output_format', 'Format of the output file. Use "matrix" to get a ' ..
    'Kaldi\'s archive of matrices (one for each sample), where each row is a ' ..
    'timestep and each column represents a label; use "lattice" to get a ' ..
    'Kaldi\'s archive of CompactLattices; use "htk" to write one HTK ' ..
    'feature file (<id>.fea) per sample into the output directory; use ' ..
    '"maxseq" to get, for each sample, the index and value of the maximum ' ..
    'output at each timestep.', 'matrix',
  {matrix = 'matrix', lattice = 'lattice', htk = 'htk', maxseq = 'maxseq'})
  :argname('<fmt>')
parser:option(
  '--output_transform', 'Apply this transformation at the end of the model. ' ..
    'For instance, use "softmax" to get posterior probabilities as the ' ..
    'output of the model.',
  '', {
    [''] = '',
    softmax = 'softmax',
    logsoftmax = 'logsoftmax',
    negsoftmax = 'negsoftmax',
    neglogsoftmax = 'neglogsoftmax'
  })
parser:option(
  '--prior', 'File containing the label priors used to compute the ' ..
    'log-likelihood: log p(x|s) = log P(s|x) - alpha * log P(s) ' ..
    '(see --prior_alpha).', '')
  :argname('<file>')
parser:option(
  '--prior_alpha', 'Scaling factor for the label priors used to compute the ' ..
    'log-likelihood.', 0.3, tonumber)
  :argname('<alpha>')
  :gt(0.0)
parser:option(
  '--prior_smooth', 'Add this smoothing constant to all priors.', 1e-9, tonumber)
  :argname('<smooth>')
  :ge(0.0)
parser:option(
  '--output_hpad', 'If given, write the horizontal padding applied to each ' ..
    'image to this file.', '')
  :argname('<file>')

-- Parse options
local opts = parser:parse()
-- Seed the random number generators.
laia.manualSeed(opts.seed)

-- Take the *BEST* model stored in the checkpoint.
local model = laia.Checkpoint():load(opts.checkpoint):Best():getModel()
assert(model ~= nil, 'No model was found in the checkpoint file!')

-- Append the requested output layers. When a priors file is given, a
-- log-softmax is appended regardless of the (non-negating part of the)
-- requested transform.
do
  local transform = opts.output_transform
  local is_log = (transform == 'logsoftmax' or transform == 'neglogsoftmax')
  local is_prob = (transform == 'softmax' or transform == 'negsoftmax')
  local is_negated = (transform == 'negsoftmax' or transform == 'neglogsoftmax')

  if opts.prior ~= '' or is_log then
    model:add(nn.LogSoftMax())
  elseif is_prob then
    model:add(nn.SoftMax())
  end

  -- The "neg*" variants flip the sign of the (log-)softmax output in place.
  if is_negated then
    model:add(nn.MulConstant(-1, true))
  end
end
-- Place the model on the requested device.
if opts.gpu <= 0 then
  -- CPU: converting to float should not be necessary, but just in case.
  model = model:float()
else
  -- GPU: check that we have everything necessary before moving the model.
  assert(cutorch ~= nil, 'Package cutorch is required in order to use the GPU.')
  assert(nn ~= nil, 'Package nn is required in order to use the GPU.')
  cutorch.setDevice(opts.gpu)
  model = model:cuda()
  -- If cudnn_force_convert=true, force all possible layers to use cuDNN impl.
  if cudnn and cudnn.force_convert then
    cudnn.convert(model, cudnn)
  end
end

-- The model is only evaluated here, never trained.
model:evaluate()
-- Configure the batcher and load the list of images.
if opts.auto_width_factor then
  -- Derive the width factor from the model instead of the command line.
  local factor = laia.getWidthFactor(model)
  batcher:setOptions{ width_factor = factor }
  laia.log.info('Batcher width factor was automatically set to %d', factor)
end

batcher:load(opts.image_list)

-- The epoch is only reset when a positive seed was given.
if opts.seed > 0 then batcher:epochReset() end
-- Open file to write the horizontal padding of each sample.
-- '-' selects the standard output; an empty name disables this output.
local output_hpad = nil
if opts.output_hpad ~= '' then
  output_hpad = opts.output_hpad == '-' and io.stdout or
    io.open(opts.output_hpad, 'w')
  -- assert() does not interpolate its message: extra arguments are ignored,
  -- so the message has to be built explicitly with string.format.
  assert(output_hpad ~= nil,
         string.format('File %q could not be opened for writing!',
                       opts.output_hpad))
end

-- Open output file.
-- For the "htk" format the output argument is used as a directory (one file
-- is created per sample later on), so no single output file is opened and
-- output_file just stays bound to the standard output.
local output_file = io.stdout
if opts.output_format ~= 'htk' then
  output_file = opts.output == '-' and io.stdout or io.open(opts.output, 'w')
  assert(output_file ~= nil,
         string.format('File %q could not be opened for writing!',
                       opts.output))
end
-- Scratch storages holding the header fields of the HTK feature files.
-- nSamples and sampSize are filled in for every sample; the other two are
-- constant.
local nSamples = torch.IntStorage(1)
local sampSize = torch.ShortStorage(1)

-- Sample period written to the header (10000000 corresponds to 1 second).
local sampPeriod = torch.IntStorage(1)
sampPeriod[1] = 100000

-- Parameter kind written to the header: PARMKIND=USER
local parmKind = torch.ShortStorage(1)
parmKind[1] = 9
--- Load the label priors file and convert it into scaled log-priors.
--
-- Every line of the file must contain exactly four fields (as split by
-- laia.strsplit). Only the second field, the count of the label, is used;
-- the label of line i is assumed to be label i.
-- The prior of each label is (count + smooth) normalized by the sum over
-- all lines, and the returned value is alpha * log(prior).
--
-- @param fpriors  Path of the priors file.
-- @param alpha    Scaling factor applied to the log-priors (default: 1).
-- @param smooth   Constant added to every count (default: 0.0).
-- @return A FloatTensor with one scaled log-prior per line of the file, and
--         a table of booleans telling which lines had a count equal to 0.
-- Raises an error if the file cannot be read, is empty, contains a
-- malformed line, or if all the (smoothed) counts add up to zero.
local function load_priors(fpriors, alpha, smooth)
  alpha = alpha or 1
  smooth = smooth or 0.0

  local f = io.open(fpriors, 'r')
  assert(f ~= nil, string.format('Unable to read priors file: %q', fpriors))

  local logprior = {}
  local zeroprior = {}
  local total = 0
  local ln = 0

  -- Parse inside pcall so that the file is closed even when a line is
  -- malformed; the error is re-raised right after closing.
  local ok, err = pcall(function()
    for line in f:lines() do
      ln = ln + 1
      local fields = laia.strsplit(line)
      assert(#fields == 4,
             string.format('Unexpected number of fields in file: %q, line %d',
                           fpriors, ln))
      local count = tonumber(fields[2])
      assert(count ~= nil,
             string.format('Invalid count %q in file: %q, line %d',
                           tostring(fields[2]), fpriors, ln))
      zeroprior[ln] = (count == 0)
      logprior[ln] = count + smooth
      total = total + logprior[ln]
    end
  end)
  f:close()
  if not ok then error(err, 0) end

  assert(ln > 0, string.format('Priors file is empty: %q', fpriors))
  -- With a zero total every log-prior would be NaN (log 0 - log 0).
  assert(total > 0,
         string.format('The counts in priors file %q add up to zero', fpriors))

  -- Normalize in the log domain and apply the scaling factor.
  local z = math.log(total)
  for i = 1, ln do
    logprior[i] = (math.log(logprior[i]) - z) * alpha
  end
  return torch.Tensor(logprior):float(), zeroprior
end
-- Scaled log-priors (and zero-count flags); both stay nil unless --prior
-- was given.
local logprior, zeroprior
if opts.prior ~= '' then
  logprior, zeroprior = load_priors(
    opts.prior, opts.prior_alpha, opts.prior_smooth)
  if table.any(zeroprior) then
    laia.log.warn('There are symbols with prior equal to 0.')
  end
  -- Reshape to 1 x 1 x labels, so it can be expanded to the output size.
  local num_labels = logprior:size(1)
  logprior = logprior:view(1, 1, num_labels)
end
-- Process the image list batch by batch and write the network outputs in
-- the requested format.
for b = 1, batcher:numSamples(), opts.batch_size do
  -- Get batch
  local batch_img, _, _, batch_ids, batch_hpad = batcher:next(opts.batch_size)
  if opts.gpu > 0 then batch_img = batch_img:cuda() end

  -- Forward through network, and copy it to the CPU
  local output = model:forward(batch_img):float()
  -- Change output layout to batch_size x timesteps x labels
  output = output:view(output:size(1) / opts.batch_size,
                       opts.batch_size, output:size(2))
  output = output:permute(2, 1, 3):contiguous()

  -- Only the first n_valid entries of the batch are written: the remaining
  -- ones fall beyond the number of samples reported by the batcher.
  local n_valid = math.min(opts.batch_size, batcher:numSamples() - b + 1)
  local n_steps, n_labels = output:size(2), output:size(3)
  local fmt = opts.output_format

  -- Write horizontal padding of each sample
  if output_hpad then
    for i = 1, n_valid do
      local hpad = batch_hpad[i]
      output_hpad:write(string.format(
        '%s %d %d %d\n', batch_ids[i], hpad[1], hpad[2], hpad[3]))
    end
    output_hpad:flush()
  end

  -- Convert log-posteriors into log-likelihoods using the provided priors.
  -- Note: Notice that if --prior was given, a log-softmax layer is added at
  -- the end of the model.
  if logprior then
    output:csub(logprior:expandAs(output))
  end

  if fmt == 'matrix' then
    -- Output as a Kaldi's table of matrices
    for i = 1, n_valid do
      local sample = output[i]
      output_file:write(string.format('%s [\n', batch_ids[i]))
      for t = 1, n_steps do
        for k = 1, n_labels do
          output_file:write(string.format('%.10g ', sample[{t, k}]))
        end
        output_file:write('\n')
      end
      output_file:write(']\n')
    end
    output_file:flush()
  elseif fmt == 'lattice' then
    -- Output as a Kaldi's table of lattices
    for i = 1, n_valid do
      local sample = output[i]
      output_file:write(string.format('%s\n', batch_ids[i]))
      for t = 1, n_steps do
        for k = 1, n_labels do
          output_file:write(string.format(
            '%d\t%d\t%d\t0,%.10g,%d\n', t - 1, t, k, sample[{t, k}], k))
        end
      end
      output_file:write(string.format('%d\t0,0,\n\n', n_steps))
    end
    output_file:flush()
  elseif fmt == 'htk' then
    -- Output as a HTK feats: one big-endian binary file per sample, made of
    -- the header fields followed by the values, timestep by timestep.
    for i = 1, n_valid do
      local path = opts.output..'/'..batch_ids[i]..'.fea'
      local fd = torch.DiskFile(path, 'w'):binary():bigEndianEncoding()
      local sample = output[i]
      nSamples[1] = n_steps
      sampSize[1] = 4 * n_labels
      fd:writeInt(nSamples[1])
      fd:writeInt(sampPeriod[1])
      fd:writeShort(sampSize[1])
      fd:writeShort(parmKind[1])
      for t = 1, n_steps do
        for k = 1, n_labels do
          fd:writeFloat(sample[{t, k}])
        end
      end
      fd:close()
    end
  elseif fmt == 'maxseq' then
    -- Output as sequence of maximums: the sample id, the mean of the maximum
    -- values and then one "index:value" pair per timestep.
    for i = 1, n_valid do
      local maxval, maxidx = torch.max(output[i], 2)
      maxval = maxval:squeeze()
      maxidx = maxidx:squeeze()
      output_file:write(batch_ids[i]..' '..torch.mean(maxval))
      for t = 1, maxval:size(1) do
        output_file:write(' '..maxidx[t]..':'..maxval[t])
      end
      output_file:write('\n')
    end
    output_file:flush()
  end
end
-- Release the output handles. The padding file only exists when
-- --output_hpad was given.
output_file:close()
if output_hpad ~= nil then output_hpad:close() end