laia-netout

#!/usr/bin/env th

require 'laia'

-- Batcher used to read the images, and command-line parser of this tool.
local batcher = laia.RandomBatcher()
local parser = laia.argparse(){
  name = 'laia-netout',
  -- Note the trailing space before the concatenation: without it the help
  -- text reads "givenmodel".
  description = 'Computes network outputs in diverse formats for a given ' ..
    'model and image list.'
}

-- Register laia.Version options
laia.Version():registerOptions(parser)
-- Register laia.log options.
laia.log.registerOptions(parser)
-- Register cudnn options, only if available.
if cudnn then cudnn.registerOptions(parser, true) end
-- Register batcher options.
batcher:registerOptions(parser)

-- Positional arguments.
parser:argument('checkpoint', 'File containing the trained checkpoint/model.')
parser:argument('image_list', 'File containing the list of images to decode.')
-- With --output_format htk, one file per sample is created inside <output>,
-- so it is used as a directory in that case.
parser:argument('output', 'Output file (or output directory when ' ..
    '--output_format is "htk").')

-- Optional arguments.
parser:option(
  '--seed -s', 'Seed for random numbers generation.',
  0, laia.toint)
parser:option(
  '--gpu', 'If gpu>0, uses the specified GPU, otherwise uses the CPU.',
  1, laia.toint)
parser:option(
  '--auto_width_factor', 'If true, sets the width factor for the batchers ' ..
    'automatically, from the size of the pooling layers.',
  false, laia.toboolean)
  :argname('<bool>')
parser:option(
  '--batch_size -b', 'Batch size', 16, laia.toint)
  :ge(1)
-- The help text describes every accepted value of the option.
parser:option(
  '--output_format', 'Format of the output file. Use "matrix" to get a ' ..
    'Kaldi\'s archive of matrices (one for each sample), where each row is a ' ..
    'timestep and each column represents a label; use "lattice" to get a ' ..
    'Kaldi\'s archive of CompactLattices; use "htk" to write one HTK ' ..
    'feature file per sample (<output>/<id>.fea); use "maxseq" to get, for ' ..
    'each sample, the label with the maximum output and its value at each ' ..
    'timestep.', 'matrix',
  {matrix = 'matrix', lattice = 'lattice', htk = 'htk', maxseq = 'maxseq'})
  :argname('<fmt>')
parser:option(
  '--output_transform', 'Apply this transformation at the end of the model. ' ..
    'For instance, use "softmax" to get posterior probabilities as the ' ..
    'output of the model.',
  '', {
    [''] = '',
    softmax = 'softmax',
    logsoftmax = 'logsoftmax',
    negsoftmax = 'negsoftmax',
    neglogsoftmax = 'neglogsoftmax'
  })
parser:option(
  '--prior', 'File containing the label priors used to compute the ' ..
    'log-likelihood: log p(x|s) = log P(s|x) - alpha * log P(s) ' ..
    '(see --prior_alpha).', '')
  :argname('<file>')
parser:option(
  '--prior_alpha', 'Scaling factor for the label priors used to compute the ' ..
    'log-likelihood.', 0.3, tonumber)
  :argname('<alpha>')
  :gt(0.0)
parser:option(
  '--prior_smooth', 'Add this smoothing constant to all priors.', 1e-9, tonumber)
  :argname('<smooth>')
  :ge(0.0)
parser:option(
  '--output_hpad', 'If given, write the horizontal padding applied to each ' ..
    'image to this file.', '')
  :argname('<file>')

-- Read the command-line options
local opts = parser:parse()

-- Seed the random number generators
laia.manualSeed(opts.seed)

-- Take the model stored as the *BEST* one in the checkpoint file.
local model = laia.Checkpoint()
  :load(opts.checkpoint)
  :Best()
  :getModel()
assert(model ~= nil, 'No model was found in the checkpoint file!')

-- Append the requested output layers at the end of the model.
do
  local transform = opts.output_transform
  local is_log = (transform == 'logsoftmax' or transform == 'neglogsoftmax')
  local is_linear = (transform == 'softmax' or transform == 'negsoftmax')
  local is_negated = (transform == 'negsoftmax' or
                      transform == 'neglogsoftmax')
  -- Giving a priors file takes precedence: log-posteriors are produced
  -- regardless of the chosen transformation.
  if opts.prior ~= '' or is_log then
    model:add(nn.LogSoftMax())
  elseif is_linear then
    model:add(nn.SoftMax())
  end
  -- The "neg*" transformations flip the sign of the output.
  if is_negated then
    model:add(nn.MulConstant(-1,true))
  end
end

-- Place the model on the requested device.
if not (opts.gpu > 0) then
  -- CPU: this conversion should not be necessary, but just in case
  model = model:float()
else
  -- GPU: check first that everything necessary is available.
  assert(cutorch ~= nil, 'Package cutorch is required in order to use the GPU.')
  assert(nn ~= nil, 'Package nn is required in order to use the GPU.')
  cutorch.setDevice(opts.gpu)
  model = model:cuda()
  -- If cudnn_force_convert=true, force all possible layers to use cuDNN impl.
  if cudnn and cudnn.force_convert then
    cudnn.convert(model, cudnn)
  end
end
-- The model is only used for evaluation from now on
model:evaluate()

-- Configure the batcher and load the list of images.
if opts.auto_width_factor then
  -- Obtain the width factor from the model itself.
  local factor = laia.getWidthFactor(model)
  batcher:setOptions{ width_factor = factor }
  laia.log.info('Batcher width factor was automatically set to %d', factor)
end
batcher:load(opts.image_list)
-- The epoch is only reset when a positive seed was given.
if opts.seed > 0 then batcher:epochReset() end

-- Open file to write the horizontal padding of each sample.
-- An empty name disables this output, and '-' selects the standard output.
local output_hpad = nil
if opts.output_hpad ~= '' then
  output_hpad = opts.output_hpad == '-' and io.stdout or
    io.open(opts.output_hpad, 'w')
  -- The message is formatted explicitly: Lua's built-in assert uses the
  -- message as-is and does not interpolate additional arguments.
  assert(output_hpad ~= nil,
         string.format('File %q could not be opened for writing!',
                       opts.output_hpad))
end

-- Open output file ('-' selects the standard output).
-- For the HTK format no single output file is opened here: one file per
-- sample is created later, using opts.output as a directory.
local output_file = io.stdout
if opts.output_format ~= 'htk' then
  output_file = opts.output == '-' and io.stdout or io.open(opts.output, 'w')
  assert(output_file ~= nil,
         string.format('File %q could not be opened for writing!',
                       opts.output))
end

-- Auxiliary storages holding the fields of the HTK header.
-- nSamples and sampSize are filled in for each sample when it is written.
local nSamples = torch.IntStorage(1)
local sampPeriod = torch.IntStorage(1)
local sampSize = torch.ShortStorage(1)
local parmKind = torch.ShortStorage(1)
sampPeriod[1] = 100000  -- 10000000 = 1 second
parmKind[1] = 9         -- PARMKIND=USER

-- Loads the label priors from a text file and returns them as scaled
-- log-priors.
--
-- Every line of the file is split with laia.strsplit and must contain exactly
-- 4 fields; the second field is the count of the label. The prior of the
-- i-th label (i-th line) is computed as:
--   alpha * (log(count_i + smooth) - log(sum_j (count_j + smooth)))
--
-- Parameters:
--   fpriors: path of the priors file.
--   alpha:   scaling factor of the log-priors (default: 1).
--   smooth:  constant added to every count (default: 0.0).
-- Returns:
--   a float tensor with one log-prior per line of the file, and a table of
--   booleans telling whether the (unsmoothed) count of each label was 0.
-- Raises an error if the file cannot be read, is empty, or has a malformed
-- line.
local function load_priors(fpriors, alpha, smooth)
  alpha = alpha or 1
  smooth = smooth or 0.0
  local f = io.open(fpriors, 'r')
  assert(f ~= nil, string.format('Unable to read priors file: %q', fpriors))
  -- Read all the lines first, so that the file is closed even when one of
  -- the checks below fails.
  local lines = {}
  for line in f:lines() do
    lines[#lines + 1] = line
  end
  f:close()

  local logprior = {}
  local zeroprior = {}
  local total = 0
  for ln = 1, #lines do
    local fields = laia.strsplit(lines[ln])
    assert(#fields == 4,
           string.format('Unexpected number of fields in file: %q, line %d',
                         fpriors, ln))
    local counts = tonumber(fields[2])
    -- Fail with a clear message instead of an arithmetic error on nil.
    assert(counts ~= nil,
           string.format('Invalid count %q in file: %q, line %d',
                         tostring(fields[2]), fpriors, ln))
    zeroprior[ln] = (counts == 0)
    -- Smoothed counts are stored for now; they are converted to scaled
    -- log-priors once the normalization constant is known.
    logprior[ln] = counts + smooth
    total = total + logprior[ln]
  end
  assert(#logprior > 0,
         string.format('No priors were found in file: %q', fpriors))

  -- Normalize in the log domain and apply the scaling factor.
  local z = math.log(total)
  for i = 1, #logprior do
    logprior[i] = (math.log(logprior[i]) - z) * alpha
  end
  return torch.Tensor(logprior):float(), zeroprior
end

-- Scaled log-priors (only available when --prior was given).
local logprior, zeroprior
if opts.prior ~= '' then
  logprior, zeroprior = load_priors(
    opts.prior, opts.prior_alpha, opts.prior_smooth)
  -- Let the user know about labels that were never observed.
  if table.any(zeroprior) then
    laia.log.warn('There are symbols with prior equal to 0.')
  end
  -- Reshape to 1 x 1 x labels, so that the priors can later be expanded to
  -- the layout of the network output.
  local num_labels = logprior:size(1)
  logprior = logprior:view(1, 1, num_labels)
end

-- Writers for each supported --output_format. Every writer receives the IDs
-- of the samples in the batch, the batch output (laid out as
-- batch_size x timesteps x labels) and the number of leading samples of the
-- batch that have to be written.
local batch_writers = {}

-- Output as a Kaldi's table of matrices
batch_writers.matrix = function(ids, scores, count)
  local num_frames, num_labels = scores:size(2), scores:size(3)
  for i = 1, count do
    output_file:write(('%s [\n'):format(ids[i]))
    for t = 1, num_frames do
      for k = 1, num_labels do
        output_file:write(('%.10g '):format(scores[{i,t,k}]))
      end
      output_file:write('\n')
    end
    output_file:write(']\n')
  end
  output_file:flush()
end

-- Output as a Kaldi's table of lattices
batch_writers.lattice = function(ids, scores, count)
  local num_frames, num_labels = scores:size(2), scores:size(3)
  for i = 1, count do
    output_file:write(('%s\n'):format(ids[i]))
    for t = 1, num_frames do
      for k = 1, num_labels do
        -- One arc per (timestep, label), from state t-1 to state t
        output_file:write(('%d\t%d\t%d\t0,%.10g,%d\n'):format(
            t - 1, t, k, scores[{i,t,k}], k))
      end
    end
    -- Line for the last state, followed by an empty line
    output_file:write(('%d\t0,0,\n\n'):format(num_frames))
  end
  output_file:flush()
end

-- Output as HTK feats, one file per sample: <output>/<id>.fea
batch_writers.htk = function(ids, scores, count)
  for i = 1, count do
    local path = opts.output..'/'..ids[i]..'.fea'
    local fd = torch.DiskFile(path, 'w'):binary():bigEndianEncoding()
    local sample = scores[i]
    local num_frames, num_labels = sample:size(1), sample:size(2)
    -- Header: nSamples, sampPeriod, sampSize and parmKind
    nSamples[1] = num_frames
    sampSize[1] = 4 * num_labels
    fd:writeInt(nSamples[1])
    fd:writeInt(sampPeriod[1])
    fd:writeShort(sampSize[1])
    fd:writeShort(parmKind[1])
    -- Data: every value is written as a float, timestep by timestep
    for t = 1, num_frames do
      for k = 1, num_labels do
        fd:writeFloat(sample[{t,k}])
      end
    end
    fd:close()
  end
end

-- Output as sequence of maximums: the sample ID, the mean of the maximum
-- values and then one "index:value" pair for each timestep.
batch_writers.maxseq = function(ids, scores, count)
  for i = 1, count do
    local best_val, best_idx = torch.max(scores[i], 2)
    best_val = best_val:squeeze()
    best_idx = best_idx:squeeze()
    output_file:write(ids[i]..' '..torch.mean(best_val))
    for t = 1, best_val:size(1) do
      output_file:write(' '..best_idx[t]..':'..best_val[t])
    end
    output_file:write('\n')
  end
  output_file:flush()
end

local total_samples = batcher:numSamples()
local write_batch = batch_writers[opts.output_format]
for first = 1, total_samples, opts.batch_size do
  -- Get the next batch of images
  local images, _, _, ids, hpads = batcher:next(opts.batch_size)
  if opts.gpu > 0 then images = images:cuda() end
  -- Forward through the network, and copy the result to the CPU
  local scores = model:forward(images):float()
  -- Change output layout to batch_size x timesteps x labels
  local num_frames = scores:size(1) / opts.batch_size
  scores = scores:view(num_frames, opts.batch_size, scores:size(2))
  scores = scores:permute(2, 1, 3):contiguous()

  -- Only the first `count` samples of the batch are written, so that no more
  -- than total_samples samples are written in total.
  local count = math.min(opts.batch_size, total_samples - first + 1)

  -- Write horizontal padding of each sample
  if output_hpad then
    for i = 1, count do
      local hpad = hpads[i]
      output_hpad:write(('%s %d %d %d\n'):format(
          ids[i], hpad[1], hpad[2], hpad[3]))
    end
    output_hpad:flush()
  end

  -- Convert log-posteriors into log-likelihoods using the provided priors.
  -- Note: if --prior was given, a log-softmax layer was added at the end of
  -- the model.
  if logprior then
    scores:csub(logprior:expandAs(scores))
  end

  -- Write the batch in the requested format
  if write_batch then
    write_batch(ids, scores, count)
  end
end

-- Release the output file and, if it was requested, the padding file.
output_file:close()
if output_hpad ~= nil then output_hpad:close() end