% cnn_ucf101_temporal.m

function cnn_ucf101_temporal(varargin)
%CNN_UCF101_TEMPORAL  Fine-tune an ImageNet model on 20-channel inputs.
%   CNN_UCF101_TEMPORAL(...) builds the option structure OPTS, loads (or
%   creates) the image database for the selected data set and split, loads
%   a pre-trained base model (downloading it if it is missing), adapts its
%   first layer to 20 input channels and its classifier to the number of
%   classes in the database, adjusts/inserts dropout, attaches loss and
%   error layers, and finally calls CNN_TRAIN_DAG.
%
%   All arguments are passed to VL_ARGPARSE together with OPTS (presumably
%   as 'name', value pairs overriding fields of OPTS -- see VL_ARGPARSE).
%
%   Note that the model name, and therefore the base model file and the
%   experiment directory defaults, are derived from the default options
%   *before* the arguments are parsed.

% Shut down an already running parallel pool, if any.
if ~isempty(gcp('nocreate'))
    delete(gcp('nocreate'))
end

opts = cnn_setup_environment();
opts.train.gpus =  1 ;
% opts.train.gpus = [ 1 : 3 ]

opts.dataSet = 'ucf101';
% opts.dataSet = 'hmdb51';

addpath('network_surgery');
opts.dataDir = fullfile(opts.dataPath, opts.dataSet) ;
opts.splitDir = [opts.dataSet '_splits'];
opts.nSplit = 1 ;
opts.inputdim  = [ 224,  224, 20] ;
opts.train.memoryMapFile = fullfile(tempdir, 'ramdisk', ['matconvnet' num2str(feature('getpid')) '.bin']) ;
removeInputPadding = 0 ;
opts.train.cheapResize = 0;
opts.train.batchSize = 128  ;
opts.train.numSubBatches = 4 ;
opts.dropOutRatio = .8; % inserted after fully connected layers

opts.train.epochFactor = 100 ;
opts.train.learningRate = [ 1e-2*ones(1,1) 1e-3*ones(1, 1) 1e-4*ones(1,1) 1e-5*ones(1,1)]  ;

% Augmentation mode. Alternatives that were previously assigned here and
% immediately overwritten:
%   'randCropFlipStretch', 'randCropFlip', 'borders5', 'corners'
opts.train.augmentation = 'multiScaleRegular';

model = ['res50' opts.train.augmentation '-bs=' num2str(opts.train.batchSize) ...
  '-cheapRsz=' num2str(opts.train.cheapResize), ...
  '-split' num2str(opts.nSplit) '-dr' num2str(opts.dropOutRatio)];

% True when the pattern occurs anywhere in the string.
hasSubstr = @(str, pattern) ~isempty(strfind(str, pattern)) ;

% Select the pre-trained base model from the model name.
if hasSubstr(model, 'vgg16')
  baseModel = 'imagenet-vgg-verydeep-16.mat' ;
  opts.train.epochFactor = 100 ;
  opts.train.learningRate =  [ 5e-4*ones(1, 10) 5e-5*ones(1,5) 5e-6*ones(1,1)]  ;
  opts.train.batchSize = 128  ;
  opts.train.numSubBatches =  ceil(8 / max(numel(opts.train.gpus),1));
elseif hasSubstr(model, 'vgg-m')
  baseModel = 'imagenet-vgg-m-2048.mat' ;
elseif hasSubstr(model, 'res152')
  baseModel = 'imagenet-resnet-152-dag.mat' ;
elseif hasSubstr(model, 'res101')
  baseModel = 'imagenet-resnet-101-dag.mat' ;
elseif hasSubstr(model, 'res50')
  baseModel = 'imagenet-resnet-50-dag.mat' ;
else
  error('Unknown model %s', model) ;
end
opts.model = fullfile(opts.modelPath,baseModel) ;

opts.expDir = fullfile(opts.dataDir, [opts.dataSet '-' model]) ;
opts.train.plotDiagnostics = 0 ;
opts.train.continue = 1 ;
opts.train.prefetch = 1 ;

opts.imdbPath = fullfile(opts.dataDir, [opts.dataSet '_split' num2str(opts.nSplit) 'imdb.mat']);
opts.train.expDir = opts.expDir ;
opts.train.numAugments = 1;
opts.train.frameSample = 'random';
opts.train.nFramesPerVid = 1;
opts.train.uniformAugments  = false;

[opts, varargin] = vl_argparse(opts, varargin) ;

% -------------------------------------------------------------------------
%                                                   Database initialization
% -------------------------------------------------------------------------
% Ask explicitly for a file: a bare exist(name) also matches variables,
% folders and functions on the path.
if exist(opts.imdbPath, 'file') == 2
  imdb = load(opts.imdbPath) ;
  % opts.flowDir is not set in this function; it has to come from
  % cnn_setup_environment or from the caller's arguments.
  imdb.flowDir = opts.flowDir;
else
  imdb = cnn_ucf101_setup_data(opts) ;
  save(opts.imdbPath, '-struct', 'imdb', '-v6') ;
end
nClasses = length(imdb.classes.name);

% -------------------------------------------------------------------------
%                                                        Base model loading
% -------------------------------------------------------------------------
if exist(opts.model, 'file') ~= 2
  fprintf('Downloading base model file: %s ...\n', baseModel);
  modelDir = fileparts(opts.model) ;
  % Only create the folder when needed; mkdir warns on existing folders.
  if ~isempty(modelDir) && ~exist(modelDir, 'dir')
    mkdir(modelDir) ;
  end
  urlwrite(...
  ['http://www.vlfeat.org/matconvnet/models/' baseModel], ...
    opts.model) ;
end

net = load(opts.model);
if isfield(net, 'net'), net=net.net;end
if isstruct(net.layers)
  % net.layers is a struct array: the model is handed to
  % dagnn.DagNN.loadobj below.
  if net.meta.normalization.imageSize(3) == 3
      net.meta.normalization.imageSize(3) = 20 ;
      % Grow the first parameter from 3 to 20 slices along dimension 3 by
      % symmetric padding at the end.
      nExtraChannels =  net.meta.normalization.imageSize(3) - size(net.params(1).value,3);
      net.params(1).value = padarray(net.params(1).value, [0 0 nExtraChannels 0], 'symmetric', 'post');
  end
  % replace 1000-way imagenet classifiers
  for p = 1 : numel(net.params)
    sz = size(net.params(p).value);
    if any(sz == 1000)
      sz(sz == 1000) = nClasses;
      fprintf('replace classifier layer of %s\n', net.params(p).name);
      if numel(sz) > 2
         % more than two dimensions: small random initialisation
         net.params(p).value = 0.01 * randn(sz,  class(net.params(p).value));
      else
         % at most two dimensions: zero initialisation
         net.params(p).value = zeros(sz,  class(net.params(p).value));
      end
    end
  end
  net.meta.normalization.border = [256 256] - net.meta.normalization.imageSize(1:2);
  net = dagnn.DagNN.loadobj(net);
else
  % net.layers is not a struct array: the model is converted with
  % dagnn.DagNN.fromSimpleNN below.
  if isfield(net, 'meta')
    netNorm = net.meta.normalization;
  else
    netNorm = net.normalization;
  end
  if(netNorm.imageSize(3) == 3)
    if hasSubstr(opts.model,'vgg-m-2048')
      net.layers(7) = [] ; %remove norm2 layer as in Simonyan et al NIPS'14
    end
    opts.inputdim  = [netNorm.imageSize(1:2), 20] ;
    % Average the first-layer filters over their input channels and
    % replicate the result for every new input channel.
    net.layers{1}.weights{1} = repmat(mean(net.layers{1}.weights{1},3), [1 1 opts.inputdim(3) 1]) ;
    net.meta.normalization.averageImage = [];
    net.meta.normalization.border = [256 256] - netNorm.imageSize(1:2);
    net = replace_last_layer(net, [1 2], [1 2], nClasses, opts.dropOutRatio);
    % NOTE(review): imageSize is written to net.normalization while the
    % two fields above go to net.meta.normalization. Left unchanged because
    % the result depends on how fromSimpleNN merges the two -- confirm
    % which location is intended.
    net.normalization.imageSize = opts.inputdim ;
  end
  if hasSubstr(model, 'bnorm')
    net = insert_bnorm_layers(net) ;
  end
  net = dagnn.DagNN.fromSimpleNN(net) ;
end

if removeInputPadding
    % Move the padding of the first layer into the expected input size.
    padMN = [sum(net.layers(1).block.pad(1:2)) sum(net.layers(1).block.pad(3:4))];
    net.layers(1).block.pad  = zeros(1,numel(net.layers(1).block.size));
    net.meta.normalization.imageSize(1:2) = net.meta.normalization.imageSize(1:2) + padMN ;
    net.meta.normalization.averageImage = padarray(net.meta.normalization.averageImage, padMN / 2, 'symmetric');
end
net = dagnn.DagNN.setLrWd(net);
net.renameVar(net.vars(1).name, 'input');

% -------------------------------------------------------------------------
%                                                                   Dropout
% -------------------------------------------------------------------------
% NaN leaves the network untouched. Otherwise existing dropout layers get
% the requested rate (or are removed when the rate is not positive); if
% the network has none, one is inserted after the last pooling layer.
if ~isnan(opts.dropOutRatio)
  dr_layers = find(arrayfun(@(x) isa(x.block,'dagnn.DropOut'), net.layers)) ;
  if ~isempty(dr_layers)
    if opts.dropOutRatio > 0
      for i=dr_layers, net.layers(i).block.rate = opts.dropOutRatio; end
    else
      net.removeLayer({net.layers(dr_layers).name});
    end
  else
    if opts.dropOutRatio > 0
      pool5_layer = find(arrayfun(@(x) isa(x.block,'dagnn.Pooling'), net.layers)) ;
      conv_layers = pool5_layer(end);
      for i=conv_layers
        block = dagnn.DropOut() ;   block.rate = opts.dropOutRatio ;
        newName = ['drop_' net.layers(i).name];

        net.addLayer(newName, ...
          block, ...
          net.layers(i).outputs, ...
          {newName}) ;

        % Rewire every consumer of the pooling output to read the dropout
        % output instead. The last layer is skipped: it is the dropout
        % layer that was just appended.
        for l = 1:numel(net.layers)-1
          for f = net.layers(i).outputs
             sel = find(strcmp(f, net.layers(l).inputs )) ;
             if ~isempty(sel)
              [net.layers(l).inputs{sel}] = deal(newName) ;
             end
          end
        end
      end
    end
  end
end

% -------------------------------------------------------------------------
%                                                     Loss and error layers
% -------------------------------------------------------------------------
% Drop every layer whose name contains 'err'.
net.layers(~cellfun('isempty', strfind({net.layers(:).name}, 'err'))) = [] ;

% Collect the outputs of the remaining loss layers as derivative outputs
% (weight 1 each) and remove SoftMax layers. Iterating backwards keeps the
% indices that are still to be visited valid after a removal.
opts.train.derOutputs = {} ;
for l=numel(net.layers):-1:1
  if isa(net.layers(l).block, 'dagnn.Loss') && isempty(strfind(net.layers(l).name, 'err'))
      opts.train.derOutputs = {opts.train.derOutputs{:}, net.layers(l).outputs{:}, 1} ;
  end
  if isa(net.layers(l).block, 'dagnn.SoftMax')
    net.removeLayer(net.layers(l).name) ;
  end
end
if isempty(opts.train.derOutputs)
  net = dagnn.DagNN.insertLossLayers(net, 'numClasses', nClasses) ;
  fprintf('setting derivative for layer %s \n', net.layers(end).name);
  opts.train.derOutputs = {opts.train.derOutputs{:}, net.layers(end).outputs{:}, 1} ;
end

lossLayers = find(arrayfun(@(x) isa(x.block,'dagnn.Loss') && strcmp(x.block.loss,'softmaxlog'),net.layers));
if isempty(lossLayers)
  % Fail with a clear message instead of an index error on lossLayers(end).
  error('No softmaxlog loss layer found to attach the error layers to.') ;
end

% The error layers read the same inputs as the last softmaxlog loss.
net.addLayer('top1error', ...
             dagnn.Loss('loss', 'classerror'), ...
             net.layers(lossLayers(end)).inputs, ...
             'top1error') ;

net.addLayer('top5error', ...
             dagnn.Loss('loss', 'topkerror', 'opts', {'topK', 5}), ...
             net.layers(lossLayers(end)).inputs, ...
             'top5error') ;


net.print() ;
net.rebuild() ;

% -------------------------------------------------------------------------
%                                                                  Training
% -------------------------------------------------------------------------
% Constant value of 128 for each of the 20 input channels.
net.meta.normalization.averageImage = ones(1,1,20)*128;
if ~exist(opts.expDir, 'dir'), mkdir(opts.expDir) ; end

net.meta.normalization.rgbVariance = [];

% Set 1 is used for training and set 2 for validation. The training
% indices are repeated epochFactor times.
opts.train.train = find(ismember(imdb.images.set, [1  ])) ;
opts.train.train = repmat(opts.train.train,1,opts.train.epochFactor);
opts.train.val = find(ismember(imdb.images.set, [2]) );

opts.train.valmode = '250samples' ;
% opts.train.valmode = '30samples' ;
opts.train.denseEval = 1 ;
net.conserveMemory = 1 ;
fn = getBatchWrapper_ucf101_flow(net.meta.normalization, opts.numFetchThreads, opts.train) ;

[info] = cnn_train_dag(net, imdb, fn, opts.train) ;

end