% ===== file: experiments/imagenet_tt_vgg16/cnn_pass_imagenet_mat.m =====
function [net, info] = cnn_pass_imagenet_mat(net, imdb, getBatch, outputDir, varargin)
%CNN_PASS_IMAGENET_MAT Pass validation data through NET and save activations.
%   [NET, INFO] = CNN_PASS_IMAGENET_MAT(NET, IMDB, GETBATCH, OUTPUTDIR, ...)
%   runs the validation images of IMDB through NET batch by batch,
%   accumulates error statistics in INFO, and stores the intermediate
%   feature maps returned by VL_SIMPLENN_IMAGENET_MAT into OUTPUTDIR
%   (one data_img<k>.mat file per batch, plus a curBatch.mat progress
%   marker so an interrupted run can be located).
%
%   GETBATCH is a handle called as [IM, LABELS] = GETBATCH(IMDB, BATCH).
%   Options (name/value pairs) mirror cnn_train; only the validation
%   pass is performed — no weights are updated.

opts.train = [] ;
opts.val = [] ;
opts.numEpochs = 300 ;
opts.batchSize = 256 ;
opts.useGpu = false ;
opts.learningRate = 0.001 ;
opts.continue = false ;
opts.expDir = fullfile('data','exp') ;
opts.figuresPath = fullfile('data','figures','fig') ;
opts.conserveMemory = true ;
opts.sync = true ;
opts.prefetch = false ;
opts.weightDecay = 0.0005 ;
opts.momentum = 0.9 ;
opts.errorType = 'multiclass' ;
opts.plotDiagnostics = false ;
opts = vl_argparse(opts, varargin) ;

if ~exist(opts.expDir, 'dir'), mkdir(opts.expDir) ; end
if isempty(opts.train), opts.train = find(imdb.images.set==1) ; end
if isempty(opts.val), opts.val = find(imdb.images.set==2) ; end
% Passing NaN for 'train' disables training altogether (cnn_train idiom).
if any(isnan(opts.train)), opts.train = [] ; end

% -------------------------------------------------------------------------
% Validate
% -------------------------------------------------------------------------

rng(0) ;

info.train.objective = [] ;
info.train.error = [] ;
info.train.topFiveError = [] ;
info.train.speed = [] ;
info.val.objective = [] ;
info.val.error = [] ;
info.val.topFiveError = [] ;
info.val.speed = [] ;
info.time = [] ;
res = [] ;

% Directory where the intermediate activations are saved.
dataDirMimic = outputDir ;
if ~exist(dataDirMimic, 'dir')
  mkdir(dataDirMimic) ;
end

if opts.useGpu
  net = vl_simplenn_move(net, 'gpu') ;
end

for epoch = 1:opts.numEpochs
  val = opts.val ;

  info.train.objective(end+1) = 0 ;
  info.train.error(end+1) = 0 ;
  info.train.topFiveError(end+1) = 0 ;
  info.train.speed(end+1) = 0 ;
  info.val.objective(end+1) = 0 ;
  info.val.error(end+1) = 0 ;
  info.val.topFiveError(end+1) = 0 ;
  info.val.speed(end+1) = 0 ;
  info.time(end+1) = 0 ;

  % evaluation on validation set
  lastProcessBatch = 1 ;
  curBatchnumber = 0 ;
  for t = lastProcessBatch:opts.batchSize:numel(val)
    curBatchnumber = curBatchnumber + 1 ;
    batch_time = tic ;
    batch = val(t:min(t+opts.batchSize-1, numel(val))) ;
    fprintf('validation: epoch %02d: processing batch %3d of %3d ...', epoch, ...
            fix(t/opts.batchSize)+1, ceil(numel(val)/opts.batchSize)) ;
    [im, labels] = getBatch(imdb, batch) ;
    if opts.prefetch
      nextBatch = val(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(val))) ;
      getBatch(imdb, nextBatch) ;
    end
    if opts.useGpu
      im = gpuArray(im) ;
    end

    net.layers{end}.class = labels ;
    [res, data_img] = vl_simplenn_imagenet_mat(net, im, [], res, ...
      'disableDropout', true, ...
      'conserveMemory', opts.conserveMemory, ...
      'sync', opts.sync) ; %#ok
    lastProcessBatch = t ; %#ok
    % Checkpoint: record the last processed batch and dump its activations.
    save(fullfile(outputDir, 'curBatch.mat'), 'lastProcessBatch') ;
    save(fullfile(outputDir, strcat('data_img', num2str(curBatchnumber))), 'data_img') ;

    % print information
    batch_time = toc(batch_time) ;
    speed = numel(batch)/batch_time ;
    info.val = updateError(opts, info.val, net, res, batch_time) ;
    fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ;
    n = t + numel(batch) - 1 ;
    if strcmp(opts.errorType, 'mse')
      fprintf(' err %.5f\n', info.val.error(end)/n) ;
    else
      fprintf(' err %.1f err5 %.1f', ...
        info.val.error(end)/n*100, info.val.topFiveError(end)/n*100) ;
      fprintf('\n') ;
    end
  end

end

% -------------------------------------------------------------------------
function info = updateError(opts, info, net, res, time)
% -------------------------------------------------------------------------
% Accumulate objective and error statistics for the current batch into INFO.
% TIME receives the batch wall-clock time; it is summed into info.speed
% (kept under that historical field name for compatibility).
predictions = gather(res(end-1).x) ;
sz = size(predictions) ;
n = prod(sz(1:2)) ;

labels = net.layers{end}.class ;
info.objective(end) = info.objective(end) + sum(double(gather(res(end).x))) ;
info.speed(end) = info.speed(end) + time ;
switch opts.errorType
  case 'multiclass'
    [~, predictions] = sort(predictions, 3, 'descend') ;
    error = ~bsxfun(@eq, predictions, reshape(labels, 1, 1, 1, [])) ;
    info.error(end) = info.error(end) + ...
      sum(sum(sum(error(:,:,1,:))))/n ;
    info.topFiveError(end) = info.topFiveError(end) + ...
      sum(sum(sum(min(error(:,:,1:5,:),[],3))))/n ;
  case 'binary'
    error = bsxfun(@times, predictions, labels) < 0 ;
    info.error(end) = info.error(end) + sum(error(:))/n ;
  case 'mse'
    error = predictions - labels ;
    info.error(end) = info.error(end) + sum(error(:).^2) ;
end

% ===== file: experiments/imagenet_tt_vgg16/evaluate_imagenet_tt.m =====
function info = evaluate_imagenet_tt(varargin)
% EVALUATE_IMAGENET_TT Evaluate vgg16-tt models on ImageNet.
%   INFO = EVALUATE_IMAGENET_TT(...) runs one validation-only epoch of
%   CNN_TRAIN over pre-computed activation batches (see MAKE_DATA_MAT).
opts.data_path = fullfile('data', 'imagenet12','data_path') ;
opts.dataDir = fullfile('data', 'imagenet12') ;
opts.expDir = fullfile('data', 'imagenet12-eval-vgg-f') ;
opts.imdbPath = fullfile(opts.expDir, 'imdb.mat') ;
opts.modelPath = fullfile('data', 'models', 'imagenet-vgg-f.mat') ;
opts.lite = false ;
opts.numFetchThreads = 8 ;
opts.train.batchSize = 64 ;
opts.train.numEpochs = 1 ;
opts.train.gpus = [1] ;
opts.train.prefetch = false ;
opts.train.expDir = opts.expDir ;

opts = vl_argparse(opts, varargin) ;
display(opts) ;

% -------------------------------------------------------------------------
% Database initialization
% -------------------------------------------------------------------------

if exist(opts.imdbPath, 'file')
  imdb = load(opts.imdbPath) ;
else
  imdb = cnn_imagenet_setup_data('dataDir', opts.dataDir, 'lite', opts.lite) ;
  mkdir(opts.expDir) ;
  save(opts.imdbPath, '-struct', 'imdb') ;
end

% -------------------------------------------------------------------------
% Network initialization
% -------------------------------------------------------------------------

net = load(opts.modelPath) ;
net.layers{end}.type = 'softmaxloss' ; % softmax -> softmaxloss

% Synchronize label indexes between the model and the image database
imdb = cnn_imagenet_sync_labels(imdb, net) ;

% -------------------------------------------------------------------------
% Stochastic gradient descent (validation-only: 'train' is NaN)
% -------------------------------------------------------------------------

data_path = fullfile(opts.data_path, 'data_img') ;
getBatchWrapper = @(imdb,batch) getBatch(imdb, batch, data_path) ;
[net, info] = cnn_train(net, imdb, getBatchWrapper, opts.train, ...
  'conserveMemory', true, ...
  'train', NaN, ...
+ 'val', find(imdb.images.set==2)) ; + +% ------------------------------------------------------------------------- +function [im,labels] = getBatch(imdb, batch, data_path) +% ------------------------------------------------------------------------- +%data_path = '../../data_permanent/data_imagenet_mimic_deep/data_img'; +sizeBatch = 64; +%batch(1) +for i = 1 : numel(batch) +%for val without train + nFile = ceil((batch(i) - 1281167) / 64); + %nEl = batch(i) - 64 * floor((batch(i) - 1281167) / 64); + +% nFile = ceil(batch(i) / sizeBatch); +% fprintf('nFile %d\n', nFile); + batchData = load(strcat(data_path, num2str(nFile), '.mat')); +%batchData + batchData = batchData.data_img; + if i == 1 + im_size = size(batchData); + im_size(4) = numel(batch); + im =single(zeros(im_size)); + end +% batch(i) - (nFile + 20019-1 - 1) * sizeBatch +% batch(i) +%size(batchData) +%for val without train + im(:,:,:,i) = batchData(:,:,:, batch(i) - 15 - (nFile + 20019-1 - 1) * sizeBatch); +end +labels = imdb.images.label(batch) ; +%size(labels) +%size(im) diff --git a/experiments/imagenet_tt_vgg16/make_data_mat.m b/experiments/imagenet_tt_vgg16/make_data_mat.m new file mode 100644 index 0000000..4455207 --- /dev/null +++ b/experiments/imagenet_tt_vgg16/make_data_mat.m @@ -0,0 +1,64 @@ +function info = make_data_mat(varargin) +% MAKE_DATA_MAT pass data though the network and save it +opts.outputDir = fullfile('data', 'imagenet_TT','outputDir') ; +opts.dataDir = fullfile('data', 'imagenet_TT') ; +opts.expDir = fullfile('data', 'imagenet_TT') ; +opts.imdbPath = fullfile(opts.expDir, 'imdb.mat'); +opts.modelPath = fullfile('data', 'models', 'imagenet-vgg-deep-16.mat') ; +opts.lite = false ; +opts.numFetchThreads = 8 ; +opts.train.batchSize = 64 ; +opts.train.numEpochs = 1 ; +opts.train.useGpu = true; +opts.train.prefetch = false ; +opts.train.expDir = opts.expDir ; + +opts = vl_argparse(opts, varargin) ; +display(opts); + +% 
------------------------------------------------------------------------- +% Database initialization +% ------------------------------------------------------------------------- + +if exist(opts.imdbPath) + imdb = load(opts.imdbPath) ; +else + imdb = cnn_imagenet_setup_data('dataDir', opts.dataDir, 'lite', opts.lite) ; + mkdir(opts.expDir) ; + save(opts.imdbPath, '-struct', 'imdb') ; +end + +% ------------------------------------------------------------------------- +% Network initialization +% ------------------------------------------------------------------------- + +net = load(opts.modelPath) ; +net.layers{end}.type = 'softmaxloss' ; % softmax -> softmaxloss + +% Synchronize label indexes between the model and the image database +imdb = cnn_imagenet_sync_labels(imdb, net); + +% ------------------------------------------------------------------------- +% Stochastic gradient descent +% ------------------------------------------------------------------------- + +fn = getBatchWrapper(net.normalization, opts.numFetchThreads) ; + +[~,info] = cnn_pass_imagenet_mat(net, imdb, fn, opts.outputDir, opts.train, ... + 'conserveMemory', true, ... + 'train', NaN, ... + 'val', find(imdb.images.set == 2)) ; + +% ------------------------------------------------------------------------- +function fn = getBatchWrapper(opts, numThreads) +% ------------------------------------------------------------------------- +fn = @(imdb,batch) getBatch(imdb,batch,opts,numThreads) ; + +% ------------------------------------------------------------------------- +function [im,labels] = getBatch(imdb, batch, opts, numThreads) +% ------------------------------------------------------------------------- +images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ; +im = cnn_imagenet_get_batch(images, opts, ... + 'numThreads', numThreads, ... 
+ 'prefetch', nargout == 0) ; +labels = imdb.images.label(batch) ; diff --git a/experiments/imagenet_tt_vgg16/resize_from_tar.sh b/experiments/imagenet_tt_vgg16/resize_from_tar.sh new file mode 100644 index 0000000..fe90ae8 --- /dev/null +++ b/experiments/imagenet_tt_vgg16/resize_from_tar.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# sudo apt-get update +# sudo apt-get install imagemagick +# Start in folder with original .tar files +#Before launch set PACKEDTRAINDIR and DATA_OUT +# Variables +PACKEDTRAINDIR=/home/ubuntu/tmp/train_packed +DATA_OUT=/home/ubuntu/imagenet_mimic +TRAINDIR=$DATA_OUT/images/train +VALDIR=$DATA_OUT/images/val + +mkdir $DATA_OUT +mkdir $DATA_OUT/images +# Unpack main train archive +mkdir $PACKEDTRAINDIR +tar -xvf ILSVRC2012_img_train.tar -C $PACKEDTRAINDIR + +# Unpack & resize nested train archives +mkdir $TRAINDIR + +for NAME in $PACKEDTRAINDIR/*.tar; do + INDEX=$(basename $NAME .tar) + echo $INDEX + if test -d $TRAINDIR/$INDEX; then + echo "Folder "$TRAINDIR/$INDEX" exists" + else + mkdir $TRAINDIR/$INDEX + tar -xf $PACKEDTRAINDIR/$INDEX.tar -C $TRAINDIR/$INDEX + # Resize to height to 256, preserving the aspect ratio + mogrify -resize 256x256^ "$TRAINDIR/$INDEX/*.JPEG" + fi +done + +# Validation +mkdir $VALDIR +tar -xf ILSVRC2012_img_val.tar -C $VALDIR +mogrify -resize 256x256^ "$VALDIR/*.JPEG" + + +# CMYK -> RGB. 
Important +mogrify -colorspace rgb $TRAINDIR/n03062245/n03062245_4620.JPEG +mogrify -colorspace rgb $TRAINDIR/n04264628/n04264628_27969.JPEG +mogrify -colorspace rgb $TRAINDIR/n03961711/n03961711_5286.JPEG +mogrify -colorspace rgb $TRAINDIR/n01739381/n01739381_1309.JPEG +mogrify -colorspace rgb $TRAINDIR/n04258138/n04258138_17003.JPEG +mogrify -colorspace rgb $TRAINDIR/n03018349/n03018349_4028.JPEG +mogrify -colorspace rgb $TRAINDIR/n04336792/n04336792_7448.JPEG +mogrify -colorspace rgb $TRAINDIR/n02492035/n02492035_15739.JPEG +mogrify -colorspace rgb $TRAINDIR/n03544143/n03544143_17228.JPEG +mogrify -colorspace rgb $TRAINDIR/n03467068/n03467068_12171.JPEG +mogrify -colorspace rgb $TRAINDIR/n03633091/n03633091_5218.JPEG +mogrify -colorspace rgb $TRAINDIR/n02447366/n02447366_23489.JPEG +mogrify -colorspace rgb $TRAINDIR/n03347037/n03347037_9675.JPEG +mogrify -colorspace rgb $TRAINDIR/n02077923/n02077923_14822.JPEG +mogrify -colorspace rgb $TRAINDIR/n02747177/n02747177_10752.JPEG +mogrify -colorspace rgb $TRAINDIR/n04371774/n04371774_5854.JPEG +mogrify -colorspace rgb $TRAINDIR/n07583066/n07583066_647.JPEG +mogrify -colorspace rgb $TRAINDIR/n04596742/n04596742_4225.JPEG +mogrify -colorspace rgb $TRAINDIR/n13037406/n13037406_4650.JPEG +mogrify -colorspace rgb $TRAINDIR/n03529860/n03529860_11437.JPEG +mogrify -colorspace rgb $TRAINDIR/n03710637/n03710637_5125.JPEG +mogrify -colorspace rgb $TRAINDIR/n04033995/n04033995_2932.JPEG +mogrify -colorspace rgb $VALDIR/ILSVRC2012_val_00019877.JPEG diff --git a/experiments/imagenet_tt_vgg16/vl_simplenn_imagenet_mat.m b/experiments/imagenet_tt_vgg16/vl_simplenn_imagenet_mat.m new file mode 100644 index 0000000..c897a80 --- /dev/null +++ b/experiments/imagenet_tt_vgg16/vl_simplenn_imagenet_mat.m @@ -0,0 +1,373 @@ +function [res, data, labels] = vl_simplenn_imagenet_mat(net, x, dzdy, res, varargin) +% VL_SIMPLENN Evaluates a simple CNN +% RES = VL_SIMPLENN(NET, X) evaluates the convnet NET on data X. 
% RES = VL_SIMPLENN(NET, X, DZDY) evaluates the convnet NET and its
% derivative on data X and output derivative DZDY.
%
% The network has a simple (linear) topology, i.e. the computational
% blocks are arranged in a sequence of layers. Please note that
% there is no need to use this wrapper, which is provided for
% convenience. Instead, the individual CNN computational blocks can
% be evaluated directly, making it possible to create significantly
% more complex topologies, and in general allowing greater
% flexibility.
%
% The NET structure contains two fields:
%
% - net.layers: the CNN layers.
% - net.normalization: information on how to normalize input data.
%
% The network expects the data X to be already normalized. This
% usually involves rescaling the input image(s) and subtracting a
% mean.
%
% RES is a structure array with one element per network layer plus
% one representing the input. So RES(1) refers to the zeroth-layer
% (input), RES(2) refers to the first layer, etc. Each entry has
% fields:
%
% - res(i+1).x: the output of layer i. Hence res(1).x is the network
%   input.
%
% - res(i+1).aux: auxiliary output data of layer i. For example,
%   dropout uses this field to store the dropout mask.
%
% - res(i+1).dzdx: the derivative of the network output relative to
%   variable res(i+1).x, i.e. the output of layer i. In particular
%   res(1).dzdx is the derivative of the network output with respect
%   to the network input.
%
% - res(i+1).dzdw: the derivative of the network output relative to
%   the parameters of layer i. It can be a cell array for multiple
%   parameters.
%
% net.layers is a cell array of network layers. The following
% layers, encapsulating corresponding functions in the toolbox, are
% supported:
%
% Convolutional layer::
%   The convolutional layer wraps VL_NNCONV(). It has fields:
%
%   - layer.type = 'conv'
%   - layer.weights = {filters, biases}
%   - layer.stride: the sampling stride (usually 1).
%   - layer.padding: the padding (usually 0).
%
% TT (Tensor-Train) layer::
%   The TT layer wraps VL_NNTT(). It has fields:
%
%   - layer.type = 'tt'
%   - layer.W, layer.biases: the TT-format weights and the biases.
%   - layer.outHeight, layer.outWidth, layer.outChannels: output shape.
%
% Max pooling layer::
%   The max pooling layer wraps VL_NNPOOL(). It has fields:
%
%   - layer.type = 'pool'
%   - layer.method: pooling method ('max' or 'avg').
%   - layer.pool: the pooling size.
%   - layer.stride: the sampling stride (usually 1).
%   - layer.padding: the padding (usually 0).
%
% Normalization layer::
%   The normalization layer wraps VL_NNNORMALIZE(). It has fields
%
%   - layer.type = 'normalize'
%   - layer.param: the normalization parameters.
%
% Spatial normalization layer:
%   This is similar to the layer above, but wraps VL_NNSPNORM():
%
%   - layer.type = 'spnorm'
%   - layer.param: the normalization parameters.
%
% Batch normalization layer:
%   This layer wraps VL_NNBNORM(). It has fields:
%
%   - layer.type = 'bnorm'
%   - layer.weights = {multipliers, biases}.
%
% ReLU and Sigmoid layers::
%   The ReLU layer wraps VL_NNRELU(). It has fields:
%
%   - layer.type = 'relu'
%
%   The sigmoid layer is the same, but for the sigmoid function, with
%   `relu` replaced by `sigmoid`.
%
% Dropout layer::
%   The dropout layer wraps VL_NNDROPOUT(). It has fields:
%
%   - layer.type = 'dropout'
%   - layer.rate: the dropout rate.
%
% Softmax layer::
%   The softmax layer wraps VL_NNSOFTMAX(). It has fields
%
%   - layer.type = 'softmax'
%
% Log-loss layer::
%   The log-loss layer wraps VL_NNLOSS(). It has fields:
%
%   - layer.type = 'loss'
%   - layer.class: the ground-truth class.
%
% Softmax-log-loss layer::
%   The softmax-log-loss layer wraps VL_NNSOFTMAXLOSS(). It has
%   fields:
%
%   - layer.type = 'softmaxloss'
%   - layer.class: the ground-truth class.
%
% P-dist layer:
%   The pdist layer wraps VL_NNPDIST(). It has fields:
%
%   - layer.type = 'pdist'
%   - layer.p = P parameter of the P-distance
%   - layer.noRoot = whether to raise the distance to the P-th power
%   - layer.epsilon = regularization parameter for the derivatives
%
% Custom layer::
%   This can be used to specify custom layers.
%
%   - layer.type = 'custom'
%   - layer.forward: a function handle computing the block.
%   - layer.backward: a function handle computing the block derivative.
%
%   The first function is called as res(i+1) = forward(layer, res(i), res(i+1))
%   where res() is the struct array specified before. The second function is
%   called as res(i) = backward(layer, res(i), res(i+1)). Note that the
%   `layer` structure can contain additional fields if needed.

% Copyright (C) 2014 Andrea Vedaldi.
% All rights reserved.
%
% This file is part of the VLFeat library and is made available under
% the terms of the BSD license (see the COPYING file).

opts.res = [] ;
opts.conserveMemory = false ;
opts.sync = false ;
opts.disableDropout = false ;
opts.freezeDropout = false ;
opts.accumulate = false ;
opts.backPropDepth = +inf ;

opts = vl_argparse(opts, varargin) ;

n = numel(net.layers) ;

% Backward pass is requested only when an output derivative is supplied.
if (nargin <= 2) || isempty(dzdy)
  doder = false ;
else
  doder = true ;
end

gpuMode = isa(x, 'gpuArray') ;

% Ensure the extra outputs are defined even when the network is too
% shallow to reach the hard-coded capture layers below.
data = [] ;
labels = [] ;

if nargin <= 3 || isempty(res)
  res = struct(...
    'x', cell(1,n+1), ...
    'dzdx', cell(1,n+1), ...
    'dzdw', cell(1,n+1), ...
    'aux', cell(1,n+1), ...
    'time', num2cell(zeros(1,n+1)), ...
    'backwardTime', num2cell(zeros(1,n+1))) ;
end
res(1).x = x ;

for i=1:n
  l = net.layers{i} ;
  res(i).time = tic ;
  switch l.type
    case 'conv'
      if isfield(l, 'weights')
        res(i+1).x = vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, 'pad', l.pad, 'stride', l.stride) ;
      else
        res(i+1).x = vl_nnconv(res(i).x, l.filters, l.biases, 'pad', l.pad, 'stride', l.stride) ;
      end
    case 'pool'
      res(i+1).x = vl_nnpool(res(i).x, l.pool, 'pad', l.pad, 'stride', l.stride, 'method', l.method) ;
    case 'tt'
      res(i+1).x = vl_nntt(res(i).x, l.W, l.biases, l.outHeight, l.outWidth, l.outChannels) ;
    case 'normalize'
      res(i+1).x = vl_nnnormalize(res(i).x, l.param) ;
    case 'softmax'
      res(i+1).x = vl_nnsoftmax(res(i).x) ;
    case 'loss'
      res(i+1).x = vl_nnloss(res(i).x, l.class) ;
    case 'softmaxloss'
      res(i+1).x = vl_nnsoftmaxloss(res(i).x, l.class) ;
    case 'relu'
      res(i+1).x = vl_nnrelu(res(i).x) ;
    case 'sigmoid'
      res(i+1).x = vl_nnsigmoid(res(i).x) ;
    case 'noffset'
      res(i+1).x = vl_nnnoffset(res(i).x, l.param) ;
    case 'spnorm'
      res(i+1).x = vl_nnspnorm(res(i).x, l.param) ;
    case 'dropout'
      if opts.disableDropout
        res(i+1).x = res(i).x ;
      elseif opts.freezeDropout
        [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate, 'mask', res(i+1).aux) ;
      else
        [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate) ;
      end
    case 'bnorm'
      if isfield(l, 'weights')
        res(i+1).x = vl_nnbnorm(res(i).x, l.weights{1}, l.weights{2}) ;
      else
        res(i+1).x = vl_nnbnorm(res(i).x, l.filters, l.biases) ;
      end
    case 'pdist'
      res(i+1).x = vl_nnpdist(res(i).x, l.p, 'noRoot', l.noRoot, 'epsilon', l.epsilon) ;
    case 'custom'
      res(i+1) = l.forward(l, res(i), res(i+1)) ;
    otherwise
      error('Unknown layer type %s', l.type) ;
  end
  % Capture intermediate activations at fixed depths.
  % NOTE(review): layer indices 31 and 36 are hard-coded for the VGG-16
  % ("deep-16") architecture this experiment targets — confirm before
  % using this wrapper with a different network.
  if i == 31
    data = gather(res(32).x) ;
  elseif i == 36
    labels = gather(res(37).x) ;
  end
  % optionally forget intermediate results
  forget = opts.conserveMemory ;
  forget = forget & (~doder || strcmp(l.type, 'relu')) ;
  forget = forget & ~(strcmp(l.type, 'loss') || strcmp(l.type, 'softmaxloss')) ;
  forget = forget & (~isfield(l, 'rememberOutput') || ~l.rememberOutput) ;
  if forget
    res(i).x = [] ;
  end
  if gpuMode && opts.sync
    % This should make things slower, but on MATLAB 2014a it is necessary
    % for any decent performance.
    wait(gpuDevice) ;
  end
  res(i).time = toc(res(i).time) ;
end

if doder
  res(n+1).dzdx = dzdy ;
  for i=n:-1:max(1, n-opts.backPropDepth+1)
    l = net.layers{i} ;
    res(i).backwardTime = tic ;
    switch l.type
      case 'conv'
        if ~opts.accumulate
          if isfield(l, 'weights')
            [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, ...
                          res(i+1).dzdx, ...
                          'pad', l.pad, 'stride', l.stride) ;
          else
            % Legacy code: will go
            [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                vl_nnconv(res(i).x, l.filters, l.biases, ...
                          res(i+1).dzdx, ...
                          'pad', l.pad, 'stride', l.stride) ;
          end
        else
          dzdw = cell(1,2) ;
          if isfield(l, 'weights')
            [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, ...
                          res(i+1).dzdx, ...
                          'pad', l.pad, 'stride', l.stride) ;
          else
            % Legacy code: will go
            [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                vl_nnconv(res(i).x, l.filters, l.biases, ...
                          res(i+1).dzdx, ...
                          'pad', l.pad, 'stride', l.stride) ;
          end
          for j=1:2
            res(i).dzdw{j} = res(i).dzdw{j} + dzdw{j} ;
          end
          clear dzdw ;
        end

      case 'pool'
        res(i).dzdx = vl_nnpool(res(i).x, l.pool, res(i+1).dzdx, ...
                                'pad', l.pad, 'stride', l.stride, 'method', l.method) ;
      case 'tt'
        if ~opts.accumulate
          [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
              vl_nntt(res(i).x, l.W, l.biases, ...
                      l.outHeight, l.outWidth, l.outChannels, ...
                      res(i+1).dzdx) ;
        else
          dzdw = cell(1,2) ;
          [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
              vl_nntt(res(i).x, l.W, l.biases, ...
                      l.outHeight, l.outWidth, l.outChannels, ...
                      res(i+1).dzdx) ;
          for j=1:2
            res(i).dzdw{j} = res(i).dzdw{j} + dzdw{j} ;
          end
          clear dzdw ;
        end
      case 'normalize'
        res(i).dzdx = vl_nnnormalize(res(i).x, l.param, res(i+1).dzdx) ;
      case 'softmax'
        res(i).dzdx = vl_nnsoftmax(res(i).x, res(i+1).dzdx) ;
      case 'loss'
        res(i).dzdx = vl_nnloss(res(i).x, l.class, res(i+1).dzdx) ;
      case 'softmaxloss'
        res(i).dzdx = vl_nnsoftmaxloss(res(i).x, l.class, res(i+1).dzdx) ;
      case 'relu'
        if ~isempty(res(i).x)
          res(i).dzdx = vl_nnrelu(res(i).x, res(i+1).dzdx) ;
        else
          % if res(i).x is empty, it has been optimized away, so we use this
          % hack (which works only for ReLU):
          res(i).dzdx = vl_nnrelu(res(i+1).x, res(i+1).dzdx) ;
        end
      case 'sigmoid'
        res(i).dzdx = vl_nnsigmoid(res(i).x, res(i+1).dzdx) ;
      case 'noffset'
        res(i).dzdx = vl_nnnoffset(res(i).x, l.param, res(i+1).dzdx) ;
      case 'spnorm'
        res(i).dzdx = vl_nnspnorm(res(i).x, l.param, res(i+1).dzdx) ;
      case 'dropout'
        if opts.disableDropout
          res(i).dzdx = res(i+1).dzdx ;
        else
          res(i).dzdx = vl_nndropout(res(i).x, res(i+1).dzdx, 'mask', res(i+1).aux) ;
        end
      case 'bnorm'
        if ~opts.accumulate
          if isfield(l, 'weights')
            [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                vl_nnbnorm(res(i).x, l.weights{1}, l.weights{2}, ...
                           res(i+1).dzdx) ;
          else
            [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                vl_nnbnorm(res(i).x, l.filters, l.biases, ...
                           res(i+1).dzdx) ;
          end
        else
          dzdw = cell(1,2) ;
          if isfield(l, 'weights')
            [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                vl_nnbnorm(res(i).x, l.weights{1}, l.weights{2}, ...
                           res(i+1).dzdx) ;
          else
            [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                vl_nnbnorm(res(i).x, l.filters, l.biases, ...
                           res(i+1).dzdx) ;
          end
          for j=1:2
            res(i).dzdw{j} = res(i).dzdw{j} + dzdw{j} ;
          end
          clear dzdw ;
        end
      case 'pdist'
        res(i).dzdx = vl_nnpdist(res(i).x, l.p, res(i+1).dzdx, ...
                                 'noRoot', l.noRoot, 'epsilon', l.epsilon) ;
      case 'custom'
        res(i) = l.backward(l, res(i), res(i+1)) ;
    end
    if opts.conserveMemory
      res(i+1).dzdx = [] ;
    end
    if gpuMode && opts.sync
      wait(gpuDevice) ;
    end
    res(i).backwardTime = toc(res(i).backwardTime) ;
  end
end