Why does MatConvNet say that DATA and DEROUTPUT do not have compatible formats?

Date: 2016-06-11 23:58:00

Tags: matlab machine-learning neural-network conv-neural-network

Problem statement

I am using MatConvNet to build a very simple 1D example with a small network, training it with the cnn_train function that ships with the example library. Following their example, I set up a small CNN as follows:

clc; clear;
%% prepare Data
M = 32; %batch size
X_train = zeros(1,1,1,M); % (1 1 1 32) = (1 1 1 M)
for m=1:M,
    X_train(:,:,:,m) = m; %training example value
end
Y_test = 10*X_train;
split = ones(1,M);
split(floor(M*0.75):end) = 2;
% load image database (imdb)
imdb.images.data = X_train;
imdb.images.label = Y_test;
imdb.images.set = split;
%% prepare parameters
L1=3;
w1 = randn(1,1,1,L1); %1st layer weights
w2 = randn(1,1,1,L1); %2nd layer weights
b1 = randn(1,1,1,L1); %1st layer biases
b2 = randn(1,1,1,L1); %2nd layer biases
G1 = ones(1,1,1,L1); % (1 1 1 3) = (1 1 1 L1) BN scale, one per dimension
B1 = zeros(1,1,1,L1); % (1 1 1 3) = (1 1 1 L1) BN shift, one per dimension
EPS = 1e-4;
%% make CNN layers: conv, BN, relu, conv, pdist, l2-loss
net.layers = {} ;
net.layers{end+1} = struct('type', 'conv', ...
                           'name', 'conv1', ...
                           'weights', {{w1, b1}}, ...
                           'pad', 0) ;
net.layers{end+1} = struct('type', 'bnorm', ...
                           'weights', {{G1, B1}}, ...
                           'EPSILON', EPS, ...
                           'learningRate', [1 1 0.05], ...
                           'weightDecay', [0 0]) ;                       
net.layers{end+1} = struct('type', 'relu', ...
                           'name', 'relu1' ) ;
net.layers{end+1} = struct('type', 'conv', ...
                           'name', 'conv2', ...
                           'weights', {{w2, b2}}, ...
                           'pad', 0) ;
net.layers{end+1} = struct('type', 'pdist', ...
                           'name', 'averageing1', ...
                           'class', 0, ...
                           'p', 1) ;
%% add L2-loss                   
fwfun = @l2LossForward;
bwfun = @l2LossBackward;
net = addCustomLossLayer(net, fwfun, bwfun) ;
net.layers{end}.class = Y_test; % it's the label set
net = vl_simplenn_tidy(net) ;
res = vl_simplenn(net, X_train);
%% prepare train options
trainOpts.expDir = 'results/' ; %save results/trained cnn
trainOpts.gpus = [] ;
trainOpts.batchSize = 2 ;
trainOpts.learningRate = 0.02 ;
trainOpts.plotDiagnostics = false ;
%trainOpts.plotDiagnostics = true ; % Uncomment to plot diagnostics
trainOpts.numEpochs = 20 ; % number of training epochs
trainOpts.errorFunction = 'none' ;
%% CNN TRAIN
vl_simplenn_display(net) ;
net = cnn_train(net, imdb, @getBatch, trainOpts) ;
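
(The @getBatch handle is not defined anywhere in this post; a minimal sketch compatible with the imdb layout above, modeled on the tutorial's version, would be:)

function [im, labels] = getBatch(imdb, batch)
% Sketch of a batch extractor for the imdb structure built above
% (hypothetical: the original post does not show its getBatch).
im = imdb.images.data(:,:,:,batch) ;
labels = imdb.images.label(:,:,:,batch) ;
end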

I created this based on the example they provided, and whenever I run the script I get the error:

Error using vl_nnconv
DATA and DEROUTPUT do not have compatible formats.

Error in vl_simplenn (line 397)
          [res(i).dzdx, dzdw{1}, dzdw{2}] = vl_nnconv(res(i).x, l.weights{1},
          l.weights{2}, res(i+1).dzdx)

Error in cnn_train>process_epoch (line 323)
    res = vl_simplenn(net, im, dzdy, res, ...

Error in cnn_train (line 139)
    [net,stats.train,prof] = process_epoch(opts, getBatch, epoch, train, learningRate,
    imdb, net) ;

Error in main_1D_1layer_hard_coded_example (line 64)
net = cnn_train(net, imdb, @getBatch, trainOpts) ;

Does anyone know what is going on? The example is actually supposed to be simple, so I am confused about what might be wrong.

Other things I tried in order to solve the problem

Read ahead for more details of the things I tried to solve this.

I went to the line in the file that raises the error and printed the inputs to that function, to make sure I was passing arguments that make sense, and as far as that goes everything seems fine:

  case 'conv'
      % debug: print the sizes of every input to vl_nnconv
      size(res(i).x)
      size(res(i+1).dzdx)
      size(l.weights{1})
      size(l.weights{2})
      % debug: bare call that reproduces the error
      [res(i).dzdx, dzdw{1}, dzdw{2}] = vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, res(i+1).dzdx)
      % the original call in vl_simplenn:
      [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
        vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, res(i+1).dzdx, ...
        'pad', l.pad, ...
        'stride', l.stride, ...
        l.opts{:}, ...
        cudnn{:}) ;

which prints:

ans =

     1     1     3    16


ans =

     1     1     3    16


ans =

     1     1     1     3


ans =

     1     1     1     3

which is what I expected.

I even went ahead and manually hard-coded the chain of derivatives the network is supposed to compute, and that file seems to work fine:

clc; clear;
%% prepare Data
M = 3;
x = zeros(1,1,1,M); % (1 1 1 3) = (1 1 1 M)
for m=1:M,
    x(:,:,:,m) = m;
end
Y = 5;
r=Y;
%% parameters
L1 = 3;
w1 = randn(1,1,1,L1); % (1 1 1 L1) = (1 1 1 3)
b1 = ones(1,L1);
w2 = randn(1,1,1,L1); % (1 1 1 L1) = (1 1 1 3)
b2 = ones(1,L1);
G1 = ones(1,1,1,L1); % (1 1 1 3) = (1 1 1 L1) BN scale, one per dimension
B1 = zeros(1,1,1,L1); % (1 1 1 3) = (1 1 1 L1) BN shift, one per dimension
EPS = 1e-4;
%% Forward Pass
z1 = vl_nnconv(x,w1,b1); % (1 1 3 3) = (1 1 L1 M)
%bn1 = z1;
bn1 = vl_nnbnorm(z1,G1,B1,'EPSILON',EPS); % (1 1 3 3) = (1 1 L1 M)
a1 = vl_nnrelu(bn1); % (1 1 3 3) = (1 1 L1 M)
z2 = vl_nnconv(a1,w2,b2);
y1 = vl_nnpdist(z2, 0, 1);
loss_forward = l2LossForward(y1,Y);
%%
net.layers = {} ;
net.layers{end+1} = struct('type', 'conv', ...
                           'name', 'conv1', ...
                           'weights', {{w1, b1}}, ...
                           'pad', 0) ;
net.layers{end+1} = struct('type', 'bnorm', ...
                           'weights', {{G1, B1}}, ...
                           'EPSILON', EPS, ...
                           'learningRate', [1 1 0.05], ...
                           'weightDecay', [0 0]) ;                       
net.layers{end+1} = struct('type', 'relu', ...
                           'name', 'relu1' ) ;
net.layers{end+1} = struct('type', 'conv', ...
                           'name', 'conv2', ...
                           'weights', {{w2, b2}}, ...
                           'pad', 0) ;
net.layers{end+1} = struct('type', 'pdist', ...
                           'name', 'averageing1', ...
                           'class', 0, ...
                           'p', 1) ;
fwfun = @l2LossForward;
bwfun = @l2LossBackward;
net = addCustomLossLayer(net, fwfun, bwfun) ;
net.layers{end}.class = Y;
net = vl_simplenn_tidy(net) ;
res = vl_simplenn(net, x);
%%
loss_forward = squeeze( loss_forward ) % (1 1)
loss_res = squeeze( res(end).x ) % (1 1)
%% Backward Pass
p = 1;
dldx = l2LossBackward(y1,r,p);
dy1dx = vl_nnpdist(z2, 0, 1, dldx);
[dz2dx, dz2dw2] = vl_nnconv(a1, w2, b2, dy1dx);
da1dx = vl_nnrelu(bn1, dz2dx);
[dbn1dx,dbn1dG1,dbn1dB1] = vl_nnbnorm(z1,G1,B1,da1dx);
[dz1dx, dz1dw1] = vl_nnconv(x, w1, b1, dbn1dx);
%%
dzdy = 1;
res = vl_simplenn(net, x, dzdy, res);
%%
% func = @(x) proj(p, forward(x, x0)) ;
% err = checkDerivativeNumerically(f, x, dx)
% %%
dz1dx = squeeze(dz1dx)
dz1dx_vl_simplenn = squeeze(res(1).dzdx)

The derivatives seem mathematically sound, so I assume everything in that file works. It throws no error, so the fact that the cnn_train version doesn't even run confuses me a lot. Does anyone know what is going on?
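
As a sanity check of that claim, here is a minimal finite-difference sketch (not part of the original post; the step size h and the perturbed index are illustrative) that compares the numeric gradient of the loss against the analytic res(1).dzdx computed by vl_simplenn:

% Numeric check of the analytic input gradient (a sketch; assumes the
% variables net, x, and the backward-pass res from the script above).
h = 1e-6;                                  % finite-difference step
xp = x; xp(1,1,1,1) = xp(1,1,1,1) + h;     % perturb a single input entry
res_p = vl_simplenn(net, xp);              % forward pass on perturbed input
num_grad = (res_p(end).x - res(end).x) / h;
fprintf('numeric: %g, analytic: %g\n', num_grad, res(1).dzdx(1,1,1,1));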

The way I load the CNN is based on the example file they provide in that tutorial. I'll paste a summary of the important parts of that file (which runs fine with the cnn_train function, while mine doesn't).

setup() ;
% setup('useGpu', true); % Uncomment to initialise with a GPU support
%% Part 3.1: Prepare the data
% Load a database of blurred images to train from
imdb = load('data/text_imdb.mat') ;

%% Part 3.2: Create a network architecture

net = initializeSmallCNN() ;
%net = initializeLargeCNN() ;
% Display network
vl_simplenn_display(net) ;

%% Part 3.3: learn the model
% Add a loss (using a custom layer)
net = addCustomLossLayer(net, @l2LossForward, @l2LossBackward) ;

% Train
trainOpts.expDir = 'data/text-small' ;
trainOpts.gpus = [] ;
% Uncomment for GPU training:
%trainOpts.expDir = 'data/text-small-gpu' ;
%trainOpts.gpus = [1] ;
trainOpts.batchSize = 16 ;
trainOpts.learningRate = 0.02 ;
trainOpts.plotDiagnostics = false ;
%trainOpts.plotDiagnostics = true ; % Uncomment to plot diagnostics
trainOpts.numEpochs = 20 ;
trainOpts.errorFunction = 'none' ;

net = cnn_train(net, imdb, @getBatch, trainOpts) ;

2 answers:

Answer 0 (score: 3)

The dimensions of w2 should be 1x1x3x3.

Usually the biases are 1x3 as well, since they have only one dimension (or the weights are 1x1x3xN with corresponding biases 1xN, where N is the number of filters); the same holds for B1 and G1 (here 1xM, where M is the number of filters in the previous layer). But it works either way.

In your example, the dimensions of x after the first convolution are 1x1x3x16. That means there are 16 elements in one batch, each of width and height 1 and depth 3. The depth is 3 because the first convolution is done with 3 filters (w1 has dimensions 1x1x1x3).

The w2 in your example has dimensions 1x1x1x3, which stands for 3 filters of width, height, and depth 1. So the depth of the filters does not match the depth of the input.
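
Concretely, here is a minimal sketch of the corrected initialization (keeping L1 = 3 filters in each layer; variable names follow the question):

L1 = 3;
w1 = randn(1,1,1,L1);  % conv1: 3 filters of depth 1 -> output depth L1
b1 = zeros(1,L1);      % one bias per filter (1xN, as suggested above)
w2 = randn(1,1,L1,L1); % conv2: filter depth L1 matches conv1's output depth
b2 = zeros(1,L1);

With w2 shaped 1x1x3x3, the depth of the second layer's filters matches the depth of its input, so the backward vl_nnconv call no longer fails.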

Answer 1 (score: 0)

I ran into the same problem while creating a custom layer, and I finally found the solution by tracing through the matconvnet implementation. Hopefully it helps somebody later.

In short, you need to make sure the two arrays are neither empty nor null, and that they have the same device type (GPU or CPU) and the same data type (char, single/float, or double).

In my case, both arrays had to be 'gpuArray' and 'single'.
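
A minimal sketch of that kind of cast (the variable names are illustrative, not MatConvNet's):

% Make the two arrays compatible: same device (GPU) and same class (single).
x = gpuArray(single(x)) ;        % data on the GPU, stored as single
dzdy = gpuArray(single(dzdy)) ;  % the derivative must match both properties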

====== Details ======

First, the errors:

DATA and FILTERS do not have compatible formats

DATA and BIASES do not have compatible formats

DATA and DEROUTPUT do not have compatible formats

all say that two variables do not have compatible formats. So what exactly does MatConvNet mean by 'compatible formats'? The check is implemented in vl_nnconv.cu, lines 269~278:

  /* check for GPU/data class consistency */
  if (hasFilters && ! vl::areCompatible(data, filters)) {
    vlmxError(VLMXE_IllegalArgument, "DATA and FILTERS do not have compatible formats.") ;
  }
  if (hasBiases && ! vl::areCompatible(data, biases)) {
    vlmxError(VLMXE_IllegalArgument, "DATA and BIASES do not have compatible formats.") ;
  }
  if (backMode && ! vl::areCompatible(data, derOutput)) {
    vlmxError(VLMXE_IllegalArgument, "DATA and DEROUTPUT do not have compatible formats.") ;
  }

The error comes from the function vl::areCompatible, which is implemented as:

  inline bool areCompatible(Tensor const & a, Tensor const & b)
  {
    return
    (a.isEmpty() || a.isNull()) ||
    (b.isEmpty() || b.isNull()) ||
    ((a.getDeviceType() == b.getDeviceType()) & (a.getDataType() == b.getDataType())) ;
  }

So basically it checks whether either input is empty or null, and makes sure that both inputs have the same data type (char, single/float, or double) and the same device type (CPU or GPU).

  /// Type of device: CPU or GPU
  enum DeviceType {
    VLDT_CPU = 0,
    VLDT_GPU
  } ;

  /// Type of data (char, float, double, ...)
  enum DataType {
    VLDT_Char,
    VLDT_Float,
    VLDT_Double
  } ;
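
Putting it together, here is a minimal MATLAB-side version of the same check that you can run on any two arrays before handing them to vl_nnconv (a sketch; a and b are illustrative names, and areCompatibleMat is not a MatConvNet function):

function ok = areCompatibleMat(a, b)
% Sketch of vl::areCompatible in MATLAB: empty inputs always pass,
% otherwise both arrays must live on the same device and share a class.
sameDevice = isa(a, 'gpuArray') == isa(b, 'gpuArray');
if isa(a, 'gpuArray'), ca = classUnderlying(a); else, ca = class(a); end
if isa(b, 'gpuArray'), cb = classUnderlying(b); else, cb = class(b); end
ok = isempty(a) || isempty(b) || (sameDevice && strcmp(ca, cb));
end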