Lasagne performing much worse than Keras?

Asked: 2016-10-05 10:12:49

Tags: python neural-network conv-neural-network keras lasagne

I compared Keras (with the Theano backend) and Lasagne on a toy regression problem in order to choose one of them for my final application. In this comparison, Lasagne performed so much worse than Keras that I started doubting my own code. Since I am new to both Keras and Lasagne, I would like to check it with someone more experienced than me. The network should be trained to find the mean of a 16x16 matrix. I made several attempts: first with a 2D convolution layer plus a dense layer (since my final application will need a CNN), and then, because the Lasagne results were terrible, with a standard single-hidden-layer MLP. Again, terrible Lasagne performance. In both cases I used the same specifications: same batch size, same initialization, same optimizer (I tested both SGD with Nesterov momentum and ADAM), and of course the same number of epochs and the same network architecture. Can someone tell me what is going on? Is there something wrong in my code? Why is the performance difference so large? And if everything is correct, why does Keras do so much better than Lasagne?

Here is the code I am using:

Keras:

# -*- coding: utf-8 -*-
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import Convolution2D
from keras import backend as K
from keras.optimizers import SGD
import matplotlib.pyplot as plt


batch_size = 500
nb_output = 1
nb_epoch = 10

# input image dimensions
img_rows, img_cols = 16, 16
# number of convolutional filters to use
nb_filters = 20
# size of pooling area for max pooling
pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)


X_train = np.random.randn(10000, 16*16)
Y_train = np.mean(X_train, 1)

X_train = X_train.astype('float32')
X_test = np.random.randn(1000, 16*16)
Y_test = np.mean(X_test, 1)

if K._BACKEND == 'theano':
    X_train = np.reshape(X_train, (10000, 1, 16, 16))
    X_test = np.reshape(X_test, (1000, 1, 16, 16))
else:
    X_train = np.reshape(X_train, (10000, 16, 16, 1))    
    X_test = np.reshape(X_test, (1000, 16, 16, 1))

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


model = Sequential()

model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                        border_mode='same',
                        input_shape=X_train.shape[1:], init='glorot_uniform'))
model.add(Activation('relu'))

#model.add(Flatten(input_shape=X_train.shape[1:]))
model.add(Flatten())
model.add(Dense(10, init='glorot_uniform'))
model.add(Activation('sigmoid'))
model.add(Dense(nb_output, init='glorot_uniform'))
model.add(Activation('linear'))

sgd = SGD(lr=0.1,  momentum=0.9, nesterov=True)#decay=1e-6,
model.compile(loss='mse',
              optimizer=sgd)

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=1)
predicts = model.predict(X_test, batch_size=1000, verbose=0)
print('Test score:', score[0])
plt.figure()
plt.scatter(Y_test, predicts)
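
(The script above shows the SGD-with-Nesterov run; for the ADAM comparison mentioned in the question, the compile step was presumably swapped along these lines. This is a hypothetical sketch using Keras 1.x default Adam hyperparameters, not part of the original script.)

from keras.optimizers import Adam

# hypothetical ADAM variant of the compile call above (library defaults assumed)
model.compile(loss='mse', optimizer=Adam())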

Lasagne (adapted from the mnist example):

# -*- coding: utf-8 -*-

from __future__ import print_function

import time

import numpy as np

import theano
import theano.tensor as T

import lasagne

import matplotlib.pyplot as plt


def load_dataset():
    np.random.seed(1337)
    X_train = np.random.randn(10000, 16*16)
    X_train = X_train.astype('float32')
    Y_train = np.mean(X_train, 1)

    X_test = np.random.randn(1000, 16*16)
    X_test = X_test.astype('float32')
    Y_test = np.mean(X_test, 1)

    X_train = np.reshape(X_train, (10000, 1, 16, 16))
    X_test = np.reshape(X_test, (1000, 1, 16, 16))

    return X_train, Y_train, X_test, Y_test


def build_cnn(input_var=None):

    network = lasagne.layers.InputLayer(shape=(None, 1, 16, 16),
                                        input_var=input_var)

    network = lasagne.layers.Conv2DLayer(
            network, num_filters=20, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

    network = lasagne.layers.DenseLayer(
            network,
            num_units=10,
            nonlinearity=lasagne.nonlinearities.sigmoid)

    network = lasagne.layers.DenseLayer(
            network,
            num_units=1,
            nonlinearity=lasagne.nonlinearities.linear)

    return network


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


def main(model='cnn', num_epochs=10):

    print("Loading data...")
    X_train, y_train, X_test, y_test = load_dataset()

    input_var = T.tensor4('inputs')
    target_var = T.vector('targets')

    print("Building model and compiling functions...")
    network = build_cnn(input_var)


    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.1, momentum=0.9)
#    updates = lasagne.updates.adam(loss, params)

    test_prediction = lasagne.layers.get_output(network)
    test_loss = lasagne.objectives.squared_error(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()


    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    val_fn = theano.function([input_var, target_var], test_loss)

    preds = theano.function([input_var], test_prediction)

    print("Starting training...")

    for epoch in range(num_epochs):

        train_err = 0.0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        test_err = 0.0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
            inputs, targets = batch
            err = val_fn(inputs, targets)
            test_err += err
            test_batches += 1
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  test loss:\t\t{:.6f}".format(test_err / test_batches))

    pds = preds(X_test)
    plt.scatter(y_test, pds)
    plt.show()



if __name__ == '__main__':

    main()

Both scripts are easily adapted to a single-layer MLP (a sketch of that variant follows the plots below). If you run them, you get these scatter plots at the end:

Lasagne:

[scatter plot: Lasagne predictions vs. true values]

Keras:

[scatter plot: Keras predictions vs. true values]

On the x axis: the true values; on the y axis: the predicted values.
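
For reference, a minimal sketch of the single-hidden-layer MLP variant mentioned above, in its Keras form. The flattened 16*16 input and the 10-unit hidden layer are assumptions based on the CNN script; the rest reuses the same Keras 1.x API and hyperparameters. The Lasagne adaptation would similarly swap build_cnn for an equivalent DenseLayer stack.

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

# assumed single-hidden-layer MLP, reusing the hyperparameters of the CNN script
mlp = Sequential()
mlp.add(Dense(10, input_dim=16*16, init='glorot_uniform'))
mlp.add(Activation('sigmoid'))
mlp.add(Dense(1, init='glorot_uniform'))
mlp.add(Activation('linear'))
mlp.compile(loss='mse', optimizer=SGD(lr=0.1, momentum=0.9, nesterov=True))
# mlp.fit(X_train.reshape(-1, 16*16), Y_train, batch_size=500, nb_epoch=10)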

0 Answers:

No answers yet.