I am trying to implement a custom layer for emotion classification on audio, following this paper: https://arxiv.org/pdf/1806.01506.pdf. However, during the training phase I always get an error, while everything works fine when I run the model on its own.
How can I solve this problem?
This is what my model looks like:
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (None, 633, 128, 1)       0
conv_1 (Conv2D)              (None, 156, 30, 96)       11712
max_pooling2d_1 (MaxPooling2 (None, 77, 14, 96)        0
convpool_1 (BatchNormalizati (None, 77, 14, 96)        384
zero_padding2d_1 (ZeroPaddin (None, 81, 18, 96)        0
conv_2 (Conv2D)              (None, 77, 14, 256)       614656
max_pooling2d_2 (MaxPooling2 (None, 38, 6, 256)        0
batch_normalization_1 (Batch (None, 38, 6, 256)        1024
zero_padding2d_2 (ZeroPaddin (None, 40, 8, 256)        0
conv_3_1 (Conv2D)            (None, 38, 6, 384)        885120
conv_3_2 (Conv2D)            (None, 36, 4, 384)        1327488
zero_padding2d_4 (ZeroPaddin (None, 38, 6, 384)        0
conv_5 (Conv2D)              (None, 36, 4, 256)        884992
convpool_5 (MaxPooling2D)    (None, 17, 1, 256)        0
reshape (Reshape)            (None, 272, 16)           0
attention_layer_1 (Attention (None, 1, 1, 16)          48
flatten_1 (Flatten)          (None, 16)                0
dense_1 (Dense)              (None, 6)                 102
=================================================================
Total params: 3,725,526
Trainable params: 3,724,806
Non-trainable params: 720
The error I get during training:

ValueError: Error when checking target: expected dense_1 to have shape (6,) but got array with shape (1,)

The code I use to build the model:
from keras.layers import Dense, Input
import keras.backend as K
from keras.engine.topology import Layer
from keras.models import Model
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Lambda, BatchNormalization
from keras.layers import Reshape, Flatten
import numpy as np
import pdb; pdb.set_trace()
# scale factor
lambada = 0.3
# cropping / slicing dimensions of tensors
# To slice x as x[:, :, 5:10], just call:
# x = crop(2,5,10)(x)
def crop(dimension, start, end):
    # Crops (or slices) a Tensor on a given dimension from start to end.
    # Example: to crop tensor x[:, :, 5:10],
    # call crop(2, 5, 10), since you want to crop on dimension 2.
    def func(x):
        if dimension == 0:
            return x[start:end]
        if dimension == 1:
            return x[:, start:end]
        if dimension == 2:
            return x[:, :, start:end]
        if dimension == 3:
            return x[:, :, :, start:end]
        if dimension == 4:
            return x[:, :, :, :, start:end]
    return Lambda(func)
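# --- a quick shape check of crop(); a hypothetical example, not part of the
# original code: slicing dimension 1 of a (None, 272, 16) tensor to [0:1]
_t = Input(shape=(272, 16))
print K.int_shape(crop(1, 0, 1)(_t))  # expect (None, 1, 16)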
# F X T X C
# F: frequency, T: time domain, C: channel size
# the conv stack outputs a variable-length grid of L elements, L = F x T;
# each element is a C-dimensional vector corresponding to a region of the
# speech spectrogram, represented as A = [a_1, ..., a_L], a_i in R^C
# attention
# L = data.shape[0] * data.shape[1]
# C = data.shape[2]
# using custom layer
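# --- for reference, a plain-NumPy sketch of the attention described above
# (an illustrative sketch, not the original implementation; the names and
# shapes of W, u, b are assumptions):
#   e_i = u . tanh(W a_i + b),  alpha = softmax(lambda * e),  c = sum_i alpha_i a_i
def attention_reference(A, W, u, b, lam=0.3):
    # A: (L, C) grid of annotation vectors; W: (C, C); u, b: (C,)
    e = np.tanh(np.dot(A, W.T) + b).dot(u)   # (L,) importance scores
    alpha = np.exp(lam * e)
    alpha = alpha / alpha.sum()              # scaled softmax weights
    return alpha.dot(A)                      # (C,) emotion vector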
class Attention_layer(Layer):
    def __init__(self, layer_shape, **kwargs):
        # self.output_dim = output_dim
        self.layer_shape = layer_shape
        super(Attention_layer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create trainable weight variables for this layer.
        self.L = self.layer_shape[1]
        self.C = self.layer_shape[2]
        # each weight variable has shape (1, C)
        self.W = self.add_weight(name='kernel',
                                 shape=(1, self.C),
                                 initializer='uniform',
                                 trainable=True)
        self.U = self.add_weight(name='kernel',
                                 shape=(1, self.C),
                                 initializer='uniform',
                                 trainable=True)
        self.B = self.add_weight(name='kernel',
                                 shape=(1, self.C),
                                 initializer='uniform',
                                 trainable=False)
        super(Attention_layer, self).build(self.layer_shape)

    def call(self, a):
        # implementation of the attention network
        importance = []
        importance_nm = []
        for i in xrange(self.L):
            a_ = crop(1, i, i + 1)(a)
            a_ = K.reshape(a_, (1, self.C))
            fun = K.tanh(self.W * a_ + self.B)
            # fun = K.tanh(K.dot(self.W, a_) + self.B)
            # importance.append(K.dot(self.U, fun))
            importance.append(self.U * fun)
        sum_ = 0
        for k in xrange(len(importance)):
            sum_ += K.exp(lambada * importance[k])
            temp = K.exp(lambada + importance[k])
            temp = temp / sum_
            importance_nm.append(temp)
        emotion_vector = 0
        for k in xrange(len(importance_nm)):
            emotion_vector += importance_nm[k] * a_
            # emotion_vector += K.dot(importance_nm[k], a_)
        emotion_vector = K.reshape(emotion_vector, (1, 1, self.C))
        print emotion_vector
        return emotion_vector

    def compute_output_shape(self, input_shape):
        # reshape to (batch, 1, 1, C) so the output can be sent to the
        # flatten layer and then to the softmax dense layer
        shape = self.layer_shape[0], 1, 1, self.C
        # print self.output_dim
        print "shape"
        print shape
        return shape
def build_model(input_shape, feature_size, nb_classes):
    print "testing"
    net_input = Input(shape=(input_shape, feature_size, 1))
    conv_1 = Convolution2D(96, 11, 11, subsample=(4, 4), activation='relu',
                           name='conv_1', init='he_normal')(net_input)
    conv_2 = MaxPooling2D((3, 3), strides=(2, 2))(conv_1)
    conv_2 = BatchNormalization(name="convpool_1")(conv_2)
    conv_2 = ZeroPadding2D((2, 2))(conv_2)
    conv_2 = Convolution2D(256, 5, 5, activation="relu",
                           init='he_normal', name='conv_2')(conv_2)
    conv_3 = MaxPooling2D((3, 3), strides=(2, 2))(conv_2)
    conv_3 = BatchNormalization()(conv_3)
    conv_3 = ZeroPadding2D((1, 1))(conv_3)
    conv_3 = Convolution2D(384, 3, 3, activation='relu',
                           name='conv_3_1', init='he_normal')(conv_3)
    conv_4 = ZeroPadding2D((1, 1))(conv_3)
    conv_4 = Convolution2D(384, 3, 3, activation='relu',
                           name='conv_3_2', init='he_normal')(conv_3)
    conv_5 = ZeroPadding2D((1, 1))(conv_4)
    conv_5 = Convolution2D(256, 3, 3, activation="relu",
                           init='he_normal', name='conv_5')(conv_5)
    dense_1_ = MaxPooling2D((3, 3), strides=(2, 2), name="convpool_5")(conv_5)
    # reshape into L x C, which is easier to process
    dense_1 = Reshape((272, 16), name="reshape")(dense_1_)
    print 'reshape shape'
    print dense_1._keras_shape
    attention = Attention_layer(dense_1._keras_shape)(dense_1)
    print 'attention shape'
    print attention._keras_shape
    flatten = Flatten()(attention)
    print 'flatten shape'
    print flatten._keras_shape
    output = Dense(nb_classes, activation='softmax')(flatten)
    model = Model(net_input, output)
    model.compile(loss="categorical_crossentropy",
                  optimizer='Adam', metrics=['accuracy'])
    return model


if __name__ == "__main__":
    build_model(633, 128, 6).summary()
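For reference, the target shape that fit() will demand is fixed by the last Dense layer; a minimal sketch to inspect it (assuming the code above is saved as attention_network_custom_layer, as in the training script below):

# minimal sketch: check the target shape the compiled model expects
from attention_network_custom_layer import build_model

model = build_model(633, 128, 6)
# (None, 6): with categorical_crossentropy, fit() expects one-hot targets
# of shape (num_samples, 6)
print model.output_shape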
My code to train the model:
import numpy as np
import matplotlib
import pandas
import os
from attention_network_custom_layer import build_model
from sklearn.preprocessing import label_binarize
from keras.preprocessing.sequence import pad_sequences
import pdb; pdb.set_trace()
import logging
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
path_to_features = "/nas/lrz/tuei/ldv/studierende/Emotion/enterface_features/"
def to_categorical(label, emotions):
    return label_binarize(label, np.asarray(emotions))
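# --- a hypothetical shape check (made-up labels, not from the dataset):
# label_binarize one-hot encodes against the 6 emotion codes, so each
# target row has shape (6,)
print label_binarize(['an', 'ha'], np.asarray(['an', 'di', 'fe', 'ha', 'sa', 'su'])).shape  # (2, 6)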
def load_x(name):
    df = pandas.read_csv(name, sep=',', index_col=None)
    x = df.iloc[:, :]
    x = np.array(x, dtype=float)
    return np.array(x, dtype=float)
def load_features(path):
    files = os.listdir(path)
    tx = []
    ty = []
    for f in files:
        if f[-4:] == '.csv':
            x = load_x(path_to_features + f)
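            # f[-8:-6] assumes the two-letter emotion code sits just before
            # a "_1.csv"-style suffix; e.g. a hypothetical file
            # "s01_an_1.csv" yields y == 'an'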
            y = f[-8:-6]
            tx.append(np.array(x, dtype=float))
            ty.append(y)
    tx = np.array(tx)
    ty = np.array(ty)
    return tx, ty
features, labels = load_features(path_to_features)
print np.unique(labels)
np.random.seed(200)
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s-\
%(message)s')
# emotions: anger, disgust, fear, happiness, sadness, surprise
emotions = ['an', 'di', 'fe', 'ha', 'sa', 'su']
batch_size = 30
nb_class = 6
nb_epoch = 2000
print 'length of data'
print len(features)
parts = 5
permutation = np.random.permutation(len(features))
permuted_ids = features[permutation]
step = len(features) / parts
preds = []
trues = []
fold_step = 0
max_length = 633
feature_size = 128
for part in xrange(parts):
    i0 = step * part
    i1 = step * (part + 1)
    train_x = np.append(permuted_ids[:i0], permuted_ids[i1:])
    train_y = np.append(labels[:i0], labels[i1:])
    test_x = permuted_ids[i0:i1]
    test_y = labels[i0:i1]
    train_y = to_categorical(train_y, emotions)
    test_y = to_categorical(test_y, emotions)
    train_y = np.argmax(train_y, axis=1)
    test_y = np.argmax(test_y, axis=1)
    train_y = np.reshape(train_y, (train_y.shape[0], 1))
    test_y = np.reshape(test_y, (test_y.shape[0], 1))
    test_y = np.ravel(test_y)
    train_y = np.ravel(train_y)
    train_x = pad_sequences(train_x, maxlen=max_length, dtype='float',
                            padding='post', value=0.0)
    test_x = pad_sequences(test_x, maxlen=max_length, dtype='float',
                           padding='post', value=0.0)
    test_x = (test_x - np.mean(train_x, axis=0)) / np.std(train_x, axis=0)
    train_x = (train_x - np.mean(train_x, axis=0)) / np.std(train_x, axis=0)
    train_x = np.reshape(train_x, (len(train_x), max_length, feature_size, 1))
    test_x = np.reshape(test_x, (len(test_x), max_length, feature_size, 1))
    model = build_model(max_length, feature_size, nb_class)
    model.summary()
    history = model.fit(train_x, train_y, epochs=nb_epoch,
                        batch_size=batch_size, verbose=2,
                        validation_data=(test_x, test_y))
    history_dict = history.history
    loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']
    epochs = range(1, len(loss_values) + 1)
    plt.plot(epochs, loss_values, 'bo', label='Training loss')
    plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.ylim((0.8, 1.4))
    plt.legend()
    plt.savefig('plots/only one-bi-lstm-smaller, ' + str(part)
                + '.png')
    plt.close()