I am trying to implement a custom layer for emotion classification on audio, following this paper: https://arxiv.org/pdf/1806.01506.pdf. However, during the training phase I always get an error, while everything works fine when I run the model on its own.
How can I solve this problem?
This is what my model looks like:
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (None, 633, 128, 1)       0
conv_1 (Conv2D)              (None, 156, 30, 96)       11712
max_pooling2d_1 (MaxPooling2 (None, 77, 14, 96)        0
convpool_1 (BatchNormalizati (None, 77, 14, 96)        384
zero_padding2d_1 (ZeroPaddin (None, 81, 18, 96)        0
conv_2 (Conv2D)              (None, 77, 14, 256)       614656
max_pooling2d_2 (MaxPooling2 (None, 38, 6, 256)        0
batch_normalization_1 (Batch (None, 38, 6, 256)        1024
zero_padding2d_2 (ZeroPaddin (None, 40, 8, 256)        0
conv_3_1 (Conv2D)            (None, 38, 6, 384)        885120
conv_3_2 (Conv2D)            (None, 36, 4, 384)        1327488
zero_padding2d_4 (ZeroPaddin (None, 38, 6, 384)        0
conv_5 (Conv2D)              (None, 36, 4, 256)        884992
convpool_5 (MaxPooling2D)    (None, 17, 1, 256)        0
reshape (Reshape)            (None, 272, 16)           0
attention_layer_1 (Attention (None, 1, 1, 16)          48
flatten_1 (Flatten)          (None, 16)                0
dense_1 (Dense)              (None, 6)                 102
=================================================================
Total params: 3,725,526
Trainable params: 3,724,806
Non-trainable params: 720
The error I get during training:

ValueError: Error when checking target: expected dense_1 to have shape (6,) but got array with shape (1,)

The code I use to build the model:
from keras.layers import Dense, Input
import keras.backend as K
from keras.engine.topology import Layer
from keras.models import Model
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Lambda, BatchNormalization
from keras.layers import Reshape, Flatten
import numpy as np
import pdb; pdb.set_trace()
# scale factor
lambada = 0.3
# cropping / slicing dimensions of tensors
# To slice x as x[:, :, 5:10], just call:
# x = crop(2,5,10)(x)
def crop(dimension, start, end):
    # Crops (or slices) a Tensor on a given dimension from start to end.
    # Example: to crop tensor x[:, :, 5:10],
    # call crop(2, 5, 10), since you want to crop on dimension 2.
    def func(x):
        if dimension == 0:
            return x[start:end]
        if dimension == 1:
            return x[:, start:end]
        if dimension == 2:
            return x[:, :, start:end]
        if dimension == 3:
            return x[:, :, :, start:end]
        if dimension == 4:
            return x[:, :, :, :, start:end]
    return Lambda(func)
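# --- a quick shape check of crop(); a hypothetical example, not part of the
# original code: slicing dimension 1 of a (None, 272, 16) tensor to [0:1]
_t = Input(shape=(272, 16))
print K.int_shape(crop(1, 0, 1)(_t))  # expect (None, 1, 16)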
# F X T X C
# F: frequency, T: time domain, C: channel size
# the conv stack outputs a variable-length grid of L elements, L = F x T;
# each element is a C-dimensional vector corresponding to a region of the
# speech spectrogram, represented as A = [a_1, ..., a_L], a_i in R^C
# attention
# L = data.shape[0] * data.shape[1]
# C = data.shape[2]
# using custom layer
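# --- for reference, a plain-NumPy sketch of the attention described above
# (an illustrative sketch, not the original implementation; the names and
# shapes of W, u, b are assumptions):
#   e_i = u . tanh(W a_i + b),  alpha = softmax(lambda * e),  c = sum_i alpha_i a_i
def attention_reference(A, W, u, b, lam=0.3):
    # A: (L, C) grid of annotation vectors; W: (C, C); u, b: (C,)
    e = np.tanh(np.dot(A, W.T) + b).dot(u)   # (L,) importance scores
    alpha = np.exp(lam * e)
    alpha = alpha / alpha.sum()              # scaled softmax weights
    return alpha.dot(A)                      # (C,) emotion vector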
class Attention_layer(Layer):
    def __init__(self, layer_shape, **kwargs):
        # self.output_dim = output_dim
        self.layer_shape = layer_shape
        super(Attention_layer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create trainable weight variables for this layer.
        self.L = self.layer_shape[1]
        self.C = self.layer_shape[2]
        # each weight variable has shape (1, C)
        self.W = self.add_weight(name='kernel',
                                 shape=(1, self.C),
                                 initializer='uniform',
                                 trainable=True)
        self.U = self.add_weight(name='kernel',
                                 shape=(1, self.C),
                                 initializer='uniform',
                                 trainable=True)
        self.B = self.add_weight(name='kernel',
                                 shape=(1, self.C),
                                 initializer='uniform',
                                 trainable=False)
        super(Attention_layer, self).build(self.layer_shape)

    def call(self, a):
        # implementation of the attention network
        importance = []
        importance_nm = []
        for i in xrange(self.L):
            a_ = crop(1, i, i + 1)(a)
            a_ = K.reshape(a_, (1, self.C))
            fun = K.tanh(self.W * a_ + self.B)
            # fun = K.tanh(K.dot(self.W, a_) + self.B)
            # importance.append(K.dot(self.U, fun))
            importance.append(self.U * fun)
        sum_ = 0
        for k in xrange(len(importance)):
            sum_ += K.exp(lambada * importance[k])
            temp = K.exp(lambada + importance[k])
            temp = temp / sum_
            importance_nm.append(temp)
        emotion_vector = 0
        for k in xrange(len(importance_nm)):
            emotion_vector += importance_nm[k] * a_
            # emotion_vector += K.dot(importance_nm[k], a_)
        emotion_vector = K.reshape(emotion_vector, (1, 1, self.C))
        print emotion_vector
        return emotion_vector

    def compute_output_shape(self, input_shape):
        # reshape to (batch, 1, 1, C) so the output can be sent to the
        # flatten layer and then to the softmax dense layer
        shape = self.layer_shape[0], 1, 1, self.C
        # print self.output_dim
        print "shape"
        print shape
        return shape
def build_model(input_shape, feature_size, nb_classes):
    print "testing"
    net_input = Input(shape=(input_shape, feature_size, 1))
    conv_1 = Convolution2D(96, 11, 11, subsample=(4, 4), activation='relu',
                           name='conv_1', init='he_normal')(net_input)
    conv_2 = MaxPooling2D((3, 3), strides=(2, 2))(conv_1)
    conv_2 = BatchNormalization(name="convpool_1")(conv_2)
    conv_2 = ZeroPadding2D((2, 2))(conv_2)
    conv_2 = Convolution2D(256, 5, 5, activation="relu",
                           init='he_normal', name='conv_2')(conv_2)
    conv_3 = MaxPooling2D((3, 3), strides=(2, 2))(conv_2)
    conv_3 = BatchNormalization()(conv_3)
    conv_3 = ZeroPadding2D((1, 1))(conv_3)
    conv_3 = Convolution2D(384, 3, 3, activation='relu',
                           name='conv_3_1', init='he_normal')(conv_3)
    conv_4 = ZeroPadding2D((1, 1))(conv_3)
    conv_4 = Convolution2D(384, 3, 3, activation='relu',
                           name='conv_3_2', init='he_normal')(conv_3)
    conv_5 = ZeroPadding2D((1, 1))(conv_4)
    conv_5 = Convolution2D(256, 3, 3, activation="relu",
                           init='he_normal', name='conv_5')(conv_5)
    dense_1_ = MaxPooling2D((3, 3), strides=(2, 2), name="convpool_5")(conv_5)
    # reshape into L x C, which is easier to process
    dense_1 = Reshape((272, 16), name="reshape")(dense_1_)
    print 'reshape shape'
    print dense_1._keras_shape
    attention = Attention_layer(dense_1._keras_shape)(dense_1)
    print 'attention shape'
    print attention._keras_shape
    flatten = Flatten()(attention)
    print 'flatten shape'
    print flatten._keras_shape
    output = Dense(nb_classes, activation='softmax')(flatten)
    model = Model(net_input, output)
    model.compile(loss="categorical_crossentropy",
                  optimizer='Adam', metrics=['accuracy'])
    return model


if __name__ == "__main__":
    build_model(633, 128, 6).summary()
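For reference, the target shape that fit() will demand is fixed by the last Dense layer; a minimal sketch to inspect it (assuming the code above is saved as attention_network_custom_layer, as in the training script below):

# minimal sketch: check the target shape the compiled model expects
from attention_network_custom_layer import build_model

model = build_model(633, 128, 6)
# (None, 6): with categorical_crossentropy, fit() expects one-hot targets
# of shape (num_samples, 6)
print model.output_shape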
My code to train the model:
import numpy as np
import matplotlib
import pandas
import os
from attention_network_custom_layer import build_model
from sklearn.preprocessing import label_binarize
from keras.preprocessing.sequence import pad_sequences
import pdb; pdb.set_trace()
import logging
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
path_to_features = "/nas/lrz/tuei/ldv/studierende/Emotion/enterface_features/"
def to_categorical(label, emotions):
    return label_binarize(label, np.asarray(emotions))
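# --- a hypothetical shape check (made-up labels, not from the dataset):
# label_binarize one-hot encodes against the 6 emotion codes, so each
# target row has shape (6,)
print label_binarize(['an', 'ha'], np.asarray(['an', 'di', 'fe', 'ha', 'sa', 'su'])).shape  # (2, 6)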
def load_x(name):
    df = pandas.read_csv(name, sep=',', index_col=None)
    x = df.iloc[:, :]
    x = np.array(x, dtype=float)
    return np.array(x, dtype=float)
def load_features(path):
    files = os.listdir(path)
    tx = []
    ty = []
    for f in files:
        if f[-4:] == '.csv':
            x = load_x(path_to_features + f)
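            # f[-8:-6] assumes the two-letter emotion code sits just before
            # a "_1.csv"-style suffix; e.g. a hypothetical file
            # "s01_an_1.csv" yields y == 'an'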
            y = f[-8:-6]
            tx.append(np.array(x, dtype=float))
            ty.append(y)
    tx = np.array(tx)
    ty = np.array(ty)
    return tx, ty
features, labels = load_features(path_to_features)
print np.unique(labels)
np.random.seed(200)
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s-\
%(message)s')
# emotions: anger, disgust, fear, happiness, sadness, surprise
emotions = ['an', 'di', 'fe', 'ha', 'sa', 'su']
batch_size = 30
nb_class = 6
nb_epoch = 2000
print 'length of data'
print len(features)
parts = 5
permutation = np.random.permutation(len(features))
permuted_ids = features[permutation]
step = len(features) / parts
preds = []
trues = []
fold_step = 0
max_length = 633
feature_size = 128
for part in xrange(parts):
    i0 = step * part
    i1 = step * (part + 1)
    train_x = np.append(permuted_ids[:i0], permuted_ids[i1:])
    train_y = np.append(labels[:i0], labels[i1:])
    test_x = permuted_ids[i0:i1]
    test_y = labels[i0:i1]
    train_y = to_categorical(train_y, emotions)
    test_y = to_categorical(test_y, emotions)
    train_y = np.argmax(train_y, axis=1)
    test_y = np.argmax(test_y, axis=1)
    train_y = np.reshape(train_y, (train_y.shape[0], 1))
    test_y = np.reshape(test_y, (test_y.shape[0], 1))
    test_y = np.ravel(test_y)
    train_y = np.ravel(train_y)
    train_x = pad_sequences(train_x, maxlen=max_length, dtype='float',
                            padding='post', value=0.0)
    test_x = pad_sequences(test_x, maxlen=max_length, dtype='float',
                           padding='post', value=0.0)
    test_x = (test_x - np.mean(train_x, axis=0)) / np.std(train_x, axis=0)
    train_x = (train_x - np.mean(train_x, axis=0)) / np.std(train_x, axis=0)
    train_x = np.reshape(train_x, (len(train_x), max_length, feature_size, 1))
    test_x = np.reshape(test_x, (len(test_x), max_length, feature_size, 1))
    model = build_model(max_length, feature_size, nb_class)
    model.summary()
    history = model.fit(train_x, train_y, epochs=nb_epoch,
                        batch_size=batch_size, verbose=2,
                        validation_data=(test_x, test_y))
    history_dict = history.history
    loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']
    epochs = range(1, len(loss_values) + 1)
    plt.plot(epochs, loss_values, 'bo', label='Training loss')
    plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.ylim((0.8, 1.4))
    plt.legend()
    plt.savefig('plots/only one-bi-lstm-smaller, ' + str(part)
                + '.png')
    plt.close()