The idea is to train a CNN on the cosine similarity matrix of the hidden states of two LSTMs. I have been trying to get the following code to work, but it fails with this error message:
Graph disconnected: cannot obtain value for tensor
Tensor("bidirectional_4/concat:0", shape=(?, ?, 100), dtype=float32)
at layer "input_11". The following previous layers were accessed without issue: []
The code used to train the model is as follows:
def train_model(self, sentences_pair, is_similar,
                embedding_meta_data_skt, embedding_meta_data_tib,
                model_save_directory='./'):
    tokenizer_skt = embedding_meta_data_skt['tokenizer']
    tokenizer_tib = embedding_meta_data_tib['tokenizer']
    embedding_matrix_skt = embedding_meta_data_skt['embedding_matrix']
    embedding_matrix_tib = embedding_meta_data_tib['embedding_matrix']

    train_data_x1, train_data_x2, train_labels, leaks_train, \
        val_data_x1, val_data_x2, val_labels, leaks_val = create_train_dev_set(
            tokenizer_skt, sentences_pair, is_similar,
            self.max_sequence_length, self.validation_split_ratio)

    nb_words_skt = len(tokenizer_skt.word_index) + 1
    nb_words_tib = len(tokenizer_tib.word_index) + 1

    # Creating word embedding layer
    embedding_layer_skt = Embedding(nb_words_skt, self.embedding_dim,
                                    weights=[embedding_matrix_skt],
                                    input_length=self.max_sequence_length,
                                    trainable=False)
    embedding_layer_tib = Embedding(nb_words_tib, self.embedding_dim,
                                    weights=[embedding_matrix_tib],
                                    input_length=self.max_sequence_length,
                                    trainable=False)

    # Creating LSTM Encoder
    lstm_layer = Bidirectional(LSTM(self.number_lstm_units,
                                    dropout=self.rate_drop_lstm,
                                    recurrent_dropout=self.rate_drop_lstm,
                                    return_sequences=True))

    # Creating LSTM Encoder layer for First Sentence
    sequence_1_input = Input(shape=(self.max_sequence_length,), dtype='int32')
    embedded_sequences_1 = embedding_layer_skt(sequence_1_input)
    skt_lstm = lstm_layer(embedded_sequences_1)

    # Creating LSTM Encoder layer for Second Sentence
    sequence_2_input = Input(shape=(self.max_sequence_length,), dtype='int32')
    embedded_sequences_2 = embedding_layer_tib(sequence_2_input)
    tib_lstm = lstm_layer(embedded_sequences_2)

    A_input = keras.Input(tensor=skt_lstm)
    B_input = keras.Input(tensor=tib_lstm)

    dist_output = keras.layers.Lambda(pairwise_cosine_sim)([skt_lstm, tib_lstm, A_input, B_input])
    dist_output = Reshape((40, 40, 1))(dist_output)

    input_shape = (40, 40, 1)
    cnn_model = Conv2D(128, (2, 2), input_shape=input_shape)(dist_output)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(164, (2, 2))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(192, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(192, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(128, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = MaxPooling2D(pool_size=(2, 2))(cnn_model)
    cnn_model = Dropout(0.40)(cnn_model)
    cnn_model = Flatten()(cnn_model)

    # Fully connected layer
    cnn_model = Dense(256)(cnn_model)
    cnn_model = BatchNormalization()(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Dropout(0.5)(cnn_model)
    cnn_model = Dense(num_classes)(cnn_model)
    preds = Dense(1, activation='sigmoid')(cnn_model)

    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss=keras.losses.binary_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=learning_rate),
                  metrics=['accuracy'])
    #model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc'])

    filepath = "skt-tib-bs" + str(batch_size) + "-" + "{epoch:02d}-{val_acc:.2f}.hdf5"
    checkpoint = ModelCheckpoint('skt-tib.h5', monitor='val_acc')
    callbacks_list = [checkpoint]

    model.fit([train_data_x1, train_data_x2, leaks_train], train_labels,
              validation_data=([val_data_x1, val_data_x2, leaks_val], val_labels),
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              class_weight=class_weight,
              callbacks=callbacks_list)

    score = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    model.save(file_name)
The function that computes the pairwise cosine similarity is defined as follows:
def l2_norm(x, axis=None):
    square_sum = K.sum(K.square(x), axis=axis, keepdims=True)
    norm = K.sqrt(K.maximum(square_sum, K.epsilon()))
    return norm

def pairwise_cosine_sim(A_B):
    A, B, A_tensor, B_tensor = A_B
    A_mag = l2_norm(A, axis=2)
    B_mag = l2_norm(B, axis=2)
    num = K.batch_dot(A_tensor, K.permute_dimensions(B_tensor, (0, 2, 1)))
    den = A_mag * K.permute_dimensions(B_mag, (0, 2, 1))
    dist_mat = num / den
    return dist_mat
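As a quick shape check (a sketch of mine, not from the original question; it assumes 100-dimensional hidden states and a sequence length of 40 to match the code above, and `from keras import backend as K`), the function should return a (batch, timesteps, timesteps) similarity matrix, which is what the following Reshape((40, 40, 1)) expects:

import numpy as np
from keras import backend as K

# Two random "hidden state" batches of shape (batch, timesteps, units)
A = K.constant(np.random.rand(2, 40, 100))
B = K.constant(np.random.rand(2, 40, 100))

# Passing each tensor twice mirrors the four-element list used in the Lambda.
sim = pairwise_cosine_sim([A, B, A, B])
print(K.int_shape(sim))  # (2, 40, 40)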
I have been trying to fix this for hours, but with no luck. Somewhere the inputs and outputs are not connected, but I just can't figure out where the problem is. Any suggestions?
Answer 0 (score: 1)
Either remove A_input and B_input entirely, since they are not input layers in the first place, and use skt_lstm and tib_lstm directly in their place; or, if you would like to keep them, pass them as inputs of the model as well when defining the Model, since they actually are input layers:

model = Model(inputs=[sequence_1_input, sequence_2_input, A_input, B_input], outputs=preds)

However, you don't need to pass any corresponding arrays for them when calling the fit method, since they will be fed with their corresponding tensors skt_lstm and tib_lstm (i.e. they act as wrappers around those tensors).
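A minimal sketch of the first option (my illustration, reusing the names from the question): feed the two LSTM outputs straight into the Lambda, and have pairwise_cosine_sim unpack two tensors instead of four, so that every layer traces back to the two real Input layers:

def pairwise_cosine_sim(tensors):
    A, B = tensors
    A_mag = l2_norm(A, axis=2)
    B_mag = l2_norm(B, axis=2)
    num = K.batch_dot(A, K.permute_dimensions(B, (0, 2, 1)))
    den = A_mag * K.permute_dimensions(B_mag, (0, 2, 1))
    return num / den

# No A_input/B_input wrappers: the Lambda consumes the LSTM outputs directly,
# so the graph stays connected from the two Input layers to the output.
dist_output = Lambda(pairwise_cosine_sim)([skt_lstm, tib_lstm])
dist_output = Reshape((40, 40, 1))(dist_output)
# ... CNN layers as before ...
model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)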