I am trying to implement a CRF instead of softmax after a BiLSTM, and I am using keras_contrib to get the CRF. I think I have made a mistake somewhere with the array dimensions, but I cannot figure it out.
Here is the code:
# preds = Dense(num_label, activation='softmax')(out)
# preds_binary = Dense(2, activation='softmax')(out)
'''
test 1
'''
preds = kcl.CRF(num_label, sparse_target=True)(out)
preds_binary = kcl.CRF(2, sparse_target=True)(out)
Here is the error message:
ValueError: Index out of range using input dim 2; input has only 2 dims for 'crf_1/strided_slice' (op: 'StridedSlice') with input shapes: [?,5], [3], [3], [3] and with computed input tensors: input[3] = <1 1 1>.
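If it helps: the error message seems to say that the CRF layer received a 2D tensor of shape [?, 5], while its internal strided_slice expects three dimensions. For reference, here is a minimal sketch of the 3D input shape keras_contrib's CRF is normally given (the layer sizes and names below are illustrative, not taken from my model):

from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Bidirectional
from keras_contrib.layers import CRF

seq_in = Input(shape=(50,), dtype='int32')               # (batch, timesteps)
emb = Embedding(1000, 64)(seq_in)                        # (batch, timesteps, 64)
h = Bidirectional(LSTM(32, return_sequences=True))(emb)  # (batch, timesteps, 64)
crf = CRF(5, sparse_target=True)                         # one tag per timestep
tags = crf(h)                                            # CRF requires a 3D input
model = Model(seq_in, tags)
model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy])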
Can anyone here help me?
Answer 0: (score: 1)
@giser_yugang Here is my code:

num_labels = 5
train_array = [X_train, POS1_train, POS2_train]
test_array = [X_test, POS1_test, POS2_test]
train_label = [Y_train, binary_label_train]
test_label = [Y_test, binary_label_test ]
x_test_drug, x_test_med, y_test_drug, y_test_med = pd.splitDrug_Med(id_test, X_test, Y_test, POS1_test, POS2_test,
                                                                    binary_label_test)
print("\nthe shape of x_test_drug[0]: ", x_test_drug[0].shape, '\n')
print("\nthe shape of x_test_med[0] : ", x_test_med[0].shape, '\n')
print("load word2vec...")
len_dic, embedding_matrix = ld.load_word_matrix(GLOVE_DIR,
                                                MAX_NB_WORDS,
                                                word_index,
                                                EMBEDDING_DIM)
print("create word embedding layer...")
embedding_layer = Embedding(len_dic,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)
print("create position embedding layer...")
position_em_dim = 10
pos_embedding_matrix = np.random.uniform(-0.1, 0.1, size=(400, position_em_dim))
print("the shape of pos_embedding_matrix", pos_embedding_matrix.shape)
pos_embedding_layer = Embedding(400,
                                position_em_dim,
                                weights=[pos_embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
print('create model...')
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
word_embedded_sequences = embedding_layer(sequence_input)
pos1_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
pos1_embedded_sequences = pos_embedding_layer(pos1_sequence_input)
pos2_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
pos2_embedded_sequences = pos_embedding_layer(pos2_sequence_input)
# embedded_sequences = merge([word_embedded_sequences, pos1_embedded_sequences, pos2_embedded_sequences],
# mode='concat')
embedded_sequences = concatenate([word_embedded_sequences, pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)
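# The concatenated input is 3D:
# (batch, MAX_SEQUENCE_LENGTH, EMBEDDING_DIM + 2 * position_em_dim)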
'''
#lstm_attention_add_pos_add_last_two_out
'''
embedded_sequences = Dropout(0.3)(embedded_sequences)
lstm_forward, lstm_backword_reverse = Bidirectional(LSTM(hidden_dim, dropout_W=0.3,
                                                          dropout_U=0.3,
                                                          return_sequences=True,
                                                          U_regularizer=regularizers.l2(0.0001)),
                                                    merge_mode=None)(embedded_sequences)
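# With merge_mode=None, Bidirectional returns the forward and backward
# sequences as two separate 3D tensors instead of concatenating them.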
# lstm_forward = LSTM(150, dropout_W=0.2, dropout_U=0.2, return_sequences=True)(embedded_sequences)
# lstm_backword = LSTM(150, dropout_W=0.2, dropout_U=0.2, return_sequences=True, go_backwards=True)(embedded_sequences)
flip_layer = Lambda(lambda x: K.reverse(x, 1), output_shape=lambda x: (x[0], x[1], x[2]))
flip_layer.supports_masking = True
lstm_backword = flip_layer(lstm_backword_reverse)
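# K.reverse flips the backward output along the time axis (axis 1) so that
# its timesteps line up with the forward output.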
# lstm_sequence = merge([lstm_forward, lstm_backword_reverse], mode='concat', concat_axis=-1)
lstm_sequence = concatenate([lstm_forward, lstm_backword_reverse], axis=-1)
# pos_featrue = merge([pos1_embedded_sequences, pos2_embedded_sequences], mode='concat', concat_axis=-1)
pos_featrue = concatenate([pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)
pos_featrue = TimeDistributed(Dense(20, init='he_normal'))(pos_featrue)
h_feature = TimeDistributed(Dense(hidden_dim * 2))(lstm_sequence)
# att_feature = merge([h_feature, pos_featrue], mode='concat', concat_axis=-1)
att_feature = concatenate([h_feature, pos_featrue], axis=-1)
weights = AttentionWeight2(name='attention')(att_feature)
weights_repeat = RepeatVector(hidden_dim * 2)(weights)
weights_repeat_per = Permute((2, 1))(weights_repeat)
# mul = merge([lstm_sequence, weights_repeat_per], mode='mul')
mul = multiply([lstm_sequence, weights_repeat_per])
sumpool = Lambda(lambda x: K.sum(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
sumpool.supports_masking = True
att_out = sumpool(mul)
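# Summing over axis=1 removes the time axis, so att_out is 2D: (batch, hidden_dim * 2).
# (`slice` below appears to be a user-defined helper that picks one timestep.)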
lastout = Lambda(slice, output_shape=lambda x: (x[0], x[2]), arguments={'index': -1})
lstm_last_forward = lastout(lstm_forward)
lstm_last_backward = lastout(lstm_backword)
# lstm_last = merge([lstm_last_forward, lstm_last_backward], mode='concat')
lstm_last = concatenate([lstm_last_forward, lstm_last_backward], axis=-1)
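# lstm_last is also 2D: the last forward and last backward hidden states,
# concatenated to (batch, hidden_dim * 2).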
att_out = Dense(hidden_dim * 2)(att_out)
lstm_last = Dense(hidden_dim * 2)(lstm_last)
# out = merge([att_out, lstm_last], mode='sum')
out = add([att_out, lstm_last])
out = Dropout(0.5)(out)
out = Activation(activation='tanh')(out)
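# out is a 2D tensor of shape (batch, hidden_dim * 2); there is no time axis
# left, which is fine for the softmax Dense below but not for a CRF.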
preds = Dense(num_label, activation='softmax')(out)
preds_binary = Dense(2, activation='softmax')(out)
'''
test 1
'''
preds = kcl.CRF(num_label, sparse_target=True)(out)
preds_binary = kcl.CRF(2, sparse_target=True)(out)
If this is not enough, I can give you more.
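If per-token labels are what is wanted, the CRF has to be attached to a 3D per-timestep tensor such as lstm_sequence, not to the pooled 2D out. A minimal sketch of that rearrangement, assuming each timestep should receive a tag (only lstm_sequence, num_label, and kcl come from the code above; the rest is illustrative):

# Feed the CRF the per-timestep BiLSTM features instead of the pooled vector;
# keras_contrib's CRF projects its input down to num_label tags internally.
crf = kcl.CRF(num_label, sparse_target=True)
preds = crf(lstm_sequence)  # lstm_sequence is 3D: (batch, timesteps, hidden_dim * 2)
# The CRF supplies its own loss and metric:
# model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy])

If instead one label per sentence is wanted (which the attention pooling suggests), a linear-chain CRF does not apply, and the original softmax Dense on out is the usual choice.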