ValueError:使用输入维度 2 时索引超出范围;对于 'crf_1/strided_slice',输入只有 2 个维度

时间:2019-03-15 07:37:09

标签: keras lstm crf

我正在尝试在 BiLSTM 之后使用 CRF 来代替 softmax,CRF 层来自 keras_contrib。我认为是张量(数组)的维度出了问题,但我无法解决。这是代码:

 # preds = Dense(num_label, activation='softmax')(out)
 # preds_binary = Dense(2, activation='softmax')(out)
 '''
 test 1
 '''
 # Experiment: the two softmax Dense heads (commented out above) are replaced by
 # CRF layers applied to the same `out` tensor. `kcl` is presumably an alias for
 # the keras_contrib layers module -- TODO confirm against the file's imports.
 preds = kcl.CRF(num_label, sparse_target=True)(out)
 preds_binary = kcl.CRF(2, sparse_target=True)(out)

这是错误消息:

ValueError: Index out of range using input dim 2; input has only 2 dims for 'crf_1/strided_slice' (op: 'StridedSlice') with input shapes: [?,5], [3], [3], [3] and with computed input tensors: input[3] = <1 1 1>.

这里有人可以帮助我吗?

1 个答案:

答案 0 :(得分:1)

@giser_yugang这是我的代码: num_labels = 5

# Bundle the three input arrays and the two label arrays for train and test.
train_array = [X_train, POS1_train, POS2_train]
    test_array  = [X_test, POS1_test, POS2_test]
    train_label = [Y_train, binary_label_train]
    test_label  = [Y_test,  binary_label_test ]

    # Split the test data into two subsets with the helper `pd.splitDrug_Med`.
    # NOTE(review): `pd` appears to be a project module here, not pandas -- confirm.
    x_test_drug, x_test_med, y_test_drug, y_test_med = pd.splitDrug_Med(id_test, X_test, Y_test, POS1_test, POS2_test,
                                                                        binary_label_test)
    print("\nthe shape of x_test_drug[0]: ", x_test_drug[0].shape, '\n')
    print("\nthe shape of x_test_med[0] : ", x_test_med[0].shape,    '\n')


    # Load the word-embedding matrix; `len_dic` is used below as the input size
    # of the word Embedding layer and `embedding_matrix` as its initial weights.
    print("load word2vec...")
    len_dic, embedding_matrix = ld.load_word_matrix(GLOVE_DIR,
                                                    MAX_NB_WORDS,
                                                    word_index,
                                                    EMBEDDING_DIM)

    # Word embedding initialised from `embedding_matrix`; trainable=True means
    # the weights are updated during training.
    print("create word embedding layer...")
    embedding_layer = Embedding(len_dic,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
    # Position embedding: 400 index slots, each a `position_em_dim`-sized vector,
    # initialised uniformly in [-0.1, 0.1] and trainable.
    print("create position embedding layer...")
    position_em_dim = 10
    pos_embedding_matrix = np.random.uniform(-0.1, 0.1, size=(400, position_em_dim))
    print("the shape of pos_embedding_matrix", pos_embedding_matrix.shape)
    pos_embedding_layer = Embedding(400,
                                    position_em_dim,
                                    weights=[pos_embedding_matrix],
                                    input_length=MAX_SEQUENCE_LENGTH,
                                    trainable=True)

    # Three int32 inputs of length MAX_SEQUENCE_LENGTH. Both position inputs are
    # passed through the same `pos_embedding_layer` instance, so they share weights.
    print('create model...')
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    word_embedded_sequences = embedding_layer(sequence_input)
    pos1_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    pos1_embedded_sequences = pos_embedding_layer(pos1_sequence_input)
    pos2_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    pos2_embedded_sequences = pos_embedding_layer(pos2_sequence_input)
    # embedded_sequences = merge([word_embedded_sequences, pos1_embedded_sequences, pos2_embedded_sequences],
    #                            mode='concat')
    # Join the word and the two position embeddings along the last axis.
    embedded_sequences = concatenate([word_embedded_sequences, pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)


    '''
    #lstm_attention_add_pos_add_last_two_out
    '''
    embedded_sequences = Dropout(0.3)(embedded_sequences)
    # merge_mode=None: the Bidirectional wrapper's result is unpacked into two
    # separate sequence tensors (forward and backward) instead of a merged one.
    # NOTE(review): dropout_W / dropout_U / U_regularizer look like Keras 1
    # argument names -- confirm they are accepted by the installed Keras version.
    lstm_forward, lstm_backword_reverse = Bidirectional(LSTM(hidden_dim, dropout_W=0.3,
                                                             dropout_U=0.3,
                                                             return_sequences=True,
                                                             U_regularizer=regularizers.l2(0.0001)),
                                                        merge_mode=None)(embedded_sequences)
    # lstm_forward = LSTM(150, dropout_W=0.2, dropout_U=0.2, return_sequences=True)(embedded_sequences)
    # lstm_backword = LSTM(150, dropout_W=0.2, dropout_U=0.2, return_sequences=True, go_backwards=True)(embedded_sequences)

    # Reverse the backward output along axis 1; the declared output shape is
    # unchanged. supports_masking is switched on manually for this Lambda layer.
    flip_layer = Lambda(lambda x: K.reverse(x, 1), output_shape=lambda x: (x[0], x[1], x[2]))
    flip_layer.supports_masking = True
    lstm_backword = flip_layer(lstm_backword_reverse)

    # lstm_sequence = merge([lstm_forward, lstm_backword_reverse], mode='concat', concat_axis=-1)
    # NOTE(review): this concatenates the un-flipped `lstm_backword_reverse`, not
    # `lstm_backword`; the flipped tensor is only used for the last-step slice
    # further down. Confirm this is intended.
    lstm_sequence = concatenate([lstm_forward, lstm_backword_reverse], axis=-1)

    # pos_featrue = merge([pos1_embedded_sequences, pos2_embedded_sequences], mode='concat', concat_axis=-1)
    pos_featrue = concatenate([pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)

    # Per-timestep Dense projections: position features to 20 units, LSTM
    # sequence to hidden_dim * 2 units.
    # NOTE(review): `init=` looks like a Keras 1 argument name -- confirm.
    pos_featrue = TimeDistributed(Dense(20, init='he_normal'))(pos_featrue)
    h_feature = TimeDistributed(Dense(hidden_dim * 2))(lstm_sequence)

    # att_feature = merge([h_feature, pos_featrue], mode='concat', concat_axis=-1)
    att_feature = concatenate([h_feature, pos_featrue], axis=-1)

    # `AttentionWeight2` is defined elsewhere; presumably it yields one weight
    # per timestep -- TODO confirm. The weights are repeated hidden_dim * 2 times
    # and permuted so they can be multiplied element-wise with `lstm_sequence`.
    weights = AttentionWeight2(name='attention')(att_feature)
    weights_repeat = RepeatVector(hidden_dim * 2)(weights)
    weights_repeat_per = Permute((2, 1))(weights_repeat)
    # mul = merge([lstm_sequence, weights_repeat_per], mode='mul')
    mul = multiply([lstm_sequence, weights_repeat_per])

    # Sum the weighted sequence over axis 1; the declared output shape
    # (x[0], x[2]) no longer contains that axis.
    sumpool = Lambda(lambda x: K.sum(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
    sumpool.supports_masking = True
    att_out = sumpool(mul)

    # `slice` is invoked with index=-1; presumably a helper defined elsewhere
    # that picks a single timestep (the name shadows the builtin) -- TODO confirm.
    # Its declared output shape (x[0], x[2]) also drops axis 1.
    lastout = Lambda(slice, output_shape=lambda x: (x[0], x[2]), arguments={'index': -1})
    lstm_last_forward = lastout(lstm_forward)
    lstm_last_backward = lastout(lstm_backword)
    # lstm_last = merge([lstm_last_forward, lstm_last_backward], mode='concat')
    lstm_last = concatenate([lstm_last_forward, lstm_last_backward], axis=-1)

    # Project both summaries to hidden_dim * 2 units so they can be added.
    att_out = Dense(hidden_dim * 2)(att_out)
    lstm_last = Dense(hidden_dim * 2)(lstm_last)

    # out = merge([att_out, lstm_last], mode='sum')
    out = add([att_out, lstm_last])
    out = Dropout(0.5)(out)
    out = Activation(activation='tanh')(out)

    # Softmax heads. NOTE(review): both names are rebound to the CRF outputs a
    # few lines below, so these two Dense outputs end up unused.
    preds = Dense(num_label, activation='softmax')(out)
    preds_binary = Dense(2, activation='softmax')(out)
    '''
    test 1
    '''
    # NOTE(review): `out` is built from layers whose declared output shapes are
    # 2-D (x[0], x[2]), i.e. it carries no time axis. The CRF layer presumably
    # expects a 3-D per-timestep sequence input, which would explain an
    # "input has only 2 dims" error raised inside the CRF -- confirm against the
    # keras_contrib CRF documentation.
    preds = kcl.CRF(num_label, sparse_target=True)(out)
    preds_binary = kcl.CRF(2, sparse_target=True)(out)

    '''

如果还不够,我会给你更多。