Question

我收到错误：

ValueError：输入数组应与目标数组具有相同数量的样本。找到6个输入样本和128个目标样本。

与keras一起训练时。我正在使用生成器在我的时间序列上生成一个移动窗口，如下所示：

def generator_val(X,y,number_of_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
    while 1:  
        machine_idcs = np.concatenate(
        [np.repeat(i, len(np.arange(length, Xi.shape[0], overview_steps))) for i, Xi in enumerate(X)])
        # Generate all indicies for all possible steps.
        step_idcs = np.concatenate(
        [np.arange(length, Xi.shape[0], overview_steps) for Xi in X])

        # We create a matrix of indices from which we sample for the mini
        # batches.
        examples = np.zeros((len(step_idcs), 2), dtype=np.int32)
        examples[:, 0] = machine_idcs
        examples[:, 1] = step_idcs

        for i in range(0, examples.shape[0], batch_size):
            # Get the machine and step indices of the mini-batch.
            mbatch = examples[i:i + batch_size]

            # Preinitialize the mini batch.
            sequence = np.zeros(
                (len(mbatch), length, X[0].shape[1]), np.float32)
            mini_batch_y = np.zeros((batch_size,), dtype=np.float32) 

            for j in range(mbatch.shape[0]):
                machine_idx = mbatch[j, 0]
                step_idx = mbatch[j, 1]
                sequence[j] = X[machine_idx][step_idx - length: step_idx]
                mini_batch_y[j] = y[machine_idx][step_idx]
            mini_batch_X = sequence  
            yield mini_batch_X, mini_batch_y

要开始训练，我使用的是model.fit_generator：

model.fit_generator(generator(X, y, number_of_steps= number_of_steps,batch_size=128, length=300),
                    validation_data = generator(X_val, y_val,   number_of_steps= number_of_steps_val,batch_size=128, length=300),
                    validation_steps = number_of_steps_val, 
                    samples_per_epoch= number_of_steps, 
                    epochs=2)

如果发生器没有使用无限循环，或者在一批中无法重置？有可能在每个纪元后重置发电机吗？

更新

###sample data
test_X = np.random.rand(len(X),10037, 24).astype(np.float32)
test_Y = np.random.randint(0,2,(len(X),10037)).astype(np.float32)
val_X = np.random.rand(len(X_val), 10037,24).astype(np.float32)
val_Y = np.random.randint(0,1,(len(X_val),10037)).astype(np.float32)

X = [item for item in test_X]
Y = [item for item in test_Y]
X_val = [item for item in val_X]
Y_val = [item for item in val_Y]

解决方法

我找到了这个错误的解决方案，但是我对它并不满意，因为它抛弃了一些最后的序列。解决方案是，有多个序列可以通过批量大小划分数据集。代码如下：

window_steps = 50
number_of_samples = sum([X_[i].shape[0] for i in range(len(X_))])-len(X_)*300
number_of_steps = int(number_of_samples/128/window_steps)

number_of_samples_val = sum([X_val_[i].shape[0] for i in range(len(X_val_))])-len(X_val_)*300
number_of_steps_val = int(number_of_samples_val/128/window_steps)   

def generator_val(X,y, number_of_steps, window_steps = window_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
    while 1:  
        machine_idcs = np.concatenate(
        [np.repeat(i, len(np.arange(length, Xi.shape[0], window_steps))) for i, Xi in enumerate(X)])
        # Generate all indicies for all possible steps.
        step_idcs = np.concatenate(
        [np.arange(length, Xi.shape[0], window_steps) for Xi in X])

        # We create a matrix of indices from which we sample for the mini
        # batches.
        examples = np.zeros((number_of_steps*batch_size, 2), dtype=np.int32)
        examples[:, 0] = machine_idcs[:number_of_steps*batch_size]
        examples[:, 1] = step_idcs[:number_of_steps*batch_size]

        for i in range(0, examples.shape[0], batch_size):
            # Get the machine and step indices of the mini-batch.
            mbatch = examples[i:i + batch_size]

            # Preinitialize the mini batch.
            sequence = np.zeros(
                (len(mbatch), length, X[0].shape[1]), np.float32)
            mini_batch_y = np.zeros((batch_size,), dtype=np.float32) 

            for j in range(mbatch.shape[0]):
                machine_idx = mbatch[j, 0]
                step_idx = mbatch[j, 1]
                sequence[j] = X[machine_idx][step_idx - length: step_idx]
                mini_batch_y[j] = y[machine_idx][step_idx]
            mini_batch_X = sequence  
            yield mini_batch_X, mini_batch_y

model.fit_generator(generator_val(X, Y, number_of_steps= number_of_steps, window_steps = window_steps, batch_size=128, length=300),
                    validation_data = generator_val(X_val,Y_val,  number_of_steps= number_of_steps_val,window_steps = window_steps,batch_size=128, length=300),
                    validation_steps = number_of_steps_val, 
                    samples_per_epoch= number_of_steps, 
                    epochs=2)

以下是一个示例网络：

input1 = Input(shape=(sequence_length, num_features))
h1 = LSTM(50)(input1)
prediction = Dense(1)(h1)
model = Model(inputs=[input1], outputs=[prediction])
loss = "binary_crossentropy"
optimizer = "adam"   
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

那么，你知道怎么使用所有的序列而不抛弃一些吗？

获得ValueError：输入！=使用model.fit_generator时的目标

0 个答案: