我收到错误:
ValueError:输入数组应与目标数组具有相同数量的样本。找到6个输入样本和128个目标样本。
与keras一起训练时。 我正在使用生成器在我的时间序列上生成一个移动窗口,如下所示:
def generator_val(X,y,number_of_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
while 1:
machine_idcs = np.concatenate(
[np.repeat(i, len(np.arange(length, Xi.shape[0], overview_steps))) for i, Xi in enumerate(X)])
# Generate all indicies for all possible steps.
step_idcs = np.concatenate(
[np.arange(length, Xi.shape[0], overview_steps) for Xi in X])
# We create a matrix of indices from which we sample for the mini
# batches.
examples = np.zeros((len(step_idcs), 2), dtype=np.int32)
examples[:, 0] = machine_idcs
examples[:, 1] = step_idcs
for i in range(0, examples.shape[0], batch_size):
# Get the machine and step indices of the mini-batch.
mbatch = examples[i:i + batch_size]
# Preinitialize the mini batch.
sequence = np.zeros(
(len(mbatch), length, X[0].shape[1]), np.float32)
mini_batch_y = np.zeros((batch_size,), dtype=np.float32)
for j in range(mbatch.shape[0]):
machine_idx = mbatch[j, 0]
step_idx = mbatch[j, 1]
sequence[j] = X[machine_idx][step_idx - length: step_idx]
mini_batch_y[j] = y[machine_idx][step_idx]
mini_batch_X = sequence
yield mini_batch_X, mini_batch_y
要开始训练,我使用的是model.fit_generator:
model.fit_generator(generator(X, y, number_of_steps= number_of_steps,batch_size=128, length=300),
validation_data = generator(X_val, y_val, number_of_steps= number_of_steps_val,batch_size=128, length=300),
validation_steps = number_of_steps_val,
samples_per_epoch= number_of_steps,
epochs=2)
如果发生器没有使用无限循环,或者在一批中无法重置? 有可能在每个纪元后重置发电机吗?
更新
###sample data
test_X = np.random.rand(len(X),10037, 24).astype(np.float32)
test_Y = np.random.randint(0,2,(len(X),10037)).astype(np.float32)
val_X = np.random.rand(len(X_val), 10037,24).astype(np.float32)
val_Y = np.random.randint(0,1,(len(X_val),10037)).astype(np.float32)
X = [item for item in test_X]
Y = [item for item in test_Y]
X_val = [item for item in val_X]
Y_val = [item for item in val_Y]
解决方法
我找到了这个错误的解决方案,但是我对它并不满意,因为它抛弃了一些最后的序列。解决方案是,有多个序列可以通过批量大小划分数据集。 代码如下:
window_steps = 50
number_of_samples = sum([X_[i].shape[0] for i in range(len(X_))])-len(X_)*300
number_of_steps = int(number_of_samples/128/window_steps)
number_of_samples_val = sum([X_val_[i].shape[0] for i in range(len(X_val_))])-len(X_val_)*300
number_of_steps_val = int(number_of_samples_val/128/window_steps)
def generator_val(X,y, number_of_steps, window_steps = window_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
while 1:
machine_idcs = np.concatenate(
[np.repeat(i, len(np.arange(length, Xi.shape[0], window_steps))) for i, Xi in enumerate(X)])
# Generate all indicies for all possible steps.
step_idcs = np.concatenate(
[np.arange(length, Xi.shape[0], window_steps) for Xi in X])
# We create a matrix of indices from which we sample for the mini
# batches.
examples = np.zeros((number_of_steps*batch_size, 2), dtype=np.int32)
examples[:, 0] = machine_idcs[:number_of_steps*batch_size]
examples[:, 1] = step_idcs[:number_of_steps*batch_size]
for i in range(0, examples.shape[0], batch_size):
# Get the machine and step indices of the mini-batch.
mbatch = examples[i:i + batch_size]
# Preinitialize the mini batch.
sequence = np.zeros(
(len(mbatch), length, X[0].shape[1]), np.float32)
mini_batch_y = np.zeros((batch_size,), dtype=np.float32)
for j in range(mbatch.shape[0]):
machine_idx = mbatch[j, 0]
step_idx = mbatch[j, 1]
sequence[j] = X[machine_idx][step_idx - length: step_idx]
mini_batch_y[j] = y[machine_idx][step_idx]
mini_batch_X = sequence
yield mini_batch_X, mini_batch_y
model.fit_generator(generator_val(X, Y, number_of_steps= number_of_steps, window_steps = window_steps, batch_size=128, length=300),
validation_data = generator_val(X_val,Y_val, number_of_steps= number_of_steps_val,window_steps = window_steps,batch_size=128, length=300),
validation_steps = number_of_steps_val,
samples_per_epoch= number_of_steps,
epochs=2)
以下是一个示例网络:
input1 = Input(shape=(sequence_length, num_features))
h1 = LSTM(50)(input1)
prediction = Dense(1)(h1)
model = Model(inputs=[input1], outputs=[prediction])
loss = "binary_crossentropy"
optimizer = "adam"
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
那么,你知道怎么使用所有的序列而不抛弃一些吗?