我正在尝试使用 CNN-LSTM 时间序列分类器预测运输方向(目前为二分类)。由于数据只能追溯到 2010 年,观测数量有限,无法再收集更多数据。原始数据由一个二进制 Y 和 68 个 X 变量组成;这些变量进一步扩展为包含同一组变量的 50 个滞后时间点(滞后窗口随时间滚动生成),因此 X 的维度为 [样本数, 50, 68]。
问题是:当我拟合模型时,它很快收敛为只预测单一类别(是的,数据按 65/35 不平衡)。仅使用 1 层 LSTM 或简单的 CNN 时,模型会完全过拟合;而更复杂的模型则只返回一个类别。我已经尝试过不同的学习率并增大了 dropout 比率,欢迎任何建议,代码如下:
def to_supervised(csv_path='alldataDests.csv', lag=50, end_train=2000,
                  start_test=2001, test_len=1316):
    """Load the CSV and reshape it into lagged supervised-learning windows.

    Each sample pairs a (lag, features) window of the X columns with the y
    value on the window's last day. Standardization uses mean/std computed
    from the training rows only, so no test information leaks into scaling.

    Parameters (defaults reproduce the original hard-coded behavior):
        csv_path: source file; column 0 is the datetime index, column 1 the
            binary y, remaining columns the features.
        lag: window length (today plus lag-1 previous days).
        end_train, start_test: row indices splitting train from test.
            NOTE(review): row ``end_train`` itself falls in neither split —
            confirm the one-row gap is intentional.
        test_len: truncation length of the test split (was the magic 1316).

    Returns:
        trainX, trainY, testX, testY — X arrays shaped (samples, lag,
        features), Y arrays one-hot encoded via to_categorical.
    """
    dataset = read_csv(csv_path, header=0, infer_datetime_format=True,
                       parse_dates=['0'], index_col=['0'])
    # Standardize features using train-only statistics.
    mean = dataset.iloc[0:end_train, 1:].mean(axis=0)
    dataset.iloc[:, 1:] -= mean
    std = dataset.iloc[0:end_train, 1:].std(axis=0)
    dataset.iloc[:, 1:] /= std
    x = dataset.values[:, 1:]
    y = dataset.values[:, 0]
    # Sliding windows: xLag[i] covers rows i .. i+lag-1 and is labelled
    # by y[i+lag-1] (the window's last day).
    xLag = array([x[i:i + lag] for i in range(len(x) - lag + 1)])
    trainY = to_categorical(y[lag - 1:end_train])
    trainX = xLag[:end_train - (lag - 1), :, :]
    testY = to_categorical(y[start_test:])
    testX = xLag[start_test - (lag - 1):, :, :]
    # Truncate so testX and testY stay the same length.
    testX = testX[:test_len, :, :]
    testY = testY[:test_len]
    return trainX, trainY, testX, testY
def evaluate_model(trainX, trainY, testX, testy):
    """Build, fit and evaluate the 1-D CNN classifier.

    Args:
        trainX, testX: float arrays shaped (samples, timesteps, features).
        trainY, testy: one-hot encoded labels (samples, n_classes).

    Returns:
        (accuracy, preds, preds_proba) — test accuracy, predicted class
        indices, and per-class probabilities for the test set.
    """
    verbose, epochs, batch_size = 1, 500, 128
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainY.shape[1]
    model = Sequential()
    model.add(Conv1D(32, 2, activation='relu', input_shape=(n_timesteps, n_features),
                     kernel_regularizer=l2(0.02),
                     bias_regularizer=l2(0.02)))
    model.add(Conv1D(32, 2, activation='relu', kernel_regularizer=l2(0.02),
                     bias_regularizer=l2(0.02)))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D())
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    # `lr=` is deprecated in tf.keras optimizers; use `learning_rate=`.
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(learning_rate=0.00001),
                  metrics=['accuracy'])
    # Weight classes inversely to their frequency so the 65/35 imbalance
    # does not reward collapsing onto the majority class — the reported bug.
    counts = trainY.sum(axis=0)
    class_weight = {i: float(len(trainY)) / (n_outputs * c)
                    for i, c in enumerate(counts)}
    # fit network
    model.fit(trainX, trainY, epochs=epochs, batch_size=batch_size,
              verbose=verbose, class_weight=class_weight)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    # predict_classes / predict_proba were removed from Keras (TF >= 2.6);
    # derive both from model.predict instead.
    preds_proba = model.predict(testX, batch_size=batch_size, verbose=0)
    preds = preds_proba.argmax(axis=1)
    return accuracy, preds, preds_proba
# summarize scores
def summarize_results(scores):
    """Print the raw per-repeat scores, then their mean and std deviation."""
    print(scores)
    avg = mean(scores)
    spread = std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (avg, spread))
# run an experiment
def run_experiment(repeats=1):
    """Train and evaluate the model `repeats` times, summarizing accuracy.

    Returns the class predictions and probabilities of the final repeat.
    """
    # Build the windowed train/test split once and reuse it across repeats.
    trainX, trainY, testX, testY = to_supervised()
    scores = []
    for run_idx in range(repeats):
        accuracy, preds, preds_proba = evaluate_model(trainX, trainY, testX, testY)
        accuracy = accuracy * 100.0
        print('>#%d: %.3f' % (run_idx + 1, accuracy))
        scores.append(accuracy)
    # Report mean/std across all repeats.
    summarize_results(scores)
    return preds, preds_proba
# Stray markdown code fence removed from the end of this line.
preds, preds_proba = run_experiment()