My dataset looks like this: each data point (group) is a sequence of varying length built from 7 possible letters/features (A-G), and there are 38 such groups.
Group1   Group2   ...   Group38
A        B              F
E        C              A
B        E              G
C        D              G
C        F              F
D        G              G
.        .              .
.        .              .
I apply an LSTM to predict the next letter from the previous letters and measure its accuracy. To feed the data into the model, I one-hot encode the letters, which gives a tensor of shape 38 * 7 * sequence_length (I import it from another file as actions).
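For illustration, this is roughly what the encoding of one group looks like (a minimal sketch only; the real letter-to-index mapping lives in my data_loader file, so the alphabet order assumed here is just an example):

import numpy as np

# hypothetical letter-to-index mapping; the real one is in data_loader
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G']

def one_hot_sequence(seq, alphabet=ALPHABET):
    # encode a letter sequence as a one-hot matrix with one row per timestep
    mat = np.zeros((len(seq), len(alphabet)))
    for t, ch in enumerate(seq):
        mat[t, alphabet.index(ch)] = 1.0
    return mat

print(one_hot_sequence(['A', 'E', 'B', 'C', 'C', 'D']))  # Group1 from the table above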
import keras
from keras.layers import Activation, LSTM, Input, Dense, Concatenate, Dropout
from data_loader import load_data, load_data_coded
from keras.models import Model
from keras import backend as K
from keras import optimizers
from keras.preprocessing.sequence import pad_sequences
import sys
import numpy as np
import math
from sklearn.metrics import confusion_matrix, roc_curve, auc
#data import
action_count, actions, cluster = load_data_coded(Static_data)

#padding with 0 for uneven sequence lengths (all sequences padded to 620)
Transposed = []
for i in range(len(actions)):
    Transposed.append(actions[i].transpose())
padded = [pad_sequences(i, maxlen=620, padding='post') for i in Transposed]
f_actions = []
for i in range(len(actions)):
    f_actions.append(padded[i].transpose())
#Model Building
main_input = Input(shape=(None, action_count), name='main_input')
lstm_out = LSTM(units=64, activation='tanh')(main_input)
lstm_out = Dropout(0.2)(lstm_out)
lstm_out = Dense(action_count)(lstm_out)
main_output = Activation('softmax')(lstm_out)
model = Model(inputs=[main_input], outputs=main_output)
print(model.summary())
#model compile
lr = 0.01
epochs = 12
sgd = optimizers.SGD(lr=lr, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')
#Training / Testing (4-fold cross-validation over the 38 sequences)
seq_count = len(actions)
LOOCV = seq_count
Fold = 4
fold_size = int(math.ceil(float(seq_count) / Fold))
train_accs = []
test_accs = []
for i in range(Fold):
    print('\nFold {}'.format(i + 1))
    start = i * fold_size
    end = start + fold_size if i + 1 < Fold else seq_count
    x_train = [x for j, x in enumerate(f_actions) if j < start or j >= end]
    x_test = f_actions[start:end]
    max_length = np.amax([len(x) for x in x_train])
    indices = np.arange(1, max_length)
    def calc_acc(series, skip=0):
        # next-letter accuracy: for each prefix length k, predict step k from steps 0..k-1
        loss = 0
        count = 0
        max_len = np.amax([len(x) for x in series])
        for k in range(1, max_len):
            feat = np.array([x[0:k] for x in series if len(x) > k])
            lab = np.array([x[k] for x in series if len(x) > k])
            pred = np.argmax(model.predict([feat]), axis=1)
            actual = np.argmax(lab, axis=1)
            count += len(pred)
            loss += sum([x != y for x, y in zip(pred, actual)])
        if count == 0:
            return 1.0
        else:
            return 1. - loss / float(count)
    train_acc = calc_acc(x_train)
    test_acc = calc_acc(x_test)
    print('train_acc={}, test_acc={}'.format(train_acc, test_acc))
    got_nan = False
    tr_acc = []
    te_acc = []
    for epoch in range(epochs):
        np.random.shuffle(indices)
        for index, k in enumerate(indices):
            # one "batch" = all training sequences longer than the prefix length k
            feat = np.array([x[0:k] for x in x_train if len(x) > k])
            lab = np.array([x[k] for x in x_train if len(x) > k])
            h = model.fit(x=[feat], y=lab, verbose=False)
            if math.isnan(h.history['loss'][0]):
                print('\nWARNING: NaN occurred! Treating as an accuracy of 0.')
                got_nan = True
                break
            print('\repoch {} / {}, batch {} / {}'.format(epoch + 1, epochs, index + 1, len(indices)), end='')
            sys.stdout.flush()
        if got_nan:
            got_nan = False
            train_acc = 0
            test_acc = 0
            break
        train_acc = calc_acc(x_train)
        test_acc = calc_acc(x_test)
        tr_acc.append(train_acc)
        te_acc.append(test_acc)
        print('\ntrain_acc={}, test_acc={}'.format(train_acc, test_acc))
    train_accs.append(train_acc)
    test_accs.append(test_acc)
print('\nAverage: train_acc={}, test_acc={}'.format(np.average(train_accs),
np.average(test_accs)))
I want to compute the AUC of an ROC curve for this model. Since this is not binary classification, how can I obtain an ROC curve for it? I have found an implementation for the multiclass case ( https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#multiclass-settings ), but I can't really figure out how to apply it to my model. Any ideas or suggestions would be greatly appreciated. If you need more details, please ask and I will explain further.
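This is as far as I got when trying to adapt the one-vs-rest / micro-average idea from the linked example to my setup. It is only a sketch: it reuses x_test and max_length from the last fold and collects the softmax outputs the same way calc_acc builds feat/lab, and I'm not sure this is the right way to gather the per-prefix predictions:

#collect per-step softmax outputs and one-hot ground truth for the test fold
y_true = []
y_score = []
for k in range(1, max_length):
    feat = np.array([x[0:k] for x in x_test if len(x) > k])
    lab = np.array([x[k] for x in x_test if len(x) > k])
    if len(feat) == 0:
        continue
    y_true.append(lab)                     # shape (n_k, action_count), one-hot
    y_score.append(model.predict([feat]))  # shape (n_k, action_count), probabilities
y_true = np.concatenate(y_true)
y_score = np.concatenate(y_score)

#micro-averaged ROC/AUC over all 7 classes, as in the sklearn example
fpr, tpr, _ = roc_curve(y_true.ravel(), y_score.ravel())
print('micro-average AUC: {}'.format(auc(fpr, tpr)))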