I am currently trying to build a deep sparse denoising autoencoder for fault detection on a CSV dataset. I am following https://blog.keras.io/building-autoencoders-in-keras.html and adapting it to numeric data. My dataset: train.csv.
And my code:
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from keras.datasets import mnist
import matplotlib.pyplot as plt
from keras import regularizers
import csv
x_train = []
x_test = []                      # containers for the training and test rows
path1 = r'D:\train.csv'          # path to the training data
path2 = r'D:\test.csv'           # path to the test data
with open(path1, 'r') as f1:     # open the file for reading
    reader = csv.reader(f1)
    next(reader, None)           # skip the header row, it is not needed
    for row in reader:           # fill the array row by row
        x_train.append(row)
x_train = np.array(x_train)
with open(path2, 'r') as f2:
    reader2 = csv.reader(f2)
    next(reader2, None)
    for row2 in reader2:
        x_test.append(row2)
x_test = np.array(x_test)
x_train = np.delete(x_train, [0], 1)   # drop the Index column
x_test = np.delete(x_test, [0], 1)
x_train = np.delete(x_train, [0], 1)   # drop the MachineID column, it is not a feature
x_test = np.delete(x_test, [0], 1)
x_train = np.delete(x_train, [12], 1)  # drop the label column, leaving the 12 feature columns
x_test = np.delete(x_test, [12], 1)
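# sanity check (my own addition): the model below expects 12 features per row,
# so after the three column deletions both arrays should have 12 columns
assert x_train.shape[1] == 12 and x_test.shape[1] == 12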
# this is our input placeholder
input_img = Input(shape=(12,))
# "encoded" is the encoded representation of the input
encoded = Dense(64, activation='relu', activity_regularizer=regularizers.l1(10e-6))(input_img)
encoded = Dense(128, activation='relu')(encoded)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(128, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(decoded)
decoded = Dense(12, activation='sigmoid')(decoded)
# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)
# configure the model to use a per-feature binary crossentropy loss and the Adadelta optimizer
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
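# (my addition) print the architecture to verify the 12 -> 64 -> 128 -> 128 -> 64 -> 12 mapping
autoencoder.summary()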
# prepare the input data (the tutorial loads MNIST here; for the CSV data this stays commented out)
#(x_train, _), (x_test, _) = mnist.load_data()
# scale all values into [0, 1]; the tutorial's flattening of 28x28 images into 784-vectors is not needed here
x_train = x_train.astype('float32')/255.
x_test = x_test.astype('float32')/255.
#x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
#x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
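# NOTE (my assumption): dividing by 255 treats the features like pixel intensities in
# [0, 255]; my CSV columns may have very different ranges, so per-column min-max
# scaling might be more appropriate, e.g.:
# col_min = x_train.min(axis=0); col_max = x_train.max(axis=0)
# x_train = (x_train - col_min) / (col_max - col_min + 1e-8)
# x_test = (x_test - col_min) / (col_max - col_min + 1e-8)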
noise_factor = 0.4
x_train_noisy = x_train + noise_factor * np.random.normal(size=x_train.shape)
x_test_noisy = x_test + noise_factor * np.random.normal(size=x_test.shape)
x_train_noisy = np.clip(x_train_noisy, 0.0, 1.0)
x_test_noisy = np.clip(x_test_noisy, 0.0, 1.0)
print (x_train.shape)
print (x_test.shape)
# train the autoencoder for 10 epochs
print("Training ....")
autoencoder.fit(x_train_noisy, x_train,
                epochs=10,
                batch_size=256,
                shuffle=True)
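# NOTE (my addition): the tutorial also passes validation data to fit() so the loss on
# unseen rows can be monitored, e.g. validation_data=(x_test_noisy, x_test)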
# save the output
path3 = r'D:\result.csv'
output = autoencoder.predict(x_test_noisy)
with open(path3, 'w', newline='') as f3, open(path2, 'r') as f4:  # write the model output alongside the original test columns
    forest_Csv = csv.writer(f3)
    forest_Csv.writerow(["Index", "MachineID", "Running", "Started", "Evict", "Finish", "Kill", "Lost",
                         "Cpu", "Memory", "Disk time", "CPI", "MAI", "CONCLUS", "fault detection"])
    test_file_object = csv.reader(f4)
    next(test_file_object, None)
    i = 0
    for row in test_file_object:
        row.insert(15, output[i].astype(np.uint8))  # inserts the whole 12-value reconstruction as a single cell
        forest_Csv.writerow(row)
        i += 1
Result: each row gets a sequence of 12 numbers. I don't know where I went wrong, and I don't know whether my approach is even correct. Can anyone help me? Thank you very much. Link to train.csv and test.csv: https://drive.google.com/open?id=1DtXr5RnF8I3FDGN5DnIboO50K_iNughU
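For context, what I ultimately want is a single 0/1 fault flag per row, not 12 numbers. My rough idea (which may be exactly where I am wrong) is to collapse each 12-value reconstruction into a per-row error and threshold it. The sketch below is only how I imagine it, reusing x_test and output from the script above; the 95th-percentile cut-off is an arbitrary value I would still have to tune:

import numpy as np
# per-row reconstruction error: mean squared difference between the clean test rows
# and the autoencoder's reconstruction of their noisy versions
errors = np.mean((x_test - output) ** 2, axis=1)
# hypothetical cut-off: flag the 5% of rows the model reconstructs worst
threshold = np.percentile(errors, 95)
fault_flags = (errors > threshold).astype(np.uint8)   # 0 = normal, 1 = fault
print(fault_flags[:10])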