How to use a CSV dataset for fault detection with a deep sparse denoising autoencoder

Date: 2018-04-04 03:45:28

Tags: python deep-learning autoencoder

Currently I am trying to build a deep sparse denoising autoencoder for fault detection on a CSV dataset. I followed https://blog.keras.io/building-autoencoders-in-keras.html and adapted it to numeric data. My dataset is: train
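(Side note: the tutorial's division by 255 assumes pixel values in [0, 255]. For an arbitrary numeric CSV, per-feature min-max scaling is the usual analogue; a minimal sketch with a toy array X, not the actual dataset:)

    import numpy as np

    X = np.array([[3.0, 200.0],
                  [1.0, 400.0],
                  [2.0, 300.0]])            # toy numeric features, one row per sample

    # scale each column to [0, 1] using its own min and max
    X_min = X.min(axis=0)
    X_max = X.max(axis=0)
    X_scaled = (X - X_min) / (X_max - X_min)
    print(X_scaled)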

And my code:

    import numpy as np
    from keras.layers import Input, Dense
    from keras.models import Model
    from keras import regularizers
    import csv

    x_train = []
    x_test = []                      # array definitions
    path1 = r'D:\train.csv'          # file paths
    path2 = r'D:\test.csv'           # was r'D:\train.csv'; the test rows should come from test.csv
    with open(path1, 'r') as f1:     # open the file for reading
        reader = csv.reader(f1)
        next(reader, None)           # skip the header row; it is not needed
        for row in reader:           # fill the array row by row
            x_train.append(row)
        x_train = np.array(x_train)

    with open(path2, 'r') as f2:
        reader2 = csv.reader(f2)
        next(reader2, None)          # skip the header row
        for row2 in reader2:
            x_test.append(row2)
        x_test = np.array(x_test)

    x_train = np.delete(x_train, [0], 1)    # drop the first column (the Index column in the header below)
    x_test = np.delete(x_test, [0], 1)

    x_train = np.delete(x_train, [0], 1)    # drop the next column (MachineID); it is not a feature
    x_test = np.delete(x_test, [0], 1)

    x_train = np.delete(x_train, [12], 1)   # drop the column at index 12, leaving the 12 feature columns
    x_test = np.delete(x_test, [12], 1)

    # this is our input placeholder: one vector of 12 features per row
    input_img = Input(shape=(12,))
    # "encoded" is the encoded representation of the input
    encoded = Dense(64, activation='relu',
                    activity_regularizer=regularizers.l1(10e-6))(input_img)
    encoded = Dense(128, activation='relu')(encoded)

    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(128, activation='relu')(encoded)
    decoded = Dense(64, activation='relu')(decoded)
    decoded = Dense(12, activation='sigmoid')(decoded)

    # this model maps an input to its reconstruction
    autoencoder = Model(input_img, decoded)
    # use a per-feature binary cross-entropy loss and the Adadelta optimizer
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

    # prepare the input data; the original tutorial loaded MNIST here:
    # (x_train, _), (x_test, _) = mnist.load_data()
    # normalize all values to [0, 1]; note the /255 divisor comes from the image
    # tutorial and assumes the raw feature values lie in [0, 255]
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.

    # add Gaussian noise for the denoising setup, then clip back to [0, 1]
    noise_factor = 0.4
    x_train_noisy = x_train + noise_factor * np.random.normal(size=x_train.shape)
    x_test_noisy = x_test + noise_factor * np.random.normal(size=x_test.shape)

    x_train_noisy = np.clip(x_train_noisy, 0.0, 1.0)
    x_test_noisy = np.clip(x_test_noisy, 0.0, 1.0)
    print(x_train.shape)
    print(x_test.shape)

    # train the autoencoder on noisy inputs, targeting the clean inputs
    print("Training ....")
    autoencoder.fit(x_train_noisy, x_train,
                    epochs=10,
                    batch_size=256,
                    shuffle=True)


    # save the reconstructions next to the other columns from the test file
    path3 = r'D:\result.csv'

    output = autoencoder.predict(x_test_noisy)
    with open(path3, 'w', newline='') as f3, open(path2, 'r') as f4:
        forest_Csv = csv.writer(f3)
        forest_Csv.writerow(["Index", "MachineID", "Running", "Started", "Evict",
                             "Finish", "Kill", "Lost", "Cpu", "Memory", "Disk time",
                             "CPI", "MAI", "CONCLUS", "fault detection"])
        test_file_object = csv.reader(f4)
        next(test_file_object, None)
        i = 0
        for row in test_file_object:
            # note: astype(np.uint8) truncates the sigmoid outputs, which all lie
            # in (0, 1), toward zero, so this cell becomes a 12-element vector
            row.insert(15, output[i].astype(np.uint8))
            forest_Csv.writerow(row)
            i += 1
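
For the fault-detection step itself, a common pattern is to threshold the per-row reconstruction error. The sketch below assumes autoencoder, x_test, and x_test_noisy as defined above; the 95th-percentile threshold is an illustrative assumption, not a rule:

    # per-row mean squared reconstruction error
    reconstructions = autoencoder.predict(x_test_noisy)
    errors = np.mean(np.square(reconstructions - x_test), axis=1)

    # flag the rows with the largest errors as potential faults
    threshold = np.percentile(errors, 95)   # illustrative cut-off
    faults = errors > threshold             # boolean mask, one entry per test row
    print("flagged", int(faults.sum()), "of", len(faults), "rows")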

Result: it returns a sequence of 12 digits per row. I don't know where I went wrong, or whether this approach is even correct. Can anyone help me? Thank you very much. Link to train.csv and test.csv: https://drive.google.com/open?id=1DtXr5RnF8I3FDGN5DnIboO50K_iNughU
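
(About the observed "12-digit sequence": output[i].astype(np.uint8) truncates the sigmoid activations, which all lie in (0, 1), down to zero, so each cell holds a 12-element vector of zeros. If a single fault score per row is wanted instead, a sketch that writes the per-row reconstruction error, assuming output, x_test, path3, and csv as above:)

    errors = np.mean(np.square(output - x_test), axis=1)   # one scalar per test row
    with open(path3, 'w', newline='') as f3:
        writer = csv.writer(f3)
        writer.writerow(["Index", "reconstruction_error"])
        for i, err in enumerate(errors):
            writer.writerow([i, float(err)])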

0 Answers:

No answers yet