TensorFlow:为什么我的准确度会收敛到0?

时间:2017-03-26 13:11:29

标签: python machine-learning tensorflow neural-network deep-learning

我正在尝试用自己的一个二进制数据集训练神经网络。准确度从0.064开始,到第10个epoch(训练轮次)升至0.08,随后在第60个epoch收敛到0。有人能指出我的实现中可能存在什么问题吗?

import tensorflow as tf
import numpy as np
import math

'''
input -> weights -> hidden layer 1 (activation function) -> weights -> hidden layer 2 -> ... -> weights -> output layer
^feed-forward

compare output to intended output = cost function (cross entropy)

optimization function (optimizer) -> minimize cost (AdamOptimizer, AdaGrad, and others)
^backpropagation

feed-forward + backpropagation = epoch (1 cycle)
'''


def _denseParams(fanIn, fanOut):
    """Create the weight matrix and bias vector for one fully connected layer."""
    # Scale the initial weights by sqrt(2 / fanIn) (He initialisation for ReLU).
    # Unit-variance weights on 500-wide layers make the pre-activations grow
    # layer after layer, so the logits become huge and the softmax saturates.
    stddev = math.sqrt(2.0 / fanIn)
    return {'weights': tf.Variable(tf.random_normal([fanIn, fanOut], stddev=stddev)),
            'biases': tf.Variable(tf.zeros([fanOut]))}


def nnModel(data, keep_prob):
    """Build the feed-forward graph: four ReLU hidden layers, dropout, linear output.

    Args:
        data: 2-D input tensor whose second (feature) dimension is statically known.
        keep_prob: dropout keep probability applied to the last hidden layer.

    Returns:
        The un-normalised logits tensor with ``numClasses`` columns.

    Raises:
        ValueError: if the feature dimension of ``data`` is not statically known.

    The hidden layer widths (``hl1Nodes`` .. ``hl4Nodes``) and ``numClasses`` are
    read from module-level variables.
    """
    # Take the input width from the tensor itself rather than from the global
    # training array, so the model does not depend on `trainX` existing.
    numInputs = data.get_shape().as_list()[1]
    if numInputs is None:
        raise ValueError("nnModel needs an input tensor with a known feature dimension")

    layerSizes = [numInputs, hl1Nodes, hl2Nodes, hl3Nodes, hl4Nodes]

    # (inputData * weights) + biases, followed by ReLU, for each hidden layer.
    activations = data
    for fanIn, fanOut in zip(layerSizes[:-1], layerSizes[1:]):
        hidden = _denseParams(fanIn, fanOut)
        preActivation = tf.add(tf.matmul(activations, hidden['weights']), hidden['biases'])
        activations = tf.nn.relu(preActivation)

    drop_out = tf.nn.dropout(activations, keep_prob)

    # The output layer stays linear: the loss applies the softmax itself.
    output = _denseParams(hl4Nodes, numClasses)
    theOutput = tf.matmul(drop_out, output['weights']) + output['biases']

    return theOutput

def shuffle(a, b):
    """Return ``a`` and ``b`` reordered by one shared random permutation.

    Both arguments must have the same length and support indexing with an
    integer index array. Row ``i`` of ``a`` stays paired with row ``i`` of ``b``.
    """
    assert len(a) == len(b)
    order = np.random.permutation(len(a))
    shuffledA = a[order]
    shuffledB = b[order]
    return shuffledA, shuffledB

def nnTrain(inputData, keep_prob, x1, y1, x2, y2):
    """Train the network on (x1, y1) and print accuracy on (x2, y2) after every epoch.

    Args:
        inputData: input placeholder the model is built on; batches are fed to it.
        keep_prob: dropout keep-probability placeholder (fed 0.5 for training
            steps and 1.0 for evaluation).
        x1, y1: training inputs and labels, sliced along the first axis into batches.
        x2, y2: evaluation inputs and labels, fed in a single pass.

    The label placeholder ``y`` and the mini-batch size ``numBatches`` are read
    from module-level variables.
    """
    prediction = nnModel(inputData, keep_prob)
    print("8787****l",prediction)
    print(y1)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
    optimizer = tf.train.AdagradOptimizer(.0008).minimize(cost)

    # Build the evaluation ops once. Creating them inside the epoch loop adds
    # new nodes to the graph on every epoch, which wastes memory and time.
    correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    numEpochs = 200
    numRecords = x1.shape[0]

    # Boundaries of ten equal slices of the training set; currently only printed.
    indexMult = math.floor(numRecords/10)
    trainToTestIndices = [i*indexMult for i in range(10)]
    trainToTestIndices.append(numRecords-1)
    print(trainToTestIndices)

    testX = x2
    testY = y2
    trainX = x1
    trainY = y1
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(numEpochs):
            print("8787****l",prediction)
            epochCost = 0

            # One optimisation step per consecutive mini-batch (the last may be short).
            for start in range(0, trainX.shape[0], numBatches):
                end = start+numBatches
                batchX = np.array(trainX[start:end])
                batchY = np.array(trainY[start:end])
                # Feed the placeholder this model was built on (inputData), not a global.
                _, c = sess.run([optimizer, cost], feed_dict = {inputData: batchX, y: batchY, keep_prob:0.50})
                epochCost += c
            print('Epoch', epoch, 'completed out of', numEpochs, 'loss', epochCost)
            # Dropout is disabled (keep_prob = 1.0) while measuring accuracy.
            print('Accuracy', sess.run(accuracy, feed_dict = {inputData: testX, y: testY, keep_prob: 1.0}))

if __name__ == '__main__':
    # Script entry point: load the CSV data, hold out the first rows for
    # evaluation, define the network dimensions and start training.
    keep_prob = tf.placeholder(tf.float32)

    # Number of leading rows kept aside as the evaluation set.
    numHeldOut = 125

    # These names are module-level, so nnModel and nnTrain can read them
    # without any `global` declaration.
    allX = np.loadtxt("newBTRAIN.csv", delimiter=',')
    trainX = allX[numHeldOut:, :]
    allY = np.loadtxt("yT.csv", delimiter=',')
    trainY = allY[numHeldOut:, :]
    testX = allX[:numHeldOut, :]
    testY = allY[:numHeldOut, :]

    # Width of each of the four hidden layers.
    hl1Nodes = hl2Nodes = hl3Nodes = hl4Nodes = 500

    # One output unit per label column.
    numClasses = trainY.shape[1]
    print(numClasses)

    # Despite the name, this is the number of rows per mini-batch.
    numBatches = 100

    numFeatures = len(trainX[0])
    x = tf.placeholder('float', [None, numFeatures])
    y = tf.placeholder('float')
    nnTrain(x, keep_prob, trainX, trainY, testX, testY)

0 个答案:

没有答案