我正在尝试在我拥有的二进制数据集上训练神经网络。精度从0.064开始,到第10个时期变为0.08,然后在第60个时期收敛到0。任何人都可以就我在实施中遇到的问题提出一些建议吗?
import tensorflow as tf
import numpy as np
import math
'''
input -> weights -> hidden layer 1 (activation function) -> weights -> hidden layer 2 -> ... -> weights -> output layer
^feed-forward
compare output to intended output = cost function (cross entropy)
optimization function (optimizer) -> minimize cost (AdamOptimizer, AdaGrad, and others)
^backpropagation
feed-forward + backpropagation = epoch (1 cycle)
'''
def nnModel(data, keep_prob):
    """Build a 4-hidden-layer fully-connected ReLU network and return logits.

    Args:
        data: 2-D float tensor of input features, shape [batch, num_features].
        keep_prob: scalar tensor — dropout keep probability applied to the
            last hidden layer only.

    Returns:
        Logits tensor of shape [batch, numClasses] (no softmax applied;
        pair with softmax_cross_entropy_with_logits).

    NOTE(review): reads the module-level globals `trainX`, `hl1Nodes`..`hl4Nodes`
    and `numClasses` — they must be defined before this is called.
    """
    def _layer_vars(fan_in, fan_out):
        # He initialization: stddev = sqrt(2/fan_in) keeps ReLU activation
        # variance stable across layers. The original code used
        # tf.random_normal's default stddev=1.0 for both weights and biases,
        # which explodes activations through four 500-unit layers and
        # saturates the softmax — the likely cause of training collapsing.
        return {
            'weights': tf.Variable(
                tf.random_normal([fan_in, fan_out],
                                 stddev=math.sqrt(2.0 / fan_in))),
            'biases': tf.Variable(tf.zeros([fan_out])),
        }

    num_inputs = len(trainX[0])
    hidden1 = _layer_vars(num_inputs, hl1Nodes)
    hidden2 = _layer_vars(hl1Nodes, hl2Nodes)
    hidden3 = _layer_vars(hl2Nodes, hl3Nodes)
    hidden4 = _layer_vars(hl3Nodes, hl4Nodes)
    output = _layer_vars(hl4Nodes, numClasses)

    # (inputData * weight) + bias, ReLU after every hidden layer.
    l1 = tf.nn.relu(tf.add(tf.matmul(data, hidden1['weights']), hidden1['biases']))
    l2 = tf.nn.relu(tf.add(tf.matmul(l1, hidden2['weights']), hidden2['biases']))
    l3 = tf.nn.relu(tf.add(tf.matmul(l2, hidden3['weights']), hidden3['biases']))
    l4 = tf.nn.relu(tf.add(tf.matmul(l3, hidden4['weights']), hidden4['biases']))

    drop_out = tf.nn.dropout(l4, keep_prob)
    theOutput = tf.matmul(drop_out, output['weights']) + output['biases']
    return theOutput
def shuffle(a, b):
    """Reorder two equal-length arrays by one shared random permutation.

    Both inputs are indexed with the same permutation, so corresponding
    rows of *a* and *b* stay paired after shuffling.
    """
    assert len(a) == len(b)
    order = np.random.permutation(len(a))
    return a[order], b[order]
def nnTrain(inputData, keep_prob, x1, y1, x2, y2):
    """Train the network with mini-batch Adagrad and print per-epoch loss/accuracy.

    Args:
        inputData: feature placeholder (the module-level `x`).
        keep_prob: dropout keep-probability placeholder.
        x1, y1: training features and one-hot labels (numpy arrays).
        x2, y2: held-out test features and one-hot labels (numpy arrays).

    NOTE(review): the loss and feed dicts use the module-level placeholders
    `x` and `y`, and the batch size comes from the global `numBatches`
    (which is a batch SIZE despite its name) — confirm those globals exist.
    """
    prediction = nnModel(inputData, keep_prob)
    # Cross-entropy on raw logits; labels are assumed one-hot.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdagradOptimizer(.0008).minimize(cost)
    numEpochs = 200

    trainX = x1
    trainY = y1
    testX = x2
    testY = y2

    # Build the evaluation ops ONCE, outside the epoch loop. The original
    # re-created `correct`/`accuracy` every epoch, which silently adds new
    # nodes to the graph on each iteration.
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(numEpochs):
            epochCost = 0
            i = 0
            # Sequential mini-batches over the full training set.
            while i < trainX.shape[0]:
                batchX = np.array(trainX[i:i + numBatches])
                batchY = np.array(trainY[i:i + numBatches])
                _, c = sess.run(
                    [optimizer, cost],
                    feed_dict={x: batchX, y: batchY, keep_prob: 0.50})
                epochCost += c
                i += numBatches
            print('Epoch', epoch, 'completed out of', numEpochs, 'loss', epochCost)
            # Dropout disabled (keep_prob=1.0) for evaluation.
            print('Accuracy', accuracy.eval({x: testX, y: testY, keep_prob: 1.0}))
if __name__ == '__main__':
    # Everything below runs at module level, so these names are globals
    # read by nnModel/nnTrain. (The original `global trainX` / `global trainY`
    # statements were no-ops — `global` only has meaning inside a function —
    # and have been removed.)
    keep_prob = tf.placeholder(tf.float32)

    # Load features and one-hot labels; the first 125 rows are held out
    # as the test split, the remainder is used for training.
    trainX_ = np.loadtxt("newBTRAIN.csv", delimiter=',')
    trainY_ = np.loadtxt("yT.csv", delimiter=',')
    trainX = trainX_[125:, :]
    trainY = trainY_[125:, :]
    testX = trainX_[:125, :]
    testY = trainY_[:125, :]

    # Network hyper-parameters (read as globals by nnModel).
    hl1Nodes = 500
    hl2Nodes = 500
    hl3Nodes = 500
    hl4Nodes = 500
    numClasses = trainY.shape[1]
    print(numClasses)
    numBatches = 100  # NOTE(review): this is the mini-batch SIZE, not a count

    x = tf.placeholder('float', [None, len(trainX[0])])
    y = tf.placeholder('float')
    nnTrain(x, keep_prob, trainX, trainY, testX, testY)