First of all I will give an overview of my problem. I have two setups:
1) a net based on TensorFlow
2) a net based on the code from Michael Nielsen's book http://neuralnetworksanddeeplearning.com/index.html
Both nets are exactly equal: they both have three hidden layers of 30 sigmoid neurons each, two inputs and one output (see the code below).
My problem is that the TensorFlow results are very bad (roughly a factor of 10 worse than the results I get with Nielsen's code). So before I post the complete code: does anyone know of a bug in TensorFlow's stochastic gradient descent? (Or does anyone have a reference for how the learning rate of stochastic gradient descent is defined in TensorFlow? I cannot find anything in the API.)
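Regarding the learning-rate question, this is my current understanding (not from official documentation, so it may be incomplete): the learning rate enters only as the constructor argument of tf.train.GradientDescentOptimizer, which then applies the plain update w <- w - eta * dC/dw to every trainable variable. A minimal sketch with a made-up toy variable and cost:

import tensorflow as tf

w = tf.Variable(1.0)            # toy variable, for illustration only
some_cost = tf.square(w - 2.0)  # toy scalar cost, for illustration only
eta = 3.0                       # the learning rate is simply this constructor argument
train_step = tf.train.GradientDescentOptimizer(eta).minimize(some_cost)
# each sess.run(train_step) then performs one update w <- w - eta * d(some_cost)/dw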
This is my TensorFlow network code:
regression.py
import readData
import matplotlib.pyplot as plt
import numpy as np
from random import randint
import random
from root_numpy import fill_hist
from ROOT import TCanvas, TH2F, TText, TF1 ,TH1D
import ROOT
import tensorflow as tf
import math
##############
# Read in data
##############
function_outputs=True #apply an invertible function to the y's and train with the modified outputs y_mod! Up to now this function is just a normalization.
function_inputs=True #likewise for the x's: x_mod is the normalized input (see readData.py)
full_set = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","full_set",function_inputs,function_outputs)
N_full_set=full_set.get_N()
N_validation_set=10000
N_training_set=N_full_set-(N_validation_set)
full=range(0,N_full_set)
random.shuffle(full)
training_subset=full[:N_training_set]#indices for training set
validation_subset=full[N_training_set:N_training_set+N_validation_set]#indices for validation set
training_set = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","training_set",
                                      function_inputs,function_outputs,full_set=full_set,subset=training_subset)
validation_set = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","validation_set",
                                        function_inputs,function_outputs,full_set=full_set,subset=validation_subset)
#Overview of the full, training and validation data sets. The modified members (normalized in this case) can be accessed with the get_x_mod() and get_y_mod() member functions.
#The normalized data (inputs and outputs) is what the net is trained on.
print "full_data_set:"
print "x (inputs)"
print full_set.get_x()
print "y (outputs)"
print full_set.get_y()
print "x_mod"
print full_set.get_x_mod()
print "y_mod"
print full_set.get_y_mod()
print "------------------"
print "training_data_set:"
print "x (inputs)"
print training_set.get_x()
print "y (outputs)"
print training_set.get_y()
print "x_mod"
print training_set.get_x_mod()
print "y_mod"
print training_set.get_y_mod()
print "------------------"
print "evaluation_data_set:"
print "x (inputs)"
print validation_set.get_x()
print "y (outputs)"
print validation_set.get_y()
print "x_mod"
print validation_set.get_x_mod()
print "y_mod"
print validation_set.get_y_mod()
print "------------------"
#########################
# Setting up the network
#########################
N_epochs = 20
learning_rate = 3.0
batch_size = 10
N1 = 2 #equals N_inputs
N2 = 30
N3 = 30
N4 = 30
N5 = 1
N_in=N1
N_out=N5
#everything is calculated directly for all elements of a mini batch at once
"""example: N_in=2,N_out=3, mini_batch_size=5, activation function=linear. In der output matrix gibt es 5Zeilen,jede fuer ein mini batch. Jede Zeile hat 3 Spalten fuer ein output neuron jeweils
W2
[[-0.31917086 -0.03908769 0.5792625 ]
[ 1.34563279 0.03904691 0.39674851]]
b2
[ 0.40960133 -0.5495823 -0.97048181]
x_in
[[ 23.2 12.2 ]
[ 0. 1.1 ]
[ 2.3 3.3 ]
[ 23.22222 24.44444]
[ 333. 444. ]]
y=x_in*W2+b2
[[ 9.42155647 -0.98004436 17.30874062]
[ 1.88979745 -0.50663072 -0.53405845]
[ 4.1160965 -0.51062918 1.67109203]
[ 25.8909874 -0.50280523 22.17957497]
[ 491.5866394 3.77104688 368.08026123]]
Here it becomes clear that b2 is added onto every row of the matrix x_in*W2.
W2 is the transpose of the matrix that is defined in the book.
"""
x = tf.placeholder(tf.float32,[None,N1])#don't take the shape=(batch_size,N1) argument, because we need this for different batch sizes
W2 = tf.Variable(tf.random_normal([N1, N2],mean=0.0,stddev=1.0/math.sqrt(N1*1.0)))#initialize the weights of each neuron with stddev 1/sqrt(number of weights entering the neuron, i.e. number of neurons in the previous layer)
b2 = tf.Variable(tf.random_normal([N2]))
a2 = tf.sigmoid(tf.matmul(x, W2) + b2) #x=a1
W3 = tf.Variable(tf.random_normal([N2, N3],mean=0.0,stddev=1.0/math.sqrt(N2*1.0)))
b3 = tf.Variable(tf.random_normal([N3]))
a3 = tf.sigmoid(tf.matmul(a2, W3) + b3)
W4 = tf.Variable(tf.random_normal([N3, N4],mean=0.0,stddev=1.0/math.sqrt(N3*1.0)))
b4 = tf.Variable(tf.random_normal([N4]))
a4 = tf.sigmoid(tf.matmul(a3, W4) + b4)
W5 = tf.Variable(tf.random_normal([N4, N5],mean=0.0,stddev=1.0/math.sqrt(N4*1.0)))
b5 = tf.Variable(tf.random_normal([N5]))
y = tf.sigmoid(tf.matmul(a4, W5) + b5)
y_ = tf.placeholder(tf.float32,[None,N_out]) # ,shape=(None,N_out)
############################
# Initializing and training
############################
cost_function = tf.scalar_mul(1.0/(N_training_set*2.0),tf.reduce_sum(tf.squared_difference(y,y_)))
error_to_desired_output= y-y_
abs_error_to_desired_output= tf.abs(y-y_)
sum_abs_error_to_desired_output= tf.reduce_sum(tf.abs(y-y_))
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
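#For reference (my understanding of the optimizer API): minimize() computes the gradients
#of cost_function with respect to all trainable variables (W2..W5, b2..b5) and applies
#the plain update w <- w - learning_rate*gradient; the optimizer itself does no batch-size scaling.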
init = tf.initialize_all_variables()
#launch the graph
sess = tf.Session()
sess.run(init)
N_training_batch=training_set.get_N()/batch_size #integer division, rounds down
out_mod_validation=[0]*N_epochs # output of net, when inputting x_mod of validation data. Will be saved after each epoch.
error_mod_validation_data= [0]*N_epochs #absolute error on mod validation data after each epoch
diff_mod_validation=[0]*N_epochs # error vector of validation data after each epoch. i.e. y-y_
cost_training_data=[0]*N_epochs
for i in range(0,N_epochs):
    for j in range(0,N_training_batch):
        batch_xs, batch_ys, epochs_completed = training_set.next_batch(batch_size)#always returns the modified x's and y's; if one does not want to modify them, the function has to be set to the identity
        sess.run(train_step, feed_dict={x: batch_xs,
                                        y_: batch_ys})
    cost_training_data[i]=sess.run(cost_function, feed_dict={
        x: training_set.get_x_mod(), y_: training_set.get_y_mod()})
    out_mod_validation[i]=sess.run(y, feed_dict={
        x: validation_set.get_x_mod()})#output of the net when inputting x_mod of the validation data, after each training epoch
    diff_mod_validation[i]=sess.run(error_to_desired_output, feed_dict={
        x: validation_set.get_x_mod(),y_: validation_set.get_y_mod()})
    error_mod_validation_data[i]=sess.run(sum_abs_error_to_desired_output, feed_dict={
        x: validation_set.get_x_mod(),y_: validation_set.get_y_mod()})
    print "epochs completed: "+str(i)
#now calculate everything for the unmodified/unnormalized outputs
out_validation=[0]*N_epochs #output of the net when inputting x_mod of the validation data, with the normalization of the output undone; saved after each epoch
error_validation_data=[0.0]*N_epochs
diff_validation=[0.0]*N_epochs
#undo the transformation of the outputs
for i in range(0,N_epochs):
    out_validation[i]=np.ndarray(shape=(validation_set.get_N(),1))
    for j in range(0,len(out_mod_validation[i])):
        out_validation[i][j]=out_mod_validation[i][j]#copy element-wise, because otherwise we would only create a reference
    readData.apply_inverse_function_to_outputs(out_mod_validation[i],out_validation[i],full_set.get_y_max())#second argument will be changed!
    diff_validation[i]=np.subtract(out_validation[i],validation_set.get_y())
    error_validation_data[i]=np.sum(np.absolute(np.subtract(out_validation[i],validation_set.get_y())))
#print 10 examples to show how well the output matches the desired output
for i in range(0,10):
    print "desired output"
    print validation_set.get_y()[i][0]
    print "actual output after last training epoch"
    print out_validation[-1][i][0]
    print "-------"
print "total error on validation_data set after last training"
print error_validation_data[-1]
##########
# Printing
##########
plt.figure(1)
plt.title("Costfunction of (modified) Training-data")
plt.xlabel("epochs")
plt.ylabel("cost function")
x_range=[x+1 for x in range(0,N_epochs)]
plt.plot(x_range,cost_training_data)
plt.savefig("cost_on_training_data.png")
plt.figure(2)
plt.title("f data")
plt.xlabel("epochs")
plt.ylabel("total error on validation data")
x_range=[x+1 for x in range(0,N_epochs)]
plt.plot(x_range,error_validation_data)
plt.savefig("error_on_val_data.png")
error_on_validation_data_after_training = diff_validation[-1].reshape((1,validation_set.get_N()))
hist=TH1D('hist',"Errors on val data after last training epoch",200,-10000,10000)
fill_hist(hist,error_on_validation_data_after_training[0])
canvas=TCanvas()
hist.GetXaxis().SetTitle("desired Chi^2 - outputted Chi^2")
hist.Draw()
canvas.SaveAs('error_on_val_data_hist.png')
readData.py
import numpy as np
import root_numpy
from ROOT import TFile, TH2D, TCanvas
import itertools
def apply_function_to_inputs(x,x_mod,x_max):#Python passes everything by reference, so x_mod is modified in place
    #normalize the inputs
    for i in range(0,len(x)):
        for j in range(0,len(x[i])):
            #print "x["+str(i)+"]["+str(j)+"]="+str(x[i][j])
            x_mod[i][j]=x[i][j]/x_max[j]
            #print "x_mod["+str(i)+"]["+str(j)+"]="+str(x_mod[i][j])
def apply_inverse_function_to_inputs(x,x_mod,x_max):#Python passes everything by reference
    #undo the normalization of the inputs
    for i in range(0,len(x)):
        for j in range(0,len(x[i])):
            x_mod[i][j]=x[i][j]*x_max[j]
def apply_function_to_outputs(y,y_mod,y_max):#Python passes everything by reference
    #normalize the outputs
    for i in range(0,len(y)):
        for j in range(0,len(y[i])):
            y_mod[i][j]=y[i][j]/y_max[j]
def apply_inverse_function_to_outputs(y,y_mod,y_max):#Python passes everything by reference
    #undo the normalization of the outputs
    for i in range(0,len(y)):
        for j in range(0,len(y[i])):
            y_mod[i][j]=y[i][j]*y_max[j]
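#Note: applying apply_function_to_outputs and then apply_inverse_function_to_outputs is the
#identity ((y/y_max)*y_max == y); the same holds for the input pair above.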
class Dataset(object):
    def __init__(self,path,hist_name,kind_of_set,function_inputs,function_outputs,full_set,subset):
        self._kind_of_set=kind_of_set
        """example
        self._x np.ndarray(shape=(N_points,2))
        [[ 10. 95.]
         [ 10. 100.]
         [ 10. 105.]
         ...,
         [ 2490. 1185.]
         [ 2490. 1190.]
         [ 2490. 1195.]]
        self._y np.ndarray(shape=(N_points,1))
        [[ 0.00000000e+00]
         [ 0.00000000e+00]
         [ 0.00000000e+00]
         ...,
         [ 6.34848448e-06]
         [ 6.34845946e-06]
         [ 6.34848448e-06]]
        """
        rfile = TFile(path)
        histogram = rfile.Get(hist_name)
        #now prepare the data for training:
        if kind_of_set=="full_set":
            N_points=histogram.GetXaxis().GetNbins() * histogram.GetYaxis().GetNbins() #number of points in the full_set
            self._N=N_points
            self._y=np.ndarray(shape=(N_points,1))
            self._x=np.ndarray(shape=(N_points,2))
            self._y_mod=np.ndarray(shape=(N_points,1)) #outputs with the function applied, e.g. normalized
            self._x_mod=np.ndarray(shape=(N_points,2)) #inputs with the function applied
            self._y_max=np.ndarray(shape=(1))
            self._y_max[0]=0.0
            self._x_max=np.ndarray(shape=(2))
            self._x_max[0]=0.0
            self._x_max[1]=0.0
            i=0
            for x_bin in range(1, histogram.GetXaxis().GetNbins()+1):#ROOT bins are 1-indexed; bin 0 is the underflow bin
                for y_bin in range(1, histogram.GetYaxis().GetNbins()+1):
                    self._x[i][0]=histogram.GetXaxis().GetBinCenter(x_bin)
                    self._x[i][1]=histogram.GetYaxis().GetBinCenter(y_bin)
                    self._y[i][0]=histogram.GetBinContent(x_bin,y_bin)
                    for j in range(0,len(self._x[i])):#only in the full_set case are the maximum values calculated
                        if self._x[i][j]>self._x_max[j]:
                            self._x_max[j]=self._x[i][j]
                    for j in range(0,len(self._y[i])):
                        if self._y[i][j]>self._y_max[j]:
                            self._y_max[j]=self._y[i][j]
                    i=i+1
            #apply the function to inputs and outputs; the function can also be the identity
            apply_function_to_inputs(self._x,self._x_mod,self._x_max)
            apply_function_to_outputs(self._y,self._y_mod,self._y_max)
        elif kind_of_set=="training_set" or kind_of_set=="validation_set" or kind_of_set=="test_set":
            self._N = len(subset)#Number of elements of the data set
            self._y=np.ndarray(shape=(self._N,1))
            self._x=np.ndarray(shape=(self._N,2))
            self._y_mod=np.ndarray(shape=(self._N,1))
            self._x_mod=np.ndarray(shape=(self._N,2))
            self._y_max=full_set.get_y_max()
            self._x_max=full_set.get_x_max()
            for i in range(0,self._N):
                self._x[i][0]=full_set.get_x()[subset[i]][0]
                self._x[i][1]=full_set.get_x()[subset[i]][1]
                self._y[i][0]=full_set.get_y()[subset[i]][0]
                self._x_mod[i][0]=full_set.get_x_mod()[subset[i]][0]
                self._x_mod[i][1]=full_set.get_x_mod()[subset[i]][1]
                self._y_mod[i][0]=full_set.get_y_mod()[subset[i]][0]
        if len(self._x)==0:#if the set has 0 entries the list is empty
            self._N_input=-1
        else:
            self._N_input = len(self._x[0])
        if len(self._y)==0:#if the set has 0 entries the list is empty
            self._N_output=-1
        else:
            self._N_output = len(self._y[0])
        self._index_in_epoch = 0 #if one has already trained on 2 mini batches in this epoch, this equals 2*batch_size
        self._epochs_completed = 0
    def get_N_input_nodes(self):
        return self._N_input
    def get_N_output_nodes(self):
        return self._N_output
    def get_N(self):
        return self._N
    def get_x(self):
        return self._x
    def get_y(self):
        return self._y
    def get_x_max(self):
        return self._x_max
    def get_y_max(self):
        return self._y_max
    def get_x_mod(self):
        return self._x_mod
    def get_y_mod(self):
        return self._y_mod
    def next_batch(self, batch_size, fake_x=False):
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch >= self._N:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = np.arange(self._N)
            np.random.shuffle(perm)
            self._x = self._x[perm]#shuffle everything; strictly only x_mod and y_mod would need to be shuffled, but we shuffle all four for consistency
            self._y = self._y[perm]
            self._x_mod = self._x_mod[perm]
            self._y_mod = self._y_mod[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._N #an exception is thrown if batch_size > self._N
        end = self._index_in_epoch
        return self._x_mod[start:end], self._y_mod[start:end], self._epochs_completed
def read_data_set(path,hist_name,kind_of_set,function_inputs,function_outputs,full_set=None,subset=None):
    return Dataset(path,hist_name,kind_of_set,function_inputs,function_outputs,full_set,subset)
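For completeness, this is how the batching interface of readData.py is meant to be used (a minimal sketch; the subset indices and the number of steps are made up, the file and histogram names are the ones from my setup above):

import readData
full = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","full_set",True,True)
train = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","training_set",True,True,
                               full_set=full,subset=range(0,1000))
for step in range(0,100):
    #next_batch returns the normalized x's and y's plus the number of completed epochs
    batch_x, batch_y, epochs_done = train.next_batch(10)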
I have uploaded the corresponding data input file to https://github.com/kanban1992/GradientDescent_Comparison