This program trains a network on an emoji icon (a square image), using ReLU as the activation function and gradient descent. When I run it, the values in Weights_init change on their own, and I do not understand why. That breaks the whole main loop, because I am trying to train the network with the same initial weights but different batch sizes.
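
For reference, here is a minimal, self-contained sketch (the names `W_init` and `W` are made up, not taken from the program below) that reproduces the same behaviour I am describing: after an in-place update of the working weights, the list I kept as the initial weights no longer holds its original values.

```
import numpy as np

W_init = [np.full((2, 2), 0.1)]  # stand-in for the initial weights
W = W_init                       # intended: "start training from the initial weights"
W[0] -= 0.05                     # one in-place update, like a gradient-descent step
print(W_init[0])                 # no longer all 0.1
```

The full program is below.
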
```
import numpy as np # needed for all the array math below
import matplotlib.pyplot as plt # for plotting
import cv2 # for loading the emoji image
#Activation function (ReLU)
def y_dydz(z): # ReLU: calculate sigma(z)=max(z,0) and sigma'(z)
    yval=z*(z>0)
    dydz=(z>0) # boolean array; behaves as 0/1 in the arithmetic below
    return(yval,dydz)
def forward_step(y_in,w,b): # calculate output value of a layer, from input y_in
z=np.dot(y_in,w)+b # w=weights for synaptic links between layers, b=bias vector for the layer
    # z is the pre-activation value of the layer
return(y_dydz(z)) # apply activation function sigma and return output as well as its derivative wrt z
def apply_network(y_in): # One forward propagation through the network
global Weights, Biases, NLayers
#NLayers = number of hidden layers + 1 = Number of layers - 1
global Ylayer, dydz_layer # for storing output from each layer and dy/dz values
y=y_in
Ylayer[0]=y #Inputs
    for j in range(NLayers): # loop through all layers, excluding the input layer
# j=0 corresponds to the first hidden layer above the input
y,dydz=forward_step(y,Weights[j],Biases[j]) # one step, into layer j
        dydz_layer[j]=dydz # store sigma'(z), needed later in backprop
        Ylayer[j+1]=y # store sigma(z), needed later in backprop
return(y)
def apply_network_only(y_in): # one forward pass through the network
    # same as apply_network(), but without storing y and dydz
y=y_in
for j in range(NLayers):
y,dydz=forward_step(y,Weights[j],Biases[j])
return(y)
def backward_step(Delta,w,dy):
    # Delta at layer n, of size batchsize x layersize(n)
# w between n-1 and n [layersize(n-1) x layersize(n) matrix]
# dy = dy/dz at layer n-1, of batchsize x layersize(n-1)
return( np.dot(Delta,np.transpose(w))*dy )
def backpropagation(y_true): #one backward pass through the network
# to calculate dCdW and dCdb and store them
global Ylayer, dydz_layer, Weights, Biases, NLayers
global dCdw_layer, dCdb_layer # dCost/dw and dCost/db
global batchsize
Delta=(Ylayer[-1]-y_true)*dydz_layer[-1]
dCdw_layer[-1]=np.dot(np.transpose(Ylayer[-2]),Delta)/batchsize
dCdb_layer[-1]=np.sum(Delta,axis=0)/batchsize
for j in range(NLayers-1):
Delta=backward_step(Delta,Weights[-1-j],dydz_layer[-2-j])
dCdw_layer[-2-j]=np.dot(np.transpose(Ylayer[-3-j]),Delta)/batchsize
dCdb_layer[-2-j]=np.sum(Delta,axis=0)/batchsize
####Gradient descent
def gradient_descent(eta): # update weights & biases (after backpropagation)
global dCdw_layer, dCdb_layer, Weights, Biases
##eta is learning rate or step parameter
for j in range(NLayers):
Weights[j]-=eta*dCdw_layer[j]
Biases[j]-=eta*dCdb_layer[j]
def train_one_step(y_in,y_true,eta): # one full training of a batch
# y_in is an array of size batchsize x number of inputs
# y_true is an array of size batchsize x number of outputs
# eta is the stepsize for the gradient descent method
global y_out
y_out=apply_network(y_in) #Ylayer, dydz are calculated and stored
backpropagation(y_true) #dCdw and dCdb are calculated and stored
gradient_descent(eta)
cost=0.5*(np.sum((y_true-y_out)**2))/batchsize
return(cost)
def Validation(y_in,y_true,nvalid): # evaluate the cost on a validation batch (no weight update)
    # y_in is an array of size nvalid x number of inputs
    # y_true is an array of size nvalid x number of outputs
    y_out=apply_network_only(y_in) # forward pass only; nothing is stored
cost=0.5*(np.sum((y_true-y_out)**2))/nvalid
return(cost)
# set up all the layers with neurons
LayerSizes=[2,100,100,100,1] # input,hidden1,hidden2,...,output
NLayers=int(len(LayerSizes)-1) # does not count the input-layer (but does count the output-layer)
# Initialize the weights and biases
Weights_init=[np.random.uniform(low=-0.1,high=+0.1,size=[ LayerSizes[j],LayerSizes[j+1] ]) for j in range(NLayers)]
Biases_init=[np.zeros(LayerSizes[j+1]) for j in range(NLayers)]
# set up all the storage lists
Ylayer=[np.zeros(LayerSizes[j]) for j in range(NLayers+1)]
dydz_layer=[np.zeros(LayerSizes[j+1]) for j in range(NLayers)]
dCdw_layer=[np.zeros([LayerSizes[j],LayerSizes[j+1]]) for j in range(NLayers)]
dCdb_layer=[np.zeros(LayerSizes[j+1]) for j in range(NLayers)]
#from PIL import Image
# load the pixel image!
pics=cv2.imread('emoji1.png')
pixel_image=np.transpose(pics[:,:,0]) # have to transpose...
pixel_image=pixel_image[:,::-1] # and flip... to get the right view!
pixel_image-=np.min(pixel_image)
pixel_image=(pixel_image.astype(dtype='float'))/pixel_image.max() # normalize between 0 and 1!
Npixels=np.shape(pixel_image)[0] # assuming a square image!
# the function we want to have (desired outcome)
def CustomFunc(x0,x1):
global pixel_image, Npixels
# convert to integer coordinates (assuming input is 0..1)
x0int=(x0*Npixels*0.99999).astype(dtype='int')
x1int=(x1*Npixels*0.99999).astype(dtype='int')
return(pixel_image[x0int,x1int]) # extract color values at these pixels
# check that this works:
Npixels_Test=100 # do the test output on a low-res grid! (saves time)
xrange=np.linspace(0,1,Npixels_Test)
x0,x1=np.meshgrid(xrange,xrange)
plt.imshow(CustomFunc(x0,x1), cmap='gray', interpolation='nearest',origin='lower')
plt.axis('off')
#plt.colorbar()
plt.show()
######################################################################################
cases=[800,1600,2400]
lvals=[]
for batchsize in cases:
Weights=np.array([])
Biases=np.array([])
Weights=Weights_init
print("Trill",Weights_init)
Biases=Biases_init
    def make_train_batch(): # random positions in the 2D unit square
global batchsize
inputs=np.random.uniform(low=0,high=1,size=[batchsize,2])
Trueval=np.zeros([batchsize,1]) # must have right dimensions
Trueval[:,0]=CustomFunc(inputs[:,0],inputs[:,1])
return(inputs,Trueval)
nvalid=3000
    def make_valid_batch(nvalid): # random positions in the 2D unit square, for validation
inputs=np.random.uniform(low=0,high=1,size=[nvalid,2])
Trueval=np.zeros([nvalid,1]) # Careful about right dimensions
Trueval[:,0]=CustomFunc(inputs[:,0],inputs[:,1])
return(inputs,Trueval)
    # Train the neural network for nsteps steps
eta=0.75 #learning rate
nsteps=1000
costt=np.zeros(nsteps) #for training samples
costv=np.zeros(nsteps) #for validation samples
valid_inp,valid_out=make_valid_batch(nvalid)
best=100.0
for k in range(nsteps):
train_inp,train_out=make_train_batch()
costt[k]=train_one_step(train_inp,train_out,eta)
costv[k]=Validation(valid_inp,valid_out,nvalid)
if k%50 == 0:
print(k,costt[k],costv[k])
        if costv[k] < best: ## store the weights/biases of the best model so far
best=costv[k]
SWeights=Weights
SBiases=Biases
# print(k,costt[k],costv[k])
plt.plot(costt)
plt.plot(costv)
plt.show()
lvals.append(costv)
###################################################################
#Use the best model for testing
Weights=SWeights
Biases=SBiases
# Testing the model
# in this case the test batch contains all points of the Npixels_Test x Npixels_Test image grid
test_batchsize=np.shape(x0)[0]*np.shape(x0)[1]
testsample=np.zeros([test_batchsize,2])
testsample[:,0]=np.ndarray.flatten(x0)
testsample[:,1]=np.ndarray.flatten(x1)
# check the output of this net
testoutput=apply_network_only(testsample)
# show this!
myim=plt.imshow(np.reshape(testoutput,np.shape(x0)), cmap='gray', origin='lower',interpolation='nearest',vmin=0.0,vmax=1.0)
plt.axis('off')
#plt.colorbar()
plt.show()
colors=['r','g','b']
for i in range(len(lvals)):
plt.plot(lvals[i],colors[i])
plt.show()
```