This program trains a network on an emoji icon (a square image), using ReLU as the activation function and gradient descent. When I run it, the values in Weights_init change on their own, and I do not understand why. That breaks the whole main loop, because I am trying to train the network with the same initial weights but different batch sizes.
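
For reference, here is a minimal, self-contained sketch (the names `W_init` and `W` are made up, not taken from the program below) that reproduces the same behaviour I am describing: after an in-place update of the working weights, the list I kept as the initial weights no longer holds its original values.

```
import numpy as np

W_init = [np.full((2, 2), 0.1)]  # stand-in for the initial weights
W = W_init                       # intended: "start training from the initial weights"
W[0] -= 0.05                     # one in-place update, like a gradient-descent step
print(W_init[0])                 # no longer all 0.1
```

The full program is below.
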
```
import numpy as np # needed for all the array math below
import matplotlib.pyplot as plt # for plotting
import cv2 # for loading the emoji image
#Activation function (ReLU)
def y_dydz(z): # ReLU: calculate sigma(z)=max(z,0) and sigma'(z)
    yval=z*(z>0)
    dydz=(z>0) # boolean array; behaves as 0/1 in the arithmetic below
    return(yval,dydz)
def forward_step(y_in,w,b): # calculate output value of a layer, from input y_in
z=np.dot(y_in,w)+b # w=weights for synaptic links between layers, b=bias vector for the layer
    # z is the pre-activation value of the layer
return(y_dydz(z)) # apply activation function sigma and return output as well as its derivative wrt z
def apply_network(y_in): # One forward propagation through the network
global Weights, Biases, NLayers
#NLayers = number of hidden layers + 1 = Number of layers - 1
global Ylayer, dydz_layer # for storing output from each layer and dy/dz values
y=y_in
Ylayer[0]=y #Inputs
    for j in range(NLayers): # loop through all layers, excluding the input layer
# j=0 corresponds to the first hidden layer above the input
y,dydz=forward_step(y,Weights[j],Biases[j]) # one step, into layer j
        dydz_layer[j]=dydz # store sigma'(z), needed later in backprop
        Ylayer[j+1]=y # store sigma(z), needed later in backprop
return(y)
def apply_network_only(y_in): # one forward pass through the network
    # same as apply_network(), but without storing y and dydz
y=y_in
for j in range(NLayers):
y,dydz=forward_step(y,Weights[j],Biases[j])
return(y)
def backward_step(Delta,w,dy):
    # Delta at layer n, of size batchsize x layersize(n)
# w between n-1 and n [layersize(n-1) x layersize(n) matrix]
# dy = dy/dz at layer n-1, of batchsize x layersize(n-1)
return( np.dot(Delta,np.transpose(w))*dy )
def backpropagation(y_true): #one backward pass through the network
# to calculate dCdW and dCdb and store them
global Ylayer, dydz_layer, Weights, Biases, NLayers
global dCdw_layer, dCdb_layer # dCost/dw and dCost/db
global batchsize
Delta=(Ylayer[-1]-y_true)*dydz_layer[-1]
dCdw_layer[-1]=np.dot(np.transpose(Ylayer[-2]),Delta)/batchsize
dCdb_layer[-1]=np.sum(Delta,axis=0)/batchsize
for j in range(NLayers-1):
Delta=backward_step(Delta,Weights[-1-j],dydz_layer[-2-j])
dCdw_layer[-2-j]=np.dot(np.transpose(Ylayer[-3-j]),Delta)/batchsize
dCdb_layer[-2-j]=np.sum(Delta,axis=0)/batchsize
####Gradient descent
def gradient_descent(eta): # update weights & biases (after backpropagation)
global dCdw_layer, dCdb_layer, Weights, Biases
##eta is learning rate or step parameter
for j in range(NLayers):
Weights[j]-=eta*dCdw_layer[j]
Biases[j]-=eta*dCdb_layer[j]
def train_one_step(y_in,y_true,eta): # one full training of a batch
# y_in is an array of size batchsize x number of inputs
# y_true is an array of size batchsize x number of outputs
# eta is the stepsize for the gradient descent method
global y_out
y_out=apply_network(y_in) #Ylayer, dydz are calculated and stored
backpropagation(y_true) #dCdw and dCdb are calculated and stored
gradient_descent(eta)
cost=0.5*(np.sum((y_true-y_out)**2))/batchsize
return(cost)
def Validation(y_in,y_true,nvalid): # evaluate the cost on a validation batch (no weight update)
    # y_in is an array of size nvalid x number of inputs
    # y_true is an array of size nvalid x number of outputs
    y_out=apply_network_only(y_in) # forward pass only; nothing is stored
cost=0.5*(np.sum((y_true-y_out)**2))/nvalid
return(cost)
# set up all the layers with neurons
LayerSizes=[2,100,100,100,1] # input,hidden1,hidden2,...,output
NLayers=int(len(LayerSizes)-1) # does not count the input-layer (but does count the output-layer)
# Initialize the weights and biases
Weights_init=[np.random.uniform(low=-0.1,high=+0.1,size=[ LayerSizes[j],LayerSizes[j+1] ]) for j in range(NLayers)]
Biases_init=[np.zeros(LayerSizes[j+1]) for j in range(NLayers)]
# set up all the storage lists
Ylayer=[np.zeros(LayerSizes[j]) for j in range(NLayers+1)]
dydz_layer=[np.zeros(LayerSizes[j+1]) for j in range(NLayers)]
dCdw_layer=[np.zeros([LayerSizes[j],LayerSizes[j+1]]) for j in range(NLayers)]
dCdb_layer=[np.zeros(LayerSizes[j+1]) for j in range(NLayers)]
#from PIL import Image
# load the pixel image!
pics=cv2.imread('emoji1.png')
pixel_image=np.transpose(pics[:,:,0]) # have to transpose...
pixel_image=pixel_image[:,::-1] # and flip... to get the right view!
pixel_image-=np.min(pixel_image)
pixel_image=(pixel_image.astype(dtype='float'))/pixel_image.max() # normalize between 0 and 1!
Npixels=np.shape(pixel_image)[0] # assuming a square image!
# the function we want to have (desired outcome)
def CustomFunc(x0,x1):
global pixel_image, Npixels
# convert to integer coordinates (assuming input is 0..1)
x0int=(x0*Npixels*0.99999).astype(dtype='int')
x1int=(x1*Npixels*0.99999).astype(dtype='int')
return(pixel_image[x0int,x1int]) # extract color values at these pixels
# check that this works:
Npixels_Test=100 # do the test output on a low-res grid! (saves time)
xrange=np.linspace(0,1,Npixels_Test)
x0,x1=np.meshgrid(xrange,xrange)
plt.imshow(CustomFunc(x0,x1), cmap='gray', interpolation='nearest',origin='lower')
plt.axis('off')
#plt.colorbar()
plt.show()
######################################################################################
cases=[800,1600,2400]
lvals=[]
for batchsize in cases:
Weights=np.array([])
Biases=np.array([])
Weights=Weights_init
print("Trill",Weights_init)
Biases=Biases_init
    def make_train_batch(): # random positions in the 2D unit square
global batchsize
inputs=np.random.uniform(low=0,high=1,size=[batchsize,2])
Trueval=np.zeros([batchsize,1]) # must have right dimensions
Trueval[:,0]=CustomFunc(inputs[:,0],inputs[:,1])
return(inputs,Trueval)
nvalid=3000
    def make_valid_batch(nvalid): # random positions in the 2D unit square, for validation
inputs=np.random.uniform(low=0,high=1,size=[nvalid,2])
Trueval=np.zeros([nvalid,1]) # Careful about right dimensions
Trueval[:,0]=CustomFunc(inputs[:,0],inputs[:,1])
return(inputs,Trueval)
    # Train the neural network for nsteps steps
eta=0.75 #learning rate
nsteps=1000
costt=np.zeros(nsteps) #for training samples
costv=np.zeros(nsteps) #for validation samples
valid_inp,valid_out=make_valid_batch(nvalid)
best=100.0
for k in range(nsteps):
train_inp,train_out=make_train_batch()
costt[k]=train_one_step(train_inp,train_out,eta)
costv[k]=Validation(valid_inp,valid_out,nvalid)
if k%50 == 0:
print(k,costt[k],costv[k])
        if costv[k] < best: ## store the weights/biases of the best model so far
best=costv[k]
SWeights=Weights
SBiases=Biases
# print(k,costt[k],costv[k])
plt.plot(costt)
plt.plot(costv)
plt.show()
lvals.append(costv)
###################################################################
#Use the best model for testing
Weights=SWeights
Biases=SBiases
# Testing the model
# in this case the test batch contains all points of the Npixels_Test x Npixels_Test image grid
test_batchsize=np.shape(x0)[0]*np.shape(x0)[1]
testsample=np.zeros([test_batchsize,2])
testsample[:,0]=np.ndarray.flatten(x0)
testsample[:,1]=np.ndarray.flatten(x1)
# check the output of this net
testoutput=apply_network_only(testsample)
# show this!
myim=plt.imshow(np.reshape(testoutput,np.shape(x0)), cmap='gray', origin='lower',interpolation='nearest',vmin=0.0,vmax=1.0)
plt.axis('off')
#plt.colorbar()
plt.show()
colors=['r','g','b']
for i in range(len(lvals)):
plt.plot(lvals[i],colors[i])
plt.show()
```