MultiLayer神经网络提供错误的输出

时间:2014-04-10 06:21:16

标签: python artificial-intelligence neural-network

这是我正在使用的开源代码:

import math
import random
import string

class NN:
  def __init__(self, NI, NH, NO):
    # number of nodes in layers
    self.ni = NI + 1 # +1 for bias
    self.nh = NH
    self.no = NO

    # initialize node-activations
    self.ai, self.ah, self.ao = [],[], []
    self.ai = [1.0]*self.ni
    self.ah = [1.0]*self.nh
    self.ao = [1.0]*self.no

    # create node weight matrices
    self.wi = makeMatrix (self.ni, self.nh)
    self.wo = makeMatrix (self.nh, self.no)
    # initialize node weights to random vals
    randomizeMatrix ( self.wi, -0.2, 0.2 )
    randomizeMatrix ( self.wo, -2.0, 2.0 )
    # create last change in weights matrices for momentum
    self.ci = makeMatrix (self.ni, self.nh)
    self.co = makeMatrix (self.nh, self.no)

  def runNN (self, inputs):
    if len(inputs) != self.ni-1:
      print 'incorrect number of inputs'

    for i in range(self.ni-1):
      self.ai[i] = inputs[i]

    for j in range(self.nh):
      sum = 0.0
      for i in range(self.ni):
        sum +=( self.ai[i] * self.wi[i][j] )
      self.ah[j] = sigmoid (sum)

    for k in range(self.no):
      sum = 0.0
      for j in range(self.nh):        
        sum +=( self.ah[j] * self.wo[j][k] )
      self.ao[k] = sigmoid (sum)

    return self.ao



  def backPropagate (self, targets, N, M):
    # calc output deltas
    # we want to find the instantaneous rate of change of ( error with respect to weight from node j to node k)
    # output_delta is defined as an attribute of each ouput node. It is not the final rate we need.
    # To get the final rate we must multiply the delta by the activation of the hidden layer node in question.
    # This multiplication is done according to the chain rule as we are taking the derivative of the activation function
    # of the ouput node.
    # dE/dw[j][k] = (t[k] - ao[k]) * s'( SUM( w[j][k]*ah[j] ) ) * ah[j]
    output_deltas = [0.0] * self.no
    for k in range(self.no):
      error = targets[k] - self.ao[k]
      output_deltas[k] =  error * dsigmoid(self.ao[k]) 

    # update output weights
    for j in range(self.nh):
      for k in range(self.no):
        # output_deltas[k] * self.ah[j] is the full derivative of dError/dweight[j][k]
        change = output_deltas[k] * self.ah[j]
        self.wo[j][k] += N*change + M*self.co[j][k]
        self.co[j][k] = change

    # calc hidden deltas
    hidden_deltas = [0.0] * self.nh
    for j in range(self.nh):
      error = 0.0
      for k in range(self.no):
        error += output_deltas[k] * self.wo[j][k]
      hidden_deltas[j] = error * dsigmoid(self.ah[j])

    #update input weights
    for i in range (self.ni):
      for j in range (self.nh):
        change = hidden_deltas[j] * self.ai[i]
        #print 'activation',self.ai[i],'synapse',i,j,'change',change
        self.wi[i][j] += N*change + M*self.ci[i][j]
        self.ci[i][j] = change

    # calc combined error
    # 1/2 for differential convenience & **2 for modulus
    error = 0.0
    for k in range(len(targets)):
      error = 0.5 * (targets[k]-self.ao[k])**2
    return error

  def weights(self):
    print 'Input weights:'
    for i in range(self.ni):
      print self.wi[i]
    print
    print 'Output weights:'
    for j in range(self.nh):
      print self.wo[j]
    print ''

  def test(self, patterns):
    for p in patterns:
      inputs = p[0]
      print 'Inputs:', p[0], '-->', self.runNN(inputs), '\tTarget', p[1]

  def train (self, patterns, max_iterations = 1000, N=0.5, M=0.1):
    for i in range(max_iterations):
      for p in patterns:
        inputs = p[0]
        targets = p[1]
        self.runNN(inputs)
        error = self.backPropagate(targets, N, M)
      if i % 50 == 0:
        print 'Combined error', error
    self.test(patterns)


def sigmoid (x):
  return math.tanh(x)

def dsigmoid (y):
  return 1 - y**2

def makeMatrix ( I, J, fill=0.0):
  m = []
  for i in range(I):
    m.append([fill]*J)
  return m

def randomizeMatrix ( matrix, a, b):
  for i in range ( len (matrix) ):
    for j in range ( len (matrix[0]) ):
      matrix[i][j] = random.uniform(a,b)

def main ():

    #print mylist
    pat = [
    [ [0.0,0.0], [0.0] ],
    [ [0.0,0.5], [2.0] ],
    [ [0.0,1.0], [0.0] ],

    [ [0.5,0.0], [3.0] ],
    [ [0.5,0.5], [0.0] ],
    [ [0.5,1.0], [5.0] ],

    [ [1.0,0.0], [0.0] ],
    [ [1.0,0.5], [89.0] ],
    [ [1.0,1.0], [0.0] ]
    ]

    myNN = NN ( 2, 10, 1)
    myNN.train(pat)

if __name__ == "__main__":
    main()

但是,当我运行代码时,我得到几乎相同的输出,这是错误的。

Combined error 0.499991904422
Combined error 0.499996323964
Combined error 0.499997646742    
Combined error 0.499998277742
Combined error 0.499998645609
Combined error 0.499998885941
Combined error 0.499999054982
Combined error 0.49999918021
Combined error 0.499999276619    
Combined error 0.49999935308
Combined error 0.499999415171
Combined error 0.499999466571
Combined error 0.499999509808
Combined error 0.499999546673
Combined error 0.499999578468
Combined error 0.499999606167
Combined error 0.499999630508
Combined error 0.499999652063
Combined error 0.499999671282
Combined error 0.499999688523
Inputs: [0.0, 0.0] --> [0.9999971763261493]     Target [0.0]
Inputs: [0.0, 0.5] --> [0.9999991710833099]     Target [2.0]
Inputs: [0.0, 1.0] --> [0.9999996328965068]     Target [0.0]
Inputs: [0.5, 0.0] --> [0.9999976785687611]     Target [3.0]
Inputs: [0.5, 0.5] --> [0.9999992837399216]     Target [0.0]
Inputs: [0.5, 1.0] --> [0.9999996729737041]     Target [5.0]
Inputs: [1.0, 0.0] --> [0.9999980402687116]     Target [0.0]
Inputs: [1.0, 0.5] --> [0.9999993680567348]     Target [89.0]
Inputs: [1.0, 1.0] --> [0.9999997038262324]     Target [0.0]

代码或代码的使用有什么问题吗? 为什么我总是得到小于1的输出值?

1 个答案:

答案 0 :(得分:1)

您正在使用sigmmoid激活功能,并要求您的网络输出大于1的值,这是不可能的。缩小所有输出值的最大值(在您的情况下为89)。