Neural network with backpropagation cannot overfit small data

Time: 2018-04-24 15:19:56

Tags: neural-network racket backpropagation

I am trying to implement a neural network with the backpropagation algorithm in Racket. To test the implementation, I decided to train it on a very small data set for a large number of iterations and check whether it fits the data it was trained on. It does not: with the sigmoid function it outputs extremely small values (on the order of 1e-20), although the relative values are correct (that is, the input vector with the largest target value also produces the largest value in the trained network). With the relu function, the magnitude of the outputs is closer to what is expected, but their relative order is wrong. I would be glad for any insight into why this is so.

#lang racket

; activation function. fn - the function, dfn - its derivative
(struct activation (fn dfn))

; activation using sigmoid
(define sigmoid-a (let ([sigmoid (lambda (x)
                                   (/ 1 (+ 1 (exp (- x)))))])
                    (activation sigmoid
                                (lambda (x)
                                  (* (sigmoid x) (- 1 (sigmoid x)))))))

; activation using (leaky) relu, with slope 0.2 for negative inputs
(define relu-a (activation (lambda (x) (if (< x 0)
                                           (* 0.2 x)
                                           x))
                           (lambda (x) (if (< x 0)
                                           0.2
                                           1))))
; neuron. The bias is stored as an implicit first weight, paired with a constant -1 input
(struct neuron (weights) #:transparent)

; neuron output before applying activation function
(define (neuron-out-unactivated neuron inputs)
  (foldl (lambda(w in result)
           (+ result (* w in)))
         0
         (neuron-weights neuron)
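         ; prepend the constant -1 input that multiplies the bias weight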
         (cons -1 inputs)))

; neuron output with activation function applied
(define (neuron-out neuron inputs activation-fn)
  (activation-fn (neuron-out-unactivated neuron inputs)))

; neuron layer
(struct layer (neurons) #:transparent)

; list of layer's neurons' output, before activation function
(define (layer-out-unactivated layer inputs)
  (map (lambda(neuron)
         (neuron-out-unactivated neuron inputs))
       (layer-neurons layer)))

; list of layer's neurons' output with activation function applied
(define (layer-out layer inputs activation-fn)
  (map (lambda(neuron)
         (neuron-out neuron inputs activation-fn))
       (layer-neurons layer)))

; neural network
(struct neural-network (layers activation) #:transparent)

; neural network output 
(define (neural-network-out nn inputs)
  (let pass ([layers (neural-network-layers nn)]
             [inputs inputs])
    (if (empty? layers) inputs
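        ; feed this layer's activated output forward as the remaining layers' input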
        (pass (rest layers)
              (layer-out (first layers)
                         inputs
                         (activation-fn (neural-network-activation nn)))))))

; calculating the derivative for a neuron in the last (output) layer
; out-unactivated -- neuron's output before applying the activation function
; target -- teaching data / desired result
; activation -- activation fn and its derivative
(define (d-lastlayer out-unactivated target activation)
  (* (- ((activation-fn activation) out-unactivated) target)
     ((activation-dfn activation) out-unactivated)))

; calculating the derivative for a neuron in an inner (hidden) layer
; neuron-index -- position of the neuron in its layer. Needed because its outgoing weights are stored in the next layer's neurons.
; out-unactivated -- neuron's output before applying the activation function
; d-nextlayer -- derivatives of the next layer
; activation -- activation fn and its derivative
(define (d-innerlayer neuron-index out-unactivated d-nextlayer nextlayer activation)
  (define weighted-ds (map (lambda (neur d)
                             ; add1 skips past the bias weight at position 0
                             (* d (list-ref (neuron-weights neur)
                                            (add1 neuron-index))))
                           (layer-neurons nextlayer)
                           d-nextlayer))
  (* (foldl + 0 weighted-ds)
     ((activation-dfn activation) out-unactivated)))

; maps a list of layers into a list of layer derivatives, where each layer derivative is a list of its neurons' derivatives
(define (backpropagation layers inputs targets activation)
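  ; note: layer-out-unactivated gives the layer's output before activation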
  (let ([output (layer-out-unactivated (first layers) inputs)])
    (if (empty? (rest layers)) 
        (list (map (lambda (out target) (d-lastlayer out target activation)) output targets))
        (let ([next-layer-d (backpropagation (rest layers) output targets activation)])
          (cons (map (lambda(index out)
                       (d-innerlayer index 
                                     out 
                                     (first next-layer-d)
                                     (first (rest layers))
                                     activation)) 
                     (range (length output)) 
                     output) 
                next-layer-d)))))

; calculates new weights for the layer (one gradient-descent step: w := w - train-speed * input * d)
(define (transform-layer _layer input derivative train-speed)
  (layer (map (lambda (n d)
                (neuron (map (lambda (w i)
                               (+ w (* (- train-speed) i d)))
                             (neuron-weights n)
                             (cons -1 input))))
              (layer-neurons _layer)
              derivative)))

; calculates new weights for all layers
(define (update-layers layers inputs derivatives train-speed activation-fn)
  (if (empty? layers) '()
      (cons (transform-layer (first layers)
                             inputs
                             (first derivatives)
                             train-speed)
            (update-layers (rest layers)
                           (layer-out (first layers)
                                      inputs
                                      activation-fn)
                           (rest derivatives)
                           train-speed
                           activation-fn))))

; performs network update for single input vector
(define (train-neural-network-iteration network inputs target train-speed)
  (let* ([layers (neural-network-layers network)]
         [activation (neural-network-activation network)]
         [derivatives (backpropagation layers inputs target activation)]
         [new-layers (update-layers layers inputs derivatives train-speed (activation-fn activation))])
    (neural-network new-layers (neural-network-activation network))))

; performs network update for each input in teaching-data
(define (train-neural-network-epoch network teaching-data train-speed)
  (let train ([network network]
              [data teaching-data])
    (if (empty? data) network
        (train (train-neural-network-iteration network (car (first data)) (cdr (first data)) train-speed) (rest data)))))

; Trains the network for `iterations` epochs
(define (train-neural-network network data iterations train-speed)
  (let it ([i 0] [network network])
    (if (>= i iterations) network
        (it (add1 i) (train-neural-network-epoch network data train-speed)))))

; creates a network. neuron-count-list -- a list of integers, each giving the number of neurons in that layer
(define (create-neural-network inputs-length neuron-count-list activation)
  (let _create ([inputs-l inputs-length] [n-count neuron-count-list] [layers '()])
    (if (empty? n-count) (neural-network (reverse layers) activation)
        (_create (first n-count)
                 (rest n-count)
                 (cons (layer (build-list (first n-count)
                                          (lambda (n)
                                            ; add1 leaves room for the bias weight; initial weights are random in [14/64, 63/64]
                                            (neuron (build-list (add1 inputs-l)
                                                                (lambda (n2) (/ (+ (random 50) 14) 64)))))))
                       layers)))))

;test
(define (test-case act)
  (define nn (create-neural-network 1 (list 3 1) act))
  (define data (list (cons (list 0) (list 0))
                     (cons (list 1) (list 1))
                     (cons (list 2) (list 0))))
  (define trained-nn (train-neural-network nn data 1000000 0.001))
  (println (~a (neural-network-out trained-nn (list 0))))
  (println (~a (neural-network-out trained-nn (list 1))))
  (println (~a (neural-network-out trained-nn (list 2))))
  (println (~a trained-nn)))

(test-case sigmoid-a)
;outputs
;0->2 * 10^(-29)
;1->5 * 10^(-21)
;2->2 * 10^(-31)

(test-case relu-a)
;outputs
;0 -> ~164
;1 -> ~164
;2 -> ~0

(provide (all-defined-out))

1 Answer:

Answer 0 (score: 0):

The problem is in the recursive call of the backpropagation function:

(let ([next-layer-d (backpropagation (rest layers) output targets activation)])

Here output is the current layer's output before the activation function is applied, but it should be the activated output, since that is what the next layer actually receives as its input (note that update-layers already feeds the activated layer-out forward when recomputing each layer's inputs).
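
The answer is cut off at this point; a minimal sketch of such a fix (a reconstruction, not the answerer's verified code) is to keep the unactivated output for the derivative helpers, which apply the activation themselves, but pass the activated output into the recursion:

; sketch of a possible fix (reconstructed): d-lastlayer and d-innerlayer still
; receive the unactivated output, but the recursive call receives the
; activated output -- the values the next layer actually sees
(define (backpropagation layers inputs targets activation)
  (let* ([output (layer-out-unactivated (first layers) inputs)]
         [activated-output (map (activation-fn activation) output)])
    (if (empty? (rest layers))
        (list (map (lambda (out target) (d-lastlayer out target activation)) output targets))
        (let ([next-layer-d (backpropagation (rest layers) activated-output targets activation)])
          (cons (map (lambda (index out)
                       (d-innerlayer index
                                     out
                                     (first next-layer-d)
                                     (first (rest layers))
                                     activation))
                     (range (length output))
                     output)
                next-layer-d)))))

With this change the backward pass sees the same forward pass as neural-network-out and update-layers, which both propagate activated outputs between layers.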