I'm trying to implement a neural network with backpropagation in Racket. To test the implementation, I decided to train it on a very small data set for a large number of iterations and check whether it fits the data it was trained on. It doesn't: with the sigmoid function it outputs extremely small values (magnitudes around 10^-20), although the relative values are correct (that is, the input vector with the largest target value also produces the largest output from the trained network). With the relu function the output magnitudes are closer to what is expected, but they are wrong relative to each other. I would appreciate any insight into why this happens.
#lang racket
; activation function. fn - the function, dfn - its derivative
(struct activation (fn dfn))
; activation using sigmoid; dfn uses the identity s'(x) = s(x) * (1 - s(x))
(define sigmoid-a (let ([sigmoid (lambda (x)
                                   (/ 1 (+ 1 (exp (* x -1)))))])
                    (activation (lambda (x)
                                  (sigmoid x))
                                (lambda (x)
                                  (* (sigmoid x) (- 1 (sigmoid x)))))))
; activation using (leaky) relu, with slope 0.2 for negative inputs
(define relu-a (activation (lambda (x) (if (< x 0)
                                           (* 0.2 x)
                                           x))
                           (lambda (x) (if (< x 0)
                                           0.2
                                           1))))
; neuron. The bias is an implicit first weight, paired with a fixed input of -1
(struct neuron (weights) #:transparent)
; neuron output before applying the activation function
(define (neuron-out-unactivated neuron inputs)
  (foldl (lambda (w in result)
           (+ result (* w in)))
         0
         (neuron-weights neuron)
         (cons -1 inputs)))
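; worked example of the bias convention (illustrative values, not from the test):
; (neuron-out-unactivated (neuron '(0.5 2)) '(3))
;   = (* 0.5 -1) + (* 2 3) = 5.5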
; neuron output with the activation function applied
(define (neuron-out neuron inputs activation-fn)
  (activation-fn (neuron-out-unactivated neuron inputs)))
; neuron layer
(struct layer (neurons) #:transparent)
; list of the layer's neurons' outputs, before the activation function
(define (layer-out-unactivated layer inputs)
  (map (lambda (neuron)
         (neuron-out-unactivated neuron inputs))
       (layer-neurons layer)))
; list of the layer's neurons' outputs with the activation function applied
(define (layer-out layer inputs activation-fn)
  (map (lambda (neuron)
         (neuron-out neuron inputs activation-fn))
       (layer-neurons layer)))
; neural network
(struct neural-network (layers activation) #:transparent)
; neural network output
(define (neural-network-out nn inputs)
  (let pass ([layers (neural-network-layers nn)]
             [inputs inputs])
    (if (empty? layers)
        inputs
        (pass (rest layers)
              (layer-out (first layers)
                         inputs
                         (activation-fn (neural-network-activation nn)))))))
; calculating the derivative for a neuron in the last (output) layer
; out-unactivated -- neuron's output before applying the activation function
; target -- training data / desired result
; activation -- activation fn and its derivative
(define (d-lastlayer out-unactivated target activation)
  (* (- ((activation-fn activation) out-unactivated) target)
     ((activation-dfn activation) out-unactivated)))
; calculating the derivative for a neuron in an inner (hidden) layer
; neuron-index -- position of the neuron in its layer; needed because the
;                 weights are stored in the next layer's neurons
; out-unactivated -- neuron's output before applying the activation function
; d-nextlayer -- derivatives of the next layer
; activation -- activation fn and its derivative
(define (d-innerlayer neuron-index out-unactivated d-nextlayer nextlayer activation)
  ; add1 skips the implicit bias weight in each next-layer neuron
  (define weighted-ds (map (lambda (neur d)
                             (* d (list-ref (neuron-weights neur)
                                            (add1 neuron-index))))
                           (layer-neurons nextlayer)
                           d-nextlayer))
  (* (foldl + 0 weighted-ds)
     ((activation-dfn activation) out-unactivated)))
; maps a list of layers to a list of layer derivatives, where each layer
; derivative is a list of its neurons' derivatives
(define (backpropagation layers inputs targets activation)
  (let ([output (layer-out-unactivated (first layers) inputs)])
    (if (empty? (rest layers))
        (list (map (lambda (out target) (d-lastlayer out target activation))
                   output
                   targets))
        (let ([next-layer-d (backpropagation (rest layers) output targets activation)])
          (cons (map (lambda (index out)
                       (d-innerlayer index
                                     out
                                     (first next-layer-d)
                                     (first (rest layers))
                                     activation))
                     (range (length output))
                     output)
                next-layer-d)))))
; calculates new weights for a layer (gradient-descent step)
(define (transform-layer _layer input derivative train-speed)
  (layer (map (lambda (n d)
                (neuron (map (lambda (w i)
                               (+ w (* (- train-speed) i d)))
                             (neuron-weights n)
                             (cons -1 input))))
              (layer-neurons _layer)
              derivative)))
; calculates new weights for all layers
(define (update-layers layers inputs derivatives train-speed activation-fn)
  (if (empty? layers)
      '()
      (cons (transform-layer (first layers)
                             inputs
                             (first derivatives)
                             train-speed)
            (update-layers (rest layers)
                           (layer-out (first layers)
                                      inputs
                                      activation-fn)
                           (rest derivatives)
                           train-speed
                           activation-fn))))
; performs a network update for a single input vector
(define (train-neural-network-iteration network inputs target train-speed)
  (let* ([layers (neural-network-layers network)]
         [activation (neural-network-activation network)]
         [derivatives (backpropagation layers inputs target activation)]
         [new-layers (update-layers layers inputs derivatives train-speed (activation-fn activation))])
    (neural-network new-layers activation)))
; performs a network update for each input in teaching-data
(define (train-neural-network-epoch network teaching-data train-speed)
  (let train ([network network]
              [data teaching-data])
    (if (empty? data)
        network
        (train (train-neural-network-iteration network
                                               (car (first data))
                                               (cdr (first data))
                                               train-speed)
               (rest data)))))
; trains the network for `iterations` epochs
(define (train-neural-network network data iterations train-speed)
  (let it ([i 0] [network network])
    (if (> i iterations)
        network
        (it (add1 i) (train-neural-network-epoch network data train-speed)))))
; creates a network. neuron-count-list -- a list of integers, each giving the
; number of neurons in the corresponding layer
(define (create-neural-network inputs-length neuron-count-list activation)
  (let _create ([inputs-l inputs-length] [n-count neuron-count-list] [layers '()])
    (if (empty? n-count)
        (neural-network (reverse layers) activation)
        (_create (first n-count)
                 (rest n-count)
                 (cons (layer (build-list (first n-count)
                                          (lambda (n)
                                            (neuron (build-list (add1 inputs-l)
                                                                (lambda (n2) (/ (+ (random 50) 14) 64)))))))
                       layers)))))
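; e.g. (create-neural-network 1 (list 3 1) sigmoid-a) builds a network taking
; one input, with a 3-neuron hidden layer and a 1-neuron output layer
; (this matches the network used by test-case below)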
; test
(define (test-case act)
  (define nn (create-neural-network 1 (list 3 1) act))
  (define data (list (cons (list 0) (list 0))
                     (cons (list 1) (list 1))
                     (cons (list 2) (list 0))))
  (define trained-nn (train-neural-network nn data 1000000 0.001))
  (println (~a (neural-network-out trained-nn (list 0))))
  (println (~a (neural-network-out trained-nn (list 1))))
  (println (~a (neural-network-out trained-nn (list 2))))
  (println (~a trained-nn)))
(test-case sigmoid-a)
; outputs:
; 0 -> 2 * 10^(-29)
; 1 -> 5 * 10^(-21)
; 2 -> 2 * 10^(-31)
(test-case relu-a)
; outputs:
; 0 -> ~164
; 1 -> ~164
; 2 -> ~0
(provide (all-defined-out))
Answer 0 (score: 0)

The problem is in the recursive call inside the backpropagation function:

(let ([next-layer-d (backpropagation (rest layers) output targets activation)])

Here output is the current layer's output before the activation function is applied, but it should be the output after it.
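A minimal sketch of the corrected function under that diagnosis: only the recursive call changes, applying the activation function to output before passing it down, so each hidden layer's derivative is taken with respect to the inputs the next layer actually received during the forward pass.

; backpropagation with the recursive call corrected: the next layer's inputs
; are the *activated* outputs of the current layer, as in neural-network-out
(define (backpropagation layers inputs targets activation)
  (let ([output (layer-out-unactivated (first layers) inputs)])
    (if (empty? (rest layers))
        (list (map (lambda (out target) (d-lastlayer out target activation))
                   output
                   targets))
        (let ([next-layer-d (backpropagation (rest layers)
                                             ; was: output
                                             (map (activation-fn activation) output)
                                             targets
                                             activation)])
          (cons (map (lambda (index out)
                       (d-innerlayer index
                                     out
                                     (first next-layer-d)
                                     (first (rest layers))
                                     activation))
                     (range (length output))
                     output)
                next-layer-d)))))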