Computing the derivative of the softmax function

Date: 2019-12-02 13:35:05

Tags: python python-3.x machine-learning math neural-network

I am trying to understand the math behind neural networks and am stuck on computing the derivative of the softmax function. I found the following explanation: https://medium.com/@aerinykim/how-to-implement-the-softmax-derivative-independently-from-any-loss-function-ae6d44363a9d (it derives the single-sample Jacobian diag(s) - s s^T, which is what softmax_grad below implements). Here is the code for my CartPole example:

import numpy as np
import gym
env = gym.make('CartPole-v0')
env = env.unwrapped
env.seed(1)

max_episodes=1000
state_size = 4
action_size = env.action_space.n



def relu(x, deriv=False):
    if deriv:
        # ReLU gradient mask, computed without mutating x in place
        # (the caller reuses l1 after taking its derivative)
        return (x > 0).astype(x.dtype)
    else:
        return np.maximum(0, x)



def softmax(X, train):
    if train:
        # Row-wise softmax over a batch of logits: subtract each row's
        # max for numerical stability, exponentiate in place, then
        # normalize each row so every sample sums to 1
        max_prob = np.max(X, axis=1).reshape((-1, 1))
        X -= max_prob
        np.exp(X, X)
        sum_prob = np.sum(X, axis=1).reshape((-1, 1))
        X /= sum_prob
    else:
        # Softmax of a single 1-D logit vector
        max_prob = np.max(X)
        X -= max_prob
        np.exp(X, X)
        sum_prob = np.sum(X)
        X /= sum_prob
    return X

def softmax_grad(s):
    # Reshape the 1-d softmax to 2-d so that np.dot will do the matrix multiplication
    s = s.reshape(-1,1)
    return np.diagflat(s) - np.dot(s, s.T) 
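
# --- Editor's sketch, not part of the original post: sanity-check
# softmax_grad against a finite-difference estimate of the softmax
# Jacobian for one sample. _numeric_softmax_jac is a helper name
# introduced only for this check.
def _numeric_softmax_jac(x, eps=1e-6):
    n = len(x)
    jac = np.zeros((n, n))
    for j in range(n):
        # softmax modifies its argument in place, so pass fresh copies
        xp, xm = x.astype(float), x.astype(float)
        xp[j] += eps
        xm[j] -= eps
        jac[:, j] = (softmax(xp, train=False) - softmax(xm, train=False)) / (2 * eps)
    return jac

_x0 = np.array([1.0, 2.0, 3.0])
_s0 = softmax(_x0.copy(), train=False)
assert np.allclose(softmax_grad(_s0), _numeric_softmax_jac(_x0), atol=1e-5)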

np.random.seed(1)

syn0 = 2*np.random.random((4,8)) - 1
syn1 = 2*np.random.random((8,2)) - 1
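# (Editor's note, not in the original post) syn0 maps the 4-dim CartPole
# state to 8 hidden units, and syn1 maps those 8 units to the 2 action
# logits; 2*rand - 1 draws the initial weights uniformly from [-1, 1)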



def nn(x, y, train):
    global syn1, syn0
    l0 = x
    l1 = relu(np.dot(l0, syn0), deriv=False)
    l2 = softmax(np.dot(l1, syn1), train=train)
    if train:
        y=np.array(y)
        l2_error = y - l2  
        l2_error = l2_error.reshape(20,1)#<--- shape change for multiplication
        l2_delta = l2_error*softmax_grad(l2)#<--- ERROR
        l1_error = l2_delta.dot(syn1.T)
        l1_delta = l1_error * relu(l1,deriv=True)
        syn1 += l1.T.dot(l2_delta)
        syn0 += l0.T.dot(l1_delta)
    else:
        return l2

allRewards = []
total_rewards = 0
maximumRewardRecorded = 0
episode = 0
episode_states, episode_actions, episode_rewards = [],[],[]

for episode in range(max_episodes):
        episode_rewards_sum = 0
        state = env.reset()
        env.render()
        while True:
            action_probability_distribution = nn(x=state, y=None, train=False)
            action_probability_distribution = action_probability_distribution / np.sum(action_probability_distribution)
            action = np.random.choice(range(len(action_probability_distribution)), p=action_probability_distribution.ravel())
            new_state, reward, done, info = env.step(action)
            episode_states.append(state)
            action_ = np.zeros(action_size)
            action_[action] = 1
            episode_actions.append(action_)
            episode_rewards.append(reward)

            if done:
                episode_rewards_sum = np.sum(episode_rewards)
                allRewards.append(episode_rewards_sum)
                total_rewards = np.sum(allRewards)
                mean_reward = np.divide(total_rewards, episode+1)
                maximumRewardRecorded = np.amax(allRewards)

                print("==========================================")
                print("Episode: ", episode)
                print("Reward: ", episode_rewards_sum)
                print("Mean Reward", mean_reward)
                print("Max reward so far: ", maximumRewardRecorded)

                episode_states = np.array(episode_states)   
                nn(x=episode_states,y=episode_actions,train=True)               
                episode_states, episode_actions, episode_rewards = [],[],[]

                break

            state = new_state

I run into an error during backpropagation. After applying the softmax_grad function I get a large (20, 20) matrix, because the function returns the full Jacobian of the flattened output. Is the softmax_grad function correct? How can I resolve this so that backpropagation works properly?
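A common way out, sketched here as an editor's note rather than as part of the original post: softmax_grad is correct for a single sample, but reshaping the whole batch of outputs into one (20, 1) vector (which also hard-codes the episode length) makes np.diagflat build one Jacobian that mixes samples. One option is to contract each sample's error with that sample's own Jacobian; per_sample_delta below is a hypothetical helper name introduced for this sketch:

import numpy as np

def per_sample_delta(probs, y):
    # probs, y: arrays of shape (batch, classes).
    # For each sample, multiply its upstream error (y - probs here) by
    # that sample's softmax Jacobian J = diag(s) - s s^T, instead of
    # building one flattened (20, 20) Jacobian for the whole batch.
    delta = np.empty_like(probs)
    for i in range(probs.shape[0]):
        s = probs[i].reshape(-1, 1)
        jac = np.diagflat(s) - np.dot(s, s.T)   # (classes, classes)
        delta[i] = jac.dot(y[i] - probs[i])
    return delta

With this, l2_delta = per_sample_delta(l2, y) keeps the (batch, 2) shape that the following l2_delta.dot(syn1.T) expects. Alternatively, when the softmax feeds a cross-entropy loss, the Jacobian cancels analytically and the delta collapses to probs - y (or y - probs under this code's sign convention), so no explicit softmax_grad is needed at all.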

0 Answers:

No answers