我试图了解神经网络的数学原理。坚持计算softmax函数的导数。我找到了以下解释:https://medium.com/@aerinykim/how-to-implement-the-softmax-derivative-independently-from-any-loss-function-ae6d44363a9d。这是关于pole的例子的代码:
import numpy as np
import gym
env = gym.make('CartPole-v0')
env = env.unwrapped
env.seed(1)
max_episodes=1000
state_size = 4
action_size = env.action_space.n
def relu(x,deriv):
if deriv:
x[x<=0] = 0
x[x>0] = 1
return x
else:
return np.maximum(0,x)
def softmax(X, train):
if train:
max_prob = np.max(X, axis=1).reshape((-1, 1))
X -= max_prob
np.exp(X, X)
sum_prob = np.sum(X, axis=0).reshape((-1, 1))
X /= sum_prob[0]
else:
max_prob = np.max(X, axis=0).reshape((-1, 1))
X -= max_prob[0]
np.exp(X, X)
sum_prob = np.sum(X, axis=0).reshape((-1, 1))
X /= sum_prob[0]
return X
def softmax_grad(s):
# Reshape the 1-d softmax to 2-d so that np.dot will do the matrix multiplication
s = s.reshape(-1,1)
return np.diagflat(s) - np.dot(s, s.T)
np.random.seed(1)
syn0 = 2*np.random.random((4,8)) - 1
syn1 = 2*np.random.random((8,2)) - 1
def nn(x,y,train):
global syn1,syn0
l0 = x
l1 = relu(np.dot(l0,syn0),deriv=False)
l2 = softmax(np.dot(l1,syn1),train=False)
if train:
y=np.array(y)
l2_error = y - l2
l2_error = l2_error.reshape(20,1)#<--- shape change for multiplication
l2_delta = l2_error*softmax_grad(l2)#<--- ERROR
l1_error = l2_delta.dot(syn1.T)
l1_delta = l1_error * relu(l1,deriv=True)
syn1 += l1.T.dot(l2_delta)
syn0 += l0.T.dot(l1_delta)
else:
return l2
allRewards = []
total_rewards = 0
maximumRewardRecorded = 0
episode = 0
episode_states, episode_actions, episode_rewards = [],[],[]
for episode in range(max_episodes):
episode_rewards_sum = 0
state = env.reset()
env.render()
while True:
action_probability_distribution = nn(x=state,y=_,train=False)
action_probability_distribution=action_probability_distribution/sum(action_probability_distribution)
action = np.random.choice(range(len(action_probability_distribution)), p=action_probability_distribution.ravel())
new_state, reward, done, info = env.step(action)
episode_states.append(state)
action_ = np.zeros(action_size)
action_[action] = 1
episode_actions.append(action_)
episode_rewards.append(reward)
if done:
episode_rewards_sum = np.sum(episode_rewards)
allRewards.append(episode_rewards_sum)
total_rewards = np.sum(allRewards)
mean_reward = np.divide(total_rewards, episode+1)
maximumRewardRecorded = np.amax(allRewards)
print("==========================================")
print("Episode: ", episode)
print("Reward: ", episode_rewards_sum)
print("Mean Reward", mean_reward)
print("Max reward so far: ", maximumRewardRecorded)
episode_states = np.array(episode_states)
nn(x=episode_states,y=episode_actions,train=True)
episode_states, episode_actions, episode_rewards = [],[],[]
break
state = new_state
我在反向传播中遇到了一个错误。使用softmax_grad函数后,会出现一个大尺寸(20,20)的矩阵。这是由于雅可比矩阵。 softmax_grad函数正确吗?如何解决此问题,以便反向传播正常工作?