下面是我试图计算softmax的一个小代码。它适用于单个阵列。但是如果有更大的数字,如1000等,它会爆炸
import numpy as np
def softmax(x):
print (x.shape)
softmax1 = np.exp(x)/np.sum(np.exp(x))
return softmax1
def test_softmax():
print "Running your code"
#print softmax(np.array([1,2]))
test1 = softmax(np.array([1,2]))
ans1 = np.array([0.26894142, 0.73105858])
assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)
print ("Softmax values %s" % test1)
test2 = softmax(np.array([[1001,1002],[3,4]]))
print test2
ans2 = np.array([
[0.26894142, 0.73105858],
[0.26894142, 0.73105858]])
assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)
if __name__ == "__main__":
test_softmax()
我收到错误 RuntimeWarning:在exp中遇到溢出 运行你的代码 softmax1 = np.exp(x)/ np.sum(np.exp(x))
答案 0 :(得分:4)
softmax的典型实现首先消除了最大值以解决此问题:
def softmax(x, axis=-1):
# save typing...
kw = dict(axis=axis, keepdims=True)
# make every value 0 or below, as exp(0) won't overflow
xrel = x - x.max(**kw)
# if you wanted better handling of small exponents, you could do something like this
# to try and make the values as large as possible without overflowing, The 0.9
# is a fudge factor to try and ignore rounding errors
#
# xrel += np.log(np.finfo(float).max / x.shape[axis]) * 0.9
exp_xrel = np.exp(xrel)
return exp_xrel / exp_xrel.sum(**kw)
代数地,这是完全相同的,但这确保了传递到exp
的最大值是1
。