我按如下方式设置了numpy.seterr:
np.seterr(invalid='raise', over ='raise', under='raise')
我收到以下错误:
c = beta[j,i] + oneminusbeta[j,i]
FloatingPointError: overflow encountered in double_scalars
我已经检查了beta[j,i]
和oneminusbeta[j,i]
在崩溃时的情况,这些是他们的值:
beta: -131.340389182
oneminusbeta: 0.0
请注意,此行添加(beta[j,i] + oneminusbeta[j,i]
)已在循环中运行了数千行(执行图像分类),此时此处崩溃。
我怎么处理这个?是否有必要更改numpy数组的类型?
这就是我初始化它们的方式:
beta = np.empty([m,n])
oneminusbeta = np.empty([m,n])
是否可以在添加之前投射各个值?而不是改变整个数组声明?或者这甚至是一个严重的问题?简单地关闭numpy.seterr配置并让计算继续进行而不会引发错误是否安全?
有人在下面建议,我也怀疑,添加的值不应该导致溢出。那我怎样才能找出真正发生溢出的地方?
这是我的代码:
epthreshold = 709
enthreshold = -708
f.write("weights["+str(i)+", " + str(j)+"] = math.exp(beta: " +str(beta[j,i])+ " + oneminusbeta: " + str(oneminusbeta[j,i])+")\n" )
c = beta[j,i] + oneminusbeta[j,i]
weights[i,j] = math.exp(np.clip(c, enthreshold, epthreshold))
当我检查我的日志文件时,这就是我得到的行:
weights[5550, 13] = math.exp(beta: -131.340389182 + oneminusbeta: 0.0)
以下是我的其余代码,其中变量n
,m
和H
已初始化为整数值:
import numba
import numpy as np
import statsmodels.api as sm
weights = np.empty([n,m])
for curr_n in range(n):
for curr_m in range(m):
weights[curr_n,curr_m] = 1.0/(n)
beta = np.empty([m,n])
oneminusbeta = np.empty([m,n])
for curr_class in range(m):
for curr_sample in range(n):
beta[curr_class,curr_sample] = 1./m
epthreshold = 709 # positive exponential threshold
enthreshold = -708
for h in range(H):
print "Boosting round %d ... " % h
z = np.empty([n,m])
for j in range(m): # computing working responses and weights, Step 2(a)(i)
for i in range(no_samples):
i_class = y[i] #get the correct class for the current sample
if h == 0:
z[i,j] = (int(j==i_class) - beta[j,i])/((beta[j,i])*(1. - beta[j,i]))
weights[i,j] = beta[j,i]*(1. - beta[j,i])
else:
if j == i_class:
z[i,j] = math.exp(np.clip(-beta[j,i],enthreshold, epthreshold))
else:
z[i,j] = -math.exp(np.clip(oneminusbeta[j,i], enthreshold, epthreshold))
f.write("weights["+str(i)+", " + str(j)+"] = math.exp(beta: " +str(beta[j,i])+ " + oneminusbeta: " + str(oneminusbeta[j,i])+")\n" )
c = beta[j,i] + oneminusbeta[j,i]
weights[i,j] = math.exp(np.clip(c, enthreshold, epthreshold))
g_h = np.zeros([1,1])
j = 0
# Calculating regression coefficients per class
# building the parameters per j class
for y1_w in zip(z.T, weights.T):
y1, w = y1_w
temp_g = sm.WLS(y1, X, w).fit() # Step 2(a)(ii)
if np.allclose(g_h,0):
g_h = temp_g.params
else:
g_h = np.c_[g_h, temp_g.params]
j = j + 1
if np.allclose(g,0):
g = g_h
else:
g = g + g_h # Step(2)(a)(iii)
# now set g(x), function coefficients according to new formula, step (2)(b)
sum_g = g.sum(axis=1)
for j in range(m):
diff = (g[:,j] - ((1./m) * sum_g))
g[:,j] = ((m-1.)/m) * diff
g_per_round[h,:,j] = g[:,j]
#Now computing beta, Step 2(c)...."
Q = 0.
e = 0.
for j in range(m):
# Calculating beta and oneminusbeta for class j
aj = 0.0
for i in range(no_samples):
i_class = y[i]
X1 = X[i].reshape(1, no_features)
g1 = g[:,j].reshape(no_features, 1)
gc = g[:,i_class].reshape(no_features, 1)
dot = 1. + float(np.dot(X1, g1)) - float(np.dot(X1,gc))
aj = dot
sum_e = 0.
a_q = []
a_q.append(0.)
for j2 in range(m): # calculating sum of e's except for all j except where j=i_class
if j2 != i_class: # g based on j2, not necessarily g1?
g2 = g[:,j2].reshape(no_features, 1)
dot1 = 1. + float(np.dot(X1, g2)) - float(np.dot(X1,gc))
e2 = math.exp(np.clip(dot1,enthreshold, epthreshold))
sum_e = sum_e + e2
a_q.append(dot1)
if (int(j==i_class) == 1):
a_q_arr = np.array(a_q)
alpha = np.array(a_q_arr[1:])
Q = mylogsumexp(f,a_q_arr, 1, 0)
sumalpha = mylogsumexp(f,alpha, 1, 0)
beta[j,i] = -Q
oneminusbeta[j,i] = sumalpha - Q
else:
alpha = a_q
alpha = np.array(alpha[1:])
a_q_arr = np.array(a_q)
Q = mylogsumexp(f,a_q_arr, 0, aj)
sumalpha = log(math.exp(np.clip(Q, enthreshold, epthreshold)) - math.exp(np.clip(aj, enthreshold, epthreshold)))
beta[j,i] = aj - Q
oneminusbeta[j,i] = sumalpha - Q
,函数mylogsumexp
为:
def mylogsumexp(f, a, is_class, maxaj, axis=None, b=None):
np.seterr(over="raise", under="raise", invalid="raise")
threshold = -sys.float_info.max
maxthreshold = sys.float_info.max
epthreshold = 709 # positive exponential threshold
enthreshold = -708
a = asarray(a)
if axis is None:
a = a.ravel()
else:
a = rollaxis(a, axis)
if is_class == 1:
a_max = a.max(axis=0)
else:
a_max = maxaj
#bnone = " none "
if b is not None:
a_max = maxaj
b = asarray(b)
if axis is None:
b = b.ravel()
else:
b = rollaxis(b, axis)
a = np.clip(a - a_max, enthreshold, epthreshold)
midout = np.sum(np.exp(a), axis=0)
midout = 1.0 + np.clip(midout - math.exp(a_max), threshold, maxthreshold)
out = np.log(midout)
else:
a = np.clip(a - a_max, enthreshold, epthreshold)
out = np.log(np.sum(np.exp(a)))
out += a_max
if out == float("inf"):
out = maxthreshold
if out == float("-inf"):
out = threshold
return out