我有以下代码,这是我的Python代码中的瓶颈:
def get_payoff(self, actual, predicted):
    """Return the payoff for a single (actual, predicted) pair.

    The payoff is a per-outcome rate multiplied by the distance of
    ``predicted`` from the 0.5 decision threshold.

    Args:
        actual: Observed value. It is treated as the positive class when
            it lies within 1e-5 of 1.0, and as the negative class
            otherwise.
        predicted: Predicted value, compared against 0.5.

    Returns:
        ``fn_payoff * (0.5 - predicted)`` or ``tp_payoff * (predicted - 0.5)``
        for a positive ``actual``; ``tn_payoff * (0.5 - predicted)`` or
        ``fp_payoff * (predicted - 0.5)`` for a negative one. Returns 0
        when ``predicted`` is neither below nor above 0.5 (exactly 0.5,
        or NaN, for which both comparisons are false).
    """
    # Tolerant float comparison standing in for ``actual == 1``.
    if abs(actual - 1.0) < 1e-5:
        below_rate, above_rate = self.fn_payoff, self.tp_payoff
    else:
        below_rate, above_rate = self.tn_payoff, self.fp_payoff

    if predicted < 0.5:
        return below_rate * (0.5 - predicted)
    if predicted > 0.5:
        return above_rate * (predicted - 0.5)
    return 0
def get_total_payoff(self):
    """Return the sum of ``get_payoff`` over all target/prediction pairs.

    ``self.target`` and ``self.predictions`` are walked element by element
    in lockstep, and the scalar payoff of each pair is accumulated.

    Returns:
        The accumulated payoff.
    """
    total_payoff = 0
    # Predictions live on the instance; the bare name ``predictions`` is
    # not defined in this scope.
    for target_element, prediction_element in zip(
            np.nditer(self.target), np.nditer(self.predictions)):
        total_payoff += self.get_payoff(target_element, prediction_element)
    return total_payoff
fn_payoff,tp_payoff,tn_payoff和fp_payoff都是浮点数。 self.target和self.predictions都是numpy ndarrays。
我猜应该有办法用某种numpy向量化来替换get_total_payoff中的for循环,但我不知道如何处理其中的if/then语句才能正确地进行向量化。
答案 0 :(得分:2)
要把根据条件使用不同表达式的函数向量化,关键是使用`np.choose`。另外,在您的情况下,`predict-0.5`和`0.5-predict`可以替换为`abs(predict-0.5)`,再加上对`predict==0.5`情况的特殊处理(我猜这个特殊处理是为了正确处理NaN)。
import numpy as np
class A(object):
    """Example holder comparing a looped and a vectorized payoff total.

    The instance stores four payoff rates plus a ``target`` array and a
    ``predictions`` array. ``get_total_payoff`` sums the per-element
    payoff with a Python loop; ``get_total_payoff_VECTORIZED`` computes
    the same total with NumPy array operations.
    """

    def __init__(self):
        # Payoff rates applied to |prediction - 0.5|.
        self.fn_payoff = 222.
        self.tn_payoff = 444.
        self.fp_payoff = 777.
        self.tp_payoff = 888.
        # Sample data used by the demonstration.
        self.target = np.array([0.3, 1., 2.])
        self.predictions = np.array([0.4, 0.5, 1.7])

    def get_payoff(self, actual, predicted):
        """Return the payoff for a single (actual, predicted) pair.

        ``actual`` counts as the positive class when it lies within 1e-5
        of 1.0. The result is the matching rate times the distance of
        ``predicted`` from 0.5, or 0 when ``predicted`` is neither below
        nor above 0.5 (exactly 0.5, or NaN).
        """
        if abs(actual - 1.0) < 1e-5:  # tolerant test for actual == 1
            below_rate, above_rate = self.fn_payoff, self.tp_payoff
        else:
            below_rate, above_rate = self.tn_payoff, self.fp_payoff

        if predicted < 0.5:
            return below_rate * (0.5 - predicted)
        if predicted > 0.5:
            return above_rate * (predicted - 0.5)
        return 0

    def get_total_payoff(self):
        """Return the total payoff, computed one element pair at a time."""
        total_payoff = 0
        for target_element, prediction_element in zip(
                np.nditer(self.target), np.nditer(self.predictions)):
            total_payoff += self.get_payoff(target_element, prediction_element)
        return total_payoff

    def get_total_payoff_VECTORIZED(self):
        """Return the same total as ``get_total_payoff`` using array ops.

        Elements whose prediction is neither below nor above 0.5 (exactly
        0.5, or NaN) contribute 0, matching the final ``return 0`` branch
        of ``get_payoff``.
        """
        is_positive = np.abs(self.target - 1) < 1e-5
        below = self.predictions < 0.5
        above = self.predictions > 0.5

        # Per-element rate for each side of the 0.5 threshold.
        rate_below = np.where(is_positive, self.fn_payoff, self.tn_payoff)
        rate_above = np.where(is_positive, self.tp_payoff, self.fp_payoff)

        scaled = np.where(below, rate_below, rate_above) * np.abs(self.predictions - 0.5)
        # Force a zero wherever neither comparison holds, so a NaN
        # prediction or an infinite rate times a zero distance cannot
        # leak NaN into the sum.
        payoff = np.where(below | above, scaled, 0.0)
        return payoff.sum()
# Demonstration: both implementations should report the same total.
a = A()
print(a.get_total_payoff())             # => 976.8
print(a.get_total_payoff_VECTORIZED())  # => 976.8
答案 1 :(得分:1)
def _get_payoff(self, actual, predicted):
    """Return the summed payoff for arrays of actual and predicted values.

    Args:
        actual: Observed values; an element counts as the positive class
            when ``numpy.isclose(actual, 1)`` holds (default tolerances).
        predicted: Predicted values, compared against 0.5.

    Returns:
        The sum over all elements of the selected payoff rate times
        ``|0.5 - predicted|``. A prediction of exactly 0.5 contributes 0
        through the zero distance factor; a NaN prediction propagates
        NaN into the result.
    """
    pred_factor = numpy.abs(0.5 - predicted)
    # Encode each element's case as an index 0..3:
    # 2 * (actual is close to 1) + (predicted < 0.5).
    payoff_selector = 2 * numpy.isclose(actual, 1) + (predicted < 0.5)
    payoff = numpy.choose(
        payoff_selector,
        [
            self.fp_payoff,  # 0: actual not 1, predicted not below 0.5
            self.tn_payoff,  # 1: actual not 1, predicted below 0.5
            self.tp_payoff,  # 2: actual close to 1, predicted not below 0.5
            self.fn_payoff,  # 3: actual close to 1, predicted below 0.5
        ])
    return numpy.sum(payoff * pred_factor)
def get_total_payoff(self):
    """Return the total payoff of ``self.predictions`` against ``self.target``.

    Delegates to ``_get_payoff``. The predictions are read from the
    instance, because the bare name ``predictions`` is not defined in
    this scope.
    """
    return self._get_payoff(self.target, self.predictions)
我们使用`numpy.choose`生成一个支付系数选择数组,将其与预测值和0.5之间的绝对差值数组相乘,然后求和。`numpy.isclose`用于测试`actual`值是否接近1。我们可以忽略`predicted == 0.5`的情况,因为乘以`numpy.abs(0.5 - predicted)`无论如何都会得到正确的结果0。如果`self.target`和`predictions`保证为一维数组,则`numpy.dot`的性能可能优于分开进行的乘法和求和。