我正在像下面这样,在 pandas 的 DataFrame 中计算滚动平均值及其标准差(布林带,此处的示例经过了大幅简化):
import pandas as pd
import numpy as np
# Band settings: width in standard deviations and rolling window length.
no_of_std = 3
window = 20

# Sample series; every derived column is computed from 'A'.
df = pd.DataFrame({'A': [34, 34, 34, 33, 32, 34, 35.0, 21, 22, 25, 23, 21, 39, 26, 31, 34, 38, 26, 21, 39, 31]})

# A single rolling view of 'A' feeds both statistics.
rolling_view = df['A'].rolling(window)
rolling_mean = rolling_view.mean()
rolling_std = rolling_view.std(ddof=0)

# Distance from the rolling mean to either band.
band_offset = rolling_std * no_of_std

df['M'] = rolling_mean
df['BBL'] = rolling_mean - band_offset
df['BBH'] = rolling_mean + band_offset
print(df)
结果如下:
A M BBL BBH
0 34.0 NaN NaN NaN
1 34.0 NaN NaN NaN
2 34.0 NaN NaN NaN
3 33.0 NaN NaN NaN
4 32.0 NaN NaN NaN
5 34.0 NaN NaN NaN
6 35.0 NaN NaN NaN
7 21.0 NaN NaN NaN
8 22.0 NaN NaN NaN
9 25.0 NaN NaN NaN
10 23.0 NaN NaN NaN
11 21.0 NaN NaN NaN
12 39.0 NaN NaN NaN
13 26.0 NaN NaN NaN
14 31.0 NaN NaN NaN
15 34.0 NaN NaN NaN
16 38.0 NaN NaN NaN
17 26.0 NaN NaN NaN
18 21.0 NaN NaN NaN
19 39.0 30.10 11.633544 48.566456
20 31.0 29.95 11.665375 48.234625
现在,我想反过来计算:列“A”中的下一个值必须是多少,才能恰好落在滚动平均值的第三个标准差上。换句话说,我要求的是:下一行(第 21 行)的 A 需要取什么值,才能与该行的 BBH 或 BBL 完全相等。我可以通过递归逼近(二分法)做到这一点,但这非常耗费性能,我认为一定有更好的方法。下面是我目前这种较慢的解法示例,应该存在更快、更好的做法:
import pandas as pd
# Sample data: a single column 'A' holding 21 observations.
odf = pd.DataFrame({'A': [34, 34, 34, 33, 32, 34, 35.0, 21, 22, 25, 23, 21, 39, 26, 31, 34, 38, 26, 21, 39, 31]})
def get_last_bbh_bbl(idf, no_of_std=3, window=20):
    """Return the upper and lower Bollinger band values for the last row.

    Args:
        idf: DataFrame with a numeric column 'A'. It is not modified.
        no_of_std: Band width in standard deviations.
        window: Number of trailing rows used for the mean and the
            standard deviation.

    Returns:
        A ``(bbh, bbl)`` tuple. Both values are NaN when ``idf`` holds
        fewer than ``window`` rows (including none at all).
    """
    # Only the trailing `window` rows can influence the last band values,
    # so there is no need to copy the frame or roll over the whole column.
    tail = idf['A'].iloc[-window:]
    if tail.empty:
        nan = float('nan')
        return nan, nan

    # Rolling (rather than a plain mean/std) keeps the "not enough rows or
    # a NaN inside the window -> NaN" behaviour of the full-column version.
    rolling = tail.rolling(window)
    # Positional access works for any index, not just the default 0..n-1.
    mean = rolling.mean().iloc[-1]
    band_offset = rolling.std().iloc[-1] * no_of_std
    return mean + band_offset, mean - band_offset
def search_matching_value(idf, low, high, search_for):
    """Bisect for the next 'A' value that lands exactly on a Bollinger band.

    Each step appends the midpoint of ``[low, high]`` to a copy of ``idf``,
    recomputes the bands of that new last row with ``get_last_bbh_bbl`` and
    halves the interval accordingly.

    Args:
        idf: DataFrame with a numeric column 'A'. It is not modified.
        low: Lower bound of the search interval.
        high: Upper bound of the search interval.
        search_for: 'bbh' to hit the upper band, 'bbl' to hit the lower one.

    Returns:
        The midpoint if it equals the band exactly (or the band is NaN),
        otherwise ``high`` once the interval is narrower than 1e-6.

    Raises:
        ValueError: If ``search_for`` is neither 'bbh' nor 'bbl'.
    """
    if search_for not in ('bbh', 'bbl'):
        raise ValueError(
            "search_for must be 'bbh' or 'bbl', got {!r}".format(search_for)
        )

    while True:
        if abs(high - low) < 0.000001:
            return high
        middle = low + ((high - low) / 2)

        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to add the candidate row.
        candidate = pd.concat(
            [idf, pd.DataFrame({'A': [middle]})], ignore_index=True
        )
        bbh, bbl = get_last_bbh_bbl(candidate)
        band = bbh if search_for == 'bbh' else bbl

        # The same rule serves both bands: a band below the candidate
        # shrinks the interval from above, a band above it from below.
        if band < middle:
            high = middle
        elif band > middle:
            low = middle
        else:
            return middle
# Bands of the most recent row and the last observed value of 'A'.
actual_bbh, actual_bbl = get_last_bbh_bbl(odf)
last_value = odf['A'].iloc[-1]
print(f'last_value: {last_value}, actual bbh: {actual_bbh}, actual bbl: {actual_bbl}')

# Upper band: search between the last value and ten times the current band.
low, high = last_value, actual_bbh * 10
next_value_that_hits_bbh = search_matching_value(odf, low, high, 'bbh')
print(f'next_value_that_hits_bbh: {next_value_that_hits_bbh}')

# Lower band: search between zero and the last value.
low, high = 0, last_value
next_value_that_hits_bbl = search_matching_value(odf, low, high, 'bbl')
print(f'next_value_that_hits_bbl: {next_value_that_hits_bbl}')
结果如下:
last_value: 31.0, actual bbh: 48.709629106422284, actual bbl: 11.190370893577711
next_value_that_hits_bbh: 57.298733206475276
next_value_that_hits_bbl: 2.174952656030655
答案 0 :(得分:1)
下面是一种用更快的算法计算下一个值的解决方案:scipy 的优化版 newton 和经典牛顿法都比二分法快。此外,该方案不再通过 DataFrame 反复重新计算各个值,而是直接使用标准库 statistics 模块中的统计函数。
from scipy import misc
import pandas as pd
import statistics
from scipy.optimize import newton
#scipy.optimize if you want to test the newton optimized function
def get_last_bbh_bbl(idf):
    """Return the last 'A' value and the Bollinger bands of the last row.

    Uses the module-level ``window`` (rolling length) and ``no_of_std``
    (band width in standard deviations).

    Args:
        idf: DataFrame with a numeric column 'A'. It is not modified.

    Returns:
        A ``(lastvalue, bbh, bbl)`` tuple. The bands are NaN when ``idf``
        holds fewer than ``window`` rows; all three are NaN when it is empty.
    """
    # Only the trailing `window` rows can influence the last band values,
    # so there is no need to copy the frame or roll over the whole column.
    tail = idf['A'].iloc[-window:]
    if tail.empty:
        nan = float('nan')
        return nan, nan, nan

    rolling = tail.rolling(window)
    # Positional access works for any index, not just the default 0..n-1.
    mean = rolling.mean().iloc[-1]
    band_offset = rolling.std().iloc[-1] * no_of_std
    return tail.iloc[-1], mean + band_offset, mean - band_offset
def NewtonsMethod(f, x, tolerance=0.00000001, dx=1.0, max_iter=100):
    """Find a root of ``f`` with the classic Newton iteration.

    The derivative is estimated with a central difference,
    ``(f(x + dx) - f(x - dx)) / (2 * dx)``, so no SciPy helper is needed
    (``scipy.misc.derivative`` is deprecated and absent from recent SciPy).

    Args:
        f: Function of one float whose root is wanted.
        x: Starting point.
        tolerance: Stop once two successive iterates differ by less than this.
        dx: Step of the central difference.
        max_iter: Maximum number of Newton steps.

    Returns:
        The converged iterate.

    Raises:
        ZeroDivisionError: If the derivative estimate is zero.
        RuntimeError: If no convergence is reached within ``max_iter`` steps.
    """
    for _ in range(max_iter):
        slope = (f(x + dx) - f(x - dx)) / (2.0 * dx)
        if slope == 0:
            raise ZeroDivisionError(
                'derivative estimate is zero; Newton step is undefined'
            )
        x1 = x - f(x) / slope
        if abs(x1 - x) < tolerance:
            # Return the newest iterate, not the one it was derived from.
            return x1
        x = x1
    raise RuntimeError(
        f'NewtonsMethod did not converge within {max_iter} iterations'
    )
def low(x):
    """Return ``bbl(x) - x``; a root of this function hits the lower band.

    The sample is the module-level ``lastlistofvalue`` without its final
    element, with ``x`` appended in its place.
    """
    sample = [*lastlistofvalue[:-1], x]
    center = statistics.mean(sample)
    # Sample standard deviation, reusing the mean computed above.
    spread = statistics.stdev(sample, center)
    return center - spread * no_of_std - x
def high(x):
    """Return ``bbh(x) - x``; a root of this function hits the upper band.

    The sample is the module-level ``lastlistofvalue`` without its final
    element, with ``x`` appended in its place.
    """
    sample = [*lastlistofvalue[:-1], x]
    center = statistics.mean(sample)
    # Sample standard deviation, reusing the mean computed above.
    spread = statistics.stdev(sample, center)
    return center + spread * no_of_std - x
# Observed series, defined once so the verification frames further down
# cannot drift away from it.
observations = [34, 34, 34, 33, 32, 34, 35.0, 21, 22, 25, 23, 21, 39, 26, 31, 34, 38, 26, 21, 39, 31]
odf = pd.DataFrame({'A': observations})
no_of_std = 3
window = 20

# The `window` most recent observations, newest first.
lastlistofvalue = odf['A'].to_list()[::-1][:window]

# --- Newton classic method ---
# Start both searches from the last observed value.
x = odf['A'].iloc[-1]

x0 = NewtonsMethod(high, x)
print(f'value to hit bbh: {x0}')
# Check: append the solution and recompute the bands of the new last row.
odf = pd.DataFrame({'A': observations + [x0]})
lastvalue, new_bbh, new_bbl = get_last_bbh_bbl(odf)
print(f'value to hit bbh: {lastvalue} -> check new bbh: {new_bbh}')

x0 = NewtonsMethod(low, x)
print(f'value to hit bbl: {x0}')
odf = pd.DataFrame({'A': observations + [x0]})
lastvalue, new_bbh, new_bbl = get_last_bbh_bbl(odf)
print(f'value to hit bbl: {lastvalue} -> check new bbl: {new_bbl}')
输出:
value to hit bbh: 57.298732375228624
value to hit bbh: 57.298732375228624 -> check new bbh: 57.29873237527272
value to hit bbl: 2.1749518354059636
value to hit bbl: 2.1749518354059636 -> check new bbl: 2.1749518353102992
您可以将其与 scipy 提供的优化版 newton 进行比较:
""" Newton optimized method """
# Starting point: the last value currently stored in column 'A'.
x = odf['A'].iloc[-1]

# No derivative is supplied, so only the tolerance and the iteration cap
# need to be spelled out; every other argument keeps its default.
x0 = newton(high, x, tol=1.00e-08, maxiter=50)
print(f'Newton opt value to hit bbh: {x0}')

x0 = newton(low, x, tol=1.48e-08, maxiter=50)
print(f'Newton value to hit bbl: {x0}')
输出:
Newton opt value to hit bbh: 57.29873237532118
Newton value to hit bbl: 2.1749518352051225
使用优化版 newton 时,您可以调整最大迭代次数(maxiter)。
优化版比经典版更快。
每次计算的耗时:
优化版:0.002 秒
经典版:0.005 秒
*备注:*
如果使用 rolling(window).std(),计算的是样本标准差,因此必须使用
std = statistics.stdev(l, avg)
(即除以 N-1)
如果使用 rolling(window).std(ddof=0),计算的是总体标准差,因此必须使用
std = statistics.pstdev(l, avg)
(即除以 N)