这是我的数据
Last
Date
2019-02-19 277.850006
2019-02-20 278.410004
2019-02-21 277.420013
2019-02-22 279.140015
2019-02-25 279.940002
我用下面这个函数处理每日数据,效果很好。
def corr_window(data, cdw, dyf, corr_level):
    """Find past windows that correlate with the latest one and report returns.

    The last ``cdw`` rows of ``data`` form the reference window. Every earlier
    window of ``cdw`` rows is compared with it using the Pearson correlation
    coefficient. For each window whose correlation is strictly greater than
    ``corr_level``, the relative change of the ``'Last'`` column over the
    following ``dyf`` rows is recorded and the window is plotted. A summary
    of the recorded returns is printed at the end.

    Parameters
    ----------
    data : pandas.DataFrame
        Historical data with a ``'Last'`` column. The values of each window
        are flattened before being correlated, so this presumably expects
        ``'Last'`` to be the only column -- confirm against the caller.
    cdw : int
        Correlation window length, in rows ("days").
    dyf : int
        Number of rows ("days") forward over which the return is measured.
    corr_level : float
        Correlation threshold; a window matches when its correlation with
        the reference window is strictly greater than this value.

    Returns
    -------
    None
        The summary is printed, not returned.
    """
    mylabels = ['Dax', str(dyf) + ' days forward']
    returns = []

    reference = data[-cdw:]
    # pearsonr expects 1-D sequences, so flatten the frame to a plain array.
    reference_values = reference.values.ravel()
    reference.plot(title=f'Dax last {cdw} trading Days')

    # A candidate window must end before the reference window starts (the
    # second ``cdw``) and must be followed by ``dyf`` existing rows, otherwise
    # the forward lookup below would index past the end of ``data``.
    n_windows = len(data) - cdw - max(cdw, dyf)
    for i in range(n_windows):
        # Computed once and reused for both the filter and the plot title.
        corr = pearsonr(data[i:i + cdw].values.ravel(), reference_values)[0]
        if not corr > corr_level:  # also skips NaN correlations
            continue

        start_price = data.iloc[i + cdw]['Last']
        end_price = data.iloc[i + cdw + dyf]['Last']
        returns.append(end_price / start_price - 1)

        fig, ax = plt.subplots(1, 1)
        data[i:i + cdw + dyf].plot(title="Correlation:" + str(corr), ax=ax)
        # Overlay the forward-looking part of the window in red.
        data[i + cdw:i + cdw + dyf].plot(color='red', label='Days forward', ax=ax)
        ax.legend(labels=mylabels, loc=0)
        plt.tight_layout()

    if not returns:
        # Without this guard the percentage below divides by zero.
        print(f'No window with correlation above {corr_level} was found')
        return

    mean_pct = round(np.mean(returns) * 100, 2)
    positive_pct = np.round(sum(1 for r in returns if r > 0) / len(returns) * 100, 1)
    print(f'Average Return after {dyf} days is {mean_pct}% \nfor {len(returns)} occurrences'
          f'----> {positive_pct}% positive returns\n')
当我尝试改用日内(更高时间分辨率)的数据时,我使用:
# Build the index by parsing the concatenated "Date Time" text of each row.
data.index = pd.to_datetime(data['Date'] + ' ' + data['Time'])
# Replace the textual Date column with parsed datetime values.
data['Date'] = pd.to_datetime(data['Date'])
# Parse Time (the format expects a leading space) and keep only the
# time-of-day component.
data['Time'] = pd.to_datetime(data['Time'], format=' %H:%M:%S.%f').dt.time
我的数据如下:
Date Time Last
2019-03-01 20:51:00 2019-03-01 20:51:00.0 11628.5
2019-03-01 20:54:00 2019-03-01 20:54:00.0 11627.5
2019-03-01 20:57:00 2019-03-01 20:57:00.0 11633.5
2019-03-01 21:00:00 2019-03-01 21:00:00.0 11633.0
2019-03-01 21:03:00 2019-03-01 21:03:00.0 11629.5
在上面这份数据上运行上述函数时,出现此错误:
ValueError: view limit minimum -24654.425000000003 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units
答案 0 :(得分:1)
我试了一下,在我这里似乎可以正常运行:
(唯一的改动是加上了 `.values.ravel()`,因为 `scipy.stats.pearsonr` 似乎不能直接接受 DataFrame……)
你能提供一个能够复现该错误的(无法正常运行的)示例吗?
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
# Small sample frame: timestamp strings plus the 'Last' price as text.
data = pd.DataFrame(
    [
        ['2019-03-01 20:51:00', '11628.5'],
        ['2019-03-01 20:54:00', '11627.5'],
        ['2019-03-01 20:57:00', '11633.5'],
        ['2019-03-01 21:00:00', '11633.0'],
        ['2019-03-01 21:03:00', '11629.5'],
    ],
    columns=['index', 'Last'],
)
# Move the timestamp column into a DatetimeIndex and make the prices numeric.
data.index = pd.to_datetime(data.pop('index'))
data['Last'] = data['Last'].astype(float)
# make the dataframe a bit longer...
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same
# result (the five rows repeated twice, original index labels kept).
data = pd.concat([data, data])
# --------------------------
def corr_window(data, cdw, dyf, corr_level):
    """Report forward returns after historical windows resembling the latest one.

    The final ``cdw`` rows of ``data`` are taken as the reference window.
    Each earlier window of ``cdw`` rows is correlated with it (Pearson); when
    the coefficient is strictly greater than ``corr_level`` the relative
    change of ``'Last'`` over the next ``dyf`` rows is stored and the window
    is drawn on its own figure. Finally a summary of the stored returns is
    printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Historical data containing a ``'Last'`` column. Window values are
        flattened with ``.values.ravel()`` before correlating, so this
        presumably expects ``'Last'`` to be the only column -- confirm.
    cdw : int
        Correlation window length, in rows ("days").
    dyf : int
        Number of rows ("days") forward used for the return.
    corr_level : float
        Exclusive lower bound on the correlation for a window to count.

    Returns
    -------
    None
        Results are printed and plotted; nothing is returned.
    """
    mylabels = ['Dax', str(dyf) + ' days forward']
    forward_returns = []

    latest = data[-cdw:]
    latest_flat = latest.values.ravel()
    latest.plot(title=f'Dax last {cdw} trading Days')

    # Stop early enough that a window neither overlaps the reference window
    # nor needs forward rows beyond the end of ``data`` (which would make the
    # ``iloc`` lookup below raise IndexError when ``dyf > cdw``).
    usable = len(data) - cdw - max(cdw, dyf)
    for start in range(usable):
        split = start + cdw      # first row after the candidate window
        stop = split + dyf       # row at which the forward return is read

        # Evaluate the correlation once; it is needed for the test and title.
        coefficient = pearsonr(data[start:split].values.ravel(), latest_flat)[0]
        if not coefficient > corr_level:  # NaN coefficients never match
            continue

        forward_returns.append(data.iloc[stop]['Last'] / data.iloc[split]['Last'] - 1)

        fig, ax = plt.subplots(1, 1)
        data[start:stop].plot(title="Correlation:" + str(coefficient), ax=ax)
        # Highlight the rows that follow the matched window.
        data[split:stop].plot(color='red', label='Days forward', ax=ax)
        ax.legend(labels=mylabels, loc=0)
        plt.tight_layout()

    if not forward_returns:
        # Guard against dividing by zero in the summary when nothing matched.
        print(f'No window with correlation above {corr_level} was found')
        return

    hits = len(forward_returns)
    average_pct = round(np.mean(forward_returns) * 100, 2)
    winners_pct = np.round(sum(1 for r in forward_returns if r > 0) / hits * 100, 1)
    print(f'Average Return after {dyf} days is {average_pct}% \nfor {hits} occurrences'
          f'----> {winners_pct}% positive returns\n')
# Demo run on the sample frame: 2-row correlation window, 0 rows forward, and
# a threshold of -1 so any window correlating strictly above -1 is reported.
corr_window(data, 2, 0, -1)