Question

我想使用机器学习来预测资产的价格走势。到目前为止，我已经得到了数据和结果。现在我想回测模型。前提很简单：只要预测值为1就买入并持有。我想应用预测模型，从最后一行向上逐行遍历指定数量的测试行，检查预测输出是否与相应的标签（此处标签为-1或1）一致，然后进行一些计算。

代码如下：

def backtest():
    """Train a random forest on the older rows and replay the newest ones.

    Reads the module-level ``df`` (columns ``open``, ``high``, ``low``,
    ``close``, ``vol``, ``label`` and ``log_ret``), fits the classifier on
    everything except the last ``test_size`` rows, then walks the held-out
    rows from the bottom of the frame upwards.  Prints the accuracy, the
    number of trades and the last computed return; returns nothing.
    """
    x = df[['open', 'high', 'low', 'close', 'vol']]
    y = df['label']
    z = np.array(df['log_ret'].values)

    test_size = 366
    rf = RandomForestClassifier(n_estimators = 100)
    # Hold out the last `test_size` rows; train on everything before them.
    rf.fit(x[:-test_size],y[:-test_size])

    invest_amount = 1000
    trade_qty = 0
    correct_count = 0

    for i in range(1, test_size):
        # NOTE: this is the line the traceback below points at.  `x[-i]` on a
        # DataFrame goes through `DataFrame.__getitem__`, which looks the key
        # up among the *columns*, so it raises `KeyError: -1` instead of
        # returning the i-th row from the end.
        if rf.predict(x[-i])[0] == y[-i]:
            correct_count += 1

        # A predicted label of 1 is the buy signal.
        if rf.predict(x[-i])[0] == 1:
            invest_return = invest_amount + (invest_amount * (z[-i]/100))
            trade_qty += 1


    print('accuracy:', (correct_count/test_size)*100)
    print('total trades:', trade_qty)
    print('profits:', invest_return)

backtest()

目前我卡在了下面这个错误上：

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2645             try:
-> 2646                 return self._engine.get_loc(key)
   2647             except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: -1

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-29-feab89792f26> in <module>
     22 
     23 for i in range(1, test_size):
---> 24     if rf.predict(x[-i])[0] == y[-i]:
     25         correct_count += 1
     26 

~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2798             if self.columns.nlevels > 1:
   2799                 return self._getitem_multilevel(key)
-> 2800             indexer = self.columns.get_loc(key)
   2801             if is_integer(indexer):
   2802                 indexer = [indexer]

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2646                 return self._engine.get_loc(key)
   2647             except KeyError:
-> 2648                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2649         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2650         if indexer.ndim > 1 or indexer.size > 1:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: -1

Answer 1

下面的代码通过一些修改即可解决该问题：

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

# Fit a one-feature linear regression of points on team value, plot the fit,
# then answer interactive queries.
df = pd.read_csv("premstats.csv")
print(df.describe())
print(df.columns)
y = df.Points
X = df.Value
# scikit-learn estimators take 2-D input of shape (n_samples, n_features),
# so the single column is reshaped into an (n, 1) array.
X = X.values.reshape(-1, 1)
y = y.values.reshape(-1, 1)

# Can we do linear regression on this?

model = LinearRegression()
model.fit(X, y)
predictions = model.predict(X)
plt.scatter(X, y, alpha=0.4)
# Plot line here:
plt.plot(X, predictions, "-")
plt.title("Premier League")
plt.xlabel("Team Values from seaons 2013/14 to 2018/19")
plt.ylabel("Points collected")
plt.show()

# Prompt forever; the loop has no exit of its own, so it ends on Ctrl-C or on
# input that float() cannot parse.
while True:
    enquiry = float(input("Enter the value of a team, and I'll predict the number of points they'll collect!"))
    # predict() rejects a bare scalar; wrap the value as one sample with one
    # feature so it has the same 2-D layout the model was fitted on.
    print(model.predict([[enquiry]]))

解释修改：

通过过滤索引

def backtest():
    x = df[['open', 'high', 'low', 'close', 'vol']]
    y = df['label']
    z = np.array(df['log_ret'].values)

    test_size = 366
    rf = RandomForestClassifier(n_estimators = 100)
    rf.fit(x[:-test_size],y[:-test_size])

    invest_amount = 1000
    trade_qty = 0
    correct_count = 0

    for i in range(1, test_size)[::-1]:
        if rf.predict(x[x.index == i])[0] == y[i]:
            correct_count += 1

        if rf.predict(x[x.index == i])[0] == 1:
            invest_return = invest_amount + (invest_amount * (z[i]/100))
            trade_qty += 1

    print('accuracy:', (correct_count/test_size)*100)
    print('total trades:', trade_qty)
    print('profits:', invest_return)

backtest()

访问数据框行；
改用反向范围 range(1, test_size)[::-1] 并配合 x[x.index == i] 按索引取行，以代替原来的负索引；

生成测试用例：

range(1, test_size)[::-1]

这将产生以下结果：

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Synthetic stand-in for the price data: six columns of values drawn by
# np.random.rand plus a label column drawn at random from {-1, 1}.
n_rows = 1000
feature_cols = ['open', 'high', 'low', 'close', 'vol', 'log_ret']

# Columns are generated in the listed order, label last.
data = {col: np.random.rand(n_rows) for col in feature_cols}
data['label'] = np.random.choice([-1, 1], n_rows)

df = pd.DataFrame(data)

指定测试行以进行回测和机器学习

1 个答案: