python中股票价格预测的线性回归

时间:2018-04-02 22:18:38

标签: python linear-regression predict

我正在尝试重写下面的代码,以使其更简单:

import quandl, math
import numpy as np

import pandas as pd
from sklearn import preprocessing, cross_validation, svm
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from matplotlib import style
import datetime

# Script: fit a linear regression on AAPL daily data to predict the adjusted
# close `forecast_out` trading days ahead, then plot the history together with
# the out-of-sample forecast.
style.use('ggplot')

# Download the AAPL price history and keep only the split/dividend-adjusted columns.
df = quandl.get("WIKI/AAPL")
df = df[['Adj. Open',  'Adj. High',  'Adj. Low',  'Adj. Close', 'Adj. Volume']]
# Engineered features, both in percent: the day's high-low range relative to
# the close, and the open-to-close move relative to the open.
df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Close'] * 100.0
df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0

df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
forecast_col = 'Adj. Close'
# Replace missing values with a sentinel so no row is dropped before the
# label column is built.
df.fillna(value=-99999, inplace=True)
# Forecast horizon: 1% of the number of rows, rounded up.
forecast_out = int(math.ceil(0.01 * len(df)))
# The label of each row is the adjusted close `forecast_out` rows later; the
# last `forecast_out` rows therefore have a NaN label.
df['label'] = df[forecast_col].shift(-forecast_out)

# Pass `axis` by keyword: the positional form `drop(labels, 1)` is rejected by
# newer pandas releases.
X = np.array(df.drop(['label'], axis=1))
X = preprocessing.scale(X)
# The most recent rows have no label yet; they are the inputs to forecast from.
X_lately = X[-forecast_out:]
X = X[:-forecast_out]

# Drop the unlabeled tail so that `y` lines up row-for-row with `X`.
df.dropna(inplace=True)

y = np.array(df['label'])

# `sklearn.cross_validation` was removed in scikit-learn 0.20; the same
# function lives in `sklearn.model_selection` (available since 0.18).
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = LinearRegression(n_jobs=-1)
clf.fit(X_train, y_train)
# For LinearRegression, score() is the R^2 on the held-out split.
confidence = clf.score(X_test, y_test)

forecast_set = clf.predict(X_lately)
df['Forecast'] = np.nan

# Append one row per forecast value, dated on consecutive calendar days after
# the last labeled row. Stepping the index value with a Timedelta avoids the
# round-trip through a Unix timestamp and datetime.fromtimestamp(), whose
# result depends on the machine's local time zone.
step = pd.Timedelta(days=1)
next_date = df.index[-1] + step

for i in forecast_set:
    # Every column except the last one ('Forecast') is NaN for a forecast row.
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [i]
    next_date += step

df['Adj. Close'].plot()
df['Forecast'].plot()
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

----------------------------------- 我的重写代码如下:

import pandas as pd
import numpy as np
import datetime
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from matplotlib import style
from sklearn import preprocessing
from sklearn import linear_model
import quandl, math
# Script: fit a linear regression of the same-day adjusted close on HL_PCT and
# volume, evaluate it on the chronologically last 20% of rows, and plot the
# predictions next to the actual prices.
df = quandl.get('WIKI/AAPL', start_date="1996-9-26", end_date='2017-12-31')
df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Close'] * 100.0
# NOTE: this column is discarded by the column selection two statements below
# and never reaches the model.
df['PCT_change'] = df['Adj. Close'].pct_change()
# Fill NaN with 1
df.fillna(1.0, inplace=True)
# .copy() makes this an independent frame, so the 'Forecast' column added
# later is written to `df` itself rather than to a slice of the wider frame.
df = df[['Adj. Close', 'HL_PCT', 'Adj. Volume']].copy()

from sklearn.model_selection import train_test_split

# NOTE(review): the features are only HL_PCT and volume, the target is the
# same row's price, and the split below is not shuffled. The first script on
# this page instead includes 'Adj. Close' among the features, predicts a
# shifted future close, and uses the default shuffled split. Those differences
# are the likely explanation for the negative score obtained here.
predictor = df[['HL_PCT', 'Adj. Volume']]
# Normalize the predictor
predictor = preprocessing.scale(predictor)
price = df['Adj. Close']
predictor = np.array(predictor)
price = np.array(price)
# shuffle=False keeps time order: the first 80% of rows train the model and
# the last 20% are held out.
X_train, X_test, y_train, y_test = train_test_split(
    predictor, price, test_size=0.2, shuffle=False)
clf = linear_model.LinearRegression()
clf.fit(X_train, y_train)
# For LinearRegression, score() is the R^2 on the held-out rows; it is
# negative when the model fits them worse than a constant mean would.
confidence = clf.score(X_test, y_test)

forecast_set = clf.predict(X_test)
num_samples = df.shape[0]
# Add Forecast column to dataframe
df['Forecast'] = np.nan
# Write the predictions into the trailing rows with one positional assignment.
# The chained form df['Forecast'][a:b] = ... triggers SettingWithCopyWarning
# and does not modify `df` when pandas Copy-on-Write is enabled. Deriving the
# start row from len(forecast_set) also guarantees the lengths match.
first_test_row = num_samples - len(forecast_set)
df.iloc[first_test_row:, df.columns.get_loc('Forecast')] = forecast_set
style.use('ggplot')

df['Adj. Close'].plot()
df['Forecast'].plot()
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
# NOTE(review): rcParams only apply to figures created after this line, so
# this does not resize the figure plotted above; set it before plotting if a
# larger figure is intended.
plt.rcParams['figure.figsize'] = (50,50)
plt.show()

两段代码都使用线性回归,使用的数据也几乎相同,但我重写的代码得到的置信度值为 -11.65,而上面原始代码的置信度值为 0.98。

我不知道是哪个部分出了问题。有人可以帮帮我吗?

0 个答案:

没有答案