我正在尝试构建一个函数,用于提取任意股票的数据,然后绘制回归线。但是,我在源数据上遇到了问题。我的问题是:如何从 pandas 数据框(DataFrame)中获取时间序列,并绘制其随时间变化的线性趋势?我的代码如下:
此代码将产生回归:
# Synthetic example: noisy samples around the line y = 2x - 5, followed by
# an ordinary least-squares fit drawn on top of the samples.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

sns.set()

# Fixed seed so the generated sample is reproducible.
rng = np.random.RandomState(1)
x = 10 * rng.rand(50)
y = 2 * x - 5 + rng.randn(50)

# First figure: the raw samples only.
plt.scatter(x, y)
plt.show()

# The estimator is given a 2-D feature matrix, hence the reshape of the
# 1-D arrays into a single column.
model = LinearRegression(fit_intercept=True)
model.fit(x.reshape(-1, 1), y)

xfit = np.linspace(0, 10, 1000)
yfit = model.predict(xfit.reshape(-1, 1))

# Second figure: the samples with the fitted line over them.
plt.scatter(x, y)
plt.plot(xfit, yfit)
plt.show()
这是我尝试通过数据框传入数据的代码:
# Attempt to repeat the regression on downloaded price data.
# NOTE(review): this snippet does not run as written; see the notes below.
from datetime import datetime
import pandas_datareader.data as web
start = datetime(2017, 8, 1)
end = datetime(2018, 7, 30)
data_SP = web.DataReader('JPM', 'iex', start, end)
# NOTE(review): `dates` is not defined anywhere in the code shown.
y = dates # not sure how to get here?
# NOTE(review): the downloaded frame is bound to `data_SP`; `data` is not
# defined in the code shown.
plt.scatter(data['close'], y);
plt.show()
from sklearn.linear_model import LinearRegression
model = LinearRegression(fit_intercept=True)
model.fit(data['close'][:, np.newaxis], y)
# NOTE(review): 0..10 is the x-range used by the synthetic example earlier in
# the file; confirm it is meaningful for this data.
xfit = np.linspace(0, 10, 1000)
yfit = model.predict(xfit[:, np.newaxis])
plt.scatter(data['close'], y)
plt.plot(xfit, yfit);
plt.show()
答案 0 :(得分:1)
回归不能采用日期时间对象,必须转换为数字类型:
/* eslint-disable func-names */
/* eslint-disable no-console */
const Alexa = require('ask-sdk');
/**
 * Greets the user when the skill is launched or when GetNewFactIntent is
 * invoked.
 */
const GetNewFactHandler = {
  /**
   * @param {object} handlerInput - ASK SDK handler input.
   * @returns {boolean} true for a LaunchRequest, or for an IntentRequest
   *   whose intent name is GetNewFactIntent.
   */
  canHandle(handlerInput) {
    const request = handlerInput.requestEnvelope.request;
    return request.type === 'LaunchRequest'
      || (request.type === 'IntentRequest'
        && request.intent.name === 'GetNewFactIntent');
  },
  /**
   * Builds the welcome response: speech, a reprompt, and a simple card.
   *
   * @param {object} handlerInput - ASK SDK handler input.
   * @returns {object} the response produced by the response builder.
   */
  handle(handlerInput) {
    const speechOutput = 'Welcome to your personal heart health monitor. What would you like to know?';
    const repromptOutput = 'What would you like to know?';
    return handlerInput.responseBuilder
      .speak(speechOutput)
      // The welcome ends with a question, so a reprompt is supplied to keep
      // the session open for the user's answer.
      .reprompt(repromptOutput)
      // withSimpleCard takes (cardTitle, cardContent); previously only one
      // argument was passed, leaving the card content undefined.
      .withSimpleCard('Heart Health Monitor', speechOutput)
      .getResponse();
  },
};
/**
 * Answers the `currentbpm` intent with a fixed heart-rate reading.
 */
const CurrentBPMHandler = {
  /** Matches only IntentRequests whose intent name is `currentbpm`. */
  canHandle(handlerInput) {
    const { request } = handlerInput.requestEnvelope;
    if (request.type !== 'IntentRequest') {
      return false;
    }
    return request.intent.name === 'currentbpm';
  },
  /** Speaks the reading and repeats the same text as the reprompt. */
  handle(handlerInput) {
    const { responseBuilder } = handlerInput;
    return responseBuilder
      .speak('seventy five bpm')
      .reprompt('seventy five bpm')
      .getResponse();
  },
};
/**
 * Responds to the built-in AMAZON.HelpIntent with the help prompt.
 */
const HelpHandler = {
  /** Matches only IntentRequests whose intent name is AMAZON.HelpIntent. */
  canHandle(handlerInput) {
    const { request } = handlerInput.requestEnvelope;
    if (request.type !== 'IntentRequest') {
      return false;
    }
    return request.intent.name === 'AMAZON.HelpIntent';
  },
  /** Speaks HELP_MESSAGE and reprompts with HELP_REPROMPT. */
  handle(handlerInput) {
    const { responseBuilder } = handlerInput;
    return responseBuilder
      .speak(HELP_MESSAGE)
      .reprompt(HELP_REPROMPT)
      .getResponse();
  },
};
/**
 * Ends the conversation on the built-in cancel and stop intents.
 */
const ExitHandler = {
  /**
   * Matches IntentRequests whose intent name is AMAZON.CancelIntent or
   * AMAZON.StopIntent.
   */
  canHandle(handlerInput) {
    const { request } = handlerInput.requestEnvelope;
    if (request.type !== 'IntentRequest') {
      return false;
    }
    const intentName = request.intent.name;
    return intentName === 'AMAZON.CancelIntent' || intentName === 'AMAZON.StopIntent';
  },
  /** Speaks STOP_MESSAGE with no reprompt. */
  handle(handlerInput) {
    const { responseBuilder } = handlerInput;
    return responseBuilder.speak(STOP_MESSAGE).getResponse();
  },
};
/**
 * Logs why a session ended and returns an empty response.
 */
const SessionEndedRequestHandler = {
  /** Matches only requests of type SessionEndedRequest. */
  canHandle(handlerInput) {
    return handlerInput.requestEnvelope.request.type === 'SessionEndedRequest';
  },
  /** Writes the end reason to the console, then returns the built response. */
  handle(handlerInput) {
    const { reason } = handlerInput.requestEnvelope.request;
    console.log(`Session ended with reason: ${reason}`);
    const { responseBuilder } = handlerInput;
    return responseBuilder.getResponse();
  },
};
/**
 * Catch-all error handler: logs the error and apologises to the user.
 */
const ErrorHandler = {
  /** Accepts every error unconditionally. */
  canHandle() {
    return true;
  },
  /** Logs the error message, then speaks and reprompts with an apology. */
  handle(handlerInput, error) {
    const { message } = error;
    console.log(`Error handled: ${message}`);
    const { responseBuilder } = handlerInput;
    return responseBuilder
      .speak('Sorry, an error occurred.')
      .reprompt('Sorry, an error occurred.')
      .getResponse();
  },
};
// User-facing prompts shared by the help and exit handlers.
// The help text previously advertised "tell me a space fact", which none of
// the handlers in this skill serves; it now points at the heart-rate query.
const HELP_MESSAGE = 'You can ask for your current heart rate, or, you can say exit... What can I help you with?';
const HELP_REPROMPT = 'What can I help you with?';
const STOP_MESSAGE = 'Goodbye!';
const skillBuilder = Alexa.SkillBuilders.standard();
exports.handler = skillBuilder
.addRequestHandlers(
GetNewFactHandler,
CurrentBPMHandler,
HelpHandler,
ExitHandler,
SessionEndedRequestHandler
)
.addErrorHandlers(ErrorHandler)
.lambda();
如果使用百分比变化(pct_change),则需要处理烦人的 NaN 值。
# Fit a linear trend to JPM close prices, using "days since the start date"
# as the numeric regressor, then plot the fitted values and the observations.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import numpy as np
from datetime import datetime
import pandas as pd
# Presumably a compatibility shim needed by pandas_datareader; it is kept
# ahead of that import, as in the original ordering.
pd.core.common.is_list_like = pd.api.types.is_list_like
from sklearn.linear_model import LinearRegression
import pandas_datareader.data as web

start = datetime(2017, 8, 1)
end = datetime(2018, 7, 30)
data_SP = web.DataReader('JPM', 'iex', start, end)

# The index labels are parsed as "%Y-%m-%d" strings into datetime objects,
# then turned into integer day offsets from `start`.
dates = [datetime.strptime(label, "%Y-%m-%d") for label in data_SP.index]
days_since = [(day - start).days for day in dates]

# One feature column, built once and reused for both fit and predict.
day_offsets = np.array(days_since)[:, np.newaxis]
model = LinearRegression(fit_intercept=True)
model.fit(day_offsets, data_SP['close'])
yfit = model.predict(day_offsets)

plt.figure()
plt.scatter(dates, yfit)
plt.scatter(dates, data_SP['close'])
plt.xlabel('date')
plt.ylabel('close')
plt.show()
答案 1 :(得分:1)
我认为您想问的是如何绘制股票数据随时间的变化。就像我在评论中建议的那样,您的 x 轴应该是日期,而 y 轴应该是收盘价。
从那里,我们将简单地绘制图形:
# Scatter the 'close' column (y) against the frame's index (x).
plt.scatter(data_SP.index,data_SP['close'])
我在运行您代码的导入部分时还遇到了其他一些问题,因此,以防您也遇到这些问题,我在此贴出完整的代码:
# Download JPM prices and scatter the close price against the frame's index.
import matplotlib.pyplot as plt
import numpy as np
# `pd` is used on the shim line below but was never imported, which raised
# NameError before any data was fetched.
import pandas as pd
from datetime import datetime
# Presumably a compatibility shim needed by pandas_datareader; it must run
# before that package is imported.
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data, wb

start = datetime(2017, 8, 1)
end = datetime(2018, 7, 30)
data_SP = data.DataReader('JPM', 'iex', start, end)

plt.scatter(data_SP.index, data_SP['close'])
# Display the figure explicitly so the script also works outside an
# interactive session, matching the other snippets in this file.
plt.show()
您必须重新设置 x 轴的格式才能看清日期(dates),如果还想做其他调整(changes),也可以在此基础上进行。而且,如果要使用回归模型,则必须使用数值数据,而不是日期时间数据。(我会为您附上链接 link)
线性回归不适用于日期数据。因此,我们需要 将其转换为数值。以下代码会将 日期转换为数值:
import datetime as dt
# Parse the 'Date' column to datetimes, then map each value through
# datetime.toordinal so the column holds integers a regression can consume.
data_df['Date'] = pd.to_datetime(data_df['Date']).map(dt.datetime.toordinal)
这是链接中的第一个答案(全部功劳归于 Chandan)
答案 2 :(得分:0)
我已将代码调整为如下形式。它会生成一张图,显示相对于基准的超额收益。这段代码还有很多可以拓展的方向。例如,我们可以让它循环遍历标准普尔指数中的全部 500 只股票,找出相对于指数收益最高的股票;或者让它以 1 个月为周期循环遍历这 500 只股票,并根据历史数据找出最合适的持有时间。可视化是进行分析的好方法。
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas
from sklearn.linear_model import LinearRegression
import pandas_datareader.data as pdr
def close_price_trending(analysis):
    """Plot one processed series together with its fitted linear trend.

    Reads the module-level ``days_since``, ``dates`` and
    ``data_sample_processed``; ``analysis`` selects the column of
    ``data_sample_processed`` to fit and plot. Shows the figure and
    returns nothing.
    """
    observed = data_sample_processed[analysis]
    # Single feature column: the day offsets of each observation.
    day_offsets = np.array(days_since)[:, np.newaxis]

    trend_model = LinearRegression(fit_intercept=True)
    trend_model.fit(day_offsets, observed)
    fitted = trend_model.predict(day_offsets)

    plt.scatter(dates, observed)
    plt.scatter(dates, fitted)
    plt.xlabel('date')
    plt.ylabel('close')
    plt.show()
def return_excess_benchmark1(analysis, benchmark):
    """Plot the cumulative return of ``analysis`` minus that of ``benchmark``.

    Both arguments select columns of the module-level
    ``data_sample_processed``. Each column is compounded with
    ``(1 + r).cumprod()``; the difference of the two compounded series is
    scattered against the module-level ``dates`` together with a linear
    fit over ``days_since``. Shows the figure and returns nothing.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    fig.subplots_adjust(top=0.85)
    ax.set_title(str(analysis) + ' O/U ' + str(benchmark))

    # Difference between the two compounded series.
    compounded_analysis = (1 + data_sample_processed[analysis]).cumprod()
    compounded_benchmark = (1 + data_sample_processed[benchmark]).cumprod()
    excess = compounded_analysis - compounded_benchmark
    plt.scatter(dates, excess)

    day_offsets = np.array(days_since)[:, np.newaxis]
    trend_model = LinearRegression(fit_intercept=True)
    trend_model.fit(day_offsets, excess)
    fitted = trend_model.predict(day_offsets)

    plt.scatter(dates, fitted)
    plt.xlabel('date')
    plt.ylabel('close')
    fig.show()
# Download the close-price history for each symbol and derive returns.
start = datetime(2015, 8, 1)
end = datetime(2018, 7, 30)
Symbol_List = ['GSLC', 'AGG', 'JPM','CAR', 'IVV', 'DSI', 'VTI']

# One frame per symbol, with its 'close' column renamed to the ticker so the
# frames can be joined side by side.
per_symbol_frames = [
    pdr.DataReader(s, 'iex', start, end).rename(columns={'close': s})
    for s in Symbol_List
]
data = pandas.concat(per_symbol_frames, axis=1)
data_sample = data[Symbol_List]

# Period-over-period percentage change; missing values are replaced by 0.
data_sample_processed = data_sample.pct_change().fillna(0)

# Index labels are parsed as "%Y-%m-%d" strings, then converted to integer
# day offsets from `start` for use as the regression feature.
dates = [datetime.strptime(label, "%Y-%m-%d") for label in data_sample_processed.index]
days_since = [(day - start).days for day in dates]

# Compare two symbols against the same benchmark.
analysis_symbol_1 = 'DSI'
analysis_symbol_2 = 'GSLC'
benchmark_1 = 'VTI'
for analysis_symbol in (analysis_symbol_1, analysis_symbol_2):
    return_excess_benchmark1(analysis_symbol, benchmark_1)