我有下面的代码，想在显示 7 月至 12 月（Jul-Dec）的图中，用 DataFrame `ltyc` 中的数据再画一条黑线。报错出现在最后一个 `plt.legend()` 之前的那一行（即 `ltyc.plot(...)`）。
# Imports and global matplotlib defaults for the wind-speed forecasting script.
import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
# Ignore all warnings; this runs before pandas and statsmodels are imported below.
warnings.filterwarnings("ignore")
# Select the 'fivethirtyeight' matplotlib style sheet.
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib
# Individual rcParams set after the style sheet has been selected.
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'k'
# NOTE(review): `exit` is not referenced anywhere in the visible script.
from sys import exit
# Load the daily wind-speed workbook and build one date-indexed frame per site.
# NOTE(review): assumes the 'Date' column is parsed as datetimes, which the
# resample() call below requires - confirm against the workbook.
df = pd.read_excel("MOSDailyWindSpeed.xlsx")
wspdBH1 = df.groupby('Date')[' Simulated WS BH1PI'].sum().reset_index()
wspdHOO = df.groupby('Date')[' Simulated WS HOO801'].sum().reset_index()
wspdBH1 = wspdBH1.set_index('Date')
wspdHOO = wspdHOO.set_index('Date')
# Bare expressions: these only display something in an interactive session.
wspdBH1.index
wspdHOO.index
# Monthly mean of the selected site; 'MS' is the month-start frequency for the
# 'Date' index. Change the site here to analyse a different column.
# (This note used to be a trailing comment that wrapped onto its own line,
# which made the script a SyntaxError.)
y = wspdHOO[' Simulated WS HOO801'].resample('MS').mean()
# View the monthly data from 2017 onwards (interactive sessions only).
y['2017':]
y.plot(figsize=(15, 6))
plt.show()
# Additive seasonal decomposition of the monthly series, drawn on a larger
# default figure size.
from pylab import rcParams

rcParams['figure.figsize'] = (18, 8)

decomposition = sm.tsa.seasonal_decompose(
    y,
    model='additive',
)
fig = decomposition.plot()
plt.show()
# Grid search over SARIMAX orders: fit every (p, d, q) x seasonal (P, D, Q, 12)
# combination with each term in {0, 1} and print the AIC of each fit.
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
# Seasonal candidates are the same combinations with a 12-period season.
seasonal_pdq = [(sp, sd, sq, 12) for sp, sd, sq in itertools.product(p, d, q)]
print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            aic = results.aic
        except Exception as err:
            # Best effort: a combination that cannot be fitted is reported and
            # skipped. `Exception` (not a bare except) keeps Ctrl-C working.
            print('ARIMA{}x{}12 - skipped: {}'.format(param, param_seasonal,
                                                      err))
            continue
        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, aic))
# Fit the chosen model, SARIMAX(1, 1, 1)x(1, 1, 0, 12), then print its
# coefficient table and show the residual diagnostics.
mod = sm.tsa.statespace.SARIMAX(
    y,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 0, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
results = mod.fit()

# tables[1] is the second table of the fit summary.
print(results.summary().tables[1])

results.plot_diagnostics(figsize=(16, 8))
plt.show()
# VALIDATE THE FORECAST - PLOT FORECAST VS ACTUAL
# One-step-ahead (non-dynamic) predictions from 2019-01-01 onwards, drawn over
# the observed series together with their confidence band.
pred = results.get_prediction(start=pd.to_datetime('2019-01-01'),
                              dynamic=False)
pred_ci = pred.conf_int()
ax = y['2019':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7,
                         figsize=(14, 7))
# Shade the area between the lower and upper confidence bounds.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
# The series is wind speed; the previous 'Furniture Sales' label was a
# leftover and disagreed with the label used on the forecast plot.
ax.set_ylabel('MOS Wind Speed')
plt.legend()
plt.show()
# Compare the one-step-ahead predictions with the observed values from
# 2019-01-01 onwards and report MSE and RMSE.
y_forecasted = pred.predicted_mean
y_truth = y['2019-01-01':]
# Compute the mean square error
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 2)))
# The message must be a single-line literal; it was previously split across
# two lines, which is a SyntaxError.
print('The Root Mean Squared Error of our forecasts is {}'.format(
    round(np.sqrt(mse), 2)))
# PRODUCE AND VISUALIZE FORECAST
# Forecast six steps past the end of the series and draw it, with its
# confidence band, over the observations from 2019 onwards.
pred_uc = results.get_forecast(steps=6)
pred_ci = pred_uc.conf_int()

ax = y['2019':].plot(figsize=(14, 7), label='observed')
pred_uc.predicted_mean.plot(label='Forecast', ax=ax)

# First column is the lower bound of the band, second column the upper bound.
ax.fill_between(
    pred_ci.index,
    pred_ci.iloc[:, 0],
    pred_ci.iloc[:, 1],
    color='k',
    alpha=.25,
)
ax.set_xlabel('Date')
ax.set_ylabel('MOS Wind Speed')
# Add the long-term (LT) monthly average to the forecast plot as a black line.
from datetime import date
today = date.today()
cm = today.month
# Mean of the monthly series per calendar month; the index holds month numbers.
lty = y.groupby(y.index.month).mean()
lty = lty.to_frame()
# Extract the current month through December of the LT mean monthly wind speed.
# Label-based slicing is used because the index holds the month numbers.
ltyc = lty.loc[cm:12].reset_index()
# `ax` has a datetime x-axis, so integer month numbers cannot be drawn on it
# (pandas raises "TypeError: index type not supported"). Rebuild the data as a
# Series indexed by month-start timestamps in the current year, matching the
# Series objects already drawn on the axis.
lt_dates = pd.DatetimeIndex(
    [pd.Timestamp(year=today.year, month=int(m), day=1)
     for m in ltyc.iloc[:, 0]]
)
ltycs = pd.Series(ltyc.iloc[:, 1].values, index=lt_dates, name='LT Mean')
ltycs.plot(label='LT Mean', ax=ax, color='k')
plt.legend()
plt.show()
`ltyc` 这个 DataFrame 如下所示，我尝试用 `ax=ax` 把它画到图中。我认为需要修改下面的 “Date” 列，因为当前坐标轴无法把 7、8、9、10、11、12 解释为月份，但我不确定该怎么做。
Date Simulated WS HOO801
0 7 5.491916
1 8 5.596823
2 9 5.793934
3 10 7.501096
4 11 8.152358
5 12 8.426322
最后,我的错误如下:
File
"C:\Users\U321103\AppData\Local\Continuum\anaconda3\envs\Stats\lib\site-
packages\pandas\plotting\_matplotlib\timeseries.py", line 309, in
format_dateaxis
raise TypeError("index type not supported")
TypeError: index type not supported
答案 0 :(得分:0)
我用下面的语句把 DataFrame 中的整数月份转换为 yyyy-mm-dd 格式的日期：`ltyc['Date'] = pd.to_datetime(ltyc['Date'], format='%m').apply(lambda dt: dt.replace(year=2020))`
然后，我把 DataFrame（ltyc）转换为如下的 Series（这里假定数值列已命名为 'LT Mean'）：`ltycs = pd.Series(ltyc['LT Mean'].values, index=ltyc['Date'])`  # 转换为 Series，因为图中其他曲线都是 Series 格式
ltycs.plot(label ='LT Mean',ax = ax,color ='k')