绘制其他熊猫DF

时间:2020-07-14 20:51:35

标签: python pandas plot

我有下面的代码,我试图在显示为Jul-Dec的图中为df“ ltyc”中的数据绘制一条黑线。我的错误是在plt之前的最后.legend行。

import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib

matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'k'
from sys import exit

df = pd.read_excel("MOSDailyWindSpeed.xlsx")

wspdBH1 = df.groupby('Date')[' Simulated WS BH1PI'].sum().reset_index()
wspdHOO = df.groupby('Date')[' Simulated WS HOO801'].sum().reset_index()
wspdBH1 = wspdBH1.set_index('Date')
wspdHOO = wspdHOO.set_index('Date')
wspdBH1.index
wspdHOO.index
y = wspdHOO[' Simulated WS HOO801'].resample('MS').mean()#monthly mean --> 
change site here 'MS' is month start for 'Date' col
y['2017':]#look at daily data starting 2017 -view data 

y.plot(figsize=(15, 6))
plt.show()

from pylab import rcParams
rcParams['figure.figsize'] = 18, 8

decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()

p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, 
d, q))]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

for param in pdq:
  for param_seasonal in seasonal_pdq:
    try:
        mod = sm.tsa.statespace.SARIMAX(y,
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)

        results = mod.fit()

        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, 
results.aic))
    except:
        continue

mod = sm.tsa.statespace.SARIMAX(y,
                            order=(1, 1, 1),
                            seasonal_order=(1, 1, 0, 12),
                            enforce_stationarity=False,
                            enforce_invertibility=False)

results = mod.fit()

print(results.summary().tables[1])        

results.plot_diagnostics(figsize=(16, 8))
plt.show()

# VALIDATE THE FORECAST - PLOT FORECAST VS ACTUAL 
pred = results.get_prediction(start=pd.to_datetime('2019-01-01'), 
dynamic=False)
pred_ci = pred.conf_int()
ax = y['2019':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, 
figsize=(14, 7))

ax.fill_between(pred_ci.index,
            pred_ci.iloc[:, 0],
            pred_ci.iloc[:, 1], color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()

plt.show()

y_forecasted = pred.predicted_mean
y_truth = y['2019-01-01':]

# Compute the mean square error
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 
2)))
print('The Root Mean Squared Error of our forecasts is 
{}'.format(round(np.sqrt(mse), 2)))

#PRODUCE AND VISUALIZE FORECAST
pred_uc = results.get_forecast(steps=6)
pred_ci = pred_uc.conf_int()

ax = y['2019':].plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index,
            pred_ci.iloc[:, 0],
            pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('MOS Wind Speed')
#add the LT monthly average to plot
from datetime import date
cm = date.today().month
lty = y.groupby(y.index.month).mean()
lty = lty.to_frame()
ltyc = lty.iloc[cm-1:12].reset_index() # extract curr month to end of LT 
mean monthly wind speed
ltyc.plot(label='LT Mean',ax=ax, color='k')

plt.legend()
plt.show()

ltyc df看起来像这样,我尝试使用axis = ax在下图中绘制。我想我需要在下面更改“日期”列,因为当前轴不能将7、8、9、10、11、12解释为月,但是我不确定该怎么做。

   Date   Simulated WS HOO801
0     7              5.491916
1     8              5.596823
2     9              5.793934
3    10              7.501096
4    11              8.152358
5    12              8.426322 

最后,我的错误如下:

File 
"C:\Users\U321103\AppData\Local\Continuum\anaconda3\envs\Stats\lib\site- 
packages\pandas\plotting\_matplotlib\timeseries.py", line 309, in 
format_dateaxis
raise TypeError("index type not supported")

TypeError: index type not supported

enter image description here

1 个答案:

答案 0 :(得分:0)

我用它在DF中将整数日期转换为yyyy-mm-dd格式。 ltyc ['Date'] = pd.to_datetime(ltyc [“ Date”],format ='%m')。apply(lambda dt:dt.replace(year = 2020))

然后,我将DF(ltyc)转换为如下序列: ltycs = pd.Series(ltyc ['LT Mean']。values,index = ltyc ['Date'])#转换为Series,因为其他绘图均为系列格式

ltycs.plot(label ='LT Mean',ax = ax,color ='k')