我将两个不同的数据框垂直追加(拼接)成一个序列。我想在使用 RandomForestRegressor
拟合模型后绘制图表,同时画出实际值和预测值。我正在使用的两个数据集可以在 this link 找到。
我的预测方案以及尝试用这些值绘图的代码如下所示:
当我只使用单个数据集(例如:import glob
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
# Load every cubic*.csv file in the working directory and stack them into one
# frame. The paths are sorted because glob returns them in arbitrary order,
# and the concatenation order decides which dataset comes first in the plots.
csv_paths = sorted(glob.glob(os.path.join('', "cubic*.csv")))
df = pd.concat(map(pd.read_csv, csv_paths), ignore_index=True)
#df = pd.read_csv('cubic31.csv')
#df.sort_values(['time'], inplace=True)

# Replace 'time' with a fixed-precision string of the value scaled by 100.
# NOTE(review): string x-values are drawn by matplotlib as categories, not as
# a numeric axis -- confirm this is intended.
df['time'] = pd.Series(
    ["{0:.10f}".format(val * 100) for val in df['time']],
    index=df.index,
)

# Add lagged copies of X (one and two rows back) as extra feature columns.
for i in range(1, 3):
    df['X_t' + str(i)] = df['X'].shift(i)
print(df)

# shift() leaves NaN in the first rows; drop them before fitting.
df.dropna(inplace=True)

# Features are every column except the target Y. (An earlier assignment to X
# built from shifted columns was immediately overwritten and has been removed.)
X = df.drop('Y', axis=1)
y = df['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40)

# 'time' is kept through the split and dropped only now, so it is not a feature.
X_train = X_train.drop('time', axis=1)
X_test = X_test.drop('time', axis=1)

print(X.shape)
print(df['Y'].shape)
print()
print("Size of X_train:", (len(X_train)))
print("Size of Y_train:", (len(y_train)))
print("Size of X_test:", (len(X_test)))
print("Size of Y_test:", (len(y_test)))
print(X_train.shape)
print(y_train.shape)
print()

# Fit the regressor. The original script called reg.predict() without ever
# creating or fitting `reg`, which raises NameError.
reg = RandomForestRegressor()
reg.fit(X_train, y_train)

####### to add the trendline
fig, ax = plt.subplots()
#df.plot(x='time', y='Y', ax=ax)
ax.plot(df['time'].values, df['Y'].values)

# Restore row order in the shuffled test split, then predict. This must happen
# before the annotation below, which reads modelPred_test.
y_test = y_test.sort_index()
X_test = X_test.sort_index()
modelPred_test = reg.predict(X_test)
index_values = range(0, len(y_test))

fig, ax = plt.subplots()
ax.plot(pd.Series(index_values), y_test.values)
ax.annotate('annote test!',
            xy=(len(modelPred_test), modelPred_test[-1]),
            xycoords='data',
            xytext=(-30, 30),
            textcoords='offset points',
            arrowprops=dict(arrowstyle="->"))

# Predicted (column 0) and actual (column 1) values side by side.
# DataFrame.plot() opens its own figure, so no separate plt.figure() is needed.
plotsInOne = pd.DataFrame(
    pd.concat([pd.Series(modelPred_test), pd.Series(y_test.values)], axis=1)
)
comparison_ax = plotsInOne.plot()
comparison_ax.legend(loc='best')
)时
cubic31.csv
并应用绘图命令
df = pd.read_csv('cubic31.csv')
这是我得到的图。
当我们将第二个数据集 fig, ax = plt.subplots()
ax.plot(df['time'].values, df['Y'].values)
fig, ax = plt.subplots()
)作为
cubic32.csv
并应用绘图命令
df = pd.read_csv('testdata2.csv')
这是我得到的图。
但是,如果我将两个数据集合并为
fig, ax = plt.subplots()
ax.plot(df['time'].values, df['Y'].values)
fig, ax = plt.subplots()
,这是我得到的图。
我想在每条曲线的末尾加上一个标记(如图中的红色箭头所示)。我已尝试使用以下代码,但它只指向第二条曲线的末尾,而没有指向第一条曲线的末尾,如下所示:
df = pd.concat(map(pd.read_csv, glob.glob(os.path.join('', "cubic*.csv"))))
我们如何对绘图命令执行此操作,以便我们可以自动注释和标记(例如plt.annotate('annote test!',
xy=(len(modelPred_test), modelPred_test[-1]),
xycoords='data',
xytext=(-30,30),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
,this is where cubic31.csv ends
,...等)图,如下所示?
答案 0 :(得分:2)
考虑在 annotate 的 xy 坐标中使用 df.ix[]
,其中的行索引可以通过 df.shape
(由行数和列数组成的元组)获得。注意:df.ix 已在 pandas 1.0 中被移除,在新版本中应改用 df.loc 或 df.iloc。下面用随机数据进行演示(设置了随机种子以便复现)。第二个数据框的长度是第一个数据框的一半。下面还展示了水平合并和垂直追加两种情况下带注释的图:
数据
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def _seeded_frame(seed, length):
    """Build a demo frame of `length` rows after seeding NumPy's global RNG.

    'Y' holds one np.random.randint(0, 1000) draw per row and 'time' counts
    up from 0 to length - 1.
    """
    np.random.seed(seed)
    draws = []
    for _ in range(length):
        draws.append(np.random.randint(0, 1000))
    return pd.DataFrame({'Y': draws, 'time': range(length)})


# DATAFRAME 1: 50 rows, seed 33
df1 = _seeded_frame(33, 50)
# DATAFRAME 2: 25 rows (half the length of the first), seed 64
df2 = _seeded_frame(64, 25)
单个数据框的注释
def runplot(df, title):
    """Plot ``Y`` against ``time`` on a new figure and mark the last point.

    Args:
        df: DataFrame with ``time`` and ``Y`` columns.
        title: Text used as the axes title.

    Returns:
        None. The figure is left open for a later ``plt.show()``.
    """
    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    ax.set_title(title)

    # An empty frame has no last point to annotate.
    if df.empty:
        return

    # Positional .iloc replaces DataFrame.ix, which was removed in pandas 1.0.
    # The x-coordinate is the last plotted time value rather than the row
    # count, so the arrow lands on the curve for any index or time spacing.
    last_x = df['time'].iloc[-1]
    last_y = df['Y'].iloc[-1]
    ax.annotate('annote test!',
                xy=(last_x, last_y),
                xycoords='data',
                xytext=(-30, 30),
                textcoords='offset points',
                arrowprops=dict(arrowstyle="->"))
# Draw one annotated figure per demo frame, in the same order as before.
for frame, label in ((df1, 'Dataframe 1'), (df2, 'Dataframe 2')):
    runplot(frame, label)
水平合并/垂直附加注释
def _annotate_last_point(ax, frame, text):
    """Draw an arrow labelled `text` at the last (time, Y) row of `frame`."""
    # An empty frame has no last point to annotate.
    if frame.empty:
        return
    # Positional .iloc replaces DataFrame.ix, which was removed in pandas 1.0.
    # The x-coordinate is the frame's last time value rather than its row
    # count, so the arrow lands on the plotted point.
    ax.annotate(text,
                xy=(frame['time'].iloc[-1], frame['Y'].iloc[-1]),
                xycoords='data',
                xytext=(-30, 30),
                textcoords='offset points',
                arrowprops=dict(arrowstyle="->"))


def runplot_merge(left_df, right_df, df, title):
    """Plot a combined frame and mark where each source frame ends.

    Args:
        left_df: First source DataFrame, with ``time`` and ``Y`` columns.
        right_df: Second source DataFrame, with ``time`` and ``Y`` columns.
        df: Combined DataFrame whose ``time`` and ``Y`` columns are plotted.
        title: Text used as the axes title.

    Returns:
        None. The figure is left open for a later ``plt.show()``.
    """
    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    ax.set_title(title)

    _annotate_last_point(ax, left_df, 'annote test 1!')
    _annotate_last_point(ax, right_df, 'annote test 2!')
# Column-wise merge: both frames side by side, so the result carries two
# 'Y' columns and two 'time' columns.
df_wide = pd.concat((df1, df2), axis='columns')
runplot_merge(df1, df2, df_wide, 'Horizontal Merge')

# Row-wise append: stack the frames, renumber the rows, then order by time.
stacked = pd.concat((df1, df2))
df_long = stacked.reset_index(drop=True).sort_values('time')
runplot_merge(df1, df2, df_long, 'Vertical Append')

# Display every open figure, then release the current one.
plt.show()
plt.clf()
plt.close()