我正在尝试用两种不同的标准化方法(min-max 和 z-score)对我的时间序列进行标准化,并比较两者的结果。这是我的代码:
def scale_raw_data_zscore(raw_data, idx=514844, column='d_column'):
    """Z-score standardise one column of the rows labelled ``idx``.

    Args:
        raw_data: DataFrame whose index contains ``idx`` and which has a
            numeric ``column`` column.
        idx: Index label selecting the rows (one series) to scale.
        column: Name of the column to scale.

    Returns:
        A single-column DataFrame named ``column`` holding the scaled
        values and carrying the index of the selected rows.

    Raises:
        KeyError: If ``idx`` or ``column`` is not present in ``raw_data``.
    """
    # A list selector always yields a DataFrame, even when a single row
    # matches (a scalar selector would collapse that case to a Series).
    rows = raw_data.loc[[idx]]
    # scikit-learn scalers expect a 2-D (n_samples, n_features) array.
    values = rows[column].to_numpy().reshape(-1, 1)
    normalized = StandardScaler().fit_transform(values)
    # Label the result with the one column that was scaled: reusing
    # raw_data.columns fails as soon as raw_data has more than one column.
    # The frame is returned directly because DataFrame.append no longer
    # exists in pandas 2.x.
    return pd.DataFrame(normalized, index=rows.index, columns=[column])
def scale_raw_data_minmax(raw_data, idx=514844, column='d_column'):
    """Min-max scale one column of the rows labelled ``idx`` into [0, 1].

    Args:
        raw_data: DataFrame whose index contains ``idx`` and which has a
            numeric ``column`` column.
        idx: Index label selecting the rows (one series) to scale.
        column: Name of the column to scale.

    Returns:
        A single-column DataFrame named ``column`` holding the scaled
        values and carrying the index of the selected rows.

    Raises:
        KeyError: If ``idx`` or ``column`` is not present in ``raw_data``.
    """
    # A list selector always yields a DataFrame, even when a single row
    # matches (a scalar selector would collapse that case to a Series).
    rows = raw_data.loc[[idx]]
    # scikit-learn scalers expect a 2-D (n_samples, n_features) array.
    values = rows[column].to_numpy().reshape(-1, 1)
    normalized = MinMaxScaler(feature_range=(0, 1)).fit_transform(values)
    # Label the result with the one column that was scaled: reusing
    # raw_data.columns fails as soon as raw_data has more than one column.
    # The frame is returned directly because DataFrame.append no longer
    # exists in pandas 2.x.
    return pd.DataFrame(normalized, index=rows.index, columns=[column])
def plot_data(raw_data, scaled_zscore, scaled_minmax, idx=514844, column='d_column'):
    """Draw the raw series and its two scaled versions in stacked panels.

    Args:
        raw_data: DataFrame whose index contains ``idx`` and which has
            ``date`` and ``column`` columns.
        scaled_zscore: DataFrame with a ``column`` column of z-scored values.
        scaled_minmax: DataFrame with a ``column`` column of min-max values.
        idx: Index label of the series to plot.
        column: Name of the value column to plot.

    Returns:
        None. The figure is displayed with ``pyplot.show()``.
    """
    # A list selector always yields a DataFrame, even for a single match.
    rows = raw_data.loc[[idx]]
    fig, (ax_raw, ax_zscore, ax_minmax) = pyplot.subplots(3, 1, figsize=(20, 5))

    rows.plot(kind='line', x='date', y=column, ax=ax_raw,
              title='ID: ' + str(idx), legend=False)

    panels = (
        (ax_zscore, scaled_zscore, 'zscore', 'green'),
        (ax_minmax, scaled_minmax, 'minmax', 'red'),
    )
    for ax, scaled, title, color in panels:
        frame = scaled.reset_index(drop=True)
        x_column = None  # None makes pandas plot against the positional index
        if len(frame) == len(rows):
            # Plot against the same dates as the raw panel so all three
            # x-axes are comparable. This assumes the scaled rows are in the
            # same order as raw_data.loc[[idx]].
            frame = frame.assign(date=rows['date'].to_numpy())
            x_column = 'date'
        frame.plot(kind='line', x=x_column, y=column, ax=ax,
                   title=title, color=color, legend=False)

    pyplot.show()
# Scale the selected series with both methods, then show the raw and scaled
# curves in one figure for comparison.
scaled_zscore, scaled_minmax = (
    scale_raw_data_zscore(raw_data),
    scale_raw_data_minmax(raw_data),
)
plot_data(raw_data, scaled_zscore, scaled_minmax)
我附上了结果图。为什么两种缩放方法得到的曲线形状完全相同?为什么它们的形状又与原始数据的曲线不同?