日期格式的LSTM错误

时间:2017-12-03 11:34:36

标签: python deep-learning lstm forex

这是我深度学习的第一次尝试,此代码的目的是预测FOREX市场方向。

以下是代码:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']

data = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)

data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
del data['Date']
del data['Time']

sequence_length = 21
n_features = len(data.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512

data = data.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
    data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)

val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]

print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))

train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape

train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))

preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)

train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))

X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))

model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))

model.compile(loss="mse", optimizer="adam")

history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2)

preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
    pred = preds_val[i][0]
    diff.append(y_val[i] - pred)

real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[104])
print(preprocessor.data_max_[104])

preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min

plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()

这是错误:

  

使用TensorFlow后端。 2017-12-03 13:26:44.494199:W
  C:\ tf_jenkins \ home \ workspace \ rel-win \ M \ windows \ PY \ 36 \ tensorflow \ core \ platform \ cpu_feature_guard.cc:45] TensorFlow库未编译为使用AVX指令,但是
  这些都可以在您的机器上使用,并可以加速CPU   计算。 2017-12-03 13:26:44.494660:W
  C:\ tf_jenkins \ home \ workspace \ rel-win \ M \ windows \ PY \ 36 \ tensorflow \ core \ platform \ cpu_feature_guard.cc:45] TensorFlow库未编译为使用AVX2指令,但是
  这些都可以在您的机器上使用,并可以加速CPU   计算。培训数据:(1824,21,6)验证数据:(203,21,
  6)回溯(最近的呼叫最后):文件" E:/ Tutorial / Deep
  Learning.py",第42行,in       预处理器= MinMaxScaler()。fit(train)文件" C:\ Users \ sydgo \ Anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ data.py",
  线308,在适合中       return self.partial_fit(X,y)文件" C:\ Users \ sydgo \ Anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ data.py",
  第334行,在partial_fit中       estimator = self,dtype = FLOAT_DTYPES)文件" C:\ Users \ sydgo \ Anaconda3 \ lib \ site-packages \ sklearn \ utils \ validation.py",
  第433行,在check_array中       array = np.array(array,dtype = dtype,order = order,copy = copy)TypeError:float()参数必须是字符串或数字,而不是
  '时间戳'

2 个答案:

答案 0 :(得分:0)

预期的 dtype 之间存在冲突 和
实际上已经提供了数据类型:

  

TypeError: float() argument must be a string or a number, not 'Timestamp'

最可能修改的嫌疑人是转换:

data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)

你必须回到这个概念中,应该将什么(如果有的话)作为预期的FOREX定量建模特征的集合输入到LSTM模型中。

答案 1 :(得分:0)

这是修复错误后的代码

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']

df = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)

df['DateTime'] = pd.to_datetime(df.Date + ' ' + df.Time)
del df['Date']
del df['Time']

df.rename(columns={'DateTime': 'timestamp', 'Open': 'open',
                   'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)
df['timestamp'] = pd.to_datetime(df['timestamp'], infer_datetime_format=True)
df.set_index('timestamp', inplace=True)
df = df.astype(float)
df['hour'] = df.index.hour
df['day'] = df.index.weekday
df['week'] = df.index.week


sequence_length = 21
n_features = len(df.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512

data = df.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
    data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)

val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]

print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))

train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape

train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))

preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)

train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))

X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))

model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))

model.compile(loss="mse", optimizer="adam")

history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2)

preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
    pred = preds_val[i][0]
    diff.append(y_val[i] - pred)

real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[:120])
print(preprocessor.data_max_[:120])

preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min

plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()