Python和深度学习的新手。我试图用一些数据建立一个RNN而且我不知道我哪里出错了。
这是我的代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
raw = pd.read_excel('Online Retail.xlsx',index_col='InvoiceDate')
sales = raw.drop(['InvoiceNo','StockCode','Country','Description'],axis=1)
sales.head()
sales.index = pd.to_datetime(sales.index)
sales.info()
train_set = sales.head(50000)
test_set = sales.tail(41909)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
training = np.nan_to_num(train_set)
testing = np.nan_to_num(test_set)
train_scaled = scaler.fit_transform(training)
test_scaled = scaler.fit_transform(testing)
def next_batch(training_data,batch_size,steps):
rand_start = np.random.randint(0,len(training_data)-steps)
y_batch =
np.array(training_data[rand_start:rand_start+steps+1].reshape(26,steps+1))
return
y_batch[:,:-1].reshape(-1,steps,1),y_batch[:,1:].reshape(-1,steps,1)
import tensorflow as tf
num_inputs = 1
num_time_steps = 10
num_neurons = 100
num_outputs = 1
learning_rate = 0.03
num_train_iterations = 4000
batch_size = 1
X = tf.placeholder(tf.float32,[None,num_time_steps,num_inputs])
y = tf.placeholder(tf.float32,[None,num_time_steps,num_outputs])
cell = tf.contrib.rnn.OutputProjectionWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units=num_neurons,activation=tf.nn.relu),output_size=num_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto()) as sess:
sess.run(init)
for iteration in range(num_train_iterations):
X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
sess.run(train, feed_dict={X: X_batch, y: y_batch})
if iteration % 100 == 0:
mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
print(iteration, "\tMSE:", mse)
# Save Model for Later
saver.save(sess, "./ex_time_series_model")
输出:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-36-f2f7c66a33df> in <module>()
4 for iteration in range(num_train_iterations):
5
----> 6 X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
7 sess.run(train, feed_dict={X: X_batch, y: y_batch})
8
<ipython-input-26-f673a469c67d> in next_batch(training_data, batch_size, steps)
1 def next_batch(training_data,batch_size,steps):
2 rand_start = np.random.randint(0,len(training_data)-steps)
----> 3 y_batch = np.array(training_data[rand_start:rand_start+steps+1].reshape(26,steps+1))
4 return y_batch[:,:-1].reshape(-1,steps,1),y_batch[:,1:].reshape(-1,steps,1)
ValueError: cannot reshape array of size 33 into shape (26,11)
In [ ]:
答案 0 :(得分:1)
我不确定26
号码的来源,但它与您的数据维度不匹配。删除四列后,training_data
数组为(50000, 3)
,您可以在其中进行(11, 3)
次批处理。这个数组显然不能重塑为(26, 11)
。
你可能意味着这个(在next_batch
函数中):
y_batch = np.array(training_data[rand_start:rand_start+steps+1].reshape(3,steps+1))
答案 1 :(得分:0)
错误表示您尝试将尺寸为33
的张量重塑为尺寸为26x11
的张量,这是您无法做到的。您应该将大小为286
的张量重塑为26x11
。
尝试使用next_batch
在每个步骤中打印y_batch
形状来调试print (y_batch.get_shape())
函数,并检查它是否具有286
形状。
我没有抓住这一点,为什么你要随机取出每一批?为什么你没有正常读取输入数据?
如果在发布代码时修复缩进量会很好,但很难跟踪。