我用 Theano 写了一个简单的 RNN 来做一些测试。作为一个小实验,我想给隐藏层的输出增加更多的 taps(即在 scan 中让每一步回看更多的历史时间步)。
然而,这给了我以下错误:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 585, in runfile
execfile(filename, namespace)
File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 531, in <module>
test_real()
File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 509, in test_real
model.fit(seq, targets, validation_frequency=1000)
File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 418, in fit
gparam = T.grad(cost, param)
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 542, in grad
grad_dict, wrt, cost_name)
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1272, in _populate_grad_dict
rval = [access_grad_cache(elem) for elem in wrt]
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1232, in access_grad_cache
term = access_term_cache(node)[idx]
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1082, in access_term_cache
input_grads = node.op.grad(inputs, new_output_grads)
File "C:\Anaconda\lib\site-packages\theano\scan_module\scan_op.py", line 1729, in grad
outer_inp_mitmot.append(dC_douts[idx + offset][::-1])
TypeError: 'Variable' object has no attribute '__getitem__'
您可以在下面找到我用来重现此错误的完整代码
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 07 13:28:51 2014
@author: vaneetke
"""
import numpy as np
import theano
import theano.tensor as T
# Floating-point type used for the symbolic matrices, the initial parameter
# arrays, the learning rate and the artificial data in this script.
dtype='float32'
# Step function handed to theano.scan.
#   sequences:      x_t
#   prior results:  h_tm2, h_tm1  (hidden state at taps -2 and -1)
#   non-sequences:  W_ih, W_hh, b_h, W_ho, b_o
def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    """Compute the hidden state and the output for one time step.

    The new hidden state is the tanh of the projected input plus both
    previous hidden states, each projected through the same recurrent
    matrix ``W_hh``, plus the bias ``b_h``.  The output is a linear
    read-out of the new hidden state through ``W_ho`` and ``b_o``.

    Returns the list ``[h_t, y_t]``.
    """
    input_term = theano.dot(x_t, W_ih)
    lag1_term = theano.dot(h_tm1, W_hh)
    lag2_term = theano.dot(h_tm2, W_hh)
    h_t = T.tanh(input_term + lag1_term + lag2_term + b_h)

    y_t = theano.dot(h_t, W_ho) + b_o
    return [h_t, y_t]
# symbolic input sequence; the first dimension is time
x = T.matrix(dtype=dtype)

# layer sizes
n_hid = 3
n_in = 1
n_out = 1

# Initial values: uniform noise in [-0.01, 0.01), converted to `dtype`.
W_hh_values = np.random.uniform(low=-.01, high=.01, size=(n_hid, n_hid)).astype(dtype)
# two rows of initial hidden state, consumed by scan through taps -2 and -1
h0_value = np.random.uniform(low=-.01, high=.01, size=(2, n_hid)).astype(dtype)
b_h_value = np.random.uniform(low=-.01, high=.01, size=(n_hid)).astype(dtype)
W_ih_values = np.random.uniform(low=-.01, high=.01, size=(n_in, n_hid)).astype(dtype)
W_ho_values = np.random.uniform(low=-.01, high=.01, size=(n_hid, n_out)).astype(dtype)
b_o_value = np.random.uniform(low=-.01, high=.01, size=(n_out)).astype(dtype)

# parameters of the rnn, wrapped as shared variables so they can be updated
b_h = theano.shared(b_h_value)
h0 = theano.shared(h0_value)
W_ih = theano.shared(W_ih_values)
W_hh = theano.shared(W_hh_values)
W_ho = theano.shared(W_ho_values)
b_o = theano.shared(b_o_value)

# everything that is trained, including the initial hidden state h0
params = [W_ih, W_hh, b_h, W_ho, b_o, h0]
# Hidden states and outputs for the entire sequence.
# outputs_info has one entry per value returned by one_step:
#   - h_t is recurrent: seeded from h0 and fed back at taps -2 and -1
#   - y_t is not fed back, hence None
[h_vals, y_vals], _ = theano.scan(
    fn=one_step,
    sequences={'input': x},
    outputs_info=[{'initial': h0, 'taps': [-2, -1]}, None],
    non_sequences=[W_ih, W_hh, b_h, W_ho, b_o],
)
# symbolic target values, compared against y_vals in the cost
t = T.matrix(dtype=dtype)

# learning rate, stored in a shared variable
lr = np.cast[dtype](0.1)
learning_rate = theano.shared(lr)

# cost = half the mean squared error
#      + half the squared gap between the std of the predictions and of the targets
squared_error = (y_vals - t) ** 2.0
std_gap = y_vals.std() - t.std()
cost = (0.5 * squared_error.mean()) + (0.5 * std_gap ** 2.0)

# gradient of the cost with respect to each parameter, in the order of params
gparams = [T.grad(cost, param) for param in params]

# plain gradient-descent step for every parameter
updates = [(param, param - gparam * learning_rate)
           for param, gparam in zip(params, gparams)]

# one training step: returns the cost and applies the parameter updates
learn_rnn_fn = theano.function(inputs=[x, t], outputs=cost, updates=updates)

# forward pass only: returns the outputs for an input sequence
eval_rnn_fn = theano.function(inputs=[x], outputs=y_vals)
# artificial data
x_ = np.array(np.arange(0.,100.,0.21), dtype=dtype)
x_ = x_.reshape(len(x_), 1)
s_ = np.sin(x_)
t_ = np.roll(s_, -1)[:-1]
s_ = s_[:-1]
for i in xrange(100):
cost = learn_rnn_fn(s_,t_)
print i, " - cost: ", cost.mean()
pred = eval_rnn_fn(s_)
from matplotlib import pyplot as plt
plt.plot(t_)
plt.plot(pred, '--')
plt.show()
不添加第二个 tap 时一切正常(即只使用 taps=[-1],并且 one_step() 函数中只有 h_tm1)。
我在这里做错了什么,或者这可能是Theano的错误?
答案 0 :(得分:0)
我不确定这是否就是您的问题所在,但按这种方式编写 outputs_info 时,
需要的是矩阵类型的返回值。输出序列将是一个三维张量 (sequence_length, 2, input_width)。