Before getting to the question, I apologize for any awkward English sentences, as English is not my first language.
I am trying to use theano.tensor.scan correctly, but I do not understand how its 'sequences' parameter works. I created a 3-dimensional array of shape (42, 10, 7002) and passed it in as the sequence. I expected each unit of the sequence to be a 2-dimensional array of shape (10, 7002), with 42 steps. However, each unit seems to be (1, 7002) instead. How can I make each sequence unit a matrix with 10 rows and 7002 columns? Thank you for reading this question.
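To illustrate what I expected, here is a stripped-down sketch (not my actual program; the dummy step function and the zero-filled input are only for illustration) of how I understood the 'sequences' parameter, with scan slicing the 3-dimensional sequence along its first axis so that each step receives a (10, 7002) matrix:

```python
import numpy as np
import theano
import theano.tensor as T

X = T.tensor3('X')  # expected shape: (n_steps=42, rows=10, cols=7002)

def step(x_t):
    # x_t should be one slice along the first axis, i.e. a (10, 7002) matrix
    return x_t.sum()

outputs, updates = theano.scan(fn=step, sequences=[X])
f = theano.function([X], outputs)

data = np.zeros((42, 10, 7002), dtype=theano.config.floatX)
print(f(data).shape)  # (42,) -- one result per (10, 7002) slice
```

In this isolated form the slicing seems to behave as I expect, but inside my full code below the slice that reaches step does not have this shape.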
Here is my full code:

```python
# -*- coding: utf-8 -*-
__author__ = "Haizhou Qu"

import readFile
import numpy as np
import theano
import theano.tensor as T
from six.moves import zip
# from theano.compile.debugmode import DebugMode

theano.config.optimizer = 'fast_compile'
theano.config.exception_verbosity = 'high'
theano.config.compute_test_value = 'warn'

epsilon = 1e-6
dtype = theano.config.floatX

minibatch_size_g = 0
longest_seq_g = 0
voca_dim_global = 0
n_time_step_input_g = 0
n_timestep_target_g = 0


def printT(x):
    t = theano.printing.Print('T')(x)
    return t


def shared(value, name=None):
    return theano.shared(value.astype(dtype), name=name)


def shared_zeros(shape, name=None):
    return shared(value=np.zeros(shape), name=name)


def shared_zeros_like(x, name=None):
    return shared_zeros(shape=x.shape, name=name)


def init_weights(shape, name=None):
    bound = np.sqrt(1.0 / shape[1])
    w = np.random.uniform(-bound, bound, shape)
    return shared(value=w, name=name)


def adadelta(params, cost, lr=1.0, rho=0.95):
    # from https://github.com/fchollet/keras/blob/master/keras/optimizers.py
    cost = cost.astype('float32')
    grads = T.grad(cost, params)
    accus = [shared_zeros_like(p.get_value()) for p in params]
    delta_accus = [shared_zeros_like(p.get_value()) for p in params]
    updates = []
    for p, g, a, d_a in zip(params, grads, accus, delta_accus):
        new_a = rho * a + (1.0 - rho) * T.square(g)
        updates.append((a, new_a))
        update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
        new_p = p - lr * update
        updates.append((p, new_p))
        new_d_a = rho * d_a + (1.0 - rho) * T.square(update)
        updates.append((d_a, new_d_a))
    return updates


def categorical_crossentropy(y_true, y_pred):
    # from https://github.com/fchollet/keras/blob/master/keras/objectives.py
    y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
    y_pred = y_true.astype('int64')  # only matrix can be calculated
    cce, updates = theano.scan(
        fn=T.nnet.categorical_crossentropy,
        sequences=[y_pred, y_true]
    )
    cce.astype('float32')
    return T.mean(cce)


def mean_square_error(y_true, y_pred):
    return T.mean(T.square(y_pred - y_true))


class LSTM(object):
    def __init__(self, size, dim):
        self.size = size
        self.dim = dim

        shape_b = (minibatch_size_g, size)
        shape_U = (dim, size)
        shape_W = (size, size)

        self.h_tm1 = shared_zeros(shape_b, "h_tm1")
        self.c_tm1 = shared_zeros(shape_b, "c_tm1")

        self.Ui = init_weights(shape_U, "Ui")
        self.Wi = init_weights(shape_W, "Wi")
        self.bi = shared_zeros(shape_b, "bi")

        self.Uf = init_weights(shape_U, "Uf")
        self.Wf = init_weights(shape_W, "Wf")
        self.bf = shared_zeros(shape_b, "bf")

        self.Uo = init_weights(shape_U, "Uo")
        self.Wo = init_weights(shape_W, "Wo")
        self.bo = shared_zeros(shape_b, "bo")

        self.Ug = init_weights(shape_U, "Ug")
        self.Wg = init_weights(shape_W, "Wg")
        self.bg = shared_zeros(shape_b, "bg")

        self.params = [
            self.Ui, self.Wi, self.bi,
            self.Uf, self.Wf, self.bf,
            self.Uo, self.Wo, self.bo,
            self.Ug, self.Wg, self.bg
        ]

    def set_state(self, h, c):
        self.h_tm1.set_value(h.get_value())
        self.c_tm1.set_value(c.get_value())

    def reset_state(self):
        self.h_tm1 = shared_zeros((1, self.size), "h_tm1")
        self.c_tm1 = shared_zeros((1, self.size), "c_tm1")

    @staticmethod
    def step(
            x_t, h_tm1, c_tm1,
            Ui, Wi, bi, Uf, Wf, bf,
            Uo, Wo, bo, Ug, Wg, bg
    ):
        """
        x_t.shape = (timestep=1, dim)
        x_t.shape = (n_samples, timestep=1, dim)
        """
        # x_t.eval().shape
        x_t = x_t.reshape((minibatch_size_g, -1))
        # x_t = x_t.reshape((voca_dim_global, -1))
        h_tm1 = h_tm1.reshape((-1, n_time_step_input_g))
        c_tm1 = c_tm1.reshape((-1, n_time_step_input_g))

        i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)
        a = T.dot(x_t, Uf)
        b = T.dot(h_tm1, Wf)
        c = a + b
        f_t = c + bf
        # f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)
        o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
        g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)
        c_t = c_tm1 * f_t + g_t * i_t
        h_t = T.tanh(c_t) * o_t
        # c_t = c_t.reshape((1, -1))
        # h_t = h_t.reshape((1, -1))
        return h_t, c_t

    def forward(self, X):
        """
        X.shape = (timesteps, dim)
        X.shape = (n_samples, timesteps, dim)
        """
        X = X.reshape((-1, voca_dim_global * minibatch_size_g))
        states, updates = theano.scan(
            fn=self.step,
            sequences=[X],
            outputs_info=[self.h_tm1, self.c_tm1],
            non_sequences=[
                self.Ui, self.Wi, self.bi,
                self.Uf, self.Wf, self.bf,
                self.Uo, self.Wo, self.bo,
                self.Ug, self.Wg, self.bg
            ]
        )
        updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
        print("### forward completed ###")
        return states, updates


class LSTMEncoder(LSTM):
    def encode(self, X):
        states, updates = self.forward(X)
        h_t = states[0][-1]
        c_t = states[1][-1]
        return h_t, c_t, updates


class LSTMDecoder(LSTM):
    def __init__(self, size, dim, h_tm1=None, c_tm1=None):
        super(LSTMDecoder, self).__init__(size=size, dim=dim)
        self.Wh = init_weights((size, dim), "Wh")
        self.bh = shared_zeros((1, dim), "bh")
        self.h_tm1 = h_tm1 or shared_zeros((1, size), "h_tm1")
        self.c_tm1 = c_tm1 or shared_zeros((1, size), "c_tm1")
        self.y_t = shared_zeros((1, dim), "y_t")
        # self.decode_length = theano.shared(decode_length)
        self.params.append(self.Wh)
        self.params.append(self.bh)

    def decode_step(
            self, y_t, h_tm1, c_tm1,
            Ui, Wi, bi, Uf, Wf, bf,
            Uo, Wo, bo, Ug, Wg, bg,
            Wh, bh
    ):
        h_t, c_t = self.step(
            y_t, h_tm1, c_tm1,
            Ui, Wi, bi, Uf, Wf, bf,
            Uo, Wo, bo, Ug, Wg, bg
        )
        y_t = T.dot(h_t, Wh) + bh
        return y_t, h_t, c_t

    def decode(self, h_tm1, c_tm1, timesteps):
        outputs, updates = theano.scan(
            fn=self.decode_step,
            outputs_info=[self.y_t, h_tm1, c_tm1],
            non_sequences=[
                self.Ui, self.Wi, self.bi,
                self.Uf, self.Wf, self.bf,
                self.Uo, self.Wo, self.bo,
                self.Ug, self.Wg, self.bg,
                self.Wh, self.bh
            ],
            n_steps=timesteps
        )
        updates = [
            (self.h_tm1, outputs[1][-1]),
            (self.c_tm1, outputs[2][-1])
        ]
        return T.flatten(outputs[0], 2), updates


class Seq2Seq(object):
    def __init__(self, size, dim):
        self.encoder = LSTMEncoder(size, dim)
        self.decoder = LSTMDecoder(size, dim)
        self.params = []
        self.params += self.encoder.params
        self.params += self.decoder.params
        self._predict = None
        self._train = None
        self._test = None

    def compile(self, loss_func, optimizer):
        seq_input = T.tensor3()
        seq_target = T.tensor3()
        decode_timesteps = T.iscalar()

        h_tm1, c_tm1, updates_encode = self.encoder.encode(seq_input)
        seq_predict_flex, updates_decode_flex = self.decoder.decode(h_tm1, c_tm1, decode_timesteps)
        seq_predict, updates_decode = self.decoder.decode(h_tm1, c_tm1, T.shape(seq_target)[0])
        loss = loss_func(seq_predict, seq_target)

        self._predict = theano.function([seq_input, decode_timesteps], seq_predict_flex,
                                        updates=updates_encode + updates_decode_flex)
        self._test = theano.function([seq_input, seq_target], loss,
                                     updates=updates_encode + updates_decode)

        updates = []
        updates += updates_encode
        updates += updates_decode
        updates += optimizer(self.params, loss)
        self._train = theano.function([seq_input, seq_target], loss, updates=updates)

    def predict(self, seq_input, decode_timesteps):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._predict(seq_input, decode_timesteps)

    def train(self, seq_input, seq_target):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._train(seq_input, seq_target)

    def test(self, seq_input, seq_target):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._test(seq_input, seq_target)


def train(x, target):
    for mini_batch, target in zip(x, target):
        print("mini_batch shape :", mini_batch.shape)
        mini_batch = mini_batch.astype(dtype)
        target = target.astype(dtype)
        print(seq2seq.train(mini_batch, target))


def predict(x, target):
    for mini_batch, target in zip(x, target):
        so = seq2seq.predict(x, n_time_step_output_g)
        print(so)
        loss = seq2seq.test(x, so)
        print(loss)


if __name__ == "__main__":
    si, st, maxlen_input, minibatch_size, voca_dim = readFile.preprocessing()
    voca_dim_global = voca_dim + 2
    minibatch_size_g = si[0].shape[1]
    print("minibatch_size_g : ", 10)
    print("minibatch_size_g : ", si[0].shape[1])
    n_time_step_input_g = si[0].shape[0]
    n_time_step_output_g = st[0].shape[0]

    seq2seq = Seq2Seq(n_time_step_input_g, voca_dim_global)
    seq2seq.compile(loss_func=categorical_crossentropy, optimizer=adadelta)

    print("select a menu")
    print("1. Training")
    print("2. Predict and test translated sentence.")
    val = input("selection : ")
    if val == 1:
        train(si, st)
    elif val == 2:
        predict(si, st)
```