How does the 'sequences' parameter work in theano's scan function?

Time: 2016-09-15 23:53:46

Tags: theano theano.scan

Before getting to the point, I apologize for any awkward English sentences; English is not my first language.

I am currently trying to use the theano.tensor.scan function correctly, but I do not understand how the 'sequences' parameter works. I created a 3-dimensional array of shape (42, 10, 7002) and passed it in as the sequence. I expected each unit of the sequence to be a 2-dimensional array of shape (10, 7002), iterated over 42 steps. However, it seems that each unit of the sequence is (1, 7002) instead. How can I make each sequence unit be the 10-by-7002 matrix it should be? Thank you for reading this question.
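
For illustration, here is a minimal, self-contained sketch (separate from the project code added below; the toy shapes are made up) of how I understand the sequences argument: theano.scan slices each sequence tensor along its leading axis, so a 3-D input should hand the inner function one matrix per step.

import numpy as np
import theano
import theano.tensor as T

X = T.tensor3('X')

def step(x_t):
    # x_t is one slice taken along the leading (first) axis of X, so for a
    # (42, 10, 7002) input it should arrive as a (10, 7002) matrix
    x_t = theano.printing.Print('x_t', attrs=['shape'])(x_t)
    return x_t.sum()

sums, _ = theano.scan(fn=step, sequences=[X])
f = theano.function([X], sums)

toy = np.ones((4, 3, 5), dtype=theano.config.floatX)  # small stand-in for (42, 10, 7002)
print(f(toy))  # the Print op reports shape (3, 5) once per step, then the four sums appear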

  • Added:
# -*- coding: utf-8 -*-

__author__ = "Haizhou Qu"

import readFile
import numpy as np
import theano
import theano.tensor as T
from six.moves import zip
# from theano.compile.debugmode import DebugMode

theano.config.optimizer='fast_compile'
theano.config.exception_verbosity='high'
theano.config.compute_test_value = 'warn'

epsilon = 1e-6
dtype = theano.config.floatX
minibatch_size_g = 0
longest_seq_g = 0
voca_dim_global = 0
n_time_step_input_g = 0
n_timestep_target_g = 0


def printT(x):
    t = theano.printing.Print('T')(x)
    return t

def shared(value, name=None):
    return theano.shared(value.astype(dtype), name=name)


def shared_zeros(shape, name=None):
    return shared(value=np.zeros(shape), name=name)


def shared_zeros_like(x, name=None):
    return shared_zeros(shape=x.shape, name=name)


def init_weights(shape, name=None):
    bound = np.sqrt(1.0/shape[1])
    w = np.random.uniform(-bound, bound, shape)
    return shared(value=w, name=name)


def adadelta(params, cost, lr=1.0, rho=0.95):
    # from https://github.com/fchollet/keras/blob/master/keras/optimizers.py
    cost = cost.astype('float32')
    grads = T.grad(cost, params)
    accus = [shared_zeros_like(p.get_value()) for p in params]
    delta_accus = [shared_zeros_like(p.get_value()) for p in params]
    updates = []
    for p, g, a, d_a in zip(params, grads, accus, delta_accus):
        new_a = rho * a + (1.0 - rho) * T.square(g)
        updates.append((a, new_a))
        update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
        new_p = p - lr * update
        updates.append((p, new_p))
        new_d_a = rho * d_a + (1.0 - rho) * T.square(update)
        updates.append((d_a, new_d_a))
    return updates


def categorical_crossentropy(y_true, y_pred):
    # from https://github.com/fchollet/keras/blob/master/keras/objectives.py
    y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)

    y_true = y_true.astype('int64')
    # T.nnet.categorical_crossentropy only works on matrices, so scan over the
    # leading axis and apply it to one matrix slice of y_pred / y_true per step
    cce, updates = theano.scan(
        fn=T.nnet.categorical_crossentropy,
        sequences=[y_pred, y_true]
    )
    cce = cce.astype('float32')
    return T.mean(cce)


def mean_square_error(y_true, y_pred):
    return T.mean(T.square(y_pred - y_true))


class LSTM(object):
    def __init__(self, size, dim):
        self.size = size
        self.dim = dim

        shape_b = (minibatch_size_g, size)
        shape_U = (dim, size)
        shape_W = (size, size)

        self.h_tm1 = shared_zeros(shape_b, "h_tm1")
        self.c_tm1 = shared_zeros(shape_b, "c_tm1")

        self.Ui = init_weights(shape_U, "Ui")
        self.Wi = init_weights(shape_W, "Wi")
        self.bi = shared_zeros(shape_b, "bi")

        self.Uf = init_weights(shape_U, "Uf")
        self.Wf = init_weights(shape_W, "Wf")
        self.bf = shared_zeros(shape_b, "bf")

        self.Uo = init_weights(shape_U, "Uo")
        self.Wo = init_weights(shape_W, "Wo")
        self.bo = shared_zeros(shape_b, "bo")

        self.Ug = init_weights(shape_U, "Ug")
        self.Wg = init_weights(shape_W, "Wg")
        self.bg = shared_zeros(shape_b, "bg")

        self.params = [
            self.Ui, self.Wi, self.bi,
            self.Uf, self.Wf, self.bf,
            self.Uo, self.Wo, self.bo,
            self.Ug, self.Wg, self.bg
        ]

    def set_state(self, h, c):
        self.h_tm1.set_value(h.get_value())
        self.c_tm1.set_value(c.get_value())

    def reset_state(self):
        self.h_tm1 = shared_zeros((1, self.size), "h_tm1")
        self.c_tm1 = shared_zeros((1, self.size), "c_tm1")




    @staticmethod
    def step(
        x_t, h_tm1, c_tm1,
        Ui, Wi, bi, Uf, Wf, bf,
        Uo, Wo, bo, Ug, Wg, bg
    ):

        """
        x_t.shape = (timestep=1, dim)
        x_t.shape = (n_samples, timestep=1, dim)
        """
        # x_t.eval().shape
        x_t = x_t.reshape( (minibatch_size_g , -1) )
        #x_t = x_t.reshape( (voca_dim_global , -1) )


        h_tm1 = h_tm1.reshape( (-1, n_time_step_input_g)    )
        c_tm1 = c_tm1.reshape( (-1, n_time_step_input_g)    )

        i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)

        a=T.dot(x_t, Uf)
        b=T.dot(h_tm1, Wf)
        c=a+b
        f_t=c+bf
        #f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)


        o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
        g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)

        c_t = c_tm1 * f_t + g_t * i_t
        h_t = T.tanh(c_t) * o_t

        #c_t = c_t.reshape( (1, -1) )
        #h_t = h_t.reshape( (1, -1) )

        return h_t, c_t



    def forward(self, X):
        """
        X.shape = (timesteps, dim)
        X.shape = (n_samples, timesteps, dim)        
        """

        X = X.reshape( (-1, voca_dim_global * minibatch_size_g) ) 
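        # Note: in the scan below, theano.scan slices each tensor in `sequences`
        # along its leading axis, so self.step receives one row of the reshaped X
        # per iteration.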

        states, updates = theano.scan(
            fn=self.step,
            sequences=[  X  ],
            outputs_info=[self.h_tm1, self.c_tm1],
            non_sequences=[
                self.Ui, self.Wi, self.bi,
                self.Uf, self.Wf, self.bf,
                self.Uo, self.Wo, self.bo,
                self.Ug, self.Wg, self.bg
            ]
        )

        updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
        print("### forward completed ###")
        return states, updates


class LSTMEncoder(LSTM):
    def encode(self, X):
        states, updates = self.forward(X)
        h_t = states[0][-1]
        c_t = states[1][-1]
        return h_t, c_t, updates


class LSTMDecoder(LSTM):
    def __init__(self, size, dim, h_tm1=None, c_tm1=None):
        super(LSTMDecoder, self).__init__(size=size, dim=dim)
        self.Wh = init_weights((size, dim), "Wh")
        self.bh = shared_zeros((1, dim), "bh")

        self.h_tm1 = h_tm1 or shared_zeros((1, size), "h_tm1")
        self.c_tm1 = c_tm1 or shared_zeros((1, size), "c_tm1")

        self.y_t = shared_zeros((1, dim), "y_t")

        # self.decode_length = theano.shared(decode_length)

        self.params.append(self.Wh)
        self.params.append(self.bh)

    def decode_step(
        self, y_t, h_tm1, c_tm1,
        Ui, Wi, bi, Uf, Wf, bf,
        Uo, Wo, bo, Ug, Wg, bg,
        Wh, bh
    ):
        h_t, c_t = self.step(
            y_t, h_tm1, c_tm1,
            Ui, Wi, bi, Uf, Wf, bf,
            Uo, Wo, bo, Ug, Wg, bg
        )
        y_t = T.dot(h_t, Wh) + bh
        return y_t, h_t, c_t

    def decode(self, h_tm1, c_tm1, timesteps):
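        # With no `sequences` given, scan simply runs for n_steps iterations,
        # feeding the previous outputs (y_t, h_t, c_t) back in through outputs_info.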
        outputs, updates = theano.scan(
            fn=self.decode_step,
            outputs_info=[self.y_t, h_tm1, c_tm1],
            non_sequences=[
                self.Ui, self.Wi, self.bi,
                self.Uf, self.Wf, self.bf,
                self.Uo, self.Wo, self.bo,
                self.Ug, self.Wg, self.bg,
                self.Wh, self.bh
            ],
            n_steps=timesteps
        )
        updates = [
            (self.h_tm1, outputs[1][-1]),
            (self.c_tm1, outputs[2][-1])
        ]
        return T.flatten(outputs[0], 2), updates


class Seq2Seq(object):
    def __init__(self, size, dim):
        self.encoder = LSTMEncoder(size, dim)
        self.decoder = LSTMDecoder(size, dim)
        self.params = []
        self.params += self.encoder.params
        self.params += self.decoder.params
        self._predict = None
        self._train = None
        self._test = None

    def compile(self, loss_func, optimizer):
        seq_input = T.tensor3()
        seq_target = T.tensor3()
        decode_timesteps = T.iscalar()

        h_tm1, c_tm1, updates_encode = self.encoder.encode(seq_input)
        seq_predict_flex, updates_decode_flex = self.decoder.decode(h_tm1, c_tm1, decode_timesteps)
        seq_predict, updates_decode = self.decoder.decode(h_tm1, c_tm1, T.shape(seq_target)[0])

        loss = loss_func(seq_predict, seq_target)
        self._predict = theano.function([seq_input, decode_timesteps], seq_predict_flex,
                                        updates=updates_encode + updates_decode_flex)
        self._test = theano.function([seq_input, seq_target], loss,
                                     updates=updates_encode + updates_decode)

        updates = []
        updates += updates_encode
        updates += updates_decode
        updates += optimizer(self.params, loss)
        self._train = theano.function([seq_input, seq_target], loss, updates=updates)

    def predict(self, seq_input, decode_timesteps):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._predict(seq_input, decode_timesteps)

    def train(self, seq_input, seq_target):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._train(seq_input, seq_target)

    def test(self, seq_input, seq_target):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._test(seq_input, seq_target)


def train(x, target):
    for mini_batch, target in zip(x,target):
        print("mini_batch shape :",mini_batch.shape)

        mini_batch = mini_batch.astype(dtype)
        target = target.astype(dtype)

        print(seq2seq.train(mini_batch, target))


def predict(x, target):
    for mini_batch, target in zip(x, target):
        so = seq2seq.predict(mini_batch, n_time_step_output_g)
        print(so)
        loss = seq2seq.test(mini_batch, so)
        print(loss)


if __name__ == "__main__":

    si, st, maxlen_input, minibatch_size, voca_dim = readFile.preprocessing()

    voca_dim_global = voca_dim + 2
    minibatch_size_g = si[0].shape[1]

    print("minibatch_size_g : " , 10)
    print("minibatch_size_g : " , si[0].shape[1])

    n_time_step_input_g = si[0].shape[0]
    n_time_step_output_g = st[0].shape[0]

    seq2seq = Seq2Seq(n_time_step_input_g, voca_dim_global )
    seq2seq.compile(loss_func=categorical_crossentropy, optimizer=adadelta)

    print("select a menu")
    print("1. Training")
    print("2. Predict and test translated sentence.")
    val = int(input("selection : "))

    if val == 1:
        train(si, st)

    elif val == 2:
        predict(si, st)

0 Answers:

No answers