How to use an Embedding layer in Keras

Asked: 2019-07-21 06:26:07

Tags: python-3.x tensorflow keras

I am learning deep learning with TensorFlow and Keras, and I want to build a CBOW (Word2Vec) model. Even though I have written a simple CBOW model, I don't know how to use the Embedding layer.

I have tried changing the parameters of the Embedding layer, but I still don't know how to use it in the CBOW model.
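As far as I understand it so far, Embedding just maps integer word ids to dense vectors (a minimal sketch; the sizes 7 and 5 are placeholders matching the toy corpus below):

import numpy as np
from tensorflow.keras.layers import Embedding

# input_dim = vocabulary size, output_dim = vector size
embedding = Embedding(input_dim=7, output_dim=5)
ids = np.array([[0, 2]])     # one context of two word ids
print(embedding(ids).shape)  # (1, 2, 5): one 5-dim vector per id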

import tensorflow as tf

from tensorflow.keras.layers import Layer


class SimpleCBOW(Layer):
    def __init__(self, vocab_size, hidden_size):
        super(SimpleCBOW, self).__init__()
        self.V, self.H = vocab_size, hidden_size
        print((self.V, self.H))

    def build(self, input_shape):
        # W_in: input-side projection; its rows are the learned word vectors.
        self.W_in = self.add_weight(
            name='W_in',
            shape=[self.V, self.H],
            initializer='random_normal',
            trainable=True,
            dtype=tf.float32,
        )
        # W_out: output-side projection back to vocabulary scores.
        self.W_out = self.add_weight(
            name='W_out',
            shape=[self.H, self.V],
            initializer='random_normal',
            trainable=True,
            dtype=tf.float32,
        )
        super().build(input_shape)

    def call(self, contexts):
        # contexts: one-hot tensor of shape (batch, 2, vocab_size).
        # Project both context words, average the hidden vectors,
        # then score every word in the vocabulary.
        h0 = tf.matmul(tf.cast(contexts[:, 0], tf.float32), self.W_in)
        h1 = tf.matmul(tf.cast(contexts[:, 1], tf.float32), self.W_in)
        h = (h0 + h1) * 0.5
        return tf.matmul(h, self.W_out)
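For reference, the layer above expects one-hot contexts of shape (batch, 2, vocab_size). A quick shape check (the values vocab_size=7 and hidden_size=5 are assumptions matching the toy corpus below):

import numpy as np

cbow = SimpleCBOW(vocab_size=7, hidden_size=5)
contexts = np.eye(7, dtype=np.float32)[[[0, 2], [1, 3]]]  # (2, 2, 7) one-hot
print(cbow(contexts).shape)  # (2, 7): one score per vocabulary word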



import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Softmax, Embedding
from tensorflow.keras.models import Sequential
from simple_cbow import SimpleCBOW
from util import preprocess, create_contexts_target, convert_one_hot, most_similar


window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)
cbow = SimpleCBOW(vocab_size, hidden_size)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/mnist')

model = Sequential([
    Embedding(....),  # <-- this is the part I don't know how to fill in
    cbow,
    Softmax()
])
optimizer = tf.keras.optimizers.Adam()

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print('contexts:', np.shape(contexts[:, 0]))

model.fit(
    contexts,
    target,
    epochs=max_epoch,
    callbacks=[tensorboard_callback],
)

word_vecs = cbow.get_weights()[0]  # first weight is W_in: one row per word id
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])

I need to use the Embedding layer to reduce the dimensionality of the vectors.
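Something like the following is what I am aiming for — a minimal sketch under my own assumptions: the contexts stay as integer word ids of shape (N, 2) instead of one-hot vectors, GlobalAveragePooling1D stands in for the (h0 + h1) * 0.5 average, and a Dense layer stands in for W_out.

import tensorflow as tf
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense, Softmax
from tensorflow.keras.models import Sequential

vocab_size, hidden_size = 7, 5  # assumed values matching the toy corpus

# Embedding replaces the one-hot matmul with W_in: it looks up one
# hidden_size vector per context word id.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=hidden_size),
    GlobalAveragePooling1D(),  # average the two context vectors
    Dense(vocab_size),         # score against every word in the vocab
    Softmax(),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
# model.fit(contexts, target, epochs=max_epoch)  # contexts: (N, 2) word ids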

0 Answers

No answers yet.