我正在学习使用 TensorFlow 和 Keras 进行深度学习。我想实现 CBOW 模型(即 Word2Vec 模型)。虽然我已经写出了一个简单的 CBOW 模型,但我不知道如何在其中使用 Embedding Layer(嵌入层)。
我尝试过修改嵌入层的参数,但仍然不知道如何在 CBOW 模型中使用它。
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Layer, Dot
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
class SimpleCBOW(Layer):
    """Continuous bag-of-words (CBOW) scoring layer.

    The layer averages the context-word vectors of each sample, projects the
    average through ``W_in`` (shape ``[vocab_size, hidden_size]``) and then
    through ``W_out`` (shape ``[hidden_size, vocab_size]``), producing one
    unnormalised score per vocabulary word.

    Args:
        vocab_size: Number of words in the vocabulary.
        hidden_size: Width of the hidden (word-vector) representation.
        **kwargs: Standard Keras ``Layer`` keyword arguments (``name``, ...).
    """

    def __init__(self, vocab_size, hidden_size, **kwargs):
        super().__init__(**kwargs)
        self.V, self.H = vocab_size, hidden_size

    def build(self, input_shape):
        """Create the input and output projection matrices."""
        # W_in is created first so that it stays at index 0 of get_weights().
        self.W_in = self.add_weight(
            name='W_in',
            shape=[self.V, self.H],
            initializer='random_normal',
            trainable=True,
            dtype=tf.float32,
        )
        self.W_out = self.add_weight(
            name='W_out',
            shape=[self.H, self.V],
            initializer='random_normal',
            trainable=True,
            dtype=tf.float32,
        )
        super().build(input_shape)

    def call(self, contexts):
        """Score every vocabulary word for each batch of context words.

        Args:
            contexts: Tensor indexed as ``(batch, context_word, vocab_size)``.
                Any number of context words is accepted. The rows are
                presumably one-hot encoded -- confirm against the caller.

        Returns:
            Tensor of shape ``(batch, vocab_size)`` holding raw scores
            (no softmax applied).
        """
        contexts = tf.cast(contexts, tf.float32)
        # The projection is linear, so averaging the context rows before the
        # matmul equals projecting each context word and averaging afterwards,
        # and it works for any window size instead of exactly two words.
        mean_context = tf.reduce_mean(contexts, axis=1)
        hidden = tf.matmul(mean_context, self.W_in)
        return tf.matmul(hidden, self.W_out)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({'vocab_size': self.V, 'hidden_size': self.H})
        return config
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Softmax, Embedding
from tensorflow.keras.models import Sequential
from simple_cbow import SimpleCBOW
import sys
import numpy as np
from util import preprocess, create_contexts_target, convert_one_hot, most_similar
# Training script: fit a SimpleCBOW model on a one-sentence corpus and print
# the learned word vectors.

# Hyperparameters.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

# Build the (contexts, target) training pairs from the raw text.
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

cbow = SimpleCBOW(vocab_size, hidden_size)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/mnist')

# No separate Embedding layer is stacked in front of the CBOW layer:
# SimpleCBOW multiplies its input by its own [vocab_size, hidden_size] weight
# matrix, which already maps each word to a hidden_size-dimensional vector.
# A Keras Embedding layer expects integer word ids rather than the one-hot
# rows produced above, so it cannot simply be prepended to this model.
model = Sequential([
    cbow,
    Softmax(),
])

optimizer = tf.keras.optimizers.Adam()
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print('contexts:', np.shape(contexts[:, 0]))
model.fit(
    contexts,
    target,
    batch_size=batch_size,  # previously defined but never passed to fit()
    epochs=max_epoch,
    callbacks=[tensorboard_callback],
)

# The first weight of the CBOW layer is W_in; row i is the vector of word i.
word_vecs = cbow.get_weights()[0]
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
我需要使用嵌入层(Embedding Layer)来降低向量的维度。