Keras - The added layer must be an instance of class Layer. Found: Tensor

Asked: 2019-04-18 08:50:22

Tags: tensorflow keras word2vec keras-layer

I am trying to implement skip-gram with Keras, and I get an error when adding the Dot() layer. The implementation below is inspired by Dipanjan's code; I made a few modifications to account for a different Keras version.

Here is what I have run so far:

import pandas as pd
import numpy as np
import re
import nltk
import matplotlib.pyplot as plt
pd.options.display.max_colwidth = 200
%matplotlib inline
from nltk.corpus import gutenberg
from string import punctuation

wpt = nltk.WordPunctTokenizer()
stop_words = nltk.corpus.stopwords.words('english')

def normalize_document(doc):
    # lower case and remove special characters\whitespaces
    # flags must be passed by keyword: the 4th positional arg of re.sub is count
    doc = re.sub(r'[^a-zA-Z\s]', '', doc, flags=re.I|re.A)
    doc = doc.lower()
    doc = doc.strip()
    # tokenize document
    tokens = wpt.tokenize(doc)
    # filter stopwords out of document
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # re-create document from filtered tokens
    doc = ' '.join(filtered_tokens)
    return doc

normalize_corpus = np.vectorize(normalize_document)
bible = gutenberg.sents('bible-kjv.txt')
remove_terms = punctuation + '0123456789'
remove_terms
norm_bible = [[word.lower() for word in sent if word not in remove_terms] for sent in bible]
norm_bible = [' '.join(tok_sent) for tok_sent in norm_bible]
norm_bible = filter(None, normalize_corpus(norm_bible))
norm_bible = [tok_sent for tok_sent in norm_bible if len(tok_sent.split()) > 2]

from keras.preprocessing import text

tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(norm_bible)

word2id = tokenizer.word_index
id2word = {v:k for k, v in word2id.items()}

vocab_size = len(word2id) + 1
embed_size = 100
wids = [[word2id[w] for w in text.text_to_word_sequence(doc)] for doc in norm_bible]

from keras.preprocessing.sequence import skipgrams

skip_grams = [skipgrams(wid, vocabulary_size=vocab_size, window_size=10) for wid in wids]

from keras.layers import Add
from keras.layers.core import Dense, Reshape
from keras.layers import Input
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.layers.merge import Dot
from keras.models import Model

word_input = Input(shape=(1,), name="word_input")
word = Embedding(vocab_size, embed_size, embeddings_initializer="glorot_uniform", input_length=1)(word_input)
word = Reshape(target_shape=(embed_size,))(word)

context_input = Input(shape=(1,), name="context_input")
context = Embedding(vocab_size, embed_size, embeddings_initializer="glorot_uniform", input_length=1)(context_input)
context = Reshape(target_shape=(embed_size,))(context)

model = Sequential()
model.add(Dot(axes=1)([word, context]))

After the last line, an error like this appears:
The added layer must be an instance of class Layer. Found: Tensor("dot_6/MatMul:0", shape=(?, 100, 100), dtype=float32)

Can anyone explain why this happens?
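For context, `Sequential.add()` expects a `Layer` instance, but calling `Dot(axes=1)([word, context])` returns a tensor (the result of applying the layer), which is exactly what the error message says. A minimal sketch of the same skip-gram model built with the functional `Model` API instead of `Sequential` might look like the following (import paths assume a recent Keras; the sigmoid `Dense` head is one common way to finish the skip-gram objective, not something from the question's code):

```python
from keras.layers import Input, Embedding, Reshape, Dot, Dense
from keras.models import Model

vocab_size = 100   # placeholder; in the question this comes from the tokenizer
embed_size = 100

# Target-word branch: (batch, 1) -> (batch, 1, embed_size) -> (batch, embed_size)
word_input = Input(shape=(1,), name="word_input")
word = Embedding(vocab_size, embed_size)(word_input)
word = Reshape((embed_size,))(word)

# Context-word branch, same shapes
context_input = Input(shape=(1,), name="context_input")
context = Embedding(vocab_size, embed_size)(context_input)
context = Reshape((embed_size,))(context)

# Dot(axes=1)([...]) returns a tensor, so it must feed Model(), not Sequential.add()
dot_product = Dot(axes=1)([word, context])          # (batch, 1)
output = Dense(1, activation="sigmoid")(dot_product)

model = Model(inputs=[word_input, context_input], outputs=output)
model.compile(loss="binary_crossentropy", optimizer="adam")
```

The model can then be trained on the `(pairs, labels)` output of `skipgrams` by passing the two id columns as the two inputs.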

0 Answers:

No answers yet.