我得到" ValueError:太多的值要解包(预期4)"使用以下代码。请帮我!! 我试图对词汇进行引理并切断常用词然后添加到库中,这样我就可以找出最常用的词并找到词之间的关系。
def build_dataset(words, vocabulary_size):
lexicon = []
for l in words:
all_words = word_tokenize(l.lower())
lexicon += list(all_words )
lexicon = [lemmatizer.lemmatize(i) for i in lexicon]
w_counts = Counter(lexicon)
word = []
for w in w_counts:
if 5000 > w_counts[w] > 50 :
word.append(w)
print(len(word))
return word
count = [['UNK', -1]]
count.extend(collections.Counter(word).most_common(vocabulary_size - 1))
dictionary = dict()
for l2, _ in count:
dictionary[l2] = len(dictionary)
data = list()
unk_count = 0
for l2 in word:
if l2 in dictionary:
index = dictionary[l2]
else:
index = 0
unk_count += 1
data.append(index)
count[0][1] = unk_count
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
return data, count, dictionary, reverse_dictionary
data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)