Question

我得到＆＃34; ValueError：太多的值要解包（预期4）＆＃34;使用以下代码。请帮我！！我试图对词汇进行引理并切断常用词然后添加到库中，这样我就可以找出最常用的词并找到词之间的关系。

def build_dataset(words, vocabulary_size):
    lexicon = []
    for l in words: 
        all_words = word_tokenize(l.lower())
        lexicon += list(all_words )
    lexicon = [lemmatizer.lemmatize(i) for i in lexicon] 
    w_counts = Counter(lexicon) 
    word = [] 
    for w in w_counts:
        if 5000 > w_counts[w] > 50 :
            word.append(w) 
    print(len(word))
    return word

    count = [['UNK', -1]]
    count.extend(collections.Counter(word).most_common(vocabulary_size - 1))
    dictionary = dict()
    for l2, _ in count:
        dictionary[l2] = len(dictionary)
        data = list()
        unk_count = 0
    for l2 in word:
        if l2 in dictionary:
            index = dictionary[l2]
        else:
            index = 0 
            unk_count += 1
            data.append(index)
            count[0][1] = unk_count
            reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 
        return data, count, dictionary, reverse_dictionary     
data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)

ValueError：解压缩的值太多（预期4）

0 个答案: