def words_to_indices(inverse_vocabulary, words):
    """Map each word in *words* to its value in *inverse_vocabulary*.

    Args:
        inverse_vocabulary: Mapping from a word to its index.
        words: Iterable of words to look up.

    Returns:
        A list holding the looked-up value of every word, in input order.

    Raises:
        KeyError: If a word is not a key of *inverse_vocabulary*.
    """
    # Plain subscription is strict: an unknown word raises KeyError.
    return [inverse_vocabulary[word] for word in words]
if __name__ == "__main__":
    # NOTE: `sequence` and `SEQUENCE_LENGTH` are not defined in this excerpt
    # (presumably keras.preprocessing.sequence and a module constant -- confirm).

    # One vocabulary entry per line of the file; the position of a word in the
    # list becomes its index.
    # NOTE: "\c" and "\d" are not recognised escape sequences, so Python keeps
    # the backslashes and the paths resolve as written.
    vocabulary = open("G:\clickbait-detector-master\data/vocabulary.txt").read().split("\n")
    inverse_vocabulary = dict((word, i) for i, word in enumerate(vocabulary))

    # Each line is one sentence; split() breaks it into whitespace-separated
    # words. Any word that is not a key of inverse_vocabulary makes
    # words_to_indices raise KeyError.
    clickbait = open("G:\clickbait-detector-master\data/clickbait.preprocessed.txt").read().split("\n")
    clickbait = sequence.pad_sequences([words_to_indices(inverse_vocabulary, sentence.split()) for sentence in clickbait], maxlen=SEQUENCE_LENGTH)

    # Same processing for the genuine (non-clickbait) sentences.
    genuine = open("G:\clickbait-detector-master\data/genuine.preprocessed.txt").read().split("\n")
    genuine = sequence.pad_sequences([words_to_indices(inverse_vocabulary, sentence.split()) for sentence in genuine], maxlen=SEQUENCE_LENGTH)
我的错误是:
KeyError Traceback (most recent call last)
<ipython-input-6-692b7e251048> in <module>()
25
26 clickbait = open("G:\clickbait-detector-master\data/clickbait.preprocessed.txt").read().split("\n")
---> 27 clickbait = sequence.pad_sequences([words_to_indices(inverse_vocabulary, sentence.split()) for sentence in clickbait], maxlen=SEQUENCE_LENGTH)
28
29 genuine = open("G:\clickbait-detector-master\data/genuine.preprocessed.txt").read().split("\n")
<ipython-input-6-692b7e251048> in <listcomp>(.0)
25
26 clickbait = open("G:\clickbait-detector-master\data/clickbait.preprocessed.txt").read().split("\n")
---> 27 clickbait = sequence.pad_sequences([words_to_indices(inverse_vocabulary, sentence.split()) for sentence in clickbait], maxlen=SEQUENCE_LENGTH)
28
29 genuine = open("G:\clickbait-detector-master\data/genuine.preprocessed.txt").read().split("\n")
<ipython-input-6-692b7e251048> in words_to_indices(inverse_vocabulary, words)
16
17 def words_to_indices(inverse_vocabulary, words):
---> 18 return [inverse_vocabulary[word] for word in words]
19
20 if __name__ == "__main__":
<ipython-input-6-692b7e251048> in <listcomp>(.0)
16
17 def words_to_indices(inverse_vocabulary, words):
---> 18 return [inverse_vocabulary[word] for word in words]
19
20 if __name__ == "__main__":
KeyError: 'C'
答案 0 :(得分:1)
我不知道您要如何处理缺少的值,但是错误告诉您它来自于此:
def words_to_indices(inverse_vocabulary, words):
    """Return the *inverse_vocabulary* value of every word in *words*.

    Args:
        inverse_vocabulary: Mapping from a word to its index.
        words: Iterable of words to look up.

    Returns:
        A list of the looked-up values, in input order.

    Raises:
        KeyError: If a word is not a key of *inverse_vocabulary*.
    """
    # inverse_vocabulary[word] is the expression that raises KeyError.
    return [inverse_vocabulary[word] for word in words]
具体来说,问题出在 `inverse_vocabulary[word]`:当您提供的键不存在时,它会抛出 `KeyError`;在这种情况下,引发错误的键是 `'C'`。
由于我不知道您想如何处理不存在的键,因此我将向您展示一种处理这种情况的方法,以免引发错误。
def words_to_indices(inverse_vocabulary, words):
    """Map each word to its value, using a placeholder for unknown words.

    Args:
        inverse_vocabulary: Mapping from a word to its index.
        words: Iterable of words to look up.

    Returns:
        A list with one entry per word, in input order: the mapped value when
        the word is a key of *inverse_vocabulary*, otherwise the string
        ``'<word> does not exist'``.
    """
    # dict.get() returns the fallback instead of raising KeyError.
    return [
        inverse_vocabulary.get(word, '{} does not exist'.format(word))
        for word in words
    ]
在这种情况下,`words_to_indices` 的输出应如下所示:
['val1', 'val2', 'val3', 'C does not exist', etc...]
您必须对其进行修改才能执行您想要的操作。