下面是用于构建 embedding_matrix 的函数。有人可以解释一下我为什么收到此错误以及如何解决该错误吗?
def getEmbeddingMatrix(word_index, vectorSource):
    """Build an embedding matrix for the words in word_index.

    Parameters
    ----------
    word_index : dict
        Mapping word -> integer index (e.g. a keras Tokenizer.word_index).
        Indices are assumed to start at 1; row 0 is reserved for the mask.
    vectorSource : str
        'fasttext' (plain-text .vec file) or 'custom-fasttext' (JSON dict
        of word -> vector).  Any other value raises KeyError, which is
        exactly the error seen when '-f' is passed in.

    Returns
    -------
    numpy.ndarray
        Shape (len(word_index) + 1, wvLength).  Rows for words without a
        known vector remain all-zero.

    NOTE(review): wvLength is a module-level constant (the vector
    dimensionality) defined elsewhere in the file — confirm it is set
    before calling.
    """
    wordVecSources = {
        'fasttext': './vectors/crawl-300d-2M-subword.vec',
        'custom-fasttext': './vectors/' + '20news-fasttext.json',
    }
    allWv = {}
    # 'with' guarantees the file is closed even if parsing raises
    # (the original leaked the handle on any exception).
    with open(wordVecSources[vectorSource]) as f:
        if vectorSource == 'custom-fasttext':
            # The JSON file already maps word -> vector (list of floats).
            allWv = json.loads(f.read())
        elif vectorSource == 'fasttext':
            errorCount = 0
            for line in f:
                values = line.split()
                if not values:
                    continue  # tolerate blank lines in the .vec file
                word = values[0].strip()
                try:
                    wv = np.asarray(values[1:], dtype='float32')
                except ValueError:
                    # Non-numeric token in the vector part (e.g. the
                    # header line or a multi-token word) — count and skip.
                    errorCount += 1
                    continue
                if len(wv) != wvLength:
                    # Truncated / malformed line: wrong dimensionality.
                    errorCount += 1
                    continue
                allWv[word] = wv
            print("# Bad Word Vectors:", errorCount)
    # +1 row so index 0 stays all-zero for mask_zero=True embeddings.
    embedding_matrix = np.zeros((len(word_index) + 1, wvLength))
    for word, i in word_index.items():
        if word in allWv:
            embedding_matrix[i] = allWv[word]
    return embedding_matrix
# Stop training when validation loss plateaus for 5 epochs.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=2, mode='auto', restore_best_weights=False)

model = keras.models.Sequential()

# Arguments shared by both embedding variants; row 0 is the mask token.
emb_kwargs = dict(
    input_dim=len(kTokenizer.word_index) + 1,
    output_dim=wvLength,
    input_length=sequenceLength,
    mask_zero=True,
)
if vectorSource == 'none':
    # No pre-trained vectors: learn the embedding from scratch.
    embedding = keras.layers.embeddings.Embedding(trainable=True, **emb_kwargs)
else:
    # Load pre-trained vectors and freeze them during training.
    embedding_matrix = getEmbeddingMatrix(kTokenizer.word_index, vectorSource)
    embedding = keras.layers.embeddings.Embedding(
        weights=[embedding_matrix], trainable=False, **emb_kwargs
    )

model.add(embedding)
model.add(keras.layers.LSTM(units=150, dropout=0.2, recurrent_dropout=0.2, return_sequences=False))
model.add(keras.layers.Dense(numClasses, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
print(model.summary())
调用getEmbeddingMatrix时,即使两个文件都位于同一位置,它也会给出KeyError'-f'
KeyError Traceback (most recent call last)
<ipython-input-45-ca6718845b1d> in <module>
2 model = keras.models.Sequential()
3 if (vectorSource != 'none'):
----> 4 embedding_matrix = getEmbeddingMatrix (kTokenizer.word_index, vectorSource)
5 embedding = keras.layers.embeddings.Embedding(input_dim=len(kTokenizer.word_index)+1, output_dim=wvLength, weights=[embedding_matrix], input_length=sequenceLength, trainable=False, mask_zero=True)
6 else:
<ipython-input-40-b29c56e927a3> in getEmbeddingMatrix(word_index, vectorSource)
1 def getEmbeddingMatrix (word_index, vectorSource):
2 wordVecSources = {'fasttext' : './vectors/crawl-300d-2M-subword.vec', 'custom-fasttext' : './vectors/' + '20news-fasttext.json' }
----> 3 f = open (wordVecSources[vectorSource])
4 allWv = {}
5 if (vectorSource == 'custom-fasttext'):
KeyError: '-f'
答案 0 :(得分:0)

字典 wordVecSources 的键只能是 'fasttext' 或 'custom-fasttext'，但它实际收到的键是 '-f'。这意味着你在某处把 '-f' 赋给了 vectorSource。应当给 vectorSource 传入 'none'、'fasttext' 或 'custom-fasttext' 之一，才能避免 KeyError。

请检查代码中与 vectorSource 相关的 shell 命令行参数。例如，如果 your_file.py 中有 vectorSource = str(sys.argv[1])，那么你应该运行类似 $python your_file.py fasttext 的命令，而不是 $python your_file.py -f，因为第一个命令行参数 (sys.argv[1]) 会在运行时把 '-f' 赋给 vectorSource。