我正在使用一堆单词并遇到“ ValueError:值的长度与索引的长度不匹配”。
我正在跟踪在上一个项目中成功使用的文档,但是这次我遇到了错误。得到错误后,我打印了以下值:len(sentences)= 13680; len(comedy_common_words)= 4675。
def bag_of_words(text):
allwords = [token.lemma_
for token in text
if not token.is_punct
and not token.is_stop]
return [item[0] for item in Counter(allwords).most_common(2000)
def bow_features(sentences, comedy_common_words):
df = pd.DataFrame(columns=comedy_common_words)
df['text_sentence'] = sentences[0]
df['text_source'] = sentences[1]
df.loc[:, comedy_common_words] = 0
for i, sentence in enumerate(df['text_sentence']):
words = [token.lemma_
for token in sentence
if (
not token.is_punct
and not token.is_stop
and token.lemma_ in comedy_common_words
)]
for word in words:
df.loc[i, word] += 1
if i % 50 == 0:
print("Processing row{}".format(i))
return df
much_ado_words = bag_of_words(much_ado_doc)
midsummer_words = bag_of_words(midsummer_doc)
twelfth_words = bag_of_words(twelfth_doc)
merchant_words = bag_of_words(merchant_doc)
comedy_common_words = set(much_ado_words + midsummer_words +
twelfth_words + merchant_words)```
comedy_word_counts = bow_features(句子,喜剧常用字) comedy_word_counts.head()```
需要迭代的单词袋,并返回一个数据帧,其中Comedy_common_words作为列,句子作为行。而是收到:
ValueError Traceback (most recent call last)
<ipython-input-25-3f3cba5f1f1d> in <module>
1 # Create our data frame with features. This can take a while to run.
----> 2 comedy_word_counts = bow_features(sentences, comedy_common_words)
3 comedy_word_counts.head()
<ipython-input-24-57bed1524659> in bow_features(sentences, comedy_common_words)
18 df = pd.DataFrame(columns=comedy_common_words)
19 df['text_sentence'] = sentences[0]
---> 20 df['text_source'] = sentences[1]
21 df.loc[:, comedy_common_words] = 0
22
~/miniconda3/lib/python3.7/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
3368 else:
3369 # set column
-> 3370 self._set_item(key, value)
3371
3372 def _setitem_slice(self, key, value):
~/miniconda3/lib/python3.7/site-packages/pandas/core/frame.py in _set_item(self, key, value)
3443
3444 self._ensure_valid_index(value)
-> 3445 value = self._sanitize_column(key, value)
3446 NDFrame._set_item(self, key, value)
3447
~/miniconda3/lib/python3.7/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
3628
3629 # turn me into an ndarray
-> 3630 value = sanitize_index(value, self.index, copy=False)
3631 if not isinstance(value, (np.ndarray, Index)):
3632 if isinstance(value, list) and len(value) > 0:
~/miniconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in sanitize_index(data, index, copy)
517
518 if len(data) != len(index):
--> 519 raise ValueError('Length of values does not match length of index')
520
521 if isinstance(data, ABCIndexClass) and not copy:
ValueError: Length of values does not match length of index