我正在做一个NLP项目,需要分析大量文本数据。我遇到了一个奇怪的问题:在Jupyter笔记本中查看数据框(DataFrame)时无法正常显示,并且会抛出格式化错误。
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
D:\Program\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
D:\Program\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
393 if callable(meth):
394 return meth(obj, self, cycle)
--> 395 return _default_pprint(obj, self, cycle)
396 finally:
397 self.end_group()
D:\Program\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _default_pprint(obj, p, cycle)
508 if _safe_getattr(klass, '__repr__', None) is not object.__repr__:
509 # A user-provided repr. Find newlines and replace them with p.break_()
--> 510 _repr_pprint(obj, p, cycle)
511 return
512 p.begin_group(1, '<')
D:\Program\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
699 """A pprint that just redirects to the normal repr function."""
700 # Find newlines and replace them with p.break_()
--> 701 output = repr(obj)
702 for idx,output_line in enumerate(output.splitlines()):
703 if idx:
D:\Program\Anaconda3\lib\site-packages\pandas\core\base.py in __repr__(self)
78 Yields Bytestring in Py2, Unicode String in py3.
79 """
---> 80 return str(self)
81
82
D:\Program\Anaconda3\lib\site-packages\pandas\core\base.py in __str__(self)
57
58 if compat.PY3:
---> 59 return self.__unicode__()
60 return self.__bytes__()
61
D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in __unicode__(self)
634 width = None
635 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 636 line_width=width, show_dimensions=show_dimensions)
637
638 return buf.getvalue()
D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, line_width, max_rows, max_cols, show_dimensions)
1673 max_cols=max_cols,
1674 show_dimensions=show_dimensions)
-> 1675 formatter.to_string()
1676
1677 if buf is None:
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in to_string(self)
595 else:
596
--> 597 strcols = self._to_str_columns()
598 if self.line_width is None: # no need to wrap around just print
599 # the whole frame
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _to_str_columns(self)
523 str_columns = [[label] for label in self.header]
524 else:
--> 525 str_columns = self._get_formatted_column_labels(frame)
526
527 stringified = []
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatted_column_labels(self, frame)
772 need_leadsp[x] else x]
773 for i, (col, x) in enumerate(zip(columns,
--> 774 fmt_columns))]
775
776 if self.show_index_names and self.has_index_names:
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in <listcomp>(.0)
771 str_columns = [[' ' + x if not self._get_formatter(i) and
772 need_leadsp[x] else x]
--> 773 for i, (col, x) in enumerate(zip(columns,
774 fmt_columns))]
775
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatter(self, i)
355 else:
356 if is_integer(i) and i not in self.columns:
--> 357 i = self.columns[i]
358 return self.formatters.get(i, None)
359
D:\Program\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in __getitem__(self, key)
1741
1742 if is_scalar(key):
-> 1743 return getitem(key)
1744
1745 if isinstance(key, slice):
IndexError: index 4 is out of bounds for axis 0 with size 4
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
D:\Program\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in _repr_html_(self)
667
668 return self.to_html(max_rows=max_rows, max_cols=max_cols,
--> 669 show_dimensions=show_dimensions, notebook=True)
670 else:
671 return None
D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in to_html(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, bold_rows, classes, escape, max_rows, max_cols, show_dimensions, notebook, decimal, border)
1732 decimal=decimal)
1733 # TODO: a generic formatter wld b in DataFrameFormatter
-> 1734 formatter.to_html(classes=classes, notebook=notebook, border=border)
1735
1736 if buf is None:
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in to_html(self, classes, notebook, border)
731 border=border)
732 if hasattr(self.buf, 'write'):
--> 733 html_renderer.write_result(self.buf)
734 elif isinstance(self.buf, compat.string_types):
735 with open(self.buf, 'w') as f:
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in write_result(self, buf)
1214 indent += self.indent_delta
1215 indent = self._write_header(indent)
-> 1216 indent = self._write_body(indent)
1217
1218 self.write('</table>', indent)
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _write_body(self, indent)
1377 self._write_hierarchical_rows(fmt_values, indent)
1378 else:
-> 1379 self._write_regular_rows(fmt_values, indent)
1380 else:
1381 for i in range(min(len(self.frame), self.max_rows)):
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _write_regular_rows(self, fmt_values, indent)
1411 row = []
1412 row.append(index_values[i])
-> 1413 row.extend(fmt_values[j][i] for j in range(ncols))
1414
1415 if truncate_h:
D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in <genexpr>(.0)
1411 row = []
1412 row.append(index_values[i])
-> 1413 row.extend(fmt_values[j][i] for j in range(ncols))
1414
1415 if truncate_h:
KeyError: 0
我可以对数据执行所有操作,并且单独查看各列时也能正常加载。但一旦出现这个错误,即使是一个很小的示例数据框也会报同样的错误。问题似乎与Jupyter的格式化渲染有关。
当我重新启动内核(Kernel)并重新运行时,同样的数据可以正常加载,但这不是一个切实可行的解决方案:我的文本数据处理需要大量时间,不可能每次都重新启动。我不知道为什么会突然遇到这个问题,而重新启动内核后运行相同的代码却又一切正常。任何帮助都将不胜感激。