Question

我正在做一个NLP项目，需要分析大量文本数据。我遇到了一个奇怪的问题：在Jupyter笔记本中查看数据框（DataFrame）时无法正常显示，并且抛出了格式化错误。

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
D:\Program\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

D:\Program\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
    393                             if callable(meth):
    394                                 return meth(obj, self, cycle)
--> 395             return _default_pprint(obj, self, cycle)
    396         finally:
    397             self.end_group()

D:\Program\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _default_pprint(obj, p, cycle)
    508     if _safe_getattr(klass, '__repr__', None) is not object.__repr__:
    509         # A user-provided repr. Find newlines and replace them with p.break_()
--> 510         _repr_pprint(obj, p, cycle)
    511         return
    512     p.begin_group(1, '<')

D:\Program\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
    699     """A pprint that just redirects to the normal repr function."""
    700     # Find newlines and replace them with p.break_()
--> 701     output = repr(obj)
    702     for idx,output_line in enumerate(output.splitlines()):
    703         if idx:

D:\Program\Anaconda3\lib\site-packages\pandas\core\base.py in __repr__(self)
     78         Yields Bytestring in Py2, Unicode String in py3.
     79         """
---> 80         return str(self)
     81 
     82 

D:\Program\Anaconda3\lib\site-packages\pandas\core\base.py in __str__(self)
     57 
     58         if compat.PY3:
---> 59             return self.__unicode__()
     60         return self.__bytes__()
     61 

D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in __unicode__(self)
    634             width = None
    635         self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 636                        line_width=width, show_dimensions=show_dimensions)
    637 
    638         return buf.getvalue()

D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, line_width, max_rows, max_cols, show_dimensions)
   1673                                            max_cols=max_cols,
   1674                                            show_dimensions=show_dimensions)
-> 1675         formatter.to_string()
   1676 
   1677         if buf is None:

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in to_string(self)
    595         else:
    596 
--> 597             strcols = self._to_str_columns()
    598             if self.line_width is None:  # no need to wrap around just print
    599                 # the whole frame

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _to_str_columns(self)
    523                 str_columns = [[label] for label in self.header]
    524             else:
--> 525                 str_columns = self._get_formatted_column_labels(frame)
    526 
    527             stringified = []

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatted_column_labels(self, frame)
    772                             need_leadsp[x] else x]
    773                            for i, (col, x) in enumerate(zip(columns,
--> 774                                                             fmt_columns))]
    775 
    776         if self.show_index_names and self.has_index_names:

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in <listcomp>(.0)
    771             str_columns = [[' ' + x if not self._get_formatter(i) and
    772                             need_leadsp[x] else x]
--> 773                            for i, (col, x) in enumerate(zip(columns,
    774                                                             fmt_columns))]
    775 

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatter(self, i)
    355         else:
    356             if is_integer(i) and i not in self.columns:
--> 357                 i = self.columns[i]
    358             return self.formatters.get(i, None)
    359 

D:\Program\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in __getitem__(self, key)
   1741 
   1742         if is_scalar(key):
-> 1743             return getitem(key)
   1744 
   1745         if isinstance(key, slice):

IndexError: index 4 is out of bounds for axis 0 with size 4

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
D:\Program\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
    343             method = get_real_method(obj, self.print_method)
    344             if method is not None:
--> 345                 return method()
    346             return None
    347         else:

D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in _repr_html_(self)
    667 
    668             return self.to_html(max_rows=max_rows, max_cols=max_cols,
--> 669                                 show_dimensions=show_dimensions, notebook=True)
    670         else:
    671             return None

D:\Program\Anaconda3\lib\site-packages\pandas\core\frame.py in to_html(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, bold_rows, classes, escape, max_rows, max_cols, show_dimensions, notebook, decimal, border)
   1732                                            decimal=decimal)
   1733         # TODO: a generic formatter wld b in DataFrameFormatter
-> 1734         formatter.to_html(classes=classes, notebook=notebook, border=border)
   1735 
   1736         if buf is None:

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in to_html(self, classes, notebook, border)
    731                                       border=border)
    732         if hasattr(self.buf, 'write'):
--> 733             html_renderer.write_result(self.buf)
    734         elif isinstance(self.buf, compat.string_types):
    735             with open(self.buf, 'w') as f:

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in write_result(self, buf)
   1214         indent += self.indent_delta
   1215         indent = self._write_header(indent)
-> 1216         indent = self._write_body(indent)
   1217 
   1218         self.write('</table>', indent)

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _write_body(self, indent)
   1377                 self._write_hierarchical_rows(fmt_values, indent)
   1378             else:
-> 1379                 self._write_regular_rows(fmt_values, indent)
   1380         else:
   1381             for i in range(min(len(self.frame), self.max_rows)):

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _write_regular_rows(self, fmt_values, indent)
   1411             row = []
   1412             row.append(index_values[i])
-> 1413             row.extend(fmt_values[j][i] for j in range(ncols))
   1414 
   1415             if truncate_h:

D:\Program\Anaconda3\lib\site-packages\pandas\io\formats\format.py in <genexpr>(.0)
   1411             row = []
   1412             row.append(index_values[i])
-> 1413             row.extend(fmt_values[j][i] for j in range(ncols))
   1414 
   1415             if truncate_h:

KeyError: 0

我能够对数据执行所有操作，并且可以按单独的列加载数据。但一旦出现这个问题，即使是一个很小的示例数据框也会出现相同的错误。这似乎与Jupyter的格式化渲染有关。

当我重新启动内核（kernel）并重新运行时，相同的数据可以正常加载，但这不是一个切实可行的解决方案：我的文本数据处理需要大量时间，因此无法每次都重新启动。我不知道为什么会突然遇到这个问题，而重新启动内核后运行相同的代码却又一切正常。任何帮助将不胜感激。

数据框本身可以正常使用，但无法在Jupyter中以表格形式查看。

0 个答案: