我正在使用 Pandas Profiling 为 Pandas DataFrame 对象 `data` 创建 HTML 分析报告。
但是,每当我尝试通过以下方式显示报告时:
%time profiling = pandas_profiling.ProfileReport(data)
profiling
我收到此错误:
TypeError                                 Traceback (most recent call last)
<ipython-input-46-afbac23ca8ca> in <module>()
----> 1 get_ipython().magic(u'time profiling = pandas_profiling.ProfileReport(data)')
2 profiling
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2158 magic_name, _, magic_arg_s = arg_s.partition(' ')
2159 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2160 return self.run_line_magic(magic_name, magic_arg_s)
2161
2162 #-------------------------------------------------------------------------
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2079 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2080 with self.builtin_trap:
-> 2081 result = fn(*args,**kwargs)
2082 return result
2083
<decorator-gen-60> in time(self, line, cell, local_ns)
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
1191 else:
1192 st = clock2()
-> 1193 exec(code, glob, local_ns)
1194 end = clock2()
1195 out = None
<timed exec> in <module>()
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/pandas_profiling/__init__.pyc in __init__(self, df, **kwargs)
67
68 self.html = to_html(sample,
---> 69 description_set)
70
71 self.description_set = description_set
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/pandas_profiling/report.pyc in to_html(sample, stats_object)
171 formatted_values['freqtable'] = freq_table(stats_object['freq'][idx], n_obs,
172 templates.template('freq_table'), templates.template('freq_table_row'), 10)
--> 173 formatted_values['firstn_expanded'] = extreme_obs_table(stats_object['freq'][idx], templates.template('freq_table'), templates.template('freq_table_row'), 5, n_obs, ascending = True)
174 formatted_values['lastn_expanded'] = extreme_obs_table(stats_object['freq'][idx], templates.template('freq_table'), templates.template('freq_table_row'), 5, n_obs, ascending = False)
175
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/pandas_profiling/report.pyc in extreme_obs_table(freqtable, table_template, row_template, number_to_print, n, ascending)
110 # If it's mixed between base types (str, int) convert to str. Pure "mixed" types are filtered during type discovery
111 if "mixed" in freqtable.index.inferred_type:
--> 112 freqtable.index = freqtable.index.astype(str)
113
114 sorted_freqTable = freqtable.sort_index()
/home/cloud_user/anaconda3/envs/fasttext/lib/python2.7/site-packages/pandas/core/indexes/base.pyc in astype(self, dtype, copy)
1294 except (TypeError, ValueError):
1295 msg = 'Cannot cast {name} to dtype {dtype}'
-> 1296 raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
1297
1298 def _to_safe_for_reshape(self):
TypeError: Cannot cast Index to dtype <type 'str'>
我的数据框对象 `data` 如下:
并且具有形状和类型:
print("Data type: {}\nData shape: {}".format(type(data), data.shape))
Data type: <class 'pandas.core.frame.DataFrame'>
Data shape: (243697, 23)
并且具有数据类型:
data.dtypes
我该如何解决?