我用 df.pivot() 创建了一个 DataFrame(数据透视表),如下所示:
cluster 0 1 2 3 4 5 6 7 8 9
value
5 0 0 1 1 2 1 1 3 0 0
20 0 0 0 0 0 0 0 1 0 0
22 0 0 0 0 0 0 1 0 0 0
50 0 0 0 0 0 0 0 1 0 0
100 211 493 133 180 262 19 782 6295 137 517
200 667 1685 444 588 877 242 2630 21077 494 1751
250 0 1 0 0 0 0 0 3 1 0
300 180 480 133 177 234 20 744 5985 236 474
350 0 0 0 0 0 0 0 1 1 0
我想创建一个由多个散点图组成的网格,每个聚类(cluster)对应一个散点图,效果大致类似于下面这样:
这是我目前尝试的代码:
chart = df.plot(
kind = 'scatter',
x = 'value',
y = 'cluster',
subplots = True,
sharex = True,
title = "Question value distribution across clusters"
)
这会引发 KeyError,表明绘图函数无法按名称访问透视表中的列。以下是错误日志:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'cluster'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-90-175952b92cec> in <module>()
10 rk = gk.pivot(index = 'value', columns = 'cluster', values = 'count').fillna(0)
11 rk = rk.astype('int')
---> 12 chart = xk.plot(kind = 'scatter', x = 'value', y = 'cluster', subplots = True, sharex = True, title = "Question value distribution for cluster "+str(cluster_no))
13 # chart.set_xlabel("Value of question ($)")
14 # chart.set_ylabel("Questions in cluster")
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2675 fontsize=fontsize, colormap=colormap, table=table,
2676 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2677 sort_columns=sort_columns, **kwds)
2678 __call__.__doc__ = plot_frame.__doc__
2679
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
1900 yerr=yerr, xerr=xerr,
1901 secondary_y=secondary_y, sort_columns=sort_columns,
-> 1902 **kwds)
1903
1904
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
1685 if isinstance(data, DataFrame):
1686 plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax,
-> 1687 kind=kind, **kwds)
1688 else:
1689 raise ValueError("plot kind %r can only be used for data frames"
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __init__(self, data, x, y, s, c, **kwargs)
835 # the handling of this argument later
836 s = 20
--> 837 super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs)
838 if is_integer(c) and not self.data.columns.holds_integer():
839 c = self.data.columns[c]
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __init__(self, data, x, y, **kwargs)
811 if len(self.data[x]._get_numeric_data()) == 0:
812 raise ValueError(self._kind + ' requires x column to be numeric')
--> 813 if len(self.data[y]._get_numeric_data()) == 0:
814 raise ValueError(self._kind + ' requires y column to be numeric')
815
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
2137 return self._getitem_multilevel(key)
2138 else:
-> 2139 return self._getitem_column(key)
2140
2141 def _getitem_column(self, key):
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _getitem_column(self, key)
2144 # get column
2145 if self.columns.is_unique:
-> 2146 return self._get_item_cache(key)
2147
2148 # duplicate columns & possible reduce dimensionality
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in _get_item_cache(self, item)
1840 res = cache.get(item)
1841 if res is None:
-> 1842 values = self._data.get(item)
1843 res = self._box_item_values(item, values)
1844 cache[item] = res
/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in get(self, item, fastpath)
3841
3842 if not isna(item):
-> 3843 loc = self.items.get_loc(item)
3844 else:
3845 indexer = np.arange(len(self.items))[isna(self.items)]
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2525 return self._engine.get_loc(key)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
2529 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'cluster'
我该如何解决这个问题?