我有一个示例Pandas数据框如下:
Action Comedy Crime Thriller SciFi
1 0 1 1 0
0 1 0 0 1
0 1 0 1 0
0 0 1 0 1
1 1 0 0 0
我想使用Python绘制数据集(最好使用matplotlib),使每个列都是一个单独的轴。因此在这种情况下,将有5个轴(Action,Comedy,Crime ...)和5个数据点(因为它有5行)。 是否可以使用python matplotlib绘制这种多轴数据?如果不可能,那么可视化这些数据的最佳解决方案是什么?
答案 0 :(得分:2)
可以使用雷达图(Radar Chart)来绘制多个轴。您可以根据需要改编 matplotlib 官方的 Radar Chart 示例。
# Sample data: a header row of genre names followed by five 0/1 rows,
# all whitespace-separated, one record per line.
u = (
    u"Action Comedy Crime Thriller SciFi\n"
    u"1 0 1 1 0\n"
    u"0 1 0 0 1\n"
    u"0 1 0 1 0\n"
    u"0 0 1 0 1\n"
    u"1 1 0 0 0"
)
import io
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
def radar_factory(num_vars, frame='circle'):
    """Build and register a 'radar' projection with ``num_vars`` axes.

    A ``RadarAxes`` class (a ``PolarAxes`` subclass) is defined and
    registered under the projection name ``'radar'`` so that it can be
    requested with ``projection='radar'``.

    Parameters
    ----------
    num_vars : int
        Number of variables, i.e. number of radial axes.
    frame : {'circle', 'polygon'}
        Shape of the frame surrounding the axes. Any other value raises
        ``KeyError`` when the frame patch is looked up.

    Returns
    -------
    numpy.ndarray
        The ``num_vars`` axis angles in radians, evenly spaced and
        shifted by pi/2.
    """
    # Evenly spaced axis angles, without repeating the first at 2*pi.
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
    # Shift by a quarter turn (presumably so the first axis points up).
    theta += np.pi/2

    def draw_poly_patch(self):
        # Polygon frame whose corners sit on the axis directions.
        verts = unit_poly_verts(theta)
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_patch(self):
        # Circle of radius 0.5 centred on (0.5, 0.5).
        return plt.Circle((0.5, 0.5), 0.5)

    patch_dict = {'polygon': draw_poly_patch, 'circle': draw_circle_patch}

    def unit_poly_verts(theta):
        """Return polygon vertices of radius 0.5 centred on (0.5, 0.5)."""
        x0, y0, r = [0.5] * 3
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return verts

    class RadarAxes(PolarAxes):
        """Polar axes whose lines and fills are closed by default."""

        name = 'radar'
        # NOTE(review): presumably limits interpolation so that points are
        # joined by straight segments -- confirm against the matplotlib
        # version in use.
        RESOLUTION = 1
        # Frame-drawing function selected by ``frame``.
        draw_patch = patch_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default"""
            lines = super(RadarAxes, self).plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)
            # Hand the lines back so callers can keep using the usual
            # ``line, = ax.plot(...)`` idiom.
            return lines

        def _close_line(self, line):
            """Append the first point to the end of ``line`` if it is open."""
            x, y = line.get_data()
            if x[0] != x[-1]:
                x = np.concatenate((x, [x[0]]))
                y = np.concatenate((y, [y[0]]))
                line.set_data(x, y)

        def set_varlabels(self, labels):
            """Label each radial axis with the matching entry of ``labels``."""
            self.set_thetagrids(np.degrees(theta), labels)

        def _gen_axes_patch(self):
            return self.draw_patch()

        def _gen_axes_spines(self):
            if frame == 'circle':
                return PolarAxes._gen_axes_spines(self)
            spine_type = 'circle'
            verts = unit_poly_verts(theta)
            # close off polygon by repeating first vertex
            verts.append(verts[0])
            path = Path(verts)
            spine = Spine(self, spine_type, path)
            # The vertices are given in axes coordinates.
            spine.set_transform(self.transAxes)
            return {'polar': spine}

    register_projection(RadarAxes)
    return theta
# Parse the whitespace-separated table held in ``u``; each row is one item.
# ``sep=r"\s+"`` replaces the deprecated ``delim_whitespace=True``.
df = pd.read_csv(io.StringIO(u), sep=r"\s+")
# One radar axis per column, derived from the data instead of hard-coded.
N = len(df.columns)
theta = radar_factory(N, frame='polygon')
fig, ax = plt.subplots(subplot_kw=dict(projection='radar'))
colors = ['b', 'r', 'g', 'm', 'y']
markers = ["s", "o", "P", "*", "^"]
ax.set_rgrids([1])
# Collect the artists so the legend can be built from explicit handles
# rather than relying on the order in which the axes stores its artists.
fill_handles = []
scatter_handles = []
for i, (idx, row) in enumerate(df.iterrows()):
    # Cycle through the style lists so that more than five rows still work.
    color = colors[i % len(colors)]
    marker = markers[i % len(markers)]
    scatter_handles.append(
        ax.scatter(theta, row, c=color, marker=marker, label=idx))
    fill_handles.extend(
        ax.fill(theta, row, facecolor=color, alpha=0.25))
ax.set_varlabels(df.columns)
labels = ["Book {}".format(i+1) for i in range(len(df))]
# Filled areas first, then markers; each group is labelled Book 1..n.
ax.legend(fill_handles + scatter_handles, labels*2,
          loc=(0.97, .1), labelspacing=0.1, fontsize='small')
plt.show()
可视化数据的简单且可能更易读的方式是热图。
# Sample data: a header row of genre names followed by five 0/1 rows,
# all whitespace-separated, one record per line.
u = u"\n".join([
    u"Action Comedy Crime Thriller SciFi",
    u"1 0 1 1 0",
    u"0 1 0 0 1",
    u"0 1 0 1 0",
    u"0 0 1 0 1",
    u"1 1 0 0 0",
])
import io
import pandas as pd
import matplotlib.pyplot as plt
# Parse the whitespace-separated table held in ``u``.
# ``sep=r"\s+"`` replaces the deprecated ``delim_whitespace=True``.
df = pd.read_csv(io.StringIO(u), sep=r"\s+")
# Function-call form of print: valid on Python 3 and still works on Python 2.
print(df)
# Draw the 0/1 table as a grayscale matrix: one row per item, one column
# per genre.
plt.matshow(df, cmap="gray")
plt.xticks(range(len(df.columns)), df.columns)
# Number the rows from 1 on the y axis.
plt.yticks(range(len(df)), range(1, len(df) + 1))
plt.ylabel("Book number")
plt.show()
答案 1 :(得分:1)
下面是一种简洁美观的可视化方式,只需对数据做少量处理并借助 Seaborn 即可实现。
import seaborn as sns
# ``df`` stands for a Pandas DataFrame holding this table:
#   Action Comedy Crime Thriller SciFi
#   1 0 1 1 0
#   0 1 0 0 1
#   0 1 0 1 0
#   0 0 1 0 1
#   1 1 0 0 0
df = ...
# Name both axes so the reshaped frame gets readable column names.
df.columns.name = "Genre"
df.index.name = "Index"
# Reshape to one (Genre, Index) entry per cell and keep only the cells
# whose value is positive.
df2 = df.unstack().loc[lambda cells: cells > 0].reset_index()
# One marker per (genre, row) pair that is set.
ax = sns.stripplot(data=df2, x="Genre", y="Index")
ax.set_yticks(df.index)
你得到:
要进行微调,您可以查看 sns.stripplot 的文档。