我想在著名的 MNIST 手写数字数据集上可视化不同的流形学习算法。我把每个数据点都替换成了文本标记,用来显示该点实际代表的数字。但是,我不知道该如何添加图例,以说明每种颜色对应哪个数字。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import (manifold, datasets, decomposition, ensemble)
import seaborn as sns
# Load the scikit-learn handwritten-digits dataset, restricted to 10 classes.
digits = datasets.load_digits(n_class=10)
X = digits.data
y = digits.target
n_samples, n_features = X.shape

# Embed the samples into two dimensions with t-SNE (fixed seed).
tsne = manifold.TSNE(n_components=2, random_state=0, perplexity=40)
X_t = pd.DataFrame(tsne.fit_transform(X))
y = pd.DataFrame(y)

# One row per sample: the two embedded coordinates plus the true digit label.
XY = pd.concat([X_t, y], axis=1)
XY.columns = ['x', 'y', 'digit']

# One colour per digit class.
customPalette = sns.hls_palette(10, l=.7, s=1)

fig, axes = plt.subplots(figsize=(10, 10))

# Pad the limits by a fraction of the data range. Scaling min/max by a
# constant factor (min * 0.98, max * 1.02) moves the limit *inward* whenever
# the value is negative, which can leave edge points outside the visible area.
x_min, x_max = XY['x'].min(), XY['x'].max()
y_min, y_max = XY['y'].min(), XY['y'].max()
x_pad = 0.02 * (x_max - x_min)
y_pad = 0.02 * (y_max - y_min)
axes.set_xlim(x_min - x_pad, x_max + x_pad)
axes.set_ylim(y_min - y_pad, y_max + y_pad)

# Draw every sample as its digit, coloured by class.
for i in range(10):
    members = XY.loc[XY['digit'] == i, ['x', 'y']]
    for x_pos, y_pos in zip(members['x'], members['y']):
        axes.annotate(str(i), (x_pos, y_pos),
                      horizontalalignment='center',
                      verticalalignment='center',
                      size=10, color=customPalette[i])

axes.set_title('t-SNE')
得到的结果如下图所示:
我想添加一个类似下图所示的图例:
答案 0 :(得分:0)
试试下面这种做法如何?
# Plot the t-SNE embedding as coloured digit labels and add a legend that maps
# each colour to its digit. Relies on `XY` (columns 'x', 'y', 'digit') and
# `customPalette` built earlier in the script.
fig, axes = plt.subplots(figsize=(10, 10))

# Pad the limits by a fraction of the data range. Scaling min/max by a
# constant factor (min * 0.98, max * 1.02) moves the limit *inward* whenever
# the value is negative, which can leave edge points outside the visible area.
x_min, x_max = XY['x'].min(), XY['x'].max()
y_min, y_max = XY['y'].min(), XY['y'].max()
x_pad = 0.02 * (x_max - x_min)
y_pad = 0.02 * (y_max - y_min)
axes.set_xlim(x_min - x_pad, x_max + x_pad)
axes.set_ylim(y_min - y_pad, y_max + y_pad)

for i in range(10):
    # Empty "proxy" line: it draws nothing on the axes but gives the legend
    # one labelled handle per class, so no stray marker ends up under a label.
    axes.plot([], [], '.', color=customPalette[i], label=i)

    members = XY.loc[XY['digit'] == i, ['x', 'y']]
    for x_pos, y_pos in zip(members['x'], members['y']):
        axes.annotate(str(i), (x_pos, y_pos),
                      horizontalalignment='center',
                      verticalalignment='center',
                      size=10, color=customPalette[i])

# Build the legend once, after every class handle has been registered.
axes.legend(numpoints=1, markerscale=3)
axes.set_title('t-SNE')