使用python可视化org.apache.spark.mllib.linalg.Matrix / DenseMatrix内容的最佳方法是什么?
例如confusionMatrix
from pyspark.mllib.linalg import Vectors, DenseMatrix
# DenseMatrix reads its values in column-major order: each row of the
# literal below becomes one column of the resulting 3x3 matrix.
m = DenseMatrix(
    3,
    3,
    [
        33347.0, 244.0, 16.0,
        2935.0, 1014.0, 19.0,
        3503.0, 124.0, 12.0,
    ],
)
print(m)
# Left as the final bare expression so the notebook echoes the NumPy array.
m.toArray()
DenseMatrix([[ 3.33470000e+04, 2.93500000e+03, 3.50300000e+03],
[ 2.44000000e+02, 1.01400000e+03, 1.24000000e+02],
[ 1.60000000e+01, 1.90000000e+01, 1.20000000e+01]])
Out[58]:
array([[ 3.33470000e+04, 2.93500000e+03, 3.50300000e+03],
[ 2.44000000e+02, 1.01400000e+03, 1.24000000e+02],
[ 1.60000000e+01, 1.90000000e+01, 1.20000000e+01]])
答案 0 :(得分:1)
可以使用 seaborn 的 heatmap 将矩阵绘制成热力图:
from pyspark.mllib.linalg import Vectors, DenseMatrix
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()
from pyspark.mllib.linalg import Vectors, DenseMatrix
import numpy as np
def display_cm(m, labels=None):
    """Print a confusion matrix and plot its row-normalised heatmap.

    Args:
        m: Object exposing ``toArray()`` that returns a 2-D array of
            counts (for example a pyspark ``DenseMatrix``).
        labels: Optional sequence of tick labels used for both axes.
            When omitted, the row/column indices ("0", "1", ...) are used.

    Prints the raw array, the matrix object itself and the normalised
    array, then draws the normalised array as an annotated seaborn heatmap
    on a new 3x3-inch matplotlib figure. Returns nothing.
    """
    # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the same dtype.
    counts = m.toArray().astype(np.float64)
    print(counts)
    print(m)

    # Divide every row by its own total. Rows that sum to zero are left at
    # 0.0 instead of producing NaN and a divide-by-zero warning.
    row_sums = counts.sum(axis=1, keepdims=True)
    normalised = np.divide(
        counts,
        row_sums,
        out=np.zeros_like(counts),
        where=row_sums != 0,
    )
    print(normalised)

    if labels is None:
        row_labels = [str(i) for i in range(counts.shape[0])]
        col_labels = [str(j) for j in range(counts.shape[1])]
    else:
        row_labels = col_labels = list(labels)

    plt.figure(figsize=(3, 3))
    sns.heatmap(
        normalised,
        annot=True,
        fmt='.2f',
        xticklabels=col_labels,
        yticklabels=row_labels,
    )
    plt.title('Confusion Matrix')
# NOTE(review): DenseMatrix values are column-major, so each row of this
# literal becomes a column; the result is the transpose of the matrix
# printed in the question above -- confirm this orientation is intended.
m = DenseMatrix(
    3,
    3,
    [
        33347.0, 2935.0, 3503.0,
        244.0, 1014.0, 124.0,
        16.0, 19.0, 12.0,
    ],
)
display_cm(m)