(python矩阵)可视化mllib评估

时间:2016-03-12 23:33:20

标签: python numpy apache-spark pyspark seaborn

我在这段代码中遇到溢出或类似情况:

from pyspark.mllib.linalg import Vectors, DenseMatrix 
import numpy as np

    def display_cm(m):
        """Print a confusion matrix and draw it as an annotated heatmap.

        Args:
            m: Matrix object exposing ``toArray()`` (a pyspark ``DenseMatrix``
                in the calls that follow).

        ``plt`` and ``sns`` are not imported in this snippet; they are assumed
        to already be in scope (presumably matplotlib.pyplot and seaborn).
        """
        # np.float64 replaces the np.float alias, which current NumPy releases
        # no longer provide; the dtype is the same 64-bit float.
        a = m.toArray().astype(np.float64)
        print(a)
        print(m)
        # NOTE: a.sum(axis=1) is a 1-D vector, so the division broadcasts it
        # along the last axis: entry (i, j) is divided by the sum of row j,
        # not row i.  The result only looks right when every row has the same
        # total (the symmetric example); for other inputs the "percentages"
        # are skewed.  This is the behaviour being asked about, so it is kept
        # here on purpose; a.sum(axis=1, keepdims=True) gives true row-wise
        # percentages.
        percentage_matrix = 100 * a / a.sum(axis=1)
        print(percentage_matrix)
        plt.figure(figsize=(3, 3))
        sns.heatmap(percentage_matrix, annot=True, fmt='.2f',
                    xticklabels=['0', '1', '2'], yticklabels=['0', '1', '2'])
        plt.title('Confusion Matrix')

输入

# 3x3 example in which every row (and column) sums to 5.
# Line breaks inside the brackets need no backslash continuation.
m = DenseMatrix(3, 3, [
    3, 1, 1,
    1, 3, 1,
    1, 1, 3,
])

display_cm(m)

输出如下:

enter image description here

[[ 3.  1.  1.]
 [ 1.  3.  1.]
 [ 1.  1.  3.]]
DenseMatrix([[ 3.,  1.,  1.],
             [ 1.,  3.,  1.],
             [ 1.,  1.,  3.]])
[[ 60.  20.  20.]
 [ 20.  60.  20.]
 [ 20.  20.  60.]]

但是对于下面这个输入,我得到了错误的结果(百分比不正确):

# Target matrix as it prints (row by row):
# DenseMatrix([[  1.57760000e+04,   1.49200000e+03,   2.63000000e+02],
#              [  1.10000000e+01,   1.38200000e+03,   2.07000000e+02],
#              [  2.03000000e+02,   1.05000000e+03,   2.47000000e+02]])
#
# The value list is laid out column by column, so each line below is one
# column of the matrix shown above.
m = DenseMatrix(3, 3, [
    15776.0, 11.0, 203.0,
    1492.0, 1382.0, 1050.0,
    263.0, 207.0, 247.0,
])

display_cm(m)


[[  1.57760000e+04   1.49200000e+03   2.63000000e+02]
 [  1.10000000e+01   1.38200000e+03   2.07000000e+02]
 [  2.03000000e+02   1.05000000e+03   2.47000000e+02]]
DenseMatrix([[  1.57760000e+04,   1.49200000e+03,   2.63000000e+02],
             [  1.10000000e+01,   1.38200000e+03,   2.07000000e+02],
             [  2.03000000e+02,   1.05000000e+03,   2.47000000e+02]])
[[  8.99891621e+01   9.32500000e+01   1.75333333e+01]
 [  6.27459928e-02   8.63750000e+01   1.38000000e+01]
 [  1.15794878e+00   6.56250000e+01   1.64666667e+01]]

enter image description here

也尝试了这个

https://stackoverflow.com/a/8904762/203968

from pyspark.mllib.linalg import Vectors, DenseMatrix 
import numpy as np

def display_cm(m):
    """Print a confusion matrix and plot its row-normalised form as a heatmap.

    Each entry is divided by the total of its own row, so the plotted values
    are fractions (every row sums to 1), not percentages.

    Args:
        m: Matrix object exposing ``toArray()`` (a pyspark ``DenseMatrix``
            in the calls that follow).

    ``plt`` and ``sns`` are not imported in this snippet; they are assumed
    to already be in scope (presumably matplotlib.pyplot and seaborn).
    """
    # np.float64 replaces the np.float alias, which current NumPy releases
    # no longer provide; the dtype is the same 64-bit float.
    a = m.toArray().astype(np.float64)
    print(a)
    print(m)
    row_sums = a.sum(axis=1)
    # Reshape the row totals to a column vector so that broadcasting divides
    # row i by its own total instead of dividing column j by row j's total.
    percentage_matrix = a / row_sums[:, np.newaxis]
    print(percentage_matrix)
    plt.figure(figsize=(3, 3))
    sns.heatmap(percentage_matrix, annot=True, fmt='.2f',
                xticklabels=['0', '1', '2'], yticklabels=['0', '1', '2'])
    plt.title('Confusion Matrix')


# Target matrix as it prints (row by row):
# DenseMatrix([[  1.57760000e+04,   1.49200000e+03,   2.63000000e+02],
#              [  1.10000000e+01,   1.38200000e+03,   2.07000000e+02],
#              [  2.03000000e+02,   1.05000000e+03,   2.47000000e+02]])
#
# The value list is laid out column by column, so each line below is one
# column of the matrix shown above.
m = DenseMatrix(3, 3, [
    15776.0, 11.0, 203.0,
    1492.0, 1382.0, 1050.0,
    263.0, 207.0, 247.0,
])

display_cm(m)


[[  1.57760000e+04   1.49200000e+03   2.63000000e+02]
 [  1.10000000e+01   1.38200000e+03   2.07000000e+02]
 [  2.03000000e+02   1.05000000e+03   2.47000000e+02]]
DenseMatrix([[  1.57760000e+04,   1.49200000e+03,   2.63000000e+02],
             [  1.10000000e+01,   1.38200000e+03,   2.07000000e+02],
             [  2.03000000e+02,   1.05000000e+03,   2.47000000e+02]])
[[ 0.89989162  0.08510638  0.015002  ]
 [ 0.006875    0.86375     0.129375  ]
 [ 0.13533333  0.7         0.16466667]]

enter image description here

基于上面的尝试,以下调用仍然失败:
# Same 3x3 input as before; the values are listed column by column.
m = DenseMatrix(3, 3, [
    15776.0, 11.0, 203.0,
    1492.0, 1382.0, 1050.0,
    263.0, 207.0, 247.0,
])

display_cm(m)

1 个答案:

答案 0 :(得分:1)

使用此答案可以解决我当前的数据问题。 (不确定我是否也更新了一些库)

https://stackoverflow.com/a/8904762/203968

 
from pyspark.mllib.linalg import Vectors, DenseMatrix 
import numpy as np

def display_cm(m):
    """Show a confusion matrix as row-normalised fractions on a heatmap.

    Prints the raw array, the matrix object and the normalised array, then
    draws the normalised array with two-decimal annotations.

    Args:
        m: Matrix object exposing ``toArray()`` (a pyspark ``DenseMatrix``
            in the calls that follow).

    ``plt`` and ``sns`` are not imported in this snippet; they are assumed
    to already be in scope (presumably matplotlib.pyplot and seaborn).
    """
    raw = m.toArray()
    print(raw)
    print(m)

    counts = raw.astype(np.float64)
    # keepdims=True leaves the row totals as an (n, 1) column, so every row
    # is divided by its own total.
    fractions = counts / counts.sum(axis=1, keepdims=True)
    print(fractions)

    class_labels = ['0', '1', '2']
    plt.figure(figsize=(3, 3))
    sns.heatmap(
        fractions,
        annot=True,
        fmt='.2f',
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.title('Confusion Matrix')


# Target matrix as it prints (row by row):
# DenseMatrix([[  1.57760000e+04,   1.49200000e+03,   2.63000000e+02],
#              [  1.10000000e+01,   1.38200000e+03,   2.07000000e+02],
#              [  2.03000000e+02,   1.05000000e+03,   2.47000000e+02]])
#
# The value list is laid out column by column, so each line below is one
# column of the matrix shown above.
m = DenseMatrix(3, 3, [
    15776.0, 11.0, 203.0,
    1492.0, 1382.0, 1050.0,
    263.0, 207.0, 247.0,
])

display_cm(m)


[[  1.57760000e+04   1.49200000e+03   2.63000000e+02]
 [  1.10000000e+01   1.38200000e+03   2.07000000e+02]
 [  2.03000000e+02   1.05000000e+03   2.47000000e+02]]
DenseMatrix([[  1.57760000e+04,   1.49200000e+03,   2.63000000e+02],
             [  1.10000000e+01,   1.38200000e+03,   2.07000000e+02],
             [  2.03000000e+02,   1.05000000e+03,   2.47000000e+02]])
[[ 0.89989162  0.08510638  0.015002  ]
 [ 0.006875    0.86375     0.129375  ]
 [ 0.13533333  0.7         0.16466667]]

enter image description here