# Sample 3-D points, one (x, y, z) row per observation.
X = np.array([
    [24, 13, 38], [8, 3, 17], [21, 6, 40], [1, 14, -9], [9, 3, 21],
    [7, 1, 14], [8, 7, 11], [10, 16, 3], [1, 3, 2], [15, 2, 30],
    [4, 6, 1], [12, 10, 18], [1, 9, -4], [7, 3, 19], [5, 1, 13],
    [1, 12, -6], [21, 9, 34], [8, 8, 7], [1, 18, -18], [15, 8, 25],
    [16, 10, 29], [7, 0, 17], [14, 2, 31], [3, 7, 0], [5, 6, 7],
])

# Fit a PCA model that keeps only the first principal component.
pca = PCA(n_components=1)
pca.fit(X)

# The three entries of that component are used as the coefficients a, b, c.
# NOTE(review): the first component is the direction of largest variance;
# a plane normal would be the direction of smallest variance -- confirm
# that using component 0 here is intended.
a, b, c = pca.components_[0]
def average(values):
    """Return the arithmetic mean of *values* as a float.

    Args:
        values: A sized collection of numbers (list, tuple, NumPy array, ...).

    Returns:
        The mean as a float, or None when *values* is empty.
    """
    # Check the length rather than comparing the collection itself to 0:
    # a list never equals 0, so the old guard let empty input fall through
    # to a ZeroDivisionError. len() also works for NumPy arrays, whose
    # truthiness is ambiguous.
    if len(values) == 0:
        return None
    return sum(values, 0.0) / len(values)
# Mean of each coordinate, used as an approximate point on the plane.
# NOTE(review): x, y and z are not defined in the visible code --
# presumably the three columns of X; confirm against the full script.
x_mean, y_mean, z_mean = (average(coords) for coords in (x, y, z))

# Offset chosen so that a*x + b*y + c*z + d = 0 holds at the mean point.
d = -(a * x_mean + b * y_mean + c * z_mean)
所以得到的平面方程为:-0.375978766054x + 0.10612154283y - 0.920531469111z + 15.1366572005 = 0
实际上,我不确定它是对的。
我想在这种情况下使用matplotlib库绘制一个平面。
我该如何编码?
答案 0 :(得分:2)
每个主成分在特征空间中定义一个向量。PCA根据每个方向上数据的方差对这些向量进行排序。因此,第一个向量将代表数据的最大方差,而最后一个向量将代表数据的最小方差。假设数据围绕平面分布,则第三个向量应垂直于该平面。这是代码:
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Sample 3-D points, one (x, y, z) row per observation.
X = np.array([
    [24, 13, 38], [8, 3, 17], [21, 6, 40], [1, 14, -9], [9, 3, 21],
    [7, 1, 14], [8, 7, 11], [10, 16, 3], [1, 3, 2], [15, 2, 30],
    [4, 6, 1], [12, 10, 18], [1, 9, -4], [7, 3, 19], [5, 1, 13],
    [1, 12, -6], [21, 9, 34], [8, 8, 7], [1, 18, -18], [15, 8, 25],
    [16, 10, 29], [7, 0, 17], [14, 2, 31], [3, 7, 0], [5, 6, 7],
])

# Keep all three components: the plane normal is the last one.
pca = PCA(n_components=3)
pca.fit(X)
eig_vec = pca.components_
print(pca.explained_variance_ratio_)
# [0.90946569 0.08816839 0.00236591]
# The last component explains only about 0.24% of the variance, so it is
# the direction of minimum variance: the normal of the fitted plane.
normal = eig_vec[2, :]  # (a, b, c)
centroid = np.mean(X, axis=0)

# Every point (x, y, z) on the plane satisfies a*x + b*y + c*z + d = 0.
# Taking the centroid as a point on the plane gives d = -(centroid . normal).
d = -centroid.dot(normal)

# Draw plane: build an (x, y) grid spanning the data extent. The "+ 1" makes
# the grid reach the maximum value, because np.arange excludes its stop.
xx, yy = np.meshgrid(
    np.arange(np.min(X[:, 0]), np.max(X[:, 0]) + 1),
    np.arange(np.min(X[:, 1]), np.max(X[:, 1]) + 1),
)
# Solve the plane equation for z at every grid point.
z = (-normal[0] * xx - normal[1] * yy - d) / normal[2]

# Plot the surface together with the original points.
# Figure.gca() no longer accepts keyword arguments in current Matplotlib,
# so the 3-D axes is created with add_subplot instead.
plt3d = plt.figure().add_subplot(projection='3d')
plt3d.plot_surface(xx, yy, z)
plt3d.scatter(*(X.T))
plt.show()
答案 1 :(得分:0)
第一个主成分没有定义平面,它定义了三维矢量。以下是如何在3D中对其进行可视化:代码从您的代码开始,然后进行绘图步骤:
-- Pivot the feature rows of table t into one row per apartment_id, with one
-- column per feature name. Each CASE yields feature_value only for rows of
-- the matching feature (NULL otherwise); MAX then collapses each group to
-- the largest such value, ignoring NULLs.
SELECT
    apartment_id,
    MAX(CASE feature_name WHEN 'bedrooms' THEN feature_value END) AS bedrooms,
    MAX(CASE feature_name WHEN 'bathrooms' THEN feature_value END) AS bathrooms,
    MAX(CASE feature_name WHEN 'flooring' THEN feature_value END) AS flooring
FROM t
GROUP BY apartment_id;
(注意上面的代码是用yapf自动格式化的,我强烈推荐。)结果图: