在这里,是2D散射数据的示例 但是当我绘制特征向量时 地块被压缩为一维图像。
我正在尝试根据温度和应变数据进行PCA。
为什么我有散射和标绘本征矢量组合的一维矢量?
def process_data_PCA(temperature, strain):
print("process data")
T1 = temperature['T1'].tolist()
T2 = temperature['T2'].tolist()
T3 = temperature['T3'].tolist()
T4 = temperature['T4'].tolist()
T5 = temperature['T5'].tolist()
T6 = temperature['T6'].tolist()
T7 = temperature['T7'].tolist()
T8 = temperature['T8'].tolist()
T9 = temperature['T9'].tolist()
T10 = temperature['T10'].tolist()
W_A1 = strain[0]
N = len(T1)
xData = T1
yData = W_A1
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
data = np.hstack((xData, yData))
print(data)
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
sigma = projected_data.std(axis=0).mean()
print(eigenvectors)
fig, ax = plt.subplots()
ax.scatter(xData, yData, s= 0.1)
for axis in eigenvectors:
start, end = mu, mu + sigma * axis
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.show()
打印(数据)
[[14.25 0. ]
[14.25 0. ]
[14.26 0. ]
...
[12.51 -0.02470534]
[12.51 -0.02540376]
[12.52 -0.02542746]]
[[-0.99999927 -0.00120856]
[-0.00120856 0.99999927]]
特征向量 [-0.99999927 -0.00120856] [-0.00120856 0.99999927]
开始,结束
1.95096698e+01 -5.70968943e-03] [ 1.7057429e+01 -8.6733643e-03]
[ 1.95096698e+01 -5.70968943e-03] [19.50670611 2.44653112]
答案 0 :(得分:0)
您所遇到的问题似乎是轴的限制。注释不会更新轴限制的信息,并且绘图仅使用数据的限制(图像上的散点图)。
我能够使用代码(函数plot_andre)手动生成所需的图。
#!/usr/bin/env ipython
import numpy as np
from pylab import plt
# ---------------------------------------------
np.random.seed(0);
# ---------------------------------------------
def process_data_PCA():
print("process data")
T1=np.random.random((60000,1));
W_A1=np.random.random((60000,1));
N = len(T1)
xData = T1
yData = W_A1
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
data = np.hstack((xData, yData))
print(data)
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
sigma = projected_data.std(axis=0).mean()
print(eigenvectors)
# ----------------------------------------
fig, ax = plt.subplots()
ax.scatter(xData, yData, s= 0.1)
for axis in eigenvectors:
start, end = mu, mu + sigma * axis
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
# ------------------------------------
print start,end
# ----------------------------------------
ax.set_aspect('equal');#plt.axis('tight');
plt.savefig('test_01.png',bbox_inches='tight');
plt.show()
# -----------------------------------
def plot_andre():
# ----------------------------------------
vectors=[[[1.95096698e+01,-5.70968943e-03],[ 1.7057429e+01,-8.6733643e-03]],[[ 1.95096698e+01,-5.70968943e-03],[19.50670611,2.44653112]]];
# ----------------------------------------
fig, ax = plt.subplots()
for iax in range(len(vectors)):
start,end=vectors[iax];
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
# ----------------------------------------
vectors=np.array(vectors);
ax.set_xlim(np.min(vectors[:,0]),np.max(vectors[:,0]));ax.set_ylim(np.min(vectors[:,1]),np.max(vectors[:,1]));
ax.set_aspect('equal');#plt.axis('tight');
plt.savefig('test_02.png',bbox_inches='tight');
plt.show()
# -----------------------------------
process_data_PCA();
plot_andre();
只需将轴限制设置为一些合适的值,例如0-20和0-20。
答案 1 :(得分:0)
在这里,我对您的代码进行了测试,我认为对您来说主要的问题是行sigma = projected_data.std(axis=0).mean()
,要查看各个特征方向上的散点,您不需要mean()
,但是您需要两个值std
的两个本征方向。因此,只要除去sigma = projected_data.std(axis=0)
的均值,您就会得到良好的pca图。我在下面用一些伪随机数对其进行了测试。
#data = np.hstack((xData, yData))
N = 8000
data = np.random.random((N,2))
########################################################################
# Random number in Ellipse
########################################################################
a = 0.5
b = 0.15
a2 = a**2
b2 = b**2
cx = 0.5
cy = 0.5
xData = []
yData = []
for i in range(N):
if ((data[i,0]-cx)**2/a2+(data[i,1]-cy)**2/b2 -1.)<0:
xData.append(data[i,0])
yData.append(data[i,1])
##################################################
xData = np.array(xData)
yData = np.array(yData)
data = np.vstack((xData, yData)).T
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
print np.shape(projected_data)
############################################################
#sigma = projected_data.std(axis=0).mean()
# In this line, mean is removed
sigma = projected_data.std(axis=0)
############################################################
fig, ax = plt.subplots(figsize=(7,7))
ax.scatter(xData, yData, s= 0.1)
ax.scatter(mu[0],mu[1],s = 50,marker='*', c = 'r')
for axis,s in zip(eigenvectors,sigma):
#start, end = mu, mu + sigma * axis
start, end = mu, mu + s * axis
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.savefig("pcs.png")
plt.show()
#print eigenvalues
现在的样子。