使用Python进行主成分分析

时间:2018-09-12 09:22:19

标签: python

这是我的数据图(figure of my data),它是一个二维散点数据的示例。但是当我绘制特征向量时,图像被压缩成了一维(like this)。

我正在尝试根据温度和应变数据进行PCA。

为什么把散点图和特征向量画在一起之后,图会被压缩成一维的?

def process_data_PCA(temperature, strain):
    """Run a 2-D PCA of the T1 temperature channel against the first strain
    series and draw the principal axes on top of the scatter plot.

    Args:
        temperature: Object indexable by column name; only
            ``temperature['T1']`` is read and it must offer ``.tolist()``.
        strain: Indexable whose element ``0`` is a sequence with as many
            entries as ``temperature['T1']``.

    Side effects:
        Prints the stacked (N, 2) data matrix and the 2x2 principal-axis
        matrix, then opens a matplotlib window via ``plt.show()``.
    """
    print("process data")
    # Only T1 takes part in the analysis; the other channels were never used.
    t1 = temperature['T1'].tolist()
    w_a1 = strain[0]

    n = len(t1)
    xData = np.reshape(t1, (n, 1))
    yData = np.reshape(w_a1, (n, 1))

    data = np.hstack((xData, yData))
    print(data)
    mu = data.mean(axis=0)
    data = data - mu
    # NOTE: per the original author, additionally dividing by
    # data.std(axis=0) here reproduces mlab.PCA results.

    # SVD of the (2, N) centred matrix. The COLUMNS of `eigenvectors` are the
    # principal directions; the second output (singular values) and the third
    # are not needed here.
    eigenvectors, _, _ = np.linalg.svd(data.T, full_matrices=False)
    projected_data = np.dot(data, eigenvectors)
    sigma = projected_data.std(axis=0).mean()
    print(eigenvectors)

    fig, ax = plt.subplots()
    ax.scatter(xData, yData, s=0.1)
    # Iterate over the transpose: looping over the matrix itself walks its
    # rows, which only coincide with the principal directions when the matrix
    # happens to be symmetric.
    for axis in eigenvectors.T:
        start, end = mu, mu + sigma * axis
        ax.annotate(
            '', xy=end, xycoords='data',
            xytext=start, textcoords='data',
            arrowprops=dict(facecolor='red', width=2.0))
    ax.set_aspect('equal')
    plt.show()

print(data) 的输出:

[[14.25        0.        ]
 [14.25        0.        ]
 [14.26        0.        ]
 ...
 [12.51       -0.02470534]
 [12.51       -0.02540376]
 [12.52       -0.02542746]]
[[-0.99999927 -0.00120856]
 [-0.00120856  0.99999927]]

特征向量 [-0.99999927 -0.00120856] [-0.00120856 0.99999927]

start, end

[ 1.95096698e+01 -5.70968943e-03] [ 1.7057429e+01 -8.6733643e-03]
[ 1.95096698e+01 -5.70968943e-03] [19.50670611  2.44653112]

2 个答案:

答案 0 :(得分:0)

您遇到的问题似乎出在坐标轴范围上。注释(annotate)不会更新坐标轴的范围,因此绘图只使用散点数据本身的范围(即图中的散点图)。

我能够使用代码(函数plot_andre)手动生成所需的图。

#!/usr/bin/env ipython
import numpy as np
from pylab import plt
# ---------------------------------------------
# Fix the RNG seed so every run draws the same random sample.
np.random.seed(0)
# ---------------------------------------------
def process_data_PCA():
    """Reproduce the question's PCA plot on 60000 uniformly random 2-D points.

    Side effects:
        Prints the data matrix, the 2x2 principal-axis matrix and the start
        and end point of each arrow; saves the figure to ``test_01.png`` and
        opens a matplotlib window via ``plt.show()``.
    """
    print("process data")
    n_samples = 60000
    # Drawn in this order (x first, then y) so a fixed seed gives the same
    # sample as before.
    xData = np.random.random((n_samples, 1))
    yData = np.random.random((n_samples, 1))

    data = np.hstack((xData, yData))
    print(data)
    mu = data.mean(axis=0)
    data = data - mu
    # NOTE: per the original author, additionally dividing by
    # data.std(axis=0) here reproduces mlab.PCA results.

    # SVD of the (2, N) centred matrix. The COLUMNS of `eigenvectors` are the
    # principal directions; the remaining outputs are not needed here.
    eigenvectors, _, _ = np.linalg.svd(data.T, full_matrices=False)
    projected_data = np.dot(data, eigenvectors)
    sigma = projected_data.std(axis=0).mean()
    print(eigenvectors)

    fig, ax = plt.subplots()
    ax.scatter(xData, yData, s=0.1)
    # Iterate over the transpose so each `axis` is a column of the matrix,
    # i.e. the direction the data was actually projected onto above.
    for axis in eigenvectors.T:
        start, end = mu, mu + sigma * axis
        ax.annotate(
            '', xy=end, xycoords='data',
            xytext=start, textcoords='data',
            arrowprops=dict(facecolor='red', width=2.0))
        # print() call instead of the Python 2 print statement, which is a
        # SyntaxError on Python 3.
        print(start, end)

    ax.set_aspect('equal')
    plt.savefig('test_01.png', bbox_inches='tight')
    plt.show()
# -----------------------------------
def plot_andre():
    """Draw the question's two arrows by hand and fit the axes around them.

    Side effects:
        Saves the figure to ``test_02.png`` and opens a matplotlib window via
        ``plt.show()``.
    """
    # Shape (arrow, start/end, x/y); the numbers are the start/end pairs
    # printed in the question.
    vectors = np.array([
        [[1.95096698e+01, -5.70968943e-03], [1.7057429e+01, -8.6733643e-03]],
        [[1.95096698e+01, -5.70968943e-03], [19.50670611, 2.44653112]],
    ])

    fig, ax = plt.subplots()
    for start, end in vectors:
        ax.annotate(
            '', xy=end, xycoords='data',
            xytext=start, textcoords='data',
            arrowprops=dict(facecolor='red', width=2.0))

    # Nothing but annotations is drawn here, so the limits are set by hand.
    # x values live in the last axis at index 0 and y values at index 1;
    # indexing the middle axis instead would mix start/end points with
    # coordinates.
    xs = vectors[:, :, 0]
    ys = vectors[:, :, 1]
    # Small margin so no arrow tip sits exactly on the axes boundary.
    pad = 0.05 * max(xs.max() - xs.min(), ys.max() - ys.min())
    ax.set_xlim(xs.min() - pad, xs.max() + pad)
    ax.set_ylim(ys.min() - pad, ys.max() + pad)
    ax.set_aspect('equal')
    plt.savefig('test_02.png', bbox_inches='tight')
    plt.show()

# -----------------------------------
# Run both demos: the random-data PCA plot first, then the hand-built arrows.
process_data_PCA()
plot_andre()

只需将轴限制设置为一些合适的值,例如0-20和0-20。

答案 1 :(得分:0)

我测试了您的代码,我认为主要问题出在 sigma = projected_data.std(axis=0).mean() 这一行。要显示各个特征方向上的离散程度,不应该取 mean(),而是需要两个特征方向各自的标准差。因此,只要去掉 mean(),即改为 sigma = projected_data.std(axis=0),就能得到正确的 PCA 图。下面我用一些伪随机数对其进行了测试。

#data = np.hstack((xData, yData))
# Demo: PCA of uniformly random points restricted to an ellipse, with one
# arrow per principal direction scaled by the spread along that direction.
N = 8000
data = np.random.random((N, 2))

# Ellipse semi-axes (a along x, b along y) and centre (cx, cy).
a = 0.5
b = 0.15
a2 = a**2
b2 = b**2
cx = 0.5
cy = 0.5

# Keep only the points strictly inside the ellipse (boolean mask instead of
# a Python-level loop with list appends).
inside = ((data[:, 0] - cx)**2 / a2 + (data[:, 1] - cy)**2 / b2 - 1.) < 0
xData = data[inside, 0]
yData = data[inside, 1]
data = np.vstack((xData, yData)).T

mu = data.mean(axis=0)

data = data - mu
# NOTE: per the original author, additionally dividing by data.std(axis=0)
# here reproduces mlab.PCA results.

# SVD of the (2, M) centred matrix. The COLUMNS of `eigenvectors` are the
# principal directions; `eigenvalues` actually holds the singular values
# (name kept because the commented-out print below the script refers to it).
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
# print() call instead of the Python 2 print statement.
print(np.shape(projected_data))

# One standard deviation per principal direction; taking .mean() here would
# give both arrows the same length.
sigma = projected_data.std(axis=0)

fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(xData, yData, s=0.1)
ax.scatter(mu[0], mu[1], s=50, marker='*', c='r')
# sigma[i] is the spread along COLUMN i of `eigenvectors`, so pair it with
# the columns (transpose), not with the rows.
for axis, s in zip(eigenvectors.T, sigma):
    start, end = mu, mu + s * axis
    ax.annotate(
        '', xy=end, xycoords='data',
        xytext=start, textcoords='data',
        arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.savefig("pcs.png")
plt.show()

#print eigenvalues

现在的样子。

enter image description here