我想先计算 PCA,然后在 Python 中用散点图把得到的主成分绘制出来。主成分的值可以正常计算出来,但图中没有绘制出任何点。
这是我的代码:
# Script: load a two-column CSV, label-encode the first column, standardise
# both columns, project them onto two principal components and draw the
# projection as a scatter plot coloured by the 'Value' column.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# `names` must be passed as a keyword argument (`names=[...]`); the original
# `names[...]` was a syntax error.
df = pd.read_csv(
    "new_file.txt",
    delimiter=',',
    header=None,
    skiprows=0,
    names=['Feature_name_1', 'Value'],
)

# Encode the first column as integer codes so it can be standardised.
le = LabelEncoder()
le.fit(df['Feature_name_1'].astype(str))
df['Feature_name_1'] = le.transform(df['Feature_name_1'].astype(str))

features = ['Feature_name_1', 'Value']
x = df.loc[:, features].values
# Not used below; kept so the script still exposes the same names.
y = df.loc[:, ['Value']].values

# Standardise every feature before the PCA fit.
x = StandardScaler().fit_transform(x)

pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2'],
)

# Both frames share the default row index produced by read_csv, so the
# column-wise concat lines each projected point up with its 'Value'.
finalDf = pd.concat([principalDf, df[['Value']]], axis=1)

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
ax.set_title('PCA', fontsize=20)

# The groups to plot are the distinct *values* found in the 'Value' column.
# The original used the column names here, so the row mask below never
# matched anything and the figure stayed empty.
targets = sorted(finalDf['Value'].dropna().unique())
colors = plt.get_cmap('tab10').colors

# Above this many distinct values a per-class legend stops being readable,
# so fall back to a continuous colour scale.
max_legend_classes = len(colors)

if len(targets) <= max_legend_classes:
    for target, color in zip(targets, colors):
        indicesToKeep = finalDf['Value'] == target
        ax.scatter(
            finalDf.loc[indicesToKeep, 'principal component 1'],
            finalDf.loc[indicesToKeep, 'principal component 2'],
            color=color,
            s=50,
            label=str(target),
        )
    ax.legend(title='Value')
else:
    points = ax.scatter(
        finalDf['principal component 1'],
        finalDf['principal component 2'],
        c=finalDf['Value'],
        cmap='viridis',
        s=50,
    )
    fig.colorbar(points, ax=ax, label='Value')

ax.grid()
# Needed for the window to appear when run as a plain script.
plt.show()