我尝试运行t-sne,但是python向我显示此错误:
IndexError:只有整数,切片(
:
),省略号(...
),numpy.newaxis(None
)和整数或布尔数组都是有效索引
此link正在提供数据。
代码如下:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
#Step 1 - Download the data
dataframe_all = pd.read_csv('https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv')
num_rows = dataframe_all.shape[0]
#Step 2 - Clearn the data
#count the number of missing elements (NaN) in each column
counter_nan = dataframe_all.isnull().sum()
counter_without_nan = counter_nan[counter_nan==0]
#remove the columns with missing elements
dataframe_all = dataframe_all[counter_without_nan.keys()]
#remove the first 7 columns which contain no descriminative information
dataframe_all = dataframe_all.ix[:,7:]
#Step 3: Create feature vectors
x = dataframe_all.ix[:,:-1].values
standard_scalar = StandardScaler()
x_std = standard_scalar.fit_transform(x)
# t distributed stochastic neighbour embedding (t-SNE) visualization
tsne = TSNE(n_components=2, random_state = 0)
x_test_2d = tsne.fit_transform(x_std)
#scatter plot the sample points among 5 classes
markers=('s','d','o','^','v')
color_map = {0:'red', 1:'blue', 2:'lightgreen', 3:'purple', 4:'cyan'}
plt.figure()
for idx, cl in enumerate(np.unique(x_test_2d)):
plt.scatter(x=x_test_2d[cl, 0],y =x_test_2d[cl, 1], c=color_map[idx], marker=markers[idx], label=cl)
plt.show()
要使此功能生效,我必须更改什么?
答案 0 :(得分:1)
该错误归因于以下行:
plt.scatter(x_test_2d[cl, 0], x_test_2d[cl, 1], c=color_map[idx], marker=markers[idx])
在这里,cl
可以采用和不采用整数值(来自np.unique(x_test_2d)
),这会引起错误,例如cl
的最后一个值是99.46295
,然后使用:x_test_2d[cl, 0]
转换为x_test_2d[99.46295, 0]
定义一个变量y
,该变量{strong>保留类标签,然后使用:
# variable holding the classes
y = dataframe_all.classe.values
y = np.array([ord(i) for i in y])
#scatter plot the sample points among 5 classes
plt.figure()
plt.scatter(x_test_2d[:, 0], x_test_2d[:, 1], c = y)
plt.show()
完整代码:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
#Step 1 - Download the data
dataframe_all = pd.read_csv('https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv')
num_rows = dataframe_all.shape[0]
#Step 2 - Clearn the data
#count the number of missing elements (NaN) in each column
counter_nan = dataframe_all.isnull().sum()
counter_without_nan = counter_nan[counter_nan==0]
#remove the columns with missing elements
dataframe_all = dataframe_all[counter_without_nan.keys()]
#remove the first 7 columns which contain no descriminative information
dataframe_all = dataframe_all.ix[:,7:]
#Step 3: Create feature vectors
x = dataframe_all.ix[:,:-1].values
standard_scalar = StandardScaler()
x_std = standard_scalar.fit_transform(x)
# t distributed stochastic neighbour embedding (t-SNE) visualization
tsne = TSNE(n_components=2, random_state = 0)
x_test_2d = tsne.fit_transform(x_std)
# variable holding the classes
y = dataframe_all.classe.values # you need this for the colors
y = np.array([ord(i) for i in y]) # convert letters to numbers
#scatter plot the sample points among 5 classes
plt.figure()
plt.scatter(x_test_2d[:, 0], x_test_2d[:, 1], c = y)
plt.show()