我正在尝试使用下面给出的代码执行线性判别分析
#!/usr/bin/python3
import pandas as pd
df = pd.read_excel('Hazara1.xlsx', sheetname='Sheet1')
feature_dict = {i:label for i,label in zip(
range(15), ("DYS19","DYS389I","DYS389II","DYS390","DYS391","DYS392","DYS393","DYS437","DYS438","DYS439","DYS448","DYS456","DYS458","DYS635","Y_GATA_H4",))}
df.columns = [l for i,l in sorted(feature_dict.items())] + ['class label']
df.dropna(how="all", inplace=True)
from sklearn.preprocessing import LabelEncoder
X = df[[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]].values
y = df['class label'].values
enc = LabelEncoder()
label_encoder = enc.fit(y)
y = label_encoder.transform(y) + 1
label_dict = {1: 'Central_Asia', 2: 'South_Asia', 3:'Russia',4:'East_Asia',5:'Hazara'}
from matplotlib import pyplot as plt
import numpy as np
import math
np.set_printoptions(precision=15)
mean_vectors = []
for cl in range(1,6):
mean_vectors.append(np.mean(X[y==cl], axis=0))
df 包含here所示的数据。 但是当我执行上面的代码时,我得到以下错误:
Traceback (most recent call last):
File "iris2.py", line 30, in <module>
mean_vectors.append(np.mean(X[y==cl], axis=0))
File "/home/ammar/anaconda3/lib/python3.5/site-packages/numpy /core/fromnumeric.py", line 2878, in mean
out=out, keepdims=keepdims)
File "/home/ammar/anaconda3/lib/python3.5/site-packages/numpy/core/_methods.py", line 65, in _mean
ret = umr_sum(arr, axis, dtype, out, keepdims)
TypeError: unsupported operand type(s) for +: 'int' and 'str'
有没有办法解决这个问题?