我正在使用sklearn库运行以下代码,并遇到了标题中提到的错误。如果我使用非线性分类器(例如DecisionTreeClassifier或KNeighborsClassifier),代码可以正常运行;但使用像SGD和PassiveAggressive这样的linear_model分类器时,就会出现该错误。我已经尝试将数组内容映射到浮点数,但它不起作用。数组的所有内容都是'numpy.unicode_'类型。非常感谢您帮助解决这个问题。
代码:
from io import open
from os import listdir
import numpy as np
# Load the training rows from the first file of the first directory under
# `path` whose name contains "_sanitized".  Each non-blank line is split on
# whitespace: the last field becomes the label, the remaining fields become
# the attributes.  Results are left in `label_train_arr` / `attr_train_arr`
# (both still hold strings at this point).
path = "address"
months = listdir(path)
for month in months:
    if "_sanitized" in month:
        # NOTE(review): `path + month` has no separator between the two parts,
        # so `path` presumably has to end with one -- confirm.
        files = listdir(path + month)
        for file_name in files:
            temp_path = path + month + "\\" + file_name
            # The context manager closes the handle even if reading fails.
            with open(temp_path, "r") as handle:
                lines = handle.read().split("\n")
            attr_train = []
            label_train = []
            for line in lines:
                row = line.split()
                if not row:
                    # Blank or whitespace-only line: nothing to parse.
                    continue
                label = row.pop()
                label_train.append(label)
                attr_train.append(row)
                if label != u'-1':
                    # Rows are collected up to and including the first one
                    # whose label is not '-1'; the rest of the file is ignored.
                    break
            label_train_arr = np.asarray(label_train)
            attr_train_arr = np.asarray(attr_train)
            # Only the first file is used.
            break
        # Only the first matching directory is used.
        break
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# Turn the string-valued attribute/label arrays into float arrays and split
# them 50/50 into training and test sets.
le = preprocessing.LabelEncoder()

# Encode every sample row into integer codes.  The codes are collected in a
# new list instead of being written back into `attr_train_arr`: that array has
# a unicode dtype, so assigning integers into it silently turns them back
# into strings and the data never becomes numeric.
# NOTE(review): the encoder is refitted on each row, so the same code can
# stand for different values in different rows -- confirm this is intended
# (encoding per column, or a plain float conversion, may be what is wanted).
encoded_rows = [le.fit_transform(sample) for sample in attr_train_arr]

# Build real float arrays; rebinding a loop variable with map(float, ...)
# does not change the array, and map() is a lazy iterator on Python 3.
attr_train_arr = np.asarray(encoded_rows, dtype=float)
label_train_arr = np.asarray(label_train_arr, dtype=float)

x = attr_train_arr
y = label_train_arr
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=.5)
print("after split")
from sklearn.linear_model import SGDClassifier
#my_classifier = SGDClassifier()
from sklearn.linear_model import PassiveAggressiveClassifier
# Fit a passive-aggressive linear classifier on the training split and
# predict labels for the held-out test split.
my_classifier = PassiveAggressiveClassifier()
my_classifier.fit(X_train, Y_train)
# Call form of print works on both Python 2 and Python 3.
print("after fit")
predictions = my_classifier.predict(X_test)