我不明白为什么会出现“输入形状错误”(bad input shape)。出错的那一行是
test_datapoint_encoded[i] = int(label_encoder[count].transform(test_datapoint[i]))
我在这里读到,transform 函数显然不适用于列表,但我在另一个例子中这样用却可以正常工作。test_datapoint 和 test_datapoint_encoded 的形状相同。我也尝试过改用 numpy 数组,但得到了同样的错误。
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, mean_absolute_error
from sklearn import cross_validation, preprocessing
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import classification_report
# Load input data: one comma-separated record per line of the text file.
input_file = 'traffic_data.txt'
data = []
with open(input_file, 'r') as f:
    for line in f:
        # Strip only the line terminator. Slicing with [:-1] would chop a
        # real character off a final line that has no trailing newline.
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines so every row keeps the same number of fields.
            continue
        data.append(line.split(','))
# 2-D array of strings: one row per record, one column per field.
data = np.array(data)
# Convert string columns to numeric codes. The first row decides whether a
# column is already numeric; every non-numeric column gets its own
# LabelEncoder, stored in column order in `label_encoder`.
label_encoder = []
X_encoded = np.empty(data.shape)
for col, sample in enumerate(data[0]):
    column = data[:, col]
    if not sample.isdigit():
        encoder = preprocessing.LabelEncoder()
        label_encoder.append(encoder)
        column = encoder.fit_transform(column)
    X_encoded[:, col] = column

# Features: every column except the last one.
X = X_encoded[:, :-1].astype(int)
# Target: the last column only.
y = X_encoded[:, -1].astype(int)
# Hold out 25% of the samples for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.25, random_state=5)

# Build and fit the extremely randomized trees regressor.
params = dict(n_estimators=100, max_depth=4, random_state=0)
ext_regressor = ExtraTreesRegressor(**params)
ext_regressor.fit(X_train, y_train)

# Measure the regressor's performance on the held-out data.
y_pred = ext_regressor.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print("Mean absolute error:", round(mae, 2))
# Testing encoding on single data instance
test_datapoint = ['Saturday', '10:20', 'Atlanta', 'no']
test_datapoint_encoded = [0, 0, 0, 0]
# Index into `label_encoder`: encoders were appended only for non-numeric
# columns, so it advances only when a non-numeric field is encoded.
count = 0
for i, item in enumerate(test_datapoint):
    if item.isdigit():
        test_datapoint_encoded[i] = int(item)
    else:
        # LabelEncoder.transform expects a 1-D array-like of labels. Passing
        # a bare string (a scalar) raises "bad input shape ()", so wrap the
        # single label in a list and take the one encoded value back out.
        encoded = label_encoder[count].transform([item])
        test_datapoint_encoded[i] = int(encoded[0])
        count = count + 1
test_datapoint_encoded = np.array(test_datapoint_encoded)

# Predict the output for the test datapoint (predict expects a 2-D input,
# hence the extra list around the single sample).
print("Predicted traffic:",
      int(ext_regressor.predict([test_datapoint_encoded])[0]))
我之前用过一个更简单的标签编码器示例,当时没有出现任何错误。
import numpy as np
from sklearn import preprocessing
# Define sample labels.
input_labels = ['red', 'black', 'red', 'green', 'black', 'yellow', 'white']

# Create the label encoder and train it so that each colour is associated
# with an integer code (fit returns the encoder itself).
encoder = preprocessing.LabelEncoder().fit(input_labels)

# Print the mapping between words and numbers.
print("\nLabel mapping:")
for code, label in enumerate(encoder.classes_):
    print(label, '--->', code)

# Encode a set of test labels and show the result.
test_labels = ['green', 'red', 'black']
encoded_values = encoder.transform(test_labels)
print("\nLabels =", test_labels)
print("Encoded values =", [*encoded_values])

# Decode a list of numbers back into colours.
encoded_test_values = [3, 0, 4, 1]
decoded_list = encoder.inverse_transform(encoded_test_values)
print("\nEncoded values =", encoded_test_values)
print("Decoded labels =", [*decoded_list])
答案 0 :(得分:0)
在你的行
X_encoded[:,i] = label_encoder[-1].fit_transform(data[:,i])
您是在用一个数组(data 的一整列)来训练标签编码器。但在下面这一行中
test_datapoint_encoded[i] = int(label_encoder[count].transform(test_datapoint[i]))
传递给标签编码器的是单个字符串(标量),而不是数组,其形状与训练时的输入不一致,因此会收到“输入形状错误”。把该值放进列表再传入即可,例如 int(label_encoder[count].transform([test_datapoint[i]])[0])。