当我尝试预测 z 的值时，出现了以下错误：
“ValueError: Input contains NaN, infinity or a value too large for dtype('float32').”（输入包含 NaN、无穷大或超出 dtype('float32') 范围的值。）
是我在 data.fillna(0, inplace=True) 这一行写错了，还是有其他问题？
import pandas as pd
import numpy as np
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
def _load_and_clean(csv_path):
    """Read one CSV file and return a frame that contains no missing values.

    The cleaning steps are applied in this order:

    1. Every "?" placeholder is replaced by the sentinel -99999.
    2. The "Id" column is dropped.
    3. Each non-numeric column whose non-missing entries all parse as
       numbers is converted to a numeric dtype; other columns are left
       untouched.
    4. All remaining missing values are replaced by 0.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    frame = pd.read_csv(csv_path)
    frame.replace("?", -99999, inplace=True)
    frame.drop(["Id"], axis=1, inplace=True)
    for name in frame.columns:
        column = frame[name]
        if pd.api.types.is_numeric_dtype(column):
            continue
        as_number = pd.to_numeric(column, errors="coerce")
        # Adopt the conversion only when it loses nothing: a genuinely
        # textual column would otherwise be coerced to NaN and then wiped
        # out by the fillna(0) below.
        if as_number.notna().sum() == column.notna().sum():
            frame[name] = as_number
    frame.fillna(0, inplace=True)
    return frame


# Both files go through the same helper so that the test frame is cleaned too;
# any NaN left in it would make the estimator reject the prediction input.
data = _load_and_clean("C:/Users/Animkush/Desktop/train.csv")
data1 = _load_and_clean("C:/Users/Animkush/Desktop/test.csv")
def handle_non_numerical_data(data):
    """Replace every non-numeric column of *data* with integer codes.

    A column whose dtype is neither ``int64`` nor ``float64`` is
    label-encoded: each distinct value is assigned an integer, starting at 0,
    in order of first appearance. Columns of dtype ``int64`` or ``float64``
    are left untouched.

    Args:
        data: The ``pandas.DataFrame`` to encode. It is modified in place.

    Returns:
        The same frame object that was passed in.
    """
    for column in data.columns.values:
        dtype = data[column].dtype
        if dtype == np.int64 or dtype == np.float64:
            continue
        values = data[column].values.tolist()
        # First-appearance order keeps the codes reproducible; iterating a
        # set of strings would change order between interpreter runs.
        codes = {}
        for value in values:
            codes.setdefault(value, len(codes))
        data[column] = [codes[value] for value in values]
    return data
# Separate the target from the training features.
features = data.drop(["SalePrice"], axis=1)
y = np.array(data["SalePrice"])

# The test frame is converted to a bare array for prediction, so its columns
# must line up one-to-one with the training features.
if list(data1.columns) != list(features.columns):
    raise ValueError("test columns do not match the training feature columns")

# Encode the training features and the test rows in a single pass so that a
# given category value receives the same integer code in both; encoding the
# two frames separately gives each its own, unrelated numbering.
n_train = len(features)
combined = pd.concat([features, data1], ignore_index=True)
combined = handle_non_numerical_data(combined)
x = np.array(combined.iloc[:n_train])
z = np.array(combined.iloc[n_train:])

# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20; on newer
# versions train_test_split lives in sklearn.model_selection.
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.1)

# NOTE(review): SalePrice is presumably a continuous price, in which case a
# regressor would be a better fit than a classifier. Confirm before changing.
clf = RandomForestClassifier()
clf.fit(X_train, Y_train)

# Training-set score first, then the score on the held-out 10% split.
print(clf.score(X_train, Y_train))
print(clf.score(X_test, Y_test))
print(clf.predict(z))
答案 0 :(得分:0)
您的数据中可能含有 inf（无穷大）值，请在训练和预测之前添加以下代码：
# Replace infinities in both frames: the failing call is clf.predict(z), and
# z is built from data1, so sanitizing only the training frame is not enough.
data.replace([np.inf, -np.inf], 0, inplace=True)
data1.replace([np.inf, -np.inf], 0, inplace=True)