import pandas as pd
from sklearn.preprocessing import LabelEncoder
df = pd.read_csv('C:/Users/Samuel Mireku/Desktop/pokemon_data.csv')
#data shape(800, 12)
print(df.shape)
#CHECKING FOR NULLS AND SUM OF NULLS
print(df.iloc[0:12].isnull())
print(df.isnull().sum())
#removes nulls
print(df.dropna(inplace=True))
print(df['Type 2'])
#removal of nulls brought shape to 414, 12
print(df.shape)
print(df.columns)
#dropped '#' column
print(df.drop(['#'], axis=1))
#encoding
var_mod = ['Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
'Sp. Def', 'Speed', 'Generation', 'Legendary']
le = LabelEncoder()
for i in var_mod:
df[i] = le.fit_transform(df[i])
from sklearn.model_selection import train_test_split
X = df.iloc[0:12]
y = df.iloc[12]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=5)
print(X_train)
print(y_train)
print(X_test)
from sklearn.naive_bayes import GaussianNB
#Create a Gaussian Classifier
clf = GaussianNB()
clf.fit(X_train, y_train)
clf.predict(X_test)
clf.score(X_test, y_test)
出[91]:0.0是我的准确性得分
我的准确性得分为0.0,如何解决这个问题有什么想法?还没有运气在网上寻找答案。请记住,我是新来的。编码可能是问题吗?我选择了编码,因为这是我在编码之前收到的错误:
“ ValueError:无法将字符串转换为浮点数:”