我是 Python 初学者,在实现后验概率时遇到了问题。我使用的是 Lenses(隐形眼镜)数据集:https://archive.ics.uci.edu/ml/datasets/Lenses 。以下是我的代码。
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Names of the four categorical attributes followed by the class label.
header = [
    'age of the patient',
    'spectacle prescription',
    'astigmatic',
    'tear production rate',
    'target',
]
# NOTE(review): lenses.data as distributed by UCI is whitespace-delimited and
# every row starts with a running instance id. With the default comma
# separator each row would land in the first column and 'target' would be
# all NaN. Confirm the local file matches the UCI layout.
# The id column is parsed but dropped via `usecols`, so `ds` keeps exactly the
# five columns in `header` and a default RangeIndex.
ds = pd.read_csv(
    'lenses.data',
    sep=r'\s+',
    header=None,
    names=['id', *header],
    usecols=header,
)
def Train_Test_Split_9(ds2, test_size=0.1, random_state=0):
    """Split *ds2* into train and test partitions.

    The returned feature frames intentionally still contain the ``target``
    column, because other code in this file reads the label directly from the
    training frame. Drop that column before passing the frames to a model,
    otherwise the label leaks into the features.

    Args:
        ds2: DataFrame that has a ``target`` column.
        test_size: Fraction of rows held out for testing (default 0.1).
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible (default 0).

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)`` where the ``X_*`` frames
        are row subsets of *ds2* (all columns) and the ``Y_*`` series are the
        matching ``target`` values.
    """
    Y = ds2['target']
    X = ds2  # full frame, label column included (see docstring)
    return train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )
# Split the loaded dataset; the X_* frames still carry the 'target' column
# because Train_Test_Split_9 passes the whole frame as X.
X_train_9, X_test_9, Y_train, Y_test=Train_Test_Split_9(ds)
# =====================Posterior Probability================
def class_Probability(train_dataset):
    """Return the relative frequency of every class label in *train_dataset*.

    Args:
        train_dataset: DataFrame with a ``target`` column holding the labels.

    Returns:
        A one-column DataFrame (column label ``0``) indexed by class label in
        order of first appearance. Each value is the fraction of labelled
        rows that belong to that class. Rows whose label is missing (NaN)
        are ignored.
    """
    # Missing labels cannot be counted towards any class; drop them up front
    # so they neither appear in the index nor distort the normalisation.
    labels = train_dataset["target"].dropna()
    keys_class = labels.unique()
    # value_counts tallies in one vectorised pass; reindexing restores the
    # first-appearance order of the classes.
    p_class = (
        labels.value_counts(normalize=True, sort=False).reindex(keys_class)
    )
    return pd.DataFrame(p_class.to_numpy(dtype=float), index=keys_class)
# Relative frequency of each class label in the training split.
p_class=class_Probability(X_train_9)