I am implementing a Bayesian classifier with Gaussian class-conditional distributions. As you can see, in one part of the code I need to define a dictionary named meanDict that holds the means of X1 and X2, and then reuse the result of calculateMeanDict to compute the variance inside the calculateVar function. However, as soon as execution reaches the line
var = np.mean((X-meanDict[y])^2)
I get a TypeError: unhashable type: 'numpy.ndarray'.
Can anyone help me?
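For context, the error can be reproduced in isolation like this (a minimal sketch with made-up values; the names meanDict and y only mirror the code below):

import numpy as np

meanDict = {0: 0.25, 1: 0.75}       # per-class means keyed by the integer class label
y = np.array([0, 1, 1, 0])          # label vector

# Indexing the dict with the whole array tries to hash the ndarray and fails:
# meanDict[y]  ->  TypeError: unhashable type: 'numpy.ndarray'

# Looking the labels up one scalar at a time works:
means = np.array([meanDict[label] for label in y])   # array([0.25, 0.75, 0.75, 0.25])

(Separately, note that ^ is bitwise XOR in Python; ** is the power operator.)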
# !!! Must Not Change the Imports !!!
from pathlib import Path
from typing import Dict
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from TicToc import timing, Timer
def loadDataset(filepath='Data/PokemonUnivariate.csv', labelCol='type1') -> (np.ndarray, np.ndarray, LabelEncoder):
"""
!!! Must Not Change the Content !!!
This function is used to load dataset
Data will be scaled from 0 to 1
:param filepath: csv file path
:param labelCol: column name
:return: X: ndarray, y: ndarray, labelEncoder
"""
data = pd.read_csv(filepath)
y = data[labelCol].values
labelEncoder = LabelEncoder()
y = labelEncoder.fit_transform(y)
X = data.drop(columns=[labelCol]).values
X = np.nan_to_num(X)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
return X, y, labelEncoder
class LinearClassifier(object):
def __init__(self):
"""!!! Must Not Change the Content !!!"""
self.X = None
self.y = None
self.w = 0
self.b = 0
def name(self) -> str:
"""!!! Must Not Change the Content !!!"""
return self.__class__.__name__
def sigmoid(self, X, w=None, b=None):
"""
!!! Must Not Change the Content !!!
Sigmoid function
:param X: X
:param w: weight
:param b: bias
:return: sigmoid(wx + b)
"""
w, b = self.w if w is None else w, self.b if b is None else b
return 1 / (1 + np.exp(-(w * X + b)))
def fit(self, X, y):
"""!!! Must Not Change the Content !!!"""
raise NotImplementedError
def predictSample(self, x) -> int:
"""
ToDo: Implement This Function
Predict a label of given x
If sigmoid(x) is greater than 0.5, then the label is 1; otherwise the label is 0
:param x: a sample
:return: a label
"""
        # Use the trained parameters stored on the classifier; self.sigmoid
        # falls back to self.w and self.b when w and b are not passed.
        if self.sigmoid(x) > 0.5:
            label = 1
        else:
            label = 0
        return label
def predict(self, X):
"""
!!! Must Not Change the Content !!!
Predict the labels of given X
If sigmoid(x) is greater than 0.5, then the label is 1; otherwise the label is 0
"""
return np.apply_along_axis(self.predictSample, axis=1, arr=X)
class UnivariateGaussianDiscriminantAnalysisClassifier(LinearClassifier):
"""
Univariate Gaussian Classifier
Univariate & Binary Class
"""
@staticmethod
def lnPrior(X1, X2):
"""
!!! Must Not Change the Content !!!
Calculate the ln P(C1) and ln P(C2)
:param X1: X with C1 label
:param X2: X with C2 label
:return (ln P(C1), ln P(C2))
"""
(nX1, _), (nX2, _) = X1.shape, X2.shape
return np.log(nX1 / (nX1 + nX2)), np.log(nX2 / (nX1 + nX2))
@staticmethod
def calculateMeanDict(X1, X2) -> Dict[int, float]:
"""
ToDo: Implement This Function
        This function should return a dictionary
        that maps 0 to the mean of X1 and 1 to the mean of X2
:param X1: ndarray
:param X2: ndarray
:return: mean dict
"""
        # Key the means by the integer class labels (0 and 1) so that a scalar
        # label yi can be used to look up its class mean later on.
        meanDict = {0: np.mean(X1), 1: np.mean(X2)}
        return meanDict
@staticmethod
def calculateVar(meanDict, X, y) -> float:
"""
ToDo: Implement This Function
This function calculates the variance of X
var = mean (xi-meanDict[yi])^2
:param meanDict: 0: mean X1, and 1: mean X2
:param X: X
:param y: y
:return: var
"""
        # y is an ndarray, and ndarrays are not hashable, so meanDict[y] raises
        # TypeError; look each class mean up with a scalar label instead.
        # Note that ** is the power operator (^ is bitwise XOR).
        means = np.array([meanDict[yi] for yi in y])
        var = np.mean((X.ravel() - means) ** 2)
        return var
@staticmethod
def calculateW(meanDict, var) -> float:
"""
ToDo: Implement This Function
Calculate w. w=(mean2-mean1)/var
:param meanDict: 0: mean X1, and 1: mean X2
:param var: variance
:return: w
"""
        # w = (mean2 - mean1) / var, as given in the docstring
        w = (meanDict[1] - meanDict[0]) / var
        return w
@staticmethod
def calculateB(meanDict, var, lnPrior1, lnPrior2) -> float:
"""
ToDo: Implement This Function
calculate b. b=(mean1^2-mean2^2)/(2*var) + ln(P(C2)) - ln(P(C1))
:param meanDict: 0: mean X1, and 1: mean X2
:param var: variance
:param lnPrior1: ln(P(C1))
:param lnPrior2: ln(P(C2))
:return: b
"""
        # b = (mean1^2 - mean2^2) / (2 * var) + ln P(C2) - ln P(C1)
        b = (meanDict[0] ** 2 - meanDict[1] ** 2) / (2 * var) + lnPrior2 - lnPrior1
        return b
def fit(self, X, y):
"""
!!! Must Not Change the Content !!!
Train the Univariate Gaussian Classifier
:param X: shape (N Samples, 1)
:param y: shape (1, N Samples)
"""
self.X, self.y = X, y
nX, _ = X.shape
X1 = np.array([x1 for x1, y1 in zip(X, y) if y1 == 0])
X2 = np.array([x2 for x2, y2 in zip(X, y) if y2 == 1])
lnPrior1, lnPrior2 = self.lnPrior(X1, X2)
meanDict = self.calculateMeanDict(X1, X2)
var = self.calculateVar(meanDict, X, y)
self.w = self.calculateW(meanDict, var)
self.b = self.calculateB(meanDict, var, lnPrior1, lnPrior2)
return self
@timing
def main():
"""
!!! Must Not Change the Content !!!
"""
randomState = 0
resultFolder = Path('Data/')
with Timer('Data Loaded'):
X, y, _ = loadDataset()
XTrain, XTest, yTrain, yTest = \
train_test_split(X, y, test_size=0.2, random_state=randomState)
print(f'Training Set Length: {XTrain.shape[0]}\n'
f'Testing Set Length: {XTest.shape[0]}')
classifiers = [UnivariateGaussianDiscriminantAnalysisClassifier()]
for classifier in classifiers:
with Timer(f'{classifier.name()} Trained'):
classifier.fit(XTrain, yTrain)
with Timer(f'{classifier.name()} Tested'):
yPredicted = classifier.predict(XTest)
with Timer(f'{classifier.name()} Results Saved'):
resultsCsv = pd.DataFrame()
resultsCsv['yPredicted'] = yPredicted
resultsCsv['yTrue'] = yTest
resultsCsvPath = resultFolder / f'{classifier.name()}Results.csv'
resultsCsv.to_csv(resultsCsvPath, index=False)
resultsStr = f'{classification_report(yTest, yPredicted, digits=5)}\n' \
f'{classifier.name()}: w={classifier.w}; b={classifier.b}'
resultsTxtPath = resultFolder / f'{classifier.name()}Results.txt'
with open(resultsTxtPath, 'w') as resultsTxtFile:
resultsTxtFile.write(resultsStr)
print(resultsStr)
if __name__ == '__main__':
main(timerPrefix='Total Time Costs: ', timerBeep=False)
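For reference, here is a small self-contained check of how I understand the pieces in fit() are supposed to combine, run on synthetic data (illustrative numbers only, independent of the course files such as TicToc):

import numpy as np

# Synthetic univariate data: class 0 around 0.3, class 1 around 0.7 (illustrative values only)
rng = np.random.default_rng(0)
X = np.concatenate([rng.normal(0.3, 0.05, 50), rng.normal(0.7, 0.05, 50)]).reshape(-1, 1)
y = np.array([0] * 50 + [1] * 50)

X1, X2 = X[y == 0], X[y == 1]
lnPrior1, lnPrior2 = np.log(len(X1) / len(X)), np.log(len(X2) / len(X))

meanDict = {0: np.mean(X1), 1: np.mean(X2)}            # per-class means keyed by label
means = np.array([meanDict[yi] for yi in y])           # class mean for each sample
var = np.mean((X.ravel() - means) ** 2)                # shared variance

w = (meanDict[1] - meanDict[0]) / var                  # w = (mean2 - mean1) / var
b = (meanDict[0] ** 2 - meanDict[1] ** 2) / (2 * var) + lnPrior2 - lnPrior1

yPredicted = (1 / (1 + np.exp(-(w * X.ravel() + b))) > 0.5).astype(int)
print(f'w={w:.3f}, b={b:.3f}, training accuracy={np.mean(yPredicted == y):.2f}')

If the formulas are right, sigmoid(w*x + b) plays the role of P(C2 | x), so this toy example should separate the two clusters almost perfectly.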