我尝试编写代码来使用朴素贝叶斯分类器(Naive Bayes classifier),但它工作不正确。
我的数据是社交网络中来自某个ID的个人资料浏览量数据,需要判断该ID是否是某个人的真实ID。
from __future__ import division
from collections import defaultdict
from math import log
def train(samples):
    """Estimate the parameters of a Naive Bayes classifier.

    ``samples`` is any iterable of ``(feats, label)`` pairs, where ``feats``
    is an iterable of hashable feature values.  It is traversed exactly once,
    so generators are accepted as well as lists.

    Returns a ``(classes, freq)`` pair of ``defaultdict`` objects (missing
    keys read as 0):

    * ``classes[label]`` -- fraction of samples carrying ``label``, i.e. P(C);
    * ``freq[label, feat]`` -- occurrences of ``feat`` among samples of
      ``label`` divided by the number of such samples, i.e. P(O|C).

    An empty ``samples`` yields two empty mappings.
    """
    classes, freq = defaultdict(lambda: 0), defaultdict(lambda: 0)
    total = 0  # counted in the loop so that len() is never needed
    for feats, label in samples:
        total += 1
        classes[label] += 1  # count class frequencies
        for feat in feats:
            freq[label, feat] += 1  # count feature frequencies per class
    # Only values of already-present keys are reassigned below, so updating
    # the dicts while iterating over them is safe.  The divisions rely on
    # true division (the file enables it via ``from __future__ import division``).
    for label, feat in freq:  # normalize feature frequencies
        freq[label, feat] /= classes[label]
    for label in classes:  # normalize class frequencies
        classes[label] /= total
    return classes, freq  # P(C) and P(O|C)
def classify(classifier, feats):
    """Return the class label with the highest posterior for ``feats``.

    ``classifier`` is a ``(classes, prob)`` pair: ``classes`` maps each label
    to its prior P(C) and ``prob`` maps ``(label, feat)`` to P(O|C).
    ``feats`` is any iterable of feature values.

    The score of a class is ``-log P(C) - sum(log P(feat|C))``; the label with
    the smallest score is returned.  A ``(label, feat)`` pair missing from
    ``prob`` is given the probability ``10**(-7)``.

    Raises ``ValueError`` if ``classes`` is empty.
    """
    classes, prob = classifier
    # The features are scanned once per class; materialize them so a one-shot
    # iterator is not exhausted after the first class has been scored.
    feats = tuple(feats)

    def cost(cl):
        # Negative log-posterior (up to a constant) of class ``cl``.
        return -log(classes[cl]) + sum(
            -log(prob.get((cl, feat), 10 ** (-7))) for feat in feats)

    return min(classes.keys(), key=cost)  # argmin(-log P(C|O))
def get_features(sample):
    """Return a 1-tuple holding characters 2 through 4 of ``str(sample)``.

    The slice is empty when the string form has fewer than three characters.
    """
    text = str(sample)
    return (text[2:5],)
samples = (line.decode('utf-8').split() for line in open('sample.txt'))
features = [(get_features(feat), label) for feat, label in samples]
classifier = train(features)
print 'Is it real ID: ', classify(classifier, get_features(0.132))
sample.txt 的内容是:
0.019546345 1
0.009769094 0
0.000888099 0
0.004440497 0
0.009769094 0
0.000888099 0
当我尝试 print 'Is it real ID: ', classify(classifier, get_features(0.132)) 时,它始终返回 0。
哪里有错误?