我的大多数样本都是重复的。是否有办法给每个样本赋予一个权重来表示它的出现次数,从而让算法只需遍历去重后的样本集合?
或者,是否可以通过调整我自己定义的 log(probability) 函数来达到同样的效果?
# Toy observations. logLikelihood reads each tuple as (row, col, t) and uses
# row/col to index a 2x2 rates matrix.
# NOTE(review): (0, 2, 10) has col == 2, which does not fit a 2x2 matrix --
# confirm whether this entry is intended.
data = [
    (0, 1, 10),
    (0, 2, 10),
    (1, 0, 20),
    (1, 0, 20),
    (1, 0, 20),
    (0, 0, 49),
    (1, 1, 12),
]

# Uniform variables named 'a' and 'd', each bounded to [-1.0, 0.0].
member_a = mc.Uniform(
    'a',
    lower=-1.0,
    upper=0.0,
)
member_d = mc.Uniform(
    'd',
    lower=-1.0,
    upper=0.0,
)
@mc.stochastic(observed=True, dtype=int)
def logLikelihood(value=data, a=member_a, d=member_d):
    """Collect the rate and the third field of every observation.

    Each observation is read as ``(row, col, t)``: ``row`` and ``col`` index
    a 2x2 rates matrix built from ``a`` and ``d``, and ``t`` is collected
    unchanged.

    Args:
        value: Sequence of ``(row, col, t)`` observations; defaults to the
            module-level ``data``.
        a: Stored at ``[0, 0]`` of the rates matrix; ``-a`` goes to ``[0, 1]``.
        d: Stored at ``[1, 1]`` of the rates matrix; ``-d`` goes to ``[1, 0]``.

    Raises:
        IndexError: If an observation's ``row``/``col`` does not fit the
            2x2 rates matrix.
    """
    # Each row of the matrix sums to zero: the off-diagonal entry is the
    # negated diagonal entry.
    ratesMatrix = np.zeros((2, 2))
    ratesMatrix[0, 0] = a
    ratesMatrix[0, 1] = -a
    ratesMatrix[1, 0] = -d
    ratesMatrix[1, 1] = d

    # Iterate over the samples actually passed in rather than the
    # module-level `data`, so the loop bounds always match `value`.
    r = np.array(
        [ratesMatrix[int(sample[0]), int(sample[1])] for sample in value],
        dtype=np.float64,
    )
    t = np.array([sample[2] for sample in value], dtype=np.float64)

    # NOTE(review): no return statement is visible for this function, so it
    # returns None and `r` / `t` are computed but never used. A
    # log-probability function presumably has to return a float -- restore
    # the missing return expression before sampling.
# Build the sampler over the two Uniform variables and the observed node,
# then run 5000 iterations.
model = mc.MCMC(
    [
        member_a,
        member_d,
        logLikelihood,
    ]
)
# NOTE(review): confirm what sample() returns; in PyMC 2 the draws are
# usually read back with model.trace(name)[:] rather than from this value.
trace = model.sample(iter=5000)