我正在研究 Kaggle 上的一个人力资源数据集，并尝试用贝叶斯统计方法实现逻辑回归，但我不明白如何更改该模型的先验：我希望截距（Intercept）使用无信息的高斯先验，其余所有预测变量使用拉普拉斯先验。
import pymc3 as pm
# Model formula, kept as one string literal joined by line continuations.
formula = 'left ~ satisfaction_level + last_evaluation \
+ number_project + average_montly_hours \
+ time_spend_company + Work_accident + promotion_last_5years\
+ sales + salary'

# Priors keyed by the names the GLM looks up: a Normal for the intercept and a
# Laplace for the regressors.
# NOTE: both distributions are instantiated here, before (and outside of) the
# `with pm.Model()` block below -- this is what triggers the
# "No model on context stack" TypeError quoted after this snippet.
priors = {
    "Intercept": pm.Normal('alpha', mu=0, sd=100),
    "Regressor": pm.Laplace('beta', mu=0, b=np.sqrt(2)),
}

with pm.Model() as logistic_model:
    # Build the GLM from the formula with a Binomial family and the custom
    # priors, then draw 4000 samples.
    pm.glm.GLM.from_formula(
        formula,
        df,
        family=pm.glm.families.Binomial(),
        priors=priors,
    )
    trace_logistic_model = pm.sample(4000)
错误信息:
TypeError: No model on context stack, which is needed to instantiate distributions. Add variable inside a 'with model:' block, or use the '.dist' syntax for a standalone distribution.
我也尝试过错误信息中提到的 `.dist` 语法，但同样不起作用。
感谢您的帮助
更新
我已将代码更改为以下内容:
# Model formula, kept as one string literal joined by line continuations.
formula = 'left ~ satisfaction_level + last_evaluation \
+ number_project + average_montly_hours \
+ time_spend_company + Work_accident + promotion_last_5years\
+ sales + salary'

with pm.Model() as logistic_model:
    # The priors are now created inside the model context, so the earlier
    # "No model on context stack" error no longer applies.
    # NOTE(review): these are named random variables ('alpha', 'beta'); the
    # traceback that follows fails while the GLM uses a prior as `dist`, so
    # presumably it expects `.dist(...)` distribution objects instead --
    # confirm against the PyMC3 GLM documentation.
    priors = {
        "Intercept": pm.Normal('alpha', mu=0, sd=100),
        "Regressor": pm.Laplace('beta', mu=0, b=np.sqrt(2)),
    }

    # Build the GLM from the formula with a Binomial family and the custom
    # priors, then draw 4000 samples.
    pm.glm.GLM.from_formula(
        formula,
        df,
        family=pm.glm.families.Binomial(),
        priors=priors,
    )
    trace_logistic_model = pm.sample(4000)
现在出现了下面这个错误：
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-e3566f815877> in <module>()
5 }
6
----> 7 pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company + Work_accident + promotion_last_5years + sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
8 trace_logistic_model = pm.sample(4000)
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in from_formula(cls, formula, data, priors, vars, family, name, model)
134 labels = x.design_info.column_names
135 return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels,
--> 136 priors=priors, vars=vars, family=family, name=name, model=model)
137
138 glm = GLM
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in __call__(cls, *args, **kwargs)
238 instance = cls.__new__(cls, *args, **kwargs)
239 with instance: # appends context
--> 240 instance.__init__(*args, **kwargs)
241 return instance
242
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in __init__(self, x, y, intercept, labels, priors, vars, family, name, model)
111 super(GLM, self).__init__(
112 x, y, intercept=intercept, labels=labels,
--> 113 priors=priors, vars=vars, name=name, model=model
114 )
115
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in __init__(self, x, y, intercept, labels, priors, vars, name, model)
57 dist=priors.get(
58 name,
---> 59 self.default_intercept_prior
60 )
61 )
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size)
750 with self:
751 var = FreeRV(name=name, distribution=dist,
--> 752 total_size=total_size, model=self)
753 self.free_RVs.append(var)
754 else:
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, total_size, model)
1117 if distribution is not None:
1118 self.dshape = tuple(distribution.shape)
-> 1119 self.dsize = int(np.prod(distribution.shape))
1120 self.distribution = distribution
1121 self.tag.test_value = np.ones(
/home/glouis/anaconda/lib/python3.5/site-packages/numpy/core/fromnumeric.py in prod(a, axis, dtype, out, keepdims)
2513 pass
2514 else:
-> 2515 return prod(axis=axis, dtype=dtype, out=out, **kwargs)
2516
2517 return _methods._prod(a, axis=axis, dtype=dtype,
TypeError: prod() got an unexpected keyword argument 'out'