基于这个问题(原文链接缺失),我使用 statsmodels 在 Python 中实现 ANOVA。我的数据位于 Pandas DataFrame 中,其中 country 是一个分类变量。
def anova(data):
    """Fit an OLS model from a formula and print its type-II ANOVA table.

    Args:
        data: Table handed to ``ols`` as ``data``; the formula references its
            ``country``, ``playerRank`` and ``playerGames`` columns.
    """
    # NOTE(review): C(country) is on the left-hand side of the formula, so the
    # response is presumably expanded into one indicator column per country
    # rather than a single numeric column -- confirm this is what is intended.
    mod = ols('C(country) ~ playerRank+playerGames', data=data).fit()
    aov_table = sm.stats.anova_lm(mod, typ=2)
    # Call form of print: valid on Python 3 and, with a single argument,
    # produces the same output on Python 2.
    print(aov_table)
当我调用上述函数时,出现如下错误:
File "<ipython-input-32-e77ae8a55692>", line 1, in <module>
aov_table = sm.stats.anova_lm(mod, typ=2)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\stats\anova.py", line 326, in anova_lm
return anova_single(model, **kwargs)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\stats\anova.py", line 83, in anova_single
robust)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\stats\anova.py", line 178, in anova2_lm_single
cov = _get_covariance(model, None)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\stats\anova.py", line 15, in _get_covariance
return model.cov_params()
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\base\wrapper.py", line 95, in wrapper
obj = data.wrap_output(func(results, *args, **kwargs), how)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\base\model.py", line 1180, in cov_params
scale = self.scale
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\tools\decorators.py", line 97, in __get__
_cachedval = self.fget(obj)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\regression\linear_model.py", line 1510, in scale
return np.dot(wresid, wresid) / self.df_resid
ValueError: shapes (10481,41) and (10481,41) not aligned: 41 (dim 1) != 10481 (dim 0)
修改
我尝试了如下所示的这种方法(this approach):
# Build the response (y) and design (X) matrices from the formula, passing
# only the three columns the formula references. Column selection takes a
# single list of labels, i.e. data[[...]].
y, X = dmatrices(
    'CountryCode ~ playerRank+playerGames',
    data=data[['playerRank', 'playerGames', 'CountryCode']],
    return_type='dataframe',
)
mod = sm.OLS(y, X)
res = mod.fit()
res.summary()
# NOTE(review): the model is built from pre-computed matrices rather than
# from a formula; presumably the `design_info` that anova_lm looks up is only
# attached when the model is created through the formula interface -- confirm.
aov_table = sm.stats.anova_lm(res, typ=2)
# Call form of print: valid on Python 3 and, with a single argument,
# produces the same output on Python 2.
print(aov_table)
运行后显示以下错误:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-13-e9f27287c5da>", line 1, in <module>
aov_table = sm.stats.anova_lm(res, typ=2)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\stats\anova.py", line 326, in anova_lm
return anova_single(model, **kwargs)
File "C:\ProgramData\Anaconda2\lib\site-packages\statsmodels\stats\anova.py", line 68, in anova_single
design_info = model.model.data.design_info
AttributeError: 'PandasData' object has no attribute 'design_info'