statsmodels ValueError

时间:2015-11-16 07:19:42

标签: python numpy plot regression statsmodels

我的项目有问题,我从twitter中提取数据,将其保存为csv并执行数据情绪分析但是当我试图绘制它们时我无法做到。

#statmodels OLS first
y, X = dmatrices('retweet_count_l ~ surge_pricing + free_rides + promo+   driver + food + controversy + regulations', data=training, return_type='dataframe')
# Define the model from above Patsy-created variables, using Statsmodels
print sm.OLS(y,X).fit().summary()
print sm.OLS(y,X).fit().params
print 'r sqd is : ', sm.OLS(y,X).fit().rsquared
rainbow = sm.stats.linear_rainbow(sm.OLS(y,X).fit())
print 'Rainbow Test for Linearity is ', rainbow
y_hat, X_hat = dmatrices('retweet_count_l ~ surge_pricing + free_rides + promo + driver + food + controversy + regulations', data=testing, return_type='dataframe')
y_pred = sm.OLS(y,X).fit().predict(X_hat)
testing['retweet_pred_smols'] = pd.Series(y_pred)

    ---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-b5c392dca77b> in <module>()
      2 y, X = dmatrices('retweet_count_l ~ surge_pricing + free_rides + promo + driver + food + controversy + regulations', data=training, return_type='dataframe')
      3 # Define the model from above Patsy-created variables, using Statsmodels
----> 4 model = sm.OLS(y,X)
      5 results = model.fit()
      6 print(results.summary())

/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, missing, hasconst)
    481     def __init__(self, endog, exog=None, missing='none', hasconst=None):
    482         super(OLS, self).__init__(endog, exog, missing=missing,
--> 483                                   hasconst=hasconst)
    484 
    485     def loglike(self, params):

/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, weights, missing, hasconst)
    383             weights = weights.squeeze()
    384         super(WLS, self).__init__(endog, exog, missing=missing,
--> 385                                   weights=weights, hasconst=hasconst)
    386         nobs = self.exog.shape[0]
    387         weights = self.weights

/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, **kwargs)
     77     """
     78     def __init__(self, endog, exog, **kwargs):
---> 79         super(RegressionModel, self).__init__(endog, exog, **kwargs)
     80         self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
     81 

/usr/lib/python2.7/dist-packages/statsmodels/base/model.pyc in __init__(self, endog, exog, **kwargs)
    135     def __init__(self, endog, exog=None, **kwargs):
    136         super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
--> 137         self.initialize()
    138 
    139     def initialize(self):

/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in initialize(self)
     86         # overwrite nobs from class Model:
     87         self.nobs = float(self.wexog.shape[0])
---> 88         self.rank = rank(self.exog)
     89         self.df_model = float(self.rank - self.k_constant)
     90         self.df_resid = self.nobs - self.rank

/usr/lib/python2.7/dist-packages/statsmodels/tools/tools.pyc in rank(X, cond)
    380     if len(X.shape) == 2:
    381         D = svdvals(X)
--> 382         return int(np.add.reduce(np.greater(D / D.max(), cond).astype(np.int32)))
    383     else:
    384         return int(not np.alltrue(np.equal(X, 0.)))

/usr/local/lib/python2.7/dist-packages/numpy/core/_methods.pyc in _amax(a, axis, out, keepdims)
     24 # small reductions
     25 def _amax(a, axis=None, out=None, keepdims=False):
---> 26     return umr_maximum(a, axis, None, out, keepdims)
     27 
     28 def _amin(a, axis=None, out=None, keepdims=False):

ValueError: zero-size array to reduction operation maximum which has no identity

2 个答案:

答案 0 :(得分:1)

我可以用以下内容重现错误:

In [224]: np.array([]).max()
...
ValueError: zero-size array to reduction operation maximum which has no identity
In [225]: 

跟踪D.max()追溯......

sm.OLS(y,X)

X必须为零大小。

所以你需要检查X。这是什么.shape?尝试打印它。

答案 1 :(得分:1)

我有同样的问题。这是因为我的列中有一些“ nan”值。用'0's填充后,问题解决了。