我的项目有问题,我从twitter中提取数据,将其保存为csv并执行数据情绪分析但是当我试图绘制它们时我无法做到。
#statmodels OLS first
y, X = dmatrices('retweet_count_l ~ surge_pricing + free_rides + promo+ driver + food + controversy + regulations', data=training, return_type='dataframe')
# Define the model from above Patsy-created variables, using Statsmodels
print sm.OLS(y,X).fit().summary()
print sm.OLS(y,X).fit().params
print 'r sqd is : ', sm.OLS(y,X).fit().rsquared
rainbow = sm.stats.linear_rainbow(sm.OLS(y,X).fit())
print 'Rainbow Test for Linearity is ', rainbow
y_hat, X_hat = dmatrices('retweet_count_l ~ surge_pricing + free_rides + promo + driver + food + controversy + regulations', data=testing, return_type='dataframe')
y_pred = sm.OLS(y,X).fit().predict(X_hat)
testing['retweet_pred_smols'] = pd.Series(y_pred)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-7-b5c392dca77b> in <module>()
2 y, X = dmatrices('retweet_count_l ~ surge_pricing + free_rides + promo + driver + food + controversy + regulations', data=training, return_type='dataframe')
3 # Define the model from above Patsy-created variables, using Statsmodels
----> 4 model = sm.OLS(y,X)
5 results = model.fit()
6 print(results.summary())
/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, missing, hasconst)
481 def __init__(self, endog, exog=None, missing='none', hasconst=None):
482 super(OLS, self).__init__(endog, exog, missing=missing,
--> 483 hasconst=hasconst)
484
485 def loglike(self, params):
/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, weights, missing, hasconst)
383 weights = weights.squeeze()
384 super(WLS, self).__init__(endog, exog, missing=missing,
--> 385 weights=weights, hasconst=hasconst)
386 nobs = self.exog.shape[0]
387 weights = self.weights
/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, **kwargs)
77 """
78 def __init__(self, endog, exog, **kwargs):
---> 79 super(RegressionModel, self).__init__(endog, exog, **kwargs)
80 self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
81
/usr/lib/python2.7/dist-packages/statsmodels/base/model.pyc in __init__(self, endog, exog, **kwargs)
135 def __init__(self, endog, exog=None, **kwargs):
136 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
--> 137 self.initialize()
138
139 def initialize(self):
/usr/lib/python2.7/dist-packages/statsmodels/regression/linear_model.pyc in initialize(self)
86 # overwrite nobs from class Model:
87 self.nobs = float(self.wexog.shape[0])
---> 88 self.rank = rank(self.exog)
89 self.df_model = float(self.rank - self.k_constant)
90 self.df_resid = self.nobs - self.rank
/usr/lib/python2.7/dist-packages/statsmodels/tools/tools.pyc in rank(X, cond)
380 if len(X.shape) == 2:
381 D = svdvals(X)
--> 382 return int(np.add.reduce(np.greater(D / D.max(), cond).astype(np.int32)))
383 else:
384 return int(not np.alltrue(np.equal(X, 0.)))
/usr/local/lib/python2.7/dist-packages/numpy/core/_methods.pyc in _amax(a, axis, out, keepdims)
24 # small reductions
25 def _amax(a, axis=None, out=None, keepdims=False):
---> 26 return umr_maximum(a, axis, None, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation maximum which has no identity
答案 0 :(得分:1)
我可以用以下内容重现错误:
In [224]: np.array([]).max()
...
ValueError: zero-size array to reduction operation maximum which has no identity
In [225]:
跟踪D.max()
追溯......
sm.OLS(y,X)
X
必须为零大小。
所以你需要检查X
。这是什么.shape
?尝试打印它。
答案 1 :(得分:1)
我有同样的问题。这是因为我的列中有一些“ nan”值。用'0's填充后,问题解决了。