Getting a NaN / infinity error in sklearn

Date: 2015-11-18 03:21:39

Tags: python pandas scikit-learn

I am running a few ML algorithms from sklearn, but for all of them I get the following error:

/Users//anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric)
    448     else:
    449         y = column_or_1d(y, warn=True)
--> 450         _assert_all_finite(y)
    451     if y_numeric and y.dtype.kind == 'O':
    452         y = y.astype(np.float64)

/Users//anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in _assert_all_finite(X)
     50             and not np.isfinite(X).all()):
     51         raise ValueError("Input contains NaN, infinity"
---> 52                          " or a value too large for %r." % X.dtype)
     53 
     54 

ValueError: Input contains NaN, infinity or a value too large for dtype('float64')

Note that my design matrix has no NaN or infinite values. Here is what I checked:

np.isfinite(X_cohort_pr).all()
Out[259]:
True

X.isnull().any().any()
Out[261]:
False

So, as you can see, my data matrix has no null or infinite values. Why am I getting this error, then, and how do I fix it? I have spent more than 8 hours debugging this. Please help.
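For completeness, since the traceback fails inside _assert_all_finite(y), the same checks can be run on the label vector. A minimal sketch (y_cohort is a placeholder name standing in for the label Series actually passed to fit):

import numpy as np
import pandas as pd

# y_cohort stands in for the real label Series; the traceback above
# fails on y, not X, so y deserves the same scrutiny as the features.
y_cohort = pd.Series([0, 1, 0])                         # stand-in data
print(y_cohort.isnull().any())                          # True => NaN in the labels
print(np.isfinite(y_cohort.astype(np.float64)).all())   # False => NaN or inf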

EDIT2:

Here are the first five rows of the data matrix. In total it has 800K rows and 180-odd features.

array([[ 1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,
         0.    ,  0.    , -0.2637,  1.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  1.    ,  0.    ,  0.    ,  1.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    , -0.012 , -0.012 ,  0.    , -0.0028,
        -0.0108,  0.    , -0.0111, -0.0135,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    , -0.0034,
        -0.0027, -0.0725, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
        -0.0809, -0.355 ],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.2413,  1.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    , -0.012 , -0.012 ,  0.    , -0.0028,
        -0.0108,  0.    , -0.0111, -0.0135,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    , -0.0034,
        -0.0027, -0.0718, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
        -0.0809,  0.1579],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,
         0.    ,  0.    ,  0.1688,  1.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    , -0.012 , -0.012 ,  0.    , -0.0028,
        -0.0108,  0.    , -0.0111, -0.0135,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    , -0.0034,
        -0.0027, -0.0725, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
        -0.0809,  0.1642],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.2462,  1.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  1.    ,  1.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    , -0.012 , -0.012 ,  0.    , -0.0028,
        -0.0108,  0.    , -0.0111, -0.0135,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.0004,
        -0.0012, -0.069 , -0.0673, -0.0618, -0.0582, -0.0065, -0.057 ,
        -0.0809,  0.1713],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,
         1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.1273,  1.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  1.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    , -0.012 , -0.012 ,  0.    , -0.0028,
        -0.0108,  0.    , -0.0111, -0.0135,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  1.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.0037,
        -0.0023, -0.0633, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
        -0.0809,  0.1713]])

Also, in one case I ran an SVM and got the same NaN/Inf error, but it also printed some values, shown below. Again, there is no NaN anywhere; I have checked thoroughly. I still don't know why it is throwing this error with these values.

    104     _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
    105     if not _unique_labels:
--> 106         raise ValueError("Unknown label type: %r" % ys)
    107 
    108     ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))

ValueError: Unknown label type: 117456     0
117457     0
117458     0
117459     0
117460     0
117461     0
117462     0
117463     0
117464     0
117465     0
117466     2
117467     0
117468     0
117469     0
117470   NaN
117471     0
117472   NaN
117473     3
117474     0
117475   NaN
117476     0
117477   NaN
117478     6
117479     0
117480     0
117481   NaN
117482   NaN
117483     0
117484   NaN

1 Answer:

Answer 0 (score: 0)

There is one more case you haven't checked: the cast to np.float64.

If you are using a higher-precision dtype, the cast can render a very large number as infinity, while a check on the data in its original form happily reports it as finite.

I can't say for sure that this is your exact problem, but it is one way to produce that error. If you need more help, please provide a minimal reproducible example; after 8 hours of debugging, I'm sure you have one :)

import numpy as np

# np.float128 is extended precision (platform-dependent; not available everywhere)
a = np.array([1e100], dtype=np.float128)
# 1e100 still fits within 64 bits. It's not trivial to
# instantiate an array with a larger number without
# doing arithmetic at some point.
a *= a
# make it EVEN BIGGER!!! (we're at 1e400 for those keeping track)
a *= a
np.isfinite(a)                      # array([ True]) -- the check passes
np.isfinite(a.astype(np.float64))   # array([False]) :(

To see whether this is the case, just check the dtype of the data going into the process. If you read from a file or do some arithmetic along the way, you may well end up with such huge values. The maximum value in 64 bits is something like 1.8e308, but numpy has plenty of data types that can hold values beyond that, and with Python's standard arbitrary-precision integers, anything is possible!
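As a quick sketch of that check (assuming a platform where np.float128 exists; X128 below is a stand-in, substitute whatever actually goes into sklearn):

import numpy as np

X128 = np.array([1e100], dtype=np.float128)  # stand-in for your real data
X128 *= X128; X128 *= X128                   # blow it up to 1e400

print(X128.dtype)                  # anything wider than float64 is suspect
print(np.finfo(np.float64).max)    # ~1.7976931348623157e+308

# flag entries that do not survive the cast sklearn performs
bad = ~np.isfinite(X128.astype(np.float64))
print(np.argwhere(bad))            # indices of the offending values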

In the future, a good debugging approach is to pull out (or comment out) each condition right before the ValueError and print the data directly; you should be able to see exactly where the data fails. The process is a bit messy since you are working inside an external package, but if you have already lost this many hours, it is still worth testing this way.
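As a sketch of that debugging step, sklearn's finiteness check can be replicated by hand so the offending entries get printed instead of hidden behind the ValueError (X and y here are placeholders for whatever you pass to fit, and are assumed to be numeric):

import numpy as np

def report_non_finite(arr, name):
    # Same cast sklearn performs before _assert_all_finite
    arr = np.asarray(arr, dtype=np.float64)
    bad = ~np.isfinite(arr)
    if bad.any():
        print("%s: %d non-finite entries, first few at %s"
              % (name, bad.sum(), np.argwhere(bad)[:5]))
    else:
        print("%s: all finite" % name)

report_non_finite(X, "X")  # X, y: your actual inputs to fit()
report_non_finite(y, "y")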