I am running a few ML algorithms from sklearn, but for all of them I get the following error:
/Users//anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric)
448 else:
449 y = column_or_1d(y, warn=True)
--> 450 _assert_all_finite(y)
451 if y_numeric and y.dtype.kind == 'O':
452 y = y.astype(np.float64)
/Users//anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in _assert_all_finite(X)
50 and not np.isfinite(X).all()):
51 raise ValueError("Input contains NaN, infinity"
---> 52 " or a value too large for %r." % X.dtype)
53
54
ValueError: Input contains NaN, infinity or a value too large for dtype('float64')
Note that my design matrix has no NaN or infinite values. Here is what I checked:
np.isfinite(X_cohort_pr).all()
Out[259]:
True
X.isnull().any().any()
Out[261]:
False
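For reference, the traceback above fails inside _assert_all_finite(y), i.e. while validating the target vector rather than the design matrix, so the same checks are worth running on the target as well. A minimal sketch, assuming the target passed to fit() is a numeric pandas Series named y_cohort_pr (a hypothetical name):
import numpy as np
import pandas as pd
np.isfinite(np.asarray(y_cohort_pr, dtype=np.float64)).all()  # should be True
pd.isnull(y_cohort_pr).any()  # should be False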
So as you can see, my data matrix has no null or infinite values. Why, then, am I getting this error, and how do I fix it? I have spent more than 8 hours debugging this. Please help.
EDIT2:
Here are the first five rows of the data matrix. It has 800K rows in total and 180-odd features.
array([[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , -0.2637, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 1. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , -0.0034,
-0.0027, -0.0725, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, -0.355 ],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.2413, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , -0.0034,
-0.0027, -0.0718, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1579],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0.1688, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 1. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , -0.0034,
-0.0027, -0.0725, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1642],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.2462, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 1. , 0. ,
0. , 0. , 0. , 1. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0.0004,
-0.0012, -0.069 , -0.0673, -0.0618, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1713],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.1273, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 1. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0.0037,
-0.0023, -0.0633, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1713]])
Also, in one case where I ran an SVM, I got the same NaN/Inf error, but it also printed some values, as shown below. Again, there is no NaN anywhere; I have checked thoroughly. I still don't know why these values are being reported.
_unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
105 if not _unique_labels:
--> 106 raise ValueError("Unknown label type: %r" % ys)
107
108 ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
ValueError: Unknown label type: 117456 0
117457 0
117458 0
117459 0
117460 0
117461 0
117462 0
117463 0
117464 0
117465 0
117466 2
117467 0
117468 0
117469 0
117470 NaN
117471 0
117472 NaN
117473 3
117474 0
117475 NaN
117476 0
117477 NaN
117478 6
117479 0
117480 0
117481 NaN
117482 NaN
117483 0
117484 NaN
Answer 0 (score: 0)
There is one more case you have not checked: the cast to np.float64.
If your data is stored in a higher-precision dtype, a value that is perfectly finite there can be rendered as infinity when cast down to float64; when you check the data in its original form, it happily reports as finite.
I can't say for certain that this is your exact problem, but here is one way to produce that error. If you need more help, please post a minimal reproducible example; after 8 hours of debugging, I'm sure you have one handy :)
import numpy as np

a = np.array([1e100], dtype=np.float128)
# 1e100 still fits within 64 bits. It's not trivial to
# instantiate an array with a larger number without
# doing arithmetic at some point.
a *= a  # now at 1e200
# make it EVEN BIGGER!!! (we're at 1e400 for those keeping track)
a *= a
np.isfinite(a)  # prints True: check
np.isfinite(a.astype(np.float64))  # prints... False. :(
To see whether this is your case, just check the dtype of the data going into the pipeline. If you are reading from a file or doing some arithmetic along the way, you may well end up with values that large. The largest value you can hold in 64 bits is something like this, but there are plenty of data types in numpy to hold values beyond it, and with Python's standard arbitrary-precision integers, anything is possible!
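As a concrete check (a minimal sketch; X_cohort_pr is the matrix from the question), print the float64 ceiling and the dtype your data actually carries before fitting:
import numpy as np
print(np.finfo(np.float64).max)  # 1.7976931348623157e+308, the largest finite float64
print(X_cohort_pr.dtype)  # if this shows float128 or object, the downcast can overflow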
Going forward, one debugging approach is to lift out (or comment out) each condition leading up to the ValueError and print the data directly; you should be able to see exactly where the data fails. Since you are going through an external package, the process is a bit messy, but if you have already lost this many hours, it is still worth testing this way.
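As a sketch of that approach (the helper name is mine; it assumes X_cohort_pr and the target y are numeric numpy or pandas objects), you can replicate sklearn's internal check up front and see exactly which entries trip it:
import numpy as np

def report_nonfinite(data, name):
    arr = np.asarray(data, dtype=np.float64)  # the same downcast check_X_y performs
    bad = ~np.isfinite(arr)
    if bad.any():
        print("%s: %d non-finite entries, first at index %r" % (name, bad.sum(), np.argwhere(bad)[0]))
    else:
        print("%s: all finite" % name)

report_nonfinite(X_cohort_pr, "X")
report_nonfinite(y, "y")  # the target vector passed to fit()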