I am running the following code.
# Function for Stochastic Gradient Descent SVM
def SGD(k_fold, train_X, train_Y):
    """ Method to implement Multi-class SVM using
        Stochastic Gradient Descent
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    scores_sgd = []

    for train_indices, test_indices in k_fold:
        train_X_cv = train_X[train_indices]
        train_Y_cv = train_Y[train_indices]

        test_X_cv  = train_X[test_indices]
        test_Y_cv  = train_Y[test_indices]

        sgd = SGDClassifier( loss = 'hinge', penalty = 'l2' )
        scores_sgd.append( sgd.fit( train_X_cv, train_Y_cv ).score( test_X_cv, test_Y_cv ) )

    print( "The mean accuracy of Stochastic Gradient Descent Classifier on CV data is:", np.mean( scores_sgd ) )

    return sgd
Here is what the k_fold data looks like:
for a, b in k_fold:
    print len(a), len(b)
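For context, a k_fold object that behaves like this (iterating it yields pairs of train/test index arrays) could have been created with the pre-0.18 sklearn.cross_validation.KFold API; this is an assumption, the question does not show how k_fold was built:

# assumption: k_fold comes from the old cross-validation API, whose objects
# yield ( train_indices, test_indices ) pairs when iterated over
from sklearn.cross_validation import KFold
k_fold = KFold( n = len( train_X ), n_folds = 5, shuffle = True, random_state = 0 )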
I even checked my train_X and train_Y for NaN:
>>> numpy.isnan( train_X ).any()
False
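For completeness, a minimal sketch of running the same check on the labels and on infinities (assuming train_X and train_Y are plain numpy arrays; note that the traceback below actually fails inside _assert_all_finite(y), i.e. on the labels):

>>> numpy.isnan( train_Y ).any()   # the check that fails in the traceback runs on y
>>> numpy.isinf( train_X ).any()   # NaN is not the only non-finite value rejected
>>> numpy.isinf( train_Y ).any()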
But I am still getting this error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-22-a2f71582edd0> in <module>()
      1 # Running SGD. and RF
      2 
----> 3 sgd=SGD(k_fold,train_X,train_Y)
      4 #rf=RF(k_fold,train_X,train_Y)

<ipython-input-12-7e3b6395f3d6> in SGD(k_fold, train_X, train_Y)
     62 
     63         sgd=SGDClassifier(loss='hinge',penalty='l2')
---> 64         scores_sgd.append(sgd.fit(train_X_cv,train_Y_cv).score(test_X_cv,test_Y_cv))
     65 
     66     print("The mean accuracy of Stochastic Gradient Descent Classifier on CV data is:", np.mean(scores_sgd))

/Users/mtripathi/anaconda/lib/python2.7/site-packages/sklearn/linear_model/stochastic_gradient.pyc in fit(self, X, y, coef_init, intercept_init, class_weight, sample_weight)
    562                          loss=self.loss, learning_rate=self.learning_rate,
    563                          coef_init=coef_init, intercept_init=intercept_init,
--> 564                          sample_weight=sample_weight)
    565 
    566 

/Users/mtripathi/anaconda/lib/python2.7/site-packages/sklearn/linear_model/stochastic_gradient.pyc in _fit(self, X, y, alpha, C, loss, learning_rate, coef_init, intercept_init, sample_weight)
    401         self.classes_ = None
    402 
--> 403         X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
    404         n_samples, n_features = X.shape
    405 

/Users/mtripathi/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric)
    448     else:
    449         y = column_or_1d(y, warn=True)
--> 450         _assert_all_finite(y)
    451     if y_numeric and y.dtype.kind == 'O':
    452         y = y.astype(np.float64)

/Users/mtripathi/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in _assert_all_finite(X)
     50             and not np.isfinite(X).all()):
     51         raise ValueError("Input contains NaN, infinity"
---> 52                          " or a value too large for %r." % X.dtype)
     53 
     54 

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
Please help. I also have a random forest function, but it is killing the kernel and I don't know why.
Answer 0 (score: 0):
np.nan_to_num( ... ).astype( np.float32 ) # may help to prototype the methods
"""
Replace nan with zero and inf with finite numbers.
Returns an array or scalar replacing Not a Number (NaN) with zero,
(positive) infinity with a very large number and negative infinity
with a very small (or negative) number.
Parameters
----------
x : array_like
Input data.
Returns
-------
out : ndarray, float
Array with the same shape as `x` and dtype of the element in `x` with
the greatest precision. NaN is replaced by zero, and infinity
(-infinity) is replaced by the largest (smallest or most negative)
floating point value that fits in the output dtype. All finite numbers
are upcast to the output dtype (default float64).
See Also
--------
isinf : Shows which elements are positive or negative infinity.
isneginf : Shows which elements are negative infinity.
isposinf : Shows which elements are positive infinity.
isnan : Shows which elements are Not a Number (NaN).
isfinite : Shows which elements are finite (not NaN, not infinity)
Notes
-----
Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic
(IEEE 754). This means that Not a Number is not equivalent to infinity.
Examples
--------
>>> np.set_printoptions(precision=8)
>>> x = np.array([np.inf, -np.inf, np.nan, -128, 128])
>>> np.nan_to_num(x)
array([ 1.79769313e+308, -1.79769313e+308, 0.00000000e+000,
-1.28000000e+002, 1.28000000e+002])
"""
1) Modify the available (O/S-dependent) RAM size / virtual-memory-management options and upgrade wherever an upgrade path is possible.
2) Reduce your DataSET representation, going down from np.float64 to np.float32 (see the sketch after this list).
3) Avoid in-RAM duplicates and the python-memory-manager's reluctance to return mem-allocs back to the O/S for re-use, possibly moving to numpy's non-in-RAM matrix representations (memory-mapped files).
4) Finally, close to the RAM ceiling, you may benefit from building the RandomForest incrementally (warm_start = True, see the sketch after this list).
If your localhost O/S does not allow the required sizes, move your processing onto a different O/S platform / cloud-hosted processing, where the RAM sizes and the O/S allow working with larger in-RAM objects.
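A minimal sketch of points 2) and 4), assuming an in-RAM numpy feature matrix and sklearn's RandomForestClassifier (the batch size and tree counts below are illustrative only):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# point 2) -- halve the footprint of the feature matrix
train_X_32 = train_X.astype( np.float32 )
print( "%.1f MB as float64" % ( train_X.nbytes    / float( 2**20 ) ) )
print( "%.1f MB as float32" % ( train_X_32.nbytes / float( 2**20 ) ) )

# point 4) -- grow the forest incrementally instead of fitting all trees at once
rf = RandomForestClassifier( n_estimators = 10, warm_start = True )
for aGrowStep in range( 5 ):
    rf.fit( train_X_32, train_Y )    # with warm_start = True, each .fit() keeps the already-built trees
    rf.n_estimators += 10            # ask for 10 more trees on the next .fit() call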
A small python utility for watching memory consumption:

def getExtMemoryUsed():   # msMOD.DEBUG.self ( ref. 2GB Memory Crashes in python + wXP /3GB [BOOT.ini]-options )
    import os, psutil

    aMemoryINFO = psutil.Process( os.getpid() ).get_ext_memory_info()

    anItemLIST  = [ ( "num_page_faults",    "x"  ),
                    ( "peak_wset",          "MB" ),
                    ( "wset",               "MB" ),
                    ( "peak_paged_pool",    "MB" ),
                    ( "paged_pool",         "MB" ),
                    ( "peak_nonpaged_pool", "MB" ),
                    ( "nonpaged_pool",      "MB" ),
                    ( "pagefile",           "MB" ),
                    ( "peak_pagefile",      "MB" ),
                    ( "private",            "MB" )
                    ]
    aListOfShowIDs = [ 8, 7, 9, 1, 2, 3, 4, 5, 6, 0 ]

    for i, id in enumerate( aListOfShowIDs[:-1] ):
        print "{0: > 16.3f} {2:_>20s} {1: >3s} ".format( aMemoryINFO[id] / float( 2**20 ), anItemLIST[id][1], anItemLIST[id][0] )
    print " {2:_>20s} {0: >11d} x\n".format( aMemoryINFO[ 0], "", anItemLIST[ 0][0] )
    """
    |>>> getExtMemoryUsed()
            1534.012 _______peak_pagefile  MB
            1195.777 ____________pagefile  MB
            1195.777 _____________private  MB
            1119.109 ___________peak_wset  MB
             662.457 ________________wset  MB
               0.321 _____peak_paged_pool  MB
               0.321 __________paged_pool  MB
               0.084 __peak_nonpaged_pool  MB
               0.083 _______nonpaged_pool  MB
     _____num_page_faults     3727767 x

    pextmem(num_page_faults=3727767, peak_wset=1173471232, wset=694636544, peak_paged_pool=336316, paged_pool=336172, peak_nonpaged_pool=87968, nonpaged_pool=87480, pagefile=1253863424, peak_pagefile=1608527872, private=1253863424)
    """
    return aMemoryINFO
This tiny piece of code needs far less memory than a fully-fledged memory-profiler attachment, while it helps you diagnose how close to / how far from the in-RAM boundary your code already operates.
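A hypothetical way to use it (the function above relies on the older psutil get_ext_memory_info() call; newer psutil releases expose similar counters via memory_full_info()): snapshot the counters around the step that exhausts RAM, for example the RandomForest call the asker had commented out:

getExtMemoryUsed()                     # snapshot before the memory-hungry step
rf = RF( k_fold, train_X, train_Y )    # the call from the question that was killing the kernel
getExtMemoryUsed()                     # snapshot after -- watch peak_wset / pagefile growth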
For the hawkish pythoneers: this post intentionally uses non-PEP-8 source-code formatting, as the author has experienced that, during the learning phase, easier code reading improves focus on the task solution and helps one get used to the underlying concepts, rather than spending effort on formal typography. Hopefully the principle of trying to help is respected, and the non-PEP-8 style formatting is forgiven in the name of easy reading.