我有一个 20000 列、110000 行的数据帧(DataFrame),需要使用 Python(scikit-learn)的网格搜索模块 GridSearchCV 来调整模型参数。
# Validation for the SVM: tune a TF-IDF + LinearSVC pipeline with a grid search.
# The grid search previously failed because the parameter grid was malformed:
#   * keys must be '<step name>__<parameter name>' (double underscore),
#   * parameter names are case-sensitive (LinearSVC takes `C`, not `c`),
#   * each grid entry must be a flat sequence of candidate values.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# Target column and the remaining feature columns.
label = df.Sentiment
train = df.drop('Sentiment', axis=1)

text_clf = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LinearSVC()),
])

parameters = {
    # Only the scalar candidate from the original grid is kept: the float
    # candidates (0.8, 1.0) would make min_df exceed max_df for some
    # combinations, which TfidfVectorizer rejects.
    'tfidf__min_df': (1,),
    # Floats are proportions of documents; 1.0 means "no upper cut-off".
    'tfidf__max_df': (0.8, 1.0),
    'clf__C': (0.1, 1, 10),
}

gs_clf = GridSearchCV(text_clf, parameters)
# `.values` is the portable spelling of the `as_matrix()` call this line
# originally intended (the original `trainas_matrix()` was a NameError).
# NOTE(review): TfidfVectorizer expects an iterable of raw text documents.
# If `train` already holds numeric features (20000 columns), the 'tfidf'
# step should presumably be dropped or replaced -- confirm against the data.
# NOTE(review): a dense 110000 x 20000 float64 array needs roughly 17.6 GB,
# which is likely what triggers the MemoryError; consider a sparse
# representation or a smaller dtype -- confirm.
gs_clf = gs_clf.fit(train.values, label)
运行时我收到以下错误:
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input> in <module>()
      3 classif = SklearnClassifier(LinearSVC())
      4 #Séparation du set de train et de test
----> 5 X_train, X_test, y_train, y_test = train_test_split(train.as_matrix(), label, test_size=0.33)

/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in as_matrix(self, columns)
   2832         self._consolidate_inplace()
   2833         if self._AXIS_REVERSED:
-> 2834             return self._data.as_matrix(columns).T
   2835         return self._data.as_matrix(columns)
   2836

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in as_matrix(self, items)
   3148             return mgr.blocks[0].get_values()
   3149         else:
-> 3150             return mgr._interleave()
   3151
   3152     def _interleave(self):

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in _interleave(self)
   3157         dtype = _interleaved_dtype(self.blocks)
   3158
-> 3159         result = np.empty(self.shape, dtype=dtype)
   3160
   3161         if result.shape[0] == 0:

MemoryError: