使用sklearn svm中的预测函数的内存错误

时间:2017-11-25 17:35:23

标签: python machine-learning scikit-learn out-of-memory svm

我正在训练一个SVM,其中包含一个包含93个要素和61878个训练样例的大数据集。

由于我不想绘制如此大的数据集,我正在提取40个随机样本来训练另一个SVM以使其可视化。 但是我在轮廓绘图函数中的MEMORY ERROR函数中得到clf.predict()

我的代码:

import numpy as np
from numpy import random
from matplotlib import cm
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#Making the meshgrid
def make_meshgrid(x, y, h=.02):
    x_min, x_max = x.min() - 1, x.max() + 1
    y_min, y_max = y.min() - 1, y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h),copy=False)
    return xx, yy

#plotting the decision contour
def plot_contours(ax, clf, xx, yy, **params):
    Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out

#extracting data from csv file
f1 = open('train.csv', 'r')
train_data = pd.read_csv(f1)
train_arr = np.array(train_data)
X = np.array(train_arr[:, :-1]) 
y = np.array(train_arr[:, 94])
X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.3);
print('Printing cross validation data.....')
print(X_cv)
print(y_cv)

clf = SVC(C=1, kernel='poly', gamma='auto')
clf.fit(X_train, y_train)
print('Cross validation score:', clf.score(X_cv, y_cv))
print('Cross validation prediction:')

res = clf.predict(X_cv)
print(res)
print('Cross validation accuracy score:', accuracy_score(y_cv, res, normalize=True))

f2 = open('test.csv', 'r')
test_data = pd.read_csv(f2)
test_arr = np.array(test_data)
print('Test data prediction:')
y_res = clf.predict(test_arr)
print(y_res)

#plotting part of the data by taking random 40 rows to reduce the data set
plot_arr=[]
for i in range(0,39):
    j = random.randint(0,61878)
    plot_arr.append(train_arr[j,:])
parr = np.asarray(plot_arr)
X_plot = parr[:,:-1]
y_plot = parr[:,94]
clf1 = SVC(C=1, kernel='poly', gamma='auto')
clf1.fit(X_plot, y_plot)
X0, X1 = X_plot[:, :], X_plot[:, :]
xx, yy = make_meshgrid(X0,X1)
plot_contours(plt, clf1, xx, yy,cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X0, X1, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
plt.show()
f1.close()
f2.close()  

我得到的错误如下:

Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition 2017.2.3\helpers\pydev\pydevd.py", line 1599, in <module>
    globals = debugger.run(setup['file'], None, None, is_module)
File "C:\Program Files\JetBrains\PyCharm Community Edition 2017.2.3\helpers\pydev\pydevd.py", line 1026, in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2017.2.3\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/Chinnu/PycharmProject/ml/different", line 59, in <module>
    plot_contours(plt, clf, xx, yy,cmap=plt.cm.coolwarm, alpha=0.8)
File "C:/Users/Chinnu/PycharmProject/ml/different", line 38, in plot_contours
    Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
MemoryError

有谁能告诉我如何纠正这个错误?

0 个答案:

没有答案