我正在训练一个SVM
,其中包含一个包含93个要素和61878个训练样例的大数据集。
由于我不想绘制如此大的数据集,我正在提取40个随机样本来训练另一个SVM
以使其可视化。
但是我在轮廓绘图函数中的MEMORY ERROR
函数中得到clf.predict()
。
我的代码:
import numpy as np
from numpy import random
from matplotlib import cm
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
#Making the meshgrid
def make_meshgrid(x, y, h=.02):
x_min, x_max = x.min() - 1, x.max() + 1
y_min, y_max = y.min() - 1, y.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h),copy=False)
return xx, yy
#plotting the decision contour
def plot_contours(ax, clf, xx, yy, **params):
Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
out = ax.contourf(xx, yy, Z, **params)
return out
#extracting data from csv file
f1 = open('train.csv', 'r')
train_data = pd.read_csv(f1)
train_arr = np.array(train_data)
X = np.array(train_arr[:, :-1])
y = np.array(train_arr[:, 94])
X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.3);
print('Printing cross validation data.....')
print(X_cv)
print(y_cv)
clf = SVC(C=1, kernel='poly', gamma='auto')
clf.fit(X_train, y_train)
print('Cross validation score:', clf.score(X_cv, y_cv))
print('Cross validation prediction:')
res = clf.predict(X_cv)
print(res)
print('Cross validation accuracy score:', accuracy_score(y_cv, res, normalize=True))
f2 = open('test.csv', 'r')
test_data = pd.read_csv(f2)
test_arr = np.array(test_data)
print('Test data prediction:')
y_res = clf.predict(test_arr)
print(y_res)
#plotting part of the data by taking random 40 rows to reduce the data set
plot_arr=[]
for i in range(0,39):
j = random.randint(0,61878)
plot_arr.append(train_arr[j,:])
parr = np.asarray(plot_arr)
X_plot = parr[:,:-1]
y_plot = parr[:,94]
clf1 = SVC(C=1, kernel='poly', gamma='auto')
clf1.fit(X_plot, y_plot)
X0, X1 = X_plot[:, :], X_plot[:, :]
xx, yy = make_meshgrid(X0,X1)
plot_contours(plt, clf1, xx, yy,cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X0, X1, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
plt.show()
f1.close()
f2.close()
我得到的错误如下:
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition 2017.2.3\helpers\pydev\pydevd.py", line 1599, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "C:\Program Files\JetBrains\PyCharm Community Edition 2017.2.3\helpers\pydev\pydevd.py", line 1026, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2017.2.3\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/Chinnu/PycharmProject/ml/different", line 59, in <module>
plot_contours(plt, clf, xx, yy,cmap=plt.cm.coolwarm, alpha=0.8)
File "C:/Users/Chinnu/PycharmProject/ml/different", line 38, in plot_contours
Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
MemoryError
有谁能告诉我如何纠正这个错误?