我有一个包含 12000 行、160 列的数据集,所有数据均为整数。当我用下面的代码分析偏差-方差权衡(bias-variance tradeoff)时,遇到了内存错误(MemoryError):
# Bias/variance sweep: for every polynomial degree in `steps`, expand the
# features, fit a 5-nearest-neighbour regressor and record R^2 and MSE on
# both the training and the test subset.
#
# NOTE: with interaction_only=True (and the default bias column) the expanded
# matrix has sum(C(n_features, k) for k in 0..step) columns and is dense for
# dense input. The column count grows combinatorially with `step`, so the
# allocation inside PolynomialFeatures.transform is what raises MemoryError.
# Keep the degrees in `steps` small and/or reduce the number of input
# features (feature selection, PCA, ...) before expanding.

# The float conversions do not depend on `step`; do them once instead of
# re-copying the arrays on every iteration and for every metric.
X_train_float = X_train_new_subset.astype(float)
X_test_float = X_test_new_subset.astype(float)
y_train_float = y_train_new_subset.astype(float)
y_test_float = y_test_new_subset.astype(float)

for i, step in enumerate(steps):
    # Drop the references left over from the previous iteration so the old
    # expanded matrices can be freed *before* the next (larger) ones are
    # allocated. The fitted KNN model is cleared as well because it keeps
    # its training data alive.
    pX_train = pX_test = model = None

    # Generate polynomial (interaction-only) features
    poly = PolynomialFeatures(step, interaction_only=True)
    pX_train = poly.fit_transform(X_train_float)
    pX_test = poly.transform(X_test_float)

    # Create model instance
    model = KNeighborsRegressor(n_neighbors=5)

    # Train model
    model.fit(pX_train, y_train_float)

    # Make predictions
    y_train_pred = model.predict(pX_train)
    y_test_pred = model.predict(pX_test)

    # Calculate error and accuracy
    tr_score[i] = r2_score(y_train_float, y_train_pred)
    tst_score[i] = r2_score(y_test_float, y_test_pred)
    trainErr[i] = mean_squared_error(y_train_float, y_train_pred)
    testErr[i] = mean_squared_error(y_test_float, y_test_pred)
这段代码有什么问题吗?完整的错误回溯如下:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-45-1c32333265a0> in <module>
10 # Generate polynomial features
11 poly = PolynomialFeatures(step, interaction_only=True)
---> 12 pX_train = poly.fit_transform(X_train_new_subset)
13 pX_test = poly.transform(X_test_new_subset)
14
d:\Users\rmardani\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
460 if y is None:
461 # fit method of arity 1 (unsupervised transformation)
--> 462 return self.fit(X, **fit_params).transform(X)
463 else:
464 # fit method of arity 2 (supervised transformation)
d:\Users\rmardani\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py in transform(self, X)
1484 XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
1485 else:
-> 1486 XP = np.empty((n_samples, self.n_output_features_), dtype=X.dtype)
1487 for i, comb in enumerate(combinations):
1488 XP[:, i] = X[:, comb].prod(1)
MemoryError: