我正在研究应在sklearn中的gridsearchcv函数中传递的自定义估算器。我现在创建了估算器,但是遇到内存错误。在下面的代码中,您将看到一些常量,例如'KxRange [0]'或数组,例如keep_rate。它们只是预先定义的,其中包含一些随机值。 这是我的代码:
# sklearn grid search
from sklearn.model_selection import GridSearchCV
# import the base estimator
from sklearn.base import BaseEstimator, RegressorMixin
# define my own estimator
class MyEstimator(BaseEstimator,RegressorMixin):
# define constructor
# possible tau: int/float
# other parameters: array of int/floats, length 9
def __init__(self, tau=0, \
K1=K1Range[0], K2=K2Range[0], K3=K3Range[0], K4=K4Range[0], K5=K5Range[0], K6=K6Range[0], K7=K7Range[0], K8=K8Range[0], K9=K9Range[0], \
S1=0, S2=0, S3=0, S4=0, S5=0, S6=0, S7=0, S8=0, S9=0, \
alpha1=1, alpha2=1, alpha3=1, alpha4=1, alpha5=1, alpha6=1, alpha7=1, alpha8=1, alpha9=1, \
beta1=1, beta2=1, beta3=1, beta4=1, beta5=1, beta6=1, beta7=1, beta8=1, beta9=1):
# initialize parameters
self.tau = tau
self.K1 = K1
self.K2 = K2
self.K3 = K3
self.K4 = K4
self.K5 = K5
self.K6 = K6
self.K7 = K7
self.K8 = K8
self.K9 = K9
self.S1 = S1
self.S2 = S2
self.S3 = S3
self.S4 = S4
self.S5 = S5
self.S6 = S6
self.S7 = S7
self.S8 = S8
self.S9 = S9
self.alpha1 = alpha1
self.alpha2 = alpha2
self.alpha3 = alpha3
self.alpha4 = alpha4
self.alpha5 = alpha5
self.alpha6 = alpha6
self.alpha7 = alpha7
self.alpha8 = alpha8
self.alpha9 = alpha9
self.beta1 = beta1
self.beta2 = beta2
self.beta3 = beta3
self.beta4 = beta4
self.beta5 = beta5
self.beta6 = beta6
self.beta7 = beta7
self.beta8 = beta8
self.beta9 = beta9
# to fit the model
def fit(self, X,y=None):
# define the mu vector
self.mu_ = np.ones((N))
# define lag weights
lag_weights = np.ones((max_lag))
# define retain_rate
retain_rate = np.array([alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9])
# define cum_effect, set to a random value
cum_effect = 1
# define cum_effects_hill
cum_effects_hill = np.ones((N, num_media))
# parameter transformation
for nn in range(N):
for m in range(num_media):
for l in range(max_lag):
lag_weights[l] = retain_rate[m]**l
cum_effect = Adstock(X[nn][m], lag_weights)
cum_effects_hill[nn][m] = Hill(cum_effect, ec[m], slope[m])
self.mu_[nn] = tau + np.dot(cum_effects_hill[nn], beta_medias)
return self
# the predict function
def predict(self, X, y=None):
# try to get the mu_ argument. If it does not exist, we throw an error
try:
getattr(self, "mu_")
except AttributeError:
raise RuntimeError("You must train classifer before predicting data!")
return self.mu_
# the score function
def score(self, X, y):
# calculate the MSE
return np.dot(y - self.predict(X), y - self.predict(X))/len(X)
以下类似于“主要”功能
# initiliaze estimator
t = MyEstimator()
# parameter grid
# tau
param_grid = {'tau': [100,200], \
# K
'K1': [K1Range[0], K1Range[1]], 'K2' : [K2Range[0], K2Range[1]], 'K3': [K3Range[0], K3Range[1]], 'K4' : [K4Range[0], K4Range[1]], 'K5' : [K5Range[0], K5Range[1]], 'K6' : [K6Range[0], K6Range[1]], 'K7' : [K7Range[0], K7Range[1]], 'K8': [K8Range[0], K8Range[1]], 'K9': [K9Range[0], K9Range[1]], \
# S
'S1': [1, 100], 'S2': [1, 100], 'S3': [1, 100], 'S4': [1, 100], 'S5': [1, 100], 'S6': [1, 100], 'S7': [1, 100], 'S8': [1, 100], 'S9': [1, 100], \
# alpha
'alpha1': [0.1, 0.5], 'alpha2': [0.1, 0.5], 'alpha3': [0.1, 0.5], 'alpha4': [0.1, 0.5], 'alpha5': [0.1, 0.5], 'alpha6': [0.1, 0.5], 'alpha7': [0.1, 0.5], 'alpha8': [0.1, 0.5], 'alpha9': [0.1, 0.5], \
# beta
'beta1': [100,200], 'beta2': [100,200], 'beta3': [100,200], 'beta4': [100,200], 'beta5': [100,200], 'beta6': [100,200], 'beta7': [100,200], 'beta8': [100,200], 'beta9': [100,200]}
#
clf = GridSearchCV(t, param_grid)
clf.fit(X_media, actual_sales)
#clf.predict(X_media)
这是错误消息:
MemoryError Traceback (most recent call last)
<ipython-input-22-de0388db8453> in <module>
14 #
15 clf = GridSearchCV(t, param_grid)
---> 16 clf.fit(X_media, actual_sales)
17 #clf.predict(X_media)
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
71 FutureWarning)
72 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73 return f(**kwargs)
74 return inner_f
75
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
734 return results
735
--> 736 self._run_search(evaluate_candidates)
737
738 # For multi-metric evaluation, store the best_index_, best_params_ and
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1186 def _run_search(self, evaluate_candidates):
1187 """Search all candidates in param_grid"""
-> 1188 evaluate_candidates(ParameterGrid(self.param_grid))
1189
1190
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
698
699 def evaluate_candidates(candidate_params):
--> 700 candidate_params = list(candidate_params)
701 n_candidates = len(candidate_params)
702
MemoryError:
谁能告诉我如何解决此错误?还是我的代码有问题?谢谢!
答案 0 :(得分:2)
追溯显示网格搜索在尝试生成候选列表(即网格)时已经用尽了内存。您显然拥有37个参数,每个参数都有两个可能的值,因此候选者的数量为 plt.rcParams['figure.autolayout']=True
figa, axa = plt.subplots(rowcnt, colcnt)
figa.suptitle("Users Disk Space Usage Over Time.\n")
ax_index = 0
for r in range(rowcnt)
for c in range(colcnt):
n = r * c
user = gr.columns[n]
ur = gr[user]
x = ur.index
y = ur.values
while is_color_like(colorpairs[colorindex]) == False or is_color_like(colorpairs[colorindex+1]) == False :
colorindex = int((colorindex + 2) % (len(colorpairs)/2))
axa[r,c].plot(x, y, color=colorpairs[colorindex+1], alpha=0.6)
plt.setp(axa[r,c].get_xticklabels(), rotation=30)
if len(x) > 1:
axa[r,c].fill_between(x, y, color=colorpairs[colorindex],alpha=0.4)
axa[r,c].set_ylim(0,disksizebytes)
axa[r,c].set_title(user)
axa[r,c].set_xlabel('date')
axa[r,c].set_ylabel('space used')
axa[r,c].grid(True)
i += 1
colorindex = int((colorindex + 2) % (len(colorpairs)/2))
detailarryimage = "{}/detailarryimage.png".format(datafolder)
figa.savefig(detailarryimage)
,超过了1,370亿个。您可能真的不想尝试那么多候选人,所以也许2^(37)
更合适?