我正在尝试使用前向验证方法构建随机森林模型。
我使用TimeBasedCV()相应地分割我的数据:TimeBasedCV()
我的代码如下:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10, 15]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,10]
# Method of selecting samples for training each tree
bootstrap = [True, False]# Create the random grid
random_grid = {'n_estimators': n_estimators,
'max_features': max_features,
'max_depth': max_depth,
'min_samples_split': min_samples_split,
'min_samples_leaf': min_samples_leaf,
'bootstrap': bootstrap}
from sklearn.model_selection import RandomizedSearchCV
from random import randint, uniform
tscv = TimeBasedCV(train_period=60,test_period=12,freq='months')
index_output = tscv.split(X_train, date_column='Date')
rf = RandomForestRegressor()
model = RandomizedSearchCV(
estimator = rf,
param_distributions = random_grid,
n_iter = 10,
n_jobs = -1,
cv = index_output,
verbose=5,
random_state = 42,
return_train_score = True)
model.fit(X_train.drop('Date', axis=1),y_train)
model.cv_results_
我的model.fit的错误消息是
IndexError: positional indexers are out-of-bounds
我需要调整随机搜索吗?还是由于我的数据错误导致此错误?
IndexError Traceback (most recent call last)
<ipython-input-71-eebc6186b2c3> in <module>
18 return_train_score = True)
19
---> 20 model.fit(X_train,y_train)
21 model.cv_results_
~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
708 return results
709
--> 710 self._run_search(evaluate_candidates)
711
712 # For multi-metric evaluation, store the best_index_, best_params_ and
~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1482 evaluate_candidates(ParameterSampler(
1483 self.param_distributions, self.n_iter,
-> 1484 random_state=self.random_state))
~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
687 for parameters, (train, test)
688 in product(candidate_params,
--> 689 cv.split(X, y, groups)))
690
691 if len(out) < 1:
~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1015
1016 with self._backend.retrieval_context():
-> 1017 self.retrieve()
1018 # Make sure that we get a last message telling us we are done
1019 elapsed_time = time.time() - self._start_time
~\anaconda3\lib\site-packages\joblib\parallel.py in retrieve(self)
907 try:
908 if getattr(self._backend, 'supports_timeout', False):
--> 909 self._output.extend(job.get(timeout=self.timeout))
910 else:
911 self._output.extend(job.get())
~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
560 AsyncResults.get from multiprocessing."""
561 try:
--> 562 return future.result(timeout=timeout)
563 except LokyTimeoutError:
564 raise TimeoutError()
~\anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
~\anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
IndexError: positional indexers are out-of-bounds