我遇到了一个奇怪的问题,希望能得到帮助。我的训练数据集对象是纯 float32 的 numpy 数组,由 vectorizer 填充。问题一定出在我传给 RandomForestClassifier 的某个参数上,因为不传任何参数时它可以正常运行。我很确定我的输入中没有字符串:
X_train
memmap([0.25173673, 0.01420455, 0.00684149, ..., 0. , 0. ,
0. ], dtype=float32)
y_train
memmap([ 0., 0., 0., ..., -1., 1., 1.], dtype=float32)
但是,当我在该数据集上运行 RandomForest 时,出现了以下错误:
model_RandomForest = ek.RandomForestClassifier(n_estimators = 200, max_depth = 'auto', n_jobs = 1, random_state = 5,max_features = 'auto',min_samples_leaf = 100, verbose=1)
result_RandomForest = model_RandomForest.fit(X_train[train_rows], y_train[train_rows])
错误回溯(traceback)输出:
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\ensemble\forest.py in fit(self, X, y, sample_weight)
326 t, self, X, y, sample_weight, i, len(trees),
327 verbose=self.verbose, class_weight=self.class_weight)
--> 328 for i, t in enumerate(trees))
329
330 # Collect newly grown trees
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\ensemble\forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight)
119 curr_sample_weight *= compute_sample_weight('balanced', y, indices)
120
--> 121 tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
122 else:
123 tree.fit(X, y, sample_weight=sample_weight, check_input=False)
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
788 sample_weight=sample_weight,
789 check_input=check_input,
--> 790 X_idx_sorted=X_idx_sorted)
791 return self
792
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
181 min_samples_leaf = self.min_samples_leaf
182 else: # float
--> 183 if not 0. < self.min_samples_leaf <= 0.5:
184 raise ValueError("min_samples_leaf must be at least 1 "
185 "or in (0, 0.5], got %s"
TypeError: '<' not supported between instances of 'float' and 'str'
有人知道这里发生了什么,或者我该如何解决吗?
谢谢!