TypeError:tree.py 中 'float' 和 'str' 的实例之间不支持 '<'

时间:2018-05-11 21:20:23

标签: python machine-learning random-forest

我遇到了一个奇怪的问题,非常感谢任何帮助。我的训练数据集对象是纯 float32 的 numpy 数组,由 vectorizer 填充。问题一定出在我传给 RandomForestClassifier 的某个参数上,因为不传任何参数时它可以正常运行。我很确定我的输入中没有字符串:

X_train
memmap([0.25173673, 0.01420455, 0.00684149, ..., 0.        , 0.        ,
        0.        ], dtype=float32)
y_train
memmap([ 0.,  0.,  0., ..., -1.,  1.,  1.], dtype=float32)

但是,当我在数据集上运行RandomForest时,我得到以下结果:

model_RandomForest = ek.RandomForestClassifier(n_estimators = 200, max_depth = 'auto', n_jobs = 1, random_state = 5,max_features = 'auto',min_samples_leaf = 100, verbose=1)  
result_RandomForest = model_RandomForest.fit(X_train[train_rows], y_train[train_rows]) 

回溯(Traceback)输出:

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\ensemble\forest.py in fit(self, X, y, sample_weight)
    326                     t, self, X, y, sample_weight, i, len(trees),
    327                     verbose=self.verbose, class_weight=self.class_weight)
--> 328                 for i, t in enumerate(trees))
    329 
    330             # Collect newly grown trees
~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627 

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333 
    334     def get(self):

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\ensemble\forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight)
    119             curr_sample_weight *= compute_sample_weight('balanced', y, indices)
    120 
--> 121         tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
    122     else:
    123         tree.fit(X, y, sample_weight=sample_weight, check_input=False)

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
    788             sample_weight=sample_weight,
    789             check_input=check_input,
--> 790             X_idx_sorted=X_idx_sorted)
    791         return self
    792 

~\Anaconda3\envs\emberenv\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
    181             min_samples_leaf = self.min_samples_leaf
    182         else:  # float
--> 183             if not 0. < self.min_samples_leaf <= 0.5:
    184                 raise ValueError("min_samples_leaf must be at least 1 "
    185                                  "or in (0, 0.5], got %s"

TypeError: '<' not supported between instances of 'float' and 'str'

有人知道这里发生了什么,或者我该如何解决吗?

谢谢!

0 个答案:

没有答案