我尝试编写一个简单的决策树类,但一直遇到这个错误:TypeError: fit() missing 1 required positional argument: 'cat_features'。即使我在调用 fit 时已经传入了 cat_features 数组,错误仍然出现。另外我怀疑代码里还有其他错误,但找不到它们在哪里。请问该如何解决?
class DecisionTree:
    """A simple greedy decision tree.

    Each fitted node records the (feature index, split value) pair chosen
    by minimising an impurity criterion looked up in the (module-level)
    ``criterion_dict``.
    """

    def __init__(self, criterion):
        # BUG FIX: the posted header was mangled by machine translation
        # ("类DecisionTree: def init (自身,条件):"). The constructor must be
        # spelled __init__ (double underscores on both sides) and use the
        # real identifiers `self` / `criterion`, otherwise Python raises a
        # SyntaxError / NameError before fit() is ever called.
        self.criterion = criterion  # key into the external criterion_dict
        self.feature_numbers_and_splits = []  # per-node (feature, split) records
def criterion_count(self, node_y):
    """Evaluate the configured impurity criterion on one node's labels.

    Looks up the criterion function by the name stored in ``self.criterion``
    (in the external ``criterion_dict``) and applies it to ``node_y``.
    """
    measure = criterion_dict[self.criterion]
    return measure(node_y)
def _find_best_split(self, node_X, node_y, cat_feature=False):
    """Return the (feature_number, split_value) with the lowest weighted impurity.

    node_X : pandas DataFrame of features; node_y : sequence of labels.
    cat_feature : if True, treat every feature as categorical (== / != split)
        instead of numeric (< / >= split).
    Returns (None, None) when no split produces two non-empty children.

    Assumes the criterion measures impurity (e.g. Gini/entropy), so lower
    is better — TODO confirm against criterion_dict's definitions.
    """
    best_feature_number, best_split_value = None, None
    best_score = math.inf  # impurity is minimised, so start at +infinity
    n = len(node_y)
    for feature_number in range(node_X.shape[1]):
        column = node_X.iloc[:, feature_number]
        for split_value in np.unique(column):
            if not cat_feature:
                # BUG FIX: the original sent rows with value > split_value to
                # the left child, but _split_leaf and predict both define
                # "left" as value < split_value — the evaluated split and the
                # split actually applied disagreed. Align with < / >=.
                node_y_left = [y for j, y in enumerate(node_y) if column.iloc[j] < split_value]
                node_y_right = [y for j, y in enumerate(node_y) if column.iloc[j] >= split_value]
            else:
                node_y_left = [y for j, y in enumerate(node_y) if column.iloc[j] == split_value]
                node_y_right = [y for j, y in enumerate(node_y) if column.iloc[j] != split_value]
            # A split leaving one side empty is useless and would feed an
            # empty list to the criterion function — skip it.
            if not node_y_left or not node_y_right:
                continue
            criterion_left = self.criterion_count(node_y_left)
            criterion_right = self.criterion_count(node_y_right)
            score = (criterion_left * len(node_y_left)
                     + criterion_right * len(node_y_right)) / n
            # BUG FIX: the original started best_score at -1 and kept the
            # LARGEST weighted impurity, i.e. it chose the *worst* split.
            if score < best_score:
                best_score = score
                best_feature_number = feature_number
                best_split_value = split_value
    return best_feature_number, best_split_value
def _split_leaf(self, node_X, node_y, feature_number, split_value):
left_node_X=node_X.loc[(node_X.iloc[:,feature_number] < split_value)]
right_node_X=node_X.loc[(node_X.iloc[:,feature_number] >= split_value)]
left_node_y=node_y.loc[(node_X.iloc[:,feature_number] < split_value)]
right_node_y=node_y.loc[(node_X.iloc[:,feature_number] >= split_value)]
return left_node_X, left_node_y, right_node_X, right_node_y
def fit(self, X, y, cat_features, verbose=False, recursion_depth=0):
    """Recursively grow the tree, recording (feature, split) per node.

    X : pandas DataFrame of features; y : pandas Series of labels.
    cat_features : collection describing which features are categorical.
        NOTE(review): currently only forwarded to the recursive calls —
        it is never translated into _find_best_split's `cat_feature` flag;
        wiring per-feature categorical handling is still TODO.
    verbose : print the recorded splits while fitting.
    recursion_depth : internal — depth of the current node (0 = root).
    """
    best_feature_number, best_split_value = self._find_best_split(X, y)
    # No usable split (node is pure / degenerate): this node is a leaf.
    if best_feature_number is None:
        return
    # NOTE(review): this flat-list bookkeeping only distinguishes the root
    # from "everything else"; deeper levels all index the same sub-list.
    # A proper fix needs a real node structure — kept as-is here.
    if recursion_depth == 0:
        self.feature_numbers_and_splits.append((best_feature_number, best_split_value))
    else:
        self.feature_numbers_and_splits[recursion_depth].append((best_feature_number, best_split_value))
    left_node_X, left_node_y, right_node_X, right_node_y = \
        self._split_leaf(X, y, best_feature_number, best_split_value)
    if len(left_node_X) == 0 or len(right_node_X) == 0:
        return
    self.feature_numbers_and_splits.append([])
    if verbose:  # BUG FIX: the print ran unconditionally despite the flag
        print(self.feature_numbers_and_splits)
    # BUG FIX (the asker's error): the recursive calls omitted the required
    # `cat_features` argument, raising
    #   TypeError: fit() missing 1 required positional argument: 'cat_features'
    # Forward cat_features and verbose, and advance the recursion depth.
    self.fit(left_node_X, left_node_y, cat_features,
             verbose=verbose, recursion_depth=recursion_depth + 1)
    self.fit(right_node_X, right_node_y, cat_features,
             verbose=verbose, recursion_depth=recursion_depth + 1)
# Purpose: walk the stored (feature, split) pairs and assign a prediction to
# each row of X by recursively routing rows left (< split) or right (>=).
# NOTE(review): this method has several unresolved defects — a safe
# behavior-preserving rewrite is not possible; see the inline notes.
def predict(self, X, cat_features, id_=0, feature_number=None, split_value=None):
# Root call: no split supplied yet, so start from the first recorded one.
if feature_number is None:
feature_number, split_value = self.feature_numbers_and_splits[0]
X_left = X[X.iloc[:,feature_number]<split_value]
X_right = X[X.iloc[:,feature_number]>=split_value]
# NOTE(review): `id_` is never advanced in the recursive calls (default 0
# every time), so every level reads feature_numbers_and_splits[1]; there is
# also no leaf base case, so the recursion only stops when this indexing
# raises IndexError — confirm intended tree layout before fixing.
y_pred_left = self.predict(X_left, cat_features,
feature_number=self.feature_numbers_and_splits[id_+1][0][0],
split_value=self.feature_numbers_and_splits[id_+1][0][1])
y_pred_right = self.predict(X_right, cat_features,
feature_number=self.feature_numbers_and_splits[id_+1][1][0],
split_value=self.feature_numbers_and_splits[id_+1][1][1])
# NOTE(review): chained indexing like X[mask]['y'] = ... assigns into a
# temporary copy (pandas "SettingWithCopy") — these two lines do not modify
# X at all. Also note X[feature_number] selects by column *label*, whereas
# the code above used positional access via .iloc — these differ unless the
# column labels happen to be 0..n-1.
X[X[feature_number]<split_value]['y'] = y_pred_left
X[X[feature_number]>=split_value]['y'] = y_pred_right
y_pred = X[['y']]
return y_pred