我正在尝试使用某些程序修剪决策树。
在对当前树进行各种操作之后,我在操作树之前和之后从函数validate()
(prune_helper
内部,print(self.validate(validation_data))
)获得不同的值。很棒(这意味着给定节点的树发生了某些事情)。
def prune_helper(self, curr_node):
all_err = []
# The current tree.
tree_1 = self
all_err.append((tree_1._generalization_error(), tree_1))
# Replace node with leaf 1.
tree_2 = self._replace_leaf(curr_node, DEMOCRAT)
all_err.append((tree_2._generalization_error(), tree_2))
# Replace node with leaf 0.
tree_3 = self._replace_leaf(curr_node, REPUBLICAN)
all_err.append((tree_3._generalization_error(), tree_3))
# Replace node with left subtree.
test_4 = self._replace_subtree(curr_node, LEFT)
all_err.append((test_4._generalization_error(), test_4))
# Replace node with middle subtree.
test_5 = self._replace_subtree(curr_node, MIDDLE)
all_err.append((test_5._generalization_error(), test_5))
# Replace node with right subtree.
test_6 = self._replace_subtree(curr_node, RIGHT)
all_err.append((test_6._generalization_error(), test_6))
all_err.sort(key=lambda tup: tup[0])
min_tree = all_err[0][1]
# print(self.validate(validation_data)) <-- This
self = copy.deepcopy(min_tree)
# print(self.validate(validation_data)) <-- Mostly different than this
curr_node.pruned = True
def prune(self, curr_node=None):
if curr_node is None:
curr_node = self._root
# Node is leaf.
if curr_node.leaf:
self.prune_helper(curr_node=curr_node)
return
# Node is not a leaf, we may assume that he has all three children.
if curr_node.left.pruned is False:
self.prune(curr_node=curr_node.left)
if curr_node.middle.pruned is False:
self.prune(curr_node=curr_node.middle)
if curr_node.right.pruned is False:
self.prune(curr_node=curr_node.right)
# We'll prune the current node, only if we checked all of its children.
self.prune_helper(curr_node=curr_node)
但是问题是,当我想要在完全&#34;完全&#34;之后为树计算一些值时,我得到validate()
返回的相同值,这意味着树可能不是事后改变了,对树的影响只发生在prune_tree
-
def prune_tree(tree):
# print(tree.validate(validation_data)) <- This
tree.prune()
# print(tree.validate(validation_data)) <- Same as this
tree.print_tree('after')
我认为可能问题在于我尝试更改self
对象的方式。有没有明显的事情表明我在实施可能导致这一结果的整个事情上做错了?