我想以 Penn Treebank 风格打印解析树,但始终无法让括号正确配对(令人沮丧——解析器本身工作正常,只是输出打印不对!)。关于这个递归函数,有什么提示或技巧吗?
这是我当前的打印方法,我从解析树的起始(start)节点开始调用该函数。
def print_tree(current_node, parents=0, side=None):
    """Print a binary parse tree in bracketed form with balanced parentheses.

    Each node is printed on its own line, indented by three spaces per depth
    level.  A terminal is printed as ``(LHS WORD )``; a non-terminal opens
    with ``( LHS`` and its single closing `` )`` is appended to the last line
    produced by its descendants, so every opening parenthesis is closed
    exactly once.

    Args:
        current_node: Node exposing ``lhs``, ``word_label``, ``left_child``,
            ``right_child`` and ``is_terminal`` (either a boolean attribute
            or a zero-argument method).
        parents: Depth of ``current_node``, i.e. how many indentation levels
            to print in front of it.
        side: Accepted for backward compatibility only.  Closing parentheses
            are now derived from the tree structure, so the value is ignored.
    """
    print("\n".join(_tree_lines(current_node, parents)))


def _tree_lines(node, depth):
    """Return the output lines for ``node`` and everything below it."""
    indent = "   " * depth

    is_terminal = node.is_terminal
    if callable(is_terminal):
        # Support node classes that expose is_terminal as a method; a bound
        # method is always truthy, so it must be called to get the answer.
        is_terminal = is_terminal()

    if is_terminal:
        return ["{}({} {} )".format(indent, node.lhs, node.word_label)]

    lines = ["{}( {}".format(indent, node.lhs)]
    for child in (node.left_child, node.right_child):
        if child is not None:
            lines.extend(_tree_lines(child, depth + 1))

    # Close this node once, right after its last descendant.  The original
    # code closed *all* ancestors at every right-hand leaf, which is what
    # unbalanced the output.
    lines[-1] += " )"
    return lines
我得到的结果:
( TOP
( S_VP
(VB 'List' )
( NP
( NP
(DT 'the' )
(NNS 'flights') ) ) ) ) )
( PP
( PP
(IN 'from' )
(NP_NNP 'Baltimore') ) ) ) ) ) )
( PP
(TO 'to' )
( NP
(NP_NNP 'Seattle' )
( NP
( NP
(DT 'that' )
(NN 'stop') ) ) ) ) ) ) ) ) )
( PP
(IN 'in' )
(NP_NNP 'Minneapolis') ) ) ) ) ) ) ) ) )
(PUNC '.') ) )
期望的结果:
( TOP
( S_VP
(VB 'List' )
( NP
( NP
(DT 'the' )
(NNS 'flights') )
( PP
( PP
(IN 'from' )
(NP_NNP 'Baltimore') )
( PP
(TO 'to' )
( NP
(NP_NNP 'Seattle' ))
( NP
( NP
(DT 'that' )
(NN 'stop') )
( PP
(IN 'in' )
(NP_NNP 'Minneapolis')))))
(PUNC '.') ) ))
我试过根据递归深度/缩进层数来决定要打印的右括号数量,但并不太成功。
答案 0 :(得分:0)
在每个终端节点(叶子)上,你都为所有父节点打印了 `)`;实际上只应为连续处于右侧的祖先节点打印。
我建议将 `parents` 重命名为 `depth`,并添加一个 `right_depth` 参数。
编辑:稍微试验之后,我认为把打印逻辑交给树节点自身来完成更好:
class Node:
    """Node of a binary parse tree that renders itself in bracketed form.

    A node is either a terminal (it carries a ``word_label`` and has no
    children) or a non-terminal (it has a left and a right child and no
    ``word_label``).
    """

    # Text repeated once per depth level in front of each rendered node.
    INDENT = " "
    __slots__ = ["lhs", "word_label", "left_child", "right_child"]

    def __init__(self, lhs, *args):
        """Create a terminal or a non-terminal node.

        Args:
            lhs: Label of the node.
            *args: Either one value (the word label of a terminal) or two
                values (the left and right child of a non-terminal).

        Raises:
            ValueError: If ``args`` holds neither one nor two values.
        """
        self.lhs = lhs
        num_args = len(args)
        if num_args == 1:
            self.word_label = args[0]
            self.left_child = None
            self.right_child = None
        elif num_args == 2:
            self.word_label = None
            self.left_child = args[0]
            self.right_child = args[1]
        else:
            raise ValueError("should have one arg (word_label: str) or two args (left: Node and right: Node)")

    def is_terminal(self):
        """Return True when this node carries a word label."""
        return self.word_label is not None

    def tree_str(self, depth=0, indent=None):
        """Render this subtree, one node per line.

        Every node's text starts with a newline followed by ``indent``
        repeated ``depth`` times, so the returned string begins with a
        newline.  A non-terminal's closing parenthesis is appended after its
        right child's text, which keeps the parentheses balanced.

        Args:
            depth: Nesting level of this node.
            indent: Text used for one indentation level; defaults to
                ``INDENT``.

        Returns:
            The rendered subtree as a string.
        """
        if indent is None:
            indent = self.INDENT
        if self.is_terminal():
            return "\n{}({} '{}' )".format(
                indent * depth,
                self.lhs,
                self.word_label
            )
        else:
            return "\n{}( {}{}{} )".format(
                indent * depth,
                self.lhs,
                self.left_child.tree_str(depth + 1, indent),
                self.right_child.tree_str(depth + 1, indent)
            )

    def __str__(self):
        """Return the rendered tree without the leading newline."""
        # tree_str() prefixes every node, including the root, with a newline;
        # drop that first one so print(tree) does not start with an empty line.
        return self.tree_str().lstrip("\n")
然后是一些语法辅助函数:
def make_leaf_type(name):
    """Return a one-argument constructor for terminal nodes labelled ``name``.

    The returned function builds ``Node(name, x)`` and has its ``__name__``
    set to ``name``.
    """
    def fn(x):
        return Node(name, x)
    fn.__name__ = name
    return fn
# Publish one terminal-node constructor per leaf label as a module-level name.
# globals() is used instead of locals(): writing through locals() is only
# reliable where it happens to be the module namespace.
for leaf_type in ("VB", "DT", "NNS", "IN", "NP_NNP", "TO", "NN", "PUNC"):
    globals()[leaf_type] = make_leaf_type(leaf_type)
def make_node_type(name):
    """Return a two-argument constructor for non-terminal nodes labelled ``name``.

    The returned function builds ``Node(name, l, r)`` from a left and a right
    child and has its ``__name__`` set to ``name``.
    """
    def fn(l, r):
        return Node(name, l, r)
    fn.__name__ = name
    return fn
# Publish one non-terminal constructor per node label as a module-level name.
# globals() is used instead of locals(): writing through locals() is only
# reliable where it happens to be the module namespace.
for node_type in ("TOP", "S_VP", "NP", "PP"):
    globals()[node_type] = make_node_type(node_type)
这样我就可以创建树:
# Example parse tree, built with the generated constructors.
tree = TOP(
    S_VP(
        VB('List'),
        NP(
            NP(DT('the'), NNS('flights')),
            PP(
                PP(IN('from'), NP_NNP('Baltimore')),
                PP(
                    TO('to'),
                    NP(
                        NP_NNP('Seattle'),
                        NP(
                            NP(DT('that'), NN('stop')),
                            PP(IN('in'), NP_NNP('Minneapolis')),
                        ),
                    ),
                ),
            ),
        ),
    ),
    PUNC('.'),
)
然后这样打印:
>>> print(tree)
( TOP
( S_VP
(VB 'List' )
( NP
( NP
(DT 'the' )
(NNS 'flights' ) )
( PP
( PP
(IN 'from' )
(NP_NNP 'Baltimore' ) )
( PP
(TO 'to' )
( NP
(NP_NNP 'Seattle' )
( NP
( NP
(DT 'that' )
(NN 'stop' ) )
( PP
(IN 'in' )
(NP_NNP 'Minneapolis' ) ) ) ) ) ) ) )
(PUNC '.' ) )
我认为这才是实际想要的输出。