我是编程和使用Python的初学者。目前我正在尝试理解Jurafsky和Martin的2008年关于语音和语言处理的一些代码(关于语法分析的练习13.1)。我将在下面复制它(除了最后4行,我自己没有编写这段代码)。
我的问题很简单:我没有打印语法规则,而是得到如下输出:
set([<__main__.Rule object at 0x011E1810>, <__main__.Rule object at 0x011E1790>, <__main__.Rule object at 0x011E15F0>, ...)
我知道我应该用 str (自我)做一些事情,但我尝试了一些事情但仍然没有得到正常输出。我怀疑解决方案很简单,但我只是不知道该怎么做。很感谢任何形式的帮助。可能您不需要阅读和理解下面的所有代码,看看哪些代码无效。
非常感谢!
def chomsky_normal_form(grammar):
grammar = set(grammar)
nonterminals = set(rule.head for rule in grammar)
# remove single symbol nonterminal rules
for rule, symbol in _unary_rules(grammar, nonterminals):
grammar.discard(rule)
for rule2 in _rules_headed_by(grammar, symbol):
grammar.add(Rule(rule.head, tuple(rule2.symbols)))
if all(symbol not in rule.symbols for rule in grammar):
for rule2 in _rules_headed_by(grammar, symbol):
grammar.discard(rule2)
# move terminals to their own rules
for rule in list(grammar):
if len(rule.symbols) >= 2:
for i, symbol in enumerate(rule.symbols):
if all(rule.head != symbol for rule in grammar):
rule = _new_symbol(grammar, rule, i, i + 1)
# ensure there are only two nonterminals per rule
for rule in _multi_symbol_rules(grammar):
_new_symbol(grammar, rule, 0, 2)
# return the grammar in CNF
return grammar
# find A -> B rules, allowing concurrent modifications
def _unary_rules(grammar, nonterminals):
while True:
g = ((rule, rule.symbols[0])
for rule in grammar
if len(rule.symbols) == 1
if rule.symbols[0] in nonterminals)
yield g.next()
# find all rules headed by the given symbol
def _rules_headed_by(grammar, symbol):
return [rule for rule in grammar if rule.head == symbol]
# create a new symbol which derives the given span of symbols
def _new_symbol(grammar, rule, start, stop):
symbols = rule.symbols
new_head = '_'.join(symbols[start:stop]).upper()
new_symbols = symbols[:start] + (new_head,) + symbols[stop:]
new_rule = Rule(rule.head, new_symbols)
grammar.discard(rule)
grammar.add(new_rule)
grammar.add(Rule(new_head, symbols[start:stop]))
return new_rule
# find A -> BCD... rules, allowing concurrent modifications
def _multi_symbol_rules(grammar):
while True:
g = (rule for rule in grammar if len(rule.symbols) >= 3)
yield g.next()
# representation of a rule A -> B...C
class Rule(object):
def __init__(self, head, symbols):
self.head = head
self.symbols = symbols
self._key = head, symbols
def __eq__(self, other):
return self._key == other._key
def __hash__(self):
return hash(self._key)
def __str__(self):
rep = grammar_cnf
return rep
# build a grammar from a string of lines like "X -> YZ | b"
def get_grammar(string):
grammar = set()
for line in string.splitlines():
head, symbols_str = line.split(' -> ')
for symbols_str in symbols_str.split(' | '):
symbols = tuple(symbols_str.split())
grammar.add(Rule(head, symbols))
return grammar
grammar = get_grammar("""S -> NP VP | Aux NP VP | VP
NP -> Pronoun | Proper-Noun | Det Nominal
Nominal -> Noun | Nominal Noun | Nominal PP
VP -> Verb | Verb NP | Verb NP PP | Verb PP | VP PP
PP -> Preposition NP
Det -> that | this | a
Noun -> book | flight | meal | money
Verb -> book | include | prefer
Pronoun -> I | she | me
Proper-Noun -> Houston | TWA
Aux -> does
Preposition -> from | to | on | near | through""")
grammar_cnf = chomsky_normal_form(grammar)
print(grammar_cnf)
答案 0 :(得分:2)
您可以在规则类
中实现__repr__
您可以使用__str__
进行非正式演示,(如果 str 不存在,则会回退到 repr
这些都是这样的:
class Rule(object):
def __init__(self, name):
self.name = name
def __repr__(self):
return 'Rule({0})'.format(self.name)
def __str__(self):
return self.name
rule = Rule('test')
print(rule) # test