我想通过运行find_pattern.py提取解析树,这些树与以下模式匹配:
^[^\d]*(\d+)[^\d]*$
但是以下输出显示:
SIMPLE_PREDICATE = (ROOT, ((SENTENCE, (NP, VP, PERIOD)),))
APPOSITION = (SENTENCE, ((NP, (NP, COMMA, NP, COMMA)), VP, PERIOD))
find_pattern.py
Traceback (most recent call last):
File "C:\Users\Anon\Desktop\ZAHID\Working Model 1\zahid\Practice\New folder\find_pattern.py", line 40, in <module>
appos = sent_extract.find_appositions(parse_trees)
File "C:\Users\Anon\Desktop\ZAHID\Working Model 1\zahid\Practice\New folder\sent_extract.py", line 30, in find_appositions
s in search_for_matches(parse_tree, APPOSITION)]
File "C:\Users\Anon\Desktop\ZAHID\Working Model 1\zahid\Practice\New folder\sent_extract.py", line 58, in search_for_matches
if is_match(parse_tree, pattern):
File "C:\Users\Anon\Desktop\ZAHID\Working Model 1\zahid\Practice\New folder\sent_extract.py", line 45, in is_match
if tree.label() == parent and len(tree) == len(children):
AttributeError: 'listiterator' object has no attribute 'label'
sent_extract.py
import os
import parse_article
import stanford_parser
import sent_extract
article_filename = 'C:\Users\Anon\Desktop\ZAHID\Working Model 1\zahid\Practice\New folder\____.html'
sentences = parse_article.parse_html(article_filename)
user=''
parser = stanford_parser.create_parser(user)
parse_trees = parser.raw_parse_sents(sentences)
appos = sent_extract.find_appositions(parse_trees)
print(appos)
tags.py
from nltk.tree import Tree
from tags import *
SIMPLE_PREDICATE = (ROOT, ((SENTENCE, (NP, VP, PERIOD)),))
APPOSITION = (SENTENCE, ((NP, (NP, COMMA, NP, COMMA)), VP, PERIOD))
def find_predicates(parse_trees):
preds = []
for parse_tree in parse_trees:
if is_match(parse_tree, SIMPLE_PREDICATE):
preds.append(parse_tree[0])
return preds
def find_appositions(parse_trees):
appos = []
for parse_tree in parse_trees:
appos += [(s[0,0], s[0,2]) for
s in search_for_matches(parse_tree, APPOSITION)]
return appos
def is_match(tree, pattern):
if not isinstance(pattern, tuple):
return tree.label() == pattern
else:
parent = pattern[0]
children = pattern[1]
if tree.label() == parent and len(tree) == len(children):
for i in xrange(len(tree)):
ith_child = tree[i]
if not is_match(ith_child, children[i]):
return False
return True
def search_for_matches(parse_tree, pattern):
matches = []
if is_match(parse_tree, pattern):
matches.append(parse_tree)
for child in parse_tree:
if isinstance(child, Tree):
matches += search_for_matches(child, pattern)
return matches
答案 0 :(得分:1)
raw_parse_sents()
method旨在处理多个句子。每个句子都解析为一系列树,但是您的代码假设每个句子只有一棵树。从文档中:
返回类型:
iter(iter(Tree))
因此,您将获得树木的可迭代项。
所以不要使用
for parse_tree in parse_trees:
if is_match(parse_tree, SIMPLE_PREDICATE):
preds.append(parse_tree[0])
您必须使用
for sentence in parse_trees:
for parse_tree in sentence:
if is_match(parse_tree, SIMPLE_PREDICATE):
preds.append(parse_tree[0])
现在您要传递实际的Tree()
instances,它们是可以被索引并具有长度的类似列表的对象。
find_appositions()
也是如此:
def find_appositions(parse_trees):
appos = []
for sentence in parse_trees:
for parse_tree in sentence:
appos += [(s[0,0], s[0,2]) for
s in search_for_matches(parse_tree, APPOSITION)]
return appos