我编写了以下 Python 代码:
import re
# Question's version: intended to print the "/archive/..." link slugs found in
# the sample HTML, but it fails at the re.match line below.
# NOTE(review): the function body appears at column 0 in this copy; the
# original indentation seems to have been lost.
def get_items():
text = '''
<a href="/archive/q-fin">Quantitative Finance</a>
<a href="/archive/stat">Statistics</a>
<a href="/help/general">General information</a>
<a href="/help/support">Support and Governance Model</a>
<a href="/help/find">Find</a>
'''
pattern = re.compile(r'<a href="/archive/(.*?)">(.*?)</a>', re.S)
# re.match only attempts a match at the very start of the string. `text`
# begins with a newline, so the pattern (which starts with "<a") cannot match
# there; re.match returns None and .group(1) raises AttributeError.
# Even when a match is found, this form returns only one link, not all of them.
items = re.match(pattern, text).group(1)
print(items)
get_items()
但它不起作用,为什么?
正则表达式如下:
pattern = re.compile(r'<a href="/archive/(.*?)">(.*?)</a>', re.S)
答案 0 :(得分:0)
你的正则表达式是正确的,但是你使用了错误的调用来遍历这些匹配项。re.match 只会从字符串的开头尝试匹配,而 text 以换行符开头,因此它返回 None。请参阅下面的更正版本,该版本使用 pattern.finditer(text) 和 match.group(1)。
import re
def get_items():
    """Yield the slug of every ``/archive/...`` link in the sample HTML.

    Yields:
        str: The path segment following ``/archive/`` for each matching
        anchor tag, in document order (``"q-fin"`` then ``"stat"`` for the
        embedded sample text). Links under other paths are skipped.
    """
    text = '''
<a href="/archive/q-fin">Quantitative Finance</a>
<a href="/archive/stat">Statistics</a>
<a href="/help/general">General information</a>
<a href="/help/support">Support and Governance Model</a>
<a href="/help/find">Find</a>
'''
    pattern = re.compile(r'<a href="/archive/(.*?)">(.*?)</a>', re.S)
    # finditer scans the whole string and yields every non-overlapping match.
    # re.match would only try at position 0 (a newline here) and return None.
    for match in pattern.finditer(text):
        yield match.group(1)


for item in get_items():
    print(item)