35 23 56 30
110 123 13 31
下面这段代码不会替换 <i> 标签:
from bs4 import BeautifulSoup
# Ordered (tag name, replacement template) pairs; the "{}" placeholder in
# each template receives the text of the matched element.
REPLACEMENTS = [
    ('u', '<span class="underline">{}</span>'),
    ('b', '<strong>{}</strong>'),
    ('i', '<em>{}</em>'),
]
def replace_tags(html, replacements=REPLACEMENTS):
    """Rewrite tags in *html* according to *replacements*.

    Each ``(tag, template)`` pair is processed in turn: every element named
    ``tag`` is swapped for the markup obtained by filling ``template`` with
    the element's text.  The resulting document is returned as a string.
    """
    document = BeautifulSoup(html, 'html.parser')
    for tag_name, markup_template in replacements:
        for element in document.find_all(tag_name):
            # NOTE(review): the replacement is a separately parsed
            # BeautifulSoup document rather than a tag created from
            # `document`; the surrounding discussion attributes the
            # order-dependent misses to this. Behaviour is intentionally
            # left unchanged here.
            element.replace_with(
                BeautifulSoup(markup_template.format(element.text), 'html.parser')
            )
    return str(document)
if __name__ == "__main__":
    # Demo input: one <b>, one <i> and one <u> element in a single paragraph.
    my_html = """<html><body><p><b>I am strong</b> and
<i>I am emphasized</i> and <u>I am underlined</u>.</p></body></html>"""
    print(replace_tags(my_html, REPLACEMENTS))
上面的代码没有替换 <i> 标签,输出为:
<html><body><p><strong>I am strong</strong> and
<i>I am emphasized</i> and <span class="underline">I am underlined</span>.</p></body></html>
但是如果我将 REPLACEMENTS 中的元组顺序更改为
REPLACEMENTS = [('b', '<strong>{}</strong>'),
('i', '<em>{}</em>'),
('u', '<span class="underline">{}</span>')]
那么 <i> 和 <u> 标签都没有被替换:
<html><body><p><strong>I am strong</strong> and
<i>I am emphasized</i> and <u>I am underlined</u>.</p></body></html>
再次重新排序......
REPLACEMENTS = [('i', '<em>{}</em>'),
('b', '<strong>{}</strong>'),
('u', '<span class="underline">{}</span>')]
现在输出
<html><body><p><strong>I am strong</strong> and
<em>I am emphasized</em> and <u>I am underlined</u>.</p></body></html>
<u> 标签未被替换。
我无法弄清楚为什么元组的顺序会对输出产生这种影响。这些标签并没有嵌套,每一轮替换看起来都是相互独立的。我被难住了。有什么想法吗?
答案 0 :(得分:2)
问题是为什么会发生这种情况。答案是:您在调用 replace_with() 时传入的是一个字符串。字符串不是可导航字符串(NavigableString,见 https://www.crummy.com/software/BeautifulSoup/bs4/doc/#navigablestring ),因此 BeautifulSoup 无法再继续遍历被替换过的部分。如果改用新建的标签来替换,它们是可导航的,因此下面的代码在所有顺序下都能正常工作。
from bs4 import BeautifulSoup
def replace_tags(html, replacements):
    """Replace presentational tags in *html* with their semantic equivalents.

    Args:
        html: Markup to parse with the ``html.parser`` backend.
        replacements: Iterable of tag names to convert.  ``'i'`` becomes
            ``<em>``, ``'b'`` becomes ``<strong>`` and ``'u'`` becomes
            ``<span class="underline">``; any other name is left untouched.

    Returns:
        The converted document serialised as a string.
    """
    # Source tag name -> (replacement tag name, attributes of the new tag).
    targets = {
        'i': ('em', {}),
        'b': ('strong', {}),
        'u': ('span', {'class': 'underline'}),
    }
    soup = BeautifulSoup(html, 'html.parser')
    for tag in replacements:
        if tag not in targets:
            # No mapping for this name: skip it rather than hit an unbound
            # `newtag` or re-insert the tag built in a previous iteration.
            continue
        new_name, attrs = targets[tag]
        for node in soup.find_all(tag):
            # Create the replacement from this same soup, as opposed to
            # parsing a second document and inserting that.
            newtag = soup.new_tag(new_name, **attrs)
            # get_text() also handles elements with several/nested children,
            # for which `.string` would be None.
            newtag.string = node.get_text()
            node.replace_with(newtag)
    return str(soup)
if __name__ == "__main__":
    my_html = """<html><body><p><b>I am strong</b> <b>I am strong too</b> and
<i>I am emphasized</i> and <u>I am underlined</u>.</p></body></html>"""
    # Convert the same document with three different orderings of the tag
    # names and print each result for comparison.
    for replacements in (['i', 'b', 'u'], ['b', 'u', 'i'], ['u', 'i', 'b']):
        print(replace_tags(my_html, replacements))
**已更新** 我之前忽略了这一行:
r = BeautifulSoup(replacement, 'html.parser')
但是,出于同样的原因,我认为把来自另一个 soup 的标签插入进来之后,就无法再正常遍历了。我读过的所有文档都是先从原来的 soup 创建一个新标签,然后再使用它。