我编写了一个脚本来递归遍历文件夹,并将<td>
标记包装在<title>
标记中。经过一些考虑后,我更愿意保留<td>
并在文档的头部添加一个新的<title>
标记,但我不知道该怎么做。
以下代码会把<td>
中的文本包装在<title>
标记中,但我怎样才能把这个<td>
中的文本复制到我的html文档的<head>
中,并用<title>
标记包围呢?
import os
from bs4 import BeautifulSoup
def clean_up_folder(dir):
    """Run ``clean_up_file`` on every file found beneath ``dir``.

    The directory tree is traversed with ``os.walk``; each file name is
    joined to the directory that contains it before being processed.
    """
    for parent, _subdirs, filenames in os.walk(dir):
        for filename in filenames:
            full_path = os.path.join(parent, filename)
            clean_up_file(full_path)
def clean_up_file(original_file):
    """Wrap the text of each ``<td class="title">`` in a ``<title>`` tag.

    The file at ``original_file`` is parsed, modified and then rewritten in
    place, pretty-printed, using the encoding BeautifulSoup detected for the
    original document.

    If the encoding cannot be determined the file is left untouched.
    """
    # Read raw bytes: encoding detection (``original_encoding``) only happens
    # when BeautifulSoup is handed bytes rather than already-decoded text.
    with open(original_file, 'rb') as src:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(src.read(), 'html.parser')

    for cell in soup.find_all('td', class_='title'):
        # ``.string`` is None when the cell has no single string child
        # (empty cell or nested markup); there is nothing to wrap then.
        if cell.string is not None:
            cell.string.wrap(soup.new_tag('title'))

    encoding = soup.original_encoding
    if encoding is None:
        # Bail out *before* opening for writing: opening with 'w' truncates
        # the file, which would destroy its content without replacing it.
        return

    # Binary mode, because we write the already-encoded bytes ourselves.
    with open(original_file, 'wb') as dst:
        dst.write(soup.prettify().encode(encoding))
# Script entry point: process every file under the relative 'Test' directory.
clean_up_folder('Test')
基本上,我想把<td class="title">
中的内容复制(ctrl + c),然后粘贴(ctrl + v)到<head></head>
标记内,并用<title></title>
标记包裹。这有可能做到吗?有什么建议吗?
答案 0 :(得分:1)
基本上,先创建一个新的<title>
标记,然后通过.string
把<td class="title">
中的文本赋给它,最后使用.append
把这个标题标记添加到<head>
中。
import os
from bs4 import BeautifulSoup
def clean_up_folder(dir):
    """Apply ``clean_up_file`` to each file in the tree rooted at ``dir``."""
    for walk_entry in os.walk(dir):
        # os.walk yields (directory path, sub-directory names, file names).
        base_dir = walk_entry[0]
        names = walk_entry[2]
        for path in (os.path.join(base_dir, name) for name in names):
            clean_up_file(path)
def clean_up_file(original_file):
    """Copy the first ``<td class="title">`` text into the document title.

    The text of the first ``<td class="title">`` is placed in a ``<title>``
    tag inside ``<head>``; the ``<td>`` itself is kept. The file at
    ``original_file`` is then rewritten in place, pretty-printed, using the
    encoding BeautifulSoup detected for the original document.

    No title is added when the cell is missing, has no single string child,
    or the document has no ``<head>``. If the encoding cannot be determined
    the file is left untouched.
    """
    # Read raw bytes: encoding detection (``original_encoding``) only happens
    # when BeautifulSoup is handed bytes rather than already-decoded text.
    with open(original_file, 'rb') as src:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(src.read(), 'html.parser')

    td = soup.find('td', class_='title')
    head = soup.find('head')
    if td is not None and td.string is not None and head is not None:
        # Reuse an existing <title> so re-running the script (or processing
        # a page that already has one) never produces duplicate titles.
        title = head.find('title')
        if title is None:
            title = soup.new_tag('title')
            head.append(title)
        title.string = td.string

    encoding = soup.original_encoding
    if encoding is None:
        # Bail out *before* opening for writing: opening with 'w' truncates
        # the file, which would destroy its content without replacing it.
        return

    # Binary mode, because we write the already-encoded bytes ourselves.
    with open(original_file, 'wb') as dst:
        dst.write(soup.prettify().encode(encoding))
# Script entry point: process every file under the relative 'Test' directory.
clean_up_folder('Test')
参见文档: