步骤:
需要: - 将每个HTML文件的标题(title)写入各自单独的.txt文件
预期:任何建议。理想情况下,我想从html文件名('23434.html')中提取整数,并将文本文件命名为'23434.txt'
结果: - 指定路径中没有创建任何txt文件。 - 没有写入任何内容
# Question's attempt: for every "*.html" file under dir_path, parse it and
# write the text of its <title> element to a text file.
# NOTE(review): indentation was lost in this paste; presumably both `with`
# statements are nested inside the `for` loop -- confirm against the original.
for file_name in glob.glob(os.path.join(dir_path, "*.html")):
with open(file_name) as html_file:
soup=BeautifulSoup(html_file)
# Text content of the document's <title> element.
d=soup.title.get_text()
# Abandoned attempt to pull the digits out of the file name.
#resultfile=re.findall('\d+', file_name)
# The output name is the constant "m.txt", opened in "w" mode, so every
# iteration truncates and rewrites the same file. The path is relative, so it
# is resolved against the current working directory rather than dir_path.
with open("m"+".txt", "w") as outfile:
outfile.write(d)
# No call parentheses: this only references the bound method and does nothing.
# The enclosing `with` statement is what actually closes the file.
outfile.close
答案 0 :(得分:0)
# For every "*.html" file in dir_path, write the text of its <title> element
# to a ".txt" file with the same base name (e.g. "23434.html" -> "23434.txt"),
# created inside dir_path next to the source file.
for fpath in glob.glob(os.path.join(dir_path, "*.html")):
    with open(fpath) as html_file:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(html_file, "html.parser")

    # soup.title is None when the document has no <title>; skip such files
    # instead of failing with AttributeError on .get_text().
    if soup.title is None:
        continue
    html_title = soup.title.get_text()

    # Base name without the extension, e.g. "23434" for ".../23434.html".
    html_number = os.path.splitext(os.path.basename(fpath))[0]

    # Join with dir_path: a bare relative name would be created in the
    # current working directory, not in the directory being scanned.
    txt_path = os.path.join(dir_path, html_number + '.txt')
    with open(txt_path, 'w') as outfile:
        outfile.write(html_title)