我有一个 Python 程序,可以从 HTML 文件中提取信息,但我希望把提取到的结果转储为 JSON 文件。
import glob
import json
from bs4 import BeautifulSoup
# Scan every HTML file in the current directory and print the book details
# found in each one as a dict (one dict per file).
for filename in glob.iglob('*.html'):
    with open(filename) as f:
        # The whole document is parsed here, so the file can be closed
        # before the fields are extracted.
        soup = BeautifulSoup(f)

    price = soup.findAll('span', {"class": 'bb_price'})
    title = soup.find("span", id="btAsinTitle")
    # The author link is the first <a href=...> that follows the title span.
    author = title.find_next("a", href=True)
    # The value of each labelled field is the node right after its <b> label.
    isbn = soup.find('b', text='ISBN-10:').next_sibling
    weight = soup.find('b', text='Shipping Weight:').next_sibling

    # Parenthesised form works as a print statement on Python 2 and as a
    # function call on Python 3.
    print({'title': title.get_text(),
           'author': author.get_text(),
           'isbn': isbn,
           'weight': weight,
           'price': price})
答案 0 :(得分:3)
# Write the collected records to disk as JSON.
# NOTE: `output_filename` and `data` are not defined in this fragment; they
# must be supplied by the surrounding program.
with open(output_filename, 'w') as f:
    json.dump(data, f)
答案 1 :(得分:1)
可以像下面这样写:
import glob
import json
from bs4 import BeautifulSoup
def main():
    """Extract book details from each ``*.html`` file and dump them as JSON.

    Every HTML file in the current working directory is parsed, one record
    (title, author, isbn, weight, price) is built per file, and the list of
    records is written to ``my_output.json``. When no HTML files are present
    an empty JSON list is written.

    Raises:
        AttributeError: if a page lacks one of the expected elements, since
            the lookup then yields ``None``.
    """
    data = []
    for filename in glob.iglob('*.html'):
        with open(filename) as f:
            # The whole document is parsed here, so the file can be closed
            # before the fields are extracted.
            soup = BeautifulSoup(f)

        title = soup.find("span", id="btAsinTitle")
        isbn_label = soup.find('b', text='ISBN-10:')
        weight_label = soup.find('b', text='Shipping Weight:')
        price_tags = soup.findAll('span', {"class": 'bb_price'})

        data.append({
            "title": title.get_text(),
            # The author link is the first <a href=...> after the title span.
            "author": title.find_next("a", href=True).get_text(),
            # strip() trims surrounding whitespace and yields a plain string
            # instead of a node that is still tied to the parse tree.
            "isbn": isbn_label.next_sibling.strip(),
            "weight": weight_label.next_sibling.strip(),
            # json cannot serialize Tag objects, so store each tag's text.
            "price": [tag.get_text() for tag in price_tags],
        })

    with open("my_output.json", "w") as outf:
        json.dump(data, outf)


if __name__ == "__main__":
    main()