Question

我有一个 Python 程序，可以从 HTML 文件中提取信息，但我希望把提取到的结果转储（保存）为 JSON 文件，而不是直接打印出来。

import glob
import json
from bs4 import BeautifulSoup

# Scan every HTML file in the current directory and print the fields scraped
# from each page as a dict.
for filename in glob.iglob('*.html'):
    with open(filename) as f:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed (bs4 warns otherwise).
        soup = BeautifulSoup(f, 'html.parser')

        # findAll returns every matching <span>; these are Tag objects, not text.
        price = soup.findAll('span', {"class": 'bb_price'})
        title = soup.find("span", id="btAsinTitle")
        # First <a> carrying an href that follows the title span.
        author = title.find_next("a", href=True)
        # Each value is the node immediately after its bold label.
        isbn = soup.find('b', text='ISBN-10:').next_sibling
        weight = soup.find('b', text='Shipping Weight:').next_sibling

        # Parenthesised so the line is valid on Python 3 as well as Python 2
        # (the bare `print {...}` statement is a SyntaxError on Python 3).
        print({'title': title.get_text(),
               'author': author.get_text(),
               'isbn': isbn,
               'weight': weight,
               'price': price})

Answer 1

# `data` is the object to serialise; `output_filename` is the destination path.
with open(output_filename, 'w') as f:
    json.dump(data, f)

请参阅 json.dump 的文档：https://docs.python.org/2/library/json.html#json.dump

Answer 2

大致可以这样写：

import glob
import json
from bs4 import BeautifulSoup

def main():
    """Scrape every ``*.html`` file in the current directory into one JSON file.

    One dict per page (title, author, isbn, weight, price) is collected and
    the resulting list is written to ``my_output.json``. If no HTML files are
    present, an empty JSON list is written.
    """
    data = []
    for filename in glob.iglob('*.html'):
        with open(filename) as f:
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            soup = BeautifulSoup(f, 'html.parser')

        # The document is fully parsed above, so the file can already be closed.
        title = soup.find("span", id="btAsinTitle")
        price_tags = soup.findAll('span', {"class": 'bb_price'})
        data.append({
            "title":  title.get_text(),
            "author": title.find_next("a", href=True).get_text(),
            "isbn":   soup.find('b', text='ISBN-10:').next_sibling,
            "weight": soup.find('b', text='Shipping Weight:').next_sibling,
            # Tag objects are not JSON serializable; store their text instead,
            # otherwise json.dump raises TypeError.
            "price":  [tag.get_text() for tag in price_tags],
        })

    with open("my_output.json", "w") as outf:
        json.dump(data, outf)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

如何在 Python 中将数据转储为 JSON

2 个答案: