BeautifulSoup with a table

Asked: 2018-11-26 17:34:29

Tags: python web-scraping beautifulsoup

I'm web scraping with Beautiful Soup and I get an error at line 13 of my script, the line with table.findAll('tr'). The error shows up when I run it from cmd. Hoping someone can help.

import csv
import requests
from bs4 import BeautifulSoup

url = 'http://www.dublincity.ie/dublintraffic/carparks.htm'
response = requests.get(url)
html = response.content

soup = BeautifulSoup(html, 'html.parser')
table = soup.find('tbody', attrs={'id': 'itemsBody'})

list_of_rows = []
for row in table.findAll('tr'):  # line 13: the reported error occurs here
    list_of_cells = []
    for cell in row.findAll('td'):
        text = cell.text.replace(' ', '')
        list_of_cells.append(text)
    list_of_rows.append(list_of_cells)  # collect the finished row

outfile = open("./carpark.csv", "w", newline="")
writer = csv.writer(outfile)
writer.writerow(["location", "spaces"])  # writerow, not writerows, for a single header row
writer.writerows(list_of_rows)
outfile.close()
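
For context on the error itself: that page appears to fill the table in client-side (note that both answers below read an underlying XML feed instead), so soup.find('tbody', attrs={'id': 'itemsBody'}) comes back as None from the downloaded HTML, and line 13 then raises AttributeError: 'NoneType' object has no attribute 'findAll'. A minimal guard, as a sketch reusing the selector from the question:

table = soup.find('tbody', attrs={'id': 'itemsBody'})
if table is None:
    # The tbody is missing from the server-rendered HTML; the data
    # lives in an XML feed instead (see the answers below).
    raise SystemExit("itemsBody not found in the downloaded page")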

2 Answers:

Answer 0 (score: 2)

You can retrieve the data as an XML document and parse that instead. This is just an example of one part of the process, which you can tailor.

import requests
from xml.etree import ElementTree
import pandas as pd

url = 'http://www.dublincity.ie/dublintraffic/cpdata.xml?1543254514266'
xml_data = requests.get(url).content
tree = ElementTree.fromstring(xml_data)

parking = []
for child in tree:  # each child of the root groups a set of carparks
    for nextChild in child:  # each grandchild carries name/spaces attributes
        parking.append([child.tag, nextChild.attrib['name'], nextChild.attrib['spaces']])

df = pd.DataFrame(parking)
print(df)
df.to_csv(r'C:\Users\User\Desktop\Data.csv', sep=',', encoding='utf-8', index=False)
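
If you want the CSV to carry headers, one optional tweak is to name the columns when building the frame; the labels here are my own, not from the feed:

df = pd.DataFrame(parking, columns=['area', 'name', 'spaces'])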

Answer 1 (score: 2)

If you want to stick with BeautifulSoup, you can use its xml parser along with csv.DictWriter() to fetch and write the content. Check out the implementation:

import csv
import requests
from bs4 import BeautifulSoup

url = 'http://www.dublincity.ie/dublintraffic/cpdata.xml?1543254514266'

res = requests.get(url)
soup = BeautifulSoup(res.content, "xml")  # the "xml" features require lxml

data = []
for item in soup.select("carpark"):
    # each carpark element carries its values as attributes
    ditem = {}
    ditem['Name'] = item.get("name")
    ditem['Spaces'] = item.get("spaces")
    data.append(ditem)

with open("xmldocs.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, ["Name", "Spaces"])
    writer.writeheader()
    for info in data:
        writer.writerow(info)
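
Since csv.DictWriter takes one dict at a time, the intermediate data list isn't strictly necessary; the same loop could write each row directly, under the same assumptions about the feed:

with open("xmldocs.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, ["Name", "Spaces"])
    writer.writeheader()
    for item in soup.select("carpark"):
        writer.writerow({"Name": item.get("name"), "Spaces": item.get("spaces")})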