我正在尝试解析页面中所有列表内容的数据,并将其存储在一个字典列表中。下面是我尝试过的程序:
import re
import pytz
import requests
import datetime
from flask import url_for
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# Download the fixtures page for one date and try to collect the heading of
# each match section into a list of dicts (`least`).
# NOTE(review): the bodies of the `for` / `try` / `except` statements below
# have lost their indentation in this paste, so the snippet is not runnable
# exactly as shown.
matchinfor_link = "http://www.espncricinfo.com/ci/engine/match/index.html?date=2018-03-12"
r = requests.get(matchinfor_link)
matchinfor_html = r.text
soup = BeautifulSoup(matchinfor_html, "html.parser")
# find() yields a single element rather than a list of matches, so the loop
# further down walks whatever iterating that one element produces.
# NOTE(review): presumably that includes plain text nodes, on which the
# div.find(...) call would fail with TypeError - confirm against the bs4 docs.
details = soup.find("div",{"class":"matches-container"})
matchinfor_dict = {}
least = []
count = 0
for div in details:
# try:
# details_div = div.find("div",{"class":"match-section-head"})
# except TypeError:
# continue
# `count` is only a progress counter printed on every iteration.
count+=1
print(count)
try:
sww = div.find("div",{"class":"match-section-head"})
except TypeError:
# NOTE(review): as written, the heading is read only inside the TypeError
# handler. At that point `sww` was not assigned by this iteration: it is
# either undefined or left over from an earlier one (possibly None, which
# would explain the AttributeError). These lines were presumably meant for
# the success path - verify the intended indentation.
name = sww.find("h2").text.strip()
matchinfor_dict['name'] = name
least.append(matchinfor_dict)
# Start a fresh dict so the entry just appended is not mutated later.
matchinfor_dict = {}
print(least)
我最终得到的是 AttributeError 和 TypeError。我应该如何解析页面内容,才能得到下面的预期输出?
预期输出:
[{'name':'Twenty20 Internationals','Date':'Mar 12, 2018', 'place':' 4th Match at R Premadasa Stadium, Colombo (night)','team1':'Sri Lanka','team2':'India', 'time':'Match scheduled to begin at 19:00 local time '....................}]
答案 0 :(得分:0)
`soup.find` 不返回列表,而是只返回第一个匹配的单个元素;对它进行迭代得到的是该元素的子节点,而不是所有匹配结果。请考虑改用 `find_all`。
答案 1 :(得分:0)
这应该有所帮助。
import requests
import datetime
from bs4 import BeautifulSoup
# Scrape the ESPNcricinfo fixtures page for a single date and build `result`,
# a list with one dict per match holding its date, venue, both sides and the
# status line. The list is printed at the end.
matchinfor_link = "http://www.espncricinfo.com/ci/engine/match/index.html?date=2018-03-12"
# A timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() prevents parsing the body of an HTTP error response.
r = requests.get(matchinfor_link, timeout=30)
r.raise_for_status()
matchinfor_html = r.text
soup = BeautifulSoup(matchinfor_html, "html.parser")
# find_all() returns every matching element; find() would give only the first.
details = soup.find_all("section", {"class": "default-match-block"})


def _find(parent, tag, css_class):
    """Return the first `tag` with class `css_class` under `parent`, or None.

    `parent` may itself be None (an earlier lookup failed); None is then
    propagated instead of raising AttributeError.
    """
    if parent is None:
        return None
    return parent.find(tag, {"class": css_class})


def _text(node, strip=True):
    """Return the text of `node` (stripped unless `strip` is false), or None."""
    if node is None:
        return None
    return node.text.strip() if strip else node.text


result = []
for div in details:
    c1 = _find(div, "div", "match-info")
    match_no = _find(c1, "span", "match-no")
    status = _find(div, "div", "match-status")

    # A field whose markup is missing is stored as None, so one incomplete
    # match block does not abort the whole scrape.
    matchinfor_dict = {}
    # The date text is kept exactly as found in the page (not stripped).
    matchinfor_dict["Date"] = _text(_find(c1, "span", "bold"), strip=False)
    # The venue is the text of the link inside the "match-no" span.
    matchinfor_dict["Place"] = _text(getattr(match_no, "a", None))
    matchinfor_dict["Team1"] = _text(_find(div, "div", "innings-info-1"))
    matchinfor_dict["Team2"] = _text(_find(div, "div", "innings-info-2"))
    # The status line is the text of the first <span> inside "match-status".
    matchinfor_dict["Time"] = _text(getattr(status, "span", None))
    result.append(matchinfor_dict)

print(result)
**输出:**
[{'Date': u'Mar 12, 2018', 'Place': u'4th Match at R Premadasa Stadium, Colombo (night)', 'Team1': u'Sri Lanka', 'Team2': u'India', 'Time': u'Match scheduled to begin at 19:00 local time'}, {'Date': u'Mar 9-13, 2018', 'Place': u"2nd Test at St George's Park, Port Elizabeth", 'Team1': u'Australia 243 & 131/4 (46 ov)', 'Team2': u'South Africa 382', 'Time': u'Drinks - Australia trail by 8 runs with 6 wickets remaining'}, {'Date': u'Mar 12, 2018', 'Place': u'20th Match, Group B at Queens Sports Club, Bulawayo', 'Team1': u'Zimbabwe', 'Team2': u'Scotland', 'Time': u'Match scheduled to begin at 09:30 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'18th Match, Group A at Old Hararians, Harare', 'Team1': u'Ireland', 'Team2': u'United Arab Emirates', 'Time': u'Match scheduled to begin at 09:30 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'1st ODI at Reliance Stadium, Vadodara', 'Team1': u'India Women', 'Team2': u'Australia Women', 'Time': u'Match scheduled to begin at 09:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'17th Match, Group A at Harare Sports Club', 'Team1': u'Netherlands', 'Team2': u'West Indies', 'Time': u'Match scheduled to begin at 09:30 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'19th Match, Group B at Bulawayo Athletic Club', 'Team1': u'Hong Kong', 'Team2': u'Nepal', 'Time': u'Match scheduled to begin at 09:30 local time'}, {'Date': u'Mar 9-12, 2018', 'Place': u'at Eden Park Outer Oval, Auckland', 'Team1': u'Central Districts 524', 'Team2': u'Auckland 205 & 152/5 (65 ov, f/o)', 'Time': u'Stumps - Auckland trail by 167 runs with 5 wickets remaining'}, {'Date': u'Mar 9-12, 2018', 'Place': u'at Cobham Oval, Whangarei', 'Team1': u'Canterbury 193 & 141/2 (42 ov)', 'Team2': u'Northern Districts 409/4d', 'Time': u'Stumps - Canterbury trail by 75 runs with 8 wickets remaining'}, {'Date': u'Mar 10-13, 2018', 'Place': u'at University Oval, Dunedin', 'Team1': u'Wellington 194 & 107/1 (44 ov)', 'Team2': u'Otago 289', 'Time': u'Stumps - Wellington 
lead by 12 runs with 9 wickets remaining'}, {'Date': u'Mar 12, 2018', 'Place': u'52nd match at Shere Bangla National Stadium, Mirpur', 'Team1': u'Sheikh Jamal', 'Team2': u'Khelaghar', 'Time': u'Match scheduled to begin at 09:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'53rd match at Bangladesh Krira Shikkha Protisthan No 3 Ground, Savar', 'Team1': u'Legends of Rupganj', 'Team2': u'Kalabagan Krira Chakra', 'Time': u'Match scheduled to begin at 09:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'54th match at Khan Shaheb Osman Ali Stadium, Fatullah', 'Team1': u'Brothers Union', 'Team2': u'Agrani Bank Cricket Club', 'Time': u'Match scheduled to begin at 09:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group C at Colombo Cricket Club Ground', 'Team1': u'Colombo Cricket Club', 'Team2': u'Police Sports Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group B at Colts Cricket Club Ground, Colombo', 'Team1': u'Colts Cricket Club', 'Team2': u'Chilaw', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group D at Bloomfield Cricket and Athletic Club Ground, Colombo', 'Team1': u'Kalutara Town Club', 'Team2': u'Bloomfield', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group B at Army Ground, Panagoda', 'Team1': u'Lankan Cricket Club', 'Team2': u'Sri Lanka Army Sports Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group D at Kadirana Cricket Grounds, Gampaha', 'Team1': u'Negambo Cricket Club', 'Team2': u'Ragama Cricket Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group C at Nondescripts Cricket Club Ground, Colombo', 'Team1': u'Nondescripts Cricket Club', 'Team2': u'Burgher Recreation Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group A at Surrey 
Village, Maggona', 'Team1': u'Panadura Sports Club', 'Team2': u'Saracens Sports Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group A at Sinhalese Sports Club Ground, Colombo', 'Team1': u'Sinhalese Sports Club', 'Team2': u'Moors Sports Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group C at Air Force Ground, Katunayake', 'Team1': u'SL Air SC', 'Team2': u'Badureliya Sports Club', 'Time': u'Match scheduled to begin at 10:00 local time'}, {'Date': u'Mar 12, 2018', 'Place': u'Group D at Navy Ground, Welisara', 'Team1': u'Sri Lanka Navy Sports Club', 'Team2': u'Tamil Union', 'Time': u'Match scheduled to begin at 10:00 local time'}]