import requests
from bs4 import BeautifulSoup
# Download the "Upcoming Events" listing and parse the HTML with lxml.
res = requests.get('https://www.amcham.com.au/web/Events/Web/Events/Upcoming_Events.aspx?hkey=6f098583-ca3d-4a6f-87de-cd4f13d50b11')
soup = BeautifulSoup(res.text, "lxml")

# find() returns only the first matching tag, so each name below refers to a
# single <span>: the first one in the document carrying that class.
event_title, event_location, event_place, event_date = (
    soup.find('span', {'class': css_class})
    for css_class in ('eventTitle', 'city', 'place', 'eventDate')
)

# Print the title, then "place city", then the date, one item per line.
print(event_title.get_text())
print(event_place.get_text(), event_location.get_text())
print(event_date.get_text())
我使用这段代码从网站上提取即将举办的活动的标题、地点和日期信息。
我希望循环遍历网站上列出的全部活动,并提取每个活动的标题、场地、城市和日期信息,有人可以帮我实现吗?
答案 0 :(得分:4)
您需要使用 `findAll`(在新版 BeautifulSoup 中写作 `find_all`)来获取每一项的完整列表:
# find() stops at the first match; find_all() returns every matching tag, so
# each name below is a list of strings, one entry per matching <span>.
# find_all is the current spelling of the legacy findAll alias.
# NOTE(review): the four lists are built independently, so they may differ in
# length if some events lack a field -- do not assume they line up by index.
event_title = [span.text for span in soup.find_all('span', {'class': 'eventTitle'})]
event_location = [span.text for span in soup.find_all('span', {'class': 'city'})]
event_place = [span.text for span in soup.find_all('span', {'class': 'place'})]
event_date = [span.text for span in soup.find_all('span', {'class': 'eventDate'})]
答案 1 :(得分:1)
另一种方法可能如下所示:
import requests
from bs4 import BeautifulSoup
# Download the "Upcoming Events" listing and parse the HTML with lxml.
res = requests.get('https://www.amcham.com.au/web/Events/Web/Events/Upcoming_Events.aspx?hkey=6f098583-ca3d-4a6f-87de-cd4f13d50b11')
soup = BeautifulSoup(res.text,"lxml")


def _text_of(row, css_class):
    """Return the text of the first ``css_class`` element in ``row``.

    Returns an empty string when ``row`` has no such element, because
    ``find()`` yields ``None`` in that case and ``None.text`` would raise
    ``AttributeError``.
    """
    tag = row.find(class_=css_class)
    return tag.text if tag is not None else ''


# Visit every element whose class is rgRow or rgAltRow (presumably one grid
# row per event -- confirm against the page markup) and print its fields
# together, so the values of one event stay on one line.
for item in soup.find_all(class_=["rgRow","rgAltRow"]):
    event_title = _text_of(item, 'eventTitle')
    event_location = _text_of(item, 'city')
    event_place = _text_of(item, 'place')
    event_date = _text_of(item, 'eventDate')
    print(event_title,event_location,event_place,event_date)
答案 2 :(得分:1)
使用单个 `select` 查询的优化解决方案:
import requests, pprint
import collections
from bs4 import BeautifulSoup
# Download the "Upcoming Events" listing and parse the HTML with lxml.
res = requests.get('https://www.amcham.com.au/web/Events/Web/Events/Upcoming_Events.aspx?hkey=6f098583-ca3d-4a6f-87de-cd4f13d50b11')
soup = BeautifulSoup(res.text,"lxml")

# Maps a CSS class name to the list of texts found under that class.
event_data = collections.defaultdict(list)

# A single CSS query returns all four kinds of <span>; each one is filed
# under its first class name, so values are grouped per field, not per event.
for el in soup.select('span.eventTitle, span.city, span.place, span.eventDate'):
    event_data[el['class'][0]].append(el.text)

# Convert to a plain dict so pprint shows it without the defaultdict wrapper.
pprint.pprint(dict(event_data))
格式化(pprint)后的输出:
{'city': ['The Rocks, NSW',
'Brisbane, QLD',
'Sydney, NSW',
'Richmond, VIC',
'Adelaide, SA',
'Perth, WA',
'Melbourne, VIC',
'Pyrmont, NSW',
'South Wharf, VIC'],
'eventDate': ['Mon22Jan',
'Mon5Feb',
'Fri9Feb',
'Tue20Feb',
'Fri23Feb',
'Thu8Mar',
'Wed14Mar',
'Thu15Mar',
'Mon19Mar',
'Tue20Mar',
'Wed21Mar',
'Fri23Mar',
'Wed30May'],
'eventTitle': ['AMCHAM & UNITED AIRLINES PRESENT',
'Super Bowl LII',
'AMCHAM SUPER BOWL LII - SYDNEY',
'SUPER BOWL LII NETWORKING EVENT',
'MR SANJEEV GUPTA',
'Meet the Minister Luncheon with The Hon. Alannah MacTiernan',
'ADVANCING WOMEN IN LEADERSHIP',
'GLOBAL CITIZENS: DRIVING THE FUTURE OF EXPERIENCE',
"INTERNATIONAL WOMEN'S DAY",
'THE EXECUTIVE SPIN ON SERVICE',
'TOLL GROUP',
'THE SCIENCE BEHIND LEADERSHIP',
'PEAK PERFORMANCE LEADERSHIP SUMMIT',
'WORLD BUSINESS FORUM: TWO-DAY EVENT'],
'place': ['Shangri-La Hotel',
'The Pav Bar',
'Hotel CBD',
'Richmond Football Club',
'InterContinental',
'CBD Venue',
'Karstens',
'Sydney CBD',
'Plaza Ballroom',
'RACV Club',
'The Star',
'Melbourne Convention Centre']}