我阅读了bs4文档,但仍找不到方法从下面的HTML中仅提取发言人的姓名及其职位、公司名称(例如,姓名 = Jonathan Tan,职位 = 常务董事)。有人可以帮我吗?
此外,我已经学习了一些基础知识,但想进一步提高从HTML中选择和提取信息的能力,最好的方法是什么?
' Write the value of arr(0,0) into cell X1 of the worksheet ws.
' NOTE(review): ws and arr are defined outside this fragment; arr(0,0) is
' presumably the first element of a 2-D array - confirm.
ws.Range("X1").Value = arr(0,0)
' Search B2:B100 for the value now held in X1 and select the matching cell.
' NOTE(review): Range.Find returns Nothing when there is no match, in which
' case the chained .Select would fail - confirm a match is guaranteed.
ws.Range("B2:B100").Find(ws.Range("X1").Value).Select
# Generate a list of useful URLs: every link on the upcoming-events page
# whose href contains the events URL prefix.
url_list = []
url = "http://www.fccsingapore.com/events/upcoming-events"
webpage_response = requests.get(url)
webpage = webpage_response.content
soup = BeautifulSoup(webpage, "html.parser")
all_href = soup.find_all("a")
for link in all_href:
    # Anchors without an href attribute yield None from .get(); skip them
    # instead of failing on the substring test below.
    href = link.get("href")
    if href and "http://www.fccsingapore.com/events" in href:
        url_list.append(href)
# The number of collected links is simply the length of the list.
counter = len(url_list)
print("The program has " + str(counter) + " events to output.")
# Extract useful information from each link.
# NOTE: the [:1] slice restricts this run to the first collected URL only.
for link in url_list[:1]:
    webpage_response = requests.get(link)
    event = BeautifulSoup(webpage_response.content, "html.parser")

    # Event title: text of the first <h1> on the page.
    title = event.find("h1").get_text()

    # Event date and time: inner "field-item even" div of the event-date field.
    date_and_time = event.find("div", attrs={"class": "field field-name-event-date-formated field-type-ds field-label-above"})
    date_time = date_and_time.find("div", attrs={"class": "field-item even"})
    # Fixed-offset split: first 11 characters, then everything from index 12.
    # NOTE(review): assumes an 11-character date followed by one separator
    # character and the time - confirm against the live page.
    event_date = date_time.text[:11]
    event_time = date_time.text[12:]

    # Address: inner "field-item even" div of the address field.
    address_details = event.find("div", attrs={"class": "field field-name-field-address field-type-text-long field-label-above"})
    # attrs must be a dict keyed by "class", matching the other lookups here;
    # the original passed a set literal.
    address = address_details.find("div", attrs={"class": "field-item even"}).get_text()

    # Registration period: inner "field-item even" div of the reg-date field.
    reg_details = event.find("div", attrs={"class": "field field-name-event-reg-date-format field-type-ds field-label-above"})
    registration = reg_details.find("div", attrs={"class": "field-item even"}).get_text()
    # Fixed-offset split: first 11 characters, then everything from index 13.
    # NOTE(review): assumes two separator characters between the start and
    # end values - confirm against the live page.
    reg_start = registration[:11]
    reg_end = registration[13:]

    # Print the first <li> of every <ul> on the page (None when a list is empty).
    for detail in event.find_all("ul"):
        details = detail.find("li")
        print(details)