这是我的代码。我想逐个提取页面中的各个元素，但没有得到预期的结果。有人可以帮我吗？
from bs4 import BeautifulSoup
import requests
# Fetch the company page and parse it with the lxml parser.
# NOTE(review): the loop bodies below appear without indentation in this
# listing; Python requires each `for` body to be indented.
r = requests.get('https://www.yelu.in/company/911002/abntravels')
soup = BeautifulSoup(r.text,'lxml')
# Container element that every lookup below is scoped to.
data = soup.find('div',{"id":"company_item"})
# NOTE(review): find_all(...)[0] is a single Tag, not a list. Iterating a Tag
# yields its child nodes, so info1 is a child of the first 'info' div rather
# than the div itself. find() only searches descendants of the node it is
# called on, which presumably explains the None results. The same pattern
# repeats in every loop below.
for info1 in data.find_all('div',class_='info')[0]:
c_name = info1.find('span',{'id':'company_name'})
print(c_name)
# Second 'info' block: address.
for info2 in data.find_all('div',class_='info')[1]:
c_add = info2.find('div',class_='text location')
print(c_add)
# Third 'info' block: phone.
for info3 in data.find_all('div',class_='info')[2]:
phone = info3.find('div',class_='text phone')
print(phone)
# Fourth 'info' block: mobile phone. `.text` is read without checking whether
# find() returned None.
for info4 in data.find_all('div',class_='info')[3]:
mob = info4.find('div',class_='text')
print(mob.text)
# Fifth 'info' block: fax. Same unchecked `.text` access.
for info5 in data.find_all('div',class_='info')[4]:
Fax = info5.find('div',class_='text')
print(Fax.text)
# Sixth 'info' block: website. Same unchecked `.text` access.
for info6 in data.find_all('div',class_='info')[5]:
Website = info6.find('div',class_='text weblinks')
print(Website.text)
# Seventh 'info' block: establishment year. findNextSibling is the older
# camelCase spelling of find_next_sibling; it looks at siblings that follow
# the child node it is called on.
# NOTE(review): the result is stored in `Year` but `year` is printed; no
# `year` is assigned in this listing, so this would raise NameError.
for info7 in data.find_all('div',class_='info')[6]:
Year = info7.findNextSibling('span',class_='label')
print(year)
# Eighth 'info' block: employees.
for info8 in data.find_all('div',class_='info')[7]:
employees = info8.findNextSibling('span',class_='label')
print(employees)
# NOTE(review): this reuses index 7 from the employees loop above, so it reads
# the same block; presumably a different index was intended for the manager.
for info9 in data.find_all('div',class_='info')[7]:
manager = info9.findNextSibling('span',class_='label')
print(manager)
前3个元素我得到的是None。从下一个元素开始，我得到以下错误：AttributeError: 'NoneType' object has no attribute 'text'。对于最后3个元素，同样出现AttributeError错误。
答案 0 :(得分:1)
没有id的元素比较难抓取，但您不需要每次都用循环去迭代。我把您的方案简化如下：
from bs4 import BeautifulSoup
import requests
# Send a browser-like User-Agent with the request.
# NOTE(review): presumably the site rejects the default client User-Agent;
# confirm against the live site.
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'}

# Bound the wait: without a timeout, requests can block indefinitely on an
# unresponsive server.
r = requests.get(
    'https://www.yelu.in/company/911002/abntravels',
    headers=headers,
    timeout=30,
)
# Fail here on a 4xx/5xx response instead of parsing an error page and
# hitting an AttributeError on None further down.
r.raise_for_status()

soup = BeautifulSoup(r.text, 'lxml')

# Each field is looked up directly on the whole document; no per-block
# iteration is needed.
company = {
    # Elements addressable by id or by class combination.
    "company_name": soup.select_one('#company_name').text,
    "address": soup.select_one('div.text.location').text,
    "phone": soup.select_one('div.text.phone').text,
    # Fields without a distinguishing id/class: find the label <div> by its
    # exact text, then read the <div> that follows it.
    "mobile_phone": soup.find('div', string="Mobile phone").find_next_sibling('div').text,
    "fax": soup.find('div', string="Fax").find_next_sibling('div').text,
    "website": soup.find('div', string="Website").find_next_sibling('div').text,
    # Here the value is the node immediately after the label <span>; it is
    # stored as-is (not stripped), so surrounding whitespace is kept.
    "year": soup.find('span', string="Establishment year").next_sibling,
    "employees": soup.find('span', string="Employees").next_sibling,
    "manager": soup.find('span', string="Company manager").next_sibling,
}
print(company)
结果是
{'company_name': 'ABN Travels & Vacation Pvt Ltd', 'address': 'Wave Silver Tower, F-410 4th Floor SECTOR-18 NOIDA-201301 INDIA, NOIDA, Uttar Pradesh', 'phone': '9910007715', 'mobile_phone': '0120 - 2516781', 'fax': '+91 - 120 - 2516785', 'website': 'www.abntravels.com', 'year': ' 1999', 'employees': ' 26-50', 'manager': ' Deepak Batra'}