from bs4 import BeautifulSoup
import requests
# Scrape the indiancattle.com directory: walk the first two listing pages,
# follow every entry link, and print one "LABEL:value" line per field
# ("LABEL:NA" when the field cannot be found on the entry page).

# Listing-page URL pieces; the page number goes between them.
# NOTE(review): the space after '?' and the '#038;' fragment are reproduced
# from the original URL unchanged -- they look like copy/paste residue of an
# HTML-escaped '&'; confirm against the live site before cleaning them up.
LISTING_URL_PREFIX = 'https://www.indiancattle.com/directory-page/page/'
LISTING_URL_SUFFIX = ('/? ds&type=29&st&dis&dshid=1&dssearch=SEARCH'
                      '#038;type=29&st&dis&dshid=1&dssearch=SEARCH')

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# (output label, text of the <strong> heading that precedes the value).
# Trailing spaces in the 'Mobile' headings are kept from the original script.
LABELLED_FIELDS = (
    ('TYPE', 'Type:'),
    ('ADDRESS', 'Address:'),
    ('DISTRICT', 'District:'),
    ('STATE', 'State:'),
    ('PIN', 'Pin Code:'),
    ('MOBILE', 'Mobile 1: '),
    ('MOBILE1', 'Mobile 2: '),
    ('REG', 'Registration Number:'),
    ('EXP', 'Years:'),
    # NOTE(review): heading text corrected from 'PersonalEmail:'; the e-mail
    # value may still be missing if the site fills it in with JavaScript,
    # in which case a browser-driven fetch is needed -- confirm.
    ('EMAIL', 'Personal Email:'),
)


def listing_url(page):
    """Return the URL of directory listing page number *page*."""
    return LISTING_URL_PREFIX + str(page) + LISTING_URL_SUFFIX


def format_field(label, node):
    """Return the output line for one field.

    *node* is whatever was found for the field: ``None`` (not found), a
    plain/navigable string, or a tag-like object exposing ``get_text()``.
    Missing values are reported as ``LABEL:NA`` instead of being dropped.
    """
    if node is None:
        return label + ':NA'
    if hasattr(node, 'get_text'):
        return label + ':' + node.get_text()
    return label + ':' + str(node)


def labelled_value(soup, heading):
    """Return the node following the <strong> whose text is *heading*, or None."""
    strong = soup.find('strong', string=heading)
    if strong is None:
        return None
    return strong.next_sibling


def print_entry(soup):
    """Print every field of one parsed directory entry page."""
    print(format_field('NAME', soup.find('h1', class_='entry-title')))
    for label, heading in LABELLED_FIELDS:
        print(format_field(label, labelled_value(soup, heading)))


def main():
    """Fetch listing pages 1 and 2 and print the details of each entry."""
    for count in range(1, 3):
        listing = requests.get(listing_url(count), timeout=REQUEST_TIMEOUT)
        listing_soup = BeautifulSoup(listing.text, 'lxml')
        for link in listing_soup.find_all('a', {"rel": "bookmark"}):
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be followed.
                continue
            # Separate names for the entry page so the listing response and
            # soup being iterated are not overwritten mid-loop.
            entry = requests.get(href, timeout=REQUEST_TIMEOUT)
            print_entry(BeautifulSoup(entry.text, 'lxml'))


if __name__ == '__main__':
    main()
答案 0 :(得分:0)
电子邮件是通过 JS 动态加载的,因此您必须使用 Selenium,请参考下面的代码...
SELECT * FROM account_data WHERE account_id is null
输出:
from bs4 import BeautifulSoup
from selenium import webdriver
# Load the entry page in a real browser so JavaScript-inserted content is
# present in the HTML that gets parsed.
driver = webdriver.Chrome()
try:
    driver.get('https://www.indiancattle.com/directory/dr-mandeep-tajinder-kaur/')
    soup = BeautifulSoup(driver.page_source,'lxml')
finally:
    # Always shut the browser down, even if loading or parsing fails;
    # otherwise the Chrome process is left running.
    driver.quit()
# The e-mail is the text of the first <a> after the 'Personal Email:' label.
Email = soup.find(string='Personal Email:').find_next('a').text
# The address is whatever node immediately follows the 'Address:' label.
Address = soup.find(string='Address:').next_element
print('Email: {}\nAddress: {}'.format(Email,Address))
答案 1 :(得分:0)
使用 Selenium 配合“属性=值”CSS 选择器,代码更易于阅读
from selenium import webdriver
# Locator constants for find_element(); the find_element_by_* helpers
# are no longer available in Selenium 4.
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get('https://www.indiancattle.com/directory/dr-mandeep-tajinder-kaur/')
    # First element whose href starts with "mailto"; its visible text is
    # taken as the e-mail address.
    email = driver.find_element(By.CSS_SELECTOR, '[href^=mailto]').text
finally:
    # Always close the browser so no Chrome process is left behind.
    driver.quit()
print(email)