I'm making a Python web scraper for a project. It gets all the information I want, but the only problem is that it only does this for the first profile and no others.
I've tried to figure out what's wrong but I'm stuck; any advice would help.
import requests
import pandas
from bs4 import BeautifulSoup

base_url = "https://www.ratemds.com/best-doctors/?page=1"
for page in range(1, 2, 1):
    r = requests.get(base_url)
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')
    all = soup.find_all("div", {"class": "search-item doctor-profile"})
    l = []
    for item in all:
        d = {}
        d["Name"] = item.find("a", {"class": "search-item-doctor-link"}).text
        d["Phone Number"] = item.find("div", {"class": "search-item-specialty"}).text
        n = item.find("a", {"class": "search-item-doctor-link"})
        a = n.get('href')
        new_url = "https://www.ratemds.com" + a
        r1 = requests.get(new_url)
        c1 = r1.content
        soup1 = BeautifulSoup(c1, 'html.parser')
        sve = soup1.find_all("div", {"class": "col-sm-3 col-md-4 search-item-extra"})
        for profil in sve:
            try:
                d["Phone Number"] = profil.find("meta", itemprop="telephone")["content"]
            except:
                d["Phone Number"] = None
            try:
                d["Address"] = profil.find("meta", itemprop="streetAddress")["content"]
            except:
                d["Address"] = None
            try:
                d["Website"] = profil.find("a", itemprop="sameAs")["href"]
            except:
                d["Website"] = None
            pass
    l.append(d)

df = pandas.DataFrame(l)
df.to_csv("123.csv")
print(df)
Answer (score: 1)
Here is your code with a few adjustments. The main fix: `l.append(d)` sat outside the profile loop, so at most one record per page ever reached the list; it now runs once per profile. The result list is also created once, before the page loop, and the page number is substituted into the URL so more than one page actually gets crawled:
import requests
import pandas as pd
from bs4 import BeautifulSoup

base_url = "https://www.ratemds.com/best-doctors/?page={}"  # changed base_url to a format string

# Moved the list of dicts outside of the main loop
l = []
for page in range(1, 5):
    r = requests.get(base_url.format(page))  # substitute the 'page' variable into base_url
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')
    all = soup.find_all("div", {"class": "search-item doctor-profile"})
    for item in all:
        d = {}
        d["Name"] = item.find("a", {"class": "search-item-doctor-link"}).text
        d["Phone Number"] = item.find("div", {"class": "search-item-specialty"}).text
        n = item.find("a", {"class": "search-item-doctor-link"})
        a = n.get('href')
        new_url = "https://www.ratemds.com" + a
        r1 = requests.get(new_url)
        c1 = r1.content
        soup1 = BeautifulSoup(c1, 'html.parser')
        sve = soup1.find_all("div", {"class": "col-sm-3 col-md-4 search-item-extra"})
        for profil in sve:
            try:
                d["Phone Number"] = profil.find("meta", itemprop="telephone")["content"]
            except:
                d["Phone Number"] = None
            try:
                d["Address"] = profil.find("meta", itemprop="streetAddress")["content"]
            except:
                d["Address"] = None
            try:
                d["Website"] = profil.find("a", itemprop="sameAs")["href"]
            except:
                d["Website"] = None
        l.append(d)  # indented this line to append within the profile loop

df = pd.DataFrame(l)
df.to_csv("123.csv")
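One optional cleanup beyond the fix above: the three repeated try/except blocks can be folded into a small helper that returns None whenever a tag or attribute is missing. A minimal sketch; the get_prop helper and the sample markup here are illustrative assumptions, not part of the site or the original code:

from bs4 import BeautifulSoup

def get_prop(container, tag, prop, attr):
    # Find the first `tag` with a matching itemprop and return the given
    # attribute, or None if the tag or the attribute is absent.
    node = container.find(tag, itemprop=prop)
    return node.get(attr) if node else None

# Self-contained demo using markup shaped like the profile divs above.
html = """
<div class="col-sm-3 col-md-4 search-item-extra">
  <meta itemprop="telephone" content="555-0100">
  <meta itemprop="streetAddress" content="1 Main St">
</div>
"""
profil = BeautifulSoup(html, "html.parser").div
print(get_prop(profil, "meta", "telephone", "content"))      # 555-0100
print(get_prop(profil, "meta", "streetAddress", "content"))  # 1 Main St
print(get_prop(profil, "a", "sameAs", "href"))               # None

Inside the scraper loop, d["Phone Number"] = get_prop(profil, "meta", "telephone", "content") would then replace a whole try/except block. Adding time.sleep(1) after each requests.get is also worth considering so the site isn't hit too quickly.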