只有第一个结果被写入了 CSV,而且 URL 的每个字母各占一行;我期望的是把所有 URL 都写进去,每行一个 URL。
我在这段代码的最后一部分哪里做错了,导致 CSV 只写入了其中一个结果,而不是全部结果?
import requests
from bs4 import BeautifulSoup
import csv
# Question's original scraper for the gym-directory category pages.
# NOTE(review): this paste lost its indentation, so the nesting described in
# the comments below is inferred from statement order -- confirm against the
# original post.
# Each section fetches one page with requests, parses it with BeautifulSoup,
# finds the <div class="wlt_search_results"> container and loops over the
# <a class="frame"> anchors inside it.
def grab_listings():
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
# Presumably inside the loop: `return` leaves the whole function on the first
# anchor, so the caller receives a single href string rather than a collection
# of every href. The sections below run only if this loop finds no anchors.
return elem["href"]
# Page 2: same fetch / parse / lookup steps as above.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/2/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 3.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/3/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 4.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/4/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 5.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/5/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 6.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/6/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 7.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/7/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 8.
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/8/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Page 9 (the last page requested).
url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/9/")
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
l_area = soup.find("div", {"class":"wlt_search_results"})
for elem in l_area.findAll("a", {"class":"frame"}):
return elem["href"]
# Script section of the question: calls grab_listings() and writes the result
# to gyms.csv with csv.writer.
# NOTE(review): grab_listings() as written above hands back one href string,
# so `for row in l` walks that string character by character and each
# writerow() call receives a single character -- this matches the
# "one letter per row" output described in the question.
l = grab_listings()
# NOTE(review): "wb" is the mode the Python 2 csv docs ask for; presumably this
# snippet targets Python 2 -- confirm.
with open ("gyms.csv", "wb") as file:
writer = csv.writer(file)
for row in l:
# writerow() treats its argument as a sequence of fields.
writer.writerow(row)
答案 0 :(得分:1)
所以我稍微重构了你的代码,我认为它应该像你现在所期望的那样工作:
import requests
from bs4 import BeautifulSoup
import csv
def grab_listings(page_idx):
    """Return the listing URLs found on one page of the gym directory.

    Args:
        page_idx: Page number substituted into the category URL
            (``.../gyms-fitness-centres/page/<page_idx>/``).

    Returns:
        A list holding the ``href`` of every ``<a class="frame">`` element
        inside the ``<div class="wlt_search_results">`` container, in
        document order. The list is empty when the page has no such
        container.
    """
    url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/"
           "page/{}/").format(page_idx)  # the index of the page is inserted here
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    l_area = soup.find("div", {"class": "wlt_search_results"})
    if l_area is None:
        # find() gives None when nothing matches; without this guard the
        # lookup below would raise AttributeError on a page with no results.
        return []
    # Collect every result into a list; a `return` placed inside a loop over
    # the anchors would hand back only the first one.
    return [elem["href"] for elem in l_area.find_all("a", {"class": "frame"})]
def main():
    """Scrape listing pages 1 through 9 and write their URLs to gyms.csv.

    Every page becomes one CSV row whose fields are the URLs of that page.
    """
    # One inner list of URLs per page (idx 1 up to and including 9),
    # so `pages` is a list of lists.
    pages = [grab_listings(page_no) for page_no in range(1, 10)]
    print(pages)
    with open("gyms.csv", "wb") as out:
        # writerows() emits one row per inner list. Each row has to be a
        # list: a bare string would be split into one field per character.
        csv.writer(out).writerows(pages)
    # Alternative layout -- each URL on its own line, separated by commas
    # at the end:
    # with open("gyms.csv", "wb") as out:
    #     for page_urls in pages:
    #         out.write(',\n'.join(page_urls))


if __name__ == '__main__':
    main()
我在代码中添加了一些注释,希望它们足以说明问题。如果还有不清楚的地方,尽管问 :)
答案 1 :(得分:0)
简化为:
import requests
from bs4 import BeautifulSoup
import csv
def grab_listings(last_page=9):
    """Yield the listing URLs from pages 1..last_page of the gym directory.

    Args:
        last_page: Number of the last page to fetch (inclusive). Defaults
            to 9.

    Yields:
        The ``href`` string of each ``<a class="frame">`` element found
        inside the ``<div class="wlt_search_results">`` container of every
        page, page by page. Pages without that container contribute
        nothing.
    """
    # The template does not change between iterations, so build it once.
    url = "http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/{}/"
    for page in range(1, last_page + 1):
        r = requests.get(url.format(page))
        soup = BeautifulSoup(r.text, 'html.parser')
        l_area = soup.find("div", {"class": "wlt_search_results"})
        if l_area is None:
            # find() gives None when nothing matches; skip this page
            # instead of failing with AttributeError.
            continue
        for elem in l_area.find_all("a", {"class": "frame"}):
            # `yield` (not `return`) keeps the function going, so every
            # link of every page reaches the caller.
            yield elem["href"]
# Write every scraped URL to gyms.csv, one URL per row.
urls = grab_listings()
# newline="" leaves line-ending handling to the csv module, as its
# documentation asks for files passed to csv.writer.
with open("gyms.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    for url in urls:
        # writerow() expects a sequence of fields. Passing the bare string
        # would turn every character into a separate field, so wrap the URL
        # in a one-element list.
        writer.writerow([url])