我无法抓取多个页面,但抓取单个页面时可以成功。
import csv
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Scrape product cards from pages 1-4 of the gift-ideas listing and save
# them to a CSV file, echoing each scraped record to stdout.
#
# Fixes relative to the earlier version:
#   * each card is visited once (the duplicated nested loop is gone);
#   * fields are read from the card tag itself -- `tag.container` asked
#     BeautifulSoup for a child <container> element, which is None, so every
#     lookup raised AttributeError and was silently swallowed;
#   * rows are written with the csv module so commas inside a field are
#     quoted instead of corrupting the column layout;
#   * only the lookup errors expected from a malformed card are caught, and
#     skipped cards are reported instead of being ignored silently.
file = "pyp.csv"
Headers = ["product", "description", "img_url", "price", "amzn_link"]
PAGE_URL = "https://www.homerungifts.com/gift-ideas-for-mom/page/{}/"
CARD_CLASS = "repick_item small_post col_item inf_scr_item contain_im_grid"
LINK_CLASS = "btn_offer_block re_track_btn"

# newline="" hands line-ending control to the csv module; an explicit utf-8
# encoding avoids platform-default encoding errors on non-ASCII text.
with open(file, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(Headers)

    for page in range(1, 5):
        # Parse while the response is still open, then release the socket.
        with urlopen(PAGE_URL.format(page)) as html:
            soup = BeautifulSoup(html, "html.parser")

        containers = soup.find_all("article", {"class": CARD_CLASS})
        for container in containers:
            # NOTE(review): the tag paths below are carried over unchanged
            # from the earlier version; confirm them against the live markup.
            try:
                product = container.div.div.a.text.strip()
                description = container.div.div.p.text.strip()
                img_url = container.a.img["data-src"]
                price = container.div.span.span.ins.text.strip()
                amzn_link = container.find_all("a", {"class": LINK_CLASS})[0]["href"]
            except (AttributeError, KeyError, IndexError, TypeError) as err:
                # A missing tag yields None (AttributeError/TypeError on the
                # next step), a missing attribute KeyError, and a missing
                # offer link IndexError. Skip the card but say so.
                print("skipped an item on page {}: {!r}".format(page, err))
                continue

            print("product: " + product + "\n")
            print("description: " + str(description) + "\n")
            print("img_url: " + str(img_url) + "\n")
            print("price: " + str(price) + "\n")
            print("amzn_link: " + str(amzn_link) + "\n")
            writer.writerow([product, description, img_url, price, amzn_link])
输出
F:\aaa\2>python ppp.py
F:\aaa\2>python ppp.py
F:\aaa\2>python ppp.py
F:\aaa\2>