我正在尝试从这个网站上抓取一些信息。
我需要为每个产品生成如下形式的列表:
[名称,包装,价格/公斤]
因此,对于第一个产品,我的输出应该是三个列表。
目前我已经设法把产品名称提取到一个列表中。
from bs4 import BeautifulSoup
import csv
import urllib.request
from collections import *
import time
# Collect the product names from the zooplus listing page into `name`.

# time.clock() was removed in Python 3.8; perf_counter() is its replacement.
start_time = time.perf_counter()

# Close the HTTP response as soon as the body has been read.
with urllib.request.urlopen("http://www.zooplus.co.uk/shop/dogs/dry_dog_food/royal_canin_vet_diet") as page_0:
    soup_0 = BeautifulSoup(page_0.read(), "html.parser")

# Narrow the document to the <tbody class="product-variants-list"> elements.
restricted_webpage = soup_0.find_all("tbody", {"class": "product-variants-list"})
readable_restricted_webpage = str(restricted_webpage)
soup_1 = BeautifulSoup(readable_restricted_webpage, "html.parser")

# Keep only the rows whose inline style is one of the two background colours.
title = soup_1.find_all("tr", {"style": ["background-color:#ffffff;", "background-color:#ededed;"]})
soup_title = BeautifulSoup(str(title), "html.parser")

# get_text() returns the text inside each <b> without manual tag stripping.
name = [b.get_text() for b in soup_title.find_all("b")]
另外两项信息我很难提取:我找不到可以用来获取(包装,价格/公斤)这一对数据的标签。
我尝试过多种标签组合,最接近的是 ("td", {"style":"background-color:#ededed;"},{"valign":"top"} ),
但我认为我应该以某种方式使用 <span>。
各位会如何处理这个问题?
答案 0 :(得分:1)
from bs4 import BeautifulSoup
import urllib2
# Download the listing page and collect its product-variant tables.
url = 'http://www.zooplus.co.uk/shop/dogs/dry_dog_food/royal_canin_vet_diet'
response = urllib2.urlopen(url)
try:
    html = response.read()
finally:
    # Release the connection even when reading the body fails.
    response.close()
soup = BeautifulSoup(html, "html.parser")
product_list = soup.find_all("tbody", {"class": "product-variants-list"})
# Product counter.
i = 0
def visible(text):
    """Return *text* with surrounding whitespace and blank pieces removed.

    Every line of *text* is stripped and split on double spaces; each piece
    is stripped again, empty pieces are dropped, and the rest are joined
    with no separator. Single spaces inside a piece are preserved, so
    "(4.84/ kg)" comes back unchanged.
    """
    lines = (line.strip() for line in text.splitlines())
    # break multi-headlines into a line each (the delimiter is a double space)
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # drop blank lines
    return ''.join(chunk for chunk in chunks if chunk)
# Print each product's index and name, then one line per package variant
# in the form "-<weight>  |  <price per kg>".
for i, product in enumerate(product_list):
    product_name = product.find("a", {"class": "follow3"}).find("b").text
    # A single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%d %s" % (i, product_name))
    product_variants = product.find_all("tr", {"class": "product-variant"})
    for variant in product_variants:
        # Look the cells up once: the first holds the weight, the second the price.
        cells = variant.find_all("td")
        product_weight = cells[0].string
        # When a "specialPrices" span is present, read the price from inside it;
        # otherwise fall back to the "smalltext" span.
        special = cells[1].find("span", {"class": "specialPrices"})
        if special is not None:
            price_tag = special.find("span", {"class": "smalltextPrices"})
        else:
            price_tag = cells[1].find("span", {"class": "smalltext"})
        product_price = price_tag.get_text(" ", strip=True)
        product_price_per_kg = visible(product_price)
        print("-%s  |  %s" % (product_weight, product_price_per_kg))
输出:
0 Royal Canin Veterinary Diet - Hypoallergenic DR 21
-- 7kg | (£4.84/ kg)
-- 14kg | (£3.71/ kg)
-- Economy Pack: 2 x 14kg | (£3.68/ kg)
1 Royal Canin Veterinary Diet - Sensitivity Control SC 21
-- 7kg | (£5.13/ kg)
-- 14kg | (£3.78/ kg)
-- Economy Pack: 2 x 14kg | (£3.71/ kg)
2 Royal Canin Veterinary Diet - Gastro Intestinal GI 25
-- 2kg | (£6.95/ kg)
-- 7.5kg | (£5.05/ kg)
-- 14kg | (£3.56/ kg)
-- Economy Pack: 2 x 14kg | (£3.50/ kg)
3 Royal Canin Veterinary Diet - Renal RF 14
-- 7kg | (£4.99/ kg)
-- 14kg | (£3.49/ kg)
-- Economy Pack: 2 x 14kg | (£3.43/ kg)
4 Royal Canin Veterinary Diet - Obesity Management DP 34
-- 6kg | (£4.82/ kg)
-- 14kg | (£3.56/ kg)
-- Economy Pack: 2 x 14kg | (£3.50/ kg)
5 Royal Canin Veterinary Diet - Urinary S/O LP 18
-- 7.5kg | (£5.05/ kg)
-- 14kg | (£3.56/ kg)
-- Economy Pack: 2 x 14kg | (£3.50/ kg)
6 Royal Canin Veterinary Diet - Mobility MS 25
-- 7kg | (£4.99/ kg)
-- 14kg | (£4.06/ kg)
-- Economy Pack: 2 x 14kg | (£3.93/ kg)
7 Royal Canin Veterinary Diet - Satiety Support SAT 30
-- 12kg | (£3.99/ kg)
-- Economy Pack: 2 x 12kg | (£3.91/ kg)
8 Royal Canin Veterinary Diet - Hepatic HF 16
-- 6kg | (£5.32/ kg)
-- 12kg | (£3.99/ kg)
-- Economy Pack: 2 x 12kg | (£3.91/ kg)
9 Royal Canin Veterinary Diet - Dental DLK 22
-- 14kg | (£4.14/ kg)
-- Economy Pack: 2 x 14kg | (£4.07/ kg)
10 Royal Canin Veterinary Diet - Diabetic DS 37
-- 7kg | (£4.99/ kg)
-- 12kg | (£3.91/ kg)
-- Economy Pack: 2 x 12kg | (£3.75/ kg)
11 Royal Canin Veterinary Diet - Calm CD 25
-- 4kg | (£6.72/ kg)
-- Economy Pack: 2 x 4kg | (£6.61/ kg)