我的问题涉及代码的最后一部分(即 `###Bottom container` 那一段):
以下网站包含17个“productlist-item__bottom-container”容器,其中4个包含“productlist-item__discount-text”。
我想做什么:
对每一个“productlist-item__bottom-container”容器:
if "productlist-item__bottom-container" contains "productlist-item__discount-text"
store value in a list
else
store value "no text" in the list
感谢您的帮助。
from bs4 import BeautifulSoup
import pandas as pd
# Widen pandas' console output so wide frames print without wrapping.
desired_width = 320
pd.set_option("display.width", desired_width)
pd.set_option("display.max_columns", 30)
# Local path to the chromedriver executable that is handed to Selenium.
chrome_path = r"C:\Users\Sebas\Desktop\chromedriver_win32 (1)\chromedriver.exe"
url = "https://www.nemlig.com/dagligvarer/husholdning/rengoering/opvaskemiddel/opvasketabs-pulver-til-maskine"
# NOTE(review): `webdriver` is not imported anywhere in the visible snippet;
# presumably `from selenium import webdriver` is required -- confirm.
browser = webdriver.Chrome(chrome_path)
browser.get(url)
import time
# Fixed 5-second pause before reading the page source; presumably this gives
# the page's JavaScript time to render the product list -- TODO confirm.
time.sleep(5)
# Parse the browser's current HTML; `soup` is queried by the sections below.
html = browser.page_source
soup = BeautifulSoup(html, "html.parser")
# Get the descriptions: the text of every "productlist-item__info" div.
containers = soup.find_all("div", {"class": "productlist-item__info"})
all_descriptions = [info_div.text for info_div in containers]
# Get the product name: the text of every "productlist-item__name" div.
productnames = soup.find_all("div", {"class": "productlist-item__name"})
all_productnames = [name_div.text for name_div in productnames]
# Get the base price: the text of every "pricecontainer__base-price" div.
basePrices = soup.find_all("div", {"class": "pricecontainer__base-price"})
all_basePrices = [price_div.text for price_div in basePrices]
# Get promo price: the text of every "pricecontainer__campaign-price" div.
promoPrices = soup.find_all("div", {"class": "pricecontainer__campaign-price"})
all_promoPrices = [promo_div.text for promo_div in promoPrices]
# Bottom container: build one entry per product -- the discount text when the
# product's bottom container holds one, otherwise the placeholder "No text".
all_texts = []
bottom_containers = soup.find_all("div", {"class": "productlist-item__bottom-container"})
for container in bottom_containers:
    # Search inside the current container (a single Tag), not the ResultSet.
    # find() returns the first match or None; find_all() returns a list that
    # is never None, so it cannot be used for the presence check below.
    discountText = container.find("div", {"class": "productlist-item__discount-text"})
    if discountText is not None:
        all_texts.append(discountText.text)
    else:
        all_texts.append("No text")
print(all_texts)
Traceback (most recent call last):
File "C:/Users/Sebas/PycharmProjects/DePivotize/venv/Selenium.py", line 58, in <module>
discountText = bottom_containers.find_all("div", {"class": "productlist-item__discount-text"})
File "C:\Users\Sebas\PycharmProjects\DePivotize\venv\lib\site-packages\bs4\element.py", line 1602, in __getattr__
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
AttributeError: ResultSet object has no attribute 'find_all'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?
[1]: https://www.nemlig.com/dagligvarer/husholdning/rengoering/opvaskemiddel/opvasketabs-pulver-til-maskine
答案 0 :(得分:1)
我不确定您想要的 discount text 具体是什么,
不过它会不会就包含在 JSON 响应里?
import requests
import json
from bs4 import BeautifulSoup
# Fetch the product-list page; the coded timestamps needed for the API request
# URL are embedded in one of its inline scripts.
url = 'https://www.nemlig.com/dagligvarer/husholdning/rengoering/opvaskemiddel/opvasketabs-pulver-til-maskine'
headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Mobile Safari/537.36'}
s = requests.Session()
response = s.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')

# Locate the script that assigns `contentAsJson` and parse the value after the
# assignment, dropping the final character of the script text.
for script in soup.find_all('script'):
    if 'contentAsJson =' not in script.text:
        continue
    jsonStr = script.text.strip().split('var contentAsJson = ')[-1][:-1]
    jsonData = json.loads(jsonStr)

# Pull out the values that are interpolated into the API URL and query.
settings = jsonData['Settings']
productStamp = settings['ProductsImportedTimestamp']
publishStamp = settings['SitecorePublishedStamp']
timeSlot = settings['TimeslotUtc']
deliveryZoneId = settings['DeliveryZoneId']
zipcode = settings['ZipCode']
productGroupId = jsonData['content'][0]['ProductGroupId']
# Access data from API: build the endpoint from the page settings, then request
# the product group with paging set to -1.
url_parts = (productStamp, publishStamp, timeSlot, deliveryZoneId, zipcode)
req_url = 'https://www.nemlig.com/webapi/%s-%s/%s/%s/%s/Products/GetByProductGroupId' % url_parts
payload = {
    'pageindex': '-1',
    'pagesize': '-1',
    'productGroupId': str(productGroupId),
}
api_response = s.get(req_url, headers=headers, params=payload)
jsonData = api_response.json()
products = jsonData['Products']
# Print the campaign details of every product flagged as a discount item.
for each in products:
    if not each['DiscountItem']:
        continue
    # 'CampaignAttribute' is absent on some products; only that missing-key
    # case is skipped, so any other error is no longer silently swallowed.
    if 'CampaignAttribute' in each:
        print(each['CampaignAttribute'])
    print(each['Campaign'], '\n')
输出:
Fast mixtilbud
{'MinQuantity': 4, 'MaxQuantity': 0, 'TotalPrice': 90.0, 'CampaignPrice': 90.0, 'CampaignUnitPrice': None, 'Type': 'ProductCampaignBuyXForY', 'DiscountSavings': 29.8}
{'MinQuantity': 3, 'MaxQuantity': 0, 'TotalPrice': 89.95, 'VariousPriceProductsCampaign': True, 'CampaignPrice': 89.95, 'CampaignUnitPrice': None, 'Type': 'ProductCampaignMixOffer', 'DiscountSavings': 47.9}
您还可以遍历 JSON 响应,把所有信息放进一张表(DataFrame)里:
import requests
import json
from bs4 import BeautifulSoup
import pandas as pd
s = requests.Session()
# Get coded timestamp for the api request url
url = 'https://www.nemlig.com/dagligvarer/husholdning/rengoering/opvaskemiddel/opvasketabs-pulver-til-maskine'
# headers must be a mapping of header name -> value; the original wrapped the
# whole 'user-agent: ...' text in a single string, producing a set, not a dict.
headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Mobile Safari/537.36'}
response = s.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
scripts = soup.find_all('script')
# Locate the inline script that assigns `contentAsJson` and parse the value
# after the assignment, dropping the final character of the script text.
for script in scripts:
    if 'contentAsJson =' in script.text:
        jsonStr = script.text.strip().split('var contentAsJson = ')[-1][:-1]
        jsonData = json.loads(jsonStr)
# Values interpolated into the API URL and query further down.
productStamp = jsonData['Settings']['ProductsImportedTimestamp']
publishStamp = jsonData['Settings']['SitecorePublishedStamp']
timeSlot = jsonData['Settings']['TimeslotUtc']
deliveryZoneId = jsonData['Settings']['DeliveryZoneId']
zipcode = jsonData['Settings']['ZipCode']
productGroupId = jsonData['content'][0]['ProductGroupId']
# Access data from API
req_url = 'https://www.nemlig.com/webapi/%s-%s/%s/%s/%s/Products/GetByProductGroupId' %(productStamp,publishStamp, timeSlot, deliveryZoneId,zipcode)
payload = {
    'pageindex': '-1',
    'pagesize': '-1',
    'productGroupId': '%s' %(productGroupId)}
jsonData = s.get(req_url, headers=headers, params=payload).json()
products = jsonData['Products']
# One row per product dict. Building the frame in a single call replaces the
# row-by-row DataFrame.append, which copied the frame on every iteration and
# was removed in pandas 2.0. sort_index(axis=1) keeps the alphabetical column
# order that append(sort=True) produced; the default index is already 0..n-1.
df = pd.DataFrame(products).sort_index(axis=1)
输出:
print (df.to_string())
Availability Brand Campaign CampaignAttribute Category Description DiscountItem Id Labels Name Price PrimaryImage ProductCategoryGroupName ProductCategoryGroupNumber ProductMainGroupName ProductMainGroupNumber ProductSubGroupName ProductSubGroupNumber SaleBeforeLastSalesDate Score SearchDescription TemplateName UnitPrice UnitPriceCalc UnitPriceLabel Url
0 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Ecover None NaN Maskinopvask 25 stk. / zero / Ecover False 5039333 [Svanemærket] Opvasketabs all in one 44.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 1,80 kr./Stk. 1.80 kr./Stk. opvasketabs-all-in-one-5039333
1 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Finish None NaN Maskinopvask 60 stk. / Quantum / Finish False 5038266 [] Maskinopvask (tabs) 99.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS NEOPHOS] productlistitem 1,67 kr./Stk. 1.67 kr./Stk. maskinopvask-tabs-5038266
2 {'IsDeliveryAvailable': True, 'IsAvailableInSt... First Price {'MinQuantity': 4, 'MaxQuantity': 0, 'TotalPri... Fast mixtilbud Maskinopvask 40 stk. / First Price True 102160 [Discount] Maskinopvasketabs 29.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 0,75 kr./Stk. 0.75 kr./Stk. maskinopvasketabs-102160
3 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Grøn Balance {'MinQuantity': 4, 'MaxQuantity': 0, 'TotalPri... Fast mixtilbud Maskinopvask 30 stk. / Grøn Balance False 5012162 [Den Blå Krans, Parfumefri, Svanemærket] Maskinopvask alt-i-en (tabs) 32.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 1,10 kr./Stk. 1.10 kr./Stk. maskinopvask-alt-i-en-tabs-5012162
4 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos None NaN Maskinopvask 2,50 kg / Neophos False 101638 [] Maskinopvask (pulver) 79.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, pulver og flydende 4800600005 0 3.433886 [MASKINOPVASK ] productlistitem 31,98 kr./Kg. 31.98 kr./Kg. maskinopvask-pulver-101638
5 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos None NaN Maskinopvask 0,65 l / Eco / Neophos False 5008215 [Svanemærket] Maskinopvask 0% (flydende) 79.00 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, pulver og flydende 4800600005 0 3.433886 [MASKINOPVASK ] productlistitem 121,54 kr./Ltr. 121.54 kr./Ltr. maskinopvask-0-flydende-5008215
6 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'MinQuantity': 3, 'MaxQuantity': 0, 'TotalPri... NaN Maskinopvask 1 stk. / Neophos True 5009595 [Discount] Odour Stop 2-in-1 33.00 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Tilbehør til opvaskemaskine 4800600004 0 3.433886 [LUGTFJERNER] productlistitem 33,00 kr./Stk. 33.00 kr./Stk. odour-stop-2-in-1-5009595
7 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'DiscountSavings': 79.95, 'MaxQuantity': 0, '... NaN Maskinopvask 70 stk. / All-in-1 / Neophos False 5021615 [] Maskinopvask (tabs) 199.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 2,86 kr./Stk. 2.86 kr./Stk. maskinopvask-tabs-5021615
8 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'DiscountSavings': 20.0, 'MaxQuantity': 0, 'C... NaN Maskinopvask 25 stk. / All-in-1 / Neophos False 5025942 [] Maskinopvask (tabs) 79.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 2,96 kr./Stk. 2.96 kr./Stk. maskinopvask-tabs-5025942
9 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos None NaN Maskinopvask 40 stk. / Classic Powerball / Neophos False 5025945 [] Maskinopvask (tabs) 89.00 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 2,23 kr./Stk. 2.22 kr./Stk. maskinopvask-tabs-5025945
10 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'DiscountSavings': 50.0, 'MaxQuantity': 0, 'C... NaN Maskinopvask 50 stk. / All-in-1 / Neophos False 5025966 [] Maskinopvask (tabs) 149.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 149,95 kr./Stk. 149.95 kr./Stk. maskinopvask-tabs-5025966
11 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos None NaN Maskinopvask 60 stk. / Neophos Quantum False 5031592 [] Maskinopvaskemiddel (tabs) 163.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 2,73 kr./Stk. 2.73 kr./Stk. maskinopvaskemiddel-tabs-5031592
12 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'DiscountSavings': 20.0, 'MaxQuantity': 0, 'C... NaN Maskinopvask 22 stk. / Neophos Quantum False 5031593 [] Maskinopvaskemiddel (tabs) 79.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 3,63 kr./Stk. 3.63 kr./Stk. maskinopvaskemiddel-tabs-5031593
13 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'DiscountSavings': 60.0, 'MaxQuantity': 0, 'C... NaN Maskinopvask 45 stk. / Neophos Quantum False 5031594 [] Maskinopvaskemiddel (tabs) 149.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 3,33 kr./Stk. 3.33 kr./Stk. maskinopvaskemiddel-tabs-5031594
14 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos None NaN Maskinopvask 65 stk. / Neophos False 5042520 [] Maskinopvask 0% 179.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 2,77 kr./Stk. 2.77 kr./Stk. maskinopvask-0-5042520
15 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Neophos {'DiscountSavings': 19.05, 'MaxQuantity': 0, '... Fast mixtilbud Maskinopvask 110 stk. / Classic Powerball / Neophos False 5043729 [] Maskinopvask (tabs) 119.00 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 1,08 kr./Stk. 1.08 kr./Stk. maskinopvask-tabs-5043729
16 {'IsDeliveryAvailable': True, 'IsAvailableInSt... Sun None NaN Maskinopvask 70 stk. / Sun False 5028082 [] Maskinopvask (tabs) 79.95 https://live.nemligstatic.com/scommerce/images... Opvask 4800600000 Husholdning og dyremad 4800000000 Maskinopvask, tabs 4800600002 0 3.433886 [OPVASKETABS] productlistitem 1,14 kr./Stk. 1.14 kr./Stk. maskinopvask-tabs-5028082