使用 Python 和 BeautifulSoup 抓取 Harvey Norman 网站时遇到的问题

时间:2019-12-29 04:30:59

标签: python web-scraping beautifulsoup

“名称”列表比“价格”列表长。这是因为 Harvey Norman 网站上的某些商品没有价格(可在 Harvey Norman 页面上通过“检查元素”(Inspect element)查看)。我该如何忽略没有价格的商品?例如,不把这些商品的名称添加到“名称”列表中。

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
import time

# Scrape the Harvey Norman search results for "game", page by page, collecting
# the text of each product-name link into `name` and the text of each
# product tile into `price`.
name = []
price = []

# The browser configuration does not depend on the page number, so a single
# headless Firefox session is created once and reused for every page instead
# of launching a new browser per iteration.
options = Options()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)

try:
    for i in range(1, 100):
        url = 'https://www.harveynorman.com.au/catalogsearch/result/index/?p='  + str(i) + '&q=game'
        print(url)

        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'lxml')

        # Text of every product-name anchor found on this page.
        page_names = [
            item.get_text(strip=True)
            for item in soup.find_all("a", {'class': 'name fn l_mgn-tb-sm l_dsp-blc'})
        ]
        name.extend(page_names)

        # The number of tiles read is capped by the number of names found on
        # the same page.
        # NOTE(review): this stores the full text of each 'product-item' div,
        # not an isolated price, and nothing here ties the n-th tile to the
        # n-th name, so `name` and `price` can drift apart when a product has
        # no price. Confirm the real price selector against the live markup
        # and extract name and price from the same tile.
        for item in soup.find_all(["div"], {'class': ['product-item']}, limit=len(page_names)):
            price.append(item.get_text(strip=True))
finally:
    # quit() ends the whole WebDriver session; running it in `finally` makes
    # sure the browser is shut down even if a page load or parse fails.
    driver.quit()

1 个答案:

答案 0 :(得分:1)

import requests
import csv

# Collect (title, price) pairs from the first ten pages of the site's JSON
# search endpoint, skipping items whose price is an empty string, then write
# them to result.csv.
output = []
for page in range(1, 11):
    print(f"Extracting Page# {page}")
    response = requests.get(
        f"https://www.harveynorman.com.au/catalogsearch/tracking/search?p={page}&q=game&format=json",
        timeout=30,  # without a timeout a stalled connection would hang forever
    )
    # Fail with a clear HTTP error instead of an opaque JSON decode error
    # when the server answers with an error page.
    response.raise_for_status()
    for item in response.json()['results']:
        if item['price'] != "":
            output.append((item['title'], item['price']))

# newline='' is what the csv module expects from the file object; without it
# extra blank rows appear on platforms that translate '\n' to '\r\n'.
# An explicit UTF-8 encoding keeps non-ASCII product titles writable
# regardless of the platform's default encoding.
with open('result.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Name', 'Price'])
    writer.writerows(output)