动态网页抓取

时间:2021-01-05 12:59:17

标签: selenium web-scraping selenium-chromedriver

我正在尝试从一个本地网站获取产品名称及其价格。为此,我使用 Selenium。但是,我卡在了获取产品名称和价格这一步,这可能是因为某些元素只有在鼠标悬停在它们上面时才可用。您能否为这个问题提出一个解决方案?

谢谢!

import os
import path
import requests
from bs4 import BeautifulSoup
import pandas as pd
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from requests_html import HTMLSession
from tqdm import tqdm
import time

# Browser configuration: open Chrome in a visible, maximized window.
options = Options()
# options.headless = True
options.add_argument("--start-maximized")

url = "https://www.mechta.kz/section/stiralnye-mashiny/?setcity=al&arrFilter5_pf%5BNEW%5D=&arrFilter5_pf%5BARFP%5D=43843%2C43848&arrFilter5_pf%5BPROMOCODE_PROCENT%5D%5BLEFT%5D=&arrFilter5_pf%5BPROMOCODE_PROCENT%5D%5BRIGHT%5D=&arrFilter5_pf%5BMINPRICE_s1%5D%5BLEFT%5D=38990&arrFilter5_pf%5BMINPRICE_s1%5D%5BRIGHT%5D=1171000&set_filter=%D0%A4%D0%B8%D0%BB%D1%8C%D1%82%D1%80&set_filter=Y"
driver = webdriver.Chrome(r"C:\Users\Adlet\Documents\Prices\chromedriver.exe", options=options)

# implicitly_wait() does not pause the script: it sets, once for the whole
# session, how long every later find_element call keeps polling for a match.
# Setting it a single time is enough; repeating it after each click does
# nothing extra.
driver.implicitly_wait(15)
driver.get(url)

# Elements to click, in order, paired with the progress message printed after
# each click. Judging by the messages, these presumably expand a filter list
# and then tick the LG and Samsung entries -- verify against the live page.
filter_clicks = [
    ('//*[@id="q-app"]/div/div[1]/main/div/div/div/div/div/div[4]/div[1]/div/div/div[3]/div/div[2]/div/div[6]/a/span', 'clicked'),
    ('//*[@id="q-app"]/div/div[1]/main/div/div/div/div/div/div[4]/div[1]/div/div/div[3]/div/div[2]/div/div[9]/div/div[1]', 'LG'),
    ('//*[@id="q-app"]/div/div[1]/main/div/div/div/div/div/div[4]/div[1]/div/div/div[4]/div/div[2]/div/div[6]/a/span', 'clicked'),
    ('//*[@id="q-app"]/div/div[1]/main/div/div/div/div/div/div[4]/div[1]/div/div/div[4]/div/div[2]/div/div[10]/div/div[1]', 'Samsung'),
]

for xpath, message in filter_clicks:
    driver.find_element_by_xpath(xpath).click()
    print(message)


final_output = []

# Click the paging button five times, pausing one second after each click.
# `x` counts the clicks starting at 1; the loop ends as soon as it reaches 6.
x = 1
condition = True
while condition:
    paging_button = driver.find_element_by_xpath(
        '//*[@id="q-app"]/div/div[1]/main/div/div/div/div/div/div[4]/div[2]/div[2]/div[1]/button/span[2]/span'
    )
    paging_button.click()
    sleep(1)
    print('Next page', x)
    x += 1
    # Keep looping only while the counter is outside the 6..7 stop range.
    condition = not (6 <= x <= 7)

### Getting model name and price information

# The class-name locator accepts exactly one class; a space-separated list is
# rejected as a compound class name. To require several classes on the same
# element, chain them in a CSS selector instead.
MODEL_SELECTOR = '.q-pt-md.q-mt-xs.q-px-md.text-ts3.text-color2.ellipsis'
PRICE_SELECTOR = '.text-ts1.text-bold.q-mr-md.text-black'

# One dict per product card, holding the card's model and price as strings.
AllInfo = []
info = driver.find_elements_by_class_name('hoverCard')

for products in info:
    # Search inside the current card (not the whole page via `driver`), so
    # each iteration reads its own card instead of the first match on the page.
    # NOTE(review): .text may come back empty if these fields are only rendered
    # on hover -- confirm on the live page.
    Model = products.find_element_by_css_selector(MODEL_SELECTOR).text
    Price = products.find_element_by_css_selector(PRICE_SELECTOR).text
    AllInfo.append({'Model': Model, 'Price': Price})

0 个答案:

没有答案