从下拉列表中抓取产品

时间:2021-02-14 05:02:31

标签: selenium web-scraping selenium-chromedriver web-scraping-language

所以我正在抓取具有不同版本(不同色调)的特定产品。我如何让它点击每个产品色调并浏览对应的页面?

这是一个示例链接:https://www.target.com/p/maybelline-superstay-ink-crayon-lipstick-0-04oz/-/A-76581433?preselect=76151470#lnk=sametab

这是我的代码:

# Walk through the shade options of the currently loaded product page and
# build one comma-separated `line` per option (fixed product columns followed
# by the image URL). This fragment relies on `driver`, `df`, `i` and `time`
# being defined earlier in the script.
driver.execute_script("window.scrollTo(0, 300)")

# get each color button
time.sleep(4)
elems = driver.find_elements_by_class_name('VariationButton__StyledButtonWrapper-sc-1hf3dzx-0.dcQiPK')
if not elems:
    # No inline variation buttons were found; presumably the shades live in a
    # select-box dropdown instead, so open it via JavaScript click.
    time.sleep(4)
    shade_btn = driver.find_elements_by_class_name("Button-bwu3xu-0.SelectBox__SelectButtonWithValidation-sc-6gt3w9-1.hUOeWC.kCheAN")[1]
    driver.execute_script("arguments[0].click();", shade_btn)
    time.sleep(4)
    elems = driver.find_elements_by_class_name("Link-sc-1khjl8b-0.OptionLink-wl31ru-0.kdCHb.evmUhs")
    print(elems)
    time.sleep(4)
    # Click the first centered heading (presumably to dismiss the open
    # dropdown -- TODO confirm against the live page).
    btn = driver.find_elements_by_class_name("Heading__StyledHeading-sc-1m9kw5a-0.fhmjpN.h-text-center")[0]
    time.sleep(4)
    driver.execute_script("arguments[0].click();", btn)
    time.sleep(4)
    # NOTE(review): `elems` is collected once before the loop; if clicking an
    # option re-renders the page these references may go stale -- verify.
    for elem in elems:
        # add all of the attributes to the line that will stay the same at the begining (we don't have to scrape these)
        time.sleep(4)
        line = df.Category[i]
        line += ',"' + df.Product[i] + '"'
        line += ',"' + df.URL[i] + '"'
        line += ',"' + df.Price[i] + '"'
        line += ',"' + df.Description[i].replace('\n', ' ') + '"'
        time.sleep(4)
        elem.click()
        time.sleep(4)
        btn = driver.find_elements_by_class_name("Heading__StyledHeading-sc-1m9kw5a-0.fhmjpN.h-text-center")[0]
        time.sleep(4)
        driver.execute_script("arguments[0].click();", btn)
        time.sleep(4)
        # Click the select-box button again before the next iteration.
        shade_btn = driver.find_elements_by_class_name("Button-bwu3xu-0.SelectBox__SelectButtonWithValidation-sc-6gt3w9-1.hUOeWC.kCheAN")[1]
        driver.execute_script("arguments[0].click();", shade_btn)

        # ADD THE IMAGE URL
        j = 0
        found = False
        while j < 5 and not found:
            try:
                img_panel = driver.find_element_by_class_name('slideDeckPicture')
                img_panel = img_panel.find_element_by_tag_name('img')
                img_url = img_panel.get_attribute('src')

                line += ',"' + img_url + '"'
                found = True
            # if it can't find the image, it probably hasn't loaded. wait and try again.
            # Catch Exception rather than using a bare except so Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the script.
            except Exception:
                j += 1
                time.sleep(4)
        # if we've tried 5 times add no url
        if not found:
            line += ',NO IMG URL'

0 个答案:

没有答案
相关问题