我正在抓取一个具有多个版本(不同色调)的特定产品。我如何让脚本依次点击每个产品色调,并抓取点击后显示的对应页面?
这是我的代码:
# Build one CSV-style `line` per shade (colour variant) of the product page that
# `driver` currently has open. `df` and `i` are defined by the surrounding code
# and select the product whose fixed columns start every line.
#
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# all indentation. In particular, how far the `if not elems:` body extends is
# an assumption -- confirm against the original script.

# Class-name locators that are needed more than once.
shade_select_class = "Button-bwu3xu-0.SelectBox__SelectButtonWithValidation-sc-6gt3w9-1.hUOeWC.kCheAN"
heading_class = "Heading__StyledHeading-sc-1m9kw5a-0.fhmjpN.h-text-center"

driver.execute_script("window.scrollTo(0, 300)")
# get each color button
time.sleep(4)
elems = driver.find_elements_by_class_name('VariationButton__StyledButtonWrapper-sc-1hf3dzx-0.dcQiPK')
if not elems:
    # No variation buttons were found: click the second select-box button and
    # use the option links as the per-shade elements instead.
    time.sleep(4)
    shade_btn = driver.find_elements_by_class_name(shade_select_class)[1]
    # Clicked through JavaScript rather than WebElement.click().
    driver.execute_script("arguments[0].click();", shade_btn)
    time.sleep(4)
    elems = driver.find_elements_by_class_name("Link-sc-1khjl8b-0.OptionLink-wl31ru-0.kdCHb.evmUhs")
print(elems)
time.sleep(4)
# Click the first heading element (presumably to dismiss the open option
# list -- TODO confirm).
btn = driver.find_elements_by_class_name(heading_class)[0]
time.sleep(4)
driver.execute_script("arguments[0].click();", btn)
time.sleep(4)

# NOTE(review): `elems` is collected once, before any shade is clicked. If a
# click re-renders or navigates the page these handles may go stale; consider
# re-locating the elements by index inside the loop.
for elem in elems:
    # add all of the attributes to the line that will stay the same at the
    # beginning (we don't have to scrape these)
    time.sleep(4)
    # NOTE(review): fields are wrapped in double quotes but embedded double
    # quotes are not escaped.
    line = df.Category[i]
    line += ',"' + df.Product[i] + '"'
    line += ',"' + df.URL[i] + '"'
    line += ',"' + df.Price[i] + '"'
    line += ',"' + df.Description[i].replace('\n', ' ') + '"'
    time.sleep(4)

    # Select this shade, then click the heading and the select-box button
    # again, mirroring the sequence used before the loop.
    elem.click()
    time.sleep(4)
    btn = driver.find_elements_by_class_name(heading_class)[0]
    time.sleep(4)
    driver.execute_script("arguments[0].click();", btn)
    time.sleep(4)
    shade_btn = driver.find_elements_by_class_name(shade_select_class)[1]
    driver.execute_script("arguments[0].click();", shade_btn)

    # ADD THE IMAGE URL
    # Try up to 5 times: if the image can't be found it probably hasn't
    # loaded yet, so wait and try again.
    found = False
    for _attempt in range(5):
        try:
            img_panel = driver.find_element_by_class_name('slideDeckPicture')
            img_panel = img_panel.find_element_by_tag_name('img')
            img_url = img_panel.get_attribute('src')
            # Raises TypeError (and so retries) when `src` is missing (None).
            line += ',"' + img_url + '"'
        except Exception:
            # `Exception` instead of a bare `except:` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the script.
            time.sleep(4)
        else:
            found = True
            break
    # if we've tried 5 times add no url
    if not found:
        line += ',NO IMG URL'