我正在使用这个蜘蛛点击颜色,然后页面被刷新然后随后点击链接但它间断并且抛出未在缓存中找到的元素 - 也许页面已经改变,因为它是查找错误如何在完成循环后获取原始页面?
无法为此找到合适的解决方案。
import scrapy
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from scrapy.selector import Selector
import time
class CompItem(scrapy.Item):
model_name = scrapy.Field()
model_link = scrapy.Field()
url = scrapy.Field()
what = scrapy.Field()
seller = scrapy.Field()
class criticspider(CrawlSpider):
name = "extract"
allowed_domains = ["mysmartprice.com"]
start_urls = ["http://www.mysmartprice.com/mobile/huawei-honor-holly-msp4857"]
def __init__(self, *args, **kwargs):
super(criticspider, self).__init__(*args, **kwargs)
self.download_delay = 0.25
self.browser = webdriver.Firefox()
self.browser.maximize_window()
self.browser.implicitly_wait(20)
def parse_start_url(self, response):
self.browser.get(response.url)
# waiting for "Go to store" to become visible
wait = WebDriverWait(self.browser, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.store_pricetable")))
main_window = self.browser.window_handles[0]
# iterate over featured stores and visit them
for i,store in enumerate(self.browser.find_elements_by_css_selector("div.store_pricetable")):
link = store.find_element_by_css_selector("div.store_gostore > div.storebutton")
ActionChains(self.browser).key_down(Keys.SHIFT).move_to_element(link).click(link).key_up(Keys.SHIFT).perform()
# there is a popup preventing us to navigate to the store URL - close it
try:
popup_close = self.browser.find_element_by_css_selector(".popup-closebutton")
popup_close.click()
# repeat the click
ActionChains(self.browser).key_down(Keys.SHIFT).move_to_element(link).click(link).key_up(Keys.SHIFT).perform()
except NoSuchElementException:
pass
button = self.browser.find_element_by_xpath('/html/body/div[3]/div/div[2]/div/div[2]/div/div[1]/div[4]/div/div[3]').click()
#time.sleep(5)
item = CompItem()
sel = Selector(text=self.browser.page_source)
item["what"] = "url"
item["seller"] = response.xpath('//div[@class="store_rating_bar_out"]/@data-storename').extract()[i]
item["model_name"] = sel.xpath('//span[contains(@itemprop,"brand")]/text()').extract()[0] +" "+sel.xpath('//span[contains(@itemprop,"name")]/text()').extract()[0] + sel.xpath('//span[contains(@class,"variant")]/text()').extract()[0]
# shift+click on the "Go to Store" link
# switch to the newly opened window, read the current url and close the window
self.browser.switch_to.window(self.browser.window_handles[-1])
# wait until "On your way to the store" would not be in title
wait.until(lambda browser: "On your way to the Store" not in browser.title)
item['url'] = self.browser.current_url
yield item
self.browser.close()
# switch back to the main window
self.browser.switch_to.window(main_window)
self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + "r")
wait = WebDriverWait(self.browser, 10)