Scrapy、Python + Selenium:从页面收集到的信息有误

时间:2016-02-01 19:04:14

标签: python selenium drop-down-menu scrapy

大家好,我需要你们的帮助!我有一些页面(见此处),并尝试抓取(scrape)它们。页面上有一个下拉菜单,选择其中不同的选项会改变页面上显示的信息(我使用 Selenium 来切换选项)。我尝试遍历下拉菜单的各个选项(参考了此处的示例),并在页面更新后收集信息,但输出文件中得到的是 4 个相同的值(就好像选项没有改变一样)。Selenium 确实正确地遍历了各个选项(我能看到它在切换),我不知道自己哪里做错了。这是我的代码:

from colombo.items import ColomboItem
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from scrapy.selector import Selector
from selenium.webdriver.support.ui import Select
import time

class ColomboSpider(CrawlSpider):
    """Spider that drives a Firefox browser to step through a product's options.

    For the start URL it selects every option of the ``jshop_attr_id13``
    dropdown in the browser and emits one ``ColomboItem`` (price and name)
    per option.
    """

    name = 'ColomboSpider'
    allowed_domains = ["http://colombo.in.ua"]
    start_urls = [
        "http://colombo.in.ua/colombo-design/ruchka-colombo-gira-jm11.html",
    ] 

    def __init__(self):
        """Initialise the base spider and start the Selenium browser."""
        CrawlSpider.__init__(self)
        # Any Selenium-supported browser can be used here.
        self.browser = webdriver.Firefox() 

    def __del__(self):
        """Call ``close()`` on the browser when the spider object is finalized."""
        self.browser.close()

    def parse(self, response):
        """Select each dropdown option in the browser and build one item per option.

        Returns an iterator over the collected ``ColomboItem`` objects.
        """

        # Load the same URL in the real browser so the page's JavaScript runs.
        self.browser.get(response.url)
        # Fixed pause to let the JavaScript finish loading.
        time.sleep(3) 

        optionsList = []  # "value" attribute of every <option> in the dropdown
        dropdown = self.browser.find_element_by_id("jshop_attr_id13")  # the option dropdown
        options = dropdown.find_elements_by_tag_name('option')
        for option in options:  # collect the option values up front
            optionsList.append(option.get_attribute("value"))

        # NOTE(review): this selector is built from the Scrapy response that was
        # downloaded before any option was selected, not from the browser page.
        # The loop below queries it, so the selections made in the browser are
        # not visible to it -- this appears to be why every item carries the
        # same price.
        hxs = Selector(response)
        items = []

        for optionValue in optionsList:
            select = Select(self.browser.find_element_by_id("jshop_attr_id13"))  # look the dropdown up again
            select.select_by_value(optionValue)  # choose the option with this value
            # Fixed pause after changing the selection.
            time.sleep(2)
            firm_list = hxs.xpath('.//div[@class="jshop productfull"]/form[1]')
            for sel in firm_list:
                item = ColomboItem()
                item['price']=sel.xpath('.//span[@id="block_price"]/text()').extract()
                item['name']=sel.xpath('.//h1/text()').extract()
                items.append(item)
        return iter(items)

我在输出文件中得到的结果是:

1929.61 грн Ручка Colombo GIRA JM11
1929.61 грн Ручка Colombo GIRA JM11
1929.61 грн Ручка Colombo GIRA JM11
1929.61 грн Ручка Colombo GIRA JM11

但是,正确的输出是

1929.61 грн Ручка Colombo GIRA JM11
2275.21 грн Ручка Colombo GIRA JM11
2456.66 грн Ручка Colombo GIRA JM11
2966.42 грн Ручка Colombo GIRA JM11

感谢您的回答)

2 个答案:

答案 0 :(得分:0)

我跳过了名称文本部分,但下面这段代码可以正确获取价格部分:

import unittest
from selenium import webdriver
import datetime
import os
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from random import randint
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from sshtunnel import SSHTunnelForwarder
import MySQLdb
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import autoit

class SprintTests(unittest.TestCase):
    """Browser-driven check that prints the price shown for each dropdown option.

    The option is changed by sending real keystrokes through AutoIt, so the
    browser window must have keyboard focus while the test runs.
    """

    def setUp(self):
        """Open the product page in a maximised Firefox window."""
        self.driver = webdriver.Firefox()
        self.driver.get("http://colombo.in.ua/colombo-design/ruchka-colombo-gira-jm11.html")
        self.driver.implicitly_wait(30)
        self.driver.maximize_window()

    def test_input(self):
        """Print the displayed price, then move the dropdown down by one option.

        This is repeated once per ``<option>`` found in the dropdown.
        """
        dropdown = self.driver.find_element_by_id("jshop_attr_id13")
        options = dropdown.find_elements_by_tag_name('option')
        # Only the number of options matters; the elements themselves are
        # not used inside the loop.
        for _ in options:
            # Read the price currently rendered in the live page.
            price = self.driver.find_element_by_xpath('//*[@id="block_price"]').text
            # Click the dropdown so that it receives the keystrokes below.
            self.driver.find_element_by_xpath('//*[@id="jshop_attr_id13"]').click()
            # Single-argument call form: valid on both Python 2 and Python 3.
            print(price)
            autoit.send("{DOWN}{ENTER}")
            # Fixed pause after changing the selection.
            time.sleep(2)

    def tearDown(self):
        """Shut the browser down after the test."""
        self.driver.quit()

# Run the test case with verbose per-test output when executed as a script.
if __name__ == '__main__':
    unittest.main(verbosity=2)

输出是:

1929.61 грн
2275.21 грн
2456.66 грн
2966.42 грн

答案 1 :(得分:0)

抱歉,回复晚了:

from colombo.items import ColomboItem
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time

class ColomboSpider(CrawlSpider):
    """Scrape price, name and option label of a product for every dropdown option.

    All values are read from the live Selenium browser after each option is
    selected, not from the Scrapy response that was downloaded beforehand.

    NOTE(review): no ``rules`` are defined and ``parse`` is overridden, so the
    link-following machinery of ``CrawlSpider`` is not used here; a plain
    ``Spider`` base class would probably do -- confirm before changing.
    """

    name = 'ColomboSpider'
    # Bare domain names only; entries carrying a URL scheme are not valid here.
    allowed_domains = ["colombo.in.ua"]
    start_urls = [
        # Product page URLs go here.
    ]

    # Locators shared by the methods below.
    _DROPDOWN_ID = "jshop_attr_id13"
    _PRICE_XPATH = '//*[@id="block_price"]'

    def __init__(self, *args, **kwargs):
        """Initialise the base spider (forwarding spider arguments) and start Firefox."""
        super(ColomboSpider, self).__init__(*args, **kwargs)
        self.driver = webdriver.Firefox()

    def closed(self, reason):
        """Scrapy shutdown hook: release the browser as soon as the spider closes."""
        self._quit_driver()

    def __del__(self):
        """Fallback cleanup in case the shutdown hook was never invoked."""
        self._quit_driver()

    def _quit_driver(self):
        """Quit the browser at most once; safe when the driver was never created."""
        driver = getattr(self, "driver", None)
        if driver is not None:
            self.driver = None
            # quit() ends the whole WebDriver session, not just the window.
            driver.quit()

    def _read_item(self):
        """Build an item from the price and name currently shown in the browser."""
        item = ColomboItem()
        item['price'] = self.driver.find_element_by_xpath(self._PRICE_XPATH).text
        item['name'] = self.driver.find_element_by_xpath('.//h1').text
        return item

    def _option_label(self, option_value):
        """Return the inner HTML of the option whose value is *option_value*, or None."""
        dropdown = self.driver.find_element_by_id(self._DROPDOWN_ID)
        label = None
        for option in dropdown.find_elements_by_tag_name('option'):
            if option.get_attribute("value") == option_value:
                label = option.get_attribute("innerHTML")
        return label

    def parse(self, response):
        """Select every dropdown option in turn and collect one item per option.

        If the dropdown cannot be driven (for example the page has no such
        element), a single item is built from the page as it is.

        Returns:
            An iterator over the collected ``ColomboItem`` objects.
        """
        # Imported locally so this snippet does not depend on an extra
        # top-of-file import.
        from selenium.common.exceptions import WebDriverException

        self.driver.get(response.url)
        # Fixed pause to let the page's JavaScript finish loading.
        time.sleep(3)
        try:
            dropdown = self.driver.find_element_by_id(self._DROPDOWN_ID)
            option_values = [
                option.get_attribute("value")
                for option in dropdown.find_elements_by_tag_name('option')
            ]

            items = []
            for option_value in option_values:
                # Look the dropdown up again on every pass instead of reusing
                # an element located before the previous selection.
                select = Select(self.driver.find_element_by_id(self._DROPDOWN_ID))
                select.select_by_value(option_value)
                # Fixed pause after changing the selection.
                time.sleep(2)
                item = self._read_item()
                label = self._option_label(option_value)
                if label is not None:
                    item['color'] = label
                items.append(item)
            return iter(items)

        except WebDriverException as exc:
            # Only browser-side failures trigger the fallback; programming
            # errors (e.g. an undeclared item field) are no longer hidden.
            self.log("Option dropdown could not be processed on %s (%s); "
                     "scraping the page as displayed" % (response.url, exc))
            return iter([self._read_item()])

输出

Chromo - Хром   3333.53 грн     Ручка Colombo DEA FF21 (Код: FF21) 
Chromat - Матовый хром  3817.33 грн     Ручка Colombo DEA FF21 (Код: FF21)