大家好,我需要你的支持! 我有一些页面(here)并尝试废弃它。在页面上的下拉菜单和下拉菜单中的选项更改页面上的信息(对于我使用selenium的更改选项)。我尝试迭代选择选项(我发现示例here)并在更新后从页面收集信息,但在输出文件中我有4个相同的值(就好像选项不会改变)。 Selenium正确迭代选项(我看到它),我不知道我做错了什么。 这是我的代码:
from colombo.items import ColomboItem
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from scrapy.selector import Selector
from selenium.webdriver.support.ui import Select
import time
class ColomboSpider(CrawlSpider):
name = 'ColomboSpider'
allowed_domains = ["http://colombo.in.ua"]
start_urls = [
"http://colombo.in.ua/colombo-design/ruchka-colombo-gira-jm11.html",
]
def __init__(self):
CrawlSpider.__init__(self)
# use any browser you wish
self.browser = webdriver.Firefox()
def __del__(self):
self.browser.close()
def parse(self, response):
self.browser.get(response.url)
#let JavaScript Load
time.sleep(3)
optionsList = []# options attribute value
dropdown = self.browser.find_element_by_id("jshop_attr_id13")#my dropdown element name
options = dropdown.find_elements_by_tag_name('option')
for option in options: #iterate over the options, place attribute value in list
optionsList.append(option.get_attribute("value"))
hxs = Selector(response)
items = []
for optionValue in optionsList:
select = Select(self.browser.find_element_by_id("jshop_attr_id13"))#i found dropdown
select.select_by_value(optionValue)#and i click on n-value in my list
time.sleep(2)
firm_list = hxs.xpath('.//div[@class="jshop productfull"]/form[1]')
for sel in firm_list:
item = ColomboItem()
item['price']=sel.xpath('.//span[@id="block_price"]/text()').extract()
item['name']=sel.xpath('.//h1/text()').extract()
items.append(item)
return iter(items)
我在输出文件中有什么:
1929.61 грн Ручка Colombo GIRA JM11
1929.61 грн Ручка Colombo GIRA JM11
1929.61 грн Ручка Colombo GIRA JM11
1929.61 грн Ручка Colombo GIRA JM11
但是,正确的输出是
1929.61 грн Ручка Colombo GIRA JM11
2275.21 грн Ручка Colombo GIRA JM11
2456.66 грн Ручка Colombo GIRA JM11
2966.42 грн Ручка Colombo GIRA JM11
感谢您的回答)
答案 0 :(得分:0)
我正在跳过文本部分,但此代码适用于金额部分
import unittest
from selenium import webdriver
import datetime
import os
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from random import randint
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from sshtunnel import SSHTunnelForwarder
import MySQLdb
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import autoit
class SprintTests(unittest.TestCase):
def setUp(self):
self.driver = webdriver.Firefox()
self.driver.get("http://colombo.in.ua/colombo-design/ruchka-colombo-gira-jm11.html")
self.driver.implicitly_wait(30)
self.driver.maximize_window()
def test_input(self):
dropdown = self.driver.find_element_by_id("jshop_attr_id13")
options = dropdown.find_elements_by_tag_name('option')
for option in options:
a = self.driver.find_element_by_xpath('//*[@id="block_price"]')
b=a.text
a = self.driver.find_element_by_xpath('//*[@id="jshop_attr_id13"]').click()
print b
autoit.send("{DOWN}{ENTER}")
time.sleep(2)
def tearDown(self):
self.driver.quit()
if __name__ == '__main__':
unittest.main(verbosity=2)
输出是:
1929.61 грн
2275.21 грн
2456.66 грн
2966.42 грн
答案 1 :(得分:0)
对不起时间回答:
from colombo.items import ColomboItem
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
class ColomboSpider(CrawlSpider):
name = 'ColomboSpider'
allowed_domains = ["http://colombo.in.ua"]
start_urls = [
]
def __init__(self):
self.driver = webdriver.Firefox()
def __del__(self):
self.driver.close()
def parse(self, response):
self.driver.get(response.url)
#let JavaScript Load
time.sleep(3)
try:
optionsList = []# options attribute value
dropdown = self.driver.find_element_by_id("jshop_attr_id13")#my dropdown element name
options = dropdown.find_elements_by_tag_name('option')
for option in options: #iterate over the options, place attribute value in list
optionsList.append(option.get_attribute("value"))
#hxs = Selector(response)
items = []
for optionValue in optionsList:
select = Select(self.driver.find_element_by_id("jshop_attr_id13"))#i found dropdown
select.select_by_value(optionValue)#and i click on n-value in my list
time.sleep(2)
item = ColomboItem()
item['price']=self.driver.find_element_by_xpath('//*[@id="block_price"]').text
item['name'] = self.driver.find_element_by_xpath('.//h1').text
options_value = self.driver.find_element_by_id("jshop_attr_id13")
for option in options_value.find_elements_by_tag_name('option'):
if option.get_attribute("value") == optionValue:
item['color'] = option.get_attribute("innerHTML")
items.append(item)
return iter(items)
except Exception:
items = []
item = ColomboItem()
item['price']=self.driver.find_element_by_xpath('//*[@id="block_price"]').text
item['name'] = self.driver.find_element_by_xpath('.//h1').text
items.append(item)
return iter(items)
输出
Chromo - Хром 3333.53 грн Ручка Colombo DEA FF21 (Код: FF21)
Chromat - Матовый хром 3817.33 грн Ручка Colombo DEA FF21 (Код: FF21)