这是我的蜘蛛:
# -*- coding: utf-8 -*-
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class ExampleSpider(CrawlSpider):
name = 'example'
allowed_domains = ['www.example.com']
start_urls = ['http://www.example.com']
rules = (
Rule(LinkExtractor(allow=('/items/.'),deny=('sendMessage')), follow=True),
Rule(LinkExtractor(allow=('/item/[a-z\+]+\-[0-9]+') ,deny=('sendMessage')), callback='parse_item', follow=False),
)
def parse_item(self, response):
name = response.xpath('//*[@id="main"]/h1/text()').extract_first()
locations = response.xpath("//*[@id='mylocation']//div[@class='location']")
list_location = []
for (i, location) in enumerate(locations):
location_tab = {}
phoneitem = []
data = [
('somedata', ''),
('somedata', ''),
('somedata', ''),
]
request = scrapy.FormRequest(
'http://www.example.com/ajax/my-phone',
formdata= data,
callback=self.parse_for_number
)
request.meta['phoneitem'] = phoneitem
yield request
location_tab['phone'] = phoneitem
location_title = location.xpath("string(.//h3)").extract()
location_title[0] = ' '.join(location_title[0].split())
location_address = location.xpath(".//p[@class='address']").extract()
#if not location_title:
# location_title = location.xpath("h3/a/text()").extract()
location_tab['location_title'] = location_title[0].encode('utf-8')
location_tab['location_address'] = location_address[0].encode('utf-8')
list_location.append(location_tab)
location_title = ''
location_address = ''
location_tab = {}
if name :
yield { 'name' : name,
'url' : url,
'locations' : list_location
}
def parse_for_number(self, response):
phoneitem = request.meta['phoneitem']
phones = response.xpath("//li[@class='phone']/text()").extract()
for phone in phones:
phoneitem.append(' '.join(phone.split()))
yield phoneitem
我的蜘蛛包括检索人名(姓名)以及他们的地址和每个地址中的电话号码(电话号码通过带变量的ajax请求显示)。
手机列表(phoneitem)始终为空。错误在哪里?
编辑:
如何将parse_for_number函数的结果存储在变量中,并在之后使用它?
由于我在FOR循环中调用FormRequest,因此无法为每次执行函数生成电话变量。
答案 0 :(得分:0)
已解决!
我使用请求库
import requests
request = requests.post('http://www.example.com/ajax/my-phone', cookies=cookies, data=data)
phones = Selector(text=request.content).xpath("//li[@class='phone']/text()").extract()