spider_closed() 函数没有被执行。如果函数体里只有 print 语句,它可以正常打印;但只要我在其中调用其他函数并返回值,它就不起作用了。
import scrapy
import re
from pydispatch import dispatcher
from scrapy import signals
from SouthShore.items import Product
from SouthShore.internalData import internalApi
from scrapy.http import Request
class bestbuycaspider(scrapy.Spider):
    """Spider over bestbuy.ca search-result pages for South Shore furniture.

    Each start URL is a search query for one furniture category. When the
    spider is closed, ``spider_closed`` is invoked through the
    ``signals.spider_closed`` signal registered in ``__init__``.
    """

    name = "bestbuy_dca"
    allowed_domains = ["bestbuy.ca"]
    start_urls = [
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+beds",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+night+stand",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+headboard",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+desk",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+bookcase",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+dresser",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+tv+stand",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+armoire",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+kids",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+changing+table",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+baby",
    ]

    def __init__(self, jsondetails="", serverdetails="", *args, **kwargs):
        """Store the spider arguments and register the close handler.

        Args:
            jsondetails: Passed unchanged to ``internalApi`` on close.
            serverdetails: Passed unchanged to ``internalApi`` on close.
            *args: Forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded to ``scrapy.Spider.__init__``.
        """
        super(bestbuycaspider, self).__init__(*args, **kwargs)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
        self.jsondetails = jsondetails
        self.serverdetails = serverdetails
        self.data = []
        # Must exist before spider_closed indexes into it; the original never
        # created this attribute, so the handler would raise AttributeError.
        self.results = {}

    def parse(self, response):
        """Handle one downloaded search-result page.

        The parsing logic is elided in this listing; presumably it appends
        the scraped values to ``self.data`` -- TODO confirm.
        """
        #my stuff here

    def spider_closed(self, spider):
        """Signal handler run when the spider is closed.

        This must be a plain function. With a ``yield`` in the body, calling
        it only builds a generator object that nobody iterates, so none of
        the statements below would ever execute. Nothing is returned; keep
        the outcome on ``self.results`` (or persist it here) instead.

        Args:
            spider: The spider instance delivered with the signal.
        """
        print("returning values")
        self.results['extractedData'] = self.data
        # NOTE(review): this rebinding discards the 'extractedData' entry set
        # on the previous line. The statement order is kept from the original;
        # confirm whether the scraped data should be merged into the value
        # returned by internalApi instead.
        self.results = internalApi(self.jsondetails, self.serverdetails)
        print(self.results)
1)我想调用一些函数,并返回抓取到的值。
答案 0 :(得分:1)
您可以创建一个带有 close_spider() 方法的 Item Pipeline:
class MyPipeline(object):
    """Example item pipeline exposing a ``close_spider`` hook."""

    def close_spider(self, spider):
        """Run the end-of-crawl work; ``spider`` is not used here."""
        do_something_here()
不要忘记在 settings.py 中激活它(即把该 Pipeline 加入 ITEM_PIPELINES 设置),如上面的文档链接中所述。