我花了很多时间反复尝试并在谷歌上搜索,但仍然无法解决我的问题。
以下代码可以正常运行,但我想把额外的参数传递给 parse_item。使用 Rule(规则)时该如何做到这一点?
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
import mysql.connector
from scrapy.http import Request
class FirstSpider(CrawlSpider):
    """Crawl domains read from a MySQL table and dump each fetched page to disk.

    Start URLs are built from the ``Domain`` column of the ``Crawlbydomain``
    table. Responses for the start URLs and for every link extracted by
    ``rules`` are handled by :meth:`parse_item`.
    """

    name = 'firstspider'

    def start_requests(self):
        """Yield one ``Request`` per row selected from ``Crawlbydomain``.

        All rows are fetched before the first request is yielded, so the
        cursor and the connection are closed up front instead of being held
        open (and leaked) for the whole lifetime of the crawl.
        """
        conn = mysql.connector.connect(
            user='root', password='root', host='localhost', database='Eistee')
        try:
            cursor = conn.cursor()
            try:
                query = ("SELECT Domain, CompanyName FROM Crawlbydomain LIMIT 300, 100")
                cursor.execute(query)
                results = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()

        for result in results:
            # result[0] is the Domain column; result[1] (CompanyName) is
            # selected by the query but not used here.
            urlrequest = 'http://' + result[0]
            yield Request(urlrequest)

    # NOTE(review): `allow` entries are regular expressions, so the unescaped
    # dot in '.ch' matches any character followed by "ch", not only a literal
    # ".ch" -- confirm whether r'\.ch' was intended.
    rules = (Rule(SgmlLinkExtractor(allow=('.ch', )), callback='parse_item', follow=True),)

    def parse_item(self, response):
        """Write the raw response body to a file named after the response URL.

        The file name is the URL with every '.' and '/' removed; the file is
        created in the current working directory and overwritten if present.
        """
        # Chained replace() gives the same result as the Python 2-only
        # ``translate(None, './')`` and also works on unicode / Python 3 str.
        filename = response.url.replace('.', '').replace('/', '')
        # Context manager guarantees the handle is flushed and closed.
        with open(filename, 'wb') as page_file:
            page_file.write(response.body)

    # Route responses for the start URLs through the same handler as the
    # responses for links extracted by the rules.
    parse_start_url = parse_item
如何才能把每个请求对应的新值成功传递给 parse_item?
是否可以把参数传给 Rule?