尝试运行我的scrapy脚本时,我一直收到以下错误。
错误:
2017-06-08 07:32:02 [scrapy.core.scraper] ERROR: Spider error processing <GET https://xxxx.orderxxxx.com/PartLookup.aspx?company=001&language=en-us> (referer: None)
以下是我的代码:
import scrapy
from scrapy.http import *
from scrapy.selector import Selector
import json
from scrapy.utils.response import open_in_browser
class TestSpider(scrapy.Spider):
    """Replay the ASP.NET postback flow of the part-lookup site.

    Flow: GET the landing page -> POST the __VIEWSTATE form back
    (``parse``) -> pull the fresh session id / viewstate out of the
    response and call the JSON search endpoint (``after_parse``) ->
    inspect the search result (``after_search``).

    NOTE(review): the original paste had every line at column 0, so the
    three ``def``s were module-level functions instead of methods.  The
    spider instance therefore inherited the abstract ``Spider.parse``,
    which is exactly the ``raise NotImplementedError`` in the traceback.
    Restoring the indentation is the primary fix.
    """

    name = "test"
    allowed_domains = ['xxxx.orderxxxx.com']
    start_urls = ['https://xxxx.orderxxxx.com/vehiclePartLookupByZip.aspx?company=001&language=en-us']

    def parse(self, response):
        """Submit the ASP.NET change-page form with a canned viewstate."""
        form_data = {
            'VarsSessionID': '',
            '__VIEWSTATE': '/wEPDwULLTE3NDkxNjc2NjIPZBYCAgEPZBYCAgcPZBYCZg9kFgYCCQ8PFgIeB1Zpc2libGVoZGQCDQ8PFgIfAGhkZAIfD2QWCAIJDxBkEBVcES0gU2VsZWN0IGEgWWVhciAtBDIwMTgEMjAxNwQyMDE2BDIwMTUEMjAxNAQyMDEzBDIwMTIEMjAxMQQyMDEwBDIwMDkEMjAwOAQyMDA3BDIwMDYEMjAwNQQyMDA0BDIwMDMEMjAwMgQyMDAxBDIwMDAEMTk5OQQxOTk4BDE5OTcEMTk5NgQxOTk1BDE5OTQEMTk5MwQxOTkyBDE5OTEEMTk5MAQxOTg5BDE5ODgEMTk4NwQxOTg2BDE5ODUEMTk4NAQxOTgzBDE5ODIEMTk4MQQxOTgwBDE5NzkEMTk3OAQxOTc3BDE5NzYEMTk3NQQxOTc0BDE5NzMEMTk3MgQxOTcxBDE5NzAEMTk2OQQxOTY4BDE5NjcEMTk2NgQxOTY1BDE5NjQEMTk2MwQxOTYyBDE5NjEEMTk2MAQxOTU5BDE5NTgEMTk1NwQxOTU2BDE5NTUEMTk1NAQxOTUzBDE5NTIEMTk1MQQxOTUwBDE5NDkEMTk0OAQxOTQ3BDE5NDYEMTk0NQQxOTQ0BDE5NDMEMTk0MgQxOTQxBDE5NDAEMTkzOQQxOTM4BDE5MzcEMTkzNgQxOTM1BDE5MzQEMTkzMwQxOTMyBDE5MzEEMTkzMAQxOTI5BDE5MjgVXBEtIFNlbGVjdCBhIFllYXIgLQQyMDE4BDIwMTcEMjAxNgQyMDE1BDIwMTQEMjAxMwQyMDEyBDIwMTEEMjAxMAQyMDA5BDIwMDgEMjAwNwQyMDA2BDIwMDUEMjAwNAQyMDAzBDIwMDIEMjAwMQQyMDAwBDE5OTkEMTk5OAQxOTk3BDE5OTYEMTk5NQQxOTk0BDE5OTMEMTk5MgQxOTkxBDE5OTAEMTk4OQQxOTg4BDE5ODcEMTk4NgQxOTg1BDE5ODQEMTk4MwQxOTgyBDE5ODEEMTk4MAQxOTc5BDE5NzgEMTk3NwQxOTc2BDE5NzUEMTk3NAQxOTczBDE5NzIEMTk3MQQxOTcwBDE5NjkEMTk2OAQxOTY3BDE5NjYEMTk2NQQxOTY0BDE5NjMEMTk2MgQxOTYxBDE5NjAEMTk1OQQxOTU4BDE5NTcEMTk1NgQxOTU1BDE5NTQEMTk1MwQxOTUyBDE5NTEEMTk1MAQxOTQ5BDE5NDgEMTk0NwQxOTQ2BDE5NDUEMTk0NAQxOTQzBDE5NDIEMTk0MQQxOTQwBDE5MzkEMTkzOAQxOTM3BDE5MzYEMTkzNQQxOTM0BDE5MzMEMTkzMgQxOTMxBDE5MzAEMTkyOQQxOTI4FCsDXGdnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnFgFmZAINDxBkEBUBES0gU2VsZWN0IGEgTWFrZSAtFQERLSBTZWxlY3QgYSBNYWtlIC0UKwMBZxYBZmQCEQ8QZBAVARItIFNlbGVjdCBhIE1vZGVsIC0VARItIFNlbGVjdCBhIE1vZGVsIC0UKwMBZxYBZmQCFQ8QZBAVARUtIFNlbGVjdCBhIENhdGVnb3J5IC0VARUtIFNlbGVjdCBhIENhdGVnb3J5IC0UKwMBZxYBZmRkW3snnFJQXlxa5cFaGgo4Mh5Tbr6vJZWrCYmbroXUnEs=',
        }
        yield FormRequest.from_response(
            response,
            formid='frmChangePage',
            formdata=form_data,
            method='POST',
            callback=self.after_parse,
            url='https://xxxx.orderxxxx.com/vehiclePartLookupByZip.aspx?company=001&language=en-us',
        )

    def after_parse(self, response):
        """Extract the server-issued session id / viewstate, then POST
        the vehicle search as JSON to the AJAX endpoint."""
        print("====RESPONSE===")
        print(response.headers)
        print("==========")
        print(response.request.headers)
        print("==========")
        # response.xpath() is the idiomatic shortcut for
        # Selector(response=response).xpath().
        vars_session_id = response.xpath("//*[@id='VarsSessionID']/@value").extract()[0]
        viewstate = response.xpath("//*[@id='__VIEWSTATE']/@value").extract()[0]
        print("VarsSessionID: " + vars_session_id)
        print("__VIEWSTATE: " + viewstate)
        url = "https://xxxx.orderxxxx.com/B2BAjaxCalls.asmx/js"
        payload = {
            "FormData": {
                'txtZipCode': '85015',
                'cboYears': ['2004'],
                'cboMake': ['HONDA'],
                'cboModel': ['CIVIC_COUPE'],
                'cboCategory': ['AXLE SHAFT'],
            },
        }
        headers = {
            'Accept': 'application/json, text/javascript, */*',
            'Accept-Encoding': 'gzip, deflate, br',
            'accept-language': 'en_US',
            'Connection': 'keep-alive',
            'content-type': 'application/json',
            # BUG FIX: a header value must be a string, not a dict, and
            # the session id extracted above was never used.  Send the
            # cookie in the standard "name=value" form.  (Alternatively,
            # drop this header entirely and pass cookies={'VarsSessionID':
            # vars_session_id} to Request so Scrapy's cookie middleware
            # manages it.)
            'Cookie': 'VarsSessionID=' + vars_session_id,
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
        }
        yield Request(
            url,
            callback=self.after_search,
            method='POST',
            body=json.dumps(payload),
            headers=headers,
        )

    def after_search(self, response):
        """Dump the search response headers and open it in a browser for
        manual inspection (debug helper only)."""
        print("========SEARCH HEADERS========")
        print(response.headers)
        print(response.request.headers)
        open_in_browser(response)
运行时 Scrapy 不断抛出以下错误:
2017-06-08 07:32:02 [scrapy.core.scraper] ERROR: Spider error processing <GET https://xxxx.orderxxxx.com/vehiclePart.aspx?company=001&language=en-us> (referer: None)
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 577, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/local/lib/python2.7/dist-packages/scrapy/spiders/__init__.py", line 90, in parse
raise NotImplementedError
NotImplementedError
我想知道是否有人可以帮忙看一下,并指出解决这个错误的正确方向。提前非常感谢!