我正在尝试通过10002邮政编码抓一个网页。 这是我的Scrapy Spyder
class HoytSpyder(scrapy.Spider):
name = "hoyt_usa"
allowed_domains = ["hoyt.com"]
start_urls = ["http://hoyt.com/find-a-dealer"]
def parse(self, response):
url = 'http://hoyt.com/find-a-dealer'
headers = {}
headers['Content-Type'] = 'application/x-www-form-urlencoded'
headers['Cookie'] = '__cfduid=db974b4cdb2e79ef45abf70b16fd9b83e1494447240; PHPSESSID=3re29iv8ejkl9fapv1kqk28jk7; _dc_gtm_UA-28625097-1=1; _ga=GA1.2.689069018.1494447289; _gid=GA1.2.581535200.1494451642; _gat_UA-28625'
headers['Referer'] = 'http://hoyt.com/find-a-dealer'
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
payload = {'hoyt_dealer_basic_search%5Bzip%5D':'10002', 'hoyt_dealer_basic_search%5Bradius%5D':'50', 'hoyt_dealer_basic_search%5Bgo%5D': '', 'hoyt_dealer_basic_search%5B_token%5D': 'FxDqyxsyOtDcLHBhsY1AR49Zhq9Oj2rmPeDxPvq0chg', 'hoyt_dealer_basic_search%5Bemail_address%5D': ''}
return FormRequest.from_response(response, formname='hoyt_dealer_basic_search', headers=headers, formdata=payload, callback=self.parse_dealers)
def parse_dealers(self, response):
print '+++++++++++'
html = response.xpath('//div[@class="row content-row"]//div[@class="medium-3"]')
print len(html)
print response.body
我在响应正文中有一些html,但没有结果。我也尝试过Formdata(url,headers = headers,formdata = payload) - 但这也没有帮助 我想我有一些问题,通过响应传递有效负载。 有任何想法吗?非常感谢
响应应包含var theMarkers = [items]。现在迈克尔是空的
答案 0 :(得分:1)
您需要为表单数据使用非URL编码的密钥:
payload = {
'hoyt_dealer_basic_search[Bemail_address]': '',
'hoyt_dealer_basic_search[_token]': 'FxDqyxsyOtDcLHBhsY1AR49Zhq9Oj2rmPeDxPvq0chg',
'hoyt_dealer_basic_search[go]': '',
'hoyt_dealer_basic_search[radius]': '50',
'hoyt_dealer_basic_search[zip]': '10002'}
示例scrapy shell会话:
>>> url = 'http://hoyt.com/find-a-dealer'
>>> headers = {}
>>> headers['Content-Type'] = 'application/x-www-form-urlencoded'
>>> headers['Cookie'] = '__cfduid=db974b4cdb2e79ef45abf70b16fd9b83e1494447240; PHPSESSID=3re29iv8ejkl9fapv1kqk28jk7; _dc_gtm_UA-28625097-1=1; _ga=GA1.2.689069018.1494447289; _gid=GA1.2.581535200.1494451642; _gat_UA-28625'
>>> headers['Referer'] = 'http://hoyt.com/find-a-dealer'
>>> headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
>>> payload = {'hoyt_dealer_basic_search[zip]':'10002', 'hoyt_dealer_basic_search[radius]':'50', 'hoyt_dealer_basic_search[go]': '', 'hoyt_dealer_basic_search[_token]': 'FxDqyxsyOtDcLHBhsY1AR49Zhq9Oj2rmPeDxPvq0chg', 'hoyt_dealer_basic_search[Bemail_address]': ''}
>>> frq = scrapy.FormRequest.from_response(response, formname='hoyt_dealer_basic_search', headers=headers, formdata=payload)
>>> fetch(frq)
2017-05-11 10:10:29 [scrapy.core.engine] DEBUG: Crawled (200) <POST http://hoyt.com/find-a-dealer> (referer: http://hoyt.com/find-a-dealer)
>>> print(response.xpath('//script/text()').extract()[2])
//Generate Markers Value Array
var theMarkers = [
{"title":'Gotham Archery',"lat": '40.684381',"lng": '-73.980437',"description":
'<strong>Gotham Archery</strong><br />480 Baltic Street'}
, {"title":'Pro Line Archery Lanes, Inc.',"lat": '40.684494',"lng": '-73.850427',"description":
'<strong>Pro Line Archery Lanes, Inc.</strong><br />9511 101st Ave'}
, {"title":'Queen`s Archery',"lat": '40.760434',"lng": '-73.799188',"description":
'<strong>Queen`s Archery</strong><br />170-20 39TH Ave'}
, {"title":'Targeteers SG2, Inc.',"lat": '40.902033',"lng": '-74.100102',"description":
'<strong>Targeteers SG2, Inc.</strong><br />P O Box 878'}
, {"title":'Extreme Archery, Inc.',"lat": '40.955281',"lng": '-73.737657',"description":
'<strong>Extreme Archery, Inc.</strong><br />801 East Boston Post Rd'}
, {"title":'C & B Archery',"lat": '40.766663',"lng": '-73.516470',"description":
'<strong>C & B Archery</strong><br />11 Commercial Street'}
, {"title":'American Outdoor Sports',"lat": '40.725297',"lng": '-73.444737',"description":
'<strong>American Outdoor Sports</strong><br />238 Route 109'}
, {"title":'Davis Sport Shop',"lat": '41.160697',"lng": '-74.189154',"description":
'<strong>Davis Sport Shop</strong><br />P.O. Box 87'}
, {"title":'Heritage Guild Branchburg',"lat": '40.543740',"lng": '-74.660040',"description":
'<strong>Heritage Guild Branchburg</strong><br />3321 Hwy 22 East'}
, {"title":'Garden State Bow & Reel',"lat": '41.088300',"lng": '-74.514420',"description":
'<strong>Garden State Bow & Reel</strong><br />2760A Route 23 North'}
, {"title":'A & M Archery',"lat": '40.084328',"lng": '-74.203837',"description":
'<strong>A & M Archery</strong><br />201 2nd Street'}
, {"title":'Simon Peter Sport, CO.',"lat": '41.036057',"lng": '-74.764550',"description":
'<strong>Simon Peter Sport, CO.</strong><br />660 Route 206 South'}
];
var theMapOptions = {
center: new google.maps.LatLng(40.715523, -73.988379),
zoom: 8
};