我正在尝试从此网址https://www.farfetch.com/shopping/men/gucci-white-rhyton-web-print-leather-sneakers-item-12889013.aspx?storeid=9359抓取一些数据
html看起来像这样:
<div class="cdb2b6" id="bannerComponents-Container">
<p class="_41db0e _527bd9 eda00d" data-tstid="merchandiseTag">New Season</p>
<div class="_1c3e57">
<h1 class="_61cb2e" itemprop="brand" itemscope="" itemtype="http://schema.org/Brand">
<a href="/shopping/men/gucci/items.aspx" class="fd9e8e e484bf _4a941d f140b0" data-trk="pp_infobrd" data-tstid="cardInfo-title" itemprop="url" aria-label="Gucci">
<span itemprop="name">Gucci</span>
</a>
</h1>
</div>
</div>
我在 Scrapy shell 中运行了response.xpath('//div[@id="bannerComponents-Container"]/@class')
,但我得到的却是:
In [1]: response.xpath('//div[@id="bannerComponents-Container"]/@class')
Out[1]: []
为什么?我在Amazon,Ebay等上遇到了类似的问题,而我的xpath选择器似乎不起作用
答案 0 :(得分:2)
这是因为var map; // Google Maps map instance; assigned inside initMap()
var llOffset; // size of grid squares (in degrees); written by setllOffset()
var countryOff = 1.6; // grid-square size handed to setllOffset() by initMap() and by the "Clear Squares" control
var countryZoom = 4; // zoom level applied when the "Clear Squares" control is clicked
var gridSquares = []; // rectangles pushed by createGridBox(); iterated by clearSquares()
var infoWindow; // declared here but not referenced anywhere in the visible code
/**
 * Builds the "Clear Squares" custom control inside `controlDiv`.
 *
 * Called as a constructor (`new ZoomControl(div, map)`); it only has side
 * effects: it appends two nested <div> elements to `controlDiv` and registers
 * a click handler on the outer one.
 *
 * @param {HTMLElement} controlDiv - Element that receives the control's DOM.
 * @param {google.maps.Map} map - Map whose zoom is reset when the control is
 *     clicked. Note that this parameter shadows the file-level `map` variable.
 */
function ZoomControl(controlDiv, map) {
  // Outer element of the control (presumably styled by a #countryZoomUI CSS rule).
  var countryZoomUI = document.createElement('div');
  countryZoomUI.id = 'countryZoomUI';
  countryZoomUI.title = 'Click to go to clear squares';
  controlDiv.appendChild(countryZoomUI);

  // Inner element holding the label (presumably styled by #countryZoomText).
  var countryZoomText = document.createElement('div');
  countryZoomText.id = 'countryZoomText';
  // The label is plain text, so textContent is used rather than innerHTML.
  countryZoomText.textContent = 'Clear Squares';
  countryZoomUI.appendChild(countryZoomText);

  // On click: detach all drawn squares, restore the country-level grid size
  // and reset the map to the country-level zoom.
  countryZoomUI.addEventListener('click', function() {
    clearSquares();
    setllOffset(countryOff);
    map.setZoom(countryZoom);
  });
}
// GOOGLE MAP INITIALIZE
/**
 * Creates the Google map in the element with id "map" and wires up the grid UI.
 *
 * Side effects: assigns the file-level `map`, sets the grid-square size to
 * `countryOff`, adds the ZoomControl element to the map's TOP_CENTER control
 * slot, registers a map click listener that calls createGridBox(), and calls
 * DrawGridOn().
 */
function initMap() {
  map = new google.maps.Map(document.getElementById('map'), {
    center: {lat: 33.00, lng: -100.00},
    zoom: 4,
    scaleControl: true
  });
  setllOffset(countryOff);

  // Create the DIV to hold the control and call the ZoomControl()
  // constructor passing in this DIV. The constructor is invoked only for
  // its side effects, so its result is not kept.
  var zoomControlDiv = document.createElement('div');
  new ZoomControl(zoomControlDiv, map);
  zoomControlDiv.index = 1;
  map.controls[google.maps.ControlPosition.TOP_CENTER].push(zoomControlDiv);

  // On click, create a filled-in rectangle for the clicked grid cell.
  google.maps.event.addListener(map, 'click', function (event) {
    createGridBox(event.latLng);
  });
  DrawGridOn();
}
// Build the map once the page has finished loading. The standard DOM
// addEventListener is used in place of the deprecated
// google.maps.event.addDomListener helper.
window.addEventListener('load', initMap);
// Flag toggled by DrawGridOn()/DrawGridOff(). Declared explicitly so the
// toggles do not create an implicit global (which throws a ReferenceError in
// strict mode). No reader of this flag is visible in this file.
var drawGridBox = false;

/** Sets the `drawGridBox` flag to true. */
function DrawGridOn() {
  drawGridBox = true;
}

/** Sets the `drawGridBox` flag to false. */
function DrawGridOff() {
  drawGridBox = false;
}
/**
 * Stores the grid-square size in the file-level `llOffset` variable, which
 * createGridBox() uses to compute rectangle bounds.
 *
 * @param {number} offset - New grid-square size (in degrees, per the comment
 *     on the `llOffset` declaration).
 */
function setllOffset(offset) {
  llOffset = offset;
}
/**
 * Detaches every rectangle stored in `gridSquares` from the map and then
 * empties the list.
 *
 * The array is truncated in place (same array object) so that already-cleared
 * rectangles are not kept alive and re-processed on every later call.
 */
function clearSquares() {
  for (var i = 0; i < gridSquares.length; i++) {
    gridSquares[i].setMap(null);
  }
  gridSquares.length = 0;
}
/**
 * Creates a filled-in rectangle covering the grid cell that contains `point`
 * (called when the map is clicked).
 *
 * The cell is found by snapping the point's latitude and longitude down to
 * the nearest multiple of `llOffset`; the rectangle spans one `llOffset` in
 * each direction from that corner. The rectangle is added to the file-level
 * `map` and recorded in `gridSquares`. Clicking the rectangle removes it from
 * the map and from `gridSquares`.
 *
 * @param {google.maps.LatLng} point - Clicked position; must expose `lat()`
 *     and `lng()`.
 */
function createGridBox(point) {
  // South-west corner of the grid cell containing the point.
  var south = Math.floor(point.lat() / llOffset) * llOffset;
  var west = Math.floor(point.lng() / llOffset) * llOffset;

  var rectangle = new google.maps.Rectangle({
    strokeColor: '#0000FF',
    strokeOpacity: 0.7,
    strokeWeight: 2,
    fillColor: '#0000FF',
    fillOpacity: 0.25,
    map: map,
    bounds: {
      north: south + llOffset,
      south: south,
      east: west + llOffset,
      west: west
    }
  });

  google.maps.event.addListener(rectangle, 'click', function() {
    rectangle.setMap(null);
    // Also drop it from the bookkeeping list so removed rectangles do not
    // accumulate there.
    var index = gridSquares.indexOf(rectangle);
    if (index !== -1) {
      gridSquares.splice(index, 1);
    }
  });

  gridSquares.push(rectangle);
}
。定义一个 headers 即可得到你想要的结果。请尝试以下代码:如果去掉 headers
,则结果将变为 None。
headers
输出:
import requests
from scrapy import Selector
LINK = 'https://www.farfetch.com/bd/shopping/men/gucci-white-rhyton-web-print-leather-sneakers-item-12889013.aspx?storeid=9359'
def get_item(url):
    """Fetch ``url`` and print the name found in the banner container.

    The request sends a browser-like ``User-Agent`` header; the surrounding
    answer notes that without it the lookup yields nothing. Prints ``None``
    when the XPath matches no node.

    Args:
        url: Address of the product page to download.
    """
    # A timeout keeps the call from hanging forever on an unresponsive server.
    res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
    sel = Selector(res)
    name = sel.xpath('//div[@id="bannerComponents-Container"]//span[@itemprop="name"]/text()').extract_first()
    print(name)
if __name__ == '__main__':
    # Run the example scrape only when the file is executed as a script.
    get_item(LINK)