function getHTMLSource(url) {
return $http.get(url).then(function(response) {
var html = response.data;
url = getDetailPage(html)
return url ;
}
)};
getHTMLSource('http://www.footpatrol.co.uk/s:282524/search=282524/')
每当我尝试上面的代码时,我在控制台GET http://localhost:9000/templates/footpatrol.co.uk/_assets/images/content/footpatrol_logo.png 404 (Not Found)
中收到以下错误消息。
图片似乎存在于http://www.footpatrol.co.uk/templates/footpatrol.co.uk/_assets/images/content/footpatrol_logo.png
,但由于我正在使用Chrome的Allow-Control-Allow-Origin
插件在localhost上运行scipt,因此它看起来效果不佳。我不想获取我只想要源代码的图像,还有这个吗?
更新:我认为可能是我的解析器导致问题,因为此处会抛出错误消息
function getDetailPage(html) {
var temp = document.createElement('div');
temp.innerHTML = html;
var a = temp.querySelector('a[class*=\'fp-product-thumb-link\']');
var partOfUrl = a.href;
var splitUrl = partOfUrl.split('/');
var url = 'http://www.footpatrol.co.uk/' + splitUrl[3] + '/' + splitUrl[4];
var url = 'http://www.footpatrol.co.uk/' + splitUrl[3] + '/' + splitUrl[4];
$log.debug('Detail page url found: ' + url);
return url;
}
答案 0 :(得分:1)
function getDetailPage(html) {
//var temp = document.createElement('div');
//temp.innerHTML = html;
var parser = new DOMParser();
var temp = parser.parseFromString(html, "text/html");
var a = temp.querySelector('a[class*=\'fp-product-thumb-link\']');
//var partOfUrl = a.href;
//console.log(partOfUrl);
//var splitUrl = partOfUrl.split('/');
//var url = 'http://www.footpatrol.co.uk/' + splitUrl[3] + '/' + splitUrl[4];
var elem = angular.element(a);
var url = 'http://www.footpatrol.co.uk/' + elem.attr('href');
console.debug('Detail page url found: ' + url);
return url;
}
找到详细信息页面网址:http://www.footpatrol.co.uk//footwear/282524-air-retro-15-obsidian.html