NodeJS x-ray web-scraper:循环回调中的多个url

时间:2016-05-16 17:59:49

标签: javascript node.js x-ray

我正在使用 x-ray,它很好用,但缺少教程。我有一个名为 urls 的 url 数组。在循环中,每个 url 都会被抓取,并通过回调返回结果。在回调函数中,我需要知道当前解析的是哪个 url。如果回调只返回错误和结果,我怎么知道结果对应的是哪个 url?(这究竟是 x-ray 的问题,还是 JS 本身的问题?)

 /**
  * Scrapes every entry of the `urls` array with x-ray and writes all scraped
  * products to ./results.json once every request has reported back.
  *
  * Each entry of `urls` is read as an object with a `url` property and
  * optional `category` / `subcategory` properties.
  *
  * Iterating with forEach gives every request its own `source` binding, so the
  * asynchronous callback always sees the entry that produced its results. A
  * shared `var` loop counter would already equal urls.length by the time any
  * callback runs.
  */
 xrayRamiLevy = function(){
   // One slot per url so the merged output keeps the order of `urls`,
   // regardless of the order in which the requests finish.
   var resultsBySource = [];
   var pending = urls.length;

   // Called once per finished request; writes the file after the last one so
   // that concurrent requests never overwrite each other's output.
   function finishOne(){
     pending -= 1;
     if (pending > 0) {
       return;
     }
     var merged = [].concat.apply([], resultsBySource);
     fs.writeFile("./results.json", JSON.stringify(merged, null, '\t'), function(writeErr){
       if (writeErr) {
         console.error('Could not write ./results.json:', writeErr);
       }
     });
   }

   urls.forEach(function(source, position){
     resultsBySource[position] = [];

     x(source.url, '.product_item',
       [{
         title : '.prodDescDiv h3',
         description : '.prodBrand',
         imageUrl : '.image_icons_zone .image img@src',
         onclick : '.image_icons_zone .image a @onclick',
       }]
     )(function(err, results){
       if (err) {
         // Report the failing url and keep going with the remaining ones.
         console.error('Failed to scrape ' + source.url + ':', err);
         finishOne();
         return;
       }

       var items = results || [];
       items.forEach(function(item){
         if (typeof item.onclick === 'string') {
           // The catalogue number is the text between the last two single
           // quotes of the onclick attribute.
           var head = item.onclick.substring(0, item.onclick.lastIndexOf("'"));
           item.catalogueNumber = head.slice(head.lastIndexOf("'") + 1);
         }
         delete item.onclick;

         // Collapse whitespace runs; leave a missing description untouched
         // instead of overwriting it with the catalogue number.
         if (item.description !== undefined && item.description.length > 0) {
           item.description = item.description.replace(/\s+/g, ' ').trim();
         }

         if (source.category !== undefined && source.category.length > 0) {
           item.categoriesIds = source.category;
         }

         if (source.subcategory !== undefined && source.subcategory.length > 0) {
           item.subcategoriesIds = source.subcategory;
         }
       });

       resultsBySource[position] = items;
       finishOne();
     });
   });
 };

2 个答案:

答案 0 :(得分:1)

我没有完全看懂你的示例,但是可以这样做:

// forEach invokes the function once per element, so every request gets its
// own `url` binding instead of sharing a single loop variable.
urls.forEach(function(url){
  // whatever you need to do to prep your call to x
  // NOTE(review): if `urls` holds objects (as in the question), pass `url.url`
  // as the first argument of x — confirm against the actual data.
  var callback = x(url, '.product_item', [{
    title : '.prodDescDiv h3' /* , ...remaining fields to extract */
  }]);
  callback(wrappedCallback(url));
});

/**
 * Builds a result handler that remembers which url it belongs to.
 *
 * @param {*} url - the url (or url entry) the request was made for.
 * @returns {function(Error, Array)} callback taking (err, results) that can
 *   still read `url` through the closure.
 */
function wrappedCallback(url) {
  return function(err, results){
    if (err) {
      console.error('Request failed for', url, err);
      return;
    }
    // url is defined here
  };
}

答案 1 :(得分:1)

下面是一个利用 JS 闭包获取 url 的示例。请注意,数组中的 URL 在回调函数内部可以直接访问。

var Xray = require('x-ray');
var util = require('util');
var x = Xray();

// Every invocation of the forEach arrow function has its own `urlToHandle`,
// and the result callback closes over it, so each callback knows which url
// its results came from.
const sitesToHandle = ['https://dribbble.com?x=1', 'https://dribbble.com?x=2'];
sitesToHandle.forEach((urlToHandle) => {
  // x(...) returns a function; calling it with a callback delivers the results.
  x(urlToHandle, 'li.group', [{
    title: '.dribbble-img strong',
    image: '.dribbble-img [data-src]@data-src',
  }])((err, results) => {
    if (err) {
      console.error(`failed to scrape ${urlToHandle}`, err);
      return;
    }
    console.log(`let's now handle the result of ${urlToHandle}, the results are ${util.inspect(results)}`);
  });
});

P.S. 另外,在处理返回的错误时,您可以参考我刚刚在这里写的错误处理指南。