如何抓取使用 JavaScript 的网站

时间:2013-07-01 18:53:43

标签: javascript jquery html web-scraping

我尽量长话短说:我正试图从这个网站上抓取信息:http://eu.battle.net/wow/en/character/uldaman/Dus/statistic#21:152

该列表中有一项“最高 2 人个人评级”(Highest 2 man personal rating),后面跟着一个数字,这个数字正是我要找的。它究竟存储在哪里,我该如何获取它?

提前致谢。

2 个答案:

答案 0 :(得分:1)

假设你正在使用 jQuery:

// Look up the value shown next to the "Highest 2 man personal rating" label.
// The label lives in a <dt>; its value is the <dd> that directly follows it.
// .next('dd') is used instead of .siblings('dd') because .siblings() would
// select every <dd> in the list and .text() would concatenate all of them.
$('#cat-152 dt')
    .filter(function() {
        // Trim so stray whitespace around the label does not defeat the match.
        return $(this).text().trim() === "Highest 2 man personal rating";
    })
    .next('dd')
    .text()

答案 1 :(得分:0)

const http = require('http');

// Statistics page to download (category 152 of the character's statistics).
const options = {
    host: 'eu.battle.net',
    path: '/wow/en/character/uldaman/Dus/statistic/152'
};

// Label of the <dt> element whose <dd> value is wanted.
const STAT_LABEL = 'Highest 2 man team rating';

/**
 * Extracts the value that follows a given statistic label in the page markup.
 *
 * Looks for the literal sequence `<dt>label</dt><dd>` and returns everything
 * up to the next `</dd>`, with all whitespace removed.
 *
 * @param {string} html - Markup to search.
 * @param {string} label - Exact text of the <dt> element.
 * @returns {string|null} The whitespace-stripped value, or null when the
 *     label or its closing </dd> cannot be found.
 */
function extractStatistic(html, label) {
    const marker = '<dt>' + label + '</dt><dd>';
    const markerIndex = html.indexOf(marker);
    if (markerIndex === -1) {
        return null;
    }

    // Start right after the marker so it never has to be stripped afterwards.
    const valueStart = markerIndex + marker.length;
    const valueEnd = html.indexOf('</dd>', valueStart);
    if (valueEnd === -1) {
        return null;
    }

    return html.slice(valueStart, valueEnd).replace(/\s/g, '');
}

http.get(options, (res) => {
    if (res.statusCode !== 200) {
        // Drain the response so the socket is released, then report the failure
        // instead of parsing a redirect or error page as if it were the data.
        res.resume();
        console.log('ERROR: unexpected HTTP status ' + res.statusCode);
        return;
    }

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted by string concatenation.
    res.setEncoding('utf8');

    let body = '';

    res.on('data', (chunk) => {
        body += chunk;
    });

    res.on('end', () => {
        const rating = extractStatistic(body, STAT_LABEL);
        if (rating === null) {
            console.log('ERROR: "' + STAT_LABEL + '" was not found in the response');
            return;
        }

        //***************** Here is the answer *******************
        console.log('Highest 2 man rating ', rating);
        //********************************************************
    });
}).on('error', (e) => {
    console.log('ERROR: ' + e.message);
});