抓取Netflix数据

时间:2016-03-26 21:56:46

标签: node.js web-scraping netflix

我正尝试以编程方式从Netflix获取我最近的观看记录,但在登录阶段遇到了一些问题。我当前的代码只会让Netflix返回“We were unable to process your request.”页面:

var request = require('request').defaults({jar: true});
var cheerio = require('cheerio');

var url = "https://www.netflix.com/Login?locale=en-GB&nextpage=https%3A%2F%2Fwww.netflix.com%2FWiViewingActivity";

// Fetch the login page, pull the auth token out of the form, then POST the
// credentials and print whatever page comes back.
request(url, submitLogin);

// Callback for the initial GET: does nothing unless the page loaded with 200.
function submitLogin(error, response, body) {
    if (error || response.statusCode !== 200) {
        return;
    }

    var $ = cheerio.load(body);
    var authCode = $("#login-form > input").attr("value");

    // NOTE: `url` already ends in a query string, so this appends a second "?"
    // and sends the credentials as URL parameters rather than as a form body.
    var credentials = "?email=myemail%40gmail.com&password=mypassword&RememberMe=on&authURL=" + authCode;
    request.post(url + credentials, {}, printResponseBody);
}

// Callback for the login POST: dumps the returned body to stdout.
function printResponseBody(err, response, body) {
    console.log(body);
}

有什么想法吗?

令人惊讶的是,在Google上搜索“Scraping Netflix”几乎找不到任何相关内容。

1 个答案:

答案 0 :(得分:10)

弄明白了,需要:

  1. 发送任何类型的用户代理字符串
  2. 使用request库的form参数
  3. 发送表单数据
  4. 手动发送Cookie

这是我的最终代码,它会获取最近观看的项目:

    var request = require('request').defaults({jar: true});
    var cheerio = require('cheerio');
    
    var url = "https://www.netflix.com/Login";
    
    // Log in to Netflix, then fetch the viewing-activity page and print the
    // text of its first ".seriestitle" element.
    //
    // Flow: GET the login page -> read the auth token from the form ->
    // POST the credentials as form data -> GET the activity page with the
    // cookies returned by the login response.
    // Every step reports its failure on stderr instead of failing silently
    // or throwing inside a callback.
    request(url, function (error, response, body) {
        if (error) {
            console.error("Failed to load the login page:", error);
            return;
        }
        if (response.statusCode !== 200) {
            console.error("Unexpected status for the login page: " + response.statusCode);
            return;
        }

        var userAgent = "NodeScrape";
        var $ = cheerio.load(body);
        // Value of the input directly under #login-form; posted back as authURL.
        var authCode = $("#login-form > input").attr("value");
        if (authCode === undefined) {
            console.error("Could not find the auth token in the login form.");
            return;
        }

        request.post({
            url: url,
            form: {
                "email": "email@gmail.com",
                "password": "password",
                "authURL": authCode,
                "RememberMe": "on"
            },
            headers: {
                'User-Agent': userAgent
            }
        }, function (loginError, loginResponse) {
            // On a transport error loginResponse is undefined, so bail out
            // before touching its headers.
            if (loginError) {
                console.error("Login request failed:", loginError);
                return;
            }

            var activityHeaders = {'User-Agent': userAgent};
            var cookies = loginResponse.headers['set-cookie'];
            // Only forward cookies that were actually returned; an undefined
            // header value would make the next request throw.
            if (cookies) {
                activityHeaders['Cookie'] = cookies;
            }

            request({
                url: "https://www.netflix.com/WiViewingActivity",
                headers: activityHeaders
            }, function (activityError, activityResponse, activityBody) {
                if (activityError) {
                    console.error("Failed to load the viewing activity page:", activityError);
                    return;
                }
                var $activity = cheerio.load(activityBody);
                console.log($activity(".seriestitle").eq(0).text());
            });
        });
    });