Question

我想抓取网站的帖子登录页面。我使用chrome登录到网站，然后使用chrome cookie inspector插件为网站提取Cookie。 https://chrome.google.com/webstore/detail/cookie-inspector/jgbbilmfbammlbbhmmgaagdkbkepnijn?hl=en

然后我使用node-crawler模块对网站进行爬网。

https://www.npmjs.com/package/crawler

但是我不能正确设置cookie。这是我的代码：

var Crawler = require("crawler");
 var fs = require('fs')
var c = new Crawler({
    maxConnections : 10,
    jQuery: false,
    headers:{Cookie: [
        {
          "domain": "www.example.com",
          "hostOnly": true,
          "httpOnly": true,
          "name": "BIGipServerfk.example.com-80",
          "path": "/",
          "sameSite": "no_restriction",
          "secure": false,
          "session": true,
          "storeId": "0",
          "value": "3358858762.20480.0000",
          "id": 1
        },
        {
          "domain": "www.example.com",
          "hostOnly": true,
          "httpOnly": true,
          "name": "JSESSIONID",
          "path": "/",
          "sameSite": "no_restriction",
          "secure": false,
          "session": true,
          "storeId": "0",
          "value": "49A78003F8C87804475AE5F151FC4BEE.0605",
          "id": 2
        }]},

    // This will be called for each crawled page
    callback : function (error, res, done) {

        if(error){
            console.log(error);
        }else{

            fs.writeFile('./example.html', res.body, ()=>{})
        }
        done();
    }
});

// Queue just one URL, with default callback
c.queue('https://www.example.com/');

这是我创建的唯一的东西：

https://github.com/bda-research/node-crawler/issues/165

https://github.com/bda-research/node-crawler/issues/187

如何设置Cookie？显然，该模块使用请求模块作为依赖项，我该如何使用艰难的cookie？

如何为节点搜寻器设置多个Cookie

0 个答案: