如何检查网络流量并获取资源请求的URL?

时间:2017-08-29 14:23:20

标签: node.js google-chrome network-monitoring puppeteer

我希望监控网页的网络流量,并获取JavaScript网络事件中所有请求的URL,类似于PhantomJS的page.onResourceRequested所做的那样,但我不知道如何使用Google Chrome的Puppeteer来实现这一点。

我一直在研究Google Chrome的Puppeteer,但我不知道如何让它正常工作,因为它的输出看起来像这样:

Page {
    domain: null,
    _events: {
        request: [Function]
    },
    _eventsCount: 1,
    _maxListeners: undefined,
    _client: Session {
        domain: null,
        _events: {
            'Page.frameAttached': [Function],
            'Page.frameNavigated': [Function],
            'Page.frameDetached': [Function],
            'Runtime.executionContextCreated': [Function],
            'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
            'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
            'Network.responseReceived': [Function: bound _onResponseReceived],
            'Network.loadingFinished': [Function: bound _onLoadingFinished],
            'Network.loadingFailed': [Function: bound _onLoadingFailed],
            'Page.loadEventFired': [Function],
            'Runtime.consoleAPICalled': [Function],
            'Page.javascriptDialogOpening': [Function],
            'Runtime.exceptionThrown': [Function],
            'Security.certificateError': [Function],
            'Inspector.targetCrashed': [Function]
        },
        _eventsCount: 15,
        _maxListeners: undefined,
        _lastId: 14,
        _callbacks: Map {},
        _connection: Connection {
            domain: null,
            _events: {},
            _eventsCount: 0,
            _maxListeners: undefined,
            _url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
            _lastId: 17,
            _callbacks: Map {},
            _delay: 0,
            _ws: [Object],
            _sessions: [Object]
        },
        _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
        _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
    },
    _keyboard: Keyboard {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _modifiers: 0,
        _pressedKeys: Set {}
    },
    _mouse: Mouse {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _keyboard: Keyboard {
            _client: [Object],
            _modifiers: 0,
            _pressedKeys: Set {}
        },
        _x: 0,
        _y: 0,
        _button: 'none'
    },
    _frameManager: FrameManager {
        domain: null,
        _events: {
            frameattached: [Function],
            framedetached: [Function],
            framenavigated: [Function]
        },
        _eventsCount: 3,
        _maxListeners: undefined,
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _mouse: Mouse {
            _client: [Object],
            _keyboard: [Object],
            _x: 0,
            _y: 0,
            _button: 'none'
        },
        _frames: Map {
            '232.1' => [Object]
        },
        _mainFrame: Frame {
            _client: [Object],
            _mouse: [Object],
            _parentFrame: null,
            _url: 'http://mytestdomain.com/',
            _id: '232.1',
            _defaultContextId: 4,
            _waitTasks: Set {},
            _childFrames: Set {},
            _name: undefined,
            _loadingFailed: false
        }
    },
    _networkManager: NetworkManager {
        domain: null,
        _events: {
            request: [Function],
            response: [Function],
            requestfailed: [Function],
            requestfinished: [Function]
        },
        _eventsCount: 4,
        _maxListeners: undefined,
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _requestIdToRequest: Map {},
        _interceptionIdToRequest: Map {
            null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
        },
        _extraHTTPHeaders: Map {},
        _requestInterceptionEnabled: true,
        _requestHashToRequestIds: Multimap {
            _map: [Object]
        },
        _requestHashToInterceptions: Multimap {
            _map: Map {}
        }
    },
    _emulationManager: EmulationManager {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _emulatingMobile: false,
        _injectedTouchScriptId: null
    },
    _tracing: Tracing {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _recording: false,
        _path: ''
    },
    _pageBindings: Map {},
    _ignoreHTTPSErrors: false,
    _screenshotTaskQueue: TaskQueue {
        _chain: Promise {
            undefined
        }
    },
    _viewport: {
        width: 800,
        height: 600
    }
}

请告诉我如何通过Puppeteer获取JavaScript网络事件的所有网址?

3 个答案:

答案 0 :(得分:5)

请查看拦截图片请求的示例(sample)。该示例很容易修改,以便查看其他类型的资源请求:

// Puppeteer >= 1.0: setRequestInterceptionEnabled() was renamed to
// setRequestInterception(), and request.url is a method, not a property.
await page.setRequestInterception(true);
page.on('request', (request) => {
  // Match on the path only, so a query string or fragment
  // (e.g. "app.js?v=2") does not hide a script request.
  const { pathname } = new URL(request.url());
  if (/\.js$/i.test(pathname)) {
    // request for js resource
  }
  // With interception enabled, each request must be explicitly continued
  // (or aborted), otherwise it never completes.
  request.continue();
});
await page.goto('https://example.com');

答案 1 :(得分:0)

旧版Puppeteer中的page.setRequestInterceptionEnabled已重命名为:

page.setRequestInterception(value)

这是我在文档中找到的一段代码:

const puppeteer = require('puppeteer');

// Loads https://example.com while aborting every request whose URL ends in
// .png or .jpg; all other requests are allowed to continue.
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      // url() is a method since Puppeteer 1.0; read it once and reuse it.
      const url = interceptedRequest.url();
      if (url.endsWith('.png') || url.endsWith('.jpg'))
        interceptedRequest.abort();
      else
        interceptedRequest.continue();
    });
    await page.goto('https://example.com');
  } finally {
    // Close the browser even when navigation or setup fails, so no
    // Chrome process is left running.
    await browser.close();
  }
});

  

注意启用请求拦截将禁用页面缓存。

以下是Puppeteer文档的链接:Puppeteer Documentation

答案 2 :(得分:0)

我认为使用Page.on()监听器可以更准确地解决该问题,并且不会干扰网络流量。

类似的东西:

// Listeners that only observe: each request and response object is logged
// as it is emitted; no interception is enabled in this snippet.
// 'requestfinished' and 'requestfailed' are other events that can be used.
page.on('request', (request) => {
  console.log(request);
});
page.on('response', (response) => {
  console.log(response);
});