Node.js cheerio显示错误的html源代码

时间:2018-03-03 18:54:46

标签: javascript node.js

我正在尝试抓取此代码中给出的网址,以获取类为 ng-binding 的 p 标记的坐标。

我得到以下html,这与我手动查看源代码时得到的不一样。有人可以指导我吗?

// Downloads the page with `request`, logs the raw HTML, then uses cheerio to
// read the text of every <p class="ng-binding"> element and logs it.
//
// NOTE(review): `request` only returns the HTML the server sends; it does not
// execute the page's scripts. If the target <p> is rendered client-side (the
// logged response appears to contain only an empty <div id="app-mount">),
// the selector below matches nothing and an empty string is logged.
const request = require('request');
const cheerio = require('cheerio');
const logger = require('winston');

request('https://pokedex100.com/discord/coord=2zHeG5fz71icW', (error, response, html) => {
    // Surface transport failures instead of silently doing nothing.
    if (error) {
        logger.error(error);
        return;
    }

    // Anything other than 200 is not the page we want to parse.
    if (response.statusCode !== 200) {
        logger.error(`Unexpected status code: ${response.statusCode}`);
        return;
    }

    logger.info(html);

    const $ = cheerio.load(html);
    const cord = $('p.ng-binding').text();

    // Logged at error level, as in the original snippet, so the output
    // matches the log shown with the question.
    logger.error(cord);
});

修改

22:32:43 - info: <!DOCTYPE html><html>

<head>
  <meta charset="utf-8" />
  <meta content="width=device-width, initial-scale=1.0, maximum-scale=1, user-scalable=no" name="viewport">

  <meta property="og:type" content="website" />  <meta property="og:site_name" content="Discord" />  <meta property="og:title" content="Discord - Free voice and text chat for gamers" />  <meta property="og:description" content="Step up your game with a modern voice & text chat app. Crystal clear voice, multiple server and channel support, mobile apps, and more. Get your free server now!"  /><meta property="og:image" content="https://discordapp.com/assets/ee7c382d9257652a88c8f7b7f22a994d.png" />  <meta name="twitter:card" content="summary_large_image">  <meta name="twitter:site" content="@discordapp">  <meta name="twitter:creator" content="@discordapp">
  <link rel="chrome-webstore-item" href="https://chrome.google.com/webstore/detail/lcbhdgefieegnkbopmgklhlpjjdgmbog">
<link rel="stylesheet" href="/assets/24cfd050a820092e88717a7b474a1087.css" integrity="sha256-WYgktx8T6Pkz7IBvCEpViHtcNBJDI9tykxV3SpLkz+s= sha512-fXegHnQBxgOX43MAMz9XZZpa4gxVLtgwj2vfRn/OOsMUGT+VJYqti68o0ok7QunHggCNlv1oNsM1DDl9o+jZJw=="><link rel="icon" href="/assets/07dca80a102d4149e9736d4b162cff6f.ico" />  <title>Discord</title>
</head>

<body>
  <div id="app-mount"></div>
  <script>window.__require = window.require</script>
  <script>window.__OVERLAY__ = /overlay/.test(location.pathname)</script><script>!function(){if(null!=window.WebSocket){var n=function(n){try{var e=localStorage.getItem(n);return null==e?null:JSON.parse(e)}catch(n){return null}},e=n("token"),o=n("gatewayURL");if(e&&o){var r=null!=window.__require?"etf":"json",t=o+"/?encoding="+r+"&v=6";void 0!==window.Uint8Array&&(t+="&compress=zlib-stream"),console.log("[FAST CONNECT] "+t+", encoding: "+r+", version: 6");var a=new WebSocket(t);a.binaryType="arraybuffer";var s=Date.now(),i={open:!1,gateway:t,messages:[]};a.onopen=function(){console.log("[FAST CONNECT] connected in "+(Date.now()-s)+"ms"),i.open=!0},a.onclose=a.onerror=function(){window._ws=null},a.onmessage=function(n){i.messages.push(n)},window._ws={ws:a,state:i}}}}();</script><script src="/assets/ef85b442dc6e960fcdb2.js" integrity="sha256-YRcm2EGe1y248RLha42j5bJzgo7unYVPfEaRjIfI6OU= sha512-5rFJb426vcPzMhaOWbyhgHe5rP59EMHbQlYChDbJ5Ivjxkg3HBcF4qhU6d6OMxNaQ8ivLJGiReIVUNyQYKOE9A=="></script><script src="/assets/f36f220d9b6f06eed734.js" integrity="sha256-gERbeDHAE3txb+KP87BEkbIawMeMaMnLTZ5gGRXpYv8= sha512-2mJlwmLdySqA+3psIzKCkNG+ThSLeOAls13fO6sFLQveqomiys1HAei4MHx4eCYizZ1uzXqJWq6gmrmMpzHH+Q=="></script></body>

</html>
22:32:43 - error:

const browser = await puppeteer.launch({ headless: false, timeout: 1000000 });
let page = await browser.newPage();
page.setDefaultNavigationTimeout(1000000)
await page.goto('https://pokedex100.com/?d=dcgrtr4WmaRvW', { waitUntil: "domcontentloaded" });
await page.waitForSelector('input[id="register-email"]', { timeout: 1000000 });
await page.type('input[id="register-email"]', "my_email_id")
await page.type('input[id="register-password"]', "my_login_password")
await page.click('button[class="btn btn-primary"]')
await page.waitForSelector('button[class="primary"]', { timeout: 1000000 })
await page.click('button[class="primary"]')
page = await browser.newPage(); // See justification below
page.waitFor(5000)
page.setDefaultNavigationTimeout(1000000)
await page.goto('https://pokedex100.com/?d=dcgrtr4WmaRvW', { waitUntil: "domcontentloaded" })
await page.waitForSelector('p[class="ng-binding"]', { timeout: 1000000 }).then(async cords => {
    //let cord = await page.$('p[class="ng-binding"]')
    console.log("target")
    console.log(page.target)
    page.$eval('p[class="ng-binding"]', element => {
        console.log("element.innerHTML")
        console.log(element.innerHTML)
        console.log(element.textContent)
        console.log(element.nodeName + ' ' + element.nodeValue)
    })
})

我使用 page = browser.newPage() 的原因是:我观察到页面加载需要时间,但是当我打开新窗口时加载速度更快。

1 个答案:

答案 0 :(得分:0)

页面内容似乎是由页面上的脚本加载的。 request只是获取html,因此它不会执行任何脚本。您可以使用无头浏览器(例如puppeteer)加载页面并允许在抓取之前填充内容。