Question

我想从某个来源（链接或文件等）获取 HTML，并从中找到所需的值。HTML 的格式如下：

<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>

首先我使用了 cheerio。根据文档，我写了如下代码：

// Reproduction snippet from the question: load an HTML string into cheerio and
// select the direct children of the <div> nested under the element with id "serp".
// NOTE(review): `myhtml` is not defined in this snippet — presumably it holds the
// HTML document shown above; confirm how it is obtained (link or file).
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
const content = $('#serp div').children();
console.log(content); // null (output as reported by the asker)

按照同样的步骤，我又尝试了 x-ray 和 jsdom，但它们全都输出 null。

Answer 1

我做了以下事情：

"segment": {
            "_parent": {
                "type": "tm"
            },
            "_routing": {
              "required": "true"
            },
            "properties": {
                "@timestamp": {
                    "type": "date",
                    "format": "strict_date_optional_time||epoch_millis"
                },
                "@version": {
                    "type": "string"
                },
                "source": {
                    "type": "string",
                    "store": "true",
                    "fields": {
                        "length": { 
                            "type":     "token_count",
                            "analyzer": "standard"
                        }
                    }
                },
                "target": {
                    "type": "string",
                    "store": "true",
                    "fields": {
                        "length": { 
                            "type":     "token_count",
                            "analyzer": "standard"
                        }
                    }
                }
            }
        }

下面的代码会将结果输出到控制台：

// Self-contained example: the HTML document is embedded as a template literal
// so the script can be run directly with Node.js (requires the `cheerio` package).
// Declared with `const` because the string is never reassigned.
const myhtml = `<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>`;

const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);

// Select the direct children of the <div> nested under #serp,
// i.e. the four <article> elements of the document above.
const content = $('#serp div').children();
console.log(content);

// .html() only returns the inner HTML of the FIRST matched element,
// so this line shows a single article's content even though several matched.
console.log(`html: ${content.html()}`);

// To read the value of every matched element, map each one to its text and
// convert the resulting cheerio collection into a plain array with .get().
const values = content.map((index, element) => $(element).text()).get();
console.log(`values: ${values.join(', ')}`);

使用 Node.js 进行 HTML 操作

1 个答案: