我想从某个来源(链接或文件等)获取 HTML,并从中查找所需的值。HTML 的格式如下:
<!doctype html>
<html>
<body>
<main>
<section id="serp">
<div>
<article>a</article>
<article>b</article>
<article>c</article>
<article>d</article>
</div>
</section>
</main>
</body>
</html>
首先我使用了 cheerio。根据文档,我写了如下代码:
// Question snippet: tries to read the child elements of the <div> inside #serp.
// Load the cheerio module.
const cheerio = require('cheerio');
// Parse the HTML text held in `myhtml`. That variable is not defined in this
// snippet; presumably it holds the document shown above (TODO confirm).
const $ = cheerio.load(myhtml);
// Select every <div> nested inside the element with id "serp", then take the
// direct children of those <div> elements.
const content = $('#serp div').children();
console.log(content); // the asker reports that this prints null
按照同样的步骤,我又尝试了 x-ray 和 jsdom,但它们全都输出 null。
答案 0 :(得分:0)
我做了以下事情:
"segment": {
"_parent": {
"type": "tm"
},
"_routing": {
"required": "true"
},
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"@version": {
"type": "string"
},
"source": {
"type": "string",
"store": "true",
"fields": {
"length": {
"type": "token_count",
"analyzer": "standard"
}
}
},
"target": {
"type": "string",
"store": "true",
"fields": {
"length": {
"type": "token_count",
"analyzer": "standard"
}
}
}
}
}
下面的脚本会把结果输出到控制台:
// Self-contained reproduction: the markup from the question is inlined so the
// script runs without any external input. The binding is never reassigned,
// hence `const`.
const myhtml = `<!doctype html>
<html>
<body>
<main>
<section id="serp">
<div>
<article>a</article>
<article>b</article>
<article>c</article>
<article>d</article>
</div>
</section>
</main>
</body>
</html>`;
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
// Direct children of the <div> inside #serp, i.e. the <article> elements.
const content = $('#serp div').children();
// Logs the cheerio selection object itself.
console.log(content);
// .html() on a multi-element selection returns the inner HTML of the FIRST
// matched element only, so this line shows just the first article's content.
console.log(`html: ${content.html()}`);
// Collect the text of every matched <article> so that all values are shown,
// not only the first one.
const values = content.map((_, el) => $(el).text()).get();
console.log(`values: ${values.join(', ')}`);