我试图抓取一个html页面并将其变成一个json对象
这是页面:
03-04-2017-output-1.txt
03-04-2017-output-2.txt
04-04-2017-output-1.txt
04-04-2017-output-2.txt
这是我的尝试:
<html><head><title>Index</title><meta charset="UTF-8"></head><body><div><p>[ <a href="index.html">Index</a> ] | [ <a href="config.html">Device Config</a> ]</p></div><div>Neighbors<pre>fe80::212:4b00:8b8:6ecb REACHABLE</pre></div><div>Default Route<pre>fe80::212:4b00:8b8:6ecb</pre></div><div>Routes<pre></pre></div><div>Sensors<pre>Battery Temp = 19 C
Battery Volt = 3320 mV
Air Pressure = 1031.12 hPa
Air Temp = 22.66 C
Object Temp = 12.375 C
Ambient Temp = 23.062 C
Light = 0.00 lux
HDC Humidity = 43.93 %RH
HDC Temp = 23.03 C
Acc X = 0.02 G
Acc Y = 0.02 G
Acc Z = -1.10 G
Gyro X = -2.93 deg per sec
Gyro Y = -2.74 deg per sec
Gyro Z = 5.18 deg per sec</pre></div><div>Page hits: 4<br>Uptime: 138 secs<br></div></body></html>
如何把 Sensors 这个 div 中的内容转换成一个对象,以传感器名称作为键、传感器读数作为对应的值?
更新:
感谢 RafalWiliński 的帮助,我基本上让它工作了,但对象中最后一个键的值里仍然包含了 div 的 HTML 内容
新代码:
var request = require('request');
var cheerio = require('cheerio');
// Fetch the device's status page and print its HTML wrapped in a JSON object.
request('http://[aaaa::212:4b00:c2a:b704]/index.html', function (error, response, html) {
  if (error || response.statusCode !== 200) {
    // Report the failure instead of silently printing nothing.
    console.error('Request failed:', error || `HTTP ${response.statusCode}`);
    return;
  }

  // JSON.parse() expects a JSON *string*. Passing it an object coerces the
  // object to "[object Object]" and throws a SyntaxError. The object literal
  // is already a JavaScript value, so it only needs to be serialized.
  const obj = { id: html };
  console.log(JSON.stringify(obj));
});
但我的输出是
var request = require('request');
var cheerio = require('cheerio');
// Fetch the device's status page and print the "Sensors" readings as JSON,
// using each sensor name as a key and its reading (with unit) as the value.
request('http://[aaaa::212:4b00:c2a:b704]/index.html', function (error, response, html) {
  if (error || response.statusCode !== 200) {
    // Report the failure instead of silently printing nothing.
    console.error('Request failed:', error || `HTTP ${response.statusCode}`);
    return;
  }

  // Parse the markup and read only the text of the <pre> inside the "Sensors"
  // <div>. Splitting the raw HTML by line would leave the surrounding tags
  // glued onto the first key and the last value.
  const $ = cheerio.load(html);
  const sensorText = $('div:contains("Sensors") > pre').first().text();

  const SEPARATOR = ' = ';
  const obj = {};
  for (const line of sensorText.split(/\r?\n/)) {
    const separatorAt = line.indexOf(SEPARATOR);
    if (separatorAt === -1) {
      // Not a "name = value" line (e.g. blank); skip it rather than storing undefined.
      continue;
    }
    const key = line.slice(0, separatorAt).trim();
    const value = line.slice(separatorAt + SEPARATOR.length).trim();
    obj[key] = value;
  }

  console.log(JSON.stringify(obj, null, ' '));
});
答案 0 :(得分:2)
您需要按“ = ”符号拆分每一行字符串。
符号前面的部分是键,后面的部分是对应的值。
以下函数可能会解决此问题:
/**
 * Converts newline-separated "name = value" text into a plain object.
 *
 * Each line is split at the first " = " only, so a value that itself
 * contains " = " is kept intact. Lines without the separator (blank lines,
 * headings, stray markup) and lines with an empty name are skipped instead
 * of producing keys whose value is undefined. Both "\n" and "\r\n" line
 * endings are accepted, and names and values are trimmed.
 *
 * @param {string} str - Text with one "name = value" pair per line.
 * @returns {Object<string, string>} Object mapping each name to its value;
 *   empty when str is null, undefined, or contains no pairs.
 */
function jsonify(str) {
  const SEPARATOR = ' = ';
  const obj = {};
  if (str == null) {
    return obj;
  }

  for (const line of String(str).split(/\r?\n/)) {
    const separatorAt = line.indexOf(SEPARATOR);
    if (separatorAt === -1) {
      continue;
    }
    const key = line.slice(0, separatorAt).trim();
    if (key === '') {
      continue;
    }
    obj[key] = line.slice(separatorAt + SEPARATOR.length).trim();
  }
  return obj;
}
答案 1 :(得分:1)
我建议您使用HTML解析器(我个人认为jQuery易于使用,但还有很多其他选择)来查找并获取特定元素的内容。然后,您可以对获取到的内容运行解析逻辑。
// Sample page markup, standing in for the HTTP response body.
const response = '<html><head><title>Index</title><meta charset="UTF-8"></head><body><div><p>[ <a href="index.html">Index</a> ] | [ <a href="config.html">Device Config</a> ]</p></div><div>Neighbors<pre>fe80::212:4b00:8b8:6ecb REACHABLE</pre></div><div>Default Route<pre>fe80::212:4b00:8b8:6ecb</pre></div><div>Routes<pre></pre></div><div>Sensors<pre>Battery Temp = 19 C\nBattery Volt = 3320 mV\nAir Pressure = 1031.12 hPa\nAir Temp = 22.66 C\nObject Temp = 12.375 C\nAmbient Temp = 23.062 C\nLight = 0.00 lux\nHDC Humidity = 43.93 %RH\nHDC Temp = 23.03 C\nAcc X = 0.02 G\nAcc Y = 0.02 G\nAcc Z = -1.10 G\nGyro X = -2.93 deg per sec\nGyro Y = -2.74 deg per sec\nGyro Z = 5.18 deg per sec</pre></div><div>Page hits: 4<br>Uptime: 138 secs<br></div></body></html>';

// Turn the markup into a jQuery collection so it can be queried like a DOM.
const responseDOM = $(response);

// The page contains four <pre> elements (Neighbors, Default Route, Routes,
// Sensors). .eq() is zero-based, so index 3 selects the fourth one, which
// holds the sensor readings.
const preContent = $('pre', responseDOM).eq(3).text();

// Build one object from the "name = value" lines. Each line is split at the
// first " = " only, and lines without the separator are skipped so they do
// not create keys whose value is undefined.
const SEPARATOR = ' = ';
const obj = {};
for (const line of preContent.split('\n')) {
  const separatorAt = line.indexOf(SEPARATOR);
  if (separatorAt === -1) {
    continue;
  }
  obj[line.slice(0, separatorAt)] = line.slice(separatorAt + SEPARATOR.length);
}

// Finally, turn the object into a JSON string.
const json = JSON.stringify(obj);
console.log(json);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>