从HTML responseText解析JSON

时间:2016-08-19 16:06:18

标签: javascript python html json parsing

Python 的 webob 模块默认返回 text/html 响应(尤其是 ServerError 这类服务器错误),结果错误信息的 JSON payload 被嵌入到 HTML responseText 的 body 中,内容如下:

<html>
<head>
  <title>503 Service Unavailable</title>
</head>
<body>
<h1>503 Service Unavailable</h1>
{
    "status": "object-specific error",
    "payload": {
            "Message": "Unable to list resources",
            "HTTP Method": "GET",
            "URI": "api/myManager/1.0/Node",
            "Operation": "LIST",
            "Object": {
                    "Name": "myManager.Node",
                    "Interface": "Node"
            },
            "Version": {
                    "Major": 1,
                    "Minor": 0
            }
       }
}<br /><br />
</body>
</html>

在客户端使用 JavaScript 提取嵌入在这段 HTML 中的 JSON 对象,最佳方法是什么?

2 个答案:

答案 0 :(得分:0)

总的来说,我认为更好的解决方案是确保服务器只返回 JSON。不过,按照 @Barmer 的建议,也可以在客户端用 JavaScript 快速实现:把 HTML 解析为 DOM,找到 body 下的文本子节点(childNode),然后对其内容调用 JSON.parse。

// Sample 503 response: the JSON error payload sits as bare text inside <body>,
// between the <h1> heading and the trailing <br /> elements.
const responseStr = '<html>' +
                    '<head>' +
                    '  <title>503 Service Unavailable</title>' +
                    '</head>' +
                    '<body>' +
                    '<h1>503 Service Unavailable</h1>' +
                    '{' +
                    '  "status": "object-specific error",' +
                    '  "payload": {' +
                    '    "Message": "Unable to list resources",' +
                    '    "HTTP Method": "GET",' +
                    '    "URI": "api/myManager/1.0/Node",' +
                    '    "Operation": "LIST",' +
                    '    "Object": {' +
                    '      "Name": "myManager.Node",' +
                    '      "Interface": "Node"' +
                    '    },' +
                    '    "Version": {' +
                    '      "Major": 1,' +
                    '      "Minor": 0' +
                    '    }' +
                    '  }' +
                    '}<br /><br />' +
                    '</body>' +
                    '</html>';

/**
 * Extracts the JSON value embedded as bare text directly inside the <body>
 * of an HTML document (e.g. an XHR `responseText` from an error page).
 *
 * Only direct text children of <body> are inspected. Whitespace-only text
 * nodes and text that is not valid JSON are skipped, so pretty-printed
 * markup (where the first text node is just the newline after `<body>`)
 * is handled as well as the single-line form.
 *
 * @param {string} html - Raw HTML source to search.
 * @returns {*} The first successfully parsed JSON value, or `undefined`
 *     when no direct text child of <body> contains valid JSON.
 */
function extractEmbeddedJson(html) {
    const doc = new DOMParser().parseFromString(html, "text/html");

    for (const node of doc.body.childNodes) {
        if (node.nodeName !== "#text") {
            continue;
        }

        const text = node.data.trim();
        if (text === "") {
            // Indentation/newlines between tags also show up as text nodes.
            continue;
        }

        try {
            return JSON.parse(text);
        } catch (err) {
            // A SyntaxError only means this text node is not the payload;
            // keep looking. Anything else is unexpected and must surface.
            if (!(err instanceof SyntaxError)) {
                throw err;
            }
        }
    }

    return undefined;
}

const json_obj = extractEmbeddedJson(responseStr);

// The payload can now be accessed like any other object, e.g.
console.log(json_obj.status);
console.log(json_obj.payload['HTTP Method']);

答案 1 :(得分:-1)

使用正则表达式(RegEx)进行解析(并不十分可靠,但可行)。需要先导入 re 和 json 模块:`import re`、`import json`。

yyyy-MM-dd HH:mm:ss

你会得到:

# Imports live with the snippet so it runs on its own.
import json
import re

# Sample 503 response body: the JSON error payload is embedded as bare text
# between the <h1> heading and the trailing <br /> tags.
content = """\
<html>
<head>
  <title>503 Service Unavailable</title>
</head>
<body>
<h1>503 Service Unavailable</h1>
{
    "status": "object-specific error",
    "payload": {
            "Message": "Unable to list resources",
            "HTTP Method": "GET",
            "URI": "api/myManager/1.0/Node",
            "Operation": "LIST",
            "Object": {
                    "Name": "myManager.Node",
                    "Interface": "Node"
            },
            "Version": {
                    "Major": 1,
                    "Minor": 0
            }
       }
}<br /><br />
</body>
</html>"""


def extract_json(html):
    """Return the first JSON object embedded in ``html``, or ``None``.

    Every ``{`` in the text is tried as the start of a JSON document. The
    JSON decoder itself decides where the object ends, so the result does
    not depend on which tags surround the payload, and string values that
    contain markup such as ``<br`` are not cut short.

    :param html: text that may contain a JSON object somewhere inside it.
    :returns: the decoded object, or ``None`` if no ``{`` starts valid JSON.
    """
    decoder = json.JSONDecoder()
    for mo in re.finditer(r"\{", html):
        try:
            # raw_decode parses one JSON document starting at the given
            # index and ignores whatever follows it.
            obj, _end = decoder.raw_decode(html, mo.start())
        except json.JSONDecodeError:
            # This brace did not open valid JSON; try the next one.
            continue
        return obj
    return None


obj = extract_json(content)
if obj is not None:
    print(obj)

或者,使用lxml

{'payload': {'Operation': 'LIST', 'HTTP Method': 'GET',
'URI': 'api/myManager/1.0/Node',
'Message': 'Unable to list resources',
'Version': {'Major': 1, 'Minor': 0},
'Object': {'Interface': 'Node', 'Name': 'myManager.Node'}},
'status': 'object-specific error'}

相同的结果