JavaScript - 从日志文件中提取JSON

时间:2016-07-19 20:09:00

标签: javascript regex

我需要从日志文件中提取JSON字符串(稍后将用于解析JSON)。该文件采用以下格式:

[16:11:20] some text
[16:11:20] some text
some text
some text
[
  {
    "description": "some text",
    "elements": [{
        "id": "some text",
        "keyword": "some text",
        "line": 20,
        "name": "some text",
        "steps": [{
            "arguments": [],
            "keyword": "some text ",
            "result": {
                "status": "passed",
                "duration": 14884761888
            },
            "hidden": true,
            "match": {
                "location": "some text"
            }
        },
        {
            "arguments": [],
            "keyword": "sometext ",
            "name": "sometext",
            "result": {
                "status": "passed",
                "duration": 463674
            },
            "line": 11,
            "match": {
                "location": "sometext"
            }
        }
        ],
        "tags": [
          {
            "name": "@sometext-no",
            "line": 7
          },
          {
            "name": "@sometext",
            "line": 8
          }
        ],
        "type": "sometext"
      }

    ],
    "id": "sometext",
    "keyword": "sometext",
    "line": 1,
    "name": "sometext",
    "tags": [],
    "uri": "sometext"
  }
][16:11:54] some text
[16:11:54] some text

如何在JavaScript中构造一个正则表达式,从该文件中提取JSON数据,使其可以配合string.match()得到所需的输出?

我目前尝试的模式如下:

// Attempted pattern: "[" + whitespace + "{", then any characters (including
// newlines, via [\s\S]), then three consecutive "}" "]" pairs in which every
// bracket is separated from the next one by at least one whitespace character.
var regExtract = /(\[\s+\{[\s\S]*\}\s+\]\s+\}\s+\]\s+\}\s+\])/;
// `data` is defined outside this snippet. match() returns null when the
// pattern does not match, in which case the [1] lookup throws; on success
// [1] is the text captured by the outer group.
var matchedJson = data.toString().match(regExtract)[1];

2 个答案:

答案 0 :(得分:2)

我更新了答案,你可以试试下面这种做法:

// Sample log: timestamped text lines surrounding a pretty-printed JSON array.
const str = `[16:11:20] some text
[16:11:20] some text
some text
some text
[
  {
    "description": "some text",
    "elements": [{
        "id": "some text",
        "keyword": "some text",
        "line": 20,
        "name": "some text",
        "steps": [{
            "arguments": [],
            "keyword": "some text ",
            "result": {
                "status": "passed",
                "duration": 14884761888
            },
            "hidden": true,
            "match": {
                "location": "some text"
            }
        },
        {
            "arguments": [],
            "keyword": "sometext ",
            "name": "sometext",
            "result": {
                "status": "passed",
                "duration": 463674
            },
            "line": 11,
            "match": {
                "location": "sometext"
            }
        }
        ],
        "tags": [
          {
            "name": "@sometext-no",
            "line": 7
          },
          {
            "name": "@sometext",
            "line": 8
          }
        ],
        "type": "sometext"
      }

    ],
    "id": "sometext",
    "keyword": "sometext",
    "line": 1,
    "name": "sometext",
    "tags": [],
    "uri": "sometext"
  }
][16:11:54] some text
[16:11:54] some text
`;

/**
 * Collects the source text of every top-level JSON object in a log.
 *
 * Objects are recognised purely by layout: one starts on a line that is
 * exactly two spaces followed by "{" and ends on the next line that is exactly
 * two spaces followed by "}". Everything in between is kept verbatim.
 *
 * @param {string} text - Raw log contents (LF or CRLF line endings).
 * @returns {string[]} Trimmed text of each object, in order of appearance.
 *   An object that is opened but never closed is not reported.
 */
function extractJsonBlocks(text) {
  const blocks = [];
  let current = null; // lines of the object being collected; null while outside one

  // Split on both LF and CRLF, otherwise a trailing "\r" would prevent the
  // exact-match marker comparisons below from ever succeeding.
  for (const line of text.split(/\r?\n/)) {
    if (current === null) {
      if (line !== '  {') {
        continue; // outside an object: ignore log noise and stray "  }" lines
      }
      current = [];
    }

    current.push(line);

    if (line === '  }') {
      blocks.push(current.join('\n').trim());
      current = null;
    }
  }

  return blocks;
}

const jsonArray = extractJsonBlocks(str);
if (jsonArray.length === 0) {
  // Fail with a clear message instead of letting JSON.parse(undefined) throw.
  throw new Error('No JSON object found in the log text');
}
console.log(JSON.parse(jsonArray[0]));

在线演示(JSFiddle):https://jsfiddle.net/x0cc25q4/7/

更新:使用正则表达式的简短版本:

// Short version: flatten the log onto one line so that `.` can span the whole
// array, then grab every "[" + two whitespace characters + "{ ... }" + "]" run.
// Both LF and CRLF line breaks are removed; a leftover "\r" would otherwise
// count as whitespace and break the \s{2} match.
// With the g flag, match() returns the full matched texts (brackets included),
// so each entry is a complete JSON array; the capture group is not used.
const arr = str.replace(/\r?\n/g, '').match(/\[\s{2}(\{.*?\})\]/g);
if (arr === null) {
  // match() yields null when nothing matches; fail with a clear message
  // instead of a TypeError on arr[0].
  throw new Error('No JSON array found in the log text');
}
console.log(JSON.parse(arr[0]));

在线演示(JSFiddle):https://jsfiddle.net/x0cc25q4/8/

答案 1 :(得分:1)

不确定正则表达式能否胜任这个任务,因为需要对左右括号进行配对计数。可以试试:

/**
 * Finds where the array opened at `openIndex` is closed.
 *
 * Walks forward from the opening "[" keeping a nesting counter for "[" and
 * "]". Characters inside double-quoted JSON strings are skipped (honouring
 * backslash escapes), so brackets in string values do not affect the count.
 *
 * @param {string} text - Text containing the array.
 * @param {number} openIndex - Index of the opening "[".
 * @returns {number} Index just past the matching "]", or -1 if the text ends
 *   before the array is closed.
 */
function findArrayEnd(text, openIndex) {
  let level = 0;
  let inString = false;
  // The length bound stops the scan on truncated or unbalanced input instead
  // of looping forever.
  for (let i = openIndex; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === '\\') {
        i++; // skip the escaped character, e.g. \" or \\
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '[') {
      level++;
    } else if (ch === ']') {
      level--;
      if (level === 0) {
        return i + 1;
      }
    }
  }
  return -1;
}

// Locate "[" followed (after optional whitespace) by "{", so that both a
// compact "[{" and a pretty-printed "[\n  {" are found.
const start = string.search(/\[\s*\{/);
// `start` is kept untouched so the array text can be sliced out afterwards.
const end = start === -1 ? -1 : findArrayEnd(string, start);
// `json` holds the raw array text, or null when no complete array was found.
const json = end === -1 ? null : string.slice(start, end);
如果JSON的字符串值中包含'['或']',情况肯定会变得更加复杂。