我需要解析一个几乎是 JSON 格式的数据流,其中每个条目前面带有日期格式的前缀,如下所示。
2020-09-28 15:52:13.633+0000 INFO |RestAPI.ServiceManager | Request #399: {
"context": {
"httpContextKey": 18446744071313531680,
"verbId": 2,
"verb": "GET",
"originalVerb": "GET",
"uri": "/services/v2/installation/deployments",
"protocol": "https",
"headers": {
"X-OGG-Context": "services",
"X-OGG-Service": "ServiceManager",
"X-OGG-Version": "v2",
"X-OGG-Resource": "installation/deployments",
"Content-Length": "0",
"Accept": "application/json"
},
"host": "testing-db.com",
"securityEnabled": true,
"authorization": null,
"requestId": 399,
"uriTemplate": "/services/{version}/installation/deployments",
"catalogUriTemplate": "/services/{version}/metadata-catalog/deployments"
},
"isScaRequest": true,
"content": null,
"parameters": {
"uri": {
"version": "v2"
}
}
}
Response: {
"context": {
"httpContextKey": 18446744071313531680,
"requestId": 399,
"code": "200 OK",
"headers": {
"Content-Type": "application/json",
"Expires": "0",
"Pragma": "no-cache",
"Strict-Transport-Security": "max-age=31536000;includeSubDomains"
},
"Content-Type": "application/json",
"contentType": "application/json"
},
"isScaResponse": true,
"originScaRequest": {
"context": {
"httpContextKey": 18446744071313531680,
"verbId": 2,
"verb": "GET",
"originalVerb": "GET",
"uri": "/services/v2/installation/deployments",
"protocol": "https",
"headers": {
"X-OGG-Context": "services",
"X-OGG-Service": "ServiceManager",
"X-OGG-Version": "v2",
"X-OGG-Resource": "installation/deployments",
"Content-Length": "0",
"Accept": "application/json"
},
"securityEnabled": true,
"authorization": null,
"requestId": 399
},
"isScaRequest": true
},
"content": {
"$schema": "api:standardResponse",
"links": [
{
"rel": "describedby",
"href": "https://testing-db.com/services/v2/metadata-catalog/deployments",
"mediaType": "application/schema+json"
}
],
"messages": [],
"response": {
"$schema": "ogg:installationDeployments",
"xagEnabled": false,
"deployments": [
{
"deploymentId": "39398e93-7e53-484c-9e90-3bf2f820ee73",
"deploymentName": "FOR-TMDB",
"enabled": true,
"status": "running"
},
{
"deploymentId": "30233230-94a6-4ae7-9b5e-db66105d9046",
"deploymentName": "ServiceManager",
"enabled": true,
"status": "running"
}
]
}
}
}
运行 `tail -0f apiserver.log | python parse` 时,它应该解析每个 JSON 条目,并提取、显示诸如 `DATE` 和 `Request` 详细信息之类的实体。请注意,日志中还包含 JSON 格式的 `Response`,需要将其排除。
我尝试用下面的代码借助生成器来实现,但问题是:如何把多行组合在一起,再解析得到 JSON 条目?因为我需要退出 for 循环才能拿到完整的内容,但那样就无法流式处理了。
import sys
def read_stdin():
    """Yield lines from standard input one at a time until input ends.

    Each line is handed to the caller as soon as ``sys.stdin.readline()``
    returns it, so the generator can be driven by a live pipe. Iteration
    stops at the first empty (falsy) read.
    """
    while True:
        current = sys.stdin.readline()
        if not current:
            # readline() returned nothing: no more input to stream.
            return
        yield current
# Echo every line as it arrives. Each line read from stdin still carries
# its own trailing newline, so suppress the one print() would add;
# otherwise the output is double-spaced.
for line in read_stdin():
    print(line, end='')
答案 0 :(得分:0)
您有几种选择(第二种选择更简单,也许最好):
1. 使用 PyPI 仓库中的 `regex` 包,并尝试按如下方式识别请求的 JSON 字符串:
import regex
import sys
import json
# Recursive pattern (third-party ``regex`` module) that matches one
# brace-balanced ``{...}`` group; nesting is handled by ``(?&json)``.
# It only balances braces -- it does not validate JSON.
json_rex = regex.compile(r"""
(?<json> #capturing group json
{ #open {
(?: #non-capturing group
[^{}]++ #anything but {} without backtracking
| #or
(?&json) #recursive substitute of group expr
)*
} #close }
)
""", flags=regex.VERBOSE)

# Header line that introduces a request payload; compiled once, outside
# the read loop.
request_rex = regex.compile(r'Request #\d+: {')

while True:
    line = sys.stdin.readline()
    if not line:
        break
    if not request_rex.search(line):
        # Anything that is not a request header (including the
        # "Response: {" payloads) is skipped.
        continue
    # The header line ends with the opening brace of the payload, so the
    # accumulated text starts from that brace.
    json_str = '{\n'
    while True:
        chunk = sys.stdin.readline()
        if not chunk:
            # Input ended in the middle of an entry. readline() would
            # keep returning '' forever, so give up on this entry
            # instead of spinning.
            break
        json_str += chunk
        if not json_rex.match(json_str):
            continue
        try:
            d = json.loads(json_str)  # we have our dictionary
        except json.JSONDecodeError:
            # Braces balanced but the text is not valid JSON yet (for
            # example a brace inside a string value closed the group
            # early); keep accumulating lines.
            continue
        print(d)
        break
2. 不断尝试把已累积的字符串解析为 JSON,直到不再报错为止:
import re
import sys
import json
def iter_requests(stream):
    """Yield the JSON payload of every ``Request #N: {`` entry in *stream*.

    Lines are consumed with ``stream.readline()`` so entries are produced
    as soon as their last line arrives. Lines that are not part of a
    request entry (for example the ``Response: {`` payloads) are skipped.

    Args:
        stream: Text stream exposing ``readline()``, e.g. ``sys.stdin``.

    Yields:
        The object decoded by ``json.loads`` for each complete request.
        An entry cut short by end of input is dropped.
    """
    header = re.compile(r'Request #\d+: {')
    while True:
        line = stream.readline()
        if not line:
            return
        if not header.search(line):
            continue
        # The header line ends with the payload's opening brace, so the
        # collected text starts from that brace.
        parts = ['{\n']
        while True:
            line = stream.readline()
            if not line:
                # End of input in the middle of an entry: readline()
                # would return '' forever, so stop instead of spinning.
                return
            parts.append(line)
            # The text starts with '{', so it can only be complete right
            # after a closing brace; skip pointless parse attempts on
            # every other line.
            if not line.rstrip().endswith('}'):
                continue
            try:
                payload = json.loads(''.join(parts))
            except json.JSONDecodeError:
                # That brace closed a nested object; keep collecting.
                continue
            yield payload  # we have our dictionary
            break


if __name__ == "__main__":
    for request in iter_requests(sys.stdin):
        print(request)