我正在处理一个JSON结构,它以这样的结构输出给我:
{
"time":"2015-10-20T20:15:00.847Z",
"name":"meta.response.ean",
"level":"info",
"data1":{
"HotelListResponse":{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":true,
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"HotelList":{
"@size":2,
"@activePropertyCount":2,
"HotelSummary":[
{
"hotelId":132684,
"city":"Seattle",
"highRate":159.0,
"lowRate":159.0,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":10351,
"rateCode":10351,
"roomDescription":"Standard Room, 1 Queen Bed",
"RateInfos":{
"RateInfo":{
"@promo":false,
"ChargeableRateInfo":{
"@averageBaseRate":159.0,
"@averageRate":159.0,
"@currencyCode":"USD",
"@nightlyRateTotal":159.0,
"@surchargeTotal":26.81,
"@total":185.81
}
}
}
}
}
},
{
"hotelId":263664,
"city":"Las Vegas",
"highRate":135.0,
"lowRate":94.5,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":373685,
"rateCode":1238953,
"roomDescription":"Standard Room, 1 King Bed",
"RateInfos":{
"RateInfo":{
"@promo":true,
"ChargeableRateInfo":{
"@averageBaseRate":135.0,
"@averageRate":94.5,
"@currencyCode":"USD",
"@nightlyRateTotal":94.5,
"@surchargeTotal":9.45,
"@total":103.95
}
}
}
}
}
}
]
}
}
},
"context":{
"X-Request-Id":"dca47992-b6cc-4b87-956c-90523c0bf3bb",
"host":"getaways-search-app2",
"thread":"http-nio-80-exec-12"
}
}
如您所见,这里有嵌套的数组。关于如何以递归方式展平这类结构,已经有很多讨论,但我仍然无法展平“HotelSummary”下的数组。有什么想法吗?
答案 0 :(得分:0)
{
"HotelListResponse":{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":"true",
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"HotelList":{
"size":2,
"activePropertyCount":2,
"HotelSummary":[
{
"hotelId":132684,
"city":"Seattle",
"highRate":159.0,
"lowRate":159.0,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":10351,
"rateCode":10351,
"roomDescription":"Standard Room, 1 Queen Bed",
"RateInfos":{
"RateInfo":{
"promo":"false",
"ChargeableRateInfo":{
"averageBaseRate":159.0,
"averageRate":159.0,
"currencyCode":"USD",
"nightlyRateTotal":159.0,
"surchargeTotal":26.81,
"total":185.81
}
}
}
}
}
},
{
"hotelId":263664,
"city":"Las Vegas",
"highRate":135.0,
"lowRate":94.5,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":373685,
"rateCode":1238953,
"roomDescription":"Standard Room, 1 King Bed",
"RateInfos":{
"RateInfo":{
"promo":"true",
"ChargeableRateInfo":{
"averageBaseRate":135.0,
"averageRate":94.5,
"currencyCode":"USD",
"nightlyRateTotal":94.5,
"surchargeTotal":9.45,
"total":103.95
}
}
}
}
}
}
]
}
}
}
我想将数据展平成以下格式(“HotelSummary”中的每个元素对应一条记录):
{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":"true",
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"size":2,
"activePropertyCount":2,
"hotelId":132684,
"city":"Seattle",
"highRate":159.0,
"lowRate":159.0,
"rateCurrencyCode":"USD",
"roomTypeCode":10351,
"rateCode":10351,
"roomDescription":"Standard Room, 1 Queen Bed",
"promo":"false",
"averageBaseRate":159.0,
"averageRate":159.0,
"currencyCode":"USD",
"nightlyRateTotal":159.0,
"surchargeTotal":26.81,
"total":185.81
}
{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":"true",
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"size":2,
"activePropertyCount":2,
"hotelId":263664,
"city":"Las Vegas",
"highRate":135.0,
"lowRate":94.5,
"rateCurrencyCode":"USD",
"roomTypeCode":373685,
"rateCode":1238953,
"roomDescription":"Standard Room, 1 King Bed",
"promo":"true",
"averageBaseRate":135.0,
"averageRate":94.5,
"currencyCode":"USD",
"nightlyRateTotal":94.5,
"surchargeTotal":9.45,
"total":103.95
}
我尝试过使用 flattenDict 函数,但不知何故没有得到所需格式的输出。以下是我的代码:
def flattenDict(d, result=None):
    """Flatten a nested dict into a single-level dict with dot-joined keys.

    Nested dicts are walked recursively and each leaf is stored under the
    path of keys leading to it, joined with ".", e.g.
    ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``.

    Lists and tuples are handled as follows:
      * every dict element is flattened under the list's own key, with no
        index in the path, so elements sharing inner keys overwrite each
        other and the last element wins;
      * non-dict elements (and therefore empty lists) produce no entry.

    Args:
        d: The dict to flatten. Keys that hold a dict, or a list/tuple
            containing dicts, must be strings, as must the inner keys,
            because the path is built with ``str.join``.
        result: Optional dict to write into. A new dict is created when
            omitted.

    Returns:
        ``result`` (the very object passed in, if any) holding the leaves.
    """
    if result is None:
        result = {}
    for key, value in d.items():
        if isinstance(value, dict):
            children = [value]
        elif isinstance(value, (list, tuple)):
            # Only dict elements contribute; scalars inside lists are skipped.
            children = [element for element in value if isinstance(element, dict)]
        else:
            result[key] = value
            continue
        for child in children:
            prefixed = {".".join([key, inner]): child[inner] for inner in child}
            # Recurse exactly once per child: repeating the same call per key
            # yields the same result but multiplies the work at every level.
            flattenDict(prefixed, result)
    return result
答案 1 :(得分:0)
使用 pandas 和 json_normalize:
- record_path 是用于指定要展平的主要 key 的参数
- meta 是用于指定需要一并展平的其他 keys 的参数
- json_normalize 创建的列名会包含通向目标 key 的所有 keys,因此列名很长(例如 RoomRateDetailsList.RoomRateDetails.roomTypeCode)
- 使用 dict 推导式创建一个 rename dict,把这些长列名重命名为较短的名称。
使用 pathlib 库:
- .open 是 pathlib 的方法
import pandas as pd
from pandas.io.json import json_normalize
import json
from pathlib import Path
# Path to the input file; the raw string keeps the Windows backslashes literal.
p = Path(r'c:\some_path_to_file\test.json')

# Parse the JSON directly from the open handle.
with p.open('r', encoding='utf-8') as f:
    data = json.load(f)

# Build one row per entry of the HotelSummary list. The `meta` paths point at
# values outside that list which are repeated on every row.
# pd.json_normalize is the top-level pandas function (pandas >= 1.0); the
# pandas.io.json alias of the same function was deprecated in pandas 1.0.
response_path = ['data1', 'HotelListResponse']
hotel_list_path = response_path + ['HotelList']
df = pd.json_normalize(
    data,
    record_path=hotel_list_path + ['HotelSummary'],
    meta=[
        response_path + ['customerSessionId'],
        response_path + ['numberOfRoomsRequested'],
        response_path + ['moreResultsAvailable'],
        response_path + ['cacheKey'],
        response_path + ['cacheLocation'],
        hotel_list_path + ['@size'],
        hotel_list_path + ['@activePropertyCount'],
    ],
)

# Shorten the column names: keep only the last dotted component and drop the
# '@' marker (e.g. '...ChargeableRateInfo.@total' -> 'total').
rename = {column: column.split('.')[-1].replace('@', '') for column in df.columns}
df.rename(columns=rename, inplace=True)
# dataframe view
hotelId city highRate lowRate rateCurrencyCode roomTypeCode rateCode roomDescription promo averageBaseRate averageRate currencyCode nightlyRateTotal surchargeTotal total customerSessionId numberOfRoomsRequested moreResultsAvailable cacheKey cacheLocation size activePropertyCount
132684 Seattle 159.0 159.0 USD 10351 10351 Standard Room, 1 Queen Bed False 159.0 159.0 USD 159.0 26.81 185.81 0AB29024-F6D4-3915-0862-DB3FD1904C5A 1 True -705f6d43:15086db3fd1:-4c58 10.178.144.36:7300 2 2
263664 Las Vegas 135.0 94.5 USD 373685 1238953 Standard Room, 1 King Bed True 135.0 94.5 USD 94.5 9.45 103.95 0AB29024-F6D4-3915-0862-DB3FD1904C5A 1 True -705f6d43:15086db3fd1:-4c58 10.178.144.36:7300 2 2
# Write the DataFrame to out.json as a JSON array of row records.
output_file = 'out.json'
df.to_json(output_file, orient='records')
[{
"hotelId": 132684,
"city": "Seattle",
"highRate": 159.0,
"lowRate": 159.0,
"rateCurrencyCode": "USD",
"roomTypeCode": 10351,
"rateCode": 10351,
"roomDescription": "Standard Room, 1 Queen Bed",
"promo": false,
"averageBaseRate": 159.0,
"averageRate": 159.0,
"currencyCode": "USD",
"nightlyRateTotal": 159.0,
"surchargeTotal": 26.81,
"total": 185.81,
"customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested": 1,
"moreResultsAvailable": true,
"cacheKey": "-705f6d43:15086db3fd1:-4c58",
"cacheLocation": "10.178.144.36:7300",
"size": 2,
"activePropertyCount": 2
}, {
"hotelId": 263664,
"city": "Las Vegas",
"highRate": 135.0,
"lowRate": 94.5,
"rateCurrencyCode": "USD",
"roomTypeCode": 373685,
"rateCode": 1238953,
"roomDescription": "Standard Room, 1 King Bed",
"promo": true,
"averageBaseRate": 135.0,
"averageRate": 94.5,
"currencyCode": "USD",
"nightlyRateTotal": 94.5,
"surchargeTotal": 9.45,
"total": 103.95,
"customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested": 1,
"moreResultsAvailable": true,
"cacheKey": "-705f6d43:15086db3fd1:-4c58",
"cacheLocation": "10.178.144.36:7300",
"size": 2,
"activePropertyCount": 2
}
]