在python中展平Json数组数据

时间:2015-11-04 23:13:47

标签: python json list recursion

我正在处理一个JSON结构,它以这样的结构输出给我:

{  
  "time":"2015-10-20T20:15:00.847Z",
  "name":"meta.response.ean",
  "level":"info",
  "data1":{  
     "HotelListResponse":{  
        "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested":1,
        "moreResultsAvailable":true,
        "cacheKey":"-705f6d43:15086db3fd1:-4c58",
        "cacheLocation":"10.178.144.36:7300",
        "HotelList":{  
           "@size":2,
           "@activePropertyCount":2,
           "HotelSummary":[  
              {  
                 "hotelId":132684,
                 "city":"Seattle",
                 "highRate":159.0,
                 "lowRate":159.0,
                 "rateCurrencyCode":"USD",
                 "RoomRateDetailsList":{  
                    "RoomRateDetails":{  
                       "roomTypeCode":10351,
                       "rateCode":10351,
                       "roomDescription":"Standard Room, 1 Queen Bed",
                       "RateInfos":{  
                          "RateInfo":{  
                             "@promo":false,
                             "ChargeableRateInfo":{  
                                "@averageBaseRate":159.0,
                                "@averageRate":159.0,
                                "@currencyCode":"USD",
                                "@nightlyRateTotal":159.0,
                                "@surchargeTotal":26.81,
                                "@total":185.81
                             }
                          }
                       }
                    }
                 }
              },
              {  
                 "hotelId":263664,
                 "city":"Las Vegas",
                 "highRate":135.0,
                 "lowRate":94.5,
                 "rateCurrencyCode":"USD",
                 "RoomRateDetailsList":{  
                    "RoomRateDetails":{  
                       "roomTypeCode":373685,
                       "rateCode":1238953,
                       "roomDescription":"Standard Room, 1 King Bed",
                       "RateInfos":{  
                          "RateInfo":{  
                             "@promo":true,
                             "ChargeableRateInfo":{  
                                "@averageBaseRate":135.0,
                                "@averageRate":94.5,
                                "@currencyCode":"USD",
                                "@nightlyRateTotal":94.5,
                                "@surchargeTotal":9.45,
                                "@total":103.95
                             }
                          }
                       }
                    }
                 }
              }
           ]
        }
     }
  },
  "context":{  
     "X-Request-Id":"dca47992-b6cc-4b87-956c-90523c0bf3bb",
     "host":"getaways-search-app2",
     "thread":"http-nio-80-exec-12"
  }
}

如您所见,这些是嵌套数组。关于以递归方式展平这些问题的讨论很多。我无法在“HotelSummary”下展平数组。有什么想法吗?

2 个答案:

答案 0 :(得分:0)

{  
   "HotelListResponse":{  
      "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
      "numberOfRoomsRequested":1,
      "moreResultsAvailable":"true",
      "cacheKey":"-705f6d43:15086db3fd1:-4c58",
      "cacheLocation":"10.178.144.36:7300",
      "HotelList":{  
         "size":2,
         "activePropertyCount":2,
         "HotelSummary":[  
            {  
               "hotelId":132684,
               "city":"Seattle",
               "highRate":159.0,
               "lowRate":159.0,
               "rateCurrencyCode":"USD",
               "RoomRateDetailsList":{  
                  "RoomRateDetails":{  
                     "roomTypeCode":10351,
                     "rateCode":10351,
                     "roomDescription":"Standard Room, 1 Queen Bed",
                     "RateInfos":{  
                        "RateInfo":{  
                           "promo":"false",
                           "ChargeableRateInfo":{  
                              "averageBaseRate":159.0,
                              "averageRate":159.0,
                              "currencyCode":"USD",
                              "nightlyRateTotal":159.0,
                              "surchargeTotal":26.81,
                              "total":185.81
                           }
                        }
                     }
                  }
               }
            },
            {  
               "hotelId":263664,
               "city":"Las Vegas",
               "highRate":135.0,
               "lowRate":94.5,
               "rateCurrencyCode":"USD",
               "RoomRateDetailsList":{  
                  "RoomRateDetails":{  
                     "roomTypeCode":373685,
                     "rateCode":1238953,
                     "roomDescription":"Standard Room, 1 King Bed",
                     "RateInfos":{  
                        "RateInfo":{  
                           "promo":"true",
                           "ChargeableRateInfo":{  
                              "averageBaseRate":135.0,
                              "averageRate":94.5,
                              "currencyCode":"USD",
                              "nightlyRateTotal":94.5,
                              "surchargeTotal":9.45,
                              "total":103.95
                           }
                        }
                     }
                  }
               }
            }
         ]
      }
   }
}

我想将数据压缩成以下格式

{  
   "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
   "numberOfRoomsRequested":1,
   "moreResultsAvailable":"true",
   "cacheKey":"-705f6d43:15086db3fd1:-4c58",
   "cacheLocation":"10.178.144.36:7300",
   "size":2,
   "activePropertyCount":2,
   "hotelId":132684,
   "city":"Seattle",
   "highRate":159.0,
   "lowRate":159.0,
   "rateCurrencyCode":"USD",
   "roomTypeCode":10351,
   "rateCode":10351,
   "roomDescription":"Standard Room, 1 Queen Bed",
   "promo":"false",
   "averageBaseRate":159.0,
   "averageRate":159.0,
   "currencyCode":"USD",
   "nightlyRateTotal":159.0,
   "surchargeTotal":26.81,
   "total":185.81
}


{  
   "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
   "numberOfRoomsRequested":1,
   "moreResultsAvailable":"true",
   "cacheKey":"-705f6d43:15086db3fd1:-4c58",
   "cacheLocation":"10.178.144.36:7300",
   "size":2,
   "activePropertyCount":2,
   "hotelId":263664,
   "city":"Las Vegas",
   "highRate":135.0,
   "lowRate":94.5,
   "rateCurrencyCode":"USD",
   "roomTypeCode":373685,
   "rateCode":1238953,
   "roomDescription":"Standard Room, 1 King Bed",
   "promo":"true",
   "averageBaseRate":135.0,
   "averageRate":94.5,
   "currencyCode":"USD",
   "nightlyRateTotal":94.5,
   "surchargeTotal":9.45,
   "total":103.95
}

我尝试过使用flattenDict类。不知何故,我没有得到所需格式的输出。以下是我的代码

def flattenDict(d, result=None):
    if result is None:
        result = {}
    for key in d:
        value = d[key]
        if isinstance(value, dict):
            value1 = {}
            for keyIn in value:
                value1[".".join([key,keyIn])]=value[keyIn]
            flattenDict(value1, result)
        elif isinstance(value, (list, tuple)):   
            for indexB, element in enumerate(value):
                if isinstance(element, dict):
                    value1 = {}
                    index = 0
                    for keyIn in element:
                        newkey = ".".join([key,keyIn])        
                        value1[".".join([key,keyIn])]=value[indexB][keyIn]
                        index += 1
                    for keyA in value1:
                        flattenDict(value1, result)   
        else:
            result[key]=value
    return result

答案 1 :(得分:0)

使用pandasjson_normalize

  • record_path是主要key要展平的参数
  • meta是用于附加keys进行展平的参数
  • json_normalize创建的列名将所有keys包含到所需的key中,因此创建了长列名(例如RoomRateDetailsList.RoomRateDetails.roomTypeCode
    • 长列名称需要重命名为较短的版本
    • 使用dict理解来创建rename dict
  • 以下代码利用pathlib
    • .openpathlib
    • 的方法
    • 也适用于非Windows路径
import pandas as pd
from pandas.io.json import json_normalize
import json
from pathlib import Path


# path to file
p = Path(r'c:\some_path_to_file\test.json')

# read json file
with p.open('r', encoding='utf-8') as f:
    data = json.loads(f.read())

# create dataframe
df = json_normalize(data,
                    record_path=['data1', 'HotelListResponse', 'HotelList', 'HotelSummary'],
                    meta=[['data1', 'HotelListResponse', 'customerSessionId'],
                          ['data1', 'HotelListResponse', 'numberOfRoomsRequested'],
                          ['data1', 'HotelListResponse', 'moreResultsAvailable'],
                          ['data1', 'HotelListResponse', 'cacheKey'],
                          ['data1', 'HotelListResponse', 'cacheLocation'],
                          ['data1', 'HotelListResponse', 'HotelList', '@size'],
                          ['data1', 'HotelListResponse', 'HotelList', '@activePropertyCount']])

# rename columns:
rename = {value: value.split('.')[-1].replace('@', '') for value in df.columns}
df.rename(columns=rename, inplace=True)

# dataframe view
 hotelId       city  highRate  lowRate rateCurrencyCode  roomTypeCode  rateCode             roomDescription  promo  averageBaseRate  averageRate currencyCode  nightlyRateTotal  surchargeTotal   total                     customerSessionId numberOfRoomsRequested moreResultsAvailable                     cacheKey       cacheLocation size activePropertyCount
  132684    Seattle     159.0    159.0              USD         10351     10351  Standard Room, 1 Queen Bed  False            159.0        159.0          USD             159.0           26.81  185.81  0AB29024-F6D4-3915-0862-DB3FD1904C5A                      1                 True  -705f6d43:15086db3fd1:-4c58  10.178.144.36:7300    2                   2
  263664  Las Vegas     135.0     94.5              USD        373685   1238953   Standard Room, 1 King Bed   True            135.0         94.5          USD              94.5            9.45  103.95  0AB29024-F6D4-3915-0862-DB3FD1904C5A                      1                 True  -705f6d43:15086db3fd1:-4c58  10.178.144.36:7300    2                   2

# save to JSON
df.to_json('out.json', orient='records')

最终JSON输出:

[{
        "hotelId": 132684,
        "city": "Seattle",
        "highRate": 159.0,
        "lowRate": 159.0,
        "rateCurrencyCode": "USD",
        "roomTypeCode": 10351,
        "rateCode": 10351,
        "roomDescription": "Standard Room, 1 Queen Bed",
        "promo": false,
        "averageBaseRate": 159.0,
        "averageRate": 159.0,
        "currencyCode": "USD",
        "nightlyRateTotal": 159.0,
        "surchargeTotal": 26.81,
        "total": 185.81,
        "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested": 1,
        "moreResultsAvailable": true,
        "cacheKey": "-705f6d43:15086db3fd1:-4c58",
        "cacheLocation": "10.178.144.36:7300",
        "size": 2,
        "activePropertyCount": 2
    }, {
        "hotelId": 263664,
        "city": "Las Vegas",
        "highRate": 135.0,
        "lowRate": 94.5,
        "rateCurrencyCode": "USD",
        "roomTypeCode": 373685,
        "rateCode": 1238953,
        "roomDescription": "Standard Room, 1 King Bed",
        "promo": true,
        "averageBaseRate": 135.0,
        "averageRate": 94.5,
        "currencyCode": "USD",
        "nightlyRateTotal": 94.5,
        "surchargeTotal": 9.45,
        "total": 103.95,
        "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested": 1,
        "moreResultsAvailable": true,
        "cacheKey": "-705f6d43:15086db3fd1:-4c58",
        "cacheLocation": "10.178.144.36:7300",
        "size": 2,
        "activePropertyCount": 2
    }
]