Python使用多个键对JSON对象进行分组

时间:2016-08-18 11:51:03

标签: javascript python json

我有这个JSON对象,其结构如下(json对象是使用to_json(orient="records")从pandas数据框中提取的)

# Flat activity log: one record per (month, date, activity) with its duration.
data = [
    {'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
    {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
    {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
    {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
    {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
    {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0},
]

我尝试按 month 和 date 这两个字段进行分组。预期结果:

data =  [{
            "month": "Jan",
            "details": [{
                "date": "18",
                "effort": [{
                    "activity": "cycling",
                    "duration": 3
                }, {
                    "activity": "reading",
                    "duration": 3.0
                }]
            }, {
                "date": "19",
                "effort": [{
                    "activity": "scripting",
                    "duration": 19.5
                }]
            }]
        }, {
            "month": "Feb",
            "details": [{
                "date": "18",
                "effort": [{
                    "activity": "work",
                    "duration": 22.0
                }]
            }, {
                "date": "19",
                "effort": [{
                    "activity": "cooking",
                    "duration": 0.7
                }]
            }]
        }, {
            "month": "March",
            "details": [{
                "date": "16",
                "effort": [{
                    "activity": "hiking",
                    "duration": 8.0
                }]
            }]
        }]

我尝试使用 to_dict(orient="records") 从 pandas 数据框中将数据提取为 Python 字典(即下面的 dict_):
# Wrap every record in its own month -> details -> efforts nesting.
# Nothing is merged here: records that share a month or a date each
# become a separate top-level entry.
list_ = [
    {
        "month": item["month"],
        "details": [
            {
                "date": item["date"],
                "efforts": [
                    {
                        "activity": item["activity"],
                        "duration": item["duration"],
                    }
                ],
            }
        ],
    }
    for item in dict_
]

json.dumps(list_)

我得到的输出是

[{
    "month": "Jan",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 3,
            "activity": "cycling"
        }]
    }]
}, {
    "month": "Jan",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 3.0,
            "activity": "reading"
        }]
    }]
}, {
    "month": "Jan",
    "details": [{
        "date": "19",
        "efforts": [{
            "duration": 19.5,
            "activity": "scripting"
        }]
    }]
}, {
    "month": "Feb",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 22.0,
            "activity": "work"
        }]
    }]
}, {
    "month": "Feb",
    "details": [{
        "date": "19",
        "efforts": [{
            "duration": 0.7,
            "activity": "cooking"
        }]
    }]
}, {
    "month": "March",
    "details": [{
        "date": "16",
        "efforts": [{
            "duration": 8.0,
            "activity": "hiking"
        }]
    }]
}]

我不知道该如何把字段值相同(相同的 month、相同的 date)的记录合并到一起。

我尝试过使用 Python 和 JavaScript,你们有任何建议或解决方案吗?谢谢。

2 个答案:

答案 0 :(得分:1)

这似乎有效:

代码

data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
        {'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
        {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
        {'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
        {'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
        {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]

# Group the flat records by month, then by date inside each month.
# Result shape:
#   [{'month': ..., 'details': [{'date': ..., 'effort': [{'activity': ..., 'duration': ...}]}]}]
new_data = []
for item in data:
    effort = {'activity': item['activity'], 'duration': item['duration']}

    # Look up the bucket for this month; create it the first time it is seen.
    month = next((m for m in new_data if m['month'] == item['month']), None)
    if month is None:
        month = {'month': item['month'], 'details': []}
        new_data.append(month)

    # Look up the bucket for this date within the month. The search finishes
    # before anything is appended, so the list is never grown while it is
    # being iterated (doing so would revisit the new bucket and add the
    # activity twice).
    date = next((d for d in month['details'] if d['date'] == item['date']), None)
    if date is None:
        date = {'date': item['date'], 'effort': []}
        month['details'].append(date)

    # Each record contributes exactly one effort entry.
    date['effort'].append(effort)

# Parenthesised form prints the same thing on Python 2 and Python 3.
print(new_data)

输出(原答案运行时记录的结果;注意其中 'scripting' 和 'cooking' 各出现了两次,而正确的分组结果中每项活动只应出现一次)

[{'details': [{'date': '18', 'effort': [{'duration': 3, 'activity': 'cycling'}, {'duration': 3.0, 'activity': 'reading'}]}, {'date': '19', 'effort': [{'duration': 19.5, 'activity': 'scripting'}, {'duration': 19.5, 'activity': 'scripting'}]}], 'month': 'Jan'}, {'details': [{'date': '18', 'effort': [{'duration': 22.0, 'activity': 'work'}]}, {'date': '19', 'effort': [{'duration': 0.7, 'activity': 'cooking'}, {'duration': 0.7, 'activity': 'cooking'}]}], 'month': 'Feb'}, {'details': [{'date': '16', 'effort': [{'duration': 8.0, 'activity': 'hiking'}]}], 'month': 'March'}]

基本上只是遍历每个条目,首先检查月份是否存在,如果存在,检查日期是否已存在,并相应地附加到新数据。因此,如果不存在月份,则追加所有内容,如果不存在日期,则附加日期详细信息和新活动。如果日期也存在,那么您只需附加活动

答案 1 :(得分:0)

用于对 JSON 进行分组的通用函数。您必须为每一层分组传入要分组的字段名(field),以及用来存放该组记录的数组的键名(gbkey)。

def groupBy(vetor, campos, pos=0):
    """Recursively nest a list of flat dicts by a sequence of fields.

    Args:
        vetor: list of dict records to group.
        campos: list of grouping levels, outermost first. Each level is a
            dict with two keys: ``"field"`` (the record key to group on) and
            ``"gbkey"`` (the output key under which the grouped records,
            minus that field, are stored).
        pos: index in ``campos`` of the level to apply; deeper levels are
            applied recursively to each group.

    Returns:
        A list with one dict per distinct value of the current field, in
        the iteration order of the internal dict (first-seen order where
        dicts preserve insertion order). Each dict has the form
        ``{field: value, gbkey: <nested groups or remaining records>}``.
        When ``pos`` is past the last level, ``vetor`` is returned as is.

    Raises:
        KeyError: if a record lacks the current grouping field.
    """
    if pos >= len(campos):
        return vetor

    nivel = campos[pos]
    agrupado = nivel["field"]
    kx = nivel["gbkey"]

    agrupados = {}
    for registro in vetor:
        # Read the group value once per record, before touching its other
        # keys, so that a record can never be filed under a value left over
        # from the previous record.
        val_agrupado = registro[agrupado]
        restante = {k: s for k, s in registro.items() if k != agrupado}
        agrupados.setdefault(val_agrupado, []).append(restante)

    # Recursing unconditionally is safe: the guard at the top returns the
    # records untouched once every level has been applied.
    return [
        {agrupado: valor, kx: groupBy(membros, campos, pos + 1)}
        for valor, membros in agrupados.items()
    ]
    
# Sample input: one flat record per logged activity.
data = [
    {'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
    {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
    {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
    {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
    {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
    {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0},
]

# Group by month first (records stored under 'details'), then by date
# within each month (records stored under 'effort').
grouping_levels = [
    {'field': 'month', 'gbkey': 'details'},
    {'field': 'date', 'gbkey': 'effort'},
]
print(groupBy(data, grouping_levels, 0))

它会产生类似下面的结果:

[
   {
      "month":"Jan",
      "details":[
         {
            "date":"18",
            "effort":[
               {
                  "activity":"cycling",
                  "duration":3
               },
               {
                  "activity":"reading",
                  "duration":3.0
               }
            ]
         },
         {
            "date":"19",
            "effort":[
               {
                  "activity":"scripting",
                  "duration":19.5
               }
            ]
         }
      ]
   },
   {
      "month":"Feb",
      "details":[
         {
            "date":"18",
            "effort":[
               {
                  "activity":"work",
                  "duration":22.0
               }
            ]
         },
         {
            "date":"19",
            "effort":[
               {
                  "activity":"cooking",
                  "duration":0.7
               }
            ]
         }
      ]
   },
   {
      "month":"March",
      "details":[
         {
            "date":"16",
            "effort":[
               {
                  "activity":"hiking",
                  "duration":8.0
               }
            ]
         }
      ]
   }
]