我有一个 JSON 对象,其结构如下(该 JSON 是使用 to_json(orient="records")
从 pandas 数据框中导出的):
# Sample records: one flat dict per logged activity.
data = [
    {'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
    {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
    {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
    {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
    {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
    {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0},
]
我想按 month 和 date 这两个字段进行分组。
预期结果:
data = [{
"month": "Jan",
"details": [{
"date": "18",
"effort": [{
"activity": "cycling",
"duration": 3
}, {
"activity": "reading",
"duration": 3.0
}]
}, {
"date": "19",
"effort": [{
"activity": "scripting",
"duration": 19.5
}]
}]
}, {
"month": "Feb",
"details": [{
"date": "18",
"effort": [{
"activity": "work",
"duration": 22.0
}]
}, {
"date": "19",
"effort": [{
"activity": "cooking",
"duration": 0.7
}]
}]
}, {
"month": "March",
"details": [{
"date": "16",
"effort": [{
"activity": "hiking",
"duration": 8.0
}]
}]
}]
我尝试使用 to_dict(orient="records") 将数据转换为 Python 字典,然后执行以下代码:
# Wrap every record in its own month -> details -> efforts nesting.
# Records that share a month/date are NOT merged here: each input record
# produces a separate top-level entry.
list_ = [
    {
        "month": record["month"],
        "details": [
            {
                "date": record["date"],
                "efforts": [
                    {
                        "activity": record["activity"],
                        "duration": record["duration"],
                    }
                ],
            }
        ],
    }
    for record in dict_
]
json.dumps(list_)
我得到的输出是
[{
"month": "Jan",
"details": [{
"date": "18",
"efforts": [{
"duration": 3,
"activity": "cycling"
}]
}]
}, {
"month": "Jan",
"details": [{
"date": "18",
"efforts": [{
"duration": 3.0,
"activity": "reading"
}]
}]
}, {
"month": "Jan",
"details": [{
"date": "19",
"efforts": [{
"duration": 19.5,
"activity": "scripting"
}]
}]
}, {
"month": "Feb",
"details": [{
"date": "18",
"efforts": [{
"duration": 22.0,
"activity": "work"
}]
}]
}, {
"month": "Feb",
"details": [{
"date": "19",
"efforts": [{
"duration": 0.7,
"activity": "cooking"
}]
}]
}, {
"month": "March",
"details": [{
"date": "16",
"efforts": [{
"duration": 8.0,
"activity": "hiking"
}]
}]
}]
我的代码没有把 month 和 date 相同的记录合并到同一个分组中。
我用 Python 和 JavaScript 都尝试过,大家有什么建议或解决方案吗?谢谢!
答案 0 :(得分:1)
这似乎有效:
data = [
    {'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
    {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
    {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
    {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
    {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
    {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0},
]


def _group_by_month_and_date(records):
    """Nest flat activity records as month -> details (per date) -> effort.

    Args:
        records: iterable of dicts with the keys 'month', 'date',
            'activity' and 'duration'.

    Returns:
        A list with one ``{'month': ..., 'details': [...]}`` dict per distinct
        month, in order of first appearance. Each ``details`` item is
        ``{'date': ..., 'effort': [...]}`` and each ``effort`` item is
        ``{'activity': ..., 'duration': ...}``.

    Raises:
        KeyError: if a record lacks one of the four expected keys.
    """
    grouped = []
    month_index = {}  # month -> that month's entry inside ``grouped``
    date_index = {}   # (month, date) -> that date's entry inside 'details'

    for record in records:
        month, date = record['month'], record['date']
        effort = {'activity': record['activity'],
                  'duration': record['duration']}

        month_entry = month_index.get(month)
        if month_entry is None:
            month_entry = {'month': month, 'details': []}
            month_index[month] = month_entry
            grouped.append(month_entry)

        # Look the date up in an index instead of scanning 'details' and
        # appending to it mid-iteration: the scan-and-append approach adds a
        # new date entry for every non-matching date and then revisits the
        # entry it just appended, which duplicates the activity.
        date_entry = date_index.get((month, date))
        if date_entry is None:
            date_entry = {'date': date, 'effort': []}
            date_index[(month, date)] = date_entry
            month_entry['details'].append(date_entry)

        date_entry['effort'].append(effort)

    return grouped


new_data = _group_by_month_and_date(data)
print(new_data)
# Output (key order as printed by Python 3.7+):
# [{'month': 'Jan', 'details': [{'date': '18', 'effort': [{'activity': 'cycling', 'duration': 3}, {'activity': 'reading', 'duration': 3.0}]}, {'date': '19', 'effort': [{'activity': 'scripting', 'duration': 19.5}]}]},
#  {'month': 'Feb', 'details': [{'date': '18', 'effort': [{'activity': 'work', 'duration': 22.0}]}, {'date': '19', 'effort': [{'activity': 'cooking', 'duration': 0.7}]}]},
#  {'month': 'March', 'details': [{'date': '16', 'effort': [{'activity': 'hiking', 'duration': 8.0}]}]}]
基本上只是遍历每个条目,首先检查月份是否存在,如果存在,检查日期是否已存在,并相应地附加到新数据。因此,如果不存在月份,则追加所有内容,如果不存在日期,则附加日期详细信息和新活动。如果日期也存在,那么您只需附加活动
答案 1 :(得分:0)
这是一个用于对 JSON 记录进行分组的通用函数。您需要为每一级分组传入要分组的字段(field),以及用来存放该组记录的数组的键名(gbkey)。
def groupBy(vetor, campos, pos):
    """Recursively nest a list of flat records by a sequence of fields.

    Args:
        vetor: list of dict records to group.
        campos: list of grouping levels; each level is a dict with
            ``"field"`` (the record key to group by) and ``"gbkey"`` (the key
            under which the grouped child records are stored).
        pos: index in ``campos`` of the level to apply; pass 0 to start.

    Returns:
        A list with one ``{field: value, gbkey: children}`` dict per distinct
        value of the field, where ``children`` are the matching records with
        that field removed, further grouped by the remaining levels.
        When ``pos`` is past the last level, ``vetor`` is returned as is.
        Groups follow dict iteration order (first appearance on Python 3.7+).

    Raises:
        KeyError: if a record does not contain the field of the current level.
    """
    if pos >= len(campos):
        return vetor

    level = campos[pos]
    field = level["field"]
    children_key = level["gbkey"]

    buckets = {}
    for record in vetor:
        # Read the grouping value once per record, before touching its items,
        # so an empty or incomplete record fails loudly instead of being
        # filed under whichever group the previous record belonged to.
        group_value = record[field]
        remainder = {k: v for k, v in record.items() if k != field}
        buckets.setdefault(group_value, []).append(remainder)

    # The recursion's own base case handles the last level, so no extra
    # length check is needed before descending.
    return [
        {field: group_value, children_key: groupBy(members, campos, pos + 1)}
        for group_value, members in buckets.items()
    ]
# Sample records: one flat dict per logged activity.
data = [
    {'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
    {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
    {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
    {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
    {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
    {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0},
]

# Group by month first (children stored under "details"), then by date
# (children stored under "effort"), starting at level 0.
group_levels = [
    {'field': 'month', 'gbkey': 'details'},
    {'field': 'date', 'gbkey': 'effort'},
]
print(groupBy(data, group_levels, 0))
它会产生类似下面的输出:
[
{
"month":"Jan",
"details":[
{
"date":"18",
"effort":[
{
"activity":"cycling",
"duration":3
},
{
"activity":"reading",
"duration":3.0
}
]
},
{
"date":"19",
"effort":[
{
"activity":"scripting",
"duration":19.5
}
]
}
]
},
{
"month":"Feb",
"details":[
{
"date":"18",
"effort":[
{
"activity":"work",
"duration":22.0
}
]
},
{
"date":"19",
"effort":[
{
"activity":"cooking",
"duration":0.7
}
]
}
]
},
{
"month":"March",
"details":[
{
"date":"16",
"effort":[
{
"activity":"hiking",
"duration":8.0
}
]
}
]
}
]