我的 MongoDB 中有几个集合,我想使用 pymongo,通过比较集合1和集合2来创建一个新的集合。
Collection 1 :
Object id timestamp Prof_Name subjects1
abc67478898k ISODate("2018-01-03T09:26:37.541Z") ABDC "sub1, sub2, sub3"
jjjjjjjjjj ISODate("2018-01-03T09:26:37.541Z") XYZ "sub2, sub4, sub8"
Collection 2 :
Object id timestamp UUID subjects2 rating score
3333333 ISODate("2018-01-03TZ") 7897 "sub1,sub4, sub7" 7 10
444444 ISODate("2018-01-03TZ") 4532 "sub2" 4 6
777777 ISODate("2018-01-03TZ") 7876 "sub1,sub2,sub3" 8 8
1111111 ISODate("2018-01-03TZ") 654 "sub1,sub3" 7 8
我通过以下方式得到集合3:对每个 Prof_Name 的每个主题,在集合2中查找匹配的主题,并统计某个时间范围内的 UUID 和 UUID_count。我的 mongo 查询如下:
// Builds collection "data3" from "data1" by joining each data1 document
// against "data2" on overlapping subjects and summarising the matches.
// NOTE(review): the sample tables name the fields subjects1/subjects2, while
// this query reads SUBJECT / SUBJECT_ID, and the sample timestamps are in 2018
// while the filter below selects January 2016 - confirm against the real schema.
db.data1.aggregate([
  {"$lookup": {
    "from": "data2",
    // Split this data1 document's comma-separated SUBJECT string into an array
    // and expose it to the sub-pipeline as $$subject.
    "let": {"subject": {"$split": ["$SUBJECT", ", "]}},
    "pipeline": [
      // Keep only data2 documents whose timestamp falls in January 2016.
      // The operator must be "$expr" (with the leading "$"); a bare "expr" is
      // treated as a field name, so the aggregation expressions are not evaluated.
      {"$match": {"$expr": {"$and": [
        {"$eq": [{"$year": "$timestamp"}, 2016]},
        {"$eq": [{"$month": "$timestamp"}, 1]}
      ]}}},
      // Turn the data2 SUBJECT_ID string into an array and attach the data1
      // subject array so the two can be compared per subject.
      {"$addFields": {
        "SUBJECT_ID": {"$split": ["$SUBJECT_ID", ", "]},
        "SUBJECT": "$$subject"
      }},
      // One document per (data2 document, data1 subject) pair.
      {"$unwind": "$SUBJECT"},
      // Keep only the pairs where the data1 subject appears in the data2 subject list.
      {"$match": {"$expr": {"$in": ["$SUBJECT", "$SUBJECT_ID"]}}},
      {"$facet": {
        // Count of distinct (data2 _id, UUID) pairs among the matches.
        "UUID": [
          {"$group": {"_id": {"id": "$_id", "UUID": "$UUID"}}},
          {"$count": "UUID_Count"}
        ],
        // Distinct matched subjects and distinct UUIDs, with their sizes.
        "REST": [
          {"$group": {
            "_id": null,
            "subjects_list": {"$addToSet": "$SUBJECT"},
            "UUID_distinct_list": {"$addToSet": "$UUID"}
          }},
          {"$addFields": {
            "subject_count": {"$size": "$subjects_list"},
            "UUID_distinct_count": {"$size": "$UUID_distinct_list"}
          }},
          {"$project": {"_id": 0}}
        ]
      }},
      // Each facet yields at most one document; merge them into a single flat document.
      {"$replaceRoot": {"newRoot": {"$mergeObjects": [
        {"$arrayElemAt": ["$UUID", 0]},
        {"$arrayElemAt": ["$REST", 0]}
      ]}}}
    ],
    "as": "ref_data"
  }},
  // ref_data holds a single summary document; flatten the array.
  {"$unwind": {"path": "$ref_data", "preserveNullAndEmptyArrays": true}},
  // Carry the professor name into the summary before promoting it to the root.
  {"$addFields": {"ref_data.Prof_Name": "$Prof_Name"}},
  {"$replaceRoot": {"newRoot": "$ref_data"}},
  // Write the result to collection "data3".
  {"$out": "data3"}
])
以上查询生成了下面的集合3。
Collection 3 :
objectid Prof_name subjects_list UUID_list UUID-count subject_count
12 ABDC sub1,sub2,sub3 7897,4532,7876,654 4 3
34 XYZ sub2,sub4,sub8 7897,4532,7876 2 3
现在我想为集合3再增加两列:每个主题的匹配计数列表,以及与每个主题相关联的 UUID 列表,如下所示。集合3:
objectid Prof_name subjects_list UUID_list UUID-count subject_count each_sub_count UUID-assocaited_sub
12 ABDC sub1,sub2,sub3 7897,4532,7876,654 4 3 sub1:3,sub2:2,sub3:2 [sub1:7897,7876,654, sub2:4532,7876, sub3:7876]
34 XYZ sub2,sub4,sub8 7897,4532,7876 2 3 sub2:2,sub4:1,sub8:0 [sub2:4532,7876, sub4:7897,sub8:0]
最后两列是我需要的。我该如何实现?是可以修改上面的查询来得到它们,还是需要写一个新的查询来获取这些列?
答案 0 :(得分:1)
在 $facet 中再加入一个子管道(下面的 "SUB"),并在 $replaceRoot 中把它的结果一并合并:
{"$facet":{
"UUID":[{"$group":{"_id":{"id":"$_id","UUID":"$UUID"}}},{"$count":"UUID_Count"}],
"COUNT":[
{"$group":{"_id":null,"subjects_list":{"$addToSet":"$SUBJECT"},"UUID_distinct_list":{"$addToSet":"$UUID"}}},
{"$addFields":{"subject_count":{"$size":"$subjects_list"},"UUID_distinct_count":{"$size":"$UUID_distinct_list"}}},
{"$project":{"_id":0}}
],
"SUB":[
{"$group":{"_id":"$SUBJECT","count":{"$sum":1}," UUID_list":{"$push":"$UUID"}}},
{"$group":{"_id":null,"each_sub_count":{"$push":{"sub":"$_id", "count":"$count"}},"UUID-assocaited_sub":{"$push":{"sub":"$_id", uuids:"$UUID_list"}}}},
{"$project":{"_id":0}}
]
}},
{"$replaceRoot":{"newRoot":{"$mergeObjects":[{"$arrayElemAt":["$UUID",0]},{"$arrayElemAt":["$COUNT",0]}, {"$arrayElemAt":["$SUB",0]}]}}}