我想用 RethinkDB 的 Python 驱动对两个字段分别进行计数。示例:我的表如下……
频道表(channel):
{
"channel_name": "channel01" ,
"id": "58115c5b-af1f-4b1a-b572-20611ba34ee5" ,
"userid": "b9936a2f-6cea-41ef-a7f8-1812c3192112" ,
"visibility": "Public"
}
{
"channel_name": "channel02" ,
"id": "1852fac4-3056-46d9-9dfa-2cd969872daa" ,
"userid": "b9936a2f-6cea-41ef-a7f8-1812c3192112" ,
"visibility": "Public"
}
{
"channel_name": "channel03" ,
"id": "4304fa69-173b-4284-9bf8-d9078bb018f8" ,
"userid": "ef5e924e-9cab-44a1-a381-16b35d0d7578" ,
"visibility": "Public"
}
事件表(events):
{
"content": "Welcome to channel01" ,
"id": "cc9bb13e-07e1-4ec9-b1ec-4546b8f8feda" ,
"parent": "channel01" ,
"type": "message" ,
"user": {
"user_id": "b9936a2f-6cea-41ef-a7f8-1812c3192112" ,
"user_name": "wymh"
}
}
{
"content": "hello" ,
"id": "288847a9-ffb5-4531-bdc3-ddb3effbd993" ,
"parent": "channel02" ,
"type": "message" ,
"user": {
"user_id": "b9936a2f-6cea-41ef-a7f8-1812c3192112" ,
"user_name": "wymh"
}
}
{
"content": "hi" ,
"id": "3232f906-d9f1-43af-a594-5da82376f296" ,
"parent": "channel02" ,
"type": "message" ,
"user": {
"user_id": "ef5e924e-9cab-44a1-a381-16b35d0d7578" ,
"user_name": "waiyan"
}
}
{
"content": "Nice to meet you" ,
"id": "77b7afb3-af98-47a0-84bf-9ac8bd1fe14c" ,
"parent": "channel02" ,
"type": "message" ,
"user": {
"user_id": "b9936a2f-6cea-41ef-a7f8-1812c3192112" ,
"user_name": "wymh"
}
}
{
"content": "hi" ,
"id": "31068384-4734-4733-a7f7-677edd21e557" ,
"parent": "channel03" ,
"type": "message" ,
"user": {
"user_id": "ef5e924e-9cab-44a1-a381-16b35d0d7578" ,
"user_name": "waiyan"
}
}
{
"content": "hi" ,
"id": "0b535878-3cac-4619-9c48-f0985a7ee587" ,
"parent": "channel03" ,
"type": "message" ,
"user": {
"user_id": "7e485c95-f0b6-45f7-8af2-d93bde0ea8a3" ,
"user_name": "user01"
}
}
我的查询是:
channel_count =(yield r.table("channel").filter(r.row['visibility'] != 'private').inner_join(
r.table("events"),
lambda channel, events:
(channel["channel_name"] == events["parent"])
).zip().group('channel_name').count().run())
我的查询输出数据是:
{u'channel01': 1, u'channel02': 3, u'channel03': 2}
我想要得到如下结果:
{'channel01': 1, 'userid': 1}
{'channel02': 3, 'userid': 2}
{'channel03': 2, 'userid': 2}
请帮我看看这个 Python RethinkDB 查询应该怎么写?
答案 0 :(得分:1)
如果您要查找每个频道的唯一用户 ID 数量以及事件数量,我会这样做(我把查询拆成了多行,个人认为这样更易读)……
>>> events_per_channel = (
r
.table("channel")
.filter(r.row['visibility'] != 'private')
.inner_join(
r.table("events"),
lambda channel,
events: (channel["channel_name"] == events["parent"])
)
.zip()
.map(
{
'channel_name': r.row['channel_name'],
'user_id': r.row['user']['user_id']
}
)
.group('channel_name')
.count()
.ungroup()
.map(
lambda x:
{
'channel_name': x['group'],
'events': x['reduction'],
'uniq_user_ids': (
r
.table('events')
.filter(
{
'parent': x['group']
}
)
.coerce_to('array')
.map(lambda y: y['user']['user_id'])
.distinct()
.count()
)
}
)
.run()
)
>>> print json.dumps(events_per_channel, indent=4)
[
{
"channel_name": "channel01",
"events": 1,
"uniq_user_ids": 1
},
{
"channel_name": "channel02",
"events": 3,
"uniq_user_ids": 2
},
{
"channel_name": "channel03",
"events": 2,
"uniq_user_ids": 2
}
]
基本上,我是用一个嵌套查询来统计每个频道的唯一用户数,并使用 ungroup,这样就可以通过 reduction 字段从分组结果中取出数据。输出与您要求的格式并不完全相同:您要求以频道名称作为键、事件数量作为值,而我为每个值都指定了一个键名。如果这不是您真正想要的,请告诉我。