如何将以下 SQL 语句翻译成 Pig Latin?
-- For each (userid, siteid) partition ordered by eventdate, return every row
-- together with a windowed count (c) and its rank within the partition.
SELECT
    userid,
    siteid,
    eventdate,
    COUNT(*) OVER (PARTITION BY userid, siteid ORDER BY eventdate) AS c,
    RANK() OVER (PARTITION BY userid, siteid ORDER BY eventdate) AS rank
FROM views
我看到 Pig 支持窗口函数(window functions):http://pig.apache.org/docs/r0.12.0/api/org/apache/pig/piggybank/evaluation/Over.html
我认为第一个窗口列(即 count(*) over ...)可以这样计算:
-- Attempt at the first window column (count) using piggybank's Over and Stitch UDFs.
-- NOTE(review): assumes piggybank is registered and Over/Stitch resolve
-- (via DEFINE or a fully-qualified class name) -- confirm for your setup.
A = load 'views' as (userid, siteid, eventdate);
-- Group the loaded relation A; each group carries a bag named A.
B = group A by (userid, siteid);
C = foreach B {
    -- Order each (userid, siteid) bag by eventdate before applying the window UDF.
    C1 = order A by eventdate;
    -- Stitch appends the values produced by Over to the tuples of C1.
    generate flatten(Stitch(C1, Over(C1.userid, 'count')));
    -- how do I translate rank() over (partition by userid, siteid order by eventdate) as rank
}
-- D = ???  -- how do I project the fields
我不清楚如何同时使用两个 over 子句,也不清楚最后一步该如何投影(project)出所需字段。
答案 0 :(得分:2)
您可以使用 DataFu 中的 Enumerate,它会为 bag 中的每个元组追加一个索引。
凭印象(未经测试),您的代码可能如下所示:
-- Variant 1: emulate the windowed query with DataFu's Enumerate, which appends
-- an index to every tuple of a bag (constructed with '1' as the starting index).
-- NOTE(review): the DataFu jar must be REGISTERed for this script -- not shown here.
-- NOTE(review): the index is distinct for every tuple, so on tied eventdate values
-- it acts like ROW_NUMBER() rather than RANK(); COUNT(C1) is the size of the whole
-- group repeated on each row, not a running count -- confirm this matches the need.
-- NOTE(review): `rank` may clash with Pig's RANK keyword in some versions -- confirm.
define Enumerate datafu.pig.bags.Enumerate('1');

A = load 'views' as (userid, siteid, eventdate);
-- Group the loaded relation A; each group carries a bag named A.
B = group A by (userid, siteid);
C = foreach B {
    -- Sort the group's tuples so the index follows eventdate order.
    C1 = order A by eventdate;
    generate FLATTEN(group) as (userid, siteid),
             FLATTEN(Enumerate(C1.eventdate)) as (eventdate, rank),
             COUNT(C1) as count;
}
dump C;
或者:
-- Variant 2: enumerate the whole ordered tuples instead of only eventdate, so the
-- key fields come from the bag itself and `group` does not need to be flattened.
-- NOTE(review): the DataFu jar must be REGISTERed for this script -- not shown here.
-- NOTE(review): the index is distinct for every tuple, so on tied eventdate values
-- it acts like ROW_NUMBER() rather than RANK(); COUNT(C1) is the size of the whole
-- group repeated on each row, not a running count -- confirm this matches the need.
-- NOTE(review): `rank` may clash with Pig's RANK keyword in some versions -- confirm.
define Enumerate datafu.pig.bags.Enumerate('1');

A = load 'views' as (userid, siteid, eventdate);
-- Group the loaded relation A; each group carries a bag named A.
B = group A by (userid, siteid);
C = foreach B {
    -- Sort the group's tuples so the index follows eventdate order.
    C1 = order A by eventdate;
    generate FLATTEN(Enumerate(C1)) as (userid, siteid, eventdate, rank),
             COUNT(C1) as count;
}
dump C;