-- Load the tab-delimited answers file with an explicit 11-field schema.
-- qt and at are read as long; they are presumably Unix epoch seconds
-- (they are multiplied by 1000 before ToDate below) -- TODO confirm.
dataset = LOAD '/user/cloudera/project/answers.txt'
          USING PigStorage('\t')
          AS (
              qid:chararray,
              i:chararray,
              qs:int,
              qt:long,
              tags:chararray,
              qvc:chararray,
              qac:int,
              aid:chararray,
              j:chararray,
              as:int,
              at:long
          );

-- Project each record down to two datetime values plus the tags column.
onedate = FOREACH dataset GENERATE
              ToDate(qt * 1000) AS qstntime,
              ToDate(at * 1000) AS anstime,
              tags;

-- Keep rows whose question hour-of-day minus answer hour-of-day equals 1.
-- NOTE: GetHour yields only the hour field of each datetime, so day, month
-- and year play no part in this comparison.
difftime = FILTER onedate BY (GetHour(qstntime) - GetHour(anstime)) == 1;

-- Print the filtered relation to the console.
DUMP difftime;
输出
(2009-02-18T17:37:11.000-08:00,2009-04-17T16:22:01.000-07:00,"ctags")
(2009-02-18T20:31:17.000-08:00,2009-02-19T19:29:40.000-08:00,"iphone")
(2009-02-18T22:11:11.000-08:00,2009-03-20T21:58:21.000-07:00,"php")
(2009-02-18T23:36:58.000-08:00,2009-02-19T22:18:10.000-08:00,"sqlserver")
(2009-02-19T01:05:39.000-08:00,2009-02-20T00:44:53.000-08:00,"python")
输出不正确。这里只是对两个时间的小时数做减法；而计算时间差时还必须把月份和年份考虑在内。
答案 0 :(得分:0)
由于您使用的是 GetHour，比较的只是小时字段，所以结果不正确。应改用 HoursBetween 来比较完整的日期时间对象，它返回两个 DateTime 对象之间相差的小时数。
-- Keep rows where the whole-datetime difference between qstntime and anstime,
-- expressed in hours, is exactly 1 (compares full datetimes, not hour fields).
-- NOTE(review): HoursBetween appears to compute first argument minus second,
-- so if anstime is later than qstntime the value is negative -- confirm the
-- intended argument order against the Pig documentation.
difftime = FILTER onedate BY HoursBetween(qstntime, anstime) == 1;