数据
session time_interval activity
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:40 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:41 (null)
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:42 (null)
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:43 (null)
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:44 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:45 (null)
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:46 running
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:47 (null)
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:48 (null)
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:49 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:50 walking
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:51 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:52 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:53 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:54 running
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:55 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:56 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:57 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:58 resting
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:59 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:17:00 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:17:01 (null)
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:17:02 walking
SQL
-- Asker's attempt: tags every row with the first activity of its session.
-- The frame spans the whole partition, so FIRST_VALUE yields the same value
-- for all rows of a session rather than the most recent preceding one.
SELECT
    src.session,
    src.time_interval,
    src.activity,
    FIRST_VALUE(src.activity) OVER (
        PARTITION BY src.session
        ORDER BY src.time_interval
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS activity_b
FROM my_table AS src;
但这只会取到每个会话的第一个值。如何让每一秒都取到它之前最近的一个非空值?
期望的结果
session time_interval activity
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:40 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:41 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:42 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:43 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:44 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:45 walking
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:46 running
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:47 running
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:48 running
c889ddb532e76c961c2944dd90b10142 2017-05-25 20:16:49 running
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:50 walking
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:51 walking
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:52 walking
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:53 walking
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:54 running
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:55 running
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:56 running
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:57 running
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:58 resting
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:16:59 resting
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:17:00 resting
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:17:01 resting
dddjg894hlog8sdlf2090288fmma201c 2017-05-25 20:17:02 walking
SQL Fiddle 目前已满负荷(无法使用),所以这里直接提供一些 DDL
-- Fixture for the forward-fill question: one row per session per second.
-- Missing activity readings are stored as empty strings ('') here, whereas
-- the sample listing displays them as (null).
CREATE TABLE public.my_table (
    session       varchar(32),
    time_interval timestamp,
    activity      varchar(10)
);

-- Explicit column list so the load does not depend on column order.
INSERT INTO public.my_table (session, time_interval, activity)
VALUES
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:40', 'walking'),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:41', ''),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:42', ''),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:43', ''),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:44', 'walking'),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:45', ''),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:46', 'running'),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:47', ''),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:48', ''),
    ('c889ddb532e76c961c2944dd90b10142', '2017-05-25 20:16:49', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:50', 'walking'),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:51', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:52', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:53', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:54', 'running'),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:55', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:56', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:57', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:58', 'resting'),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:16:59', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:17:00', ''),
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:17:01', ''),
    -- Final reading is 'walking', matching the sample data and the expected result.
    ('dddjg894hlog8sdlf2090288fmma201c', '2017-05-25 20:17:02', 'walking');
答案 0 :(得分:1)
这正是需要 `ignore nulls` 选项的场景,但该选项在这里不可用。因此,一种方法是使用累计 `max()`(窗口函数)再配合 `join`:
-- Forward-fill: every row gets the most recent non-missing activity of its session.
-- Missing readings may be NULL (as in the sample listing) or '' (as in the
-- INSERT statements); NULLIF folds both into NULL before the test.
-- Assumes (session, time_interval) is unique in my_table; duplicates would
-- multiply rows in the join -- TODO confirm.
WITH last_reading AS (
    -- value_ti: latest timestamp, up to and including this row, that carries
    -- an activity. MAX skips the NULLs the CASE yields for missing readings.
    SELECT
        m.session,
        m.time_interval,
        MAX(CASE WHEN NULLIF(m.activity, '') IS NOT NULL THEN m.time_interval END)
            OVER (PARTITION BY m.session ORDER BY m.time_interval) AS value_ti
    FROM my_table AS m
)
SELECT
    lr.session,
    lr.time_interval,
    src.activity
FROM last_reading AS lr
-- LEFT JOIN keeps rows that precede the first reading of a session;
-- their activity stays NULL.
LEFT JOIN my_table AS src
    ON src.session = lr.session
    AND src.time_interval = lr.value_ti
ORDER BY lr.session, lr.time_interval;
这会为每一行计算出最近一次活动值不为 `NULL` 的时间点,然后通过连接(join)取得该时间点对应的活动。
如果您确定连续的 `NULL` 不会超过 3 个,也可以使用 `lag()`:
-- Forward-fill using LAG. Valid only when a session never has more than three
-- consecutive missing readings: COALESCE looks back at most three rows and
-- returns NULL if none of them carries an activity.
-- NULLIF treats '' (used by the INSERT statements) the same as NULL.
SELECT
    m.session,
    m.time_interval,
    COALESCE(
        NULLIF(m.activity, ''),
        LAG(NULLIF(m.activity, ''), 1) OVER (PARTITION BY m.session ORDER BY m.time_interval),
        LAG(NULLIF(m.activity, ''), 2) OVER (PARTITION BY m.session ORDER BY m.time_interval),
        LAG(NULLIF(m.activity, ''), 3) OVER (PARTITION BY m.session ORDER BY m.time_interval)
    ) AS activity
FROM my_table AS m
ORDER BY m.session, m.time_interval;
答案 1 :(得分:1)
注意:在示例数据中,缺失值表示为 `activity is null`,但在提供的 insert 语句中,缺失值表示为 `activity is ''`。我在下面的示例中使用 `''`。如果您的数据使用的是空值(null),请相应修改此示例。
您可以把活动相同的连续行划分为“子会话”。做法是:每当 `activity != ''` 时,就把一个累加的虚拟变量加 1。在内部查询中完成这一步之后,就可以使用 `FIRST_VALUE` 窗口函数得到所需的结果。
-- Forward-fill via "sub-sessions": a running count of non-empty readings
-- labels each reading together with the blank rows that follow it.
-- FIRST_VALUE then copies the reading onto every row of its group.
SELECT
    sub."session",
    sub.time_interval,
    -- The window must be ordered: without ORDER BY the "first" row of a group
    -- is unspecified and could be one of the blank rows.
    FIRST_VALUE(sub.activity) OVER (
        PARTITION BY sub."session", sub.sub_session
        ORDER BY sub.time_interval
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS activity
FROM (
    SELECT
        "session",
        time_interval,
        activity,
        -- Increments on each non-empty activity. A NULL activity makes the
        -- comparison unknown, so it falls to ELSE 0 just like ''.
        SUM(CASE WHEN activity <> '' THEN 1 ELSE 0 END)
            OVER (PARTITION BY "session" ORDER BY time_interval) AS sub_session
    FROM my_table
) AS sub
ORDER BY sub."session", sub.time_interval;