在 SQL 中计算连续天数

时间:2014-03-03 08:36:28

标签: sql postgresql

我发现 Stack Overflow 上有很多关于“连续天数”的问答,但答案都太简短,我看不懂其中的原理。

为了具体说明,我先构造一个模型(或表)。(如果有影响的话,我使用的是 PostgreSQL。)

-- Attendance log: one row per arrival of a user.
CREATE TABLE work (
    -- serial supplies a default value, so the INSERTs below (which omit id)
    -- no longer violate the NOT NULL constraint on id.
    id serial PRIMARY KEY,
    user_id integer NOT NULL,
    arrived_at timestamp with time zone NOT NULL
);


-- ISO 8601 literals (January 3rd and 4th, 2011) are read the same way under
-- every DateStyle setting; '01/03/2011' would mean March 1st under DMY.
insert into work(user_id, arrived_at) values(1, '2011-01-03');
insert into work(user_id, arrived_at) values(1, '2011-01-04');
  1. (最简单的形式)对于给定的用户,我想找到最近一段连续日期的范围。

  2. (我的最终目标)对于给定的用户,我想得到他当前的连续出勤天数。如果他昨天来上班了,那么截至今天他的连续记录仍然有效,所以应显示到昨天为止的连续天数(今天若已到,则再加上今天);但如果他昨天缺勤,那么连续天数是 0 或 1,取决于他今天是否来了。

  3. 假设今天是第 8 天,示例如下(* 表示当天缺勤):

    3 * 5 6 7 * = 3 days (5 to 7)
    3 * 5 6 7 8 = 4 days (5 to 8)
    3 4 5 * 7 * = 1 day (7 to 7)
    3 * * * * * = 0 day 
    3 * * * * 8 = 1 day (8 to 8)
    

4 个答案:

答案 0 :(得分:3)

以下是使用 CTE 解决此问题的方法:
-- Counts the run of day-by-day attendance records that ends today or,
-- when there is no record for today, yesterday.
WITH RECURSIVE CTE (attendanceDate) AS (
    -- Anchor: the later of today's / yesterday's record, if either exists.
    SELECT seed.attendanceDate
    FROM (
        SELECT att.attendanceDate
        FROM attendance AS att
        WHERE att.attendanceDate = current_date
           OR att.attendanceDate = current_date - INTERVAL '1 day'
        ORDER BY att.attendanceDate DESC
        LIMIT 1
    ) AS seed

    UNION ALL

    -- Step: add the record dated exactly one day before a date already found.
    SELECT prev.attendanceDate
    FROM CTE AS streak
    INNER JOIN attendance AS prev
        ON prev.attendanceDate = streak.attendanceDate - INTERVAL '1 day'
)
SELECT COUNT(*)
FROM CTE;

可以在 SQL Fiddle 上查看这段代码。

以下是查询的工作方式:

  1. 它从attendance表中选择今天的记录。如果今天的记录不可用,那么它会选择昨天的记录
  2. 然后递归地加入比当前最早日期再早一天的记录
  3. 如果您想选择最新的连续日期范围而不管用户最近的出勤时间(今天,昨天或前几天),那么CTE的初始化部分必须替换为以下代码段:

    -- Replacement anchor: start from the latest attendance date in the table.
    SELECT MAX(attendanceDate) FROM attendance
    

    [编辑] 以下是SQL Fiddle的查询,它可以解决您的问题#1:SQL Fiddle

答案 1 :(得分:0)

-- Sample data: one row per user per day worked.
CREATE TABLE dayworked (
    id         SERIAL  PRIMARY KEY,
    user_id    INTEGER NOT NULL,
    arrived_at DATE    NOT NULL,
    UNIQUE (user_id, arrived_at)
);

INSERT INTO dayworked (user_id, arrived_at)
VALUES
    -- user 1: single day, then 02-05 through 02-07
    (1, '2014-02-03'),
    (1, '2014-02-05'),
    (1, '2014-02-06'),
    (1, '2014-02-07'),
    -- user 2: single day, then 02-05 through 02-08
    (2, '2014-02-03'),
    (2, '2014-02-05'),
    (2, '2014-02-06'),
    (2, '2014-02-07'),
    (2, '2014-02-08'),
    -- user 3: 02-03 through 02-05, then a single day
    (3, '2014-02-03'),
    (3, '2014-02-04'),
    (3, '2014-02-05'),
    (3, '2014-02-07'),
    -- user 5: one day only
    (5, '2014-02-08');

-- The query: the most recent stretch of consecutive days for each user.
WITH RECURSIVE stretch AS (
    -- Anchor: a day starts a chain when the same user has no row for the
    -- day before it.
    SELECT
        head.user_id    AS user_id,
        head.arrived_at AS first_day,
        head.arrived_at AS last_day,
        1::INTEGER      AS nday
    FROM dayworked AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM dayworked AS prev
        WHERE prev.user_id = head.user_id
          AND prev.arrived_at = head.arrived_at - 1
    )

    UNION ALL

    -- Step: extend a chain with the row whose previous day is the chain's
    -- current last day.
    SELECT
        nxt.user_id    AS user_id,
        run.first_day  AS first_day,
        nxt.arrived_at AS last_day,
        run.nday + 1   AS nday
    FROM stretch AS run
    INNER JOIN dayworked AS nxt
        ON nxt.user_id = run.user_id
       AND nxt.arrived_at - 1 = run.last_day
)
SELECT
    cur.user_id,
    cur.first_day,
    cur.last_day,
    cur.nday
FROM stretch AS cur
-- Either more than one consecutive day, or a chain that starts today.
WHERE (cur.first_day = CURRENT_DATE OR cur.nday > 1)
  -- Only the most recent stretch: no chain of this user starts later.
  AND NOT EXISTS (
        SELECT 1
        FROM stretch AS later
        WHERE later.user_id = cur.user_id
          AND later.first_day > cur.first_day
  )
  -- Omit partial chains: no longer chain shares this first day.
  AND NOT EXISTS (
        SELECT 1
        FROM stretch AS longer
        WHERE longer.user_id = cur.user_id
          AND longer.first_day = cur.first_day
          AND longer.last_day > cur.last_day
  );

结果:

CREATE TABLE
INSERT 0 14
 user_id | first_day  |  last_day  | nday 
---------+------------+------------+------
       1 | 2014-02-05 | 2014-02-07 |    3
       2 | 2014-02-05 | 2014-02-08 |    4
(2 rows)

答案 2 :(得分:0)

您可以使用范围类型创建聚合:

-- State transition function for the "consecutive" aggregate.
--   $1: the range accumulated so far
--   $2: the next arrival time
-- Returns $1 stretched up to $2 when $2 falls on the same calendar day as the
-- range's upper bound or on the day after it; otherwise starts a new
-- single-point range at $2. Declared strict, so it is not called with NULL
-- arguments.
Create function sfunc (tstzrange, timestamptz)
    returns tstzrange
    language sql strict as $$
        -- Compare calendar dates, not elapsed time: arriving at 08:00 one day
        -- and 09:30 the next is more than '1 day' apart but still consecutive.
        -- The ::date casts use the session TimeZone setting.
        -- When upper($1) is NULL (unbounded state) the test is NULL and the
        -- ELSE branch starts a fresh range.
        select case when $2::date - upper($1)::date <= 1
                then tstzrange(lower($1), $2, '[]')
                else tstzrange($2, $2, '[]') end
    $$;

-- Aggregate over arrival times that keeps a tstzrange as its running state,
-- advanced by sfunc; the state starts out as the range with both bounds omitted.
CREATE AGGREGATE consecutive (timestamptz) (
    SFUNC    = sfunc,
    STYPE    = tstzrange,
    INITCOND = '[,]'
);

按到达时间的顺序使用该聚合,即可得到每个用户最后一段连续到达的日期范围:

-- One row per user: the aggregate's final state after feeding it that user's
-- arrivals in ascending order.
SELECT
    w.user_id,
    consecutive(w.arrived_at ORDER BY w.arrived_at)
FROM work AS w
GROUP BY w.user_id;

    ┌─────────┬─────────────────────────────────────────────────────┐
    │ user_id │                     consecutive                     │
    ├─────────┼─────────────────────────────────────────────────────┤
    │       1 │ ["2011-01-03 00:00:00+02","2011-01-05 00:00:00+02"] │
    │       2 │ ["2011-01-06 00:00:00+02","2011-01-06 00:00:00+02"] │
    └─────────┴─────────────────────────────────────────────────────┘

在窗口函数中使用聚合:

-- Every work row together with the running aggregate state up to that row,
-- computed separately per user in arrival order.
SELECT
    w.*,
    consecutive(w.arrived_at) OVER per_user
FROM work AS w
WINDOW per_user AS (PARTITION BY w.user_id ORDER BY w.arrived_at);

    ┌────┬─────────┬────────────────────────┬─────────────────────────────────────────────────────┐
    │ id │ user_id │       arrived_at       │                     consecutive                     │
    ├────┼─────────┼────────────────────────┼─────────────────────────────────────────────────────┤
    │  1 │       1 │ 2011-01-03 00:00:00+02 │ ["2011-01-03 00:00:00+02","2011-01-03 00:00:00+02"] │
    │  2 │       1 │ 2011-01-04 00:00:00+02 │ ["2011-01-03 00:00:00+02","2011-01-04 00:00:00+02"] │
    │  3 │       1 │ 2011-01-05 00:00:00+02 │ ["2011-01-03 00:00:00+02","2011-01-05 00:00:00+02"] │
    │  4 │       2 │ 2011-01-06 00:00:00+02 │ ["2011-01-06 00:00:00+02","2011-01-06 00:00:00+02"] │
    └────┴─────────┴────────────────────────┴─────────────────────────────────────────────────────┘

对上述结果再做一次查询,即可得到所需内容:

-- For user 1: the latest arrival whose running range spans more than a single
-- instant. "days" is upper bound minus lower bound of that range, i.e. the
-- time between the first and last arrival in it.
WITH work_detail AS (
    SELECT
        w.*,
        consecutive(w.arrived_at) OVER per_user
    FROM work AS w
    WINDOW per_user AS (PARTITION BY w.user_id ORDER BY w.arrived_at)
)
SELECT
    wd.arrived_at,
    upper(wd.consecutive) - lower(wd.consecutive) AS days
FROM work_detail AS wd
WHERE wd.user_id = 1
  AND upper(wd.consecutive) <> lower(wd.consecutive)
ORDER BY wd.arrived_at DESC
LIMIT 1;

    ┌────────────────────────┬────────┐
    │       arrived_at       │  days  │
    ├────────────────────────┼────────┤
    │ 2011-01-05 00:00:00+02 │ 2 days │
    └────────────────────────┴────────┘

答案 3 :(得分:0)

你甚至可以在没有递归CTE的情况下做到这一点:
使用 generate_series()、LEFT JOIN、窗口函数 count() 以及最后的 LIMIT 1。

“今天”若有记录则计 1 天,再加上截至“昨天”的连续天数:

-- Current streak for user 1: the number of rows dated today or later
-- (1 / 0 for "today") plus the count of consecutive days up until "yesterday".
WITH lookback AS (
    -- One candidate per day offset 1..8 (maximum = 8); day_ct is the running
    -- number of matching work rows from offset 1 up to this offset.
    SELECT
        d.ct,
        count(w.arrived_at) OVER (ORDER BY d.ct) AS day_ct
    FROM generate_series(1, 8) AS d(ct)
    LEFT JOIN work AS w
        ON  w.arrived_at >= current_date - d.ct
        AND w.arrived_at <  current_date - (d.ct - 1)
        AND w.user_id = 1    -- given user
)
SELECT count(*)
     + COALESCE((
           -- Largest offset at which the running count still equals the offset.
           SELECT max(lb.ct)
           FROM lookback AS lb
           WHERE lb.ct = lb.day_ct
       ), 0) AS total
FROM work
WHERE user_id = 1                 -- given user
  AND arrived_at >= current_date  -- no future timestamps

这里假设每个用户每天只有 0 或 1 条记录。这个查询应该很快。

为获得最佳性能(无论是这种方案还是 CTE 方案),应当创建如下的多列索引:

-- Multicolumn index on work, leading with user_id and then arrived_at.
CREATE INDEX foo_idx
    ON work USING btree (user_id, arrived_at);