如何计算sql中列的连续行数

时间:2015-11-26 10:52:59

标签: sql sql-server hadoop hive

我有一个有3列的表。

表数据

Customer_id   timestamp   page_url
1234     10-11-2015 10:00:12    page1
1234     10-11-2015 10:00:16   page3
1234     10-11-2015 10:00:19   page2
1234     10-11-2015 10:01:12   page1
1234     10-11-2015 10:01:20   page2   --- this should be counted as 1 
1234     10-11-2015 10:01:50   page2   --- this should be counted as 1
1234     10-11-2015 10:02:12   page4
5678     10-11-2015 09:11:12   page1
5678     10-11-2015 09:30:12   page3
5678     10-11-2015 09:50:10   page1
5678     10-11-2015 10:00:12   page2  -- this should be counted as 1
5678     10-11-2015 10:03:12   page3

结果:总计数应为3

逻辑:我需要统计每个客户按时间戳顺序在访问 page1 之后紧接着访问 page2 的次数;如果客户在 page1 之后有 2 条连续的 page2 记录,则应计为 2。

需要你帮助编写一个sql逻辑。

2 个答案:

答案 0 :(得分:0)

我很确定这不是最佳解决方案,但它确实有效。

-- Sample click-stream data: one row per page view (customer, time of view, page).
-- NOTE(review): the date literals below are written in a regional format, so how
-- day and month are read depends on the session's DATEFORMAT/language setting —
-- confirm the intended reading. The relative order of the rows is the same either way.
DECLARE @tbl AS TABLE (
    CustomerId NVARCHAR(4),
    Stamp      DATETIME2,
    PageUrl    NVARCHAR(20)
);

INSERT INTO @tbl (CustomerId, Stamp, PageUrl)
VALUES
    -- customer 1234
    ('1234', '10-11-2015 10:00:12', 'page1'),
    ('1234', '10-11-2015 10:00:16', 'page3'),
    ('1234', '10-11-2015 10:00:19', 'page2'),
    ('1234', '10-11-2015 10:01:12', 'page1'),
    ('1234', '10-11-2015 10:01:20', 'page2'),
    ('1234', '10-11-2015 10:01:50', 'page2'),
    ('1234', '10-11-2015 10:01:53', 'page1'),
    ('1234', '10-11-2015 10:01:55', 'page2'),
    ('1234', '10-11-2015 10:02:12', 'page4'),
    -- customer 5678
    ('5678', '10-11-2015 09:11:12', 'page1'),
    ('5678', '10-11-2015 09:30:12', 'page3'),
    ('5678', '10-11-2015 09:50:10', 'page1'),
    ('5678', '10-11-2015 10:00:12', 'page2'),
    ('5678', '10-11-2015 10:03:12', 'page3'),
    ('5678', '10-11-2015 10:03:13', 'page1'),
    ('5678', '10-11-2015 10:03:14', 'page2'),
    ('5678', '10-11-2015 10:03:16', 'page2'),
    ('5678', '10-11-2015 10:03:17', 'page2'),
    ('5678', '10-11-2015 10:03:18', 'page1'),
    ('5678', '10-11-2015 10:03:19', 'page1');

-- Number each customer's page views in chronological order (1 = earliest view)
-- and store them in #calls, so that neighbouring views can be paired by position.
-- Views of the same customer with identical timestamps get an arbitrary relative order.
SELECT
    src.CustomerId,
    src.Stamp,
    src.PageUrl,
    ROW_NUMBER() OVER (
        PARTITION BY src.CustomerId
        ORDER BY src.Stamp
    ) AS RowCnt
INTO #calls
FROM @tbl AS src;

-- Pair every page2 view with the view that immediately precedes it for the same
-- customer, keeping only the pairs whose preceding page is page1 or page2.
-- The surviving pairs are renumbered per customer, ordered by the time of the
-- preceding view, and stored in #result.
SELECT
    cur.CustomerId,
    cur.Stamp,
    cur.PageUrl,
    prev.PageUrl AS PreviousPage,
    prev.Stamp   AS PreviousStamp,
    ROW_NUMBER() OVER (
        PARTITION BY prev.CustomerId
        ORDER BY prev.Stamp
    ) AS RowCnt
INTO #result
FROM #calls AS prev
INNER JOIN #calls AS cur
    ON  cur.CustomerId = prev.CustomerId
    AND cur.RowCnt = prev.RowCnt + 1   -- cur is the view directly after prev
WHERE cur.PageUrl = 'page2'
  AND prev.PageUrl IN ('page1', 'page2');


-- Count every page2 view that belongs to an unbroken run of page2 views which
-- starts directly after a page1 view (per customer, in time order).
-- Returns a single row with a single column, Result (0 when nothing qualifies).
--
-- #result holds one row per page2 view whose immediately preceding view was
-- page1 or page2. A run is rebuilt recursively: it starts at a row that was
-- reached from page1 and is extended one row at a time, as long as the next
-- row's predecessor is exactly the row that was just added. Runs that do not
-- start from page1 (e.g. page3, page2, page2, page2) are never entered, so
-- none of their rows are counted.
WITH page2_run AS (
    -- Anchor: page2 views reached directly from page1.
    SELECT  CustomerId,
            Stamp,
            RowCnt
    FROM #result
    WHERE PreviousPage = 'page1'

    UNION ALL

    -- Step: the following #result row of the same customer, provided its
    -- predecessor is the page2 view already in the run.
    SELECT  nxt.CustomerId,
            nxt.Stamp,
            nxt.RowCnt
    FROM page2_run AS cur
    INNER JOIN #result AS nxt
        ON nxt.CustomerId = cur.CustomerId
        AND nxt.RowCnt - 1 = cur.RowCnt
        AND nxt.PreviousStamp = cur.Stamp
        -- Rows reached from page1 are anchors already; never add them twice.
        AND nxt.PreviousPage = 'page2'
)
SELECT COUNT(*) AS Result
FROM page2_run
-- Recursion depth equals the length of the longest page2 run, which may exceed
-- the default limit of 100. RowCnt grows on every step, so the recursion ends.
OPTION (MAXRECURSION 0);

-- Remove both temporary work tables created by this script.
DROP TABLE #calls, #result;

答案 1 :(得分:-1)

以下是统计 page_url 列中不同取值个数的解决方案

-- Returns the number of distinct page_url values in TABLENAME.
SELECT COUNT(DISTINCT page_url)
FROM TABLENAME;