SQL Server SQL - 基于特定值或标志的递增序列

时间:2017-12-14 09:48:11

标签: sql-server sequence

我正在尝试从超市的交易中识别购物篮。

我有客户ID、交易ID,以及每笔交易(即从货架上取下商品)的时间。我也有一个篮子ID,但它并不准确:我可以看到同一个篮子里的交易发生在一天中的不同时段,因此它们实际上应该属于不同的篮子。这是数据本身的缺陷,我对此无能为力。

如果一笔交易与另一笔交易之间的时间间隔大于20分钟,就可以认为它属于另一个篮子。我是用SQL中的LAG函数来计算这个间隔的。

对于时间间隔大于20分钟的交易,我添加了一个值为1的标志。所以我现在有篮子ID,而实际的篮子相当于“1 + 篮子”。

有什么办法可以创建real_basket_id吗?

非常感谢

安德鲁

1 个答案:

答案 0 :(得分:0)

作为一种方案,您可以尝试使用递归。请看我的示例。

-- Demo fixture: a session-scoped temp table with one row per transaction.
--   buyer_id   : the customer
--   basket_id  : the basket id as recorded in the source data
--   trans_time : when the transaction happened
CREATE TABLE #baskets (
    buyer_id   int,
    basket_id  int,
    trans_time datetime
);

-- Sample rows for two buyers. Buyer 1 has transactions at 01:00, 01:05,
-- 01:15 and 01:50; buyer 2 has transactions at 02:00 and 02:45.
INSERT INTO #baskets (buyer_id, basket_id, trans_time)
VALUES
    (1, 11, DATETIMEFROMPARTS(2017, 12, 14, 1,  0, 0, 0)),
    (1, 12, DATETIMEFROMPARTS(2017, 12, 14, 1,  5, 0, 0)),
    (1, 12, DATETIMEFROMPARTS(2017, 12, 14, 1, 15, 0, 0)),
    (1, 13, DATETIMEFROMPARTS(2017, 12, 14, 1, 50, 0, 0)),
    (2, 21, DATETIMEFROMPARTS(2017, 12, 14, 2,  0, 0, 0)),
    (2, 22, DATETIMEFROMPARTS(2017, 12, 14, 2, 45, 0, 0));

-- Show the raw input, ordered per buyer by transaction time.
SELECT
    buyer_id,
    basket_id,
    trans_time
FROM #baskets
ORDER BY
    buyer_id,
    trans_time;

-- Derive real_basket_id for every transaction in #baskets.
--
-- Transactions are walked per buyer in trans_time order. A transaction stays
-- in the current real basket while it is at most 20 minutes after the FIRST
-- transaction of that basket (prev_time holds that basket-start time, not the
-- immediately preceding transaction's time); otherwise it opens a new real
-- basket identified by its own basket_id.
--
-- Note: DATEDIFF(MINUTE, ...) counts minute boundaries crossed, so seconds
-- within the minute are ignored by the 20-minute comparison.
-- NOTE(review): rows of one buyer sharing the same trans_time are numbered in
-- an unspecified order -- confirm timestamps are unique per buyer.
;WITH numBaskCTE AS (
  -- Number each buyer's transactions chronologically so the recursion can
  -- step from row n to row n + 1.
  SELECT
    buyer_id,
    basket_id,
    trans_time,
    ROW_NUMBER() OVER (PARTITION BY buyer_id ORDER BY trans_time) AS n
  FROM #baskets
),
checkBaskCTE AS (
  -- Anchor: each buyer's first transaction starts the first real basket.
  SELECT
    buyer_id,
    basket_id,
    trans_time,
    n,
    basket_id  AS real_basket_id,
    trans_time AS prev_time
  FROM numBaskCTE
  WHERE n = 1

  UNION ALL

  -- Step: compare the next transaction with the current basket's start time.
  SELECT
    cur.buyer_id,
    cur.basket_id,
    cur.trans_time,
    cur.n,
    -- Carry forward the running real_basket_id (not the previous row's raw
    -- basket_id) so every row of a basket gets the id of its first row.
    IIF(DATEDIFF(MINUTE, c.prev_time, cur.trans_time) <= 20, c.real_basket_id, cur.basket_id),
    IIF(DATEDIFF(MINUTE, c.prev_time, cur.trans_time) <= 20, c.prev_time, cur.trans_time) AS prev_time
  FROM checkBaskCTE AS c
  JOIN numBaskCTE AS cur
    ON cur.buyer_id = c.buyer_id
   AND cur.n = c.n + 1
)
SELECT
  buyer_id,
  basket_id,
  trans_time,
  real_basket_id
FROM checkBaskCTE
ORDER BY buyer_id, trans_time
-- The recursion depth equals the largest number of transactions of a single
-- buyer; lift the default limit of 100 levels. Termination is guaranteed
-- because n strictly increases and numBaskCTE is finite.
OPTION (MAXRECURSION 0)

-- Clean up the demo temp table.
DROP TABLE #baskets;

如果表中已经有real_basket_id列,那么可以只对新行(WHERE real_basket_id IS NULL)执行更新。

-- Demo fixture for the incremental variant: same shape as before plus a
-- real_basket_id column. Rows with real_basket_id = NULL are the ones that
-- have not been assigned a real basket yet.
CREATE TABLE #baskets (
    buyer_id       int,
    basket_id      int,
    trans_time     datetime,
    real_basket_id int
);

-- Buyers 1 and 3 each have two already-assigned rows followed by
-- unassigned ones; buyer 2 has only unassigned rows.
INSERT INTO #baskets (buyer_id, basket_id, trans_time, real_basket_id)
VALUES
    (1, 10, DATETIMEFROMPARTS(2017, 12, 12, 21, 40, 0, 0), 10),
    (1, 11, DATETIMEFROMPARTS(2017, 12, 13, 22, 30, 0, 0), 11),
    (1, 12, DATETIMEFROMPARTS(2017, 12, 14,  1,  0, 0, 0), NULL),
    (1, 13, DATETIMEFROMPARTS(2017, 12, 14,  1,  5, 0, 0), NULL),
    (1, 13, DATETIMEFROMPARTS(2017, 12, 14,  1, 15, 0, 0), NULL),
    (1, 13, DATETIMEFROMPARTS(2017, 12, 14,  1, 50, 0, 0), NULL),
    (2, 21, DATETIMEFROMPARTS(2017, 12, 14,  2,  0, 0, 0), NULL),
    (2, 22, DATETIMEFROMPARTS(2017, 12, 14,  2, 45, 0, 0), NULL),
    (3, 30, DATETIMEFROMPARTS(2017, 12, 12, 21, 40, 0, 0), 30),
    (3, 31, DATETIMEFROMPARTS(2017, 12, 14,  0, 54, 0, 0), 31),
    (3, 32, DATETIMEFROMPARTS(2017, 12, 14,  1,  0, 0, 0), NULL),
    (3, 33, DATETIMEFROMPARTS(2017, 12, 14,  1,  5, 0, 0), NULL);

-- Show only the rows that still need a real_basket_id.
SELECT
    buyer_id,
    basket_id,
    trans_time,
    real_basket_id
FROM #baskets
WHERE real_basket_id IS NULL
ORDER BY
    buyer_id,
    trans_time;

-- Fill in real_basket_id for the rows of #baskets that do not have one yet.
--
-- Per buyer, the walk starts at the last row kept by the LEAD filter below
-- that precedes the unassigned rows and then steps through the following rows
-- in trans_time order. A row stays in the current real basket while it is at
-- most 20 minutes after the time stored in prev_time (the time of the row
-- that opened the current basket within this walk); otherwise it opens a new
-- real basket identified by its own basket_id.
--
-- Note: DATEDIFF(MINUTE, ...) counts minute boundaries crossed, so seconds
-- within the minute are ignored by the 20-minute comparison.
-- NOTE(review): the UPDATE matches rows on (buyer_id, trans_time) -- confirm
-- timestamps are unique per buyer.
;WITH numBaskCTE AS (
  -- Keep every row whose next row (per buyer, by trans_time) has no
  -- real_basket_id, or that has no next row at all. Assuming unassigned rows
  -- form the tail of each buyer's timeline, this is the last assigned row
  -- plus all new rows.
  SELECT
    buyer_id,
    basket_id,
    real_basket_id,
    trans_time,
    ROW_NUMBER() OVER (PARTITION BY buyer_id ORDER BY trans_time) AS n
  FROM
    (
      SELECT
        buyer_id,
        basket_id,
        trans_time,
        real_basket_id,
        LEAD(real_basket_id) OVER (PARTITION BY buyer_id ORDER BY trans_time) AS next_real_basket_id
      FROM #baskets
    ) AS q
  WHERE next_real_basket_id IS NULL
),
checkBaskCTE AS (
  -- Anchor: the first kept row per buyer. If it is already assigned, reuse
  -- its real_basket_id and mark it as not new; otherwise it opens a basket
  -- under its own basket_id.
  SELECT
    buyer_id,
    basket_id,
    trans_time,
    n,
    ISNULL(real_basket_id, basket_id) AS real_basket_id,
    trans_time AS prev_time,
    IIF(real_basket_id IS NULL, 1, 0) AS is_new_row
  FROM numBaskCTE
  WHERE n = 1

  UNION ALL

  -- Step: compare the next row with the time stored for the current basket.
  SELECT
    cur.buyer_id,
    cur.basket_id,
    cur.trans_time,
    cur.n,
    -- Carry forward the running real_basket_id (not the previous row's raw
    -- basket_id) so every row of a basket shares one id.
    IIF(DATEDIFF(MINUTE, c.prev_time, cur.trans_time) <= 20, c.real_basket_id, cur.basket_id),
    IIF(DATEDIFF(MINUTE, c.prev_time, cur.trans_time) <= 20, c.prev_time, cur.trans_time) AS prev_time,
    1 AS is_new_row
  FROM checkBaskCTE AS c
  JOIN numBaskCTE AS cur
    ON cur.buyer_id = c.buyer_id
   AND cur.n = c.n + 1
)
UPDATE b
SET
  b.real_basket_id = q.real_basket_id
FROM #baskets AS b
JOIN checkBaskCTE AS q
  ON b.buyer_id = q.buyer_id
 AND b.trans_time = q.trans_time
WHERE q.is_new_row = 1
  -- Never overwrite a value that has already been assigned.
  AND b.real_basket_id IS NULL
-- The recursion depth equals the largest number of kept rows of a single
-- buyer; lift the default limit of 100 levels. Termination is guaranteed
-- because n strictly increases and numBaskCTE is finite.
OPTION (MAXRECURSION 0)

-- Show the table after the update, ordered per buyer by transaction time.
SELECT
    buyer_id,
    basket_id,
    trans_time,
    real_basket_id
FROM #baskets
ORDER BY
    buyer_id,
    trans_time;

-- Clean up the demo temp table.
DROP TABLE #baskets;