我有一张表,其中包含交易、时间戳和用户信息:
-- Transaction table: one row per transaction, with its timestamp, the user
-- who made it, and the item involved.
-- Every column is nullable, and no primary key, index, or other constraint
-- is declared here.
CREATE TABLE dbo.Transactions
(
    transaction_ts  DATETIME    NULL,
    user_id         BIGINT      NULL,
    transaction_id  BIGINT      NULL,
    item            VARCHAR(50) NULL
);
对于每个 user_id,我需要选出他们在首次交易到其后 72 小时之间进行的所有交易。
-- Build #first: one row per user_id holding that user's earliest transaction
-- timestamp (first_trans) and the end of the 72-hour window that starts at it
-- (first_trans_plus_72).
--
-- A single grouped SELECT replaces the earlier self-join + ALTER + UPDATE.
-- The self-join produced duplicate rows whenever a user had more than one
-- transaction at the exact same earliest timestamp; grouping guarantees
-- exactly one row per user.
DROP TABLE IF EXISTS #first;

SELECT
    MIN(t.transaction_ts)                    AS first_trans,
    t.user_id,
    DATEADD(hour, 72, MIN(t.transaction_ts)) AS first_trans_plus_72
INTO #first
FROM Transactions AS t
-- Both columns are nullable. Rows with a NULL user_id or NULL timestamp
-- never matched the old equality join, so they stay excluded here too.
WHERE t.user_id IS NOT NULL
  AND t.transaction_ts IS NOT NULL
GROUP BY t.user_id;
-- Loop over every user_id found in #first and return, one result set per
-- user, the transactions that fall between that user's first transaction and
-- 72 hours after it (both bounds inclusive).
--
-- The variables are typed (bigint / datetime) and used directly in a static
-- query. The previous version held the datetimes in VARCHAR variables and
-- concatenated them, unquoted, into dynamic SQL, which produced an invalid
-- statement. With a static query there is no generated SQL text to PRINT or
-- EXEC any more.
DECLARE @Table TABLE (user_id bigint, Id int IDENTITY(1,1));

INSERT INTO @Table (user_id)
SELECT DISTINCT user_id
FROM #first;

DECLARE @max int;
DECLARE @user_id bigint;
DECLARE @first datetime;
DECLARE @first_trans_plus_72 datetime;
DECLARE @id int = 1;

SELECT @max = MAX(Id) FROM @Table;

WHILE (@id <= @max)
BEGIN
    -- Pick the user for this iteration.
    SELECT @user_id = user_id FROM @Table WHERE Id = @id;

    -- Fetch both window bounds for that user in one lookup.
    SELECT
        @first = first_trans,
        @first_trans_plus_72 = first_trans_plus_72
    FROM #first
    WHERE user_id = @user_id;

    -- Return this user's transactions inside the window.
    SELECT
        transaction_ts,
        user_id,
        transaction_id,
        item
    FROM Transactions
    WHERE user_id = @user_id
      AND transaction_ts >= @first
      AND transaction_ts <= @first_trans_plus_72;

    SET @id = @id + 1;
END
这样生成的 SQL 在逻辑上是正确的,但日期时间变量是字符串,因此查询执行时会报错。我尝试过将日期时间变量(@first 和 @first_trans_plus_72)声明为 datetime,但这会导致转换错误。
有更简单的方法吗?
答案 0 :(得分:5)
既然一个简单的查询就能解决,为什么还要使用循环?
-- Set-based version: return every transaction made within 72 hours
-- (inclusive) of the same user's earliest transaction.
WITH with_first_ts AS (
    -- Attach each user's earliest transaction timestamp to every row.
    SELECT
        src.*,
        MIN(src.transaction_ts) OVER (PARTITION BY src.user_id) AS min_tts
    FROM transactions AS src
)
SELECT w.*
FROM with_first_ts AS w
WHERE w.transaction_ts <= DATEADD(hour, 72, w.min_tts);
通常,最好使用基于集合的操作来编写代码:这样更简单,性能也更好。
您可以把它并入一条 UPDATE 语句,但我认为没有必要。上面的查询会选出这些交易;您还可以使用 group by user_id 对它们进行汇总——例如统计笔数或对某些值求和。