TSQL:查找连续3个月都有订单的客户

时间:2010-09-18 21:08:03

标签: sql sql-server sql-server-2005 tsql grouping

请帮我生成以下查询。假设我有客户表和订单表。

客户表

CustID CustName

1      AA     
2      BB
3      CC
4      DD  

订单表

OrderID  OrderDate          CustID
100      01-JAN-2000        1  
101      05-FEB-2000        1     
102      10-MAR-2000        1 
103      01-NOV-2000        2    
104      05-APR-2001        2 
105      07-MAR-2002        2
106      01-JUL-2003        1
107      01-SEP-2004        4
108      01-APR-2005        4
109      01-MAY-2006        3 
110      05-MAY-2007        1  
111      07-JUN-2007        1
112      06-JUL-2007        1 

我想找出连续三个月都下过订单的客户(查询可以使用 SQL Server 2005 或 2008 的特性)。

所需的输出是:

CustName      Year   OrderDate   

    AA        2000  01-JAN-2000       
    AA        2000  05-FEB-2000
    AA        2000  10-MAR-2000

    AA        2007  05-MAY-2007        
    AA        2007  07-JUN-2007        
    AA        2007  06-JUL-2007         

4 个答案:

答案 0 :(得分:7)

编辑:去掉了 MAX() OVER (PARTITION BY ...),因为它似乎会严重拖慢性能。

-- Lists every order that falls inside a run of three or more consecutive
-- calendar months for the same customer ("gaps and islands" technique).
;WITH OrderMonths AS (
    -- Turn each order date into a linear month number so that adjacent
    -- calendar months (including December -> January) differ by exactly 1.
    SELECT
        CustID,
        OrderDate,
        YEAR(OrderDate) * 12 + MONTH(OrderDate) AS YM
    FROM Orders
),
MonthIslands AS (
    -- Within one customer, consecutive months share the same value of
    -- (month number - dense rank); that value labels the island.
    SELECT
        CustID,
        OrderDate,
        YM,
        YM - DENSE_RANK() OVER (PARTITION BY CustID ORDER BY YM) AS G
    FROM OrderMonths
),
QualifyingRuns AS (
    -- Keep islands whose first and last month are at least two apart
    -- (three or more months) and remember their first/last order date.
    SELECT
        CustID,
        MIN(OrderDate) AS Mn,
        MAX(OrderDate) AS Mx
    FROM MonthIslands
    GROUP BY CustID, G
    HAVING MAX(YM) - MIN(YM) >= 2
)
SELECT
    c.CustName,
    o.OrderDate,
    YEAR(o.OrderDate) AS YEAR
FROM QualifyingRuns AS r
INNER JOIN Orders AS o
    ON  o.CustID = r.CustID
    AND o.OrderDate >= r.Mn      -- inclusive on both ends, same as BETWEEN
    AND o.OrderDate <= r.Mx
INNER JOIN Customers AS c
    ON c.CustID = o.CustID
ORDER BY c.CustName, o.OrderDate

答案 1 :(得分:4)

这是我的版本。我原本只是出于好奇把它贴出来,用来展示思考这个问题的另一种方式。结果它比我预想的更有用,因为它的性能甚至超过了 Martin Smith 那个很酷的“分组岛”(grouping islands)解决方案。不过,当他去掉一些开销过大的聚合窗口函数、改用真正的聚合之后,他的查询就变得非常快了。

解决方案1:连续3个月或更长的区间,通过向前、向后各检查1个月并使用半联接来实现。

-- Solution 1: every order that belongs to a run of three or more
-- consecutive calendar months for the same customer.
WITH CustMonths AS (
   -- One row per customer per calendar month containing at least one order.
   -- Grp is the number of month boundaries between January 2000 and the order.
   SELECT DISTINCT
      O.CustID,
      DateDiff(Month, '20000101', O.OrderDate) AS Grp
   FROM CustOrder O
), RunCenters AS (
   -- Project each month onto itself and both neighbours (-1, 0, +1).
   -- An index that receives three rows has orders in the month before it,
   -- in the month itself and in the month after it.
   SELECT
      M.CustID,
      M.Grp + X.Offset AS Ind
   FROM CustMonths M
   CROSS JOIN (
      SELECT -1 UNION ALL SELECT 0 UNION ALL SELECT 1
   ) X (Offset)
   GROUP BY M.CustID, M.Grp + X.Offset
   HAVING Count(*) = 3
)
SELECT
   C.CustName,
   Year(O.OrderDate) AS [Year],
   O.OrderDate
FROM Cust C
INNER JOIN CustOrder O
   ON O.CustID = C.CustID
WHERE EXISTS (
   -- Semi-join: an order that lies in the windows of several overlapping
   -- centres is still returned only once.
   SELECT 1
   FROM RunCenters A
   WHERE A.CustID = O.CustID
      -- Half-open window [first day of month Ind-1, first day of month Ind+2).
      AND O.OrderDate >= DateAdd(Month, A.Ind - 1, '20000101')
      AND O.OrderDate <  DateAdd(Month, A.Ind + 2, '20000101')
)
ORDER BY C.CustName, O.OrderDate;

解决方案2:恰好连续3个月的模式。如果连续区间达到4个月或更长,则排除这些行。这是通过向前、向后各检查2个月来实现的(本质上是在寻找 N、Y、Y、Y、N 模式)。

-- Solution 2: orders that belong to a run of exactly three consecutive
-- calendar months (runs of four or more months are excluded).
WITH CustMonths AS (
   -- One row per customer per calendar month containing at least one order.
   -- Grp is the number of month boundaries between January 2000 and the order.
   SELECT DISTINCT
      O.CustID,
      DateDiff(Month, '20000101', O.OrderDate) AS Grp
   FROM CustOrder O
), RunCenters AS (
   -- Project each month onto the five indexes from -2 to +2 around it.
   -- An index is the centre of an exact three-month run when it receives
   -- exactly three rows and they come only from offsets -1, 0 and +1,
   -- i.e. the months two before and two after are empty (N,Y,Y,Y,N).
   SELECT
      M.CustID,
      M.Grp + X.Offset AS Ind
   FROM CustMonths M
   CROSS JOIN (
      SELECT -2 UNION ALL SELECT -1 UNION ALL SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2
   ) X (Offset)
   GROUP BY M.CustID, M.Grp + X.Offset
   HAVING Count(*) = 3
      AND Min(X.Offset) = -1
      AND Max(X.Offset) = 1
)
SELECT
   C.CustName,
   Year(O.OrderDate) AS [Year],
   O.OrderDate
FROM Cust C
INNER JOIN CustOrder O
   ON O.CustID = C.CustID
INNER JOIN RunCenters A
   ON  A.CustID = O.CustID
   -- Half-open window [first day of month Ind-1, first day of month Ind+2).
   AND O.OrderDate >= DateAdd(Month, A.Ind - 1, '20000101')
   AND O.OrderDate <  DateAdd(Month, A.Ind + 2, '20000101')
ORDER BY C.CustName, O.OrderDate;

如果有其他人想玩,这是我的表加载脚本:

-- Test-data loader: recreates Cust and CustOrder, then fills them with
-- 1000 randomly named customers and 50000 randomly dated orders.
-- Drop the child table first because CustOrder references Cust.
IF Object_ID('CustOrder', 'U') IS NOT NULL DROP TABLE CustOrder
IF Object_ID('Cust', 'U') IS NOT NULL DROP TABLE Cust
GO
SET NOCOUNT ON
CREATE TABLE Cust (
  CustID int identity(1,1) NOT NULL PRIMARY KEY CLUSTERED,
  CustName varchar(100) UNIQUE
)

CREATE TABLE CustOrder (
   OrderID int identity(100, 1) NOT NULL PRIMARY KEY CLUSTERED,
   CustID int NOT NULL FOREIGN KEY REFERENCES Cust (CustID),
   OrderDate smalldatetime NOT NULL
)

-- @i is first used as "customers still to create" (counts down from 1000 to 0)
-- and then reused as "orders created so far" (counts up from 0 to 50000).
DECLARE @i int
SET @i = 1000
WHILE @i > 0 BEGIN
   -- Build one random six-letter name (one upper-case letter followed by five
   -- lower-case letters). The modulo is applied BEFORE Abs(): Abs() raises an
   -- arithmetic overflow when Checksum() returns -2147483648, whereas
   -- Checksum() % 26 always lies in [-25, 25].
   WITH N AS (
      SELECT
         Nm =
            Char(Abs(Checksum(NewID()) % 26) + 65)
            + Char(Abs(Checksum(NewID()) % 26) + 97)
            + Char(Abs(Checksum(NewID()) % 26) + 97)
            + Char(Abs(Checksum(NewID()) % 26) + 97)
            + Char(Abs(Checksum(NewID()) % 26) + 97)
            + Char(Abs(Checksum(NewID()) % 26) + 97)
   )
   INSERT Cust (CustName)
   SELECT N.Nm
   FROM N
   WHERE NOT EXISTS (
      -- Skip names that already exist so the UNIQUE constraint is never hit.
      SELECT 1
      FROM Cust C
      WHERE
         N.Nm = C.CustName
   )

   -- Must directly follow the INSERT: only count names that were really added.
   SET @i = @i - @@RowCount
END
WHILE @i < 50000 BEGIN
   -- spt_values is used purely as a row source; each pass inserts at most as
   -- many rows as it contains, so the loop repeats until 50000 orders exist.
   INSERT CustOrder (CustID, OrderDate)
   SELECT TOP (50000 - @i)
      Abs(Checksum(NewID()) % 1000) + 1,                          -- random CustID in 1..1000
      DateAdd(Day, Abs(Checksum(NewID()) % 10000), '19900101')    -- random day offset 0..9999
   FROM master.dbo.spt_values
   -- Must directly follow the INSERT so @@RowCount reflects it.
   SET @i = @i + @@RowCount
END

**性能**

以下是3个月或更长时间查询的一些性能测试结果:

Query     CPU   Reads Duration
Martin 1  2297 299412   2348 
Martin 2   625    285    809
Denis     3641    401   3855
Erik      1855  94727   2077

每个查询只运行了一次,但这些数字相当有代表性。Denis,事实证明你的查询表现终究没有那么糟糕。Martin 的查询击败了其他所有人,不过起初他使用了一些开销过大的窗口函数策略。

当然,正如我所指出的,当客户在同一天有两个订单时,Denis 的查询无法返回正确的行,所以除非他修复这个问题,否则他的查询不参与比较。

此外,不同的索引可能会改变这些结果,这一点我不确定。

答案 2 :(得分:1)

给你:

-- Returns every order that belongs to a run of three consecutive calendar
-- months for the same customer, one output row per order row.
--
-- An order qualifies when some order of the same customer ("mid") lies in the
-- same or an adjacent month AND that customer also ordered in the month
-- before and in the month after "mid".
--
-- Written as a semi-join rather than self-join + UNPIVOT + DISTINCT so that
-- two orders sharing the same customer and OrderDate are both kept, and
-- customers that happen to share a name are not merged.
select
 c.CustName
,year(o.OrderDate) as [Year]
,o.OrderDate
from [order] o
inner join Customer c on c.CustId = o.CustId
where exists
(
    select 1
    from [order] mid
    where mid.CustId = o.CustId
      -- o is in the month before, the same month as, or the month after mid
      and datediff(mm, mid.OrderDate, o.OrderDate) between -1 and 1
      -- the customer ordered in the month immediately before mid
      and exists
      (
          select 1
          from [order] prv
          where prv.CustId = mid.CustId
            and datediff(mm, prv.OrderDate, mid.OrderDate) = 1
      )
      -- the customer ordered in the month immediately after mid
      and exists
      (
          select 1
          from [order] nxt
          where nxt.CustId = mid.CustId
            and datediff(mm, nxt.OrderDate, mid.OrderDate) = -1
      )
)
order by c.CustName, o.OrderDate

答案 3 :(得分:0)

这是我的看法。

-- Self-contained demo: loads the question's sample orders into #tmp and lists
-- the orders that belong to a run of three or more consecutive calendar months.
-- Drop any leftover copy first so the script can be re-run in the same session.
if object_id('tempdb..#tmp') is not null drop table #tmp

select 100 as OrderID,convert(datetime,'01-JAN-2000') OrderDate,    1  as CustID  into #tmp union all
    select 101,convert(datetime,'05-FEB-2000'),        1 union all
    select 102,convert(datetime,'10-MAR-2000'),        1 union all
    select 103,convert(datetime,'01-NOV-2000'),        2 union all
    select 104,convert(datetime,'05-APR-2001'),        2 union all
    select 105,convert(datetime,'07-MAR-2002'),        2 union all
    select 106,convert(datetime,'01-JUL-2003'),        1 union all
    select 107,convert(datetime,'01-SEP-2004'),        4 union all
    select 108,convert(datetime,'01-APR-2005'),        4 union all
    select 109,convert(datetime,'01-MAY-2006'),        3 union all
    select 110,convert(datetime,'05-MAY-2007'),        1 union all
    select 111,convert(datetime,'07-JUN-2007'),        1 union all
    select 112,convert(datetime,'06-JUL-2007'),        1


    ;with cte as
    (
        -- ym is a linear month number (year * 12 + month), so December and the
        -- following January differ by exactly 1; a YYYYMM integer would jump
        -- by 89 there and break runs that cross a year boundary.
        -- The dense rank is taken over ym, not OrderDate, so several orders in
        -- the same month share a rank and stay on the same island g.
        select
            OrderID
            ,OrderDate
            ,CustID
            ,year(OrderDate) * 12 + month(OrderDate) as ym
            ,year(OrderDate) * 12 + month(OrderDate)
                - dense_rank() over(partition by CustID
                                    order by year(OrderDate) * 12 + month(OrderDate)) as g
        from #tmp
    ),
    cte2 as
    (
        -- Islands are contiguous, so a span of two or more between the first
        -- and last month means at least three distinct months; counting rows
        -- would wrongly count orders instead of months.
        select
            CustID
            ,g
        from cte
        group by CustID, g
        having max(ym) - min(ym) >= 2
    )
    select
        a.CustID
        ,Yr=Year(b.OrderDate)
        ,b.OrderDate
    from cte2 a join cte b
        on a.CustID=b.CustID and a.g=b.g
    order by a.CustID, b.OrderDate