SQL查询 - 每天确定新访问者

时间:2013-07-10 12:21:10

标签: sql tsql

我的架构如下(简化):

-- Simplified hit log: one row per page hit.
CREATE TABLE MyTable (
   HitDate DateTimeOffset NOT NULL,  -- when the hit happened, including its UTC offset
   IpAddress varchar(15)             -- visitor IP as text; nullable as declared
)

示例行可能如下所示:

'7/10/2013 8:05:29 -07:00' '111.222.333.444'

我想写一个查询,按天(例如 2013 年 7 月 10 日)统计不重复的 IpAddress 数量。这一部分其实很简单,我已经写好了相应的查询。但我真正需要的是:每天统计在该日期之前从未出现过的不重复 IpAddress 的数量。我不关心该日期之后的情况,只关心该日期之前。

例如,假设我有以下数据,这就是我所拥有的:

'7/10/2013 8:05:29 -07:00' '111.222.333.444'
'7/10/2013 12:05:29 -07:00' '111.222.333.222'
'7/9/2013 9:05:29 -07:00' '111.222.333.444'
'7/9/2013 10:05:29 -07:00' '111.222.333.555'
'7/8/2013 11:05:29 -07:00' '111.222.333.222'
'7/8/2013 4:05:29 -07:00' '111.222.333.555'

查询应输出以下内容:

'7/8/2013' 2 (neither IpAddress existed before this date so both are new)
'7/9/2013' 1 (only one of the IpAddresses is new - the one ending in '444')
'7/10/2013' 0 (both IpAddresses had existed before this date)

目标数据库为SQL Server 2012。我正在向第一个正确创建SQL语句的人提供100分的赏金。

9 个答案:

答案 0 :(得分:3)

-- Temp table mirroring the question's schema, loaded with its six sample hits.
CREATE TABLE #MyTable (
    HitDate DateTimeOffset NOT NULL,
    IpAddress varchar(15)
)

-- Explicit column list: the insert stays valid even if the column order changes.
-- The table name is spelled exactly as in CREATE TABLE so it also resolves
-- under a case-sensitive collation.
INSERT INTO #MyTable (HitDate, IpAddress)
VALUES
    ('7/10/2013 8:05:29 -07:00',  '111.222.333.444'),
    ('7/10/2013 12:05:29 -07:00', '111.222.333.222'),
    ('7/9/2013 9:05:29 -07:00',   '111.222.333.444'),
    ('7/9/2013 10:05:29 -07:00',  '111.222.333.555'),
    ('7/8/2013 11:05:29 -07:00',  '111.222.333.222'),
    ('7/8/2013 4:05:29 -07:00',   '111.222.333.555')

-- For each calendar day that has hits, count the IPs whose first-ever hit
-- falls on that day. Days with no new IPs are reported with 0.
;WITH daily_hit AS
(
    -- Reduce every hit to its calendar date.
    SELECT
        CAST(HitDate AS date) AS HitDate,
        IpAddress
    FROM #MyTable
), first_seen AS
(
    -- Earliest day on which each IP appears.
    SELECT MIN(HitDate) AS md, IpAddress
    FROM daily_hit
    GROUP BY IpAddress
), hit_day AS
(
    -- Every day with at least one hit; drives the outer join below.
    SELECT DISTINCT HitDate
    FROM daily_hit
)
SELECT
    d.HitDate,
    -- Unmatched days yield NULL, which COUNT ignores, giving 0.
    COUNT(DISTINCT f.IpAddress) AS IpAddress
FROM hit_day AS d
LEFT JOIN first_seen AS f
    ON f.md = d.HitDate
GROUP BY d.HitDate
ORDER BY d.HitDate;

结果:

HitDate    IpAddress
2013-07-08 2
2013-07-09 1
2013-07-10 0

答案 1 :(得分:2)

尝试以下查询:

-- New visitors per day: for every day that has hits, the number of distinct
-- IPs with no hit on any earlier day. Days without new IPs report 0.
SELECT
    D.HITDATE,
    ISNULL(N.CNT, 0) AS COUNT
FROM
(
    -- Every calendar day that has at least one hit.
    SELECT DISTINCT CONVERT(DATE, HITDATE) AS HITDATE
    FROM MyTable1
) AS D
LEFT OUTER JOIN
(
    -- COUNT(DISTINCT ...) so that several hits by one IP on its first day count once.
    SELECT CONVERT(DATE, H.HITDATE) AS HITDATE, COUNT(DISTINCT H.IpAddress) AS CNT
    FROM MyTable1 AS H
    -- NOT EXISTS rather than NOT IN: IpAddress is nullable, and one earlier row
    -- with a NULL IpAddress makes NOT IN evaluate to UNKNOWN for later rows,
    -- so their IPs would never be counted as new.
    WHERE NOT EXISTS
    (
        SELECT 1
        FROM MyTable1 AS E
        WHERE E.IpAddress = H.IpAddress
          AND CONVERT(DATE, E.HITDATE) < CONVERT(DATE, H.HITDATE)
    )
    GROUP BY CONVERT(DATE, H.HITDATE)
) AS N ON D.HITDATE = N.HITDATE

http://sqlfiddle.com/#!6/7536a/1

答案 2 :(得分:2)

这是我的解决方案:

-- Sample table for this answer: one row per hit (day, IP).
CREATE TABLE Test (
    DT datetime NOT NULL,
    IP varchar(15)
)

-- Explicit column list, and the table name spelled as in CREATE TABLE so it
-- also resolves under a case-sensitive collation.
INSERT INTO Test (DT, IP)
VALUES
    ('2013-07-10', 'aaa'),
    ('2013-07-10', 'bbb'),
    ('2013-07-10', 'ccc'),
    ('2013-07-09', 'aaa'),
    ('2013-07-09', 'bbb'),
    ('2013-07-08', 'aaa'),
    ('2013-07-08', 'bbb')


-- New IPs per day: distinct IPs that have no hit on any earlier day.
-- Days on which no IP is new produce no row.
SELECT
    CAST(T1.DT AS DATE) AS Date,
    COUNT(DISTINCT T1.IP) AS [Unique IPs]
FROM Test AS T1
WHERE NOT EXISTS (
    SELECT 1
    FROM Test AS T2
    WHERE T2.IP = T1.IP
      -- Compare day against day. Comparing a day against the raw T1.DT treats a
      -- row that carries a time-of-day as later than its own date, so the row
      -- would find itself as an "earlier" hit and be excluded.
      AND CAST(T2.DT AS DATE) < CAST(T1.DT AS DATE)
)
GROUP BY CAST(T1.DT AS DATE)

正确结果为:

Date:       Unique IPs
2013-07-08  2
2013-07-10  1

答案 3 :(得分:2)

最简单的方法(在我看来)是找到IP地址出现的最早日期,然后将其用于聚合:

-- First-time visitors per day: take each IP's earliest hit, then count the
-- IPs by the calendar day of that hit.
;with first_hit as (
    select IpAddress,
           min(HitDate) as minHitDate
    from MyTable
    group by IpAddress
)
select cast(minHitDate as Date),
       count(*) as FirstTimeVisitors
from first_hit
group by cast(minHitDate as Date)
order by cast(minHitDate as Date);

另一种写法使用 dense_rank(),可以分别统计第一次访问、第二次访问等的访问者数量:

-- Per day: the number of distinct visitors, and how many of them are on their
-- first, second or third distinct day of visiting.
select HitDate,
       count(distinct IpAddress) as NumVisitors,
       -- count(distinct ...) so several hits by one IP on the same day count once;
       -- rows that do not match the CASE yield NULL and are ignored.
       count(distinct case when nth = 1 then IpAddress end) as FirstTime,
       count(distinct case when nth = 2 then IpAddress end) as SecondTime,
       count(distinct case when nth = 3 then IpAddress end) as ThirdTime
from (select IpAddress,
             -- The outer query groups by the day, so it must be projected here.
             cast(HitDate as date) as HitDate,
             -- dense_rank gives all hits of an IP on the same day the same rank:
             -- 1 for its first day, 2 for its second day, and so on.
             dense_rank() over (partition by IpAddress
                                order by cast(HitDate as date)
                               ) as nth
      from MyTable t
     ) i
group by HitDate
order by HitDate;

答案 4 :(得分:1)

[编辑 - 改为使用DateTimeOffset]

我想出了以下内容,首先创建基础MyTable:

-- Test data creation to match schema & examples

-- Base table from the question, loaded with its six sample hits.
create TABLE MyTable (
   HitDate DateTimeOffset  NOT NULL,
   IpAddress varchar(15)
)

-- Explicit column list: the insert stays valid even if the column order changes.
insert into MyTable (HitDate, IpAddress)
values
    ('7/10/2013 8:05:29 -07:00',  '111.222.333.444'),
    ('7/10/2013 12:05:29 -07:00', '111.222.333.222'),
    ('7/9/2013 9:05:29 -07:00',   '111.222.333.444'),
    ('7/9/2013 10:05:29 -07:00',  '111.222.333.555'),
    ('7/8/2013 11:05:29 -07:00',  '111.222.333.222'),
    ('7/8/2013 4:05:29 -07:00',   '111.222.333.555')

-- actual solution starts here

-- Working copy of the hits with the timestamp reduced to a calendar date.
create TABLE #MyTable (
   HitDate date,
   IpAddress varchar(15)
)

-- populate data into required format for main query
-- (column list and the DateTimeOffset -> date conversion are spelled out
-- instead of relying on column order and implicit conversion)
insert into #MyTable (HitDate, IpAddress)
select cast(HitDate as date), IpAddress
from MyTable

-- main query

-- One row per day with hits: the day formatted mm/dd/yyyy and the number of
-- IPs first seen on that day (0 when none).
select
    convert(varchar(10), mainDates.HitDate, 101) HitDate,
    isnull(dateCounts.counter, 0) NewIPAddresses
from (select distinct HitDate from #MyTable) mainDates
left outer join
    (
        -- Anti-join: an IP is new on a day when the outer join finds no row
        -- for the same IP on an earlier day (sub.IpAddress is null).
        -- count(distinct ...) so several hits by one IP on its first day count once.
        select main.HitDate dateValue, count(distinct main.IpAddress) counter
        from #MyTable main
        left outer join #MyTable sub
            on main.IpAddress = sub.IpAddress and main.HitDate > sub.HitDate
        where sub.IpAddress is null
        group by main.HitDate
    ) dateCounts on dateCounts.dateValue = mainDates.HitDate
-- Sort on the date column, not on its mm/dd/yyyy text form.
order by mainDates.HitDate

给出:

    HitDate        NewIPAddresses 
    -------------- --------------
    07/08/2013     2
    07/09/2013     1
    07/10/2013     0

答案 5 :(得分:1)

试试这个

-- Per day, count the hits that have no earlier hit from the same IP.
-- The self-join pairs each hit with every earlier hit by the same IP;
-- a hit with no such partner keeps NULLs on the right side and is counted.
SELECT
    CONVERT(DATE, hit.[HitDate]) AS [HitDate],
    COUNT(CASE WHEN earlier.[IpAddress] IS NULL THEN 1 END) AS IpAddress
FROM YourTableName AS hit
LEFT JOIN YourTableName AS earlier
    ON earlier.IpAddress = hit.IpAddress
   AND earlier.[HitDate] < hit.[HitDate]
GROUP BY CONVERT(DATE, hit.[HitDate])

SQL FIDDLE DEMO

data.stackexchange demo

输出:

HitDate    IpAddress
2013-07-08  2
2013-07-09  1
2013-07-10  0

答案 6 :(得分:0)

这看起来也不错,但有点复杂......

-- List the IPs whose first hit falls on @TargetDate (the new visitors of that day).
DECLARE @TargetDate DATE = '2013-07-09';

SELECT IPAddress
FROM [MyTable]
-- Hits after the target day cannot change an IP's first-seen day.
WHERE CONVERT(DATE , HITDATE) <= @TargetDate
GROUP BY IPAddress
-- The first-seen day must be the target day. Testing COUNT(*) = 1 instead would
-- drop an IP that hits more than once on its first day.
HAVING MIN(CONVERT(DATE , HITDATE)) = @TargetDate;

答案 7 :(得分:0)

我在T-SQL中使用了一个游标来实现所需的结果。

以下代码:

-- Load the six sample hits from the question in a single statement.
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('7/10/2013 8:05:29 -07:00',  '111.222.333.444'),
    ('7/10/2013 12:05:29 -07:00', '111.222.333.222'),
    ('7/9/2013 9:05:29 -07:00',   '111.222.333.444'),
    ('7/9/2013 10:05:29 -07:00',  '111.222.333.555'),
    ('7/8/2013 11:05:29 -07:00',  '111.222.333.222'),
    ('7/8/2013 4:05:29 -07:00',   '111.222.333.555')


-- Walk the distinct hit days in ascending order and, for each day, emit one
-- result set: (day, number of distinct IPs first seen on that day).
DECLARE @HitDate DATE

DECLARE cCursor CURSOR FAST_FORWARD LOCAL
FOR
SELECT 
    DISTINCT CAST(HitDate AS DATE)
FROM 
    MyTable
ORDER BY
    CAST(HitDate AS DATE) ASC

OPEN cCursor

FETCH NEXT FROM cCursor INTO @HitDate
WHILE (@@FETCH_STATUS = 0)
BEGIN

    -- COUNT(DISTINCT ...) so several hits by one IP on its first day count once.
    SELECT @HitDate, COUNT(DISTINCT cur.IpAddress)
    FROM MyTable AS cur
    WHERE CAST(cur.HitDate AS DATE) = @HitDate
      -- NOT EXISTS rather than NOT IN: IpAddress is nullable, and a NULL in the
      -- NOT IN list would make the predicate UNKNOWN and drop the row.
      AND NOT EXISTS (
            SELECT 1
            FROM MyTable AS prev
            WHERE prev.IpAddress = cur.IpAddress
              -- Compare calendar days on both sides; comparing the raw
              -- DateTimeOffset with a DATE mixes an instant with a day boundary
              -- and can misclassify hits near midnight.
              AND CAST(prev.HitDate AS DATE) < @HitDate
          )

    FETCH NEXT FROM cCursor INTO @HitDate
END

CLOSE cCursor
DEALLOCATE cCursor

输出如下:

2013-07-08 2
2013-07-09 1
2013-07-10 0

答案 8 :(得分:0)

试试这个:

    -- Per day, count the hits that are the very first hit of their IP.
    -- firstDate holds each IP's earliest hit; only a hit matching that exact
    -- timestamp joins, and COUNT ignores the NULLs left by non-matching hits.
    ;WITH firstDate AS (
        SELECT IpAddress,
               MIN(HitDate) AS FirstHitDate
        FROM MyTable
        GROUP BY IpAddress
    )
    SELECT
        CONVERT(DATE, mt.HitDate) AS hitDate,
        COUNT(firstDate.ipAddress) AS newIPAddresses
    FROM MyTable AS mt
    LEFT JOIN firstDate
        ON firstDate.FirstHitDate = mt.HitDate
       AND firstDate.IpAddress = mt.IpAddress
    GROUP BY CONVERT(DATE, mt.HitDate)
    ORDER BY CONVERT(DATE, mt.HitDate)

输出:

hitDate     newIPAddress
2013-07-08  2
2013-07-09  1
2013-07-10  0