SQL - 查找表中某列最常见的值

时间:2017-04-26 09:47:51

标签: sql sql-server

我有一个包含两个表的数据库:[Clients] 和 [Transactions]。[Transactions] 表中有一个外键,把其中的一条或多条记录关联到 [Clients] 表中的某条记录。

在 [Transactions] 表中,我有一个名为 'URL' 的字段,里面存放的是网址(顾名思义)。我想遍历 [Transactions] 表中的所有记录,针对 [Clients] 表中的每个客户,找出 'URL' 字段中出现次数最多的值。

一旦得到这些最常见的值,我想把它们写入 [Clients] 表中同样名为 'URL' 的字段(与 [Transactions] 表中的字段同名),并对应到相关联的客户记录上。

我确信大部分问题我都能自己解决,唯一的难点是如何针对许多不同的分组分别找出最常见的值。非常感谢任何帮助!

示例数据:

[Clients]

ID          Name               URL
-----------------------------------
999999999   Testing Client 1   NULL
999999998   Testing Client 2   NULL
999999997   Testing Client 3   NULL
999999996   Testing Client 4   NULL
999999995   Testing Client 5   NULL


[Transactions]

ID      ClientID    URL
-----------------------------------------
73611   999999999   http://www.google.com
73612   999999999   http://www.yahoo.com
73613   999999999   http://www.google.com
73626   999999998   http://www.stackoverflow.com
73627   999999998   http://www.stackoverflow.com
73628   999999998   http://www.slack.com
73629   999999997   http://www.dotnetpearls.com
73630   999999997   http://www.c-sharpcorner.com
73631   999999996   http://www.roastmymealdeal.co.uk
73632   999999996   http://www.roastmymealdeal.co.uk
73633   999999996   http://www.roastmymealdeal.co.uk
73634   999999996   NULL
73635   999999995   NULL
73636   999999995   http://www.w3schools.com
73637   999999995   http://www.w3schools.com

5 个答案:

答案 0 :(得分:0)

这将使用最常见的URL更新Clients表:

-- Sets Clients.URL to the URL that occurs most often in each client's transactions.
--   url_counts  : one row per (ClientID, URL) pair with the number of transactions using it.
--                 NULL URLs are left out so that "no URL" can never win as the most common value.
--   ranked_urls : numbers each client's URLs from most to least frequent (URLRank = 1 is the
--                 winner). Ties are broken alphabetically by URL so repeated runs pick the same row.
-- Clients without any non-NULL transaction URL are not matched by the join and stay untouched.
;WITH url_counts AS (
    SELECT
        t.ClientID,
        t.URL,
        COUNT(*) AS URLCount
    FROM Transactions AS t
    WHERE t.URL IS NOT NULL
    GROUP BY t.ClientID, t.URL
),
ranked_urls AS (
    SELECT
        uc.ClientID,
        uc.URL,
        ROW_NUMBER() OVER (
            PARTITION BY uc.ClientID
            ORDER BY uc.URLCount DESC, uc.URL ASC
        ) AS URLRank
    FROM url_counts AS uc
)
UPDATE c
SET c.URL = r.URL
FROM Clients AS c
INNER JOIN ranked_urls AS r
    ON r.ClientID = c.ID
WHERE r.URLRank = 1;  -- only the highest-ranking URL per client

如果您只想查看每个客户端最常用的URL,请使用以下命令:

-- Read-only preview: returns, per client, the most frequently used transaction URL.
--   url_counts  : number of transactions per (ClientID, URL); NULL URLs are ignored so that
--                 a missing URL is never reported as the most common one.
--   ranked_urls : ranks each client's URLs by that count; ties are broken alphabetically by
--                 URL so the output is the same on every run.
;WITH url_counts AS (
    SELECT
        t.ClientID,
        t.URL,
        COUNT(*) AS URLCount
    FROM Transactions AS t
    WHERE t.URL IS NOT NULL
    GROUP BY t.ClientID, t.URL
),
ranked_urls AS (
    SELECT
        uc.ClientID,
        uc.URL,
        ROW_NUMBER() OVER (
            PARTITION BY uc.ClientID
            ORDER BY uc.URLCount DESC, uc.URL ASC
        ) AS URLRank
    FROM url_counts AS uc
)
SELECT
    r.ClientID,
    r.URL,
    r.URLRank
FROM ranked_urls AS r
WHERE r.URLRank = 1;

答案 1 :(得分:0)

您可以像下面这样使用两个 CTE:

-- Lists every client together with its most frequently used transaction URL.
;WITH temp AS
(
    -- One row per client/URL pair with the number of matching transactions.
    -- The NULL-URL filter sits in the ON clause (not in WHERE) on purpose: NULL URLs are
    -- no longer counted as a candidate, yet the LEFT JOIN still returns clients that have
    -- no usable URL at all (URL = NULL, NumberTransactions = 0).
    SELECT
        cl.ID       AS ClientID,
        t.URL,
        COUNT(t.ID) AS NumberTransactions
    FROM Clients AS cl
    LEFT JOIN [Transactions] AS t
        ON  cl.ID = t.ClientID
        AND t.URL IS NOT NULL
    GROUP BY cl.ID, t.URL
),
temp1 AS
(
    -- Rank each client's URLs from most to least used. The secondary sort on URL makes
    -- the choice between equally frequent URLs repeatable.
    SELECT
        t.ClientID,
        t.URL,
        t.NumberTransactions,
        ROW_NUMBER() OVER (
            PARTITION BY t.ClientID
            ORDER BY t.NumberTransactions DESC, t.URL ASC
        ) AS Rn
    FROM temp AS t
)
SELECT
    t.ClientID,
    t.URL
FROM temp1 AS t
WHERE t.Rn = 1;

答案 2 :(得分:0)

鉴于此数据:

-- Sample fixture: drop any leftovers from an earlier run in this session, then rebuild
-- both temp tables and load the example rows.
IF OBJECT_ID('tempdb..#transactions') IS NOT NULL
    DROP TABLE #transactions;
IF OBJECT_ID('tempdb..#client') IS NOT NULL
    DROP TABLE #client;

CREATE TABLE #client
(
    id   INT,
    name VARCHAR(100),
    url  VARCHAR(100)
);

CREATE TABLE #transactions
(
    id       INT IDENTITY(1, 1),  -- generated in insert order
    clientID INT,
    url      VARCHAR(100)
);

-- Five clients; url starts out empty and is filled in by the update further down.
INSERT INTO #client (id, name, url)
VALUES
    (9, 'a', NULL),
    (8, 'b', NULL),
    (7, 'c', NULL),
    (6, 'd', NULL),
    (5, 'e', NULL);

-- Fifteen transactions, including two rows without a URL.
INSERT INTO #transactions (clientID, url)
VALUES
    (9, 'http://www.google.com'),
    (9, 'http://www.yahoo.com'),
    (9, 'http://www.google.com'),
    (8, 'http://www.stackoverflow.com'),
    (8, 'http://www.stackoverflow.com'),
    (8, 'http://www.slack.com'),
    (7, 'http://www.dotnetpearls.com'),
    (7, 'http://www.c-sharpcorner.com'),
    (6, 'http://www.roastmymealdeal.co.uk'),
    (6, 'http://www.roastmymealdeal.co.uk'),
    (6, 'http://www.roastmymealdeal.co.uk'),
    (6, NULL),
    (5, NULL),
    (5, 'http://www.w3schools.com'),
    (5, 'http://www.w3schools.com');

下面的查询会得到每个 clientID 下每个网址的出现次数。不过,结果中每笔交易仍然各占一行:

-- Returns every row of #transactions once, annotated with how many rows share its
-- (clientID, url) pair. The windowed COUNT does not collapse rows the way GROUP BY would.
SELECT
    COUNT(*) OVER (PARTITION BY t.clientID, t.url) AS urlCount,
    t.URL                                          AS transactionURL,
    t.clientID
FROM #transactions AS t

在该查询的结果中,我们只需要 urlCount 最高的那一行,这可以通过按 urlCount DESC 排序并取 TOP 1 来实现。不过,我们真正需要的是每个客户端各自排名第一的网址。

我们可以使用 CROSS APPLY 来做到这一点,让上述计数和 TOP 1 针对每个客户端各执行一次。CROSS APPLY 会对 #client 表中的每一行运行一次内部查询;内部查询按 client.ID 过滤,从而只计算该客户端的 urlCount。

内部查询按 urlCount DESC 排序,以取得(每个客户端)出现次数最多的 URL。当次数相同时,再按 transactions.ID 排序来决定取哪一行,这样每次运行都会得到相同的结果。

  -- For each client, fetch its most frequent transaction URL.
  -- The applied query runs once per #client row:
  --   * it only looks at that client's transactions that actually carry a URL
  --     (NULL URLs are skipped so "no URL" cannot be picked as the most common value);
  --   * urlCount is the number of rows sharing the same URL for that client;
  --   * TOP (1) keeps the row with the highest urlCount, and t.id breaks ties so the
  --     same row is chosen on every run.
  -- CROSS APPLY returns no row for a client whose applied query is empty.
  SELECT c.url, transURLs.transactionURL, c.id
  FROM #client AS c
  CROSS APPLY (
      SELECT TOP (1)
             COUNT(*) OVER (PARTITION BY t.clientID, t.url) AS urlCount,
             t.URL AS transactionURL
      FROM #transactions AS t
      WHERE t.clientID = c.ID
        AND t.url IS NOT NULL
      ORDER BY urlCount DESC, t.id
  ) AS transURLs;

要完成它,我们只需更新

-- Writes each client's most frequent transaction URL into #client.url.
-- baseData pairs every client row with the winning URL; the UPDATE then goes through the
-- CTE, which modifies the underlying #client rows (only #client columns are assigned).
WITH baseData AS (
    SELECT c.url, transURLs.transactionURL, c.id
    FROM #client AS c
    CROSS APPLY (
        -- Most frequent URL of this client. NULL URLs are skipped so that a client's url
        -- is never overwritten with NULL; t.id breaks ties for a repeatable result.
        SELECT TOP (1)
               COUNT(*) OVER (PARTITION BY t.clientID, t.url) AS urlCount,
               t.URL AS transactionURL
        FROM #transactions AS t
        WHERE t.clientID = c.ID
          AND t.url IS NOT NULL
        ORDER BY urlCount DESC, t.id
    ) AS transURLs
)
UPDATE baseData
SET url = transactionURL;

-- Show the outcome of the update.
SELECT id, name, url
FROM #client;

答案 3 :(得分:0)

-- Updates clients.url with the URL used most often in that client's transactions.
-- The leading semicolon guards against a preceding statement that was not terminated.
;WITH ctetbl (clientid, url, cnt, rowid) AS
(
    -- Count transactions per (clientid, url), then number each client's URLs from most
    -- to least frequent. NULL URLs are excluded so they cannot be chosen, and the
    -- secondary sort on url makes the pick between tied URLs repeatable.
    SELECT
        t.clientid,
        t.url,
        t.cnt,
        ROW_NUMBER() OVER (
            PARTITION BY t.clientid
            ORDER BY t.cnt DESC, t.url ASC
        ) AS rowid
    FROM (
        SELECT clientid, url, COUNT(*) AS cnt
        FROM transactions
        WHERE url IS NOT NULL
        GROUP BY clientid, url
    ) AS t
)
UPDATE c
SET c.url = ct.url
FROM clients AS c
INNER JOIN ctetbl AS ct
    ON c.id = ct.clientid
WHERE ct.rowid = 1;  -- rowid 1 = the most frequent URL of the client

答案 4 :(得分:0)

/************* Script to recreate the scenario *************/

-- Parent table: one row per client; URL is filled in later by the update step.
CREATE TABLE [Clients]
(
    Id   bigint PRIMARY KEY,
    Name nvarchar(100),
    URL  nvarchar(1000)
);

INSERT INTO Clients (Id, Name, URL)
VALUES
    (999999999, 'Testing Client 1', NULL),
    (999999998, 'Testing Client 2', NULL),
    (999999997, 'Testing Client 3', NULL),
    (999999996, 'Testing Client 4', NULL),
    (999999995, 'Testing Client 5', NULL);

-- Child table: every transaction points at its client through ClientID.
CREATE TABLE Transactions
(
    ID       bigint,
    ClientID bigint FOREIGN KEY REFERENCES Clients(ID),
    URL      nvarchar(1000)
);

INSERT INTO Transactions (ID, ClientID, URL)
VALUES
    (73611, 999999999, 'http://www.google.com'),
    (73612, 999999999, 'http://www.yahoo.com'),
    (73613, 999999999, 'http://www.google.com'),
    (73626, 999999998, 'http://www.stackoverflow.com'),
    (73627, 999999998, 'http://www.stackoverflow.com'),
    (73628, 999999998, 'http://www.slack.com'),
    (73629, 999999997, 'http://www.dotnetpearls.com'),
    (73630, 999999997, 'http://www.c-sharpcorner.com'),
    (73631, 999999996, 'http://www.roastmymealdeal.co.uk'),
    (73632, 999999996, 'http://www.roastmymealdeal.co.uk'),
    (73633, 999999996, 'http://www.roastmymealdeal.co.uk'),
    (73634, 999999996, NULL),
    (73635, 999999995, NULL),
    (73636, 999999995, 'http://www.w3schools.com'),
    (73637, 999999995, 'http://www.w3schools.com');

/*************** List the tables *****************/

SELECT * FROM dbo.Clients;
SELECT * FROM dbo.Transactions;
/***************************************************************************************
url_counts
    Group the transactions by ClientID and URL and count how often each URL occurs
    for that client. Rows without a URL are ignored.
Final SELECT
    Rank each client's URLs by that count (Rnk = 1 is the most frequent; URLs with the
    same count share a rank) and store the result in #Temp_Resultant.

The earlier version joined the grouped rows back to a MAX() over the very same
(ClientID, URL) groups. Each group holds exactly one row, so that MAX was always the
row's own count; the join is removed and the count is exposed directly as MaxCount.
**************************************************************************************/

-- Drop a leftover copy so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#Temp_Resultant') IS NOT NULL
    DROP TABLE #Temp_Resultant;

;WITH url_counts AS
(
    SELECT
        ClientID,
        URL,
        COUNT(*) AS CountOfURL
    FROM Transactions
    WHERE URL IS NOT NULL
    GROUP BY ClientID, URL
)
SELECT
    ClientID,
    URL,
    CountOfURL AS MaxCount,
    DENSE_RANK() OVER (PARTITION BY ClientID ORDER BY CountOfURL DESC) AS Rnk
INTO #Temp_Resultant
FROM url_counts;

/*******************************************************************
  Using #Temp_Resultant, build one comma-separated list per client of the URL(s)
  with Rnk = 1. More than one URL qualifies when several are tied for the highest
  count (for example two links used once each); all of them are written to the
  Clients table.

  FOR XML PATH('') does the concatenation. The TYPE directive together with
  .value('.', 'nvarchar(max)') returns the text un-escaped; without it, characters
  such as & < > inside a URL would be stored as &amp; &lt; &gt;.
  The inner ORDER BY fixes the order of the URLs inside the list.
  STUFF(..., 1, 1, '') removes the leading comma.

  NOTE(review): the list is nvarchar(max) while Clients.URL is declared elsewhere;
  confirm the column is wide enough for clients with many tied URLs.

  At last.. Update based on ClientID
************************************************************************/

;WITH resultant AS
(
    SELECT DISTINCT
        t2.ClientID,
        STUFF(
            (
                SELECT ',' + t1.URL
                FROM #Temp_Resultant AS t1
                WHERE t1.Rnk = 1
                  AND t1.ClientID = t2.ClientID
                ORDER BY t1.URL
                FOR XML PATH(''), TYPE
            ).value('.', 'nvarchar(max)'),
            1, 1, ''
        ) AS CommonURL
    FROM #Temp_Resultant AS t2
)
UPDATE A
SET A.URL = B.CommonURL
FROM Clients AS A
INNER JOIN resultant AS B
    ON A.Id = B.ClientID;

---Check the results
SELECT * FROM Clients;
SELECT * FROM Transactions;