在没有匹配值的情况下合并来自2个数据集的数据(SQL)

时间:2018-09-27 22:50:38

标签: sql sql-server

我正在创建一个棒球统计数据领先者(排行榜)的文件,但是在将不同统计项的领先者合并到同一行时遇到了麻烦。我只想要每个类别中的前5名,但是,例如,如果多名球员具有相同的安打数(Hits),那么返回的行数可能会超过5(如第二张图片所示)。

我尝试使用ROW_NUMBER并选择前5名,但这样无法包含多名球员并列、且仍应算在前5名之内的那些情况。

我当前的数据集设置如下:

-- Sample data for the two-stat case: one row per (player, stat). Every row
-- fills exactly one stat column and leaves the other NULL, so the at-bats
-- rows and the hits rows share no value that could be used as a join key.

-- Drop any leftover copy so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#Tmp') IS NOT NULL
    DROP TABLE #Tmp;

CREATE TABLE #Tmp
(PlayerName varchar(10)
,AtBats int
,Hits int
);

-- The explicit column list keeps the INSERT valid if columns are later
-- added to or reordered in the table.
INSERT INTO #Tmp (PlayerName, AtBats, Hits)
VALUES
    -- at-bats rows
    ('p1',  30,   NULL),
    ('p3',  27,   NULL),
    ('p2',  22,   NULL),
    ('p9',  15,   NULL),
    ('p7',  10,   NULL),
    -- hits rows (p7 and p1 tie at 5)
    ('p2',  NULL, 15),
    ('p9',  NULL, 12),
    ('p11', NULL, 9),
    ('p3',  NULL, 8),
    ('p7',  NULL, 5),
    ('p1',  NULL, 5);

enter image description here

我可以通过分别选择 where AtBats is not null 和 where Hits is not null 来创建2个数据集,但是这两个数据集之间没有任何可以匹配的值,无法将这些行合并为所需的结果集:

-- Desired result: at-bats leaders and hits leaders side by side, paired by
-- position. The hits side has six rows because p7 and p1 tie at 5 hits, so
-- the last row has no at-bats counterpart and carries NULLs on the left.
SELECT 'p1' as PlayerName_AB, 30 as AtBats, 'p2' as PlayerName_H, 15 as Hits
UNION ALL SELECT 'p3', 27, 'p9', 12   -- the sample data credits the 12 hits to p9, not p1
UNION ALL SELECT 'p2', 22, 'p11', 9
UNION ALL SELECT 'p9', 15, 'p3', 8
UNION ALL SELECT 'p7', 10, 'p7', 5
UNION ALL SELECT NULL, NULL, 'p1', 5

enter image description here   我原以为,如果能用2个数据集完成这项任务,那么再添加第3个、第4个等等就不会太难。我错了……下面的答案适用于2个数据集,但不适用于3个数据集:加入第3个数据集后,结果中会出现空缺(间隙)。

-- Sample data for the three-stat case: one row per (player, stat). Every row
-- fills exactly one of AtBats / Hits / RunsBattedIn and leaves the other two
-- NULL.

-- Drop any leftover copy (for example the two-stat version of #Tmp) so this
-- script can be run, or re-run, in the same session.
IF OBJECT_ID('tempdb..#Tmp') IS NOT NULL
    DROP TABLE #Tmp;

CREATE TABLE #Tmp
(PlayerName varchar(10)
,AtBats int
,Hits int
,RunsBattedIn int
);

-- The explicit column list keeps the INSERT valid if columns are later
-- added to or reordered in the table.
INSERT INTO #Tmp (PlayerName, AtBats, Hits, RunsBattedIn)
VALUES
    -- at-bats rows
    ('p1',  30,   NULL, NULL),
    ('p3',  27,   NULL, NULL),
    ('p2',  22,   NULL, NULL),
    ('p9',  15,   NULL, NULL),
    ('p7',  10,   NULL, NULL),
    -- hits rows (p7 and p1 tie at 5)
    ('p2',  NULL, 15,   NULL),
    ('p9',  NULL, 12,   NULL),
    ('p11', NULL, 9,    NULL),
    ('p3',  NULL, 8,    NULL),
    ('p7',  NULL, 5,    NULL),
    ('p1',  NULL, 5,    NULL),
    -- runs-batted-in rows (p7, p5 and p14 tie at 6)
    ('p2',  NULL, NULL, 10),
    ('p9',  NULL, NULL, 9),
    ('p11', NULL, NULL, 8),
    ('p3',  NULL, NULL, 7),
    ('p7',  NULL, NULL, 6),
    ('p5',  NULL, NULL, 6),
    ('p14', NULL, NULL, 6);

编辑2:

-- Sample data held in a table variable: one row per (player, stat). Every
-- row fills exactly one of AtBats / Hits / RunsBattedIn and leaves the other
-- two NULL.
DECLARE @Tmp TABLE
(PlayerName varchar(10)
,AtBats int
,Hits int
,RunsBattedIn int
);

-- The explicit column list keeps the INSERT valid if columns are later
-- added to or reordered in the table.
INSERT INTO @Tmp (PlayerName, AtBats, Hits, RunsBattedIn)
VALUES
    -- at-bats rows
    ('p1',  30,   NULL, NULL),
    ('p3',  27,   NULL, NULL),
    ('p2',  22,   NULL, NULL),
    ('p9',  15,   NULL, NULL),
    ('p7',  10,   NULL, NULL),
    -- hits rows (p7 and p1 tie at 5)
    ('p2',  NULL, 15,   NULL),
    ('p9',  NULL, 12,   NULL),
    ('p11', NULL, 9,    NULL),
    ('p3',  NULL, 8,    NULL),
    ('p7',  NULL, 5,    NULL),
    ('p1',  NULL, 5,    NULL),
    -- runs-batted-in rows (p7, p5 and p14 tie at 6)
    ('p2',  NULL, NULL, 10),
    ('p9',  NULL, NULL, 9),
    ('p11', NULL, NULL, 8),
    ('p3',  NULL, NULL, 7),
    ('p7',  NULL, NULL, 6),
    ('p5',  NULL, NULL, 6),
    ('p14', NULL, NULL, 6);



-- Lines up the per-stat orderings of all players side by side, one output row
-- per position. This variant numbers players with ROW_NUMBER, so tied players
-- get different positions (in no defined order) and no top-N filter is applied.
;WITH PlayerStats AS (
    -- Collapse the one-row-per-stat input into a single row per player.
    SELECT
        PlayerName,
        MAX(AtBats) AS AtBats,
        MAX(Hits) AS Hits,
        MAX(RunsBattedIn) AS RBI
    FROM @Tmp
    GROUP BY PlayerName
),
Ranks AS (
    -- Number the players for each stat, highest value first.
    SELECT
        PlayerName,
        AtBats,
        Hits,
        RBI,
        ROW_NUMBER() OVER (ORDER BY AtBats DESC) AS AtBatRank,
        ROW_NUMBER() OVER (ORDER BY Hits DESC) AS HitRank,
        ROW_NUMBER() OVER (ORDER BY RBI DESC) AS RBIRank
    FROM PlayerStats
),
RankRow AS (
    -- Position of each player inside its rank. Because every ROW_NUMBER rank
    -- above is unique, this is always 1 in this variant.
    SELECT
        PlayerName,
        AtBats,
        Hits,
        RBI,
        AtBatRank,
        HitRank,
        RBIRank,
        ROW_NUMBER() OVER (PARTITION BY AtBatRank ORDER BY PlayerName) AS AtBatRow,
        ROW_NUMBER() OVER (PARTITION BY HitRank ORDER BY PlayerName) AS HitRow,
        ROW_NUMBER() OVER (PARTITION BY RBIRank ORDER BY PlayerName) AS RBIRow
    FROM Ranks
),
Top5 AS (
    -- Every (rank, row) slot used by any stat. Only the two key columns are
    -- selected so that UNION collapses slots shared between stats; carrying
    -- the stat value as a third column would keep one copy per distinct value
    -- and duplicate the joined rows below.
    SELECT AtBatRank AS r, AtBatRow AS n FROM RankRow
    UNION
    SELECT HitRank AS r, HitRow AS n FROM RankRow
    UNION
    SELECT RBIRank AS r, RBIRow AS n FROM RankRow
)
-- Final query: one row per slot, with the player occupying that slot for each
-- stat. A player whose stat is NULL still occupies a slot, so the CASE
-- expressions blank the name out when the stat value itself is NULL.
SELECT
    Top5.r,
    Top5.n,
    CASE WHEN AtBat.AtBats IS NOT NULL THEN AtBat.PlayerName ELSE NULL END AS AtBat,
    AtBat.AtBats,
    CASE WHEN Hit.Hits IS NOT NULL THEN Hit.PlayerName ELSE NULL END AS Hit,
    Hit.Hits,
    CASE WHEN RBI.RBI IS NOT NULL THEN RBI.PlayerName ELSE NULL END AS RBI,
    RBI.RBI AS rbii
FROM Top5
LEFT JOIN RankRow AS AtBat
    ON AtBat.AtBatRank = Top5.r AND AtBat.AtBatRow = Top5.n
LEFT JOIN RankRow AS Hit
    ON Hit.HitRank = Top5.r AND Hit.HitRow = Top5.n
LEFT JOIN RankRow AS RBI
    ON RBI.RBIRank = Top5.r AND RBI.RBIRow = Top5.n
ORDER BY Top5.r, Top5.n

2 个答案:

答案 0 :(得分:2)

您可以尝试在行号上使用FULL OUTER JOIN,该行号由ROW_NUMBER窗口函数生成。

-- Pairs the n-th at-bats leader with the n-th hits leader by numbering each
-- list independently and joining the two lists on that row number.
-- FULL OUTER JOIN keeps the surplus rows when one list is longer than the
-- other; the missing side comes back as NULL.
SELECT
    ab.PlayerName AS PlayerName_AB,
    ab.AtBats,
    h.PlayerName AS PlayerName_H,
    h.Hits
FROM
(
    -- Hits list. Rows are selected because they carry a Hits value, not
    -- because AtBats is NULL, so rows belonging to any other stat column
    -- (e.g. RunsBattedIn) cannot leak into this list.
    -- PlayerName breaks ties so the numbering is repeatable.
    SELECT
        PlayerName,
        Hits,
        ROW_NUMBER() OVER (ORDER BY Hits DESC, PlayerName) AS rn
    FROM #Tmp
    WHERE Hits IS NOT NULL
) AS h
FULL OUTER JOIN
(
    -- At-bats list, selected and numbered the same way.
    SELECT
        PlayerName,
        AtBats,
        ROW_NUMBER() OVER (ORDER BY AtBats DESC, PlayerName) AS rn
    FROM #Tmp
    WHERE AtBats IS NOT NULL
) AS ab
    ON h.rn = ab.rn
-- Either side may be NULL on a surplus row, so order by whichever rn exists.
ORDER BY COALESCE(ab.rn, h.rn);

sqlfiddle

答案 1 :(得分:1)

最大的问题是如何处理并列(平局)。在我的回答中,我先使用Rank()对球员进行排名,然后使用row_number()为同一名次内的球员编号,最后将它们全部对齐排列,这样并列的情况就能正确显示,并且我们总是至少得到前5名。

请注意,我没有使用dense_rank,因此对于一个或多个统计项,您可能在某个特定名次上看不到任何人。

这可以扩展到您需要的任意多个统计项。

设置示例数据:

-- Example data for the query that follows, held in a table variable: one row
-- per (player, stat). Every row fills exactly one of AtBats / Hits /
-- RunsBattedIn and leaves the other two NULL.
DECLARE @Tmp TABLE
(PlayerName varchar(10)
,AtBats int
,Hits int
,RunsBattedIn int
);

-- The explicit column list keeps the INSERT valid if columns are later
-- added to or reordered in the table.
INSERT INTO @Tmp (PlayerName, AtBats, Hits, RunsBattedIn)
VALUES
    -- at-bats rows
    ('p1',  30,   NULL, NULL),
    ('p3',  27,   NULL, NULL),
    ('p2',  22,   NULL, NULL),
    ('p9',  15,   NULL, NULL),
    ('p7',  10,   NULL, NULL),
    -- hits rows (p7 and p1 tie at 5)
    ('p2',  NULL, 15,   NULL),
    ('p9',  NULL, 12,   NULL),
    ('p11', NULL, 9,    NULL),
    ('p3',  NULL, 8,    NULL),
    ('p7',  NULL, 5,    NULL),
    ('p1',  NULL, 5,    NULL),
    -- runs-batted-in rows (p7, p5 and p14 tie at 6)
    ('p2',  NULL, NULL, 10),
    ('p9',  NULL, NULL, 9),
    ('p11', NULL, NULL, 8),
    ('p3',  NULL, NULL, 7),
    ('p7',  NULL, NULL, 6),
    ('p5',  NULL, NULL, 6),
    ('p14', NULL, NULL, 6);

这是查询

-- Top-5 leaderboard for several stats, shown side by side. Players are ranked
-- per stat with RANK (ties share a rank), then numbered inside each rank, and
-- the (rank, row) pairs are used to line the stats up so that every tied
-- player gets a row of their own.
;WITH PlayerStats AS (
    -- Collapse the one-row-per-stat input into a single row per player.
    SELECT
        PlayerName,
        MAX(AtBats) AS AtBats,
        MAX(Hits) AS Hits,
        MAX(RunsBattedIn) AS RBI
    FROM @Tmp
    GROUP BY PlayerName
),
Ranks AS (
    -- Rank the players for each stat, highest value first.
    -- A player with no value for a stat gets a NULL rank for it. Without the
    -- CASE those players would all share the rank after the last real value
    -- and would be listed as leaders whenever fewer than five players have
    -- the stat. NULLs sort after every real value under DESC, so the ranks
    -- of the remaining players are unaffected.
    SELECT
        PlayerName,
        AtBats,
        Hits,
        RBI,
        CASE WHEN AtBats IS NOT NULL THEN RANK() OVER (ORDER BY AtBats DESC) END AS AtBatRank,
        CASE WHEN Hits IS NOT NULL THEN RANK() OVER (ORDER BY Hits DESC) END AS HitRank,
        CASE WHEN RBI IS NOT NULL THEN RANK() OVER (ORDER BY RBI DESC) END AS RBIRank
    FROM PlayerStats
),
RankRow AS (
    -- Number the players sharing a rank (alphabetically) so each tied player
    -- has a distinct (rank, row) slot.
    SELECT
        PlayerName,
        AtBatRank,
        HitRank,
        RBIRank,
        ROW_NUMBER() OVER (PARTITION BY AtBatRank ORDER BY PlayerName) AS AtBatRow,
        ROW_NUMBER() OVER (PARTITION BY HitRank ORDER BY PlayerName) AS HitRow,
        ROW_NUMBER() OVER (PARTITION BY RBIRank ORDER BY PlayerName) AS RBIRow
    FROM Ranks
),
Top5 AS (
    -- Every (rank, row) slot used by any stat. UNION (not UNION ALL) is
    -- required: slots shared between stats must appear only once.
    SELECT AtBatRank AS r, AtBatRow AS n FROM RankRow
    UNION
    SELECT HitRank AS r, HitRow AS n FROM RankRow
    UNION
    SELECT RBIRank AS r, RBIRow AS n FROM RankRow
)
-- Final query: one row per slot, with the player holding that slot for each
-- stat, or NULL when no player holds it.
SELECT
    Top5.r,
    AtBat.PlayerName AS AtBat,
    Hit.PlayerName AS Hit,
    RBI.PlayerName AS RBI
FROM Top5
LEFT JOIN RankRow AS AtBat
    ON AtBat.AtBatRank = Top5.r AND AtBat.AtBatRow = Top5.n
LEFT JOIN RankRow AS Hit
    ON Hit.HitRank = Top5.r AND Hit.HitRow = Top5.n
LEFT JOIN RankRow AS RBI
    ON RBI.RBIRank = Top5.r AND RBI.RBIRow = Top5.n
-- Keeps ranks 1-5 (including everyone tied at rank 5) and discards the
-- NULL-rank slots of players that have no value for a stat.
WHERE Top5.r <= 5
ORDER BY Top5.r, Top5.n

这是结果:

r   AtBat   Hit     RBI
1   p1      p2      p2
2   p3      p9      p9
3   p2      p11     p11
4   p9      p3      p3
5   p7      p1      p14
5   NULL    p7      p5
5   NULL    NULL    p7