T-SQL返回具有最匹配列的结果

时间:2016-01-28 18:18:35

标签: sql-server tsql

我希望有人能给我指出正确的方向。我有两个任意的表:

    TableA                                  TableB  
    ----------                              ----------  
    (cola-int)  (colb-int)  (colc-char)     (cola-int)  (colb-int)  (colc-char)  
    1           10          Data1           1           NULL        Match1  
    2           20          Data2           2           NULL        Match2  
    1           10          Data3           2           30          Match3  
    2           30          Data4  
    2           30          Data5  

查询:

    -- Join the two tables on cola, then keep the pairs where colb is equal
    -- or where TableB's colb is NULL.
    SELECT
        a.cola,
        a.colb,
        a.colc,
        b.cola,
        b.colb,
        b.colc
    FROM TableA AS a
    INNER JOIN TableB AS b
        ON b.cola = a.cola
    WHERE a.colb = b.colb
       OR b.colb IS NULL

产生以下结果:

    cola    colb    colc    cola    colb    colc  
    1       10      Data1   1       NULL    Match1  
    1       10      Data3   1       NULL    Match1  
    2       20      Data2   2       NULL    Match2  
    2       30      Data4   2       NULL    Match2  
    2       30      Data5   2       NULL    Match2  
    2       30      Data4   2       30      Match3  
    2       30      Data5   2       30      Match3  

我怎样才能让Data4和Data5只返回Match3,
而不返回Match2(因为它们在colb上与Match2并不匹配)?
我希望我的结果集看起来像这样:

    cola    colb    colc    cola    colb    colc  
    1       10      Data1   1       NULL    Match1  
    1       10      Data3   1       NULL    Match1  
    2       20      Data2   2       NULL    Match2  
    2       30      Data4   2       30      Match3  
    2       30      Data5   2       30      Match3  

我尝试过使用外连接,但没有效果;还尝试过在WHERE子句中
使用异或(exclusive or),查询如下:

    -- Exclusive-or attempt: a joined row is kept when exactly one of the two
    -- tests ("colb values are equal", "b.colb is NULL") evaluates to true.
    SELECT
        a.cola,
        a.colb,
        a.colc,
        b.cola,
        b.colb,
        b.colc
    FROM TableA AS a
    LEFT JOIN TableB AS b
        ON b.cola = a.cola
    WHERE
        (CASE WHEN a.colb = b.colb THEN 1 ELSE 0 END)
      + (CASE WHEN b.colb IS NULL THEN 1 ELSE 0 END) = 1

产生以下结果:

    cola    colb    colc    cola    colb    colc  
    1       10      Data1   1       NULL    Match1  
    2       20      Data2   2       NULL    Match2  
    1       10      Data3   1       NULL    Match1  
    2       30      Data4   2       NULL    Match2  
    2       30      Data4   2       30      Match3  
    2       30      Data5   2       NULL    Match2  
    2       30      Data5   2       30      Match3  

仍然为Data4和Data5提供Match2。

谢谢!

3 个答案:

答案 0 :(得分:0)

在看过使用CASE语句的版本之后(我想它已经回答了我在上面评论中提出的问题),我对此做了一番尝试。

您可以使用窗口函数或相关子查询来解决此问题:

窗口函数(将b.colb与同一分区内的max(b.colb)进行比较,以决定是“保留”还是“丢弃”该记录。)

-- Window-function version: for every TableA row, return the TableB row that
-- matches on both cola and colb; fall back to the TableB row(s) whose colb is
-- NULL only when no exact match exists.
SELECT
    c.acola,
    c.acolb,
    c.acolc,
    c.bcola,
    c.bcolb,
    c.bcolc
FROM
    (
        SELECT
            a.cola AS acola,
            a.colb AS acolb,
            a.colc AS acolc,
            b.cola AS bcola,
            b.colb AS bcolb,
            b.colc AS bcolc,
            CASE
                -- MAX() ignores NULLs, so within a partition it is either
                -- a.colb (an exact match exists) or NULL (only fallbacks).
                WHEN b.colb = MAX(b.colb) OVER (PARTITION BY a.cola, a.colb, a.colc)
                    THEN 'KEEP'
                WHEN b.colb IS NULL
                    AND MAX(b.colb) OVER (PARTITION BY a.cola, a.colb, a.colc) IS NULL
                    THEN 'KEEP'
                ELSE 'DROP'
            END AS KeepOrDrop
        FROM TableA AS a
        INNER JOIN TableB AS b
            ON b.cola = a.cola
        -- Only exact matches and NULL fallbacks are candidates. Without this
        -- filter a TableB row with an unrelated colb (e.g. 30 for an A row
        -- with colb = 20) would win the MAX() comparison.
        WHERE b.colb = a.colb
           OR b.colb IS NULL
    ) AS c
-- Same literal case as the CASE expression, so this also works under a
-- case-sensitive collation.
WHERE c.KeepOrDrop = 'KEEP'

相关子查询(我们在WHERE子句中的相关子查询中找到max b.colb)

-- Correlated-subquery version: return the TableB row that matches the TableA
-- row on both cola and colb; a TableB row with colb = NULL is returned only
-- when the subquery finds no exact match for that TableA row.
SELECT
    a.cola AS acola,
    a.colb AS acolb,
    a.colc AS acolc,
    b.cola AS bcola,
    b.colb AS bcolb,
    b.colc AS bcolc
FROM TableA AS a
INNER JOIN TableB AS b
    ON b.cola = a.cola
WHERE b.colb = a.colb
   OR (
        b.colb IS NULL
        -- MAX() over the exact matches is NULL when there are none. No
        -- sentinel value (such as 0) is substituted for NULL, so real colb
        -- values of 0 or below cannot be confused with "no match".
        AND (
            SELECT MAX(t.colb)
            FROM TableB AS t
            WHERE t.cola = a.cola
              AND t.colb = a.colb
        ) IS NULL
      )

答案 1 :(得分:0)

我想出了这个:

/* Test data: table variables standing in for TableA and TableB. */
DECLARE @TA TABLE (cola int, colb int, colc char(10));
DECLARE @TB TABLE (cola int, colb int, colc char(10));

INSERT INTO @TA (cola, colb, colc)
VALUES
    (1, 10, 'Data1'),
    (2, 20, 'Data2'),
    (1, 10, 'Data3'),
    (2, 30, 'Data4'),
    (2, 30, 'Data5');

INSERT INTO @TB (cola, colb, colc)
VALUES
    (1, NULL, 'Match1'),
    (2, NULL, 'Match2'),
    (2, 30,   'Match3');
/* End Test data. */

/* First get all that match on both fields [cola] and [colb]. */
; WITH BothMatch AS (
    SELECT
        A.cola AS acola
        , A.colb AS acolb
        , A.colc AS acolc
        , B.cola AS bcola
        , B.colb AS bcolb
        , B.colc AS bcolc
    FROM
        @TA AS A
        INNER JOIN @TB AS B
            ON A.cola = B.cola
            AND A.colb = B.colb
)
SELECT
    acola
    , acolb
    , acolc
    , bcola
    , bcolb
    , bcolc
FROM
    BothMatch

/*
   UNION ALL rather than UNION: the NOT EXISTS below makes the two branches
   disjoint, and UNION would silently collapse identical rows coming from
   duplicate rows in @TA.
*/
UNION ALL

/* Then match all by [cola] that do not already exist in BothMatch. */
SELECT
    A.cola
    , A.colb
    , A.colc
    , B.cola
    , B.colb
    , B.colc
FROM
    @TA AS A
    INNER JOIN @TB AS B
        ON A.cola = B.cola
        AND B.colb IS NULL
WHERE
    NOT EXISTS
    (
        SELECT
            1
        FROM
            BothMatch
        WHERE
            BothMatch.acola = A.cola
            AND BothMatch.acolb = A.colb
    )
/* Column names of a UNION come from its first SELECT; acolc breaks ties. */
ORDER BY
    acola
    , acolb
    , acolc

结果:

acola   acolb   acolc   bcola   bcolb   bcolc
1       10      Data1       1   NULL    Match1    
1       10      Data3       1   NULL    Match1    
2       20      Data2       2   NULL    Match2    
2       30      Data4       2   30      Match3    
2       30      Data5       2   30      Match3    

答案 2 :(得分:0)

这是一种比Uporabnik003的答案更简短的实现方式:

/* Test data - code by Uporabnik003 */
declare @ta table (cola int, colb int, colc char(10));
declare @tb table (cola int, colb int, colc char(10));

insert into @ta (cola, colb, colc)
values
    (1, 10, 'Data1'),
    (2, 20, 'Data2'),
    (1, 10, 'Data3'),
    (2, 30, 'Data4'),
    (2, 30, 'Data5');

insert into @tb (cola, colb, colc)
values
    (1, NULL, 'Match1'),
    (2, NULL, 'Match2'),
    (2, 30,   'Match3');
/* End Test data. */

-- Rank the candidate @tb rows for each @ta row and keep only the best one.
;with cteAllResults as (
    select      a_cola  = a.cola
                ,a_colb = a.colb
                ,a_colc = a.colc
                ,b_cola = b.cola
                ,b_colb = b.colb
                ,b_colc = b.colc
                -- The "best" row of each joining group will get RowNo=1.
                -- NOTE: the partition is keyed on the @ta column values, so
                -- fully identical @ta rows share one group and produce a
                -- single output row.
                ,RowNo  = row_number() over (
                              partition by a.cola, a.colb, a.colc
                              -- A non-NULL b.colb (an exact match) sorts ahead
                              -- of NULL under "desc" in SQL Server. Depending
                              -- on your real data, you may want to play around
                              -- with the order by clause to identify the
                              -- "best" row differently.
                              order by b.colb desc
                          )
    from        @ta     a
    inner join  @tb     b   on  a.cola = b.cola
                            -- Same predicate as the question's
                            -- `where a.colb = b.colb or b.colb is null`,
                            -- written out so that it still holds when a.colb
                            -- itself is NULL and no function wraps a column.
                            and (b.colb = a.colb or b.colb is null)
    )
select      a_cola
            ,a_colb
            ,a_colc
            ,b_cola
            ,b_colb
            ,b_colc
            ,RowNo
from        cteAllResults
where       RowNo = 1

它产生与问题所需输出完全相同的结果。

但是,应当优先选择更高效的代码,而不是“更短”的代码。首先,代码行数在很大程度上受注释以及关于空白和代码布局的(半)随意决定的影响。

更重要的是,Uporabnik003的解决方案在大型数据集上的表现可能比这更好。这个解决方案依赖于首先生成一个太大的行集,然后将其削减;他们的解决方案从两个较小的集合中构建正确的行集。

如果表上有合适的主键,我会使用Uporabnik003的解决方案,但会把where not exists子句替换为left join,并检测连接是否失败(即右侧表的列is null)。