sql不同的列比较

时间:2014-12-25 01:58:04

标签: sql algorithm

我有一个包含六列(column1,column2 ... column6)的数据库表。数据是有序的,没有重复。这些是数据库表中的数据

col1    col2    col3    col4    col5    col6
--------------------------------------------
1       3       4       6       7       8
2       5       7       9       10      14

我想编写sql来比较数据,如果我有一个不同数字在变化/移位位置的数据。这些是sql select语句中的参数。

col1    col2    col3    col4    col5    col6
--------------------------------------------
2       3       4       6       7       8
3       4       6       7       8       9
1       2       4       6       7       8

我想查询1,3,4,6,7,8行

更复杂......两个不同的数字和三个不同的数字

圣诞快乐!!

以下sql来自我的朋友。

 data in table: Col1=10, Col2=11, Col3=12, Col4=13, Col5=26, Col6=28
  parameters: 10,11,12,18,26,28


 select * from 
 ( select id,Col1,Col2, Col3,Col4, Col5,Col6,
         ( (case when Col1=10 then 1 else 0 end) 
          +(case when Col2=10 then 1 else 0 end) 
          +(case when Col3=10 then 1 else 0 end) 
          +(case when Col4=10 then 1 else 0 end) 
          +(case when Col5=10 then 1 else 0 end) 
          +(case when Col6=10 then 1 else 0 end)
         )
       +(  (case when Col1=11 then 1 else 0 end)  
          +(case when Col2=11 then 1 else 0 end) 
          +(case when Col3=11 then 1 else 0 end) 
          +(case when Col4=11 then 1 else 0 end) 
          +(case when Col5=11 then 1 else 0 end) 
          +(case when Col6=11 then 1 else 0 end)
        )
       +(  (case when Col1=12 then 1 else 0 end)  
          +(case when Col2=12 then 1 else 0 end) 
          +(case when Col3=12 then 1 else 0 end) 
          +(case when Col4=12 then 1 else 0 end) 
          +(case when Col5=12 then 1 else 0 end) 
          +(case when Col6=12 then 1 else 0 end)
        )
       +(  (case when Col1=18 then 1 else 0 end)  
          +(case when Col2=18 then 1 else 0 end) 
          +(case when Col3=18 then 1 else 0 end) 
          +(case when Col4=18 then 1 else 0 end) 
          +(case when Col5=18 then 1 else 0 end) 
          +(case when Col6=18 then 1 else 0 end)
        )
       +(  (case when Col1=26 then 1 else 0 end)  
          +(case when Col2=26 then 1 else 0 end) 
          +(case when Col3=26 then 1 else 0 end) 
          +(case when Col4=26 then 1 else 0 end) 
          +(case when Col5=26 then 1 else 0 end) 
          +(case when Col6=26 then 1 else 0 end)
        )
       +(  (case when Col1=28 then 1 else 0 end)  
          +(case when Col2=28 then 1 else 0 end) 
          +(case when Col3=28 then 1 else 0 end) 
          +(case when Col4=28 then 1 else 0 end) 
          +(case when Col5=28 then 1 else 0 end) 
          +(case when Col6=28 then 1 else 0 end)
        ) as sub
       from [DBName1].[dbo].[nList] ) aa
       wheCole aa.sub>=5

请提供更多评论和新答案! 谢谢你的回复!

1 个答案:

答案 0 :(得分:0)

我的解决方案版本(适用于SQL Server 2005及更高版本):

-- PREPARATIONS
create table #tbl1 (col1 int, col2 int, col3 int, col4 int, col5 int, col6 int);
create table #tbl2 (col1 int, col2 int, col3 int, col4 int, col5 int, col6 int);

insert into #tbl1 (col1, col2, col3, col4, col5, col6) values
(1,       3,       4,       6,       7,       8),
(2,       5,       7,       9,       10,      14);

insert into #tbl2 (col1, col2, col3, col4, col5, col6) values
(2,       3,       4,       6,       7,       8),
(3,       4,       6,       7,       8,       9),
(1,       2,       4,       6,       7,       8);
go

create function [dbo].[CompareDelimitedStrings] (@value1 varchar(max), @value2 varchar(max), @separator char(1))
returns int
as
begin
    declare @result int = 0;

    with r1 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no] from (select rtrim(cast(@value1 as varchar(max))) [value]) as j
        union all
        select right(value, len(value)-case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
        , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value])-1) end ) [x]
        , [no] + 1 [no]
        from r1 r where value > ''
    )
    , r2 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no] from (select rtrim(cast(@value2 as varchar(max))) [value]) as j
        union all
        select right(value, len(value)-case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
        , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value])-1) end ) [x]
        , [no] + 1 [no]
        from r2 r where value > ''
    )
    select @result = count(*)
    from (
        select x, [no] from r1 where x is not null
        intersect
        select x, [no] from r2 where x is not null
    ) as t

    return @result;
end
go

-- SOLUTION   
with [t1] as
(
    select *
    , replace(str(col1) + ',' + str(col2) + ',' + str(col3) + ',' + str(col4) + ',' + str(col5) + ',' + str(col6), ' ', '') [str]  
    from #tbl1
)
, [t2] as
(
    select *
    , replace(str(col1) + ',' + str(col2) + ',' + str(col3) + ',' + str(col4) + ',' + str(col5) + ',' + str(col6), ' ', '') [str]  
    from #tbl2
)
select distinct t1.col1, t1.col2, t1.col3, t1.col4, t1.col5, t1.col6
from t1
-- number 5 in this case means 5 intersections (or 1 difference).
-- you can change this number to 4 or 3 to find rows with 2 or 3 differences
join t2 on [dbo].[CompareDelimitedStrings] (t1.[str], t2.[str], ',') >= 5;

-- CLEANUP
drop table #tbl1;
drop table #tbl2;
drop function [dbo].[CompareDelimitedStrings];

结果:

col1    col2    col3    col4    col5    col6
--------------------------------------------
1       3       4       6       7       8

算法用几句话说:在下一个条件下连接这两个表:行中数字的交集必须大于或等于5(对于1差异的情况)


这是 one-table-solution (显示匹配的对和交叉点数)

-- PREPARATIONS
create table #tbl (col1 int, col2 int, col3 int, col4 int, col5 int, col6 int);

insert into #tbl (col1, col2, col3, col4, col5, col6) values
(1,       3,       4,       6,       7,       8),
(2,       5,       7,       9,       10,      14),
(2,       3,       4,       6,       7,       8),
(3,       4,       6,       7,       8,       9),
(1,       2,       4,       6,       7,       8);
go

create function [dbo].[CompareDelimitedStrings] (@value1 varchar(max), @value2 varchar(max), @separator char(1))
returns int
as
begin
    declare @result int = 0;

    with r1 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no] from (select rtrim(cast(@value1 as varchar(max))) [value]) as j
        union all
        select right(value, len(value)-case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
        , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value])-1) end ) [x]
        , [no] + 1 [no]
        from r1 r where value > ''
    )
    , r2 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no] from (select rtrim(cast(@value2 as varchar(max))) [value]) as j
        union all
        select right(value, len(value)-case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
        , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value])-1) end ) [x]
        , [no] + 1 [no]
        from r2 r where value > ''
    )
    select @result = count(*)
    from (
        select x, [no] from r1 where x is not null
        intersect
        select x, [no] from r2 where x is not null
    ) as t

    return @result;
end
go

-- SOLUTION   
with [t] as
(
    select *
    , replace(str(col1) + ',' + str(col2) + ',' + str(col3) + ',' + str(col4) + ',' + str(col5) + ',' + str(col6), ' ', '') [str]
    , row_number() over(order by col1, col2, col3, col4, col5, col6) [id]
    from #tbl
)
, cross_dedup as
(
    select t1.col1, t1.col2, t1.col3, t1.col4, t1.col5, t1.col6
    , [dbo].[CompareDelimitedStrings] (t1.[str], t2.[str], ',') [intersections_count]
    -- ranking the cross-doubled pairs
    , row_number() over (partition by case when t1.id > t2.id then t1.id else t2.id end order by t1.id, t2.id) [rank]
    from t t1
    -- number 5 in this case means 5 intersections (or 1 difference).
    -- you can change this number to 4 or 3 to find rows with 2 or 3 differences
    join t t2 on [dbo].[CompareDelimitedStrings] (t1.[str], t2.[str], ',') >= 5
    and t1.id != t2.id
)
select distinct col1, col2, col3, col4, col5, col6, [intersections_count] from cross_dedup where [rank] = 1;

-- CLEANUP
drop table #tbl;
drop function [dbo].[CompareDelimitedStrings];

结果:

col1    col2    col3    col4    col5    col6    intersections_count
-------------------------------------------------------------------
1       2       4       6       7       8       5
1       3       4       6       7       8       5