如何选择列组合与值列表匹配的行?

时间:2016-08-25 18:26:25

标签: sql sql-server

假设我有一个包含这样值的表:

CREATE TABLE foo ( ID int NOT NULL, Box1 varchar(50), Box2 varchar(50), Box3 varchar(50), CONSTRAINT PK_foo PRIMARY KEY (ID) );
INSERT INTO foo (ID, Box1, Box2, Box3) VALUES (0, 'FOOBIE BLETCH', NULL, NULL), (1, 'DUAM XNAHT', NULL, NULL), (2, 'HACKEM MUCHE', 'FNORD', NULL), (3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'), (4, 'JUYED AWK YACC', 'FNORD', NULL), (5, 'FOOBIE BLETCH', NULL, NULL), (6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL), (7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'), (8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH')

如果Box1、Box2、Box3的任意组合包含指定的值,我如何找到foo中这些行的ID?要找到的值的顺序无关紧要。包含比查找值更多的值的行也应显示在结果中。例如:

DECLARE @ArgValue varchar
SET @ArgValue = 'FOOBIE BLETCH' -- match 0, 5, 6, 7
SET @ArgValue = 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH' -- match 3, 8
SET @ArgValue = 'FNORD, JUYED AWK YACC' -- match 4

假设(如果需要)存在proc ArgVal_Split(),它可以提取子字符串并将它们作为单列表返回。

我的真实数据实际上比这复杂得多(一个表有20个不同的列可能匹配)所以我正在寻找不涉及枚举列名的解决方案。

6 个答案:

答案 0 :(得分:1)

首先,您需要更正参数的数据类型,否则它将是单个字符:

-- Give the parameter an explicit length: a varchar declared without one
-- would hold only a single character.
DECLARE @ArgValue varchar(1000)

然后,您可以使用递归查询将参数拆分为单个术语(每个术语一行),再使用in运算符将每个术语与foo表连接起来。最后,只保留连接匹配数等于参数中术语数的那些行。

-- Peel one comma-separated term off @ArgValue per recursion step, then keep
-- the foo rows that match every term in any of their box columns.
;WITH split_terms (item, remainder) AS (
    -- Anchor member: the first term, plus whatever follows the first comma.
    SELECT
        CAST(LEFT(@ArgValue, CHARINDEX(',', @ArgValue + ',') - 1) AS varchar(1000)) AS item,
        LTRIM(STUFF(@ArgValue, 1, CHARINDEX(',', @ArgValue + ','), '')) AS remainder
    UNION ALL
    -- Recursive member: apply the same split to the remainder until it is empty.
    SELECT
        CAST(LEFT(prev.remainder, CHARINDEX(',', prev.remainder + ',') - 1) AS varchar(1000)),
        LTRIM(STUFF(prev.remainder, 1, CHARINDEX(',', prev.remainder + ','), ''))
    FROM split_terms AS prev
    WHERE prev.remainder > ''
)
SELECT f.id
FROM foo AS f
INNER JOIN split_terms AS st
    ON st.item IN (f.box1, f.box2, f.box3)
GROUP BY f.id
-- A row qualifies only when it matched as many terms as the argument contains.
HAVING COUNT(st.item) = (SELECT COUNT(*) FROM split_terms)

递归公用表表达式可以替换为ArgVal_Split()结果,但性能可能会有所不同。

列名只需要在一个地方枚举。即使你有20列,这也不应成为问题。

答案 1 :(得分:1)

这种设计闻起来......

每当你想要添加带有数字后缀的列时(常见的例子:Telephone1,Telephone2 ......),你都应该改用一个1:n关联的子表(side table)!

但这仍然是可能的:

-- Demo: list the IDs of @foo rows whose Box columns together contain every
-- value of the comma-separated @ArgValue (order irrelevant, extra box values allowed).
DECLARE @foo TABLE 
(
  ID int NOT NULL,
  Box1 varchar(50),
  Box2 varchar(50),
  Box3 varchar(50)
);

INSERT INTO @foo (ID, Box1, Box2, Box3) VALUES 
  (0, 'FOOBIE BLETCH', NULL, NULL),
  (1, 'DUAM XNAHT', NULL, NULL),
  (2, 'HACKEM MUCHE', 'FNORD', NULL),
  (3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'),
  (4, 'JUYED AWK YACC', 'FNORD', NULL),
  (5, 'FOOBIE BLETCH', NULL, NULL),
  (6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL),
  (7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'),
  (8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH');

DECLARE @ArgValue VARCHAR(100);

--SET @ArgValue = 'FOOBIE BLETCH'; -- match 0, 5, 6, 7
SET @ArgValue = 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH'; -- match 3, 8
--SET @ArgValue = 'FNORD, JUYED AWK YACC'; -- match 4


WITH DerivedTable AS
(
    -- One row per distinct search term. ", " is first normalised to "," so
    -- that "a, b" and "a,b" split identically; every term is then trimmed.
    -- NOTE: @ArgValue must not contain the XML-reserved characters <, > or &,
    -- otherwise the CAST to XML fails.
    SELECT DISTINCT LTRIM(RTRIM(x.value('.','nvarchar(max)'))) AS part
    FROM
    ( 
    SELECT CAST('<x>' + REPLACE(REPLACE(@ArgValue,', ',','),',','</x><x>') + '</x>' AS XML)
    ) AS tbl(Casted)
    CROSS APPLY Casted.nodes('/x') AS A(x)
)
SELECT f.ID
FROM @foo AS f
-- Unpivot the box columns into one row per (ID, box value).
CROSS APPLY (VALUES(f.Box1),(f.Box2),(f.Box3)) AS boxes(box)
INNER JOIN DerivedTable AS dt ON boxes.box=dt.part
GROUP BY f.ID
-- Count DISTINCT matched values: a row that stores the same searched value in
-- two boxes must not be able to make up for a term it does not contain.
HAVING COUNT(DISTINCT boxes.box)=(SELECT COUNT(*) FROM DerivedTable);

一个简短的解释:

通过将逗号分隔符替换为XML标签,您可以将字符串'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH'转换为'<x>GARVEN DEH</x><x>DAIYEN FOOELS</x><x>ELBIB YLOH</x>'。这是XML,可以很容易地用作派生表。

值得一提的是,您的字符串不应包含<、>、&等XML保留字符。如果确实需要支持这些字符,可以参考原文中here链接给出的可靠解决方案。

SELECT使用CROSS APPLY配合VALUES的技巧来实现UNPIVOT,然后执行INNER JOIN。只有匹配计数与搜索参数中的部分数相同的结果才是有效的。

答案 2 :(得分:0)

我没有ArgVal_Split()函数,可以找到许多分割器。

-- Return the foo rows whose box columns contain every searched value.
with argVal as ( -- simulates ArgVal_Split() results
     select 'GARVEN DEH' as txt
     union select 'DAIYEN FOOELS'
     union select 'ELBIB YLOH'
)
select f.ID, f.Box1, f.Box2, f.Box3
from  foo as f
-- Keep the row only when no searched value is left over after removing the
-- row's own box values, i.e. the searched values are a subset of the boxes.
-- NULL boxes and additional box values therefore do not disqualify a row.
where not exists (
    select txt 
    from argVal --from ArgVal_Split(@ArgVal, ',')
    except (
        select f.Box1 x
        union select f.Box2 
        union select f.Box3 
    )
  )

此部分select Box1 x union select Box2 union select Box3只能通过动态sql适应可变数量的列。无法在常规sql中迭代列。

答案 3 :(得分:0)

结果:

-- Example calls; ORDER BY makes the row order match the documented results
-- instead of relying on whatever order the engine happens to return.
select fooID from dbo.IntersectFooBoxes('FOOBIE BLETCH') order by fooID
--RESULT: 0,5,6,7
select fooID from dbo.IntersectFooBoxes('GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH') order by fooID
--RESULT: 3,8
select fooID from dbo.IntersectFooBoxes('FNORD, JUYED AWK YACC') order by fooID
--RESULT: 4

设置:

-- Sample table: every row carries up to three optional box values.
CREATE TABLE foo 
(
  ID   int NOT NULL PRIMARY KEY,
  Box1 varchar(50),
  Box2 varchar(50),
  Box3 varchar(50)
)

GO

-- Seed rows 0-8 used by the example calls.
INSERT INTO foo (ID, Box1, Box2, Box3) 
VALUES
  (0, 'FOOBIE BLETCH', NULL, NULL),
  (1, 'DUAM XNAHT', NULL, NULL),
  (2, 'HACKEM MUCHE', 'FNORD', NULL),
  (3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'),
  (4, 'JUYED AWK YACC', 'FNORD', NULL),
  (5, 'FOOBIE BLETCH', NULL, NULL),
  (6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL),
  (7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'),
  (8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH')

GO

功能:LookupFooBoxes

此函数采用单个值,并返回任何所需列中包含此值的任何fooID。使用此函数可以有一个位置来定义要搜索的列。

-- Returns the foo IDs of all rows that hold @Value in any of the box columns.
-- This is the single place where the searched columns are listed.
CREATE FUNCTION dbo.LookupFooBoxes (@Value varchar(50))
RETURNS @results TABLE 
(
    -- ID: identity row number; fooID: ID of the matching foo row
    ID INT PRIMARY KEY NOT NULL IDENTITY(1,1), 
    fooID INT
)
AS 
BEGIN

    -- foo.ID is the primary key, so testing all box columns in one predicate
    -- yields each matching ID exactly once.
    INSERT INTO @results (fooID)
    SELECT f.ID
    FROM foo AS f
    WHERE @Value IN (f.Box1, f.Box2, f.Box3)
    ORDER BY f.ID ASC

    RETURN;

END

功能:拆分

此函数接受@Values参数(以分隔符分隔的值“列表”)和@Split参数(定义按什么拆分,默认值:逗号),并修剪每个结果值两端的空格。

-- Splits @Values on the delimiter @Split and returns one trimmed value per row.
--   @Values : delimited list of values
--   @Split  : delimiter (default: comma)
-- Returns: ID (identity row number) and Value (trimmed element).
-- A NULL @Values or @Split yields no rows.
CREATE FUNCTION dbo.Split (@Values varchar(max), @Split varchar(5) = ',')
RETURNS @result TABLE 
(
    -- Columns returned by the function
    ID INT PRIMARY KEY NOT NULL IDENTITY(1,1), 
    Value varchar(max)
)
AS 
BEGIN

    DECLARE @X xml
    DECLARE @EscapedValues varchar(max)
    DECLARE @EscapedSplit varchar(25)  -- up to 5 characters, each at most 5 characters once escaped

    -- Escape the XML-reserved characters so that input such as 'R&D' or 'a<b'
    -- no longer breaks the conversion to xml. '&' must be escaped first.
    -- The delimiter is escaped the same way so it still matches inside the escaped input.
    SELECT @EscapedValues = REPLACE(REPLACE(REPLACE(@Values, '&', '&amp;'), '<', '&lt;'), '>', '&gt;')
    SELECT @EscapedSplit  = REPLACE(REPLACE(REPLACE(@Split,  '&', '&amp;'), '<', '&lt;'), '>', '&gt;')

    -- Turn every delimiter into an element boundary: a,b -> <s>a</s> <s>b</s>
    SELECT @X = CONVERT(xml,' <root> <s>' + REPLACE(@EscapedValues, @EscapedSplit,'</s> <s>') + '</s>   </root> ')

    -- Reading the element text un-escapes the entities again.
    INSERT INTO @result (Value)
    SELECT LTRIM(RTRIM(T.c.value('.','varchar(max)')))
    FROM @X.nodes('/root/s') T(c)

    RETURN;

END

功能:IntersectFooBoxes

此函数将逗号分隔的值列表作为单个参数,并使用游标迭代它们。如果没有值,则返回一个空表;对于第一个值,它只使用dbo.LookupFooBoxes查找包含该值的所有行;对于其后的每个值,它会删除不包含该值的ID,最终得到的交集只包含那些拥有所有元素的fooID。

-- Returns the foo IDs of the rows that contain ALL values of the
-- comma-separated list @Values in their box columns.
--   @Values : comma-separated list of values to look for
-- Returns: ID (identity row number) and fooID (ID of the qualifying foo row).
-- An input without any element yields an empty table.
CREATE FUNCTION dbo.IntersectFooBoxes (@Values varchar(max))
RETURNS @results TABLE 
(
    -- Columns returned by the function
    ID INT PRIMARY KEY NOT NULL IDENTITY(1,1), 
    fooID INT
)
AS 
BEGIN

    DECLARE @CurrentValue varchar(max)

    -- LOCAL keeps the cursor scoped to this function; it is closed and
    -- deallocated on every path below.
    DECLARE c CURSOR LOCAL FAST_FORWARD FOR SELECT Value FROM dbo.Split(@Values, ',')
    OPEN c

    FETCH NEXT FROM c INTO @CurrentValue

    -- No elements: fall through to the clean-up with an empty result.
    IF @@FETCH_STATUS = 0
    BEGIN

        -- First value: start with every row that contains it.
        INSERT INTO @results (fooID)
        SELECT fooID FROM dbo.LookupFooBoxes(@CurrentValue)

        FETCH NEXT FROM c INTO @CurrentValue

        WHILE @@FETCH_STATUS = 0
        BEGIN

            -- Each further value: drop the candidates that do not contain it,
            -- leaving the intersection.
            DELETE r
            FROM @results AS r
            WHERE NOT EXISTS
            (
                SELECT 1
                FROM dbo.LookupFooBoxes(@CurrentValue) AS m
                WHERE m.fooID = r.fooID
            )

            FETCH NEXT FROM c INTO @CurrentValue

        END

    END

    CLOSE c
    DEALLOCATE c

    RETURN;

END

答案 4 :(得分:0)

使用众多现成方案中的任意一种将@ArgValue拆分为@a1, @a2, @a3。最简单的可能只是嵌套的charindex()表达式。(我看到你有20个,所以你可能更想用一个循环。)实际的查询非常简单。

-- Each of @a1..@a3 is either NULL (unused slot, ignored) or a value that must
-- appear in at least one box column of the row. Explicit NULL checks replace
-- a magic placeholder string, which would match every row if an argument
-- ever happened to equal the placeholder.
select ID, Box1, Box2, Box3
from foo
where
        (@a1 is null or @a1 in (Box1, Box2, Box3))
    and (@a2 is null or @a2 in (Box1, Box2, Box3))
    and (@a3 is null or @a3 in (Box1, Box2, Box3));

此查询可能不会非常快。如果这是您系统中的核心操作,则应重新考虑设计。

答案 5 :(得分:0)

-- Re-runnable demo: the tables are created only when they do not exist yet
-- and are emptied on every run.
-- requires costest_split, with fixes

---- this table stands for CostEst_ClaimInfo
IF OBJECT_ID(N'foo', N'U') IS NULL
CREATE TABLE foo 
(
  ID int NOT NULL,
  Box1 varchar(50),
  Box2 varchar(50),
  Box3 varchar(50),
  CONSTRAINT PK_foo PRIMARY KEY (ID)
);

---- This table stands for CostEst_ClaimDiagnoses
-- One row per (foo.ID, non-NULL box value).
IF OBJECT_ID(N'FooCrossRef', N'U') IS NULL
create table FooCrossRef
(
    ID int NOT NULL,
    BoxVal varchar(50)
);

---- This table stands for #tdiags
-- One row per searched value.
IF OBJECT_ID(N'FooVals', N'U') IS NULL
create table FooVals ( ABoxVal varchar(50) );

---- This table stands for #tClaims
IF OBJECT_ID(N'fooResults', N'U') IS NULL
CREATE TABLE fooResults
(
  ID int NOT NULL,
  Box1 varchar(50),
  Box2 varchar(50),
  Box3 varchar(50),
  CONSTRAINT PK_fooRes PRIMARY KEY (ID)
);

-- empty out all the tables every time, for simplicity
-- (the missing WHERE clauses are intentional)
delete from foo;
delete from FooCrossRef;
delete from FooVals;
delete from fooResults;


declare @BoxCount int;

INSERT INTO foo (ID, Box1, Box2, Box3) VALUES 
  (0, 'FOOBIE BLETCH', NULL, NULL),
  (1, 'DUAM XNAHT', NULL, NULL),
  (2, 'HACKEM MUCHE', 'FNORD', NULL),
  (3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'),
  (4, 'JUYED AWK YACC', 'FNORD', NULL),
  (5, 'FOOBIE BLETCH', NULL, NULL),
  (6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL),
  (7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'),
  (8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH')
  ,(9, 'JUYED AWK YACC', 'GARVEN DEH', 'ELBIB YLOH')
  ,(10, 'ELBIB YLOH', NULL, NULL)
  ,(11, 'JUYED AWK YACC', 'FOOBIE BLETCH', 'FNORD');

DECLARE @ArgValue varchar(max);

-- unit-test arg values
--SET @ArgValue = 'FOOBIE BLETCH' -- match 0, 5, 6, 7, 11
--SET @ArgValue = 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH' -- match 3, 8
--SET @ArgValue = 'FNORD, JUYED AWK YACC' -- match 4, 11
SET @ArgValue = 'FOOBIE BLETCH, LEP GEX VEN ZEA'; -- match nothing

-- Unpivot the box columns into (ID, value) pairs; UNPIVOT skips NULL boxes.
insert into FooCrossRef (ID, BoxVal )
    select ID, Box
    from
    (
        select foo.ID, Box1, Box2, Box3 from foo
    ) as PVT
    UNPIVOT (Box for position in (Box1, Box2, Box3)) as UnPvt;



-- Do the thing with the argument
insert into FooVals exec costest_split @OrigString = @ArgValue, @Delimiter = ',';

-- Number of distinct values every qualifying row has to contain.
set @BoxCount = (select Count(distinct ABoxVal) from FooVals);



-- Keep the rows that matched all searched values. GROUP BY already returns
-- one row per foo.ID, so no additional DISTINCT is needed.
insert into fooResults (ID, Box1, Box2, Box3)
(

    select foo.ID, Box1, Box2, Box3  from foo 
        join FooCrossRef on foo.ID = FooCrossRef.ID 
        join FooVals on FooCrossRef.BoxVal = FooVals.ABoxVal
    group by foo.ID, Box1, Box2, Box3
    having count(distinct FooCrossRef.BoxVal) >=  @BoxCount
);

select * from fooResults;