我有一个包含 4 列的表:Rec_Id(int)、Name(varchar)、ID1(int)和 ID2(int)。
建表语句:
-- Sample data table for the chain/cycle check.
-- Rec_Id is the only mandatory column and carries the (nonclustered) primary key;
-- Name, ID1 and ID2 are all nullable.
CREATE TABLE Sample
(
    Rec_Id INT NOT NULL,
    Name VARCHAR(30) NULL,
    ID1 INT NULL,
    ID2 INT NULL,
    CONSTRAINT [PK_Sample] PRIMARY KEY NONCLUSTERED
    (
        [Rec_Id] ASC
    )
);  -- closes the column list; the original statement was missing this parenthesis
插入语句:
-- Seed rows for Sample. Table and column names use the same casing as the
-- CREATE TABLE statement so the script also runs under a case-sensitive collation.
-- Group A: ID1 1..3, last ID2 points back to the first ID1.
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (1, 'A', 1, 2);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (2, 'A', 2, 3);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (3, 'A', 3, 1);
-- Group B: last row has no ID2.
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (4, 'B', 1, 2);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (5, 'B', 2, 3);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (6, 'B', 3, NULL);
-- Group C: ID1 jumps from 3 to 5.
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (7, 'C', 1, 2);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (8, 'C', 2, 3);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (9, 'C', 3, 4);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (10, 'C', 5, 1);
-- Group D: starts at ID1 = 2; last ID2 (3) does not point back to the first ID1 (2).
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (11, 'D', 2, 3);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (12, 'D', 3, 4);
INSERT INTO Sample (Rec_Id, Name, ID1, ID2) VALUES (13, 'D', 4, 3);
我的源数据看起来像这样..
Rec_Id Name ID1 ID2
1 A 1 2
2 A 2 3
3 A 3 1
4 B 1 2
5 B 2 3
6 B 3 null --> Need to display this row in the output, because 1 is missing in ID2
7 C 1 2
8 C 2 3
9 C 3 4
10 C 5 1 --> Need to display this row in the output, because 4 is missing after 3 in ID1
11 D 2 3
12 D 3 4
13 D 4 3 --> Need to display this row in the output, because 2 is missing in ID2
我的输出应如下所示:
Rec_Id Name ID1 ID2
6 B 3
10 C 5 1
13 D 4 3
好的,让我换一种方式来解释……在上面的例子中,Name 列共有 4 个组:A、B、C、D。
A 组 - 有 3 条记录,它们构成一个循环。之所以说构成循环,是因为第 3 行的 ID2 值(ID2 = 1)与第 1 行的 ID1 值(ID1 = 1)相匹配。同样的循环概念也适用于 B、C 和 D 组。
- A - 组记录:
Name ID1 ID2
A 1 2
A 2 3
A 3 1
- B - 组记录:
Name ID1 ID2
B 1 2
B 2 3
B 3 null
B 组 - 有 3 条记录,但循环是断开的。之所以说循环断开,是因为第 3 行的 ID2 值(ID2 = null)与第 1 行的 ID1 值(ID1 = 1)不匹配。
C - 组记录:同样的循环概念也适用于 C 组。如果序列中出现中断,则需要显示对应的行。
D-组记录:
Name ID1 ID2
D 2 3
D 3 4
D 4 3
D 组 - 有 3 条记录,但循环是断开的。之所以说循环断开,是因为第 3 行的 ID2 值(ID2 = 3)与第 1 行的 ID1 值(ID1 = 2)不匹配。
所以,我需要t-sql来获得以上输出。
提前致谢, RH
答案 0 :(得分:0)
这是解决该问题的一种方法。根据您的业务规则,您可能需要把它拆分成更小的步骤。从您的样本可以看出,在一个完整的循环中,ID1 列的总和等于 ID2 列的总和。判断规则依赖于 ID1 列中的第一个和最后一个值,所以我也把它们计算出来(用 MIN 和 MAX 取得这两个值并附加到每一行,以便在最后进行过滤)。
-- Lists the rows of Sample that break the per-Name ID1 -> ID2 chain.
-- Result columns: Rec_id, Name, ID1, ID2, startId, lastId, TotID1, TotID2, artificialID.
-- Pre-filter: only groups whose SUM(ID1) differs from SUM(ID2) (NULL counted as 0) are
-- inspected, so a broken group whose two sums happen to be equal is not reported.
WITH errs AS (
    -- All rows of every suspect group, decorated with the group's totals and with
    -- startId / lastId = smallest / largest ID1 of the group.
    SELECT
        s.Rec_id,
        s.[Name],
        s.ID1,
        s.ID2,
        p.startId,
        p.lastId,
        p.TotID1,
        p.TotID2,
        0 AS artificialID
    FROM Sample AS s
    INNER JOIN (
        -- One row per Name whose ID1 total and ID2 total disagree.
        SELECT
            s0.[Name],
            SUM(ISNULL(s0.ID1, 0)) AS TotID1,
            SUM(ISNULL(s0.ID2, 0)) AS TotID2,
            MIN(s0.ID1) AS startId,
            MAX(s0.ID1) AS lastId
        FROM Sample AS s0
        GROUP BY s0.[Name]
        HAVING SUM(ISNULL(s0.ID1, 0)) <> SUM(ISNULL(s0.ID2, 0))
    ) AS p
        ON s.[Name] = p.[Name]
),
-- Rows sitting after a gap in ID1, for groups whose last row exists and correctly
-- points back to the first ID1 (i.e. something is missing in the middle).
er1 AS (
    SELECT
        er1a.Rec_id,
        er1a.[Name],
        er1a.ID1,
        er1a.ID2,
        er1a.startId,
        er1a.lastId,
        er1a.TotID1,
        er1a.TotID2,
        er1a.artificialID
    FROM (
        -- Number the rows of each group by ID1, skipping groups that contain a
        -- NULL/0 ID1 or ID2 (those are reported by the first branch of the final query).
        SELECT
            e.Rec_id,
            e.[Name],
            e.ID1,
            e.ID2,
            e.startId,
            e.lastId,
            e.TotID1,
            e.TotID2,
            ROW_NUMBER() OVER (PARTITION BY e.[Name] ORDER BY e.ID1) AS artificialID
        FROM errs AS e
        WHERE NOT EXISTS (
            SELECT 1
            FROM errs AS o
            WHERE o.[Name] = e.[Name]
              AND (ISNULL(o.ID1, 0) = 0 OR ISNULL(o.ID2, 0) = 0)
        )
    ) AS er1a
    -- ROW_NUMBER starts at 1, so the ID1 expected at position n is startId + n - 1.
    -- Comparing ID1 with the bare row number would flag every row of a group whose
    -- first ID1 is not 1.
    WHERE er1a.ID1 <> er1a.startId + er1a.artificialID - 1
      -- keep only groups where the last row exists and its ID2 closes the cycle
      AND EXISTS (
            SELECT 1
            FROM errs AS o1
            WHERE o1.[Name] = er1a.[Name]
              AND ISNULL(o1.ID1, 0) = o1.lastId
              AND ISNULL(o1.ID2, 0) = o1.startId
      )
)
-- First branch: rows with a missing (NULL or 0) ID1 or ID2.
SELECT
    e.Rec_id,
    e.[Name],
    e.ID1,
    e.ID2,
    e.startId,
    e.lastId,
    e.TotID1,
    e.TotID2,
    e.artificialID
FROM errs AS e
WHERE (ISNULL(e.ID1, 0) = 0 OR ISNULL(e.ID2, 0) = 0)
UNION  -- not UNION ALL: a last row with NULL ID2 matches both the first and third branch
-- Second branch: rows after a gap in the middle of the sequence.
SELECT
    e1.Rec_id,
    e1.[Name],
    e1.ID1,
    e1.ID2,
    e1.startId,
    e1.lastId,
    e1.TotID1,
    e1.TotID2,
    e1.artificialID
FROM er1 AS e1
UNION
-- Third branch: the last row of the group does not carry the first ID1 in ID2.
SELECT
    e2.Rec_id,
    e2.[Name],
    e2.ID1,
    e2.ID2,
    e2.startId,
    e2.lastId,
    e2.TotID1,
    e2.TotID2,
    e2.artificialID
FROM errs AS e2
WHERE (ISNULL(e2.ID1, 0) = e2.lastId AND ISNULL(e2.ID2, 0) <> e2.startId)
ORDER BY [Name], ID1, ID2;