如何从SQL Server表中选择唯一的行集

时间:2013-04-10 17:45:55

标签: sql sql-server sql-server-2008-r2

我有一张看起来像这样的表

StudentId  Subject   Section
1           2          AM
1           3          AM
1           1          AM

2           2          AM
2           3          AM
2           1          AM

3           4          AM
3           2          PM
3           3          PM

4           2          PM
4           3          PM

我想从此表中获取唯一的行集来安排教室。科目(Subject)和时段(Section)组合完全相同的学生可以进入同一个教室。因此,基于上面的样本数据,学生1和2将进入同一个教室,而学生3和4则不会,因为他们的科目和/或时段不同。

虽然学生4的科目/时段组合是学生3的组合的一部分(但并不完全相同),学生3和4仍然不能分到同一个教室。

换句话说,要进入同一个教室,学生必须拥有完全相同的科目、相同的科目数量以及相同的时段(Section)。上面示例数据的输出应该如下所示。

ClassId   Subject   Section
1           2        AM
1           3        AM
1           1        AM

2           4        AM
2           2        PM
2           3        PM

3           2        PM
3           3        PM

我正在处理的表有1000万行,但只有200个唯一的行集。 select语句可以忽略StudentId,并可以用动态生成的ClassId替换它。然后我可以使用这个select语句将唯一的行集插入到类表中。

2 个答案:

答案 0 :(得分:0)

我使用你的第一张表编写了以下动态查询来生成教室列表。这将为您提供与第二张表完全相同的输出。

-- Builds the list of distinct classrooms from [student] with dynamic SQL:
--   1. collect every distinct subject as a quoted column name,
--   2. PIVOT so each student becomes one row with one section column per subject,
--   3. GROUP BY all subject columns so identical subject/section sets collapse,
--   4. number the surviving combinations (class_room) and UNPIVOT them back to
--      one row per (class_room, subject, section).
-- NOTE(review): max(section) keeps only one section per student/subject; this
-- presumably relies on (studentId, subject) being unique in [student] - confirm.
set nocount on

-- quotename() returns nvarchar, so keep the generated SQL in nvarchar as well.
declare @subjects nvarchar(max)
declare @dyn_sql  nvarchar(max)

-- Concatenate with FOR XML PATH instead of "select @v = @v + ...": the result of
-- variable-assignment concatenation is not guaranteed by SQL Server. STUFF drops
-- the leading comma; TYPE/.value() un-escapes any XML entities in the names.
select @subjects = stuff(
        (
            select ',' + quotename(cast(s.subject as varchar(25)))
            from [student] as s
            group by s.subject
            order by s.subject
            for xml path(''), type
        ).value('.', 'nvarchar(max)'),
        1, 1, '')

set @dyn_sql = N'
select
    class_room,
    subject,
    section
from
    (
    select
        row_number() over (order by ' + @subjects + N') as class_room,
        ' + @subjects + N',
        count(distinct studentid) as total_students
    from
        (
        select
            studentId,
            cast(subject as varchar(25)) as subject,
            section
        from [student]
        ) as src
    pivot (max(section) for subject in (' + @subjects + N')) as per_student
    group by
        ' + @subjects + N'
    ) as combos
unpivot (section for subject in (' + @subjects + N')) as class_rows
order by
    class_room,
    section,
    subject
'

-- @subjects (and therefore @dyn_sql) is NULL when [student] has no rows;
-- in that case there is nothing to execute.
if @dyn_sql is not null
begin
    -- Print first so the generated statement is visible even if it fails to run.
    print (@dyn_sql)
    exec (@dyn_sql)
end

答案 1 :(得分:0)

这包含填充class表和studentID与classID之间的连接表的查询...

(注意:首先运行注释掉的逻辑以填充临时表)

/*
-- Sample data for the queries below. Run this block once before the rest of
-- the script. The table must be the temp table #testData, because that is the
-- name every query below reads from.
If      Object_ID('tempdb..#testData') Is Not Null Drop Table #testData;

Create  Table #testData (StudentID Int, [Subject] Int, Section Varchar(2));

Insert  Into #testData (StudentID, [Subject], Section)
Values  (1, 2, 'AM'),
        (1, 3, 'AM'),
        (1, 1, 'AM'),
        (2, 2, 'AM'),
        (2, 3, 'AM'),
        (2, 1, 'AM'),
        (3, 4, 'AM'),
        (3, 2, 'PM'),
        (3, 3, 'PM'),
        (4, 2, 'PM'),
        (4, 3, 'PM');
*/

--      Get class info
--      Builds #classes (classID, Subject, Section): one row per subject/section
--      pair of every distinct subject/section set found in #testData.
If      Object_ID('tempdb..#classes') Is Not Null Drop Table #classes;

With    studentClasses As
(
        --  One signature string per student: an <X>subject_section</X> element
        --  for each of the student's rows, in a fixed (Section, Subject) order
        --  so that identical sets always produce identical strings.
        Select  t.StudentID,
                (   Select  Convert(Varchar(10), t2.[Subject]) + '_' + t2.Section As 'data()'
                    From    #testData t2
                    Where   t2.StudentID = t.StudentID
                    Order   By t2.Section, t2.[Subject]
                    For     Xml Path('X')) As classes
        From    #testData t
        Group   By t.StudentID
),      classIDs As
(
        --  Each distinct signature becomes one class with a generated classID.
        Select  Row_Number() Over (Order By sc.classes) As classID,
                Convert(Xml, sc.classes) As classes
        From   (Select  Distinct classes
                From    studentClasses) sc
),      classItems As
(
        --  Shred the signature back into one 'subject_section' string per row.
        Select  c.classID,
                item.D.value('.', 'Varchar(50)') As split
        From    classIDs c
        Cross   Apply c.classes.nodes('X') As item(D)
)
--      Split on the first underscore. Subject is rendered from a number and so
--      contains no underscore; everything after the first one is the Section.
--      This avoids depending on a Row_Number() whose ORDER BY column is
--      constant within its partition to tell Subject from Section.
Select  ci.classID,
        Left(ci.split, CharIndex('_', ci.split) - 1) As [Subject],
        Substring(ci.split, CharIndex('_', ci.split) + 1, 50) As Section
Into    #classes
From    classItems ci;


--      Builds #studentClassID (classID, studentID): the link between each
--      student and the class whose subject/section set equals the student's.
--      Both sides are reduced to a comma-separated 'subject_section' signature
--      and joined on that string.
If      Object_ID('tempdb..#studentClassID') Is Not Null Drop Table #studentClassID;

With    students As
(
        --  Signature per student. Subject is sorted by its text form because
        --  #classes stores Subject as text; both signatures must list their
        --  elements in the same order or equal sets will not match
        --  (numeric order 2,10 versus text order 10,2).
        Select  t.StudentID,
                Replace((   Select  Convert(Varchar(10), t2.[Subject]) + '_' + t2.Section As 'data()'
                            From    #testData t2
                            Where   t.StudentID = t2.StudentID
                            Order   By t2.Section, Convert(Varchar(10), t2.[Subject])
                            For     Xml Path('')), ' ', ',') As classes
        From    #testData t
        Group   By t.StudentID
),      classes As
(
        --  Signature per class, built with the same expression and sort order.
        Select  t.classID,
                Replace((   Select  Convert(Varchar(10), t2.[Subject]) + '_' + t2.Section As 'data()'
                            From    #classes t2
                            Where   t.classID = t2.classID
                            Order   By t2.Section, Convert(Varchar(10), t2.[Subject])
                            For     Xml Path('')), ' ', ',') As classes
        From    #classes t
        Group   By t.classID
)
Select  c.classID, s.StudentID As studentID Into #studentClassID
From    classes c
Join    students s
        On  c.classes = s.classes;


--      Show the generated classes. Columns are listed explicitly and the rows
--      are ordered so the output is stable between runs.
Select  c.classID,
        c.[Subject],
        c.Section
From    #classes c
Order   By c.classID, c.Section, c.[Subject];

--      Show which student was assigned to which class.
Select  sc.classID,
        sc.studentID
From    #studentClassID sc
Order   By sc.classID, sc.studentID;