我有一张看起来像这样的表
StudentId Subject Section
1 2 AM
1 3 AM
1 1 AM
2 2 AM
2 3 AM
2 1 AM
3 4 AM
3 2 PM
3 3 PM
4 2 PM
4 3 PM
我想从此表中获取唯一的行集来安排教室。科目(Subject)和时段(Section)完全相同的学生可以去同一个教室。因此,基于上面的样本数据,学生1和2将进入同一个教室,而学生3和4则不能,因为他们的科目和/或时段不同。
虽然学生4的科目/时段组合是学生3的子集(但并不完全相同),学生3和4仍然不能分到同一个教室。
换句话说,要进入同一个教室,学生必须拥有完全相同的科目、相同数量的科目,以及完全相同的时段。上面示例数据的输出应该如下所示。
ClassId Subject Section
1 2 AM
1 3 AM
1 1 AM
2 4 AM
2 2 PM
2 3 PM
3 2 PM
3 3 PM
我正在处理的表有1000万行,但只有200个唯一的行集。select语句可以忽略StudentId,并用动态生成的ClassId替换它。然后我可以使用这个select语句将唯一的行集插入到班级(class)表中。
答案 0 :(得分:0)
所以我以你的第一张表为基础,编写了以下动态查询来生成教室列表。它会得到与你第二张表相同的行集(class_room 的编号顺序可能不同,因为排序时 NULL 列排在最前)。
-- Produces one class_room per distinct set of (subject, section) pairs held by a student.
-- Approach: pivot every student's rows into a single wide row (one column per subject,
-- value = section), group identical wide rows, number each group, then unpivot the
-- groups back into (class_room, subject, section) rows.
set nocount on

-- quotename() returns nvarchar, so the column list and the statement are kept in
-- nvarchar(max) to avoid a lossy implicit conversion to varchar.
declare @subjects nvarchar(max)

-- Comma-separated, bracket-quoted list of every distinct subject, e.g. [1],[2],[3],[4].
-- FOR XML PATH('') is used instead of "select @var = @var + ..." because the result of
-- aggregate concatenation through variable assignment is not guaranteed.
-- The explicit order by makes the column order, and therefore the class_room
-- numbering, repeatable between runs.
select
    @subjects = stuff(
        (
            select
                ',' + quotename(cast(s.subject as varchar(25)))
            from
                (select distinct subject from [student]) as s
            order by
                s.subject
            for xml path(''), type
        ).value('.', 'nvarchar(max)'),
        1, 1, '')  -- drop the leading comma

declare @dyn_sql nvarchar(max)

-- total_students is computed per class but is not part of the final select list.
-- unpivot skips NULL cells, so each class only lists the subjects it really contains.
set @dyn_sql = N'
select
    class_room,
    subject,
    section
from
(
    select
        row_number() over (order by ' + @subjects + N') as class_room,
        ' + @subjects + N',
        count(distinct studentid) as total_students
    from
    (
        select
            studentId,
            cast(subject as varchar(25)) as subject,
            section
        from [student]
    ) as d1
    pivot (max(section) for subject in (' + @subjects + N')) as d2
    group by
        ' + @subjects + N'
) as d1
unpivot (section for subject in (' + @subjects + N')) as d2
'

-- @subjects is NULL when [student] has no rows, which makes @dyn_sql NULL as well;
-- there is nothing to classify in that case, so skip execution.
if @dyn_sql is not null
begin
    exec (@dyn_sql)
    -- Echo the generated statement for debugging (print truncates very long strings).
    print (@dyn_sql)
end
答案 1 :(得分:0)
这包含填充class表和studentID与classID之间的连接表的查询...
(注意:首先运行注释掉的逻辑以填充临时表)
/*
-- Sample data for the queries below. Run this once before them.
-- The queries read from the temp table #testData, so the fixture has to be created
-- as #testData; a permanent table named testData would not be found by them.
Create Table #testData (StudentID Int, [Subject] Int, Section Varchar(2))
Insert #testData (StudentID, [Subject], Section)
Select 1,2,'AM'
Union All
Select 1,3,'AM'
Union All
Select 1,1,'AM'
Union All
Select 2,2,'AM'
Union All
Select 2,3,'AM'
Union All
Select 2,1,'AM'
Union All
Select 3,4,'AM'
Union All
Select 3,2,'PM'
Union All
Select 3,3,'PM'
Union All
Select 4,2,'PM'
Union All
Select 4,3,'PM';
*/
-- Get class info
-- Builds #classes (classID, Subject, Section): one row per subject/section pair of
-- every distinct schedule that occurs in #testData.
If Object_ID('tempdb..#classes') Is Not Null Drop Table #classes;
With studentClasses As
(
    -- One row per student plus a signature string that lists the student's
    -- subject_section pairs in a fixed order; two students share a class exactly
    -- when their signatures are equal.
    Select t.StudentID,
           Replace(( Select Convert(Varchar(10),[Subject])+'_'+Section As 'data()'
                     From #testData t2
                     Where t.studentID = t2.studentID
                     Order By Section,[Subject]
                     For Xml Path('X')), ' ', ',') As classes
    From #testData t
    Group By t.studentID
), classIDs As
(
    -- One row per distinct signature, numbered in signature order. A single
    -- representative student is kept so the class rows can be read straight from
    -- #testData instead of being parsed back out of the signature string.
    Select Row_Number() Over (Order By classes) As classID,
           Min(StudentID) As sampleStudentID
    From studentClasses
    Group By classes
)
-- Subject and Section are emitted as Varchar(50), the types the previous XML-based
-- split produced, so existing consumers of #classes keep working.
Select c.classID,
       Convert(Varchar(50), t.[Subject]) As [Subject],
       Convert(Varchar(50), t.Section) As Section
Into #classes
From classIDs c
Join #testData t
  On t.StudentID = c.sampleStudentID;
-- Builds #studentClassID (classID, studentID): assigns every student to the class in
-- #classes whose set of subject/section pairs equals the student's own set.
If Object_ID('tempdb..#studentClassID') Is Not Null Drop Table #studentClassID;
With students As
(
    -- Signature per student: comma-separated subject_section pairs.
    -- The pairs are sorted on the subject converted to a string, because the
    -- classes CTE below has to sort the same way for the signatures to be comparable.
    Select t.StudentID,
           Replace(( Select Convert(Varchar(10),[Subject])+'_'+Section As 'data()'
                     From #testData t2
                     Where t.studentID = t2.studentID
                     Order By Section, Convert(Varchar(10),[Subject])
                     For Xml Path('')), ' ', ',') As classes
    From #testData t
    Group By t.studentID
), classes As
(
    -- Signature per class, built with exactly the same expression and sort key.
    -- Sorting on the raw columns would order an Int subject numerically in
    -- #testData but a character subject alphabetically in #classes (2 < 10 versus
    -- '10' < '2'), and students with such schedules would silently drop out of the join.
    Select t.classID,
           Replace(( Select Convert(Varchar(10),[Subject])+'_'+Section As 'data()'
                     From #classes t2
                     Where t.classID = t2.classID
                     Order By Section, Convert(Varchar(10),[Subject])
                     For Xml Path('')), ' ', ',') As classes
    From #classes t
    Group By t.classID
)
Select c.classID, s.studentID Into #studentClassID
From classes c
Join students s
  On c.classes = s.classes;
-- Return the generated classes and the student-to-class mapping.
-- Columns are listed explicitly and the rows are ordered so the output is repeatable.
Select classID, [Subject], Section
From #classes
Order By classID, Section, [Subject];
Select classID, studentID
From #studentClassID
Order By classID, studentID;