T-SQL-连接多个表导致重复的行

时间:2018-07-04 13:18:52

标签: tsql sql-server-2012 pivot

我正努力从下面的示例Fiddle中获得以下结果。

结果:

enter image description here

源表:

enter image description here

Fiddle

这个想法是为每个RecordID创建一行:先找出每个RecordID下预算ID和预测ID的最大数量,并据此创建相应数量的附加列;如果某个RecordID没有对应的预算或预测ID,则将这些列的单元格留空。

我尝试使用PIVOT函数,但甚至无法获得令人满意的结果。

更新: 请参阅下图,我试图解释预期的输出

用文字表示:为属于某个RecordID的每个BudgetID分别创建各自的列:BDate、结果(Percentage * Records表中的BAmount)和状态。

在我的示例中,RecordID 55 在“预算”表中有两个条目——因此需要 2x3 列,在该RecordID的同一行中用各自的列显示每个条目的日期、结果和状态。

由于RecordID 77 在“预算”表中的条目最多(三个),因此以它为准,为所有行创建 3x3 列。

预测也是如此。

output

希望你能帮助我。

谢谢。

3 个答案:

答案 0 :(得分:6)

美好的一天,

  

注意!:我的目标是帮助读者学习,而不是提供最终的查询作为作业的一种解决方案。因此,我将分两步介绍解决方案,并在动态解决方案中添加几个“ PRINT”命令,以便读者可以选择检查工作中的中间步骤。

OP提供的DDL + DML:

-- Sample schema and data supplied by the OP.
-- DROP TABLE IF EXISTS requires SQL Server 2016 or later; the question is
-- tagged sql-server-2012, so an OBJECT_ID guard is used instead. The guard
-- keeps the script re-runnable on both old and new versions.

-- Budget: one row per budget slice of a record (Percentage of Records.BAmount).
IF OBJECT_ID(N'Budget', N'U') IS NOT NULL
    DROP TABLE Budget;
CREATE TABLE Budget
    (BudgetID int, RecordID int, BDate date, Percentage int, [Status] varchar(50));
INSERT INTO Budget
    (BudgetID, RecordID, BDate, Percentage, [Status])
VALUES
    (1, 55, '2017-01-01', 60, 'ordered'),
    (2, 55, '2017-03-24', 40, 'ordered'),
    (3, 66, '2018-08-15', 100, 'invoiced'),
    (4, 77, '2018-12-02', 25, 'paid'),
    (5, 77, '2018-09-10', 35, 'ordered'),
    (6, 77, '2019-07-13', 40, 'ordered');
GO

-- Forecast: one row per forecast slice of a record.
IF OBJECT_ID(N'Forecast', N'U') IS NOT NULL
    DROP TABLE Forecast;
CREATE TABLE Forecast
    (ForecastID int, RecordID int, FDate date, Percentage int);
INSERT INTO Forecast
    (ForecastID, RecordID, FDate, Percentage)
VALUES
    (1, 55, '2020-12-01', 100),
    (2, 77, '2023-05-17', 25),
    (3, 77, '2024-11-28', 75);
GO

-- Records: the parent rows; BAmount and FAmount are the amounts the
-- percentages above are applied to.
IF OBJECT_ID(N'Records', N'U') IS NOT NULL
    DROP TABLE Records;
CREATE TABLE Records
    (RecordID int, BAmount int, FAmount int, Name varchar(40), Description varchar(40));
INSERT INTO Records
    (RecordID, BAmount, FAmount, Name, Description)
VALUES
    (55, 15000, 33000, 'Prod1', 'Desc1'),
    (66, 22000, 17500, 'Prod2', 'Desc2'),
    (77, 40000, 44000, 'Prod3', 'Desc3');
GO

-- Quick look at the loaded sample data.
select * from Budget;
select * from Forecast;
select * from Records;

让我们首先展示一个简单的静态解决方案

这是基于以下认识:预算表中每个RecordID最多具有三行,而预测表中每个RecordID最多具有两行。这将有助于理解我将在接下来展示的动态解决方案

-- Static pivot: one output row per record, with up to three budget column
-- groups (date / result / status) and up to two forecast column groups
-- (date / result). Budget and forecast rows are numbered per RecordID and
-- each numbered row is attached with its own LEFT JOIN, so records with fewer
-- rows get NULLs in the unused column groups.
;WITH CteBudget AS (
    -- Number the budget rows of each record in BudgetID order.
    SELECT
        b.RecordID, b.BudgetID, b.BDate, b.Percentage, b.Status,
        ROW_NUMBER() OVER (PARTITION BY b.RecordID ORDER BY b.BudgetID) AS RN
    FROM Budget AS b
),
CteForecast AS (
    -- Number the forecast rows of each record in ForecastID order.
    SELECT
        f.RecordID, f.ForecastID, f.FDate, f.Percentage,
        ROW_NUMBER() OVER (PARTITION BY f.RecordID ORDER BY f.ForecastID) AS RN
    FROM Forecast AS f
)
SELECT
    r.RecordID,
    r.Name,
    r.Description,

    -- Budget result = budget percentage of the record's budget amount.
    b1.BDate AS BDate1, (b1.Percentage * r.BAmount) / 100 AS BResult1, b1.Status AS BStatus1,
    b2.BDate AS BDate2, (b2.Percentage * r.BAmount) / 100 AS BResult2, b2.Status AS BStatus2,
    b3.BDate AS BDate3, (b3.Percentage * r.BAmount) / 100 AS BResult3, b3.Status AS BStatus3,

    -- Forecast result = forecast percentage of the record's forecast amount.
    -- The original multiplied by r.BAmount here, which looks like a
    -- copy/paste slip from the budget columns; confirm against the expected output.
    f1.FDate AS FDate1, (f1.Percentage * r.FAmount) / 100 AS FResult1,
    f2.FDate AS FDate2, (f2.Percentage * r.FAmount) / 100 AS FResult2

FROM Records AS r
LEFT JOIN CteBudget   AS b1 ON r.RecordID = b1.RecordID AND b1.RN = 1
LEFT JOIN CteBudget   AS b2 ON r.RecordID = b2.RecordID AND b2.RN = 2
LEFT JOIN CteBudget   AS b3 ON r.RecordID = b3.RecordID AND b3.RN = 3
LEFT JOIN CteForecast AS f1 ON r.RecordID = f1.RecordID AND f1.RN = 1
LEFT JOIN CteForecast AS f2 ON r.RecordID = f2.RecordID AND f2.RN = 2
-- To inspect a single record, add a filter such as: WHERE r.RecordID = 77
GO
  

注意!在没有索引的情况下(我将在结尾处添加索引),上述静态解决方案的性能非常糟糕;但是一旦添加了正确的索引,并在此基础上构建动态解决方案,这种做法应该就很合适了。

现在我们可以提出动态解决方案。

-- Dynamic pivot: builds the same query shape as the static solution, but with
-- as many budget / forecast column groups as the data requires, then runs it
-- with sp_executesql. Intermediate strings are PRINTed so the reader can
-- inspect each step.

-- Get number of column groups: the largest number of Budget / Forecast rows
-- owned by any single RecordID (0 when the table is empty).
DECLARE @NumBudget int;
DECLARE @NumForecast int;

SELECT @NumBudget = ISNULL(MAX(t.C), 0)
FROM (
    SELECT COUNT(RecordID) AS C
    FROM Budget
    GROUP BY RecordID
) AS t;

SELECT @NumForecast = ISNULL(MAX(t.C), 0)
FROM (
    SELECT COUNT(RecordID) AS C
    FROM Forecast
    GROUP BY RecordID
) AS t;
---------------------------------------------
DECLARE @SQLString1 nvarchar(MAX) = N'';   -- extra SELECT-list columns
DECLARE @SQLString2 nvarchar(MAX) = N'';   -- LEFT JOIN clauses
DECLARE @loop int = 1;
DECLARE @n nvarchar(10);                   -- @loop as text, used in aliases

-- One column group and one join per budget row number.
-- Every column group starts with its own leading comma, so no trailing comma
-- has to be trimmed afterwards and an empty table still yields valid SQL.
WHILE @loop <= @NumBudget
BEGIN
    SET @n = CONVERT(nvarchar(10), @loop);

    SET @SQLString1 = @SQLString1 + N',
    b' + @n + N'.BDate BDate' + @n
        + N', (b' + @n + N'.Percentage * r.BAmount)/100 BResult' + @n
        + N', b' + @n + N'.Status BStatus' + @n;

    -- The alias bN must pick up row number N. The original hard-coded
    -- "RN = 1", which made every column group repeat the first budget row.
    SET @SQLString2 = @SQLString2 + N'
    left join CteBudget b' + @n + N' on r.RecordID = b' + @n + N'.RecordID and b' + @n + N'.RN = ' + @n;

    SET @loop = @loop + 1;
END

SET @loop = 1;

-- One column group and one join per forecast row number.
WHILE @loop <= @NumForecast
BEGIN
    SET @n = CONVERT(nvarchar(10), @loop);

    -- Forecast percentage is applied to the forecast amount (FAmount). The
    -- original used r.BAmount, which looks like a copy/paste slip; confirm
    -- against the expected output.
    SET @SQLString1 = @SQLString1 + N',
    f' + @n + N'.FDate FDate' + @n
        + N', (f' + @n + N'.Percentage * r.FAmount)/100 FResult' + @n;

    SET @SQLString2 = @SQLString2 + N'
    left join CteForecast f' + @n + N' on r.RecordID = f' + @n + N'.RecordID and f' + @n + N'.RN = ' + @n;

    SET @loop = @loop + 1;
END

PRINT '/************************************************/'
PRINT @SQLString1
PRINT @SQLString2
PRINT '/************************************************/'

-- Assemble the final statement: the two numbering CTEs, the fixed columns,
-- then the generated column groups and joins.
DECLARE @SQLString nvarchar(MAX);
SET @SQLString = N'
;With CteBudget as (
    select 
        b.BDate, b.BudgetID, b.Percentage, b.RecordID, b.Status
        ,RN = ROW_NUMBER() OVER (partition by b.RecordID order by b.BudgetID)
    from Budget b
),
CteForecast as (
    select 
        f.FDate, f.ForecastID, f.Percentage, f.RecordID
        ,RN = ROW_NUMBER() OVER (partition by f.RecordID order by f.ForecastID)
    from Forecast f
)
select 
    r.RecordID, r.Name, r.Description'
+ @SQLString1
+ N'
from Records r'
+ @SQLString2;

-- NOTE: PRINT truncates long nvarchar values, so very wide pivots may show
-- only the beginning of the statement here; the executed text is complete.
PRINT @SQLString;

EXECUTE sp_executesql @SQLString;
GO
  

重要!此解决方案不一定是性能最佳的解决方案,但可能是最容易解释和理解的解决方案。在生产环境中,一旦有了真实的DDL + DML,并且服务器有了统计信息,我们就能够进一步提高性能,并针对具体情况选择最佳方案。

索引

  

注意!以上解决方案可能会导致对数据进行大量排序,在这里拥有正确的索引非常重要!测试几个不同的选项并选择最佳选项非常重要。

为了方便本站读者(我认为stackoverflow与其说是讨论论坛,不如说是问答平台),我添加了用于创建CLUSTERED INDEX的查询(我假定生产环境中已有这些索引),并添加了一组可选的NONCLUSTERED INDEX供您测试(我没有测试其他选项,这只是我最先想到的方案,因此建议您使用真实的DDL + DML继续验证哪些索引才合适)。

-- Clustered indexes: one per table, keyed on the table's own ID column.
CREATE CLUSTERED INDEX IX_Budget_BudgetID ON dbo.Budget (BudgetID);
GO
CREATE CLUSTERED INDEX IX_Forecast_ForecastID ON dbo.Forecast (ForecastID);
GO
CREATE CLUSTERED INDEX IX_Records_RecordID ON dbo.Records (RecordID);
GO

-- Optional nonclustered indexes keyed on (RecordID, <own ID>), matching the
-- PARTITION BY RecordID / ORDER BY <own ID> numbering used by the pivot queries.
CREATE NONCLUSTERED INDEX NX_Budget_RecordID_BudgetID ON dbo.Budget (RecordID, BudgetID);
GO
CREATE NONCLUSTERED INDEX NX_Forecast_RecordID_ForecastID ON dbo.Forecast (RecordID, ForecastID);
GO
-- NOTE(review): this index has the same key column as the clustered index
-- IX_Records_RecordID above; measure whether it adds anything before keeping it.
CREATE NONCLUSTERED INDEX NX_Records_RecordID_RecordID ON dbo.Records (RecordID);
GO

答案 1 :(得分:3)

我想我会这样做:

-- Keeps only the "latest" (highest-ID) budget row and forecast row of each
-- record, then joins both to the record itself.
-- All work tables are session-local temp tables created with SELECT ... INTO.

-- Highest BudgetID per record.
select Max(BudgetId) as BudgetID, RecordID
into #MBudget
from Budget
group by RecordID

-- Full Budget rows for those highest BudgetIDs.
select B.* 
into #MaxB
from #MBudget M
inner join Budget B
on M.BudgetID = B.BudgetID
and M.RecordID = B.RecordID

--The above will then only have the "maximum" BudgetID data from the table.

-- You need to then do the same with the Forecast table

-- Highest ForecastID per record.
select Max(ForecastId) as ForecastID, RecordID
into #MForecast
from Forecast
group by RecordID

-- Full Forecast rows for those highest ForecastIDs.
select F.* 
into #MaxF
from #MForecast M
inner join Forecast F
on M.ForecastID = F.ForecastID
and M.RecordID = F.RecordID

-- Join them together on the RecordID
-- The parent table is named Records (the original referenced a non-existent
-- table "Record"). Because these are inner joins, a record that lacks either
-- a budget row or a forecast row (RecordID 66 in the sample data has no
-- forecast) does not appear in the result.

select *  -- you will need to pick the required fields
from #MaxF F
inner join #MaxB B 
on F.RecordID = B.RecordID
inner join Records R
on F.RecordID = R.RecordID

我想这应该能帮你达到目的。我不太喜欢用Fiddle,所以以上代码是凭脑子直接写出来的(未经实际运行)。

我确实同意其他人的看法:你的问题本可以表述得更清楚一些,不过希望这些内容能帮你得到想要的结果。

答案 2 :(得分:3)

最后,我们将动态查询创建为多个联接。

输入表:

-- Session-local copies of the sample tables used by the queries and
-- procedures of this answer.

-- Budget slices per record.
CREATE TABLE #Budget
(
    BudgetID   int,
    RecordID   int,
    BDate      date,
    Percentage int,
    [Status]   varchar(50)
);
INSERT INTO #Budget (BudgetID, RecordID, BDate, Percentage, [Status])
VALUES
    (1, 55, '2017-01-01', 60,  'ordered'),
    (2, 55, '2017-03-24', 40,  'ordered'),
    (3, 66, '2018-08-15', 100, 'invoiced'),
    (4, 77, '2018-12-02', 25,  'paid'),
    (5, 77, '2018-09-10', 35,  'ordered'),
    (6, 77, '2019-07-13', 40,  'ordered');

-- Forecast slices per record (includes one extra row, ForecastID 4, compared
-- with the OP's original sample data).
CREATE TABLE #Forecast
(
    ForecastID int,
    RecordID   int,
    FDate      date,
    Percentage int
);
INSERT INTO #Forecast (ForecastID, RecordID, FDate, Percentage)
VALUES
    (4, 77, '2018-07-18', 24),
    (1, 55, '2020-12-01', 100),
    (2, 77, '2023-05-17', 25),
    (3, 77, '2024-11-28', 75);

-- Parent records.
CREATE TABLE #Records
(
    RecordID    int,
    BAmount     int,
    FAmount     int,
    Name        varchar(40),
    Description varchar(40)
);
INSERT INTO #Records (RecordID, BAmount, FAmount, Name, Description)
VALUES
    (55, 15000, 33000, 'Prod1', 'Desc1'),
    (66, 22000, 17500, 'Prod2', 'Desc2'),
    (77, 40000, 44000, 'Prod3', 'Desc3');

最终加入查询

 -- Hand-expanded form of the query that the rownumber / multyJoin procedures
 -- generate dynamically.
 -- Each derived table t1..t9 is the same join of #Records, #Forecast and
 -- #Budget restricted to RecordID 77, so every row of a derived table is one
 -- (forecast row, budget row) pair of that record, numbered by ROW_NUMBER().
 -- The numbering uses ORDER BY (select NULL), i.e. no defined order.
 -- Consecutive derived tables are chained with tN.rn < tN+1.rn, which places
 -- the pairs side by side as fdateN / bdateN columns on one output row.
 -- Nine derived tables are used because the sample data yields 3 forecast
 -- rows x 3 budget rows = 9 pairs for RecordID 77.
 select * from (select r1.RecordID, f1.FDate fdate1, b1.BDate bdate1
 , ROW_NUMBER() over(partition by r1.recordid order by (select NULL)) rn from #Records r1
 join #Forecast f1
 on r1.RecordID = f1.RecordID
 and r1.RecordID = 77
 join #Budget b1
 on r1.RecordID = b1.RecordID
)t1 join (select r2.RecordID, f2.FDate fdate2, b2.BDate bdate2
 , ROW_NUMBER() over(partition by r2.recordid order by (select NULL)) rn from #Records r2
 join #Forecast f2
 on r2.RecordID = f2.RecordID
 and r2.RecordID = 77
 join #Budget b2
 on r2.RecordID = b2.RecordID
)t2 on t1.RecordID = t2.RecordID and t1.rn < t2.rn join (select r3.RecordID, f3.FDate fdate3, b3.BDate bdate3
 , ROW_NUMBER() over(partition by r3.recordid order by (select NULL)) rn from #Records r3
 join #Forecast f3
 on r3.RecordID = f3.RecordID
 and r3.RecordID = 77
 join #Budget b3
 on r3.RecordID = b3.RecordID
)t3 on t2.RecordID = t3.RecordID and t2.rn < t3.rn join (select r4.RecordID, f4.FDate fdate4, b4.BDate bdate4
 , ROW_NUMBER() over(partition by r4.recordid order by (select NULL)) rn from #Records r4
 join #Forecast f4
 on r4.RecordID = f4.RecordID
 and r4.RecordID = 77
 join #Budget b4
 on r4.RecordID = b4.RecordID
)t4 on t3.RecordID = t4.RecordID and t3.rn < t4.rn join (select r5.RecordID, f5.FDate fdate5, b5.BDate bdate5
 , ROW_NUMBER() over(partition by r5.recordid order by (select NULL)) rn from #Records r5
 join #Forecast f5
 on r5.RecordID = f5.RecordID
 and r5.RecordID = 77
 join #Budget b5
 on r5.RecordID = b5.RecordID
)t5 on t4.RecordID = t5.RecordID and t4.rn < t5.rn join (select r6.RecordID, f6.FDate fdate6, b6.BDate bdate6
 , ROW_NUMBER() over(partition by r6.recordid order by (select NULL)) rn from #Records r6
 join #Forecast f6
 on r6.RecordID = f6.RecordID
 and r6.RecordID = 77
 join #Budget b6
 on r6.RecordID = b6.RecordID
)t6 on t5.RecordID = t6.RecordID and t5.rn < t6.rn join (select r7.RecordID, f7.FDate fdate7, b7.BDate bdate7
 , ROW_NUMBER() over(partition by r7.recordid order by (select NULL)) rn from #Records r7
 join #Forecast f7
 on r7.RecordID = f7.RecordID
 and r7.RecordID = 77
 join #Budget b7
 on r7.RecordID = b7.RecordID
)t7 on t6.RecordID = t7.RecordID and t6.rn < t7.rn join (select r8.RecordID, f8.FDate fdate8, b8.BDate bdate8
 , ROW_NUMBER() over(partition by r8.recordid order by (select NULL)) rn from #Records r8
 join #Forecast f8
 on r8.RecordID = f8.RecordID
 and r8.RecordID = 77
 join #Budget b8
 on r8.RecordID = b8.RecordID
)t8 on t7.RecordID = t8.RecordID and t7.rn < t8.rn join (select r9.RecordID, f9.FDate fdate9, b9.BDate bdate9
 , ROW_NUMBER() over(partition by r9.recordid order by (select NULL)) rn from #Records r9
 join #Forecast f9
 on r9.RecordID = f9.RecordID
 and r9.RecordID = 77
 join #Budget b9
 on r9.RecordID = b9.RecordID
)t9 on t8.RecordID = t9.RecordID and t8.rn < t9.rn

请先用上面的输入(I/P)表和查询验证输出(O/P)是否符合预期。如果该结果可以接受,则可以使用下面的存储过程(SP)。

SP 1

-- Entry point: runs the rownumber procedure, which in turn calls multyJoin.
-- Both procedures must already have been created (each in its own batch) and
-- the #Records / #Forecast / #Budget temp tables must exist in this session.
EXECUTE rownumber;

create procedure rownumber as                                           --1st sp.
-- Pivots the (forecast, budget) date pairs of RecordID 77 onto a single row.
--
-- Counts the pairs produced by joining #Records, #Forecast and #Budget for
-- RecordID 77. With more than two pairs it asks multyJoin for one join
-- fragment per pair, stitches the fragments into one statement and executes
-- it. Otherwise it simply returns the numbered pairs as rows.
--
-- Takes no parameters. Reads the session temp tables #Records, #Forecast and
-- #Budget and calls the multyJoin procedure.
begin
    declare @r int = 1;                                   -- current pair / fragment number
    declare @select nvarchar(100) = N'select * from ';    -- replaces the leading ' join ' of fragment 1
    declare @count int;                                   -- number of (forecast, budget) pairs
    declare @finalquery nvarchar(max) = N'';
    declare @out nvarchar(max);                           -- fragment returned by multyJoin

    -- Number of pairs; the row numbering itself is not needed just to count.
    select @count = count(*)
    from #Records r
    join #Forecast f1
        on r.RecordID = f1.RecordID
        and r.RecordID = 77
    join #Budget b1
        on r.RecordID = b1.RecordID;

    if @count > 2
    begin
        -- Collect one ' join (...)tN on tN-1... ' fragment per pair.
        while @r <= @count
        begin
            exec multyJoin @r, @out output;
            set @finalquery += @out;
            set @r = @r + 1;
        end

        -- Fragment 1 starts with ' join ' (6 characters): turn it into the
        -- opening 'select * from ', then drop its ON clause, which refers to
        -- a non-existent t0.
        set @finalquery = stuff(@finalquery, 1, 6, @select);
        set @finalquery = replace(@finalquery, 'on t0.RecordID = t1.RecordID and t0.rn < t1.rn ', '');

        -- The original ran the steps above for the else branch as well, where
        -- @finalquery is empty and STUFF returns NULL, so a NULL statement was
        -- printed and passed to sp_executesql. They now run only when a
        -- statement has actually been built.
        print @finalquery;
        exec sp_executesql @finalquery;
    end
    else
    begin
        -- Two pairs or fewer: return them as plain numbered rows.
        select * from (
            select r.RecordID, f1.FDate fdate1, b1.BDate bdate1
            , ROW_NUMBER() over(partition by r.recordid order by (select NULL)) rn
            from #Records r
            join #Forecast f1
                on r.RecordID = f1.RecordID
                and r.RecordID = 77
            join #Budget b1
                on r.RecordID = b1.RecordID
        ) t;
    end
end

SP 2

create procedure multyJoin (@r int, @join varchar(max) output) as       --2nd sp. Called by rownumber once per row number.
-- Builds one join fragment of the dynamic pivot statement.
--
-- @r    : 1-based number of the fragment; used as the suffix of every alias
--         (rN, fN, bN, tN) and of the fdateN / bdateN output columns.
-- @join : OUTPUT. Text of the form
--         ' join (select ... )tN on tN-1.RecordID = tN.RecordID and tN-1.rn < tN.rn'.
--         The caller depends on this exact text: it replaces the first six
--         characters (' join ') of fragment 1 and removes the ON clause that
--         mentions t0, so the literals below must not be reformatted.
declare @rvc varchar(10) = convert(varchar(10), @r)          ----row number of current in varchar
, @rvp varchar(10) = convert(varchar(10), @r - 1)            ----row number of previous in varchar
begin

 set @join = ' join (select r'+@rvc+'.RecordID, f'+@rvc+'.FDate fdate'+@rvc+', b'+@rvc+'.BDate bdate'+@rvc+  ---Here add your columns as like as 'Percentage'+@rvc+' * '+@rvc+'BAmount'
  ', ROW_NUMBER() over(partition by r'+@rvc+'.recordid order by (select NULL)) rn from #Records r'+@rvc+'
  join #Forecast f'+@rvc+'
  on r'+@rvc+'.RecordID = f'+@rvc+'.RecordID
  and r'+@rvc+'.RecordID = 77
  join #Budget b'+@rvc+'
  on r'+@rvc+'.RecordID = b'+@rvc+'.RecordID
 )t'+@rvc+
 ' on t'+@rvp+'.RecordID = t'+@rvc+'.RecordID and t'+@rvp+'.rn < t'+@rvc+'.rn'  --Each fragment joins to the previous one on a strictly increasing rn.
end