我需要在SQL Server中使用共同的列日期联接多个表,但是我想避免合并时重复来自不同表的值。
drop table if exists #d, #t1, #t2
create table #d (DataDate date)
create table #t1 (DataDate date, Value1 float, Value2 float)
create table #t2 (DataDate date, Value3 float, Value4 float)
insert into #d values ('20181201'),('20181202'),('20181203')
insert into #t1 values
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
insert into #t2 values
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
select #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
实际结果
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 4.14 2.18
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 3.16 1.195 4.1 2.195
所需结果
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 NULL NULL
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 NULL NULL 4.1 2.195
答案 0 :(得分:1)
这是一个建议的解决方案。
我添加了第三个表,只是为了证明可以解决具有公共列的N个表。
准备演示数据:
/* Prepare demo objects */
DROP TABLE IF EXISTS #d, #t1, #t2
CREATE TABLE #d (DataDate date)
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float)
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float)
CREATE TABLE #t3 (DataDate date, Value5 float, Value6 float)
/* Insert demo data */
INSERT INTO #d VALUES ('20181201'),('20181202'),('20181203')
INSERT INTO #t1 VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
INSERT INTO #t2 VALUES
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
INSERT INTO #t3 VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.16, 1.195)
拟议的查询解决方案:
SELECT
COALESCE(d.DataDate, t1.datadate, t2.datadate, t3.datadate) AS DataDate
, t1.Value1
, t1.Value2
, t2.Value3
, t2.Value4
, t3.Value5
, t3.Value6
FROM
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
FROM #d) AS d
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
FROM #t1) AS t1
ON (t1.DataDate = d.DataDate AND t1.rn = d.rn)
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
FROM #t2) AS t2
ON (t2.DataDate = d.DataDate AND t2.rn = d.rn)
OR (t2.DataDate = t1.DataDate AND t2.rn = t1.rn)
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
FROM #t3) AS t3
ON (t3.DataDate = d.DataDate AND t3.rn = d.rn)
OR (t3.DataDate = t1.DataDate AND t3.rn = t1.rn)
OR (t3.DataDate = t2.DataDate AND t3.rn = t2.rn)
ORDER BY DataDate;
演示小提琴发布在 db <>小提琴here
结果:
DataDate | Value1 | Value2 | Value3 | Value4 | Value5 | Value6 :------------------ | -----: | -----: | -----: | -----: | -----: | -----: 01/12/2018 00:00:00 | 3.14 | 1.18 | 4.14 | 2.18 | 3.14 | 1.18 01/12/2018 00:00:00 | 3.135 | 1.185 | null | null | 3.135 | 1.185 02/12/2018 00:00:00 | 3.15 | 1.19 | null | null | 3.16 | 1.195 03/12/2018 00:00:00 | 3.16 | 1.195 | 4.15 | 2.19 | null | null 03/12/2018 00:00:00 | null | null | 4.1 | 2.195 | null | null
注意(可选):
通过引入索引,可以极大地提高性能。
作为一个演示,我在DateData列上添加了CLUSTERED INDEXES,性能的提高非常明显。
/* Add to improve performance */
CREATE CLUSTERED INDEX CI_DataDate ON #d (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t1 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t2 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t3 (DataDate);
答案 1 :(得分:0)
如果要通过日期和t1.value1关联最小值(或最大值取决于所要查找的内容),请使用min。在您的示例中,值不是重复项,因此区别将不起作用
select #d.DataDate,#t1.Value1,min(#t1.Value2),max(#t2.Value3),min(#t2.Value4)
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
group by 1,2
如果有确切的重复项要删除,请使用以下内容
select distinct #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate