SQL自连接成对

时间:2016-01-15 16:45:37

标签: sql sql-server tsql join self-join

假设我有一个由

等条目组成的表
// NOTE(review): this C++ fragment looks unrelated to the surrounding SQL
// question (possibly a misplaced snippet) — confirm before relying on it.
// Branches on whether cv::countNonZero(frame) returns 0.
if (cv::countNonZero(frame) == 0)
{
   //discard frame
} else
{
  //proceed with frame
}

示例数据(即上文所说的表):

ID  Arrival Date  Arrival City  Departure Date  Departure City
1   Jun 27 2015   Berlin        Jun 20 2015     Paris
1   Jul 1 2015    Rome          Jun 29 2015     Berlin
1   Jul 30 2015   Vienna        Jul 15 2015     Rome
2   Jun 28 2015   Prague        Jun 23 2015     Vienna
2   Jul 1 2015    Rome          Jun 29 2015     Prague
2   Jul 30 2015   Vienna        Jul 15 2015     Moscow
...

并且对于每个ID,我希望将此数据与其自身连接,以便将相邻的 Arrival Date 和 Departure Date 的观察成对分组——即,把每个ID的出发与其上一次到达配对。

在上面的示例中(为方便起见,观察结果已排序),第2行将附加到第1行,第3行附加到第2行,第5行附加到第4行,第6行附加到第5行(因此产生4行,字段为 ID、Arrival Date、Arrival City、Departure Date、Departure City、Arrival Date2、Arrival City2、Departure Date2、Departure City2)。

每个ID可能有三次以上的出发,因此需要一种通用的方法。另请注意,数据中可能存在 Arrival City 与 Departure City 不匹配的漏洞,例如第5行的 Arrival City 不是第6行的 Departure City,但它们仍应合并。实际上,一个主要目标就是更好地了解数据中有多少这样的漏洞。

6 个答案:

答案 0 :(得分:8)

解决方案是使用CTE,并利用两个连续行(由rownum标识)的行号之差始终为1这一点(同时还要考虑日期):

-- Pair each trip with the trip numbered directly after it for the same ID.
-- Rows are numbered once across the whole table (ordered by ID, then
-- arrivalDate), so a pair is two rows of one ID whose numbers differ by 1.
-- The pair is kept only when the later row departs after the earlier row arrives.
;WITH numbered_trips AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY src.ID, src.arrivalDate) AS rownum,
        src.ID,
        src.arrivalDate,
        src.arrivalCity,
        src.departureDate,
        src.departureCity
    FROM #test AS src
)
SELECT *
FROM numbered_trips AS earlier
INNER JOIN numbered_trips AS later
    ON later.ID = earlier.ID
    AND later.rownum = earlier.rownum + 1
    AND earlier.arrivalDate < later.departureDate
GO

-- Schema of the #test temp table: one row per trip, holding the trip's ID
-- plus the arrival and departure date/city.
CREATE TABLE #test
(
    ID            INT,
    arrivalDate   DATE,
    arrivalCity   VARCHAR(30),
    departureDate DATE,
    departureCity VARCHAR(30)
);

SQL小提琴:SQLFiddle

答案 1 :(得分:5)

试试这个:

-- For every trip `b`, attach the trip `a` of the same id whose departure_date
-- is the earliest departure later than b's arrival_date.
-- Un-suffixed columns come from `a` (the later trip); the *_2 columns come
-- from `b` (the earlier trip). Cities are deliberately not compared, so trips
-- are still paired when the departure city does not match the prior arrival city.
SELECT a.id
    ,a.arrival_date
    ,a.arrival_city
    ,a.departure_date
    ,a.departure_city
    ,b.arrival_date arrival_date_2
    ,b.arrival_city arrival_city_2
    ,b.departure_date departure_date_2
    ,b.departure_city departure_city_2
FROM triptable a
JOIN triptable b ON a.id = b.id
    AND a.departure_date = (
        -- Must read the same table as the outer query.
        SELECT min(x.departure_date)
        FROM triptable x
        WHERE x.departure_date > b.arrival_date
            AND x.id = b.id
    )

根据您的评论编辑:

  • 找到出发日期晚于上一条记录到达日期、且出发日期最早的那条记录;
  • 忽略样本数据中第6条记录的出发城市与第5条记录的到达城市不同这一事实。

答案 2 :(得分:4)

不完全确定你要找的是什么结果......但我想我会试一试,看看是否有任何一个帮助你。

-- Builds the sample trips (#t1) and a copy with a PairID flag per row (#t2).
-- Leftovers from an earlier run are dropped only when they exist, so the
-- script works on the first run and can be re-run in the same session.
if object_id('tempdb..#t1') is not null drop table #t1;
if object_id('tempdb..#t2') is not null drop table #t2;

create table #t1 (id int, ArrivalDate datetime, ArrivalCity varchar(50), Departuredate datetime, DepartureCity varchar(50));

insert into #t1 (id, ArrivalDate, ArrivalCity, Departuredate, DepartureCity)
values (1, 'Jun 27 2015', 'Berlin', 'Jun 20 2015','Paris'), 
       (1, 'Jul 1 2015', 'Rome','Jun 29 2015','Berlin'), 
       (1, 'Jul 30 2015', 'Vienna','Jul 15 2015','Rome'), 
       (2, 'Jun 28 2015','Prague','Jun 23 2015','Vienna'),
       (2, 'Jul 1 2015','Rome','Jun 29 2015','Prague'), 
       (2, 'Jul 30 2015','Vienna','Jul 15 2015','Moscow');

-- PairID = id when the following trip of the same id (in ArrivalDate order)
-- departs from this row's ArrivalCity, or when there is no following trip;
-- PairID = 0 marks a gap (the following trip departs from a different city).
select *,
       case
           when lead(departurecity) over (partition by id order by Arrivaldate) = ArrivalCity
             or lead(departurecity) over (partition by id order by Arrivaldate) is null
           then id
           else 0
       end as PairID
into #t2
from #t1;

这是启动的代码..

-- List every trip together with its PairID flag.
select
    id,
    ArrivalDate,
    ArrivalCity,
    Departuredate,
    DepartureCity,
    PairID
from #t2

将导致:

id  ArrivalDate ArrivalCity Departuredate   DepartureCity   PairID
1   2015-06-27  Berlin      2015-06-20      Paris           1
1   2015-07-01  Rome        2015-06-29      Berlin          1
1   2015-07-30  Vienna      2015-07-15      Rome            1
2   2015-06-28  Prague      2015-06-23      Vienna          2
2   2015-07-01  Rome        2015-06-29      Prague          0
2   2015-07-30  Vienna      2015-07-15      Moscow          2

凡是 PairID = 0 的行……都说明那里存在缺口/坏数据,至于如何处理就看你的需要了……

你也可以:

-- Adds, per trip, the DepartureCity and Departuredate taken from the
-- following row of the same ID in ArrivalDate order (NULL on the last row).
-- NOTE(review): LEAD reads the following row, although the aliases say
-- "Previous" — confirm which direction is intended.
select
    id,
    ArrivalDate,
    ArrivalCity,
    Departuredate,
    DepartureCity,
    PairID,
    lead(departurecity) over (partition by ID order by ArrivalDate) as PreviousDepartureCity,
    lead(Departuredate) over (partition by ID order by ArrivalDate) as PreviousDepartureDate
from #t2

这将添加以前的出发城市和日期..你可以用空值做你想做的......它们将表示第一次飞行......或者如果后续对id = 0则有间隙......

选择选项变得无穷无尽....如果null和lag(pairid)= 0那么你有一个带有间隙的行..如果null和pair id = id ..和lag(pairid)= id那么你有你的第一次飞行..

我的意思是我可以继续......然后给你更多详细信息,但我不确定这是你在想什么..希望它无论如何都有帮助..

祝你好运!

P.S. 不太明白你为什么需要对这张表做自连接……也许是我没有抓住重点……如果是那样,还请见谅……

答案 3 :(得分:4)

听起来你是想对结果做透视(pivot),把后续行的数据放到额外的列中。我使用ROW_NUMBER()进行排序;在透视之前先把各列拼接成一列,完成透视后,再用一个函数把拼接的内容拆分回来。

-- Pivots each ID's trips into one row: trip n (in arrivalDate order) lands in
-- column [n] as a CHAR(13)-delimited string, and the string in slot [1] is
-- split back into its four fields with dbo.SplitString.
;WITH trip_details AS (
    -- One row per trip: its position within the ID plus all fields packed
    -- into a single CHAR(13)-delimited string.
    SELECT
        ID,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY arrivalDate) AS RowNum,
        CAST(arrivalDate AS VARCHAR(MAX)) + CHAR(13)
            + arrivalCity + CHAR(13)
            + CAST(departureDate AS VARCHAR(MAX)) + CHAR(13)
            + departureCity AS TripDetails
    FROM trip
),
trips_by_slot AS (
    -- One row per ID with the packed trip strings spread over columns [1]..[5].
    SELECT *
    FROM trip_details
    PIVOT (MIN(TripDetails) FOR RowNum IN ([1], [2], [3], [4], [5] /* , ... */)) AS pvt
)
SELECT
    slots.ID,
    dbo.SplitString(slots.[1], CHAR(13), 1) AS arrivalDate1,
    dbo.SplitString(slots.[1], CHAR(13), 2) AS arrivalCity1,
    dbo.SplitString(slots.[1], CHAR(13), 3) AS departureDate1,
    dbo.SplitString(slots.[1], CHAR(13), 4) AS departureCity1,
    slots.*
FROM trips_by_slot AS slots;

使用此SplitString函数

-- Returns the @occurence-th (1-based) token of @stringToSplit when it is split
-- on @delim, or NULL when the string has fewer tokens than requested.
--   @stringToSplit : text to split
--   @delim         : delimiter; may be longer than one character
--   @occurence     : 1-based index of the token to return
-- When @delim does not occur (or is empty/NULL), the whole string is token 1.
CREATE FUNCTION dbo.SplitString ( 
    @stringToSplit VARCHAR(MAX),
    @delim VARCHAR(255),
    @occurence INT )
RETURNS VARCHAR(MAX) AS
BEGIN
    DECLARE @delimLen INT;
    DECLARE @pos INT;
    DECLARE @orderNum INT;

    -- LEN ignores trailing spaces, so append a sentinel to measure the
    -- delimiter's true character count (e.g. a single-space delimiter).
    SET @delimLen = LEN(@delim + 'x') - 1;
    SET @orderNum = 0;
    SET @pos = CHARINDEX(@delim, @stringToSplit);

    WHILE @pos > 0
    BEGIN
        SET @orderNum = @orderNum + 1;

        -- The token is returned straight from the VARCHAR(MAX) input, so it
        -- is never truncated by a narrower intermediate variable.
        IF @orderNum = @occurence
        BEGIN
            RETURN SUBSTRING(@stringToSplit, 1, @pos - 1);
        END

        -- Skip the whole delimiter, not just one character. DATALENGTH is an
        -- upper bound on the remaining length and, unlike LEN, is never
        -- shortened by trailing spaces (so the length cannot go negative).
        SET @stringToSplit = SUBSTRING(@stringToSplit, @pos + @delimLen, DATALENGTH(@stringToSplit));
        SET @pos = CHARINDEX(@delim, @stringToSplit);
    END

    -- Whatever is left after the last delimiter is the final token.
    IF @orderNum + 1 = @occurence
    BEGIN
        RETURN @stringToSplit;
    END

    RETURN NULL;
END

答案 4 :(得分:3)

这应该有效:

-- Pairs every row with the next row of the same id: row n (c1) is joined to
-- row n + 1 (c2), where rows are numbered per id in [date] order.
-- [table] and [date] are placeholders for the real table and ordering column.
-- [table] must be bracket-quoted because TABLE is a reserved keyword; [date]
-- is quoted only for clarity since it is also a type name.
-- The leading semicolon terminates any preceding statement, as WITH requires.
;with cte as (
    select *, row_number() over (partition by id order by [date]) as rn
    from [table]
)
select *
from cte c1
join cte c2
    on c1.id = c2.id
    and c1.rn = c2.rn - 1

答案 5 :(得分:3)

试试这个,

-- Sample trips: one row per trip with its arrival and departure date/city.
declare @t table(ID int,ArrivalDate datetime, ArrivalCity varchar(50)
,DepartureDate datetime,DepartureCity varchar(50))
insert into @t (ID, ArrivalDate, ArrivalCity, DepartureDate, DepartureCity) values
(1,     'Jun 27 2015',     'Berlin',          'Jun 20 2015',       'Paris  ')
,(1,     'Jul 1 2015 ',     'Rome  ',          'Jun 29 2015',       'Berlin ')
,(1,     'Jul 30 2015',     'Vienna',          'Jul 15 2015',       'Rome    ')
,(2,     'Jun 28 2015',     'Prague',          'Jun 23 2015',       'Vienna ')
,(2,     'Jul 1 2015 ',     'Rome  ',          'Jun 29 2015',       'Prague ')
,(2  ,   'Jul 30 2015',     'Vienna',          'Jul 15 2015',       'Moscow ')

-- Number the trips per ID in arrival order, then attach to each trip (a) the
-- next trip (b) of the SAME ID. Partitioning by ID and matching on ID keeps
-- the last trip of one ID from being paired with the first trip of another.
-- LEFT JOIN keeps the last trip of every ID, with NULLs in the b columns.
;WITH CTE
AS (
    SELECT *
        ,ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY arrivaldate
            ) rn
    FROM @t
    )
SELECT A.arrivaldate
    ,a.arrivalcity
    ,a.DepartureDate
    ,a.DepartureCity
    ,b.arrivaldate
    ,b.arrivalcity
    ,b.DepartureDate
    ,b.DepartureCity
FROM CTE A
LEFT JOIN CTE b ON b.ID = a.ID
    AND b.rn = a.rn + 1