我有以下数据集,
from to stayed
01.01.2011 03.04.2011 NY
03.04.2011 25.05.2011 NJ
25.05.2011 04.06.2011 NJ
04.06.2011 20.06.2011 NJ
20.06.2011 30.06.2011 NJ
30.06.2011 05.07.2011 CA
05.07.2011 20.07.2011 CA
20.07.2011 05.08.2011 NY
并且必须转换为,
from to stayed
01.01.2011 03.04.2011 NY
03.04.2011 30.06.2011 NJ
30.06.2011 20.07.2011 CA
20.07.2011 05.08.2011 NY
还有另一个数据集,其中前一行的“to”与当前行的“from”之间可能存在间隔;一旦出现间隔,即使“stayed”相同也应另起一行,如下所示:
from to stayed
01.01.2011 03.04.2011 NY
03.04.2011 25.05.2011 NJ
25.05.2011 04.06.2011 NJ
04.06.2011 20.06.2011 NJ
20.06.2011 30.06.2011 NJ
30.06.2011 05.07.2011 CA
05.07.2011 20.07.2011 CA
20.07.2011 05.08.2011 NY
16.09.2011 20.09.2011 NY
20.09.2011 29.09.2011 NY
05.10.2011 20.10.2011 NY
和预期结果如下,
from to stayed
01.01.2011 03.04.2011 NY
03.04.2011 30.06.2011 NJ
30.06.2011 20.07.2011 CA
20.07.2011 05.08.2011 NY
16.09.2011 29.09.2011 NY
05.10.2011 20.10.2011 NY
为了实现这样的合并,我用分析函数写了一个很复杂的查询。有没有更简单的方法?
答案 0 :(得分:2)
Oracle 11g R2架构设置:
-- Fixture for the first data set: eight back-to-back stays with no gaps.
-- The raw DD.MM.YYYY strings are listed once in an inline view and converted
-- to DATE values in the outer SELECT, so the format mask appears in one place.
CREATE TABLE tbl ( "from", "to", "stayed" ) AS
SELECT TO_DATE( src.from_txt, 'DD.MM.YYYY' ),
       TO_DATE( src.to_txt, 'DD.MM.YYYY' ),
       src.place
FROM (
              SELECT '01.01.2011' AS from_txt, '03.04.2011' AS to_txt, 'NY' AS place FROM DUAL
    UNION ALL SELECT '03.04.2011', '25.05.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '25.05.2011', '04.06.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '04.06.2011', '20.06.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '20.06.2011', '30.06.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '30.06.2011', '05.07.2011', 'CA' FROM DUAL
    UNION ALL SELECT '05.07.2011', '20.07.2011', 'CA' FROM DUAL
    UNION ALL SELECT '20.07.2011', '05.08.2011', 'NY' FROM DUAL
) src;
查询1 :
-- Merge consecutive rows that share the same "stayed" value.
-- Each row gets two sequence numbers ordered by "from": one across the whole
-- table and one within its own "stayed" value. Their difference stays constant
-- for as long as the place does not change, so it identifies one run of rows.
WITH numbered AS (
    SELECT t."from",
           t."to",
           t."stayed",
           ROW_NUMBER() OVER ( ORDER BY t."from" ) AS overall_rn,
           ROW_NUMBER() OVER ( PARTITION BY t."stayed" ORDER BY t."from" ) AS place_rn
    FROM tbl t
)
SELECT MIN( "from" ) AS "from",
       MAX( "to" ) AS "to",
       "stayed"
FROM numbered
GROUP BY "stayed", overall_rn - place_rn
ORDER BY "from"
结果:
| FROM | TO | STAYED |
|--------------------------------|-------------------------------|--------|
| January, 01 2011 00:00:00+0000 | April, 03 2011 00:00:00+0000 | NY |
| April, 03 2011 00:00:00+0000 | June, 30 2011 00:00:00+0000 | NJ |
| June, 30 2011 00:00:00+0000 | July, 20 2011 00:00:00+0000 | CA |
| July, 20 2011 00:00:00+0000 | August, 05 2011 00:00:00+0000 | NY |
编辑:
要回答您的后续编辑 - 这是解决问题的一种方法:
Oracle 11g R2架构设置:
-- Fixture for the second data set: the first eight stays are back-to-back,
-- followed by three NY rows separated from what precedes them by date gaps.
-- The raw DD.MM.YYYY strings are listed once in an inline view and converted
-- to DATE values in the outer SELECT, so the format mask appears in one place.
CREATE TABLE tbl ( "from", "to", "stayed" ) AS
SELECT TO_DATE( src.from_txt, 'DD.MM.YYYY' ),
       TO_DATE( src.to_txt, 'DD.MM.YYYY' ),
       src.place
FROM (
              SELECT '01.01.2011' AS from_txt, '03.04.2011' AS to_txt, 'NY' AS place FROM DUAL
    UNION ALL SELECT '03.04.2011', '25.05.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '25.05.2011', '04.06.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '04.06.2011', '20.06.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '20.06.2011', '30.06.2011', 'NJ' FROM DUAL
    UNION ALL SELECT '30.06.2011', '05.07.2011', 'CA' FROM DUAL
    UNION ALL SELECT '05.07.2011', '20.07.2011', 'CA' FROM DUAL
    UNION ALL SELECT '20.07.2011', '05.08.2011', 'NY' FROM DUAL
    UNION ALL SELECT '16.09.2011', '20.09.2011', 'NY' FROM DUAL
    UNION ALL SELECT '20.09.2011', '29.09.2011', 'NY' FROM DUAL
    UNION ALL SELECT '05.10.2011', '20.10.2011', 'NY' FROM DUAL
) src;
查询2 :
-- Merge rows that are adjacent in time AND share the same "stayed" value.
-- Step 1: attach the previous row's "to" and "stayed" (ordered by "from").
-- Step 2: a row opens a new island unless it starts exactly where the previous
--         row ended and is in the same place; a running total of those
--         "opens a new island" flags numbers the islands.
-- Step 3: collapse every island to its earliest "from" and latest "to".
WITH with_previous AS (
    SELECT t."from",
           t."to",
           t."stayed",
           LAG( t."to" ) OVER ( ORDER BY t."from" ) AS prev_to,
           LAG( t."stayed" ) OVER ( ORDER BY t."from" ) AS prev_stayed
    FROM tbl t
),
islands AS (
    SELECT "from",
           "to",
           "stayed",
           SUM(
               CASE
                   WHEN "from" = prev_to AND "stayed" = prev_stayed THEN 0
                   ELSE 1
               END
           ) OVER ( ORDER BY "from" ) AS island_no
    FROM with_previous
)
SELECT MIN( "from" ) AS "from",
       MAX( "to" ) AS "to",
       MAX( "stayed" ) AS "stayed"
FROM islands
GROUP BY island_no
ORDER BY "from"
结果:
| FROM | TO | STAYED |
|----------------------------------|----------------------------------|--------|
| January, 01 2011 00:00:00+0000 | April, 03 2011 00:00:00+0000 | NY |
| April, 03 2011 00:00:00+0000 | June, 30 2011 00:00:00+0000 | NJ |
| June, 30 2011 00:00:00+0000 | July, 20 2011 00:00:00+0000 | CA |
| July, 20 2011 00:00:00+0000 | August, 05 2011 00:00:00+0000 | NY |
| September, 16 2011 00:00:00+0000 | September, 29 2011 00:00:00+0000 | NY |
| October, 05 2011 00:00:00+0000 | October, 20 2011 00:00:00+0000 | NY |
使用分层查询的替代解决方案:
查询3 :
-- Merge adjacent stays with a hierarchical query.
-- There is no START WITH clause, so every row acts as the root of its own
-- chain; a chain continues to a row that is in the same place and starts
-- exactly where the prior row ended. Only chain ends (leaves) are kept.
-- Several roots can reach the same leaf, so MIN("from") picks the earliest
-- start of each merged stay.
WITH leaves AS (
    SELECT CONNECT_BY_ROOT "from" AS "from",
           "to",
           "stayed",
           CONNECT_BY_ISLEAF AS leaf
    FROM tbl t
    CONNECT BY PRIOR "stayed" = "stayed"
           AND PRIOR "to" = "from"
)
SELECT MIN( "from" ) AS "from",
       "to",
       "stayed"
FROM leaves
WHERE leaf = 1
-- Group by place as well as end date: chains for two different places that
-- happen to end on the same day must stay separate rows.
GROUP BY "to", "stayed"
-- Without an ORDER BY the row order is unspecified; sort chronologically.
ORDER BY "from"
结果:
| FROM | TO | STAYED |
|----------------------------------|----------------------------------|--------|
| April, 03 2011 00:00:00+0000 | June, 30 2011 00:00:00+0000 | NJ |
| January, 01 2011 00:00:00+0000 | April, 03 2011 00:00:00+0000 | NY |
| June, 30 2011 00:00:00+0000 | July, 20 2011 00:00:00+0000 | CA |
| July, 20 2011 00:00:00+0000 | August, 05 2011 00:00:00+0000 | NY |
| September, 16 2011 00:00:00+0000 | September, 29 2011 00:00:00+0000 | NY |
| October, 05 2011 00:00:00+0000 | October, 20 2011 00:00:00+0000 | NY |
您最好实际对这些查询做一下性能分析;不过从执行计划(explain plan)来看,分层查询似乎更高效。
答案 1 :(得分:0)
-- Collapse rows into one row per uninterrupted stay.
-- A row continues the previous row's stay (ordered by "from") only when it is
-- in the same place and starts exactly where the previous row ended; any
-- other row opens a new stay. A running total of the "opens a new stay" flag
-- numbers the stays, and each stay is reduced to its first "from" / last "to".
-- Partitioning by "stayed" alone would merge separate visits to the same place.
SELECT MIN( "from" ) AS from_stayed,
       MAX( "to" ) AS to_stayed,
       "stayed"
FROM (
    SELECT s.*,
           SUM( s.is_new_stay ) OVER ( ORDER BY s."from" ) AS stay_no
    FROM (
        SELECT t."from",
               t."to",
               t."stayed",
               CASE
                   WHEN t."stayed" = LAG( t."stayed" ) OVER ( ORDER BY t."from" )
                    AND t."from" = LAG( t."to" ) OVER ( ORDER BY t."from" )
                   THEN 0
                   ELSE 1  -- also taken for the first row, where LAG yields NULL
               END AS is_new_stay
        FROM tbl t
    ) s
)
GROUP BY stay_no, "stayed"
ORDER BY from_stayed
好的,试试这个更正后的版本:
-- Merge adjacent stays in the same place into a single row.
-- t2: flag each row with 1 when it opens a new stay, 0 when it continues the
--     previous row (ordered by "from"). A row continues only if the place is
--     unchanged AND it starts exactly where the previous row ended, so a gap
--     in dates splits the stay even when the place is the same.
-- t3: running total of the flags gives every stay its own group_id.
WITH t2 AS (
    SELECT t1.*,
           CASE
               WHEN LAG( "stayed" ) OVER ( ORDER BY "from" ) = "stayed"
                AND LAG( "to" ) OVER ( ORDER BY "from" ) = "from"
               THEN 0
               ELSE 1  -- also taken for the first row, where LAG yields NULL
           END AS stayed_mod
    FROM tbl t1
),
t3 AS (
    SELECT t2.*,
           SUM( stayed_mod ) OVER ( ORDER BY "from" ) AS group_id
    FROM t2
)
SELECT "stayed",
       MIN( "from" ) AS from_min,
       MAX( "to" ) AS to_max
FROM t3
GROUP BY group_id, "stayed"
ORDER BY from_min;