我们已经将错误的数据填充到NZ数据库中,同时我们还需要修复历史数据。
错误的数据样本
ID |from_date |to_date
5002800000gvgQHAAY|2017-04-05 07:13:42|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-04-06 06:40:08|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-04-07 07:44:08|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-04-08 06:00:22|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-04-12 07:00:22|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-04-20 11:05:20|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-05-04 07:27:04|2999-12-31 00:00:00
正确的填充方式
ID |from_date |to_date
5002800000gvgQHAAY|2017-04-05 07:13:42|2017-04-06 06:40:08
5002800000gvgQHAAY|2017-04-06 06:40:08|2017-04-07 07:44:08
5002800000gvgQHAAY|2017-04-07 07:44:08|2017-04-08 06:00:22
5002800000gvgQHAAY|2017-04-08 06:00:22|2017-04-12 07:00:22
5002800000gvgQHAAY|2017-04-12 07:00:22|2017-04-20 11:05:20
5002800000gvgQHAAY|2017-04-20 11:05:20|2017-05-04 07:27:03
5002800000gvgQHAAY|2017-05-04 07:27:04|2999-12-31 00:00:00
想知道我们是否可以使用SQL来修复上面的数据。如果有人能在这方面提供帮助,将不胜感激。
此致
答案 0 :(得分:0)
这是T-SQL代码(在Microsoft SQL Server上运行)。抱歉,我对Netezza不熟悉,但既然还没有人回答,我就来试着回答一下。不管怎样,T-SQL与Netezza的SQL非常相似,所以你只需稍作调整就能让它在Netezza中运行。基本思路是:先用窗口函数得到按from_date排序的行号,然后把相邻的两行连接起来,用下一行的from_date来修正当前行的to_date。
-- Sample data reproducing the broken history: every row except the open-ended
-- one carries the same to_date instead of the from_date of the row after it.
declare @data table(
    ID varchar(50) NOT NULL,
    from_date datetime NOT NULL,
    to_date datetime NOT NULL
);
insert into @data (ID, from_date, to_date) VALUES
('5002800000gvgQHAAY','2017-04-05 07:13:42','2017-05-04 07:27:03'),
('5002800000gvgQHAAY','2017-04-06 06:40:08','2017-05-04 07:27:03'),
('5002800000gvgQHAAY','2017-04-07 07:44:08','2017-05-04 07:27:03'),
('5002800000gvgQHAAY','2017-04-08 06:00:22','2017-05-04 07:27:03'),
('5002800000gvgQHAAY','2017-04-12 07:00:22','2017-05-04 07:27:03'),
('5002800000gvgQHAAY','2017-04-20 11:05:20','2017-05-04 07:27:03'),
('5002800000gvgQHAAY','2017-05-04 07:27:04','2999-12-31 00:00:00');

-- Number the rows of each ID in from_date order. The numbering restarts per ID
-- so that a row is only ever paired with the next row of the same ID.
with cte_data(ID, from_date, to_date, row_idx)
as
(
    select ID, from_date, to_date,
           ROW_NUMBER() OVER(PARTITION BY ID ORDER BY from_date) as row_idx
    from @data
)
-- The corrected to_date is the from_date of the following row of the same ID.
-- The last row of each ID has no successor and gets the open-ended sentinel.
-- Note: a row therefore ends exactly where its successor starts
-- (e.g. 2017-05-04 07:27:04 here, not the stored 07:27:03).
select data1.ID,
       data1.from_date,
       coalesce(data2.from_date, '2999-12-31 00:00:00') as [to_date]
from cte_data data1
left join cte_data data2
    on data2.ID = data1.ID
   and data2.row_idx = data1.row_idx + 1
order by data1.ID, data1.from_date;
希望您觉得这很有帮助。
答案 1 :(得分:0)
假设您要“修复”整个表,最有效的方法是根据此查询创建一个新表,然后在数据看起来很好的时候重命名旧表和新表:
-- Preview of the repaired history: each row's to_date becomes the from_date
-- of the next row with the same id; the last row per id, which has no
-- successor, falls back to '2999-12-31 00:00:00'.
SELECT
    id,
    from_date,
    LEAD(from_date, 1, '2999-12-31 00:00:00') OVER (
        PARTITION BY id
        ORDER BY from_date
    ) AS to_date
FROM test_table
ORDER BY
    id,
    from_date;
ID | FROM_DATE | TO_DATE
--------------------+---------------------+---------------------
5002800000gvgQHAAY | 2017-04-05 07:13:42 | 2017-04-06 06:40:08
5002800000gvgQHAAY | 2017-04-06 06:40:08 | 2017-04-07 07:44:08
5002800000gvgQHAAY | 2017-04-07 07:44:08 | 2017-04-08 06:00:22
5002800000gvgQHAAY | 2017-04-08 06:00:22 | 2017-04-12 07:00:22
5002800000gvgQHAAY | 2017-04-12 07:00:22 | 2017-04-20 11:05:20
5002800000gvgQHAAY | 2017-04-20 11:05:20 | 2017-05-04 07:27:04
5002800000gvgQHAAY | 2017-05-04 07:27:04 | 2999-12-31 00:00:00
(7 rows)
-- Build new_test_table from test_table with to_date recomputed as the
-- from_date of the next row of the same id ('2999-12-31 00:00:00' when
-- there is no next row).
CREATE TABLE new_test_table AS
SELECT
    id,
    from_date,
    LEAD(from_date, 1, '2999-12-31 00:00:00') OVER (
        PARTITION BY id
        ORDER BY from_date
    ) AS to_date
FROM test_table;
INSERT 0 7