我有一个源表 employee_source 和一个目标分区表 employee_partitioned。
employee_source 每天都会追加新数据,我想每天把增量数据加载到目标表 employee_partitioned:
同一日期内按 employee_id 去重;如果不同日期的记录具有相同的 employee_id,则用较新日期的记录替换旧记录。
您能否帮我写出向 employee_partitioned 插入数据的正确语句?
-- Setup: drop and recreate both demo tables so the script starts from empty tables.

-- Source table: process_date is an ordinary string column here.
drop table if exists employee_source;
create table if not exists employee_source (
    employee_id  string,
    name         string,
    process_date string
);

-- Target table: same two data columns, with process_date as the partition column.
drop table if exists employee_partitioned;
create table employee_partitioned (
    employee_id string,
    name        string
)
partitioned by (process_date string);
第一次插入
-- First batch of source rows, all dated 2020-03-12.
-- Employee 100 is listed twice, which gives the load step a same-day
-- duplicate to remove.
insert into table employee_source
values
    ('100', 'Michael', '2020-03-12'),
    ('100', 'Michael', '2020-03-12'),
    ('101', 'Will',    '2020-03-12');
第一次清洗(去重)并加载
-- Load the 2020-03-12 increment into employee_partitioned.
--
-- The previous plain "insert into ... select" copied same-day duplicates
-- verbatim, appended again on every re-run, and never replaced rows whose
-- employee_id shows up again on a later date. This version:
--   1. merges the rows already in the target with the incoming increment,
--   2. keeps exactly one row per employee_id (the newest process_date; on a
--      date tie the incoming row wins, so re-running the load is idempotent),
--   3. rewrites the affected partitions with "insert overwrite".
--
-- Requirements and caveats:
--   * All partition values are dynamic, so the session must allow
--     hive.exec.dynamic.partition.mode=nonstrict (the original fully-dynamic
--     insert needed this as well).
--   * process_date is a string; ordering it is only chronological while the
--     values stay in 'YYYY-MM-DD' form, as they are in this script.
--   * NOTE(review): a dynamic-partition overwrite only rewrites partitions
--     that receive at least one row. If every row of an older partition is
--     superseded, that partition is left untouched and must be dropped
--     separately -- confirm whether this can happen with the real data.
insert overwrite table employee_partitioned partition (process_date)
select
    ranked.employee_id,
    ranked.name,
    ranked.process_date
from (
    -- Rank each employee's candidate rows, newest first.
    select
        merged.employee_id,
        merged.name,
        merged.process_date,
        row_number() over (
            partition by merged.employee_id
            order by merged.process_date desc, merged.is_incoming desc
        ) as recency_rank
    from (
        -- Rows already loaded into the target.
        select
            employee_id,
            name,
            process_date,
            0 as is_incoming
        from employee_partitioned
        union all
        -- Rows of the increment being loaded.
        select
            employee_id,
            name,
            process_date,
            1 as is_incoming
        from employee_source
        where process_date = '2020-03-12'
    ) merged
) ranked
where ranked.recency_rank = 1;
第二次插入
-- Second batch of source rows, all dated 2020-03-13.
-- Employee 101 was already inserted for 2020-03-12 under a different name;
-- 102 and 103 are new ids.
insert into table employee_source
values
    ('101', 'Arnold', '2020-03-13'),
    ('102', 'Steven', '2020-03-13'),
    ('103', 'Lucy',   '2020-03-13');
第二次清洗(去重)并加载
-- Load the 2020-03-13 increment into employee_partitioned.
--
-- The previous plain "insert into ... select" only appended the new rows, so
-- an employee_id already stored under an earlier date (101 on 2020-03-12)
-- stayed in the table next to its newer row, and same-day duplicates were
-- copied verbatim. This version:
--   1. merges the rows already in the target with the incoming increment,
--   2. keeps exactly one row per employee_id (the newest process_date; on a
--      date tie the incoming row wins, so re-running the load is idempotent),
--   3. rewrites the affected partitions with "insert overwrite", which also
--      removes the superseded row from its old partition.
--
-- Requirements and caveats:
--   * All partition values are dynamic, so the session must allow
--     hive.exec.dynamic.partition.mode=nonstrict (the original fully-dynamic
--     insert needed this as well).
--   * process_date is a string; ordering it is only chronological while the
--     values stay in 'YYYY-MM-DD' form, as they are in this script.
--   * NOTE(review): a dynamic-partition overwrite only rewrites partitions
--     that receive at least one row. If every row of an older partition is
--     superseded, that partition is left untouched and must be dropped
--     separately -- confirm whether this can happen with the real data.
insert overwrite table employee_partitioned partition (process_date)
select
    ranked.employee_id,
    ranked.name,
    ranked.process_date
from (
    -- Rank each employee's candidate rows, newest first.
    select
        merged.employee_id,
        merged.name,
        merged.process_date,
        row_number() over (
            partition by merged.employee_id
            order by merged.process_date desc, merged.is_incoming desc
        ) as recency_rank
    from (
        -- Rows already loaded into the target.
        select
            employee_id,
            name,
            process_date,
            0 as is_incoming
        from employee_partitioned
        union all
        -- Rows of the increment being loaded.
        select
            employee_id,
            name,
            process_date,
            1 as is_incoming
        from employee_source
        where process_date = '2020-03-13'
    ) merged
) ranked
where ranked.recency_rank = 1;
-- Verification query: list the target table's contents.
-- Columns are named explicitly instead of "select *", and the rows are sorted
-- so the output order is deterministic and can be compared line by line with
-- the expected result.
select
    employee_id,
    name,
    process_date
from employee_partitioned
order by employee_id, process_date;
预期结果如下:
-- 100 Michael 2020-03-12
-- 101 Arnold 2020-03-13
-- 102 Steven 2020-03-13
-- 103 Lucy 2020-03-13