如何更新分区表

时间:2020-03-13 11:02:01

标签: sql hive

我有一个源表employee_source和一个目标分区表employee_partitionedemployee_source每天都会通过新数据进行补充,我想将增量传输到目标employee_partitioned,每天在同一日期按id进行清理,如果不同日期的记录中的标识符相等,则更新记录。 您能否帮助我将正确的插入内容写入employee_partitioned

-- init
drop table if exists employee_source;
create table if not exists employee_source
(
    employee_id  string,
    name         string,
    process_date string
);

drop table if exists employee_partitioned;
create table employee_partitioned
(
    employee_id string,
    name        string
) partitioned by (process_date string);

首先插入

insert into table employee_source
values
       ('100', 'Michael', '2020-03-12'),
       ('100', 'Michael', '2020-03-12'),
       ('101', 'Will', '2020-03-12');

第一次清洁和运输

insert into table employee_partitioned partition (process_date)
select employee_id, name, process_date
from employee_source eh
where process_date = '2020-03-12';

第二次插入

insert into table employee_source
values ('101', 'Arnold', '2020-03-13'),
       ('102', 'Steven', '2020-03-13'),
       ('103', 'Lucy', '2020-03-13');

第二次清洁和运输

insert into table employee_partitioned partition (process_date)
select employee_id, name, process_date
from employee_source eh
where process_date = '2020-03-13';


select *
from employee_partitioned;

预期结果是

-- 100     Michael 2020-03-12
-- 101     Arnold  2020-03-13
-- 102     Steven  2020-03-13
-- 103     Lucy    2020-03-13

0 个答案:

没有答案