下面同时给出了每条语句的输出。我希望仅当最终结果中有记录时，才将其写入文件。
-- Load the new snapshot of the roles data as tab-separated fields.
-- No field types are declared, so Pig treats every field as bytearray.
NEW_roles = LOAD '/folder/newata/roles.gz' USING PigStorage('\t')
    AS (id, name, is_active, created_at, updated_at);

-- Render both timestamps as strings.
-- NOTE(review): 'hh' is the 12-hour clock field in Java-style date patterns;
-- 'HH' (24-hour) may be what is intended -- confirm before changing.
-- NOTE(review): ToString(datetime, format) expects a datetime input, but
-- created_at/updated_at are loaded untyped -- confirm the intended type.
NEW_roles_t = FOREACH NEW_roles GENERATE
    id,
    name,
    is_active,
    ToString(created_at, 'yyyy-MM-dd hh:mm:ss') AS created_at:chararray,
    ToString(updated_at, 'yyyy-MM-dd hh:mm:ss') AS updated_at:chararray;

-- Load the old snapshot of the roles data with the same layout.
OLD_roles = LOAD '/folder//old-data/roles.gz' USING PigStorage('\t')
    AS (id, name, is_active, created_at, updated_at);

-- Apply the same timestamp formatting to the old snapshot.
OLD_roles_t = FOREACH OLD_roles GENERATE
    id,
    name,
    is_active,
    ToString(created_at, 'yyyy-MM-dd hh:mm:ss') AS created_at:chararray,
    ToString(updated_at, 'yyyy-MM-dd hh:mm:ss') AS updated_at:chararray;
-- Group both relations by id. Each result tuple holds the id, the bag of
-- matching rows from NEW_roles_t, and the bag of matching rows from OLD_roles_t.
co_group = COGROUP NEW_roles_t BY id,
                   OLD_roles_t BY id;
DUMP co_group;
(1,{(1,CCCCO Admin,t,,)},{(1,CCCCO Admin,t,,)})
(2,{(2,COCI Read Only,t,,)},{(2,COCI Read Only,t,,)})
(3,{(3,College Admin,t,,)},{(3,College Admin,t,,)})
(4,{(4,College Submitter,t,,)},{(4,College Submitter,t,,)})
(5,{(5,CCCCO Reviewer,t,,)},{(5,CCCCO Reviewer,t,,)})
(6,{},{(6,Test,t,,)})
(7,{},{(7,Test1,t,,)})
(8,{},{(8,Test2,t,,)})
-- Keep only the ids that have no row in the new snapshot: field $1 of
-- co_group is the NEW_roles_t bag, so an empty bag means the id exists
-- only in the old data.
filtered_delete = FILTER co_group
                  BY IsEmpty(NEW_roles_t);
DUMP filtered_delete;
(6,{},{(6,Test,t,,)})
(7,{},{(7,Test1,t,,)})
(8,{},{(8,Test2,t,,)})
-- Unnest the old rows for the ids that are missing from the new snapshot.
-- filtered_delete only contains tuples whose NEW_roles_t bag is empty, so the
-- former "IsEmpty(NEW_roles_t) ? OLD_roles_t : null" check was always true and
-- the old bag can be flattened directly.
flat_delete = FOREACH filtered_delete GENERATE FLATTEN(OLD_roles_t);
DUMP flat_delete;
(6,Test,t,,)
(7,Test1,t,,)
(8,Test2,t,,)
-- Give the flattened positional fields their plain column names again.
intermediate_delete = FOREACH flat_delete GENERATE
    $0 AS id,
    $1 AS name,
    $2 AS is_active,
    $3 AS created_at,
    $4 AS updated_at;
DUMP intermediate_delete;
(6,Test,t,,)
(7,Test1,t,,)
(8,Test2,t,,)
/* WRITE IT TO A FILE */
/* If intermediate_delete has no records, no file is supposed to be created.
   A condition based on the number of records in intermediate_delete could
   achieve this. The COUNT function was used to get the number of records,
   but it is still unclear how to write condition-based logic that stores the
   relation only when the record count is greater than 0.
   NOTE(review): Pig Latin has no if/else statement, so such a check would
   have to be driven from outside the script (for example embedded Pig or a
   wrapper script) -- confirm the preferred approach.
*/
-- Store the relation built above; the alias previously used here,
-- intermediate_insert, is not defined anywhere in this script.
STORE intermediate_delete INTO '/folder/refreshed-data/roles_diff.txt' USING PigStorage('\t');