仅将非null值存储在文件中

时间:2019-02-19 09:27:31

标签: apache-pig

我还提供了每个语句的输出。我只希望最终结果仅在有值时才写入文件。

  

-- Load the new roles snapshot as tab-delimited text and render both timestamp
-- columns as chararray.
-- NOTE(review): in the format pattern 'hh' is presumably the 12-hour clock field
-- ('HH' would be 24-hour) -- confirm which one is intended.
NEW_roles = LOAD '/folder/newata/roles.gz' USING PigStorage('\t') AS (id, name, is_active, created_at, updated_at);
NEW_roles_t = FOREACH NEW_roles GENERATE
    id,
    name,
    is_active,
    ToString(created_at, 'yyyy-MM-dd hh:mm:ss') AS created_at:chararray,
    ToString(updated_at, 'yyyy-MM-dd hh:mm:ss') AS updated_at:chararray;

-- Load the old roles snapshot with the same layout and the same projection.
OLD_roles = LOAD '/folder//old-data/roles.gz' USING PigStorage('\t') AS (id, name, is_active, created_at, updated_at);
OLD_roles_t = FOREACH OLD_roles GENERATE
    id,
    name,
    is_active,
    ToString(created_at, 'yyyy-MM-dd hh:mm:ss') AS created_at:chararray,
    ToString(updated_at, 'yyyy-MM-dd hh:mm:ss') AS updated_at:chararray;

-- Group new and old role rows by id; each result tuple is
-- (id, {rows from NEW_roles_t}, {rows from OLD_roles_t}).
co_group = COGROUP NEW_roles_t BY id,
                   OLD_roles_t BY id;
DUMP co_group;
    (1,{(1,CCCCO Admin,t,,)},{(1,CCCCO Admin,t,,)})
    (2,{(2,COCI Read Only,t,,)},{(2,COCI Read Only,t,,)}) 
    (3,{(3,College Admin,t,,)},{(3,College Admin,t,,)}) 
    (4,{(4,College Submitter,t,,)},{(4,College Submitter,t,,)})
    (5,{(5,CCCCO Reviewer,t,,)},{(5,CCCCO Reviewer,t,,)})
    (6,{},{(6,Test,t,,)})
    (7,{},{(7,Test1,t,,)})
    (8,{},{(8,Test2,t,,)})

-- Keep only the ids whose new-side bag ($1, the NEW_roles_t rows) is empty,
-- i.e. rows that exist in the old snapshot but not in the new one.
filtered_delete = FILTER co_group
                  BY IsEmpty($1);
DUMP filtered_delete;
(6,{},{(6,Test,t,,)}) 
(7,{},{(7,Test1,t,,)}) 
(8,{},{(8,Test2,t,,)}) 

-- Unnest the old-side rows for every id that has no new-side row.
flat_delete = FOREACH filtered_delete GENERATE
    FLATTEN((IsEmpty(NEW_roles_t) ? OLD_roles_t : null));
DUMP flat_delete;
(6,Test,t,,) 
(7,Test1,t,,) 
(8,Test2,t,,) 

-- Give the flattened positional fields their column names back.
intermediate_delete = FOREACH flat_delete GENERATE
    $0 AS id,
    $1 AS name,
    $2 AS is_active,
    $3 AS created_at,
    $4 AS updated_at;
DUMP intermediate_delete;
(6,Test,t,,)
(7,Test1,t,,)
(8,Test2,t,,)

/* Write the result to a file.
   If intermediate_delete has no records, no file is supposed to be created.
   A record count of intermediate_delete could drive such a condition, but this
   script contains no condition-based logic that writes only when the count is
   greater than 0, so the STORE below runs unconditionally.
   NOTE(review): Pig Latin itself presumably offers no if/else around STORE, so
   the check may need an embedding/wrapper script or a post-step cleanup --
   confirm the approach.
*/
STORE intermediate_delete INTO '/folder/refreshed-data/roles_diff.txt' USING PigStorage('\t');

0 个答案:

没有答案