-- Load 'file_name' as tab-separated records: (emp_id, emp_code, employee_internal_id).
empdoc = load 'file_name' using PigStorage('\t') as (emp_id : chararray, emp_code : chararray, employee_internal_id : int);
-- Load 'file_name' as tab-separated records: (emp_internal_id, emp_id, mother_id, father_id, section_code).
masterdoc = load 'file_name' using PigStorage('\t')as (emp_internal_id : int, emp_id : chararray, mother_id : int, father_id : int, section_code : chararray);
-- Attempted join: match empdoc.employee_internal_id against masterdoc.mother_id OR masterdoc.father_id.
-- NOTE(review): Pig's JOIN appears to accept only one key expression per relation and has no OR form,
-- so this statement is expected to be rejected by the parser; confirm against the Pig Latin reference.
childone = JOIN empdoc by (employee_internal_id), masterdoc by (mother_id) OR masterdoc by(father_id);
-- Project the matched ids from the joined relation.
-- NOTE(review): emp_id is declared in both empdoc and masterdoc, so after a join it presumably needs a
-- relation prefix (empdoc::emp_id or masterdoc::emp_id); the trailing comma before ';' also looks like a syntax error.
genone = FOREACH childone GENERATE employee_internal_id, emp_internal_id,emp_id, ;
基本上,我想在连接条件中实现类似 SQL 查询里 OR 的效果:employee_internal_id = mother_id 或 employee_internal_id = father_id。
答案 0 :(得分:0)
试试这个
-- Emulates the SQL condition
--   empdoc.employee_internal_id = masterdoc.mother_id
--   OR empdoc.employee_internal_id = masterdoc.father_id
-- Pig's JOIN only supports equality on one key expression per relation, so the OR is
-- expressed as two independent inner joins whose projected results are unioned.

-- Tab-separated input: (emp_id, emp_code, employee_internal_id).
empdoc = LOAD 'file_name' USING PigStorage('\t') AS (emp_id : chararray, emp_code : chararray, employee_internal_id : int);
-- Tab-separated input: (emp_internal_id, emp_id, mother_id, father_id, section_code).
masterdoc = LOAD 'file_name' USING PigStorage('\t') AS (emp_internal_id : int, emp_id : chararray, mother_id : int, father_id : int, section_code : chararray);

-- Branch 1: master rows whose mother_id is the employee.
firstjoin = JOIN empdoc BY employee_internal_id, masterdoc BY mother_id;
-- After a join, fields that exist in both inputs must be qualified with relation::field.
-- NOTE(review): emp_id is taken from masterdoc here; switch to empdoc::emp_id if the
-- employee's own emp_id is the one wanted.
genone = FOREACH firstjoin GENERATE
    empdoc::employee_internal_id AS employee_internal_id,
    masterdoc::emp_internal_id   AS emp_internal_id,
    masterdoc::emp_id            AS emp_id;

-- Branch 2: master rows whose father_id is the employee.
secondjoin = JOIN empdoc BY employee_internal_id, masterdoc BY father_id;
-- A master row with mother_id == father_id already matched in branch 1; drop it here so the
-- union yields each (employee, master row) pair once, as a single OR join would.
-- The IS NULL test keeps rows with no mother_id, since a comparison against null is not true.
fatheronly = FILTER secondjoin BY (masterdoc::mother_id IS NULL) OR (masterdoc::mother_id != masterdoc::father_id);
gentwo = FOREACH fatheronly GENERATE
    empdoc::employee_internal_id AS employee_internal_id,
    masterdoc::emp_internal_id   AS emp_internal_id,
    masterdoc::emp_id            AS emp_id;

-- Both branches share the same schema, so a plain UNION is sufficient.
result = UNION genone, gentwo;