Java UDF函数返回空值

时间:2019-07-25 10:27:19

标签: java apache-pig user-defined-functions

我需要创建一个Java udf,以根据其他变量中的特定条件返回字符串(CD_NOT)输出:

for each group (IDT_GCP) do :
if there is a CD_NOT == "" then return the latest CD_NOT (given by the date variable DA_PRM_CTR_ORDER )
if IDC_PSE_PCL = 1 ET IDC_CD_NOT == 0
return CD_NOT having this condition
else ORDER BY DA_PRM_CTR_ORDER ASC
if  DA_PRM_CTR_ORDER are equal 
ORDER BY IDT_ETT_PSE ASC
return the lowest CD_NOT 
else 
return the CD_NOT of the latest DA_PRM_CTR_ORDER

此函数在Pig脚本中的调用方式为:

ENEE_ENR_GCP = FOREACH (GROUP ENEE_ENR BY IDT_GCP)
{
    ENEE_ENR_GROUP = ORDER ENEE_ENR BY IDT_GCP;

    GENERATE

    group                 AS  IDT_GCP,
    SelectionNote(ENEE_ENR_GROUP)  AS CD_NOT;
};

ENEE_ENR_GCP如下所示:

"001590842","51862499", "", "1", "20170201","0"
"001590842","51862499", "X0", "1", "20150529", "1"
"001639055","51862517", "", "1", "20170201", "2"
"001639055","51862517", "G3", "1", "20170201", "2"
"001597135","51862517", "G3", "1", "20170201", "2"
"001597135","51862517", "", "1", "20170201", "2"
"002804935","00006178","G4","1","19870101","1"
"002804935","00009118","X0","1","19861201","1"
"002804935","00009957","","1","19861229","1"
"002804935","00012970","B3++","1","20100227","1"
Where :

1column : IDT_GCP
2column : IDT_ETT_PSE
3column : CD_NOT (same times it's empty)
4column : IDC_PSE_PCL
5column : DA_PRM_CTR_ORDER
6column : IDC_CD_NOT

用于获取输入并返回结果的输出和输入函数是:

public DataBag exec(Tuple input) throws IOException {

        try {

            if (input.get(0) == null || input.size() == 0)
                return null;

            // Récupération de bag à partir de Script PIG

            DataBag bagFromPigScript = (DataBag) input.get(0);

            // Tableau permettant de sauvegarder les tuples sous format
            // string

            List<Personne> listPersonnes = new ArrayList<Personne>();

            // Convertir au script PIG

            Tuple returnTuple = TupleFactory.getInstance().newTuple();
            List<Tuple> returnTupleList = new ArrayList<Tuple>();

            returnTuple.append(SelectionCodeNote((ArrayList<Personne>) listPersonnes));
            returnTupleList.add(returnTuple);

            return BagFactory.getInstance().newDefaultBag(returnTupleList);

        } catch (Exception e) {
            throw new IOException("Caught exception processing input row ", e);
        }
    }

    @Override
    public Schema outputSchema(final Schema input) {
        try {

            Schema.FieldSchema CD_NOT = new Schema.FieldSchema("CD_NOT", DataType.CHARARRAY);

            Schema tupleSchema = new Schema();
            tupleSchema.add(CD_NOT);

            Schema bagSchema = new Schema(tupleSchema);
            return new Schema(new Schema.FieldSchema("DATA_CD_NOT", bagSchema, DataType.BAG));

        } catch (FrontendException e) {
            e.printStackTrace();
            return null;
        }
    }

我的人员班是:

private class Personne implements Comparable {

        private String ID_GCP;

        private String IDT_ETT_PSE;

        private String CD_NOT;

        private String IDC_PSE_PCL;

        private String DA_PRM_CTR_ORDER;

        private String IDC_CD_NOT;

        public Personne (String ID_GCP, String IDT_ETT_PSE, String CD_NOT, String IDC_PSE_PCL, String DA_PRM_CTR_ORDER, String IDC_CD_NOT) {
            this.ID_GCP = ID_GCP;
            this.IDC_PSE_PCL = IDC_PSE_PCL;
            this.IDC_CD_NOT = IDC_CD_NOT;
            this.DA_PRM_CTR_ORDER = DA_PRM_CTR_ORDER;
            this.IDT_ETT_PSE = IDT_ETT_PSE;
            this.CD_NOT = CD_NOT;
        }

        public String getIDC_CD_NOT() {
            return this.IDC_CD_NOT;
        }

        public String getID_GCP() {
            return this.ID_GCP;
        }

        public String getIDC_PSE_PCL() {
            return this.IDC_PSE_PCL;
        }

        public String getDA_PRM_CTR_ORDER() {
            return this.DA_PRM_CTR_ORDER;
        }

        public String getIDT_ETT_PSE() {
            return this.IDT_ETT_PSE;
        }

        public String getCD_NOT() {
            return this.CD_NOT;
        }

我的操作功能是:

CompareTo:如上所述进行比较和排序

public int compareTo(Object personne) {
        int res = 0;

        Personne other = (Personne) personne;

        // Conversion of Dates from String to Dates

        Date otherDate = converteDate(other.getDA_PRM_CTR_ORDER());
        Date entreePersonne = converteDate(this.DA_PRM_CTR_ORDER);
        res = entreePersonne.compareTo(otherDate);        

        // if there is Legality between dates 
        if (res == 0) {

            Long entreePersonneIDT = Long.parseLong(this.getIDT_ETT_PSE());
            Long otherPersonneIDT = Long.parseLong(other.getIDT_ETT_PSE());
            res = entreePersonneIDT.compareTo(otherPersonneIDT);
            return res;
        }
        return res;
    }



 private Date converteDate(String date) {
        DateFormat df = new SimpleDateFormat("yyyyMMdd");
        Date convertedDate = null;

        try {
            convertedDate = (Date) df.parse(date);
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        return convertedDate;
    }

要将字符串转换为日期

private Date converteDate(String date) {
        DateFormat df = new SimpleDateFormat("yyyyMMdd");
        Date convertedDate = null;

        try {
            convertedDate = (Date) df.parse(date);
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        return convertedDate;
    }

恢复组标识符的每个不同值

private static List DistinctValues (ArrayList<Personne> listPersonnes)
{
    List outList = (List) listPersonnes.stream().map(m -> m.getIDT_GCT()).distinct().collect(Collectors.toList());
    return(outList);

}

最后按照条件选择CD_NOT:

private static String SelectionCodeNote(ArrayList<Personne> listPersonnes) {

        if(listPersonnes != null){

                List IDT_CGCT_DISTINCT = (List) DistinctValues(listPersonnes);

                ArrayList<Personne> usedList = new ArrayList<Personne>();

                for(int i=0; i<IDT_CGCT_DISTINCT.size();i++)
                {

                String IDT_GCT_Local = (String) IDT_CGCT_DISTINCT.get(i);

                usedList.stream().filter(c -> c.getIDT_GCT().equals(IDT_GCT_Local));

                for(Personne personne: usedList)
                { 

              if (personne.getIDC_PSE_PCL().equals("1") && personne.getIDC_CD_NOT().equals("0")) {
                    return (personne.getCD_NOT());
                } else {
                    Collections.sort(usedList);
                    return (personne.getCD_NOT());
                }
            }
        }
        }
        return null;
    }

这是作为猪脚本输出感知的一个例子:

011274144,{()}
011274145,{()}
011274146,{()}
011274148,{()}
011274149,{()}
011274150,{()}
011274162,{()}
011274167,{()}
011274178,{()}
011274197,{()}
011274231,{()}

您可以看到第二列为空(SelectionCodeNote的输出 )

非常感谢您

0 个答案:

没有答案