使用"|"分隔符拆分CSV文件中的列

时间:2018-04-17 17:59:53

标签: java apache-beam

我有一个csv文件,其中一个列格式为:

enter image description here

我想处理这个文件,结果应该是:

enter image description here

第四列应按"|"分隔符进行拆分。

我是 Java 新手,有人能帮我设计一下这背后的逻辑吗?

编辑:我尝试了以下代码:

// Read the input file into a PCollection of strings.
PCollection<String> event1 = p.apply(TextIO.read().from("gs://bucket/input/Event_Setup.csv"));
     // Map each input string to a ClassEvent1 by splitting it on commas.
     PCollection<ClassEvent1> pojos1 = event1.apply(ParDo.of(new DoFn<String, ClassEvent1>() { // converting String into class              
        private static final long serialVersionUID = 1L;
        @ProcessElement
        public void processElement(ProcessContext c) {          
            // Split the element into its comma-separated columns.
            String[] strArr = c.element().split(",");
            int strArrlen = strArr.length;
            String[] strsplit = null;
            // NOTE(review): strsplit is overwritten on every iteration, so after the
            // loop it only holds the split of the LAST column, not of column 3.
            for (int i = 0 ;i < strArrlen ;i++)
            {
                // NOTE(review): String.split takes a regex and an unescaped "|" is the
                // alternation operator, so this does not split on the literal pipe
                // character; "\\|" would be needed for that.
                strsplit =  strArr[i].split("|");
            }

            // Populate a single output record from the parsed columns.
            ClassEvent1 clr = new ClassEvent1();
            clr.setEvntType(strArr[0]);
            clr.setEvntKey(strArr[1]);
            clr.setEvntName(strArr[2]);
            // NOTE(review): this indexes into the split of the last column (see loop
            // above) rather than into the pieces of strArr[3].
            clr.setEvntComponents(strsplit[3]);
            clr.setCustEvntStrt(strArr[4]);
            clr.setCustEvntEnd(strArr[5]);
            clr.setNotes(strArr[6]);
            c.output(clr);
        }
    }));

1 个答案:

答案 0 :(得分:1)

    /**
     * Expands one comma-separated input line into one {@code ClassEvent1} per
     * pipe-separated value found in the fourth column (index 3).
     *
     * <p>All other columns are copied unchanged into every emitted record. If the
     * fourth column yields no values after splitting (for example it consists only
     * of {@code |} characters), a single record carrying the original column value
     * is emitted instead.
     *
     * @param c the process context supplying the input line and receiving the output records
     * @throws IllegalArgumentException if the line has fewer than 7 comma-separated columns
     */
    @ProcessElement
    public void processElement(ProcessContext c) {
      String line = c.element();

      // A negative limit keeps trailing empty columns; the default split(",") would
      // drop them, so a row with an empty last field would be too short to index.
      String[] columns = line.split(",", -1);
      if (columns.length < 7) {
        throw new IllegalArgumentException(
            "Expected at least 7 CSV columns but found " + columns.length + " in line: " + line);
      }

      // split() takes a regex, so the pipe must be escaped to be matched literally.
      String[] events = columns[3].split("\\|");

      if (events.length == 0) {
        // Nothing to expand: keep the original column value in a single record.
        c.output(buildEvent(columns, columns[3]));
        return;
      }

      // Emit one record per event listed in the fourth column.
      for (String event : events) {
        c.output(buildEvent(columns, event));
      }
    }

    /**
     * Builds a record from the parsed columns, using {@code component} as the
     * event-components value in place of the raw fourth column.
     */
    private ClassEvent1 buildEvent(String[] columns, String component) {
      ClassEvent1 clr = new ClassEvent1();
      clr.setEvntType(columns[0]);
      clr.setEvntKey(columns[1]);
      clr.setEvntName(columns[2]);
      clr.setEvntComponents(component);
      clr.setCustEvntStrt(columns[4]);
      clr.setCustEvntEnd(columns[5]);
      clr.setNotes(columns[6]);
      return clr;
    }