我有一个 CSV 文件,其中一列的格式为:
我想处理这个文件,结果应该是:
第四列应使用" |"分隔符。
我是 Java 新手,有人能帮我设计这背后的处理逻辑吗?
修改:我尝试过以下内容:
// Read the input CSV from GCS; each element of event1 is expected to be one line of text.
PCollection<String> event1 = p.apply(TextIO.read().from("gs://bucket/input/Event_Setup.csv"));
// Map every CSV line to a ClassEvent1 POJO (one output element per input line).
PCollection<ClassEvent1> pojos1 = event1.apply(ParDo.of(new DoFn<String, ClassEvent1>() { // converting String into class
private static final long serialVersionUID = 1L;
@ProcessElement
public void processElement(ProcessContext c) {
// Split the line into columns on commas (no handling of quoted fields).
String[] strArr = c.element().split(",");
int strArrlen = strArr.length;
String[] strsplit = null;
// NOTE(review): every iteration overwrites strsplit, so after the loop it only
// holds the split of the LAST column, not of the fourth one.
for (int i = 0 ;i < strArrlen ;i++)
{
// NOTE(review): String.split takes a regex; an unescaped "|" is an empty
// alternation that matches between every character, so this yields single
// characters rather than pipe-delimited tokens. "\\|" would split on a literal pipe.
strsplit = strArr[i].split("|");
}
ClassEvent1 clr = new ClassEvent1();
clr.setEvntType(strArr[0]);
clr.setEvntKey(strArr[1]);
clr.setEvntName(strArr[2]);
// NOTE(review): this reads index 3 of the split of the last column (see above),
// not a component of column 3; it throws if that split has fewer than 4 entries.
clr.setEvntComponents(strsplit[3]);
clr.setCustEvntStrt(strArr[4]);
clr.setCustEvntEnd(strArr[5]);
clr.setNotes(strArr[6]);
c.output(clr);
}
}));
答案 0 :(得分:1)
/**
 * Expands one CSV line into one {@code ClassEvent1} per pipe-separated entry of its fourth column.
 *
 * <p>The line is split on commas (quoted fields are not supported). Columns 0-2 and 4-6 are copied
 * unchanged into every emitted object; column 3 is split on a literal {@code |} and each resulting
 * piece becomes the components value of its own output element.
 *
 * @param c the process context supplying the input line via {@code element()} and receiving the
 *     results via {@code output(...)}
 * @throws IllegalArgumentException if the line has fewer than 7 comma-separated columns
 */
@ProcessElement
public void processElement(ProcessContext c) {
    // A negative limit keeps trailing empty columns; plain split(",") would drop them and
    // make the index 6 lookup fail for rows whose last column is empty.
    String[] columns = c.element().split(",", -1);
    if (columns.length < 7) {
        throw new IllegalArgumentException(
            "Expected at least 7 comma-separated columns but found " + columns.length);
    }

    // split() takes a regex, so the pipe must be escaped to be treated literally.
    String[] components = columns[3].split("\\|");
    if (components.length == 0) {
        // split() discards trailing empty strings, so a column consisting only of pipes
        // yields an empty array; fall back to emitting the raw column value once.
        components = new String[] {columns[3]};
    }

    // Emit one event per component, sharing all the other column values.
    for (String component : components) {
        ClassEvent1 clr = new ClassEvent1();
        clr.setEvntType(columns[0]);
        clr.setEvntKey(columns[1]);
        clr.setEvntName(columns[2]);
        clr.setEvntComponents(component);
        clr.setCustEvntStrt(columns[4]);
        clr.setCustEvntEnd(columns[5]);
        clr.setNotes(columns[6]);
        c.output(clr);
    }
}