我正在尝试根据自己的需要编写一个自定义serde,但仍然停留在获得类强制转换异常的位置。
输入数据是:
john,miller
我想将这些数据作为fname string,lname string
插入配置单元中,所以我写了一个customserde。
我只实现了SerDe接口的deserialize方法,如下所示:
package com.datametica.serde;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
public class CustomSerde implements SerDe {
int numColumns;
StructObjectInspector rowOI;
List<String> columnNames;
List<Object> rows;
List<TypeInfo> columnTypes;
@Override
public void initialize(Configuration conf, Properties tblProps)
throws SerDeException {
String columnNameProperty = tblProps
.getProperty(Constants.LIST_COLUMNS);
columnNames = Arrays.asList(columnNameProperty.split(","));
String columnTypeProperty = tblProps
.getProperty(Constants.LIST_COLUMN_TYPES);
columnTypes = TypeInfoUtils
.getTypeInfosFromTypeString(columnTypeProperty);
numColumns = columnNames.size();
List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(
columnNames.size());
ObjectInspector oi;
for (int c = 0; c < numColumns; c++) {
oi = TypeInfoUtils
.getStandardJavaObjectInspectorFromTypeInfo(columnTypes
.get(c));
columnOIs.add(oi);
}
/*
* for (int c = 0; c < numColumns; c++) { rows.add(); }
*/
rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
columnNames, columnOIs);
}
@Override
public CustomDataFormat deserialize(Writable record) throws SerDeException {
Text text = (Text) record;
String[] valArray = text.toString().split(",");
System.out.println("----------------------------\n");
System.out.println("yo yo yo "+text.toString() + "\n");
System.out.println("----------------------------\n");
CustomDataFormat dataObject = new CustomDataFormat();
dataObject.setFname(valArray[0]);
dataObject.setLname(valArray[1]);
return dataObject;
}
@Override
public ObjectInspector getObjectInspector() throws SerDeException {
return rowOI;
}
@Override
public SerDeStats getSerDeStats() {
return null;
}
@Override
public Class<? extends Writable> getSerializedClass() {
return null;
}
@Override
public Writable serialize(Object arg0, ObjectInspector arg1)
throws SerDeException {
return null;
}
}
将保存数据的类
package com.datametica.serde;
import java.util.ArrayList;
import java.util.List;
public class CustomDataFormat {
String fname;
String lname;
/*List<LevelOneStruct> arrayOfLevelTwoStruct = new ArrayList<LevelOneStruct>();
public List<LevelOneStruct> getArrayOfLevelTwoStruct() {
return arrayOfLevelTwoStruct;
}
public void setArrayOfLevelTwoStruct(
List<LevelOneStruct> arrayOfLevelTwoStruct) {
this.arrayOfLevelTwoStruct = arrayOfLevelTwoStruct;
}*/
public String getFname() {
return fname;
}
public void setFname(String fname) {
this.fname = fname;
}
public String getLname() {
return lname;
}
public void setLname(String lname) {
this.lname = lname;
}
}
CustomDataFormat类的ObjectInspector
package com.datametica.serde;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
public class CustomStructObjectInspector extends StandardStructObjectInspector {
@Override
public Object getStructFieldData(Object data, StructField fieldRef) {
Object dataToReturn = new Object();
CustomDataFormat customSerde = (CustomDataFormat) data;
switch (fieldRef.getFieldName()) {
case "fname":
dataToReturn = customSerde.getFname();
break;
/*
* case "arrayOfLevelTwoStruct": dataToReturn =
* customSerde.getArrayOfLevelTwoStruct(); break;
*/
case "lname":
dataToReturn = customSerde.getLname();
break;
default:
dataToReturn = null;
}
return dataToReturn;
}
@Override
public List<Object> getStructFieldsDataAsList(Object data) {
List<Object> listOfData = new ArrayList<Object>();
CustomDataFormat customSerde = (CustomDataFormat) data;
listOfData.add(customSerde.getFname());
listOfData.add(customSerde.getLname());
return listOfData;
}
}
创建jar后我正在创建hive表为
create table customserde (fname string,lname string) row format serde 'com.datametica.serde.CustomSerde';
将数据加载到表中
load data inpath '/user/dm3/tables_data/customserde' into table customserde;
到目前为止一切都很好,但是当我选择在桌面上进行操作时
select * from customserde;
获得异常
Caused by: java.lang.ClassCastException: com.datametica.serde.CustomDataFormat cannot be cast to [Ljava.lang.Object;
感谢任何帮助我完全陷入困境
提前感谢。
答案 0 :(得分:3)
我发现我的错误deserialize()
方法没有返回customdataformat.class
的对象,但是row的对象意味着arrayList如下
public Object deserialize(Writable record) throws SerDeException {
Text text = (Text) record;
String[] valArray = text.toString().split(",");
CustomDataFormat dataObject = new CustomDataFormat();
dataObject.setFname(valArray[0]);
dataObject.setLname(valArray[1]);
rows.set(0, dataObject.getFname());
rows.set(1, dataObject.getLname());
return rows;
}