使用Hive UDF解压缩列数据

时间:2017-05-19 04:37:36

标签: java hive udf compression

背景:使用 Hive UDF 的 evaluate() 方法解压缩列数据。

异常:

  

异常失败   产生java.io.IOException:org.apache.hadoop.hive.ql.metadata.HiveException:   无法执行方法public static org.apache.hadoop.io.Text   Test.UDFDecompressor.evaluate(java.lang.String)抛出   对象上的org.apache.hadoop.hive.ql.metadata.HiveException   Test.UDFDecompressor的Test.UDFDecompressor@1008df1e   参数   {x��}kw⸲�_a�����֤�\��a-B�i�@`ï¿½ï¿½ï¿½ï¿½ï¿ ½" �nc3�I����$_�E��   大小为1

源代码:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.zip.DataFormatException;
import java.util.zip.InflaterInputStream;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class Decompress extends UDF{
public static String evaluate(String data1) throws IOException, DataFormatException{
ByteArrayInputStream bao=new ByteArrayInputStream(data1.getBytes());
InflaterInputStream iis= new InflaterInputStream(bao);
String out="";
byte[] bt=new byte[1024];
int len=-1;
while ((len =iis.read(bt))!=-1){ 
out += new String(Arrays.copyOf(bt, len));
}
JavaStringObjectInspector stringInspector;
stringInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
String ip = stringInspector.getPrimitiveJavaObject(out);

//return new String(ip.getBytes(Charset.forName("UTF-8")));
//return new String(ip.getBytes(Charset.forName("UTF-8")));
return ip;
}
}

我尝试了使用 GZIP、zlib 的 Java API 进行解压缩的多种方法,但都遇到同样的错误。有谁能帮我解决这个问题,并建议使用 Hive UDF 解压缩列数据的正确方法吗?

先谢谢。

1 个答案:

答案 0 :(得分:0)

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.InflaterInputStream;

/**
 * Hive UDF that inflates a zlib-compressed binary column value and returns the inflated
 * bytes as a {@link Text}.
 */
public class Decompress extends UDF {

    // Single Text instance reused as the return value of every evaluate() call.
    private final Text r = new Text();

    /**
     * Inflates the valid bytes of {@code bw} and stores the result in the reusable
     * {@link Text} field.
     *
     * @param bw the compressed value
     * @return the reusable {@link Text} holding the inflated bytes, or {@code null} when
     *     {@code bw} is {@code null}
     * @throws IOException if the input is not a valid zlib stream or ends prematurely
     */
    public Text evaluate(BytesWritable bw) throws IOException {
        // A NULL column value yields NULL instead of a NullPointerException.
        if (bw == null) {
            return null;
        }

        ByteArrayOutputStream unzipped = new ByteArrayOutputStream();
        // getBytes() exposes the backing array, which may be longer than the valid data;
        // bound the read with getLength() so stale trailing bytes are never fed in.
        // try-with-resources closes the inflater stream even when read() throws.
        try (InflaterInputStream inflater = new InflaterInputStream(
                new ByteArrayInputStream(bw.getBytes(), 0, bw.getLength()))) {
            byte[] bt = new byte[1024];
            int len;
            while ((len = inflater.read(bt)) != -1) {
                unzipped.write(bt, 0, len);
            }
        }

        r.set(unzipped.toByteArray());
        return r;
    }
}