有一张包含两列的表(name string,salary decimal(10,3)),在 Hive 中以 Parquet 格式存储。使用 Morphline 和 Solr 执行索引时出现以下异常:
ERROR morphline.MorphlineMapRunner: Unable to process file <parquet file>
java.lang.ClassCastException: org.apache.avro.generic.GenericData$Record cannot be cast to java.io.InputStream
at org.kitesdk.morphline.stdio.AbstractParser.getAttachmentInputStream(AbstractParser.java:184)
at org.kitesdk.morphline.stdio.AbstractParser.doProcess(AbstractParser.java:94)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.ConvertTimestampBuilder$ConvertTimestamp.doProcess(ConvertTimestampBuilder.java:161)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.GenerateUUIDBuilder$GenerateUUID.doProcess(GenerateUUIDBuilder.java:98)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.avro.ExtractAvroPathsBuilder$ExtractAvroPaths.doProcess(ExtractAvroPathsBuilder.java:143)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.extract(ReadAvroParquetFileBuilder.java:201)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.doProcess(ReadAvroParquetFileBuilder.java:180)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:208)
at org.apache.solr.hadoop.MapReduceIndexerTool.dryRun(MapReduceIndexerTool.java:1250)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:875)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:700)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.solr.hadoop.MapReduceIndexerTool.main(MapReduceIndexerTool.java:687)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Exception in thread "main" org.kitesdk.morphline.api.MorphlineRuntimeException: java.lang.ClassCastException: org.apache.avro.generic.GenericData$Record cannot be cast to java.io.InputStream
at org.kitesdk.morphline.base.FaultTolerance.handleException(FaultTolerance.java:73)
at org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:220)
at org.apache.solr.hadoop.MapReduceIndexerTool.dryRun(MapReduceIndexerTool.java:1250)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:875)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:700)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.solr.hadoop.MapReduceIndexerTool.main(MapReduceIndexerTool.java:687)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassCastException: org.apache.avro.generic.GenericData$Record cannot be cast to java.io.InputStream
at org.kitesdk.morphline.stdio.AbstractParser.getAttachmentInputStream(AbstractParser.java:184)
at org.kitesdk.morphline.stdio.AbstractParser.doProcess(AbstractParser.java:94)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.ConvertTimestampBuilder$ConvertTimestamp.doProcess(ConvertTimestampBuilder.java:161)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.GenerateUUIDBuilder$GenerateUUID.doProcess(GenerateUUIDBuilder.java:98)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.avro.ExtractAvroPathsBuilder$ExtractAvroPaths.doProcess(ExtractAvroPathsBuilder.java:143)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.extract(ReadAvroParquetFileBuilder.java:201)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.doProcess(ReadAvroParquetFileBuilder.java:180)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:208)
... 11 more
以下是 Morphline 文件的内容:
{
readAvroParquetFile {
readerSchemaString:"""{"type":"record","name":"employee","fields":[
{"name":"name","type":["string","null"],"default":""},
{"name": "salary","type":
["bytes","null"],"logicalType":"decimal","precision":10,"scale":4,"default":0 }
]}"""
}
}
如何使用 Morphline 和 Solr 为包含 decimal 列的表的 Parquet 文件建立索引?任何帮助都不胜感激。
答案 0 :(得分:0)
根据 http://kitesdk.org/docs/current/morphlines/morphlines-reference-guide.html#readAvroParquetFile:“Morphline 记录的输入字段 file_upload_url 必须包含要读取的 Parquet 文件的 HDFS 路径。(MapReduceIndexerTool 已经开箱即用地提供了该字段。)”