我有一个JSON数据,我必须在HIVE TABLE中加载。在创建一个外部表来加载JSON时,我得到了多个解析异常,因为使用了数组和嵌套的结构,而这些结构没有正确放置。
这是我的JSON:
{
"uploadTimeStamp" : "1486631318873",
"PDID" : "01",
"data" : [ {
"Data" : {
"unit" : "rpm",
"value" : "0"
},
"EventID" : "Event1",
"PDID" : "01",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.0",
"pii" : { }
}, {
"Data" : {
"heading" : "N",
"Location" : "false",
"latitude" : "14.022425",
"longitude" : "78.760587",
"Location2" : "false",
"speed" : "10"
},
"EventID" : "Event2",
"PDID" : "01",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.1",
"pii" : { }
}, {
"Data" : {
"x" : "1.1",
"y" : "1.2",
"z" : "2.2"
},
"EventID" : "Event3",
"PDID" : "01",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.0",
"pii" : { }
}, {
"EventID" : "Event4",
"Data" : {
"value" : "50",
"unit" : "percentage"
},
"Version" : "1.0",
"Timestamp" : 1486631318873,
"PDID" : "01",
"Timezone" : 330
}, {
"Data" : {
"unit" : "kmph",
"value" : "70"
},
"EventID" : "Event5",
"PDID" : "01",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.0",
"pii" : { }
} ]
}
这是解析例外之一:
NoViableAltException(313@[])
at org.apache.hadoop.hive.ql.parse.HiveParser.type(HiveParser.java:42781)
at org.apache.hadoop.hive.ql.parse.HiveParser.colType(HiveParser.java:42538)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameType(HiveParser.java:42058)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameTypeOrPKOrFK(HiveParser.java:42308)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameTypeOrPKOrFKList(HiveParser.java:37966)
at org.apache.hadoop.hive.ql.parse.HiveParser.createTableStatement(HiveParser.java:5259)
at org.apache.hadoop.hive.ql.parse.HiveParser.ddlStatement(HiveParser.java:2763)
at org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1756)
at org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1178)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:204)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:444)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1242)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1384)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1171)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1161)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
FAILED: ParseException line 13:23 missing > at ',' near 'struct' in column type
line 14:6 cannot recognize input near '<' 'Data' ':' in column type
看起来关闭嵌套结构或一个数组中的多个结构不会通过。
这里有什么可以纠正的地方?
创建表格stmt:
create table table_prd_in_general_events_test7(
uploadtime bigint,
pdid string,
data array<
struct<Data:struct<
unit:string,
value:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
heading:string,
Location:string,
latitude:bigint,
longitude:bigint,
Location2:string,
speed:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
unit:string,
value:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
x:int,
y:int,
z:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
heading:string,
isLastKnownLocation:string,
latitude:bigint,
longitude:bigint,
noLastKnownLocation:string,
speed:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>
>
)
ROW FORMAT SERDE
'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION
'/xyz/abc/';
添加jar:
ADD JAR /home/hive/lib/hive-hcatalog-core-0.13.0.jar;