首先,我是Hive的新手。
我通过apache flume获取了twitter数据。
{
"filter_level": "low",
"retweeted": false,
"in_reply_to_screen_name": null,
"possibly_sensitive": false,
"truncated": false,
"lang": "en",
"in_reply_to_status_id_str": null,
"id": 756378998838530048,
"in_reply_to_user_id_str": null,
"timestamp_ms": "1469169780822",
"in_reply_to_status_id": null,
"created_at": "Fri Jul 22 06:43:00 +0000 2016",
"favorite_count": 0,
"place": null,
"coordinates": null,
"text": "#furry pokemon sex mermaid sex position",
"contributors": null,
"geo": null,
"entities": {
"symbols": [],
"urls": [{
"expanded_url": "http://14.gerase.tk",
"indices": [40, 63],
"display_url": "14.gerase.tk",
"url": ""
}],
"hashtags": [{
"text": "furry",
"indices": [0, 6]
}],
"user_mentions": []
},
"is_quote_status": false,
"source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client<\/a>",
"favorited": false,
"in_reply_to_user_id": null,
"retweet_count": 0,
"id_str": "756378998838530048",
"user": {
"location": null,
"default_profile": true,
"statuses_count": 3436,
"profile_background_tile": false,
"lang": "ru",
"profile_link_color": "2B7BB9",
"id": 752318303280955392,
"following": null,
"favourites_count": 0,
"protected": false,
"profile_text_color": "333333",
"verified": false,
"description": null,
"contributors_enabled": false,
"profile_sidebar_border_color": "C0DEED",
"name": "Мария Виноградова",
"profile_background_color": "F5F8FA",
"created_at": "Mon Jul 11 01:47:15 +0000 2016",
"default_profile_image": false,
"followers_count": 5,
"profile_image_url_https": "https://pbs.twimg.com/profile_images/753398763201425408/X_2mAGt1_normal.jpg",
"geo_enabled": false,
"profile_background_image_url": "",
"profile_background_image_url_https": "",
"follow_request_sent": null,
"url": null,
"utc_offset": null,
"time_zone": null,
"notifications": null,
"profile_use_background_image": true,
"friends_count": 21,
"profile_sidebar_fill_color": "DDEEF6",
"screen_name": "afinafedorova2",
"id_str": "752318303280955392",
"profile_image_url": "http://pbs.twimg.com/profile_images/753398763201425408/X_2mAGt1_normal.jpg",
"listed_count": 5,
"is_translator": false
}
}
我正在使用https://github.com/cloudera/cdh-twitter-example中提到的HiveJSONSerDe 这是我试图运行的Hive查询。
create external table twitterdata(
filter_level string,
retweeted boolean,
in_reply_to_screen_name string,
possibly_sensitive boolean,
trauncated boolean,
lang string,
in_reply_to_status_id_str string,
id bigint,
in_reply_to_user_id_str string,
timestamp_ms string,
in_reply_to_status_id int,
created_at string,
favourite_count int,
place string,
coordinates string,
text string,
contributors string,
geo string,
entities STRUCT<
urls:ARRAY<STRUCT<expanded_url:STRING,url:STRING>>,
hashtags:ARRAY<STRUCT<text:STRING>>,
user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>>,
is_quote_status boolean,
source string,
favotited boolean,
in_reply_to_user_id int,
retweet_count int,
id_str string,
user STRUCT<
location:STRING,
default_profile:boolean,
statuses_count:int,
profile_background_tile:boolean,
lang: string,
profile_link_color: string,
id: bigint,
following: string,
protected: boolean,
profile_text_color: string,
verified: boolean,
description: string,
contributors_enabled: boolean,
name: string,
created_at: string,
default_profile_image: boolean,
followers_count: int,
profile_image_url_https: string,
geo_enabled: boolean
url: string,
time_zone: string,
friends_count: int,
screen_name: string,
id_str: string,
listed_count: int,
is_translator: boolean>
)
ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe'
STORED AS TEXTFILE
LOCATION '/Twitter/Pokemon/';
但它显示有关解析JSON脚本的错误。
FAILED: Parse Error: line 31:2 mismatched input 'location' expecting identifier in column specification
我无法找到创建表查询中的错误。请帮助。
答案 0 :(得分:0)
您可能需要查看数据文件中的该行。您的JSON文件最有可能是完整的,或者与模式不匹配。
如果您希望不断将数据加载到配置单元表,您可能还需要查看StreamSets。您可以使用它来捕获JSON数据,自动转换为Avro并填充和更新配置单元表。
完全披露,我是该项目的提交者。