Hive JSON SerDe - ClassCastException:java.lang.Integer无法强制转换为java.lang.Double

时间:2014-01-13 22:43:36

标签: java json hadoop hive cloudera

我正在尝试使用Hive JSON SerDe将Twitter JSON导入Hive表。我先把JSON导入到一个用ROW FORMAT SERDE定义的表中,然后再把数据导入到另一个以RCFile格式存储的表中。这在一定程度上是可行的,但随后我遇到了如下的ClassCastException:

java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector.get(JavaDoubleObjectInspector.java:40)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:259)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:307)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
    at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:220)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:667)
    at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141)
    at org.apache.hadoop

这是我用来定义SerDe表的模式:

-- gh_raw: external staging table over raw Twitter JSON stored under
-- /user/ahanna/gh_raw, deserialized by org.openx.data.jsonserde.JsonSerDe.
-- The nested retweeted_status struct repeats the top-level tweet layout.
--
-- Numeric Twitter ids (user.id, user_mentions[].id) are declared bigint:
-- Twitter ids are 64-bit values and do not fit in a 32-bit Hive int.
--
-- NOTE(review): the coordinate arrays are declared double. Whole-number JSON
-- values in those arrays reportedly surface as java.lang.Integer in some
-- SerDe builds and fail the cast to Double -- confirm the SerDe version used.
CREATE EXTERNAL TABLE gh_raw (
   coordinates struct <
      coordinates: array <double>,
      type: string>,
   created_at string,
   entities struct <
      hashtags: array <struct <text: string>>,
      media: array <struct <
            display_url: string,
            expanded_url: string,
            media_url: string,
            media_url_https: string,
            sizes: struct <
               large: struct <
                  h: int,
                  resize: string,
                  w: int>,
               medium: struct <
                  h: int,
                  resize: string,
                  w: int>,
               small: struct <
                  h: int,
                  resize: string,
                  w: int>,
               thumb: struct <
                  h: int,
                  resize: string,
                  w: int>>,
            type: string,
            url: string>>,
      urls: array <struct <
            display_url: string,
            expanded_url: string,
            url: string>>,
      user_mentions: array <struct <
            id: bigint,
            name: string,
            screen_name: string>>>,
   geo struct <
      coordinates: array <double>,
      type: string>,
   id_str string,
   in_reply_to_screen_name string,
   in_reply_to_status_id_str string,
   in_reply_to_user_id_str string,
   place struct <
      attributes: struct <
         locality: string,
         region: string,
         street_address: string>,
      bounding_box: struct <
         coordinates: array <array <array <double>>>,
         type: string>,
      country: string,
      country_code: string,
      full_name: string,
      name: string,
      place_type: string,
      url: string>,
   possibly_sensitive boolean,
   retweeted_status struct <
      coordinates: struct <
         coordinates: array <double>,
         type: string>,
      created_at: string,
      entities: struct <
         hashtags: array <struct <
               text: string>>,
         media: array <struct <
               display_url: string,
               expanded_url: string,
               media_url: string,
               media_url_https: string,
               sizes: struct <
                  large: struct <
                     h: int,
                     resize: string,
                     w: int>,
                  medium: struct <
                     h: int,
                     resize: string,
                     w: int>,
                  small: struct <
                     h: int,
                     resize: string,
                     w: int>,
                  thumb: struct <
                     h: int,
                     resize: string,
                     w: int>>,
               type: string,
               url: string>>,
         urls: array <struct <
               display_url: string,
               expanded_url: string,
               url: string>>,
         user_mentions: array <struct <
               id: bigint,
               name: string,
               screen_name: string>>>,
      favorited: boolean,
      geo: struct <
         coordinates: array <double>,
         type: string>,
      id_str: string,
      in_reply_to_screen_name: string,
      in_reply_to_status_id_str: string,
      in_reply_to_user_id_str: string,
      place: struct <
         attributes: struct <
            locality: string,
            region: string,
            street_address: string>,
         bounding_box: struct <
            coordinates: array <array <array <double>>>,
            type: string>,
         country: string,
         country_code: string,
         full_name: string,
         name: string,
         place_type: string,
         url: string>,
      possibly_sensitive: boolean,
      scopes: struct <
         followers: boolean>,
      source: string,
      text: string,
      truncated: boolean,
      user: struct <
         contributors_enabled: boolean,
         created_at: string,
         default_profile: boolean,
         default_profile_image: boolean,
         description: string,
         favourites_count: int,
         followers_count: int,
         friends_count: int,
         geo_enabled: boolean,
         id: bigint,
         id_str: string,
         is_translator: boolean,
         lang: string,
         listed_count: int,
         `location`: string,
         name: string,
         profile_background_color: string,
         profile_background_image_url: string,
         profile_background_image_url_https: string,
         profile_background_tile: boolean,
         profile_banner_url: string,
         profile_image_url: string,
         profile_image_url_https: string,
         profile_link_color: string,
         profile_sidebar_border_color: string,
         profile_sidebar_fill_color: string,
         profile_text_color: string,
         profile_use_background_image: boolean,
         protected: boolean,
         screen_name: string,
         statuses_count: int,
         time_zone: string,
         url: string,
         utc_offset: int,
         verified: boolean>>,
   source string,
   text string,
   truncated boolean,
   user struct <
      contributors_enabled: boolean,
      created_at: string,
      default_profile: boolean,
      default_profile_image: boolean,
      description: string,
      favourites_count: int,
      followers_count: int,
      friends_count: int,
      geo_enabled: boolean,
      id: bigint,
      id_str: string,
      is_translator: boolean,
      lang: string,
      listed_count: int,
      `location`: string,
      name: string,
      profile_background_color: string,
      profile_background_image_url: string,
      profile_background_image_url_https: string,
      profile_background_tile: boolean,
      profile_banner_url: string,
      profile_image_url: string,
      profile_image_url_https: string,
      profile_link_color: string,
      profile_sidebar_border_color: string,
      profile_sidebar_fill_color: string,
      profile_text_color: string,
      profile_use_background_image: boolean,
      protected: boolean,
      screen_name: string,
      statuses_count: int,
      time_zone: string,
      url: string,
      utc_offset: int,
      verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/ahanna/gh_raw';

我发现,每当数据中出现一组坐标或一个边界框时,查询就会崩溃。

我认为这是我所用的JSON SerDe的一个bug,但我不确定。我现在用的这个版本是我从源码重新编译的,其作者称已经修复了这个问题,但仍然不行: https://github.com/brndnmtthws/Hive-JSON-Serde

2 个答案:

答案 0 :(得分:1)

试试这个SerDe: https://github.com/rcongiu/Hive-JSON-Serde 。我在尝试从推文中读取坐标时也遇到了相同的异常,换用这个SerDe之后问题就解决了!

二进制文件可在此处使用,因此您无需构建它 - http://www.congiu.net/hive-json-serde/

答案 1 :(得分:0)

尝试使用bigint而不是int。它对我有用。