使用hive的复杂struct数据类型,如何使用where子句编写查询

时间:2017-03-31 20:14:34

标签: struct types hive

我有下面这张包含复杂数据类型 STRUCT 的 hive 表。能否请你帮忙编写一个在 where 子句中按特定城市过滤的 hive 查询?

-- Table from the question: an external Hive table mixing primitive columns with
-- complex ones (ARRAY, MAP, STRUCT, UNIONTYPE).
-- The column the question is about is `address`: it is an ARRAY of STRUCTs, so a
-- single row can carry several addresses, each with its own street/city/state/zip.
-- NOTE(review): the snippet shows no storage/LOCATION clause and no terminating
-- semicolon; presumably it was trimmed for the question.
CREATE EXTERNAL TABLE user_t (
 name      STRING,
 id        BIGINT,
 isFTE     BOOLEAN,
 role      VARCHAR(64),
 salary    DECIMAL(8,2),
 phones    ARRAY<INT>,
 deductions MAP<STRING, FLOAT>,
 address   ARRAY<STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>>,
 others    UNIONTYPE<FLOAT,BOOLEAN,STRING>,
 misc      BINARY
 )

我可以在select子句中使用STRUCT数据类型,但不能在where子句中使用它。

可以运行:

-- Reported as working in the question: projects the city field of the address column.
select address.city from user_t; 

无法运行:

-- Reported as NOT working in the question. `address` is declared as an ARRAY of
-- STRUCTs, so `address.city` is presumably an array of cities rather than a single
-- string and cannot be compared to 'XYZ' directly (TODO confirm against the Hive docs).
select address.city from user_t where address.city = 'XYZ'

文档中提到,在 group by 或 where 子句中使用它时存在限制,并给出了解决方案,但我没有完全看懂。链接:Documentation

请给些建议,谢谢。

1 个答案:

答案 0 :(得分:5)

演示

-- Demo table: a cut-down user_t holding only the key and the
-- array-of-struct `address` column that the queries below filter on.
CREATE TABLE user_t (
    id      BIGINT,
    address ARRAY<STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>>
)
;

-- Demo data: two users.
--   id 1 has three addresses: two in city_1 and one in city_3.
--   id 2 has two addresses: one in city_4 and one in city_5.
-- Only id 1 should therefore match a filter on city = 'city_1'.
INSERT INTO user_t
SELECT
    1,
    ARRAY(
        NAMED_STRUCT('street','street_1','city','city_1','state','state_1','zip',11111),
        NAMED_STRUCT('street','street_2','city','city_1','state','state_1','zip',11111),
        NAMED_STRUCT('street','street_3','city','city_3','state','state_3','zip',33333)
    )
UNION ALL
SELECT
    2,
    ARRAY(
        NAMED_STRUCT('street','street_4','city','city_4','state','state_4','zip',44444),
        NAMED_STRUCT('street','street_5','city','city_5','state','state_5','zip',55555)
    )
;

选项1:explode(展开)

-- Option 1: explode the address array so each array element becomes its own row,
-- exposed as a single struct column named `details`, then filter on the struct's
-- city field. Returns one row per matching address, not one row per user.
SELECT
    u.id,
    a.*
FROM user_t AS u
LATERAL VIEW EXPLODE(address) a AS details
WHERE details.city = 'city_1'
;
+----+---------------------------------------------------------------------+
| id |                               details                               |
+----+---------------------------------------------------------------------+
|  1 | {"street":"street_1","city":"city_1","state":"state_1","zip":11111} |
|  1 | {"street":"street_2","city":"city_1","state":"state_1","zip":11111} |
+----+---------------------------------------------------------------------+

选项2:inline

-- Option 2: inline the address array so each array element becomes its own row
-- with the struct fields (street, city, state, zip) as separate columns, then
-- filter on the city column. Returns one row per matching address.
SELECT
    u.id,
    a.*
FROM user_t AS u
LATERAL VIEW INLINE(address) a
WHERE a.city = 'city_1'
;
+----+----------+--------+---------+-------+
| id |  street  |  city  |  state  |  zip  |
+----+----------+--------+---------+-------+
|  1 | street_1 | city_1 | state_1 | 11111 |
|  1 | street_2 | city_1 | state_1 | 11111 |
+----+----------+--------+---------+-------+

选项3:自连接(self join)

-- Option 3: self join. Keeps the original row shape (the whole address array)
-- for every user that has at least one address in the requested city.
WITH matching_ids AS (
    -- Ids of users with at least one address in city_1. DISTINCT collapses users
    -- with several matching addresses so the join below does not duplicate rows.
    SELECT DISTINCT
        src.id
    FROM user_t AS src
    LATERAL VIEW INLINE(address) a
    WHERE a.city = 'city_1'
)
SELECT
    u.*
FROM user_t AS u
INNER JOIN matching_ids AS u2
    ON u2.id = u.id
;
+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| id |                                                                                                    address                                                                                                    |
+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|  1 | [{"street":"street_1","city":"city_1","state":"state_1","zip":11111},{"street":"street_2","city":"city_1","state":"state_1","zip":11111},{"street":"street_3","city":"city_3","state":"state_3","zip":33333}] |
+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+