我有一张表，其中一列的值如下：
[{"key":"e","value":["253","203","204"]},{"key":"st","value":["mi"]},{"key":"k2","value":["1","2"]}]
该列的类型为 array<struct<key:string,value:array<string>>>。
我想将列转换为以下格式:
{"e":["253","203","204"],"st":["mi"],"k2":["1","2"]}
类型为map<string,array<string>>
我尝试过用 explode 展开数组，但没有成功。请问在 Hive 中该如何实现？
答案 0 :(得分:0)
不借助外部库是无法做到的。可以参考 Brickhouse 库，或者自己编写 UDAF。
注意：下面的代码用于重现问题，并给出仅靠 Hive 内置函数所能实现的部分，即生成 map<string,string>，而不是 map<string, array<string>>。
-- Reproduce the problem: create a one-row table whose `input` column holds
-- an array of (key, value-array) structs, then display that row.
CREATE TABLE test_table (
    id INT,
    input ARRAY<STRUCT<key:STRING, value:ARRAY<STRING>>>
);

-- Load the single sample row from the question.
INSERT INTO TABLE test_table
SELECT
    1 AS id,
    array(
        named_struct('key', 'e', 'value', array('253', '203', '204')),
        named_struct('key', 'st', 'value', array('mi')),
        named_struct('key', 'k2', 'value', array('1', '2'))
    ) AS input;

-- Show what was stored.
SELECT
    id,
    input
FROM test_table;
+-----+-------------------------------------------------------------------------------------------------------+--+
| id | input |
+-----+-------------------------------------------------------------------------------------------------------+--+
| 1 | [{"key":"e","value":["253","203","204"]},{"key":"st","value":["mi"]},{"key":"k2","value":["1","2"]}] |
+-----+-------------------------------------------------------------------------------------------------------+--+
通过 explode 展开数组，并访问 STRUCT 的字段，我们可以把键和值拆分出来。
-- Split each array element into its own row, then read the struct's
-- `key` and `value` fields as separate columns.
WITH exploded AS (
    -- One row per struct in `input`, still tagged with the owning id.
    SELECT
        id,
        kv_entry
    FROM test_table
    LATERAL VIEW explode(input) input_view AS kv_entry
)
SELECT
    id,
    kv_entry.key,
    kv_entry.value
FROM exploded;
+-----+------+----------------------+--+
| id | key | value |
+-----+------+----------------------+--+
| 1 | e | ["253","203","204"] |
| 1 | st | ["mi"] |
| 1 | k2 | ["1","2"] |
+-----+------+----------------------+--+
思路是：在按 id 聚合时，使用你自己的 UDAF 来“收集”出 map。
仅靠 Hive 内置函数能做到的是生成 map<string,string>：先把每一行转换成带有特殊分隔符的字符串，再用另一个特殊分隔符把各行聚合起来，最后用内置函数 str_to_map 按这两个分隔符拆分，得到 map<string, string>。
-- Build one map<string,string> per id using only Hive built-ins:
--   1. explode the array into one row per struct
--   2. serialize each struct as "key:v1,v2,..."
--   3. join the serialized rows of an id with '#'
--   4. let str_to_map split on '#' (entries) and ':' (key vs. value)
-- NOTE(review): keys or values that themselves contain '#' or ':' would
-- presumably be split in the wrong place - verify against the real data.
WITH exploded AS (
    -- One row per struct in `input`, still tagged with the owning id.
    SELECT
        id,
        kv_entry
    FROM test_table
    LATERAL VIEW explode(input) input_view AS kv_entry
),
serialized AS (
    -- For the sample row this yields 3 rows: (e:253,203,204), (st:mi), (k2:1,2)
    SELECT
        id,
        concat(kv_entry.key, ':', concat_ws(',', kv_entry.value)) AS list_to_string
    FROM exploded
)
SELECT
    id,
    str_to_map(
        -- For the sample row the joined string is e:253,203,204#st:mi#k2:1,2
        concat_ws('#', collect_list(list_to_string)),
        '#',  -- delimiter between map entries
        ':'   -- delimiter between a key and its value
    ) AS mapped_output
FROM serialized
GROUP BY id;
该查询输出一个字符串到字符串的映射（map<string,string>），例如：
+-----+-------------------------------------------+--+
| id | mapped_output |
+-----+-------------------------------------------+--+
| 1 | {"e":"253,203,204","st":"mi","k2":"1,2"} |
+-----+-------------------------------------------+--+
答案 1 :(得分:0)
-- Turn every (key, value-array) struct of the sample array into its own
-- single-entry map<string,array<string>>.
-- The result has one row per array element (three rows for the sample
-- data), each holding a one-entry map. The entries are NOT merged into a
-- single map.
WITH input_set AS (
    -- Inline sample data equivalent to the column shown in the question.
    SELECT
        array(
            named_struct('key', 'e', 'value', array('253', '203', '204')),
            named_struct('key', 'st', 'value', array('mi')),
            named_struct('key', 'k2', 'value', array('1', '2'))
        ) AS input_array
),
break_input_set AS (
    -- One row per struct. The element position is not needed, so plain
    -- explode is used instead of posexplode.
    SELECT entry_view.kv_entry AS y_col_value
    FROM input_set
    LATERAL VIEW explode(input_set.input_array) entry_view AS kv_entry
),
create_map AS (
    -- map(k, v) builds a map with exactly one entry per input row.
    SELECT map(y_col_value.key, y_col_value.value) AS final_map
    FROM break_input_set
)
SELECT final_map
FROM create_map;
答案 2 :(得分:-1)
// Convert a list of {key, value} entries into a plain object keyed by `key`.
// The input list is named `entries` rather than `Array` so that the built-in
// Array constructor is not shadowed in this scope.
var entries = [
  {"key": "e", "value": ["253", "203", "204"]},
  {"key": "st", "value": ["mi"]},
  {"key": "k2", "value": ["1", "2"]}
];
var obj = {};
for (var i = 0; i < entries.length; i++) {
  // If two entries share a key, the later one overwrites the earlier one.
  obj[entries[i].key] = entries[i].value;
}
obj 即为所需的格式。