我有一个带有“periode”列的hive表,列的类型是字符串。
该列的值如下:
// Sample data: product variants, each carrying a list of image descriptors
// tagged with a `shot_type`.
const filteredVariants = [
  {
    name: "Test0",
    images: [
      { name: "test0img1", shot_type: "swatch" },
    ],
  },
  {
    name: "Test1",
    images: [
      { name: "test1img1", shot_type: "product" },
      { name: "test1img2", shot_type: "product" },
    ],
  },
];
/**
 * Checks whether an object matches a model.
 *
 * Every enumerable property of `model` must be present on `object` with a
 * strictly equal (`===`) value. Properties that exist only on `object` are
 * ignored, so an empty model matches any object.
 *
 * @param {Object} object - The candidate to test.
 * @param {Object} model - The property/value pairs that must match.
 * @returns {boolean} `true` when every model property matches, else `false`.
 */
function compare(object, model) {
  // `const` keeps the loop key local. The previous undeclared loop variable
  // leaked a global and throws a ReferenceError in strict mode / ES modules.
  for (const key in model) {
    // Strict comparison: no type coercion, and a missing property does not
    // match a model value of `null`.
    if (object[key] !== model[key]) {
      return false;
    }
  }
  return true;
}
/**
 * Searches the `images` list of every entry in `array` and returns the first
 * image that matches `model` (as decided by `compare`).
 *
 * Entries are visited in order, and within an entry the images are visited in
 * order. Entries that are missing or have no `images` list are skipped.
 *
 * @param {Array<{images?: Object[]}>} array - Entries whose images are searched.
 * @param {Object} model - Property/value pairs the image must match.
 * @returns {Object|null} The first matching image, or `null` if none matches.
 */
function findImage(array, model) {
  // Search the caller-supplied array; previously the parameter was ignored
  // and the global `filteredVariants` was always scanned instead.
  if (!Array.isArray(array)) {
    return null;
  }
  for (const entry of array) {
    const images = (entry && entry.images) || [];
    for (const image of images) {
      if (compare(image, model)) {
        return image;
      }
    }
  }
  return null;
}
// Demo: print the first image whose shot_type is "product".
const firstProductImage = findImage(filteredVariants, { shot_type: "product" });
console.log(firstProductImage);
[{periode:20160118-20160205,nb:1},{periode:20161130-20161130,nb:1},{periode:20161130-20161221,nb:1}]
[{periode:20161212-20161217,nb:0}]
我想对此列进行类型转换。
最终目标是让每个periode对应一行记录。
为此,我想对periode列使用LATERAL VIEW配合EXPLODE。
这就是我想将其转换为array<struct<periode:string, nb:int>>的原因。
感谢您的帮助。 思迪
答案 0 :(得分:1)
你不需要“强制转换”任何东西,你只需要爆炸数组然后解压缩结构。我为您的数据添加了一个索引,以便更清楚地了解最终结果。
数据:
idx arr_of_structs
0 [{periode:20160118-20160205,nb:1},{periode:20161130-20161130,nb:1},{periode:20161130-20161221,nb:1}]
1 [{periode:20161212-20161217,nb:0}]
查询:
SELECT idx -- index
, my_struct.periode AS periode -- unpacks periode
, my_struct.nb AS nb -- unpacks nb
FROM database.table
LATERAL VIEW EXPLODE(arr_of_structs) exptbl AS my_struct
输出:
idx periode nb
0 20160118-20160205 1
0 20161130-20161130 1
0 20161130-20161221 1
1 20161212-20161217 0
从你的问题中有点不清楚所需的结果是什么,但是一旦你更新它我会相应地修改查询。
上述解决方案不正确,我没有注意到您的输入是STRING类型。
查询:
SELECT REGEXP_EXTRACT(tmp_arr[0], "([0-9]{8}-[0-9]{8})") AS periode
, REGEXP_EXTRACT(tmp_arr[1], ":([0-9]*)") AS nb
FROM (
SELECT idx
, pos
, COLLECT_SET(tmp_col) AS tmp_arr
FROM (
SELECT idx
, tmp_col
, CASE WHEN PMOD(pos, 2) = 0 THEN pos+1 ELSE pos END AS pos
FROM (
SELECT *
, ROW_NUMBER() OVER () AS idx
FROM database.table ) x
LATERAL VIEW POSEXPLODE(SPLIT(periode, ',')) exptbl AS pos, tmp_col ) y
GROUP BY idx, pos) z
输出:
periode nb
20160118-20160205 1
20161130-20161130 1
20161130-20161221 1
20161212-20161217 0
答案 1 :(得分:0)
使用split函数怎么样?你应该可以这样写:
select nb, period from
(select split(periode, "-") as periods, nb from yourtable) t
LATERAL VIEW explode(periods) sss AS period;
我没试过,但应该可以工作:)
编辑:如果您有一个格式为date-date-date…的periodes列以及一个nb列,上述查询应该有效,但看起来情况并非如此。以下查询应该适合您(冗长但可用)
select period, nb from (
select
regexp_replace(split(split(tok1,",")[1],":")[1], "[\\]|}]", "") as nb,
split(split(split(tok1,",")[0],":")[1],"-") as periods
from
(select split(YOURSTRINGCOLUMN, "},") as s1 from YOURTABLE)
r1 LATERAL VIEW explode(s1) ss1 AS tok1
) r2 LATERAL VIEW explode(periods) ss2 AS period;
答案 2 :(得分:0)
我知道这个问题已经是一年前的了,但我遇到了同样的问题,并使用Brickhouse的json_split UDF解决了它。
SELECT EXPLODE(
json_split(
'[{"periode":"20160118-20160205","nb":1},{"periode":"20161130-20161130","nb":1},{"periode":"20161130-20161221","nb":1}]'
));
col
{"periode":"20160118-20160205","nb":1}
{"periode":"20161130-20161130","nb":1}
{"periode":"20161130-20161221","nb":1}
抱歉意大利面条代码。
还有类似问题here使用JSON数组而不是JSON字符串。它不是同一个案例,但对于任何面临这种任务的人来说,它可能在更大的背景下有用。