答案 0 :(得分:2)
以下是使用SQL UDF的解决方案。这应该更快,因为它避免了使用JavaScript v8沙箱的延迟:
-- Expands a bracketed, comma-separated list such as '[5*2,8,6]' into an array
-- of strings in which every 'value*count' item is repeated `count` times and
-- every item without a '*' appears once: '[5*2,8,6]' -> ['5', '5', '8', '6'].
-- The text after the '*' is CAST to INT64, so a non-numeric count raises an error.
CREATE TEMP FUNCTION ExpandList(input STRING) AS (
  ARRAY(
    -- Find the value before the *
    SELECT SPLIT(elem, '*')[OFFSET(0)]
    -- For each comma-separated element inside the square brackets; the
    -- element's position is kept so the output order can be pinned below
    FROM UNNEST(REGEXP_EXTRACT_ALL(input, r'[^\[\],]+')) AS elem WITH OFFSET AS pos,
    -- Repeated by the value after the *, or once if there is no *
    UNNEST(GENERATE_ARRAY(1, IFNULL(CAST(SPLIT(elem, '*')[SAFE_OFFSET(1)] AS INT64), 1)))
    -- Without an ORDER BY the element order of ARRAY(subquery) is not guaranteed
    ORDER BY pos
  )
);
-- Sample rows: one bracketed list string per id.
WITH Input AS (
  SELECT 1 AS id, '[5*2,8,6]' AS `values`
  UNION ALL SELECT 2, '[5*2,0*3]'
  UNION ALL SELECT 3, '[1,2,5,6]'
)
-- One output row per element of the expanded list.
SELECT
  src.id,
  value
FROM Input AS src
CROSS JOIN UNNEST(ExpandList(src.`values`)) AS value;
答案 1 :(得分:1)
下面是一个带有TEMP function的SQL示例,演示了如何展平数组
-- Expands an array of 'value' / 'value*count' strings into a flat array in
-- which each 'value*count' item contributes `count` copies of `value` and
-- each item without a '*' is copied through unchanged:
--   ['5*2', '8', '6'] -> ['5', '5', '8', '6']
-- A count that does not convert to a number contributes no copies.
CREATE TEMP FUNCTION flatten(input ARRAY<STRING>)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  // Guard against a missing argument instead of throwing on input.length.
  if (input === null || input === undefined) {
    return null;
  }
  const expanded = [];
  for (const item of input) {
    // Skip missing elements rather than throwing on item.split.
    if (item === null || item === undefined) {
      continue;
    }
    // Split once; parts[0] is the value, parts[1] (if any) the repeat count.
    const parts = item.split('*');
    if (parts.length === 1) {
      expanded.push(item);
      continue;
    }
    const value = parts[0];
    // Explicit conversion; NaN makes the loop condition false (no copies).
    const count = Number(parts[1]);
    for (let i = 0; i < count; i++) {
      expanded.push(value);
    }
  }
  return expanded;
""";
-- Demo: run the flatten() UDF over a single literal array.
WITH numbers AS (
  SELECT ['5*2', '8', '6'] AS value
)
SELECT
  flatten(n.value) AS product
FROM numbers AS n;
此请求的输出如下:product = ['5', '5', '8', '6']
答案 2 :(得分:1)
以下是BigQuery标准SQL的另一个简单选项(基于REPEAT函数的使用)
#standardSQL
-- Expands each bracketed 'value*count' list in the `values` column into one
-- row per repeated value, using REPEAT + SPLIT instead of a UDF.
SELECT
  src.id,
  value
FROM `project.dataset.table` AS src
-- Strip the square brackets, then split the list into comma-separated items
CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(src.values, r'\[|]', ''))) AS item
-- Build ',v,v,...' (count copies; one copy when there is no '*') and split it
CROSS JOIN UNNEST(SPLIT(
  REPEAT(
    CONCAT(',', SPLIT(item, '*')[OFFSET(0)]),
    COALESCE(CAST(SPLIT(item, '*')[SAFE_OFFSET(1)] AS INT64), 1)
  )
)) AS value
-- Drops empty strings, including the one produced by the leading comma
WHERE value <> ''
您可以使用问题中的示例数据来测试和试验上述查询,如下例所示:
#standardSQL
-- Inline sample data standing in for the real table.
WITH `project.dataset.table` AS (
  SELECT 1 AS id, '[5*2,8,6]' AS `values`
  UNION ALL SELECT 2, '[5*2,0*3]'
  UNION ALL SELECT 3, '[1*1,2,5,6]'
)
-- One output row per repeated value of each list item.
SELECT
  src.id,
  value
FROM `project.dataset.table` AS src
-- Strip the square brackets, then split the list into comma-separated items
CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(src.values, r'\[|]', ''))) AS item
-- Build ',v,v,...' (count copies; one copy when there is no '*') and split it
CROSS JOIN UNNEST(SPLIT(
  REPEAT(
    CONCAT(',', SPLIT(item, '*')[OFFSET(0)]),
    COALESCE(CAST(SPLIT(item, '*')[SAFE_OFFSET(1)] AS INT64), 1)
  )
)) AS value
-- Drops empty strings, including the one produced by the leading comma
WHERE value <> ''
结果如下:
Row id value
1 1 5
2 1 5
3 1 8
4 1 6
5 2 5
6 2 5
7 2 0
8 2 0
9 2 0
10 3 1
11 3 2
12 3 5
13 3 6