我正在尝试在 BigQuery 中解析一个 JSON 字符串,它包含一个由多个字段组成的对象数组,查询如下所示:
-- Builds, for each input row, an array of (index, startTime) structs by cutting
-- the JSON text into per-object fragments and parsing each fragment on its own.
SELECT
ARRAY(
SELECT
-- Each field is extracted from the fragment as a scalar string, then cast.
STRUCT( CAST(JSON_EXTRACT_SCALAR(split_items,
'$.index') AS int64) AS index,
-- startTime is converted from an integer millisecond count to a TIMESTAMP.
TIMESTAMP_MILLIS(CAST(JSON_EXTRACT_SCALAR(split_items,
'$.startTime') AS int64)) AS startTime)
FROM (
SELECT
-- The pattern removes either `"[{` anchored at the start (^) or `}]"` anchored
-- at the end ($) of a fragment; both alternatives include a literal double
-- quote. The remainder is re-wrapped in braces to form a JSON object.
CONCAT('{', REGEXP_REPLACE(split_items, r'^"\[{|}\]"$', ''), '}') AS split_items
FROM
-- Splitting on '},{' separates the array elements and consumes the braces
-- between them, which is why CONCAT adds braces back above.
UNNEST(SPLIT(json, '},{')) AS split_items )) AS split_items
FROM (
SELECT
json
FROM
-- NOTE(review): `dataset:table` uses a colon separator; standard SQL table
-- references are normally written as backtick-quoted `dataset.table` — confirm.
dataset:table)
示例json如下:
"[{\"index\":0,\"startTime\":1524607204000},{\"index\":1,\"startTime\":1524607204000},{\"index\":2,\"startTime\":1524607204000}]"
查询结果中 index 和 startTime 都是空的。这个查询有什么问题吗?
答案 0 :(得分:1)
下面的查询应该可以正常工作:
#standardSQL
-- For every row of `dataset.table`, turn the JSON array text in `json` into an
-- array of structs with an INT64 `index` and a TIMESTAMP `startTime`.
SELECT
  ARRAY(
    SELECT
      STRUCT(
        CAST(JSON_EXTRACT_SCALAR(item, '$.index') AS INT64) AS index,
        TIMESTAMP_MILLIS(
          CAST(JSON_EXTRACT_SCALAR(item, '$.startTime') AS INT64)
        ) AS startTime
      )
    FROM (
      -- Remove the array delimiters `[{` / `}]` left on a fragment, then add
      -- the braces back so each fragment is a standalone JSON object.
      SELECT
        CONCAT(
          '{',
          REGEXP_REPLACE(split_items, r'\[{|}\]', ''),
          '}'
        ) AS item
      FROM
        -- Splitting on '},{' yields one fragment per array element.
        UNNEST(SPLIT(json, '},{')) AS split_items
    )
  ) AS split_items
FROM
  `dataset.table`
“修复”在于下面这一行:正则表达式去掉了原来首尾的 ^" 和 "$,因此不再要求 JSON 字符串的开头和结尾带有双引号:
REGEXP_REPLACE(split_items, r'\[{|}\]', '')
您可以用示例 JSON 来测试/试验上面的查询,如下所示:
#standardSQL
-- Self-contained demo: the CTE stands in for the real table with one sample row.
WITH `dataset.table` AS (
  SELECT
    '[{"index":0,"startTime":1524607204000},{"index":1,"startTime":1524607204000},{"index":2,"startTime":1524607204000}]' AS json
)
SELECT
  ARRAY(
    SELECT
      STRUCT(
        CAST(JSON_EXTRACT_SCALAR(item, '$.index') AS INT64) AS index,
        TIMESTAMP_MILLIS(
          CAST(JSON_EXTRACT_SCALAR(item, '$.startTime') AS INT64)
        ) AS startTime
      )
    FROM (
      -- Remove the array delimiters `[{` / `}]` left on a fragment, then add
      -- the braces back so each fragment is a standalone JSON object.
      SELECT
        CONCAT(
          '{',
          REGEXP_REPLACE(split_items, r'\[{|}\]', ''),
          '}'
        ) AS item
      FROM
        -- Splitting on '},{' yields one fragment per array element.
        UNNEST(SPLIT(json, '},{')) AS split_items
    )
  ) AS split_items
FROM
  `dataset.table`
结果为(只有一行,split_items 是包含 3 个元素的数组):
Row | split_items.index | split_items.startTime
1   | 0                 | 2018-04-24 22:00:04.000 UTC
    | 1                 | 2018-04-24 22:00:04.000 UTC
    | 2                 | 2018-04-24 22:00:04.000 UTC