我正在尝试从 BigQuery 的数据中提取 2 列。以下是我的查询:
-- Fetch three rows of the user id together with its nested ep column.
SELECT
  user_id,
  ep
FROM table_name
LIMIT 3
这里的 ep(即 event_params)是一个嵌套列,包含键(key)和值(value)。数据如下所示:
user_id ep.key ep.value.string_value ep.value.int_value
1 origin fcm null
2 origin fcm null
3 screen null 4
origin auto null
id null 97
BigQuery 不知为何把 ep 列拆成了键和值(值存放在 string_value 或 int_value 中)。我需要以下格式的数据:
user_id ep.key ep.value
1 origin fcm
2 origin fcm
3 screen 4
origin auto
id 97
答案 0 :(得分:2)
以下写法适用于 BigQuery 标准 SQL:
#standardSQL
-- For every source row, rebuild ep as an array of (key, value) structs in
-- which value is a single STRING: string_value when it is set, otherwise
-- int_value cast to STRING.
SELECT
  user_id,
  ARRAY(
    SELECT AS STRUCT
      param.key AS key,
      COALESCE(
        param.value.string_value,
        CAST(param.value.int_value AS STRING)
      ) AS value
    -- `param` is one element of this row's ep array; a distinct alias avoids
    -- shadowing the ep column itself.
    FROM UNNEST(ep) AS param
  ) AS ep
FROM `project.dataset.table_name`
您可以使用问题中的示例数据来测试上面的查询,如下例所示:
#standardSQL
-- Inline stand-in for the real table, populated with the sample rows from the
-- question. Only the first struct of each array literal carries the full type;
-- the remaining elements are written as untyped STRUCT(...) literals.
WITH `project.dataset.table_name` AS (
  SELECT
    1 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('origin', STRUCT('fcm', NULL))
    ] AS ep
  UNION ALL
  SELECT
    2 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('origin', STRUCT('fcm', NULL))
    ] AS ep
  UNION ALL
  SELECT
    3 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('screen', STRUCT(NULL, 4)),
      STRUCT('origin', STRUCT('auto', NULL)),
      STRUCT('id', STRUCT(NULL, 97))
    ] AS ep
)
-- Rebuild ep per row as (key, value) structs with value rendered as STRING.
SELECT
  user_id,
  ARRAY(
    SELECT AS STRUCT
      param.key AS key,
      COALESCE(
        param.value.string_value,
        CAST(param.value.int_value AS STRING)
      ) AS value
    FROM UNNEST(ep) AS param
  ) AS ep
FROM `project.dataset.table_name`
结果如下:
Row user_id ep.key ep.value
1 1 origin fcm
2 2 origin fcm
3 3 screen 4
origin auto
id 97
如果需要把具有相同 user_id 的所有行合并为一行,则可以使用另一种写法:
#standardSQL
-- Flatten every ep element across all rows, then regroup by user_id so that
-- each user_id yields a single row whose ep array holds all of its
-- (key, value) pairs, with value rendered as STRING.
SELECT
  src.user_id,
  ARRAY_AGG(
    STRUCT(
      param.key AS key,
      COALESCE(
        param.value.string_value,
        CAST(param.value.int_value AS STRING)
      ) AS value
    )
  ) AS ep
FROM `project.dataset.table_name` AS src
-- Explicit form of the comma join against the row's own ep array.
CROSS JOIN UNNEST(src.ep) AS param
GROUP BY src.user_id
例如下面的示例,其中示例数据里多了一行 user_id 为 1 的记录:
#standardSQL
-- Inline stand-in for the real table: the question's sample rows plus a second
-- row for user_id 1, so that grouping has something to merge.
WITH `project.dataset.table_name` AS (
  SELECT
    1 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('origin', STRUCT('fcm', NULL))
    ] AS ep
  UNION ALL
  SELECT
    1 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('origin2', STRUCT('fcm2', NULL))
    ] AS ep
  UNION ALL
  SELECT
    2 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('origin', STRUCT('fcm', NULL))
    ] AS ep
  UNION ALL
  SELECT
    3 AS user_id,
    [
      STRUCT<key STRING, value STRUCT<string_value STRING, int_value INT64>>('screen', STRUCT(NULL, 4)),
      STRUCT('origin', STRUCT('auto', NULL)),
      STRUCT('id', STRUCT(NULL, 97))
    ] AS ep
)
-- Flatten all ep elements, then collect them back into one array per user_id.
SELECT
  src.user_id,
  ARRAY_AGG(
    STRUCT(
      param.key AS key,
      COALESCE(
        param.value.string_value,
        CAST(param.value.int_value AS STRING)
      ) AS value
    )
  ) AS ep
FROM `project.dataset.table_name` AS src
CROSS JOIN UNNEST(src.ep) AS param
GROUP BY src.user_id
结果如下:
Row user_id ep.key ep.value
1 1 origin fcm
origin2 fcm2
2 2 origin fcm
3 3 screen 4
origin auto
id 97
如果对同样的数据运行第一种写法,则会得到如下结果:
Row user_id ep.key ep.value
1 1 origin fcm
2 1 origin2 fcm2
3 2 origin fcm
4 3 screen 4
origin auto
id 97