使用BigQuery,如果行内容与其他行匹配,我想将多行合并为一行。例如
ColumnA | ColumnB
ABC | DEF
DEF | GHI
DEF | 123
XYZ | JKL
结果:
Column C
ABC, DEF, GHI, 123
答案 0 :(得分:0)
使用BigQuery的标准SQL,您可以使用STRING_AGG函数(详情请参阅该函数的官方文档),将一列中的所有行聚合为一个以逗号分隔的字符串。在您的情况下,可以先从columnA和columnB中选出不重复的值,并将它们合并到同一列中:
-- Collects every distinct value found in either columnA or columnB into a
-- single output column named example.
-- `TABLE_NAME` is a placeholder; BigQuery quotes identifiers with backticks,
-- not with typographic or double quotes.
SELECT
  DISTINCT columnA AS example
FROM (
  -- The result column of a UNION takes its name from the first branch,
  -- so columnB values are exposed as columnA here.
  SELECT
    DISTINCT columnA
  FROM
    `TABLE_NAME`
  UNION ALL
  SELECT
    DISTINCT columnB
  FROM
    `TABLE_NAME`)
之后您可以使用STRING_AGG将前一个选择中的不同值聚合为一行:
-- Aggregates every distinct value of columnA and columnB into one string.
-- STRING_AGG is called without a delimiter argument, so the values are
-- joined with its default separator, a comma.
-- `TABLE_NAME` is a placeholder; BigQuery quotes identifiers with backticks.
SELECT
  STRING_AGG(example) AS columnC
FROM (
  -- Remove values that occur in both columns.
  SELECT
    DISTINCT columnA AS example
  FROM (
    -- The result column of a UNION takes its name from the first branch,
    -- so columnB values are exposed as columnA here.
    SELECT
      DISTINCT columnA
    FROM
      `TABLE_NAME`
    UNION ALL
    SELECT
      DISTINCT columnB
    FROM
      `TABLE_NAME`))
答案 1 :(得分:0)
尝试使用BigQuery脚本
-- Merges rows into groups of transitively connected values: two rows end up
-- in the same group when they share at least one value, directly or through
-- a chain of other rows.
--
-- rows_count    : number of rows in ttt before the current iteration.
-- values_count  : total number of array elements in ttt before the iteration.
-- run_away_stop : iteration counter, used as a hard cap on the loop.
DECLARE rows_count, values_count, run_away_stop INT64 DEFAULT 0;

-- Seed the working table with one sorted two-element array per input row.
CREATE TEMP TABLE ttt AS
WITH input AS (
  SELECT 'ABC' ColumnA, 'DEF' ColumnB UNION ALL
  SELECT 'DEF', 'GHI' UNION ALL
  SELECT 'DEF', '123' UNION ALL
  SELECT 'XYZ', 'JKL'
)
SELECT ARRAY(SELECT val FROM UNNEST([ColumnA, ColumnB]) val ORDER BY val) arr
FROM input;

LOOP
  -- Advance the iteration counter and snapshot the size of ttt before it is rebuilt.
  SET (run_away_stop, rows_count, values_count) = (
    SELECT AS STRUCT run_away_stop + 1, COUNT(1), IFNULL(SUM(ARRAY_LENGTH(arr)), 0)
    FROM ttt
  );

  CREATE OR REPLACE TEMP TABLE ttt AS
  -- Step 4: keep a single copy of each distinct merged array.
  SELECT ANY_VALUE(arr) arr
  FROM (
    -- Step 3: de-duplicate and sort the values gathered for each source array.
    SELECT ARRAY(SELECT DISTINCT val FROM UNNEST(arr) val ORDER BY val) arr
    FROM (
      -- Step 2: per source array, concatenate the arrays of all its overlapping pairs.
      SELECT ANY_VALUE(arr1) arr1, ARRAY_CONCAT_AGG(arr) arr
      FROM (
        -- Step 1: pair every array with every array sharing at least one value
        -- (an array always pairs with itself) and build the union of each pair.
        SELECT
          t1.arr arr1,
          t2.arr arr2,
          ARRAY(SELECT DISTINCT val FROM UNNEST(ARRAY_CONCAT(t1.arr, t2.arr)) val ORDER BY val) arr
        FROM ttt t1, ttt t2
        WHERE (SELECT COUNT(1) FROM UNNEST(t1.arr) val JOIN UNNEST(t2.arr) val USING(val)) > 0
      )
      GROUP BY FORMAT('%t', arr1)
    )
  )
  GROUP BY FORMAT('%t', arr);

  -- Stop once an iteration changes nothing. Every rebuilt array is a superset
  -- of the array it was built from, so an unchanged row count together with an
  -- unchanged total element count means no array grew and none were merged.
  -- Comparing the row count alone is not sufficient: in a long chain of linked
  -- rows every array can grow while the number of distinct arrays stays equal.
  -- run_away_stop > 10 is a safety cap; if it triggers, the groups may still
  -- be only partially merged.
  IF (rows_count = (SELECT COUNT(1) FROM ttt)
      AND values_count = (SELECT IFNULL(SUM(ARRAY_LENGTH(arr)), 0) FROM ttt))
     OR run_away_stop > 10 THEN
    BREAK;
  END IF;
END LOOP;

-- One row per merged group, rendered as text such as [123, ABC, DEF, GHI].
SELECT FORMAT('%t', arr) merged_values FROM ttt;
输出如下:
Row merged_values
1 [JKL, XYZ]
2 [123, ABC, DEF, GHI]