BigQuery:如果行内容相同,则多个SQL行合并为单行

时间:2018-02-12 21:08:43

标签: sql google-bigquery

使用BigQuery,如果行内容与其他行匹配,我想将多行合并为一行。例如

ColumnA  | ColumnB
 ABC      | DEF
 DEF      | GHI
 DEF      | 123
 XYZ      | JKL 

结果:

 Column C
ABC, DEF, GHI, 123

2 个答案:

答案 0 :(得分:0)

使用 BigQuery 标准 SQL,您可以用 STRING_AGG 函数(详见 BigQuery 官方文档中对该函数的说明)将一列中的所有行聚合为一个字符串,各值之间以逗号分隔。在您的情况下,可以先从 columnA 和 columnB 中选出不重复的值,并把它们放入同一列:

-- Collect every distinct value that appears in either columnA or columnB
-- into a single output column named `example`.
-- UNION DISTINCT removes duplicates both within and across the two inputs,
-- so no extra DISTINCT layers are needed.
-- `TABLE_NAME` is a placeholder; BigQuery identifiers are quoted with backticks.
SELECT
  columnA AS example
FROM
  `TABLE_NAME`
UNION DISTINCT
SELECT
  columnB AS example
FROM
  `TABLE_NAME`

之后,您可以使用 STRING_AGG 将上一个查询得到的不重复值聚合为一行:

-- Aggregate every distinct value found in columnA or columnB into one
-- comma-separated string (STRING_AGG uses "," as its default delimiter).
-- NOTE: this aggregates values from the whole table; it does not restrict
-- the result to rows that are linked to each other by a shared value.
-- `TABLE_NAME` is a placeholder; BigQuery identifiers are quoted with backticks.
SELECT
  STRING_AGG(example) AS columnC
FROM (
  -- Distinct values across both columns, exposed as a single column.
  SELECT
    columnA AS example
  FROM
    `TABLE_NAME`
  UNION DISTINCT
  SELECT
    columnB AS example
  FROM
    `TABLE_NAME`)

答案 1 :(得分:0)

尝试使用BigQuery脚本

-- Merges rows that share a value, directly or through a chain of other rows,
-- into one sorted, de-duplicated array per linked group.
--
-- rows_count / elements_count: snapshot of ttt taken before each pass.
-- run_away_stop: number of passes executed so far (safety counter).
DECLARE rows_count, elements_count, run_away_stop INT64 DEFAULT 0;

-- Seed table: one sorted two-element array per input row.
CREATE TEMP TABLE ttt AS WITH input AS (
  SELECT 'ABC' ColumnA, 'DEF' ColumnB UNION ALL
  SELECT 'DEF', 'GHI' UNION ALL
  SELECT 'DEF', '123' UNION ALL
  SELECT 'XYZ', 'JKL'
)
SELECT ARRAY(SELECT val FROM UNNEST([ColumnA, ColumnB]) val ORDER BY val) arr FROM input;

LOOP
  -- Record the state of ttt before this pass so convergence can be detected.
  -- IFNULL guards the empty-table case, where SUM returns NULL.
  SET (run_away_stop, rows_count, elements_count) = (
    SELECT AS STRUCT
      run_away_stop + 1,
      COUNT(1),
      IFNULL(SUM(ARRAY_LENGTH(arr)), 0)
    FROM ttt
  );

  -- One pass: replace every array with the union of all arrays that overlap it
  -- (each array overlaps itself, so arrays never shrink), then drop duplicates.
  CREATE OR REPLACE TEMP TABLE ttt AS
  SELECT ANY_VALUE(arr) arr FROM (
    -- Sort and de-duplicate the concatenated unions built for each source array.
    SELECT ARRAY(SELECT DISTINCT val FROM UNNEST(arr) val ORDER BY val) arr
    FROM (
      -- Gather, per source array (arr1), the unions with every overlapping array.
      SELECT ANY_VALUE(arr1) arr1, ARRAY_CONCAT_AGG(arr) arr
      FROM (
        SELECT
          t1.arr arr1,
          t2.arr arr2,
          ARRAY(SELECT DISTINCT val FROM UNNEST(ARRAY_CONCAT(t1.arr, t2.arr)) val ORDER BY val) arr
        FROM ttt t1
        CROSS JOIN ttt t2
        -- Keep only pairs of arrays that have at least one value in common.
        WHERE EXISTS (SELECT 1 FROM UNNEST(t1.arr) AS val WHERE val IN UNNEST(t2.arr))
      ) GROUP BY FORMAT('%t', arr1)  -- arrays cannot be grouped directly; group by their text form
    )
  ) GROUP BY FORMAT('%t', arr);

  -- Converged when neither the number of arrays nor the total number of
  -- elements changed. Row count alone is not enough: arrays can still be
  -- growing while the row count stays the same (e.g. a long chain of links).
  -- The pass limit of 10 is a safety cap against a runaway loop.
  IF (
    SELECT COUNT(1) = rows_count AND IFNULL(SUM(ARRAY_LENGTH(arr)), 0) = elements_count
    FROM ttt
  ) OR run_away_stop > 10 THEN
    BREAK;
  END IF;
END LOOP;

-- Present each merged group as text, e.g. [123, ABC, DEF, GHI].
SELECT FORMAT('%t', arr) merged_values FROM ttt;

输出如下:

Row merged_values 
1   [JKL, XYZ]   
2   [123, ABC, DEF, GHI]