拼接数组时处理空列值

时间:2016-06-23 17:30:49

标签: arrays postgresql join null

鉴于以下起始数据:

-- Fixture t1: ids 1 through 20, every row carrying the same array {1,...,6}.
CREATE TABLE t1 AS
SELECT
    ids.id,
    (SELECT array_agg(elem) FROM generate_series(1, 6) AS elem) AS array_1
FROM generate_series(1, 20) AS ids (id);

-- Fixture t2: ids 5 through 10, every row carrying the same array {7,...,10}.
CREATE TABLE t2 AS
SELECT
    ids.id,
    (SELECT array_agg(elem) FROM generate_series(7, 10) AS elem) AS array_2
FROM generate_series(5, 10) AS ids (id);

-- Fixture t3: ids 8 through 15, every row carrying the same array {11,...,15}.
CREATE TABLE t3 AS
SELECT
    ids.id,
    (SELECT array_agg(elem) FROM generate_series(11, 15) AS elem) AS array_3
FROM generate_series(8, 15) AS ids (id);

我想在几个表之间进行外连接。每个表都有一个数组列,其长度在同一个表内是统一的,但不同的表之间可能不同(如上例所示)。我要把各个表的数组列拼接成一个大的数组列。我想知道是否有一种高效或直接的方法,能在新的组合列中保持一致的索引:把(由外连接产生的)NULL 列值替换为由 NULL 元素组成的数组,使最终的数组列具有统一的长度。与上面的例子不同,在我的实际用例中,我事先并不知道每个表的数组列的长度,只知道它在整个表内是统一的。换句话说,下面这个查询并不是我想要的:

-- Naive concatenation: a NULL array coming from an unmatched outer join
-- contributes no elements, so combined_array varies in length between rows.
SELECT
    id,
    array_1 || array_2 || array_3 AS combined_array
FROM t1
LEFT JOIN t2 USING (id)
LEFT JOIN t3 USING (id)
-- Row order is unspecified without ORDER BY; sort so the listing is reproducible.
ORDER BY id;

该查询产生:

id |            combined_array
----+---------------------------------------
 1 | {1,2,3,4,5,6}
 2 | {1,2,3,4,5,6}
 3 | {1,2,3,4,5,6}
 4 | {1,2,3,4,5,6}
 5 | {1,2,3,4,5,6,7,8,9,10}
 6 | {1,2,3,4,5,6,7,8,9,10}
 7 | {1,2,3,4,5,6,7,8,9,10}
 8 | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}
 9 | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}
10 | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}
11 | {1,2,3,4,5,6,11,12,13,14,15}
12 | {1,2,3,4,5,6,11,12,13,14,15}
13 | {1,2,3,4,5,6,11,12,13,14,15}
14 | {1,2,3,4,5,6,11,12,13,14,15}
15 | {1,2,3,4,5,6,11,12,13,14,15}
16 | {1,2,3,4,5,6}
17 | {1,2,3,4,5,6}
18 | {1,2,3,4,5,6}
19 | {1,2,3,4,5,6}
20 | {1,2,3,4,5,6}
(20 rows)

我希望结果如下:

id |            combined_array
----+---------------------------------------
 1 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
 2 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
 3 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
 4 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
 5 | {1,2,3,4,5,6,7,8,9,10,NULL,NULL,NULL,NULL,NULL}
 6 | {1,2,3,4,5,6,7,8,9,10,NULL,NULL,NULL,NULL,NULL}
 7 | {1,2,3,4,5,6,7,8,9,10,NULL,NULL,NULL,NULL,NULL}
 8 | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}
 9 | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}
10 | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}
11 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,11,12,13,14,15}
12 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,11,12,13,14,15}
13 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,11,12,13,14,15}
14 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,11,12,13,14,15}
15 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,11,12,13,14,15}
16 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
17 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
18 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
19 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
20 | {1,2,3,4,5,6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
(20 rows)

这样每行包含一个长度为15的数组。

1 个答案:

答案 0 :(得分:1)

回答我自己的问题:下面是我想出的查询,它似乎可以完成这项工作。不过在我看来,它并不特别优雅或高效,因此我仍然非常欢迎其他答案。

-- Concatenate the three array columns across the outer joins. Wherever a
-- column is NULL (no matching row), substitute an all-NULL array whose length
-- is that table's array length, so every combined_array has the same length
-- and each source table's elements stay at fixed positions.
WITH pad AS (
    -- A single row holding one all-NULL padding array per source table.
    -- max() yields NULL when the table has no rows, and array_fill() raises
    -- an error on a NULL dimension, so fall back to length 0 (an empty array).
    SELECT
        array_fill(
            NULL::INT,
            ARRAY[COALESCE((SELECT max(array_length(array_1, 1)) FROM t1), 0)]
        ) AS pad_1,
        array_fill(
            NULL::INT,
            ARRAY[COALESCE((SELECT max(array_length(array_2, 1)) FROM t2), 0)]
        ) AS pad_2,
        array_fill(
            NULL::INT,
            ARRAY[COALESCE((SELECT max(array_length(array_3, 1)) FROM t3), 0)]
        ) AS pad_3
)
SELECT
    id,
    COALESCE(array_1, pad.pad_1)
        || COALESCE(array_2, pad.pad_2)
        || COALESCE(array_3, pad.pad_3) AS combined_array
FROM t1
LEFT JOIN t2 USING (id)
LEFT JOIN t3 USING (id)
CROSS JOIN pad
-- Row order is unspecified without ORDER BY; sort so the output is reproducible.
ORDER BY id;