取消包含json列表的列的嵌套

时间:2019-03-18 12:23:37

标签: json postgresql

我有一个表,其中包含一列,其中填充了json列表,例如以下列表:

 ID | json_col 
----+----------
 1  | [{"A":"foo11","B":"bar11"},{"A":"foo12","B":"bar12"}]
 2  | [{"A":"foo21","B":"bar21"}]

我希望将其“展开”(取消嵌套),以获得下表:

 ID | A       | B
----+---------+------
 1  | "foo11" | "bar11"
 1  | "foo12" | "bar12"
 2  | "foo21" | "bar21"

理想情况下,我还希望记录各元素在列表中的顺序,得到类似下面的结果:

 ID | json_col_id | A       | B
----+-------------+---------+-------
 1  | 1           | "foo11" | "bar11"
 1  | 2           | "foo12" | "bar12"
 2  | 1           | "foo21" | "bar21"

在某些情况下,情况会更复杂,因为 json 元素内部还嵌套了列表。此时我的输入如下:

 ID | json_col 
----+----------
 1  | [{"A":"foo11", "B":[{"C":"bar111", "D":"baz111"},{"C":"bar112", "D":"baz112"}], {"A":"foo12","B":[{"C":"bar121", "D":"baz121"}]}]
 2  | [{"A":"foo21", "B":[{"C":"bar211", "D":"baz211"}]}]

我想要的输出是:

 ID | A       | C        | D
----+---------+----------+----------
 1  | "foo11" | "bar111" | "baz111"
 1  | "foo11" | "bar112" | "baz112"
 1  | "foo12" | "bar121" | "baz121"
 2  | "foo21" | "bar211" | "baz211"

在梦幻世界中:

 ID | json_col_id | B_id | A       | C        | D
----+-------------+------+---------+----------+----------
 1  | 1           | 1    | "foo11" | "bar111" | "baz111"
 1  | 1           | 2    | "foo11" | "bar112" | "baz112"
 1  | 2           | 1    | "foo12" | "bar121" | "baz121"
 2  | 1           | 1    | "foo21" | "bar211" | "baz211"

我知道如何用 PostgreSQL 的 json_col::json -> 'item' ->> 'subitem' AS my_new_col 语法来处理嵌套的 JSON,但我不知道如何展开此处所示的格式:列表中各元素的结构是规则的,但列表的长度并不固定。

我认为答案应该就在那里的某处(原文此处为一个链接),但我没能弄明白,也没有在 Stack Overflow 上找到类似的示例。

3 个答案:

答案 0 :(得分:1)

看看是否有帮助:

-- Expand every JSON array stored in cod_proj into one row per array element,
-- then read "A" from each element and "C" / "D" from the objects nested
-- under "B".
--
-- This form assumes "B" always holds an array. If "B" may also be a scalar,
-- branch with a CASE expression and read jsonelement -> 'B' directly in the
-- scalar case.
SELECT
    ROW_NUMBER() OVER (ORDER BY elem.jsonelement ->> 'A'),
    elem.jsonelement -> 'A' AS A,
    JSON_ARRAY_ELEMENTS(elem.jsonelement -> 'B') -> 'C' AS C,
    JSON_ARRAY_ELEMENTS(elem.jsonelement -> 'B') -> 'D' AS D
FROM (
    SELECT JSON_ARRAY_ELEMENTS(src.cod_proj::JSON) AS jsonelement
    FROM temp.kmltests AS src
) AS elem

答案 1 :(得分:1)

您可以使用 json_populate_recordset 函数。 https://www.postgresql.org/docs/11/functions-json.html 最好以 LATERAL(横向连接)的方式使用它,并且必须先创建对应的类型。

-- Composite type whose field names (a, b) are the JSON keys to extract.
postgres=# CREATE TYPE x AS (a int, b int);
CREATE TYPE
postgres=# CREATE TABLE y(id int, c json);
CREATE TABLE
-- A single row holding an array of objects; each object carries only some of the keys.
postgres=# INSERT INTO y VALUES(1,'[{"a":1},{"a":2,"b":3},{"b":4}]');
INSERT 0 1
-- One output row per array element; a key missing from an element shows up as an empty (NULL) cell.
postgres=# SELECT z.* FROM y, json_populate_recordset(NULL::x, c) z;
┌───┬───┐
│ a │ b │
╞═══╪═══╡
│ 1 │   │
│ 2 │ 3 │
│   │ 4 │
└───┴───┘
(3 rows)

postgres=#

=====编辑:====

针对问题中那张表的示例

-- Self-contained demo: builds the sample table, unnests both JSON levels and
-- numbers the elements by their position in each array. The table and the
-- helper types are created inside a transaction that ends in ROLLBACK, so
-- they do not persist after the script has run.
DROP TABLE IF EXISTS moodys_table;
BEGIN;
CREATE TABLE moodys_table ("ID" int, json_col json);
-- The first array element must be closed with "}" right after its "B" array;
-- without that brace the literal is not valid JSON and the ::json cast fails.
INSERT INTO moodys_table ("ID", json_col) VALUES (1, '[{"A":"foo11", "B":[{"C":"bar111", "D":"baz111"},{"C":"bar112", "D":"baz112"}]},{"A":"foo12","B":[{"C":"bar121", "D":"baz121"}]}]'::json);
INSERT INTO moodys_table ("ID", json_col) VALUES (2, '[{"A":"foo21", "B":[{"C":"bar211", "D":"baz211"}]}]'::json);

-- Row types for json_populate_recordset. The field names have to match the
-- JSON keys exactly, hence the quoted upper-case identifiers.
CREATE TYPE btype AS ("C" varchar, "D" varchar);
-- "B" is kept as json so the nested array can be expanded in a second step.
CREATE TYPE json_col_type AS ("A" text, "B" json);

SELECT "ID", json_col FROM moodys_table;

-- WITH ORDINALITY appends the 1-based position of every row produced by the
-- function call, i.e. the element's index inside its JSON array. JSON arrays
-- are ordered, so this is the real element order. Ranking by the values of
-- "A" / "C" / "D" instead would merge duplicate values and misnumber arrays
-- that are not sorted.
SELECT
    m."ID",
    data."A",
    data2."C",
    data2."D",
    data.json_col_id,
    data2.b_id
FROM moodys_table AS m
CROSS JOIN LATERAL json_populate_recordset(NULL::json_col_type, m.json_col)
    WITH ORDINALITY AS data ("A", "B", json_col_id)
CROSS JOIN LATERAL json_populate_recordset(NULL::btype, data."B")
    WITH ORDINALITY AS data2 ("C", "D", b_id)
ORDER BY m."ID", data.json_col_id, data2.b_id;

ROLLBACK;

postgres=# \i /tmp/moodys.sql 
psql:/tmp/moodys.sql:1: NOTICE:  table "moodys_table" does not exist, skipping
DROP TABLE
BEGIN
CREATE TABLE
INSERT 0 1
INSERT 0 1
CREATE TYPE
CREATE TYPE
┌────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ ID │                                                             json_col                                                              │
╞════╪═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
│  1 │ [{"A":"foo11", "B":[{"C":"bar111", "D":"baz111"},{"C":"bar112", "D":"baz112"}]},{"A":"foo12","B":[{"C":"bar121", "D":"baz121"}]}] │
│  2 │ [{"A":"foo21", "B":[{"C":"bar211", "D":"baz211"}]}]                                                                               │
└────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
(2 rows)

┌────┬───────┬────────┬────────┬─────────────┬──────┐
│ ID │   A   │   C    │   D    │ json_col_id │ b_id │
╞════╪═══════╪════════╪════════╪═════════════╪══════╡
│  1 │ foo11 │ bar111 │ baz111 │           1 │    1 │
│  1 │ foo11 │ bar112 │ baz112 │           1 │    2 │
│  1 │ foo12 │ bar121 │ baz121 │           2 │    1 │
│  2 │ foo21 │ bar211 │ baz211 │           1 │    1 │
└────┴───────┴────────┴────────┴─────────────┴──────┘
(4 rows)

ROLLBACK

答案 2 :(得分:0)

修改@ diego-victor-de-jesus的答案以添加ID列并添加一些详细信息。

假设我的表名为 tbl

  • json_array_elements() 接收一个 json 列表并把它展开成多行,其他列的值会在每一行中重复,
  • ROW_NUMBER() OVER (PARTITION BY mycolumn)为我提供了组内的增量ID。

由于我需要2个不同的IDs,因此我需要重复此序列2次,一次剥离一层。

-- Unnest json_col of tbl in two passes, one JSON layer per pass, numbering
-- the elements of each layer with ROW_NUMBER().
--
-- NOTE: ROW_NUMBER() is used without ORDER BY, so the numbers follow the
-- order in which the expanded rows arrive. PostgreSQL does not formally
-- guarantee that this is the array order; json_array_elements(...) WITH
-- ORDINALITY in the FROM clause is the guaranteed alternative.
WITH expanded_json_col AS (
    -- Pass 1a: one row per element of the outer array; ID repeats per element.
    SELECT
        ID,
        JSON_ARRAY_ELEMENTS(json_col::JSON) AS json_col
    FROM tbl
),
extracted_json_col AS (
    -- Pass 1b: number the outer elements within each ID and split out A / B.
    SELECT
        ID,
        ROW_NUMBER() OVER (PARTITION BY ID) AS json_col_id,
        json_col ->> 'A' AS A,
        json_col -> 'B' AS B
    FROM expanded_json_col
),
expanded_B AS (
    -- Pass 2a: one row per element of the nested "B" array.
    SELECT
        ID,
        json_col_id,
        A,
        JSON_ARRAY_ELEMENTS(B) AS B
    FROM extracted_json_col
)
-- Pass 2b: number the nested elements. The partition must include ID as well
-- as json_col_id; otherwise rows of different IDs that share a json_col_id
-- would be numbered together.
SELECT
    ID,
    json_col_id,
    ROW_NUMBER() OVER (PARTITION BY ID, json_col_id) AS B_id,
    A,
    B ->> 'C' AS C,
    B ->> 'D' AS D
FROM expanded_B
ORDER BY ID, json_col_id, B_id;