用于合并行的SQL

时间:2011-11-23 19:26:25

标签: sql postgresql plpgsql

更新

如何从一个已排序的表中选择行,使得:

  • 数组的第一个元素匹配某一行
  • 第二个元素匹配紧接着的下一行
  • 第三个元素匹配第二行之后的下一行
  • 第四个元素匹配第三行之后的下一行
  • 依此类推,直到数组中的值结束?

逻辑

假设某个查询返回了下文表格中的这些行(表 token 保存 id 和 word,表 positioning 保存 id 和 position)。

我可以在表格中使用不同的文本块,句子和位置进行传播。

我想改变这个:

 id | word | textblockid |sentence |position 
 5  | Fear |      5      |    1    |    1
 8  | of   |      5      |    1    |    2
 6  | the  |      5      |    1    |    3
 7  | Dark |      5      |    1    |    4
 9  | is   |      5      |    1    |    5

我想把上面这些行变成:

 id | word             | textblockid | sentence | position
 10 | Fear of the Dark |      5      |    1     |    1
 9  | is               |      5      |    1     |    2

我正在编写一个函数,它接收一个包含要合并的 id 的数组,类似于 merge_tokens('{5,8,6,7}')。

我在表 token 中插入新单词 Fear of the Dark,并获取生成的 id(本例中 id 为 10)。这很容易。

问题

我需要把第一个单词(本例中为 Fear)的 id 更新为 10,并删除其后的单词(of、the、Dark)。

我的疑问是如何执行这些操作。我想我需要对一个有序表做 SELECT:其中第一行的 id 匹配 id 数组的第一个元素,第二行的 id 匹配数组的第二个元素,依此类推;然后更新第一个元素对应的行,并删除其后的行。

我不能简单地按 id 删除其他行,因为这些单词在别的地方也被使用。我只删除前一个词为 Fear、后一个词为 the、再后一个词为 Dark 的那个 of。按照同样的规则,我只能删除前一个词为 of、再前一个词为 Fear、后一个词为 Dark 的那个 the。

例如,我可以在同一个表中使用不会受到影响的内容:

the

5 个答案:

答案 0 :(得分:1)

最好在一次交易中这样做:

-- Step 1: on the row whose id is the first array element, store the words of
-- all listed rows joined by a single space (ordered by position) and assign
-- a fresh id taken from the sequence.
UPDATE token AS target
SET
    id   = nextval('token_id_seq'),
    word = (
        SELECT string_agg(src.word, ' ' ORDER BY src.position)
        FROM token AS src
        WHERE src.id = ANY ('{5,8,6,7}'::int[])
    )
WHERE target.id = ('{5,8,6,7}'::int[])[1];

-- Step 2: remove every row listed in the array except the first element.
DELETE FROM token AS leftover
WHERE leftover.id <> ('{5,8,6,7}'::int[])[1]
  AND leftover.id = ANY ('{5,8,6,7}'::int[]);

请将 '{5,8,6,7}'::int[] 替换为你的整数数组参数。新的 id 取自我假定存在的序列 token_id_seq。我还假设数组中元素的顺序与按 position 排序的顺序一致;不依赖这一假设的替代版本见下文。
要更新的行的 id 是数组的第一个元素。

单词的排序可以在聚合函数内完成(自PostgreSQL 9.0起)。阅读about that in the manual


回答其他问题

根据数组元素的顺序对所选行进行排序:

-- Number the array elements in the order unnest() emits them, join them to
-- token, and sort by that number so the rows come back in array order.
WITH elems AS (
    SELECT unnest('{5,8,6,7}'::int[]) AS id
),
numbered AS (
    SELECT
        elems.id,
        row_number() OVER () AS rn
    FROM elems
)
SELECT
    numbered.rn,
    t.*
FROM numbered
INNER JOIN token AS t
    ON t.id = numbered.id
ORDER BY numbered.rn;

或者......用不同的技术做同样的事情,也适用于旧版本的Postgres:

-- Same result without window functions: generate the subscripts 1..N of the
-- array and read each element by its subscript.
SELECT
    idx.rn,
    t.*
FROM (
    SELECT
        g.rn,
        ('{5,8,6,7}'::int[])[g.rn] AS id
    FROM generate_series(1, array_upper('{5,8,6,7}'::int[], 1)) AS g (rn)
) AS idx
INNER JOIN token AS t
    ON t.id = idx.id
ORDER BY idx.rn;

组合

在UPDATE语句中使用它:

-- Build the merged word in array order (not by position) and store it,
-- together with a fresh id from the sequence, on the row that matches the
-- first array element.
UPDATE token AS target
SET
    id   = nextval('token_id_seq'),
    word = (
        SELECT string_agg(t.word, ' ' ORDER BY idx.rn)
        FROM (
            -- one row per array subscript: (rn, element at rn)
            SELECT
                g.rn,
                ('{5,8,6,7}'::int[])[g.rn] AS id
            FROM generate_series(1, array_upper('{5,8,6,7}'::int[], 1)) AS g (rn)
        ) AS idx
        INNER JOIN token AS t
            ON t.id = idx.id
    )
WHERE target.id = ('{5,8,6,7}'::int[])[1];

答案 1 :(得分:1)

此片段不使用数组。 (我不喜欢数组)

-- Self-contained demo: creates a five-word sample sentence and uses a
-- recursive CTE to concatenate runs of consecutive words.
set search_path='tmp';

-- IF EXISTS keeps the script re-runnable: a plain DROP TABLE raises an error
-- on the first run, when the table does not exist yet.
DROP TABLE IF EXISTS wordlist;
CREATE TABLE wordlist
    ( id INTEGER NOT NULL PRIMARY KEY
    , word varchar
    , textblockid INTEGER NOT NULL
    , sentence INTEGER NOT NULL
    , postion INTEGER NOT NULL
    , UNIQUE (textblockid,sentence,postion)
    );

INSERT INTO wordlist(id,word,textblockid,sentence,postion) VALUES
 (5 , 'Fear', 5 , 1 , 1 )
,(8 , 'of', 5 , 1 , 2 )
,(6 , 'the', 5 , 1 , 3 )
,(7 , 'Dark', 5 , 1 , 4 )
,(9 , 'is', 5 , 1 , 5 )
    ;

-- Every word starts a chain at lev 0; each recursion step appends the word
-- at the next position of the same textblock and sentence and increments lev.
WITH RECURSIVE meuk AS (
    SELECT 0 AS lev
        , id,word AS words
        , textblockid,sentence,postion AS lastpos
    FROM wordlist
    -- UNION ALL instead of UNION: the UNIQUE constraint on
    -- (textblockid, sentence, postion) gives every row at most one successor,
    -- so no duplicates can arise and the de-duplication step is wasted work.
    UNION ALL
    SELECT 1+ mk.lev AS lev
        , wl.id
        , mk.words || ' '::text || wl.word AS words
        , wl.textblockid,wl.sentence
        , wl.postion AS lastpos
    FROM meuk mk
    JOIN wordlist wl ON (wl.textblockid = mk.textblockid
        AND wl.sentence = mk.sentence
        AND wl.postion = mk.lastpos+1)
    )
-- lev = 3 selects the chains made of four consecutive words; id is the id of
-- the last word of the chain. ORDER BY makes the output order deterministic.
SELECT lev, id, words, textblockid, sentence, lastpos
FROM meuk
WHERE lev = 3
ORDER BY id
    ;

结果:

SET
DROP TABLE
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "wordlist_pkey" for table "wordlist"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "wordlist_textblockid_sentence_postion_key" for table "wordlist"
CREATE TABLE
INSERT 0 5
 lev | id |      words       | textblockid | sentence | lastpos 
-----+----+------------------+-------------+----------+---------
   3 |  7 | Fear of the Dark |           5 |        1 |       4
   3 |  9 | of the Dark is   |           5 |        1 |       5
(2 rows)

答案 2 :(得分:1)

在回答了你最近的大部分问题后,我对你正在做的事情有一个模糊的概念。所以我仔细研究了你的解决方案并进行了相当的优化。大多数情况下我简化了代码,但也有一些实质性的改进。

有些观点:

  • 不要在plpgsql中使用未记录的赋值运算符=。请改用:=。见related question for more info
  • 为什么LOOP BEGIN?如果您不需要,单独的代码块只会减慢速度。删除了它。
  • 更多,我添加了一些评论

请仔细查看代码,其中包含一些提示。请测试两个版本,看看哪个版本执行得更快。

供您考虑:

-- merge_tokens(words, separator)
-- Joins `words` with `separator` into one merged token text, makes sure that
-- text has a row in `token`, and then rewrites `textblockhastoken`: for each
-- complete match found by the query below, the first matched row is pointed
-- at the merged token, the remaining matched rows are deleted, and the
-- positions of that sentence are renumbered starting at 1.
-- Parameters:
--   words     - token texts to merge, in order
--   separator - string placed between the words of the merged text
-- Returns: nothing (VOID); progress is reported with RAISE NOTICE.
CREATE OR REPLACE FUNCTION merge_tokens(words varchar[], separator varchar)
  RETURNS VOID AS
$body$
DECLARE         
    r              record;     -- current row of the main FOR loop
    current_id     integer;    -- id of the merged token in table token
    ids            integer[];  -- token ids of the given words, in array order
    generated_word varchar :=  '';  -- you can initialize variables at declaration time. Saves additional assignment.

BEGIN
    -- get the ids and generate the word
    RAISE NOTICE 'Getting ids and generating words';
    generated_word := array_to_string(words, separator);  -- 1 assignment is much cheaper. Also: no trim() needed.
    -- Resolve every word to its token id. row_number() over the unnested array
    -- is used to keep the ids in the order of words; the inner join drops
    -- words that have no row in token.
    ids := ARRAY
    (  SELECT t.id
       FROM  (
          SELECT row_number() OVER () AS rn, text
          FROM  (SELECT unnest(words) AS text) x) y
          JOIN   token t USING (text)
       ORDER  BY rn);
    RAISE NOTICE 'Generated word: %', generated_word;

    -- look up the merged word; insert it if it does not exist yet
    SELECT INTO current_id  t.id FROM token t WHERE t.text = generated_word; 
    IF NOT FOUND THEN
        RAISE NOTICE 'Word don''t exists';
        INSERT INTO token(text) VALUES(generated_word)
        RETURNING id
        INTO current_id;  --get the last value without additional query.
    END IF;
    RAISE NOTICE 'Word id: %', current_id;

    -- select the records that will be updated
    RAISE NOTICE 'Getting words to be updated.';
    FOR r IN
        SELECT textblockid, sentence, position, tokenid, rn
        FROM
        ( -- select the rows that are complete
          -- counting = number of rows of the group that matched the lookup table
          SELECT textblockid, sentence, position, tokenid, rn, count(*) OVER (PARTITION BY grp) AS counting
          FROM
          ( -- match source with lookup table
                SELECT source.textblockid, source.sentence, source.position, source.tokenid, source.rn, source.grp
                FROM
                (   -- select textblocks where words appears with row number to matching
                     -- rn restarts at 1 on every row holding the first token (one group per occurrence)
                     SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid, grp
                                           ,CASE WHEN grp > 0 THEN
                                            row_number() OVER (PARTITION BY grp ORDER BY tb.textblockid, tb.sentence, tb.position)
                                            END AS rn               
                     FROM
                     (   -- create the groups to be used in partition by to generate the row numbers
                          -- grp is a running count of rows whose tokenid equals ids[1]
                          SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid
                                ,SUM(CASE WHEN tb.tokenid = ids[1] THEN 1 ELSE 0 END) OVER (ORDER BY tb.textblockid, tb.sentence, tb.position) AS grp
                          FROM  textblockhastoken tb
                          JOIN
                          (   --select the textblocks where the word appears
                              -- NOTE(review): this returns one row per occurrence of ids[1], so a
                              -- sentence containing ids[1] more than once has all of its rows
                              -- duplicated by the join; that looks like it would keep such
                              -- sentences from matching -- verify.
                                SELECT textblockid, sentence
                                FROM   textblockhastoken tb
                                WHERE  tb.tokenid = ids[1]
                          ) res USING (textblockid, sentence)
                     ) tb
                ) source
                -- create the lookup table to match positions
                -- the n-th row of a group must carry the n-th id of ids
                JOIN (SELECT row_number() OVER () as rn, id FROM unnest(ids) AS id) lookup USING (rn)
                WHERE source.tokenid = lookup.id
          ) merged
        ) g  
        -- keep only groups in which every element of ids was matched
        WHERE g.counting = array_length(ids,1)
        ORDER BY g.rn --order by row number to update first, delete and change positions after
    LOOP
        --check if update or delete
        IF (r.rn = 1) THEN
            -- first word of a match: point the row at the merged token
            RAISE NOTICE 'Updating word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            UPDATE textblockhastoken tb SET tokenid = current_id
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = ( r.textblockid,  r.sentence,  r.position);
        ELSE
            -- any later word of a match: remove the row
            RAISE NOTICE 'Deleting word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            DELETE FROM textblockhastoken tb
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = ( r.textblockid,  r.sentence,  r.position);
        END IF;
        --check if is the last word to update the positions
        IF (r.rn = array_length(ids,1)) THEN
            -- renumber the remaining rows of this sentence 1..n in position
            -- order; only rows whose position actually changes are updated
            RAISE NOTICE 'Changing positions in T:% S:%', r.textblockid, r.sentence;
            UPDATE textblockhastoken tb SET position = new_position
            FROM
            (   SELECT textblockid, sentence, position
                      ,row_number() OVER (PARTITION BY tb.textblockid, tb.sentence ORDER BY tb.position) as new_position
                FROM   textblockhastoken tb
                WHERE  tb.textblockid = r.textblockid AND tb.sentence = r.sentence
            ) np
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = (np.textblockid, np.sentence, np.position)
            AND    tb.position <> np.new_position;
        END IF;
    END LOOP;
END;
$body$ LANGUAGE plpgsql;

答案 3 :(得分:0)

这可以作为merge_tokens函数的一部分吗?好像你可以让这个函数跟踪哪些记录需要更新/删除,只需基于提供的数组(第一个元素更新,其余删除)。

答案 4 :(得分:0)

这个答案适用于我的具体情况。我不知道是否是最好的方式,但对我有用。

我使用以下两个问题的答案构建了这个过程:“Is possible have different conditions for each row in a query?” 和 “How create a WINDOW in PostgreSQL until the same value appears again?”

FOREACH 仅适用于 PostgreSQL 9.1 及以上版本。

-- merge_tokens(words, separator)
-- Builds one merged token text from `words` joined by `separator`, inserts it
-- into `token` when it is not there yet, and then rewrites `textblockhastoken`:
-- for each complete match found by the query below, the first matched row is
-- pointed at the merged token, the remaining matched rows are deleted, and
-- the positions of that sentence are renumbered starting at 1.
-- Parameters:
--   words     - token texts to merge, in order
--   separator - string placed between the words of the merged text
-- Returns: nothing (VOID); progress is reported with RAISE NOTICE.
-- FOREACH requires PostgreSQL 9.1 or later.
CREATE OR REPLACE FUNCTION merge_tokens(words VARCHAR[], separator VARCHAR)
RETURNS VOID
AS $$
DECLARE
    r RECORD;                -- current row of the main FOR loop
    current_id INTEGER;      -- scratch id while resolving words, then id of the merged token
    current_word VARCHAR;    -- loop variable over words
    ids INTEGER[];           -- token ids of words, in array order
    generated_word VARCHAR;  -- merged token text

BEGIN
    -- get the ids and generate the word
    RAISE NOTICE 'Getting ids and generating words';

    -- Join the words in a single call. Appending the separator after every
    -- word left a trailing separator that TRIM() removed only when it
    -- consisted of spaces. TRIM() is kept so that outer spaces are still
    -- stripped as before. array_to_string() skips NULL elements.
    generated_word := TRIM(array_to_string(words, separator));

    -- Resolve every word to its token id, keeping the order of the array.
    -- A word without a row in token contributes a NULL id.
    FOREACH current_word IN ARRAY words
    LOOP
        SELECT t.id INTO current_id FROM token t WHERE t.text = current_word;
        ids := ids || current_id;
    END LOOP;
    RAISE NOTICE 'Generated word: %', generated_word;

    -- look up the merged word; insert it if it does not exist yet
    SELECT t.id INTO current_id FROM token t WHERE t.text = generated_word;
    IF (current_id IS NULL) THEN
        RAISE NOTICE 'Word don''t exists';
        -- RETURNING reads back the id of this very row; lastval() would
        -- return the value of whichever sequence was advanced last in the
        -- session (for example by a trigger).
        INSERT INTO token(id,text) VALUES(nextval('tokenidsqc'),generated_word)
        RETURNING id INTO current_id;
    END IF;
    RAISE NOTICE 'Word id: %', current_id;

    -- select the records that will be updated
    RAISE NOTICE 'Getting words to be updated.';
    FOR r IN SELECT matched.textblockid, matched.sentence, matched.position, matched.tokenid, matched.row_number
    FROM
    (
        -- select the rows that are complete
        -- counting = number of rows of the group that matched the lookup table
        SELECT merged.textblockid, merged.sentence, merged.position, merged.tokenid,merged.row_number,count(*) OVER w as counting
        FROM
        (
            -- match source with lookup table
            SELECT source.textblockid, source.sentence, source.position, source.tokenid,source.row_number, source.grp
            FROM
            (   -- select textblocks where words appears with row number to matching
                -- row_number restarts at 1 on every row holding the first token
                SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid, grp,
                    CASE WHEN grp > 0 THEN
                        row_number() OVER (PARTITION BY grp ORDER BY tb.textblockid,tb.sentence,tb.position)
                    END AS row_number
                FROM
                (   -- create the groups to be used in partition by to generate the row numbers
                    -- grp is a running count of rows whose tokenid equals ids[1]
                    SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid,
                        SUM(CASE WHEN tb.tokenid = ids[1] THEN 1 ELSE 0 END) OVER (ORDER BY tb.textblockid,tb.sentence,tb.position) AS grp
                    FROM textblockhastoken tb,
                    (   --select the textblocks where the word appears
                        -- NOTE(review): this returns one row per occurrence of ids[1], so a
                        -- sentence containing ids[1] more than once has all of its rows
                        -- duplicated by the join; that looks like it would keep such
                        -- sentences from matching -- verify.
                        SELECT textblockid, sentence
                        FROM textblockhastoken tb
                        WHERE tb.tokenid = ids[1]
                    )res
                    WHERE tb.textblockid = res.textblockid
                    AND tb.sentence = res.sentence
                )tb
            )source,
            -- create the lookup table to match positions
            -- the n-th row of a group must carry the n-th id of ids
            (
                SELECT row_number() OVER () as row_number,id FROM unnest(ids::INTEGER[]) as id
            )lookup
            WHERE source.tokenid = lookup.id
            AND source.row_number = lookup.row_number
        )merged
        WINDOW w AS (PARTITION BY grp)
    ) matched
    -- keep only groups in which every element of ids was matched
    WHERE matched.counting = array_length(ids,1)
    ORDER BY matched.row_number --order by row number to update first, delete and change positions after
    -- end of query and start of iterations actions
    LOOP
        --check if update or delete
        IF (r.row_number = 1) THEN
            -- first word of a match: point the row at the merged token
            RAISE NOTICE 'Updating word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            UPDATE textblockhastoken tb SET tokenid = current_id
            WHERE tb.textblockid = r.textblockid
            AND tb.sentence = r.sentence
            AND tb.position = r.position;
        ELSE
            -- any later word of a match: remove the row
            RAISE NOTICE 'Deleting word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            DELETE FROM textblockhastoken tb
            WHERE tb.textblockid = r.textblockid
            AND tb.sentence = r.sentence
            AND tb.position = r.position;
        END IF;
        --check if is the last word to update the positions
        IF (r.row_number = array_length(ids,1)) THEN
            -- renumber the remaining rows of this sentence 1..n in position
            -- order; only rows whose position actually changes are updated
            RAISE NOTICE 'Changing positions in T:% S:%', r.textblockid, r.sentence;
            UPDATE textblockhastoken tb SET position = new_position
            FROM
            (
                SELECT textblockid, sentence, position, row_number() OVER w as new_position
                FROM textblockhastoken tb
                WHERE tb.textblockid = r.textblockid AND tb.sentence = r.sentence
                WINDOW w AS (PARTITION BY tb.textblockid, tb.sentence ORDER BY tb.position)
            )new_positioning
            WHERE tb.textblockid = new_positioning.textblockid
            AND tb.sentence = new_positioning.sentence
            AND tb.position = new_positioning.position
            AND tb.position <> new_positioning.new_position;
        END IF;
    END LOOP;
END;
$$
LANGUAGE plpgsql;