Question

更新

如何从一个已排序的表中选择行，使得：

数组的第一个元素匹配某一行
第二个元素匹配紧接着的下一行
第三个元素匹配第二行之后的那一行
第四个元素匹配第三行之后的那一行
依此类推，直到数组中的值结束？

逻辑

假设我有以下各行作为查询结果（表token保存id和word，表positioning保存id和position）：

position

这些行可能分散在表中不同的文本块、句子和位置。

我想改变这个：

 id | word | textblockid |sentence |position 
 5  | Fear |      5      |    1    |    1
 8  | of   |      5      |    1    |    2
 6  | the  |      5      |    1    |    3
 7  | Dark |      5      |    1    |    4
 9  | is   |      5      |    1    |    5

变成这个：

 id | word             | textblockid |sentence |position 
 10 | Fear of the Dark |      5      |    1    |    1
 9  | is               |      5      |    1    |    2

我正在编写一个函数，它接收一个包含待合并ID的数组，类似于merge_tokens('{5,8,6,7}')。

我在表token中插入新单词Fear of the Dark并获取生成的ID（在本例中，id为10）。这很容易。

问题

我需要将第一个单词（在本例中为Fear）的id更新为10，并删除其后的单词（of、the、Dark）。

我的疑问是如何执行这些操作。我想我需要从一个有序表中SELECT，其中第一行的id匹配id数组中的第一个元素，第二行的id匹配id数组中的第二个元素，依此类推，然后更新第一个元素对应的行并删除其余的行。

我不能简单地删除ID在数组中的其他行，因为其他词组也在使用这些单词。我只能删除前一个为Fear、下一个为the、再下一个为Dark的of。遵循同样的规则，我只能删除前一个为of、再前一个为Fear、下一个为Dark的the。

例如，我可以在同一个表中使用不会受到影响的内容：

the

Answer 1

最好在一个事务中完成这些操作：

-- Merge the tokens listed in the array into a single row.
-- Both statements run inside one explicit transaction so that a failure in
-- the DELETE cannot leave the table half-merged (first row already renamed,
-- remaining rows still present).
BEGIN;

-- Step 1: the row whose id equals the first array element receives the
-- space-separated concatenation of all listed words (ordered by position)
-- and a new id taken from the sequence token_id_seq.
UPDATE token
SET    word = (
    SELECT string_agg(word, ' '  ORDER BY position)
    FROM   token
    WHERE  id = ANY('{5,8,6,7}'::int[])
    )
      ,id = nextval('token_id_seq')
WHERE  id = ('{5,8,6,7}'::int[])[1];

-- Step 2: remove the other listed rows; the first array element is
-- excluded explicitly.
DELETE FROM token
WHERE  id = ANY('{5,8,6,7}'::int[])
AND    id <> ('{5,8,6,7}'::int[])[1];

COMMIT;

将'{5,8,6,7}'::int[]替换为整数数组参数。新的id取自我假定存在的序列。我进一步假设数组中元素的顺序与按position排序的顺序一致；如果不一致，请使用下面的替代版本。
要更新的id是数组的第一个元素。

单词的排序可以在聚合函数内完成（自PostgreSQL 9.0起）。详情请参阅手册中的相关说明。

回答其他问题

根据数组元素的顺序对所选行进行排序：

-- Return the token rows whose id appears in the array, sorted by the
-- position of that id inside the array.
WITH elems AS (
    -- one row per array element
    SELECT unnest('{5,8,6,7}'::int[]) AS id
),
numbered AS (
    -- number the elements in the order they are produced
    SELECT
        id,
        row_number() OVER () AS rn
    FROM elems
)
SELECT
    numbered.rn,
    t.*
FROM numbered
INNER JOIN token AS t USING (id)
ORDER BY numbered.rn;

或者......用不同的技术做同样的事情，也适用于旧版本的Postgres：

-- Same result as ordering by array position, but built from array subscripts:
-- generate_series yields every subscript from 1 to the upper bound and the
-- element at that subscript becomes the id to join on.
SELECT
    pos.rn,
    t.*
FROM (
    SELECT
        g.rn,
        arr.a[g.rn] AS id
    FROM (SELECT '{5,8,6,7}'::int[] AS a) AS arr
    CROSS JOIN generate_series(1, array_upper('{5,8,6,7}'::int[], 1)) AS g(rn)
) AS pos
INNER JOIN token AS t USING (id)
ORDER BY pos.rn;

组合

在UPDATE语句中使用它：

-- Rewrite the row whose id is the first array element: its word becomes the
-- space-separated concatenation of the listed words, ordered by their
-- subscript in the array, and its id is replaced by the next sequence value.
UPDATE token AS head
SET
    word = (
        SELECT string_agg(t.word, ' ' ORDER BY pos.rn)
        FROM (
            -- pair every array subscript with the element stored there
            SELECT
                g.rn,
                arr.a[g.rn] AS id
            FROM (SELECT '{5,8,6,7}'::int[] AS a) AS arr
            CROSS JOIN generate_series(1, array_upper('{5,8,6,7}'::int[], 1)) AS g(rn)
        ) AS pos
        INNER JOIN token AS t USING (id)
    ),
    id = nextval('token_id_seq')
WHERE head.id = ('{5,8,6,7}'::int[])[1];

Answer 2

此片段不使用数组。（我不喜欢数组）

-- Work inside the scratch schema.
set search_path='tmp';

-- Recreate the demo table from scratch. IF EXISTS lets the script run on a
-- fresh schema where the table has not been created yet.
DROP TABLE IF EXISTS wordlist;
CREATE TABLE wordlist
    ( id INTEGER NOT NULL PRIMARY KEY
    , word varchar
    , textblockid INTEGER NOT NULL
    , sentence INTEGER NOT NULL
    , postion INTEGER NOT NULL
    -- a slot inside a sentence can hold only one word
    , UNIQUE (textblockid,sentence,postion)
    );

-- Sample sentence: "Fear of the Dark is" (text block 5, sentence 1).
INSERT INTO wordlist(id,word,textblockid,sentence,postion) VALUES
 (5 , 'Fear', 5 , 1 , 1 )
,(8 , 'of', 5 , 1 , 2 )
,(6 , 'the', 5 , 1 , 3 )
,(7 , 'Dark', 5 , 1 , 4 )
,(9 , 'is', 5 , 1 , 5 )
    ;

-- Build runs of consecutive words per (textblockid, sentence).
-- lev counts how many words were appended to the starting word, so lev = 3
-- selects the runs made of four consecutive words.
WITH RECURSIVE meuk (lev, id, words, textblockid, sentence, lastpos) AS (
    -- seed: every word starts a run of its own
    SELECT 0, id, word, textblockid, sentence, postion
    FROM wordlist
    UNION
    -- step: extend a run with the word at the next position of the same sentence
    SELECT
        run.lev + 1,
        nxt.id,
        run.words || ' '::text || nxt.word,
        nxt.textblockid,
        nxt.sentence,
        nxt.postion
    FROM meuk AS run
    INNER JOIN wordlist AS nxt
        ON  nxt.textblockid = run.textblockid
        AND nxt.sentence = run.sentence
        AND nxt.postion = run.lastpos + 1
)
SELECT *
FROM meuk
WHERE lev = 3;

结果：

SET
DROP TABLE
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "wordlist_pkey" for table "wordlist"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "wordlist_textblockid_sentence_postion_key" for table "wordlist"
CREATE TABLE
INSERT 0 5
 lev | id |      words       | textblockid | sentence | lastpos 
-----+----+------------------+-------------+----------+---------
   3 |  7 | Fear of the Dark |           5 |        1 |       4
   3 |  9 | of the Dark is   |           5 |        1 |       5
(2 rows)

Answer 3

在回答了你最近的大部分问题后，我对你正在做的事情有一个模糊的概念。所以我仔细研究了你的解决方案并进行了相当的优化。大多数情况下我简化了代码，但也有一些实质性的改进。

有些观点：

不要在plpgsql中使用未记录的赋值运算符=。请改用:=。更多信息请参见相关问题。
为什么LOOP BEGIN？如果您不需要，单独的代码块只会减慢速度。删除了它。
更多，我添加了一些评论

请仔细查看代码，从中获取一些提示。请测试两个版本，看看哪个版本执行得更快。

供您考虑：

-- merge_tokens(words, separator)
--
-- Joins the given words with the separator into one combined word, makes sure
-- a row for that combined word exists in token, and then rewrites every
-- complete occurrence of the word sequence in textblockhastoken: the first row
-- of an occurrence is pointed at the combined token, the remaining rows are
-- deleted and the positions of the affected sentence are renumbered.
--
-- Parameters:
--   words     - words to merge, in the order in which they must appear
--   separator - string placed between the words of the combined word
-- Returns: nothing (VOID); progress is reported through RAISE NOTICE.
CREATE OR REPLACE FUNCTION merge_tokens(words varchar[], separator varchar)
  RETURNS VOID AS
$body$
DECLARE
    r              record;            -- current row of the matching query
    current_id     integer;           -- id of the combined token
    ids            integer[];         -- token ids of the input words, in input order
    generated_word varchar :=  '';    -- initialised at declaration; overwritten below

BEGIN
    -- get the ids and generate the word
    RAISE NOTICE 'Getting ids and generating words';
    generated_word := array_to_string(words, separator);
    -- LEFT JOIN keeps an array element for every input word: a word without a
    -- token row yields NULL, which never equals a tokenid, so no occurrence
    -- can match. With an inner join the unknown word would silently vanish
    -- and a shorter, different sequence would be merged instead.
    ids := ARRAY
    (  SELECT t.id
       FROM  (
          SELECT row_number() OVER () AS rn, text
          FROM  (SELECT unnest(words) AS text) x) y
          LEFT JOIN token t USING (text)
       ORDER  BY rn);
    RAISE NOTICE 'Generated word: %', generated_word;

    -- look up the combined word; insert it when it does not exist yet
    SELECT INTO current_id  t.id FROM token t WHERE t.text = generated_word;
    IF NOT FOUND THEN
        RAISE NOTICE 'Word don''t exists';
        INSERT INTO token(text) VALUES(generated_word)
        RETURNING id
        INTO current_id;  -- fetch the new id without an additional query
    END IF;
    RAISE NOTICE 'Word id: %', current_id;

    -- select the records that will be updated
    -- NOTE(review): the rows (and their positions) are read by this query
    -- before the loop modifies anything. If one sentence contains the phrase
    -- more than once, positions of later occurrences may be stale after the
    -- first renumbering below - verify with such data.
    RAISE NOTICE 'Getting words to be updated.';
    FOR r IN
        SELECT textblockid, sentence, position, tokenid, rn
        FROM
        ( -- keep only groups in which every word of the sequence matched
          SELECT textblockid, sentence, position, tokenid, rn, count(*) OVER (PARTITION BY grp) AS counting
          FROM
          ( -- match source with lookup table
                SELECT source.textblockid, source.sentence, source.position, source.tokenid, source.rn, source.grp
                FROM
                (   -- number the rows inside each group, starting at the first word
                     SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid, grp
                                           ,CASE WHEN grp > 0 THEN
                                            row_number() OVER (PARTITION BY grp ORDER BY tb.textblockid, tb.sentence, tb.position)
                                            END AS rn
                     FROM
                     (   -- running count of first-word hits: every occurrence of ids[1] opens a new group
                          SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid
                                ,SUM(CASE WHEN tb.tokenid = ids[1] THEN 1 ELSE 0 END) OVER (ORDER BY tb.textblockid, tb.sentence, tb.position) AS grp
                          FROM  textblockhastoken tb
                          JOIN
                          (   -- sentences in which the first word appears
                                SELECT textblockid, sentence
                                FROM   textblockhastoken tb
                                WHERE  tb.tokenid = ids[1]
                          ) res USING (textblockid, sentence)
                     ) tb
                ) source
                -- lookup table: n-th row of a group must carry the n-th id
                JOIN (SELECT row_number() OVER () as rn, id FROM unnest(ids) AS id) lookup USING (rn)
                WHERE source.tokenid = lookup.id
          ) merged
        ) g
        WHERE g.counting = array_length(ids,1)
        ORDER BY g.rn -- order by row number to update first, delete and change positions after
    LOOP
        -- first word of an occurrence is re-pointed, the others are removed
        IF (r.rn = 1) THEN
            RAISE NOTICE 'Updating word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            UPDATE textblockhastoken tb SET tokenid = current_id
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = ( r.textblockid,  r.sentence,  r.position);
        ELSE
            RAISE NOTICE 'Deleting word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            DELETE FROM textblockhastoken tb
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = ( r.textblockid,  r.sentence,  r.position);
        END IF;
        -- after the last word of an occurrence: close the gaps in the sentence
        IF (r.rn = array_length(ids,1)) THEN
            RAISE NOTICE 'Changing positions in T:% S:%', r.textblockid, r.sentence;
            UPDATE textblockhastoken tb SET position = new_position
            FROM
            (   SELECT textblockid, sentence, position
                      ,row_number() OVER (PARTITION BY tb.textblockid, tb.sentence ORDER BY tb.position) as new_position
                FROM   textblockhastoken tb
                WHERE  tb.textblockid = r.textblockid AND tb.sentence = r.sentence
            ) np
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = (np.textblockid, np.sentence, np.position)
            AND    tb.position <> np.new_position;
        END IF;
    END LOOP;
END;
$body$ LANGUAGE plpgsql;

Answer 4

这可以作为merge_tokens函数的一部分吗？好像你可以让这个函数跟踪哪些记录需要更新/删除，只需基于提供的数组（第一个元素更新，其余删除）。

Answer 5

这个答案适用于我的具体情况。我不知道是否是最好的方式，但对我有用。

我使用以下问题的答案构建此过程：Is possible have different conditions for each row in a query?和How create a WINDOW in PostgreSQL until the same value appears again?

FOREACH仅适用于PostgreSQL 9.1及更高版本。

-- merge_tokens(words, separator)
--
-- Builds one combined word from the given words, makes sure a token row for
-- it exists, and rewrites every complete occurrence of the word sequence in
-- textblockhastoken: the first row of an occurrence is pointed at the
-- combined token, the remaining rows are deleted and the positions of the
-- affected sentence are renumbered.
--
-- Parameters:
--   words     - words to merge, in the order in which they must appear
--   separator - string placed between the words of the combined word
-- Returns: nothing (VOID); progress is reported through RAISE NOTICE.
CREATE OR REPLACE FUNCTION merge_tokens(words VARCHAR[], separator VARCHAR)
RETURNS VOID
AS $$
DECLARE
    r RECORD;                  -- current row of the matching query
    current_id INTEGER;        -- id of the word being looked up / of the combined token
    current_word VARCHAR;      -- loop variable over the input words
    ids INTEGER[];             -- token ids of the input words, in input order
    generated_word VARCHAR;    -- the combined word

BEGIN
    -- collect the token id of every input word (NULL when the word is unknown)
    RAISE NOTICE 'Getting ids and generating words';
    FOREACH current_word IN ARRAY words
    LOOP
        SELECT t.id INTO current_id FROM token t WHERE t.text = current_word;
        ids := ids || current_id;
    END LOOP;

    -- Build the combined word with the separator placed only BETWEEN the
    -- words. Appending the separator after every word and trimming afterwards
    -- leaves a trailing separator whenever it is not plain spaces.
    -- array_to_string skips NULL elements. TRIM still strips leading and
    -- trailing spaces that come from the words themselves.
    generated_word := TRIM(array_to_string(words, separator));
    RAISE NOTICE 'Generated word: %', generated_word;

    -- look up the combined word; insert it when it does not exist yet
    SELECT t.id INTO current_id FROM token t WHERE t.text = generated_word;
    IF (current_id IS NULL) THEN
        RAISE NOTICE 'Word don''t exists';
        -- RETURNING hands back the id of exactly this row; lastval() would
        -- report whichever sequence was advanced last in the session
        INSERT INTO token(id,text) VALUES(nextval('tokenidsqc'),generated_word)
        RETURNING id INTO current_id;
    END IF;
    RAISE NOTICE 'Word id: %', current_id;

    -- select the records that will be updated
    -- NOTE(review): the rows (and their positions) are read by this query
    -- before the loop modifies anything. If one sentence contains the phrase
    -- more than once, positions of later occurrences may be stale after the
    -- first renumbering below - verify with such data.
    RAISE NOTICE 'Getting words to be updated.';
    FOR r IN SELECT grouping.textblockid, grouping.sentence, grouping.position, grouping.tokenid, grouping.row_number
    FROM
    (
        -- keep only groups in which every word of the sequence matched
        SELECT merged.textblockid, merged.sentence, merged.position, merged.tokenid,merged.row_number,count(*) OVER w as counting           
        FROM
        (
            -- match source with lookup table
            SELECT source.textblockid, source.sentence, source.position, source.tokenid,source.row_number, source.grp
            FROM
            (   -- number the rows inside each group, starting at the first word
                SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid, grp,
                    CASE WHEN grp > 0 THEN
                        row_number() OVER (PARTITION BY grp ORDER BY tb.textblockid,tb.sentence,tb.position)
                    END AS row_number               
                FROM
                (   -- running count of first-word hits: every occurrence of ids[1] opens a new group
                    SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid,
                        SUM(CASE WHEN tb.tokenid = ids[1] THEN 1 ELSE 0 END) OVER (ORDER BY tb.textblockid,tb.sentence,tb.position) AS grp
                    FROM textblockhastoken tb,
                    (   -- sentences in which the first word appears
                        SELECT textblockid, sentence
                        FROM textblockhastoken tb
                        WHERE tb.tokenid = ids[1]
                    )res
                    WHERE tb.textblockid = res.textblockid
                    AND tb.sentence = res.sentence                      
                )tb
            )source,
            -- lookup table: n-th row of a group must carry the n-th id
            (
                SELECT row_number() OVER () as row_number,id FROM unnest(ids::INTEGER[]) as id
            )lookup
            WHERE source.tokenid = lookup.id
            AND source.row_number = lookup.row_number
        )merged
        WINDOW w AS (PARTITION BY grp)
    ) grouping      
    WHERE grouping.counting = array_length(ids,1)
    ORDER BY grouping.row_number --order by row number to update first, delete and change positions after
    -- end of query, start of the per-row actions
    LOOP
        -- first word of an occurrence is re-pointed, the others are removed
        IF (r.row_number = 1) THEN
            RAISE NOTICE 'Updating word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            UPDATE textblockhastoken tb SET tokenid = current_id
            WHERE tb.textblockid = r.textblockid 
            AND tb.sentence = r.sentence
            AND tb.position = r.position;
        ELSE
            RAISE NOTICE 'Deleting word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            DELETE FROM textblockhastoken tb
            WHERE tb.textblockid = r.textblockid 
            AND tb.sentence = r.sentence
            AND tb.position = r.position;
        END IF;
        -- after the last word of an occurrence: close the gaps in the sentence
        IF (r.row_number = array_length(ids,1)) THEN
            RAISE NOTICE 'Changing positions in T:% S:%', r.textblockid, r.sentence;
            UPDATE textblockhastoken tb SET position = new_position
            FROM
            (   
                SELECT textblockid, sentence, position, row_number() OVER w as new_position
                FROM textblockhastoken tb
                WHERE tb.textblockid = r.textblockid AND tb.sentence = r.sentence
                WINDOW w AS (PARTITION BY tb.textblockid, tb.sentence ORDER BY tb.position)             
            )new_positioning                
            WHERE tb.textblockid = new_positioning.textblockid 
            AND tb.sentence = new_positioning.sentence
            AND tb.position = new_positioning.position
            AND tb.position <> new_positioning.new_position;
        END IF;
    END LOOP;
END 
$$
LANGUAGE plpgsql;

用于合并行的SQL

更新

逻辑

问题

5 个答案:

回答其他问题

组合

有些观点：