当行的顺序很重要时,如何从表中选择行
假设我的查询结果是下面这些行(表 token 保存 id 和 word,表 positioning 保存 id 和 position):
这些行可能以不同的文本块、句子和位置散布在表中。
我想改变这个:
id | word | textblockid |sentence |position
5 | Fear | 5 | 1 | 1
8 | of | 5 | 1 | 2
6 | the | 5 | 1 | 3
7 | Dark | 5 | 1 | 4
9 | is | 5 | 1 | 5
变成这样:
id | word | textblockid | sentence |position
10 | Fear of the Dark | 5 | 1 | 1
9 | is | 5 | 1 | 2
我正在写一个函数,它接收一个包含待合并 ID 的数组,类似于 merge_tokens('{5,8,6,7}')。
我在表 token 中插入新单词 Fear of the Dark 并获取生成的 id(例如,本例中 id 为 10)。这很容易。
我需要将第一个单词(在本例中为 Fear)的 id 更新为 10,并删除其后的单词(of、the、Dark)。
我的疑问是如何执行这些操作。我想我需要从一个有序的表中 SELECT,其中第一行的 id 匹配 id 数组中的第一个元素,第二行的 id 匹配 id 数组中的第二个元素,依此类推,然后更新第一个元素并删除其后的元素。
我不能仅凭 ID 直接删除其他行,因为其他词组也在使用它们。我只会删除前一个词为 Fear、后一个词为 the、再后一个词为 Dark 的那个 of。遵循同样的规则,我只能删除前一个词为 of、再前一个词为 Fear、后一个词为 Dark 的那个 the。
例如,同一个表中可能还有下面这样不应受到影响的内容:
the
答案 0 :(得分:1)
最好在一个事务中完成这些操作:
-- Step 1: turn the row of the first array element into the merged token.
-- It receives a fresh id from the sequence and the concatenation of all
-- listed words, joined by single spaces in position order.
UPDATE token AS tgt
SET id   = nextval('token_id_seq'),
    word = (
        SELECT string_agg(src.word, ' ' ORDER BY src.position)
        FROM token AS src
        WHERE src.id = ANY (CAST('{5,8,6,7}' AS int[]))
    )
WHERE tgt.id = (CAST('{5,8,6,7}' AS int[]))[1];

-- Step 2: remove the remaining listed rows; the first array element is
-- explicitly excluded from the delete.
DELETE FROM token AS t
WHERE t.id <> (CAST('{5,8,6,7}' AS int[]))[1]
  AND t.id = ANY (CAST('{5,8,6,7}' AS int[]));
将 '{5,8,6,7}'::int[] 替换为整数数组参数。
新的 id 取自我假定存在的序列。
我进一步假设数组中元素的顺序与按 position 排序的顺序一致。替代版本见下文。
要更新的 id 是数组的第一个元素。
单词的排序可以在聚合函数内部完成(自 PostgreSQL 9.0 起)。详见手册中的相关说明。
根据数组元素的顺序对所选行进行排序:
-- Return the token rows in the order in which their ids appear in the array.
-- row_number() over the unnested array supplies the ordinal used for sorting.
SELECT ord.rn, t.*
FROM (
    SELECT u.id,
           row_number() OVER () AS rn
    FROM unnest(CAST('{5,8,6,7}' AS int[])) AS u(id)
) AS ord
INNER JOIN token AS t
    ON t.id = ord.id
ORDER BY ord.rn;
或者......用不同的技术做同样的事情,也适用于旧版本的Postgres:
-- Same result as sorting by array position, but the ordinal comes from
-- generate_series over the array subscripts and the id is looked up by index.
SELECT idx.rn, t.*
FROM (
    SELECT g.rn,
           (CAST('{5,8,6,7}' AS int[]))[g.rn] AS id
    FROM generate_series(1, array_upper(CAST('{5,8,6,7}' AS int[]), 1)) AS g(rn)
) AS idx
INNER JOIN token AS t
    ON t.id = idx.id
ORDER BY idx.rn;
在UPDATE语句中使用它:
-- Rewrite the row of the first array element into the merged token:
-- new id from the sequence, and the listed words concatenated with single
-- spaces in array order (rn is the 1-based subscript of each id).
UPDATE token AS tgt
SET id   = nextval('token_id_seq'),
    word = (
        SELECT string_agg(t.word, ' ' ORDER BY idx.rn)
        FROM generate_series(1, array_upper(CAST('{5,8,6,7}' AS int[]), 1)) AS idx(rn)
        INNER JOIN token AS t
            ON t.id = (CAST('{5,8,6,7}' AS int[]))[idx.rn]
    )
WHERE tgt.id = (CAST('{5,8,6,7}' AS int[]))[1];
答案 1 :(得分:1)
此片段不使用数组。 (我不喜欢数组)
-- Demo script: builds a small word list and uses a recursive CTE to
-- assemble every run of four consecutive words within a sentence.
SET search_path = 'tmp';

-- IF EXISTS keeps the script runnable when the table is not there yet.
DROP TABLE IF EXISTS wordlist;

-- NOTE: the column name "postion" (sic) is kept as-is; the implicit index
-- name shown in the recorded output is derived from it.
CREATE TABLE wordlist
    ( id INTEGER NOT NULL PRIMARY KEY
    , word varchar
    , textblockid INTEGER NOT NULL
    , sentence INTEGER NOT NULL
    , postion INTEGER NOT NULL
    , UNIQUE (textblockid, sentence, postion)
    );

INSERT INTO wordlist (id, word, textblockid, sentence, postion) VALUES
     (5, 'Fear', 5, 1, 1)
    ,(8, 'of',   5, 1, 2)
    ,(6, 'the',  5, 1, 3)
    ,(7, 'Dark', 5, 1, 4)
    ,(9, 'is',   5, 1, 5)
    ;

WITH RECURSIVE meuk AS (
    -- Anchor: every word starts a phrase of its own (lev = 0).
    -- The cast makes the column type agree with the recursive branch,
    -- whose concatenation yields text.
    SELECT 0 AS lev
        , id
        , CAST(word AS text) AS words
        , textblockid
        , sentence
        , postion AS lastpos
    FROM wordlist
    -- UNION ALL: each step extends a phrase by exactly one word (the join
    -- key is UNIQUE), so no duplicates can arise and no dedup is needed.
    UNION ALL
    -- Step: append the word at the next position of the same sentence.
    SELECT 1 + mk.lev AS lev
        , wl.id
        , mk.words || ' '::text || wl.word AS words
        , wl.textblockid
        , wl.sentence
        , wl.postion AS lastpos
    FROM meuk mk
    JOIN wordlist wl ON (wl.textblockid = mk.textblockid
                     AND wl.sentence = mk.sentence
                     AND wl.postion = mk.lastpos + 1)
    )
-- lev = 3 means three words were appended, i.e. phrases of four words.
-- id and lastpos belong to the last word of each phrase.
SELECT lev, id, words, textblockid, sentence, lastpos
FROM meuk
WHERE lev = 3
    ;
结果:
SET
DROP TABLE
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "wordlist_pkey" for table "wordlist"
NOTICE: CREATE TABLE / UNIQUE will create implicit index "wordlist_textblockid_sentence_postion_key" for table "wordlist"
CREATE TABLE
INSERT 0 5
lev | id | words | textblockid | sentence | lastpos
-----+----+------------------+-------------+----------+---------
3 | 7 | Fear of the Dark | 5 | 1 | 4
3 | 9 | of the Dark is | 5 | 1 | 5
(2 rows)
答案 2 :(得分:1)
在回答了你最近的大部分问题后,我对你正在做的事情有一个模糊的概念。所以我仔细研究了你的解决方案并进行了相当的优化。大多数情况下我简化了代码,但也有一些实质性的改进。
- 不要使用 = 作为 plpgsql 的赋值运算符。请改用 :=。更多信息见相关问题(related question)。
- 为什么要写 LOOP BEGIN?如果不需要,单独的代码块只会拖慢速度。我已将其删除。
- 请仔细查看代码,其中还有一些可供参考的提示。请测试两个版本,看哪个版本执行得更快。
供您考虑:
-- merge_tokens(words, separator)
--
-- Merges every occurrence of the word sequence `words` in textblockhastoken
-- into a single token whose text is the words joined by `separator`.
--   words     : the words to merge, in the order they must appear
--   separator : string placed between the words of the merged token
-- Side effects: may insert one row into token; updates, deletes and
-- renumbers rows of textblockhastoken. Progress is reported via NOTICEs.
-- If the array is empty or not every word resolves to exactly one token id,
-- the function reports it and returns without changing anything.
CREATE OR REPLACE FUNCTION merge_tokens(words varchar[], separator varchar)
RETURNS VOID AS
$body$
DECLARE
    r              record;
    current_id     integer;
    ids            integer[];
    generated_word varchar;
BEGIN
    -- get the ids and generate the word
    RAISE NOTICE 'Getting ids and generating words';
    generated_word := array_to_string(words, separator);

    -- Token ids in the order of the input words. This is an inner join, so
    -- a word without a token row (or with several) changes the array length.
    ids := ARRAY
        ( SELECT t.id
          FROM (
              SELECT row_number() OVER () AS rn, text
              FROM (SELECT unnest(words) AS text) x) y
          JOIN token t USING (text)
          ORDER BY rn);
    RAISE NOTICE 'Generated word: %', generated_word;

    -- Guard: with a shorter or longer id list the matching below would look
    -- for a different word sequence than the caller asked for.
    IF array_length(words, 1) IS NULL
       OR array_length(ids, 1) IS DISTINCT FROM array_length(words, 1) THEN
        RAISE NOTICE 'Nothing merged: % word(s) given, % token id(s) found',
            coalesce(array_length(words, 1), 0), coalesce(array_length(ids, 1), 0);
        RETURN;
    END IF;

    -- insert the merged word only if it does not exist yet
    SELECT INTO current_id t.id FROM token t WHERE t.text = generated_word;
    IF NOT FOUND THEN
        RAISE NOTICE 'Word don''t exists';
        INSERT INTO token(text) VALUES(generated_word)
        RETURNING id
        INTO current_id; -- fetch the generated id without an additional query
    END IF;
    RAISE NOTICE 'Word id: %', current_id;

    -- select the records that will be updated
    RAISE NOTICE 'Getting words to be updated.';
    FOR r IN
        SELECT textblockid, sentence, position, tokenid, rn
        FROM
            ( -- keep only groups in which every word of the sequence matched
            SELECT textblockid, sentence, position, tokenid, rn, count(*) OVER (PARTITION BY grp) AS counting
            FROM
                ( -- match source rows with the lookup table by ordinal and token id
                SELECT source.textblockid, source.sentence, source.position, source.tokenid, source.rn, source.grp
                FROM
                    ( -- number the rows inside each group, starting at the first word
                    SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid, grp
                          ,CASE WHEN grp > 0 THEN
                               row_number() OVER (PARTITION BY grp ORDER BY tb.textblockid, tb.sentence, tb.position)
                           END AS rn
                    FROM
                        ( -- running count of occurrences of the first word; each occurrence opens a new group
                        SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid
                              ,SUM(CASE WHEN tb.tokenid = ids[1] THEN 1 ELSE 0 END) OVER (ORDER BY tb.textblockid, tb.sentence, tb.position) AS grp
                        FROM textblockhastoken tb
                        JOIN
                            ( -- sentences in which the first word appears
                              -- NOTE(review): one row per occurrence; a sentence holding the
                              -- first word several times is joined several times -- confirm
                              -- this duplication is intended.
                            SELECT textblockid, sentence
                            FROM textblockhastoken tb
                            WHERE tb.tokenid = ids[1]
                            ) res USING (textblockid, sentence)
                        ) tb
                    ) source
                -- lookup table: ordinal -> expected token id
                JOIN (SELECT row_number() OVER () as rn, id FROM unnest(ids) AS id) lookup USING (rn)
                WHERE source.tokenid = lookup.id
                ) merged
            ) g
        WHERE g.counting = array_length(ids,1)
        ORDER BY g.rn -- first words (updates) come first, deletes and renumbering after
    LOOP
        -- the first word of a match becomes the merged token, the others are removed
        IF (r.rn = 1) THEN
            RAISE NOTICE 'Updating word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            UPDATE textblockhastoken tb SET tokenid = current_id
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = ( r.textblockid,  r.sentence,  r.position);
        ELSE
            RAISE NOTICE 'Deleting word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            DELETE FROM textblockhastoken tb
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = ( r.textblockid,  r.sentence,  r.position);
        END IF;

        -- after the last word of a match, close the gaps in the sentence's positions
        IF (r.rn = array_length(ids,1)) THEN
            RAISE NOTICE 'Changing positions in T:% S:%', r.textblockid, r.sentence;
            UPDATE textblockhastoken tb SET position = new_position
            FROM
                ( SELECT textblockid, sentence, position
                        ,row_number() OVER (PARTITION BY tb.textblockid, tb.sentence ORDER BY tb.position) as new_position
                  FROM textblockhastoken tb
                  WHERE tb.textblockid = r.textblockid AND tb.sentence = r.sentence
                ) np
            WHERE (tb.textblockid, tb.sentence, tb.position)
                = (np.textblockid, np.sentence, np.position)
            AND tb.position <> np.new_position;
        END IF;
    END LOOP;
END;
$body$ LANGUAGE plpgsql;
答案 3 :(得分:0)
这可以作为merge_tokens函数的一部分吗?好像你可以让这个函数跟踪哪些记录需要更新/删除,只需基于提供的数组(第一个元素更新,其余删除)。
答案 4 :(得分:0)
这个答案适用于我的具体情况。我不知道是否是最好的方式,但对我有用。
我使用以下问题的答案构建此过程:Is possible have different conditions for each row in a query?和How create a WINDOW in PostgreSQL until the same value appears again?
FOREACH 仅在 PostgreSQL 9.1 及以上版本可用。
-- merge_tokens(words, separator)
--
-- Merges every occurrence of the word sequence `words` in textblockhastoken
-- into a single token whose text is the words joined by `separator`.
--   words     : the words to merge, in the order they must appear
--   separator : string placed between the words of the merged token
-- Side effects: may insert one row into token; updates, deletes and
-- renumbers rows of textblockhastoken. Progress is reported via NOTICEs.
CREATE OR REPLACE FUNCTION merge_tokens(words VARCHAR[], separator VARCHAR)
RETURNS VOID
AS $$
DECLARE
    r              RECORD;
    current_id     INTEGER;
    current_word   VARCHAR;
    ids            INTEGER[];
    generated_word VARCHAR;
BEGIN
    -- get the ids and generate the word
    RAISE NOTICE 'Getting ids and generating words';
    -- Collect the token id of each word, preserving the input order.
    -- A word without a token row contributes a NULL element.
    FOREACH current_word IN ARRAY words
    LOOP
        SELECT t.id INTO current_id FROM token t WHERE t.text = current_word;
        ids := ids || current_id;
    END LOOP;

    -- Join the words once. Appending the separator after every word and
    -- trimming afterwards only works when the separator is a space.
    generated_word := array_to_string(words, separator);
    RAISE NOTICE 'Generated word: %', generated_word;

    -- insert the merged word only if it does not exist yet
    SELECT t.id INTO current_id FROM token t WHERE t.text = generated_word;
    IF (current_id IS NULL) THEN
        RAISE NOTICE 'Word don''t exists';
        -- RETURNING reads back the id of this very row instead of relying on
        -- the session-wide lastval().
        INSERT INTO token(id, text)
        VALUES (nextval('tokenidsqc'), generated_word)
        RETURNING id INTO current_id;
    END IF;
    RAISE NOTICE 'Word id: %', current_id;

    -- select the records that will be updated
    RAISE NOTICE 'Getting words to be updated.';
    FOR r IN
        SELECT matched.textblockid, matched.sentence, matched.position, matched.tokenid, matched.row_number
        FROM
        (
            -- keep only groups in which every word of the sequence matched
            SELECT merged.textblockid, merged.sentence, merged.position, merged.tokenid, merged.row_number,
                   count(*) OVER w AS counting
            FROM
            (
                -- match source rows with the lookup table by ordinal and token id
                SELECT source.textblockid, source.sentence, source.position, source.tokenid, source.row_number, source.grp
                FROM
                (   -- number the rows inside each group, starting at the first word
                    SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid, grp,
                           CASE WHEN grp > 0 THEN
                               row_number() OVER (PARTITION BY grp ORDER BY tb.textblockid, tb.sentence, tb.position)
                           END AS row_number
                    FROM
                    (   -- running count of occurrences of the first word; each occurrence opens a new group
                        SELECT tb.textblockid, tb.sentence, tb.position, tb.tokenid,
                               SUM(CASE WHEN tb.tokenid = ids[1] THEN 1 ELSE 0 END)
                                   OVER (ORDER BY tb.textblockid, tb.sentence, tb.position) AS grp
                        FROM textblockhastoken tb
                        JOIN
                        (   -- sentences in which the first word appears
                            -- NOTE(review): one row per occurrence; a sentence holding the
                            -- first word several times is joined several times -- confirm
                            -- this duplication is intended.
                            SELECT textblockid, sentence
                            FROM textblockhastoken tb
                            WHERE tb.tokenid = ids[1]
                        ) res
                            ON  tb.textblockid = res.textblockid
                            AND tb.sentence = res.sentence
                    ) tb
                ) source
                JOIN
                (   -- lookup table: ordinal -> expected token id
                    SELECT row_number() OVER () AS row_number, id FROM unnest(ids::INTEGER[]) AS id
                ) lookup
                    ON  source.tokenid = lookup.id
                    AND source.row_number = lookup.row_number
            ) merged
            WINDOW w AS (PARTITION BY grp)
        ) matched
        WHERE matched.counting = array_length(ids, 1)
        ORDER BY matched.row_number -- first words (updates) come first, deletes and renumbering after
    LOOP
        -- the first word of a match becomes the merged token, the others are removed
        IF (r.row_number = 1) THEN
            RAISE NOTICE 'Updating word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            UPDATE textblockhastoken tb SET tokenid = current_id
            WHERE tb.textblockid = r.textblockid
              AND tb.sentence = r.sentence
              AND tb.position = r.position;
        ELSE
            RAISE NOTICE 'Deleting word in T:% S:% P:%', r.textblockid, r.sentence, r.position;
            DELETE FROM textblockhastoken tb
            WHERE tb.textblockid = r.textblockid
              AND tb.sentence = r.sentence
              AND tb.position = r.position;
        END IF;

        -- after the last word of a match, close the gaps in the sentence's positions
        IF (r.row_number = array_length(ids, 1)) THEN
            RAISE NOTICE 'Changing positions in T:% S:%', r.textblockid, r.sentence;
            UPDATE textblockhastoken tb SET position = new_position
            FROM
            (
                SELECT tb.textblockid, tb.sentence, tb.position, row_number() OVER w AS new_position
                FROM textblockhastoken tb
                WHERE tb.textblockid = r.textblockid AND tb.sentence = r.sentence
                WINDOW w AS (PARTITION BY tb.textblockid, tb.sentence ORDER BY tb.position)
            ) new_positioning
            WHERE tb.textblockid = new_positioning.textblockid
              AND tb.sentence = new_positioning.sentence
              AND tb.position = new_positioning.position
              AND tb.position <> new_positioning.new_position;
        END IF;
    END LOOP;
END
$$
LANGUAGE plpgsql;