我有一个很大的表,我想从最低的id开始,循环遍历并过滤,直到得到1000行,然后再返回。
所以我的函数getFilteredItems将进行一些过滤,然后返回1000行。它有一个latestItemId参数,因此它将在特定的id之后占用1000行。然后,getJournalistTopicItems调用getFilteredItems,该函数甚至可以过滤更多内容。大多数情况下,此功能将过滤掉所有行,因此我想循环直到所有过滤完成后最终获得1000行。
我在sql方面特别是超级菜鸟,尤其是postgres,所以这一切可能都是矫kill过正或完全错误,但这是我到目前为止所做的。
CREATE OR REPLACE FUNCTION getJournalistTopicItems() RETURNS TEXT AS $$
DECLARE
currentCount INTEGER = 0;
currentItemIds int[];
latestItem INTEGER = 0;
BEGIN
WHILE currentCount < 1000 LOOP
WITH fItems AS (
SELECT * FROM getFilteredItems(latestItem)
)
SELECT fi.itemId, ift.content
FROM fItems fi
INNER JOIN journalistitems ji ON ji.itemid = fi.itemid
INNER JOIN itemfulltexts ift ON ji.itemid = ift.itemid
INNER JOIN journalists j ON j.id = ji.journalistid
INNER JOIN contacts c ON c.journalistid = j.id
WHERE c.isprivate = FALSE
AND c.disabled = FALSE
AND c.verificationstatus = 1;
currentItemIds := array_append(fi.itemId);
currentCount := array_length(currentItemIds);
SELECT latestItemId INTO latestItem FROM getFilteredItems(latestItem) limit 1;
END LOOP;
RETURN array_to_string(currentItemIds);
END;
$$ LANGUAGE plpgsql;
getFilteredItems函数如下所示:
CREATE OR REPLACE FUNCTION getFilteredItems(latestId INTEGER) RETURNS TABLE(
latestItemId integer,
itemid integer
) AS $$
BEGIN
RETURN QUERY
WITH fItems AS(select i.id
from items i
inner join articlemetadatas am on am.itemid = i.id
where NOT EXISTS (SELECT 1 FROM journalistitemcategories jic WHERE jic.itemid = i.id)
AND NOT EXISTS (SELECT 1 FROM topicitemwords tw WHERE tw.itemid = i.id)
AND i.languagecode in ('sv')
AND am.contentprotected != 0
AND i.id > latestId
limit 1000)
SELECT (SELECT id FROM fItems order by id desc limit 1), fItems.id FROM fItems;
END;
$$ LANGUAGE plpgsql;
调用getJournalistTopicItems时出现的错误是:
ERROR: query has no destination for result data
HINT: If you want to discard the results of a SELECT, use PERFORM instead.
CONTEXT: PL/pgSQL function getjournalisttopicitems() line 8 at SQL statement
SQL state: 42601
最新更新:
CREATE OR REPLACE FUNCTION getJournalistTopicItems() RETURNS SETOF TEXT AS $func$
DECLARE
idarr int[];
idmed int[];
latestItem INTEGER = 0;
currentCount INTEGER = 0;
BEGIN
WHILE currentCount < 1000 LOOP
WITH fItems AS (
SELECT * FROM getFilteredItems(latestItem)
)
SELECT fi.itemId, ift.content
FROM fItems fi
INNER JOIN journalistitems ji ON ji.itemid = fi.itemid
INNER JOIN itemfulltexts ift ON ji.itemid = ift.itemid
INNER JOIN journalists j ON j.id = ji.journalistid
INNER JOIN contacts c ON c.journalistid = j.id
WHERE c.isprivate = FALSE
AND c.disabled = FALSE
AND c.verificationstatus = 1;
idmed := (select array_agg(itemId) from fItems);
idarr := (select idarr || idmed);
currentCount := (select array_length(currentCount, int));
latestItem := (select latestItem FROM getFilteredItems(latestItem) limit 1);
END LOOP;
RETURN QUERY SELECT array_to_string(idarr, ',');
END;
$func$ language plpgsql;