我们在PostgreSQL 9.4.4中有一个非常大的plpgsql函数和if-和elsif语句 在每个if-body中都有对stable-sql函数的函数调用。
我们按以下方式调用该函数:
rawdata.metricevent (metriceventid bigint PRIMARY KEY,
metricevent integer,
client integer,
age integer,
country varchar(256),
userideventowner bigint,
contributoruserid bigint,
tournamentid bigint,
eventoccurtime timestamp,
iscounted boolean)
该函数的前4-5次在约 2.5秒中执行得相当快,但突然性能迅速下降,执行大约需要 7.5秒。所有连续呼叫都保持在该级别。 我们还尝试将plpgsql函数声明为稳定,但这没有帮助。
当我们直接调用其中一个内部stable-sql函数时,执行总是需要大约2.5秒。
这是rawdata.metricevent表的Schema:
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUserBasedMetricEventsGroupedByClient(pFrom timestamp, pTo timestamp, pMetricEvent integer[], pTimeDomainType integer,
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
BEGIN
IF pTimeDomainType = 1 THEN
--hours
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerHours(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
ELSIF pTimeDomainType = 2 THEN
--days
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerDays(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
ELSIF pTimeDomainType = 3 THEN
--week
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerWeeks(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
ELSIF pTimeDomainType = 4 THEN
--month
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerMonths(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
END IF;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerHours(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT hours timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp + '23 hour'
, '1 hour'::interval) hours
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND hours = date_trunc('hour',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)
LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY hours
ORDER BY hours;
$$
LANGUAGE sql STABLE;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerDays(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT days timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp
, '1 day'::interval) days
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND days = date_trunc('day',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)
LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY days
ORDER BY days;
$$
LANGUAGE sql STABLE;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerWeeks(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT min(days) timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp
, '1 day'::interval) days
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND days = date_trunc('day',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)
LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY EXTRACT(WEEK FROM days)
ORDER BY 1;
$$
LANGUAGE sql STABLE;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerMonths(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT min(days) timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp
, '1 day'::interval) days
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND days = date_trunc('day',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY EXTRACT(MONTH FROM days)
ORDER BY 1;
$$
LANGUAGE sql STABLE;
我们在eventoccurtime列上有一个btree索引。如果没有btree索引,差异就会更大,执行有时会在几秒内完成,但有时会持续超过100秒。
现在我们的问题:为什么?发生了什么,当第五次或第六次执行plpgsql函数时,为什么它会突然耗费这么长时间?顺便说一下,这个查询的CPU负载也非常高。 我们还使用EXPLAIN ANALYZE分析了查询,查询计划程序ALWAYS大约需要0.034ms,但查询执行时间从2.5秒到7.5秒不等。它也从来没有介于两者之间,无论是2.5秒还是7.5秒。
这些是Main-pgpsql函数,它具有变量执行时间,下面的stable-sql函数具有不变的执行时间。
/anyword/akram/anyotherword
亲切的问候,托马斯