我有一个非常慢的查询(运行时间超过30分钟),我认为通过更高效的写法可以让它快很多。以下是代码以及对应的查询计划。我想知道如何加快这类在大型表上执行多个连接的查询。
-- Rebuild totalshad: the summed price per (pricedate, hour) over every
-- pjm.rtcons row dated on or after 2017-12-01. No upper bound on pricedate
-- is applied (the script carried a commented-out upper limit of 2018-01-23).
DROP TABLE IF EXISTS totalshad;

CREATE TEMPORARY TABLE totalshad AS
SELECT
    rtcons.pricedate,
    rtcons.hour,
    SUM(CAST(rtcons.price AS numeric)) AS totalprice
FROM pjm.rtcons
WHERE rtcons.pricedate >= '2017-12-01'
GROUP BY
    rtcons.pricedate,
    rtcons.hour
ORDER BY
    rtcons.pricedate,
    rtcons.hour;
-----------------------------
-- Rebuild percshad: the five grouped rows with the largest per_shad, where
-- per_shad = price / totalprice (rounded to 4 places) for the facility
-- 'ETOWANDA-NMESHOPP ETL 1057 A 115 KV', matched to totalshad on pricedate/hour.
-- Hours whose totalprice is zero are filtered out, so the division never
-- sees a zero denominator.
-- NOTE(review): this filter uses pricedate > '2017-12-01' while totalshad is
-- built with >=, so 2017-12-01 itself is excluded here; confirm that is intended.
DROP TABLE IF EXISTS percshad;

CREATE TEMPORARY TABLE percshad AS
SELECT
    totalshad.pricedate,
    totalshad.hour,
    rtcons.facility,
    ROUND(SUM(CAST(rtcons.price AS numeric)), 2) AS cons_shad,
    ROUND(SUM(CAST(totalshad.totalprice AS numeric)), 2) AS total_shad,
    ROUND(CAST(rtcons.price / totalshad.totalprice AS numeric), 4) AS per_shad
FROM totalshad
INNER JOIN pjm.rtcons
    ON rtcons.pricedate = totalshad.pricedate
    AND rtcons.hour = totalshad.hour
WHERE rtcons.facility = 'ETOWANDA-NMESHOPP ETL 1057 A 115 KV'
    AND totalshad.totalprice <> 0
    AND totalshad.pricedate > '2017-12-01'
GROUP BY
    totalshad.pricedate,
    totalshad.hour,
    rtcons.facility,
    (rtcons.price / totalshad.totalprice)
ORDER BY per_shad DESC
LIMIT 5;
-- Show the plan (EXPLAIN only, the query itself is not executed) for pairing
-- each percshad row with the pjm.prices rows that hold the lowest and the
-- highest rtmcc for the same pricedate/hour, plus the node names of those
-- rows from pjm.nodes.
-- The MIN/MAX lookups are correlated subqueries placed inside the join
-- conditions.
EXPLAIN
SELECT
    percshad.facility,
    percshad.pricedate,
    percshad.hour,
    percshad.per_shad,
    minmcc.rtmcc,
    minnode.nodename,
    maxmcc.rtmcc,
    maxnode.nodename
FROM percshad
INNER JOIN pjm.prices AS minmcc
    ON minmcc.pricedate = percshad.pricedate
    AND minmcc.hour = percshad.hour
    AND minmcc.rtmcc = (
        SELECT MIN(prices.rtmcc)
        FROM pjm.prices
        WHERE prices.pricedate = percshad.pricedate
            AND prices.hour = percshad.hour
    )
INNER JOIN pjm.nodes AS minnode
    ON minnode.node_id = minmcc.node_id
INNER JOIN pjm.prices AS maxmcc
    ON maxmcc.pricedate = percshad.pricedate
    AND maxmcc.hour = percshad.hour
    AND maxmcc.rtmcc = (
        SELECT MAX(prices.rtmcc)
        FROM pjm.prices
        WHERE prices.pricedate = percshad.pricedate
            AND prices.hour = percshad.hour
    )
INNER JOIN pjm.nodes AS maxnode
    ON maxnode.node_id = maxmcc.node_id
ORDER BY per_shad DESC
LIMIT 5
这是EXPLAIN输出:
更新:我现在已将代码简化为以下内容。但从EXPLAIN的结果可以看出,最后一个select语句中查找node_id的步骤仍然要花很长时间。
-- Rebuild totalshad: the summed price per (pricedate, hour) over every
-- pjm.rtcons row dated on or after 2017-12-01. No upper bound on pricedate
-- is applied (the script carried a commented-out upper limit of 2018-01-23).
DROP TABLE IF EXISTS totalshad;

CREATE TEMPORARY TABLE totalshad AS
SELECT
    rtcons.pricedate,
    rtcons.hour,
    SUM(CAST(rtcons.price AS numeric)) AS totalprice
FROM pjm.rtcons
WHERE rtcons.pricedate >= '2017-12-01'
GROUP BY
    rtcons.pricedate,
    rtcons.hour
ORDER BY
    rtcons.pricedate,
    rtcons.hour;
-----------------------------
-- Rebuild percshad: the five grouped rows with the largest per_shad, where
-- per_shad = price / totalprice (rounded to 4 places) for the facility
-- 'ETOWANDA-NMESHOPP ETL 1057 A 115 KV', matched to totalshad on pricedate/hour.
-- Hours whose totalprice is zero are filtered out, so the division never
-- sees a zero denominator.
-- NOTE(review): this filter uses pricedate > '2017-12-01' while totalshad is
-- built with >=, so 2017-12-01 itself is excluded here; confirm that is intended.
DROP TABLE IF EXISTS percshad;

CREATE TEMPORARY TABLE percshad AS
SELECT
    totalshad.pricedate,
    totalshad.hour,
    rtcons.facility,
    ROUND(SUM(CAST(rtcons.price AS numeric)), 2) AS cons_shad,
    ROUND(SUM(CAST(totalshad.totalprice AS numeric)), 2) AS total_shad,
    ROUND(CAST(rtcons.price / totalshad.totalprice AS numeric), 4) AS per_shad
FROM totalshad
INNER JOIN pjm.rtcons
    ON rtcons.pricedate = totalshad.pricedate
    AND rtcons.hour = totalshad.hour
WHERE rtcons.facility = 'ETOWANDA-NMESHOPP ETL 1057 A 115 KV'
    AND totalshad.totalprice <> 0
    AND totalshad.pricedate > '2017-12-01'
GROUP BY
    totalshad.pricedate,
    totalshad.hour,
    rtcons.facility,
    (rtcons.price / totalshad.totalprice)
ORDER BY per_shad DESC
LIMIT 5;
-- Rebuild mincong: for every pricedate/hour present in percshad, the smallest
-- rtmcc found among the matching pjm.prices rows.
DROP TABLE IF EXISTS mincong;

CREATE TEMPORARY TABLE mincong AS
SELECT
    pricedate,
    hour,
    MIN(prices.rtmcc) AS rtmcc
FROM pjm.prices
INNER JOIN percshad USING (pricedate, hour)
GROUP BY
    pricedate,
    hour;
-- Show the plan (EXPLAIN only, the query itself is not executed) for picking
-- one node_id per pricedate/hour among the pjm.prices rows whose rtmcc equals
-- the minimum stored in mincong.
-- DISTINCT ON keeps only the first row of each (pricedate, hour) group, and
-- without an ORDER BY that row is unpredictable. Sorting by node_id inside
-- each group makes the pick deterministic: the lowest node_id wins a tie.
-- No GROUP BY is needed: DISTINCT ON already collapses duplicate
-- (pricedate, hour, node_id) rows within each group.
EXPLAIN
SELECT DISTINCT ON (pricedate, hour)
    prices.node_id
FROM mincong
INNER JOIN pjm.prices USING (pricedate, hour, rtmcc)
ORDER BY
    pricedate,
    hour,
    prices.node_id
答案 0 :(得分:1)
问题出在连接条件中的子查询:对于参与连接的每一行,它们都必须重新执行一次。
如果您无法去掉这些子查询,请尝试创建一个能够尽量支持这些子查询的索引:
-- Composite index on pjm.prices: the two equality columns used by the
-- correlated subqueries (pricedate, hour) come first, followed by rtmcc, the
-- column those subqueries take MIN/MAX of. Intended to let each subquery be
-- served from the index; verify with EXPLAIN after creating it.
CREATE INDEX
    ON pjm.prices (pricedate, hour, rtmcc);