我的查询非常慢,导致超时错误。我需要所有这些数据,有时甚至按月分组。
要求是我必须能够显示最近4年的数据。结果以带分页的网格形式显示,因此必须先计算出所有这些开销很大的聚合结果。
我正在考虑运行 cron 任务来预先计算新增行的聚合结果,这样就不必在查询时即时(on the fly)使用聚合函数,但我应该如何处理旧数据(约1亿行)?
有问题的查询 - 有时我需要分组并计算结果
-- Totals and derived ratios over aff_ref for a closed range of days.
-- Every ratio guards its denominator with NULLIF(..., 0), so a zero total
-- yields NULL for that column instead of a division-by-zero error.
-- NOTE(review): the aliases bounce_pc, bounce_mobile and bounce_tablet are each
-- emitted twice (raw sum first, percentage second). Both are kept so the
-- output column list is unchanged, but a client that reads columns by name
-- will only see one of each pair -- confirm which is intended and rename.
SELECT
    (SUM(onsite) / NULLIF(SUM(sessions), 0)) AS sumonsite,
    SUM(onsite) AS sum_onsite,
    SUM(bounce_count) AS bounce_count,
    -- Raw per-device bounce sums.
    SUM(bounce_desktop) AS bounce_pc,
    SUM(bounce_mobile) AS bounce_mobile,
    SUM(bounce_tablet) AS bounce_tablet,
    -- Per-device bounce sums as a percentage of that device's uniques.
    (SUM(bounce_desktop) / NULLIF(SUM(uniques_desktop), 0)) * 100 AS bounce_pc,
    (SUM(bounce_mobile) / NULLIF(SUM(uniques_mobile_phone), 0)) * 100 AS bounce_mobile,
    (SUM(bounce_tablet) / NULLIF(SUM(uniques_tablet), 0)) * 100 AS bounce_tablet,
    SUM(sessions) AS sessions,
    SUM(quality_3) AS quality_3,
    SUM(quality_2) AS quality_2,
    SUM(quality_1) AS quality_1,
    (SUM(amount)::float / NULLIF(SUM(uniques), 0)) * 1000 AS avg_cpc,
    (SUM(bounce_count)::float / NULLIF(SUM(sessions), 0)) * 100 AS sumbounce,
    (AVG(quality_3) / NULLIF(AVG(uniques), 0)) * 100 AS hq_quality,
    (AVG(quality_2) / NULLIF(AVG(uniques), 0)) * 100 AS mq_quality,
    (AVG(quality_1) / NULLIF(AVG(uniques), 0)) * 100 AS lq_quality,
    SUM(CAST(money_bonus AS numeric(15,2))) AS activity,
    SUM(money_volume) AS volume,
    SUM(amount) AS sumamount,
    (SUM(clicks)::float / NULLIF(SUM(sessions), 0)) AS pages_per_visit,
    SUM(add_par_1) AS video_views,
    SUM(add_par_3) AS video_views_clicks,
    ((SUM(add_par_1)::decimal / NULLIF(SUM(sessions)::decimal, 0)) * 100)::decimal(15,2) AS sum_video_views,
    -- Device share of all uniques, in percent. The denominator is guarded like
    -- the other ratios; previously a zero SUM(uniques) raised an error here.
    100 * SUM(uniques_mobile_phone)::float / NULLIF(SUM(uniques), 0)::float AS uniques_mobile_phone,
    100 * SUM(uniques_tablet)::float / NULLIF(SUM(uniques), 0)::float AS uniques_tablet
FROM aff_ref AS t
-- NOTE(review): no column of ad is selected. The join is kept because other
-- variants of this query may filter or group on it -- confirm, and drop it
-- if nothing references ad.
LEFT JOIN affiliate_domains AS ad
    ON ad.domain = t.referer
    AND ad.affiliate_id = t.affiliate_id
-- day is already a DATE column, so it is compared directly (no DATE(...) wrapper).
WHERE t.day >= DATE '2013-12-14'
    AND t.day <= DATE '2018-01-20'
查询表
大小 - 50 GB,行数 - 94 917 680,每天新增约 50 万行
-- Statistics table queried by the aggregate report.
-- All columns are NOT NULL with a default, except bounce_desktop,
-- bounce_tablet, bounce_mobile and country, which are nullable.
CREATE TABLE aff_ref
(
    -- Identifying columns.
    site_id INTEGER NOT NULL,
    day DATE NOT NULL,
    affiliate_id INTEGER DEFAULT 0 NOT NULL,
    referer VARCHAR(250) NOT NULL,

    -- Counters and derived values.
    uniques INTEGER DEFAULT 0 NOT NULL,
    uniques_hq INTEGER DEFAULT 0 NOT NULL,
    clicks INTEGER DEFAULT 0 NOT NULL,
    uniques_bounce_count BIGINT DEFAULT (0)::bigint NOT NULL,
    avg_clicks_all DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    bounce_count INTEGER DEFAULT 0 NOT NULL,
    bounce DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    onsite BIGINT DEFAULT (0)::bigint NOT NULL,
    avg_onsite DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    -- Unlike the other counters, sessions defaults to 1.
    sessions INTEGER DEFAULT 1 NOT NULL,
    quality_1 INTEGER DEFAULT 0 NOT NULL,
    quality_2 INTEGER DEFAULT 0 NOT NULL,
    quality_3 INTEGER DEFAULT 0 NOT NULL,
    hq_1 INTEGER DEFAULT 0 NOT NULL,
    hq_2 INTEGER DEFAULT 0 NOT NULL,
    hq_3 INTEGER DEFAULT 0 NOT NULL,
    add_par_1 INTEGER DEFAULT 0 NOT NULL,
    add_par_2 INTEGER DEFAULT 0 NOT NULL,
    add_par_3 INTEGER DEFAULT 0 NOT NULL,
    add_par_4 INTEGER DEFAULT 0 NOT NULL,
    add_par_5 BIGINT DEFAULT (0)::bigint NOT NULL,
    avg_videoviews DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    avg_searches DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,

    -- money_* / amount_* / *_coef / cpc values, all stored as floating point.
    money_prime DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    money_prime_low DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    money_prime_bounce DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    money_bonus DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    amount DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    amount_basic DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    onsite_coef DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    pageviews_coef DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    videoviews_coef DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    searches_coef DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    money_volume DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    cpc DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    recur_direct INTEGER DEFAULT 0 NOT NULL,
    recur_search INTEGER DEFAULT 0 NOT NULL,
    totally_fresh INTEGER DEFAULT 0 NOT NULL,
    -- ntv_ctr is REAL; the other *_ctr columns are DOUBLE PRECISION.
    ntv_ctr REAL DEFAULT (0)::real NOT NULL,
    top_ctr DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    videofooter_ctr DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    money_alt DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    amount_alt DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    money_prime_old DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    amount_old DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    amount_por DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    por_id BIGINT DEFAULT (0)::bigint NOT NULL,
    amount_test INTEGER DEFAULT 0 NOT NULL,

    -- js_* / vp_* / nb_* counters.
    js_time BIGINT DEFAULT (0)::bigint NOT NULL,
    js_time_mouse BIGINT DEFAULT (0)::bigint NOT NULL,
    js_exists_count BIGINT DEFAULT (0)::bigint NOT NULL,
    js_not_exists_count BIGINT DEFAULT (0)::bigint NOT NULL,
    vp_un BIGINT DEFAULT (0)::bigint NOT NULL,
    vp_un_tr BIGINT DEFAULT (0)::bigint NOT NULL,
    nb_normal_hq BIGINT DEFAULT (0)::bigint NOT NULL,
    nb_normal_mq BIGINT DEFAULT (0)::bigint NOT NULL,
    nb_normal_lq BIGINT DEFAULT (0)::bigint NOT NULL,
    nb_embed_hq BIGINT DEFAULT (0)::bigint NOT NULL,
    nb_embed_mq BIGINT DEFAULT (0)::bigint NOT NULL,
    nb_embed_lq BIGINT DEFAULT (0)::bigint NOT NULL,
    custom_cpc DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,
    amount_old_cpc DOUBLE PRECISION DEFAULT (0)::double precision NOT NULL,

    -- Desktop / mobile phone / tablet breakdown columns.
    uniques_desktop INTEGER DEFAULT 0 NOT NULL,
    uniques_mobile_phone INTEGER DEFAULT 0 NOT NULL,
    uniques_tablet INTEGER DEFAULT 0 NOT NULL,
    clicks_desktop INTEGER DEFAULT 0 NOT NULL,
    clicks_mobile_phone INTEGER DEFAULT 0 NOT NULL,
    clicks_tablet INTEGER DEFAULT 0 NOT NULL,
    -- Nullable: these three carry a default but no NOT NULL constraint.
    bounce_desktop DOUBLE PRECISION DEFAULT (0)::double precision,
    bounce_tablet DOUBLE PRECISION DEFAULT (0)::double precision,
    bounce_mobile DOUBLE PRECISION DEFAULT (0)::double precision,

    -- Nullable, no default.
    country VARCHAR(2)
);
-- Uniqueness per (day, site_id, affiliate_id, referer, country).
-- Given a distinct name: both index statements previously used
-- ref_sites_day_aff_stype_idx, so the second CREATE failed because a relation
-- with that name already existed.
CREATE UNIQUE INDEX ref_sites_day_aff_stype_country_idx ON aff_ref (day, site_id, affiliate_id, referer, country);
-- Uniqueness per (day, site_id, affiliate_id, referer).
-- NOTE(review): this four-column uniqueness already implies the five-column
-- one above, so one of the two indexes may be redundant -- confirm which key
-- is actually intended before dropping either.
CREATE UNIQUE INDEX ref_sites_day_aff_stype_idx ON aff_ref (day, site_id, affiliate_id, referer);
被连接(JOIN)的表:大小 - 11 MB,行数 - 107 278
-- Domain table joined to aff_ref on (affiliate_id, domain).
-- The table is created as affiliate_domains because both the unique index
-- below and the reporting query reference that name; with the previous name
-- (domains) the CREATE UNIQUE INDEX statement targeted a relation that this
-- script never created.
-- NOTE(review): the constraint and index names keep their "domains_" prefix
-- so existing references to those names stay valid -- confirm the real table
-- name against the live schema.
CREATE TABLE affiliate_domains
(
    id INTEGER PRIMARY KEY NOT NULL,
    affiliate_id INTEGER NOT NULL,
    domain TEXT NOT NULL,
    checked_date TIMESTAMP,
    status SMALLINT DEFAULT 0 NOT NULL,
    addedon_date TIMESTAMP(6),
    suspended_date TIMESTAMP(6),
    checked_via SMALLINT DEFAULT (1)::smallint NOT NULL,
    -- Nullable, unlike the other SMALLINT flag columns.
    is_redirect SMALLINT DEFAULT (0)::smallint,
    compliance SMALLINT DEFAULT (-1) NOT NULL,
    note VARCHAR(512),
    CONSTRAINT domains_affiliate_id_fkey FOREIGN KEY (affiliate_id) REFERENCES affiliates (affiliate_id)
);
-- At most one row per (affiliate_id, domain), so a join on both columns
-- matches at most one row of this table.
CREATE UNIQUE INDEX domains_affiliate_id_domain_key ON affiliate_domains (affiliate_id, domain);
Execution Plan:
Aggregate (cost=18169962.62..18169962.76 rows=1 width=123) (actual time=379233.584..379233.584 rows=1 loops=1)
-> Seq Scan on stats_aff_ref_sites t (cost=0.00..8203606.20 rows=94917680 width=123) (actual time=0.005..159746.597 rows=94917677 loops=1)
Filter: ((day >= '2013-12-14'::date) AND (day <= '2018-01-20'::date))
Planning time: 0.360 ms
Execution time: 379233.797 ms
答案 0 :(得分:-1)
我在这里看到2个简单的解决方案:
永远不要查询用不到的数据。应用程序有分页吗?太棒了——那就每次只查询一页的数据,以减轻服务器负载(确保在条件中使用已建立索引的字段)。
您需要汇总数据吗?太棒了 - 只需创建一个表,其中聚合结果按一些属性值(按日/周/月计算)进行拆分。最后一个(当前月份数据)将从主表中即时调用。