我有一个查询,需要对存储潜在客户数据的同一个表进行大量连接。在潜在客户数量很多(约5k)的广告系列中,查询运行速度非常慢。仅供参考,该表有40657335条记录,而查询中包含大量连接。我已经建立了索引。有人对如何解决这个问题有什么建议吗?
-- Lead export query: returns at most 100 rows of reg_event_uniques with
-- cid = 3741 (presumably the campaign id -- confirm), ordered by DATE
-- descending, plus one output column per joined reg_event_fields value.
-- reg_event_fields holds field values as (field_id, value, reg_id) rows, so
-- each of the 54 exported fields is fetched through its own LEFT JOIN.
SELECT to_char(DATE, 'MM/DD/YYYY HH:MI:SS AM') AS "Date"
,email AS "Email"
,f512.value AS "f512value"
,f341.value AS "f341value"
,f11.value AS "f11value"
,f12.value AS "f12value"
,f15.value AS "f15value"
,f14.value AS "f14value"
,UPPER(f21.value) AS "f21value"
,f20.value AS "f20value"
,f19.value AS "f19value"
,f22.value AS "f22value"
,f225.value AS "f225value"
,f223.value AS "f223value"
,f24.value AS "f24value"
,f28.value AS "f28value"
,f228.value AS "f228value"
,f229.value AS "f229value"
,f231.value AS "f231value"
,f230.value AS "f230value"
,f232.value AS "f232value"
,f233.value AS "f233value"
,f234.value AS "f234value"
,f235.value AS "f235value"
,f264.value AS "f264value"
,f246.value AS "f246value"
,f247.value AS "f247value"
,f248.value AS "f248value"
,f249.value AS "f249value"
,f250.value AS "f250value"
,f251.value AS "f251value"
,f252.value AS "f252value"
,f253.value AS "f253value"
,f314.value AS "f314value"
,f254.value AS "f254value"
,f255.value AS "f255value"
,f256.value AS "f256value"
,f236.value AS "f236value"
,f242.value AS "f242value"
,f241.value AS "f241value"
,f257.value AS "f257value"
,f258.value AS "f258value"
,f259.value AS "f259value"
,f260.value AS "f260value"
,f261.value AS "f261value"
,f262.value AS "f262value"
,f263.value AS "f263value"
,f243.value AS "f243value"
,f244.value AS "f244value"
,f237.value AS "f237value"
,f238.value AS "f238value"
,f239.value AS "f239value"
,f240.value AS "f240value"
,f245.value AS "f245value"
,f82.value AS "f82value"
,f127.value AS "f127value"
,classification AS "Classification"
,sent_to AS "Leads Sent To"
,delivery_method AS "Delivery Method"
,sent_date AS "Date Lead Sent"
,source AS "Source"
FROM reg_event_uniques
-- One LEFT JOIN per field. The field_id filter sits in the ON clause, so a
-- reg_event_uniques row without that field is kept and the column is NULL.
LEFT JOIN reg_event_fields f512 ON f512.field_id = 512
AND f512.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f341 ON f341.field_id = 341
AND f341.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f11 ON f11.field_id = 11
AND f11.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f12 ON f12.field_id = 12
AND f12.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f15 ON f15.field_id = 15
AND f15.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f14 ON f14.field_id = 14
AND f14.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f21 ON f21.field_id = 21
AND f21.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f20 ON f20.field_id = 20
AND f20.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f19 ON f19.field_id = 19
AND f19.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f22 ON f22.field_id = 22
AND f22.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f225 ON f225.field_id = 225
AND f225.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f223 ON f223.field_id = 223
AND f223.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f24 ON f24.field_id = 24
AND f24.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f28 ON f28.field_id = 28
AND f28.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f228 ON f228.field_id = 228
AND f228.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f229 ON f229.field_id = 229
AND f229.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f231 ON f231.field_id = 231
AND f231.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f230 ON f230.field_id = 230
AND f230.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f232 ON f232.field_id = 232
AND f232.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f233 ON f233.field_id = 233
AND f233.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f234 ON f234.field_id = 234
AND f234.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f235 ON f235.field_id = 235
AND f235.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f264 ON f264.field_id = 264
AND f264.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f246 ON f246.field_id = 246
AND f246.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f247 ON f247.field_id = 247
AND f247.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f248 ON f248.field_id = 248
AND f248.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f249 ON f249.field_id = 249
AND f249.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f250 ON f250.field_id = 250
AND f250.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f251 ON f251.field_id = 251
AND f251.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f252 ON f252.field_id = 252
AND f252.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f253 ON f253.field_id = 253
AND f253.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f314 ON f314.field_id = 314
AND f314.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f254 ON f254.field_id = 254
AND f254.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f255 ON f255.field_id = 255
AND f255.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f256 ON f256.field_id = 256
AND f256.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f236 ON f236.field_id = 236
AND f236.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f242 ON f242.field_id = 242
AND f242.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f241 ON f241.field_id = 241
AND f241.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f257 ON f257.field_id = 257
AND f257.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f258 ON f258.field_id = 258
AND f258.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f259 ON f259.field_id = 259
AND f259.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f260 ON f260.field_id = 260
AND f260.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f261 ON f261.field_id = 261
AND f261.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f262 ON f262.field_id = 262
AND f262.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f263 ON f263.field_id = 263
AND f263.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f243 ON f243.field_id = 243
AND f243.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f244 ON f244.field_id = 244
AND f244.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f237 ON f237.field_id = 237
AND f237.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f238 ON f238.field_id = 238
AND f238.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f239 ON f239.field_id = 239
AND f239.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f240 ON f240.field_id = 240
AND f240.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f245 ON f245.field_id = 245
AND f245.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f82 ON f82.field_id = 82
AND f82.reg_id = reg_event_uniques.reg_event_id
LEFT JOIN reg_event_fields f127 ON f127.field_id = 127
AND f127.reg_id = reg_event_uniques.reg_event_id
WHERE reg_event_uniques.cid = 3741
-- NOTE(review): AND binds tighter than OR, so the group below reads as
--   value is 'UNITED STATES'
--   OR value is 'CANADA'
--   OR (value is neither AND value IS NOT NULL)
--   OR value IS NULL
-- Every possible f21.value, NULL included, satisfies one of these branches,
-- so this group does not filter out any row.
AND (
UPPER(f21.value) = 'UNITED STATES'
OR UPPER(f21.value) = 'CANADA'
OR UPPER(f21.value) != 'UNITED STATES'
AND UPPER(f21.value) != 'CANADA'
AND f21.value IS NOT NULL
OR f21.value IS NULL
)
-- The only effective filters besides cid: three accepted classifications.
AND (
reg_event_uniques.classification = 'End user or machine builder'
OR reg_event_uniques.classification = 'Interested in sponsoring'
OR reg_event_uniques.classification = 'Automation suppliers'
)
-- NOTE(review): unquoted DATE folds to lower case and so does not match the
-- quoted output alias "Date"; in PostgreSQL the sort should therefore use the
-- underlying column rather than the formatted text -- confirm.
ORDER BY DATE DESC LIMIT 100;
这就是我的表格:
id | field_id | value | reg_id
---------------------------------------------------------------
1 | 11 | first name value would go here | 2234
2 | 12 | last name value would go here | 2234
3 | 13 | middle name value would go here | 2234
4 | 14 | address 1 value would go here | 2234
5 | 15 | address 2 value would go here | 2234
...
...
答案 0 :(得分:3)
这是一个令人难以置信的问题。我检查了日历以确保4月1日结束。
在包含40M行的表上有超过50个连接。当然这会很慢!引用@cha,这看起来像EAV模型出了问题。阅读本文:
解决方案是重新构建数据库。使用类似hstore
或jsonb
的文档类型,或使用带有许多可空列的宽行。
您在评论中提到有20个字段(或100个字段)可能为NULL,这几乎无关紧要。NULL的存储非常便宜:带有NULL的64列仅占用NULL位图中的8个字节。相比之下,您的模型非常昂贵:每多一行的开销至少为28个字节:
f21.value
上的所有条件归根结底……什么也没有过滤。该字段必须是美国或加拿大,或者既不是美国也不是加拿大,或者为NULL - 简而言之:它可以是任何值,这些条件只是噪音。删除它们。
如果f21.value
上的条件有意义,您可以使用JOIN
代替LEFT JOIN
,并将对f21
的联接移到最前面。有了这么多连接,Postgres就无法再适当地优化查询了。您必须确保连接已经按有用的顺序编写。不要让Postgres尝试重新排列连接顺序,那只会浪费时间。使用set join_collapse_limit = 1
:
-- Pin the join order: with join_collapse_limit = 1 the planner keeps explicit
-- JOINs in the order they are written, so the inner join on f21 is done first.
-- SET LOCAL only lasts until the end of the current transaction (outside a
-- transaction block it has no effect), hence the explicit BEGIN / COMMIT.
BEGIN;
SET LOCAL join_collapse_limit = 1;
SELECT to_char(DATE, 'MM/DD/YYYY HH:MI:SS AM') AS "Date"
, ...
FROM reg_event_uniques r
-- Inner join: rows without a field 21 value are dropped, which is only
-- appropriate once a real condition on f21.value is in place below.
-- All join conditions go through the alias r: once a table has an alias,
-- PostgreSQL no longer accepts its original name in the same query.
JOIN reg_event_fields f21 ON f21.field_id = 21
AND f21.reg_id = r.reg_event_id
LEFT JOIN reg_event_fields f512 ON f512.field_id = 512
AND f512.reg_id = r.reg_event_id
...
LEFT JOIN reg_event_fields f127 ON f127.field_id = 127
AND f127.reg_id = r.reg_event_id
WHERE r.cid = 3741
AND r.classification IN
('End user or machine builder'
, 'Interested in sponsoring'
, 'Automation suppliers')
AND f21.value ... -- some useful condition
ORDER BY DATE DESC
LIMIT 100;
COMMIT;
答案 1 :(得分:1)
涉及大型表的大量连接的查询很慢,这并不奇怪。如果任何连接将内部表的多行与外部表的同一行匹配,则肯定会更糟。
假设给定的注册每个字段最多只有一个值,则至少不会发生多个匹配。您也可以在没有这么多连接的情况下重写查询 - 实际上只有一个连接。像这样:
-- Single-join rewrite: first pick the 100 newest matching rows of
-- reg_event_uniques, then attach their field values, pivoted into one row
-- per reg_id by conditional aggregation.
-- NOTE(review): there is no outer ORDER BY, so SQL does not guarantee that the
-- final rows come back in the inline view's order.
SELECT *
FROM (
    -- Filtering and LIMIT use reg_event_uniques columns only, so they are
    -- applied before reg_event_fields is touched.
    SELECT
        reg_event_id
        ,to_char(DATE, 'MM/DD/YYYY HH:MI:SS AM') AS "Date"
        ,email AS "Email"
        ,classification AS "Classification"
        ,sent_to AS "Leads Sent To"
        ,delivery_method AS "Delivery Method"
        ,sent_date AS "Date Lead Sent"
        ,source AS "Source"
    FROM reg_event_uniques
    WHERE cid = 3741
        -- A WHERE condition must be boolean in PostgreSQL; an IN list replaces
        -- the former integer-valued CASE expression.
        AND classification IN (
            'End user or machine builder'
            ,'Interested in sponsoring'
            ,'Automation suppliers'
        )
    -- Sort on the raw column, not on the formatted "Date" text.
    ORDER BY reg_event_uniques.date DESC
    LIMIT 100
) uniq
LEFT JOIN (
    -- One output row per reg_id; each MAX() picks the value of one field_id
    -- (assumes at most one value per field and reg_id).
    SELECT
        reg_id
        ,MAX(CASE field_id WHEN 1 THEN value END) AS "f1value"
        ,MAX(CASE field_id WHEN 2 THEN value END) AS "f2value"
        ,MAX(CASE field_id WHEN 3 THEN value END) AS "f3value"
        -- ...
    FROM reg_event_fields
    GROUP BY reg_id
) fields
ON fields.reg_id = uniq.reg_event_id;
由于整个查询的过滤条件仅依赖于reg_event_uniques
表中的列,并且由于该表是原始查询中最外层的表,因此该版本使用内联视图仅选择reg_event_uniques
中对最终结果有贡献的行。请注意,它按基表的date
列排序,而不是按结果的"Date"
列排序,因为后者将产生错误的顺序(按字典序)。此内联视图可能有用,也可能没用,因为如果查询规划器足够聪明,它可以对原始查询执行相同的重组。
主要的进步是将字段表转换为每个reg_id
只有一行的行集,并将所有相应的字段值作为单独的列包含在内。为此,它使用带有聚合查询的内联视图。其中MAX()
函数的使用不是特别重要,因为对于每列,它只是在最多一个非NULL值中取最大值。MIN()
函数也可以达到完全相同的效果。