我正在设计一个新的数据库,并且注意到我的查询没有按预期扩展。当我的汇总涉及数百条记录时,我发现响应时间显著增加。我想知道我的查询是否存在严重缺陷,或者我只是没有使用正确的索引。
我对查询做了很多调整,但是还没有想出一种消除全表扫描而使用索引的方法。当我在查询中使用类似于EXPLAIN
的工具时,会看到以下内容:
表格:
-- Table read twice by the query under discussion: once as t1 and once inside the derived table sub.
CREATE TABLE `indexTable` (
`id` int(10) unsigned NOT NULL,
`userId` int(10) unsigned NOT NULL,
`col1` varbinary(320) NOT NULL,
`col2` tinyint(3) unsigned NOT NULL,
`col3` tinyint(3) unsigned NOT NULL,
-- NOTE(review): createdAt/updatedAt are unsigned integers, presumably epoch values; the unit is not shown here, so confirm it.
`createdAt` bigint(20) unsigned NOT NULL,
`updatedAt` bigint(20) unsigned NOT NULL,
`metadata` json NOT NULL,
-- Composite primary key; the query filters by equality on its two leading columns (id, userId).
PRIMARY KEY (`id`,`userId`,`col1`,`col2`,`col3`),
KEY `createdAt` (`createdAt`),
-- Contains every column the derived table reads: id and userId (WHERE), col1 and col2 (GROUP BY), createdAt (MAX).
KEY `id_userId_col1_col2_createdAt` (`id`,`userId`,`col1`,`col2`,`createdAt`),
-- Contains the three columns used to join t1 to sub.
KEY `col1_col2_createdAt` (`col1`,`col2`,`createdAt`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8
查询:
-- Fetch the newest row of each (col1, col2) group for one (id, userId) pair.
-- Bind order of the four placeholders: id, userId (derived table), then id, userId (outer query).
SELECT
    t1.id,
    t1.userId,
    t1.col1,
    t1.col2,
    t1.col3,
    t1.metadata
FROM indexTable AS t1
INNER JOIN (
    -- Derived table: greatest createdAt per (col1, col2), keeping the
    -- first 10 groups in ascending order of that value.
    SELECT
        col1,
        col2,
        MAX(createdAt) AS maxCreatedAt
    FROM indexTable
    WHERE id = ?
      AND userId = ?
    GROUP BY col1, col2
    ORDER BY maxCreatedAt
    LIMIT 10 OFFSET 0
) AS sub
    ON  t1.col1 = sub.col1
    AND t1.col2 = sub.col2
    AND t1.createdAt = sub.maxCreatedAt
-- The (id, userId) filter is repeated so the join only matches rows of the same pair.
WHERE t1.id = ?
  AND t1.userId = ?
ORDER BY t1.createdAt;
PK:id, userId, col1, col2, col3
索引:createdAt
EXPLAIN输出:
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "34.50"
},
"ordering_operation": {
"using_temporary_table": true,
"using_filesort": true,
"cost_info": {
"sort_cost": "10.00"
},
"nested_loop": [
{
"table": {
"table_name": "sub",
"access_type": "ALL",
"rows_examined_per_scan": 10,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.50",
"eval_cost": "2.00",
"prefix_cost": "12.50",
"data_read_per_join": "3K"
},
"used_columns": [
"col1",
"col2",
"maxCreatedAt"
],
"attached_condition": "(`sub`.`maxCreatedAt` is not null)",
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "10.27"
},
"ordering_operation": {
"using_filesort": true,
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "indexTable",
"access_type": "ref",
"possible_keys": [
"PRIMARY",
"createdAt",
"id_userId_col1_col2_createdAt",
"col1_col2_createdAt"
],
"key": "PRIMARY",
"used_key_parts": [
"id",
"userId"
],
"key_length": "8",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 46,
"rows_produced_per_join": 46,
"filtered": "100.00",
"cost_info": {
"read_cost": "1.07",
"eval_cost": "9.20",
"prefix_cost": "10.27",
"data_read_per_join": "16K"
},
"used_columns": [
"id",
"userId",
"createdAt",
"col1",
"col2",
"col3"
],
"attached_condition": "((`MyDB`.`indexTable`.`id` <=> 53) and (`MyDB`.`indexTable`.`userId` <=> 549814))"
}
}
}
}
}
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": [
"PRIMARY",
"createdAt",
"id_userId_col1_col2_createdAt",
"col1_col2_createdAt"
],
"key": "id_userId_col1_col2_createdAt",
"used_key_parts": [
"id",
"userId",
"col1",
"col2",
"createdAt"
],
"key_length": "339",
"ref": [
"const",
"const",
"sub.col1",
"sub.col2",
"sub.maxCreatedAt"
],
"rows_examined_per_scan": 1,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.00",
"eval_cost": "2.00",
"prefix_cost": "24.50",
"data_read_per_join": "3K"
},
"used_columns": [
"id",
"userId",
"createdAt",
"updatedAt",
"col1",
"col2",
"col3",
"metadata"
]
}
}
]
}
}
}
此查询查找col1
和col2
分组中的最新记录,按createdAt
排序,并将条目限制为10。
答案 0 :(得分:0)
“派生”表(子查询)需要此复合索引:
INDEX(id, userid, -- equality-filtered columns first; these two may be in either order
col1, col2, -- the GROUP BY columns next, in this order
createdAt) -- last, so the index is "covering" for the subquery
使用该索引,很可能不会进行全表扫描。但是,仍会涉及一次文件排序(filesort)。这是因为ORDER BY
与GROUP BY
的列不同,而且排序依据是一个聚合值。
t1
需要以下索引:
INDEX(col1, col2, -- join columns; these two may be in either order
createdAt) -- also compared by equality in the join (t1.createdAt = sub.maxCreatedAt)
sub,maxCreatedAt
——是笔误吗?(应为sub.maxCreatedAt)
ORDER BY t1.createdAt
——这又是一次必要的文件排序。
不必害怕文件排序,尤其是当只有10行时(如第二种情况)。
没有看到SHOW CREATE TABLE
的输出,我无法判断“文件排序”和“临时表”是否用到了磁盘,还是完全在RAM中完成的。
FORCE INDEX
几乎总是一个坏主意——即使今天有帮助,明天也可能适得其反。
如果需要读取表中过多的行,优化器会有意(且正确地)使用表扫描——这比在索引和数据之间来回跳转要快。
答案 1 :(得分:0)
我通过更新查询、在GROUP BY
中包含id
和userId
解决了此问题。这样我就能在连接条件中加上这两列,并且出于某种原因,这使MySQL使用了正确的索引。