加速与MySQL的多表连接

时间:2013-04-25 03:52:35

标签: mysql sql database innodb myisam

我有3个表,我正在尝试预先形成联接,并将结果数据插入另一个表中。查询占用15-30分钟,具体取决于数据集。我正在选择和加入的表格各自至少有25,000条记录,但很快就会增长到500k +。

我尝试在字段上添加索引,但仍然没有帮助那么多。还有其他事情我可以尝试或加入这个规模只需要花这么长时间吗?

以下是我正在尝试执行的查询:

INSERT INTO audience.topitem
(runs_id, total_training_count, item, standard_index_value, significance, seed_count, nonseed_count, prod, model_type, level_1, level_2, level_3, level_4, level_5)
SELECT 5, seed_count + nonseed_count AS total_training_count,
ii.item, standard_index_value, NULL, seed_count, nonseed_count,
standard_index_value * seed_count AS prod, 'site', topic_L1, topic_L2, topic_L3, topic_L4, topic_L5
FROM audience.item_indexes ii
LEFT JOIN audience.usercounts uc ON ii.item = uc.item AND ii.runs_id = uc.runs_id 
LEFT JOIN categorization.categorization at on ii.item = at.url
WHERE ii.runs_id = 5

表:audience.item_indexes

CREATE TABLE `item_indexes` (
`item` varchar(1024) DEFAULT NULL,
`standard_index_value` float DEFAULT NULL,
`runs_id` int(11) DEFAULT NULL,
`model_type` enum('site','term','combo') DEFAULT NULL,
KEY `item_idx` (`item`(333))
) ENGINE=MyISAM DEFAULT CHARSET=utf8; 

表:audience.usercounts

CREATE TABLE `usercounts` (
`item` varchar(1024) DEFAULT NULL,
`seed_count` int(11) DEFAULT NULL,
`nonseed_count` int(11) DEFAULT NULL,
`significance` float(19,6) DEFAULT NULL,
`runs_id` int(11) DEFAULT NULL,
`model_type` enum('site','term','combo') DEFAULT NULL,
KEY `item_idx` (`item`(333))
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

表:audience.topitem

CREATE TABLE `topitem` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`total_training_count` int(11) DEFAULT NULL,
`item` varchar(1024) DEFAULT NULL,
`standard_index_value` float(19,6) DEFAULT NULL,
`significance` float(19,6) DEFAULT NULL,
`seed_count` int(11) DEFAULT NULL,
`nonseed_count` int(11) DEFAULT NULL,
`prod` float(19,6) DEFAULT NULL,
`cat_type` varchar(32) DEFAULT NULL,
`cat_level` int(11) DEFAULT NULL,
`conf` decimal(19,9) DEFAULT NULL,
`level_1` varchar(64) DEFAULT NULL,
`level_2` varchar(64) DEFAULT NULL,
`level_3` varchar(64) DEFAULT NULL,
`level_4` varchar(64) DEFAULT NULL,
`level_5` varchar(64) DEFAULT NULL,
`runs_id` int(11) DEFAULT NULL,
`model_type` enum('site','term','combo') DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=825 DEFAULT CHARSET=utf8;

表:categorization.categorization

CREATE TABLE `AT_categorization` (
`url` varchar(760) NOT NULL ,
`language` varchar(10) DEFAULT NULL,
`category` text,
`entity` text,
`source` varchar(255) DEFAULT NULL,
`topic_L1` varchar(45) NOT NULL DEFAULT '',
`topic_L2` varchar(45) NOT NULL DEFAULT '',
`topic_L3` varchar(45) NOT NULL DEFAULT '',
`topic_L4` varchar(45) NOT NULL DEFAULT '',
`topic_L5` varchar(45) NOT NULL DEFAULT '',
`last_refreshed` datetime DEFAULT NULL,
PRIMARY KEY (`url`,`topic_L1`,`topic_L2`,`topic_L3`,`topic_L4`,`topic_L5`),
UNIQUE KEY `inx_url` (`url`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

1 个答案:

答案 0 :(得分:1)

如果添加以下索引,您的查询将运行得更快:

CREATE INDEX runs_idx ON audience.item_indexes (runs_id);

ALTER TABLE audience.usercounts
DROP INDEX item_idx,
ADD INDEX item_idx (runs_id, item(333));

此外,item_indexesutf8,但AT_categorization为latin1,这样可以防止使用任何索引。要解决此问题,请将AT_categorization更改为utf8

ALTER TABLE AT_categorization CHARSET=utf8;

最后,对于AT_categorization表,两个索引

PRIMARY KEY (`url`,`topic_L1`,`topic_L2`,`topic_L3`,`topic_L4`,`topic_L5`),
UNIQUE KEY `inx_url` (`url`)

是多余的。因此,您可以DROP这些,只需将url字段作为主键:

ALTER TABLE AT_categorization 
DROP PRIMARY KEY,
DROP KEY `inx_url`,
ADD PRIMARY KEY (url);