我几乎没有查询,也无法弄清楚如何优化它们,
QUERY 1
select *
from t_twitter_tracking
where classified is null and tweetType='ENGLISH'
order by id limit 500;
QUERY 2
Select
count(*) as cnt,
DATE_FORMAT(CONVERT_TZ(wrdTrk.createdOnGMTDate,'+00:00','+05:30'),'%Y-%m-%d')
as dat
from
t_twitter_tracking wrdTrk
where
wrdTrk.word like ('dell')
and CONVERT_TZ(wrdTrk.createdOnGMTDate,'+00:00','+05:30')
between '2010-12-12 00:00:00' and '2010-12-26 00:00:00'
group by dat;
这两个查询都在同一个表上运行,
CREATE TABLE `t_twitter_tracking` (
`id` BIGINT(20) NOT NULL AUTO_INCREMENT,
`word` VARCHAR(200) NOT NULL,
`tweetId` BIGINT(100) NOT NULL,
`twtText` VARCHAR(800) NULL DEFAULT NULL,
`language` TEXT NULL,
`links` TEXT NULL,
`tweetType` VARCHAR(20) NULL DEFAULT NULL,
`source` TEXT NULL,
`sourceStripped` TEXT NULL,
`isTruncated` VARCHAR(40) NULL DEFAULT NULL,
`inReplyToStatusId` BIGINT(30) NULL DEFAULT NULL,
`inReplyToUserId` INT(11) NULL DEFAULT NULL,
`rtUsrProfilePicUrl` TEXT NULL,
`isFavorited` VARCHAR(40) NULL DEFAULT NULL,
`inReplyToScreenName` VARCHAR(40) NULL DEFAULT NULL,
`latitude` BIGINT(100) NOT NULL,
`longitude` BIGINT(100) NOT NULL,
`retweetedStatus` VARCHAR(40) NULL DEFAULT NULL,
`statusInReplyToStatusId` BIGINT(100) NOT NULL,
`statusInReplyToUserId` BIGINT(100) NOT NULL,
`statusFavorited` VARCHAR(40) NULL DEFAULT NULL,
`statusInReplyToScreenName` TEXT NULL,
`screenName` TEXT NULL,
`profilePicUrl` TEXT NULL,
`twitterId` BIGINT(100) NOT NULL,
`name` TEXT NULL,
`location` VARCHAR(100) NULL DEFAULT NULL,
`bio` TEXT NULL,
`url` TEXT NULL COLLATE 'latin1_swedish_ci',
`utcOffset` INT(11) NULL DEFAULT NULL,
`timeZone` VARCHAR(100) NULL DEFAULT NULL,
`frenCnt` BIGINT(20) NULL DEFAULT '0',
`createdAt` DATETIME NULL DEFAULT NULL,
`createdOnGMT` VARCHAR(40) NULL DEFAULT NULL,
`createdOnServerTime` DATETIME NULL DEFAULT NULL,
`follCnt` BIGINT(20) NULL DEFAULT '0',
`favCnt` BIGINT(20) NULL DEFAULT '0',
`totStatusCnt` BIGINT(20) NULL DEFAULT NULL,
`usrCrtDate` VARCHAR(200) NULL DEFAULT NULL,
`humanSentiment` VARCHAR(30) NULL DEFAULT NULL,
`replied` BIT(1) NULL DEFAULT NULL,
`replyMsg` TEXT NULL,
`classified` INT(32) NULL DEFAULT NULL,
`createdOnGMTDate` DATETIME NULL DEFAULT NULL,
`locationDetail` TEXT NULL,
`geonameid` INT(11) NULL DEFAULT NULL,
`country` VARCHAR(255) NULL DEFAULT NULL,
`continent` CHAR(2) NULL DEFAULT NULL,
`placeLongitude` FLOAT NULL DEFAULT NULL,
`placeLatitude` FLOAT NULL DEFAULT NULL,
PRIMARY KEY (`id`),
INDEX `id` (`id`, `word`),
INDEX `createdOnGMT_index` (`createdOnGMT`) USING BTREE,
INDEX `word_index` (`word`) USING BTREE,
INDEX `location_index` (`location`) USING BTREE,
INDEX `classified_index` (`classified`) USING BTREE,
INDEX `tweetType_index` (`tweetType`) USING BTREE,
INDEX `getunclassified_index` (`classified`, `tweetType`) USING BTREE,
INDEX `timeline_index` (`word`, `createdOnGMTDate`, `classified`) USING BTREE,
INDEX `createdOnGMTDate_index` (`createdOnGMTDate`) USING BTREE,
INDEX `locdetail_index` (`country`, `id`) USING BTREE,
FULLTEXT INDEX `twtText_index` (`twtText`)
)
COLLATE='utf8_general_ci'
ENGINE=MyISAM
ROW_FORMAT=DEFAULT
AUTO_INCREMENT=12608048;
该表有超过1000万条记录。我该如何优化呢?
EDITED
解释第二个问题
“ID”; “SELECT_TYPE”; “表”, “类型”, “possible_keys”; “键”, “key_len”; “REF”, “行”, “额外” “1”;“SIMPLE”;“wrdTrk”;“range”;“word_index,word_createdOnGMT”;“word_index”;“602”; NULL;“222847”;“使用where;使用临时;使用filesort”
此致 罗希特夏尔
答案 0 :(得分:1)
在Query2中,我建议:
1.删除DATE_FORMAT和CONVERT_TZ。您可以在PHP中处理输出或在条件之间
2.喜欢('dell'):我没有看到任何'%',所以你可以使用wrdTrk.word ='dell'让它更快。
答案 1 :(得分:0)
需要删除where条件中的convert_tz,
Select
count(*) as cnt,
DATE_FORMAT(CONVERT_TZ(wrdTrk.createdOnGMTDate,'+00:00','+05:30'),'%Y-%m-%d')
as dat
from
t_twitter_tracking wrdTrk
where
wrdTrk.word like ('dell')
and CONVERT_TZ(wrdTrk.createdOnGMTDate,'+00:00','+05:30')
between '2010-12-12 00:00:00' and '2010-12-26 00:00:00'
group by dat;
这将导致比较每一行并找出正确的结果,从而大大改善了查询结果。只需将转换后的数据传递给查询。
Select
count(*) as cnt,
DATE_FORMAT(CONVERT_TZ(wrdTrk.createdOnGMTDate,'+00:00','+05:30'),'%Y-%m-%d')
as dat
from
t_twitter_tracking wrdTrk
where
wrdTrk.word like ('dell')
and CONVERT_TZ(wrdTrk.createdOnGMTDate,'+00:00','+05:30')
between '2010-12-12 00:00:00' and '2010-12-26 00:00:00'
group by dat;