我浏览了其他一些帖子,并设法让我的查询运行得更快了一些。但是,我不知道该如何进一步优化此查询。我将在一个网站上使用它,页面加载时会执行该查询,但对于本应简单得多的操作来说,等待5.5秒实在太长了。最大的表大约有4,000,000行,其他表大约有400,000行。
表格结构
匹配
class CategoryViewController: UIViewController, UITableViewDataSource, UITableViewDelegate {
private let themeColors = ThemeColors()
private let expensesOrganizer = ExpensesOrganizer()
/// Hooks the sub-category table view up to this controller after the view loads.
override func viewDidLoad() {
    super.viewDidLoad()

    // This controller serves as both the delegate and the data source
    // of the sub-category table view.
    subCategoryTableView.delegate = self
    subCategoryTableView.dataSource = self
}
// MARK: UITableViewDataSource
/// Returns the row count for the table: the number `expensesOrganizer`
/// reports for the current `category`.
///
/// Note: `category` is force-unwrapped here.
func tableView(tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
    let subcategoryCount = expensesOrganizer.getNumOfSubcategoriesFor(category!)
    return subcategoryCount
}
/// Dequeues a "subCategoryCell", fills its label with the row index and the
/// organizer's text for the subcategory at that row, and gives it a
/// selection background tinted with the category's theme color.
func tableView(tableView: UITableView, cellForRowAtIndexPath indexPath: NSIndexPath) -> UITableViewCell {
    let row = indexPath.row
    let cell = tableView.dequeueReusableCellWithIdentifier("subCategoryCell", forIndexPath: indexPath) as! SubcategoryTableViewCell

    // Label text: "<row> <text for the subcategory's raw value>".
    let title = expensesOrganizer.getText(expensesOrganizer.getSubcategoryFor(category!, index: row).rawValue)
    cell.subCategoryLabel.text = "\(row) \(title)"

    // Zero-sized view used only as a colored backdrop while the cell is selected.
    let selectionBackdrop = UIView(frame: CGRect.zero)
    selectionBackdrop.backgroundColor = themeColors.getColorOfCategory(category!)
    cell.selectedBackgroundView = selectionBackdrop

    return cell
}
// MARK: UITableViewDelegate
var indexPathSelectedCell: NSIndexPath?
/// Marks the tapped row as selected: recolors and relabels its cell, remembers
/// its index path, and deselects every other currently selected row.
///
/// - Parameters:
///   - tableView: The table view reporting the selection.
///   - indexPath: Index path of the row that was just selected.
func tableView(tableView: UITableView, didSelectRowAtIndexPath indexPath: NSIndexPath) {
    // cellForRowAtIndexPath returns nil when the row is not visible, so bind
    // conditionally instead of force-casting; the visual update is skipped then.
    if let subcategoryCell = tableView.cellForRowAtIndexPath(indexPath) as? SubcategoryTableViewCell {
        subcategoryCell.subCategoryLabel.textColor = UIColor.redColor()
        subcategoryCell.subCategoryLabel.text = "\(indexPath.row) didSELECTRowAtIndexPath called"
    }

    indexPathSelectedCell = indexPath

    // Deselect every other selected row. indexPathsForSelectedRows is optional,
    // so fall back to an empty list rather than force-unwrapping.
    // NOTE: per the UIKit documentation, deselectRowAtIndexPath(_:animated:) does
    // not invoke the didDeselect delegate callback, so labels of rows deselected
    // here are not restored by this loop.
    for selected in subCategoryTableView.indexPathsForSelectedRows ?? [] where !selected.isEqual(indexPath) {
        subCategoryTableView.deselectRowAtIndexPath(selected, animated: false)
    }
}
/// Restores the light font color on the deselected row's cell and relabels it.
///
/// - Parameters:
///   - tableView: The table view reporting the deselection.
///   - indexPath: Index path of the row that was just deselected.
func tableView(tableView: UITableView, didDeselectRowAtIndexPath indexPath: NSIndexPath) {
    // The deselected row may have scrolled off screen, in which case
    // cellForRowAtIndexPath returns nil; a forced cast would crash there.
    guard let subcategoryCell = tableView.cellForRowAtIndexPath(indexPath) as? SubcategoryTableViewCell else {
        return
    }
    subcategoryCell.subCategoryLabel.textColor = themeColors.getFontColor(Shade.Light)
    subcategoryCell.subCategoryLabel.text = "\(indexPath.row) didDESELECTRowAtIndexPath called"
}
匹配(match)
id BIGINT PRIMARY KEY,
region TEXT,
matchType TEXT,
matchVersion TEXT
队(team)
matchid BIGINT REFERENCES match(id),
id INTEGER,
PRIMARY KEY(matchid, id),
winner TEXT
冠军(champion)
id INTEGER PRIMARY KEY,
version TEXT,
name TEXT
项目(item)
id INTEGER PRIMARY KEY,
name TEXT
参与者(participant)
PRIMARY KEY(matchid, id),
id INTEGER NOT NULL,
matchid BIGINT REFERENCES match(id),
championid INTEGER REFERENCES champion(id),
teamid INTEGER,
FOREIGN KEY (matchid, teamid) REFERENCES team(matchid, id),
magicDamageDealtToChampions REAL,
damageDealtToChampions REAL,
item0 TEXT,
item1 TEXT,
item2 TEXT,
item3 TEXT,
item4 TEXT,
item5 TEXT,
highestAchievedSeasonTier TEXT
查询
-- For each champion name, count the participant rows that hold item '3285'
-- in each of the six item slots (item0 .. item5). Each sum adds 1 for a
-- matching row and 0 otherwise.
select champion.name,
sum(case when participant.item0 = '3285' then 1::int8 else 0::int8 end) as it0,
sum(case when participant.item1 = '3285' then 1::int8 else 0::int8 end) as it1,
sum(case when participant.item2 = '3285' then 1::int8 else 0::int8 end) as it2,
sum(case when participant.item3 = '3285' then 1::int8 else 0::int8 end) as it3,
sum(case when participant.item4 = '3285' then 1::int8 else 0::int8 end) as it4,
sum(case when participant.item5 = '3285' then 1::int8 else 0::int8 end) as it5
from participant
left join champion
on champion.id = participant.championid
left join team
on team.matchid = participant.matchid and team.id = participant.teamid
left join match
on match.id = participant.matchid
-- NOTE(review): the predicate on team.winner rejects NULL-extended rows, so the
-- left join to team behaves like an inner join. matchversion / matchtype are
-- unqualified; presumably they are columns of match, which would make that
-- left join effectively inner as well.
where (team.winner = 'True' and matchversion = '5.14' and matchtype='RANKED_SOLO_5x5')
group by champion.name;
EXPLAIN ANALYZE 的输出:http://explain.depesz.com/s/ZYX
到目前为止我做了什么
我在 match.region、participant.championid 上创建了单独的索引,并在 team 上创建了部分索引 where winner = 'True'(因为这只是我感兴趣的内容)。请注意 enable_seqscan = on,因为把它关闭时查询会非常慢。从本质上讲,我试图获得的结果是这样的:
Champion |item0 | item1 | ... | item5
champ_name | num | num1 | ... | num5
...
由于我仍然是数据库设计的初学者,如果我的整体表格设计存在缺陷,我不会感到惊讶。不过,我仍然倾向于认为是查询本身效率极低。我试过内连接和左连接——但没有显著差异。此外,match 的 id 需要为 bigint(或比 integer 更大的类型,因为 integer 太小了)。
答案 0 :(得分:4)
我建议:
-- Lookup table: maps an integer id to each distinct match version string.
CREATE TABLE matchversion (
    matchversion_id INT  PRIMARY KEY,
    matchversion    TEXT NOT NULL UNIQUE
);
-- Lookup table: maps an integer id to each distinct match type string.
CREATE TABLE matchtype (
    matchtype_id INT  PRIMARY KEY,
    matchtype    TEXT NOT NULL UNIQUE
);
-- Lookup table: maps an integer id to a region string.
-- NOTE(review): region is NOT NULL but carries no UNIQUE constraint —
-- confirm whether duplicate region names are intended.
CREATE TABLE region (
    region_id INT  PRIMARY KEY,
    region    TEXT NOT NULL
);
-- One row per match; region, type and version are stored as integer
-- references to their lookup tables instead of repeated text values.
CREATE TABLE match (
    match_id        BIGINT PRIMARY KEY,
    region_id       INT    REFERENCES region,
    matchtype_id    INT    REFERENCES matchtype,
    matchversion_id INT    REFERENCES matchversion
);
-- One row per team within a match, keyed by (match_id, team_id).
CREATE TABLE team (
    match_id BIGINT  REFERENCES match,
    team_id  INTEGER,              -- better name than a bare "id"
    winner   BOOLEAN,              -- boolean rather than text (?!)
    PRIMARY KEY (match_id, team_id)
);
-- One row per champion: integer key plus version and name text.
CREATE TABLE champion (
    champion_id INT  PRIMARY KEY,
    version     TEXT,
    name        TEXT
);
-- One row per participant, with a surrogate serial key, references to the
-- champion and match, and a composite reference to the participant's team.
CREATE TABLE participant (
    participant_id                  SERIAL PRIMARY KEY,                 -- use proper name !
    champion_id                     INT    NOT NULL REFERENCES champion,
    match_id                        BIGINT NOT NULL REFERENCES match,   -- this FK might be redundant
    team_id                         INT,
    magic_damage_dealt_to_champions REAL,
    damage_dealt_to_champions       REAL,
    item0                           TEXT,                               -- or integer ??
    item1                           TEXT,
    item2                           TEXT,
    item3                           TEXT,
    item4                           TEXT,
    item5                           TEXT,
    highest_achieved_season_tier    TEXT,                               -- integer ??
    FOREIGN KEY (match_id, team_id) REFERENCES team
);
更多规范化,以获得更小的表和索引以及更快的访问。为matchversion
,matchtype
和region
创建查找表,只在match
中写一个小整数ID。
似乎列participant.item0
.. item5
和highestAchievedSeasonTier
可能是integer
,但定义为text
?
列team.winner
似乎是boolean
,但定义为text
。
我还更改了列的顺序以提高效率。详细说明:
基于上述修改以及Postgres 9.3:
-- Aggregates per champion_id in the subquery, then joins champion afterwards
-- to attach the name. The outer select returns c.name followed by every
-- column the join produces (*).
SELECT c.name, *
FROM (
-- count(expr OR NULL) counts only rows where expr is true: a false
-- comparison becomes NULL, and count() ignores NULLs.
SELECT p.champion_id
, count(p.item0 = '3285' OR NULL) AS it0
, count(p.item1 = '3285' OR NULL) AS it1
, count(p.item2 = '3285' OR NULL) AS it2
, count(p.item3 = '3285' OR NULL) AS it3
, count(p.item4 = '3285' OR NULL) AS it4
, count(p.item5 = '3285' OR NULL) AS it5
-- The lookup rows selected by the WHERE clause are cross-joined, and match
-- is then joined on both of their ids.
FROM matchversion mv
CROSS JOIN matchtype mt
JOIN match m USING (matchtype_id, matchversion_id)
JOIN team t USING (match_id)
JOIN participant p USING (match_id, team_id)
WHERE mv.matchversion = '5.14'
AND mt.matchtype = 'RANKED_SOLO_5x5'
AND t.winner = 'True' -- should be boolean
GROUP BY p.champion_id
) p
JOIN champion c USING (champion_id); -- probably just JOIN ?
由于champion.name
未定义UNIQUE
,因此按它分组很可能是错误的,不应对它使用GROUP BY
。它效率也很低。请改用participant.championid
(如果您需要结果中的名称,请稍后再连接champion
)。
LEFT JOIN
的所有实例都毫无意义,因为无论如何你都在左表中有谓词和/或使用GROUP BY
中的列。
不需要围绕AND
- WHERE
条件的括号。
在Postgres 9.4或更高版本中,您可以使用新的聚合FILTER
语法。细节和备选方案:
您已经拥有的team
部分索引应该如下所示,以允许仅索引扫描:
-- Partial index on team(matchid, id) restricted to rows where winner is true;
-- the bare "WHERE winner" predicate requires winner to be a boolean column.
CREATE INDEX on team (matchid, id) WHERE winner -- boolean
但是从我看到的情况来看,您可能只需向participant
添加winner
列,然后完全删除表team
(除非有更多内容)。
此外,该索引不会有很大帮助,因为(从您的查询计划中得知)该表有800k行,其中一半符合条件:
rows=399999 ... Filter: (winner = 'True'::text) ... Rows Removed by Filter: 399999
当你有更多不同的匹配类型和匹配版本时,match
上的这个索引会有所帮助(稍后):
-- Multicolumn index on match: the two lookup ids come first, followed by match_id.
CREATE INDEX on match (matchtype_id, matchversion_id, match_id);
尽管如此,当40万行中有10万行符合条件时,该索引只对仅索引扫描(index-only scan)有用。否则,顺序扫描会更快。索引通常只有在命中表中约5%或更少的行时才划算。
您的主要问题是您显然正在使用几乎不现实的数据分发来运行测试用例。通过更具选择性的谓词,可以更容易地使用索引。
确保您已经配置好基本的Postgres设置,例如random_page_cost
或work_mem
等。
enable_seqscan = on
不言而喻。只有在调试时,或作为万不得已的最后手段在本地(会话级)临时设置时,才会将其关闭。
答案 1 :(得分:1)
我会尝试用 count(*) filter (where item0 = '3285') as it0
来计数,而不是用 sum。
另外,你为什么要对最后2个表使用左连接(left join),然后又在where子句中对它们进行过滤?这违背了左连接的目的,而且常规的内连接更快。
-- For each champion name, count the participant rows that hold item '3285'
-- in each of the six item slots, using the aggregate FILTER clause
-- (available in PostgreSQL 9.4 or later).
-- The item columns are declared TEXT, so the literal must be quoted:
-- comparing a text column with the integer 3285 fails with
-- "operator does not exist: text = integer".
select champion.name,
count(*) filter (where participant.item0 = '3285') as it0,
count(*) filter (where participant.item1 = '3285') as it1,
count(*) filter (where participant.item2 = '3285') as it2,
count(*) filter (where participant.item3 = '3285') as it3,
count(*) filter (where participant.item4 = '3285') as it4,
count(*) filter (where participant.item5 = '3285') as it5
from participant
join champion on champion.id = participant.championid
join team on team.matchid = participant.matchid and team.id = participant.teamid
join match on match.id = participant.matchid
where (team.winner = 'True' and matchversion = '5.14' and matchtype='RANKED_SOLO_5x5')
group by champion.name;