使用多个连接和分组优化SQL查询(Postgres 9.3)

时间:2016-01-23 22:31:04

标签: postgresql join postgresql-9.3 postgresql-performance sql-optimization

我浏览了其他一些帖子,并设法让我的查询运行得更快了一些。但是,我对如何进一步优化此查询感到茫然。我打算在一个网站上使用它,页面加载时就会执行该查询,但是5.5秒的等待时间太长了,而这件事本应简单得多。最大的表有大约4,000,000行,其他的表大约各有400,000行。

表格结构

匹配

class CategoryViewController: UIViewController, UITableViewDataSource, UITableViewDelegate {

    private let themeColors = ThemeColors()
    private let expensesOrganizer = ExpensesOrganizer()

    /// Runs the superclass implementation and then registers this controller as
    /// both the data source and the delegate of `subCategoryTableView`.
    ///
    /// NOTE(review): `subCategoryTableView` is not declared in the visible part of
    /// this class — presumably an outlet defined elsewhere; confirm.
    override func viewDidLoad() {
        super.viewDidLoad()

        // Set up the subcategory table view: this controller supplies its rows
        // and receives its selection callbacks.
        subCategoryTableView.dataSource = self
        subCategoryTableView.delegate = self
    }

    // MARK: UITableViewDataSource

    /// Reports how many subcategory rows the table should display.
    ///
    /// - Parameters:
    ///   - tableView: The table view asking for the row count.
    ///   - section: The section index; not consulted, every section gets the same count.
    /// - Returns: The number of subcategories `expensesOrganizer` reports for
    ///   `category`, or 0 when `category` is nil.
    func tableView(tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
        // `category` is optional. Show an empty table rather than crashing on a
        // force unwrap when it has not been set.
        guard let category = category else { return 0 }
        return expensesOrganizer.getNumOfSubcategoriesFor(category)
    }

    /// Configures and returns the cell for one subcategory row.
    ///
    /// The label shows the row index followed by the text `expensesOrganizer`
    /// provides for the subcategory at that index, and the selected-state
    /// background is tinted with the theme color of the current `category`.
    ///
    /// - Parameters:
    ///   - tableView: The table view requesting the cell.
    ///   - indexPath: Location of the row; only `row` is used to look up the subcategory.
    /// - Returns: A dequeued `SubcategoryTableViewCell` registered as "subCategoryCell".
    func tableView(tableView: UITableView, cellForRowAtIndexPath indexPath: NSIndexPath) -> UITableViewCell {
        let cell = tableView.dequeueReusableCellWithIdentifier("subCategoryCell", forIndexPath: indexPath) as! SubcategoryTableViewCell

        // Label: "<row> <subcategory text>".
        let subcategory = expensesOrganizer.getSubcategoryFor(category!, index: indexPath.row)
        let title = expensesOrganizer.getText(subcategory.rawValue)
        cell.subCategoryLabel.text = "\(indexPath.row) \(title)"

        // Selection highlight: an empty-framed view colored for the current category.
        let highlight = UIView(frame: CGRect.zero)
        highlight.backgroundColor = themeColors.getColorOfCategory(category!)
        cell.selectedBackgroundView = highlight

        return cell
    }

    // MARK: UITableViewDelegate

    var indexPathSelectedCell: NSIndexPath?

    /// Handles a row selection: marks the tapped cell (red label plus a debug
    /// text), remembers its index path in `indexPathSelectedCell`, and deselects
    /// every other row that `subCategoryTableView` still reports as selected.
    ///
    /// - Parameters:
    ///   - tableView: The table view reporting the selection.
    ///   - indexPath: Index path of the row that was just selected.
    func tableView(tableView: UITableView, didSelectRowAtIndexPath indexPath: NSIndexPath) {
        // cellForRowAtIndexPath returns nil when the row is not visible, so cast
        // conditionally instead of force-casting (which would crash on nil).
        if let subcategoryCell = tableView.cellForRowAtIndexPath(indexPath) as? SubcategoryTableViewCell {
            subcategoryCell.subCategoryLabel.textColor = UIColor.redColor()
            subcategoryCell.subCategoryLabel.text = "\(indexPath.row) didSELECTRowAtIndexPath called"
        }
        indexPathSelectedCell = indexPath

        // What the post said to add: keep only the newly selected row selected.
        // indexPathsForSelectedRows is optional, so unwrap it safely.
        guard let selectedRows = subCategoryTableView.indexPathsForSelectedRows else { return }
        for selected in selectedRows where !selected.isEqual(indexPath) {
            subCategoryTableView.deselectRowAtIndexPath(selected, animated: false)
        }
    }

    /// Handles a row deselection: restores the light theme font color on the
    /// cell's label and replaces its text with a debug message.
    ///
    /// - Parameters:
    ///   - tableView: The table view reporting the deselection.
    ///   - indexPath: Index path of the row that was deselected.
    func tableView(tableView: UITableView, didDeselectRowAtIndexPath indexPath: NSIndexPath) {
        // The deselected row may have been scrolled off-screen, in which case
        // cellForRowAtIndexPath returns nil; there is nothing to restyle then,
        // and a forced cast would crash.
        guard let subcategoryCell = tableView.cellForRowAtIndexPath(indexPath) as? SubcategoryTableViewCell else {
            return
        }
        subcategoryCell.subCategoryLabel.textColor = themeColors.getFontColor(Shade.Light)
        subcategoryCell.subCategoryLabel.text = "\(indexPath.row) didDESELECTRowAtIndexPath called"
    }

id BIGINT PRIMARY KEY,
region TEXT,
matchType TEXT,
matchVersion TEXT

团队(team)

matchid BIGINT REFERENCES match(id),
id INTEGER,
PRIMARY KEY(matchid, id),
winner TEXT

冠军(champion)

id INTEGER PRIMARY KEY,
version TEXT,
name TEXT

项目(item)

id INTEGER PRIMARY KEY,
name TEXT

参与者(participant)

PRIMARY KEY(matchid, id),
id INTEGER NOT NULL,
matchid BIGINT REFERENCES match(id),
championid INTEGER REFERENCES champion(id),
teamid INTEGER,
FOREIGN KEY (matchid, teamid) REFERENCES team(matchid, id),
magicDamageDealtToChampions REAL,
damageDealtToChampions REAL,
item0 TEXT,
item1 TEXT,
item2 TEXT,
item3 TEXT,
item4 TEXT,
item5 TEXT,
highestAchievedSeasonTier TEXT

查询

select champion.name, sum(case when participant.item0 = '3285' then 1::int8 else 0::int8 end) as it0, sum(case when participant.item1 = '3285' then 1::int8 else 0::int8 end) as it1, sum(case when participant.item2 = '3285' then 1::int8 else 0::int8 end) as it2, sum(case when participant.item3 = '3285' then 1::int8 else 0::int8 end) as it3, sum(case when participant.item4 = '3285' then 1::int8 else 0::int8 end) as it4, sum(case when participant.item5 = '3285' then 1::int8 else 0::int8 end) as it5 from participant left join champion on champion.id = participant.championid left join team on team.matchid = participant.matchid and team.id = participant.teamid left join match on match.id = participant.matchid where (team.winner = 'True' and matchversion = '5.14' and matchtype='RANKED_SOLO_5x5') group by champion.name;

EXPLAIN ANALYZE 的输出:http://explain.depesz.com/s/ZYX

到目前为止我做了什么

我在 match.region 和 participant.championid 上分别创建了单独的索引,并在 team 上创建了部分索引 where winner = 'True'(因为我只对这部分数据感兴趣)。请注意,enable_seqscan = on,因为把它关闭时查询会非常慢。从本质上讲,我想要得到的结果是这样的:

Champion   | item0 | item1 | ... | item5
champ_name | num   | num1  | ... | num5
...

由于我仍然是数据库设计的初学者,如果我的整体表格设计存在缺陷,我不会感到惊讶。不过,我仍然倾向于认为是查询本身效率极低。我尝试过内连接和左连接,但两者没有显着差异。此外,match 的 id 需要为 bigint(或其他比 integer 更大的类型,因为 integer 太小了)。

2 个答案:

答案 0 :(得分:4)

数据库设计

我建议:

-- Lookup tables: each distinct text value is stored once and referenced from
-- match through a small integer ID.
CREATE TABLE matchversion (
  matchversion_id int PRIMARY KEY
, matchversion    text UNIQUE NOT NULL
);

CREATE TABLE matchtype (
  matchtype_id int PRIMARY KEY
, matchtype    text UNIQUE NOT NULL
);

CREATE TABLE region (
  region_id int PRIMARY KEY
, region    text UNIQUE NOT NULL  -- UNIQUE added for consistency with the other lookup tables
);

-- One row per match; the three descriptive attributes are foreign keys into
-- the lookup tables above.
CREATE TABLE match (
  match_id        bigint PRIMARY KEY
, region_id       int REFERENCES region
, matchtype_id    int REFERENCES matchtype
, matchversion_id int REFERENCES matchversion
);

-- One row per team within a match, keyed by (match_id, team_id).
CREATE TABLE team (
  match_id bigint REFERENCES match
, team_id  integer  -- better name !
, winner   boolean  -- ?!
, PRIMARY KEY(match_id, team_id)
);

CREATE TABLE champion (
  champion_id int PRIMARY KEY
, version     text
, name        text
);

-- One row per participant; tied to its team through the composite foreign key
-- (match_id, team_id) declared at the end of the table.
CREATE TABLE participant (
  participant_id serial PRIMARY KEY -- use proper name !
, champion_id    int NOT NULL REFERENCES champion
, match_id       bigint NOT NULL REFERENCES match -- this FK might be redundant
, team_id        int
, magic_damage_dealt_to_champions real
, damage_dealt_to_champions       real
, item0      text  -- or integer ??
, item1      text
, item2      text
, item3      text
, item4      text
, item5      text
, highest_achieved_season_tier text  -- integer ??
, FOREIGN KEY (match_id, team_id) REFERENCES team
);
  • 更多规范化,以获得更小的表和索引以及更快的访问。为matchversion、matchtype和region创建查找表,只在match中写入一个小的整数ID。

  • 似乎列participant.item0 .. item5和highestAchievedSeasonTier可能是integer,但定义为text。

  • team.winner似乎是boolean,但定义为text

  • 我还更改了列的顺序以提高效率。详细说明:

查询

基于上述修改以及Postgres 9.3:

-- Per-champion counts of how often item '3285' occupies each of the six item
-- slots, restricted to winning teams in matches of version '5.14' and type
-- 'RANKED_SOLO_5x5'. Aggregation is done on participant.champion_id inside the
-- subquery; champion is joined afterwards only to attach the name.
SELECT c.name, *
FROM  (
   SELECT p.champion_id
        -- count() ignores NULLs: "cond OR NULL" is TRUE when cond holds and NULL
        -- otherwise, so each count() only counts rows whose slot equals '3285'.
        , count(p.item0 = '3285' OR NULL) AS it0
        , count(p.item1 = '3285' OR NULL) AS it1
        , count(p.item2 = '3285' OR NULL) AS it2
        , count(p.item3 = '3285' OR NULL) AS it3
        , count(p.item4 = '3285' OR NULL) AS it4
        , count(p.item5 = '3285' OR NULL) AS it5
   -- The two lookup tables are cross-joined and each filtered in WHERE below;
   -- match is then joined on both IDs.
   FROM   matchversion   mv  
   CROSS  JOIN matchtype mt
   JOIN   match          m  USING (matchtype_id, matchversion_id)
   JOIN   team           t  USING (match_id)
   JOIN   participant    p  USING (match_id, team_id)
   WHERE  mv.matchversion = '5.14'
   AND    mt.matchtype = 'RANKED_SOLO_5x5'
   AND    t.winner = 'True' -- should be boolean
   GROUP  BY p.champion_id
   ) p
JOIN  champion c USING (champion_id);  -- probably just JOIN ?
  • 由于champion.name未定义UNIQUE,因此按它进行GROUP BY可能得到错误的结果,而且效率也很低。请改用participant.championid(如果您需要结果中的名称,请稍后再连接champion)。

  • LEFT JOIN的所有实例都毫无意义,因为无论如何你都在左表中有谓词和/或使用GROUP BY中的列。

  • 用AND连接的WHERE条件两边不需要加括号。

  • 在Postgres 9.4或更高版本中,您可以使用新的聚合FILTER语法。细节和备选方案:

索引

您已经拥有的team部分索引应该如下所示,以允许仅索引扫描:

-- Partial index containing only rows where winner is true; the bare
-- "WHERE winner" predicate assumes the column has been converted to boolean.
CREATE INDEX on team (matchid, id) WHERE winner -- boolean

但是从我看到的情况来看,您可能只需向participant添加winner列,然后完全删除表team(除非它还有更多内容)。

此外,该索引不会有太大帮助,因为(从您的查询计划中得知)该表有800k行,其中一半符合条件:

rows=399999 ... Filter: (winner = 'True'::text) ... Rows Removed by Filter: 399999

当你有更多不同的matchtype和matchversion时,match上的这个索引会有所帮助(以后):

-- Multicolumn index on the two filter columns followed by match_id, so the
-- lookup used by the query can be answered from the index alone.
CREATE INDEX on match (matchtype_id, matchversion_id, match_id);

尽管如此,当400k行中有100k行符合条件时,该索引仅对仅索引扫描有用。否则,顺序扫描会更快。通常只有当符合条件的行约占表的5%或更少时,使用索引才划算。

您的主要问题是,您显然在使用数据分布几乎不符合实际的测试用例进行测试。谓词的选择性更强时,索引会更容易被用上。

另外

确保您已配置好基本的Postgres设置,例如random_page_cost或work_mem等。

enable_seqscan = on是不言而喻的。只有在调试时,或者在本地作为万不得已的最后手段时,才会将其关闭。

答案 1 :(得分:1)

我会尝试用 count(*) filter (where item0 = '3285') as it0 来做计数,而不是用sum。

另外,你为什么要对最后2个表使用left join,然后又在where子句中对它们加条件?这违背了left join的目的,而且常规的内连接更快。

-- Counts, per champion name, how often item '3285' sits in each of the six
-- item slots for winning participants of 5.14 RANKED_SOLO_5x5 matches.
-- The item columns are text, so the literal is quoted: comparing text with the
-- integer 3285 fails with "operator does not exist: text = integer".
-- NOTE: the aggregate FILTER clause requires Postgres 9.4 or later.
select champion.name,
count(*) filter( where participant.item0 = '3285') as it0,
count(*) filter( where participant.item1 = '3285') as it1,
count(*) filter( where participant.item2 = '3285') as it2,
count(*) filter( where participant.item3 = '3285') as it3,
count(*) filter( where participant.item4 = '3285') as it4,
count(*) filter( where participant.item5 = '3285') as it5
from participant
join champion on champion.id = participant.championid
join team on team.matchid = participant.matchid and team.id = participant.teamid
join match on match.id = participant.matchid
where (team.winner = 'True' and matchversion = '5.14'  and matchtype='RANKED_SOLO_5x5')
group by champion.name;