如何提高MySQL子查询的性能

时间:2015-07-21 00:00:33

标签: mysql database performance subquery entity-attribute-value

我有一个使用EAV模型生成属性的CRM系统。您可能非常清楚,EAV模型的问题在于需要复杂的查询才能提取数据:每个属性都必须作为单独的一列返回。

使用子查询时,MySQL的性能很糟糕。我必须找到一种更好的方法来编写查询:根据给定的WHERE子句、排序顺序和LIMIT(如果有)对它们进行分析。

我所说的子查询,指的是类似下面这样的查询:

-- Pivots three EAV attributes (fields 19, 10 and 16) of client 7 into columns:
-- one derived table per attribute, each LEFT JOINed to the account row.
-- Fix: the derived tables no longer end in ';'. A statement terminator inside
-- the parentheses cuts the statement short and is a syntax error.
-- Fix: the GROUP_CONCAT separator is a single-quoted literal, so it stays a
-- string even when sql_mode includes ANSI_QUOTES.
-- NOTE(review): s.fieldValue is decimal_value, yet the WHERE clause compares it
-- with 'Medium' / 'Low'; confirm which value column field 19 really uses.
SELECT
    a.account_name,
    a.account_type,
    a.status,
    a.account_id,
    s.fieldValue,
    s2.last_training_on,
    s3.fieldValue
FROM accounts AS a
INNER JOIN clients AS c
    ON c.client_id = a.client_id
LEFT JOIN (
    -- field 19, read from the decimal value column
    SELECT
        p.related_to AS account_id,
        p.decimal_value AS fieldValue
    FROM df_answers_text AS p
    INNER JOIN df_field_to_client_relation AS r
        ON r.field_id = p.field_id
    WHERE p.field_id = '19'
      AND r.client_id = '7'
) AS s
    ON s.account_id = a.account_id
LEFT JOIN (
    -- field 10, read from the datetime value column
    SELECT
        p.related_to AS account_id,
        p.datetime_value AS last_training_on
    FROM df_answers_text AS p
    INNER JOIN df_field_to_client_relation AS r
        ON r.field_id = p.field_id
    WHERE p.field_id = '10'
      AND r.client_id = '7'
) AS s2
    ON s2.account_id = a.account_id
LEFT JOIN (
    -- field 16: labels of all selected options, collapsed to one row per related_to
    SELECT
        p.related_to,
        CAST(GROUP_CONCAT(o.label SEPARATOR ' | ') AS CHAR(255)) AS fieldValue
    FROM df_answer_predefined AS p
    INNER JOIN df_fields_options AS o
        ON o.option_id = p.option_id
    INNER JOIN df_field_to_client_relation AS r
        ON r.field_id = o.field_id
    WHERE o.is_place_holder = 0
      AND o.field_id = '16'
      AND r.field_id = '16'
      AND r.client_id = '7'
    GROUP BY p.related_to
) AS s3
    ON s3.related_to = a.account_id
WHERE c.client_id = '7'
  AND c.status = 'Active'
  AND (
        a.account_type = 'TEST'
     OR a.account_type = 'VALUE'
     OR s2.last_training_on > '2015-01-01 00:00:00'
  )
  AND (
        s.fieldValue = 'Medium'
     OR s.fieldValue = 'Low'
     OR a.expType = 'Very High'
  )
ORDER BY a.account_name
LIMIT 500;

我考虑使用MEMORY引擎创建一个临时表,其中包含子查询的内容,如

-- Materialises the field-19 answers of client 7 into an in-memory temp table.
-- Fix: the index is declared USING BTREE. MEMORY tables build HASH indexes by
-- default, and a hash index is only usable when every key column is compared;
-- the later join supplies account_id alone, so a hash index on
-- (account_id, fieldValue) could not serve it.
CREATE TEMPORARY TABLE s (
    KEY s_account_value_idx USING BTREE (account_id, fieldValue)
) ENGINE = MEMORY
SELECT
    p.related_to AS account_id,
    p.decimal_value AS fieldValue
FROM df_answers_text AS p
INNER JOIN df_field_to_client_relation AS r
    ON r.field_id = p.field_id
WHERE p.field_id = '19'
  AND r.client_id = '7';

-- Materialises the field-10 answers of client 7 into an in-memory temp table.
-- Fix: the original "KEY(account_id, INDEX USING BTREE last_training_on)" is
-- not valid index syntax; USING BTREE belongs to the index definition, not to
-- a column inside the key-part list.
-- BTREE (rather than the MEMORY default, HASH) lets the index serve a join on
-- account_id alone as well as the range test on last_training_on.
CREATE TEMPORARY TABLE s2 (
    KEY s2_account_training_idx USING BTREE (account_id, last_training_on)
) ENGINE = MEMORY
SELECT
    p.related_to AS account_id,
    p.datetime_value AS last_training_on
FROM df_answers_text AS p
INNER JOIN df_field_to_client_relation AS r
    ON r.field_id = p.field_id
WHERE p.field_id = '10'
  AND r.client_id = '7';


    -- Materialises the field-16 option labels of client 7, one row per related_to.
    -- Fix: the index is declared USING BTREE. MEMORY tables build HASH indexes
    -- by default, and a hash index is only usable when every key column is
    -- compared; the later join supplies related_to alone.
    -- Fix: the separator is a single-quoted literal, so it stays a string even
    -- when sql_mode includes ANSI_QUOTES.
    CREATE TEMPORARY TABLE s3 (
        KEY s3_related_value_idx USING BTREE (related_to, fieldValue)
    ) ENGINE = MEMORY
    SELECT
        p.related_to,
        CAST(GROUP_CONCAT(o.label SEPARATOR ' | ') AS CHAR(255)) AS fieldValue
    FROM df_answer_predefined AS p
    INNER JOIN df_fields_options AS o
        ON o.option_id = p.option_id
    INNER JOIN df_field_to_client_relation AS r
        ON r.field_id = o.field_id
    WHERE o.is_place_holder = 0
      AND o.field_id = '16'
      AND r.field_id = '16'
      AND r.client_id = '7'
    GROUP BY p.related_to;


    -- Variant of the s3 temp table that indexes related_to only.
    -- NOTE(review): the preceding statement also creates a temporary table
    -- named s3; run only one of the two definitions per session, otherwise
    -- this CREATE fails because the table already exists.
    CREATE TEMPORARY TABLE s3 (
        KEY (related_to)
    ) ENGINE = MEMORY
    SELECT
        p.related_to,
        CAST(GROUP_CONCAT(o.label SEPARATOR " | ") AS CHAR(255)) AS fieldValue
    FROM df_answer_predefined AS p
    INNER JOIN df_fields_options AS o
        ON o.option_id = p.option_id
    INNER JOIN df_field_to_client_relation AS r
        ON r.field_id = o.field_id
    WHERE o.is_place_holder = 0
      AND o.field_id = '16'
      AND r.field_id = '16'
      AND r.client_id = '7'
    GROUP BY p.related_to;


然后我的新查询将如下所示

    -- Same report as the derived-table version, reading the three attributes
    -- from the temporary tables s, s2 and s3.
    -- Fix: s3 is joined on s3.related_to. The original joined it on
    -- s2.related_to, a column s2 does not have (s2 exposes account_id and
    -- last_training_on), so the join never referenced s3 at all.
    SELECT
        a.account_name,
        a.account_type,
        a.status,
        a.account_id,
        s.fieldValue,
        s2.last_training_on,
        s3.fieldValue
    FROM accounts AS a
    INNER JOIN clients AS c
        ON c.client_id = a.client_id
    LEFT JOIN s
        ON s.account_id = a.account_id
    LEFT JOIN s2
        ON s2.account_id = a.account_id
    LEFT JOIN s3
        ON s3.related_to = a.account_id
    WHERE c.client_id = '7'
      AND c.status = 'Active'
      AND (
            a.account_type = 'TEST'
         OR a.account_type = 'VALUE'
         OR s2.last_training_on > '2015-01-01 00:00:00'
      )
      AND (
            s.fieldValue = 'Medium'
         OR s.fieldValue = 'Low'
         OR a.expType = 'Very High'
      )
    ORDER BY a.account_name
    LIMIT 500;

    -- Release all three temporary tables. The original dropped only s and s2,
    -- leaving s3 in place until the session ends. IF EXISTS keeps the cleanup
    -- from failing when one of the tables was never created.
    DROP TEMPORARY TABLE IF EXISTS s, s2, s3;

我现在面临的问题是:这些临时表会包含数据库中所有可用的数据,创建它们要花费大量时间。但我的外部查询只需要按a.account_name排序的500条记录。如果临时表有100万条记录,那就是在浪费时间,显然会导致性能很差。

我希望找到一种更好的方法将条件子句传递给子查询,这样我创建的临时表就只包含外部查询所需的数据。

注意:这些查询是使用GUI动态生成的。我无法弄清楚如何提取逻辑/子句并将它们正确地传递给子查询。

问题

  • 如何分析WHERE子句,解析其中的条件并将它们传递给子查询,以减少子查询处理的数据量?如果所有条件都用“AND”连接,事情会容易得多;但由于条件中混合了“AND”和“OR”,这就变得非常复杂。
  • 是否有更好的方法解决此问题,而不是使用临时表。

EDITED     这是我的表定义

-- Answer rows that reference a field, a predefined option and the record they
-- relate to.
-- NOTE(review): KEY outter_view lists the same columns in the same order as
-- UNIQUE KEY un_row, and KEY field_id is a leftmost prefix of both.
CREATE TABLE df_answer_predefined (
    answer_id int(11) unsigned NOT NULL AUTO_INCREMENT,
    field_id int(11) unsigned DEFAULT NULL,
    related_to int(11) unsigned DEFAULT NULL,
    option_id int(11) unsigned DEFAULT NULL,
    created_by int(11) unsigned NOT NULL,
    created_on datetime DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (answer_id),
    UNIQUE KEY un_row (field_id, option_id, related_to),
    KEY field_id (field_id),
    KEY related_to (related_to),
    KEY to_delete (field_id, related_to),
    KEY outter_view (field_id, option_id, related_to)
) ENGINE=InnoDB AUTO_INCREMENT=4946214 DEFAULT CHARSET=utf8;

 -- Selectable options of a field: label, placeholder/default flags, sort order.
 -- Fix: removed the stray backticks that wrapped the whole statement (markup
 -- residue); with them the text is not a valid CREATE TABLE statement.
 CREATE TABLE df_fields_options (
   option_id int(11) unsigned NOT NULL AUTO_INCREMENT,
   field_id int(11) unsigned NOT NULL,
   label varchar(255) DEFAULT NULL,
   is_place_holder tinyint(1) NOT NULL DEFAULT '0',
   is_default tinyint(1) NOT NULL DEFAULT '0',
   sort smallint(3) NOT NULL DEFAULT '1',
   status tinyint(1) NOT NULL DEFAULT '1',
   PRIMARY KEY (option_id),
   KEY i (field_id),
   KEY d (option_id,field_id,is_place_holder)
 ) ENGINE=InnoDB AUTO_INCREMENT=155 DEFAULT CHARSET=utf8;


-- Relation table pairing a field with a client; UNIQUE KEY unique_row allows
-- each (field_id, client_id) pair at most once.
-- Fix: removed the stray backticks that wrapped the whole statement (markup
-- residue); with them the text is not a valid CREATE TABLE statement.
-- NOTE(review): KEY flient_id (field_id) is a leftmost prefix of unique_row.
CREATE TABLE df_field_to_client_relation (
  relation_id int(11) unsigned NOT NULL AUTO_INCREMENT,
  client_id int(11) unsigned DEFAULT NULL,
  field_id int(11) unsigned DEFAULT NULL,
  PRIMARY KEY (relation_id),
  UNIQUE KEY unique_row (field_id,client_id),
  KEY client_id (client_id),
  KEY flient_id (field_id)
) ENGINE=InnoDB AUTO_INCREMENT=26 DEFAULT CHARSET=utf8;


-- Free-form answers: one typed value column per supported data type, keyed by
-- the field (field_id) and the record the answer relates to (related_to).
-- Fix: removed the stray backticks that wrapped the whole statement (markup
-- residue); with them the text is not a valid CREATE TABLE statement.
-- NOTE(review): KEY field_id (field_id) is a leftmost prefix of
-- UNIQUE KEY unique_answer (field_id, related_to).
CREATE TABLE df_answers_text (
  answer_id int(11) unsigned NOT NULL AUTO_INCREMENT,
  notes varchar(20000) DEFAULT NULL,
  datetime_value datetime DEFAULT NULL,
  date_value date DEFAULT NULL,
  us_phone_number char(10) DEFAULT NULL,
  field_id int(11) unsigned DEFAULT NULL,
  related_to int(11) unsigned DEFAULT NULL,
  created_by int(11) unsigned NOT NULL,
  created_on datetime DEFAULT CURRENT_TIMESTAMP,
  modified_by int(11) DEFAULT NULL,
  modified_on datetime DEFAULT NULL,
  big_unsigned_value bigint(20) DEFAULT NULL,
  big_signed_value bigint(19) DEFAULT NULL,
  unsigned_value int(11) DEFAULT NULL,
  signed_value int(10) DEFAULT NULL,
  decimal_value decimal(18,4) DEFAULT NULL,
  PRIMARY KEY (answer_id),
  UNIQUE KEY unique_answer (field_id,related_to),
  KEY field_id (field_id),
  KEY related_to (related_to),
  KEY big_unsigned_value (big_unsigned_value),
  KEY big_signed_value (big_signed_value),
  KEY unsigned_value (unsigned_value),
  KEY signed_value (signed_value),
  KEY decimal_Value (decimal_value)
) ENGINE=InnoDB AUTO_INCREMENT=2458748 DEFAULT CHARSET=utf8;

花费最多时间的查询是带有别名s3的第三个子查询

以下是我们花了很长时间“2秒”的查询的执行计划

enter image description here

1 个答案:

答案 0 :(得分:1)

UNIQUE(a,b,c)
INDEX (a)

可以DROP掉这个INDEX,因为UNIQUE键本身就是一个索引,而INDEX (a)只是该UNIQUE键的前缀。

PRIMARY KEY(d)
UNIQUE(a,b,c)

为什么要d?只需说PRIMARY KEY(a,b,c)

FROM ( SELECT ... )
JOIN ( SELECT ... ) ON ...

这种写法优化得很差(5.6.6之前的版本)。尽可能将JOIN ( SELECT ... )改写为直接与表JOIN。正如您所建议的,如果可以为tmp表添加合适的索引,使用tmp表可能更好。最好尽量避免出现一个以上本身是子查询的“表”。

在多对多关系表中,不要包含表格的ID,而只包含

PRIMARY KEY (a,b),  -- for enforcing uniqueness, providing a PK, and going one direction
INDEX       (b,a)   -- for going the other way.

EXPLAIN似乎与您提供的SELECT不匹配。两者缺一不可,单独哪一个都没有用。

可能帮助的另一种方法......而不是

SELECT ..., s2.foo, ...
    ...
    JOIN ( SELECT ... FROM x WHERE ... ) AS s2 ON s2.account_id = a.account_id

看看你是否可以将其重新表述为:

SELECT ..., 
       ( SELECT foo FROM x WHERE ... AND related = a.account_id) AS foo, ...
    ...

也就是说,将JOIN子查询替换为您需要的一个值的相关子查询。

底线是EAV模型很糟糕。

嗯...我根本没有看到这个需要,因为r在查询的其他地方没有使用......

INNER JOIN df_field_to_client_relation AS r ON r.field_id = p.field_id
    WHERE p.field_id = '19' AND r.client_id = '7'

似乎等同于

WHERE EXISTS ( SELECT * FROM df_field_to_client_relation 
               WHERE field_id = '19' AND client_id = '7' )

但为什么还要检查是否存在?