0、关键字
- mysql
- explain
- optimizer_trace
- 成本
- 全表扫描
SELECT count(*) FROM table;
SELECT count(1) FROM table;
1、sql执行成本
sql查询是否选择索引还是全表,执行引擎会通过成本核算出最佳的方式,如果全表扫描成本最低,则使用全表扫描。mysql执行成本组成如下:
- IO成本:即从磁盘把数据加载到内存的成本,默认情况下,读取数据页的IO成本是1,mysql是以页的形式读取数据。即当用到某个数据时,并非仅读取该数据,会把这个数据相邻的数据一起读到内存中,这就是有名的程序局部性原理,所以mysql每次会读取一整页数据(一页大小16kb),一页成本即1。
- CPU成本:数据读入内存后,还要检测数据是否满足条件和排序等CPU操作成本,显然与行数有关,默认情况下,检查成本0.2。
成本核算公式如下:
cost = rows*0.2 + data_length/(1024*16)
在了解mysql查询成本核算公式后,计算下table表的全表扫描成本,通过以下命令查询table表属性信息。
show table status like 'table'
Name | Engine | Version | Row_format | Rows | Avg_row_length | Data_length | Max_data_length | Index_length | Data_free | Auto_increment | Create_time | Update_time | Check_time | Collation | Checksum | Create_options | Comment |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
table | InnoDB | 10 | Dynamic | 8139 | 324 | 2637824 | 0 | 0 | 4194304 | 200000000000008199 | 2020-06-03 16:56:50 | 2020-06-03 11:24:01 | utf8_general_ci | 样例信息表 |
依据查询结果rows、data_length可以计算出全表扫描成本,如下:
2、sql执行过程跟踪
sql优化工具使用,开启optimizer_trace
set optimizer_trace = "enabled=on";
explain select count(*) from table;
select * from information_schema.`OPTIMIZER_TRACE`;
set optimizer_trace = "enabled=off";
通过对执行计划过程跟踪,查看其执行成本。
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select count(0) AS `count(*)` from `table` limit 0,200"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"table_dependencies": [
{
"table": "`table`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
]
}
]
},
{
"rows_estimation": [
{
"table": "`table`",
"table_scan": {
"rows": 8139,
"cost": 161
}
}
]
},
{
"considered_execution_plans": [
{
"plan_prefix": [
],
"table": "`table`",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 8139,
"access_type": "scan",
"resulting_rows": 8139,
"cost": 1788.8,
"chosen": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 8139,
"cost_for_plan": 1788.8,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": null,
"attached_conditions_computation": [
],
"attached_conditions_summary": [
{
"table": "`table`",
"attached": null
}
]
}
},
{
"refine_plan": [
{
"table": "`table`"
}
]
}
]
}
},
{
"join_execution": {
"select#": 1,
"steps": [
]
}
}
]
}
3、具体场景分析
序号 | SQL | 执行计划 |
---|---|---|
1 | select created_date from table where enabled_flag = ‘Y’ order by created_date desc; | 全表扫描 |
2 | select created_date from table force index(created_date_idx) where enabled_flag = ‘Y’ order by created_date desc; | 使用索引 |
备注:table表created_date字段已创建索引。
3.1、select created_date from table where enabled_flag = ‘Y’ order by created_date desc;
explain select created_date from table where enabled_flag = ‘Y’ order by created_date desc;
查看执行计划,该sql为全表(All)扫描。成本为1786.2。
{
"considered_execution_plans": [
{
"plan_prefix": [
],
"table": "`do_task_ver`",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 8126,
"access_type": "scan",
"resulting_rows": 812.6,
"cost": 1786.2,
"chosen": true,
"use_tmp_table": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 812.6,
"cost_for_plan": 1786.2,
"sort_cost": 812.6,
"new_cost_for_plan": 2598.8,
"chosen": true
}
]
}
3.2、select created_date from table force index(created_date_idx) where enabled_flag = ‘Y’ order by created_date desc;
explain select created_date from table force index(created_date_idx) where enabled_flag = ‘Y’ order by created_date desc;
查看执行计划,该sql为使用了索引index(created_date_idx)。成本为9752.2。
{
"considered_execution_plans": [
{
"plan_prefix": [
],
"table": "`do_task_ver` FORCE INDEX (`created_date_idx`)",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 8126,
"access_type": "scan",
"resulting_rows": 812.6,
"cost": 9752.2,
"chosen": true,
"use_tmp_table": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 812.6,
"cost_for_plan": 9752.2,
"sort_cost": 812.6,
"new_cost_for_plan": 10565,
"chosen": true
}
]
}
通过对比以上两sql,发现使用索引的成本大于全表扫描成本,因此默认情况select created_date from table where enabled_flag = ‘Y’ order by created_date desc;语句不会使用索引。
mysql选择的执行计划未必是最佳的,原因大多为行数统计不准确,mysql认为的最优是成本最低,我们认为最优是时间最短。
本篇文章仅一个浅显的记录,若想了解更深入,需要参考更多资料。