【GreatSQL 优化器 -17】DYNAMIC RANGE
- 2025-03-19 福建
本文字数:12405 字
阅读完需:约 41 分钟
【GreatSQL 优化器-17】DYNAMIC RANGE
一、DYNAMIC RANGE 介绍
GreatSQL 的优化器有一种扫描方式是动态范围扫描方式,类似于“已读乱回”模式,这种模式是在表有多个索引的情况下,对驱动表连接的时候部分选择索引的情况。优化器没有找到好的索引可以使用,但发现在知道前面表的列值后,可能会使用某些索引。对于前面表中的每个行组合,优化器检查是否可以使用 range 或 index merge 访问方法来检索行。虽然这不是很快,但比执行完全没有索引的连接要快。
下面用一个简单的例子来说明直方图怎么应用在优化器。
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT,date1 DATETIME);
INSERT INTO t1 VALUES (1,10,'2021-03-25 16:44:00.123456'),(2,1,'2022-03-26 16:44:00.123456'),(3,4,'2023-03-27 16:44:00.123456'),(5,5,'2024-03-25 16:44:00.123456'),(7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456');
CREATE TABLE t2 (cc1 INT PRIMARY KEY, cc2 INT);
INSERT INTO t2 VALUES (1,3),(2,1),(3,2),(4,3),(5,15);
CREATE TABLE t3 (ccc1 INT, ccc2 varchar(100));
INSERT INTO t3 VALUES (1,'aa1'),(2,'bb1'),(3,'cc1'),(4,'dd1'),(null,'ee');
CREATE INDEX idx1 ON t1(c2);
CREATE INDEX idx2 ON t1(c2,date1);
CREATE INDEX idx2_1 ON t2(cc2);
CREATE INDEX idx3_1 ON t3(ccc1);
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 43.67 | Range checked for each record (index map: 0x7) |
-- 这里的结果出现了Range checked,说明进行了DYNAMIC_RANGE
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
greatsql> SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+------+---------------------+------+------+
| c1 | c2 | date1 | ccc1 | ccc2 |
+----+------+---------------------+------+------+
| 1 | 10 | 2021-03-25 16:44:00 | 1 | aa1 |
| 2 | 1 | 2022-03-26 16:44:00 | 1 | aa1 |
| 3 | 4 | 2023-03-27 16:44:00 | 1 | aa1 |
| 2 | 1 | 2022-03-26 16:44:00 | 2 | bb1 |
| 3 | 4 | 2023-03-27 16:44:00 | 2 | bb1 |
| 2 | 1 | 2022-03-26 16:44:00 | 3 | cc1 |
| 3 | 4 | 2023-03-27 16:44:00 | 3 | cc1 |
| 2 | 1 | 2022-03-26 16:44:00 | 4 | dd1 |
| 3 | 4 | 2023-03-27 16:44:00 | 4 | dd1 |
| 2 | 1 | 2022-03-26 16:44:00 | NULL | ee |
| 3 | 4 | 2023-03-27 16:44:00 | NULL | ee |
+----+------+---------------------+------+------+
greatsql> SELECT * FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
"attaching_conditions_to_tables": {
"original_condition": "((`t1`.`c1` = `t3`.`ccc1`) or (`t1`.`c2` < 5))",
"attached_conditions_computation": [
{
"table": "`t1`",
"rechecking_index_usage": { -- 这里对t1进行了recheck
"recheck_reason": "not_first_table",
"range_analysis": {
"table_scan": {
"rows": 7,
"cost": 3.05
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": true,
"key_parts": [
"c1"
]
},
{
"index": "idx1",
"usable": true,
"key_parts": [
"c2",
"c1"
]
},
{
"index": "idx2",
"usable": true,
"key_parts": [
"c2",
"date1",
"c1"
]
}
],
"best_covering_index_scan": {
"index": "idx2",
"cost": 0.952742,
"chosen": true
},
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_single_table"
},
"skip_scan_range": {
"chosen": false,
"cause": "not_single_table"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"analyzing_index_merge_union": [
{
"indexes_to_merge": [
{
"range_scan_alternatives": [
{
"index": "PRIMARY",
"chosen": false,
"cause": "depends_on_unread_values"
}
],
"chosen": false,
"cause": "cost"
},
{
"range_scan_alternatives": [
{
"index": "idx1",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460274,
"chosen": true
},
{
"index": "idx2",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460457,
"chosen": false,
"cause": "cost"
}
],
"chosen": false,
"cause": "cost"
}
],
"cost_of_reading_ranges": 0,
"chosen": false,
"cause": "cost"
}
]
}
}
}
],
"attached_conditions_summary": [
{
"table": "`t3`",
"attached": null
},
{
"table": "`t1`",
"attached": "((`t1`.`c1` = `t3`.`ccc1`) or (`t1`.`c2` < 5))"
}
]
}
"join_execution": {
"select#": 1,
"steps": [
{
"rows_estimation_per_outer_row": { -- 这里只截取t3读第一行时候t1的索引选择
"table": "`t1`",
"range_analysis": {
"table_scan": {
"rows": 7,
"cost": 3.05
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": true,
"key_parts": [
"c1"
]
},
{
"index": "idx1",
"usable": true,
"key_parts": [
"c2",
"c1"
]
},
{
"index": "idx2",
"usable": true,
"key_parts": [
"c2",
"date1",
"c1"
]
}
],
"best_covering_index_scan": {
"index": "idx2",
"cost": 0.952742,
"chosen": true
},
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_single_table"
},
"skip_scan_range": {
"chosen": false,
"cause": "not_single_table"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"analyzing_index_merge_union": [
{
"indexes_to_merge": [
{
"range_scan_alternatives": [
{
"index": "PRIMARY",
"ranges": [
"c1 = 1"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": true,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 1,
"cost": 0.36,
"chosen": true
}
],
"index_to_merge": "PRIMARY",
"cumulated_cost": 0.36
},
{
"range_scan_alternatives": [
{
"index": "idx1",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460274,
"chosen": true
},
{
"index": "idx2",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460457,
"chosen": false,
"cause": "cost"
}
],
"index_to_merge": "idx1",
"cumulated_cost": 0.820274
}
],
"cost_of_reading_ranges": 0.820274,
"cost_of_mapping_rowid_in_non_clustered_pk_scan": 0.1,
"cost_sort_rowid_and_read_disk": 0.4375,
"use_roworder_index_merge": true, -- 根据上面结果选择了主键合并idx1的结果
"cause": "cost"
}
]
}
}
},
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 42.85 | Range checked for each record (index map: 0x7) | -- 这里0x7指的是t1表的3条索引
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
-- 驱动表t3一共5条数据,每行记录都进行了一次test_quick_select()来查找t1是否有可用的更优索引
详细DYNAMIC RANGE
执行过程如下表所示
二、相关代码解释
DYNAMIC_RANGE 的判断在对表 join 排序以后执行make_join_query_block
的时候,这时候要对非 join 第一张表做 recheck,recheck 以后有可能让表变为 DYNAMIC_RANGE 扫描方式。
static bool make_join_query_block(JOIN *join, Item *cond) {
// 满足以下条件的需要进行RECHECK,要么是非第一张表要么是有limit语句
if ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
tab->use_quick != QS_RANGE) {
if (cond && // 1a
(tab->keys() != tab->const_keys) && // 1b
(i > 0 || // 1c
(join->query_block->master_query_expression()->item &&
cond->is_outer_reference())))
recheck_reason = NOT_FIRST_TABLE;
else if (!tab->const_keys.is_clear_all() && // 2a
i == join->const_tables && // 2b
(join->query_expression()->select_limit_cnt <
(tab->position()->rows_fetched *
tab->position()->filter_effect)) && // 2c
!join->calc_found_rows) // 2d
recheck_reason = LOW_LIMIT;
// 满足这个if条件的执行QS_DYNAMIC_RANGE,详情见下面几张表
if (!tab->table()->quick_keys.is_subset(tab->checked_keys) ||
!tab->needed_reg.is_subset(tab->checked_keys)) {
tab->keys().merge(tab->table()->quick_keys);
tab->keys().merge(tab->needed_reg);
if (!tab->needed_reg.is_clear_all() &&
(tab->table()->quick_keys.is_clear_all() ||
(tab->range_scan() &&
(tab->range_scan()->num_output_rows() >= 100.0)))) {
tab->use_quick = QS_DYNAMIC_RANGE;
tab->set_type(JT_ALL);
} else
tab->use_quick = QS_RANGE;
}
}
}
// 实际使用代码
bool DynamicRangeIterator::Init() {
// 每次迭代器开始的时候先获取最佳索引组合和AccessPath
AccessPath *range_scan;
int rc = test_quick_select(&range_scan);
}
表一:需要进行 recheck 的原因
表二:quick_type 类型
表三:表用 QS_DYNAMIC_RANGE 的场合
三、实际例子说明
接下来看几个例子来说明上面的代码:
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 43.67 | Range checked for each record (index map: 0x7) |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
-- 第一次对单表进行估计的时候t1.c1=t3.ccc1条件没有加入到mm tree,只对t1.c1打了一个标记MAYBE_KEY表示后面可能可以用到这个索引,因此最后没有生成mm tree,在make_join_query_block因为已经确定表连接顺序可以使用t1.c1=t3.ccc1条件,这时候可以把两个条件结合起来对t1执行mm tree,因此这个时候才生成mm tree,而这个时候才能考虑range scan和索引合并等操作。
t1 的 JOIN_TAB 的两个相关 key 变量,因此 keys() != const_keys
下面删除 t1 的一些数据,让 join 方式变为 t1 在前 t3 在后,这样的话对 t1 就不需要执行 recheck,并且最后 t1 也通过索引进行扫描。
greatsql> DELETE FROM t1 WHERE c1 IN (7,8,11);
greatsql> EXPLAIN SELECT * FROM t1 JOIN t3 ON t1.c1=t3.ccc1 OR t1.c2<5;
+----+-------------+-------+------------+-------+-------------------+------+---------+------+------+----------+--------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+-------------------+------+---------+------+------+----------+--------------------------------------------+
| 1 | SIMPLE | t1 | NULL | index | PRIMARY,idx1,idx2 | idx2 | 11 | NULL | 3 | 100.00 | Using index |
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 10 | 100.00 | Using where; Using join buffer (hash join) |
+----+-------------+-------+------------+-------+-------------------+------+---------+------+------+----------+--------------------------------------------+
下面执行一下三张表的连接,看到因为 t1 不是第一张表,最后用到了DYNAMIC RANGE
greatsql> INSERT INTO t1 VALUES (7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456');
greatsql> EXPLAIN SELECT * FROM t1 join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1 or t1.c2<5 ;
+----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+------------------------------------------------+
| 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using index |
| 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 42.85 | Range checked for each record (index map: 0x7) |
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | Using where; Using join buffer (hash join) |
+----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+------------------------------------------------+
如果遇到 DYNAMIC RANGE
的情况,每行进行索引判断肯定比一开始就决定单纯走索引浪费时间,为了避免这种情况,可以采取以下的方法来避免。
1、强制让 DYNAMIC RANGE
的表作为驱动表
greatsql> EXPLAIN SELECT /*+ qb_name(qb1) JOIN_ORDER(@qb1 t1,t2,t3) */ * FROM t1 join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1 or t1.c2<5 ;
+----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+---------------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+---------------------------------------------------------+
| 1 | SIMPLE | t1 | NULL | index | PRIMARY,idx1,idx2 | idx2 | 11 | NULL | 7 | 100.00 | Using index |
| 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using where; Using index; Using join buffer (hash join) |
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | Using where; Using join buffer (hash join) |
+----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+---------------------------------------------------------+
2、给 DYNAMIC RANGE
的表强制指定索引
greatsql> EXPLAIN SELECT * FROM t1 FORCE INDEX(idx1,idx2) join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1 or t1.c2<5 ;
+----+-------------+-------+------------+-------+---------------+--------+---------+------+------+----------+---------------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+--------+---------+------+------+----------+---------------------------------------------------------+
| 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using index |
| 1 | SIMPLE | t1 | NULL | index | idx1,idx2 | idx2 | 11 | NULL | 7 | 42.85 | Using where; Using index; Using join buffer (hash join) |
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | Using where; Using join buffer (hash join) |
+----+-------------+-------+------------+-------+---------------+--------+---------+------+------+----------+---------------------------------------------------------+
3、改变条件,删除 or 条件,使用 keyuse_array 数组来选择路径。
greatsql> EXPLAIN SELECT * FROM t1 join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1;
+----+-------------+-------+------------+--------+---------------+---------+---------+------------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+--------+---------------+---------+---------+------------+------+----------+-------------+
| 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using index |
| 1 | SIMPLE | t3 | NULL | ref | idx3_1 | idx3_1 | 5 | db1.t2.cc1 | 1 | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | eq_ref | PRIMARY | PRIMARY | 4 | db1.t2.cc1 | 1 | 100.00 | NULL |
+----+-------------+-------+------------+--------+---------------+---------+---------+------------+------+----------+-------------+
四、总结
从上面关于 DYNAMIC RANGE
的解释我们知道这个状态需要经过一系列判断,并且只在特定条件才有可能出现,但是每行进行一次索引判断还是很消耗资源的,最好还是直接走索引,所以要尽量避免这种情况,适当的时候可以进行 SQL 命令干预。
版权声明: 本文为 InfoQ 作者【GreatSQL】的原创文章。
原文链接:【http://xie.infoq.cn/article/a4e83d50d7c5a6ca770cc3bb5】。文章转载请联系作者。

GreatSQL
GreatSQL社区 2023-01-31 加入
GreatSQL是由万里数据库维护的MySQL分支,专注于提升MGR可靠性及性能,支持InnoDB并行查询特性,是适用于金融级应用的MySQL分支版本。 社区:https://greatsql.cn/ Gitee: https://gitee.com/GreatSQL/GreatSQL
评论