From 554587b8f7a6a7e743de65e429791d2959470585 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 5 Feb 2026 16:51:31 +0200 Subject: [PATCH] MDEV-38273: Optimizer trace should have selectivities collected via sampling Add to optimizer trace: "sampled_selectivity": [ { "cond":"condition", "selectivity": n.nnnn } ... ] --- mysql-test/main/opt_trace_selectivity.result | 56 ++++++++++++++++++++ mysql-test/main/opt_trace_selectivity.test | 23 ++++++++ sql/opt_range.cc | 8 +++ 3 files changed, 87 insertions(+) diff --git a/mysql-test/main/opt_trace_selectivity.result b/mysql-test/main/opt_trace_selectivity.result index 26d5671f0645f..0d247049f97ba 100644 --- a/mysql-test/main/opt_trace_selectivity.result +++ b/mysql-test/main/opt_trace_selectivity.result @@ -367,4 +367,60 @@ JS ] ] drop table t1; +# +# MDEV-38273: Optimizer trace should have selectivities collected via sampling +# +create table t1 (a int, b int, c varchar(32), d varchar(32)); +insert into t1 +select +seq, seq, +if(mod(seq, 10) < 4,'c-ccc-c', 'no-match'), +if(mod(seq, 10) < 3,'d-ddd-d', 'no-match') +from seq_1_to_1000; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +set statement optimizer_use_condition_selectivity=5 for +explain select * from t1 +where a < 700 and b < 500 and c like '%ccc%' and d like '%ddd%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 Using where +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.rows_estimation[0]')) as JS +from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JS +[ + { + "selectivity_for_indexes": + [], + "selectivity_for_columns": + [ + { + "column_name": "a", + "ranges": + ["NULL < a < 700"], + "selectivity_from_histogram": 0.699 + }, + { + "column_name": "b", + "ranges": + ["NULL < b < 500"], + "selectivity_from_histogram": 0.499 + } + ], + "sampled_selectivity": + [ + { + "cond": "t1.c like '%ccc%'", + "selectivity": 0.4 + }, + { + "cond": "t1.d like '%ddd%'", + "selectivity": 0.3 + } + ], + "cond_selectivity": 0.04185612 + } +] +drop table t1; set optimizer_trace='enabled=off'; diff --git a/mysql-test/main/opt_trace_selectivity.test b/mysql-test/main/opt_trace_selectivity.test index 40f78d91db788..ef23bdf6299cb 100644 --- a/mysql-test/main/opt_trace_selectivity.test +++ b/mysql-test/main/opt_trace_selectivity.test @@ -83,4 +83,27 @@ select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) as JS from INFORMATION_SCHEMA.OPTIMIZER_TRACE; drop table t1; + +--echo # +--echo # MDEV-38273: Optimizer trace should have selectivities collected via sampling +--echo # +create table t1 (a int, b int, c varchar(32), d varchar(32)); +insert into t1 +select + seq, seq, + if(mod(seq, 10) < 4,'c-ccc-c', 'no-match'), + if(mod(seq, 10) < 3,'d-ddd-d', 'no-match') +from seq_1_to_1000; + +analyze table t1 persistent for all; + +set statement optimizer_use_condition_selectivity=5 for +explain select * from t1 +where a < 700 and b < 500 and c like '%ccc%' and d like '%ddd%'; + +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.rows_estimation[0]')) as JS +from INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +drop table t1; + set optimizer_trace='enabled=off'; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index bb00556e92b07..6e0def3618ff9 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3873,6 +3873,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && thd->variables.optimizer_use_condition_selectivity > 4) { + Json_writer_array trace_sampled_sel(thd, "sampled_selectivity"); find_selective_predicates_list_processor_data *dt= (find_selective_predicates_list_processor_data *) alloc_root(thd->mem_root, @@ -3902,6 +3903,13 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) (double)stat->positive / examined_rows)); double selectivity= ((double)stat->positive) / examined_rows; table->multiply_cond_selectivity(selectivity); + + if (unlikely(trace_sampled_sel.trace_started())) + { + Json_writer_object selectivity_for_cond(thd); + selectivity_for_cond.add("cond", stat->cond); + selectivity_for_cond.add("selectivity", selectivity); + } /* If a field is involved then we register its selectivity in case there in an equality with the field.