From 8ffdb9ded77ce49e6a187666a0aab3af7bf5a07b Mon Sep 17 00:00:00 2001
From: Oleg Smirnov
Date: Fri, 23 Jan 2026 17:39:02 +0700
Subject: [PATCH] MDEV-38728 Improve join size estimation for ref access

When estimating the number of rows produced by a join after `ref`
access, the optimizer assumes that every driving-table value will find
a match in the inner table. This causes overestimation when the driving
table has more distinct values than the inner table's key.

Fix: use the number of distinct values (NDV) of the columns in the join
predicate to calculate a match probability:

  match_prob = min(1.0, NDV(inner) / NDV(driving))

The expected number of records after `ref` access is then multiplied by
the match probability, which gives a more accurate estimate.

Limitations:
- EITS statistics must be available for both columns in the join
  predicate
- both columns must be real table fields
- only single-column ref access is supported
- only the first key part of the inner table's index is used

TODO:
- a WHERE filter on the driving table may reduce its NDV and thus
  affect the estimate. Currently this is handled only in a basic way
  (driving_ndv is capped at the number of records of the current
  partial join)

This commit updates only those test results that have been verified,
i.e. where the new plan gives a better join size estimate. Other
failing tests have not been verified yet.
---
 .../main/delete_use_source_engines.result    |  20 +--
 mysql-test/main/derived_cond_pushdown.result |  12 +-
 mysql-test/main/join_cache.result            |   4 +-
 mysql-test/main/match_probability.result     | 148 ++++++++++++++++++
 mysql-test/main/match_probability.test       | 113 +++++++++++++
 mysql-test/main/null_key.result              |   2 +-
 mysql-test/main/selectivity.result           |  14 +-
 sql/sql_select.cc                            | 133 ++++++++++++++++
 sql/sql_select.h                             |   7 +
 sql/sql_test.cc                              |   2 +
 10 files changed, 429 insertions(+), 26 deletions(-)
 create mode 100644 mysql-test/main/match_probability.result
 create mode 100644 mysql-test/main/match_probability.test

diff --git a/mysql-test/main/delete_use_source_engines.result b/mysql-test/main/delete_use_source_engines.result
index b1ad9c009d924..7b8097bbd8d92 100644
--- a/mysql-test/main/delete_use_source_engines.result
+++ b/mysql-test/main/delete_use_source_engines.result
@@ -731,15 +731,15 @@ create table tmp as select * from t1 where c1 in (select a.c2 from t1 a where a.
explain select * from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1) explain delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1) analyze delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 32.00 100.00 12.50 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 0.25 18.75 100.00 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 0.25 25.00 100.00 Using index; FirstMatch(t1) select * from t1; c1 c2 c3 1 2 2 @@ -2709,15 +2709,15 @@ create table tmp as select * from t1 where c1 in (select a.c2 from t1 a where a. explain select * from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1) explain delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1) analyze delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 27.00 100.00 14.81 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 0.25 18.75 100.00 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 0.25 25.00 100.00 Using index; FirstMatch(t1) select * from t1; c1 c2 c3 1 2 2 @@ -2762,7 +2762,7 @@ explain select * from t1 where c1 in (select a.c2 from t1 a where a.c3 = t1.c3) order by c3 desc limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where; Using filesort -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using where; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using where; FirstMatch(t1) explain delete from t1 where c1 in (select a.c2 from t1 a where a.c3 = t1.c3) order by c3 desc limit 1; id select_type table type possible_keys key key_len ref rows Extra @@ -5114,15 +5114,15 @@ create table tmp as select * from t1 where c1 in (select a.c2 from t1 a where a. 
explain select * from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1) explain delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1) analyze delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 29.00 100.00 13.79 Using where -1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 0.25 18.75 100.00 Using index; FirstMatch(t1) +1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 0.25 25.00 100.00 Using index; FirstMatch(t1) select * from t1; c1 c2 c3 1 2 2 diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result index 1fa1eea5dd307..0593d8a10d1f6 100644 --- a/mysql-test/main/derived_cond_pushdown.result +++ b/mysql-test/main/derived_cond_pushdown.result @@ -20147,7 +20147,7 @@ from t2, t3, (select c, max(b) max, min(b) min from t4 group by c) t where t2.b < 40 and t2.a=t3.a and t3.c=t.c; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 90 60.00 Using where -1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 1 100.00 Using where +1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 0 100.00 Using where 1 PRIMARY ref key0 key0 128 test.t3.c 5 100.00 2 DERIVED t4 ALL idx_c NULL NULL NULL 160 100.00 Using temporary; Using filesort Warnings: @@ -20182,7 +20182,7 @@ EXPLAIN "used_key_parts": ["a"], "ref": ["test.t2.a"], "loops": 53.99999991, - "rows": 1, + "rows": 0, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "t3.c is not null" @@ -20197,7 +20197,7 @@ EXPLAIN "key_length": "128", "used_key_parts": ["c"], "ref": ["test.t3.c"], - "loops": 80.99999987, + "loops": 52.89695991, "rows": 5, "cost": "COST_REPLACED", "filtered": 100, @@ -20690,7 +20690,7 @@ from t2, t3, (select c, b, sum(b) over (partition by c) from t4 ) t where t2.b < 40 and t2.a=t3.a and t3.c=t.c; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 90 60.00 Using where -1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 1 100.00 Using where +1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 0 100.00 Using where 1 PRIMARY ref key0 key0 128 test.t3.c 10 100.00 2 DERIVED t4 ALL idx_c NULL NULL NULL 160 100.00 Using temporary Warnings: @@ -20725,7 +20725,7 @@ EXPLAIN "used_key_parts": ["a"], "ref": ["test.t2.a"], "loops": 53.99999991, - "rows": 1, + "rows": 0, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "t3.c is not null" @@ -20740,7 +20740,7 @@ EXPLAIN "key_length": "128", "used_key_parts": ["c"], "ref": ["test.t3.c"], - "loops": 80.99999987, + "loops": 52.89695991, "rows": 10, "cost": "COST_REPLACED", "filtered": 100, diff --git a/mysql-test/main/join_cache.result b/mysql-test/main/join_cache.result index aa12ac8942d39..ddd82d5ce3933 100644 --- a/mysql-test/main/join_cache.result +++ b/mysql-test/main/join_cache.result @@ -6365,7 +6365,7 @@ LEFT JOIN t3 ON t2.d = t3.c LEFT 
JOIN t4 ON t3.c=1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL PRIMARY 4 NULL 10000 Using index -1 SIMPLE t2 ref b b 4 test.t1.b 1 +1 SIMPLE t2 ref b b 4 test.t1.b 0 1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index 1 SIMPLE t4 index NULL PRIMARY 4 NULL 3000 Using where; Using index SELECT COUNT(*) @@ -6383,7 +6383,7 @@ LEFT JOIN t3 ON t2.d = t3.c LEFT JOIN t4 ON t3.c=1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL PRIMARY 4 NULL 10000 Using index -1 SIMPLE t2 ref b b 4 test.t1.b 1 +1 SIMPLE t2 ref b b 4 test.t1.b 0 1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index 1 SIMPLE t4 index NULL PRIMARY 4 NULL 3000 Using where; Using index; Using join buffer (flat, BNL join) SELECT COUNT(*) diff --git a/mysql-test/main/match_probability.result b/mysql-test/main/match_probability.result new file mode 100644 index 0000000000000..f2d2bea1286cd --- /dev/null +++ b/mysql-test/main/match_probability.result @@ -0,0 +1,148 @@ +# Setup: Two tables with skewed foreign key distribution +CREATE TABLE t_ndv100 (a INT); +CREATE TABLE t_ndv3 (a INT, b VARCHAR(50), KEY idx_a (a)); +# All values in t_ndv100.a are different (100 distinct values) +INSERT INTO t_ndv100 (a) SELECT seq FROM seq_1_to_100; +# There are only 3 distinct values in t_ndv3.a +INSERT INTO t_ndv3 (a, b) SELECT seq/100, 'def' FROM seq_1_to_300; +set optimizer_trace=1; +# Inefficient plan until EITS is collected (full scan of t_ndv3) +EXPLAIN SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 +1 SIMPLE t_ndv3 ALL idx_a NULL NULL NULL 300 Using where; Using join buffer (flat, BNL join) +# Collect statistics only for the driving table (t_ndv100) +ANALYZE TABLE t_ndv100 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t_ndv100 analyze status Engine-independent statistics collected +test.t_ndv100 analyze status OK +# It is not enough until there are statistics for the inner table (t_ndv3) +EXPLAIN SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 +1 SIMPLE t_ndv3 ALL idx_a NULL NULL NULL 300 Using where; Using join buffer (flat, BNL join) +ANALYZE TABLE t_ndv3 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t_ndv3 analyze status Engine-independent statistics collected +test.t_ndv3 analyze status Table is already up to date +# After EITS is collected for both tables, the plan is efficient +# (index scan of t_ndv3). 
"rows" in the output are close to actual "r_rows" +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +1 SIMPLE t_ndv3 ref idx_a idx_a 5 test.t_ndv100.a 3 2.51 100.00 100.00 +# Optimization must be reflected in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +[ + 0.04, + 75 +] +# Match probability is not applicable because the left part of the +# join condition is not a field but an expression (t_ndv100.a + 10). See the +# difference between "rows" and "r_rows" +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a + 10 = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 +1 SIMPLE t_ndv3 ALL idx_a NULL NULL NULL 300 300.00 25.00 0.00 Using where; Using join buffer (flat, BNL join) +# Must be no records in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +NULL +# If all values are NULL, match probability is not applicable +CREATE TABLE t_nulls (a INT, b VARCHAR(50), KEY idx_a (a)); +INSERT INTO t_nulls (a, b) SELECT NULL, 'def' FROM seq_1_to_300; +ANALYZE TABLE t_nulls PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t_nulls analyze status Engine-independent statistics collected +test.t_nulls analyze status Table is already up to date +EXPLAIN SELECT t_ndv100.a, t_nulls.b +FROM t_ndv100 +STRAIGHT_JOIN t_nulls ON t_ndv100.a = t_nulls.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t_nulls ref idx_a idx_a 5 test.t_ndv100.a 1 +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +NULL +#------------------------------ +# Test multi-part index +#------------------------------ +CREATE TABLE t_ndv30 (a INT, b INT, c VARCHAR(50), KEY idx_ab (a, b)); +INSERT INTO t_ndv30 (a, b, c) SELECT seq/10, seq/10, 'def' FROM seq_1_to_300; +ANALYZE TABLE t_ndv30 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t_ndv30 analyze status Engine-independent statistics collected +test.t_ndv30 analyze status Table is already up to date +# If more than one key part is used, match probability is not applicable. 
+# See the mismatch between "rows" and "r_rows" in ANALYZE output +ANALYZE SELECT t_ndv100.a, t_ndv30.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a +AND t_ndv100.a = t_ndv30.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +1 SIMPLE t_ndv30 ref idx_ab idx_ab 10 test.t_ndv100.a,test.t_ndv100.a 9 2.96 100.00 100.00 Using index +# Must be no records in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +NULL +# Only first key part of `idx_ab` is used, match probability is applicable. +# "rows" is now much closer to actual "r_rows". +ANALYZE SELECT t_ndv100.a, t_ndv30.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +1 SIMPLE t_ndv30 ref idx_ab idx_ab 5 test.t_ndv100.a 3 2.96 100.00 100.00 Using index +# Must be reflected in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +[ + 0.31000062, + 9.6774 +] +# Three tables with equality propagation. +# min_driving_ndv = min(NDV(t_ndv30.a), NDV(t_ndv100.a)) = min(30, 100) = 30 +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv30 +STRAIGHT_JOIN t_ndv100 ON t_ndv30.a = t_ndv100.a +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv30 index idx_ab idx_ab 10 NULL 300 300.00 100.00 100.00 Using where; Using index +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 0.99 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t_ndv3 ref idx_a idx_a 5 test.t_ndv30.a 9 8.48 100.00 100.00 +# Must be reflected in the trace (match_prob ~= 0.1) +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +[ + 0.129032, + 75 +] +DROP TABLE t_ndv100, t_ndv3, t_ndv30, t_nulls; diff --git a/mysql-test/main/match_probability.test b/mysql-test/main/match_probability.test new file mode 100644 index 0000000000000..7890a5dc99eaf --- /dev/null +++ b/mysql-test/main/match_probability.test @@ -0,0 +1,113 @@ +--source include/have_sequence.inc + +--echo # Setup: Two tables with skewed foreign key distribution +CREATE TABLE t_ndv100 (a INT); + +CREATE TABLE t_ndv3 (a INT, b VARCHAR(50), KEY idx_a (a)); + +--echo # All values in t_ndv100.a are different (100 distinct values) +INSERT INTO t_ndv100 (a) SELECT seq FROM seq_1_to_100; + +--echo # There are only 3 distinct values in t_ndv3.a +INSERT INTO t_ndv3 (a, b) SELECT seq/100, 'def' FROM seq_1_to_300; + +set optimizer_trace=1; + +--echo # Inefficient plan until EITS is collected (full scan of t_ndv3) +EXPLAIN SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; + +--echo # Collect statistics only for the driving table (t_ndv100) +ANALYZE TABLE t_ndv100 PERSISTENT FOR ALL; + +--echo # It is not 
enough until there are statistics for the inner table (t_ndv3) +EXPLAIN SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; + +ANALYZE TABLE t_ndv3 PERSISTENT FOR ALL; + +--echo # After EITS is collected for both tables, the plan is efficient +--echo # (index scan of t_ndv3). "rows" in the output are close to actual "r_rows" +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; + +--echo # Optimization must be reflected in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', + '$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; + +--echo # Match probability is not applicable because the left part of the +--echo # join condition is not a field but an expression (t_ndv100.a + 10). See the +--echo # difference between "rows" and "r_rows" +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a + 10 = t_ndv3.a; + +--echo # Must be no records in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', + '$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; + +--echo # If all values are NULL, match probability is not applicable +CREATE TABLE t_nulls (a INT, b VARCHAR(50), KEY idx_a (a)); + +INSERT INTO t_nulls (a, b) SELECT NULL, 'def' FROM seq_1_to_300; + +ANALYZE TABLE t_nulls PERSISTENT FOR ALL; + +EXPLAIN SELECT t_ndv100.a, t_nulls.b +FROM t_ndv100 +STRAIGHT_JOIN t_nulls ON t_ndv100.a = t_nulls.a; + +SELECT json_detailed(json_extract(trace, '$**.match_probability', + '$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; + +--echo #------------------------------ +--echo # Test multi-part index +--echo #------------------------------ +CREATE TABLE t_ndv30 (a INT, b INT, c VARCHAR(50), KEY idx_ab (a, b)); + +INSERT INTO t_ndv30 (a, b, c) SELECT seq/10, seq/10, 'def' FROM seq_1_to_300; + +ANALYZE TABLE t_ndv30 PERSISTENT FOR ALL; + +--echo # If more than one key part is used, match probability is not applicable. +--echo # See the mismatch between "rows" and "r_rows" in ANALYZE output +ANALYZE SELECT t_ndv100.a, t_ndv30.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a + AND t_ndv100.a = t_ndv30.b; + +--echo # Must be no records in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', + '$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; + +--echo # Only first key part of `idx_ab` is used, match probability is applicable. +--echo # "rows" is now much closer to actual "r_rows". +ANALYZE SELECT t_ndv100.a, t_ndv30.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a; + +--echo # Must be reflected in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', + '$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; + +--echo # Three tables with equality propagation. 
+--echo # min_driving_ndv = min(NDV(t_ndv30.a), NDV(t_ndv100.a)) = min(30, 100) = 30 +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv30 +STRAIGHT_JOIN t_ndv100 ON t_ndv30.a = t_ndv100.a +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; + +--echo # Must be reflected in the trace (match_prob ~= 0.1) +SELECT json_detailed(json_extract(trace, '$**.match_probability', + '$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; + +DROP TABLE t_ndv100, t_ndv3, t_ndv30, t_nulls; \ No newline at end of file diff --git a/mysql-test/main/null_key.result b/mysql-test/main/null_key.result index 0b1446fd72e1c..9bc20427487b1 100644 --- a/mysql-test/main/null_key.result +++ b/mysql-test/main/null_key.result @@ -421,7 +421,7 @@ EXPLAIN SELECT SQL_CALC_FOUND_ROWS * FROM t1 LEFT JOIN t2 ON t1.a=t2.a LEFT JOIN t3 ON t2.b=t3.b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 4 -1 SIMPLE t2 ref idx idx 5 test.t1.a 1 Using where +1 SIMPLE t2 ref idx idx 5 test.t1.a 0 Using where 1 SIMPLE t3 ref idx idx 5 test.t2.b 1 Using where; Using index FLUSH STATUS ; SELECT SQL_CALC_FOUND_ROWS * FROM t1 LEFT JOIN t2 ON t1.a=t2.a diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index ef2d2e85b8681..b8f5da7537f1a 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -325,13 +325,13 @@ and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort -1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 +1 PRIMARY customer ALL PRIMARY NULL NULL NULL 150 100.00 Using temporary; Using filesort +1 PRIMARY orders ref PRIMARY,i_o_custkey i_o_custkey 5 dbt3_s001.customer.c_custkey 10 100.00 1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index Warnings: -Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey` and ``.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` +Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS 
`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`orders`.`o_custkey` = `dbt3_s001`.`customer`.`c_custkey` and ``.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem @@ -359,13 +359,13 @@ and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort -1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 +1 PRIMARY customer ALL PRIMARY NULL NULL NULL 150 100.00 Using temporary; Using filesort +1 PRIMARY orders ref PRIMARY,i_o_custkey i_o_custkey 5 dbt3_s001.customer.c_custkey 10 100.00 1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index Warnings: -Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey` and ``.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` +Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` 
group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`orders`.`o_custkey` = `dbt3_s001`.`customer`.`c_custkey` and ``.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem @@ -1294,7 +1294,7 @@ select * from t1, t2, t1 as t3 where t1.b=t2.c and t2.d=t3.a and t3.b<5 and t1.a < 2000; id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 262144 100.00 Using where -1 SIMPLE t2 ref c,d c 5 test.t1.b 5 100.00 +1 SIMPLE t2 ref c,d c 5 test.t1.b 2 100.00 1 SIMPLE t3 ALL NULL NULL NULL NULL 262144 100.00 Using where; Using join buffer (flat, BNL join) Warnings: Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t1` `t3` where `test`.`t2`.`c` = `test`.`t1`.`b` and `test`.`t3`.`a` = `test`.`t2`.`d` and `test`.`t3`.`b` < 5 and `test`.`t1`.`a` < 2000 diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b986e59656e08..ba88584ac098d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7297,6 +7297,26 @@ add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field, keyuse.cond_guard= key_field->cond_guard; keyuse.sj_pred_no= key_field->sj_pred_no; keyuse.validity_ref= 0; + + /* Compute driving_ndv from EITS statistics if val is a field */ + keyuse.driving_ndv= 0; + { + Item *driving_item= key_field->val->real_item(); + if (driving_item->type() == Item::FIELD_ITEM) + { + Field *driving_field= ((Item_field*) driving_item)->field; + if (driving_field->read_stats) + { + double avg_freq= driving_field->read_stats->get_avg_frequency(); + if (avg_freq > 0) + { + double rows= (double) driving_field->table->stat_records(); + keyuse.driving_ndv= MY_MIN(rows / avg_freq, rows); + } + } + } + } + return (insert_dynamic(keyuse_array,(uchar*) &keyuse)); } @@ -7450,6 +7470,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array, keyuse.sj_pred_no= UINT_MAX; keyuse.validity_ref= 0; keyuse.null_rejecting= FALSE; + keyuse.driving_ndv= 0; /* Not applicable for fulltext keys */ return insert_dynamic(keyuse_array,(uchar*) &keyuse); } @@ -8222,6 +8243,52 @@ inline double use_found_constraint(double records) } +/* + Compute the probability that a value from the driving table will find + a match in the inner table's index, based on NDV (number of distinct values). + + @param inner_table The inner (looked-up) table + @param key The key being used for ref access + @param min_driving_ndv Minimum NDV across all usable driving columns + (precomputed during KEYUSE iteration) + + @return match_probability in range (0, 1.0] + 1.0 if statistics are not available + + The idea: if the driving table has more distinct values than the inner + table's key, only a fraction of driving rows will find matches. 
+ + match_probability = min(1.0, ndv(inner.key) / ndv(outer.col)) + + where ndv = table_rows / avg_frequency +*/ + +static double +get_ref_match_probability(TABLE *inner_table, uint key, double min_driving_ndv) +{ + /* Get NDV for inner table's key (first key part) */ + KEY *keyinfo= inner_table->key_info + key; + Field *inner_field= keyinfo->key_part[0].field; + if (!inner_field->read_stats) + return 1.0; + + double inner_avg_freq= inner_field->read_stats->get_avg_frequency(); + if (inner_avg_freq <= 0) + return 1.0; + + double inner_rows= (double) inner_table->stat_records(); + double inner_ndv= inner_rows / inner_avg_freq; + + /* + match_probability = min(1.0, ndv(inner) / ndv(outer)) + + If inner has fewer distinct values than outer, only a fraction + of outer rows will find a match. + */ + return MY_MIN(1.0, inner_ndv / min_driving_ndv); +} + + /* Calculate the cost of reading a set of rows trough an index @@ -8801,6 +8868,8 @@ best_access_path(JOIN *join, DBUG_PRINT("info", ("Considering ref access on key %s", keyuse->table->key_info[keyuse->key].name.str)); + double min_driving_ndv= DBL_MAX; /* Track min driving NDV for keypart 0 */ + do /* For each keypart */ { uint keypart= keyuse->keypart; @@ -8844,6 +8913,10 @@ best_access_path(JOIN *join, best_part_found_ref= (keyuse->used_tables & ~join->const_table_map); } + /* Track minimum driving NDV for keypart 0 */ + if (keypart == 0 && keyuse->driving_ndv > 0) + set_if_smaller(min_driving_ndv, keyuse->driving_ndv); + if (rec > keyuse->ref_table_rows) rec= keyuse->ref_table_rows; /* @@ -9059,6 +9132,36 @@ best_access_path(JOIN *join, } } } + + /* + Apply match_probability based on NDV to adjust records. + If the driving table has more distinct values than the inner + table's key, only a fraction of driving rows will find matches. + */ + if (key_parts == 1 && + min_driving_ndv > 0 && min_driving_ndv < DBL_MAX) + { + /* + Bound effective NDV by record_count - NDV can't exceed the + number of rows from driving tables after filtering. + */ + double effective_driving_ndv= min_driving_ndv; + set_if_smaller(effective_driving_ndv, record_count); + + double match_prob= get_ref_match_probability(table, key, + effective_driving_ndv); + if (match_prob < 1.0) + { + if (unlikely(trace_access_idx.trace_started())) + { + trace_access_idx. + add("match_probability", match_prob). + add("rows_before_adjustment", records); + } + records *= match_prob; + } + } + /* Calculate the cost of the index access */ tmp= cost_for_index_read(thd, table, key, (ha_rows) records, 0); @@ -9270,6 +9373,36 @@ best_access_path(JOIN *join, } set_if_smaller(records, (double) s->records); + + /* + Apply match_probability based on NDV to adjust records. + If the driving table has more distinct values than the inner + table's key, only a fraction of driving rows will find matches. + */ + if (max_key_part == 1 && + min_driving_ndv > 0 && min_driving_ndv < DBL_MAX) + { + /* + Bound effective NDV by record_count - NDV can't exceed the + number of rows from driving tables after filtering. + */ + double effective_driving_ndv= min_driving_ndv; + set_if_smaller(effective_driving_ndv, record_count); + + double match_prob= get_ref_match_probability(table, key, + effective_driving_ndv); + if (match_prob < 1.0) + { + if (unlikely(trace_access_idx.trace_started())) + { + trace_access_idx. + add("match_probability", match_prob). 
+ add("rows_before_adjustment", records); + } + records *= match_prob; + } + } + tmp= cost_for_index_read(thd, table, key, (ha_rows)records, 0); tmp.copy_cost+= extra_cost; } diff --git a/sql/sql_select.h b/sql/sql_select.h index 82e975407edda..06d92b76e0d07 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -81,6 +81,13 @@ typedef struct keyuse_t { */ bool *validity_ref; + /* + Number of distinct values in the driving column (val). + Computed from EITS statistics when KEYUSE is created. + 0 means statistics not available. + */ + double driving_ndv; + bool is_for_hash_join() { return is_hash_join_key_no(key); } } KEYUSE; diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 24736007f083c..52d5bf97177d9 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -705,5 +705,7 @@ void print_keyuse_array_for_trace(THD *thd, DYNAMIC_ARRAY *keyuse_array) .field->field_name.str)). add("equals",keyuse->val). add("null_rejecting",keyuse->null_rejecting); + if (keyuse->driving_ndv > 0) + keyuse_elem.add("driving_ndv", keyuse->driving_ndv); } }