diff --git a/mysql-test/main/delete_use_source_engines.result b/mysql-test/main/delete_use_source_engines.result
index b1ad9c009d924..7b8097bbd8d92 100644
--- a/mysql-test/main/delete_use_source_engines.result
+++ b/mysql-test/main/delete_use_source_engines.result
@@ -731,15 +731,15 @@ create table tmp as select * from t1 where c1 in (select a.c2 from t1 a where a.
 explain select * from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1)
 explain delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1)
 analyze delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 32.00 100.00 12.50 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 0.25 18.75 100.00 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 0.25 25.00 100.00 Using index; FirstMatch(t1)
 select * from t1;
 c1 c2 c3
 1 2 2
@@ -2709,15 +2709,15 @@ create table tmp as select * from t1 where c1 in (select a.c2 from t1 a where a.
 explain select * from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1)
 explain delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1)
 analyze delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 27.00 100.00 14.81 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 0.25 18.75 100.00 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 0.25 25.00 100.00 Using index; FirstMatch(t1)
 select * from t1;
 c1 c2 c3
 1 2 2
@@ -2762,7 +2762,7 @@ explain select * from t1 where c1 in (select a.c2 from t1 a where a.c3 = t1.c3)
 order by c3 desc limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where; Using filesort
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using where; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using where; FirstMatch(t1)
 explain delete from t1 where c1 in (select a.c2 from t1 a where a.c3 = t1.c3)
 order by c3 desc limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
@@ -5114,15 +5114,15 @@ create table tmp as select * from t1 where c1 in (select a.c2 from t1 a where a.
 explain select * from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1)
 explain delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 Using index; FirstMatch(t1)
 analyze delete from t1 where c1 in (select a.c2 from t1 a where a.c2 = t1.c3) limit 1;
 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
 1 PRIMARY t1 ALL NULL NULL NULL NULL 32 29.00 100.00 13.79 Using where
-1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 5 0.25 18.75 100.00 Using index; FirstMatch(t1)
+1 PRIMARY a ref t1_c2 t1_c2 5 test.t1.c1 4 0.25 25.00 100.00 Using index; FirstMatch(t1)
 select * from t1;
 c1 c2 c3
 1 2 2
diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result
index 1fa1eea5dd307..0593d8a10d1f6 100644
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -20147,7 +20147,7 @@ from t2, t3, (select c, max(b) max, min(b) min from t4 group by c) t
 where t2.b < 40 and t2.a=t3.a and t3.c=t.c;
 id select_type table type possible_keys key key_len ref rows filtered Extra
 1 PRIMARY t2 ALL NULL NULL NULL NULL 90 60.00 Using where
-1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 1 100.00 Using where
+1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 0 100.00 Using where
 1 PRIMARY <derived2> ref key0 key0 128 test.t3.c 5 100.00
 2 DERIVED t4 ALL idx_c NULL NULL NULL 160 100.00 Using temporary; Using filesort
 Warnings:
@@ -20182,7 +20182,7 @@ EXPLAIN
       "used_key_parts": ["a"],
       "ref": ["test.t2.a"],
       "loops": 53.99999991,
-      "rows": 1,
+      "rows": 0,
       "cost": "COST_REPLACED",
       "filtered": 100,
       "attached_condition": "t3.c is not null"
@@ -20197,7 +20197,7 @@ EXPLAIN
       "key_length": "128",
       "used_key_parts": ["c"],
       "ref": ["test.t3.c"],
-      "loops": 80.99999987,
+      "loops": 52.89695991,
       "rows": 5,
       "cost": "COST_REPLACED",
       "filtered": 100,
@@ -20690,7 +20690,7 @@ from t2, t3, (select c, b, sum(b) over (partition by c) from t4 ) t
 where t2.b < 40 and t2.a=t3.a and t3.c=t.c;
 id select_type table type possible_keys key key_len ref rows filtered Extra
 1 PRIMARY t2 ALL NULL NULL NULL NULL 90 60.00 Using where
-1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 1 100.00 Using where
+1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 0 100.00 Using where
 1 PRIMARY <derived2> ref key0 key0 128 test.t3.c 10 100.00
 2 DERIVED t4 ALL idx_c NULL NULL NULL 160 100.00 Using temporary
 Warnings:
@@ -20725,7 +20725,7 @@ EXPLAIN
       "used_key_parts": ["a"],
       "ref": ["test.t2.a"],
       "loops": 53.99999991,
-      "rows": 1,
+      "rows": 0,
       "cost": "COST_REPLACED",
       "filtered": 100,
       "attached_condition": "t3.c is not null"
@@ -20740,7 +20740,7 @@ EXPLAIN
       "key_length": "128",
       "used_key_parts": ["c"],
       "ref": ["test.t3.c"],
-      "loops": 80.99999987,
+      "loops": 52.89695991,
       "rows": 10,
       "cost": "COST_REPLACED",
       "filtered": 100,
diff --git a/mysql-test/main/join_cache.result b/mysql-test/main/join_cache.result
index aa12ac8942d39..ddd82d5ce3933 100644
--- a/mysql-test/main/join_cache.result
+++ b/mysql-test/main/join_cache.result
@@ -6365,7 +6365,7 @@ LEFT JOIN t3 ON t2.d = t3.c LEFT
 JOIN t4 ON t3.c=1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 SIMPLE t1 index NULL PRIMARY 4 NULL 10000 Using index
-1 SIMPLE t2 ref b b 4 test.t1.b 1
+1 SIMPLE t2 ref b b 4 test.t1.b 0
 1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index
 1 SIMPLE t4 index NULL PRIMARY 4 NULL 3000 Using where; Using index
 SELECT COUNT(*)
@@ -6383,7 +6383,7 @@ LEFT JOIN t3 ON t2.d = t3.c LEFT
 JOIN t4 ON t3.c=1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 SIMPLE t1 index NULL PRIMARY 4 NULL 10000 Using index
-1 SIMPLE t2 ref b b 4 test.t1.b 1
+1 SIMPLE t2 ref b b 4 test.t1.b 0
 1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index
 1 SIMPLE t4 index NULL PRIMARY 4 NULL 3000 Using where; Using index; Using join buffer (flat, BNL join)
 SELECT COUNT(*)
diff --git a/mysql-test/main/match_probability.result b/mysql-test/main/match_probability.result
new file mode 100644
index 0000000000000..f2d2bea1286cd
--- /dev/null
+++ b/mysql-test/main/match_probability.result
@@ -0,0 +1,148 @@
+# Setup: Two tables with skewed foreign key distribution
+CREATE TABLE t_ndv100 (a INT);
+CREATE TABLE t_ndv3 (a INT, b VARCHAR(50), KEY idx_a (a));
+# All values in t_ndv100.a are different (100 distinct values)
+INSERT INTO t_ndv100 (a) SELECT seq FROM seq_1_to_100;
+# There are only 3 distinct values in t_ndv3.a
+INSERT INTO t_ndv3 (a, b) SELECT seq/100, 'def' FROM seq_1_to_300;
+set optimizer_trace=1;
+# Inefficient plan until EITS is collected (full scan of t_ndv3)
+EXPLAIN SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100
+1 SIMPLE t_ndv3 ALL idx_a NULL NULL NULL 300 Using where; Using join buffer (flat, BNL join)
+# Collect statistics only for the driving table (t_ndv100)
+ANALYZE TABLE t_ndv100 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t_ndv100 analyze status Engine-independent statistics collected
+test.t_ndv100 analyze status OK
+# It is not enough until there are statistics for the inner table (t_ndv3)
+EXPLAIN SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100
+1 SIMPLE t_ndv3 ALL idx_a NULL NULL NULL 300 Using where; Using join buffer (flat, BNL join)
+ANALYZE TABLE t_ndv3 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t_ndv3 analyze status Engine-independent statistics collected
+test.t_ndv3 analyze status Table is already up to date
+# After EITS is collected for both tables, the plan is efficient
"rows" in the output are close to actual "r_rows" +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +1 SIMPLE t_ndv3 ref idx_a idx_a 5 test.t_ndv100.a 3 2.51 100.00 100.00 +# Optimization must be reflected in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +[ + 0.04, + 75 +] +# Match probability is not applicable because the left part of the +# join condition is not a field but an expression (t_ndv100.a + 10). See the +# difference between "rows" and "r_rows" +ANALYZE SELECT t_ndv100.a, t_ndv3.b +FROM t_ndv100 +STRAIGHT_JOIN t_ndv3 ON t_ndv100.a + 10 = t_ndv3.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 +1 SIMPLE t_ndv3 ALL idx_a NULL NULL NULL 300 300.00 25.00 0.00 Using where; Using join buffer (flat, BNL join) +# Must be no records in the trace +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +NULL +# If all values are NULL, match probability is not applicable +CREATE TABLE t_nulls (a INT, b VARCHAR(50), KEY idx_a (a)); +INSERT INTO t_nulls (a, b) SELECT NULL, 'def' FROM seq_1_to_300; +ANALYZE TABLE t_nulls PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t_nulls analyze status Engine-independent statistics collected +test.t_nulls analyze status Table is already up to date +EXPLAIN SELECT t_ndv100.a, t_nulls.b +FROM t_ndv100 +STRAIGHT_JOIN t_nulls ON t_ndv100.a = t_nulls.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t_nulls ref idx_a idx_a 5 test.t_ndv100.a 1 +SELECT json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +FROM information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.match_probability', +'$**.rows_before_adjustment')) +NULL +#------------------------------ +# Test multi-part index +#------------------------------ +CREATE TABLE t_ndv30 (a INT, b INT, c VARCHAR(50), KEY idx_ab (a, b)); +INSERT INTO t_ndv30 (a, b, c) SELECT seq/10, seq/10, 'def' FROM seq_1_to_300; +ANALYZE TABLE t_ndv30 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t_ndv30 analyze status Engine-independent statistics collected +test.t_ndv30 analyze status Table is already up to date +# If more than one key part is used, match probability is not applicable. 
+# See the mismatch between "rows" and "r_rows" in ANALYZE output
+ANALYZE SELECT t_ndv100.a, t_ndv30.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a
+AND t_ndv100.a = t_ndv30.b;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where
+1 SIMPLE t_ndv30 ref idx_ab idx_ab 10 test.t_ndv100.a,test.t_ndv100.a 9 2.96 100.00 100.00 Using index
+# There must be no records in the trace
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+'$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.match_probability',
+'$**.rows_before_adjustment'))
+NULL
+# Only the first key part of `idx_ab` is used, so match probability is applicable.
+# "rows" is now much closer to actual "r_rows".
+ANALYZE SELECT t_ndv100.a, t_ndv30.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where
+1 SIMPLE t_ndv30 ref idx_ab idx_ab 5 test.t_ndv100.a 3 2.96 100.00 100.00 Using index
+# This must be reflected in the trace
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+'$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.match_probability',
+'$**.rows_before_adjustment'))
+[
+    0.31000062,
+    9.6774
+]
+# Three tables with equality propagation.
+# min_driving_ndv = min(NDV(t_ndv30.a), NDV(t_ndv100.a)) = min(30, 100) = 30
+ANALYZE SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv30
+STRAIGHT_JOIN t_ndv100 ON t_ndv30.a = t_ndv100.a
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t_ndv30 index idx_ab idx_ab 10 NULL 300 300.00 100.00 100.00 Using where; Using index
+1 SIMPLE t_ndv100 ALL NULL NULL NULL NULL 100 100.00 100.00 0.99 Using where; Using join buffer (flat, BNL join)
+1 SIMPLE t_ndv3 ref idx_a idx_a 5 test.t_ndv30.a 9 8.48 100.00 100.00
+# This must be reflected in the trace (match_prob ~= 0.1)
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+'$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.match_probability',
+'$**.rows_before_adjustment'))
+[
+    0.129032,
+    75
+]
+DROP TABLE t_ndv100, t_ndv3, t_ndv30, t_nulls;
diff --git a/mysql-test/main/match_probability.test b/mysql-test/main/match_probability.test
new file mode 100644
index 0000000000000..7890a5dc99eaf
--- /dev/null
+++ b/mysql-test/main/match_probability.test
@@ -0,0 +1,113 @@
+--source include/have_sequence.inc
+
+--echo # Setup: Two tables with skewed foreign key distribution
+CREATE TABLE t_ndv100 (a INT);
+
+CREATE TABLE t_ndv3 (a INT, b VARCHAR(50), KEY idx_a (a));
+
+--echo # All values in t_ndv100.a are different (100 distinct values)
+INSERT INTO t_ndv100 (a) SELECT seq FROM seq_1_to_100;
+
+--echo # There are only 3 distinct values in t_ndv3.a
+INSERT INTO t_ndv3 (a, b) SELECT seq/100, 'def' FROM seq_1_to_300;
+
+set optimizer_trace=1;
+
+--echo # Inefficient plan until EITS is collected (full scan of t_ndv3)
+EXPLAIN SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+
+--echo # Collect statistics only for the driving table (t_ndv100)
+ANALYZE TABLE t_ndv100 PERSISTENT FOR ALL;
+
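+# EITS ("engine-independent table statistics") stores per-column
+# avg_frequency; the optimizer estimates NDV as rows / avg_frequency.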
+--echo # It is not enough until there are statistics for the inner table (t_ndv3)
+EXPLAIN SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+
+ANALYZE TABLE t_ndv3 PERSISTENT FOR ALL;
+
+--echo # After EITS is collected for both tables, the plan is efficient
+--echo # (index scan of t_ndv3). "rows" in the output is close to the actual "r_rows"
+ANALYZE SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+
+--echo # Optimization must be reflected in the trace
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+                     '$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+
+--echo # Match probability is not applicable because the left part of the
+--echo # join condition is not a field but an expression (t_ndv100.a + 10). See the
+--echo # difference between "rows" and "r_rows"
+ANALYZE SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a + 10 = t_ndv3.a;
+
+--echo # There must be no records in the trace
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+                     '$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+
+--echo # If all values are NULL, match probability is not applicable
+CREATE TABLE t_nulls (a INT, b VARCHAR(50), KEY idx_a (a));
+
+INSERT INTO t_nulls (a, b) SELECT NULL, 'def' FROM seq_1_to_300;
+
+ANALYZE TABLE t_nulls PERSISTENT FOR ALL;
+
+EXPLAIN SELECT t_ndv100.a, t_nulls.b
+FROM t_ndv100
+STRAIGHT_JOIN t_nulls ON t_ndv100.a = t_nulls.a;
+
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+                     '$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+
+--echo #------------------------------
+--echo # Test multi-part index
+--echo #------------------------------
+CREATE TABLE t_ndv30 (a INT, b INT, c VARCHAR(50), KEY idx_ab (a, b));
+
+INSERT INTO t_ndv30 (a, b, c) SELECT seq/10, seq/10, 'def' FROM seq_1_to_300;
+
+ANALYZE TABLE t_ndv30 PERSISTENT FOR ALL;
+
+--echo # If more than one key part is used, match probability is not applicable.
+--echo # See the mismatch between "rows" and "r_rows" in ANALYZE output
+ANALYZE SELECT t_ndv100.a, t_ndv30.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a
+    AND t_ndv100.a = t_ndv30.b;
+
+--echo # There must be no records in the trace
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+                     '$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+
+--echo # Only the first key part of `idx_ab` is used, so match probability is applicable.
+--echo # "rows" is now much closer to actual "r_rows".
+ANALYZE SELECT t_ndv100.a, t_ndv30.b
+FROM t_ndv100
+STRAIGHT_JOIN t_ndv30 ON t_ndv100.a = t_ndv30.a;
+
+--echo # This must be reflected in the trace
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+                     '$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+
+--echo # Three tables with equality propagation.
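+# With t_ndv30.a = t_ndv100.a = t_ndv3.a, either preceding column can drive
+# the lookup into t_ndv3, so the smallest driving NDV is used (tracked as
+# min_driving_ndv in best_access_path).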
+--echo # min_driving_ndv = min(NDV(t_ndv30.a), NDV(t_ndv100.a)) = min(30, 100) = 30
+ANALYZE SELECT t_ndv100.a, t_ndv3.b
+FROM t_ndv30
+STRAIGHT_JOIN t_ndv100 ON t_ndv30.a = t_ndv100.a
+STRAIGHT_JOIN t_ndv3 ON t_ndv100.a = t_ndv3.a;
+
+--echo # This must be reflected in the trace (match_prob ~= 0.1)
+SELECT json_detailed(json_extract(trace, '$**.match_probability',
+                     '$**.rows_before_adjustment'))
+FROM information_schema.optimizer_trace;
+
+DROP TABLE t_ndv100, t_ndv3, t_ndv30, t_nulls;
\ No newline at end of file
diff --git a/mysql-test/main/null_key.result b/mysql-test/main/null_key.result
index 0b1446fd72e1c..9bc20427487b1 100644
--- a/mysql-test/main/null_key.result
+++ b/mysql-test/main/null_key.result
@@ -421,7 +421,7 @@ EXPLAIN SELECT SQL_CALC_FOUND_ROWS * FROM t1 LEFT JOIN t2 ON t1.a=t2.a
 LEFT JOIN t3 ON t2.b=t3.b;
 id select_type table type possible_keys key key_len ref rows Extra
 1 SIMPLE t1 ALL NULL NULL NULL NULL 4
-1 SIMPLE t2 ref idx idx 5 test.t1.a 1 Using where
+1 SIMPLE t2 ref idx idx 5 test.t1.a 0 Using where
 1 SIMPLE t3 ref idx idx 5 test.t2.b 1 Using where; Using index
 FLUSH STATUS ;
 SELECT SQL_CALC_FOUND_ROWS * FROM t1 LEFT JOIN t2 ON t1.a=t2.a
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result
index ef2d2e85b8681..b8f5da7537f1a 100644
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@@ -325,13 +325,13 @@ and o_orderkey = l_orderkey
 group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
 order by o_totalprice desc, o_orderdate;
 id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
-1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
+1 PRIMARY customer ALL PRIMARY NULL NULL NULL 150 100.00 Using temporary; Using filesort
+1 PRIMARY orders ref PRIMARY,i_o_custkey i_o_custkey 5 dbt3_s001.customer.c_custkey 10 100.00
 1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index
 Warnings:
-Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey` and `<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
+Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`orders`.`o_custkey` = `dbt3_s001`.`customer`.`c_custkey` and `<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
 select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice,
 sum(l_quantity)
 from customer, orders, lineitem
@@ -359,13 +359,13 @@ and o_orderkey = l_orderkey
 group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
 order by o_totalprice desc, o_orderdate;
 id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
-1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
+1 PRIMARY customer ALL PRIMARY NULL NULL NULL 150 100.00 Using temporary; Using filesort
+1 PRIMARY orders ref PRIMARY,i_o_custkey i_o_custkey 5 dbt3_s001.customer.c_custkey 10 100.00
 1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index
 Warnings:
-Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey` and `<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
+Note 1003 /* select#1 */ select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (/* select#2 */ select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where `dbt3_s001`.`orders`.`o_custkey` = `dbt3_s001`.`customer`.`c_custkey` and `<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` and `dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey` group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
 select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice,
 sum(l_quantity)
 from customer, orders, lineitem
@@ -1294,7 +1294,7 @@ select * from t1, t2, t1 as t3
 where t1.b=t2.c and t2.d=t3.a and t3.b<5 and t1.a < 2000;
 id select_type table type possible_keys key key_len ref rows filtered Extra
 1 SIMPLE t1 ALL NULL NULL NULL NULL 262144 100.00 Using where
-1 SIMPLE t2 ref c,d c 5 test.t1.b 5 100.00
+1 SIMPLE t2 ref c,d c 5 test.t1.b 2 100.00
 1 SIMPLE t3 ALL NULL NULL NULL NULL 262144 100.00 Using where; Using join buffer (flat, BNL join)
 Warnings:
 Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t1` `t3` where `test`.`t2`.`c` = `test`.`t1`.`b` and `test`.`t3`.`a` = `test`.`t2`.`d` and `test`.`t3`.`b` < 5 and `test`.`t1`.`a` < 2000
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b986e59656e08..ba88584ac098d 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7297,6 +7297,26 @@ add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
   keyuse.cond_guard= key_field->cond_guard;
   keyuse.sj_pred_no= key_field->sj_pred_no;
   keyuse.validity_ref= 0;
+
+  /* Compute driving_ndv from EITS statistics if val is a field */
+  keyuse.driving_ndv= 0;
+  {
+    Item *driving_item= key_field->val->real_item();
+    if (driving_item->type() == Item::FIELD_ITEM)
+    {
+      Field *driving_field= ((Item_field*) driving_item)->field;
+      if (driving_field->read_stats)
+      {
+        double avg_freq= driving_field->read_stats->get_avg_frequency();
+        if (avg_freq > 0)
+        {
+          double rows= (double) driving_field->table->stat_records();
+          keyuse.driving_ndv= MY_MIN(rows / avg_freq, rows);
+        }
+      }
+    }
+  }
+
   return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
 }
@@ -7450,6 +7470,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
   keyuse.sj_pred_no= UINT_MAX;
   keyuse.validity_ref= 0;
   keyuse.null_rejecting= FALSE;
+  keyuse.driving_ndv= 0; /* Not applicable for fulltext keys */
   return insert_dynamic(keyuse_array,(uchar*) &keyuse);
 }
@@ -8222,6 +8243,52 @@ inline double use_found_constraint(double records)
 }
 
+
+/*
+  Compute the probability that a value from the driving table will find
+  a match in the inner table's index, based on NDV (number of distinct values).
+
+  @param inner_table      The inner (looked-up) table
+  @param key              The key being used for ref access
+  @param min_driving_ndv  Minimum NDV across all usable driving columns
+                          (precomputed during KEYUSE iteration)
+
+  @return match_probability in range (0, 1.0]
+          1.0 if statistics are not available
+
+  The idea: if the driving table has more distinct values than the inner
+  table's key, only a fraction of driving rows will find matches.
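+
+  Worked example (the numbers from the match_probability test): an inner
+  table with 300 rows and avg_frequency 75 has ndv(inner) = 4; a driving
+  column with ndv(outer) = 100 lets only ~4 of every 100 driving values
+  match, so a per-key estimate of 75 rows is scaled by 0.04 down to 3.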
+
+    match_probability = min(1.0, ndv(inner.key) / ndv(outer.col))
+
+  where ndv = table_rows / avg_frequency
+*/
+
+static double
+get_ref_match_probability(TABLE *inner_table, uint key, double min_driving_ndv)
+{
+  /* Get NDV for inner table's key (first key part) */
+  KEY *keyinfo= inner_table->key_info + key;
+  Field *inner_field= keyinfo->key_part[0].field;
+  if (!inner_field->read_stats)
+    return 1.0;
+
+  double inner_avg_freq= inner_field->read_stats->get_avg_frequency();
+  if (inner_avg_freq <= 0)
+    return 1.0;
+
+  double inner_rows= (double) inner_table->stat_records();
+  double inner_ndv= inner_rows / inner_avg_freq;
+
+  /*
+    match_probability = min(1.0, ndv(inner) / ndv(outer))
+
+    If inner has fewer distinct values than outer, only a fraction
+    of outer rows will find a match.
+  */
+  return MY_MIN(1.0, inner_ndv / min_driving_ndv);
+}
+
 
 /*
   Calculate the cost of reading a set of rows trough an index
@@ -8801,6 +8868,8 @@ best_access_path(JOIN *join,
       DBUG_PRINT("info", ("Considering ref access on key %s",
                           keyuse->table->key_info[keyuse->key].name.str));
 
+      double min_driving_ndv= DBL_MAX; /* Track min driving NDV for keypart 0 */
+
       do /* For each keypart */
       {
         uint keypart= keyuse->keypart;
@@ -8844,6 +8913,10 @@ best_access_path(JOIN *join,
             best_part_found_ref= (keyuse->used_tables & ~join->const_table_map);
         }
 
+        /* Track minimum driving NDV for keypart 0 */
+        if (keypart == 0 && keyuse->driving_ndv > 0)
+          set_if_smaller(min_driving_ndv, keyuse->driving_ndv);
+
        if (rec > keyuse->ref_table_rows)
          rec= keyuse->ref_table_rows;
 	/*
@@ -9059,6 +9132,36 @@
               }
             }
           }
+
+          /*
+            Apply match_probability based on NDV to adjust records.
+            If the driving table has more distinct values than the inner
+            table's key, only a fraction of driving rows will find matches.
+          */
+          if (key_parts == 1 &&
+              min_driving_ndv > 0 && min_driving_ndv < DBL_MAX)
+          {
+            /*
+              Bound effective NDV by record_count - NDV can't exceed the
+              number of rows from driving tables after filtering.
+            */
+            double effective_driving_ndv= min_driving_ndv;
+            set_if_smaller(effective_driving_ndv, record_count);
+
+            double match_prob= get_ref_match_probability(table, key,
+                                                         effective_driving_ndv);
+            if (match_prob < 1.0)
+            {
+              if (unlikely(trace_access_idx.trace_started()))
+              {
+                trace_access_idx.
+                  add("match_probability", match_prob).
+                  add("rows_before_adjustment", records);
+              }
+              records *= match_prob;
+            }
+          }
+
           /* Calculate the cost of the index access */
           tmp= cost_for_index_read(thd, table, key, (ha_rows) records,
                                    0);
@@ -9270,6 +9373,36 @@
           }
           set_if_smaller(records, (double) s->records);
+
+          /*
+            Apply match_probability based on NDV to adjust records.
+            If the driving table has more distinct values than the inner
+            table's key, only a fraction of driving rows will find matches.
+          */
+          if (max_key_part == 1 &&
+              min_driving_ndv > 0 && min_driving_ndv < DBL_MAX)
+          {
+            /*
+              Bound effective NDV by record_count - NDV can't exceed the
+              number of rows from driving tables after filtering.
+            */
+            double effective_driving_ndv= min_driving_ndv;
+            set_if_smaller(effective_driving_ndv, record_count);
+
+            double match_prob= get_ref_match_probability(table, key,
+                                                         effective_driving_ndv);
+            if (match_prob < 1.0)
+            {
+              if (unlikely(trace_access_idx.trace_started()))
+              {
+                trace_access_idx.
+                  add("match_probability", match_prob).
+ add("rows_before_adjustment", records); + } + records *= match_prob; + } + } + tmp= cost_for_index_read(thd, table, key, (ha_rows)records, 0); tmp.copy_cost+= extra_cost; } diff --git a/sql/sql_select.h b/sql/sql_select.h index 82e975407edda..06d92b76e0d07 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -81,6 +81,13 @@ typedef struct keyuse_t { */ bool *validity_ref; + /* + Number of distinct values in the driving column (val). + Computed from EITS statistics when KEYUSE is created. + 0 means statistics not available. + */ + double driving_ndv; + bool is_for_hash_join() { return is_hash_join_key_no(key); } } KEYUSE; diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 24736007f083c..52d5bf97177d9 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -705,5 +705,7 @@ void print_keyuse_array_for_trace(THD *thd, DYNAMIC_ARRAY *keyuse_array) .field->field_name.str)). add("equals",keyuse->val). add("null_rejecting",keyuse->null_rejecting); + if (keyuse->driving_ndv > 0) + keyuse_elem.add("driving_ndv", keyuse->driving_ndv); } }