From bc4141d954d53046b53ef5575dafbd1caa04fcf9 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Mar 2026 15:47:17 -0700 Subject: [PATCH 1/3] fix: set escape_char when simplifying starts_with to LIKE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `starts_with` simplification escapes special LIKE characters (%, _, \) with a backslash but did not set escape_char in the resulting LIKE expression. As a result, the backslashes in the generated pattern were treated as literal characters instead of introducing escape sequences. For example, `starts_with(col, 'test_ns')` was simplified to: `col LIKE 'test\_ns%'` with `escape_char: None` This meant the `\_` was not recognized as an escaped underscore, causing incorrect query results when the prefix contained special LIKE pattern characters. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- datafusion/functions/src/string/starts_with.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index e50bd9f657669..a840295d631aa 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -209,7 +209,7 @@ impl ScalarUDFImpl for StartsWithFunc { negated: false, expr: Box::new(expr), pattern: Box::new(pattern), - escape_char: None, + escape_char: Some('\\'), case_insensitive: false, }))); } From ab5d6f6915fd8eb143b4dfd1228b55c4400bb107 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Mar 2026 16:23:24 -0700 Subject: [PATCH 2/3] fix tests --- datafusion/sqllogictest/test_files/string/string_view.slt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 4639103f9e8e3..131cd5a29e91a 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ 
b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -355,7 +355,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: test.column1_utf8view LIKE Utf8View("äöüß%") AS c1, CASE test.column1_utf8view IS NOT NULL WHEN Boolean(true) THEN Boolean(true) END AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 +01)Projection: test.column1_utf8view LIKE Utf8View("äöüß%") ESCAPE '\' AS c1, CASE test.column1_utf8view IS NOT NULL WHEN Boolean(true) THEN Boolean(true) END AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 02)--TableScan: test projection=[column1_utf8view] ## Test STARTS_WITH is rewitten to LIKE when the pattern is a constant @@ -370,7 +370,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f\_o%") AS c4, test.column1_large_utf8 LIKE LargeUtf8("f\_o%") AS c5, test.column1_utf8view LIKE Utf8View("f\_o%") AS c6 +01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") ESCAPE '\' AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") ESCAPE '\' AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") ESCAPE '\' AS c3, test.column1_utf8 LIKE Utf8("f\_o%") ESCAPE '\' AS c4, test.column1_large_utf8 LIKE LargeUtf8("f\_o%") ESCAPE '\' AS c5, test.column1_utf8view LIKE Utf8View("f\_o%") ESCAPE '\' AS c6 02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view] ## Test STARTS_WITH works with column arguments @@ -950,7 +950,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: test.column1_utf8view LIKE Utf8View("foo%") AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2 +01)Projection: test.column1_utf8view LIKE Utf8View("foo%") ESCAPE '\' AS c, 
starts_with(test.column1_utf8view, test.column2_utf8view) AS c2 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for TRANSLATE From 8aa04aa1d6a2864909e5c858a5e1ad50f17d4f3e Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Mar 2026 23:09:58 -0700 Subject: [PATCH 3/3] fix: update parquet.slt test to expect ESCAPE clause Update the expected output of the starts_with simplification test to include the ESCAPE '\' clause, which is now correctly set when simplifying starts_with to LIKE. Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- datafusion/sqllogictest/test_files/parquet.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index be713b963b451..9b81e4bf7c886 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -666,8 +666,8 @@ query TT explain select * from foo where starts_with(column1, 'f'); ---- logical_plan -01)Filter: foo.column1 LIKE Utf8View("f%") -02)--TableScan: foo projection=[column1], partial_filters=[foo.column1 LIKE Utf8View("f%")] +01)Filter: foo.column1 LIKE Utf8View("f%") ESCAPE '\' +02)--TableScan: foo projection=[column1], partial_filters=[foo.column1 LIKE Utf8View("f%") ESCAPE '\'] physical_plan 01)FilterExec: column1@0 LIKE f% 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1