From c6e6e633edfa423a5f4f9f5f2c1f25cec02e200c Mon Sep 17 00:00:00 2001 From: Abanoub Doss Date: Thu, 21 May 2026 09:01:13 -0500 Subject: [PATCH] =?UTF-8?q?test:=20repro=20for=20ABA-32=20=E2=80=94=20LIKE?= =?UTF-8?q?=20ESCAPE=20unrepresentable=20in=20Vortex;=20DataFusion=20escap?= =?UTF-8?q?e=20silently=20dropped?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two #[ignore]'d regression tests that demonstrate the gap described in https://linear.app/abanoubdoss/issue/ABA-32: - vortex-array: `issue_aba32_like_options_supports_escape_char` — asserts `LikeOptions` exposes an `escape_char` field; fails on develop because the struct only has `negated` and `case_insensitive`. - vortex-datafusion: `issue_aba32_datafusion_conversion_preserves_like_escape_char` — builds a DataFusion `LikeExpr`, converts it through the Vortex path, and asserts the resulting `LikeOptions` carries escape information; fails on develop for the same reason. Both tests are REPRO-ONLY. The fix requires adding `escape_char: Option<char>` to `LikeOptions`, extending the proto schema, and threading the field through the kernel dispatch (including the FSST fast-path bailout already added by PR #8038, which inspects the pattern string for `\` rather than a `LikeOptions` field). 
Co-authored-by: Claude Sonnet 4.6 Signed-off-by: abnobdoss@proton.me Signed-off-by: Abanoub Doss --- vortex-array/src/scalar_fn/fns/like/mod.rs | 25 ++++++++++++ vortex-datafusion/src/convert/exprs.rs | 44 ++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/vortex-array/src/scalar_fn/fns/like/mod.rs b/vortex-array/src/scalar_fn/fns/like/mod.rs index 2e908ca17bd..18322a79778 100644 --- a/vortex-array/src/scalar_fn/fns/like/mod.rs +++ b/vortex-array/src/scalar_fn/fns/like/mod.rs @@ -364,4 +364,29 @@ mod tests { ); assert_eq!(None, ilike(col("a"), lit("a")).stat_falsification(&catalog)); } + + /// [ABA-32] `LikeOptions` has no `escape_char` field, so SQL LIKE … ESCAPE is + /// unrepresentable in Vortex's expression layer. + /// + /// The fix requires adding `escape_char: Option<char>` (or equivalent) to + /// `LikeOptions` and threading it through serialization, the kernel, and every + /// encoding fast-path. + /// + /// This test fails on develop because `format!("{:?}", opts)` produces a string + /// that does NOT contain "escape_char", proving the field is absent. + #[ignore = "demonstrates ABA-32; see https://linear.app/abanoubdoss/issue/ABA-32"] + #[test] + fn issue_aba32_like_options_supports_escape_char() { + use crate::scalar_fn::fns::like::LikeOptions; + + let opts = LikeOptions::default(); + let debug_repr = format!("{opts:?}"); + // After the fix `LikeOptions` will have an `escape_char` field; its Debug + // output will contain the substring "escape_char". On develop the struct + // only has `negated` and `case_insensitive`, so this assertion fails. 
+ assert!( + debug_repr.contains("escape_char"), + "LikeOptions is missing `escape_char` field (ABA-32): got {debug_repr:?}" + ); + } } diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs index 6a9ac0fdc52..e973f30845c 100644 --- a/vortex-datafusion/src/convert/exprs.rs +++ b/vortex-datafusion/src/convert/exprs.rs @@ -1005,4 +1005,48 @@ mod tests { assert_eq!(df_as_arrow, vec![0, 0, 50, 100, 100]); assert_eq!(vortex_as_arrow, df_as_arrow); } + + /// [ABA-32] The DataFusion→Vortex expression conversion silently drops the SQL + /// LIKE ESCAPE character. + /// + /// DataFusion's physical [`LikeExpr`] does not carry `escape_char` (the logical + /// layer drops it before producing the physical plan), and Vortex's `LikeOptions` + /// has no `escape_char` field either. Even for the default `\` escape, there is + /// no API surface to express or round-trip escape semantics. + /// + /// The fix requires: + /// 1. Adding `escape_char: Option<char>` to `LikeOptions` (vortex-array). + /// 2. Threading it through the proto serialization (vortex-proto). + /// 3. Plumbing it through this conversion once DataFusion exposes the field on + /// the physical `LikeExpr`. + /// + /// This test fails on develop because the converted `LikeOptions` debug output + /// does NOT contain "escape_char", proving the schema gap is present. + #[ignore = "demonstrates ABA-32; see https://linear.app/abanoubdoss/issue/ABA-32"] + #[test] + fn issue_aba32_datafusion_conversion_preserves_like_escape_char() { + let expr = Arc::new(df_expr::Column::new("text_col", 0)) as Arc<dyn PhysicalExpr>; + let pattern = Arc::new(df_expr::Literal::new(ScalarValue::Utf8(Some( + "test%".to_string(), + )))) as Arc<dyn PhysicalExpr>; + // DataFusion's physical LikeExpr has no escape_char parameter; we construct + // the simplest case (negated=false, case_insensitive=false) and verify + // whether the Vortex conversion at least exposes an escape_char field on + // the resulting LikeOptions. 
+ let like_expr = df_expr::LikeExpr::new(false, false, expr, pattern); + + let result = DefaultExpressionConvertor::default() + .convert(&like_expr) + .unwrap(); + let opts = result.as_::<Like>(); + let debug_repr = format!("{opts:?}"); + + // After the fix `LikeOptions` will contain an `escape_char` field and this + // string will appear in the debug output. On develop the field is absent. + assert!( + debug_repr.contains("escape_char"), + "LikeOptions converted from DataFusion is missing `escape_char` (ABA-32): \ + got {debug_repr:?}" + ); + } }