Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions encodings/fsst/src/dfa/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ enum LikeKind<'a> {

impl<'a> LikeKind<'a> {
fn parse(pattern: &'a [u8]) -> Option<Self> {
// The fast-path matchers below do not understand SQL LIKE escape sequences (e.g. `\%`
// matching a literal `%`). If the pattern contains a backslash we fall back to the
// general implementation, which correctly interprets escapes.
if pattern.contains(&b'\\') {
return None;
}

// `prefix%` (including just `%` where prefix is empty)
if let Some(prefix) = pattern.strip_suffix(b"%")
&& !prefix.contains(&b'%')
Expand Down
10 changes: 10 additions & 0 deletions encodings/fsst/src/dfa/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ fn test_like_kind_parse() {
// Suffix and underscore patterns are not supported.
assert!(LikeKind::parse(b"%suffix").is_none());
assert!(LikeKind::parse(b"a_c").is_none());

// Patterns containing the SQL LIKE escape character must not be parsed by the fast path,
// because that path treats `\` as an ordinary byte and would misinterpret escapes. For
// example, `%\%` (the pattern produced by Spark's `endsWith("%")`) means "ends with `%`",
// not "contains `\`". The fast path should bail so the general implementation handles it.
assert!(LikeKind::parse(br"%\%").is_none());
assert!(LikeKind::parse(br"\%%").is_none());
assert!(LikeKind::parse(br"%\_%").is_none());
assert!(LikeKind::parse(br"\_%").is_none());
assert!(LikeKind::parse(br"%\\%").is_none());
}

/// No symbols — all bytes escaped. Simplest case to see the two tables.
Expand Down
Loading