From a370dc7c9eafdc1c347a2a43461e4b38cc7f8612 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Thu, 25 Sep 2025 10:59:55 -0700 Subject: [PATCH] [pr] changes based on feedback Files: * lib.rs: Fix Java placeholder regex, it was consuming too much stuff in between the curlies. * source_ref.rs: Add comments for the regexes. Don't reuse `text` in `build_matcher()`, it's really confusing. --- src/lib.rs | 8 +-- .../log2src__tests__basic_python.snap | 6 +-- src/source_ref.rs | 51 +++++++++++++++---- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5aa5804..67861ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -279,7 +279,7 @@ static RUST_PLACEHOLDER_REGEX: LazyLock = LazyLock::new(|| { }); static JAVA_PLACEHOLDER_REGEX: LazyLock = - LazyLock::new(|| Regex::new(r#"\{.*}|\\\{(.*)}"#).unwrap()); + LazyLock::new(|| Regex::new(r#"\{[^}]*}|\\\{([^}]*)}"#).unwrap()); static CPP_PLACEHOLDER_REGEX: LazyLock = LazyLock::new(|| { Regex::new(r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]|\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"#).unwrap() @@ -338,7 +338,7 @@ impl SourceLanguage { arguments: [ (argument_list (template_expression template_argument: (string_literal) @arguments)) - (argument_list (string_literal) @arguments) + (argument_list . (string_literal) @arguments) ] (#match? @object-name "log(ger)?|LOG(GER)?") (#match? @method-name "fine|debug|info|warn|trace") @@ -586,7 +586,7 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec let range = result.range; let source = code.buffer.as_str(); let text = source[range.start_byte..range.end_byte].to_string(); - // println!("text={} matched.len()={}", text, matched.len()); + // eprintln!("text={} matched.len()={}", text, matched.len()); // check the text doesn't match any of the logging related identifiers if code .info @@ -921,7 +921,7 @@ def main(args): logging.info(f'Hello, {args[1]}!') logger.warning(f"warning message:\nlow disk space") logger.info(rf"""info message: -processing started -- {args[0]}""") +processing \started -- {args[0]}""") "#; #[test] diff --git a/src/snapshots/log2src__tests__basic_python.snap b/src/snapshots/log2src__tests__basic_python.snap index bfe8ba0..ad6b625 100644 --- a/src/snapshots/log2src__tests__basic_python.snap +++ b/src/snapshots/log2src__tests__basic_python.snap @@ -44,9 +44,9 @@ expression: src_refs endLineNumber: 7 column: 16 name: main - text: "rf\"\"\"info message:\nprocessing started -- {args[0]}\"\"\"" - quality: 33 - pattern: "(?s)^info message:\\nprocessing started -- (.+)$" + text: "rf\"\"\"info message:\nprocessing \\started -- {args[0]}\"\"\"" + quality: 35 + pattern: "(?s)^info message:\\nprocessing \\\\started -- (.+)$" args: - Named: "args[0]" vars: [] diff --git a/src/source_ref.rs b/src/source_ref.rs index 1e75b9a..f3bd546 100644 --- a/src/source_ref.rs +++ b/src/source_ref.rs @@ -119,19 +119,19 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option Option = LazyLock::new(|| { Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\[0-7]{3}|\\0)|(\\N\{[^}]+})"#).unwrap() }); -// A regex for raw strings that doesn't try to interpret escape sequences. -static RAW_ESCAPE_REGEX: LazyLock = LazyLock::new(|| { - Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"#).unwrap() -}); +/// Regex for finding values that need to be escaped in a raw string-literal. The components are +/// as follows: +/// +/// * `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped. +/// * `[\n\r\t]` - White space characters that we should turn into regex escape sequences. +/// * `\\` - A backslash +static RAW_ESCAPE_REGEX: LazyLock = + LazyLock::new(|| Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"#).unwrap()); /// Escape special chars except newlines and carriage returns in order to support multiline strings fn escape_ignore_newlines(raw: bool, segment: &str) -> String { @@ -181,7 +194,7 @@ fn escape_ignore_newlines(raw: bool, segment: &str) -> String { } else if let Some(c) = cap.get(3) { if raw { result.push('\\'); - result.push_str(c.as_str()); + result.push_str(c.as_str()); } else { let c = c.as_str(); let c = &c[1..]; @@ -303,4 +316,20 @@ mod tests { matcher.as_str() ); } + + #[test] + fn test_build_matcher_raw() { + let MessageMatcher { matcher, .. } = build_matcher( + true, + "Hard-coded \\Windows\\Path", + SourceLanguage::Rust, + ) + .unwrap(); + assert_eq!( + Regex::new(r#"(?s)^Hard-coded \\Windows\\Path$"#) + .unwrap() + .as_str(), + matcher.as_str() + ); + } }