Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ static RUST_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
});

static JAVA_PLACEHOLDER_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\{.*}|\\\{(.*)}"#).unwrap());
LazyLock::new(|| Regex::new(r#"\{[^}]*}|\\\{([^}]*)}"#).unwrap());

static CPP_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]|\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"#).unwrap()
Expand Down Expand Up @@ -338,7 +338,7 @@ impl SourceLanguage {
arguments: [
(argument_list (template_expression
template_argument: (string_literal) @arguments))
(argument_list (string_literal) @arguments)
(argument_list . (string_literal) @arguments)
]
(#match? @object-name "log(ger)?|LOG(GER)?")
(#match? @method-name "fine|debug|info|warn|trace")
Expand Down Expand Up @@ -586,7 +586,7 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec
let range = result.range;
let source = code.buffer.as_str();
let text = source[range.start_byte..range.end_byte].to_string();
// println!("text={} matched.len()={}", text, matched.len());
// eprintln!("text={} matched.len()={}", text, matched.len());
// check the text doesn't match any of the logging related identifiers
if code
.info
Expand Down Expand Up @@ -921,7 +921,7 @@ def main(args):
logging.info(f'Hello, {args[1]}!')
logger.warning(f"warning message:\nlow disk space")
logger.info(rf"""info message:
processing started -- {args[0]}""")
processing \started -- {args[0]}""")
"#;

#[test]
Expand Down
6 changes: 3 additions & 3 deletions src/snapshots/log2src__tests__basic_python.snap
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ expression: src_refs
endLineNumber: 7
column: 16
name: main
text: "rf\"\"\"info message:\nprocessing started -- {args[0]}\"\"\""
quality: 33
pattern: "(?s)^info message:\\nprocessing started -- (.+)$"
text: "rf\"\"\"info message:\nprocessing \\started -- {args[0]}\"\"\""
quality: 35
pattern: "(?s)^info message:\\nprocessing \\\\started -- (.+)$"
args:
- Named: "args[0]"
vars: []
51 changes: 40 additions & 11 deletions src/source_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,19 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option<Mess
let mut quality = 0;
for cap in language.get_placeholder_regex().captures_iter(text) {
let placeholder = cap.get(0).unwrap();
let text = escape_ignore_newlines(raw, &text[last_end..placeholder.start()]);
quality += text.chars().filter(|c| !c.is_whitespace()).count();
pattern.push_str(text.as_str());
let subtext = escape_ignore_newlines(raw, &text[last_end..placeholder.start()]);
quality += subtext.chars().filter(|c| !c.is_whitespace()).count();
pattern.push_str(subtext.as_str());
last_end = placeholder.end();
pattern.push_str("(.+)");
args.push(language.captures_to_format_arg(&cap));
}
let text = escape_ignore_newlines(raw, &text[last_end..]);
quality += text.chars().filter(|c| !c.is_whitespace()).count();
let subtext = escape_ignore_newlines(raw, &text[last_end..]);
quality += subtext.chars().filter(|c| !c.is_whitespace()).count();
if quality == 0 {
None
} else {
pattern.push_str(text.as_str());
pattern.push_str(subtext.as_str());
pattern.push('$');
Some(MessageMatcher {
matcher: Regex::new(pattern.as_str()).unwrap(),
Expand All @@ -142,14 +142,27 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option<Mess
}
}

/// Regex for finding values that need to be escaped in a (non-raw) string-literal.
///
/// The pattern is an alternation of four capture groups, tried left to right. The index of the
/// group that participated in a match identifies the kind of value that was found:
///
/// 1. `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped.
/// 2. `[\n\r\t]` - White space characters (newline, carriage return, tab) that we should turn
///    into regex escape sequences.
/// 3. `\\[0-7]{3}|\\0` - An octal escape-sequence: a backslash followed by exactly three octal
///    digits, or the short form `\0`. Regex does not support octal escape-sequences, so we need
///    to turn them into a hex escape.
/// 4. `\\N\{[^}]+}` - Python named-Unicode escape that is turned into a `\w` since it would be
///    challenging to get the names all right.
static ESCAPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\[0-7]{3}|\\0)|(\\N\{[^}]+})"#).unwrap()
});

// A regex for raw strings that doesn't try to interpret escape sequences.
static RAW_ESCAPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"#).unwrap()
});
/// Regex for finding values that need to be escaped in a raw string-literal. Escape sequences
/// are not interpreted here; a backslash is only ever matched on its own.
///
/// The pattern is an alternation of three capture groups, and the index of the group that
/// participated in a match identifies the kind of value that was found:
///
/// 1. `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped.
/// 2. `[\n\r\t]` - White space characters (newline, carriage return, tab) that we should turn
///    into regex escape sequences.
/// 3. `\\` - A single backslash. In a raw string-literal it is ordinary text, so it has to be
///    escaped itself for the generated pattern to match it literally.
static RAW_ESCAPE_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"#).unwrap());

/// Escape special chars except newlines and carriage returns in order to support multiline strings
fn escape_ignore_newlines(raw: bool, segment: &str) -> String {
Expand Down Expand Up @@ -181,7 +194,7 @@ fn escape_ignore_newlines(raw: bool, segment: &str) -> String {
} else if let Some(c) = cap.get(3) {
if raw {
result.push('\\');
result.push_str(c.as_str());
result.push_str(c.as_str());
} else {
let c = c.as_str();
let c = &c[1..];
Expand Down Expand Up @@ -303,4 +316,20 @@ mod tests {
matcher.as_str()
);
}

#[test]
fn test_build_matcher_raw() {
    // With `raw == true` a backslash in the message text is plain text, not the start of an
    // escape sequence, so every backslash must come out escaped in the generated pattern.
    let built = build_matcher(true, "Hard-coded \\Windows\\Path", SourceLanguage::Rust).unwrap();
    let MessageMatcher {
        matcher: actual, ..
    } = built;

    // Compiling the expected pattern first proves that it is itself a valid regex.
    let expected = Regex::new(r#"(?s)^Hard-coded \\Windows\\Path$"#).unwrap();

    assert_eq!(expected.as_str(), actual.as_str());
}
}
Loading