From 4f3ef4c1a9bbce57810630e9d50ca9a567be567e Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Wed, 1 Oct 2025 22:04:07 -0700 Subject: [PATCH 1/2] [stacktrace] pull file/line/name out of stack traces The previous change just pulled out the entire stack trace from the log message. This change processes the stack traces to pull out the file/line/name data. Note: I haven't standardized an order for the call sites in a trace, Java goes one way and Python goes the other. Sigh... Files: * debugAdapter.ts: Add exceptionTrace field to LogMapping. * lib.rs: The find_source_file_statements() can now match multiple source files since find_file() can now match multiple files. Add exception_trace to LogMapping and to_exception_trace() to StackTrace. * main.rs: Change the meaning of --count to be the number of messages since we don't necessarily know how many lines are in a message. * source_hier.rs: Since the Java stack trace code can only figure out the package and class name. The find_file() method has been changed to allow searching the tree for a partial path. For example, searching for "org/example/Main.java" will walk the entire tree looking for a match of that path. It will then return all possible matches of that path. --- editors/code/src/debugAdapter.ts | 6 + src/lib.rs | 139 ++++++++++++++++-- src/main.rs | 23 ++- .../log2src__tests__python_trace.snap | 12 ++ src/source_hier.rs | 57 ++++--- src/source_ref.rs | 10 ++ 6 files changed, 210 insertions(+), 37 deletions(-) create mode 100644 src/snapshots/log2src__tests__python_trace.snap diff --git a/editors/code/src/debugAdapter.ts b/editors/code/src/debugAdapter.ts index 9fbb020..4434ebd 100644 --- a/editors/code/src/debugAdapter.ts +++ b/editors/code/src/debugAdapter.ts @@ -19,9 +19,15 @@ import * as path from 'path'; import { outputChannel } from './extension'; +interface CallSite { + name: string, + sourcePath: string, + lineNumber: number +} interface LogMapping { srcRef: SourceRef, + exceptionTrace: Array, variables: Array } diff --git a/src/lib.rs b/src/lib.rs index 5c63a7e..2dbbfd8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ mod source_ref; // TODO: doesn't need to be exposed if we can clean up the arguments to do_mapping use crate::progress::WorkGuard; use crate::source_hier::{ScanEvent, SourceFileID, SourceHierContent, SourceHierTree}; -use crate::source_ref::FormatArgument; +use crate::source_ref::{CallSite, FormatArgument}; pub use code_source::CodeSource; pub use log_format::LogFormat; pub use progress::ProgressTracker; @@ -142,15 +142,17 @@ impl LogMatcher { .next() } - pub fn find_source_file_statements(&self, path: &Path) -> Option<&StatementsInFile> { - if let Some((_root_path, src_tree)) = self.match_path(path) { - src_tree - .tree - .find_file(path) - .and_then(|info| src_tree.files_with_statements.get(&info.id)) - } else { - None - } + pub fn find_source_file_statements(&self, path: &Path) -> Vec<&StatementsInFile> { + let mut retval: Vec<&StatementsInFile> = Vec::new(); + self.roots.values().for_each(|root| { + retval.extend( + root.tree + .find_file(path) + .iter() + .filter_map(|(_actual_path, info)| root.files_with_statements.get(&info.id)), + ); + }); + retval } /// Traverse the roots looking for supported source files. @@ -256,11 +258,22 @@ impl LogMatcher { .sorted_by(|lhs, rhs| rhs.quality.cmp(&lhs.quality)) .next() { + let exception_trace = match log_ref { + LogRef { + details: + Some(LogDetails { + trace: Some(trace), .. + }), + .. + } => trace.to_exception_trace(self), + _ => Vec::new(), + }; let variables = extract_variables(log_ref, src_ref); return Some(LogMapping { log_ref: log_ref.clone(), src_ref: Some((*src_ref).clone()), variables, + exception_trace, }); } } @@ -319,7 +332,7 @@ static BACKTRACE_REGEX: LazyLock = LazyLock::new(|| { # Match all stack frames (?: # File line: ' File "path", line N, in function' - ^\s{2}File\s+\"[^\"]*\",\s+line\s+\d+,\s+in\s+\S+\s*$\n? + ^\s{2}File\s+"[^"]*",\s+line\s+\d+,\s+in\s+\S+\s*$\n? # Code line (optional): ' code_here' (?:^\s{4}.*$\n?)? @@ -411,7 +424,7 @@ impl SourceLanguage { } SourceLanguage::Java => { r#" - (method_invocation + (method_invocation object: (identifier) @object-name name: (identifier) @method-name arguments: [ @@ -504,6 +517,9 @@ pub struct LogMapping<'a> { pub log_ref: LogRef<'a>, #[serde(rename(serialize = "srcRef"))] pub src_ref: Option, + #[serde(skip_serializing_if = "Vec::is_empty")] + #[serde(rename(serialize = "exceptionTrace"))] + pub exception_trace: Vec, pub variables: Vec, } @@ -526,12 +542,88 @@ fn is_only_body(details: &Option) -> bool { } } +static PYTHON_CALLER_REGEX: LazyLock = LazyLock::new(|| { + Regex::new( + r#"(?smx) + (?: + ^\s+File\s+"(?[^"]+)",\s+line\s+(?\d+),\s+in\s+(?[^\n]+)$\n? + ) +"#, + ) + .unwrap() +}); + +static JAVA_CALLER_REGEX: LazyLock = LazyLock::new(|| { + Regex::new( + r#"(?smx) + (?: + ^\s+at\s+(?(?:[^.\n(]+\.)*)(?[^.$\n(]+)\.(?\S+)\((?[^:]+):(?\d+)\)\s*$\n? + ) +"#, + ) + .unwrap() +}); + #[derive(Copy, Clone, Debug, PartialEq, Serialize)] pub struct StackTrace<'a> { pub language: SourceLanguage, pub content: &'a str, } +impl<'a> StackTrace<'a> { + fn to_exception_trace(&self, log_matcher: &LogMatcher) -> Vec { + let mut retval = Vec::new(); + match self.language { + SourceLanguage::Rust => {} + SourceLanguage::Java => { + for cap in JAVA_CALLER_REGEX.captures_iter(self.content) { + // The Java stack trace does not contain the full path to the source file. + // So, we need to construct a path from the package and class name. Then, + // we use SourceHierTree::find_file() to find the actual path. + let path_for_pkg = cap + .name("pkg") + .map(|m| PathBuf::from(m.as_str().replace(".", "/"))) + .unwrap_or_default(); + let path_for_class = path_for_pkg.join(cap.name("file").unwrap().as_str()); + let full_path = log_matcher + .roots + .values() + .filter_map(|root| { + if let Some((actual_path, _source_info)) = + root.tree.find_file(&path_for_class).iter().next() + { + Some(actual_path.clone()) + } else { + None + } + }) + .next(); + if let Some(full_path) = full_path { + retval.push(CallSite { + name: cap.name("name").unwrap().as_str().to_string(), + source_path: full_path.to_string_lossy().to_string(), + language: SourceLanguage::Java, + line_no: cap.name("line").unwrap().as_str().parse::().unwrap(), + }); + } + } + } + SourceLanguage::Cpp => {} + SourceLanguage::Python => { + for cap in PYTHON_CALLER_REGEX.captures_iter(self.content) { + retval.push(CallSite { + name: cap.name("name").unwrap().as_str().to_string(), + source_path: cap.name("path").unwrap().as_str().to_string(), + language: SourceLanguage::Python, + line_no: cap.name("line").unwrap().as_str().parse::().unwrap(), + }); + } + } + } + retval + } +} + #[derive(Copy, Clone, Debug, PartialEq, Serialize, Default)] pub struct LogDetails<'a> { #[serde(skip_serializing_if = "Option::is_none")] @@ -1088,4 +1180,27 @@ java.lang.IllegalStateException: simulated failure for demo let vars = extract_variables(&log_ref, &src_refs[0]); assert_yaml_snapshot!(vars); } + + const PYTHON_TRACE: &str = r#"\ +Traceback (most recent call last): + File "python-logging-example/python_logging_example/__main__.py", line 26, in main + helper.fail_now() + ~~~~~~~~~~~~~~~^^ + File "python-logging-example/python_logging_example/helper.py", line 3, in fail_now + return 1 / 0 + ~~^~~ +ZeroDivisionError: division by zero +"#; + + #[test] + fn test_python_trace() { + let stacktrace = StackTrace { + language: SourceLanguage::Python, + content: PYTHON_TRACE, + }; + + let log_matcher = LogMatcher::new(); + let trace = stacktrace.to_exception_trace(&log_matcher); + assert_yaml_snapshot!(trace); + } } diff --git a/src/main.rs b/src/main.rs index dcd9c08..1ec372a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,7 +33,7 @@ struct Cli { #[arg(short, long, value_name = "START")] start: Option, - /// The number of lines to process + /// The number of log messages to process #[arg(short, long, value_name = "COUNT")] count: Option, @@ -54,15 +54,17 @@ struct MessageAccumulator { log_format: Option, content: String, message_count: usize, + limit: usize, } impl MessageAccumulator { - fn new(log_matcher: LogMatcher, log_format: Option) -> Self { + fn new(log_matcher: LogMatcher, log_format: Option, limit: usize) -> Self { Self { log_matcher, log_format, content: String::new(), message_count: 0, + limit, } } @@ -73,6 +75,7 @@ impl MessageAccumulator { log_ref, src_ref: None, variables: vec![], + exception_trace: vec![], }) } @@ -130,11 +133,15 @@ impl MessageAccumulator { } fn flush(&mut self) { - if !self.content.is_empty() { + if !self.content.is_empty() && !self.at_limit() { self.process_msg(); } } + fn at_limit(&self) -> bool { + self.message_count >= self.limit + } + fn eof(mut self) -> miette::Result<()> { self.flush(); @@ -255,13 +262,13 @@ fn main() -> miette::Result<()> { return Err(LogError::NoLogStatements.into()); } let start = args.start.unwrap_or(0); - let desired_line_range = start..start.saturating_add(args.count.unwrap_or(usize::MAX)); - let mut accumulator = MessageAccumulator::new(log_matcher, log_format); + let count = args.count.unwrap_or(usize::MAX); + let mut accumulator = MessageAccumulator::new(log_matcher, log_format, count); let reader = BufReader::new(reader); - for (lineno, line_res) in reader.lines().enumerate() { - if !desired_line_range.contains(&lineno) { - continue; + for (lineno, line_res) in reader.lines().skip(start).enumerate() { + if accumulator.at_limit() { + break; } match line_res { Ok(line) => accumulator.consume_line(&line), diff --git a/src/snapshots/log2src__tests__python_trace.snap b/src/snapshots/log2src__tests__python_trace.snap new file mode 100644 index 0000000..f593607 --- /dev/null +++ b/src/snapshots/log2src__tests__python_trace.snap @@ -0,0 +1,12 @@ +--- +source: src/lib.rs +expression: trace +--- +- name: main + sourcePath: python-logging-example/python_logging_example/__main__.py + language: Python + lineNumber: 26 +- name: fail_now + sourcePath: python-logging-example/python_logging_example/helper.py + language: Python + lineNumber: 3 diff --git a/src/source_hier.rs b/src/source_hier.rs index 694dfdf..e37f9b0 100644 --- a/src/source_hier.rs +++ b/src/source_hier.rs @@ -236,21 +236,31 @@ impl SourceHierContent { true } - pub fn find_file(&self, self_path: &Path, desired_path: &Path) -> Option { + pub fn find_file( + &self, + self_path: &Path, + desired_path: &Path, + accum: &mut Vec<(PathBuf, SourceFileInfo)>, + ) { match self { - SourceHierContent::File { info, .. } if desired_path == Path::new("") => Some(*info), + SourceHierContent::File { info, .. } if desired_path == Path::new("") => { + accum.push((self_path.to_path_buf(), *info)); + } SourceHierContent::Directory { ref entries } => { let mut components = desired_path.components(); if let Some(Component::Normal(name)) = components.next() { if let Some(node) = entries.get(name) { - return node - .content - .find_file(&self_path.join(name), components.as_path()); + node.content + .find_file(&self_path.join(name), components.as_path(), accum); + } else { + for (name, entry) in entries { + let sub_path = self_path.join(name); + entry.content.find_file(&sub_path, desired_path, accum); + } } } - None } - _ => None, + _ => {} } } } @@ -459,11 +469,20 @@ impl SourceHierTree { } } - pub fn find_file(&self, path: &Path) -> Option { - match path.strip_prefix(&self.root_path) { - Ok(sub_path) => self.root_node.content.find_file(&self.root_path, sub_path), - Err(_) => None, - } + pub fn find_file(&self, path: &Path) -> Vec<(PathBuf, SourceFileInfo)> { + let path_to_find = if path.is_absolute() { + match path.strip_prefix(&self.root_path) { + Ok(sub_path) => sub_path, + Err(_) => return vec![], + } + } else { + path + }; + let mut retval = vec![]; + self.root_node + .content + .find_file(&self.root_path, path_to_find, &mut retval); + retval } /// Visit every node in the hierarchy, depth-first, calling `f` on each. @@ -550,7 +569,8 @@ mod test { let temp_test_dir = setup_test_environment(&tests_path); let _ = fs::create_dir(temp_test_dir.path().join(".git")).unwrap(); let _ = File::create_new(temp_test_dir.path().join(".git/config")) - .unwrap().write("abc".as_bytes()) + .unwrap() + .write("abc".as_bytes()) .unwrap(); let basic_path = temp_test_dir.path().join("tests/java/Basic.java"); { @@ -566,10 +586,13 @@ mod test { let find_res = tree.find_file(&basic_path); assert_eq!( find_res, - Some(SourceFileInfo { - language: SourceLanguage::Java, - id: SourceFileID(1) - }) + vec![( + basic_path.clone(), + SourceFileInfo { + language: SourceLanguage::Java, + id: SourceFileID(1) + } + )] ); let no_events: Vec = tree.scan().map(redact_event).collect(); assert_yaml_snapshot!(no_events); diff --git a/src/source_ref.rs b/src/source_ref.rs index f3bd546..292e157 100644 --- a/src/source_ref.rs +++ b/src/source_ref.rs @@ -11,6 +11,16 @@ pub enum FormatArgument { Placeholder, } +#[derive(Clone, Debug, Serialize)] +pub struct CallSite { + pub name: String, + #[serde(rename(serialize = "sourcePath"))] + pub source_path: String, + pub language: SourceLanguage, + #[serde(rename(serialize = "lineNumber"))] + pub line_no: usize, +} + // TODO: get rid of this clone? #[derive(Clone, Debug, Serialize)] pub struct SourceRef { From c65fe18382ecfa74302df3a1290135565a9e4f84 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Thu, 2 Oct 2025 09:13:47 -0700 Subject: [PATCH 2/2] [nit] use flat_map() instead of extending an existing Vec This was bugging me... missed needing to use into_iter() for the result of find_file() --- src/lib.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2dbbfd8..57d656f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,16 +143,15 @@ impl LogMatcher { } pub fn find_source_file_statements(&self, path: &Path) -> Vec<&StatementsInFile> { - let mut retval: Vec<&StatementsInFile> = Vec::new(); - self.roots.values().for_each(|root| { - retval.extend( + self.roots + .values() + .flat_map(|root| { root.tree .find_file(path) - .iter() - .filter_map(|(_actual_path, info)| root.files_with_statements.get(&info.id)), - ); - }); - retval + .into_iter() + .filter_map(|(_actual_path, info)| root.files_with_statements.get(&info.id)) + }) + .collect() } /// Traverse the roots looking for supported source files.