Skip to content

Commit f28fe42

Browse files
authored
[stacktrace] pull file/line/name out of stack traces (#19)
The previous change just pulled out the entire stack trace from the log message. This change processes the stack traces to pull out the file/line/name data. Note: I haven't standardized an order for the call sites in a trace, Java goes one way and Python goes the other. Sigh... Files: * debugAdapter.ts: Add exceptionTrace field to LogMapping. * lib.rs: The find_source_file_statements() can now match multiple source files since find_file() can now match multiple files. Add exception_trace to LogMapping and to_exception_trace() to StackTrace. * main.rs: Change the meaning of --count to be the number of messages since we don't necessarily know how many lines are in a message. * source_hier.rs: Since the Java stack trace code can only figure out the package and class name. The find_file() method has been changed to allow searching the tree for a partial path. For example, searching for "org/example/Main.java" will walk the entire tree looking for a match of that path. It will then return all possible matches of that path.
1 parent e771767 commit f28fe42

File tree

6 files changed

+209
-37
lines changed

6 files changed

+209
-37
lines changed

editors/code/src/debugAdapter.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@ import * as path from 'path';
1919

2020
import { outputChannel } from './extension';
2121

22+
interface CallSite {
23+
name: string,
24+
sourcePath: string,
25+
lineNumber: number
26+
}
2227

2328
interface LogMapping {
2429
srcRef: SourceRef,
30+
exceptionTrace: Array<CallSite>,
2531
variables: Array<VariablePair>
2632
}
2733

src/lib.rs

Lines changed: 126 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ mod source_ref;
2323
// TODO: doesn't need to be exposed if we can clean up the arguments to do_mapping
2424
use crate::progress::WorkGuard;
2525
use crate::source_hier::{ScanEvent, SourceFileID, SourceHierContent, SourceHierTree};
26-
use crate::source_ref::FormatArgument;
26+
use crate::source_ref::{CallSite, FormatArgument};
2727
pub use code_source::CodeSource;
2828
pub use log_format::LogFormat;
2929
pub use progress::ProgressTracker;
@@ -142,15 +142,16 @@ impl LogMatcher {
142142
.next()
143143
}
144144

145-
pub fn find_source_file_statements(&self, path: &Path) -> Option<&StatementsInFile> {
146-
if let Some((_root_path, src_tree)) = self.match_path(path) {
147-
src_tree
148-
.tree
149-
.find_file(path)
150-
.and_then(|info| src_tree.files_with_statements.get(&info.id))
151-
} else {
152-
None
153-
}
145+
pub fn find_source_file_statements(&self, path: &Path) -> Vec<&StatementsInFile> {
146+
self.roots
147+
.values()
148+
.flat_map(|root| {
149+
root.tree
150+
.find_file(path)
151+
.into_iter()
152+
.filter_map(|(_actual_path, info)| root.files_with_statements.get(&info.id))
153+
})
154+
.collect()
154155
}
155156

156157
/// Traverse the roots looking for supported source files.
@@ -256,11 +257,22 @@ impl LogMatcher {
256257
.sorted_by(|lhs, rhs| rhs.quality.cmp(&lhs.quality))
257258
.next()
258259
{
260+
let exception_trace = match log_ref {
261+
LogRef {
262+
details:
263+
Some(LogDetails {
264+
trace: Some(trace), ..
265+
}),
266+
..
267+
} => trace.to_exception_trace(self),
268+
_ => Vec::new(),
269+
};
259270
let variables = extract_variables(log_ref, src_ref);
260271
return Some(LogMapping {
261272
log_ref: log_ref.clone(),
262273
src_ref: Some((*src_ref).clone()),
263274
variables,
275+
exception_trace,
264276
});
265277
}
266278
}
@@ -319,7 +331,7 @@ static BACKTRACE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
319331
# Match all stack frames
320332
(?:
321333
# File line: ' File "path", line N, in function'
322-
^\s{2}File\s+\"[^\"]*\",\s+line\s+\d+,\s+in\s+\S+\s*$\n?
334+
^\s{2}File\s+"[^"]*",\s+line\s+\d+,\s+in\s+\S+\s*$\n?
323335
324336
# Code line (optional): ' code_here'
325337
(?:^\s{4}.*$\n?)?
@@ -411,7 +423,7 @@ impl SourceLanguage {
411423
}
412424
SourceLanguage::Java => {
413425
r#"
414-
(method_invocation
426+
(method_invocation
415427
object: (identifier) @object-name
416428
name: (identifier) @method-name
417429
arguments: [
@@ -504,6 +516,9 @@ pub struct LogMapping<'a> {
504516
pub log_ref: LogRef<'a>,
505517
#[serde(rename(serialize = "srcRef"))]
506518
pub src_ref: Option<SourceRef>,
519+
#[serde(skip_serializing_if = "Vec::is_empty")]
520+
#[serde(rename(serialize = "exceptionTrace"))]
521+
pub exception_trace: Vec<CallSite>,
507522
pub variables: Vec<VariablePair>,
508523
}
509524

@@ -526,12 +541,88 @@ fn is_only_body(details: &Option<LogDetails>) -> bool {
526541
}
527542
}
528543

544+
static PYTHON_CALLER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
545+
Regex::new(
546+
r#"(?smx)
547+
(?:
548+
^\s+File\s+"(?<path>[^"]+)",\s+line\s+(?<line>\d+),\s+in\s+(?<name>[^\n]+)$\n?
549+
)
550+
"#,
551+
)
552+
.unwrap()
553+
});
554+
555+
static JAVA_CALLER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
556+
Regex::new(
557+
r#"(?smx)
558+
(?:
559+
^\s+at\s+(?<pkg>(?:[^.\n(]+\.)*)(?<class>[^.$\n(]+)\.(?<name>\S+)\((?<file>[^:]+):(?<line>\d+)\)\s*$\n?
560+
)
561+
"#,
562+
)
563+
.unwrap()
564+
});
565+
529566
#[derive(Copy, Clone, Debug, PartialEq, Serialize)]
530567
pub struct StackTrace<'a> {
531568
pub language: SourceLanguage,
532569
pub content: &'a str,
533570
}
534571

572+
impl<'a> StackTrace<'a> {
573+
fn to_exception_trace(&self, log_matcher: &LogMatcher) -> Vec<CallSite> {
574+
let mut retval = Vec::new();
575+
match self.language {
576+
SourceLanguage::Rust => {}
577+
SourceLanguage::Java => {
578+
for cap in JAVA_CALLER_REGEX.captures_iter(self.content) {
579+
// The Java stack trace does not contain the full path to the source file.
580+
// So, we need to construct a path from the package and class name. Then,
581+
// we use SourceHierTree::find_file() to find the actual path.
582+
let path_for_pkg = cap
583+
.name("pkg")
584+
.map(|m| PathBuf::from(m.as_str().replace(".", "/")))
585+
.unwrap_or_default();
586+
let path_for_class = path_for_pkg.join(cap.name("file").unwrap().as_str());
587+
let full_path = log_matcher
588+
.roots
589+
.values()
590+
.filter_map(|root| {
591+
if let Some((actual_path, _source_info)) =
592+
root.tree.find_file(&path_for_class).iter().next()
593+
{
594+
Some(actual_path.clone())
595+
} else {
596+
None
597+
}
598+
})
599+
.next();
600+
if let Some(full_path) = full_path {
601+
retval.push(CallSite {
602+
name: cap.name("name").unwrap().as_str().to_string(),
603+
source_path: full_path.to_string_lossy().to_string(),
604+
language: SourceLanguage::Java,
605+
line_no: cap.name("line").unwrap().as_str().parse::<usize>().unwrap(),
606+
});
607+
}
608+
}
609+
}
610+
SourceLanguage::Cpp => {}
611+
SourceLanguage::Python => {
612+
for cap in PYTHON_CALLER_REGEX.captures_iter(self.content) {
613+
retval.push(CallSite {
614+
name: cap.name("name").unwrap().as_str().to_string(),
615+
source_path: cap.name("path").unwrap().as_str().to_string(),
616+
language: SourceLanguage::Python,
617+
line_no: cap.name("line").unwrap().as_str().parse::<usize>().unwrap(),
618+
});
619+
}
620+
}
621+
}
622+
retval
623+
}
624+
}
625+
535626
#[derive(Copy, Clone, Debug, PartialEq, Serialize, Default)]
536627
pub struct LogDetails<'a> {
537628
#[serde(skip_serializing_if = "Option::is_none")]
@@ -1088,4 +1179,27 @@ java.lang.IllegalStateException: simulated failure for demo
10881179
let vars = extract_variables(&log_ref, &src_refs[0]);
10891180
assert_yaml_snapshot!(vars);
10901181
}
1182+
1183+
const PYTHON_TRACE: &str = r#"\
1184+
Traceback (most recent call last):
1185+
File "python-logging-example/python_logging_example/__main__.py", line 26, in main
1186+
helper.fail_now()
1187+
~~~~~~~~~~~~~~~^^
1188+
File "python-logging-example/python_logging_example/helper.py", line 3, in fail_now
1189+
return 1 / 0
1190+
~~^~~
1191+
ZeroDivisionError: division by zero
1192+
"#;
1193+
1194+
#[test]
1195+
fn test_python_trace() {
1196+
let stacktrace = StackTrace {
1197+
language: SourceLanguage::Python,
1198+
content: PYTHON_TRACE,
1199+
};
1200+
1201+
let log_matcher = LogMatcher::new();
1202+
let trace = stacktrace.to_exception_trace(&log_matcher);
1203+
assert_yaml_snapshot!(trace);
1204+
}
10911205
}

src/main.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ struct Cli {
3333
#[arg(short, long, value_name = "START")]
3434
start: Option<usize>,
3535

36-
/// The number of lines to process
36+
/// The number of log messages to process
3737
#[arg(short, long, value_name = "COUNT")]
3838
count: Option<usize>,
3939

@@ -54,15 +54,17 @@ struct MessageAccumulator {
5454
log_format: Option<LogFormat>,
5555
content: String,
5656
message_count: usize,
57+
limit: usize,
5758
}
5859

5960
impl MessageAccumulator {
60-
fn new(log_matcher: LogMatcher, log_format: Option<LogFormat>) -> Self {
61+
fn new(log_matcher: LogMatcher, log_format: Option<LogFormat>, limit: usize) -> Self {
6162
Self {
6263
log_matcher,
6364
log_format,
6465
content: String::new(),
6566
message_count: 0,
67+
limit,
6668
}
6769
}
6870

@@ -73,6 +75,7 @@ impl MessageAccumulator {
7375
log_ref,
7476
src_ref: None,
7577
variables: vec![],
78+
exception_trace: vec![],
7679
})
7780
}
7881

@@ -130,11 +133,15 @@ impl MessageAccumulator {
130133
}
131134

132135
fn flush(&mut self) {
133-
if !self.content.is_empty() {
136+
if !self.content.is_empty() && !self.at_limit() {
134137
self.process_msg();
135138
}
136139
}
137140

141+
fn at_limit(&self) -> bool {
142+
self.message_count >= self.limit
143+
}
144+
138145
fn eof(mut self) -> miette::Result<()> {
139146
self.flush();
140147

@@ -255,13 +262,13 @@ fn main() -> miette::Result<()> {
255262
return Err(LogError::NoLogStatements.into());
256263
}
257264
let start = args.start.unwrap_or(0);
258-
let desired_line_range = start..start.saturating_add(args.count.unwrap_or(usize::MAX));
259-
let mut accumulator = MessageAccumulator::new(log_matcher, log_format);
265+
let count = args.count.unwrap_or(usize::MAX);
266+
let mut accumulator = MessageAccumulator::new(log_matcher, log_format, count);
260267

261268
let reader = BufReader::new(reader);
262-
for (lineno, line_res) in reader.lines().enumerate() {
263-
if !desired_line_range.contains(&lineno) {
264-
continue;
269+
for (lineno, line_res) in reader.lines().skip(start).enumerate() {
270+
if accumulator.at_limit() {
271+
break;
265272
}
266273
match line_res {
267274
Ok(line) => accumulator.consume_line(&line),
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
source: src/lib.rs
3+
expression: trace
4+
---
5+
- name: main
6+
sourcePath: python-logging-example/python_logging_example/__main__.py
7+
language: Python
8+
lineNumber: 26
9+
- name: fail_now
10+
sourcePath: python-logging-example/python_logging_example/helper.py
11+
language: Python
12+
lineNumber: 3

src/source_hier.rs

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -236,21 +236,31 @@ impl SourceHierContent {
236236
true
237237
}
238238

239-
pub fn find_file(&self, self_path: &Path, desired_path: &Path) -> Option<SourceFileInfo> {
239+
pub fn find_file(
240+
&self,
241+
self_path: &Path,
242+
desired_path: &Path,
243+
accum: &mut Vec<(PathBuf, SourceFileInfo)>,
244+
) {
240245
match self {
241-
SourceHierContent::File { info, .. } if desired_path == Path::new("") => Some(*info),
246+
SourceHierContent::File { info, .. } if desired_path == Path::new("") => {
247+
accum.push((self_path.to_path_buf(), *info));
248+
}
242249
SourceHierContent::Directory { ref entries } => {
243250
let mut components = desired_path.components();
244251
if let Some(Component::Normal(name)) = components.next() {
245252
if let Some(node) = entries.get(name) {
246-
return node
247-
.content
248-
.find_file(&self_path.join(name), components.as_path());
253+
node.content
254+
.find_file(&self_path.join(name), components.as_path(), accum);
255+
} else {
256+
for (name, entry) in entries {
257+
let sub_path = self_path.join(name);
258+
entry.content.find_file(&sub_path, desired_path, accum);
259+
}
249260
}
250261
}
251-
None
252262
}
253-
_ => None,
263+
_ => {}
254264
}
255265
}
256266
}
@@ -459,11 +469,20 @@ impl SourceHierTree {
459469
}
460470
}
461471

462-
pub fn find_file(&self, path: &Path) -> Option<SourceFileInfo> {
463-
match path.strip_prefix(&self.root_path) {
464-
Ok(sub_path) => self.root_node.content.find_file(&self.root_path, sub_path),
465-
Err(_) => None,
466-
}
472+
pub fn find_file(&self, path: &Path) -> Vec<(PathBuf, SourceFileInfo)> {
473+
let path_to_find = if path.is_absolute() {
474+
match path.strip_prefix(&self.root_path) {
475+
Ok(sub_path) => sub_path,
476+
Err(_) => return vec![],
477+
}
478+
} else {
479+
path
480+
};
481+
let mut retval = vec![];
482+
self.root_node
483+
.content
484+
.find_file(&self.root_path, path_to_find, &mut retval);
485+
retval
467486
}
468487

469488
/// Visit every node in the hierarchy, depth-first, calling `f` on each.
@@ -550,7 +569,8 @@ mod test {
550569
let temp_test_dir = setup_test_environment(&tests_path);
551570
let _ = fs::create_dir(temp_test_dir.path().join(".git")).unwrap();
552571
let _ = File::create_new(temp_test_dir.path().join(".git/config"))
553-
.unwrap().write("abc".as_bytes())
572+
.unwrap()
573+
.write("abc".as_bytes())
554574
.unwrap();
555575
let basic_path = temp_test_dir.path().join("tests/java/Basic.java");
556576
{
@@ -566,10 +586,13 @@ mod test {
566586
let find_res = tree.find_file(&basic_path);
567587
assert_eq!(
568588
find_res,
569-
Some(SourceFileInfo {
570-
language: SourceLanguage::Java,
571-
id: SourceFileID(1)
572-
})
589+
vec![(
590+
basic_path.clone(),
591+
SourceFileInfo {
592+
language: SourceLanguage::Java,
593+
id: SourceFileID(1)
594+
}
595+
)]
573596
);
574597
let no_events: Vec<ScanEvent> = tree.scan().map(redact_event).collect();
575598
assert_yaml_snapshot!(no_events);

0 commit comments

Comments
 (0)