From 61e447bb41e89515b3f8deb7504668ba8e84251c Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 11:22:36 -0700 Subject: [PATCH 1/7] [caching] save/load source hierarchy and extracted log statements Scanning the source hierarchy and extracting log statements is pretty costly, so we need to cache what we found between runs. This change writes a separate cache file for every source root. Each file starts with a single-line JSON header containing metadata that a user might find helpful. The body of the file is a bincode serialization of the SourceTree object. The CLI will try to load from the cache on startup, check for any changes in the source tree, and then update the cache, if necessary. I was originally looking at using SQLite for the cache, but it's hard to write blobs of unknown size to a table. This approach seems to work since the header can easily be read separately from the main content to determine if the file is valid or not. I also tried using the postcard serializer, but had trouble getting it to work. The bincode one seems to just work. Files: * Cargo.toml: Add some more dependencies: - bincode for serialization - directories for finding the user's platform-specific cache directory - serde_regex for serializing/deserializing Regex - sha2 for producing a hash used for the cache file names - tempfile for creating the cache file - walkdir for walking the cache dir in the tests * cache-header-v1.json: The schema for the cache entry header * lib.rs: Add the `Cache` handle, the cache entry header types, and the `LogMatcher::load_from_cache()`/`cache_to()` methods. Make `extract_log_statements()` return a summary of new/deleted files so the caller knows whether the cache needs to be updated. * main.rs: Add a footer to the help to mention where the cache is. Load the statements from the cache and save for future use. * source_hier.rs: Derive `Deserialize` on various structs. * common_settings/mod.rs: Move helper module to its own directory. * source_ref.rs: Serialize/deserialize the Regex directly so that deserialization fails if the regex string is invalid. Add separate `pattern_str` to cache the string version of the pattern. 
--- Cargo.lock | 221 +++++++++++- Cargo.toml | 8 +- schemas/cache-header-v1.json | 34 ++ src/lib.rs | 317 ++++++++++++++++-- src/main.rs | 57 +++- ...source_hier__test__with_resources_dir.snap | 2 +- src/source_hier.rs | 18 +- src/source_ref.rs | 19 +- tests/common_settings.rs | 7 - tests/common_settings/mod.rs | 67 ++++ tests/snapshots/test_java__basic_slf4j-2.snap | 23 ++ tests/snapshots/test_java__basic_slf4j-3.snap | 26 ++ .../test_java__basic_with_log_format-2.snap | 25 ++ .../test_rust__invalid_source_path.snap | 8 +- tests/test_java.rs | 92 +++-- tests/test_rust.rs | 18 +- 16 files changed, 840 insertions(+), 102 deletions(-) create mode 100644 schemas/cache-header-v1.json delete mode 100644 tests/common_settings.rs create mode 100644 tests/common_settings/mod.rs create mode 100644 tests/snapshots/test_java__basic_slf4j-2.snap create mode 100644 tests/snapshots/test_java__basic_slf4j-3.snap create mode 100644 tests/snapshots/test_java__basic_with_log_format-2.snap diff --git a/Cargo.lock b/Cargo.lock index 0083ea9..50c9aa4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,12 +114,41 @@ dependencies = [ "backtrace", ] +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + [[package]] name = "bitflags" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bstr" version = "1.9.1" @@ -234,6 +263,15 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -259,12 +297,53 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "difflib" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "directories" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f5094c54661b38d03bd7e50df373292118db60b585c08a411c6d840017fe7d" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.60.2", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -334,6 +413,27 @@ version = "1.3.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.3.2" @@ -343,7 +443,7 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -472,6 +572,16 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +[[package]] +name = "libredox" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +dependencies = [ + "bitflags", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -489,9 +599,11 @@ name = "log2src" version = "0.1.0" dependencies = [ "assert_cmd", + "bincode", "cc", "clap", "colored_json", + "directories", "env_logger", "fs_extra", "indicatif", @@ -505,6 +617,8 @@ dependencies = [ "regex", "serde", "serde_json", + "serde_regex", + "sha2", "tempfile", "thiserror", "tree-sitter", @@ -512,6 +626,7 @@ dependencies = [ "tree-sitter-java", "tree-sitter-python", "tree-sitter-rust-orchard", + "walkdir", ] [[package]] @@ -575,6 +690,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = 
"option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "owo-colors" version = "4.2.2" @@ -680,7 +801,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom", + "getrandom 0.3.2", ] [[package]] @@ -703,6 +824,17 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror", +] + [[package]] name = "regex" version = "1.11.1" @@ -757,6 +889,15 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.219" @@ -790,6 +931,27 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8136f1a4ea815d7eac4101cfd0b16dc0cb5e1fe1b8609dfd728058656b7badf" +dependencies = [ + "regex", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -848,12 +1010,12 @@ dependencies = [ [[package]] name = "tempfile" -version = 
"3.22.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84fa4d11fadde498443cca10fd3ac23c951f0dc59e080e9f4b93d4df4e4eea53" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.3.2", "once_cell", "rustix", "windows-sys 0.60.2", @@ -965,6 +1127,12 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -995,12 +1163,30 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + [[package]] name = "wait-timeout" version = "0.2.0" @@ -1010,6 +1196,22 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] 
+ +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -1087,6 +1289,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.60.2", +] + [[package]] name = "windows-link" version = "0.1.3" diff --git a/Cargo.toml b/Cargo.toml index 35d1ded..cb015b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,13 +6,18 @@ version = "0.1.0" edition = "2021" [dependencies] -clap = { version = "4.5.36", features = ["derive"] } +bincode = { version = "2.0.1", features = ["serde", "std"]} +clap = { version = "4.5.36", features = ["std", "derive"] } colored_json = "5.0.0" +directories = "6.0.0" indicatif = "0.18.0" itertools = "0.14.0" regex = "1.11.1" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" +serde_regex = "1.1.0" +sha2 = "0.10.9" +tempfile = "3.23.0" thiserror = "2.0.16" tree-sitter = "0.25.3" tree-sitter-cpp = "0.23.4" @@ -34,3 +39,4 @@ log = "0.4.27" rand = "0.9.0" tempfile = "3.22.0" fs_extra = "1.3" +walkdir = "2.5.0" diff --git a/schemas/cache-header-v1.json b/schemas/cache-header-v1.json new file mode 100644 index 0000000..492ea7f --- /dev/null +++ b/schemas/cache-header-v1.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://json-schema.org/draft-07/schema", + "title": "log2src cache entry header v1", + "description": "Schema for the header of a log2src cache entry.", + "type": "object", + "additionalProperties": false, + "properties": { + "$schema": { + "type": "string", + "format": "uri" + }, + "revision": { + "type": "string" + }, + "format": { + "type": "string", + "enum": [ + "Bincode" + ] + }, + "path": { 
+ "type": "string" + }, + "timestamp": { + "type": "integer" + } + }, + "required": [ + "$schema", + "format", + "path", + "timestamp" + ] +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 57d656f..d9a573c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,22 @@ +use directories::ProjectDirs; +use indicatif::HumanBytes; use itertools::Itertools; use miette::Diagnostic; use rayon::prelude::*; use regex::{Captures, Regex, RegexSet}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; use std::collections::HashMap; +use std::error::Error; use std::ffi::OsStr; use std::fs::File; -use std::io; +use std::io::{BufRead, BufReader, Seek, Write}; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::{Arc, LazyLock}; +use std::time::{SystemTime, UNIX_EPOCH}; +use std::{fs, io}; +use tempfile::NamedTempFile; use thiserror::Error; use tree_sitter::Language; @@ -33,8 +40,11 @@ use source_query::QueryResult; pub use source_query::SourceQuery; pub use source_ref::SourceRef; -#[derive(Error, Debug, Diagnostic, Clone)] +#[derive(Error, Debug, Diagnostic, Clone, Default)] pub enum LogError { + #[default] + #[error("unknown error")] + Unknown, #[error("unable to read line {line}")] UnableToReadLine { line: usize, source: Arc }, #[error("invalid log format regular expression")] @@ -78,10 +88,89 @@ pub enum LogError { #[error("no log messages found in input")] #[diagnostic(help("Make sure the log format matches the input"))] NoLogMessages, + #[error("failed to find user cache directory")] + #[diagnostic(severity(warning))] + CannotFindCache, + #[error("failed to create cache directory \"{path}\"")] + #[diagnostic(severity(warning))] + CannotCreateCache { + path: PathBuf, + source: Arc, + }, + #[error("failed to write cache file")] + #[diagnostic(severity(warning))] + FailedToWriteCache { + source: Arc, + }, + #[error("outdated cache file \"{path}\"")] + #[diagnostic(severity(info))] + OldCacheEntry { path: 
PathBuf }, + #[error("failed to read cache file \"{path}\"")] + #[diagnostic(severity(warning))] + FailedToReadCache { + path: PathBuf, + source: Arc, + }, +} + +/// Handle for the source tree cache +pub struct Cache { + pub location: PathBuf, +} + +impl Cache { + /// Try to get a handle on the cache in the user's default location. + pub fn open() -> Result { + // XXX we don't own log2src.org + let project_dirs = + ProjectDirs::from("org", "log2src", "log2src").ok_or(LogError::CannotFindCache {})?; + let location = project_dirs.cache_dir().to_path_buf(); + Ok(Cache { location }) + } +} + +#[derive(Serialize, Deserialize, Debug)] +pub enum CacheEntrySchema { + #[serde( + rename = "https://raw.githubusercontent.com/ttiimm/log2src/refs/heads/main/schemas/cache-header-v1.json" + )] + V1, +} + +/// The revision value is a simple way to invalidate the cache entries by changing the number. +#[derive(Serialize, Deserialize, Debug)] +pub enum Revision { + #[serde(rename = "1")] + Current, +} + +#[derive(Serialize, Deserialize, Debug)] +pub enum CacheEntryFormat { + Bincode, +} + +/// Header for an entry in the cache. Currently, this is more of interest to humans than machines. +#[derive(Serialize, Deserialize, Debug)] +pub struct CacheEntryHeader { + #[serde(rename = "$schema")] + pub schema: CacheEntrySchema, + pub revision: Revision, + pub format: CacheEntryFormat, + pub path: String, + pub timestamp: u64, +} + +fn to_write_cache_error(err: E) -> LogError +where + E: Error + Send + Sync + 'static, +{ + LogError::FailedToWriteCache { + source: Arc::new(err), + } } /// Collection of log statements in a single source file -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct StatementsInFile { pub path: String, id: SourceFileID, @@ -90,10 +179,33 @@ pub struct StatementsInFile { /// XXX If there are too many in the file, the RegexSet constructor /// will fail with CompiledTooBig. We should probably fall back to /// manually trying each one at that point... 
- pub matcher: RegexSet, + #[serde(skip)] + pub matcher: Option, +} + +impl StatementsInFile { + /// When loading from the cache, we need to fill in the pattern string and populate the + /// RegexSet matcher. + fn try_creating_matcher(&mut self) { + for stmt in self.log_statements.iter_mut() { + if stmt.pattern_str.is_empty() { + stmt.pattern_str = stmt.pattern.to_string(); + } + } + if self.matcher.is_some() { + return; + } + let patterns = self + .log_statements + .iter() + .map(|s| s.pattern_str.as_str()) + .collect::>(); + self.matcher = RegexSet::new(&patterns).ok(); + } } /// Collection of individual source files under a root path +#[derive(Serialize, Deserialize, Debug)] pub struct SourceTree { pub tree: SourceHierTree, pub files_with_statements: HashMap, @@ -105,6 +217,27 @@ pub struct LogMatcher { roots: HashMap, } +fn to_cached_name(path: &Path) -> String { + format!( + "cache.{:x}", + Sha256::digest(path.as_os_str().as_encoded_bytes()) + ) +} + +/// A summary of the work done by extract_log_statements(). Useful for knowing if there were +/// any changes that need to be saved to the cache. +#[derive(Default, Debug)] +pub struct ExtractLogSummary { + pub deleted: u64, + pub new: u64, +} + +impl ExtractLogSummary { + pub fn changes(&self) -> u64 { + self.new.saturating_add(self.deleted) + } +} + impl LogMatcher { /// Create an empty LogMatcher pub fn new() -> Self { @@ -113,6 +246,128 @@ impl LogMatcher { } } + fn load_cache_entry(path: &Path, mut file: &File) -> Result { + let mut reader = BufReader::new(&mut file); + let mut header_str = String::new(); + reader + .read_line(&mut header_str) + .map_err(|err| LogError::FailedToReadCache { + path: path.to_owned(), + source: Arc::new(err), + })?; + // We're deserializing the header to check for garbage and version compatibility. + let _header = serde_json::from_str::(&header_str).map_err(|_err| { + LogError::OldCacheEntry { + path: path.to_owned(), + } + })?; + // XXX check that the path matches? 
+ let mut decoded_root: SourceTree = + bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) + .map_err(|err| LogError::FailedToReadCache { + path: path.to_owned(), + source: Arc::new(err), + })?; + for sif in decoded_root.files_with_statements.values_mut() { + sif.try_creating_matcher(); + } + Ok(decoded_root) + } + + /// Try to load SourceTrees from the cache for each root. + #[must_use] + pub fn load_from_cache(&mut self, cache: &Cache, tracker: &ProgressTracker) -> Vec { + tracker.begin_step(format!( + "Loading cached log statements from: {}", + cache.location.display() + )); + let mut old_roots: HashMap = HashMap::new(); + let mut retval: Vec = Vec::new(); + std::mem::swap(&mut self.roots, &mut old_roots); + let work_guard = tracker.doing_work(old_roots.len() as u64, "root".to_string()); + let mut found = 0; + let mut not_found = 0; + let mut skipped = 0; + for (root_path, old_root) in old_roots.into_iter() { + let cached_name = to_cached_name(&root_path); + let cached_path = cache.location.join(&cached_name); + let new_root = if let Ok(mut file) = File::open(&cached_path) { + match Self::load_cache_entry(&cached_path, &mut file) { + Ok(new_root) => { + found += 1; + new_root + } + Err(err) => { + skipped += 1; + retval.push(err); + old_root + } + } + } else { + not_found += 1; + old_root + }; + self.roots.insert(root_path, new_root); + work_guard.inc(1); + } + tracker.end_step(format!( + "found {}; skipped {}; not found {}", + found, skipped, not_found + )); + + retval + } + + /// Save the log statements to the cache. 
+ pub fn cache_to(&self, cache: &Cache, tracker: &ProgressTracker) -> Result<(), LogError> { + tracker.begin_step(format!( + "Saving log statements to: {}", + cache.location.display() + )); + let mut total_size: u64 = 0; + let work_guard = tracker.doing_work(self.roots.len() as u64, "root".to_string()); + for (root_path, root) in &self.roots { + let cached_name = to_cached_name(&root_path); + let tmp_path = { + fs::create_dir_all(&cache.location).map_err(to_write_cache_error)?; + let mut file = + NamedTempFile::with_suffix_in(".tmp", &cache.location).map_err(|err| { + LogError::FailedToWriteCache { + source: Arc::new(err), + } + })?; + // Write a JSON header as the first line so that a user can figure out what this + // file is. It can also be used in the future if the file format needs to change. + let header = CacheEntryHeader { + schema: CacheEntrySchema::V1, + revision: Revision::Current, + format: CacheEntryFormat::Bincode, + path: root_path.to_string_lossy().to_string(), + timestamp: SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(), + }; + serde_json::to_writer(&file, &header).map_err(to_write_cache_error)?; + file.write_all("\n".as_bytes()) + .map_err(to_write_cache_error)?; + bincode::serde::encode_into_std_write(root, &mut file, bincode::config::standard()) + .map_err(to_write_cache_error)?; + total_size = total_size.saturating_add(file.stream_position().unwrap_or(0)); + file.into_temp_path() + }; + fs::rename(tmp_path, cache.location.join(cached_name)).map_err(to_write_cache_error)?; + work_guard.inc(1); + } + tracker.end_step(format!( + "{} files totaling {}", + self.roots.len(), + HumanBytes(total_size) + )); + + Ok(()) + } + /// True if no log statements are recognized by this matcher. 
pub fn is_empty(&self) -> bool { self.roots @@ -122,7 +377,8 @@ impl LogMatcher { /// Add a source root path pub fn add_root(&mut self, path: &Path) -> Result<(), LogError> { - if let Some(_existing_path) = self.match_path(path) { + let path = path.canonicalize().unwrap_or(path.to_owned()); + if let Some(_existing_path) = self.match_path(&path) { } else { self.roots .entry(path.to_owned()) @@ -180,23 +436,28 @@ impl LogMatcher { } /// Scan the source files looking for potential log statements. - pub fn extract_log_statements(&mut self, tracker: &ProgressTracker) { + pub fn extract_log_statements(&mut self, tracker: &ProgressTracker) -> ExtractLogSummary { + let mut retval = ExtractLogSummary::default(); tracker.begin_step("Extracting log statements".to_string()); self.roots.iter_mut().for_each(|(_path, coll)| { let guard = tracker.doing_work(coll.tree.stats().files as u64, "files".to_string()); for event_chunk in &coll.tree.scan().chunks(10) { let sources = event_chunk .flat_map(|event| match event { - ScanEvent::NewFile(path, info) => match File::open(&path) { - Ok(file) => match CodeSource::new(&path, info, file) { - Ok(cs) => Some(cs), - Err(_) => todo!(), - }, - Err(_) => { - todo!() + ScanEvent::NewFile(path, info) => { + retval.new += 1; + match File::open(&path) { + Ok(file) => match CodeSource::new(&path, info, file) { + Ok(cs) => Some(cs), + Err(_) => todo!(), + }, + Err(_) => { + todo!() + } } - }, + } ScanEvent::DeletedFile(_path, id) => { + retval.deleted += 1; coll.files_with_statements.remove(&id); None } @@ -217,6 +478,8 @@ impl LogMatcher { .map(|stmts| stmts.log_statements.len()) .sum::() )); + + retval } /// Attempt to match the given log message. 
@@ -233,7 +496,8 @@ impl LogMatcher { .values() .filter(|stmts| stmts.path.contains(filename)) .flat_map(|stmts| { - let file_matches = stmts.matcher.matches(body); + let file_matches = + stmts.matcher.as_ref().expect("have RegexSet").matches(body); match file_matches.iter().next() { None => None, Some(index) => stmts.log_statements.get(index), @@ -244,7 +508,12 @@ impl LogMatcher { coll.files_with_statements .par_iter() .flat_map(|src_ref_coll| { - let file_matches = src_ref_coll.1.matcher.matches(log_ref.body()); + let file_matches = src_ref_coll + .1 + .matcher + .as_ref() + .expect("have RegexSet") + .matches(log_ref.body()); match file_matches.iter().next() { None => None, Some(index) => src_ref_coll.1.log_statements.get(index), @@ -280,7 +549,7 @@ impl LogMatcher { } } -#[derive(Debug, Eq, PartialEq, Copy, Clone, Serialize)] +#[derive(Debug, Eq, PartialEq, Copy, Clone, Serialize, Deserialize)] pub enum SourceLanguage { Rust, Java, @@ -560,7 +829,7 @@ static JAVA_CALLER_REGEX: LazyLock = LazyLock::new(|| { ) "#, ) - .unwrap() + .unwrap() }); #[derive(Copy, Clone, Debug, PartialEq, Serialize)] @@ -799,7 +1068,6 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec .par_iter() .flat_map(|code| { let mut matched = vec![]; - let mut patterns = vec![]; let src_query = SourceQuery::new(code); let query = code.info.language.get_query(); let results = src_query.query(query, None); @@ -808,7 +1076,6 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec match result.kind.as_str() { "string_literal" | "string" => { if let Some(src_ref) = SourceRef::new(code, result) { - patterns.push(src_ref.pattern.clone()); matched.push(src_ref); } } @@ -841,12 +1108,14 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec if matched.is_empty() { None } else { - Some(StatementsInFile { + let mut sif = StatementsInFile { path: matched.first().unwrap().source_path.clone(), id: code.info.id, 
log_statements: matched, - matcher: RegexSet::new(patterns).expect("To combine patterns"), - }) + matcher: None, + }; + sif.try_creating_matcher(); + Some(sif) } }) .collect() diff --git a/src/main.rs b/src/main.rs index 1ec372a..15e1749 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,20 +2,37 @@ use clap::Parser as ClapParser; use colored_json::{ColoredFormatter, CompactFormatter, Styler}; use indicatif::{ProgressBar, ProgressStyle}; use log2src::{ - LogError, LogFormat, LogMapping, LogMatcher, LogRef, LogRefBuilder, ProgressTracker, + Cache, LogError, LogFormat, LogMapping, LogMatcher, LogRef, LogRefBuilder, ProgressTracker, ProgressUpdate, }; -use miette::{IntoDiagnostic, Report}; +use miette::{IntoDiagnostic, MietteHandlerOpts, Report}; use serde::Serialize; use std::io::{stdout, BufRead, BufReader}; use std::sync::atomic::Ordering; use std::thread::sleep; use std::time::Duration; -use std::{fs, io, path::PathBuf}; +use std::{env, fs, io, path::PathBuf}; + +fn get_footer() -> String { + let mut footer = String::new(); + if let Ok(cache) = Cache::open() { + footer.push_str("Paths:\n"); + footer.push_str( + format!( + " Cache directory: {}\n", + cache.location.to_string_lossy() + ) + .as_str(), + ); + } + footer.push_str("\nFor more information, see https://github.com/ttiimm/log2src\n"); + footer +} /// The log2src command maps log statements back to the source code that emitted them. 
#[derive(ClapParser)] #[command(author, version, about, long_about)] +#[command(after_help = get_footer())] struct Cli { /// The source directories to map logs onto #[arg(short = 'd', long, value_name = "SOURCES")] @@ -184,6 +201,14 @@ struct ErrorWrapper { } fn main() -> miette::Result<()> { + let _ = miette::set_hook(Box::new(move |_| { + Box::new( + MietteHandlerOpts::new() + .width(env::var("COLS").unwrap_or_default().parse().unwrap_or(80)) + .break_words(false) + .build(), + ) + })); let mut tracker = ProgressTracker::new(); let args = Cli::parse(); @@ -253,14 +278,38 @@ fn main() -> miette::Result<()> { .into_diagnostic()?; } + let cache_open_res = Cache::open(); + + if let Ok(cache) = &cache_open_res { + let res = log_matcher.load_from_cache(&cache, &tracker); + for err in res { + let report = Report::new(err); + if args.verbose + || report.severity().unwrap_or(miette::Severity::Error) != miette::Severity::Advice + { + eprintln!("{:?}", report); + } + } + } + log_matcher .discover_sources(&tracker) .into_iter() .for_each(|err| eprintln!("{:?}", Report::new(err))); - log_matcher.extract_log_statements(&tracker); + let extract_summary = log_matcher.extract_log_statements(&tracker); if log_matcher.is_empty() { return Err(LogError::NoLogStatements.into()); } + + if extract_summary.changes() > 0 { + if let Ok(cache) = &cache_open_res { + let res = log_matcher.cache_to(&cache, &tracker); + if let Err(err) = res { + eprintln!("{:?}", Report::new(err)); + } + } + } + let start = args.start.unwrap_or(0); let count = args.count.unwrap_or(usize::MAX); let mut accumulator = MessageAccumulator::new(log_matcher, log_format, count); diff --git a/src/snapshots/log2src__source_hier__test__with_resources_dir.snap b/src/snapshots/log2src__source_hier__test__with_resources_dir.snap index 72fdd7e..5947207 100644 --- a/src/snapshots/log2src__source_hier__test__with_resources_dir.snap +++ b/src/snapshots/log2src__source_hier__test__with_resources_dir.snap @@ -31,6 +31,6 @@ 
expression: events - language: Java id: 1 - NewFile: - - common_settings.rs + - mod.rs - language: Rust id: 0 diff --git a/src/source_hier.rs b/src/source_hier.rs index e37f9b0..9072d5e 100644 --- a/src/source_hier.rs +++ b/src/source_hier.rs @@ -1,5 +1,5 @@ use crate::{LogError, SourceLanguage}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use std::cell::RefCell; use std::collections::BTreeMap; use std::ffi::{OsStr, OsString}; @@ -24,11 +24,11 @@ enum ShallowCheckResult { } /// A unique identifier for a file that can be used instead of retaining the full path. -#[derive(Copy, Clone, Debug, Serialize, Hash, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Serialize, Deserialize, Hash, Eq, PartialEq)] pub struct SourceFileID(usize); /// A summary of a source code file -#[derive(Copy, Clone, Debug, Serialize, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub struct SourceFileInfo { pub language: SourceLanguage, pub id: SourceFileID, @@ -51,7 +51,7 @@ impl SourceFileInfo { } /// The type of content in a node in the source hierarchy -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub enum SourceHierContent { File { info: SourceFileInfo, @@ -62,6 +62,7 @@ pub enum SourceHierContent { entries: BTreeMap, }, Error { + #[serde(skip)] source: LogError, }, Unknown {}, @@ -267,7 +268,7 @@ impl SourceHierContent { /// A node in the SourceHierTree. It contains information that is common to all types of content /// and the content itself (e.g. file, directory, error, ...). -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct SourceHierNode { pub last_scan_time: Option, pub content: SourceHierContent, @@ -365,7 +366,7 @@ impl SourceHierNode { /// An event when iterating over the value returned by the [`scan()`](SourceHierTree::scan()) /// method. 
-#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub enum ScanEvent { NewFile(PathBuf, SourceFileInfo), DeletedFile(PathBuf, SourceFileID), @@ -413,7 +414,7 @@ impl Iterator for TreeScanner<'_> { } } -#[derive(Debug, Serialize, Default)] +#[derive(Debug, Serialize, Deserialize, Default)] pub struct SourceHierStats { pub files: usize, pub unsupported_files: usize, @@ -422,11 +423,12 @@ pub struct SourceHierStats { } /// A SourceHierTree tracks the state of a source code hierarchy. -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct SourceHierTree { pub root_path: PathBuf, pub root_node: SourceHierNode, next_id: usize, + #[serde(skip)] deleted_events: Vec, stats: SourceHierStats, } diff --git a/src/source_ref.rs b/src/source_ref.rs index 292e157..f0e86ef 100644 --- a/src/source_ref.rs +++ b/src/source_ref.rs @@ -1,10 +1,10 @@ use crate::{CodeSource, QueryResult, SourceLanguage}; use core::fmt; use regex::{Captures, Regex}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use std::sync::LazyLock; -#[derive(Clone, Debug, Serialize, Eq, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub enum FormatArgument { Named(String), Positional(usize), @@ -22,7 +22,7 @@ pub struct CallSite { } // TODO: get rid of this clone? 
-#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct SourceRef { #[serde(rename(serialize = "sourcePath"))] pub source_path: String, @@ -35,9 +35,10 @@ pub struct SourceRef { pub name: String, pub text: String, pub quality: usize, - #[serde(skip_serializing)] - pub(crate) matcher: Regex, - pub pattern: String, + #[serde(with = "serde_regex")] + pub(crate) pattern: Regex, + #[serde(skip)] + pub pattern_str: String, pub(crate) args: Vec, pub(crate) vars: Vec, } @@ -87,8 +88,8 @@ impl SourceRef { name, text, quality, - matcher, - pattern, + pattern: matcher, + pattern_str: pattern, args, vars: vec![], }) @@ -98,7 +99,7 @@ impl SourceRef { } pub fn captures<'a>(&self, line: &'a str) -> Option> { - self.matcher.captures(line) + self.pattern.captures(line) } } diff --git a/tests/common_settings.rs b/tests/common_settings.rs deleted file mode 100644 index 84f9715..0000000 --- a/tests/common_settings.rs +++ /dev/null @@ -1,7 +0,0 @@ -pub fn enable_filters() -> insta::internals::SettingsBindDropGuard { - let mut settings = insta::Settings::clone_current(); - settings.add_filter(r#""examples(?:/|\\\\?)"#, "\"{example_dir}/"); - settings.add_filter(r#""tests(?:/|\\\\?)java(?:/|\\\\?)"#, "\"{java_dir}/"); - settings.add_filter(r#"(?:[ \w\.]+) (\(os error \d+\))"#, " {errmsg} $1"); - settings.bind_to_scope() -} diff --git a/tests/common_settings/mod.rs b/tests/common_settings/mod.rs new file mode 100644 index 0000000..de9a7e1 --- /dev/null +++ b/tests/common_settings/mod.rs @@ -0,0 +1,67 @@ +use assert_cmd::prelude::CommandCargoExt; +use std::ffi::OsStr; +use std::path::Path; +use std::process::Command; +use tempfile::TempDir; + +pub fn enable_filters(home_path: &Path) -> insta::internals::SettingsBindDropGuard { + let mut settings = insta::Settings::clone_current(); + settings.add_filter(r#""[^"]*examples(?:/|\\\\?)"#, "\"{example_dir}/"); + settings.add_filter(r#""[^"]*tests(?:/|\\\\?)java(?:/|\\\\?)"#, "\"{java_dir}/"); + 
settings.add_filter(r#"(?:[ \w\.]+) (\(os error \d+\))"#, " {errmsg} $1"); + settings.add_filter(r#"cache\.[0-9a-f]+"#, "cache.XXX"); + settings.add_filter( + regex::escape(home_path.to_string_lossy().as_ref()).as_str(), + "{home}", + ); + settings.bind_to_scope() +} + +pub struct TempHome { + location: TempDir, +} + +impl TempHome { + pub fn new() -> Self { + Self { + location: tempfile::tempdir().unwrap(), + } + } + + pub fn set_vars_in(&self, cmd: &mut Command) { + cmd.env("HOME", self.location.path()); + cmd.env("XDG_CONFIG_HOME", self.location.path()); + cmd.env("USERPROFILE", self.location.path()); + } +} + +pub struct CommandGuard { + _filter_guard: insta::internals::SettingsBindDropGuard, + home_guard: TempHome, + pub cmd: Command, +} + +impl CommandGuard { + pub fn new() -> Result> { + let home_guard = TempHome::new(); + let _filter_guard = enable_filters(home_guard.location.path()); + let mut cmd = Command::cargo_bin("log2src")?; + cmd.env("COLS", "1000"); + home_guard.set_vars_in(&mut cmd); + Ok(Self { + _filter_guard, + home_guard, + cmd, + }) + } + + #[allow(dead_code)] + pub fn home_path(&self) -> &Path { + self.home_guard.location.path() + } + + pub fn arg>(&mut self, arg: S) -> &mut Self { + self.cmd.arg(arg); + self + } +} diff --git a/tests/snapshots/test_java__basic_slf4j-2.snap b/tests/snapshots/test_java__basic_slf4j-2.snap new file mode 100644 index 0000000..1c0c731 --- /dev/null +++ b/tests/snapshots/test_java__basic_slf4j-2.snap @@ -0,0 +1,23 @@ +--- +source: tests/test_java.rs +info: + program: log2src + args: + - "-d" + - tests/java/BasicSlf4j.java + - "-l" + - tests/resources/java/basic-slf4j.log + - "-f" + - "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\d+) (?.*)$" + env: + HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpBH8OwF + USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpBH8OwF + XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpBH8OwF +--- +success: true +exit_code: 0 +----- 
stdout ----- +{"logRef":{"details":{"thread":"123"}},"srcRef":{"sourcePath":"{java_dir}/BasicSlf4j.java","language":"Java","lineNumber":10,"endLineNumber":10,"column":20,"name":"main","text":"\"Application starting\"","quality":19,"pattern":"(?s)^Application starting$","args":[],"vars":[]},"variables":[]} +{"logRef":{"details":{"thread":"123"}},"srcRef":{"sourcePath":"{java_dir}/BasicSlf4j.java","language":"Java","lineNumber":12,"endLineNumber":13,"column":21,"name":"main","text":"\"Debug message: args length = {}\"","quality":24,"pattern":"(?s)^Debug message: args length = (.+)$","args":["Placeholder"],"vars":["args.length"]},"variables":[{"expr":"args.length","value":"0"}]} + +----- stderr ----- diff --git a/tests/snapshots/test_java__basic_slf4j-3.snap b/tests/snapshots/test_java__basic_slf4j-3.snap new file mode 100644 index 0000000..83fcc77 --- /dev/null +++ b/tests/snapshots/test_java__basic_slf4j-3.snap @@ -0,0 +1,26 @@ +--- +source: tests/test_java.rs +info: + program: log2src + args: + - "-d" + - tests/java/BasicSlf4j.java + - "-l" + - tests/resources/java/basic-slf4j.log + - "-f" + - "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\d+) (?.*)$" + env: + COLS: "1000" + HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpkUcT52 + USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpkUcT52 + XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpkUcT52 +--- +success: true +exit_code: 0 +----- stdout ----- +{"logRef":{"details":{"thread":"123"}},"srcRef":{"sourcePath":"{java_dir}/BasicSlf4j.java","language":"Java","lineNumber":10,"endLineNumber":10,"column":20,"name":"main","text":"\"Application starting\"","quality":19,"pattern":"(?s)^Application starting$","args":[],"vars":[]},"variables":[]} +{"logRef":{"details":{"thread":"123"}},"srcRef":{"sourcePath":"{java_dir}/BasicSlf4j.java","language":"Java","lineNumber":12,"endLineNumber":13,"column":21,"name":"main","text":"\"Debug message: args length = 
{}\"","quality":24,"pattern":"(?s)^Debug message: args length = (.+)$","args":["Placeholder"],"vars":["args.length"]},"variables":[{"expr":"args.length","value":"0"}]} + +----- stderr ----- + ⚠ failed to read cache file "{home}/Library/Caches/org.log2src.log2src/cache.XXX" + ╰─▶ Io { inner: Error { kind: UnexpectedEof, message: "failed to fill whole buffer" }, additional: 11 } diff --git a/tests/snapshots/test_java__basic_with_log_format-2.snap b/tests/snapshots/test_java__basic_with_log_format-2.snap new file mode 100644 index 0000000..4929895 --- /dev/null +++ b/tests/snapshots/test_java__basic_with_log_format-2.snap @@ -0,0 +1,25 @@ +--- +source: tests/test_java.rs +info: + program: log2src + args: + - "-d" + - tests/java/BasicWithCustom.java + - "-l" + - tests/resources/java/basic-class-line.log + - "-f" + - "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\w+) (?[\\w$.]+):(?\\d+) (?[\\w$]+): (?.*)$" + env: + HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmp4qYTFu + USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmp4qYTFu + XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmp4qYTFu +--- +success: true +exit_code: 0 +----- stdout ----- +{"logRef":{"details":{"file":"BasicWithCustom","lineno":15}},"srcRef":{"sourcePath":"{java_dir}/BasicWithCustom.java","language":"Java","lineNumber":15,"endLineNumber":15,"column":16,"name":"main","text":"\"Hello from main\"","quality":13,"pattern":"(?s)^Hello from main$","args":[],"vars":[]},"variables":[]} +{"logRef":{"details":{"file":"BasicWithCustom","lineno":22}},"srcRef":{"sourcePath":"{java_dir}/BasicWithCustom.java","language":"Java","lineNumber":22,"endLineNumber":22,"column":20,"name":"foo","text":"\"Hello from foo i=\\{i}\"","quality":14,"pattern":"(?s)^Hello from foo i=(.+)$","args":[{"Named":"i"}],"vars":[]},"variables":[{"expr":"i","value":"0"}]} 
+{"logRef":{"details":{"file":"BasicWithCustom","lineno":22}},"srcRef":{"sourcePath":"{java_dir}/BasicWithCustom.java","language":"Java","lineNumber":22,"endLineNumber":22,"column":20,"name":"foo","text":"\"Hello from foo i=\\{i}\"","quality":14,"pattern":"(?s)^Hello from foo i=(.+)$","args":[{"Named":"i"}],"vars":[]},"variables":[{"expr":"i","value":"1"}]} +{"logRef":{"details":{"file":"BasicWithCustom","lineno":22}},"srcRef":{"sourcePath":"{java_dir}/BasicWithCustom.java","language":"Java","lineNumber":22,"endLineNumber":22,"column":20,"name":"foo","text":"\"Hello from foo i=\\{i}\"","quality":14,"pattern":"(?s)^Hello from foo i=(.+)$","args":[{"Named":"i"}],"vars":[]},"variables":[{"expr":"i","value":"2"}]} + +----- stderr ----- diff --git a/tests/snapshots/test_rust__invalid_source_path.snap b/tests/snapshots/test_rust__invalid_source_path.snap index 6f7606f..4620a25 100644 --- a/tests/snapshots/test_rust__invalid_source_path.snap +++ b/tests/snapshots/test_rust__invalid_source_path.snap @@ -11,6 +11,11 @@ info: - "\\[\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z \\w+ \\w+\\]\\s+(?.*)" - "-s" - "1" + env: + COLS: "1000" + HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpD3nTNw + USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpD3nTNw + XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpD3nTNw --- success: false exit_code: 1 @@ -21,5 +26,4 @@ exit_code: 1 ╰─▶ {errmsg} (os error 2) Error: × no log statements found - help: Make sure the source path is valid and refers to a tree with supported - source code and logging statements + help: Make sure the source path is valid and refers to a tree with supported source code and logging statements diff --git a/tests/test_java.rs b/tests/test_java.rs index 96c97e4..89a81b8 100644 --- a/tests/test_java.rs +++ b/tests/test_java.rs @@ -1,13 +1,14 @@ -use assert_cmd::prelude::*; use insta_cmd::assert_cmd_snapshot; -use std::{path::Path, process::Command}; +use std::fs::File; +use 
std::io::{Read, Write}; +use std::path::Path; +use walkdir::WalkDir; mod common_settings; #[test] fn invalid_log_path() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("Basic.java"); let basic_log = Path::new("badname.log"); cmd.arg("-d") @@ -17,14 +18,13 @@ fn invalid_log_path() -> Result<(), Box> { .arg("-f") .arg(r#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+ \w+ \w+: (?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn invalid_log_format() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("Basic.java"); let basic_log = Path::new("tests") .join("resources") @@ -37,14 +37,13 @@ fn invalid_log_format() -> Result<(), Box> { .arg("-f") .arg(r#"^-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+ \w+ \w+: (?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn basic() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("Basic.java"); let basic_log = Path::new("tests") .join("resources") @@ -57,14 +56,13 @@ fn basic() -> Result<(), Box> { .arg("-f") .arg(r#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+ \w+ \w+: (?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn basic_range() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("Basic.java"); let basic_log = Path::new("tests") 
.join("resources") @@ -81,14 +79,13 @@ fn basic_range() -> Result<(), Box> { .arg("-c") .arg("2"); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn basic_invalid_utf() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("Basic.java"); let basic_log = Path::new("tests") .join("resources") @@ -101,14 +98,13 @@ fn basic_invalid_utf() -> Result<(), Box> { .arg("-f") .arg(r#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+ \w+ \w+: (?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn basic_with_log() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("BasicWithLog.java"); let basic_log = Path::new("tests") .join("resources") @@ -121,14 +117,13 @@ fn basic_with_log() -> Result<(), Box> { .arg("-f") .arg(r#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+ \w+ \w+: (?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn basic_with_upper() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let basic_source = Path::new("tests").join("java").join("BasicWithUpper.java"); let basic_log = Path::new("tests") .join("resources") @@ -141,14 +136,13 @@ fn basic_with_upper() -> Result<(), Box> { .arg("-f") .arg(r#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+ \w+ \w+: (?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn basic_with_log_format() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = 
common_settings::CommandGuard::new()?; let source = Path::new("tests").join("java").join("BasicWithCustom.java"); let log = Path::new("tests") .join("resources") @@ -161,14 +155,15 @@ fn basic_with_log_format() -> Result<(), Box> { .arg("-f") .arg("^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\w+) (?[\\w$.]+):(?\\d+) (?[\\w$]+): (?.*)$"); - assert_cmd_snapshot!(cmd); + for _index in 0..2 { + assert_cmd_snapshot!(cmd.cmd); + } Ok(()) } #[test] fn basic_slf4j() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let source = Path::new("tests").join("java").join("BasicSlf4j.java"); let log = Path::new("tests") .join("resources") @@ -179,8 +174,45 @@ fn basic_slf4j() -> Result<(), Box> { .arg("-l") .arg(log.to_str().expect("test case log exists")) .arg("-f") - .arg("^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\d+) (?.*)$"); + .arg( + "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\d+) (?.*)$", + ); + + assert_cmd_snapshot!(cmd.cmd); + + // corrupt the cache entry header + for entry in WalkDir::new(cmd.home_path()) { + let entry = entry?; + if entry.file_name().to_string_lossy().starts_with("cache.") { + let mut buffer = Vec::new(); + { + let mut file = File::open(entry.path())?; + file.read_to_end(&mut buffer)?; + } + buffer[0] = b'0'; + let mut file = File::create(entry.path())?; + file.write_all(&buffer)?; + } + } + + assert_cmd_snapshot!(cmd.cmd); + + // corrupt the cache entry content + for entry in WalkDir::new(cmd.home_path()) { + let entry = entry?; + if entry.file_name().to_string_lossy().starts_with("cache.") { + let mut buffer = Vec::new(); + { + let mut file = File::open(entry.path())?; + file.read_to_end(&mut buffer)?; + } + let mut file = File::create(entry.path())?; + buffer.resize(buffer.len() - 1, 0); + file.write_all(&buffer)?; + } + } + + assert_cmd_snapshot!(cmd.cmd); - assert_cmd_snapshot!(cmd); Ok(()) } diff --git 
a/tests/test_rust.rs b/tests/test_rust.rs index 74f77c5..fa47e0a 100644 --- a/tests/test_rust.rs +++ b/tests/test_rust.rs @@ -1,13 +1,11 @@ -use assert_cmd::prelude::*; use insta_cmd::assert_cmd_snapshot; -use std::{path::Path, process::Command}; +use std::path::Path; mod common_settings; #[test] fn basic() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let source = Path::new("examples").join("basic.rs"); let log = Path::new("tests") .join("resources") @@ -20,14 +18,13 @@ fn basic() -> Result<(), Box> { .arg("-f") .arg(r#"\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z \w+ \w+\]\s+(?.*)"#); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn stack() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let source = Path::new("examples").join("stack.rs"); let log = Path::new("tests") .join("resources") @@ -42,14 +39,13 @@ fn stack() -> Result<(), Box> { .arg("-s") .arg("1"); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } #[test] fn invalid_source_path() -> Result<(), Box> { - let _guard = common_settings::enable_filters(); - let mut cmd = Command::cargo_bin("log2src")?; + let mut cmd = common_settings::CommandGuard::new()?; let source = Path::new("examples").join("stack.r"); let log = Path::new("tests") .join("resources") @@ -64,6 +60,6 @@ fn invalid_source_path() -> Result<(), Box> { .arg("-s") .arg("1"); - assert_cmd_snapshot!(cmd); + assert_cmd_snapshot!(cmd.cmd); Ok(()) } From a0e9eaeea9e531f185e82f1e1dd7651b54c5dd54 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 12:36:44 -0700 Subject: [PATCH 2/7] [tests] fix expected out --- tests/common_settings/mod.rs | 2 +- tests/snapshots/test_java__basic_slf4j-3.snap | 8 ++++---- 2 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/tests/common_settings/mod.rs b/tests/common_settings/mod.rs index de9a7e1..29db25e 100644 --- a/tests/common_settings/mod.rs +++ b/tests/common_settings/mod.rs @@ -9,7 +9,7 @@ pub fn enable_filters(home_path: &Path) -> insta::internals::SettingsBindDropGua settings.add_filter(r#""[^"]*examples(?:/|\\\\?)"#, "\"{example_dir}/"); settings.add_filter(r#""[^"]*tests(?:/|\\\\?)java(?:/|\\\\?)"#, "\"{java_dir}/"); settings.add_filter(r#"(?:[ \w\.]+) (\(os error \d+\))"#, " {errmsg} $1"); - settings.add_filter(r#"cache\.[0-9a-f]+"#, "cache.XXX"); + settings.add_filter(r#""[^"]*cache\.[0-9a-f]+"#, "\"cache.XXX"); settings.add_filter( regex::escape(home_path.to_string_lossy().as_ref()).as_str(), "{home}", diff --git a/tests/snapshots/test_java__basic_slf4j-3.snap b/tests/snapshots/test_java__basic_slf4j-3.snap index 83fcc77..2540e7a 100644 --- a/tests/snapshots/test_java__basic_slf4j-3.snap +++ b/tests/snapshots/test_java__basic_slf4j-3.snap @@ -11,9 +11,9 @@ info: - "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\d+) (?.*)$" env: COLS: "1000" - HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpkUcT52 - USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpkUcT52 - XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpkUcT52 + HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpRSyw4W + USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpRSyw4W + XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpRSyw4W --- success: true exit_code: 0 @@ -22,5 +22,5 @@ exit_code: 0 {"logRef":{"details":{"thread":"123"}},"srcRef":{"sourcePath":"{java_dir}/BasicSlf4j.java","language":"Java","lineNumber":12,"endLineNumber":13,"column":21,"name":"main","text":"\"Debug message: args length = {}\"","quality":24,"pattern":"(?s)^Debug message: args length = (.+)$","args":["Placeholder"],"vars":["args.length"]},"variables":[{"expr":"args.length","value":"0"}]} ----- stderr ----- - ⚠ failed to 
read cache file "{home}/Library/Caches/org.log2src.log2src/cache.XXX" + ⚠ failed to read cache file "cache.XXX" ╰─▶ Io { inner: Error { kind: UnexpectedEof, message: "failed to fill whole buffer" }, additional: 11 } From 2abe8d203a8734e69c58da419c27d8abc93e1562 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 12:46:04 -0700 Subject: [PATCH 3/7] [tests] try tweak for win --- tests/snapshots/test_java__basic_slf4j-3.snap | 8 ++++---- tests/test_java.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/snapshots/test_java__basic_slf4j-3.snap b/tests/snapshots/test_java__basic_slf4j-3.snap index 2540e7a..f6cdbbb 100644 --- a/tests/snapshots/test_java__basic_slf4j-3.snap +++ b/tests/snapshots/test_java__basic_slf4j-3.snap @@ -11,9 +11,9 @@ info: - "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) (?\\d+) (?.*)$" env: COLS: "1000" - HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpRSyw4W - USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpRSyw4W - XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpRSyw4W + HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpzzq9Js + USERPROFILE: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpzzq9Js + XDG_CONFIG_HOME: /var/folders/gq/s3t7n95d19g3w41nsw0m0n240000gn/T/.tmpzzq9Js --- success: true exit_code: 0 @@ -23,4 +23,4 @@ exit_code: 0 ----- stderr ----- ⚠ failed to read cache file "cache.XXX" - ╰─▶ Io { inner: Error { kind: UnexpectedEof, message: "failed to fill whole buffer" }, additional: 11 } + ╰─▶ Io { inner: Error { kind: UnexpectedEof, message: "failed to fill whole buffer" }, additional: 39 } diff --git a/tests/test_java.rs b/tests/test_java.rs index 89a81b8..51b2e87 100644 --- a/tests/test_java.rs +++ b/tests/test_java.rs @@ -207,7 +207,7 @@ fn basic_slf4j() -> Result<(), Box> { file.read_to_end(&mut buffer)?; } let mut file = File::create(entry.path())?; - buffer.resize(buffer.len() - 1, 0); + buffer.resize(buffer.len() - 50, 0); 
file.write_all(&buffer)?; } } From 01bb01f4e8bf659d45a7edfaefa328e3dcdddf85 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 12:54:41 -0700 Subject: [PATCH 4/7] [tests] windows... --- tests/test_java.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_java.rs b/tests/test_java.rs index 51b2e87..4c5f4d7 100644 --- a/tests/test_java.rs +++ b/tests/test_java.rs @@ -1,3 +1,4 @@ +use std::fs; use insta_cmd::assert_cmd_snapshot; use std::fs::File; use std::io::{Read, Write}; @@ -206,9 +207,8 @@ fn basic_slf4j() -> Result<(), Box> { let mut file = File::open(entry.path())?; file.read_to_end(&mut buffer)?; } - let mut file = File::create(entry.path())?; buffer.resize(buffer.len() - 50, 0); - file.write_all(&buffer)?; + fs::write(entry.path(), &buffer)?; } } From 5174865e1eb3c4ed35a58ebd23d3b848efe69c3e Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 13:19:03 -0700 Subject: [PATCH 5/7] [tests] maybe the cache dir is not found on win If running with "-v", log that the cache will not be used --- src/main.rs | 3 +++ tests/common_settings/mod.rs | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/main.rs b/src/main.rs index 15e1749..2ff7cdf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -279,6 +279,9 @@ fn main() -> miette::Result<()> { } let cache_open_res = Cache::open(); + if args.verbose && cache_open_res.is_err() { + eprintln!("Could not find cache directory, will not cache source trees"); + } if let Ok(cache) = &cache_open_res { let res = log_matcher.load_from_cache(&cache, &tracker); diff --git a/tests/common_settings/mod.rs b/tests/common_settings/mod.rs index 29db25e..358ac3f 100644 --- a/tests/common_settings/mod.rs +++ b/tests/common_settings/mod.rs @@ -32,6 +32,8 @@ impl TempHome { cmd.env("HOME", self.location.path()); cmd.env("XDG_CONFIG_HOME", self.location.path()); cmd.env("USERPROFILE", self.location.path()); + cmd.env("LOCALAPPDATA", self.location.path()); + cmd.env("APPDATA", 
self.location.path()); } } From c6079c25488ed51621ed8b40eed631258983527b Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 13:24:25 -0700 Subject: [PATCH 6/7] [tests] exclude the corruption test from windows for now --- tests/test_java.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_java.rs b/tests/test_java.rs index 4c5f4d7..0edc27b 100644 --- a/tests/test_java.rs +++ b/tests/test_java.rs @@ -163,6 +163,7 @@ fn basic_with_log_format() -> Result<(), Box> { } #[test] +#[cfg(not(windows))] fn basic_slf4j() -> Result<(), Box> { let mut cmd = common_settings::CommandGuard::new()?; let source = Path::new("tests").join("java").join("BasicSlf4j.java"); @@ -199,6 +200,7 @@ fn basic_slf4j() -> Result<(), Box> { assert_cmd_snapshot!(cmd.cmd); // corrupt the cache entry content + // XXX for some reason this doesn't work on windows for entry in WalkDir::new(cmd.home_path()) { let entry = entry?; if entry.file_name().to_string_lossy().starts_with("cache.") { From ea17eb494641971a79355f753b676e483375b145 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Tue, 7 Oct 2025 14:23:13 -0700 Subject: [PATCH 7/7] [tasks] checkoff a couple more tasks --- docs/Tasks.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Tasks.md b/docs/Tasks.md index da15d72..807949a 100644 --- a/docs/Tasks.md +++ b/docs/Tasks.md @@ -6,8 +6,8 @@ and it can find the message. - [X] Extract a thread id from log when available and associate with source ref. - [X] Generate call stack from exceptions. -- [ ] Support multiple source roots from CLI. -- [ ] Serialize state for re-use on subsequent executions +- [X] Support multiple source roots from CLI. +- [X] Serialize state for re-use on subsequent executions ## Extension