From bee444a3c9d6c201997a2d0eb1922b19286a4d9d Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 25 Feb 2026 20:13:37 +0100 Subject: [PATCH 01/14] Added cli --- Cargo.lock | 2 + crates/codebook-lsp/Cargo.toml | 2 + crates/codebook-lsp/src/lint.rs | 215 ++++++++++++++++++++++++++++++++ crates/codebook-lsp/src/main.rs | 15 +++ 4 files changed, 234 insertions(+) create mode 100644 crates/codebook-lsp/src/lint.rs diff --git a/Cargo.lock b/Cargo.lock index a481ac85..2f3d8d55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -427,6 +427,7 @@ dependencies = [ "codebook_config", "env_logger", "fs2", + "glob", "log", "lru", "serde", @@ -436,6 +437,7 @@ dependencies = [ "tempfile", "tokio", "tower-lsp", + "walkdir", ] [[package]] diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index b621344e..27d1a11a 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -28,6 +28,8 @@ env_logger.workspace = true fs2.workspace = true log.workspace = true lru.workspace = true +glob.workspace = true +walkdir.workspace = true serde.workspace = true serde_json.workspace = true string-offsets.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs new file mode 100644 index 00000000..3ecd5ab4 --- /dev/null +++ b/crates/codebook-lsp/src/lint.rs @@ -0,0 +1,215 @@ +use codebook::Codebook; +use codebook_config::{CodebookConfig, CodebookConfigFile}; +use std::collections::HashSet; +use std::io::{IsTerminal, Write}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +struct Styles { + bold: &'static str, + dim: &'static str, + yellow: &'static str, + bold_red: &'static str, + reset: &'static str, +} + +const STYLES_ON: Styles = Styles { + bold: "\x1b[1m", + dim: "\x1b[2m", + yellow: "\x1b[33m", + bold_red: "\x1b[1;31m", + reset: "\x1b[0m", +}; + +const STYLES_OFF: Styles = Styles { + bold: "", + dim: "", + yellow: "", + bold_red: "", + reset: "", +}; + +fn styles(is_terminal: bool) -> &'static Styles { + if is_terminal && std::env::var_os("NO_COLOR").is_none() { + &STYLES_ON + } else { + &STYLES_OFF + } +} + +fn fatal(msg: impl std::fmt::Display, s: &Styles) -> ! { + eprintln!("{}error:{} {msg}", s.bold_red, s.reset); + std::process::exit(2); +} + +pub fn run_lint(files: &[String], root: &Path, unique: bool) -> bool { + let out = styles(std::io::stdout().is_terminal()); + let err = styles(std::io::stderr().is_terminal()); + + let config = Arc::new( + CodebookConfigFile::load(Some(root)) + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"), err)), + ); + + print_config_source(&config, err); + + let codebook = Codebook::new(config.clone()) + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"), err)); + + let resolved = resolve_paths(files); + if resolved.is_empty() { + fatal("no files matched the given patterns", err); + } + + let mut seen_words: HashSet = HashSet::new(); + let mut total_errors = 0usize; + let mut files_with_errors = 0usize; + + for path in &resolved { + if config.should_ignore_path(path) { + continue; + } + + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + eprintln!( + "{}error:{} {}: {e}", + err.bold_red, + err.reset, + path.display() + ); + continue; + } + }; + + let mut locations = codebook.spell_check(&text, None, path.to_str()); + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + let hits: Vec<(usize, usize, &str)> = locations + .iter() + .flat_map(|wl| wl.locations.iter().map(move |r| (wl, r))) + .filter_map(|(wl, range)| { + if unique && !seen_words.insert(wl.word.to_lowercase()) { + return None; + } + let (line, col) = byte_offset_to_line_col(&text, range.start_byte); + Some((line, col, wl.word.as_str())) + }) + .collect(); + + if hits.is_empty() { + continue; + } + + let raw = path.to_string_lossy(); + let display = raw.strip_prefix("./").unwrap_or(&raw); + let pad_len = hits + .iter() + .map(|(l, c, _)| format!("{l}:{c}").len()) + .max() + .unwrap_or(0); + + println!("{}{display}{}", out.bold, out.reset); + for (line, col, word) in &hits { + let loc = format!("{line}:{col}"); + println!( + " {}{display}{}:{}{loc}{}{pad} {}{}{}", + out.dim, + out.reset, + out.yellow, + out.reset, + out.bold_red, + word, + out.reset, + pad = " ".repeat(pad_len - loc.len()), + ); + } + println!(); + + total_errors += hits.len(); + files_with_errors += 1; + } + + if total_errors > 0 { + let _ = std::io::stdout().flush(); + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {}{total_errors}{} {unique_label}spelling error(s) in {}{files_with_errors}{} file(s).", + err.bold, err.reset, err.bold, err.reset, + ); + } + + total_errors > 0 +} + +fn print_config_source(config: &CodebookConfigFile, s: &Styles) { + let cwd = std::env::current_dir().unwrap_or_default(); + match ( + config.project_config_path().filter(|p| p.is_file()), + config.global_config_path().filter(|p| p.is_file()), + ) { + (Some(p), _) => eprintln!( + "using config {}{}{}", + s.dim, + p.strip_prefix(&cwd).unwrap_or(&p).display(), + s.reset + ), + (None, Some(g)) => eprintln!( + "using global config {}{}{}", + s.dim, + g.strip_prefix(&cwd).unwrap_or(&g).display(), + s.reset + ), + (None, None) => eprintln!("using default config"), + } +} + +fn resolve_paths(patterns: &[String]) -> Vec { + let mut paths = Vec::new(); + for pattern in patterns { + let p = PathBuf::from(pattern); + if p.is_dir() { + collect_dir(&p, &mut paths); + } else { + match glob::glob(pattern) { + Ok(entries) => { + let mut matched = false; + for entry in entries.flatten() { + if entry.is_file() { + paths.push(entry); + matched = true; + } else if entry.is_dir() { + collect_dir(&entry, &mut paths); + matched = true; + } + } + if !matched { + eprintln!("codebook: no match for '{pattern}'"); + } + } + Err(e) => eprintln!("codebook: invalid pattern '{pattern}': {e}"), + } + } + } + paths.sort(); + paths.dedup(); + paths +} + +fn collect_dir(dir: &Path, out: &mut Vec) { + walkdir::WalkDir::new(dir) + .follow_links(false) + .into_iter() + .flatten() + .filter(|e| e.file_type().is_file()) + .for_each(|e| out.push(e.into_path())); +} + +fn byte_offset_to_line_col(text: &str, byte_offset: usize) -> (usize, usize) { + let offset = byte_offset.min(text.len()); + let before = &text[..offset]; + let line = before.bytes().filter(|&b| b == b'\n').count() + 1; + let col = before.rfind('\n').map(|p| offset - p).unwrap_or(offset + 1); + (line, col) +} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index bc055ee2..c6898c49 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -1,5 +1,6 @@ mod file_cache; mod init_options; +mod lint; mod lsp; mod lsp_logger; @@ -30,6 +31,15 @@ enum Commands { Serve {}, /// Remove server cache Clean {}, + /// Check files for spelling errors + Lint { + /// Files or glob patterns to spell-check + #[arg(required = true)] + files: Vec, + /// Only report each misspelled word once, ignoring duplicates across files + #[arg(short = 'u', long)] + unique: bool, + }, } #[tokio::main(flavor = "current_thread")] @@ -58,6 +68,11 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } + Some(Commands::Lint { files, unique }) => { + if lint::run_lint(files, root, *unique) { + std::process::exit(1); + } + } None => {} } } From 3a4b0d8cedbf5c7ee532d3a597609637dd53cdf4 Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 4 Mar 2026 18:56:01 +0100 Subject: [PATCH 02/14] improved the cli integration Replaces the manual ansi escape approach with owo-colors, which handles NO_COLOR and windows automatically. The code is also way better in general than my previous approach... --- Cargo.lock | 36 +++++ Cargo.toml | 1 + crates/codebook-lsp/Cargo.toml | 1 + crates/codebook-lsp/src/lint.rs | 261 ++++++++++++++++---------------- crates/codebook-lsp/src/main.rs | 16 +- 5 files changed, 185 insertions(+), 130 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 946d94e4..6db3a140 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -430,6 +430,7 @@ dependencies = [ "glob", "log", "lru", + "owo-colors", "serde", "serde_json", "streaming-iterator", @@ -1374,6 +1375,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "is_ci" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1705,6 +1712,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" +dependencies = [ + "supports-color 2.1.0", + "supports-color 3.0.2", +] + [[package]] name = "parking" version = "2.2.1" @@ -2451,6 +2468,25 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + +[[package]] +name = "supports-color" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" +dependencies = [ + "is_ci", +] + [[package]] name = "symbolic-common" version = "12.17.2" diff --git a/Cargo.toml b/Cargo.toml index 09220040..0ad3137a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ env_logger = "0.11.6" fs2 = "0.4" git2 = "0.20.0" glob = "0.3" +owo-colors = { version = "4", features = ["supports-colors"] } httpmock = "<0.9.0" lazy_static = "1.5.0" log = "0.4.22" diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 3398f960..114e3fd7 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -29,6 +29,7 @@ fs2.workspace = true log.workspace = true lru.workspace = true glob.workspace = true +owo-colors.workspace = true walkdir.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3ecd5ab4..12c9e1a6 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,170 +1,185 @@ use codebook::Codebook; -use codebook_config::{CodebookConfig, CodebookConfigFile}; +use codebook_config::CodebookConfigFile; +use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; -use std::io::{IsTerminal, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; -struct Styles { - bold: &'static str, - dim: &'static str, - yellow: &'static str, - bold_red: &'static str, - reset: &'static str, -} - -const STYLES_ON: Styles = Styles { - bold: "\x1b[1m", - dim: "\x1b[2m", - yellow: "\x1b[33m", - bold_red: "\x1b[1;31m", - reset: "\x1b[0m", -}; - -const STYLES_OFF: Styles = Styles { - bold: "", - dim: "", - yellow: "", - bold_red: "", - reset: "", -}; - -fn styles(is_terminal: bool) -> &'static Styles { - if is_terminal && std::env::var_os("NO_COLOR").is_none() { - &STYLES_ON - } else { - &STYLES_OFF - } -} +const BOLD: Style = Style::new().bold(); +const DIM: Style = Style::new().dimmed(); +const YELLOW: Style = Style::new().yellow(); +const BOLD_RED: Style = Style::new().bold().red(); -fn fatal(msg: impl std::fmt::Display, s: &Styles) -> ! { - eprintln!("{}error:{} {msg}", s.bold_red, s.reset); +fn fatal(msg: impl std::fmt::Display) -> ! { + eprintln!( + "{} {msg}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); std::process::exit(2); } -pub fn run_lint(files: &[String], root: &Path, unique: bool) -> bool { - let out = styles(std::io::stdout().is_terminal()); - let err = styles(std::io::stderr().is_terminal()); - +/// Returns `true` if any spelling errors were found. +pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( CodebookConfigFile::load(Some(root)) - .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"), err)), + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"))), ); - print_config_source(&config, err); + print_config_source(&config); + eprintln!(); let codebook = Codebook::new(config.clone()) - .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"), err)); + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); let resolved = resolve_paths(files); - if resolved.is_empty() { - fatal("no files matched the given patterns", err); - } let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - if config.should_ignore_path(path) { - continue; + let error_count = check_file(path, &codebook, &mut seen_words, unique, suggest); + if error_count > 0 { + total_errors += error_count; + files_with_errors += 1; } + } - let text = match std::fs::read_to_string(path) { - Ok(t) => t, - Err(e) => { - eprintln!( - "{}error:{} {}: {e}", - err.bold_red, - err.reset, - path.display() - ); - continue; - } - }; + if total_errors > 0 { + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + ); + } - let mut locations = codebook.spell_check(&text, None, path.to_str()); - locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + total_errors > 0 +} - let hits: Vec<(usize, usize, &str)> = locations - .iter() - .flat_map(|wl| wl.locations.iter().map(move |r| (wl, r))) - .filter_map(|(wl, range)| { - if unique && !seen_words.insert(wl.word.to_lowercase()) { - return None; - } - let (line, col) = byte_offset_to_line_col(&text, range.start_byte); - Some((line, col, wl.word.as_str())) - }) - .collect(); +/// Spell-checks a single file and prints any diagnostics to stdout. +/// +/// Returns the number of errors found (0 if the file was clean or unreadable). +fn check_file( + path: &Path, + codebook: &Codebook, + seen_words: &mut HashSet, + unique: bool, + suggest: bool, +) -> usize { + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + eprintln!( + "{} {}: {e}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + path.display() + ); + return 0; + } + }; + + let raw = path.to_string_lossy(); + let display = raw.strip_prefix("./").unwrap_or(&raw); + + let mut locations = codebook.spell_check(&text, None, path.to_str()); + // Sort by first occurrence in the file. + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - if hits.is_empty() { + // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. + // unique check is per-word (outer loop) so all ranges of a word are included or skipped together. + let mut hits: Vec<(String, &str, Option>)> = Vec::new(); + for wl in &locations { + if unique && !seen_words.insert(wl.word.to_lowercase()) { continue; } - let raw = path.to_string_lossy(); - let display = raw.strip_prefix("./").unwrap_or(&raw); - let pad_len = hits - .iter() - .map(|(l, c, _)| format!("{l}:{c}").len()) - .max() - .unwrap_or(0); - - println!("{}{display}{}", out.bold, out.reset); - for (line, col, word) in &hits { - let loc = format!("{line}:{col}"); - println!( - " {}{display}{}:{}{loc}{}{pad} {}{}{}", - out.dim, - out.reset, - out.yellow, - out.reset, - out.bold_red, - word, - out.reset, - pad = " ".repeat(pad_len - loc.len()), - ); + let suggestions = if suggest { + codebook.get_suggestions(wl.word.as_str()) + } else { + None + }; + + for range in &wl.locations { + let before = &text[..range.start_byte.min(text.len())]; + let line = before.bytes().filter(|&b| b == b'\n').count() + 1; + let col = before + .rfind('\n') + .map(|p| before.len() - p) + .unwrap_or(before.len() + 1); + hits.push(( + format!("{line}:{col}"), + wl.word.as_str(), + suggestions.clone(), + )); } - println!(); + } - total_errors += hits.len(); - files_with_errors += 1; + if hits.is_empty() { + return 0; } - if total_errors > 0 { - let _ = std::io::stdout().flush(); - let unique_label = if unique { "unique " } else { "" }; - eprintln!( - "Found {}{total_errors}{} {unique_label}spelling error(s) in {}{files_with_errors}{} file(s).", - err.bold, err.reset, err.bold, err.reset, + let pad_len = hits + .iter() + .map(|(linecol, _, _)| linecol.len()) + .max() + .unwrap_or(0); + + println!( + "{}", + display.if_supports_color(Stream::Stdout, |t| t.style(BOLD)) + ); + for (linecol, word, suggestions) in &hits { + let pad = " ".repeat(pad_len - linecol.len()); + print!( + " {}:{}{} {}", + display.if_supports_color(Stream::Stdout, |t| t.style(DIM)), + linecol.if_supports_color(Stream::Stdout, |t| t.style(YELLOW)), + pad, + word.if_supports_color(Stream::Stdout, |t| t.style(BOLD_RED)), ); + if let Some(suggestions) = suggestions { + println!( + " {}", + format!("→ {}", suggestions.join(", ")) + .if_supports_color(Stream::Stdout, |t| t.style(DIM)), + ); + } else { + println!(); + } } + println!(); - total_errors > 0 + hits.len() } -fn print_config_source(config: &CodebookConfigFile, s: &Styles) { +/// Prints which config file is being used, or notes that the default is active. +fn print_config_source(config: &CodebookConfigFile) { let cwd = std::env::current_dir().unwrap_or_default(); match ( config.project_config_path().filter(|p| p.is_file()), config.global_config_path().filter(|p| p.is_file()), ) { - (Some(p), _) => eprintln!( - "using config {}{}{}", - s.dim, - p.strip_prefix(&cwd).unwrap_or(&p).display(), - s.reset - ), - (None, Some(g)) => eprintln!( - "using global config {}{}{}", - s.dim, - g.strip_prefix(&cwd).unwrap_or(&g).display(), - s.reset - ), - (None, None) => eprintln!("using default config"), + (Some(p), _) => { + let path = p.strip_prefix(&cwd).unwrap_or(&p).display().to_string(); + eprintln!( + "using config {}", + path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); + } + (None, Some(g)) => { + let path = g.strip_prefix(&cwd).unwrap_or(&g).display().to_string(); + eprintln!( + "using global config {}", + path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); + } + (None, None) => eprintln!("No config found, using default config"), } } +/// Resolves a mix of file paths, directories, and glob patterns into a sorted, +/// deduplicated list of file paths. fn resolve_paths(patterns: &[String]) -> Vec { let mut paths = Vec::new(); for pattern in patterns { @@ -205,11 +220,3 @@ fn collect_dir(dir: &Path, out: &mut Vec) { .filter(|e| e.file_type().is_file()) .for_each(|e| out.push(e.into_path())); } - -fn byte_offset_to_line_col(text: &str, byte_offset: usize) -> (usize, usize) { - let offset = byte_offset.min(text.len()); - let before = &text[..offset]; - let line = before.bytes().filter(|&b| b == b'\n').count() + 1; - let col = before.rfind('\n').map(|p| offset - p).unwrap_or(offset + 1); - (line, col) -} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index c6898c49..449c174a 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -39,15 +39,21 @@ enum Commands { /// Only report each misspelled word once, ignoring duplicates across files #[arg(short = 'u', long)] unique: bool, + /// Show spelling suggestions for each misspelled word + #[arg(short = 's', long)] + suggest: bool, }, } #[tokio::main(flavor = "current_thread")] async fn main() { // Initialize logger early with stderr output and buffering - // Default to INFO level, will be adjusted when LSP client connects + // Default to INFO for LSP, WARN for lint (to suppress LSP-oriented noise) + let is_lint = std::env::args().nth(1).as_deref() == Some("lint"); let log_level = match env::var("RUST_LOG").as_deref() { Ok("debug") => LevelFilter::Debug, + Ok("info") => LevelFilter::Info, + _ if is_lint => LevelFilter::Warn, _ => LevelFilter::Info, }; LspLogger::init_early(log_level).expect("Failed to initialize early logger"); @@ -68,8 +74,12 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } - Some(Commands::Lint { files, unique }) => { - if lint::run_lint(files, root, *unique) { + Some(Commands::Lint { + files, + unique, + suggest, + }) => { + if lint::run_lint(files, root, *unique, *suggest) { std::process::exit(1); } } From e21a221079acb9b7e282b6c0a079ec7a05721d5a Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 4 Mar 2026 19:21:50 +0100 Subject: [PATCH 03/14] improve --- crates/codebook-lsp/src/lint.rs | 59 +++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 12c9e1a6..565e4ae2 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -18,6 +18,22 @@ fn fatal(msg: impl std::fmt::Display) -> ! { std::process::exit(2); } +/// Computes a workspace-relative path string for a given file. Falls back to +/// the absolute path if the file is outside the workspace or canonicalization fails. +fn relative_to_root(root: &Path, path: &Path) -> String { + let root_canonical = match root.canonicalize() { + Ok(r) => r, + Err(_) => return path.to_string_lossy().to_string(), + }; + match path.canonicalize() { + Ok(canon) => match canon.strip_prefix(&root_canonical) { + Ok(rel) => rel.to_string_lossy().to_string(), + Err(_) => path.to_string_lossy().to_string(), + }, + Err(_) => path.to_string_lossy().to_string(), + } +} + /// Returns `true` if any spelling errors were found. pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( @@ -31,28 +47,27 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let codebook = Codebook::new(config.clone()) .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); - let resolved = resolve_paths(files); + let resolved = resolve_paths(files, root); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - let error_count = check_file(path, &codebook, &mut seen_words, unique, suggest); + let relative = relative_to_root(root, path); + let error_count = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); if error_count > 0 { total_errors += error_count; files_with_errors += 1; } } - if total_errors > 0 { - let unique_label = if unique { "unique " } else { "" }; - eprintln!( - "Found {} {unique_label}spelling error(s) in {} file(s).", - total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - ); - } + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + ); total_errors > 0 } @@ -60,8 +75,10 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// /// Returns the number of errors found (0 if the file was clean or unreadable). +/// `relative` is the workspace-relative path used for display and ignore matching. fn check_file( path: &Path, + relative: &str, codebook: &Codebook, seen_words: &mut HashSet, unique: bool, @@ -79,10 +96,9 @@ fn check_file( } }; - let raw = path.to_string_lossy(); - let display = raw.strip_prefix("./").unwrap_or(&raw); + let display = relative.strip_prefix("./").unwrap_or(relative); - let mut locations = codebook.spell_check(&text, None, path.to_str()); + let mut locations = codebook.spell_check(&text, None, Some(relative)); // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); @@ -100,7 +116,14 @@ fn check_file( None }; - for range in &wl.locations { + // In unique mode only emit the first occurrence of each word + let ranges = if unique { + &wl.locations[..1] + } else { + &wl.locations[..] + }; + + for range in ranges { let before = &text[..range.start_byte.min(text.len())]; let line = before.bytes().filter(|&b| b == b'\n').count() + 1; let col = before @@ -179,15 +202,17 @@ fn print_config_source(config: &CodebookConfigFile) { } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. -fn resolve_paths(patterns: &[String]) -> Vec { +/// deduplicated list of file paths. Non-absolute patterns are resolved relative to root. +fn resolve_paths(patterns: &[String], root: &Path) -> Vec { let mut paths = Vec::new(); for pattern in patterns { let p = PathBuf::from(pattern); + let p = if p.is_absolute() { p } else { root.join(&p) }; if p.is_dir() { collect_dir(&p, &mut paths); } else { - match glob::glob(pattern) { + let pattern = p.to_string_lossy(); + match glob::glob(&pattern) { Ok(entries) => { let mut matched = false; for entry in entries.flatten() { From 87f77c8cef8bd09ecb226ee1684ed03ab4aa508d Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 15:09:00 +0100 Subject: [PATCH 04/14] apply review --- crates/codebook-lsp/src/lint.rs | 145 +++++++++++++++++++++++--------- 1 file changed, 104 insertions(+), 41 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 565e4ae2..9a39560b 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -4,6 +4,7 @@ use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; +use string_offsets::{AllConfig, StringOffsets}; const BOLD: Style = Style::new().bold(); const DIM: Style = Style::new().dimmed(); @@ -20,21 +21,24 @@ fn fatal(msg: impl std::fmt::Display) -> ! { /// Computes a workspace-relative path string for a given file. Falls back to /// the absolute path if the file is outside the workspace or canonicalization fails. -fn relative_to_root(root: &Path, path: &Path) -> String { - let root_canonical = match root.canonicalize() { - Ok(r) => r, - Err(_) => return path.to_string_lossy().to_string(), +/// `root_canonical` should be the already-canonicalized workspace root. +fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { + let Some(root_canonical) = root_canonical else { + return path.to_string_lossy().into_owned(); }; match path.canonicalize() { - Ok(canon) => match canon.strip_prefix(&root_canonical) { - Ok(rel) => rel.to_string_lossy().to_string(), - Err(_) => path.to_string_lossy().to_string(), + Ok(canon) => match canon.strip_prefix(root_canonical) { + Ok(rel) => rel.to_string_lossy().into_owned(), + Err(_) => path.to_string_lossy().into_owned(), }, - Err(_) => path.to_string_lossy().to_string(), + Err(_) => path.to_string_lossy().into_owned(), } } /// Returns `true` if any spelling errors were found. +/// +/// Exits with code 2 if infrastructure failures occurred (unreadable files, +/// directory errors, unmatched or invalid patterns). pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( CodebookConfigFile::load(Some(root)) @@ -47,15 +51,27 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let codebook = Codebook::new(config.clone()) .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); - let resolved = resolve_paths(files, root); + // Canonicalize the root once here rather than once per file. + let root_canonical = root.canonicalize().ok(); + + let mut had_failure = false; + let resolved = resolve_paths(files, root, &mut had_failure); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - let relative = relative_to_root(root, path); - let error_count = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + let relative = relative_to_root(root_canonical.as_deref(), path); + let error_count = check_file( + path, + &relative, + &codebook, + &mut seen_words, + unique, + suggest, + &mut had_failure, + ); if error_count > 0 { total_errors += error_count; files_with_errors += 1; @@ -69,13 +85,18 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), ); + if had_failure { + std::process::exit(2); + } + total_errors > 0 } /// Spell-checks a single file and prints any diagnostics to stdout. /// -/// Returns the number of errors found (0 if the file was clean or unreadable). +/// Returns the number of spelling errors found (0 if the file was clean). /// `relative` is the workspace-relative path used for display and ignore matching. +/// Sets `*had_failure = true` on I/O errors so the caller can exit non-zero. fn check_file( path: &Path, relative: &str, @@ -83,6 +104,7 @@ fn check_file( seen_words: &mut HashSet, unique: bool, suggest: bool, + had_failure: &mut bool, ) -> usize { let text = match std::fs::read_to_string(path) { Ok(t) => t, @@ -92,6 +114,7 @@ fn check_file( "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), path.display() ); + *had_failure = true; return 0; } }; @@ -102,8 +125,13 @@ fn check_file( // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + // Build the offset table once per file – O(n) construction, O(1) per lookup – + // and gives Unicode character columns rather than raw byte offsets. + let offsets = StringOffsets::::new(&text); + // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. - // unique check is per-word (outer loop) so all ranges of a word are included or skipped together. + // The unique check is per-word (outer loop) so all ranges of a word are included + // or skipped together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { @@ -116,7 +144,7 @@ fn check_file( None }; - // In unique mode only emit the first occurrence of each word + // In unique mode only emit the first occurrence of each word. let ranges = if unique { &wl.locations[..1] } else { @@ -124,12 +152,10 @@ fn check_file( }; for range in ranges { - let before = &text[..range.start_byte.min(text.len())]; - let line = before.bytes().filter(|&b| b == b'\n').count() + 1; - let col = before - .rfind('\n') - .map(|p| before.len() - p) - .unwrap_or(before.len() + 1); + // utf8_to_char_pos returns 0-based line and Unicode-char column. + let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + let line = pos.line + 1; // 0-based → 1-based + let col = pos.col + 1; // 0-based → 1-based hits.push(( format!("{line}:{col}"), wl.word.as_str(), @@ -202,33 +228,57 @@ fn print_config_source(config: &CodebookConfigFile) { } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative to root. -fn resolve_paths(patterns: &[String], root: &Path) -> Vec { +/// deduplicated list of file paths. Non-absolute patterns are resolved relative to `root`. +/// +/// Sets `*had_failure = true` for unmatched patterns, invalid globs, or glob I/O errors. +fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Vec { let mut paths = Vec::new(); for pattern in patterns { let p = PathBuf::from(pattern); let p = if p.is_absolute() { p } else { root.join(&p) }; if p.is_dir() { - collect_dir(&p, &mut paths); + collect_dir(&p, &mut paths, had_failure); } else { - let pattern = p.to_string_lossy(); - match glob::glob(&pattern) { + let pattern_str = p.to_string_lossy(); + match glob::glob(&pattern_str) { Ok(entries) => { let mut matched = false; - for entry in entries.flatten() { - if entry.is_file() { - paths.push(entry); - matched = true; - } else if entry.is_dir() { - collect_dir(&entry, &mut paths); - matched = true; + for entry in entries { + match entry { + Ok(e) => { + if e.is_file() { + paths.push(e); + matched = true; + } else if e.is_dir() { + collect_dir(&e, &mut paths, had_failure); + matched = true; + } + } + Err(e) => { + eprintln!( + "{} failed to read glob entry: {e}", + "error:" + .if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; + } } } if !matched { - eprintln!("codebook: no match for '{pattern}'"); + eprintln!( + "{} no match for '{pattern_str}'", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; } } - Err(e) => eprintln!("codebook: invalid pattern '{pattern}': {e}"), + Err(e) => { + eprintln!( + "{} invalid pattern '{pattern_str}': {e}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; + } } } } @@ -237,11 +287,24 @@ fn resolve_paths(patterns: &[String], root: &Path) -> Vec { paths } -fn collect_dir(dir: &Path, out: &mut Vec) { - walkdir::WalkDir::new(dir) - .follow_links(false) - .into_iter() - .flatten() - .filter(|e| e.file_type().is_file()) - .for_each(|e| out.push(e.into_path())); +/// Recursively collects all files under `dir` into `out`. +/// Sets `*had_failure = true` for any directory-entry I/O error (e.g. permission denied). +fn collect_dir(dir: &Path, out: &mut Vec, had_failure: &mut bool) { + for entry in walkdir::WalkDir::new(dir).follow_links(false).into_iter() { + match entry { + Ok(e) => { + if e.file_type().is_file() { + out.push(e.into_path()); + } + } + Err(err) => { + eprintln!( + "{} failed to read directory entry under '{}': {err}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + dir.display() + ); + *had_failure = true; + } + } + } } From e88087c21cbebf2bbf4d7a9e1c1c665460a72bee Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 16:25:19 +0100 Subject: [PATCH 05/14] test --- crates/codebook-lsp/src/lint.rs | 616 ++++++++++++++++++++++++++------ 1 file changed, 498 insertions(+), 118 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 9a39560b..9c2faadd 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -11,11 +11,18 @@ const DIM: Style = Style::new().dimmed(); const YELLOW: Style = Style::new().yellow(); const BOLD_RED: Style = Style::new().bold().red(); +macro_rules! err { + ($($arg:tt)*) => { + eprintln!( + "{} {}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + format_args!($($arg)*) + ) + }; +} + fn fatal(msg: impl std::fmt::Display) -> ! { - eprintln!( - "{} {msg}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); + err!("{msg}"); std::process::exit(2); } @@ -23,16 +30,15 @@ fn fatal(msg: impl std::fmt::Display) -> ! { /// the absolute path if the file is outside the workspace or canonicalization fails. /// `root_canonical` should be the already-canonicalized workspace root. fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { - let Some(root_canonical) = root_canonical else { - return path.to_string_lossy().into_owned(); - }; - match path.canonicalize() { - Ok(canon) => match canon.strip_prefix(root_canonical) { - Ok(rel) => rel.to_string_lossy().into_owned(), - Err(_) => path.to_string_lossy().into_owned(), - }, - Err(_) => path.to_string_lossy().into_owned(), - } + root_canonical + .and_then(|root| { + let canon = path.canonicalize().ok()?; + canon + .strip_prefix(root) + .ok() + .map(|rel| rel.to_string_lossy().into_owned()) + }) + .unwrap_or_else(|| path.to_string_lossy().into_owned()) } /// Returns `true` if any spelling errors were found. @@ -54,8 +60,7 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b // Canonicalize the root once here rather than once per file. let root_canonical = root.canonicalize().ok(); - let mut had_failure = false; - let resolved = resolve_paths(files, root, &mut had_failure); + let (resolved, mut had_failure) = resolve_paths(files, root); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; @@ -63,17 +68,11 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b for path in &resolved { let relative = relative_to_root(root_canonical.as_deref(), path); - let error_count = check_file( - path, - &relative, - &codebook, - &mut seen_words, - unique, - suggest, - &mut had_failure, - ); - if error_count > 0 { - total_errors += error_count; + let (errors, file_failure) = + check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + had_failure |= file_failure; + if errors > 0 { + total_errors += errors; files_with_errors += 1; } } @@ -94,9 +93,9 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// -/// Returns the number of spelling errors found (0 if the file was clean). +/// Returns `(error_count, had_io_error)`. `error_count` is 0 if the file was +/// clean; `had_io_error` is true when the file could not be read. /// `relative` is the workspace-relative path used for display and ignore matching. -/// Sets `*had_failure = true` on I/O errors so the caller can exit non-zero. fn check_file( path: &Path, relative: &str, @@ -104,41 +103,35 @@ fn check_file( seen_words: &mut HashSet, unique: bool, suggest: bool, - had_failure: &mut bool, -) -> usize { +) -> (usize, bool) { let text = match std::fs::read_to_string(path) { Ok(t) => t, Err(e) => { - eprintln!( - "{} {}: {e}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), - path.display() - ); - *had_failure = true; - return 0; + err!("{}: {e}", path.display()); + return (0, true); } }; let display = relative.strip_prefix("./").unwrap_or(relative); - let mut locations = codebook.spell_check(&text, None, Some(relative)); - // Sort by first occurrence in the file. - locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - // Build the offset table once per file – O(n) construction, O(1) per lookup – // and gives Unicode character columns rather than raw byte offsets. let offsets = StringOffsets::::new(&text); - // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. - // The unique check is per-word (outer loop) so all ranges of a word are included - // or skipped together. + let mut locations = codebook.spell_check(&text, None, Some(relative)); + // Sort by first occurrence in the file. + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + // Collect hits first so we can compute pad_len for column alignment. + // The unique check is per-word (outer loop) so all ranges of a word are + // included or skipped together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { continue; } - let suggestions = if suggest { + let mut suggestions = if suggest { codebook.get_suggestions(wl.word.as_str()) } else { None @@ -151,28 +144,28 @@ fn check_file( &wl.locations[..] }; - for range in ranges { + for (i, range) in ranges.iter().enumerate() { // utf8_to_char_pos returns 0-based line and Unicode-char column. let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); - let line = pos.line + 1; // 0-based → 1-based - let col = pos.col + 1; // 0-based → 1-based + // Move out of `suggestions` on the last iteration to avoid a clone. + let sugg = if i + 1 < ranges.len() { + suggestions.clone() + } else { + suggestions.take() + }; hits.push(( - format!("{line}:{col}"), + format!("{}:{}", pos.line + 1, pos.col + 1), wl.word.as_str(), - suggestions.clone(), + sugg, )); } } if hits.is_empty() { - return 0; + return (0, false); } - let pad_len = hits - .iter() - .map(|(linecol, _, _)| linecol.len()) - .max() - .unwrap_or(0); + let pad_len = hits.iter().map(|(lc, _, _)| lc.len()).max().unwrap_or(0); println!( "{}", @@ -199,45 +192,50 @@ fn check_file( } println!(); - hits.len() + (hits.len(), false) } /// Prints which config file is being used, or notes that the default is active. fn print_config_source(config: &CodebookConfigFile) { let cwd = std::env::current_dir().unwrap_or_default(); - match ( + let (label, path) = match ( config.project_config_path().filter(|p| p.is_file()), config.global_config_path().filter(|p| p.is_file()), ) { - (Some(p), _) => { - let path = p.strip_prefix(&cwd).unwrap_or(&p).display().to_string(); - eprintln!( - "using config {}", - path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); + (Some(p), _) => ("using config", p), + (None, Some(g)) => ("using global config", g), + (None, None) => { + eprintln!("No config found, using default config"); + return; } - (None, Some(g)) => { - let path = g.strip_prefix(&cwd).unwrap_or(&g).display().to_string(); - eprintln!( - "using global config {}", - path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); - } - (None, None) => eprintln!("No config found, using default config"), - } + }; + let display = path + .strip_prefix(&cwd) + .unwrap_or(&path) + .display() + .to_string(); + eprintln!( + "{label} {}", + display.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative to `root`. +/// deduplicated list of file paths. Non-absolute patterns are resolved relative +/// to `root`. `Path::join` replaces the base when the argument is absolute, so +/// no explicit `is_absolute` check is needed. /// -/// Sets `*had_failure = true` for unmatched patterns, invalid globs, or glob I/O errors. -fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Vec { +/// Returns `(paths, had_failure)`. `had_failure` is true for unmatched +/// patterns, invalid globs, or glob I/O errors. +fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut paths = Vec::new(); + let mut had_failure = false; + for pattern in patterns { - let p = PathBuf::from(pattern); - let p = if p.is_absolute() { p } else { root.join(&p) }; + // root.join() is a no-op when pattern is absolute (replaces root entirely). + let p = root.join(pattern); if p.is_dir() { - collect_dir(&p, &mut paths, had_failure); + had_failure |= collect_dir(&p, &mut paths); } else { let pattern_str = p.to_string_lossy(); match glob::glob(&pattern_str) { @@ -245,66 +243,448 @@ fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Ve let mut matched = false; for entry in entries { match entry { - Ok(e) => { - if e.is_file() { - paths.push(e); - matched = true; - } else if e.is_dir() { - collect_dir(&e, &mut paths, had_failure); - matched = true; - } + Ok(e) if e.is_file() => { + paths.push(e); + matched = true; + } + Ok(e) if e.is_dir() => { + had_failure |= collect_dir(&e, &mut paths); + matched = true; } + Ok(_) => {} Err(e) => { - eprintln!( - "{} failed to read glob entry: {e}", - "error:" - .if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("failed to read glob entry: {e}"); + had_failure = true; } } } if !matched { - eprintln!( - "{} no match for '{pattern_str}'", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("no match for '{pattern_str}'"); + had_failure = true; } } Err(e) => { - eprintln!( - "{} invalid pattern '{pattern_str}': {e}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("invalid pattern '{pattern_str}': {e}"); + had_failure = true; } } } } + paths.sort(); paths.dedup(); - paths + (paths, had_failure) } /// Recursively collects all files under `dir` into `out`. -/// Sets `*had_failure = true` for any directory-entry I/O error (e.g. permission denied). -fn collect_dir(dir: &Path, out: &mut Vec, had_failure: &mut bool) { - for entry in walkdir::WalkDir::new(dir).follow_links(false).into_iter() { +/// Returns `true` if any directory-entry I/O error occurred (e.g. permission denied). +fn collect_dir(dir: &Path, out: &mut Vec) -> bool { + let mut had_failure = false; + for entry in walkdir::WalkDir::new(dir).follow_links(false) { match entry { - Ok(e) => { - if e.file_type().is_file() { - out.push(e.into_path()); - } - } - Err(err) => { - eprintln!( - "{} failed to read directory entry under '{}': {err}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + Ok(e) if e.file_type().is_file() => out.push(e.into_path()), + Ok(_) => {} + Err(e) => { + err!( + "failed to read directory entry under '{}': {e}", dir.display() ); - *had_failure = true; + had_failure = true; } } } + had_failure +} + +#[cfg(test)] +mod tests { + use super::*; + use codebook::Codebook; + use codebook_config::CodebookConfigMemory; + use std::collections::HashSet; + use std::fs; + use std::sync::Arc; + use tempfile::tempdir; + + /// Builds a Codebook backed by the default in-memory config (en_us dictionary). + /// This mirrors the pattern used throughout the rest of the test suite. + fn make_codebook() -> Codebook { + let config = Arc::new(CodebookConfigMemory::default()); + Codebook::new(config).unwrap() + } + + // ── relative_to_root ───────────────────────────────────────────────────── + + #[test] + fn test_relative_to_root_inside_workspace() { + let dir = tempdir().unwrap(); + let subdir = dir.path().join("src"); + fs::create_dir_all(&subdir).unwrap(); + let file = subdir.join("lib.rs"); + fs::write(&file, "").unwrap(); + + let root_canonical = dir.path().canonicalize().unwrap(); + let result = relative_to_root(Some(&root_canonical), &file); + assert_eq!(result, "src/lib.rs"); + } + + #[test] + fn test_relative_to_root_outside_workspace() { + let root = tempdir().unwrap(); + let other = tempdir().unwrap(); + let file = other.path().join("outside.rs"); + fs::write(&file, "").unwrap(); + + let root_canonical = root.path().canonicalize().unwrap(); + let result = relative_to_root(Some(&root_canonical), &file); + // Falls back to a path string since the file is outside the workspace. + assert!(result.contains("outside.rs")); + } + + #[test] + fn test_relative_to_root_none_returns_raw_path() { + let result = relative_to_root(None, Path::new("/some/path/file.rs")); + assert_eq!(result, "/some/path/file.rs"); + } + + // ── collect_dir ─────────────────────────────────────────────────────────── + + #[test] + fn test_collect_dir_empty_dir() { + let dir = tempdir().unwrap(); + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + assert!(out.is_empty()); + assert!(!had_failure); + } + + #[test] + fn test_collect_dir_collects_files_recursively() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(sub.join("b.txt"), "").unwrap(); + + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + + assert_eq!(out.len(), 2); + assert!(!had_failure); + } + + #[test] + fn test_collect_dir_only_yields_files_not_directories() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("subdir"); + fs::create_dir_all(&sub).unwrap(); + fs::write(sub.join("c.txt"), "").unwrap(); + + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + + assert!( + out.iter().all(|p| p.is_file()), + "only files should be collected, not directories" + ); + assert!(!had_failure); + } + + // ── resolve_paths ───────────────────────────────────────────────────────── + + #[test] + fn test_resolve_paths_single_existing_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("hello.txt"); + fs::write(&file, "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[file.to_string_lossy().to_string()], dir.path()); + + assert_eq!(result, vec![file]); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_missing_file_sets_failure() { + let dir = tempdir().unwrap(); + let nonexistent = dir.path().join("nope.txt"); + + let (result, had_failure) = + resolve_paths(&[nonexistent.to_string_lossy().to_string()], dir.path()); + + assert!(result.is_empty()); + assert!(had_failure, "unmatched path should set had_failure"); + } + + #[test] + fn test_resolve_paths_directory_collects_all_files() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(sub.join("b.txt"), "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); + + assert_eq!(result.len(), 2); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_glob_matches_by_extension() { + let dir = tempdir().unwrap(); + fs::write(dir.path().join("foo.rs"), "").unwrap(); + fs::write(dir.path().join("bar.rs"), "").unwrap(); + fs::write(dir.path().join("baz.txt"), "").unwrap(); + + let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + + assert_eq!(result.len(), 2, "only .rs files should match"); + assert!( + result + .iter() + .all(|p| p.extension().unwrap_or_default() == "rs") + ); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_glob_no_match_sets_failure() { + let dir = tempdir().unwrap(); // empty dir — no .rs files + let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + + let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + + assert!(result.is_empty()); + assert!(had_failure, "unmatched glob should set had_failure"); + } + + #[test] + fn test_resolve_paths_deduplicates_same_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("hello.txt"); + fs::write(&file, "").unwrap(); + + let path_str = file.to_string_lossy().to_string(); + // List the same file twice — it should appear only once in the output. + let (result, had_failure) = resolve_paths(&[path_str.clone(), path_str], dir.path()); + + assert_eq!(result.len(), 1); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_output_is_sorted() { + let dir = tempdir().unwrap(); + // Create files in non-alphabetical order. + fs::write(dir.path().join("c.txt"), "").unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(dir.path().join("b.txt"), "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); + + let names: Vec<_> = result + .iter() + .map(|p| p.file_name().unwrap().to_string_lossy().into_owned()) + .collect(); + assert_eq!(names, ["a.txt", "b.txt", "c.txt"]); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_relative_pattern_resolved_against_root() { + let dir = tempdir().unwrap(); + fs::write(dir.path().join("x.txt"), "").unwrap(); + + // "x.txt" is relative — resolve_paths should join it with the root. + let (result, had_failure) = resolve_paths(&["x.txt".to_string()], dir.path()); + + assert_eq!(result.len(), 1); + assert!(!had_failure); + } + + // ── check_file ──────────────────────────────────────────────────────────── + + #[test] + fn test_check_file_unreadable_sets_failure() { + let dir = tempdir().unwrap(); + let nonexistent = dir.path().join("ghost.txt"); + + let codebook = make_codebook(); + let (count, had_failure) = check_file( + &nonexistent, + "ghost.txt", + &codebook, + &mut HashSet::new(), + false, + false, + ); + + assert_eq!(count, 0); + assert!(had_failure, "unreadable file must set had_failure"); + } + + #[test] + fn test_check_file_flags_misspelling() { + let dir = tempdir().unwrap(); + let file = dir.path().join("bad.txt"); + // "actualbad" is not a real English word; the checker should flag it. + fs::write(&file, "actualbad").unwrap(); + + let codebook = make_codebook(); + let (count, had_failure) = check_file( + &file, + "bad.txt", + &codebook, + &mut HashSet::new(), + false, + false, + ); + + assert!( + count > 0, + "expected at least one misspelling to be reported" + ); + assert!(!had_failure); + } + + #[test] + fn test_check_file_unique_counts_each_word_once_per_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("dup.txt"); + // The checker groups all occurrences of the same misspelled word into one + // WordLocation with multiple TextRanges; unique mode takes only the first. + fs::write(&file, "actualbad actualbad").unwrap(); + + let codebook = make_codebook(); + let mut had_failure = false; + + let (count_all, f) = check_file( + &file, + "dup.txt", + &codebook, + &mut HashSet::new(), + false, // unique = false + false, + ); + had_failure |= f; + + let (count_unique, f) = check_file( + &file, + "dup.txt", + &codebook, + &mut HashSet::new(), + true, // unique = true + false, + ); + had_failure |= f; + + assert_eq!(count_all, 2, "non-unique mode must count every occurrence"); + assert_eq!(count_unique, 1, "unique mode must count each word once"); + assert!(!had_failure); + } + + #[test] + fn test_check_file_unique_deduplicates_across_files() { + let dir = tempdir().unwrap(); + let file1 = dir.path().join("first.txt"); + let file2 = dir.path().join("second.txt"); + fs::write(&file1, "actualbad").unwrap(); + fs::write(&file2, "actualbad").unwrap(); + + let codebook = make_codebook(); + // A single shared `seen_words` set, exactly as run_lint uses it. + let mut seen = HashSet::new(); + + let (count1, f1) = check_file(&file1, "first.txt", &codebook, &mut seen, true, false); + let (count2, f2) = check_file(&file2, "second.txt", &codebook, &mut seen, true, false); + + assert_eq!(count1, 1, "first file should report the misspelling"); + assert_eq!(count2, 0, "second file should skip the already-seen word"); + assert!(!f1); + assert!(!f2); + } + + // ── line/col correctness (pure StringOffsets, no network I/O) ──────────── + // + // These tests exercise the exact same utf8_to_char_pos call that check_file + // uses, confirming that columns are in Unicode characters rather than raw + // bytes and that the 0-based → 1-based conversion is applied correctly. + + #[test] + fn test_linecol_word_at_start_of_file() { + let text = "actualbad rest of line"; + let offsets = StringOffsets::::new(text); + let pos = offsets.utf8_to_char_pos(0); + assert_eq!(pos.line + 1, 1); + assert_eq!(pos.col + 1, 1); + } + + #[test] + fn test_linecol_word_on_second_line() { + // "ok text\n" is 8 bytes; "actualbad" starts at byte 8. + let text = "ok text\nactualbad more"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 8, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 2, "should be on the second line"); + assert_eq!(pos.col + 1, 1, "should be the first column of that line"); + } + + #[test] + fn test_linecol_mid_line_ascii() { + // "hello " is 6 bytes/chars; "actualbad" starts at byte/char 6. + let text = "hello actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 6, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!(pos.col + 1, 7); + } + + /// The old byte-count approach reported col=10 here; StringOffsets correctly + /// reports col=8 because "résumé" is 8 bytes but only 6 Unicode characters. + #[test] + fn test_linecol_multibyte_chars_before_word() { + // r(1) + é(2) + s(1) + u(1) + m(1) + é(2) + space(1) = 9 bytes, 7 chars. + // "actualbad" starts at byte 9, char offset 7 (0-based) → col 8 (1-based). + let text = "résumé actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 9, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!( + pos.col + 1, + 8, + "col must count Unicode chars (6 + space = 7, 1-based = 8), not bytes (9, 1-based = 10)" + ); + } + + #[test] + fn test_linecol_emoji_before_word() { + // 🐛 is U+1F41B — 4 UTF-8 bytes, 1 Unicode char. + // "🐛 " = 5 bytes, 2 chars; "actualbad" starts at byte 5, char 2 (0-based) → col 3. + let text = "🐛 actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 5, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!( + pos.col + 1, + 3, + "col must count Unicode chars (emoji=1, space=1, 1-based=3), not bytes (5, 1-based=6)" + ); + } } From adcf2787e0c897c53ae8781c92abc3aee943d283 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:19:10 +0100 Subject: [PATCH 06/14] update tests --- crates/codebook-lsp/src/lint.rs | 470 ++++++-------------------------- 1 file changed, 86 insertions(+), 384 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 9c2faadd..3f0c8ac5 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -21,14 +21,20 @@ macro_rules! err { }; } +macro_rules! paint { + ($val:expr, $stream:expr, $style:expr) => { + $val.if_supports_color($stream, |t| t.style($style)) + }; +} + fn fatal(msg: impl std::fmt::Display) -> ! { err!("{msg}"); std::process::exit(2); } /// Computes a workspace-relative path string for a given file. Falls back to -/// the absolute path if the file is outside the workspace or canonicalization fails. -/// `root_canonical` should be the already-canonicalized workspace root. +/// the absolute path if the file is outside the workspace or canonicalization +/// fails. `root_canonical` should be the already-canonicalized workspace root. fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { root_canonical .and_then(|root| { @@ -80,8 +86,8 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let unique_label = if unique { "unique " } else { "" }; eprintln!( "Found {} {unique_label}spelling error(s) in {} file(s).", - total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + paint!(total_errors, Stream::Stderr, BOLD), + paint!(files_with_errors, Stream::Stderr, BOLD), ); if had_failure { @@ -94,8 +100,8 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// /// Returns `(error_count, had_io_error)`. `error_count` is 0 if the file was -/// clean; `had_io_error` is true when the file could not be read. -/// `relative` is the workspace-relative path used for display and ignore matching. +/// clean; `had_io_error` is true when the file could not be read. `relative` is +/// the workspace-relative path used for display and ignore matching. fn check_file( path: &Path, relative: &str, @@ -114,17 +120,15 @@ fn check_file( let display = relative.strip_prefix("./").unwrap_or(relative); - // Build the offset table once per file – O(n) construction, O(1) per lookup – - // and gives Unicode character columns rather than raw byte offsets. + // Build the offset table once per file let offsets = StringOffsets::::new(&text); - let mut locations = codebook.spell_check(&text, None, Some(relative)); // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - // Collect hits first so we can compute pad_len for column alignment. - // The unique check is per-word (outer loop) so all ranges of a word are - // included or skipped together. + // Collect hits first so we can compute pad_len for column alignment. The + // unique check is per-word, so all ranges of a word are included or skipped + // together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { @@ -137,7 +141,7 @@ fn check_file( None }; - // In unique mode only emit the first occurrence of each word. + // If unique mode: Only emit the first occurrence of each word. let ranges = if unique { &wl.locations[..1] } else { @@ -147,12 +151,14 @@ fn check_file( for (i, range) in ranges.iter().enumerate() { // utf8_to_char_pos returns 0-based line and Unicode-char column. let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + // Move out of `suggestions` on the last iteration to avoid a clone. let sugg = if i + 1 < ranges.len() { suggestions.clone() } else { suggestions.take() }; + hits.push(( format!("{}:{}", pos.line + 1, pos.col + 1), wl.word.as_str(), @@ -175,17 +181,14 @@ fn check_file( let pad = " ".repeat(pad_len - linecol.len()); print!( " {}:{}{} {}", - display.if_supports_color(Stream::Stdout, |t| t.style(DIM)), - linecol.if_supports_color(Stream::Stdout, |t| t.style(YELLOW)), + paint!(display, Stream::Stdout, DIM), + paint!(linecol, Stream::Stdout, YELLOW), pad, - word.if_supports_color(Stream::Stdout, |t| t.style(BOLD_RED)), + paint!(word, Stream::Stdout, BOLD_RED), ); - if let Some(suggestions) = suggestions { - println!( - " {}", - format!("→ {}", suggestions.join(", ")) - .if_supports_color(Stream::Stdout, |t| t.style(DIM)), - ); + if let Some(s) = suggestions { + let text = format!("→ {}", s.join(", ")); + println!(" {}", paint!(text, Stream::Stdout, DIM)); } else { println!(); } @@ -232,7 +235,7 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut had_failure = false; for pattern in patterns { - // root.join() is a no-op when pattern is absolute (replaces root entirely). + // root.join() is a no-op when pattern is absolute let p = root.join(pattern); if p.is_dir() { had_failure |= collect_dir(&p, &mut paths); @@ -276,8 +279,8 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { (paths, had_failure) } -/// Recursively collects all files under `dir` into `out`. -/// Returns `true` if any directory-entry I/O error occurred (e.g. permission denied). +/// Recursively collects all files under `dir` into `out`. Returns `true` if any +/// directory-entry I/O error occurred. fn collect_dir(dir: &Path, out: &mut Vec) -> bool { let mut had_failure = false; for entry in walkdir::WalkDir::new(dir).follow_links(false) { @@ -306,385 +309,84 @@ mod tests { use std::sync::Arc; use tempfile::tempdir; - /// Builds a Codebook backed by the default in-memory config (en_us dictionary). - /// This mirrors the pattern used throughout the rest of the test suite. - fn make_codebook() -> Codebook { - let config = Arc::new(CodebookConfigMemory::default()); - Codebook::new(config).unwrap() - } - - // ── relative_to_root ───────────────────────────────────────────────────── - #[test] - fn test_relative_to_root_inside_workspace() { - let dir = tempdir().unwrap(); - let subdir = dir.path().join("src"); - fs::create_dir_all(&subdir).unwrap(); - let file = subdir.join("lib.rs"); - fs::write(&file, "").unwrap(); - - let root_canonical = dir.path().canonicalize().unwrap(); - let result = relative_to_root(Some(&root_canonical), &file); - assert_eq!(result, "src/lib.rs"); - } - - #[test] - fn test_relative_to_root_outside_workspace() { - let root = tempdir().unwrap(); - let other = tempdir().unwrap(); - let file = other.path().join("outside.rs"); - fs::write(&file, "").unwrap(); - - let root_canonical = root.path().canonicalize().unwrap(); - let result = relative_to_root(Some(&root_canonical), &file); - // Falls back to a path string since the file is outside the workspace. - assert!(result.contains("outside.rs")); - } - - #[test] - fn test_relative_to_root_none_returns_raw_path() { - let result = relative_to_root(None, Path::new("/some/path/file.rs")); - assert_eq!(result, "/some/path/file.rs"); - } - - // ── collect_dir ─────────────────────────────────────────────────────────── - - #[test] - fn test_collect_dir_empty_dir() { - let dir = tempdir().unwrap(); - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); - assert!(out.is_empty()); - assert!(!had_failure); - } - - #[test] - fn test_collect_dir_collects_files_recursively() { + fn test_path_and_dir_resolution() { let dir = tempdir().unwrap(); let sub = dir.path().join("sub"); fs::create_dir_all(&sub).unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(sub.join("b.txt"), "").unwrap(); - - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); - - assert_eq!(out.len(), 2); - assert!(!had_failure); - } - #[test] - fn test_collect_dir_only_yields_files_not_directories() { - let dir = tempdir().unwrap(); - let sub = dir.path().join("subdir"); - fs::create_dir_all(&sub).unwrap(); - fs::write(sub.join("c.txt"), "").unwrap(); + let f1 = dir.path().join("a.rs"); + let f2 = sub.join("b.txt"); + fs::write(&f1, "").unwrap(); + fs::write(&f2, "").unwrap(); - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); + let root_canon = dir.path().canonicalize().unwrap(); + assert_eq!(relative_to_root(Some(&root_canon), &f1), "a.rs"); - assert!( - out.iter().all(|p| p.is_file()), - "only files should be collected, not directories" - ); - assert!(!had_failure); - } - - // ── resolve_paths ───────────────────────────────────────────────────────── - - #[test] - fn test_resolve_paths_single_existing_file() { - let dir = tempdir().unwrap(); - let file = dir.path().join("hello.txt"); - fs::write(&file, "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[file.to_string_lossy().to_string()], dir.path()); - - assert_eq!(result, vec![file]); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_missing_file_sets_failure() { - let dir = tempdir().unwrap(); - let nonexistent = dir.path().join("nope.txt"); - - let (result, had_failure) = - resolve_paths(&[nonexistent.to_string_lossy().to_string()], dir.path()); - - assert!(result.is_empty()); - assert!(had_failure, "unmatched path should set had_failure"); - } - - #[test] - fn test_resolve_paths_directory_collects_all_files() { - let dir = tempdir().unwrap(); - let sub = dir.path().join("sub"); - fs::create_dir_all(&sub).unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(sub.join("b.txt"), "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); - - assert_eq!(result.len(), 2); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_glob_matches_by_extension() { - let dir = tempdir().unwrap(); - fs::write(dir.path().join("foo.rs"), "").unwrap(); - fs::write(dir.path().join("bar.rs"), "").unwrap(); - fs::write(dir.path().join("baz.txt"), "").unwrap(); - - let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); - let (result, had_failure) = resolve_paths(&[pattern], dir.path()); - - assert_eq!(result.len(), 2, "only .rs files should match"); - assert!( - result - .iter() - .all(|p| p.extension().unwrap_or_default() == "rs") - ); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_glob_no_match_sets_failure() { - let dir = tempdir().unwrap(); // empty dir — no .rs files - let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + let pattern = format!("{}/**/*.*", dir.path().display()); + let (paths, err) = resolve_paths(&[pattern], dir.path()); - let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + assert!(!err); + assert_eq!(paths.len(), 2); + let path_strs: HashSet<_> = paths.iter().map(|p| p.to_string_lossy()).collect(); + assert!(path_strs.iter().any(|s| s.ends_with("a.rs"))); + assert!(path_strs.iter().any(|s| s.ends_with("b.txt"))); - assert!(result.is_empty()); - assert!(had_failure, "unmatched glob should set had_failure"); + let (_, err_missing) = resolve_paths(&["nonexistent.rs".into()], dir.path()); + assert!(err_missing); } #[test] - fn test_resolve_paths_deduplicates_same_file() { + fn test_check_file_logic() { let dir = tempdir().unwrap(); - let file = dir.path().join("hello.txt"); - fs::write(&file, "").unwrap(); + let f = dir.path().join("test.txt"); + fs::write(&f, "actualbad\n🦀 actualbad").unwrap(); - let path_str = file.to_string_lossy().to_string(); - // List the same file twice — it should appear only once in the output. - let (result, had_failure) = resolve_paths(&[path_str.clone(), path_str], dir.path()); - - assert_eq!(result.len(), 1); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_output_is_sorted() { - let dir = tempdir().unwrap(); - // Create files in non-alphabetical order. - fs::write(dir.path().join("c.txt"), "").unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(dir.path().join("b.txt"), "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); - - let names: Vec<_> = result - .iter() - .map(|p| p.file_name().unwrap().to_string_lossy().into_owned()) - .collect(); - assert_eq!(names, ["a.txt", "b.txt", "c.txt"]); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_relative_pattern_resolved_against_root() { - let dir = tempdir().unwrap(); - fs::write(dir.path().join("x.txt"), "").unwrap(); - - // "x.txt" is relative — resolve_paths should join it with the root. - let (result, had_failure) = resolve_paths(&["x.txt".to_string()], dir.path()); - - assert_eq!(result.len(), 1); - assert!(!had_failure); - } - - // ── check_file ──────────────────────────────────────────────────────────── - - #[test] - fn test_check_file_unreadable_sets_failure() { - let dir = tempdir().unwrap(); - let nonexistent = dir.path().join("ghost.txt"); - - let codebook = make_codebook(); - let (count, had_failure) = check_file( - &nonexistent, - "ghost.txt", - &codebook, - &mut HashSet::new(), - false, - false, - ); - - assert_eq!(count, 0); - assert!(had_failure, "unreadable file must set had_failure"); - } - - #[test] - fn test_check_file_flags_misspelling() { - let dir = tempdir().unwrap(); - let file = dir.path().join("bad.txt"); - // "actualbad" is not a real English word; the checker should flag it. - fs::write(&file, "actualbad").unwrap(); - - let codebook = make_codebook(); - let (count, had_failure) = check_file( - &file, - "bad.txt", - &codebook, - &mut HashSet::new(), - false, - false, - ); - - assert!( - count > 0, - "expected at least one misspelling to be reported" - ); - assert!(!had_failure); - } + let cb = Codebook::new(Arc::new(CodebookConfigMemory::default())).unwrap(); + let mut seen = HashSet::new(); - #[test] - fn test_check_file_unique_counts_each_word_once_per_file() { - let dir = tempdir().unwrap(); - let file = dir.path().join("dup.txt"); - // The checker groups all occurrences of the same misspelled word into one - // WordLocation with multiple TextRanges; unique mode takes only the first. - fs::write(&file, "actualbad actualbad").unwrap(); - - let codebook = make_codebook(); - let mut had_failure = false; - - let (count_all, f) = check_file( - &file, - "dup.txt", - &codebook, - &mut HashSet::new(), - false, // unique = false + // Test basic flagging and multi-occurrence counting + let (count, err) = check_file(&f, "test.txt", &cb, &mut seen, false, false); + assert_eq!(count, 2); + assert!(!err); + + // Test unique mode + let mut seen_unique = HashSet::new(); + let (c1, _) = check_file(&f, "f1.txt", &cb, &mut seen_unique, true, false); + let (c2, _) = check_file(&f, "f2.txt", &cb, &mut seen_unique, true, false); + assert_eq!(c1, 1, "Should flag word once"); + assert_eq!(c2, 0, "Should skip already-seen word in second file"); + + // Test IO failure + let (_, err_io) = check_file( + &dir.path().join("missing"), + "!", + &cb, + &mut seen, false, - ); - had_failure |= f; - - let (count_unique, f) = check_file( - &file, - "dup.txt", - &codebook, - &mut HashSet::new(), - true, // unique = true false, ); - had_failure |= f; - - assert_eq!(count_all, 2, "non-unique mode must count every occurrence"); - assert_eq!(count_unique, 1, "unique mode must count each word once"); - assert!(!had_failure); - } - - #[test] - fn test_check_file_unique_deduplicates_across_files() { - let dir = tempdir().unwrap(); - let file1 = dir.path().join("first.txt"); - let file2 = dir.path().join("second.txt"); - fs::write(&file1, "actualbad").unwrap(); - fs::write(&file2, "actualbad").unwrap(); - - let codebook = make_codebook(); - // A single shared `seen_words` set, exactly as run_lint uses it. - let mut seen = HashSet::new(); - - let (count1, f1) = check_file(&file1, "first.txt", &codebook, &mut seen, true, false); - let (count2, f2) = check_file(&file2, "second.txt", &codebook, &mut seen, true, false); - - assert_eq!(count1, 1, "first file should report the misspelling"); - assert_eq!(count2, 0, "second file should skip the already-seen word"); - assert!(!f1); - assert!(!f2); - } - - // ── line/col correctness (pure StringOffsets, no network I/O) ──────────── - // - // These tests exercise the exact same utf8_to_char_pos call that check_file - // uses, confirming that columns are in Unicode characters rather than raw - // bytes and that the 0-based → 1-based conversion is applied correctly. - - #[test] - fn test_linecol_word_at_start_of_file() { - let text = "actualbad rest of line"; - let offsets = StringOffsets::::new(text); - let pos = offsets.utf8_to_char_pos(0); - assert_eq!(pos.line + 1, 1); - assert_eq!(pos.col + 1, 1); - } - - #[test] - fn test_linecol_word_on_second_line() { - // "ok text\n" is 8 bytes; "actualbad" starts at byte 8. - let text = "ok text\nactualbad more"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 8, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 2, "should be on the second line"); - assert_eq!(pos.col + 1, 1, "should be the first column of that line"); - } - - #[test] - fn test_linecol_mid_line_ascii() { - // "hello " is 6 bytes/chars; "actualbad" starts at byte/char 6. - let text = "hello actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 6, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!(pos.col + 1, 7); - } - - /// The old byte-count approach reported col=10 here; StringOffsets correctly - /// reports col=8 because "résumé" is 8 bytes but only 6 Unicode characters. - #[test] - fn test_linecol_multibyte_chars_before_word() { - // r(1) + é(2) + s(1) + u(1) + m(1) + é(2) + space(1) = 9 bytes, 7 chars. - // "actualbad" starts at byte 9, char offset 7 (0-based) → col 8 (1-based). - let text = "résumé actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 9, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!( - pos.col + 1, - 8, - "col must count Unicode chars (6 + space = 7, 1-based = 8), not bytes (9, 1-based = 10)" - ); + assert!(err_io); } #[test] - fn test_linecol_emoji_before_word() { - // 🐛 is U+1F41B — 4 UTF-8 bytes, 1 Unicode char. - // "🐛 " = 5 bytes, 2 chars; "actualbad" starts at byte 5, char 2 (0-based) → col 3. - let text = "🐛 actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 5, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!( - pos.col + 1, - 3, - "col must count Unicode chars (emoji=1, space=1, 1-based=3), not bytes (5, 1-based=6)" - ); + fn test_unicode_line_col() { + let cases = [ + ("actualbad", 0, 1, 1), // Start + ("ok\nactualbad", 3, 2, 1), // Newline + ("résumé actualbad", 9, 1, 8), // Multi-byte chars (é is 2 bytes) + ("🦀 actualbad", 5, 1, 3), // Emoji (4 bytes, 1 char) + ]; + + for (text, offset, line, col) in cases { + let table = StringOffsets::::new(text); + let pos = table.utf8_to_char_pos(offset); + assert_eq!( + (pos.line + 1, pos.col + 1), + (line, col), + "Failed on: {}", + text + ); + } } } From 086e983d2d65ee1a0efbf9b870392fd06139752b Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Mon, 23 Mar 2026 10:45:16 +0100 Subject: [PATCH 07/14] Update readme --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 074e96f5..2b1486d0 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,26 @@ Any editor that implements the Language Server Protocol should be compatible wit codebook-lsp serve ``` +### CLI (Lint) + +Codebook can also be used as a standalone command-line spell checker, which is useful for CI pipelines, pre-commit hooks, or one-off checks. + +```sh +# Check specific files +codebook-lsp lint src/main.rs src/lib.rs + +# Check all files in a directory (recursive) +codebook-lsp lint src/ + +# Show spelling suggestions +codebook-lsp lint --suggest src/ + +# Only report each misspelled word once across all files +codebook-lsp lint --unique src/ +``` + +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there was an unreadable files, invalid UTF-8, etc... + ## About Codebook is a spell checker for code. It binds together the venerable Tree Sitter and the fast spell checker [Spellbook](https://github.com/helix-editor/spellbook). Included is a Language Server for use in (theoretically) any editor. Everything is done in Rust to keep response times snappy and memory usage _low_. From fc626039190073d1718e220bfc77c216dde72a93 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:43:14 +0100 Subject: [PATCH 08/14] apply copilot review --- README.md | 2 +- crates/codebook-lsp/src/lint.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b1486d0..908283b7 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ codebook-lsp lint --suggest src/ codebook-lsp lint --unique src/ ``` -The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there was an unreadable files, invalid UTF-8, etc... +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there were unreadable files, invalid UTF-8, etc. ## About diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3f0c8ac5..3a1b1cc0 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -123,7 +123,11 @@ fn check_file( // Build the offset table once per file let offsets = StringOffsets::::new(&text); let mut locations = codebook.spell_check(&text, None, Some(relative)); - // Sort by first occurrence in the file. + // Sort inner locations first (HashSet iteration order is nondeterministic), + // then sort the outer list by first occurrence in the file. + for wl in &mut locations { + wl.locations.sort_by_key(|r| r.start_byte); + } locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); // Collect hits first so we can compute pad_len for column alignment. The From 3e237c77457ca9b935a33819b60b34356f3acaa9 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Mon, 23 Mar 2026 15:38:05 +0100 Subject: [PATCH 09/14] . --- crates/codebook-lsp/src/lint.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3a1b1cc0..c84e9041 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,5 +1,5 @@ use codebook::Codebook; -use codebook_config::CodebookConfigFile; +use codebook_config::{CodebookConfig, CodebookConfigFile}; use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; use std::path::{Path, PathBuf}; @@ -74,6 +74,14 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b for path in &resolved { let relative = relative_to_root(root_canonical.as_deref(), path); + + if config.should_ignore_path(Path::new(&relative)) { + continue; + } + if !config.should_include_path(Path::new(&relative)) { + continue; + } + let (errors, file_failure) = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); had_failure |= file_failure; From ed84ec66aecd987792b9205e3e70d18fbe8789f0 Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 23 Mar 2026 14:00:39 -0700 Subject: [PATCH 10/14] Improve lint CLI: drop owo-colors, use ignore crate, fix exit handling - Remove owo-colors dependency and all terminal coloring in lint output - Replace walkdir with ignore crate for gitignore-aware directory walking - Return LintResult enum instead of mixing bool returns with process::exit - Fix log level detection to use parsed CLI command instead of fragile arg position check --- Cargo.lock | 96 +++++++++++++++++----------- Cargo.toml | 2 +- crates/codebook-lsp/Cargo.toml | 3 +- crates/codebook-lsp/src/lint.rs | 110 ++++++++++++++------------------ crates/codebook-lsp/src/main.rs | 18 ++++-- 5 files changed, 119 insertions(+), 110 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34d88d3e..440b88c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -278,6 +278,16 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -453,9 +463,9 @@ dependencies = [ "env_logger", "fs2", "glob", + "ignore", "log", "lru", - "owo-colors", "serde", "serde_json", "streaming-iterator", @@ -463,7 +473,6 @@ dependencies = [ "tempfile", "tokio", "tower-lsp", - "walkdir", ] [[package]] @@ -577,6 +586,25 @@ dependencies = [ "libc", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -990,6 +1018,19 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.4.13" @@ -1344,6 +1385,22 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -1401,12 +1458,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "is_ci" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" - [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1738,16 +1789,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "owo-colors" -version = "4.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" -dependencies = [ - "supports-color 2.1.0", - "supports-color 3.0.2", -] - [[package]] name = "parking" version = "2.2.1" @@ -2500,25 +2541,6 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "supports-color" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" -dependencies = [ - "is-terminal", - "is_ci", -] - -[[package]] -name = "supports-color" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" -dependencies = [ - "is_ci", -] - [[package]] name = "symbolic-common" version = "12.17.2" diff --git a/Cargo.toml b/Cargo.toml index 401d9011..5d6b1b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ env_logger = "0.11.6" fs2 = "0.4" git2 = "0.20.0" glob = "0.3" -owo-colors = { version = "4", features = ["supports-colors"] } +ignore = "0.4" httpmock = "<0.9.0" lazy_static = "1.5.0" log = "0.4.22" diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 4b30dc58..411d24be 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -29,8 +29,7 @@ fs2.workspace = true log.workspace = true lru.workspace = true glob.workspace = true -owo-colors.workspace = true -walkdir.workspace = true +ignore.workspace = true serde.workspace = true serde_json.workspace = true string-offsets.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index c84e9041..1f991f49 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,35 +1,25 @@ use codebook::Codebook; use codebook_config::{CodebookConfig, CodebookConfigFile}; -use owo_colors::{OwoColorize, Stream, Style}; +use ignore::WalkBuilder; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; use string_offsets::{AllConfig, StringOffsets}; -const BOLD: Style = Style::new().bold(); -const DIM: Style = Style::new().dimmed(); -const YELLOW: Style = Style::new().yellow(); -const BOLD_RED: Style = Style::new().bold().red(); - macro_rules! err { ($($arg:tt)*) => { - eprintln!( - "{} {}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), - format_args!($($arg)*) - ) + eprintln!("error: {}", format_args!($($arg)*)) }; } -macro_rules! paint { - ($val:expr, $stream:expr, $style:expr) => { - $val.if_supports_color($stream, |t| t.style($style)) - }; -} - -fn fatal(msg: impl std::fmt::Display) -> ! { - err!("{msg}"); - std::process::exit(2); +/// Result of a lint run, mapped to exit codes by the caller. +pub enum LintResult { + /// All files clean — exit 0. + Clean, + /// Spelling errors found — exit 1. + Errors, + /// Infrastructure failure (IO errors, bad patterns, etc.) — exit 2. + Failure, } /// Computes a workspace-relative path string for a given file. Falls back to @@ -47,21 +37,25 @@ fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { .unwrap_or_else(|| path.to_string_lossy().into_owned()) } -/// Returns `true` if any spelling errors were found. -/// -/// Exits with code 2 if infrastructure failures occurred (unreadable files, -/// directory errors, unmatched or invalid patterns). -pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { - let config = Arc::new( - CodebookConfigFile::load(Some(root)) - .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"))), - ); +pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> LintResult { + let config = match CodebookConfigFile::load(Some(root)) { + Ok(c) => Arc::new(c), + Err(e) => { + err!("failed to load config: {e}"); + return LintResult::Failure; + } + }; print_config_source(&config); eprintln!(); - let codebook = Codebook::new(config.clone()) - .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); + let codebook = match Codebook::new(config.clone()) { + Ok(c) => c, + Err(e) => { + err!("failed to initialize: {e}"); + return LintResult::Failure; + } + }; // Canonicalize the root once here rather than once per file. let root_canonical = root.canonicalize().ok(); @@ -93,16 +87,16 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let unique_label = if unique { "unique " } else { "" }; eprintln!( - "Found {} {unique_label}spelling error(s) in {} file(s).", - paint!(total_errors, Stream::Stderr, BOLD), - paint!(files_with_errors, Stream::Stderr, BOLD), + "Found {total_errors} {unique_label}spelling error(s) in {files_with_errors} file(s)." ); if had_failure { - std::process::exit(2); + LintResult::Failure + } else if total_errors > 0 { + LintResult::Errors + } else { + LintResult::Clean } - - total_errors > 0 } /// Spell-checks a single file and prints any diagnostics to stdout. @@ -185,24 +179,13 @@ fn check_file( let pad_len = hits.iter().map(|(lc, _, _)| lc.len()).max().unwrap_or(0); - println!( - "{}", - display.if_supports_color(Stream::Stdout, |t| t.style(BOLD)) - ); + println!("{display}"); for (linecol, word, suggestions) in &hits { let pad = " ".repeat(pad_len - linecol.len()); - print!( - " {}:{}{} {}", - paint!(display, Stream::Stdout, DIM), - paint!(linecol, Stream::Stdout, YELLOW), - pad, - paint!(word, Stream::Stdout, BOLD_RED), - ); if let Some(s) = suggestions { - let text = format!("→ {}", s.join(", ")); - println!(" {}", paint!(text, Stream::Stdout, DIM)); + println!(" {display}:{linecol}{pad} {word} -> {}", s.join(", ")); } else { - println!(); + println!(" {display}:{linecol}{pad} {word}"); } } println!(); @@ -229,19 +212,16 @@ fn print_config_source(config: &CodebookConfigFile) { .unwrap_or(&path) .display() .to_string(); - eprintln!( - "{label} {}", - display.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); + eprintln!("{label} {display}"); } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative -/// to `root`. `Path::join` replaces the base when the argument is absolute, so -/// no explicit `is_absolute` check is needed. +/// deduplicated list of file paths. Directories are walked using the `ignore` +/// crate, which automatically respects `.gitignore` rules and skips hidden +/// files/directories. /// /// Returns `(paths, had_failure)`. `had_failure` is true for unmatched -/// patterns, invalid globs, or glob I/O errors. +/// patterns, invalid globs, or walk I/O errors. fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut paths = Vec::new(); let mut had_failure = false; @@ -251,7 +231,10 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let p = root.join(pattern); if p.is_dir() { had_failure |= collect_dir(&p, &mut paths); + } else if p.is_file() { + paths.push(p); } else { + // Try as a glob pattern let pattern_str = p.to_string_lossy(); match glob::glob(&pattern_str) { Ok(entries) => { @@ -291,13 +274,14 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { (paths, had_failure) } -/// Recursively collects all files under `dir` into `out`. Returns `true` if any -/// directory-entry I/O error occurred. +/// Recursively collects all files under `dir` into `out`, respecting +/// `.gitignore` rules and skipping hidden files/directories. +/// Returns `true` if any I/O error occurred during the walk. fn collect_dir(dir: &Path, out: &mut Vec) -> bool { let mut had_failure = false; - for entry in walkdir::WalkDir::new(dir).follow_links(false) { + for entry in WalkBuilder::new(dir).follow_links(false).build() { match entry { - Ok(e) if e.file_type().is_file() => out.push(e.into_path()), + Ok(e) if e.file_type().is_some_and(|ft| ft.is_file()) => out.push(e.into_path()), Ok(_) => {} Err(e) => { err!( diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index 449c174a..ff682e67 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -47,9 +47,11 @@ enum Commands { #[tokio::main(flavor = "current_thread")] async fn main() { - // Initialize logger early with stderr output and buffering - // Default to INFO for LSP, WARN for lint (to suppress LSP-oriented noise) - let is_lint = std::env::args().nth(1).as_deref() == Some("lint"); + let cli = Cli::parse(); + + // Initialize logger early with stderr output and buffering. + // Default to INFO for LSP, WARN for lint (to suppress LSP-oriented noise). + let is_lint = matches!(cli.command, Some(Commands::Lint { .. })); let log_level = match env::var("RUST_LOG").as_deref() { Ok("debug") => LevelFilter::Debug, Ok("info") => LevelFilter::Info, @@ -58,7 +60,6 @@ async fn main() { }; LspLogger::init_early(log_level).expect("Failed to initialize early logger"); debug!("Logger initialized with log level: {log_level:?}"); - let cli = Cli::parse(); let root = match cli.root.as_deref() { Some(path) => path, @@ -79,9 +80,12 @@ async fn main() { unique, suggest, }) => { - if lint::run_lint(files, root, *unique, *suggest) { - std::process::exit(1); - } + let code = match lint::run_lint(files, root, *unique, *suggest) { + lint::LintResult::Clean => 0, + lint::LintResult::Errors => 1, + lint::LintResult::Failure => 2, + }; + std::process::exit(code); } None => {} } From 3bbb42c0ed8100dfac4ed16ffe03bb57acbe1ffb Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 23 Mar 2026 14:07:48 -0700 Subject: [PATCH 11/14] Fix glob inputs bypassing gitignore and binary files causing failures - Build a Gitignore matcher and filter glob-matched files against it, so glob mode behaves consistently with directory mode - Silently skip binary/non-UTF-8 files instead of treating them as infrastructure failures that trigger exit code 2 --- crates/codebook-lsp/src/lint.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 1f991f49..4e32e53e 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,6 +1,7 @@ use codebook::Codebook; use codebook_config::{CodebookConfig, CodebookConfigFile}; use ignore::WalkBuilder; +use ignore::gitignore::{Gitignore, GitignoreBuilder}; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -114,6 +115,10 @@ fn check_file( ) -> (usize, bool) { let text = match std::fs::read_to_string(path) { Ok(t) => t, + Err(e) if e.kind() == std::io::ErrorKind::InvalidData => { + // Binary / non-UTF-8 file — silently skip. + return (0, false); + } Err(e) => { err!("{}: {e}", path.display()); return (0, true); @@ -215,16 +220,24 @@ fn print_config_source(config: &CodebookConfigFile) { eprintln!("{label} {display}"); } +/// Builds a gitignore matcher from the `.gitignore` file in `root`, if present. +fn build_gitignore(root: &Path) -> Gitignore { + let mut builder = GitignoreBuilder::new(root); + builder.add(root.join(".gitignore")); + builder.build().unwrap_or_else(|_| Gitignore::empty()) +} + /// Resolves a mix of file paths, directories, and glob patterns into a sorted, /// deduplicated list of file paths. Directories are walked using the `ignore` /// crate, which automatically respects `.gitignore` rules and skips hidden -/// files/directories. +/// files/directories. Glob-matched files are also filtered against `.gitignore`. /// /// Returns `(paths, had_failure)`. `had_failure` is true for unmatched /// patterns, invalid globs, or walk I/O errors. fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut paths = Vec::new(); let mut had_failure = false; + let gitignore = build_gitignore(root); for pattern in patterns { // root.join() is a no-op when pattern is absolute @@ -242,7 +255,9 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { for entry in entries { match entry { Ok(e) if e.is_file() => { - paths.push(e); + if !gitignore.matched(&e, false).is_ignore() { + paths.push(e); + } matched = true; } Ok(e) if e.is_dir() => { From b9d2718b46cefc10bb76c44fe4bb5ff8864b4dd2 Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 23 Mar 2026 14:15:56 -0700 Subject: [PATCH 12/14] Use WalkBuilder for glob patterns to respect nested .gitignore files - Replace glob::glob with ignore's WalkBuilder + OverrideBuilder so glob mode handles nested .gitignore files consistently with dir mode - Extract static prefix from glob patterns via glob_base_dir to determine the correct walk root for absolute patterns - Drop glob dependency from codebook-lsp (no longer needed) --- Cargo.lock | 1 - crates/codebook-lsp/Cargo.toml | 1 - crates/codebook-lsp/src/lint.rs | 98 +++++++++++++++++---------------- 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 440b88c3..edec7922 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -462,7 +462,6 @@ dependencies = [ "codebook_config", "env_logger", "fs2", - "glob", "ignore", "log", "lru", diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 411d24be..6df4157a 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -28,7 +28,6 @@ env_logger.workspace = true fs2.workspace = true log.workspace = true lru.workspace = true -glob.workspace = true ignore.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 4e32e53e..76cc6cce 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,7 +1,7 @@ use codebook::Codebook; use codebook_config::{CodebookConfig, CodebookConfigFile}; use ignore::WalkBuilder; -use ignore::gitignore::{Gitignore, GitignoreBuilder}; +use ignore::overrides::OverrideBuilder; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -220,58 +220,49 @@ fn print_config_source(config: &CodebookConfigFile) { eprintln!("{label} {display}"); } -/// Builds a gitignore matcher from the `.gitignore` file in `root`, if present. -fn build_gitignore(root: &Path) -> Gitignore { - let mut builder = GitignoreBuilder::new(root); - builder.add(root.join(".gitignore")); - builder.build().unwrap_or_else(|_| Gitignore::empty()) -} - /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Directories are walked using the `ignore` -/// crate, which automatically respects `.gitignore` rules and skips hidden -/// files/directories. Glob-matched files are also filtered against `.gitignore`. +/// deduplicated list of file paths. All paths are resolved through the `ignore` +/// crate's `WalkBuilder`, which respects `.gitignore` rules (including nested +/// ones) and skips hidden files/directories. /// /// Returns `(paths, had_failure)`. `had_failure` is true for unmatched /// patterns, invalid globs, or walk I/O errors. fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut paths = Vec::new(); let mut had_failure = false; - let gitignore = build_gitignore(root); for pattern in patterns { // root.join() is a no-op when pattern is absolute let p = root.join(pattern); if p.is_dir() { - had_failure |= collect_dir(&p, &mut paths); + had_failure |= collect_walk(&mut WalkBuilder::new(&p), &mut paths); } else if p.is_file() { paths.push(p); } else { - // Try as a glob pattern + // Treat as a glob pattern: walk with an override filter so that + // nested .gitignore files are respected automatically. We extract + // the static (non-glob) prefix of the path to use as the walk + // root, so absolute patterns work correctly. let pattern_str = p.to_string_lossy(); - match glob::glob(&pattern_str) { - Ok(entries) => { - let mut matched = false; - for entry in entries { - match entry { - Ok(e) if e.is_file() => { - if !gitignore.matched(&e, false).is_ignore() { - paths.push(e); - } - matched = true; - } - Ok(e) if e.is_dir() => { - had_failure |= collect_dir(&e, &mut paths); - matched = true; - } - Ok(_) => {} - Err(e) => { - err!("failed to read glob entry: {e}"); - had_failure = true; - } - } - } - if !matched { + let walk_root = glob_base_dir(&p); + // Build the glob portion relative to walk_root for the override. + let rel_pattern = p + .strip_prefix(&walk_root) + .map(|r| format!("/{}", r.to_string_lossy())) + .unwrap_or_else(|_| pattern_str.to_string()); + let mut ob = OverrideBuilder::new(&walk_root); + if let Err(e) = ob.add(&rel_pattern) { + err!("invalid pattern '{pattern_str}': {e}"); + had_failure = true; + continue; + } + match ob.build() { + Ok(overrides) => { + let before = paths.len(); + let mut walker = WalkBuilder::new(&walk_root); + walker.overrides(overrides); + had_failure |= collect_walk(&mut walker, &mut paths); + if paths.len() == before { err!("no match for '{pattern_str}'"); had_failure = true; } @@ -289,20 +280,35 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { (paths, had_failure) } -/// Recursively collects all files under `dir` into `out`, respecting -/// `.gitignore` rules and skipping hidden files/directories. -/// Returns `true` if any I/O error occurred during the walk. -fn collect_dir(dir: &Path, out: &mut Vec) -> bool { +/// Extracts the longest non-glob prefix directory from a path pattern. +/// For example, `/tmp/foo/**/*.rs` returns `/tmp/foo`. +fn glob_base_dir(pattern: &Path) -> PathBuf { + let mut base = PathBuf::new(); + for component in pattern.components() { + let s = component.as_os_str().to_string_lossy(); + if s.contains('*') || s.contains('?') || s.contains('[') { + break; + } + base.push(component); + } + if base.as_os_str().is_empty() { + PathBuf::from(".") + } else { + base + } +} + +/// Walks using the given `WalkBuilder`, collecting all files into `out`. +/// Respects `.gitignore` rules (including nested) and skips hidden +/// files/directories. Returns `true` if any I/O error occurred. +fn collect_walk(walker: &mut WalkBuilder, out: &mut Vec) -> bool { let mut had_failure = false; - for entry in WalkBuilder::new(dir).follow_links(false).build() { + for entry in walker.follow_links(false).build() { match entry { Ok(e) if e.file_type().is_some_and(|ft| ft.is_file()) => out.push(e.into_path()), Ok(_) => {} Err(e) => { - err!( - "failed to read directory entry under '{}': {e}", - dir.display() - ); + err!("walk error: {e}"); had_failure = true; } } From 14e952163ef8196b1a7145ce6013f32e6f983483 Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 23 Mar 2026 14:29:07 -0700 Subject: [PATCH 13/14] Fix glob mode to walk from workspace root for full .gitignore coverage Walk from the lint root instead of the glob base directory so that parent and nested .gitignore files are all evaluated. Post-filter surviving files against the glob pattern using globset. This makes glob mode fully consistent with directory mode. --- Cargo.lock | 1 + Cargo.toml | 1 + crates/codebook-lsp/Cargo.toml | 1 + crates/codebook-lsp/src/lint.rs | 73 ++++++++++++--------------------- 4 files changed, 30 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index edec7922..f543b383 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -462,6 +462,7 @@ dependencies = [ "codebook_config", "env_logger", "fs2", + "globset", "ignore", "log", "lru", diff --git a/Cargo.toml b/Cargo.toml index 5d6b1b42..714cfe45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ env_logger = "0.11.6" fs2 = "0.4" git2 = "0.20.0" glob = "0.3" +globset = "0.4" ignore = "0.4" httpmock = "<0.9.0" lazy_static = "1.5.0" diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 6df4157a..fc2fdb2e 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -28,6 +28,7 @@ env_logger.workspace = true fs2.workspace = true log.workspace = true lru.workspace = true +globset.workspace = true ignore.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 76cc6cce..6f237091 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,7 +1,7 @@ use codebook::Codebook; use codebook_config::{CodebookConfig, CodebookConfigFile}; +use globset::Glob; use ignore::WalkBuilder; -use ignore::overrides::OverrideBuilder; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -239,39 +239,38 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { } else if p.is_file() { paths.push(p); } else { - // Treat as a glob pattern: walk with an override filter so that - // nested .gitignore files are respected automatically. We extract - // the static (non-glob) prefix of the path to use as the walk - // root, so absolute patterns work correctly. + // Treat as a glob pattern. Walk from the workspace root so that + // the full .gitignore hierarchy (parent + nested) is respected, + // then post-filter surviving files against the glob. let pattern_str = p.to_string_lossy(); - let walk_root = glob_base_dir(&p); - // Build the glob portion relative to walk_root for the override. - let rel_pattern = p - .strip_prefix(&walk_root) - .map(|r| format!("/{}", r.to_string_lossy())) - .unwrap_or_else(|_| pattern_str.to_string()); - let mut ob = OverrideBuilder::new(&walk_root); - if let Err(e) = ob.add(&rel_pattern) { - err!("invalid pattern '{pattern_str}': {e}"); - had_failure = true; - continue; - } - match ob.build() { - Ok(overrides) => { - let before = paths.len(); - let mut walker = WalkBuilder::new(&walk_root); - walker.overrides(overrides); - had_failure |= collect_walk(&mut walker, &mut paths); - if paths.len() == before { - err!("no match for '{pattern_str}'"); - had_failure = true; - } - } + let matcher = match Glob::new(&pattern_str).map(|g| g.compile_matcher()) { + Ok(m) => m, Err(e) => { err!("invalid pattern '{pattern_str}': {e}"); had_failure = true; + continue; + } + }; + let before = paths.len(); + let mut walker = WalkBuilder::new(root); + for entry in walker.follow_links(false).build() { + match entry { + Ok(e) if e.file_type().is_some_and(|ft| ft.is_file()) => { + if matcher.is_match(e.path()) { + paths.push(e.into_path()); + } + } + Ok(_) => {} + Err(e) => { + err!("walk error: {e}"); + had_failure = true; + } } } + if paths.len() == before { + err!("no match for '{pattern_str}'"); + had_failure = true; + } } } @@ -280,24 +279,6 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { (paths, had_failure) } -/// Extracts the longest non-glob prefix directory from a path pattern. -/// For example, `/tmp/foo/**/*.rs` returns `/tmp/foo`. -fn glob_base_dir(pattern: &Path) -> PathBuf { - let mut base = PathBuf::new(); - for component in pattern.components() { - let s = component.as_os_str().to_string_lossy(); - if s.contains('*') || s.contains('?') || s.contains('[') { - break; - } - base.push(component); - } - if base.as_os_str().is_empty() { - PathBuf::from(".") - } else { - base - } -} - /// Walks using the given `WalkBuilder`, collecting all files into `out`. /// Respects `.gitignore` rules (including nested) and skips hidden /// files/directories. Returns `true` if any I/O error occurred. From 5bce4050a813cdf9f8ae6772e3b75d05a81692ed Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 23 Mar 2026 14:33:49 -0700 Subject: [PATCH 14/14] Update readme --- .claude/settings.local.json | 3 ++- README.md | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index c66a86c3..c62b52d4 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -15,7 +15,8 @@ "Bash(gh issue view:*)", "Bash(gh repo view:*)", "Bash(cargo build:*)", - "Bash(cargo search:*)" + "Bash(cargo search:*)", + "WebFetch(domain:docs.rs)" ] } } diff --git a/README.md b/README.md index 908283b7..1cc09530 100644 --- a/README.md +++ b/README.md @@ -117,10 +117,12 @@ Any editor that implements the Language Server Protocol should be compatible wit codebook-lsp serve ``` -### CLI (Lint) +### CLI (Lint) (Unstable) Codebook can also be used as a standalone command-line spell checker, which is useful for CI pipelines, pre-commit hooks, or one-off checks. +Note: this command is currently experimental, unstable, and subject to breaking changes in future releases. Please submit feedback! + ```sh # Check specific files codebook-lsp lint src/main.rs src/lib.rs