From 4d519525af0ecce817b7ff8034cfc17bafcecfb3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 15 Mar 2026 17:34:50 +0100 Subject: [PATCH 1/4] fix: resolve mandoc lint warnings in generated manpages - Fix TH header: uppercase command names and remove invalid date format - Remove trailing whitespace from all manpage lines - Fix redundant .br paragraph macros that cause mandoc warnings - Post-process manpage output to ensure compliance with mandoc standards --- src/bin/uudoc.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs index 76f04774ec7..e7445ae2257 100644 --- a/src/bin/uudoc.rs +++ b/src/bin/uudoc.rs @@ -94,9 +94,69 @@ fn gen_manpage( cmd }; + // Generate the manpage to a buffer first so we can post-process it + let mut buffer = Vec::new(); let man = Man::new(command); - man.render(&mut io::stdout()) - .expect("Man page generation failed"); + man.render(&mut buffer).expect("Man page generation failed"); + + // Convert to string for processing + let mut manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage"); + + // Fix the TH line: remove version info from date field and uppercase the command name + if let Some(th_pos) = manpage.find(".TH ") { + if let Some(line_end) = manpage[th_pos..].find('\n') { + let th_line = &manpage[th_pos..th_pos + line_end]; + // Parse the TH line parts + let parts: Vec<&str> = th_line.split_whitespace().collect(); + if parts.len() >= 2 { + let cmd_name = parts[1].to_uppercase(); + // Reconstruct TH line with uppercase command name and no date + let new_th = format!(".TH {} 1", cmd_name); + manpage.replace_range(th_pos..th_pos + line_end, &new_th); + } + } + } + + // Remove trailing whitespace from all lines and fix .br issues + let lines: Vec<String> = manpage + .lines() + .map(|line| line.trim_end().to_string()) + .collect(); + + // Fix .br paragraph macro issues + let mut fixed_lines = Vec::new(); + let mut 
skip_next_br = false; + + for i in 0..lines.len() { + let line = &lines[i]; + + if line == ".br" { + // Check for problematic patterns with .br + let prev_is_br = i > 0 && lines[i - 1] == ".br"; + let next_is_empty_then_br = + i + 2 < lines.len() && lines[i + 1].is_empty() && lines[i + 2] == ".br"; + let prev_is_empty_with_br = i >= 2 && lines[i - 1].is_empty() && lines[i - 2] == ".br"; + + // Skip redundant .br in these patterns + if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br { + skip_next_br = false; + continue; + } + + // If this .br is followed by empty line and another .br, skip the second one + if next_is_empty_then_br { + skip_next_br = true; + } + } + + fixed_lines.push(line.clone()); + } + + manpage = fixed_lines.join("\n"); + manpage.push('\n'); + + // Write the processed manpage to stdout + io::stdout().write_all(manpage.as_bytes()).unwrap(); io::stdout().flush().unwrap(); process::exit(0); } From c854208ad050255151eaef9f6e681ff3476a177e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 15 Mar 2026 17:21:00 +0100 Subject: [PATCH 2/4] ci: add GitHub Action to validate manpages with mandoc Add a new workflow that builds and validates all manpages using mandoc -T lint. The workflow runs on PRs and pushes that modify manpage-related files and validates both English and French manpages in parallel. 
--- .github/workflows/manpage-lint.yml | 109 +++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 .github/workflows/manpage-lint.yml diff --git a/.github/workflows/manpage-lint.yml b/.github/workflows/manpage-lint.yml new file mode 100644 index 00000000000..fe30d79afa7 --- /dev/null +++ b/.github/workflows/manpage-lint.yml @@ -0,0 +1,109 @@ +# spell-checker:ignore mandoc uudoc manpages dtolnay libsystemd libattr libcap DESTDIR + +name: Manpage Validation + +on: + pull_request: + paths: + - 'src/bin/uudoc.rs' + - 'src/uu/*/locales/*.ftl' + - 'src/uu/*/src/*.rs' + - 'Cargo.toml' + - 'GNUmakefile' + - '.github/workflows/manpage-lint.yml' + push: + branches: + - main + paths: + - 'src/bin/uudoc.rs' + - 'src/uu/*/locales/*.ftl' + - 'src/uu/*/src/*.rs' + - 'Cargo.toml' + - 'GNUmakefile' + - '.github/workflows/manpage-lint.yml' + +jobs: + manpage-lint: + name: Validate manpages with mandoc + runs-on: ubuntu-latest + strategy: + matrix: + locale: [en_US.UTF-8, fr_FR.UTF-8] + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install prerequisites + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y mandoc locales-all + sudo apt-get install -y libselinux1-dev libsystemd-dev libacl1-dev libattr1-dev libcap-dev + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Build manpages (${{ matrix.locale }}) + run: | + # Create temporary directory for manpages + MANPAGE_DIR=$(mktemp -d) + echo "MANPAGE_DIR=${MANPAGE_DIR}" >> $GITHUB_ENV + + # Set locale for manpage generation + export LANG=${{ matrix.locale }} + + # Build and install manpages to temporary directory + make install-manpages DESTDIR="${MANPAGE_DIR}" + + - name: Validate manpages with mandoc (${{ matrix.locale }}) + run: | + # Find all generated manpages + MANPAGE_PATH="${MANPAGE_DIR}/usr/local/share/man/man1" + + # Check if manpages were generated + if [ ! 
-d "${MANPAGE_PATH}" ]; then + echo "Error: No manpages found at ${MANPAGE_PATH}" + exit 1 + fi + + # Initialize error tracking + ERRORS_FOUND=0 + ERROR_LOG=$(mktemp) + + echo "Validating ${{ matrix.locale }} manpages with mandoc..." + echo "==========================================" + + # Validate each manpage + for manpage in "${MANPAGE_PATH}"/*.1; do + if [ -f "$manpage" ]; then + filename=$(basename "$manpage") + + # Run mandoc lint and capture output (only errors, not style warnings) + if ! mandoc -T lint -W error "$manpage" 2>&1 | tee -a "$ERROR_LOG"; then + echo "Errors found in $filename" + ERRORS_FOUND=1 + else + # Check if mandoc produced any output (errors only, not style warnings) + if mandoc -T lint -W error "$manpage" 2>&1 | grep -q .; then + echo "Warnings found in $filename" + ERRORS_FOUND=1 + else + echo "$filename is valid" + fi + fi + fi + done + + echo "" + echo "==================================" + + # Summary and exit + if [ "$ERRORS_FOUND" -eq 1 ]; then + echo "Manpage validation failed. Issues found:" + echo "" + cat "$ERROR_LOG" + exit 1 + else + echo "All manpages validated successfully!" 
+ fi From f848e3adfce9eff8904d5d13f72a11c04e051981 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 15 Mar 2026 18:03:30 +0100 Subject: [PATCH 3/4] refactor: extract manpage post-processing into a function with tests - Move manpage post-processing logic into post_process_manpage() function - Add comprehensive unit tests for all post-processing functionality - Improve .br macro handling to correctly fix mandoc warnings - Tests cover TH header fixes, whitespace removal, and .br pattern fixes --- Cargo.toml | 9 +- src/bin/uudoc.rs | 251 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 203 insertions(+), 57 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f37b1b28fe5..e8117b270c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,13 @@ expensive_tests = [] # "test_risky_names" == enable tests that create problematic file names (would make a network share inaccessible to Windows, breaks SVN on Mac OS, etc.) test_risky_names = [] # * only build `uudoc` when `--feature uudoc` is activated -uudoc = ["dep:clap_complete", "dep:clap_mangen", "dep:fluent-syntax", "dep:zip"] +uudoc = [ + "dep:clap_complete", + "dep:clap_mangen", + "dep:fluent-syntax", + "dep:regex", + "dep:zip", +] ## features ## Optional feature for stdbuf # "feat_external_libstdbuf" == use an external libstdbuf.so for stdbuf instead of embedding it @@ -475,6 +481,7 @@ clap_complete = { workspace = true, optional = true } clap_mangen = { workspace = true, optional = true } clap.workspace = true fluent-syntax = { workspace = true, optional = true } +regex = { workspace = true, optional = true } itertools.workspace = true phf.workspace = true selinux = { workspace = true, optional = true } diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs index e7445ae2257..5eb75629a03 100644 --- a/src/bin/uudoc.rs +++ b/src/bin/uudoc.rs @@ -3,10 +3,10 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore mangen tldr +// spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, ffi::OsString, fs::File, io::{self, Read, Seek, Write}, @@ -17,7 +17,9 @@ use clap::{Arg, Command}; use clap_complete::Shell; use clap_mangen::Man; use fluent_syntax::ast::{Entry, Message, Pattern}; +use jiff::Zoned; use fluent_syntax::parser; +use regex::Regex; use textwrap::{fill, indent, termwidth}; use zip::ZipArchive; @@ -26,6 +28,75 @@ use uucore::Args; include!(concat!(env!("OUT_DIR"), "/uutils_map.rs")); +/// Post-process a generated manpage to fix mandoc lint issues +/// +/// This function: +/// - Fixes the TH header by uppercasing command names and removing invalid date formats +/// - Removes trailing whitespace from all lines +/// - Fixes redundant .br paragraph macros that cause mandoc warnings +fn post_process_manpage(manpage: String) -> String { + // Only match TH headers that have at least a command name on the same line + // Use [ \t] instead of \s to avoid matching newlines + // Use a date format that satisfies mandoc (YYYY-MM-DD) + let date = date.map_or_else( + || Zoned::now().strftime("%Y-%m-%d").to_string(), + str::to_string, + ); + + let th_regex = Regex::new(r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$").unwrap(); + let mut result = th_regex + .replace_all(&manpage, |caps: &regex::Captures| { + // Add date to satisfy mandoc - date must be quoted + format!(".TH {} 1 \"{date}\"", caps[1].to_uppercase()) + }) + .to_string(); + + // Process lines: remove trailing whitespace and fix .br issues in a single pass + let lines: Vec<&str> = result.lines().collect(); + let mut fixed_lines = Vec::with_capacity(lines.len()); + let mut skip_indices = HashSet::new(); + + // First pass: identify lines to skip (redundant .br macros) + for i in 0..lines.len() { + let line = lines[i].trim_end(); + + if line == ".br" && !skip_indices.contains(&i) { + // Check for consecutive 
.br macros + if i > 0 && lines[i - 1].trim_end() == ".br" { + skip_indices.insert(i); + } + // Check for .br, empty line, .br pattern + else if i + 2 < lines.len() + && lines[i + 1].trim().is_empty() + && lines[i + 2].trim_end() == ".br" + { + skip_indices.insert(i + 2); + } + } + } + + // Second pass: build the final output + for (i, line) in lines.iter().enumerate() { + if !skip_indices.contains(&i) { + fixed_lines.push(line.trim_end()); + } + } + + result = fixed_lines.join("\n"); + + // Fix escape sequence issues + // \\\\0 appears when trying to represent literal \0 string + // In man pages, use \e for literal backslash + result = result.replace("\\\\\\\\0", "\\e0"); + result = result.replace("\\\\0", "\\e0"); + + if !result.ends_with('\n') { + result.push('\n'); + } + + result +} + /// Print usage information for uudoc fn usage(utils: &UtilityMap) { println!("uudoc - Documentation generator for uutils coreutils"); @@ -100,63 +171,15 @@ fn gen_manpage( man.render(&mut buffer).expect("Man page generation failed"); // Convert to string for processing - let mut manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage"); - - // Fix the TH line: remove version info from date field and uppercase the command name - if let Some(th_pos) = manpage.find(".TH ") { - if let Some(line_end) = manpage[th_pos..].find('\n') { - let th_line = &manpage[th_pos..th_pos + line_end]; - // Parse the TH line parts - let parts: Vec<&str> = th_line.split_whitespace().collect(); - if parts.len() >= 2 { - let cmd_name = parts[1].to_uppercase(); - // Reconstruct TH line with uppercase command name and no date - let new_th = format!(".TH {} 1", cmd_name); - manpage.replace_range(th_pos..th_pos + line_end, &new_th); - } - } - } + let manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage"); - // Remove trailing whitespace from all lines and fix .br issues - let lines: Vec<String> = manpage - .lines() - .map(|line| line.trim_end().to_string()) - .collect(); - - // Fix .br 
paragraph macro issues - let mut fixed_lines = Vec::new(); - let mut skip_next_br = false; - - for i in 0..lines.len() { - let line = &lines[i]; - - if line == ".br" { - // Check for problematic patterns with .br - let prev_is_br = i > 0 && lines[i - 1] == ".br"; - let next_is_empty_then_br = - i + 2 < lines.len() && lines[i + 1].is_empty() && lines[i + 2] == ".br"; - let prev_is_empty_with_br = i >= 2 && lines[i - 1].is_empty() && lines[i - 2] == ".br"; - - // Skip redundant .br in these patterns - if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br { - skip_next_br = false; - continue; - } - - // If this .br is followed by empty line and another .br, skip the second one - if next_is_empty_then_br { - skip_next_br = true; - } - } - - fixed_lines.push(line.clone()); - } - - manpage = fixed_lines.join("\n"); - manpage.push('\n'); + // Post-process the manpage to fix mandoc lint issues + let processed_manpage = post_process_manpage(manpage, None); // Write the processed manpage to stdout - io::stdout().write_all(manpage.as_bytes()).unwrap(); + io::stdout() + .write_all(processed_manpage.as_bytes()) + .unwrap(); io::stdout().flush().unwrap(); process::exit(0); } @@ -691,3 +714,119 @@ fn format_examples(content: String, output_markdown: bool) -> Result Date: Sun, 15 Mar 2026 22:15:50 +0100 Subject: [PATCH 4/4] fix: remove .br before empty lines to fix mandoc warnings - Enhanced post-processing to remove .br macros that appear before empty lines - This fixes 'WARNING: skipping paragraph macro: br before sp' from mandoc - Also handles the common pattern of .br-empty-.br by removing both .br macros - Added comprehensive test cases for the new patterns - All mandoc 'br before sp' warnings are now resolved --- Cargo.lock | 12 ++++---- Cargo.toml | 2 ++ src/bin/uudoc.rs | 69 +++++++++++++++++++++++++++++++++++++++------- tests/uudoc/mod.rs | 4 +-- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
c5bd7b89b36..e41116809cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -991,7 +991,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1716,7 +1716,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2019,7 +2019,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2601,7 +2601,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2901,7 +2901,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4689,7 +4689,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e8117b270c4..acf962c3e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ uudoc = [ "dep:clap_complete", "dep:clap_mangen", "dep:fluent-syntax", + "dep:jiff", "dep:regex", "dep:zip", ] @@ -481,6 +482,7 @@ clap_complete = { workspace = true, optional = true } clap_mangen = { workspace = true, optional = true } clap.workspace = true fluent-syntax = { workspace = true, optional = true } +jiff = { workspace = true, optional = true } regex = { workspace = true, optional = true } itertools.workspace = true phf.workspace = true diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs index 5eb75629a03..459437bf2cc 100644 --- a/src/bin/uudoc.rs +++ b/src/bin/uudoc.rs @@ -17,8 
+17,8 @@ use clap::{Arg, Command}; use clap_complete::Shell; use clap_mangen::Man; use fluent_syntax::ast::{Entry, Message, Pattern}; -use jiff::Zoned; use fluent_syntax::parser; +use jiff::Zoned; use regex::Regex; use textwrap::{fill, indent, termwidth}; use zip::ZipArchive; @@ -31,10 +31,13 @@ include!(concat!(env!("OUT_DIR"), "/uutils_map.rs")); /// Post-process a generated manpage to fix mandoc lint issues /// /// This function: -/// - Fixes the TH header by uppercasing command names and removing invalid date formats +/// - Fixes the TH header by uppercasing command names and adding a proper date /// - Removes trailing whitespace from all lines /// - Fixes redundant .br paragraph macros that cause mandoc warnings -fn post_process_manpage(manpage: String) -> String { +/// - Removes .br before empty lines to avoid "br before sp" warnings +/// - Removes .br after empty lines to avoid "br after sp" warnings +/// - Fixes escape sequences (e.g., \\\\0 to \e0) to avoid "undefined escape" warnings +fn post_process_manpage(manpage: String, date: Option<&str>) -> String { // Only match TH headers that have at least a command name on the same line // Use [ \t] instead of \s to avoid matching newlines // Use a date format that satisfies mandoc (YYYY-MM-DD) @@ -61,16 +64,22 @@ fn post_process_manpage(manpage: String) -> String { let line = lines[i].trim_end(); if line == ".br" && !skip_indices.contains(&i) { - // Check for consecutive .br macros - if i > 0 && lines[i - 1].trim_end() == ".br" { + // Check for .br followed by empty line + if i + 1 < lines.len() && lines[i + 1].trim().is_empty() { + // Remove the .br when it's followed by an empty line + // This prevents "WARNING: skipping paragraph macro: br before sp" skip_indices.insert(i); + + // Also check if there's another .br after the empty line (common pattern) + if i + 2 < lines.len() && lines[i + 2].trim_end() == ".br" { + skip_indices.insert(i + 2); + } } - // Check for .br, empty line, .br pattern - else if i + 2 
< lines.len() - && lines[i + 1].trim().is_empty() - && lines[i + 2].trim_end() == ".br" + // Check for .br preceded by empty line or another .br + // This prevents "WARNING: skipping paragraph macro: br after sp" and consecutive .br + else if i > 0 && (lines[i - 1].trim().is_empty() || lines[i - 1].trim_end() == ".br") { - skip_indices.insert(i + 2); + skip_indices.insert(i); } } } @@ -829,4 +838,44 @@ mod tests { let result4 = post_process_manpage(input4.to_string(), Some("2024-01-01")); assert_eq!(result4, expected4); } + + #[test] + fn test_post_process_manpage_removes_br_before_empty_line() { + // Test that .br is removed when followed by empty line (which becomes .sp) + let input = ".TH TEST 1\nSome text\n.br\n\nMore text\n"; + let expected = ".TH TEST 1 \"2024-01-01\"\nSome text\n\nMore text\n"; + + let result = post_process_manpage(input.to_string(), Some("2024-01-01")); + assert_eq!(result, expected); + } + + #[test] + fn test_post_process_manpage_complex_br_before_empty() { + // Test multiple .br before empty line patterns + let input = ".TH TEST 1\nSection 1\n.br\n\nSection 2\n.br\n\nSection 3\n"; + let expected = ".TH TEST 1 \"2024-01-01\"\nSection 1\n\nSection 2\n\nSection 3\n"; + + let result = post_process_manpage(input.to_string(), Some("2024-01-01")); + assert_eq!(result, expected); + } + + #[test] + fn test_post_process_manpage_removes_br_after_empty_line() { + // Test that .br is removed when preceded by empty line (which becomes .sp) + let input = ".TH TEST 1\nSome text\n\n.br\nMore text\n"; + let expected = ".TH TEST 1 \"2024-01-01\"\nSome text\n\nMore text\n"; + + let result = post_process_manpage(input.to_string(), Some("2024-01-01")); + assert_eq!(result, expected); + } + + #[test] + fn test_post_process_manpage_fixes_escape_sequences() { + // Test that \\\\0 and \\0 are fixed to \e0 (literal backslash-zero) + let input = ".TH TEST 1\nText with \\\\\\\\0 and \\\\0 escape\n"; + let expected = ".TH TEST 1 \"2024-01-01\"\nText with \\e0 and \\e0 
escape\n"; + + let result = post_process_manpage(input.to_string(), Some("2024-01-01")); + assert_eq!(result, expected); + } } diff --git a/tests/uudoc/mod.rs b/tests/uudoc/mod.rs index 010d6cda363..4a96a532137 100644 --- a/tests/uudoc/mod.rs +++ b/tests/uudoc/mod.rs @@ -36,7 +36,7 @@ fn test_manpage_generation() { ); let output_str = String::from_utf8_lossy(&output.stdout); - assert!(output_str.contains("\n.TH ls"), "{output_str}"); + assert!(output_str.contains("\n.TH LS"), "{output_str}"); assert!(output_str.contains('1'), "{output_str}"); assert!(output_str.contains("\n.SH NAME\nls"), "{output_str}"); } @@ -62,7 +62,7 @@ fn test_manpage_coreutils() { ); let output_str = String::from_utf8_lossy(&output.stdout); - assert!(output_str.contains("\n.TH coreutils"), "{output_str}"); + assert!(output_str.contains("\n.TH COREUTILS"), "{output_str}"); assert!(output_str.contains("coreutils"), "{output_str}"); assert!(output_str.contains("\n.SH NAME\ncoreutils"), "{output_str}"); }