From 070a9f8c7d743751b9967f15bf8fa6b52f88bc8d Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 26 May 2026 12:30:55 +0100 Subject: [PATCH 1/3] feat(storage): per-finding hexad emission (issue #33 S1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a per-WeakPoint hexad path to persist_assemblyline_report so a batch scan can persist one hexad per finding in addition to the existing aggregate hexad. Subject identity is `finding:<repo>:<file>:<line>:<category>`, chosen for cross-run stability so the upcoming S2 (campaign register-pr) and S3 (query) slices can join on it without diffing JSON. New public surface: - HexadSemantic gains an optional `finding: Option<FindingSemantic>` (additive, skip_serializing_if = none → existing consumers unaffected). - FindingSemantic carries finding_id / repo_name / file / line / category / rule_id / rule_name / severity / description / first_seen_run / last_seen_run / framework. rule_id and rule_name reuse the canonical SARIF mapping (sarif.rs::rule_id / rule_name now pub(crate)). - build_finding_hexads(report) -> Result<Vec<PanicAttackHexad>>. - STORE_FINDING_HEXADS_ENV = "PANIC_ATTACK_STORE_FINDING_HEXADS" — when set to a non-empty value other than `0` / `false` AND StorageMode::VerisimDb is configured, persist_assemblyline_report writes one file per finding under `<base_dir>/hexads/findings/`. Behaviour preserved: - Default path unchanged (env var off → no per-finding writes). - Aggregate hexad still emitted in every VerisimDb run. - Suppressed WeakPoints are skipped, keeping the store aligned with fleet/CI counts. S1 sets first_seen_run == last_seen_run; back-stamping from a prior hexad is S2's job (per the issue), not S1's. Tests: 7 new (id stability, category discrimination, count per WP, suppression skip, canonical rule_id/name, file write + round-trip, env-var default-off). Full suite: 215 lib + 13 + 16 + 6 + 12 + 3 + 7 + 12 + 14 + 20 + 10 + 8 + 22 + 22 + 12 + 2 doc — all green. Clippy clean with -D warnings. Refs #33.
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/report/sarif.rs | 4 +- src/storage/mod.rs | 402 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 395 insertions(+), 11 deletions(-) diff --git a/src/report/sarif.rs b/src/report/sarif.rs index db7ca42..cd67a6d 100644 --- a/src/report/sarif.rs +++ b/src/report/sarif.rs @@ -112,7 +112,7 @@ pub struct SarifRegion { } /// Map WeakPointCategory to a stable rule ID -fn rule_id(category: &WeakPointCategory) -> &'static str { +pub(crate) fn rule_id(category: &WeakPointCategory) -> &'static str { match category { WeakPointCategory::UncheckedAllocation => "PA001", WeakPointCategory::UnboundedAllocation => "PA001b", @@ -144,7 +144,7 @@ fn rule_id(category: &WeakPointCategory) -> &'static str { } /// Map WeakPointCategory to a human-readable name -fn rule_name(category: &WeakPointCategory) -> &'static str { +pub(crate) fn rule_name(category: &WeakPointCategory) -> &'static str { match category { WeakPointCategory::UncheckedAllocation => "unchecked-allocation", WeakPointCategory::UnboundedAllocation => "unbounded-allocation", diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 91f0d02..4c43d08 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -88,6 +88,51 @@ pub struct HexadSemantic { /// Migration-specific semantic data (present when target is ReScript) #[serde(skip_serializing_if = "Option::is_none")] pub migration: Option, + /// Finding-level semantic data (present when this hexad represents a + /// single WeakPoint emitted by `build_finding_hexads`, issue #33 S1). + #[serde(skip_serializing_if = "Option::is_none")] + pub finding: Option, +} + +/// Semantic facets of a per-finding hexad (issue #33 S1). +/// +/// A per-finding hexad represents one `WeakPoint` from an assemblyline scan +/// of one repository. 
The `finding_id` is stable across runs (same +/// repo/file/line/category → same id), so subsequent slices (S2 PR-state +/// tracking, S3 cross-repo query) can identify a finding without comparing +/// JSON blobs. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FindingSemantic { + /// Stable per-finding identifier: `finding::::`. + pub finding_id: String, + /// Repository name (basename of repo path). + pub repo_name: String, + /// File path, repo-relative. + pub file: String, + /// Line number from the original `WeakPoint`, if available. + #[serde(skip_serializing_if = "Option::is_none")] + pub line: Option, + /// `WeakPointCategory` Debug name (e.g. "UnsafeCode"). + pub category: String, + /// Stable rule ID (e.g. "PA004"). Mirrors the SARIF rule mapping. + pub rule_id: String, + /// Human-readable rule slug (e.g. "unsafe-code"). Mirrors SARIF. + pub rule_name: String, + /// Severity label (lowercase: "critical", "high", "medium", "low"). + pub severity: String, + /// Per-finding description from the `WeakPoint`. + pub description: String, + /// Run id of the *current* run (also written to `last_seen_run`). + /// + /// S1 sets `first_seen_run == last_seen_run`. A later slice (S2 or a + /// query-side aggregation in S3) is responsible for back-stamping + /// `first_seen_run` from a prior hexad with the same `finding_id`. + pub first_seen_run: String, + /// Run id of the run that emitted this hexad. + pub last_seen_run: String, + /// Framework hint, when derivable. Reserved for future enrichment. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub framework: Option, } /// Migration-specific semantic data for VeriSimDB hexads @@ -175,6 +220,7 @@ fn build_hexad(report: &AssaultReport) -> Result { robustness_score: report.overall_assessment.robustness_score, categories, migration, + finding: None, }, document, }) @@ -296,11 +342,176 @@ fn build_assemblyline_hexad( robustness_score: 0.0, categories, migration: None, + finding: None, }, document, }) } +/// Env var that opts a run into per-finding hexad emission (issue #33 S1). +/// +/// When set to a non-empty value AND `StorageMode::VerisimDb` is configured, +/// `persist_assemblyline_report` writes one hexad per `WeakPoint` under +/// `/hexads/findings/` in addition to the existing aggregate hexad. +pub const STORE_FINDING_HEXADS_ENV: &str = "PANIC_ATTACK_STORE_FINDING_HEXADS"; + +/// Return `true` when per-finding hexad emission is requested via env var. +fn finding_hexads_enabled() -> bool { + std::env::var(STORE_FINDING_HEXADS_ENV) + .map(|v| !v.is_empty() && v != "0" && !v.eq_ignore_ascii_case("false")) + .unwrap_or(false) +} + +/// Build the stable finding-id for a `WeakPoint`. +/// +/// Pattern: `finding::::` — chosen so that two +/// scans of the same repo see the same id for the same finding, which is +/// the property S2 (`campaign register-pr`) and S3 (`query`) need. +/// +/// File and line components fall back to literal `"unknown"` / `"0"` when +/// the underlying `WeakPoint` lacks them, so the id is always well-formed. +fn build_finding_id(repo_name: &str, wp: &crate::types::WeakPoint) -> String { + let file = wp + .file + .clone() + .or_else(|| wp.location.clone()) + .unwrap_or_else(|| "unknown".to_string()); + let line = wp + .line + .map(|n| n.to_string()) + .unwrap_or_else(|| "0".to_string()); + format!("finding:{}:{}:{}:{:?}", repo_name, file, line, wp.category) +} + +/// Map `Severity` to a lowercase string label. 
+fn severity_label(severity: &crate::types::Severity) -> &'static str { + match severity { + crate::types::Severity::Critical => "critical", + crate::types::Severity::High => "high", + crate::types::Severity::Medium => "medium", + crate::types::Severity::Low => "low", + } +} + +/// Build one hexad per `WeakPoint` across all repo results in an +/// assemblyline report (issue #33 S1). +/// +/// Subject identity lives in `semantic.finding.finding_id`; each emitted +/// hexad's top-level `id` remains per-run-unique so two runs of the same +/// finding produce two distinct hexad files (the join key is the +/// `finding_id`, not the hexad id). +/// +/// `run_id` is shared across every finding-hexad in this run and stamped +/// into both `first_seen_run` and `last_seen_run` (S1 has no prior-run +/// lookup; that's a follow-up slice's job). +pub fn build_finding_hexads( + report: &crate::assemblyline::AssemblylineReport, +) -> Result> { + let now = Utc::now(); + let run_id = format!( + "pa-asmline-{}-{}", + now.format("%Y%m%d%H%M%S"), + &uuid_from_timestamp(now.timestamp_millis()) + ); + + let mut hexads = Vec::new(); + for (repo_idx, result) in report.results.iter().enumerate() { + let Some(assail_report) = &result.report else { + continue; + }; + let language = format!("{:?}", assail_report.language); + + for (wp_idx, wp) in assail_report.weak_points.iter().enumerate() { + // Skip suppressed findings — they're audit-only, not lifecycle + // material. Keeps the hexad store aligned with fleet/CI counts. + if wp.suppressed { + continue; + } + + let finding_id = build_finding_id(&result.repo_name, wp); + let category_str = format!("{:?}", wp.category); + let rule_id_str = crate::report::sarif::rule_id(&wp.category).to_string(); + let rule_name_str = crate::report::sarif::rule_name(&wp.category).to_string(); + let severity_str = severity_label(&wp.severity).to_string(); + + // Per-hexad id: pa-finding----. + // Repo/wp indices keep collision-free even within a millisecond. 
+ let hexad_id = format!( + "pa-finding-{}-{}-{}-{}", + now.format("%Y%m%d%H%M%S"), + repo_idx, + wp_idx, + &uuid_from_timestamp(now.timestamp_millis()), + ); + + let document = serde_json::json!({ + "finding_id": finding_id, + "repo_name": result.repo_name, + "repo_path": result.repo_path.display().to_string(), + "weak_point": wp, + }); + + hexads.push(PanicAttackHexad { + schema: "verisimdb.hexad.v1".to_string(), + id: hexad_id, + created_at: now.to_rfc3339(), + provenance: HexadProvenance { + tool: "panic-attack".to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + program_path: result.repo_path.display().to_string(), + language: language.clone(), + attestation_hash: None, + }, + semantic: HexadSemantic { + total_weak_points: 1, + critical_count: matches!(wp.severity, crate::types::Severity::Critical) + as usize, + high_count: matches!(wp.severity, crate::types::Severity::High) as usize, + total_crashes: 0, + robustness_score: 0.0, + categories: vec![category_str.clone()], + migration: None, + finding: Some(FindingSemantic { + finding_id: finding_id.clone(), + repo_name: result.repo_name.clone(), + file: wp + .file + .clone() + .or_else(|| wp.location.clone()) + .unwrap_or_else(|| "unknown".to_string()), + line: wp.line, + category: category_str, + rule_id: rule_id_str, + rule_name: rule_name_str, + severity: severity_str, + description: wp.description.clone(), + first_seen_run: run_id.clone(), + last_seen_run: run_id.clone(), + framework: None, + }), + }, + document, + }); + } + } + + Ok(hexads) +} + +/// Write a slice of hexads under `/hexads/findings/` (one file +/// per hexad). Returns the paths written. 
+fn write_finding_hexads(hexads: &[PanicAttackHexad], base_dir: &Path) -> Result> { + let dir = base_dir.join("hexads").join("findings"); + fs::create_dir_all(&dir)?; + let mut written = Vec::with_capacity(hexads.len()); + for hexad in hexads { + let path = dir.join(format!("{}.json", hexad.id)); + fs::write(&path, serde_json::to_string_pretty(hexad)?)?; + written.push(path); + } + Ok(written) +} + /// Persist an assemblyline report to storage (filesystem and/or verisimdb). /// /// This is the batch-scan counterpart to `persist_report()` — it stores @@ -327,19 +538,16 @@ pub fn persist_assemblyline_report( if modes.contains(&StorageMode::VerisimDb) { let hexad = build_assemblyline_hexad(report)?; + let base_dir = directory + .map(Path::to_path_buf) + .unwrap_or_else(|| PathBuf::from("verisimdb-data")); #[cfg(feature = "http")] { if std::env::var("VERISIMDB_URL").is_ok() { - let base_dir = directory - .map(Path::to_path_buf) - .unwrap_or_else(|| PathBuf::from("verisimdb-data")); let mut http_paths = push_hexad_with_fallback(&hexad, &base_dir)?; stored.append(&mut http_paths); } else { - let base_dir = directory - .map(Path::to_path_buf) - .unwrap_or_else(|| PathBuf::from("verisimdb-data")); let hexad_dir = base_dir.join("hexads"); fs::create_dir_all(&hexad_dir)?; let path = hexad_dir.join(format!("{}.json", hexad.id)); @@ -349,15 +557,21 @@ pub fn persist_assemblyline_report( } #[cfg(not(feature = "http"))] { - let base_dir = directory - .map(Path::to_path_buf) - .unwrap_or_else(|| PathBuf::from("verisimdb-data")); let hexad_dir = base_dir.join("hexads"); fs::create_dir_all(&hexad_dir)?; let path = hexad_dir.join(format!("{}.json", hexad.id)); fs::write(&path, serde_json::to_string_pretty(&hexad)?)?; stored.push(path); } + + // Per-finding hexads (issue #33 S1) — additive, env-var gated, and + // always file-side for now. HTTP push for finding hexads is left + // to S3/query path so we don't add chattiness to the API mid-S1. 
+ if finding_hexads_enabled() { + let finding_hexads = build_finding_hexads(report)?; + let mut paths = write_finding_hexads(&finding_hexads, &base_dir)?; + stored.append(&mut paths); + } } Ok(stored) @@ -774,4 +988,174 @@ mod tests { assert_eq!("disk".parse::(), Ok(StorageMode::Filesystem)); assert_eq!("bogus".parse::(), Err(())); } + + // ----- Issue #33 S1: per-finding hexad tests ----------------------- + + use crate::assemblyline::{AssemblylineReport, RepoResult}; + use crate::types::{ + AssailReport, Language, ProgramStatistics, Severity, WeakPoint, WeakPointCategory, + }; + use std::path::PathBuf; + + fn sample_weak_point(file: &str, line: u32, category: WeakPointCategory) -> WeakPoint { + WeakPoint { + category, + location: Some(format!("{}:{}", file, line)), + file: Some(file.to_string()), + line: Some(line), + severity: Severity::High, + description: format!("test finding at {}:{}", file, line), + recommended_attack: Vec::new(), + suppressed: false, + } + } + + fn sample_assemblyline(repo: &str, wps: Vec) -> AssemblylineReport { + let assail = AssailReport { + schema_version: "2.5".to_string(), + program_path: PathBuf::from(format!("/tmp/{}", repo)), + language: Language::Rust, + frameworks: Vec::new(), + weak_points: wps, + statistics: ProgramStatistics::default(), + file_statistics: Vec::new(), + recommended_attacks: Vec::new(), + dependency_graph: Default::default(), + taint_matrix: Default::default(), + migration_metrics: None, + suppressed_count: 0, + }; + AssemblylineReport { + schema_version: "2.5".to_string(), + created_at: "2026-05-26T00:00:00Z".to_string(), + directory: PathBuf::from("/tmp"), + repos_scanned: 1, + repos_with_findings: 1, + repos_skipped: 0, + total_weak_points: assail.weak_points.len(), + total_critical: 0, + results: vec![RepoResult { + repo_path: PathBuf::from(format!("/tmp/{}", repo)), + repo_name: repo.to_string(), + weak_point_count: assail.weak_points.len(), + critical_count: 0, + high_count: assail.weak_points.len(), + 
total_files: 1, + total_lines: 10, + error: None, + fingerprint: None, + report: Some(assail), + }], + } + } + + #[test] + fn build_finding_id_stable_per_finding() { + let wp = sample_weak_point("src/main.rs", 42, WeakPointCategory::UnsafeCode); + let id_1 = build_finding_id("foo", &wp); + let id_2 = build_finding_id("foo", &wp); + assert_eq!(id_1, id_2); + assert_eq!(id_1, "finding:foo:src/main.rs:42:UnsafeCode"); + } + + #[test] + fn build_finding_id_differs_by_category() { + let wp1 = sample_weak_point("src/main.rs", 42, WeakPointCategory::UnsafeCode); + let wp2 = sample_weak_point("src/main.rs", 42, WeakPointCategory::PanicPath); + assert_ne!(build_finding_id("foo", &wp1), build_finding_id("foo", &wp2)); + } + + #[test] + fn build_finding_hexads_emits_one_per_weak_point() { + let report = sample_assemblyline( + "demo", + vec![ + sample_weak_point("src/a.rs", 1, WeakPointCategory::UnsafeCode), + sample_weak_point("src/b.rs", 7, WeakPointCategory::PanicPath), + sample_weak_point("src/c.rs", 9, WeakPointCategory::CommandInjection), + ], + ); + let hexads = build_finding_hexads(&report).expect("build ok"); + assert_eq!(hexads.len(), 3); + for h in &hexads { + let f = h + .semantic + .finding + .as_ref() + .expect("each per-finding hexad must carry FindingSemantic"); + assert!(f.finding_id.starts_with("finding:demo:")); + assert_eq!(f.repo_name, "demo"); + assert_eq!(f.severity, "high"); + assert!(!f.rule_id.is_empty()); + assert_eq!(f.first_seen_run, f.last_seen_run); + } + } + + #[test] + fn build_finding_hexads_skips_suppressed() { + let mut suppressed = sample_weak_point("src/a.rs", 1, WeakPointCategory::UnsafeCode); + suppressed.suppressed = true; + let report = sample_assemblyline( + "demo", + vec![ + suppressed, + sample_weak_point("src/b.rs", 2, WeakPointCategory::PanicPath), + ], + ); + let hexads = build_finding_hexads(&report).expect("build ok"); + assert_eq!(hexads.len(), 1); + assert_eq!( + hexads[0].semantic.finding.as_ref().unwrap().category, + 
"PanicPath" + ); + } + + #[test] + fn build_finding_hexads_uses_canonical_rule_ids() { + let report = sample_assemblyline( + "demo", + vec![sample_weak_point( + "src/x.rs", + 3, + WeakPointCategory::UnsafeCode, + )], + ); + let hexads = build_finding_hexads(&report).expect("build ok"); + let f = hexads[0].semantic.finding.as_ref().unwrap(); + assert_eq!(f.rule_id, "PA004"); + assert_eq!(f.rule_name, "unsafe-code"); + } + + #[test] + fn write_finding_hexads_writes_one_file_per_hexad() { + let dir = tempfile::tempdir().expect("tempdir"); + let report = sample_assemblyline( + "demo", + vec![ + sample_weak_point("src/a.rs", 1, WeakPointCategory::UnsafeCode), + sample_weak_point("src/b.rs", 2, WeakPointCategory::PanicPath), + ], + ); + let hexads = build_finding_hexads(&report).expect("build ok"); + let paths = write_finding_hexads(&hexads, dir.path()).expect("write ok"); + assert_eq!(paths.len(), 2); + for p in &paths { + assert!(p.exists()); + // sanity: parses back as a hexad + let content = std::fs::read_to_string(p).unwrap(); + let parsed: PanicAttackHexad = serde_json::from_str(&content).unwrap(); + assert!(parsed.semantic.finding.is_some()); + } + } + + #[test] + fn finding_hexads_disabled_by_default() { + // Snapshot+restore so we don't trample on parallel-test global state. 
+ let original = std::env::var(STORE_FINDING_HEXADS_ENV).ok(); + std::env::remove_var(STORE_FINDING_HEXADS_ENV); + assert!(!finding_hexads_enabled()); + if let Some(v) = original { + std::env::set_var(STORE_FINDING_HEXADS_ENV, v); + } + } } From d6e41dc9b985a6c44436db93dd2adfcc5e0775a0 Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 26 May 2026 12:43:46 +0100 Subject: [PATCH 2/3] feat(campaign): finding-lifecycle CLI + state hexads (issue #33 S2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the second slice of issue #33: a panic-attack campaign subcommand that tracks the lifecycle of individual findings produced by the assemblyline per-finding hexad path (S1). State is persisted as campaign-facet hexads written under <base_dir>/hexads/campaign/, indexed by finding_id, append-only — the current state per finding is the newest campaign hexad with that finding_id as subject. New surface: - HexadSemantic gains `campaign: Option<CampaignSemantic>` (additive, skip_serializing_if = none). - CampaignSemantic { finding_id, state, pr_url?, reason?, last_polled? } — state is a free-form String so future labels can be added without a schema bump. - storage: build_campaign_hexad / write_campaign_hexad / load_{finding,campaign,aggregate}_hexads helpers. - src/campaign/ module — register_pr, dismiss, current_state, status_markdown. - panic-attack campaign register-pr|dismiss|status — CLI surface. `status` renders a Markdown tracker matching the shape of the issue #32 manual checklist: summary line, table with finding-id, repo, rule_id, location, state, PR link (or dismissal reason), last-event timestamp, checkbox column. Out of scope (S2b): poll subcommand that queries GitHub for PR-state transitions. The data path is in place — the polling logic lands once the rate-limit / pagination shape is settled.
Tests: 5 new in src/campaign/ (register, dismiss-overrides-open, empty-arg rejection, empty-store status, two-row render). Full lib suite: 220 green. Clippy clean with -D warnings. End-to-end CLI smoke test green: register-pr + dismiss + status round-trip prints the expected markdown. Refs #33. Stacked on #55 (S1) — diff against main includes the S1 changes until S1 lands; this PR will rebase clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/campaign/mod.rs | 296 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 97 +++++++++++++++ src/storage/mod.rs | 148 ++++++++++++++++++++++ 4 files changed, 542 insertions(+) create mode 100644 src/campaign/mod.rs diff --git a/src/campaign/mod.rs b/src/campaign/mod.rs new file mode 100644 index 0000000..62be968 --- /dev/null +++ b/src/campaign/mod.rs @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Campaign-state orchestration (issue #33 S2). +//! +//! Tracks the lifecycle of individual findings emitted by the assemblyline +//! per-finding hexad path (issue #33 S1): +//! +//! - `register_pr(finding_id, pr_url)` — records an open PR against a finding. +//! - `dismiss(finding_id, reason)` — marks a finding parked / known-good / +//! intentionally-out-of-scope. +//! - `status_markdown(base_dir)` — renders a Markdown tracker identical +//! in shape to the manual checklist used in issue #32. +//! +//! State is persisted as campaign-facet hexads written under +//! `<base_dir>/hexads/campaign/`. The store is append-only: each call +//! writes a *new* hexad. `status` derives the current state per +//! `finding_id` by sorting all campaign hexads by `created_at` and +//! keeping the newest one for each subject. +//! +//! Polling GitHub for PR-state updates is deferred to a follow-up slice +//! (S2b) — this initial S2 focuses on the local lifecycle primitives so +//! the campaign data can accumulate before the polling logic lands.
+ +use crate::storage::{ + build_campaign_hexad, load_campaign_hexads, load_finding_hexads, write_campaign_hexad, + CampaignSemantic, PanicAttackHexad, +}; +use anyhow::{anyhow, Result}; +use chrono::Utc; +use std::collections::HashMap; +use std::path::Path; + +/// Canonical state labels written into `CampaignSemantic.state`. +/// +/// New variants can be added without breaking older readers — the field +/// is a `String` on the wire (forward-compatible by design). +pub mod state { + pub const OPEN: &str = "open"; + pub const PR_FILED: &str = "pr-filed"; + pub const PR_MERGED: &str = "pr-merged"; + pub const PR_CLOSED: &str = "pr-closed"; + pub const DISMISSED: &str = "dismissed"; +} + +/// Register an open PR against a known finding. +/// +/// Writes a `pr-filed` campaign hexad to `/hexads/campaign/`. +/// Returns the path written. +pub fn register_pr(finding_id: &str, pr_url: &str, base_dir: &Path) -> Result { + if finding_id.is_empty() { + return Err(anyhow!("finding_id must not be empty")); + } + if pr_url.is_empty() { + return Err(anyhow!("pr_url must not be empty")); + } + let hexad = build_campaign_hexad(CampaignSemantic { + finding_id: finding_id.to_string(), + state: state::PR_FILED.to_string(), + pr_url: Some(pr_url.to_string()), + reason: None, + last_polled: None, + }); + write_campaign_hexad(&hexad, base_dir) +} + +/// Dismiss a finding (parked, known-good, out-of-scope). +/// +/// Writes a `dismissed` campaign hexad. Returns the path written. +pub fn dismiss(finding_id: &str, reason: &str, base_dir: &Path) -> Result { + if finding_id.is_empty() { + return Err(anyhow!("finding_id must not be empty")); + } + let hexad = build_campaign_hexad(CampaignSemantic { + finding_id: finding_id.to_string(), + state: state::DISMISSED.to_string(), + pr_url: None, + reason: Some(reason.to_string()), + last_polled: None, + }); + write_campaign_hexad(&hexad, base_dir) +} + +/// One row of the campaign tracker — current state of a finding. 
+#[derive(Debug, Clone)] +pub struct CampaignRow { + pub finding_id: String, + pub state: String, + pub pr_url: Option, + pub reason: Option, + pub last_event_at: String, + /// If the finding hexad is available, its repo name (for display). + pub repo_name: Option, + /// Same — rule id (e.g. PA004). + pub rule_id: Option, + /// Same — file:line summary. + pub location: Option, +} + +/// Compute the current campaign state for every finding seen, by +/// folding the append-only hexad stream by `finding_id` and keeping the +/// newest event. +pub fn current_state(base_dir: &Path) -> Result> { + let mut campaign = load_campaign_hexads(base_dir)?; + campaign.sort_by(|a, b| a.created_at.cmp(&b.created_at)); + + // Index finding metadata by finding_id (latest wins, but for findings + // the schema is run-stable so any matching hexad will do). + let findings = load_finding_hexads(base_dir)?; + let mut finding_meta: HashMap = HashMap::new(); + for h in &findings { + if let Some(f) = h.semantic.finding.as_ref() { + finding_meta.insert(f.finding_id.clone(), h); + } + } + + let mut latest: HashMap = HashMap::new(); + for h in campaign { + if let Some(c) = h.semantic.campaign.clone() { + latest.insert(c.finding_id.clone(), (h.created_at.clone(), c)); + } + } + + let mut rows: Vec = latest + .into_iter() + .map(|(_, (ts, c))| { + let (repo_name, rule_id, location) = finding_meta + .get(&c.finding_id) + .and_then(|h| h.semantic.finding.as_ref()) + .map(|f| { + ( + Some(f.repo_name.clone()), + Some(f.rule_id.clone()), + Some(format!( + "{}:{}", + f.file, + f.line.map(|n| n.to_string()).unwrap_or_default() + )), + ) + }) + .unwrap_or((None, None, None)); + CampaignRow { + finding_id: c.finding_id, + state: c.state, + pr_url: c.pr_url, + reason: c.reason, + last_event_at: ts, + repo_name, + rule_id, + location, + } + }) + .collect(); + rows.sort_by(|a, b| a.finding_id.cmp(&b.finding_id)); + Ok(rows) +} + +/// Render a Markdown tracker matching the shape used by issue #32. 
+/// +/// Rows sorted by `finding_id`; checkbox `[x]` for merged/closed/dismissed, +/// `[ ]` otherwise. State, PR link (or reason), and timestamp appear in +/// columns. An ungrouped "Findings without campaign state" footer is +/// omitted from S2 to keep the output small; S3 query is the right place +/// to list "open work not yet PR'd". +pub fn status_markdown(base_dir: &Path) -> Result { + let rows = current_state(base_dir)?; + let now = Utc::now().to_rfc3339(); + let mut out = String::new(); + out.push_str(&format!( + "# Campaign tracker — `panic-attack`\n\n_Generated {now}_\n\n" + )); + if rows.is_empty() { + out.push_str("_No campaign state recorded yet._\n"); + return Ok(out); + } + + let merged_count = rows + .iter() + .filter(|r| matches!(r.state.as_str(), state::PR_MERGED | state::PR_CLOSED)) + .count(); + let open_count = rows + .iter() + .filter(|r| matches!(r.state.as_str(), state::PR_FILED | state::OPEN)) + .count(); + let dismissed_count = rows.iter().filter(|r| r.state == state::DISMISSED).count(); + out.push_str(&format!( + "**Summary**: {} merged/closed, {} open, {} dismissed (total {}).\n\n", + merged_count, + open_count, + dismissed_count, + rows.len() + )); + + out.push_str("| ☐ | Finding | Repo | Rule | Location | State | PR / Reason | Last event |\n"); + out.push_str("|---|---------|------|------|----------|-------|-------------|------------|\n"); + for r in rows { + let check = match r.state.as_str() { + state::PR_MERGED | state::PR_CLOSED | state::DISMISSED => "[x]", + _ => "[ ]", + }; + let pr_or_reason = match (r.pr_url.as_deref(), r.reason.as_deref()) { + (Some(url), _) => format!("[PR]({url})"), + (None, Some(reason)) => reason.to_string(), + (None, None) => "—".to_string(), + }; + out.push_str(&format!( + "| {} | `{}` | {} | {} | {} | {} | {} | {} |\n", + check, + r.finding_id, + r.repo_name.as_deref().unwrap_or("—"), + r.rule_id.as_deref().unwrap_or("—"), + r.location.as_deref().unwrap_or("—"), + r.state, + pr_or_reason, + 
r.last_event_at, + )); + } + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn register_pr_writes_hexad() { + let dir = tempdir().unwrap(); + let path = register_pr( + "finding:demo:src/a.rs:1:UnsafeCode", + "https://example.invalid/pr/1", + dir.path(), + ) + .expect("register ok"); + assert!(path.exists()); + let rows = current_state(dir.path()).unwrap(); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].state, state::PR_FILED); + assert_eq!( + rows[0].pr_url.as_deref(), + Some("https://example.invalid/pr/1") + ); + } + + #[test] + fn dismiss_overrides_open() { + let dir = tempdir().unwrap(); + let id = "finding:demo:src/a.rs:1:UnsafeCode"; + register_pr(id, "https://example.invalid/pr/1", dir.path()).unwrap(); + // Sleep a hair to ensure the second hexad's created_at sorts strictly later. + std::thread::sleep(std::time::Duration::from_millis(1100)); + dismiss(id, "intentional sentinel", dir.path()).unwrap(); + let rows = current_state(dir.path()).unwrap(); + assert_eq!(rows.len(), 1, "one finding, latest state wins"); + assert_eq!(rows[0].state, state::DISMISSED); + assert_eq!(rows[0].reason.as_deref(), Some("intentional sentinel")); + } + + #[test] + fn register_pr_rejects_empty_args() { + let dir = tempdir().unwrap(); + assert!(register_pr("", "https://example.invalid", dir.path()).is_err()); + assert!(register_pr("finding:x:y:1:Z", "", dir.path()).is_err()); + } + + #[test] + fn status_markdown_handles_empty() { + let dir = tempdir().unwrap(); + let md = status_markdown(dir.path()).unwrap(); + assert!(md.contains("No campaign state recorded yet")); + } + + #[test] + fn status_markdown_renders_rows() { + let dir = tempdir().unwrap(); + register_pr( + "finding:alpha:src/a.rs:1:UnsafeCode", + "https://example.invalid/pr/1", + dir.path(), + ) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(1100)); + dismiss( + "finding:beta:src/b.rs:9:PanicPath", + "test coverage gap", + dir.path(), + ) + 
.unwrap(); + let md = status_markdown(dir.path()).unwrap(); + assert!(md.contains("finding:alpha:src/a.rs:1:UnsafeCode")); + assert!(md.contains("finding:beta:src/b.rs:9:PanicPath")); + assert!(md.contains("pr-filed")); + assert!(md.contains("dismissed")); + assert!(md.contains("test coverage gap")); + assert!(md.contains("1 open, 1 dismissed")); + } +} diff --git a/src/lib.rs b/src/lib.rs index b5ca076..8b52541 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ pub mod attestation; pub mod axial; #[cfg(feature = "http")] pub mod bridge; +pub mod campaign; pub mod i18n; pub mod kanren; pub mod mass_panic; diff --git a/src/main.rs b/src/main.rs index b31e0f4..7a127db 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,7 @@ mod attestation; mod axial; #[cfg(feature = "http")] mod bridge; +mod campaign; mod diagnostics; mod groove; mod i18n; @@ -743,6 +744,15 @@ enum Commands { #[command(subcommand)] action: AttestAction, }, + + /// Campaign: lifecycle tracking for findings (register-pr, dismiss, status). + /// + /// Operates on the per-finding hexad store written by `assemblyline` when + /// `PANIC_ATTACK_STORE_FINDING_HEXADS=1` is set with verisimdb storage. + Campaign { + #[command(subcommand)] + action: CampaignAction, + }, } #[derive(Subcommand)] @@ -755,6 +765,46 @@ enum AttestAction { }, } +/// Campaign subcommands for finding-lifecycle tracking (issue #33 S2). +#[derive(Subcommand)] +enum CampaignAction { + /// Register an open PR against a known finding-id. + RegisterPr { + /// Finding id (e.g. `finding:demo:src/a.rs:1:UnsafeCode`). + #[arg(value_name = "FINDING_ID")] + finding_id: String, + /// PR URL (e.g. `https://github.com/org/repo/pull/123`). + #[arg(value_name = "PR_URL")] + pr_url: String, + /// VeriSimDB data directory (default: `verisimdb-data`). + #[arg(long, value_name = "DIR", default_value = "verisimdb-data")] + verisimdb_dir: PathBuf, + }, + + /// Mark a finding as dismissed (parked, known-good, out-of-scope). 
+ Dismiss { + /// Finding id. + #[arg(value_name = "FINDING_ID")] + finding_id: String, + /// Short human-readable reason. + #[arg(value_name = "REASON")] + reason: String, + /// VeriSimDB data directory (default: `verisimdb-data`). + #[arg(long, value_name = "DIR", default_value = "verisimdb-data")] + verisimdb_dir: PathBuf, + }, + + /// Render a Markdown tracker of the current campaign state. + Status { + /// VeriSimDB data directory (default: `verisimdb-data`). + #[arg(long, value_name = "DIR", default_value = "verisimdb-data")] + verisimdb_dir: PathBuf, + /// Write the Markdown to a file instead of stdout. + #[arg(short, long, value_name = "FILE")] + output: Option, + }, +} + /// Patch Bridge subcommands for CVE lifecycle management. #[cfg(feature = "http")] #[derive(Subcommand)] @@ -2354,6 +2404,53 @@ fn run_main() -> Result<()> { } }, + Commands::Campaign { action } => { + match action { + CampaignAction::RegisterPr { + finding_id, + pr_url, + verisimdb_dir, + } => { + let path = campaign::register_pr(&finding_id, &pr_url, &verisimdb_dir)?; + qprintln!( + cli.quiet, + "Registered PR {} for {} ({})", + pr_url, + finding_id, + path.display() + ); + } + CampaignAction::Dismiss { + finding_id, + reason, + verisimdb_dir, + } => { + let path = campaign::dismiss(&finding_id, &reason, &verisimdb_dir)?; + qprintln!( + cli.quiet, + "Dismissed {} ({}): {}", + finding_id, + reason, + path.display() + ); + } + CampaignAction::Status { + verisimdb_dir, + output, + } => { + let md = campaign::status_markdown(&verisimdb_dir)?; + match output { + Some(path) => { + std::fs::write(&path, &md)?; + qprintln!(cli.quiet, "Status written to {}", path.display()); + } + None => print!("{}", md), + } + } + } + return Ok(()); + } + Commands::Temporal { action } => { match action { TemporalAction::List { verisimdb_dir } => { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 4c43d08..4a8c3e9 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -92,6 +92,36 @@ pub struct 
HexadSemantic { /// single WeakPoint emitted by `build_finding_hexads`, issue #33 S1). #[serde(skip_serializing_if = "Option::is_none")] pub finding: Option, + /// Campaign-state semantic data (present when this hexad is a lifecycle + /// update — PR registration, dismissal, poll — issue #33 S2). + #[serde(skip_serializing_if = "Option::is_none")] + pub campaign: Option, +} + +/// Campaign-state facet of a hexad: tracks the lifecycle of a single +/// finding (issue #33 S2). +/// +/// Append-only: each `register-pr` / `dismiss` / `poll` emits a fresh +/// hexad with the same `finding_id` subject. `status` aggregates by +/// taking the newest by `created_at`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CampaignSemantic { + /// Subject — must match a `FindingSemantic.finding_id` written by S1. + pub finding_id: String, + /// State label. Canonical values: "open", "pr-filed", "pr-merged", + /// "pr-closed", "dismissed". Free-form so future states can be added + /// without a schema bump (forward-compatible by design). + pub state: String, + /// PR URL when `state` is `pr-*`. + #[serde(skip_serializing_if = "Option::is_none")] + pub pr_url: Option, + /// Human-readable dismissal reason when `state == "dismissed"`. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// ISO 8601 of the last PR-state poll (S2 follow-up sets this; S2 + /// initial doesn't poll). + #[serde(skip_serializing_if = "Option::is_none")] + pub last_polled: Option, } /// Semantic facets of a per-finding hexad (issue #33 S1). 
@@ -221,6 +251,7 @@ fn build_hexad(report: &AssaultReport) -> Result { categories, migration, finding: None, + campaign: None, }, document, }) @@ -343,6 +374,7 @@ fn build_assemblyline_hexad( categories, migration: None, finding: None, + campaign: None, }, document, }) @@ -489,6 +521,7 @@ pub fn build_finding_hexads( last_seen_run: run_id.clone(), framework: None, }), + campaign: None, }, document, }); @@ -512,6 +545,121 @@ fn write_finding_hexads(hexads: &[PanicAttackHexad], base_dir: &Path) -> Result< Ok(written) } +// --------------------------------------------------------------------------- +// Issue #33 S2 — campaign-state hexad write/load helpers +// --------------------------------------------------------------------------- + +/// Maximum size (in bytes) of a single hexad JSON file we'll load from +/// disk. Hexads are small documents; anything past 16 MiB is corrupted +/// or hostile. +const HEXAD_FILE_READ_LIMIT: u64 = 16 * 1024 * 1024; + +/// Build a campaign-state hexad for one lifecycle event (issue #33 S2). +/// +/// Append-only: each call produces a fresh hexad with a unique id. The +/// `finding_id` is carried as the semantic subject so the newest hexad +/// per finding is the current state. 
+pub fn build_campaign_hexad(semantic: CampaignSemantic) -> PanicAttackHexad { + let now = Utc::now(); + let hexad_id = format!( + "pa-campaign-{}-{}", + now.format("%Y%m%d%H%M%S"), + &uuid_from_timestamp(now.timestamp_millis()) + ); + + PanicAttackHexad { + schema: "verisimdb.hexad.v1".to_string(), + id: hexad_id, + created_at: now.to_rfc3339(), + provenance: HexadProvenance { + tool: "panic-attack".to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + program_path: "campaign".to_string(), + language: "n/a".to_string(), + attestation_hash: None, + }, + semantic: HexadSemantic { + total_weak_points: 0, + critical_count: 0, + high_count: 0, + total_crashes: 0, + robustness_score: 0.0, + categories: Vec::new(), + migration: None, + finding: None, + campaign: Some(semantic), + }, + document: serde_json::Value::Null, + } +} + +/// Write a single campaign-state hexad under +/// `/hexads/campaign/.json`. Returns the path. +pub fn write_campaign_hexad(hexad: &PanicAttackHexad, base_dir: &Path) -> Result { + let dir = base_dir.join("hexads").join("campaign"); + fs::create_dir_all(&dir)?; + let path = dir.join(format!("{}.json", hexad.id)); + fs::write(&path, serde_json::to_string_pretty(hexad)?)?; + Ok(path) +} + +/// Load every JSON hexad file from a directory. +/// +/// Files that fail to parse are silently skipped — this is a "best +/// effort" reader used by status/query subcommands, not a validation +/// pass. Returns hexads in filesystem-order (the caller sorts as needed). 
+fn load_hexad_dir(dir: &Path) -> Result> { + use std::io::Read; + + if !dir.exists() { + return Ok(Vec::new()); + } + let mut hexads = Vec::new(); + for entry in fs::read_dir(dir)?.flatten() { + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + let mut content = String::new(); + let Ok(file) = fs::File::open(&path) else { + continue; + }; + if file + .take(HEXAD_FILE_READ_LIMIT) + .read_to_string(&mut content) + .is_err() + { + continue; + } + if let Ok(hexad) = serde_json::from_str::(&content) { + hexads.push(hexad); + } + } + Ok(hexads) +} + +/// Load every per-finding hexad from `/hexads/findings/`. +pub fn load_finding_hexads(base_dir: &Path) -> Result> { + load_hexad_dir(&base_dir.join("hexads").join("findings")) +} + +/// Load every campaign-state hexad from `/hexads/campaign/`. +pub fn load_campaign_hexads(base_dir: &Path) -> Result> { + load_hexad_dir(&base_dir.join("hexads").join("campaign")) +} + +/// Load every aggregate (per-run) hexad from `/hexads/`. +/// +/// Aggregate hexads live at the top-level `hexads/` directory; per-finding +/// and per-campaign hexads live in subdirs and are excluded here. +/// +/// Reserved for S3 query — kept public so the upcoming `query` subcommand +/// can compose it with the per-finding / per-campaign loaders. +#[allow(dead_code)] +pub fn load_aggregate_hexads(base_dir: &Path) -> Result> { + load_hexad_dir(&base_dir.join("hexads")) +} + /// Persist an assemblyline report to storage (filesystem and/or verisimdb). 
/// /// This is the batch-scan counterpart to `persist_report()` — it stores From 68aa30f661fbf129f96d76b38f5054c7aefa7cae Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 26 May 2026 13:46:54 +0100 Subject: [PATCH 3/3] feat(sweep-tracker): hierarchical estate-sweep Markdown report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `panic-attack sweep-tracker` subcommand that renders an issue-#32 shaped sweep tracker by joining per-finding hexads (issue #33 S1) with campaign-state hexads (issue #33 S2). Distinct from `campaign status`: that is a flat per-finding table; this is a hierarchical checklist grouped by repo and/or category, with an estate-wide summary header. Flags: - `--verisimdb-dir DIR` : hexad store root (default `verisimdb-data`) - `--output FILE` : write Markdown to file instead of stdout - `--by-repo` : emit only the "By repo" section - `--by-category` : emit only the "By category" section - no flag : emit both sections (default) Output is deterministic — repos alphabetically, findings within each repo sorted by (rule_id, file, line). A finding with no campaign hexad shows state `open`; with one, shows the latest state plus PR URL (rendered as `#` link) or dismissal reason. Tests cover empty store, by-repo grouping, by-category grouping, campaign-state join (open / pr-merged / dismissed), deterministic ordering, both-shape ordering, and PR-number label extraction. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lib.rs | 1 + src/main.rs | 47 +++ src/sweep_tracker/mod.rs | 678 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 726 insertions(+) create mode 100644 src/sweep_tracker/mod.rs diff --git a/src/lib.rs b/src/lib.rs index 8b52541..7e4f92f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,4 +35,5 @@ pub mod panll; pub mod report; pub mod signatures; pub mod storage; +pub mod sweep_tracker; pub mod types; diff --git a/src/main.rs b/src/main.rs index 7a127db..b81ef27 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,6 +30,7 @@ mod panll; mod report; mod signatures; mod storage; +mod sweep_tracker; mod types; extern crate walkdir; @@ -753,6 +754,30 @@ enum Commands { #[command(subcommand)] action: CampaignAction, }, + + /// Sweep-tracker: render an issue-#32-style estate-sweep Markdown report. + /// + /// Joins per-finding hexads (issue #33 S1) with campaign-state hexads + /// (issue #33 S2) and groups them by repo and/or category. Distinct + /// from `campaign status`: that is a flat per-finding table; this is + /// a hierarchical sweep checklist. + SweepTracker { + /// VeriSimDB data directory (default: `verisimdb-data`). + #[arg(long, value_name = "DIR", default_value = "verisimdb-data")] + verisimdb_dir: PathBuf, + + /// Write the Markdown to a file instead of stdout. + #[arg(short, long, value_name = "FILE")] + output: Option, + + /// Emit only the "By repo" section. + #[arg(long, group = "sweep_shape", default_value_t = false)] + by_repo: bool, + + /// Emit only the "By category" section. 
+ #[arg(long, group = "sweep_shape", default_value_t = false)] + by_category: bool, + }, } #[derive(Subcommand)] @@ -2451,6 +2476,28 @@ fn run_main() -> Result<()> { return Ok(()); } + Commands::SweepTracker { + verisimdb_dir, + output, + by_repo, + by_category, + } => { + let shape = match (by_repo, by_category) { + (true, false) => sweep_tracker::ReportShape::ByRepo, + (false, true) => sweep_tracker::ReportShape::ByCategory, + _ => sweep_tracker::ReportShape::Both, + }; + let md = sweep_tracker::render_report(&verisimdb_dir, shape)?; + match output { + Some(path) => { + std::fs::write(&path, &md)?; + qprintln!(cli.quiet, "Sweep tracker written to {}", path.display()); + } + None => print!("{}", md), + } + return Ok(()); + } + Commands::Temporal { action } => { match action { TemporalAction::List { verisimdb_dir } => { diff --git a/src/sweep_tracker/mod.rs b/src/sweep_tracker/mod.rs new file mode 100644 index 0000000..253f38c --- /dev/null +++ b/src/sweep_tracker/mod.rs @@ -0,0 +1,678 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Estate-sweep tracker — Markdown report generator (issue #33 follow-up). +//! +//! Produces a hierarchical issue-#32-style sweep tracker by joining the +//! per-finding hexad store (issue #33 S1) with the campaign-state hexad +//! store (issue #33 S2). This is a *report* over the same data the +//! per-finding `panic-attack campaign status` table renders, but +//! organised the way an estate sweep is run: top-down by repo, and a +//! cross-cut by category. +//! +//! Distinguishing features vs `campaign::status_markdown`: +//! +//! - **Hierarchical**, not flat: grouped by repo and/or category. +//! - **Estate summary** up top — count of repos, criticals, PRs filed, +//! dismissed, and open-no-PR. +//! - **Always sourced from the finding store**: a finding with no +//! campaign hexad still appears (state `open`); the per-finding +//! table is campaign-driven and omits never-touched findings. +//! 
- **Deterministic**: repos alphabetically; findings within each +//! repo by `(rule_id, file, line)`; categories by rule_id. +//! +//! The intended workflow: +//! +//! ```text +//! panic-attack sweep-tracker --output sweep-tracker.md +//! ``` +//! +//! …producing a Markdown checklist that can be pasted into an +//! estate-sweep tracker issue (the issue-#32 shape). + +use crate::storage::{load_campaign_hexads, load_finding_hexads, CampaignSemantic}; +use anyhow::Result; +use chrono::Utc; +use std::collections::{BTreeMap, HashMap}; +use std::path::Path; + +/// Which sections of the report to emit. +/// +/// `Both` is the default and renders a "By repo" section followed by a +/// "By category" section, with a shared estate-summary header. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum ReportShape { + /// Group findings by repository name only. + ByRepo, + /// Group findings by category (rule id) only. + ByCategory, + /// Render both groupings, separated by an `## By category` heading. + #[default] + Both, +} + +/// One finding joined against its current campaign state, if any. +#[derive(Debug, Clone)] +struct JoinedRow { + repo_name: String, + file: String, + line: Option, + rule_id: String, + category: String, + severity: String, + finding_id: String, + /// Current campaign state. `"open"` if no campaign hexad exists. 
+ state: String, + pr_url: Option, + dismissal_reason: Option, +} + +impl JoinedRow { + fn is_pr_filed(&self) -> bool { + matches!( + self.state.as_str(), + crate::campaign::state::PR_FILED + | crate::campaign::state::PR_MERGED + | crate::campaign::state::PR_CLOSED + ) + } + + fn is_dismissed(&self) -> bool { + self.state == crate::campaign::state::DISMISSED + } + + fn is_done(&self) -> bool { + matches!( + self.state.as_str(), + crate::campaign::state::PR_MERGED + | crate::campaign::state::PR_CLOSED + | crate::campaign::state::DISMISSED + ) + } + + fn is_open(&self) -> bool { + self.state == crate::campaign::state::OPEN + } + + fn is_critical(&self) -> bool { + self.severity.eq_ignore_ascii_case("critical") + } + + fn is_high(&self) -> bool { + self.severity.eq_ignore_ascii_case("high") + } + + /// Stable per-row sort key: `(rule_id, file, line)`. Ties broken by + /// finding_id so the order is fully deterministic. + fn sort_key(&self) -> (String, String, u32, String) { + ( + self.rule_id.clone(), + self.file.clone(), + self.line.unwrap_or(0), + self.finding_id.clone(), + ) + } +} + +/// Build the joined finding × campaign-state rows for `base_dir`. +/// +/// One row per finding hexad. Latest campaign hexad per `finding_id` +/// wins (matching `campaign::current_state`). Returned in unspecified +/// order; the renderer sorts per section. +fn collect_rows(base_dir: &Path) -> Result> { + let finding_hexads = load_finding_hexads(base_dir)?; + let mut campaign_hexads = load_campaign_hexads(base_dir)?; + campaign_hexads.sort_by(|a, b| a.created_at.cmp(&b.created_at)); + + // Newest campaign hexad per finding_id wins. 
+ let mut latest_state: HashMap = HashMap::new(); + for h in &campaign_hexads { + if let Some(c) = h.semantic.campaign.as_ref() { + latest_state.insert(c.finding_id.clone(), c.clone()); + } + } + + let mut rows = Vec::new(); + for h in &finding_hexads { + let Some(f) = h.semantic.finding.as_ref() else { + continue; + }; + let (state, pr_url, dismissal_reason) = latest_state + .get(&f.finding_id) + .map(|c| (c.state.clone(), c.pr_url.clone(), c.reason.clone())) + .unwrap_or_else(|| (crate::campaign::state::OPEN.to_string(), None, None)); + rows.push(JoinedRow { + repo_name: f.repo_name.clone(), + file: f.file.clone(), + line: f.line, + rule_id: f.rule_id.clone(), + category: f.category.clone(), + severity: f.severity.clone(), + finding_id: f.finding_id.clone(), + state, + pr_url, + dismissal_reason, + }); + } + Ok(rows) +} + +/// Estate-wide summary counts for the header. +#[derive(Debug, Clone, Default)] +struct Summary { + total: usize, + repos: usize, + critical: usize, + high: usize, + pr_filed: usize, + dismissed: usize, + open: usize, +} + +impl Summary { + fn from_rows(rows: &[JoinedRow]) -> Self { + let mut repos: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new(); + let mut s = Summary { + total: rows.len(), + ..Default::default() + }; + for r in rows { + repos.insert(r.repo_name.as_str()); + if r.is_critical() { + s.critical += 1; + } + if r.is_high() { + s.high += 1; + } + if r.is_pr_filed() { + s.pr_filed += 1; + } + if r.is_dismissed() { + s.dismissed += 1; + } + if r.is_open() { + s.open += 1; + } + } + s.repos = repos.len(); + s + } +} + +/// Render one row's trailing state/pr/reason marker, e.g. +/// `pr-merged ([#42](https://...))`, `dismissed (test scaffold)`, or +/// `open`. +fn render_state_marker(row: &JoinedRow) -> String { + if let Some(url) = row.pr_url.as_deref() { + // Best-effort `#` extraction for compactness; falls back to + // the URL itself when the trailing segment isn't a number. 
+ let label = pr_number_label(url).unwrap_or_else(|| url.to_string()); + format!("{} ([{}]({}))", row.state, label, url) + } else if let Some(reason) = row.dismissal_reason.as_deref() { + format!("{} ({})", row.state, reason) + } else { + row.state.clone() + } +} + +/// Extract a `#` label from a PR URL like +/// `https://github.com/org/repo/pull/42`. Returns `None` when the URL +/// doesn't end in a numeric path segment. +fn pr_number_label(url: &str) -> Option { + let trimmed = url.trim_end_matches('/'); + let tail = trimmed.rsplit('/').next()?; + let num: u64 = tail.parse().ok()?; + Some(format!("#{}", num)) +} + +/// `file:line` shorthand for inline display. +fn location_str(row: &JoinedRow) -> String { + match row.line { + Some(n) if n > 0 => format!("{}:{}", row.file, n), + _ => row.file.clone(), + } +} + +fn checkbox(row: &JoinedRow) -> &'static str { + if row.is_done() { + "[x]" + } else { + "[ ]" + } +} + +fn render_by_repo_section(rows: &[JoinedRow]) -> String { + let mut by_repo: BTreeMap> = BTreeMap::new(); + for r in rows { + by_repo.entry(r.repo_name.clone()).or_default().push(r); + } + + let mut out = String::new(); + out.push_str("## By repo\n\n"); + if by_repo.is_empty() { + out.push_str("_No findings recorded._\n\n"); + return out; + } + for (repo, mut repo_rows) in by_repo { + repo_rows.sort_by_key(|r| r.sort_key()); + let critical = repo_rows.iter().filter(|r| r.is_critical()).count(); + out.push_str(&format!( + "### {} ({} findings, {} critical)\n\n", + repo, + repo_rows.len(), + critical, + )); + for r in repo_rows { + out.push_str(&format!( + "- {} {} {} — {}\n", + checkbox(r), + r.rule_id, + location_str(r), + render_state_marker(r), + )); + } + out.push('\n'); + } + out +} + +fn render_by_category_section(rows: &[JoinedRow]) -> String { + let mut by_category: BTreeMap<(String, String), Vec<&JoinedRow>> = BTreeMap::new(); + for r in rows { + by_category + .entry((r.rule_id.clone(), r.category.clone())) + .or_default() + .push(r); + } + + let 
mut out = String::new(); + out.push_str("## By category\n\n"); + if by_category.is_empty() { + out.push_str("_No findings recorded._\n\n"); + return out; + } + for ((rule_id, category), mut cat_rows) in by_category { + cat_rows.sort_by_key(|r| (r.repo_name.clone(), r.sort_key())); + let repo_set: std::collections::BTreeSet<&str> = + cat_rows.iter().map(|r| r.repo_name.as_str()).collect(); + out.push_str(&format!( + "### {} {} ({} findings across {} repos)\n\n", + rule_id, + category, + cat_rows.len(), + repo_set.len(), + )); + for r in cat_rows { + out.push_str(&format!( + "- {} {} {} — {}\n", + checkbox(r), + r.repo_name, + location_str(r), + render_state_marker(r), + )); + } + out.push('\n'); + } + out +} + +fn render_header(rows: &[JoinedRow]) -> String { + let now = Utc::now().to_rfc3339(); + let s = Summary::from_rows(rows); + let mut out = String::new(); + out.push_str("# Estate sweep tracker\n\n"); + out.push_str(&format!("_Generated {}_\n\n", now)); + if s.total == 0 { + out.push_str("_No findings recorded yet — the per-finding hexad store is empty._\n\n"); + return out; + } + out.push_str(&format!( + "**Estate summary**: {} findings across {} repos ({} critical, {} high). \ + {} PR-filed, {} dismissed, {} open (no PR).\n\n", + s.total, s.repos, s.critical, s.high, s.pr_filed, s.dismissed, s.open, + )); + out +} + +/// Render an estate-sweep tracker Markdown report. +/// +/// Reads the per-finding and campaign hexad stores under `base_dir`, +/// joins them, and emits a Markdown document shaped after the +/// issue-#32 tracker checklist. +pub fn render_report(base_dir: &Path, shape: ReportShape) -> Result { + let rows = collect_rows(base_dir)?; + let mut out = render_header(&rows); + if rows.is_empty() { + // Header already announced the empty-store case; skip per-section + // headings entirely so the document doesn't carry confusing + // "## By repo / _No findings recorded._" stanzas. 
+ return Ok(out); + } + match shape { + ReportShape::ByRepo => out.push_str(&render_by_repo_section(&rows)), + ReportShape::ByCategory => out.push_str(&render_by_category_section(&rows)), + ReportShape::Both => { + out.push_str(&render_by_repo_section(&rows)); + out.push_str(&render_by_category_section(&rows)); + } + } + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::campaign; + use crate::storage::{ + CampaignSemantic, FindingSemantic, HexadProvenance, HexadSemantic, PanicAttackHexad, + }; + use std::fs; + use std::path::PathBuf; + use tempfile::tempdir; + + fn write_finding_hexad(base_dir: &Path, finding: FindingSemantic) -> PathBuf { + let dir = base_dir.join("hexads").join("findings"); + fs::create_dir_all(&dir).unwrap(); + // Sanitise filename-hostile chars from the finding id; the on-disk + // hexad id is decorative (the join key lives in `semantic.finding`). + let safe_id: String = finding + .finding_id + .chars() + .map(|c| { + if matches!(c, ':' | '/' | '\\') { + '_' + } else { + c + } + }) + .collect(); + let id = format!("pa-finding-test-{}", safe_id); + let hexad = PanicAttackHexad { + schema: "verisimdb.hexad.v1".to_string(), + id: id.clone(), + created_at: "2026-05-26T12:00:00Z".to_string(), + provenance: HexadProvenance { + tool: "panic-attack".to_string(), + version: "test".to_string(), + program_path: format!("/tmp/{}", finding.repo_name), + language: "Rust".to_string(), + attestation_hash: None, + }, + semantic: HexadSemantic { + total_weak_points: 1, + critical_count: if finding.severity == "critical" { 1 } else { 0 }, + high_count: if finding.severity == "high" { 1 } else { 0 }, + total_crashes: 0, + robustness_score: 0.0, + categories: vec![finding.category.clone()], + migration: None, + finding: Some(finding), + campaign: None, + }, + document: serde_json::Value::Null, + }; + let path = dir.join(format!("{}.json", id)); + fs::write(&path, serde_json::to_string_pretty(&hexad).unwrap()).unwrap(); + path + } + + fn 
write_campaign_hexad_with_id(base_dir: &Path, id: &str, semantic: CampaignSemantic) { + let dir = base_dir.join("hexads").join("campaign"); + fs::create_dir_all(&dir).unwrap(); + let hexad = PanicAttackHexad { + schema: "verisimdb.hexad.v1".to_string(), + id: id.to_string(), + created_at: format!("2026-05-26T12:00:{:02}Z", id.len() % 60), + provenance: HexadProvenance { + tool: "panic-attack".to_string(), + version: "test".to_string(), + program_path: "campaign".to_string(), + language: "n/a".to_string(), + attestation_hash: None, + }, + semantic: HexadSemantic { + total_weak_points: 0, + critical_count: 0, + high_count: 0, + total_crashes: 0, + robustness_score: 0.0, + categories: Vec::new(), + migration: None, + finding: None, + campaign: Some(semantic), + }, + document: serde_json::Value::Null, + }; + let path = dir.join(format!("{}.json", id)); + fs::write(&path, serde_json::to_string_pretty(&hexad).unwrap()).unwrap(); + } + + fn sample_finding( + repo: &str, + file: &str, + line: u32, + rule_id: &str, + category: &str, + severity: &str, + ) -> FindingSemantic { + FindingSemantic { + finding_id: format!("finding:{}:{}:{}:{}", repo, file, line, category), + repo_name: repo.to_string(), + file: file.to_string(), + line: Some(line), + category: category.to_string(), + rule_id: rule_id.to_string(), + rule_name: rule_id.to_lowercase(), + severity: severity.to_string(), + description: format!("sample finding {}:{}:{}", repo, file, line), + first_seen_run: "run-test".to_string(), + last_seen_run: "run-test".to_string(), + framework: None, + } + } + + #[test] + fn empty_store_yields_empty_marker() { + let dir = tempdir().unwrap(); + let report = render_report(dir.path(), ReportShape::Both).unwrap(); + assert!(report.starts_with("# Estate sweep tracker")); + assert!(report.contains("_No findings recorded yet")); + // Neither section should appear when there are no findings. 
+ assert!(!report.contains("## By repo")); + assert!(!report.contains("## By category")); + } + + #[test] + fn by_repo_groups_and_summarises() { + let dir = tempdir().unwrap(); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/lib.rs", 23, "PA001", "PanicPath", "critical"), + ); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/ffi.rs", 7, "PA004", "UnsafeCode", "high"), + ); + write_finding_hexad( + dir.path(), + sample_finding("beta", "src/auth.rs", 91, "PA022", "CryptoMisuse", "medium"), + ); + + let report = render_report(dir.path(), ReportShape::ByRepo).unwrap(); + assert!(report.contains("**Estate summary**: 3 findings across 2 repos")); + assert!(report.contains("(1 critical, 1 high)")); + assert!(report.contains("## By repo")); + assert!(report.contains("### alpha (2 findings, 1 critical)")); + assert!(report.contains("### beta (1 findings, 0 critical)")); + // Section absent when shape is ByRepo only. + assert!(!report.contains("## By category")); + + // Deterministic ordering: alpha before beta, and within alpha the + // PA001 finding sorts before PA004 (rule_id ascending). 
+ let alpha_idx = report.find("### alpha").unwrap(); + let beta_idx = report.find("### beta").unwrap(); + assert!(alpha_idx < beta_idx); + let pa001_idx = report.find("PA001 src/lib.rs:23").unwrap(); + let pa004_idx = report.find("PA004 src/ffi.rs:7").unwrap(); + assert!(pa001_idx < pa004_idx); + } + + #[test] + fn by_category_groups_across_repos() { + let dir = tempdir().unwrap(); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/a.rs", 1, "PA004", "UnsafeCode", "high"), + ); + write_finding_hexad( + dir.path(), + sample_finding("beta", "src/b.rs", 2, "PA004", "UnsafeCode", "high"), + ); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/c.rs", 3, "PA001", "PanicPath", "medium"), + ); + + let report = render_report(dir.path(), ReportShape::ByCategory).unwrap(); + assert!(report.contains("## By category")); + assert!(report.contains("### PA001 PanicPath (1 findings across 1 repos)")); + assert!(report.contains("### PA004 UnsafeCode (2 findings across 2 repos)")); + assert!(!report.contains("## By repo")); + + // PA001 sorts before PA004. 
+ let pa001 = report.find("### PA001").unwrap(); + let pa004 = report.find("### PA004").unwrap(); + assert!(pa001 < pa004); + } + + #[test] + fn campaign_state_join_renders_pr_url_and_dismissal() { + let dir = tempdir().unwrap(); + + let pr_finding = + sample_finding("alpha", "src/lib.rs", 23, "PA001", "PanicPath", "critical"); + let pr_finding_id = pr_finding.finding_id.clone(); + write_finding_hexad(dir.path(), pr_finding); + + let dismissed_finding = sample_finding( + "alpha", + "src/auth.rs", + 91, + "PA022", + "CryptoMisuse", + "medium", + ); + let dismissed_id = dismissed_finding.finding_id.clone(); + write_finding_hexad(dir.path(), dismissed_finding); + + let open_finding = sample_finding("alpha", "src/ffi.rs", 7, "PA004", "UnsafeCode", "high"); + write_finding_hexad(dir.path(), open_finding); + + // Manual write of campaign hexads to bypass timestamp collisions + // (two same-millisecond calls to `build_campaign_hexad` would + // produce identical hexad ids and the second would overwrite the + // first on disk). Each gets a deterministic, unique id here. + write_campaign_hexad_with_id( + dir.path(), + "pa-campaign-test-1", + CampaignSemantic { + finding_id: pr_finding_id.clone(), + state: campaign::state::PR_MERGED.to_string(), + pr_url: Some("https://github.com/example/alpha/pull/42".to_string()), + reason: None, + last_polled: None, + }, + ); + + write_campaign_hexad_with_id( + dir.path(), + "pa-campaign-test-2", + CampaignSemantic { + finding_id: dismissed_id.clone(), + state: campaign::state::DISMISSED.to_string(), + pr_url: None, + reason: Some("test scaffold".to_string()), + last_polled: None, + }, + ); + + let report = render_report(dir.path(), ReportShape::ByRepo).unwrap(); + + // Estate summary: 1 PR-filed (counts pr-merged too), 1 dismissed, 1 open. + assert!(report.contains("1 PR-filed, 1 dismissed, 1 open")); + + // PR row: checkbox ticked, state + GitHub-style #42 link. 
+ assert!(report.contains( + "[x] PA001 src/lib.rs:23 — pr-merged ([#42](https://github.com/example/alpha/pull/42))" + )); + // Dismissal row: ticked, reason in parens. + assert!(report.contains("[x] PA022 src/auth.rs:91 — dismissed (test scaffold)")); + // Open row: empty checkbox, bare `open` marker. + assert!(report.contains("[ ] PA004 src/ffi.rs:7 — open")); + } + + #[test] + fn deterministic_ordering_within_repo() { + // Insert findings in a deliberately scrambled order; expect the + // report to sort them by (rule_id, file, line). + let dir = tempdir().unwrap(); + write_finding_hexad( + dir.path(), + sample_finding("zzz", "src/z.rs", 9, "PA004", "UnsafeCode", "low"), + ); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/b.rs", 5, "PA001", "PanicPath", "low"), + ); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/a.rs", 5, "PA001", "PanicPath", "low"), + ); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/a.rs", 3, "PA001", "PanicPath", "low"), + ); + + let report = render_report(dir.path(), ReportShape::ByRepo).unwrap(); + + // Repos: alpha before zzz. + let alpha = report.find("### alpha").unwrap(); + let zzz = report.find("### zzz").unwrap(); + assert!(alpha < zzz); + + // Within alpha: a.rs:3 < a.rs:5 < b.rs:5. + let a3 = report.find("PA001 src/a.rs:3").unwrap(); + let a5 = report.find("PA001 src/a.rs:5").unwrap(); + let b5 = report.find("PA001 src/b.rs:5").unwrap(); + assert!(a3 < a5); + assert!(a5 < b5); + } + + #[test] + fn both_shape_emits_repo_then_category() { + let dir = tempdir().unwrap(); + write_finding_hexad( + dir.path(), + sample_finding("alpha", "src/a.rs", 1, "PA004", "UnsafeCode", "high"), + ); + let report = render_report(dir.path(), ReportShape::Both).unwrap(); + let repo_idx = report.find("## By repo").unwrap(); + let cat_idx = report.find("## By category").unwrap(); + assert!(repo_idx < cat_idx); + // Header still present. 
+ assert!(report.contains("**Estate summary**")); + } + + #[test] + fn pr_number_label_handles_non_numeric_tail() { + assert_eq!( + pr_number_label("https://github.com/foo/bar/pull/42"), + Some("#42".to_string()) + ); + assert_eq!( + pr_number_label("https://github.com/foo/bar/pull/42/"), + Some("#42".to_string()) + ); + assert_eq!(pr_number_label("https://example.invalid/some/path"), None); + assert_eq!(pr_number_label(""), None); + } +}