From 070a9f8c7d743751b9967f15bf8fa6b52f88bc8d Mon Sep 17 00:00:00 2001
From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com>
Date: Tue, 26 May 2026 12:30:55 +0100
Subject: [PATCH 1/3] feat(storage): per-finding hexad emission (issue #33 S1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a per-WeakPoint hexad path to persist_assemblyline_report so a
batch scan can persist one hexad per finding in addition to the existing
aggregate hexad. Subject identity is `finding:<repo>:<file>:<line>:<category>`,
chosen for cross-run stability so the upcoming S2 (campaign register-pr)
and S3 (query) slices can join on it without diffing JSON.

New public surface:
- HexadSemantic gains an optional `finding: Option<FindingSemantic>`
  (additive, skip_serializing_if = none → existing consumers unaffected).
- FindingSemantic carries finding_id / repo / file / line / category /
  rule_id / rule_name / severity / description / first_seen_run /
  last_seen_run / framework. rule_id and rule_name reuse the canonical
  SARIF mapping (sarif.rs::rule_id / rule_name now pub(crate)).
- build_finding_hexads(report) -> Vec<PanicAttackHexad>.
- STORE_FINDING_HEXADS_ENV = "PANIC_ATTACK_STORE_FINDING_HEXADS" — when
  set non-empty AND StorageMode::VerisimDb is configured,
  persist_assemblyline_report writes one file per finding under
  `<dir>/hexads/findings/`.

Behaviour preserved:
- Default path unchanged (env var off → no per-finding writes).
- Aggregate hexad still emitted in every VerisimDb run.
- Suppressed WeakPoints are skipped, keeping the store aligned with
  fleet/CI counts.

S1 sets first_seen_run == last_seen_run; back-stamping from a prior
hexad is S2's job (per the issue), not S1's.

Tests: 7 new (id stability, category discrimination, count per WP,
suppression skip, canonical rule_id/name, file write + round-trip,
env-var default-off). Full suite: 215 lib + 13 + 16 + 6 + 12 + 3 + 7
+ 12 + 14 + 20 + 10 + 8 + 22 + 22 + 12 + 2 doc — all green. Clippy
clean with -D warnings.

Refs #33.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/report/sarif.rs |   4 +-
 src/storage/mod.rs  | 402 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 395 insertions(+), 11 deletions(-)
diff --git a/src/report/sarif.rs b/src/report/sarif.rs
index db7ca42..cd67a6d 100644
--- a/src/report/sarif.rs
+++ b/src/report/sarif.rs
@@ -112,7 +112,7 @@ pub struct SarifRegion {
 }
 
 /// Map WeakPointCategory to a stable rule ID
-fn rule_id(category: &WeakPointCategory) -> &'static str {
+pub(crate) fn rule_id(category: &WeakPointCategory) -> &'static str {
     match category {
         WeakPointCategory::UncheckedAllocation => "PA001",
         WeakPointCategory::UnboundedAllocation => "PA001b",
@@ -144,7 +144,7 @@ fn rule_id(category: &WeakPointCategory) -> &'static str {
 }
 
 /// Map WeakPointCategory to a human-readable name
-fn rule_name(category: &WeakPointCategory) -> &'static str {
+pub(crate) fn rule_name(category: &WeakPointCategory) -> &'static str {
     match category {
         WeakPointCategory::UncheckedAllocation => "unchecked-allocation",
         WeakPointCategory::UnboundedAllocation => "unbounded-allocation",
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index 91f0d02..4c43d08 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -88,6 +88,51 @@ pub struct HexadSemantic {
     /// Migration-specific semantic data (present when target is ReScript)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub migration: Option<MigrationSemantic>,
+    /// Finding-level semantic data (present when this hexad represents a
+    /// single WeakPoint emitted by `build_finding_hexads`, issue #33 S1).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub finding: Option<FindingSemantic>,
+}
+
+/// Semantic facets of a per-finding hexad (issue #33 S1).
+///
+/// A per-finding hexad represents one `WeakPoint` from an assemblyline scan
+/// of one repository. The `finding_id` is stable across runs (same
+/// repo/file/line/category → same id), so subsequent slices (S2 PR-state
+/// tracking, S3 cross-repo query) can identify a finding without comparing
+/// JSON blobs.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FindingSemantic {
+    /// Stable per-finding identifier: `finding:<repo>:<file>:<line>:<category>`.
+    pub finding_id: String,
+    /// Repository name (basename of repo path).
+    pub repo_name: String,
+    /// File path, repo-relative.
+    pub file: String,
+    /// Line number from the original `WeakPoint`, if available.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub line: Option<u32>,
+    /// `WeakPointCategory` Debug name (e.g. "UnsafeCode").
+    pub category: String,
+    /// Stable rule ID (e.g. "PA004"). Mirrors the SARIF rule mapping.
+    pub rule_id: String,
+    /// Human-readable rule slug (e.g. "unsafe-code"). Mirrors SARIF.
+    pub rule_name: String,
+    /// Severity label (lowercase: "critical", "high", "medium", "low").
+    pub severity: String,
+    /// Per-finding description from the `WeakPoint`.
+    pub description: String,
+    /// Run id of the *current* run (also written to `last_seen_run`).
+    ///
+    /// S1 sets `first_seen_run == last_seen_run`. A later slice (S2 or a
+    /// query-side aggregation in S3) is responsible for back-stamping
+    /// `first_seen_run` from a prior hexad with the same `finding_id`.
+    pub first_seen_run: String,
+    /// Run id of the run that emitted this hexad.
+    pub last_seen_run: String,
+    /// Framework hint, when derivable. Reserved for future enrichment.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub framework: Option<String>,
 }
 
 /// Migration-specific semantic data for VeriSimDB hexads
@@ -175,6 +220,7 @@ fn build_hexad(report: &AssaultReport) -> Result<PanicAttackHexad> {
             robustness_score: report.overall_assessment.robustness_score,
             categories,
             migration,
+            finding: None,
         },
         document,
     })
@@ -296,11 +342,176 @@ fn build_assemblyline_hexad(
             robustness_score: 0.0,
             categories,
             migration: None,
+            finding: None,
         },
         document,
     })
 }
 
+/// Env var that opts a run into per-finding hexad emission (issue #33 S1).
+///
+/// When set to a non-empty value AND `StorageMode::VerisimDb` is configured,
+/// `persist_assemblyline_report` writes one hexad per `WeakPoint` under
+/// `<dir>/hexads/findings/` in addition to the existing aggregate hexad.
+pub const STORE_FINDING_HEXADS_ENV: &str = "PANIC_ATTACK_STORE_FINDING_HEXADS";
+
+/// Return `true` when per-finding hexad emission is requested via env var.
+fn finding_hexads_enabled() -> bool {
+    std::env::var(STORE_FINDING_HEXADS_ENV)
+        .map(|v| !v.is_empty() && v != "0" && !v.eq_ignore_ascii_case("false"))
+        .unwrap_or(false)
+}
+
+/// Build the stable finding-id for a `WeakPoint`.
+///
+/// Pattern: `finding:<repo>:<file>:<line>:<category>` — chosen so that two
+/// scans of the same repo see the same id for the same finding, which is
+/// the property S2 (`campaign register-pr`) and S3 (`query`) need.
+///
+/// File and line components fall back to literal `"unknown"` / `"0"` when
+/// the underlying `WeakPoint` lacks them, so the id is always well-formed.
+fn build_finding_id(repo_name: &str, wp: &crate::types::WeakPoint) -> String {
+    let file = wp
+        .file
+        .clone()
+        .or_else(|| wp.location.clone())
+        .unwrap_or_else(|| "unknown".to_string());
+    let line = wp
+        .line
+        .map(|n| n.to_string())
+        .unwrap_or_else(|| "0".to_string());
+    format!("finding:{}:{}:{}:{:?}", repo_name, file, line, wp.category)
+}
+
+/// Map `Severity` to a lowercase string label.
+fn severity_label(severity: &crate::types::Severity) -> &'static str {
+    match severity {
+        crate::types::Severity::Critical => "critical",
+        crate::types::Severity::High => "high",
+        crate::types::Severity::Medium => "medium",
+        crate::types::Severity::Low => "low",
+    }
+}
+
+/// Build one hexad per `WeakPoint` across all repo results in an
+/// assemblyline report (issue #33 S1).
+///
+/// Subject identity lives in `semantic.finding.finding_id`; each emitted
+/// hexad's top-level `id` remains per-run-unique so two runs of the same
+/// finding produce two distinct hexad files (the join key is the
+/// `finding_id`, not the hexad id).
+///
+/// `run_id` is shared across every finding-hexad in this run and stamped
+/// into both `first_seen_run` and `last_seen_run` (S1 has no prior-run
+/// lookup; that's a follow-up slice's job).
+pub fn build_finding_hexads(
+    report: &crate::assemblyline::AssemblylineReport,
+) -> Result<Vec<PanicAttackHexad>> {
+    let now = Utc::now();
+    let run_id = format!(
+        "pa-asmline-{}-{}",
+        now.format("%Y%m%d%H%M%S"),
+        &uuid_from_timestamp(now.timestamp_millis())
+    );
+
+    let mut hexads = Vec::new();
+    for (repo_idx, result) in report.results.iter().enumerate() {
+        let Some(assail_report) = &result.report else {
+            continue;
+        };
+        let language = format!("{:?}", assail_report.language);
+
+        for (wp_idx, wp) in assail_report.weak_points.iter().enumerate() {
+            // Skip suppressed findings — they're audit-only, not lifecycle
+            // material. Keeps the hexad store aligned with fleet/CI counts.
+            if wp.suppressed {
+                continue;
+            }
+
+            let finding_id = build_finding_id(&result.repo_name, wp);
+            let category_str = format!("{:?}", wp.category);
+            let rule_id_str = crate::report::sarif::rule_id(&wp.category).to_string();
+            let rule_name_str = crate::report::sarif::rule_name(&wp.category).to_string();
+            let severity_str = severity_label(&wp.severity).to_string();
+
+            // Per-hexad id: pa-finding-<run_ts>-<repo_idx>-<wp_idx>-<short>.
+            // Repo/wp indices keep collision-free even within a millisecond.
+            let hexad_id = format!(
+                "pa-finding-{}-{}-{}-{}",
+                now.format("%Y%m%d%H%M%S"),
+                repo_idx,
+                wp_idx,
+                &uuid_from_timestamp(now.timestamp_millis()),
+            );
+
+            let document = serde_json::json!({
+                "finding_id": finding_id,
+                "repo_name": result.repo_name,
+                "repo_path": result.repo_path.display().to_string(),
+                "weak_point": wp,
+            });
+
+            hexads.push(PanicAttackHexad {
+                schema: "verisimdb.hexad.v1".to_string(),
+                id: hexad_id,
+                created_at: now.to_rfc3339(),
+                provenance: HexadProvenance {
+                    tool: "panic-attack".to_string(),
+                    version: env!("CARGO_PKG_VERSION").to_string(),
+                    program_path: result.repo_path.display().to_string(),
+                    language: language.clone(),
+                    attestation_hash: None,
+                },
+                semantic: HexadSemantic {
+                    total_weak_points: 1,
+                    critical_count: matches!(wp.severity, crate::types::Severity::Critical)
+                        as usize,
+                    high_count: matches!(wp.severity, crate::types::Severity::High) as usize,
+                    total_crashes: 0,
+                    robustness_score: 0.0,
+                    categories: vec![category_str.clone()],
+                    migration: None,
+                    finding: Some(FindingSemantic {
+                        finding_id: finding_id.clone(),
+                        repo_name: result.repo_name.clone(),
+                        file: wp
+                            .file
+                            .clone()
+                            .or_else(|| wp.location.clone())
+                            .unwrap_or_else(|| "unknown".to_string()),
+                        line: wp.line,
+                        category: category_str,
+                        rule_id: rule_id_str,
+                        rule_name: rule_name_str,
+                        severity: severity_str,
+                        description: wp.description.clone(),
+                        first_seen_run: run_id.clone(),
+                        last_seen_run: run_id.clone(),
+                        framework: None,
+                    }),
+                },
+                document,
+            });
+        }
+    }
+
+    Ok(hexads)
+}
+
+/// Write a slice of hexads under `<base_dir>/hexads/findings/` (one file
+/// per hexad). Returns the paths written.
+fn write_finding_hexads(hexads: &[PanicAttackHexad], base_dir: &Path) -> Result<Vec<PathBuf>> {
+    let dir = base_dir.join("hexads").join("findings");
+    fs::create_dir_all(&dir)?;
+    let mut written = Vec::with_capacity(hexads.len());
+    for hexad in hexads {
+        let path = dir.join(format!("{}.json", hexad.id));
+        fs::write(&path, serde_json::to_string_pretty(hexad)?)?;
+        written.push(path);
+    }
+    Ok(written)
+}
+
 /// Persist an assemblyline report to storage (filesystem and/or verisimdb).
 ///
 /// This is the batch-scan counterpart to `persist_report()` — it stores
@@ -327,19 +538,16 @@ pub fn persist_assemblyline_report(
 
     if modes.contains(&StorageMode::VerisimDb) {
         let hexad = build_assemblyline_hexad(report)?;
+        let base_dir = directory
+            .map(Path::to_path_buf)
+            .unwrap_or_else(|| PathBuf::from("verisimdb-data"));
 
         #[cfg(feature = "http")]
         {
             if std::env::var("VERISIMDB_URL").is_ok() {
-                let base_dir = directory
-                    .map(Path::to_path_buf)
-                    .unwrap_or_else(|| PathBuf::from("verisimdb-data"));
                 let mut http_paths = push_hexad_with_fallback(&hexad, &base_dir)?;
                 stored.append(&mut http_paths);
             } else {
-                let base_dir = directory
-                    .map(Path::to_path_buf)
-                    .unwrap_or_else(|| PathBuf::from("verisimdb-data"));
                 let hexad_dir = base_dir.join("hexads");
                 fs::create_dir_all(&hexad_dir)?;
                 let path = hexad_dir.join(format!("{}.json", hexad.id));
@@ -349,15 +557,21 @@ pub fn persist_assemblyline_report(
         }
         #[cfg(not(feature = "http"))]
         {
-            let base_dir = directory
-                .map(Path::to_path_buf)
-                .unwrap_or_else(|| PathBuf::from("verisimdb-data"));
             let hexad_dir = base_dir.join("hexads");
             fs::create_dir_all(&hexad_dir)?;
             let path = hexad_dir.join(format!("{}.json", hexad.id));
             fs::write(&path, serde_json::to_string_pretty(&hexad)?)?;
             stored.push(path);
         }
+
+        // Per-finding hexads (issue #33 S1) — additive, env-var gated, and
+        // always file-side for now. HTTP push for finding hexads is left
+        // to S3/query path so we don't add chattiness to the API mid-S1.
+        if finding_hexads_enabled() {
+            let finding_hexads = build_finding_hexads(report)?;
+            let mut paths = write_finding_hexads(&finding_hexads, &base_dir)?;
+            stored.append(&mut paths);
+        }
     }
 
     Ok(stored)
@@ -774,4 +988,174 @@ mod tests {
         assert_eq!("disk".parse::<StorageMode>(), Ok(StorageMode::Filesystem));
         assert_eq!("bogus".parse::<StorageMode>(), Err(()));
     }
+
+    // ----- Issue #33 S1: per-finding hexad tests -----------------------
+
+    use crate::assemblyline::{AssemblylineReport, RepoResult};
+    use crate::types::{
+        AssailReport, Language, ProgramStatistics, Severity, WeakPoint, WeakPointCategory,
+    };
+    use std::path::PathBuf;
+
+    fn sample_weak_point(file: &str, line: u32, category: WeakPointCategory) -> WeakPoint {
+        WeakPoint {
+            category,
+            location: Some(format!("{}:{}", file, line)),
+            file: Some(file.to_string()),
+            line: Some(line),
+            severity: Severity::High,
+            description: format!("test finding at {}:{}", file, line),
+            recommended_attack: Vec::new(),
+            suppressed: false,
+        }
+    }
+
+    fn sample_assemblyline(repo: &str, wps: Vec<WeakPoint>) -> AssemblylineReport {
+        let assail = AssailReport {
+            schema_version: "2.5".to_string(),
+            program_path: PathBuf::from(format!("/tmp/{}", repo)),
+            language: Language::Rust,
+            frameworks: Vec::new(),
+            weak_points: wps,
+            statistics: ProgramStatistics::default(),
+            file_statistics: Vec::new(),
+            recommended_attacks: Vec::new(),
+            dependency_graph: Default::default(),
+            taint_matrix: Default::default(),
+            migration_metrics: None,
+            suppressed_count: 0,
+        };
+        AssemblylineReport {
+            schema_version: "2.5".to_string(),
+            created_at: "2026-05-26T00:00:00Z".to_string(),
+            directory: PathBuf::from("/tmp"),
+            repos_scanned: 1,
+            repos_with_findings: 1,
+            repos_skipped: 0,
+            total_weak_points: assail.weak_points.len(),
+            total_critical: 0,
+            results: vec![RepoResult {
+                repo_path: PathBuf::from(format!("/tmp/{}", repo)),
+                repo_name: repo.to_string(),
+                weak_point_count: assail.weak_points.len(),
+                critical_count: 0,
+                high_count: assail.weak_points.len(),
+                total_files: 1,
+                total_lines: 10,
+                error: None,
+                fingerprint: None,
+                report: Some(assail),
+            }],
+        }
+    }
+
+    #[test]
+    fn build_finding_id_stable_per_finding() {
+        let wp = sample_weak_point("src/main.rs", 42, WeakPointCategory::UnsafeCode);
+        let id_1 = build_finding_id("foo", &wp);
+        let id_2 = build_finding_id("foo", &wp);
+        assert_eq!(id_1, id_2);
+        assert_eq!(id_1, "finding:foo:src/main.rs:42:UnsafeCode");
+    }
+
+    #[test]
+    fn build_finding_id_differs_by_category() {
+        let wp1 = sample_weak_point("src/main.rs", 42, WeakPointCategory::UnsafeCode);
+        let wp2 = sample_weak_point("src/main.rs", 42, WeakPointCategory::PanicPath);
+        assert_ne!(build_finding_id("foo", &wp1), build_finding_id("foo", &wp2));
+    }
+
+    #[test]
+    fn build_finding_hexads_emits_one_per_weak_point() {
+        let report = sample_assemblyline(
+            "demo",
+            vec![
+                sample_weak_point("src/a.rs", 1, WeakPointCategory::UnsafeCode),
+                sample_weak_point("src/b.rs", 7, WeakPointCategory::PanicPath),
+                sample_weak_point("src/c.rs", 9, WeakPointCategory::CommandInjection),
+            ],
+        );
+        let hexads = build_finding_hexads(&report).expect("build ok");
+        assert_eq!(hexads.len(), 3);
+        for h in &hexads {
+            let f = h
+                .semantic
+                .finding
+                .as_ref()
+                .expect("each per-finding hexad must carry FindingSemantic");
+            assert!(f.finding_id.starts_with("finding:demo:"));
+            assert_eq!(f.repo_name, "demo");
+            assert_eq!(f.severity, "high");
+            assert!(!f.rule_id.is_empty());
+            assert_eq!(f.first_seen_run, f.last_seen_run);
+        }
+    }
+
+    #[test]
+    fn build_finding_hexads_skips_suppressed() {
+        let mut suppressed = sample_weak_point("src/a.rs", 1, WeakPointCategory::UnsafeCode);
+        suppressed.suppressed = true;
+        let report = sample_assemblyline(
+            "demo",
+            vec![
+                suppressed,
+                sample_weak_point("src/b.rs", 2, WeakPointCategory::PanicPath),
+            ],
+        );
+        let hexads = build_finding_hexads(&report).expect("build ok");
+        assert_eq!(hexads.len(), 1);
+        assert_eq!(
+            hexads[0].semantic.finding.as_ref().unwrap().category,
+            "PanicPath"
+        );
+    }
+
+    #[test]
+    fn build_finding_hexads_uses_canonical_rule_ids() {
+        let report = sample_assemblyline(
+            "demo",
+            vec![sample_weak_point(
+                "src/x.rs",
+                3,
+                WeakPointCategory::UnsafeCode,
+            )],
+        );
+        let hexads = build_finding_hexads(&report).expect("build ok");
+        let f = hexads[0].semantic.finding.as_ref().unwrap();
+        assert_eq!(f.rule_id, "PA004");
+        assert_eq!(f.rule_name, "unsafe-code");
+    }
+
+    #[test]
+    fn write_finding_hexads_writes_one_file_per_hexad() {
+        let dir = tempfile::tempdir().expect("tempdir");
+        let report = sample_assemblyline(
+            "demo",
+            vec![
+                sample_weak_point("src/a.rs", 1, WeakPointCategory::UnsafeCode),
+                sample_weak_point("src/b.rs", 2, WeakPointCategory::PanicPath),
+            ],
+        );
+        let hexads = build_finding_hexads(&report).expect("build ok");
+        let paths = write_finding_hexads(&hexads, dir.path()).expect("write ok");
+        assert_eq!(paths.len(), 2);
+        for p in &paths {
+            assert!(p.exists());
+            // sanity: parses back as a hexad
+            let content = std::fs::read_to_string(p).unwrap();
+            let parsed: PanicAttackHexad = serde_json::from_str(&content).unwrap();
+            assert!(parsed.semantic.finding.is_some());
+        }
+    }
+
+    #[test]
+    fn finding_hexads_disabled_by_default() {
+        // Snapshot+restore so we don't trample on parallel-test global state.
+        let original = std::env::var(STORE_FINDING_HEXADS_ENV).ok();
+        std::env::remove_var(STORE_FINDING_HEXADS_ENV);
+        assert!(!finding_hexads_enabled());
+        if let Some(v) = original {
+            std::env::set_var(STORE_FINDING_HEXADS_ENV, v);
+        }
+    }
 }

From d6e41dc9b985a6c44436db93dd2adfcc5e0775a0 Mon Sep 17 00:00:00 2001
From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com>
Date: Tue, 26 May 2026 12:43:46 +0100
Subject: [PATCH 2/3] feat(campaign): finding-lifecycle CLI + state hexads
 (issue #33 S2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the second slice of issue #33: a panic-attack campaign subcommand
that tracks the lifecycle of individual findings produced by the
assemblyline per-finding hexad path (S1). State is persisted as
campaign-facet hexads written under <dir>/hexads/campaign/, indexed by
finding_id, append-only — the current state per finding is the newest
campaign hexad with that finding_id as subject.

New surface:

- HexadSemantic gains `campaign: Option<CampaignSemantic>` (additive,
  skip_serializing_if = none).
- CampaignSemantic { finding_id, state, pr_url?, reason?, last_polled? }
  — state is a free-form String so future labels can be added without
  a schema bump.
- storage: build_campaign_hexad / write_campaign_hexad /
  load_{finding,campaign,aggregate}_hexads helpers.
- src/campaign/ module — register_pr, dismiss, current_state,
  status_markdown.
- panic-attack campaign register-pr|dismiss|status — CLI surface.

`status` renders a Markdown tracker matching the shape of the issue #32
manual checklist: summary line, table with finding-id, repo, rule_id,
location, state, PR link (or dismissal reason), last-event timestamp,
checkbox column.

Out of scope (S2b): poll subcommand that queries GitHub for PR-state
transitions. The data path is in place — the polling logic lands once
the rate-limit / pagination shape is settled.

Tests: 5 new in src/campaign/ (register, dismiss-overrides-open,
empty-arg rejection, empty-store status, two-row render). Full lib
suite: 220 green. Clippy clean with -D warnings. End-to-end CLI smoke
test green: register-pr + dismiss + status round-trip prints the
expected markdown.

Refs #33. Stacked on #55 (S1) — diff against main includes the S1
changes until S1 lands; this PR will rebase clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/campaign/mod.rs | 296 ++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs          |   1 +
 src/main.rs         |  97 +++++++++++++++
 src/storage/mod.rs  | 148 ++++++++++++++++++++++
 4 files changed, 542 insertions(+)
 create mode 100644 src/campaign/mod.rs

diff --git a/src/campaign/mod.rs b/src/campaign/mod.rs
new file mode 100644
index 0000000..62be968
--- /dev/null
+++ b/src/campaign/mod.rs
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! Campaign-state orchestration (issue #33 S2).
+//!
+//! Tracks the lifecycle of individual findings emitted by the assemblyline
+//! per-finding hexad path (issue #33 S1):
+//!
+//! - `register_pr(finding_id, pr_url)` — opens a PR for a finding.
+//! - `dismiss(finding_id, reason)` — marks a finding parked / known-good /
+//!   intentionally-out-of-scope.
+//! - `status_markdown(base_dir)` — renders a Markdown tracker identical
+//!   in shape to the manual checklist used in issue #32.
+//!
+//! State is persisted as campaign-facet hexads written under
+//! `<base_dir>/hexads/campaign/`. The store is append-only: each call
+//! writes a *new* hexad. `status` derives the current state per
+//! `finding_id` by sorting all campaign hexads by `created_at` and
+//! keeping the newest one for each subject.
+//!
+//! Polling GitHub for PR-state updates is deferred to a follow-up slice
+//! (S2b) — this initial S2 focuses on the local lifecycle primitives so
+//! the campaign data can accumulate before the polling logic lands.
+
+use crate::storage::{
+    build_campaign_hexad, load_campaign_hexads, load_finding_hexads, write_campaign_hexad,
+    CampaignSemantic, PanicAttackHexad,
+};
+use anyhow::{anyhow, Result};
+use chrono::Utc;
+use std::collections::HashMap;
+use std::path::Path;
+
+/// Canonical state labels written into `CampaignSemantic.state`.
+///
+/// New variants can be added without breaking older readers — the field
+/// is a `String` on the wire (forward-compatible by design).
+pub mod state {
+    pub const OPEN: &str = "open";
+    pub const PR_FILED: &str = "pr-filed";
+    pub const PR_MERGED: &str = "pr-merged";
+    pub const PR_CLOSED: &str = "pr-closed";
+    pub const DISMISSED: &str = "dismissed";
+}
+
+/// Register an open PR against a known finding.
+///
+/// Writes a `pr-filed` campaign hexad to `<base_dir>/hexads/campaign/`.
+/// Returns the path written.
+pub fn register_pr(finding_id: &str, pr_url: &str, base_dir: &Path) -> Result<std::path::PathBuf> {
+    if finding_id.is_empty() {
+        return Err(anyhow!("finding_id must not be empty"));
+    }
+    if pr_url.is_empty() {
+        return Err(anyhow!("pr_url must not be empty"));
+    }
+    let hexad = build_campaign_hexad(CampaignSemantic {
+        finding_id: finding_id.to_string(),
+        state: state::PR_FILED.to_string(),
+        pr_url: Some(pr_url.to_string()),
+        reason: None,
+        last_polled: None,
+    });
+    write_campaign_hexad(&hexad, base_dir)
+}
+
+/// Dismiss a finding (parked, known-good, out-of-scope).
+///
+/// Writes a `dismissed` campaign hexad. Returns the path written.
+pub fn dismiss(finding_id: &str, reason: &str, base_dir: &Path) -> Result<std::path::PathBuf> {
+    if finding_id.is_empty() {
+        return Err(anyhow!("finding_id must not be empty"));
+    }
+    let hexad = build_campaign_hexad(CampaignSemantic {
+        finding_id: finding_id.to_string(),
+        state: state::DISMISSED.to_string(),
+        pr_url: None,
+        reason: Some(reason.to_string()),
+        last_polled: None,
+    });
+    write_campaign_hexad(&hexad, base_dir)
+}
+
+/// One row of the campaign tracker — current state of a finding.
+#[derive(Debug, Clone)]
+pub struct CampaignRow {
+    pub finding_id: String,
+    pub state: String,
+    pub pr_url: Option<String>,
+    pub reason: Option<String>,
+    pub last_event_at: String,
+    /// If the finding hexad is available, its repo name (for display).
+    pub repo_name: Option<String>,
+    /// Same — rule id (e.g. PA004).
+    pub rule_id: Option<String>,
+    /// Same — file:line summary.
+    pub location: Option<String>,
+}
+
+/// Compute the current campaign state for every finding seen, by
+/// folding the append-only hexad stream by `finding_id` and keeping the
+/// newest event.
+pub fn current_state(base_dir: &Path) -> Result<Vec<CampaignRow>> {
+    let mut campaign = load_campaign_hexads(base_dir)?;
+    campaign.sort_by(|a, b| a.created_at.cmp(&b.created_at));
+
+    // Index finding metadata by finding_id (latest wins, but for findings
+    // the schema is run-stable so any matching hexad will do).
+    let findings = load_finding_hexads(base_dir)?;
+    let mut finding_meta: HashMap<String, &PanicAttackHexad> = HashMap::new();
+    for h in &findings {
+        if let Some(f) = h.semantic.finding.as_ref() {
+            finding_meta.insert(f.finding_id.clone(), h);
+        }
+    }
+
+    let mut latest: HashMap<String, (String, CampaignSemantic)> = HashMap::new();
+    for h in campaign {
+        if let Some(c) = h.semantic.campaign.clone() {
+            latest.insert(c.finding_id.clone(), (h.created_at.clone(), c));
+        }
+    }
+
+    let mut rows: Vec<CampaignRow> = latest
+        .into_iter()
+        .map(|(_, (ts, c))| {
+            let (repo_name, rule_id, location) = finding_meta
+                .get(&c.finding_id)
+                .and_then(|h| h.semantic.finding.as_ref())
+                .map(|f| {
+                    (
+                        Some(f.repo_name.clone()),
+                        Some(f.rule_id.clone()),
+                        Some(format!(
+                            "{}:{}",
+                            f.file,
+                            f.line.map(|n| n.to_string()).unwrap_or_default()
+                        )),
+                    )
+                })
+                .unwrap_or((None, None, None));
+            CampaignRow {
+                finding_id: c.finding_id,
+                state: c.state,
+                pr_url: c.pr_url,
+                reason: c.reason,
+                last_event_at: ts,
+                repo_name,
+                rule_id,
+                location,
+            }
+        })
+        .collect();
+    rows.sort_by(|a, b| a.finding_id.cmp(&b.finding_id));
+    Ok(rows)
+}
+
+/// Render a Markdown tracker matching the shape used by issue #32.
+///
+/// Rows sorted by `finding_id`; checkbox `[x]` for merged/closed/dismissed,
+/// `[ ]` otherwise. State, PR link (or reason), and timestamp appear in
+/// columns. An ungrouped "Findings without campaign state" footer is
+/// omitted from S2 to keep the output small; S3 query is the right place
+/// to list "open work not yet PR'd".
+pub fn status_markdown(base_dir: &Path) -> Result<String> {
+    let rows = current_state(base_dir)?;
+    let now = Utc::now().to_rfc3339();
+    let mut out = String::new();
+    out.push_str(&format!(
+        "# Campaign tracker — `panic-attack`\n\n_Generated {now}_\n\n"
+    ));
+    if rows.is_empty() {
+        out.push_str("_No campaign state recorded yet._\n");
+        return Ok(out);
+    }
+
+    let merged_count = rows
+        .iter()
+        .filter(|r| matches!(r.state.as_str(), state::PR_MERGED | state::PR_CLOSED))
+        .count();
+    let open_count = rows
+        .iter()
+        .filter(|r| matches!(r.state.as_str(), state::PR_FILED | state::OPEN))
+        .count();
+    let dismissed_count = rows.iter().filter(|r| r.state == state::DISMISSED).count();
+    out.push_str(&format!(
+        "**Summary**: {} merged/closed, {} open, {} dismissed (total {}).\n\n",
+        merged_count,
+        open_count,
+        dismissed_count,
+        rows.len()
+    ));
+
+    out.push_str("| ☐ | Finding | Repo | Rule | Location | State | PR / Reason | Last event |\n");
+    out.push_str("|---|---------|------|------|----------|-------|-------------|------------|\n");
+    for r in rows {
+        let check = match r.state.as_str() {
+            state::PR_MERGED | state::PR_CLOSED | state::DISMISSED => "[x]",
+            _ => "[ ]",
+        };
+        let pr_or_reason = match (r.pr_url.as_deref(), r.reason.as_deref()) {
+            (Some(url), _) => format!("[PR]({url})"),
+            (None, Some(reason)) => reason.to_string(),
+            (None, None) => "—".to_string(),
+        };
+        out.push_str(&format!(
+            "| {} | `{}` | {} | {} | {} | {} | {} | {} |\n",
+            check,
+            r.finding_id,
+            r.repo_name.as_deref().unwrap_or("—"),
+            r.rule_id.as_deref().unwrap_or("—"),
+            r.location.as_deref().unwrap_or("—"),
+            r.state,
+            pr_or_reason,
+            r.last_event_at,
+        ));
+    }
+    Ok(out)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[test]
+    fn register_pr_writes_hexad() {
+        let dir = tempdir().unwrap();
+        let path = register_pr(
+            "finding:demo:src/a.rs:1:UnsafeCode",
+            "https://example.invalid/pr/1",
+            dir.path(),
+        )
+        .expect("register ok");
+        assert!(path.exists());
+        let rows = current_state(dir.path()).unwrap();
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0].state, state::PR_FILED);
+        assert_eq!(
+            rows[0].pr_url.as_deref(),
+            Some("https://example.invalid/pr/1")
+        );
+    }
+
+    #[test]
+    fn dismiss_overrides_open() {
+        let dir = tempdir().unwrap();
+        let id = "finding:demo:src/a.rs:1:UnsafeCode";
+        register_pr(id, "https://example.invalid/pr/1", dir.path()).unwrap();
+        // Sleep a hair to ensure the second hexad's created_at sorts strictly later.
+        std::thread::sleep(std::time::Duration::from_millis(1100));
+        dismiss(id, "intentional sentinel", dir.path()).unwrap();
+        let rows = current_state(dir.path()).unwrap();
+        assert_eq!(rows.len(), 1, "one finding, latest state wins");
+        assert_eq!(rows[0].state, state::DISMISSED);
+        assert_eq!(rows[0].reason.as_deref(), Some("intentional sentinel"));
+    }
+
+    #[test]
+    fn register_pr_rejects_empty_args() {
+        let dir = tempdir().unwrap();
+        assert!(register_pr("", "https://example.invalid", dir.path()).is_err());
+        assert!(register_pr("finding:x:y:1:Z", "", dir.path()).is_err());
+    }
+
+    #[test]
+    fn status_markdown_handles_empty() {
+        let dir = tempdir().unwrap();
+        let md = status_markdown(dir.path()).unwrap();
+        assert!(md.contains("No campaign state recorded yet"));
+    }
+
+    #[test]
+    fn status_markdown_renders_rows() {
+        let dir = tempdir().unwrap();
+        register_pr(
+            "finding:alpha:src/a.rs:1:UnsafeCode",
+            "https://example.invalid/pr/1",
+            dir.path(),
+        )
+        .unwrap();
+        std::thread::sleep(std::time::Duration::from_millis(1100));
+        dismiss(
+            "finding:beta:src/b.rs:9:PanicPath",
+            "test coverage gap",
+            dir.path(),
+        )
+        .unwrap();
+        let md = status_markdown(dir.path()).unwrap();
+        assert!(md.contains("finding:alpha:src/a.rs:1:UnsafeCode"));
+        assert!(md.contains("finding:beta:src/b.rs:9:PanicPath"));
+        assert!(md.contains("pr-filed"));
+        assert!(md.contains("dismissed"));
+        assert!(md.contains("test coverage gap"));
+        assert!(md.contains("1 open, 1 dismissed"));
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index b5ca076..8b52541 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -26,6 +26,7 @@ pub mod attestation;
 pub mod axial;
 #[cfg(feature = "http")]
 pub mod bridge;
+pub mod campaign;
 pub mod i18n;
 pub mod kanren;
 pub mod mass_panic;
diff --git a/src/main.rs b/src/main.rs
index b31e0f4..7a127db 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -18,6 +18,7 @@ mod attestation;
 mod axial;
 #[cfg(feature = "http")]
 mod bridge;
+mod campaign;
 mod diagnostics;
 mod groove;
 mod i18n;
@@ -743,6 +744,15 @@ enum Commands {
         #[command(subcommand)]
         action: AttestAction,
     },
+
+    /// Campaign: lifecycle tracking for findings (register-pr, dismiss, status).
+    ///
+    /// Operates on the per-finding hexad store written by `assemblyline` when
+    /// `PANIC_ATTACK_STORE_FINDING_HEXADS=1` is set with verisimdb storage.
+    Campaign {
+        #[command(subcommand)]
+        action: CampaignAction,
+    },
 }
 
 #[derive(Subcommand)]
@@ -755,6 +765,46 @@ enum AttestAction {
     },
 }
 
+/// Campaign subcommands for finding-lifecycle tracking (issue #33 S2).
+#[derive(Subcommand)]
+enum CampaignAction {
+    /// Register an open PR against a known finding-id.
+    RegisterPr {
+        /// Finding id (e.g. `finding:demo:src/a.rs:1:UnsafeCode`).
+        #[arg(value_name = "FINDING_ID")]
+        finding_id: String,
+        /// PR URL (e.g. `https://github.com/org/repo/pull/123`).
+        #[arg(value_name = "PR_URL")]
+        pr_url: String,
+        /// VeriSimDB data directory (default: `verisimdb-data`).
+        #[arg(long, value_name = "DIR", default_value = "verisimdb-data")]
+        verisimdb_dir: PathBuf,
+    },
+
+    /// Mark a finding as dismissed (parked, known-good, out-of-scope).
+    Dismiss {
+        /// Finding id.
+        #[arg(value_name = "FINDING_ID")]
+        finding_id: String,
+        /// Short human-readable reason.
+        #[arg(value_name = "REASON")]
+        reason: String,
+        /// VeriSimDB data directory (default: `verisimdb-data`).
+        #[arg(long, value_name = "DIR", default_value = "verisimdb-data")]
+        verisimdb_dir: PathBuf,
+    },
+
+    /// Render a Markdown tracker of the current campaign state.
+    Status {
+        /// VeriSimDB data directory (default: `verisimdb-data`).
+        #[arg(long, value_name = "DIR", default_value = "verisimdb-data")]
+        verisimdb_dir: PathBuf,
+        /// Write the Markdown to a file instead of stdout.
+        #[arg(short, long, value_name = "FILE")]
+        output: Option<PathBuf>,
+    },
+}
+
 /// Patch Bridge subcommands for CVE lifecycle management.
 #[cfg(feature = "http")]
 #[derive(Subcommand)]
@@ -2354,6 +2404,53 @@ fn run_main() -> Result<()> {
             }
         },
 
+        Commands::Campaign { action } => {
+            match action {
+                CampaignAction::RegisterPr {
+                    finding_id,
+                    pr_url,
+                    verisimdb_dir,
+                } => {
+                    let path = campaign::register_pr(&finding_id, &pr_url, &verisimdb_dir)?;
+                    qprintln!(
+                        cli.quiet,
+                        "Registered PR {} for {} ({})",
+                        pr_url,
+                        finding_id,
+                        path.display()
+                    );
+                }
+                CampaignAction::Dismiss {
+                    finding_id,
+                    reason,
+                    verisimdb_dir,
+                } => {
+                    let path = campaign::dismiss(&finding_id, &reason, &verisimdb_dir)?;
+                    qprintln!(
+                        cli.quiet,
+                        "Dismissed {} ({}): {}",
+                        finding_id,
+                        reason,
+                        path.display()
+                    );
+                }
+                CampaignAction::Status {
+                    verisimdb_dir,
+                    output,
+                } => {
+                    let md = campaign::status_markdown(&verisimdb_dir)?;
+                    match output {
+                        Some(path) => {
+                            std::fs::write(&path, &md)?;
+                            qprintln!(cli.quiet, "Status written to {}", path.display());
+                        }
+                        None => print!("{}", md),
+                    }
+                }
+            }
+            return Ok(());
+        }
+
         Commands::Temporal { action } => {
             match action {
                 TemporalAction::List { verisimdb_dir } => {
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index 4c43d08..4a8c3e9 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -92,6 +92,36 @@ pub struct HexadSemantic {
     /// single WeakPoint emitted by `build_finding_hexads`, issue #33 S1).
     #[serde(skip_serializing_if = "Option::is_none")]
     pub finding: Option<FindingSemantic>,
+    /// Campaign-state semantic data (present when this hexad is a lifecycle
+    /// update — PR registration, dismissal, poll — issue #33 S2).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub campaign: Option<CampaignSemantic>,
+}
+
+/// Campaign-state facet of a hexad: tracks the lifecycle of a single
+/// finding (issue #33 S2).
+///
+/// Append-only: each `register-pr` / `dismiss` / `poll` emits a fresh
+/// hexad with the same `finding_id` subject. `status` aggregates by
+/// taking the newest by `created_at`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CampaignSemantic {
+    /// Subject — must match a `FindingSemantic.finding_id` written by S1.
+    pub finding_id: String,
+    /// State label. Canonical values: "open", "pr-filed", "pr-merged",
+    /// "pr-closed", "dismissed". Free-form so future states can be added
+    /// without a schema bump (forward-compatible by design).
+    pub state: String,
+    /// PR URL when `state` is `pr-*`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub pr_url: Option<String>,
+    /// Human-readable dismissal reason when `state == "dismissed"`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<String>,
+    /// ISO 8601 of the last PR-state poll (S2 follow-up sets this; S2
+    /// initial doesn't poll).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub last_polled: Option<String>,
 }
 
 /// Semantic facets of a per-finding hexad (issue #33 S1).
@@ -221,6 +251,7 @@ fn build_hexad(report: &AssaultReport) -> Result<PanicAttackHexad> {
             categories,
             migration,
             finding: None,
+            campaign: None,
         },
         document,
     })
@@ -343,6 +374,7 @@ fn build_assemblyline_hexad(
             categories,
             migration: None,
             finding: None,
+            campaign: None,
         },
         document,
     })
@@ -489,6 +521,7 @@ pub fn build_finding_hexads(
                         last_seen_run: run_id.clone(),
                         framework: None,
                     }),
+                    campaign: None,
                 },
                 document,
             });
@@ -512,6 +545,121 @@ fn write_finding_hexads(hexads: &[PanicAttackHexad], base_dir: &Path) -> Result<
     Ok(written)
 }
 
+// ---------------------------------------------------------------------------
+// Issue #33 S2 — campaign-state hexad write/load helpers
+// ---------------------------------------------------------------------------
+
+/// Maximum size (in bytes) of a single hexad JSON file we'll load from
+/// disk. Hexads are small documents; anything past 16 MiB is corrupted
+/// or hostile.
+const HEXAD_FILE_READ_LIMIT: u64 = 16 * 1024 * 1024;
+
+/// Build a campaign-state hexad for one lifecycle event (issue #33 S2).
+///
+/// Append-only: each call produces a fresh hexad with a unique id. The
+/// `finding_id` is carried as the semantic subject so the newest hexad
+/// per finding is the current state.
+pub fn build_campaign_hexad(semantic: CampaignSemantic) -> PanicAttackHexad {
+    let now = Utc::now();
+    let hexad_id = format!(
+        "pa-campaign-{}-{}",
+        now.format("%Y%m%d%H%M%S"),
+        &uuid_from_timestamp(now.timestamp_millis())
+    );
+
+    PanicAttackHexad {
+        schema: "verisimdb.hexad.v1".to_string(),
+        id: hexad_id,
+        created_at: now.to_rfc3339(),
+        provenance: HexadProvenance {
+            tool: "panic-attack".to_string(),
+            version: env!("CARGO_PKG_VERSION").to_string(),
+            program_path: "campaign".to_string(),
+            language: "n/a".to_string(),
+            attestation_hash: None,
+        },
+        semantic: HexadSemantic {
+            total_weak_points: 0,
+            critical_count: 0,
+            high_count: 0,
+            total_crashes: 0,
+            robustness_score: 0.0,
+            categories: Vec::new(),
+            migration: None,
+            finding: None,
+            campaign: Some(semantic),
+        },
+        document: serde_json::Value::Null,
+    }
+}
+
+/// Write a single campaign-state hexad under
+/// `<base_dir>/hexads/campaign/<hexad_id>.json`. Returns the path.
+pub fn write_campaign_hexad(hexad: &PanicAttackHexad, base_dir: &Path) -> Result<PathBuf> {
+    let dir = base_dir.join("hexads").join("campaign");
+    fs::create_dir_all(&dir)?;
+    let path = dir.join(format!("{}.json", hexad.id));
+    fs::write(&path, serde_json::to_string_pretty(hexad)?)?;
+    Ok(path)
+}
+
+/// Load every JSON hexad file from a directory.
+///
+/// Files that fail to parse are silently skipped — this is a "best
+/// effort" reader used by status/query subcommands, not a validation
+/// pass. Returns hexads in filesystem-order (the caller sorts as needed).
+fn load_hexad_dir(dir: &Path) -> Result<Vec<PanicAttackHexad>> {
+    use std::io::Read;
+
+    if !dir.exists() {
+        return Ok(Vec::new());
+    }
+    let mut hexads = Vec::new();
+    for entry in fs::read_dir(dir)?.flatten() {
+        let path = entry.path();
+        if path.extension().and_then(|e| e.to_str()) != Some("json") {
+            continue;
+        }
+        let mut content = String::new();
+        let Ok(file) = fs::File::open(&path) else {
+            continue;
+        };
+        if file
+            .take(HEXAD_FILE_READ_LIMIT)
+            .read_to_string(&mut content)
+            .is_err()
+        {
+            continue;
+        }
+        if let Ok(hexad) = serde_json::from_str::<PanicAttackHexad>(&content) {
+            hexads.push(hexad);
+        }
+    }
+    Ok(hexads)
+}
+
+/// Load every per-finding hexad from `<base_dir>/hexads/findings/`.
+pub fn load_finding_hexads(base_dir: &Path) -> Result<Vec<PanicAttackHexad>> {
+    load_hexad_dir(&base_dir.join("hexads").join("findings"))
+}
+
+/// Load every campaign-state hexad from `<base_dir>/hexads/campaign/`.
+pub fn load_campaign_hexads(base_dir: &Path) -> Result<Vec<PanicAttackHexad>> {
+    load_hexad_dir(&base_dir.join("hexads").join("campaign"))
+}
+
+/// Load every aggregate (per-run) hexad from `<base_dir>/hexads/`.
+///
+/// Aggregate hexads live at the top-level `hexads/` directory; per-finding
+/// and per-campaign hexads live in subdirs and are excluded here.
+///
+/// Reserved for S3 query — kept public so the upcoming `query` subcommand
+/// can compose it with the per-finding / per-campaign loaders.
+#[allow(dead_code)]
+pub fn load_aggregate_hexads(base_dir: &Path) -> Result<Vec<PanicAttackHexad>> {
+    load_hexad_dir(&base_dir.join("hexads"))
+}
+
 /// Persist an assemblyline report to storage (filesystem and/or verisimdb).
 ///
 /// This is the batch-scan counterpart to `persist_report()` — it stores

From 68aa30f661fbf129f96d76b38f5054c7aefa7cae Mon Sep 17 00:00:00 2001
From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com>
Date: Tue, 26 May 2026 13:46:54 +0100
Subject: [PATCH 3/3] feat(sweep-tracker): hierarchical estate-sweep Markdown
 report
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds `panic-attack sweep-tracker` subcommand that renders an issue-#32
shaped sweep tracker by joining per-finding hexads (issue #33 S1) with
campaign-state hexads (issue #33 S2). Distinct from `campaign status`:
that is a flat per-finding table; this is a hierarchical checklist
grouped by repo and/or category, with an estate-wide summary header.

Flags:
- `--verisimdb-dir DIR`  : hexad store root (default `verisimdb-data`)
- `--output FILE`        : write Markdown to file instead of stdout
- `--by-repo`            : emit only the "By repo" section
- `--by-category`        : emit only the "By category" section
- no flag                : emit both sections (default)

Output is deterministic — repos alphabetically, findings within each
repo sorted by (rule_id, file, line). A finding with no campaign
hexad shows state `open`; with one, shows the latest state plus PR
URL (rendered as `#<num>` link) or dismissal reason.

Tests cover empty store, by-repo grouping, by-category grouping,
campaign-state join (open / pr-merged / dismissed), deterministic
ordering, both-shape ordering, and PR-number label extraction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/lib.rs               |   1 +
 src/main.rs              |  47 +++
 src/sweep_tracker/mod.rs | 678 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 726 insertions(+)
 create mode 100644 src/sweep_tracker/mod.rs

diff --git a/src/lib.rs b/src/lib.rs
index 8b52541..7e4f92f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,4 +35,5 @@ pub mod panll;
 pub mod report;
 pub mod signatures;
 pub mod storage;
+pub mod sweep_tracker;
 pub mod types;
diff --git a/src/main.rs b/src/main.rs
index 7a127db..b81ef27 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -30,6 +30,7 @@ mod panll;
 mod report;
 mod signatures;
 mod storage;
+mod sweep_tracker;
 mod types;
 
 extern crate walkdir;
@@ -753,6 +754,30 @@ enum Commands {
         #[command(subcommand)]
         action: CampaignAction,
     },
+
+    /// Sweep-tracker: render an issue-#32-style estate-sweep Markdown report.
+    ///
+    /// Joins per-finding hexads (issue #33 S1) with campaign-state hexads
+    /// (issue #33 S2) and groups them by repo and/or category. Distinct
+    /// from `campaign status`: that is a flat per-finding table; this is
+    /// a hierarchical sweep checklist.
+    SweepTracker {
+        /// VeriSimDB data directory (default: `verisimdb-data`).
+        #[arg(long, value_name = "DIR", default_value = "verisimdb-data")]
+        verisimdb_dir: PathBuf,
+
+        /// Write the Markdown to a file instead of stdout.
+        #[arg(short, long, value_name = "FILE")]
+        output: Option<PathBuf>,
+
+        /// Emit only the "By repo" section.
+        #[arg(long, group = "sweep_shape", default_value_t = false)]
+        by_repo: bool,
+
+        /// Emit only the "By category" section.
+        #[arg(long, group = "sweep_shape", default_value_t = false)]
+        by_category: bool,
+    },
 }
 
 #[derive(Subcommand)]
@@ -2451,6 +2476,28 @@ fn run_main() -> Result<()> {
             return Ok(());
         }
 
+        Commands::SweepTracker {
+            verisimdb_dir,
+            output,
+            by_repo,
+            by_category,
+        } => {
+            let shape = match (by_repo, by_category) {
+                (true, false) => sweep_tracker::ReportShape::ByRepo,
+                (false, true) => sweep_tracker::ReportShape::ByCategory,
+                _ => sweep_tracker::ReportShape::Both,
+            };
+            let md = sweep_tracker::render_report(&verisimdb_dir, shape)?;
+            match output {
+                Some(path) => {
+                    std::fs::write(&path, &md)?;
+                    qprintln!(cli.quiet, "Sweep tracker written to {}", path.display());
+                }
+                None => print!("{}", md),
+            }
+            return Ok(());
+        }
+
         Commands::Temporal { action } => {
             match action {
                 TemporalAction::List { verisimdb_dir } => {
diff --git a/src/sweep_tracker/mod.rs b/src/sweep_tracker/mod.rs
new file mode 100644
index 0000000..253f38c
--- /dev/null
+++ b/src/sweep_tracker/mod.rs
@@ -0,0 +1,678 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! Estate-sweep tracker — Markdown report generator (issue #33 follow-up).
+//!
+//! Produces a hierarchical issue-#32-style sweep tracker by joining the
+//! per-finding hexad store (issue #33 S1) with the campaign-state hexad
+//! store (issue #33 S2). This is a *report* over the same data the
+//! per-finding `panic-attack campaign status` table renders, but
+//! organised the way an estate sweep is run: top-down by repo, and a
+//! cross-cut by category.
+//!
+//! Distinguishing features vs `campaign::status_markdown`:
+//!
+//! - **Hierarchical**, not flat: grouped by repo and/or category.
+//! - **Estate summary** up top — count of repos, criticals, PRs filed,
+//!   dismissed, and open-no-PR.
+//! - **Always sourced from the finding store**: a finding with no
+//!   campaign hexad still appears (state `open`); the per-finding
+//!   table is campaign-driven and omits never-touched findings.
+//! - **Deterministic**: repos alphabetically; findings within each
+//!   repo by `(rule_id, file, line)`; categories by rule_id.
+//!
+//! The intended workflow:
+//!
+//! ```text
+//! panic-attack sweep-tracker --output sweep-tracker.md
+//! ```
+//!
+//! …producing a Markdown checklist that can be pasted into an
+//! estate-sweep tracker issue (the issue-#32 shape).
+
+use crate::storage::{load_campaign_hexads, load_finding_hexads, CampaignSemantic};
+use anyhow::Result;
+use chrono::Utc;
+use std::collections::{BTreeMap, HashMap};
+use std::path::Path;
+
+/// Which sections of the report to emit.
+///
+/// `Both` is the default and renders a "By repo" section followed by a
+/// "By category" section, with a shared estate-summary header.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum ReportShape {
+    /// Group findings by repository name only.
+    ByRepo,
+    /// Group findings by category (rule id) only.
+    ByCategory,
+    /// Render both groupings, separated by an `## By category` heading.
+    #[default]
+    Both,
+}
+
+/// One finding joined against its current campaign state, if any.
+#[derive(Debug, Clone)]
+struct JoinedRow {
+    repo_name: String,
+    file: String,
+    line: Option<u32>,
+    rule_id: String,
+    category: String,
+    severity: String,
+    finding_id: String,
+    /// Current campaign state. `"open"` if no campaign hexad exists.
+    state: String,
+    pr_url: Option<String>,
+    dismissal_reason: Option<String>,
+}
+
+impl JoinedRow {
+    fn is_pr_filed(&self) -> bool {
+        matches!(
+            self.state.as_str(),
+            crate::campaign::state::PR_FILED
+                | crate::campaign::state::PR_MERGED
+                | crate::campaign::state::PR_CLOSED
+        )
+    }
+
+    fn is_dismissed(&self) -> bool {
+        self.state == crate::campaign::state::DISMISSED
+    }
+
+    fn is_done(&self) -> bool {
+        matches!(
+            self.state.as_str(),
+            crate::campaign::state::PR_MERGED
+                | crate::campaign::state::PR_CLOSED
+                | crate::campaign::state::DISMISSED
+        )
+    }
+
+    fn is_open(&self) -> bool {
+        self.state == crate::campaign::state::OPEN
+    }
+
+    fn is_critical(&self) -> bool {
+        self.severity.eq_ignore_ascii_case("critical")
+    }
+
+    fn is_high(&self) -> bool {
+        self.severity.eq_ignore_ascii_case("high")
+    }
+
+    /// Stable per-row sort key: `(rule_id, file, line)`. Ties broken by
+    /// finding_id so the order is fully deterministic.
+    fn sort_key(&self) -> (String, String, u32, String) {
+        (
+            self.rule_id.clone(),
+            self.file.clone(),
+            self.line.unwrap_or(0),
+            self.finding_id.clone(),
+        )
+    }
+}
+
+/// Build the joined finding × campaign-state rows for `base_dir`.
+///
+/// One row per finding hexad. Latest campaign hexad per `finding_id`
+/// wins (matching `campaign::current_state`). Returned in unspecified
+/// order; the renderer sorts per section.
+fn collect_rows(base_dir: &Path) -> Result<Vec<JoinedRow>> {
+    let finding_hexads = load_finding_hexads(base_dir)?;
+    let mut campaign_hexads = load_campaign_hexads(base_dir)?;
+    campaign_hexads.sort_by(|a, b| a.created_at.cmp(&b.created_at));
+
+    // Newest campaign hexad per finding_id wins.
+    let mut latest_state: HashMap<String, CampaignSemantic> = HashMap::new();
+    for h in &campaign_hexads {
+        if let Some(c) = h.semantic.campaign.as_ref() {
+            latest_state.insert(c.finding_id.clone(), c.clone());
+        }
+    }
+
+    let mut rows = Vec::new();
+    for h in &finding_hexads {
+        let Some(f) = h.semantic.finding.as_ref() else {
+            continue;
+        };
+        let (state, pr_url, dismissal_reason) = latest_state
+            .get(&f.finding_id)
+            .map(|c| (c.state.clone(), c.pr_url.clone(), c.reason.clone()))
+            .unwrap_or_else(|| (crate::campaign::state::OPEN.to_string(), None, None));
+        rows.push(JoinedRow {
+            repo_name: f.repo_name.clone(),
+            file: f.file.clone(),
+            line: f.line,
+            rule_id: f.rule_id.clone(),
+            category: f.category.clone(),
+            severity: f.severity.clone(),
+            finding_id: f.finding_id.clone(),
+            state,
+            pr_url,
+            dismissal_reason,
+        });
+    }
+    Ok(rows)
+}
+
+/// Estate-wide summary counts for the header.
+#[derive(Debug, Clone, Default)]
+struct Summary {
+    total: usize,
+    repos: usize,
+    critical: usize,
+    high: usize,
+    pr_filed: usize,
+    dismissed: usize,
+    open: usize,
+}
+
+impl Summary {
+    fn from_rows(rows: &[JoinedRow]) -> Self {
+        let mut repos: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
+        let mut s = Summary {
+            total: rows.len(),
+            ..Default::default()
+        };
+        for r in rows {
+            repos.insert(r.repo_name.as_str());
+            if r.is_critical() {
+                s.critical += 1;
+            }
+            if r.is_high() {
+                s.high += 1;
+            }
+            if r.is_pr_filed() {
+                s.pr_filed += 1;
+            }
+            if r.is_dismissed() {
+                s.dismissed += 1;
+            }
+            if r.is_open() {
+                s.open += 1;
+            }
+        }
+        s.repos = repos.len();
+        s
+    }
+}
+
+/// Render one row's trailing state/pr/reason marker, e.g.
+/// `pr-merged ([#42](https://...))`, `dismissed (test scaffold)`, or
+/// `open`.
+fn render_state_marker(row: &JoinedRow) -> String {
+    if let Some(url) = row.pr_url.as_deref() {
+        // Best-effort `#<num>` extraction for compactness; falls back to
+        // the URL itself when the trailing segment isn't a number.
+        let label = pr_number_label(url).unwrap_or_else(|| url.to_string());
+        format!("{} ([{}]({}))", row.state, label, url)
+    } else if let Some(reason) = row.dismissal_reason.as_deref() {
+        format!("{} ({})", row.state, reason)
+    } else {
+        row.state.clone()
+    }
+}
+
+/// Extract a `#<num>` label from a PR URL like
+/// `https://github.com/org/repo/pull/42`. Returns `None` when the URL
+/// doesn't end in a numeric path segment.
+fn pr_number_label(url: &str) -> Option<String> {
+    let trimmed = url.trim_end_matches('/');
+    let tail = trimmed.rsplit('/').next()?;
+    let num: u64 = tail.parse().ok()?;
+    Some(format!("#{}", num))
+}
+
+/// `file:line` shorthand for inline display.
+fn location_str(row: &JoinedRow) -> String {
+    match row.line {
+        Some(n) if n > 0 => format!("{}:{}", row.file, n),
+        _ => row.file.clone(),
+    }
+}
+
+fn checkbox(row: &JoinedRow) -> &'static str {
+    if row.is_done() {
+        "[x]"
+    } else {
+        "[ ]"
+    }
+}
+
+fn render_by_repo_section(rows: &[JoinedRow]) -> String {
+    let mut by_repo: BTreeMap<String, Vec<&JoinedRow>> = BTreeMap::new();
+    for r in rows {
+        by_repo.entry(r.repo_name.clone()).or_default().push(r);
+    }
+
+    let mut out = String::new();
+    out.push_str("## By repo\n\n");
+    if by_repo.is_empty() {
+        out.push_str("_No findings recorded._\n\n");
+        return out;
+    }
+    for (repo, mut repo_rows) in by_repo {
+        repo_rows.sort_by_key(|r| r.sort_key());
+        let critical = repo_rows.iter().filter(|r| r.is_critical()).count();
+        out.push_str(&format!(
+            "### {} ({} findings, {} critical)\n\n",
+            repo,
+            repo_rows.len(),
+            critical,
+        ));
+        for r in repo_rows {
+            out.push_str(&format!(
+                "- {} {} {} — {}\n",
+                checkbox(r),
+                r.rule_id,
+                location_str(r),
+                render_state_marker(r),
+            ));
+        }
+        out.push('\n');
+    }
+    out
+}
+
+fn render_by_category_section(rows: &[JoinedRow]) -> String {
+    let mut by_category: BTreeMap<(String, String), Vec<&JoinedRow>> = BTreeMap::new();
+    for r in rows {
+        by_category
+            .entry((r.rule_id.clone(), r.category.clone()))
+            .or_default()
+            .push(r);
+    }
+
+    let mut out = String::new();
+    out.push_str("## By category\n\n");
+    if by_category.is_empty() {
+        out.push_str("_No findings recorded._\n\n");
+        return out;
+    }
+    for ((rule_id, category), mut cat_rows) in by_category {
+        cat_rows.sort_by_key(|r| (r.repo_name.clone(), r.sort_key()));
+        let repo_set: std::collections::BTreeSet<&str> =
+            cat_rows.iter().map(|r| r.repo_name.as_str()).collect();
+        out.push_str(&format!(
+            "### {} {} ({} findings across {} repos)\n\n",
+            rule_id,
+            category,
+            cat_rows.len(),
+            repo_set.len(),
+        ));
+        for r in cat_rows {
+            out.push_str(&format!(
+                "- {} {} {} — {}\n",
+                checkbox(r),
+                r.repo_name,
+                location_str(r),
+                render_state_marker(r),
+            ));
+        }
+        out.push('\n');
+    }
+    out
+}
+
+fn render_header(rows: &[JoinedRow]) -> String {
+    let now = Utc::now().to_rfc3339();
+    let s = Summary::from_rows(rows);
+    let mut out = String::new();
+    out.push_str("# Estate sweep tracker\n\n");
+    out.push_str(&format!("_Generated {}_\n\n", now));
+    if s.total == 0 {
+        out.push_str("_No findings recorded yet — the per-finding hexad store is empty._\n\n");
+        return out;
+    }
+    out.push_str(&format!(
+        "**Estate summary**: {} findings across {} repos ({} critical, {} high). \
+         {} PR-filed, {} dismissed, {} open (no PR).\n\n",
+        s.total, s.repos, s.critical, s.high, s.pr_filed, s.dismissed, s.open,
+    ));
+    out
+}
+
+/// Render an estate-sweep tracker Markdown report.
+///
+/// Reads the per-finding and campaign hexad stores under `base_dir`,
+/// joins them, and emits a Markdown document shaped after the
+/// issue-#32 tracker checklist.
+pub fn render_report(base_dir: &Path, shape: ReportShape) -> Result<String> {
+    let rows = collect_rows(base_dir)?;
+    let mut out = render_header(&rows);
+    if rows.is_empty() {
+        // Header already announced the empty-store case; skip per-section
+        // headings entirely so the document doesn't carry confusing
+        // "## By repo / _No findings recorded._" stanzas.
+        return Ok(out);
+    }
+    match shape {
+        ReportShape::ByRepo => out.push_str(&render_by_repo_section(&rows)),
+        ReportShape::ByCategory => out.push_str(&render_by_category_section(&rows)),
+        ReportShape::Both => {
+            out.push_str(&render_by_repo_section(&rows));
+            out.push_str(&render_by_category_section(&rows));
+        }
+    }
+    Ok(out)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::campaign;
+    use crate::storage::{
+        CampaignSemantic, FindingSemantic, HexadProvenance, HexadSemantic, PanicAttackHexad,
+    };
+    use std::fs;
+    use std::path::PathBuf;
+    use tempfile::tempdir;
+
+    fn write_finding_hexad(base_dir: &Path, finding: FindingSemantic) -> PathBuf {
+        let dir = base_dir.join("hexads").join("findings");
+        fs::create_dir_all(&dir).unwrap();
+        // Sanitise filename-hostile chars from the finding id; the on-disk
+        // hexad id is decorative (the join key lives in `semantic.finding`).
+        let safe_id: String = finding
+            .finding_id
+            .chars()
+            .map(|c| {
+                if matches!(c, ':' | '/' | '\\') {
+                    '_'
+                } else {
+                    c
+                }
+            })
+            .collect();
+        let id = format!("pa-finding-test-{}", safe_id);
+        let hexad = PanicAttackHexad {
+            schema: "verisimdb.hexad.v1".to_string(),
+            id: id.clone(),
+            created_at: "2026-05-26T12:00:00Z".to_string(),
+            provenance: HexadProvenance {
+                tool: "panic-attack".to_string(),
+                version: "test".to_string(),
+                program_path: format!("/tmp/{}", finding.repo_name),
+                language: "Rust".to_string(),
+                attestation_hash: None,
+            },
+            semantic: HexadSemantic {
+                total_weak_points: 1,
+                critical_count: if finding.severity == "critical" { 1 } else { 0 },
+                high_count: if finding.severity == "high" { 1 } else { 0 },
+                total_crashes: 0,
+                robustness_score: 0.0,
+                categories: vec![finding.category.clone()],
+                migration: None,
+                finding: Some(finding),
+                campaign: None,
+            },
+            document: serde_json::Value::Null,
+        };
+        let path = dir.join(format!("{}.json", id));
+        fs::write(&path, serde_json::to_string_pretty(&hexad).unwrap()).unwrap();
+        path
+    }
+
+    fn write_campaign_hexad_with_id(base_dir: &Path, id: &str, semantic: CampaignSemantic) {
+        let dir = base_dir.join("hexads").join("campaign");
+        fs::create_dir_all(&dir).unwrap();
+        let hexad = PanicAttackHexad {
+            schema: "verisimdb.hexad.v1".to_string(),
+            id: id.to_string(),
+            created_at: format!("2026-05-26T12:00:{:02}Z", id.len() % 60),
+            provenance: HexadProvenance {
+                tool: "panic-attack".to_string(),
+                version: "test".to_string(),
+                program_path: "campaign".to_string(),
+                language: "n/a".to_string(),
+                attestation_hash: None,
+            },
+            semantic: HexadSemantic {
+                total_weak_points: 0,
+                critical_count: 0,
+                high_count: 0,
+                total_crashes: 0,
+                robustness_score: 0.0,
+                categories: Vec::new(),
+                migration: None,
+                finding: None,
+                campaign: Some(semantic),
+            },
+            document: serde_json::Value::Null,
+        };
+        let path = dir.join(format!("{}.json", id));
+        fs::write(&path, serde_json::to_string_pretty(&hexad).unwrap()).unwrap();
+    }
+
+    fn sample_finding(
+        repo: &str,
+        file: &str,
+        line: u32,
+        rule_id: &str,
+        category: &str,
+        severity: &str,
+    ) -> FindingSemantic {
+        FindingSemantic {
+            finding_id: format!("finding:{}:{}:{}:{}", repo, file, line, category),
+            repo_name: repo.to_string(),
+            file: file.to_string(),
+            line: Some(line),
+            category: category.to_string(),
+            rule_id: rule_id.to_string(),
+            rule_name: rule_id.to_lowercase(),
+            severity: severity.to_string(),
+            description: format!("sample finding {}:{}:{}", repo, file, line),
+            first_seen_run: "run-test".to_string(),
+            last_seen_run: "run-test".to_string(),
+            framework: None,
+        }
+    }
+
+    #[test]
+    fn empty_store_yields_empty_marker() {
+        let dir = tempdir().unwrap();
+        let report = render_report(dir.path(), ReportShape::Both).unwrap();
+        assert!(report.starts_with("# Estate sweep tracker"));
+        assert!(report.contains("_No findings recorded yet"));
+        // Neither section should appear when there are no findings.
+        assert!(!report.contains("## By repo"));
+        assert!(!report.contains("## By category"));
+    }
+
+    #[test]
+    fn by_repo_groups_and_summarises() {
+        let dir = tempdir().unwrap();
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/lib.rs", 23, "PA001", "PanicPath", "critical"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/ffi.rs", 7, "PA004", "UnsafeCode", "high"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("beta", "src/auth.rs", 91, "PA022", "CryptoMisuse", "medium"),
+        );
+
+        let report = render_report(dir.path(), ReportShape::ByRepo).unwrap();
+        assert!(report.contains("**Estate summary**: 3 findings across 2 repos"));
+        assert!(report.contains("(1 critical, 1 high)"));
+        assert!(report.contains("## By repo"));
+        assert!(report.contains("### alpha (2 findings, 1 critical)"));
+        assert!(report.contains("### beta (1 findings, 0 critical)"));
+        // Section absent when shape is ByRepo only.
+        assert!(!report.contains("## By category"));
+
+        // Deterministic ordering: alpha before beta, and within alpha the
+        // PA001 finding sorts before PA004 (rule_id ascending).
+        let alpha_idx = report.find("### alpha").unwrap();
+        let beta_idx = report.find("### beta").unwrap();
+        assert!(alpha_idx < beta_idx);
+        let pa001_idx = report.find("PA001 src/lib.rs:23").unwrap();
+        let pa004_idx = report.find("PA004 src/ffi.rs:7").unwrap();
+        assert!(pa001_idx < pa004_idx);
+    }
+
+    #[test]
+    fn by_category_groups_across_repos() {
+        let dir = tempdir().unwrap();
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/a.rs", 1, "PA004", "UnsafeCode", "high"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("beta", "src/b.rs", 2, "PA004", "UnsafeCode", "high"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/c.rs", 3, "PA001", "PanicPath", "medium"),
+        );
+
+        let report = render_report(dir.path(), ReportShape::ByCategory).unwrap();
+        assert!(report.contains("## By category"));
+        assert!(report.contains("### PA001 PanicPath (1 findings across 1 repos)"));
+        assert!(report.contains("### PA004 UnsafeCode (2 findings across 2 repos)"));
+        assert!(!report.contains("## By repo"));
+
+        // PA001 sorts before PA004.
+        let pa001 = report.find("### PA001").unwrap();
+        let pa004 = report.find("### PA004").unwrap();
+        assert!(pa001 < pa004);
+    }
+
+    #[test]
+    fn campaign_state_join_renders_pr_url_and_dismissal() {
+        let dir = tempdir().unwrap();
+
+        let pr_finding =
+            sample_finding("alpha", "src/lib.rs", 23, "PA001", "PanicPath", "critical");
+        let pr_finding_id = pr_finding.finding_id.clone();
+        write_finding_hexad(dir.path(), pr_finding);
+
+        let dismissed_finding = sample_finding(
+            "alpha",
+            "src/auth.rs",
+            91,
+            "PA022",
+            "CryptoMisuse",
+            "medium",
+        );
+        let dismissed_id = dismissed_finding.finding_id.clone();
+        write_finding_hexad(dir.path(), dismissed_finding);
+
+        let open_finding = sample_finding("alpha", "src/ffi.rs", 7, "PA004", "UnsafeCode", "high");
+        write_finding_hexad(dir.path(), open_finding);
+
+        // Manual write of campaign hexads to bypass timestamp collisions
+        // (two same-millisecond calls to `build_campaign_hexad` would
+        // produce identical hexad ids and the second would overwrite the
+        // first on disk). Each gets a deterministic, unique id here.
+        write_campaign_hexad_with_id(
+            dir.path(),
+            "pa-campaign-test-1",
+            CampaignSemantic {
+                finding_id: pr_finding_id.clone(),
+                state: campaign::state::PR_MERGED.to_string(),
+                pr_url: Some("https://github.com/example/alpha/pull/42".to_string()),
+                reason: None,
+                last_polled: None,
+            },
+        );
+
+        write_campaign_hexad_with_id(
+            dir.path(),
+            "pa-campaign-test-2",
+            CampaignSemantic {
+                finding_id: dismissed_id.clone(),
+                state: campaign::state::DISMISSED.to_string(),
+                pr_url: None,
+                reason: Some("test scaffold".to_string()),
+                last_polled: None,
+            },
+        );
+
+        let report = render_report(dir.path(), ReportShape::ByRepo).unwrap();
+
+        // Estate summary: 1 PR-filed (counts pr-merged too), 1 dismissed, 1 open.
+        assert!(report.contains("1 PR-filed, 1 dismissed, 1 open"));
+
+        // PR row: checkbox ticked, state + GitHub-style #42 link.
+        assert!(report.contains(
+            "[x] PA001 src/lib.rs:23 — pr-merged ([#42](https://github.com/example/alpha/pull/42))"
+        ));
+        // Dismissal row: ticked, reason in parens.
+        assert!(report.contains("[x] PA022 src/auth.rs:91 — dismissed (test scaffold)"));
+        // Open row: empty checkbox, bare `open` marker.
+        assert!(report.contains("[ ] PA004 src/ffi.rs:7 — open"));
+    }
+
+    #[test]
+    fn deterministic_ordering_within_repo() {
+        // Insert findings in a deliberately scrambled order; expect the
+        // report to sort them by (rule_id, file, line).
+        let dir = tempdir().unwrap();
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("zzz", "src/z.rs", 9, "PA004", "UnsafeCode", "low"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/b.rs", 5, "PA001", "PanicPath", "low"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/a.rs", 5, "PA001", "PanicPath", "low"),
+        );
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/a.rs", 3, "PA001", "PanicPath", "low"),
+        );
+
+        let report = render_report(dir.path(), ReportShape::ByRepo).unwrap();
+
+        // Repos: alpha before zzz.
+        let alpha = report.find("### alpha").unwrap();
+        let zzz = report.find("### zzz").unwrap();
+        assert!(alpha < zzz);
+
+        // Within alpha: a.rs:3 < a.rs:5 < b.rs:5.
+        let a3 = report.find("PA001 src/a.rs:3").unwrap();
+        let a5 = report.find("PA001 src/a.rs:5").unwrap();
+        let b5 = report.find("PA001 src/b.rs:5").unwrap();
+        assert!(a3 < a5);
+        assert!(a5 < b5);
+    }
+
+    #[test]
+    fn both_shape_emits_repo_then_category() {
+        let dir = tempdir().unwrap();
+        write_finding_hexad(
+            dir.path(),
+            sample_finding("alpha", "src/a.rs", 1, "PA004", "UnsafeCode", "high"),
+        );
+        let report = render_report(dir.path(), ReportShape::Both).unwrap();
+        let repo_idx = report.find("## By repo").unwrap();
+        let cat_idx = report.find("## By category").unwrap();
+        assert!(repo_idx < cat_idx);
+        // Header still present.
+        assert!(report.contains("**Estate summary**"));
+    }
+
+    #[test]
+    fn pr_number_label_handles_non_numeric_tail() {
+        assert_eq!(
+            pr_number_label("https://github.com/foo/bar/pull/42"),
+            Some("#42".to_string())
+        );
+        assert_eq!(
+            pr_number_label("https://github.com/foo/bar/pull/42/"),
+            Some("#42".to_string())
+        );
+        assert_eq!(pr_number_label("https://example.invalid/some/path"), None);
+        assert_eq!(pr_number_label(""), None);
+    }
+}