From ef737612948c0645da75ed4d0e86a74b4cc60ce0 Mon Sep 17 00:00:00 2001
From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com>
Date: Tue, 2 Jun 2026 19:23:56 +0100
Subject: [PATCH] =?UTF-8?q?feat(rules):=20SD022=20+=20SD023=20=E2=80=94=20?=
=?UTF-8?q?stale-path-after-rename=20+=20STATE.a2ml=20divergence?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Adds two new structural-drift rules to catch the exact drift patterns
surfaced by the 2026-06-02 estate sweep against JoshuaJewell/paint-type.
## SD022 — stale-path-after-rename (medium / trigger_intensive)
When a source directory is renamed in a single commit, trailing-edge
doc references frequently outlive the rename. Detection: enumerate
real `src/*/` subdirs in the tree, scan docs (.md / .adoc / .txt / .a2ml /
.contractile / .toml / .twasm) for `src/{name}/` regex matches, flag
any whose `{name}` is not in the real-subdir set.
Exemptions:
- CHANGELOG.md (historical references are documentation, not drift)
- third_party/ subtree (vendored, owned upstream)
Recovery action: `sed -i 's|src/{old}/|src/{new}/|g'` across the
flagged file set. Identify the rename via `git log --diff-filter=R`.
Empirical: caught 49 occurrences across 25 files on paint-type after
PR #48 renamed src/ephapax → src/paint_core in the Cargo workspace
but updated no docs. Manual sweep landed as paint-type#49.
## SD023 — STATE.a2ml divergence (medium)
Some repos retain both legacy `.machine_readable/STATE.a2ml` and
canonical `.machine_readable/6a2/STATE.a2ml`. When both exist, they
MUST agree on `last-updated` — otherwise consumers (Hypatia, agents
reading 6a2) see inconsistent reality.
Detection: extract `last-updated` from each via regex matching both
TOML (`last-updated = "..."`) and Scheme (`(last-updated "...")`)
variants. Flag if both exist + dates disagree.
Empirical: paint-type top-level STATE was 2026-06-01 / 22% completion;
6a2/STATE was 2026-05-11 / 10% completion. Unified in paint-type#49.
## Test coverage
8 new test cases:
- SD022: positive case (ephapax→paint_core); CHANGELOG exemption;
third_party/ exemption; empty src/ baseline
- SD023: positive divergence; matching dates (no finding);
only-one-file (no finding); Scheme-variant matching
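The SD022 tests that need tracked files use the same temporary-dir +
git-init pattern as existing SD001-SD014 tests; the remaining tests
(empty src/ baseline, all SD023 cases) only need the temporary dir.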
## Wired into scan/1
Both rules are appended to the comprehensive scan pipeline. SD022 findings
contribute trigger_intensive=true (one rename-drift hit predicts
others); SD023 stays advisory.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
lib/rules/structural_drift.ex | 168 ++++++++++++++++++++++++++++++++-
test/structural_drift_test.exs | 116 +++++++++++++++++++++++
2 files changed, 283 insertions(+), 1 deletion(-)
diff --git a/lib/rules/structural_drift.ex b/lib/rules/structural_drift.ex
index d39cce8b..22c491f8 100644
--- a/lib/rules/structural_drift.ex
+++ b/lib/rules/structural_drift.ex
@@ -653,7 +653,9 @@ defmodule Hypatia.Rules.StructuralDrift do
sd010_tracked_node_modules(repo_path) ++
sd011_missing_gitignore(repo_path) ++
sd013_path_specific_gitignore(repo_path) ++
- sd014_safedom_example_dialect(repo_path)
+ sd014_safedom_example_dialect(repo_path) ++
+ sd022_stale_path_after_rename(repo_path) ++
+ sd023_state_a2ml_divergence(repo_path)
needs_intensive = Enum.any?(findings, & &1[:trigger_intensive])
needs_alert = Enum.any?(findings, & &1[:alert_user])
@@ -803,4 +805,168 @@ defmodule Hypatia.Rules.StructuralDrift do
true -> branch_block_items(rest, false, acc)
end
end
+
+ # ─── SD022: Stale path references after directory rename ───────────────
+ #
+ # When a source directory is renamed (e.g. `src/ephapax/` → `src/paint_core/`
+ # in a single commit), trailing-edge documentation references frequently
+ # outlive the rename. This rule scans docs for `src/{name}/` references and
+ # flags any whose `{name}` is NOT a real directory in the current tree.
+ #
+ # Discovered on JoshuaJewell/paint-type 2026-06-02: PR #48 renamed
+ # src/ephapax → src/paint_core in the Cargo workspace; 25 docs (49
+ # occurrences) still pointed at the old path. Caught by manual sweep
+ # in PR #49. This rule prevents the next recurrence.
+ #
+ # Exemption: CHANGELOG.md (historical references are documentation, not
+ # drift) and anything under `third_party/` (vendored).
+
+ @doc """
+ SD022: Detect documentation that references a `src/{name}/` path
+ whose `{name}` is not a real directory in the current tree.
+
+ Severity: medium (doc-only; doesn't break the build, but misleads readers).
+ Action: sed sweep `s|src/{old}/|src/{new}/|g` once the rename
+ target is identified (typically via `git log --diff-filter=R`).
+ Triggers: intensive scan (where one rename-drift hits, others follow).
+ """
+ def sd022_stale_path_after_rename(repo_path) do
+ src_root = Path.join(repo_path, "src")
+
+ real_subdirs =
+ case File.ls(src_root) do
+ {:ok, entries} ->
+ entries
+ |> Enum.filter(&File.dir?(Path.join(src_root, &1)))
+ |> MapSet.new()
+
+ {:error, _} ->
+ MapSet.new()
+ end
+
+ if MapSet.size(real_subdirs) == 0 do
+ []
+ else
+ doc_files =
+ find_files_by_ext(repo_path, [
+ ".md",
+ ".adoc",
+ ".txt",
+ ".a2ml",
+ ".contractile",
+ ".toml",
+ ".twasm"
+ ])
+
+ doc_files
+ |> Enum.reject(fn rel ->
+ rel == "CHANGELOG.md" or String.starts_with?(rel, "third_party/")
+ end)
+ |> Enum.flat_map(fn rel ->
+ path = Path.join(repo_path, rel)
+
+ case File.read(path) do
+ {:ok, content} ->
+ ~r{\bsrc/([A-Za-z0-9_][A-Za-z0-9_-]*)/}
+ |> Regex.scan(content)
+ |> Enum.map(fn [_, dir] -> dir end)
+ |> Enum.uniq()
+ |> Enum.reject(&MapSet.member?(real_subdirs, &1))
+ |> Enum.map(fn stale_dir ->
+ %{
+ rule: "SD022",
+ file: rel,
+ severity: :medium,
+ reason:
+ "doc references `src/#{stale_dir}/` but no such directory exists in the tree (likely surviving a directory rename)",
+ action: :rename_sweep,
+ stale_dir: stale_dir,
+ trigger_intensive: true
+ }
+ end)
+
+ _ ->
+ []
+ end
+ end)
+ end
+ end
+
+ # ─── SD023: STATE.a2ml divergence (top-level vs 6a2/) ──────────────────
+ #
+ # The estate v2 convention puts STATE at `.machine_readable/6a2/STATE.a2ml`.
+ # Some repos retain a legacy top-level `.machine_readable/STATE.a2ml`. When
+ # both exist, they MUST agree on the `last-updated` field — otherwise one
+ # is stale and consumers (Hypatia, agents reading 6a2) see the wrong reality.
+ #
+ # Discovered on JoshuaJewell/paint-type 2026-06-02: top-level STATE.a2ml
+ # was 2026-06-01 with 22% completion while 6a2/STATE.a2ml was 2026-05-11
+ # with 10% completion. Caught by manual sweep; PR #49 unified them.
+
+ @doc """
+ SD023: Detect divergence between `.machine_readable/STATE.a2ml` and
+ `.machine_readable/6a2/STATE.a2ml` when both exist.
+
+ Severity: medium (one is stale; consumers may read either).
+ Action: pick the freshest as truth, mirror to the other, document
+ in CHANGELOG which is canonical going forward.
+ """
+ def sd023_state_a2ml_divergence(repo_path) do
+ top = Path.join([repo_path, ".machine_readable", "STATE.a2ml"])
+ six = Path.join([repo_path, ".machine_readable", "6a2", "STATE.a2ml"])
+
+ with true <- File.exists?(top),
+ true <- File.exists?(six),
+ {:ok, top_content} <- File.read(top),
+ {:ok, six_content} <- File.read(six) do
+ top_date = extract_last_updated(top_content)
+ six_date = extract_last_updated(six_content)
+
+ cond do
+ top_date == nil or six_date == nil ->
+ []
+
+ top_date == six_date ->
+ []
+
+ true ->
+ [
+ %{
+ rule: "SD023",
+ file: ".machine_readable/STATE.a2ml + .machine_readable/6a2/STATE.a2ml",
+ severity: :medium,
+ reason:
+ "STATE.a2ml divergence: top-level last-updated=#{top_date}, 6a2/ last-updated=#{six_date}. One is stale; consumers may read either.",
+ action: :unify_state,
+ top_last_updated: top_date,
+ six_last_updated: six_date,
+ trigger_intensive: false
+ }
+ ]
+ end
+ else
+ _ -> []
+ end
+ end
+
+ defp extract_last_updated(content) do
+ # Matches both TOML (`last-updated = "2026-06-02"`) and Scheme
+ # (`(last-updated "2026-06-02")`) variants.
+ case Regex.run(~r/last[-_]updated\s*[=\s]\s*"([^"]+)"/, content) do
+ [_, date] -> date
+ _ -> nil
+ end
+ end
+
+ defp find_files_by_ext(repo_path, exts) do
+ case System.cmd("git", ["-C", repo_path, "ls-files"], stderr_to_stdout: true) do
+ {output, 0} ->
+ output
+ |> String.split("\n", trim: true)
+ |> Enum.filter(fn rel -> Path.extname(rel) in exts end)
+
+ _ ->
+ []
+ end
+ end
end
diff --git a/test/structural_drift_test.exs b/test/structural_drift_test.exs
index e63f8868..35acf737 100644
--- a/test/structural_drift_test.exs
+++ b/test/structural_drift_test.exs
@@ -294,4 +294,120 @@ defmodule Hypatia.Rules.StructuralDriftTest do
assert Map.has_key?(result, :dispatch)
end
end
+
+ describe "sd022_stale_path_after_rename/1" do
+ test "flags docs referencing src// where dir no longer exists", %{repo: repo} do
+ # Real layout: only src/paint_core/ exists
+ File.mkdir_p!(Path.join([repo, "src", "paint_core"]))
+ # Doc still references old src/ephapax/
+ File.write!(Path.join(repo, "EXPLAINME.adoc"), "See src/ephapax/lib.rs for the tile API.")
+ System.cmd("git", ["init"], cd: repo)
+ System.cmd("git", ["add", "."], cd: repo)
+ System.cmd("git", ["commit", "-m", "init", "--no-gpg-sign"], cd: repo,
+ env: [{"GIT_AUTHOR_NAME", "T"}, {"GIT_AUTHOR_EMAIL", "t@t"},
+ {"GIT_COMMITTER_NAME", "T"}, {"GIT_COMMITTER_EMAIL", "t@t"}])
+
+ findings = StructuralDrift.sd022_stale_path_after_rename(repo)
+ assert Enum.any?(findings, &(&1.rule == "SD022"))
+ assert Enum.any?(findings, &(&1.stale_dir == "ephapax"))
+ assert Enum.all?(findings, &(&1.severity == :medium))
+ assert Enum.all?(findings, & &1.trigger_intensive)
+ end
+
+ test "ignores CHANGELOG.md (historical references are intentional)", %{repo: repo} do
+ File.mkdir_p!(Path.join([repo, "src", "paint_core"]))
+ File.write!(Path.join(repo, "CHANGELOG.md"), "Renamed src/ephapax to src/paint_core.")
+ System.cmd("git", ["init"], cd: repo)
+ System.cmd("git", ["add", "."], cd: repo)
+ System.cmd("git", ["commit", "-m", "init", "--no-gpg-sign"], cd: repo,
+ env: [{"GIT_AUTHOR_NAME", "T"}, {"GIT_AUTHOR_EMAIL", "t@t"},
+ {"GIT_COMMITTER_NAME", "T"}, {"GIT_COMMITTER_EMAIL", "t@t"}])
+
+ findings = StructuralDrift.sd022_stale_path_after_rename(repo)
+ assert findings == []
+ end
+
+ test "ignores third_party/ subtree (vendored)", %{repo: repo} do
+ File.mkdir_p!(Path.join([repo, "src", "paint_core"]))
+ File.mkdir_p!(Path.join([repo, "third_party", "x"]))
+ File.write!(Path.join([repo, "third_party", "x", "README.md"]), "uses src/ephapax/foo")
+ System.cmd("git", ["init"], cd: repo)
+ System.cmd("git", ["add", "."], cd: repo)
+ System.cmd("git", ["commit", "-m", "init", "--no-gpg-sign"], cd: repo,
+ env: [{"GIT_AUTHOR_NAME", "T"}, {"GIT_AUTHOR_EMAIL", "t@t"},
+ {"GIT_COMMITTER_NAME", "T"}, {"GIT_COMMITTER_EMAIL", "t@t"}])
+
+ findings = StructuralDrift.sd022_stale_path_after_rename(repo)
+ assert findings == []
+ end
+
+ test "returns empty when src/ has no subdirs", %{repo: repo} do
+ File.write!(Path.join(repo, "README.md"), "test")
+ findings = StructuralDrift.sd022_stale_path_after_rename(repo)
+ assert findings == []
+ end
+ end
+
+ describe "sd023_state_a2ml_divergence/1" do
+ test "flags divergent last-updated between top-level and 6a2/", %{repo: repo} do
+ File.mkdir_p!(Path.join([repo, ".machine_readable", "6a2"]))
+
+ File.write!(
+ Path.join([repo, ".machine_readable", "STATE.a2ml"]),
+ "[metadata]\nlast-updated = \"2026-06-02\"\n"
+ )
+
+ File.write!(
+ Path.join([repo, ".machine_readable", "6a2", "STATE.a2ml"]),
+ "[metadata]\nlast-updated = \"2026-05-11\"\n"
+ )
+
+ findings = StructuralDrift.sd023_state_a2ml_divergence(repo)
+ assert length(findings) == 1
+ assert hd(findings).rule == "SD023"
+ assert hd(findings).top_last_updated == "2026-06-02"
+ assert hd(findings).six_last_updated == "2026-05-11"
+ end
+
+ test "no finding when dates match", %{repo: repo} do
+ File.mkdir_p!(Path.join([repo, ".machine_readable", "6a2"]))
+ File.write!(
+ Path.join([repo, ".machine_readable", "STATE.a2ml"]),
+ "last-updated = \"2026-06-02\""
+ )
+ File.write!(
+ Path.join([repo, ".machine_readable", "6a2", "STATE.a2ml"]),
+ "last-updated = \"2026-06-02\""
+ )
+
+ assert StructuralDrift.sd023_state_a2ml_divergence(repo) == []
+ end
+
+ test "no finding when only one of the two files exists", %{repo: repo} do
+ File.mkdir_p!(Path.join([repo, ".machine_readable"]))
+ File.write!(
+ Path.join([repo, ".machine_readable", "STATE.a2ml"]),
+ "last-updated = \"2026-06-02\""
+ )
+
+ assert StructuralDrift.sd023_state_a2ml_divergence(repo) == []
+ end
+
+ test "matches Scheme-style (last-updated \"...\") variant", %{repo: repo} do
+ File.mkdir_p!(Path.join([repo, ".machine_readable", "6a2"]))
+
+ File.write!(
+ Path.join([repo, ".machine_readable", "STATE.a2ml"]),
+ "(state (metadata (last-updated \"2026-06-02\")))"
+ )
+
+ File.write!(
+ Path.join([repo, ".machine_readable", "6a2", "STATE.a2ml"]),
+ "last-updated = \"2026-05-11\""
+ )
+
+ findings = StructuralDrift.sd023_state_a2ml_divergence(repo)
+ assert length(findings) == 1
+ end
+ end
end