diff --git a/CHANGELOG.md b/CHANGELOG.md index e291b8b..1d7c9cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased - 2026-05-18 +- Added Phase D.1 render parity findings for explicitly rendered audits, including changed title, description, canonical, missing rendered heading/content, and structured-data loss checks. - Added Phase B repository audit mode so the CLI can inspect a source repository and audit either detected static output or an explicit preview server. - Added `detect-repo [path]` to report repository metadata including package manager, framework signal, build command, preview command, static output directory, and discovered route sources. - Added `audit-repo ` with `--static-dir`, `--preview-command`, `--preview-url`, preview startup timeout, crawl limits, security mode, JSON output, and Markdown output support. diff --git a/docs/superpowers/plans/2026-05-20-render-parity-rule-pack.md b/docs/superpowers/plans/2026-05-20-render-parity-rule-pack.md new file mode 100644 index 0000000..83e4fdc --- /dev/null +++ b/docs/superpowers/plans/2026-05-20-render-parity-rule-pack.md @@ -0,0 +1,920 @@ +# Render Parity Rule Pack Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add deterministic raw/rendered parity findings for SEO/GEO-critical page signals when explicit rendered evidence exists. + +**Architecture:** Add a pure `render-parity.mjs` helper that compares raw extracted evidence with rendered extracted evidence and returns normalized parity facts. Integrate those facts into `rule-engine.mjs` so findings use the existing rule registry, implementation task, severity, confidence, and evidence path model. Keep rendering opt-in; this plan does not change audit defaults or install browser dependencies. 
+ +**Tech Stack:** Node.js ESM, `node:test`, existing CLI modules under `packages/cli/src`, existing fixture and validation scripts. + +--- + +## Source Spec + +Approved design: `docs/superpowers/specs/2026-05-20-render-parity-rule-pack-design.md` + +## File Structure + +- Create `packages/cli/src/render-parity.mjs`: pure comparison helper for raw/rendered extracted evidence. +- Create `packages/cli/test/render-parity.test.mjs`: direct helper coverage. +- Modify `packages/cli/src/rules.mjs`: add render parity rule registry entries and reuse existing raw/rendered rules. +- Modify `packages/cli/test/rules.test.mjs`: assert the new rules are registered with expected severities. +- Modify `packages/cli/src/rule-engine.mjs`: consume render parity facts and emit findings. +- Modify `packages/cli/test/rule-engine.test.mjs`: page-level rule coverage for render parity findings and false positives. +- Modify `packages/cli/test/audit.test.mjs`: add end-to-end injected-renderer coverage without Playwright. +- Modify `scripts/validate-skill.mjs`: require the new helper and test files. +- Modify `CHANGELOG.md`: record user-visible Phase D.1 behavior. + +## Task 1: Add Render Parity Helper + +**Files:** +- Create: `packages/cli/src/render-parity.mjs` +- Create: `packages/cli/test/render-parity.test.mjs` + +- [ ] **Step 1: Write failing helper tests** + +Create `packages/cli/test/render-parity.test.mjs`: + +```js +import test from "node:test"; +import assert from "node:assert/strict"; +import { renderParityFacts } from "../src/render-parity.mjs"; + +const snapshot = ({ raw = {}, rendered = {}, renderStatus = "rendered" } = {}) => ({ + finalUrl: "https://example.com/page", + evidence: { + title: "Raw Title", + description: "Raw description", + canonical: "https://example.com/page", + h1: ["Raw H1"], + structuredData: [], + schemaTypes: [], + counts: { visibleTextCharacters: 500 }, + ...raw, + }, + render: { + status: renderStatus, + evidence: + renderStatus === "rendered" + ? 
{ + title: "Raw Title", + description: "Raw description", + canonical: "https://example.com/page", + h1: ["Raw H1"], + structuredData: [], + schemaTypes: [], + counts: { visibleTextCharacters: 500 }, + ...rendered, + } + : undefined, + }, +}); + +test("returns no parity facts without rendered evidence", () => { + assert.deepEqual(renderParityFacts(snapshot({ renderStatus: "not_requested" }), 0), []); +}); + +test("detects rendered title changes", () => { + const facts = renderParityFacts(snapshot({ rendered: { title: "Client Title" } }), 2); + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.rendered_title_changed"]); + assert.deepEqual(facts[0].evidence, ["$.pages[2].evidence.title", "$.pages[2].render.evidence.title"]); + assert.match(facts[0].impact, /Rendered title changed/); +}); + +test("detects rendered description removal", () => { + const facts = renderParityFacts(snapshot({ rendered: { description: "" } }), 0); + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.rendered_description_changed"]); + assert.deepEqual(facts[0].evidence, ["$.pages[0].evidence.description", "$.pages[0].render.evidence.description"]); +}); + +test("detects rendered canonical changes with URL normalization", () => { + const facts = renderParityFacts( + snapshot({ + raw: { canonical: "https://example.com/page/" }, + rendered: { canonical: "https://example.com/other" }, + }), + 0, + ); + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.rendered_canonical_changed"]); +}); + +test("does not flag equivalent canonical URL formatting", () => { + const facts = renderParityFacts( + snapshot({ + raw: { canonical: "https://example.com/page/" }, + rendered: { canonical: "https://example.com/page" }, + }), + 0, + ); + assert.deepEqual(facts, []); +}); + +test("detects rendered primary heading removal", () => { + const facts = renderParityFacts(snapshot({ rendered: { h1: [] } }), 0); + assert.deepEqual(facts.map((fact) => fact.ruleId), 
["technical.rendered_primary_heading_missing"]); + assert.deepEqual(facts[0].evidence, ["$.pages[0].evidence.h1", "$.pages[0].render.evidence.h1"]); +}); + +test("detects rendered structured data type loss", () => { + const facts = renderParityFacts( + snapshot({ + raw: { + structuredData: [{ data: { "@context": "https://schema.org", "@type": "Organization", name: "Example" } }], + schemaTypes: ["Organization"], + }, + rendered: { structuredData: [], schemaTypes: [] }, + }), + 0, + ); + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.rendered_structured_data_lost"]); + assert.match(facts[0].impact, /Organization/); +}); + +test("ignores invalid raw structured data when checking structured data loss", () => { + const facts = renderParityFacts( + snapshot({ + raw: { structuredData: [{ parseError: true, rawPreview: "{bad json" }], schemaTypes: [] }, + rendered: { structuredData: [], schemaTypes: [] }, + }), + 0, + ); + assert.deepEqual(facts, []); +}); + +test("detects rendered content missing before broad text mismatch", () => { + const facts = renderParityFacts( + snapshot({ + raw: { counts: { visibleTextCharacters: 500 } }, + rendered: { counts: { visibleTextCharacters: 100 } }, + }), + 0, + ); + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.rendered_content_missing"]); +}); + +test("detects large raw and rendered visible text deltas", () => { + const facts = renderParityFacts( + snapshot({ + raw: { counts: { visibleTextCharacters: 900 } }, + rendered: { counts: { visibleTextCharacters: 500 } }, + }), + 0, + ); + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.raw_rendered_mismatch"]); +}); + +test("returns no facts when raw and rendered evidence match", () => { + assert.deepEqual(renderParityFacts(snapshot(), 0), []); +}); +``` + +- [ ] **Step 2: Run helper tests to verify failure** + +Run: + +```bash +node --test packages/cli/test/render-parity.test.mjs +``` + +Expected: FAIL with an import/module-not-found error for 
`../src/render-parity.mjs`. + +- [ ] **Step 3: Implement the render parity helper** + +Create `packages/cli/src/render-parity.mjs`: + +```js +import { normalizeUrl } from "./url-utils.mjs"; + +const cleanText = (value) => + String(value ?? "") + .replace(/\s+/g, " ") + .trim(); + +const comparableText = (value) => cleanText(value).toLowerCase(); + +const shortValue = (value) => { + const cleaned = cleanText(value); + if (!cleaned) return "missing"; + return cleaned.length > 80 ? `${cleaned.slice(0, 77)}...` : cleaned; +}; + +const evidencePath = (pageIndex, path, rendered = false) => + rendered ? `$.pages[${pageIndex}].render.evidence.${path}` : `$.pages[${pageIndex}].evidence.${path}`; + +const compareUrl = (value) => { + const cleaned = cleanText(value); + if (!cleaned) return ""; + try { + return normalizeUrl(cleaned); + } catch { + return cleaned.toLowerCase(); + } +}; + +const structuredDataTypes = (value) => { + if (!value) return []; + if (Array.isArray(value)) return value.flatMap(structuredDataTypes); + if (typeof value !== "object") return []; + + const types = []; + if (value["@type"]) { + if (Array.isArray(value["@type"])) types.push(...value["@type"].map(String)); + else types.push(String(value["@type"])); + } + if (value["@graph"]) types.push(...structuredDataTypes(value["@graph"])); + return types; +}; + +const validStructuredData = (evidence) => (evidence.structuredData || []).filter((item) => item?.data && !item.parseError); + +const schemaTypesFor = (evidence) => { + const explicit = Array.isArray(evidence.schemaTypes) ? 
evidence.schemaTypes.map(String) : []; + const fromBlocks = validStructuredData(evidence).flatMap((item) => structuredDataTypes(item.data)); + return [...new Set([...explicit, ...fromBlocks].filter(Boolean))]; +}; + +const visibleTextCharacters = (evidence) => Number(evidence.counts?.visibleTextCharacters || 0); + +const fact = (ruleId, pageIndex, path, impact) => ({ + ruleId, + evidence: [evidencePath(pageIndex, path), evidencePath(pageIndex, path, true)], + impact, +}); + +export const renderParityFacts = (snapshot, pageIndex = 0) => { + if (snapshot?.render?.status !== "rendered" || !snapshot.render.evidence) return []; + + const raw = snapshot.evidence || {}; + const rendered = snapshot.render.evidence || {}; + const facts = []; + + if (cleanText(raw.title) && comparableText(raw.title) !== comparableText(rendered.title)) { + facts.push( + fact( + "technical.rendered_title_changed", + pageIndex, + "title", + `Rendered title changed from "${shortValue(raw.title)}" to "${shortValue(rendered.title)}".`, + ), + ); + } + + if (cleanText(raw.description) && comparableText(raw.description) !== comparableText(rendered.description)) { + facts.push( + fact( + "technical.rendered_description_changed", + pageIndex, + "description", + `Rendered meta description changed from "${shortValue(raw.description)}" to "${shortValue(rendered.description)}".`, + ), + ); + } + + if (cleanText(raw.canonical) && compareUrl(raw.canonical) !== compareUrl(rendered.canonical)) { + facts.push( + fact( + "technical.rendered_canonical_changed", + pageIndex, + "canonical", + `Rendered canonical changed from "${shortValue(raw.canonical)}" to "${shortValue(rendered.canonical)}".`, + ), + ); + } + + const rawH1 = Array.isArray(raw.h1) ? raw.h1.filter((item) => cleanText(item)) : []; + const renderedH1 = Array.isArray(rendered.h1) ? 
rendered.h1.filter((item) => cleanText(item)) : []; + if (rawH1.length > 0 && renderedH1.length === 0) { + facts.push( + fact( + "technical.rendered_primary_heading_missing", + pageIndex, + "h1", + "Rendered HTML removed the primary H1 that exists in raw HTML.", + ), + ); + } + + const rawStructuredData = validStructuredData(raw); + const renderedStructuredData = validStructuredData(rendered); + const rawSchemaTypes = schemaTypesFor(raw); + const renderedSchemaTypes = new Set(schemaTypesFor(rendered)); + const lostSchemaTypes = rawSchemaTypes.filter((type) => !renderedSchemaTypes.has(type)); + if (rawStructuredData.length > 0 && (renderedStructuredData.length < rawStructuredData.length || lostSchemaTypes.length > 0)) { + const lostTypeText = lostSchemaTypes.length ? ` Lost schema types: ${lostSchemaTypes.join(", ")}.` : ""; + facts.push( + fact( + "technical.rendered_structured_data_lost", + pageIndex, + "structuredData", + `Rendered HTML lost structured data that exists in raw HTML.${lostTypeText}`, + ), + ); + } + + const rawTextLength = visibleTextCharacters(raw); + const renderedTextLength = visibleTextCharacters(rendered); + const textDelta = renderedTextLength - rawTextLength; + if (rawTextLength >= 300 && renderedTextLength < 150) { + facts.push({ + ruleId: "technical.rendered_content_missing", + evidence: [evidencePath(pageIndex, "counts.visibleTextCharacters"), evidencePath(pageIndex, "counts.visibleTextCharacters", true)], + impact: `Rendered HTML has ${renderedTextLength} visible text characters; raw HTML has ${rawTextLength}.`, + }); + } else if (Math.abs(textDelta) > 300) { + facts.push({ + ruleId: "technical.raw_rendered_mismatch", + evidence: [evidencePath(pageIndex, "counts.visibleTextCharacters"), evidencePath(pageIndex, "counts.visibleTextCharacters", true)], + impact: `Rendered visible text differs from raw HTML by ${Math.abs(textDelta)} characters.`, + }); + } + + return facts; +}; +``` + +- [ ] **Step 4: Run helper tests to verify pass** + 
+Run: + +```bash +node --test packages/cli/test/render-parity.test.mjs +``` + +Expected: PASS, 11 tests. + +- [ ] **Step 5: Commit helper** + +Run: + +```bash +git add packages/cli/src/render-parity.mjs packages/cli/test/render-parity.test.mjs +git commit -m "feat: add render parity helper" +``` + +## Task 2: Register Render Parity Rules + +**Files:** +- Modify: `packages/cli/src/rules.mjs` +- Modify: `packages/cli/test/rules.test.mjs` + +- [ ] **Step 1: Write failing rule registry tests** + +Append this test to `packages/cli/test/rules.test.mjs`: + +```js +test("defines render parity rule metadata", () => { + const expected = { + "technical.rendered_title_changed": "P2", + "technical.rendered_description_changed": "P3", + "technical.rendered_canonical_changed": "P1", + "technical.rendered_primary_heading_missing": "P1", + "technical.rendered_structured_data_lost": "P2", + "technical.rendered_content_missing": "P1", + "technical.raw_rendered_mismatch": "P2", + }; + + for (const [ruleId, severity] of Object.entries(expected)) { + const item = getRule(ruleId); + assert.ok(item, `${ruleId} is registered`); + assert.equal(item.dimension, "technical"); + assert.equal(item.defaultSeverity, severity); + assert.ok(item.recommendation); + assert.ok(item.sources.length > 0); + } +}); +``` + +- [ ] **Step 2: Run registry test to verify failure** + +Run: + +```bash +node --test packages/cli/test/rules.test.mjs +``` + +Expected: FAIL because at least `technical.rendered_title_changed` is not registered. 
+ +- [ ] **Step 3: Add render parity rules** + +In `packages/cli/src/rules.mjs`, add these rule entries after `technical.raw_rendered_mismatch`: + +```js + rule( + "technical.rendered_title_changed", + "technical", + "P2", + "Rendered HTML changes the page title", + "Keep rendered title output aligned with raw HTML so crawlers and users receive a stable title signal.", + [sources.javascriptSeo, sources.titleLinks], + ), + rule( + "technical.rendered_description_changed", + "technical", + "P3", + "Rendered HTML changes the meta description", + "Keep rendered meta description output aligned with raw HTML when the page relies on it for snippet context.", + [sources.javascriptSeo, sources.snippets], + ), + rule( + "technical.rendered_canonical_changed", + "technical", + "P1", + "Rendered HTML changes the canonical URL", + "Keep canonical URL output stable between raw and rendered HTML so indexing signals remain consistent.", + [sources.javascriptSeo, sources.canonicalization], + ), + rule( + "technical.rendered_primary_heading_missing", + "technical", + "P1", + "Rendered HTML removes the primary heading", + "Ensure the rendered page preserves the primary heading that communicates page purpose.", + [sources.javascriptSeo, sources.helpfulContent], + ), + rule( + "technical.rendered_structured_data_lost", + "technical", + "P2", + "Rendered HTML removes structured data", + "Preserve structured data through rendering and hydration so eligible markup remains available.", + [sources.javascriptSeo, sources.structuredDataIntro, sources.structuredDataPolicies], + ), +``` + +Do not add a second `technical.rendered_content_missing` or `technical.raw_rendered_mismatch`; those already exist. + +- [ ] **Step 4: Run registry tests to verify pass** + +Run: + +```bash +node --test packages/cli/test/rules.test.mjs +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Verify `explain-rule` for a new rule** + +Run: + +```bash +node packages/cli/src/cli.mjs explain-rule technical.rendered_canonical_changed +``` + +Expected: JSON output with `"id": "technical.rendered_canonical_changed"` and at least one Google Search Central source URL. + +- [ ] **Step 6: Commit rule registry** + +Run: + +```bash +git add packages/cli/src/rules.mjs packages/cli/test/rules.test.mjs +git commit -m "feat: register render parity rules" +``` + +## Task 3: Integrate Render Parity Into Page Findings + +**Files:** +- Modify: `packages/cli/src/rule-engine.mjs` +- Modify: `packages/cli/test/rule-engine.test.mjs` + +- [ ] **Step 1: Write failing page-level rule tests** + +Append these tests to `packages/cli/test/rule-engine.test.mjs`: + +```js +const renderedSnapshotFor = (rawHtml, renderedHtml, overrides = {}) => { + const base = snapshotFor(rawHtml, overrides); + return { + ...base, + render: { + status: "rendered", + renderedHash: "rendered", + evidence: extractHtmlEvidence(renderedHtml, base.finalUrl), + textDeltaCharacters: + extractHtmlEvidence(renderedHtml, base.finalUrl).counts.visibleTextCharacters - + base.evidence.counts.visibleTextCharacters, + }, + }; +}; + +test("flags rendered metadata and canonical changes", () => { + const rawHtml = ` + + + Raw Product Title + + + +

Product

${"Useful product content ".repeat(40)}

+ + `; + const renderedHtml = ` + + + Client Product Title + + + +

Product

${"Useful product content ".repeat(40)}

+ + `; + + const findings = evaluatePage(renderedSnapshotFor(rawHtml, renderedHtml)); + const ids = findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_title_changed")); + assert.ok(ids.includes("technical.rendered_description_changed")); + assert.ok(ids.includes("technical.rendered_canonical_changed")); + + const canonical = findings.find((finding) => finding.ruleId === "technical.rendered_canonical_changed"); + assert.equal(canonical.severity, "P1"); + assert.equal(canonical.owner, "Engineering"); + assert.deepEqual(canonical.evidence, ["$.pages[0].evidence.canonical", "$.pages[0].render.evidence.canonical"]); +}); + +test("flags rendered primary heading and structured data loss", () => { + const rawHtml = ` + + + Organization + + + +

Example Org

${"Useful organization content ".repeat(40)}

+ + `; + const renderedHtml = ` + + + Organization + + +

${"Useful organization content ".repeat(40)}

+ + `; + + const findings = evaluatePage(renderedSnapshotFor(rawHtml, renderedHtml)); + const ids = findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_primary_heading_missing")); + assert.ok(ids.includes("technical.rendered_structured_data_lost")); + + const structuredData = findings.find((finding) => finding.ruleId === "technical.rendered_structured_data_lost"); + assert.deepEqual(structuredData.evidence, ["$.pages[0].evidence.structuredData", "$.pages[0].render.evidence.structuredData"]); +}); + +test("flags rendered content missing without duplicate raw rendered mismatch", () => { + const rawHtml = ` + + Useful Page +

Useful Page

${"Useful content ".repeat(80)}

+ + `; + const renderedHtml = ` + + Useful Page +

Useful Page

Loading.

+ + `; + + const findings = evaluatePage(renderedSnapshotFor(rawHtml, renderedHtml)); + const ids = findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_content_missing")); + assert.equal(ids.includes("technical.raw_rendered_mismatch"), false); +}); + +test("flags broad raw rendered mismatch when rendered content is still substantial", () => { + const rawHtml = ` + + Useful Page +

Useful Page

${"Raw content ".repeat(100)}

+ + `; + const renderedHtml = ` + + Useful Page +

Useful Page

${"Rendered content ".repeat(40)}

+ + `; + + const findings = evaluatePage(renderedSnapshotFor(rawHtml, renderedHtml)); + assert.ok(findings.map((finding) => finding.ruleId).includes("technical.raw_rendered_mismatch")); +}); + +test("does not emit render parity findings without rendered evidence or when rendered evidence matches", () => { + const html = ` + + + Stable Page + + + +

Stable Page

${"Stable content ".repeat(40)}

+ + `; + + const withoutRender = evaluatePage(snapshotFor(html)); + const withMatchingRender = evaluatePage(renderedSnapshotFor(html, html)); + const parityIds = [ + "technical.rendered_title_changed", + "technical.rendered_description_changed", + "technical.rendered_canonical_changed", + "technical.rendered_primary_heading_missing", + "technical.rendered_structured_data_lost", + "technical.rendered_content_missing", + "technical.raw_rendered_mismatch", + ]; + + for (const id of parityIds) { + assert.equal(withoutRender.some((finding) => finding.ruleId === id), false); + assert.equal(withMatchingRender.some((finding) => finding.ruleId === id), false); + } +}); +``` + +- [ ] **Step 2: Run page-level tests to verify failure** + +Run: + +```bash +node --test packages/cli/test/rule-engine.test.mjs +``` + +Expected: FAIL because render parity facts are not consumed by `evaluatePage`. + +- [ ] **Step 3: Integrate render parity facts** + +In `packages/cli/src/rule-engine.mjs`, add this import: + +```js +import { renderParityFacts } from "./render-parity.mjs"; +``` + +Remove the existing block that directly checks `snapshot.render?.status === "rendered" && snapshot.render.textDeltaCharacters > 300`. + +Add this block in the same area where the old raw/rendered mismatch check lived, after canonical checks and before indexability directives: + +```js + for (const parityFact of renderParityFacts(snapshot, pageIndex)) { + findings.push(createFinding(parityFact.ruleId, snapshot, parityFact.evidence, pageIndex, parityFact.impact)); + } +``` + +- [ ] **Step 4: Run page-level tests to verify pass** + +Run: + +```bash +node --test packages/cli/test/rule-engine.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 5: Run focused render helper and rule tests** + +Run: + +```bash +node --test packages/cli/test/render-parity.test.mjs packages/cli/test/rule-engine.test.mjs packages/cli/test/rules.test.mjs +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit rule-engine integration** + +Run: + +```bash +git add packages/cli/src/rule-engine.mjs packages/cli/test/rule-engine.test.mjs +git commit -m "feat: emit render parity findings" +``` + +## Task 4: Add End-To-End Injected Renderer Coverage + +**Files:** +- Modify: `packages/cli/test/audit.test.mjs` + +- [ ] **Step 1: Write audit integration regression test** + +Append this test to `packages/cli/test/audit.test.mjs`: + +```js +test("includes render parity findings when an injected renderer changes SEO signals", async () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "geo-seo-render-parity-")); + const html = path.join(dir, "index.html"); + fs.writeFileSync( + html, + ` + + + Raw Render Parity Title + + + + + +

Render Parity Org

+

${"Useful render parity content ".repeat(60)}

+ + + `, + ); + + const audit = await runAudit({ + target: html, + renderer: async () => ` + + + Client Render Parity Title + + + + +

Loading.

+ + + `, + }); + + assert.equal(audit.pages[0].render.status, "rendered"); + const ids = audit.findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_title_changed")); + assert.ok(ids.includes("technical.rendered_description_changed")); + assert.ok(ids.includes("technical.rendered_canonical_changed")); + assert.ok(ids.includes("technical.rendered_primary_heading_missing")); + assert.ok(ids.includes("technical.rendered_structured_data_lost")); + assert.ok(ids.includes("technical.rendered_content_missing")); +}); +``` + +- [ ] **Step 2: Run audit test to verify pass through the public audit path** + +Run: + +```bash +node --test packages/cli/test/audit.test.mjs +``` + +Expected: PASS because Task 3 already made the rule behavior available and this test proves it through `runAudit`. + +- [ ] **Step 3: Run focused render parity tests with audit coverage** + +Run: + +```bash +node --test packages/cli/test/render-parity.test.mjs packages/cli/test/rule-engine.test.mjs packages/cli/test/audit.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 4: Commit audit coverage** + +Run: + +```bash +git add packages/cli/test/audit.test.mjs +git commit -m "test: cover render parity audit output" +``` + +## Task 5: Update Validation And User-Facing Docs + +**Files:** +- Modify: `scripts/validate-skill.mjs` +- Modify: `CHANGELOG.md` + +- [ ] **Step 1: Add validation coverage for new files** + +In `scripts/validate-skill.mjs`, add these entries to `requiredFiles` near the related source and test files: + +```js + "packages/cli/src/render-parity.mjs", +``` + +and: + +```js + "packages/cli/test/render-parity.test.mjs", +``` + +- [ ] **Step 2: Add changelog entry** + +In `CHANGELOG.md`, under `## Unreleased - 2026-05-18`, add: + +```md +- Added Phase D.1 render parity findings for explicitly rendered audits, including changed title, description, canonical, missing rendered heading/content, and structured-data loss checks. 
+``` + +- [ ] **Step 3: Run validation** + +Run: + +```bash +npm run validate +``` + +Expected: PASS with `"ok": true` and `requiredFiles` increased by 2 compared with the current baseline. + +- [ ] **Step 4: Run changelog grep** + +Run: + +```bash +rg -n "Phase D.1 render parity|render-parity.mjs|render-parity.test.mjs" CHANGELOG.md scripts/validate-skill.mjs +``` + +Expected: output includes the changelog entry and both validation file entries. + +- [ ] **Step 5: Commit validation and docs** + +Run: + +```bash +git add scripts/validate-skill.mjs CHANGELOG.md +git commit -m "docs: record render parity rule pack" +``` + +## Task 6: Final Verification And Review + +**Files:** +- Read-only verification across the repository. + +- [ ] **Step 1: Run focused test suite** + +Run: + +```bash +node --test packages/cli/test/render-parity.test.mjs packages/cli/test/rules.test.mjs packages/cli/test/rule-engine.test.mjs packages/cli/test/audit.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 2: Run full test suite** + +Run: + +```bash +npm test +``` + +Expected: PASS with zero failures. + +- [ ] **Step 3: Run skill validation** + +Run: + +```bash +npm run validate +``` + +Expected: PASS with `"ok": true`. + +- [ ] **Step 4: Run whitespace diff check** + +Run: + +```bash +git diff --check +``` + +Expected: no output and exit code 0. + +- [ ] **Step 5: Inspect working tree** + +Run: + +```bash +git status --short --branch +``` + +Expected: clean working tree on the implementation branch. + +- [ ] **Step 6: Request final code review** + +Use `superpowers:requesting-code-review` after verification passes. 
The review request should ask for: + +- render parity helper correctness +- false-positive risk +- evidence path stability +- no default rendering behavior changes +- no ranking overclaims +- validation coverage + +- [ ] **Step 7: Fix review findings** + +If review returns findings, implement only the requested fixes, rerun focused tests plus the relevant full verification command, and commit with a focused message. + +- [ ] **Step 8: Finish branch** + +Use `superpowers:finishing-a-development-branch` after review and verification pass. Present merge or PR options to the user. + +## Completion Checklist + +- [ ] `packages/cli/src/render-parity.mjs` exists and is pure. +- [ ] `packages/cli/test/render-parity.test.mjs` covers helper behavior and false positives. +- [ ] New render parity rule IDs are registered with expected severities. +- [ ] `evaluatePage` emits render parity findings only when rendered evidence exists. +- [ ] `technical.rendered_content_missing` suppresses duplicate broad `technical.raw_rendered_mismatch` for near-total content loss. +- [ ] Injected renderer audit coverage proves end-to-end behavior without Playwright. +- [ ] `scripts/validate-skill.mjs` protects new source and test files. +- [ ] `CHANGELOG.md` records Phase D.1 without claiming measured ranking impact. +- [ ] `npm test`, `npm run validate`, and `git diff --check` pass. diff --git a/docs/superpowers/specs/2026-05-20-render-parity-rule-pack-design.md b/docs/superpowers/specs/2026-05-20-render-parity-rule-pack-design.md new file mode 100644 index 0000000..3ee4a98 --- /dev/null +++ b/docs/superpowers/specs/2026-05-20-render-parity-rule-pack-design.md @@ -0,0 +1,295 @@ +# Render Parity Rule Pack Design + +Date: 2026-05-20 +Repository: openclaw-geo-seo-audit-skill +Status: Approved design direction; awaiting user review before implementation planning + +## Purpose + +This spec defines Phase D.1 of the deterministic GEO/SEO audit PRD: a render parity rule pack for developer repo audits. 
+ +The product direction remains a deterministic audit CLI plus OpenClaw skill wrapper. The CLI is the source of evidence. The skill wrapper interprets CLI evidence, explains priorities, cites sources, and avoids inventing findings. This phase improves repo-audit depth by detecting when raw or generated HTML differs from rendered HTML in ways that create SEO/GEO readiness risk. + +## User-Approved Direction + +The approved direction is: + +- Focus Phase D on developer repo audit depth. +- Use an output-first approach with repo context. +- Center the first rule cluster on raw/rendered parity and primary-content risk. +- Evaluate these rules only when render evidence already exists through explicit `--render auto|always` or an injected renderer. +- Add a small set of specific rules rather than one broad finding or a large rule explosion. +- Use a focused rule pack with a small helper module so comparison logic stays testable and does not bloat `rule-engine.mjs`. + +## Current Baseline + +Already implemented on `main`: + +- `snapshot.mjs` can collect raw evidence and optional rendered evidence. +- Rendered evidence is stored under `snapshot.render` when rendering is requested and succeeds. +- `technical.raw_rendered_mismatch` exists, but currently only checks a large visible-text character delta. +- `technical.rendered_content_missing` exists in the rule registry but is not yet meaningfully triggered. +- Repo audits support explicit static-output and preview-server workflows. +- Next.js and Astro fixture repos exist for deterministic repo audit coverage. +- Findings already include rule IDs, severity, dimension, evidence paths, implementation tasks, confidence, sources, and page indexes. + +## Goals + +Add deterministic page-level findings that help developers identify SEO/GEO risks caused by JavaScript rendering, hydration, or client-only mutations. + +The rule pack should identify: + +- Rendered title changes or disappearance. 
+- Rendered meta description changes or disappearance. +- Rendered canonical URL changes or disappearance. +- Rendered primary heading disappearance. +- Rendered structured-data count or type loss. +- Material visible-text deltas with better evidence than the current broad check. + +The findings should help answer: "If I audit this repo output before deployment, will crawlers and users see the same primary SEO/GEO signals after rendering?" + +## Non-Goals + +This phase will not: + +- Enable rendering by default. +- Install Playwright or browser dependencies automatically. +- Parse framework source files deeply. +- Infer framework metadata exports from React, Next.js, Astro, or other source code. +- Add a new audit profile such as `--profile repo-depth`. +- Claim measured ranking impact. +- Replace external evidence integrations for Search Console, SERP visibility, AI-answer visibility, or Lighthouse execution. +- Change the output schema shape in a breaking way. + +## Architecture + +### Render Parity Helper + +Add a focused helper module at `packages/cli/src/render-parity.mjs`. + +Responsibilities: + +- Accept a snapshot or raw/rendered evidence pair. +- Return normalized parity facts for values already extracted by `html-extract.mjs`. +- Compare raw and rendered values for: + - title + - meta description + - canonical + - H1 values + - visible text character counts + - structured data block count + - structured data schema types +- Produce stable evidence path metadata for each detected difference. +- Avoid rule severity decisions; the rule engine owns finding creation. + +The helper should be pure and unit-testable. It should not read files, run browsers, inspect frameworks, or mutate snapshots. 
+ +### Rule Engine Integration + +`packages/cli/src/rule-engine.mjs` should call the helper only when: + +- `snapshot.render?.status === "rendered"` +- `snapshot.render.evidence` exists + +If rendering was not requested, failed, or was unavailable, no render parity rules should fire. Evidence gaps for unavailable rendering are out of scope for this phase because the approved direction limits these rules to cases where render evidence was explicitly requested and collected. + +### Rule Registry + +Add or activate a small rule set in `packages/cli/src/rules.mjs`. + +Recommended rule IDs: + +- `technical.rendered_title_changed` +- `technical.rendered_description_changed` +- `technical.rendered_canonical_changed` +- `technical.rendered_primary_heading_missing` +- `technical.rendered_structured_data_lost` +- `technical.raw_rendered_mismatch` +- `technical.rendered_content_missing` + +`technical.raw_rendered_mismatch` already exists and should be enriched rather than duplicated. `technical.rendered_content_missing` already exists and should become meaningful when raw HTML has primary content but rendered evidence loses it. + +Rule severities: + +- `technical.rendered_canonical_changed`: `P1` +- `technical.rendered_primary_heading_missing`: `P1` +- `technical.rendered_content_missing`: existing `P1` +- `technical.rendered_structured_data_lost`: `P2` +- `technical.rendered_title_changed`: `P2` +- `technical.rendered_description_changed`: `P3` +- `technical.raw_rendered_mismatch`: existing `P2` + +Severity should remain conservative. The rule text should describe eligibility and readiness risk, not ranking loss. + +## Finding Behavior + +Each finding should include: + +- Stable rule ID. +- Existing severity/dimension model. +- Affected URL. +- Raw evidence path. +- Rendered evidence path. +- Impact text that names the specific changed signal. +- Existing implementation task format. +- Engineering owner. +- Confidence of `high` when raw and rendered evidence directly contradict each other. 
+- Existing Google Search Central citations through the rule registry. + +Evidence should be path-oriented by default, for example: + +- `$.pages[0].evidence.title` +- `$.pages[0].render.evidence.title` +- `$.pages[0].evidence.canonical` +- `$.pages[0].render.evidence.canonical` +- `$.pages[0].evidence.structuredData` +- `$.pages[0].render.evidence.structuredData` + +Avoid embedding long page text in findings. If snippets are added, cap them tightly and normalize whitespace. + +## Rule Trigger Details + +### Rendered Title Changed + +Trigger when raw title exists and rendered title is missing or materially different after trimming and whitespace normalization. + +Do not trigger when both titles are absent; existing missing-title logic handles raw missing titles. + +### Rendered Description Changed + +Trigger when raw meta description exists and rendered description is missing or materially different after normalization. + +This should be lower severity than title or canonical changes because descriptions influence snippet eligibility but are not guaranteed to be used as the displayed snippet. + +### Rendered Canonical Changed + +Trigger when raw canonical exists and rendered canonical is missing or normalizes to a different URL. + +This should use URL normalization where possible. If URL normalization fails, compare normalized strings. + +### Rendered Primary Heading Missing + +Trigger when raw H1 evidence exists and rendered H1 evidence is empty. + +Do not trigger for H1 copy changes unless the rendered H1 becomes empty. Fine-grained heading-copy changes can be a later Phase D.2 if needed. + +### Rendered Structured Data Lost + +Trigger when raw JSON-LD structured data exists and rendered evidence has fewer valid structured-data blocks or loses schema types that existed in raw evidence. + +Do not trigger when raw structured data is invalid; `structured_data.invalid_jsonld` already handles parse failure. 
+ +### Rendered Content Missing + +Use `technical.rendered_content_missing` when raw visible text indicates a meaningful page but rendered visible text drops below a low threshold. + +Trigger: + +- raw visible text characters >= 300 +- rendered visible text characters < 150 + +### Raw/Rendered Mismatch + +Keep `technical.raw_rendered_mismatch` as a broader P2 signal for large text deltas when content is not nearly missing. + +Trigger: + +- absolute visible-text delta > 300 characters +- and rendered content is not already below the `rendered_content_missing` threshold + +This prevents duplicate broad findings when the stronger content-missing rule already explains the risk. + +## Testing Strategy + +### Unit Tests + +Add direct tests for the render parity helper: + +- no render evidence returns no parity facts +- title changed +- description removed +- canonical changed with URL normalization +- primary heading removed +- structured data type lost +- visible text nearly missing +- large text delta without near-total loss +- unchanged raw/rendered evidence returns no facts + +### Rule Tests + +Add `rule-engine` tests for each new or activated finding: + +- one triggering snapshot +- one false-positive guard where raw and rendered evidence match +- evidence paths include both raw and rendered paths +- severity and owner match the registry/default model + +### Fixture Tests + +At least one repo or static fixture should exercise render parity through an injected renderer so tests do not require Playwright. + +The fixture should make raw/generated HTML contain SEO/GEO-critical signals and have the injected renderer remove or alter them. This keeps CI deterministic and avoids browser installation assumptions. + +### Golden Outputs + +Update golden summaries only where intentional. Prefer focused rule tests over large golden churn. + +### Validation + +`npm test` and `npm run validate` must pass. 
If new files become required for the skill package or test coverage, update `scripts/validate-skill.mjs`. + +## Reporting And Skill Impact + +No new report section is required in this phase. Existing Markdown finding output should display the new rule findings. + +The skill wrapper should not need behavioral changes for Phase D.1 unless implementation changes finding wording or evidence gaps. If skill language is touched, it should reinforce: + +- render parity findings are readiness risks +- they are based on explicit rendered evidence +- they do not claim measured rankings + +## Risks And Mitigations + +### Risk: Noisy render differences + +Mitigation: normalize whitespace, compare stable extracted fields, use conservative thresholds, and avoid firing on insignificant copy variation. + +### Risk: Rendering becomes unexpectedly expensive + +Mitigation: do not change default rendering behavior. Rules only evaluate when render evidence already exists. + +### Risk: Duplicate findings + +Mitigation: make `technical.rendered_content_missing` suppress the broad `technical.raw_rendered_mismatch` for near-total content loss. + +### Risk: Framework-specific overreach + +Mitigation: keep this phase output-first. Do not parse source files or infer framework metadata conventions. + +### Risk: Ranking overclaims + +Mitigation: keep wording about crawl/render consistency, search eligibility, and readiness. Do not claim ranking loss. + +## Acceptance Criteria + +- Render parity helper exists and has direct unit tests. +- Render parity findings fire only when rendered evidence exists. +- New or activated rule IDs are present in `rules.mjs` and work with `explain-rule`. +- Page-level rule tests cover each new or activated finding and at least one false-positive guard. +- Evidence paths point to both raw and rendered values. +- No default rendering behavior changes. +- Existing repo-audit, route-list, framework-manifest, and report behavior remains compatible. 
+- `npm test`, `npm run validate`, and `git diff --check` pass before implementation is considered complete. + +## Implementation Planning Notes + +The next planning step should produce a focused implementation plan for this Phase D.1 spec only. + +Recommended task slices: + +1. Add render parity helper and tests. +2. Add or activate rule registry entries. +3. Integrate parity facts into `rule-engine.mjs`. +4. Add fixture/injected-renderer coverage. +5. Update documentation/changelog only for user-visible behavior. +6. Run full verification and request review before merge. diff --git a/packages/cli/src/render-parity.mjs b/packages/cli/src/render-parity.mjs new file mode 100644 index 0000000..25ff340 --- /dev/null +++ b/packages/cli/src/render-parity.mjs @@ -0,0 +1,149 @@ +import { normalizeUrl } from "./url-utils.mjs"; + +const pageEvidencePath = (pageIndex, path) => `$.pages[${pageIndex}].evidence.${path}`; +const renderEvidencePath = (pageIndex, path) => `$.pages[${pageIndex}].render.evidence.${path}`; + +const cleanText = (value) => + String(value ?? 
"") + .replace(/\s+/g, " ") + .trim(); + +const cleanTextFolded = (value) => cleanText(value).toLowerCase(); + +const normalizeCanonical = (value) => { + const cleaned = cleanText(value); + if (!cleaned) return ""; + + try { + return normalizeUrl(cleaned); + } catch { + return cleaned.toLowerCase(); + } +}; + +const firstNormalized = (values) => { + if (!Array.isArray(values)) return ""; + return cleanText(values[0]); +}; + +const structuredDataTypes = (value) => { + if (!value) return []; + if (Array.isArray(value)) return value.flatMap(structuredDataTypes); + if (typeof value !== "object") return []; + + const types = []; + if (value["@type"]) { + if (Array.isArray(value["@type"])) types.push(...value["@type"].map(String)); + else types.push(String(value["@type"])); + } + if (value["@graph"]) types.push(...structuredDataTypes(value["@graph"])); + return types; +}; + +const validStructuredDataBlocks = (evidence) => + (Array.isArray(evidence?.structuredData) ? evidence.structuredData : []).filter((block) => !block?.parseError); + +const schemaTypesFor = (evidence) => { + const explicitTypes = Array.isArray(evidence?.schemaTypes) ? evidence.schemaTypes.map(String) : []; + const blockTypes = validStructuredDataBlocks(evidence).flatMap((block) => structuredDataTypes(block?.data)); + return [...new Set([...explicitTypes, ...blockTypes].map(cleanText).filter(Boolean))]; +}; + +const visibleTextCharacters = (evidence) => { + const value = evidence?.counts?.visibleTextCharacters; + return Number.isFinite(value) ? value : 0; +}; + +const makeFact = (ruleId, pageIndex, paths, impact) => ({ + ruleId, + evidence: paths.flatMap((path) => [pageEvidencePath(pageIndex, path), renderEvidencePath(pageIndex, path)]), + impact, +}); + +export const renderParityFacts = (snapshot, pageIndex = 0) => { + if (snapshot?.render?.status !== "rendered" || !snapshot.render.evidence) return []; + + const raw = snapshot.evidence ?? 
{}; + const rendered = snapshot.render.evidence; + const facts = []; + + const rawTitle = cleanTextFolded(raw.title); + const renderedTitle = cleanTextFolded(rendered.title); + if (rawTitle && rawTitle !== renderedTitle) { + facts.push(makeFact( + "technical.rendered_title_changed", + pageIndex, + ["title"], + "Rendered title differs from the raw HTML title.", + )); + } + + const rawDescription = cleanTextFolded(raw.description); + const renderedDescription = cleanTextFolded(rendered.description); + if (rawDescription && rawDescription !== renderedDescription) { + facts.push(makeFact( + "technical.rendered_description_changed", + pageIndex, + ["description"], + "Rendered description differs from the raw HTML description.", + )); + } + + const rawCanonical = normalizeCanonical(raw.canonical); + const renderedCanonical = normalizeCanonical(rendered.canonical); + if (rawCanonical && rawCanonical !== renderedCanonical) { + facts.push(makeFact( + "technical.rendered_canonical_changed", + pageIndex, + ["canonical"], + "Rendered canonical URL differs from the raw HTML canonical URL.", + )); + } + + const rawPrimaryHeading = firstNormalized(raw.h1); + const renderedPrimaryHeading = firstNormalized(rendered.h1); + if (rawPrimaryHeading && !renderedPrimaryHeading) { + facts.push(makeFact( + "technical.rendered_primary_heading_missing", + pageIndex, + ["h1"], + "Rendered page is missing the primary heading found in raw HTML.", + )); + } + + const rawValidBlocks = validStructuredDataBlocks(raw); + const renderedValidBlocks = validStructuredDataBlocks(rendered); + const rawTypes = schemaTypesFor(raw); + const renderedTypes = schemaTypesFor(rendered); + const renderedTypeSet = new Set(renderedTypes); + const lostTypes = rawTypes.filter((type) => !renderedTypeSet.has(type)); + if (rawValidBlocks.length > renderedValidBlocks.length || lostTypes.length > 0) { + const lostLabel = lostTypes.length > 0 ? 
` Lost schema types: ${lostTypes.join(", ")}.` : ""; + facts.push(makeFact( + "technical.rendered_structured_data_lost", + pageIndex, + ["structuredData"], + `Rendered page has less valid structured data than the raw HTML.${lostLabel}`, + )); + } + + const rawVisibleText = visibleTextCharacters(raw); + const renderedVisibleText = visibleTextCharacters(rendered); + if (rawVisibleText >= 300 && renderedVisibleText < 150) { + facts.push(makeFact( + "technical.rendered_content_missing", + pageIndex, + ["counts.visibleTextCharacters"], + "Rendered page is missing most visible text found in raw HTML.", + )); + } else if (Math.abs(rawVisibleText - renderedVisibleText) > 300) { + facts.push(makeFact( + "technical.raw_rendered_mismatch", + pageIndex, + ["counts.visibleTextCharacters"], + "Rendered visible text count differs substantially from raw HTML.", + )); + } + + return facts; +}; diff --git a/packages/cli/src/rule-engine.mjs b/packages/cli/src/rule-engine.mjs index df79fd4..73db6d3 100644 --- a/packages/cli/src/rule-engine.mjs +++ b/packages/cli/src/rule-engine.mjs @@ -1,4 +1,5 @@ import { implementationTaskFor } from "./finding-task.mjs"; +import { renderParityFacts } from "./render-parity.mjs"; import { getRule } from "./rules.mjs"; import { validateStructuredData } from "./structured-data.mjs"; @@ -145,15 +146,9 @@ export const evaluatePage = (snapshot, pageIndex = 0) => { ); } - if (snapshot.render?.status === "rendered" && snapshot.render.textDeltaCharacters > 300) { + for (const parityFact of renderParityFacts(snapshot, pageIndex)) { findings.push( - createFinding( - "technical.raw_rendered_mismatch", - snapshot, - [`$.pages[${pageIndex}].evidence.counts.visibleTextCharacters`, `$.pages[${pageIndex}].render.textDeltaCharacters`], - pageIndex, - "Large raw and rendered text differences can indicate JavaScript SEO risk.", - ), + createFinding(parityFact.ruleId, snapshot, parityFact.evidence, pageIndex, parityFact.impact), ); } diff --git 
a/packages/cli/src/rules.mjs b/packages/cli/src/rules.mjs index 8517429..c8607a1 100644 --- a/packages/cli/src/rules.mjs +++ b/packages/cli/src/rules.mjs @@ -73,6 +73,46 @@ export const rules = [ "Review JavaScript rendering and hydration so crawlers and users receive consistent primary content.", [sources.javascriptSeo], ), + rule( + "technical.rendered_title_changed", + "technical", + "P2", + "Rendered HTML changes the page title", + "Keep rendered title output aligned with raw HTML so crawlers and users receive a stable title signal.", + [sources.javascriptSeo, sources.titleLinks], + ), + rule( + "technical.rendered_description_changed", + "technical", + "P3", + "Rendered HTML changes the meta description", + "Keep rendered meta description output aligned with raw HTML when the page relies on it for snippet context.", + [sources.javascriptSeo, sources.snippets], + ), + rule( + "technical.rendered_canonical_changed", + "technical", + "P1", + "Rendered HTML changes the canonical URL", + "Keep canonical URL output stable between raw and rendered HTML so indexing signals remain consistent.", + [sources.javascriptSeo, sources.canonicalization], + ), + rule( + "technical.rendered_primary_heading_missing", + "technical", + "P1", + "Rendered HTML removes the primary heading", + "Ensure the rendered page preserves the primary heading that communicates page purpose.", + [sources.javascriptSeo, sources.helpfulContent], + ), + rule( + "technical.rendered_structured_data_lost", + "technical", + "P2", + "Rendered HTML removes structured data", + "Preserve structured data through rendering and hydration so eligible markup remains available.", + [sources.javascriptSeo, sources.structuredDataIntro, sources.structuredDataPolicies], + ), rule( "crawl.robots_blocked", "crawl_index", diff --git a/packages/cli/test/audit.test.mjs b/packages/cli/test/audit.test.mjs index 6f67f51..c7a78ad 100644 --- a/packages/cli/test/audit.test.mjs +++ b/packages/cli/test/audit.test.mjs @@ -187,3 
+187,50 @@ test("normalizes internal URL list entries like URL-list file lines", async () = ); }); }); + +test("includes render parity findings when an injected renderer changes SEO signals", async () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "geo-seo-render-parity-")); + const html = path.join(dir, "index.html"); + fs.writeFileSync( + html, + ` + + + Raw Render Parity Title + + + + + +

Render Parity Org

+

${"Useful render parity content ".repeat(60)}

+ + + `, + ); + + const audit = await runAudit({ + target: html, + renderer: async () => ` + + + Client Render Parity Title + + + + +

Loading.

+ + + `, + }); + + assert.equal(audit.pages[0].render.status, "rendered"); + const ids = audit.findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_title_changed")); + assert.ok(ids.includes("technical.rendered_description_changed")); + assert.ok(ids.includes("technical.rendered_canonical_changed")); + assert.ok(ids.includes("technical.rendered_primary_heading_missing")); + assert.ok(ids.includes("technical.rendered_structured_data_lost")); + assert.ok(ids.includes("technical.rendered_content_missing")); +}); diff --git a/packages/cli/test/render-parity.test.mjs b/packages/cli/test/render-parity.test.mjs new file mode 100644 index 0000000..3b751d5 --- /dev/null +++ b/packages/cli/test/render-parity.test.mjs @@ -0,0 +1,191 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { renderParityFacts } from "../src/render-parity.mjs"; + +const baseEvidence = () => ({ + title: "Example title", + description: "Example description", + canonical: "https://example.com/page/", + h1: ["Primary heading"], + structuredData: [ + { data: { "@type": "Organization", name: "Example" } }, + ], + schemaTypes: ["Organization"], + counts: { + visibleTextCharacters: 400, + }, +}); + +const snapshot = ({ renderStatus = "rendered", evidence = baseEvidence(), renderEvidence = baseEvidence() } = {}) => ({ + evidence, + render: { + status: renderStatus, + evidence: renderEvidence, + }, +}); + +test("returns no facts when rendering was not requested", () => { + assert.deepEqual(renderParityFacts(snapshot({ renderStatus: "not_requested" }), 0), []); +}); + +test("returns no facts when rendered evidence is absent", () => { + assert.deepEqual(renderParityFacts({ + evidence: baseEvidence(), + render: { + status: "rendered", + }, + }), []); +}); + +test("detects rendered title changes", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + title: "Different title", + }, + }), 2); + + 
assert.equal(facts[0].ruleId, "technical.rendered_title_changed"); + assert.deepEqual(facts[0].evidence, [ + "$.pages[2].evidence.title", + "$.pages[2].render.evidence.title", + ]); +}); + +test("detects rendered title removal", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + title: "", + }, + })); + + assert.equal(facts[0].ruleId, "technical.rendered_title_changed"); +}); + +test("detects rendered description removal", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + description: null, + }, + })); + + assert.equal(facts[0].ruleId, "technical.rendered_description_changed"); +}); + +test("detects rendered canonical changes while treating equivalent formatting as equal", () => { + assert.deepEqual(renderParityFacts(snapshot({ + evidence: { + ...baseEvidence(), + canonical: "https://example.com/page/", + }, + renderEvidence: { + ...baseEvidence(), + canonical: "https://example.com/page", + }, + })), []); + + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + canonical: "https://example.com/other", + }, + })); + + assert.equal(facts[0].ruleId, "technical.rendered_canonical_changed"); +}); + +test("detects rendered canonical removal", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + canonical: "", + }, + })); + + assert.equal(facts[0].ruleId, "technical.rendered_canonical_changed"); +}); + +test("detects rendered primary heading removal", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + h1: [], + }, + }), 1); + + assert.equal(facts[0].ruleId, "technical.rendered_primary_heading_missing"); + assert.deepEqual(facts[0].evidence, [ + "$.pages[1].evidence.h1", + "$.pages[1].render.evidence.h1", + ]); +}); + +test("detects rendered structured data type loss", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + 
structuredData: [], + schemaTypes: [], + }, + })); + + assert.equal(facts[0].ruleId, "technical.rendered_structured_data_lost"); + assert.match(facts[0].impact, /Organization/); +}); + +test("ignores invalid raw structured data when checking structured data loss", () => { + const facts = renderParityFacts(snapshot({ + evidence: { + ...baseEvidence(), + structuredData: [ + { parseError: true, rawPreview: "{bad json}" }, + ], + schemaTypes: [], + }, + renderEvidence: { + ...baseEvidence(), + structuredData: [], + schemaTypes: [], + }, + })); + + assert.deepEqual(facts, []); +}); + +test("detects rendered content missing before broad text mismatch", () => { + const facts = renderParityFacts(snapshot({ + renderEvidence: { + ...baseEvidence(), + counts: { + visibleTextCharacters: 120, + }, + }, + })); + + assert.deepEqual(facts.map((fact) => fact.ruleId), ["technical.rendered_content_missing"]); +}); + +test("detects broad raw/rendered visible text deltas when not nearly missing", () => { + const facts = renderParityFacts(snapshot({ + evidence: { + ...baseEvidence(), + counts: { + visibleTextCharacters: 700, + }, + }, + renderEvidence: { + ...baseEvidence(), + counts: { + visibleTextCharacters: 350, + }, + }, + })); + + assert.equal(facts.at(-1).ruleId, "technical.raw_rendered_mismatch"); +}); + +test("returns no facts when raw and rendered evidence match", () => { + assert.deepEqual(renderParityFacts(snapshot()), []); +}); diff --git a/packages/cli/test/rule-engine.test.mjs b/packages/cli/test/rule-engine.test.mjs index f331856..e2f3440 100644 --- a/packages/cli/test/rule-engine.test.mjs +++ b/packages/cli/test/rule-engine.test.mjs @@ -17,6 +17,17 @@ const snapshotFor = (html, overrides = {}) => ({ ...overrides, }); +const renderedSnapshotFor = (rawHtml, renderedHtml, overrides = {}) => { + const renderedEvidence = extractHtmlEvidence(renderedHtml, "https://example.com/bad-page"); + return snapshotFor(rawHtml, { + render: { + status: "rendered", + evidence: 
renderedEvidence, + }, + ...overrides, + }); +}; + test("evaluates deterministic page findings", () => { const findings = evaluatePage( snapshotFor(` @@ -179,3 +190,167 @@ test("flags structured data required property gaps", () => { assert.ok(finding); assert.match(finding.impact, /offers/); }); + +test("flags rendered metadata and canonical changes", () => { + const findings = evaluatePage(renderedSnapshotFor( + ` + + + Raw title + + + +

Product page

${"Useful product copy ".repeat(25)}

+ + `, + ` + + + Rendered title + + + +

Product page

${"Useful product copy ".repeat(25)}

+ + `, + )); + + const ids = findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_title_changed")); + assert.ok(ids.includes("technical.rendered_description_changed")); + assert.ok(ids.includes("technical.rendered_canonical_changed")); + + const canonical = findings.find((finding) => finding.ruleId === "technical.rendered_canonical_changed"); + assert.equal(canonical.severity, "P1"); + assert.equal(canonical.owner, "Engineering"); + assert.deepEqual(canonical.evidence, [ + "$.pages[0].evidence.canonical", + "$.pages[0].render.evidence.canonical", + ]); +}); + +test("flags rendered primary heading and structured data loss", () => { + const findings = evaluatePage(renderedSnapshotFor( + ` + + + Organization profile + + + + +

Organization profile

${"Useful organization context ".repeat(25)}

+ + `, + ` + + + Organization profile + + + +

${"Useful organization context ".repeat(25)}

+ + `, + )); + + const ids = findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_primary_heading_missing")); + assert.ok(ids.includes("technical.rendered_structured_data_lost")); + + const structuredData = findings.find((finding) => finding.ruleId === "technical.rendered_structured_data_lost"); + assert.deepEqual(structuredData.evidence, [ + "$.pages[0].evidence.structuredData", + "$.pages[0].render.evidence.structuredData", + ]); +}); + +test("flags rendered_content_missing without duplicate raw_rendered_mismatch", () => { + const findings = evaluatePage(renderedSnapshotFor( + ` + + + Long page + + + +

Long page

${"Primary content sentence with useful context. ".repeat(30)}

+ + `, + ` + + + Long page + + + +

Long page

Loading.

+ + `, + )); + + const ids = findings.map((finding) => finding.ruleId); + assert.ok(ids.includes("technical.rendered_content_missing")); + assert.equal(ids.includes("technical.raw_rendered_mismatch"), false); +}); + +test("flags broad raw_rendered_mismatch when rendered content is still substantial", () => { + const findings = evaluatePage(renderedSnapshotFor( + ` + + + Comparison page + + + +

Comparison page

${"Detailed comparison content. ".repeat(45)}

+ + `, + ` + + + Comparison page + + + +

Comparison page

${"Detailed comparison content. ".repeat(20)}

+ + `, + )); + + const mismatch = findings.find((finding) => finding.ruleId === "technical.raw_rendered_mismatch"); + assert.ok(mismatch); + assert.deepEqual(mismatch.evidence, [ + "$.pages[0].evidence.counts.visibleTextCharacters", + "$.pages[0].render.evidence.counts.visibleTextCharacters", + ]); +}); + +test("emits no render parity findings without rendered evidence or when rendered evidence matches", () => { + const html = ` + + + Stable page + + + + +

Stable page

Details

${"Stable useful content. ".repeat(20)}

+ + `; + const parityRuleIds = [ + "technical.rendered_title_changed", + "technical.rendered_description_changed", + "technical.rendered_canonical_changed", + "technical.rendered_primary_heading_missing", + "technical.rendered_structured_data_lost", + "technical.rendered_content_missing", + "technical.raw_rendered_mismatch", + ]; + + const withoutRenderedEvidence = evaluatePage(snapshotFor(html, { render: { status: "rendered" } })); + assert.deepEqual(withoutRenderedEvidence.filter((finding) => parityRuleIds.includes(finding.ruleId)), []); + + const matchingRenderedEvidence = evaluatePage(renderedSnapshotFor(html, html)); + assert.deepEqual(matchingRenderedEvidence.filter((finding) => parityRuleIds.includes(finding.ruleId)), []); +}); diff --git a/packages/cli/test/rules.test.mjs b/packages/cli/test/rules.test.mjs index f951e83..8e5e0c8 100644 --- a/packages/cli/test/rules.test.mjs +++ b/packages/cli/test/rules.test.mjs @@ -23,3 +23,24 @@ test("retrieves a rule by ID", () => { const rule = getRule("indexability.noindex"); assert.equal(rule.id, "indexability.noindex"); }); + +test("defines render parity rule metadata", () => { + const expected = { + "technical.rendered_title_changed": "P2", + "technical.rendered_description_changed": "P3", + "technical.rendered_canonical_changed": "P1", + "technical.rendered_primary_heading_missing": "P1", + "technical.rendered_structured_data_lost": "P2", + "technical.rendered_content_missing": "P1", + "technical.raw_rendered_mismatch": "P2", + }; + + for (const [ruleId, severity] of Object.entries(expected)) { + const item = getRule(ruleId); + assert.ok(item, `${ruleId} is registered`); + assert.equal(item.dimension, "technical"); + assert.equal(item.defaultSeverity, severity); + assert.ok(item.recommendation); + assert.ok(item.sources.length > 0); + } +}); diff --git a/scripts/validate-skill.mjs b/scripts/validate-skill.mjs index 79e87a6..3ca5a5a 100644 --- a/scripts/validate-skill.mjs +++ b/scripts/validate-skill.mjs @@ -20,6 
+20,7 @@ const requiredFiles = [ "packages/cli/src/url-utils.mjs", "packages/cli/src/html-extract.mjs", "packages/cli/src/render.mjs", + "packages/cli/src/render-parity.mjs", "packages/cli/src/robots.mjs", "packages/cli/src/sitemap.mjs", "packages/cli/src/snapshot.mjs", @@ -59,6 +60,7 @@ const requiredFiles = [ "examples/fixture-sites/known-issues/canonical-alt.html", "examples/fixture-sites/known-issues/robots.txt", "examples/fixture-sites/known-issues/sitemap.xml", + "packages/cli/test/render-parity.test.mjs", "packages/cli/test/repo-audit.test.mjs", "packages/cli/test/repo-detect.test.mjs", "packages/cli/test/repo-manifests.test.mjs",