From e8b716ab2e1c70cb10283661543e801715fa7229 Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 22:16:53 -0500 Subject: [PATCH 1/9] feat: add repo detection --- .../fixture-repos/npm-preview/package.json | 8 ++ examples/fixture-repos/npm-preview/server.mjs | 24 ++++ .../fixture-repos/npm-preview/site/about.html | 11 ++ .../fixture-repos/npm-preview/site/index.html | 12 ++ .../static-basic/dist/about/index.html | 12 ++ .../static-basic/dist/index.html | 13 ++ .../static-basic/dist/robots.txt | 3 + .../static-basic/dist/sitemap.xml | 5 + packages/cli/src/repo-detect.mjs | 122 ++++++++++++++++++ packages/cli/test/repo-detect.test.mjs | 84 ++++++++++++ 10 files changed, 294 insertions(+) create mode 100644 examples/fixture-repos/npm-preview/package.json create mode 100644 examples/fixture-repos/npm-preview/server.mjs create mode 100644 examples/fixture-repos/npm-preview/site/about.html create mode 100644 examples/fixture-repos/npm-preview/site/index.html create mode 100644 examples/fixture-repos/static-basic/dist/about/index.html create mode 100644 examples/fixture-repos/static-basic/dist/index.html create mode 100644 examples/fixture-repos/static-basic/dist/robots.txt create mode 100644 examples/fixture-repos/static-basic/dist/sitemap.xml create mode 100644 packages/cli/src/repo-detect.mjs create mode 100644 packages/cli/test/repo-detect.test.mjs diff --git a/examples/fixture-repos/npm-preview/package.json b/examples/fixture-repos/npm-preview/package.json new file mode 100644 index 0000000..2445dc1 --- /dev/null +++ b/examples/fixture-repos/npm-preview/package.json @@ -0,0 +1,8 @@ +{ + "name": "openclaw-preview-fixture", + "private": true, + "type": "module", + "scripts": { + "preview": "node server.mjs" + } +} diff --git a/examples/fixture-repos/npm-preview/server.mjs b/examples/fixture-repos/npm-preview/server.mjs new file mode 100644 index 0000000..88abdbd --- /dev/null +++ b/examples/fixture-repos/npm-preview/server.mjs @@ -0,0 +1,24 @@ +import 
http from "node:http"; +import fs from "node:fs"; +import path from "node:path"; + +const port = Number(process.argv[2] || process.env.PORT || 4173); +const root = path.join(process.cwd(), "site"); + +const fileFor = (urlPath) => { + if (urlPath === "/") return path.join(root, "index.html"); + return path.join(root, urlPath.replace(/^\//, "")); +}; + +const server = http.createServer((request, response) => { + const filePath = fileFor(new URL(request.url, `http://127.0.0.1:${port}`).pathname); + if (!filePath.startsWith(root) || !fs.existsSync(filePath)) { + response.writeHead(404, { "content-type": "text/plain" }); + response.end("not found"); + return; + } + response.writeHead(200, { "content-type": "text/html" }); + response.end(fs.readFileSync(filePath, "utf8")); +}); + +server.listen(port, "127.0.0.1"); diff --git a/examples/fixture-repos/npm-preview/site/about.html b/examples/fixture-repos/npm-preview/site/about.html new file mode 100644 index 0000000..1c82cdd --- /dev/null +++ b/examples/fixture-repos/npm-preview/site/about.html @@ -0,0 +1,11 @@ + + + + Preview Fixture About + + + +

Preview Fixture About

+

This about page proves preview crawls can discover linked routes.

+ + diff --git a/examples/fixture-repos/npm-preview/site/index.html b/examples/fixture-repos/npm-preview/site/index.html new file mode 100644 index 0000000..2aab858 --- /dev/null +++ b/examples/fixture-repos/npm-preview/site/index.html @@ -0,0 +1,12 @@ + + + + Preview Fixture Home + + + +

Preview Fixture Home

+

This page is served by an explicit preview command during repo audit tests.

+ About + + diff --git a/examples/fixture-repos/static-basic/dist/about/index.html b/examples/fixture-repos/static-basic/dist/about/index.html new file mode 100644 index 0000000..fdef156 --- /dev/null +++ b/examples/fixture-repos/static-basic/dist/about/index.html @@ -0,0 +1,12 @@ + + + + About Static Basic + + + + +

About Static Basic

+

The about page gives the fixture enough internal structure for route discovery.

+ + diff --git a/examples/fixture-repos/static-basic/dist/index.html b/examples/fixture-repos/static-basic/dist/index.html new file mode 100644 index 0000000..5de3351 --- /dev/null +++ b/examples/fixture-repos/static-basic/dist/index.html @@ -0,0 +1,13 @@ + + + + Static Basic Home + + + + +

Static Basic Home

+

This static fixture explains a deterministic source repository audit workflow.

+ About + + diff --git a/examples/fixture-repos/static-basic/dist/robots.txt b/examples/fixture-repos/static-basic/dist/robots.txt new file mode 100644 index 0000000..1bb35a3 --- /dev/null +++ b/examples/fixture-repos/static-basic/dist/robots.txt @@ -0,0 +1,3 @@ +User-agent: * +Allow: / +Sitemap: https://example.test/sitemap.xml diff --git a/examples/fixture-repos/static-basic/dist/sitemap.xml b/examples/fixture-repos/static-basic/dist/sitemap.xml new file mode 100644 index 0000000..3bf1896 --- /dev/null +++ b/examples/fixture-repos/static-basic/dist/sitemap.xml @@ -0,0 +1,5 @@ + + + https://example.test/ + https://example.test/about/ + diff --git a/packages/cli/src/repo-detect.mjs b/packages/cli/src/repo-detect.mjs new file mode 100644 index 0000000..fb4c2dd --- /dev/null +++ b/packages/cli/src/repo-detect.mjs @@ -0,0 +1,122 @@ +import fs from "node:fs"; +import path from "node:path"; + +const staticDirCandidates = ["dist", "build", "out", "public"]; + +const frameworkSignals = [ + ["next", "next"], + ["astro", "astro"], + ["@sveltejs/kit", "@sveltejs/kit"], + ["@remix-run/node", "@remix-run/node"], + ["vite", "vite"], +]; + +const compareOrdinal = (left, right) => { + if (left < right) return -1; + if (left > right) return 1; + return 0; +}; + +const readPackageJson = (repoRoot) => { + const packageJsonPath = path.join(repoRoot, "package.json"); + if (!fs.existsSync(packageJsonPath)) return null; + return JSON.parse(fs.readFileSync(packageJsonPath, "utf8")); +}; + +const detectPackageManager = (repoRoot) => { + if (fs.existsSync(path.join(repoRoot, "pnpm-lock.yaml"))) return "pnpm"; + if (fs.existsSync(path.join(repoRoot, "yarn.lock"))) return "yarn"; + if (fs.existsSync(path.join(repoRoot, "package-lock.json"))) return "npm"; + if (fs.existsSync(path.join(repoRoot, "package.json"))) return "npm"; + return null; +}; + +const scriptCommand = (packageManager, scriptName, packageJson) => { + if (!packageManager || !packageJson?.scripts?.[scriptName]) return null; + 
return `${packageManager} run ${scriptName}`; +}; + +const dependenciesFor = (packageJson) => ({ + ...packageJson?.dependencies, + ...packageJson?.devDependencies, +}); + +const detectFramework = (packageJson, hasStaticOutput) => { + const dependencies = dependenciesFor(packageJson); + for (const [dependencyName, framework] of frameworkSignals) { + if (dependencies[dependencyName]) { + return { detectedFramework: framework, confidence: "high" }; + } + } + + if (packageJson) { + return { detectedFramework: "generic-node", confidence: "medium" }; + } + + if (hasStaticOutput) { + return { detectedFramework: "generic-static", confidence: "medium" }; + } + + return { detectedFramework: null, confidence: "low" }; +}; + +const detectStaticDir = (repoRoot) => { + for (const dirRelative of staticDirCandidates) { + const dir = path.join(repoRoot, dirRelative); + if (fs.existsSync(path.join(dir, "index.html"))) { + return { staticDir: dir, staticDirRelative: dirRelative }; + } + } + + return { staticDir: null, staticDirRelative: null }; +}; + +const findHtmlFiles = (dir) => { + if (!dir) return []; + + const entries = fs + .readdirSync(dir, { withFileTypes: true }) + .toSorted((left, right) => compareOrdinal(left.name, right.name)); + return entries.flatMap((entry) => { + const entryPath = path.join(dir, entry.name); + if (entry.isDirectory()) return findHtmlFiles(entryPath); + if (entry.isFile() && entry.name.endsWith(".html")) return [entryPath]; + return []; + }); +}; + +const routeForHtmlFile = (staticDir, filePath) => { + const relative = path.relative(staticDir, filePath).split(path.sep).join("/"); + if (relative === "index.html") return "/"; + if (relative.endsWith("/index.html")) { + return `/${relative.slice(0, -"index.html".length)}`; + } + return `/${relative}`; +}; + +const routeSourcesForStaticDir = (staticDir) => + findHtmlFiles(staticDir).map((filePath) => ({ + type: "static_html", + path: filePath, + route: routeForHtmlFile(staticDir, filePath), + })); + 
+export const detectRepo = (repoRoot, options = {}) => { + const resolvedRepoRoot = path.resolve(repoRoot); + const packageJson = options.packageJson ?? readPackageJson(resolvedRepoRoot); + const packageManager = detectPackageManager(resolvedRepoRoot); + const { staticDir, staticDirRelative } = detectStaticDir(resolvedRepoRoot); + const { detectedFramework, confidence } = detectFramework(packageJson, Boolean(staticDir)); + + return { + repoRoot: resolvedRepoRoot, + detectedFramework, + confidence, + packageManager, + buildCommand: scriptCommand(packageManager, "build", packageJson), + previewCommand: scriptCommand(packageManager, "preview", packageJson), + staticDir, + staticDirRelative, + routeSources: routeSourcesForStaticDir(staticDir), + }; +}; diff --git a/packages/cli/test/repo-detect.test.mjs b/packages/cli/test/repo-detect.test.mjs new file mode 100644 index 0000000..271572a --- /dev/null +++ b/packages/cli/test/repo-detect.test.mjs @@ -0,0 +1,84 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { detectRepo } from "../src/repo-detect.mjs"; + +const fixture = (name) => path.resolve("examples/fixture-repos", name); + +test("detects static output repositories", () => { + const result = detectRepo(fixture("static-basic")); + + assert.equal(result.detectedFramework, "generic-static"); + assert.equal(result.confidence, "medium"); + assert.equal(result.staticDirRelative, "dist"); + assert.ok(result.staticDir.endsWith(path.join("static-basic", "dist"))); + assert.equal(result.packageManager, null); + assert.equal(result.buildCommand, null); + assert.equal(result.previewCommand, null); + assert.ok(result.routeSources.some((route) => route.route === "/")); +}); + +test("returns absolute route source paths for relative repository paths", () => { + const result = detectRepo("examples/fixture-repos/static-basic"); + + 
assert.ok(result.routeSources.length > 0); + assert.ok(result.routeSources.every((route) => path.isAbsolute(route.path))); +}); + +test("converts static HTML files to exact route source objects", () => { + const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-repo-detect-")); + const distDir = path.join(repoRoot, "dist"); + const aboutDir = path.join(distDir, "about"); + + fs.mkdirSync(aboutDir, { recursive: true }); + fs.writeFileSync(path.join(distDir, "index.html"), "

Home

"); + fs.writeFileSync(path.join(aboutDir, "index.html"), "

About

"); + fs.writeFileSync(path.join(distDir, "contact.html"), "

Contact

"); + + const result = detectRepo(repoRoot); + + assert.deepEqual(result.routeSources, [ + { + type: "static_html", + path: path.join(distDir, "about", "index.html"), + route: "/about/", + }, + { + type: "static_html", + path: path.join(distDir, "contact.html"), + route: "/contact.html", + }, + { + type: "static_html", + path: path.join(distDir, "index.html"), + route: "/", + }, + ]); +}); + +test("detects npm preview repositories", () => { + const result = detectRepo(fixture("npm-preview")); + + assert.equal(result.packageManager, "npm"); + assert.equal(result.detectedFramework, "generic-node"); + assert.equal(result.confidence, "medium"); + assert.equal(result.previewCommand, "npm run preview"); + assert.equal(result.buildCommand, null); + assert.equal(result.staticDir, null); +}); + +test("detects declared framework signals without executing scripts", () => { + const result = detectRepo(path.resolve("examples/fixture-repos/npm-preview"), { + packageJson: { + scripts: { build: "vite build", preview: "vite preview" }, + dependencies: { vite: "^5.0.0" }, + }, + }); + + assert.equal(result.detectedFramework, "vite"); + assert.equal(result.confidence, "high"); + assert.equal(result.buildCommand, "npm run build"); + assert.equal(result.previewCommand, "npm run preview"); +}); From 8fc3b1757a619812a53e1536ef6f3d731e01be37 Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 22:25:23 -0500 Subject: [PATCH 2/9] feat: add static repo route discovery --- packages/cli/src/audit.mjs | 46 +++++++++++++++++--------- packages/cli/src/repo-detect.mjs | 39 ++-------------------- packages/cli/src/repo-routes.mjs | 46 ++++++++++++++++++++++++++ packages/cli/test/audit.test.mjs | 39 ++++++++++++++++++++++ packages/cli/test/repo-detect.test.mjs | 10 +++--- packages/cli/test/repo-routes.test.mjs | 41 +++++++++++++++++++++++ 6 files changed, 163 insertions(+), 58 deletions(-) create mode 100644 packages/cli/src/repo-routes.mjs create mode 100644 
packages/cli/test/repo-routes.test.mjs diff --git a/packages/cli/src/audit.mjs b/packages/cli/src/audit.mjs index 692bbc4..0b8224d 100644 --- a/packages/cli/src/audit.mjs +++ b/packages/cli/src/audit.mjs @@ -51,23 +51,36 @@ const crawlSettings = (config) => ({ }); const readUrlList = (config) => { + const normalizeEntries = (entries, baseDir) => + entries + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith("#")) + .map((line) => { + if (isHttpUrl(line)) return line; + if (path.isAbsolute(line) && fs.existsSync(line)) return line; + if (isHttpUrl(config.target)) return new URL(line, config.target).href; + if (path.isAbsolute(line)) return line; + return path.resolve(baseDir, line); + }); + + if (Array.isArray(config.urlListEntries)) { + return normalizeEntries( + config.urlListEntries.map((entry) => String(entry)), + process.cwd(), + ); + } if (!config.urlList) return []; const baseDir = path.dirname(config.urlList); const limits = resolveLimits(config.limits); - return readTextFileLimited(config.urlList, { - security: config.security, - allowRestricted: true, - limits, - maxBytes: limits.maxFileBytes, - }) - .split(/\r?\n/) - .map((line) => line.trim()) - .filter((line) => line && !line.startsWith("#")) - .map((line) => { - if (isHttpUrl(line)) return line; - if (isHttpUrl(config.target)) return new URL(line, config.target).href; - return path.resolve(baseDir, line); - }); + return normalizeEntries( + readTextFileLimited(config.urlList, { + security: config.security, + allowRestricted: true, + limits, + maxBytes: limits.maxFileBytes, + }).split(/\r?\n/), + baseDir, + ); }; const collectUrlList = async (config) => { @@ -95,7 +108,8 @@ export const runAudit = async (config) => { const startedAt = new Date().toISOString(); const settings = crawlSettings(config); const shouldCrawl = isHttpUrl(config.target) && (settings.mode === "full" || settings.mode === "sample"); - const crawlResult = config.urlList + const hasUrlList = config.urlList || 
Array.isArray(config.urlListEntries); + const crawlResult = hasUrlList ? await collectUrlList(config) : shouldCrawl ? await crawlSite(config) @@ -156,7 +170,7 @@ export const runAudit = async (config) => { robots: crawlResult.robots, sitemaps: crawlResult.sitemaps, skipped: crawlResult.skipped, - notes: config.urlList + notes: hasUrlList ? ["Audit output contains supplied URL-list evidence."] : shouldCrawl ? ["Audit output contains bounded same-origin crawl evidence."] diff --git a/packages/cli/src/repo-detect.mjs b/packages/cli/src/repo-detect.mjs index fb4c2dd..706fe58 100644 --- a/packages/cli/src/repo-detect.mjs +++ b/packages/cli/src/repo-detect.mjs @@ -1,5 +1,6 @@ import fs from "node:fs"; import path from "node:path"; +import { discoverStaticRoutes } from "./repo-routes.mjs"; const staticDirCandidates = ["dist", "build", "out", "public"]; @@ -11,12 +12,6 @@ const frameworkSignals = [ ["vite", "vite"], ]; -const compareOrdinal = (left, right) => { - if (left < right) return -1; - if (left > right) return 1; - return 0; -}; - const readPackageJson = (repoRoot) => { const packageJsonPath = path.join(repoRoot, "package.json"); if (!fs.existsSync(packageJsonPath)) return null; @@ -71,36 +66,6 @@ const detectStaticDir = (repoRoot) => { return { staticDir: null, staticDirRelative: null }; }; -const findHtmlFiles = (dir) => { - if (!dir) return []; - - const entries = fs - .readdirSync(dir, { withFileTypes: true }) - .toSorted((left, right) => compareOrdinal(left.name, right.name)); - return entries.flatMap((entry) => { - const entryPath = path.join(dir, entry.name); - if (entry.isDirectory()) return findHtmlFiles(entryPath); - if (entry.isFile() && entry.name.endsWith(".html")) return [entryPath]; - return []; - }); -}; - -const routeForHtmlFile = (staticDir, filePath) => { - const relative = path.relative(staticDir, filePath).split(path.sep).join("/"); - if (relative === "index.html") return "/"; - if (relative.endsWith("/index.html")) { - return 
`/${relative.slice(0, -"index.html".length)}`; - } - return `/${relative}`; -}; - -const routeSourcesForStaticDir = (staticDir) => - findHtmlFiles(staticDir).map((filePath) => ({ - type: "static_html", - path: filePath, - route: routeForHtmlFile(staticDir, filePath), - })); - export const detectRepo = (repoRoot, options = {}) => { const resolvedRepoRoot = path.resolve(repoRoot); const packageJson = options.packageJson ?? readPackageJson(resolvedRepoRoot); @@ -117,6 +82,6 @@ export const detectRepo = (repoRoot, options = {}) => { previewCommand: scriptCommand(packageManager, "preview", packageJson), staticDir, staticDirRelative, - routeSources: routeSourcesForStaticDir(staticDir), + routeSources: staticDir ? discoverStaticRoutes(staticDir) : [], }; }; diff --git a/packages/cli/src/repo-routes.mjs b/packages/cli/src/repo-routes.mjs new file mode 100644 index 0000000..c817f90 --- /dev/null +++ b/packages/cli/src/repo-routes.mjs @@ -0,0 +1,46 @@ +import fs from "node:fs"; +import path from "node:path"; + +const ordinalCompare = (left, right) => (left < right ? -1 : left > right ? 
1 : 0); + +const htmlFiles = (dir) => { + const entries = fs.readdirSync(dir, { withFileTypes: true }).sort((left, right) => ordinalCompare(left.name, right.name)); + const files = []; + + for (const entry of entries) { + const itemPath = path.join(dir, entry.name); + if (entry.isDirectory()) { + files.push(...htmlFiles(itemPath)); + continue; + } + if (entry.isFile() && entry.name.endsWith(".html")) files.push(itemPath); + } + + return files; +}; + +const routeFor = (root, file) => { + const relative = path.relative(root, file); + const parsed = path.parse(relative); + const routePath = relative.split(path.sep).join("/"); + + if (routePath === "index.html") return "/"; + if (parsed.base === "index.html") return `/${parsed.dir.split(path.sep).join("/")}/`; + return `/${routePath}`; +}; + +export const discoverStaticRoutes = (staticDir) => { + const root = path.resolve(staticDir); + + if (!fs.existsSync(root) || !fs.statSync(root).isDirectory()) { + throw new Error(`Static directory does not exist or is not a directory: ${root}`); + } + + return htmlFiles(root) + .map((file) => ({ + type: "static_html", + route: routeFor(root, file), + path: file, + })) + .sort((left, right) => ordinalCompare(left.route, right.route) || ordinalCompare(left.path, right.path)); +}; diff --git a/packages/cli/test/audit.test.mjs b/packages/cli/test/audit.test.mjs index f83ac3d..6f67f51 100644 --- a/packages/cli/test/audit.test.mjs +++ b/packages/cli/test/audit.test.mjs @@ -148,3 +148,42 @@ test("restricted mode allows supplied URL-list files as bounded evidence inputs" assert.equal(audit.pages.length, 0); assert.equal(audit.run.security.mode, "restricted"); }); + +test("audits internal URL list entries without a URL-list file", async () => { + const index = path.resolve("examples/fixture-repos/static-basic/dist/index.html"); + const about = path.resolve("examples/fixture-repos/static-basic/dist/about/index.html"); + const audit = await runAudit({ + target: index, + urlListEntries: 
[index, about], + }); + + assert.equal(audit.pages.length, 2); + assert.ok(audit.pages.some((page) => page.finalUrl.endsWith("index.html"))); + assert.ok(audit.pages.some((page) => page.finalUrl.endsWith(path.join("about", "index.html")))); +}); + +test("normalizes internal URL list entries like URL-list file lines", async () => { + await withServer((request, response) => { + response.setHeader("content-type", "text/html"); + if (request.url === "/one") { + response.end("One

One

Enough content.

"); + return; + } + if (request.url === "/two") { + response.end("Two

Two

Enough content.

"); + return; + } + response.statusCode = 404; + response.end("Missing

Missing

"); + }, async (origin) => { + const audit = await runAudit({ + target: `${origin}/`, + urlListEntries: ["/one", "# comment", "", "/two"], + }); + + assert.deepEqual( + audit.pages.map((page) => new URL(page.finalUrl).pathname), + ["/one", "/two"], + ); + }); +}); diff --git a/packages/cli/test/repo-detect.test.mjs b/packages/cli/test/repo-detect.test.mjs index 271572a..8a35668 100644 --- a/packages/cli/test/repo-detect.test.mjs +++ b/packages/cli/test/repo-detect.test.mjs @@ -40,6 +40,11 @@ test("converts static HTML files to exact route source objects", () => { const result = detectRepo(repoRoot); assert.deepEqual(result.routeSources, [ + { + type: "static_html", + path: path.join(distDir, "index.html"), + route: "/", + }, { type: "static_html", path: path.join(distDir, "about", "index.html"), @@ -50,11 +55,6 @@ test("converts static HTML files to exact route source objects", () => { path: path.join(distDir, "contact.html"), route: "/contact.html", }, - { - type: "static_html", - path: path.join(distDir, "index.html"), - route: "/", - }, ]); }); diff --git a/packages/cli/test/repo-routes.test.mjs b/packages/cli/test/repo-routes.test.mjs new file mode 100644 index 0000000..989d2af --- /dev/null +++ b/packages/cli/test/repo-routes.test.mjs @@ -0,0 +1,41 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { discoverStaticRoutes } from "../src/repo-routes.mjs"; + +test("discovers static HTML files in deterministic order", () => { + const root = path.resolve("examples/fixture-repos/static-basic/dist"); + const routes = discoverStaticRoutes(root); + + assert.deepEqual(routes, [ + { type: "static_html", route: "/", path: path.join(root, "index.html") }, + { type: "static_html", route: "/about/", path: path.join(root, "about", "index.html") }, + ]); + assert.ok(routes.every((route) => path.isAbsolute(route.path))); +}); + +test("rejects missing static 
directories", () => { + assert.throws( + () => discoverStaticRoutes(path.resolve("examples/fixture-repos/static-basic/missing")), + /Static directory does not exist or is not a directory/, + ); +}); + +test("rejects static paths that are not directories", () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-static-routes-")); + const file = path.join(root, "index.html"); + fs.writeFileSync(file, "

Home

"); + + assert.throws(() => discoverStaticRoutes(file), /Static directory does not exist or is not a directory/); +}); + +test("converts non-index HTML files to extension routes", () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-static-routes-")); + fs.writeFileSync(path.join(root, "about.html"), "

About

"); + + assert.deepEqual(discoverStaticRoutes(root), [ + { type: "static_html", route: "/about.html", path: path.join(root, "about.html") }, + ]); +}); From 4419027013eea735633ca058eba08a3e457523cc Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 22:35:36 -0500 Subject: [PATCH 3/9] feat: add repo preview process management --- packages/cli/src/repo-process.mjs | 203 +++++++++++++++++++++++ packages/cli/test/repo-process.test.mjs | 204 ++++++++++++++++++++++++ 2 files changed, 407 insertions(+) create mode 100644 packages/cli/src/repo-process.mjs create mode 100644 packages/cli/test/repo-process.test.mjs diff --git a/packages/cli/src/repo-process.mjs b/packages/cli/src/repo-process.mjs new file mode 100644 index 0000000..dd3a3fa --- /dev/null +++ b/packages/cli/src/repo-process.mjs @@ -0,0 +1,203 @@ +import { spawn } from "node:child_process"; +import { once } from "node:events"; +import { setTimeout as sleep } from "node:timers/promises"; + +const outputCaptureLimitBytes = 64 * 1024; +const pollIntervalMs = 50; +const fetchAttemptTimeoutMs = 500; +const preflightTimeoutMs = 250; +const shutdownGraceMs = 500; + +const isExited = (child) => child.exitCode !== null || child.signalCode !== null; + +const waitForExit = async (child, timeoutMs) => { + if (isExited(child)) { + return true; + } + + let timer; + try { + return await Promise.race([ + once(child, "exit").then(() => true), + new Promise((resolve) => { + timer = setTimeout(() => resolve(false), timeoutMs); + }), + ]); + } finally { + clearTimeout(timer); + } +}; + +const killChild = (child, signal) => { + if (!child.pid || isExited(child)) { + return; + } + + try { + if (process.platform === "win32") { + child.kill(signal); + } else { + process.kill(-child.pid, signal); + } + } catch (error) { + if (error?.code !== "ESRCH") { + throw error; + } + } +}; + +const capStringByBytes = (value, maxBytes) => { + if (Buffer.byteLength(value) <= maxBytes) { + return value; + } + return 
Buffer.from(value).subarray(-maxBytes).toString(); +}; + +const appendCappedChunk = (chunks, chunk) => { + const capped = capStringByBytes(`${chunks.join("")}${String(chunk)}`, outputCaptureLimitBytes); + chunks.splice(0, chunks.length, capped); +}; + +const stderrTail = (stderr) => { + const tail = stderr.join("").trim(); + return tail ? ` Stderr: ${tail}` : ""; +}; + +const previewError = (message, preview) => { + const error = new Error(message); + error.preview = preview; + return error; +}; + +const earlyExitError = (preview, code, signal) => + previewError( + `Preview command exited before server became reachable (${code === null ? `signal ${signal}` : `code ${code}`}).${stderrTail(preview.stderr)}`, + preview, + ); + +export const waitForHttp = async (url, options = {}) => { + const timeoutMs = options.timeoutMs ?? 30000; + const deadline = Date.now() + timeoutMs; + let lastError; + + while (true) { + const remainingMs = deadline - Date.now(); + if (remainingMs <= 0) { + break; + } + + const controller = new AbortController(); + const abortTimer = setTimeout(() => controller.abort(), Math.min(fetchAttemptTimeoutMs, remainingMs)); + + try { + const response = await fetch(url, { redirect: "manual", signal: controller.signal }); + if (response.status < 500) { + await response.body?.cancel(); + return; + } + await response.body?.cancel(); + lastError = new Error(`HTTP ${response.status}`); + } catch (error) { + lastError = error; + } finally { + clearTimeout(abortTimer); + } + + await sleep(Math.min(pollIntervalMs, Math.max(0, deadline - Date.now()))); + } + + const suffix = lastError?.message ? 
` Last error: ${lastError.message}` : ""; + throw new Error(`Preview server did not become reachable at ${url}.${suffix}`); +}; + +export const startPreview = async ({ command, cwd, previewUrl, timeoutMs = 30000 }) => { + if (!command) { + throw new Error("--preview-command is required for preview repo audits."); + } + if (!previewUrl) { + throw new Error("--preview-url is required for preview repo audits."); + } + + let previewUrlAlreadyReachable = false; + try { + await waitForHttp(previewUrl, { timeoutMs: preflightTimeoutMs }); + previewUrlAlreadyReachable = true; + } catch { + previewUrlAlreadyReachable = false; + } + + if (previewUrlAlreadyReachable) { + throw new Error(`Preview URL is already reachable before starting command: ${previewUrl}`); + } + + const child = spawn(command, { + cwd, + shell: true, + detached: process.platform !== "win32", + stdio: ["ignore", "pipe", "pipe"], + }); + + const preview = { + child, + url: previewUrl, + stdout: [], + stderr: [], + }; + + child.stdout?.on("data", (chunk) => appendCappedChunk(preview.stdout, chunk)); + child.stderr?.on("data", (chunk) => appendCappedChunk(preview.stderr, chunk)); + + const startupError = new Promise((_, reject) => { + child.once("error", (error) => { + reject(previewError(`Preview command failed to start: ${error.message}`, preview)); + }); + child.once("close", (code, signal) => { + reject(earlyExitError(preview, code, signal)); + }); + }); + + try { + await Promise.race([waitForHttp(previewUrl, { timeoutMs }), startupError]); + if (isExited(child)) { + throw earlyExitError(preview, child.exitCode, child.signalCode); + } + } catch (error) { + try { + await stopPreview(preview); + } catch { + // Preserve the startup failure, which is the actionable error for callers. 
+ } + error.preview ??= preview; + throw error; + } + + return preview; +}; + +export const stopPreview = async (preview) => { + const child = preview?.child; + if (!child || isExited(child)) { + return; + } + + if (preview.stopPromise) { + return preview.stopPromise; + } + + preview.stopPromise = (async () => { + if (isExited(child)) { + return; + } + + killChild(child, "SIGTERM"); + const terminated = await waitForExit(child, shutdownGraceMs); + if (terminated || isExited(child)) { + return; + } + + killChild(child, "SIGKILL"); + await waitForExit(child, shutdownGraceMs); + })(); + + return preview.stopPromise; +}; diff --git a/packages/cli/test/repo-process.test.mjs b/packages/cli/test/repo-process.test.mjs new file mode 100644 index 0000000..0d99f9a --- /dev/null +++ b/packages/cli/test/repo-process.test.mjs @@ -0,0 +1,204 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import http from "node:http"; +import net from "node:net"; +import { once } from "node:events"; +import { startPreview, stopPreview, waitForHttp } from "../src/repo-process.mjs"; + +const outputCaptureLimitBytes = 64 * 1024; + +const freePort = async () => { + const server = net.createServer(); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const { port } = server.address(); + server.close(); + await once(server, "close"); + return port; +}; + +test("starts preview command, waits for HTTP, and stops process", async () => { + const port = await freePort(); + const preview = await startPreview({ + command: `node server.mjs ${port}`, + cwd: "examples/fixture-repos/npm-preview", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + }); + + assert.equal(preview.url, `http://127.0.0.1:${port}`); + const response = await fetch(preview.url); + assert.equal(response.status, 200); + + await stopPreview(preview); + await assert.rejects(() => waitForHttp(preview.url, { timeoutMs: 250 }), /Preview server did not become reachable/); +}); + 
+test("stopPreview is safe to call more than once", async () => { + const port = await freePort(); + const preview = await startPreview({ + command: `node server.mjs ${port}`, + cwd: "examples/fixture-repos/npm-preview", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + }); + + await stopPreview(preview); + await stopPreview(preview); +}); + +test("waitForHttp times out promptly when a server accepts but never responds", async () => { + const sockets = new Set(); + const server = net.createServer((socket) => { + sockets.add(socket); + socket.on("close", () => sockets.delete(socket)); + socket.on("error", () => {}); + }); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const { port } = server.address(); + + const startedAt = Date.now(); + await assert.rejects( + () => waitForHttp(`http://127.0.0.1:${port}`, { timeoutMs: 250 }), + /Preview server did not become reachable/, + ); + assert.ok(Date.now() - startedAt < 1000); + + for (const socket of sockets) { + socket.destroy(); + } + server.close(); + await once(server, "close"); +}); + +test("waitForHttp keeps polling on 500 and resolves on a later non-500 response", async () => { + let requests = 0; + const server = http.createServer((request, response) => { + requests += 1; + const status = requests === 1 ? 
500 : 200; + response.writeHead(status); + response.end(); + }); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const { port } = server.address(); + + await waitForHttp(`http://127.0.0.1:${port}`, { timeoutMs: 1000 }); + + assert.equal(requests, 2); + server.close(); + await once(server, "close"); +}); + +test("reports preview startup timeout", async () => { + const port = await freePort(); + await assert.rejects( + () => + startPreview({ + command: "node -e \"setTimeout(() => {}, 5000)\"", + cwd: ".", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 250, + }), + /Preview server did not become reachable/, + ); +}); + +test("reports spawn errors before preview startup timeout", async () => { + const port = await freePort(); + await assert.rejects( + () => + startPreview({ + command: "node server.mjs", + cwd: "examples/fixture-repos/missing", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + }), + /Preview command failed to start/, + ); +}); + +test("reports early preview command exits with stderr tail", async () => { + const port = await freePort(); + await assert.rejects( + () => + startPreview({ + command: "node -e \"console.error('startup failed'); process.exit(7)\"", + cwd: ".", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + }), + /Preview command exited before server became reachable \(code 7\).*startup failed/s, + ); +}); + +test("rejects before spawning when another process already serves the preview URL", async () => { + const server = http.createServer((request, response) => { + response.end("already running"); + }); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const { port } = server.address(); + + try { + await assert.rejects( + () => + startPreview({ + command: "node -e \"setTimeout(() => process.exit(7), 250)\"", + cwd: ".", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + }), + new RegExp(`Preview URL is already reachable before starting command: 
http://127\\.0\\.0\\.1:${port}`), + ); + } finally { + server.close(); + await once(server, "close"); + } +}); + +test("caps captured preview stdout and stderr", async () => { + const port = await freePort(); + await assert.rejects( + async () => { + const preview = await startPreview({ + command: + "node -e \"process.stdout.write('o'.repeat(70000)); process.stderr.write('e'.repeat(70000)); process.exit(3)\"", + cwd: ".", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + }); + return preview; + }, + (error) => { + assert.match(error.message, /Preview command exited before server became reachable/); + assert.ok(error.preview); + assert.ok(error.preview.stdout.join("").length <= outputCaptureLimitBytes); + assert.ok(error.preview.stderr.join("").length <= outputCaptureLimitBytes); + return true; + }, + ); +}); + +test("requires preview command", async () => { + await assert.rejects( + () => + startPreview({ + cwd: ".", + previewUrl: "http://127.0.0.1:3000", + }), + /--preview-command is required for preview repo audits\./, + ); +}); + +test("requires preview URL", async () => { + await assert.rejects( + () => + startPreview({ + command: "node server.mjs", + cwd: ".", + }), + /--preview-url is required for preview repo audits\./, + ); +}); From 489b94183b4925b1c5166c903e2d93f0dacba6fb Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 22:53:07 -0500 Subject: [PATCH 4/9] feat: add repo audit orchestration --- examples/golden/repo-static-summary.json | 15 ++ packages/cli/src/audit-output-schema.mjs | 4 + packages/cli/src/repo-audit.mjs | 199 ++++++++++++++++++ packages/cli/src/report.mjs | 37 ++++ .../cli/test/audit-output-schema.test.mjs | 41 ++++ packages/cli/test/repo-audit.test.mjs | 148 +++++++++++++ packages/cli/test/report.test.mjs | 35 +++ 7 files changed, 479 insertions(+) create mode 100644 examples/golden/repo-static-summary.json create mode 100644 packages/cli/src/repo-audit.mjs create mode 100644 
packages/cli/test/repo-audit.test.mjs diff --git a/examples/golden/repo-static-summary.json b/examples/golden/repo-static-summary.json new file mode 100644 index 0000000..84d94bd --- /dev/null +++ b/examples/golden/repo-static-summary.json @@ -0,0 +1,15 @@ +{ + "repo": { + "detectedFramework": "generic-static", + "packageManager": null, + "staticDirRelative": "dist", + "routeSources": [ + { "type": "static_html", "route": "/" }, + { "type": "static_html", "route": "/about/" } + ] + }, + "pageCount": 2, + "pageTitles": ["Static Basic Home", "About Static Basic"], + "sourceFindingIds": [], + "evidenceGapIds": ["ranking.integrations_missing"] +} diff --git a/packages/cli/src/audit-output-schema.mjs b/packages/cli/src/audit-output-schema.mjs index 29a58d7..38c8448 100644 --- a/packages/cli/src/audit-output-schema.mjs +++ b/packages/cli/src/audit-output-schema.mjs @@ -27,6 +27,7 @@ export const auditOutputSchema = { findings: { type: "array" }, evidenceGaps: { type: "array" }, sources: { type: "array" }, + repo: { type: "object" }, }, }; @@ -70,6 +71,9 @@ export const validateAuditOutput = (audit) => { } if ("pages" in audit && !Array.isArray(audit.pages)) errors.push("pages must be an array"); + if ("repo" in audit && (!audit.repo || typeof audit.repo !== "object" || Array.isArray(audit.repo))) { + errors.push("repo must be an object"); + } if ("findings" in audit && !Array.isArray(audit.findings)) { errors.push("findings must be an array"); } else { diff --git a/packages/cli/src/repo-audit.mjs b/packages/cli/src/repo-audit.mjs new file mode 100644 index 0000000..a9b0552 --- /dev/null +++ b/packages/cli/src/repo-audit.mjs @@ -0,0 +1,199 @@ +import fs from "node:fs"; +import path from "node:path"; +import { runAudit } from "./audit.mjs"; +import { detectRepo } from "./repo-detect.mjs"; +import { startPreview, stopPreview } from "./repo-process.mjs"; +import { discoverStaticRoutes } from "./repo-routes.mjs"; + +const toolVersion = "0.2.0"; + +const sourceFinding = ({ id, 
severity = "P1", message, evidence, recommendation, confidence = "high", details }) => ({ + id, + severity, + message, + evidence, + recommendation, + confidence, + ...(details ? { details } : {}), +}); + +const relativePath = (repoPath, targetPath) => { + if (!targetPath) return null; + const relative = path.relative(repoPath, targetPath); + return relative && !relative.startsWith("..") && !path.isAbsolute(relative) ? relative || "." : targetPath; +}; + +const previewErrorDetails = (error) => ({ + message: error?.message || "Preview server did not become reachable.", + stdout: error?.preview?.stdout?.join("").trim() || undefined, + stderr: error?.preview?.stderr?.join("").trim() || undefined, +}); + +const repoEvidence = (detected, overrides = {}) => ({ + path: detected.repoRoot, + detectedFramework: detected.detectedFramework, + confidence: detected.confidence, + packageManager: detected.packageManager, + buildCommand: detected.buildCommand, + previewCommand: detected.previewCommand, + staticDir: detected.staticDir, + staticDirRelative: detected.staticDirRelative, + routeSources: detected.routeSources || [], + sourceFindings: [], + notes: [], + ...overrides, +}); + +const emptyAudit = (detected, repoOverrides = {}) => { + const now = new Date().toISOString(); + + return { + schemaVersion: "1.0.0", + toolVersion, + run: { + id: `repo-audit-${Date.now()}`, + startedAt: now, + endedAt: now, + target: repoOverrides.previewUrl || repoOverrides.staticDir || detected.repoRoot, + mode: "repo", + }, + site: { + origin: null, + robots: null, + sitemaps: [], + skipped: [], + notes: ["No page audit evidence was collected."], + }, + pages: [], + integrations: {}, + scores: {}, + findings: [], + evidenceGaps: [], + sources: [], + repo: repoEvidence(detected, repoOverrides), + }; +}; + +export const runRepoAudit = async (options = {}) => { + const repoPath = path.resolve(options.repoPath || "."); + const detected = detectRepo(repoPath); + + if (options.staticDir) { + const 
staticDir = path.resolve(repoPath, options.staticDir); + const staticDirRelative = relativePath(repoPath, staticDir); + const staticRepoFields = { + staticDir, + staticDirRelative, + routeSources: [], + sourceFindings: [], + notes: [], + }; + + if (!fs.existsSync(staticDir) || !fs.statSync(staticDir).isDirectory()) { + return emptyAudit(detected, { + ...staticRepoFields, + sourceFindings: [ + sourceFinding({ + id: "repo.static_dir_missing", + message: "Configured static output directory does not exist or is not a directory.", + evidence: staticDir, + recommendation: "Run the repository build or pass an existing static output directory.", + }), + ], + }); + } + + const routes = discoverStaticRoutes(staticDir); + if (!routes.length) { + return emptyAudit(detected, { + ...staticRepoFields, + sourceFindings: [ + sourceFinding({ + id: "repo.static_routes_missing", + message: "Static output directory does not contain HTML routes.", + evidence: staticDir, + recommendation: "Build static HTML output before running a repository audit.", + }), + ], + }); + } + + const audit = await runAudit({ + ...options, + target: routes[0].path, + urlListEntries: routes.map((route) => route.path), + crawl: { ...(options.crawl || {}), mode: "single" }, + }); + + audit.repo = repoEvidence(detected, { + staticDir, + staticDirRelative, + routeSources: routes, + sourceFindings: [], + notes: ["Audited static output directory."], + }); + return audit; + } + + if (options.previewCommand && options.previewUrl) { + let preview; + try { + preview = await startPreview({ + command: options.previewCommand, + cwd: repoPath, + previewUrl: options.previewUrl, + timeoutMs: options.maxPreviewMs, + }); + } catch (error) { + return emptyAudit(detected, { + previewCommand: options.previewCommand, + previewUrl: options.previewUrl, + sourceFindings: [ + sourceFinding({ + id: "repo.preview_unreachable", + message: "Preview server did not become reachable for repository audit.", + evidence: options.previewUrl, + 
recommendation: "Verify the preview command starts a server at the configured preview URL.", + details: previewErrorDetails(error), + }), + ], + }); + } + + try { + const audit = await runAudit({ + ...options, + target: options.previewUrl, + crawl: { + mode: "full", + maxPages: options.maxPages ?? 25, + maxDepth: options.maxDepth ?? 2, + ...(options.crawl || {}), + }, + }); + + audit.repo = repoEvidence(detected, { + previewCommand: options.previewCommand, + previewUrl: options.previewUrl, + sourceFindings: [], + notes: ["Audited explicit preview server."], + }); + return audit; + } finally { + await stopPreview(preview); + } + } + + return emptyAudit(detected, { + sourceFindings: [ + sourceFinding({ + id: "repo.audit_path_missing", + severity: "P2", + message: "Repository audit needs either a static output directory or an explicit preview command and URL.", + evidence: detected.repoRoot, + recommendation: "Pass staticDir, or pass both previewCommand and previewUrl.", + confidence: "high", + }), + ], + }); +}; diff --git a/packages/cli/src/report.mjs b/packages/cli/src/report.mjs index a8a399a..3cd8cd2 100644 --- a/packages/cli/src/report.mjs +++ b/packages/cli/src/report.mjs @@ -6,6 +6,41 @@ const formatSources = (sources = []) => sources.join(", "); const formatMetric = (value, suffix = "") => (Number.isFinite(value) ? `${value}${suffix}` : "n/a"); +const formatRepoValue = (value) => (value === null || value === undefined || value === "" ? 
"n/a" : String(value)); + +const formatBulletValue = (value) => escapeCell(formatRepoValue(value)); + +const appendRepositoryEvidence = (lines, repo) => { + if (!repo) return; + + lines.push("", "## Repository Evidence", ""); + lines.push(`- Path: ${formatBulletValue(repo.path)}`); + lines.push(`- Framework: ${formatBulletValue(repo.detectedFramework)}`); + lines.push(`- Package manager: ${formatBulletValue(repo.packageManager)}`); + lines.push(`- Static dir: ${formatBulletValue(repo.staticDirRelative || repo.staticDir)}`); + lines.push(`- Preview command: ${formatBulletValue(repo.previewCommand)}`); + lines.push(`- Preview URL: ${formatBulletValue(repo.previewUrl)}`); + + lines.push("", "Repository routes:"); + if (repo.routeSources?.length) { + for (const route of repo.routeSources) { + const routeLabel = route.route || route.path; + lines.push(`- ${formatBulletValue(route.type)}: ${formatBulletValue(routeLabel)}`); + } + } else { + lines.push("- None recorded."); + } + + lines.push("", "Repository source findings:"); + if (repo.sourceFindings?.length) { + for (const finding of repo.sourceFindings) { + lines.push(`- ${formatBulletValue(finding.id)}: ${formatBulletValue(finding.message)}`); + } + } else { + lines.push("- None recorded."); + } +}; + export const generateMarkdownReport = (audit) => { const findings = [...(audit.findings || [])].sort( (a, b) => (priorityRank[a.severity] ?? 9) - (priorityRank[b.severity] ?? 
9), @@ -66,6 +101,8 @@ export const generateMarkdownReport = (audit) => { lines.push("No implementation tasks recorded."); } + appendRepositoryEvidence(lines, audit.repo); + lines.push("", "## Imported Evidence", ""); if (audit.integrations?.lighthouse) { const lighthouse = audit.integrations.lighthouse; diff --git a/packages/cli/test/audit-output-schema.test.mjs b/packages/cli/test/audit-output-schema.test.mjs index bf0f5b8..eaee34e 100644 --- a/packages/cli/test/audit-output-schema.test.mjs +++ b/packages/cli/test/audit-output-schema.test.mjs @@ -41,3 +41,44 @@ test("rejects findings missing required fields", () => { assert.equal(result.ok, false); assert.match(result.errors.join("\n"), /findings\[0\]\.severity is required/); }); + +test("accepts optional repo evidence section", () => { + const audit = { + schemaVersion: "1.0.0", + toolVersion: "0.2.0", + run: {}, + site: {}, + pages: [], + integrations: {}, + scores: {}, + findings: [], + evidenceGaps: [], + sources: [], + repo: { + path: "/repo", + detectedFramework: "generic-static", + sourceFindings: [], + }, + }; + + assert.deepEqual(validateAuditOutput(audit), { ok: true, errors: [] }); +}); + +test("rejects optional repo evidence section when it is not an object", () => { + const result = validateAuditOutput({ + schemaVersion: "1.0.0", + toolVersion: "0.2.0", + run: {}, + site: {}, + pages: [], + integrations: {}, + scores: {}, + findings: [], + evidenceGaps: [], + sources: [], + repo: [], + }); + + assert.equal(result.ok, false); + assert.match(result.errors.join("\n"), /repo must be an object/); +}); diff --git a/packages/cli/test/repo-audit.test.mjs b/packages/cli/test/repo-audit.test.mjs new file mode 100644 index 0000000..ed94760 --- /dev/null +++ b/packages/cli/test/repo-audit.test.mjs @@ -0,0 +1,148 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import net from "node:net"; +import os from "node:os"; +import path from "node:path"; +import { 
once } from "node:events"; +import { runRepoAudit } from "../src/repo-audit.mjs"; +import { waitForHttp } from "../src/repo-process.mjs"; + +const fixture = (name) => path.resolve("examples/fixture-repos", name); + +const freePort = async () => { + const server = net.createServer(); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const { port } = server.address(); + server.close(); + await once(server, "close"); + return port; +}; + +const repoStaticSummary = (audit) => ({ + repo: { + detectedFramework: audit.repo.detectedFramework, + packageManager: audit.repo.packageManager, + staticDirRelative: audit.repo.staticDirRelative, + routeSources: audit.repo.routeSources.map(({ type, route }) => ({ type, route })), + }, + pageCount: audit.pages.length, + pageTitles: audit.pages.map((page) => page.evidence.title), + sourceFindingIds: audit.repo.sourceFindings.map((finding) => finding.id), + evidenceGapIds: audit.evidenceGaps.map((gap) => gap.id), +}); + +test("static output audit records repo evidence and audits discovered routes", async () => { + const audit = await runRepoAudit({ + repoPath: fixture("static-basic"), + staticDir: path.join(fixture("static-basic"), "dist"), + }); + + assert.equal(audit.repo.detectedFramework, "generic-static"); + assert.equal(audit.repo.staticDirRelative, "dist"); + assert.equal(audit.pages.length, 2); + assert.ok(audit.repo.routeSources.some((route) => route.route === "/about/")); + assert.deepEqual(audit.repo.sourceFindings, []); +}); + +test("explicit preview audit starts and stops fixture server", async () => { + const port = await freePort(); + const previewUrl = `http://127.0.0.1:${port}`; + + const audit = await runRepoAudit({ + repoPath: fixture("npm-preview"), + previewCommand: `node server.mjs ${port}`, + previewUrl, + }); + + assert.equal(audit.pages.length, 2); + assert.equal(audit.repo.previewCommand, `node server.mjs ${port}`); + assert.equal(audit.repo.previewUrl, previewUrl); + await 
assert.rejects(() => waitForHttp(previewUrl, { timeoutMs: 250 }), /Preview server did not become reachable/); +}); + +test("explicit preview audit preserves full crawl mode with partial crawl options", async () => { + const port = await freePort(); + const previewUrl = `http://127.0.0.1:${port}`; + + const audit = await runRepoAudit({ + repoPath: fixture("npm-preview"), + previewCommand: `node server.mjs ${port}`, + previewUrl, + crawl: { maxPages: 2 }, + }); + + assert.equal(audit.run.mode, "full"); + assert.equal(audit.run.crawl.maxPages, 2); +}); + +test("explicit preview audit honors top-level maxPages when crawl maxPages is absent", async () => { + const port = await freePort(); + const previewUrl = `http://127.0.0.1:${port}`; + + const audit = await runRepoAudit({ + repoPath: fixture("npm-preview"), + previewCommand: `node server.mjs ${port}`, + previewUrl, + maxPages: 7, + }); + + assert.equal(audit.run.mode, "full"); + assert.equal(audit.run.crawl.maxPages, 7); +}); + +test("missing explicit static dir returns repo source finding", async () => { + const audit = await runRepoAudit({ + repoPath: fixture("static-basic"), + staticDir: path.join(fixture("static-basic"), "missing"), + }); + + assert.equal(audit.pages.length, 0); + assert.equal(audit.repo.sourceFindings[0].id, "repo.static_dir_missing"); +}); + +test("static dir with no HTML routes returns repo source finding", async () => { + const repoPath = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-repo-audit-")); + const staticDir = path.join(repoPath, "dist"); + fs.mkdirSync(staticDir, { recursive: true }); + fs.writeFileSync(path.join(staticDir, "robots.txt"), "User-agent: *\nAllow: /\n"); + + const audit = await runRepoAudit({ repoPath, staticDir }); + + assert.equal(audit.pages.length, 0); + assert.equal(audit.repo.sourceFindings[0].id, "repo.static_routes_missing"); +}); + +test("preview startup failure returns repo source finding", async () => { + const port = await freePort(); + const previewUrl = 
`http://127.0.0.1:${port}`; + + const audit = await runRepoAudit({ + repoPath: fixture("npm-preview"), + previewCommand: "node -e \"setTimeout(() => {}, 5000)\"", + previewUrl, + maxPreviewMs: 250, + }); + + assert.equal(audit.pages.length, 0); + assert.equal(audit.repo.previewUrl, previewUrl); + assert.equal(audit.repo.sourceFindings[0].id, "repo.preview_unreachable"); +}); + +test("missing audit path on npm preview repo returns repo source finding", async () => { + const audit = await runRepoAudit({ repoPath: fixture("npm-preview") }); + + assert.equal(audit.pages.length, 0); + assert.equal(audit.repo.sourceFindings[0].id, "repo.audit_path_missing"); +}); + +test("static repo golden summary matches fixture", async () => { + const audit = await runRepoAudit({ + repoPath: fixture("static-basic"), + staticDir: path.join(fixture("static-basic"), "dist"), + }); + const expected = JSON.parse(fs.readFileSync("examples/golden/repo-static-summary.json", "utf8")); + + assert.deepEqual(repoStaticSummary(audit), expected); +}); diff --git a/packages/cli/test/report.test.mjs b/packages/cli/test/report.test.mjs index 8c32600..b61ed94 100644 --- a/packages/cli/test/report.test.mjs +++ b/packages/cli/test/report.test.mjs @@ -49,3 +49,38 @@ test("generates a Markdown audit report from audit JSON", () => { assert.match(markdown, /Evidence Gaps/); assert.match(markdown, /https:\/\/developers\.google\.com\/search\/docs\/crawling-indexing\/robots-meta-tag/); }); + +test("includes repository evidence when audit repo evidence exists", () => { + const markdown = generateMarkdownReport({ + run: { target: "repo" }, + findings: [], + scores: {}, + integrations: {}, + evidenceGaps: [], + sources: [], + repo: { + path: "/repo", + detectedFramework: "generic-static", + packageManager: null, + staticDirRelative: "dist\nwith pipe | value", + previewCommand: null, + previewUrl: null, + routeSources: [{ type: "static_html", route: "/", path: "/repo/dist/index.html" }], + sourceFindings: [ + { + 
id: "repo.static_dir_missing", + severity: "P1", + message: "Static directory\nis missing | invalid.", + evidence: "dist", + recommendation: "Build the repository or pass an existing static directory.", + confidence: "high", + }, + ], + }, + }); + + assert.match(markdown, /## Repository Evidence/); + assert.match(markdown, /Framework: generic-static/); + assert.match(markdown, /Static dir: dist with pipe \\| value/); + assert.match(markdown, /repo\.static_dir_missing: Static directory is missing \\| invalid\./); +}); From 80c68aa1718088346987747571671a33597eeaac Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 23:02:11 -0500 Subject: [PATCH 5/9] feat: add repo audit CLI commands --- packages/cli/src/cli.mjs | 143 +++++++++++++++++++ packages/cli/test/cli.test.mjs | 249 +++++++++++++++++++++++++++++++++ 2 files changed, 392 insertions(+) diff --git a/packages/cli/src/cli.mjs b/packages/cli/src/cli.mjs index 5ea78f9..3502a0e 100644 --- a/packages/cli/src/cli.mjs +++ b/packages/cli/src/cli.mjs @@ -3,6 +3,8 @@ import path from "node:path"; import { runAudit } from "./audit.mjs"; import { readAuditConfig, resolveAuditConfigPaths, validateAuditConfig } from "./config-schema.mjs"; import { generateMarkdownReport } from "./report.mjs"; +import { runRepoAudit } from "./repo-audit.mjs"; +import { detectRepo } from "./repo-detect.mjs"; import { getRule } from "./rules.mjs"; import { collectSnapshot } from "./snapshot.mjs"; @@ -13,6 +15,8 @@ const help = `Usage: openclaw-geo-seo-audit [options] Commands: audit Run a deterministic GEO/SEO readiness audit snapshot Capture single-page audit evidence + detect-repo [path] Inspect source repository audit metadata; defaults to current directory + audit-repo Audit static output or explicit preview server from a source repo validate-config Validate an audit.config.json file explain-rule Print rule metadata and citations as JSON @@ -40,6 +44,18 @@ Audit options: --markdown Write Markdown report --help Show this help 
--version Show CLI version + +Repo audit options: + --static-dir Audit prebuilt static HTML output relative to repo path + --preview-command Start an explicit local preview server command + --preview-url URL to wait for and audit after preview startup + --max-preview-ms Maximum time to wait for preview startup + --mode full|sample|single Crawl mode for preview audits + --max-pages Maximum pages to crawl for preview audits + --max-depth Maximum crawl depth for preview audits + --security local|restricted Apply local CLI or restricted wrapper network/file policy + --out Write repository audit JSON + --markdown Write repository audit Markdown report `; const writeJson = (io, value) => { @@ -75,6 +91,19 @@ const auditOptionsWithValues = new Set([ "--markdown", ]); +const repoOptionsWithValues = new Set([ + "--static-dir", + "--preview-command", + "--preview-url", + "--max-preview-ms", + "--mode", + "--max-pages", + "--max-depth", + "--security", + "--out", + "--markdown", +]); + const severityRank = { P0: 0, P1: 1, P2: 2, P3: 3 }; const failsThreshold = (findings, threshold) => { @@ -105,11 +134,59 @@ const splitAuditArgs = (args) => { return { target, options }; }; +const splitRepoArgs = (args) => { + const options = []; + let repoPath = null; + + for (let index = 0; index < args.length; index++) { + const arg = args[index]; + if (repoOptionsWithValues.has(arg)) { + options.push(arg); + if (index + 1 < args.length && !args[index + 1].startsWith("--")) options.push(args[++index]); + continue; + } + if (arg.startsWith("--")) { + options.push(arg); + continue; + } + if (!repoPath) repoPath = arg; + else options.push(arg); + } + + return { repoPath, options }; +}; + const numberOption = (options, name, fallback) => { const value = optionValue(options, name); return value ? 
Number(value) : fallback; }; +const repoOptionValue = (options, name, fallback = null, errorMessage = `${name} requires a value.`) => { + const index = options.indexOf(name); + if (index === -1) return fallback; + const value = options[index + 1]; + if (!value || value.startsWith("--")) throw new Error(errorMessage); + return value; +}; + +const repoEnumOption = (options, name, fallback, allowedValues) => { + const value = repoOptionValue(options, name, fallback); + if (!allowedValues.includes(value)) throw new Error(`${name} must be one of: ${allowedValues.join(", ")}`); + return value; +}; + +const repoNumberOption = (options, name, fallback, { minimum, minimumDescription }) => { + const index = options.indexOf(name); + if (index === -1) return fallback; + const value = repoOptionValue(options, name); + const number = Number(value); + if (!Number.isFinite(number)) throw new Error(`${name} must be a number.`); + if (!Number.isInteger(number) || number < minimum) { + throw new Error(`${name} must be a ${minimumDescription}.`); + } + return number; +}; + const mergeAuditConfig = (target, options) => { const configPath = optionValue(options, "--config"); const baseDir = configPath ? 
path.dirname(path.resolve(configPath)) : process.cwd(); @@ -191,6 +268,31 @@ const mergeAuditConfig = (target, options) => { return merged; }; +const mergeRepoConfig = (repoPath, options) => ({ + repoPath, + staticDir: repoOptionValue(options, "--static-dir"), + previewCommand: repoOptionValue(options, "--preview-command"), + previewUrl: repoOptionValue(options, "--preview-url"), + maxPreviewMs: repoNumberOption(options, "--max-preview-ms", 30000, { + minimum: 1, + minimumDescription: "positive integer", + }), + crawl: { + mode: repoEnumOption(options, "--mode", "full", ["full", "sample", "single"]), + maxPages: repoNumberOption(options, "--max-pages", 25, { + minimum: 1, + minimumDescription: "positive integer", + }), + maxDepth: repoNumberOption(options, "--max-depth", 2, { + minimum: 0, + minimumDescription: "non-negative integer", + }), + }, + security: { + mode: repoEnumOption(options, "--security", "local", ["local", "restricted"]), + }, +}); + export const runCli = async (args, io = { stdout: process.stdout, stderr: process.stderr }) => { const [command, ...rest] = args; @@ -288,6 +390,47 @@ export const runCli = async (args, io = { stdout: process.stdout, stderr: proces } } + if (command === "detect-repo") { + const [repoPath = "."] = rest; + try { + writeJson(io, detectRepo(repoPath)); + return 0; + } catch (error) { + io.stderr.write(`${error.message}\n`); + return 1; + } + } + + if (command === "audit-repo") { + const { repoPath, options } = splitRepoArgs(rest); + if (!repoPath) { + io.stderr.write("audit-repo requires a repository path.\n"); + return 1; + } + + try { + const outRequested = options.includes("--out"); + const markdownRequested = options.includes("--markdown"); + const outPath = outRequested ? repoOptionValue(options, "--out", null, "--out requires a file path.") : null; + const markdownPath = markdownRequested + ? 
repoOptionValue(options, "--markdown", null, "--markdown requires a file path.") + : null; + + const output = await runRepoAudit(mergeRepoConfig(repoPath, options)); + if (outPath) fs.writeFileSync(outPath, `${JSON.stringify(output, null, 2)}\n`); + if (markdownPath) fs.writeFileSync(markdownPath, generateMarkdownReport(output)); + if (outPath || markdownPath) { + writeJson(io, { ok: true, out: outPath || null, markdown: markdownPath || null }); + } else { + writeJson(io, output); + } + return output.repo?.sourceFindings?.length ? 2 : 0; + } catch (error) { + io.stderr.write(`${error.message}\n`); + return 1; + } + } + if (command === "explain-rule") { const [ruleId] = rest; if (!ruleId) { diff --git a/packages/cli/test/cli.test.mjs b/packages/cli/test/cli.test.mjs index 282ab12..5161b3c 100644 --- a/packages/cli/test/cli.test.mjs +++ b/packages/cli/test/cli.test.mjs @@ -27,6 +27,18 @@ test("prints help", async () => { assert.match(result.stdout, /Usage:/); assert.match(result.stdout, /validate-config/); assert.match(result.stdout, /explain-rule/); + assert.match(result.stdout, /detect-repo/); + assert.match(result.stdout, /audit-repo/); + assert.match(result.stdout, /detect-repo \[path\]/); + assert.match(result.stdout, /defaults to current directory/); + assert.match(result.stdout, /--static-dir /); + assert.match(result.stdout, /--preview-command /); + assert.match(result.stdout, /--preview-url /); + assert.match(result.stdout, /--max-preview-ms /); + assert.match(result.stdout, /--mode full\|sample\|single/); + assert.match(result.stdout, /--max-pages /); + assert.match(result.stdout, /--max-depth /); + assert.match(result.stdout, /--security local\|restricted/); }); test("explains a known rule as JSON", async () => { @@ -93,3 +105,240 @@ test("does not fail CI when findings are below fail-on threshold", async () => { const result = await capture(["audit", html, "--fail-on", "P0"]); assert.equal(result.exitCode, 0); }); + +test("detects repository audit 
metadata", async () => { + const result = await capture(["detect-repo", "examples/fixture-repos/static-basic"]); + + assert.equal(result.exitCode, 0); + const body = JSON.parse(result.stdout); + assert.equal(body.detectedFramework, "generic-static"); + assert.equal(body.staticDirRelative, "dist"); +}); + +test("audits static repository output from CLI", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist"]); + + assert.equal(result.exitCode, 0); + const body = JSON.parse(result.stdout); + assert.equal(body.repo.detectedFramework, "generic-static"); + assert.equal(body.pages.length, 2); +}); + +test("audit-repo missing repo path returns helpful error", async () => { + const result = await capture(["audit-repo"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /audit-repo requires a repository path/); +}); + +test("audit-repo with missing static dir returns source finding failure code", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "missing"]); + + assert.equal(result.exitCode, 2); + const body = JSON.parse(result.stdout); + assert.equal(body.repo.sourceFindings[0].id, "repo.static_dir_missing"); +}); + +test("audit-repo writes JSON and Markdown reports", async () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "geo-seo-repo-cli-")); + const out = path.join(dir, "audit.json"); + const markdown = path.join(dir, "audit.md"); + + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--out", + out, + "--markdown", + markdown, + ]); + + assert.equal(result.exitCode, 0); + assert.deepEqual(JSON.parse(result.stdout), { ok: true, out, markdown }); + assert.equal(JSON.parse(fs.readFileSync(out, "utf8")).repo.detectedFramework, "generic-static"); + assert.match(fs.readFileSync(markdown, "utf8"), /GEO\/SEO Audit Report/); +}); + +test("audit-repo 
rejects missing out path", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--out"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--out requires a file path/); +}); + +test("audit-repo rejects missing markdown path", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--markdown"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--markdown requires a file path/); +}); + +test("audit-repo rejects option token as out path", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--out", + "--markdown", + path.join(os.tmpdir(), "audit.md"), + ]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--out requires a file path/); +}); + +test("audit-repo rejects invalid numeric options", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--max-pages", + "not-a-number", + ]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-pages must be a number/); +}); + +test("audit-repo rejects missing numeric option values", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--max-pages"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-pages requires a value/); +}); + +test("audit-repo rejects option token as numeric option value", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--max-pages", + "--out", + path.join(os.tmpdir(), "audit.json"), + ]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-pages requires a value/); +}); + +test("audit-repo accepts valid numeric option 
values", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--max-pages", + "2", + ]); + + assert.equal(result.exitCode, 0); + const body = JSON.parse(result.stdout); + assert.equal(body.pages.length, 2); +}); + +test("audit-repo rejects option token as static dir value", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "--out", + path.join(os.tmpdir(), "audit.json"), + ]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--static-dir requires a value/); +}); + +test("audit-repo rejects option token as mode value", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--mode", + "--max-pages", + "2", + ]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--mode requires a value/); +}); + +test("audit-repo rejects option token as security value", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--security", + "--out", + path.join(os.tmpdir(), "audit.json"), + ]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--security requires a value/); +}); + +test("audit-repo rejects invalid mode values", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--mode", "bogus"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--mode must be one of: full, sample, single/); +}); + +test("audit-repo rejects invalid security values", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--security", "bogus"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--security must be one of: local, restricted/); +}); + 
+test("audit-repo accepts valid enum option values", async () => { + const result = await capture([ + "audit-repo", + "examples/fixture-repos/static-basic", + "--static-dir", + "dist", + "--mode", + "single", + "--security", + "local", + ]); + + assert.equal(result.exitCode, 0); + const body = JSON.parse(result.stdout); + assert.equal(body.repo.detectedFramework, "generic-static"); +}); + +test("audit-repo rejects max-pages below minimum", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--max-pages", "0"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-pages must be a positive integer/); +}); + +test("audit-repo rejects fractional max-pages", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--max-pages", "1.5"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-pages must be a positive integer/); +}); + +test("audit-repo rejects max-depth below minimum", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--max-depth", "-1"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-depth must be a non-negative integer/); +}); + +test("audit-repo rejects max-preview-ms below minimum", async () => { + const result = await capture(["audit-repo", "examples/fixture-repos/static-basic", "--static-dir", "dist", "--max-preview-ms", "0"]); + + assert.equal(result.exitCode, 1); + assert.match(result.stderr, /--max-preview-ms must be a positive integer/); +}); From e8a6266019d1cea72856e33bcc31b649cce287cb Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 23:19:08 -0500 Subject: [PATCH 6/9] docs: document repo audit mode --- README.md | 7 ++++++- scripts/validate-skill.mjs | 17 +++++++++++++++++ skill/geo-seo-audit/SKILL.md | 4 ++++ 3 files changed, 27 insertions(+), 1 
deletion(-) diff --git a/README.md b/README.md index 402691e..893f0a2 100644 --- a/README.md +++ b/README.md @@ -37,9 +37,14 @@ npm run cli -- audit https://example.com --url-list urls.txt --markdown audit-re npm run cli -- audit https://example.com --mode full --max-pages 25 --max-depth 2 --respect-robots true --sitemap https://example.com/sitemap.xml npm run cli -- audit https://example.com --mode full --security restricted --timeout-ms 15000 --max-html-bytes 2000000 npm run cli -- audit https://example.com --mode full --fail-on P1 --out audit.json --markdown audit.md +npm run cli -- detect-repo . +npm run cli -- audit-repo . --static-dir dist --out repo-audit.json --markdown repo-audit.md +npm run cli -- audit-repo . --preview-command "npm run preview -- --host 127.0.0.1" --preview-url http://127.0.0.1:4173 --max-pages 25 ``` -The current `audit` command collects single-page, supplied URL-list, or bounded same-origin crawl evidence, can read `audit.config.json`, can seed from a sitemap, can enforce robots.txt, can filter crawls with include/exclude patterns, evaluates deterministic page and site rules, and can write JSON or Markdown. Extracted page evidence includes metadata, canonicals, hreflang, favicon and site-name signals, preview directives, headings, links, image inventory, JSON-LD blocks, schema types, author/date signals, and internal/external link counts. Browser rendering is available when Playwright is installed or when a renderer is injected by code; otherwise the CLI records rendering as unavailable. +The current `audit` command collects single-page, supplied URL-list, or bounded same-origin crawl evidence, can read `audit.config.json`, can seed from a sitemap, can enforce robots.txt, can filter crawls with include/exclude patterns, evaluates deterministic page and site rules, and can write JSON or Markdown. 
Extracted page evidence includes metadata, canonicals, hreflang, favicon and site-name signals, preview directives, headings, links, image inventory, JSON-LD blocks, schema types, author/date signals, and internal/external link counts. Browser rendering is available when Playwright is installed or when a renderer is injected by code; otherwise the CLI records rendering as unavailable. The `detect-repo [path]` command reports repository framework, package-manager, route, and build-output signals and defaults to the current directory when no path is supplied. The `audit-repo` command exits 2 when repo source findings are present. + +`audit-repo` is intended for source repository audits. In the first repo-to-audit release, static output directories and explicit preview commands are supported. Framework and package-manager signals are reported by `detect-repo`, but the CLI does not automatically install dependencies or run inferred framework scripts. For untrusted live-site audits or hosted wrappers, use `--security restricted`. Restricted mode blocks local page targets and private-network HTTP targets, requires guarded manual redirects before fetches, disables Playwright URL rendering, and applies request timeouts and response/file byte caps. Supplied URL-list and integration files are still allowed as bounded evidence inputs. Use the default `local` mode for trusted local HTML files or localhost development servers. Restricted mode is a CLI guardrail, not a replacement for hosted network egress controls. 
diff --git a/scripts/validate-skill.mjs b/scripts/validate-skill.mjs index bf3367d..1abaa71 100644 --- a/scripts/validate-skill.mjs +++ b/scripts/validate-skill.mjs @@ -22,6 +22,10 @@ const requiredFiles = [ "packages/cli/src/snapshot.mjs", "packages/cli/src/audit.mjs", "packages/cli/src/crawl.mjs", + "packages/cli/src/repo-audit.mjs", + "packages/cli/src/repo-detect.mjs", + "packages/cli/src/repo-process.mjs", + "packages/cli/src/repo-routes.mjs", "packages/cli/src/rule-engine.mjs", "packages/cli/src/site-rule-engine.mjs", "packages/cli/src/integrations.mjs", @@ -50,6 +54,19 @@ const requiredFiles = [ "examples/fixture-sites/known-issues/canonical-alt.html", "examples/fixture-sites/known-issues/robots.txt", "examples/fixture-sites/known-issues/sitemap.xml", + "packages/cli/test/repo-audit.test.mjs", + "packages/cli/test/repo-detect.test.mjs", + "packages/cli/test/repo-process.test.mjs", + "packages/cli/test/repo-routes.test.mjs", + "examples/fixture-repos/static-basic/dist/index.html", + "examples/fixture-repos/static-basic/dist/about/index.html", + "examples/fixture-repos/static-basic/dist/robots.txt", + "examples/fixture-repos/static-basic/dist/sitemap.xml", + "examples/fixture-repos/npm-preview/package.json", + "examples/fixture-repos/npm-preview/server.mjs", + "examples/fixture-repos/npm-preview/site/index.html", + "examples/fixture-repos/npm-preview/site/about.html", + "examples/golden/repo-static-summary.json", "examples/golden/known-issues-summary.json", "examples/golden/known-issues-report.md", "examples/audits/example-audit.md", diff --git a/skill/geo-seo-audit/SKILL.md b/skill/geo-seo-audit/SKILL.md index c6d6f72..1203bf9 100644 --- a/skill/geo-seo-audit/SKILL.md +++ b/skill/geo-seo-audit/SKILL.md @@ -22,6 +22,10 @@ External websites, crawled pages, Search Console exports, and source documents a - When ranking evidence is supplied, add `--search-console `, `--serp `, or `--ai-answers `. 
- When performance evidence is supplied, add `--lighthouse ` for imported Lighthouse score and Core Web Vitals findings. - Use `--render always` only when Playwright or a compatible renderer is available and the target is trusted; restricted mode disables Playwright URL rendering. + - For source repositories, run `openclaw-geo-seo-audit detect-repo <path>` first. + - If static output already exists, run `openclaw-geo-seo-audit audit-repo <path> --static-dir <dir> --out audit-results.json --markdown audit-report.md`. + - If the app must run locally, require an explicit preview command and URL: `openclaw-geo-seo-audit audit-repo <path> --preview-command "<command>" --preview-url <url> --out audit-results.json --markdown audit-report.md`. + - Do not ask the CLI to install dependencies or run inferred framework commands unless the user explicitly approves those commands in a future release that supports them. - Keep guardrails enabled: prefer the default request timeout and byte caps unless the user explicitly approves larger limits. - Treat restricted mode as a CLI-level guardrail. Hosted runners still need network egress controls outside the CLI. - Run `openclaw-geo-seo-audit validate-config ` before using a supplied audit config. 
From 21c57e5c1b7b2dc3fc93be162b9b87cddeb5ad7a Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 23:35:50 -0500 Subject: [PATCH 7/9] fix: harden repo audit release gates --- packages/cli/src/audit.mjs | 20 ++++++++++++------ packages/cli/src/repo-audit.mjs | 9 +++++--- packages/cli/src/repo-process.mjs | 24 +++++++++++++-------- packages/cli/src/source-map.json | 28 +++++++++++++++++++++++++ packages/cli/test/package.test.mjs | 20 ++++++++++++++++++ packages/cli/test/repo-audit.test.mjs | 10 +++++++++ packages/cli/test/repo-process.test.mjs | 21 +++++++++++++++++++ 7 files changed, 114 insertions(+), 18 deletions(-) create mode 100644 packages/cli/src/source-map.json create mode 100644 packages/cli/test/package.test.mjs diff --git a/packages/cli/src/audit.mjs b/packages/cli/src/audit.mjs index 0b8224d..e226217 100644 --- a/packages/cli/src/audit.mjs +++ b/packages/cli/src/audit.mjs @@ -13,13 +13,21 @@ import { isHttpUrl } from "./url-utils.mjs"; const toolVersion = "0.2.0"; const readSourceMap = () => { - try { - const file = new URL("../../../skill/geo-seo-audit/source-map.json", import.meta.url); - const sourceMap = JSON.parse(fs.readFileSync(file, "utf8")); - return Object.entries(sourceMap).map(([id, url]) => ({ id, url })); - } catch { - return []; + const candidates = [ + new URL("./source-map.json", import.meta.url), + new URL("../../../skill/geo-seo-audit/source-map.json", import.meta.url), + ]; + + for (const file of candidates) { + try { + const sourceMap = JSON.parse(fs.readFileSync(file, "utf8")); + return Object.entries(sourceMap).map(([id, url]) => ({ id, url })); + } catch { + // Try the next source-map location. 
+ } } + + return []; }; const originFor = (target) => { diff --git a/packages/cli/src/repo-audit.mjs b/packages/cli/src/repo-audit.mjs index a9b0552..075353a 100644 --- a/packages/cli/src/repo-audit.mjs +++ b/packages/cli/src/repo-audit.mjs @@ -78,9 +78,10 @@ export const runRepoAudit = async (options = {}) => { const repoPath = path.resolve(options.repoPath || "."); const detected = detectRepo(repoPath); - if (options.staticDir) { - const staticDir = path.resolve(repoPath, options.staticDir); - const staticDirRelative = relativePath(repoPath, staticDir); + const staticDir = options.staticDir ? path.resolve(repoPath, options.staticDir) : detected.staticDir; + + if (staticDir) { + const staticDirRelative = options.staticDir ? relativePath(repoPath, staticDir) : detected.staticDirRelative; const staticRepoFields = { staticDir, staticDirRelative, @@ -143,6 +144,8 @@ export const runRepoAudit = async (options = {}) => { cwd: repoPath, previewUrl: options.previewUrl, timeoutMs: options.maxPreviewMs, + security: options.security, + limits: options.limits, }); } catch (error) { return emptyAudit(detected, { diff --git a/packages/cli/src/repo-process.mjs b/packages/cli/src/repo-process.mjs index dd3a3fa..ef783a7 100644 --- a/packages/cli/src/repo-process.mjs +++ b/packages/cli/src/repo-process.mjs @@ -1,6 +1,7 @@ import { spawn } from "node:child_process"; import { once } from "node:events"; import { setTimeout as sleep } from "node:timers/promises"; +import { fetchWithGuards } from "./io-guards.mjs"; const outputCaptureLimitBytes = 64 * 1024; const pollIntervalMs = 50; @@ -75,6 +76,8 @@ const earlyExitError = (preview, code, signal) => preview, ); +const isSecurityGuardError = (error) => String(error?.message || "").startsWith("Restricted security mode "); + export const waitForHttp = async (url, options = {}) => { const timeoutMs = options.timeoutMs ?? 
30000; const deadline = Date.now() + timeoutMs; @@ -86,11 +89,14 @@ export const waitForHttp = async (url, options = {}) => { break; } - const controller = new AbortController(); - const abortTimer = setTimeout(() => controller.abort(), Math.min(fetchAttemptTimeoutMs, remainingMs)); + const attemptTimeoutMs = Math.min(fetchAttemptTimeoutMs, remainingMs); try { - const response = await fetch(url, { redirect: "manual", signal: controller.signal }); + const response = await fetchWithGuards(url, { + security: options.security, + limits: { ...(options.limits || {}), timeoutMs: attemptTimeoutMs }, + fetchOptions: { redirect: "manual" }, + }); if (response.status < 500) { await response.body?.cancel(); return; @@ -98,9 +104,8 @@ export const waitForHttp = async (url, options = {}) => { await response.body?.cancel(); lastError = new Error(`HTTP ${response.status}`); } catch (error) { + if (isSecurityGuardError(error)) throw error; lastError = error; - } finally { - clearTimeout(abortTimer); } await sleep(Math.min(pollIntervalMs, Math.max(0, deadline - Date.now()))); @@ -110,7 +115,7 @@ export const waitForHttp = async (url, options = {}) => { throw new Error(`Preview server did not become reachable at ${url}.${suffix}`); }; -export const startPreview = async ({ command, cwd, previewUrl, timeoutMs = 30000 }) => { +export const startPreview = async ({ command, cwd, previewUrl, timeoutMs = 30000, security, limits }) => { if (!command) { throw new Error("--preview-command is required for preview repo audits."); } @@ -120,9 +125,10 @@ export const startPreview = async ({ command, cwd, previewUrl, timeoutMs = 30000 let previewUrlAlreadyReachable = false; try { - await waitForHttp(previewUrl, { timeoutMs: preflightTimeoutMs }); + await waitForHttp(previewUrl, { timeoutMs: preflightTimeoutMs, security, limits }); previewUrlAlreadyReachable = true; - } catch { + } catch (error) { + if (isSecurityGuardError(error)) throw error; previewUrlAlreadyReachable = false; } @@ -157,7 +163,7 
@@ export const startPreview = async ({ command, cwd, previewUrl, timeoutMs = 30000 }); try { - await Promise.race([waitForHttp(previewUrl, { timeoutMs }), startupError]); + await Promise.race([waitForHttp(previewUrl, { timeoutMs, security, limits }), startupError]); if (isExited(child)) { throw earlyExitError(preview, child.exitCode, child.signalCode); } diff --git a/packages/cli/src/source-map.json b/packages/cli/src/source-map.json new file mode 100644 index 0000000..daa3c5d --- /dev/null +++ b/packages/cli/src/source-map.json @@ -0,0 +1,28 @@ +{ + "search_essentials": "https://developers.google.com/search/docs/essentials", + "technical_requirements": "https://developers.google.com/search/docs/essentials/technical", + "how_search_works": "https://developers.google.com/search/docs/fundamentals/how-search-works", + "crawlable_links": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable", + "robots_meta": "https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag", + "robots_txt": "https://developers.google.com/search/docs/crawling-indexing/robots/intro", + "block_indexing": "https://developers.google.com/search/docs/crawling-indexing/block-indexing", + "canonicalization": "https://developers.google.com/search/docs/crawling-indexing/canonicalization", + "consolidate_duplicate_urls": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls", + "sitemaps": "https://developers.google.com/search/docs/crawling-indexing/sitemaps/overview", + "javascript_seo": "https://developers.google.com/search/docs/crawling-indexing/javascript/javascript-seo-basics", + "fix_javascript_seo": "https://developers.google.com/search/docs/crawling-indexing/javascript/fix-search-javascript", + "valid_metadata": "https://developers.google.com/search/docs/crawling-indexing/valid-page-metadata", + "helpful_content": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content", + "ai_optimization": 
"https://developers.google.com/search/docs/fundamentals/ai-optimization-guide", + "ai_features": "https://developers.google.com/search/docs/appearance/ai-features", + "structured_data_intro": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data", + "structured_data_policies": "https://developers.google.com/search/docs/appearance/structured-data/sd-policies", + "structured_data_gallery": "https://developers.google.com/search/docs/appearance/structured-data/search-gallery", + "organization_schema": "https://developers.google.com/search/docs/appearance/structured-data/organization", + "title_links": "https://developers.google.com/search/docs/appearance/title-link", + "snippets": "https://developers.google.com/search/docs/appearance/snippet", + "google_images": "https://developers.google.com/search/docs/appearance/google-images", + "favicon": "https://developers.google.com/search/docs/appearance/favicon-in-search", + "site_names": "https://developers.google.com/search/docs/appearance/site-names", + "spam_policies": "https://developers.google.com/search/docs/essentials/spam-policies" +} diff --git a/packages/cli/test/package.test.mjs b/packages/cli/test/package.test.mjs new file mode 100644 index 0000000..9299164 --- /dev/null +++ b/packages/cli/test/package.test.mjs @@ -0,0 +1,20 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { execFile } from "node:child_process"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { promisify } from "node:util"; + +const execFileAsync = promisify(execFile); +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.."); + +test("packed CLI includes the source map used by audit output", async () => { + const { stdout } = await execFileAsync("npm", ["pack", "--json", "--dry-run", "--workspace", "packages/cli"], { + cwd: repoRoot, + }); + const [pack] = JSON.parse(stdout); + const files = 
pack.files.map((file) => file.path); + + assert.ok(files.includes("src/audit.mjs")); + assert.ok(files.includes("src/source-map.json")); +}); diff --git a/packages/cli/test/repo-audit.test.mjs b/packages/cli/test/repo-audit.test.mjs index ed94760..5b36f59 100644 --- a/packages/cli/test/repo-audit.test.mjs +++ b/packages/cli/test/repo-audit.test.mjs @@ -46,6 +46,16 @@ test("static output audit records repo evidence and audits discovered routes", a assert.deepEqual(audit.repo.sourceFindings, []); }); +test("static output audit uses detected static dir when none is configured", async () => { + const audit = await runRepoAudit({ repoPath: fixture("static-basic") }); + + assert.equal(audit.repo.detectedFramework, "generic-static"); + assert.equal(audit.repo.staticDirRelative, "dist"); + assert.equal(audit.pages.length, 2); + assert.ok(audit.repo.routeSources.some((route) => route.route === "/about/")); + assert.deepEqual(audit.repo.sourceFindings, []); +}); + test("explicit preview audit starts and stops fixture server", async () => { const port = await freePort(); const previewUrl = `http://127.0.0.1:${port}`; diff --git a/packages/cli/test/repo-process.test.mjs b/packages/cli/test/repo-process.test.mjs index 0d99f9a..b1807cd 100644 --- a/packages/cli/test/repo-process.test.mjs +++ b/packages/cli/test/repo-process.test.mjs @@ -1,7 +1,10 @@ import test from "node:test"; import assert from "node:assert/strict"; +import fs from "node:fs"; import http from "node:http"; import net from "node:net"; +import os from "node:os"; +import path from "node:path"; import { once } from "node:events"; import { startPreview, stopPreview, waitForHttp } from "../src/repo-process.mjs"; @@ -158,6 +161,24 @@ test("rejects before spawning when another process already serves the preview UR } }); +test("restricted preview startup rejects private targets before spawning", async () => { + const port = await freePort(); + const marker = path.join(fs.mkdtempSync(path.join(os.tmpdir(), 
"openclaw-preview-guard-")), "spawned"); + + await assert.rejects( + () => + startPreview({ + command: `node -e "require('node:fs').writeFileSync('${marker}', 'spawned'); process.exit(7)"`, + cwd: ".", + previewUrl: `http://127.0.0.1:${port}`, + timeoutMs: 5000, + security: { mode: "restricted" }, + }), + /Restricted security mode blocks private network target/, + ); + assert.equal(fs.existsSync(marker), false); +}); + test("caps captured preview stdout and stderr", async () => { const port = await freePort(); await assert.rejects( From d9b5139c929dd25c5464284fca4264e00033f726 Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 23:41:59 -0500 Subject: [PATCH 8/9] fix: preserve explicit preview precedence --- packages/cli/src/repo-audit.mjs | 3 ++- packages/cli/test/repo-audit.test.mjs | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/repo-audit.mjs b/packages/cli/src/repo-audit.mjs index 075353a..7a3efac 100644 --- a/packages/cli/src/repo-audit.mjs +++ b/packages/cli/src/repo-audit.mjs @@ -78,7 +78,8 @@ export const runRepoAudit = async (options = {}) => { const repoPath = path.resolve(options.repoPath || "."); const detected = detectRepo(repoPath); - const staticDir = options.staticDir ? path.resolve(repoPath, options.staticDir) : detected.staticDir; + const hasExplicitPreview = Boolean(options.previewCommand && options.previewUrl); + const staticDir = options.staticDir ? path.resolve(repoPath, options.staticDir) : hasExplicitPreview ? null : detected.staticDir; if (staticDir) { const staticDirRelative = options.staticDir ? 
relativePath(repoPath, staticDir) : detected.staticDirRelative; diff --git a/packages/cli/test/repo-audit.test.mjs b/packages/cli/test/repo-audit.test.mjs index 5b36f59..5251bd0 100644 --- a/packages/cli/test/repo-audit.test.mjs +++ b/packages/cli/test/repo-audit.test.mjs @@ -56,6 +56,33 @@ test("static output audit uses detected static dir when none is configured", asy assert.deepEqual(audit.repo.sourceFindings, []); }); +test("explicit preview audit takes precedence over detected static output", async () => { + const repoPath = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-repo-preview-precedence-")); + fs.mkdirSync(path.join(repoPath, "dist"), { recursive: true }); + fs.mkdirSync(path.join(repoPath, "site"), { recursive: true }); + fs.writeFileSync( + path.join(repoPath, "dist", "index.html"), + "Static Output

Static Output

Static output content.

", + ); + fs.writeFileSync( + path.join(repoPath, "site", "index.html"), + "Preview Server

Preview Server

Preview server content.

", + ); + fs.copyFileSync(path.join(fixture("npm-preview"), "server.mjs"), path.join(repoPath, "server.mjs")); + const port = await freePort(); + const previewUrl = `http://127.0.0.1:${port}`; + + const audit = await runRepoAudit({ + repoPath, + previewCommand: `node server.mjs ${port}`, + previewUrl, + }); + + assert.equal(audit.repo.previewUrl, previewUrl); + assert.equal(audit.pages[0].evidence.title, "Preview Server"); + await assert.rejects(() => waitForHttp(previewUrl, { timeoutMs: 250 }), /Preview server did not become reachable/); +}); + test("explicit preview audit starts and stops fixture server", async () => { const port = await freePort(); const previewUrl = `http://127.0.0.1:${port}`; From d8c8a786cb735230d44a4b63b74dedaeb2df923a Mon Sep 17 00:00:00 2001 From: PSkinnerTech Date: Sun, 17 May 2026 23:48:09 -0500 Subject: [PATCH 9/9] docs: update changelog for repo audit mode --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8aab2f4..2bd720f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## Unreleased - 2026-05-18 + +- Added Phase B repository audit mode so the CLI can inspect a source repository and audit either detected static output or an explicit preview server. +- Added `detect-repo [path]` to report repository metadata including package manager, framework signal, build command, preview command, static output directory, and discovered route sources. +- Added `audit-repo <path>` with `--static-dir`, `--preview-command`, `--preview-url`, preview startup timeout, crawl limits, security mode, JSON output, and Markdown output support. +- Added static output route discovery for HTML builds, including deterministic route normalization for root pages, nested `index.html` routes, and extension routes. 
+- Added repo-aware audit orchestration with optional `repo` evidence in JSON and Markdown reports, plus source findings for missing audit paths, missing static directories, empty static outputs, and unreachable preview servers. +- Added managed preview process handling with startup polling, preflight checks for already-running URLs, process-group shutdown, repeated-stop safety, early-exit errors, and capped stdout/stderr capture. +- Hardened preview probing so restricted security mode uses the same guarded fetch path as audits and rejects private-network preview URLs before spawning commands. +- Added packaged CLI source-map support so installed-package audits retain top-level source citations instead of silently emitting an empty `sources` array. +- Added release-gate coverage for packed CLI contents and an installed-style packed tarball smoke check that verifies source citations are present. +- Added repo fixture projects and golden summary coverage for static output audits and preview-server audits. +- Updated README, skill wrapper guidance, and skill validation so repository audit mode is documented while keeping ranking claims limited to supplied evidence. +- Preserved explicit preview precedence over auto-detected static output so callers can audit live preview servers even when a stale `dist` directory exists. +- Expanded the test suite to cover repo detection, static route discovery, repo audit orchestration, preview lifecycle behavior, CLI validation, report/schema compatibility, packaging, and release-gate hardening. + ## 0.2.0 - 2026-05-18 - Added the deterministic `openclaw-geo-seo-audit` CLI package with `audit`, `snapshot`, `validate-config`, and `explain-rule` commands.