diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 4cf046f..0cb6785 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -100,10 +100,11 @@ Two background concerns keep that store in sync with the world: ## Local setup 1. `asdf install` — picks up Node from `.tool-versions` -2. Clone the data repo as a sibling: `git clone git@github.com:CodeForPhilly/codeforphilly-data.git ../codeforphilly-data` (checkout `fixture` for a small seed, or `published` for the full laddr import) -3. `cp .env.example .env` and edit — point `CFP_DATA_REPO_PATH` at your sibling clone (absolute path recommended; relative paths resolve from `apps/api/`, not repo root) -4. `npm install` -5. `npm run dev` — api + web concurrently +2. **Bare-clone** the data repo as a sibling: `git clone --bare git@github.com:CodeForPhilly/codeforphilly-data.git ../codeforphilly-data` (use `--branch fixture` for a small seed, or `--branch published` for the full laddr import). The app reads via gitsheets' tree-object interface and *requires* bare — see [specs/behaviors/storage.md](../specs/behaviors/storage.md) → "The data clone is bare." +3. *(optional)* If you want a working tree to browse/edit records by hand, clone *from* your bare into a second directory: `git clone ../codeforphilly-data ../codeforphilly-data-wt`. Push/pull between them; the app doesn't care. +4. `cp .env.example .env` and edit — point `CFP_DATA_REPO_PATH` at your bare sibling clone (absolute path recommended; relative paths resolve from `apps/api/`, not repo root) +5. `npm install` +6. `npm run dev` — api + web concurrently ```bash npm install # install all workspaces @@ -124,7 +125,7 @@ Typical change flow: 1. **Merge to `main`** — CI builds + tests; nothing deploys yet. 2. **Publish image** (currently manual) — `docker build --platform=linux/amd64 -t ghcr.io/codeforphilly/codeforphilly-ng:sandbox . && docker push …`. Apple-silicon dev machines must set the platform flag — cluster nodes are amd64. 3. **GitOps pickup** — `cfp-sandbox-cluster` projects from our `deploy/kustomize/`; on its own merge, applies via `kubectl apply -k`. -4. **Pod boot** — single replica, `Recreate` strategy. Container entrypoint clones the data repo on first boot (PVC persists across pods). Node boots: env → store load → **reconcile** (ff/rebase/escape-hatch against `origin/`) → **push daemon** → routes → SPA. `/api/health/ready` returns 200 once stores are loaded *and* reconciled. +4. **Pod boot** — single replica, `Recreate` strategy. Container entrypoint bare-clones the data repo on every pod start (the data volume is `emptyDir`, so first boot = every fresh pod). Node boots: env → store load → **reconcile** (ff/replay/escape-hatch against `origin/`) → **push daemon** → routes → SPA. `/api/health/ready` returns 200 once stores are loaded *and* reconciled. 5. **Live data updates** — independent of app deploy. Pushes to `published` trigger the [hot-reload webhook](../docs/operations/runbook.md#hot-reload-webhook); the pod rebuilds in-memory state in place, no restart. Constraints worth knowing before touching anything deploy-shaped: diff --git a/apps/api/scripts/import-laddr/importer.ts b/apps/api/scripts/import-laddr/importer.ts index ae56fdf..81e1e2a 100644 --- a/apps/api/scripts/import-laddr/importer.ts +++ b/apps/api/scripts/import-laddr/importer.ts @@ -557,34 +557,44 @@ async function ensureGitRepo(repo: string): Promise { } /** - * Check out the target branch in the working tree. On first run, create it - * from `origin/` if available, falling back to `initialParent` - * (typically `origin/empty`). + * Point HEAD at the target branch in the bare data repo. On first run, + * the branch ref is created from `origin/` if available, falling + * back to `initialParent` (typically `origin/empty`). + * + * Bare-friendly — no working tree to check out into. The branch ref is + * created/updated via `git update-ref`, and HEAD becomes a symbolic-ref + * pointing at it so subsequent transacts commit onto the right branch. */ async function ensureBranchCheckedOut( repo: string, branch: string, initialParent: string, ): Promise { - // Existing local branch: just switch. - try { - await exec('git', ['rev-parse', '--verify', `refs/heads/${branch}`], { cwd: repo }); - await exec('git', ['checkout', branch], { cwd: repo }); - return; - } catch { - // No local branch — fall through. - } - // No local branch yet. Try origin/, fall back to initialParent. - let parent: string; + // Resolve the parent commit hash: either the existing local branch (use + // it as-is), origin/ if it exists, or the initialParent fallback. + let parentCommit: string; try { - await exec('git', ['rev-parse', '--verify', `refs/remotes/origin/${branch}`], { - cwd: repo, - }); - parent = `origin/${branch}`; + const result = await exec('git', ['rev-parse', '--verify', `refs/heads/${branch}`], { cwd: repo }); + parentCommit = result.stdout.trim(); } catch { - parent = initialParent; + // No local branch — try origin/, fall back to initialParent. + let parentRef: string; + try { + await exec('git', ['rev-parse', '--verify', `refs/remotes/origin/${branch}`], { + cwd: repo, + }); + parentRef = `refs/remotes/origin/${branch}`; + } catch { + parentRef = initialParent; + } + const result = await exec('git', ['rev-parse', '--verify', parentRef], { cwd: repo }); + parentCommit = result.stdout.trim(); + await exec('git', ['update-ref', `refs/heads/${branch}`, parentCommit], { cwd: repo }); } - await exec('git', ['checkout', '-b', branch, parent], { cwd: repo }); + + // Point HEAD at the (now-existing) branch so subsequent gitsheets + // transacts commit onto it. + await exec('git', ['symbolic-ref', 'HEAD', `refs/heads/${branch}`], { cwd: repo }); } // --------------------------------------------------------------------------- diff --git a/apps/api/scripts/scrub-data.ts b/apps/api/scripts/scrub-data.ts index c74f94c..0fe64fb 100644 --- a/apps/api/scripts/scrub-data.ts +++ b/apps/api/scripts/scrub-data.ts @@ -476,7 +476,7 @@ export async function scrubRepo(opts: ScrubOptions): Promise { // ------------------------------------------------------------------------- // 1. Open source repo // ------------------------------------------------------------------------- - const sourceRepo = await openRepo({ workTree: source, gitDir: join(source, '.git') }); + const sourceRepo = await openRepo({ gitDir: source }); const sourceHeadHash = await sourceRepo.resolveRef('HEAD'); // ------------------------------------------------------------------------- @@ -585,16 +585,24 @@ export async function scrubRepo(opts: ScrubOptions): Promise { await writeFile(fullPath, content, 'utf-8'); } - // Copy .gitsheets config from source - const sourceGitsheetsDir = join(source, '.gitsheets'); + // Copy .gitsheets configs from source's HEAD tree (source is a bare clone, + // per specs/behaviors/storage.md → "The data clone is bare", so the configs + // live as git blobs rather than working-tree files). const targetGitsheetsDir = join(target, '.gitsheets'); await mkdir(targetGitsheetsDir, { recursive: true }); - const configFiles = await readdir(sourceGitsheetsDir, { withFileTypes: true }).catch(() => []); - for (const entry of configFiles) { - if (entry.isFile() && entry.name.endsWith('.toml')) { - const configContent = await readFile(join(sourceGitsheetsDir, entry.name), 'utf-8'); - await writeFile(join(targetGitsheetsDir, entry.name), configContent, 'utf-8'); - } + const lsTreeResult = await exec('git', [ + '--git-dir', source, + 'ls-tree', '--name-only', 'HEAD', '.gitsheets/', + ]); + const configFiles = lsTreeResult.stdout + .split('\n') + .filter((name) => name.endsWith('.toml')); + for (const path of configFiles) { + const blob = await exec('git', [ + '--git-dir', source, + 'show', `HEAD:${path}`, + ]); + await writeFile(join(target, path), blob.stdout, 'utf-8'); } // ------------------------------------------------------------------------- diff --git a/apps/api/src/store/public.ts b/apps/api/src/store/public.ts index 160e1e9..89a239d 100644 --- a/apps/api/src/store/public.ts +++ b/apps/api/src/store/public.ts @@ -1,3 +1,6 @@ +import { existsSync } from 'node:fs'; +import { join } from 'node:path'; + import { openRepo, openStore } from 'gitsheets'; import type { Repository, StandardSchemaV1, Store, StoreTx, ValidatorMap } from 'gitsheets'; import { @@ -60,17 +63,33 @@ export type PublicStoreTx = StoreTx; /** * Open the gitsheets-backed public data store. * - * Reads `.gitsheets/.toml` for each declared sheet in `repoPath`. - * In-memory secondary indices are built by the caller (boot.ts) after this + * `repoPath` is the **bare gitdir** — no `.git` subdirectory. The app + * always operates against a bare clone per + * specs/behaviors/storage.md → "The data clone is bare". A non-bare + * repoPath fails loudly here so misconfiguration surfaces at boot + * rather than as runtime drift between the API's HEAD and a stale + * working tree. + * + * Reads `.gitsheets/.toml` for each declared sheet. In-memory + * secondary indices are built by the caller (boot.ts) after this * returns, since they require iterating over all records. * - * Returns both the typed store and the underlying Repository handle — the - * latter is needed by the push-daemon plugin to push commits to origin. + * Returns both the typed store and the underlying Repository handle — + * the latter is needed by the push-daemon plugin to push commits to + * origin. */ export async function openPublicStore( repoPath: string, ): Promise<{ store: PublicStore; repo: Repository }> { - const repo = await openRepo({ gitDir: `${repoPath}/.git`, workTree: repoPath }); + if (existsSync(join(repoPath, '.git'))) { + throw new Error( + `CFP_DATA_REPO_PATH=${repoPath} looks like a non-bare clone (found .git subdirectory). ` + + `The app requires a bare clone — re-create with: git clone --bare ${repoPath}. ` + + `See specs/behaviors/storage.md → "The data clone is bare".`, + ); + } + + const repo = await openRepo({ gitDir: repoPath }); repo.requireExplicitTransactions(); const validators: PublicValidators = { diff --git a/apps/api/src/store/reconcile.ts b/apps/api/src/store/reconcile.ts index 7577b60..b69a564 100644 --- a/apps/api/src/store/reconcile.ts +++ b/apps/api/src/store/reconcile.ts @@ -95,14 +95,29 @@ interface GitExecResult { readonly stderr: string; } +interface GitExecResultWithExit extends GitExecResult { + readonly exitCode: number; +} + +interface GitOpts { + /** Override the child-process env. Used for setting `GIT_AUTHOR_*` on commit-tree calls. */ + readonly env?: NodeJS.ProcessEnv; + /** + * Treat a non-zero exit code as a normal result (return with `exitCode > 0`) + * instead of throwing. Used for `merge-tree --write-tree`, which signals + * conflicts via exit 1 rather than via stderr-only. + */ + readonly allowFailure?: boolean; +} + /** * Run a git command in the data repo. `stderr` is captured (not piped) so * exit codes propagate cleanly — none of the pipe-eats-exit-code class of * bugs that bit the shell entrypoint. * - * Throws if git exits non-zero. Most call sites in this module trap the - * throw and translate it into a structured outcome; only unrecoverable - * errors bubble out of `reconcileDataRepo`. + * Throws if git exits non-zero (unless `opts.allowFailure`). Most call + * sites in this module trap the throw and translate it into a structured + * outcome; only unrecoverable errors bubble out of `reconcileDataRepo`. */ async function git( repoPath: string, @@ -111,6 +126,154 @@ async function git( return exec('git', [...args], { cwd: repoPath, maxBuffer: 32 * 1024 * 1024 }); } +async function gitWithOpts( + repoPath: string, + opts: GitOpts, + ...args: readonly string[] +): Promise { + try { + const result = await exec('git', [...args], { + cwd: repoPath, + maxBuffer: 32 * 1024 * 1024, + env: opts.env, + }); + return { ...result, exitCode: 0 }; + } catch (err) { + if ( + opts.allowFailure && + err !== null && + typeof err === 'object' && + 'code' in err + ) { + const e = err as { code?: unknown; stdout?: unknown; stderr?: unknown }; + return { + stdout: typeof e.stdout === 'string' ? e.stdout : '', + stderr: typeof e.stderr === 'string' ? e.stderr : '', + exitCode: typeof e.code === 'number' ? e.code : 1, + }; + } + throw err; + } +} + +/** + * Signal that a `merge-tree --write-tree` step couldn't produce a clean tree. + * Caught by the diverged-branch handler to route to the escape-hatch path. + */ +class RebaseReplayConflictError extends Error { + readonly conflictingCommit: string; + constructor(commit: string, mergeOutput: string) { + super(`merge-tree conflict replaying ${commit}: ${mergeOutput}`); + this.name = 'RebaseReplayConflictError'; + this.conflictingCommit = commit; + } +} + +/** + * Replay local commits on top of a remote tip, bare-style. Mirrors today's + * `git rebase ` semantics — linear history, original + * author/message metadata preserved, committer rewritten to the API's + * pseudonymous identity — but done entirely via plumbing so it works on + * bare clones with no working tree. + * + * For each local commit C (oldest first), runs: + * git merge-tree --write-tree --merge-base=C^ C + * → tree hash, or exit 1 with conflict markers on stderr + * git commit-tree -p -m + * → new commit hash; becomes newTip for the next iteration + * + * Throws `RebaseReplayConflictError` on first conflict so the caller can + * route to the escape hatch (preserve pre-rebase HEAD on conflicts/, + * fast-forward local refs to remote tip). + */ +async function replayLocalOntoRemote( + repoPath: string, + localTip: string, + remoteTip: string, + mergeBase: string, +): Promise { + const revList = await git( + repoPath, + 'rev-list', + '--reverse', + `${mergeBase}..${localTip}`, + ); + const localCommits = revList.stdout + .trim() + .split('\n') + .filter((line) => line.length > 0); + + let newTip = remoteTip; + for (const commit of localCommits) { + const parent = ( + await git(repoPath, 'rev-parse', `${commit}^`) + ).stdout.trim(); + + const merge = await gitWithOpts( + repoPath, + { allowFailure: true }, + 'merge-tree', + '--write-tree', + `--merge-base=${parent}`, + newTip, + commit, + ); + if (merge.exitCode !== 0) { + // merge-tree --write-tree prints the conflicting tree hash + a list of + // conflicting paths on stdout, with conflict markers (or `info`-style + // lines) on stderr depending on the conflict type. + throw new RebaseReplayConflictError( + commit, + [merge.stderr, merge.stdout].filter(Boolean).join('\n'), + ); + } + // First line of stdout is the merged tree's hash. (Subsequent lines, if + // any, describe non-conflict messages; we only care about the tree hash.) + const mergedTreeHash = merge.stdout.split('\n')[0]?.trim() ?? ''; + if (!/^[0-9a-f]{40}$/.test(mergedTreeHash)) { + throw new Error( + `merge-tree returned unexpected stdout (no tree hash on first line) while replaying ${commit}: ${merge.stdout.slice(0, 200)}`, + ); + } + + // Read original commit metadata: author name/email/iso-date + full + // message (subject + body + trailers). Committer is rewritten to the + // API's pseudonymous identity — matches today's `git rebase` behavior + // (rebase preserves author, resets committer to current user). + const [authorName, authorEmail, authorDate, message] = await Promise.all([ + git(repoPath, 'show', '-s', '--format=%an', commit).then((r) => r.stdout.trimEnd()), + git(repoPath, 'show', '-s', '--format=%ae', commit).then((r) => r.stdout.trimEnd()), + git(repoPath, 'show', '-s', '--format=%aI', commit).then((r) => r.stdout.trimEnd()), + git(repoPath, 'show', '-s', '--format=%B', commit).then((r) => r.stdout.trimEnd()), + ]); + + const commitTree = await gitWithOpts( + repoPath, + { + env: { + ...process.env, + GIT_AUTHOR_NAME: authorName, + GIT_AUTHOR_EMAIL: authorEmail, + GIT_AUTHOR_DATE: authorDate, + GIT_COMMITTER_NAME: AUTHOR_NAME, + GIT_COMMITTER_EMAIL: AUTHOR_EMAIL, + // GIT_COMMITTER_DATE: omitted — picks up wall-clock, matches + // today's `git rebase` (preserves author date, resets committer date). + }, + }, + 'commit-tree', + mergedTreeHash, + '-p', + newTip, + '-m', + message, + ); + newTip = commitTree.stdout.trim(); + } + + return newTip; +} + /** * Cast an `unknown` error to something describable. Mirrors importer.ts's * `describe()` — keeping the shape consistent across the codebase. @@ -134,18 +297,19 @@ function conflictBranchName(now: Date): string { } /** - * Reconcile the local working tree against `/`. + * Reconcile the local bare data clone against `/`. * * Idempotent — calling repeatedly with no upstream changes is a no-op * (`outcome: 'in-sync'`). * - * Assumes the caller has already checked out `branch` in the working tree. - * (The entrypoint's surviving responsibility is the initial clone; the API's - * is the reconciliation.) + * Operates entirely via plumbing (`update-ref`, `merge-tree --write-tree`, + * `commit-tree`) so it works against a bare repo with no working tree — + * per the invariant in + * specs/behaviors/storage.md → "The data clone is bare". * * Concurrency: this function MUST be called under a write mutex if there * are any concurrent gitsheets mutations against the same repo. At boot - * there's no contention; the future webhook (#65) acquires the mutex first. + * there's no contention; the hot-reload webhook acquires the mutex first. */ export async function reconcileDataRepo( opts: ReconcileOptions, @@ -195,7 +359,11 @@ export async function reconcileDataRepo( await git(repoPath, 'merge-base', 'HEAD', remoteRef) ).stdout.trim(); - // Behind: fast-forward. + // Behind: fast-forward by updating the branch ref straight to the remote + // commit. `update-ref ` is the bare-repo equivalent of + // `merge --ff-only` and the trailing `` argument makes the operation + // CAS-safe — a race with another process advancing the ref would fail + // here rather than silently clobbering. if (oldCommit === mergeBase) { const behind = Number( (await git(repoPath, 'rev-list', '--count', `HEAD..${remoteRef}`)).stdout.trim(), @@ -204,9 +372,8 @@ export async function reconcileDataRepo( { branch, behind, from: oldCommit, to: remoteCommit }, 'data-repo behind remote — fast-forwarding', ); - await git(repoPath, 'merge', '--ff-only', remoteRef); - const newCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); - return { outcome: 'fast-forwarded', oldCommit, newCommit, behind }; + await git(repoPath, 'update-ref', `refs/heads/${branch}`, remoteCommit, oldCommit); + return { outcome: 'fast-forwarded', oldCommit, newCommit: remoteCommit, behind }; } // Ahead: push. @@ -244,33 +411,39 @@ export async function reconcileDataRepo( 'data-repo diverged from remote — attempting rebase', ); + let newCommit: string; try { - await git(repoPath, 'rebase', remoteRef); + // Bare-friendly rebase: replay local commits on top of the remote tip + // using `merge-tree --write-tree` + `commit-tree`. Leaves no + // mid-operation state behind to abort on conflict — the throw is + // self-contained. + newCommit = await replayLocalOntoRemote(repoPath, oldCommit, remoteCommit, mergeBase); + // Move the branch ref to the replayed tip. CAS against oldCommit so a + // racing in-process transact (impossible at boot under the mutex, + // possible-in-principle during webhook reconcile) surfaces as an error + // rather than silently dropping commits. + await git(repoPath, 'update-ref', `refs/heads/${branch}`, newCommit, oldCommit); } catch (err) { - // Rebase failed — escape hatch. + // Either a merge-tree conflict or some other plumbing failure during the + // replay — escape hatch. logger.error( { err: describe(err), branch, ahead, behind, local: oldCommit, remote: remoteCommit }, 'data-repo rebase conflicted — invoking escape hatch', ); - // Abort the in-progress rebase. If `git rebase --abort` itself fails, - // we still want to forge ahead to the conflict-branch preservation — - // log + continue. - try { - await git(repoPath, 'rebase', '--abort'); - } catch (abortErr) { - logger.warn( - { err: describe(abortErr), branch }, - 'rebase --abort itself failed; continuing escape-hatch', - ); - } + // No mid-rebase state to abort: the replay is pure plumbing on the + // object DB; nothing in `refs/` was advanced. We just need to preserve + // the pre-reconcile HEAD on a uniquely-named branch and fast-forward + // local to the remote tip. // Preserve the pre-rebase HEAD on a uniquely-named branch so the - // operator can investigate. `branch --force` lets the reconciler stay - // idempotent even on the same-second second invocation in tests. + // operator can investigate. `update-ref --create-reflog` keeps the + // reconciler idempotent even on a same-second second invocation in + // tests; `update-ref refs/heads/ ` (two-arg form) doesn't + // require the branch to be absent. const conflictBranch = conflictBranchName(now()); try { - await git(repoPath, 'branch', '--force', conflictBranch, oldCommit); + await git(repoPath, 'update-ref', `refs/heads/${conflictBranch}`, oldCommit); } catch (branchErr) { // If we can't even create a ref locally, that's an unrecoverable // filesystem-level problem. @@ -282,7 +455,7 @@ export async function reconcileDataRepo( // Push the conflict branch to origin so the operator can see it from // GitHub. Non-fatal if push fails — the local ref is still there for - // forensic recovery via the PVC. + // forensic recovery via the bare repo on the pod's emptyDir. try { await git(repoPath, 'push', remote, conflictBranch); logger.error( @@ -301,13 +474,14 @@ export async function reconcileDataRepo( ); } - // Hard-reset to origin so the pod boots from a known-good state. - await git(repoPath, 'reset', '--hard', remoteRef); - const newCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); + // Fast-forward the working branch to the remote tip so the pod boots + // from a known-good state. (Equivalent to today's `git reset --hard`, + // but bare-friendly.) + await git(repoPath, 'update-ref', `refs/heads/${branch}`, remoteCommit, oldCommit); return { outcome: 'conflict-escaped', oldCommit, - newCommit, + newCommit: remoteCommit, conflictBranch, ahead, behind, @@ -315,7 +489,6 @@ export async function reconcileDataRepo( } // Rebase succeeded — push. - const newCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); logger.info( { branch, ahead, behind, from: oldCommit, to: newCommit }, 'data-repo rebase clean — pushing', diff --git a/apps/api/tests/account-claim.test.ts b/apps/api/tests/account-claim.test.ts index eee5fc3..fdeb3f2 100644 --- a/apps/api/tests/account-claim.test.ts +++ b/apps/api/tests/account-claim.test.ts @@ -21,13 +21,14 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { type FastifyInstance } from 'fastify'; import { execFile } from 'node:child_process'; import { promisify } from 'node:util'; -import { writeFile, mkdir, readFile } from 'node:fs/promises'; +import { writeFile, readFile } from 'node:fs/promises'; import { join } from 'node:path'; import bcrypt from 'bcryptjs'; import { buildApp } from '../src/app.js'; import { issueClaimPending, issueSession } from '../src/auth/jwt.js'; import { createFullDataRepo, createPrivateStorageDir } from './helpers/test-full-repo.js'; +import { seedRawToml } from './helpers/seed-fixtures.js'; const exec = promisify(execFile); const JWT_KEY = 'test-jwt-signing-key-at-least-32-chars!!'; @@ -52,7 +53,6 @@ async function seedPerson( id: string, opts: SeedPersonOpts = {}, ): Promise { - const git = (...args: string[]) => exec('git', args, { cwd: repoDir }); const lines = [ `id = "${id}"`, `slug = "${slug}"`, @@ -68,14 +68,7 @@ async function seedPerson( lines.push(`slackSamlNameId = "${opts.slackSamlNameId}"`); } - await mkdir(join(repoDir, 'people'), { recursive: true }); - await writeFile(join(repoDir, 'people', `${slug}.toml`), lines.join('\n')); - await git('add', `people/${slug}.toml`); - await git( - '-c', 'user.email=test@cfp.test', - '-c', 'user.name=test', - 'commit', '-m', `seed person ${slug}`, - ); + await seedRawToml(repoDir, `people/${slug}.toml`, lines.join('\n'), `seed person ${slug}`); } async function seedPrivateProfile( diff --git a/apps/api/tests/auth.test.ts b/apps/api/tests/auth.test.ts index f572615..d3c8e16 100644 --- a/apps/api/tests/auth.test.ts +++ b/apps/api/tests/auth.test.ts @@ -20,17 +20,13 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { type FastifyInstance } from 'fastify'; import { SignJWT } from 'jose'; -import { execFile } from 'node:child_process'; -import { promisify } from 'node:util'; -import { writeFile, mkdir } from 'node:fs/promises'; -import { join } from 'node:path'; import { buildApp } from '../src/app.js'; import { mintSessionFor } from '../src/auth/issue.js'; import { verifyAccess, verifyRefresh } from '../src/auth/jwt.js'; import { createFullDataRepo, createPrivateStorageDir } from './helpers/test-full-repo.js'; +import { seedRawToml } from './helpers/seed-fixtures.js'; -const exec = promisify(execFile); const JWT_KEY = 'test-jwt-signing-key-at-least-32-chars!!'; async function buildTestApp( @@ -61,7 +57,6 @@ async function seedPerson( id: string, accountLevel = 'user', ): Promise { - const git = (...args: string[]) => exec('git', args, { cwd: repoDir }); const personToml = [ `id = "${id}"`, `slug = "${slug}"`, @@ -71,14 +66,7 @@ async function seedPerson( `updatedAt = "2026-05-01T00:00:00Z"`, ].join('\n'); - await mkdir(join(repoDir, 'people'), { recursive: true }); - await writeFile(join(repoDir, 'people', `${slug}.toml`), personToml); - await git('add', `people/${slug}.toml`); - await git( - '-c', 'user.email=test@cfp.test', - '-c', 'user.name=test', - 'commit', '-m', `seed person ${slug}`, - ); + await seedRawToml(repoDir, `people/${slug}.toml`, personToml, `seed person ${slug}`); } // --------------------------------------------------------------------------- diff --git a/apps/api/tests/cutover-mailout.test.ts b/apps/api/tests/cutover-mailout.test.ts index 4ad0e3b..2c57648 100644 --- a/apps/api/tests/cutover-mailout.test.ts +++ b/apps/api/tests/cutover-mailout.test.ts @@ -22,7 +22,7 @@ async function seedPerson( repoPath: string, fields: { id: string; slug: string; fullName?: string; githubUserId?: number; deletedAt?: string }, ): Promise { - const repo = await openRepo({ gitDir: `${repoPath}/.git`, workTree: repoPath }); + const repo = await openRepo({ gitDir: repoPath }); await repo.transact( { message: `seed person ${fields.slug}`, author: { name: 'test', email: 'test@cfp.test' } }, async (tx) => { diff --git a/apps/api/tests/data-repo-reconcile.test.ts b/apps/api/tests/data-repo-reconcile.test.ts index 69bc9e7..3dbfe2d 100644 --- a/apps/api/tests/data-repo-reconcile.test.ts +++ b/apps/api/tests/data-repo-reconcile.test.ts @@ -57,11 +57,16 @@ async function git(cwd: string, ...args: string[]): Promise { } /** - * Initialize a tracked working tree at a tmpdir + a corresponding bare - * "remote" repo. Returns paths + cleanup. The local tree is cloned from - * the bare via filesystem URL so push/fetch round-trip locally. + * Initialize a **bare** local clone at a tmpdir + a corresponding bare + * "remote" repo. Both repos are bare to match the production invariant + * documented in specs/behaviors/storage.md — gitsheets / the reconciler + * operate via plumbing on the object DB only, never via a working tree. * - * Both initial commits live on `main`; the local working tree is on `main`. + * Test helpers that need to mint commits do so through a transient + * working-tree clone (see `commitToBare` / `advanceRemote`), then push + * back to whichever bare they're targeting. + * + * Both initial commits live on `main`. */ interface TestRig { readonly bare: string; @@ -73,7 +78,7 @@ async function createRig(): Promise { const root = await mkdtemp(join(tmpdir(), 'cfp-reconcile-')); const bare = join(root, 'remote.git'); const seed = join(root, 'seed'); - const local = join(root, 'local'); + const local = join(root, 'local.git'); // Seed repo: produces the initial commit on `main`. await exec('git', ['init', '-b', 'main', seed]); @@ -87,23 +92,22 @@ async function createRig(): Promise { // Bare remote. await exec('git', ['init', '--bare', '-b', 'main', bare]); - // The bare needs to allow non-fast-forward pushes for the conflict-branch - // tests; default git allows that for branch creation but the bare needs - // `receive.denyCurrentBranch=warn` so pushes to `main` succeed (the - // bare has main checked out as HEAD). + // Allow non-fast-forward pushes onto the current branch (the bare has + // main checked out as HEAD) so the conflict-branch tests can push. await git(bare, 'config', 'receive.denyCurrentBranch', 'ignore'); await exec('git', ['push', bare, 'main'], { cwd: seed }); - // Local clone from the bare. - await exec('git', ['clone', bare, local]); + // Local clone from the bare — also bare, matching the runtime invariant. + await exec('git', ['clone', '--bare', bare, local]); await git(local, 'config', 'user.email', 'local@test.local'); await git(local, 'config', 'user.name', 'local'); await git(local, 'config', 'commit.gpgsign', 'false'); await git(local, 'config', 'core.hooksPath', '/dev/null'); - // git clone may pick up the global remote.origin.fetch refspec; make sure - // it's the standard "all branches" refspec so fetch works as expected. - // (The reconciler will override per-call anyway, but the bare/seed plumbing - // uses plain `git push`/`git fetch` from the test helpers.) + // A bare `git clone --bare` writes a narrow `+refs/heads/*:refs/heads/*` + // refspec on origin. The reconciler always passes an explicit refspec + // on fetch so this is fine for it, but make sure HEAD points at the + // single-branch the test expects. + await git(local, 'symbolic-ref', 'HEAD', 'refs/heads/main'); return { bare, @@ -114,43 +118,56 @@ async function createRig(): Promise { }; } -/** Commit a file into a working tree and return the new HEAD. */ -async function commitFile( - cwd: string, - filename: string, - contents: string, - message: string, -): Promise { - await writeFile(join(cwd, filename), contents); - await git(cwd, 'add', filename); - await git(cwd, 'commit', '-m', message); - return git(cwd, 'rev-parse', 'HEAD'); -} - /** - * Push a new commit onto the bare's `main` by way of an ephemeral working - * tree clone, so the bare advances ahead of `local`. + * Mint a new commit onto a bare repo's `main` via an ephemeral working + * tree clone. Used by both the local and remote helpers — the bare and + * the workflow are symmetrical, just the target repo path differs. + * + * Returns the new HEAD hash. */ -async function advanceRemote( - rig: TestRig, +async function commitToBare( + barePath: string, filename: string, contents: string, message: string, ): Promise { - const wt = `${rig.local}-remote-advance-${Date.now()}-${Math.random() + const wt = `${barePath}-wt-${Date.now()}-${Math.random() .toString(36) .slice(2, 8)}`; - await exec('git', ['clone', rig.bare, wt]); - await git(wt, 'config', 'user.email', 'remote-advance@test.local'); - await git(wt, 'config', 'user.name', 'remote-advance'); + await exec('git', ['clone', barePath, wt]); + await git(wt, 'config', 'user.email', 'commit-helper@test.local'); + await git(wt, 'config', 'user.name', 'commit-helper'); await git(wt, 'config', 'commit.gpgsign', 'false'); await git(wt, 'config', 'core.hooksPath', '/dev/null'); - const head = await commitFile(wt, filename, contents, message); + await writeFile(join(wt, filename), contents); + await git(wt, 'add', filename); + await git(wt, 'commit', '-m', message); + const head = await git(wt, 'rev-parse', 'HEAD'); await git(wt, 'push', 'origin', 'main'); await rm(wt, { recursive: true, force: true }); return head; } +/** Push a new commit onto the bare remote's `main` so it advances ahead of local. */ +async function advanceRemote( + rig: TestRig, + filename: string, + contents: string, + message: string, +): Promise { + return commitToBare(rig.bare, filename, contents, message); +} + +/** Push a new commit onto the local bare's `main` so it advances ahead of remote. */ +async function advanceLocal( + rig: TestRig, + filename: string, + contents: string, + message: string, +): Promise { + return commitToBare(rig.local, filename, contents, message); +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -205,8 +222,8 @@ describe('reconcileDataRepo', () => { }); it('"pushed-ahead" when local is ahead of remote', async () => { - const remoteBefore = await git(rig.local, 'rev-parse', 'origin/main'); - const newHead = await commitFile(rig.local, 'local-feature.txt', 'hi\n', 'local: add feature'); + const remoteBefore = await git(rig.bare, 'rev-parse', 'main'); + const newHead = await advanceLocal(rig, 'local-feature.txt', 'hi\n', 'local: add feature'); const { logger } = makeLogger(); const result = await reconcileDataRepo({ @@ -228,8 +245,8 @@ describe('reconcileDataRepo', () => { it('"rebased" when diverged with a clean rebase', async () => { // Local commits to a file the remote will never touch. - const localHead = await commitFile( - rig.local, + const localHead = await advanceLocal( + rig, 'local-only.txt', 'L\n', 'local: independent change', @@ -264,7 +281,7 @@ describe('reconcileDataRepo', () => { it('"conflict-escaped" when diverged with a rebase conflict', async () => { // Both sides touch the same file with different content — guaranteed // rebase conflict. - const localHead = await commitFile(rig.local, 'conflict.txt', 'LOCAL\n', 'local: edit shared file'); + const localHead = await advanceLocal(rig, 'conflict.txt', 'LOCAL\n', 'local: edit shared file'); await advanceRemote(rig, 'conflict.txt', 'REMOTE\n', 'remote: edit shared file'); const remoteHead = await git(rig.bare, 'rev-parse', 'main'); @@ -341,33 +358,43 @@ describe('reconcileDataRepo', () => { expect(lines.some((l) => l.level === 'warn' && /fetch/.test(l.msg))).toBe(true); }); - it('reconciles correctly after a single-branch clone (regression for the shell bug)', async () => { - // The shell entrypoint used `git clone --branch X` then `git fetch origin Y` - // which wrote a narrow remote refspec and quietly failed to populate - // refs/remotes/origin/Y. Our Node reconciler always passes an explicit - // refspec — this test exercises a clone-with-a-narrow-refspec setup and - // confirms reconciliation still works. - - // Start a fresh local from scratch, with a narrow remote.origin.fetch - // refspec that mimics what `git clone --single-branch --branch main` writes. - const narrowLocal = `${rig.local}-narrow`; - await exec('git', ['clone', '--single-branch', '--branch', 'main', rig.bare, narrowLocal]); - await git(narrowLocal, 'config', 'user.email', 'narrow@test.local'); - await git(narrowLocal, 'config', 'user.name', 'narrow'); - await git(narrowLocal, 'config', 'commit.gpgsign', 'false'); - await git(narrowLocal, 'config', 'core.hooksPath', '/dev/null'); + it('reconciles correctly when refs/remotes/origin/ is unpopulated (regression for the refspec bug)', async () => { + // The original shell-side bug: `git clone --branch X` wrote a narrow + // refspec, then later `git fetch origin Y` quietly failed to populate + // `refs/remotes/origin/Y`. Our reconciler always passes an explicit + // refspec — this test verifies that defense. + // + // A bare clone exhibits the same shape: `git clone --bare` writes + // `+refs/heads/*:refs/heads/*` (mirroring to local refs/heads/), + // leaving `refs/remotes/origin/main` unpopulated. With our explicit + // refspec on fetch, reconcile still works. + const altLocal = `${rig.local}-alt.git`; + await exec('git', ['clone', '--bare', rig.bare, altLocal]); + await git(altLocal, 'symbolic-ref', 'HEAD', 'refs/heads/main'); + await git(altLocal, 'config', 'user.email', 'alt@test.local'); + await git(altLocal, 'config', 'user.name', 'alt'); + await git(altLocal, 'config', 'commit.gpgsign', 'false'); + + // Sanity-check the test premise: origin/main is unset on the fresh bare. + let originBranchExists = true; + try { + await git(altLocal, 'rev-parse', '--verify', 'refs/remotes/origin/main'); + } catch { + originBranchExists = false; + } + expect(originBranchExists).toBe(false); // Advance the remote on `main`. const newRemoteHead = await advanceRemote( rig, 'narrow-feature.txt', 'hi\n', - 'remote: advance after narrow clone', + 'remote: advance after bare clone', ); const { logger } = makeLogger(); const result = await reconcileDataRepo({ - repoPath: narrowLocal, + repoPath: altLocal, branch: 'main', logger, }); @@ -375,6 +402,6 @@ describe('reconcileDataRepo', () => { expect(result.outcome).toBe('fast-forwarded'); expect(result.newCommit).toBe(newRemoteHead); - await rm(narrowLocal, { recursive: true, force: true }); + await rm(altLocal, { recursive: true, force: true }); }); }); diff --git a/apps/api/tests/github-oauth.test.ts b/apps/api/tests/github-oauth.test.ts index a63c307..648bd4f 100644 --- a/apps/api/tests/github-oauth.test.ts +++ b/apps/api/tests/github-oauth.test.ts @@ -17,9 +17,7 @@ */ import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest'; import { type FastifyInstance } from 'fastify'; -import { execFile } from 'node:child_process'; -import { promisify } from 'node:util'; -import { writeFile, mkdir, readFile } from 'node:fs/promises'; +import { writeFile, readFile } from 'node:fs/promises'; import { join } from 'node:path'; import { SignJWT } from 'jose'; @@ -27,9 +25,9 @@ import { buildApp } from '../src/app.js'; import { verifyAccess, verifyRefresh, verifyClaimPending } from '../src/auth/jwt.js'; import { verifyOAuthSession } from '../src/auth/oauth-session-cookie.js'; import { createFullDataRepo, createPrivateStorageDir } from './helpers/test-full-repo.js'; +import { seedRawToml } from './helpers/seed-fixtures.js'; import { createGitHubMock } from './helpers/mocks.js'; -const exec = promisify(execFile); const JWT_KEY = 'test-jwt-signing-key-at-least-32-chars!!'; const GH_CLIENT_ID = 'test-client-id'; const GH_CLIENT_SECRET = 'test-client-secret'; @@ -47,7 +45,6 @@ async function seedPerson( id: string, opts: SeedPersonOpts = {}, ): Promise { - const git = (...args: string[]) => exec('git', args, { cwd: repoDir }); const lines = [ `id = "${id}"`, `slug = "${slug}"`, @@ -66,14 +63,7 @@ async function seedPerson( lines.push(`githubLinkedAt = "${opts.githubLinkedAt}"`); } - await mkdir(join(repoDir, 'people'), { recursive: true }); - await writeFile(join(repoDir, 'people', `${slug}.toml`), lines.join('\n')); - await git('add', `people/${slug}.toml`); - await git( - '-c', 'user.email=test@cfp.test', - '-c', 'user.name=test', - 'commit', '-m', `seed person ${slug}`, - ); + await seedRawToml(repoDir, `people/${slug}.toml`, lines.join('\n'), `seed person ${slug}`); } /** Seed a PrivateProfile directly into the filesystem private store. */ diff --git a/apps/api/tests/helpers/seed-fixtures.ts b/apps/api/tests/helpers/seed-fixtures.ts index 9fae5d9..9b542ce 100644 --- a/apps/api/tests/helpers/seed-fixtures.ts +++ b/apps/api/tests/helpers/seed-fixtures.ts @@ -8,8 +8,54 @@ * In production the write-api will construct these path fields from the * in-memory index at write time. Tests must do the same. */ +import { execFile } from 'node:child_process'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { promisify } from 'node:util'; + import { openRepo } from 'gitsheets'; +const execAsync = promisify(execFile); + +/** + * Write a raw TOML file into a bare gitsheets repo via a transient + * working-tree clone. The bare-repo invariant + * (specs/behaviors/storage.md → "The data clone is bare") rules out the + * traditional `git add` / `git commit` flow against the data path — this + * helper does the transient-clone-push dance once per call so test + * fixtures can land arbitrary file shapes. + * + * `relPath` is the path within the working tree (e.g. `people/jane.toml`). + * `commitMessage` is used verbatim; author/committer default to the + * test identity. + */ +export async function seedRawToml( + bareRepoPath: string, + relPath: string, + toml: string, + commitMessage: string, +): Promise { + const wt = await mkdtemp(join(tmpdir(), 'cfp-seed-wt-')); + try { + await execAsync('git', ['clone', bareRepoPath, wt]); + await execAsync('git', ['config', 'user.email', 'test@cfp.test'], { cwd: wt }); + await execAsync('git', ['config', 'user.name', 'cfp test'], { cwd: wt }); + await execAsync('git', ['config', 'commit.gpgsign', 'false'], { cwd: wt }); + await execAsync('git', ['config', 'core.hooksPath', '/dev/null'], { cwd: wt }); + + const absPath = join(wt, relPath); + await mkdir(dirname(absPath), { recursive: true }); + await writeFile(absPath, toml); + + await execAsync('git', ['add', relPath], { cwd: wt }); + await execAsync('git', ['commit', '-m', commitMessage], { cwd: wt }); + await execAsync('git', ['push', 'origin', 'main'], { cwd: wt }); + } finally { + await rm(wt, { recursive: true, force: true }); + } +} + const NOW = '2026-05-01T00:00:00Z'; const NOW2 = '2026-05-10T00:00:00Z'; @@ -34,7 +80,7 @@ export interface SeededFixtures { * Returns the IDs/slugs for use in assertions. */ export async function seedFixtures(repoPath: string): Promise { - const repo = await openRepo({ gitDir: `${repoPath}/.git`, workTree: repoPath }); + const repo = await openRepo({ gitDir: repoPath }); const projectId = uuid(1); const personId = uuid(2); diff --git a/apps/api/tests/helpers/test-full-repo.ts b/apps/api/tests/helpers/test-full-repo.ts index 2357fd0..5cb8f81 100644 --- a/apps/api/tests/helpers/test-full-repo.ts +++ b/apps/api/tests/helpers/test-full-repo.ts @@ -43,31 +43,42 @@ export interface FullTestRepo { * for full-app tests where the store plugin boots the real openPublicStore(). */ export async function createFullDataRepo(): Promise { - const dir = await mkdtemp(join(tmpdir(), 'cfp-full-data-')); - const git = (...args: string[]) => execFileAsync('git', args, { cwd: dir }); + const root = await mkdtemp(join(tmpdir(), 'cfp-full-data-')); + const bareDir = join(root, 'data.git'); + const seedDir = join(root, 'seed'); - await git('init', '-b', 'main'); - await git('config', 'user.email', 'test@cfp.test'); - await git('config', 'user.name', 'cfp test'); - await git('config', 'commit.gpgsign', 'false'); - await git('config', 'core.hooksPath', '/dev/null'); - await git('commit', '--allow-empty', '-m', 'initial'); + // Bare gitdir — the path tests pass to openPublicStore. Matches the + // production bare-repo invariant (specs/behaviors/storage.md). + await execFileAsync('git', ['init', '--bare', '-b', 'main', bareDir]); + await execFileAsync('git', ['config', 'receive.denyCurrentBranch', 'ignore'], { cwd: bareDir }); - // Write all sheet configs - await mkdir(join(dir, '.gitsheets'), { recursive: true }); + // Transient working-tree clone for seeding the sheet configs. + await execFileAsync('git', ['init', '-b', 'main', seedDir]); + const seedGit = (...args: string[]) => execFileAsync('git', args, { cwd: seedDir }); + await seedGit('config', 'user.email', 'test@cfp.test'); + await seedGit('config', 'user.name', 'cfp test'); + await seedGit('config', 'commit.gpgsign', 'false'); + await seedGit('config', 'core.hooksPath', '/dev/null'); + await seedGit('commit', '--allow-empty', '-m', 'initial'); + + await mkdir(join(seedDir, '.gitsheets'), { recursive: true }); for (const [name, config] of Object.entries(SHEET_CONFIGS)) { - await writeFile(join(dir, '.gitsheets', `${name}.toml`), config); + await writeFile(join(seedDir, '.gitsheets', `${name}.toml`), config); } - await git('add', '.gitsheets'); - await git('commit', '-m', 'chore: add all gitsheets sheet configs'); + await seedGit('add', '.gitsheets'); + await seedGit('commit', '-m', 'chore: add all gitsheets sheet configs'); + + await seedGit('remote', 'add', 'origin', bareDir); + await seedGit('push', 'origin', 'main'); + await rm(seedDir, { recursive: true, force: true }); let cleaned = false; return { - path: dir, + path: bareDir, cleanup: async () => { if (cleaned) return; cleaned = true; - await rm(dir, { recursive: true, force: true }); + await rm(root, { recursive: true, force: true }); }, }; } diff --git a/apps/api/tests/helpers/test-repo.ts b/apps/api/tests/helpers/test-repo.ts index e49be80..58b2b57 100644 --- a/apps/api/tests/helpers/test-repo.ts +++ b/apps/api/tests/helpers/test-repo.ts @@ -31,41 +31,54 @@ export interface AppTestRepo { export async function createTestRepo( sheetNames: readonly string[] = [], ): Promise { - const dir = await mkdtemp(join(tmpdir(), 'cfp-test-')); - const gitDir = join(dir, '.git'); + const root = await mkdtemp(join(tmpdir(), 'cfp-test-')); + const bareDir = join(root, 'data.git'); + const seedDir = join(root, 'seed'); - const git: GitRunner = (...args) => exec('git', args, { cwd: dir }); + // Bare repo — the path the test code reads as a gitsheets store. Matches + // the production bare-repo invariant (specs/behaviors/storage.md). + await exec('git', ['init', '--bare', '-b', 'main', bareDir]); + // Allow pushes to the currently-checked-out branch on the bare so the + // seed step's `git push` lands. + await exec('git', ['config', 'receive.denyCurrentBranch', 'ignore'], { cwd: bareDir }); - await git('init', '-b', 'main'); - await git('config', 'user.email', 'test@cfp.test'); - await git('config', 'user.name', 'cfp test'); - await git('config', 'commit.gpgsign', 'false'); - await git('config', 'core.hooksPath', '/dev/null'); - await git('commit', '--allow-empty', '-m', 'initial'); + // Transient working-tree clone used only for the seed commits. + await exec('git', ['init', '-b', 'main', seedDir]); + const seedGit: GitRunner = (...args) => exec('git', args, { cwd: seedDir }); + await seedGit('config', 'user.email', 'test@cfp.test'); + await seedGit('config', 'user.name', 'cfp test'); + await seedGit('config', 'commit.gpgsign', 'false'); + await seedGit('config', 'core.hooksPath', '/dev/null'); + await seedGit('commit', '--allow-empty', '-m', 'initial'); if (sheetNames.length > 0) { - await mkdir(join(dir, '.gitsheets'), { recursive: true }); + await mkdir(join(seedDir, '.gitsheets'), { recursive: true }); for (const name of sheetNames) { const config = `[gitsheet]\n` + `root = '${name}'\n` + `path = '\${{ slug }}'\n`; - await writeFile(join(dir, '.gitsheets', `${name}.toml`), config); + await writeFile(join(seedDir, '.gitsheets', `${name}.toml`), config); } - await git('add', '.gitsheets'); - await git('commit', '-m', `chore: add sheet configs (${sheetNames.join(', ')})`); + await seedGit('add', '.gitsheets'); + await seedGit('commit', '-m', `chore: add sheet configs (${sheetNames.join(', ')})`); } - const repo = await openRepo({ gitDir, workTree: dir }); + await seedGit('remote', 'add', 'origin', bareDir); + await seedGit('push', 'origin', 'main'); + // Discard the transient working tree — only the bare matters from here on. + await rm(seedDir, { recursive: true, force: true }); + + const repo = await openRepo({ gitDir: bareDir }); let cleaned = false; return { repo, - path: dir, + path: bareDir, cleanup: async () => { if (cleaned) return; cleaned = true; - await rm(dir, { recursive: true, force: true }); + await rm(root, { recursive: true, force: true }); }, }; } diff --git a/apps/api/tests/import-laddr.test.ts b/apps/api/tests/import-laddr.test.ts index 0b725a8..4f5a15e 100644 --- a/apps/api/tests/import-laddr.test.ts +++ b/apps/api/tests/import-laddr.test.ts @@ -382,9 +382,17 @@ describe('translateProject', () => { // --------------------------------------------------------------------------- async function makeRepo(): Promise<{ path: string; cleanup: () => Promise }> { - const dir = await mkdtemp(join(tmpdir(), 'cfp-import-json-')); - const run = (...args: string[]) => exec('git', args, { cwd: dir }); - await run('init', '-b', 'main'); + const root = await mkdtemp(join(tmpdir(), 'cfp-import-json-')); + const bareDir = join(root, 'data.git'); + const seedDir = join(root, 'seed'); + + // Bare gitdir — what the importer opens via openPublicStore. + await exec('git', ['init', '--bare', '-b', 'main', bareDir]); + await exec('git', ['config', 'receive.denyCurrentBranch', 'ignore'], { cwd: bareDir }); + + // Transient working-tree clone to stage the initial sheet configs. + await exec('git', ['init', '-b', 'main', seedDir]); + const run = (...args: string[]) => exec('git', args, { cwd: seedDir }); await run('config', 'user.email', 'test@cfp.test'); await run('config', 'user.name', 'test'); await run('config', 'commit.gpgsign', 'false'); @@ -392,7 +400,7 @@ async function makeRepo(): Promise<{ path: string; cleanup: () => Promise // importer opens the gitsheets store at boot, which requires every sheet // declared on the validator map (PublicValidators) to have a corresponding // .gitsheets/.toml. Mirrors the upstream codeforphilly-data repo. - await mkdir(join(dir, '.gitsheets'), { recursive: true }); + await mkdir(join(seedDir, '.gitsheets'), { recursive: true }); const sheets: Array<[string, string]> = [ ['people', "root = 'people'\npath = '${{ slug }}'\n"], ['projects', "root = 'projects'\npath = '${{ slug }}'\n"], @@ -419,12 +427,16 @@ async function makeRepo(): Promise<{ path: string; cleanup: () => Promise ['revocations', "root = 'revocations'\npath = '${{ jti }}'\n"], ]; for (const [name, body] of sheets) { - await writeFile(join(dir, '.gitsheets', `${name}.toml`), `[gitsheet]\n${body}`); + await writeFile(join(seedDir, '.gitsheets', `${name}.toml`), `[gitsheet]\n${body}`); } await run('add', '.gitsheets'); await run('commit', '-m', 'initial empty branch'); await run('branch', '-M', 'empty'); // rename initial branch - return { path: dir, cleanup: () => rm(dir, { recursive: true, force: true }) }; + await run('remote', 'add', 'origin', bareDir); + await run('push', 'origin', 'empty'); + await rm(seedDir, { recursive: true, force: true }); + await exec('git', ['symbolic-ref', 'HEAD', 'refs/heads/empty'], { cwd: bareDir }); + return { path: bareDir, cleanup: () => rm(root, { recursive: true, force: true }) }; } function mockRoutes(): MockRoutes { diff --git a/apps/api/tests/internal-reload.test.ts b/apps/api/tests/internal-reload.test.ts index cd5e74b..b256354 100644 --- a/apps/api/tests/internal-reload.test.ts +++ b/apps/api/tests/internal-reload.test.ts @@ -17,7 +17,7 @@ * FTS index actually got rebuilt against the new tree). */ import { execFile } from 'node:child_process'; -import { mkdtemp, rm, writeFile, readFile } from 'node:fs/promises'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { promisify } from 'node:util'; @@ -76,7 +76,7 @@ async function createRig(): Promise { const root = await mkdtemp(join(tmpdir(), 'cfp-hot-reload-')); const bare = join(root, 'remote.git'); const seed = join(root, 'seed'); - const local = join(root, 'local'); + const local = join(root, 'local.git'); // Seed: produces the initial sheet-configs commit on `main`. await exec('git', ['init', '-b', 'main', seed]); @@ -97,12 +97,13 @@ async function createRig(): Promise { await git(bare, 'config', 'receive.denyCurrentBranch', 'ignore'); await exec('git', ['push', bare, 'main'], { cwd: seed }); - // Local clone from the bare. - await exec('git', ['clone', bare, local]); + // Local clone from the bare — also bare, matching the app's runtime invariant + // (specs/behaviors/storage.md → "The data clone is bare"). + await exec('git', ['clone', '--bare', bare, local]); + await git(local, 'symbolic-ref', 'HEAD', 'refs/heads/main'); await git(local, 'config', 'user.email', 'local@test.local'); await git(local, 'config', 'user.name', 'local'); await git(local, 'config', 'commit.gpgsign', 'false'); - await git(local, 'config', 'core.hooksPath', '/dev/null'); return { local, @@ -379,15 +380,13 @@ describe('POST /api/_internal/reload-data — short-circuit + reconcile', () => expect(project.data.slug).toBe('lazyloader'); expect(project.data.title).toBe('LazyLoader'); - // Confirm the local working tree actually fast-forwarded too. + // Confirm the local bare actually fast-forwarded too. const afterHead = await git(rig.local, 'rev-parse', 'HEAD'); expect(afterHead).toBe(newRemoteHead); - // And the sheet config file was written to the bare in the seed - // commit — sanity check that gitsheets has actually parsed the new - // record (read the TOML to confirm shape). - const tomlPath = join(rig.local, 'projects', 'lazyloader.toml'); - const contents = await readFile(tomlPath, 'utf8'); + // Sanity-check that gitsheets has parsed the new record by reading + // the TOML from the bare's HEAD tree (no working tree to filesystem-read). + const contents = await git(rig.local, 'show', 'HEAD:projects/lazyloader.toml'); expect(contents).toContain("slug = 'lazyloader'"); }); }); diff --git a/apps/api/tests/reconcile.test.ts b/apps/api/tests/reconcile.test.ts index e95e572..83475b3 100644 --- a/apps/api/tests/reconcile.test.ts +++ b/apps/api/tests/reconcile.test.ts @@ -53,7 +53,7 @@ async function seedPerson( repoPath: string, fields: { id: string; slug: string; githubUserId?: number }, ): Promise { - const repo = await openRepo({ gitDir: `${repoPath}/.git`, workTree: repoPath }); + const repo = await openRepo({ gitDir: repoPath }); await repo.transact( { message: `seed person ${fields.slug}`, author: { name: 'test', email: 'test@cfp.test' } }, async (tx) => { diff --git a/apps/api/tests/saml.test.ts b/apps/api/tests/saml.test.ts index 3936cb2..1352ca2 100644 --- a/apps/api/tests/saml.test.ts +++ b/apps/api/tests/saml.test.ts @@ -14,18 +14,16 @@ */ import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { type FastifyInstance } from 'fastify'; -import { execFile } from 'node:child_process'; -import { promisify } from 'node:util'; -import { writeFile, mkdir } from 'node:fs/promises'; +import { writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import { DOMParser } from '@xmldom/xmldom'; import { buildApp } from '../src/app.js'; import { mintSessionFor } from '../src/auth/issue.js'; import { createFullDataRepo, createPrivateStorageDir } from './helpers/test-full-repo.js'; +import { seedRawToml } from './helpers/seed-fixtures.js'; import { getSamlTestKeyPair, type SamlTestKeyPair } from './helpers/saml-cert.js'; -const exec = promisify(execFile); const JWT_KEY = 'test-jwt-signing-key-at-least-32-chars!!'; const SLACK_TEAM_HOST = 'codeforphilly.slack.com'; @@ -40,7 +38,6 @@ async function seedPerson( accountLevel?: string; }, ): Promise { - const git = (...args: string[]) => exec('git', args, { cwd: repoDir }); const lines = [ `id = "${opts.id}"`, `slug = "${opts.slug}"`, @@ -53,14 +50,7 @@ async function seedPerson( `updatedAt = "2026-05-01T00:00:00Z"`, ].filter(Boolean); - await mkdir(join(repoDir, 'people'), { recursive: true }); - await writeFile(join(repoDir, 'people', `${opts.slug}.toml`), lines.join('\n')); - await git('add', `people/${opts.slug}.toml`); - await git( - '-c', 'user.email=test@cfp.test', - '-c', 'user.name=test', - 'commit', '-m', `seed person ${opts.slug}`, - ); + await seedRawToml(repoDir, `people/${opts.slug}.toml`, lines.join('\n'), `seed person ${opts.slug}`); } async function seedPrivateProfile( diff --git a/deploy/docker/entrypoint.sh b/deploy/docker/entrypoint.sh index 4826f83..301cf69 100755 --- a/deploy/docker/entrypoint.sh +++ b/deploy/docker/entrypoint.sh @@ -3,26 +3,31 @@ # # Minimal boot prep. The data-repo reconciliation state machine # (in-sync / behind / ahead / diverged-clean-rebase / diverged-conflict-escape) -# lives in the Node API process now — see apps/api/src/store/reconcile.ts +# lives in the Node API process — see apps/api/src/store/reconcile.ts # and apps/api/src/plugins/reconcile.ts. This script only ensures: # -# 1. The PVC mount at CFP_DATA_REPO_PATH is trusted by git regardless of -# file-ownership (PVCs survive pod restarts and may carry files owned -# by a different uid than the current runAsUser). -# 2. A reasonable git user identity is configured for any rebase committer -# writes (rebase preserves authors of replayed commits; the committer -# line is the only thing that can pick up runtime identity). -# 3. There IS a valid `.git` working tree at CFP_DATA_REPO_PATH. On first -# pod boot (empty PVC), we do an initial full-history clone. On -# subsequent boots, the reconciler inside the API decides what to do. +# 1. CFP_DATA_REPO_PATH is trusted by git regardless of file-ownership +# (volumes may carry files owned by a different uid than the current +# runAsUser between iterations). +# 2. A reasonable git user identity is configured for any committer line +# the API's plumbing-based reconcile might mint. +# 3. CFP_DATA_REPO_PATH is a valid **bare** git repo. On first pod boot +# (empty volume), we do an initial bare clone. On subsequent boots +# within the same pod's lifetime, the reconciler inside the API +# handles fetching + ref advancement. +# +# Bare invariant: gitsheets reads/writes via hologit's tree-object +# interface — no working tree needed. See +# specs/behaviors/storage.md → "The data clone is bare". # # Required env: -# CFP_DATA_REPO_PATH — local working-tree path (mounted PVC in k8s) +# CFP_DATA_REPO_PATH — bare gitdir (the path itself is the gitdir; no +# .git subdirectory inside it) # Optional env: -# CFP_DATA_REMOTE — git URL to clone/fetch/push. If unset, the entrypoint -# assumes an offline-style dev setup and uses whatever -# working tree is already at CFP_DATA_REPO_PATH. -# CFP_DATA_BRANCH — branch to clone initially (default: main). +# CFP_DATA_REMOTE — git URL to clone/fetch/push. If unset, the +# entrypoint assumes an offline-style dev setup +# and uses whatever is already at the path. +# CFP_DATA_BRANCH — branch to set HEAD to on initial clone (default: main). # GIT_SSH_COMMAND — set when an SSH deploy key is mounted. set -eu @@ -35,53 +40,58 @@ log() { DATA_BRANCH="${CFP_DATA_BRANCH:-main}" -# Trust the data-repo working tree regardless of file ownership. PVCs survive -# pod restarts and may carry files owned by a different uid than this pod's -# runAsUser (e.g., an earlier iteration ran as root). +# Trust the data-repo path regardless of file ownership. git config --global --add safe.directory "$CFP_DATA_REPO_PATH" # Pseudonymous identity for any direct git operations that pick up the -# runtime committer line. API mutations supply their own GIT_AUTHOR_* via -# gitsheets transaction options; the reconciler re-applies these to the -# repo-local config too, so this is belt-and-suspenders for any other tool -# that touches the tree. +# runtime committer line. : "${GIT_AUTHOR_NAME:=CodeForPhilly API}" : "${GIT_AUTHOR_EMAIL:=api@users.noreply.codeforphilly.org}" : "${GIT_COMMITTER_NAME:=$GIT_AUTHOR_NAME}" : "${GIT_COMMITTER_EMAIL:=$GIT_AUTHOR_EMAIL}" export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL -if [ ! -d "$CFP_DATA_REPO_PATH/.git" ]; then +# Bare-repo marker: an `objects/` directory at the path root. (A non-bare +# clone has `.git/objects/` instead, so this check distinguishes the two +# without false positives on either side.) +if [ ! -d "$CFP_DATA_REPO_PATH/objects" ]; then if [ -z "${CFP_DATA_REMOTE:-}" ]; then - log "ERROR: $CFP_DATA_REPO_PATH is not a git repo and CFP_DATA_REMOTE is unset" + log "ERROR: $CFP_DATA_REPO_PATH is not a bare git repo and CFP_DATA_REMOTE is unset" + exit 1 + fi + + # Refuse a non-bare working-tree clone left over from an earlier build. + if [ -d "$CFP_DATA_REPO_PATH/.git" ]; then + log "ERROR: $CFP_DATA_REPO_PATH contains a non-bare clone (.git subdirectory)" + log " The app requires a bare clone — wipe the volume and restart, or" + log " delete $CFP_DATA_REPO_PATH/.git and re-clone." exit 1 fi mkdir -p "$CFP_DATA_REPO_PATH" - # PVC may carry residue from a previous pod that bailed mid-clone. + # Volume may carry residue from a previous pod that bailed mid-clone. # `git clone` refuses to clone into a non-empty directory, so wipe it # first. Safe because the data repo is always re-cloneable. if [ -n "$(ls -A "$CFP_DATA_REPO_PATH" 2>/dev/null)" ]; then - log "$CFP_DATA_REPO_PATH non-empty but lacks .git — wiping before clone" + log "$CFP_DATA_REPO_PATH non-empty but not a bare repo — wiping before clone" find "$CFP_DATA_REPO_PATH" -mindepth 1 -maxdepth 1 -exec rm -rf {} + fi - log "cloning $CFP_DATA_REMOTE into $CFP_DATA_REPO_PATH (branch=$DATA_BRANCH)" + log "bare-cloning $CFP_DATA_REMOTE into $CFP_DATA_REPO_PATH (HEAD → $DATA_BRANCH)" # Full history (no --depth) so the API-side reconciler can rebase against # any realistic divergence on subsequent boots. - git clone --branch "$DATA_BRANCH" "$CFP_DATA_REMOTE" "$CFP_DATA_REPO_PATH" + git clone --bare --branch "$DATA_BRANCH" "$CFP_DATA_REMOTE" "$CFP_DATA_REPO_PATH" fi -cd "$CFP_DATA_REPO_PATH" -git config user.name "$GIT_AUTHOR_NAME" -git config user.email "$GIT_AUTHOR_EMAIL" -# Ensure the origin URL matches the current env (in case CFP_DATA_REMOTE -# was rotated). Idempotent. -if [ -n "${CFP_DATA_REMOTE:-}" ] && git remote get-url origin >/dev/null 2>&1; then - git remote set-url origin "$CFP_DATA_REMOTE" +# Pin the API's committer identity into the bare repo's config. +git --git-dir="$CFP_DATA_REPO_PATH" config user.name "$GIT_AUTHOR_NAME" +git --git-dir="$CFP_DATA_REPO_PATH" config user.email "$GIT_AUTHOR_EMAIL" + +# Refresh the origin URL in case CFP_DATA_REMOTE was rotated. Idempotent. +if [ -n "${CFP_DATA_REMOTE:-}" ] && git --git-dir="$CFP_DATA_REPO_PATH" remote get-url origin >/dev/null 2>&1; then + git --git-dir="$CFP_DATA_REPO_PATH" remote set-url origin "$CFP_DATA_REMOTE" fi -cd - >/dev/null -log "data repo ready; starting API (reconciliation runs inside the API process)" +log "data repo ready (bare); starting API (reconciliation runs inside the API process)" exec "$@" diff --git a/deploy/kustomize/base/deployment.yaml b/deploy/kustomize/base/deployment.yaml index 1b6b271..2486e3a 100644 --- a/deploy/kustomize/base/deployment.yaml +++ b/deploy/kustomize/base/deployment.yaml @@ -78,9 +78,12 @@ spec: capabilities: drop: [ALL] volumes: + # Data repo lives as a bare clone — ephemeral. Re-cloned from origin + # on every pod boot; objects-only (~50 MB) so the clone is cheap. + # Persisting across pod restarts buys nothing because the bare + # clone is fully recoverable from the git remote. - name: data - persistentVolumeClaim: - claimName: codeforphilly-data + emptyDir: {} - name: private persistentVolumeClaim: claimName: codeforphilly-private diff --git a/deploy/kustomize/base/kustomization.yaml b/deploy/kustomize/base/kustomization.yaml index 84c8734..5c880e4 100644 --- a/deploy/kustomize/base/kustomization.yaml +++ b/deploy/kustomize/base/kustomization.yaml @@ -14,7 +14,6 @@ labels: resources: - serviceaccount.yaml - configmap.yaml - - pvc-data.yaml - pvc-private.yaml - service.yaml - deployment.yaml diff --git a/deploy/kustomize/base/pvc-data.yaml b/deploy/kustomize/base/pvc-data.yaml deleted file mode 100644 index 8e6db7e..0000000 --- a/deploy/kustomize/base/pvc-data.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: codeforphilly-data -spec: - accessModes: [ReadWriteOnce] - storageClassName: linode-block-storage-retain - resources: - requests: - storage: 10Gi diff --git a/docs/operations/deploy.md b/docs/operations/deploy.md index 2b01967..574cec1 100644 --- a/docs/operations/deploy.md +++ b/docs/operations/deploy.md @@ -104,43 +104,49 @@ curl http://localhost:3001/ # SPA index.html The container entrypoint (`deploy/docker/entrypoint.sh`) only handles the bits that *must* run before the Node process exists: -- Trusts the PVC mount via `git config --global safe.directory`. +- Trusts the data path via `git config --global safe.directory`. - Sets a pseudonymous git identity (`CodeForPhilly API - `) for any committer line a future - rebase might write. -- On first pod boot — and only then — does a full-history `git clone` of - `CFP_DATA_REMOTE` into `CFP_DATA_REPO_PATH` when no `.git` directory - exists. On subsequent boots the PVC already holds a clone; no clone is - performed. -- Refreshes `origin`'s URL to whatever `CFP_DATA_REMOTE` is set to (lets - operators rotate the remote without re-cloning the PVC). -- `exec`s the API. That's all — about a dozen lines of shell now. + `) for any committer line the + reconcile's commit-replay path might write. +- On first pod boot — `data` is an `emptyDir`, so this is every fresh + pod — does a `git clone --bare --branch $CFP_DATA_BRANCH` of + `CFP_DATA_REMOTE` into `CFP_DATA_REPO_PATH`. Bare means no working + tree; gitsheets operates on the git object DB directly. See + [`specs/behaviors/storage.md`](../../specs/behaviors/storage.md) → + "The data clone is bare." +- Refreshes `origin`'s URL to whatever `CFP_DATA_REMOTE` is set to + (operators can rotate the remote with a pod restart; the new + `emptyDir` re-clones from the new URL). +- `exec`s the API. That's all. Then `exec node apps/api/dist/index.js`. Inside node, `buildApp()` registers plugins ([apps/api/src/app.ts](../../apps/api/src/app.ts)) in order: env → -CORS → cookies → trace IDs → error mapper → **store** (loads public + -private into memory) → **reconcile** (fetch + ff/rebase/escape-hatch against +CORS → cookies → trace IDs → error mapper → **store** (opens the bare +public clone via `openRepo({ gitDir })`, loads public + private into +memory) → **reconcile** (fetch + ff/rebase-replay/escape-hatch against origin — see below) → **push daemon** (starts pushing transact'd commits to `CFP_DATA_REMOTE`) → services (FTS) → rate limit → idempotency → session middleware → swagger → routes → static SPA. Fastify's `listen()` doesn't fire until all of those resolve, so once `/api/health/ready` returns 200 -both stores have loaded **and** the working tree has been reconciled with -origin. +both stores have loaded **and** local refs have been reconciled with origin. ### Reconciliation state machine Lives in [`apps/api/src/store/reconcile.ts`](../../apps/api/src/store/reconcile.ts) -and is invoked at boot by the reconcile plugin. Same state machine the -shell used to run, just structured Node so exit codes propagate naturally -and the same code is reusable from the future hot-reload webhook (#65): +and is invoked at boot by the reconcile plugin. Operates entirely on the +object DB via plumbing (`update-ref`, `merge-tree --write-tree`, +`commit-tree`) so it works against the bare clone with no working tree: - in sync → no-op (`'in-sync'`) -- behind → fast-forward (`'fast-forwarded'`) +- behind → fast-forward via `git update-ref refs/heads/` (CAS + against old commit) (`'fast-forwarded'`) - ahead → push (`'pushed-ahead'`; push daemon retries on push failure) -- diverged + clean rebase → rebase + push (`'rebased'`) -- diverged + conflicts → abort rebase, create + push a - `conflicts/` branch from the pre-rebase HEAD, hard-reset - local to origin (`'conflict-escaped'`; logged at ERROR level so operators +- diverged + clean replay → `merge-tree --write-tree` + `commit-tree` + per local commit on top of remote tip, then `update-ref` + push + (`'rebased'`) +- diverged + replay conflict → preserve pre-replay HEAD on + `conflicts/`, push it, fast-forward local refs to + remote tip (`'conflict-escaped'`; logged at ERROR level so operators see it in production logs) - fetch itself fails (network blip) → log warn, continue with local state (`'fetch-failed'`) @@ -158,24 +164,26 @@ skips reconciliation entirely. ## Data repo on disk -The API operates on a working tree at `/app/data` backed by a PVC. The -entrypoint ensures the working tree exists (cloning on first boot); the -API-side reconcile plugin then synchronizes that tree with `CFP_DATA_REMOTE` -on every boot, and the push daemon pushes commits made during the pod's -lifetime back to the remote. +The API operates on a **bare** clone at `/app/data` backed by an +`emptyDir` volume. The entrypoint clones (`git clone --bare`) on every +pod start since `emptyDir` doesn't survive restarts. Within a pod's +lifetime, the API-side reconcile plugin synchronizes local refs with +`CFP_DATA_REMOTE` (boot reconcile + hot-reload webhook), and the push +daemon pushes commits made during the pod's lifetime back to the +remote. Implications: -- **PVC contents are durable enough to outlive a single pod**, which lets the - push daemon finish pushing any commits made just before pod terminate. - But the source of truth is the git remote, not the PVC — wiping the PVC - is safe (the next boot re-clones). +- **No PVC for data.** The git remote is the source of truth; the + pod's bare clone is recoverable from there. Pod restart is the + recovery primitive — there's nothing to delete first, and no + Multi-Attach errors during node failover. - **The deploy key matters.** When `CFP_DATA_REMOTE` is SSH (the default), the entrypoint relies on `GIT_SSH_COMMAND` (set in the - ConfigMap) pointing at the mounted private key. Rotation: replace the - SealedSecret, restart the pod. See - [secrets.md](secrets.md#data-repo-deploy-key) and the rotation procedure - in [sandbox-deploy.md](sandbox-deploy.md#rotating-the-deploy-key). + ConfigMap) pointing at the mounted private key. Rotation: replace + the SealedSecret, restart the pod. See + [secrets.md](secrets.md#data-repo-deploy-key) and the rotation + procedure in [sandbox-deploy.md](sandbox-deploy.md#rotating-the-deploy-key). ## Bucket provisioning (production) @@ -212,7 +220,7 @@ comments. Production pod gets these mounted: | `NODE_ENV` | ConfigMap | `production` | | `PORT` | ConfigMap | `3001` | | `HOST` | ConfigMap | `0.0.0.0` | -| `CFP_DATA_REPO_PATH` | ConfigMap | `/app/data` (PVC mount) | +| `CFP_DATA_REPO_PATH` | ConfigMap | `/app/data` — bare gitdir, backed by an `emptyDir`; re-cloned on every pod boot | | `CFP_DATA_REMOTE` | Secret | git URL (ssh in prod) | | `CFP_DATA_BRANCH` | ConfigMap | e.g. `fixture` / `main` | | `CFP_DATA_RELOAD_SECRET` | **Secret** | Shared bearer-token for the hot-reload webhook; when unset the `/api/_internal/reload-data` endpoint returns 503. See [runbook.md](runbook.md#hot-reload-webhook). | diff --git a/docs/operations/runbook.md b/docs/operations/runbook.md index 1bda771..253b8de 100644 --- a/docs/operations/runbook.md +++ b/docs/operations/runbook.md @@ -23,7 +23,7 @@ Look for one of the four common boot failures: |------------------|-------|-----| | `[entrypoint] ERROR: CFP_DATA_REMOTE is unset` | The Secret containing `CFP_DATA_REMOTE` isn't reaching the pod. | Check `kubectl get secret codeforphilly-secrets -o yaml`; verify the SealedSecret in the GitOps repo decrypted successfully (look at the sealed-secrets controller logs). | | `fatal: could not read Username for 'https://...'` or `Permission denied (publickey)` | Bad/missing data-repo credentials. | Verify the `codeforphilly-data-deploy-key` Secret holds a valid `id_ed25519` whose public key has push access to the data repo. See [secrets.md](secrets.md#data-repo-deploy-key). | -| `Failed to open public gitsheets store` | Working tree corrupt or missing `.gitsheets/` configs. | Exec into the pod, inspect `/app/data/.gitsheets/`. Recovery: `kubectl delete pvc codeforphilly-data -n `, then trigger a rollout — the entrypoint re-clones from `CFP_DATA_REMOTE`. | +| `Failed to open public gitsheets store` | Bare clone corrupt or missing `.gitsheets/` configs. | Exec into the pod, inspect `/app/data/refs/`, `/app/data/objects/`, and verify `.gitsheets/` exists in HEAD via `git --git-dir=/app/data show HEAD:.gitsheets`. Recovery: restart the pod — `data` is an `emptyDir`, so a fresh pod re-clones from `CFP_DATA_REMOTE` automatically. | | `Failed to load private store (s3)` | Bucket creds wrong, bucket gone, or network ACL blocks egress. | Confirm `S3_*` env in the ConfigMap + Secret. From the pod, `curl $S3_ENDPOINT` to confirm reachability. | | `environment variable ... is required` | A required env (`CFP_DATA_REPO_PATH`, `STORAGE_BACKEND`, `CFP_JWT_SIGNING_KEY`) is missing. | Manifest regression. Compare against `deploy/kustomize/base/configmap.yaml` + the GitOps repo's SealedSecret. | @@ -37,8 +37,10 @@ kubectl -n codeforphilly debug -it deploy/codeforphilly \ From inside: ```bash -# Is the data repo really there? -ls -la /app/data /app/data/.gitsheets +# Is the bare data repo really there? Bare gitdir lives at the path root — +# no .git subdir; expect HEAD, config, objects/, refs/ at the top. +ls -la /app/data +git --git-dir=/app/data show HEAD:.gitsheets # Are env vars present? env | grep -E '^(CFP_|S3_|STORAGE_|GITHUB_)' | sort @@ -71,8 +73,10 @@ kubectl -n codeforphilly-rewrite-sandbox set image \ deploy/codeforphilly codeforphilly=ghcr.io/codeforphilly/codeforphilly-ng: ``` -Data is **not** in the PVC long-term; it's in the git remote. Deleting the -PVC and letting the entrypoint re-clone is safe. +The bare data clone lives in an `emptyDir` — re-cloned from the git remote on +every pod boot. Pod restart is the recovery primitive; there's no PVC to +delete. (A `codeforphilly-private` PVC still exists for the S3-fallback +private store; only `codeforphilly-data` was retired.) ## "Readiness flapping / 503 spikes" diff --git a/plans/bare-data-repo.md b/plans/bare-data-repo.md new file mode 100644 index 0000000..839ecc9 --- /dev/null +++ b/plans/bare-data-repo.md @@ -0,0 +1,174 @@ +--- +status: done +depends: [] +specs: + - specs/behaviors/storage.md +issues: [85] +pr: 86 +--- + +# Plan: Eliminate the working tree — bare-clone the data repo + +## Scope + +The pod has been carrying a working tree it never reads. gitsheets operates on git tree objects via hologit; the checked-out files on disk are a redundant on-disk copy of records that already exist as git blobs. Worse, hologit's default `getWorkspace()` path (when `workTree` is non-null) **hashes the entire working tree on every workspace construction** — active CPU we don't need to spend. + +Switching to a bare clone removes all of this: smaller disk footprint, faster boot, faster hot-reloads, no PVC dependency (so no PVC Multi-Attach errors during node failover, and runbook recovery shrinks from "delete PVC, restart, re-clone" to just "restart"). + +Closes [#85](https://github.com/CodeForPhilly/codeforphilly-ng/issues/85). + +## Implements + +- [behaviors/storage.md](../specs/behaviors/storage.md) — new invariant: the API operates on a **bare** git clone. No working tree. Reconcile + transact + push all happen via plumbing on the object DB. + +## Approach + +### 1. Spec update first + +Add a new sub-section to `specs/behaviors/storage.md` (probably after "Repositories") declaring the bare-clone invariant. Cross-link from the deploy + runbook docs. + +### 2. `openPublicStore` — drop `workTree`, bare-only + +In `apps/api/src/store/public.ts`: + +```diff +-const repo = await openRepo({ gitDir: `${repoPath}/.git`, workTree: repoPath }); ++const repo = await openRepo({ gitDir: repoPath }); +``` + +`repoPath` is the bare gitdir (no `.git` subdirectory). Omitting `workTree` causes hologit's `getWorkspace()` to take the `createWorkspaceFromRef(this.ref)` branch — i.e. resolve from HEAD ref, not from hashing the working tree. Confirmed via `node_modules/hologit/lib/Repo.js:86-97`. + +**Single mode — bare everywhere.** The app uses a bare clone whether the runtime is the sandbox pod or a local dev machine. Mixed-mode (bare in prod, non-bare in dev) is what got us into the staleness footgun in the first place: API mutations advance HEAD via gitsheets transact, but a parallel working tree drifts. Keeping prod and dev on the same shape — bare — keeps gitsheets' tree-object view of the world coherent. + +### 3. Entrypoint — `git clone --bare` + +In `deploy/docker/entrypoint.sh`: + +- Detect first-boot via `[ ! -d "$CFP_DATA_REPO_PATH/objects" ]` (the marker for a bare repo) instead of `[ ! -d "$CFP_DATA_REPO_PATH/.git" ]`. +- Clone with `git clone --bare "$CFP_DATA_REMOTE" "$CFP_DATA_REPO_PATH"`. +- Pseudonymous identity config stays (`git config --global user.name/.email` — the `--global` flag means it's irrelevant to the bare/non-bare distinction). +- `git config --global safe.directory` stays. +- The remote-URL refresh logic stays — `git -C "$CFP_DATA_REPO_PATH" remote set-url origin "$CFP_DATA_REMOTE"` works bare-mode. + +### 4. Reconcile rewrite — three commands swapped + +In `apps/api/src/store/reconcile.ts`: + +| Today | Bare equivalent | +|---|---| +| `git merge --ff-only ` (line 207) | `git update-ref refs/heads/$branch $local-tip` | +| `git rebase ` (line 248) | Per-commit replay via `git merge-tree --write-tree` + `git commit-tree` (see below) | +| `git reset --hard ` (line 305, escape hatch) | `git update-ref refs/heads/$branch ` | + +The rebase replay loop: + +``` +1. mergeBase = git merge-base HEAD origin/ +2. localCommits = git rev-list --reverse mergeBase..HEAD // oldest first +3. newTip = origin/'s commit hash +4. for each commit C in localCommits: + parent = C^ + mergeResult = git merge-tree --write-tree --merge-base=parent newTip C + (exit 0: writes the merged tree hash to stdout) + (exit 1: conflicts — escape-hatch) + newCommitHash = git commit-tree -p newTip \ + (carry C's author + committer + dates + message) + newTip = newCommitHash +5. git update-ref refs/heads/ newTip +``` + +Properties preserved from today's `git rebase`: + +- Linear history with local commits **on top of** remote commits (no merge bubbles). +- Local commits' messages + author/committer/dates carry forward to the replayed commits. +- A conflict anywhere in the chain aborts the whole replay (same as `git rebase` failing) and the escape-hatch fires. + +Properties dropped (acceptable): + +- No interactive rebase / conflict-edit shell. Today's reconcile doesn't do that either — it just escapes on first conflict. + +### 5. Kustomize — drop the PVC + +`deploy/kustomize/base/pvc.yaml` → delete. + +`deploy/kustomize/base/deployment.yaml` — swap the PVC volume mount for `emptyDir`: + +```yaml +volumes: + - name: data-repo + emptyDir: {} +``` + +Re-clone on every pod boot is fine on a small repo (objects-only, ~50 MB or so). Validate boot time stays under the existing readiness deadline; if not, fall back to a `kustomize`-level option for an `emptyDir` with `sizeLimit` bumped, but cloning should be well under 60 s. + +### 6. Docs + spec + +- `specs/behaviors/storage.md` — bare-clone invariant section (step 1 above). +- `docs/operations/deploy.md` — boot sequence: "clone bare, no checkout"; drop the PVC env table entry; drop the `safe.directory` note (still needed for the bare gitdir but the wording shifts). +- `docs/operations/runbook.md` — drop the "delete PVC, restart" recovery procedure under "API won't boot"; recovery is now just a pod restart. +- `.claude/CLAUDE.md` Local setup section — clarify that local dev clones are **non-bare** (so contributors can browse), but the running app is bare. Two paths via the same code (`workTree` parameter optional). + +### 7. Local-dev workflow + +Local dev mirrors prod: the sibling clone of `codeforphilly-data` is **also bare**. Contributors who want a working tree to browse / hand-edit records clone again *from the bare local* into a second directory and push/pull there. + +```bash +# The clone the app uses (matches production shape) +git clone --bare git@github.com:CodeForPhilly/codeforphilly-data.git ../codeforphilly-data + +# Optional: a working-tree view of the same data, for browsing/editing +git clone ../codeforphilly-data ../codeforphilly-data-wt +# work in ../codeforphilly-data-wt; push back to ../codeforphilly-data when done +``` + +Keeping dev and prod on the same bare shape avoids the mixed-mode drift gitsheets struggles with (API-side commits advance HEAD on the bare repo; a co-located working tree would lag and confuse hologit's `hashWorkTree` path). + +### 8. Reconcile tests + +Add `apps/api/tests/reconcile.test.ts` (or extend an existing file) with the four state-machine cases: + +- **Clean ahead** — local has commits, remote has nothing new; expect `outcome: 'pushed-ahead'`, push goes through. +- **Clean behind** — remote has commits, local has nothing new; expect `outcome: 'fast-forwarded'`, refs updated via `git update-ref`. +- **Clean divergent** — both have commits with no overlapping file changes; expect `outcome: 'rebased'`, local commits replayed on top of remote, push fast-forwards. +- **Divergent conflict** — both have commits with conflicting file changes; expect `outcome: 'conflict-escaped'`, `conflicts/` branch created + pushed, local refs reset to remote. + +Each test sets up two bare clones in `os.tmpdir()` (one as the "pod's local," one as the "origin") wired with a file:// remote URL. The test exercises the reconcile path with realistic transact-style commits, asserts the outcome + final ref state of both clones. + +## Validation + +- [x] `specs/behaviors/storage.md` declares the bare-clone invariant. +- [x] `openPublicStore` opens a bare clone; non-bare paths fail loudly with a clear error pointing at the bare-only invariant. +- [x] `deploy/docker/entrypoint.sh` bare-clones on first boot; refuses to operate against a stray non-bare clone at `CFP_DATA_REPO_PATH`. +- [x] `reconcile.ts` rebase replay passes unit tests for: clean local-ahead, clean local-behind, clean divergent (replay succeeds), conflicting divergent (escape-hatch fires) — all 7 cases in `data-repo-reconcile.test.ts`. +- [x] All 241 API tests pass; `packages/shared` (53) and `apps/web` (30) pass too. +- [x] `npm run type-check && npm run lint` clean. +- [x] Kustomize manifests render with `emptyDir` mounted; PVC deleted from the base + kustomization.yaml resources list. +- [ ] **Sandbox smoke test** — apply manifests; pod boots, reaches `/api/health/ready`. **Deferred to deploy time.** +- [ ] **Hot-reload webhook** still works against the bare runtime — push to `published`, expect short-circuit / rebuild log line. **Deferred to deploy time.** +- [ ] **Operator `git-pod-uploadpack.sh`** still works (bare repo accepts `git upload-pack`). **Deferred to deploy time** (trivially expected; bare is the native shape for `git upload-pack`). +- [ ] **Boot time** on sandbox is no worse than today (re-clone-on-emptyDir vs. PVC persist). **Deferred to deploy time** — measure first boot vs. baseline. + +## Risks / unknowns + +- **First-boot clone time on `emptyDir`** — every pod restart re-clones. Objects-only is ~50 MB today. Acceptable. If the data repo grows past ~500 MB someday, we'd revisit (maybe with a sidecar that does a shared local clone). +- **Reconcile rebase replay edge cases** — empty-commit dropping, sign-off lines, multi-parent commits (none expected on the data repo, but worth a test). +- **`git merge-tree --write-tree`** requires git 2.38+; we're on 2.45+ in the Alpine 3.20 base image. Fine. +- **Dev workflow** — contributors browsing the data repo locally want a working tree. Solution: bare local clone (matches prod) + an optional second clone *from* the bare for a working tree to browse/edit. Same shape everywhere keeps gitsheets coherent (mixed-mode bare-app/working-tree-clone is exactly the drift the bare migration is escaping). + +## Notes + +Shipped over eight commits on the branch (plan opening + revision + spec + openPublicStore + reconcile rewrite + entrypoint/kustomize + test/scripts sweep + docs). + +Surprises: + +- **hologit hashes the working tree on every `getWorkspace` call** when one is set (`node_modules/hologit/lib/Repo.js:86-97`). Not just dead weight on disk — active CPU on every gitsheets workspace construction. Going bare eliminates the hash pass entirely. +- **`git merge-tree --write-tree` semantics are a clean fit.** Exit 0 → tree hash on stdout. Exit 1 → conflict info. The per-commit replay loop wraps that with a `RebaseReplayConflictError` throw, which routes to the existing escape-hatch path. Net change to externally-observable state-machine behavior: zero (same six outcomes, same conflict-branch shape, same push semantics). +- **Test scaffolding was where most of the lift was.** `openPublicStore`'s bare-only guard rejected every working-tree-seeding helper. The new `seedRawToml` helper in `tests/helpers/seed-fixtures.ts` centralizes the transient-clone-push pattern; future tests that need to seed raw fixtures use it instead of copying another `git add` block. +- **Local-dev workflow** intentionally stays bare for the app's clone, with an optional second clone-from-the-bare for hand-editing. Mixed mode (bare in prod, working-tree locally) was specifically the drift footgun this work escaped — keeping prod and dev coherent matters here. + +## Follow-ups + +- **Deploy smoke** — apply the new manifests to the sandbox, verify pod boot + hot-reload + `git-pod-uploadpack.sh` operator helper. *Deferred to operator cadence*; nothing should regress, but the boot-time delta is the one observable worth measuring. +- **Boot-time measurement** — on first deploy with `emptyDir`, capture how long `git clone --bare` takes against the live remote. If it pushes past ~30 s for a small repo, we'd want to know — re-clone overhead is the one cost we accepted in this design. *Deferred to deploy time*. +- **Local-setup onboarding** — `git clone --bare` is now the documented path in CLAUDE.md + storage.md, but if a contributor follows older docs elsewhere (READMEs, third-party blog posts, etc.) they may hit the boot-time bare-only guard. The error message points them at the spec section. *Tracked as*: future polish to "smooth out getting started" (#5). +- **Spec note about hologit `hashWorkTree` cost** — worth a sentence in the storage spec's "Runtime data flow" subsection for posterity. *Deferred* — low urgency since the bare invariant means we never take that path anymore. diff --git a/specs/behaviors/storage.md b/specs/behaviors/storage.md index 7c7e371..2e7feb9 100644 --- a/specs/behaviors/storage.md +++ b/specs/behaviors/storage.md @@ -56,7 +56,20 @@ There are two git repositories: | `codeforphilly-rewrite` | The application code (this repo) | Public | | `codeforphilly-data` | The live gitsheets data | **Private** — contains emails, real names, IPs | -The code repo references the data repo by env (`CFP_DATA_REPO_PATH`). They are not git submodules — too much friction. They're sibling working trees. +The code repo references the data repo by env (`CFP_DATA_REPO_PATH`). They are not git submodules — too much friction. Sibling clones, locally and in production. + +### The data clone is bare + +The API always operates against a **bare** clone of the data repo. gitsheets reads and writes via hologit's tree-object interface; a checked-out working tree is redundant data on disk *and* an active drift hazard — hologit's `getWorkspace()` hashes the working tree on every workspace construction when one is present, which both costs CPU and exposes a class of staleness bugs when API-side commits advance HEAD while the working tree lags. + +Invariants: + +- `CFP_DATA_REPO_PATH` points at the **bare gitdir** (no `.git` subdirectory inside it; the path itself is the gitdir). +- `openPublicStore` calls `openRepo({ gitDir: repoPath })` with `workTree` omitted, putting hologit on the `createWorkspaceFromRef(HEAD)` path. +- The reconcile state machine uses git plumbing (`update-ref`, `merge-tree --write-tree`, `commit-tree`) instead of working-tree-touching commands (`merge --ff-only`, `rebase`, `reset --hard`). See [`apps/api/src/store/reconcile.ts`](../../apps/api/src/store/reconcile.ts). +- The pod's data volume is `emptyDir` — re-cloned on every pod start, updated incrementally via `git fetch` within a pod's lifetime. + +Local dev mirrors this: the sibling clone the app reads is bare. Contributors who want a working tree to browse/edit by hand clone *from* the bare local into a second working directory and push/pull between them. ### Dev-environment data @@ -72,12 +85,19 @@ A `scripts/scrub-data.ts` in the code repo produces the snapshot. The contributo ```bash git clone https://github.com/CodeForPhilly/codeforphilly-rewrite.git -git clone https://github.com/CodeForPhilly/codeforphilly-data-snapshot.git ../codeforphilly-data +git clone --bare https://github.com/CodeForPhilly/codeforphilly-data-snapshot.git ../codeforphilly-data npm install npm run dev # api + web boot, data already there ``` -That's the "no moving pieces" win — a contributor sees real-shape data without ever touching a database, and can `git checkout`, `git reset`, or branch to experiment. +The data clone is `--bare` to match the runtime invariant above. Contributors who want a working tree for browsing/hand-editing records clone *again* from the local bare: + +```bash +git clone ../codeforphilly-data ../codeforphilly-data-wt +# edit in ../codeforphilly-data-wt, then push back to ../codeforphilly-data +``` + +That's the "no moving pieces" win — a contributor sees real-shape data without ever touching a database, and can `git checkout`, `git reset`, or branch to experiment in their working-tree clone without the app caring. ## Sheet layout @@ -100,7 +120,7 @@ Each entity lives in one sheet. The path template determines how records are sto ### Why these path shapes -The path template is the only "index" gitsheets provides natively. Choose it to support the _dominant_ access pattern; in-memory secondary indices handle reverse lookups. +The path template is the only "index" gitsheets provides natively. Choose it to support the *dominant* access pattern; in-memory secondary indices handle reverse lookups. - **Composite paths** (`${projectSlug}/${personSlug}.toml`) make "list everything for this parent" a single directory scan. Reverse lookups ("which projects is this person in?") use an in-memory index built at boot. - **Time-partitioned paths** keep directory size bounded for sheets that grow monotonically. None of the v1 sheets currently use time partitioning, but the pattern is reserved for future high-volume sheets (e.g., webhook ingestion logs).