From b09f2e2dc8daab22ed3a020827bb8bc605cb26fe Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Thu, 11 Jun 2026 10:17:26 -0500 Subject: [PATCH 01/12] Recycle prerender browsers when the host is redeployed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A prerender pool page holds the host bundle it warmed against for its lifetime, so after a host deploy a page can keep rendering against a stale bundle (and, in the incident this addresses, poison the public modules cache). This makes the prerender fleet recycle its browsers when the host changes, coordinated through the manager. - The realm server reports the host-shell token it serves (the first 8 hex characters of an md5 digest of the fetched index.html) to the prerender manager at boot. A deploy restarts the realm server, so the new bundle is reported here. Best-effort; a missing/unreachable manager never blocks boot. - The manager stores the latest reported token and echoes it on every heartbeat response (new X-Boxel-Prerender-Host-Shell-Hash header). Storing the token (not a counter) keeps this robust across the manager's own restart in the deploy train — the next realm-server boot re-reports it. - Each prerender server records the token it warmed against and, when a heartbeat reports a different one, recycles its browser (Prerenderer.recycle() → closeAll + restart Chrome + re-warm, which also clears Chrome's HTTP cache so re-warmed pages load the new shell). The first token seen is adopted as a baseline without recycling. Ordering is structural: the realm server only reports once IT is serving the new shell (and realm-server restart is the deploy train's last step), so prerender pages that warmed against the old shell are recycled only after the new shell is actually being served. Tests: manager echoes/updates the reported token on heartbeats and requires a hash; unit tests for the server's recycle decision. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/realm-server/main.ts | 41 ++++++++++ .../realm-server/prerender/manager-app.ts | 49 ++++++++++++ .../realm-server/prerender/prerender-app.ts | 75 ++++++++++++++++++- .../prerender/prerender-constants.ts | 10 +++ .../realm-server/prerender/prerenderer.ts | 11 +++ packages/realm-server/tests/index.ts | 1 + .../prerender-host-shell-recycle-test.ts | 43 +++++++++++ .../tests/prerender-manager-test.ts | 54 +++++++++++++ 8 files changed, 283 insertions(+), 1 deletion(-) create mode 100644 packages/realm-server/tests/prerender-host-shell-recycle-test.ts diff --git a/packages/realm-server/main.ts b/packages/realm-server/main.ts index b8db76c556..2d4e635b34 100644 --- a/packages/realm-server/main.ts +++ b/packages/realm-server/main.ts @@ -22,6 +22,7 @@ import { MatrixClient } from '@cardstack/runtime-common/matrix-client'; import 'decorator-transforms/globals'; import { createRemotePrerenderer } from './prerender/remote-prerenderer.ts'; +import { resolvePrerenderManagerURL } from './prerender/config.ts'; import { buildCreatePrerenderAuth } from './prerender/auth.ts'; import { isEnvironmentMode, @@ -363,6 +364,43 @@ const smokeTestHostApp = async () => { throw lastError ?? new Error('host app smoke test timed out'); }; +// Report the host-shell token this realm server is serving to the prerender +// manager. The manager echoes it on heartbeat responses so prerender servers +// recycle their browsers when it changes — i.e. when a deploy ships a new +// host bundle. Runs at boot (after the smoke test confirmed the shell is +// reachable): a deploy restarts this process, so a new bundle is reported +// here and picked up by the prerender fleet. Best-effort — a missing or +// unreachable manager must never block realm-server boot. 
+const reportHostShellToManager = async () => { + try { + let html = await getIndexHTML(); + let { createHash } = await import('crypto'); + let hash = createHash('md5').update(html).digest('hex').slice(0, 8); + let managerURL = resolvePrerenderManagerURL(); + let response = await fetch(`${managerURL}/host-shell`, { + method: 'POST', + headers: { + 'Content-Type': 'application/vnd.api+json', + Accept: 'application/vnd.api+json', + }, + body: JSON.stringify({ data: { attributes: { hash } } }), + }); + if (response.ok) { + console.log( + `Reported host shell token ${hash} to prerender manager at ${managerURL}`, + ); + } else { + console.warn( + `Prerender manager rejected host shell report: ${response.status}`, + ); + } + } catch (e: any) { + console.warn( + `Failed to report host shell token to prerender manager: ${e?.message ?? e}`, + ); + } +}; + (async () => { try { await smokeTestHostApp(); @@ -375,6 +413,9 @@ const smokeTestHostApp = async () => { console.error(`Unable to fetch from host app URL ${distURL}: ${detail}`); process.exit(-2); } + // Fire-and-forget: tell the prerender manager which host shell we're + // serving so the prerender fleet recycles after a host redeploy. 
+ void reportHostShellToManager(); let realms: Realm[] = []; let dbAdapter = new PgAdapter({ autoMigrate }); let queue = new PgQueuePublisher(dbAdapter); diff --git a/packages/realm-server/prerender/manager-app.ts b/packages/realm-server/prerender/manager-app.ts index e3c2f41dcc..b8b2d4184e 100644 --- a/packages/realm-server/prerender/manager-app.ts +++ b/packages/realm-server/prerender/manager-app.ts @@ -8,6 +8,7 @@ import { import { format } from 'date-fns'; import { PRERENDER_JOB_ID_HEADER, + PRERENDER_HOST_SHELL_HASH_HEADER, PRERENDER_REQUEST_ID_HEADER, PRERENDER_SERVER_DRAINING_STATUS_CODE, PRERENDER_SERVER_STATUS_DRAINING, @@ -64,6 +65,11 @@ type Registry = { servers: Map; // key: serverUrl affinities: Map; // affinityKey (:) -> assigned serverUrls (deque semantics) lastAccessByAffinity: Map; + // Latest host-shell token reported by a realm server (POST /host-shell). + // Echoed to prerender servers on every heartbeat response so they recycle + // when it changes (host redeployed). Undefined until first reported; reset + // on manager restart, re-learned from the next realm-server boot report. + hostShellHash?: string; }; const log = logger('prerender-manager'); @@ -507,6 +513,11 @@ export function buildPrerenderManagerApp(options?: { warmedAffinities, affinityVacancy, }); + // Echo the current host-shell token so the server can recycle its + // browser when the host is redeployed (see PRERENDER_HOST_SHELL_HASH_HEADER). + if (registry.hostShellHash) { + ctxt.set(PRERENDER_HOST_SHELL_HASH_HEADER, registry.hostShellHash); + } ctxt.status = 204; ctxt.set('X-Prerender-Server-Id', url); } catch (e) { @@ -516,6 +527,44 @@ export function buildPrerenderManagerApp(options?: { } }); + // The realm server reports the host-shell token it is currently serving + // (POST at boot, after it has fetched the new shell). A change means the + // host was redeployed; prerender servers pick it up on their next heartbeat + // and recycle. 
Storing the latest token (rather than counting) keeps this + // robust across the manager's own restart in the deploy train — the next + // realm-server boot re-reports the current token. + router.post('/host-shell', async (ctxt) => { + try { + let req = await fetchRequestFromContext(ctxt); + let raw = await req.text(); + let requestBody: any = {}; + if (raw) { + try { + requestBody = JSON.parse(raw); + } catch (e) { + log.debug('Invalid JSON body on /host-shell; treating as empty:', e); + } + } + let hash = requestBody?.data?.attributes?.hash; + if (typeof hash !== 'string' || hash.trim().length === 0) { + ctxt.status = 400; + ctxt.body = { errors: [{ status: 400, message: 'hash is required' }] }; + return; + } + if (registry.hostShellHash !== hash) { + log.info( + `host shell token changed (${registry.hostShellHash ?? 'none'} -> ${hash}); prerender servers will recycle on next heartbeat`, + ); + registry.hostShellHash = hash; + } + ctxt.status = 204; + } catch (e) { + log.error('Error in /host-shell:', e); + ctxt.status = 500; + ctxt.body = { errors: [{ status: 500, message: 'host-shell error' }] }; + } + }); + // maintenance: clear affinity assignments and capacity tracking router.post('/prerender-maintenance/reset', async (ctxt) => { for (let [, info] of registry.servers) { diff --git a/packages/realm-server/prerender/prerender-app.ts b/packages/realm-server/prerender/prerender-app.ts index 2445d268e1..ec85d5492a 100644 --- a/packages/realm-server/prerender/prerender-app.ts +++ b/packages/realm-server/prerender/prerender-app.ts @@ -22,6 +22,7 @@ import { Prerenderer } from './index.ts'; import type { Timings } from './render-runner.ts'; import { resolvePrerenderManagerURL } from './config.ts'; import { + PRERENDER_HOST_SHELL_HASH_HEADER, PRERENDER_JOB_ID_HEADER, PRERENDER_REQUEST_ID_HEADER, PRERENDER_SERVER_DRAINING_STATUS_CODE, @@ -60,6 +61,30 @@ export function decorateRenderErrorDiagnostics( }; } +// Pure decision for the host-shell recycle reconcile. 
Given the token the +// manager last reported (`reported`, null when it doesn't know one yet) and +// the token this server warmed against (`warmed`, undefined before the first +// report), decide whether to recycle and what the baseline token becomes: +// - no report → keep the current baseline, don't recycle +// - first report seen → adopt it as the baseline, don't recycle (we just +// warmed against whatever shell is current) +// - same as baseline → no-op +// - differs from baseline → recycle and advance the baseline +// Exported for unit testing; the live caller layers the draining / in-flight +// guards and the async recycle on top. +export function decideHostShellRecycle( + reported: string | null, + warmed: string | undefined, +): { recycle: boolean; nextWarmed: string | undefined } { + if (!reported) { + return { recycle: false, nextWarmed: warmed }; + } + if (warmed === undefined || warmed === reported) { + return { recycle: false, nextWarmed: reported }; + } + return { recycle: true, nextWarmed: reported }; +} + export function buildPrerenderApp(options: { serverURL: string; maxPages?: number; @@ -1021,6 +1046,13 @@ export function createPrerenderHttpServer(options?: { let drainingResolved = false; let drainingDeferred = new Deferred(); let heartbeatTimer: NodeJS.Timeout | undefined; + // Host-shell token the standbys were last warmed against, learned from the + // manager's heartbeat responses (PRERENDER_HOST_SHELL_HASH_HEADER). When the + // manager reports a different token — the host was redeployed and the realm + // server is now serving a new shell — the browser is recycled so pages + // reload it. Undefined until the first heartbeat that carries a token. + let warmedHostShellHash: string | undefined; + let recyclingForHostChange = false; let isClosing = false; let fatalExitOnUncaught = options?.fatalExitOnUncaught ?? 
true; let serverURL = resolvePrerenderServerURL(options?.port); @@ -1079,7 +1111,7 @@ export function createPrerenderHttpServer(options?: { log.debug( `POST heartbeat to ${managerURL}/prerender-servers with body:\n${JSON.stringify(body, null, 2)}`, ); - await fetch(`${managerURL}/prerender-servers`, { + let response = await fetch(`${managerURL}/prerender-servers`, { method: 'POST', headers: { 'Content-Type': 'application/vnd.api+json', @@ -1088,13 +1120,54 @@ export function createPrerenderHttpServer(options?: { body: JSON.stringify(body), }).catch((e) => { log.debug('Prerender manager heartbeat request failed:', e); + return undefined; }); + if (response) { + reconcileHostShell( + response.headers.get(PRERENDER_HOST_SHELL_HASH_HEADER), + ); + } } catch (e) { // best-effort, but log for visibility log.debug('Error while attempting heartbeat with prerender manager:', e); } } + // Compare the manager's current host-shell token against the one we warmed + // against. A change means the host was redeployed, so recycle the browser + // (fire-and-forget; the heartbeat itself must not block on the restart). + function reconcileHostShell(hash: string | null) { + if (draining || recyclingForHostChange) { + return; + } + let { recycle, nextWarmed } = decideHostShellRecycle( + hash, + warmedHostShellHash, + ); + if (!recycle) { + // Either nothing reported, or we adopted a baseline / matched — record + // the (possibly newly-adopted) token and we're done. + warmedHostShellHash = nextWarmed; + return; + } + recyclingForHostChange = true; + log.info( + `host shell changed (${warmedHostShellHash} -> ${hash}); recycling prerender browser`, + ); + void prerenderer + .recycle() + .then(() => { + warmedHostShellHash = nextWarmed; + }) + .catch((e) => { + // Leave warmedHostShellHash unchanged so the next heartbeat retries. 
+ log.error('Failed to recycle prerender browser on host change:', e); + }) + .finally(() => { + recyclingForHostChange = false; + }); + } + function startHeartbeatLoop() { if (heartbeatTimer) return; void sendHeartbeat(); diff --git a/packages/realm-server/prerender/prerender-constants.ts b/packages/realm-server/prerender/prerender-constants.ts index 7ec8005b19..31afbd97e9 100644 --- a/packages/realm-server/prerender/prerender-constants.ts +++ b/packages/realm-server/prerender/prerender-constants.ts @@ -2,6 +2,16 @@ export const PRERENDER_SERVER_STATUS_HEADER = 'X-Boxel-Prerender-Server-Status'; export const PRERENDER_SERVER_STATUS_DRAINING = 'draining'; export const PRERENDER_SERVER_DRAINING_STATUS_CODE = 410; +// Opaque token for the current host shell (the realm server's rewritten +// index.html). The realm server reports it to the manager at boot +// (POST /host-shell); the manager echoes the latest value on every +// heartbeat response via this header, and a prerender server recycles its +// browser when the value differs from the shell it last warmed against — +// i.e. the host was redeployed. The token only has to change when the host +// bundle changes; prerender servers treat it opaquely. +export const PRERENDER_HOST_SHELL_HASH_HEADER = + 'X-Boxel-Prerender-Host-Shell-Hash'; + // CS-10872: correlates one client-initiated prerender call across // remote-prerenderer → manager → prerender-server. The client assigns // the ID on the first request; the manager and prerender-server echo diff --git a/packages/realm-server/prerender/prerenderer.ts b/packages/realm-server/prerender/prerenderer.ts index 65059b09d0..2481b6974a 100644 --- a/packages/realm-server/prerender/prerenderer.ts +++ b/packages/realm-server/prerender/prerenderer.ts @@ -209,6 +209,17 @@ export class Prerenderer { await this.#pagePool.warmStandbys(); } + // Recycle the whole browser to pick up a redeployed host. 
Reuses the + // failure-recovery restart path (closeAll → restart Chrome → re-warm + // standbys); the full browser restart also clears Chrome's HTTP cache, so + // re-warmed pages reload the current host shell rather than a cached stale + // bundle. Coalesced via the same in-flight guard as #restartBrowser, so + // overlapping recycle signals collapse to one restart. + async recycle(): Promise { + log.info('Recycling prerender browser to pick up a redeployed host'); + await this.#restartBrowser(); + } + // Emit the `render cancelled` log line (format from CS-10872) // and, on a `rendering`-state cancel, tear down the affinity so // the next request gets a fresh tab rather than one whose diff --git a/packages/realm-server/tests/index.ts b/packages/realm-server/tests/index.ts index d29ea647f4..6165013e57 100644 --- a/packages/realm-server/tests/index.ts +++ b/packages/realm-server/tests/index.ts @@ -190,6 +190,7 @@ const ALL_TEST_FILES: string[] = [ './prerendering-test', './prerender-server-test', './prerender-manager-test', + './prerender-host-shell-recycle-test', './prerender-artifact-sink-test', './prerender-affinity-activity-test', './prerender-batch-ownership-test', diff --git a/packages/realm-server/tests/prerender-host-shell-recycle-test.ts b/packages/realm-server/tests/prerender-host-shell-recycle-test.ts new file mode 100644 index 0000000000..497fae4495 --- /dev/null +++ b/packages/realm-server/tests/prerender-host-shell-recycle-test.ts @@ -0,0 +1,43 @@ +import { module, test } from 'qunit'; +import { basename } from 'path'; +import { decideHostShellRecycle } from '../prerender/prerender-app.ts'; + +// Unit tests for the host-shell recycle decision a prerender server makes on +// every heartbeat: the manager echoes the current host-shell token, and the +// server recycles its browser when that token differs from the one it warmed +// against (the host was redeployed). See PRERENDER_HOST_SHELL_HASH_HEADER. 
+module(basename(__filename), function () { + module('decideHostShellRecycle', function () { + test('no token reported yet → no recycle, baseline unchanged', function (assert) { + assert.deepEqual(decideHostShellRecycle(null, undefined), { + recycle: false, + nextWarmed: undefined, + }); + assert.deepEqual(decideHostShellRecycle(null, 'aaa'), { + recycle: false, + nextWarmed: 'aaa', + }); + }); + + test('first token seen → adopt as baseline, no recycle', function (assert) { + assert.deepEqual(decideHostShellRecycle('aaa', undefined), { + recycle: false, + nextWarmed: 'aaa', + }); + }); + + test('token matches baseline → no-op', function (assert) { + assert.deepEqual(decideHostShellRecycle('aaa', 'aaa'), { + recycle: false, + nextWarmed: 'aaa', + }); + }); + + test('token differs from baseline → recycle and advance baseline', function (assert) { + assert.deepEqual(decideHostShellRecycle('bbb', 'aaa'), { + recycle: true, + nextWarmed: 'bbb', + }); + }); + }); +}); diff --git a/packages/realm-server/tests/prerender-manager-test.ts b/packages/realm-server/tests/prerender-manager-test.ts index abb6b5fb46..02a20af85f 100644 --- a/packages/realm-server/tests/prerender-manager-test.ts +++ b/packages/realm-server/tests/prerender-manager-test.ts @@ -8,6 +8,7 @@ import type { RealmHttpServer as Server } from '../server.ts'; import http, { createServer } from 'http'; import { buildPrerenderManagerApp } from '../prerender/manager-app.ts'; import { + PRERENDER_HOST_SHELL_HASH_HEADER, PRERENDER_SERVER_DRAINING_STATUS_CODE, PRERENDER_SERVER_STATUS_DRAINING, PRERENDER_SERVER_STATUS_HEADER, @@ -97,6 +98,59 @@ module(basename(__filename), function () { ); }); + test('reports the host shell token and echoes it on heartbeats', async function (assert) { + let { app } = buildPrerenderManagerApp(); + let request: SuperTest = supertest(app.callback()); + let headerKey = PRERENDER_HOST_SHELL_HASH_HEADER.toLowerCase(); + let heartbeat = () => + request.post('/prerender-servers').send({ + 
data: { + type: 'prerender-server', + attributes: { capacity: 2, url: serverUrlA }, + }, + }); + + // No token reported yet → heartbeat carries no host-shell header. + let first = await heartbeat(); + assert.strictEqual(first.status, 204, 'heartbeat accepted'); + assert.strictEqual( + first.headers[headerKey], + undefined, + 'no host-shell header before any report', + ); + + // Realm server reports a token. + let reportA = await request + .post('/host-shell') + .send({ data: { attributes: { hash: 'aaa111' } } }); + assert.strictEqual(reportA.status, 204, 'host-shell report accepted'); + + // Now heartbeats echo it. + let second = await heartbeat(); + assert.strictEqual( + second.headers[headerKey], + 'aaa111', + 'heartbeat echoes the reported host-shell token', + ); + + // A changed token is echoed; a repeat of the same token is a no-op. + await request + .post('/host-shell') + .send({ data: { attributes: { hash: 'bbb222' } } }); + let third = await heartbeat(); + assert.strictEqual( + third.headers[headerKey], + 'bbb222', + 'heartbeat echoes the updated host-shell token', + ); + + // A missing hash is rejected. 
+ let bad = await request + .post('/host-shell') + .send({ data: { attributes: {} } }); + assert.strictEqual(bad.status, 400, 'host-shell report requires a hash'); + }); + test('health includes active servers with affinities and last used times', async function (assert) { process.env.PRERENDER_MULTIPLEX = '2'; let { app } = buildPrerenderManagerApp(); From ce0ab7d7dc98f08c9a360f78dcebe523236dda57 Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Thu, 11 Jun 2026 12:01:37 -0500 Subject: [PATCH 02/12] Recycle the prerender fleet at the end of a deploy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prerender service deploys before the realm server (the manager, worker, and realm server all depend on the prerender fleet being up for boot indexing), so its tabs warm against the host shell the realm server was serving at that earlier point — the old bundle. Add a final recycle-prerender job, gated on post-deploy-realm-server, that re-deploys the prerender service once the realm server is up serving the new shell. The reusable ecs-deploy workflow always passes force-new-deployment, so this rolls fresh tasks (which re-warm against the new shell) even though the prerender image is unchanged. A deploy-side safety net that complements the in-process recycle (the prerender server recycles when the manager reports a new host-shell token); this covers the common full-deploy case without depending on the heartbeat round-trip. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/manual-deploy.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/manual-deploy.yml b/.github/workflows/manual-deploy.yml index 01bac788ad..12c260d59e 100644 --- a/.github/workflows/manual-deploy.yml +++ b/.github/workflows/manual-deploy.yml @@ -335,6 +335,27 @@ jobs: exit 1 fi + recycle-prerender: + name: Recycle prerender after host is live + # The prerender fleet deploys before the realm server (the manager, + # worker, and realm server all depend on it being up for boot indexing), + # so its tabs warm against the OLD host shell the realm server was still + # serving at that point. Once the realm server is up serving the new + # shell (this `needs`), re-deploy the prerender service so its tabs + # re-warm against it. The reusable deploy passes force-new-deployment, so + # this recycles fresh tasks even though the image is unchanged. + needs: [build-prerender, post-deploy-realm-server] + uses: cardstack/gh-actions/.github/workflows/ecs-deploy.yml@main + secrets: inherit + with: + container-name: "boxel-prerender-server" + environment: ${{ inputs.environment }} + cluster: ${{ inputs.environment }} + service-name: "boxel-prerender-server-${{ inputs.environment }}" + image: ${{ needs.build-prerender.outputs.image }} + timeout-minutes: 10 + wait-for-service-stability: true + apply-observability: # Push the observability/ package's dashboards/folders/data sources/alerts # into the production self-host Grafana as part of the deploy. 
The @@ -375,6 +396,7 @@ jobs: post-deploy-worker, deploy-realm-server, post-deploy-realm-server, + recycle-prerender, apply-observability, ] if: github.event_name == 'workflow_dispatch' && always() From 349a8f0828eaca99b90d8287a55bb062e0622c6a Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Thu, 11 Jun 2026 14:49:04 -0500 Subject: [PATCH 03/12] Add arbitrary host change --- packages/host/app/components/submode-switcher.gts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/host/app/components/submode-switcher.gts b/packages/host/app/components/submode-switcher.gts index f68a2a7408..821c54575c 100644 --- a/packages/host/app/components/submode-switcher.gts +++ b/packages/host/app/components/submode-switcher.gts @@ -230,7 +230,7 @@ export default class SubmodeSwitcher extends Component { } get appVersion() { - return `Version ${config.APP.version}`; + return `xVersion ${config.APP.version}`; } get buildMenuItems(): MenuItem[] { From 6ef8702c164593c159727bfde42b3d4fdd355e4e Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Thu, 11 Jun 2026 15:34:34 -0500 Subject: [PATCH 04/12] Gate the prerender recycle on the realm-server deploy, not the post-deploy hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recycle-prerender was gated on post-deploy-realm-server, which only POSTs the realm server's /_post-deployment endpoint and can fail on its own (observed: an edge 403). That skipped the recycle. Gate on deploy-realm-server instead — it waits for service stability, so the new realm server is up and serving the new shell by then, and the recycle no longer depends on the flaky post-deploy endpoint call. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/manual-deploy.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/manual-deploy.yml b/.github/workflows/manual-deploy.yml index 12c260d59e..375229825d 100644 --- a/.github/workflows/manual-deploy.yml +++ b/.github/workflows/manual-deploy.yml @@ -341,10 +341,16 @@ jobs: # worker, and realm server all depend on it being up for boot indexing), # so its tabs warm against the OLD host shell the realm server was still # serving at that point. Once the realm server is up serving the new - # shell (this `needs`), re-deploy the prerender service so its tabs - # re-warm against it. The reusable deploy passes force-new-deployment, so - # this recycles fresh tasks even though the image is unchanged. - needs: [build-prerender, post-deploy-realm-server] + # shell, re-deploy the prerender service so its tabs re-warm against it. + # The reusable deploy passes force-new-deployment, so this recycles fresh + # tasks even though the image is unchanged. + # + # Gate on `deploy-realm-server` (wait-for-service-stability: true → the + # new realm server is up and serving the new shell), NOT on + # `post-deploy-realm-server`: that job only POSTs the realm server's + # `/_post-deployment` endpoint and can fail independently (e.g. an edge + # 403), which would otherwise skip this recycle. 
+ needs: [build-prerender, deploy-realm-server] uses: cardstack/gh-actions/.github/workflows/ecs-deploy.yml@main secrets: inherit with: From 7c07d52722b6e6bf2ea0c03cff975bf34468170e Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Thu, 11 Jun 2026 15:42:52 -0500 Subject: [PATCH 05/12] Add another arbitrary change --- packages/host/app/components/submode-switcher.gts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/host/app/components/submode-switcher.gts b/packages/host/app/components/submode-switcher.gts index 821c54575c..5c02590551 100644 --- a/packages/host/app/components/submode-switcher.gts +++ b/packages/host/app/components/submode-switcher.gts @@ -230,7 +230,7 @@ export default class SubmodeSwitcher extends Component { } get appVersion() { - return `xVersion ${config.APP.version}`; + return `yVersion ${config.APP.version}`; } get buildMenuItems(): MenuItem[] { From 5b4bc25053dfcc83ee6b5056200cd24f55b68d3b Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Fri, 12 Jun 2026 10:09:22 -0500 Subject: [PATCH 06/12] Remove outdated 403 reference --- .github/workflows/manual-deploy.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/manual-deploy.yml b/.github/workflows/manual-deploy.yml index 375229825d..ac312f3317 100644 --- a/.github/workflows/manual-deploy.yml +++ b/.github/workflows/manual-deploy.yml @@ -347,9 +347,11 @@ jobs: # # Gate on `deploy-realm-server` (wait-for-service-stability: true → the # new realm server is up and serving the new shell), NOT on - # `post-deploy-realm-server`: that job only POSTs the realm server's - # `/_post-deployment` endpoint and can fail independently (e.g. an edge - # 403), which would otherwise skip this recycle. + # `post-deploy-realm-server`: that job is a separate post-deploy hook + # (it POSTs the realm server's `/_post-deployment` endpoint) and can fail + # independently of the recycle's only real precondition — the new shell + # being served. 
Coupling to it would skip this recycle on an unrelated + # hook failure. needs: [build-prerender, deploy-realm-server] uses: cardstack/gh-actions/.github/workflows/ecs-deploy.yml@main secrets: inherit From 2dcb88baf2fd3c854767a945663a8a9b0605dfe8 Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Fri, 12 Jun 2026 11:51:02 -0500 Subject: [PATCH 07/12] Remove nonsense change --- packages/host/app/components/submode-switcher.gts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/host/app/components/submode-switcher.gts b/packages/host/app/components/submode-switcher.gts index 5c02590551..f68a2a7408 100644 --- a/packages/host/app/components/submode-switcher.gts +++ b/packages/host/app/components/submode-switcher.gts @@ -230,7 +230,7 @@ export default class SubmodeSwitcher extends Component { } get appVersion() { - return `yVersion ${config.APP.version}`; + return `Version ${config.APP.version}`; } get buildMenuItems(): MenuItem[] { From 96f661e4e593d592ba83ffc4e512f930265c9fdf Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Mon, 15 Jun 2026 07:39:41 -0500 Subject: [PATCH 08/12] Normalize and bound /host-shell token before storing Trim the reported host-shell token and reject values over 64 chars before storing it. The token is echoed into a response header on every heartbeat, so a whitespace variant would spuriously read as a change and an oversized value would bloat every heartbeat response. 
Co-Authored-By: Claude Opus 4.8 --- packages/realm-server/prerender/manager-app.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/realm-server/prerender/manager-app.ts b/packages/realm-server/prerender/manager-app.ts index b8b2d4184e..313b0f7ab4 100644 --- a/packages/realm-server/prerender/manager-app.ts +++ b/packages/realm-server/prerender/manager-app.ts @@ -551,6 +551,18 @@ export function buildPrerenderManagerApp(options?: { ctxt.body = { errors: [{ status: 400, message: 'hash is required' }] }; return; } + // Normalize and bound the token before storing it: it is echoed into a + // response header on every heartbeat, so a stray-whitespace variant would + // spuriously read as a change, and an oversized value would bloat every + // heartbeat response. The real token is a short hex digest, so anything + // longer is malformed — reject rather than silently truncate (a truncated + // token would never match and would recycle forever). + hash = hash.trim(); + if (hash.length > 64) { + ctxt.status = 400; + ctxt.body = { errors: [{ status: 400, message: 'hash too long' }] }; + return; + } if (registry.hostShellHash !== hash) { log.info( `host shell token changed (${registry.hostShellHash ?? 'none'} -> ${hash}); prerender servers will recycle on next heartbeat`, From e6b388116298a6e6e1345570b95b1e6b1de43c8f Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Mon, 15 Jun 2026 08:31:21 -0500 Subject: [PATCH 09/12] Seed prerender warm baseline from the host shell it warmed against A prerender server learned its host-shell baseline from the first heartbeat that carried a token, treating `warmed === undefined` as "just warmed against the current shell." When a server warmed against an old shell but its first token-carrying heartbeat reported a newer one, it adopted the new token without recycling and stayed pinned to the stale shell. 
Seed the baseline at startup from the realm server the prerender loads its shell from: the realm server now serves its current host-shell token at GET /_host-shell-hash (the same value it reports to the manager, computed via a shared helper), and the prerender fetches it before the first heartbeat. The baseline is then the token actually warmed against, so a later change correctly recycles. Best-effort: if the seed fails the server falls back to adopting the first reported token. Co-Authored-By: Claude Opus 4.8 --- packages/realm-server/handlers/serve-index.ts | 31 ++++++++++++- packages/realm-server/main.ts | 5 +- .../realm-server/prerender/prerender-app.ts | 46 +++++++++++++++++-- .../prerender/prerender-constants.ts | 10 ++++ .../realm-server/prerender/prerenderer.ts | 2 +- packages/realm-server/routes.ts | 14 ++++++ packages/realm-server/server.ts | 3 +- .../realm-server/tests/serve-index-test.ts | 41 +++++++++++++++++ 8 files changed, 143 insertions(+), 9 deletions(-) diff --git a/packages/realm-server/handlers/serve-index.ts b/packages/realm-server/handlers/serve-index.ts index 719effaf05..d72fbd342c 100644 --- a/packages/realm-server/handlers/serve-index.ts +++ b/packages/realm-server/handlers/serve-index.ts @@ -20,6 +20,7 @@ import { } from '../lib/index-html-injection.ts'; import { retrieveScopedCSS } from '../lib/retrieve-scoped-css.ts'; import { fullRequestURL } from '../middleware/index.ts'; +import { computeHostShellHash } from '../prerender/prerender-constants.ts'; import { findOrMountRealm, getPublishedRealmInfo, @@ -48,6 +49,10 @@ export type ServeIndexHandlers = { // isolation. Same closure backs `serveIndex` / `serveHostApp` so the // production cache behaviour is preserved. retrieveIndexHTML: () => Promise; + // The current host-shell token (md5 of the host index HTML), served from + // `GET /_host-shell-hash` and matching what the realm server reports to the + // prerender manager. A prerender server seeds its warm baseline from this. 
+ getHostShellHash: () => Promise; }; const log = logger('realm-server'); @@ -181,6 +186,30 @@ export function createServeIndex(deps: ServeIndexDeps): ServeIndexHandlers { return work; } + // Token for the host shell this server is serving. Computed from the raw + // host index HTML (not the rewritten body) so it matches the value the realm + // server reports to the prerender manager — the prerender fleet compares the + // two to detect a host redeploy. Promise-memoized for the process lifetime + // like the shell itself (so concurrent callers share one getIndexHTML call); + // recomputed in dev where the shell isn't cached. + let promiseForHostShellHash: Promise | undefined; + async function getHostShellHash(): Promise { + let isDev = assetsURL.hostname === 'localhost'; + if (!isDev && promiseForHostShellHash) { + return promiseForHostShellHash; + } + let work = (async () => computeHostShellHash(await getIndexHTML()))(); + if (!isDev) { + promiseForHostShellHash = work; + // On failure clear the cache so the next call retries instead of + // awaiting a permanently-rejected promise. 
+ work.catch(() => { + promiseForHostShellHash = undefined; + }); + } + return work; + } + function defaultIconLinks(): string[] { let faviconURL = new URL('boxel-favicon.png', assetsURL).href; let webclipURL = new URL('boxel-webclip.png', assetsURL).href; @@ -523,7 +552,7 @@ export function createServeIndex(deps: ServeIndexDeps): ServeIndexHandlers { ); }; - return { serveIndex, serveHostApp, retrieveIndexHTML }; + return { serveIndex, serveHostApp, retrieveIndexHTML, getHostShellHash }; } function truncateLogLines(value: string, maxLines = 3): string { diff --git a/packages/realm-server/main.ts b/packages/realm-server/main.ts index 2d4e635b34..776bb0e8e0 100644 --- a/packages/realm-server/main.ts +++ b/packages/realm-server/main.ts @@ -23,6 +23,7 @@ import { MatrixClient } from '@cardstack/runtime-common/matrix-client'; import 'decorator-transforms/globals'; import { createRemotePrerenderer } from './prerender/remote-prerenderer.ts'; import { resolvePrerenderManagerURL } from './prerender/config.ts'; +import { computeHostShellHash } from './prerender/prerender-constants.ts'; import { buildCreatePrerenderAuth } from './prerender/auth.ts'; import { isEnvironmentMode, @@ -373,9 +374,7 @@ const smokeTestHostApp = async () => { // unreachable manager must never block realm-server boot. 
const reportHostShellToManager = async () => { try { - let html = await getIndexHTML(); - let { createHash } = await import('crypto'); - let hash = createHash('md5').update(html).digest('hex').slice(0, 8); + let hash = await computeHostShellHash(await getIndexHTML()); let managerURL = resolvePrerenderManagerURL(); let response = await fetch(`${managerURL}/host-shell`, { method: 'POST', diff --git a/packages/realm-server/prerender/prerender-app.ts b/packages/realm-server/prerender/prerender-app.ts index ec85d5492a..67db7dcc83 100644 --- a/packages/realm-server/prerender/prerender-app.ts +++ b/packages/realm-server/prerender/prerender-app.ts @@ -19,6 +19,7 @@ import { fetchRequestFromContext, } from '../middleware/index.ts'; import { Prerenderer } from './index.ts'; +import { boxelHostURL } from './prerenderer.ts'; import type { Timings } from './render-runner.ts'; import { resolvePrerenderManagerURL } from './config.ts'; import { @@ -1046,11 +1047,14 @@ export function createPrerenderHttpServer(options?: { let drainingResolved = false; let drainingDeferred = new Deferred(); let heartbeatTimer: NodeJS.Timeout | undefined; - // Host-shell token the standbys were last warmed against, learned from the + // Host-shell token the standbys were last warmed against. Seeded at startup + // from the realm server we load the shell from (GET /_host-shell-hash) so it + // reflects the shell we actually warmed against, then kept current from the // manager's heartbeat responses (PRERENDER_HOST_SHELL_HASH_HEADER). When the // manager reports a different token — the host was redeployed and the realm // server is now serving a new shell — the browser is recycled so pages - // reload it. Undefined until the first heartbeat that carries a token. + // reload it. Undefined only if the startup seed failed, in which case the + // first heartbeat that carries a token adopts it as the baseline. 
let warmedHostShellHash: string | undefined; let recyclingForHostChange = false; let isClosing = false; @@ -1168,6 +1172,39 @@ export function createPrerenderHttpServer(options?: { }); } + // Seed the warm baseline from the realm server we warm against, so the first + // heartbeat compares against the shell we actually loaded rather than + // adopting whatever token the manager happens to report. This closes the gap + // where a server warmed against an old shell, but whose first token-carrying + // heartbeat reports a newer shell, would adopt the new token without + // recycling and stay pinned to the stale shell. Best-effort and time-bounded: + // on any failure we leave the baseline unset and fall back to adopting the + // first reported token. + async function seedWarmedHostShellHash() { + let controller = new AbortController(); + let timer = setTimeout(() => controller.abort(), 5000); + try { + let response = await fetch(`${boxelHostURL}/_host-shell-hash`, { + headers: { Accept: 'application/vnd.api+json' }, + signal: controller.signal, + }); + if (!response.ok) { + log.debug(`host shell token seed skipped: HTTP ${response.status}`); + return; + } + let body: any = await response.json(); + let hash = body?.data?.attributes?.hash; + if (typeof hash === 'string' && hash.trim().length > 0) { + warmedHostShellHash = hash.trim(); + log.info(`seeded warmed host shell token ${warmedHostShellHash}`); + } + } catch (e) { + log.debug('Failed to seed warmed host shell token:', e as any); + } finally { + clearTimeout(timer); + } + } + function startHeartbeatLoop() { if (heartbeatTimer) return; void sendHeartbeat(); @@ -1209,7 +1246,10 @@ export function createPrerenderHttpServer(options?: { serverURL = actualURL; prerenderer.serverURL = actualURL; } - startHeartbeatLoop(); + // Seed the baseline before the first heartbeat so it can't adopt a + // newer-than-what-we-warmed-against token; then start heartbeating + // regardless of whether the seed succeeded. 
+ void seedWarmedHostShellHash().finally(() => startHeartbeatLoop()); } catch (e) { log.debug('Error scheduling registration with prerender manager:', e); } diff --git a/packages/realm-server/prerender/prerender-constants.ts b/packages/realm-server/prerender/prerender-constants.ts index 31afbd97e9..25d3bf7fcf 100644 --- a/packages/realm-server/prerender/prerender-constants.ts +++ b/packages/realm-server/prerender/prerender-constants.ts @@ -12,6 +12,16 @@ export const PRERENDER_SERVER_DRAINING_STATUS_CODE = 410; export const PRERENDER_HOST_SHELL_HASH_HEADER = 'X-Boxel-Prerender-Host-Shell-Hash'; +// Single definition of the host-shell token so every producer agrees on it: +// the realm server reports it to the manager at boot, serves it from +// `GET /_host-shell-hash`, and a prerender server seeds its warm baseline from +// that endpoint. A short md5 digest of the host index HTML — it only has to +// change when the host bundle changes; callers treat it opaquely. +export async function computeHostShellHash(indexHTML: string): Promise { + let { createHash } = await import('crypto'); + return createHash('md5').update(indexHTML).digest('hex').slice(0, 8); +} + // CS-10872: correlates one client-initiated prerender call across // remote-prerenderer → manager → prerender-server. The client assigns // the ID on the first request; the manager and prerender-server echo diff --git a/packages/realm-server/prerender/prerenderer.ts b/packages/realm-server/prerender/prerenderer.ts index 2481b6974a..08fa035c0a 100644 --- a/packages/realm-server/prerender/prerenderer.ts +++ b/packages/realm-server/prerender/prerenderer.ts @@ -35,7 +35,7 @@ const log = logger('prerenderer'); const defaultHostURL = isEnvironmentMode() ? serviceURL('host') : 'https://localhost:4200'; -const boxelHostURL = process.env.BOXEL_HOST_URL ?? defaultHostURL; +export const boxelHostURL = process.env.BOXEL_HOST_URL ?? 
defaultHostURL; const DEFAULT_AFFINITY_IDLE_EVICT_MS = 12 * 60 * 60 * 1000; type PoolMeta = { diff --git a/packages/realm-server/routes.ts b/packages/realm-server/routes.ts index e319ee8f19..510cdc6d03 100644 --- a/packages/realm-server/routes.ts +++ b/packages/realm-server/routes.ts @@ -74,6 +74,7 @@ import handleWebhookReceiverRequest from './handlers/handle-webhook-receiver.ts' import handleRunCommand from './handlers/handle-run-command.ts'; import handleScreenshotCard from './handlers/handle-screenshot-card.ts'; import { buildCreatePrerenderAuth } from './prerender/auth.ts'; +import { PRERENDER_HOST_SHELL_HASH_HEADER } from './prerender/prerender-constants.ts'; import type { RealmRegistryReconciler } from './lib/realm-registry-reconciler.ts'; export type CreateRoutesArgs = { @@ -99,6 +100,7 @@ export type CreateRoutesArgs = { serveHostApp: (ctxt: Koa.Context, next: Koa.Next) => Promise; serveIndex: (ctxt: Koa.Context, next: Koa.Next) => Promise; serveFromRealm: (ctxt: Koa.Context, next: Koa.Next) => Promise; + getHostShellHash: () => Promise; sendEvent: ( user: string, eventType: string, @@ -143,6 +145,18 @@ export function createRoutes(args: CreateRoutesArgs) { args.serveFromRealm, ); router.get('/_standby', healthCheck, args.serveHostApp, args.serveFromRealm); + // The host-shell token this server is serving. A prerender server seeds its + // warm baseline from here at startup so it compares heartbeats against the + // shell it actually warmed against (the realm server it loads `/_standby` + // from is the authority), rather than adopting the first token the manager + // happens to report. The value matches what this server reports to the + // manager. Non-sensitive (a digest of public HTML), so no auth. 
+ router.get('/_host-shell-hash', async (ctxt: Koa.Context) => { + let hash = await args.getHostShellHash(); + ctxt.set(PRERENDER_HOST_SHELL_HASH_HEADER, hash); + ctxt.body = { data: { type: 'host-shell', attributes: { hash } } }; + ctxt.status = 200; + }); router.post('/_server-session', handleCreateSessionRequest(args)); router.post( '/_create-realm', diff --git a/packages/realm-server/server.ts b/packages/realm-server/server.ts index 12ee2788c2..3a4fe24036 100644 --- a/packages/realm-server/server.ts +++ b/packages/realm-server/server.ts @@ -771,7 +771,7 @@ export class RealmServer { } private buildApp() { - let { serveIndex, serveHostApp } = createServeIndex({ + let { serveIndex, serveHostApp, getHostShellHash } = createServeIndex({ serverURL: this.serverURL, assetsURL: this.assetsURL, realms: this.realms, @@ -855,6 +855,7 @@ export class RealmServer { serveHostApp, serveIndex, serveFromRealm, + getHostShellHash, sendEvent, queue: this.queue, realms: this.realms, diff --git a/packages/realm-server/tests/serve-index-test.ts b/packages/realm-server/tests/serve-index-test.ts index 2dfecb6235..02c6339a07 100644 --- a/packages/realm-server/tests/serve-index-test.ts +++ b/packages/realm-server/tests/serve-index-test.ts @@ -2,6 +2,7 @@ import { module, test } from 'qunit'; import { basename } from 'path'; import { createServeIndex } from '../handlers/serve-index.ts'; +import { computeHostShellHash } from '../prerender/prerender-constants.ts'; function buildDeps(getIndexHTML: () => Promise) { return { @@ -113,4 +114,44 @@ module(basename(__filename), function () { assert.strictEqual(calls, 1, 'subsequent calls also reuse the cache'); assert.strictEqual(d, a, 'cached value is returned identically'); }); + + test('getHostShellHash digests the raw index HTML, matching the manager report', async function (assert) { + let raw = validIndexHTML(); + let { getHostShellHash, retrieveIndexHTML } = createServeIndex( + buildDeps(async () => raw), + ); + + let hash = await 
getHostShellHash(); + assert.strictEqual( + hash, + await computeHostShellHash(raw), + 'token is the digest of the raw getIndexHTML — the same value the realm server reports to the manager', + ); + + // The served shell is the rewritten HTML, which differs from the raw — the + // token must not be derived from it, or it would never match the manager's. + let rewritten = await retrieveIndexHTML(); + assert.notStrictEqual( + hash, + await computeHostShellHash(rewritten), + 'token is not derived from the rewritten shell', + ); + }); + + test('getHostShellHash is memoized — getIndexHTML runs once across calls', async function (assert) { + let calls = 0; + let { getHostShellHash } = createServeIndex( + buildDeps(async () => { + calls += 1; + return validIndexHTML(); + }), + ); + + let [a, b] = await Promise.all([getHostShellHash(), getHostShellHash()]); + let c = await getHostShellHash(); + + assert.strictEqual(a, b, 'concurrent callers receive the same token'); + assert.strictEqual(b, c, 'subsequent calls reuse the cached token'); + assert.strictEqual(calls, 1, 'getIndexHTML was only invoked once'); + }); }); From 3d17c2cb9346703f8c2027d055d894e8f08f53a8 Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Mon, 15 Jun 2026 10:50:15 -0500 Subject: [PATCH 10/12] Report host shell to the manager URL the realm server already uses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reportHostShellToManager resolved the manager URL via PRERENDER_MANAGER_URL, which is only set on the prerender-server tasks. On the realm server that env var is unset, so it fell back to localhost and the report never reached the manager (fetch failed at boot). The manager therefore never learned the new host-shell token and prerender servers never recycled via heartbeat. Use prerendererUrl — the manager address the realm server already sends prerender requests to — so there is a single source of truth. 
Co-Authored-By: Claude Opus 4.8 --- packages/realm-server/main.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/realm-server/main.ts b/packages/realm-server/main.ts index 776bb0e8e0..9bd128c4f9 100644 --- a/packages/realm-server/main.ts +++ b/packages/realm-server/main.ts @@ -22,7 +22,6 @@ import { MatrixClient } from '@cardstack/runtime-common/matrix-client'; import 'decorator-transforms/globals'; import { createRemotePrerenderer } from './prerender/remote-prerenderer.ts'; -import { resolvePrerenderManagerURL } from './prerender/config.ts'; import { computeHostShellHash } from './prerender/prerender-constants.ts'; import { buildCreatePrerenderAuth } from './prerender/auth.ts'; import { @@ -375,7 +374,13 @@ const smokeTestHostApp = async () => { const reportHostShellToManager = async () => { try { let hash = await computeHostShellHash(await getIndexHTML()); - let managerURL = resolvePrerenderManagerURL(); + // Report to the same prerender manager this realm server already sends + // prerender requests to. `prerendererUrl` is the manager's address (the + // manager fronts the pool). Resolving a separate PRERENDER_MANAGER_URL here + // is wrong on the realm-server side: that env var is only set on the + // prerender-server tasks, so on the realm server it fell back to localhost + // and the report never reached the manager. 
+ let managerURL = prerendererUrl.replace(/\/$/, ''); let response = await fetch(`${managerURL}/host-shell`, { method: 'POST', headers: { From 67a1cffdde6b17a1db51ab98ff5c23d5f91a117e Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Mon, 15 Jun 2026 18:04:22 -0500 Subject: [PATCH 11/12] Revert seed of prerender warm baseline (kept the manager-URL fix) The seed's startup fetch to `${boxelHostURL}/_host-shell-hash` wedged the software-factory Playwright harness, where the prerender's boxelHostURL points at the compat proxy whose port is reserved/rebound during fixture setup; the extra startup connection stalled the compat-proxy bring-up past its 300s budget (every SF test failed). Confirmed by bisect: reverting only the seed makes SF pass in normal time. The seed addressed a narrow `warmed === undefined` edge in decideHostShellRecycle; reverting restores the safe adopt-first-token fallback. Base CS-11468 (heartbeat recycle, /host-shell hardening) and the manager-URL report fix are kept. This reverts the seed portion of e6b3881162; main.ts retains the prerendererUrl report target. 
Co-Authored-By: Claude Opus 4.8 --- packages/realm-server/handlers/serve-index.ts | 31 +------------ packages/realm-server/main.ts | 13 ++---- .../realm-server/prerender/prerender-app.ts | 46 ++----------------- .../prerender/prerender-constants.ts | 10 ---- .../realm-server/prerender/prerenderer.ts | 2 +- packages/realm-server/routes.ts | 14 ------ packages/realm-server/server.ts | 3 +- .../realm-server/tests/serve-index-test.ts | 41 ----------------- 8 files changed, 11 insertions(+), 149 deletions(-) diff --git a/packages/realm-server/handlers/serve-index.ts b/packages/realm-server/handlers/serve-index.ts index d72fbd342c..719effaf05 100644 --- a/packages/realm-server/handlers/serve-index.ts +++ b/packages/realm-server/handlers/serve-index.ts @@ -20,7 +20,6 @@ import { } from '../lib/index-html-injection.ts'; import { retrieveScopedCSS } from '../lib/retrieve-scoped-css.ts'; import { fullRequestURL } from '../middleware/index.ts'; -import { computeHostShellHash } from '../prerender/prerender-constants.ts'; import { findOrMountRealm, getPublishedRealmInfo, @@ -49,10 +48,6 @@ export type ServeIndexHandlers = { // isolation. Same closure backs `serveIndex` / `serveHostApp` so the // production cache behaviour is preserved. retrieveIndexHTML: () => Promise; - // The current host-shell token (md5 of the host index HTML), served from - // `GET /_host-shell-hash` and matching what the realm server reports to the - // prerender manager. A prerender server seeds its warm baseline from this. - getHostShellHash: () => Promise; }; const log = logger('realm-server'); @@ -186,30 +181,6 @@ export function createServeIndex(deps: ServeIndexDeps): ServeIndexHandlers { return work; } - // Token for the host shell this server is serving. Computed from the raw - // host index HTML (not the rewritten body) so it matches the value the realm - // server reports to the prerender manager — the prerender fleet compares the - // two to detect a host redeploy. 
Promise-memoized for the process lifetime - // like the shell itself (so concurrent callers share one getIndexHTML call); - // recomputed in dev where the shell isn't cached. - let promiseForHostShellHash: Promise | undefined; - async function getHostShellHash(): Promise { - let isDev = assetsURL.hostname === 'localhost'; - if (!isDev && promiseForHostShellHash) { - return promiseForHostShellHash; - } - let work = (async () => computeHostShellHash(await getIndexHTML()))(); - if (!isDev) { - promiseForHostShellHash = work; - // On failure clear the cache so the next call retries instead of - // awaiting a permanently-rejected promise. - work.catch(() => { - promiseForHostShellHash = undefined; - }); - } - return work; - } - function defaultIconLinks(): string[] { let faviconURL = new URL('boxel-favicon.png', assetsURL).href; let webclipURL = new URL('boxel-webclip.png', assetsURL).href; @@ -552,7 +523,7 @@ export function createServeIndex(deps: ServeIndexDeps): ServeIndexHandlers { ); }; - return { serveIndex, serveHostApp, retrieveIndexHTML, getHostShellHash }; + return { serveIndex, serveHostApp, retrieveIndexHTML }; } function truncateLogLines(value: string, maxLines = 3): string { diff --git a/packages/realm-server/main.ts b/packages/realm-server/main.ts index 9bd128c4f9..c610555ceb 100644 --- a/packages/realm-server/main.ts +++ b/packages/realm-server/main.ts @@ -22,7 +22,6 @@ import { MatrixClient } from '@cardstack/runtime-common/matrix-client'; import 'decorator-transforms/globals'; import { createRemotePrerenderer } from './prerender/remote-prerenderer.ts'; -import { computeHostShellHash } from './prerender/prerender-constants.ts'; import { buildCreatePrerenderAuth } from './prerender/auth.ts'; import { isEnvironmentMode, @@ -373,13 +372,11 @@ const smokeTestHostApp = async () => { // unreachable manager must never block realm-server boot. 
const reportHostShellToManager = async () => { try { - let hash = await computeHostShellHash(await getIndexHTML()); - // Report to the same prerender manager this realm server already sends - // prerender requests to. `prerendererUrl` is the manager's address (the - // manager fronts the pool). Resolving a separate PRERENDER_MANAGER_URL here - // is wrong on the realm-server side: that env var is only set on the - // prerender-server tasks, so on the realm server it fell back to localhost - // and the report never reached the manager. + let html = await getIndexHTML(); + let { createHash } = await import('crypto'); + let hash = createHash('md5').update(html).digest('hex').slice(0, 8); + // Report to the manager URL the realm server already uses (prerendererUrl); + // PRERENDER_MANAGER_URL is only set on the prerender-server tasks. let managerURL = prerendererUrl.replace(/\/$/, ''); let response = await fetch(`${managerURL}/host-shell`, { method: 'POST', diff --git a/packages/realm-server/prerender/prerender-app.ts b/packages/realm-server/prerender/prerender-app.ts index 67db7dcc83..ec85d5492a 100644 --- a/packages/realm-server/prerender/prerender-app.ts +++ b/packages/realm-server/prerender/prerender-app.ts @@ -19,7 +19,6 @@ import { fetchRequestFromContext, } from '../middleware/index.ts'; import { Prerenderer } from './index.ts'; -import { boxelHostURL } from './prerenderer.ts'; import type { Timings } from './render-runner.ts'; import { resolvePrerenderManagerURL } from './config.ts'; import { @@ -1047,14 +1046,11 @@ export function createPrerenderHttpServer(options?: { let drainingResolved = false; let drainingDeferred = new Deferred(); let heartbeatTimer: NodeJS.Timeout | undefined; - // Host-shell token the standbys were last warmed against. 
Seeded at startup - // from the realm server we load the shell from (GET /_host-shell-hash) so it - // reflects the shell we actually warmed against, then kept current from the + // Host-shell token the standbys were last warmed against, learned from the // manager's heartbeat responses (PRERENDER_HOST_SHELL_HASH_HEADER). When the // manager reports a different token — the host was redeployed and the realm // server is now serving a new shell — the browser is recycled so pages - // reload it. Undefined only if the startup seed failed, in which case the - // first heartbeat that carries a token adopts it as the baseline. + // reload it. Undefined until the first heartbeat that carries a token. let warmedHostShellHash: string | undefined; let recyclingForHostChange = false; let isClosing = false; @@ -1172,39 +1168,6 @@ export function createPrerenderHttpServer(options?: { }); } - // Seed the warm baseline from the realm server we warm against, so the first - // heartbeat compares against the shell we actually loaded rather than - // adopting whatever token the manager happens to report. This closes the gap - // where a server warmed against an old shell, but whose first token-carrying - // heartbeat reports a newer shell, would adopt the new token without - // recycling and stay pinned to the stale shell. Best-effort and time-bounded: - // on any failure we leave the baseline unset and fall back to adopting the - // first reported token. 
- async function seedWarmedHostShellHash() { - let controller = new AbortController(); - let timer = setTimeout(() => controller.abort(), 5000); - try { - let response = await fetch(`${boxelHostURL}/_host-shell-hash`, { - headers: { Accept: 'application/vnd.api+json' }, - signal: controller.signal, - }); - if (!response.ok) { - log.debug(`host shell token seed skipped: HTTP ${response.status}`); - return; - } - let body: any = await response.json(); - let hash = body?.data?.attributes?.hash; - if (typeof hash === 'string' && hash.trim().length > 0) { - warmedHostShellHash = hash.trim(); - log.info(`seeded warmed host shell token ${warmedHostShellHash}`); - } - } catch (e) { - log.debug('Failed to seed warmed host shell token:', e as any); - } finally { - clearTimeout(timer); - } - } - function startHeartbeatLoop() { if (heartbeatTimer) return; void sendHeartbeat(); @@ -1246,10 +1209,7 @@ export function createPrerenderHttpServer(options?: { serverURL = actualURL; prerenderer.serverURL = actualURL; } - // Seed the baseline before the first heartbeat so it can't adopt a - // newer-than-what-we-warmed-against token; then start heartbeating - // regardless of whether the seed succeeded. 
- void seedWarmedHostShellHash().finally(() => startHeartbeatLoop()); + startHeartbeatLoop(); } catch (e) { log.debug('Error scheduling registration with prerender manager:', e); } diff --git a/packages/realm-server/prerender/prerender-constants.ts b/packages/realm-server/prerender/prerender-constants.ts index 25d3bf7fcf..31afbd97e9 100644 --- a/packages/realm-server/prerender/prerender-constants.ts +++ b/packages/realm-server/prerender/prerender-constants.ts @@ -12,16 +12,6 @@ export const PRERENDER_SERVER_DRAINING_STATUS_CODE = 410; export const PRERENDER_HOST_SHELL_HASH_HEADER = 'X-Boxel-Prerender-Host-Shell-Hash'; -// Single definition of the host-shell token so every producer agrees on it: -// the realm server reports it to the manager at boot, serves it from -// `GET /_host-shell-hash`, and a prerender server seeds its warm baseline from -// that endpoint. A short md5 digest of the host index HTML — it only has to -// change when the host bundle changes; callers treat it opaquely. -export async function computeHostShellHash(indexHTML: string): Promise { - let { createHash } = await import('crypto'); - return createHash('md5').update(indexHTML).digest('hex').slice(0, 8); -} - // CS-10872: correlates one client-initiated prerender call across // remote-prerenderer → manager → prerender-server. The client assigns // the ID on the first request; the manager and prerender-server echo diff --git a/packages/realm-server/prerender/prerenderer.ts b/packages/realm-server/prerender/prerenderer.ts index 08fa035c0a..2481b6974a 100644 --- a/packages/realm-server/prerender/prerenderer.ts +++ b/packages/realm-server/prerender/prerenderer.ts @@ -35,7 +35,7 @@ const log = logger('prerenderer'); const defaultHostURL = isEnvironmentMode() ? serviceURL('host') : 'https://localhost:4200'; -export const boxelHostURL = process.env.BOXEL_HOST_URL ?? defaultHostURL; +const boxelHostURL = process.env.BOXEL_HOST_URL ?? 
defaultHostURL; const DEFAULT_AFFINITY_IDLE_EVICT_MS = 12 * 60 * 60 * 1000; type PoolMeta = { diff --git a/packages/realm-server/routes.ts b/packages/realm-server/routes.ts index 510cdc6d03..e319ee8f19 100644 --- a/packages/realm-server/routes.ts +++ b/packages/realm-server/routes.ts @@ -74,7 +74,6 @@ import handleWebhookReceiverRequest from './handlers/handle-webhook-receiver.ts' import handleRunCommand from './handlers/handle-run-command.ts'; import handleScreenshotCard from './handlers/handle-screenshot-card.ts'; import { buildCreatePrerenderAuth } from './prerender/auth.ts'; -import { PRERENDER_HOST_SHELL_HASH_HEADER } from './prerender/prerender-constants.ts'; import type { RealmRegistryReconciler } from './lib/realm-registry-reconciler.ts'; export type CreateRoutesArgs = { @@ -100,7 +99,6 @@ export type CreateRoutesArgs = { serveHostApp: (ctxt: Koa.Context, next: Koa.Next) => Promise; serveIndex: (ctxt: Koa.Context, next: Koa.Next) => Promise; serveFromRealm: (ctxt: Koa.Context, next: Koa.Next) => Promise; - getHostShellHash: () => Promise; sendEvent: ( user: string, eventType: string, @@ -145,18 +143,6 @@ export function createRoutes(args: CreateRoutesArgs) { args.serveFromRealm, ); router.get('/_standby', healthCheck, args.serveHostApp, args.serveFromRealm); - // The host-shell token this server is serving. A prerender server seeds its - // warm baseline from here at startup so it compares heartbeats against the - // shell it actually warmed against (the realm server it loads `/_standby` - // from is the authority), rather than adopting the first token the manager - // happens to report. The value matches what this server reports to the - // manager. Non-sensitive (a digest of public HTML), so no auth. 
- router.get('/_host-shell-hash', async (ctxt: Koa.Context) => { - let hash = await args.getHostShellHash(); - ctxt.set(PRERENDER_HOST_SHELL_HASH_HEADER, hash); - ctxt.body = { data: { type: 'host-shell', attributes: { hash } } }; - ctxt.status = 200; - }); router.post('/_server-session', handleCreateSessionRequest(args)); router.post( '/_create-realm', diff --git a/packages/realm-server/server.ts b/packages/realm-server/server.ts index e504227ecf..787d9164d7 100644 --- a/packages/realm-server/server.ts +++ b/packages/realm-server/server.ts @@ -997,7 +997,7 @@ export class RealmServer { } private buildApp() { - let { serveIndex, serveHostApp, getHostShellHash } = createServeIndex({ + let { serveIndex, serveHostApp } = createServeIndex({ serverURL: this.serverURL, assetsURL: this.assetsURL, realms: this.realms, @@ -1081,7 +1081,6 @@ export class RealmServer { serveHostApp, serveIndex, serveFromRealm, - getHostShellHash, sendEvent, queue: this.queue, realms: this.realms, diff --git a/packages/realm-server/tests/serve-index-test.ts b/packages/realm-server/tests/serve-index-test.ts index 02c6339a07..2dfecb6235 100644 --- a/packages/realm-server/tests/serve-index-test.ts +++ b/packages/realm-server/tests/serve-index-test.ts @@ -2,7 +2,6 @@ import { module, test } from 'qunit'; import { basename } from 'path'; import { createServeIndex } from '../handlers/serve-index.ts'; -import { computeHostShellHash } from '../prerender/prerender-constants.ts'; function buildDeps(getIndexHTML: () => Promise) { return { @@ -114,44 +113,4 @@ module(basename(__filename), function () { assert.strictEqual(calls, 1, 'subsequent calls also reuse the cache'); assert.strictEqual(d, a, 'cached value is returned identically'); }); - - test('getHostShellHash digests the raw index HTML, matching the manager report', async function (assert) { - let raw = validIndexHTML(); - let { getHostShellHash, retrieveIndexHTML } = createServeIndex( - buildDeps(async () => raw), - ); - - let hash = await 
getHostShellHash(); - assert.strictEqual( - hash, - await computeHostShellHash(raw), - 'token is the digest of the raw getIndexHTML — the same value the realm server reports to the manager', - ); - - // The served shell is the rewritten HTML, which differs from the raw — the - // token must not be derived from it, or it would never match the manager's. - let rewritten = await retrieveIndexHTML(); - assert.notStrictEqual( - hash, - await computeHostShellHash(rewritten), - 'token is not derived from the rewritten shell', - ); - }); - - test('getHostShellHash is memoized — getIndexHTML runs once across calls', async function (assert) { - let calls = 0; - let { getHostShellHash } = createServeIndex( - buildDeps(async () => { - calls += 1; - return validIndexHTML(); - }), - ); - - let [a, b] = await Promise.all([getHostShellHash(), getHostShellHash()]); - let c = await getHostShellHash(); - - assert.strictEqual(a, b, 'concurrent callers receive the same token'); - assert.strictEqual(b, c, 'subsequent calls reuse the cached token'); - assert.strictEqual(calls, 1, 'getIndexHTML was only invoked once'); - }); }); From be64529586a966ab997bacd0d67f810bf59389e3 Mon Sep 17 00:00:00 2001 From: Buck Doyle Date: Mon, 15 Jun 2026 19:18:42 -0500 Subject: [PATCH 12/12] Report host shell after the server is serving, and from the post-deploy hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The boot-time host-shell report fired right after fetching the host dist, before the realm server was listening. In a rolling deploy the manager could echo the new token while the load balancer still routed to the old task, so a prerender would recycle against the old shell, record the new token, and stop retrying — leaving stale tabs on any path relying on the heartbeat signal. 
Move the boot report to after server.start() (the listener is then serving the new shell), and also report from the post-deployment hook, which runs once the deploy reports the service stable and load-balancer-routable. The boot report keeps manager-restart resilience; the post-deploy report closes the rolling-deploy window. Co-Authored-By: Claude Opus 4.8 --- .../handlers/handle-post-deployment.ts | 9 +++++++++ packages/realm-server/main.ts | 14 +++++++++++--- packages/realm-server/routes.ts | 4 ++++ packages/realm-server/server.ts | 8 ++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/packages/realm-server/handlers/handle-post-deployment.ts b/packages/realm-server/handlers/handle-post-deployment.ts index 7be38ee876..a139307f69 100644 --- a/packages/realm-server/handlers/handle-post-deployment.ts +++ b/packages/realm-server/handlers/handle-post-deployment.ts @@ -20,6 +20,7 @@ export default function handlePostDeployment({ definitionLookup, queue, realmServerSecretSeed, + reportHostShell, }: CreateRoutesArgs): (ctxt: Koa.Context, next: Koa.Next) => Promise { return async function (ctxt: Koa.Context, _next: Koa.Next) { if (ctxt.request.headers.authorization !== realmServerSecretSeed) { @@ -27,6 +28,14 @@ export default function handlePostDeployment({ return; } + // This hook fires after the deploy reports the service stable, so the new + // host shell is live and load-balancer-routable. Re-report the host-shell + // token to the prerender manager from here so the fleet's recycle signal + // reflects the now-serving shell, closing the rolling-deploy window where + // the boot-time report could precede the new task receiving traffic. + // Fire-and-forget — best-effort, must not affect the hook's response. 
+ void reportHostShell?.(); + await definitionLookup.clearAllDefinitions(); let boxelUiChangeCheckerResult = diff --git a/packages/realm-server/main.ts b/packages/realm-server/main.ts index c610555ceb..0a95099fde 100644 --- a/packages/realm-server/main.ts +++ b/packages/realm-server/main.ts @@ -414,9 +414,6 @@ const reportHostShellToManager = async () => { console.error(`Unable to fetch from host app URL ${distURL}: ${detail}`); process.exit(-2); } - // Fire-and-forget: tell the prerender manager which host shell we're - // serving so the prerender fleet recycles after a host redeploy. - void reportHostShellToManager(); let realms: Realm[] = []; let dbAdapter = new PgAdapter({ autoMigrate }); let queue = new PgQueuePublisher(dbAdapter); @@ -636,6 +633,7 @@ const reportHostShellToManager = async () => { ? getRegistrationSecret : undefined, prerenderer, + reportHostShell: reportHostShellToManager, }); let httpServer = server.listen(port); @@ -777,6 +775,16 @@ const reportHostShellToManager = async () => { // wait for first-request mount via reconciler.lookupOrMount(). await server.start(); + // Now that the HTTP listener is accepting traffic and serving the new host + // shell, tell the prerender manager which shell we're serving so the fleet + // recycles after a host redeploy. Reporting earlier (before the listener is + // live) races a rolling deploy: the manager could echo the new token while + // the load balancer still routes to the old task, so a prerender would + // recycle against the old shell, record the new token, and stop retrying. + // The post-deployment hook reports again once the service is fully stable. + // Fire-and-forget — a missing/unreachable manager must never affect serving. + void reportHostShellToManager(); + // Begin the reconciler's background poll loop (LISTEN realm_registry + // 30s safety poll). It picks up changes from peer instances (publish, // unpublish, delete) and reconciles them into local mounted state. 
diff --git a/packages/realm-server/routes.ts b/packages/realm-server/routes.ts index e319ee8f19..6b313e7637 100644 --- a/packages/realm-server/routes.ts +++ b/packages/realm-server/routes.ts @@ -110,6 +110,10 @@ export type CreateRoutesArgs = { }; assetsURL: URL; prerenderer?: Prerenderer; + // Reports the current host-shell token to the prerender manager. The + // post-deployment hook calls it so the fleet's recycle signal is refreshed + // once the new code is live and the service is stable. + reportHostShell?: () => Promise; searchCache: JobScopedSearchCache; }; diff --git a/packages/realm-server/server.ts b/packages/realm-server/server.ts index 787d9164d7..9dd4c551b3 100644 --- a/packages/realm-server/server.ts +++ b/packages/realm-server/server.ts @@ -894,6 +894,7 @@ export class RealmServer { } | undefined; private prerenderer: Prerenderer | undefined; + private reportHostShell: (() => Promise) | undefined; private reconciler: RealmRegistryReconciler; private searchCache: JobScopedSearchCache; private cachedApp: ReturnType | undefined; @@ -919,6 +920,7 @@ export class RealmServer { getRegistrationSecret, domainsForPublishedRealms, prerenderer, + reportHostShell, searchCache, }: { serverURL: URL; @@ -945,6 +947,10 @@ export class RealmServer { boxelSite?: string; }; prerenderer?: Prerenderer; + // Reports the current host-shell token to the prerender manager. main.ts + // wires this so the post-deployment hook can re-report once the service is + // stable (the boot-time report fires as soon as this server starts serving). + reportHostShell?: () => Promise; // Optional so test harnesses that construct a RealmServer directly get a // private cache for free. main.ts passes a shared instance so the // JobsFinishedListener can evict the same cache the handlers populate. 
@@ -989,6 +995,7 @@ export class RealmServer { this.realms = realms; this.reconciler = reconciler; this.prerenderer = prerenderer; + this.reportHostShell = reportHostShell; this.searchCache = searchCache ?? new JobScopedSearchCache(dbAdapter); } @@ -1091,6 +1098,7 @@ export class RealmServer { matrixAdminPassword: this.matrixAdminPassword, domainsForPublishedRealms: this.domainsForPublishedRealms, prerenderer: this.prerenderer, + reportHostShell: this.reportHostShell, reconciler: this.reconciler, searchCache: this.searchCache, }),