Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b09f2e2
Recycle prerender browsers when the host is redeployed
backspace Jun 11, 2026
ce0ab7d
Recycle the prerender fleet at the end of a deploy
backspace Jun 11, 2026
349a8f0
Add arbitrary host change
backspace Jun 11, 2026
1363a5b
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 11, 2026
6ef8702
Gate the prerender recycle on the realm-server deploy, not the post-d…
backspace Jun 11, 2026
7c07d52
Add another arbitrary change
backspace Jun 11, 2026
a5a0094
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 12, 2026
5b4bc25
Remove outdated 403 reference
backspace Jun 12, 2026
57bc70c
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 12, 2026
2dcb88b
Remove nonsense change
backspace Jun 12, 2026
96f661e
Normalize and bound /host-shell token before storing
backspace Jun 15, 2026
e6b3881
Seed prerender warm baseline from the host shell it warmed against
backspace Jun 15, 2026
514ed9a
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 15, 2026
bfb0acb
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 15, 2026
f072547
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 15, 2026
437c3c1
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 15, 2026
357c5d5
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 15, 2026
3d17c2c
Report host shell to the manager URL the realm server already uses
backspace Jun 15, 2026
31de073
Merge remote-tracking branch 'origin/main' into prerender-refetch-hos…
backspace Jun 15, 2026
67a1cff
Revert seed of prerender warm baseline (kept the manager-URL fix)
backspace Jun 15, 2026
be64529
Report host shell after the server is serving, and from the post-depl…
backspace Jun 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/manual-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,35 @@ jobs:
exit 1
fi

recycle-prerender:
name: Recycle prerender after host is live
# The prerender fleet deploys before the realm server (the manager,
# worker, and realm server all depend on it being up for boot indexing),
# so its tabs warm against the OLD host shell the realm server was still
# serving at that point. Once the realm server is up serving the new
# shell, re-deploy the prerender service so its tabs re-warm against it.
# The reusable deploy passes force-new-deployment, so this recycles fresh
# tasks even though the image is unchanged.
#
# Gate on `deploy-realm-server` (wait-for-service-stability: true → the
# new realm server is up and serving the new shell), NOT on
# `post-deploy-realm-server`: that job is a separate post-deploy hook
# (it POSTs the realm server's `/_post-deployment` endpoint) and can fail
# independently of the recycle's only real precondition — the new shell
# being served. Coupling to it would skip this recycle on an unrelated
# hook failure.
needs: [build-prerender, deploy-realm-server]
uses: cardstack/gh-actions/.github/workflows/ecs-deploy.yml@main
secrets: inherit
with:
container-name: "boxel-prerender-server"
environment: ${{ inputs.environment }}
cluster: ${{ inputs.environment }}
service-name: "boxel-prerender-server-${{ inputs.environment }}"
image: ${{ needs.build-prerender.outputs.image }}
timeout-minutes: 10
wait-for-service-stability: true

apply-observability:
# Push the observability/ package's dashboards/folders/data sources/alerts
# into the production self-host Grafana as part of the deploy. The
Expand Down Expand Up @@ -375,6 +404,7 @@ jobs:
post-deploy-worker,
deploy-realm-server,
post-deploy-realm-server,
recycle-prerender,
apply-observability,
]
if: github.event_name == 'workflow_dispatch' && always()
Expand Down
9 changes: 9 additions & 0 deletions packages/realm-server/handlers/handle-post-deployment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,22 @@ export default function handlePostDeployment({
definitionLookup,
queue,
realmServerSecretSeed,
reportHostShell,
}: CreateRoutesArgs): (ctxt: Koa.Context, next: Koa.Next) => Promise<void> {
return async function (ctxt: Koa.Context, _next: Koa.Next) {
if (ctxt.request.headers.authorization !== realmServerSecretSeed) {
sendResponseForUnauthorizedRequest(ctxt, 'Unauthorized');
return;
}

// This hook fires after the deploy reports the service stable, so the new
// host shell is live and load-balancer-routable. Re-report the host-shell
// token to the prerender manager from here so the fleet's recycle signal
// reflects the now-serving shell, closing the rolling-deploy window where
// the boot-time report could precede the new task receiving traffic.
// Fire-and-forget — best-effort, must not affect the hook's response.
void reportHostShell?.();

await definitionLookup.clearAllDefinitions();

let boxelUiChangeCheckerResult =
Expand Down
50 changes: 50 additions & 0 deletions packages/realm-server/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,45 @@ const smokeTestHostApp = async () => {
throw lastError ?? new Error('host app smoke test timed out');
};

// Report the host-shell token this realm server is serving to the prerender
// manager. The manager echoes it on heartbeat responses so prerender servers
// recycle their browsers when it changes — i.e. when a deploy ships a new
// host bundle. Called once the server is serving (right after
// `server.start()`) and again from the post-deployment hook, which receives
// this function as `reportHostShell`.
//
// Best-effort: every failure (index HTML unavailable, manager missing,
// unreachable, slow, or rejecting the report) is logged and swallowed, so the
// returned promise never rejects and callers can safely fire-and-forget it.
const reportHostShellToManager = async () => {
  // Upper bound on the manager round-trip. Callers do not await this report,
  // so without a bound a hung connection would linger for the life of the
  // process with nothing ever logged about it.
  let timeoutMs = 10000;
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    let html = await getIndexHTML();
    let { createHash } = await import('crypto');
    // md5 is only a cheap change-detection fingerprint here, not a security
    // primitive; 8 hex chars are enough to tell two shells apart.
    let hash = createHash('md5').update(html).digest('hex').slice(0, 8);
    // Report to the manager URL the realm server already uses (prerendererUrl);
    // PRERENDER_MANAGER_URL is only set on the prerender-server tasks.
    let managerURL = prerendererUrl.replace(/\/$/, '');
    let controller = new AbortController();
    // Start the clock only once we are about to hit the network, so time
    // spent producing the index HTML does not eat into the request budget.
    timer = setTimeout(() => controller.abort(), timeoutMs);
    let response = await fetch(`${managerURL}/host-shell`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/vnd.api+json',
        Accept: 'application/vnd.api+json',
      },
      body: JSON.stringify({ data: { attributes: { hash } } }),
      signal: controller.signal,
    });
    if (response.ok) {
      console.log(
        `Reported host shell token ${hash} to prerender manager at ${managerURL}`,
      );
    } else {
      console.warn(
        `Prerender manager rejected host shell report: ${response.status}`,
      );
    }
  } catch (e: unknown) {
    // Covers network errors, the abort raised by the timeout above, and any
    // failure while reading or hashing the index HTML.
    let reason = e instanceof Error ? e.message : String(e);
    console.warn(
      `Failed to report host shell token to prerender manager: ${reason}`,
    );
  } finally {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
};

(async () => {
try {
await smokeTestHostApp();
Expand Down Expand Up @@ -594,6 +633,7 @@ const smokeTestHostApp = async () => {
? getRegistrationSecret
: undefined,
prerenderer,
reportHostShell: reportHostShellToManager,
});

let httpServer = server.listen(port);
Expand Down Expand Up @@ -735,6 +775,16 @@ const smokeTestHostApp = async () => {
// wait for first-request mount via reconciler.lookupOrMount().
await server.start();

// Now that the HTTP listener is accepting traffic and serving the new host
// shell, tell the prerender manager which shell we're serving so the fleet
// recycles after a host redeploy. Reporting earlier (before the listener is
// live) races a rolling deploy: the manager could echo the new token while
// the load balancer still routes to the old task, so a prerender would
// recycle against the old shell, record the new token, and stop retrying.
// The post-deployment hook reports again once the service is fully stable.
// Fire-and-forget — a missing/unreachable manager must never affect serving.
void reportHostShellToManager();

// Begin the reconciler's background poll loop (LISTEN realm_registry +
// 30s safety poll). It picks up changes from peer instances (publish,
// unpublish, delete) and reconciles them into local mounted state.
Expand Down
61 changes: 61 additions & 0 deletions packages/realm-server/prerender/manager-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
import { format } from 'date-fns';
import {
PRERENDER_JOB_ID_HEADER,
PRERENDER_HOST_SHELL_HASH_HEADER,
PRERENDER_REQUEST_ID_HEADER,
PRERENDER_SERVER_DRAINING_STATUS_CODE,
PRERENDER_SERVER_STATUS_DRAINING,
Expand Down Expand Up @@ -64,6 +65,11 @@ type Registry = {
servers: Map<string, ServerInfo>; // key: serverUrl
affinities: Map<string, string[]>; // affinityKey (<type>:<value>) -> assigned serverUrls (deque semantics)
lastAccessByAffinity: Map<string, number>;
// Latest host-shell token reported by a realm server (POST /host-shell).
// Echoed to prerender servers on every heartbeat response so they recycle
// when it changes (host redeployed). Undefined until first reported; reset
// on manager restart, re-learned from the next realm-server boot report.
hostShellHash?: string;
};

const log = logger('prerender-manager');
Expand Down Expand Up @@ -507,6 +513,11 @@ export function buildPrerenderManagerApp(options?: {
warmedAffinities,
affinityVacancy,
});
// Echo the current host-shell token so the server can recycle its
// browser when the host is redeployed (see PRERENDER_HOST_SHELL_HASH_HEADER).
if (registry.hostShellHash) {
ctxt.set(PRERENDER_HOST_SHELL_HASH_HEADER, registry.hostShellHash);
}
ctxt.status = 204;
ctxt.set('X-Prerender-Server-Id', url);
} catch (e) {
Expand All @@ -516,6 +527,56 @@ export function buildPrerenderManagerApp(options?: {
}
});

// POST /host-shell — the realm server reports the host-shell token it is
// currently serving. A change means the host was redeployed; prerender
// servers pick it up on their next heartbeat and recycle. Storing the latest
// token (rather than counting) keeps this robust across the manager's own
// restart in the deploy train — the next realm-server report re-supplies the
// current token.
//
// Responses: 204 when the token is accepted (changed or not), 400 when the
// token is missing, blank, too long, or not header-safe, 500 on an
// unexpected error.
router.post('/host-shell', async (ctxt) => {
  try {
    let req = await fetchRequestFromContext(ctxt);
    let raw = await req.text();
    let requestBody: any = {};
    if (raw) {
      try {
        requestBody = JSON.parse(raw);
      } catch (e) {
        // Falls through with an empty body, which the `hash` check below
        // turns into a 400.
        log.debug('Invalid JSON body on /host-shell; treating as empty:', e);
      }
    }
    let hash = requestBody?.data?.attributes?.hash;
    if (typeof hash !== 'string' || hash.trim().length === 0) {
      ctxt.status = 400;
      ctxt.body = { errors: [{ status: 400, message: 'hash is required' }] };
      return;
    }
    // Normalize and bound the token before storing it: it is echoed into a
    // response header on every heartbeat, so a stray-whitespace variant would
    // spuriously read as a change, and an oversized value would bloat every
    // heartbeat response. The real token is a short hex digest, so anything
    // longer is malformed — reject rather than silently truncate (a truncated
    // token would never match and would recycle forever).
    hash = hash.trim();
    if (hash.length > 64) {
      ctxt.status = 400;
      ctxt.body = { errors: [{ status: 400, message: 'hash too long' }] };
      return;
    }
    // The stored token is written verbatim into a response header on every
    // heartbeat. Node refuses header values containing control characters
    // (including CR/LF) or code points above 0xFF, so one bad token accepted
    // here would make every subsequent heartbeat response throw. Accept only
    // printable ASCII, which still leaves the token opaque to this service.
    if (!/^[\x20-\x7e]+$/.test(hash)) {
      ctxt.status = 400;
      ctxt.body = {
        errors: [{ status: 400, message: 'hash contains invalid characters' }],
      };
      return;
    }
    if (registry.hostShellHash !== hash) {
      log.info(
        `host shell token changed (${registry.hostShellHash ?? 'none'} -> ${hash}); prerender servers will recycle on next heartbeat`,
      );
      registry.hostShellHash = hash;
    }
    ctxt.status = 204;
  } catch (e) {
    log.error('Error in /host-shell:', e);
    ctxt.status = 500;
    ctxt.body = { errors: [{ status: 500, message: 'host-shell error' }] };
  }
});

// maintenance: clear affinity assignments and capacity tracking
router.post('/prerender-maintenance/reset', async (ctxt) => {
for (let [, info] of registry.servers) {
Expand Down
75 changes: 74 additions & 1 deletion packages/realm-server/prerender/prerender-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { Prerenderer } from './index.ts';
import type { Timings } from './render-runner.ts';
import { resolvePrerenderManagerURL } from './config.ts';
import {
PRERENDER_HOST_SHELL_HASH_HEADER,
PRERENDER_JOB_ID_HEADER,
PRERENDER_REQUEST_ID_HEADER,
PRERENDER_SERVER_DRAINING_STATUS_CODE,
Expand Down Expand Up @@ -60,6 +61,30 @@ export function decorateRenderErrorDiagnostics(
};
}

// Pure decision step of the host-shell recycle reconcile. Inputs are the
// token the manager reported on this heartbeat (`reported`; null or empty
// when it has none) and the token this server last warmed against (`warmed`;
// undefined until a report has been adopted). The result says whether to
// recycle and what the baseline token should become:
//   - nothing reported         → baseline untouched, no recycle
//   - no baseline yet          → adopt the reported token, no recycle (the
//                                server just warmed against the current shell)
//   - reported equals baseline → no-op
//   - reported differs         → recycle, baseline advances to the report
// Exported for unit testing; the live caller adds the draining / in-flight
// guards and performs the asynchronous recycle itself.
export function decideHostShellRecycle(
  reported: string | null,
  warmed: string | undefined,
): { recycle: boolean; nextWarmed: string | undefined } {
  if (reported === null || reported === '') {
    return { recycle: false, nextWarmed: warmed };
  }
  // A recycle is only warranted when an established baseline disagrees with
  // the report; in every reported case the report becomes the new baseline.
  let baselineDiffers = warmed !== undefined && warmed !== reported;
  return { recycle: baselineDiffers, nextWarmed: reported };
}
Comment thread
backspace marked this conversation as resolved.

export function buildPrerenderApp(options: {
serverURL: string;
maxPages?: number;
Expand Down Expand Up @@ -1021,6 +1046,13 @@ export function createPrerenderHttpServer(options?: {
let drainingResolved = false;
let drainingDeferred = new Deferred<void>();
let heartbeatTimer: NodeJS.Timeout | undefined;
// Host-shell token the standbys were last warmed against, learned from the
// manager's heartbeat responses (PRERENDER_HOST_SHELL_HASH_HEADER). When the
// manager reports a different token — the host was redeployed and the realm
// server is now serving a new shell — the browser is recycled so pages
// reload it. Undefined until the first heartbeat that carries a token.
let warmedHostShellHash: string | undefined;
let recyclingForHostChange = false;
let isClosing = false;
let fatalExitOnUncaught = options?.fatalExitOnUncaught ?? true;
let serverURL = resolvePrerenderServerURL(options?.port);
Expand Down Expand Up @@ -1079,7 +1111,7 @@ export function createPrerenderHttpServer(options?: {
log.debug(
`POST heartbeat to ${managerURL}/prerender-servers with body:\n${JSON.stringify(body, null, 2)}`,
);
await fetch(`${managerURL}/prerender-servers`, {
let response = await fetch(`${managerURL}/prerender-servers`, {
method: 'POST',
headers: {
'Content-Type': 'application/vnd.api+json',
Expand All @@ -1088,13 +1120,54 @@ export function createPrerenderHttpServer(options?: {
body: JSON.stringify(body),
}).catch((e) => {
log.debug('Prerender manager heartbeat request failed:', e);
return undefined;
});
if (response) {
reconcileHostShell(
response.headers.get(PRERENDER_HOST_SHELL_HASH_HEADER),
);
}
} catch (e) {
// best-effort, but log for visibility
log.debug('Error while attempting heartbeat with prerender manager:', e);
}
}

// Reconcile the host-shell token carried on a heartbeat response (`hash`,
// null when the header was absent) with the token this server warmed
// against. When they disagree the host was redeployed, so the browser is
// recycled in the background — the heartbeat must never wait on the restart.
function reconcileHostShell(hash: string | null) {
  // Nothing to do while shutting down, and never stack a second recycle on
  // top of one that is still running.
  let busy = draining || recyclingForHostChange;
  if (busy) {
    return;
  }
  let decision = decideHostShellRecycle(hash, warmedHostShellHash);
  if (decision.recycle) {
    recyclingForHostChange = true;
    log.info(
      `host shell changed (${warmedHostShellHash} -> ${hash}); recycling prerender browser`,
    );
    void (async () => {
      try {
        await prerenderer.recycle();
        // Advance the baseline only after a successful recycle.
        warmedHostShellHash = decision.nextWarmed;
      } catch (e) {
        // Baseline stays as it was, so the next heartbeat tries again.
        log.error('Failed to recycle prerender browser on host change:', e);
      } finally {
        recyclingForHostChange = false;
      }
    })();
    return;
  }
  // No recycle needed: nothing was reported, the token matched, or this is
  // the first token seen and it becomes the baseline.
  warmedHostShellHash = decision.nextWarmed;
}

function startHeartbeatLoop() {
if (heartbeatTimer) return;
void sendHeartbeat();
Expand Down
10 changes: 10 additions & 0 deletions packages/realm-server/prerender/prerender-constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@ export const PRERENDER_SERVER_STATUS_HEADER = 'X-Boxel-Prerender-Server-Status';
export const PRERENDER_SERVER_STATUS_DRAINING = 'draining';
export const PRERENDER_SERVER_DRAINING_STATUS_CODE = 410;

// Response header carrying the opaque token for the current host shell (the
// realm server's rewritten index.html). The realm server reports the token
// to the manager (POST /host-shell) once it is serving, and again from its
// post-deployment hook; the manager echoes the latest value on every
// heartbeat response via this header, and a prerender server recycles its
// browser when the value differs from the shell it last warmed against —
// i.e. the host was redeployed. The token only has to change when the host
// bundle changes; prerender servers treat it opaquely.
export const PRERENDER_HOST_SHELL_HASH_HEADER =
'X-Boxel-Prerender-Host-Shell-Hash';

// CS-10872: correlates one client-initiated prerender call across
// remote-prerenderer → manager → prerender-server. The client assigns
// the ID on the first request; the manager and prerender-server echo
Expand Down
11 changes: 11 additions & 0 deletions packages/realm-server/prerender/prerenderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,17 @@ export class Prerenderer {
await this.#pagePool.warmStandbys();
}

// Recycle the whole browser to pick up a redeployed host. Reuses the
// failure-recovery restart path (closeAll → restart Chrome → re-warm
// standbys); the full browser restart also clears Chrome's HTTP cache, so
// re-warmed pages reload the current host shell rather than a cached stale
// bundle. Coalesced via the same in-flight guard as #restartBrowser, so
// overlapping recycle signals collapse to one restart.
//
// Logs the recycle at info level, then awaits #restartBrowser: the returned
// promise settles when the restart does, and a rejection from the restart
// propagates to the caller.
// NOTE(review): the restart steps and the coalescing described above live in
// #restartBrowser, which is not visible here — confirm against that method.
async recycle(): Promise<void> {
log.info('Recycling prerender browser to pick up a redeployed host');
await this.#restartBrowser();
}

// Emit the `render cancelled` log line (format from CS-10872)
// and, on a `rendering`-state cancel, tear down the affinity so
// the next request gets a fresh tab rather than one whose
Expand Down
4 changes: 4 additions & 0 deletions packages/realm-server/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ export type CreateRoutesArgs = {
};
assetsURL: URL;
prerenderer?: Prerenderer;
// Reports the current host-shell token to the prerender manager. The
// post-deployment hook calls it so the fleet's recycle signal is refreshed
// once the new code is live and the service is stable.
reportHostShell?: () => Promise<void>;
searchCache: JobScopedSearchCache;
};

Expand Down
Loading
Loading