Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ LINKUP_API_KEY=dummy_linkup_key
LOOPS_API_KEY=dummy_loops_key
ZEROCLICK_API_KEY=dummy_zeroclick_key

# Freebuff
FREEBUFF_SESSION_LENGTH_MS=3600000

# Discord Integration
DISCORD_PUBLIC_KEY=dummy_discord_public_key
DISCORD_BOT_TOKEN=dummy_discord_bot_token
Expand Down
4 changes: 2 additions & 2 deletions cli/src/app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ export const App = ({
}

// Render project picker FIRST when at home directory or outside a project.
// This deliberately precedes the login/auth and waiting-room gates so the
// This deliberately precedes the login/auth and freebuff session gate so the
// user always gets to pick a working directory before anything else — auth
// failures or a banned/queued freebuff session would otherwise replace the
// picker mid-flash and look like being kicked out of the app.
Expand Down Expand Up @@ -340,7 +340,7 @@ interface AuthedSurfaceProps {
}

/**
* Rendered only after auth is confirmed. Owns the freebuff waiting-room gate
* Rendered only after auth is confirmed. Owns the freebuff session gate
* so `useFreebuffSession` runs exactly once per authed session (not before
* we have a token).
*/
Expand Down
8 changes: 4 additions & 4 deletions cli/src/components/waiting-room-screen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -496,11 +496,11 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
</box>
)}

{/* Server says the waiting room is disabled — this screen should not
normally render in that case, but show a minimal message just in
case App.tsx's guard is bypassed. */}
{/* Compatibility fallback for older servers without the session
endpoint. This should not normally render because App.tsx treats
it as admitted. */}
{session?.status === 'disabled' && (
<text style={{ fg: theme.muted }}>Waiting room disabled.</text>
<text style={{ fg: theme.muted }}>Session gate unavailable.</text>
)}

{/* Country outside the free-mode allowlist. Terminal — polling has
Expand Down
2 changes: 1 addition & 1 deletion cli/src/hooks/helpers/send-message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ export const handleRunError = (params: {
}

/**
* Surface + recover from a waiting-room gate rejection. The server rejected
* Surface + recover from a freebuff session gate rejection. The server rejected
* the request because our seat is no longer valid; update local state so the
* UI reflects reality and we stop sending requests until we re-admit.
*/
Expand Down
3 changes: 1 addition & 2 deletions cli/src/hooks/use-freebuff-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ async function callSession(
signal: opts.signal,
})
// 404 = endpoint not deployed on this server (older web build). Treat as
// "waiting room disabled" so a newer CLI against an older server still
// works, rather than stranding users in a waiting room forever.
// a compatibility bypass so a newer CLI against an older server still works.
if (resp.status === 404) {
return { status: 'disabled' }
}
Expand Down
2 changes: 1 addition & 1 deletion cli/src/utils/error-handling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ export const getCountryBlockFromFreeModeError = (
}

/**
* Freebuff waiting-room gate errors returned by /api/v1/chat/completions.
* Freebuff session gate errors returned by /api/v1/chat/completions.
*
* Contract (see docs/freebuff-waiting-room.md):
* - 428 `waiting_room_required` — no session row exists; POST /session to join.
Expand Down
12 changes: 5 additions & 7 deletions common/src/types/freebuff-session.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import type { FreebuffAccessTier } from '../constants/freebuff-models'

/**
* Wire-level shapes returned by `/api/v1/freebuff/session`. Source of truth
* for the CLI (which deserializes these) and the server (which serializes
* them) — keep both in sync by importing this module from either side.
*
* The CLI uses these shapes directly; there are no client-only states.
* Shapes used by `/api/v1/freebuff/session` and the CLI. Most variants are
* wire-level responses serialized by the server; explicitly documented
* compatibility variants may be synthesized by the CLI for older servers.
*/

/**
Expand Down Expand Up @@ -67,8 +65,8 @@ export type FreebuffIpPrivacySignal =

export type FreebuffSessionServerResponse =
| {
/** Waiting room is globally off; free-mode requests flow through
* unchanged. Client should treat this as "admitted forever". */
/** Compatibility fallback for older servers without the session
* endpoint. Client should treat this as "admitted forever". */
status: 'disabled'
}
| {
Expand Down
21 changes: 3 additions & 18 deletions docs/freebuff-waiting-room.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,14 @@ The waiting room is the admission control layer for **free-mode** requests again
2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available from 9am ET to 5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.

Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. With the current high instant-admit capacities, most users go straight from model selection to an active session; the queue only appears when a model is actually saturated. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.

The entire system is gated by the env flag `FREEBUFF_WAITING_ROOM_ENABLED`. When `false`, the gate is a no-op and the admission ticker does not start; free-mode traffic flows through unchanged.

## Kill Switch
## Configuration

```bash
# Disable entirely (both the gate on chat/completions and the admission loop)
FREEBUFF_WAITING_ROOM_ENABLED=false

# Other knob (only read when enabled)
FREEBUFF_SESSION_LENGTH_MS=3600000 # 1 hour
```

Flipping the flag is safe at runtime: existing rows stay in the DB and will be admitted / expired correctly whenever the flag is flipped back on.

## Architecture

```mermaid
Expand Down Expand Up @@ -186,9 +178,6 @@ Before any of those state transitions, the handler requires a resolved allowlist
Response shapes:

```jsonc
// Waiting room disabled — CLI should treat this as "always admitted"
{ "status": "disabled" }

// In queue
{
"status": "queued",
Expand Down Expand Up @@ -272,9 +261,7 @@ For free-mode requests (`codebuff_metadata.cost_mode === 'free'`), `_post.ts` ca
| 409 | `session_superseded` | Claimed `instance_id` does not match stored one — another CLI took over. |
| 410 | `session_expired` | `expires_at + grace < now()` (past the hard cutoff). Client should POST /session to re-queue. |

Successful results carry one of three reasons: `disabled` (gate is off), `active` (`expires_at > now()`, `remainingMs` provided), or `draining` (`expires_at <= now() < expires_at + grace`, `gracePeriodRemainingMs` provided). The CLI should treat `draining` as "let any in-flight agent run finish, but block new user prompts" — see [Drain / Grace Window](#drain--grace-window) below. The corresponding wire status from `getSessionState` is `ended`.

When the waiting room is disabled, the gate returns `{ ok: true, reason: 'disabled' }` without touching the DB.
Successful results carry one of two reasons: `active` (`expires_at > now()`, `remainingMs` provided) or `draining` (`expires_at <= now() < expires_at + grace`, `gracePeriodRemainingMs` provided). The CLI should treat `draining` as "let any in-flight agent run finish, but block new user prompts" — see [Drain / Grace Window](#drain--grace-window) below. The corresponding wire status from `getSessionState` is `ended`.

## Drain / Grace Window

Expand Down Expand Up @@ -314,8 +301,6 @@ The CLI:
8. **Handles chat-gate errors:** the same statuses are reachable via the gate's 409/410/428/429 for fast in-flight feedback, and the CLI calls the matching `markFreebuff*` helper to flip local state without waiting for the next poll.
9. **On clean exit**, calls `DELETE /api/v1/freebuff/session` so the next user can be admitted sooner.

The `disabled` response means the server has the waiting room turned off. CLI treats it identically to `active` with infinite remaining time — no countdown, and chat requests can omit `freebuff_instance_id` entirely.

## Multi-pod Behavior

- **`/api/v1/freebuff/session` routes** are stateless per pod; all state lives in Postgres. Any pod can serve any request.
Expand Down
11 changes: 1 addition & 10 deletions packages/internal/src/env-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,6 @@ export const serverEnvSchema = clientEnvSchema.extend({
// sweep but risks rate-limiting.
BOT_SWEEP_GITHUB_TOKEN: z.string().min(1).optional(),

// Freebuff waiting room. Defaults to OFF so the feature requires explicit
// opt-in per environment — the CLI/SDK do not yet send
// freebuff_instance_id, so enabling this before they ship would reject
// every free-mode request with 428 waiting_room_required.
FREEBUFF_WAITING_ROOM_ENABLED: z
.enum(['true', 'false'])
.default('false')
.transform((v) => v === 'true'),
FREEBUFF_SESSION_LENGTH_MS: z.coerce
.number()
.int()
Expand Down Expand Up @@ -136,8 +128,7 @@ export const serverProcessEnv: ServerInput = {
BOT_SWEEP_SECRET: process.env.BOT_SWEEP_SECRET,
BOT_SWEEP_GITHUB_TOKEN: process.env.BOT_SWEEP_GITHUB_TOKEN,

// Freebuff waiting room
FREEBUFF_WAITING_ROOM_ENABLED: process.env.FREEBUFF_WAITING_ROOM_ENABLED,
// Freebuff session gate
FREEBUFF_SESSION_LENGTH_MS: process.env.FREEBUFF_SESSION_LENGTH_MS,
FREEBUFF_DEV_FORCE_LIMITED: process.env.FREEBUFF_DEV_FORCE_LIMITED,
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,10 @@ describe('/api/v1/chat/completions POST endpoint', () => {
let mockInsertMessageBigquery: InsertMessageBigqueryFn
let nextQuotaReset: string

// Bypasses the freebuff waiting-room gate in tests that exercise free-mode
// flow without seeding a session. Matches the real return for the disabled
// path so downstream logic proceeds normally.
// Bypasses the freebuff session gate in tests that exercise free-mode flow
// without seeding a session.
const mockCheckSessionAdmissibleAllow = async () =>
({ ok: true, reason: 'disabled' }) as const
({ ok: true, reason: 'active', remainingMs: 60 * 60 * 1000 }) as const
const mockResolveFreeModeCountryAccess = async (
_userId: string,
req: Parameters<typeof getFreeModeCountryAccess>[0],
Expand Down
8 changes: 3 additions & 5 deletions web/src/app/api/v1/chat/completions/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ export async function postChatCompletions(params: {
logger: Logger
}) => Promise<BlockGrantResult | null>
getUserPreferences?: GetUserPreferencesFn
/** Optional override for the freebuff waiting-room gate. Defaults to the
/** Optional override for the freebuff session gate. Defaults to the
* real check backed by Postgres; tests inject a no-op. */
checkSessionAdmissible?: CheckSessionAdmissibleFn
/** Optional override for the free-mode rate limiter. Tests inject this to
Expand Down Expand Up @@ -527,16 +527,14 @@ export async function postChatCompletions(params: {

let freeModeSessionGate: SessionGateResult | null = null

// Freebuff waiting-room gate. Usually enforced only when
// FREEBUFF_WAITING_ROOM_ENABLED=true. Runs before the rate limiter so
// rejected requests don't burn a queued user's free-mode counters.
// Freebuff session gate. Runs before the rate limiter so rejected requests
// don't burn a queued user's free-mode counters.
if (isFreeModeRequest) {
const claimedInstanceId =
typedBody.codebuff_metadata?.freebuff_instance_id
freeModeSessionGate = await checkSession({
userId,
accessTier: freebuffAccessTier,
userEmail: userInfo.email,
claimedInstanceId,
requestedModel: typedBody.model,
requireActiveSession: isFreebuffGeminiThinkerAgent(agentId),
Expand Down
11 changes: 0 additions & 11 deletions web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ function makeSessionDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
let instanceCounter = 0
return {
rows,
isWaitingRoomEnabled: () => true,
graceMs: 30 * 60 * 1000,
sessionLengthMs: 60 * 60 * 1000,
// Keep instant-admit disabled in handler tests — they verify queue/state
Expand Down Expand Up @@ -225,16 +224,6 @@ describe('POST /api/v1/freebuff/session', () => {
})
})

test('returns disabled when waiting room flag is off', async () => {
const sessionDeps = makeSessionDeps({ isWaitingRoomEnabled: () => false })
const resp = await postFreebuffSession(
makeReq('ok'),
makeDeps(sessionDeps, 'u1'),
)
const body = await resp.json()
expect(body.status).toBe('disabled')
})

test('creates a limited DeepSeek Flash session for disallowed country', async () => {
const sessionDeps = makeSessionDeps()
const resp = await postFreebuffSession(
Expand Down
8 changes: 2 additions & 6 deletions web/src/app/api/v1/freebuff/session/_handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ export interface FreebuffSessionDeps {

type AuthResult =
| { error: NextResponse }
| { userId: string; userEmail: string | null; userBanned: boolean }
| { userId: string; userBanned: boolean }

async function resolveUser(
req: NextRequest,
Expand All @@ -94,7 +94,7 @@ async function resolveUser(
}
const userInfo = await deps.getUserInfoFromApiKey({
apiKey,
fields: ['id', 'email', 'banned'],
fields: ['id', 'banned'],
logger: deps.logger,
})
if (!userInfo?.id) {
Expand All @@ -107,7 +107,6 @@ async function resolveUser(
}
return {
userId: String(userInfo.id),
userEmail: userInfo.email ?? null,
userBanned: Boolean(userInfo.banned),
}
}
Expand Down Expand Up @@ -160,7 +159,6 @@ export async function postFreebuffSession(
try {
const state = await requestSession({
userId: auth.userId,
userEmail: auth.userEmail,
userBanned: auth.userBanned,
model: requestedModel,
accessTier,
Expand Down Expand Up @@ -207,7 +205,6 @@ export async function getFreebuffSession(
const state = await getSessionState({
userId: auth.userId,
accessTier,
userEmail: auth.userEmail,
userBanned: auth.userBanned,
claimedInstanceId,
deps: deps.sessionDeps,
Expand Down Expand Up @@ -244,7 +241,6 @@ export async function deleteFreebuffSession(
try {
await endUserSession({
userId: auth.userId,
userEmail: auth.userEmail,
deps: deps.sessionDeps,
})
return NextResponse.json({ status: 'ended' }, { status: 200 })
Expand Down
Loading
Loading