Skip to content

Commit d7028e2

Browse files
d-cs and claude authored
feat(webapp): label mollifier decisions by enrolled org (#3869)
## Summary

The `mollifier.decisions` metric only carried an `outcome` label, so for an org that has the mollifier enabled there was no way to see how often its triggers pass through the gate instead of being diverted — making it hard to tell why the trip isn't firing for an opted-in org.

This adds two bounded labels: `enrolled` (`"true"`/`"false"`, the per-org flag) and `org` (the org id, attached **only** when `enrolled` is true). For an enrolled org you can now compare directly: `mollifier.decisions{outcome="pass_through", enrolled="true", org="<id>"}` vs `{outcome="mollify", ...}`.

## Design

`recordDecision` now takes an options object (`{ reason?, enrolled, orgId? }`). The `org` label is restricted to the enrolled cohort to keep cardinality bounded — the guard lives in a pure `decisionLabels` helper, so a non-enrolled org id can never be attached even if one is passed. The enrolled set is small and capped operationally.

The per-org flag is resolved once at the top of `evaluateGate` (in-memory, no DB round-trip on the trigger hot path) so every decision — including the debounce / one-time-use-token / triggerAndWait bypasses — is labelled consistently.

## Test plan

- [x] `mollifierGate.test.ts` cascade asserts `enrolled`/`org` on every gate branch
- [x] `mollifierDecisionLabels.test.ts` (new) proves `org` is dropped for non-enrolled even when an id is passed (cardinality guard)
- [x] `vitest run mollifierGate mollifierDecisionLabels` — 34/34 pass
- [x] `pnpm run typecheck --filter webapp` clean

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 93532cd commit d7028e2

5 files changed

Lines changed: 178 additions & 39 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
---
2+
area: webapp
3+
type: improvement
4+
---
5+
6+
Add bounded `enrolled` and `org` labels to the `mollifier.decisions` metric so per-enrolled-org pass-through vs mollify is visible (the `org` label is attached only for the enrolled cohort to keep cardinality bounded).

apps/webapp/app/v3/mollifier/mollifierGate.server.ts

Lines changed: 32 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import {
77
recordDecision,
88
type DecisionOutcome,
99
type DecisionReason,
10+
type RecordDecisionOptions,
1011
} from "./mollifierTelemetry.server";
1112

1213
// `count` is the fleet-wide fixed-window counter for the env (INCR with a
@@ -80,7 +81,7 @@ export type GateDependencies = {
8081
inputs: GateInputs,
8182
decision: Extract<TripDecision, { divert: true }>,
8283
) => void;
83-
recordDecision: (outcome: DecisionOutcome, reason?: DecisionReason) => void;
84+
recordDecision: (outcome: DecisionOutcome, opts: RecordDecisionOptions) => void;
8485
};
8586

8687
// `options` is a thunk so env reads happen per-evaluation, not at module load.
@@ -152,52 +153,59 @@ export async function evaluateGate(
152153
): Promise<GateOutcome> {
153154
const d = { ...defaultGateDependencies, ...deps };
154155

156+
// Resolve the per-org flag up front so every decision below — including
157+
// the bypasses — can be labelled enrolled vs not on the
158+
// `mollifier.decisions` counter. Fail open: a transient error must not
159+
// block triggers. The resolver is purely in-memory (reads
160+
// `Organization.featureFlags`); it adds no DB round-trip to the hot path.
161+
let orgFlagEnabled: boolean;
162+
try {
163+
orgFlagEnabled = await d.resolveOrgFlag(inputs);
164+
} catch (error) {
165+
logger.warn("mollifier.resolve_org_flag_failed", {
166+
envId: inputs.envId,
167+
orgId: inputs.orgId,
168+
taskId: inputs.taskId,
169+
error: error instanceof Error ? error.message : String(error),
170+
});
171+
orgFlagEnabled = false;
172+
}
173+
// Passed to every `recordDecision`. `org` only becomes a label for the
174+
// (operationally capped) enrolled cohort — the guard is in
175+
// `decisionLabels`, so passing orgId unconditionally here is safe.
176+
const labels: RecordDecisionOptions = { enrolled: orgFlagEnabled, orgId: inputs.orgId };
177+
155178
// Debounce bypass. onDebounced is a closure over webapp state and
156179
// can't be snapshotted into the buffer for drainer replay. Skip before the
157180
// trip evaluator so debounce traffic is never counted against the rate.
158181
if (inputs.options?.debounce) {
159-
d.recordDecision("pass_through");
182+
d.recordDecision("pass_through", labels);
160183
return { action: "pass_through" };
161184
}
162185
// OneTimeUseToken bypass. OTU is a security feature on the PUBLIC_JWT
163186
// auth path; its synchronous-rejection contract is materially worse to
164187
// break than the idempotency-key contract.
165188
if (inputs.options?.oneTimeUseToken) {
166-
d.recordDecision("pass_through");
189+
d.recordDecision("pass_through", labels);
167190
return { action: "pass_through" };
168191
}
169192
// Single triggerAndWait bypass. batchTriggerAndWait still funnels
170193
// through TriggerTaskService.call per item so the dominant burst pattern
171194
// remains covered.
172195
if (inputs.options?.parentTaskRunId && inputs.options?.resumeParentOnCompletion) {
173-
d.recordDecision("pass_through");
196+
d.recordDecision("pass_through", labels);
174197
return { action: "pass_through" };
175198
}
176199

177200
if (!d.isMollifierEnabled()) {
178-
d.recordDecision("pass_through");
201+
d.recordDecision("pass_through", labels);
179202
return { action: "pass_through" };
180203
}
181204

182-
// Fail open: a transient DB error resolving the per-org flag must not
183-
// block triggers. Mirror the evaluator's fail-open posture in
184-
// `mollifierTripEvaluator.server.ts`.
185-
let orgFlagEnabled: boolean;
186-
try {
187-
orgFlagEnabled = await d.resolveOrgFlag(inputs);
188-
} catch (error) {
189-
logger.warn("mollifier.resolve_org_flag_failed", {
190-
envId: inputs.envId,
191-
orgId: inputs.orgId,
192-
taskId: inputs.taskId,
193-
error: error instanceof Error ? error.message : String(error),
194-
});
195-
orgFlagEnabled = false;
196-
}
197205
const shadowOn = d.isShadowModeOn();
198206

199207
if (!orgFlagEnabled && !shadowOn) {
200-
d.recordDecision("pass_through");
208+
d.recordDecision("pass_through", labels);
201209
return { action: "pass_through" };
202210
}
203211

@@ -226,17 +234,17 @@ export async function evaluateGate(
226234
decision = { divert: false };
227235
}
228236
if (!decision.divert) {
229-
d.recordDecision("pass_through");
237+
d.recordDecision("pass_through", labels);
230238
return { action: "pass_through" };
231239
}
232240

233241
if (orgFlagEnabled) {
234242
d.logMollified(inputs, decision);
235-
d.recordDecision("mollify", decision.reason);
243+
d.recordDecision("mollify", { ...labels, reason: decision.reason });
236244
return { action: "mollify", decision };
237245
}
238246

239247
d.logShadow(inputs, decision);
240-
d.recordDecision("shadow_log", decision.reason);
248+
d.recordDecision("shadow_log", { ...labels, reason: decision.reason });
241249
return { action: "shadow_log", decision };
242250
}

apps/webapp/app/v3/mollifier/mollifierTelemetry.server.ts

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,39 @@ export const mollifierDecisionsCounter = meter.createCounter("mollifier.decision
99
export type DecisionOutcome = "pass_through" | "shadow_log" | "mollify";
1010
export type DecisionReason = "per_env_rate";
1111

12-
export function recordDecision(outcome: DecisionOutcome, reason?: DecisionReason): void {
13-
mollifierDecisionsCounter.add(1, {
12+
export type RecordDecisionOptions = {
13+
reason?: DecisionReason;
14+
// Whether the org has the per-org mollifier flag enabled. Emitted as the
15+
// bounded `enrolled` label so we can see how often enrolled orgs pass
16+
// through instead of mollifying — the whole point of this instrumentation.
17+
enrolled: boolean;
18+
// Org id, attached as the `org` label ONLY when `enrolled` is true. The
19+
// enrolled cohort is capped operationally (<= 10 orgs), so this stays
20+
// low-cardinality. It must NEVER be attached for non-enrolled orgs — that
21+
// would fan the metric out across every org id in production (unbounded;
22+
// the same high-cardinality ban that keeps envId/orgId off the other
23+
// mollifier metrics). The guard lives in `decisionLabels`, so callers can
24+
// pass orgId unconditionally.
25+
orgId?: string;
26+
};
27+
28+
// Pure: builds the metric label set for a gate decision. Extracted from
29+
// `recordDecision` so the org-only-when-enrolled cardinality guard is
30+
// unit-testable without standing up an OTel meter.
31+
export function decisionLabels(
32+
outcome: DecisionOutcome,
33+
opts: RecordDecisionOptions,
34+
): Record<string, string> {
35+
return {
1436
outcome,
15-
...(reason ? { reason } : {}),
16-
});
37+
enrolled: opts.enrolled ? "true" : "false",
38+
...(opts.reason ? { reason: opts.reason } : {}),
39+
...(opts.enrolled && opts.orgId ? { org: opts.orgId } : {}),
40+
};
41+
}
42+
43+
export function recordDecision(outcome: DecisionOutcome, opts: RecordDecisionOptions): void {
44+
mollifierDecisionsCounter.add(1, decisionLabels(outcome, opts));
1745
}
1846

1947
// Counts subscriptions hitting `/realtime/v1/runs/<id>` for a run that
Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,54 @@
1+
import { describe, expect, it } from "vitest";
2+
3+
import { decisionLabels } from "~/v3/mollifier/mollifierTelemetry.server";
4+
5+
// The cardinality guard. `org` is a bounded label (enrolled cohort is capped
6+
// at <= 10 orgs operationally), so it may ONLY be attached when the org is
7+
// enrolled. Attaching it for non-enrolled orgs would fan `mollifier.decisions`
8+
// out across every org id in production — the high-cardinality blow-up these
9+
// labels are explicitly designed to avoid.
10+
describe("decisionLabels", () => {
11+
it("always emits a bounded `enrolled` label (true/false)", () => {
12+
expect(decisionLabels("pass_through", { enrolled: false })).toEqual({
13+
outcome: "pass_through",
14+
enrolled: "false",
15+
});
16+
expect(decisionLabels("pass_through", { enrolled: true, orgId: "org_1" })).toMatchObject({
17+
enrolled: "true",
18+
});
19+
});
20+
21+
it("attaches the `org` label ONLY when enrolled — never for non-enrolled, even if orgId is passed", () => {
22+
// Non-enrolled: orgId passed but MUST be dropped (cardinality guard).
23+
expect(decisionLabels("pass_through", { enrolled: false, orgId: "org_unbounded" })).toEqual({
24+
outcome: "pass_through",
25+
enrolled: "false",
26+
});
27+
28+
// Enrolled: org label present.
29+
expect(
30+
decisionLabels("mollify", { enrolled: true, orgId: "org_1", reason: "per_env_rate" }),
31+
).toEqual({
32+
outcome: "mollify",
33+
enrolled: "true",
34+
reason: "per_env_rate",
35+
org: "org_1",
36+
});
37+
});
38+
39+
it("omits `org` when enrolled but no orgId is supplied", () => {
40+
expect(decisionLabels("pass_through", { enrolled: true })).toEqual({
41+
outcome: "pass_through",
42+
enrolled: "true",
43+
});
44+
});
45+
46+
it("includes `reason` only when supplied", () => {
47+
expect(decisionLabels("pass_through", { enrolled: true, orgId: "org_1" })).not.toHaveProperty(
48+
"reason",
49+
);
50+
expect(
51+
decisionLabels("shadow_log", { enrolled: false, reason: "per_env_rate" }),
52+
).toMatchObject({ reason: "per_env_rate" });
53+
});
54+
});

apps/webapp/test/mollifierGate.test.ts

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,12 @@ type Spies = {
2727
evaluatorCalls: number;
2828
logShadowCalls: Array<{ inputs: GateInputs; decision: Extract<TripDecision, { divert: true }> }>;
2929
logMollifiedCalls: Array<{ inputs: GateInputs; decision: Extract<TripDecision, { divert: true }> }>;
30-
recordDecisionCalls: Array<{ outcome: DecisionOutcome; reason?: DecisionReason }>;
30+
recordDecisionCalls: Array<{
31+
outcome: DecisionOutcome;
32+
reason?: DecisionReason;
33+
enrolled?: boolean;
34+
orgId?: string;
35+
}>;
3136
};
3237

3338
type Toggles = {
@@ -58,8 +63,13 @@ function makeDeps(toggles: Toggles): { deps: GateDependencies; spies: Spies } {
5863
logMollified: (inputs, decision) => {
5964
spies.logMollifiedCalls.push({ inputs, decision });
6065
},
61-
recordDecision: (outcome, reason) => {
62-
spies.recordDecisionCalls.push({ outcome, reason });
66+
recordDecision: (outcome, opts) => {
67+
spies.recordDecisionCalls.push({
68+
outcome,
69+
reason: opts.reason,
70+
enrolled: opts.enrolled,
71+
orgId: opts.orgId,
72+
});
6373
},
6474
};
6575
return { deps, spies };
@@ -152,6 +162,12 @@ describe("evaluateGate cascade — exhaustive truth table", () => {
152162
expect(spies.recordDecisionCalls).toHaveLength(1);
153163
expect(spies.recordDecisionCalls[0].outcome).toBe(row.expected.recordedOutcome);
154164
expect(spies.recordDecisionCalls[0].reason).toBe(row.expected.expectedReason);
165+
// enrolled label = the resolved per-org flag, now hoisted above the
166+
// bypasses so it's set on every decision. orgId is always passed by the
167+
// gate; the telemetry layer drops it for non-enrolled (covered in
168+
// mollifierDecisionLabels.test.ts).
169+
expect(spies.recordDecisionCalls[0].enrolled).toBe(row.flag);
170+
expect(spies.recordDecisionCalls[0].orgId).toBe(inputs.orgId);
155171
},
156172
);
157173

@@ -254,8 +270,13 @@ describe("evaluateGate — fail open on evaluator error", () => {
254270
logMollified: (inputs, decision) => {
255271
spies.logMollifiedCalls.push({ inputs, decision });
256272
},
257-
recordDecision: (outcome, reason) => {
258-
spies.recordDecisionCalls.push({ outcome, reason });
273+
recordDecision: (outcome, opts) => {
274+
spies.recordDecisionCalls.push({
275+
outcome,
276+
reason: opts.reason,
277+
enrolled: opts.enrolled,
278+
orgId: opts.orgId,
279+
});
259280
},
260281
};
261282

@@ -265,7 +286,13 @@ describe("evaluateGate — fail open on evaluator error", () => {
265286
expect(spies.evaluatorCalls).toBe(1);
266287
expect(spies.logMollifiedCalls).toHaveLength(0);
267288
expect(spies.logShadowCalls).toHaveLength(0);
268-
expect(spies.recordDecisionCalls).toEqual([{ outcome: "pass_through", reason: undefined }]);
289+
expect(spies.recordDecisionCalls).toHaveLength(1);
290+
expect(spies.recordDecisionCalls[0]).toMatchObject({
291+
outcome: "pass_through",
292+
reason: undefined,
293+
enrolled: true,
294+
orgId: inputs.orgId,
295+
});
269296
});
270297
});
271298

@@ -293,16 +320,27 @@ describe("evaluateGate — fail open on resolveOrgFlag error", () => {
293320
logMollified: (inputs, decision) => {
294321
spies.logMollifiedCalls.push({ inputs, decision });
295322
},
296-
recordDecision: (outcome, reason) => {
297-
spies.recordDecisionCalls.push({ outcome, reason });
323+
recordDecision: (outcome, opts) => {
324+
spies.recordDecisionCalls.push({
325+
outcome,
326+
reason: opts.reason,
327+
enrolled: opts.enrolled,
328+
orgId: opts.orgId,
329+
});
298330
},
299331
};
300332

301333
const outcome = await evaluateGate(inputs, deps);
302334

303335
expect(outcome.action).toBe("pass_through");
304336
expect(spies.evaluatorCalls).toBe(0);
305-
expect(spies.recordDecisionCalls).toEqual([{ outcome: "pass_through", reason: undefined }]);
337+
expect(spies.recordDecisionCalls).toHaveLength(1);
338+
expect(spies.recordDecisionCalls[0]).toMatchObject({
339+
outcome: "pass_through",
340+
reason: undefined,
341+
enrolled: false,
342+
orgId: inputs.orgId,
343+
});
306344
});
307345
});
308346

@@ -333,8 +371,13 @@ describe("evaluateGate — per-org isolation via Organization.featureFlags", ()
333371
logMollified: (inputs, decision) => {
334372
spies.logMollifiedCalls.push({ inputs, decision });
335373
},
336-
recordDecision: (outcome, reason) => {
337-
spies.recordDecisionCalls.push({ outcome, reason });
374+
recordDecision: (outcome, opts) => {
375+
spies.recordDecisionCalls.push({
376+
outcome,
377+
reason: opts.reason,
378+
enrolled: opts.enrolled,
379+
orgId: opts.orgId,
380+
});
338381
},
339382
};
340383
return { deps, spies };

0 commit comments

Comments
 (0)