diff --git a/.eslintignore b/.eslintignore
new file mode 100644
index 00000000..513a873e
--- /dev/null
+++ b/.eslintignore
@@ -0,0 +1,3 @@
+# Stale agent worktrees produced by parallel Claude Code sessions — they
+# hold their own branches and are linted as part of their own runs.
+.claude/worktrees/
diff --git a/.oxlintrc.json b/.oxlintrc.json
index 3e2ccf26..6158a462 100644
--- a/.oxlintrc.json
+++ b/.oxlintrc.json
@@ -28,6 +28,7 @@
     "no-undef": "off",
     "no-underscore-dangle": "off",
     "no-useless-undefined": "off",
+    "require-unicode-regexp": "off",
     "no-warning-comments": "off",
     "prefer-destructuring": "off",
     "sort-imports": "off",
diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts
index 7e300f45..152e3f98 100644
--- a/packages/app/cypress/support/mock-data.ts
+++ b/packages/app/cypress/support/mock-data.ts
@@ -189,10 +189,14 @@ export function createMockInferenceContext(
     workflowInfo: null,
     selectedYAxisMetric: 'y_tpPerGpu',
     setSelectedYAxisMetric: namedStub('setSelectedYAxisMetric'),
+    selectedPercentile: 'p90',
+    setSelectedPercentile: namedStub('setSelectedPercentile'),
     selectedXAxisMetric: null,
     setSelectedXAxisMetric: namedStub('setSelectedXAxisMetric'),
     selectedE2eXAxisMetric: null,
     setSelectedE2eXAxisMetric: namedStub('setSelectedE2eXAxisMetric'),
+    selectedXAxisMode: 'interactivity' as const,
+    setSelectedXAxisMode: namedStub('setSelectedXAxisMode'),
     scaleType: 'auto',
     setScaleType: namedStub('setScaleType'),
     isLegendExpanded: true,
diff --git a/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx b/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx
new file mode 100644
index 00000000..77f29805
--- /dev/null
+++ b/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx
@@ -0,0 +1,17 @@
+import type { Metadata } from 'next';
+
+import { AgenticPointDetail } from '@/components/inference/agentic-point/agentic-point-detail';
+
+export const metadata: Metadata = {
+  title: 'Agentic trace detail | InferenceX',
+  robots: { index: false },
+};
+
+export default async function AgenticPointDetailPage({
+  params,
+}: {
+  params: Promise<{ id: string }>;
+}) {
+  const routeParams = await params; // NOTE(review): a non-numeric id becomes NaN below
+  return <AgenticPointDetail id={Number(routeParams.id)} />;
+}
diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts
index a90e26fc..3d2d0da7 100644
--- a/packages/app/src/app/api/unofficial-run/route.ts
+++ b/packages/app/src/app/api/unofficial-run/route.ts
@@ -33,6 +33,10 @@ export function normalizeArtifactRows(
     if (!params) continue;
     const { config } = params;
     results.push({
+      // Synthetic id — overlay rows aren't persisted, so trace_replay lookups
+      // (keyed on benchmark_results.id) will always miss, which is the
+      // intended behaviour: overlays never have stored trace_replay blobs.
+      id: 0,
       hardware: config.hardware,
       framework: config.framework,
       model: config.model,
@@ -50,6 +54,8 @@ export function normalizeArtifactRows(
       decode_num_workers: config.decodeNumWorkers,
       num_prefill_gpu: config.numPrefillGpu,
       num_decode_gpu: config.numDecodeGpu,
+      benchmark_type: params.benchmarkType,
+      offload_mode: params.offloadMode,
       isl: params.isl,
       osl: params.osl,
       conc: params.conc,
diff --git a/packages/app/src/app/api/v1/agentic-aggregates/route.ts b/packages/app/src/app/api/v1/agentic-aggregates/route.ts
new file mode 100644
index 00000000..63cb2dc0
--- /dev/null
+++ b/packages/app/src/app/api/v1/agentic-aggregates/route.ts
@@ -0,0 +1,64 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getAgenticAggregates,
+  type AgenticAggregateMap,
+} from '@semianalysisai/inferencex-db/queries/agentic-aggregates';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+// blobOnly: response stays small (a few numbers per id), but generating it
+// parses ~5-10 MB of decompressed JSONL + JSON per id. Cache so the
+// "Aggregates" toggle stays snappy.
+const getCachedAgenticAggregates = cachedQuery(
+  (ids: number[]): Promise<AgenticAggregateMap> => getAgenticAggregates(getDb(), ids),
+  'agentic-aggregates',
+  { blobOnly: true },
+);
+
+const MAX_IDS_PER_REQUEST = 200;
+
+/**
+ * GET /api/v1/agentic-aggregates?ids=1,2,3
+ *
+ * Returns per-id mean/p50/p75/p90/p99 for ISL, OSL, KV cache utilization,
+ * and prefix cache hit rate — computed live from the stored aiperf
+ * profile_export.jsonl + server_metrics_json blobs. Ids without a
+ * trace_replay blob (or with no usable samples) get nulls.
+ */
+export async function GET(request: NextRequest) {
+  const raw = request.nextUrl.searchParams.get('ids');
+  if (!raw) {
+    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
+  }
+
+  const ids = [
+    ...new Set(
+      raw
+        .split(',')
+        .map((s) => Number(s.trim()))
+        .filter((n) => Number.isSafeInteger(n) && n > 0), // drops NaN, 1.5, 0, negatives
+    ),
+  ];
+  if (ids.length === 0) {
+    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
+  }
+  if (ids.length > MAX_IDS_PER_REQUEST) {
+    return NextResponse.json(
+      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
+      { status: 400 },
+    );
+  }
+
+  try {
+    const sorted = ids.toSorted((a, b) => a - b); // same id set in any order → same argument
+    const result = await getCachedAgenticAggregates(sorted);
+    return cachedJson(result);
+  } catch (error) {
+    console.error('Error fetching agentic aggregates:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/benchmark-siblings/route.ts b/packages/app/src/app/api/v1/benchmark-siblings/route.ts
new file mode 100644
index 00000000..14c1d461
--- /dev/null
+++ b/packages/app/src/app/api/v1/benchmark-siblings/route.ts
@@ -0,0 +1,38 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getBenchmarkSiblings,
+  type BenchmarkSiblings,
+} from '@semianalysisai/inferencex-db/queries/benchmark-siblings';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedSiblings = cachedQuery(
+  (id: number): Promise<BenchmarkSiblings | null> => getBenchmarkSiblings(getDb(), id),
+  'benchmark-siblings',
+);
+
+/**
+ * GET /api/v1/benchmark-siblings?id=N
+ *
+ * Returns the SKU (hw/framework/model/precision/spec/benchmark_type) of the
+ * benchmark_result + all sibling rows that share that SKU within the same
+ * workflow_run. Used by the agentic detail page to render a navigator.
+ */
+export async function GET(request: NextRequest) {
+  const id = Number(request.nextUrl.searchParams.get('id'));
+  if (!Number.isSafeInteger(id) || id <= 0) { // missing, NaN, fractional or non-positive → 400
+    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
+  }
+  try {
+    const data = await getCachedSiblings(id);
+    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
+    return cachedJson(data);
+  } catch (error) {
+    console.error('Error fetching benchmark siblings:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/benchmarks/route.test.ts b/packages/app/src/app/api/v1/benchmarks/route.test.ts
index 780f775e..92d5f326 100644
--- a/packages/app/src/app/api/v1/benchmarks/route.test.ts
+++ b/packages/app/src/app/api/v1/benchmarks/route.test.ts
@@ -59,6 +59,7 @@ describe('GET /api/v1/benchmarks', () => {
       ['dsr1'],
       undefined,
       undefined,
+      undefined,
     );
   });
 
@@ -72,6 +73,7 @@ describe('GET /api/v1/benchmarks', () => {
       ['dsr1'],
       '2026-03-01',
       undefined,
+      undefined,
     );
   });
 
@@ -82,7 +84,27 @@ describe('GET /api/v1/benchmarks', () => {
       req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&exact=true'),
     );
     expect(res.status).toBe(200);
-    expect(mockGetLatestBenchmarks).toHaveBeenCalledWith('mock-sql', ['dsr1'], '2026-03-01', true);
+    expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
+      'mock-sql',
+      ['dsr1'],
+      '2026-03-01',
+      true,
+      undefined,
+    );
+  });
+
+  it('passes runId param to query when provided', async () => {
+    mockGetLatestBenchmarks.mockResolvedValueOnce([]);
+
+    const res = await GET(req('/api/v1/benchmarks?model=DeepSeek-R1-0528&runId=26194160120'));
+    expect(res.status).toBe(200);
+    expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
+      'mock-sql',
+      ['dsr1'],
+      undefined,
+      undefined,
+      '26194160120',
+    );
   });
 
   it('returns 500 when query throws', async () => {
diff --git a/packages/app/src/app/api/v1/benchmarks/route.ts b/packages/app/src/app/api/v1/benchmarks/route.ts
index c79f1aa7..c4037208 100644
--- a/packages/app/src/app/api/v1/benchmarks/route.ts
+++ b/packages/app/src/app/api/v1/benchmarks/route.ts
@@ -11,10 +11,10 @@ import { loadFixture } from '@/lib/test-fixtures';
 export const dynamic = 'force-dynamic';
 
 const getCachedBenchmarks = cachedQuery(
-  (dbModelKeys: string[], date?: string, exact?: boolean) => {
+  (dbModelKeys: string[], date?: string, exact?: boolean, runId?: string) => {
     if (JSON_MODE)
       return Promise.resolve(jsonProvider.getLatestBenchmarks(dbModelKeys, date, exact));
-    return getLatestBenchmarks(getDb(), dbModelKeys, date, exact);
+    return getLatestBenchmarks(getDb(), dbModelKeys, date, exact, runId);
   },
   'benchmarks',
   { blobOnly: true },
@@ -25,6 +25,7 @@ export async function GET(request: NextRequest) {
   const model = params.get('model') ?? '';
   const date = params.get('date') ?? undefined;
   const exact = params.get('exact') === 'true';
+  const runId = params.get('runId') ?? undefined;
   const dbModelKeys = DISPLAY_MODEL_TO_DB[model];
   if (!dbModelKeys || dbModelKeys.length === 0) {
     return NextResponse.json({ error: 'Unknown model' }, { status: 400 });
@@ -32,7 +33,7 @@ export async function GET(request: NextRequest) {
   if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks'));
 
   try {
-    const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined);
+    const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
     return cachedJson(rows);
   } catch (error) {
     console.error('Error fetching benchmarks:', error);
diff --git a/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts b/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts
new file mode 100644
index 00000000..6ce7c017
--- /dev/null
+++ b/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts
@@ -0,0 +1,71 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getDerivedAgenticMetrics,
+  type DerivedAgenticMetricMap,
+} from '@semianalysisai/inferencex-db/queries/derived-agentic-metrics';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+// blobOnly: the response is one entry per id with two numbers, but the
+// derivation work parses thousands of JSONL records per blob — cache the
+// computed result so a chart-refresh hits the warm path.
+// Bumped to v2 when mean_p90_prefill_tps_per_user → p90_prefill_tps_per_user.
+// Stale v1 cache entries return undefined for the new field and silently
+// blank the chart with "No data available".
+const getCachedDerivedAgenticMetrics = cachedQuery(
+  (ids: number[]): Promise<DerivedAgenticMetricMap> => getDerivedAgenticMetrics(getDb(), ids),
+  'derived-agentic-metrics-v2',
+  { blobOnly: true },
+);
+
+const MAX_IDS_PER_REQUEST = 200;
+
+/**
+ * GET /api/v1/derived-agentic-metrics?ids=1,2,3
+ *
+ * Returns per-id derived metrics computed live from the stored aiperf
+ * profile_export.jsonl blobs:
+ *  - normalized_session_time_s: mean across sessions of session e2e time
+ *    (Σ per-turn request_latency) rescaled by mean_load / session_load.
+ *  - p90_prefill_tps_per_user: P90 of per-turn prefill TPS/user (ISL / TTFT)
+ *    across every turn in every session.
+ *
+ * Ids without a trace_replay blob or with unparseable records are omitted.
+ */
+export async function GET(request: NextRequest) {
+  const raw = request.nextUrl.searchParams.get('ids');
+  if (!raw) {
+    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
+  }
+
+  const ids = [
+    ...new Set(
+      raw
+        .split(',')
+        .map((s) => Number(s.trim()))
+        .filter((n) => Number.isSafeInteger(n) && n > 0), // drops NaN, 1.5, 0, negatives
+    ),
+  ];
+  if (ids.length === 0) {
+    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
+  }
+  if (ids.length > MAX_IDS_PER_REQUEST) {
+    return NextResponse.json(
+      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
+      { status: 400 },
+    );
+  }
+
+  try {
+    const sorted = ids.toSorted((a, b) => a - b); // same id set in any order → same argument
+    const result = await getCachedDerivedAgenticMetrics(sorted);
+    return cachedJson(result);
+  } catch (error) {
+    console.error('Error fetching derived agentic metrics:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/request-timeline/route.ts b/packages/app/src/app/api/v1/request-timeline/route.ts
new file mode 100644
index 00000000..6c884fb2
--- /dev/null
+++ b/packages/app/src/app/api/v1/request-timeline/route.ts
@@ -0,0 +1,40 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getRequestTimeline,
+  type RequestTimeline,
+} from '@semianalysisai/inferencex-db/queries/request-timeline';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedRequestTimeline = cachedQuery(
+  (id: number): Promise<RequestTimeline | null> => getRequestTimeline(getDb(), id),
+  'request-timeline',
+  { blobOnly: true },
+);
+
+/**
+ * GET /api/v1/request-timeline?id=N
+ *
+ * Returns the per-request Gantt timeline for one agentic benchmark point.
+ * Each request entry has ns-from-start offsets for credit/start/ack/end,
+ * plus TTFT, ISL, OSL, conversation id, turn index, worker id. 404 if the
+ * point has no stored profile_export.jsonl blob.
+ */
+export async function GET(request: NextRequest) {
+  const id = Number(request.nextUrl.searchParams.get('id'));
+  if (!Number.isSafeInteger(id) || id <= 0) { // missing, NaN, fractional or non-positive → 400
+    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
+  }
+  try {
+    const data = await getCachedRequestTimeline(id);
+    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
+    return cachedJson(data);
+  } catch (error) {
+    console.error('Error fetching request timeline:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/trace-availability/route.ts b/packages/app/src/app/api/v1/trace-availability/route.ts
new file mode 100644
index 00000000..2484ceaf
--- /dev/null
+++ b/packages/app/src/app/api/v1/trace-availability/route.ts
@@ -0,0 +1,59 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getTraceAvailability,
+  type TraceAvailabilityMap,
+} from '@semianalysisai/inferencex-db/queries/trace-availability';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedTraceAvailability = cachedQuery(
+  (ids: number[]): Promise<TraceAvailabilityMap> => getTraceAvailability(getDb(), ids),
+  'trace-availability',
+);
+
+const MAX_IDS_PER_REQUEST = 500;
+
+/**
+ * GET /api/v1/trace-availability?ids=1,2,3
+ *
+ * Returns `{[id]: true}` for ids that have a stored trace_replay blob.
+ * Lightweight presence check used by the scatter tooltip to decide whether
+ * to render the "View charts" button — see queries/trace-availability.ts.
+ */
+export async function GET(request: NextRequest) {
+  const raw = request.nextUrl.searchParams.get('ids');
+  if (!raw) {
+    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
+  }
+
+  const ids = [
+    ...new Set(
+      raw
+        .split(',')
+        .map((s) => Number(s.trim()))
+        .filter((n) => Number.isSafeInteger(n) && n > 0), // drops NaN, 1.5, 0, negatives
+    ),
+  ];
+  if (ids.length === 0) {
+    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
+  }
+  if (ids.length > MAX_IDS_PER_REQUEST) {
+    return NextResponse.json(
+      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
+      { status: 400 },
+    );
+  }
+
+  try {
+    const sorted = ids.toSorted((a, b) => a - b); // same id set in any order → same argument
+    const availability = await getCachedTraceAvailability(sorted);
+    return cachedJson(availability);
+  } catch (error) {
+    console.error('Error fetching trace availability:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/trace-histograms/route.ts b/packages/app/src/app/api/v1/trace-histograms/route.ts
new file mode 100644
index 00000000..7a959a65
--- /dev/null
+++ b/packages/app/src/app/api/v1/trace-histograms/route.ts
@@ -0,0 +1,65 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getTraceHistograms,
+  type TraceHistogramMap,
+} from '@semianalysisai/inferencex-db/queries/trace-histograms';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+// blobOnly: a 50-id histogram payload can easily exceed Next.js's 2MB
+// unstable_cache limit (each point carries one int per request, ~500-1000+
+// requests for agentic), which manifests as a 500 from the route. Blob
+// storage lets us cache the larger response without losing the warm-cache hit.
+const getCachedTraceHistograms = cachedQuery(
+  (ids: number[]): Promise<TraceHistogramMap> => getTraceHistograms(getDb(), ids),
+  'trace-histograms',
+  { blobOnly: true },
+);
+
+const MAX_IDS_PER_REQUEST = 200;
+
+/**
+ * GET /api/v1/trace-histograms?ids=1,2,3
+ *
+ * Returns per-request ISL/OSL arrays parsed from the stored aiperf
+ * `profile_export.jsonl` blobs, keyed by `benchmark_results.id`.
+ * Ids without a trace_replay blob are omitted from the response.
+ */
+export async function GET(request: NextRequest) {
+  const raw = request.nextUrl.searchParams.get('ids');
+  if (!raw) {
+    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
+  }
+
+  const ids = [
+    ...new Set(
+      raw
+        .split(',')
+        .map((s) => Number(s.trim()))
+        .filter((n) => Number.isSafeInteger(n) && n > 0), // drops NaN, 1.5, 0, negatives
+    ),
+  ];
+  if (ids.length === 0) {
+    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
+  }
+  if (ids.length > MAX_IDS_PER_REQUEST) {
+    return NextResponse.json(
+      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
+      { status: 400 },
+    );
+  }
+
+  try {
+    // Sort the cache key so the same set of ids in any order hits the same entry.
+    const sorted = ids.toSorted((a, b) => a - b);
+    const histograms = await getCachedTraceHistograms(sorted);
+    return cachedJson(histograms);
+  } catch (error) {
+    console.error('Error fetching trace histograms:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/trace-server-metrics/route.ts b/packages/app/src/app/api/v1/trace-server-metrics/route.ts
new file mode 100644
index 00000000..7346a3e8
--- /dev/null
+++ b/packages/app/src/app/api/v1/trace-server-metrics/route.ts
@@ -0,0 +1,40 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getTraceServerMetrics,
+  type TraceServerMetrics,
+} from '@semianalysisai/inferencex-db/queries/trace-server-metrics';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedTraceServerMetrics = cachedQuery(
+  (id: number): Promise<TraceServerMetrics | null> => getTraceServerMetrics(getDb(), id),
+  'trace-server-metrics',
+  { blobOnly: true },
+);
+
+/**
+ * GET /api/v1/trace-server-metrics?id=N
+ *
+ * Returns parsed time-series for the agentic detail view: KV cache usage,
+ * prefix cache hit rate per interval, queue depth, and per-source prompt
+ * token rates. Times are in seconds from benchmark start. 404 if the point
+ * has no stored server_metrics_export.json blob.
+ */
+export async function GET(request: NextRequest) {
+  const id = Number(request.nextUrl.searchParams.get('id'));
+  if (!Number.isSafeInteger(id) || id <= 0) { // missing, NaN, fractional or non-positive → 400
+    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
+  }
+  try {
+    const data = await getCachedTraceServerMetrics(id);
+    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
+    return cachedJson(data);
+  } catch (error) {
+    console.error('Error fetching trace server metrics:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx
index d2a171ee..2dd40e0c 100644
--- a/packages/app/src/components/GlobalFilterContext.tsx
+++ b/packages/app/src/components/GlobalFilterContext.tsx
@@ -12,6 +12,8 @@ import {
   useState,
 } from 'react';
 
+import { DISPLAY_MODEL_TO_DB, rowToSequence } from '@semianalysisai/inferencex-constants';
+
 // useLayoutEffect warns during SSR; alias to useEffect on the server (no-op there anyway).
 const useIsomorphicLayoutEffect = typeof window === 'undefined' ? useEffect : useLayoutEffect;
 
@@ -22,8 +24,6 @@ function isEnumValue<T extends Record<string, string>>(e: T, v: string): v is T[
 const RUNDATE_RE = /^\d{4}-\d{2}-\d{2}$/u;
 const RUNID_RE = /^[A-Za-z0-9_-]{1,64}$/u;
 
-import { DISPLAY_MODEL_TO_DB, islOslToSequence } from '@semianalysisai/inferencex-constants';
-
 import { useAvailability } from '@/hooks/api/use-availability';
 import { useWorkflowInfo } from '@/hooks/api/use-workflow-info';
 import { useUrlState } from '@/hooks/useUrlState';
@@ -99,7 +99,9 @@ function buildRunInfo(data: WorkflowInfoResponse): Record<string, RunInfo> {
   const runs: Record<string, RunInfo> = {};
   for (const run of data.runs) {
     const runId = String(run.github_run_id);
-    const runChangelogs = data.changelogs.filter((c) => c.workflow_run_id === run.github_run_id);
+    const runChangelogs = data.changelogs.filter(
+      (c) => String(c.workflow_run_id) === String(run.github_run_id),
+    );
     runs[runId] = {
       runId,
       runDate: run.created_at,
@@ -146,7 +148,11 @@ export function GlobalFilterProvider({
 
   const [selectedSequence, setSelectedSequence] = useState<Sequence>(() => {
     if (initialSequence) return initialSequence;
-    return Sequence.EightK_OneK;
+    const urlSeq = getUrlParam('i_seq');
+    if (urlSeq && Object.values(Sequence).includes(urlSeq as Sequence)) return urlSeq as Sequence;
+    // Prefer Agentic Traces by default when the selected model has it; the
+    // effectiveSequence fallback below handles models without agentic data.
+    return Sequence.AgenticTraces;
   });
 
   const [selectedPrecisions, setSelectedPrecisionsRaw] = useState<string[]>(() => {
@@ -267,9 +273,7 @@ export function GlobalFilterProvider({
     if (!availabilityRows) {
       return unofficialSeqs.length > 0 ? [...new Set(unofficialSeqs)] : SEQUENCE_OPTIONS;
     }
-    const dbSeqs = modelRows
-      .map((r) => islOslToSequence(r.isl, r.osl))
-      .filter((s): s is Sequence => s !== null);
+    const dbSeqs = modelRows.map((r) => rowToSequence(r)).filter((s): s is Sequence => s !== null);
     const merged = [...new Set([...dbSeqs, ...unofficialSeqs])];
     return merged.length > 0 ? merged : SEQUENCE_OPTIONS;
   }, [availabilityRows, modelRows, unofficialAvailable, selectedModel]);
@@ -288,7 +292,7 @@ export function GlobalFilterProvider({
     if (!availabilityRows) {
       return unofficialPrecs.length > 0 ? [...new Set(unofficialPrecs)].toSorted() : ['fp4'];
     }
-    const rows = modelRows.filter((r) => islOslToSequence(r.isl, r.osl) === effectiveSequence);
+    const rows = modelRows.filter((r) => rowToSequence(r) === effectiveSequence);
     const dbPrecs = rows.map((r) => r.precision);
     const merged = [...new Set([...dbPrecs, ...unofficialPrecs])].toSorted();
     return merged.length > 0 ? merged : ['fp4'];
@@ -304,7 +308,7 @@ export function GlobalFilterProvider({
   // Dates available for selected model + sequence + precisions
   const availableDates = useMemo(() => {
     if (!availabilityRows) return [];
-    const seqRows = modelRows.filter((r) => islOslToSequence(r.isl, r.osl) === effectiveSequence);
+    const seqRows = modelRows.filter((r) => rowToSequence(r) === effectiveSequence);
     const rows = seqRows.filter((r) => effectivePrecisions.includes(r.precision));
     if (rows.length === 0) {
       return [...new Set(seqRows.map((r) => r.date))].toSorted();
diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx
index 17ce37b5..244c713c 100644
--- a/packages/app/src/components/inference/InferenceContext.tsx
+++ b/packages/app/src/components/inference/InferenceContext.tsx
@@ -11,7 +11,7 @@ import {
   useState,
 } from 'react';
 
-import { DISPLAY_MODEL_TO_DB, islOslToSequence } from '@semianalysisai/inferencex-constants';
+import { DISPLAY_MODEL_TO_DB, rowToSequence } from '@semianalysisai/inferencex-constants';
 import { track } from '@/lib/analytics';
 import {
   FAVORITE_PRESETS,
@@ -43,7 +43,7 @@ import {
 import { useUrlState } from '@/hooks/useUrlState';
 import { buildAvailabilityHwKey } from '@/lib/chart-utils';
 import { getHardwareConfig, getModelSortIndex, isKnownGpu, TABLEAU_10 } from '@/lib/constants';
-import { hasMtpEngineExclusion, MODEL_PREFIX_MAPPING } from '@/lib/data-mappings';
+import { hasMtpEngineExclusion, MODEL_PREFIX_MAPPING, sequenceKind } from '@/lib/data-mappings';
 import {
   MtpEngineConflictToast,
   type MtpEngineConflictDetail,
@@ -128,10 +128,51 @@ export function InferenceProvider({
     () => getUrlParam('i_metric') || 'y_tpPerGpu',
   );
   const [selectedXAxisMetric, setSelectedXAxisMetric] = useState<string | null>(
-    () => getUrlParam('i_xmetric') || 'p99_ttft',
+    () => getUrlParam('i_xmetric') || 'p90_ttft',
   );
   const [selectedE2eXAxisMetric, setSelectedE2eXAxisMetric] = useState<string | null>(
-    () => getUrlParam('i_e2e_xmetric') || null,
+    () => getUrlParam('i_e2e_xmetric') || 'p90_ttft',
+  );
+  // Selected chart variant (x-axis mode). State starts at a fixed constant so
+  // server and client render identically; the `i_xmode` URL value is applied
+  // in the post-mount effect below, and the scenario-kind default in a later
+  // effect. A ref tracks whether the URL or the user has already set the mode.
+  type XAxisMode = 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps';
+  const VALID_X_MODES: XAxisMode[] = [
+    'ttft',
+    'e2e',
+    'interactivity',
+    'session-time',
+    'prefill-tps',
+  ];
+  // SSR has no URL access, so seed with a fixed default and apply the URL
+  // value (if any) in a post-mount effect — keeps server + client first render
+  // identical and avoids "didn't match" hydration warnings when the URL holds
+  // a non-default mode.
+  const [selectedXAxisMode, setSelectedXAxisMode] = useState<XAxisMode>('ttft');
+  const xAxisModeFromUrlRef = useRef(false);
+  useEffect(() => {
+    if (xAxisModeFromUrlRef.current) return;
+    const v = getUrlParam('i_xmode');
+    if (v && (VALID_X_MODES as string[]).includes(v)) {
+      xAxisModeFromUrlRef.current = true;
+      setSelectedXAxisMode(v as XAxisMode);
+    }
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, []);
+  // Wrap the setter so a button click marks the mode as explicitly chosen (via
+  // xAxisModeFromUrlRef) before updating it; the mount-time URL effect then no-ops.
+  const handleSetXAxisMode = useCallback((mode: XAxisMode) => {
+    xAxisModeFromUrlRef.current = true;
+    setSelectedXAxisMode(mode);
+    // The e2e chart's x-axis metric is reconciled in a separate effect below,
+    // because it depends on sequence kind (fixed-seq has no p90_* metrics) and
+    // the agentic percentile, both of which can change independently.
+  }, []);
+  // Latency percentile applied to the chart x-axis for agentic scenarios.
+  // Values: 'p90' | 'p99'. Non-agentic charts ignore.
+  const [selectedPercentile, setSelectedPercentile] = useState<string>(
+    () => getUrlParam('i_pctl') || 'p90',
   );
   const [scaleType, setScaleType] = useState<'auto' | 'linear' | 'log'>(
     () => (getUrlParam('i_scale') as 'auto' | 'linear' | 'log') || 'auto',
@@ -188,6 +229,56 @@ export function InferenceProvider({
   // ── Data fetching (gated by isActive) ──────────────────────────────────────
   const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined;
 
+  // Run-selector scoping: only constrain benchmark data to a specific run when
+  // there's actually a disambiguation to make for the CURRENT model. The
+  // raw `availableRuns` is across ALL models on the date, so the picker may
+  // auto-select a run that produced nothing for the current model — passing
+  // that runId would return zero rows and hide the chart entirely.
+  // Compute the set of runs whose CHANGELOG explicitly mentions this model +
+  // precision. We can't reuse `filterRunsByModel` here because it has a
+  // fallback that returns all runs when nothing matches (so the picker still
+  // renders) — which would make us pass a runId that produced no rows for
+  // the current model, hiding the chart.
+  const modelPrefixesEarly = Object.entries(MODEL_PREFIX_MAPPING)
+    .filter(([, model]) => model === selectedModel)
+    .map(([prefix]) => prefix);
+  // Map each FULL config_key (model-precision-hardware-framework) a run's
+  // changelog claims to the set of runs claiming it. Single-run scoping should
+  // only kick in when two runs contest the SAME full key — e.g. a same-day
+  // re-run of one hardware — because then a DISTINCT ON merge could mix them
+  // and the user needs to pick which run wins. Runs covering DIFFERENT hardware
+  // of the same model (e.g. a B300 run and a B200 run on the same date) are
+  // complementary: both must render via carry-forward. Matching on model+
+  // precision alone (the old behavior) wrongly treated those as alternatives
+  // and scoped the chart to one run, hiding the other GPU's curve.
+  const runsByConfigKey = new Map<string, Set<string>>();
+  if (availableRuns) {
+    for (const [runId, runInfo] of Object.entries(availableRuns)) {
+      if (!runInfo.changelog) continue;
+      for (const entry of runInfo.changelog.entries) {
+        for (const key of entry.config_keys) {
+          const parts = key.split('-');
+          if (modelPrefixesEarly.includes(parts[0]!) && effectivePrecisions.includes(parts[1]!)) {
+            let runs = runsByConfigKey.get(key);
+            if (!runs) {
+              runs = new Set<string>();
+              runsByConfigKey.set(key, runs);
+            }
+            runs.add(runId);
+          }
+        }
+      }
+    }
+  }
+  // A run is "contested" only if some full config_key it claims is also claimed
+  // by another run. Only then does picking a run disambiguate anything.
+  const contestedRunIds = new Set<string>();
+  for (const runs of runsByConfigKey.values()) {
+    if (runs.size > 1) for (const r of runs) contestedRunIds.add(r);
+  }
+  const benchmarkRunId =
+    selectedRunId && contestedRunIds.has(String(selectedRunId)) ? String(selectedRunId) : undefined;
+
   const {
     graphs,
     loading: chartDataLoading,
@@ -208,7 +299,10 @@ export function InferenceProvider({
     effectiveRunDate,
     isActive,
     latestDate,
+    selectedPercentile,
     compareGpuPair ?? null,
+    benchmarkRunId,
+    selectedXAxisMode,
   );
 
   // For GPU comparison date picker — use shared availability data from global filters
@@ -222,7 +316,7 @@ export function InferenceProvider({
     if (!availabilityRows) return availableDates;
     const rows = availabilityRows.filter((r) => {
       if (!dbModelKeys.includes(r.model)) return false;
-      if (islOslToSequence(r.isl, r.osl) !== effectiveSequence) return false;
+      if (rowToSequence(r) !== effectiveSequence) return false;
       if (!effectivePrecisions.includes(r.precision)) return false;
       if (!r.hardware) return false;
       const hwKey = buildAvailabilityHwKey(r.hardware, r.framework, r.spec_method, r.disagg);
@@ -247,7 +341,7 @@ export function InferenceProvider({
     const hwKeys = new Set<string>();
     for (const r of availabilityRows) {
       if (!dbModelKeys.includes(r.model)) continue;
-      if (islOslToSequence(r.isl, r.osl) !== effectiveSequence) continue;
+      if (rowToSequence(r) !== effectiveSequence) continue;
       if (!effectivePrecisions.includes(r.precision)) continue;
       if (!r.hardware) continue;
       const hwKey = buildAvailabilityHwKey(r.hardware, r.framework, r.spec_method, r.disagg);
@@ -319,6 +413,61 @@ export function InferenceProvider({
     setTrackedConfigs((prev) => (prev.length > 0 ? [] : prev));
   }, [selectedModel, effectiveSequence, effectivePrecisions, selectedYAxisMetric]);
 
+  // Reconcile the x-axis mode with the scenario kind:
+  //  - On mount with no `i_xmode` URL param: snap to the kind's natural default
+  //    (agentic → ttft, fixed → interactivity). The state itself was initialized
+  //    to a SSR-stable constant so server and client render the same DOM; this
+  //    effect fixes it up after hydration.
+  //  - When the user later switches sequence kinds: snap to the new kind's
+  //    natural default (the prior selection was for a different kind, so it
+  //    doesn't carry over).
+  const lastSeqKindRef = useRef<ReturnType<typeof sequenceKind> | null>(null);
+  useEffect(() => {
+    const kind = sequenceKind(effectiveSequence);
+    const isInitialMount = lastSeqKindRef.current === null;
+    const isAgenticOnlyMode =
+      selectedXAxisMode === 'session-time' || selectedXAxisMode === 'prefill-tps';
+    // On a stale render where kind hasn't changed, bail unless the current
+    // mode is agentic-only and we just landed on a fixed-seq scenario — in
+    // that case force the snap so the chart doesn't try to plot trace-derived
+    // metrics against rows that have no trace_replay.
+    if (!isInitialMount && lastSeqKindRef.current === kind) {
+      if (kind === 'fixed-seq' && isAgenticOnlyMode) {
+        handleSetXAxisMode('interactivity');
+      }
+      return;
+    }
+    lastSeqKindRef.current = kind;
+    if (
+      isInitialMount &&
+      xAxisModeFromUrlRef.current &&
+      !(kind === 'fixed-seq' && isAgenticOnlyMode)
+    ) {
+      // Keep the URL-restored mode. The one exception — an agentic-only mode on
+      // a fixed-seq sequence — fails the guard above and takes the default snap below.
+      return;
+    }
+    handleSetXAxisMode(kind === 'agentic' ? 'ttft' : 'interactivity');
+  }, [effectiveSequence, selectedXAxisMode, handleSetXAxisMode]);
+
+  // Reconcile selectedE2eXAxisMetric whenever the mode, sequence kind, or
+  // agentic percentile changes. For fixed-seq the JSONB only carries
+  // median_* / p99_* (no p90_*), so the TTFT button there has to point at
+  // median_ttft — otherwise the chart goes blank. For agentic, we point at
+  // the user's chosen percentile so the dropdown actually drives the axis.
+  useEffect(() => {
+    const isAgentic = sequenceKind(effectiveSequence) === 'agentic';
+    if (selectedXAxisMode === 'ttft') {
+      setSelectedE2eXAxisMetric(isAgentic ? `${selectedPercentile}_ttft` : 'median_ttft');
+    } else if (selectedXAxisMode === 'e2e') {
+      // null = use the chart-config natural x (median_e2el), which useChartData
+      // rewrites to <pctl>_e2el for agentic via withPercentile().
+      setSelectedE2eXAxisMetric(null);
+    }
+    // 'interactivity' mode renders the interactivity chart, which keys off
+    // selectedXAxisMetric (not the e2e one), so nothing to do here.
+  }, [selectedXAxisMode, effectiveSequence, selectedPercentile]);
+
   // Ref guard: when true, filter changes don't clear the active preset.
   // FavoritePresetsDropdown sets this while applying a preset so its own
   // programmatic setter calls don't accidentally deactivate it.
@@ -768,6 +917,7 @@ export function InferenceProvider({
   useUrlStateSync(
     {
       i_metric: selectedYAxisMetric,
+      i_pctl: selectedPercentile,
       i_gpus: selectedGPUs.join(','),
       i_dates: selectedDates.join(','),
       i_dstart: selectedDateRange.startDate,
@@ -778,6 +928,7 @@ export function InferenceProvider({
       i_log: logScale ? '1' : '',
       i_xmetric: selectedXAxisMetric || '',
       i_e2e_xmetric: selectedE2eXAxisMetric || '',
+      i_xmode: selectedXAxisMode,
       i_scale: scaleType,
       i_legend: isLegendExpanded ? '' : '0',
       i_advlabel: useAdvancedLabels ? '1' : '',
@@ -791,6 +942,7 @@ export function InferenceProvider({
       selectedYAxisMetric,
       selectedXAxisMetric,
       selectedE2eXAxisMetric,
+      selectedXAxisMode,
       scaleType,
       selectedGPUs,
       selectedDates,
@@ -961,6 +1113,8 @@ export function InferenceProvider({
       setSelectedXAxisMetric,
       selectedE2eXAxisMetric,
       setSelectedE2eXAxisMetric,
+      selectedXAxisMode,
+      setSelectedXAxisMode: handleSetXAxisMode,
       scaleType,
       setScaleType,
       loading,
@@ -968,6 +1122,8 @@ export function InferenceProvider({
       workflowInfo,
       selectedYAxisMetric,
       setSelectedYAxisMetric: setSelectedYAxisMetricAndClear,
+      selectedPercentile,
+      setSelectedPercentile,
       selectedGPUs,
       setSelectedGPUs: setSelectedGPUsAndClear,
       availableGPUs,
@@ -1032,6 +1188,7 @@ export function InferenceProvider({
       selectedYAxisMetric,
       selectedXAxisMetric,
       selectedE2eXAxisMetric,
+      selectedXAxisMode,
       scaleType,
       selectedGPUs,
       selectedDates,
diff --git a/packages/app/src/components/inference/agentic-point/agentic-point-detail.tsx b/packages/app/src/components/inference/agentic-point/agentic-point-detail.tsx
new file mode 100644
index 00000000..1ce321ee
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/agentic-point-detail.tsx
@@ -0,0 +1,621 @@
+'use client';
+
+import Link from 'next/link';
+import { useRouter } from 'next/navigation';
+import { useState } from 'react';
+import { ArrowLeft } from 'lucide-react';
+
+import { useAgenticAggregates, type AgenticAggregateMap } from '@/hooks/api/use-agentic-aggregates';
+import { useRequestTimeline } from '@/hooks/api/use-request-timeline';
+import { useTraceHistograms } from '@/hooks/api/use-trace-histograms';
+import {
+  useTraceServerMetrics,
+  type PointMeta,
+  type QueueDepthPoint,
+  type TimeSeriesPoint,
+} from '@/hooks/api/use-trace-server-metrics';
+import { useBenchmarkSiblings } from '@/hooks/api/use-benchmark-siblings';
+import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle';
+
+import { AggregateChart, type AggregatePoint, type PercentileKey } from './aggregate-chart';
+import { Distribution } from './distribution';
+import { ExpandableChart } from './expandable-chart';
+import { RequestTimelineView } from './request-timeline';
+import { SiblingNav, chipLabel } from './sibling-nav';
+import {
+  StackedAreaChart,
+  TimeSeriesChart,
+  cumulativeAverage,
+  cumulativeDifferenceMonotonic,
+  inflightUniqueTokens,
+  rollingAverage,
+  sumSeries,
+  timeRollingAverage,
+} from './time-series-chart';
+
+interface Props {
+  id: number;
+}
+
+const fmtPct = (v: number | null | undefined): string =>
+  typeof v === 'number' && !Number.isNaN(v) ? `${(v * 100).toFixed(2)}%` : '—';
+
+function MetaLine({ label, value }: { label: string; value: React.ReactNode }) {
+  return (
+    <div className="flex flex-col gap-0.5">
+      <span className="text-xs uppercase tracking-wide text-muted-foreground">{label}</span>
+      <span className="text-sm font-medium text-foreground">{value}</span>
+    </div>
+  );
+}
+
+function PointSummary({ meta }: { meta: PointMeta }) {
+  return (
+    <div className="mb-4">
+      <div className="flex items-baseline justify-between gap-3 mb-2">
+        <p className="text-sm text-muted-foreground">
+          Selected point
+          {meta.disagg ? ' · disagg' : ''}
+          {meta.spec_method && meta.spec_method !== 'none' ? ` · spec=${meta.spec_method}` : ''}
+        </p>
+        {meta.run_url && (
+          <a
+            href={meta.run_url}
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-xs text-muted-foreground hover:text-foreground underline"
+          >
+            GitHub Actions run →
+          </a>
+        )}
+      </div>
+      <div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-3">
+        <MetaLine label="Offload" value={(meta.offload_mode ?? 'off').toUpperCase()} />
+        <MetaLine label="Concurrency" value={meta.conc} />
+        <MetaLine label="GPU cache hit" value={fmtPct(meta.server_gpu_cache_hit_rate)} />
+        <MetaLine label="CPU cache hit" value={fmtPct(meta.server_cpu_cache_hit_rate)} />
+        {meta.isl !== null && <MetaLine label="ISL" value={meta.isl} />}
+        {meta.osl !== null && <MetaLine label="OSL" value={meta.osl} />}
+      </div>
+    </div>
+  );
+}
+
+/** Sizes passed to charts for the inline (small) vs expanded (dialog) render. */
+const CHART_SIZES = {
+  inline: { width: 720, height: 260 },
+  expanded: { width: 1300, height: 520 },
+};
+
+// Per-DP-rank color palette for DEP runs (one distinct color per rank in
+// the KV cache utilization overlay). Mirrors the request-timeline row
+// palette so the same DP index reads as the same color across both views.
+// Wraps mod-N if more than 12 ranks ever land.
+const DP_RANK_PALETTE = [
+  '#3b82f6',
+  '#ef4444',
+  '#10b981',
+  '#f59e0b',
+  '#a855f7',
+  '#06b6d4',
+  '#f97316',
+  '#84cc16',
+  '#ec4899',
+  '#14b8a6',
+  '#8b5cf6',
+  '#eab308',
+];
+
+type DetailView = 'point' | 'timeline' | 'aggregates';
+const VIEW_OPTIONS: SegmentedToggleOption<DetailView>[] = [
+  { value: 'point', label: 'Per-point', testId: 'detail-view-point' },
+  { value: 'timeline', label: 'Request timeline', testId: 'detail-view-timeline' },
+  { value: 'aggregates', label: 'Aggregates across configs', testId: 'detail-view-aggregates' },
+];
+
+/**
+ * Adapt one sibling's percentile summary to the point shape AggregateChart plots.
+ * @param sibling - The config's `id` and x-axis `label`, copied onto the point.
+ * @param pct - Percentile summary; null/undefined yields an empty `values` map.
+ * @returns An AggregatePoint carrying the sibling's id, label and percentile values.
+ */
+function toAggPoint(
+  sibling: { id: number; label: string },
+  pct: { mean: number; p50: number; p75: number; p90: number; p99: number } | null | undefined,
+): AggregatePoint {
+  const { id, label } = sibling;
+  if (!pct) return { id, label, values: {} };
+  const { mean, p50, p75, p90, p99 } = pct;
+  return { id, label, values: { mean, p50, p75, p90, p99 } };
+}
+
+/**
+ * Detail page body for a single agentic benchmark point.
+ *
+ * Renders the SKU sibling navigator and point summary, then one of three views
+ * selected by the segmented toggle: per-point charts, the per-request
+ * timeline, or aggregates across every sibling config.
+ *
+ * @param id - Benchmark point id handed to the histogram, metrics, sibling and timeline hooks.
+ */
+export function AgenticPointDetail({ id }: Props) {
+  const router = useRouter();
+  const histQuery = useTraceHistograms([id], true);
+  const metricsQuery = useTraceServerMetrics(id, true);
+  const siblingsQuery = useBenchmarkSiblings(id);
+
+  const hist = histQuery.data?.[id];
+  const metrics = metricsQuery.data;
+  const siblingsData = siblingsQuery.data;
+
+  const [view, setView] = useState<DetailView>('point');
+  // Fetch aggregates only when the aggregates view is active. Uses the full
+  // sibling set (across parallelism + concurrency configs) so each chart
+  // shows how the metric varies across the SKU.
+  const siblingIds = siblingsData?.siblings.map((s) => s.id) ?? [];
+  const aggregatesQuery = useAgenticAggregates(siblingIds, view === 'aggregates');
+  // Per-request timeline used by both the timeline view AND the per-point
+  // "Unique input tokens in flight" chart, so fetch whenever we're on
+  // either view.
+  const timelineQuery = useRequestTimeline(id, view === 'timeline' || view === 'point');
+
+  return (
+    <div className="container mx-auto px-4 lg:px-8 flex flex-col gap-4 py-6">
+      <div className="flex items-center gap-2">
+        <button
+          type="button"
+          onClick={() => router.back()}
+          className="inline-flex items-center gap-1 text-sm text-muted-foreground hover:text-foreground"
+        >
+          <ArrowLeft className="size-4" /> Back
+        </button>
+        <span className="text-sm text-muted-foreground">·</span>
+        <Link href="/inference" className="text-sm text-muted-foreground hover:text-foreground">
+          Inference chart
+        </Link>
+      </div>
+
+      {siblingsData ? (
+        <SiblingNav sku={siblingsData.sku} siblings={siblingsData.siblings} />
+      ) : siblingsQuery.isLoading ? (
+        <div className="text-sm text-muted-foreground">Loading SKU navigator…</div>
+      ) : null}
+
+      {metrics ? (
+        <PointSummary meta={metrics.meta} />
+      ) : metricsQuery.isLoading ? (
+        <div className="text-sm text-muted-foreground">Loading point metadata…</div>
+      ) : null}
+
+      {metricsQuery.isError && (
+        <div className="rounded-lg border border-destructive/40 bg-destructive/10 p-4 text-sm text-destructive">
+          Failed to load trace data for benchmark point #{id}.
+        </div>
+      )}
+      {metricsQuery.data === null && !metricsQuery.isLoading && (
+        <div className="rounded-lg border border-border/40 bg-card/40 p-4 text-sm text-muted-foreground">
+          No stored trace_replay blob for benchmark point #{id}. This point predates the aiperf
+          time-series capture, or its source artifacts have expired on GitHub.
+        </div>
+      )}
+
+      <div className="flex items-center justify-between gap-3">
+        <SegmentedToggle
+          value={view}
+          options={VIEW_OPTIONS}
+          onValueChange={setView}
+          ariaLabel="Detail view"
+          testId="detail-view-toggle"
+          buttonClassName="px-3 py-1.5 text-sm"
+        />
+        {view === 'aggregates' && (
+          <span className="text-xs text-muted-foreground">
+            {siblingIds.length} configs in SKU
+            {aggregatesQuery.isLoading ? ' · loading…' : ''}
+          </span>
+        )}
+        {view === 'timeline' && timelineQuery.data && (
+          <span className="text-xs text-muted-foreground">
+            {timelineQuery.data.requests.length} requests
+          </span>
+        )}
+      </div>
+
+      {view === 'aggregates' ? (
+        <AggregatesGrid
+          siblings={siblingsData?.siblings ?? []}
+          aggregates={aggregatesQuery.data}
+          isLoading={aggregatesQuery.isLoading}
+        />
+      ) : view === 'timeline' ? (
+        timelineQuery.isLoading ? (
+          <div className="rounded-lg border border-border/40 bg-card/40 p-4 text-sm text-muted-foreground">
+            Loading request timeline…
+          </div>
+        ) : timelineQuery.data ? (
+          <RequestTimelineView data={timelineQuery.data} />
+        ) : (
+          <div className="rounded-lg border border-border/40 bg-card/40 p-4 text-sm text-muted-foreground">
+            No per-request timeline for benchmark point #{id} — the profile_export.jsonl artifact
+            isn&apos;t stored for this row.
+          </div>
+        )
+      ) : (
+        <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
+          <ExpandableChart
+            title="Input sequence length distribution"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (hist) return <Distribution values={hist.isl} unit="tokens" {...size} />;
+              return histQuery.isLoading ? <Skeleton /> : <Empty />;
+            }}
+          />
+          <ExpandableChart
+            title="Output sequence length distribution"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (hist) return <Distribution values={hist.osl} unit="tokens" {...size} />;
+              return histQuery.isLoading ? <Skeleton /> : <Empty />;
+            }}
+          />
+
+          <ExpandableChart
+            title="KV cache utilization over time"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              // Only pulse while the query is in flight; once it settles with no
+              // data (error or missing blob) show "No data" rather than a
+              // skeleton that never resolves. Same rule for every metrics chart.
+              if (!metrics) return metricsQuery.isLoading ? <Skeleton /> : <Empty />;
+              // For SGLang hicache rows we have both GPU (HBM) util and
+              // host (CPU offload pool) util — overlay them as two lines.
+              const hasHost = metrics.hostKvCacheUsage.length > 0;
+              // DEP runs report one series per engine. When there's more
+              // than one, draw one line per rank in distinct colors so
+              // load skew is visible at a glance; cluster-average sits on
+              // top in bold red so it stands out.
+              const perEngine = metrics.kvCacheUsageByEngine ?? [];
+              const hasPerEngine = perEngine.length > 1;
+              // Render order matters: per-engine first → average drawn on top.
+              const series = [
+                ...(hasPerEngine
+                  ? perEngine.map((e, i) => ({
+                      name: `DP ${e.engineLabel}`,
+                      data: rollingAverage(e.points, 50),
+                      color: DP_RANK_PALETTE[i % DP_RANK_PALETTE.length]!,
+                      // Thin + translucent so the Avg line on top reads as
+                      // the headline number, not just one more series.
+                      strokeWidth: 1,
+                      strokeOpacity: 0.5,
+                    }))
+                  : []),
+                {
+                  name: hasHost
+                    ? 'GPU HBM (avg n=50)'
+                    : hasPerEngine
+                      ? 'Avg'
+                      : 'GPU KV cache (avg n=50)',
+                  data: rollingAverage(metrics.kvCacheUsage, 50),
+                  // Skip raw scatter when per-engine overlay is on — the
+                  // DP-rank lines already convey the spread, dots would be noise.
+                  rawData: hasPerEngine ? undefined : metrics.kvCacheUsage,
+                  // Bold red Avg sits on top of the translucent per-DP lines.
+                  // DP 1 in the palette is #ef4444 (lighter red); the darker
+                  // #dc2626 here plus the heavier stroke keeps it distinct.
+                  color: hasPerEngine ? '#dc2626' : '#3b82f6',
+                  strokeWidth: hasPerEngine ? 3.5 : 2,
+                },
+                ...(hasHost
+                  ? [
+                      {
+                        name: 'CPU offload pool (avg n=50)',
+                        data: rollingAverage(metrics.hostKvCacheUsage, 50),
+                        rawData: metrics.hostKvCacheUsage,
+                        color: '#f97316',
+                        strokeWidth: 2,
+                      },
+                    ]
+                  : []),
+              ];
+              return (
+                <TimeSeriesChart
+                  series={series}
+                  durationS={metrics.durationS}
+                  yMax={1}
+                  yFmt={(v) => `${(v * 100).toFixed(0)}%`}
+                  yAxisLabel="KV cache (%)"
+                  {...size}
+                />
+              );
+            }}
+          />
+
+          <ExpandableChart
+            title="Request queue depth"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (!metrics) return metricsQuery.isLoading ? <Skeleton /> : <Empty />;
+              // Smooth one queue-depth field with the same n=50 window as the other charts.
+              const smoothed = (field: 'running' | 'waiting' | 'total') =>
+                rollingAverage(
+                  metrics.queueDepth.map((p: QueueDepthPoint) => ({ t: p.t, value: p[field] })),
+                  50,
+                );
+              return (
+                <TimeSeriesChart
+                  series={[
+                    {
+                      name: 'Running (avg n=50)',
+                      data: smoothed('running'),
+                      color: '#22c55e',
+                      strokeWidth: 2,
+                    },
+                    {
+                      name: 'Waiting (avg n=50)',
+                      data: smoothed('waiting'),
+                      color: '#ef4444',
+                      strokeWidth: 2,
+                    },
+                    {
+                      name: 'Total (avg n=50)',
+                      data: smoothed('total'),
+                      color: '#3b82f6',
+                      strokeWidth: 2,
+                    },
+                  ]}
+                  durationS={metrics.durationS}
+                  yAxisLabel="Requests"
+                  {...size}
+                />
+              );
+            }}
+          />
+
+          <ExpandableChart
+            title="Prefix cache hit rate per interval"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (!metrics) return metricsQuery.isLoading ? <Skeleton /> : <Empty />;
+              return (
+                <TimeSeriesChart
+                  series={[
+                    {
+                      name: 'GPU (HBM, avg n=50)',
+                      data: rollingAverage(metrics.prefixCacheHitRate, 50),
+                      rawData: metrics.prefixCacheHitRate,
+                      color: '#a855f7',
+                      strokeWidth: 2,
+                    },
+                  ]}
+                  durationS={metrics.durationS}
+                  yMax={1}
+                  yFmt={(v) => `${(v * 100).toFixed(0)}%`}
+                  yAxisLabel="Hit rate (%)"
+                  {...size}
+                />
+              );
+            }}
+          />
+
+          <ExpandableChart
+            title="Throughput (total & decode)"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (!metrics) return metricsQuery.isLoading ? <Skeleton /> : <Empty />;
+              const total = sumSeries(metrics.prefillTps, metrics.decodeTps);
+              return (
+                <TimeSeriesChart
+                  series={[
+                    {
+                      name: 'Total (avg n=50)',
+                      data: rollingAverage(total, 50),
+                      color: '#3b82f6',
+                      strokeWidth: 1.6,
+                    },
+                    {
+                      name: 'Decode (avg n=50)',
+                      data: rollingAverage(metrics.decodeTps, 50),
+                      color: '#f97316',
+                      strokeWidth: 1.6,
+                    },
+                    {
+                      name: 'Total running avg',
+                      data: cumulativeAverage(total),
+                      color: '#ef4444',
+                      strokeWidth: 3,
+                    },
+                  ]}
+                  durationS={metrics.durationS}
+                  yAxisLabel="Tokens / sec"
+                  {...size}
+                />
+              );
+            }}
+          />
+
+          <ExpandableChart
+            title="Cumulative prompt token source breakdown"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (!metrics) return metricsQuery.isLoading ? <Skeleton /> : <Empty />;
+              return (
+                <StackedAreaChart
+                  sourceSeries={metrics.promptTokensBySource}
+                  durationS={metrics.durationS}
+                  {...size}
+                />
+              );
+            }}
+          />
+
+          <ExpandableChart
+            title="Total unique input tokens over time"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (!metrics) return metricsQuery.isLoading ? <Skeleton /> : <Empty />;
+              // Unique = total prompt tokens received minus tokens served
+              // from the prefix cache. Equivalent to cumsum of
+              // vllm:request_prefill_kv_computed_tokens. We compute it as
+              // monotonic-non-decreasing cumulative-diff so per-scrape
+              // timing skew between the prompt_tokens and prefix_cache_hits
+              // counters can't make the line dip negative.
+              return (
+                <TimeSeriesChart
+                  series={[
+                    {
+                      name: 'Cumulative unique input tokens',
+                      data: cumulativeDifferenceMonotonic(
+                        metrics.prefillTps,
+                        metrics.prefixCacheHitsTps,
+                      ),
+                      color: '#3b82f6',
+                      strokeWidth: 2,
+                    },
+                  ]}
+                  durationS={metrics.durationS}
+                  yAxisLabel="Tokens"
+                  {...size}
+                />
+              );
+            }}
+          />
+
+          <ExpandableChart
+            title="Unique input tokens in flight"
+            render={(expanded) => {
+              const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline;
+              if (!timelineQuery.data) {
+                return timelineQuery.isLoading ? <Skeleton /> : <Empty />;
+              }
+              // Step function: at each request start/end, sum the ISLs of
+              // currently-active requests across distinct cids. Within one
+              // cid turns are sequential so each cid contributes at most
+              // one in-flight ISL; across cids we treat content as
+              // independent (cross-conv prefix sharing adds <1pp in
+              // practice). Smooth with a 30s time-weighted rolling average
+              // so brief turn-handoff dips don't dominate the chart.
+              const raw = inflightUniqueTokens(timelineQuery.data.requests);
+              const smoothed = timeRollingAverage(raw, 30);
+              return (
+                <TimeSeriesChart
+                  series={[
+                    {
+                      name: 'In flight (avg 30s)',
+                      data: smoothed,
+                      rawData: raw,
+                      color: '#a855f7',
+                      strokeWidth: 2,
+                    },
+                  ]}
+                  durationS={timelineQuery.data.durationS}
+                  yAxisLabel="Tokens"
+                  {...size}
+                />
+              );
+            }}
+          />
+        </div>
+      )}
+    </div>
+  );
+}
+
+/**
+ * "Aggregates across configs" view: a grid of four percentile charts (ISL,
+ * OSL, KV cache utilization, prefix cache hit rate), each with one
+ * x-position per sibling config.
+ *
+ * Renders a notice instead of the grid when the sibling list is empty, or
+ * while aggregates are loading and none have arrived yet.
+ *
+ * @param siblings - Sibling configs of the SKU; each one is labelled via chipLabel.
+ * @param aggregates - Per-sibling aggregate map keyed by sibling id, if fetched.
+ * @param isLoading - Whether the aggregates query is still loading.
+ */
+function AggregatesGrid({
+  siblings,
+  aggregates,
+  isLoading,
+}: {
+  siblings: {
+    id: number;
+    conc: number;
+    decode_tp: number;
+    decode_ep: number;
+    disagg: boolean;
+    num_prefill_gpu: number;
+    num_decode_gpu: number;
+    offload_mode?: string | null;
+  }[];
+  aggregates: AgenticAggregateMap | undefined;
+  isLoading: boolean;
+}) {
+  // Shared styling for both placeholder notices.
+  const boxClass =
+    'rounded-lg border border-border/40 bg-card/40 p-4 text-sm text-muted-foreground';
+  if (siblings.length === 0) {
+    return (
+      <div className={boxClass}>SKU sibling list not loaded yet — open a point to populate.</div>
+    );
+  }
+  if (isLoading && !aggregates) {
+    return (
+      <div className={boxClass}>
+        Computing aggregates across {siblings.length} configs… (parsing trace blobs)
+      </div>
+    );
+  }
+  // NOTE(review): `as any` bypasses chipLabel's parameter check — confirm the
+  // sibling shape declared above matches what chipLabel expects, then drop it.
+  const labeled = siblings.map((s) => ({ id: s.id, label: chipLabel(s as any) }));
+  // One point per sibling for each metric; a sibling with no aggregate entry
+  // is passed to toAggPoint as undefined.
+  const islPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.isl));
+  const oslPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.osl));
+  const kvPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.kvCacheUtil));
+  const prefixPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.prefixCacheHitRate));
+  // Axis props: token-count charts use the default scale/format, ratio charts
+  // pin the y-axis to 0–1 and print whole-number percentages.
+  const tokens = { unit: 'tokens' };
+  const ratio = { unit: '%', yMax: 1, yFmt: (v: number) => `${(v * 100).toFixed(0)}%` };
+  // Builds one expandable chart; the size switches between the inline and
+  // expanded presets based on the flag ExpandableChart passes to render.
+  const chart = (
+    title: string,
+    points: AggregatePoint[],
+    axis: { unit: string; yMax?: number; yFmt?: (v: number) => string },
+  ) => (
+    <ExpandableChart
+      title={title}
+      render={(expanded) => (
+        <AggregateChart
+          points={points}
+          {...axis}
+          {...CHART_SIZES[expanded ? 'expanded' : 'inline']}
+        />
+      )}
+    />
+  );
+  // Fixed order: ISL, OSL, KV cache utilization, prefix cache hit rate.
+  return (
+    <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
+      {chart('ISL distribution (across configs)', islPoints, tokens)}
+      {chart('OSL distribution (across configs)', oslPoints, tokens)}
+      {chart('KV cache utilization (across configs)', kvPoints, ratio)}
+      {chart('Prefix cache hit rate (across configs)', prefixPoints, ratio)}
+    </div>
+  );
+}
+
+function Skeleton() {
+  return <div className="h-[260px] rounded-md bg-muted/30 animate-pulse" />;
+}
+
+function Empty() {
+  return (
+    <div className="h-[260px] grid place-items-center text-xs text-muted-foreground">No data</div>
+  );
+}
+
+// Re-export type for use by sub-components
+export type { TimeSeriesPoint, QueueDepthPoint };
diff --git a/packages/app/src/components/inference/agentic-point/aggregate-chart.tsx b/packages/app/src/components/inference/agentic-point/aggregate-chart.tsx
new file mode 100644
index 00000000..55ac8061
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/aggregate-chart.tsx
@@ -0,0 +1,286 @@
+'use client';
+
+import { useMemo } from 'react';
+
+import { ChartHover, type HoverItem } from './chart-hover';
+
+export type PercentileKey = 'mean' | 'p50' | 'p75' | 'p90' | 'p99'; // Keys of AggregatePoint.values.
+
+interface PercentileLine { // Presentation metadata for one plotted statistic.
+  key: PercentileKey; // Which entry of AggregatePoint.values this series reads.
+  /** Display label in legend / tooltip. */
+  label: string;
+  color: string; // Used for the legend swatch, tooltip row, connector and marker.
+}
+
+const PERCENTILE_LINES: PercentileLine[] = [ // Array order = legend and tooltip row order.
+  { key: 'mean', label: 'Mean', color: '#ef4444' },
+  { key: 'p50', label: 'P50', color: '#3b82f6' },
+  { key: 'p75', label: 'P75', color: '#22c55e' },
+  { key: 'p90', label: 'P90', color: '#f59e0b' },
+  { key: 'p99', label: 'P99', color: '#a855f7' },
+];
+
+export interface AggregatePoint {
+  /** Sibling label rendered on x-axis (e.g. "TP8 • c=8"); also the hover tooltip title. */
+  label: string;
+  /** Per-statistic value; missing or non-finite entries are skipped when plotting. */
+  values: Partial<Record<PercentileKey, number>>;
+  /** Sibling id — informational only; AggregateChart itself never reads it. */
+  id?: number;
+}
+
+/**
+ * Multi-line chart for the "Aggregates across configs" view: one x-position
+ * per sibling config, one series per statistic (mean/p50/p75/p90/p99). Renders
+ * a "No data" box for empty `points`; hover snaps to the nearest config.
+ */
+export function AggregateChart({
+  points,
+  unit,
+  yMax,
+  yFmt,
+  width = 720,
+  height = 320,
+}: {
+  points: readonly AggregatePoint[];
+  unit: string;
+  /** Optional fixed y-axis upper bound (e.g. 1 for percentages); larger values are not clamped. */
+  yMax?: number;
+  /** Optional value formatter (e.g. percentage → "30%"); used for ticks and tooltip values. */
+  yFmt?: (v: number) => string;
+  width?: number;
+  height?: number;
+}) {
+  const W = width;
+  const H = height;
+  const PAD = { top: 16, right: 16, bottom: 90, left: 64 }; // Plot margins, in viewBox units.
+  const fmt = (v: number) =>
+    yFmt
+      ? yFmt(v)
+      : v >= 10000
+        ? new Intl.NumberFormat('en-US').format(Math.round(v))
+        : v.toFixed(v < 10 ? 2 : 0); // Default: grouped integer ≥10000, 2 decimals <10, else integer.
+
+  const computed = useMemo(() => {
+    if (points.length === 0) return null; // Triggers the "No data" branch below.
+    let yMaxComputed = 0;
+    for (const p of points) {
+      for (const line of PERCENTILE_LINES) {
+        const v = p.values[line.key];
+        if (typeof v === 'number' && Number.isFinite(v) && v > yMaxComputed) yMaxComputed = v;
+      }
+    }
+    const yTop = yMax ?? (yMaxComputed === 0 ? 1 : yMaxComputed * 1.05); // Fixed bound wins; else +5%.
+    const innerW = W - PAD.left - PAD.right;
+    const innerH = H - PAD.top - PAD.bottom;
+    return { yTop, innerW, innerH };
+  }, [points, W, H, PAD.left, PAD.right, PAD.top, PAD.bottom, yMax]);
+
+  if (!computed) {
+    return (
+      <div className="grid place-items-center text-xs text-muted-foreground" style={{ height: H }}>
+        No data
+      </div>
+    );
+  }
+  const { yTop, innerW, innerH } = computed;
+
+  // X positions: evenly spaced across the inner width (a lone point is centered).
+  const xOf = (i: number) =>
+    points.length === 1 ? PAD.left + innerW / 2 : PAD.left + (i / (points.length - 1)) * innerW;
+  const yOf = (v: number) => PAD.top + (1 - v / yTop) * innerH; // 0 → baseline, yTop → top edge.
+
+  // 5 y-axis ticks evenly between 0 and yTop.
+  const yTicks = Array.from({ length: 5 }, (_, i) => (yTop * i) / 4);
+
+  // Resolve hover: snap to nearest sibling index and emit all percentiles
+  // that have data at that x.
+  const resolve = (fraction: number) => {
+    const idx = Math.round(fraction * (points.length - 1)); // Inverse of xOf's even spacing.
+    const p = points[Math.max(0, Math.min(points.length - 1, idx))];
+    if (!p) return null;
+    const items: HoverItem[] = [];
+    for (const line of PERCENTILE_LINES) {
+      const v = p.values[line.key];
+      if (typeof v !== 'number' || !Number.isFinite(v)) continue;
+      items.push({ color: line.color, label: line.label, value: fmt(v) });
+    }
+    return { items, title: p.label };
+  };
+
+  return (
+    <div className="w-full">
+      <div className="mb-2 flex flex-wrap items-center gap-x-3 gap-y-1 text-xs">
+        {PERCENTILE_LINES.map((line) => (
+          <div key={line.key} className="flex items-center gap-1.5">
+            <span className="inline-block w-3 h-0.5" style={{ backgroundColor: line.color }} />
+            <span className="text-muted-foreground">{line.label}</span>
+          </div>
+        ))}
+        <span className="ml-auto text-muted-foreground">
+          {points.length} configs · units: {unit}
+        </span>
+      </div>
+      <ChartHover pad={PAD} width={W} height={H} resolve={resolve}>
+        {/* y-axis tick labels + faint horizontal gridlines */}
+        {yTicks.map((v, i) => {
+          const y = yOf(v);
+          return (
+            <g key={`y${i}`}>
+              <line
+                x1={PAD.left}
+                x2={PAD.left + innerW}
+                y1={y}
+                y2={y}
+                stroke="currentColor"
+                opacity={0.08}
+              />
+              <text
+                x={PAD.left - 8}
+                y={y + 3}
+                fontSize={10}
+                fill="currentColor"
+                opacity={0.55}
+                textAnchor="end"
+              >
+                {fmt(v)}
+              </text>
+            </g>
+          );
+        })}
+
+        {/* X-axis tick marks + labels — one per sibling, labels rotated -30° to fit. */}
+        {points.map((p, i) => {
+          const x = xOf(i);
+          return (
+            <g key={`x${i}`}>
+              <line
+                x1={x}
+                x2={x}
+                y1={PAD.top + innerH}
+                y2={PAD.top + innerH + 4}
+                stroke="currentColor"
+                opacity={0.4}
+              />
+              <text
+                x={x}
+                y={PAD.top + innerH + 8}
+                fontSize={10}
+                fill="currentColor"
+                opacity={0.7}
+                textAnchor="end"
+                transform={`rotate(-30 ${x} ${PAD.top + innerH + 8})`}
+              >
+                {p.label}
+              </text>
+            </g>
+          );
+        })}
+
+        {/* X axis baseline */}
+        <line
+          x1={PAD.left}
+          x2={PAD.left + innerW}
+          y1={PAD.top + innerH}
+          y2={PAD.top + innerH}
+          stroke="currentColor"
+          opacity={0.25}
+        />
+
+        {/* Connecting segments per series (broken where a value is missing) — faint
+            backdrop so the eye can follow how each percentile changes across configs. */}
+        {PERCENTILE_LINES.map((line) => {
+          const segments: { x1: number; y1: number; x2: number; y2: number }[] = [];
+          let prev: { x: number; y: number } | null = null;
+          for (let i = 0; i < points.length; i++) {
+            const v = points[i]!.values[line.key];
+            if (typeof v !== 'number' || !Number.isFinite(v)) {
+              prev = null; // Gap: never connect across a missing value.
+              continue;
+            }
+            const x = xOf(i);
+            const y = yOf(v);
+            if (prev) segments.push({ x1: prev.x, y1: prev.y, x2: x, y2: y });
+            prev = { x, y };
+          }
+          return (
+            <g key={`hline-${line.key}`} opacity={0.35}>
+              {segments.map((s, j) => (
+                <line
+                  key={`s${j}`}
+                  x1={s.x1}
+                  y1={s.y1}
+                  x2={s.x2}
+                  y2={s.y2}
+                  stroke={line.color}
+                  strokeWidth={1}
+                />
+              ))}
+            </g>
+          );
+        })}
+
+        {/* Per-sibling vertical bar spanning the percentile range, with a
+            colored tick at each percentile level. Mean rendered as a small
+            diamond to distinguish from the percentile ticks. */}
+        {points.map((p, i) => {
+          const x = xOf(i);
+          // Collect the statistics (mean included) with a finite value for this sibling.
+          const present = PERCENTILE_LINES.filter(
+            (line) =>
+              typeof p.values[line.key] === 'number' && Number.isFinite(p.values[line.key]!),
+          ).map((line) => ({ ...line, value: p.values[line.key]! }));
+          if (present.length === 0) return null;
+          // Only the *percentile* values define the bar extent; mean might be
+          // outside the percentile span. If only the mean exists, it is used alone.
+          const pctlOnly = present.filter((p2) => p2.key !== 'mean');
+          const bandValues = pctlOnly.length > 0 ? pctlOnly : present;
+          const bandYs = bandValues.map((b) => yOf(b.value));
+          const yLo = Math.min(...bandYs); // Smallest y, i.e. the visually highest end of the bar.
+          const yHi = Math.max(...bandYs);
+          return (
+            <g key={`bar-${i}`}>
+              <line
+                x1={x}
+                x2={x}
+                y1={yLo}
+                y2={yHi}
+                stroke="currentColor"
+                strokeWidth={1}
+                opacity={0.35}
+              />
+              {present.map((b) => {
+                const ty = yOf(b.value);
+                if (b.key === 'mean') {
+                  // Diamond marker for mean (half-diagonal of 4 viewBox units).
+                  const s = 4;
+                  return (
+                    <polygon
+                      key={`m-${b.key}`}
+                      points={`${x},${ty - s} ${x + s},${ty} ${x},${ty + s} ${x - s},${ty}`}
+                      fill={b.color}
+                      stroke={b.color}
+                    />
+                  );
+                }
+                // Horizontal tick (12 units wide) at each percentile.
+                return (
+                  <line
+                    key={`tk-${b.key}`}
+                    x1={x - 6}
+                    x2={x + 6}
+                    y1={ty}
+                    y2={ty}
+                    stroke={b.color}
+                    strokeWidth={2.5}
+                  />
+                );
+              })}
+            </g>
+          );
+        })}
+      </ChartHover>
+    </div>
+  );
+}
diff --git a/packages/app/src/components/inference/agentic-point/chart-hover.tsx b/packages/app/src/components/inference/agentic-point/chart-hover.tsx
new file mode 100644
index 00000000..24270122
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/chart-hover.tsx
@@ -0,0 +1,148 @@
+'use client';
+
+import { useState, type ReactNode } from 'react';
+
+/** One row of the hover tooltip: a color swatch, a label and a preformatted value. */
+export interface HoverItem {
+  /** Color swatch to render next to the label. */
+  color: string;
+  label: string;
+  value: string;
+  /** Optional secondary line (e.g. a timestamp). NOTE(review): HoverTooltip never renders it. */
+  hint?: string;
+}
+
+interface ChartHoverProps {
+  /** Padding inside the SVG; matches the chart's CHART_PAD. */
+  pad: { top: number; right: number; bottom: number; left: number };
+  /** SVG viewBox dimensions used to render the chart. */
+  width: number;
+  height: number;
+  /**
+   * Called with the cursor's x clamped and normalized to [0..1] across the plot area.
+   * Returns `null` to hide crosshair and tooltip (e.g. cursor outside data range).
+   */
+  resolve: (xFraction: number) => { items: HoverItem[]; title?: string } | null;
+  children: ReactNode;
+}
+
+/**
+ * Wrap a chart's <svg> render to add mouse-driven crosshair + tooltip.
+ *
+ * The chart owner renders its bars / lines / axes via `children`; this wrapper
+ * adds a transparent <rect> across the plot area to capture mouse events, a
+ * dashed vertical line at the cursor x, and an HTML tooltip pinned near the top
+ * that sits right of the data x and flips to its left past 55% of the width.
+ */
+export function ChartHover({ pad, width, height, resolve, children }: ChartHoverProps) {
+  const [hover, setHover] = useState<{
+    xPx: number;
+    yPx: number; // Recorded on every move but not read by the render below.
+    fraction: number;
+    items: HoverItem[];
+    title?: string;
+  } | null>(null);
+
+  const innerW = width - pad.left - pad.right;
+  const innerH = height - pad.top - pad.bottom;
+
+  const onMove = (e: React.MouseEvent<SVGRectElement>) => {
+    const svg = e.currentTarget.ownerSVGElement;
+    if (!svg) return;
+    const rect = svg.getBoundingClientRect();
+    // Convert client coords → SVG viewBox coords (scale by rendered size).
+    const sx = ((e.clientX - rect.left) * width) / rect.width;
+    const sy = ((e.clientY - rect.top) * height) / rect.height;
+    const fraction = Math.max(0, Math.min(1, (sx - pad.left) / innerW)); // 0..1 across the plot.
+    const resolved = resolve(fraction);
+    if (!resolved) {
+      setHover(null);
+      return;
+    }
+    setHover({ xPx: sx, yPx: sy, fraction, items: resolved.items, title: resolved.title });
+  };
+
+  const onLeave = () => setHover(null);
+
+  return (
+    <div className="relative w-full">
+      <svg
+        viewBox={`0 0 ${width} ${height}`}
+        preserveAspectRatio="xMidYMid meet"
+        className="w-full h-auto text-foreground"
+      >
+        {children}
+        {hover && (
+          <line
+            x1={hover.xPx}
+            x2={hover.xPx}
+            y1={pad.top}
+            y2={pad.top + innerH}
+            stroke="currentColor"
+            strokeWidth={1}
+            strokeDasharray="3 3"
+            opacity={0.4}
+            pointerEvents="none"
+          />
+        )}
+        <rect
+          x={pad.left}
+          y={pad.top}
+          width={innerW}
+          height={innerH}
+          fill="transparent"
+          onMouseMove={onMove}
+          onMouseLeave={onLeave}
+        />
+      </svg>
+      {hover && hover.items.length > 0 && (
+        <HoverTooltip
+          xFraction={hover.fraction}
+          containerWidth={width}
+          padLeft={pad.left}
+          innerW={innerW}
+          title={hover.title}
+          items={hover.items}
+        />
+      )}
+    </div>
+  );
+}
+
+function HoverTooltip({
+  xFraction,
+  containerWidth,
+  padLeft,
+  innerW,
+  title,
+  items,
+}: {
+  xFraction: number;
+  containerWidth: number;
+  padLeft: number;
+  innerW: number;
+  title?: string;
+  items: HoverItem[];
+}) {
+  // Place the box at the hovered data x, expressed as a % of the viewBox width.
+  // Past 55% of the width we anchor by `right` instead, so it extends leftward.
+  const xPx = padLeft + xFraction * innerW;
+  const onRight = xPx < containerWidth * 0.55;
+  const left = onRight ? `${(xPx / containerWidth) * 100}%` : 'auto';
+  const right = onRight ? 'auto' : `${((containerWidth - xPx) / containerWidth) * 100}%`;
+  return (
+    <div
+      className="pointer-events-none absolute top-2 z-10 rounded-md border border-border bg-popover px-2 py-1.5 text-xs shadow-md"
+      style={{ left, right, marginLeft: onRight ? 8 : 0, marginRight: onRight ? 0 : 8 }}
+    >
+      {title && <div className="font-medium text-foreground mb-1">{title}</div>}
+      {items.map((it, i) => (
+        <div key={i} className="flex items-center gap-1.5 leading-tight">
+          <span className="inline-block w-2 h-2 rounded-sm" style={{ backgroundColor: it.color }} />
+          <span className="text-muted-foreground">{it.label}</span>
+          <span className="ml-auto font-medium text-foreground tabular-nums">{it.value}</span>
+        </div>
+      ))}
+    </div>
+  );
+}
diff --git a/packages/app/src/components/inference/agentic-point/distribution.tsx b/packages/app/src/components/inference/agentic-point/distribution.tsx
new file mode 100644
index 00000000..685b73f3
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/distribution.tsx
@@ -0,0 +1,242 @@
+'use client';
+
+import { useMemo } from 'react';
+
+import { ChartHover, type HoverItem } from './chart-hover';
+
+const fmtNum = (n: number) => // Rounds to an integer; en-US digit grouping only from 10000 up.
+  n >= 10000 ? new Intl.NumberFormat('en-US').format(Math.round(n)) : String(Math.round(n));
+
+/**
+ * Bar histogram (15–50 equal-width bins) with vertical p50/p75/p90/p95 guide
+ * lines, for the detail-page card; fills its container width via `viewBox`.
+ * Hover shows bin range, count and cumulative %; empty input shows "No data".
+ */
+export function Distribution({
+  values,
+  unit,
+  width = 720,
+  height = 260,
+}: {
+  values: readonly number[];
+  unit: string;
+  width?: number;
+  height?: number;
+}) {
+  const W = width;
+  const H = height;
+  const PAD = { top: 12, right: 16, bottom: 56, left: 60 }; // Plot margins, in viewBox units.
+
+  const computed = useMemo(() => {
+    if (values.length === 0) return null;
+    const sorted = values.toSorted((a, b) => a - b); // toSorted already copies.
+    const min = sorted[0]!;
+    const max = sorted.at(-1)!;
+    const range = Math.max(1e-9, max - min); // Avoid dividing by zero when all values are equal.
+    const innerW = W - PAD.left - PAD.right;
+    const innerH = H - PAD.top - PAD.bottom;
+    const nBins = Math.min(50, Math.max(15, Math.ceil(Math.sqrt(values.length)))); // sqrt(n), 15..50.
+    const counts: number[] = Array.from({ length: nBins }, () => 0);
+    for (const v of values) {
+      const i = Math.min(nBins - 1, Math.floor(((v - min) / range) * nBins)); // max → last bin.
+      counts[i]!++;
+    }
+    return { sorted, min, max, range, innerW, innerH, nBins, counts };
+  }, [values, W, H, PAD.bottom, PAD.left, PAD.right, PAD.top]);
+
+  if (!computed) {
+    // Size the placeholder from the `height` prop, like the rendered chart.
+    const emptyCls = 'grid place-items-center text-xs text-muted-foreground';
+    return <div className={emptyCls} style={{ height: H }}>No data</div>;
+  }
+  const { sorted, min, max, range, innerW, innerH, nBins, counts } = computed;
+  const maxCount = Math.max(...counts, 1);
+  const xScale = (v: number) => PAD.left + ((v - min) / range) * innerW;
+  const yScale = (c: number) => PAD.top + (1 - c / maxCount) * innerH;
+  const barW = innerW / nBins;
+
+  const fmt = fmtNum;
+
+  const quantile = (q: number): number => { // Linear interpolation between order statistics.
+    const pos = (sorted.length - 1) * q;
+    const lo = Math.floor(pos);
+    const hi = Math.ceil(pos);
+    return sorted[lo]! + (sorted[hi]! - sorted[lo]!) * (pos - lo);
+  };
+
+  const GUIDES = [
+    { label: 'p50', q: 0.5, color: '#3b82f6' },
+    { label: 'p75', q: 0.75, color: '#22c55e' },
+    { label: 'p90', q: 0.9, color: '#f59e0b' },
+    { label: 'p95', q: 0.95, color: '#ef4444' },
+  ] as const;
+
+  // Hover: report the bin range under the cursor, its count, and the cumulative
+  // share of all values up to and including that bin.
+  const resolve = (fraction: number) => {
+    const v = min + fraction * range;
+    const binIdx = Math.min(nBins - 1, Math.floor(((v - min) / range) * nBins));
+    const binLo = min + (binIdx * range) / nBins;
+    const binHi = min + ((binIdx + 1) * range) / nBins;
+    const count = counts[binIdx] ?? 0;
+    // Cumulative % at the bin's right edge.
+    let cumCount = 0;
+    for (let i = 0; i <= binIdx; i++) cumCount += counts[i] ?? 0;
+    const cumPct = (cumCount / values.length) * 100;
+    const items: HoverItem[] = [
+      { color: 'currentColor', label: 'Bin', value: `${fmt(binLo)}–${fmt(binHi)} ${unit}` },
+      { color: 'currentColor', label: 'Count', value: count.toLocaleString() },
+      { color: 'currentColor', label: 'Cumulative', value: `${cumPct.toFixed(1)}%` },
+    ];
+    return { items };
+  };
+
+  const xTickVals = [min, min + range / 3, min + (2 * range) / 3, max];
+  const yTickVals = Array.from({ length: 5 }, (_, i) => (maxCount * i) / 4);
+
+  return (
+    <div className="w-full">
+      <div className="mb-2 text-xs text-muted-foreground">
+        {values.length.toLocaleString()} requests · range {fmt(min)}–{fmt(max)} {unit}
+      </div>
+      <ChartHover pad={PAD} width={W} height={H} resolve={resolve}>
+        {/* y-axis tick marks + labels */}
+        {yTickVals.map((v, i) => {
+          const y = yScale(v);
+          return (
+            <g key={`y${i}`}>
+              <line
+                x1={PAD.left - 4}
+                x2={PAD.left}
+                y1={y}
+                y2={y}
+                stroke="currentColor"
+                opacity={0.4}
+              />
+              <text
+                x={PAD.left - 8}
+                y={y + 3}
+                fontSize={10}
+                fill="currentColor"
+                opacity={0.55}
+                textAnchor="end"
+              >
+                {fmt(v)}
+              </text>
+            </g>
+          );
+        })}
+
+        {/* Bars */}
+        {counts.map((c, i) => {
+          const h = (c / maxCount) * innerH;
+          const x = PAD.left + i * barW;
+          const y = PAD.top + (innerH - h);
+          return (
+            <rect
+              key={i}
+              x={x}
+              y={y}
+              width={Math.max(0, barW - 1)}
+              height={h}
+              fill="currentColor"
+              opacity={0.55}
+            />
+          );
+        })}
+
+        {/* Percentile guide lines */}
+        {GUIDES.map(({ q, color }) => {
+          const v = quantile(q);
+          const x = xScale(v);
+          return (
+            <line
+              key={q}
+              x1={x}
+              x2={x}
+              y1={PAD.top}
+              y2={PAD.top + innerH}
+              stroke={color}
+              strokeWidth={2}
+              strokeDasharray="5 3"
+              opacity={0.95}
+            />
+          );
+        })}
+
+        {/* X axis */}
+        <line
+          x1={PAD.left}
+          x2={PAD.left + innerW}
+          y1={PAD.top + innerH}
+          y2={PAD.top + innerH}
+          stroke="currentColor"
+          opacity={0.2}
+        />
+        {xTickVals.map((v, i) => {
+          const anchor = i === 0 ? 'start' : i === xTickVals.length - 1 ? 'end' : 'middle';
+          return (
+            <text
+              key={`x${i}`}
+              x={xScale(v)}
+              y={PAD.top + innerH + 14}
+              fontSize={11}
+              fill="currentColor"
+              opacity={0.7}
+              textAnchor={anchor}
+            >
+              {fmt(v)}
+            </text>
+          );
+        })}
+        <text
+          x={W / 2}
+          y={H - 22}
+          fontSize={11}
+          fill="currentColor"
+          opacity={0.55}
+          textAnchor="middle"
+        >
+          value ({unit})
+        </text>
+        <text
+          x={10}
+          y={H / 2}
+          fontSize={11}
+          fill="currentColor"
+          opacity={0.55}
+          textAnchor="middle"
+          transform={`rotate(-90 10 ${H / 2})`}
+        >
+          count
+        </text>
+
+        {/* Percentile legend chips, evenly spaced along the bottom edge */}
+        {(() => {
+          const chipY = H - 8;
+          const chipW = innerW / GUIDES.length;
+          return GUIDES.map(({ label: ql, q, color }, i) => {
+            const v = quantile(q);
+            const x = PAD.left + i * chipW;
+            return (
+              <g key={ql}>
+                <line
+                  x1={x + 2}
+                  x2={x + 14}
+                  y1={chipY - 4}
+                  y2={chipY - 4}
+                  stroke={color}
+                  strokeWidth={2}
+                  strokeDasharray="5 3"
+                />
+                <text x={x + 18} y={chipY} fontSize={11} fill="currentColor" opacity={0.9}>
+                  {ql} {fmt(v)}
+                </text>
+              </g>
+            );
+          });
+        })()}
+      </ChartHover>
+    </div>
+  );
+}
diff --git a/packages/app/src/components/inference/agentic-point/expandable-chart.tsx b/packages/app/src/components/inference/agentic-point/expandable-chart.tsx
new file mode 100644
index 00000000..7c8e4538
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/expandable-chart.tsx
@@ -0,0 +1,46 @@
+'use client';
+
+import { useState, type ReactNode } from 'react';
+import { Maximize2 } from 'lucide-react';
+
+import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog';
+
+/**
+ * Wraps a chart in a card with a header + expand button. Click the button to
+ * open the chart in a large dialog. `render(false)` draws the inline copy and
+ * `render(true)` the dialog copy, so charts can pick a larger width/height.
+ */
+export function ExpandableChart({
+  title,
+  render,
+}: {
+  title: string;
+  render: (expanded: boolean) => ReactNode;
+}) {
+  const [open, setOpen] = useState(false); // Whether the enlarged dialog is open.
+
+  return (
+    <div className="rounded-lg border border-border/40 bg-card/40 p-4">
+      <div className="flex items-start justify-between mb-3 gap-2">
+        <h2 className="text-sm font-semibold text-foreground">{title}</h2>
+        <button
+          type="button"
+          aria-label="Expand chart"
+          onClick={() => setOpen(true)}
+          className="text-muted-foreground hover:text-foreground transition-colors"
+        >
+          <Maximize2 className="size-4" />
+        </button>
+      </div>
+      {render(false)}
+      <Dialog open={open} onOpenChange={setOpen}>
+        <DialogContent className="max-w-[min(96vw,1400px)] w-[min(96vw,1400px)]">
+          <DialogHeader>
+            <DialogTitle>{title}</DialogTitle>
+          </DialogHeader>
+          <div className="w-full">{render(true)}</div>
+        </DialogContent>
+      </Dialog>
+    </div>
+  );
+}
diff --git a/packages/app/src/components/inference/agentic-point/request-timeline.tsx b/packages/app/src/components/inference/agentic-point/request-timeline.tsx
new file mode 100644
index 00000000..8762a158
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/request-timeline.tsx
@@ -0,0 +1,948 @@
+'use client';
+
+import { useCallback, useMemo, useRef, useState } from 'react';
+
+import { type RequestRecord, type RequestTimeline } from '@/hooks/api/use-request-timeline';
+import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle';
+
+/**
+ * Gantt-style request timeline for one agentic benchmark point.
+ *
+ * Rows are conversations (or workers — toggle in the header). Bars are
+ * individual HTTP requests, drawn from request_start to request_end with a
+ * thin lead-in segment from credit_issued (load gen queue). Scroll-wheel
+ * zooms, drag pans, hover shows per-request stats.
+ *
+ * The reference for this layout is the agent-timeline in semianalysis-claude-code-proxy.
+ */
+
+type RowMode = 'conversation' | 'worker'; // Grouping passed to buildRows as `mode`.
+
+const ROW_MODE_OPTIONS: SegmentedToggleOption<RowMode>[] = [
+  { value: 'conversation', label: 'By conversation', testId: 'timeline-mode-conversation' },
+  { value: 'worker', label: 'By worker', testId: 'timeline-mode-worker' },
+];
+
+type PhaseFilter = 'all' | 'profiling'; // 'all' is labelled below as including warmup.
+
+const PHASE_OPTIONS: SegmentedToggleOption<PhaseFilter>[] = [
+  { value: 'profiling', label: 'Profiling', testId: 'timeline-phase-profiling' },
+  { value: 'all', label: 'All (incl. warmup)', testId: 'timeline-phase-all' },
+];
+
+/** Row color palette; buildRows cycles through it by group order (index modulo length). */
+const ROW_COLORS = [
+  '#3b82f6',
+  '#ef4444',
+  '#10b981',
+  '#f59e0b',
+  '#a855f7',
+  '#06b6d4',
+  '#f97316',
+  '#84cc16',
+  '#ec4899',
+  '#14b8a6',
+  '#8b5cf6',
+  '#eab308',
+];
+
+/** Phase color overlay drawn as a thin strip at the bottom of each bar. */
+const PHASE_COLORS: Record<string, string> = {
+  profiling: '#22c55e',
+  warmup: '#94a3b8',
+  unknown: '#64748b', // NOTE(review): presumably the fallback for unrecognized phases — confirm.
+};
+
+/**
+ * Row kinds:
+ *   parent           — top-level conversation (depth 0)
+ *   worker           — worker swimlane (depth 0, worker mode)
+ *   subagent         — a subagent invocation (depth 1). Either a single
+ *                      stream (renders its own bars), or a multi-stream
+ *                      container whose bars are the union of its streams
+ *                      when collapsed.
+ *   stream           — one :sN stream of a multi-stream subagent (depth 2).
+ *                      Hidden by default; toggled in via the parent's chevron.
+ */
+type RowKind = 'parent' | 'worker' | 'subagent' | 'stream';
+
+interface Row {
+  key: string; // Row id; a subagent row's key is what `expandedSubagents` is checked against.
+  label: string;
+  color: string; // In conversation mode, shared by a parent and its subagent/stream rows.
+  requests: RequestRecord[]; // Sorted by `start` ascending by buildRows.
+  depth: number; // 0 = parent/worker, 1 = subagent, 2 = stream.
+  kind: RowKind;
+  /** Number of streams under this subagent (>=1). Only set for subagent rows. */
+  streamCount?: number;
+  /** For stream rows: the parent subagent's row key (drives expand/collapse). */
+  parentRowKey?: string;
+}
+
+/**
+ * Parse a conversation id of the form
+ *   <parent_cid>::sa:<agent_id>[:s<stream_idx>]
+ * into its parent id, subagent base and numeric stream index. Ids without
+ * `::sa:` are top-level conversations (both extras null). The `:s<N>` suffix
+ * marks one parallel "stream" of a fanned-out subagent (interval-graph
+ * decomposition in weka_trace._pack_into_streams); it is null when absent.
+ */
+function splitCid(cid: string): {
+  parent: string;
+  subagentBase: string | null;
+  stream: number | null;
+} {
+  const at = cid.indexOf('::sa:');
+  if (at < 0) return { parent: cid, subagentBase: null, stream: null };
+  const parent = cid.slice(0, at);
+  const tail = cid.slice(at + 5);
+  const hit = /^(.*):s(\d+)$/.exec(tail);
+  const stream = hit ? Number(hit[2]) : null;
+  return { parent, subagentBase: hit ? hit[1]! : tail, stream };
+}
+
+/**
+ * Group requests into rows. Worker mode yields one flat row per `wid`,
+ * ordered by first start. In conversation mode, output order is:
+ *   parent_conv
+ *     subagent_001                  (container)
+ *       :s0                         (only when expanded and >1 stream)
+ *       :s1
+ *     subagent_002
+ *
+ * Conversations and subagents are ordered by earliest request start;
+ * `expandedSubagents` holds the subagent row keys whose streams are shown.
+ * A subagent row always carries the UNION of its streams' requests, so
+ * overlapping bars hint at stream-level parallelism without expanding.
+ */
+function buildRows(
+  requests: RequestRecord[],
+  mode: RowMode,
+  expandedSubagents: ReadonlySet<string>,
+): Row[] {
+  if (mode !== 'conversation') {
+    // Worker mode: flat rows, sorted by first activity.
+    const groups = new Map<string, RequestRecord[]>();
+    for (const r of requests) {
+      let list = groups.get(r.wid);
+      if (!list) {
+        list = [];
+        groups.set(r.wid, list);
+      }
+      list.push(r);
+    }
+    const rows: Row[] = [];
+    let i = 0;
+    for (const [key, list] of groups) {
+      list.sort((a, b) => a.start - b.start); // Sorts the group's own array, not `requests`.
+      rows.push({
+        key,
+        label: shortenWid(key),
+        color: ROW_COLORS[i % ROW_COLORS.length]!,
+        requests: list,
+        depth: 0,
+        kind: 'worker',
+      });
+      i++;
+    }
+    rows.sort((a, b) => a.requests[0]!.start - b.requests[0]!.start);
+    return rows;
+  }
+
+  // Conversation mode — tree: parent → subagent → stream.
+  interface Tree {
+    parentCid: string;
+    parentReqs: RequestRecord[];
+    // subagentBase → (streamIndex|null → requests)
+    subagents: Map<string, Map<number | null, RequestRecord[]>>;
+    firstStart: number; // Earliest start over the parent's and all subagents' requests.
+  }
+  const trees = new Map<string, Tree>();
+  for (const r of requests) {
+    const { parent, subagentBase, stream } = splitCid(r.cid);
+    let tree = trees.get(parent);
+    if (!tree) {
+      tree = {
+        parentCid: parent,
+        parentReqs: [],
+        subagents: new Map(),
+        firstStart: Number.POSITIVE_INFINITY,
+      };
+      trees.set(parent, tree);
+    }
+    if (subagentBase === null) {
+      tree.parentReqs.push(r);
+    } else {
+      let saMap = tree.subagents.get(subagentBase);
+      if (!saMap) {
+        saMap = new Map();
+        tree.subagents.set(subagentBase, saMap);
+      }
+      const list = saMap.get(stream);
+      if (list) list.push(r);
+      else saMap.set(stream, [r]);
+    }
+    if (r.start < tree.firstStart) tree.firstStart = r.start;
+  }
+
+  const sortedTrees = [...trees.values()].toSorted((a, b) => a.firstStart - b.firstStart);
+  const rows: Row[] = [];
+  let colorIdx = 0;
+  for (const tree of sortedTrees) {
+    const color = ROW_COLORS[colorIdx % ROW_COLORS.length]!;
+    colorIdx++;
+    // Parent row (use a placeholder key if the parent itself wasn't replayed).
+    tree.parentReqs.sort((a, b) => a.start - b.start);
+    rows.push({
+      key: tree.parentReqs.length > 0 ? tree.parentCid : `__parent_${tree.parentCid}`,
+      label: tree.parentCid,
+      color,
+      requests: tree.parentReqs,
+      depth: 0,
+      kind: 'parent',
+    });
+
+    // One subagent row per base (which may contain N streams). Stream lists
+    // are still in input order here, so scan for the true earliest start.
+    const firstStartOf = (streams: Map<number | null, RequestRecord[]>) => {
+      let t = Number.POSITIVE_INFINITY;
+      for (const reqs of streams.values()) for (const r of reqs) t = Math.min(t, r.start);
+      return t;
+    };
+    const subagentEntries = [...tree.subagents.entries()].toSorted(
+      (a, b) => firstStartOf(a[1]) - firstStartOf(b[1]),
+    );
+    for (const [saBase, streams] of subagentEntries) {
+      const subagentKey = `${tree.parentCid}::sa:${saBase}`;
+      // Union of all stream requests for collapsed-view bars.
+      const allReqs: RequestRecord[] = [];
+      for (const reqs of streams.values()) allReqs.push(...reqs);
+      allReqs.sort((a, b) => a.start - b.start);
+      const streamCount = streams.size;
+      rows.push({
+        key: subagentKey,
+        label: `↳ ${formatSubagentLabel(saBase)}`,
+        color,
+        requests: allReqs,
+        depth: 1,
+        kind: 'subagent',
+        streamCount,
+      });
+
+      // Stream children only when expanded AND there's more than one
+      // stream (a single-stream subagent has nothing extra to show).
+      if (streamCount > 1 && expandedSubagents.has(subagentKey)) {
+        const streamEntries = [...streams.entries()].toSorted((a, b) => {
+          // Sort by stream index (null first as the "default" stream)
+          const ai = a[0] ?? -1;
+          const bi = b[0] ?? -1;
+          return ai - bi;
+        });
+        for (const [streamIdx, reqs] of streamEntries) {
+          reqs.sort((a, b) => a.start - b.start);
+          rows.push({
+            key: `${subagentKey}:s${streamIdx ?? '∅'}`,
+            label: `stream ${streamIdx ?? '∅'}`,
+            color,
+            requests: reqs,
+            depth: 2,
+            kind: 'stream',
+            parentRowKey: subagentKey,
+          });
+        }
+      }
+    }
+  }
+  return rows;
+}
+
+/**
+ * Compacts a raw subagent id for display: `subagent_001_bf1c5c16` becomes
+ * `subagent 001 · bf1c` (sequence number plus the first four hash characters).
+ * Ids that do not follow the `subagent_<digits>_<hex>` shape are returned unchanged.
+ */
+function formatSubagentLabel(raw: string): string {
+  const [, seq, hash] = raw.match(/^subagent_(\d+)_([0-9a-f]+)$/i) ?? [];
+  if (seq === undefined || hash === undefined) return raw;
+  return `subagent ${seq} · ${hash.slice(0, 4)}`;
+}
+
+/**
+ * Shortens a worker id for the tooltip: a leading `worker_` becomes `w_` and the
+ * result is capped at 12 characters (`worker_4ae87bea` → `w_4ae87bea`).
+ */
+function shortenWid(wid: string): string {
+  const compact = wid.startsWith('worker_') ? `w_${wid.slice('worker_'.length)}` : wid;
+  return compact.slice(0, 12);
+}
+
+/**
+ * Formats a nanosecond offset as an axis label with a leading "+":
+ * whole milliseconds below 1 s ("+250ms"), seconds below 1 min (one decimal
+ * under 10 s, none above: "+1.5s", "+12s"), minutes otherwise ("+1.5m").
+ */
+function formatTickLabel(ns: number): string {
+  const ms = ns / 1e6;
+  if (ms < 1000) return `+${ms.toFixed(0)}ms`;
+  const secondDigits = ms < 10_000 ? 1 : 0;
+  return ms < 60_000
+    ? `+${(ms / 1000).toFixed(secondDigits)}s`
+    : `+${(ms / 60_000).toFixed(1)}m`;
+}
+
+/**
+ * Formats a millisecond duration by magnitude: whole milliseconds below 1 s
+ * ("850ms"), two-decimal seconds below 1 min ("12.34s"), two-decimal minutes
+ * otherwise ("1.50m").
+ */
+function formatDuration(ms: number): string {
+  if (ms < 1000) return `${ms.toFixed(0)}ms`;
+  return ms < 60_000 ? `${(ms / 1000).toFixed(2)}s` : `${(ms / 60_000).toFixed(2)}m`;
+}
+
+/**
+ * Upper-bound binary search: the number of entries in `sorted` that are <= `target`.
+ * `sorted` must be in ascending order; the result is also the index of the first
+ * entry greater than `target`.
+ */
+function countLeq(sorted: number[], target: number): number {
+  let low = 0;
+  let high = sorted.length;
+  for (let span = high - low; span > 0; span = high - low) {
+    const probe = low + (span >> 1);
+    if (sorted[probe]! <= target) low = probe + 1;
+    else high = probe;
+  }
+  return low;
+}
+/**
+ * Lower-bound binary search: the number of entries in `sorted` that are strictly
+ * < `target`. `sorted` must be in ascending order; the result is also the index of
+ * the first entry that is >= `target`.
+ */
+function countLt(sorted: number[], target: number): number {
+  let low = 0;
+  let high = sorted.length;
+  for (let span = high - low; span > 0; span = high - low) {
+    const probe = low + (span >> 1);
+    if (sorted[probe]! < target) low = probe + 1;
+    else high = probe;
+  }
+  return low;
+}
+
+/** State for the per-request hover tooltip: pointer position plus the hovered bar. */
+interface TooltipData {
+  /** Pointer `clientX` captured when the bar was hovered. */
+  x: number;
+  /** Pointer `clientY` captured when the bar was hovered. */
+  y: number;
+  /** Row the hovered bar is drawn in (supplies the label and colour). */
+  row: Row;
+  /** Request represented by the hovered bar. */
+  req: RequestRecord;
+}
+
+/**
+ * Hover card for a single request bar, positioned just right of and above the pointer.
+ *
+ * Always shows total duration, queue wait ("—" when it is at most 0.5 ms), phase and
+ * the shortened worker id; TTFT, ISL and OSL appear only when non-null, and agent
+ * depth only when positive. Durations are derived from the request's ns timestamps.
+ *
+ * NOTE(review): "Started at" formats `req.start` as-is, whereas the chart's axis labels
+ * are offsets from the earliest credit time in the current filter — confirm that
+ * `req.start` is already relative to a meaningful origin.
+ */
+function Tooltip({ data }: { data: TooltipData }) {
+  const { row, req } = data;
+  const totalMs = (req.end - req.start) / 1e6;
+  const queueMs = (req.start - req.credit) / 1e6;
+
+  const numericCell = 'text-foreground text-right tabular-nums';
+  // One entry per grid row — [label, value, value className]; `null` = metric absent.
+  const cells: ([string, string | number, string] | null)[] = [
+    ['Total', formatDuration(totalMs), numericCell],
+    ['Queue wait', queueMs > 0.5 ? formatDuration(queueMs) : '—', numericCell],
+    req.ttftMs !== null ? ['TTFT', formatDuration(req.ttftMs), numericCell] : null,
+    req.isl !== null ? ['ISL', req.isl.toLocaleString(), numericCell] : null,
+    req.osl !== null ? ['OSL', req.osl.toLocaleString(), numericCell] : null,
+    ['Phase', req.phase, 'text-foreground text-right'],
+    req.ad > 0 ? ['Agent depth', req.ad, numericCell] : null,
+    ['Worker', shortenWid(req.wid), 'text-foreground text-right truncate'],
+  ];
+
+  return (
+    <div
+      className="fixed z-50 pointer-events-none rounded-md border border-border bg-card p-2.5 shadow-lg text-[11px]"
+      style={{ left: data.x + 12, top: data.y - 10, maxWidth: 280 }}
+    >
+      <div className="flex items-center gap-2 font-medium text-foreground">
+        <span className="inline-block w-2 h-2 rounded-sm" style={{ backgroundColor: row.color }} />
+        <span className="truncate">{row.label}</span>
+        <span className="text-muted-foreground">· turn {req.ti}</span>
+        {req.cancelled && <span className="text-destructive">· cancelled</span>}
+      </div>
+      <div className="mt-1.5 grid grid-cols-2 gap-x-3 gap-y-0.5 text-muted-foreground">
+        {cells.flatMap((cell) => {
+          if (cell === null) return [];
+          const [label, value, valueClass] = cell;
+          // Two sibling spans per metric: the label cell, then the value cell.
+          return [
+            <span key={label}>{label}</span>,
+            <span key={`${label}-value`} className={valueClass}>
+              {value}
+            </span>,
+          ];
+        })}
+      </div>
+      <div className="mt-1.5 pt-1 border-t border-border/40 text-[10px] text-muted-foreground">
+        Started at {formatTickLabel(req.start)}
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Interactive request timeline: one row per conversation (with nested subagent and
+ * stream rows) or per worker, one bar per request, with wheel zoom, drag pan, a
+ * per-bar tooltip and a cursor popover that counts requests at the cursor time.
+ *
+ * Request timestamps (`credit`, `start`, `end`) are treated as nanoseconds in one
+ * shared time base. The view window and the cursor are stored as offsets from
+ * `dataStart`, the earliest credit time among the currently filtered requests.
+ */
+export function RequestTimelineView({ data }: { data: RequestTimeline }) {
+  const [rowMode, setRowMode] = useState<RowMode>('conversation');
+  const [phaseFilter, setPhaseFilter] = useState<PhaseFilter>('profiling');
+  const [tooltip, setTooltip] = useState<TooltipData | null>(null);
+  // Which multi-stream subagents currently have their per-stream rows
+  // expanded. Key is the subagent row's `key` (parent_cid::sa:agent_id).
+  const [expandedSubagents, setExpandedSubagents] = useState<ReadonlySet<string>>(() => new Set());
+  const toggleSubagent = useCallback((key: string) => {
+    setExpandedSubagents((prev) => {
+      const next = new Set(prev);
+      if (next.has(key)) next.delete(key);
+      else next.add(key);
+      return next;
+    });
+  }, []);
+  // Snapshot taken on mouse-down: pointer x plus the view window at that moment.
+  // Non-null only while a drag-pan is in progress.
+  const dragRef = useRef<{ startX: number; vs: number; ve: number } | null>(null);
+
+  // Apply phase filter, then group into rows.
+  const filtered = useMemo(
+    () =>
+      phaseFilter === 'all' ? data.requests : data.requests.filter((r) => r.phase === 'profiling'),
+    [data.requests, phaseFilter],
+  );
+  const rows = useMemo(
+    () => buildRows(filtered, rowMode, expandedSubagents),
+    [filtered, rowMode, expandedSubagents],
+  );
+
+  // Pre-sort the timestamp columns so the cursor-time stats popover can
+  // count "running / waiting at time t" in O(log n). With a few hundred
+  // requests this is overkill — but it stays smooth on huge runs too.
+  const sortedTimes = useMemo(() => {
+    const credits = filtered.map((r) => r.credit).toSorted((a, b) => a - b);
+    const starts = filtered.map((r) => r.start).toSorted((a, b) => a - b);
+    const ends = filtered.map((r) => r.end).toSorted((a, b) => a - b);
+    return { credits, starts, ends };
+  }, [filtered]);
+
+  // Cursor state (vertical line + stats popover). null when the mouse
+  // isn't over the chart. xPx is svg-local; tNs is the ns offset from
+  // dataStart that the cursor is pointing at.
+  const [cursor, setCursor] = useState<{
+    xPx: number;
+    tNs: number;
+    clientX: number;
+    clientY: number;
+  } | null>(null);
+
+  // Timeline extent (clamped to actual data — if we filtered out warmup
+  // the visible window should shrink to just the profiling phase).
+  // Read from the pre-sorted columns instead of `Math.min(...array)`: spreading a
+  // very large array into a call can exceed the engine's argument limit and throw.
+  // Empty input keeps the 0 / 1 defaults so the span is never zero.
+  const dataStart = sortedTimes.credits[0] ?? 0;
+  const dataEnd = sortedTimes.ends.at(-1) ?? 1;
+  const totalNs = Math.max(dataEnd - dataStart, 1);
+
+  // Visible window state (ns offsets, relative to dataStart).
+  // `viewEnd === null` means "not zoomed": the window spans the whole extent.
+  const [viewStart, setViewStart] = useState(0);
+  const [viewEnd, setViewEnd] = useState<number | null>(null);
+  const vStart = viewStart;
+  const vEnd = viewEnd ?? totalNs;
+  const visibleDur = Math.max(vEnd - vStart, 1);
+  const isZoomed = viewEnd !== null;
+
+  // The view window is stored as offsets from `dataStart`, and both `dataStart` and
+  // `totalNs` move when the phase filter changes — a zoom carried across a filter
+  // switch would point at an unrelated (possibly out-of-range) slice, so reset it.
+  const handlePhaseFilterChange = useCallback((next: PhaseFilter) => {
+    setPhaseFilter(next);
+    setViewStart(0);
+    setViewEnd(null);
+  }, []);
+
+  // Layout
+  // Wide enough for a full 36-char conversation id at 10px font, plus the
+  // indent + color stripe + count badge. Subagent rows inherit the same
+  // width but truncate the longer "↳ subagent N · hash" tail with ellipsis.
+  const LABEL_WIDTH = 360;
+  const ROW_HEIGHT = 22;
+  const ROW_GAP = 3;
+  const HEADER_HEIGHT = 24;
+  const PADDING_RIGHT = 12;
+  const chartWidth = 920;
+  const svgHeight = HEADER_HEIGHT + rows.length * (ROW_HEIGHT + ROW_GAP) + 6;
+  // px per ns across the plot area (chart width minus the right padding).
+  const scale = (chartWidth - PADDING_RIGHT) / visibleDur;
+  // Local coords: convert ns offset from dataStart to x px.
+  const xOf = (ns: number) => (ns - dataStart - vStart) * scale;
+
+  // Time-axis ticks (~8 across visible window, snapped to nice second multiples).
+  const niceMs = [
+    100, 250, 500, 1000, 2000, 5000, 10_000, 30_000, 60_000, 120_000, 300_000, 600_000, 1_800_000,
+  ];
+  const targetMs = visibleDur / 1e6 / 8;
+  // Falls back to the raw target when the window is longer than the largest step covers.
+  const tickMs = niceMs.find((n) => n >= targetMs) ?? targetMs;
+  const tickNs = tickMs * 1e6;
+  const ticks: number[] = [];
+  const tickStart = Math.floor(vStart / tickNs) * tickNs;
+  for (let t = tickStart; t <= vEnd + tickNs; t += tickNs) {
+    if (t >= vStart && t <= vEnd) ticks.push(t);
+  }
+
+  // Wheel zoom around the pointer: the time under the cursor stays fixed while the
+  // window grows / shrinks by 1.2x, clamped to [0.1% of the extent, the full extent].
+  // NOTE(review): React may register wheel listeners as passive, in which case
+  // preventDefault() has no effect and the page still scrolls — verify in the browser.
+  const handleWheel = useCallback(
+    (e: React.WheelEvent<SVGSVGElement>) => {
+      e.preventDefault();
+      const rect = e.currentTarget.getBoundingClientRect();
+      const mouseX = e.clientX - rect.left;
+      const mouseRatio = Math.max(0, Math.min(1, mouseX / (chartWidth - PADDING_RIGHT)));
+      const curStart = vStart;
+      const curEnd = vEnd;
+      const curDur = curEnd - curStart;
+      const factor = e.deltaY > 0 ? 1.2 : 1 / 1.2;
+      const newDur = Math.min(Math.max(curDur * factor, totalNs * 0.001), totalNs);
+      const pivot = curStart + mouseRatio * curDur;
+      let newStart = pivot - mouseRatio * newDur;
+      let newEnd = pivot + (1 - mouseRatio) * newDur;
+      // Slide the window back inside [0, totalNs] without changing its length.
+      if (newStart < 0) {
+        newEnd -= newStart;
+        newStart = 0;
+      }
+      if (newEnd > totalNs) {
+        newStart -= newEnd - totalNs;
+        newEnd = totalNs;
+        if (newStart < 0) newStart = 0;
+      }
+      // Practically fully zoomed out: snap back to the un-zoomed state.
+      if (newEnd - newStart >= totalNs * 0.99) {
+        setViewStart(0);
+        setViewEnd(null);
+      } else {
+        setViewStart(newStart);
+        setViewEnd(newEnd);
+      }
+    },
+    [vStart, vEnd, totalNs, chartWidth],
+  );
+
+  // Primary-button press starts a drag-pan from the current window.
+  const handleMouseDown = useCallback(
+    (e: React.MouseEvent<SVGSVGElement>) => {
+      if (e.button !== 0) return;
+      dragRef.current = { startX: e.clientX, vs: vStart, ve: vEnd };
+    },
+    [vStart, vEnd],
+  );
+
+  const handleMouseMove = useCallback(
+    (e: React.MouseEvent<SVGSVGElement>) => {
+      // Dragging takes precedence over cursor tracking — panning the view.
+      if (dragRef.current) {
+        const dx = e.clientX - dragRef.current.startX;
+        const nsPerPx = visibleDur / (chartWidth - PADDING_RIGHT);
+        const delta = -dx * nsPerPx;
+        let ns = dragRef.current.vs + delta;
+        let ne = dragRef.current.ve + delta;
+        const dur = ne - ns;
+        // Clamp to the data extent while keeping the window length.
+        if (ns < 0) {
+          ns = 0;
+          ne = dur;
+        }
+        if (ne > totalNs) {
+          ne = totalNs;
+          ns = totalNs - dur;
+          if (ns < 0) ns = 0;
+        }
+        setViewStart(ns);
+        setViewEnd(ne);
+        setTooltip(null);
+        setCursor(null);
+        return;
+      }
+      // Track the cursor position in svg-local px and the matching ns offset
+      // so the crosshair + stats popover can render. Clamped to the chart
+      // plot area (don't show a cursor on the axis labels gutter).
+      const rect = e.currentTarget.getBoundingClientRect();
+      const xPx = Math.max(0, Math.min(chartWidth - PADDING_RIGHT, e.clientX - rect.left));
+      const nsPerPx = visibleDur / (chartWidth - PADDING_RIGHT);
+      const tNs = vStart + xPx * nsPerPx;
+      setCursor({ xPx, tNs, clientX: e.clientX, clientY: e.clientY });
+    },
+    [visibleDur, chartWidth, totalNs, vStart],
+  );
+
+  const handleMouseUp = useCallback(() => {
+    dragRef.current = null;
+  }, []);
+
+  // Leaving the chart ends any drag and hides the crosshair.
+  const handleMouseLeave = useCallback(() => {
+    dragRef.current = null;
+    setCursor(null);
+  }, []);
+
+  const resetZoom = useCallback(() => {
+    setViewStart(0);
+    setViewEnd(null);
+  }, []);
+
+  const totalRequests = filtered.length;
+  // Only top-level rows are conversations / workers. Nested subagent and stream rows
+  // (whose number also changes when a subagent is expanded) must not inflate the
+  // count shown in the summary.
+  const topLevelRowCount = rows.filter(
+    (row) => row.kind !== 'subagent' && row.kind !== 'stream',
+  ).length;
+
+  // Built once so the empty state below can still offer the toggles — otherwise a
+  // filter that matches nothing would leave no way to switch to another filter.
+  const controls = (
+    <div className="flex flex-wrap items-center gap-2">
+      <SegmentedToggle
+        value={rowMode}
+        options={ROW_MODE_OPTIONS}
+        onValueChange={setRowMode}
+        ariaLabel="Row mode"
+        testId="timeline-row-mode"
+        buttonClassName="px-2.5 py-1 text-xs"
+      />
+      <SegmentedToggle
+        value={phaseFilter}
+        options={PHASE_OPTIONS}
+        onValueChange={handlePhaseFilterChange}
+        ariaLabel="Phase filter"
+        testId="timeline-phase-filter"
+        buttonClassName="px-2.5 py-1 text-xs"
+      />
+      <span className="ml-auto text-xs text-muted-foreground">
+        {totalRequests} request{totalRequests === 1 ? '' : 's'} · {topLevelRowCount}{' '}
+        {rowMode === 'conversation' ? 'conversations' : 'workers'} · span{' '}
+        {formatDuration((dataEnd - dataStart) / 1e6)}
+        {isZoomed && (
+          <>
+            {' · '}
+            <button type="button" onClick={resetZoom} className="text-foreground hover:underline">
+              reset zoom
+            </button>
+          </>
+        )}
+      </span>
+    </div>
+  );
+
+  if (rows.length === 0) {
+    return (
+      <div className="space-y-3">
+        {controls}
+        <div className="rounded-lg border border-border/40 bg-card/40 p-4 text-sm text-muted-foreground">
+          No requests in the current filter.
+        </div>
+      </div>
+    );
+  }
+
+  return (
+    <div className="space-y-3">
+      {/* Controls */}
+      {controls}
+
+      {/* Chart container */}
+      <div className="rounded-md border border-border/60 bg-card overflow-hidden">
+        <div className="flex">
+          {/* Label column — sticky, doesn't scroll horizontally with the chart. */}
+          <div
+            className="flex-shrink-0 border-r border-border/60 bg-card/80"
+            style={{ width: LABEL_WIDTH }}
+          >
+            <div
+              className="border-b border-border/60 flex items-end px-2 pb-1"
+              style={{ height: HEADER_HEIGHT }}
+            >
+              <span className="text-[9px] font-mono font-bold uppercase tracking-[0.15em] text-muted-foreground">
+                {rowMode === 'conversation' ? 'Conversation' : 'Worker'}
+              </span>
+            </div>
+            {rows.map((row) => {
+              const isSubagentRow = row.kind === 'subagent';
+              const isStreamRow = row.kind === 'stream';
+              // Only subagents with more than one stream get an expand toggle.
+              const isExpandable = isSubagentRow && (row.streamCount ?? 1) > 1;
+              const isExpanded = isExpandable && expandedSubagents.has(row.key);
+              return (
+                <div
+                  key={row.key}
+                  className="flex items-center gap-1 overflow-hidden pr-2"
+                  style={{
+                    height: ROW_HEIGHT + ROW_GAP,
+                    paddingLeft: 4 + row.depth * 10,
+                  }}
+                >
+                  {isExpandable ? (
+                    <button
+                      type="button"
+                      onClick={() => toggleSubagent(row.key)}
+                      className="size-3.5 flex items-center justify-center text-muted-foreground hover:text-foreground shrink-0"
+                      aria-label={isExpanded ? 'Collapse streams' : 'Expand streams'}
+                      title={isExpanded ? 'Collapse streams' : 'Expand streams'}
+                    >
+                      <span className="text-[10px] leading-none">{isExpanded ? '▾' : '▸'}</span>
+                    </button>
+                  ) : (
+                    <span className="size-3.5 shrink-0" />
+                  )}
+                  <span
+                    className="inline-block w-1 h-3 rounded-sm flex-shrink-0"
+                    style={{
+                      backgroundColor: row.color,
+                      opacity: isStreamRow ? 0.4 : isSubagentRow ? 0.55 : 1,
+                    }}
+                  />
+                  <span
+                    className="text-[10px] font-mono truncate"
+                    style={{
+                      color: row.color,
+                      opacity: isStreamRow ? 0.7 : isSubagentRow ? 0.85 : 1,
+                    }}
+                  >
+                    {row.label}
+                    {isExpandable && (
+                      <span className="text-muted-foreground ml-1">×{row.streamCount}</span>
+                    )}
+                  </span>
+                  <span className="text-[9px] font-mono text-muted-foreground ml-auto shrink-0">
+                    {row.requests.length > 0 ? row.requests.length : '—'}
+                  </span>
+                </div>
+              );
+            })}
+          </div>
+
+          {/* Scrollable SVG */}
+          <div className="flex-1 overflow-x-auto">
+            <svg
+              width={chartWidth}
+              height={svgHeight}
+              className="block"
+              style={{ cursor: isZoomed ? 'grab' : 'crosshair' }}
+              onWheel={handleWheel}
+              onMouseDown={handleMouseDown}
+              onMouseMove={handleMouseMove}
+              onMouseUp={handleMouseUp}
+              onMouseLeave={handleMouseLeave}
+            >
+              {/* Header / time-axis baseline */}
+              <line
+                x1={0}
+                y1={HEADER_HEIGHT}
+                x2={chartWidth}
+                y2={HEADER_HEIGHT}
+                stroke="currentColor"
+                opacity={0.15}
+              />
+
+              {/* Time axis ticks */}
+              {ticks.map((t) => {
+                // Convert visible-window ns offset → x px (the tick array
+                // is already in dataStart-relative coords).
+                const x = (t - vStart) * scale;
+                return (
+                  <g key={t}>
+                    <line
+                      x1={x}
+                      y1={HEADER_HEIGHT}
+                      x2={x}
+                      y2={svgHeight}
+                      stroke="currentColor"
+                      opacity={0.08}
+                      strokeDasharray="2 4"
+                    />
+                    <text
+                      x={x + 2}
+                      y={HEADER_HEIGHT - 6}
+                      fill="currentColor"
+                      opacity={0.55}
+                      fontSize={9}
+                      fontFamily="ui-monospace, SFMono-Regular, monospace"
+                    >
+                      {formatTickLabel(t)}
+                    </text>
+                  </g>
+                );
+              })}
+
+              {/* Row separators */}
+              {rows.map((row, idx) => (
+                <line
+                  key={`sep-${row.key}`}
+                  x1={0}
+                  y1={HEADER_HEIGHT + idx * (ROW_HEIGHT + ROW_GAP)}
+                  x2={chartWidth}
+                  y2={HEADER_HEIGHT + idx * (ROW_HEIGHT + ROW_GAP)}
+                  stroke="currentColor"
+                  opacity={0.04}
+                />
+              ))}
+
+              {/* Request bars */}
+              {rows.map((row, rowIdx) => {
+                const yTop = HEADER_HEIGHT + rowIdx * (ROW_HEIGHT + ROW_GAP) + 2;
+                const barH = ROW_HEIGHT - 4;
+                // For multi-stream subagent containers, suppress the union
+                // bars when expanded — the child stream rows draw them
+                // individually instead, so we'd double-draw otherwise.
+                if (
+                  row.kind === 'subagent' &&
+                  (row.streamCount ?? 1) > 1 &&
+                  expandedSubagents.has(row.key)
+                ) {
+                  return null;
+                }
+                return row.requests.map((req) => {
+                  const xCredit = xOf(req.credit);
+                  const xStart = xOf(req.start);
+                  const xEnd = xOf(req.end);
+                  // Cull bars entirely outside the visible window so big
+                  // benchmarks don't render thousands of zero-width rects.
+                  if (xEnd < -2 || xCredit > chartWidth + 2) return null;
+                  // Bars are at least 1 px wide so very short requests stay visible.
+                  const runW = Math.max(xEnd - xStart, 1);
+                  const queueW = Math.max(xStart - xCredit, 0);
+                  const phaseColor = PHASE_COLORS[req.phase] ?? PHASE_COLORS.unknown!;
+                  return (
+                    <g
+                      key={`${req.cid}-${req.ti}-${req.start}`}
+                      onMouseMove={(e) => setTooltip({ x: e.clientX, y: e.clientY, row, req })}
+                      onMouseLeave={() => setTooltip(null)}
+                    >
+                      {/* Queue lead-in (faint) — only drawn when noticeable. */}
+                      {queueW >= 1 && (
+                        <rect
+                          x={xCredit}
+                          y={yTop + barH / 2 - 1}
+                          width={queueW}
+                          height={2}
+                          fill={row.color}
+                          opacity={0.35}
+                        />
+                      )}
+                      {/* Main bar — opacity stepped down with depth so
+                          parent > subagent > stream reads visually. */}
+                      <rect
+                        x={xStart}
+                        y={yTop}
+                        width={runW}
+                        height={barH}
+                        rx={2}
+                        fill={row.color}
+                        opacity={
+                          req.cancelled
+                            ? 0.35
+                            : row.kind === 'stream'
+                              ? 0.5
+                              : row.kind === 'subagent'
+                                ? 0.6
+                                : 0.85
+                        }
+                      />
+                      {/* Phase strip at bottom */}
+                      <rect
+                        x={xStart}
+                        y={yTop + barH - 2}
+                        width={runW}
+                        height={2}
+                        rx={1}
+                        fill={phaseColor}
+                        opacity={0.85}
+                      />
+                      {/* Cancelled X overlay */}
+                      {req.cancelled && runW > 6 && (
+                        <line
+                          x1={xStart + 1}
+                          y1={yTop + 1}
+                          x2={xStart + runW - 1}
+                          y2={yTop + barH - 1}
+                          stroke="currentColor"
+                          strokeWidth={0.7}
+                          opacity={0.6}
+                        />
+                      )}
+                    </g>
+                  );
+                });
+              })}
+
+              {/* Cursor crosshair — drawn on top of bars so it stays visible
+                  through dense rows. Stats popover is rendered as fixed
+                  HTML below the SVG block. */}
+              {cursor && (
+                <line
+                  x1={cursor.xPx}
+                  x2={cursor.xPx}
+                  y1={0}
+                  y2={svgHeight}
+                  stroke="currentColor"
+                  strokeWidth={1}
+                  opacity={0.45}
+                  pointerEvents="none"
+                />
+              )}
+            </svg>
+          </div>
+        </div>
+      </div>
+
+      {/* Footer / legend */}
+      <div className="flex flex-wrap items-center gap-x-4 gap-y-1 px-1 text-[11px] text-muted-foreground">
+        <span className="inline-flex items-center gap-1.5">
+          <span className="inline-block w-3 h-2 rounded-sm bg-current opacity-30" />
+          queue wait
+        </span>
+        <span className="inline-flex items-center gap-1.5">
+          <span className="inline-block w-3 h-2 rounded-sm" style={{ background: '#22c55e' }} />
+          profiling
+        </span>
+        <span className="inline-flex items-center gap-1.5">
+          <span className="inline-block w-3 h-2 rounded-sm" style={{ background: '#94a3b8' }} />
+          warmup
+        </span>
+        <span className="ml-auto opacity-70">scroll to zoom · drag to pan</span>
+      </div>
+
+      {/* Cursor stats popover: count of in-flight / waiting at the cursor's
+          ns offset. Hidden when the user is hovering an individual bar
+          (per-request tooltip wins). */}
+      {cursor && !tooltip && (
+        <CursorPopover
+          cursor={cursor}
+          dataStart={dataStart}
+          startTimes={sortedTimes.starts}
+          endTimes={sortedTimes.ends}
+          creditTimes={sortedTimes.credits}
+        />
+      )}
+
+      {/* Tooltip */}
+      {tooltip && <Tooltip data={tooltip} />}
+    </div>
+  );
+}
+
+/**
+ * Stats popover that follows the crosshair and reports how many requests are
+ * running, waiting or completed at the cursor's point in time.
+ *
+ * - `cursor.tNs` is an offset in ns from `dataStart`; `clientX` / `clientY` are
+ *   viewport coordinates used only to place the popover.
+ * - `dataStart` is the timeline origin, in the same time base as the three arrays.
+ * - `startTimes`, `endTimes` and `creditTimes` must each be sorted ascending (they
+ *   are binary-searched).
+ */
+function CursorPopover({
+  cursor,
+  dataStart,
+  startTimes,
+  endTimes,
+  creditTimes,
+}: {
+  cursor: { xPx: number; tNs: number; clientX: number; clientY: number };
+  dataStart: number;
+  startTimes: number[];
+  endTimes: number[];
+  creditTimes: number[];
+}) {
+  // `cursor.tNs` is an offset from `dataStart`, but the sorted columns hold the
+  // requests' own timestamps. Shift the cursor into their time base before counting;
+  // comparing the bare offset would give wrong counts whenever dataStart !== 0.
+  const tAbs = dataStart + cursor.tNs;
+  // At absolute time t:
+  //   running   = #(start <= t) - #(end < t)
+  //   waiting   = #(credit <= t) - #(start <= t)
+  //   completed = #(end <= t)
+  const startsLeq = countLeq(startTimes, tAbs);
+  const endsLt = countLt(endTimes, tAbs);
+  const creditsLeq = countLeq(creditTimes, tAbs);
+  const endsLeq = countLeq(endTimes, tAbs);
+  // Clamped at zero so inconsistent records can never show a negative count.
+  const running = Math.max(0, startsLeq - endsLt);
+  const waiting = Math.max(0, creditsLeq - startsLeq);
+  const completed = endsLeq;
+  const inflight = running + waiting;
+  // Displayed time stays relative: seconds elapsed since the timeline origin.
+  const tSec = cursor.tNs / 1e9;
+  // Position the popover near the cursor without overflowing the viewport:
+  // reserve 220 px and flip to the left of the cursor if it would clip the right
+  // edge. Without a `window` (server render) the default placement is used.
+  const wantLeft = cursor.clientX + 14;
+  const left =
+    typeof window === 'undefined' || wantLeft + 220 < window.innerWidth
+      ? wantLeft
+      : cursor.clientX - 220;
+  return (
+    <div
+      className="fixed z-40 pointer-events-none rounded-md border border-border bg-card/95 backdrop-blur p-2 shadow-lg text-[11px] font-mono"
+      style={{ left, top: cursor.clientY - 60, minWidth: 180 }}
+    >
+      <div className="flex justify-between gap-3 text-foreground">
+        <span className="text-muted-foreground">t =</span>
+        <span className="tabular-nums">
+          {tSec < 60 ? `${tSec.toFixed(3)} s` : `${(tSec / 60).toFixed(3)} m`}
+        </span>
+      </div>
+      <div className="mt-1 pt-1 border-t border-border/40 grid grid-cols-2 gap-x-3 gap-y-0.5 text-muted-foreground">
+        <span>In flight</span>
+        <span className="text-foreground text-right tabular-nums">{inflight}</span>
+        <span className="pl-3 text-[10px]">running</span>
+        <span className="text-foreground text-right tabular-nums">{running}</span>
+        <span className="pl-3 text-[10px]">waiting</span>
+        <span className="text-foreground text-right tabular-nums">{waiting}</span>
+        <span>Completed</span>
+        <span className="text-foreground text-right tabular-nums">{completed}</span>
+      </div>
+      {/* dataStart is informational — the displayed t is relative to it. */}
+      <div className="mt-1 pt-1 border-t border-border/40 text-[9px] text-muted-foreground">
+        relative to t₀ ({(dataStart / 1e9).toFixed(0)}s wall-clock)
+      </div>
+    </div>
+  );
+}
diff --git a/packages/app/src/components/inference/agentic-point/sibling-nav.tsx b/packages/app/src/components/inference/agentic-point/sibling-nav.tsx
new file mode 100644
index 00000000..aa727fdc
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/sibling-nav.tsx
@@ -0,0 +1,118 @@
+'use client';
+
+import { useRouter } from 'next/navigation';
+import { ChevronLeft, ChevronRight } from 'lucide-react';
+
+import type { BenchmarkSibling, BenchmarkSku } from '@/hooks/api/use-benchmark-siblings';
+
+/**
+ * Display labels for known hardware ids, keyed by the lower-case id.
+ * Ids missing from this table are upper-cased by `hwLabel` instead.
+ */
+const HW_LABELS: Record<string, string> = {
+  b200: 'B200',
+  b300: 'B300',
+  gb200: 'GB200',
+  gb300: 'GB300',
+  h100: 'H100',
+  h200: 'H200',
+  mi300x: 'MI300X',
+  mi325x: 'MI325X',
+  mi355x: 'MI355X',
+};
+
+/**
+ * Display names for known model ids, keyed by the id as it appears in the data.
+ * Ids missing from this table are shown verbatim by `modelLabel`.
+ */
+const MODEL_LABELS: Record<string, string> = {
+  dsr1: 'DeepSeek R1',
+  dsv4: 'DeepSeek V4 Pro',
+  glm5: 'GLM-5',
+  'glm5.1': 'GLM-5.1',
+  gptoss120b: 'gpt-oss 120B',
+  kimik2: 'Kimi K2',
+  'kimik2.5': 'Kimi K2.5',
+  'kimik2.6': 'Kimi K2.6',
+  llama70b: 'Llama 3.3 70B',
+  'minimaxm2.5': 'MiniMax M2.5',
+  'minimaxm2.7': 'MiniMax M2.7',
+  'qwen3.5': 'Qwen 3.5',
+};
+
+/** Display label for a hardware id: the `HW_LABELS` entry, else the id upper-cased. */
+function hwLabel(hw: string) {
+  const known = HW_LABELS[hw];
+  return known ?? hw.toUpperCase();
+}
+/** Display name for a model id: the `MODEL_LABELS` entry, else the id unchanged. */
+function modelLabel(m: string) {
+  const known = MODEL_LABELS[m];
+  return known ?? m;
+}
+/**
+ * Display name for a framework id. Known ids get their branded casing; any
+ * `dynamo-<x>` id becomes `Dynamo <X>` (suffix upper-cased); everything else is
+ * returned unchanged.
+ */
+function frameworkLabel(fw: string) {
+  switch (fw) {
+    case 'vllm':
+      return 'vLLM';
+    case 'sglang':
+      return 'SGLang';
+    case 'trt':
+      return 'TRT';
+    case 'mori-sglang':
+      return 'Mori-SGLang';
+    default: {
+      const dynamoPrefix = 'dynamo-';
+      return fw.startsWith(dynamoPrefix)
+        ? `Dynamo ${fw.slice(dynamoPrefix.length).toUpperCase()}`
+        : fw;
+    }
+  }
+}
+
+/**
+ * Short label for a sibling chip, with segments joined by " • ":
+ * parallelism (`<prefill>P+<decode>D` when disaggregated, otherwise `TP<n>` plus
+ * `EP<n>` when expert parallelism exceeds 1), concurrency (`c=<n>`), and
+ * `off=ON` when offload mode is on.
+ */
+export function chipLabel(s: BenchmarkSibling): string {
+  let parallelism: string;
+  if (s.disagg) {
+    parallelism = `${s.num_prefill_gpu}P+${s.num_decode_gpu}D`;
+  } else {
+    const expert = s.decode_ep > 1 ? `EP${s.decode_ep}` : '';
+    parallelism = `TP${s.decode_tp}${expert}`;
+  }
+  const segments = [parallelism, `c=${s.conc}`];
+  if (s.offload_mode === 'on') segments.push('off=ON');
+  return segments.join(' • ');
+}
+
+/**
+ * Header and navigation strip for one benchmark point: the SKU title, the number of
+ * points in the run, and one chip per sibling plus prev / next buttons that route to
+ * `/inference/agentic/<id>`. The current sibling's chip is highlighted and inert;
+ * siblings without stored trace data are dimmed and carry an explanatory tooltip.
+ */
+export function SiblingNav({ sku, siblings }: { sku: BenchmarkSku; siblings: BenchmarkSibling[] }) {
+  const router = useRouter();
+  const goTo = (id: BenchmarkSibling['id']) => router.push(`/inference/agentic/${id}`);
+
+  // Neighbours of the current point; both stay empty when no sibling is flagged
+  // current, and indexing past either end of the list yields undefined.
+  const here = siblings.findIndex((sibling) => sibling.is_current);
+  const hasCurrent = here !== -1;
+  const prev = hasCurrent ? siblings[here - 1] : undefined;
+  const next = hasCurrent ? siblings[here + 1] : undefined;
+
+  const skuLabel = [
+    hwLabel(sku.hardware),
+    modelLabel(sku.model),
+    sku.precision.toUpperCase(),
+    frameworkLabel(sku.framework),
+  ].join(' · ');
+  const pointNoun = siblings.length === 1 ? 'point' : 'points';
+  const stepButtonClass =
+    'inline-flex items-center gap-1 px-2 py-1 rounded-md text-xs border border-border/40 hover:bg-accent disabled:opacity-30 disabled:cursor-not-allowed';
+
+  return (
+    <div className="border-b border-border/40 pb-4 mb-4">
+      <div className="flex items-baseline justify-between gap-3 mb-3">
+        <h1 className="text-2xl font-semibold text-foreground">{skuLabel}</h1>
+        <span className="text-xs text-muted-foreground">
+          {siblings.length} {pointNoun} in this run · {sku.date}
+        </span>
+      </div>
+      <div className="flex items-center gap-2 flex-wrap">
+        <button
+          type="button"
+          disabled={!prev}
+          onClick={() => {
+            if (prev) goTo(prev.id);
+          }}
+          className={stepButtonClass}
+          aria-label="Previous point"
+        >
+          <ChevronLeft className="size-3.5" /> prev
+        </button>
+        <div className="flex items-center gap-1 flex-wrap">
+          {siblings.map((sibling) => {
+            const active = sibling.is_current;
+            const tone = active
+              ? 'border-primary bg-primary text-primary-foreground font-medium'
+              : 'border-border/40 text-foreground hover:bg-accent';
+            const fade = sibling.has_trace ? '' : 'opacity-60';
+            return (
+              <button
+                key={sibling.id}
+                type="button"
+                onClick={() => {
+                  // Clicking the chip for the page already shown does nothing.
+                  if (!active) goTo(sibling.id);
+                }}
+                className={`px-2 py-1 rounded-md text-xs border transition-colors ${tone} ${fade}`}
+                title={sibling.has_trace ? undefined : 'No stored trace data'}
+              >
+                {chipLabel(sibling)}
+              </button>
+            );
+          })}
+        </div>
+        <button
+          type="button"
+          disabled={!next}
+          onClick={() => {
+            if (next) goTo(next.id);
+          }}
+          className={stepButtonClass}
+          aria-label="Next point"
+        >
+          next <ChevronRight className="size-3.5" />
+        </button>
+      </div>
+    </div>
+  );
+}
diff --git a/packages/app/src/components/inference/agentic-point/time-series-chart.tsx b/packages/app/src/components/inference/agentic-point/time-series-chart.tsx
new file mode 100644
index 00000000..399f965d
--- /dev/null
+++ b/packages/app/src/components/inference/agentic-point/time-series-chart.tsx
@@ -0,0 +1,702 @@
+'use client';
+
+import { useMemo } from 'react';
+
+import type { TimeSeriesPoint } from '@/hooks/api/use-trace-server-metrics';
+
+import { ChartHover, type HoverItem } from './chart-hover';
+
+/** A single line plotted by TimeSeriesChart, with optional raw samples drawn behind it. */
+interface Series {
+  name: string; // label shown in the legend and the hover tooltip
+  /** The line to draw (caller pre-smooths if desired). */
+  data: TimeSeriesPoint[];
+  /** Optional raw per-scrape values; rendered as low-opacity scatter behind the line. */
+  rawData?: TimeSeriesPoint[];
+  color: string; // color of the line, the scatter dots, the legend swatch and the hover item
+  /** Stroke width of the line; 1.8 when omitted (the legend swatch falls back to 2). */
+  strokeWidth?: number;
+  /** Stroke opacity (0..1), 1 when omitted. Use < 1 for background/underlay lines. */
+  strokeOpacity?: number;
+  /** Omit from hover tooltip and legend (e.g. per-engine underlays). The path still renders. */
+  hideFromHover?: boolean;
+}
+
+interface TimeSeriesChartProps {
+  series: Series[]; // lines to plot, drawn in array order
+  durationS: number; // x-axis extent in seconds; values below 1 are treated as 1
+  yMax?: number; // y-axis maximum; defaults to the largest value in any series' `data`
+  yFmt?: (v: number) => string; // formats y tick labels and hover values
+  yAxisLabel?: string; // rotated label drawn left of the y axis when provided
+  width?: number; // chart width handed to ChartHover; default 720
+  height?: number; // chart height handed to ChartHover; default 260
+}
+
+/**
+ * Time-weighted rolling average over a trailing `windowS`-second window.
+ * The input is treated as a step function (each value holds until the next
+ * sample); every output point is that function's integral over the window
+ * ending at the sample, divided by the window length (window start clamped
+ * at t = 0). Suits irregular event series where the sample-count
+ * `rollingAverage` would over-weight bursts. `data` must be sorted by `t`.
+ */
+export function timeRollingAverage(data: TimeSeriesPoint[], windowS: number): TimeSeriesPoint[] {
+  if (data.length === 0 || windowS <= 0) return data;
+  const out: TimeSeriesPoint[] = Array.from({ length: data.length });
+  // First sample inside the window. `t` is ascending, so the window start never
+  // moves backwards and this index only advances: one O(n) scan, not one per point.
+  let first = 0;
+  for (let i = 0; i < data.length; i++) {
+    const tEnd = data[i]!.t;
+    const tStart = Math.max(0, tEnd - windowS);
+    while (first < data.length && data[first]!.t < tStart) first++;
+    let prevT = tStart;
+    let prevV = first > 0 ? data[first - 1]!.value : data[0]!.value;
+    let area = 0;
+    for (let j = first; j <= i; j++) {
+      const curT = data[j]!.t;
+      area += prevV * (curT - prevT);
+      prevT = curT;
+      prevV = data[j]!.value;
+    }
+    const dur = tEnd - tStart;
+    out[i] = { t: tEnd, value: dur > 0 ? area / dur : data[i]!.value };
+  }
+  return out;
+}
+
+/**
+ * Centered moving average by sample count: each output is the mean of the
+ * point plus up to floor(windowSize / 2) neighbours per side (clipped at the
+ * array ends), so an even `windowSize` spans windowSize + 1 samples.
+ * Returns `data` unchanged when it is empty or `windowSize` <= 1.
+ */
+export function rollingAverage(data: TimeSeriesPoint[], windowSize: number): TimeSeriesPoint[] {
+  if (windowSize <= 1 || data.length === 0) return data;
+  const reach = Math.floor(windowSize / 2);
+  return data.map((point, idx) => {
+    const from = Math.max(0, idx - reach);
+    const to = Math.min(data.length, idx + reach + 1);
+    let total = 0;
+    for (let k = from; k < to; k++) total += data[k]!.value;
+    const count = to - from;
+    return { t: point.t, value: count > 0 ? total / count : 0 };
+  });
+}
+
+/**
+ * Expanding-window mean: output i averages values 0..i (timestamps unchanged).
+ */
+export function cumulativeAverage(data: TimeSeriesPoint[]): TimeSeriesPoint[] {
+  if (data.length === 0) return data;
+  let runningTotal = 0;
+  return data.map((point, idx) => {
+    runningTotal += point.value;
+    return { t: point.t, value: runningTotal / (idx + 1) };
+  });
+}
+
+/**
+ * Running total of a per-interval rate series. Values are added as-is,
+ * i.e. each sample is taken to cover a 1-second interval (aiperf's scrape
+ * interval), so the output approximates the integral of the rate from the
+ * start of the series up to each point. Use for "total tokens served so
+ * far" from a tokens-per-second series. Empty input is returned unchanged.
+ */
+export function cumulativeSum(data: TimeSeriesPoint[]): TimeSeriesPoint[] {
+  if (data.length === 0) return data;
+  let runningTotal = 0;
+  return data.map((point) => {
+    runningTotal += point.value;
+    return { t: point.t, value: runningTotal };
+  });
+}
+
+/**
+ * Per-event step series: at each request start/end, sum the ISLs of
+ * currently-active requests across distinct `cid`s. Within a single
+ * `cid` aiperf dispatches turns sequentially (turn N+1 waits for N),
+ * so each cid contributes at most one in-flight ISL at a time. Across
+ * different cids we assume content is independent (parent ↔ subagent
+ * and conv ↔ conv share negligible prefix in practice — cross-conv
+ * dedup added ~0.25 pp to theoretical hit rate), a tight approximation
+ * of the true in-flight unique token count.
+ *
+ * Output starts with { t: 0, value: 0 }, then has one point per event, each
+ * value holding until the next. `t` is `start`/`end` (nanoseconds) converted
+ * to seconds, not re-based. Null/non-positive `isl` or `end <= start` is skipped.
+ */
+export function inflightUniqueTokens(
+  requests: readonly { cid: string; start: number; end: number; isl: number | null }[],
+): TimeSeriesPoint[] {
+  if (requests.length === 0) return [];
+  interface Event {
+    tNs: number;
+    kind: 'start' | 'end';
+    cid: string;
+    isl: number;
+  }
+  const events: Event[] = [];
+  for (const r of requests) {
+    const isl = r.isl ?? 0;
+    // A request with end <= start would have its 'end' handled before its
+    // 'start', so its ISL would never be released; it holds nothing in flight.
+    if (isl <= 0 || r.end <= r.start) continue;
+    events.push({ tNs: r.start, kind: 'start', cid: r.cid, isl });
+    events.push({ tNs: r.end, kind: 'end', cid: r.cid, isl });
+  }
+  if (events.length === 0) return [];
+  // Sort by time; on ties 'end' goes before 'start' so a same-instant turn handoff
+  // within one cid doesn't double-count. Same-kind ties compare equal (consistent order).
+  const kindRank = { end: 0, start: 1 } as const;
+  events.sort((a, b) => a.tNs - b.tNs || kindRank[a.kind] - kindRank[b.kind]);
+
+  // Active ISL per cid (max in case the same cid somehow has overlapping
+  // events; in practice it's always 0 or 1 request at a time per cid).
+  const activeByCid = new Map<string, number>();
+  let total = 0;
+  const out: TimeSeriesPoint[] = [{ t: 0, value: 0 }];
+  for (const e of events) {
+    const tSec = e.tNs / 1e9;
+    if (e.kind === 'start') {
+      const prev = activeByCid.get(e.cid) ?? 0;
+      const next = Math.max(prev, e.isl);
+      activeByCid.set(e.cid, next);
+      total += next - prev;
+    } else {
+      const cur = activeByCid.get(e.cid) ?? 0;
+      if (cur > 0) {
+        total -= cur;
+        activeByCid.delete(e.cid);
+      }
+    }
+    out.push({ t: tSec, value: Math.max(0, total) });
+  }
+  return out;
+}
+
+/**
+ * Cumulative difference of two rate series, forced to be non-decreasing:
+ * at every timestamp compute Σa[0..t] − Σb[0..t], then report the highest
+ * difference seen so far (never below 0, the starting high-water mark), so
+ * the curve cannot dip under an earlier value.
+ *
+ * Intended for quantities such as "cumulative cache-missed tokens" that
+ * can only grow in reality, while the per-tick rates may briefly look
+ * negative because of counter timing skew between scrapes (vllm's
+ * `prefix_cache_hits` and `prompt_tokens` counters can lag each other by
+ * ~5-10 s in our data even though their lifetime totals agree).
+ *
+ * `a` and `b` need not share timestamps: both sets are unioned and walked
+ * in ascending order, yielding one point per unique timestamp. A series
+ * without a sample at some timestamp contributes 0 there; if a series
+ * repeats a timestamp, only its last value at that timestamp is used.
+ */
+export function cumulativeDifferenceMonotonic(
+  a: TimeSeriesPoint[],
+  b: TimeSeriesPoint[],
+): TimeSeriesPoint[] {
+  const valsA = new Map(a.map((pt) => [pt.t, pt.value]));
+  const valsB = new Map(b.map((pt) => [pt.t, pt.value]));
+  const times = [...new Set([...valsA.keys(), ...valsB.keys()])].toSorted((l, r) => l - r);
+  let totalA = 0;
+  let totalB = 0;
+  // High-water mark of (totalA − totalB); only ever raised.
+  let highWater = 0;
+  return times.map((t) => {
+    totalA += valsA.get(t) ?? 0;
+    totalB += valsB.get(t) ?? 0;
+    const gap = totalA - totalB;
+    if (gap > highWater) highWater = gap;
+    return { t, value: highWater };
+  });
+}
+
+/**
+ * Element-wise sum of two index-aligned series. Timestamps are taken from
+ * `a`; the result is truncated to the shorter input.
+ */
+export function sumSeries(a: TimeSeriesPoint[], b: TimeSeriesPoint[]): TimeSeriesPoint[] {
+  const shared = Math.min(a.length, b.length);
+  const pairSum = (_: unknown, i: number) => ({ t: a[i]!.t, value: a[i]!.value + b[i]!.value });
+  return Array.from({ length: shared }, pairSum);
+}
+
+/** Integer formatter; en-US thousands separators once the rounded value reaches 10,000. */
+const fmtIntDefault = (n: number) =>
+  Math.round(n) >= 10000 ? Math.round(n).toLocaleString('en-US') : String(Math.round(n));
+/** Formats seconds as "42s" or "3m 7s"; rounds first so 119.7 gives "2m 0s", not "1m 60s". */
+const fmtSeconds = (s: number) => {
+  const total = Math.round(s);
+  if (total < 60) return `${total}s`;
+  return `${Math.floor(total / 60)}m ${total % 60}s`;
+};
+
+/** Linear interpolation of a time-sorted series at `t`, clamped to the end values; null if empty. */
+function interpAt(data: TimeSeriesPoint[], t: number): number | null {
+  if (data.length === 0) return null;
+  const head = data[0]!;
+  const tail = data.at(-1)!;
+  if (t <= head.t) return head.value;
+  if (t >= tail.t) return tail.value;
+  // Bisect down to the adjacent pair of samples that brackets `t`.
+  let [left, right] = [0, data.length - 1];
+  while (right - left > 1) {
+    const middle = (left + right) >> 1;
+    if (data[middle]!.t <= t) left = middle;
+    else right = middle;
+  }
+  const [before, after] = [data[left]!, data[right]!];
+  if (after.t === before.t) return before.value;
+  const fraction = (t - before.t) / (after.t - before.t);
+  return before.value + (after.value - before.value) * fraction;
+}
+
+/**
+ * SVG line chart of `series` over [0, durationS] seconds with an optional raw-sample scatter
+ * underlay, axes, legend and ChartHover readout; shows "No data" when every series is empty.
+ */
+export function TimeSeriesChart({
+  series,
+  durationS,
+  yMax: yMaxOpt,
+  yFmt = fmtIntDefault,
+  yAxisLabel,
+  width: W = 720,
+  height: H = 260,
+}: TimeSeriesChartProps) {
+  const PAD = { top: 12, right: 16, bottom: 56, left: 60 };
+
+  const layout = useMemo(() => {
+    const innerW = W - PAD.left - PAD.right;
+    const innerH = H - PAD.top - PAD.bottom;
+    const xMax = Math.max(durationS, 1);
+    let dataMax = 1e-9; // loop, not Math.max(...all): spread can hit the argument limit
+    for (const s of series) for (const d of s.data) dataMax = Math.max(dataMax, d.value);
+    const yMax = yMaxOpt ?? dataMax;
+    const xScale = (t: number) => PAD.left + (t / xMax) * innerW;
+    const yScale = (v: number) => PAD.top + (1 - v / yMax) * innerH;
+    return { innerW, innerH, xMax, yMax, xScale, yScale };
+  }, [series, durationS, yMaxOpt, W, H, PAD.bottom, PAD.left, PAD.right, PAD.top]);
+  const { innerW, innerH, xMax, yMax, xScale, yScale } = layout;
+
+  const subsample = (arr: TimeSeriesPoint[]) => {
+    if (arr.length === 0) return arr;
+    const stride = Math.max(1, Math.floor(arr.length / innerW));
+    return stride > 1 ? arr.filter((_, i) => i % stride === 0) : arr;
+  };
+
+  const xTickVals = Array.from({ length: 5 }, (_, i) => (xMax * i) / 4);
+  const yTickVals = Array.from({ length: 5 }, (_, i) => (yMax * i) / 4);
+
+  const resolve = (fraction: number) => {
+    const t = fraction * xMax;
+    const items: HoverItem[] = [];
+    for (const s of series) {
+      if (s.hideFromHover) continue;
+      const v = interpAt(s.data, t);
+      if (v === null || !Number.isFinite(v)) continue;
+      items.push({ color: s.color, label: s.name, value: yFmt(v) });
+    }
+    return items.length === 0 ? null : { items, title: fmtSeconds(t) };
+  };
+
+  if (series.every((s) => s.data.length === 0)) {
+    return (
+      <div className="h-[260px] grid place-items-center text-xs text-muted-foreground">No data</div>
+    );
+  }
+
+  return (
+    <ChartHover pad={PAD} width={W} height={H} resolve={resolve}>
+      {/* y-axis gridlines + labels */}
+      {yTickVals.map((v, i) => {
+        const y = yScale(v);
+        return (
+          <g key={`y${i}`}>
+            <line
+              x1={PAD.left - 4}
+              x2={PAD.left + innerW}
+              y1={y}
+              y2={y}
+              stroke="currentColor"
+              opacity={0.08}
+            />
+            <text
+              x={PAD.left - 8}
+              y={y + 3}
+              fontSize={10}
+              fill="currentColor"
+              opacity={0.55}
+              textAnchor="end"
+            >
+              {yFmt(v)}
+            </text>
+          </g>
+        );
+      })}
+
+      {/* Raw scatter underlay */}
+      {series
+        .filter((s) => s.rawData && s.rawData.length > 0)
+        .map((s, si) =>
+          subsample(s.rawData!).map((d, i) => (
+            <circle
+              key={`r${si}-${i}`}
+              cx={xScale(d.t)}
+              cy={yScale(d.value)}
+              r={1.5}
+              fill={s.color}
+              opacity={0.2}
+            />
+          )),
+        )}
+
+      {/* Lines */}
+      {series.map((s, si) => {
+        if (s.data.length === 0) return null;
+        const sampled = subsample(s.data);
+        const path = sampled
+          .map(
+            (d, i) =>
+              `${i === 0 ? 'M' : 'L'}${xScale(d.t).toFixed(2)},${yScale(d.value).toFixed(2)}`,
+          )
+          .join(' ');
+        return (
+          <path
+            key={`l${si}`}
+            d={path}
+            fill="none"
+            stroke={s.color}
+            strokeWidth={s.strokeWidth ?? 1.8}
+            strokeOpacity={s.strokeOpacity ?? 1}
+          />
+        );
+      })}
+
+      {/* X-axis */}
+      <line
+        x1={PAD.left}
+        x2={PAD.left + innerW}
+        y1={PAD.top + innerH}
+        y2={PAD.top + innerH}
+        stroke="currentColor"
+        opacity={0.2}
+      />
+      {xTickVals.map((v, i) => {
+        const x = xScale(v);
+        const anchor = i === 0 ? 'start' : i === xTickVals.length - 1 ? 'end' : 'middle';
+        return (
+          <text
+            key={`x${i}`}
+            x={x}
+            y={PAD.top + innerH + 14}
+            fontSize={11}
+            fill="currentColor"
+            opacity={0.7}
+            textAnchor={anchor}
+          >
+            {fmtSeconds(v)}
+          </text>
+        );
+      })}
+      <text
+        x={W / 2}
+        y={H - 22}
+        fontSize={11}
+        fill="currentColor"
+        opacity={0.55}
+        textAnchor="middle"
+      >
+        time
+      </text>
+
+      {yAxisLabel && (
+        <text
+          x={10}
+          y={H / 2}
+          fontSize={11}
+          fill="currentColor"
+          opacity={0.55}
+          textAnchor="middle"
+          transform={`rotate(-90 10 ${H / 2})`}
+        >
+          {yAxisLabel}
+        </text>
+      )}
+
+      {/* Legend; hideFromHover series are skipped so underlays don't clutter the chip row. */}
+      {(() => {
+        const visible = series.filter((s) => !s.hideFromHover);
+        const chipY = H - 8;
+        const chipW = innerW / Math.max(1, visible.length);
+        return visible.map((s, i) => {
+          const x = PAD.left + i * chipW;
+          return (
+            <g key={`leg${i}`}>
+              <line
+                x1={x + 2}
+                x2={x + 14}
+                y1={chipY - 4}
+                y2={chipY - 4}
+                stroke={s.color}
+                strokeWidth={s.strokeWidth ?? 2}
+              />
+              <text x={x + 18} y={chipY} fontSize={11} fill="currentColor" opacity={0.9}>
+                {s.name}
+              </text>
+            </g>
+          );
+        });
+      })()}
+    </ChartHover>
+  );
+}
+
+/**
+ * Stacked-area chart of each source's share of the cumulative total over time (0–100%).
+ * Sources are aligned on the union of their timestamps; a source that did not report at a
+ * timestamp carries its cumulative value forward. Renders "No data" if all sources are empty.
+ */
+export function StackedAreaChart({
+  sourceSeries,
+  durationS,
+  width: W = 720,
+  height: H = 260,
+}: {
+  sourceSeries: Record<string, TimeSeriesPoint[]>;
+  durationS: number;
+  width?: number;
+  height?: number;
+}) {
+  const PAD = { top: 12, right: 16, bottom: 56, left: 60 };
+
+  const computed = useMemo(() => {
+    const entries = Object.entries(sourceSeries).filter(([, v]) => v.length > 0);
+    if (entries.length === 0) return null;
+
+    // Different sources can land on different scrape timestamps (SGLang's
+    // hits/misses fire on alternating ticks), so we MUST align across all
+    // sources before computing shares — otherwise each source would be indexed
+    // on its own time axis and values from different moments would be mixed.
+    // Approach: union all timestamps, then carry every source's cumulative sum
+    // forward (a source silent at time t keeps its previous total, not 0).
+    const tValues = [...new Set(entries.flatMap(([, arr]) => arr.map((p) => p.t)))].toSorted(
+      (a, b) => a - b,
+    );
+
+    // Per source: cumulative sums indexed against `tValues`, carried forward across gaps.
+    const cum: Record<string, number[]> = {};
+    for (const [name, arr] of entries) {
+      const valByT = new Map(arr.map((p) => [p.t, p.value]));
+      const out: number[] = Array.from({ length: tValues.length });
+      let acc = 0;
+      for (let i = 0; i < tValues.length; i++) {
+        const v = valByT.get(tValues[i]!);
+        if (v !== undefined) acc += v;
+        out[i] = acc;
+      }
+      cum[name] = out;
+    }
+
+    const shares: Record<string, number[]> = {};
+    for (const name of Object.keys(cum)) shares[name] = [];
+    for (let i = 0; i < tValues.length; i++) {
+      const total = entries.reduce((s, [name]) => s + (cum[name]?.[i] ?? 0), 0);
+      for (const [name] of entries) {
+        shares[name]!.push(total > 0 ? (cum[name]?.[i] ?? 0) / total : 0);
+      }
+    }
+    return { tValues, shares };
+  }, [sourceSeries]);
+
+  const colors: Record<string, string> = {
+    // vLLM source names
+    local_compute: '#f97316',
+    local_cache_hit: '#3b82f6',
+    external_kv_transfer: '#22c55e',
+    miss: '#f97316',
+    // SGLang source names (set by compute-chart-series for sglang rows)
+    'cache hit (HBM)': '#3b82f6',
+    'cache hit (CPU offload)': '#22c55e',
+    'cache hit': '#3b82f6',
+    'compute (miss)': '#f97316',
+  };
+  const labelFor: Record<string, string> = {
+    local_compute: 'Prefill',
+    local_cache_hit: 'HBM Cache Hit',
+    external_kv_transfer: 'Offload Cache Hit',
+    miss: 'Miss',
+  };
+  // Fallback palette for source names missing from `colors`. `colorFor` hands out the first
+  // shade no other name uses, so layers only share a shade once every entry is taken.
+  const fallbackPalette = [
+    '#3b82f6',
+    '#f97316',
+    '#22c55e',
+    '#a855f7',
+    '#ef4444',
+    '#06b6d4',
+    '#f59e0b',
+    '#ec4899',
+  ];
+  let fallbackIdx = 0;
+  const colorFor = (name: string): string => {
+    if (colors[name]) return colors[name]!;
+    // Prefer a shade no other name (built-in or already assigned) uses; cycle once all are taken.
+    const taken = new Set(Object.values(colors));
+    const cycled = fallbackPalette[fallbackIdx % fallbackPalette.length]!;
+    const c = fallbackPalette.find((shade) => !taken.has(shade)) ?? cycled;
+    fallbackIdx++;
+    colors[name] = c; // memoize so the SAME unknown name always gets the same color
+    return c;
+  };
+
+  if (!computed) {
+    return (
+      <div className="h-[260px] grid place-items-center text-xs text-muted-foreground">No data</div>
+    );
+  }
+  const { tValues, shares } = computed;
+
+  const innerW = W - PAD.left - PAD.right;
+  const innerH = H - PAD.top - PAD.bottom;
+  const xMax = Math.max(durationS, 1);
+  const xScale = (t: number) => PAD.left + (t / xMax) * innerW;
+  const yScale = (v: number) => PAD.top + (1 - v) * innerH;
+
+  const stackOrder = Object.keys(shares);
+  const lower: number[] = Array.from({ length: tValues.length }, () => 0);
+  const layers = stackOrder.map((name) => {
+    const upper = shares[name]!.map((v, i) => lower[i]! + v);
+    const top = upper.map((v, i) => [xScale(tValues[i]!), yScale(v)] as [number, number]);
+    const bottom = lower.map((v, i) => [xScale(tValues[i]!), yScale(v)] as [number, number]);
+    const d = `${top
+      .map(([x, y], i) => `${i === 0 ? 'M' : 'L'}${x.toFixed(2)},${y.toFixed(2)}`)
+      .join(' ')} ${[...bottom]
+      .toReversed()
+      .map(([x, y]) => `L${x.toFixed(2)},${y.toFixed(2)}`)
+      .join(' ')} Z`;
+    const color = colorFor(name);
+    for (let i = 0; i < tValues.length; i++) lower[i] = upper[i]!;
+    return { name, color, d };
+  });
+
+  const resolve = (fraction: number) => {
+    const t = fraction * xMax;
+    // Find the closest tValue index.
+    let idx = 0;
+    let bestDist = Infinity;
+    for (let i = 0; i < tValues.length; i++) {
+      const d = Math.abs(tValues[i]! - t);
+      if (d < bestDist) {
+        bestDist = d;
+        idx = i;
+      }
+    }
+    const items: HoverItem[] = stackOrder.map((name) => ({
+      color: colorFor(name),
+      label: labelFor[name] ?? name,
+      value: `${((shares[name]?.[idx] ?? 0) * 100).toFixed(1)}%`,
+    }));
+    return { items, title: fmtSeconds(t) };
+  };
+
+  const xTickVals = Array.from({ length: 5 }, (_, i) => (xMax * i) / 4);
+  const yTickVals = [0, 0.25, 0.5, 0.75, 1];
+
+  return (
+    <ChartHover pad={PAD} width={W} height={H} resolve={resolve}>
+      {yTickVals.map((v, i) => {
+        const y = yScale(v);
+        return (
+          <g key={`y${i}`}>
+            <line
+              x1={PAD.left - 4}
+              x2={PAD.left + innerW}
+              y1={y}
+              y2={y}
+              stroke="currentColor"
+              opacity={0.08}
+            />
+            <text
+              x={PAD.left - 8}
+              y={y + 3}
+              fontSize={10}
+              fill="currentColor"
+              opacity={0.55}
+              textAnchor="end"
+            >
+              {(v * 100).toFixed(0)}%
+            </text>
+          </g>
+        );
+      })}
+      {layers.map((l, i) => (
+        <path key={i} d={l.d} fill={l.color} opacity={0.75} />
+      ))}
+      <line
+        x1={PAD.left}
+        x2={PAD.left + innerW}
+        y1={PAD.top + innerH}
+        y2={PAD.top + innerH}
+        stroke="currentColor"
+        opacity={0.2}
+      />
+      {xTickVals.map((v, i) => {
+        const x = xScale(v);
+        const anchor = i === 0 ? 'start' : i === xTickVals.length - 1 ? 'end' : 'middle';
+        return (
+          <text
+            key={`x${i}`}
+            x={x}
+            y={PAD.top + innerH + 14}
+            fontSize={11}
+            fill="currentColor"
+            opacity={0.7}
+            textAnchor={anchor}
+          >
+            {fmtSeconds(v)}
+          </text>
+        );
+      })}
+      <text
+        x={W / 2}
+        y={H - 22}
+        fontSize={11}
+        fill="currentColor"
+        opacity={0.55}
+        textAnchor="middle"
+      >
+        time
+      </text>
+      <text
+        x={10}
+        y={H / 2}
+        fontSize={11}
+        fill="currentColor"
+        opacity={0.55}
+        textAnchor="middle"
+        transform={`rotate(-90 10 ${H / 2})`}
+      >
+        % of prefill tokens
+      </text>
+      {(() => {
+        const chipY = H - 8;
+        const chipW = innerW / Math.max(1, layers.length);
+        return layers.map((l, i) => {
+          const x = PAD.left + i * chipW;
+          return (
+            <g key={`leg${i}`}>
+              <rect x={x + 2} y={chipY - 9} width={12} height={8} fill={l.color} opacity={0.75} />
+              <text x={x + 18} y={chipY} fontSize={11} fill="currentColor" opacity={0.9}>
+                {labelFor[l.name] ?? l.name}
+              </text>
+            </g>
+          );
+        });
+      })()}
+    </ChartHover>
+  );
+}
diff --git a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts
index beed5e0a..3c67ff90 100644
--- a/packages/app/src/components/inference/hooks/useChartData.ts
+++ b/packages/app/src/components/inference/hooks/useChartData.ts
@@ -1,7 +1,7 @@
 import { useMemo, useRef } from 'react';
 
 import { useQueries } from '@tanstack/react-query';
-import { sequenceToIslOsl } from '@semianalysisai/inferencex-constants';
+import { rowToSequence } from '@semianalysisai/inferencex-constants';
 
 import chartDefinitions from '@/components/inference/inference-chart-config.json';
 import type {
@@ -19,9 +19,87 @@ import {
   getModelSortIndex,
   hardwareKeyMatchesAnyBase,
 } from '@/lib/constants';
-import { transformBenchmarkRows } from '@/lib/benchmark-transform';
-import type { Model, Sequence } from '@/lib/data-mappings';
+import { transformBenchmarkRows, withPercentile } from '@/lib/benchmark-transform';
+import { Sequence, type Model } from '@/lib/data-mappings';
 import { calculateCostsForGpus, calculatePowerForGpus } from '@/lib/utils';
+import {
+  paretoFrontLowerLeft,
+  paretoFrontLowerRight,
+  paretoFrontUpperLeft,
+  paretoFrontUpperRight,
+} from '@/lib/chart-utils';
+
+type XAxisMode = 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps';
+
+/**
+ * Resolve the e2e-latency field name for `percentile` by applying
+ * `withPercentile` to the 'median_e2el' base (e.g. 'median_e2el', 'p90_e2el').
+ */
+function e2elFieldFor(percentile: string): string {
+  return withPercentile('median_e2el', percentile);
+}
+
+/**
+ * Ids of the benchmark_results rows that lie on the (e2e_latency, y) Pareto
+ * frontier, computed separately for every (hwKey, precision, date) group.
+ * The non-e2e x-axis modes (ttft, interactivity, session-time, prefill-tps)
+ * use the set to display *only* points that also win on end-to-end
+ * latency — guarding against benchmark-hacking, where a config tops one
+ * axis while tanking the other.
+ *
+ * Returns null when there is no e2e chart definition or the y-metric has no
+ * roofline direction declared on it (caller falls back to no filtering).
+ */
+function e2eParetoIds(
+  points: InferenceData[],
+  selectedYAxisMetric: string,
+  percentile: string,
+): Set<number> | null {
+  const e2eDef = (chartDefinitions as ChartDefinition[]).find((def) => def.chartType === 'e2e');
+  if (!e2eDef) return null;
+  type Direction = 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
+  const rooflineKey = `${selectedYAxisMetric}_roofline` as keyof ChartDefinition;
+  const direction = e2eDef[rooflineKey] as Direction | undefined;
+  if (!direction) return null;
+  // Any direction other than the first three resolves to the lower-right frontier.
+  const frontierFn = (() => {
+    switch (direction) {
+      case 'upper_right':
+        return paretoFrontUpperRight;
+      case 'upper_left':
+        return paretoFrontUpperLeft;
+      case 'lower_left':
+        return paretoFrontLowerLeft;
+      default:
+        return paretoFrontLowerRight;
+    }
+  })();
+  const e2elField = e2elFieldFor(percentile);
+  const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey;
+
+  // Project every usable point into (e2el, y) space and bucket it by
+  // (hwKey, precision, date): frontiers never span dates (a May 17 point
+  // can't dominate a May 15 plot). Points without finite x or y are dropped.
+  const buckets = new Map<string, InferenceData[]>();
+  for (const point of points) {
+    const y = (point[metricKey] as { y?: number } | undefined)?.y;
+    const x = (point as unknown as Record<string, unknown>)[e2elField];
+    if (typeof x !== 'number' || !Number.isFinite(x)) continue;
+    if (typeof y !== 'number' || !Number.isFinite(y)) continue;
+    const groupKey = `${point.hwKey}|${point.precision}|${point.date}`;
+    const reframed = { ...point, x, y };
+    const existing = buckets.get(groupKey);
+    if (existing) existing.push(reframed);
+    else buckets.set(groupKey, [reframed]);
+  }
+  const winners = new Set<number>();
+  for (const bucket of buckets.values()) {
+    for (const frontierPoint of frontierFn(bucket)) {
+      if (typeof frontierPoint.id === 'number') winners.add(frontierPoint.id);
+    }
+  }
+  return winners;
+}
 
 /** Build deduplicated comparison dates, excluding the main run date. */
 export function buildComparisonDates(
@@ -83,12 +161,31 @@ export function useChartData(
   selectedRunDate?: string,
   enabled = true,
   latestAvailableDate?: string,
+  selectedPercentile = 'p90',
   /** When set, only series for these two registry GPU keys are shown (compare pages). */
   compareGpuPair?: readonly [string, string] | null,
+  /**
+   * GitHub run id (g_runid) from the run picker. When set, the benchmarks API
+   * scopes results to that workflow run instead of returning the latest per
+   * config — disambiguates when two runs land on the same date.
+   */
+  selectedRunId?: string,
+  /**
+   * Current x-axis mode. When set to anything other than 'e2e', the displayed
+   * data is filtered to the (e2e-latency, y) Pareto frontier so the ttft /
+   * interactivity / session-time / prefill-tps charts show only points that
+   * also win on end-to-end latency — preventing benchmark-hacking where a
+   * config tops one metric while tanking the other. The 'e2e' mode is the
+   * source of truth and keeps the full point set.
+   */
+  selectedXAxisMode: XAxisMode = 'e2e',
 ) {
   // When the selected date is the latest available, use '' (empty string) to match
   // the initial no-date query key, reusing the eagerly-fetched benchmarks from the
   // materialized view instead of firing a redundant second fetch with identical data.
+  // When a specific run is selected, we always go through the runId branch and the
+  // date is effectively ignored — keep queryDate set so React Query still has a
+  // distinct cache key per date if the user navigates back to "latest".
   const queryDate =
     selectedRunDate && latestAvailableDate && selectedRunDate === latestAvailableDate
       ? ''
@@ -98,7 +195,7 @@ export function useChartData(
     data: allRows,
     isLoading: queryLoading,
     error: queryError,
-  } = useBenchmarks(selectedModel, queryDate, enabled);
+  } = useBenchmarks(selectedModel, queryDate, enabled, selectedRunId);
 
   // GPU comparison: fetch data for each additional comparison date
   const comparisonDates = useMemo(
@@ -125,11 +222,13 @@ export function useChartData(
   // Merge main rows with comparison date rows.
   // Stamp each row with the *requested* date (not the actual DB date) so that
   // GPUGraph's activeDates filter (keyed by user-selected date) matches the points.
-  const sequenceIslOsl = useMemo(() => sequenceToIslOsl(selectedSequence), [selectedSequence]);
+  //
+  // rowToSequence handles both fixed-seq (via isl/osl) and agentic (via
+  // benchmark_type), so one filter covers every scenario.
   const rows = useMemo(() => {
-    if (!allRows || !sequenceIslOsl) return [];
-    const seqFilter = (r: { isl: number; osl: number }) =>
-      r.isl === sequenceIslOsl.isl && r.osl === sequenceIslOsl.osl;
+    if (!allRows) return [];
+    const seqFilter = (r: { isl: number | null; osl: number | null; benchmark_type: string }) =>
+      rowToSequence(r) === selectedSequence;
     const seqFiltered = allRows.filter(seqFilter);
 
     // For each (hw, framework, spec_method, disagg, precision) group, keep only
@@ -156,14 +255,14 @@ export function useChartData(
         .map((r) => ({ ...r, date: comparisonDates[i], actualDate: r.date })),
     );
     return [...mainRows, ...extraRows];
-  }, [allRows, sequenceIslOsl, comparisonDates, comparisonDataKey, selectedRunDate]);
+  }, [allRows, selectedSequence, comparisonDates, comparisonDataKey, selectedRunDate]);
 
   // Transform filtered rows into chart data
   const { chartData, hardwareConfig: rawHardwareConfig } = useMemo(() => {
     if (rows.length === 0)
       return { chartData: [] as InferenceData[][], hardwareConfig: {} as HardwareConfig };
-    return transformBenchmarkRows(rows);
-  }, [rows]);
+    return transformBenchmarkRows(rows, selectedPercentile);
+  }, [rows, selectedPercentile]);
 
   // Sort hardware config — stabilize reference when keys haven't changed.
   // Different sequences for the same model often have the same GPU configs,
@@ -198,8 +297,11 @@ export function useChartData(
       (chartDefinitions as ChartDefinition[]).map((chartDef) => {
         const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey;
 
-        // Determine dynamic x-axis
-        let xAxisField: keyof AggDataEntry = chartDef.x;
+        // Default x-axis = chart's natural latency metric, percentile-adjusted
+        // for the agentic case (median_e2el → p99_e2el etc.). For non-agentic
+        // scenarios `withPercentile` is a no-op when percentile === 'median'.
+        const naturalX = withPercentile(chartDef.x, selectedPercentile) as keyof AggDataEntry;
+        let xAxisField: keyof AggDataEntry = naturalX;
         let xAxisLabel = chartDef.x_label;
 
         const metricTitle =
@@ -209,14 +311,25 @@ export function useChartData(
         // Resolve the effective x-axis override per chart type
         const effectiveXMetric =
           chartDef.chartType === 'e2e' ? selectedE2eXAxisMetric : selectedXAxisMetric;
+        // The TTFT override is now any *_ttft metric (not just p90_ttft) — the
+        // x-axis-mode picker reconciles the percentile prefix based on sequence
+        // kind (fixed-seq → median, agentic → user-picked percentile).
         const isTtftOverride =
-          effectiveXMetric === 'p99_ttft' || effectiveXMetric === 'median_ttft';
-        const ttftLabel =
-          effectiveXMetric === 'p99_ttft'
-            ? 'P99 Time To First Token (s)'
-            : 'Median Time To First Token (s)';
-
-        if (effectiveXMetric && chartDef.chartType === 'interactivity' && isInputMetric) {
+          typeof effectiveXMetric === 'string' && effectiveXMetric.endsWith('_ttft');
+        const ttftPctl = isTtftOverride
+          ? (effectiveXMetric as string).replace(/_ttft$/u, '')
+          : 'p90';
+        const ttftPctlWord = ttftPctl === 'median' ? 'Median' : ttftPctl.toUpperCase();
+        const ttftLabel = `${ttftPctlWord} Time To First Token (s)`;
+
+        const isAgentic = selectedSequence === Sequence.AgenticTraces;
+
+        if (
+          effectiveXMetric &&
+          chartDef.chartType === 'interactivity' &&
+          isInputMetric &&
+          !isAgentic
+        ) {
           xAxisField = effectiveXMetric as keyof AggDataEntry;
           const labelKey = `${selectedYAxisMetric}_x_label` as keyof ChartDefinition;
           if (effectiveXMetric === chartDef[`${selectedYAxisMetric}_x` as keyof ChartDefinition]) {
@@ -225,6 +338,10 @@ export function useChartData(
             xAxisLabel = isTtftOverride ? ttftLabel : chartDef.x_label;
           }
         } else if (chartDef.chartType === 'interactivity' && isInputMetric) {
+          // Agentic falls through here too — the manual X-axis dropdown is
+          // hidden in agentic mode (would double up with the percentile
+          // selector), so the config default + percentile post-processing
+          // below drives the x axis.
           const xOverrideKey = `${selectedYAxisMetric}_x` as keyof ChartDefinition;
           const xLabelOverrideKey = `${selectedYAxisMetric}_x_label` as keyof ChartDefinition;
           xAxisField = (chartDef[xOverrideKey] as keyof AggDataEntry) || chartDef.x;
@@ -234,12 +351,35 @@ export function useChartData(
           xAxisLabel = ttftLabel;
         }
 
+        // Agentic: rewrite the resolved x metric to the chosen percentile and
+        // relabel accordingly. Both are updated unconditionally — xAxisField
+        // may already be percentile-adjusted (via naturalX) while xAxisLabel
+        // still carries the raw chartDef.x_label prefix. The heading ("vs.
+        // <latency>") is rewritten too so the title reflects what's drawn.
+        // 'median' is title-cased (like ttftPctlWord); others are upper-cased.
+        const headingKey = `${selectedYAxisMetric}_heading` as keyof ChartDefinition;
+        let chartHeading = (chartDef[headingKey] as string) || chartDef.heading;
+        if (isAgentic) {
+          xAxisField = withPercentile(
+            xAxisField as string,
+            selectedPercentile,
+          ) as keyof AggDataEntry;
+          const pctlWord = selectedPercentile === 'median' ? 'Median' : selectedPercentile.toUpperCase();
+          xAxisLabel = xAxisLabel.replace(/^(Median|Mean|P75|P90|P95|P99(?:\.9)?)\b/iu, pctlWord);
+          chartHeading = chartHeading.replace(
+            /^(vs\.\s+)(?:(Median|Mean|P75|P90|P95|P99(?:\.9)?)\s+)?/iu,
+            `$1${pctlWord} `,
+          );
+        }
+
         // The x-axis is "flipped" only when the good-direction reverses
         // (e.g. interactivity → TTFT: "higher is better" → "lower is better").
         // E2EL → TTFT keeps the same direction ("lower is better" for both),
         // so no roofline flip is needed for the e2e chart.
+        // Compare against `naturalX` (percentile-adjusted) — switching the
+        // percentile of the same logical metric is NOT a flip.
         const xAxisFlipped =
-          xAxisField !== chartDef.x && !(chartDef.chartType === 'e2e' && isTtftOverride);
+          xAxisField !== naturalX && !(chartDef.chartType === 'e2e' && isTtftOverride);
 
         const yLabelKey = `${selectedYAxisMetric}_label` as keyof ChartDefinition;
         const dynamicYLabel = chartDef[yLabelKey];
@@ -260,6 +400,7 @@ export function useChartData(
           chartDefinition: {
             ...chartDef,
             ...rooflineOverrides,
+            heading: chartHeading,
             x_label: xAxisLabel,
             y_label: dynamicYLabel === null ? undefined : String(dynamicYLabel),
           },
@@ -267,7 +408,13 @@ export function useChartData(
           xAxisField,
         };
       }),
-    [selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric],
+    [
+      selectedYAxisMetric,
+      selectedXAxisMetric,
+      selectedE2eXAxisMetric,
+      selectedPercentile,
+      selectedSequence,
+    ],
   );
 
   // Build renderable graphs (data processing + stable chart definitions)
@@ -297,9 +444,30 @@ export function useChartData(
 
         filteredData = filterDataByCostLimit(filteredData, chartDefinition, selectedYAxisMetric);
 
+        // For AGENTIC workloads only: when the user is NOT viewing the
+        // e2e latency chart, mark each point with whether it sits on the
+        // (e2e_latency, y) Pareto frontier for its (hwKey, precision,
+        // date) group. The chart still renders every point as scatter —
+        // only e2e-Pareto winners feed the roofline (ScatterGraph honors
+        // the flag). Prevents benchmark-hacking the TTFT / interactivity
+        // line by tanking decode (or vice versa) without hiding the
+        // non-optimal configs from view.
+        //
+        // Fixed-seq workloads keep the existing per-axis Pareto since
+        // there's no separate "session-time" notion of total latency —
+        // their e2e IS the request latency, so a TTFT hack there reads
+        // honestly on e2e too. The anti-hack constraint is specifically
+        // about multi-turn agentic where TTFT measures a tiny fraction
+        // of the user-visible session time.
+        const isAgentic = selectedSequence === Sequence.AgenticTraces;
+        const e2eParetoSet =
+          isAgentic && selectedXAxisMode !== 'e2e'
+            ? e2eParetoIds(filteredData, selectedYAxisMetric, selectedPercentile)
+            : null;
+
         // Filter to points that have the selected metric, then remap x/y
         const hasMetric = filteredData.some((d) => metricKey in d);
-        const isTtftX = xAxisField === 'p99_ttft' || xAxisField === 'median_ttft';
+        const isTtftX = typeof xAxisField === 'string' && xAxisField.endsWith('_ttft');
         const processedData = hasMetric
           ? filteredData
               .filter((d) => metricKey in d)
@@ -307,18 +475,26 @@ export function useChartData(
                 const yValue = (d[metricKey] as { y: number })?.y ?? d.y;
                 const roof = (d[metricKey] as { roof: boolean })?.roof ?? false;
                 const xValue = (d as any)[xAxisField] ?? d.x;
+                const isOnE2eFrontier =
+                  e2eParetoSet === null
+                    ? undefined
+                    : typeof d.id === 'number' && e2eParetoSet.has(d.id);
                 return {
                   ...d,
                   x: xValue,
                   y: yValue,
                   roof,
+                  isOnE2eFrontier,
                 };
               })
-              // When TTFT is on the x-axis, apply the latency limit to filter overload outliers
-              // (e.g. conc=2048 rows with TTFT > 60s that compress all real data to the far left)
+              // When TTFT is on the x-axis, apply the latency limit to filter
+              // overload outliers (fixed-seq conc=2048 rows with TTFT > 60s that
+              // compress all real data to the far left). Skip for agentic — long
+              // TTFTs there reflect real workloads (multi-turn, big prompts).
               .filter(
                 (d) =>
                   !isTtftX ||
+                  isAgentic ||
                   !chartDefinition.y_latency_limit ||
                   d.x <= chartDefinition.y_latency_limit,
               )
@@ -343,6 +519,8 @@ export function useChartData(
     userPowers,
     stableChartDefinitions,
     compareGpuPair,
+    selectedXAxisMode,
+    selectedPercentile,
   ]);
 
   return { graphs, loading, error, hardwareConfig };
diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json
index e26d237e..dcd91e60 100644
--- a/packages/app/src/components/inference/inference-chart-config.json
+++ b/packages/app/src/components/inference/inference-chart-config.json
@@ -13,9 +13,9 @@
     "y_inputTputPerGpu_label": "Input Token Throughput per GPU (tok/s/gpu)",
     "y_inputTputPerGpu_title": "Input Token Throughput per GPU",
     "y_inputTputPerGpu_roofline": "upper_left",
-    "y_inputTputPerGpu_x": "p99_ttft",
-    "y_inputTputPerGpu_x_label": "P99 Time To First Token (s)",
-    "y_inputTputPerGpu_heading": "vs. P99 Time To First Token",
+    "y_inputTputPerGpu_x": "p90_ttft",
+    "y_inputTputPerGpu_x_label": "P90 Time To First Token (s)",
+    "y_inputTputPerGpu_heading": "vs. P90 Time To First Token",
     "y_outputTputPerGpu": "outputTputPerGpu.y",
     "y_outputTputPerGpu_label": "Output Token Throughput per GPU (tok/s/gpu)",
     "y_outputTputPerGpu_title": "Output Token Throughput per GPU",
@@ -105,8 +105,8 @@
     "y_inputTputPerGpu_label": "Input Token Throughput per GPU (tok/s/gpu)",
     "y_inputTputPerGpu_title": "Input Token Throughput per GPU",
     "y_inputTputPerGpu_roofline": "upper_right",
-    "y_inputTputPerGpu_x": "p99_ttft",
-    "y_inputTputPerGpu_x_label": "P99 Time To First Token (s)",
+    "y_inputTputPerGpu_x": "p90_ttft",
+    "y_inputTputPerGpu_x_label": "P90 Time To First Token (s)",
     "y_outputTputPerGpu": "outputTputPerGpu.y",
     "y_outputTputPerGpu_label": "Output Token Throughput per GPU (tok/s/gpu)",
     "y_outputTputPerGpu_title": "Output Token Throughput per GPU",
diff --git a/packages/app/src/components/inference/replay/buildReplayTimeline.ts b/packages/app/src/components/inference/replay/buildReplayTimeline.ts
index be076418..b0eb1446 100644
--- a/packages/app/src/components/inference/replay/buildReplayTimeline.ts
+++ b/packages/app/src/components/inference/replay/buildReplayTimeline.ts
@@ -82,8 +82,7 @@ function resolveXAxisField(
   const metricTitle =
     (chartDef[`${selectedYAxisMetric}_title` as keyof ChartDefinition] as string) || '';
   const isInputMetric = metricTitle.toLowerCase().includes('input');
-  const isTtftOverride =
-    selectedXAxisMetric === 'p99_ttft' || selectedXAxisMetric === 'median_ttft';
+  const isTtftOverride = selectedXAxisMetric === 'p90_ttft';
 
   if (selectedXAxisMetric && chartDef.chartType === 'interactivity' && isInputMetric) {
     return selectedXAxisMetric;
diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts
index 5b5f9ec2..219e6bd7 100644
--- a/packages/app/src/components/inference/types.ts
+++ b/packages/app/src/components/inference/types.ts
@@ -36,6 +36,8 @@ import type { Model, Sequence } from '@/lib/data-mappings';
  * @property {number} p99_e2el - 99th percentile of End-to-End Latency.
  */
 export interface AggDataEntry {
+  /** Stable per-point id from benchmark_results — for trace_replay lookups. */
+  id?: number;
   hw: string;
   mtp?: string;
   hwKey: string;
@@ -50,23 +52,43 @@ export interface AggDataEntry {
   mean_ttft: number;
   median_ttft: number;
   std_ttft: number;
+  p75_ttft: number;
+  p90_ttft: number;
+  p95_ttft: number;
   p99_ttft: number;
+  'p99.9_ttft': number;
   mean_tpot: number;
   mean_intvty: number;
   median_tpot: number;
   median_intvty: number;
   std_tpot: number;
   std_intvty: number;
+  p75_tpot: number;
+  p75_intvty: number;
+  p90_tpot: number;
+  p90_intvty: number;
+  p95_tpot: number;
+  p95_intvty: number;
   p99_tpot: number;
   p99_intvty: number;
+  'p99.9_tpot': number;
+  'p99.9_intvty': number;
   mean_itl: number;
   median_itl: number;
   std_itl: number;
+  p75_itl: number;
+  p90_itl: number;
+  p95_itl: number;
   p99_itl: number;
+  'p99.9_itl': number;
   mean_e2el: number;
   median_e2el: number;
   std_e2el: number;
+  p75_e2el: number;
+  p90_e2el: number;
+  p95_e2el: number;
   p99_e2el: number;
+  'p99.9_e2el': number;
   disagg: boolean;
   num_prefill_gpu: number;
   num_decode_gpu: number;
@@ -88,6 +110,29 @@ export interface AggDataEntry {
   actualDate?: string;
   /** URL to the GitHub Actions workflow run that produced this data point. */
   run_url?: string;
+  /** Benchmark scenario: `single_turn` (fixed-seq isl/osl) or `agentic_traces`. */
+  benchmark_type?: string;
+  /** ISL in tokens — null for agentic_traces. */
+  isl?: number | null;
+  /** OSL in tokens — null for agentic_traces. */
+  osl?: number | null;
+  // ── Agentic-only fields (populated from metrics JSONB for `agentic_traces` rows) ──
+  /** "on" | "off" — whether KV cache offload to CPU was enabled. */
+  offload_mode?: string;
+  /** Actual server-observed GPU prefix-cache hit rate (0..1). */
+  server_gpu_cache_hit_rate?: number;
+  /** Actual server-observed CPU prefix-cache hit rate (0..1). */
+  server_cpu_cache_hit_rate?: number;
+  /** Infinite-cache theoretical hit rate (0..1) computed from trace. */
+  theoretical_cache_hit_rate?: number;
+  /** Total requests attempted during the window. */
+  num_requests_total?: number;
+  /** Requests that completed successfully. */
+  num_requests_successful?: number;
+  /** Total prompt tokens served. */
+  total_prompt_tokens?: number;
+  /** Total generated (output) tokens. */
+  total_generation_tokens?: number;
 }
 
 /**
@@ -113,6 +158,17 @@ export interface InferenceData extends Partial<Omit<AggDataEntry, AggDataConflic
   x: number;
   y: number;
   hidden?: boolean;
+  /**
+   * Whether this point sits on the (e2e_latency, y-metric) Pareto frontier.
+   * Set by useChartData only for agentic sequences when the x-axis mode is
+   * not 'e2e'. The TTFT / interactivity / session-time / prefill-tps charts
+   * use this flag to restrict their roofline computation to e2e-Pareto
+   * winners — vendors can't benchmark-hack TTFT by tanking decode (or vice
+   * versa) and still appear on the frontier line — while keeping every point
+   * visible as scatter so the user can see where dominated configs sit.
+   * Undefined for fixed-seq workloads and in e2e mode (no remapping needed).
+   */
+  isOnE2eFrontier?: boolean;
 
   // Overridden fields with narrower types
   hwKey: string;
@@ -490,10 +546,26 @@ export interface InferenceChartContextType {
   workflowInfo: any;
   selectedYAxisMetric: string;
   setSelectedYAxisMetric: (metric: string) => void;
+  /** Latency percentile for the x-axis under agentic scenarios (median/p90/p99/p99.9). */
+  selectedPercentile: string;
+  setSelectedPercentile: (p: string) => void;
   selectedXAxisMetric: string | null;
   setSelectedXAxisMetric: (metric: string | null) => void;
   selectedE2eXAxisMetric: string | null;
   setSelectedE2eXAxisMetric: (metric: string | null) => void;
+  /**
+   * Which chart variant the user wants to see — the inference card shows one chart
+   * at a time, picked by the big buttons above the chart.
+   * - 'ttft'          → e2e chartType with x-axis forced to p90_ttft
+   * - 'e2e'           → e2e chartType with the chart-config default x-axis (median_e2el / p90_e2el)
+   * - 'interactivity' → interactivity chartType (x = median_intvty / p90_intvty)
+   * - 'session-time'  → agentic-only; x = mean-normalized session time (live-computed from trace blobs)
+   * - 'prefill-tps'   → agentic-only; x = mean of P90 prefill TPS/user per session
+   */
+  selectedXAxisMode: 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps';
+  setSelectedXAxisMode: (
+    mode: 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps',
+  ) => void;
   scaleType: 'auto' | 'linear' | 'log';
   setScaleType: (type: 'auto' | 'linear' | 'log') => void;
   setIsLegendExpanded: (metric: boolean) => void;
diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx
index 0b1705b0..ad222edc 100644
--- a/packages/app/src/components/inference/ui/ChartControls.tsx
+++ b/packages/app/src/components/inference/ui/ChartControls.tsx
@@ -1,13 +1,14 @@
 'use client';
 
-import { useState } from 'react';
+import { useEffect, useState } from 'react';
 
 import { track } from '@/lib/analytics';
 
 import { useInference } from '@/components/inference/InferenceContext';
 import {
   ModelSelector,
-  SequenceSelector,
+  ScenarioSelector,
+  PercentileSelector,
   PrecisionSelector,
 } from '@/components/ui/chart-selectors';
 import { DateRangePicker } from '@/components/ui/date-range-picker';
@@ -24,7 +25,7 @@ import { SearchableSelect } from '@/components/ui/searchable-select';
 import { TooltipProvider } from '@/components/ui/tooltip';
 import chartDefinitions from '@/components/inference/inference-chart-config.json';
 import type { ChartDefinition } from '@/components/inference/types';
-import type { Model, Sequence } from '@/lib/data-mappings';
+import { Sequence, type Model, type Percentile } from '@/lib/data-mappings';
 
 // Build Y-axis metric options from static chart config JSON — available immediately, no API wait
 const METRIC_GROUPS = [
@@ -79,6 +80,13 @@ interface ChartControlsProps {
 }
 
 export default function ChartControls({ hideGpuComparison = false }: ChartControlsProps) {
+  // The percentile selector is rendered conditionally on `selectedSequence`,
+  // which on the client is hydrated from URL params. SSR doesn't see the URL,
+  // so deferring the conditional until after mount keeps the initial DOM
+  // identical between server and client (avoids hydration warnings).
+  const [mounted, setMounted] = useState(false);
+  useEffect(() => setMounted(true), []);
+
   const [openDropdown, setOpenDropdown] = useState<string | null>(null);
   const handleDropdownOpenChange = (dropdownKey: string) => (open: boolean) => {
     if (open) {
@@ -87,6 +95,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
     }
     setOpenDropdown((current) => (current === dropdownKey ? null : current));
   };
+
   const {
     selectedModel,
     setSelectedModel,
@@ -96,6 +105,8 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
     setSelectedPrecisions,
     selectedYAxisMetric,
     setSelectedYAxisMetric,
+    selectedPercentile,
+    setSelectedPercentile,
     graphs,
     selectedGPUs,
     setSelectedGPUs,
@@ -214,14 +225,21 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
             availableModels={availableModels}
             data-testid="model-selector"
           />
-          <SequenceSelector
+          <ScenarioSelector
             value={selectedSequence}
             onChange={handleSequenceChange}
             open={openDropdown === 'sequence'}
             onOpenChange={handleDropdownOpenChange('sequence')}
             availableSequences={availableSequences}
-            data-testid="sequence-selector"
+            data-testid="scenario-selector"
           />
+          {mounted && selectedSequence === Sequence.AgenticTraces && (
+            <PercentileSelector
+              value={selectedPercentile}
+              onChange={(p: Percentile) => setSelectedPercentile(p)}
+              data-testid="percentile-selector"
+            />
+          )}
           <PrecisionSelector
             value={selectedPrecisions}
             onChange={handlePrecisionChange}
@@ -251,16 +269,17 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
           </div>
 
           {graphs.some((g) => g.chartDefinition?.chartType === 'interactivity') &&
-            isInputMetric && (
+            isInputMetric &&
+            selectedSequence !== Sequence.AgenticTraces && (
               <div className="flex flex-col space-y-1.5 lg:col-span-1">
                 <LabelWithTooltip
                   htmlFor="x-axis-select"
                   label="X-Axis Metric"
-                  tooltip="The latency metric displayed on the chart's X-axis. Options include P99 Time To First Token and Median Time To First Token."
+                  tooltip="The latency metric displayed on the chart's X-axis: P90 Time To First Token."
                 />
                 <Select
                   onValueChange={handleXAxisMetricChange}
-                  value={selectedXAxisMetric ?? 'p99_ttft'}
+                  value={selectedXAxisMetric ?? 'p90_ttft'}
                 >
                   <SelectTrigger
                     id="x-axis-select"
@@ -270,8 +289,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
                     <SelectValue />
                   </SelectTrigger>
                   <SelectContent portalled={false}>
-                    <SelectItem value="p99_ttft">P99 TTFT</SelectItem>
-                    <SelectItem value="median_ttft">Median TTFT</SelectItem>
+                    <SelectItem value="p90_ttft">P90 TTFT</SelectItem>
                   </SelectContent>
                 </Select>
               </div>
diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx
index f0e1692a..fd6cd9c1 100644
--- a/packages/app/src/components/inference/ui/ChartDisplay.tsx
+++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx
@@ -1,8 +1,8 @@
 'use client';
 import { track } from '@/lib/analytics';
 import dynamic from 'next/dynamic';
-import { useMemo, useRef, useState } from 'react';
-import { BarChart3, ChevronDown, Table2, X } from 'lucide-react';
+import { useEffect, useMemo, useRef, useState } from 'react';
+import { BarChart3, Table2, X } from 'lucide-react';
 
 import chartDefinitions from '@/components/inference/inference-chart-config.json';
 import { useInference } from '@/components/inference/InferenceContext';
@@ -30,7 +30,6 @@ import {
   DialogHeader,
   DialogTitle,
 } from '@/components/ui/dialog';
-import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
 import { Skeleton } from '@/components/ui/skeleton';
 import { useUnofficialRun } from '@/components/unofficial-run-provider';
 import {
@@ -40,8 +39,10 @@ import {
   getModelLabel,
   getPrecisionLabel,
   getSequenceLabel,
+  sequenceKind,
 } from '@/lib/data-mappings';
 import { useComparisonChangelogs } from '@/hooks/api/use-comparison-changelogs';
+import { useDerivedAgenticMetrics } from '@/hooks/api/use-derived-agentic-metrics';
 import { useTrendData } from '@/components/inference/hooks/useTrendData';
 import { hardwareKeyMatchesAnyBase } from '@/lib/constants';
 
@@ -59,54 +60,30 @@ const ModelArchitectureDiagram = dynamic(() => import('./ModelArchitectureDiagra
 });
 import WorkflowInfoDisplay from './WorkflowInfoDisplay';
 
-/** Controlled popover dropdown for the e2e chart x-axis toggle. */
-function E2eXAxisDropdown({
-  xAxisLabel,
-  xAxisOptions,
-  selectedValue,
-  onSelect,
-}: {
-  xAxisLabel: string;
-  xAxisOptions: { value: string | null; label: string }[];
-  selectedValue: string | null;
-  onSelect: (value: string | null) => void;
-}) {
-  const [open, setOpen] = useState(false);
-  return (
-    <Popover open={open} onOpenChange={setOpen}>
-      <PopoverTrigger asChild>
-        <button
-          className="inline-flex items-center gap-1 hover:opacity-70 transition-opacity cursor-pointer"
-          onClick={(e) => e.stopPropagation()}
-        >
-          vs. {xAxisLabel}
-          <ChevronDown className="no-export size-3.5 shrink-0 opacity-60" />
-        </button>
-      </PopoverTrigger>
-      <PopoverContent className="w-48 p-1" align="start">
-        {xAxisOptions.map((opt) => (
-          <button
-            key={opt.label}
-            className={`w-full text-left px-3 py-1.5 text-sm rounded hover:bg-accent transition-colors ${
-              (opt.value === null && !selectedValue) || opt.value === selectedValue
-                ? 'font-medium'
-                : ''
-            }`}
-            onClick={() => {
-              onSelect(opt.value);
-              setOpen(false);
-            }}
-          >
-            {opt.label}
-          </button>
-        ))}
-      </PopoverContent>
-    </Popover>
-  );
-}
-
 type InferenceViewMode = 'chart' | 'table';
 
+/**
+ * The chart variants the user can choose with the big buttons above the chart
+ * card. The first three map to entries in `inference-chart-config.json` plus a
+ * forced x-axis override for the E2E chartType; the last two are agentic-only
+ * derived metrics computed live from the stored trace_replay blobs.
+ */
+type XAxisMode = 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps';
+
+interface XAxisModeButton {
+  value: XAxisMode;
+  label: string;
+  /** When true, the button is only shown on agentic scenarios. */
+  agenticOnly?: boolean;
+}
+const X_AXIS_MODE_BUTTONS: XAxisModeButton[] = [
+  { value: 'ttft', label: 'TTFT' },
+  { value: 'e2e', label: 'E2E Latency' },
+  { value: 'interactivity', label: 'Interactivity' },
+  { value: 'session-time', label: 'Session Time', agenticOnly: true },
+  { value: 'prefill-tps', label: 'Prefill TPS / user', agenticOnly: true },
+];
+
 const VIEW_MODE_OPTIONS: SegmentedToggleOption<InferenceViewMode>[] = [
   {
     value: 'chart',
@@ -151,8 +128,10 @@ export default function ChartDisplay() {
     logScale,
     activeHwTypes,
     activeDates,
-    setSelectedE2eXAxisMetric,
+    selectedPercentile,
     compareGpuPair,
+    selectedXAxisMode,
+    setSelectedXAxisMode,
   } = useInference();
 
   const {
@@ -161,6 +140,13 @@ export default function ChartDisplay() {
     totalDatesQueried,
   } = useComparisonChangelogs(selectedGPUs, selectedDateRange, dateRangeAvailableDates);
 
+  // SSR has no URL access and `selectedSequence` defaults to agentic on the
+  // server even when the URL says fixed-seq — so any conditional rendering
+  // that keys off `sequenceKind(selectedSequence)` would diverge between
+  // server and client first render. Defer agentic-only UI until after mount.
+  const [mounted, setMounted] = useState(false);
+  useEffect(() => setMounted(true), []);
+
   const [viewModes, setViewModes] = useState<Record<number, InferenceViewMode>>({});
   const replayHandlesRef = useRef<Record<number, ReplayLauncherHandle | null>>({});
   const getViewMode = (index: number): InferenceViewMode => viewModes[index] ?? 'chart';
@@ -210,6 +196,7 @@ export default function ChartDisplay() {
         chartType,
         selectedYAxisMetric,
         effectiveXMetric,
+        { isAgentic: sequenceKind(selectedSequence) === 'agentic' },
       );
 
       let overlayPoints = processed;
@@ -327,214 +314,258 @@ export default function ChartDisplay() {
     }));
   }, [graphs, overlayDataByChartType, selectedModel, selectedSequence]);
 
-  const displayGraphs = isFirstLoad
-    ? Array.from({ length: 2 }).map((_, index) => (
-        <Card key={`skeleton-${index}`}>
-          <Skeleton className="h-7 w-2/4 mb-1" />
-          <Skeleton className="h-5 w-3/4 mb-2" />
-          <Skeleton className="h-[600px] w-full" />
-        </Card>
-      ))
-    : effectiveGraphs.length === 0
-      ? []
-      : effectiveGraphs.map((graph, graphIndex) => {
-          const isTimelineMode = Boolean(
-            selectedDateRange.startDate && selectedDateRange.endDate && selectedGPUs.length > 0,
-          );
-          const replayAvailable = getViewMode(graphIndex) === 'chart' && !isTimelineMode;
-          return (
-            <section key={graphIndex} className="pt-8 md:pt-0">
-              <figure data-testid="chart-figure" className="relative rounded-lg">
-                <ChartButtons
-                  chartId={`chart-${graphIndex}`}
-                  analyticsPrefix={
-                    isTimelineMode
-                      ? 'gpu_timeseries'
-                      : graph.chartDefinition.chartType === 'e2e'
-                        ? 'latency'
-                        : 'interactivity'
-                  }
-                  leadingControls={
-                    <SegmentedToggle
-                      value={getViewMode(graphIndex)}
-                      options={VIEW_MODE_OPTIONS}
-                      onValueChange={(v) => handleViewModeChange(graphIndex, v)}
-                      ariaLabel="View mode"
-                      testId={`inference-view-toggle-${graphIndex}`}
-                    />
-                  }
-                  hideImageExport={getViewMode(graphIndex) === 'table'}
-                  setIsLegendExpanded={setIsLegendExpanded}
-                  exportFileName={`InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`}
-                  onExportMp4={
-                    replayAvailable ? () => replayHandlesRef.current[graphIndex]?.open() : undefined
-                  }
-                  onExportCsv={() => {
-                    const visibleData = graph.data.filter((d) =>
+  // Show one chart at a time, picked by the buttons above the chart.
+  //  - 'interactivity' renders the interactivity chartType.
+  //  - 'ttft' / 'e2e' render the e2e chartType (x swap via selectedE2eXAxisMetric).
+  //  - 'session-time' / 'prefill-tps' render the e2e chartType too; the x-axis
+  //    is overridden below from live-computed derived metrics.
+  const visibleGraphs = useMemo(() => {
+    const wantedType = selectedXAxisMode === 'interactivity' ? 'interactivity' : 'e2e';
+    const filtered = effectiveGraphs.filter((g) => g.chartDefinition.chartType === wantedType);
+    return filtered.length > 0 ? filtered : effectiveGraphs;
+  }, [effectiveGraphs, selectedXAxisMode]);
+
+  // Derived-metric path: fetch live-computed values from the trace_replay blobs
+  // and override scatter data.x. Only fires for the two agentic-only modes.
+  const useDerived =
+    sequenceKind(selectedSequence) === 'agentic' &&
+    (selectedXAxisMode === 'session-time' || selectedXAxisMode === 'prefill-tps');
+  const derivedTargetIds = useMemo(() => {
+    if (!useDerived) return [] as number[];
+    const ids = new Set<number>();
+    for (const g of visibleGraphs) {
+      for (const d of g.data) {
+        if (d.benchmark_type === 'agentic_traces' && typeof d.id === 'number') {
+          ids.add(d.id);
+        }
+      }
+    }
+    return [...ids];
+  }, [useDerived, visibleGraphs]);
+  const derivedQuery = useDerivedAgenticMetrics(derivedTargetIds, useDerived);
+  const derivedMetrics = derivedQuery.data;
+  // Show skeleton (not "No data available") while the derived-metrics query
+  // is in flight. Without this gate, every flip to session-time / prefill-tps
+  // briefly blanks the chart and surfaces a misleading empty-state.
+  const isDerivedLoading =
+    useDerived &&
+    derivedTargetIds.length > 0 &&
+    (derivedQuery.isPending || derivedQuery.isFetching) &&
+    !derivedMetrics;
+
+  const renderableGraphs = useMemo(() => {
+    if (!useDerived) return visibleGraphs;
+    if (!derivedMetrics) return visibleGraphs.map((g) => ({ ...g, data: [] }));
+    const isSession = selectedXAxisMode === 'session-time';
+    const xLabel = isSession
+      ? 'Mean Normalized Session Time (min)'
+      : 'P90 Prefill TPS per user (tok/s)';
+    // Roofline corner, chosen from how each curve is shaped as concurrency rises:
+    //  - session-time: session time AND throughput both grow, so points climb
+    //    from bottom-left to top-right → 'upper_right'.
+    //  - prefill-tps: per-user prefill TPS falls while total throughput rises,
+    //    so points move from bottom-right to top-left → 'upper_left'.
+    // NOTE(review): confirm against chart-config that the corner names the end
+    // the curve climbs toward rather than the Pareto-optimal corner.
+    const rooflineCorner = isSession ? 'upper_right' : 'upper_left';
+    return visibleGraphs.map((g) => {
+      const overriddenChartDef = {
+        ...g.chartDefinition,
+        x_label: xLabel,
+        // y_latency_limit was meant to suppress fixed-seq overload outliers on
+        // the TTFT axis — irrelevant for these derived axes.
+        y_latency_limit: undefined,
+        [`${selectedYAxisMetric}_roofline` as keyof typeof g.chartDefinition]: rooflineCorner,
+      };
+      const data = g.data
+        .map((d) => {
+          if (typeof d.id !== 'number') return null;
+          const m = derivedMetrics[d.id];
+          const raw = isSession ? m?.normalized_session_time_s : m?.p90_prefill_tps_per_user;
+          if (raw === null || raw === undefined || !Number.isFinite(raw)) return null;
+          const v = isSession ? raw / 60 : raw;
+          return { ...d, x: v };
+        })
+        .filter((d): d is NonNullable<typeof d> => d !== null);
+      return { ...g, chartDefinition: overriddenChartDef, data };
+    });
+  }, [useDerived, visibleGraphs, derivedMetrics, selectedXAxisMode, selectedYAxisMetric]);
+
+  const displayGraphs =
+    isFirstLoad || isDerivedLoading
+      ? [
+          <Card key="skeleton-0">
+            <Skeleton className="h-7 w-2/4 mb-1" />
+            <Skeleton className="h-5 w-3/4 mb-2" />
+            <Skeleton className="h-[600px] w-full" />
+          </Card>,
+        ]
+      : renderableGraphs.length === 0
+        ? []
+        : renderableGraphs.map((graph, graphIndex) => {
+            const isTimelineMode = Boolean(
+              selectedDateRange.startDate && selectedDateRange.endDate && selectedGPUs.length > 0,
+            );
+            const replayAvailable = getViewMode(graphIndex) === 'chart' && !isTimelineMode;
+            return (
+              <section key={graphIndex} className="pt-8 md:pt-0">
+                <figure data-testid="chart-figure" className="relative rounded-lg">
+                  <ChartButtons
+                    chartId={`chart-${graphIndex}`}
+                    analyticsPrefix={
                       isTimelineMode
-                        ? activeDates.has(`${d.date}_${d.hwKey}`)
-                        : activeHwTypes.has(d.hwKey as string) &&
-                          selectedPrecisions.includes(d.precision),
-                    );
-                    const { headers, rows } = inferenceChartToCsv(
-                      visibleData,
-                      graph.model,
-                      graph.sequence,
-                    );
-                    exportToCsv(
-                      `InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`,
-                      headers,
-                      rows,
-                    );
-                  }}
-                />
-                <Card>
-                  {(() => {
-                    const chartCaption = (
-                      <>
-                        <h2 className="text-lg font-semibold">
-                          {
-                            graph.chartDefinition[
-                              `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition
-                            ]
-                          }{' '}
-                          {(() => {
-                            // For Input metrics with dynamic x-axis, use dynamic heading
-                            const metricTitle =
-                              (graph.chartDefinition[
+                        ? 'gpu_timeseries'
+                        : graph.chartDefinition.chartType === 'e2e'
+                          ? 'latency'
+                          : 'interactivity'
+                    }
+                    leadingControls={
+                      <SegmentedToggle
+                        value={getViewMode(graphIndex)}
+                        options={VIEW_MODE_OPTIONS}
+                        onValueChange={(v) => handleViewModeChange(graphIndex, v)}
+                        ariaLabel="View mode"
+                        testId={`inference-view-toggle-${graphIndex}`}
+                      />
+                    }
+                    hideImageExport={getViewMode(graphIndex) === 'table'}
+                    setIsLegendExpanded={setIsLegendExpanded}
+                    exportFileName={`InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`}
+                    onExportMp4={
+                      replayAvailable
+                        ? () => replayHandlesRef.current[graphIndex]?.open()
+                        : undefined
+                    }
+                    onExportCsv={() => {
+                      const visibleData = graph.data.filter((d) =>
+                        isTimelineMode
+                          ? activeDates.has(`${d.date}_${d.hwKey}`)
+                          : activeHwTypes.has(d.hwKey as string) &&
+                            selectedPrecisions.includes(d.precision),
+                      );
+                      const { headers, rows } = inferenceChartToCsv(
+                        visibleData,
+                        graph.model,
+                        graph.sequence,
+                      );
+                      exportToCsv(
+                        `InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`,
+                        headers,
+                        rows,
+                      );
+                    }}
+                  />
+                  <Card>
+                    {(() => {
+                      const chartCaption = (
+                        <>
+                          <h2 className="text-lg font-semibold">
+                            {
+                              graph.chartDefinition[
                                 `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition
-                              ] as string) || '';
-                            const isInputMetric = metricTitle.toLowerCase().includes('input');
-                            if (
-                              graph.chartDefinition.chartType === 'interactivity' &&
-                              isInputMetric &&
-                              selectedXAxisMetric
-                            ) {
-                              if (selectedXAxisMetric === 'p99_ttft') {
-                                return 'vs. P99 Time To First Token';
-                              } else if (selectedXAxisMetric === 'median_ttft') {
-                                return 'vs. Median Time To First Token';
+                              ]
+                            }{' '}
+                            {(() => {
+                              // For Input metrics with dynamic x-axis, use dynamic heading
+                              const metricTitle =
+                                (graph.chartDefinition[
+                                  `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition
+                                ] as string) || '';
+                              const isInputMetric = metricTitle.toLowerCase().includes('input');
+                              if (
+                                graph.chartDefinition.chartType === 'interactivity' &&
+                                isInputMetric &&
+                                selectedXAxisMetric === 'p90_ttft'
+                              ) {
+                                return 'vs. P90 Time To First Token';
                               }
-                            }
 
-                            // For e2e chart: render clickable inline dropdown for x-axis
-                            if (graph.chartDefinition.chartType === 'e2e') {
-                              const xAxisLabel =
-                                selectedE2eXAxisMetric === 'p99_ttft'
-                                  ? 'P99 TTFT'
-                                  : selectedE2eXAxisMetric === 'median_ttft'
-                                    ? 'Median TTFT'
-                                    : 'End-to-end Latency';
-                              const xAxisOptions = [
-                                { value: null, label: 'End-to-end Latency' },
-                                { value: 'p99_ttft', label: 'P99 TTFT' },
-                                { value: 'median_ttft', label: 'Median TTFT' },
-                              ];
-                              const zoomPrefix =
-                                selectedDateRange.startDate &&
-                                selectedDateRange.endDate &&
-                                selectedGPUs.length > 0
-                                  ? 'gpu_timeseries'
-                                  : 'latency';
-                              return (
-                                <E2eXAxisDropdown
-                                  xAxisLabel={xAxisLabel}
-                                  xAxisOptions={xAxisOptions}
-                                  selectedValue={selectedE2eXAxisMetric}
-                                  onSelect={(value) => {
-                                    setSelectedE2eXAxisMetric(value);
-                                    track('latency_x_axis_metric_selected', {
-                                      metric: value ?? 'median_e2el',
-                                    });
-                                    window.dispatchEvent(
-                                      new CustomEvent(
-                                        `${zoomPrefix}_zoom_reset_chart-${graphIndex}`,
-                                      ),
-                                    );
-                                  }}
-                                />
-                              );
-                            }
+                              // For e2e chart: heading is driven by the buttons above the
+                              // card. Derived-metric modes win first; otherwise the metric
+                              // carries the percentile prefix (e.g. p90_ttft, median_ttft).
+                              if (graph.chartDefinition.chartType === 'e2e') {
+                                if (selectedXAxisMode === 'session-time') {
+                                  return 'vs. Mean Normalized Session Time';
+                                }
+                                if (selectedXAxisMode === 'prefill-tps') {
+                                  return 'vs. P90 Prefill TPS / user';
+                                }
+                                const isAgentic = sequenceKind(selectedSequence) === 'agentic';
+                                if (selectedE2eXAxisMetric?.endsWith('_ttft')) {
+                                  const pctl = selectedE2eXAxisMetric.replace(/_ttft$/u, '');
+                                  const word = pctl === 'median' ? 'Median' : pctl.toUpperCase();
+                                  return `vs. ${word} Time To First Token`;
+                                }
+                                const pctlWord = selectedPercentile.toUpperCase();
+                                return isAgentic
+                                  ? `vs. ${pctlWord} End-to-end Latency`
+                                  : 'vs. End-to-end Latency';
+                              }
 
-                            // Fall back to configured heading
-                            return (
-                              graph.chartDefinition[
-                                `${selectedYAxisMetric}_heading` as keyof typeof graph.chartDefinition
-                              ] || graph.chartDefinition.heading
-                            );
-                          })()}
-                        </h2>
-                        <p className="text-sm text-muted-foreground mb-2">
-                          {getModelLabel(graph.model as Model)} •{' '}
-                          {selectedPrecisions
-                            .map((prec) => getPrecisionLabel(prec as Precision))
-                            .join(', ')}{' '}
-                          • {getSequenceLabel(graph.sequence as Sequence)} •{' '}
-                          {isUnofficialRun
-                            ? 'Source: UNOFFICIAL'
-                            : 'Source: SemiAnalysis InferenceX™'}
-                          {selectedRunDate && (
-                            <>
-                              {' '}
-                              • Updated:{' '}
-                              {new Date(`${selectedRunDate}T00:00:00Z`).toLocaleDateString(
-                                'en-US',
-                                {
-                                  year: 'numeric',
-                                  month: '2-digit',
-                                  day: '2-digit',
-                                  timeZone: 'UTC',
-                                },
-                              )}
-                            </>
-                          )}
-                        </p>
-                        <MetricAssumptionNotes selectedYAxisMetric={selectedYAxisMetric} />
-                        <UnofficialDomainNotice />
-                      </>
-                    );
-
-                    if (getViewMode(graphIndex) === 'table') {
-                      const overlay =
-                        graph.chartDefinition.chartType === 'e2e'
-                          ? overlayDataByChartType.e2e
-                          : overlayDataByChartType.interactivity;
-                      const overlayRows = (overlay?.data ?? []).filter((p) =>
-                        selectedPrecisions.includes(p.precision),
-                      );
-                      return (
-                        <>
-                          {chartCaption}
-                          <InferenceTable
-                            data={
-                              overlayRows.length > 0 ? [...graph.data, ...overlayRows] : graph.data
-                            }
-                            chartDefinition={graph.chartDefinition}
-                            selectedYAxisMetric={selectedYAxisMetric}
-                          />
+                              // Fall back to the heading baked into chartDefinition
+                              // by useChartData (already resolves per-metric overrides
+                              // and applies the agentic percentile rewrite).
+                              return graph.chartDefinition.heading;
+                            })()}
+                          </h2>
+                          <p className="text-sm text-muted-foreground mb-2">
+                            {getModelLabel(graph.model as Model)} •{' '}
+                            {selectedPrecisions
+                              .map((prec) => getPrecisionLabel(prec as Precision))
+                              .join(', ')}{' '}
+                            • {getSequenceLabel(graph.sequence as Sequence)} •{' '}
+                            {isUnofficialRun
+                              ? 'Source: UNOFFICIAL'
+                              : 'Source: SemiAnalysis InferenceX™'}
+                            {selectedRunDate && (
+                              <>
+                                {' '}
+                                • Updated:{' '}
+                                {new Date(`${selectedRunDate}T00:00:00Z`).toLocaleDateString(
+                                  'en-US',
+                                  {
+                                    year: 'numeric',
+                                    month: '2-digit',
+                                    day: '2-digit',
+                                    timeZone: 'UTC',
+                                  },
+                                )}
+                              </>
+                            )}
+                          </p>
+                          <MetricAssumptionNotes selectedYAxisMetric={selectedYAxisMetric} />
+                          <UnofficialDomainNotice />
                         </>
                       );
-                    }
 
-                    return selectedDateRange.startDate &&
-                      selectedDateRange.endDate &&
-                      selectedGPUs.length > 0 ? (
-                      <GPUGraph
-                        chartId={`chart-${graphIndex}`}
-                        modelLabel={graph.model}
-                        data={graph.data}
-                        xLabel={graph.chartDefinition.x_label}
-                        yLabel={`${
-                          graph.chartDefinition[
-                            `${selectedYAxisMetric}_label` as keyof typeof graph.chartDefinition
-                          ]
-                        }`}
-                        chartDefinition={graph.chartDefinition}
-                        caption={chartCaption}
-                      />
-                    ) : (
-                      <div className="relative">
-                        <ScatterGraph
+                      if (getViewMode(graphIndex) === 'table') {
+                        const overlay =
+                          graph.chartDefinition.chartType === 'e2e'
+                            ? overlayDataByChartType.e2e
+                            : overlayDataByChartType.interactivity;
+                        const overlayRows = (overlay?.data ?? []).filter((p) =>
+                          selectedPrecisions.includes(p.precision),
+                        );
+                        return (
+                          <>
+                            {chartCaption}
+                            <InferenceTable
+                              data={
+                                overlayRows.length > 0
+                                  ? [...graph.data, ...overlayRows]
+                                  : graph.data
+                              }
+                              chartDefinition={graph.chartDefinition}
+                              selectedYAxisMetric={selectedYAxisMetric}
+                            />
+                          </>
+                        );
+                      }
+
+                      return selectedDateRange.startDate &&
+                        selectedDateRange.endDate &&
+                        selectedGPUs.length > 0 ? (
+                        <GPUGraph
                           chartId={`chart-${graphIndex}`}
                           modelLabel={graph.model}
                           data={graph.data}
@@ -546,43 +577,58 @@ export default function ChartDisplay() {
                           }`}
                           chartDefinition={graph.chartDefinition}
                           caption={chartCaption}
-                          overlayData={
-                            graph.chartDefinition.chartType === 'e2e'
-                              ? (overlayDataByChartType.e2e ?? undefined)
-                              : (overlayDataByChartType.interactivity ?? undefined)
-                          }
                         />
-                        {selectedGPUs.length > 0 &&
-                          (!selectedDateRange.startDate || !selectedDateRange.endDate) && (
-                            <div className="absolute inset-0 flex items-center justify-center bg-background/60 backdrop-blur-[2px] rounded-lg z-10">
-                              <p className="text-sm font-medium text-muted-foreground bg-background/90 border border-border rounded-md px-4 py-2 shadow-sm">
-                                Select a date range to view GPU comparison
-                              </p>
-                            </div>
-                          )}
-                      </div>
-                    );
-                  })()}
-                  {replayAvailable && (
-                    <ReplayLauncher
-                      ref={(handle) => {
-                        replayHandlesRef.current[graphIndex] = handle;
-                      }}
-                      parentChartId={`chart-${graphIndex}`}
-                      chartDefinition={graph.chartDefinition}
-                      yLabel={`${
-                        graph.chartDefinition[
-                          `${selectedYAxisMetric}_label` as keyof typeof graph.chartDefinition
-                        ]
-                      }`}
-                      xLabel={graph.chartDefinition.x_label}
-                    />
-                  )}
-                </Card>
-              </figure>
-            </section>
-          );
-        });
+                      ) : (
+                        <div className="relative">
+                          <ScatterGraph
+                            chartId={`chart-${graphIndex}`}
+                            modelLabel={graph.model}
+                            data={graph.data}
+                            xLabel={graph.chartDefinition.x_label}
+                            yLabel={`${
+                              graph.chartDefinition[
+                                `${selectedYAxisMetric}_label` as keyof typeof graph.chartDefinition
+                              ]
+                            }`}
+                            chartDefinition={graph.chartDefinition}
+                            caption={chartCaption}
+                            overlayData={
+                              graph.chartDefinition.chartType === 'e2e'
+                                ? (overlayDataByChartType.e2e ?? undefined)
+                                : (overlayDataByChartType.interactivity ?? undefined)
+                            }
+                          />
+                          {selectedGPUs.length > 0 &&
+                            (!selectedDateRange.startDate || !selectedDateRange.endDate) && (
+                              <div className="absolute inset-0 flex items-center justify-center bg-background/60 backdrop-blur-[2px] rounded-lg z-10">
+                                <p className="text-sm font-medium text-muted-foreground bg-background/90 border border-border rounded-md px-4 py-2 shadow-sm">
+                                  Select a date range to view GPU comparison
+                                </p>
+                              </div>
+                            )}
+                        </div>
+                      );
+                    })()}
+                    {replayAvailable && (
+                      <ReplayLauncher
+                        ref={(handle) => {
+                          replayHandlesRef.current[graphIndex] = handle;
+                        }}
+                        parentChartId={`chart-${graphIndex}`}
+                        chartDefinition={graph.chartDefinition}
+                        yLabel={`${
+                          graph.chartDefinition[
+                            `${selectedYAxisMetric}_label` as keyof typeof graph.chartDefinition
+                          ]
+                        }`}
+                        xLabel={graph.chartDefinition.x_label}
+                      />
+                    )}
+                  </Card>
+                </figure>
+              </section>
+            );
+          });
 
   return (
     <div data-testid="inference-chart-display" className="flex flex-col gap-4">
@@ -640,6 +686,43 @@ export default function ChartDisplay() {
           <CustomPowers loading={loading} />
         </section>
       )}
+      <section
+        className="flex flex-wrap justify-center gap-3 sm:gap-4"
+        role="tablist"
+        aria-label="Chart x-axis metric"
+        data-testid="x-axis-mode-buttons"
+      >
+        {X_AXIS_MODE_BUTTONS.filter(({ agenticOnly }) => {
+          if (!agenticOnly) return true;
+          // Before client mount, keep agentic-only buttons visible: the server
+          // defaults to the agentic kind, so SSR and the first client render must
+          // emit identical DOM. After mount, hide them on fixed-seq sequences.
+          if (!mounted) return true;
+          return sequenceKind(selectedSequence) === 'agentic';
+        }).map(({ value, label }) => {
+          const isActive = selectedXAxisMode === value;
+          return (
+            <button
+              key={value}
+              type="button"
+              role="tab"
+              aria-selected={isActive}
+              data-testid={`x-axis-mode-${value}`}
+              onClick={() => {
+                setSelectedXAxisMode(value);
+                track('latency_x_axis_mode_selected', { mode: value });
+              }}
+              className={`min-w-[160px] flex-1 sm:flex-initial rounded-full border-2 px-6 py-3 text-base font-semibold transition-colors ${
+                isActive
+                  ? 'border-primary bg-primary text-primary-foreground shadow-sm'
+                  : 'border-border bg-card text-foreground hover:border-primary/60 hover:bg-accent'
+              }`}
+            >
+              {label}
+            </button>
+          );
+        })}
+      </section>
       <div className="flex flex-col gap-4">{displayGraphs}</div>
 
       {/* Performance Over Time — Modal Drill-Down */}
diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx
index f9a73aa8..2552a334 100644
--- a/packages/app/src/components/inference/ui/ScatterGraph.tsx
+++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx
@@ -6,11 +6,13 @@ import React, { useCallback, useEffect, useMemo, useRef } from 'react';
 
 import { GRADIENT_NUDGE_EVENT } from '@/lib/nudges/registry';
 import { useInference } from '@/components/inference/InferenceContext';
+import { useTraceAvailability } from '@/hooks/api/use-trace-availability';
+import { useRouter } from 'next/navigation';
 import ChartLegend from '@/components/ui/chart-legend';
 import { useUnofficialRun } from '@/components/unofficial-run-provider';
 import { computeToggle } from '@/hooks/useTogglableSet';
 import { getHardwareConfig, getModelSortIndex } from '@/lib/constants';
-import { getChartWatermark } from '@/lib/data-mappings';
+import { getChartWatermark, Sequence } from '@/lib/data-mappings';
 import { formatNumber, getDisplayLabel, updateRepoUrl } from '@/lib/utils';
 import { D3Chart } from '@/lib/d3-chart/D3Chart';
 import type {
@@ -63,6 +65,96 @@ import {
   buildGradientColorMap,
 } from '@/components/inference/utils/paretoLabels';
 
+// Greedy label-collision avoidance for the `.point-label` of each `.dot-group`.
+// Labels are visited left-to-right; each takes the first of four candidate offsets
+// for its FIRST baseline (above, below, farther above, farther below the point
+// center; applied via the first tspan's `dy`, later tspans cascade down by 1.1em)
+// whose padded box overlaps no already-placed label, else it is hidden (opacity 0).
+function avoidLabelCollisions(
+  zoomGroup: d3.Selection<SVGGElement, unknown, null, undefined>,
+): void {
+  interface LabelInfo {
+    el: SVGTextElement; // the `.point-label` text element
+    firstTspan: SVGTSpanElement; // first tspan; receives the chosen `dy`
+    cx: number; // x parsed from the dot-group's translate(...)
+    cy: number; // y parsed from the dot-group's translate(...)
+    w: number; // label width measured with getBBox()
+    nLines: number; // number of tspans in the label
+    defaultFirstY: number; // first-baseline offset of the default "above" slot
+  }
+  const labels: LabelInfo[] = [];
+  const ASCENT = 9; // px reserved above a label's first baseline
+  const DESCENT = 3; // px reserved below a label's last baseline
+  const LINE_H = 11; // px assumed between consecutive baselines
+
+  zoomGroup.selectAll<SVGGElement, unknown>('.dot-group').each(function () {
+    const labelEl = this.querySelector<SVGTextElement>('.point-label');
+    if (!labelEl) return;
+    if ((this as SVGGElement).style.opacity === '0') return; // group hidden inline
+    const tspans = labelEl.querySelectorAll<SVGTSpanElement>('tspan');
+    if (tspans.length === 0) return;
+    const transform = (this as SVGGElement).getAttribute('transform') ?? '';
+    const m = transform.match(/translate\(([^,]+),([^)]+)\)/);
+    if (!m) return;
+    const cx = parseFloat(m[1]); // NOTE(review): non-finite cx/cy are not filtered;
+    const cy = parseFloat(m[2]); // a NaN box counts as overlapping every other box
+    const nLines = tspans.length;
+    const defaultFirstY = -(8 + (nLines - 1) * LINE_H); // last baseline 8px above point
+    // Restore the default slot (and visibility) so an earlier pass can't skew the bbox
+    tspans[0].setAttribute('dy', `${defaultFirstY}px`);
+    labelEl.style.opacity = '1';
+    const bbox = labelEl.getBBox(); // only the width is used below
+    labels.push({
+      el: labelEl,
+      firstTspan: tspans[0],
+      cx,
+      cy,
+      w: bbox.width,
+      nLines,
+      defaultFirstY,
+    });
+  });
+
+  labels.sort((a, b) => a.cx - b.cx); // left-to-right placement order
+  const placed: { left: number; right: number; top: number; bottom: number }[] = [];
+  const pad = 2; // px of clearance added on every side of a box
+
+  for (const lab of labels) {
+    const blockH = (lab.nLines - 1) * LINE_H + ASCENT + DESCENT; // estimated label height
+    const aboveFirstY = lab.defaultFirstY;
+    const belowFirstY = 14; // first baseline 14px below point center
+    const candidates = [
+      aboveFirstY, // primary slot above the point
+      belowFirstY, // primary slot below the point
+      aboveFirstY - blockH - 2, // secondary: one block (+2px) farther up
+      belowFirstY + blockH + 2, // secondary: one block (+2px) farther down
+    ];
+    let chosenY: number | null = null;
+    let chosenBox: { left: number; right: number; top: number; bottom: number } | null = null;
+    for (const firstY of candidates) {
+      const top = lab.cy + firstY - ASCENT - pad;
+      const bottom = lab.cy + firstY + (lab.nLines - 1) * LINE_H + DESCENT + pad;
+      const left = lab.cx - lab.w / 2 - pad; // box is centred horizontally on cx
+      const right = lab.cx + lab.w / 2 + pad;
+      const collides = placed.some( // rectangle overlap; touching edges count as a hit
+        (p) => !(right < p.left || left > p.right || bottom < p.top || top > p.bottom),
+      );
+      if (!collides) {
+        chosenY = firstY;
+        chosenBox = { left, right, top, bottom };
+        break; // greedy: keep the first free slot
+      }
+    }
+    if (chosenY !== null && chosenBox) {
+      lab.firstTspan.setAttribute('dy', `${chosenY}px`);
+      lab.el.style.opacity = '1';
+      placed.push(chosenBox); // later labels must avoid this box
+    } else {
+      lab.el.style.opacity = '0'; // all four slots collide: hide this label
+    }
+  }
+}
+
 // X-shape path for overlay (unofficial) data points
 const X_SIZE = 5;
 const X_HOVER_SIZE = 7;
@@ -150,6 +242,8 @@ const ScatterGraph = React.memo(
       trackedConfigs,
       addTrackedConfig,
       removeTrackedConfig,
+      selectedXAxisMode,
+      selectedSequence,
     } = useInference();
 
     const {
@@ -258,6 +352,10 @@ const ScatterGraph = React.memo(
     );
 
     const rooflines = useMemo(() => {
+      // Frontier scope is (hw, precision, date) — points from different dates
+      // can never share a frontier (a May 15 point can't dominate a May 17 plot).
+      // The legend grouping is still by (hw, precision); we just split the
+      // pareto compute per date and re-merge into the legend bucket.
       const result: Record<string, InferenceData[]> = {};
       const rooflineKey = `${selectedYAxisMetric}_roofline` as keyof ChartDefinition;
       const dir = chartDefinition[rooflineKey] as
@@ -266,17 +364,43 @@ const ScatterGraph = React.memo(
         | 'lower_left'
         | 'lower_right'
         | undefined;
-      for (const hw of Object.keys(groupedData)) {
-        const front =
-          dir === 'upper_right'
-            ? paretoFrontUpperRight(groupedData[hw])
-            : dir === 'upper_left'
-              ? paretoFrontUpperLeft(groupedData[hw])
-              : dir === 'lower_left'
-                ? paretoFrontLowerLeft(groupedData[hw])
-                : paretoFrontLowerRight(groupedData[hw]);
-        front.sort((a, b) => a.x - b.x);
-        result[hw] = front;
+      const frontierFn =
+        dir === 'upper_right'
+          ? paretoFrontUpperRight
+          : dir === 'upper_left'
+            ? paretoFrontUpperLeft
+            : dir === 'lower_left'
+              ? paretoFrontLowerLeft
+              : paretoFrontLowerRight;
+      for (const hwKey of Object.keys(groupedData)) {
+        const byDate = new Map<string, InferenceData[]>();
+        for (const p of groupedData[hwKey]) {
+          const d = p.date;
+          let bucket = byDate.get(d);
+          if (!bucket) {
+            bucket = [];
+            byDate.set(d, bucket);
+          }
+          bucket.push(p);
+        }
+        const combined: InferenceData[] = [];
+        for (const datePoints of byDate.values()) {
+          // In non-e2e xmodes, useChartData stamps every point with an
+          // `isOnE2eFrontier` flag so the line is restricted to the
+          // e2e-Pareto winners — same set of points across every chart,
+          // just re-plotted at the chosen x metric. When the flag is
+          // present on ANY point in the bucket, narrow to the winners
+          // before paretoing (otherwise we'd recompute a fresh frontier
+          // on the swapped x axis and reintroduce the benchmark hack).
+          const flagged = datePoints.some((p) => p.isOnE2eFrontier !== undefined);
+          const seedPoints = flagged
+            ? datePoints.filter((p) => p.isOnE2eFrontier === true)
+            : datePoints;
+          if (seedPoints.length === 0) continue;
+          combined.push(...frontierFn(seedPoints));
+        }
+        combined.sort((a, b) => a.x - b.x);
+        result[hwKey] = combined;
       }
       return result;
     }, [groupedData, selectedYAxisMetric, chartDefinition]);
@@ -284,7 +408,7 @@ const ScatterGraph = React.memo(
     const optimalPointKeys = useMemo(() => {
       const keys = new Set<string>();
       Object.values(rooflines).forEach((pts) =>
-        pts.forEach((p) => keys.add(`${p.hwKey}_${p.precision}-${p.x}-${p.y}`)),
+        pts.forEach((p) => keys.add(`${p.hwKey}_${p.precision}_${p.date}-${p.x}-${p.y}`)),
       );
       return keys;
     }, [rooflines]);
@@ -311,6 +435,10 @@ const ScatterGraph = React.memo(
     const buildPointConfigId = useCallback((point: InferenceData): string => {
       let key = `${point.hwKey}|${point.precision}|${point.tp}|${point.conc}|${point.decode_ep ?? 0}|${point.prefill_tp ?? 0}|${point.prefill_ep ?? 0}`;
       if (point.disagg) key += `|disagg|${point.num_prefill_gpu ?? 0}|${point.num_decode_gpu ?? 0}`;
+      // Agentic runs emit two rows per (config, conc) — one offload=on, one off.
+      // Without this suffix, d3's data join treats them as the same point and
+      // drops one variant (along with its halo).
+      if (point.offload_mode) key += `|offload-${point.offload_mode}`;
       return key;
     }, []);
 
@@ -383,6 +511,21 @@ const ScatterGraph = React.memo(
     // All official points for rendering (unfiltered — visibility via opacity)
     const pointsData = useMemo(() => Object.values(groupedData).flat(), [groupedData]);
 
+    // Bulk presence lookup for agentic points: which ids have a stored
+    // trace_replay blob → controls the "View charts" button in the pinned
+    // tooltip. We deliberately don't fetch the histograms themselves here;
+    // a 95-point dsv4-b300 dashboard would pull GB of profile blobs through
+    // Neon's HTTP API and trip its 64 MB per-response cap.
+    const agenticIds = useMemo(() => {
+      const ids: number[] = [];
+      for (const p of pointsData) {
+        if (p.benchmark_type === 'agentic_traces' && typeof p.id === 'number') ids.push(p.id);
+      }
+      return ids;
+    }, [pointsData]);
+    const { data: traceAvailability } = useTraceAvailability(agenticIds);
+    const router = useRouter();
+
     // Gradient label data
     const allPointLabelsByKey = useMemo(() => {
       const globalLabelColorMap = new Map<string, string>();
@@ -422,7 +565,9 @@ const ScatterGraph = React.memo(
     const visiblePoints = useMemo(() => {
       let pts = filteredData;
       if (hideNonOptimal) {
-        pts = pts.filter((d) => optimalPointKeys.has(`${d.hwKey}_${d.precision}-${d.x}-${d.y}`));
+        pts = pts.filter((d) =>
+          optimalPointKeys.has(`${d.hwKey}_${d.precision}_${d.date}-${d.x}-${d.y}`),
+        );
       }
       return processedOverlayData.length > 0 ? [...pts, ...processedOverlayData] : pts;
     }, [filteredData, processedOverlayData, hideNonOptimal, optimalPointKeys]);
@@ -507,7 +652,8 @@ const ScatterGraph = React.memo(
       (d: InferenceData) =>
         effectiveActiveHwTypes.has(d.hwKey as string) &&
         selectedPrecisions.includes(d.precision) &&
-        (!hideNonOptimal || optimalPointKeys.has(`${d.hwKey}_${d.precision}-${d.x}-${d.y}`)),
+        (!hideNonOptimal ||
+          optimalPointKeys.has(`${d.hwKey}_${d.precision}_${d.date}-${d.x}-${d.y}`)),
       [effectiveActiveHwTypes, selectedPrecisions, hideNonOptimal, optimalPointKeys],
     );
 
@@ -625,6 +771,7 @@ const ScatterGraph = React.memo(
               d3.axisLeft(newYS).ticks(10).tickFormat(logTickFormat(newYS)) as any,
             );
           }
+          avoidLabelCollisions(ctx.layout.zoomGroup);
         },
       }),
       [zoomResetEventName, eventPrefix, xScaleConfig._isLog, yScaleConfig.type],
@@ -644,6 +791,7 @@ const ScatterGraph = React.memo(
             hardwareConfig,
             isTracked: trackedConfigIdsRef.current.has(buildPointConfigId(d)),
             runUrl: d.run_url ? updateRepoUrl(d.run_url) : undefined,
+            hasTrace: typeof d.id === 'number' ? traceAvailability?.[d.id] === true : false,
           }),
         getRulerX: (d: InferenceData, xScale: any) => (xScale as ContinuousScale)(d.x),
         getRulerY: (d: InferenceData, yScale: any) => (yScale as ContinuousScale)(d.y),
@@ -659,26 +807,43 @@ const ScatterGraph = React.memo(
           ),
         onPointClick: (d: InferenceData) => {
           track('latency_data_point_clicked', { hw: String(d.hwKey), x: d.x, y: d.y });
-          // Attach track-over-time button handler in the tooltip
           const tooltipEl = chartRef.current?.getTooltipElement();
-          if (tooltipEl) {
-            const btn = tooltipEl.querySelector('[data-action="track-over-time"]');
-            if (btn) {
-              btn.addEventListener('click', (btnEvent) => {
-                btnEvent.stopPropagation();
-                const configId = buildPointConfigId(d);
-                if (trackedConfigIdsRef.current.has(configId)) removeTrackedConfig(configId);
-                else addTrackedConfig(d, chartDefinition.chartType);
-                chartRef.current?.dismissTooltip();
-                chartRef.current?.hideTooltip();
-                track('latency_point_tracked_via_tooltip', {
-                  hwKey: String(d.hwKey),
-                  tp: d.tp,
-                  conc: d.conc,
-                  precision: d.precision,
-                });
+          if (!tooltipEl) return;
+
+          // ── Summary-page actions ──────────────────────────────────────────
+          const trackBtn = tooltipEl.querySelector('[data-action="track-over-time"]');
+          if (trackBtn) {
+            trackBtn.addEventListener('click', (btnEvent) => {
+              btnEvent.stopPropagation();
+              const configId = buildPointConfigId(d);
+              if (trackedConfigIdsRef.current.has(configId)) removeTrackedConfig(configId);
+              else addTrackedConfig(d, chartDefinition.chartType);
+              chartRef.current?.dismissTooltip();
+              chartRef.current?.hideTooltip();
+              track('latency_point_tracked_via_tooltip', {
+                hwKey: String(d.hwKey),
+                tp: d.tp,
+                conc: d.conc,
+                precision: d.precision,
               });
-            }
+            });
+          }
+
+          // ── "View charts" → navigate to dedicated detail page ────────────
+          const viewBtn = tooltipEl.querySelector('[data-action="view-charts"]');
+          if (viewBtn && typeof d.id === 'number') {
+            const pointId = d.id;
+            viewBtn.addEventListener('click', (btnEvent) => {
+              btnEvent.stopPropagation();
+              track('latency_view_charts_opened', {
+                id: pointId,
+                hwKey: String(d.hwKey),
+                conc: d.conc,
+              });
+              chartRef.current?.dismissTooltip();
+              chartRef.current?.hideTooltip();
+              router.push(`/inference/agentic/${pointId}`);
+            });
           }
         },
         attachToLayer: 1, // scatter layer is index 1 (after rooflines at 0)
@@ -693,6 +858,12 @@ const ScatterGraph = React.memo(
         removeTrackedConfig,
         chartDefinition.chartType,
         selectedPrecisions,
+        // Tooltip content closure reads traceAvailability to decide whether
+        // to render the "View charts" button — rebuild config when the
+        // presence fetch resolves so the button appears for points that
+        // have a trace_replay blob.
+        traceAvailability,
+        router,
       ],
     );
 
@@ -743,35 +914,64 @@ const ScatterGraph = React.memo(
             const precision = key.split('_').pop()!;
             const visible =
               effectiveActiveHwTypes.has(hw) && selectedPrecisions.includes(precision);
-            let stroke = getCssColor(resolveColor(hw));
-
-            if (showGradientLabels) {
-              const pointLabels = allPointLabelsByKey[key];
-              if (pointLabels) {
-                const stops = computeGradientStops(pointLabels, xScale);
-                if (stops) {
-                  const gid = `roofline-gradient-${chartId}-${key}`;
-                  activeGradientIds.add(gid);
-                  let gradient = defs.select<SVGLinearGradientElement>(`#${CSS.escape(gid)}`);
-                  if (gradient.empty()) gradient = defs.append('linearGradient').attr('id', gid);
-                  gradient
-                    .attr('gradientUnits', 'userSpaceOnUse')
-                    .attr('x1', xScale(pts[0].x))
-                    .attr('y1', 0)
-                    .attr('x2', xScale(pts.at(-1)!.x))
-                    .attr('y2', 0);
-                  gradient
-                    .selectAll('stop')
-                    .data(stops)
-                    .join('stop')
-                    .attr('offset', (s) => `${(s.offset * 100).toFixed(2)}%`)
-                    .attr('stop-color', (s) => s.color);
-                  stroke = `url(#${gid})`;
-                }
+            const baseStroke = getCssColor(resolveColor(hw));
+
+            // Split into per-date sub-paths so the line never crosses dates.
+            // (When only one date is present the loop runs once with the full set.)
+            const byDate = new Map<string, InferenceData[]>();
+            for (const p of pts) {
+              let bucket = byDate.get(p.date);
+              if (!bucket) {
+                bucket = [];
+                byDate.set(p.date, bucket);
               }
+              bucket.push(p);
             }
+            const singleDate = byDate.size === 1;
+
+            for (const [date, datePoints] of byDate) {
+              if (datePoints.length <= 1) continue;
+              const entryKey = singleDate ? key : `${key}__${date}`;
+              let stroke = baseStroke;
+
+              // Gradient labels only apply in the single-date case; mapping the
+              // (key-wide) ParetoPointLabel array onto per-date sub-segments is
+              // ambiguous and the comparison-date overlay is a rare combo.
+              if (singleDate && showGradientLabels) {
+                const pointLabels = allPointLabelsByKey[key];
+                if (pointLabels) {
+                  const stops = computeGradientStops(pointLabels, xScale);
+                  if (stops) {
+                    const gid = `roofline-gradient-${chartId}-${entryKey}`;
+                    activeGradientIds.add(gid);
+                    let gradient = defs.select<SVGLinearGradientElement>(`#${CSS.escape(gid)}`);
+                    if (gradient.empty()) gradient = defs.append('linearGradient').attr('id', gid);
+                    gradient
+                      .attr('gradientUnits', 'userSpaceOnUse')
+                      .attr('x1', xScale(datePoints[0].x))
+                      .attr('y1', 0)
+                      .attr('x2', xScale(datePoints.at(-1)!.x))
+                      .attr('y2', 0);
+                    gradient
+                      .selectAll('stop')
+                      .data(stops)
+                      .join('stop')
+                      .attr('offset', (s) => `${(s.offset * 100).toFixed(2)}%`)
+                      .attr('stop-color', (s) => s.color);
+                    stroke = `url(#${gid})`;
+                  }
+                }
+              }
 
-            entries.push({ key, hw, precision, points: pts, stroke, visible });
+              entries.push({
+                key: entryKey,
+                hw,
+                precision,
+                points: datePoints,
+                stroke,
+                visible,
+              });
+            }
           });
 
           // Remove stale gradients
@@ -1176,11 +1376,26 @@ const ScatterGraph = React.memo(
             .y((d) => newYScale(d.y))
             .curve(d3.curveMonotoneX);
 
-          // Update roofline paths
+          // Update roofline paths — must split per-date so the zoom redraw
+          // matches the per-date sub-paths created in the initial render.
           Object.entries(rooflines).forEach(([key, pts]) => {
             if (pts.length < 2) return;
-            const sel = zoomGroup.select<SVGPathElement>(`.roofline-${key}`);
-            if (!sel.empty()) sel.attr('d', lineGen(pts) as string);
+            const byDate = new Map<string, InferenceData[]>();
+            for (const p of pts) {
+              let bucket = byDate.get(p.date);
+              if (!bucket) {
+                bucket = [];
+                byDate.set(p.date, bucket);
+              }
+              bucket.push(p);
+            }
+            const singleDate = byDate.size === 1;
+            for (const [date, datePoints] of byDate) {
+              if (datePoints.length < 2) continue;
+              const cls = singleDate ? `roofline-${key}` : `roofline-${key}__${date}`;
+              const sel = zoomGroup.select<SVGPathElement>(`.${CSS.escape(cls)}`);
+              if (!sel.empty()) sel.attr('d', lineGen(datePoints) as string);
+            }
           });
 
           // Update gradient coordinates
@@ -1406,7 +1621,8 @@ const ScatterGraph = React.memo(
           getOpacity: (d) => (isPointVisible(d) ? 1 : 0),
           getPointerEvents: (d) => (isPointVisible(d) ? 'auto' : 'none'),
           hideLabels: hidePointLabels || showGradientLabels,
-          getLabelText: (d) => (useAdvancedLabels ? getPointLabel(d) : String(d.tp)),
+          getLabelText: (d) =>
+            useAdvancedLabels ? `${getPointLabel(d)}\nC=${d.conc}` : `${d.tp}\nC=${d.conc}`,
           foreground: 'var(--foreground)',
           dataAttrs: {
             'hw-key': (d) => String(d.hwKey),
@@ -1506,17 +1722,31 @@ const ScatterGraph = React.memo(
               // Labels
               const showLabels = !hidePointLabels && !showGradientLabels;
               overlayPoints.each(function (d) {
-                d3.select(this)
+                const lines = showLabels
+                  ? (useAdvancedLabels
+                      ? `${getPointLabel(d)}\nC=${d.conc}`
+                      : `${d.tp}\nC=${d.conc}`
+                    ).split('\n')
+                  : [];
+                const text = d3
+                  .select(this)
                   .selectAll<SVGTextElement, boolean>('.overlay-label')
                   .data(showLabels ? [true] : [])
                   .join('text')
                   .attr('class', 'overlay-label')
-                  .attr('dy', -10)
                   .attr('text-anchor', 'middle')
                   .style('fill', 'var(--foreground)')
                   .attr('font-size', '10px')
-                  .attr('pointer-events', 'none')
-                  .text(useAdvancedLabels ? getPointLabel(d) : String(d.tp));
+                  .attr('font-weight', '700')
+                  .attr('pointer-events', 'none');
+                const firstDy = -(1 + (lines.length - 1) * 1.1);
+                text
+                  .selectAll<SVGTSpanElement, string>('tspan')
+                  .data(lines)
+                  .join('tspan')
+                  .attr('x', 0)
+                  .attr('dy', (_l, i) => (i === 0 ? `${firstDy}em` : '1.1em'))
+                  .text((l) => l);
               });
 
               // Overlay tooltip handlers
@@ -1784,6 +2014,23 @@ const ScatterGraph = React.memo(
             .attr('pointer-events', 'none');
         });
 
+        // Offload halo: dashed ring on every point that used KV offload (Pareto or not)
+        zoomGroup.selectAll<SVGGElement, InferenceData>('.dot-group').each(function (d) {
+          const showHalo = d.offload_mode === 'on';
+          d3.select(this)
+            .selectAll<SVGCircleElement, boolean>('.offload-halo')
+            .data(showHalo ? [true] : [])
+            .join('circle')
+            .attr('class', 'offload-halo')
+            .attr('r', POINT_SIZE + 4)
+            .attr('fill', 'none')
+            .attr('stroke', 'var(--foreground)')
+            .attr('stroke-width', 1.5)
+            .attr('stroke-dasharray', '3 2')
+            .attr('opacity', 0.9)
+            .attr('pointer-events', 'none');
+        });
+
         // Double-click to track/untrack
         zoomGroup
           .selectAll<SVGGElement, InferenceData>('.dot-group')
@@ -1817,6 +2064,8 @@ const ScatterGraph = React.memo(
             });
           });
 
+        avoidLabelCollisions(zoomGroup);
+
         // Log tick formatting on initial render
         if (xScaleConfig._isLog) {
           const xScale = ctx.xScale as d3.ScaleLogarithmic<number, number>;
@@ -1839,6 +2088,9 @@ const ScatterGraph = React.memo(
         chartDefinition.chartType,
         xScaleConfig._isLog,
         yScaleConfig.type,
+        optimalPointKeys,
+        getCssColor,
+        resolveColor,
       ],
     );
 
@@ -2031,6 +2283,17 @@ const ScatterGraph = React.memo(
                   setHideNonOptimal(checked);
                   track('latency_hide_non_optimal_toggled', { enabled: checked });
                 },
+                // On agentic + non-e2e chart, "optimal" means "on the
+                // e2e-latency Pareto frontier" (not a per-axis Pareto on the
+                // current x metric). Explain that so users don't wonder why
+                // a point sitting above the line is still considered
+                // dominated.
+                ...(selectedSequence === Sequence.AgenticTraces && selectedXAxisMode !== 'e2e'
+                  ? {
+                      infoTooltip:
+                        "On agentic, optimal = on the end-to-end latency Pareto frontier, so a config can't win this axis by tanking e2e. Off-frontier points may appear above the line.",
+                    }
+                  : {}),
               },
               {
                 id: 'scatter-hide-point-labels',
diff --git a/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx b/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx
index f9b1b3c8..73018483 100644
--- a/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx
+++ b/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx
@@ -194,9 +194,7 @@ export function UnofficialChartDisplay() {
                           `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition
                         ]
                       }{' '}
-                      {graph.chartDefinition[
-                        `${selectedYAxisMetric}_heading` as keyof typeof graph.chartDefinition
-                      ] || graph.chartDefinition.heading}
+                      {graph.chartDefinition.heading}
                     </h2>
                     <p className="text-sm text-muted-foreground mb-2">
                       {graph.model} • {selectedPrecisions.join(', ')} • {graph.sequence}
diff --git a/packages/app/src/components/inference/utils.test.ts b/packages/app/src/components/inference/utils.test.ts
index 8f8705e1..589ba580 100644
--- a/packages/app/src/components/inference/utils.test.ts
+++ b/packages/app/src/components/inference/utils.test.ts
@@ -157,12 +157,12 @@ describe('processOverlayChartData', () => {
   });
 
   it('remaps x to config override for input metrics on interactivity chart', () => {
-    // inputTputPerGpu has x override to p99_ttft on interactivity chart
+    // inputTputPerGpu has x override to p90_ttft on interactivity chart
     const data = [
       pt({
         x: 100,
         inputTputPerGpu: { y: 5, roof: false },
-        p99_ttft: 0.25,
+        p90_ttft: 0.25,
         median_intvty: 50,
       } as any),
     ];
@@ -176,16 +176,11 @@ describe('processOverlayChartData', () => {
       pt({
         x: 100,
         inputTputPerGpu: { y: 5, roof: false },
-        median_ttft: 0.1,
+        p90_ttft: 0.1,
         median_intvty: 50,
       } as any),
     ];
-    const result = processOverlayChartData(
-      data,
-      'interactivity',
-      'y_inputTputPerGpu',
-      'median_ttft',
-    );
+    const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft');
     expect(result).toHaveLength(1);
     expect(result[0].x).toBe(0.1);
   });
@@ -195,76 +190,62 @@ describe('processOverlayChartData', () => {
       pt({
         x: 100,
         inputTputPerGpu: { y: 5, roof: false },
-        p99_ttft: 0.25,
+        p90_ttft: 0.25,
         median_e2el: 2.5,
       } as any),
     ];
     const result = processOverlayChartData(data, 'e2e', 'y_inputTputPerGpu', null);
     expect(result).toHaveLength(1);
-    // e2e uses median_e2el as x (from chart config default), not p99_ttft
+    // e2e uses median_e2el as x (from chart config default), not p90_ttft
     expect(result[0].x).toBe(2.5);
   });
 
-  it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p99_ttft', () => {
-    const data = [
-      pt({
-        x: 100,
-        tpPerGpu: { y: 42, roof: false },
-        p99_ttft: 0.35,
-        median_e2el: 2.5,
-      } as any),
-    ];
-    const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft');
-    expect(result).toHaveLength(1);
-    expect(result[0].x).toBe(0.35);
-  });
-
-  it('remaps x to TTFT for e2e chart when selectedXAxisMetric is median_ttft', () => {
+  it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p90_ttft', () => {
     const data = [
       pt({
         x: 100,
         tpPerGpu: { y: 42, roof: false },
-        median_ttft: 0.12,
+        p90_ttft: 0.12,
         median_e2el: 2.5,
       } as any),
     ];
-    const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'median_ttft');
+    const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft');
     expect(result).toHaveLength(1);
     expect(result[0].x).toBe(0.12);
   });
 
   it('filters e2e TTFT outliers exceeding y_latency_limit', () => {
     const data = [
-      pt({ tpPerGpu: { y: 10, roof: false }, p99_ttft: 0.5, median_e2el: 1 } as any),
-      pt({ tpPerGpu: { y: 5, roof: false }, p99_ttft: 999, median_e2el: 2 } as any),
+      pt({ tpPerGpu: { y: 10, roof: false }, p90_ttft: 0.5, median_e2el: 1 } as any),
+      pt({ tpPerGpu: { y: 5, roof: false }, p90_ttft: 999, median_e2el: 2 } as any),
     ];
-    const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft');
+    const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft');
     // y_latency_limit is 60 in the e2e chart config — the 999 outlier should be filtered
     expect(result).toHaveLength(1);
     expect(result[0].x).toBe(0.5);
   });
 
   it('does not filter interactivity points by latency limit when x-axis is default', () => {
-    // Regression: selectedXAxisMetric defaults to 'p99_ttft' but the interactivity
+    // Regression: selectedXAxisMetric defaults to 'p90_ttft' but the interactivity
     // chart's x-axis stays median_intvty for non-input metrics. The latency limit
     // (60) must NOT apply to median_intvty values.
     const data = [
       pt({ tpPerGpu: { y: 42, roof: false }, median_intvty: 200 } as any),
       pt({ tpPerGpu: { y: 10, roof: false }, median_intvty: 30 } as any),
     ];
-    const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p99_ttft');
+    const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p90_ttft');
     expect(result).toHaveLength(2);
   });
 
   it('applies latency limit on interactivity only when x-axis is actually overridden', () => {
-    // When an input metric IS selected and x-axis overrides to p99_ttft,
+    // When an input metric IS selected and x-axis overrides to p90_ttft,
     // the latency limit should apply.
     const data = [
-      pt({ inputTputPerGpu: { y: 5, roof: false }, p99_ttft: 0.5, median_intvty: 10 } as any),
-      pt({ inputTputPerGpu: { y: 3, roof: false }, p99_ttft: 999, median_intvty: 20 } as any),
+      pt({ inputTputPerGpu: { y: 5, roof: false }, p90_ttft: 0.5, median_intvty: 10 } as any),
+      pt({ inputTputPerGpu: { y: 3, roof: false }, p90_ttft: 999, median_intvty: 20 } as any),
     ];
-    const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p99_ttft');
-    // x-axis is overridden to p99_ttft for input metric — latency limit SHOULD filter 999
+    const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft');
+    // x-axis is overridden to p90_ttft for input metric — latency limit SHOULD filter 999
     expect(result).toHaveLength(1);
     expect(result[0].x).toBe(0.5);
   });
diff --git a/packages/app/src/components/inference/utils.ts b/packages/app/src/components/inference/utils.ts
index 4b5335b6..4876c614 100644
--- a/packages/app/src/components/inference/utils.ts
+++ b/packages/app/src/components/inference/utils.ts
@@ -75,11 +75,13 @@ export function processOverlayChartData(
   chartType: 'e2e' | 'interactivity',
   selectedYAxisMetric: string,
   selectedXAxisMetric: string | null,
+  options?: { isAgentic?: boolean },
 ): InferenceData[] {
   const chartDef = (chartDefinitions as ChartDefinition[]).find((d) => d.chartType === chartType);
   if (!chartDef) return [];
 
   const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey;
+  const isAgentic = options?.isAgentic === true;
 
   // Resolve x-axis field (must match useChartData logic)
   const metricTitle =
@@ -87,9 +89,11 @@ export function processOverlayChartData(
   const isInputMetric = metricTitle.toLowerCase().includes('input');
   let xAxisField: string = chartDef.x;
   // selectedXAxisMetric is already the effective metric for this chart type
-  // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric)
+  // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric).
+  // Match any *_ttft metric — the x-axis-mode picker can now select any
+  // percentile (median/p75/p90/p99) depending on sequence kind.
   const isTtftOverride =
-    selectedXAxisMetric === 'p99_ttft' || selectedXAxisMetric === 'median_ttft';
+    typeof selectedXAxisMetric === 'string' && selectedXAxisMetric.endsWith('_ttft');
 
   if (selectedXAxisMetric && chartDef.chartType === 'interactivity' && isInputMetric) {
     xAxisField = selectedXAxisMetric;
@@ -109,7 +113,12 @@ export function processOverlayChartData(
     })
     .filter(
       (d) =>
-        xAxisField === chartDef.x || !chartDef.y_latency_limit || d.x <= chartDef.y_latency_limit,
+        // Skip the latency limit for the natural x-axis or for agentic
+        // (long TTFTs are normal there, not overload outliers).
+        xAxisField === chartDef.x ||
+        isAgentic ||
+        !chartDef.y_latency_limit ||
+        d.x <= chartDef.y_latency_limit,
     );
 
   return filterDataByCostLimit(processedData, chartDef, selectedYAxisMetric);
diff --git a/packages/app/src/components/inference/utils/tooltipUtils.ts b/packages/app/src/components/inference/utils/tooltipUtils.ts
index 4c56d217..ed68c41b 100644
--- a/packages/app/src/components/inference/utils/tooltipUtils.ts
+++ b/packages/app/src/components/inference/utils/tooltipUtils.ts
@@ -19,6 +19,14 @@ export interface TooltipConfig {
   isTracked?: boolean;
   /** URL to the GitHub Actions workflow run */
   runUrl?: string;
+  /**
+   * Whether this agentic point has a stored trace_replay blob. Controls
+   * visibility of the "View charts" button — the actual distributions are
+   * rendered on the detail page, not inline, so all the tooltip needs is a
+   * presence boolean (sourced from the bulk `/api/v1/trace-availability`
+   * call so we don't ship megabytes of profile JSONL just for this check).
+   */
+  hasTrace?: boolean;
 }
 
 export interface OverlayTooltipConfig extends TooltipConfig {
@@ -88,6 +96,74 @@ const runLinkHTML = (runUrl?: string) =>
 const tooltipLine = (label: string, value: string | number) =>
   `<div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;"><strong>${label}:</strong> ${value}</div>`;
 
+/** Renders a fractional rate (0.5 → "50.0%") with one decimal; null when absent or NaN. */
+const formatPct = (v: number | null | undefined): string | null =>
+  v === undefined || v === null || Number.isNaN(v) ? null : `${(v * 100).toFixed(1)}%`;
+/** Tooltip number formatter: at most 3 decimals, trailing zeros dropped, grouped from 10,000. */
+const fmt = (v: number): string => {
+  if (!Number.isFinite(v)) return String(v); // NaN / ±Infinity pass through verbatim
+  const capped = Number.parseFloat(v.toFixed(3));
+  const needsGrouping = Math.abs(capped) >= 10000;
+  return needsGrouping ? new Intl.NumberFormat('en-US').format(capped) : String(capped);
+};
+
+/**
+ * Agentic-only tooltip rows: offload mode, KV cache hit rates, request
+ * success, token totals. A field that is absent (undefined or null)
+ * contributes no row.
+ *
+ * @param d - Data point the tooltip is rendered for.
+ * @returns Concatenated row HTML; an empty string for non-agentic rows.
+ */
+const generateAgenticHTML = (d: InferenceData): string => {
+  if (d.benchmark_type !== 'agentic_traces') return '';
+
+  const parts: string[] = [];
+  if (d.offload_mode) {
+    parts.push(tooltipLine('Offload Mode', d.offload_mode.toUpperCase()));
+  }
+
+  const gpuHit = formatPct(d.server_gpu_cache_hit_rate);
+  const cpuHit = formatPct(d.server_cpu_cache_hit_rate);
+  const theoHit = formatPct(d.theoretical_cache_hit_rate);
+  if (gpuHit) parts.push(tooltipLine('GPU Cache Hit Rate', gpuHit));
+  if (cpuHit) parts.push(tooltipLine('CPU Cache Hit Rate', cpuHit));
+  if (theoHit) parts.push(tooltipLine('Theoretical Cache Hit Rate', theoHit));
+
+  // Guard against null as well as undefined (formatPct already does) so a
+  // null count can never be rendered as "null / null".
+  const { num_requests_total: total, num_requests_successful: ok } = d;
+  if (total !== undefined && total !== null && ok !== undefined && ok !== null) {
+    // No percentage when there were zero requests — avoids dividing by zero.
+    const successPct = total > 0 ? ` (${((ok / total) * 100).toFixed(0)}%)` : '';
+    parts.push(tooltipLine('Requests', `${ok} / ${total}${successPct}`));
+  }
+
+  if (d.total_prompt_tokens !== undefined && d.total_prompt_tokens !== null) {
+    parts.push(tooltipLine('Prompt Tokens', formatNumber(d.total_prompt_tokens)));
+  }
+  if (d.total_generation_tokens !== undefined && d.total_generation_tokens !== null) {
+    parts.push(tooltipLine('Generated Tokens', formatNumber(d.total_generation_tokens)));
+  }
+
+  // Histograms + time-series live on the dedicated detail page now; the
+  // "View charts" button (rendered by the wrapper when pinned + has trace
+  // data) takes the user there.
+
+  return parts.join('');
+};
+
+/** "View charts" button markup — empty unless the tooltip is pinned and the
+ *  point has stored trace data. Wired up by the ScatterGraph click handler. */
+const viewChartsButtonHTML = (isPinned: boolean, hasTraceData: boolean): string =>
+  isPinned && hasTraceData
+    ? `<button data-action="view-charts" style="
+    margin-top: 8px; width: 100%; padding: 4px 8px; font-size: 11px; font-weight: 500;
+    border: 1px solid var(--border); border-radius: 6px; cursor: pointer;
+    background: var(--accent); color: var(--accent-foreground);
+  ">View charts &rarr;</button>`
+    : '';
+
 const shortenSha = (image: string) => image.replaceAll(/(sha256:[a-f0-9]{7})[a-f0-9]+/giu, '$1…');
 
 const imageTooltipLine = (image: string) =>
@@ -138,7 +214,16 @@ const generateParallelismHTML = (d: InferenceData): string => {
  * @returns HTML string for the tooltip content
  */
 export const generateTooltipContent = (config: TooltipConfig): string => {
-  const { data: d, isPinned, xLabel, yLabel, selectedYAxisMetric, hardwareConfig, runUrl } = config;
+  const {
+    data: d,
+    isPinned,
+    xLabel,
+    yLabel,
+    selectedYAxisMetric,
+    hardwareConfig,
+    runUrl,
+    hasTrace,
+  } = config;
 
   return `
     <div style="background: var(--popover); border: 1px solid var(--border); border-radius: 8px; padding: 12px; box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1); user-select: ${isPinned ? 'text' : 'none'};">
@@ -156,16 +241,16 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
           : ''
       }
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-        <strong>${xLabel}:</strong> ${formatNumber(d.x)}
+        <strong>${xLabel}:</strong> ${fmt(d.x)}
       </div>
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-        <strong>${yLabel}:</strong> ${formatNumber(d.y)}
+        <strong>${yLabel}:</strong> ${fmt(d.y)}
       </div>
       ${
         selectedYAxisMetric === 'y_tpPerGpu' && d['inputTputPerGpu']
           ? `
           <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-            <strong>Input Token Throughput per GPU:</strong> ${formatNumber(d['inputTputPerGpu'].y)}
+            <strong>Input Token Throughput per GPU:</strong> ${fmt(d['inputTputPerGpu'].y)}
           </div>`
           : ''
       }
@@ -173,7 +258,7 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
         selectedYAxisMetric === 'y_tpPerGpu' && d['outputTputPerGpu']
           ? `
           <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-            <strong>Output Token Throughput per GPU:</strong> ${formatNumber(d['outputTputPerGpu'].y)}
+            <strong>Output Token Throughput per GPU:</strong> ${fmt(d['outputTputPerGpu'].y)}
           </div>`
           : ''
       }
@@ -182,10 +267,12 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
         <strong>Concurrency:</strong> ${d.conc}
       </div>
-      <div style="color: var(--muted-foreground); font-size: 11px;">
+      <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
         <strong>Precision:</strong> ${d.precision.toUpperCase()}
       </div>
+      ${generateAgenticHTML(d)}
       ${runLinkHTML(runUrl)}
+      ${viewChartsButtonHTML(isPinned, Boolean(hasTrace))}
       ${
         isPinned
           ? `<button data-action="track-over-time" style="
@@ -228,19 +315,20 @@ export const generateOverlayTooltipContent = (config: OverlayTooltipConfig): str
         <strong>Date:</strong> ${d.actualDate ?? d.date}
       </div>
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-        <strong>${xLabel}:</strong> ${formatNumber(d.x)}
+        <strong>${xLabel}:</strong> ${fmt(d.x)}
       </div>
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-        <strong>${yLabel}:</strong> ${formatNumber(d.y)}
+        <strong>${yLabel}:</strong> ${fmt(d.y)}
       </div>
       ${tooltipLine('Total GPUs', d.tp)}
       ${generateParallelismHTML(d)}
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
         <strong>Concurrency:</strong> ${d.conc}
       </div>
-      <div style="color: var(--muted-foreground); font-size: 11px;">
+      <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
         <strong>Precision:</strong> ${d.precision.toUpperCase()}
       </div>
+      ${generateAgenticHTML(d)}
     </div>
   `;
 };
@@ -271,16 +359,16 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
           : ''
       }
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-        <strong>${xLabel}:</strong> ${formatNumber(d.x)}
+        <strong>${xLabel}:</strong> ${fmt(d.x)}
       </div>
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-        <strong>${yLabel}:</strong> ${formatNumber(d.y)}
+        <strong>${yLabel}:</strong> ${fmt(d.y)}
       </div>
       ${
         selectedYAxisMetric === 'y_tpPerGpu' && d['inputTputPerGpu']
           ? `
           <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-            <strong>Input Token Throughput per GPU:</strong> ${formatNumber(d['inputTputPerGpu'].y)}
+            <strong>Input Token Throughput per GPU:</strong> ${fmt(d['inputTputPerGpu'].y)}
           </div>`
           : ''
       }
@@ -288,7 +376,7 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
         selectedYAxisMetric === 'y_tpPerGpu' && d['outputTputPerGpu']
           ? `
           <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
-            <strong>Output Token Throughput per GPU:</strong> ${formatNumber(d['outputTputPerGpu'].y)}
+            <strong>Output Token Throughput per GPU:</strong> ${fmt(d['outputTputPerGpu'].y)}
           </div>`
           : ''
       }
@@ -297,9 +385,10 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
       <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
         <strong>Concurrency:</strong> ${d.conc}
       </div>
-      <div style="color: var(--muted-foreground); font-size: 11px;">
+      <div style="color: var(--muted-foreground); font-size: 11px; margin-bottom: 4px;">
         <strong>Precision:</strong> ${d.precision.toUpperCase()}
       </div>
+      ${generateAgenticHTML(d)}
       ${runLinkHTML(runUrl)}
     </div>
   `;
diff --git a/packages/app/src/components/ui/chart-legend.tsx b/packages/app/src/components/ui/chart-legend.tsx
index 81d5f261..a20c9959 100644
--- a/packages/app/src/components/ui/chart-legend.tsx
+++ b/packages/app/src/components/ui/chart-legend.tsx
@@ -6,6 +6,7 @@ import {
   ArrowRightToLine,
   Circle,
   Diamond,
+  Info,
   Square,
   Triangle,
   X,
@@ -36,6 +37,8 @@ export interface LegendSwitchConfig {
   label: string;
   checked: boolean;
   onCheckedChange: (checked: boolean) => void;
+  /** Optional explainer rendered as an info-icon tooltip next to the label. */
+  infoTooltip?: React.ReactNode;
 }
 
 export interface LegendActionConfig {
@@ -273,6 +276,29 @@ export default function ChartLegend({
             >
               {sw.label}
             </Label>
+            {sw.infoTooltip && (
+              <TooltipProvider delayDuration={100}>
+                <TooltipRoot>
+                  <TooltipTrigger asChild>
+                    <button
+                      type="button"
+                      data-testid={`${sw.id}-info`}
+                      aria-label={`More info about ${sw.label}`}
+                      className="text-muted-foreground hover:text-foreground cursor-help -m-1.5 p-1.5 inline-flex items-center"
+                    >
+                      <Info size={14} />
+                    </button>
+                  </TooltipTrigger>
+                  <TooltipContent
+                    side="top"
+                    sideOffset={6}
+                    className="max-w-[260px] text-xs leading-snug"
+                  >
+                    {sw.infoTooltip}
+                  </TooltipContent>
+                </TooltipRoot>
+              </TooltipProvider>
+            )}
           </div>
         ))}
       </div>
diff --git a/packages/app/src/components/ui/chart-selectors.tsx b/packages/app/src/components/ui/chart-selectors.tsx
index a9e087b2..49ea3f1a 100644
--- a/packages/app/src/components/ui/chart-selectors.tsx
+++ b/packages/app/src/components/ui/chart-selectors.tsx
@@ -5,19 +5,39 @@ import { Info } from 'lucide-react';
 import { LabelWithTooltip } from '@/components/ui/label-with-tooltip';
 import { track } from '@/lib/analytics';
 import { MultiSelect } from '@/components/ui/multi-select';
+import {
+  Select,
+  SelectContent,
+  SelectGroup,
+  SelectItem,
+  SelectLabel,
+  SelectTrigger,
+  SelectValue,
+} from '@/components/ui/select';
 import { TooltipContent, TooltipRoot, TooltipTrigger } from '@/components/ui/tooltip';
 import {
   type Model,
   type Precision,
   type Sequence,
+  type Percentile,
+  PERCENTILE_OPTIONS,
   getModelCategory,
   getModelLabel,
+  getPercentileLabel,
   getPrecisionLabel,
   getSequenceCategory,
   getSequenceLabel,
   groupByCategory,
+  sequenceKind,
 } from '@/lib/data-mappings';
 
+/**
+ * "Deprecated" sub-header used by selectors. Rendered as a span (not
+ * SelectLabel) because some callsites use `MultiSelect`, which wraps
+ * headers in its own div and isn't a SelectGroup. The span carries no
+ * styling of its own — the parent context supplies the muted/small
+ * treatment. ScenarioSelector renders this inside a SelectLabel directly.
+ */
 function DeprecatedSectionTitle({ reason }: { reason: string }) {
   return (
     <span className="flex items-center gap-1">
@@ -200,6 +220,140 @@ export function SequenceSelector({
   );
 }
 
+interface ScenarioSelectorProps {
+  id?: string;
+  value: string;
+  onChange: (value: Sequence) => void;
+  open?: boolean;
+  onOpenChange?: (open: boolean) => void;
+  availableSequences: string[];
+  'data-testid'?: string;
+}
+
+/**
+ * Scenario selector — agentic-trace rows come first under an "Agentic" group,
+ * then fixed-seq-len rows under "Fixed Sequence Length" (deprecated ones last).
+ * Label is "Scenario" (the ISL/OSL framing only applies to the fixed-seq subset).
+ */
+export function ScenarioSelector({
+  id = 'scenario-select',
+  value,
+  onChange,
+  open,
+  onOpenChange,
+  availableSequences,
+  'data-testid': testId,
+}: ScenarioSelectorProps) {
+  const fixedSeq = availableSequences.filter((s) => sequenceKind(s as Sequence) === 'fixed-seq');
+  const agentic = availableSequences.filter((s) => sequenceKind(s as Sequence) === 'agentic');
+  const fixedGroups = groupByCategory(fixedSeq, (s) => getSequenceCategory(s as Sequence));
+
+  return (
+    <div className="flex flex-col space-y-1.5 lg:col-span-1">
+      <LabelWithTooltip
+        htmlFor={id}
+        label="Scenario"
+        tooltip="Benchmark scenario. Fixed Sequence Length runs use a defined input/output token count (ISL/OSL). Agentic Traces replay real agentic workloads with variable inputs/outputs."
+      />
+      <Select
+        value={value}
+        onValueChange={(v) => {
+          track('selector_scenario_changed', { scenario: v });
+          onChange(v as Sequence); // every SelectItem value below is drawn from availableSequences
+        }}
+        open={open}
+        onOpenChange={onOpenChange}
+      >
+        <SelectTrigger id={id} data-testid={testId} className="w-full">
+          <SelectValue />
+        </SelectTrigger>
+        <SelectContent>
+          {/* Agentic first — preferred default scenario when available. */}
+          {agentic.length > 0 && (
+            <SelectGroup>
+              <SelectLabel>Agentic</SelectLabel>
+              {agentic.map((seq) => (
+                <SelectItem key={seq} value={seq}>
+                  {getSequenceLabel(seq as Sequence)}
+                </SelectItem>
+              ))}
+            </SelectGroup>
+          )}
+          {fixedSeq.length > 0 && (
+            <SelectGroup>
+              <SelectLabel>Fixed Sequence Length</SelectLabel>
+              {fixedGroups.default.map((seq) => (
+                <SelectItem key={seq} value={seq}>
+                  {getSequenceLabel(seq as Sequence)}
+                </SelectItem>
+              ))}
+              {fixedGroups.deprecated.length > 0 && (
+                <>
+                  <SelectLabel>
+                    <DeprecatedSectionTitle reason="CI capacity was reallocated to agentic coding and multi-turn chat scenarios." />
+                  </SelectLabel>
+                  {fixedGroups.deprecated.map((seq) => (
+                    <SelectItem key={seq} value={seq}>
+                      {getSequenceLabel(seq as Sequence)}
+                    </SelectItem>
+                  ))}
+                </>
+              )}
+            </SelectGroup>
+          )}
+        </SelectContent>
+      </Select>
+    </div>
+  );
+}
+
+interface PercentileSelectorProps {
+  id?: string;
+  value: string;
+  onChange: (value: Percentile) => void;
+  'data-testid'?: string;
+}
+
+/**
+ * Latency percentile selector for agentic-trace charts. The selected value
+ * rewrites the chart x-axis metric from `median_*` to `{percentile}_*`, so
+ * picking p99 plots p99 e2e latency / interactivity instead of the median.
+ */
+export function PercentileSelector({
+  id = 'percentile-select',
+  value,
+  onChange,
+  'data-testid': testId,
+}: PercentileSelectorProps) {
+  return (
+    <div className="flex flex-col space-y-1.5 lg:col-span-1">
+      <LabelWithTooltip
+        htmlFor={id}
+        label="Latency Percentile"
+        tooltip="Percentile of the latency distribution used for the chart x-axis on agentic runs."
+      />
+      <Select
+        value={value}
+        onValueChange={(v) => {
+          track('selector_percentile_changed', { percentile: v }); // analytics fires before onChange
+          onChange(v as Percentile); // items below are rendered from PERCENTILE_OPTIONS only
+        }}
+      >
+        <SelectTrigger id={id} data-testid={testId} className="w-full">
+          <SelectValue />
+        </SelectTrigger>
+        <SelectContent>
+          {PERCENTILE_OPTIONS.map((p) => (
+            <SelectItem key={p} value={p}>
+              {getPercentileLabel(p)}
+            </SelectItem>
+          ))}
+        </SelectContent>
+      </Select>
+    </div>
+  );
+}
+
 interface PrecisionSelectorProps {
   id?: string;
   value: string[];
diff --git a/packages/app/src/components/ui/d3-chart-wrapper.tsx b/packages/app/src/components/ui/d3-chart-wrapper.tsx
index 0392ac10..44013b1b 100644
--- a/packages/app/src/components/ui/d3-chart-wrapper.tsx
+++ b/packages/app/src/components/ui/d3-chart-wrapper.tsx
@@ -1,6 +1,41 @@
 'use client';
 
-import React from 'react';
+import React, { useEffect, useState } from 'react';
+import { createPortal } from 'react-dom';
+
+/**
+ * Renders the d3 tooltip element via React Portal to document.body so it
+ * escapes any parent stacking context (e.g. the chart Card's backdrop-filter
+ * creates one, trapping z-index inside it). Position is set as viewport
+ * coordinates by the d3 layer.
+ */
+function PortalTooltip({
+  tooltipRef,
+  pinned,
+}: {
+  tooltipRef: React.RefObject<HTMLDivElement | null>;
+  pinned: boolean;
+}) {
+  const [mounted, setMounted] = useState(false);
+  useEffect(() => setMounted(true), []); // empty deps: flips once, after the first client render
+  const node = (
+    <div
+      ref={tooltipRef}
+      data-chart-tooltip
+      style={{
+        position: 'fixed',
+        left: 0, // placeholder origin; real coordinates are written later by the d3 layer
+        top: 0,
+        opacity: pinned ? 1 : 0,
+        pointerEvents: pinned ? 'auto' : 'none',
+        display: pinned ? 'block' : 'none',
+        zIndex: 9999,
+      }}
+    />
+  );
+  if (!mounted || typeof document === 'undefined') return node; // not mounted / no DOM → render inline
+  return createPortal(node, document.body);
+}
 
 export interface D3ChartWrapperProps {
   chartId: string;
@@ -72,17 +107,11 @@ export function D3ChartWrapper({
                 }
               }}
             />
-            <div
-              ref={tooltipRef}
-              data-chart-tooltip
-              style={{
-                position: 'absolute',
-                opacity: pinnedPoint ? 1 : 0,
-                pointerEvents: pinnedPoint ? 'auto' : 'none',
-                display: pinnedPoint ? 'block' : 'none',
-                zIndex: 50,
-              }}
-            />
+            {/* Tooltip is portalled to <body> with position:fixed so it can
+                rise above sibling chart cards' stacking contexts. The d3 layer
+                writes viewport-coords into style.left/top — see
+                computeTooltipPosition. */}
+            <PortalTooltip tooltipRef={tooltipRef} pinned={Boolean(pinnedPoint)} />
             {noDataOverlay}
           </div>
           <p className="no-export text-xs text-muted-foreground text-center mt-2">{instructions}</p>
diff --git a/packages/app/src/components/unofficial-run-provider.test.ts b/packages/app/src/components/unofficial-run-provider.test.ts
index 1863060d..3c24d32b 100644
--- a/packages/app/src/components/unofficial-run-provider.test.ts
+++ b/packages/app/src/components/unofficial-run-provider.test.ts
@@ -12,6 +12,7 @@ import { buildChartData, parseAvailableModelsAndSequences } from './unofficial-r
 /** Minimal BenchmarkRow stub — only fields used by buildChartData key logic. */
 function stubRow(overrides: Partial<BenchmarkRow> = {}): BenchmarkRow {
   return {
+    id: 1,
     hardware: 'h200',
     framework: 'sglang',
     model: 'dsr1',
@@ -29,6 +30,8 @@ function stubRow(overrides: Partial<BenchmarkRow> = {}): BenchmarkRow {
     decode_num_workers: 0,
     num_prefill_gpu: 8,
     num_decode_gpu: 8,
+    benchmark_type: 'single_turn',
+    offload_mode: 'off',
     isl: 1024,
     osl: 1024,
     conc: 128,
diff --git a/packages/app/src/components/unofficial-run-provider.tsx b/packages/app/src/components/unofficial-run-provider.tsx
index 6fd3aba1..dd2b0dbf 100644
--- a/packages/app/src/components/unofficial-run-provider.tsx
+++ b/packages/app/src/components/unofficial-run-provider.tsx
@@ -12,7 +12,7 @@ import {
 
 import type { ChartDefinition, HardwareConfig, InferenceData } from '@/components/inference/types';
 import { UnofficialBanner } from '@/components/ui/unofficial-banner';
-import { DB_MODEL_TO_DISPLAY, islOslToSequence } from '@semianalysisai/inferencex-constants';
+import { DB_MODEL_TO_DISPLAY, rowToSequence } from '@semianalysisai/inferencex-constants';
 import { computeToggle } from '@/hooks/useTogglableSet';
 import type { BenchmarkRow, EvalRow } from '@/lib/api';
 import { normalizeEvalHardwareKey } from '@/lib/chart-utils';
@@ -110,7 +110,7 @@ export function buildChartData(benchmarks: BenchmarkRow[]): UnofficialChartData
   const groups = new Map<string, BenchmarkRow[]>();
   for (const row of benchmarks) {
     const displayModel = DB_MODEL_TO_DISPLAY[row.model] ?? row.model;
-    const sequence = islOslToSequence(row.isl, row.osl);
+    const sequence = rowToSequence(row);
     if (!sequence) continue;
     const key = `${displayModel}_${sequence}`;
     if (!groups.has(key)) groups.set(key, []);
diff --git a/packages/app/src/hooks/api/use-agentic-aggregates.ts b/packages/app/src/hooks/api/use-agentic-aggregates.ts
new file mode 100644
index 00000000..4ca25ee2
--- /dev/null
+++ b/packages/app/src/hooks/api/use-agentic-aggregates.ts
@@ -0,0 +1,45 @@
+import { useQuery } from '@tanstack/react-query';
+
+export interface MetricPercentiles {
+  mean: number;
+  p50: number;
+  p75: number;
+  p90: number;
+  p99: number;
+  n: number;
+}
+
+export interface AgenticAggregate {
+  id: number;
+  isl: MetricPercentiles | null;
+  osl: MetricPercentiles | null;
+  kvCacheUtil: MetricPercentiles | null;
+  prefixCacheHitRate: MetricPercentiles | null;
+}
+
+export type AgenticAggregateMap = Record<number, AgenticAggregate>;
+
+async function fetchAgenticAggregates(
+  ids: number[],
+  signal?: AbortSignal,
+): Promise<AgenticAggregateMap> {
+  if (ids.length === 0) return {}; // nothing to look up — skip the network round-trip
+  const res = await fetch(`/api/v1/agentic-aggregates?ids=${ids.join(',')}`, { signal });
+  if (!res.ok) throw new Error(`agentic-aggregates ${res.status}`); // any non-2xx rejects
+  return (await res.json()) as AgenticAggregateMap; // unchecked cast: payload is not validated
+}
+
+/**
+ * Fetch per-id aggregate stats (mean/p50/p75/p90/p99) for ISL, OSL, KV
+ * cache utilization, and prefix cache hit rate. Used by the "Aggregates
+ * across configs" view on the agentic detail page.
+ */
+export function useAgenticAggregates(ids: number[], enabled = true) {
+  const sortedKey = [...new Set(ids)].toSorted((a, b) => a - b); // dedupe + numeric sort
+  return useQuery({
+    queryKey: ['agentic-aggregates', sortedKey.join(',')] as const, // same id set → same key
+    queryFn: ({ signal }: { signal: AbortSignal }) => fetchAgenticAggregates(sortedKey, signal),
+    enabled: enabled && sortedKey.length > 0, // never fire for an empty id list
+    staleTime: 5 * 60 * 1000, // 5 minutes
+  });
+}
diff --git a/packages/app/src/hooks/api/use-benchmark-siblings.ts b/packages/app/src/hooks/api/use-benchmark-siblings.ts
new file mode 100644
index 00000000..1ea90c0d
--- /dev/null
+++ b/packages/app/src/hooks/api/use-benchmark-siblings.ts
@@ -0,0 +1,46 @@
+import { useQuery } from '@tanstack/react-query';
+
+export interface BenchmarkSibling {
+  id: number;
+  conc: number;
+  offload_mode: string | null;
+  decode_tp: number;
+  decode_ep: number;
+  prefill_tp: number;
+  prefill_ep: number;
+  num_prefill_gpu: number;
+  num_decode_gpu: number;
+  disagg: boolean;
+  is_current: boolean;
+  has_trace: boolean;
+}
+
+export interface BenchmarkSku {
+  hardware: string;
+  framework: string;
+  model: string;
+  precision: string;
+  spec_method: string;
+  benchmark_type: string;
+  github_run_id: number;
+  date: string;
+}
+
+export interface BenchmarkSiblings {
+  sku: BenchmarkSku;
+  siblings: BenchmarkSibling[];
+}
+
+export function useBenchmarkSiblings(id: number | null) {
+  return useQuery({
+    queryKey: ['benchmark-siblings', id] as const,
+    queryFn: async ({ signal }) => {
+      const res = await fetch(`/api/v1/benchmark-siblings?id=${id}`, { signal });
+      if (res.status === 404) return null; // a 404 resolves to null instead of erroring
+      if (!res.ok) throw new Error(`benchmark-siblings ${res.status}`); // other non-2xx rejects
+      return (await res.json()) as BenchmarkSiblings; // unchecked cast: payload is not validated
+    },
+    enabled: id !== null && id > 0, // never fire for a missing or non-positive id
+    staleTime: 5 * 60 * 1000, // 5 minutes
+  });
+}
diff --git a/packages/app/src/hooks/api/use-benchmarks.test.ts b/packages/app/src/hooks/api/use-benchmarks.test.ts
index 7329896d..c4f49130 100644
--- a/packages/app/src/hooks/api/use-benchmarks.test.ts
+++ b/packages/app/src/hooks/api/use-benchmarks.test.ts
@@ -5,12 +5,29 @@ import { benchmarkQueryOptions } from '@/hooks/api/use-benchmarks';
 describe('benchmarkQueryOptions', () => {
   it('builds query key from model and date', () => {
     const opts = benchmarkQueryOptions('DeepSeek-R1-0528', '2026-03-01');
-    expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'latest']);
+    expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'latest', '']);
   });
 
   it('builds exact query key when exact=true', () => {
     const opts = benchmarkQueryOptions('DeepSeek-R1-0528', '2026-03-01', true, true);
-    expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'exact']);
+    expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'exact', '']);
+  });
+
+  it('includes runId in query key when provided', () => {
+    const opts = benchmarkQueryOptions(
+      'DeepSeek-R1-0528',
+      '2026-03-01',
+      true,
+      false,
+      '26194160120',
+    );
+    expect(opts.queryKey).toEqual([
+      'benchmarks',
+      'DeepSeek-R1-0528',
+      '2026-03-01',
+      'latest',
+      '26194160120',
+    ]);
   });
 
   it('produces distinct keys for different models', () => {
diff --git a/packages/app/src/hooks/api/use-benchmarks.ts b/packages/app/src/hooks/api/use-benchmarks.ts
index 6da1568e..8fd1f4e9 100644
--- a/packages/app/src/hooks/api/use-benchmarks.ts
+++ b/packages/app/src/hooks/api/use-benchmarks.ts
@@ -8,14 +8,16 @@ export function benchmarkQueryOptions(
   date: string,
   enabled = true,
   exact?: boolean,
+  runId?: string,
 ) {
   return {
-    queryKey: ['benchmarks', model, date, exact ? 'exact' : 'latest'] as const,
-    queryFn: ({ signal }: { signal: AbortSignal }) => fetchBenchmarks(model, date, exact, signal),
+    queryKey: ['benchmarks', model, date, exact ? 'exact' : 'latest', runId ?? ''] as const,
+    queryFn: ({ signal }: { signal: AbortSignal }) =>
+      fetchBenchmarks(model, date, exact, signal, runId),
     enabled: enabled && Boolean(model),
   };
 }
 
-export function useBenchmarks(model: string, date?: string, enabled = true) {
-  return useQuery(benchmarkQueryOptions(model, date ?? 'latest', enabled));
+export function useBenchmarks(model: string, date?: string, enabled = true, runId?: string) {
+  return useQuery(benchmarkQueryOptions(model, date ?? 'latest', enabled, undefined, runId));
 }
diff --git a/packages/app/src/hooks/api/use-derived-agentic-metrics.ts b/packages/app/src/hooks/api/use-derived-agentic-metrics.ts
new file mode 100644
index 00000000..6bc7ae5e
--- /dev/null
+++ b/packages/app/src/hooks/api/use-derived-agentic-metrics.ts
@@ -0,0 +1,41 @@
+import { useQuery } from '@tanstack/react-query';
+
+export interface DerivedAgenticMetric {
+  id: number;
+  /** Mean across sessions of e2e time (Σ per-turn request_latency) rescaled
+   *  by mean_load / session_load. Null when the JSONL had no usable records. */
+  normalized_session_time_s: number | null;
+  /** P90 of per-turn ISL/TTFT across every turn in every session.
+   *  Null when no prefill rates could be computed. */
+  p90_prefill_tps_per_user: number | null;
+}
+
+export type DerivedAgenticMetricMap = Record<number, DerivedAgenticMetric>;
+
+async function fetchDerivedAgenticMetrics(
+  ids: number[],
+  signal?: AbortSignal,
+): Promise<DerivedAgenticMetricMap> {
+  if (ids.length === 0) return {}; // nothing to look up — skip the network round-trip
+  const res = await fetch(`/api/v1/derived-agentic-metrics?ids=${ids.join(',')}`, { signal });
+  if (!res.ok) throw new Error(`derived-agentic-metrics ${res.status}`); // any non-2xx rejects
+  return (await res.json()) as DerivedAgenticMetricMap; // unchecked cast: payload is not validated
+}
+
+/**
+ * Fetch per-id derived agentic metrics (session time + p90 prefill TPS/user)
+ * computed live from the stored aiperf profile_export.jsonl. Used to drive
+ * the "Session Time" and "Prefill TPS/user" chart variants.
+ *
+ * Ids without a trace_replay blob (older or non-aiperf agentic runs) are
+ * silently omitted from the response.
+ */
+export function useDerivedAgenticMetrics(ids: number[], enabled = true) {
+  const sortedKey = [...new Set(ids)].toSorted((a, b) => a - b); // dedupe + numeric sort
+  return useQuery({
+    queryKey: ['derived-agentic-metrics', sortedKey.join(',')] as const, // same id set → same key
+    queryFn: ({ signal }: { signal: AbortSignal }) => fetchDerivedAgenticMetrics(sortedKey, signal),
+    enabled: enabled && sortedKey.length > 0, // never fire for an empty id list
+    staleTime: 5 * 60 * 1000, // 5 minutes
+  });
+}
diff --git a/packages/app/src/hooks/api/use-request-timeline.ts b/packages/app/src/hooks/api/use-request-timeline.ts
new file mode 100644
index 00000000..d3ceaab8
--- /dev/null
+++ b/packages/app/src/hooks/api/use-request-timeline.ts
@@ -0,0 +1,59 @@
+import { useQuery } from '@tanstack/react-query';
+
+export interface RequestRecord {
+  /** Conversation id (groups turns of one agent session). */
+  cid: string;
+  /** Zero-based turn index within the conversation. */
+  ti: number;
+  /** Worker id (concurrency slot that handled this request). */
+  wid: string;
+  /** Sub-agent depth (0 = top-level). */
+  ad: number;
+  /** `warmup` or `profiling`. */
+  phase: string;
+  /** ns offset from timeline.startNs. Load gen decided to dispatch. */
+  credit: number;
+  /** ns offset from timeline.startNs. HTTP send started. */
+  start: number;
+  /** ns offset from timeline.startNs. First server acknowledgement (or null). */
+  ack: number | null;
+  /** ns offset from timeline.startNs. Last byte received. */
+  end: number;
+  ttftMs: number | null;
+  isl: number | null;
+  osl: number | null;
+  cancelled: boolean;
+}
+
+export interface RequestTimeline {
+  version: number;
+  startNs: number;
+  endNs: number;
+  durationS: number;
+  requests: RequestRecord[];
+}
+
+async function fetchRequestTimeline(
+  id: number,
+  signal?: AbortSignal,
+): Promise<RequestTimeline | null> {
+  const reply = await fetch(`/api/v1/request-timeline?id=${id}`, { signal });
+  if (reply.ok) return (await reply.json()) as RequestTimeline;
+  if (reply.status === 404) return null; // a 404 is reported as "no timeline", not an error
+  throw new Error(`request-timeline ${reply.status}`);
+}
+
+/**
+ * On-demand React Query hook for one agentic point's per-request Gantt timeline.
+ * The query stays idle until the caller passes `enabled` and a truthy `id`, so the
+ * payload (~30 KB per point) is only downloaded once the timeline view needs it.
+ */
+export function useRequestTimeline(id: number | null, enabled = false) {
+  return useQuery({
+    staleTime: 5 * 60 * 1000, // five minutes
+    enabled: Boolean(id) && enabled,
+    queryKey: ['request-timeline', id] as const,
+    queryFn: ({ signal }: { signal: AbortSignal }) =>
+      id ? fetchRequestTimeline(id, signal) : Promise.resolve(null),
+  });
+}
diff --git a/packages/app/src/hooks/api/use-trace-availability.ts b/packages/app/src/hooks/api/use-trace-availability.ts
new file mode 100644
index 00000000..02176d59
--- /dev/null
+++ b/packages/app/src/hooks/api/use-trace-availability.ts
@@ -0,0 +1,29 @@
+import { useQuery } from '@tanstack/react-query';
+
+export type TraceAvailabilityMap = Record<number, true>;
+
+async function fetchTraceAvailability(
+  ids: number[],
+  signal?: AbortSignal,
+): Promise<TraceAvailabilityMap> {
+  if (ids.length === 0) return {}; // no ids: answer locally instead of hitting the API
+  const reply = await fetch(`/api/v1/trace-availability?ids=${ids.join(',')}`, { signal });
+  if (reply.ok) return (await reply.json()) as TraceAvailabilityMap;
+  throw new Error(`trace-availability ${reply.status}`);
+}
+
+/**
+ * Bulk presence check: reports which of the given `benchmark_results.id`s have
+ * a stored trace_replay blob. The scatter chart uses it to decide whether to
+ * show the "View charts" button — one cheap boolean per id rather than
+ * shipping multi-MB profile blobs just to test for existence.
+ */
+export function useTraceAvailability(ids: number[], enabled = true) {
+  const uniqueIds = [...new Set(ids)].toSorted((lhs, rhs) => lhs - rhs);
+  return useQuery({
+    staleTime: 5 * 60 * 1000, // five minutes
+    enabled: uniqueIds.length > 0 && enabled,
+    queryKey: ['trace-availability', uniqueIds.join(',')] as const,
+    queryFn: ({ signal }: { signal: AbortSignal }) => fetchTraceAvailability(uniqueIds, signal),
+  });
+}
diff --git a/packages/app/src/hooks/api/use-trace-histograms.ts b/packages/app/src/hooks/api/use-trace-histograms.ts
new file mode 100644
index 00000000..db4220d2
--- /dev/null
+++ b/packages/app/src/hooks/api/use-trace-histograms.ts
@@ -0,0 +1,39 @@
+import { useQuery } from '@tanstack/react-query';
+
+export interface TraceHistogramPoint {
+  id: number;
+  /** Input sequence length (tokens) per completed request. */
+  isl: number[];
+  /** Output sequence length (tokens) per completed request. */
+  osl: number[];
+}
+
+export type TraceHistogramMap = Record<number, TraceHistogramPoint>;
+
+async function fetchTraceHistograms(
+  ids: number[],
+  signal?: AbortSignal,
+): Promise<TraceHistogramMap> {
+  if (ids.length === 0) return {}; // empty request set: resolve without a network call
+  const reply = await fetch(`/api/v1/trace-histograms?ids=${ids.join(',')}`, { signal });
+  if (reply.ok) return (await reply.json()) as TraceHistogramMap;
+  throw new Error(`trace-histograms ${reply.status}`);
+}
+
+/**
+ * Load the per-request ISL/OSL arrays for a set of benchmark_results.id values.
+ * Ids with no stored trace_replay blob are silently missing from the response.
+ *
+ * Callers pass the agentic ids currently on screen; React Query takes care of
+ * dedup + stale-while-revalidate. The cache key is the de-duplicated ids, sorted
+ * and comma-joined, so any permutation of one set maps to the same cache entry.
+ */
+export function useTraceHistograms(ids: number[], enabled = true) {
+  const uniqueIds = [...new Set(ids)].toSorted((lhs, rhs) => lhs - rhs);
+  return useQuery({
+    staleTime: 5 * 60 * 1000, // five minutes
+    enabled: uniqueIds.length > 0 && enabled,
+    queryKey: ['trace-histograms', uniqueIds.join(',')] as const,
+    queryFn: ({ signal }: { signal: AbortSignal }) => fetchTraceHistograms(uniqueIds, signal),
+  });
+}
diff --git a/packages/app/src/hooks/api/use-trace-server-metrics.ts b/packages/app/src/hooks/api/use-trace-server-metrics.ts
new file mode 100644
index 00000000..11905aaa
--- /dev/null
+++ b/packages/app/src/hooks/api/use-trace-server-metrics.ts
@@ -0,0 +1,79 @@
+import { useQuery } from '@tanstack/react-query';
+
+export interface TimeSeriesPoint {
+  /** Seconds from benchmark start. */
+  t: number;
+  value: number;
+}
+export interface QueueDepthPoint {
+  t: number;
+  running: number;
+  waiting: number;
+  total: number;
+}
+export interface PointMeta {
+  id: number;
+  hardware: string;
+  framework: string;
+  model: string;
+  precision: string;
+  spec_method: string;
+  disagg: boolean;
+  conc: number;
+  offload_mode: string | null;
+  isl: number | null;
+  osl: number | null;
+  benchmark_type: string;
+  date: string;
+  run_url: string | null;
+  server_gpu_cache_hit_rate: number | null;
+  server_cpu_cache_hit_rate: number | null;
+}
+
+export interface TraceServerMetrics {
+  meta: PointMeta;
+  startNs: number;
+  endNs: number;
+  durationS: number;
+  timeslicesCount: number;
+  kvCacheUsage: TimeSeriesPoint[];
+  prefixCacheHitRate: TimeSeriesPoint[];
+  queueDepth: QueueDepthPoint[];
+  promptTokensBySource: Record<string, TimeSeriesPoint[]>;
+  prefillTps: TimeSeriesPoint[];
+  decodeTps: TimeSeriesPoint[];
+  /** Tokens served from prefix cache per scrape (vllm:prefix_cache_hits rate). */
+  prefixCacheHitsTps: TimeSeriesPoint[];
+  /** Host (CPU offload) KV cache utilization, 0..1. SGLang hicache only. */
+  hostKvCacheUsage: TimeSeriesPoint[];
+  /**
+   * Per-DP-rank KV cache utilization. Empty for single-engine deployments —
+   * the cluster-average `kvCacheUsage` line covers that case alone.
+   */
+  kvCacheUsageByEngine: { engineLabel: string; points: TimeSeriesPoint[] }[];
+}
+
+async function fetchTraceServerMetrics(
+  id: number,
+  signal?: AbortSignal,
+): Promise<TraceServerMetrics | null> {
+  const reply = await fetch(`/api/v1/trace-server-metrics?id=${id}`, { signal });
+  if (reply.ok) return (await reply.json()) as TraceServerMetrics;
+  if (reply.status === 404) return null; // a 404 is reported as "no metrics", not an error
+  throw new Error(`trace-server-metrics ${reply.status}`);
+}
+
+/**
+ * On-demand React Query hook for one agentic point's parsed server-metric
+ * time-series. Idle unless the caller passes `enabled=true` (the detail panel
+ * opens) together with a truthy `id`, so the parse cost isn't paid on every hover.
+ */
+export function useTraceServerMetrics(id: number | null, enabled = false) {
+  return useQuery({
+    staleTime: 5 * 60 * 1000, // five minutes
+    enabled: Boolean(id) && enabled,
+    queryKey: ['trace-server-metrics', id] as const,
+    queryFn: ({ signal }: { signal: AbortSignal }) =>
+      id ? fetchTraceServerMetrics(id, signal) : Promise.resolve(null),
+  });
+}
diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts
index 999cbfde..31cf906a 100644
--- a/packages/app/src/lib/api.ts
+++ b/packages/app/src/lib/api.ts
@@ -6,6 +6,8 @@
 import type { SubmissionsResponse } from './submissions-types';
 
 export interface BenchmarkRow {
+  /** Stable per-point id from benchmark_results; used to look up trace histograms. */
+  id: number;
   hardware: string;
   framework: string;
   model: string;
@@ -23,9 +25,13 @@ export interface BenchmarkRow {
   decode_num_workers: number;
   num_prefill_gpu: number;
   num_decode_gpu: number;
-  isl: number;
-  osl: number;
+  benchmark_type: string;
+  // Null for agentic_traces rows; numeric for single_turn fixed-seq rows.
+  isl: number | null;
+  osl: number | null;
   conc: number;
+  /** KV-cache offload mode: 'on' | 'off'. Defaults to 'off' for fixed-seq. */
+  offload_mode: string;
   image: string | null;
   metrics: Record<string, number>;
   date: string;
@@ -115,10 +121,13 @@ export function fetchBenchmarks(
   date?: string,
   exact?: boolean,
   signal?: AbortSignal,
+  /** Optional github_run_id to scope to a specific workflow run. */
+  runId?: string,
 ) {
   const params = new URLSearchParams({ model });
   if (date) params.set('date', date);
   if (exact) params.set('exact', 'true');
+  if (runId) params.set('runId', runId);
   return fetchJson<BenchmarkRow[]>(`/api/v1/benchmarks?${params}`, signal);
 }
 
@@ -141,13 +150,15 @@ export function fetchWorkflowInfo(date: string, signal?: AbortSignal) {
 
 export interface AvailabilityRow {
   model: string;
-  isl: number;
-  osl: number;
+  // Null for agentic_traces rows; numeric for single_turn fixed-seq rows.
+  isl: number | null;
+  osl: number | null;
   precision: string;
   hardware: string;
   framework: string;
   spec_method: string;
   disagg: boolean;
+  benchmark_type: string;
   date: string;
 }
 
diff --git a/packages/app/src/lib/benchmark-transform.test.ts b/packages/app/src/lib/benchmark-transform.test.ts
index be76438e..fcbca681 100644
--- a/packages/app/src/lib/benchmark-transform.test.ts
+++ b/packages/app/src/lib/benchmark-transform.test.ts
@@ -6,6 +6,7 @@ import { rowToAggDataEntry, transformBenchmarkRows } from './benchmark-transform
 
 function makeRow(overrides: Partial<BenchmarkRow> = {}): BenchmarkRow {
   return {
+    id: 1,
     hardware: 'h200',
     framework: 'trt',
     model: 'dsr1',
@@ -23,6 +24,8 @@ function makeRow(overrides: Partial<BenchmarkRow> = {}): BenchmarkRow {
     decode_num_workers: 0,
     num_prefill_gpu: 8,
     num_decode_gpu: 8,
+    benchmark_type: 'single_turn',
+    offload_mode: 'off',
     isl: 1024,
     osl: 1024,
     conc: 64,
diff --git a/packages/app/src/lib/benchmark-transform.ts b/packages/app/src/lib/benchmark-transform.ts
index 107f0b12..3594750c 100644
--- a/packages/app/src/lib/benchmark-transform.ts
+++ b/packages/app/src/lib/benchmark-transform.ts
@@ -15,10 +15,42 @@ import { createChartDataPoint, getHardwareKey } from '@/lib/chart-utils';
 import { getHardwareConfig } from '@/lib/constants';
 import type { BenchmarkRow } from '@/lib/api';
 
+/**
+ * Agentic trace-replay runs (`benchmark_type === 'agentic_traces'`) emit ttft/ttlt/itl
+ * but not the intvty/e2el/tpot keys the chart pipeline expects. Bridge them here:
+ *   e2el   ≡ ttlt   (time-to-last-token == end-to-end latency)
+ *   tpot   ≡ itl    (time-per-output-token == inter-token-latency for single-output)
+ *   intvty ≡ 1/itl  (tok/s from the user's perspective; skipped unless itl > 0)
+ * Applied per stat prefix incl. p75/p95; existing fields win, we only fill in the gaps.
+ */
+function agenticAliases(m: Record<string, number>): Record<string, number> {
+  const out: Record<string, number> = {};
+  for (const prefix of ['mean', 'median', 'p75', 'p90', 'p95', 'p99', 'p99.9']) {
+    const itl = m[`${prefix}_itl`];
+    const ttlt = m[`${prefix}_ttlt`];
+    if (m[`${prefix}_e2el`] === undefined && ttlt !== undefined) out[`${prefix}_e2el`] = ttlt;
+    if (m[`${prefix}_tpot`] === undefined && itl !== undefined) out[`${prefix}_tpot`] = itl;
+    if (m[`${prefix}_intvty`] === undefined && itl !== undefined && itl > 0) {
+      out[`${prefix}_intvty`] = 1 / itl;
+    }
+  }
+  return out;
+}
+
 /** Convert a DB benchmark row to an AggDataEntry. */
 export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
-  const m = row.metrics;
+  const isAgentic = row.benchmark_type === 'agentic_traces';
+  const m = isAgentic ? { ...row.metrics, ...agenticAliases(row.metrics) } : row.metrics;
+  // Prefer the dedicated column (added in migration 004); fall back to the
+  // legacy stash inside `metrics` for any rows ingested before that column
+  // existed.
+  const rawMetrics = row.metrics as Record<string, unknown>;
+  const offloadMode =
+    row.offload_mode ??
+    (typeof rawMetrics.offload_mode === 'string' ? rawMetrics.offload_mode : undefined);
   return {
+    // Coerce: Postgres bigint comes through the SQL client as a string.
+    id: typeof row.id === 'number' ? row.id : Number(row.id),
     hw: row.hardware,
     framework: row.framework,
     model: DB_MODEL_TO_DISPLAY[row.model] ?? row.model,
@@ -32,23 +64,43 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
     mean_ttft: m.mean_ttft ?? 0,
     median_ttft: m.median_ttft ?? 0,
     std_ttft: m.std_ttft ?? 0,
+    p75_ttft: m.p75_ttft ?? 0,
+    p90_ttft: m.p90_ttft ?? 0,
+    p95_ttft: m.p95_ttft ?? 0,
     p99_ttft: m.p99_ttft ?? 0,
+    'p99.9_ttft': m['p99.9_ttft'] ?? 0,
     mean_tpot: m.mean_tpot ?? 0,
     median_tpot: m.median_tpot ?? 0,
     std_tpot: m.std_tpot ?? 0,
+    p75_tpot: m.p75_tpot ?? 0,
+    p90_tpot: m.p90_tpot ?? 0,
+    p95_tpot: m.p95_tpot ?? 0,
     p99_tpot: m.p99_tpot ?? 0,
+    'p99.9_tpot': m['p99.9_tpot'] ?? 0,
     mean_intvty: m.mean_intvty ?? 0,
     median_intvty: m.median_intvty ?? 0,
     std_intvty: m.std_intvty ?? 0,
+    p75_intvty: m.p75_intvty ?? 0,
+    p90_intvty: m.p90_intvty ?? 0,
+    p95_intvty: m.p95_intvty ?? 0,
     p99_intvty: m.p99_intvty ?? 0,
+    'p99.9_intvty': m['p99.9_intvty'] ?? 0,
     mean_itl: m.mean_itl ?? 0,
     median_itl: m.median_itl ?? 0,
     std_itl: m.std_itl ?? 0,
+    p75_itl: m.p75_itl ?? 0,
+    p90_itl: m.p90_itl ?? 0,
+    p95_itl: m.p95_itl ?? 0,
     p99_itl: m.p99_itl ?? 0,
+    'p99.9_itl': m['p99.9_itl'] ?? 0,
     mean_e2el: m.mean_e2el ?? 0,
     median_e2el: m.median_e2el ?? 0,
     std_e2el: m.std_e2el ?? 0,
+    p75_e2el: m.p75_e2el ?? 0,
+    p90_e2el: m.p90_e2el ?? 0,
+    p95_e2el: m.p95_e2el ?? 0,
     p99_e2el: m.p99_e2el ?? 0,
+    'p99.9_e2el': m['p99.9_e2el'] ?? 0,
     disagg: row.disagg,
     num_prefill_gpu: row.num_prefill_gpu,
     num_decode_gpu: row.num_decode_gpu,
@@ -68,6 +120,17 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
     date: row.date,
     actualDate: (row as any).actualDate ?? row.date,
     run_url: row.run_url ?? undefined,
+    benchmark_type: row.benchmark_type,
+    isl: row.isl,
+    osl: row.osl,
+    offload_mode: offloadMode,
+    server_gpu_cache_hit_rate: m.server_gpu_cache_hit_rate,
+    server_cpu_cache_hit_rate: m.server_cpu_cache_hit_rate,
+    theoretical_cache_hit_rate: m.theoretical_cache_hit_rate,
+    num_requests_total: m.num_requests_total,
+    num_requests_successful: m.num_requests_successful,
+    total_prompt_tokens: m.total_prompt_tokens,
+    total_generation_tokens: m.total_generation_tokens,
   };
 }
 
@@ -77,13 +140,30 @@ interface PreparedEntry {
   date: string;
 }
 
+/**
+ * Swap the leading stat prefix of a chart x-axis key (`median_ttft` → `p99_ttft`).
+ * Keys that do not start with a known prefix are returned unchanged. `percentile`
+ * is inserted literally: `$&`/`$1` sequences in it are not expanded.
+ */
+export function withPercentile(key: string, percentile: string): string {
+  return key.replace(/^(?:p99\.9|p99|p95|p90|p75|median|mean)_/u, () => `${percentile}_`);
+}
+
 /**
  * Transform raw BenchmarkRow[] into chart-ready InferenceData[][] and HardwareConfig.
  * Returns one InferenceData[] per chart definition (e2e, interactivity).
  *
  * Converts rows to AggDataEntry once, then reuses for each chart definition.
+ *
+ * @param percentile Optional latency percentile for the chart x-axis
+ *   (default 'median'). Swaps `median_intvty`/`median_e2el` in the chart
+ *   definition for the chosen percentile — only agentic rows carry the
+ *   full set (median/p75/p90/p95/p99/p99.9) so this mainly affects that scenario.
  */
-export function transformBenchmarkRows(rows: BenchmarkRow[]): {
+export function transformBenchmarkRows(
+  rows: BenchmarkRow[],
+  percentile = 'median',
+): {
   chartData: InferenceData[][];
   hardwareConfig: HardwareConfig;
 } {
@@ -109,13 +189,14 @@ export function transformBenchmarkRows(rows: BenchmarkRow[]): {
 
   // Phase 2: Build chart data per chart definition (reusing prepared entries)
   const chartData = (chartDefinitions as ChartDefinition[]).map((chartDef) => {
+    const xKey = withPercentile(chartDef.x, percentile);
     const groupedByHw: Record<string, InferenceData[]> = {};
 
     for (const { entry, hwKey, date } of prepared) {
       const dataPoint = createChartDataPoint(
         date,
         entry,
-        chartDef.x as keyof AggDataEntry,
+        xKey as keyof AggDataEntry,
         chartDef.y as keyof AggDataEntry,
         hwKey,
       );
diff --git a/packages/app/src/lib/compare-pair-defaults.test.ts b/packages/app/src/lib/compare-pair-defaults.test.ts
index f0f1ef5b..da81ca0e 100644
--- a/packages/app/src/lib/compare-pair-defaults.test.ts
+++ b/packages/app/src/lib/compare-pair-defaults.test.ts
@@ -6,6 +6,7 @@ import { pickPairDefaults } from './compare-pair-defaults';
 
 function makeRow(overrides: Partial<BenchmarkRow>): BenchmarkRow {
   return {
+    id: 1,
     hardware: 'h100',
     framework: 'sglang',
     model: 'dsr1',
@@ -30,6 +31,8 @@ function makeRow(overrides: Partial<BenchmarkRow>): BenchmarkRow {
     metrics: { tput_per_gpu: 100 },
     date: '2026-01-01',
     run_url: null,
+    benchmark_type: 'single_turn',
+    offload_mode: 'off',
     ...overrides,
   };
 }
diff --git a/packages/app/src/lib/compare-pair-defaults.ts b/packages/app/src/lib/compare-pair-defaults.ts
index be6450ad..f5a37e1f 100644
--- a/packages/app/src/lib/compare-pair-defaults.ts
+++ b/packages/app/src/lib/compare-pair-defaults.ts
@@ -14,6 +14,7 @@ export function pickPairDefaults(
   const seenB = new Map<string, Set<string>>();
   for (const row of rows) {
     if (row.hardware !== a && row.hardware !== b) continue;
+    if (row.isl === null || row.osl === null) continue;
     const seq = islOslToSequence(row.isl, row.osl);
     if (!seq) continue;
     const key = `${seq}|${row.precision}`;
diff --git a/packages/app/src/lib/d3-chart/layers/scatter-points.ts b/packages/app/src/lib/d3-chart/layers/scatter-points.ts
index a3d28315..421ac69b 100644
--- a/packages/app/src/lib/d3-chart/layers/scatter-points.ts
+++ b/packages/app/src/lib/d3-chart/layers/scatter-points.ts
@@ -61,17 +61,33 @@ export function renderScatterPoints<T extends { precision: string; x: number; y:
   // Visible shape is created (or swapped, if selectedPrecisions changed) in the
   // merged update pass below.
 
-  // Label (enter only)
+  // Label (enter only). Multi-line labels are passed as `\n`-separated strings;
+  // we anchor the entire stack via the FIRST tspan's `dy` so getBBox() doesn't
+  // pick up the text element's own (unused) y=0 origin. The first tspan is
+  // raised so the LAST line baseline lands ~8px above the point; subsequent
+  // tspans cascade down by 1.1em.
   if (!config.hideLabels && config.getLabelText && config.foreground) {
-    entered
-      .append('text')
-      .attr('class', 'point-label')
-      .attr('dy', -8)
-      .attr('text-anchor', 'middle')
-      .attr('fill', config.foreground)
-      .attr('font-size', '10px')
-      .attr('pointer-events', 'none')
-      .text(config.getLabelText);
+    const labelGetter = config.getLabelText;
+    entered.each(function (d) {
+      const lines = labelGetter(d).split('\n');
+      const text = d3
+        .select(this)
+        .append('text')
+        .attr('class', 'point-label')
+        .attr('text-anchor', 'middle')
+        .attr('fill', config.foreground!)
+        .attr('font-size', '10px')
+        .attr('font-weight', '700')
+        .attr('pointer-events', 'none');
+      const firstDy = -(0.8 + (lines.length - 1) * 1.1);
+      lines.forEach((line, i) => {
+        text
+          .append('tspan')
+          .attr('x', 0)
+          .attr('dy', i === 0 ? `${firstDy}em` : '1.1em')
+          .text(line);
+      });
+    });
   }
 
   // Exit: remove stale points
@@ -128,20 +144,32 @@ export function renderScatterPoints<T extends { precision: string; x: number; y:
     }
   });
 
-  // Update labels: use data join so labels are created/removed properly on toggle
+  // Update labels: use data join so labels are created/removed properly on toggle.
+  // Anchor the stack via the first tspan (NOT the text dy — that doesn't shift the
+  // bbox cleanly when there are tspan children).
   if (!config.hideLabels && config.getLabelText && config.foreground) {
+    const labelGetter = config.getLabelText;
     points.each(function (d) {
-      const g = d3.select(this);
-      g.selectAll<SVGTextElement, boolean>('.point-label')
+      const lines = labelGetter(d).split('\n');
+      const text = d3
+        .select(this)
+        .selectAll<SVGTextElement, boolean>('.point-label')
         .data([true])
         .join('text')
         .attr('class', 'point-label')
-        .attr('dy', -8)
         .attr('text-anchor', 'middle')
         .attr('fill', config.foreground!)
         .attr('font-size', '10px')
-        .attr('pointer-events', 'none')
-        .text(config.getLabelText!(d));
+        .attr('font-weight', '700')
+        .attr('pointer-events', 'none');
+      const firstDy = -(0.8 + (lines.length - 1) * 1.1);
+      text
+        .selectAll<SVGTSpanElement, string>('tspan')
+        .data(lines)
+        .join('tspan')
+        .attr('x', 0)
+        .attr('dy', (_l, i) => (i === 0 ? `${firstDy}em` : '1.1em'))
+        .text((l) => l);
     });
   } else {
     points.selectAll('.point-label').remove();
@@ -261,7 +289,21 @@ export function attachScatterTooltipHandlers<
     });
 }
 
-/** Compute tooltip left/top, flipping when it would overflow the chart container. */
+/**
+ * Compute tooltip left/top **in viewport coordinates** so the tooltip can be
+ * rendered via portal with `position: fixed`. Callers still pass cursor coords
+ * relative to `container` (matching `d3.pointer(event, container)`).
+ *
+ * Why viewport coords: the chart cards use `backdrop-filter`, which creates
+ * a stacking context. A tooltip painted inside the upper card's stacking
+ * context cannot rise above the lower card's stacking context regardless of
+ * its z-index. Portalling to document.body + `position: fixed` sidesteps the
+ * whole problem; we just need the coordinates in viewport space.
+ *
+ * Strategy: pick preferred side (right/below cursor), flip if it overflows the
+ * container, then clamp to container bounds. Tall tooltips that don't fit get
+ * clamped to the container edges.
+ */
 export function computeTooltipPosition(
   mx: number,
   my: number,
@@ -280,13 +322,21 @@ export function computeTooltipPosition(
   // Force reflow so we get real dimensions
   const tw = node.getBoundingClientRect().width || node.offsetWidth;
   const th = node.getBoundingClientRect().height || node.offsetHeight;
+  const rect = container.getBoundingClientRect();
   const cw = container.clientWidth;
   const ch = container.clientHeight;
+  const EDGE_PAD = 4;
+
+  // Prefer right of cursor; flip to left if no room.
+  let left = mx + offset + tw <= cw ? mx + offset : mx - offset - tw;
+  left = Math.max(EDGE_PAD, Math.min(cw - tw - EDGE_PAD, left));
 
-  const left = mx + offset + tw > cw ? mx - offset - tw : mx + offset;
-  const top = my + offset + th > ch ? my - offset - th : my + offset;
+  // Prefer below cursor; flip above if no room.
+  let top = my + offset + th <= ch ? my + offset : my - offset - th;
+  top = Math.max(EDGE_PAD, Math.min(ch - th - EDGE_PAD, top));
 
-  return { left, top };
+  // Convert container-local coords → viewport coords for `position: fixed`.
+  return { left: left + rect.left, top: top + rect.top };
 }
 
 /** Update scatter point positions on zoom. */
diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts
index 6a543925..c18266ba 100644
--- a/packages/app/src/lib/data-mappings.ts
+++ b/packages/app/src/lib/data-mappings.ts
@@ -138,17 +138,73 @@ export enum Sequence {
   OneK_OneK = '1k/1k',
   OneK_EightK = '1k/8k',
   EightK_OneK = '8k/1k',
+  AgenticTraces = 'agentic-traces',
 }
 
-const SEQUENCE_CONFIG: Record<Sequence, { label: string; compact: string; category: CategoryTag }> =
-  {
-    [Sequence.OneK_OneK]: { label: '1K / 1K', compact: '1k1k', category: 'default' },
-    [Sequence.OneK_EightK]: { label: '1K / 8K', compact: '1k8k', category: 'deprecated' },
-    [Sequence.EightK_OneK]: { label: '8K / 1K', compact: '8k1k', category: 'default' },
-  };
+/**
+ * Top-level scenario kind. Fixed-seq sequences cluster under a single group
+ * in the selector; agentic traces sit alongside as their own kind.
+ */
+export type ScenarioKind = 'fixed-seq' | 'agentic';
+
+export function sequenceKind(seq: Sequence): ScenarioKind {
+  return seq !== Sequence.AgenticTraces ? 'fixed-seq' : 'agentic'; // only agentic traces differ
+}
+
+const SEQUENCE_CONFIG: Record<
+  Sequence,
+  { label: string; compact: string; category: CategoryTag; kind: ScenarioKind }
+> = {
+  [Sequence.OneK_OneK]: {
+    label: '1K / 1K',
+    compact: '1k1k',
+    category: 'default',
+    kind: 'fixed-seq',
+  },
+  [Sequence.OneK_EightK]: {
+    label: '1K / 8K',
+    compact: '1k8k',
+    category: 'deprecated',
+    kind: 'fixed-seq',
+  },
+  [Sequence.EightK_OneK]: {
+    label: '8K / 1K',
+    compact: '8k1k',
+    category: 'default',
+    kind: 'fixed-seq',
+  },
+  [Sequence.AgenticTraces]: {
+    label: 'Agentic Traces',
+    compact: 'agentic',
+    category: 'default',
+    kind: 'agentic',
+  },
+};
 
 export const SEQUENCE_OPTIONS = Object.keys(SEQUENCE_CONFIG) as Sequence[];
 
+/**
+ * Percentile of the latency distribution used for the chart x-axis when
+ * viewing agentic traces. Agentic rows carry median/p75/p90/p95/p99/p99.9
+ * variants for ttft, ttlt (=e2el), and itl (and intvty derived from itl);
+ * p75 and p90 are surfaced in the UI.
+ */
+export enum Percentile {
+  P75 = 'p75',
+  P90 = 'p90',
+}
+
+const PERCENTILE_CONFIG: Record<Percentile, { label: string }> = {
+  [Percentile.P75]: { label: 'p75' },
+  [Percentile.P90]: { label: 'p90' },
+};
+
+export const PERCENTILE_OPTIONS = Object.keys(PERCENTILE_CONFIG) as Percentile[];
+
+export function getPercentileLabel(p: Percentile): string {
+  return PERCENTILE_CONFIG[p]?.label ?? p; // no config entry → fall back to the raw value
+}
+
 export const DEPRECATED_SEQUENCES: ReadonlySet<Sequence> = new Set(
   (Object.entries(SEQUENCE_CONFIG) as [Sequence, (typeof SEQUENCE_CONFIG)[Sequence]][])
     .filter(([, c]) => c.category === 'deprecated')
diff --git a/packages/app/src/lib/energy-metrics.test.ts b/packages/app/src/lib/energy-metrics.test.ts
index 28cc1e36..2f5844c1 100644
--- a/packages/app/src/lib/energy-metrics.test.ts
+++ b/packages/app/src/lib/energy-metrics.test.ts
@@ -57,23 +57,43 @@ function makeEntry(overrides: Partial<AggDataEntry> = {}): AggDataEntry {
     mean_ttft: 0.5,
     median_ttft: 0.4,
     std_ttft: 0.1,
+    p75_ttft: 0.65,
+    p90_ttft: 0.7,
+    p95_ttft: 0.75,
     p99_ttft: 0.8,
+    'p99.9_ttft': 0.9,
     mean_tpot: 0.02,
     mean_intvty: 45,
     median_tpot: 0.02,
     median_intvty: 44,
     std_tpot: 0.005,
     std_intvty: 5,
+    p75_tpot: 0.022,
+    p75_intvty: 50,
+    p90_tpot: 0.025,
+    p90_intvty: 55,
+    p95_tpot: 0.028,
+    p95_intvty: 58,
     p99_tpot: 0.03,
     p99_intvty: 60,
+    'p99.9_tpot': 0.035,
+    'p99.9_intvty': 65,
     mean_itl: 0.01,
     median_itl: 0.01,
     std_itl: 0.002,
+    p75_itl: 0.012,
+    p90_itl: 0.013,
+    p95_itl: 0.014,
     p99_itl: 0.015,
+    'p99.9_itl': 0.018,
     mean_e2el: 5,
     median_e2el: 4.8,
     std_e2el: 0.5,
+    p75_e2el: 5.2,
+    p90_e2el: 5.5,
+    p95_e2el: 5.8,
     p99_e2el: 6,
+    'p99.9_e2el': 6.5,
     disagg: false,
     num_prefill_gpu: 0,
     num_decode_gpu: 0,
diff --git a/packages/app/src/lib/url-state.ts b/packages/app/src/lib/url-state.ts
index ebaa5336..73cbe0b7 100644
--- a/packages/app/src/lib/url-state.ts
+++ b/packages/app/src/lib/url-state.ts
@@ -22,8 +22,10 @@ const URL_STATE_KEYS = [
   'i_seq',
   'i_prec',
   'i_metric',
+  'i_pctl',
   'i_xmetric',
   'i_e2e_xmetric',
+  'i_xmode',
   'i_scale',
   'i_gpus',
   'i_dates',
@@ -66,8 +68,10 @@ export const PARAM_DEFAULTS: Record<UrlStateKey, string> = {
   i_seq: '8k/1k',
   i_prec: 'fp4',
   i_metric: 'y_tpPerGpu',
-  i_xmetric: 'p99_ttft',
-  i_e2e_xmetric: '',
+  i_pctl: 'p90',
+  i_xmetric: 'p90_ttft',
+  i_e2e_xmetric: 'p90_ttft',
+  i_xmode: '',
   i_scale: 'auto',
   i_gpus: '',
   i_dates: '',
diff --git a/packages/constants/src/framework-aliases.ts b/packages/constants/src/framework-aliases.ts
index cc5eb6b4..e23a93bc 100644
--- a/packages/constants/src/framework-aliases.ts
+++ b/packages/constants/src/framework-aliases.ts
@@ -44,6 +44,7 @@ export const FRAMEWORK_LABELS: Record<string, string> = {
     ]),
   ),
   mtp: 'MTP',
+  aiperf: 'AIPerf',
 };
 
 /**
diff --git a/packages/constants/src/metric-keys.ts b/packages/constants/src/metric-keys.ts
index cf2c4d0b..70e50f96 100644
--- a/packages/constants/src/metric-keys.ts
+++ b/packages/constants/src/metric-keys.ts
@@ -1,46 +1,110 @@
 /**
  * Canonical set of metric keys stored in the benchmark_results.metrics JSONB column.
  *
- * All values are in seconds unless noted otherwise. Throughput values are tokens/sec/GPU.
+ * Latency values (ttft/tpot/itl/e2el/intvty) are in seconds. Throughput values are
+ * tokens/sec — `_per_gpu` is per-GPU, `_tps` is total tokens/sec across the deployment.
+ *
+ * Distribution stats (mean/median/std/p75/p90/p95/p99/p99.9) are present for latency,
+ * QPS, and per-request token counts; agentic runs carry the full set, fixed-seq runs
+ * carry median/mean/p99/std for latency only.
  */
 export const METRIC_KEYS = new Set([
   // throughput (tokens/sec/GPU)
   'tput_per_gpu',
   'output_tput_per_gpu',
   'input_tput_per_gpu',
+  // throughput (tokens/sec, deployment total) — agentic aiperf reports both
+  'total_tput_tps',
+  'output_tput_tps',
+  'input_tput_tps',
   // TTFT — time to first token
   'median_ttft',
   'mean_ttft',
+  'p75_ttft',
   'p90_ttft',
+  'p95_ttft',
   'p99_ttft',
   'p99.9_ttft',
   'std_ttft',
   // TPOT — time per output token
   'median_tpot',
   'mean_tpot',
+  'p75_tpot',
   'p90_tpot',
+  'p95_tpot',
   'p99_tpot',
   'p99.9_tpot',
   'std_tpot',
   // ITL — inter-token latency
   'median_itl',
   'mean_itl',
+  'p75_itl',
   'p90_itl',
+  'p95_itl',
   'p99_itl',
   'p99.9_itl',
   'std_itl',
   // E2EL — end-to-end latency
   'median_e2el',
   'mean_e2el',
+  'p75_e2el',
   'p90_e2el',
+  'p95_e2el',
   'p99_e2el',
   'p99.9_e2el',
   'std_e2el',
   // interactivity
   'median_intvty',
   'mean_intvty',
+  'p75_intvty',
   'p90_intvty',
+  'p95_intvty',
   'p99_intvty',
   'p99.9_intvty',
   'std_intvty',
+  // QPS — queries per second (agentic aiperf)
+  'median_qps',
+  'mean_qps',
+  'p75_qps',
+  'p90_qps',
+  'p95_qps',
+  'p99_qps',
+  'p99.9_qps',
+  'std_qps',
+  // per-request input token count distribution
+  'median_input_tokens',
+  'mean_input_tokens',
+  'p75_input_tokens',
+  'p90_input_tokens',
+  'p95_input_tokens',
+  'p99_input_tokens',
+  'p99.9_input_tokens',
+  'std_input_tokens',
+  // per-request output token count distribution — actual served
+  'median_output_tokens_actual',
+  'mean_output_tokens_actual',
+  'p75_output_tokens_actual',
+  'p90_output_tokens_actual',
+  'p95_output_tokens_actual',
+  'p99_output_tokens_actual',
+  'p99.9_output_tokens_actual',
+  'std_output_tokens_actual',
+  // per-request output token count distribution — expected from trace
+  'median_output_tokens_expected',
+  'mean_output_tokens_expected',
+  'p75_output_tokens_expected',
+  'p90_output_tokens_expected',
+  'p95_output_tokens_expected',
+  'p99_output_tokens_expected',
+  'p99.9_output_tokens_expected',
+  'std_output_tokens_expected',
+  // run totals (agentic aiperf)
+  'duration_seconds',
+  'total_requests_completed',
+  'total_prompt_tokens',
+  'total_generation_tokens',
+  // server prefix-cache observability (agentic aiperf)
+  'server_gpu_cache_hit_rate',
+  'server_cpu_cache_hit_rate',
+  'theoretical_cache_hit_rate',
 ]);
diff --git a/packages/constants/src/models.ts b/packages/constants/src/models.ts
index c75034c7..783d239d 100644
--- a/packages/constants/src/models.ts
+++ b/packages/constants/src/models.ts
@@ -54,3 +54,20 @@ export function islOslToSequence(isl: number, osl: number): string | null {
   };
   return map[`${isl}_${osl}`] ?? null;
 }
+
+/**
+ * Resolve the sequence (scenario) label for a benchmark or availability row.
+ *
+ * Rows whose `benchmark_type` is `agentic_traces` always yield `'agentic-traces'`;
+ * their isl/osl are not consulted. Every other row is looked up through
+ * `islOslToSequence`, and yields `null` when isl or osl is null or unmapped.
+ */
+export function rowToSequence(row: {
+  isl: number | null;
+  osl: number | null;
+  benchmark_type: string;
+}): string | null {
+  const { isl, osl, benchmark_type: benchmarkType } = row;
+  if (benchmarkType === 'agentic_traces') return 'agentic-traces';
+  return isl === null || osl === null ? null : islOslToSequence(isl, osl);
+}
diff --git a/packages/db/migrations/002_agentic_scenario.sql b/packages/db/migrations/002_agentic_scenario.sql
new file mode 100644
index 00000000..c143914e
--- /dev/null
+++ b/packages/db/migrations/002_agentic_scenario.sql
@@ -0,0 +1,30 @@
+-- Support agentic scenarios in benchmark_results.
+--
+-- Scenarios are discriminated by benchmark_type:
+--   'single_turn'     — fixed-seq-len runs (1k1k, 1k8k, 8k1k, …). isl/osl set.
+--   'agentic_traces'  — trace-replay agentic runs. isl/osl NULL.
+--
+-- conc retains its meaning (concurrent users/requests) for both.
+
+-- 1) isl/osl become nullable for agentic rows
+alter table benchmark_results
+  alter column isl drop not null,
+  alter column osl drop not null;
+
+-- 2) CHECK constraints: positive-or-null
+alter table benchmark_results
+  drop constraint benchmark_results_isl_positive,
+  drop constraint benchmark_results_osl_positive;
+
+alter table benchmark_results
+  add constraint benchmark_results_isl_positive check (isl is null or isl > 0),
+  add constraint benchmark_results_osl_positive check (osl is null or osl > 0);
+
+-- 3) Uniqueness must treat NULL isl/osl as equal so agentic rows can't duplicate on
+--    (workflow_run_id, config_id, benchmark_type, conc). Needs PostgreSQL 15+ (NULLS NOT DISTINCT).
+alter table benchmark_results
+  drop constraint benchmark_results_unique;
+
+alter table benchmark_results
+  add constraint benchmark_results_unique unique nulls not distinct
+    (workflow_run_id, config_id, benchmark_type, isl, osl, conc);
diff --git a/packages/db/migrations/003_agentic_availability.sql b/packages/db/migrations/003_agentic_availability.sql
new file mode 100644
index 00000000..e96cbd50
--- /dev/null
+++ b/packages/db/migrations/003_agentic_availability.sql
@@ -0,0 +1,21 @@
+-- Extend the availability table to cover agentic scenarios.
+--
+-- The 002 migration relaxed benchmark_results.isl/osl to nullable; do the same
+-- for availability and add benchmark_type so the frontend can enumerate
+-- agentic vs single_turn scenarios per model/date.
+--
+-- Postgres primary keys require every column to be NOT NULL, so we drop the PK
+-- and replace it with a UNIQUE NULLS NOT DISTINCT constraint — functionally
+-- equivalent except it allows isl/osl to be NULL for agentic rows.
+
+alter table availability
+  drop constraint availability_pkey;
+
+alter table availability
+  alter column isl drop not null,
+  alter column osl drop not null,
+  add column benchmark_type text not null default 'single_turn';
+
+alter table availability
+  add constraint availability_natural_key unique nulls not distinct
+    (model, isl, osl, precision, hardware, framework, spec_method, disagg, benchmark_type, date);
diff --git a/packages/db/migrations/004_offload_mode.sql b/packages/db/migrations/004_offload_mode.sql
new file mode 100644
index 00000000..24b617f1
--- /dev/null
+++ b/packages/db/migrations/004_offload_mode.sql
@@ -0,0 +1,42 @@
+-- Add offload_mode as a first-class dimension on benchmark_results.
+--
+-- KV-cache offload (on/off) is a meaningful sweep dimension for agentic-trace
+-- runs: a single run may emit two rows for the same (config, isl, osl, conc)
+-- — one with offload disabled, one enabled. The pre-existing unique key
+-- collapsed those into one row, forcing the ingest to skip variants.
+--
+-- For fixed-seq runs `offload_mode` defaults to 'off', which matches the
+-- assumption baked into the existing 5,500+ rows.
+
+alter table benchmark_results
+  add column offload_mode text not null default 'off';
+
+-- Backfill agentic rows from the offload_mode already stored in metrics JSONB by the
+-- earlier ingest backfill; `->>` is NULL for missing keys and JSON null, so skip both.
+update benchmark_results
+   set offload_mode = metrics->>'offload_mode'
+ where benchmark_type = 'agentic_traces'
+   and metrics->>'offload_mode' is not null;
+
+-- Replace the unique constraint so on/off variants can coexist.
+alter table benchmark_results
+  drop constraint benchmark_results_unique;
+
+alter table benchmark_results
+  add constraint benchmark_results_unique unique nulls not distinct
+    (workflow_run_id, config_id, benchmark_type, isl, osl, conc, offload_mode);
+
+-- Rebuild the latest-per-config materialized view to dedupe by offload_mode too.
+drop materialized view if exists latest_benchmarks cascade;
+
+create materialized view latest_benchmarks as
+select distinct on (br.config_id, br.conc, br.isl, br.osl, br.offload_mode)
+  br.*
+from benchmark_results br
+join latest_workflow_runs wr on wr.id = br.workflow_run_id
+where br.error is null
+order by br.config_id, br.conc, br.isl, br.osl, br.offload_mode, br.date desc;
+
+create unique index latest_benchmarks_pk
+  on latest_benchmarks (config_id, conc, isl, osl, offload_mode) nulls not distinct;
+create index latest_benchmarks_model_idx on latest_benchmarks (config_id);
diff --git a/packages/db/migrations/006_agentic_trace_replay.sql b/packages/db/migrations/006_agentic_trace_replay.sql
new file mode 100644
index 00000000..398bc725
--- /dev/null
+++ b/packages/db/migrations/006_agentic_trace_replay.sql
@@ -0,0 +1,34 @@
+-- Capture raw aiperf trace files per agentic benchmark point.
+--
+-- The aiperf harness produces two per-point export files inside each
+-- `agentic_<suffix>` artifact:
+--   - profile_export.jsonl         (~2 MB raw, per-request data)
+--   - server_metrics_export.csv    (~20 KB raw, periodic Prometheus snapshots)
+--
+-- We persist them so the dashboard can later show per-request distributions,
+-- KV cache utilization over time, and conversation traces without needing to
+-- re-download the GitHub artifacts. Storage stays in Postgres (TOASTed) — at
+-- ~500 KB per point post-gzip the total fits comfortably without a separate
+-- blob service.
+--
+-- Mirrors the existing `server_logs` pattern (id-keyed sibling table + FK
+-- column on benchmark_results). Older, non-aiperf agentic runs simply have a
+-- NULL `trace_replay_id`.
+
+create table agentic_trace_replay (
+  id                                bigserial   primary key,
+  -- gzip(profile_export.jsonl); null when only the server metrics file existed
+  profile_export_jsonl_gz           bytea,
+  profile_export_uncompressed_size  bigint,
+  -- raw csv bytes; null when only the profile file existed
+  server_metrics_csv                bytea,
+  server_metrics_csv_size           bigint,
+  created_at                        timestamptz not null default now()
+);
+
+alter table benchmark_results
+  add column trace_replay_id bigint references agentic_trace_replay(id);
+
+create index benchmark_results_trace_replay_idx
+  on benchmark_results (trace_replay_id)
+  where trace_replay_id is not null;
diff --git a/packages/db/migrations/007_agentic_trace_server_metrics_json.sql b/packages/db/migrations/007_agentic_trace_server_metrics_json.sql
new file mode 100644
index 00000000..ba7bd095
--- /dev/null
+++ b/packages/db/migrations/007_agentic_trace_server_metrics_json.sql
@@ -0,0 +1,17 @@
+-- Add the full server-metrics time-series JSON to agentic_trace_replay.
+--
+-- The existing `server_metrics_csv` column holds aiperf's summary export —
+-- one row per metric with avg/min/max/std/p1..p99 across the entire run.
+-- That's enough for the cumulative cache-hit number but not for any
+-- "metric over time" view (KV cache utilization curve, queue depth, prefix
+-- hit rate per interval, cumulative prefill token source).
+--
+-- The harness also writes `server_metrics_export.json` which contains the
+-- raw per-scrape (~1Hz) values for every Prometheus metric over the whole
+-- benchmark window. Raw size is ~250 MB per point but it compresses ~42x
+-- to ~6 MB gzipped (text with repeated metric names + numeric values).
+-- That's the file we store here for any future time-series chart.
+
+alter table agentic_trace_replay
+  add column server_metrics_json_gz bytea,
+  add column server_metrics_json_uncompressed_size bigint;
diff --git a/packages/db/migrations/008_agentic_aggregate_stats.sql b/packages/db/migrations/008_agentic_aggregate_stats.sql
new file mode 100644
index 00000000..d55533b9
--- /dev/null
+++ b/packages/db/migrations/008_agentic_aggregate_stats.sql
@@ -0,0 +1,18 @@
+-- Pre-computed aggregate stats for each agentic_trace_replay row.
+--
+-- Previously the agentic detail page parsed the (huge) profile_export.jsonl
+-- and server_metrics_json blobs on every request to compute distribution
+-- stats for ISL/OSL/KV-util/prefix-hit-rate, plus the per-point derived
+-- metrics (session-time, p90 prefill TPS). That took ~20s per row and the
+-- worst rows (high-conc TP+EP server_metrics blobs that decompress past
+-- Node's 512 MB string cap) couldn't be parsed without a stream fallback.
+--
+-- This column holds the computed stats so the API serves the page from a
+-- single SQL row read. Shape mirrors the existing benchmark_results.metrics
+-- JSONB convention; an inner `version` field lets the backfill script
+-- detect rows whose stats were computed by an older algorithm and
+-- recompute them. Null when stats haven't been computed yet (existing
+-- rows pre-backfill; the API has a slow-path fallback for that case).
+
+alter table agentic_trace_replay
+  add column aggregate_stats jsonb;
diff --git a/packages/db/migrations/009_agentic_chart_series.sql b/packages/db/migrations/009_agentic_chart_series.sql
new file mode 100644
index 00000000..b42718b9
--- /dev/null
+++ b/packages/db/migrations/009_agentic_chart_series.sql
@@ -0,0 +1,19 @@
+-- Pre-computed time-series for the agentic detail page chart.
+--
+-- Sibling to `aggregate_stats` (migration 008): that column stores
+-- per-row percentile/derived *summaries*, this one stores the full
+-- chart-ready time-series arrays (kvCacheUsage, prefixCacheHitRate,
+-- queueDepth, prefillTps, decodeTps, promptTokensBySource).
+--
+-- Without this, the detail page parsed the entire `server_metrics_json_gz`
+-- blob on every request and blew up with ERR_STRING_TOO_LONG on high-conc
+-- TP+EP rows (the blob decompresses past Node's 512 MB max-string-length).
+-- With pre-computed series the page is a single SQL row read.
+--
+-- Shape includes an inner `version` field so the backfill script can
+-- recompute rows whose stored series were produced by an older algorithm.
+-- Null when the series haven't been computed yet; the API has a slow-path
+-- fallback (with stream-parse for oversized blobs) for that case.
+
+alter table agentic_trace_replay
+  add column chart_series jsonb;
diff --git a/packages/db/migrations/010_agentic_request_timeline.sql b/packages/db/migrations/010_agentic_request_timeline.sql
new file mode 100644
index 00000000..756b775e
--- /dev/null
+++ b/packages/db/migrations/010_agentic_request_timeline.sql
@@ -0,0 +1,15 @@
+-- Pre-computed per-request timeline for the agentic detail page.
+--
+-- Sibling to `aggregate_stats` (008) and `chart_series` (009). This one
+-- holds a thin per-request array extracted from `profile_export_jsonl_gz`
+-- so the detail page can render a Gantt-style swimlane of every request
+-- (one bar per conversation turn) without re-parsing the JSONL on every
+-- page load.
+--
+-- Shape includes an inner `version` field so the backfill script can
+-- recompute rows whose stored timeline was produced by an older
+-- algorithm. Null when the timeline hasn't been computed yet; the API
+-- falls back to parsing the blob in that case.
+
+alter table agentic_trace_replay
+  add column request_timeline jsonb;
diff --git a/packages/db/package.json b/packages/db/package.json
index c849ea26..710089f1 100644
--- a/packages/db/package.json
+++ b/packages/db/package.json
@@ -19,6 +19,9 @@
     "db:ingest:supplemental": "dotenv -e ../../.env -- tsx src/ingest-supplemental.ts",
     "db:migrate": "dotenv -e ../../.env -- tsx src/migrate.ts",
     "db:apply-overrides": "dotenv -e ../../.env -- tsx src/apply-overrides.ts",
+    "db:backfill-aggregate-stats": "dotenv -e ../../.env -- tsx src/backfill-aggregate-stats.ts",
+    "db:backfill-chart-series": "dotenv -e ../../.env -- tsx src/backfill-chart-series.ts",
+    "db:backfill-request-timeline": "dotenv -e ../../.env -- tsx src/backfill-request-timeline.ts",
     "db:dump": "dotenv -e ../../.env -- tsx src/dump-db.ts",
     "db:load-dump": "dotenv -e ../../.env -- tsx src/load-dump.ts",
     "db:reset": "dotenv -e ../../.env -- tsx src/reset-db.ts",
@@ -30,11 +33,14 @@
     "@neondatabase/serverless": "^1.1.0",
     "@noble/ciphers": "^2.2.0",
     "@semianalysisai/inferencex-constants": "workspace:*",
-    "postgres": "^3.4.9"
+    "postgres": "^3.4.9",
+    "stream-chain": "^3.4.0",
+    "stream-json": "^2.1.0"
   },
   "devDependencies": {
     "@types/adm-zip": "^0.5.8",
     "@types/node": "^25.7.0",
+    "@types/stream-json": "^1.7.8",
     "@vitest/coverage-v8": "^4.1.6",
     "adm-zip": "^0.5.17",
     "dotenv-cli": "^11.0.0",
diff --git a/packages/db/src/backfill-aggregate-stats.ts b/packages/db/src/backfill-aggregate-stats.ts
new file mode 100644
index 00000000..8dd42dce
--- /dev/null
+++ b/packages/db/src/backfill-aggregate-stats.ts
@@ -0,0 +1,150 @@
+/**
+ * Backfill `agentic_trace_replay.aggregate_stats` for rows that are missing it
+ * or were computed by an older `STATS_VERSION`.
+ *
+ * The ingest path now computes stats inline, but existing rows (and rows
+ * whose computation logic has since changed) still need this pass. Run after
+ * applying migration 008 and any time `STATS_VERSION` bumps.
+ *
+ * Strategy:
+ *   - Select candidate ids up front, then load one row's blobs at a time
+ *     (server_metrics_json_gz can be hundreds of MB decompressed for TP+EP /
+ *     high-conc points — holding one row in memory at a time avoids OOM).
+ *   - Skip rows whose stored `aggregate_stats.version` already matches.
+ *   - Recompute via the same `computeAggregateStats()` helper the ingest
+ *     path uses, so behavior cannot drift.
+ *
+ * Usage:
+ *   pnpm --filter @semianalysisai/inferencex-db db:backfill-aggregate-stats
+ *     [--limit N]   only process the first N candidate rows (useful for
+ *                   smoke-tests on a fresh deploy)
+ *     [--force]     recompute every row, even if version already matches
+ *     [--yes]       skip the confirmation prompt
+ */
+
+import { confirm, hasNoSslFlag, hasYesFlag } from './cli-utils.js';
+import { computeAggregateStats, STATS_VERSION } from './etl/compute-aggregate-stats.js';
+import { createAdminSql } from './etl/db-utils.js';
+
+interface CliFlags {
+  limit: number | null;
+  force: boolean;
+}
+
+/** Parse `--force` and `--limit N` from argv; exits 1 unless N is a positive integer. */
+function parseFlags(): CliFlags {
+  let limit: number | null = null;
+  let force = false;
+  for (let i = 2; i < process.argv.length; i++) {
+    if (process.argv[i] === '--force') force = true;
+    if (process.argv[i] !== '--limit') continue;
+    // 0 would be read as "no limit" by the candidate query; negatives/fractions are not valid LIMITs.
+    const parsed = Number(process.argv[++i]);
+    if (!Number.isInteger(parsed) || parsed <= 0) {
+      console.error('--limit requires a positive integer argument');
+      process.exit(1);
+    }
+    limit = parsed;
+  }
+  return { limit, force };
+}
+
+const flags = parseFlags();
+
+const sql = createAdminSql({
+  noSsl: hasNoSslFlag(),
+  max: 1,
+  onnotice: () => {},
+});
+
+async function main(): Promise<void> { // select stale rows, confirm, then recompute them one by one
+  console.log('=== backfill-aggregate-stats ===');
+  console.log(`  STATS_VERSION = ${STATS_VERSION}`);
+  console.log(`  force = ${flags.force}`);
+  console.log(`  limit = ${flags.limit ?? 'none'}`);
+
+  // Find candidates: rows missing stats, or whose stored version is stale.
+  // A missing `version` key makes `(...)::int <> $v` evaluate to NULL (not an
+  // error), which WHERE treats as false — coalesce to -1 so such rows are stale.
+  const candidates = flags.force
+    ? await sql<{ id: number }[]>`
+        select id
+        from agentic_trace_replay
+        order by id
+        ${flags.limit ? sql`limit ${flags.limit}` : sql``}
+      `
+    : await sql<{ id: number }[]>`
+        select id
+        from agentic_trace_replay
+        where aggregate_stats is null
+           or coalesce((aggregate_stats->>'version')::int, -1) <> ${STATS_VERSION}
+        order by id
+        ${flags.limit ? sql`limit ${flags.limit}` : sql``}
+      `;
+
+  if (candidates.length === 0) {
+    console.log('\n  Nothing to do — all rows up to date.');
+    return;
+  }
+
+  console.log(`\n  ${candidates.length} candidate row(s).`);
+  if (!hasYesFlag()) { // prompt unless --yes was passed
+    const ok = await confirm('\nProceed? (y/N) ');
+    if (!ok) {
+      console.log('Aborted.');
+      return;
+    }
+  }
+
+  let ok = 0;
+  let failed = 0;
+  const t0 = Date.now();
+  for (const { id } of candidates) {
+    const start = Date.now();
+    try {
+      // Fetch one row at a time — the json_gz blob is the heavy field.
+      const [row] = await sql<
+        { profile_export_jsonl_gz: Buffer | null; server_metrics_json_gz: Buffer | null }[]
+      >`
+        select profile_export_jsonl_gz, server_metrics_json_gz
+        from agentic_trace_replay
+        where id = ${id}
+      `;
+      if (!row) {
+        console.warn(`  id=${id}: row vanished, skipping`);
+        continue; // counted as neither ok nor failed
+      }
+
+      const stats = await computeAggregateStats({
+        profileBlob: row.profile_export_jsonl_gz,
+        serverBlob: row.server_metrics_json_gz,
+      });
+
+      await sql`
+        update agentic_trace_replay
+        set aggregate_stats = ${sql.json(structuredClone(stats) as unknown as Parameters<typeof sql.json>[0])}
+        where id = ${id}
+      `;
+      ok++;
+      const elapsed = Math.round((Date.now() - start) / 1000);
+      const elapsedTotal = Math.round((Date.now() - t0) / 1000);
+      console.log(
+        `  ✓ id=${id} (${elapsed}s, ${ok}/${candidates.length} done, ${elapsedTotal}s total)`,
+      );
+    } catch (error) { // a failing row is logged and counted; the loop moves on to the next id
+      failed++;
+      console.error(`  ✗ id=${id}: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  const totalSec = Math.round((Date.now() - t0) / 1000);
+  console.log(`\n=== backfill complete: ${ok} ok, ${failed} failed in ${totalSec}s ===`);
+  if (failed > 0) process.exitCode = 1; // non-zero exit status when any row failed
+}
+
+main()
+  .catch((error) => {
+    console.error('backfill-aggregate-stats failed:', error);
+    process.exitCode = 1;
+  })
+  .finally(() => sql.end());
diff --git a/packages/db/src/backfill-chart-series.ts b/packages/db/src/backfill-chart-series.ts
new file mode 100644
index 00000000..66156b45
--- /dev/null
+++ b/packages/db/src/backfill-chart-series.ts
@@ -0,0 +1,154 @@
+/**
+ * Backfill `agentic_trace_replay.chart_series` for rows that are missing it
+ * or were computed by an older `CHART_SERIES_VERSION`.
+ *
+ * The ingest path now computes the time-series inline, but existing rows
+ * (and rows whose computation logic has since changed) still need this
+ * pass. Run after applying migration 009 and any time `CHART_SERIES_VERSION`
+ * bumps.
+ *
+ * Strategy:
+ *   - Select candidate ids up front, then load one row's blob at a time
+ *     (server_metrics_json_gz can decompress past 500 MB on high-conc
+ *     TP+EP points — holding one in memory at a time avoids OOM).
+ *   - Skip rows whose stored version already matches.
+ *   - Recompute via the same `computeChartSeries()` helper the ingest
+ *     path uses, so behavior cannot drift.
+ *
+ * Usage:
+ *   pnpm --filter @semianalysisai/inferencex-db db:backfill-chart-series
+ *     [--limit N]   only process the first N candidate rows
+ *     [--force]     recompute every row, even if version already matches
+ *     [--yes]       skip the confirmation prompt
+ */
+
+import { confirm, hasNoSslFlag, hasYesFlag } from './cli-utils.js';
+import { CHART_SERIES_VERSION, computeChartSeries } from './etl/compute-chart-series.js';
+import { createAdminSql } from './etl/db-utils.js';
+
+interface CliFlags {
+  limit: number | null;
+  force: boolean;
+}
+
+/** Parse `--force` and `--limit N` from argv; exits 1 unless N is a positive integer. */
+function parseFlags(): CliFlags {
+  let limit: number | null = null;
+  let force = false;
+  for (let i = 2; i < process.argv.length; i++) {
+    if (process.argv[i] === '--force') force = true;
+    if (process.argv[i] !== '--limit') continue;
+    // 0 would be read as "no limit" by the candidate query; negatives/fractions are not valid LIMITs.
+    const parsed = Number(process.argv[++i]);
+    if (!Number.isInteger(parsed) || parsed <= 0) {
+      console.error('--limit requires a positive integer argument');
+      process.exit(1);
+    }
+    limit = parsed;
+  }
+  return { limit, force };
+}
+
+const flags = parseFlags();
+
+const sql = createAdminSql({
+  noSsl: hasNoSslFlag(),
+  max: 1,
+  onnotice: () => {},
+});
+
+async function main(): Promise<void> { // select stale rows, confirm, then recompute them one by one
+  console.log('=== backfill-chart-series ===');
+  console.log(`  CHART_SERIES_VERSION = ${CHART_SERIES_VERSION}`);
+  console.log(`  force = ${flags.force}`);
+  console.log(`  limit = ${flags.limit ?? 'none'}`);
+
+  // Only rows that actually have a server_metrics blob can produce a
+  // chart_series. Rows without the blob legitimately keep `chart_series`
+  // null and the API serves them via the slow path (which also returns
+  // null because there's no blob to parse — so the page falls into the
+  // "no stored trace_replay blob" branch).
+  const candidates = flags.force
+    ? await sql<{ id: number }[]>`
+        select id
+        from agentic_trace_replay
+        where server_metrics_json_gz is not null
+        order by id
+        ${flags.limit ? sql`limit ${flags.limit}` : sql``}
+      `
+    : await sql<{ id: number }[]>`
+        select id
+        from agentic_trace_replay
+        where server_metrics_json_gz is not null
+          and (
+            chart_series is null
+            or coalesce((chart_series->>'version')::int, -1) <> ${CHART_SERIES_VERSION}
+          )
+        order by id
+        ${flags.limit ? sql`limit ${flags.limit}` : sql``}
+      `;
+
+  if (candidates.length === 0) {
+    console.log('\n  Nothing to do — all rows up to date.');
+    return;
+  }
+
+  console.log(`\n  ${candidates.length} candidate row(s).`);
+  if (!hasYesFlag()) { // prompt unless --yes was passed
+    const ok = await confirm('\nProceed? (y/N) ');
+    if (!ok) {
+      console.log('Aborted.');
+      return;
+    }
+  }
+
+  let ok = 0;
+  let failed = 0;
+  const t0 = Date.now();
+  for (const { id } of candidates) {
+    const start = Date.now();
+    try {
+      const [row] = await sql<{ server_metrics_json_gz: Buffer | null }[]>`
+        select server_metrics_json_gz
+        from agentic_trace_replay
+        where id = ${id}
+      `;
+      if (!row) {
+        console.warn(`  id=${id}: row vanished, skipping`);
+        continue; // counted as neither ok nor failed
+      }
+
+      const series = await computeChartSeries(row.server_metrics_json_gz);
+
+      await sql`
+        update agentic_trace_replay
+        set chart_series = ${
+          series === null
+            ? null
+            : sql.json(structuredClone(series) as unknown as Parameters<typeof sql.json>[0])
+        }
+        where id = ${id}
+      `;
+      ok++;
+      const elapsed = Math.round((Date.now() - start) / 1000);
+      const elapsedTotal = Math.round((Date.now() - t0) / 1000);
+      console.log(
+        `  ✓ id=${id} (${elapsed}s, ${ok}/${candidates.length} done, ${elapsedTotal}s total)`,
+      );
+    } catch (error) { // a failing row is logged and counted; the loop moves on to the next id
+      failed++;
+      console.error(`  ✗ id=${id}: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  const totalSec = Math.round((Date.now() - t0) / 1000);
+  console.log(`\n=== backfill complete: ${ok} ok, ${failed} failed in ${totalSec}s ===`);
+  if (failed > 0) process.exitCode = 1; // non-zero exit status when any row failed
+}
+
+main()
+  .catch((error) => {
+    console.error('backfill-chart-series failed:', error);
+    process.exitCode = 1;
+  })
+  .finally(() => sql.end());
diff --git a/packages/db/src/backfill-request-timeline.ts b/packages/db/src/backfill-request-timeline.ts
new file mode 100644
index 00000000..327099d0
--- /dev/null
+++ b/packages/db/src/backfill-request-timeline.ts
@@ -0,0 +1,144 @@
+/**
+ * Backfill `agentic_trace_replay.request_timeline` for rows that are
+ * missing it or were computed by an older `REQUEST_TIMELINE_VERSION`.
+ *
+ * The ingest path now computes the timeline inline, but existing rows
+ * (and rows whose computation logic has since changed) still need this
+ * pass. Run after applying migration 010 and any time the version bumps.
+ *
+ * Usage:
+ *   pnpm --filter @semianalysisai/inferencex-db db:backfill-request-timeline
+ *     [--limit N]   only process the first N candidate rows
+ *     [--force]     recompute every row, even if version already matches
+ *     [--yes]       skip the confirmation prompt
+ */
+
+import { confirm, hasNoSslFlag, hasYesFlag } from './cli-utils.js';
+import {
+  REQUEST_TIMELINE_VERSION,
+  computeRequestTimeline,
+} from './etl/compute-request-timeline.js';
+import { createAdminSql } from './etl/db-utils.js';
+
+interface CliFlags {
+  limit: number | null;
+  force: boolean;
+}
+
+/** Parse `--force` and `--limit N` from argv; exits 1 unless N is a positive integer. */
+function parseFlags(): CliFlags {
+  let limit: number | null = null;
+  let force = false;
+  for (let i = 2; i < process.argv.length; i++) {
+    if (process.argv[i] === '--force') force = true;
+    if (process.argv[i] !== '--limit') continue;
+    // 0 would be read as "no limit" by the candidate query; negatives/fractions are not valid LIMITs.
+    const parsed = Number(process.argv[++i]);
+    if (!Number.isInteger(parsed) || parsed <= 0) {
+      console.error('--limit requires a positive integer argument');
+      process.exit(1);
+    }
+    limit = parsed;
+  }
+  return { limit, force };
+}
+
+const flags = parseFlags();
+
+const sql = createAdminSql({
+  noSsl: hasNoSslFlag(),
+  max: 1,
+  onnotice: () => {},
+});
+
+async function main(): Promise<void> { // select stale rows, confirm, then recompute them one by one
+  console.log('=== backfill-request-timeline ===');
+  console.log(`  REQUEST_TIMELINE_VERSION = ${REQUEST_TIMELINE_VERSION}`);
+  console.log(`  force = ${flags.force}`);
+  console.log(`  limit = ${flags.limit ?? 'none'}`);
+
+  // Only rows with a profile_export blob can produce a timeline. Rows
+  // without the blob keep `request_timeline` null and the API serves them
+  // as "no timeline data".
+  const candidates = flags.force
+    ? await sql<{ id: number }[]>`
+        select id
+        from agentic_trace_replay
+        where profile_export_jsonl_gz is not null
+        order by id
+        ${flags.limit ? sql`limit ${flags.limit}` : sql``}
+      `
+    : await sql<{ id: number }[]>`
+        select id
+        from agentic_trace_replay
+        where profile_export_jsonl_gz is not null
+          and (
+            request_timeline is null
+            or coalesce((request_timeline->>'version')::int, -1) <> ${REQUEST_TIMELINE_VERSION}
+          )
+        order by id
+        ${flags.limit ? sql`limit ${flags.limit}` : sql``}
+      `;
+
+  if (candidates.length === 0) {
+    console.log('\n  Nothing to do — all rows up to date.');
+    return;
+  }
+
+  console.log(`\n  ${candidates.length} candidate row(s).`);
+  if (!hasYesFlag()) { // prompt unless --yes was passed
+    const ok = await confirm('\nProceed? (y/N) ');
+    if (!ok) {
+      console.log('Aborted.');
+      return;
+    }
+  }
+
+  let ok = 0;
+  let failed = 0;
+  const t0 = Date.now();
+  for (const { id } of candidates) {
+    const start = Date.now();
+    try {
+      const [row] = await sql<{ profile_export_jsonl_gz: Buffer | null }[]>`
+        select profile_export_jsonl_gz
+        from agentic_trace_replay
+        where id = ${id}
+      `;
+      if (!row) {
+        console.warn(`  id=${id}: row vanished, skipping`);
+        continue; // counted as neither ok nor failed
+      }
+      const timeline = computeRequestTimeline(row.profile_export_jsonl_gz); // NOTE(review): no await — confirm this is sync
+      await sql`
+        update agentic_trace_replay
+        set request_timeline = ${
+          timeline === null
+            ? null
+            : sql.json(structuredClone(timeline) as unknown as Parameters<typeof sql.json>[0])
+        }
+        where id = ${id}
+      `;
+      ok++;
+      const elapsed = Math.round((Date.now() - start) / 1000);
+      const elapsedTotal = Math.round((Date.now() - t0) / 1000);
+      console.log(
+        `  ✓ id=${id} (${elapsed}s, ${ok}/${candidates.length} done, ${elapsedTotal}s total)`,
+      );
+    } catch (error) { // a failing row is logged and counted; the loop moves on to the next id
+      failed++;
+      console.error(`  ✗ id=${id}: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  const totalSec = Math.round((Date.now() - t0) / 1000);
+  console.log(`\n=== backfill complete: ${ok} ok, ${failed} failed in ${totalSec}s ===`);
+  if (failed > 0) process.exitCode = 1; // non-zero exit status when any row failed
+}
+
+main()
+  .catch((error) => {
+    console.error('backfill-request-timeline failed:', error);
+    process.exitCode = 1;
+  })
+  .finally(() => sql.end());
diff --git a/packages/db/src/etl/benchmark-ingest.ts b/packages/db/src/etl/benchmark-ingest.ts
index 67173c64..ea802d3f 100644
--- a/packages/db/src/etl/benchmark-ingest.ts
+++ b/packages/db/src/etl/benchmark-ingest.ts
@@ -29,12 +29,19 @@ export async function bulkIngestBenchmarkRows(
 
   // Postgres rejects ON CONFLICT DO UPDATE if the same conflict key appears
   // more than once in a single batch. Deduplicate within the batch, keeping
-  // the last occurrence (last metrics for each unique config/isl/osl/conc).
+  // the last occurrence (last metrics for each unique config/benchmark_type/isl/osl/conc/offload_mode).
   const seen = new Map<string, BenchmarkParams & { configId: number }>();
-  for (const r of rows) seen.set(`${r.configId}-${r.isl}-${r.osl}-${r.conc}`, r);
+  for (const r of rows) {
+    seen.set(
+      `${r.configId}-${r.benchmarkType}-${r.isl ?? ''}-${r.osl ?? ''}-${r.conc}-${r.offloadMode}`,
+      r,
+    );
+  }
   const deduped = [...seen.values()];
 
   const configIds = deduped.map((r) => r.configId);
+  const benchmarkTypes = deduped.map((r) => r.benchmarkType);
+  const offloadModes = deduped.map((r) => r.offloadMode);
   const isls = deduped.map((r) => r.isl);
   const osls = deduped.map((r) => r.osl);
   const concs = deduped.map((r) => r.conc);
@@ -43,20 +50,21 @@ export async function bulkIngestBenchmarkRows(
 
   const result = await sql<{ inserted: boolean; id: number }[]>`
     insert into benchmark_results (
-      workflow_run_id, config_id, benchmark_type, date,
+      workflow_run_id, config_id, benchmark_type, offload_mode, date,
       isl, osl, conc, image, metrics
     )
     select
       ${workflowRunId},
       unnest(${sql.array(configIds)}::int[]),
-      'single_turn',
+      unnest(${sql.array(benchmarkTypes)}::text[]),
+      unnest(${sql.array(offloadModes)}::text[]),
       ${date}::date,
       unnest(${sql.array(isls)}::int[]),
       unnest(${sql.array(osls)}::int[]),
       unnest(${sql.array(concs)}::int[]),
       unnest(${sql.array(images)}),
       unnest(${sql.array(metricsJsons)}::jsonb[])
-    on conflict (workflow_run_id, config_id, benchmark_type, isl, osl, conc)
+    on conflict (workflow_run_id, config_id, benchmark_type, isl, osl, conc, offload_mode)
     do update set
       metrics = excluded.metrics,
       image = excluded.image
@@ -147,13 +155,14 @@ export async function bulkUpsertAvailability(
   sql: Sql,
   rows: {
     model: string;
-    isl: number;
-    osl: number;
+    isl: number | null;
+    osl: number | null;
     precision: string;
     hardware: string;
     framework: string;
     specMethod: string;
     disagg: boolean;
+    benchmarkType: string;
   }[],
   date: string,
 ): Promise<void> {
@@ -162,7 +171,7 @@ export async function bulkUpsertAvailability(
   const seen = new Set<string>();
   const unique: typeof rows = [];
   for (const r of rows) {
-    const key = `${r.model}|${r.isl}|${r.osl}|${r.precision}|${r.hardware}|${r.framework}|${r.specMethod}|${r.disagg}|${date}`;
+    const key = `${r.model}|${r.isl ?? ''}|${r.osl ?? ''}|${r.precision}|${r.hardware}|${r.framework}|${r.specMethod}|${r.disagg}|${r.benchmarkType}|${date}`;
     if (!seen.has(key)) {
       seen.add(key);
       unique.push(r);
@@ -170,7 +179,7 @@ export async function bulkUpsertAvailability(
   }
 
   await sql`
-    insert into availability (model, isl, osl, precision, hardware, framework, spec_method, disagg, date)
+    insert into availability (model, isl, osl, precision, hardware, framework, spec_method, disagg, benchmark_type, date)
     select
       unnest(${sql.array(unique.map((r) => r.model))}::text[]),
       unnest(${sql.array(unique.map((r) => r.isl))}::int[]),
@@ -180,6 +189,7 @@ export async function bulkUpsertAvailability(
       unnest(${sql.array(unique.map((r) => r.framework))}::text[]),
       unnest(${sql.array(unique.map((r) => r.specMethod))}::text[]),
       unnest(${sql.array(unique.map((r) => r.disagg))}::bool[]),
+      unnest(${sql.array(unique.map((r) => r.benchmarkType))}::text[]),
       ${date}::date
     on conflict do nothing
   `;
diff --git a/packages/db/src/etl/benchmark-mapper.ts b/packages/db/src/etl/benchmark-mapper.ts
index 7d78e175..1aff5ea9 100644
--- a/packages/db/src/etl/benchmark-mapper.ts
+++ b/packages/db/src/etl/benchmark-mapper.ts
@@ -57,8 +57,21 @@ const NON_METRIC_KEYS = new Set([
   'decode_num_workers',
   'num_prefill_gpu',
   'num_decode_gpu',
+  // agentic scenario
+  'scenario_type',
+  'users',
+  'offload_mode',
+  'num_requests_total',
+  'num_requests_successful',
 ]);
 
+/**
+ * `benchmark_type` values understood by the ingest.
+ * - `single_turn`    — fixed sequence-length runs (isl/osl set).
+ * - `agentic_traces` — trace-replay agentic runs (isl/osl null, `users` → conc).
+ */
+export type BenchmarkType = 'single_turn' | 'agentic_traces';
+
 /**
  * METRIC_KEYS from constants is the canonical set of known metric keys.
  * Any numeric field outside this set and `NON_METRIC_KEYS` is auto-captured
@@ -70,9 +83,13 @@ const _warnedMetricKeys = new Set<string>();
 
 export interface BenchmarkParams {
   config: ConfigParams;
-  isl: number;
-  osl: number;
+  benchmarkType: BenchmarkType;
+  // Null for agentic_traces; present for single_turn.
+  isl: number | null;
+  osl: number | null;
   conc: number;
+  /** 'on' | 'off' — KV cache offload to CPU. Defaults to 'off'. */
+  offloadMode: string;
   image: string | null;
   metrics: Record<string, number>;
 }
@@ -114,14 +131,45 @@ export function mapBenchmarkRow(
     return null;
   }
 
-  const isl = parseInt2(row.isl) ?? islOslFallback?.isl;
-  const osl = parseInt2(row.osl) ?? islOslFallback?.osl;
-  const conc = parseInt2(row.conc);
-  if (!isl || !osl || !conc) {
+  // Agentic-trace runs emit `scenario_type: 'agentic-coding'` (and variants),
+  // no isl/osl, and `users` instead of `conc`. Everything else stays as-is.
+  const isAgentic = String(row.scenario_type ?? '').startsWith('agentic');
+  const benchmarkType: BenchmarkType = isAgentic ? 'agentic_traces' : 'single_turn';
+
+  const isl = isAgentic ? null : (parseInt2(row.isl) ?? islOslFallback?.isl ?? null);
+  const osl = isAgentic ? null : (parseInt2(row.osl) ?? islOslFallback?.osl ?? null);
+  // Agentic artifacts encode concurrency as `users` in older schemas and `conc` in newer ones.
+  const conc = isAgentic ? (parseInt2(row.users) ?? parseInt2(row.conc)) : parseInt2(row.conc);
+  if (!conc || (!isAgentic && (!isl || !osl))) {
     tracker.skips.noIslOsl++;
     return null;
   }
 
+  // Failed-run guard: aggregated artifacts (`results_bmk`) merge rows from
+  // every runner, including ones with 0 successful requests and null metrics.
+  // Without this skip, the empty row's nulls overwrite a good row via
+  // ON CONFLICT DO UPDATE when both share the same (config, conc, offload).
+  if (
+    typeof row.num_requests_successful === 'number' &&
+    row.num_requests_successful === 0 &&
+    typeof row.num_requests_total === 'number' &&
+    row.num_requests_total > 0
+  ) {
+    tracker.skips.failedRun++;
+    return null;
+  }
+
+  // Agentic offload signal: prefer `offload_mode` ('on'|'off'), fall back to `offloading`
+  // ('none' → 'off'; any other non-empty value → 'on').
+  const offloadModeRaw =
+    typeof row.offload_mode === 'string' && row.offload_mode.length > 0
+      ? row.offload_mode
+      : typeof row.offloading === 'string' && row.offloading.length > 0
+        ? row.offloading === 'none'
+          ? 'off'
+          : 'on'
+        : 'off';
+
   const { framework, disagg } = normalizeFramework(String(row.framework ?? ''), row.disagg);
   const isMultinode = parseBool(row.is_multinode);
   const precision = normalizePrecision(String(row.precision ?? ''));
@@ -182,6 +230,12 @@ export function mapBenchmarkRow(
     }
   }
 
+  // Agentic rows emit `offload_mode: "on" | "off"` (or older `offloading: "none"|...`)
+  // — preserve as a stringified metric so the frontend can expose it in tooltips.
+  if (isAgentic) {
+    (metrics as Record<string, unknown>).offload_mode = offloadModeRaw;
+  }
+
   // Artifact names encode '/' as '#' to avoid path separators; restore the URI.
   const image = row.image ? String(row.image).replaceAll('#', '/') : null;
 
@@ -205,9 +259,11 @@ export function mapBenchmarkRow(
       numPrefillGpu,
       numDecodeGpu,
     },
+    benchmarkType,
     isl,
     osl,
     conc,
+    offloadMode: offloadModeRaw,
     image,
     metrics,
   };
diff --git a/packages/db/src/etl/compute-aggregate-stats.test.ts b/packages/db/src/etl/compute-aggregate-stats.test.ts
new file mode 100644
index 00000000..de0009de
--- /dev/null
+++ b/packages/db/src/etl/compute-aggregate-stats.test.ts
@@ -0,0 +1,123 @@
+import { gzipSync } from 'node:zlib';
+
+import { describe, expect, it } from 'vitest';
+
+import { STATS_VERSION, computeAggregateStats } from './compute-aggregate-stats.js';
+
+/** Gzip a synthetic `profile_export.jsonl` holding one JSON record per request. */
+function makeProfileBlob(requests: { isl: number; osl: number; rl?: number; ttft?: number }[]) {
+  const records: string[] = [];
+  requests.forEach((req, index) => {
+    const metadata = {
+      benchmark_phase: 'profiling',
+      conversation_id: `conv-${index}`,
+      turn_index: 0,
+    };
+    const metrics = {
+      input_sequence_length: { value: req.isl, unit: 'tokens' },
+      output_sequence_length: { value: req.osl, unit: 'tokens' },
+      request_latency: { value: req.rl ?? 1000, unit: 'ms' },
+      time_to_first_token: { value: req.ttft ?? 100, unit: 'ms' },
+    };
+    records.push(JSON.stringify({ metadata, metrics }));
+  });
+  return gzipSync(Buffer.from(records.join('\n')));
+}
+
+/** Gzip a tiny server_metrics_json fixture: three KV-usage slices, one prefix-cache slice pair. */
+function makeServerBlob() {
+  const kvUsage = {
+    series: [
+      {
+        timeslices: [
+          { start_ns: 0, end_ns: 1, avg: 0.2 },
+          { start_ns: 1, end_ns: 2, avg: 0.5 },
+          { start_ns: 2, end_ns: 3, avg: 0.8 },
+        ],
+      },
+    ],
+  };
+  // Hit rate 80 against query rate 100 in a single window.
+  const prefixHits = { series: [{ timeslices: [{ start_ns: 0, rate: 80 }] }] };
+  const prefixQueries = { series: [{ timeslices: [{ start_ns: 0, rate: 100 }] }] };
+  const payload = {
+    metrics: {
+      'vllm:kv_cache_usage_perc': kvUsage,
+      'vllm:prefix_cache_hits': prefixHits,
+      'vllm:prefix_cache_queries': prefixQueries,
+    },
+  };
+  return gzipSync(Buffer.from(JSON.stringify(payload)));
+}
+
+describe('computeAggregateStats', () => {
+  it('returns the current STATS_VERSION in the bundle', async () => {
+    const stats = await computeAggregateStats({ profileBlob: null, serverBlob: null });
+    expect(stats.version).toBe(STATS_VERSION);
+  });
+
+  it('leaves every metric null when both blobs are null', async () => {
+    const stats = await computeAggregateStats({ profileBlob: null, serverBlob: null });
+    expect(stats.isl).toBeNull();
+    expect(stats.osl).toBeNull();
+    expect(stats.kvCacheUtil).toBeNull();
+    expect(stats.prefixCacheHitRate).toBeNull();
+    expect(stats.normalizedSessionTimeS).toBeNull();
+    expect(stats.p90PrefillTpsPerUser).toBeNull();
+  });
+
+  it('computes ISL/OSL percentiles + derived metrics from the profile blob', async () => {
+    const profileBlob = makeProfileBlob([
+      { isl: 100, osl: 50, rl: 1000, ttft: 100 },
+      { isl: 200, osl: 75, rl: 2000, ttft: 200 },
+      { isl: 300, osl: 100, rl: 3000, ttft: 300 },
+    ]);
+    const stats = await computeAggregateStats({ profileBlob, serverBlob: null });
+
+    expect(stats.isl?.n).toBe(3);
+    expect(stats.isl?.mean).toBeCloseTo(200, 6);
+    expect(stats.osl?.n).toBe(3);
+    expect(stats.osl?.mean).toBeCloseTo(75, 6);
+
+    // Server-derived metrics stay null because no server blob was supplied.
+    expect(stats.kvCacheUtil).toBeNull();
+    expect(stats.prefixCacheHitRate).toBeNull();
+
+    // Derived: per-turn prefill TPS = isl / (ttft / 1000) = 1000 for all three requests, so p90 = 1000.
+    expect(stats.p90PrefillTpsPerUser).toBeCloseTo(1000, 6);
+    // Normalized session time: T̃_i = T_i × (mean_load / load_i), then mean.
+    //   loads = [150, 275, 400], mean_load = 275
+    //   scaled times (s) = [1×275/150, 2×275/275, 3×275/400] = [1.8333, 2, 2.0625]
+    //   mean ≈ 1.9653
+    expect(stats.normalizedSessionTimeS).toBeCloseTo(1.9653, 3);
+  });
+
+  it('computes KV util + prefix hit rate from the server blob alone', async () => {
+    const stats = await computeAggregateStats({
+      profileBlob: null,
+      serverBlob: makeServerBlob(),
+    });
+    expect(stats.kvCacheUtil?.n).toBe(3);
+    expect(stats.kvCacheUtil?.mean).toBeCloseTo(0.5, 6);
+    expect(stats.prefixCacheHitRate?.n).toBe(1);
+    expect(stats.prefixCacheHitRate?.mean).toBeCloseTo(0.8, 6);
+
+    // Profile-derived metrics stay null because no profile blob was supplied.
+    expect(stats.isl).toBeNull();
+    expect(stats.osl).toBeNull();
+    expect(stats.normalizedSessionTimeS).toBeNull();
+    expect(stats.p90PrefillTpsPerUser).toBeNull();
+  });
+
+  it('tolerates a malformed profile blob by leaving its metrics null', async () => {
+    // A non-gzip buffer makes gunzipSync throw; computeAggregateStats catches that error.
+    const garbage = Buffer.from('not-gzip-data');
+    const stats = await computeAggregateStats({ profileBlob: garbage, serverBlob: null });
+    expect(stats.isl).toBeNull();
+    expect(stats.osl).toBeNull();
+    expect(stats.normalizedSessionTimeS).toBeNull();
+    expect(stats.p90PrefillTpsPerUser).toBeNull();
+    // The version is stamped regardless, so the row still counts as "computed".
+    expect(stats.version).toBe(STATS_VERSION);
+  });
+});
diff --git a/packages/db/src/etl/compute-aggregate-stats.ts b/packages/db/src/etl/compute-aggregate-stats.ts
new file mode 100644
index 00000000..a422cfec
--- /dev/null
+++ b/packages/db/src/etl/compute-aggregate-stats.ts
@@ -0,0 +1,147 @@
+/**
+ * Pre-compute the per-row aggregate stats for an `agentic_trace_replay`
+ * blob pair. The output lands in the `aggregate_stats` JSONB column so the
+ * detail page can serve the "Aggregates across configs" view and the
+ * derived chart x-axis modes from a single SQL row read, instead of
+ * parsing the raw blobs on demand.
+ *
+ * Shape is intentionally versioned — bump `STATS_VERSION` whenever the
+ * computation changes so the backfill script knows which rows to recompute.
+ */
+
+import { Readable } from 'node:stream';
+import { createGunzip, gunzipSync } from 'node:zlib';
+
+import { chain } from 'stream-chain';
+
+import { parser } from 'stream-json';
+import { pick } from 'stream-json/filters/pick.js';
+import { streamObject } from 'stream-json/streamers/stream-object.js';
+
+import { computeDerivedFromBlob } from '../queries/derived-agentic-metrics.js';
+import {
+  STATS_VERSION,
+  extractIslOsl,
+  extractServerMetricSamples,
+  percentilesOf,
+  type MetricPercentiles,
+} from '../queries/agentic-aggregates.js';
+
+export { STATS_VERSION };
+
+export interface AggregateStats {
+  version: number; // Set to STATS_VERSION by computeAggregateStats.
+  isl: MetricPercentiles | null; // From the profile blob; null if absent or unreadable.
+  osl: MetricPercentiles | null; // From the profile blob; null if absent or unreadable.
+  kvCacheUtil: MetricPercentiles | null; // From the server blob; null if absent or unreadable.
+  prefixCacheHitRate: MetricPercentiles | null; // From the server blob; else null.
+  /** Mean of (per-session e2e time × mean_load / session_load) across sessions. */
+  normalizedSessionTimeS: number | null;
+  /** P90 of per-turn ISL/TTFT pooled across every session's turns. */
+  p90PrefillTpsPerUser: number | null;
+}
+
+/** `metrics` keys that `streamExtractServer` keeps when stream-parsing oversized server blobs. */
+const TARGET_METRIC_KEYS = new Set([
+  'vllm:kv_cache_usage_perc',
+  'vllm:gpu_cache_usage_perc',
+  'vllm:prefix_cache_hits',
+  'vllm:prefix_cache_queries',
+  'vllm:gpu_prefix_cache_hits',
+  'vllm:gpu_prefix_cache_queries',
+]);
+
+/**
+ * Fallback extractor for server_metrics_json blobs too large to decode into
+ * one string: gunzips and parses the buffer as a stream, keeps only the
+ * `metrics` entries named in TARGET_METRIC_KEYS, and passes that reduced
+ * document to `extractServerMetricSamples`. Rejects if the stream errors.
+ */
+async function streamExtractServer(
+  buffer: Buffer,
+): Promise<{ kvCacheUtil: number[]; prefixCacheHitRate: number[] }> {
+  /* eslint-disable @typescript-eslint/no-explicit-any */
+  const wanted: Record<string, unknown> = {};
+  const source = chain([
+    Readable.from(buffer),
+    createGunzip(),
+    parser(),
+    pick({ filter: 'metrics' }),
+    streamObject(),
+  ]) as any;
+  const onEntry = (entry: { key: string; value: unknown }): void => {
+    if (TARGET_METRIC_KEYS.has(entry.key)) wanted[entry.key] = entry.value;
+  };
+  await new Promise<void>((resolve, reject) => {
+    source.on('data', onEntry).on('end', resolve).on('error', reject);
+  });
+  /* eslint-enable @typescript-eslint/no-explicit-any */
+  const reduced = JSON.stringify({ metrics: wanted });
+  return extractServerMetricSamples(reduced);
+}
+
+/**
+ * Build the versioned `AggregateStats` bundle for one (profile, server-metrics)
+ * blob pair. A null or unreadable blob is not an error: the fields derived
+ * from it come back null, and `version` is always set to STATS_VERSION.
+ * Both blobs are handled independently of each other.
+ */
+export async function computeAggregateStats(args: {
+  profileBlob: Buffer | null;
+  serverBlob: Buffer | null;
+}): Promise<AggregateStats> {
+  const { profileBlob, serverBlob } = args;
+  const stats: AggregateStats = {
+    version: STATS_VERSION,
+    isl: null,
+    osl: null,
+    kvCacheUtil: null,
+    prefixCacheHitRate: null,
+    normalizedSessionTimeS: null,
+    p90PrefillTpsPerUser: null,
+  };
+
+  // Profile blob → ISL/OSL percentiles plus the two derived session metrics.
+  if (profileBlob) {
+    try {
+      const jsonlText = gunzipSync(profileBlob).toString('utf8');
+      const lengths = extractIslOsl(jsonlText);
+      stats.isl = percentilesOf(lengths.isl);
+      stats.osl = percentilesOf(lengths.osl);
+      const derived = computeDerivedFromBlob(jsonlText);
+      stats.normalizedSessionTimeS = derived.normalized_session_time_s;
+      stats.p90PrefillTpsPerUser = derived.p90_prefill_tps_per_user;
+    } catch {
+      // Best effort: anything assigned before the failure is kept, the rest stays null.
+    }
+  }
+
+  // Server-metrics blob → KV cache utilization and prefix-cache hit-rate percentiles.
+  if (serverBlob) {
+    let samples: { kvCacheUtil: number[]; prefixCacheHitRate: number[] } | null = null;
+    try {
+      samples = extractServerMetricSamples(gunzipSync(serverBlob).toString('utf8'));
+    } catch (error) {
+      // Only the "string too long" failure (seen on high-conc TP+EP rows) is
+      // retried, using the streaming extractor that never materializes the
+      // whole JSON text. Any other error leaves the server metrics null.
+      const errCode = error && (error as NodeJS.ErrnoException).code;
+      const message = error instanceof Error ? error.message : String(error);
+      const tooLong =
+        errCode === 'ERR_STRING_TOO_LONG' || message.includes('longer than 0x1fffffe8');
+      if (tooLong) {
+        try {
+          samples = await streamExtractServer(serverBlob);
+        } catch {
+          // Streaming fallback failed as well — leave the server metrics null.
+        }
+      }
+    }
+    if (samples) {
+      stats.kvCacheUtil = percentilesOf(samples.kvCacheUtil);
+      stats.prefixCacheHitRate = percentilesOf(samples.prefixCacheHitRate);
+    }
+  }
+
+  return stats;
+}
diff --git a/packages/db/src/etl/compute-chart-series.test.ts b/packages/db/src/etl/compute-chart-series.test.ts
new file mode 100644
index 00000000..4c6f8791
--- /dev/null
+++ b/packages/db/src/etl/compute-chart-series.test.ts
@@ -0,0 +1,209 @@
+import { gzipSync } from 'node:zlib';
+
+import { describe, expect, it } from 'vitest';
+
+import { CHART_SERIES_VERSION, computeChartSeries } from './compute-chart-series.js';
+
+/**
+ * Gzip a small server_metrics_json fixture covering the vllm metrics these
+ * tests read. KV usage spans three one-second windows starting at t=0; every
+ * other metric has a single 0 → 1e9 ns window. `opts` overrides the
+ * prefix-cache hit/query rates and the prompt-token rate.
+ */
+function makeBlob(opts?: {
+  prefixHits?: number;
+  prefixQueries?: number;
+  promptTokensRate?: number;
+}) {
+  // Shared bounds of the first one-second window.
+  const firstSecond = { start_ns: 0, end_ns: 1e9 };
+  // Single-series metric with one `rate` sample.
+  const rateOnly = (rate: number) => ({
+    series: [{ timeslices: [{ ...firstSecond, rate }] }],
+  });
+  // Single-series metric with one `avg` sample.
+  const gaugeOnly = (avg: number) => ({
+    series: [{ timeslices: [{ ...firstSecond, avg }] }],
+  });
+  // One labelled series of `vllm:prompt_tokens_by_source`.
+  const bySource = (source: string, rate: number) => ({
+    labels: { source },
+    timeslices: [{ ...firstSecond, rate }],
+  });
+
+  // KV usage gauge: averages 0.1, 0.4, 0.7 over three consecutive seconds.
+  const kvUsage = {
+    series: [
+      {
+        timeslices: [
+          { start_ns: 0, end_ns: 1e9, avg: 0.1 },
+          { start_ns: 1e9, end_ns: 2e9, avg: 0.4 },
+          { start_ns: 2e9, end_ns: 3e9, avg: 0.7 },
+        ],
+      },
+    ],
+  };
+
+  const metrics = {
+    'vllm:kv_cache_usage_perc': kvUsage,
+    // Hit-rate inputs; defaults are 75 hits against 100 queries.
+    'vllm:prefix_cache_hits': rateOnly(opts?.prefixHits ?? 75),
+    'vllm:prefix_cache_queries': rateOnly(opts?.prefixQueries ?? 100),
+    // Queue-depth gauges.
+    'vllm:num_requests_running': gaugeOnly(5),
+    'vllm:num_requests_waiting': gaugeOnly(2),
+    // Token throughput counters.
+    'vllm:prompt_tokens': rateOnly(opts?.promptTokensRate ?? 1000),
+    'vllm:generation_tokens': rateOnly(500),
+    'vllm:prompt_tokens_by_source': {
+      series: [bySource('local_cache_hit', 200), bySource('miss', 800)],
+    },
+  };
+
+  return gzipSync(Buffer.from(JSON.stringify({ metrics })));
+}
+
+/**
+ * Build one engine's synthetic vLLM series for the multi-engine test. Every
+ * series carries the label `engine: <engineId>` and two samples, taken at
+ * start_ns 0 and 1e9. Only the running-requests values depend on
+ * `baseRunning`; the other series use fixed numbers.
+ */
+function buildEngineSeries(engineId: number, baseRunning: number) {
+  // Shared by all five series of this engine.
+  const labels = { engine: String(engineId) };
+
+  // Gauge-style series: two `avg` samples.
+  const gauge = (first: number, second: number) => ({
+    labels,
+    timeslices: [
+      { start_ns: 0, avg: first },
+      { start_ns: 1e9, avg: second },
+    ],
+  });
+
+  // Counter-style series: two `rate` samples.
+  const counter = (first: number, second: number) => ({
+    labels,
+    timeslices: [
+      { start_ns: 0, rate: first },
+      { start_ns: 1e9, rate: second },
+    ],
+  });
+
+  return {
+    // Running requests grow by one between the two windows.
+    runningSlice: gauge(baseRunning, baseRunning + 1),
+    // Nothing is queued in either window.
+    waitingSlice: gauge(0, 0),
+    // KV cache fraction: 0.25, then 0.5.
+    kvSlice: gauge(0.25, 0.5),
+    // Prompt-token rate: 100, then 200.
+    promptSlice: counter(100, 200),
+    // Generation-token rate: 50, then 75.
+    genSlice: counter(50, 75),
+  };
+}
+
+describe('computeChartSeries', () => {
+  it('returns null when the blob is null', async () => {
+    expect(await computeChartSeries(null)).toBeNull();
+  });
+
+  it('returns the current CHART_SERIES_VERSION in the bundle', async () => {
+    const series = await computeChartSeries(makeBlob());
+    expect(series?.version).toBe(CHART_SERIES_VERSION);
+  });
+
+  it('extracts kvCacheUsage points with t=seconds-from-start', async () => {
+    const series = await computeChartSeries(makeBlob());
+    expect(series?.kvCacheUsage).toEqual([
+      { t: 0, value: 0.1 },
+      { t: 1, value: 0.4 },
+      { t: 2, value: 0.7 },
+    ]);
+  });
+
+  it('computes prefixCacheHitRate as hits.rate / queries.rate', async () => {
+    const series = await computeChartSeries(makeBlob({ prefixHits: 80, prefixQueries: 100 }));
+    expect(series?.prefixCacheHitRate).toEqual([{ t: 0, value: 0.8 }]);
+  });
+
+  it('drops prefixCacheHitRate windows where queries.rate is 0', async () => {
+    const series = await computeChartSeries(makeBlob({ prefixHits: 5, prefixQueries: 0 }));
+    expect(series?.prefixCacheHitRate).toEqual([]);
+  });
+
+  it('pairs running + waiting into queueDepth points', async () => {
+    const series = await computeChartSeries(makeBlob());
+    expect(series?.queueDepth).toEqual([{ t: 0, running: 5, waiting: 2, total: 7 }]);
+  });
+
+  it('extracts prefillTps + decodeTps from counter rates', async () => {
+    const series = await computeChartSeries(makeBlob());
+    expect(series?.prefillTps).toEqual([{ t: 0, value: 1000 }]);
+    expect(series?.decodeTps).toEqual([{ t: 0, value: 500 }]);
+  });
+
+  it('splits promptTokensBySource by label and skips empty series', async () => {
+    const series = await computeChartSeries(makeBlob());
+    expect(Object.keys(series!.promptTokensBySource).toSorted()).toEqual([
+      'local_cache_hit',
+      'miss',
+    ]);
+    expect(series!.promptTokensBySource['local_cache_hit']).toEqual([{ t: 0, value: 200 }]);
+    expect(series!.promptTokensBySource['miss']).toEqual([{ t: 0, value: 800 }]);
+  });
+
+  it('computes timing metadata from the widest metric window', async () => {
+    const series = await computeChartSeries(makeBlob());
+    // In this fixture kvCacheUsage has the widest window (0 → 3e9 ns) and the most slices (3).
+    expect(series?.startNs).toBe(0);
+    expect(series?.endNs).toBe(3e9);
+    expect(series?.durationS).toBeCloseTo(3, 6);
+    expect(series?.timeslicesCount).toBe(3);
+  });
+
+  it('returns null on a malformed (non-gzip) blob', async () => {
+    const result = await computeChartSeries(Buffer.from('not-gzip-data'));
+    expect(result).toBeNull();
+  });
+
+  it('aggregates gauges + counters across all engine series (DP/PP fix)', async () => {
+    // Simulate a 4-engine deployment: each engine reports its own series for
+    // every metric. Cluster-wide value should be SUM for running/waiting and
+    // counter rates, AVG for kv_cache_usage_perc (per-engine fraction).
+    const engines = [0, 1, 2, 3].map((id) => buildEngineSeries(id, 3)); // running=3 per engine
+    const json = JSON.stringify({
+      metrics: {
+        'vllm:num_requests_running': { series: engines.map((e) => e.runningSlice) },
+        'vllm:num_requests_waiting': { series: engines.map((e) => e.waitingSlice) },
+        'vllm:kv_cache_usage_perc': { series: engines.map((e) => e.kvSlice) },
+        'vllm:prompt_tokens': { series: engines.map((e) => e.promptSlice) },
+        'vllm:generation_tokens': { series: engines.map((e) => e.genSlice) },
+      },
+    });
+    const blob = gzipSync(Buffer.from(json));
+    const cs = await computeChartSeries(blob);
+    expect(cs).not.toBeNull();
+    // queueDepth.running = Σ engines = 4 × 3 = 12 at t=0; 4 × 4 = 16 at t=1
+    expect(cs!.queueDepth).toEqual([
+      { t: 0, running: 12, waiting: 0, total: 12 },
+      { t: 1, running: 16, waiting: 0, total: 16 },
+    ]);
+    // kvCacheUsage stays 0.25, 0.5 (average across engines, all engines reported same value)
+    expect(cs!.kvCacheUsage).toEqual([
+      { t: 0, value: 0.25 },
+      { t: 1, value: 0.5 },
+    ]);
+    // prefillTps = Σ rates = 4 × 100 = 400, then 4 × 200 = 800; decodeTps = 4 × 50, then 4 × 75
+    expect(cs!.prefillTps).toEqual([
+      { t: 0, value: 400 },
+      { t: 1, value: 800 },
+    ]);
+    expect(cs!.decodeTps).toEqual([
+      { t: 0, value: 200 },
+      { t: 1, value: 300 },
+    ]);
+  });
+});
diff --git a/packages/db/src/etl/compute-chart-series.ts b/packages/db/src/etl/compute-chart-series.ts
new file mode 100644
index 00000000..46600f7d
--- /dev/null
+++ b/packages/db/src/etl/compute-chart-series.ts
@@ -0,0 +1,484 @@
+/**
+ * Pre-compute the time-series for the agentic detail page chart, so the
+ * API doesn't have to gunzip + JSON-parse a multi-hundred-MB blob on every
+ * request. The output lands in `agentic_trace_replay.chart_series` and is
+ * read directly by `getTraceServerMetrics`.
+ *
+ * Versioned so the backfill script knows which rows are stale — bump
+ * `CHART_SERIES_VERSION` whenever the extraction algorithm changes.
+ */
+
+import { Readable } from 'node:stream';
+import { createGunzip, gunzipSync } from 'node:zlib';
+
+import { chain } from 'stream-chain';
+
+import { parser } from 'stream-json';
+import { pick } from 'stream-json/filters/pick.js';
+import { streamObject } from 'stream-json/streamers/stream-object.js';
+
+/**
+ * Version of the extraction algorithm. Bump it on every algorithm change so
+ * the backfill recomputes rows stored with an older version.
+ *
+ * v2: aggregate vllm gauges/counters across all engine series (was reading
+ * only series[0], which under-counted by Nx on multi-engine DP/PP
+ * deployments — most visible as a request-queue-depth chart that maxed out
+ * at ~3 when the timeline clearly showed 20+ in-flight).
+ *
+ * v3: extract `prefixCacheHitsTps` so the detail page can derive cumulative
+ * unique input tokens as cumsum(prefillTps - prefixCacheHitsTps).
+ *
+ * v4: extract sglang:* metrics too (fallback chain in each picker), so
+ * SGLang runs populate the chart_series the same way vllm runs do.
+ *
+ * v5: map sglang:realtime_tokens (mode={prefill_cache,prefill_compute,decode})
+ * into promptTokensBySource so the cumulative prompt-token-source-breakdown
+ * chart shows useful splits for SGLang runs (filtered to prefill_* modes).
+ *
+ * v6: for SGLang, swap the coarse "prefill_cache" bucket for per-cache_source
+ * breakdown from sglang:cached_tokens — current runs always have one
+ * cache_source ("device" / HBM) but hicache (CPU offload) runs would
+ * split into "device" + "host" automatically once ingested.
+ *
+ * v7: extract sglang:hicache_host_{used,total}_tokens into a new
+ * hostKvCacheUsage series so the KV cache utilization chart can plot
+ * the CPU offload pool's usage alongside the on-GPU HBM line.
+ *
+ * v8: keep the per-engine dimension on kv_cache_usage_perc as
+ * `kvCacheUsageByEngine` (one entry per DP rank). The cluster-average
+ * line hides load skew on DEP configs; the detail page overlays the
+ * per-rank lines so a hot rank is visible at a glance.
+ */
+export const CHART_SERIES_VERSION = 8;
+
+export interface TimeSeriesPoint {
+  /** Seconds from benchmark start. */
+  t: number;
+  value: number; // Sample for that window; meaning and unit depend on the series.
+}
+
+export interface QueueDepthPoint {
+  t: number; // Presumably seconds from benchmark start, like TimeSeriesPoint.t — confirm.
+  running: number; // Running-request gauge for the window.
+  waiting: number; // Waiting-request gauge for the window.
+  total: number; // The tests expect running + waiting.
+}
+
+export interface ChartSeries {
+  version: number; // Extraction algorithm version; see CHART_SERIES_VERSION.
+  /** ns wall-clock of the first window's start; for debugging only. */
+  startNs: number;
+  /** ns wall-clock of the last window's end. */
+  endNs: number;
+  /** Total benchmark window in seconds. */
+  durationS: number;
+  /** Number of 1Hz windows captured. */
+  timeslicesCount: number;
+  kvCacheUsage: TimeSeriesPoint[]; // Per-window average across engine series.
+  prefixCacheHitRate: TimeSeriesPoint[]; // Per the tests: hits.rate / queries.rate per window.
+  queueDepth: QueueDepthPoint[]; // Running/waiting gauges paired per window.
+  promptTokensBySource: Record<string, TimeSeriesPoint[]>; // Per the tests: keyed by `source` label.
+  prefillTps: TimeSeriesPoint[]; // Per the tests: prompt-token counter rate, summed across engines.
+  decodeTps: TimeSeriesPoint[]; // Per the tests: generation-token counter rate, summed across engines.
+  /**
+   * Per-scrape rate (tokens/sec) of vllm:prefix_cache_hits, summed across
+   * engines. Detail page derives "cumulative unique input tokens" as
+   * cumsum(prefillTps - prefixCacheHitsTps) — what the cache actually
+   * saved vs the raw queries that came in.
+   */
+  prefixCacheHitsTps: TimeSeriesPoint[];
+  /**
+   * Host (CPU offload) KV cache utilization, 0..1. Only populated for
+   * SGLang hicache runs (derived as hicache_host_used / hicache_host_total).
+   * Frontend overlays this on the KV cache util chart as a second line.
+   */
+  hostKvCacheUsage: TimeSeriesPoint[];
+  /**
+   * Per-DP-rank KV cache utilization (0..1 each). One entry per engine
+   * series found in the raw metric, ordered by the `engine` label when
+   * present and by series-array index otherwise. Empty for single-engine
+   * deployments — the average `kvCacheUsage` line covers that case alone.
+   * The detail page overlays these on the same chart so DEP load skew is
+   * visible without changing the headline number.
+   */
+  kvCacheUsageByEngine: { engineLabel: string; points: TimeSeriesPoint[] }[];
+}
+
+// ── Raw blob shapes (subset we read) ────────────────────────────────────
+
+interface RawSlice {
+  start_ns?: number; // Window start, in nanoseconds.
+  end_ns?: number; // Window end, in nanoseconds.
+  avg?: number; // Read for gauge-style metrics.
+  rate?: number; // Read for counter-style metrics.
+}
+
+interface RawSeries {
+  labels?: Record<string, string>; // e.g. `engine` or `source` in the test fixtures.
+  timeslices?: RawSlice[]; // Samples of this series, one per window.
+}
+
+interface RawMetric {
+  series?: RawSeries[]; // Multi-engine deployments report one series per engine.
+}
+
+type MetricsMap = Record<string, RawMetric>; // Keyed by metric name, e.g. 'vllm:prompt_tokens'.
+
+/**
+ * Metric names kept by the stream-parse fallback (`streamCollectMetrics`);
+ * every other key under `metrics` is discarded there. Both vllm:* and
+ * sglang:* names are listed so blobs from either framework are collected.
+ */
+const CHART_METRIC_KEYS = new Set([
+  // vLLM
+  'vllm:kv_cache_usage_perc',
+  'vllm:gpu_cache_usage_perc',
+  'vllm:prefix_cache_hits',
+  'vllm:prefix_cache_queries',
+  'vllm:num_requests_running',
+  'vllm:num_requests_waiting',
+  'vllm:prompt_tokens',
+  'vllm:generation_tokens',
+  'vllm:prompt_tokens_by_source',
+  // SGLang
+  'sglang:token_usage',
+  'sglang:cached_tokens',
+  'sglang:prompt_tokens',
+  'sglang:generation_tokens',
+  'sglang:num_running_reqs',
+  'sglang:num_queue_reqs',
+  'sglang:realtime_tokens',
+  'sglang:hicache_host_used_tokens',
+  'sglang:hicache_host_total_tokens',
+]);
+
+/**
+ * Streaming fallback for server_metrics_json blobs that are too large to
+ * decode into one string: gunzips and parses the buffer incrementally and
+ * keeps only the `metrics` entries listed in CHART_METRIC_KEYS. The returned
+ * promise rejects if the chained stream emits `error`.
+ */
+async function streamCollectMetrics(buffer: Buffer): Promise<MetricsMap> {
+  /* eslint-disable @typescript-eslint/no-explicit-any */
+  const kept: MetricsMap = {};
+  const source = chain([
+    Readable.from(buffer),
+    createGunzip(),
+    parser(),
+    pick({ filter: 'metrics' }),
+    streamObject(),
+  ]) as any;
+  // Each data event is handled as one { key, value } entry of `metrics`.
+  const onEntry = (entry: { key: string; value: RawMetric }): void => {
+    if (CHART_METRIC_KEYS.has(entry.key)) kept[entry.key] = entry.value;
+  };
+  await new Promise<void>((resolve, reject) => {
+    source.on('data', onEntry).on('end', resolve).on('error', reject);
+  });
+  /* eslint-enable @typescript-eslint/no-explicit-any */
+  return kept;
+}
+
+/**
+ * Decode a gzipped server_metrics blob into its `metrics` map ({} when the
+ * key is absent). The one-shot gunzip + JSON.parse path runs first; only a
+ * string-too-long failure retries via `streamCollectMetrics`, others rethrow.
+ */
+async function parseMetrics(buffer: Buffer): Promise<MetricsMap> {
+  try {
+    const text = gunzipSync(buffer).toString('utf8');
+    const parsed = JSON.parse(text) as { metrics?: MetricsMap };
+    return parsed.metrics ?? {};
+  } catch (error) {
+    const errCode = error && (error as NodeJS.ErrnoException).code;
+    const msg = error instanceof Error ? error.message : String(error);
+    const tooLong = errCode === 'ERR_STRING_TOO_LONG' || msg.includes('longer than 0x1fffffe8');
+    if (!tooLong) throw error;
+    return await streamCollectMetrics(buffer);
+  }
+}
+
+/**
+ * Turn a gzipped server_metrics blob into the `ChartSeries` bundle. Returns
+ * null when no blob is given or when it cannot be parsed; errors raised while
+ * building the series from the parsed metrics are not caught here.
+ */
+export async function computeChartSeries(blob: Buffer | null): Promise<ChartSeries | null> {
+  if (!blob) return null;
+  let parsed: MetricsMap | null = null;
+  try {
+    parsed = await parseMetrics(blob);
+  } catch {
+    // Malformed blob: fall through with `parsed` still null, which is
+    // returned as null ("no data").
+  }
+  return parsed ? buildSeriesFromMetrics(parsed) : null;
+}
+
+/**
+ * Fold one numeric field (`avg` or `rate`) of every timeslice in `series`
+ * into a single value per `start_ns`. Slices lacking a numeric `start_ns` or
+ * a finite field value are skipped. `combine` picks the per-window total
+ * ('sum') or the mean over the samples reported for that window ('avg').
+ */
+function aggregateByStart(
+  series: readonly RawSeries[] | undefined,
+  field: 'avg' | 'rate',
+  combine: 'sum' | 'avg',
+): Map<number, number> {
+  // Running total and sample count per window, in first-seen order.
+  const buckets = new Map<number, { total: number; samples: number }>();
+  const slices = (series ?? []).flatMap((entry) => entry.timeslices ?? []);
+  for (const slice of slices) {
+    if (typeof slice.start_ns !== 'number') continue;
+    const sample = slice[field];
+    if (typeof sample !== 'number' || !Number.isFinite(sample)) continue;
+    const prev = buckets.get(slice.start_ns) ?? { total: 0, samples: 0 };
+    buckets.set(slice.start_ns, { total: prev.total + sample, samples: prev.samples + 1 });
+  }
+  const combined = new Map<number, number>();
+  for (const [startNs, { total, samples }] of buckets) {
+    combined.set(startNs, combine === 'sum' ? total : total / samples);
+  }
+  return combined;
+}
+
+/** Lists the map's `[start_ns, value]` pairs in ascending `start_ns` order. */
+function sortedEntries(m: Map<number, number>): [number, number][] {
+  return Array.from(m).sort(([lhs], [rhs]) => lhs - rhs);
+}
+
+function buildSeriesFromMetrics(metrics: MetricsMap): ChartSeries {
+  // Derives every chart series plus timing metadata from the parsed metric map.
+  // Timing reference: smallest first-slice start_ns and largest last-slice
+  // end_ns over every series. timeslicesCount is the longest series length
+  // (engines are scraped on the same cadence, so any single series would do).
+  let startNs = Number.POSITIVE_INFINITY;
+  let endNs = 0;
+  let timeslicesCount = 0;
+  for (const metricMeta of Object.values(metrics)) {
+    for (const s of metricMeta?.series ?? []) {
+      const ts = s.timeslices ?? [];
+      if (ts.length === 0) continue;
+      timeslicesCount = Math.max(timeslicesCount, ts.length);
+      const first = ts[0]!;
+      const last = ts.at(-1)!;
+      if (typeof first.start_ns === 'number' && first.start_ns < startNs) startNs = first.start_ns;
+      if (typeof last.end_ns === 'number' && last.end_ns > endNs) endNs = last.end_ns;
+    }
+  }
+  if (!Number.isFinite(startNs)) startNs = 0; // no usable start_ns anywhere → origin 0
+  const tOf = (ns: number) => (ns - startNs) / 1e9; // ns → seconds since startNs
+
+  // Pick the first metric name whose series array has any data; fallback
+  // chain lets the same code path serve both vllm:* and sglang:* blobs.
+  const pickSeries = (...names: string[]): readonly RawSeries[] | undefined => {
+    for (const name of names) {
+      const s = metrics[name]?.series;
+      if (s && s.length > 0) return s;
+    }
+    return undefined;
+  };
+
+  // KV cache usage (gauge, 0..1) — average across engines so the value
+  // stays a fraction (each engine has its own KV pool).
+  const kvSeries = pickSeries(
+    'vllm:kv_cache_usage_perc',
+    'vllm:gpu_cache_usage_perc',
+    'sglang:token_usage',
+  );
+  const kvCacheUsage: TimeSeriesPoint[] = sortedEntries(
+    aggregateByStart(kvSeries, 'avg', 'avg'),
+  ).map(([t, v]) => ({ t: tOf(t), value: v }));
+  // Per-engine breakdown of the same metric. We only emit it when there's
+  // more than one series — single-engine deployments would just duplicate
+  // the cluster-average line.
+  const kvCacheUsageByEngine: { engineLabel: string; points: TimeSeriesPoint[] }[] = [];
+  if (kvSeries && kvSeries.length > 1) {
+    // Sort by numeric engine label when present so rank 0..N renders in
+    // order; fall back to series-array index otherwise.
+    const decorated = kvSeries.map((s, idx) => {
+      const raw =
+        s.labels?.['engine'] ?? s.labels?.['engine_idx'] ?? s.labels?.['dp_rank'] ?? String(idx);
+      const numeric = Number(raw);
+      return { series: s, idx, label: raw, sortKey: Number.isFinite(numeric) ? numeric : idx };
+    });
+    decorated.sort((a, b) => a.sortKey - b.sortKey);
+    for (const { series, label } of decorated) {
+      const pts: TimeSeriesPoint[] = [];
+      for (const ts of series.timeslices ?? []) {
+        if (typeof ts.start_ns !== 'number' || typeof ts.avg !== 'number') continue;
+        if (!Number.isFinite(ts.avg)) continue;
+        pts.push({ t: tOf(ts.start_ns), value: ts.avg });
+      }
+      if (pts.length > 0) kvCacheUsageByEngine.push({ engineLabel: label, points: pts });
+    }
+  }
+
+  // Prefix cache hit rate per scrape: Σhits.rate / Σqueries.rate across
+  // engines, joined on start_ns. SGLang names: cached_tokens / prompt_tokens.
+  const hitsSeries = pickSeries('vllm:prefix_cache_hits', 'sglang:cached_tokens');
+  const qsSeries = pickSeries(
+    'vllm:prefix_cache_queries',
+    'vllm:prompt_tokens',
+    'sglang:prompt_tokens',
+  );
+  const hitsByT = aggregateByStart(hitsSeries, 'rate', 'sum');
+  const qsByT = aggregateByStart(qsSeries, 'rate', 'sum');
+  const prefixCacheHitRate: TimeSeriesPoint[] = [];
+  for (const [t, h] of [...hitsByT.entries()].toSorted((a, b) => a[0] - b[0])) {
+    const q = qsByT.get(t); // ticks with no positive query rate emit no point
+    if (q !== undefined && q > 0) prefixCacheHitRate.push({ t: tOf(t), value: h / q });
+  }
+
+  // Queue depth: sum running + waiting across engines per timeslice.
+  const runSeries = pickSeries('vllm:num_requests_running', 'sglang:num_running_reqs');
+  const waitSeries = pickSeries('vllm:num_requests_waiting', 'sglang:num_queue_reqs');
+  const runByT = aggregateByStart(runSeries, 'avg', 'sum');
+  const waitByT = aggregateByStart(waitSeries, 'avg', 'sum');
+  const queueDepth: QueueDepthPoint[] = [];
+  // Union of timestamps so we surface activity even if one of the gauges
+  // didn't report a sample on a given tick.
+  const allTimes = new Set<number>([...runByT.keys(), ...waitByT.keys()]);
+  for (const t of [...allTimes].toSorted((a, b) => a - b)) {
+    const running = runByT.get(t) ?? 0;
+    const waiting = waitByT.get(t) ?? 0;
+    queueDepth.push({ t: tOf(t), running, waiting, total: running + waiting });
+  }
+
+  // Throughput: sum the counter `rate` (already per-second) across engines.
+  // Takes a fallback chain so vllm:* and sglang:* both work.
+  const counterRate = (...names: string[]): TimeSeriesPoint[] => {
+    const s = pickSeries(...names);
+    return sortedEntries(aggregateByStart(s, 'rate', 'sum')).map(([t, v]) => ({
+      t: tOf(t),
+      value: v,
+    }));
+  };
+  const prefillTps = counterRate('vllm:prompt_tokens', 'sglang:prompt_tokens');
+  const decodeTps = counterRate('vllm:generation_tokens', 'sglang:generation_tokens');
+  // Tokens served from prefix cache per scrape. Lets the frontend derive
+  // "cumulative unique input tokens served" = cumsum(prefillTps) − cumsum(hits).
+  const prefixCacheHitsTps = counterRate('vllm:prefix_cache_hits', 'sglang:cached_tokens');
+
+  // SGLang hicache: host-pool KV cache utilization as used/total per
+  // timeslice. Both metrics are gauges in absolute tokens. Total stays
+  // constant (it's the pool size), used fluctuates.
+  const hostUsedByT = aggregateByStart(
+    metrics['sglang:hicache_host_used_tokens']?.series,
+    'avg',
+    'sum',
+  );
+  const hostTotalByT = aggregateByStart(
+    metrics['sglang:hicache_host_total_tokens']?.series,
+    'avg',
+    'sum',
+  );
+  const hostKvCacheUsage: TimeSeriesPoint[] = [];
+  for (const [t, used] of [...hostUsedByT.entries()].toSorted((a, b) => a[0] - b[0])) {
+    const total = hostTotalByT.get(t);
+    if (total !== undefined && total > 0) {
+      hostKvCacheUsage.push({ t: tOf(t), value: used / total });
+    }
+  }
+
+  // Per-source prompt tokens — sum across engines per source label.
+  //   vllm: vllm:prompt_tokens_by_source has one series per source label
+  //         (local_cache_hit, external_cache_hit, miss, ...). Use the
+  //         `source`/`reason`/`kind` label as the breakdown key.
+  //   sglang: sglang:realtime_tokens uses a `mode` label with values
+  //         {prefill_cache, prefill_compute, decode}. Filter to prefill_*
+  //         since decode isn't prompt-token volume.
+  const promptBySrcByT = new Map<string, Map<number, number>>();
+  for (const series of metrics['vllm:prompt_tokens_by_source']?.series ?? []) {
+    const labels = series.labels ?? {};
+    const source = labels['source'] ?? labels['reason'] ?? labels['kind'] ?? JSON.stringify(labels);
+    let byT = promptBySrcByT.get(source);
+    if (!byT) {
+      byT = new Map<number, number>();
+      promptBySrcByT.set(source, byT);
+    }
+    for (const ts of series.timeslices ?? []) {
+      if (typeof ts.rate === 'number' && typeof ts.start_ns === 'number') {
+        byT.set(ts.start_ns, (byT.get(ts.start_ns) ?? 0) + ts.rate);
+      }
+    }
+  }
+  // SGLang fallback: only consider when the vllm metric wasn't found.
+  //   - Cache misses (fresh prefill): `sglang:realtime_tokens[mode=prefill_compute]`
+  //   - Cache hits, split by tier: per-series `sglang:cached_tokens` where each
+  //     series carries a `cache_source` label ("device" = HBM, "host" = CPU
+  //     offload via hicache). Current runs have only `device`; when hicache
+  //     runs land, additional series will appear and the chart will split.
+  if (promptBySrcByT.size === 0) {
+    for (const series of metrics['sglang:realtime_tokens']?.series ?? []) {
+      const labels = series.labels ?? {};
+      const mode = labels['mode'] ?? 'unknown';
+      // Only carry the cache-miss line over — cache hits come from
+      // sglang:cached_tokens broken out by cache_source below, so we'd
+      // double-count if we kept `prefill_cache` here too.
+      if (mode !== 'prefill_compute') continue;
+      const label = 'compute (miss)';
+      let byT = promptBySrcByT.get(label);
+      if (!byT) {
+        byT = new Map<number, number>();
+        promptBySrcByT.set(label, byT);
+      }
+      for (const ts of series.timeslices ?? []) {
+        if (typeof ts.rate === 'number' && typeof ts.start_ns === 'number') {
+          byT.set(ts.start_ns, (byT.get(ts.start_ns) ?? 0) + ts.rate);
+        }
+      }
+    }
+    // Cache hits broken out per cache_source. Strip the noisy "total" label
+    // (older sglang versions emit a single un-broken-out series labelled
+    // total — show that as just "cache hit").
+    for (const series of metrics['sglang:cached_tokens']?.series ?? []) {
+      const labels = series.labels ?? {};
+      const src = labels['cache_source'] ?? 'cache hit';
+      const label =
+        src === 'device'
+          ? 'cache hit (HBM)'
+          : src === 'host'
+            ? 'cache hit (CPU offload)'
+            : src === 'total'
+              ? 'cache hit'
+              : `cache hit (${src})`;
+      let byT = promptBySrcByT.get(label);
+      if (!byT) {
+        byT = new Map<number, number>();
+        promptBySrcByT.set(label, byT);
+      }
+      for (const ts of series.timeslices ?? []) {
+        if (typeof ts.rate === 'number' && typeof ts.start_ns === 'number') {
+          byT.set(ts.start_ns, (byT.get(ts.start_ns) ?? 0) + ts.rate);
+        }
+      }
+    }
+  }
+  const promptTokensBySource: Record<string, TimeSeriesPoint[]> = {};
+  for (const [source, byT] of promptBySrcByT) {
+    const arr: TimeSeriesPoint[] = [];
+    for (const [t, v] of [...byT.entries()].toSorted((a, b) => a[0] - b[0])) {
+      if (v > 0) arr.push({ t: tOf(t), value: v }); // non-positive rates are omitted
+    }
+    if (arr.length > 0) promptTokensBySource[source] = arr; // sources with no points are dropped
+  }
+  return {
+    version: CHART_SERIES_VERSION,
+    startNs,
+    endNs,
+    durationS: endNs > startNs ? (endNs - startNs) / 1e9 : 0,
+    timeslicesCount,
+    kvCacheUsage,
+    prefixCacheHitRate,
+    queueDepth,
+    promptTokensBySource,
+    prefillTps,
+    decodeTps,
+    prefixCacheHitsTps,
+    hostKvCacheUsage,
+    kvCacheUsageByEngine,
+  };
+}
diff --git a/packages/db/src/etl/compute-request-timeline.test.ts b/packages/db/src/etl/compute-request-timeline.test.ts
new file mode 100644
index 00000000..64512aca
--- /dev/null
+++ b/packages/db/src/etl/compute-request-timeline.test.ts
@@ -0,0 +1,153 @@
+import { gzipSync } from 'node:zlib';
+
+import { describe, expect, it } from 'vitest';
+
+import { REQUEST_TIMELINE_VERSION, computeRequestTimeline } from './compute-request-timeline.js';
+
+interface SyntheticRequest {
+  cid: string; // → metadata.conversation_id
+  ti: number; // → metadata.turn_index
+  wid?: string; // → metadata.worker_id (default 'worker_default')
+  ad?: number; // → metadata.agent_depth (default 0)
+  phase?: string; // → metadata.benchmark_phase (default 'profiling')
+  credit: number; // → metadata.credit_issued_ns
+  start: number; // → metadata.request_start_ns
+  end: number; // → metadata.request_end_ns
+  ack?: number | null; // → metadata.request_ack_ns; key omitted when undefined
+  ttftMs?: number | null; // null → time_to_first_token: null; undefined → 50 ms
+  isl?: number | null; // → input_sequence_length.value (default 100)
+  osl?: number | null; // → output_sequence_length.value (default 10)
+  cancelled?: boolean; // → metadata.was_cancelled (default false)
+}
+
+/** Gzip a JSONL blob holding one aiperf-style `{ metadata, metrics }` record per request. */
+function makeBlob(requests: SyntheticRequest[]) {
+  const toLine = (req: SyntheticRequest): string => {
+    const metadata: Record<string, unknown> = {
+      conversation_id: req.cid,
+      turn_index: req.ti,
+      worker_id: req.wid ?? 'worker_default',
+      agent_depth: req.ad ?? 0,
+      benchmark_phase: req.phase ?? 'profiling',
+      credit_issued_ns: req.credit,
+      request_start_ns: req.start,
+    };
+    if (req.ack !== undefined) metadata['request_ack_ns'] = req.ack;
+    metadata['request_end_ns'] = req.end;
+    metadata['was_cancelled'] = req.cancelled ?? false;
+    const metrics = {
+      time_to_first_token: req.ttftMs === null ? null : { value: req.ttftMs ?? 50, unit: 'ms' },
+      input_sequence_length: { value: req.isl ?? 100, unit: 'tokens' },
+      output_sequence_length: { value: req.osl ?? 10, unit: 'tokens' },
+    };
+    return JSON.stringify({ metadata, metrics });
+  };
+  return gzipSync(Buffer.from(requests.map(toLine).join('\n')));
+}
+
+describe('computeRequestTimeline', () => {
+  it('returns null when the blob is null', () => {
+    expect(computeRequestTimeline(null)).toBeNull();
+  });
+
+  it('returns null on a malformed (non-gzip) blob', () => {
+    expect(computeRequestTimeline(Buffer.from('not-gzip'))).toBeNull();
+  });
+
+  it('returns null when the blob has no parseable records', () => {
+    expect(computeRequestTimeline(gzipSync(Buffer.from('\n\n')))).toBeNull(); // blank lines only
+  });
+
+  it('returns the current REQUEST_TIMELINE_VERSION in the bundle', () => {
+    const tl = computeRequestTimeline(
+      makeBlob([{ cid: 'a', ti: 0, credit: 1000, start: 2000, end: 3000 }]),
+    );
+    expect(tl?.version).toBe(REQUEST_TIMELINE_VERSION);
+  });
+
+  it('shifts ns timestamps to be relative to the earliest credit_issued', () => {
+    // Two turns of one conversation; the earliest credit (1_000_000_000) is the origin.
+    const tl = computeRequestTimeline(
+      makeBlob([
+        { cid: 'a', ti: 0, credit: 1_000_000_000, start: 1_001_000_000, end: 1_010_000_000 },
+        { cid: 'a', ti: 1, credit: 1_020_000_000, start: 1_021_000_000, end: 1_030_000_000 },
+      ]),
+    );
+    expect(tl?.startNs).toBe(1_000_000_000);
+    expect(tl?.endNs).toBe(1_030_000_000);
+    expect(tl?.durationS).toBeCloseTo(0.03, 6);
+    expect(tl?.requests[0]?.credit).toBe(0);
+    expect(tl?.requests[0]?.end).toBe(10_000_000);
+    expect(tl?.requests[1]?.start).toBe(21_000_000);
+  });
+
+  it('sorts requests by start time, regardless of input order', () => {
+    const tl = computeRequestTimeline(
+      makeBlob([
+        { cid: 'a', ti: 0, credit: 30, start: 50, end: 60 },
+        { cid: 'a', ti: 1, credit: 0, start: 10, end: 20 },
+        { cid: 'a', ti: 2, credit: 80, start: 90, end: 100 },
+      ]),
+    );
+    expect(tl?.requests.map((r) => r.start)).toEqual([10, 50, 90]); // origin is 0 here
+  });
+
+  it('preserves conversation/worker grouping fields', () => {
+    const tl = computeRequestTimeline(
+      makeBlob([
+        {
+          cid: 'conv-A',
+          ti: 5,
+          wid: 'worker_abcd1234',
+          ad: 2,
+          phase: 'profiling',
+          credit: 0,
+          start: 10,
+          end: 100,
+        },
+      ]),
+    );
+    const r = tl?.requests[0]!;
+    expect(r.cid).toBe('conv-A');
+    expect(r.ti).toBe(5);
+    expect(r.wid).toBe('worker_abcd1234');
+    expect(r.ad).toBe(2);
+    expect(r.phase).toBe('profiling');
+  });
+
+  it('preserves the cancelled flag and TTFT/ISL/OSL metrics', () => {
+    const tl = computeRequestTimeline(
+      makeBlob([
+        {
+          cid: 'a',
+          ti: 0,
+          credit: 0,
+          start: 10,
+          end: 100,
+          ttftMs: 25.5,
+          isl: 1024,
+          osl: 256,
+          cancelled: true,
+        },
+      ]),
+    );
+    const r = tl?.requests[0]!;
+    expect(r.cancelled).toBe(true);
+    expect(r.ttftMs).toBeCloseTo(25.5, 6);
+    expect(r.isl).toBe(1024);
+    expect(r.osl).toBe(256);
+  });
+
+  it('skips records missing both credit_issued_ns and request_start_ns', () => {
+    // The only record carries just request_end_ns, so no usable record remains → null.
+    const broken = gzipSync(
+      Buffer.from(
+        JSON.stringify({
+          metadata: { conversation_id: 'a', turn_index: 0, request_end_ns: 1234 },
+          metrics: {},
+        }),
+      ),
+    );
+    expect(computeRequestTimeline(broken)).toBeNull();
+  });
+});
diff --git a/packages/db/src/etl/compute-request-timeline.ts b/packages/db/src/etl/compute-request-timeline.ts
new file mode 100644
index 00000000..a1134f7a
--- /dev/null
+++ b/packages/db/src/etl/compute-request-timeline.ts
@@ -0,0 +1,182 @@
+/**
+ * Pre-compute the per-request timeline for the agentic detail page's
+ * Gantt view. Output lands in `agentic_trace_replay.request_timeline`
+ * and is read directly by the timeline API route.
+ *
+ * Shape is a thin array — ~150 bytes per request × ~200 requests per
+ * point ≈ 30 KB per row before JSONB compression. Trivial vs the raw
+ * gzipped JSONL blob (~1-3 MB).
+ *
+ * Versioned so the backfill script knows which rows are stale — bump
+ * `REQUEST_TIMELINE_VERSION` whenever the extraction algorithm changes.
+ */
+
+import { gunzipSync } from 'node:zlib';
+
+/** Bump when the extraction algorithm changes — backfill recomputes anything older. */
+export const REQUEST_TIMELINE_VERSION = 1;
+
+export interface RequestRecord {
+  /** Conversation id (groups turns of one agent session); `unknown` if missing. */
+  cid: string;
+  /** Zero-based turn index within the conversation (0 if missing). */
+  ti: number;
+  /** Worker id (concurrency slot that handled this request); `unknown` if missing. */
+  wid: string;
+  /** Sub-agent depth (0 = top-level, also used when missing). */
+  ad: number;
+  /** `warmup` or `profiling`; `unknown` if the record has no phase. */
+  phase: string;
+  /** ns offset from timeline.startNs. Load gen decided to dispatch. */
+  credit: number;
+  /** ns offset from timeline.startNs. HTTP send started. */
+  start: number;
+  /** ns offset from timeline.startNs. First server acknowledgement (or null). */
+  ack: number | null;
+  /** ns offset from timeline.startNs. Last byte received. */
+  end: number;
+  /** Time-to-first-token in ms (null when no finite value was found). */
+  ttftMs: number | null;
+  /** Input sequence length in tokens (null when no finite value was found). */
+  isl: number | null;
+  /** Output sequence length in tokens (null when no finite value was found). */
+  osl: number | null;
+  cancelled: boolean; // true only when the record's was_cancelled is exactly true
+}
+
+export interface RequestTimeline {
+  version: number; // REQUEST_TIMELINE_VERSION at compute time
+  /** Smallest per-request `credit_issued_ns ?? request_start_ns`; the relative-time origin. */
+  startNs: number;
+  /** Wall-clock ns of the latest `request_end_ns`. */
+  endNs: number;
+  /** `(endNs - startNs) / 1e9`, or 0 when endNs does not exceed startNs. */
+  durationS: number;
+  requests: RequestRecord[]; // sorted by ascending `start` offset
+}
+
+interface RawMetadata { // `metadata` object of one JSONL record; every field may be absent
+  conversation_id?: string;
+  turn_index?: number;
+  worker_id?: string;
+  agent_depth?: number;
+  benchmark_phase?: string;
+  credit_issued_ns?: number; // preferred lifecycle start
+  request_start_ns?: number; // fallback lifecycle start
+  request_ack_ns?: number; // only numeric values are kept; anything else becomes ack = null
+  request_end_ns?: number; // required (with a start) for the record to be used
+  was_cancelled?: boolean;
+}
+
+interface RawMetricValue { // `{ value, ... }` metric envelope; only `value` is read
+  value?: number;
+}
+
+interface RawRecord { // one parsed line of the JSONL blob
+  metadata?: RawMetadata;
+  metrics?: { // each metric may be an envelope or a bare number (see readNum)
+    time_to_first_token?: RawMetricValue | number;
+    input_sequence_length?: RawMetricValue | number;
+    output_sequence_length?: RawMetricValue | number;
+  };
+}
+
+/**
+ * Unwrap a metric given either as a bare number or as a `{ value, unit }`
+ * envelope. Anything that does not resolve to a finite number is undefined.
+ */
+function readNum(v: unknown): number | undefined {
+  const candidate = typeof v === 'object' && v !== null ? (v as { value?: unknown }).value : v;
+  if (typeof candidate !== 'number') return undefined;
+  return Number.isFinite(candidate) ? candidate : undefined;
+}
+
+/**
+ * Parse the gzipped `profile_export.jsonl` blob into a chart-ready timeline.
+ * Returns null for a missing/corrupt blob or when no line yields a usable record.
+ */
+export function computeRequestTimeline(blob: Buffer | null): RequestTimeline | null {
+  if (!blob) return null;
+  let text: string;
+  try {
+    text = gunzipSync(blob).toString('utf8');
+  } catch {
+    return null;
+  }
+
+  // First pass: parse + collect raw turns; find timeline origin.
+  const raw: {
+    meta: RawMetadata;
+    ttftMs: number | null;
+    isl: number | null;
+    osl: number | null;
+  }[] = [];
+  let originNs = Number.POSITIVE_INFINITY;
+  let endNs = 0;
+
+  for (const line of text.split('\n')) {
+    if (!line) continue;
+    let rec: RawRecord | null;
+    try {
+      rec = JSON.parse(line) as RawRecord | null;
+    } catch {
+      continue;
+    }
+    const meta = rec?.metadata ?? {};
+    // A bare `null` line parses to null: it has no timestamps, so it is skipped
+    // below. Lifecycle start is credit_issued_ns, else request_start_ns.
+    const cStart = meta.credit_issued_ns ?? meta.request_start_ns;
+    const cEnd = meta.request_end_ns;
+    if (typeof cStart !== 'number' || typeof cEnd !== 'number') continue;
+
+    if (cStart < originNs) originNs = cStart;
+    if (cEnd > endNs) endNs = cEnd;
+
+    raw.push({
+      meta,
+      ttftMs: readNum(rec?.metrics?.time_to_first_token) ?? null,
+      isl: readNum(rec?.metrics?.input_sequence_length) ?? null,
+      osl: readNum(rec?.metrics?.output_sequence_length) ?? null,
+    });
+  }
+
+  if (raw.length === 0) return null;
+  if (!Number.isFinite(originNs)) originNs = 0;
+
+  // Second pass: shift timestamps to be relative to originNs (smaller
+  // numbers fit in JSON nicely and the frontend doesn't need bigint math).
+  const requests: RequestRecord[] = [];
+  for (const r of raw) {
+    const m = r.meta;
+    const credit = (m.credit_issued_ns ?? m.request_start_ns ?? originNs) - originNs;
+    const start = (m.request_start_ns ?? m.credit_issued_ns ?? originNs) - originNs;
+    const ack = typeof m.request_ack_ns === 'number' ? m.request_ack_ns - originNs : null;
+    const end = (m.request_end_ns ?? originNs) - originNs;
+    requests.push({
+      cid: m.conversation_id ?? 'unknown',
+      ti: typeof m.turn_index === 'number' ? m.turn_index : 0,
+      wid: m.worker_id ?? 'unknown',
+      ad: typeof m.agent_depth === 'number' ? m.agent_depth : 0,
+      phase: m.benchmark_phase ?? 'unknown',
+      credit,
+      start,
+      ack,
+      end,
+      ttftMs: r.ttftMs,
+      isl: r.isl,
+      osl: r.osl,
+      cancelled: m.was_cancelled === true,
+    });
+  }
+
+  // Order by start offset; ties keep blob order, so backfill output is deterministic.
+  requests.sort((a, b) => a.start - b.start);
+
+  return {
+    version: REQUEST_TIMELINE_VERSION,
+    startNs: originNs,
+    endNs,
+    durationS: endNs > originNs ? (endNs - originNs) / 1e9 : 0,
+    requests,
+  };
+}
diff --git a/packages/db/src/etl/normalizers.ts b/packages/db/src/etl/normalizers.ts
index d42429c9..0e1166aa 100644
--- a/packages/db/src/etl/normalizers.ts
+++ b/packages/db/src/etl/normalizers.ts
@@ -22,6 +22,8 @@ export { GPU_KEYS };
  *   stripped base is not in `GPU_KEYS`.
  */
 export function hwToGpuKey(hw: string): string | null {
+  // Take the first segment before `-` as the canonical key. Subsumes all the
+  // prior explicit suffix strips (-nv, -amds, -dgxc-slurm, -p1, -cw, …).
   const base = hw.toLowerCase().split('-')[0];
   return GPU_KEYS.has(base) ? base : null;
 }
diff --git a/packages/db/src/etl/skip-tracker.test.ts b/packages/db/src/etl/skip-tracker.test.ts
index 90ad73b7..e407db3a 100644
--- a/packages/db/src/etl/skip-tracker.test.ts
+++ b/packages/db/src/etl/skip-tracker.test.ts
@@ -9,6 +9,7 @@ describe('createSkipTracker', () => {
     expect(tracker.skips.unmappedHw).toBe(0);
     expect(tracker.skips.noIslOsl).toBe(0);
     expect(tracker.skips.dbError).toBe(0);
+    expect(tracker.skips.traceReplayMissing).toBe(0);
   });
 
   it('initializes with empty unmapped sets', () => {
diff --git a/packages/db/src/etl/skip-tracker.ts b/packages/db/src/etl/skip-tracker.ts
index 6166ea44..401d197c 100644
--- a/packages/db/src/etl/skip-tracker.ts
+++ b/packages/db/src/etl/skip-tracker.ts
@@ -8,7 +8,10 @@ export interface Skips {
   unmappedModel: number;
   unmappedHw: number;
   noIslOsl: number;
+  failedRun: number;
   dbError: number;
+  /** Agentic point whose sibling `agentic_<suffix>` artifact had no trace_replay files. */
+  traceReplayMissing: number;
 }
 
 export interface SkipSnapshot {
@@ -66,7 +69,15 @@ const MAX_DB_ERRORS = 10;
  * @returns A `SkipTracker` with zeroed counters and empty unmapped-name sets.
  */
 export function createSkipTracker(): SkipTracker {
-  const skips: Skips = { badZip: 0, unmappedModel: 0, unmappedHw: 0, noIslOsl: 0, dbError: 0 };
+  const skips: Skips = {
+    badZip: 0,
+    unmappedModel: 0,
+    unmappedHw: 0,
+    noIslOsl: 0,
+    failedRun: 0,
+    dbError: 0,
+    traceReplayMissing: 0,
+  };
   const unmappedModels = new Set<string>();
   const unmappedHws = new Set<string>();
   const unmappedPrecisions = new Set<string>();
diff --git a/packages/db/src/etl/trace-replay-ingest.ts b/packages/db/src/etl/trace-replay-ingest.ts
new file mode 100644
index 00000000..cb022ca9
--- /dev/null
+++ b/packages/db/src/etl/trace-replay-ingest.ts
@@ -0,0 +1,147 @@
+/**
+ * Insert per-point aiperf trace files (`profile_export.jsonl`, `server_metrics_export.csv`,
+ * `server_metrics_export.json`) into `agentic_trace_replay` and link the new row
+ * to each provided benchmark_results row via `trace_replay_id`.
+ *
+ * Mirrors the {@link insertServerLog} idempotency contract: rows that already
+ * have a non-null `trace_replay_id` are left alone so a re-ingest doesn't
+ * duplicate the sibling blob.
+ */
+
+import { gzipSync } from 'node:zlib';
+
+import type postgres from 'postgres';
+
+import { computeAggregateStats } from './compute-aggregate-stats.js';
+import { computeChartSeries } from './compute-chart-series.js';
+import { computeRequestTimeline } from './compute-request-timeline.js';
+
+type Sql = ReturnType<typeof postgres>;
+
+/**
+ * Persist the per-point trace files and link them to `benchmarkResultIds`.
+ * No-op when no ids are given, every blob is null, or all rows are already linked.
+ * @param sql                 Active `postgres` connection.
+ * @param benchmarkResultIds  DB ids of the benchmark_results rows produced by
+ *                            the same `bmk_agentic_<suffix>` artifact whose
+ *                            sibling `agentic_<suffix>` directory holds these
+ *                            trace files.
+ * @param profileExportJsonl  Raw bytes of `profile_export.jsonl`, or null.
+ *                            Gzipped before storage.
+ * @param serverMetricsCsv    Raw bytes of `server_metrics_export.csv`, or null.
+ *                            Stored as-is.
+ * @param serverMetricsJson   Raw bytes of `server_metrics_export.json` —
+ *                            per-scrape time-series of every Prometheus metric.
+ *                            Optional, gzipped before storage (~42x ratio).
+ */
+export async function insertTraceReplay(
+  sql: Sql,
+  benchmarkResultIds: number[],
+  profileExportJsonl: Buffer | null,
+  serverMetricsCsv: Buffer | null,
+  serverMetricsJson: Buffer | null = null,
+): Promise<void> {
+  if (benchmarkResultIds.length === 0) return;
+  if (!profileExportJsonl && !serverMetricsCsv && !serverMetricsJson) return;
+
+  // Only link rows that don't already point at a trace_replay row — keeps
+  // re-ingest from inserting duplicate sibling blobs.
+  const unlinked = await sql<{ id: number }[]>`
+    select id from benchmark_results
+    where id = any(${sql.array(benchmarkResultIds)}::bigint[])
+      and trace_replay_id is null
+  `;
+  if (unlinked.length === 0) return;
+
+  const profileGz = profileExportJsonl ? gzipSync(profileExportJsonl) : null;
+  const profileSize = profileExportJsonl ? profileExportJsonl.length : null;
+  const csvSize = serverMetricsCsv ? serverMetricsCsv.length : null;
+  const metricsJsonGz = serverMetricsJson ? gzipSync(serverMetricsJson) : null;
+  const metricsJsonSize = serverMetricsJson ? serverMetricsJson.length : null;
+
+  // Pre-compute aggregate stats + chart-ready time-series + per-request
+  // timeline so the detail page doesn't have to re-parse these blobs on
+  // every request. Each helper tolerates a null blob and falls back to
+  // a streaming parser for oversized server_metrics blobs.
+  const [aggregateStats, chartSeries, requestTimeline] = await Promise.all([
+    computeAggregateStats({ profileBlob: profileGz, serverBlob: metricsJsonGz }),
+    computeChartSeries(metricsJsonGz),
+    Promise.resolve(computeRequestTimeline(profileGz)), // synchronous helper
+  ]);
+
+  const [{ id: traceReplayId }] = await sql<{ id: number }[]>`
+    insert into agentic_trace_replay (
+      profile_export_jsonl_gz,
+      profile_export_uncompressed_size,
+      server_metrics_csv,
+      server_metrics_csv_size,
+      server_metrics_json_gz,
+      server_metrics_json_uncompressed_size,
+      aggregate_stats,
+      chart_series,
+      request_timeline
+    )
+    values (
+      ${profileGz},
+      ${profileSize},
+      ${serverMetricsCsv},
+      ${csvSize},
+      ${metricsJsonGz},
+      ${metricsJsonSize},
+      ${sql.json(structuredClone(aggregateStats) as unknown as Parameters<typeof sql.json>[0])},
+      ${chartSeries === null ? null : sql.json(structuredClone(chartSeries) as unknown as Parameters<typeof sql.json>[0])},
+      ${requestTimeline === null ? null : sql.json(structuredClone(requestTimeline) as unknown as Parameters<typeof sql.json>[0])}
+    )
+    returning id
+  `;
+
+  await sql`
+    update benchmark_results
+    set trace_replay_id = ${traceReplayId}
+    where id = any(${sql.array(unlinked.map((r) => r.id))}::bigint[])
+  `;
+
+  // Derive lifetime GPU + CPU cache hit rates from chart_series. SGLang
+  // runs don't populate these in the harness JSON; vLLM runs do but only
+  // for GPU. We always recompute to keep the derivation consistent with
+  // what the detail-page charts plot — overwriting any pre-existing value.
+  //
+  // Source label naming differs by framework / cache topology:
+  //   SGLang hicache: 'cache hit (HBM)' + 'cache hit (CPU offload)'
+  //   SGLang older:   'cache hit'      (no tier breakdown)
+  //   vLLM LMCache:   'local_cache_hit' + 'external_kv_transfer'  (+ 'local_compute' for miss)
+  //   vLLM single:    falls back to prefixCacheHitsTps total (= local cache only)
+  if (chartSeries && chartSeries.prefillTps.length > 0) {
+    const sumPrompts = chartSeries.prefillTps.reduce((s, p) => s + p.value, 0);
+    if (sumPrompts > 0) {
+      const sumOf = (name: string): number =>
+        (chartSeries.promptTokensBySource[name] ?? []).reduce((s, p) => s + p.value, 0);
+      // CPU-offload hits: SGLang hicache + vLLM LMCache external transfer.
+      const cpuHits = sumOf('cache hit (CPU offload)') + sumOf('external_kv_transfer');
+      // GPU/HBM hits from source breakdown, summed across known aliases.
+      const hbmFromBreakdown =
+        sumOf('cache hit (HBM)') + sumOf('cache hit') + sumOf('local_cache_hit');
+      // Use the breakdown's GPU total when it is positive; otherwise fall back to
+      // the summed prefixCacheHitsTps (single-source vLLM path with no by_source
+      // metric). NOTE(review): a breakdown with zero GPU hits also takes the fallback.
+      const gpuHits =
+        hbmFromBreakdown > 0
+          ? hbmFromBreakdown
+          : chartSeries.prefixCacheHitsTps.reduce((s, p) => s + p.value, 0);
+      const gpuRate = gpuHits / sumPrompts;
+      const cpuRate = cpuHits > 0 ? cpuHits / sumPrompts : null; // null → CPU key left untouched
+      await sql`
+        update benchmark_results
+        set metrics = jsonb_set(
+          case when ${cpuRate}::numeric is not null
+            then jsonb_set(metrics, '{server_cpu_cache_hit_rate}', to_jsonb(${cpuRate}::numeric))
+            else metrics
+          end,
+          '{server_gpu_cache_hit_rate}',
+          to_jsonb(${gpuRate}::numeric)
+        )
+        where id = any(${sql.array(unlinked.map((r) => r.id))}::bigint[])
+      `;
+    }
+  }
+}
diff --git a/packages/db/src/ingest-ci-run.ts b/packages/db/src/ingest-ci-run.ts
index c345e662..eeb55313 100644
--- a/packages/db/src/ingest-ci-run.ts
+++ b/packages/db/src/ingest-ci-run.ts
@@ -45,6 +45,7 @@ import {
   bulkUpsertAvailability,
   insertServerLog,
 } from './etl/benchmark-ingest';
+import { insertTraceReplay } from './etl/trace-replay-ingest';
 import { mapAggEvalRow, mapEvalRow } from './etl/eval-mapper';
 import { ingestEvalRow } from './etl/eval-ingest';
 import { mapEvalSamples } from './etl/eval-samples-mapper';
@@ -109,15 +110,30 @@ if (isDownloadMode) {
     } catch {}
   }
 
-  const byName = new Map<string, (typeof allArtifacts)[0]>();
+  // Strip the trailing `_<runner-pool>_<attempt-digits>` token from each
+  // artifact name, then group by the resulting logical name and keep only
+  // the most recent per group. Without this, two artifacts produced on
+  // different runners for the same logical config (e.g. `…_h200-cw_00` and
+  // `…_h200-dgxc-slurm_1`) both land in the DB and the failed one's empty
+  // metrics can overwrite the good one via ON CONFLICT DO UPDATE.
+  //
+  // The runner pool name itself has no underscores (`h200-cw`,
+  // `h200-dgxc-slurm`, `b200-nb`), so `[a-zA-Z0-9.-]*` keeps the strip
+  // bounded — using `\w` here would over-match across earlier `_`
+  // separators and collapse different (conc, offload) variants into the
+  // same logical name.
+  const RUNNER_SUFFIX_RE = /_[a-zA-Z][a-zA-Z0-9.-]*_\d+$/;
+  const byLogical = new Map<string, (typeof allArtifacts)[0]>();
   for (const a of allArtifacts) {
-    const existing = byName.get(a.name);
+    const key = a.name.replace(RUNNER_SUFFIX_RE, '');
+    const existing = byLogical.get(key);
     if (!existing || a.created_at > existing.created_at) {
-      byName.set(a.name, a);
+      byLogical.set(key, a);
     }
   }
 
-  for (const [name, artifact] of byName) {
+  for (const [, artifact] of byLogical) {
+    const name = artifact.name;
     console.log(`  ${name}`);
     const zipPath = path.join(artifactsDir, 'artifact.zip');
     execSync(`gh api "${artifact.archive_download_url}" > "${zipPath}"`, {
@@ -129,7 +145,7 @@ if (isDownloadMode) {
     fs.unlinkSync(zipPath);
   }
 
-  console.log(`\n  Downloaded ${byName.size} artifact(s)`);
+  console.log(`\n  Downloaded ${byLogical.size} artifact(s)`);
 
   // Fetch run attempt from API
   const attemptStr = execSync(
@@ -194,6 +210,14 @@ const ARTIFACT_NAMES = {
   changelog: 'changelog-metadata',
 } as const;
 
+/**
+ * Reduce an artifact directory name to its bare suffix by dropping a leading
+ * `bmk_` and then a leading `agentic_`, so that `bmk_agentic_<suffix>` and its
+ * sibling `agentic_<suffix>` artifact both resolve to the same key.
+ */
+const stripBmkAndAgenticPrefix = (s: string): string =>
+  [/^bmk_/u, /^agentic_/u].reduce((name, prefix) => name.replace(prefix, ''), s);
+
 function readJson(filePath: string): unknown {
   try {
     return JSON.parse(fs.readFileSync(filePath, 'utf8'));
@@ -294,13 +318,14 @@ async function main(): Promise<void> {
 
   const availRows: {
     model: string;
-    isl: number;
-    osl: number;
+    isl: number | null;
+    osl: number | null;
     precision: string;
     hardware: string;
     framework: string;
     specMethod: string;
     disagg: boolean;
+    benchmarkType: string;
   }[] = [];
 
   let totalNewBmk = 0,
@@ -311,6 +336,7 @@ async function main(): Promise<void> {
   let totalSamples = 0;
   let totalSampleFiles = 0;
   let totalChangelogs = 0;
+  let totalTraceReplayLinked = 0;
 
   // ── Check for evals-only flag in changelog ────────────────────────────
   const changelogDir = path.join(artifactsDir, ARTIFACT_NAMES.changelog);
@@ -365,6 +391,56 @@ async function main(): Promise<void> {
       console.log(`  Found ${serverLogPaths.size} server log artifact(s)`);
     }
 
+    // Sibling aiperf artifacts: each `bmk_agentic_<suffix>` is paired with an
+    // `agentic_<suffix>` dir holding `profile_export.jsonl` and
+    // `server_metrics_export.csv`. The harness emits these under either a
+    // `trace_replay/` subdir (older layout) or `aiperf_artifacts/` (current).
+    // Older non-aiperf agentic runs don't ship this sibling. Key on the bare
+    // suffix so both names map to the same Map entry.
+    const TRACE_SUBDIRS = ['aiperf_artifacts', 'trace_replay'];
+    const traceReplayPaths = new Map<
+      string,
+      {
+        profileJsonl: string | null;
+        serverMetricsCsv: string | null;
+        serverMetricsJson: string | null;
+      }
+    >();
+    if (fs.existsSync(artifactsDir)) {
+      for (const d of fs.readdirSync(artifactsDir)) {
+        if (!d.startsWith('agentic_')) continue;
+        let profile: string | null = null;
+        let metrics: string | null = null;
+        let metricsJson: string | null = null;
+        for (const sub of TRACE_SUBDIRS) {
+          const dir = path.join(artifactsDir, d, sub);
+          if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory()) continue;
+          if (!profile) {
+            const p = path.join(dir, 'profile_export.jsonl');
+            if (fs.existsSync(p)) profile = p;
+          }
+          if (!metrics) {
+            const m = path.join(dir, 'server_metrics_export.csv');
+            if (fs.existsSync(m)) metrics = m;
+          }
+          if (!metricsJson) {
+            const j = path.join(dir, 'server_metrics_export.json');
+            if (fs.existsSync(j)) metricsJson = j;
+          }
+        }
+        if (!profile && !metrics && !metricsJson) continue;
+        const suffix = stripBmkAndAgenticPrefix(d);
+        traceReplayPaths.set(suffix, {
+          profileJsonl: profile,
+          serverMetricsCsv: metrics,
+          serverMetricsJson: metricsJson,
+        });
+      }
+    }
+    if (traceReplayPaths.size > 0) {
+      console.log(`  Found ${traceReplayPaths.size} trace_replay sibling artifact(s)`);
+    }
+
     const allBmkFiles = [...bmkFiles, ...allBmkDirs.flatMap((d) => findJsonFiles(d))];
     console.log(`  Found ${allBmkFiles.length} benchmark JSON file(s)`);
 
@@ -415,6 +491,7 @@ async function main(): Promise<void> {
               framework: r.config.framework,
               specMethod: r.config.specMethod,
               disagg: r.config.disagg,
+              benchmarkType: r.benchmarkType,
             });
           }
 
@@ -431,12 +508,42 @@ async function main(): Promise<void> {
               }
             }
           }
+
+          // Trace-replay sibling lookup for agentic points only. The aiperf
+          // harness emits `agentic_<suffix>/trace_replay/...` next to the
+          // `bmk_agentic_<suffix>` artifact we just ingested.
+          if (parentDir.startsWith('bmk_agentic_') && insertedIds.length > 0) {
+            const suffix = stripBmkAndAgenticPrefix(parentDir);
+            const trace = traceReplayPaths.get(suffix);
+            if (trace) {
+              try {
+                const profile = trace.profileJsonl ? fs.readFileSync(trace.profileJsonl) : null;
+                const metrics = trace.serverMetricsCsv
+                  ? fs.readFileSync(trace.serverMetricsCsv)
+                  : null;
+                const metricsJson = trace.serverMetricsJson
+                  ? fs.readFileSync(trace.serverMetricsJson)
+                  : null;
+                await insertTraceReplay(sql, insertedIds, profile, metrics, metricsJson);
+                totalTraceReplayLinked += insertedIds.length;
+              } catch (error: any) {
+                tracker.recordDbError(`trace_replay for ${suffix}`, error);
+              }
+            } else {
+              tracker.skips.traceReplayMissing++;
+            }
+          }
         } catch (error: any) {
           tracker.recordDbError(path.basename(file), error);
         }
       }
     }
     console.log(`  Benchmarks: +${totalNewBmk} new, ${totalDupBmk} dup`);
+    if (totalTraceReplayLinked > 0 || tracker.skips.traceReplayMissing > 0) {
+      console.log(
+        `  Trace replay: ${totalTraceReplayLinked} rows linked, ${tracker.skips.traceReplayMissing} agentic point(s) missing sibling artifact`,
+      );
+    }
 
     if (availRows.length > 0) {
       try {
@@ -654,11 +761,17 @@ async function main(): Promise<void> {
 
   const { skips, unmappedModels, unmappedHws, unmappedPrecisions } = tracker;
   const totalSkips =
-    skips.badZip + skips.unmappedModel + skips.unmappedHw + skips.noIslOsl + skips.dbError;
+    skips.badZip +
+    skips.unmappedModel +
+    skips.unmappedHw +
+    skips.noIslOsl +
+    skips.failedRun +
+    skips.dbError;
   if (totalSkips > 0) {
     console.log(`\n  Skipped: ${totalSkips} rows`);
     const skipLines: [string, number][] = [
       ['no isl/osl (old format)', skips.noIslOsl],
+      ['failed run (0 successful)', skips.failedRun],
       ['unmapped model', skips.unmappedModel],
       ['unmapped hw', skips.unmappedHw],
       ['bad/empty zip', skips.badZip],
diff --git a/packages/db/src/ingest-gcs-backup.ts b/packages/db/src/ingest-gcs-backup.ts
index 9c17bfaf..b4a6fb95 100644
--- a/packages/db/src/ingest-gcs-backup.ts
+++ b/packages/db/src/ingest-gcs-backup.ts
@@ -457,6 +457,9 @@ async function mapWorkflowDir(
       unmappedModel: local.skips.unmappedModel,
       unmappedHw: local.skips.unmappedHw,
       noIslOsl: local.skips.noIslOsl,
+      failedRun: local.skips.failedRun,
+      // GCS backup doesn't ingest aiperf trace files; counter stays 0.
+      traceReplayMissing: local.skips.traceReplayMissing,
     },
     localUnmappedModels: new Set(local.unmappedModels),
     localUnmappedHws: new Set(local.unmappedHws),
@@ -621,13 +624,14 @@ async function main(): Promise<void> {
     // Upsert availability rows only for successfully resolved configs
     const availRows: {
       model: string;
-      isl: number;
-      osl: number;
+      isl: number | null;
+      osl: number | null;
       precision: string;
       hardware: string;
       framework: string;
       specMethod: string;
       disagg: boolean;
+      benchmarkType: string;
     }[] = [];
     for (const r of allInserted) {
       availRows.push({
@@ -639,6 +643,7 @@ async function main(): Promise<void> {
         framework: r.config.framework,
         specMethod: r.config.specMethod,
         disagg: r.config.disagg,
+        benchmarkType: r.benchmarkType,
       });
     }
     if (availRows.length > 0) {
diff --git a/packages/db/src/ingest-supplemental.ts b/packages/db/src/ingest-supplemental.ts
index a3b62fe0..f868767e 100644
--- a/packages/db/src/ingest-supplemental.ts
+++ b/packages/db/src/ingest-supplemental.ts
@@ -219,8 +219,10 @@ async function ingestSupplementalBmk(
 
     const rows: {
       configId: number;
-      isl: number;
-      osl: number;
+      benchmarkType: 'single_turn' | 'agentic_traces';
+      offloadMode: string;
+      isl: number | null;
+      osl: number | null;
       conc: number;
       image: string | null;
       metrics: Record<string, number>;
@@ -271,6 +273,8 @@ async function ingestSupplementalBmk(
 
       rows.push({
         configId,
+        benchmarkType: 'single_turn',
+        offloadMode: 'off',
         isl: entry.isl,
         osl: entry.osl,
         conc: entry.conc,
@@ -294,13 +298,14 @@ async function ingestSupplementalBmk(
     // to `rows` are exactly the valid ones.
     const availRows: {
       model: string;
-      isl: number;
-      osl: number;
+      isl: number | null;
+      osl: number | null;
       precision: string;
       hardware: string;
       framework: string;
       specMethod: string;
       disagg: boolean;
+      benchmarkType: string;
     }[] = [];
     for (const entry of entries) {
       const modelKey = resolveModelKey({ model: entry.model, infmax_model_prefix: undefined });
@@ -317,6 +322,7 @@ async function ingestSupplementalBmk(
         framework,
         specMethod,
         disagg,
+        benchmarkType: 'single_turn',
       });
     }
     if (availRows.length > 0) {
diff --git a/packages/db/src/json-provider.ts b/packages/db/src/json-provider.ts
index 25525e04..785d82c4 100644
--- a/packages/db/src/json-provider.ts
+++ b/packages/db/src/json-provider.ts
@@ -273,6 +273,7 @@ function toBenchmarkRow(
   metrics?: Record<string, number>,
 ): BenchmarkRow {
   return {
+    id: br.id,
     hardware: c.hardware,
     framework: c.framework,
     model: c.model,
@@ -290,6 +291,8 @@ function toBenchmarkRow(
     decode_num_workers: c.decode_num_workers,
     num_prefill_gpu: c.num_prefill_gpu,
     num_decode_gpu: c.num_decode_gpu,
+    benchmark_type: br.benchmark_type ?? 'single_turn',
+    offload_mode: (br as { offload_mode?: string }).offload_mode ?? 'off',
     isl: br.isl,
     osl: br.osl,
     conc: br.conc,
@@ -410,7 +413,11 @@ export function getAvailabilityData(): AvailabilityRow[] {
   for (const a of s.availability) {
     const key = `${a.model}|${a.hardware}|${a.framework}|${a.precision}|${a.isl}|${a.osl}|${toDateString(a.date)}`;
     if (validKeys.has(key)) {
-      rows.push({ ...a, date: toDateString(a.date) });
+      rows.push({
+        ...a,
+        benchmark_type: (a as { benchmark_type?: string }).benchmark_type ?? 'single_turn',
+        date: toDateString(a.date),
+      });
     }
   }
 
diff --git a/packages/db/src/queries/agentic-aggregates.test.ts b/packages/db/src/queries/agentic-aggregates.test.ts
new file mode 100644
index 00000000..8c712323
--- /dev/null
+++ b/packages/db/src/queries/agentic-aggregates.test.ts
@@ -0,0 +1,113 @@
+import { describe, expect, it } from 'vitest';
+
+import { extractIslOsl, extractServerMetricSamples, percentilesOf } from './agentic-aggregates.js';
+
+describe('percentilesOf', () => {
+  it('returns null for empty input', () => {
+    expect(percentilesOf([])).toBeNull();
+    expect(percentilesOf([Number.NaN, Number.POSITIVE_INFINITY])).toBeNull();
+  });
+
+  it('computes percentiles for a simple integer range', () => {
+    // Values 1..100 with unit spacing, so interpolated quantiles are easy to check by hand.
+    const xs = Array.from({ length: 100 }, (_, i) => i + 1);
+    const p = percentilesOf(xs);
+    expect(p).not.toBeNull();
+    expect(p!.n).toBe(100);
+    expect(p!.mean).toBeCloseTo(50.5, 6);
+    expect(p!.p50).toBeCloseTo(50.5, 6);
+    // 0-based position = q * (n - 1): p75 → 74.25 → 75 + 0.25 * (76 - 75) = 75.25.
+    expect(p!.p75).toBeCloseTo(75.25, 6);
+    expect(p!.p90).toBeCloseTo(90.1, 6);
+    expect(p!.p99).toBeCloseTo(99.01, 6);
+  });
+
+  it('filters out non-finite values before computing', () => {
+    const p = percentilesOf([1, 2, Number.NaN, 3, Number.POSITIVE_INFINITY, 4]);
+    expect(p?.n).toBe(4);
+    expect(p?.mean).toBeCloseTo(2.5, 6);
+  });
+});
+
+describe('extractIslOsl', () => {
+  it('reads input/output sequence length from profiling records', () => {
+    const lines = [
+      JSON.stringify({
+        metadata: { benchmark_phase: 'profiling' },
+        metrics: {
+          input_sequence_length: { value: 100, unit: 'tokens' },
+          output_sequence_length: { value: 50, unit: 'tokens' },
+        },
+      }),
+      JSON.stringify({
+        metadata: { benchmark_phase: 'profiling' },
+        metrics: {
+          input_sequence_length: { value: 200, unit: 'tokens' },
+          output_sequence_length: { value: 75, unit: 'tokens' },
+        },
+      }),
+      // warmup record — its benchmark_phase is not 'profiling', so it must be excluded
+      JSON.stringify({
+        metadata: { benchmark_phase: 'warmup' },
+        metrics: {
+          input_sequence_length: { value: 9999, unit: 'tokens' },
+          output_sequence_length: { value: 9999, unit: 'tokens' },
+        },
+      }),
+    ];
+    const { isl, osl } = extractIslOsl(lines.join('\n'));
+    expect(isl).toEqual([100, 200]);
+    expect(osl).toEqual([50, 75]);
+  });
+});
+
+describe('extractServerMetricSamples', () => {
+  it('extracts KV cache util gauge and computes per-interval prefix hit rate', () => {
+    const json = JSON.stringify({
+      metrics: {
+        'vllm:kv_cache_usage_perc': {
+          series: [
+            {
+              timeslices: [
+                { start_ns: 0, end_ns: 1, avg: 0.1 },
+                { start_ns: 1, end_ns: 2, avg: 0.5 },
+                { start_ns: 2, end_ns: 3, avg: 0.9 },
+              ],
+            },
+          ],
+        },
+        'vllm:prefix_cache_hits': {
+          series: [
+            {
+              timeslices: [
+                { start_ns: 0, rate: 80 },
+                { start_ns: 1, rate: 50 },
+                { start_ns: 2, rate: 0 }, // dropped: queries.rate at start_ns 2 is 0
+              ],
+            },
+          ],
+        },
+        'vllm:prefix_cache_queries': {
+          series: [
+            {
+              timeslices: [
+                { start_ns: 0, rate: 100 }, // 80 / 100 → hit rate = 0.8
+                { start_ns: 1, rate: 100 }, // 50 / 100 → hit rate = 0.5
+                { start_ns: 2, rate: 0 },
+              ],
+            },
+          ],
+        },
+      },
+    });
+    const { kvCacheUtil, prefixCacheHitRate } = extractServerMetricSamples(json);
+    expect(kvCacheUtil).toEqual([0.1, 0.5, 0.9]);
+    expect(prefixCacheHitRate).toEqual([0.8, 0.5]);
+  });
+
+  it('returns empty arrays when the JSON lacks the expected metric series', () => {
+    const out = extractServerMetricSamples(JSON.stringify({ metrics: {} }));
+    expect(out.kvCacheUtil).toEqual([]);
+    expect(out.prefixCacheHitRate).toEqual([]);
+  });
+});
diff --git a/packages/db/src/queries/agentic-aggregates.ts b/packages/db/src/queries/agentic-aggregates.ts
new file mode 100644
index 00000000..da5d18a0
--- /dev/null
+++ b/packages/db/src/queries/agentic-aggregates.ts
@@ -0,0 +1,455 @@
+/**
+ * Per-id aggregate stats for the "Aggregates across configs" view on the
+ * agentic detail page. Each id contributes one summary number per metric per
+ * percentile so the frontend can plot how each metric varies across the
+ * SKU's parallelism + concurrency configs.
+ *
+ * Sources:
+ *  - `profile_export.jsonl` → ISL / OSL per request (filtered to profiling phase)
+ *  - `server_metrics_json` → time-series of KV cache utilization +
+ *     prefix-cache hit rate per scrape interval
+ *
+ * Returns mean/p50/p75/p90/p99 per metric. Nulls when the blob is missing
+ * or has no usable samples — frontend treats those as "no data".
+ */
+
+import { Readable } from 'node:stream';
+import { createGunzip, gunzipSync } from 'node:zlib';
+
+import { chain } from 'stream-chain';
+
+import { parser } from 'stream-json';
+import { pick } from 'stream-json/filters/pick.js';
+import { streamObject } from 'stream-json/streamers/stream-object.js';
+
+import type { DbClient } from '../connection.js';
+
+/**
+ * Bump when the aggregate-stats computation algorithm changes — the backfill
+ * script recomputes any row whose stored `aggregate_stats.version` is older.
+ * Lives here (rather than in compute-aggregate-stats.ts) to avoid a circular
+ * import: the compute helper depends on the percentile utilities below.
+ *
+ * v2: aggregate vllm gauges/counters across all engine series (was reading
+ * only series[0], which under-counted by Nx on multi-engine DP/PP deployments).
+ *
+ * v3: extract sglang:* metrics too — kv_cache_util + prefix_cache_hit_rate
+ * populate for SGLang runs (qwen3.5/h100, mi355x sglang, etc.) the same way
+ * they do for vllm runs.
+ */
+export const STATS_VERSION = 3;
+
+export interface MetricPercentiles {
+  mean: number;
+  p50: number;
+  p75: number;
+  p90: number;
+  p99: number;
+  /** Sample count used to compute the percentiles. */
+  n: number;
+}
+
+export interface AgenticAggregate {
+  id: number; // benchmark_results id this summary belongs to
+  isl: MetricPercentiles | null; // per-request input sequence length; null = no data
+  osl: MetricPercentiles | null; // per-request output sequence length; null = no data
+  kvCacheUtil: MetricPercentiles | null; // KV cache utilization samples; null = no data
+  prefixCacheHitRate: MetricPercentiles | null; // per-interval hit-rate samples; null = no data
+}
+
+export type AgenticAggregateMap = Record<number, AgenticAggregate>;
+
+/**
+ * Number of benchmark_result ids fetched per query, per blob type.
+ * `profile_export_jsonl_gz` is small (~1-3 MB) so several fit in one
+ * round-trip. `server_metrics_json_gz` is much bigger (~17 MB compressed
+ * for high-conc TP+EP runs; Neon encodes bytea over HTTP at ~1.6× wire
+ * size, so two of those = ~50 MB and three already trips the 64 MB cap),
+ * so the two blob types are fetched in separate queries, server one at a time.
+ */
+const PROFILE_CHUNK_SIZE = 8;
+const SERVER_CHUNK_SIZE = 1;
+
+/** Quantile `q` of an ascending array via linear interpolation (numpy's default). */
+function quantile(sortedAsc: number[], q: number): number {
+  const count = sortedAsc.length;
+  if (count <= 1) return count === 0 ? Number.NaN : sortedAsc[0]!;
+  const rank = (count - 1) * q;
+  const below = Math.floor(rank);
+  const lower = sortedAsc[below]!;
+  const upper = sortedAsc[Math.ceil(rank)]!;
+  return below === rank ? lower : lower + (upper - lower) * (rank - below);
+}
+
+/** Arithmetic mean of `xs`; an empty array yields NaN (0 / 0). */
+function meanOf(xs: number[]): number {
+  const total = xs.reduce((acc, x) => acc + x, 0);
+  return total / xs.length;
+}
+
+/** Compute the percentile bundle for an array of samples; null if no finite sample remains. */
+export function percentilesOf(samples: number[]): MetricPercentiles | null {
+  // `filter` yields a fresh array and `toSorted` never mutates, so no defensive spread copy.
+  const sorted = samples.filter((v) => Number.isFinite(v)).toSorted((a, b) => a - b);
+  if (sorted.length === 0) return null;
+  return {
+    mean: meanOf(sorted),
+    p50: quantile(sorted, 0.5),
+    p75: quantile(sorted, 0.75),
+    p90: quantile(sorted, 0.9),
+    p99: quantile(sorted, 0.99),
+    n: sorted.length,
+  };
+}
+
+/**
+ * Pull a finite numeric metric out of the {value, unit} envelope (or a bare
+ * number). NaN/±Infinity are rejected in both forms, not just inside the envelope.
+ */
+function readNum(v: unknown): number | undefined {
+  const isEnvelope = typeof v === 'object' && v !== null && 'value' in v;
+  const raw = isEnvelope ? (v as { value?: unknown }).value : v;
+  return typeof raw === 'number' && Number.isFinite(raw) ? raw : undefined;
+}
+
+interface ProfileRecord { // one parsed line of profile_export.jsonl (only the fields read here)
+  metadata?: { benchmark_phase?: string }; // non-'profiling' phases are skipped
+  metrics?: {
+    input_sequence_length?: { value?: number } | number; // {value, unit} envelope or bare number
+    output_sequence_length?: { value?: number } | number; // same two encodings
+  };
+}
+
+/** Parse the profile_export.jsonl → per-request ISL + OSL arrays, skipping unusable lines. */
+export function extractIslOsl(jsonl: string): { isl: number[]; osl: number[] } {
+  const isl: number[] = [];
+  const osl: number[] = [];
+  for (const line of jsonl.split('\n')) {
+    if (!line) continue;
+    let rec: ProfileRecord | null;
+    try {
+      rec = JSON.parse(line) as ProfileRecord | null;
+    } catch {
+      continue;
+    }
+    if (rec === null || typeof rec !== 'object') continue; // e.g. a bare `null` line
+    if (rec.metadata?.benchmark_phase && rec.metadata.benchmark_phase !== 'profiling') continue;
+    const i = readNum(rec.metrics?.input_sequence_length);
+    const o = readNum(rec.metrics?.output_sequence_length);
+    if (typeof i === 'number') isl.push(i);
+    if (typeof o === 'number') osl.push(o);
+  }
+  return { isl, osl };
+}
+
+interface TimeSlice {
+  start_ns?: number; // key used to line up timeslices across series
+  end_ns?: number; // not read in this module
+  avg?: number; // value read for gauges
+  rate?: number; // value read for counter deltas
+  count?: number; // not read in this module
+  sum?: number; // not read in this module
+}
+interface Series {
+  labels?: Record<string, string>; // not read in this module
+  timeslices?: TimeSlice[];
+}
+interface MetricMeta {
+  series?: Series[]; // vllm reports one series per engine on multi-engine deployments
+}
+interface MetricsJson {
+  metrics?: Record<string, MetricMeta>; // keyed by metric name, e.g. 'vllm:kv_cache_usage_perc'
+}
+
+/**
+ * Collapse every series of one metric into a single value per timeslice,
+ * keyed by the timeslice's `start_ns`. vllm emits one series per engine on
+ * multi-engine DP/PP deployments, so the engines have to be combined to get
+ * the cluster-wide value at each timeslice.
+ *
+ * `field` picks the numeric timeslice field to read (`avg` for gauges,
+ * `rate` for counter deltas). `combine` picks the cross-engine math: 'sum'
+ * for running/waiting/throughput counters, whose cluster total is the sum
+ * of the engines; 'avg' for KV cache utilization, which is bounded [0, 1]
+ * per engine and is averaged to stay a cluster-level fraction.
+ */
+function aggregateSeriesByStart(
+  metricSeries: readonly Series[] | undefined,
+  field: 'avg' | 'rate',
+  combine: 'sum' | 'avg',
+): Map<number, number> {
+  const totals = new Map<number, { sum: number; n: number }>();
+  for (const ts of (metricSeries ?? []).flatMap((s) => s.timeslices ?? [])) {
+    const start = ts.start_ns;
+    const v = ts[field];
+    // Slices without a numeric start or a finite reading contribute nothing.
+    if (typeof start !== 'number' || typeof v !== 'number' || !Number.isFinite(v)) continue;
+    const slot = totals.get(start) ?? { sum: 0, n: 0 };
+    slot.sum += v;
+    slot.n += 1;
+    totals.set(start, slot);
+  }
+  // Emit in first-seen `start_ns` order; 'avg' divides by the number of contributing readings.
+  const out = new Map<number, number>();
+  for (const [start, { sum, n }] of totals) out.set(start, combine === 'sum' ? sum : sum / n);
+  return out;
+}
+
+/**
+ * Return the series of the first metric in `names` whose series array is
+ * non-empty, or undefined if none has any — this is what lets the vllm
+ * metric names fall back to their sglang counterparts.
+ */
+function pickFirstNonEmpty(
+  metrics: Record<string, MetricMeta>,
+  ...names: string[]
+): Series[] | undefined {
+  const hit = names.find((name) => (metrics[name]?.series?.length ?? 0) > 0);
+  return hit === undefined ? undefined : metrics[hit]?.series;
+}
+
+/**
+ * Turn a server_metrics_json document into raw per-timeslice samples for
+ * KV cache utilization and prefix cache hit rate (the latter computed per
+ * interval from the prometheus counters, the same way trace-server-metrics
+ * does it). Throws, via `JSON.parse`, when `json` is not valid JSON.
+ *
+ * Every engine series is folded in, so multi-engine DP/PP deployments are
+ * counted correctly (an earlier version read engine 0 only).
+ */
+export function extractServerMetricSamples(json: string): {
+  kvCacheUtil: number[];
+  prefixCacheHitRate: number[];
+} {
+  const metrics = (JSON.parse(json) as MetricsJson).metrics ?? {};
+  const seriesFor = (...names: string[]) => pickFirstNonEmpty(metrics, ...names);
+
+  // KV cache util is a per-engine gauge in [0, 1]: average across engines so it
+  // stays a fraction — summing would give a meaningless 0..N.
+  const kvByStart = aggregateSeriesByStart(
+    seriesFor('vllm:kv_cache_usage_perc', 'vllm:gpu_cache_usage_perc', 'sglang:token_usage'),
+    'avg',
+    'avg',
+  );
+  const kvCacheUtil = Array.from(kvByStart.values());
+
+  // Prefix cache hit rate per interval = Σhits.rate / Σqueries.rate across all
+  // engines: sum first, then divide. SGLang names: cached_tokens / prompt_tokens.
+  const hitsByStart = aggregateSeriesByStart(
+    seriesFor('vllm:prefix_cache_hits', 'vllm:gpu_prefix_cache_hits', 'sglang:cached_tokens'),
+    'rate',
+    'sum',
+  );
+  const queryMetricNames = [
+    'vllm:prefix_cache_queries',
+    'vllm:gpu_prefix_cache_queries',
+    'vllm:prompt_tokens',
+    'sglang:prompt_tokens',
+  ];
+  const queriesByStart = aggregateSeriesByStart(seriesFor(...queryMetricNames), 'rate', 'sum');
+
+  // Intervals with no query sample, or a zero query rate, yield no hit-rate sample.
+  const prefixCacheHitRate: number[] = [];
+  hitsByStart.forEach((hits, start) => {
+    const queries = queriesByStart.get(start);
+    if (queries !== undefined && queries > 0) prefixCacheHitRate.push(hits / queries);
+  });
+
+  return { kvCacheUtil, prefixCacheHitRate };
+}
+
+/** Metric names the streaming parser keeps; mirrors the names extractServerMetricSamples reads. */
+const TARGET_METRIC_KEYS = new Set([
+  // vLLM
+  'vllm:kv_cache_usage_perc',
+  'vllm:gpu_cache_usage_perc',
+  'vllm:prefix_cache_hits',
+  'vllm:prefix_cache_queries',
+  'vllm:gpu_prefix_cache_hits',
+  'vllm:gpu_prefix_cache_queries',
+  'vllm:prompt_tokens',
+  // SGLang
+  'sglang:token_usage',
+  'sglang:cached_tokens',
+  'sglang:prompt_tokens',
+]);
+
+/**
+ * Stream-parse the gzipped server_metrics_json and collect ONLY the metrics
+ * we need. Avoids the Node 512 MB string cap that JSON.parse hits on
+ * server_metrics blobs from high-conc TP+EP runs (which can decompress to
+ * >500 MB because vllm dumps `cache_config_info` every scrape interval).
+ *
+ * Pipeline: Buffer → gunzip → JSON parser → Pick('metrics') →
+ * StreamObject (one metric per chunk) → keep only the keys we care about.
+ *
+ * Returns the same `{ kvCacheUtil, prefixCacheHitRate }` shape as the
+ * synchronous `extractServerMetricSamples`, so callers can use either.
+ */
+async function streamExtractServerMetricSamples(
+  buffer: Buffer,
+): Promise<{ kvCacheUtil: number[]; prefixCacheHitRate: number[] }> {
+  const collected: Record<string, MetricMeta> = {};
+  // stream-json composes its transforms via stream-chain: `parser`, `pick`
+  // and `streamObject` are each called to produce a stage and `chain([...])`
+  // wires them after the gunzip stream. The chain's event API is typed too
+  // loosely for `.on(...)` to type-check here, hence the local `any` casts
+  // below; they only affect typing, not runtime behavior.
+  /* eslint-disable @typescript-eslint/no-explicit-any */
+  const pipeline = chain([
+    Readable.from(buffer),
+    createGunzip(),
+    parser(),
+    pick({ filter: 'metrics' }),
+    streamObject(),
+  ]);
+  await new Promise<void>((resolve, reject) => {
+    (pipeline as any).on('data', (chunk: unknown) => {
+      const { key, value } = chunk as { key: string; value: MetricMeta };
+      if (TARGET_METRIC_KEYS.has(key)) collected[key] = value;
+    });
+    (pipeline as any).on('end', resolve);
+    (pipeline as any).on('error', reject);
+  });
+  /* eslint-enable @typescript-eslint/no-explicit-any */
+  return extractServerMetricSamples(JSON.stringify({ metrics: collected }));
+}
+
+/**
+ * Load the aggregate percentile bundle for each requested benchmark_result id.
+ * Uses the pre-computed `aggregate_stats` JSONB when its version matches
+ * STATS_VERSION; otherwise re-derives the stats from the stored gzip blobs.
+ * Every requested id gets an entry — all-null when nothing could be read.
+ */
+export async function getAgenticAggregates(
+  sql: DbClient,
+  benchmarkResultIds: number[],
+): Promise<AgenticAggregateMap> {
+  if (benchmarkResultIds.length === 0) return {};
+
+  const result: AgenticAggregateMap = {};
+
+  // Fast path: one round-trip reads the pre-computed `aggregate_stats` JSONB
+  // (written at ingest, back-filled by `backfill-aggregate-stats.ts`) with no
+  // blob decompression; only ids with missing or stale-version stats fall back.
+  const statsRows = (await sql`
+    select
+      br.id as benchmark_result_id,
+      atr.aggregate_stats as stats
+    from benchmark_results br
+    join agentic_trace_replay atr on atr.id = br.trace_replay_id
+    where br.id = any(${benchmarkResultIds}::bigint[])
+  `) as {
+    benchmark_result_id: number;
+    stats: AggregateStatsRow | null;
+  }[];
+
+  const idsNeedingProfile: number[] = [];
+  const idsNeedingServer: number[] = [];
+  for (const row of statsRows) {
+    const id = Number(row.benchmark_result_id);
+    const agg = blankAggregate(id);
+    if (row.stats && Number(row.stats.version) === STATS_VERSION) {
+      agg.isl = row.stats.isl ?? null;
+      agg.osl = row.stats.osl ?? null;
+      agg.kvCacheUtil = row.stats.kvCacheUtil ?? null;
+      agg.prefixCacheHitRate = row.stats.prefixCacheHitRate ?? null;
+    } else {
+      // No stats (or stale version) — blob-parse fallback below; backfill should drain these.
+      idsNeedingProfile.push(id);
+      idsNeedingServer.push(id);
+    }
+    result[id] = agg;
+  }
+  // Ids with no row at all (no trace_replay link) still get a blank entry in the map.
+  for (const id of benchmarkResultIds) {
+    if (!(id in result)) result[id] = blankAggregate(id);
+  }
+
+  if (idsNeedingProfile.length === 0 && idsNeedingServer.length === 0) {
+    return result;
+  }
+
+  // ── Fallback Pass 1: profile_export blobs (cheap; large batches). ──────
+  for (let i = 0; i < idsNeedingProfile.length; i += PROFILE_CHUNK_SIZE) {
+    const chunk = idsNeedingProfile.slice(i, i + PROFILE_CHUNK_SIZE);
+    const rows = (await sql`
+      select
+        br.id as benchmark_result_id,
+        atr.profile_export_jsonl_gz as profile_blob
+      from benchmark_results br
+      join agentic_trace_replay atr on atr.id = br.trace_replay_id
+      where br.id = any(${chunk}::bigint[])
+    `) as { benchmark_result_id: number; profile_blob: Buffer | null }[];
+    for (const row of rows) {
+      const id = Number(row.benchmark_result_id);
+      result[id] ??= blankAggregate(id);
+      if (row.profile_blob) {
+        try {
+          const jsonl = gunzipSync(row.profile_blob).toString('utf8');
+          const { isl, osl } = extractIslOsl(jsonl);
+          result[id].isl = percentilesOf(isl);
+          result[id].osl = percentilesOf(osl);
+        } catch {
+          // ignore malformed blob — isl/osl stay null for this id
+        }
+      }
+    }
+  }
+  // ── Fallback Pass 2: server_metrics blobs (huge; one at a time). ───────
+  // Serial to avoid OOM on the decompressed JSON of a high-conc TP+EP row (>500 MB raw).
+  // The aggregator is fronted by a blob cache, so this runs at most once per sibling set.
+  for (let i = 0; i < idsNeedingServer.length; i += SERVER_CHUNK_SIZE) {
+    const chunk = idsNeedingServer.slice(i, i + SERVER_CHUNK_SIZE);
+    const rows = (await sql`
+      select
+        br.id as benchmark_result_id,
+        atr.server_metrics_json_gz as server_blob
+      from benchmark_results br
+      join agentic_trace_replay atr on atr.id = br.trace_replay_id
+      where br.id = any(${chunk}::bigint[])
+    `) as { benchmark_result_id: number; server_blob: Buffer | null }[];
+    for (const row of rows) {
+      const id = Number(row.benchmark_result_id);
+      result[id] ??= blankAggregate(id);
+      if (!row.server_blob) continue;
+      let parsed: { kvCacheUtil: number[]; prefixCacheHitRate: number[] } | null = null;
+      try {
+        const json = gunzipSync(row.server_blob).toString('utf8');
+        parsed = extractServerMetricSamples(json);
+      } catch (error) {
+        // ERR_STRING_TOO_LONG: the blob decompresses past Node's max string length
+        // (>512 MB) on high-conc TP+EP rows — stream-parse just the metrics we need.
+        const code = error && (error as NodeJS.ErrnoException).code;
+        const msg = error instanceof Error ? error.message : String(error);
+        if (code === 'ERR_STRING_TOO_LONG' || msg.includes('longer than 0x1fffffe8')) {
+          try {
+            parsed = await streamExtractServerMetricSamples(row.server_blob);
+          } catch {
+            // stream fallback failed too — leave nulls
+          }
+        }
+      }
+      if (parsed) {
+        result[id].kvCacheUtil = percentilesOf(parsed.kvCacheUtil);
+        result[id].prefixCacheHitRate = percentilesOf(parsed.prefixCacheHitRate);
+      }
+    }
+  }
+  return result;
+}
+
+/** Shape of the `agentic_trace_replay.aggregate_stats` JSONB when read back via postgres-js. */
+interface AggregateStatsRow {
+  version: number; // must equal STATS_VERSION for the stored stats to be used
+  isl: MetricPercentiles | null;
+  osl: MetricPercentiles | null;
+  kvCacheUtil: MetricPercentiles | null;
+  prefixCacheHitRate: MetricPercentiles | null;
+  normalizedSessionTimeS: number | null; // present in the JSONB; not read in this module
+  p90PrefillTpsPerUser: number | null; // present in the JSONB; not read in this module
+}
+
+function blankAggregate(id: number): AgenticAggregate { // every metric starts as null ("no data")
+  return { id, isl: null, osl: null, kvCacheUtil: null, prefixCacheHitRate: null };
+}
diff --git a/packages/db/src/queries/benchmark-siblings.ts b/packages/db/src/queries/benchmark-siblings.ts
new file mode 100644
index 00000000..245a1170
--- /dev/null
+++ b/packages/db/src/queries/benchmark-siblings.ts
@@ -0,0 +1,132 @@
+/**
+ * Find all benchmark_results sharing the given point's SKU (hardware + framework
+ * + model + precision + spec_method + benchmark_type + workflow_run). `disagg` is
+ * not matched on; it is reported per sibling. Used by the detail page to render a
+ * "switch between concs / parallelisms" navigator within a single run.
+ */
+
+import type { DbClient } from '../connection.js';
+
+export interface BenchmarkSibling {
+  id: number; // benchmark_results.id
+  conc: number; // benchmark_results.conc
+  /** "on" | "off" | null. */
+  offload_mode: string | null;
+  decode_tp: number; // decode_tp … disagg are read from the joined configs row
+  decode_ep: number;
+  prefill_tp: number;
+  prefill_ep: number;
+  num_prefill_gpu: number;
+  num_decode_gpu: number;
+  disagg: boolean;
+  /** True if this row IS the point passed in. */
+  is_current: boolean;
+  /** True when the row's trace_replay_id is set (navigation hint; the blob is not checked). */
+  has_trace: boolean;
+}
+
+export interface BenchmarkSku {
+  hardware: string; // hardware … spec_method are read from the configs row
+  framework: string;
+  model: string;
+  precision: string;
+  spec_method: string;
+  benchmark_type: string; // read from benchmark_results
+  /** GitHub Actions run id of the workflow_run, so the page header can hint at provenance. */
+  github_run_id: number;
+  date: string; // benchmark_results.date cast to text
+}
+
+export interface BenchmarkSiblings {
+  sku: BenchmarkSku; // fields resolved from the requested point
+  siblings: BenchmarkSibling[]; // sorted: decode_tp, decode_ep, offload_mode (nulls first), conc
+}
+
+/**
+ * Load the navigator data for one benchmark point: its SKU plus every
+ * benchmark_results row from the same workflow run that shares that SKU.
+ * @param sql - Tagged-template database client used for both queries.
+ * @param benchmarkResultId - benchmark_results.id of the point being viewed.
+ * @returns The SKU and sibling rows (the row whose id equals
+ *   benchmarkResultId is flagged is_current), or null when the id is unknown.
+ */
+export async function getBenchmarkSiblings(
+  sql: DbClient,
+  benchmarkResultId: number,
+): Promise<BenchmarkSiblings | null> {
+  // First resolve the SKU-defining fields of the requested point; without a
+  // seed row there is nothing to compare against.
+  const [seedRow] = (await sql`
+    select
+      c.hardware, c.framework, c.model, c.precision, c.spec_method,
+      br.benchmark_type, br.workflow_run_id, br.date::text,
+      wr.github_run_id
+    from benchmark_results br
+    join configs c on c.id = br.config_id
+    join workflow_runs wr on wr.id = br.workflow_run_id
+    where br.id = ${benchmarkResultId}
+  `) as unknown as {
+    hardware: string;
+    framework: string;
+    model: string;
+    precision: string;
+    spec_method: string;
+    benchmark_type: string;
+    workflow_run_id: number;
+    date: string;
+    github_run_id: number;
+  }[];
+  if (!seedRow) return null;
+
+  // Then pull every row of that workflow run whose config matches the seed's
+  // SKU columns (the column list mirrors BenchmarkSibling minus is_current).
+  const siblingRows = (await sql`
+    select
+      br.id, br.conc, br.offload_mode,
+      c.decode_tp, c.decode_ep, c.prefill_tp, c.prefill_ep,
+      c.num_prefill_gpu, c.num_decode_gpu, c.disagg,
+      (br.trace_replay_id is not null) as has_trace
+    from benchmark_results br
+    join configs c on c.id = br.config_id
+    where br.workflow_run_id = ${seedRow.workflow_run_id}
+      and br.benchmark_type = ${seedRow.benchmark_type}
+      and c.hardware = ${seedRow.hardware}
+      and c.framework = ${seedRow.framework}
+      and c.model = ${seedRow.model}
+      and c.precision = ${seedRow.precision}
+      and c.spec_method = ${seedRow.spec_method}
+    order by c.decode_tp, c.decode_ep, br.offload_mode nulls first, br.conc
+  `) as unknown as Omit<BenchmarkSibling, 'is_current'>[];
+
+  // ids are normalized with Number() before the is_current comparison.
+  const siblings: BenchmarkSibling[] = [];
+  for (const row of siblingRows) {
+    const id = Number(row.id);
+    siblings.push({
+      id,
+      conc: row.conc,
+      offload_mode: row.offload_mode,
+      decode_tp: row.decode_tp,
+      decode_ep: row.decode_ep,
+      prefill_tp: row.prefill_tp,
+      prefill_ep: row.prefill_ep,
+      num_prefill_gpu: row.num_prefill_gpu,
+      num_decode_gpu: row.num_decode_gpu,
+      disagg: row.disagg,
+      is_current: id === benchmarkResultId,
+      has_trace: row.has_trace,
+    });
+  }
+
+  const sku: BenchmarkSku = {
+    hardware: seedRow.hardware,
+    framework: seedRow.framework,
+    model: seedRow.model,
+    precision: seedRow.precision,
+    spec_method: seedRow.spec_method,
+    benchmark_type: seedRow.benchmark_type,
+    github_run_id: Number(seedRow.github_run_id),
+    date: seedRow.date,
+  };
+  return { sku, siblings };
+}
diff --git a/packages/db/src/queries/benchmarks.ts b/packages/db/src/queries/benchmarks.ts
index 1c30b1fd..2291dc0c 100644
--- a/packages/db/src/queries/benchmarks.ts
+++ b/packages/db/src/queries/benchmarks.ts
@@ -1,6 +1,13 @@
 import type { DbClient } from '../connection.js';
 
 export interface BenchmarkRow {
+  /**
+   * Stable per-point id from benchmark_results. Used by the frontend to look
+   * up associated detail blobs (e.g. trace_replay histograms).
+   * Number is fine in TS but it's a Postgres bigint — values above
+   * Number.MAX_SAFE_INTEGER would lose precision; real ids are far below that.
+   */
+  id: number;
   hardware: string;
   framework: string;
   model: string;
@@ -18,9 +25,13 @@ export interface BenchmarkRow {
   decode_num_workers: number;
   num_prefill_gpu: number;
   num_decode_gpu: number;
-  isl: number;
-  osl: number;
+  benchmark_type: string;
+  // Null for agentic_traces; numeric for single_turn fixed-seq runs.
+  isl: number | null;
+  osl: number | null;
   conc: number;
+  /** KV-cache offload mode: 'on' | 'off'. Defaults to 'off' for fixed-seq. */
+  offload_mode: string;
   image: string | null;
   metrics: Record<string, number>;
   date: string;
@@ -42,8 +53,56 @@ export async function getLatestBenchmarks(
   modelKey: string | string[],
   date?: string,
   exact?: boolean,
+  /**
+   * If set, filter to a specific GitHub Actions workflow run.
+   * Bypasses the "latest per config" logic — when two runs landed on the same
+   * date and the user picked one in the run selector, this scopes the chart
+   * data to that run only. Value matches the URL param `g_runid` (a
+   * stringified github_run_id, not the DB id).
+   */
+  runId?: string,
 ): Promise<BenchmarkRow[]> {
   const modelKeys = Array.isArray(modelKey) ? modelKey : [modelKey];
+  if (runId) {
+    const rows = await sql`
+      SELECT
+        br.id,
+        c.hardware,
+        c.framework,
+        c.model,
+        c.precision,
+        c.spec_method,
+        c.disagg,
+        c.is_multinode,
+        c.prefill_tp,
+        c.prefill_ep,
+        c.prefill_dp_attention,
+        c.prefill_num_workers,
+        c.decode_tp,
+        c.decode_ep,
+        c.decode_dp_attention,
+        c.decode_num_workers,
+        c.num_prefill_gpu,
+        c.num_decode_gpu,
+        br.benchmark_type,
+        br.offload_mode,
+        br.isl,
+        br.osl,
+        br.conc,
+        br.image,
+        br.metrics,
+        br.date::text,
+        CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
+      FROM benchmark_results br
+      JOIN configs c ON c.id = br.config_id
+      JOIN latest_workflow_runs wr ON wr.id = br.workflow_run_id
+      WHERE c.model = ANY(${modelKeys})
+        AND br.error IS NULL
+        AND wr.github_run_id = ${runId}::bigint
+      ORDER BY br.config_id, br.conc, br.isl, br.osl
+    `;
+    return rows as unknown as BenchmarkRow[];
+  }
   if (date) {
     // Date-filtered: use base table with DISTINCT ON (the view only has the absolute latest)
     // exact=true: only return data from this exact date (for GPU comparison)
@@ -51,6 +110,7 @@ export async function getLatestBenchmarks(
     const dateFilter = exact ? sql`br.date = ${date}::date` : sql`br.date <= ${date}::date`;
     const rows = await sql`
       SELECT DISTINCT ON (br.config_id, br.conc, br.isl, br.osl)
+        br.id,
         c.hardware,
         c.framework,
         c.model,
@@ -68,6 +128,8 @@ export async function getLatestBenchmarks(
         c.decode_num_workers,
         c.num_prefill_gpu,
         c.num_decode_gpu,
+        br.benchmark_type,
+        br.offload_mode,
         br.isl,
         br.osl,
         br.conc,
@@ -89,6 +151,7 @@ export async function getLatestBenchmarks(
   // No date filter: use materialized view for instant lookups
   const rows = await sql`
     SELECT
+      lb.id,
       c.hardware,
       c.framework,
       c.model,
@@ -106,6 +169,8 @@ export async function getLatestBenchmarks(
       c.decode_num_workers,
       c.num_prefill_gpu,
       c.num_decode_gpu,
+      lb.benchmark_type,
+      lb.offload_mode,
       lb.isl,
       lb.osl,
       lb.conc,
@@ -153,6 +218,7 @@ export async function getAllBenchmarksForHistory(
       c.decode_num_workers,
       c.num_prefill_gpu,
       c.num_decode_gpu,
+      br.benchmark_type,
       br.isl,
       br.osl,
       br.conc,
diff --git a/packages/db/src/queries/derived-agentic-metrics.test.ts b/packages/db/src/queries/derived-agentic-metrics.test.ts
new file mode 100644
index 00000000..321434be
--- /dev/null
+++ b/packages/db/src/queries/derived-agentic-metrics.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from 'vitest';
+
+import { computeDerivedFromBlob } from './derived-agentic-metrics.js';
+
+/** Serialize one synthetic profiling-phase aiperf record as a single JSONL line. */
+function rec(
+  conversation_id: string,
+  turn_index: number,
+  fields: { isl: number; osl: number; ttft_ms: number; latency_ms: number },
+): string {
+  const { isl, osl, ttft_ms, latency_ms } = fields;
+  const metadata = { conversation_id, turn_index, benchmark_phase: 'profiling' };
+  const metrics = {
+    request_latency: { value: latency_ms, unit: 'ms' },
+    time_to_first_token: { value: ttft_ms, unit: 'ms' },
+    input_sequence_length: { value: isl, unit: 'tokens' },
+    output_sequence_length: { value: osl, unit: 'tokens' },
+  };
+  return JSON.stringify({ metadata, metrics });
+}
+
+describe('computeDerivedFromBlob', () => {
+  it('returns nulls when no usable records', () => {
+    const { normalized_session_time_s, p90_prefill_tps_per_user } = computeDerivedFromBlob('');
+    expect(normalized_session_time_s).toBeNull();
+    expect(p90_prefill_tps_per_user).toBeNull();
+  });
+
+  it('rescales single-session time and computes P90 prefill', () => {
+    // A lone session with two turns: load = (100+50) + (200+50) = 400.
+    // With one session mean_load = load_i, so T̃ = T = (1000+2000) ms = 3.0 s.
+    const blob = [
+      rec('s1', 0, { isl: 100, osl: 50, ttft_ms: 500, latency_ms: 1000 }),
+      rec('s1', 1, { isl: 200, osl: 50, ttft_ms: 1000, latency_ms: 2000 }),
+    ].join('\n');
+    const { normalized_session_time_s, p90_prefill_tps_per_user } = computeDerivedFromBlob(blob);
+    expect(normalized_session_time_s).toBeCloseTo(3, 6);
+    // Per-turn prefill TPS is 100/0.5 = 200 and 200/1.0 = 200, so the pooled P90 is 200.
+    expect(p90_prefill_tps_per_user).toBeCloseTo(200, 6);
+  });
+
+  it('rescales times across sessions with unequal load', () => {
+    // s1: one turn, load = 90 + 10 = 100, T = 1 s.
+    // s2: one turn, load = 270 + 30 = 300, T = 3 s.
+    // mean_load = (100 + 300) / 2 = 200.
+    // T̃_1 = 1 × 200/100 = 2 and T̃_2 = 3 × 200/300 = 2,
+    // so the mean T̃ is 2.0.
+    const lightSession = rec('s1', 0, { isl: 90, osl: 10, ttft_ms: 500, latency_ms: 1000 });
+    const heavySession = rec('s2', 0, { isl: 270, osl: 30, ttft_ms: 500, latency_ms: 3000 });
+    const blob = [lightSession, heavySession].join('\n');
+    const { normalized_session_time_s } = computeDerivedFromBlob(blob);
+    expect(normalized_session_time_s).toBeCloseTo(2, 6);
+  });
+
+  it('drops records missing required fields and skips non-profiling phase', () => {
+    const usable = rec('s1', 0, { isl: 100, osl: 50, ttft_ms: 500, latency_ms: 1000 });
+    // Same session, but without time_to_first_token — must be skipped.
+    const missingTtft = JSON.stringify({
+      metadata: { conversation_id: 's1', turn_index: 1, benchmark_phase: 'profiling' },
+      metrics: {
+        request_latency: { value: 1000, unit: 'ms' },
+        input_sequence_length: { value: 100, unit: 'tokens' },
+        output_sequence_length: { value: 50, unit: 'tokens' },
+      },
+    });
+    // Complete record, but from the warmup phase — must be skipped.
+    const warmup = JSON.stringify({
+      metadata: { conversation_id: 's2', turn_index: 0, benchmark_phase: 'warmup' },
+      metrics: {
+        request_latency: { value: 9999, unit: 'ms' },
+        time_to_first_token: { value: 9999, unit: 'ms' },
+        input_sequence_length: { value: 100, unit: 'tokens' },
+        output_sequence_length: { value: 50, unit: 'tokens' },
+      },
+    });
+    const blob = [usable, missingTtft, warmup].join('\n');
+    const { normalized_session_time_s, p90_prefill_tps_per_user } = computeDerivedFromBlob(blob);
+    // Only the first record counts: T = 1 s and prefill = 100 / 0.5 s = 200 tps.
+    expect(normalized_session_time_s).toBeCloseTo(1, 6);
+    expect(p90_prefill_tps_per_user).toBeCloseTo(200, 6);
+  });
+
+  it('p90 across turns: 10-turn session picks the right rank', () => {
+    // Ten turns with isl = 100, 200, ..., 1000 tokens and a fixed 1 s TTFT give
+    // prefill rates of 100..1000 tps; the linear P90 of those 10 values is 910
+    // (rank = (10 − 1) × 0.9 = 8.1 → 900 + 0.1 × 100).
+    const turns: string[] = [];
+    for (let turn = 0; turn < 10; turn++) {
+      const isl = (turn + 1) * 100;
+      turns.push(rec('s1', turn, { isl, osl: 10, ttft_ms: 1000, latency_ms: 1500 }));
+    }
+    const blob = turns.join('\n');
+    const { p90_prefill_tps_per_user } = computeDerivedFromBlob(blob);
+    expect(p90_prefill_tps_per_user).toBeCloseTo(910, 6);
+  });
+});
diff --git a/packages/db/src/queries/derived-agentic-metrics.ts b/packages/db/src/queries/derived-agentic-metrics.ts
new file mode 100644
index 00000000..35a4b76c
--- /dev/null
+++ b/packages/db/src/queries/derived-agentic-metrics.ts
@@ -0,0 +1,264 @@
+/**
+ * Per-point metrics derived from the stored aiperf `profile_export.jsonl`
+ * blob. They are not part of the `metrics` JSONB because they require grouping
+ * records by `conversation_id` and aggregating per session. Values are read
+ * from pre-computed `aggregate_stats` when current and recomputed from the
+ * blob only as a fallback.
+ *
+ * - normalized_session_time_s: per the "Mean Normalized Session Time" proposal
+ *   (https://gist.github.com/xinli-sw/115d370c17f6d1b977878b68530981fa). Sum of
+ *   per-turn `request_latency` per session (inter-turn tool/thinking gaps are
+ *   inherently excluded since we only sum the active GPU time, not wallclock).
+ *   Each session's time is rescaled by `mean_load / session_load`, where load
+ *   is Σ(ISL+OSL) across turns. The plotted value is the mean across sessions.
+ *
+ * - p90_prefill_tps_per_user: per the same gist's "Prefill" Pareto chart.
+ *   Per turn: prefill_tps = ISL / TTFT_seconds. Single P90 across every turn
+ *   in every session — the per-session percentile + cross-session mean
+ *   sandwich was discarded because it just dampens tail behavior.
+ */
+
+import { gunzipSync } from 'node:zlib';
+
+import type { DbClient } from '../connection.js';
+import { STATS_VERSION } from './agentic-aggregates';
+
+export interface DerivedAgenticMetric {
+  /** benchmark_results.id this entry belongs to. */
+  id: number;
+  /** Mean normalized session time in seconds; null when it could not be computed. */
+  normalized_session_time_s: number | null;
+  /** P90 of per-turn prefill tps/user (ISL / TTFT) across every turn in every session. */
+  p90_prefill_tps_per_user: number | null;
+}
+
+export type DerivedAgenticMetricMap = Record<number, DerivedAgenticMetric>; // keyed by id
+
+/**
+ * Ids per blob query. Blobs are typically ~1-2 MB compressed (~5-10 MB raw) and
+ * Neon's HTTP driver caps responses at 64 MB. NOTE(review): 13 MB blobs would not fit.
+ */
+const QUERY_CHUNK_SIZE = 6;
+
+interface RecordMetrics {
+  request_latency?: { value?: number; unit?: string } | number; // used as ms; unit ignored
+  time_to_first_token?: { value?: number; unit?: string } | number; // used as ms; unit ignored
+  input_sequence_length?: { value?: number } | number; // ISL
+  output_sequence_length?: { value?: number } | number; // OSL
+}
+
+interface RecordMetadata {
+  conversation_id?: string; // session grouping key; records without it are skipped
+  turn_index?: number; // not read by this module
+  benchmark_phase?: string; // when present, must be 'profiling' for the record to count
+}
+
+interface ProfileRecord {
+  metadata?: RecordMetadata; // session id + phase, used for grouping and filtering
+  metrics?: RecordMetrics; // per-request measurements; a missing object is treated as empty
+}
+
+interface TurnFields {
+  request_latency_ms: number; // > 0 (enforced by extractTurn)
+  ttft_ms: number; // > 0 (enforced by extractTurn)
+  isl: number; // > 0 (enforced by extractTurn)
+  osl: number; // required, but not range-checked
+}
+
+/** Pull a finite numeric metric out of the {value, unit} envelope (or a bare number). */
+function readNum(v: unknown): number | undefined {
+  // Unwrap the envelope when one is present; anything else is examined as-is.
+  const inner = v && typeof v === 'object' && 'value' in v ? (v as { value?: unknown }).value : v;
+  // Apply the finiteness check on BOTH paths: JSON.parse('1e999') yields Infinity,
+  // and a non-finite bare number would otherwise leak into the sums and percentiles.
+  if (typeof inner === 'number' && Number.isFinite(inner)) return inner;
+  return undefined;
+}
+
+function extractTurn(rec: ProfileRecord): TurnFields | null {
+  // Usable only when all four metrics are numeric and latency, TTFT and ISL are positive.
+  const request_latency_ms = readNum(rec.metrics?.request_latency);
+  const ttft_ms = readNum(rec.metrics?.time_to_first_token);
+  const isl = readNum(rec.metrics?.input_sequence_length);
+  const osl = readNum(rec.metrics?.output_sequence_length);
+  if (request_latency_ms === undefined || ttft_ms === undefined || isl === undefined) return null;
+  if (osl === undefined || request_latency_ms <= 0 || ttft_ms <= 0 || isl <= 0) return null;
+  return { request_latency_ms, ttft_ms, isl, osl };
+}
+
+/** Linear-interpolated percentile of an ascending array (numpy's default "linear" method). */
+function quantile(sortedAsc: number[], q: number): number {
+  const count = sortedAsc.length;
+  if (count === 0) return Number.NaN;
+  if (count === 1) return sortedAsc[0]!;
+  const rank = (count - 1) * q; // fractional index into the sorted values
+  const [below, above] = [Math.floor(rank), Math.ceil(rank)];
+  const base = sortedAsc[below]!;
+  return below === above ? base : base + (sortedAsc[above]! - base) * (rank - below);
+}
+
+function meanOf(xs: number[]): number {
+  // Arithmetic mean, summed left to right; an empty input yields NaN.
+  if (xs.length === 0) return Number.NaN;
+  const total = xs.reduce((acc, x) => acc + x, 0);
+  return total / xs.length;
+}
+
+/**
+ * Parse one point's JSONL and return the two derived metrics.
+ *
+ * Lines that are empty, unparseable, not JSON objects, tagged with a
+ * benchmark_phase other than 'profiling', lacking a conversation_id, or
+ * rejected by extractTurn are ignored.
+ *
+ * @param jsonl - Decompressed profile_export.jsonl content, one record per line.
+ * @returns `normalized_session_time_s` and `p90_prefill_tps_per_user`; each is
+ *   `null` when it cannot be computed (e.g. the blob has no usable records).
+ */
+export function computeDerivedFromBlob(jsonl: string): {
+  normalized_session_time_s: number | null;
+  p90_prefill_tps_per_user: number | null;
+} {
+  // Bucket usable profiling-phase turns by conversation_id (first-seen order).
+  const turnsBySession = new Map<string, TurnFields[]>();
+  for (const rawLine of jsonl.split('\n')) {
+    if (!rawLine) continue;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(rawLine);
+    } catch {
+      continue;
+    }
+    // A bare `null` (or primitive) line is valid JSON but not a record; skip it
+    // rather than throwing on the property reads below.
+    if (parsed === null || typeof parsed !== 'object') continue;
+    const rec = parsed as ProfileRecord;
+    const phase = rec.metadata?.benchmark_phase;
+    if (phase && phase !== 'profiling') continue;
+    const sessionId = rec.metadata?.conversation_id;
+    if (!sessionId) continue;
+    const turn = extractTurn(rec);
+    if (!turn) continue;
+    const bucket = turnsBySession.get(sessionId);
+    if (bucket) bucket.push(turn);
+    else turnsBySession.set(sessionId, [turn]);
+  }
+  if (turnsBySession.size === 0) {
+    return { normalized_session_time_s: null, p90_prefill_tps_per_user: null };
+  }
+
+  // Per session: summed request latency and token load Σ(ISL+OSL). Per turn:
+  // prefill rate, pooled across sessions so the percentile sees every turn.
+  const sessions: { timeS: number; load: number }[] = [];
+  const prefillRates: number[] = [];
+  for (const turns of turnsBySession.values()) {
+    let timeMs = 0;
+    let load = 0;
+    for (const turn of turns) {
+      timeMs += turn.request_latency_ms;
+      load += turn.isl + turn.osl;
+      const ttftSec = turn.ttft_ms / 1000;
+      if (ttftSec > 0) prefillRates.push(turn.isl / ttftSec);
+    }
+    if (load > 0) sessions.push({ timeS: timeMs / 1000, load });
+  }
+
+  // Normalized session time: T̃_i = T_i × (mean_load / load_i), then mean.
+  let normalized: number | null = null;
+  if (sessions.length > 0) {
+    const meanLoad = meanOf(sessions.map((s) => s.load));
+    if (meanLoad > 0) {
+      normalized = meanOf(sessions.map((s) => s.timeS * (meanLoad / s.load)));
+    }
+  }
+
+  // P90 over the pooled per-turn prefill rates (the sort only touches the local array).
+  let prefill: number | null = null;
+  if (prefillRates.length > 0) {
+    prefillRates.sort((a, b) => a - b);
+    prefill = quantile(prefillRates, 0.9);
+  }
+
+  return {
+    normalized_session_time_s: normalized,
+    p90_prefill_tps_per_user: prefill,
+  };
+}
+
+/**
+ * Resolve the derived agentic metrics for a set of benchmark_results ids.
+ *
+ * @param sql - Tagged-template database client.
+ * @param benchmarkResultIds - benchmark_results ids to resolve.
+ * @returns Map keyed by id. Ids with no joined agentic_trace_replay row, or
+ *   whose fallback blob is missing or cannot be decoded, are absent.
+ */
+export async function getDerivedAgenticMetrics(
+  sql: DbClient,
+  benchmarkResultIds: number[],
+): Promise<DerivedAgenticMetricMap> {
+  const derivedById: DerivedAgenticMetricMap = {};
+  if (benchmarkResultIds.length === 0) return derivedById;
+
+  // Fast path: one round-trip reads the values stored in `aggregate_stats`,
+  // so ids whose stats carry the current STATS_VERSION never touch the
+  // gzipped profile blob.
+  const statsRows = (await sql`
+    select
+      br.id as benchmark_result_id,
+      atr.aggregate_stats as stats
+    from benchmark_results br
+    join agentic_trace_replay atr on atr.id = br.trace_replay_id
+    where br.id = any(${benchmarkResultIds}::bigint[])
+  `) as {
+    benchmark_result_id: number;
+    stats: {
+      version?: number;
+      normalizedSessionTimeS?: number | null;
+      p90PrefillTpsPerUser?: number | null;
+    } | null;
+  }[];
+
+  // Rows with missing stats or another STATS_VERSION go to the blob fallback.
+  const staleIds: number[] = [];
+  for (const { benchmark_result_id, stats } of statsRows) {
+    const id = Number(benchmark_result_id);
+    if (!stats || Number(stats.version) !== STATS_VERSION) {
+      staleIds.push(id);
+      continue;
+    }
+    derivedById[id] = {
+      id,
+      normalized_session_time_s: stats.normalizedSessionTimeS ?? null,
+      p90_prefill_tps_per_user: stats.p90PrefillTpsPerUser ?? null,
+    };
+  }
+
+  // Fallback: fetch the profile blobs in chunks of QUERY_CHUNK_SIZE ids (the
+  // loop body never runs when every row was served by the fast path).
+  const blobRows: { benchmark_result_id: number; blob: Buffer }[] = [];
+  for (let start = 0; start < staleIds.length; start += QUERY_CHUNK_SIZE) {
+    const chunk = staleIds.slice(start, start + QUERY_CHUNK_SIZE);
+    const chunkRows = (await sql`
+      select
+        br.id as benchmark_result_id,
+        atr.profile_export_jsonl_gz as blob
+      from benchmark_results br
+      join agentic_trace_replay atr on atr.id = br.trace_replay_id
+      where br.id = any(${chunk}::bigint[])
+        and atr.profile_export_jsonl_gz is not null
+    `) as { benchmark_result_id: number; blob: Buffer }[];
+    blobRows.push(...chunkRows);
+  }
+
+  for (const { benchmark_result_id, blob } of blobRows) {
+    const id = Number(benchmark_result_id);
+    try {
+      const jsonl = gunzipSync(blob).toString('utf8');
+      derivedById[id] = { id, ...computeDerivedFromBlob(jsonl) };
+    } catch {
+      // Skip malformed blobs silently — frontend treats missing ids as "no data".
+    }
+  }
+  return derivedById;
+}
diff --git a/packages/db/src/queries/request-timeline.ts b/packages/db/src/queries/request-timeline.ts
new file mode 100644
index 00000000..2bd3e251
--- /dev/null
+++ b/packages/db/src/queries/request-timeline.ts
@@ -0,0 +1,48 @@
+/**
+ * Per-request timeline for the agentic detail page's Gantt view.
+ *
+ * Backed by `agentic_trace_replay.request_timeline` (pre-computed at
+ * ingest time, see `etl/compute-request-timeline.ts`). The fast path is
+ * a single SQL row read; the slow path re-computes from
+ * `profile_export_jsonl_gz` and is only taken when the column is missing
+ * or the stored `REQUEST_TIMELINE_VERSION` is stale.
+ */
+
+import {
+  REQUEST_TIMELINE_VERSION,
+  computeRequestTimeline,
+  type RequestTimeline,
+} from '../etl/compute-request-timeline';
+
+import type { DbClient } from '../connection.js';
+
+export type { RequestTimeline, RequestRecord } from '../etl/compute-request-timeline';
+
+interface RawRow {
+  blob: Buffer | null; // atr.profile_export_jsonl_gz
+  request_timeline: RequestTimeline | null; // atr.request_timeline (version-checked on read)
+}
+
+export async function getRequestTimeline(
+  sql: DbClient,
+  benchmarkResultId: number,
+): Promise<RequestTimeline | null> {
+  const [match] = (await sql`
+    select
+      atr.profile_export_jsonl_gz as blob,
+      atr.request_timeline
+    from benchmark_results br
+    join agentic_trace_replay atr on atr.id = br.trace_replay_id
+    where br.id = ${benchmarkResultId}
+  `) as unknown as RawRow[];
+  // No agentic_trace_replay row is joined to this id: nothing to return.
+  if (!match) return null;
+
+  // Fast path: serve the stored timeline, but only when it carries the current
+  // REQUEST_TIMELINE_VERSION.
+  const { request_timeline: stored, blob } = match;
+  if (stored && Number(stored.version) === REQUEST_TIMELINE_VERSION) return stored;
+
+  // Slow path: recompute from the blob (rare — only stale/missing rows).
+  return computeRequestTimeline(blob);
+}
diff --git a/packages/db/src/queries/trace-availability.ts b/packages/db/src/queries/trace-availability.ts
new file mode 100644
index 00000000..155b3d4c
--- /dev/null
+++ b/packages/db/src/queries/trace-availability.ts
@@ -0,0 +1,34 @@
+/**
+ * Bulk "does this point have a trace_replay blob?" lookup. Used by the
+ * inference scatter chart to decide whether to render a "View charts"
+ * button in the pinned tooltip — a pure presence check that doesn't need
+ * the multi-megabyte blob payload `getTraceHistograms` ships.
+ *
+ * Going through `trace-histograms` for this trips Neon's 64 MB
+ * per-HTTP-response cap as soon as one chunk's combined gzip payload
+ * exceeds the cap (high-conc 8×8 rows can be 13 MB compressed each).
+ */
+
+import type { DbClient } from '../connection.js';
+
+/** Map of `benchmark_results.id` → true for each id that has a trace_replay blob; others are absent. */
+export type TraceAvailabilityMap = Record<number, true>;
+
+export async function getTraceAvailability(
+  sql: DbClient,
+  benchmarkResultIds: number[],
+): Promise<TraceAvailabilityMap> {
+  if (benchmarkResultIds.length === 0) return {};
+
+  const idRows = (await sql`
+    select br.id
+    from benchmark_results br
+    join agentic_trace_replay atr on atr.id = br.trace_replay_id
+    where br.id = any(${benchmarkResultIds}::bigint[])
+      and atr.profile_export_jsonl_gz is not null
+  `) as { id: number }[];
+
+  // Only ids the query returned get an entry; everything else stays absent.
+  const entries = idRows.map(({ id }) => [Number(id), true as const] as const);
+  return Object.fromEntries(entries);
+}
diff --git a/packages/db/src/queries/trace-histograms.ts b/packages/db/src/queries/trace-histograms.ts
new file mode 100644
index 00000000..20ebc0d5
--- /dev/null
+++ b/packages/db/src/queries/trace-histograms.ts
@@ -0,0 +1,95 @@
+/**
+ * Fetch per-request ISL/OSL arrays from stored aiperf `profile_export.jsonl`
+ * blobs (gzipped in `agentic_trace_replay.profile_export_jsonl_gz`). Caller
+ * passes the set of `benchmark_results.id`s it wants and receives one entry
+ * per id that actually has a trace_replay blob (others are silently skipped).
+ *
+ * The JSONL has one JSON object per request with the shape:
+ *   { metrics: { input_sequence_length: { value, unit }, output_sequence_length: {...}, ... } }
+ *
+ * Returns raw arrays rather than pre-binned histograms — payload stays tiny
+ * (~256 ints * 2 fields per point, ~2 KB compressed) and the frontend can bin
+ * however it wants.
+ */
+
+import { gunzipSync } from 'node:zlib';
+
+import type { DbClient } from '../connection.js';
+
+export interface TraceHistogramPoint {
+  /** benchmark_results.id this entry belongs to. */
+  id: number;
+  /** Input sequence length (tokens), one entry per JSONL record with a finite value. */
+  isl: number[];
+  /** Output sequence length (tokens); collected independently, so its length may differ. */
+  osl: number[];
+}
+
+export type TraceHistogramMap = Record<number, TraceHistogramPoint>; // keyed by id
+
+/**
+ * Cap the number of blobs we pull in a single Neon HTTP query — the serverless
+ * driver returns 507 ("response is too large, max 64 MB") if the combined gzip
+ * payload exceeds that. Typical blobs are ~1-2 MB compressed, well below the cap
+ * at 12. NOTE(review): 13 MB blobs (see trace-availability.ts) would overflow it.
+ */
+const QUERY_CHUNK_SIZE = 12;
+
+/**
+ * Decode the stored profile blobs for the given ids into per-record ISL/OSL
+ * arrays. Ids with no blob, or whose blob fails to decode, are omitted.
+ */
+export async function getTraceHistograms(
+  sql: DbClient,
+  benchmarkResultIds: number[],
+): Promise<TraceHistogramMap> {
+  if (benchmarkResultIds.length === 0) return {};
+
+  const blobRows: { benchmark_result_id: number; blob: Buffer }[] = [];
+  for (let start = 0; start < benchmarkResultIds.length; start += QUERY_CHUNK_SIZE) {
+    const chunk = benchmarkResultIds.slice(start, start + QUERY_CHUNK_SIZE);
+    const chunkRows = (await sql`
+      select
+        br.id as benchmark_result_id,
+        atr.profile_export_jsonl_gz as blob
+      from benchmark_results br
+      join agentic_trace_replay atr on atr.id = br.trace_replay_id
+      where br.id = any(${chunk}::bigint[])
+        and atr.profile_export_jsonl_gz is not null
+    `) as { benchmark_result_id: number; blob: Buffer }[];
+    blobRows.push(...chunkRows);
+  }
+
+  const histograms: TraceHistogramMap = {};
+  for (const { benchmark_result_id, blob } of blobRows) {
+    const id = Number(benchmark_result_id);
+    const isl: number[] = [];
+    const osl: number[] = [];
+    try {
+      for (const line of gunzipSync(blob).toString('utf8').split('\n')) {
+        if (!line) continue;
+        let rec: { metrics?: Record<string, { value?: number } | number> };
+        try {
+          rec = JSON.parse(line);
+        } catch {
+          continue; // unparseable line: skip it, keep the rest of the blob
+        }
+        const metrics = rec.metrics ?? {};
+        const islVal = readMetric(metrics['input_sequence_length']);
+        const oslVal = readMetric(metrics['output_sequence_length']);
+        if (typeof islVal === 'number' && Number.isFinite(islVal)) isl.push(islVal);
+        if (typeof oslVal === 'number' && Number.isFinite(oslVal)) osl.push(oslVal);
+      }
+      histograms[id] = { id, isl, osl };
+    } catch {
+      // Drop malformed blobs silently — caller treats missing ids as "no data".
+    }
+  }
+  return histograms;
+}
+
+function readMetric(v: { value?: number } | number | undefined): number | undefined {
+  // Bare numbers pass through, envelopes are unwrapped, null/undefined map to undefined.
+  if (v == null) return undefined;
+  return typeof v === 'number' ? v : v.value;
+}
diff --git a/packages/db/src/queries/trace-server-metrics.ts b/packages/db/src/queries/trace-server-metrics.ts
new file mode 100644
index 00000000..5594d514
--- /dev/null
+++ b/packages/db/src/queries/trace-server-metrics.ts
@@ -0,0 +1,170 @@
+/**
+ * Time-series view of one agentic benchmark point: chart-ready arrays for
+ * KV utilization, prefix-cache hit rate, queue depth, prefill + decode TPS,
+ * and per-source prompt-token counts.
+ *
+ * Backed by `agentic_trace_replay.chart_series` (pre-computed at ingest
+ * time, see `etl/compute-chart-series.ts`). The fast path is a single SQL
+ * row read; the slow path re-computes from `server_metrics_json_gz` and is
+ * only taken when the column is missing or the stored
+ * `CHART_SERIES_VERSION` is stale (the backfill script should drain that).
+ */
+
+import {
+  CHART_SERIES_VERSION,
+  computeChartSeries,
+  type ChartSeries,
+  type QueueDepthPoint,
+  type TimeSeriesPoint,
+} from '../etl/compute-chart-series';
+
+import type { DbClient } from '../connection.js';
+
+export type { TimeSeriesPoint, QueueDepthPoint } from '../etl/compute-chart-series';
+
+export interface PointMeta {
+  id: number; // benchmark_results.id
+  hardware: string;
+  framework: string;
+  model: string;
+  precision: string;
+  spec_method: string;
+  disagg: boolean;
+  conc: number;
+  offload_mode: string | null;
+  isl: number | null;
+  osl: number | null;
+  benchmark_type: string;
+  date: string; // selected as `date::text`
+  /** GitHub Actions run URL (`<html_url>/attempts/<run_attempt>`); null when the run has no html_url. */
+  run_url: string | null;
+  /** End-of-run cumulative GPU cache-hit rate (the number the dashboard shows); null if unreported. */
+  server_gpu_cache_hit_rate: number | null;
+  /** End-of-run cumulative CPU offload cache-hit rate; null if unreported. */
+  server_cpu_cache_hit_rate: number | null;
+}
+
+export interface TraceServerMetrics {
+  /** Point context — hardware, model, conc, etc. for the page header. */
+  meta: PointMeta;
+  /** ns wall-clock of the first window's start; for debugging only. */
+  startNs: number;
+  /** ns wall-clock of the last window's end. */
+  endNs: number;
+  /** Total benchmark window in seconds. */
+  durationS: number;
+  /** Number of 1Hz windows captured. */
+  timeslicesCount: number;
+  /** vllm:kv_cache_usage_perc avg per scrape, values in 0..1. */
+  kvCacheUsage: TimeSeriesPoint[];
+  /** Per-window prefix-cache hit rate computed as Δhits / Δqueries (0..1). */
+  prefixCacheHitRate: TimeSeriesPoint[];
+  /** Request queue depth: running, waiting, total per scrape. */
+  queueDepth: QueueDepthPoint[];
+  /**
+   * Per-source prompt-token counts over time (counter rate per scrape).
+   * Keyed by the value of the `source` label (typically `local_cache_hit`,
+   * `external_cache_hit`, `miss`, etc.). Plot as stacked area.
+   */
+  promptTokensBySource: Record<string, TimeSeriesPoint[]>;
+  /** Prefill throughput: vllm:prompt_tokens rate (tokens/sec) per scrape. */
+  prefillTps: TimeSeriesPoint[];
+  /** Decode throughput: vllm:generation_tokens rate (tokens/sec) per scrape. */
+  decodeTps: TimeSeriesPoint[];
+  /** Prefix-cache tokens served per scrape (vllm:prefix_cache_hits rate); [] when absent. */
+  prefixCacheHitsTps: TimeSeriesPoint[];
+  /** Host (CPU offload) KV cache utilization, 0..1. SGLang hicache only; [] when absent. */
+  hostKvCacheUsage: TimeSeriesPoint[];
+  /**
+   * Per-DP-rank KV cache utilization. Empty for single-engine deployments (the
+   * cluster-average `kvCacheUsage` line covers that case) and for pre-v8 stored rows.
+   */
+  kvCacheUsageByEngine: { engineLabel: string; points: TimeSeriesPoint[] }[];
+}
+
+interface RawMetaRow extends PointMeta {
+  blob: Buffer | null; // server_metrics_json_gz; null if no trace-replay row joins or column is null
+  chart_series: ChartSeries | null; // pre-computed series; may be null or carry an older version
+}
+
+/** Copies the `PointMeta` columns off a raw row; `id` and the hit rates go through Number(). */
+function buildMeta(row: RawMetaRow): PointMeta {
+  const numOrNull = (x: number | null): number | null => (x === null ? null : Number(x));
+  return {
+    id: Number(row.id),
+    hardware: row.hardware,
+    framework: row.framework,
+    model: row.model,
+    precision: row.precision,
+    spec_method: row.spec_method,
+    disagg: row.disagg,
+    conc: row.conc,
+    offload_mode: row.offload_mode,
+    isl: row.isl,
+    osl: row.osl,
+    benchmark_type: row.benchmark_type,
+    date: row.date,
+    run_url: row.run_url,
+    server_gpu_cache_hit_rate: numOrNull(row.server_gpu_cache_hit_rate),
+    server_cpu_cache_hit_rate: numOrNull(row.server_cpu_cache_hit_rate),
+  };
+}
+
+/**
+ * Flattens a stored or freshly computed `ChartSeries` into the `TraceServerMetrics` shape.
+ * Only window bounds and chart arrays are copied; `meta` is attached as-is.
+ */
+function merge(meta: PointMeta, series: ChartSeries): TraceServerMetrics {
+  const { startNs, endNs, durationS, timeslicesCount } = series;
+  const { kvCacheUsage, prefixCacheHitRate, queueDepth, promptTokensBySource } = series;
+  const { prefillTps, decodeTps } = series;
+  // Older stored rows miss the late additions (v2 rows lack prefixCacheHitsTps, pre-v8 rows
+  // lack kvCacheUsageByEngine), so each defaults to [] — i.e. no per-engine overlay.
+  const lateFields = {
+    prefixCacheHitsTps: series.prefixCacheHitsTps ?? [],
+    hostKvCacheUsage: series.hostKvCacheUsage ?? [],
+    kvCacheUsageByEngine: series.kvCacheUsageByEngine ?? [],
+  };
+  const bounds = { startNs, endNs, durationS, timeslicesCount };
+  const core = { kvCacheUsage, prefixCacheHitRate, queueDepth, promptTokensBySource };
+  // Spread order keeps the response keys in the same order as the field list above.
+  return { meta, ...bounds, ...core, prefillTps, decodeTps, ...lateFields };
+}
+
+/**
+ * Loads the chart-ready server-metrics time series for one benchmark result.
+ * Resolves to `null` when the id matches no row or no series can be served or computed.
+ */
+export async function getTraceServerMetrics(
+  sql: DbClient,
+  benchmarkResultId: number,
+): Promise<TraceServerMetrics | null> {
+  const rows = (await sql`
+    select
+      atr.server_metrics_json_gz as blob,
+      atr.chart_series,
+      br.id, c.hardware, c.framework, c.model, c.precision, c.spec_method, c.disagg,
+      br.conc, br.offload_mode, br.isl, br.osl, br.benchmark_type,
+      br.date::text,
+      case when wr.html_url is not null then wr.html_url || '/attempts/' || wr.run_attempt else null end as run_url,
+      (br.metrics ->> 'server_gpu_cache_hit_rate')::numeric as server_gpu_cache_hit_rate,
+      (br.metrics ->> 'server_cpu_cache_hit_rate')::numeric as server_cpu_cache_hit_rate
+    from benchmark_results br
+    join configs c on c.id = br.config_id
+    join workflow_runs wr on wr.id = br.workflow_run_id
+    left join agentic_trace_replay atr on atr.id = br.trace_replay_id
+    where br.id = ${benchmarkResultId}
+  `) as unknown as RawMetaRow[];
+  const row = rows[0];
+  if (!row) return null;
+  const meta = buildMeta(row);
+  // Fast path: current-version chart_series; needs no blob, so it runs before the blob guard.
+  if (row.chart_series && Number(row.chart_series.version) === CHART_SERIES_VERSION) {
+    return merge(meta, row.chart_series);
+  }
+  // Slow path: recompute from the blob (null blob → nothing to compute). computeChartSeries
+  // falls back to stream-parsing on ERR_STRING_TOO_LONG, covering high-conc TP+EP rows.
+  if (!row.blob) return null;
+  const series = await computeChartSeries(row.blob);
+  return series ? merge(meta, series) : null;
+}
diff --git a/packages/db/src/queries/workflow-info.ts b/packages/db/src/queries/workflow-info.ts
index b4e4f255..d5e2d933 100644
--- a/packages/db/src/queries/workflow-info.ts
+++ b/packages/db/src/queries/workflow-info.ts
@@ -88,20 +88,22 @@ export async function getDateConfigs(sql: DbClient, date: string): Promise<DateC
 
 export interface AvailabilityRow {
   model: string;
-  isl: number;
-  osl: number;
+  // Null for agentic_traces rows; numeric for single_turn fixed-seq rows.
+  isl: number | null;
+  osl: number | null;
   precision: string;
   hardware: string;
   framework: string;
   spec_method: string;
   disagg: boolean;
+  benchmark_type: string;
   date: string;
 }
 
-/** Get available (model, ISL/OSL, precision, hardware, framework, spec_method, date) combos for the availability API. */
+/** Get available (model, ISL/OSL, precision, hardware, framework, spec_method, disagg, benchmark_type, date) combos for the availability API. */
 export async function getAvailabilityData(sql: DbClient): Promise<AvailabilityRow[]> {
   const rows = await sql`
-    SELECT a.model, a.isl, a.osl, a.precision, a.hardware, a.framework, a.spec_method, a.disagg, a.date::text
+    SELECT a.model, a.isl, a.osl, a.precision, a.hardware, a.framework, a.spec_method, a.disagg, a.benchmark_type, a.date::text
     FROM availability a
     WHERE EXISTS (
       SELECT 1
@@ -112,8 +114,9 @@ export async function getAvailabilityData(sql: DbClient): Promise<AvailabilityRo
         AND c.hardware = a.hardware
         AND c.framework = a.framework
         AND c.precision = a.precision
-        AND br.isl = a.isl
-        AND br.osl = a.osl
+        AND br.isl IS NOT DISTINCT FROM a.isl
+        AND br.osl IS NOT DISTINCT FROM a.osl
+        AND br.benchmark_type = a.benchmark_type
         AND br.date = a.date
         AND br.error IS NULL
         AND wr.conclusion IS NOT NULL
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 14505e57..717ffc5c 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -249,6 +249,12 @@ importers:
       postgres:
         specifier: ^3.4.9
         version: 3.4.9
+      stream-chain:
+        specifier: ^3.4.0
+        version: 3.6.3
+      stream-json:
+        specifier: ^2.1.0
+        version: 2.1.0
     devDependencies:
       '@types/adm-zip':
         specifier: ^0.5.8
@@ -256,6 +262,9 @@ importers:
       '@types/node':
         specifier: ^25.7.0
         version: 25.7.0
+      '@types/stream-json':
+        specifier: ^1.7.8
+        version: 1.7.8
       '@vitest/coverage-v8':
         specifier: ^4.1.6
         version: 4.1.6(vitest@4.1.6)
@@ -2334,6 +2343,12 @@ packages:
   '@types/stats.js@0.17.4':
     resolution: {integrity: sha512-jIBvWWShCvlBqBNIZt0KAshWpvSjhkwkEu4ZUcASoAvhmrgAUI2t1dXrjSL4xXVLB4FznPrIsX3nKXFl/Dt4vA==}
 
+  '@types/stream-chain@2.1.0':
+    resolution: {integrity: sha512-guDyAl6s/CAzXUOWpGK2bHvdiopLIwpGu8v10+lb9hnQOyo4oj/ZUQFOvqFjKGsE3wJP1fpIesCcMvbXuWsqOg==}
+
+  '@types/stream-json@1.7.8':
+    resolution: {integrity: sha512-MU1OB1eFLcYWd1LjwKXrxdoPtXSRzRmAnnxs4Js/ayB5O/NvHraWwuOaqMWIebpYwM6khFlsJOHEhI9xK/ab4Q==}
+
   '@types/three@0.184.1':
     resolution: {integrity: sha512-6q4VdiqVsrTRqmk62/BnlcAvIrnDM0zf2ZDVKI5kZiniWrSaOHaQzmbp+BNzoggc/8tgW412pL//wZIxu2PPTA==}
 
@@ -5074,9 +5089,15 @@ packages:
     resolution: {integrity: sha512-eLoXW/DHyl62zxY4SCaIgnRhuMr6ri4juEYARS8E6sCEqzKpOiE521Ucofdx+KnDZl5xmvGYaaKCk5FEOxJCoQ==}
     engines: {node: '>= 0.4'}
 
+  stream-chain@3.6.3:
+    resolution: {integrity: sha512-JZuELdHUuiZL4Olcr4EllGUvj9VKEaDkGHA6QAP5SruD0bgrr8TwtNXwRfH+fCncysEII7HhWll1+aOwvHYyRw==}
+
   stream-combiner@0.2.2:
     resolution: {integrity: sha512-6yHMqgLYDzQDcAkL+tjJDC5nSNuNIx0vZtRZeiPh7Saef7VHX9H5Ijn9l2VIol2zaNYlYEX6KyuT/237A58qEQ==}
 
+  stream-json@2.1.0:
+    resolution: {integrity: sha512-9gV/ywtebMn3DdKnNKYCb9iESvgR1dHbucNV+bRGvdvy+jV4c9FFgYKmENhpKv58jSwvs90Wk80RhfKk1KxHPg==}
+
   string-width@4.2.3:
     resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==}
     engines: {node: '>=8'}
@@ -7392,6 +7413,15 @@ snapshots:
 
   '@types/stats.js@0.17.4': {}
 
+  '@types/stream-chain@2.1.0':
+    dependencies:
+      '@types/node': 25.7.0
+
+  '@types/stream-json@1.7.8':
+    dependencies:
+      '@types/node': 25.7.0
+      '@types/stream-chain': 2.1.0
+
   '@types/three@0.184.1':
     dependencies:
       '@dimforge/rapier3d-compat': 0.12.0
@@ -10752,11 +10782,17 @@ snapshots:
       es-errors: 1.3.0
       internal-slot: 1.1.0
 
+  stream-chain@3.6.3: {}
+
   stream-combiner@0.2.2:
     dependencies:
       duplexer: 0.1.2
       through: 2.3.8
 
+  stream-json@2.1.0:
+    dependencies:
+      stream-chain: 3.6.3
+
   string-width@4.2.3:
     dependencies:
       emoji-regex: 8.0.0