;
@@ -1839,6 +2088,9 @@ const ScatterGraph = React.memo(
chartDefinition.chartType,
xScaleConfig._isLog,
yScaleConfig.type,
+ optimalPointKeys,
+ getCssColor,
+ resolveColor,
],
);
@@ -2031,6 +2283,17 @@ const ScatterGraph = React.memo(
setHideNonOptimal(checked);
track('latency_hide_non_optimal_toggled', { enabled: checked });
},
+ // On agentic + non-e2e chart, "optimal" means "on the
+ // e2e-latency Pareto frontier" (not a per-axis Pareto on the
+ // current x metric). Explain that so users don't wonder why
+ // a point sitting above the line is still considered
+ // dominated.
+ ...(selectedSequence === Sequence.AgenticTraces && selectedXAxisMode !== 'e2e'
+ ? {
+ infoTooltip:
+ "On agentic, optimal = on the end-to-end latency Pareto frontier, so a config can't win this axis by tanking e2e. Off-frontier points may appear above the line.",
+ }
+ : {}),
},
{
id: 'scatter-hide-point-labels',
diff --git a/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx b/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx
index f9b1b3c8..73018483 100644
--- a/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx
+++ b/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx
@@ -194,9 +194,7 @@ export function UnofficialChartDisplay() {
`${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition
]
}{' '}
- {graph.chartDefinition[
- `${selectedYAxisMetric}_heading` as keyof typeof graph.chartDefinition
- ] || graph.chartDefinition.heading}
+ {graph.chartDefinition.heading}
{graph.model} • {selectedPrecisions.join(', ')} • {graph.sequence}
diff --git a/packages/app/src/components/inference/utils.test.ts b/packages/app/src/components/inference/utils.test.ts
index 8f8705e1..589ba580 100644
--- a/packages/app/src/components/inference/utils.test.ts
+++ b/packages/app/src/components/inference/utils.test.ts
@@ -157,12 +157,12 @@ describe('processOverlayChartData', () => {
});
it('remaps x to config override for input metrics on interactivity chart', () => {
- // inputTputPerGpu has x override to p99_ttft on interactivity chart
+ // inputTputPerGpu has x override to p90_ttft on interactivity chart
const data = [
pt({
x: 100,
inputTputPerGpu: { y: 5, roof: false },
- p99_ttft: 0.25,
+ p90_ttft: 0.25,
median_intvty: 50,
} as any),
];
@@ -176,16 +176,11 @@ describe('processOverlayChartData', () => {
pt({
x: 100,
inputTputPerGpu: { y: 5, roof: false },
- median_ttft: 0.1,
+ p90_ttft: 0.1,
median_intvty: 50,
} as any),
];
- const result = processOverlayChartData(
- data,
- 'interactivity',
- 'y_inputTputPerGpu',
- 'median_ttft',
- );
+ const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft');
expect(result).toHaveLength(1);
expect(result[0].x).toBe(0.1);
});
@@ -195,76 +190,62 @@ describe('processOverlayChartData', () => {
pt({
x: 100,
inputTputPerGpu: { y: 5, roof: false },
- p99_ttft: 0.25,
+ p90_ttft: 0.25,
median_e2el: 2.5,
} as any),
];
const result = processOverlayChartData(data, 'e2e', 'y_inputTputPerGpu', null);
expect(result).toHaveLength(1);
- // e2e uses median_e2el as x (from chart config default), not p99_ttft
+ // e2e uses median_e2el as x (from chart config default), not p90_ttft
expect(result[0].x).toBe(2.5);
});
- it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p99_ttft', () => {
- const data = [
- pt({
- x: 100,
- tpPerGpu: { y: 42, roof: false },
- p99_ttft: 0.35,
- median_e2el: 2.5,
- } as any),
- ];
- const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft');
- expect(result).toHaveLength(1);
- expect(result[0].x).toBe(0.35);
- });
-
- it('remaps x to TTFT for e2e chart when selectedXAxisMetric is median_ttft', () => {
+ it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p90_ttft', () => {
const data = [
pt({
x: 100,
tpPerGpu: { y: 42, roof: false },
- median_ttft: 0.12,
+ p90_ttft: 0.12,
median_e2el: 2.5,
} as any),
];
- const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'median_ttft');
+ const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft');
expect(result).toHaveLength(1);
expect(result[0].x).toBe(0.12);
});
it('filters e2e TTFT outliers exceeding y_latency_limit', () => {
const data = [
- pt({ tpPerGpu: { y: 10, roof: false }, p99_ttft: 0.5, median_e2el: 1 } as any),
- pt({ tpPerGpu: { y: 5, roof: false }, p99_ttft: 999, median_e2el: 2 } as any),
+ pt({ tpPerGpu: { y: 10, roof: false }, p90_ttft: 0.5, median_e2el: 1 } as any),
+ pt({ tpPerGpu: { y: 5, roof: false }, p90_ttft: 999, median_e2el: 2 } as any),
];
- const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft');
+ const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft');
// y_latency_limit is 60 in the e2e chart config — the 999 outlier should be filtered
expect(result).toHaveLength(1);
expect(result[0].x).toBe(0.5);
});
it('does not filter interactivity points by latency limit when x-axis is default', () => {
- // Regression: selectedXAxisMetric defaults to 'p99_ttft' but the interactivity
+ // Regression: selectedXAxisMetric defaults to 'p90_ttft' but the interactivity
// chart's x-axis stays median_intvty for non-input metrics. The latency limit
// (60) must NOT apply to median_intvty values.
const data = [
pt({ tpPerGpu: { y: 42, roof: false }, median_intvty: 200 } as any),
pt({ tpPerGpu: { y: 10, roof: false }, median_intvty: 30 } as any),
];
- const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p99_ttft');
+ const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p90_ttft');
expect(result).toHaveLength(2);
});
it('applies latency limit on interactivity only when x-axis is actually overridden', () => {
- // When an input metric IS selected and x-axis overrides to p99_ttft,
+ // When an input metric IS selected and x-axis overrides to p90_ttft,
// the latency limit should apply.
const data = [
- pt({ inputTputPerGpu: { y: 5, roof: false }, p99_ttft: 0.5, median_intvty: 10 } as any),
- pt({ inputTputPerGpu: { y: 3, roof: false }, p99_ttft: 999, median_intvty: 20 } as any),
+ pt({ inputTputPerGpu: { y: 5, roof: false }, p90_ttft: 0.5, median_intvty: 10 } as any),
+ pt({ inputTputPerGpu: { y: 3, roof: false }, p90_ttft: 999, median_intvty: 20 } as any),
];
- const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p99_ttft');
- // x-axis is overridden to p99_ttft for input metric — latency limit SHOULD filter 999
+ const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft');
+ // x-axis is overridden to p90_ttft for input metric — latency limit SHOULD filter 999
expect(result).toHaveLength(1);
expect(result[0].x).toBe(0.5);
});
diff --git a/packages/app/src/components/inference/utils.ts b/packages/app/src/components/inference/utils.ts
index 4b5335b6..4876c614 100644
--- a/packages/app/src/components/inference/utils.ts
+++ b/packages/app/src/components/inference/utils.ts
@@ -75,11 +75,13 @@ export function processOverlayChartData(
chartType: 'e2e' | 'interactivity',
selectedYAxisMetric: string,
selectedXAxisMetric: string | null,
+ options?: { isAgentic?: boolean },
): InferenceData[] {
const chartDef = (chartDefinitions as ChartDefinition[]).find((d) => d.chartType === chartType);
if (!chartDef) return [];
const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey;
+ const isAgentic = options?.isAgentic === true;
// Resolve x-axis field (must match useChartData logic)
const metricTitle =
@@ -87,9 +89,11 @@ export function processOverlayChartData(
const isInputMetric = metricTitle.toLowerCase().includes('input');
let xAxisField: string = chartDef.x;
// selectedXAxisMetric is already the effective metric for this chart type
- // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric)
+ // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric).
+ // Match any *_ttft metric — the x-axis-mode picker can now select any
+ // percentile (median/p75/p90/p99) depending on sequence kind.
const isTtftOverride =
- selectedXAxisMetric === 'p99_ttft' || selectedXAxisMetric === 'median_ttft';
+ typeof selectedXAxisMetric === 'string' && selectedXAxisMetric.endsWith('_ttft');
if (selectedXAxisMetric && chartDef.chartType === 'interactivity' && isInputMetric) {
xAxisField = selectedXAxisMetric;
@@ -109,7 +113,12 @@ export function processOverlayChartData(
})
.filter(
(d) =>
- xAxisField === chartDef.x || !chartDef.y_latency_limit || d.x <= chartDef.y_latency_limit,
+ // Skip the latency limit for the natural x-axis or for agentic
+ // (long TTFTs are normal there, not overload outliers).
+ xAxisField === chartDef.x ||
+ isAgentic ||
+ !chartDef.y_latency_limit ||
+ d.x <= chartDef.y_latency_limit,
);
return filterDataByCostLimit(processedData, chartDef, selectedYAxisMetric);
diff --git a/packages/app/src/components/inference/utils/tooltipUtils.ts b/packages/app/src/components/inference/utils/tooltipUtils.ts
index 4c56d217..ed68c41b 100644
--- a/packages/app/src/components/inference/utils/tooltipUtils.ts
+++ b/packages/app/src/components/inference/utils/tooltipUtils.ts
@@ -19,6 +19,14 @@ export interface TooltipConfig {
isTracked?: boolean;
/** URL to the GitHub Actions workflow run */
runUrl?: string;
+ /**
+ * Whether this agentic point has a stored trace_replay blob. Controls
+ * visibility of the "View charts" button — the actual distributions are
+ * rendered on the detail page, not inline, so all the tooltip needs is a
+ * presence boolean (sourced from the bulk `/api/v1/trace-availability`
+ * call so we don't ship megabytes of profile JSONL just for this check).
+ */
+ hasTrace?: boolean;
}
export interface OverlayTooltipConfig extends TooltipConfig {
@@ -88,6 +96,74 @@ const runLinkHTML = (runUrl?: string) =>
const tooltipLine = (label: string, value: string | number) =>
`
${label}: ${value}
`;
+/**
+ * Formats a fraction as a percentage with one decimal place
+ * (the value is multiplied by 100, e.g. 0.1234 -> "12.3%").
+ *
+ * @param v - Fraction to format; may be missing.
+ * @returns The formatted percentage, or `null` when `v` is undefined, null,
+ *   NaN, or ±Infinity, so the caller never renders "NaN%" or "Infinity%".
+ */
+const formatPct = (v: number | null | undefined): string | null =>
+  // Number.isFinite rejects NaN and ±Infinity in a single check.
+  v === undefined || v === null || !Number.isFinite(v) ? null : `${(v * 100).toFixed(1)}%`;
+
+/**
+ * Formats a numeric tooltip value: rounds to at most 3 decimal places
+ * (trailing zeros dropped) and applies en-US digit grouping once the
+ * rounded magnitude reaches 10,000. Non-finite inputs are stringified as-is.
+ */
+const fmt = (v: number): string => {
+  if (!Number.isFinite(v)) {
+    return String(v);
+  }
+  // Round-trip through toFixed/parseFloat to cap decimals and strip trailing zeros.
+  const capped = parseFloat(v.toFixed(3));
+  const needsGrouping = Math.abs(capped) >= 10000;
+  return needsGrouping ? new Intl.NumberFormat('en-US').format(capped) : String(capped);
+};
+
+/**
+ * Builds the extra tooltip rows shown only for agentic-trace points:
+ * offload mode, GPU/CPU/theoretical cache hit rates, request success
+ * counts, and prompt/generated token totals. Absent fields are skipped.
+ * Yields '' when `d.benchmark_type` is not 'agentic_traces'.
+ */
+const generateAgenticHTML = (d: InferenceData): string => {
+  if (d.benchmark_type !== 'agentic_traces') return '';
+
+  const rows: string[] = [];
+
+  if (d.offload_mode) {
+    rows.push(tooltipLine('Offload Mode', d.offload_mode.toUpperCase()));
+  }
+
+  // Cache hit rates, in display order; a null percentage drops the row.
+  const hitRates: [string, string | null][] = [
+    ['GPU Cache Hit Rate', formatPct(d.server_gpu_cache_hit_rate)],
+    ['CPU Cache Hit Rate', formatPct(d.server_cpu_cache_hit_rate)],
+    ['Theoretical Cache Hit Rate', formatPct(d.theoretical_cache_hit_rate)],
+  ];
+  for (const [label, pct] of hitRates) {
+    if (pct) rows.push(tooltipLine(label, pct));
+  }
+
+  const { num_requests_total: total, num_requests_successful: succeeded } = d;
+  if (total !== undefined && succeeded !== undefined) {
+    // The percentage suffix is left off when there were no requests (no divide-by-zero).
+    const pctSuffix = total > 0 ? ` (${((succeeded / total) * 100).toFixed(0)}%)` : '';
+    rows.push(tooltipLine('Requests', `${succeeded} / ${total}${pctSuffix}`));
+  }
+
+  if (d.total_prompt_tokens !== undefined) {
+    rows.push(tooltipLine('Prompt Tokens', formatNumber(d.total_prompt_tokens)));
+  }
+  if (d.total_generation_tokens !== undefined) {
+    rows.push(tooltipLine('Generated Tokens', formatNumber(d.total_generation_tokens)));
+  }
+
+  // Histograms and time-series are rendered on the dedicated detail page;
+  // the "View charts" button (added by the wrapper when the tooltip is
+  // pinned and trace data exists) links there.
+
+  return rows.join('');
+};
+
+/**
+ * "View charts" button markup. Produces '' unless the tooltip is pinned AND
+ * the point has stored trace data. Wired up by the ScatterGraph click handler.
+ */
+const viewChartsButtonHTML = (isPinned: boolean, hasTraceData: boolean): string =>
+  isPinned && hasTraceData ? `View charts → ` : '';
+
const shortenSha = (image: string) => image.replaceAll(/(sha256:[a-f0-9]{7})[a-f0-9]+/giu, '$1…');
const imageTooltipLine = (image: string) =>
@@ -138,7 +214,16 @@ const generateParallelismHTML = (d: InferenceData): string => {
* @returns HTML string for the tooltip content
*/
export const generateTooltipContent = (config: TooltipConfig): string => {
- const { data: d, isPinned, xLabel, yLabel, selectedYAxisMetric, hardwareConfig, runUrl } = config;
+ const {
+ data: d,
+ isPinned,
+ xLabel,
+ yLabel,
+ selectedYAxisMetric,
+ hardwareConfig,
+ runUrl,
+ hasTrace,
+ } = config;
return `
@@ -156,16 +241,16 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
: ''
}
- ${xLabel}: ${formatNumber(d.x)}
+ ${xLabel}: ${fmt(d.x)}
- ${yLabel}: ${formatNumber(d.y)}
+ ${yLabel}: ${fmt(d.y)}
${
selectedYAxisMetric === 'y_tpPerGpu' && d['inputTputPerGpu']
? `
- Input Token Throughput per GPU: ${formatNumber(d['inputTputPerGpu'].y)}
+ Input Token Throughput per GPU: ${fmt(d['inputTputPerGpu'].y)}
`
: ''
}
@@ -173,7 +258,7 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
selectedYAxisMetric === 'y_tpPerGpu' && d['outputTputPerGpu']
? `
- Output Token Throughput per GPU: ${formatNumber(d['outputTputPerGpu'].y)}
+ Output Token Throughput per GPU: ${fmt(d['outputTputPerGpu'].y)}
`
: ''
}
@@ -182,10 +267,12 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)}
${runLinkHTML(runUrl)}
+ ${viewChartsButtonHTML(isPinned, Boolean(hasTrace))}
${
isPinned
? `
- ${xLabel}: ${formatNumber(d.x)}
+ ${xLabel}: ${fmt(d.x)}
- ${yLabel}: ${formatNumber(d.y)}
+ ${yLabel}: ${fmt(d.y)}
${tooltipLine('Total GPUs', d.tp)}
${generateParallelismHTML(d)}
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)}
`;
};
@@ -271,16 +359,16 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
: ''
}
- ${xLabel}: ${formatNumber(d.x)}
+ ${xLabel}: ${fmt(d.x)}
- ${yLabel}: ${formatNumber(d.y)}
+ ${yLabel}: ${fmt(d.y)}
${
selectedYAxisMetric === 'y_tpPerGpu' && d['inputTputPerGpu']
? `
- Input Token Throughput per GPU: ${formatNumber(d['inputTputPerGpu'].y)}
+ Input Token Throughput per GPU: ${fmt(d['inputTputPerGpu'].y)}
`
: ''
}
@@ -288,7 +376,7 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
selectedYAxisMetric === 'y_tpPerGpu' && d['outputTputPerGpu']
? `
- Output Token Throughput per GPU: ${formatNumber(d['outputTputPerGpu'].y)}
+ Output Token Throughput per GPU: ${fmt(d['outputTputPerGpu'].y)}
`
: ''
}
@@ -297,9 +385,10 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)}
${runLinkHTML(runUrl)}
`;
diff --git a/packages/app/src/components/ui/chart-legend.tsx b/packages/app/src/components/ui/chart-legend.tsx
index 81d5f261..a20c9959 100644
--- a/packages/app/src/components/ui/chart-legend.tsx
+++ b/packages/app/src/components/ui/chart-legend.tsx
@@ -6,6 +6,7 @@ import {
ArrowRightToLine,
Circle,
Diamond,
+ Info,
Square,
Triangle,
X,
@@ -36,6 +37,8 @@ export interface LegendSwitchConfig {
label: string;
checked: boolean;
onCheckedChange: (checked: boolean) => void;
+ /** Optional explainer rendered as an info-icon tooltip next to the label. */
+ infoTooltip?: React.ReactNode;
}
export interface LegendActionConfig {
@@ -273,6 +276,29 @@ export default function ChartLegend({
>
{sw.label}
+ {sw.infoTooltip && (
+
+
+
+
+
+
+
+
+ {sw.infoTooltip}
+
+
+
+ )}
))}