Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/assets/index-Bc8XlX7Z.css

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/assets/index-Bq1YCEGN.css

This file was deleted.

4 changes: 2 additions & 2 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" />
<script type="module" crossorigin src="./assets/index-B1JUNaN9.js"></script>
<link rel="stylesheet" crossorigin href="./assets/index-Bq1YCEGN.css">
<script type="module" crossorigin src="./assets/index-BpBEqFYA.js"></script>
<link rel="stylesheet" crossorigin href="./assets/index-Bc8XlX7Z.css">
</head>
<body>
<div id="root"></div>
Expand Down
2 changes: 1 addition & 1 deletion website/src/components/leaderboard/LeaderboardSection.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ export function LeaderboardSection() {
</p>
</div>

<Perturbations systems={systems} domain={domain} />
<Perturbations systems={systems} />
</div>
</Section>
);
Expand Down
96 changes: 57 additions & 39 deletions website/src/components/leaderboard/PerturbationBarChart.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer, ErrorBar, ReferenceLine, Customized, LabelList, useXAxisScale, useYAxisScale } from 'recharts';
import type { SystemStats, DomainOrPooled } from '../../data/leaderboardData';
import type { SystemStats } from '../../data/leaderboardData';
import { getPertValue, perturbations, perturbationLabels, groupedSystems } from '../../data/leaderboardData';
import { useThemeColors } from '../../styles/theme';

interface PerturbationBarChartProps {
metric: string;
metricLabel: string;
systems: SystemStats[];
domain: DomainOrPooled;
}

interface ChartRow {
Expand Down Expand Up @@ -134,34 +133,47 @@ function SeparatorsLayer({
);
}

/** Renders a single significance marker above a bar+CI structure. Uses the
* YAxis scale hook so the y position is exact regardless of chart layout. */
/** Renders a single significance marker just outside a bar+CI structure in
* the bar's direction (above for positive deltas, below for negative). Font
* size scales with bar width so "***" always fits. */
function StarMark({
vb,
label,
upperValue,
point,
ciLower,
ciUpper,
amberColor,
}: {
vb: { x: number; width: number };
label: string;
upperValue: number;
point: number;
ciLower: number;
ciUpper: number;
amberColor: string;
}) {
const yScale = useYAxisScale() as ((v: number) => number | undefined) | undefined;
if (!yScale) return null;
// Position the star above the higher of (upper CI cap, zero line).
// For deeply-negative bars whose entire CI sits below zero, this clamps
// the star to the zero line so it stays well above the x-axis instead of
// drifting to the bottom of the plot.
const target = Math.max(0, upperValue);
const ySc = yScale(target);
if (ySc == null) return null;
// "***" width ≈ 3 chars × 0.6 × fontSize. Solve for fontSize that fits vb.width.
const fontSize = Math.max(7, Math.min(13, Math.floor(vb.width / (3 * 0.6))));
const clearance = 5;
const above = point >= 0;
const capPx = yScale(above ? ciUpper : ciLower);
if (capPx == null) return null;
// SVG text y is the baseline. For above-bar placement, baseline sits just
// above the cap so the glyphs hover over the cap; for below-bar placement,
// baseline sits one fontSize below the cap so the glyphs hover under it.
let y = above ? capPx - clearance : capPx + clearance + fontSize;
// Clamp inside the plot area in case the cap is outside the visible domain.
const topPx = yScale(0.5);
const bottomPx = yScale(-0.5);
if (topPx != null) y = Math.max(y, topPx + fontSize);
if (bottomPx != null) y = Math.min(y, bottomPx - 2);
return (
<text
x={vb.x + vb.width / 2}
y={ySc - 14}
y={y}
fill={amberColor}
fontSize={14}
fontSize={fontSize}
fontWeight={700}
textAnchor="middle"
>
Expand All @@ -170,29 +182,27 @@ function StarMark({
);
}

export function PerturbationBarChart({ metric, metricLabel, systems, domain }: PerturbationBarChartProps) {
export function PerturbationBarChart({ metric, metricLabel, systems }: PerturbationBarChartProps) {
const colors = useThemeColors();

// Order systems by architecture group: S2S → Hybrid (2-part) → Cascade.
const ordered = groupedSystems(systems);

// Build data rows: one per system that has any perturbation data for this metric.
// Perturbation results are always shown pooled across domains; the domain
// pills at the top of the leaderboard scope only the scatter plot.
const data: ChartRow[] = ordered.flatMap((s) => {
const row: ChartRow = { name: s.name, type: s.type };
let any = false;
for (const p of perturbations) {
const v = getPertValue(s, metric, p, domain);
const v = getPertValue(s, metric, p, 'pooled');
if (v) {
const label = tierLabel(v.corrected_p);
row[`${p}_point`] = v.point;
row[`${p}_err`] = [v.point - v.ci_lower, v.ci_upper - v.point];
row[`${p}_sig`] = label !== '';
row[`${p}_sig_label`] = label;
row[`${p}_sig_label`] = tierLabel(v.corrected_p);
any = true;
} else {
row[`${p}_point`] = null;
row[`${p}_err`] = undefined;
row[`${p}_sig`] = false;
row[`${p}_sig_label`] = '';
}
}
Expand All @@ -202,14 +212,13 @@ export function PerturbationBarChart({ metric, metricLabel, systems, domain }: P
if (data.length === 0) {
return (
<div className="text-sm text-text-muted italic px-4 py-6">
No perturbation data available for {metricLabel} at this domain.
No perturbation data available for {metricLabel}.
</div>
);
}

// Compute group boundary indices: positions where the type changes from the previous row.
// The ReferenceLine x value is the `name` of the first row in the new group; recharts will
// draw the line at that category's tick.
// Group boundaries: each entry pairs the new-group's first row with the previous row,
// so SeparatorsLayer can place a dashed line at the midpoint of the gap between them.
const separators: { name: string; prevName: string }[] = [];
for (let i = 1; i < data.length; i++) {
if (data[i].type !== data[i - 1].type) {
Expand Down Expand Up @@ -261,27 +270,36 @@ export function PerturbationBarChart({ metric, metricLabel, systems, domain }: P
<Bar key={p} dataKey={`${p}_point`} fill={colorFor(p, colors)} radius={[2, 2, 0, 0]}>
<ErrorBar dataKey={`${p}_err`} direction="y" width={4} strokeWidth={1} stroke={colors.text.muted} />
<LabelList
dataKey={`${p}_sig_label`}
// Encode the row's significance + CI into cp.value via valueAccessor
// rather than reading `data[cp.index]` in content: Bar drops zero-dimension
// rectangles, so cp.index is into a filtered array and would misalign rows
// after any all-zero row.
valueAccessor={(entry: { payload?: ChartRow }) => {
const r = entry?.payload;
const label = r?.[`${p}_sig_label`] as string | undefined;
const point = r?.[`${p}_point`] as number | null | undefined;
const err = r?.[`${p}_err`] as [number, number] | undefined;
if (!label || point == null || !err) return '';
return `${label}|${point}|${err[0]}|${err[1]}`;
}}
content={(props: unknown) => {
const cp = props as {
viewBox?: { x?: number; width?: number };
value?: string;
index?: number;
};
const label = cp.value;
const cp = props as { viewBox?: { x?: number; width?: number }; value?: string };
const vb = cp.viewBox;
if (!label || !vb || vb.x == null || vb.width == null || cp.index == null) {
if (!cp.value || !vb || vb.x == null || vb.width == null) return null;
const [label, pointStr, errLoStr, errHiStr] = cp.value.split('|');
const point = parseFloat(pointStr);
const errLo = parseFloat(errLoStr);
const errHi = parseFloat(errHiStr);
if (!Number.isFinite(point) || !Number.isFinite(errLo) || !Number.isFinite(errHi)) {
return null;
}
const row = data[cp.index];
const point = row?.[`${p}_point`] as number | null | undefined;
const err = row?.[`${p}_err`] as [number, number] | undefined;
if (point == null || !err) return null;
return (
<StarMark
vb={{ x: vb.x, width: vb.width }}
label={label}
upperValue={point + err[1]}
point={point}
ciLower={point - errLo}
ciUpper={point + errHi}
amberColor={colors.accent.amber}
/>
);
Expand Down
6 changes: 2 additions & 4 deletions website/src/components/leaderboard/Perturbations.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { useState } from 'react';
import { ChevronDown, ChevronRight } from 'lucide-react';
import type { SystemStats, DomainOrPooled } from '../../data/leaderboardData';
import type { SystemStats } from '../../data/leaderboardData';
import { perturbations, perturbationLabels } from '../../data/leaderboardData';
import { PerturbationBarChart } from './PerturbationBarChart';
import { useThemeColors } from '../../styles/theme';
Expand All @@ -13,7 +13,6 @@ const PERT_COLOR_KEYS: Record<string, 'amber' | 'cyan' | 'purple'> = {

interface PerturbationsProps {
systems: SystemStats[];
domain: DomainOrPooled;
}

interface MetricSpec {
Expand All @@ -33,7 +32,7 @@ const METRICS: MetricSpec[] = [
{ key: 'conversation_correctly_finished', label: 'Conversation Correctly Finished' },
];

export function Perturbations({ systems, domain }: PerturbationsProps) {
export function Perturbations({ systems }: PerturbationsProps) {
const colors = useThemeColors();
const [sectionOpen, setSectionOpen] = useState(true);
const [expanded, setExpanded] = useState<Set<string>>(new Set());
Expand Down Expand Up @@ -113,7 +112,6 @@ export function Perturbations({ systems, domain }: PerturbationsProps) {
metric={m.key}
metricLabel={m.label}
systems={systems}
domain={domain}
/>
</div>
)}
Expand Down
Loading