Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ export function resolveDashboardMode(
return { projectDashboard: false };
}

return { projectDashboard: projectCount > 1 };
return { projectDashboard: projectCount > 0 };
}

// ── Feedback persistence ─────────────────────────────────────────────────
Expand Down
4 changes: 2 additions & 2 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ describe('resolveDashboardMode', () => {
});
});

it('defaults to single-project mode when exactly one project is registered', () => {
it('uses the project dashboard flow when exactly one project is registered', () => {
expect(resolveDashboardMode(1, {})).toEqual({
projectDashboard: false,
projectDashboard: true,
});
});

Expand Down
62 changes: 62 additions & 0 deletions apps/cli/test/unit/studio-navigation.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { describe, expect, it } from 'bun:test';

import {
categoryPath,
evalPath,
experimentPath,
jobPath,
projectHomePath,
resolveIndexRoute,
runPath,
runsHomePath,
suitePath,
} from '../../../studio/src/lib/navigation.ts';

/**
 * Contract tests for the Studio navigation helpers.
 *
 * Two concerns are pinned here: how the index route is resolved from the list of
 * registered projects, and how drill-down paths are built with and without a
 * project scope. The identifiers used contain characters that appear
 * percent-encoded in the expected strings (space, ':', '/', '>'), so each
 * assertion also pins the encoding of every path segment.
 */
describe('studio navigation helpers', () => {
  // Shared fixtures for the path-building cases below.
  const scopedProject = 'demo project';
  const runId = 'run::1';
  const suiteFile = 'evals/smoke.eval.yaml';

  it('redirects the root entrypoint to the only registered project', () => {
    // NOTE(review): the second argument is presumably the dashboard-mode flag;
    // it is left undefined here — confirm against resolveIndexRoute's signature.
    const route = resolveIndexRoute(['demo-project'], undefined, 'analytics');

    expect(route).toEqual({
      kind: 'redirect',
      redirectPath: '/projects/demo-project?tab=analytics',
    });
  });

  it('keeps explicit single-project mode on the legacy root home', () => {
    // Same single project as above, but with an explicit `false` second argument.
    const route = resolveIndexRoute(['demo-project'], false, 'runs');

    expect(route).toEqual({ kind: 'single-project-home' });
  });

  it('keeps the dashboard for zero or many projects', () => {
    // Empty registry first, then a two-project registry; both must stay on the dashboard.
    for (const registered of [[], ['one', 'two']]) {
      expect(resolveIndexRoute(registered, true)).toEqual({ kind: 'dashboard' });
    }
  });

  it('builds project-scoped drill-down paths', () => {
    const homeHref = projectHomePath(scopedProject, 'runs');
    expect(homeHref).toBe('/projects/demo%20project?tab=runs');

    const runHref = runPath(runId, scopedProject);
    expect(runHref).toBe('/projects/demo%20project/runs/run%3A%3A1');

    const evalHref = evalPath(runId, 'case/a', scopedProject);
    expect(evalHref).toBe('/projects/demo%20project/evals/run%3A%3A1/case%2Fa');

    const jobHref = jobPath('job/1', scopedProject);
    expect(jobHref).toBe('/projects/demo%20project/jobs/job%2F1');

    const categoryHref = categoryPath(runId, 'Safety > PII', scopedProject);
    expect(categoryHref).toBe(
      '/projects/demo%20project/runs/run%3A%3A1/category/Safety%20%3E%20PII',
    );

    const suiteHref = suitePath(runId, suiteFile, scopedProject);
    expect(suiteHref).toBe(
      '/projects/demo%20project/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml',
    );

    const experimentHref = experimentPath('prod-baseline', scopedProject);
    expect(experimentHref).toBe('/projects/demo%20project/experiments/prod-baseline');
  });

  it('keeps unscoped paths for legacy single-project routes', () => {
    // No project argument is passed: every expected path lacks the /projects/<id> prefix.
    const runHref = runPath(runId);
    expect(runHref).toBe('/runs/run%3A%3A1');

    const evalHref = evalPath(runId, 'case/a');
    expect(evalHref).toBe('/evals/run%3A%3A1/case%2Fa');

    const jobHref = jobPath('job/1');
    expect(jobHref).toBe('/jobs/job%2F1');

    const categoryHref = categoryPath(runId, 'Safety');
    expect(categoryHref).toBe('/runs/run%3A%3A1/category/Safety');

    const suiteHref = suitePath(runId, suiteFile);
    expect(suiteHref).toBe('/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml');

    const runsHomeHref = runsHomePath();
    expect(runsHomeHref).toBe('/?tab=runs');
  });
});
93 changes: 82 additions & 11 deletions apps/studio/src/components/Breadcrumbs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@

import { Link, useMatches } from '@tanstack/react-router';

import {
categoryPath,
evalPath,
experimentPath,
jobPath,
projectHomePath,
runPath,
suitePath,
} from '~/lib/navigation';

interface BreadcrumbSegment {
label: string;
to?: string;
Expand Down Expand Up @@ -35,51 +45,112 @@ function deriveSegments(matches: ReturnType<typeof useMatches>): BreadcrumbSegme
if (!segments.some((s) => s.label === params.projectId)) {
segments.push({
label: params.projectId,
to: `/projects/${encodeURIComponent(params.projectId)}`,
to: projectHomePath(params.projectId),
});
}
if (routeId === '/projects/$projectId') {
continue;
}
}

if (routeId.includes('/runs/$runId/category/$category')) {
if (!segments.some((s) => s.label === params.runId)) {
if (routeId.includes('/projects/$projectId_/jobs/$runId')) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: jobPath(params.runId, params.projectId),
});
}
} else if (routeId.includes('/projects/$projectId_/runs/$runId/category/$category')) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: `/runs/${encodeURIComponent(params.runId)}`,
to: runPath(params.runId, params.projectId),
});
}
segments.push({
label: params.category ?? 'Category',
to: match.pathname,
to: categoryPath(params.runId, params.category ?? 'Category', params.projectId),
});
} else if (routeId.includes('/projects/$projectId_/runs/$runId/suite/$suite')) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: runPath(params.runId, params.projectId),
});
}
segments.push({
label: params.suite ?? 'Suite',
to: suitePath(params.runId, params.suite ?? 'Suite', params.projectId),
});
} else if (routeId.includes('/projects/$projectId_/runs/$runId')) {
segments.push({
label: formatRunLabel(params.runId),
to: runPath(params.runId, params.projectId),
});
} else if (routeId.includes('/projects/$projectId_/evals/$runId/$evalId')) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: runPath(params.runId, params.projectId),
});
}
segments.push({
label: params.evalId ?? 'Eval',
to: evalPath(params.runId, params.evalId ?? 'Eval', params.projectId),
});
} else if (routeId.includes('/projects/$projectId_/experiments/$experimentName')) {
segments.push({
label: params.experimentName ?? 'Experiment',
to: experimentPath(params.experimentName ?? 'Experiment', params.projectId),
});
} else if (routeId.includes('/runs/$runId/category/$category')) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: runPath(params.runId),
});
}
segments.push({
label: params.category ?? 'Category',
to: categoryPath(params.runId, params.category ?? 'Category'),
});
} else if (routeId.includes('/runs/$runId/suite/$suite')) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: runPath(params.runId),
});
}
segments.push({
label: params.suite ?? 'Suite',
to: match.pathname,
to: suitePath(params.runId, params.suite ?? 'Suite'),
});
} else if (routeId.includes('/jobs/$runId')) {
segments.push({
label: formatRunLabel(params.runId),
to: jobPath(params.runId),
});
} else if (routeId.includes('/runs/$runId')) {
segments.push({
label: formatRunLabel(params.runId),
to: match.pathname,
to: runPath(params.runId),
});
} else if (routeId.includes('/evals/$runId/$evalId')) {
// For eval pages, show the run as a parent segment too
if (!segments.some((s) => s.label === params.runId)) {
if (!segments.some((s) => s.label === formatRunLabel(params.runId))) {
segments.push({
label: formatRunLabel(params.runId),
to: `/runs/${encodeURIComponent(params.runId)}`,
to: runPath(params.runId),
});
}
segments.push({
label: params.evalId ?? 'Eval',
to: match.pathname,
to: evalPath(params.runId, params.evalId ?? 'Eval'),
});
} else if (routeId.includes('/experiments/$experimentName')) {
segments.push({
label: params.experimentName ?? 'Experiment',
to: match.pathname,
to: experimentPath(params.experimentName ?? 'Experiment'),
});
} else if (routeId === '/index' || routeId === '/') {
segments.push({ label: 'Home', to: '/' });
Expand Down
8 changes: 4 additions & 4 deletions apps/studio/src/components/EvalDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ function findFirstFile(nodes: FileNode[]): string | null {

export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps) {
const [activeTab, setActiveTab] = useState<Tab>('checks');
const { data: config } = useStudioConfig();
const { data: config } = useStudioConfig(projectId);
const isReadOnly = config?.read_only === true;

const tabs: { id: Tab; label: string }[] = [
Expand Down Expand Up @@ -83,7 +83,7 @@ export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps)
<div className="min-h-0 flex-1">
{activeTab === 'checks' && (
<div className="overflow-auto p-4">
<ChecksTab result={result} />
<ChecksTab result={result} projectId={projectId} />
</div>
)}
{activeTab === 'files' && (
Expand Down Expand Up @@ -133,8 +133,8 @@ function AssertionCard({ assertion }: { assertion: AssertionEntry }) {
* Checks tab: overall score → per-grader scores → assertions → failure reasons.
* Assertions are grouped by evaluator when per-score assertion data is available.
*/
function ChecksTab({ result }: { result: EvalResult }) {
const { data: config } = useStudioConfig();
function ChecksTab({ result, projectId }: { result: EvalResult; projectId?: string }) {
const { data: config } = useStudioConfig(projectId);
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;

const hasFailed =
Expand Down
9 changes: 8 additions & 1 deletion apps/studio/src/components/ResumeRunActions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,14 @@ export function ResumeRunActions({
try {
const body = buildResumeRequestBody({ mode, runDir, suiteFilter, target });
const response = await launchEvalRun(body, projectId);
navigate({ to: '/jobs/$runId', params: { runId: response.id } });
if (projectId) {
navigate({
to: '/projects/$projectId/jobs/$runId',
params: { projectId, runId: response.id },
});
} else {
navigate({ to: '/jobs/$runId', params: { runId: response.id } });
}
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to launch resume');
setBusy(null);
Expand Down
22 changes: 20 additions & 2 deletions apps/studio/src/components/RunDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ function buildCategoryGroups(results: EvalResult[], passThreshold: number): Cate
}

export function RunDetail({ results, runId, projectId }: RunDetailProps) {
const { data: config } = useStudioConfig();
const { data: config } = useStudioConfig(projectId);
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;

const total = results.length;
Expand Down Expand Up @@ -143,7 +143,25 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
<tbody className="divide-y divide-gray-800/50">
{categories.map((cat) => (
<tr key={cat.name} className="transition-colors hover:bg-gray-900/30">
<td className="px-4 py-2.5 font-medium text-gray-200">{cat.name}</td>
<td className="px-4 py-2.5 font-medium text-gray-200">
{projectId ? (
<Link
to="/projects/$projectId/runs/$runId/category/$category"
params={{ projectId, runId, category: cat.name }}
className="text-cyan-400 hover:text-cyan-300 hover:underline"
>
{cat.name}
</Link>
) : (
<Link
to="/runs/$runId/category/$category"
params={{ runId, category: cat.name }}
className="text-cyan-400 hover:text-cyan-300 hover:underline"
>
{cat.name}
</Link>
)}
</td>
<td className="px-4 py-2.5">
<PassRatePill rate={cat.total > 0 ? cat.passed / cat.total : 0} />
</td>
Expand Down
5 changes: 3 additions & 2 deletions apps/studio/src/components/RunEvalModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
useEvalTargets,
useStudioConfig,
} from '~/lib/api';
import { runsHomePath } from '~/lib/navigation';
import type { RunEvalRequest } from '~/lib/types';
import {
buildRunEvalRequest,
Expand Down Expand Up @@ -69,7 +70,7 @@ export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModal
// Data
const { data: discoverData } = useEvalDiscover(projectId);
const { data: targetsData } = useEvalTargets(projectId);
const { data: runStatus } = useEvalRunStatus(activeRunId);
const { data: runStatus } = useEvalRunStatus(activeRunId, projectId);
const { data: studioConfig } = useStudioConfig(projectId);

const evalFiles = useMemo(() => discoverData?.eval_files ?? [], [discoverData]);
Expand Down Expand Up @@ -170,7 +171,7 @@ export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModal
if (activeRunId && runStatus) {
function handleRunInBackground() {
onClose();
navigate({ to: '/', search: { tab: 'runs' } as Record<string, string> });
navigate({ to: runsHomePath(projectId) });
}
return (
<ModalShell onClose={onClose} title="Eval Run">
Expand Down
2 changes: 1 addition & 1 deletion apps/studio/src/components/RunList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function formatDate(ts: string | undefined | null): { date: string; full: string
}

export function RunList({ runs, projectId, emptyMessage }: RunListProps) {
const { data: config } = useStudioConfig();
const { data: config } = useStudioConfig(projectId);
const passThreshold = config?.threshold ?? DEFAULT_PASS_THRESHOLD;

if (runs.length === 0) {
Expand Down
Loading
Loading