Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 17 additions & 35 deletions apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,17 +127,13 @@ export function loadResults(content: string): EvaluationResult[] {

/**
 * Decide whether Studio should open on the projects dashboard or on the
 * single-project view.
 *
 * @param projectCount - Number of projects currently registered.
 * @param options - `single: true` forces the single-project view regardless
 *   of how many projects are registered.
 * @returns `projectDashboard` is `true` only when more than one project is
 *   registered and the single-project view was not forced.
 */
export function resolveDashboardMode(
  projectCount: number,
  options: { single?: boolean },
): { projectDashboard: boolean } {
  // An explicit --single always wins over auto-detection.
  if (options.single === true) {
    return { projectDashboard: false };
  }

  // Auto-detect: the dashboard is only useful with two or more projects.
  return { projectDashboard: projectCount > 1 };
}

// ── Feedback persistence ─────────────────────────────────────────────────
Expand Down Expand Up @@ -901,13 +897,13 @@ async function handleTargets(c: C, { searchDir, agentvDir }: DataContext) {
/**
 * Respond with the Studio configuration as JSON.
 *
 * The payload is the studio config loaded from `agentvDir`, extended with:
 * - `read_only`: `true` only when `options.readOnly` is exactly `true`.
 * - `project_name`: the base name of `searchDir`.
 * - `project_dashboard`: `true` only when `options.projectDashboard` is
 *   exactly `true`.
 *
 * Keys listed after the spread override same-named keys from the loaded config.
 */
function handleConfig(
  c: C,
  { agentvDir, searchDir }: DataContext,
  options?: { readOnly?: boolean; projectDashboard?: boolean },
) {
  return c.json({
    ...loadStudioConfig(agentvDir),
    read_only: options?.readOnly === true,
    project_name: path.basename(searchDir),
    project_dashboard: options?.projectDashboard === true,
  });
}

Expand Down Expand Up @@ -973,7 +969,7 @@ export function createApp(
resultDir: string,
cwd?: string,
sourceFile?: string,
options?: { studioDir?: string; readOnly?: boolean; multiProjectDashboard?: boolean },
options?: { studioDir?: string; readOnly?: boolean; projectDashboard?: boolean },
): Hono {
const searchDir = cwd ?? resultDir;
const agentvDir = path.join(searchDir, '.agentv');
Expand Down Expand Up @@ -1175,7 +1171,7 @@ export function createApp(
app.get('/api/config', (c) =>
handleConfig(c, defaultCtx, {
readOnly,
multiProjectDashboard: options?.multiProjectDashboard,
projectDashboard: options?.projectDashboard,
}),
);
app.get('/api/remote/status', async (c) => c.json(await getRemoteResultsStatus(searchDir)));
Expand Down Expand Up @@ -1293,7 +1289,7 @@ export function createApp(
withProject(c, (ctx, dataCtx) =>
handleConfig(ctx, dataCtx, {
readOnly,
multiProjectDashboard: options?.multiProjectDashboard,
projectDashboard: options?.projectDashboard,
}),
),
);
Expand Down Expand Up @@ -1459,11 +1455,6 @@ export const resultsServeCommand = command({
short: 'd',
description: 'Working directory (default: current directory)',
}),
multi: flag({
long: 'multi',
description:
'Launch in multi-project dashboard mode (deprecated; use auto-detect or --single)',
}),
single: flag({
long: 'single',
description: 'Force single-project dashboard mode',
Expand All @@ -1483,7 +1474,7 @@ export const resultsServeCommand = command({
description: 'Disable write operations and launch Studio in read-only leaderboard mode',
}),
},
handler: async ({ source, port, dir, multi, single, add, remove, readOnly }) => {
handler: async ({ source, port, dir, single, add, remove, readOnly }) => {
const cwd = dir ?? process.cwd();
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);

Expand Down Expand Up @@ -1522,14 +1513,11 @@ export const resultsServeCommand = command({
await enforceRequiredVersion(yamlConfig.required_version);
}

// ── Determine multi-project mode ─────────────────────────────────
// ── Determine dashboard mode ─────────────────────────────────────
const registry = loadProjectRegistry();
const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.projects.length, {
multi,
single,
});
const { projectDashboard } = resolveDashboardMode(registry.projects.length, { single });

// ── Benchmark sync preflight ─────────────────────────────────────
// ── Project sync preflight ───────────────────────────────────────
// Clone or pull any project entries that declare a source.
await syncProjects(registry.projects);

Expand Down Expand Up @@ -1563,25 +1551,19 @@ export const resultsServeCommand = command({
const resultDir = sourceFile ? path.dirname(path.resolve(sourceFile)) : cwd;
const app = createApp(results, resultDir, cwd, sourceFile, {
readOnly,
multiProjectDashboard: isMultiProject,
projectDashboard,
});

if (showMultiWarning) {
console.warn(
'Warning: --multi is deprecated. Studio now auto-detects multi-project mode when multiple projects are registered. Use --single to force the single-project view.',
);
}

if (isMultiProject) {
console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
if (projectDashboard) {
console.log(`Project dashboard: ${registry.projects.length} project(s) registered`);
} else if (results.length > 0 && sourceFile) {
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
} else {
console.log('No results found. Dashboard will show an empty state.');
console.log('Run an evaluation to see results: agentv eval <eval-file>');
}
console.log(`Dashboard: http://localhost:${listenPort}`);
console.log(`Benchmarks API: http://localhost:${listenPort}/api/projects`);
console.log(`Projects API: http://localhost:${listenPort}/api/projects`);
console.log('Press Ctrl+C to stop');

const { serve: startServer } = await import('@hono/node-server');
Expand Down
42 changes: 12 additions & 30 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,45 +104,27 @@ describe('loadResults', () => {
// ── resolveDashboardMode ───────────────────────────────────────────────

// Covers auto-detection by registered project count and the --single override.
describe('resolveDashboardMode', () => {
  it('defaults to single-project mode when no projects are registered', () => {
    expect(resolveDashboardMode(0, {})).toEqual({
      projectDashboard: false,
    });
  });

  it('defaults to single-project mode when exactly one project is registered', () => {
    expect(resolveDashboardMode(1, {})).toEqual({
      projectDashboard: false,
    });
  });

  it('defaults to the projects dashboard when multiple projects are registered', () => {
    expect(resolveDashboardMode(2, {})).toEqual({
      projectDashboard: true,
    });
  });

  it('forces single-project mode when --single is used', () => {
    expect(resolveDashboardMode(3, { single: true })).toEqual({
      projectDashboard: false,
    });
  });
});
Expand Down Expand Up @@ -366,23 +348,23 @@ describe('serve app', () => {
});

// Verifies that createApp options are surfaced in the /api/config payload.
describe('GET /api/config', () => {
  it('includes read_only mode and dashboard mode in the config payload', async () => {
    const content = toJsonl(RESULT_A, RESULT_B);
    const results = loadResults(content);
    const app = createApp(results, tempDir, undefined, undefined, {
      studioDir,
      readOnly: true,
      projectDashboard: true,
    });

    const res = await app.request('/api/config');
    expect(res.status).toBe(200);
    const data = (await res.json()) as {
      read_only?: boolean;
      project_dashboard?: boolean;
    };
    expect(data.read_only).toBe(true);
    expect(data.project_dashboard).toBe(true);
  });
});

Expand Down
2 changes: 1 addition & 1 deletion apps/studio/src/components/ProjectCard.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Project card for the multi-project dashboard.
* Project card for the projects dashboard.
*
* Shows project name, path, run count, pass rate, and last run time.
* Click navigates to the project's run list.
Expand Down
2 changes: 1 addition & 1 deletion apps/studio/src/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ export interface StudioConfigResponse {
pass_threshold?: number;
read_only?: boolean;
project_name?: string;
multi_project_dashboard?: boolean;
project_dashboard?: boolean;
}

export interface RemoteStatusResponse {
Expand Down
10 changes: 5 additions & 5 deletions apps/studio/src/routes/index.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Home route: shows the multi-project dashboard when the server enables it,
* or the existing tabbed landing page (Runs, Experiments, Analytics, Targets)
* in single-project mode.
* Home route: shows the projects dashboard by default when multiple projects
* are registered, or the existing tabbed landing page (Runs, Experiments,
* Analytics, Targets) in single-project mode.
*
* Uses URL search param `?tab=` for tab persistence.
*/
Expand Down Expand Up @@ -45,13 +45,13 @@ function HomePage() {
const { data: projectData, isLoading: projectsLoading } = useProjectList();
const { data: config, isLoading: configLoading } = useStudioConfig();
const hasProjects = (projectData?.projects.length ?? 0) > 0;
const multiProjectDashboard = config?.multi_project_dashboard;
const projectDashboard = config?.project_dashboard;

if (projectsLoading || configLoading) {
return <LoadingSkeleton />;
}

if (multiProjectDashboard === true || (multiProjectDashboard === undefined && hasProjects)) {
if (projectDashboard === true || (projectDashboard === undefined && hasProjects)) {
return <ProjectsDashboard />;
}

Expand Down
Loading