Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/cli_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ name: CLI Tests
on:
pull_request:
paths:
- 'packages/eval_cli/**'
- 'packages/devals_cli/**'
- 'packages/eval_config/**'
- '.github/workflows/cli_tests.yml'
push:
branches:
- main
paths:
- 'packages/eval_cli/**'
- 'packages/devals_cli/**'
- 'packages/eval_config/**'
- '.github/workflows/cli_tests.yml'

Expand All @@ -33,9 +33,9 @@ jobs:
run: flutter pub get

- name: Analyze
working-directory: packages/eval_cli
working-directory: packages/devals_cli
run: dart analyze --fatal-infos

- name: Run tests
working-directory: packages/eval_cli
working-directory: packages/devals_cli
run: flutter test
8 changes: 4 additions & 4 deletions .github/workflows/config_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ name: Config Tests
on:
pull_request:
paths:
- 'packages/eval_config/**'
- 'packages/dataset_config/**'
- '.github/workflows/config_tests.yml'
push:
branches:
- main
paths:
- 'packages/eval_config/**'
- 'packages/dataset_config/**'
- '.github/workflows/config_tests.yml'

jobs:
Expand All @@ -31,9 +31,9 @@ jobs:
run: flutter pub get

- name: Analyze
working-directory: packages/eval_config
working-directory: packages/dataset_config
run: dart analyze --fatal-infos

- name: Run tests
working-directory: packages/eval_config
working-directory: packages/dataset_config
run: dart test
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import 'package:freezed_annotation/freezed_annotation.dart';
import 'package:eval_config/src/models/models.dart';
import 'package:dataset_config/src/models/models.dart';

part 'eval_set.freezed.dart';
part 'eval_set.g.dart';
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import 'package:freezed_annotation/freezed_annotation.dart';
import 'package:eval_config/src/models/models.dart';
import 'package:dataset_config/src/models/models.dart';

part 'task.freezed.dart';
part 'task.g.dart';
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: eval_config
name: dataset_config
description: Core library for resolving eval dataset YAML into run manifests.
version: 0.0.1
publish_to: none
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import 'package:eval_config/eval_config.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:test/test.dart';

void main() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import 'dart:convert';
import 'dart:io';

import 'package:eval_config/eval_config.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:test/test.dart';

void main() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import 'package:eval_config/eval_config.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:test/test.dart';

void main() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import 'package:eval_config/eval_config.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:test/test.dart';

void main() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import 'dart:io';

import 'package:eval_config/eval_config.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:test/test.dart';
import 'package:yaml/yaml.dart';

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Test configuration for the eval_cli package.
# Test configuration for the devals_cli package.
# Run tests serially to avoid race conditions with Directory.current
# mutations in filesystem_utils_test.dart and dataset_reader_test.dart.
concurrency: 1
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import 'package:args/command_runner.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:devals/src/dataset/dataset_reader.dart';
import 'package:devals/src/dataset/eval_writer.dart';
import 'package:devals/src/dataset/file_templates/job_template.dart';
import 'package:eval_config/eval_config.dart';
import 'package:howdy/howdy.dart';

/// Interactive command to create a new job file.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import 'dart:io';

import 'package:args/command_runner.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:devals/src/cli_exception.dart';
import 'package:devals/src/dataset/eval_writer.dart';
import 'package:devals/src/dataset/file_templates/job_template.dart';
import 'package:devals/src/dataset/file_templates/task_template.dart';
import 'package:devals/src/dataset/filesystem_utils.dart';
import 'package:eval_config/eval_config.dart';
import 'package:howdy/howdy.dart';

/// Interactive guide to create a task and job in one go.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import 'dart:io';

import 'package:args/command_runner.dart';
import 'package:devals/src/config/env.dart';
import 'package:devals/src/config/expand_home_dir.dart';
import 'package:devals/src/utils/env.dart';
import 'package:devals/src/utils/expand_home_dir.dart';
import 'package:howdy/howdy.dart';

/// The result status of a single doctor check.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ library;
import 'dart:io';

import 'package:args/command_runner.dart';
import 'package:devals/src/config/expand_home_dir.dart';
import 'package:devals/src/utils/expand_home_dir.dart';
import 'package:howdy/howdy.dart';
import 'package:path/path.dart' as p;

import '../cli_exception.dart';
import '../config/env.dart';
import '../gcs/gcs_client.dart';
import '../gcs/log_validator.dart';
import '../utils/env.dart';

/// Publishes InspectAI JSON log files to a GCS bucket.
///
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import 'dart:io';

import 'package:args/command_runner.dart';
import 'package:dataset_config/dataset_config.dart';
import 'package:devals/src/dataset/dry_run.dart';
import 'package:devals/src/dataset/filesystem_utils.dart';
import 'package:eval_config/eval_config.dart';
import 'package:howdy/howdy.dart';
import 'package:path/path.dart' as p;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@ import 'filesystem_utils.dart';
/// Global accessor for the dataset reader singleton.
DatasetReader get datasetReader => DatasetReader();

/// Singleton reader for dataset configuration.
///
/// Task functions and variants are now discovered from the filesystem
/// (tasks/ directory YAML files) rather than a generated registry.
/// Reads dataset YAML files for configuration.
class DatasetReader {
DatasetReader._();
static final DatasetReader _instance = DatasetReader._();
Expand All @@ -31,14 +28,6 @@ class DatasetReader {
/// Gets the path to the tasks directory.
String get tasksDirPath => p.join(datasetDirPath, 'tasks');

/// Returns the list of common variant names for scaffolding.
List<String> getVariants() => const [
'baseline',
'context_only',
'mcp_only',
'full',
];

/// Returns the list of task names discovered from tasks/ directory.
///
/// Each subdirectory in tasks/ that contains a task.yaml file is a task.
Expand All @@ -65,6 +54,14 @@ class DatasetReader {
/// Returns the set of existing task names for duplicate checking.
Set<String> getExistingTaskNames() => getTasks().toSet();

/// Returns the list of available variant names.
///
/// These come from the [DefaultVariants] enum, which defines the
/// built-in variant configurations (baseline, flutter_rules, etc.).
List<String> getVariants() {
return DefaultVariants.values.map((v) => v.variantName).toList();
}

/// Returns task function info discovered from task.yaml files.
///
/// Reads the `func` and optional `description` field from each task.yaml.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import 'package:eval_config/eval_config.dart';
import 'package:dataset_config/dataset_config.dart';

/// Preview resolved config without running evaluations.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ String jobTemplate({

// Build named variant map YAML
// Currently this doesn't work
final variantsMap = variantDefaults();
final variantsMap = variantDefaults(variants);

return '''
# =============================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ void appendToFile(String filePath, String content) {
}

// ------------------------------------------------------------------
// Dataset discovery (moved from eval_config)
// Dataset discovery (moved from dataset_config)
// ------------------------------------------------------------------

/// The marker file that identifies a devals project root.
Expand Down
64 changes: 64 additions & 0 deletions packages/devals_cli/lib/src/dataset/variant_defaults.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/// Built-in variant presets for eval jobs.
///
/// Every value bundles the name used in YAML, a help string, and a ready-made
/// YAML snippet configuring which tools/context the agent has access to
/// during an evaluation run.
enum DefaultVariants {
  baseline(
    'baseline',
    'Run without any additional AI tools.',
    'baseline: {}',
  ),
  flutterRules(
    'flutter_rules',
    'Run with Flutter rules context files.',
    'flutter_rules: { context_files: [./context_files/flutter.md] }',
  ),
  withSkills(
    'with_skills',
    'Run with skills files.',
    'with_skills: { skill_paths: [./skills/*] }',
  ),
  withMCP(
    'with_mcp',
    'Run with Dart MCP server available.',
    'with_mcp: { mcp_servers: [dart] }',
  );

  const DefaultVariants(this.variantName, this.help, this.yaml);

  /// Name of the variant exactly as written in YAML (e.g. `'baseline'`).
  final String variantName;

  /// Human-readable description shown in CLI prompts.
  final String help;

  /// Single-line YAML entry for this variant (e.g. `'baseline: {}'`).
  final String yaml;

  /// Returns the value whose [variantName] equals [name].
  ///
  /// The comparison is exact; `null` is returned when no value matches.
  static DefaultVariants? tryByName(String name) {
    final matches = values.where((candidate) => candidate.variantName == name);
    return matches.isEmpty ? null : matches.first;
  }
}

/// Renders the entries of a job file's `variants:` section as YAML text.
///
/// Each name in [selectedVariants] becomes one newline-terminated line, in
/// input order. A name matching a [DefaultVariants] value uses that value's
/// YAML snippet; any other name gets an empty config (`variant_name: {}`).
/// An empty list yields an empty string.
String variantDefaults(List<String> selectedVariants) {
  return selectedVariants.map((name) {
    // Fall back to an empty mapping for names without a built-in preset.
    final entry = DefaultVariants.tryByName(name)?.yaml ?? '$name: {}';
    return ' $entry\n';
  }).join();
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ dependencies:
git:
url: https://github.com/ericwindmill/howdy.git
path: packages/howdy-cli
eval_config:
path: ../eval_config
dataset_config:
path: ../dataset_config
path: ^1.9.0
yaml: ^3.1.0
yaml_edit: ^2.2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void main() {
final variants = reader.getVariants();
expect(
variants,
containsAll(['baseline', 'context_only', 'mcp_only', 'full']),
containsAll(['baseline', 'flutter_rules', 'with_skills', 'with_mcp']),
);
} finally {
Directory.current = originalDir;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ class DevalResult {
///
/// [args] are the command-line arguments (e.g., `['init']`, `['create', 'task']`).
/// [stdinLines] are lines to feed to the process's stdin (for interactive prompts).
/// [workingDirectory] is the directory to run in (defaults to eval_cli package root).
/// [workingDirectory] is the directory to run in (defaults to devals_cli package root).
///
/// Returns a [DevalResult] with captured exit code, stdout, and stderr.
Future<DevalResult> runDevals(
List<String> args, {
List<String>? stdinLines,
required String workingDirectory,
}) async {
// Resolve the path to bin/devals.dart relative to the eval_cli package.
final evalCliRoot = _findEvalCliRoot();
// Resolve the path to bin/devals.dart relative to the devals_cli package.
final evalCliRoot = _findDevalsCliRoot();
final devalsScript = p.join(evalCliRoot, 'bin', 'devals.dart');

final process = await Process.start(
Expand All @@ -64,10 +64,10 @@ Future<DevalResult> runDevals(
return DevalResult(exitCode: exitCode, stdout: stdout, stderr: stderr);
}

/// Finds the eval_cli package root by walking up from this test file.
String _findEvalCliRoot() {
// This file lives at pkgs/eval_cli/test/e2e/e2e_helpers.dart
// We need to find pkgs/eval_cli/
/// Finds the devals_cli package root by walking up from this test file.
String _findDevalsCliRoot() {
// This file lives at packages/devals_cli/test/e2e/e2e_helpers.dart
// We need to find packages/devals_cli/
var dir = Directory(p.dirname(Platform.script.toFilePath()));

// Walk up until we find pubspec.yaml with name: devals
Expand All @@ -80,7 +80,7 @@ String _findEvalCliRoot() {
dir = dir.parent;
}

// Fallback: assume we're running from the eval_cli directory
// Fallback: assume we're running from the devals_cli directory
return Directory.current.path;
}

Expand Down
3 changes: 0 additions & 3 deletions packages/eval_cli/.dart_tool/pub/workspace_ref.json

This file was deleted.

This file was deleted.

Loading
Loading