diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java
new file mode 100644
index 00000000..7d3684a6
--- /dev/null
+++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java
@@ -0,0 +1,43 @@
+package dev.braintrust.eval;
+
+import java.util.Map;
+import javax.annotation.Nullable;
+
+/**
+ * A single structured classification produced by a {@link Classifier}.
+ *
+ * <p>Unlike a {@link Score} (numeric 0-1), a Classification carries a stable id, an optional
+ * display label, and optional metadata. The {@code name} acts as the grouping key in the aggregated
+ * result map; when {@code name} is {@code null} or blank, the owning classifier's resolved name is
+ * used instead.
+ *
+ * <p>This record performs no validation of its own; the {@link Classifier} factory methods reject
+ * items whose {@code id} is {@code null} or blank.
+ *
+ * @param name optional grouping key; defaults to the owning classifier's resolved name when null or
+ *     blank
+ * @param id stable identifier for the classification (required)
+ * @param label optional display label
+ * @param metadata optional arbitrary metadata
+ */
+public record Classification(
+        @Nullable String name,
+        String id,
+        @Nullable String label,
+        @Nullable Map<String, Object> metadata) {
+
+    /** Creates a classification with only an {@code id}; every other component is {@code null}. */
+    public static Classification of(String id) {
+        return new Classification(null, id, null, null);
+    }
+
+    /** Creates a classification with an {@code id} and a display {@code label}. */
+    public static Classification of(String id, String label) {
+        return new Classification(null, id, label, null);
+    }
+
+    /** Creates a classification with an explicit grouping {@code name}, {@code id} and label. */
+    public static Classification of(String name, String id, String label) {
+        return new Classification(name, id, label, null);
+    }
+}
diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java
new file mode 100644
index 00000000..e37b3e3e
--- /dev/null
+++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java
@@ -0,0 +1,101 @@
+package dev.braintrust.eval;
+
+import java.util.List;
+import java.util.function.Function;
+
+/**
+ * A classifier categorizes and labels eval outputs, producing zero or more structured {@link
+ * Classification} items.
+ *
+ * <p>Classifiers run independently from {@link Scorer}s. Each Classifier exposes a name (used as
+ * the span name and as the default grouping key for classifications whose own {@code name} is
+ * blank).
+ *
+ * @param <INPUT> type of the input data
+ * @param <OUTPUT> type of the output data
+ */
+public interface Classifier<INPUT, OUTPUT> {
+    /** Message prefix of the exception thrown for an invalid classification item. */
+    String INVALID_CLASSIFICATION_MESSAGE =
+            "When returning structured classifier results, each classification must be a non-empty"
+                    + " object.";
+
+    /** Returns the name of this classifier. */
+    String getName();
+
+    /**
+     * Classifies the result of a successful task execution.
+     *
+     * @param taskResult the task output and originating dataset case
+     * @return zero or more classifications. An empty list means "no classifications for this case".
+     */
+    List<Classification> classify(TaskResult<INPUT, OUTPUT> taskResult);
+
+    /**
+     * Creates a classifier from a function that returns a (possibly empty or null) list of
+     * classifications.
+     *
+     * <p>A {@code null} return value is treated as no classifications. Each returned {@link
+     * Classification} must have a non-blank {@code id}; otherwise the classifier throws an
+     * exception (which the eval runner records but does not abort on).
+     */
+    static <INPUT, OUTPUT> Classifier<INPUT, OUTPUT> of(
+            String classifierName,
+            Function<TaskResult<INPUT, OUTPUT>, List<Classification>> classifierFn) {
+        return new Classifier<>() {
+            @Override
+            public String getName() {
+                return classifierName;
+            }
+
+            @Override
+            public List<Classification> classify(TaskResult<INPUT, OUTPUT> taskResult) {
+                var result = classifierFn.apply(taskResult);
+                if (result == null) {
+                    return List.of();
+                }
+                for (var item : result) {
+                    validate(item);
+                }
+                return result;
+            }
+        };
+    }
+
+    /**
+     * Creates a classifier from a function that returns a single classification.
+     *
+     * <p>A {@code null} return value is treated as no classifications. A returned item whose
+     * {@code id} is null or blank makes the classifier throw {@link IllegalArgumentException}.
+     */
+    static <INPUT, OUTPUT> Classifier<INPUT, OUTPUT> single(
+            String classifierName,
+            Function<TaskResult<INPUT, OUTPUT>, Classification> classifierFn) {
+        return new Classifier<>() {
+            @Override
+            public String getName() {
+                return classifierName;
+            }
+
+            @Override
+            public List<Classification> classify(TaskResult<INPUT, OUTPUT> taskResult) {
+                var item = classifierFn.apply(taskResult);
+                if (item == null) {
+                    return List.of();
+                }
+                validate(item);
+                return List.of(item);
+            }
+        };
+    }
+
+    /**
+     * Validates a single classification: it must have a non-blank id. Throws with the spec-mandated
+     * wording on failure.
+     */
+    private static void validate(Classification item) {
+        if (item == null || item.id() == null || item.id().isBlank()) {
+            throw new IllegalArgumentException(INVALID_CLASSIFICATION_MESSAGE + " Got: " + item);
+        }
+    }
+}
diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java
index ee814baf..a9c3d06b 100644
--- a/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java
+++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java
@@ -42,6 +42,7 @@ public final class Eval {
private final @Nonnull Dataset dataset;
private final @Nonnull Task task;
private final @Nonnull List> scorers;
+ private final @Nonnull List> classifiers;
private final @Nonnull List tags;
private final @Nonnull Map metadata;
private final @Nonnull Parameters parameters;
@@ -58,6 +59,7 @@ private Eval(Builder builder) {
this.dataset = builder.dataset;
this.task = Objects.requireNonNull(builder.task);
this.scorers = List.copyOf(builder.scorers);
+ this.classifiers = List.copyOf(builder.classifiers);
this.tags = List.copyOf(builder.tags);
this.metadata = Map.copyOf(builder.metadata);
this.parameters = builder.buildParameters();
@@ -172,6 +174,42 @@ private void evalOne(String experimentId, DatasetCase datasetCase
for (var scorer : scorers) {
runScorer(experimentId, rootSpan, scorer, taskResult, trace);
}
+
+            // Classifier stage: one span per classifier. A classifier failure is non-fatal: it is
+            // recorded on that classifier's span and reported in the root span's metadata under
+            // `classifier_errors`, but does not abort the eval or affect other classifiers/
+            // scorers. Classifiers only run when the task succeeded (no scoreForTaskException
+            // analogue).
+            if (!classifiers.isEmpty()) {
+                Map<String, List<Map<String, Object>>> caseClassifications = new LinkedHashMap<>();
+                Map<String, String> classifierErrors = new LinkedHashMap<>();
+                for (int i = 0; i < classifiers.size(); i++) {
+                    var classifier = classifiers.get(i);
+                    var classifierName = classifier.getName();
+                    if (classifierName == null || classifierName.isBlank()) {
+                        classifierName = "classifier_" + i;
+                    }
+                    runClassifier(
+                            experimentId,
+                            classifier,
+                            classifierName,
+                            taskResult,
+                            trace,
+                            caseClassifications,
+                            classifierErrors);
+                }
+                if (!caseClassifications.isEmpty()) {
+                    rootSpan.setAttribute(
+                            "braintrust.classifications", toJson(caseClassifications));
+                }
+                if (!classifierErrors.isEmpty()) {
+                    Map<String, Object> mergedMetadata =
+                            new LinkedHashMap<>(datasetCase.metadata());
+                    mergedMetadata.put("classifier_errors", classifierErrors);
+                    rootSpan.setAttribute(
+                            AttributeKey.stringKey("braintrust.metadata"), toJson(mergedMetadata));
+                }
+            }
} finally {
rootSpan.end();
}
@@ -236,6 +274,84 @@ private void runScoreForTaskException(
}
}
+    /**
+     * Runs a classifier inside its own span. Exceptions and {@code null} result items are recorded
+     * on the classifier span and surfaced via {@code classifierErrors}; they do not propagate.
+     */
+    private void runClassifier(
+            String experimentId,
+            Classifier<INPUT, OUTPUT> classifier,
+            String resolvedName,
+            TaskResult<INPUT, OUTPUT> taskResult,
+            BrainstoreTrace trace,
+            Map<String, List<Map<String, Object>>> caseClassifications,
+            Map<String, String> classifierErrors) {
+        var classifierSpan =
+                tracer.spanBuilder(resolvedName)
+                        .setAttribute(PARENT, "experiment_id:" + experimentId)
+                        .startSpan();
+        try (var unused =
+                BraintrustContext.ofExperiment(experimentId, classifierSpan).makeCurrent()) {
+            Map<String, Object> spanAttrs = new LinkedHashMap<>();
+            spanAttrs.put("type", "classifier");
+            spanAttrs.put("name", resolvedName);
+            spanAttrs.put("purpose", "scorer");
+            classifierSpan.setAttribute("braintrust.span_attributes", toJson(spanAttrs));
+            List<Classification> classifications;
+            try {
+                classifications =
+                        classifier instanceof TracedClassifier<INPUT, OUTPUT> tracedClassifier
+                                ? tracedClassifier.classify(taskResult, trace)
+                                : classifier.classify(taskResult);
+                if (classifications == null) {
+                    classifications = List.of();
+                }
+                // Hand-written Classifier implementations bypass the factory-side validation.
+                if (classifications.stream().anyMatch(Objects::isNull)) {
+                    throw new IllegalArgumentException(
+                            Classifier.INVALID_CLASSIFICATION_MESSAGE + " Got: null");
+                }
+            } catch (Exception e) {
+                classifierSpan.setStatus(StatusCode.ERROR, e.getMessage());
+                classifierSpan.recordException(e);
+                log.debug("Classifier '{}' threw exception", resolvedName, e);
+                classifierErrors.put(
+                        resolvedName, e.getMessage() == null ? e.toString() : e.getMessage());
+                return;
+            }
+            // Group by item name (classifier name when blank) for the span and the case aggregate.
+            Map<String, List<Map<String, Object>>> outputByName = new LinkedHashMap<>();
+            for (var item : classifications) {
+                var itemName = item.name();
+                if (itemName == null || itemName.isBlank()) {
+                    itemName = resolvedName;
+                }
+                var itemMap = toClassificationItem(item);
+                outputByName.computeIfAbsent(itemName, k -> new ArrayList<>()).add(itemMap);
+                caseClassifications.computeIfAbsent(itemName, k -> new ArrayList<>()).add(itemMap);
+            }
+            classifierSpan.setAttribute("braintrust.output_json", toJson(outputByName));
+        } finally {
+            classifierSpan.end();
+        }
+    }
+
+    /**
+     * Converts a {@link Classification} to the wire-format {@code ClassificationItem}: always emits
+     * {@code id}, drops {@code name}, and adds {@code label} / {@code metadata} only when non-null.
+     */
+    private static Map<String, Object> toClassificationItem(Classification c) {
+        Map<String, Object> m = new LinkedHashMap<>();
+        m.put("id", c.id());
+        if (c.label() != null) {
+            m.put("label", c.label());
+        }
+        if (c.metadata() != null) {
+            m.put("metadata", c.metadata());
+        }
+        return m;
+    }
+
/** Validates and records scores on the score span and root span. */
private void recordScores(
Span scoreSpan, Span rootSpan, Scorer scorer, List scores) {
@@ -276,6 +392,7 @@ public static final class Builder {
private @Nullable Tracer tracer = null;
private @Nullable Task task;
private @Nonnull List> scorers = List.of();
+ private @Nonnull List> classifiers = List.of();
private @Nonnull List> parameterDefs = List.of();
private @Nonnull Map parameterValues = Map.of();
private @Nonnull List tags = List.of();
@@ -291,8 +408,8 @@ public Eval build() {
if (projectId == null) {
projectId = config.defaultProjectId().orElse(null);
}
- if (scorers.isEmpty()) {
- throw new RuntimeException("must provide at least one scorer");
+ if (scorers.isEmpty() && classifiers.isEmpty()) {
+ throw new RuntimeException("must provide at least one scorer or classifier");
}
if (null == apiClient) {
apiClient = BraintrustOpenApiClient.of(config);
@@ -380,6 +497,12 @@ public final Builder scorers(Scorer ... scorers) {
return this;
}
+        @SafeVarargs
+        public final Builder<INPUT, OUTPUT> classifiers(Classifier<INPUT, OUTPUT>... classifiers) {
+            this.classifiers = List.of(classifiers); // replaces any previously set classifiers
+            return this;
+        }
+
/** Sets tags for the experiment. */
public Builder tags(List tags) {
this.tags = List.copyOf(tags);
diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java
new file mode 100644
index 00000000..db67d78b
--- /dev/null
+++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java
@@ -0,0 +1,41 @@
+package dev.braintrust.eval;
+
+import dev.braintrust.trace.BrainstoreTrace;
+import java.util.List;
+
+/**
+ * A classifier that receives access to the full distributed trace of the task that was evaluated.
+ *
+ * <p>Implement this interface when your classifier needs to examine intermediate LLM calls, tool
+ * invocations, or other spans produced during task execution — not just the final {@link
+ * TaskResult}.
+ *
+ * @param <INPUT> type of the input data
+ * @param <OUTPUT> type of the output data
+ */
+public interface TracedClassifier<INPUT, OUTPUT> extends Classifier<INPUT, OUTPUT> {
+
+    /**
+     * Classifies the task result using the distributed trace for additional context. Called instead
+     * of {@link Classifier#classify(TaskResult)} when a {@link BrainstoreTrace} is available.
+     *
+     * @param taskResult the task output and originating dataset case
+     * @param trace lazy access to the distributed trace spans for this eval case
+     * @return zero or more classifications
+     */
+    List<Classification> classify(TaskResult<INPUT, OUTPUT> taskResult, BrainstoreTrace trace);
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>When used inside an {@link Eval}, this overload is never called — {@link
+     * #classify(TaskResult, BrainstoreTrace)} is dispatched instead. This default implementation
+     * throws {@link UnsupportedOperationException} to surface any accidental direct calls.
+     */
+    @Override
+    default List<Classification> classify(TaskResult<INPUT, OUTPUT> taskResult) {
+        throw new UnsupportedOperationException(
+                "traced classifier classify method directly called. This is likely an accident. If"
+                        + " you wish to support this, your implementation must override this method.");
+    }
+}
diff --git a/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java
new file mode 100644
index 00000000..fb1445c6
--- /dev/null
+++ b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java
@@ -0,0 +1,304 @@
+package dev.braintrust.eval;
+
+import static dev.braintrust.json.BraintrustJsonMapper.fromJson;
+import static org.junit.jupiter.api.Assertions.*;
+
+import dev.braintrust.TestHarness;
+import io.opentelemetry.api.common.AttributeKey;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import java.util.List;
+import java.util.Map;
+import lombok.SneakyThrows;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * End-to-end tests for the classifier flow on {@link Eval}, modeled on {@link EvalTest}.
+ *
+ * <p>These tests rely on VCR cassettes for new experiment names. Cassettes are regenerated by
+ * running {@code VCR_MODE=record BRAINTRUST_API_KEY=... ./gradlew :braintrust-sdk:test --tests
+ * 'dev.braintrust.eval.ClassifierEvalTest'} (see {@code scripts/re-record-cassettes.sh}).
+ */
+public class ClassifierEvalTest {
+    private TestHarness testHarness;
+
+    @BeforeEach
+    void beforeEach() {
+        testHarness = TestHarness.setup();
+    }
+
+    // ---------------------------------------------------------------------------
+    // Builder validation (no API calls)
+    // ---------------------------------------------------------------------------
+
+    @Test
+    void builderRejectsEmptyScorersAndClassifiers() {
+        var error =
+                assertThrows(
+                        RuntimeException.class,
+                        () ->
+                                testHarness
+                                        .braintrust()
+                                        .<String, String>evalBuilder()
+                                        .name("no-scorers-or-classifiers")
+                                        .cases(DatasetCase.of("hello", null))
+                                        .taskFunction(input -> input)
+                                        .build());
+        assertTrue(
+                error.getMessage().contains("at least one scorer or classifier"),
+                "expected message about scorer-or-classifier requirement, got: "
+                        + error.getMessage());
+    }
+
+    @Test
+    void builderAcceptsClassifiersOnly() {
+        assertDoesNotThrow(
+                () ->
+                        testHarness
+                                .braintrust()
+                                .<String, String>evalBuilder()
+                                .name("classifiers-only-build")
+                                .cases(DatasetCase.of("hello", null))
+                                .taskFunction(input -> input)
+                                .classifiers(
+                                        Classifier.single(
+                                                "category",
+                                                tr ->
+                                                        new Classification(
+                                                                "category",
+                                                                "greeting",
+                                                                null,
+                                                                null)))
+                                .build());
+    }
+
+    @Test
+    void builderAcceptsBothScorersAndClassifiers() {
+        assertDoesNotThrow(
+                () ->
+                        testHarness
+                                .braintrust()
+                                .<String, String>evalBuilder()
+                                .name("scorers-and-classifiers-build")
+                                .cases(DatasetCase.of("hello", "HELLO"))
+                                .taskFunction(String::toUpperCase)
+                                .scorers(Scorer.of("exact", tr -> 1.0))
+                                .classifiers(
+                                        Classifier.single(
+                                                "category",
+                                                tr ->
+                                                        new Classification(
+                                                                "category", "text", null, null)))
+                                .build());
+    }
+
+    // ---------------------------------------------------------------------------
+    // End-to-end span assertions (require VCR cassettes)
+    // ---------------------------------------------------------------------------
+
+    @Test
+    @SneakyThrows
+    void classifierOnlyEvalProducesClassifierSpanAndRootAggregation() {
+        var eval =
+                testHarness
+                        .braintrust()
+                        .<String, String>evalBuilder()
+                        .name("classifier-only-eval")
+                        .cases(DatasetCase.of("hello", null))
+                        .taskFunction(input -> input)
+                        .classifiers(
+                                Classifier.single(
+                                        "category",
+                                        tr ->
+                                                new Classification(
+                                                        "category", "greeting", "Greeting", null)))
+                        .build();
+        eval.run();
+
+        var spans = testHarness.awaitExportedSpans();
+        var classifierSpan = findSpanByName(spans, "category");
+        assertNotNull(classifierSpan, "expected classifier span named 'category'");
+
+        var spanAttrsJson =
+                classifierSpan
+                        .getAttributes()
+                        .get(AttributeKey.stringKey("braintrust.span_attributes"));
+        assertNotNull(spanAttrsJson);
+        var spanAttrs = fromJson(spanAttrsJson, Map.class);
+        assertEquals("classifier", spanAttrs.get("type"));
+        assertEquals("scorer", spanAttrs.get("purpose"));
+        assertEquals("category", spanAttrs.get("name"));
+
+        var outputJson =
+                classifierSpan
+                        .getAttributes()
+                        .get(AttributeKey.stringKey("braintrust.output_json"));
+        assertNotNull(outputJson);
+        var output = fromJson(outputJson, Map.class);
+        @SuppressWarnings("unchecked")
+        var category = (List<Map<String, Object>>) output.get("category");
+        assertEquals(1, category.size());
+        assertEquals("greeting", category.get(0).get("id"));
+        assertEquals("Greeting", category.get(0).get("label"));
+
+        var rootSpan = findRootEvalSpan(spans);
+        assertNotNull(rootSpan);
+        var classificationsJson =
+                rootSpan.getAttributes().get(AttributeKey.stringKey("braintrust.classifications"));
+        assertNotNull(classificationsJson, "root span should carry braintrust.classifications");
+        var classifications = fromJson(classificationsJson, Map.class);
+        assertTrue(classifications.containsKey("category"));
+    }
+
+    @Test
+    @SneakyThrows
+    void scorerAndClassifierBothRun() {
+        var eval =
+                testHarness
+                        .braintrust()
+                        .<String, String>evalBuilder()
+                        .name("scorer-and-classifier-eval")
+                        .cases(DatasetCase.of("hello", "HELLO"))
+                        .taskFunction(String::toUpperCase)
+                        .scorers(
+                                Scorer.of(
+                                        "exact",
+                                        tr ->
+                                                tr.result().equals(tr.datasetCase().expected())
+                                                        ? 1.0
+                                                        : 0.0))
+                        .classifiers(
+                                Classifier.single(
+                                        "category",
+                                        tr -> new Classification("category", "text", null, null)))
+                        .build();
+        eval.run();
+
+        var spans = testHarness.awaitExportedSpans();
+        assertNotNull(findSpanByName(spans, "score"), "expected score span");
+        assertNotNull(findSpanByName(spans, "category"), "expected classifier span");
+    }
+
+    @Test
+    @SneakyThrows
+    void classifierExceptionIsNonFatalAndRecordedInRootMetadata() {
+        var eval =
+                testHarness
+                        .braintrust()
+                        .<String, String>evalBuilder()
+                        .name("classifier-error-eval")
+                        .cases(DatasetCase.of("hello", null))
+                        .taskFunction(input -> input)
+                        .classifiers(
+                                Classifier.single(
+                                        "broken",
+                                        tr -> {
+                                            throw new RuntimeException("classifier boom");
+                                        }),
+                                Classifier.single(
+                                        "working",
+                                        tr -> new Classification("working", "ok", null, null)))
+                        .build();
+        eval.run();
+
+        var spans = testHarness.awaitExportedSpans();
+        var rootSpan = findRootEvalSpan(spans);
+        assertNotNull(rootSpan);
+
+        var metadataJson =
+                rootSpan.getAttributes().get(AttributeKey.stringKey("braintrust.metadata"));
+        assertNotNull(metadataJson, "expected metadata to be set when classifier errors occur");
+        var metadata = fromJson(metadataJson, Map.class);
+        @SuppressWarnings("unchecked")
+        var classifierErrors = (Map<String, String>) metadata.get("classifier_errors");
+        assertNotNull(classifierErrors);
+        assertEquals("classifier boom", classifierErrors.get("broken"));
+
+        // The working classifier still produced output
+        var classificationsJson =
+                rootSpan.getAttributes().get(AttributeKey.stringKey("braintrust.classifications"));
+        assertNotNull(classificationsJson);
+        var classifications = fromJson(classificationsJson, Map.class);
+        assertTrue(classifications.containsKey("working"));
+    }
+
+    @Test
+    @SneakyThrows
+    void multiLabelClassifierGroupsUnderOneName() {
+        var eval =
+                testHarness
+                        .braintrust()
+                        .<String, String>evalBuilder()
+                        .name("multi-label-eval")
+                        .cases(DatasetCase.of("great!", null))
+                        .taskFunction(input -> input)
+                        .classifiers(
+                                Classifier.of(
+                                        "sentiment",
+                                        tr ->
+                                                List.of(
+                                                        new Classification(
+                                                                "sentiment",
+                                                                "positive",
+                                                                "Positive",
+                                                                null),
+                                                        new Classification(
+                                                                "sentiment",
+                                                                "enthusiastic",
+                                                                "Enthusiastic",
+                                                                null))))
+                        .build();
+        eval.run();
+
+        var spans = testHarness.awaitExportedSpans();
+        var rootSpan = findRootEvalSpan(spans);
+        assertNotNull(rootSpan);
+        var classifications =
+                fromJson(
+                        rootSpan.getAttributes()
+                                .get(AttributeKey.stringKey("braintrust.classifications")),
+                        Map.class);
+        @SuppressWarnings("unchecked")
+        var sentiment = (List<Map<String, Object>>) classifications.get("sentiment");
+        assertEquals(2, sentiment.size());
+        assertEquals("positive", sentiment.get(0).get("id"));
+        assertEquals("enthusiastic", sentiment.get(1).get("id"));
+    }
+
+    @Test
+    @SneakyThrows
+    void blankItemNameDefaultsToClassifierName() {
+        var eval =
+                testHarness
+                        .braintrust()
+                        .<String, String>evalBuilder()
+                        .name("default-name-eval")
+                        .cases(DatasetCase.of("hello", null))
+                        .taskFunction(input -> input)
+                        .classifiers(
+                                // No name on the item — should fall back to the classifier name.
+                                Classifier.single(
+                                        "my_classifier",
+                                        tr -> new Classification(null, "foo", null, null)))
+                        .build();
+        eval.run();
+
+        var spans = testHarness.awaitExportedSpans();
+        var rootSpan = findRootEvalSpan(spans);
+        assertNotNull(rootSpan);
+        var classifications =
+                fromJson(
+                        rootSpan.getAttributes()
+                                .get(AttributeKey.stringKey("braintrust.classifications")),
+                        Map.class);
+        assertTrue(classifications.containsKey("my_classifier"));
+    }
+
+    private static SpanData findSpanByName(List<SpanData> spans, String name) {
+        return spans.stream().filter(s -> name.equals(s.getName())).findFirst().orElse(null);
+    }
+
+    private static SpanData findRootEvalSpan(List<SpanData> spans) {
+        return spans.stream().filter(s -> "eval".equals(s.getName())).findFirst().orElse(null);
+    }
+}
diff --git a/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java
new file mode 100644
index 00000000..37ada2cd
--- /dev/null
+++ b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java
@@ -0,0 +1,128 @@
+package dev.braintrust.eval;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.util.List;
+import java.util.Map;
+import org.junit.jupiter.api.Test;
+
+public class ClassifierTest {
+
+    private static <T> TaskResult<String, T> taskResult(String input, T output) {
+        return new TaskResult<>(output, DatasetCase.of(input, null));
+    }
+
+    @Test
+    void singleFactoryReturnsOneClassification() {
+        Classifier<String, String> classifier =
+                Classifier.single(
+                        "category",
+                        tr -> new Classification("category", "greeting", "Greeting", null));
+
+        var result = classifier.classify(taskResult("hello", "hello"));
+        assertEquals(1, result.size());
+        assertEquals("greeting", result.get(0).id());
+        assertEquals("Greeting", result.get(0).label());
+        assertEquals("category", classifier.getName());
+    }
+
+    @Test
+    void singleFactoryNullReturnNormalizesToEmptyList() {
+        Classifier<String, String> classifier = Classifier.single("maybe", tr -> null);
+        assertEquals(List.of(), classifier.classify(taskResult("x", "x")));
+    }
+
+    @Test
+    void listFactoryReturnsMultipleClassifications() {
+        Classifier<String, String> classifier =
+                Classifier.of(
+                        "sentiment",
+                        tr ->
+                                List.of(
+                                        new Classification(
+                                                "sentiment", "positive", "Positive", null),
+                                        new Classification(
+                                                "sentiment",
+                                                "enthusiastic",
+                                                "Enthusiastic",
+                                                null)));
+
+        var result = classifier.classify(taskResult("great!", "great!"));
+        assertEquals(2, result.size());
+        assertEquals("positive", result.get(0).id());
+        assertEquals("enthusiastic", result.get(1).id());
+    }
+
+    @Test
+    void listFactoryNullReturnNormalizesToEmptyList() {
+        Classifier<String, String> classifier = Classifier.of("maybe", tr -> null);
+        assertEquals(List.of(), classifier.classify(taskResult("x", "x")));
+    }
+
+    @Test
+    void classificationOfHelpers() {
+        var c1 = Classification.of("id1");
+        assertNull(c1.name());
+        assertEquals("id1", c1.id());
+        assertNull(c1.label());
+        assertNull(c1.metadata());
+
+        var c2 = Classification.of("id2", "Label 2");
+        assertEquals("id2", c2.id());
+        assertEquals("Label 2", c2.label());
+
+        var c3 = Classification.of("nm", "id3", "Label 3");
+        assertEquals("nm", c3.name());
+        assertEquals("id3", c3.id());
+        assertEquals("Label 3", c3.label());
+    }
+
+    @Test
+    void classificationWithMetadataIsPreserved() {
+        var item =
+                new Classification(
+                        "category", "greeting", "Greeting", Map.of("source", "unit-test"));
+        Classifier<String, String> classifier = Classifier.single("category", tr -> item);
+
+        var result = classifier.classify(taskResult("hi", "hi"));
+        assertEquals(1, result.size());
+        assertEquals(Map.of("source", "unit-test"), result.get(0).metadata());
+    }
+
+    @Test
+    void validationThrowsForBlankId() {
+        Classifier<String, String> classifier =
+                Classifier.single("bad", tr -> new Classification("bad", "", null, null));
+
+        var error =
+                assertThrows(
+                        IllegalArgumentException.class,
+                        () -> classifier.classify(taskResult("x", "x")));
+        assertTrue(
+                error.getMessage().contains("each classification must be a non-empty object"),
+                "expected spec wording, got: " + error.getMessage());
+    }
+
+    @Test
+    void validationThrowsForNullItemInList() {
+        Classifier<String, String> classifier =
+                Classifier.of("bad", tr -> java.util.Arrays.asList((Classification) null));
+
+        var error =
+                assertThrows(
+                        IllegalArgumentException.class,
+                        () -> classifier.classify(taskResult("x", "x")));
+        assertTrue(
+                error.getMessage().contains("each classification must be a non-empty object"),
+                "expected spec wording, got: " + error.getMessage());
+    }
+
+    @Test
+    void getNameReturnsConstructorName() {
+        Classifier<String, String> c1 = Classifier.of("foo", tr -> List.of());
+        assertEquals("foo", c1.getName());
+
+        Classifier<String, String> c2 = Classifier.single("bar", tr -> null);
+        assertEquals("bar", c2.getName());
+    }
+}
diff --git a/examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java b/examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java
new file mode 100644
index 00000000..270dcc61
--- /dev/null
+++ b/examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java
@@ -0,0 +1,158 @@
+package dev.braintrust.examples;
+
+import dev.braintrust.Braintrust;
+import dev.braintrust.eval.Classification;
+import dev.braintrust.eval.Classifier;
+import dev.braintrust.eval.DatasetCase;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Classifiers categorize and label eval outputs. Unlike scorers (numeric 0-1), classifiers return
+ * structured {@link Classification} items with an id, optional label, and optional metadata.
+ *
+ * <p>Three patterns are shown:
+ *
+ * <ol>
+ *   <li>{@link Classifier#single} for a single-label classifier returning one {@link
+ *       Classification}.
+ *   <li>{@link Classifier#of} for a multi-label classifier returning a list.
+ *   <li>An anonymous {@link Classifier} implementation for reusable classifiers with their own
+ *       logic.
+ * </ol>
+ *
+ * <p>Classifiers and scorers run independently — you can use either, or both together.
+ */
+public class ClassifiersExample {
+    public static void main(String[] args) throws Exception {
+        var braintrust = Braintrust.get();
+
+        // 1. Single-label classifier.
+        Classifier<String, String> intentClassifier =
+                Classifier.single(
+                        "intent",
+                        tr -> {
+                            var input = tr.datasetCase().input();
+                            String id;
+                            if (input.matches("(?i).*thank.*")) {
+                                id = "praise";
+                            } else if (input.matches("(?i).*(waiting|order|update).*")) {
+                                id = "follow_up";
+                            } else if (input.matches("(?i).*(password|reset|find).*")) {
+                                id = "how_to";
+                            } else if (input.matches("(?i).*(damaged|refund).*")) {
+                                id = "complaint";
+                            } else {
+                                id = "other";
+                            }
+                            return new Classification(
+                                    "intent", id, capitalize(id.replace('_', ' ')), null);
+                        });
+
+        // 2. Multi-label classifier.
+        Classifier<String, String> toneClassifier =
+                Classifier.of(
+                        "tone",
+                        tr -> {
+                            var input = tr.datasetCase().input();
+                            List<Classification> labels = new ArrayList<>();
+                            if (input.matches("(?i).*(immediately|unacceptable|waiting).*")) {
+                                labels.add(new Classification("tone", "urgent", "Urgent", null));
+                            }
+                            if (input.matches("(?i).*(please|thank|just checking).*")) {
+                                labels.add(new Classification("tone", "polite", "Polite", null));
+                            }
+                            if (input.matches("(?i).*(unacceptable|damaged|waiting).*")) {
+                                labels.add(
+                                        new Classification(
+                                                "tone", "frustrated", "Frustrated", null));
+                            }
+                            if (labels.isEmpty()) {
+                                labels.add(new Classification("tone", "neutral", "Neutral", null));
+                            }
+                            return labels;
+                        });
+
+        // 3. Custom Classifier implementation — full control over name and logic.
+        Classifier<String, String> qualityClassifier =
+                new Classifier<>() {
+                    @Override
+                    public String getName() {
+                        return "response_quality";
+                    }
+
+                    @Override
+                    public List<Classification> classify(
+                            dev.braintrust.eval.TaskResult<String, String> tr) {
+                        var output = tr.result() == null ? "" : tr.result().strip();
+                        int wordCount = output.isEmpty() ? 0 : output.split("\\s+").length;
+                        String id;
+                        if (output.isEmpty()) {
+                            id = "no_response";
+                        } else if (wordCount < 5) {
+                            id = "too_short";
+                        } else if (output.matches("(?i).*(immediately|right away|look into).*")) {
+                            id = "action_oriented";
+                        } else {
+                            id = "informational";
+                        }
+                        return List.of(
+                                new Classification(
+                                        "response_quality",
+                                        id,
+                                        capitalize(id.replace('_', ' ')),
+                                        java.util.Map.of("word_count", wordCount)));
+                    }
+                };
+
+        var eval =
+                braintrust
+                        .<String, String>evalBuilder()
+                        .name("classifiers-example-" + System.currentTimeMillis())
+                        .cases(
+                                DatasetCase.of(
+                                        "Hi! I just wanted to say thank you, the product is"
+                                                + " amazing!",
+                                        null),
+                                DatasetCase.of(
+                                        "I've been waiting 2 weeks for my order. This is"
+                                                + " unacceptable!",
+                                        null),
+                                DatasetCase.of(
+                                        "How do I reset my password? I can't find the option"
+                                                + " anywhere.",
+                                        null),
+                                DatasetCase.of(
+                                        "The item arrived damaged. I need a refund immediately.",
+                                        null),
+                                DatasetCase.of(
+                                        "Just checking in — any update on my ticket #4821?", null))
+                        .taskFunction(ClassifiersExample::generateResponse)
+                        .classifiers(intentClassifier, toneClassifier, qualityClassifier)
+                        .build();
+
+        var result = eval.run();
+        System.out.println("\n\n" + result.createReportString());
+    }
+
+    private static String generateResponse(String message) {
+        if (message.matches("(?i).*thank.*")) {
+            return "You're welcome! So glad you're enjoying it.";
+        }
+        if (message.matches("(?i).*(waiting|order).*")) {
+            return "I sincerely apologise for the delay. Let me look into this right away.";
+        }
+        if (message.matches("(?i).*(password|reset).*")) {
+            return "To reset your password, go to Settings > Account > Reset Password.";
+        }
+        if (message.matches("(?i).*(damaged|refund).*")) {
+            return "I'm sorry to hear that. I'll process your refund immediately.";
+        }
+        return "Thanks for reaching out! Let me check on that for you.";
+    }
+
+    private static String capitalize(String s) {
+        if (s == null || s.isEmpty()) return s;
+        return Character.toUpperCase(s.charAt(0)) + s.substring(1);
+    }
+}
diff --git a/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java b/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java
index e95ef5a0..75a8ece6 100644
--- a/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java
+++ b/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java
@@ -313,7 +313,7 @@ public void ensureRemoteDataset(String datasetName, Dataset, ?> expectedData)
// verify
var btDataset = braintrust.fetchDataset(datasetName);
- if (datasetsEqual(expectedData, btDataset)) {
+ if (!datasetsEqual(expectedData, btDataset)) {
throw new RuntimeException(
"failed to ensure expected dataset: %s -- %s"
.formatted(toList(expectedData), toList(btDataset)));
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json
new file mode 100644
index 00000000..634dffc9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json
@@ -0,0 +1 @@
+{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json
new file mode 100644
index 00000000..a817864e
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json
@@ -0,0 +1 @@
+{"id":"bd13b6ad-0fb2-4477-b407-0aa58127586e","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"scorer-and-classifier-eval","description":null,"created":"2026-05-27T10:13:48.702Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json
new file mode 100644
index 00000000..eadc14a9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json
@@ -0,0 +1 @@
+{"id":"bda48716-d8cc-406b-aed1-10ca29b511e5","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"classifier-only-eval","description":null,"created":"2026-05-27T10:13:52.580Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json
new file mode 100644
index 00000000..8c927324
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json
@@ -0,0 +1 @@
+{"id":"129eb154-c227-43f4-a549-35bd3f281bd9","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"classifier-error-eval","description":null,"created":"2026-05-27T10:14:02.504Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json
new file mode 100644
index 00000000..42a3a8d2
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json
@@ -0,0 +1 @@
+{"id":"ed5735fb-b23b-4522-9b4f-546bcae35eae","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"multi-label-eval","description":null,"created":"2026-05-27T10:13:46.338Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json
new file mode 100644
index 00000000..0f2383ab
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json
@@ -0,0 +1 @@
+{"id":"3d17f6cf-5729-44c9-8dcc-b95ab9c5f0f4","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"default-name-eval","description":null,"created":"2026-05-27T10:13:57.484Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json
new file mode 100644
index 00000000..89452950
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json
@@ -0,0 +1 @@
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json
new file mode 100644
index 00000000..e80e1a08
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json
@@ -0,0 +1,38 @@
+{
+ "id" : "e5e5a390-4568-343b-95d7-7e50b56e2d3e",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-08d26d2faef8.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXBgHtHoAMEqQQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d209-36d29e3856e386c501419f1b;Parent=50f208976f235332;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:17 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 aafd761bed21ff3b2c4a07021d172702.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20900000000434852e942a27399",
+ "x-amzn-RequestId" : "4adcb8ff-9421-43a4-ba8f-0543cd0897fb",
+ "X-Amz-Cf-Id" : "KPwnsYEtUa_3P5lm_xsfehvgGHHGQRKqCVzUBxJNJv8udASi6Vv4Jg==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "e5e5a390-4568-343b-95d7-7e50b56e2d3e",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "Started",
+ "newScenarioState" : "scenario-1-api-apikey-login-2",
+ "insertionIndex" : 139
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json
new file mode 100644
index 00000000..d130210d
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json
@@ -0,0 +1,38 @@
+{
+ "id" : "9602117c-895c-3d23-a069-36bd7207a729",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-228248286fba.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXCKE3kIAMEVGQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20d-4d3f3dc72ec48b9a3b8c04f2;Parent=403457bedc76358d;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:21 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20d000000005000c50aaf3b8111",
+ "x-amzn-RequestId" : "55329b39-c50b-4ba8-a2e6-1205aeb76d1c",
+ "X-Amz-Cf-Id" : "T1dQXV26G8lEmQQzBx41-0WGFMsKDQVuUZaQXI0EEPf5WUy3-IpmRg==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "9602117c-895c-3d23-a069-36bd7207a729",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-3",
+ "newScenarioState" : "scenario-1-api-apikey-login-4",
+ "insertionIndex" : 134
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json
new file mode 100644
index 00000000..376ad543
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json
@@ -0,0 +1,38 @@
+{
+ "id" : "57532630-927c-3919-a0d7-63a83e6b3ed0",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-4139f91c7e72.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXCrH1YoAMEZuw=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d210-68f2dcb247a18da6096f18f3;Parent=3351e09ca2d830e3;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:24 GMT",
+ "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 1b7c94274bd830ddf26396883b21ed8a.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d210000000004c8b8aff442f640c",
+ "x-amzn-RequestId" : "bf6984ee-1292-4284-ab86-68da0d531eb5",
+ "X-Amz-Cf-Id" : "ggKVGglyTdPcOxTy-6gQHUAK9iI3qa-gl5HqCrPpOzesXhTvLIv1NQ==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "57532630-927c-3919-a0d7-63a83e6b3ed0",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-4",
+ "newScenarioState" : "scenario-1-api-apikey-login-5",
+ "insertionIndex" : 131
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json
new file mode 100644
index 00000000..8b8a9bdb
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json
@@ -0,0 +1,37 @@
+{
+ "id" : "28cf2c95-3206-316f-939f-74d5e43caa3c",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-684178b0ab9d.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXEAE0hIAMEN_Q=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d219-71253c4e506e4a7947864732;Parent=3bd8be87000e5343;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:33 GMT",
+ "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 a44b410ee30a39bfebd24cea78fab2b0.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d21900000000719be83d517acd00",
+ "x-amzn-RequestId" : "d92a299f-74fa-4820-8f0b-fb0365fd8331",
+ "X-Amz-Cf-Id" : "k5s1K-dtngbSlv43UW0hKdbO_tCi1E9wlWsEYde1i5stLwfd6O6j3g==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "28cf2c95-3206-316f-939f-74d5e43caa3c",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-8",
+ "insertionIndex" : 121
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json
new file mode 100644
index 00000000..417b5b06
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json
@@ -0,0 +1,38 @@
+{
+ "id" : "058e04ef-a97a-3a53-8129-7fc92d2dacd5",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-9611c99e6aac.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXC_GH_IAMEEfw=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d212-54d6989d24d3b0970b860d75;Parent=178fc312bd1c2944;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:26 GMT",
+ "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 d1feda220715f81c20b2cca33054d72a.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d212000000004ce3d30800e5905e",
+ "x-amzn-RequestId" : "76fef457-9295-4146-b295-0fcd16a1a715",
+ "X-Amz-Cf-Id" : "RdJ96709nNYgQvFSEyD0XxbP2lmSZO634x8qoqe-1PONWFMRXaG-bg==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "058e04ef-a97a-3a53-8129-7fc92d2dacd5",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-5",
+ "newScenarioState" : "scenario-1-api-apikey-login-6",
+ "insertionIndex" : 128
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json
new file mode 100644
index 00000000..d14b3a9e
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json
@@ -0,0 +1,38 @@
+{
+ "id" : "dafe0989-f7a8-3d93-a37b-4e88227a2425",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-a7fce53bd211.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXBuEjeIAMEoWA=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20a-6fe2e1c81e8436bf350a6d90;Parent=3e3aa626a4ed2918;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:18 GMT",
+ "Via" : "1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront), 1.1 f39ba1c0189814d2897853765b8684b2.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20a00000000328b1fb874b22495",
+ "x-amzn-RequestId" : "b0e5f526-399c-4690-8af8-ea2a6f63f480",
+ "X-Amz-Cf-Id" : "MN5c5XBw2VvbGJhjz_UbWpOPK5qKEhA-pYNbZVYBqJekJsreM0TI6w==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "dafe0989-f7a8-3d93-a37b-4e88227a2425",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-2",
+ "newScenarioState" : "scenario-1-api-apikey-login-3",
+ "insertionIndex" : 137
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json
new file mode 100644
index 00000000..349b8a9e
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json
@@ -0,0 +1,38 @@
+{
+ "id" : "8713c97f-9d52-324e-a859-b43f5e7f596c",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-fd38313bebba.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXDyGceoAMEibQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d217-1313d2cf6f97731228ee95ad;Parent=391239ba8d8316c4;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:31 GMT",
+ "Via" : "1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront), 1.1 9a94d1d050cdaaed2e0186e2da88b14c.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d2170000000061abd59cc184ebd1",
+ "x-amzn-RequestId" : "67e9196b-4272-4875-a71a-ed3b5868db47",
+ "X-Amz-Cf-Id" : "e4LBeQe47mxQpyg3ewMWhB1NwKSwaUHhmLqzzNyZbXi4ExXRbvVPvA==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "8713c97f-9d52-324e-a859-b43f5e7f596c",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-7",
+ "newScenarioState" : "scenario-1-api-apikey-login-8",
+ "insertionIndex" : 123
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json
new file mode 100644
index 00000000..e2cd318e
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json
@@ -0,0 +1,38 @@
+{
+ "id" : "7fc7ac7d-600b-3bb1-bbeb-6c20419871bd",
+ "name" : "api_apikey_login",
+ "request" : {
+ "url" : "/api/apikey/login",
+ "method" : "POST"
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "api_apikey_login-fda483633056.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXDQFIgoAMEvWQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "259",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d214-2cd07d1772c84d2909e43107;Parent=515a17fbfa890994;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:28 GMT",
+ "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 e02f538931b17b78287c925d3a647504.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d214000000003783962a661571fc",
+ "x-amzn-RequestId" : "ab54225a-603e-48e8-83dd-1af3e8286785",
+ "X-Amz-Cf-Id" : "x975G8mDgfBC-efvRtpt8u-YVVSTnd2yYsmZLEEn4gXaxeZSEfyLhA==",
+ "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "7fc7ac7d-600b-3bb1-bbeb-6c20419871bd",
+ "persistent" : true,
+ "scenarioName" : "scenario-1-api-apikey-login",
+ "requiredScenarioState" : "scenario-1-api-apikey-login-6",
+ "newScenarioState" : "scenario-1-api-apikey-login-7",
+ "insertionIndex" : 126
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json
new file mode 100644
index 00000000..4c124322
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json
@@ -0,0 +1,45 @@
+{
+ "id" : "eecc14b6-db38-3599-a246-dfa88a5f3369",
+ "name" : "v1_experiment",
+ "request" : {
+ "url" : "/v1/experiment",
+ "method" : "POST",
+ "headers" : {
+ "Content-Type" : {
+ "equalTo" : "application/json"
+ }
+ },
+ "bodyPatterns" : [ {
+ "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"scorer-and-classifier-eval\"}",
+ "ignoreArrayOrder" : true,
+ "ignoreExtraElements" : false
+ } ]
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_experiment-1185d0796fb3.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXCTEBZIAMEs8Q=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "460",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20e-26740e1b52c0f6ee059e5ca6;Parent=5f9b3be8b414e885;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:22 GMT",
+ "Via" : "1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront), 1.1 e02f538931b17b78287c925d3a647504.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20e00000000743b62e99266c2b2",
+ "x-amzn-RequestId" : "2f575965-a65c-451a-a367-68c18fd7d05b",
+ "X-Amz-Cf-Id" : "af2trR_NgbPDyNjDKkEX0gBSesSTjoRBmctpOuV7h8zdBICjMaoeCQ==",
+ "etag" : "W/\"1cc-wY6bMr0WYAzBpcNQkHl9XTJwGLo\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "eecc14b6-db38-3599-a246-dfa88a5f3369",
+ "persistent" : true,
+ "insertionIndex" : 133
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json
new file mode 100644
index 00000000..aac667da
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json
@@ -0,0 +1,45 @@
+{
+ "id" : "cc272b70-f30a-3174-9ba3-a81baae8f4ed",
+ "name" : "v1_experiment",
+ "request" : {
+ "url" : "/v1/experiment",
+ "method" : "POST",
+ "headers" : {
+ "Content-Type" : {
+ "equalTo" : "application/json"
+ }
+ },
+ "bodyPatterns" : [ {
+ "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"classifier-only-eval\"}",
+ "ignoreArrayOrder" : true,
+ "ignoreExtraElements" : false
+ } ]
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_experiment-2c8bb270c5cf.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXCvG5hIAMEqew=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "454",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d211-3f0647211bd7e912248c7f32;Parent=585772df002b596f;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:25 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 9188ac315a73b9d6c346dfcf5866043c.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d2110000000066a77c1b36a9fdff",
+ "x-amzn-RequestId" : "7e8a597c-6a7c-43f5-b27a-910e3f58ac36",
+ "X-Amz-Cf-Id" : "xeUuj41sVJh6kF1TvZup12XNOhgba19t4D8QV9SZVIez9YAgT238UQ==",
+ "etag" : "W/\"1c6-rz81IPhQ4qZNL6lAqhJrzM+gycU\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "cc272b70-f30a-3174-9ba3-a81baae8f4ed",
+ "persistent" : true,
+ "insertionIndex" : 130
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json
new file mode 100644
index 00000000..728272dd
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json
@@ -0,0 +1,45 @@
+{
+ "id" : "b82cb2d9-9920-3347-b016-b1d659c0af6e",
+ "name" : "v1_experiment",
+ "request" : {
+ "url" : "/v1/experiment",
+ "method" : "POST",
+ "headers" : {
+ "Content-Type" : {
+ "equalTo" : "application/json"
+ }
+ },
+ "bodyPatterns" : [ {
+ "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"classifier-error-eval\"}",
+ "ignoreArrayOrder" : true,
+ "ignoreExtraElements" : false
+ } ]
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_experiment-4f1bd5afd18d.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXEIF7IoAMEbXA=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "455",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d21a-66b823910bcf105c6f698586;Parent=0e932b08eb869015;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:34 GMT",
+ "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 aedb60fbad7f08567276abe527b7fb22.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d21a000000001bea3c38943af2f0",
+ "x-amzn-RequestId" : "6504845d-5a89-4221-b197-9f41f9e3259b",
+ "X-Amz-Cf-Id" : "rfZjZ3HExQoDW6mtFFGqaTfY8oHv422o1-82u1Fqj8RuR5ckxA5VnQ==",
+ "etag" : "W/\"1c7-Nuu5x5+lc1SsjkxZYjB+5Dmn450\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "b82cb2d9-9920-3347-b016-b1d659c0af6e",
+ "persistent" : true,
+ "insertionIndex" : 120
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json
new file mode 100644
index 00000000..9e90708c
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json
@@ -0,0 +1,45 @@
+{
+ "id" : "a5ae2b98-65c4-3fa3-b2fd-58b8852e2e8f",
+ "name" : "v1_experiment",
+ "request" : {
+ "url" : "/v1/experiment",
+ "method" : "POST",
+ "headers" : {
+ "Content-Type" : {
+ "equalTo" : "application/json"
+ }
+ },
+ "bodyPatterns" : [ {
+ "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"multi-label-eval\"}",
+ "ignoreArrayOrder" : true,
+ "ignoreExtraElements" : false
+ } ]
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_experiment-80d23f49c06c.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXByGBhIAMEd8A=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "450",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20b-21aca5612600689b5f182d05;Parent=5978daff80eb7de6;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:19 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 89664692f153569d5d76f7ee89b2e518.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20b0000000045eef8862a28282e",
+ "x-amzn-RequestId" : "5cb95ff0-5a69-4084-bed9-5c9ed2f2967c",
+ "X-Amz-Cf-Id" : "M_PS9I3vGX9q-IYeh99FQ8XV47gtzziCHBgYaGWlJZTxQlKtmn6EuQ==",
+ "etag" : "W/\"1c2-xdW6/5KUQpOgyFv/Kw/psYXB6lA\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "a5ae2b98-65c4-3fa3-b2fd-58b8852e2e8f",
+ "persistent" : true,
+ "insertionIndex" : 136
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json
new file mode 100644
index 00000000..3e098ee6
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json
@@ -0,0 +1,45 @@
+{
+ "id" : "88dfcc21-c37b-3d4e-9cf7-01d8d26e7cdc",
+ "name" : "v1_experiment",
+ "request" : {
+ "url" : "/v1/experiment",
+ "method" : "POST",
+ "headers" : {
+ "Content-Type" : {
+ "equalTo" : "application/json"
+ }
+ },
+ "bodyPatterns" : [ {
+ "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"default-name-eval\"}",
+ "ignoreArrayOrder" : true,
+ "ignoreExtraElements" : false
+ } ]
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_experiment-81a6ebbafe29.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXDZEnHoAMElQg=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "451",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d215-18b6a8be368d6a2b48e280e4;Parent=4210e31acd963bbc;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:29 GMT",
+ "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 aedb60fbad7f08567276abe527b7fb22.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d2150000000061e0529576243763",
+ "x-amzn-RequestId" : "b7b2de2e-893d-4644-9fab-f800105d0171",
+ "X-Amz-Cf-Id" : "Rh6Z30cHxTokA9lEY2Ki0NziaYupibRAWnT9BYMsMUMoLJ3Y_GdqNw==",
+ "etag" : "W/\"1c3-dkvmbzbnogEoefH3HNfcb+4qbp8\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "88dfcc21-c37b-3d4e-9cf7-01d8d26e7cdc",
+ "persistent" : true,
+ "insertionIndex" : 125
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json
new file mode 100644
index 00000000..f05070e2
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json
@@ -0,0 +1,44 @@
+{
+ "id" : "6606a3c7-099c-3495-b32d-59502385df8a",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-30867e7088bd.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXD2GdlIAMEZ9Q=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d218-0e8b14c94f499dea67b23891;Parent=32eb08ae9c40f8c4;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:32 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 193b73e60a0ed559f0dfa5eb247e5b34.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d21800000000481708e0aaf04435",
+ "x-amzn-RequestId" : "bb03e88e-1edb-478d-9e64-40d6709282b8",
+ "X-Amz-Cf-Id" : "O-vZuMFFSx5g20nPrUGL1uofv5a7lfzh-DerUTc9vhOqyf5NeR7S2w==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "6606a3c7-099c-3495-b32d-59502385df8a",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-8",
+ "insertionIndex" : 122
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json
new file mode 100644
index 00000000..3b61260c
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json
@@ -0,0 +1,45 @@
+{
+ "id" : "2c0e6f0a-b1c8-3b66-8435-51de30a1a7dd",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-410ff9f133da.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXC6Ep5oAMEAyQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d212-2ec9cc4c7f4206431b5b3f27;Parent=32f0a00b235c198c;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:26 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 96891645583a0d37345ae58fd6592e98.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d2120000000051431f174a009d8f",
+ "x-amzn-RequestId" : "6478a632-afc0-4e1a-98e9-c970acfa4e12",
+ "X-Amz-Cf-Id" : "wzlkpDq0BXAz47-OAsOrm72fKP6BTAQLOsLVokBH0jT3pkPBQnjO3w==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "2c0e6f0a-b1c8-3b66-8435-51de30a1a7dd",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-5",
+ "newScenarioState" : "scenario-2-v1-project-6",
+ "insertionIndex" : 129
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json
new file mode 100644
index 00000000..9d6e550d
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json
@@ -0,0 +1,45 @@
+{
+ "id" : "f0270406-fe88-3aec-a5a7-ee6d2435ae22",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-4891298f2969.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXBWHzaIAMEbjg=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d208-5ab021804fabf735270603a8;Parent=705ce3859c59aff4;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:16 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 25a83a69fd8e833e18790d3971b848a8.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d208000000006d9f9c719887c6dc",
+ "x-amzn-RequestId" : "2b5c1be0-8edb-4f38-b151-3d9ead611d59",
+ "X-Amz-Cf-Id" : "boyVP5_5Icm6H5Po-pwHjYHv0IXWH-6RkNqIA3t81TmX0Ijks7BW0g==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "f0270406-fe88-3aec-a5a7-ee6d2435ae22",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "Started",
+ "newScenarioState" : "scenario-2-v1-project-2",
+ "insertionIndex" : 140
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json
new file mode 100644
index 00000000..b9c1e229
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json
@@ -0,0 +1,45 @@
+{
+ "id" : "1c224e28-c695-3150-80a1-b131fa0ac88b",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-543a07178006.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXDpGHzIAMEj_A=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d216-585bcd564752520a2764188a;Parent=6037c8f8c6826bf3;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:31 GMT",
+ "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 89664692f153569d5d76f7ee89b2e518.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d21600000000245f63f400cfca72",
+ "x-amzn-RequestId" : "495dee67-c272-46ef-b9cb-2f3e8c440e72",
+ "X-Amz-Cf-Id" : "Yfnyy-GP_3Sg9RL4nEkJEvuKX4Unn87lXHl6fehYOHwO8XxO8B_NzQ==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "1c224e28-c695-3150-80a1-b131fa0ac88b",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-7",
+ "newScenarioState" : "scenario-2-v1-project-8",
+ "insertionIndex" : 124
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json
new file mode 100644
index 00000000..55948026
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json
@@ -0,0 +1,45 @@
+{
+ "id" : "02e68d25-de8f-38c9-b80e-6c8c4390e791",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-c0742bb3c63f.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXDHG52IAMETHQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d213-0331effd0a508caa1931af7b;Parent=48874c626d0a4b18;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:27 GMT",
+ "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 1b7c94274bd830ddf26396883b21ed8a.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d213000000000db05346ecb0692d",
+ "x-amzn-RequestId" : "a66e7aa5-3773-4927-acd5-78ce5d892a5d",
+ "X-Amz-Cf-Id" : "gVarBklLPnqMsM0b4B3i5QkmeOes8Jjh7VDWe-Xxx05qAwhSUNsEug==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "02e68d25-de8f-38c9-b80e-6c8c4390e791",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-6",
+ "newScenarioState" : "scenario-2-v1-project-7",
+ "insertionIndex" : 127
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json
new file mode 100644
index 00000000..22c2cf49
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json
@@ -0,0 +1,45 @@
+{
+ "id" : "0254f542-fbbd-37af-903b-d925ff1ca4af",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-dd7665d7a48a.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXCiFhZIAMEFvQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20f-6094aca74304bcca352d39aa;Parent=2302cf88ea5f8c6c;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:24 GMT",
+ "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 76fabd50aff5345ed3105adfbd47fb46.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20f0000000061d12d99d3f12326",
+ "x-amzn-RequestId" : "034806bd-5e15-4c6e-818f-ed0adc759fca",
+ "X-Amz-Cf-Id" : "p4k33S4Uh_IiB6N4xpxuw3Vy3_Fq9NchymLzp1PLjIgpfOhZLzeyGw==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "0254f542-fbbd-37af-903b-d925ff1ca4af",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-4",
+ "newScenarioState" : "scenario-2-v1-project-5",
+ "insertionIndex" : 132
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json
new file mode 100644
index 00000000..8a5fb7c9
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json
@@ -0,0 +1,45 @@
+{
+ "id" : "b0b988b5-0416-376e-8651-152095d5b692",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-e74886687a34.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXCBFx1IAMEUUw=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20c-70666faf0263b2ce1db6eb04;Parent=73bbc66fd7eb898d;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:20 GMT",
+ "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20c0000000025b845fca788ff05",
+ "x-amzn-RequestId" : "8bfdc679-2ece-4279-abc0-0581a4f1fc34",
+ "X-Amz-Cf-Id" : "QDXJ7_8f7PjYmzPSpKFri0AYY9mrej39EYYwGwllS6pCe6Wj-mRIVw==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "b0b988b5-0416-376e-8651-152095d5b692",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-3",
+ "newScenarioState" : "scenario-2-v1-project-4",
+ "insertionIndex" : 135
+}
\ No newline at end of file
diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json
new file mode 100644
index 00000000..91a2f6b6
--- /dev/null
+++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json
@@ -0,0 +1,45 @@
+{
+ "id" : "d2f428e5-f001-3e3a-9b24-5e1d7065518c",
+ "name" : "v1_project",
+ "request" : {
+ "urlPath" : "/v1/project",
+ "method" : "GET",
+ "queryParameters" : {
+ "project_name" : {
+ "hasExactly" : [ {
+ "equalTo" : "java-unit-test"
+ } ]
+ }
+ }
+ },
+ "response" : {
+ "status" : 200,
+ "bodyFileName" : "v1_project-e7e35e493e43.json",
+ "headers" : {
+ "X-Cache" : "Miss from cloudfront",
+ "expires" : "0",
+ "x-amz-apigw-id" : "eBXBpHbzIAMEgmQ=",
+ "vary" : "Origin, Accept-Encoding",
+ "x-amzn-Remapped-content-length" : "361",
+ "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ],
+ "X-Amzn-Trace-Id" : "Root=1-6a16d20a-4a08de754676dbd2118c8c50;Parent=17ce211499fdfc49;Sampled=0;Lineage=1:24be3d11:0",
+ "Date" : "Wed, 27 May 2026 11:14:18 GMT",
+ "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 9188ac315a73b9d6c346dfcf5866043c.cloudfront.net (CloudFront)",
+ "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id",
+ "access-control-allow-credentials" : "true",
+ "x-bt-internal-trace-id" : "6a16d20a000000001ff48a7a1c2a7cd9",
+ "x-amzn-RequestId" : "f1b758fe-a590-4aa5-80eb-682722e4379c",
+ "X-Amz-Cf-Id" : "_h4ILEWdkwzk-3vrZIxLOEjWT6kY-O7ny0r5maDbiQU3RSSF1XNP5Q==",
+ "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"",
+ "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate",
+ "surrogate-control" : "no-store",
+ "Content-Type" : "application/json; charset=utf-8"
+ }
+ },
+ "uuid" : "d2f428e5-f001-3e3a-9b24-5e1d7065518c",
+ "persistent" : true,
+ "scenarioName" : "scenario-2-v1-project",
+ "requiredScenarioState" : "scenario-2-v1-project-2",
+ "newScenarioState" : "scenario-2-v1-project-3",
+ "insertionIndex" : 138
+}
\ No newline at end of file