From 26b798a09581f8584196dfd7628b792fe0f6b429 Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 27 May 2026 19:59:42 +0800 Subject: [PATCH] Add classifier support to eval Mirrors the Ruby SDK's classifier port (braintrustdata/braintrust-sdk-ruby#154) and the canonical classifier spec at braintrust-spec/docs/features/classifiers.md. Classifiers return structured Classification items (id, optional label, optional metadata) instead of numeric scores. They run alongside scorers, their failures are non-fatal, and at least one of scorers/classifiers is required (relaxes the prior scorers-required check). New public types: Classification, Classifier (+ Classifier.of / .single factories), TracedClassifier. Eval gains a classifiers(...) builder method and a runClassifier helper that emits classifier spans with type=classifier, purpose=scorer; per-case classifications aggregate onto the root eval span as braintrust.classifications, and classifier exceptions land in braintrust.metadata.classifier_errors. Also fixes an inverted-condition bug in TestHarness.ensureRemoteDataset's post-rebuild verify check (threw when datasets matched). New cassettes were recorded only for ClassifierEvalTest (VCR_MODE=record ... --tests '*ClassifierEvalTest*') against the same Braintrust SDKs org used by the existing cassettes, so the rest of the cassette set is untouched. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../dev/braintrust/eval/Classification.java | 37 +++ .../java/dev/braintrust/eval/Classifier.java | 98 ++++++ .../main/java/dev/braintrust/eval/Eval.java | 127 +++++++- .../dev/braintrust/eval/TracedClassifier.java | 41 +++ .../braintrust/eval/ClassifierEvalTest.java | 304 ++++++++++++++++++ .../dev/braintrust/eval/ClassifierTest.java | 128 ++++++++ .../examples/ClassifiersExample.java | 158 +++++++++ .../java/dev/braintrust/TestHarness.java | 2 +- .../api_apikey_login-08d26d2faef8.json | 1 + .../api_apikey_login-228248286fba.json | 1 + .../api_apikey_login-4139f91c7e72.json | 1 + .../api_apikey_login-684178b0ab9d.json | 1 + .../api_apikey_login-9611c99e6aac.json | 1 + .../api_apikey_login-a7fce53bd211.json | 1 + .../api_apikey_login-fd38313bebba.json | 1 + .../api_apikey_login-fda483633056.json | 1 + .../__files/v1_experiment-1185d0796fb3.json | 1 + .../__files/v1_experiment-2c8bb270c5cf.json | 1 + .../__files/v1_experiment-4f1bd5afd18d.json | 1 + .../__files/v1_experiment-80d23f49c06c.json | 1 + .../__files/v1_experiment-81a6ebbafe29.json | 1 + .../__files/v1_project-30867e7088bd.json | 1 + .../__files/v1_project-410ff9f133da.json | 1 + .../__files/v1_project-4891298f2969.json | 1 + .../__files/v1_project-543a07178006.json | 1 + .../__files/v1_project-c0742bb3c63f.json | 1 + .../__files/v1_project-dd7665d7a48a.json | 1 + .../__files/v1_project-e74886687a34.json | 1 + .../__files/v1_project-e7e35e493e43.json | 1 + .../api_apikey_login-08d26d2faef8.json | 38 +++ .../api_apikey_login-228248286fba.json | 38 +++ .../api_apikey_login-4139f91c7e72.json | 38 +++ .../api_apikey_login-684178b0ab9d.json | 37 +++ .../api_apikey_login-9611c99e6aac.json | 38 +++ .../api_apikey_login-a7fce53bd211.json | 38 +++ .../api_apikey_login-fd38313bebba.json | 38 +++ .../api_apikey_login-fda483633056.json | 38 +++ .../mappings/v1_experiment-1185d0796fb3.json | 45 +++ .../mappings/v1_experiment-2c8bb270c5cf.json | 45 +++ 
.../mappings/v1_experiment-4f1bd5afd18d.json | 45 +++ .../mappings/v1_experiment-80d23f49c06c.json | 45 +++ .../mappings/v1_experiment-81a6ebbafe29.json | 45 +++ .../mappings/v1_project-30867e7088bd.json | 44 +++ .../mappings/v1_project-410ff9f133da.json | 45 +++ .../mappings/v1_project-4891298f2969.json | 45 +++ .../mappings/v1_project-543a07178006.json | 45 +++ .../mappings/v1_project-c0742bb3c63f.json | 45 +++ .../mappings/v1_project-dd7665d7a48a.json | 45 +++ .../mappings/v1_project-e74886687a34.json | 45 +++ .../mappings/v1_project-e7e35e493e43.json | 45 +++ 50 files changed, 1800 insertions(+), 3 deletions(-) create mode 100644 braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java create mode 100644 braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java create mode 100644 braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java create mode 100644 braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java create mode 100644 braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java create mode 100644 examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json create mode 100644 
test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json create mode 100644 
test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json create mode 100644 
test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json create mode 100644 test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java new file mode 100644 index 00000000..7d3684a6 --- /dev/null +++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classification.java @@ -0,0 +1,37 @@ +package dev.braintrust.eval; + +import java.util.Map; +import javax.annotation.Nullable; + +/** + * A single structured classification produced by a {@link Classifier}. + * + *

Unlike a {@link Score} (numeric 0-1), a Classification carries a stable id, an optional + * display label, and optional metadata. The {@code name} acts as the grouping key in the aggregated + * result map; when {@code name} is {@code null} or blank, the owning classifier's resolved name is + * used instead. + * + * @param name optional grouping key; defaults to the owning classifier's resolved name when null or + * blank + * @param id stable identifier for the classification (required) + * @param label optional display label + * @param metadata optional arbitrary metadata + */ +public record Classification( + @Nullable String name, + String id, + @Nullable String label, + @Nullable Map metadata) { + + public static Classification of(String id) { + return new Classification(null, id, null, null); + } + + public static Classification of(String id, String label) { + return new Classification(null, id, label, null); + } + + public static Classification of(String name, String id, String label) { + return new Classification(name, id, label, null); + } +} diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java new file mode 100644 index 00000000..e37b3e3e --- /dev/null +++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/Classifier.java @@ -0,0 +1,98 @@ +package dev.braintrust.eval; + +import java.util.List; +import java.util.function.Function; + +/** + * A classifier categorizes and labels eval outputs, producing zero or more structured {@link + * Classification} items. + * + *

Classifiers run independently from {@link Scorer}s. Each Classifier exposes a name (used as + * the span name and as the default grouping key for classifications whose own {@code name} is + * blank). + * + * @param type of the input data + * @param type of the output data + */ +public interface Classifier { + String INVALID_CLASSIFICATION_MESSAGE = + "When returning structured classifier results, each classification must be a non-empty" + + " object."; + + String getName(); + + /** + * Classifies the result of a successful task execution. + * + * @param taskResult the task output and originating dataset case + * @return zero or more classifications. An empty list means "no classifications for this case". + */ + List classify(TaskResult taskResult); + + /** + * Creates a classifier from a function that returns a (possibly empty or null) list of + * classifications. + * + *

A {@code null} return value is treated as no classifications. Each returned {@link + * Classification} must have a non-blank {@code id}; otherwise the classifier throws an + * exception (which the eval runner records but does not abort on). + */ + static Classifier of( + String classifierName, + Function, List> classifierFn) { + return new Classifier<>() { + @Override + public String getName() { + return classifierName; + } + + @Override + public List classify(TaskResult taskResult) { + var result = classifierFn.apply(taskResult); + if (result == null) { + return List.of(); + } + for (var item : result) { + validate(item); + } + return result; + } + }; + } + + /** + * Creates a classifier from a function that returns a single classification. + * + *

A {@code null} return value is treated as no classifications. + */ + static Classifier single( + String classifierName, + Function, Classification> classifierFn) { + return new Classifier<>() { + @Override + public String getName() { + return classifierName; + } + + @Override + public List classify(TaskResult taskResult) { + var item = classifierFn.apply(taskResult); + if (item == null) { + return List.of(); + } + validate(item); + return List.of(item); + } + }; + } + + /** + * Validates a single classification: it must have a non-blank id. Throws with the spec-mandated + * wording on failure. + */ + private static void validate(Classification item) { + if (item == null || item.id() == null || item.id().isBlank()) { + throw new IllegalArgumentException(INVALID_CLASSIFICATION_MESSAGE + " Got: " + item); + } + } +} diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java index ee814baf..a9c3d06b 100644 --- a/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java +++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java @@ -42,6 +42,7 @@ public final class Eval { private final @Nonnull Dataset dataset; private final @Nonnull Task task; private final @Nonnull List> scorers; + private final @Nonnull List> classifiers; private final @Nonnull List tags; private final @Nonnull Map metadata; private final @Nonnull Parameters parameters; @@ -58,6 +59,7 @@ private Eval(Builder builder) { this.dataset = builder.dataset; this.task = Objects.requireNonNull(builder.task); this.scorers = List.copyOf(builder.scorers); + this.classifiers = List.copyOf(builder.classifiers); this.tags = List.copyOf(builder.tags); this.metadata = Map.copyOf(builder.metadata); this.parameters = builder.buildParameters(); @@ -172,6 +174,42 @@ private void evalOne(String experimentId, DatasetCase datasetCase for (var scorer : scorers) { runScorer(experimentId, rootSpan, scorer, taskResult, trace); } + + // run 
classifiers - one span per classifier. Classifier exceptions are non-fatal: + // they are recorded on the classifier span and surfaced in the root span's metadata + // under `classifier_errors`, but do not abort the eval or affect other classifiers/ + // scorers. Classifiers only run when the task succeeded (no scoreForTaskException + // analogue). + if (!classifiers.isEmpty()) { + Map>> caseClassifications = new LinkedHashMap<>(); + Map classifierErrors = new LinkedHashMap<>(); + for (int i = 0; i < classifiers.size(); i++) { + var classifier = classifiers.get(i); + var classifierName = classifier.getName(); + if (classifierName == null || classifierName.isBlank()) { + classifierName = "classifier_" + i; + } + runClassifier( + experimentId, + classifier, + classifierName, + taskResult, + trace, + caseClassifications, + classifierErrors); + } + if (!caseClassifications.isEmpty()) { + rootSpan.setAttribute( + "braintrust.classifications", toJson(caseClassifications)); + } + if (!classifierErrors.isEmpty()) { + Map mergedMetadata = + new LinkedHashMap<>(datasetCase.metadata()); + mergedMetadata.put("classifier_errors", classifierErrors); + rootSpan.setAttribute( + AttributeKey.stringKey("braintrust.metadata"), toJson(mergedMetadata)); + } + } } finally { rootSpan.end(); } @@ -236,6 +274,84 @@ private void runScoreForTaskException( } } + /** + * Runs a classifier inside its own span. Exceptions are recorded on the classifier span and + * surfaced via {@code classifierErrors}; they do not propagate. 
+     */
+    private void runClassifier(
+            String experimentId,
+            Classifier classifier,
+            String resolvedName,
+            TaskResult taskResult,
+            BrainstoreTrace trace,
+            Map>> caseClassifications,
+            Map classifierErrors) {
+        var classifierSpan =
+                tracer.spanBuilder(resolvedName)
+                        .setAttribute(PARENT, "experiment_id:" + experimentId)
+                        .startSpan();
+        try (var unused =
+                BraintrustContext.ofExperiment(experimentId, classifierSpan).makeCurrent()) {
+            Map spanAttrs = new LinkedHashMap<>();
+            spanAttrs.put("type", "classifier");
+            spanAttrs.put("name", resolvedName);
+            spanAttrs.put("purpose", "scorer");
+            classifierSpan.setAttribute("braintrust.span_attributes", toJson(spanAttrs));
+
+            // Call, validate, group and serialize in one try so no failure escapes the classifier.
+            try {
+                List classifications =
+                        classifier instanceof TracedClassifier tracedClassifier
+                                ? tracedClassifier.classify(taskResult, trace)
+                                : classifier.classify(taskResult);
+                if (classifications == null) {
+                    classifications = List.of();
+                }
+                Map>> outputByName = new LinkedHashMap<>();
+                for (var item : classifications) {
+                    if (item == null || item.id() == null || item.id().isBlank()) {
+                        throw new IllegalArgumentException(
+                                Classifier.INVALID_CLASSIFICATION_MESSAGE + " Got: " + item);
+                    }
+                    var named = item.name() != null && !item.name().isBlank();
+                    var itemName = named ? item.name() : resolvedName; // default: classifier name
+                    var itemMap = toClassificationItem(item);
+                    outputByName.computeIfAbsent(itemName, k -> new ArrayList<>()).add(itemMap);
+                }
+                classifierSpan.setAttribute("braintrust.output_json", toJson(outputByName));
+                for (var group : outputByName.entrySet()) { // publish only on full success
+                    caseClassifications
+                            .computeIfAbsent(group.getKey(), k -> new ArrayList<>())
+                            .addAll(group.getValue());
+                }
+            } catch (Exception e) {
+                classifierSpan.setStatus(StatusCode.ERROR, e.getMessage());
+                classifierSpan.recordException(e);
+                log.debug("Classifier '{}' threw exception", resolvedName, e);
+                classifierErrors.put(
+                        resolvedName, e.getMessage() == null ? e.toString() : e.getMessage());
+            }
+        } finally {
+            classifierSpan.end();
+        }
+    }
+
+    /**
+     * Converts a {@link Classification} to the wire-format {@code ClassificationItem}: drops {@code
+     * name}, includes {@code label} and {@code metadata} only when present.
+     */
+    private static Map toClassificationItem(Classification c) {
+        Map m = new LinkedHashMap<>();
+        m.put("id", c.id());
+        if (c.label() != null) {
+            m.put("label", c.label());
+        }
+        if (c.metadata() != null) {
+            m.put("metadata", c.metadata());
+        }
+        return m;
+    }
+
     /** Validates and records scores on the score span and root span. */
     private void recordScores(
             Span scoreSpan, Span rootSpan, Scorer scorer, List scores) {
@@ -276,6 +392,7 @@ public static final class Builder {
         private @Nullable Tracer tracer = null;
         private @Nullable Task task;
         private @Nonnull List> scorers = List.of();
+        private @Nonnull List> classifiers = List.of();
         private @Nonnull List> parameterDefs = List.of();
         private @Nonnull Map parameterValues = Map.of();
         private @Nonnull List tags = List.of();
@@ -291,8 +408,8 @@ public Eval build() {
             if (projectId == null) {
                 projectId = config.defaultProjectId().orElse(null);
             }
-            if (scorers.isEmpty()) {
-                throw new RuntimeException("must provide at least one scorer");
+            if (scorers.isEmpty() && classifiers.isEmpty()) {
+                throw new RuntimeException("must provide at least one scorer or classifier");
             }
             if (null == apiClient) {
                 apiClient = BraintrustOpenApiClient.of(config);
@@ -380,6 +497,12 @@ public final Builder scorers(Scorer...
scorers) { return this; } + @SafeVarargs + public final Builder classifiers(Classifier... classifiers) { + this.classifiers = List.of(classifiers); + return this; + } + /** Sets tags for the experiment. */ public Builder tags(List tags) { this.tags = List.copyOf(tags); diff --git a/braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java b/braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java new file mode 100644 index 00000000..db67d78b --- /dev/null +++ b/braintrust-sdk/src/main/java/dev/braintrust/eval/TracedClassifier.java @@ -0,0 +1,41 @@ +package dev.braintrust.eval; + +import dev.braintrust.trace.BrainstoreTrace; +import java.util.List; + +/** + * A classifier that receives access to the full distributed trace of the task that was evaluated. + * + *

Implement this interface when your classifier needs to examine intermediate LLM calls, tool + * invocations, or other spans produced during task execution — not just the final {@link + * TaskResult}. + * + * @param type of the input data + * @param type of the output data + */ +public interface TracedClassifier extends Classifier { + + /** + * Classifies the task result using the distributed trace for additional context. Called instead + * of {@link Classifier#classify(TaskResult)} when a {@link BrainstoreTrace} is available. + * + * @param taskResult the task output and originating dataset case + * @param trace lazy access to the distributed trace spans for this eval case + * @return zero or more classifications + */ + List classify(TaskResult taskResult, BrainstoreTrace trace); + + /** + * {@inheritDoc} + * + *

When used inside an {@link Eval}, this overload is never called — {@link + * #classify(TaskResult, BrainstoreTrace)} is dispatched instead. This default implementation + * throws {@link UnsupportedOperationException} to surface any accidental direct calls. + */ + @Override + default List classify(TaskResult taskResult) { + throw new UnsupportedOperationException( + "traced classifier classify method directly called. This is likely an accident. If" + + " you wish to support this, your implementation must override this method."); + } +} diff --git a/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java new file mode 100644 index 00000000..fb1445c6 --- /dev/null +++ b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierEvalTest.java @@ -0,0 +1,304 @@ +package dev.braintrust.eval; + +import static dev.braintrust.json.BraintrustJsonMapper.fromJson; +import static org.junit.jupiter.api.Assertions.*; + +import dev.braintrust.TestHarness; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.sdk.trace.data.SpanData; +import java.util.List; +import java.util.Map; +import lombok.SneakyThrows; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * End-to-end tests for the classifier flow on {@link Eval}, modeled on {@link EvalTest}. + * + *

These tests rely on VCR cassettes for new experiment names. Cassettes are regenerated by + * running {@code VCR_MODE=record BRAINTRUST_API_KEY=... ./gradlew :braintrust-sdk:test --tests + * 'dev.braintrust.eval.ClassifierEvalTest'} (see {@code scripts/re-record-cassettes.sh}). + */ +public class ClassifierEvalTest { + private TestHarness testHarness; + + @BeforeEach + void beforeEach() { + testHarness = TestHarness.setup(); + } + + // --------------------------------------------------------------------------- + // Builder validation (no API calls) + // --------------------------------------------------------------------------- + + @Test + void builderRejectsEmptyScorersAndClassifiers() { + var error = + assertThrows( + RuntimeException.class, + () -> + testHarness + .braintrust() + .evalBuilder() + .name("no-scorers-or-classifiers") + .cases(DatasetCase.of("hello", null)) + .taskFunction(input -> input) + .build()); + assertTrue( + error.getMessage().contains("at least one scorer or classifier"), + "expected message about scorer-or-classifier requirement, got: " + + error.getMessage()); + } + + @Test + void builderAcceptsClassifiersOnly() { + assertDoesNotThrow( + () -> + testHarness + .braintrust() + .evalBuilder() + .name("classifiers-only-build") + .cases(DatasetCase.of("hello", null)) + .taskFunction(input -> input) + .classifiers( + Classifier.single( + "category", + tr -> + new Classification( + "category", + "greeting", + null, + null))) + .build()); + } + + @Test + void builderAcceptsBothScorersAndClassifiers() { + assertDoesNotThrow( + () -> + testHarness + .braintrust() + .evalBuilder() + .name("scorers-and-classifiers-build") + .cases(DatasetCase.of("hello", "HELLO")) + .taskFunction(String::toUpperCase) + .scorers(Scorer.of("exact", tr -> 1.0)) + .classifiers( + Classifier.single( + "category", + tr -> + new Classification( + "category", "text", null, null))) + .build()); + } + + // 
--------------------------------------------------------------------------- + // End-to-end span assertions (require VCR cassettes) + // --------------------------------------------------------------------------- + + @Test + @SneakyThrows + void classifierOnlyEvalProducesClassifierSpanAndRootAggregation() { + var eval = + testHarness + .braintrust() + .evalBuilder() + .name("classifier-only-eval") + .cases(DatasetCase.of("hello", null)) + .taskFunction(input -> input) + .classifiers( + Classifier.single( + "category", + tr -> + new Classification( + "category", "greeting", "Greeting", null))) + .build(); + eval.run(); + + var spans = testHarness.awaitExportedSpans(); + var classifierSpan = findSpanByName(spans, "category"); + assertNotNull(classifierSpan, "expected classifier span named 'category'"); + + var spanAttrsJson = + classifierSpan + .getAttributes() + .get(AttributeKey.stringKey("braintrust.span_attributes")); + assertNotNull(spanAttrsJson); + var spanAttrs = fromJson(spanAttrsJson, Map.class); + assertEquals("classifier", spanAttrs.get("type")); + assertEquals("scorer", spanAttrs.get("purpose")); + assertEquals("category", spanAttrs.get("name")); + + var outputJson = + classifierSpan + .getAttributes() + .get(AttributeKey.stringKey("braintrust.output_json")); + assertNotNull(outputJson); + var output = fromJson(outputJson, Map.class); + @SuppressWarnings("unchecked") + var category = (List>) output.get("category"); + assertEquals(1, category.size()); + assertEquals("greeting", category.get(0).get("id")); + assertEquals("Greeting", category.get(0).get("label")); + + var rootSpan = findRootEvalSpan(spans); + assertNotNull(rootSpan); + var classificationsJson = + rootSpan.getAttributes().get(AttributeKey.stringKey("braintrust.classifications")); + assertNotNull(classificationsJson, "root span should carry braintrust.classifications"); + var classifications = fromJson(classificationsJson, Map.class); + assertTrue(classifications.containsKey("category")); + } 
+ + @Test + @SneakyThrows + void scorerAndClassifierBothRun() { + var eval = + testHarness + .braintrust() + .evalBuilder() + .name("scorer-and-classifier-eval") + .cases(DatasetCase.of("hello", "HELLO")) + .taskFunction(String::toUpperCase) + .scorers( + Scorer.of( + "exact", + tr -> + tr.result().equals(tr.datasetCase().expected()) + ? 1.0 + : 0.0)) + .classifiers( + Classifier.single( + "category", + tr -> new Classification("category", "text", null, null))) + .build(); + eval.run(); + + var spans = testHarness.awaitExportedSpans(); + assertNotNull(findSpanByName(spans, "score"), "expected score span"); + assertNotNull(findSpanByName(spans, "category"), "expected classifier span"); + } + + @Test + @SneakyThrows + void classifierExceptionIsNonFatalAndRecordedInRootMetadata() { + var eval = + testHarness + .braintrust() + .evalBuilder() + .name("classifier-error-eval") + .cases(DatasetCase.of("hello", null)) + .taskFunction(input -> input) + .classifiers( + Classifier.single( + "broken", + tr -> { + throw new RuntimeException("classifier boom"); + }), + Classifier.single( + "working", + tr -> new Classification("working", "ok", null, null))) + .build(); + eval.run(); + + var spans = testHarness.awaitExportedSpans(); + var rootSpan = findRootEvalSpan(spans); + assertNotNull(rootSpan); + + var metadataJson = + rootSpan.getAttributes().get(AttributeKey.stringKey("braintrust.metadata")); + assertNotNull(metadataJson, "expected metadata to be set when classifier errors occur"); + var metadata = fromJson(metadataJson, Map.class); + @SuppressWarnings("unchecked") + var classifierErrors = (Map) metadata.get("classifier_errors"); + assertNotNull(classifierErrors); + assertEquals("classifier boom", classifierErrors.get("broken")); + + // The working classifier still produced output + var classificationsJson = + rootSpan.getAttributes().get(AttributeKey.stringKey("braintrust.classifications")); + assertNotNull(classificationsJson); + var classifications = 
fromJson(classificationsJson, Map.class); + assertTrue(classifications.containsKey("working")); + } + + @Test + @SneakyThrows + void multiLabelClassifierGroupsUnderOneName() { + var eval = + testHarness + .braintrust() + .evalBuilder() + .name("multi-label-eval") + .cases(DatasetCase.of("great!", null)) + .taskFunction(input -> input) + .classifiers( + Classifier.of( + "sentiment", + tr -> + List.of( + new Classification( + "sentiment", + "positive", + "Positive", + null), + new Classification( + "sentiment", + "enthusiastic", + "Enthusiastic", + null)))) + .build(); + eval.run(); + + var spans = testHarness.awaitExportedSpans(); + var rootSpan = findRootEvalSpan(spans); + assertNotNull(rootSpan); + var classifications = + fromJson( + rootSpan.getAttributes() + .get(AttributeKey.stringKey("braintrust.classifications")), + Map.class); + @SuppressWarnings("unchecked") + var sentiment = (List>) classifications.get("sentiment"); + assertEquals(2, sentiment.size()); + assertEquals("positive", sentiment.get(0).get("id")); + assertEquals("enthusiastic", sentiment.get(1).get("id")); + } + + @Test + @SneakyThrows + void blankItemNameDefaultsToClassifierName() { + var eval = + testHarness + .braintrust() + .evalBuilder() + .name("default-name-eval") + .cases(DatasetCase.of("hello", null)) + .taskFunction(input -> input) + .classifiers( + // No name on the item — should fall back to the classifier name. 
+ Classifier.single( + "my_classifier", + tr -> new Classification(null, "foo", null, null))) + .build(); + eval.run(); + + var spans = testHarness.awaitExportedSpans(); + var rootSpan = findRootEvalSpan(spans); + assertNotNull(rootSpan); + var classifications = + fromJson( + rootSpan.getAttributes() + .get(AttributeKey.stringKey("braintrust.classifications")), + Map.class); + assertTrue(classifications.containsKey("my_classifier")); + } + + private static SpanData findSpanByName(List spans, String name) { + return spans.stream().filter(s -> name.equals(s.getName())).findFirst().orElse(null); + } + + private static SpanData findRootEvalSpan(List spans) { + return spans.stream().filter(s -> "eval".equals(s.getName())).findFirst().orElse(null); + } +} diff --git a/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java new file mode 100644 index 00000000..37ada2cd --- /dev/null +++ b/braintrust-sdk/src/test/java/dev/braintrust/eval/ClassifierTest.java @@ -0,0 +1,128 @@ +package dev.braintrust.eval; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +public class ClassifierTest { + + private static TaskResult taskResult(String input, T output) { + return new TaskResult<>(output, DatasetCase.of(input, null)); + } + + @Test + void singleFactoryReturnsOneClassification() { + Classifier classifier = + Classifier.single( + "category", + tr -> new Classification("category", "greeting", "Greeting", null)); + + var result = classifier.classify(taskResult("hello", "hello")); + assertEquals(1, result.size()); + assertEquals("greeting", result.get(0).id()); + assertEquals("Greeting", result.get(0).label()); + assertEquals("category", classifier.getName()); + } + + @Test + void singleFactoryNullReturnNormalizesToEmptyList() { + Classifier classifier = Classifier.single("maybe", tr -> null); + 
assertEquals(List.of(), classifier.classify(taskResult("x", "x"))); + } + + @Test + void listFactoryReturnsMultipleClassifications() { + Classifier classifier = + Classifier.of( + "sentiment", + tr -> + List.of( + new Classification( + "sentiment", "positive", "Positive", null), + new Classification( + "sentiment", + "enthusiastic", + "Enthusiastic", + null))); + + var result = classifier.classify(taskResult("great!", "great!")); + assertEquals(2, result.size()); + assertEquals("positive", result.get(0).id()); + assertEquals("enthusiastic", result.get(1).id()); + } + + @Test + void listFactoryNullReturnNormalizesToEmptyList() { + Classifier classifier = Classifier.of("maybe", tr -> null); + assertEquals(List.of(), classifier.classify(taskResult("x", "x"))); + } + + @Test + void classificationOfHelpers() { + var c1 = Classification.of("id1"); + assertNull(c1.name()); + assertEquals("id1", c1.id()); + assertNull(c1.label()); + assertNull(c1.metadata()); + + var c2 = Classification.of("id2", "Label 2"); + assertEquals("id2", c2.id()); + assertEquals("Label 2", c2.label()); + + var c3 = Classification.of("nm", "id3", "Label 3"); + assertEquals("nm", c3.name()); + assertEquals("id3", c3.id()); + assertEquals("Label 3", c3.label()); + } + + @Test + void classificationWithMetadataIsPreserved() { + var item = + new Classification( + "category", "greeting", "Greeting", Map.of("source", "unit-test")); + Classifier classifier = Classifier.single("category", tr -> item); + + var result = classifier.classify(taskResult("hi", "hi")); + assertEquals(1, result.size()); + assertEquals(Map.of("source", "unit-test"), result.get(0).metadata()); + } + + @Test + void validationThrowsForBlankId() { + Classifier classifier = + Classifier.single("bad", tr -> new Classification("bad", "", null, null)); + + var error = + assertThrows( + IllegalArgumentException.class, + () -> classifier.classify(taskResult("x", "x"))); + assertTrue( + error.getMessage().contains("each classification must be 
a non-empty object"), + "expected spec wording, got: " + error.getMessage()); + } + + @Test + void validationThrowsForNullItemInList() { + Classifier classifier = + Classifier.of("bad", tr -> java.util.Arrays.asList((Classification) null)); + + var error = + assertThrows( + IllegalArgumentException.class, + () -> classifier.classify(taskResult("x", "x"))); + assertTrue( + error.getMessage().contains("each classification must be a non-empty object"), + "expected spec wording, got: " + error.getMessage()); + } + + @Test + void getNameReturnsConstructorName() { + Classifier c1 = Classifier.of("foo", tr -> List.of()); + assertEquals("foo", c1.getName()); + + Classifier c2 = Classifier.single("bar", tr -> null); + assertEquals("bar", c2.getName()); + } +} diff --git a/examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java b/examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java new file mode 100644 index 00000000..270dcc61 --- /dev/null +++ b/examples/src/main/java/dev/braintrust/examples/ClassifiersExample.java @@ -0,0 +1,158 @@ +package dev.braintrust.examples; + +import dev.braintrust.Braintrust; +import dev.braintrust.eval.Classification; +import dev.braintrust.eval.Classifier; +import dev.braintrust.eval.DatasetCase; +import java.util.ArrayList; +import java.util.List; + +/** + * Classifiers categorize and label eval outputs. Unlike scorers (numeric 0-1), classifiers return + * structured {@link Classification} items with an id, optional label, and optional metadata. + * + *

<p>Three patterns are shown: + * + *
<ol>
+ *
<li>{@link Classifier#single} for a single-label classifier returning one {@link + * Classification}. + *
<li>{@link Classifier#of} for a multi-label classifier returning a list. + *
<li>An anonymous {@link Classifier} implementation for reusable classifiers with their own + * logic. + *
</ol>
+ * + *
<p>
Classifiers and scorers run independently — you can use either, or both together. + */ +public class ClassifiersExample { + public static void main(String[] args) throws Exception { + var braintrust = Braintrust.get(); + + // 1. Single-label classifier. + Classifier intentClassifier = + Classifier.single( + "intent", + tr -> { + var input = tr.datasetCase().input(); + String id; + if (input.matches("(?i).*thank.*")) { + id = "praise"; + } else if (input.matches("(?i).*(waiting|order|update).*")) { + id = "follow_up"; + } else if (input.matches("(?i).*(password|reset|find).*")) { + id = "how_to"; + } else if (input.matches("(?i).*(damaged|refund).*")) { + id = "complaint"; + } else { + id = "other"; + } + return new Classification( + "intent", id, capitalize(id.replace('_', ' ')), null); + }); + + // 2. Multi-label classifier. + Classifier toneClassifier = + Classifier.of( + "tone", + tr -> { + var input = tr.datasetCase().input(); + List labels = new ArrayList<>(); + if (input.matches("(?i).*(immediately|unacceptable|waiting).*")) { + labels.add(new Classification("tone", "urgent", "Urgent", null)); + } + if (input.matches("(?i).*(please|thank|just checking).*")) { + labels.add(new Classification("tone", "polite", "Polite", null)); + } + if (input.matches("(?i).*(unacceptable|damaged|waiting).*")) { + labels.add( + new Classification( + "tone", "frustrated", "Frustrated", null)); + } + if (labels.isEmpty()) { + labels.add(new Classification("tone", "neutral", "Neutral", null)); + } + return labels; + }); + + // 3. Custom Classifier implementation — full control over name and logic. + Classifier qualityClassifier = + new Classifier<>() { + @Override + public String getName() { + return "response_quality"; + } + + @Override + public List classify( + dev.braintrust.eval.TaskResult tr) { + var output = tr.result(); + int wordCount = output == null ? 
0 : output.trim().split("\\s+").length; + String id; + if (output == null || output.isBlank()) { + id = "no_response"; + } else if (wordCount < 5) { + id = "too_short"; + } else if (output.matches("(?i).*(immediately|right away|look into).*")) { + id = "action_oriented"; + } else { + id = "informational"; + } + return List.of( + new Classification( + "response_quality", + id, + capitalize(id.replace('_', ' ')), + java.util.Map.of("word_count", wordCount))); + } + }; + + var eval = + braintrust + .evalBuilder() + .name("classifiers-example-" + System.currentTimeMillis()) + .cases( + DatasetCase.of( + "Hi! I just wanted to say thank you, the product is" + + " amazing!", + null), + DatasetCase.of( + "I've been waiting 2 weeks for my order. This is" + + " unacceptable!", + null), + DatasetCase.of( + "How do I reset my password? I can't find the option" + + " anywhere.", + null), + DatasetCase.of( + "The item arrived damaged. I need a refund immediately.", + null), + DatasetCase.of( + "Just checking in — any update on my ticket #4821?", null)) + .taskFunction(ClassifiersExample::generateResponse) + .classifiers(intentClassifier, toneClassifier, qualityClassifier) + .build(); + + var result = eval.run(); + System.out.println("\n\n" + result.createReportString()); + } + + private static String generateResponse(String message) { + if (message.matches("(?i).*thank.*")) { + return "You're welcome! So glad you're enjoying it."; + } + if (message.matches("(?i).*(waiting|order).*")) { + return "I sincerely apologise for the delay. Let me look into this right away."; + } + if (message.matches("(?i).*(password|reset).*")) { + return "To reset your password, go to Settings > Account > Reset Password."; + } + if (message.matches("(?i).*(damaged|refund).*")) { + return "I'm sorry to hear that. I'll process your refund immediately."; + } + return "Thanks for reaching out! 
Let me check on that for you."; + } + + private static String capitalize(String s) { + if (s == null || s.isEmpty()) return s; + return Character.toUpperCase(s.charAt(0)) + s.substring(1); + } +} diff --git a/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java b/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java index e95ef5a0..75a8ece6 100644 --- a/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java +++ b/test-harness/src/testFixtures/java/dev/braintrust/TestHarness.java @@ -313,7 +313,7 @@ public void ensureRemoteDataset(String datasetName, Dataset expectedData) // verify var btDataset = braintrust.fetchDataset(datasetName); - if (datasetsEqual(expectedData, btDataset)) { + if (!datasetsEqual(expectedData, btDataset)) { throw new RuntimeException( "failed to ensure expected dataset: %s -- %s" .formatted(toList(expectedData), toList(btDataset))); diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-08d26d2faef8.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-228248286fba.json @@ 
-0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-4139f91c7e72.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-684178b0ab9d.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ 
b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-9611c99e6aac.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-a7fce53bd211.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fd38313bebba.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json 
b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json new file mode 100644 index 00000000..634dffc9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/api_apikey_login-fda483633056.json @@ -0,0 +1 @@ +{"org_info":[{"id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"Braintrust SDKs","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json new file mode 100644 index 00000000..a817864e --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-1185d0796fb3.json @@ -0,0 +1 @@ +{"id":"bd13b6ad-0fb2-4477-b407-0aa58127586e","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"scorer-and-classifier-eval","description":null,"created":"2026-05-27T10:13:48.702Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json new file mode 100644 index 00000000..eadc14a9 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-2c8bb270c5cf.json @@ -0,0 +1 @@ 
+{"id":"bda48716-d8cc-406b-aed1-10ca29b511e5","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"classifier-only-eval","description":null,"created":"2026-05-27T10:13:52.580Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json new file mode 100644 index 00000000..8c927324 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-4f1bd5afd18d.json @@ -0,0 +1 @@ +{"id":"129eb154-c227-43f4-a549-35bd3f281bd9","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"classifier-error-eval","description":null,"created":"2026-05-27T10:14:02.504Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json new file mode 100644 index 00000000..42a3a8d2 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-80d23f49c06c.json @@ -0,0 +1 @@ 
+{"id":"ed5735fb-b23b-4522-9b4f-546bcae35eae","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"multi-label-eval","description":null,"created":"2026-05-27T10:13:46.338Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json new file mode 100644 index 00000000..0f2383ab --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_experiment-81a6ebbafe29.json @@ -0,0 +1 @@ +{"id":"3d17f6cf-5729-44c9-8dcc-b95ab9c5f0f4","project_id":"f1e858a4-58e3-408f-983f-016760d7fa25","name":"default-name-eval","description":null,"created":"2026-05-27T10:13:57.484Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"internal_metadata":null,"parameters_id":null,"parameters_version":null,"public":false,"user_id":"08e3988c-e05c-4324-8763-8998a5b39755","metadata":null,"tags":null} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-30867e7088bd.json @@ -0,0 +1 @@ 
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-410ff9f133da.json @@ -0,0 +1 @@ +{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-4891298f2969.json @@ -0,0 +1 @@ +{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git 
a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-543a07178006.json @@ -0,0 +1 @@ +{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-c0742bb3c63f.json @@ -0,0 +1 @@ +{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-dd7665d7a48a.json @@ -0,0 +1 @@ 
+{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e74886687a34.json @@ -0,0 +1 @@ +{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json new file mode 100644 index 00000000..89452950 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/__files/v1_project-e7e35e493e43.json @@ -0,0 +1 @@ +{"objects":[{"id":"f1e858a4-58e3-408f-983f-016760d7fa25","org_id":"5abfae3a-7aa7-4653-a9c8-b3efcb18f584","name":"java-unit-test","description":null,"created":"2026-05-07T16:55:48.127Z","deleted_at":null,"user_id":"a5ca7f9c-bf20-40c4-a82b-5c992f6a38f5","settings":{"remote_eval_sources":[{"url":"http://localhost:8301","name":"localjava","description":null}]}}]} \ No newline at end of file diff --git 
a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json new file mode 100644 index 00000000..e80e1a08 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-08d26d2faef8.json @@ -0,0 +1,38 @@ +{ + "id" : "e5e5a390-4568-343b-95d7-7e50b56e2d3e", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-08d26d2faef8.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXBgHtHoAMEqQQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d209-36d29e3856e386c501419f1b;Parent=50f208976f235332;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:17 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 aafd761bed21ff3b2c4a07021d172702.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20900000000434852e942a27399", + "x-amzn-RequestId" : "4adcb8ff-9421-43a4-ba8f-0543cd0897fb", + "X-Amz-Cf-Id" : "KPwnsYEtUa_3P5lm_xsfehvgGHHGQRKqCVzUBxJNJv8udASi6Vv4Jg==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "e5e5a390-4568-343b-95d7-7e50b56e2d3e", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : 
"Started", + "newScenarioState" : "scenario-1-api-apikey-login-2", + "insertionIndex" : 139 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json new file mode 100644 index 00000000..d130210d --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-228248286fba.json @@ -0,0 +1,38 @@ +{ + "id" : "9602117c-895c-3d23-a069-36bd7207a729", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-228248286fba.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXCKE3kIAMEVGQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20d-4d3f3dc72ec48b9a3b8c04f2;Parent=403457bedc76358d;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:21 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20d000000005000c50aaf3b8111", + "x-amzn-RequestId" : "55329b39-c50b-4ba8-a2e6-1205aeb76d1c", + "X-Amz-Cf-Id" : "T1dQXV26G8lEmQQzBx41-0WGFMsKDQVuUZaQXI0EEPf5WUy3-IpmRg==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : 
"9602117c-895c-3d23-a069-36bd7207a729", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-3", + "newScenarioState" : "scenario-1-api-apikey-login-4", + "insertionIndex" : 134 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json new file mode 100644 index 00000000..376ad543 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-4139f91c7e72.json @@ -0,0 +1,38 @@ +{ + "id" : "57532630-927c-3919-a0d7-63a83e6b3ed0", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-4139f91c7e72.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXCrH1YoAMEZuw=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d210-68f2dcb247a18da6096f18f3;Parent=3351e09ca2d830e3;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:24 GMT", + "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 1b7c94274bd830ddf26396883b21ed8a.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d210000000004c8b8aff442f640c", + "x-amzn-RequestId" : "bf6984ee-1292-4284-ab86-68da0d531eb5", + "X-Amz-Cf-Id" : "ggKVGglyTdPcOxTy-6gQHUAK9iI3qa-gl5HqCrPpOzesXhTvLIv1NQ==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, 
no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "57532630-927c-3919-a0d7-63a83e6b3ed0", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-4", + "newScenarioState" : "scenario-1-api-apikey-login-5", + "insertionIndex" : 131 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json new file mode 100644 index 00000000..8b8a9bdb --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-684178b0ab9d.json @@ -0,0 +1,37 @@ +{ + "id" : "28cf2c95-3206-316f-939f-74d5e43caa3c", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-684178b0ab9d.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXEAE0hIAMEN_Q=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d219-71253c4e506e4a7947864732;Parent=3bd8be87000e5343;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:33 GMT", + "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 a44b410ee30a39bfebd24cea78fab2b0.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d21900000000719be83d517acd00", + "x-amzn-RequestId" : "d92a299f-74fa-4820-8f0b-fb0365fd8331", + 
"X-Amz-Cf-Id" : "k5s1K-dtngbSlv43UW0hKdbO_tCi1E9wlWsEYde1i5stLwfd6O6j3g==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "28cf2c95-3206-316f-939f-74d5e43caa3c", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-8", + "insertionIndex" : 121 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json new file mode 100644 index 00000000..417b5b06 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-9611c99e6aac.json @@ -0,0 +1,38 @@ +{ + "id" : "058e04ef-a97a-3a53-8129-7fc92d2dacd5", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-9611c99e6aac.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXC_GH_IAMEEfw=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d212-54d6989d24d3b0970b860d75;Parent=178fc312bd1c2944;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:26 GMT", + "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 d1feda220715f81c20b2cca33054d72a.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : 
"6a16d212000000004ce3d30800e5905e", + "x-amzn-RequestId" : "76fef457-9295-4146-b295-0fcd16a1a715", + "X-Amz-Cf-Id" : "RdJ96709nNYgQvFSEyD0XxbP2lmSZO634x8qoqe-1PONWFMRXaG-bg==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "058e04ef-a97a-3a53-8129-7fc92d2dacd5", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-5", + "newScenarioState" : "scenario-1-api-apikey-login-6", + "insertionIndex" : 128 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json new file mode 100644 index 00000000..d14b3a9e --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-a7fce53bd211.json @@ -0,0 +1,38 @@ +{ + "id" : "dafe0989-f7a8-3d93-a37b-4e88227a2425", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-a7fce53bd211.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXBuEjeIAMEoWA=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20a-6fe2e1c81e8436bf350a6d90;Parent=3e3aa626a4ed2918;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:18 GMT", + "Via" : "1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront), 1.1 f39ba1c0189814d2897853765b8684b2.cloudfront.net (CloudFront)", + "access-control-expose-headers" : 
"x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20a00000000328b1fb874b22495", + "x-amzn-RequestId" : "b0e5f526-399c-4690-8af8-ea2a6f63f480", + "X-Amz-Cf-Id" : "MN5c5XBw2VvbGJhjz_UbWpOPK5qKEhA-pYNbZVYBqJekJsreM0TI6w==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "dafe0989-f7a8-3d93-a37b-4e88227a2425", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-2", + "newScenarioState" : "scenario-1-api-apikey-login-3", + "insertionIndex" : 137 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json new file mode 100644 index 00000000..349b8a9e --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fd38313bebba.json @@ -0,0 +1,38 @@ +{ + "id" : "8713c97f-9d52-324e-a859-b43f5e7f596c", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-fd38313bebba.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXDyGceoAMEibQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d217-1313d2cf6f97731228ee95ad;Parent=391239ba8d8316c4;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:31 GMT", + "Via" : "1.1 
1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront), 1.1 9a94d1d050cdaaed2e0186e2da88b14c.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d2170000000061abd59cc184ebd1", + "x-amzn-RequestId" : "67e9196b-4272-4875-a71a-ed3b5868db47", + "X-Amz-Cf-Id" : "e4LBeQe47mxQpyg3ewMWhB1NwKSwaUHhmLqzzNyZbXi4ExXRbvVPvA==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "8713c97f-9d52-324e-a859-b43f5e7f596c", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-7", + "newScenarioState" : "scenario-1-api-apikey-login-8", + "insertionIndex" : 123 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json new file mode 100644 index 00000000..e2cd318e --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/api_apikey_login-fda483633056.json @@ -0,0 +1,38 @@ +{ + "id" : "7fc7ac7d-600b-3bb1-bbeb-6c20419871bd", + "name" : "api_apikey_login", + "request" : { + "url" : "/api/apikey/login", + "method" : "POST" + }, + "response" : { + "status" : 200, + "bodyFileName" : "api_apikey_login-fda483633056.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXDQFIgoAMEvWQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "259", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : 
"Root=1-6a16d214-2cd07d1772c84d2909e43107;Parent=515a17fbfa890994;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:28 GMT", + "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 e02f538931b17b78287c925d3a647504.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d214000000003783962a661571fc", + "x-amzn-RequestId" : "ab54225a-603e-48e8-83dd-1af3e8286785", + "X-Amz-Cf-Id" : "x975G8mDgfBC-efvRtpt8u-YVVSTnd2yYsmZLEEn4gXaxeZSEfyLhA==", + "etag" : "W/\"103-nNkLTyyrgUiaGMR62zMegGj/5Ig\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "7fc7ac7d-600b-3bb1-bbeb-6c20419871bd", + "persistent" : true, + "scenarioName" : "scenario-1-api-apikey-login", + "requiredScenarioState" : "scenario-1-api-apikey-login-6", + "newScenarioState" : "scenario-1-api-apikey-login-7", + "insertionIndex" : 126 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json new file mode 100644 index 00000000..4c124322 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-1185d0796fb3.json @@ -0,0 +1,45 @@ +{ + "id" : "eecc14b6-db38-3599-a246-dfa88a5f3369", + "name" : "v1_experiment", + "request" : { + "url" : "/v1/experiment", + "method" : "POST", + "headers" : { + "Content-Type" : { + "equalTo" : "application/json" + } + }, + "bodyPatterns" : [ { + "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"scorer-and-classifier-eval\"}", + 
"ignoreArrayOrder" : true, + "ignoreExtraElements" : false + } ] + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_experiment-1185d0796fb3.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXCTEBZIAMEs8Q=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "460", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20e-26740e1b52c0f6ee059e5ca6;Parent=5f9b3be8b414e885;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:22 GMT", + "Via" : "1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront), 1.1 e02f538931b17b78287c925d3a647504.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20e00000000743b62e99266c2b2", + "x-amzn-RequestId" : "2f575965-a65c-451a-a367-68c18fd7d05b", + "X-Amz-Cf-Id" : "af2trR_NgbPDyNjDKkEX0gBSesSTjoRBmctpOuV7h8zdBICjMaoeCQ==", + "etag" : "W/\"1cc-wY6bMr0WYAzBpcNQkHl9XTJwGLo\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "eecc14b6-db38-3599-a246-dfa88a5f3369", + "persistent" : true, + "insertionIndex" : 133 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json new file mode 100644 index 00000000..aac667da --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-2c8bb270c5cf.json @@ -0,0 +1,45 @@ +{ + "id" : "cc272b70-f30a-3174-9ba3-a81baae8f4ed", + "name" : "v1_experiment", + "request" : { + "url" : 
"/v1/experiment", + "method" : "POST", + "headers" : { + "Content-Type" : { + "equalTo" : "application/json" + } + }, + "bodyPatterns" : [ { + "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"classifier-only-eval\"}", + "ignoreArrayOrder" : true, + "ignoreExtraElements" : false + } ] + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_experiment-2c8bb270c5cf.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXCvG5hIAMEqew=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "454", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d211-3f0647211bd7e912248c7f32;Parent=585772df002b596f;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:25 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 9188ac315a73b9d6c346dfcf5866043c.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d2110000000066a77c1b36a9fdff", + "x-amzn-RequestId" : "7e8a597c-6a7c-43f5-b27a-910e3f58ac36", + "X-Amz-Cf-Id" : "xeUuj41sVJh6kF1TvZup12XNOhgba19t4D8QV9SZVIez9YAgT238UQ==", + "etag" : "W/\"1c6-rz81IPhQ4qZNL6lAqhJrzM+gycU\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "cc272b70-f30a-3174-9ba3-a81baae8f4ed", + "persistent" : true, + "insertionIndex" : 130 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json new file mode 100644 index 
00000000..728272dd --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-4f1bd5afd18d.json @@ -0,0 +1,45 @@ +{ + "id" : "b82cb2d9-9920-3347-b016-b1d659c0af6e", + "name" : "v1_experiment", + "request" : { + "url" : "/v1/experiment", + "method" : "POST", + "headers" : { + "Content-Type" : { + "equalTo" : "application/json" + } + }, + "bodyPatterns" : [ { + "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"classifier-error-eval\"}", + "ignoreArrayOrder" : true, + "ignoreExtraElements" : false + } ] + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_experiment-4f1bd5afd18d.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXEIF7IoAMEbXA=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "455", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d21a-66b823910bcf105c6f698586;Parent=0e932b08eb869015;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:34 GMT", + "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 aedb60fbad7f08567276abe527b7fb22.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d21a000000001bea3c38943af2f0", + "x-amzn-RequestId" : "6504845d-5a89-4221-b197-9f41f9e3259b", + "X-Amz-Cf-Id" : "rfZjZ3HExQoDW6mtFFGqaTfY8oHv422o1-82u1Fqj8RuR5ckxA5VnQ==", + "etag" : "W/\"1c7-Nuu5x5+lc1SsjkxZYjB+5Dmn450\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "b82cb2d9-9920-3347-b016-b1d659c0af6e", + "persistent" : true, + "insertionIndex" : 120 +} \ No newline at end of 
file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json new file mode 100644 index 00000000..9e90708c --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-80d23f49c06c.json @@ -0,0 +1,45 @@ +{ + "id" : "a5ae2b98-65c4-3fa3-b2fd-58b8852e2e8f", + "name" : "v1_experiment", + "request" : { + "url" : "/v1/experiment", + "method" : "POST", + "headers" : { + "Content-Type" : { + "equalTo" : "application/json" + } + }, + "bodyPatterns" : [ { + "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"multi-label-eval\"}", + "ignoreArrayOrder" : true, + "ignoreExtraElements" : false + } ] + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_experiment-80d23f49c06c.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXByGBhIAMEd8A=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "450", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20b-21aca5612600689b5f182d05;Parent=5978daff80eb7de6;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:19 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 89664692f153569d5d76f7ee89b2e518.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20b0000000045eef8862a28282e", + "x-amzn-RequestId" : "5cb95ff0-5a69-4084-bed9-5c9ed2f2967c", + "X-Amz-Cf-Id" : "M_PS9I3vGX9q-IYeh99FQ8XV47gtzziCHBgYaGWlJZTxQlKtmn6EuQ==", + "etag" : "W/\"1c2-xdW6/5KUQpOgyFv/Kw/psYXB6lA\"", + "cache-control" : "no-store, no-cache, must-revalidate, 
proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "a5ae2b98-65c4-3fa3-b2fd-58b8852e2e8f", + "persistent" : true, + "insertionIndex" : 136 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json new file mode 100644 index 00000000..3e098ee6 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_experiment-81a6ebbafe29.json @@ -0,0 +1,45 @@ +{ + "id" : "88dfcc21-c37b-3d4e-9cf7-01d8d26e7cdc", + "name" : "v1_experiment", + "request" : { + "url" : "/v1/experiment", + "method" : "POST", + "headers" : { + "Content-Type" : { + "equalTo" : "application/json" + } + }, + "bodyPatterns" : [ { + "equalToJson" : "{\"project_id\":\"f1e858a4-58e3-408f-983f-016760d7fa25\",\"name\":\"default-name-eval\"}", + "ignoreArrayOrder" : true, + "ignoreExtraElements" : false + } ] + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_experiment-81a6ebbafe29.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXDZEnHoAMElQg=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "451", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d215-18b6a8be368d6a2b48e280e4;Parent=4210e31acd963bbc;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:29 GMT", + "Via" : "1.1 a7347a5f5a64db5951cd2879c6fd86c8.cloudfront.net (CloudFront), 1.1 aedb60fbad7f08567276abe527b7fb22.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d2150000000061e0529576243763", + 
"x-amzn-RequestId" : "b7b2de2e-893d-4644-9fab-f800105d0171", + "X-Amz-Cf-Id" : "Rh6Z30cHxTokA9lEY2Ki0NziaYupibRAWnT9BYMsMUMoLJ3Y_GdqNw==", + "etag" : "W/\"1c3-dkvmbzbnogEoefH3HNfcb+4qbp8\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "88dfcc21-c37b-3d4e-9cf7-01d8d26e7cdc", + "persistent" : true, + "insertionIndex" : 125 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json new file mode 100644 index 00000000..f05070e2 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-30867e7088bd.json @@ -0,0 +1,44 @@ +{ + "id" : "6606a3c7-099c-3495-b32d-59502385df8a", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-30867e7088bd.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXD2GdlIAMEZ9Q=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d218-0e8b14c94f499dea67b23891;Parent=32eb08ae9c40f8c4;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:32 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 193b73e60a0ed559f0dfa5eb247e5b34.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + 
"x-bt-internal-trace-id" : "6a16d21800000000481708e0aaf04435", + "x-amzn-RequestId" : "bb03e88e-1edb-478d-9e64-40d6709282b8", + "X-Amz-Cf-Id" : "O-vZuMFFSx5g20nPrUGL1uofv5a7lfzh-DerUTc9vhOqyf5NeR7S2w==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "6606a3c7-099c-3495-b32d-59502385df8a", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-8", + "insertionIndex" : 122 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json new file mode 100644 index 00000000..3b61260c --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-410ff9f133da.json @@ -0,0 +1,45 @@ +{ + "id" : "2c0e6f0a-b1c8-3b66-8435-51de30a1a7dd", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-410ff9f133da.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXC6Ep5oAMEAyQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d212-2ec9cc4c7f4206431b5b3f27;Parent=32f0a00b235c198c;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:26 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 96891645583a0d37345ae58fd6592e98.cloudfront.net (CloudFront)", + "access-control-expose-headers" : 
"x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d2120000000051431f174a009d8f", + "x-amzn-RequestId" : "6478a632-afc0-4e1a-98e9-c970acfa4e12", + "X-Amz-Cf-Id" : "wzlkpDq0BXAz47-OAsOrm72fKP6BTAQLOsLVokBH0jT3pkPBQnjO3w==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "2c0e6f0a-b1c8-3b66-8435-51de30a1a7dd", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-5", + "newScenarioState" : "scenario-2-v1-project-6", + "insertionIndex" : 129 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json new file mode 100644 index 00000000..9d6e550d --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-4891298f2969.json @@ -0,0 +1,45 @@ +{ + "id" : "f0270406-fe88-3aec-a5a7-ee6d2435ae22", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-4891298f2969.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXBWHzaIAMEbjg=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d208-5ab021804fabf735270603a8;Parent=705ce3859c59aff4;Sampled=0;Lineage=1:24be3d11:0", + "Date" : 
"Wed, 27 May 2026 11:14:16 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 25a83a69fd8e833e18790d3971b848a8.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d208000000006d9f9c719887c6dc", + "x-amzn-RequestId" : "2b5c1be0-8edb-4f38-b151-3d9ead611d59", + "X-Amz-Cf-Id" : "boyVP5_5Icm6H5Po-pwHjYHv0IXWH-6RkNqIA3t81TmX0Ijks7BW0g==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "f0270406-fe88-3aec-a5a7-ee6d2435ae22", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "Started", + "newScenarioState" : "scenario-2-v1-project-2", + "insertionIndex" : 140 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json new file mode 100644 index 00000000..b9c1e229 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-543a07178006.json @@ -0,0 +1,45 @@ +{ + "id" : "1c224e28-c695-3150-80a1-b131fa0ac88b", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-543a07178006.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXDpGHzIAMEj_A=", + "vary" : "Origin, Accept-Encoding", + 
"x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d216-585bcd564752520a2764188a;Parent=6037c8f8c6826bf3;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:31 GMT", + "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 89664692f153569d5d76f7ee89b2e518.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d21600000000245f63f400cfca72", + "x-amzn-RequestId" : "495dee67-c272-46ef-b9cb-2f3e8c440e72", + "X-Amz-Cf-Id" : "Yfnyy-GP_3Sg9RL4nEkJEvuKX4Unn87lXHl6fehYOHwO8XxO8B_NzQ==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "1c224e28-c695-3150-80a1-b131fa0ac88b", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-7", + "newScenarioState" : "scenario-2-v1-project-8", + "insertionIndex" : 124 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json new file mode 100644 index 00000000..55948026 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-c0742bb3c63f.json @@ -0,0 +1,45 @@ +{ + "id" : "02e68d25-de8f-38c9-b80e-6c8c4390e791", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 
200, + "bodyFileName" : "v1_project-c0742bb3c63f.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXDHG52IAMETHQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d213-0331effd0a508caa1931af7b;Parent=48874c626d0a4b18;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:27 GMT", + "Via" : "1.1 cb45a99b778649cddac95c220851f0ae.cloudfront.net (CloudFront), 1.1 1b7c94274bd830ddf26396883b21ed8a.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d213000000000db05346ecb0692d", + "x-amzn-RequestId" : "a66e7aa5-3773-4927-acd5-78ce5d892a5d", + "X-Amz-Cf-Id" : "gVarBklLPnqMsM0b4B3i5QkmeOes8Jjh7VDWe-Xxx05qAwhSUNsEug==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "02e68d25-de8f-38c9-b80e-6c8c4390e791", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-6", + "newScenarioState" : "scenario-2-v1-project-7", + "insertionIndex" : 127 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json new file mode 100644 index 00000000..22c2cf49 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-dd7665d7a48a.json @@ -0,0 +1,45 @@ +{ + "id" : "0254f542-fbbd-37af-903b-d925ff1ca4af", + "name" : "v1_project", + 
"request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-dd7665d7a48a.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXCiFhZIAMEFvQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20f-6094aca74304bcca352d39aa;Parent=2302cf88ea5f8c6c;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:24 GMT", + "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 76fabd50aff5345ed3105adfbd47fb46.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20f0000000061d12d99d3f12326", + "x-amzn-RequestId" : "034806bd-5e15-4c6e-818f-ed0adc759fca", + "X-Amz-Cf-Id" : "p4k33S4Uh_IiB6N4xpxuw3Vy3_Fq9NchymLzp1PLjIgpfOhZLzeyGw==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "0254f542-fbbd-37af-903b-d925ff1ca4af", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-4", + "newScenarioState" : "scenario-2-v1-project-5", + "insertionIndex" : 132 +} \ No newline at end of file diff --git a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json new file mode 100644 index 00000000..8a5fb7c9 --- 
/dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e74886687a34.json @@ -0,0 +1,45 @@ +{ + "id" : "b0b988b5-0416-376e-8651-152095d5b692", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-e74886687a34.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXCBFx1IAMEUUw=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20c-70666faf0263b2ce1db6eb04;Parent=73bbc66fd7eb898d;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:20 GMT", + "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 1cb50957fd77e1eaad139f90b2e44564.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20c0000000025b845fca788ff05", + "x-amzn-RequestId" : "8bfdc679-2ece-4279-abc0-0581a4f1fc34", + "X-Amz-Cf-Id" : "QDXJ7_8f7PjYmzPSpKFri0AYY9mrej39EYYwGwllS6pCe6Wj-mRIVw==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "b0b988b5-0416-376e-8651-152095d5b692", + "persistent" : true, + "scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-3", + "newScenarioState" : "scenario-2-v1-project-4", + "insertionIndex" : 135 +} \ No newline at end of file diff --git 
a/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json new file mode 100644 index 00000000..91a2f6b6 --- /dev/null +++ b/test-harness/src/testFixtures/resources/cassettes/braintrust/mappings/v1_project-e7e35e493e43.json @@ -0,0 +1,45 @@ +{ + "id" : "d2f428e5-f001-3e3a-9b24-5e1d7065518c", + "name" : "v1_project", + "request" : { + "urlPath" : "/v1/project", + "method" : "GET", + "queryParameters" : { + "project_name" : { + "hasExactly" : [ { + "equalTo" : "java-unit-test" + } ] + } + } + }, + "response" : { + "status" : 200, + "bodyFileName" : "v1_project-e7e35e493e43.json", + "headers" : { + "X-Cache" : "Miss from cloudfront", + "expires" : "0", + "x-amz-apigw-id" : "eBXBpHbzIAMEgmQ=", + "vary" : "Origin, Accept-Encoding", + "x-amzn-Remapped-content-length" : "361", + "X-Amz-Cf-Pop" : [ "MNL51-P1", "MNL51-P1" ], + "X-Amzn-Trace-Id" : "Root=1-6a16d20a-4a08de754676dbd2118c8c50;Parent=17ce211499fdfc49;Sampled=0;Lineage=1:24be3d11:0", + "Date" : "Wed, 27 May 2026 11:14:18 GMT", + "Via" : "1.1 3cb4f0364fec17117cb52ac539a5430c.cloudfront.net (CloudFront), 1.1 9188ac315a73b9d6c346dfcf5866043c.cloudfront.net (CloudFront)", + "access-control-expose-headers" : "x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms,x-bt-internal-trace-id", + "access-control-allow-credentials" : "true", + "x-bt-internal-trace-id" : "6a16d20a000000001ff48a7a1c2a7cd9", + "x-amzn-RequestId" : "f1b758fe-a590-4aa5-80eb-682722e4379c", + "X-Amz-Cf-Id" : "_h4ILEWdkwzk-3vrZIxLOEjWT6kY-O7ny0r5maDbiQU3RSSF1XNP5Q==", + "etag" : "W/\"169-XiwCuJsCqAZuAH8JspCgkYonnKw\"", + "cache-control" : "no-store, no-cache, must-revalidate, proxy-revalidate", + "surrogate-control" : "no-store", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "d2f428e5-f001-3e3a-9b24-5e1d7065518c", + "persistent" : true, + 
"scenarioName" : "scenario-2-v1-project", + "requiredScenarioState" : "scenario-2-v1-project-2", + "newScenarioState" : "scenario-2-v1-project-3", + "insertionIndex" : 138 +} \ No newline at end of file