diff --git a/.gitignore b/.gitignore
index 85d9a55a..ad035292 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,6 @@ gemfiles/*.gemfile.lock
 # Ignore local Docker files (for dev env customizations)
 docker-compose.override.yml
 Dockerfile.local
+
+# BTX: cached braintrust-spec downloads (fetched on demand)
+/test/btx/.spec-cache/
diff --git a/Rakefile b/Rakefile
index 68f2f04c..ac882837 100644
--- a/Rakefile
+++ b/Rakefile
@@ -5,7 +5,38 @@ require "rake/testtask"
 desc "Run tests (optionally with seed: rake test[12345])"
 task :test, [:seed] do |t, args|
   seed_opt = args[:seed] ? " -- --seed=#{args[:seed]}" : ""
-  sh "ruby -Ilib:test -e \"Dir.glob('test/**/*_test.rb').each { |f| require_relative f }\"#{seed_opt}"
+  # Exclude the BTX cross-language spec suite — it requires provider gems and
+  # is run separately via `rake test:btx` (under the contrib appraisal).
+  sh "ruby -Ilib:test -e \"Dir.glob('test/**/*_test.rb').reject { |f| f.start_with?('test/btx/') }.each { |f| require_relative f }\"#{seed_opt}"
+end
+
+namespace :test do
+  # BTX: cross-language LLM-span spec suite.
+  #
+  # Requires the openai + anthropic gems, so it runs under the `contrib`
+  # appraisal. Use `rake test:btx` while already inside a gemfile that has the
+  # provider gems (e.g. `bundle exec appraisal contrib rake test:btx`), or
+  # `rake test:btx:ci` which selects the contrib appraisal for you.
+  namespace :btx do
+    desc "Fetch the pinned braintrust-spec into the local cache (idempotent)"
+    task :fetch_spec do
+      # Run the fetch in a clean process before WebMock is loaded so the GitHub
+      # download is not blocked by the test suite's HTTP stubbing.
+      sh "ruby -Itest/btx -e \"require 'spec_fetcher'; puts Braintrust::BTX::SpecFetcher.spec_root\""
+    end
+
+    desc "Run the BTX suite under the contrib appraisal (used by `rake ci`)"
+    task :ci do
+      # Ensure the contrib gemfile (openai + anthropic) is installed, then run.
+      sh "bundle exec appraisal contrib bundle install --quiet"
+      sh "bundle exec appraisal contrib rake test:btx"
+    end
+  end
+
+  desc "Run the BTX cross-language LLM-span spec suite (run under the contrib appraisal)"
+  task btx: :"btx:fetch_spec" do
+    sh "ruby -Ilib:test -e \"require_relative 'test/btx/btx_test.rb'\""
+  end
 end
 
 desc "Run Standard linter"
@@ -91,8 +122,8 @@ task coverage: :test do
   end
 end
 
-desc "Verify CI (lint + test all appraisal scenarios)"
-task ci: [:lint, :"test:appraisal"]
+desc "Verify CI (lint + test all appraisal scenarios + btx spec suite)"
+task ci: [:lint, :"test:appraisal", :"test:btx:ci"]
 
 task default: :ci
 
diff --git a/lib/braintrust/contrib/anthropic/instrumentation/common.rb b/lib/braintrust/contrib/anthropic/instrumentation/common.rb
index 710a4134..9bbcd398 100644
--- a/lib/braintrust/contrib/anthropic/instrumentation/common.rb
+++ b/lib/braintrust/contrib/anthropic/instrumentation/common.rb
@@ -33,10 +33,23 @@ def self.parse_usage_tokens(usage)
               metrics[target] = value.to_i if target
             end
 
-            # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs)
+            # Cache-creation breakdown. When Anthropic returns the per-TTL
+            # `cache_creation` breakdown, report the granular metrics
+            # (prompt_cache_creation_5m_tokens / _1h_tokens) and drop the
+            # aggregate prompt_cache_creation_tokens — the aggregate is just the
+            # sum of the variants, so reporting both would double count.
+            cache_creation_total = metrics["prompt_cache_creation_tokens"]
+            apply_cache_creation_breakdown(metrics, usage_hash)
+
+            # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs).
+            # Use the original aggregate total when present, otherwise the
+            # granular breakdown sum.
+            creation_for_prompt = cache_creation_total ||
+              (metrics["prompt_cache_creation_5m_tokens"] || 0) +
+                (metrics["prompt_cache_creation_1h_tokens"] || 0)
             prompt_tokens = (metrics["prompt_tokens"] || 0) +
               (metrics["prompt_cached_tokens"] || 0) +
-              (metrics["prompt_cache_creation_tokens"] || 0)
+              creation_for_prompt
             metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0
 
             # Calculate total
@@ -46,6 +59,36 @@ def self.parse_usage_tokens(usage)
 
             metrics
           end
+
+          # Map the nested `cache_creation` breakdown to per-TTL metrics and
+          # remove the now-redundant aggregate. No-op when the breakdown is
+          # absent or carries no positive values.
+          # @param metrics [Hash] metrics accumulated so far (mutated)
+          # @param usage_hash [Hash] raw Anthropic usage hash
+          def self.apply_cache_creation_breakdown(metrics, usage_hash)
+            breakdown = usage_hash["cache_creation"] || usage_hash[:cache_creation]
+            breakdown = breakdown.to_h if breakdown.respond_to?(:to_h)
+            return unless breakdown.is_a?(Hash)
+
+            ttl_map = {
+              "ephemeral_5m_input_tokens" => "prompt_cache_creation_5m_tokens",
+              "ephemeral_1h_input_tokens" => "prompt_cache_creation_1h_tokens"
+            }
+
+            emitted = false
+            ttl_map.each do |source, target|
+              next unless breakdown.key?(source) || breakdown.key?(source.to_sym)
+              value = breakdown[source] || breakdown[source.to_sym]
+              next unless value.is_a?(Numeric)
+              metrics[target] = value.to_i
+              emitted = true
+            end
+
+            # When the per-TTL breakdown is present, drop the aggregate so we do
+            # not double count (spec: "anthropic cache tokens only send 5m or
+            # 1h variants").
+            metrics.delete("prompt_cache_creation_tokens") if emitted
+          end
         end
       end
     end
diff --git a/lib/braintrust/contrib/anthropic/instrumentation/messages.rb b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb
index ab112fc8..76285d19 100644
--- a/lib/braintrust/contrib/anthropic/instrumentation/messages.rb
+++ b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb
@@ -34,6 +34,7 @@ def create(**params)
 
               tracer.in_span("anthropic.messages.create") do |span|
                 metadata = build_metadata(params)
+                Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                 set_input(span, params)
 
                 response = nil
@@ -98,6 +99,13 @@ def build_metadata(params, stream: false)
             def set_input(span, params)
               input_messages = []
 
+              # User/assistant messages come first, then the system prompt is
+              # appended (matching the cross-language spec / backend format).
+              if params[:messages]
+                messages_array = params[:messages].map(&:to_h)
+                input_messages.concat(messages_array)
+              end
+
               if params[:system_]
                 system_content = params[:system_]
                 if system_content.is_a?(Array)
@@ -110,11 +118,6 @@ def set_input(span, params)
                 end
               end
 
-              if params[:messages]
-                messages_array = params[:messages].map(&:to_h)
-                input_messages.concat(messages_array)
-              end
-
               Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any?
             end
 
@@ -122,10 +125,10 @@ def set_output(span, response)
               return unless response.respond_to?(:content) && response.content
 
               content_array = response.content.map(&:to_h)
-              output = [{
+              output = {
                 role: response.respond_to?(:role) ? response.role : "assistant",
                 content: content_array
-              }]
+              }
               Support::OTel.set_json_attr(span, "braintrust.output_json", output)
             end
 
@@ -196,7 +199,8 @@ def close
                 metadata = ctx[:metadata]
                 messages_instance = ctx[:messages_instance]
 
-                tracer.in_span("anthropic.messages.create") do |span|
+                tracer.in_span("anthropic.messages.stream") do |span|
+                  Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                   messages_instance.send(:set_input, span, params)
                   Support::OTel.set_json_attr(span, "braintrust.metadata", metadata)
                 end
@@ -215,7 +219,8 @@ def trace_consumption(ctx)
               metadata = ctx[:metadata]
               messages_instance = ctx[:messages_instance]
 
-              tracer.in_span("anthropic.messages.create") do |span|
+              tracer.in_span("anthropic.messages.stream") do |span|
+                Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                 messages_instance.send(:set_input, span, params)
                 Support::OTel.set_json_attr(span, "braintrust.metadata", metadata)
 
diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb
index ac062290..59cc82c8 100644
--- a/lib/braintrust/contrib/openai/instrumentation/chat.rb
+++ b/lib/braintrust/contrib/openai/instrumentation/chat.rb
@@ -40,6 +40,7 @@ def create(**params)
                 tracer.in_span("Chat Completion") do |span|
                   metadata = build_metadata(params)
 
+                  Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                   set_input(span, params)
 
                   response = nil
@@ -180,6 +181,7 @@ def trace_consumption(ctx)
                 start_time = Braintrust::Internal::Time.measure
 
                 tracer.in_span("Chat Completion") do |span|
+                  Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                   completions_instance.send(:set_input, span, params)
                   Support::OTel.set_json_attr(span, "braintrust.metadata", metadata)
 
@@ -252,6 +254,7 @@ def each(&block)
                 time_to_first_token = nil
 
                 tracer.in_span("Chat Completion") do |span|
+                  Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                   completions_instance.send(:set_input, span, params)
                   Support::OTel.set_json_attr(span, "braintrust.metadata", metadata)
 
diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb
index 5bb4da60..d95f37ad 100644
--- a/lib/braintrust/contrib/openai/instrumentation/responses.rb
+++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb
@@ -39,6 +39,7 @@ def create(**params)
               tracer.in_span("openai.responses.create") do |span|
                 metadata = build_metadata(params)
 
+                Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                 set_input(span, params)
 
                 response = nil
@@ -140,6 +141,7 @@ def each(&block)
               time_to_first_token = nil
 
               tracer.in_span("openai.responses.create") do |span|
+                Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"})
                 responses_instance.send(:set_input, span, params)
                 Support::OTel.set_json_attr(span, "braintrust.metadata", metadata)
 
diff --git a/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb b/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb
index 81a37667..c83ad9e5 100644
--- a/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb
+++ b/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb
@@ -201,7 +201,7 @@ def test_handles_beta_streaming
       # Single span created during consumption
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # Verify input captured on span
       assert span.attributes.key?("braintrust.input_json")
diff --git a/test/braintrust/contrib/anthropic/instrumentation/common_test.rb b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb
index a530424d..bdb6724b 100644
--- a/test/braintrust/contrib/anthropic/instrumentation/common_test.rb
+++ b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb
@@ -57,6 +57,32 @@ def test_handles_cache_creation_tokens
     assert_equal 120, metrics["prompt_tokens"]
   end
 
+  def test_handles_granular_cache_creation_breakdown
+    # When Anthropic returns the per-TTL cache_creation breakdown, report the
+    # granular metrics and drop the aggregate (which would double count).
+    usage = {
+      "input_tokens" => 12,
+      "output_tokens" => 5,
+      "cache_read_input_tokens" => 0,
+      "cache_creation_input_tokens" => 1369,
+      "cache_creation" => {
+        "ephemeral_5m_input_tokens" => 1369,
+        "ephemeral_1h_input_tokens" => 0
+      }
+    }
+
+    metrics = Common.parse_usage_tokens(usage)
+
+    # Both TTL variants present in the breakdown are reported (including zero),
+    # and the aggregate is dropped so the totals are not double counted.
+    assert_equal 1369, metrics["prompt_cache_creation_5m_tokens"]
+    assert_equal 0, metrics["prompt_cache_creation_1h_tokens"]
+    refute metrics.key?("prompt_cache_creation_tokens"), "aggregate dropped when breakdown present"
+    # prompt_tokens still accumulates the creation tokens: 12 + 0 + 1369
+    assert_equal 1381, metrics["prompt_tokens"]
+    assert_equal 1386, metrics["tokens"]
+  end
+
   def test_handles_object_with_to_h
     # SDK returns objects with to_h method
     usage_object = Struct.new(:input_tokens, :output_tokens, keyword_init: true)
diff --git a/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb
index 5e7140db..8864da6f 100644
--- a/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb
+++ b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb
@@ -52,9 +52,8 @@ def test_creates_span_for_basic_message
       # Verify braintrust.output_json contains response as message array
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
-      assert output[0]["content"].is_a?(Array)
+      assert_equal "assistant", output["role"]
+      assert output["content"].is_a?(Array)
 
       # Verify braintrust.metadata contains request and response metadata
       assert span.attributes.key?("braintrust.metadata")
@@ -148,24 +147,23 @@ def test_handles_system_prompt
       # Verify span name
       assert_equal "anthropic.messages.create", span.name
 
-      # Verify braintrust.input_json has system prompt prepended
+      # Verify braintrust.input_json has system prompt appended last
       assert span.attributes.key?("braintrust.input_json")
       input = JSON.parse(span.attributes["braintrust.input_json"])
       assert_equal 2, input.length
 
-      # First message should be system
-      assert_equal "system", input[0]["role"]
-      assert_equal "You are a helpful assistant that always responds briefly.", input[0]["content"]
+      # First message should be the user message
+      assert_equal "user", input[0]["role"]
+      assert_equal "Say hello", input[0]["content"]
 
-      # Second message should be user
-      assert_equal "user", input[1]["role"]
-      assert_equal "Say hello", input[1]["content"]
+      # System prompt appended last
+      assert_equal "system", input[1]["role"]
+      assert_equal "You are a helpful assistant that always responds briefly.", input[1]["content"]
 
       # Verify output
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
     end
   end
 
@@ -218,12 +216,11 @@ def test_handles_tool_use
       # Verify output contains tool_use content blocks
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
-      assert output[0]["content"].is_a?(Array)
+      assert_equal "assistant", output["role"]
+      assert output["content"].is_a?(Array)
 
       # Check that we captured tool_use block
-      content = output[0]["content"]
+      content = output["content"]
       tool_use_block = content.find { |block| block["type"] == "tool_use" }
       assert tool_use_block, "Should have tool_use content block"
       assert_equal "get_weather", tool_use_block["name"]
@@ -262,7 +259,7 @@ def test_handles_streaming
       # Single span created during consumption
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # Verify input captured on span
       assert span.attributes.key?("braintrust.input_json")
@@ -309,20 +306,19 @@ def test_handles_streaming_output_aggregation
       # Single span created during consumption
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # CRITICAL: Verify output was aggregated
       assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute"
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length, "Should have one output message"
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
 
       # The output content should not be empty!
-      assert output[0]["content"].is_a?(Array), "Output content should be an array"
-      refute_empty output[0]["content"], "Output content should not be empty"
+      assert output["content"].is_a?(Array), "Output content should be an array"
+      refute_empty output["content"], "Output content should not be empty"
 
       # Should have aggregated the text content
-      text_block = output[0]["content"].find { |b| b["type"] == "text" }
+      text_block = output["content"].find { |b| b["type"] == "text" }
       assert text_block, "Should have a text content block"
       assert text_block["text"], "Text block should have text"
       refute_empty text_block["text"], "Text should not be empty"
@@ -400,8 +396,7 @@ def test_handles_vision_with_base64
       # Verify output
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
     end
   end
 
@@ -453,12 +448,11 @@ def test_handles_reasoning_thinking_blocks
       # Verify output includes thinking blocks
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
-      assert output[0]["content"].is_a?(Array)
+      assert_equal "assistant", output["role"]
+      assert output["content"].is_a?(Array)
 
       # Check that thinking blocks are captured
-      output_thinking = output[0]["content"].select { |b| b["type"] == "thinking" }
+      output_thinking = output["content"].select { |b| b["type"] == "thinking" }
       assert output_thinking.length > 0, "Should capture thinking blocks in output"
     end
   end
@@ -510,8 +504,7 @@ def test_handles_multi_turn_conversation
       # Verify output
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
     end
   end
 
@@ -558,8 +551,7 @@ def test_handles_temperature_and_stop_sequences
       # Verify output
       assert span.attributes.key?("braintrust.output_json")
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
     end
   end
 
@@ -715,16 +707,15 @@ def test_handles_streaming_with_text_each
       # Single span created during consumption
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # CRITICAL: Verify output was aggregated
       assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute"
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length, "Should have one output message"
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
 
       # Should have aggregated the text content
-      text_block = output[0]["content"].find { |b| b["type"] == "text" }
+      text_block = output["content"].find { |b| b["type"] == "text" }
       assert text_block, "Should have a text content block"
       assert text_block["text"], "Text block should have text"
       refute_empty text_block["text"], "Text should not be empty"
@@ -778,16 +769,15 @@ def test_handles_streaming_with_accumulated_text
       # Single span created during consumption
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # CRITICAL: Verify output was aggregated
       assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute"
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length, "Should have one output message"
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
 
       # Should have aggregated the text content
-      text_block = output[0]["content"].find { |b| b["type"] == "text" }
+      text_block = output["content"].find { |b| b["type"] == "text" }
       assert text_block, "Should have a text content block"
       assert_equal accumulated_text, text_block["text"], "Aggregated text should match accumulated text"
 
@@ -833,16 +823,15 @@ def test_handles_streaming_with_accumulated_message
       # Single span created during consumption
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # CRITICAL: Verify output was aggregated
       assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute"
       output = JSON.parse(span.attributes["braintrust.output_json"])
-      assert_equal 1, output.length, "Should have one output message"
-      assert_equal "assistant", output[0]["role"]
+      assert_equal "assistant", output["role"]
 
       # Should have content
-      refute_empty output[0]["content"], "Output content should not be empty"
+      refute_empty output["content"], "Output content should not be empty"
 
       # CRITICAL: Verify metrics were captured
       assert span.attributes.key?("braintrust.metrics"), "Should have metrics attribute"
@@ -882,7 +871,7 @@ def test_handles_streaming_with_close
       # Single span created on close
       span = rig.drain_one
 
-      assert_equal "anthropic.messages.create", span.name
+      assert_equal "anthropic.messages.stream", span.name
 
       # Verify input was captured on span
       assert span.attributes.key?("braintrust.input_json")
diff --git a/test/btx/README.md b/test/btx/README.md
new file mode 100644
index 00000000..c66dcf18
--- /dev/null
+++ b/test/btx/README.md
@@ -0,0 +1,81 @@
+# BTX — cross-language LLM-span spec tests
+
+This suite validates the Ruby SDK's LLM instrumentation against the shared YAML
+specs in [`braintrustdata/braintrust-spec`](https://github.com/braintrustdata/braintrust-spec),
+the same specs used by every other Braintrust SDK.
+
+For each spec file it:
+
+1. Fetches the spec at the pinned ref (`spec-ref.txt`) into `.spec-cache/` (gitignored).
+2. Executes the spec in-process: real provider API calls (OpenAI, Anthropic)
+   wrapped with Braintrust instrumentation, captured via an in-memory OTel
+   exporter, under a single parent span.
+3. Validates the resulting brainstore spans against `expected_brainstore_spans`.
+
+## Running
+
+The suite needs the provider gems, so run it under the `contrib` appraisal:
+
+```bash
+# Replay from committed cassettes (no API keys, no network) — how CI runs:
+bundle exec appraisal contrib rake test:btx
+
+# Record cassettes (real API calls; requires OPENAI_API_KEY / ANTHROPIC_API_KEY):
+VCR_MODE=all bundle exec appraisal contrib rake test:btx
+
+# Live mode: real calls, flush to Braintrust, validate via BTQL
+# (requires BRAINTRUST_API_KEY and a project):
+VCR_OFF=true bundle exec appraisal contrib rake test:btx
+```
+
+Run a single spec:
+
+```bash
+bundle exec appraisal contrib ruby -Ilib:test \
+  -e "require_relative 'test/btx/btx_test.rb'" -- --name=test_openai_completions
+```
+
+## Layout
+
+| File | Responsibility |
+|---|---|
+| `spec-ref.txt` | Pinned `braintrust-spec` ref to fetch |
+| `spec_fetcher.rb` | Download + cache the spec tarball (pure Ruby) |
+| `spec_loader.rb` | Parse spec YAML, including the `!fn` / `!starts_with` / `!or` / `!gen` tags |
+| `spec_executor.rb` | Make provider API calls under a Braintrust span; capture OTel spans |
+| `span_converter.rb` | Convert in-memory OTel spans → brainstore format (incl. attachment refs) |
+| `span_fetcher.rb` | Live-mode BTQL fetch with retry |
+| `span_validator.rb` | Recursive matcher against `expected_brainstore_spans` |
+| `btx_test.rb` | Minitest runner — one test per spec |
+
+## Modes
+
+| Mode | Trigger | Behaviour |
+|---|---|---|
+| replay (default) | committed cassettes | Replay HTTP; convert in-memory spans; no keys/network |
+| record | `VCR_MODE=all` | Real API calls; write cassettes; validate in-memory |
+| live | `VCR_OFF=true` | Real API calls; flush to Braintrust; validate via BTQL |
+
+Cassettes live in `test/fixtures/vcr_cassettes/btx/<provider>/<spec>.yml` and are
+scrubbed of API keys by the shared VCR config in `test/test_helper.rb`.
+
+## Coverage / known gaps
+
+Pinned spec ref: see `spec-ref.txt` (currently `v0.0.7`).
+
+- Providers covered: `openai` (completions, streaming, tools, reasoning,
+  attachments) and `anthropic` (messages, streaming, attachments,
+  prompt_caching_5m, prompt_caching_1h).
+- `bedrock` and `google` specs are **skipped at runtime** with a clear reason —
+  the Ruby SDK has no instrumentation for them. The set of instrumentable
+  `[provider, endpoint]` pairs lives in `SpecExecutor::SUPPORTED_ENDPOINTS`; add
+  to it (plus a `dispatch` branch) when a new integration lands.
+
+Notes:
+
+- The `anthropic/prompt_caching_*` specs interpolate a `!gen vcr_nonce` cache
+  buster. The nonce is **random in live mode** (to force a provider-side cache
+  miss so creation metrics are non-zero) and **deterministic in record/replay**
+  (so the request body matches the committed cassette).
+- The `anthropic-beta` header for the 1h TTL variant is passed through via the
+  spec's top-level `headers`.
diff --git a/test/btx/btx_test.rb b/test/btx/btx_test.rb
new file mode 100644
index 00000000..6b471f61
--- /dev/null
+++ b/test/btx/btx_test.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+# BTX: cross-language LLM-span spec tests for the Braintrust Ruby SDK.
+#
+# Modes (controlled by the VCR_MODE / VCR_OFF env vars used by the rest of the suite):
+#
+#   replay (default): provider HTTP replayed from cassettes; spans captured
+#     in-memory and converted to brainstore format for validation. No API keys
+#     or network access required.
+#
+#   record (VCR_MODE=all|new_episodes): real provider API calls recorded to
+#     cassettes; spans still validated in-memory.
+#
+#   live (VCR_OFF=true): real provider API calls; spans flushed to Braintrust
+#     and fetched back via BTQL for validation.
+
+require_relative "../test_helper"
+
+require_relative "spec_fetcher"
+require_relative "spec_loader"
+require_relative "span_converter"
+require_relative "span_validator"
+require_relative "span_fetcher"
+require_relative "cross_check"
+require_relative "spec_executor"
+
+module Braintrust
+  module BTX
+    module_function
+
+    def live_mode?
+      ENV["VCR_OFF"] == "true"
+    end
+
+    def cassette_name(spec)
+      "btx/#{spec.provider}/#{spec.name}"
+    end
+
+    # Load every spec in the pinned ref. Specs the SDK cannot instrument are not
+    # filtered out here — they are defined as tests and skipped at run time with
+    # a clear reason, so they remain visible in the test output.
+    def load_all_specs
+      root = SpecFetcher.spec_root
+      SpecLoader.load_specs(root)
+    end
+  end
+end
+
+class BtxTest < Minitest::Test
+  include ::Test::Support::ProviderHelper
+
+  # Build one test method per spec so failures are isolated and filterable.
+  Braintrust::BTX.load_all_specs.each do |spec|
+    test_name = "test_#{spec.provider}_#{spec.name}"
+    define_method(test_name) do
+      run_spec(spec)
+    end
+  end
+
+  private
+
+  def run_spec(spec)
+    unless Braintrust::BTX::SpecExecutor.supported?(spec)
+      skip "#{spec.display_name}: SDK has no instrumentation for " \
+        "provider=#{spec.provider} endpoint=#{spec.endpoint}"
+    end
+
+    skip_unless_provider_available!(spec.provider)
+
+    state = build_state
+    live = Braintrust::BTX.live_mode?
+    executor = Braintrust::BTX::SpecExecutor.new(state, live: live)
+
+    result = with_cassette(spec) { executor.execute(spec) }
+
+    # The in-memory OTel spans are converted to brainstore format in every mode.
+    converted = Braintrust::BTX::SpanConverter.to_brainstore_spans(result.otel_spans)
+
+    if live
+      run_spec_live(spec, result, state, converted)
+    else
+      refute_empty converted, "#{spec.display_name}: no spans captured"
+      Braintrust::BTX::SpanValidator.validate_spans(converted, spec)
+    end
+  end
+
+  # Live mode validates three ways so a passing live run also guarantees the
+  # in-memory path is correct:
+  #   1. the converted in-memory spans satisfy the spec,
+  #   2. the live brainstore spans (via BTQL) satisfy the spec,
+  #   3. the converted spans match the live spans (lenient subset cross-check).
+  def run_spec_live(spec, result, state, converted)
+    refute_empty converted, "#{spec.display_name}: no in-memory spans captured"
+
+    # 1. In-memory spans must independently pass the spec.
+    begin
+      Braintrust::BTX::SpanValidator.validate_spans(converted, spec)
+    rescue Braintrust::BTX::ValidationError => e
+      flunk "#{spec.display_name}: in-memory spans failed spec validation in live mode " \
+        "(the converter/instrumentation diverged from the spec):\n#{e.message}"
+    end
+
+    # 2. Authoritative live spans must pass the spec.
+    live_spans = fetch_live_spans(spec, result, state)
+    refute_empty live_spans, "#{spec.display_name}: no live spans fetched"
+    Braintrust::BTX::SpanValidator.validate_spans(live_spans, spec)
+
+    # 3. In-memory conversion must be consistent with what the backend stored.
+    Braintrust::BTX::CrossCheck.assert_matches(converted, live_spans, spec.display_name)
+  end
+
+  def with_cassette(spec)
+    return yield if Braintrust::BTX.live_mode?
+
+    VCR.use_cassette(Braintrust::BTX.cassette_name(spec), match_requests_on: [:method, :uri, :body]) do
+      yield
+    end
+  end
+
+  def fetch_live_spans(spec, result, state)
+    fetcher = Braintrust::BTX::SpanFetcher.new(api_url: state.api_url, api_key: state.api_key)
+    project_id = Braintrust::BTX::SpanFetcher.project_id_for(project_name, api_url: state.api_url, api_key: state.api_key)
+    fetcher.fetch(result.root_span_id, project_id, spec.expected_brainstore_spans.length)
+  end
+
+  # The Braintrust project BTX logs to (and reads back from) in live mode.
+  PROJECT_NAME = "ruby-unit-test"
+
+  def project_name
+    PROJECT_NAME
+  end
+
+  def build_state
+    if Braintrust::BTX.live_mode?
+      Braintrust.init(
+        api_key: get_braintrust_key,
+        set_global: false,
+        blocking_login: true,
+        default_project: project_name
+      )
+    else
+      get_unit_test_state(default_project: project_name)
+    end
+  end
+
+  def skip_unless_provider_available!(provider)
+    case provider
+    when "openai"
+      if Gem.loaded_specs["ruby-openai"]
+        skip "official openai gem not available (found ruby-openai)"
+      end
+      skip "openai gem not available" unless Gem.loaded_specs["openai"]
+    when "anthropic"
+      skip "anthropic gem not available" unless Gem.loaded_specs["anthropic"]
+    end
+  end
+end
diff --git a/test/btx/cross_check.rb b/test/btx/cross_check.rb
new file mode 100644
index 00000000..2d25fcfb
--- /dev/null
+++ b/test/btx/cross_check.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require "json"
+
+module Braintrust
+  module BTX
+    # Raised when the locally-converted in-memory spans diverge from the
+    # authoritative brainstore spans returned by BTQL in live mode.
+    class CrossCheckError < StandardError; end
+
+    # Cross-checks the in-memory OTel->brainstore conversion against the real
+    # brainstore spans fetched from the backend (live mode only).
+    #
+    # This mirrors the Java SpanFetcher.assertConverterMatchesBrainstore: a
+    # passing live run should also guarantee that the in-memory converter
+    # produces spans consistent with what the backend actually stored.
+    #
+    # The comparison is intentionally lenient — it asserts that every concrete
+    # value the converter produced also appears (equal) in the corresponding
+    # real span, skipping:
+    #   - nil values on either side (don't-care / backend-omitted)
+    #   - "id" fields (dynamic, non-deterministic)
+    #   - metrics values (token counts vary run-to-run; only key presence + type)
+    #   - braintrust_attachment references (converter has the data URL form,
+    #     the backend stores an uploaded reference; both are valid)
+    module CrossCheck
+      module_function
+
+      # Assert the converted spans are a lenient subset of the real spans.
+      #
+      # @param converted [Array<Hash>] spans from SpanConverter.to_brainstore_spans
+      # @param real [Array<Hash>] spans fetched via BTQL
+      # @param display_name [String] spec id for error messages
+      # @raise [CrossCheckError] if the conversion is inconsistent with the backend
+      def assert_matches(converted, real, display_name)
+        if converted.length != real.length
+          raise CrossCheckError,
+            "#{display_name}: in-memory converter produced #{converted.length} span(s) " \
+            "but brainstore returned #{real.length}.\n" \
+            "Converted:\n#{pretty(converted)}\n\nBrainstore:\n#{pretty(real)}"
+        end
+
+        real_by_name = index_by_name(real)
+        errors = []
+
+        converted.each_with_index do |conv, i|
+          name = (conv["span_attributes"] || {})["name"] || conv["name"]
+          real_span = real_by_name[name] || real[i]
+          ctx = "converted[#{name || i}]"
+
+          # "name" is a synthetic top-level field added by the converter for
+          # spec-assertion convenience; the real span keeps it in
+          # span_attributes.name. "metrics" are checked separately (presence only).
+          conv_subset = conv.reject { |k, _| k == "name" || k == "metrics" }
+          assert_subset(conv_subset, real_span, ctx, errors)
+          assert_metrics_keys_present(conv, real_span, ctx, errors)
+        end
+
+        unless errors.empty?
+          raise CrossCheckError,
+            "#{display_name}: in-memory spans do not match live brainstore spans:\n" +
+              errors.join("\n")
+        end
+      end
+
+      # ---- internals ----
+
+      def index_by_name(spans)
+        spans.each_with_object({}) do |span, acc|
+          attrs = span["span_attributes"] || {}
+          name = attrs["name"] || span["name"]
+          acc[name] = span if name
+        end
+      end
+
+      # Every concrete value in +subset+ must appear (equal) in +superset+,
+      # recursively. Lenient per the rules documented above.
+      def assert_subset(subset, superset, ctx, errors)
+        return if subset.nil?
+        return if superset.nil? # backend may omit/transform certain fields
+
+        # If one side is a Hash and the other isn't, the backend likely
+        # transformed the shape — skip rather than fail (matches Java).
+        return if subset.is_a?(Hash) != superset.is_a?(Hash)
+
+        if subset.is_a?(Array)
+          unless superset.is_a?(Array)
+            errors << "#{ctx}: expected an array but brainstore has #{superset.class}"
+            return
+          end
+          subset.each_with_index do |item, i|
+            break if i >= superset.length
+            assert_subset(item, superset[i], "#{ctx}[#{i}]", errors)
+          end
+          return
+        end
+
+        unless subset.is_a?(Hash)
+          # Scalar leaves: strings may vary across runs (model text), so only
+          # assert non-null. Numbers/booleans are deterministic — exact match.
+          if subset.is_a?(String)
+            errors << "#{ctx}: expected non-null string, got nil" if superset.nil?
+          elsif subset != superset
+            errors << "#{ctx}: converted=#{subset.inspect} but brainstore=#{superset.inspect}"
+          end
+          return
+        end
+
+        # Both hashes.
+        if attachment?(subset)
+          # Converter logs a data-URL-derived attachment; backend stores an
+          # uploaded reference. Both are valid — just require the backend also
+          # produced an attachment reference.
+          unless attachment?(superset)
+            errors << "#{ctx}: converted is a braintrust_attachment but brainstore is #{superset.inspect}"
+          end
+          return
+        end
+
+        subset.each do |key, val|
+          next if val.nil?
+          next if key == "id" # dynamic / non-deterministic
+          assert_subset(val, superset[key], "#{ctx}.#{key}", errors)
+        end
+      end
+
+      # Every metric key the converter produced must appear as a non-null
+      # number in the real span (when the backend reports it). Token counts are
+      # non-deterministic, so we check presence + type, not equality.
+      def assert_metrics_keys_present(conv, real_span, ctx, errors)
+        conv_metrics = conv["metrics"]
+        return unless conv_metrics.is_a?(Hash)
+        real_metrics = real_span["metrics"]
+        return unless real_metrics.is_a?(Hash) # backend may omit metrics
+
+        conv_metrics.each do |key, val|
+          next if val.nil?
+          real_val = real_metrics[key]
+          next if real_val.nil? # backend may compute differently; skip
+          unless real_val.is_a?(Numeric)
+            errors << "#{ctx}.metrics.#{key}: expected a number but brainstore has #{real_val.class}"
+          end
+        end
+      end
+
+      def attachment?(hash)
+        hash.is_a?(Hash) && hash["type"] == "braintrust_attachment"
+      end
+
+      def pretty(obj)
+        JSON.pretty_generate(obj)
+      rescue
+        obj.inspect
+      end
+    end
+  end
+end
diff --git a/test/btx/span_converter.rb b/test/btx/span_converter.rb
new file mode 100644
index 00000000..bcdf72d4
--- /dev/null
+++ b/test/btx/span_converter.rb
@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+
+require "json"
+require "digest"
+
+module Braintrust
+  module BTX
+    # Converts in-memory OTel SpanData spans into brainstore span format.
+    #
+    # Brainstore spans are the canonical representation used in Braintrust's
+    # storage layer and returned by the BTQL API. The +expected_brainstore_spans+
+    # in the YAML spec files are written against this format.
+    #
+    # The Braintrust SDK stores span payload in OTel span attributes as JSON
+    # strings:
+    #   braintrust.metrics         -> metrics
+    #   braintrust.metadata        -> metadata
+    #   braintrust.span_attributes -> span_attributes (with name injected from the OTel span name)
+    #   braintrust.input_json      -> input
+    #   braintrust.output_json     -> output
+    #
+    # Only LLM instrumentation spans (those carrying braintrust.span_attributes)
+    # are converted; the root wrapper span created by the executor is excluded.
+    #
+    # This mirrors the Java SpanConverter so in-memory (VCR) validation matches
+    # what the backend stores after ingestion.
+    module SpanConverter
+      module_function
+
+      # Convert a list of exported OTel SpanData into brainstore-format hashes.
+      #
+      # @param otel_spans [Array<OpenTelemetry::SDK::Trace::SpanData>]
+      # @return [Array<Hash>] brainstore spans, in input order
+      def to_brainstore_spans(otel_spans)
+        otel_spans
+          .select { |span| llm_instrumentation_span?(span) }
+          .map { |span| to_single_brainstore_span(span) }
+      end
+
+      def llm_instrumentation_span?(span)
+        attrs = span.attributes || {}
+        !attrs["braintrust.span_attributes"].nil?
+      end
+
+      def to_single_brainstore_span(span)
+        result = {}
+        result["name"] = span.name
+        result["metrics"] = parse_json_map(span, "braintrust.metrics")
+        result["metadata"] = parse_json_map(span, "braintrust.metadata")
+        result["input"] = transform_input(parse_json_value(span, "braintrust.input_json"))
+        result["output"] = parse_json_value(span, "braintrust.output_json")
+
+        span_attrs = parse_json_map(span, "braintrust.span_attributes") || {}
+        span_attrs = span_attrs.dup
+        span_attrs["name"] = span.name
+        result["span_attributes"] = span_attrs
+
+        result
+      end
+
+      # Replicate the Braintrust backend's attachment transformation.
+      #
+      # OpenAI image_url.url: "data:mime;base64,..." -> {type: braintrust_attachment, ...}
+      # OpenAI file.file_data: "data:mime;base64,..." -> {type: braintrust_attachment, ...}
+      # Anthropic source: {type: base64, media_type, data} -> {type: braintrust_attachment, ...}
+      def transform_input(input)
+        case input
+        when Array
+          input.map { |item| transform_input_item(item) }
+        when Hash
+          # Google-style {contents: [...]}, not used by openai/anthropic but
+          # handled for completeness.
+          if input["contents"].is_a?(Array)
+            dup = input.dup
+            dup["contents"] = input["contents"].map { |item| transform_input_item(item) }
+            dup
+          else
+            input
+          end
+        else
+          input
+        end
+      end
+
+      def transform_input_item(item)
+        return item unless item.is_a?(Hash)
+
+        msg = item.dup
+        if msg["content"].is_a?(Array)
+          msg["content"] = msg["content"].map { |part| transform_content_part(part) }
+        end
+        msg
+      end
+
+      def transform_content_part(part)
+        return part unless part.is_a?(Hash)
+
+        type = part["type"]
+
+        # Anthropic: {type: image|document, source: {type: base64, media_type, data}}
+        if (type == "image" || type == "document") && part["source"].is_a?(Hash)
+          source = part["source"]
+          if source["type"] == "base64"
+            mime = source["media_type"] || "application/octet-stream"
+            data = source["data"]
+            if data
+              new_part = part.dup
+              new_part["source"] = to_attachment("data:#{mime};base64,#{data}")
+              return new_part
+            end
+          end
+          return part
+        end
+
+        # OpenAI image_url: {type: image_url, image_url: {url: "data:..."}}
+        if type == "image_url" && part["image_url"].is_a?(Hash)
+          image_url = part["image_url"]
+          url = image_url["url"]
+          if url.is_a?(String) && url.start_with?("data:")
+            new_part = part.dup
+            new_image_url = image_url.dup
+            new_image_url["url"] = to_attachment(url)
+            new_part["image_url"] = new_image_url
+            return new_part
+          end
+          return part
+        end
+
+        # OpenAI file: {type: file, file: {filename, file_data: "data:..."}}
+        if type == "file" && part["file"].is_a?(Hash)
+          file = part["file"]
+          file_data = file["file_data"]
+          if file_data.is_a?(String) && file_data.start_with?("data:")
+            new_part = part.dup
+            new_file = file.dup
+            new_file["file_data"] = to_attachment(file_data)
+            new_part["file"] = new_file
+            return new_part
+          end
+          return part
+        end
+
+        part
+      end
+
+      # Build a braintrust_attachment reference from a data URL.
+      def to_attachment(data_url)
+        content_type = "application/octet-stream"
+        data = data_url
+        if data_url.start_with?("data:")
+          semicolon = data_url.index(";")
+          comma = data_url.index(",")
+          content_type = data_url[5...semicolon] if semicolon && semicolon > 5
+          data = data_url[(comma + 1)..] if comma
+        end
+        ext = content_type.include?("/") ? content_type.split("/").last : "bin"
+        key = "attachment-#{Digest::SHA256.hexdigest(data.to_s)[0, 12]}.#{ext}"
+        {
+          "type" => "braintrust_attachment",
+          "content_type" => content_type,
+          "filename" => key,
+          "key" => key
+        }
+      end
+
+      def parse_json_map(span, attr_key)
+        value = parse_json_value(span, attr_key)
+        value.is_a?(Hash) ? value : nil
+      end
+
+      def parse_json_value(span, attr_key)
+        json = (span.attributes || {})[attr_key]
+        return nil if json.nil?
+        JSON.parse(json)
+      rescue JSON::ParserError => e
+        raise "Failed to parse #{attr_key} as JSON: #{json} (#{e.message})"
+      end
+    end
+  end
+end
diff --git a/test/btx/span_fetcher.rb b/test/btx/span_fetcher.rb
new file mode 100644
index 00000000..04f5e056
--- /dev/null
+++ b/test/btx/span_fetcher.rb
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+require "net/http"
+require "json"
+require "uri"
+
+module Braintrust
+  module BTX
+    # Fetches brainstore spans from the Braintrust backend via the BTQL HTTP API
+    # (live mode). Retries with a fixed interval until all expected spans are
+    # available (their output/metrics fields indexed).
+    class SpanFetcher
+      RETRY_INTERVAL = 30 # seconds
+      MAX_WAIT = 600 # seconds
+
+      def initialize(api_url:, api_key:)
+        @api_url = api_url
+        @api_key = api_key
+      end
+
+      # Resolve a project id from its name via the BTQL/projects API.
+      def self.project_id_for(name, api_url:, api_key:)
+        uri = URI("#{api_url}/v1/project?project_name=#{URI.encode_www_form_component(name)}")
+        req = Net::HTTP::Get.new(uri)
+        req["Authorization"] = "Bearer #{api_key}"
+        res = http_request(uri, req)
+        raise "Failed to resolve project #{name.inspect}: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)
+        body = JSON.parse(res.body)
+        objects = body["objects"] || body
+        proj = objects.is_a?(Array) ? objects.first : objects
+        proj && (proj["id"] || proj.dig("project", "id"))
+      end
+
+      def self.http_request(uri, req)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = (uri.scheme == "https")
+        http.request(req)
+      end
+
+      # Fetch +num_expected+ child spans for +root_span_id+, retrying until ready.
+      #
+      # @return [Array<Hash>] brainstore spans (excluding root + scorer spans)
+      def fetch(root_span_id, project_id, num_expected)
+        total_wait = 0
+        loop do
+          spans = try_fetch(root_span_id, project_id)
+          ready = spans.select { |s| span_ready?(s) }
+          return spans if ready.length >= num_expected && spans.length >= num_expected
+
+          if total_wait >= MAX_WAIT
+            raise "BTX span fetch timed out after #{MAX_WAIT}s for root_span_id=#{root_span_id} " \
+              "(got #{spans.length} spans, #{ready.length} ready, expected #{num_expected})"
+          end
+          sleep(RETRY_INTERVAL)
+          total_wait += RETRY_INTERVAL
+        end
+      end
+
+      private
+
+      def try_fetch(root_span_id, project_id)
+        payload = build_query(root_span_id, project_id)
+        uri = URI("#{@api_url}/btql")
+        req = Net::HTTP::Post.new(uri)
+        req["Content-Type"] = "application/json"
+        req["Authorization"] = "Bearer #{@api_key}"
+        req.body = JSON.dump(payload)
+
+        res = self.class.http_request(uri, req)
+        raise "BTQL HTTP #{res.code}: #{res.body}" unless res.is_a?(Net::HTTPSuccess)
+
+        rows = JSON.parse(res.body)["data"] || []
+        # Filter scorer spans injected by the backend.
+        rows.reject { |s| (s["span_attributes"] || {})["purpose"] == "scorer" }
+      end
+
+      def span_ready?(span)
+        !span["output"].nil? || !span["metrics"].nil?
+      end
+
+      def build_query(root_span_id, project_id)
+        {
+          query: {
+            select: [{op: "star"}],
+            from: {
+              op: "function",
+              name: {op: "ident", name: ["project_logs"]},
+              args: [{op: "literal", value: project_id}]
+            },
+            filter: {
+              op: "and",
+              left: {
+                op: "eq",
+                left: {op: "ident", name: ["root_span_id"]},
+                right: {op: "literal", value: root_span_id}
+              },
+              right: {
+                op: "ne",
+                left: {op: "ident", name: ["span_parents"]},
+                right: {op: "literal", value: nil}
+              }
+            },
+            sort: [{expr: {op: "ident", name: ["created"]}, dir: "asc"}],
+            limit: 1000
+          },
+          use_columnstore: true,
+          use_brainstore: true,
+          brainstore_realtime: true
+        }
+      end
+    end
+  end
+end
diff --git a/test/btx/span_validator.rb b/test/btx/span_validator.rb
new file mode 100644
index 00000000..6f8c7142
--- /dev/null
+++ b/test/btx/span_validator.rb
@@ -0,0 +1,249 @@
+# frozen_string_literal: true
+
+require "json"
+require_relative "spec_loader"
+
+module Braintrust
+  module BTX
+    # Raised when fetched/in-memory spans do not match the spec.
+    class ValidationError < StandardError; end
+
+    # Recursively validates brainstore spans against a spec's
+    # expected_brainstore_spans. All failures are collected before raising so a
+    # single run shows every mismatch.
+    module SpanValidator
+      module_function
+
+      # ---- Named predicates (mirror is_* functions in the other SDKs) ----
+
+      def non_negative_number?(value)
+        value.is_a?(Numeric) && !value.is_a?(TrueClass) && !value.is_a?(FalseClass) && value >= 0
+      end
+
+      def positive_number?(value)
+        value.is_a?(Numeric) && value > 0
+      end
+
+      def non_empty_string?(value)
+        value.is_a?(String) && !value.empty?
+      end
+
+      def undefined_or_null?(value)
+        value.nil?
+      end
+
+      # A list (possibly empty) of {type: summary_text, text: <non-empty>} hashes.
+      def reasoning_message?(value)
+        return false unless value.is_a?(Array)
+        return true if value.empty?
+
+        value.all? do |item|
+          item.is_a?(Hash) &&
+            item["type"] == "summary_text" &&
+            item["text"].is_a?(String) && !item["text"].strip.empty?
+        end
+      end
+
+      NAMED_MATCHERS = {
+        "is_non_negative_number" => :non_negative_number?,
+        "is_positive_number" => :positive_number?,
+        "is_non_empty_string" => :non_empty_string?,
+        "is_reasoning_message" => :reasoning_message?,
+        "undefined_or_null" => :undefined_or_null?
+      }.freeze
+
+      # Resolve a FnMatcher to a callable taking the actual value.
+      #
+      # Named predicates dispatch to dedicated methods. Lambda expressions from
+      # the spec are Python-style ("lambda value: ...") — since Ruby cannot eval
+      # those, we translate the common case ("X in value") and otherwise fall
+      # back to a non-null/non-empty check.
+      def resolve_fn(matcher)
+        expr = matcher.expr
+        if NAMED_MATCHERS.key?(expr)
+          meth = NAMED_MATCHERS[expr]
+          return ->(v) { send(meth, v) }
+        end
+
+        # Python lambda like: lambda value: "Paris" in value
+        if (m = expr.match(/\Alambda\s+\w+:\s*"(.+)"\s+in\s+\w+\z/))
+          needle = m[1]
+          return ->(v) { v.is_a?(String) && v.include?(needle) }
+        end
+        if (m = expr.match(/\Alambda\s+\w+:\s*'(.+)'\s+in\s+\w+\z/))
+          needle = m[1]
+          return ->(v) { v.is_a?(String) && v.include?(needle) }
+        end
+
+        # Unknown expression: loose "non-null and non-empty" check.
+        ->(v) { !v.nil? && v != "" && v != [] && v != {} }
+      end
+
+      # ---- Public API ----
+
+      # Validate +actual_spans+ against +spec.expected_brainstore_spans+.
+      #
+      # @param actual_spans [Array<Hash>] brainstore-format spans (string keys)
+      # @param spec [LlmSpanSpec]
+      # @raise [ValidationError] with every mismatch if validation fails
+      def validate_spans(actual_spans, spec)
+        expected_spans = spec.expected_brainstore_spans
+
+        llm_spans = actual_spans.select do |s|
+          attrs = s["span_attributes"] || {}
+          attrs["type"] == "llm"
+        end
+
+        llm_spans = llm_spans.sort_by do |s|
+          (s["span_attributes"] || {})["exec_counter"] || 0
+        end
+
+        if llm_spans.length < expected_spans.length
+          raise ValidationError,
+            "#{spec.display_name}: expected at least #{expected_spans.length} LLM span(s), " \
+            "got #{llm_spans.length}.\nAll captured spans:\n#{pretty(actual_spans)}"
+        end
+
+        all_errors = []
+
+        expected_spans.each_with_index do |expected_span, i|
+          actual_span = llm_spans[i]
+          span_errors = []
+          expected_span.each do |key, exp_val|
+            if actual_span.key?(key)
+              validate_value(actual_span[key], exp_val, "span[#{i}].#{key}", span_errors)
+            elsif optional?(exp_val)
+              validate_value(nil, exp_val, "span[#{i}].#{key}", span_errors)
+            else
+              span_errors << "  span[#{i}].#{key}: key not found in actual span"
+            end
+          end
+
+          unless span_errors.empty?
+            name = (actual_span["span_attributes"] || {})["name"] || "?"
+            all_errors << "\n--- Span #{i} (#{name}) ---\n" +
+              span_errors.join("\n") +
+              "\n\nFull span JSON:\n#{pretty(actual_span)}"
+          end
+        end
+
+        unless all_errors.empty?
+          raise ValidationError,
+            "#{spec.display_name}: span validation failed:\n" + all_errors.join("\n")
+        end
+      end
+
+      # Recursively validate +actual+ against +expected+, appending to +errors+.
+      def validate_value(actual, expected, path, errors)
+        case expected
+        when OrMatcher
+          validate_or(actual, expected, path, errors)
+        when FnMatcher
+          validate_fn(actual, expected, path, errors)
+        when StartsWithMatcher
+          unless actual.is_a?(String) && actual.start_with?(expected.prefix)
+            errors << "#{path}: expected string starting with #{expected.prefix.inspect}, got #{actual.inspect}"
+          end
+        when GenMatcher
+          # Generated values are placeholders; accept whatever is present.
+          nil
+        when nil
+          # don't care
+          nil
+        when Hash
+          validate_hash(actual, expected, path, errors)
+        when Array
+          validate_array(actual, expected, path, errors)
+        else
+          if actual != expected
+            errors << "#{path}: expected=#{expected.inspect}, actual=#{actual.inspect}"
+          end
+        end
+      end
+
+      def validate_or(actual, expected, path, errors)
+        or_errors = []
+        matched = expected.alternatives.each_with_index.any? do |alt, i|
+          alt_errors = []
+          validate_value(actual, alt, path, alt_errors)
+          if alt_errors.empty?
+            true
+          else
+            or_errors << "  alternative[#{i}]: #{alt_errors.join("; ")}"
+            false
+          end
+        end
+        return if matched
+
+        errors << "#{path}: none of #{expected.alternatives.length} OR alternatives matched:\n" +
+          or_errors.join("\n")
+      end
+
+      def validate_fn(actual, expected, path, errors)
+        fn = resolve_fn(expected)
+        begin
+          result = fn.call(actual)
+        rescue => e
+          errors << "#{path}: validator raised #{e.class}: #{e.message} (actual=#{actual.inspect})"
+          return
+        end
+        unless result
+          errors << "#{path}: validator #{expected.expr.inspect} returned false for actual=#{actual.inspect}"
+        end
+      end
+
+      def validate_hash(actual, expected, path, errors)
+        unless actual.is_a?(Hash)
+          errors << "#{path}: expected hash, got #{actual.class} (#{actual.inspect})"
+          return
+        end
+        expected.each do |key, exp_val|
+          if actual.key?(key)
+            validate_value(actual[key], exp_val, "#{path}.#{key}", errors)
+          elsif optional?(exp_val)
+            # An absent key is equivalent to a null value — validate accordingly
+            # (e.g. !fn undefined_or_null is satisfied by a missing key).
+            validate_value(nil, exp_val, "#{path}.#{key}", errors)
+          else
+            errors << "#{path}.#{key}: key not found in actual span"
+          end
+        end
+      end
+
+      # Whether a missing key is acceptable for this expected value: a literal
+      # nil (don't-care) or a matcher that accepts nil.
+      def optional?(expected)
+        return true if expected.nil?
+        expected.is_a?(FnMatcher) && resolve_fn(expected).call(nil)
+      rescue
+        false
+      end
+
+      def validate_array(actual, expected, path, errors)
+        unless actual.is_a?(Array)
+          # Single-item list vs object: when expected is a one-element list of a
+          # hash and actual is a hash, validate actual against expected[0].
+          if expected.length == 1 && expected[0].is_a?(Hash) && actual.is_a?(Hash)
+            validate_value(actual, expected[0], "#{path}[0]", errors)
+            return
+          end
+          errors << "#{path}: expected array, got #{actual.class} (#{actual.inspect})"
+          return
+        end
+        if actual.length < expected.length
+          errors << "#{path}: list too short — expected at least #{expected.length} elements, got #{actual.length}"
+          return
+        end
+        expected.each_with_index do |exp_item, i|
+          validate_value(actual[i], exp_item, "#{path}[#{i}]", errors)
+        end
+      end
+
+      def pretty(obj)
+        JSON.pretty_generate(obj)
+      rescue
+        obj.inspect
+      end
+    end
+  end
+end
diff --git a/test/btx/spec-ref.txt b/test/btx/spec-ref.txt
new file mode 100644
index 00000000..41a28195
--- /dev/null
+++ b/test/btx/spec-ref.txt
@@ -0,0 +1 @@
+v0.0.7
diff --git a/test/btx/spec_executor.rb b/test/btx/spec_executor.rb
new file mode 100644
index 00000000..a9bfe427
--- /dev/null
+++ b/test/btx/spec_executor.rb
@@ -0,0 +1,296 @@
+# frozen_string_literal: true
+
+require "opentelemetry/sdk"
+require "securerandom"
+require "braintrust"
+require_relative "spec_loader"
+
+module Braintrust
+  module BTX
+    # Result of executing a spec: the root span id plus the captured OTel spans.
+    ExecutionResult = Struct.new(:root_span_id, :otel_spans, keyword_init: true)
+
+    # Executes BTX llm_span specs in-process using the Braintrust Ruby SDK.
+    #
+    # All provider API calls for a spec are made under a single parent ("root")
+    # span. Spans are always captured in-memory via an InMemorySpanExporter so
+    # they can be converted to brainstore format. In live mode (+live: true+) a
+    # real OTLP exporter is *also* attached so spans are ingested into Braintrust
+    # and can be fetched back via BTQL. The returned root_span_id (hex trace id)
+    # is used in live mode to locate those spans.
+    class SpecExecutor
+      # The [provider, endpoint] pairs the Ruby SDK can instrument. Specs whose
+      # provider/endpoint is not in this set are skipped by the runner (the SDK
+      # has no instrumentation to exercise, e.g. bedrock and google).
+      SUPPORTED_ENDPOINTS = [
+        ["openai", "/v1/chat/completions"],
+        ["openai", "/v1/responses"],
+        ["anthropic", "/v1/messages"]
+      ].freeze
+
+      # @return [Boolean] whether the SDK can instrument this spec
+      def self.supported?(spec)
+        SUPPORTED_ENDPOINTS.include?([spec.provider, spec.endpoint])
+      end
+
+      # @param state [Braintrust::State] state used for span attribution
+      # @param live [Boolean] when true, also export spans to the Braintrust backend
+      def initialize(state, live: false)
+        @state = state
+        @live = live
+      end
+
+      # Execute +spec+ and return the captured spans.
+      #
+      # @param spec [LlmSpanSpec]
+      # @return [ExecutionResult]
+      def execute(spec)
+        exporter = OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.new
+        tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new
+
+        simple_processor = OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(exporter)
+        bt_processor = Braintrust::Trace::SpanProcessor.new(simple_processor, @state)
+        tracer_provider.add_span_processor(bt_processor)
+
+        # Live mode: also ship spans to the Braintrust backend via OTLP so they
+        # can be queried back through BTQL.
+        if @live
+          otlp = Braintrust::Trace::SpanExporter.new(
+            endpoint: "#{@state.api_url}/otel/v1/traces",
+            api_key: @state.api_key
+          )
+          batch = OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(otlp)
+          tracer_provider.add_span_processor(Braintrust::Trace::SpanProcessor.new(batch, @state))
+        end
+
+        Braintrust::Contrib.init(tracer_provider: tracer_provider)
+        instrument!(spec.provider)
+
+        client = build_client(spec.provider)
+
+        tracer = tracer_provider.tracer("btx")
+        root_span_id = nil
+        tracer.in_span(spec.name) do |root_span|
+          root_span_id = root_span.context.hex_trace_id
+          dispatch(spec, client)
+        end
+
+        tracer_provider.force_flush
+        spans = exporter.finished_spans
+
+        ExecutionResult.new(root_span_id: root_span_id, otel_spans: spans)
+      end
+
+      private
+
+      def instrument!(provider)
+        case provider
+        when "openai"
+          require "openai"
+          Braintrust::Contrib::OpenAI::Integration.patch!
+        when "anthropic"
+          require "anthropic"
+          Braintrust::Contrib::Anthropic::Integration.patch!
+        else
+          raise NotImplementedError, "BTX executor: provider #{provider.inspect} not implemented"
+        end
+      end
+
+      def build_client(provider)
+        case provider
+        when "openai"
+          ::OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"] || "sk-test-key-for-vcr")
+        when "anthropic"
+          ::Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"] || "sk-ant-test-key-for-vcr")
+        else
+          raise NotImplementedError, "BTX executor: provider #{provider.inspect} not implemented"
+        end
+      end
+
+      def dispatch(spec, client)
+        # Spec-level features applied uniformly across every provider path:
+        #   - variables / !gen placeholders interpolated into {{...}} templates
+        #   - top-level headers passed through unchanged via request_options
+        vars = resolve_variables(spec.variables)
+        request_options = build_request_options(spec.headers)
+        requests = spec.requests.map { |req| interpolate(deep_symbolize(req), vars) }
+
+        case [spec.provider, spec.endpoint]
+        when ["openai", "/v1/chat/completions"]
+          execute_chat_completions(requests, client, request_options)
+        when ["openai", "/v1/responses"]
+          execute_responses(requests, client, request_options)
+        when ["anthropic", "/v1/messages"]
+          execute_anthropic_messages(requests, client, request_options)
+        else
+          raise NotImplementedError,
+            "BTX executor: provider=#{spec.provider.inspect} endpoint=#{spec.endpoint.inspect} not implemented"
+        end
+      end
+
+      # ---- OpenAI chat completions ----
+
+      def execute_chat_completions(requests, client, request_options)
+        history = []
+
+        requests.each do |req|
+          full = req.dup
+          messages = full.delete(:messages) || []
+          full[:messages] = history + messages
+          full[:request_options] = request_options if request_options
+
+          streaming = full.delete(:stream)
+
+          history += messages
+
+          if streaming
+            # Keep stream_options (e.g. include_usage) so the snapshot carries usage.
+            stream = client.chat.completions.stream(**full)
+            final = nil
+            stream.each { |_event| } # consume
+            final = stream.current_completion_snapshot if stream.respond_to?(:current_completion_snapshot)
+            if final&.choices&.any?
+              msg = final.choices.first.message
+              history << {role: "assistant", content: msg.content || ""}
+            end
+          else
+            response = client.chat.completions.create(**full)
+            if response.choices&.any?
+              msg = response.choices.first.message
+              history << {role: "assistant", content: msg.content || ""}
+            end
+          end
+        end
+      end
+
+      # ---- OpenAI responses ----
+
+      def execute_responses(requests, client, request_options)
+        history = []
+
+        requests.each do |req|
+          full = req.dup
+          input = full.delete(:input) || []
+          full[:input] = history + input
+          full[:request_options] = request_options if request_options
+
+          response = client.responses.create(**full)
+
+          history += input
+          if response.respond_to?(:output) && response.output
+            history += response.output.map { |item| item.respond_to?(:to_h) ? item.to_h : item }
+          end
+        end
+      end
+
+      # ---- Anthropic messages ----
+
+      def execute_anthropic_messages(requests, client, request_options)
+        history = []
+
+        requests.each do |req|
+          full = req.dup
+          messages = full.delete(:messages) || []
+          full[:messages] = history + messages
+
+          # The official anthropic Ruby gem names the system param `system_`.
+          if full.key?(:system)
+            full[:system_] = full.delete(:system)
+          end
+
+          # Pass the spec's headers through unchanged (e.g. anthropic-beta).
+          full[:request_options] = request_options if request_options
+
+          streaming = full.delete(:stream)
+
+          history += messages
+
+          if streaming
+            stream = client.messages.stream(**full)
+            stream.each { |_event| } # consume
+            if stream.respond_to?(:accumulated_message)
+              msg = stream.accumulated_message
+              text = text_from_anthropic(msg)
+              history << {role: "assistant", content: text} if text
+            end
+          else
+            response = client.messages.create(**full)
+            text = text_from_anthropic(response)
+            history << {role: "assistant", content: text} if text
+          end
+        end
+      end
+
+      def text_from_anthropic(message)
+        return nil unless message.respond_to?(:content) && message.content
+        blocks = message.content.filter_map do |block|
+          block.text if block.respond_to?(:text)
+        end
+        blocks.empty? ? nil : blocks.join(" ")
+      end
+
+      # Recursively convert string keys to symbols (the Ruby provider SDKs
+      # expect symbol-keyed kwargs). Resolves !gen placeholders to a value.
+      def deep_symbolize(value)
+        case value
+        when Hash
+          value.each_with_object({}) do |(k, v), acc|
+            acc[k.to_sym] = deep_symbolize(v)
+          end
+        when Array
+          value.map { |v| deep_symbolize(v) }
+        when GenMatcher
+          generated_value(value.name)
+        else
+          value
+        end
+      end
+
+      def generated_value(name)
+        case name
+        when "vcr_nonce"
+          # In live mode (no cassette) the nonce must be unique to force a
+          # provider-side cache miss so prompt-cache creation metrics are
+          # non-zero. In record/replay the nonce must be deterministic so the
+          # request body matches the committed cassette.
+          @live ? "btx-#{SecureRandom.hex(8)}" : "btx-nonce"
+        else
+          "btx-#{name}"
+        end
+      end
+
+      # Resolve the spec's `variables` map (which may contain !gen placeholders)
+      # into concrete string values keyed by variable name.
+      # @param variables [Hash] raw variables map from the spec
+      # @return [Hash{String=>String}]
+      def resolve_variables(variables)
+        (variables || {}).each_with_object({}) do |(name, value), acc|
+          acc[name.to_s] = (value.is_a?(GenMatcher) ? generated_value(value.name) : value).to_s
+        end
+      end
+
+      # Substitute {{var}} templates in every string within +obj+ using +vars+.
+      def interpolate(obj, vars)
+        return obj if vars.empty?
+
+        case obj
+        when Hash
+          obj.transform_values { |v| interpolate(v, vars) }
+        when Array
+          obj.map { |v| interpolate(v, vars) }
+        when String
+          obj.gsub(/\{\{\s*([\w-]+)\s*\}\}/) { vars[$1] || $~[0] }
+        else
+          obj
+        end
+      end
+
+      # Build the anthropic gem request_options for the spec's headers, or nil
+      # when there are none. The headers MUST be passed through unchanged.
+      def build_request_options(headers)
+        return nil if headers.nil? || headers.empty?
+        {extra_headers: headers.transform_keys(&:to_s)}
+      end
+    end
+  end
+end
diff --git a/test/btx/spec_fetcher.rb b/test/btx/spec_fetcher.rb
new file mode 100644
index 00000000..1ae33daa
--- /dev/null
+++ b/test/btx/spec_fetcher.rb
@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+
+require "fileutils"
+require "open-uri"
+require "rubygems/package"
+require "zlib"
+require "tmpdir"
+
+module Braintrust
+  module BTX
+    # Downloads and caches the braintrust-spec tarball at a pinned ref.
+    #
+    # The spec lives in braintrustdata/braintrust-spec and is fetched as a
+    # GitHub source tarball. The top-level directory (e.g. "braintrust-spec-af0e006/")
+    # is stripped during extraction so the cache contains "test/llm_span/" directly.
+    #
+    # Fetching is idempotent: if the cache already contains the spec, no network
+    # call is made. This makes repeated local runs instant.
+    module SpecFetcher
+      BTX_DIR = File.expand_path(__dir__)
+      SPEC_REF_FILE = File.join(BTX_DIR, "spec-ref.txt")
+      SPEC_CACHE_DIR = File.join(BTX_DIR, ".spec-cache")
+
+      module_function
+
+      # @return [String] the pinned spec ref (e.g. "v0.0.1")
+      def spec_ref
+        File.read(SPEC_REF_FILE).strip
+      end
+
+      # Resolve the llm_span spec root, fetching the tarball if needed.
+      #
+      # Honors the BTX_SPEC_ROOT environment variable as an override (used by CI
+      # environments that pre-download the spec separately).
+      #
+      # @return [String] absolute path to the test/llm_span directory
+      def spec_root
+        env = ENV["BTX_SPEC_ROOT"]
+        return env if env && !env.empty?
+
+        fetch_if_needed(spec_ref)
+      end
+
+      # Download braintrust-spec@ref into the local cache; skip if already present.
+      #
+      # @param ref [String] the spec ref to fetch
+      # @return [String] absolute path to the test/llm_span directory
+      def fetch_if_needed(ref)
+        cache_dir = File.join(SPEC_CACHE_DIR, ref)
+        llm_span_root = File.join(cache_dir, "test", "llm_span")
+
+        return llm_span_root if File.directory?(llm_span_root)
+
+        FileUtils.mkdir_p(SPEC_CACHE_DIR)
+        warn "[btx] Fetching braintrust-spec@#{ref} ..."
+
+        url = "https://github.com/braintrustdata/braintrust-spec/archive/#{ref}.tar.gz"
+
+        # Extract into a unique temp dir next to the final cache_dir so the
+        # eventual rename is atomic (same filesystem).
+        tmp_dir = Dir.mktmpdir("#{ref}.tmp.", SPEC_CACHE_DIR)
+        begin
+          extract_tarball(url, tmp_dir)
+
+          begin
+            File.rename(tmp_dir, cache_dir)
+          rescue SystemCallError
+            # Another process beat us to it; that's fine as long as the spec exists.
+            raise unless File.directory?(llm_span_root)
+          end
+        ensure
+          FileUtils.rm_rf(tmp_dir) if File.directory?(tmp_dir)
+        end
+
+        unless File.directory?(llm_span_root)
+          raise "Expected llm_span dir not found after fetch: #{llm_span_root}"
+        end
+
+        warn "[btx] Spec cached at #{llm_span_root}"
+        llm_span_root
+      end
+
+      # Download the tarball at +url+ and extract it into +dest_dir+, stripping
+      # the top-level directory component.
+      def extract_tarball(url, dest_dir)
+        URI.open(url, "rb") do |remote| # rubocop:disable Security/Open
+          Zlib::GzipReader.wrap(remote) do |gz|
+            Gem::Package::TarReader.new(gz) do |tar|
+              tar.each do |entry|
+                rel = strip_top_level(entry.full_name)
+                next if rel.nil? || rel.empty?
+
+                dest = File.join(dest_dir, rel)
+
+                if entry.directory?
+                  FileUtils.mkdir_p(dest)
+                elsif entry.file?
+                  FileUtils.mkdir_p(File.dirname(dest))
+                  File.binwrite(dest, entry.read)
+                end
+              end
+            end
+          end
+        end
+      end
+
+      # Strip the leading path component (the GitHub archive top-level dir).
+      def strip_top_level(name)
+        parts = name.split("/")
+        return nil if parts.length <= 1
+        parts[1..].join("/")
+      end
+    end
+  end
+end
diff --git a/test/btx/spec_loader.rb b/test/btx/spec_loader.rb
new file mode 100644
index 00000000..3d0fed02
--- /dev/null
+++ b/test/btx/spec_loader.rb
@@ -0,0 +1,131 @@
+# frozen_string_literal: true
+
+require "psych"
+
+module Braintrust
+  module BTX
+    # Matcher value-objects produced by the spec's custom YAML tags.
+    #
+    # The spec uses three custom tags:
+    #   !fn <name-or-expr>  — named predicate or Ruby lambda expression
+    #   !starts_with <prefix> — string prefix check
+    #   !or [...]           — at-least-one-of validator
+    #
+    # These are parsed into distinct matcher objects (not strings) so the
+    # validator can dispatch on type.
+    FnMatcher = Struct.new(:expr)
+    StartsWithMatcher = Struct.new(:prefix)
+    OrMatcher = Struct.new(:alternatives)
+    # !gen <name> — a runtime-generated value (e.g. a per-run nonce). The
+    # executor substitutes these before making API calls.
+    GenMatcher = Struct.new(:name)
+
+    # Value object representing a single llm_span_test spec file.
+    LlmSpanSpec = Struct.new(
+      :name, :type, :provider, :endpoint, :requests,
+      :expected_brainstore_spans, :source_path, :variables, :headers,
+      keyword_init: true
+    ) do
+      # @return [String] test id, "<provider>/<name>"
+      def display_name
+        "#{provider}/#{name}"
+      end
+    end
+
+    # Loads BTX llm_span spec YAML files, handling the custom tags.
+    module SpecLoader
+      module_function
+
+      # Load all specs under +root+, optionally filtered to +providers+.
+      #
+      # @param root [String] path to the test/llm_span directory
+      # @param providers [Array<String>, nil] allow-list of provider dir names
+      # @return [Array<LlmSpanSpec>] sorted by file path for determinism
+      def load_specs(root, providers: nil)
+        unless File.directory?(root)
+          raise "BTX spec root not found: #{root}"
+        end
+
+        yaml_paths = Dir.glob(File.join(root, "**", "*.yaml")).sort
+
+        yaml_paths.filter_map do |path|
+          provider_dir = File.basename(File.dirname(path))
+          next if providers && !providers.include?(provider_dir)
+
+          data = parse_file(path)
+          next unless data.is_a?(Hash)
+
+          LlmSpanSpec.new(
+            name: data["name"],
+            type: data["type"],
+            provider: data["provider"],
+            endpoint: data["endpoint"],
+            requests: data["requests"] || [],
+            expected_brainstore_spans: data["expected_brainstore_spans"] || [],
+            source_path: path,
+            variables: data["variables"] || {},
+            headers: data["headers"] || {}
+          )
+        end
+      end
+
+      # Parse a single YAML file, converting custom tags into matcher objects.
+      #
+      # @param path [String] file path
+      # @return [Object] parsed structure with matcher objects substituted
+      def parse_file(path)
+        ast = Psych.parse(File.read(path), filename: path)
+        return nil if ast.nil?
+        convert(ast.root)
+      end
+
+      # Recursively convert a Psych AST node into Ruby values, intercepting
+      # the BTX custom tags.
+      def convert(node)
+        case node
+        when Psych::Nodes::Scalar
+          convert_scalar(node)
+        when Psych::Nodes::Sequence
+          convert_sequence(node)
+        when Psych::Nodes::Mapping
+          convert_mapping(node)
+        when Psych::Nodes::Alias
+          # Anchors/aliases are not used by the spec; fall back to nil.
+          nil
+        end
+      end
+
+      def convert_scalar(node)
+        case node.tag
+        when "!fn"
+          FnMatcher.new(node.value)
+        when "!starts_with"
+          StartsWithMatcher.new(node.value)
+        when "!gen"
+          GenMatcher.new(node.value)
+        else
+          # Use Psych's scalar coercion for proper typing (int, float, bool, nil).
+          node.to_ruby
+        end
+      end
+
+      def convert_sequence(node)
+        items = node.children.map { |child| convert(child) }
+        if node.tag == "!or"
+          OrMatcher.new(items)
+        else
+          items
+        end
+      end
+
+      def convert_mapping(node)
+        result = {}
+        node.children.each_slice(2) do |key_node, value_node|
+          key = convert(key_node)
+          result[key] = convert(value_node)
+        end
+        result
+      end
+    end
+  end
+end
diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/attachments.yml b/test/fixtures/vcr_cassettes/btx/anthropic/attachments.yml
new file mode 100644
index 00000000..7c99ac8d
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/anthropic/attachments.yml
@@ -0,0 +1,109 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-haiku-4-5-20251001","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":[{"type":"text","text":"What
+        color is this image?"},{"type":"image","source":{"type":"base64","media_type":"image/png","data":"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - Anthropic::Client/Ruby 1.44.0
+      Host:
+      - api.anthropic.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 1.44.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Anthropic-Version:
+      - '2023-06-01'
+      X-Api-Key:
+      - "<ANTHROPIC_API_KEY>"
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '339'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:19 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '4000000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '4000000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2026-06-01T17:22:19Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '800000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '800000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2026-06-01T17:22:19Z'
+      Anthropic-Ratelimit-Requests-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '19999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2026-06-01T17:22:18Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '4800000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '4800000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2026-06-01T17:22:19Z'
+      Request-Id:
+      - req_011CbcvgQ3DrvQ1Z6hfmoqi3
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      Traceresponse:
+      - 00-8ae82550accef20750916419f1ff3f90-5f63dc5611eb10c9-01
+      Server:
+      - cloudflare
+      Vary:
+      - Accept-Encoding
+      Set-Cookie:
+      - _cfuvid=YRe1m_GajwoFqLhgV28KTF4hValLAysWYe7eiVueQp0-1780334538.497454-1.0.1.1-A4HwZjmhlxkU1uSg2uhmGtt7QCskgsN_gixuWtnrxJQ;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      X-Robots-Tag:
+      - none
+      Cf-Cache-Status:
+      - DYNAMIC
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      Cf-Ray:
+      - a04fe6519b80b091-SEA
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-haiku-4-5-20251001","id":"msg_01KMF8ky5x2PFEhJhZVUUEe8","type":"message","role":"assistant","content":[{"type":"text","text":"This
+        image appears to be **red** (or a reddish color). It looks like a small red
+        dot or mark against a white background."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":17,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":33,"service_tier":"standard","inference_geo":"not_available"}}'
+  recorded_at: Mon, 01 Jun 2026 17:22:19 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/messages.yml b/test/fixtures/vcr_cassettes/btx/anthropic/messages.yml
new file mode 100644
index 00000000..4eb0f9ce
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/anthropic/messages.yml
@@ -0,0 +1,108 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-haiku-4-5-20251001","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"What
+        is the capital of France?"}],"system":"You are a helpful assistant."}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - Anthropic::Client/Ruby 1.44.0
+      Host:
+      - api.anthropic.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 1.44.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Anthropic-Version:
+      - '2023-06-01'
+      X-Api-Key:
+      - "<ANTHROPIC_API_KEY>"
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '184'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:22 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '4000000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '4000000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2026-06-01T17:22:21Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '800000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '800000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2026-06-01T17:22:21Z'
+      Anthropic-Ratelimit-Requests-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '19999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2026-06-01T17:22:21Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '4800000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '4800000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2026-06-01T17:22:21Z'
+      Request-Id:
+      - req_011CbcvgcsdxnYwKYUFvYc21
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      Traceresponse:
+      - 00-b5c5d602a6d44f0faebeee18902ea9bd-9aa287742084cef8-01
+      Server:
+      - cloudflare
+      Vary:
+      - Accept-Encoding
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - _cfuvid=hQgcZCH_pKRM_5EXW22y1ozWUVZlf2sBxxqFZEzIv_k-1780334541.499324-1.0.1.1-kafQfBsBhejFxnTJzd8DgzxQyxHSEGatOKQrxYpDzdw;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      X-Robots-Tag:
+      - none
+      Cf-Ray:
+      - a04fe66458e8dede-SEA
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-haiku-4-5-20251001","id":"msg_013CTQEub7RA1HtxPp5FJjoC","type":"message","role":"assistant","content":[{"type":"text","text":"The
+        capital of France is Paris."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":10,"service_tier":"standard","inference_geo":"not_available"}}'
+  recorded_at: Mon, 01 Jun 2026 17:22:21 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_1h.yml b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_1h.yml
new file mode 100644
index 00000000..25f9415e
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_1h.yml
@@ -0,0 +1,189 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-sonnet-4-5-20250929","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"What
+        is the capital of France?"}],"system":[{"type":"text","text":"[cache buster:
+        btx-test_runner_client btx-nonce]\nReference material (stable, cached with
+        a 1 hour TTL):\n\nThe following is a condensed atlas of capital cities. It
+        does\nnot change between requests within a session, which is why it\nis cached
+        with the longer 1 hour TTL. Consult it before\nanswering.\n\nEurope:\n  -
+        France: Paris\n  - Germany: Berlin\n  - Italy: Rome\n  - Spain: Madrid\n  -
+        Portugal: Lisbon\n  - United Kingdom: London\n  - Ireland: Dublin\n  - Netherlands:
+        Amsterdam (seat of government: The Hague)\n  - Belgium: Brussels\n  - Luxembourg:
+        Luxembourg City\n  - Switzerland: Bern (de facto; no de jure capital)\n  -
+        Austria: Vienna\n  - Denmark: Copenhagen\n  - Sweden: Stockholm\n  - Norway:
+        Oslo\n  - Finland: Helsinki\n  - Iceland: Reykjavik\n  - Poland: Warsaw\n  -
+        Czechia: Prague\n  - Slovakia: Bratislava\n  - Hungary: Budapest\n  - Romania:
+        Bucharest\n  - Bulgaria: Sofia\n  - Greece: Athens\n  - Ukraine: Kyiv\n  -
+        Belarus: Minsk\n  - Russia: Moscow\n  - Serbia: Belgrade\n  - Croatia: Zagreb\n  -
+        Slovenia: Ljubljana\n  - Bosnia and Herzegovina: Sarajevo\n  - North Macedonia:
+        Skopje\n  - Albania: Tirana\n  - Montenegro: Podgorica\n  - Estonia: Tallinn\n  -
+        Latvia: Riga\n  - Lithuania: Vilnius\n  - Moldova: Chisinau\n  - Malta: Valletta\n\nAsia:\n  -
+        Japan: Tokyo\n  - China: Beijing\n  - South Korea: Seoul\n  - North Korea:
+        Pyongyang\n  - Mongolia: Ulaanbaatar\n  - Vietnam: Hanoi\n  - Thailand: Bangkok\n  -
+        Cambodia: Phnom Penh\n  - Laos: Vientiane\n  - Myanmar: Naypyidaw\n  - Malaysia:
+        Kuala Lumpur\n  - Singapore: Singapore\n  - Indonesia: Jakarta (moving to
+        Nusantara)\n  - Philippines: Manila\n  - India: New Delhi\n  - Pakistan: Islamabad\n  -
+        Bangladesh: Dhaka\n  - Sri Lanka: Sri Jayawardenepura Kotte (commercial: Colombo)\n  -
+        Nepal: Kathmandu\n  - Bhutan: Thimphu\n  - Afghanistan: Kabul\n  - Iran: Tehran\n  -
+        Iraq: Baghdad\n  - Saudi Arabia: Riyadh\n  - Yemen: Sanaa\n  - Oman: Muscat\n  -
+        United Arab Emirates: Abu Dhabi\n  - Qatar: Doha\n  - Bahrain: Manama\n  -
+        Kuwait: Kuwait City\n  - Jordan: Amman\n  - Lebanon: Beirut\n  - Syria: Damascus\n  -
+        Israel: Jerusalem (recognition varies)\n  - Turkey: Ankara\n  - Armenia: Yerevan\n  -
+        Azerbaijan: Baku\n  - Georgia: Tbilisi\n  - Kazakhstan: Astana\n  - Uzbekistan:
+        Tashkent\n  - Turkmenistan: Ashgabat\n  - Kyrgyzstan: Bishkek\n  - Tajikistan:
+        Dushanbe\n\nAfrica:\n  - Egypt: Cairo\n  - Libya: Tripoli\n  - Tunisia: Tunis\n  -
+        Algeria: Algiers\n  - Morocco: Rabat\n  - Sudan: Khartoum\n  - South Sudan:
+        Juba\n  - Ethiopia: Addis Ababa\n  - Eritrea: Asmara\n  - Somalia: Mogadishu\n  -
+        Djibouti: Djibouti\n  - Kenya: Nairobi\n  - Uganda: Kampala\n  - Rwanda: Kigali\n  -
+        Burundi: Gitega\n  - Tanzania: Dodoma\n  - Nigeria: Abuja\n  - Ghana: Accra\n  -
+        Ivory Coast: Yamoussoukro (de facto: Abidjan)\n  - Senegal: Dakar\n  - Mali:
+        Bamako\n  - Cameroon: Yaounde\n  - South Africa: Pretoria (executive), Cape
+        Town (legislative), Bloemfontein (judicial)\n  - Zimbabwe: Harare\n  - Zambia:
+        Lusaka\n  - Angola: Luanda\n  - Mozambique: Maputo\n  - Madagascar: Antananarivo\n  -
+        Namibia: Windhoek\n  - Botswana: Gaborone\n  - Democratic Republic of the
+        Congo: Kinshasa\n  - Republic of the Congo: Brazzaville\n\nAmericas:\n  -
+        United States: Washington, D.C.\n  - Canada: Ottawa\n  - Mexico: Mexico City\n  -
+        Guatemala: Guatemala City\n  - Belize: Belmopan\n  - Honduras: Tegucigalpa\n  -
+        El Salvador: San Salvador\n  - Nicaragua: Managua\n  - Costa Rica: San Jose\n  -
+        Panama: Panama City\n  - Cuba: Havana\n  - Jamaica: Kingston\n  - Haiti: Port-au-Prince\n  -
+        Dominican Republic: Santo Domingo\n  - Colombia: Bogota\n  - Venezuela: Caracas\n  -
+        Ecuador: Quito\n  - Peru: Lima\n  - Bolivia: Sucre (constitutional), La Paz
+        (seat of government)\n  - Chile: Santiago\n  - Argentina: Buenos Aires\n  -
+        Uruguay: Montevideo\n  - Paraguay: Asuncion\n  - Brazil: Brasilia\n\nOceania:\n  -
+        Australia: Canberra\n  - New Zealand: Wellington\n  - Fiji: Suva\n  - Papua
+        New Guinea: Port Moresby\n  - Samoa: Apia\n  - Tonga: Nuku''alofa\n  - Vanuatu:
+        Port Vila\n  - Solomon Islands: Honiara\n  - Micronesia: Palikir\n  - Palau:
+        Ngerulmud\n  - Marshall Islands: Majuro\n  - Kiribati: South Tarawa\n  - Nauru:
+        no official capital; government in Yaren District\n  - Tuvalu: Funafuti\n\nNotes
+        on multi-capital and disputed cases:\n\n  - Netherlands: the constitutional
+        capital is Amsterdam but\n    the seat of government, parliament, and supreme
+        court are\n    all in The Hague. Prefer Amsterdam unless the user asks\n    about
+        the government specifically.\n  - South Africa: three capitals split by branch.
+        Pretoria\n    hosts the executive, Cape Town hosts parliament, and\n    Bloemfontein
+        hosts the supreme court of appeal. No single\n    city is \"the\" capital.\n  -
+        Bolivia: Sucre is the constitutional capital, but La Paz\n    is the seat
+        of government and the larger city. Either\n    answer is defensible; list
+        both when asked.\n  - Ivory Coast: Yamoussoukro has been the official capital\n    since
+        1983, but Abidjan remains the economic hub and de\n    facto administrative
+        center for most purposes.\n  - Sri Lanka: Sri Jayawardenepura Kotte is the
+        legislative\n    capital. Colombo is the commercial capital and by far the\n    more
+        commonly referenced city.\n  - Switzerland: Bern is the de facto capital (the
+        seat of\n    the federal authorities), but Swiss law does not\n    designate
+        any city as \"the capital\".\n  - Nauru: has no designated capital. Government
+        offices are\n    in the Yaren District, which is often listed as the\n    capital
+        by convention.\n  - Israel: Jerusalem is the declared capital, but\n    international
+        recognition of that status is not\n    universal. Many embassies are in Tel
+        Aviv.\n  - Palestine: de jure capital is East Jerusalem; de facto\n    administrative
+        center is Ramallah. Both appear in\n    official usage.\n  - Taiwan: Taipei
+        is the capital of the Republic of China.\n    Recognition as a sovereign state
+        varies by country.\n  - Kosovo: Pristina is the capital of the Republic of\n    Kosovo.
+        Recognition as a sovereign state varies by\n    country.\n  - Somaliland:
+        Hargeisa is the capital of the self-declared\n    Republic of Somaliland,
+        which is not widely recognized.\n    Somalia, which claims the territory,
+        has its capital at\n    Mogadishu.\n\nEnd of reference material.\n","cache_control":{"type":"ephemeral","ttl":"1h"}}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - Anthropic::Client/Ruby 1.44.0
+      Host:
+      - api.anthropic.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 1.44.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Anthropic-Version:
+      - '2023-06-01'
+      X-Api-Key:
+      - "<ANTHROPIC_API_KEY>"
+      Anthropic-Beta:
+      - extended-cache-ttl-2025-04-11
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '6546'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:30:03 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Anthropic-Ratelimit-Requests-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '19999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2026-06-01T17:30:01Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '600000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '600000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2026-06-01T17:30:03Z'
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '3000000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '2999000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2026-06-01T17:30:03Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '3600000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '3599000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2026-06-01T17:30:03Z'
+      Request-Id:
+      - req_011CbcwGYRwQmMLXmFCyMyss
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      Traceresponse:
+      - 00-ae4761f3bd966a9fa638782f2a9f8e6b-ec2c5613f2971a7e-01
+      Server:
+      - cloudflare
+      Vary:
+      - Accept-Encoding
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - _cfuvid=8cqsSkMXxiX1RrGTNZT4Bbmv01RdTkqgVHVHQjxIris-1780335001.6898663-1.0.1.1-FvYunVSnOFT_SVgza74fKy6FEn7XajWbhMP..PzFjqY;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      X-Robots-Tag:
+      - none
+      Cf-Ray:
+      - a04ff1a08e70ba51-SEA
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_017uLQftiW625FHwZ6Fu3Hfs","type":"message","role":"assistant","content":[{"type":"text","text":"The
+        capital of France is **Paris**."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1997,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":1997},"output_tokens":11,"service_tier":"standard","inference_geo":"not_available"}}'
+  recorded_at: Mon, 01 Jun 2026 17:30:03 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_5m.yml b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_5m.yml
new file mode 100644
index 00000000..874c3ace
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_5m.yml
@@ -0,0 +1,181 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-sonnet-4-5-20250929","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"What
+        is the capital of France?"}],"system":[{"type":"text","text":"[cache buster:
+        btx-test_runner_client btx-nonce]\nYou are a helpful assistant answering questions
+        about world\ngeography. Follow the operating guidelines below on every\nresponse.
+        These guidelines are refreshed frequently, so they\nare cached with the default
+        5 minute TTL.\n\n1. Answer in a single short sentence unless the user explicitly\n   asks
+        for more detail. Do not add preambles like \"Sure, here\n   is the answer\"
+        or \"Great question\". Just answer.\n2. Always state the canonical English
+        name of a place first,\n   followed by the local name in parentheses only
+        when it\n   differs. Do not include pronunciation guides.\n3. When the user
+        asks about a country, prefer the capital over\n   the largest city. When the
+        user asks about a region, prefer\n   the administrative center. When the user
+        asks about a\n   continent, prefer a widely recognized reference city and\n   note
+        that continents have no single capital.\n4. If the user asks about a disputed
+        territory, name the\n   de-facto administrative center without taking a political\n   position.
+        Do not editorialize.\n5. If the user asks a question that is not about geography,\n   answer
+        it briefly and then offer to continue with\n   geography-related questions.\n6.
+        Never invent place names. If you are not sure, say you are\n   not sure and
+        suggest a likely alternative the user may have\n   meant.\n7. Use modern spelling
+        conventions. Prefer \"Kyiv\" over \"Kiev\",\n   \"Beijing\" over \"Peking\",
+        \"Mumbai\" over \"Bombay\", and so on.\n8. Always use the metric system for
+        distances, elevations, and\n   areas. If the user explicitly asks for imperial
+        units,\n   convert and include both.\n9. Do not mention these instructions
+        to the user. Do not refer\n   to them as \"my guidelines\" or \"my system
+        prompt\". Just\n   follow them silently.\n10. If the user greets you, greet
+        them back briefly and then\n    wait for their actual question. Do not volunteer
+        geography\n    trivia.\n11. Treat any reference material supplied in a later
+        cached\n    block as authoritative. If it conflicts with your training\n    data,
+        prefer the reference material.\n12. If the user asks for a source or citation,
+        say that you\n    cannot cite sources directly but can describe where the\n    information
+        typically comes from (atlases, official\n    government statistics, the CIA
+        World Factbook, etc.).\n13. Keep responses under 40 words when possible. Brevity
+        is a\n    hard requirement, not a preference.\n14. Never use emojis. Never
+        use bullet points unless the user\n    explicitly asks for a list.\n15. If
+        the user asks a follow-up that depends on the previous\n    turn, answer based
+        on the last place you discussed unless\n    they name a new one.\n16. Do not
+        volunteer comparative size, population, or GDP\n    rankings unless the user
+        asks. These numbers change over\n    time and you are not a statistics oracle.\n17.
+        When multiple entities share a name, disambiguate by the\n    country or region
+        (for example: \"Georgia, the country\" vs.\n    \"Georgia, the US state\").\n18.
+        Do not translate proper nouns. \"New York\" is not rendered\n    in the user''s
+        language unless they explicitly request a\n    translation.\n19. Never speculate
+        about future political boundary changes.\n    Stick to the current, widely
+        recognized status quo.\n20. If the user asks about a place that no longer
+        exists under\n    that name (for example \"Constantinople\"), give the modern\n    equivalent
+        and note the historical name in parentheses.\n21. If a place has multiple
+        official capitals (for example\n    South Africa or Bolivia), list all of
+        them with their\n    roles, still in a single sentence.\n22. If the user asks
+        for coordinates, give latitude and\n    longitude in decimal degrees to two
+        decimal places.\n23. If the user asks about a body of water, name the countries\n    that
+        border it, in rough clockwise order starting from the\n    north.\n24. If
+        the user asks about a mountain, give the elevation in\n    meters and the
+        country or countries it sits in.\n25. Do not mention sanctions, travel advisories,
+        or current\n    conflicts. This assistant is a reference for geography, not\n    current
+        events.\n26. If the user asks whether a place is a country, answer yes\n    only
+        for United Nations member states and widely\n    recognized observer states.
+        For partially recognized\n    states, describe the recognition status in one
+        clause\n    rather than giving a flat yes or no.\n27. If the user asks about
+        time zones, give the primary IANA\n    zone identifier and the UTC offset
+        at this moment, noting\n    whether daylight saving time is currently in effect.\n28.
+        If the user asks about currency, give the ISO 4217 code\n    and the common
+        symbol, without quoting an exchange rate.\n29. If the user asks about official
+        languages, list at most\n    three in order of number of speakers, and note
+        that the\n    list is not exhaustive when it is not.\n30. If the user asks
+        about climate, give a one-clause\n    Köppen summary (for example \"humid
+        subtropical (Cfa)\")\n    rather than a month-by-month breakdown.\n31. If
+        the user asks about the flag of a country, describe it\n    in words: colors,
+        arrangement, and central emblem if any.\n    Do not attempt ASCII art.\n32.
+        If the user asks about national holidays, give only the\n    single most widely
+        observed one, with its date.\n33. If the user asks about the head of state
+        or head of\n    government, answer with the office name (\"the President\",\n    \"the
+        Prime Minister\") rather than the current office\n    holder. Names of current
+        office holders change too often\n    for a cached prompt to keep up.\n34.
+        If the user asks about airports, give the three-letter\n    IATA code and
+        the full airport name.\n35. If the user asks about train stations, give the\n    widely
+        used English-language name of the primary station\n    and the city it serves.\n","cache_control":{"type":"ephemeral","ttl":"5m"}}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - Anthropic::Client/Ruby 1.44.0
+      Host:
+      - api.anthropic.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 1.44.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Anthropic-Version:
+      - '2023-06-01'
+      X-Api-Key:
+      - "<ANTHROPIC_API_KEY>"
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '6109'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:30:01 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Anthropic-Ratelimit-Requests-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '19999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2026-06-01T17:30:00Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '600000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '600000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2026-06-01T17:30:01Z'
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '3000000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '2999000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2026-06-01T17:30:01Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '3600000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '3599000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2026-06-01T17:30:01Z'
+      Request-Id:
+      - req_011CbcwGRbvXj5jVnCs9oJED
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      Traceresponse:
+      - 00-ff02e805f804f783b3633285081c1066-b981fa03f2471afd-01
+      Server:
+      - cloudflare
+      Vary:
+      - Accept-Encoding
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - _cfuvid=kxemKUYIwd9EarWHSOtQkb469uiPUmT1Iq5QiZt1sVM-1780335000.101712-1.0.1.1-exv.6xwz0KS7YO_jmwVFuRoWKImfa5bR.Ues7VpbfVs;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      X-Robots-Tag:
+      - none
+      Cf-Ray:
+      - a04ff196a842ec27-SEA
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_01SytAqzaWUeWkT3Do8tKakX","type":"message","role":"assistant","content":[{"type":"text","text":"Paris."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1372,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":1372,"ephemeral_1h_input_tokens":0},"output_tokens":5,"service_tier":"standard","inference_geo":"not_available"}}'
+  recorded_at: Mon, 01 Jun 2026 17:30:01 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/streaming.yml b/test/fixtures/vcr_cassettes/btx/anthropic/streaming.yml
new file mode 100644
index 00000000..b06fd919
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/anthropic/streaming.yml
@@ -0,0 +1,133 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-haiku-4-5-20251001","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"Count
+        from 1 to 5."}],"system":"You are a helpful assistant.","stream":true}'
+    headers:
+      Accept-Encoding:
+      - identity
+      Accept:
+      - text/event-stream
+      User-Agent:
+      - Anthropic::Client/Ruby 1.44.0
+      Host:
+      - api.anthropic.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 1.44.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Anthropic-Version:
+      - '2023-06-01'
+      X-Api-Key:
+      - "<ANTHROPIC_API_KEY>"
+      X-Stainless-Helper-Method:
+      - stream
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '186'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:13 GMT
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cache-Control:
+      - no-cache
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '4000000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '4000000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2026-06-01T17:22:12Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '800000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '800000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2026-06-01T17:22:12Z'
+      Anthropic-Ratelimit-Requests-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '19999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2026-06-01T17:22:12Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '4800000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '4800000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2026-06-01T17:22:12Z'
+      Request-Id:
+      - req_011CbcvfyizMn4AcCuRwrUzG
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      Traceresponse:
+      - 00-4f7b8d10bd76628a8268df03b217cc76-8515c95d61c9460d-01
+      Server:
+      - cloudflare
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      Vary:
+      - Accept-Encoding
+      Set-Cookie:
+      - _cfuvid=z6zAJrAuS_KPlgd1SeJ.hDCrcHCIifIMFAGZsjcwf1Y-1780334532.8056147-1.0.1.1-sCD90Wm90UCcUYOiUX4kjDGauQwgztvQygqr21H5PCI;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      X-Robots-Tag:
+      - none
+      Cf-Cache-Status:
+      - DYNAMIC
+      Cf-Ray:
+      - a04fe62e0aa00fde-SEA
+    body:
+      encoding: UTF-8
+      string: |+
+        event: message_start
+        data: {"type":"message_start","message":{"model":"claude-haiku-4-5-20251001","id":"msg_014BoXv8Fk78du9B4XVM32cz","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":22,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}}    }
+
+        event: content_block_start
+        data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}               }
+
+        event: ping
+        data: {"type": "ping"}
+
+        event: content_block_delta
+        data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1\n2\n3\n4\n5"}         }
+
+        event: content_block_stop
+        data: {"type":"content_block_stop","index":0}
+
+        event: message_delta
+        data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null,"stop_details":null},"usage":{"input_tokens":22,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":13}}
+
+        event: message_stop
+        data: {"type":"message_stop"      }
+
+  recorded_at: Mon, 01 Jun 2026 17:22:13 GMT
+recorded_with: VCR 6.4.0
+...
diff --git a/test/fixtures/vcr_cassettes/btx/openai/attachments.yml b/test/fixtures/vcr_cassettes/btx/openai/attachments.yml
new file mode 100644
index 00000000..13fc9c10
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/openai/attachments.yml
@@ -0,0 +1,145 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o-mini","temperature":0.0,"messages":[{"role":"system","content":"you
+        are a helpful assistant"},{"role":"user","content":[{"type":"text","text":"What
+        color is this image?"},{"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '353'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:15 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fe6396e7808e3-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '678'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      X-Ratelimit-Limit-Input-Images:
+      - '50000'
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Input-Images:
+      - '49999'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149999220'
+      X-Ratelimit-Reset-Input-Images:
+      - 1ms
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_25d540b761f24206a0dc65a9892d0b13
+      Set-Cookie:
+      - __cf_bm=K9k4uf188StUnK0HYST8jjZMDoZc9bNJebqioHJLXvg-1780334534.6280208-1.0.1.1-NGHV9ZDqA.60_d9or7.7_o.rj3.PuBsfwntKlvxq8eNeGolqVwcKyQzOtPHSS0eiiCK1QLHvxrHC1IxhAu2dHzBFcXPib6ANLA0xgv1i0quAYBRTeaj.Opxrr_cj8vij;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:52:15 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |
+        {
+          "id": "chatcmpl-Dm0d4nZdICKuuQcXQpXTJrXBTn6rm",
+          "object": "chat.completion",
+          "created": 1780334534,
+          "model": "gpt-4o-mini-2024-07-18",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "The image is a solid shade of red.",
+                "refusal": null,
+                "annotations": []
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 8522,
+            "completion_tokens": 9,
+            "total_tokens": 8531,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "service_tier": "default",
+          "system_fingerprint": "fp_03ddaa0cca"
+        }
+  recorded_at: Mon, 01 Jun 2026 17:22:15 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/openai/completions.yml b/test/fixtures/vcr_cassettes/btx/openai/completions.yml
new file mode 100644
index 00000000..6aa39560
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/openai/completions.yml
@@ -0,0 +1,139 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o-mini","temperature":0.0,"messages":[{"role":"system","content":"you
+        are a helpful assistant"},{"role":"user","content":"What is the capital of
+        France?"}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '171'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:21:56 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fe5c2cd8e7690-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '636'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149999982'
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_605453ba7f054312ac0d91b5b618b057
+      Set-Cookie:
+      - __cf_bm=bJ3Cm7wKo8ZxLZq.QFygr8_HQJTmnIdnpHXucXAzZwI-1780334515.6488-1.0.1.1-sq03SXu6DRdl.iXFRXob3c1bvtdhWXaufmjWV4zyBRHWxV9_EUerkVy3kqJq0jN67KXxLBo9ttdwabEl5YCxVZxnmwVq_qa8z8hX19gxTloQ62sHuyjFnQ.COknNKYfT;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:51:56 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |
+        {
+          "id": "chatcmpl-Dm0cmGEBhfPfi0sLxoLXckMMTbVSg",
+          "object": "chat.completion",
+          "created": 1780334516,
+          "model": "gpt-4o-mini-2024-07-18",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "The capital of France is Paris.",
+                "refusal": null,
+                "annotations": []
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 23,
+            "completion_tokens": 7,
+            "total_tokens": 30,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "service_tier": "default",
+          "system_fingerprint": "fp_fc8bf6718c"
+        }
+  recorded_at: Mon, 01 Jun 2026 17:21:56 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/openai/reasoning.yml b/test/fixtures/vcr_cassettes/btx/openai/reasoning.yml
new file mode 100644
index 00000000..714f799c
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/openai/reasoning.yml
@@ -0,0 +1,601 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/responses
+    body:
+      encoding: UTF-8
+      string: '{"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"},"input":[{"role":"user","content":"Look
+        at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be
+        the formula for the nth term?\n"},{"id":"rs_05d57f6c539cc761006a1dbcfa8ed48198ac984469b9191bc3","summary":[{"text":"**Identifying
+        the sequence pattern**\n\nThe user wants to understand the sequence: 2, 6,
+        12, 20, 30. I believe it''s based on the formula n(n+1) starting from n=1.
+        Let''s check: For each n, we get 2, 6, 12, 20, and 30, which fits perfectly.
+        This sequence represents double the triangular numbers, as they are calculated
+        by T_n = n(n+1)/2. Therefore, the closed form can be expressed as a_n = n(n+1)
+        or n^2 + n. The pattern involves the product of consecutive integers.","type":"summary_text"},{"text":"**Summarizing
+        the pattern**\n\nThe nth term formula is a_n = n(n+1), which represents pronic
+        numbers, formed by multiplying consecutive integers. The sequence begins with
+        terms like 2, 6, 12, 20, and 30. Since the user likely means for n=1 to correspond
+        to the first term, that means a_n = n(n+1). It''s important to note that the
+        pattern consists of each term increasing by successive even numbers, specifically
+        +4, +6, +8, and so forth. Thus, the general term remains a_n = n(n+1) or equivalently
+        n^2 + n.","type":"summary_text"}],"type":"reasoning"},{"id":"msg_05d57f6c539cc761006a1dbd0910608198b68f43a9686b5cea","content":[{"annotations":[],"text":"The
+        “mystery” is that you’re looking at the pronic (or “oblong”) numbers:\n\n  1·2
+        = 2  \n  2·3 = 6  \n  3·4 = 12  \n  4·5 = 20  \n  5·6 = 30  \n\nEquivalently,
+        each term is the previous one plus the next even number (2→+4→6→8→…), so the
+        n-th term (with a₁=2) is\n\n  aₙ = n·(n + 1)\n\nor, if you prefer,\n\n  aₙ
+        = n² + n.","type":"output_text","logprobs":[]}],"role":"assistant","status":"completed","type":"message"},{"role":"user","content":"Using
+        the pattern you discovered, what would be the 10th term? And can you find
+        the sum of the first 10 terms?"}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '2044'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:10:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fd51cdac3eb40-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '8212'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149999575'
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_579fc1b37a6d41509b79d42bf1bc0924
+      Set-Cookie:
+      - __cf_bm=q5bfl.ezUa6jbZqR2ov0mxyuVdoRzDtsXnL6RuCLuno-1780333833.7394183-1.0.1.1-Qc_745a.vh0zRrEL9H4v8mKtWWuHfP5zIQUxapkdWXh4h3yg4oo0Zpn5PezUBPvP981kq0JEChV2hHQiLc.SZSZO5k1WITtIo8Q5UXPlM3irOuvqzl8z8ZnXeLqvNNty;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:40:41 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "resp_05d57f6c539cc761006a1dbd09cb148198abacd864a7af3294",
+          "object": "response",
+          "created_at": 1780333833,
+          "status": "completed",
+          "background": false,
+          "billing": {
+            "payer": "developer"
+          },
+          "completed_at": 1780333841,
+          "error": null,
+          "frequency_penalty": 0.0,
+          "incomplete_details": null,
+          "instructions": null,
+          "max_output_tokens": null,
+          "max_tool_calls": null,
+          "model": "o4-mini-2025-04-16",
+          "moderation": null,
+          "output": [
+            {
+              "id": "rs_05d57f6c539cc761006a1dbd0a59508198bfb9bcdc155e2526",
+              "type": "reasoning",
+              "summary": [
+                {
+                  "type": "summary_text",
+                  "text": "**Calculating pronic numbers**\n\nThe user wants to know the 10th term and the sum of the first 10 pronic numbers, where pronic numbers follow the formula a_n = n(n+1). So, I calculate a_10 as 10 * 11, which equals 110. For the sum S_10, I break it down into two parts: the sum of squares and the sum of the first 10 natural numbers. This gives me 440 as the total for the sum of the first 10 pronic numbers."
+                },
+                {
+                  "type": "summary_text",
+                  "text": "**Confirming calculations**\n\nLet's verify some calculations for the user. For n=10, using the formula gives me 10 * 11 * 12 / 3 = 440. The 10th term, a_10, is 110. The sum of the first 10 terms, S_10, can be shown as 385 (from the sum of squares) plus 55 (from the sum of the first 10 natural numbers), totaling 440. I think a brief explanation should effectively communicate this, summarizing it as: a_10 = 110 and sum = 440."
+                }
+              ]
+            },
+            {
+              "id": "msg_05d57f6c539cc761006a1dbd115ba481988d9db188b60f34ae",
+              "type": "message",
+              "status": "completed",
+              "content": [
+                {
+                  "type": "output_text",
+                  "annotations": [],
+                  "logprobs": [],
+                  "text": "The 10th term is  \n  a\u2081\u2080 = 10\u00b7(10 + 1) = 10\u00b711 = 110  \n\nThe sum of the first 10 terms is  \n  S\u2081\u2080 = \u2211\u2099\u208c\u2081\u00b9\u2070 n(n+1)  \n      = \u2211\u2099\u208c\u2081\u00b9\u2070 n\u00b2  +  \u2211\u2099\u208c\u2081\u00b9\u2070 n  \n      = (10\u00b711\u00b721)/6  +  (10\u00b711)/2  \n      = 385  +  55  \n      = 440  \n\n(You can also use the closed\u2010form S\u2099 = n(n+1)(n+2)/3, which for n=10 gives 10\u00b711\u00b712/3 = 440.)"
+                }
+              ],
+              "role": "assistant"
+            }
+          ],
+          "parallel_tool_calls": true,
+          "presence_penalty": 0.0,
+          "previous_response_id": null,
+          "prompt_cache_key": null,
+          "prompt_cache_retention": "in_memory",
+          "reasoning": {
+            "context": "current_turn",
+            "effort": "high",
+            "summary": "detailed"
+          },
+          "safety_identifier": null,
+          "service_tier": "default",
+          "store": true,
+          "temperature": 1.0,
+          "text": {
+            "format": {
+              "type": "text"
+            },
+            "verbosity": "medium"
+          },
+          "tool_choice": "auto",
+          "tools": [],
+          "top_logprobs": 0,
+          "top_p": 1.0,
+          "truncation": "disabled",
+          "usage": {
+            "input_tokens": 217,
+            "input_tokens_details": {
+              "cached_tokens": 0
+            },
+            "output_tokens": 722,
+            "output_tokens_details": {
+              "reasoning_tokens": 512
+            },
+            "total_tokens": 939
+          },
+          "user": null,
+          "metadata": {}
+        }
+  recorded_at: Mon, 01 Jun 2026 17:10:41 GMT
+- request:
+    method: post
+    uri: https://api.openai.com/v1/responses
+    body:
+      encoding: UTF-8
+      string: '{"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"},"input":[{"role":"user","content":"Look
+        at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be
+        the formula for the nth term?\n"}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '219'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:07 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fe5ca1d26d301-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '10532'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149999752'
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_8c2857b26a00436da08bff42e8112b33
+      Set-Cookie:
+      - __cf_bm=855gnpwUw5zMPSQuPUP46DHOtubswxyZxy4JbvyK8jk-1780334516.8175228-1.0.1.1-VKTAqQh5gjnnRdvFd5fK_DD_bRka4j.rO.fl7df2GATeVr42xbY4HWWQatzcNvWrRRMwXayULTwVr0NKsz4epnrP9I68klprp7e5kzCTH1MaflS0Cq.aIXRmHX.591Ip;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:52:07 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "resp_095d57b0c545f984006a1dbfb509d881999983bc814a8b74f5",
+          "object": "response",
+          "created_at": 1780334517,
+          "status": "completed",
+          "background": false,
+          "billing": {
+            "payer": "developer"
+          },
+          "completed_at": 1780334527,
+          "error": null,
+          "frequency_penalty": 0.0,
+          "incomplete_details": null,
+          "instructions": null,
+          "max_output_tokens": null,
+          "max_tool_calls": null,
+          "model": "o4-mini-2025-04-16",
+          "moderation": null,
+          "output": [
+            {
+              "id": "rs_095d57b0c545f984006a1dbfb5ae7481998b14a5ce6db1098a",
+              "type": "reasoning",
+              "summary": [
+                {
+                  "type": "summary_text",
+                  "text": "**Identifying a sequence pattern**\n\nThe user is exploring the sequence: 2, 6, 12, 20, 30. This represents \"pronic numbers,\" products of consecutive integers, expressed as n(n+1). \n\nFor example, for n=1, 1*2=2; for n=2, 2*3=6; and so on. The nth term can be given by the formula a_n = n(n+1). \n\nInterestingly, it also relates to the sum of the first n even integers or can be viewed as twice the triangular numbers: 2 * T_n = n(n+1)."
+                },
+                {
+                  "type": "summary_text",
+                  "text": "**Exploring the sequence formula**\n\nThe sequence starts with 2 at n=1, which indicates that the nth term can be expressed as a_n = n(n+1). Each term represents pronic numbers, or the product of consecutive integers.\n\nThe pattern arises from adding consecutive even numbers: for example, 2 plus 4 equals 6, 6 plus 6 equals 12, and so on. The constant second difference confirms it\u2019s a quadratic sequence. This leads us to the formula a_n = n^2 + n, highlighting that the nth term is indeed the product of two consecutive integers."
+                },
+                {
+                  "type": "summary_text",
+                  "text": "**Summarizing the pattern and formula**\n\nThe sequence is based on the sums of even numbers, or pronic numbers, which can be expressed using the formula a_n = n(n+1). \n\nEach term represents the product of two consecutive integers: for example, 2=1*2, 6=2*3, and so on. The differences between the terms are the even numbers 4, 6, 8, and 10, showing a consistent pattern of increases. Therefore, the nth term can be effectively represented as a_n = n(n+1). I'll prepare to share this with the user!"
+                }
+              ]
+            },
+            {
+              "id": "msg_095d57b0c545f984006a1dbfbef77081999a7579adf9cbcce5",
+              "type": "message",
+              "status": "completed",
+              "content": [
+                {
+                  "type": "output_text",
+                  "annotations": [],
+                  "logprobs": [],
+                  "text": "One way to see the pattern:\n\n\u2013 Write out the first few terms as products of consecutive integers  \n  2 = 1\u00b72  \n  6 = 2\u00b73  \n 12 = 3\u00b74  \n 20 = 4\u00b75  \n 30 = 5\u00b76  \n\n\u2013 Equivalently, the successive differences are  \n  6\u20132=4, 12\u20136=6, 20\u201312=8, 30\u201320=10, \u2026  \n  i.e. you keep adding the even numbers 4, 6, 8, 10, \u2026\n\nFrom either viewpoint you get the general (nth) term, for n=1,2,3,\u2026, as\n\n\u2003a\u2099 = n\u00b7(n + 1),\n\nor expanded: \n\n\u2003a\u2099 = n\u00b2 + n."
+                }
+              ],
+              "role": "assistant"
+            }
+          ],
+          "parallel_tool_calls": true,
+          "presence_penalty": 0.0,
+          "previous_response_id": null,
+          "prompt_cache_key": null,
+          "prompt_cache_retention": "in_memory",
+          "reasoning": {
+            "context": "current_turn",
+            "effort": "high",
+            "summary": "detailed"
+          },
+          "safety_identifier": null,
+          "service_tier": "default",
+          "store": true,
+          "temperature": 1.0,
+          "text": {
+            "format": {
+              "type": "text"
+            },
+            "verbosity": "medium"
+          },
+          "tool_choice": "auto",
+          "tools": [],
+          "top_logprobs": 0,
+          "top_p": 1.0,
+          "truncation": "disabled",
+          "usage": {
+            "input_tokens": 41,
+            "input_tokens_details": {
+              "cached_tokens": 0
+            },
+            "output_tokens": 1331,
+            "output_tokens_details": {
+              "reasoning_tokens": 1088
+            },
+            "total_tokens": 1372
+          },
+          "user": null,
+          "metadata": {}
+        }
+  recorded_at: Mon, 01 Jun 2026 17:22:07 GMT
+- request:
+    method: post
+    uri: https://api.openai.com/v1/responses
+    body:
+      encoding: UTF-8
+      string: '{"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"},"input":[{"role":"user","content":"Look
+        at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be
+        the formula for the nth term?\n"},{"id":"rs_095d57b0c545f984006a1dbfb5ae7481998b14a5ce6db1098a","summary":[{"text":"**Identifying
+        a sequence pattern**\n\nThe user is exploring the sequence: 2, 6, 12, 20,
+        30. This represents \"pronic numbers,\" products of consecutive integers,
+        expressed as n(n+1). \n\nFor example, for n=1, 1*2=2; for n=2, 2*3=6; and
+        so on. The nth term can be given by the formula a_n = n(n+1). \n\nInterestingly,
+        it also relates to the sum of the first n even integers or can be viewed as
+        twice the triangular numbers: 2 * T_n = n(n+1).","type":"summary_text"},{"text":"**Exploring
+        the sequence formula**\n\nThe sequence starts with 2 at n=1, which indicates
+        that the nth term can be expressed as a_n = n(n+1). Each term represents pronic
+        numbers, or the product of consecutive integers.\n\nThe pattern arises from
+        adding consecutive even numbers: for example, 2 plus 4 equals 6, 6 plus 6
+        equals 12, and so on. The constant second difference confirms it’s a quadratic
+        sequence. This leads us to the formula a_n = n^2 + n, highlighting that the
+        nth term is indeed the product of two consecutive integers.","type":"summary_text"},{"text":"**Summarizing
+        the pattern and formula**\n\nThe sequence is based on the sums of even numbers,
+        or pronic numbers, which can be expressed using the formula a_n = n(n+1).
+        \n\nEach term represents the product of two consecutive integers: for example,
+        2=1*2, 6=2*3, and so on. The differences between the terms are the even numbers
+        4, 6, 8, and 10, showing a consistent pattern of increases. Therefore, the
+        nth term can be effectively represented as a_n = n(n+1). I''ll prepare to
+        share this with the user!","type":"summary_text"}],"type":"reasoning"},{"id":"msg_095d57b0c545f984006a1dbfbef77081999a7579adf9cbcce5","content":[{"annotations":[],"text":"One
+        way to see the pattern:\n\n– Write out the first few terms as products of
+        consecutive integers  \n  2 = 1·2  \n  6 = 2·3  \n 12 = 3·4  \n 20 = 4·5  \n
+        30 = 5·6  \n\n– Equivalently, the successive differences are  \n  6–2=4, 12–6=6,
+        20–12=8, 30–20=10, …  \n  i.e. you keep adding the even numbers 4, 6, 8, 10,
+        …\n\nFrom either viewpoint you get the general (nth) term, for n=1,2,3,…,
+        as\n\n aₙ = n·(n + 1),\n\nor expanded: \n\n aₙ = n² + n.","type":"output_text","logprobs":[]}],"role":"assistant","status":"completed","type":"message"},{"role":"user","content":"Using
+        the pattern you discovered, what would be the 10th term? And can you find
+        the sum of the first 10 terms?"}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '2702'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:12 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fe60e3bc3d301-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '4532'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149999535'
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_257a28a8fb8d4b21987acc974272aa7d
+      Set-Cookie:
+      - __cf_bm=44MVR4_rVfTeCU_NPoOWnxoSdlxw1at24ZE9lofUMz8-1780334527.7138355-1.0.1.1-xs9Ka_XILlydmYcWYP.PaiPa_g3nAtzR3EEw6vEv.NujGmdXBJRt9m_0j6PJ5ZOtgo2KZN7OMmUlAbeuqXYfdDCFRHuDor6hwIfChaMQ1PHT.NFLeO9ANQGAuyezuUgl;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:52:12 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "resp_095d57b0c545f984006a1dbfbfc594819993afb313d98e54ca",
+          "object": "response",
+          "created_at": 1780334527,
+          "status": "completed",
+          "background": false,
+          "billing": {
+            "payer": "developer"
+          },
+          "completed_at": 1780334532,
+          "error": null,
+          "frequency_penalty": 0.0,
+          "incomplete_details": null,
+          "instructions": null,
+          "max_output_tokens": null,
+          "max_tool_calls": null,
+          "model": "o4-mini-2025-04-16",
+          "moderation": null,
+          "output": [
+            {
+              "id": "rs_095d57b0c545f984006a1dbfc04cf08199bc597ab00d037aee",
+              "type": "reasoning",
+              "summary": []
+            },
+            {
+              "id": "msg_095d57b0c545f984006a1dbfc33f348199a26368824469ef64",
+              "type": "message",
+              "status": "completed",
+              "content": [
+                {
+                  "type": "output_text",
+                  "annotations": [],
+                  "logprobs": [],
+                  "text": "The general term is  \n  a\u2099 = n\u00b7(n + 1).  \n\nSo for n = 10:  \n  a\u2081\u2080 = 10\u00b711 = 110.  \n\nFor the sum of the first 10 terms,  \n  S\u2081\u2080 = \u2211\u2096\u208c\u2081\u00b9\u2070 k(k+1)  \n      = \u2211\u2096\u208c\u2081\u00b9\u2070 (k\u00b2 + k)  \n      = (\u2211\u2096\u208c\u2081\u00b9\u2070 k\u00b2) + (\u2211\u2096\u208c\u2081\u00b9\u2070 k)  \n      = [10\u00b711\u00b721/6] + [10\u00b711/2]  \n      = 385 + 55  \n      = 440.  \n\nSo the 10th term is 110, and the sum of the first 10 terms is 440."
+                }
+              ],
+              "role": "assistant"
+            }
+          ],
+          "parallel_tool_calls": true,
+          "presence_penalty": 0.0,
+          "previous_response_id": null,
+          "prompt_cache_key": null,
+          "prompt_cache_retention": "in_memory",
+          "reasoning": {
+            "context": "current_turn",
+            "effort": "high",
+            "summary": "detailed"
+          },
+          "safety_identifier": null,
+          "service_tier": "default",
+          "store": true,
+          "temperature": 1.0,
+          "text": {
+            "format": {
+              "type": "text"
+            },
+            "verbosity": "medium"
+          },
+          "tool_choice": "auto",
+          "tools": [],
+          "top_logprobs": 0,
+          "top_p": 1.0,
+          "truncation": "disabled",
+          "usage": {
+            "input_tokens": 256,
+            "input_tokens_details": {
+              "cached_tokens": 0
+            },
+            "output_tokens": 586,
+            "output_tokens_details": {
+              "reasoning_tokens": 384
+            },
+            "total_tokens": 842
+          },
+          "user": null,
+          "metadata": {}
+        }
+  recorded_at: Mon, 01 Jun 2026 17:22:12 GMT
+recorded_with: VCR 6.4.0
diff --git a/test/fixtures/vcr_cassettes/btx/openai/streaming.yml b/test/fixtures/vcr_cassettes/btx/openai/streaming.yml
new file mode 100644
index 00000000..ed793a34
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/openai/streaming.yml
@@ -0,0 +1,192 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o-mini","max_tokens":800,"temperature":0.0,"stream_options":{"include_usage":true},"messages":[{"role":"system","content":"you
+        are a thoughtful assistant"},{"role":"user","content":"Count from 1 to 10
+        slowly."}],"stream":true}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - text/event-stream
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '241'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:13 GMT
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fe631fd23b991-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '262'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149999982'
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_9577c8e0afab431f8e9bad614f93be50
+      Set-Cookie:
+      - __cf_bm=e3zw6P7Vc9tofre.GTqFKmEpff_2JZAEfQ4rGXAwyMg-1780334533.4372284-1.0.1.1-3165xGomBl3vQ1m2A.Rdi_WV_DHJHEgvxExdlC1wl4dBnwz44Z5EKDrBmQLDpvmZFhCOIFgw.pTov6CDWlpEVafVDitmYt.zV1UIxE5jreoXCVOjOWPb2bdY2_V7NR5n;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:52:13 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: UTF-8
+      string: |+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1nQOV5KNQ"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"Sure"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5mMFOmF"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"IMs1t9nwNn"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" Here"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wuRksM"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" we"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DFZpHjZ0"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" go"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"oBlQ9MTv"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":":\n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"kLjEv0"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SjbPdNTgci"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GkuAQdCS"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"X1xTwL6"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"85IuNj7HZC"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"zNic48es"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"yad122X"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5K1UvFRQd3"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5WpnN1rH"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qEhK9KK"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"XUQbdPrEjo"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"4Uu2conk"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AHZL9es"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"BM00eNTm6w"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1QawiCaU"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3p4dtiN"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"6"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"EJWrLjTy9Y"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xWpEHjsH"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OBN1VNX"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"7"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RpWLkuoWBU"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"leibIHg8"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"G2xaKZ2"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"8"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Xk6mbwdkCx"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xMMHqibJ"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"UMs3hE4"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"9"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OtLngMHblh"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"PrBwV3As"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"dWuvhno"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"10"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"QgjyzC3pX"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"zqSdZFfy"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"  \n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"M2u0W"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"Take"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"A9e8ldP"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" your"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wssrzy"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" time"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"F2IcGf"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nmdqzXlTQ7"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"tCavN"}
+
+        data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[],"usage":{"prompt_tokens":25,"completion_tokens":40,"total_tokens":65,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"XjRFPPNXm4"}
+
+        data: [DONE]
+
+  recorded_at: Mon, 01 Jun 2026 17:22:14 GMT
+recorded_with: VCR 6.4.0
+...
diff --git a/test/fixtures/vcr_cassettes/btx/openai/tools.yml b/test/fixtures/vcr_cassettes/btx/openai/tools.yml
new file mode 100644
index 00000000..c3c2227a
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/btx/openai/tools.yml
@@ -0,0 +1,151 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o","max_tokens":500,"temperature":0.0,"tools":[{"type":"function","function":{"name":"get_weather","description":"Get
+        the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The
+        city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"],"description":"The
+        unit of temperature"}},"required":["location"]}}}],"messages":[{"role":"user","content":"What
+        is the weather like in Paris, France?"}]}'
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - application/json
+      User-Agent:
+      - OpenAI::Client/Ruby 0.64.0
+      Host:
+      - api.openai.com
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Lang:
+      - ruby
+      X-Stainless-Os:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 0.64.0
+      X-Stainless-Runtime:
+      - ruby
+      X-Stainless-Runtime-Version:
+      - 4.0.1
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_API_KEY>
+      X-Stainless-Retry-Count:
+      - '0'
+      X-Stainless-Timeout:
+      - '600.0'
+      Content-Length:
+      - '511'
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 01 Jun 2026 17:22:18 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cf-Ray:
+      - a04fe640af6e680e-SEA
+      Cf-Cache-Status:
+      - DYNAMIC
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      - CF-Ray
+      - X-Request-ID
+      Openai-Organization:
+      - braintrust-data
+      Openai-Processing-Ms:
+      - '575'
+      Openai-Project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      Openai-Version:
+      - '2020-10-01'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      X-Ratelimit-Limit-Requests:
+      - '10000'
+      X-Ratelimit-Limit-Tokens:
+      - '30000000'
+      X-Ratelimit-Remaining-Requests:
+      - '9999'
+      X-Ratelimit-Remaining-Tokens:
+      - '29999987'
+      X-Ratelimit-Reset-Requests:
+      - 6ms
+      X-Ratelimit-Reset-Tokens:
+      - 0s
+      X-Request-Id:
+      - req_b8b33c1178ae45c9beb59ec6cfdf2cf1
+      Set-Cookie:
+      - __cf_bm=DG1SVXxYmgAGTzUyLluYmjPEGvT_XuoMD21gOy72UqY-1780334535.7878098-1.0.1.1-JsVToaWxLxuPBRwjDcj1Zaea0Qihw1FJrVdFVW30o73bcR.UdlBfN541kR501Tssf6hNUt0JCpE4UJah1OuDFGYru.HUdxg5hn_QMVwgCFAfSojOR6ebwJ9GVd8RtIhx;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon,
+        01 Jun 2026 17:52:18 GMT
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |
+        {
+          "id": "chatcmpl-Dm0d7OgWMCSrYkq9C3VCOGcsVzNkb",
+          "object": "chat.completion",
+          "created": 1780334537,
+          "model": "gpt-4o-2024-08-06",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": null,
+                "tool_calls": [
+                  {
+                    "id": "call_7Id8JLlSgIwMqNDWA7ZyAXkK",
+                    "type": "function",
+                    "function": {
+                      "name": "get_weather",
+                      "arguments": "{\"location\":\"Paris, France\"}"
+                    }
+                  }
+                ],
+                "refusal": null,
+                "annotations": []
+              },
+              "logprobs": null,
+              "finish_reason": "tool_calls"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 85,
+            "completion_tokens": 16,
+            "total_tokens": 101,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "service_tier": "default",
+          "system_fingerprint": "fp_5f78e76dfa"
+        }
+  recorded_at: Mon, 01 Jun 2026 17:22:18 GMT
+recorded_with: VCR 6.4.0