diff --git a/.gitignore b/.gitignore index 85d9a55a..ad035292 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,6 @@ gemfiles/*.gemfile.lock # Ignore local Docker files (for dev env customizations) docker-compose.override.yml Dockerfile.local + +# BTX: cached braintrust-spec downloads (fetched on demand) +/test/btx/.spec-cache/ diff --git a/Rakefile b/Rakefile index 68f2f04c..ac882837 100644 --- a/Rakefile +++ b/Rakefile @@ -5,7 +5,38 @@ require "rake/testtask" desc "Run tests (optionally with seed: rake test[12345])" task :test, [:seed] do |t, args| seed_opt = args[:seed] ? " -- --seed=#{args[:seed]}" : "" - sh "ruby -Ilib:test -e \"Dir.glob('test/**/*_test.rb').each { |f| require_relative f }\"#{seed_opt}" + # Exclude the BTX cross-language spec suite — it requires provider gems and + # is run separately via `rake test:btx` (under the contrib appraisal). + sh "ruby -Ilib:test -e \"Dir.glob('test/**/*_test.rb').reject { |f| f.start_with?('test/btx/') }.each { |f| require_relative f }\"#{seed_opt}" +end + +namespace :test do + # BTX: cross-language LLM-span spec suite. + # + # Requires the openai + anthropic gems, so it runs under the `contrib` + # appraisal. Use `rake test:btx` while already inside a gemfile that has the + # provider gems (e.g. `bundle exec appraisal contrib rake test:btx`), or + # `rake test:btx:ci` which selects the contrib appraisal for you. + namespace :btx do + desc "Fetch the pinned braintrust-spec into the local cache (idempotent)" + task :fetch_spec do + # Run the fetch in a clean process before WebMock is loaded so the GitHub + # download is not blocked by the test suite's HTTP stubbing. + sh "ruby -Itest/btx -e \"require 'spec_fetcher'; puts Braintrust::BTX::SpecFetcher.spec_root\"" + end + + desc "Run the BTX suite under the contrib appraisal (used by `rake ci`)" + task :ci do + # Ensure the contrib gemfile (openai + anthropic) is installed, then run. 
+ sh "bundle exec appraisal contrib bundle install --quiet" + sh "bundle exec appraisal contrib rake test:btx" + end + end + + desc "Run the BTX cross-language LLM-span spec suite (run under the contrib appraisal)" + task btx: :"btx:fetch_spec" do + sh "ruby -Ilib:test -e \"require_relative 'test/btx/btx_test.rb'\"" + end end desc "Run Standard linter" @@ -91,8 +122,8 @@ task coverage: :test do end end -desc "Verify CI (lint + test all appraisal scenarios)" -task ci: [:lint, :"test:appraisal"] +desc "Verify CI (lint + test all appraisal scenarios + btx spec suite)" +task ci: [:lint, :"test:appraisal", :"test:btx:ci"] task default: :ci diff --git a/lib/braintrust/contrib/anthropic/instrumentation/common.rb b/lib/braintrust/contrib/anthropic/instrumentation/common.rb index 710a4134..9bbcd398 100644 --- a/lib/braintrust/contrib/anthropic/instrumentation/common.rb +++ b/lib/braintrust/contrib/anthropic/instrumentation/common.rb @@ -33,10 +33,23 @@ def self.parse_usage_tokens(usage) metrics[target] = value.to_i if target end - # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) + # Cache-creation breakdown. When Anthropic returns the per-TTL + # `cache_creation` breakdown, report the granular metrics + # (prompt_cache_creation_5m_tokens / _1h_tokens) and drop the + # aggregate prompt_cache_creation_tokens — the aggregate is just the + # sum of the variants, so reporting both would double count. + cache_creation_total = metrics["prompt_cache_creation_tokens"] + apply_cache_creation_breakdown(metrics, usage_hash) + + # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs). + # Use the original aggregate total when present, otherwise the + # granular breakdown sum. 
+ creation_for_prompt = cache_creation_total || + (metrics["prompt_cache_creation_5m_tokens"] || 0) + + (metrics["prompt_cache_creation_1h_tokens"] || 0) prompt_tokens = (metrics["prompt_tokens"] || 0) + (metrics["prompt_cached_tokens"] || 0) + - (metrics["prompt_cache_creation_tokens"] || 0) + creation_for_prompt metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 # Calculate total @@ -46,6 +59,36 @@ def self.parse_usage_tokens(usage) metrics end + + # Map the nested `cache_creation` breakdown to per-TTL metrics and + # remove the now-redundant aggregate. No-op when the breakdown is + # absent or carries no numeric values (zero counts are still reported). + # @param metrics [Hash] metrics accumulated so far (mutated) + # @param usage_hash [Hash] raw Anthropic usage hash + def self.apply_cache_creation_breakdown(metrics, usage_hash) + breakdown = usage_hash["cache_creation"] || usage_hash[:cache_creation] + breakdown = breakdown.to_h if breakdown.respond_to?(:to_h) + return unless breakdown.is_a?(Hash) + + ttl_map = { + "ephemeral_5m_input_tokens" => "prompt_cache_creation_5m_tokens", + "ephemeral_1h_input_tokens" => "prompt_cache_creation_1h_tokens" + } + + emitted = false + ttl_map.each do |source, target| + next unless breakdown.key?(source) || breakdown.key?(source.to_sym) + value = breakdown[source] || breakdown[source.to_sym] + next unless value.is_a?(Numeric) + metrics[target] = value.to_i + emitted = true + end + + # When the per-TTL breakdown is present, drop the aggregate so we do + # not double count (spec: "anthropic cache tokens only send 5m or + # 1h variants"). 
+ metrics.delete("prompt_cache_creation_tokens") if emitted + end end end end diff --git a/lib/braintrust/contrib/anthropic/instrumentation/messages.rb b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb index ab112fc8..76285d19 100644 --- a/lib/braintrust/contrib/anthropic/instrumentation/messages.rb +++ b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb @@ -34,6 +34,7 @@ def create(**params) tracer.in_span("anthropic.messages.create") do |span| metadata = build_metadata(params) + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) set_input(span, params) response = nil @@ -98,6 +99,13 @@ def build_metadata(params, stream: false) def set_input(span, params) input_messages = [] + # User/assistant messages come first, then the system prompt is + # appended (matching the cross-language spec / backend format). + if params[:messages] + messages_array = params[:messages].map(&:to_h) + input_messages.concat(messages_array) + end + if params[:system_] system_content = params[:system_] if system_content.is_a?(Array) @@ -110,11 +118,6 @@ def set_input(span, params) end end - if params[:messages] - messages_array = params[:messages].map(&:to_h) - input_messages.concat(messages_array) - end - Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? end @@ -122,10 +125,10 @@ def set_output(span, response) return unless response.respond_to?(:content) && response.content content_array = response.content.map(&:to_h) - output = [{ + output = { role: response.respond_to?(:role) ? 
response.role : "assistant", content: content_array - }] + } Support::OTel.set_json_attr(span, "braintrust.output_json", output) end @@ -196,7 +199,8 @@ def close metadata = ctx[:metadata] messages_instance = ctx[:messages_instance] - tracer.in_span("anthropic.messages.create") do |span| + tracer.in_span("anthropic.messages.stream") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) messages_instance.send(:set_input, span, params) Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end @@ -215,7 +219,8 @@ def trace_consumption(ctx) metadata = ctx[:metadata] messages_instance = ctx[:messages_instance] - tracer.in_span("anthropic.messages.create") do |span| + tracer.in_span("anthropic.messages.stream") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) messages_instance.send(:set_input, span, params) Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb index ac062290..59cc82c8 100644 --- a/lib/braintrust/contrib/openai/instrumentation/chat.rb +++ b/lib/braintrust/contrib/openai/instrumentation/chat.rb @@ -40,6 +40,7 @@ def create(**params) tracer.in_span("Chat Completion") do |span| metadata = build_metadata(params) + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) set_input(span, params) response = nil @@ -180,6 +181,7 @@ def trace_consumption(ctx) start_time = Braintrust::Internal::Time.measure tracer.in_span("Chat Completion") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) completions_instance.send(:set_input, span, params) Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) @@ -252,6 +254,7 @@ def each(&block) time_to_first_token = nil tracer.in_span("Chat Completion") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", 
{type: "llm"}) completions_instance.send(:set_input, span, params) Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb index 5bb4da60..d95f37ad 100644 --- a/lib/braintrust/contrib/openai/instrumentation/responses.rb +++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb @@ -39,6 +39,7 @@ def create(**params) tracer.in_span("openai.responses.create") do |span| metadata = build_metadata(params) + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) set_input(span, params) response = nil @@ -140,6 +141,7 @@ def each(&block) time_to_first_token = nil tracer.in_span("openai.responses.create") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "llm"}) responses_instance.send(:set_input, span, params) Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) diff --git a/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb b/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb index 81a37667..c83ad9e5 100644 --- a/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb +++ b/test/braintrust/contrib/anthropic/instrumentation/beta_messages_test.rb @@ -201,7 +201,7 @@ def test_handles_beta_streaming # Single span created during consumption span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # Verify input captured on span assert span.attributes.key?("braintrust.input_json") diff --git a/test/braintrust/contrib/anthropic/instrumentation/common_test.rb b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb index a530424d..bdb6724b 100644 --- a/test/braintrust/contrib/anthropic/instrumentation/common_test.rb +++ b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb @@ -57,6 +57,32 @@ def 
test_handles_cache_creation_tokens assert_equal 120, metrics["prompt_tokens"] end + def test_handles_granular_cache_creation_breakdown + # When Anthropic returns the per-TTL cache_creation breakdown, report the + # granular metrics and drop the aggregate (which would double count). + usage = { + "input_tokens" => 12, + "output_tokens" => 5, + "cache_read_input_tokens" => 0, + "cache_creation_input_tokens" => 1369, + "cache_creation" => { + "ephemeral_5m_input_tokens" => 1369, + "ephemeral_1h_input_tokens" => 0 + } + } + + metrics = Common.parse_usage_tokens(usage) + + # Both TTL variants present in the breakdown are reported (including zero), + # and the aggregate is dropped so the totals are not double counted. + assert_equal 1369, metrics["prompt_cache_creation_5m_tokens"] + assert_equal 0, metrics["prompt_cache_creation_1h_tokens"] + refute metrics.key?("prompt_cache_creation_tokens"), "aggregate dropped when breakdown present" + # prompt_tokens still accumulates the creation tokens: 12 + 0 + 1369 + assert_equal 1381, metrics["prompt_tokens"] + assert_equal 1386, metrics["tokens"] + end + def test_handles_object_with_to_h # SDK returns objects with to_h method usage_object = Struct.new(:input_tokens, :output_tokens, keyword_init: true) diff --git a/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb index 5e7140db..8864da6f 100644 --- a/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb +++ b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb @@ -52,9 +52,8 @@ def test_creates_span_for_basic_message # Verify braintrust.output_json contains response as message array assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] - assert output[0]["content"].is_a?(Array) + assert_equal "assistant", output["role"] + 
assert output["content"].is_a?(Array) # Verify braintrust.metadata contains request and response metadata assert span.attributes.key?("braintrust.metadata") @@ -148,24 +147,23 @@ def test_handles_system_prompt # Verify span name assert_equal "anthropic.messages.create", span.name - # Verify braintrust.input_json has system prompt prepended + # Verify braintrust.input_json has system prompt appended last assert span.attributes.key?("braintrust.input_json") input = JSON.parse(span.attributes["braintrust.input_json"]) assert_equal 2, input.length - # First message should be system - assert_equal "system", input[0]["role"] - assert_equal "You are a helpful assistant that always responds briefly.", input[0]["content"] + # First message should be the user message + assert_equal "user", input[0]["role"] + assert_equal "Say hello", input[0]["content"] - # Second message should be user - assert_equal "user", input[1]["role"] - assert_equal "Say hello", input[1]["content"] + # System prompt appended last + assert_equal "system", input[1]["role"] + assert_equal "You are a helpful assistant that always responds briefly.", input[1]["content"] # Verify output assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] end end @@ -218,12 +216,11 @@ def test_handles_tool_use # Verify output contains tool_use content blocks assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] - assert output[0]["content"].is_a?(Array) + assert_equal "assistant", output["role"] + assert output["content"].is_a?(Array) # Check that we captured tool_use block - content = output[0]["content"] + content = output["content"] tool_use_block = content.find { |block| block["type"] == "tool_use" 
} assert tool_use_block, "Should have tool_use content block" assert_equal "get_weather", tool_use_block["name"] @@ -262,7 +259,7 @@ def test_handles_streaming # Single span created during consumption span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # Verify input captured on span assert span.attributes.key?("braintrust.input_json") @@ -309,20 +306,19 @@ def test_handles_streaming_output_aggregation # Single span created during consumption span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # CRITICAL: Verify output was aggregated assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute" output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length, "Should have one output message" - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] # The output content should not be empty! 
- assert output[0]["content"].is_a?(Array), "Output content should be an array" - refute_empty output[0]["content"], "Output content should not be empty" + assert output["content"].is_a?(Array), "Output content should be an array" + refute_empty output["content"], "Output content should not be empty" # Should have aggregated the text content - text_block = output[0]["content"].find { |b| b["type"] == "text" } + text_block = output["content"].find { |b| b["type"] == "text" } assert text_block, "Should have a text content block" assert text_block["text"], "Text block should have text" refute_empty text_block["text"], "Text should not be empty" @@ -400,8 +396,7 @@ def test_handles_vision_with_base64 # Verify output assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] end end @@ -453,12 +448,11 @@ def test_handles_reasoning_thinking_blocks # Verify output includes thinking blocks assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] - assert output[0]["content"].is_a?(Array) + assert_equal "assistant", output["role"] + assert output["content"].is_a?(Array) # Check that thinking blocks are captured - output_thinking = output[0]["content"].select { |b| b["type"] == "thinking" } + output_thinking = output["content"].select { |b| b["type"] == "thinking" } assert output_thinking.length > 0, "Should capture thinking blocks in output" end end @@ -510,8 +504,7 @@ def test_handles_multi_turn_conversation # Verify output assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] end 
end @@ -558,8 +551,7 @@ def test_handles_temperature_and_stop_sequences # Verify output assert span.attributes.key?("braintrust.output_json") output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] end end @@ -715,16 +707,15 @@ def test_handles_streaming_with_text_each # Single span created during consumption span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # CRITICAL: Verify output was aggregated assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute" output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length, "Should have one output message" - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] # Should have aggregated the text content - text_block = output[0]["content"].find { |b| b["type"] == "text" } + text_block = output["content"].find { |b| b["type"] == "text" } assert text_block, "Should have a text content block" assert text_block["text"], "Text block should have text" refute_empty text_block["text"], "Text should not be empty" @@ -778,16 +769,15 @@ def test_handles_streaming_with_accumulated_text # Single span created during consumption span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # CRITICAL: Verify output was aggregated assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute" output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length, "Should have one output message" - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] # Should have aggregated the text content - text_block = output[0]["content"].find { |b| b["type"] == "text" } + text_block = 
output["content"].find { |b| b["type"] == "text" } assert text_block, "Should have a text content block" assert_equal accumulated_text, text_block["text"], "Aggregated text should match accumulated text" @@ -833,16 +823,15 @@ def test_handles_streaming_with_accumulated_message # Single span created during consumption span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # CRITICAL: Verify output was aggregated assert span.attributes.key?("braintrust.output_json"), "Should have output_json attribute" output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length, "Should have one output message" - assert_equal "assistant", output[0]["role"] + assert_equal "assistant", output["role"] # Should have content - refute_empty output[0]["content"], "Output content should not be empty" + refute_empty output["content"], "Output content should not be empty" # CRITICAL: Verify metrics were captured assert span.attributes.key?("braintrust.metrics"), "Should have metrics attribute" @@ -882,7 +871,7 @@ def test_handles_streaming_with_close # Single span created on close span = rig.drain_one - assert_equal "anthropic.messages.create", span.name + assert_equal "anthropic.messages.stream", span.name # Verify input was captured on span assert span.attributes.key?("braintrust.input_json") diff --git a/test/btx/README.md b/test/btx/README.md new file mode 100644 index 00000000..c66dcf18 --- /dev/null +++ b/test/btx/README.md @@ -0,0 +1,81 @@ +# BTX — cross-language LLM-span spec tests + +This suite validates the Ruby SDK's LLM instrumentation against the shared YAML +specs in [`braintrustdata/braintrust-spec`](https://github.com/braintrustdata/braintrust-spec), +the same specs used by every other Braintrust SDK. + +For each spec file it: + +1. Fetches the spec at the pinned ref (`spec-ref.txt`) into `.spec-cache/` (gitignored). +2. 
Executes the spec in-process: real provider API calls (OpenAI, Anthropic) + wrapped with Braintrust instrumentation, captured via an in-memory OTel + exporter, under a single parent span. +3. Validates the resulting brainstore spans against `expected_brainstore_spans`. + +## Running + +The suite needs the provider gems, so run it under the `contrib` appraisal: + +```bash +# Replay from committed cassettes (no API keys, no network) — how CI runs: +bundle exec appraisal contrib rake test:btx + +# Record cassettes (real API calls; requires OPENAI_API_KEY / ANTHROPIC_API_KEY): +VCR_MODE=all bundle exec appraisal contrib rake test:btx + +# Live mode: real calls, flush to Braintrust, validate via BTQL +# (requires BRAINTRUST_API_KEY and a project): +VCR_OFF=true bundle exec appraisal contrib rake test:btx +``` + +Run a single spec: + +```bash +bundle exec appraisal contrib ruby -Ilib:test \ + -e "require_relative 'test/btx/btx_test.rb'" -- --name=test_openai_completions +``` + +## Layout + +| File | Responsibility | +|---|---| +| `spec-ref.txt` | Pinned `braintrust-spec` ref to fetch | +| `spec_fetcher.rb` | Download + cache the spec tarball (pure Ruby) | +| `spec_loader.rb` | Parse spec YAML, including the `!fn` / `!starts_with` / `!or` / `!gen` tags | +| `spec_executor.rb` | Make provider API calls under a Braintrust span; capture OTel spans | +| `span_converter.rb` | Convert in-memory OTel spans → brainstore format (incl. 
attachment refs) | +| `span_fetcher.rb` | Live-mode BTQL fetch with retry | +| `span_validator.rb` | Recursive matcher against `expected_brainstore_spans` | +| `btx_test.rb` | Minitest runner — one test per spec | + +## Modes + +| Mode | Trigger | Behaviour | +|---|---|---| +| replay (default) | committed cassettes | Replay HTTP; convert in-memory spans; no keys/network | +| record | `VCR_MODE=all` | Real API calls; write cassettes; validate in-memory | +| live | `VCR_OFF=true` | Real API calls; flush to Braintrust; validate via BTQL | + +Cassettes live in `test/fixtures/vcr_cassettes/btx/<provider>/<spec_name>.yml` and are +scrubbed of API keys by the shared VCR config in `test/test_helper.rb`. + +## Coverage / known gaps + +Pinned spec ref: see `spec-ref.txt` (currently `v0.0.7`). + +- Providers covered: `openai` (completions, streaming, tools, reasoning, + attachments) and `anthropic` (messages, streaming, attachments, + prompt_caching_5m, prompt_caching_1h). +- `bedrock` and `google` specs are **skipped at runtime** with a clear reason — + the Ruby SDK has no instrumentation for them. The set of instrumentable + `[provider, endpoint]` pairs lives in `SpecExecutor::SUPPORTED_ENDPOINTS`; add + to it (plus a `dispatch` branch) when a new integration lands. + +Notes: + +- The `anthropic/prompt_caching_*` specs interpolate a `!gen vcr_nonce` cache + buster. The nonce is **random in live mode** (to force a provider-side cache + miss so creation metrics are non-zero) and **deterministic in record/replay** + (so the request body matches the committed cassette). +- The `anthropic-beta` header for the 1h TTL variant is passed through via the + spec's top-level `headers`. diff --git a/test/btx/btx_test.rb b/test/btx/btx_test.rb new file mode 100644 index 00000000..6b471f61 --- /dev/null +++ b/test/btx/btx_test.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true + +# BTX: cross-language LLM-span spec tests for the Braintrust Ruby SDK. 
+# +# Modes (controlled by the VCR_MODE / VCR_OFF env vars used by the rest of the suite): +# +# replay (default): provider HTTP replayed from cassettes; spans captured +# in-memory and converted to brainstore format for validation. No API keys +# or network access required. +# +# record (VCR_MODE=all|new_episodes): real provider API calls recorded to +# cassettes; spans still validated in-memory. +# +# live (VCR_OFF=true): real provider API calls; spans flushed to Braintrust +# and fetched back via BTQL for validation. + +require_relative "../test_helper" + +require_relative "spec_fetcher" +require_relative "spec_loader" +require_relative "span_converter" +require_relative "span_validator" +require_relative "span_fetcher" +require_relative "cross_check" +require_relative "spec_executor" + +module Braintrust + module BTX + module_function + + def live_mode? + ENV["VCR_OFF"] == "true" + end + + def cassette_name(spec) + "btx/#{spec.provider}/#{spec.name}" + end + + # Load every spec in the pinned ref. Specs the SDK cannot instrument are not + # filtered out here — they are defined as tests and skipped at run time with + # a clear reason, so they remain visible in the test output. + def load_all_specs + root = SpecFetcher.spec_root + SpecLoader.load_specs(root) + end + end +end + +class BtxTest < Minitest::Test + include ::Test::Support::ProviderHelper + + # Build one test method per spec so failures are isolated and filterable. + Braintrust::BTX.load_all_specs.each do |spec| + test_name = "test_#{spec.provider}_#{spec.name}" + define_method(test_name) do + run_spec(spec) + end + end + + private + + def run_spec(spec) + unless Braintrust::BTX::SpecExecutor.supported?(spec) + skip "#{spec.display_name}: SDK has no instrumentation for " \ + "provider=#{spec.provider} endpoint=#{spec.endpoint}" + end + + skip_unless_provider_available!(spec.provider) + + state = build_state + live = Braintrust::BTX.live_mode? 
+ executor = Braintrust::BTX::SpecExecutor.new(state, live: live) + + result = with_cassette(spec) { executor.execute(spec) } + + # The in-memory OTel spans are converted to brainstore format in every mode. + converted = Braintrust::BTX::SpanConverter.to_brainstore_spans(result.otel_spans) + + if live + run_spec_live(spec, result, state, converted) + else + refute_empty converted, "#{spec.display_name}: no spans captured" + Braintrust::BTX::SpanValidator.validate_spans(converted, spec) + end + end + + # Live mode validates three ways so a passing live run also guarantees the + # in-memory path is correct: + # 1. the converted in-memory spans satisfy the spec, + # 2. the live brainstore spans (via BTQL) satisfy the spec, + # 3. the converted spans match the live spans (lenient subset cross-check). + def run_spec_live(spec, result, state, converted) + refute_empty converted, "#{spec.display_name}: no in-memory spans captured" + + # 1. In-memory spans must independently pass the spec. + begin + Braintrust::BTX::SpanValidator.validate_spans(converted, spec) + rescue Braintrust::BTX::ValidationError => e + flunk "#{spec.display_name}: in-memory spans failed spec validation in live mode " \ + "(the converter/instrumentation diverged from the spec):\n#{e.message}" + end + + # 2. Authoritative live spans must pass the spec. + live_spans = fetch_live_spans(spec, result, state) + refute_empty live_spans, "#{spec.display_name}: no live spans fetched" + Braintrust::BTX::SpanValidator.validate_spans(live_spans, spec) + + # 3. In-memory conversion must be consistent with what the backend stored. + Braintrust::BTX::CrossCheck.assert_matches(converted, live_spans, spec.display_name) + end + + def with_cassette(spec) + return yield if Braintrust::BTX.live_mode? 
+ + VCR.use_cassette(Braintrust::BTX.cassette_name(spec), match_requests_on: [:method, :uri, :body]) do + yield + end + end + + def fetch_live_spans(spec, result, state) + fetcher = Braintrust::BTX::SpanFetcher.new(api_url: state.api_url, api_key: state.api_key) + project_id = Braintrust::BTX::SpanFetcher.project_id_for(project_name, api_url: state.api_url, api_key: state.api_key) + fetcher.fetch(result.root_span_id, project_id, spec.expected_brainstore_spans.length) + end + + # The Braintrust project BTX logs to (and reads back from) in live mode. + PROJECT_NAME = "ruby-unit-test" + + def project_name + PROJECT_NAME + end + + def build_state + if Braintrust::BTX.live_mode? + Braintrust.init( + api_key: get_braintrust_key, + set_global: false, + blocking_login: true, + default_project: project_name + ) + else + get_unit_test_state(default_project: project_name) + end + end + + def skip_unless_provider_available!(provider) + case provider + when "openai" + if Gem.loaded_specs["ruby-openai"] + skip "official openai gem not available (found ruby-openai)" + end + skip "openai gem not available" unless Gem.loaded_specs["openai"] + when "anthropic" + skip "anthropic gem not available" unless Gem.loaded_specs["anthropic"] + end + end +end diff --git a/test/btx/cross_check.rb b/test/btx/cross_check.rb new file mode 100644 index 00000000..2d25fcfb --- /dev/null +++ b/test/btx/cross_check.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true + +require "json" + +module Braintrust + module BTX + # Raised when the locally-converted in-memory spans diverge from the + # authoritative brainstore spans returned by BTQL in live mode. + class CrossCheckError < StandardError; end + + # Cross-checks the in-memory OTel->brainstore conversion against the real + # brainstore spans fetched from the backend (live mode only). 
# Cross-checks the in-memory OTel->brainstore conversion against the real
# brainstore spans fetched from the backend (live mode only).
#
# The comparison is intentionally lenient: it asserts that every concrete
# value the converter produced also appears (equal) in the corresponding
# real span, skipping:
#   - nil values on either side (don't-care / backend-omitted)
#   - "id" fields (dynamic, non-deterministic)
#   - string leaves (model text varies run-to-run)
#   - metrics values (token counts vary; only key presence + type)
#   - braintrust_attachment references (both sides only need to be one)
module CrossCheck
  module_function

  # Assert the converted spans are a lenient subset of the real spans.
  #
  # Spans are paired by name. When several spans share a name they are
  # paired in order of occurrence, so each converted span is compared with
  # its own counterpart rather than all of them with the last real span of
  # that name. Spans without a name match fall back to positional pairing.
  # NOTE(review): occurrence-order pairing assumes both lists are in the
  # same relative order — confirm against the fetcher's sort.
  #
  # @param converted [Array] spans from SpanConverter.to_brainstore_spans
  # @param real [Array] spans fetched via BTQL
  # @param display_name [String] spec id for error messages
  # @return [nil]
  # @raise [CrossCheckError] if the conversion is inconsistent with the backend
  def assert_matches(converted, real, display_name)
    if converted.length != real.length
      raise CrossCheckError,
        "#{display_name}: in-memory converter produced #{converted.length} span(s) " \
        "but brainstore returned #{real.length}.\n" \
        "Converted:\n#{pretty(converted)}\n\nBrainstore:\n#{pretty(real)}"
    end

    remaining_by_name = queue_by_name(real)
    errors = []

    converted.each_with_index do |conv, i|
      name = span_name(conv)
      real_span = remaining_by_name[name]&.shift || real[i]
      ctx = "converted[#{name || i}]"

      # "name" is a synthetic top-level field added by the converter; the
      # real span keeps it in span_attributes.name. "metrics" are checked
      # separately (presence + type only).
      conv_subset = conv.reject { |k, _| k == "name" || k == "metrics" }
      assert_subset(conv_subset, real_span, ctx, errors)
      assert_metrics_keys_present(conv, real_span, ctx, errors)
    end

    return if errors.empty?

    raise CrossCheckError,
      "#{display_name}: in-memory spans do not match live brainstore spans:\n" +
      errors.join("\n")
  end

  # ---- internals ----

  # Name of a span: span_attributes.name, falling back to top-level name.
  def span_name(span)
    (span["span_attributes"] || {})["name"] || span["name"]
  end

  # Map each span name to its span. Later spans overwrite earlier ones with
  # the same name; kept for callers that only need a by-name lookup.
  def index_by_name(spans)
    spans.each_with_object({}) do |span, acc|
      name = span_name(span)
      acc[name] = span if name
    end
  end

  # Map each span name to the list of spans carrying it, in input order.
  def queue_by_name(spans)
    spans.each_with_object({}) do |span, acc|
      name = span_name(span)
      (acc[name] ||= []) << span if name
    end
  end

  # Every concrete value in +subset+ must appear (equal) in +superset+,
  # recursively. Mismatches are appended to +errors+ prefixed with +ctx+.
  def assert_subset(subset, superset, ctx, errors)
    return if subset.nil?
    return if superset.nil? # backend may omit/transform certain fields

    # If one side is a Hash and the other isn't, the backend likely
    # transformed the shape — skip rather than fail.
    return if subset.is_a?(Hash) != superset.is_a?(Hash)

    if subset.is_a?(Array)
      unless superset.is_a?(Array)
        errors << "#{ctx}: expected an array but brainstore has #{superset.class}"
        return
      end
      subset.each_with_index do |item, i|
        break if i >= superset.length
        assert_subset(item, superset[i], "#{ctx}[#{i}]", errors)
      end
      return
    end

    unless subset.is_a?(Hash)
      # Strings may vary across runs (model text); the only requirement is a
      # non-nil counterpart, which the guard above already established.
      return if subset.is_a?(String)

      # Numbers/booleans are deterministic — exact match.
      if subset != superset
        errors << "#{ctx}: converted=#{subset.inspect} but brainstore=#{superset.inspect}"
      end
      return
    end

    # Both hashes.
    if attachment?(subset)
      # Converter logs a data-URL-derived attachment; backend stores an
      # uploaded reference. Only require that both are attachments.
      unless attachment?(superset)
        errors << "#{ctx}: converted is a braintrust_attachment but brainstore is #{superset.inspect}"
      end
      return
    end

    subset.each do |key, val|
      next if val.nil?
      next if key == "id" # dynamic / non-deterministic
      assert_subset(val, superset[key], "#{ctx}.#{key}", errors)
    end
  end

  # Every metric key the converter produced must be a number in the real
  # span when the real span reports it. Values are not compared.
  def assert_metrics_keys_present(conv, real_span, ctx, errors)
    conv_metrics = conv["metrics"]
    return unless conv_metrics.is_a?(Hash)
    real_metrics = real_span["metrics"]
    return unless real_metrics.is_a?(Hash) # backend may omit metrics

    conv_metrics.each do |key, val|
      next if val.nil?
      real_val = real_metrics[key]
      next if real_val.nil? # backend may compute differently; skip
      unless real_val.is_a?(Numeric)
        errors << "#{ctx}.metrics.#{key}: expected a number but brainstore has #{real_val.class}"
      end
    end
  end

  # Whether +hash+ is a braintrust_attachment reference.
  def attachment?(hash)
    hash.is_a?(Hash) && hash["type"] == "braintrust_attachment"
  end

  # Pretty JSON for error messages, falling back to #inspect.
  def pretty(obj)
    JSON.pretty_generate(obj)
  rescue
    obj.inspect
  end
end
# Replicate the Braintrust backend's attachment transformation on a span's
# input payload.
#
#   OpenAI image_url.url:  "data:mime;base64,..." -> {type: braintrust_attachment, ...}
#   OpenAI file.file_data: "data:mime;base64,..." -> {type: braintrust_attachment, ...}
#   Anthropic source: {type: base64, media_type, data} -> {type: braintrust_attachment, ...}
#
# An Array is treated as a message list and every element is transformed. A
# Hash with an Array under "contents" (Google-style; not used by
# openai/anthropic, handled for completeness) gets a copy with that list
# transformed. Anything else is returned untouched.
#
# @param input [Object] parsed braintrust.input_json value
# @return [Object] transformed copy, or +input+ itself when nothing applies
def transform_input(input)
  return input.map { |message| transform_input_item(message) } if input.is_a?(Array)
  return input unless input.is_a?(Hash)

  contents = input["contents"]
  return input unless contents.is_a?(Array)

  input.merge("contents" => contents.map { |message| transform_input_item(message) })
end
# Build a braintrust_attachment reference from a data URL.
#
# A data URL has the shape "data:[<mediatype>][;params],<data>". The media
# type is read from the header only (the part before the first comma), so a
# ";" inside the payload or a header without ";base64" cannot corrupt it.
# Anything that is not a data URL is hashed as-is and reported as
# application/octet-stream.
#
# @param data_url [String] data URL (or arbitrary string)
# @return [Hash] {"type", "content_type", "filename", "key"}; filename and
#   key are "attachment-<first 12 hex chars of SHA-256 of the data>.<ext>"
def to_attachment(data_url)
  content_type = "application/octet-stream"
  data = data_url
  if data_url.start_with?("data:")
    comma = data_url.index(",")
    header = comma ? data_url[5...comma] : data_url[5..]
    media_type = header.split(";").first.to_s
    content_type = media_type unless media_type.empty?
    data = data_url[(comma + 1)..] if comma
  end
  ext = content_type.include?("/") ? content_type.split("/").last : "bin"
  key = "attachment-#{Digest::SHA256.hexdigest(data.to_s)[0, 12]}.#{ext}"
  {
    "type" => "braintrust_attachment",
    "content_type" => content_type,
    "filename" => key,
    "key" => key
  }
end
# Resolve a project id from its name via GET {api_url}/v1/project.
#
# Accepts three response shapes: {"objects": [project, ...]}, a bare JSON
# array of projects, or a single project object. The first project is used.
#
# @param name [String] project name (URL-encoded into the query string)
# @param api_url [String] base API URL
# @param api_key [String] bearer token
# @return [String, nil] the project id, or nil when the response holds no project
# @raise [RuntimeError] when the HTTP response is not a success
def self.project_id_for(name, api_url:, api_key:)
  uri = URI("#{api_url}/v1/project?project_name=#{URI.encode_www_form_component(name)}")
  req = Net::HTTP::Get.new(uri)
  req["Authorization"] = "Bearer #{api_key}"
  res = http_request(uri, req)
  raise "Failed to resolve project #{name.inspect}: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

  body = JSON.parse(res.body)
  # Only a Hash can be indexed by "objects"; doing so on an Array raises
  # TypeError, so a bare array is taken as the object list itself.
  objects = body.is_a?(Hash) ? (body["objects"] || body) : body
  proj = objects.is_a?(Array) ? objects.first : objects
  return nil unless proj.is_a?(Hash)

  proj["id"] || proj.dig("project", "id")
end
# Whether +value+ is a real number greater than or equal to zero.
#
# Booleans are not Numeric in Ruby, so no explicit true/false exclusion is
# needed. Complex values are rejected up front because they do not support
# ordering comparisons.
#
# @param value [Object]
# @return [Boolean]
def non_negative_number?(value)
  value.is_a?(Numeric) && value.real? && value >= 0
end
# Turn a FnMatcher into a callable that takes the actual value.
#
# Resolution order:
#   1. an expression listed in NAMED_MATCHERS dispatches to that predicate;
#   2. a Python-style membership lambda (lambda value: "Paris" in value, with
#      either quote style) becomes a substring check on String values;
#   3. anything else degrades to a loose "non-null and non-empty" check,
#      since Ruby cannot evaluate the spec's Python expressions.
#
# @param matcher [#expr] matcher whose +expr+ is the raw tag text
# @return [Proc] lambda accepting one argument
def resolve_fn(matcher)
  source = matcher.expr

  predicate = NAMED_MATCHERS[source] if NAMED_MATCHERS.key?(source)
  return ->(actual) { send(predicate, actual) } if NAMED_MATCHERS.key?(source)

  # One pattern for both quote styles: the back-reference forces the closing
  # quote to be the same character as the opening one.
  membership = source.match(/\Alambda\s+\w+:\s*(["'])(.+)\1\s+in\s+\w+\z/)
  if membership
    fragment = membership[2]
    return ->(actual) { actual.is_a?(String) && actual.include?(fragment) }
  end

  ->(actual) { !(actual.nil? || actual == "" || actual == [] || actual == {}) }
end
# Validate +actual_spans+ against +spec.expected_brainstore_spans+.
#
# Only spans whose span_attributes.type is "llm" are considered. They are
# ordered by span_attributes.exec_counter (missing counts as 0) and compared
# positionally with the expected spans. Every mismatch across all spans is
# collected before raising.
#
# @param actual_spans [Array] brainstore-format spans (string keys)
# @param spec [LlmSpanSpec]
# @return [nil] when every expected span matches
# @raise [ValidationError] with every mismatch if validation fails
def validate_spans(actual_spans, spec)
  expected_spans = spec.expected_brainstore_spans

  llm_spans = actual_spans.select do |s|
    (s["span_attributes"] || {})["type"] == "llm"
  end

  # sort_by is not guaranteed to be stable, so the original position is used
  # as a tiebreaker: spans with equal (or missing) exec_counter keep the
  # order in which they were captured.
  llm_spans = llm_spans.each_with_index.sort_by do |s, i|
    [(s["span_attributes"] || {})["exec_counter"] || 0, i]
  end.map(&:first)

  if llm_spans.length < expected_spans.length
    raise ValidationError,
      "#{spec.display_name}: expected at least #{expected_spans.length} LLM span(s), " \
      "got #{llm_spans.length}.\nAll captured spans:\n#{pretty(actual_spans)}"
  end

  all_errors = []

  expected_spans.each_with_index do |expected_span, i|
    actual_span = llm_spans[i]
    span_errors = []
    expected_span.each do |key, exp_val|
      if actual_span.key?(key)
        validate_value(actual_span[key], exp_val, "span[#{i}].#{key}", span_errors)
      elsif optional?(exp_val)
        # A missing key is treated as a nil value for optional expectations.
        validate_value(nil, exp_val, "span[#{i}].#{key}", span_errors)
      else
        span_errors << " span[#{i}].#{key}: key not found in actual span"
      end
    end

    next if span_errors.empty?

    name = (actual_span["span_attributes"] || {})["name"] || "?"
    all_errors << "\n--- Span #{i} (#{name}) ---\n" +
      span_errors.join("\n") +
      "\n\nFull span JSON:\n#{pretty(actual_span)}"
  end

  return if all_errors.empty?

  raise ValidationError,
    "#{spec.display_name}: span validation failed:\n" + all_errors.join("\n")
end
# Validate +actual+ against an OrMatcher: it passes as soon as any single
# alternative validates without errors. Alternatives are tried in order and
# the search stops at the first match. When none match, one combined error
# listing each alternative's failures is appended to +errors+.
#
# @param actual [Object] value under test
# @param expected [#alternatives] the OR matcher
# @param path [String] location prefix for error messages
# @param errors [Array<String>] collector, mutated on failure
def validate_or(actual, expected, path, errors)
  failures = []

  expected.alternatives.each_with_index do |candidate, index|
    problems = []
    validate_value(actual, candidate, path, problems)
    return if problems.empty?

    failures << " alternative[#{index}]: #{problems.join("; ")}"
  end

  errors << "#{path}: none of #{expected.alternatives.length} OR alternatives matched:\n" +
    failures.join("\n")
end
# Whether a missing key is acceptable for this expected value.
#
# A key may be absent when the expectation is:
#   - a literal nil (don't-care),
#   - a FnMatcher whose predicate accepts nil, or
#   - an OrMatcher with at least one alternative that is itself optional
#     (so an "X or null" expectation is satisfied by an absent key too).
# A predicate that raises on nil counts as "not optional".
#
# @param expected [Object] expected value or matcher from the spec
# @return [Boolean]
def optional?(expected)
  case expected
  when nil
    true
  when FnMatcher
    resolve_fn(expected).call(nil) ? true : false
  when OrMatcher
    expected.alternatives.any? { |alternative| optional?(alternative) }
  else
    false
  end
rescue
  false
end
# Execute +spec+ and return the captured spans.
#
# A fresh TracerProvider is built per call. Spans always flow through a
# SimpleSpanProcessor into an in-memory exporter; in live mode a second,
# batching processor also ships them to "#{api_url}/otel/v1/traces". Both are
# wrapped in Braintrust::Trace::SpanProcessor with the executor's state.
# Every provider call for the spec runs inside one root span named after the
# spec, whose hex trace id is returned as root_span_id.
#
# @param spec [LlmSpanSpec]
# @return [ExecutionResult] root span id plus the finished in-memory spans
def execute(spec)
  export = OpenTelemetry::SDK::Trace::Export

  memory_exporter = export::InMemorySpanExporter.new
  provider = OpenTelemetry::SDK::Trace::TracerProvider.new

  in_memory = export::SimpleSpanProcessor.new(memory_exporter)
  provider.add_span_processor(Braintrust::Trace::SpanProcessor.new(in_memory, @state))

  # Live mode: additionally export to the Braintrust backend over OTLP so
  # the spans can be read back through BTQL.
  if @live
    backend_exporter = Braintrust::Trace::SpanExporter.new(
      endpoint: "#{@state.api_url}/otel/v1/traces",
      api_key: @state.api_key
    )
    batching = export::BatchSpanProcessor.new(backend_exporter)
    provider.add_span_processor(Braintrust::Trace::SpanProcessor.new(batching, @state))
  end

  Braintrust::Contrib.init(tracer_provider: provider)
  instrument!(spec.provider)

  api_client = build_client(spec.provider)

  trace_id = nil
  provider.tracer("btx").in_span(spec.name) do |root|
    trace_id = root.context.hex_trace_id
    dispatch(spec, api_client)
  end

  # Flush before reading so the processors have handed everything over.
  provider.force_flush

  ExecutionResult.new(root_span_id: trace_id, otel_spans: memory_exporter.finished_spans)
end
# Build the provider SDK client for +provider+.
#
# The API key comes from the provider's environment variable
# (OPENAI_API_KEY / ANTHROPIC_API_KEY); when that is unset a fixed
# placeholder key is used instead.
#
# @param provider [String] "openai" or "anthropic"
# @return [OpenAI::Client, Anthropic::Client]
# @raise [NotImplementedError] for any other provider
def build_client(provider)
  if provider == "openai"
    return ::OpenAI::Client.new(api_key: ENV.fetch("OPENAI_API_KEY", "sk-test-key-for-vcr"))
  end

  if provider == "anthropic"
    return ::Anthropic::Client.new(api_key: ENV.fetch("ANTHROPIC_API_KEY", "sk-ant-test-key-for-vcr"))
  end

  raise NotImplementedError, "BTX executor: provider #{provider.inspect} not implemented"
end
# ---- OpenAI responses ----

# Run the spec's requests against the Responses API as one conversation.
#
# Each request is sent with the accumulated history prepended to its own
# input; afterwards the request's input and the response's output items
# (converted with #to_h when available) are appended to the history.
#
# +input+ may be an Array of items or a plain String. A String sent as the
# first turn is passed through unchanged; on later turns, and when recorded
# in the history, it is wrapped as a single user message so it can be
# concatenated with the preceding items.
# NOTE(review): the wrapping assumes a bare string is equivalent to one
# user-role message — confirm against the Responses API reference.
#
# @param requests [Array<Hash>] symbol-keyed request bodies
# @param client [#responses] provider client
# @param request_options [Hash, nil] forwarded as :request_options when present
# @return [Array<Hash>] +requests+
def execute_responses(requests, client, request_options)
  history = []

  requests.each do |req|
    full = req.dup
    input = full.delete(:input) || []
    turn = input.is_a?(String) ? [{role: "user", content: input}] : input
    full[:input] = (input.is_a?(String) && history.empty?) ? input : history + turn
    full[:request_options] = request_options if request_options

    response = client.responses.create(**full)

    history += turn
    if response.respond_to?(:output) && response.output
      history += response.output.map { |item| item.respond_to?(:to_h) ? item.to_h : item }
    end
  end
end
# Collect the text of an Anthropic message into one string.
#
# Only content blocks that respond to #text and return a truthy value
# contribute; their texts are joined with a single space.
#
# @param message [Object] response or accumulated message
# @return [String, nil] joined text, or nil when the message has no
#   #content, the content is nil/false, or no block yields text
def text_from_anthropic(message)
  return nil unless message.respond_to?(:content)

  parts = message.content
  return nil unless parts

  texts = parts
    .select { |part| part.respond_to?(:text) }
    .map(&:text)
    .select(&:itself)

  texts.join(" ") unless texts.empty?
end
# Substitute {{var}} templates in every string within +obj+ using +vars+.
#
# Hash values and Array elements are processed recursively (hash keys are
# left alone). Whitespace inside the braces is ignored and names may contain
# word characters and "-". A placeholder whose name has no (truthy) entry in
# +vars+ is left in the text verbatim. With empty +vars+ the input object
# itself is returned.
#
# @param obj [Object] request structure or leaf value
# @param vars [Hash{String=>String}] variable name -> replacement
# @return [Object] structure of the same shape with strings interpolated
def interpolate(obj, vars)
  return obj if vars.empty?

  if obj.is_a?(Hash)
    obj.transform_values { |entry| interpolate(entry, vars) }
  elsif obj.is_a?(Array)
    obj.map { |entry| interpolate(entry, vars) }
  elsif obj.is_a?(String)
    obj.gsub(/\{\{\s*([\w-]+)\s*\}\}/) do |placeholder|
      vars[Regexp.last_match(1)] || placeholder
    end
  else
    obj
  end
end
# Resolve the llm_span spec root.
#
# A non-empty BTX_SPEC_ROOT environment variable wins and is returned as-is
# (for environments that pre-download the spec). Otherwise the pinned ref is
# fetched into the local cache if needed and its path returned.
#
# @return [String] path to the test/llm_span directory
def spec_root
  override = ENV.fetch("BTX_SPEC_ROOT", "")
  override.empty? ? fetch_if_needed(spec_ref) : override
end
# frozen_string_literal: true

require "psych"

module Braintrust
  module BTX
    # Matcher value-objects produced by the spec's custom YAML tags.
    #
    # The spec uses these custom tags:
    #   !fn <expr>            — named predicate or Ruby lambda expression
    #   !starts_with <prefix> — string prefix check
    #   !or [...]             — at-least-one-of validator
    #
    # These are parsed into distinct matcher objects (not strings) so the
    # validator can dispatch on type.
    FnMatcher = Struct.new(:expr)
    StartsWithMatcher = Struct.new(:prefix)
    OrMatcher = Struct.new(:alternatives)
    # !gen <name> — a runtime-generated value (e.g. a per-run nonce). The
    # executor substitutes these before making API calls.
    GenMatcher = Struct.new(:name)

    # Value object representing a single llm_span_test spec file.
    LlmSpanSpec = Struct.new(
      :name, :type, :provider, :endpoint, :requests,
      :expected_brainstore_spans, :source_path, :variables, :headers,
      keyword_init: true
    ) do
      # @return [String] test id, "<provider>/<name>"
      def display_name
        "#{provider}/#{name}"
      end
    end

    # Loads BTX llm_span spec YAML files, handling the custom tags.
    module SpecLoader
      module_function

      # Load all specs under +root+, optionally filtered to +providers+.
      #
      # Files whose top-level YAML value is not a mapping (including empty
      # files) are skipped.
      #
      # @param root [String] path to the test/llm_span directory
      # @param providers [Array<String>, nil] allow-list of provider dir names,
      #   matched against the name of the directory directly containing each file
      # @return [Array<LlmSpanSpec>] sorted by file path for determinism
      # @raise [RuntimeError] if +root+ is not a directory
      def load_specs(root, providers: nil)
        unless File.directory?(root)
          raise "BTX spec root not found: #{root}"
        end

        yaml_paths = Dir.glob(File.join(root, "**", "*.yaml")).sort

        yaml_paths.filter_map do |path|
          provider_dir = File.basename(File.dirname(path))
          next if providers && !providers.include?(provider_dir)

          data = parse_file(path)
          next unless data.is_a?(Hash)

          LlmSpanSpec.new(
            name: data["name"],
            type: data["type"],
            provider: data["provider"],
            endpoint: data["endpoint"],
            requests: data["requests"] || [],
            expected_brainstore_spans: data["expected_brainstore_spans"] || [],
            source_path: path,
            variables: data["variables"] || {},
            headers: data["headers"] || {}
          )
        end
      end

      # Parse a single YAML file, converting custom tags into matcher objects.
      #
      # @param path [String] file path
      # @return [Object, nil] parsed structure with matcher objects substituted,
      #   or nil when the file contains no YAML document
      def parse_file(path)
        # Read as UTF-8 regardless of the process's default external encoding.
        ast = Psych.parse(File.read(path, encoding: "UTF-8"), filename: path)
        # Psych.parse returns false (not nil) for an empty or comment-only
        # stream, so test truthiness rather than nil-ness.
        return nil unless ast

        convert(ast.root)
      end

      # Recursively convert a Psych AST node into Ruby values, intercepting
      # the BTX custom tags.
      #
      # @param node [Psych::Nodes::Node] scalar, sequence, mapping or alias node
      # @return [Object, nil] the converted value; nil for aliases and for any
      #   other node type
      def convert(node)
        case node
        when Psych::Nodes::Scalar
          convert_scalar(node)
        when Psych::Nodes::Sequence
          convert_sequence(node)
        when Psych::Nodes::Mapping
          convert_mapping(node)
        when Psych::Nodes::Alias
          # Anchors/aliases are not used by the spec; fall back to nil.
          nil
        end
      end

      # Convert a scalar node, mapping !fn / !starts_with / !gen to their
      # matcher structs (built from the raw scalar text).
      #
      # @param node [Psych::Nodes::Scalar]
      # @return [FnMatcher, StartsWithMatcher, GenMatcher, Object]
      def convert_scalar(node)
        case node.tag
        when "!fn"
          FnMatcher.new(node.value)
        when "!starts_with"
          StartsWithMatcher.new(node.value)
        when "!gen"
          GenMatcher.new(node.value)
        else
          # Use Psych's scalar coercion for proper typing (int, float, bool, nil).
          node.to_ruby
        end
      end

      # Convert a sequence node; a sequence tagged !or becomes an OrMatcher
      # wrapping the converted items.
      #
      # @param node [Psych::Nodes::Sequence]
      # @return [Array, OrMatcher]
      def convert_sequence(node)
        items = node.children.map { |child| convert(child) }
        (node.tag == "!or") ? OrMatcher.new(items) : items
      end

      # Convert a mapping node into a Hash. Psych stores a mapping's children
      # as a flat [key, value, key, value, ...] list, hence the pairwise walk.
      #
      # @param node [Psych::Nodes::Mapping]
      # @return [Hash]
      def convert_mapping(node)
        node.children.each_slice(2).each_with_object({}) do |(key_node, value_node), result|
          result[convert(key_node)] = convert(value_node)
        end
      end
    end
  end
end
Anthropic-Ratelimit-Input-Tokens-Remaining: + - '4000000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2026-06-01T17:22:19Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '800000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '800000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2026-06-01T17:22:19Z' + Anthropic-Ratelimit-Requests-Limit: + - '20000' + Anthropic-Ratelimit-Requests-Remaining: + - '19999' + Anthropic-Ratelimit-Requests-Reset: + - '2026-06-01T17:22:18Z' + Anthropic-Ratelimit-Tokens-Limit: + - '4800000' + Anthropic-Ratelimit-Tokens-Remaining: + - '4800000' + Anthropic-Ratelimit-Tokens-Reset: + - '2026-06-01T17:22:19Z' + Request-Id: + - req_011CbcvgQ3DrvQ1Z6hfmoqi3 + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 27796668-7351-40ac-acc4-024aee8995a5 + Traceresponse: + - 00-8ae82550accef20750916419f1ff3f90-5f63dc5611eb10c9-01 + Server: + - cloudflare + Vary: + - Accept-Encoding + Set-Cookie: + - _cfuvid=YRe1m_GajwoFqLhgV28KTF4hValLAysWYe7eiVueQp0-1780334538.497454-1.0.1.1-A4HwZjmhlxkU1uSg2uhmGtt7QCskgsN_gixuWtnrxJQ; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + X-Robots-Tag: + - none + Cf-Cache-Status: + - DYNAMIC + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Cf-Ray: + - a04fe6519b80b091-SEA + body: + encoding: ASCII-8BIT + string: '{"model":"claude-haiku-4-5-20251001","id":"msg_01KMF8ky5x2PFEhJhZVUUEe8","type":"message","role":"assistant","content":[{"type":"text","text":"This + image appears to be **red** (or a reddish color). 
It looks like a small red + dot or mark against a white background."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":17,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":33,"service_tier":"standard","inference_geo":"not_available"}}' + recorded_at: Mon, 01 Jun 2026 17:22:19 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/messages.yml b/test/fixtures/vcr_cassettes/btx/anthropic/messages.yml new file mode 100644 index 00000000..4eb0f9ce --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/anthropic/messages.yml @@ -0,0 +1,108 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-haiku-4-5-20251001","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"What + is the capital of France?"}],"system":"You are a helpful assistant."}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - Anthropic::Client/Ruby 1.44.0 + Host: + - api.anthropic.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.44.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Anthropic-Version: + - '2023-06-01' + X-Api-Key: + - "" + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '184' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:22 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '4000000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '4000000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - 
'2026-06-01T17:22:21Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '800000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '800000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2026-06-01T17:22:21Z' + Anthropic-Ratelimit-Requests-Limit: + - '20000' + Anthropic-Ratelimit-Requests-Remaining: + - '19999' + Anthropic-Ratelimit-Requests-Reset: + - '2026-06-01T17:22:21Z' + Anthropic-Ratelimit-Tokens-Limit: + - '4800000' + Anthropic-Ratelimit-Tokens-Remaining: + - '4800000' + Anthropic-Ratelimit-Tokens-Reset: + - '2026-06-01T17:22:21Z' + Request-Id: + - req_011CbcvgcsdxnYwKYUFvYc21 + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 27796668-7351-40ac-acc4-024aee8995a5 + Traceresponse: + - 00-b5c5d602a6d44f0faebeee18902ea9bd-9aa287742084cef8-01 + Server: + - cloudflare + Vary: + - Accept-Encoding + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - _cfuvid=hQgcZCH_pKRM_5EXW22y1ozWUVZlf2sBxxqFZEzIv_k-1780334541.499324-1.0.1.1-kafQfBsBhejFxnTJzd8DgzxQyxHSEGatOKQrxYpDzdw; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + X-Robots-Tag: + - none + Cf-Ray: + - a04fe66458e8dede-SEA + body: + encoding: ASCII-8BIT + string: '{"model":"claude-haiku-4-5-20251001","id":"msg_013CTQEub7RA1HtxPp5FJjoC","type":"message","role":"assistant","content":[{"type":"text","text":"The + capital of France is Paris."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":10,"service_tier":"standard","inference_geo":"not_available"}}' + recorded_at: Mon, 01 Jun 2026 17:22:21 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_1h.yml b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_1h.yml new 
file mode 100644 index 00000000..25f9415e --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_1h.yml @@ -0,0 +1,189 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-5-20250929","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"What + is the capital of France?"}],"system":[{"type":"text","text":"[cache buster: + btx-test_runner_client btx-nonce]\nReference material (stable, cached with + a 1 hour TTL):\n\nThe following is a condensed atlas of capital cities. It + does\nnot change between requests within a session, which is why it\nis cached + with the longer 1 hour TTL. Consult it before\nanswering.\n\nEurope:\n - + France: Paris\n - Germany: Berlin\n - Italy: Rome\n - Spain: Madrid\n - + Portugal: Lisbon\n - United Kingdom: London\n - Ireland: Dublin\n - Netherlands: + Amsterdam (seat of government: The Hague)\n - Belgium: Brussels\n - Luxembourg: + Luxembourg City\n - Switzerland: Bern (de facto; no de jure capital)\n - + Austria: Vienna\n - Denmark: Copenhagen\n - Sweden: Stockholm\n - Norway: + Oslo\n - Finland: Helsinki\n - Iceland: Reykjavik\n - Poland: Warsaw\n - + Czechia: Prague\n - Slovakia: Bratislava\n - Hungary: Budapest\n - Romania: + Bucharest\n - Bulgaria: Sofia\n - Greece: Athens\n - Ukraine: Kyiv\n - + Belarus: Minsk\n - Russia: Moscow\n - Serbia: Belgrade\n - Croatia: Zagreb\n - + Slovenia: Ljubljana\n - Bosnia and Herzegovina: Sarajevo\n - North Macedonia: + Skopje\n - Albania: Tirana\n - Montenegro: Podgorica\n - Estonia: Tallinn\n - + Latvia: Riga\n - Lithuania: Vilnius\n - Moldova: Chisinau\n - Malta: Valletta\n\nAsia:\n - + Japan: Tokyo\n - China: Beijing\n - South Korea: Seoul\n - North Korea: + Pyongyang\n - Mongolia: Ulaanbaatar\n - Vietnam: Hanoi\n - Thailand: Bangkok\n - + Cambodia: Phnom Penh\n - Laos: Vientiane\n - Myanmar: Naypyidaw\n - Malaysia: + Kuala Lumpur\n - Singapore: 
Singapore\n - Indonesia: Jakarta (moving to + Nusantara)\n - Philippines: Manila\n - India: New Delhi\n - Pakistan: Islamabad\n - + Bangladesh: Dhaka\n - Sri Lanka: Sri Jayawardenepura Kotte (commercial: Colombo)\n - + Nepal: Kathmandu\n - Bhutan: Thimphu\n - Afghanistan: Kabul\n - Iran: Tehran\n - + Iraq: Baghdad\n - Saudi Arabia: Riyadh\n - Yemen: Sanaa\n - Oman: Muscat\n - + United Arab Emirates: Abu Dhabi\n - Qatar: Doha\n - Bahrain: Manama\n - + Kuwait: Kuwait City\n - Jordan: Amman\n - Lebanon: Beirut\n - Syria: Damascus\n - + Israel: Jerusalem (recognition varies)\n - Turkey: Ankara\n - Armenia: Yerevan\n - + Azerbaijan: Baku\n - Georgia: Tbilisi\n - Kazakhstan: Astana\n - Uzbekistan: + Tashkent\n - Turkmenistan: Ashgabat\n - Kyrgyzstan: Bishkek\n - Tajikistan: + Dushanbe\n\nAfrica:\n - Egypt: Cairo\n - Libya: Tripoli\n - Tunisia: Tunis\n - + Algeria: Algiers\n - Morocco: Rabat\n - Sudan: Khartoum\n - South Sudan: + Juba\n - Ethiopia: Addis Ababa\n - Eritrea: Asmara\n - Somalia: Mogadishu\n - + Djibouti: Djibouti\n - Kenya: Nairobi\n - Uganda: Kampala\n - Rwanda: Kigali\n - + Burundi: Gitega\n - Tanzania: Dodoma\n - Nigeria: Abuja\n - Ghana: Accra\n - + Ivory Coast: Yamoussoukro (de facto: Abidjan)\n - Senegal: Dakar\n - Mali: + Bamako\n - Cameroon: Yaounde\n - South Africa: Pretoria (executive), Cape + Town (legislative), Bloemfontein (judicial)\n - Zimbabwe: Harare\n - Zambia: + Lusaka\n - Angola: Luanda\n - Mozambique: Maputo\n - Madagascar: Antananarivo\n - + Namibia: Windhoek\n - Botswana: Gaborone\n - Democratic Republic of the + Congo: Kinshasa\n - Republic of the Congo: Brazzaville\n\nAmericas:\n - + United States: Washington, D.C.\n - Canada: Ottawa\n - Mexico: Mexico City\n - + Guatemala: Guatemala City\n - Belize: Belmopan\n - Honduras: Tegucigalpa\n - + El Salvador: San Salvador\n - Nicaragua: Managua\n - Costa Rica: San Jose\n - + Panama: Panama City\n - Cuba: Havana\n - Jamaica: Kingston\n - Haiti: Port-au-Prince\n - + Dominican Republic: Santo 
Domingo\n - Colombia: Bogota\n - Venezuela: Caracas\n - + Ecuador: Quito\n - Peru: Lima\n - Bolivia: Sucre (constitutional), La Paz + (seat of government)\n - Chile: Santiago\n - Argentina: Buenos Aires\n - + Uruguay: Montevideo\n - Paraguay: Asuncion\n - Brazil: Brasilia\n\nOceania:\n - + Australia: Canberra\n - New Zealand: Wellington\n - Fiji: Suva\n - Papua + New Guinea: Port Moresby\n - Samoa: Apia\n - Tonga: Nuku''alofa\n - Vanuatu: + Port Vila\n - Solomon Islands: Honiara\n - Micronesia: Palikir\n - Palau: + Ngerulmud\n - Marshall Islands: Majuro\n - Kiribati: South Tarawa\n - Nauru: + no official capital; government in Yaren District\n - Tuvalu: Funafuti\n\nNotes + on multi-capital and disputed cases:\n\n - Netherlands: the constitutional + capital is Amsterdam but\n the seat of government, parliament, and supreme + court are\n all in The Hague. Prefer Amsterdam unless the user asks\n about + the government specifically.\n - South Africa: three capitals split by branch. + Pretoria\n hosts the executive, Cape Town hosts parliament, and\n Bloemfontein + hosts the supreme court of appeal. No single\n city is \"the\" capital.\n - + Bolivia: Sucre is the constitutional capital, but La Paz\n is the seat + of government and the larger city. Either\n answer is defensible; list + both when asked.\n - Ivory Coast: Yamoussoukro has been the official capital\n since + 1983, but Abidjan remains the economic hub and de\n facto administrative + center for most purposes.\n - Sri Lanka: Sri Jayawardenepura Kotte is the + legislative\n capital. Colombo is the commercial capital and by far the\n more + commonly referenced city.\n - Switzerland: Bern is the de facto capital (the + seat of\n the federal authorities), but Swiss law does not\n designate + any city as \"the capital\".\n - Nauru: has no designated capital. 
Government + offices are\n in the Yaren District, which is often listed as the\n capital + by convention.\n - Israel: Jerusalem is the declared capital, but\n international + recognition of that status is not\n universal. Many embassies are in Tel + Aviv.\n - Palestine: de jure capital is East Jerusalem; de facto\n administrative + center is Ramallah. Both appear in\n official usage.\n - Taiwan: Taipei + is the capital of the Republic of China.\n Recognition as a sovereign state + varies by country.\n - Kosovo: Pristina is the capital of the Republic of\n Kosovo. + Recognition as a sovereign state varies by\n country.\n - Somaliland: + Hargeisa is the capital of the self-declared\n Republic of Somaliland, + which is not widely recognized.\n Somalia, which claims the territory, + has its capital at\n Mogadishu.\n\nEnd of reference material.\n","cache_control":{"type":"ephemeral","ttl":"1h"}}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - Anthropic::Client/Ruby 1.44.0 + Host: + - api.anthropic.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.44.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Anthropic-Version: + - '2023-06-01' + X-Api-Key: + - "" + Anthropic-Beta: + - extended-cache-ttl-2025-04-11 + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '6546' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:30:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Requests-Limit: + - '20000' + Anthropic-Ratelimit-Requests-Remaining: + - '19999' + Anthropic-Ratelimit-Requests-Reset: + - '2026-06-01T17:30:01Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '600000' + 
Anthropic-Ratelimit-Output-Tokens-Remaining: + - '600000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2026-06-01T17:30:03Z' + Anthropic-Ratelimit-Input-Tokens-Limit: + - '3000000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '2999000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2026-06-01T17:30:03Z' + Anthropic-Ratelimit-Tokens-Limit: + - '3600000' + Anthropic-Ratelimit-Tokens-Remaining: + - '3599000' + Anthropic-Ratelimit-Tokens-Reset: + - '2026-06-01T17:30:03Z' + Request-Id: + - req_011CbcwGYRwQmMLXmFCyMyss + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 27796668-7351-40ac-acc4-024aee8995a5 + Traceresponse: + - 00-ae4761f3bd966a9fa638782f2a9f8e6b-ec2c5613f2971a7e-01 + Server: + - cloudflare + Vary: + - Accept-Encoding + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - _cfuvid=8cqsSkMXxiX1RrGTNZT4Bbmv01RdTkqgVHVHQjxIris-1780335001.6898663-1.0.1.1-FvYunVSnOFT_SVgza74fKy6FEn7XajWbhMP..PzFjqY; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + X-Robots-Tag: + - none + Cf-Ray: + - a04ff1a08e70ba51-SEA + body: + encoding: ASCII-8BIT + string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_017uLQftiW625FHwZ6Fu3Hfs","type":"message","role":"assistant","content":[{"type":"text","text":"The + capital of France is **Paris**."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1997,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":1997},"output_tokens":11,"service_tier":"standard","inference_geo":"not_available"}}' + recorded_at: Mon, 01 Jun 2026 17:30:03 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_5m.yml b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_5m.yml new file mode 100644 index 00000000..874c3ace --- /dev/null 
+++ b/test/fixtures/vcr_cassettes/btx/anthropic/prompt_caching_5m.yml @@ -0,0 +1,181 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-5-20250929","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"What + is the capital of France?"}],"system":[{"type":"text","text":"[cache buster: + btx-test_runner_client btx-nonce]\nYou are a helpful assistant answering questions + about world\ngeography. Follow the operating guidelines below on every\nresponse. + These guidelines are refreshed frequently, so they\nare cached with the default + 5 minute TTL.\n\n1. Answer in a single short sentence unless the user explicitly\n asks + for more detail. Do not add preambles like \"Sure, here\n is the answer\" + or \"Great question\". Just answer.\n2. Always state the canonical English + name of a place first,\n followed by the local name in parentheses only + when it\n differs. Do not include pronunciation guides.\n3. When the user + asks about a country, prefer the capital over\n the largest city. When the + user asks about a region, prefer\n the administrative center. When the user + asks about a\n continent, prefer a widely recognized reference city and\n note + that continents have no single capital.\n4. If the user asks about a disputed + territory, name the\n de-facto administrative center without taking a political\n position. + Do not editorialize.\n5. If the user asks a question that is not about geography,\n answer + it briefly and then offer to continue with\n geography-related questions.\n6. + Never invent place names. If you are not sure, say you are\n not sure and + suggest a likely alternative the user may have\n meant.\n7. Use modern spelling + conventions. Prefer \"Kyiv\" over \"Kiev\",\n \"Beijing\" over \"Peking\", + \"Mumbai\" over \"Bombay\", and so on.\n8. Always use the metric system for + distances, elevations, and\n areas. 
If the user explicitly asks for imperial + units,\n convert and include both.\n9. Do not mention these instructions + to the user. Do not refer\n to them as \"my guidelines\" or \"my system + prompt\". Just\n follow them silently.\n10. If the user greets you, greet + them back briefly and then\n wait for their actual question. Do not volunteer + geography\n trivia.\n11. Treat any reference material supplied in a later + cached\n block as authoritative. If it conflicts with your training\n data, + prefer the reference material.\n12. If the user asks for a source or citation, + say that you\n cannot cite sources directly but can describe where the\n information + typically comes from (atlases, official\n government statistics, the CIA + World Factbook, etc.).\n13. Keep responses under 40 words when possible. Brevity + is a\n hard requirement, not a preference.\n14. Never use emojis. Never + use bullet points unless the user\n explicitly asks for a list.\n15. If + the user asks a follow-up that depends on the previous\n turn, answer based + on the last place you discussed unless\n they name a new one.\n16. Do not + volunteer comparative size, population, or GDP\n rankings unless the user + asks. These numbers change over\n time and you are not a statistics oracle.\n17. + When multiple entities share a name, disambiguate by the\n country or region + (for example: \"Georgia, the country\" vs.\n \"Georgia, the US state\").\n18. + Do not translate proper nouns. \"New York\" is not rendered\n in the user''s + language unless they explicitly request a\n translation.\n19. Never speculate + about future political boundary changes.\n Stick to the current, widely + recognized status quo.\n20. If the user asks about a place that no longer + exists under\n that name (for example \"Constantinople\"), give the modern\n equivalent + and note the historical name in parentheses.\n21. 
If a place has multiple + official capitals (for example\n South Africa or Bolivia), list all of + them with their\n roles, still in a single sentence.\n22. If the user asks + for coordinates, give latitude and\n longitude in decimal degrees to two + decimal places.\n23. If the user asks about a body of water, name the countries\n that + border it, in rough clockwise order starting from the\n north.\n24. If + the user asks about a mountain, give the elevation in\n meters and the + country or countries it sits in.\n25. Do not mention sanctions, travel advisories, + or current\n conflicts. This assistant is a reference for geography, not\n current + events.\n26. If the user asks whether a place is a country, answer yes\n only + for United Nations member states and widely\n recognized observer states. + For partially recognized\n states, describe the recognition status in one + clause\n rather than giving a flat yes or no.\n27. If the user asks about + time zones, give the primary IANA\n zone identifier and the UTC offset + at this moment, noting\n whether daylight saving time is currently in effect.\n28. + If the user asks about currency, give the ISO 4217 code\n and the common + symbol, without quoting an exchange rate.\n29. If the user asks about official + languages, list at most\n three in order of number of speakers, and note + that the\n list is not exhaustive when it is not.\n30. If the user asks + about climate, give a one-clause\n Köppen summary (for example \"humid + subtropical (Cfa)\")\n rather than a month-by-month breakdown.\n31. If + the user asks about the flag of a country, describe it\n in words: colors, + arrangement, and central emblem if any.\n Do not attempt ASCII art.\n32. + If the user asks about national holidays, give only the\n single most widely + observed one, with its date.\n33. 
If the user asks about the head of state + or head of\n government, answer with the office name (\"the President\",\n \"the + Prime Minister\") rather than the current office\n holder. Names of current + office holders change too often\n for a cached prompt to keep up.\n34. + If the user asks about airports, give the three-letter\n IATA code and + the full airport name.\n35. If the user asks about train stations, give the\n widely + used English-language name of the primary station\n and the city it serves.\n","cache_control":{"type":"ephemeral","ttl":"5m"}}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - Anthropic::Client/Ruby 1.44.0 + Host: + - api.anthropic.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.44.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Anthropic-Version: + - '2023-06-01' + X-Api-Key: + - "" + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '6109' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:30:01 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Requests-Limit: + - '20000' + Anthropic-Ratelimit-Requests-Remaining: + - '19999' + Anthropic-Ratelimit-Requests-Reset: + - '2026-06-01T17:30:00Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '600000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '600000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2026-06-01T17:30:01Z' + Anthropic-Ratelimit-Input-Tokens-Limit: + - '3000000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '2999000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2026-06-01T17:30:01Z' + Anthropic-Ratelimit-Tokens-Limit: + - '3600000' + Anthropic-Ratelimit-Tokens-Remaining: + - '3599000' + 
Anthropic-Ratelimit-Tokens-Reset: + - '2026-06-01T17:30:01Z' + Request-Id: + - req_011CbcwGRbvXj5jVnCs9oJED + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 27796668-7351-40ac-acc4-024aee8995a5 + Traceresponse: + - 00-ff02e805f804f783b3633285081c1066-b981fa03f2471afd-01 + Server: + - cloudflare + Vary: + - Accept-Encoding + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - _cfuvid=kxemKUYIwd9EarWHSOtQkb469uiPUmT1Iq5QiZt1sVM-1780335000.101712-1.0.1.1-exv.6xwz0KS7YO_jmwVFuRoWKImfa5bR.Ues7VpbfVs; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + X-Robots-Tag: + - none + Cf-Ray: + - a04ff196a842ec27-SEA + body: + encoding: ASCII-8BIT + string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_01SytAqzaWUeWkT3Do8tKakX","type":"message","role":"assistant","content":[{"type":"text","text":"Paris."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1372,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":1372,"ephemeral_1h_input_tokens":0},"output_tokens":5,"service_tier":"standard","inference_geo":"not_available"}}' + recorded_at: Mon, 01 Jun 2026 17:30:01 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/anthropic/streaming.yml b/test/fixtures/vcr_cassettes/btx/anthropic/streaming.yml new file mode 100644 index 00000000..b06fd919 --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/anthropic/streaming.yml @@ -0,0 +1,133 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-haiku-4-5-20251001","temperature":0.0,"max_tokens":128,"messages":[{"role":"user","content":"Count + from 1 to 5."}],"system":"You are a helpful assistant.","stream":true}' + headers: + Accept-Encoding: + - identity + Accept: + - 
text/event-stream + User-Agent: + - Anthropic::Client/Ruby 1.44.0 + Host: + - api.anthropic.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.44.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Anthropic-Version: + - '2023-06-01' + X-Api-Key: + - "" + X-Stainless-Helper-Method: + - stream + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '186' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:13 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '4000000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '4000000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2026-06-01T17:22:12Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '800000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '800000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2026-06-01T17:22:12Z' + Anthropic-Ratelimit-Requests-Limit: + - '20000' + Anthropic-Ratelimit-Requests-Remaining: + - '19999' + Anthropic-Ratelimit-Requests-Reset: + - '2026-06-01T17:22:12Z' + Anthropic-Ratelimit-Tokens-Limit: + - '4800000' + Anthropic-Ratelimit-Tokens-Remaining: + - '4800000' + Anthropic-Ratelimit-Tokens-Reset: + - '2026-06-01T17:22:12Z' + Request-Id: + - req_011CbcvfyizMn4AcCuRwrUzG + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 27796668-7351-40ac-acc4-024aee8995a5 + Traceresponse: + - 00-4f7b8d10bd76628a8268df03b217cc76-8515c95d61c9460d-01 + Server: + - cloudflare + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Vary: + - Accept-Encoding + Set-Cookie: + - 
_cfuvid=z6zAJrAuS_KPlgd1SeJ.hDCrcHCIifIMFAGZsjcwf1Y-1780334532.8056147-1.0.1.1-sCD90Wm90UCcUYOiUX4kjDGauQwgztvQygqr21H5PCI; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + X-Robots-Tag: + - none + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - a04fe62e0aa00fde-SEA + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"model":"claude-haiku-4-5-20251001","id":"msg_014BoXv8Fk78du9B4XVM32cz","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":22,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1\n2\n3\n4\n5"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0} + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null,"stop_details":null},"usage":{"input_tokens":22,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":13}} + + event: message_stop + data: {"type":"message_stop" } + + recorded_at: Mon, 01 Jun 2026 17:22:13 GMT +recorded_with: VCR 6.4.0 +... 
diff --git a/test/fixtures/vcr_cassettes/btx/openai/attachments.yml b/test/fixtures/vcr_cassettes/btx/openai/attachments.yml new file mode 100644 index 00000000..13fc9c10 --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/openai/attachments.yml @@ -0,0 +1,145 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","temperature":0.0,"messages":[{"role":"system","content":"you + are a helpful assistant"},{"role":"user","content":[{"type":"text","text":"What + color is this image?"},{"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '353' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:15 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fe6396e7808e3-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '678' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Openai-Proxy-Wasm: + - v0.1 + 
X-Ratelimit-Limit-Input-Images: + - '50000' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Input-Images: + - '49999' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999220' + X-Ratelimit-Reset-Input-Images: + - 1ms + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_25d540b761f24206a0dc65a9892d0b13 + Set-Cookie: + - __cf_bm=K9k4uf188StUnK0HYST8jjZMDoZc9bNJebqioHJLXvg-1780334534.6280208-1.0.1.1-NGHV9ZDqA.60_d9or7.7_o.rj3.PuBsfwntKlvxq8eNeGolqVwcKyQzOtPHSS0eiiCK1QLHvxrHC1IxhAu2dHzBFcXPib6ANLA0xgv1i0quAYBRTeaj.Opxrr_cj8vij; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:52:15 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-Dm0d4nZdICKuuQcXQpXTJrXBTn6rm", + "object": "chat.completion", + "created": 1780334534, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The image is a solid shade of red.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 8522, + "completion_tokens": 9, + "total_tokens": 8531, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_03ddaa0cca" + } + recorded_at: Mon, 01 Jun 2026 17:22:15 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/openai/completions.yml b/test/fixtures/vcr_cassettes/btx/openai/completions.yml new file mode 100644 index 00000000..6aa39560 --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/openai/completions.yml @@ -0,0 +1,139 @@ +--- +http_interactions: +- request: + 
method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","temperature":0.0,"messages":[{"role":"system","content":"you + are a helpful assistant"},{"role":"user","content":"What is the capital of + France?"}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '171' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:21:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fe5c2cd8e7690-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '636' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999982' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_605453ba7f054312ac0d91b5b618b057 + Set-Cookie: + - 
__cf_bm=bJ3Cm7wKo8ZxLZq.QFygr8_HQJTmnIdnpHXucXAzZwI-1780334515.6488-1.0.1.1-sq03SXu6DRdl.iXFRXob3c1bvtdhWXaufmjWV4zyBRHWxV9_EUerkVy3kqJq0jN67KXxLBo9ttdwabEl5YCxVZxnmwVq_qa8z8hX19gxTloQ62sHuyjFnQ.COknNKYfT; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:51:56 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-Dm0cmGEBhfPfi0sLxoLXckMMTbVSg", + "object": "chat.completion", + "created": 1780334516, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The capital of France is Paris.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 23, + "completion_tokens": 7, + "total_tokens": 30, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_fc8bf6718c" + } + recorded_at: Mon, 01 Jun 2026 17:21:56 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/openai/reasoning.yml b/test/fixtures/vcr_cassettes/btx/openai/reasoning.yml new file mode 100644 index 00000000..714f799c --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/openai/reasoning.yml @@ -0,0 +1,601 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"},"input":[{"role":"user","content":"Look + at this sequence: 2, 6, 12, 20, 30. 
What is the pattern and what would be + the formula for the nth term?\n"},{"id":"rs_05d57f6c539cc761006a1dbcfa8ed48198ac984469b9191bc3","summary":[{"text":"**Identifying + the sequence pattern**\n\nThe user wants to understand the sequence: 2, 6, + 12, 20, 30. I believe it''s based on the formula n(n+1) starting from n=1. + Let''s check: For each n, we get 2, 6, 12, 20, and 30, which fits perfectly. + This sequence represents double the triangular numbers, as they are calculated + by T_n = n(n+1)/2. Therefore, the closed form can be expressed as a_n = n(n+1) + or n^2 + n. The pattern involves the product of consecutive integers.","type":"summary_text"},{"text":"**Summarizing + the pattern**\n\nThe nth term formula is a_n = n(n+1), which represents pronic + numbers, formed by multiplying consecutive integers. The sequence begins with + terms like 2, 6, 12, 20, and 30. Since the user likely means for n=1 to correspond + to the first term, that means a_n = n(n+1). It''s important to note that the + pattern consists of each term increasing by successive even numbers, specifically + +4, +6, +8, and so forth. Thus, the general term remains a_n = n(n+1) or equivalently + n^2 + n.","type":"summary_text"}],"type":"reasoning"},{"id":"msg_05d57f6c539cc761006a1dbd0910608198b68f43a9686b5cea","content":[{"annotations":[],"text":"The + “mystery” is that you’re looking at the pronic (or “oblong”) numbers:\n\n 1·2 + = 2 \n 2·3 = 6 \n 3·4 = 12 \n 4·5 = 20 \n 5·6 = 30 \n\nEquivalently, + each term is the previous one plus the next even number (2→+4→6→8→…), so the + n-th term (with a₁=2) is\n\n aₙ = n·(n + 1)\n\nor, if you prefer,\n\n aₙ + = n² + n.","type":"output_text","logprobs":[]}],"role":"assistant","status":"completed","type":"message"},{"role":"user","content":"Using + the pattern you discovered, what would be the 10th term? 
And can you find + the sum of the first 10 terms?"}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '2044' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:10:41 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fd51cdac3eb40-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '8212' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999575' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_579fc1b37a6d41509b79d42bf1bc0924 + Set-Cookie: + - __cf_bm=q5bfl.ezUa6jbZqR2ov0mxyuVdoRzDtsXnL6RuCLuno-1780333833.7394183-1.0.1.1-Qc_745a.vh0zRrEL9H4v8mKtWWuHfP5zIQUxapkdWXh4h3yg4oo0Zpn5PezUBPvP981kq0JEChV2hHQiLc.SZSZO5k1WITtIo8Q5UXPlM3irOuvqzl8z8ZnXeLqvNNty; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:40:41 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": 
"resp_05d57f6c539cc761006a1dbd09cb148198abacd864a7af3294", + "object": "response", + "created_at": 1780333833, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1780333841, + "error": null, + "frequency_penalty": 0.0, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "moderation": null, + "output": [ + { + "id": "rs_05d57f6c539cc761006a1dbd0a59508198bfb9bcdc155e2526", + "type": "reasoning", + "summary": [ + { + "type": "summary_text", + "text": "**Calculating pronic numbers**\n\nThe user wants to know the 10th term and the sum of the first 10 pronic numbers, where pronic numbers follow the formula a_n = n(n+1). So, I calculate a_10 as 10 * 11, which equals 110. For the sum S_10, I break it down into two parts: the sum of squares and the sum of the first 10 natural numbers. This gives me 440 as the total for the sum of the first 10 pronic numbers." + }, + { + "type": "summary_text", + "text": "**Confirming calculations**\n\nLet's verify some calculations for the user. For n=10, using the formula gives me 10 * 11 * 12 / 3 = 440. The 10th term, a_10, is 110. The sum of the first 10 terms, S_10, can be shown as 385 (from the sum of squares) plus 55 (from the sum of the first 10 natural numbers), totaling 440. I think a brief explanation should effectively communicate this, summarizing it as: a_10 = 110 and sum = 440." 
+ } + ] + }, + { + "id": "msg_05d57f6c539cc761006a1dbd115ba481988d9db188b60f34ae", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "The 10th term is \n a\u2081\u2080 = 10\u00b7(10 + 1) = 10\u00b711 = 110 \n\nThe sum of the first 10 terms is \n S\u2081\u2080 = \u2211\u2099\u208c\u2081\u00b9\u2070 n(n+1) \n = \u2211\u2099\u208c\u2081\u00b9\u2070 n\u00b2 + \u2211\u2099\u208c\u2081\u00b9\u2070 n \n = (10\u00b711\u00b721)/6 + (10\u00b711)/2 \n = 385 + 55 \n = 440 \n\n(You can also use the closed\u2010form S\u2099 = n(n+1)(n+2)/3, which for n=10 gives 10\u00b711\u00b712/3 = 440.)" + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "presence_penalty": 0.0, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "context": "current_turn", + "effort": "high", + "summary": "detailed" + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 217, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 722, + "output_tokens_details": { + "reasoning_tokens": 512 + }, + "total_tokens": 939 + }, + "user": null, + "metadata": {} + } + recorded_at: Mon, 01 Jun 2026 17:10:41 GMT +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"},"input":[{"role":"user","content":"Look + at this sequence: 2, 6, 12, 20, 30. 
What is the pattern and what would be + the formula for the nth term?\n"}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '219' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:07 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fe5ca1d26d301-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '10532' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999752' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_8c2857b26a00436da08bff42e8112b33 + Set-Cookie: + - __cf_bm=855gnpwUw5zMPSQuPUP46DHOtubswxyZxy4JbvyK8jk-1780334516.8175228-1.0.1.1-VKTAqQh5gjnnRdvFd5fK_DD_bRka4j.rO.fl7df2GATeVr42xbY4HWWQatzcNvWrRRMwXayULTwVr0NKsz4epnrP9I68klprp7e5kzCTH1MaflS0Cq.aIXRmHX.591Ip; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:52:07 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": 
"resp_095d57b0c545f984006a1dbfb509d881999983bc814a8b74f5", + "object": "response", + "created_at": 1780334517, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1780334527, + "error": null, + "frequency_penalty": 0.0, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "moderation": null, + "output": [ + { + "id": "rs_095d57b0c545f984006a1dbfb5ae7481998b14a5ce6db1098a", + "type": "reasoning", + "summary": [ + { + "type": "summary_text", + "text": "**Identifying a sequence pattern**\n\nThe user is exploring the sequence: 2, 6, 12, 20, 30. This represents \"pronic numbers,\" products of consecutive integers, expressed as n(n+1). \n\nFor example, for n=1, 1*2=2; for n=2, 2*3=6; and so on. The nth term can be given by the formula a_n = n(n+1). \n\nInterestingly, it also relates to the sum of the first n even integers or can be viewed as twice the triangular numbers: 2 * T_n = n(n+1)." + }, + { + "type": "summary_text", + "text": "**Exploring the sequence formula**\n\nThe sequence starts with 2 at n=1, which indicates that the nth term can be expressed as a_n = n(n+1). Each term represents pronic numbers, or the product of consecutive integers.\n\nThe pattern arises from adding consecutive even numbers: for example, 2 plus 4 equals 6, 6 plus 6 equals 12, and so on. The constant second difference confirms it\u2019s a quadratic sequence. This leads us to the formula a_n = n^2 + n, highlighting that the nth term is indeed the product of two consecutive integers." + }, + { + "type": "summary_text", + "text": "**Summarizing the pattern and formula**\n\nThe sequence is based on the sums of even numbers, or pronic numbers, which can be expressed using the formula a_n = n(n+1). \n\nEach term represents the product of two consecutive integers: for example, 2=1*2, 6=2*3, and so on. 
The differences between the terms are the even numbers 4, 6, 8, and 10, showing a consistent pattern of increases. Therefore, the nth term can be effectively represented as a_n = n(n+1). I'll prepare to share this with the user!" + } + ] + }, + { + "id": "msg_095d57b0c545f984006a1dbfbef77081999a7579adf9cbcce5", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "One way to see the pattern:\n\n\u2013 Write out the first few terms as products of consecutive integers \n 2 = 1\u00b72 \n 6 = 2\u00b73 \n 12 = 3\u00b74 \n 20 = 4\u00b75 \n 30 = 5\u00b76 \n\n\u2013 Equivalently, the successive differences are \n 6\u20132=4, 12\u20136=6, 20\u201312=8, 30\u201320=10, \u2026 \n i.e. you keep adding the even numbers 4, 6, 8, 10, \u2026\n\nFrom either viewpoint you get the general (nth) term, for n=1,2,3,\u2026, as\n\n\u2003a\u2099 = n\u00b7(n + 1),\n\nor expanded: \n\n\u2003a\u2099 = n\u00b2 + n." + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "presence_penalty": 0.0, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "context": "current_turn", + "effort": "high", + "summary": "detailed" + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 41, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 1331, + "output_tokens_details": { + "reasoning_tokens": 1088 + }, + "total_tokens": 1372 + }, + "user": null, + "metadata": {} + } + recorded_at: Mon, 01 Jun 2026 17:22:07 GMT +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: 
'{"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"},"input":[{"role":"user","content":"Look + at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be + the formula for the nth term?\n"},{"id":"rs_095d57b0c545f984006a1dbfb5ae7481998b14a5ce6db1098a","summary":[{"text":"**Identifying + a sequence pattern**\n\nThe user is exploring the sequence: 2, 6, 12, 20, + 30. This represents \"pronic numbers,\" products of consecutive integers, + expressed as n(n+1). \n\nFor example, for n=1, 1*2=2; for n=2, 2*3=6; and + so on. The nth term can be given by the formula a_n = n(n+1). \n\nInterestingly, + it also relates to the sum of the first n even integers or can be viewed as + twice the triangular numbers: 2 * T_n = n(n+1).","type":"summary_text"},{"text":"**Exploring + the sequence formula**\n\nThe sequence starts with 2 at n=1, which indicates + that the nth term can be expressed as a_n = n(n+1). Each term represents pronic + numbers, or the product of consecutive integers.\n\nThe pattern arises from + adding consecutive even numbers: for example, 2 plus 4 equals 6, 6 plus 6 + equals 12, and so on. The constant second difference confirms it’s a quadratic + sequence. This leads us to the formula a_n = n^2 + n, highlighting that the + nth term is indeed the product of two consecutive integers.","type":"summary_text"},{"text":"**Summarizing + the pattern and formula**\n\nThe sequence is based on the sums of even numbers, + or pronic numbers, which can be expressed using the formula a_n = n(n+1). + \n\nEach term represents the product of two consecutive integers: for example, + 2=1*2, 6=2*3, and so on. The differences between the terms are the even numbers + 4, 6, 8, and 10, showing a consistent pattern of increases. Therefore, the + nth term can be effectively represented as a_n = n(n+1). 
I''ll prepare to + share this with the user!","type":"summary_text"}],"type":"reasoning"},{"id":"msg_095d57b0c545f984006a1dbfbef77081999a7579adf9cbcce5","content":[{"annotations":[],"text":"One + way to see the pattern:\n\n– Write out the first few terms as products of + consecutive integers \n 2 = 1·2 \n 6 = 2·3 \n 12 = 3·4 \n 20 = 4·5 \n + 30 = 5·6 \n\n– Equivalently, the successive differences are \n 6–2=4, 12–6=6, + 20–12=8, 30–20=10, … \n i.e. you keep adding the even numbers 4, 6, 8, 10, + …\n\nFrom either viewpoint you get the general (nth) term, for n=1,2,3,…, + as\n\n aₙ = n·(n + 1),\n\nor expanded: \n\n aₙ = n² + n.","type":"output_text","logprobs":[]}],"role":"assistant","status":"completed","type":"message"},{"role":"user","content":"Using + the pattern you discovered, what would be the 10th term? And can you find + the sum of the first 10 terms?"}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '2702' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:12 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fe60e3bc3d301-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '4532' + Openai-Project: + - 
proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999535' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_257a28a8fb8d4b21987acc974272aa7d + Set-Cookie: + - __cf_bm=44MVR4_rVfTeCU_NPoOWnxoSdlxw1at24ZE9lofUMz8-1780334527.7138355-1.0.1.1-xs9Ka_XILlydmYcWYP.PaiPa_g3nAtzR3EEw6vEv.NujGmdXBJRt9m_0j6PJ5ZOtgo2KZN7OMmUlAbeuqXYfdDCFRHuDor6hwIfChaMQ1PHT.NFLeO9ANQGAuyezuUgl; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:52:12 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_095d57b0c545f984006a1dbfbfc594819993afb313d98e54ca", + "object": "response", + "created_at": 1780334527, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1780334532, + "error": null, + "frequency_penalty": 0.0, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "moderation": null, + "output": [ + { + "id": "rs_095d57b0c545f984006a1dbfc04cf08199bc597ab00d037aee", + "type": "reasoning", + "summary": [] + }, + { + "id": "msg_095d57b0c545f984006a1dbfc33f348199a26368824469ef64", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "The general term is \n a\u2099 = n\u00b7(n + 1). \n\nSo for n = 10: \n a\u2081\u2080 = 10\u00b711 = 110. 
\n\nFor the sum of the first 10 terms, \n S\u2081\u2080 = \u2211\u2096\u208c\u2081\u00b9\u2070 k(k+1) \n = \u2211\u2096\u208c\u2081\u00b9\u2070 (k\u00b2 + k) \n = (\u2211\u2096\u208c\u2081\u00b9\u2070 k\u00b2) + (\u2211\u2096\u208c\u2081\u00b9\u2070 k) \n = [10\u00b711\u00b721/6] + [10\u00b711/2] \n = 385 + 55 \n = 440. \n\nSo the 10th term is 110, and the sum of the first 10 terms is 440." + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "presence_penalty": 0.0, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "context": "current_turn", + "effort": "high", + "summary": "detailed" + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 256, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 586, + "output_tokens_details": { + "reasoning_tokens": 384 + }, + "total_tokens": 842 + }, + "user": null, + "metadata": {} + } + recorded_at: Mon, 01 Jun 2026 17:22:12 GMT +recorded_with: VCR 6.4.0 diff --git a/test/fixtures/vcr_cassettes/btx/openai/streaming.yml b/test/fixtures/vcr_cassettes/btx/openai/streaming.yml new file mode 100644 index 00000000..ed793a34 --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/openai/streaming.yml @@ -0,0 +1,192 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","max_tokens":800,"temperature":0.0,"stream_options":{"include_usage":true},"messages":[{"role":"system","content":"you + are a thoughtful assistant"},{"role":"user","content":"Count from 1 to 10 + slowly."}],"stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + 
Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '241' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:13 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fe631fd23b991-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '262' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999982' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_9577c8e0afab431f8e9bad614f93be50 + Set-Cookie: + - __cf_bm=e3zw6P7Vc9tofre.GTqFKmEpff_2JZAEfQ4rGXAwyMg-1780334533.4372284-1.0.1.1-3165xGomBl3vQ1m2A.Rdi_WV_DHJHEgvxExdlC1wl4dBnwz44Z5EKDrBmQLDpvmZFhCOIFgw.pTov6CDWlpEVafVDitmYt.zV1UIxE5jreoXCVOjOWPb2bdY2_V7NR5n; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:52:13 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1nQOV5KNQ"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"Sure"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5mMFOmF"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"IMs1t9nwNn"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" Here"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wuRksM"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" we"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DFZpHjZ0"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" 
go"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"oBlQ9MTv"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":":\n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"kLjEv0"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SjbPdNTgci"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GkuAQdCS"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"X1xTwL6"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"85IuNj7HZC"} + + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"zNic48es"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"yad122X"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5K1UvFRQd3"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5WpnN1rH"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qEhK9KK"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"XUQbdPrEjo"} + + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"4Uu2conk"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AHZL9es"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"BM00eNTm6w"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1QawiCaU"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3p4dtiN"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"6"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"EJWrLjTy9Y"} + + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xWpEHjsH"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OBN1VNX"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"7"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RpWLkuoWBU"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"leibIHg8"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"G2xaKZ2"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"8"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Xk6mbwdkCx"} + + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xMMHqibJ"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"UMs3hE4"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"9"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OtLngMHblh"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"PrBwV3As"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"dWuvhno"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"10"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"QgjyzC3pX"} + + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"zqSdZFfy"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" \n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"M2u0W"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"Take"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"A9e8ldP"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" your"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wssrzy"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":" time"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"F2IcGf"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nmdqzXlTQ7"} + + data: 
{"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"tCavN"} + + data: {"id":"chatcmpl-Dm0d3h4C8On5UUd0vZCErtcdzO6IA","object":"chat.completion.chunk","created":1780334533,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ad343dd83e","choices":[],"usage":{"prompt_tokens":25,"completion_tokens":40,"total_tokens":65,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"XjRFPPNXm4"} + + data: [DONE] + + recorded_at: Mon, 01 Jun 2026 17:22:14 GMT +recorded_with: VCR 6.4.0 +... diff --git a/test/fixtures/vcr_cassettes/btx/openai/tools.yml b/test/fixtures/vcr_cassettes/btx/openai/tools.yml new file mode 100644 index 00000000..c3c2227a --- /dev/null +++ b/test/fixtures/vcr_cassettes/btx/openai/tools.yml @@ -0,0 +1,151 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o","max_tokens":500,"temperature":0.0,"tools":[{"type":"function","function":{"name":"get_weather","description":"Get + the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"],"description":"The + unit of temperature"}},"required":["location"]}}}],"messages":[{"role":"user","content":"What + is the weather like in Paris, France?"}]}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.64.0 + Host: + - api.openai.com + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 0.64.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 4.0.1 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '511' + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 01 Jun 2026 17:22:18 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - a04fe640af6e680e-SEA + Cf-Cache-Status: + - DYNAMIC + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Access-Control-Expose-Headers: + - CF-Ray + - CF-Ray + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '575' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Limit-Tokens: + - '30000000' + X-Ratelimit-Remaining-Requests: + - '9999' + X-Ratelimit-Remaining-Tokens: + - '29999987' + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_b8b33c1178ae45c9beb59ec6cfdf2cf1 + Set-Cookie: + - __cf_bm=DG1SVXxYmgAGTzUyLluYmjPEGvT_XuoMD21gOy72UqY-1780334535.7878098-1.0.1.1-JsVToaWxLxuPBRwjDcj1Zaea0Qihw1FJrVdFVW30o73bcR.UdlBfN541kR501Tssf6hNUt0JCpE4UJah1OuDFGYru.HUdxg5hn_QMVwgCFAfSojOR6ebwJ9GVd8RtIhx; + HttpOnly; 
SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Mon, + 01 Jun 2026 17:52:18 GMT + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-Dm0d7OgWMCSrYkq9C3VCOGcsVzNkb", + "object": "chat.completion", + "created": 1780334537, + "model": "gpt-4o-2024-08-06", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_7Id8JLlSgIwMqNDWA7ZyAXkK", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\":\"Paris, France\"}" + } + } + ], + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 85, + "completion_tokens": 16, + "total_tokens": 101, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_5f78e76dfa" + } + recorded_at: Mon, 01 Jun 2026 17:22:18 GMT +recorded_with: VCR 6.4.0