Skip to content

Commit d84176e

Browse files
committed
feat: add HuggingFace Hub integration
Adds native Braintrust tracing for the Hugging Face Hub Python SDK (`huggingface_hub`) via the integrations API. The integration supports `huggingface-hub>=0.32.0` and is included in `auto_instrument()` by default. How to enable it: ```python import braintrust from braintrust import init_logger from huggingface_hub import InferenceClient logger = init_logger(project="my-project") braintrust.auto_instrument() # enables huggingface_hub unless disabled client = InferenceClient(provider="auto") with logger.start_span(name="hf request"): response = client.chat_completion( model="meta-llama/Llama-3.1-8B-Instruct", messages=[{"role": "user", "content": "Say hello"}], max_tokens=32, ) ``` The integration can also be enabled explicitly or disabled from the global auto-instrumentation call: ```python from braintrust.auto import auto_instrument auto_instrument(huggingface_hub=True) auto_instrument(huggingface_hub=False) # opt out ``` For manual wrapping, wrap individual sync or async clients: ```python from braintrust.integrations.huggingface_hub import wrap_huggingface_hub from huggingface_hub import AsyncInferenceClient, InferenceClient client = wrap_huggingface_hub(InferenceClient(provider="auto")) async_client = wrap_huggingface_hub(AsyncInferenceClient(provider="auto")) ``` Features added: - Traces sync and async `InferenceClient` calls for `chat_completion`, `text_generation`, `feature_extraction`, and `sentence_similarity`. - Covers the OpenAI-compatible chat alias `client.chat.completions.create(...)` because it proxies through `chat_completion`. - Supports non-streaming and streaming chat completions, including async streams, context manager finalization, early stream close handling, and nesting under parent Braintrust spans. - Captures span inputs, outputs, allowlisted request metadata, provider/model routing metadata, response identifiers, finish reasons, and token metrics from Hugging Face response usage/details fields. 
- Logs provider errors to the span before re-raising. - Adds VCR-backed coverage for latest and 0.32.0 Hugging Face Hub SDKs, plus an auto-instrumentation smoke test. - Adds the `test_huggingface_hub` nox session, dependency matrix entries, and cassette-directory mapping for provider-versioned cassette hygiene.
1 parent 9d9f1e7 commit d84176e

58 files changed

Lines changed: 8513 additions & 253 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

py/noxfile.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,21 @@ def test_mistral(session, version):
534534
_run_tests(session, f"{INTEGRATION_DIR}/mistral/test_mistral.py", version=version)
535535

536536

537+
HUGGINGFACE_HUB_VERSIONS = _get_matrix_versions("huggingface-hub")
538+
539+
540+
@nox.session()
541+
@nox.parametrize("version", HUGGINGFACE_HUB_VERSIONS, ids=HUGGINGFACE_HUB_VERSIONS)
542+
def test_huggingface_hub(session, version):
543+
"""Test the native HuggingFace Hub SDK integration."""
544+
_install_test_deps(session)
545+
_install_matrix_dep(session, "huggingface-hub", version)
546+
# numpy is required by ``InferenceClient.feature_extraction`` but is not
547+
# an install_requires dep of ``huggingface_hub`` upstream; the group also adds aiohttp.
548+
_install_group_locked(session, "test-huggingface-hub")
549+
_run_tests(session, f"{INTEGRATION_DIR}/huggingface_hub/test_huggingface_hub.py", version=version)
550+
551+
537552
TEMPORAL_VERSIONS = _get_matrix_versions("temporalio")
538553

539554

py/pyproject.toml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,18 @@ test-llamaindex = [
179179
{include-group = "test"},
180180
]
181181

182+
test-huggingface-hub = [
183+
{include-group = "test"},
184+
# numpy is required by huggingface_hub.InferenceClient.feature_extraction
185+
# but is not declared as an install_requires dep upstream.
186+
"numpy",
187+
# huggingface_hub<1.0 implements AsyncInferenceClient on top of aiohttp;
188+
# newer versions ship with httpx already. Listing aiohttp here keeps the
189+
# 0.32.0 floor working without leaking the requirement onto the rest of
190+
# the test environment.
191+
"aiohttp",
192+
]
193+
182194
test-cli = [
183195
{include-group = "test"},
184196
"httpx==0.28.1",
@@ -227,6 +239,8 @@ lint = [
227239
"openai",
228240
"openai-agents",
229241
"openrouter",
242+
"huggingface-hub",
243+
"numpy",
230244
"strands-agents",
231245
"temporalio",
232246
"pydantic-ai>=1.10.0",
@@ -400,6 +414,14 @@ latest = "openrouter==0.9.1"
400414
latest = "mistralai==2.4.5"
401415
"1.12.4" = "mistralai==1.12.4"
402416

417+
[tool.braintrust.matrix.huggingface-hub]
418+
# Floor pinned to 0.32.0: the earliest release that exposes the
419+
# ``provider="auto"`` routing mode the integration relies on for multi-
420+
# provider use, while still keeping the same chat_completion /
421+
# text_generation / feature_extraction / sentence_similarity method names.
422+
latest = "huggingface-hub==1.15.0"
423+
"0.32.0" = "huggingface-hub==0.32.0"
424+
403425
[tool.braintrust.matrix.temporalio]
404426
latest = "temporalio==1.27.2"
405427
"1.20.0" = "temporalio==1.20.0"
@@ -442,6 +464,7 @@ claude_agent_sdk = ["claude-agent-sdk"]
442464
crewai = ["crewai"]
443465
dspy = ["dspy"]
444466
google_genai = ["google-genai"]
467+
huggingface_hub = ["huggingface-hub"]
445468
langchain = ["langchain-core"]
446469
litellm = ["litellm"]
447470
livekit_agents = ["livekit-agents"]
@@ -469,6 +492,7 @@ google-genai = "google.genai"
469492
litellm = "litellm"
470493
livekit-agents = "livekit.agents"
471494
mistralai = "mistralai"
495+
huggingface-hub = "huggingface_hub"
472496
openai = "openai"
473497
openai-agents = "agents"
474498
openrouter = "openrouter"

py/src/braintrust/auto.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
CrewAIIntegration,
1919
DSPyIntegration,
2020
GoogleGenAIIntegration,
21+
HuggingFaceHubIntegration,
2122
LangChainIntegration,
2223
LiteLLMIntegration,
2324
LiveKitAgentsIntegration,
@@ -58,6 +59,7 @@ def auto_instrument(
5859
google_genai: bool = True,
5960
openrouter: bool = True,
6061
mistral: bool = True,
62+
huggingface_hub: bool = True,
6163
agno: bool = True,
6264
agentscope: bool = True,
6365
claude_agent_sdk: bool = True,
@@ -90,6 +92,7 @@ def auto_instrument(
9092
google_genai: Enable Google GenAI instrumentation (default: True)
9193
openrouter: Enable OpenRouter instrumentation (default: True)
9294
mistral: Enable Mistral instrumentation (default: True)
95+
huggingface_hub: Enable HuggingFace Hub instrumentation (default: True)
9396
agno: Enable Agno instrumentation (default: True)
9497
agentscope: Enable AgentScope instrumentation (default: True)
9598
claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
@@ -165,6 +168,8 @@ def auto_instrument(
165168
results["openrouter"] = _instrument_integration(OpenRouterIntegration)
166169
if mistral:
167170
results["mistral"] = _instrument_integration(MistralIntegration)
171+
if huggingface_hub:
172+
results["huggingface_hub"] = _instrument_integration(HuggingFaceHubIntegration)
168173
if agno:
169174
results["agno"] = _instrument_integration(AgnoIntegration)
170175
if agentscope:

py/src/braintrust/integrations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .crewai import CrewAIIntegration
99
from .dspy import DSPyIntegration
1010
from .google_genai import GoogleGenAIIntegration
11+
from .huggingface_hub import HuggingFaceHubIntegration
1112
from .langchain import LangChainIntegration
1213
from .litellm import LiteLLMIntegration
1314
from .livekit_agents import LiveKitAgentsIntegration
@@ -32,6 +33,7 @@
3233
"CrewAIIntegration",
3334
"DSPyIntegration",
3435
"GoogleGenAIIntegration",
36+
"HuggingFaceHubIntegration",
3537
"LiteLLMIntegration",
3638
"LiveKitAgentsIntegration",
3739
"LangChainIntegration",
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""Test auto_instrument for HuggingFace Hub."""
2+
3+
import os
4+
5+
6+
# Dummy token must start with ``hf_`` so the HuggingFace SDK accepts it for
7+
# ``provider="auto"`` routing (validated locally before any HTTP request).
8+
os.environ.setdefault("HF_TOKEN", "hf_test_dummy_api_key_for_vcr_tests")
9+
10+
from braintrust.auto import auto_instrument
11+
from braintrust.integrations.test_utils import autoinstrument_test_context
12+
from huggingface_hub import InferenceClient
13+
14+
15+
results = auto_instrument()
16+
assert results.get("huggingface_hub") is True
17+
18+
results2 = auto_instrument()
19+
assert results2.get("huggingface_hub") is True
20+
21+
22+
CHAT_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
23+
24+
25+
with autoinstrument_test_context("test_auto_huggingface_hub", integration="huggingface_hub") as memory_logger:
26+
# ``provider="cerebras"`` hosts ``meta-llama/Llama-3.1-8B-Instruct`` across
27+
# the matrix; ``hf-inference`` no longer hosts most conversational checkpoints.
28+
client = InferenceClient(model=CHAT_MODEL, provider="cerebras", token=os.environ["HF_TOKEN"])
29+
response = client.chat_completion(
30+
messages=[{"role": "user", "content": "Say hi in one word."}],
31+
max_tokens=10,
32+
)
33+
assert response.choices
34+
assert response.choices[0].message.role == "assistant"
35+
36+
spans = memory_logger.pop()
37+
assert len(spans) == 1, f"Expected 1 span, got {len(spans)}"
38+
span = spans[0]
39+
# User-supplied ``provider`` overrides the default "huggingface" identity.
40+
assert span["metadata"]["provider"] == "cerebras"
41+
assert span["span_attributes"]["name"] == "huggingface.chat_completion"
42+
43+
print("SUCCESS")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Braintrust integration for the HuggingFace Hub Python SDK."""
2+
3+
from .integration import HuggingFaceHubIntegration
4+
from .tracing import wrap_huggingface_hub
5+
6+
7+
__all__ = [
8+
"HuggingFaceHubIntegration",
9+
"wrap_huggingface_hub",
10+
]
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
Accept-Encoding:
8+
- gzip, deflate, zstd
9+
Connection:
10+
- keep-alive
11+
X-Amzn-Trace-Id:
12+
- 612753ff-29c0-4031-a93f-02b1c6d24692
13+
user-agent:
14+
- unknown/None; hf_hub/0.32.0; python/3.14.3
15+
method: GET
16+
uri: https://huggingface.co/api/models/meta-llama/Llama-3.1-8B-Instruct?expand=inferenceProviderMapping
17+
response:
18+
body:
19+
string: '{"_id":"6698d8a0653e4babe21e1e7d","id":"meta-llama/Llama-3.1-8B-Instruct","inferenceProviderMapping":{"novita":{"status":"live","providerId":"meta-llama/llama-3.1-8b-instruct","task":"conversational","isModelAuthor":false},"cerebras":{"status":"live","providerId":"llama3.1-8b","task":"conversational","isModelAuthor":false},"nscale":{"status":"live","providerId":"meta-llama/Llama-3.1-8B-Instruct","task":"conversational","isModelAuthor":false},"featherless-ai":{"status":"live","providerId":"meta-llama/Meta-Llama-3.1-8B-Instruct","task":"conversational","isModelAuthor":false},"scaleway":{"status":"live","providerId":"llama-3.1-8b-instruct","task":"conversational","isModelAuthor":false},"sambanova":{"status":"error","providerId":"Meta-Llama-3.1-8B-Instruct","task":"conversational","isModelAuthor":false}}}'
20+
headers:
21+
Access-Control-Allow-Origin:
22+
- https://huggingface.co
23+
Access-Control-Expose-Headers:
24+
- X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
25+
Access-Control-Max-Age:
26+
- '86400'
27+
Connection:
28+
- keep-alive
29+
Content-Length:
30+
- '812'
31+
Content-Type:
32+
- application/json; charset=utf-8
33+
Date:
34+
- Wed, 20 May 2026 18:01:23 GMT
35+
ETag:
36+
- W/"32c-AuB6sgZTIbkNCeInjqjAzrSGN6U"
37+
RateLimit:
38+
- '"api";r=980;t=222'
39+
RateLimit-Policy:
40+
- '"fixed window";"api";q=1000;w=300'
41+
Referrer-Policy:
42+
- strict-origin-when-cross-origin
43+
Vary:
44+
- Origin
45+
Via:
46+
- 1.1 d03af248468c898a111754f0666c2316.cloudfront.net (CloudFront)
47+
X-Amz-Cf-Id:
48+
- 4WCrHaehFlOWwSsjyrmjjPL8GSMt6pbgFkVHBzAmSGeITCJD5UXyHQ==
49+
X-Amz-Cf-Pop:
50+
- YTO50-P2
51+
X-Cache:
52+
- Miss from cloudfront
53+
X-Powered-By:
54+
- huggingface-moon
55+
X-Request-Id:
56+
- Root=1-6a0df6f3-602614dd3aedf0224d2485a5;612753ff-29c0-4031-a93f-02b1c6d24692
57+
cross-origin-opener-policy:
58+
- same-origin
59+
status:
60+
code: 200
61+
message: OK
62+
- request:
63+
body: '{"messages": [{"role": "user", "content": "Say hi in one word."}], "model":
64+
"llama3.1-8b", "max_tokens": 10, "stream": false}'
65+
headers:
66+
Accept:
67+
- '*/*'
68+
Accept-Encoding:
69+
- gzip, deflate, zstd
70+
Connection:
71+
- keep-alive
72+
Content-Length:
73+
- '125'
74+
Content-Type:
75+
- application/json
76+
X-Amzn-Trace-Id:
77+
- 06441962-8e36-40da-829c-8f76ca204c40
78+
user-agent:
79+
- unknown/None; hf_hub/0.32.0; python/3.14.3
80+
method: POST
81+
uri: https://router.huggingface.co/cerebras/v1/chat/completions
82+
response:
83+
body:
84+
string: '{"id":"chatcmpl-e6cb12a1-9580-4ae5-b75a-aa4740402e6d","choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello.","role":"assistant"}}],"created":1779300084,"model":"llama3.1-8b","system_fingerprint":"fp_f613d2b18eccee549c5f","object":"chat.completion","usage":{"total_tokens":44,"completion_tokens":3,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":41,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1779300084.3064463,"queue_time":8.325e-05,"prompt_time":0.002871066,"completion_time":0.001190025,"total_time":0.006201982498168945}}'
85+
headers:
86+
Access-Control-Allow-Origin:
87+
- '*'
88+
Access-Control-Expose-Headers:
89+
- X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
90+
Connection:
91+
- keep-alive
92+
Content-Type:
93+
- application/json
94+
Date:
95+
- Wed, 20 May 2026 18:01:24 GMT
96+
Transfer-Encoding:
97+
- chunked
98+
Vary:
99+
- Origin
100+
Via:
101+
- 1.1 1469d4976bc2a36b5840519c9e3dbad6.cloudfront.net (CloudFront)
102+
X-Amz-Cf-Id:
103+
- NmYnNOX20NQBBfOkXA1R0Kn_6MQ5nP-tVZUOnVijYDWzH2rxZjVZYQ==
104+
X-Amz-Cf-Pop:
105+
- YTO50-P1
106+
X-Cache:
107+
- Miss from cloudfront
108+
X-Powered-By:
109+
- huggingface-moon
110+
X-Robots-Tag:
111+
- none
112+
cf-cache-status:
113+
- DYNAMIC
114+
cf-ray:
115+
- 9fed3f1698536887-IAD
116+
cross-origin-opener-policy:
117+
- same-origin
118+
inference-id:
119+
- chatcmpl-e6cb12a1-9580-4ae5-b75a-aa4740402e6d
120+
referrer-policy:
121+
- strict-origin-when-cross-origin
122+
server:
123+
- cloudflare
124+
set-cookie:
125+
- __cf_bm=PdRHzq1Nm0H6U3Y5MuDllSirlbhPgF4FUqP82CamXH8-1779300084.2544014-1.0.1.1-YQsooVKv6m_ucY1RpkxV5jVLBixoDGOKRY6MYm.LJ1z7sFcfdmuUqUebWWfcOJXRgmr4KFY4tCmrktgqfUJXR3Q0yiUd9VWDostaBrjOEO6PwT6WuWmFGwxVRK_gJC1i;
126+
HttpOnly; SameSite=None; Secure; Path=/; Domain=api.cerebras.ai; Expires=Wed,
127+
20 May 2026 18:31:24 GMT
128+
strict-transport-security:
129+
- max-age=15552000; includeSubDomains; preload
130+
x-content-type-options:
131+
- nosniff
132+
x-inference-provider:
133+
- cerebras
134+
x-ratelimit-limit-requests-day:
135+
- '2880000'
136+
x-ratelimit-limit-requests-hour:
137+
- '120000'
138+
x-ratelimit-limit-requests-minute:
139+
- '2000'
140+
x-ratelimit-limit-tokens-day:
141+
- '9223372036854775807'
142+
x-ratelimit-limit-tokens-hour:
143+
- '9223372036854775807'
144+
x-ratelimit-limit-tokens-minute:
145+
- '9223372036854775807'
146+
x-ratelimit-remaining-requests-day:
147+
- '2879999'
148+
x-ratelimit-remaining-requests-hour:
149+
- '119999'
150+
x-ratelimit-remaining-requests-minute:
151+
- '1999'
152+
x-ratelimit-remaining-tokens-day:
153+
- '9223372036854775807'
154+
x-ratelimit-remaining-tokens-hour:
155+
- '9223372036854775807'
156+
x-ratelimit-remaining-tokens-minute:
157+
- '9223372036854775807'
158+
x-request-id:
159+
- 9fed3f1698536887-IAD
160+
status:
161+
code: 200
162+
message: OK
163+
version: 1

0 commit comments

Comments
 (0)