Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -141,6 +143,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
    messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
    """
    Redact blob message parts by replacing each blob part's "content"
    value with ``SENSITIVE_DATA_SUBSTITUTE`` ("[Filtered]").

    The input list is modified in place; the same list object is also
    returned for convenience.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "[Filtered]"
            }
        ]
    }
    """
    for message in messages:
        # Defensively skip anything that is not a dict-shaped message.
        if not isinstance(message, dict):
            continue

        content = message.get("content")
        # Accept tuples as well as lists so redaction is not bypassed when
        # an integration hands us an immutable sequence of content parts.
        if isinstance(content, (list, tuple)):
            for item in content:
                if isinstance(item, dict) and item.get("type") == "blob":
                    item["content"] = SENSITIVE_DATA_SUBSTITUTE

    return messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
Expand Down Expand Up @@ -186,6 +239,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
Expand Down
122 changes: 121 additions & 1 deletion sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,124 @@
"top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
}

# Map LangChain content types to Sentry modalities
LANGCHAIN_TYPE_TO_MODALITY = {
"image": "image",
"image_url": "image",
"audio": "audio",
"video": "video",
"file": "document",
}


def _transform_langchain_content_block(
content_block: "Dict[str, Any]",
) -> "Dict[str, Any]":
"""
Transform a LangChain content block to Sentry-compatible format.

Handles multimodal content (images, audio, video, documents) by converting them
to the standardized format:
- base64 encoded data -> type: "blob"
- URL references -> type: "uri"
- file_id references -> type: "file"
"""
if not isinstance(content_block, dict):
return content_block

block_type = content_block.get("type")

# Handle standard multimodal content types (image, audio, video, file)
if block_type in ("image", "audio", "video", "file"):
modality = LANGCHAIN_TYPE_TO_MODALITY.get(block_type, block_type)
mime_type = content_block.get("mime_type", "")

# Check for base64 encoded content
if "base64" in content_block:
return {
"type": "blob",
"modality": modality,
"mime_type": mime_type,
"content": content_block.get("base64", ""),
}
# Check for URL reference
elif "url" in content_block:
return {
"type": "uri",
"modality": modality,
"mime_type": mime_type,
"uri": content_block.get("url", ""),
}
# Check for file_id reference
elif "file_id" in content_block:
return {
"type": "file",
"modality": modality,
"mime_type": mime_type,
"file_id": content_block.get("file_id", ""),
}

# Handle legacy image_url format (OpenAI style)
elif block_type == "image_url":
image_url_data = content_block.get("image_url", {})
if isinstance(image_url_data, dict):
url = image_url_data.get("url", "")
else:
url = str(image_url_data)

# Check if it's a data URI (base64 encoded)
if url and url.startswith("data:"):
# Parse data URI: data:mime_type;base64,content
try:
# Format: data:image/jpeg;base64,/9j/4AAQ...
header, content = url.split(",", 1)
mime_type = header.split(":")[1].split(";")[0] if ":" in header else ""
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except (ValueError, IndexError):
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"mime_type": "",
"uri": url,
}
else:
# Regular URL
return {
"type": "uri",
"modality": "image",
"mime_type": "",
"uri": url,
}

# For text blocks and other types, return as-is
return content_block


def _transform_langchain_message_content(content: "Any") -> "Any":
    """
    Normalize LangChain message content.

    Plain string content passes through unchanged. A list or tuple of
    content blocks is mapped element-wise through
    ``_transform_langchain_content_block`` (non-dict elements are kept
    as-is) and returned as a list. Anything else is returned untouched.
    """
    if isinstance(content, str):
        return content

    if not isinstance(content, (list, tuple)):
        return content

    return [
        _transform_langchain_content_block(part) if isinstance(part, dict) else part
        for part in content
    ]


# Contextvar to track agent names in a stack for re-entrant agent support
_agent_stack: "contextvars.ContextVar[Optional[List[Optional[str]]]]" = (
Expand Down Expand Up @@ -234,7 +352,9 @@ def _handle_error(self, run_id: "UUID", error: "Any") -> None:
del self.span_map[run_id]

def _normalize_langchain_message(self, message: "BaseMessage") -> "Any":
    """Convert a LangChain message into a plain role/content dict,
    normalizing multimodal content (images, audio, video, files) and
    merging in any additional keyword metadata from the message."""
    normalized = {
        "role": message.type,
        "content": _transform_langchain_message_content(message.content),
    }
    normalized.update(message.additional_kwargs)
    return normalized

Expand Down
Loading
Loading