diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py index 648a65d27..e5c41c672 100644 --- a/src/strands/telemetry/tracer.py +++ b/src/strands/telemetry/tracer.py @@ -25,6 +25,8 @@ logger = logging.getLogger(__name__) +REDACTED_VALUE = "" + class JSONEncoder(json.JSONEncoder): """Custom JSON encoder that handles non-serializable types.""" @@ -85,6 +87,14 @@ class Tracer: Both attributes are controlled by including "gen_ai_latest_experimental", "gen_ai_tool_definitions", or "gen_ai_use_latest_invocation_tokens", respectively, in the OTEL_SEMCONV_STABILITY_OPT_IN environment variable. + + Span attribute redaction is opt-in via the ``gen_ai_unredacted_attributes=`` token in the same + environment variable. The list uses ``;`` as a separator and supports trailing-``*`` glob patterns. + When the token is absent, all attributes are emitted unredacted (backward compatible). + + Sensitive attributes subject to the redaction policy are: ``gen_ai.input.messages`` (user messages + and tool inputs/results being fed into the model), ``gen_ai.output.messages`` (agent/model responses + and tool call responses), and ``gen_ai.system_instructions`` (system prompts). """ def __init__(self) -> None: @@ -102,15 +112,71 @@ def __init__(self) -> None: self._include_tool_definitions = "gen_ai_tool_definitions" in opt_in_values self._use_latest_invocation_tokens = "gen_ai_use_latest_invocation_tokens" in opt_in_values + unredacted_token = next( + (t for t in opt_in_values if t.startswith("gen_ai_unredacted_attributes=")), + None, + ) + self._redaction_enabled = unredacted_token is not None + self._unredacted_exact, self._unredacted_globs = self._compile_unredacted_patterns( + unredacted_token.partition("=")[2] if unredacted_token else "" + ) + def _parse_semconv_opt_in(self) -> set[str]: """Parse the OTEL_SEMCONV_STABILITY_OPT_IN environment variable. Returns: - A set of opt-in values from the environment variable. 
@staticmethod
def _compile_unredacted_patterns(value: str) -> tuple[frozenset[str], tuple[str, ...]]:
    """Split an unredacted-attributes value into exact names and glob prefixes.

    The value is the text after ``gen_ai_unredacted_attributes=``; entries are separated
    by ``;`` and surrounding whitespace is ignored. Empty entries are skipped, which is how
    a bare ``gen_ai_unredacted_attributes=`` resolves to "redact everything sensitive".

    Only trailing-``*`` globs are supported (e.g. ``gen_ai.output.*``). A run of trailing
    stars is collapsed, so ``gen_ai.output.**`` behaves like ``gen_ai.output.*`` instead of
    producing a prefix that still contains a literal ``*`` and therefore never matches.
    A ``*`` anywhere else in an entry is not a wildcard: the entry is kept as an exact name.

    Args:
        value: The raw ``;``-separated allowlist (may be empty).

    Returns:
        A ``(exact_names, glob_prefixes)`` pair. ``glob_prefixes`` holds each prefix with
        its trailing star(s) removed, de-duplicated, in first-seen order.
    """
    exact_names: set[str] = set()
    # A dict is used as an ordered set so repeated globs are stored (and later tested) once.
    prefixes: dict[str, None] = {}
    for raw_entry in value.split(";"):
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.endswith("*"):
            prefixes[entry.rstrip("*")] = None
        else:
            exact_names.add(entry)
    return frozenset(exact_names), tuple(prefixes)

def _is_attribute_unredacted(self, attribute_name: str) -> bool:
    """Return True if the attribute should be emitted as-is, False if it must be redacted.

    When redaction is not enabled every attribute passes through unchanged. Otherwise the
    attribute must be listed exactly or start with one of the configured glob prefixes.

    Args:
        attribute_name: The attribute name to look up in the allowlist.
    """
    if not self._redaction_enabled:
        return True
    if attribute_name in self._unredacted_exact:
        return True
    # str.startswith accepts a tuple of candidates and returns False for an empty tuple,
    # so no explicit loop (or emptiness check) is needed.
    return attribute_name.startswith(tuple(self._unredacted_globs))

def _redact(self, attribute_name: str, value: str) -> str:
    """Apply the redaction policy to a single sensitive attribute value.

    Args:
        attribute_name: The canonical semantic attribute name used for policy lookup
            (one of ``gen_ai.input.messages``, ``gen_ai.output.messages``, or
            ``gen_ai.system_instructions``). This may differ from the physical event
            field key emitted under legacy conventions (which uses ``content``/``message``),
            but the canonical name is always used so that allowlist entries are
            independent of the convention in use.
        value: The serialized attribute value to potentially redact.

    Returns:
        The original value if the attribute is unredacted, otherwise ``REDACTED_VALUE``.
    """
    # NOTE(review): REDACTED_VALUE appears as an empty string in the reviewed patch, which
    # makes a redacted value indistinguishable from genuinely empty content - confirm the
    # intended sentinel.
    if self._is_attribute_unredacted(attribute_name):
        return value
    return REDACTED_VALUE
self._redact("gen_ai.input.messages", input_messages)}, to_span_attributes=self.is_langfuse, ) else: @@ -440,7 +507,7 @@ def start_tool_call_span( "gen_ai.tool.message", event_attributes={ "role": "tool", - "content": serialize(tool["input"]), + "content": self._redact("gen_ai.input.messages", serialize(tool["input"])), "id": tool["toolUseId"], }, ) @@ -465,25 +532,24 @@ def end_tool_call_span(self, span: Span, tool_result: ToolResult | None, error: attributes["gen_ai.tool.status"] = str(status) if status is not None else "" if self.use_latest_genai_conventions: + output_messages = serialize( + [ + { + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": tool_result.get("toolUseId", ""), + "response": content, + } + ], + } + ] + ) self._add_event( span, "gen_ai.client.inference.operation.details", - { - "gen_ai.output.messages": serialize( - [ - { - "role": "tool", - "parts": [ - { - "type": "tool_call_response", - "id": tool_result.get("toolUseId", ""), - "response": content, - } - ], - } - ] - ) - }, + {"gen_ai.output.messages": self._redact("gen_ai.output.messages", output_messages)}, to_span_attributes=self.is_langfuse, ) else: @@ -491,7 +557,7 @@ def end_tool_call_span(self, span: Span, tool_result: ToolResult | None, error: span, "gen_ai.choice", event_attributes={ - "message": serialize(content), + "message": self._redact("gen_ai.output.messages", serialize(content)), "id": tool_result.get("toolUseId", ""), }, ) @@ -559,25 +625,30 @@ def end_event_loop_cycle_span( if not span or not span.is_recording(): return - event_attributes: dict[str, AttributeValue] = {"message": serialize(message["content"])} + event_attributes: dict[str, AttributeValue] = { + "message": self._redact("gen_ai.output.messages", serialize(message["content"])), + } if tool_result_message: - event_attributes["tool.result"] = serialize(tool_result_message["content"]) + # tool results are conceptually fed back to the model as input, so they are + # policied under 
gen_ai.input.messages even when the emitted attribute key differs + event_attributes["tool.result"] = self._redact( + "gen_ai.input.messages", serialize(tool_result_message["content"]) + ) if self.use_latest_genai_conventions: + tool_result_messages = serialize( + [ + { + "role": tool_result_message["role"], + "parts": self._map_content_blocks_to_otel_parts(tool_result_message["content"]), + } + ] + ) self._add_event( span, "gen_ai.client.inference.operation.details", - { - "gen_ai.output.messages": serialize( - [ - { - "role": tool_result_message["role"], - "parts": self._map_content_blocks_to_otel_parts(tool_result_message["content"]), - } - ] - ) - }, + {"gen_ai.input.messages": self._redact("gen_ai.input.messages", tool_result_messages)}, to_span_attributes=self.is_langfuse, ) else: @@ -661,27 +732,29 @@ def end_agent_span( if response: if self.use_latest_genai_conventions: + output_messages = serialize( + [ + { + "role": "assistant", + "parts": [{"type": "text", "content": str(response)}], + "finish_reason": str(response.stop_reason), + } + ] + ) self._add_event( span, "gen_ai.client.inference.operation.details", - { - "gen_ai.output.messages": serialize( - [ - { - "role": "assistant", - "parts": [{"type": "text", "content": str(response)}], - "finish_reason": str(response.stop_reason), - } - ] - ) - }, + {"gen_ai.output.messages": self._redact("gen_ai.output.messages", output_messages)}, to_span_attributes=self.is_langfuse, ) else: self._add_event( span, "gen_ai.choice", - event_attributes={"message": str(response), "finish_reason": str(response.stop_reason)}, + event_attributes={ + "message": self._redact("gen_ai.output.messages", str(response)), + "finish_reason": str(response.stop_reason), + }, ) if hasattr(response, "metrics") and hasattr(response.metrics, "accumulated_usage"): @@ -750,17 +823,19 @@ def start_multiagent_span( parts = self._map_content_blocks_to_otel_parts(task) else: parts = [{"type": "text", "content": task}] + input_messages = 
serialize([{"role": "user", "parts": parts}]) self._add_event( span, "gen_ai.client.inference.operation.details", - {"gen_ai.input.messages": serialize([{"role": "user", "parts": parts}])}, + {"gen_ai.input.messages": self._redact("gen_ai.input.messages", input_messages)}, to_span_attributes=self.is_langfuse, ) else: + content_value = serialize(task) if isinstance(task, list) else task self._add_event( span, "gen_ai.user.message", - event_attributes={"content": serialize(task) if isinstance(task, list) else task}, + event_attributes={"content": self._redact("gen_ai.input.messages", content_value)}, ) return span @@ -773,26 +848,25 @@ def end_swarm_span( """End a swarm span with results.""" if result: if self.use_latest_genai_conventions: + output_messages = serialize( + [ + { + "role": "assistant", + "parts": [{"type": "text", "content": result}], + } + ] + ) self._add_event( span, "gen_ai.client.inference.operation.details", - { - "gen_ai.output.messages": serialize( - [ - { - "role": "assistant", - "parts": [{"type": "text", "content": result}], - } - ] - ) - }, + {"gen_ai.output.messages": self._redact("gen_ai.output.messages", output_messages)}, to_span_attributes=self.is_langfuse, ) else: self._add_event( span, "gen_ai.choice", - event_attributes={"message": result}, + event_attributes={"message": self._redact("gen_ai.output.messages", result)}, ) def _get_common_attributes( @@ -849,17 +923,19 @@ def _add_system_prompt_event( if self.use_latest_genai_conventions: parts = self._map_content_blocks_to_otel_parts(content_blocks) + # system prompts are sensitive and policed under gen_ai.system_instructions self._add_event( span, "gen_ai.client.inference.operation.details", - {"gen_ai.system_instructions": serialize(parts)}, + {"gen_ai.system_instructions": self._redact("gen_ai.system_instructions", serialize(parts))}, to_span_attributes=self.is_langfuse, ) else: + # system prompts are sensitive and policed under gen_ai.system_instructions self._add_event( span, 
class TestSpanAttributeRedaction:
    """Exercise the opt-in redaction policy applied to sensitive span attributes."""

    _OPT_IN_ENV = "OTEL_SEMCONV_STABILITY_OPT_IN"
    _GET_TRACER_TARGET = "strands.telemetry.tracer.trace_api.get_tracer"
    _DETAILS_EVENT = "gen_ai.client.inference.operation.details"

    def _user_message(self):
        return [{"role": "user", "content": [{"text": "secret user input"}]}]

    def _assistant_message(self):
        return {"role": "assistant", "content": [{"text": "secret model output"}]}

    def _patch_get_tracer(self, mock_tracer):
        """Return the patcher that makes ``trace_api.get_tracer`` hand back ``mock_tracer``."""
        return mock.patch(self._GET_TRACER_TARGET, return_value=mock_tracer)

    def _tracer_and_span(self, mock_tracer, *, recording=False, wire_start_span=True):
        """Build a Tracer bound to ``mock_tracer`` together with a fresh mock span.

        Must be called after the environment variable has been configured, because the
        Tracer is constructed here.
        """
        tracer = Tracer()
        tracer.tracer = mock_tracer
        span = mock.MagicMock()
        if recording:
            span.is_recording.return_value = True
        if wire_start_span:
            mock_tracer.start_span.return_value = span
        return tracer, span

    def _end_model_call(self, tracer, span):
        """Close a model-invoke span with the canned assistant message and ``end_turn``."""
        tracer.end_model_invoke_span(
            span,
            self._assistant_message(),
            Usage(inputTokens=1, outputTokens=2, totalTokens=3),
            Metrics(latencyMs=0, timeToFirstByteMs=0),
            "end_turn",
        )

    def test_redaction_disabled_by_default(self, mock_tracer, monkeypatch):
        """Without the env var, sensitive content is emitted verbatim (backward compatible)."""
        monkeypatch.delenv(self._OPT_IN_ENV, raising=False)
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tracer.start_model_invoke_span(messages=self._user_message(), model_id="m")

            expected_content = json.dumps([{"text": "secret user input"}])
            span.add_event.assert_any_call(
                "gen_ai.user.message",
                attributes={"content": expected_content},
            )

    def test_redaction_disabled_when_other_tokens_present(self, mock_tracer, monkeypatch):
        """Unrelated opt-in tokens must not switch redaction on."""
        monkeypatch.setenv(self._OPT_IN_ENV, "gen_ai_latest_experimental,gen_ai_tool_definitions")
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tracer.start_model_invoke_span(messages=self._user_message(), model_id="m")

            visible_input = serialize(
                [{"role": "user", "parts": [{"type": "text", "content": "secret user input"}]}]
            )
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.input.messages": visible_input},
            )

    def test_empty_unredacted_list_redacts_everything(self, mock_tracer, monkeypatch):
        """An empty ``gen_ai_unredacted_attributes=`` list redacts every sensitive attribute."""
        monkeypatch.setenv(self._OPT_IN_ENV, "gen_ai_unredacted_attributes=")
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tracer.start_model_invoke_span(
                messages=self._user_message(), model_id="m", system_prompt="secret system"
            )
            self._end_model_call(tracer, span)

            span.add_event.assert_any_call("gen_ai.system.message", attributes={"content": ""})
            span.add_event.assert_any_call("gen_ai.user.message", attributes={"content": ""})
            span.add_event.assert_any_call(
                "gen_ai.choice",
                attributes={"finish_reason": "end_turn", "message": ""},
            )

    def test_explicit_allowlist_with_semicolon(self, mock_tracer, monkeypatch):
        """Allowlisted attributes pass through untouched; everything else is redacted."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            "gen_ai_latest_experimental,gen_ai_unredacted_attributes=gen_ai.input.messages;gen_ai.system_instructions",
        )
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tracer.start_model_invoke_span(
                messages=self._user_message(), model_id="m", system_prompt="visible system"
            )
            self._end_model_call(tracer, span)

            visible_input = serialize(
                [{"role": "user", "parts": [{"type": "text", "content": "secret user input"}]}]
            )
            visible_system = serialize([{"type": "text", "content": "visible system"}])
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.system_instructions": visible_system},
            )
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.input.messages": visible_input},
            )
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.output.messages": ""},
            )

    def test_glob_match_for_output_messages(self, mock_tracer, monkeypatch):
        """A trailing `*` glob matches by prefix."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            "gen_ai_latest_experimental,gen_ai_unredacted_attributes=gen_ai.output.*",
        )
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tracer.start_model_invoke_span(messages=self._user_message(), model_id="m")
            self._end_model_call(tracer, span)

            visible_output = serialize(
                [
                    {
                        "role": "assistant",
                        "parts": [{"type": "text", "content": "secret model output"}],
                        "finish_reason": "end_turn",
                    }
                ]
            )
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.input.messages": ""},
            )
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.output.messages": visible_output},
            )

    def test_redaction_preserves_tool_metadata(self, mock_tracer, monkeypatch):
        """Tool name, ID, and status survive; only the payload content is redacted."""
        monkeypatch.setenv(self._OPT_IN_ENV, "gen_ai_unredacted_attributes=")
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tool_use = {"name": "calculator", "toolUseId": "abc", "input": {"expression": "2+2"}}
            tool_result = {"toolUseId": "abc", "status": "success", "content": [{"text": "4"}]}
            tracer.start_tool_call_span(tool_use)
            tracer.end_tool_call_span(span, tool_result)

            # First positional argument of the first set_attributes call.
            start_attrs = span.set_attributes.call_args_list[0][0][0]
            assert start_attrs["gen_ai.tool.name"] == "calculator"
            assert start_attrs["gen_ai.tool.call.id"] == "abc"

            span.add_event.assert_any_call(
                "gen_ai.tool.message",
                attributes={"role": "tool", "content": "", "id": "abc"},
            )
            span.add_event.assert_any_call(
                "gen_ai.choice",
                attributes={"message": "", "id": "abc"},
            )

    def test_parser_handles_kv_and_bare_tokens(self, monkeypatch):
        """Bare flags keep working alongside the new key=value token."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            " gen_ai_latest_experimental ,gen_ai_unredacted_attributes=gen_ai.input.messages ,gen_ai_tool_definitions",
        )
        tracer = Tracer()
        assert tracer.use_latest_genai_conventions is True
        assert tracer._include_tool_definitions is True
        assert tracer._redaction_enabled is True
        assert "gen_ai.input.messages" in tracer._unredacted_exact

    def test_assistant_message_uses_output_key_for_redaction(self, mock_tracer, monkeypatch):
        """Legacy `_add_event_messages` applies the output-messages policy to assistant turns."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            "gen_ai_unredacted_attributes=gen_ai.output.*",
        )
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer, recording=True, wire_start_span=False)

            conversation = [
                {"role": "user", "content": [{"text": "user query"}]},
                {"role": "assistant", "content": [{"text": "assistant reply"}]},
            ]
            tracer._add_event_messages(span, conversation)

            span.add_event.assert_any_call("gen_ai.user.message", attributes={"content": ""})
            span.add_event.assert_any_call(
                "gen_ai.assistant.message",
                attributes={"content": serialize([{"text": "assistant reply"}])},
            )

    def test_system_instructions_redacted_in_latest_conventions(self, mock_tracer, monkeypatch):
        """gen_ai.system_instructions is redacted under an empty allowlist (latest conventions)."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            "gen_ai_latest_experimental,gen_ai_unredacted_attributes=",
        )
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer, recording=True)

            tracer.start_model_invoke_span(
                messages=[{"role": "user", "content": [{"text": "hi"}]}],
                model_id="m",
                system_prompt="confidential system prompt",
            )

            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.system_instructions": ""},
            )

    def test_model_id_and_operation_never_redacted(self, mock_tracer, monkeypatch):
        """Structural span attributes (model id, tool name, operation) are never replaced."""
        monkeypatch.setenv(self._OPT_IN_ENV, "gen_ai_unredacted_attributes=")
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer)

            tracer.start_model_invoke_span(
                messages=[{"role": "user", "content": [{"text": "hi"}]}],
                model_id="anthropic.claude-v3",
            )

            span_attrs = span.set_attributes.call_args_list[0][0][0]
            assert span_attrs["gen_ai.request.model"] == "anthropic.claude-v3"
            assert span_attrs["gen_ai.operation.name"] == "chat"
            assert "" not in span_attrs.values()

    def test_glob_only_trailing_star(self, monkeypatch):
        """Only a trailing `*` acts as a glob; every other entry is an exact match."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            "gen_ai_unredacted_attributes=gen_ai.output.*;gen_ai.exact.name",
        )
        tracer = Tracer()
        assert tracer._is_attribute_unredacted("gen_ai.output.messages")
        assert tracer._is_attribute_unredacted("gen_ai.output.anything.else")
        assert tracer._is_attribute_unredacted("gen_ai.exact.name")
        assert not tracer._is_attribute_unredacted("gen_ai.input.messages")

    def test_tool_result_cycle_span_uses_input_messages_key(self, mock_tracer, monkeypatch):
        """tool_result_message in end_event_loop_cycle_span emits under gen_ai.input.messages, not gen_ai.output.messages."""
        monkeypatch.setenv(
            self._OPT_IN_ENV,
            "gen_ai_latest_experimental,gen_ai_unredacted_attributes=gen_ai.input.messages",
        )
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer, recording=True, wire_start_span=False)

            assistant_turn = {"role": "assistant", "content": [{"text": "calling tool"}]}
            tool_turn = {"role": "tool", "content": [{"text": "tool output"}]}

            tracer.end_event_loop_cycle_span(span, assistant_turn, tool_turn)

            expected_payload = serialize(
                [
                    {
                        "role": "tool",
                        "parts": tracer._map_content_blocks_to_otel_parts([{"text": "tool output"}]),
                    }
                ]
            )
            span.add_event.assert_any_call(
                self._DETAILS_EVENT,
                attributes={"gen_ai.input.messages": expected_payload},
            )

            # Gather every attribute key emitted on any event, whether the attributes were
            # passed by keyword or as the second positional argument.
            emitted_keys = set()
            for recorded in span.add_event.call_args_list:
                by_keyword = recorded.kwargs.get("attributes")
                if by_keyword:
                    attrs = by_keyword
                elif len(recorded.args) > 1:
                    attrs = recorded.args[1]
                else:
                    attrs = {}
                emitted_keys.update(attrs.keys())
            assert "gen_ai.output.messages" not in emitted_keys, (
                "tool_result_message must not be emitted under gen_ai.output.messages"
            )

    def test_legacy_tool_result_redacts_under_input_messages_policy(self, mock_tracer, monkeypatch):
        """Legacy gen_ai.choice path: the tool.result field follows the gen_ai.input.messages policy."""
        monkeypatch.setenv(self._OPT_IN_ENV, "gen_ai_unredacted_attributes=")
        with self._patch_get_tracer(mock_tracer):
            tracer, span = self._tracer_and_span(mock_tracer, recording=True, wire_start_span=False)
            assistant_turn = {"role": "assistant", "content": [{"text": "visible"}]}
            tool_turn = {"role": "tool", "content": [{"text": "secret result"}]}
            tracer.end_event_loop_cycle_span(span, assistant_turn, tool_turn)

            # Merge the keyword attributes of every recorded event; later events win.
            merged_attrs = {}
            for recorded in span.add_event.call_args_list:
                merged_attrs.update(recorded.kwargs.get("attributes") or {})
            assert merged_attrs.get("tool.result") == ""