diff --git a/CHANGELOG.md b/CHANGELOG.md index a83e9bfa2f..01c3b7fab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#4847](https://github.com/open-telemetry/opentelemetry-python/pull/4847)) - Prevent possible endless recursion from happening in `SimpleLogRecordProcessor.on_emit`, ([#4799](https://github.com/open-telemetry/opentelemetry-python/pull/4799)) and ([#4867](https://github.com/open-telemetry/opentelemetry-python/pull/4867)). +- Implement span start/end metrics + ([#4880](https://github.com/open-telemetry/opentelemetry-python/pull/4880)) - Make ConcurrentMultiSpanProcessor fork safe ([#4862](https://github.com/open-telemetry/opentelemetry-python/pull/4862)) - `opentelemetry-exporter-otlp-proto-http`: fix retry logic and error handling for connection failures in trace, metric, and log exporters diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index 9ae9a2234e..b1724b46a3 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -45,6 +45,7 @@ from typing_extensions import deprecated from opentelemetry import context as context_api +from opentelemetry import metrics as metrics_api from opentelemetry import trace as trace_api from opentelemetry.attributes import BoundedAttributes from opentelemetry.sdk import util @@ -78,6 +79,8 @@ from opentelemetry.util import types from opentelemetry.util._decorator import _agnosticcontextmanager +from ._tracer_metrics import TracerMetrics + logger = logging.getLogger(__name__) _DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT = 128 @@ -810,6 +813,7 @@ def __init__( set_status_on_exception: bool = True, limits=_UnsetLimits, instrumentation_scope: Optional[InstrumentationScope] = None, + record_end_metrics: Optional[Callable[[], None]] = None, ) -> None: if resource is None: 
resource = Resource.create({}) @@ -847,6 +851,8 @@ def __init__( self._links = self._new_links(links) + self._record_end_metrics = record_end_metrics + def __repr__(self): return f'{type(self).__name__}(name="{self._name}", context={self._context})' @@ -976,6 +982,8 @@ def end(self, end_time: Optional[int] = None) -> None: self._end_time = end_time if end_time is not None else time_ns() + if self._record_end_metrics: + self._record_end_metrics() # pylint: disable=protected-access self._span_processor._on_ending(self) self._span_processor.on_end(self._readable_span()) @@ -1096,6 +1104,7 @@ def __init__( id_generator: IdGenerator, instrumentation_info: InstrumentationInfo, span_limits: SpanLimits, + meter_provider: Optional[metrics_api.MeterProvider], instrumentation_scope: InstrumentationScope, ) -> None: self.sampler = sampler @@ -1106,6 +1115,9 @@ def __init__( self._span_limits = span_limits self._instrumentation_scope = instrumentation_scope + meter_provider = meter_provider or metrics_api.get_meter_provider() + self._tracer_metrics = TracerMetrics(meter_provider) + @_agnosticcontextmanager # pylint: disable=protected-access def start_as_current_span( self, @@ -1189,6 +1201,10 @@ def start_span( # pylint: disable=too-many-locals trace_state=sampling_result.trace_state, ) + record_end_metrics = self._tracer_metrics.start_span( + parent_span_context, sampling_result.decision + ) + # Only record if is_recording() is true if sampling_result.decision.is_recording(): # pylint:disable=protected-access @@ -1207,6 +1223,7 @@ def start_span( # pylint: disable=too-many-locals set_status_on_exception=set_status_on_exception, limits=self._span_limits, instrumentation_scope=self._instrumentation_scope, + record_end_metrics=record_end_metrics, ) span.start(start_time=start_time, parent_context=context) else: @@ -1227,6 +1244,7 @@ def __init__( ] = None, id_generator: Optional[IdGenerator] = None, span_limits: Optional[SpanLimits] = None, + meter_provider: 
Optional[metrics_api.MeterProvider] = None, ) -> None: self._active_span_processor = ( active_span_processor or SynchronousMultiSpanProcessor() @@ -1246,6 +1264,7 @@ def __init__( disabled = environ.get(OTEL_SDK_DISABLED, "") self._disabled = disabled.lower().strip() == "true" self._atexit_handler = None + self._meter_provider = meter_provider if shutdown_on_exit: self._atexit_handler = atexit.register(self.shutdown) @@ -1291,6 +1310,7 @@ def get_tracer( self.id_generator, instrumentation_info, self._span_limits, + self._meter_provider, InstrumentationScope( instrumenting_module_name, instrumenting_library_version, diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/_tracer_metrics.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/_tracer_metrics.py new file mode 100644 index 0000000000..109208f700 --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/_tracer_metrics.py @@ -0,0 +1,73 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import annotations
+
+from collections.abc import Callable
+
+from opentelemetry import metrics as metrics_api
+from opentelemetry.sdk.trace.sampling import Decision
+from opentelemetry.trace.span import SpanContext
+
+
+class TracerMetrics:
+    """Counts started and live spans on the "opentelemetry-sdk" meter."""
+
+    def __init__(self, meter_provider: metrics_api.MeterProvider) -> None:
+        """Create the two span instruments from ``meter_provider``."""
+        sdk_meter = meter_provider.get_meter("opentelemetry-sdk")
+        self._started_spans = sdk_meter.create_counter(
+            "otel.sdk.span.started", "{span}", "The number of created spans"
+        )
+        self._live_spans = sdk_meter.create_up_down_counter(
+            "otel.sdk.span.live",
+            "{span}",
+            "The number of currently live spans",
+        )
+
+    def start_span(
+        self,
+        parent_span_context: SpanContext | None,
+        sampling_decision: Decision,
+    ) -> Callable[[], None]:
+        """Count a span start; return the callback to invoke at its end."""
+        started_attrs = {
+            "otel.span.parent.origin": parent_origin(parent_span_context),
+            "otel.span.sampling_result": sampling_decision.name,
+        }
+        # Every start is counted, whatever the sampling decision.
+        self._started_spans.add(1, started_attrs)
+        if sampling_decision == Decision.DROP:
+            # Dropped spans are never added to the live count.
+            return noop
+
+        live_attrs = {"otel.span.sampling_result": sampling_decision.name}
+        self._live_spans.add(1, live_attrs)
+
+        def record_end() -> None:
+            # Undo the increment above with the same attribute set.
+            self._live_spans.add(-1, live_attrs)
+
+        return record_end
+
+
+def noop() -> None:
+    """Do nothing; the end callback returned for dropped spans."""
+
+
+def parent_origin(span_ctx: SpanContext | None) -> str:
+    """Return "none", "remote" or "local" for the given parent context."""
+    if span_ctx is None:
+        return "none"
+    return "remote" if span_ctx.is_remote else "local"
diff --git a/opentelemetry-sdk/tests/trace/test_sdk_metrics.py b/opentelemetry-sdk/tests/trace/test_sdk_metrics.py
new file mode 100644
index 0000000000..2baa967f8a
--- /dev/null
+++ b/opentelemetry-sdk/tests/trace/test_sdk_metrics.py
@@ -0,0 +1,244 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import TestCase
+
+from opentelemetry import trace as trace_api
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import InMemoryMetricReader
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.sampling import (
+    ALWAYS_OFF,
+    ALWAYS_ON,
+    Decision,
+    StaticSampler,
+)
+from opentelemetry.trace.span import SpanContext
+
+
+class TestTracerProviderMetrics(TestCase):
+    """Checks the span metrics recorded when spans are started and ended."""
+
+    def setUp(self):
+        self.metric_reader = InMemoryMetricReader()
+        self.meter_provider = MeterProvider(
+            metric_readers=[self.metric_reader]
+        )
+
+    def tearDown(self):
+        self.meter_provider.shutdown()
+
+    @staticmethod
+    def _find_metric(metric_data, name):
+        """Return the metric called ``name`` in the first scope, or None."""
+        metrics = metric_data.resource_metrics[0].scope_metrics[0].metrics
+        return next((m for m in metrics if m.name == name), None)
+
+    def assert_started_spans(self, metric_data, value, attrs):
+        """Assert the first ``otel.sdk.span.started`` data point."""
+        metric = self._find_metric(metric_data, "otel.sdk.span.started")
+        self.assertIsNotNone(metric)
+        data_point = metric.data.data_points[0]
+        self.assertEqual(data_point.value, value)
+        self.assertDictEqual(data_point.attributes, attrs)
+
+    def assert_live_spans(self, metric_data, value, attrs):
+        """Assert the first ``otel.sdk.span.live`` data point.
+
+        A ``value`` of None asserts that the metric is absent instead.
+        """
+        metric = self._find_metric(metric_data, "otel.sdk.span.live")
+        if value is None:
+            self.assertIsNone(metric)
+            return
+        self.assertIsNotNone(metric)
+        data_point = metric.data.data_points[0]
+        self.assertEqual(data_point.value, value)
+        self.assertDictEqual(data_point.attributes, attrs)
+
+    def _assert_metrics(self, started_attrs, live_value, live_attrs):
+        """Collect once and assert both span metrics on that snapshot."""
+        metric_data = self.metric_reader.get_metrics_data()
+        self.assert_started_spans(metric_data, 1, started_attrs)
+        self.assert_live_spans(metric_data, live_value, live_attrs)
+
+    def _assert_span_lifecycle(
+        self,
+        sampler,
+        sampling_result,
+        *,
+        parent_origin="none",
+        parent_span_context=None,
+        expect_live=True,
+    ):
+        """Start and end one span, asserting both metrics after each step.
+
+        ``otel.sdk.span.started`` must read 1 with the expected attributes
+        both times. ``otel.sdk.span.live`` must read 1 and then 0, or stay
+        absent throughout when ``expect_live`` is false.
+        """
+        tracer_provider = TracerProvider(
+            sampler=sampler, meter_provider=self.meter_provider
+        )
+        # The provider would otherwise never be shut down by the test.
+        self.addCleanup(tracer_provider.shutdown)
+        tracer = tracer_provider.get_tracer("test")
+
+        context = None
+        if parent_span_context is not None:
+            context = trace_api.set_span_in_context(
+                trace_api.NonRecordingSpan(parent_span_context)
+            )
+
+        started_attrs = {
+            "otel.span.parent.origin": parent_origin,
+            "otel.span.sampling_result": sampling_result,
+        }
+        live_attrs = (
+            {"otel.span.sampling_result": sampling_result}
+            if expect_live
+            else {}
+        )
+
+        span = tracer.start_span("span", context=context)
+        self._assert_metrics(
+            started_attrs, 1 if expect_live else None, live_attrs
+        )
+        span.end()
+        self._assert_metrics(
+            started_attrs, 0 if expect_live else None, live_attrs
+        )
+
+    def test_sampled(self):
+        self._assert_span_lifecycle(ALWAYS_ON, "RECORD_AND_SAMPLE")
+
+    def test_record_only(self):
+        self._assert_span_lifecycle(
+            StaticSampler(Decision.RECORD_ONLY), "RECORD_ONLY"
+        )
+
+    def test_dropped(self):
+        self._assert_span_lifecycle(ALWAYS_OFF, "DROP", expect_live=False)
+
+    def test_dropped_remote_parent(self):
+        self._assert_span_lifecycle(
+            ALWAYS_OFF,
+            "DROP",
+            parent_origin="remote",
+            parent_span_context=SpanContext(
+                trace_id=1, span_id=2, is_remote=True
+            ),
+            expect_live=False,
+        )
+
+    def test_dropped_local_parent(self):
+        self._assert_span_lifecycle(
+            ALWAYS_OFF,
+            "DROP",
+            parent_origin="local",
+            parent_span_context=SpanContext(
+                trace_id=1, span_id=2, is_remote=False
+            ),
+            expect_live=False,
+        )
diff --git a/tests/opentelemetry-test-utils/src/opentelemetry/test/test_base.py b/tests/opentelemetry-test-utils/src/opentelemetry/test/test_base.py
index 69da617bb6..dd7ded5d50 100644
--- a/tests/opentelemetry-test-utils/src/opentelemetry/test/test_base.py
+++ b/tests/opentelemetry-test-utils/src/opentelemetry/test/test_base.py
@@ -28,7 +28,6 @@
     DataPointT,
     HistogramDataPoint,
     InMemoryMetricReader,
-    MetricReader,
     NumberDataPoint,
 )
 from opentelemetry.sdk.trace import TracerProvider, export
@@ -118,7 +117,9 @@ def create_tracer_provider(**kwargs):
         return tracer_provider, memory_exporter
 
     @staticmethod
-    def create_meter_provider(**kwargs) -> Tuple[MeterProvider, MetricReader]:
+    def create_meter_provider(
+        **kwargs,
+    ) -> Tuple[MeterProvider, InMemoryMetricReader]:
         """Helper to create a configured meter provider
         Creates a `MeterProvider` and an `InMemoryMetricReader`.
         Returns:
@@ -151,6 +152,11 @@ def get_sorted_metrics(self):
         all_metrics = []
         for metrics in resource_metrics:
             for scope_metrics in metrics.scope_metrics:
+                # This helper class is used by instrumentation asserting their own
+                # metrics. They should never need to assert SDK metrics so we filter
+                # them out automatically.
+                if scope_metrics.scope.name == "opentelemetry-sdk":
+                    continue
                 all_metrics.extend(scope_metrics.metrics)
 
         return self.sorted_metrics(all_metrics)