From f7d16493108d67d3219cd21ec5f37210d5f26338 Mon Sep 17 00:00:00 2001 From: Luke Zhang Date: Sun, 24 May 2026 14:35:45 -0700 Subject: [PATCH] [EXPORTER ETW] Generate a fresh trace id for every root span Fixes #3846. etw::Tracer was caching a single TraceId in a member (traceId_) generated once in its constructor, and using that same id as the fallback trace id for every root span it created. So if you did something as ordinary as: auto tracer = provider->GetTracer("Geneva-Tracer-Foo"); auto spanFoo = tracer->StartSpan("Span-Foo"); // root auto spanBar = tracer->StartSpan("Span-Bar"); // also root both Foo and Bar would come out the back end stamped with the same env_dt_traceId, and the Geneva/Jarvis side would collapse two unrelated requests into one logical trace. The issue has a screenshot of exactly that: Span-Foo and Span-Bar emitted with the same trace id of 8507dcdc651af34fb8f21152a98e2977. The OTel spec says each root span starts a new trace, and that is also what sdk::trace::Tracer does -- when there is no valid parent it calls generator.GenerateTraceId() on the spot, no cache anywhere (see sdk/src/trace/tracer.cc, the "if (parent_context.IsValid()) ... else trace_id = generator.GenerateTraceId()" block). The ETW exporter was the odd one out here. Fix is one line in StartSpan(): when parentContext is invalid, call GetIdGenerator(tracerProvider_).GenerateTraceId() instead of returning the cached traceId_. This is the same call shape already used on the next line to mint the SpanId, so the access path is proven and thread-safe. A nice side benefit: trace-id-ratio samplers were getting the same trace id for every root span on a Tracer and therefore making the same accept/reject decision for all of them, which defeated the point of probabilistic sampling. With this change the sampler sees a fresh id per root span and behaves the way the SDK does. I deliberately did not remove the now-unused traceId_ member, its init in the ctor, or the public trace_id() accessor on Tracer. The accessor is part of the exporter's public surface and removing it would be a source break for anyone calling tracer.trace_id(). Happy to follow up with a cleanup PR if maintainers prefer. Also updated ETWTracer.GlobalSingletonTracer in exporters/etw/test/etw_tracer_test.cc. That test was asserting EXPECT_EQ(traceId1, traceId3) -- i.e. two consecutive root spans on the same Tracer should share a trace id -- which was just encoding the bug. After this fix each root span gets its own fresh trace id, so the assertion is flipped to EXPECT_NE, plus checks that all three trace ids are valid and distinct, and that the C++11 magic-static GetGlobalTracer() really does return the same Tracer reference both times. Comments and the illustrative sample-event TraceId in the doc comment were updated to match the new behavior. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../opentelemetry/exporters/etw/etw_tracer.h | 11 +++++++- exporters/etw/test/etw_tracer_test.cc | 25 +++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/exporters/etw/include/opentelemetry/exporters/etw/etw_tracer.h b/exporters/etw/include/opentelemetry/exporters/etw/etw_tracer.h index 2b975e239c..44f3bb8773 100644 --- a/exporters/etw/include/opentelemetry/exporters/etw/etw_tracer.h +++ b/exporters/etw/include/opentelemetry/exporters/etw/etw_tracer.h @@ -483,7 +483,16 @@ class Tracer : public opentelemetry::trace::Tracer, parentContext = spanContext; } } - auto traceId = parentContext.IsValid() ? parentContext.trace_id() : traceId_; + // Generate a brand-new trace id from the IdGenerator on every root + // span. Do NOT fall back to the Tracer-scoped cached traceId_, + // because that collapses every root span produced by this Tracer + // into a single logical trace -- breaking per-request correlation + // in Geneva/Jarvis and disagreeing with the OTel spec and the + // sdk::trace::Tracer reference implementation, where each root + // span gets a fresh trace id. + // See https://github.com/open-telemetry/opentelemetry-cpp/issues/3846 + auto traceId = parentContext.IsValid() ? parentContext.trace_id() + : GetIdGenerator(tracerProvider_).GenerateTraceId(); // Sampling based on attributes is not supported for now, so passing empty below. std::map emptyAttributes = {{}}; diff --git a/exporters/etw/test/etw_tracer_test.cc b/exporters/etw/test/etw_tracer_test.cc index 7d32a2ffba..5be7c03255 100644 --- a/exporters/etw/test/etw_tracer_test.cc +++ b/exporters/etw/test/etw_tracer_test.cc @@ -392,7 +392,9 @@ TEST(ETWTracer, GlobalSingletonTracer) } */ - // Obtain a different tracer withs its own trace-id. + // Each root span minted by the ETW Tracer now gets a fresh trace id from + // the IdGenerator (see issue #3846). That holds whether you call StartSpan + // on the global Tracer or on another Tracer obtained from the provider. auto localTracer = GetGlobalTracerProvider().GetTracer(kGlobalProviderName); auto s2 = localTracer->StartSpan("Span2"); auto traceId2 = s2->GetContext().trace_id(); @@ -424,12 +426,16 @@ TEST(ETWTracer, GlobalSingletonTracer) } */ - // Obtain the same global tracer with the same trace-id as before. + // Re-obtain the global tracer via the C++11 magic static -- this returns + // the same Tracer reference as `globalTracer` above. Starting a new root + // span on it still produces a brand-new trace id; there is no tracer-level + // cache. auto& globalTracer2 = GetGlobalTracer(); auto s3 = globalTracer2.StartSpan("Span3"); auto traceId3 = s3->GetContext().trace_id(); s3->End(); -/* === Span 3 - "TraceId": "182a64258fb1864ca4e1a542eecbd9bf" +/* === Span 3 - sample event payload (TraceId differs from Span 1 -- each + root span gets a fresh trace id) { "Timestamp": "2021-05-10T11:45:27.0290936-07:00", "ProviderName": "OpenTelemetry-ETW-TLD", @@ -450,13 +456,22 @@ TEST(ETWTracer, GlobalSingletonTracer) "StatusCode": 0, "StatusMessage": "", "Success": "True", - "TraceId": "182a64258fb1864ca4e1a542eecbd9bf", + "TraceId": "9fbb18d2c9874340b6e4581a8b21c5a7", "_name": "Span" } } */ + // GetGlobalTracer() is backed by a C++11 magic static, so both invocations + // return the same Tracer reference. + EXPECT_EQ(&globalTracer, &globalTracer2); + // Every root span gets its own freshly-generated trace id -- there is no + // tracer-scoped caching. See issue #3846. + EXPECT_TRUE(traceId1.IsValid()); + EXPECT_TRUE(traceId2.IsValid()); + EXPECT_TRUE(traceId3.IsValid()); EXPECT_NE(traceId1, traceId2); - EXPECT_EQ(traceId1, traceId3); + EXPECT_NE(traceId1, traceId3); + EXPECT_NE(traceId2, traceId3); # if OPENTELEMETRY_ABI_VERSION_NO == 1 localTracer->CloseWithMicroseconds(0);