From 1a110c0c4a3fca69d220580bf5306761ba4c4357 Mon Sep 17 00:00:00 2001 From: JY Tan Date: Tue, 3 Feb 2026 17:00:18 -0800 Subject: [PATCH] Commit --- .../psycopg/instrumentation.py | 19 ++++++++++++++-- .../psycopg2/instrumentation.py | 22 +++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drift/instrumentation/psycopg/instrumentation.py b/drift/instrumentation/psycopg/instrumentation.py index bbdcfad..f2f5f66 100644 --- a/drift/instrumentation/psycopg/instrumentation.py +++ b/drift/instrumentation/psycopg/instrumentation.py @@ -14,7 +14,7 @@ from opentelemetry.trace import StatusCode as OTelStatusCode from ...core.drift_sdk import TuskDrift -from ...core.json_schema_helper import JsonSchemaHelper +from ...core.json_schema_helper import JsonSchemaHelper, SchemaMerge from ...core.mode_utils import handle_record_mode, handle_replay_mode from ...core.tracing import TdSpanAttributes from ...core.tracing.span_utils import CreateSpanOptions, SpanUtils @@ -1468,7 +1468,21 @@ def _set_span_attributes( input_value: The input data dictionary (query, parameters, etc.) output_value: The output data dictionary (rows, rowcount, error, etc.) """ - input_result = JsonSchemaHelper.generate_schema_and_hash(input_value, {}) + # IMPORTANT: Tell the exporter (otel_converter) how to generate schema + hashes. + # The exporter recomputes the schema/hashes at export time from td.input_value and td.input_schema_merges. + # Mark parameters as match_importance=0.0 so non-deterministic values (e.g. timestamps) don't prevent mock matching. + span.set_attribute( + TdSpanAttributes.INPUT_SCHEMA_MERGES, + json.dumps({"parameters": {"match_importance": 0.0}}), + ) + + # Set match_importance=0 for parameters to allow fuzzy matching on query text + # This handles non-deterministic values like timestamps while still exact-matching + # deterministic values like IDs (exact hash match takes priority over reduced hash) + input_result = JsonSchemaHelper.generate_schema_and_hash( + input_value, + {"parameters": SchemaMerge(match_importance=0.0)}, + ) output_result = JsonSchemaHelper.generate_schema_and_hash(output_value, {}) span.set_attribute(TdSpanAttributes.INPUT_VALUE, json.dumps(input_value)) @@ -1614,6 +1628,7 @@ def _try_get_mock( submodule_name="query", input_value=input_value, kind=SpanKind.CLIENT, + input_schema_merges={"parameters": SchemaMerge(match_importance=0.0)}, is_pre_app_start=not sdk.app_ready, ) diff --git a/drift/instrumentation/psycopg2/instrumentation.py b/drift/instrumentation/psycopg2/instrumentation.py index 6145e48..8ed8802 100644 --- a/drift/instrumentation/psycopg2/instrumentation.py +++ b/drift/instrumentation/psycopg2/instrumentation.py @@ -19,7 +19,7 @@ from opentelemetry.trace import StatusCode as OTelStatusCode from ...core.drift_sdk import TuskDrift -from ...core.json_schema_helper import JsonSchemaHelper +from ...core.json_schema_helper import JsonSchemaHelper, SchemaMerge from ...core.mode_utils import handle_record_mode, handle_replay_mode from ...core.tracing import TdSpanAttributes from ...core.tracing.span_utils import CreateSpanOptions, SpanUtils @@ -822,6 +822,9 @@ def _try_get_mock( # Use centralized mock finding utility from ...core.mock_utils import find_mock_response_sync + # Set match_importance=0 for parameters to allow fuzzy matching on query text + # This handles non-deterministic values like timestamps while still exact-matching + # deterministic values like IDs (exact hash match takes priority over reduced hash) mock_response_output = find_mock_response_sync( sdk=sdk, trace_id=trace_id, @@ -833,6 +836,7 @@ def _try_get_mock( submodule_name="query", input_value=input_value, kind=SpanKind.CLIENT, + input_schema_merges={"parameters": SchemaMerge(match_importance=0.0)}, is_pre_app_start=not sdk.app_ready, ) @@ -984,6 +988,14 @@ def _finalize_query_span( # Serialize parameters to handle datetime and other non-JSON types input_value["parameters"] = serialize_value(params) + # IMPORTANT: Tell the exporter (otel_converter) how to generate schema + hashes. + # The exporter recomputes the schema/hashes at export time from td.input_value and td.input_schema_merges. + # Mark parameters as match_importance=0.0 so non-deterministic values (e.g. timestamps) don't prevent mock matching. + span.set_attribute( + TdSpanAttributes.INPUT_SCHEMA_MERGES, + json.dumps({"parameters": {"match_importance": 0.0}}), + ) + # Build output value output_value = {} @@ -1078,7 +1090,13 @@ def patched_fetchall(): logger.debug(f"Error getting query metadata: {e}") # Generate schemas and hashes - input_result = JsonSchemaHelper.generate_schema_and_hash(input_value, {}) + # Set match_importance=0 for parameters to allow fuzzy matching on query text + # This handles non-deterministic values like timestamps while still exact-matching + # deterministic values like IDs (exact hash match takes priority over reduced hash) + input_result = JsonSchemaHelper.generate_schema_and_hash( + input_value, + {"parameters": SchemaMerge(match_importance=0.0)}, + ) output_result = JsonSchemaHelper.generate_schema_and_hash(output_value, {}) # Set span attributes