Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions data_diff/databases/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Float,
Decimal,
Integer,
JSON,
Text,
TemporalType,
FractionalType,
Expand Down Expand Up @@ -68,6 +69,8 @@ class Dialect(BaseDialect):
"tinytext": Text,
# Boolean
"boolean": Boolean,
# JSON
"json": JSON,
}

def quote(self, s: str) -> str:
Expand Down Expand Up @@ -106,6 +109,15 @@ def md5_as_hex(self, s: str) -> str:
return f"md5({s})"

def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
# MySQL zero-date equivalences vs BigQuery:
# TIMESTAMP '0000-00-00 00:00:00' -> '1970-01-01 00:00:00.000000' (Unix epoch)
# DATETIME '0000-00-00 00:00:00' -> NULL
#if isinstance(coltype, Timestamp):
# epoch = "cast('1970-01-01 00:00:00' as datetime(6))"
# value = f"IF({value} = '0000-00-00 00:00:00', {epoch}, {value})"
#elif isinstance(coltype, Datetime):
# value = f"NULLIF({value}, '0000-00-00 00:00:00')"

if coltype.rounds:
return self.to_string(f"cast( cast({value} as datetime({coltype.precision})) as datetime(6))")

Expand Down
14 changes: 12 additions & 2 deletions data_diff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,20 @@ def diff_int_dynamic_color_template(diff_value: int) -> str:
return "0"


def _jsons_equiv(a: Optional[str], b: Optional[str]) -> bool:
    """Return True when ``a`` and ``b`` encode the same JSON value.

    Args:
        a: JSON text from one side of the comparison, or None for a DB NULL.
        b: JSON text from the other side, or None for a DB NULL.

    Returns:
        True if the two inputs are identical strings or parse to equal JSON
        values; False if they differ or if either one fails to parse.
    """
    # Treat Python None (DB null) as the JSON null literal so that a NULL on
    # the MySQL side matches a 'null' string produced by TO_JSON_STRING(NULL)
    # on the BigQuery side (or any other DB that serializes NULL as 'null').
    if a is None:
        a = "null"
    if b is None:
        b = "null"

    # Fast-path: identical strings don't need JSON parsing. This also spares
    # two json.loads() calls when a DB NULL (normalized to "null" above) is
    # compared with a JSON "null" literal.
    if a == b:
        return True

    try:
        return json.loads(a) == json.loads(b)
    except (ValueError, TypeError):  # covers json.JSONDecodeError (subclass of ValueError)
        # At least one side is not valid JSON.
        return False


Expand Down
41 changes: 41 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
columns_removed_template,
columns_added_template,
columns_type_changed_template,
_jsons_equiv,
)

from data_diff.__main__ import _remove_passwords_in_dict
Expand Down Expand Up @@ -211,3 +212,43 @@ def test_columns_type_changed_template(self):
output = columns_type_changed_template({"column1", "column2"})
self.assertIn("Type changed [2]: [green]", output)
self.assertEqual(self.extract_columns_set(output), {"column1", "column2"})


class TestJsonsEquiv(unittest.TestCase):
    # Exercises _jsons_equiv: NULL handling, the identical-string shortcut,
    # semantic JSON comparison, and unparseable input.

    # --- None / null equivalence ---
    def test_both_none(self):
        """Two DB NULLs are equivalent."""
        outcome = _jsons_equiv(None, None)
        self.assertTrue(outcome)

    def test_none_vs_json_null_string(self):
        """DB NULL on one side, JSON 'null' string on the other, are equivalent."""
        # Check both argument orders.
        for left, right in ((None, "null"), ("null", None)):
            self.assertTrue(_jsons_equiv(left, right))

    def test_none_vs_json_string_null(self):
        """DB NULL must NOT equal the JSON string literal \"null\"."""
        quoted_null = '"null"'
        # Check both argument orders.
        for left, right in ((None, quoted_null), (quoted_null, None)):
            self.assertFalse(_jsons_equiv(left, right))

    # --- Identical strings fast-path ---
    def test_identical_strings(self):
        document = '{"a": 1}'
        self.assertTrue(_jsons_equiv(document, '{"a": 1}'))

    # --- Semantic JSON equivalence ---
    def test_equivalent_objects_different_whitespace(self):
        # The two documents differ in spacing and in key order.
        compact = '{"a":1,"b":2}'
        spaced = '{"b": 2, "a": 1}'
        self.assertTrue(_jsons_equiv(compact, spaced))

    def test_equivalent_arrays(self):
        spaced, compact = "[1, 2, 3]", "[1,2,3]"
        self.assertTrue(_jsons_equiv(spaced, compact))

    def test_different_values(self):
        first, second = '{"a": 1}', '{"a": 2}'
        self.assertFalse(_jsons_equiv(first, second))

    def test_different_types(self):
        # JSON number 1 versus JSON string "1".
        number, text = "1", '"1"'
        self.assertFalse(_jsons_equiv(number, text))

    # --- Invalid JSON ---
    def test_invalid_json_returns_false(self):
        # First pair: two different strings, neither of which is valid JSON.
        # Second pair: a valid document against a string that is not JSON.
        unparseable_pairs = (
            ("not-json", "also-not-json"),
            ('{"a": 1}', "not-json"),
        )
        for left, right in unparseable_pairs:
            self.assertFalse(_jsons_equiv(left, right))
Loading