From 0fff54aad0197350f9ec5a07a18fdf8e2404182f Mon Sep 17 00:00:00 2001 From: Melissari1997 Date: Sat, 6 Jun 2026 18:04:23 +0200 Subject: [PATCH 1/3] Fix #2790: Add geometry deduplication to contours() When raster cells exactly equal the contour level, overlapping chunk boundaries (dask) or saddle-case disambiguation (numpy) can emit duplicate polylines. Add _deduplicate_lines() to remove identical geometries before returning results, ensuring all backends produce unique contour lines. Added tests for numpy, dask, and geopandas backends to verify no duplicate geometries are returned. --- xrspatial/contour.py | 45 +++++++++++++++++ xrspatial/tests/test_contour.py | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/xrspatial/contour.py b/xrspatial/contour.py index 33dc9eb7b..02672fa39 100644 --- a/xrspatial/contour.py +++ b/xrspatial/contour.py @@ -562,6 +562,49 @@ def _remove_duplicate_segments(seg_rows, seg_cols): return seg_rows[keep], seg_cols[keep] +def _deduplicate_lines(results): + """Remove duplicate polylines from contour results. + + Two lines are considered duplicates when they share the same level + and the same set of vertex pairs (order-independent). + + Parameters + ---------- + results : list of (float, ndarray) + Contour lines grouped by level. + + Returns + ------- + list of (float, ndarray) + Deduplicated contour lines. + """ + if not results: + return results + + from collections import defaultdict + by_level = defaultdict(list) + for level, coords in results: + by_level[level].append(coords) + + DECIMALS = 10 + deduped = [] + for level in sorted(by_level.keys()): + lines = by_level[level] + seen = set() + for coords in lines: + rounded = tuple( + sorted( + (round(coords[i, 0], DECIMALS), round(coords[i, 1], DECIMALS)) + for i in range(len(coords)) + ) + ) + if rounded not in seen: + seen.add(rounded) + deduped.append((level, coords)) + + return deduped + + def _to_geopandas(results, crs=None): """Convert contour results to a GeoDataFrame.""" try: @@ -743,6 +786,8 @@ def contours( transformed.append((level, out)) results = transformed + results = _deduplicate_lines(results) + if return_type == "numpy": return results elif return_type == "geopandas": diff --git a/xrspatial/tests/test_contour.py b/xrspatial/tests/test_contour.py index 5f7cbdbd3..1b064b448 100644 --- a/xrspatial/tests/test_contour.py +++ b/xrspatial/tests/test_contour.py @@ -954,6 +954,33 @@ def _assert_no_degenerate_geopandas(gdf): assert geom.is_valid, f"invalid geometry: {geom.wkt}" +def _assert_no_duplicate_lines(result): + """Assert that no two contour lines share the same geometry at the same level. + + Each polyline is decomposed into canonical segments (smaller endpoint first, + rounded to 10 decimals). Two lines at the same level are duplicates if they + have the same set of canonical segments. + """ + from collections import defaultdict + by_level = defaultdict(list) + DECIMALS = 10 + for level, coords in result: + segs = [] + for i in range(len(coords) - 1): + p0 = (round(coords[i, 0], DECIMALS), round(coords[i, 1], DECIMALS)) + p1 = (round(coords[i + 1, 0], DECIMALS), round(coords[i + 1, 1], DECIMALS)) + segs.append((min(p0, p1), max(p0, p1))) + by_level[level].append(tuple(sorted(segs))) + + for level, line_signatures in by_level.items(): + seen = set() + for sig in line_signatures: + assert sig not in seen, ( + f"Duplicate contour line at level {level}" + ) + seen.add(sig) + + class TestDegenerateExactLevel: """A corner exactly equal to the level must not poison the output. @@ -1032,3 +1059,65 @@ def test_genuine_contour_survives(self): gdf = contours(agg, levels=[2.5], return_type="geopandas") assert len(gdf) > 0 _assert_no_degenerate_geopandas(gdf) + + def test_plateau_no_duplicate_geometries(self): + """A flat interior plateau must not produce duplicate contour lines. + + Regression for #2790: when a plateau's interior cells all equal the + contour level, overlapping chunk boundaries (dask) or saddle-case + disambiguation (numpy) can emit the same polyline twice. + """ + data = np.array([ + [0, 0, 0, 0], + [0, 1, 1, 0], + [0, 1, 1, 0], + [0, 0, 0, 0], + ], dtype=float) + np_agg = create_test_raster(data, backend='numpy') + np_result = contours(np_agg, levels=[1.0]) + + # All returned lines must be geometrically unique. + _assert_no_duplicate_lines(np_result) + + @dask_array_available + def test_plateau_no_duplicate_geometries_dask(self): + """Same plateau test with dask backend. + + Regression for #2790: overlapping chunk boundaries can emit + duplicate polylines that must be deduplicated. + """ + data = np.array([ + [0, 0, 0, 0], + [0, 1, 1, 0], + [0, 1, 1, 0], + [0, 0, 0, 0], + ], dtype=float) + dk_agg = create_test_raster(data, backend='dask+numpy', chunks=(2, 2)) + dk_result = contours(dk_agg, levels=[1.0]) + + _assert_no_duplicate_lines(dk_result) + + def test_plateau_no_duplicate_geometries_geopandas(self): + """Verify no duplicate geometries in GeoDataFrame output. + + Regression for #2790: deduplication must apply before + GeoDataFrame construction. + """ + pytest.importorskip("geopandas") + data = np.array([ + [0, 0, 0, 0], + [0, 1, 1, 0], + [0, 1, 1, 0], + [0, 0, 0, 0], + ], dtype=float) + np_agg = create_test_raster(data, backend='numpy') + gdf = contours(np_agg, levels=[1.0], return_type="geopandas") + + # Each row's geometry must be unique. + seen = set() + for _, row in gdf.iterrows(): + geom_key = tuple(round(c, 10) for c in row.geometry.coords) + assert geom_key not in seen, ( + f"Duplicate geometry in GeoDataFrame at level {row.level}" + ) + seen.add(geom_key) From b86735531384fdaf780156485cfca5d081095fd7 Mon Sep 17 00:00:00 2001 From: Melissari1997 Date: Sat, 6 Jun 2026 22:22:32 +0200 Subject: [PATCH 2/3] Fix #2790: Add geometry deduplication to contours() When raster cells exactly equal the contour level, overlapping chunk boundaries (dask) or saddle-case disambiguation (numpy) can emit duplicate polylines. Add _deduplicate_lines() to remove identical geometries before returning results, ensuring all backends produce unique contour lines. Changes: - Add _deduplicate_lines() function with canonical segment comparison (direction-independent, rounded to 10 decimals) - Move deduplication before coordinate transform (array-index space) - Move defaultdict to top-level import in contour.py and test_contour.py - Add plateau regression tests for numpy, dask, and geopandas backends - Add 3 unit tests for _deduplicate_lines (exact dupes, different levels, reverse dupes) Review notes: - No blockers identified - All 52 tests pass (23 skipped for optional deps: cupy, geopandas, skimage) - Deduplication runs on array-index coordinates, consistent with DECIMALS=10 usage in _stitch_segments and _remove_duplicate_segments - Dask backend already calls _deduplicate_by_level() internally; top-level _deduplicate_lines() is a safety net for all backends --- xrspatial/contour.py | 27 ++++++++++++++------------- xrspatial/tests/test_contour.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/xrspatial/contour.py b/xrspatial/contour.py index 02672fa39..4dbf2cb63 100644 --- a/xrspatial/contour.py +++ b/xrspatial/contour.py @@ -9,6 +9,7 @@ # levels, making it well suited to Dask chunking and GPU execution. import warnings +from collections import defaultdict from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union import numpy as np @@ -505,7 +506,6 @@ def _deduplicate_by_level(results): if not results: return results - from collections import defaultdict by_level = defaultdict(list) for level, coords in results: by_level[level].append(coords) @@ -566,7 +566,9 @@ def _deduplicate_lines(results): """Remove duplicate polylines from contour results. Two lines are considered duplicates when they share the same level - and the same set of vertex pairs (order-independent). + and the same set of canonical segments. Each segment is represented + as a tuple of its endpoints with the smaller endpoint first (rounded + to 10 decimals), making the comparison direction-independent. Parameters ---------- @@ -581,7 +583,6 @@ def _deduplicate_lines(results): if not results: return results - from collections import defaultdict by_level = defaultdict(list) for level, coords in results: by_level[level].append(coords) @@ -592,14 +593,14 @@ def _deduplicate_lines(results): lines = by_level[level] seen = set() for coords in lines: - rounded = tuple( - sorted( - (round(coords[i, 0], DECIMALS), round(coords[i, 1], DECIMALS)) - for i in range(len(coords)) - ) - ) - if rounded not in seen: - seen.add(rounded) + segs = [] + for i in range(len(coords) - 1): + p0 = (round(coords[i, 0], DECIMALS), round(coords[i, 1], DECIMALS)) + p1 = (round(coords[i + 1, 0], DECIMALS), round(coords[i + 1, 1], DECIMALS)) + segs.append((min(p0, p1), max(p0, p1))) + signature = tuple(sorted(segs)) + if signature not in seen: + seen.add(signature) deduped.append((level, coords)) return deduped @@ -772,6 +773,8 @@ def contours( ) results = mapper(agg)(agg.data, levels) + results = _deduplicate_lines(results) + # Transform from array indices to the DataArray's coordinate values. y_coords = agg.coords[agg.dims[0]].values x_coords = agg.coords[agg.dims[1]].values @@ -786,8 +789,6 @@ def contours( transformed.append((level, out)) results = transformed - results = _deduplicate_lines(results) - if return_type == "numpy": return results elif return_type == "geopandas": diff --git a/xrspatial/tests/test_contour.py b/xrspatial/tests/test_contour.py index 1b064b448..c872124f9 100644 --- a/xrspatial/tests/test_contour.py +++ b/xrspatial/tests/test_contour.py @@ -2,6 +2,8 @@ import pytest import xarray as xr +from collections import defaultdict + from xrspatial.contour import contours, _contours_numpy, _stitch_segments from xrspatial.tests.general_checks import ( create_test_raster, @@ -961,7 +963,6 @@ def _assert_no_duplicate_lines(result): rounded to 10 decimals). Two lines at the same level are duplicates if they have the same set of canonical segments. """ - from collections import defaultdict by_level = defaultdict(list) DECIMALS = 10 for level, coords in result: @@ -1121,3 +1122,33 @@ def test_plateau_no_duplicate_geometries_geopandas(self): f"Duplicate geometry in GeoDataFrame at level {row.level}" ) seen.add(geom_key) + + +class TestDeduplicateLines: + + def test_deduplicate_lines_removes_exact_duplicates(self): + """_deduplicate_lines removes identical polylines at the same level.""" + from xrspatial.contour import _deduplicate_lines + coords = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]]) + results = [(1.0, coords.copy()), (1.0, coords.copy())] + deduped = _deduplicate_lines(results) + assert len(deduped) == 1 + assert deduped[0][0] == 1.0 + np.testing.assert_allclose(deduped[0][1], coords) + + def test_deduplicate_lines_keeps_different_levels(self): + """_deduplicate_lines keeps lines at different levels even with same geometry.""" + from xrspatial.contour import _deduplicate_lines + coords = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]]) + results = [(1.0, coords.copy()), (2.0, coords.copy())] + deduped = _deduplicate_lines(results) + assert len(deduped) == 2 + + def test_deduplicate_lines_removes_reverse_duplicates(self): + """_deduplicate_lines removes polylines that trace the same segments in reverse.""" + from xrspatial.contour import _deduplicate_lines + fwd = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]]) + rev = np.array([[2.0, 0.0], [1.0, 1.0], [0.0, 0.0]]) + results = [(1.0, fwd.copy()), (1.0, rev.copy())] + deduped = _deduplicate_lines(results) + assert len(deduped) == 1 From ec4b994dbf167eb38c8b5826223fa749fea394b1 Mon Sep 17 00:00:00 2001 From: Melissari1997 Date: Sat, 6 Jun 2026 22:36:24 +0200 Subject: [PATCH 3/3] Fix duplicate geometry detection in TestDegenerateExactLevel --- xrspatial/tests/test_contour.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xrspatial/tests/test_contour.py b/xrspatial/tests/test_contour.py index c872124f9..073842a93 100644 --- a/xrspatial/tests/test_contour.py +++ b/xrspatial/tests/test_contour.py @@ -1117,7 +1117,7 @@ def test_plateau_no_duplicate_geometries_geopandas(self): # Each row's geometry must be unique. seen = set() for _, row in gdf.iterrows(): - geom_key = tuple(round(c, 10) for c in row.geometry.coords) + geom_key = tuple(round(v, 10) for c in row.geometry.coords for v in c) assert geom_key not in seen, ( f"Duplicate geometry in GeoDataFrame at level {row.level}" )