Skip to content

Commit 032409c

Browse files
authored
Merge pull request #47 from iosefa/feature/add-voxeldownsample
process: add voxel-grid downsampling with mode selection
2 parents 20a4134 + 82996eb commit 032409c

4 files changed

Lines changed: 208 additions & 9 deletions

File tree

pyforestscan/filters.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from typing import List
2+
import math
23

34
from pyforestscan.handlers import _build_pdal_pipeline
45
from pyforestscan.pipeline import _filter_hag, _filter_ground, _filter_statistical_outlier, _filter_smrf, \
5-
_filter_radius, _select_ground
6+
_filter_radius, _select_ground, _filter_voxeldownsize
67

78

89
def filter_hag(arrays, lower_limit=0, upper_limit=None) -> List:
@@ -184,3 +185,49 @@ def downsample_poisson(arrays, thin_radius) -> List:
184185
"""
185186
pipeline = _build_pdal_pipeline(arrays, [_filter_radius(thin_radius)])
186187
return pipeline.arrays
188+
189+
190+
def downsample_voxel(arrays, cell, mode) -> List:
191+
"""
192+
Downsample point cloud arrays using PDAL's voxel-based thinning.
193+
194+
Uses PDAL ``filters.voxeldownsize`` to partition space into cubic voxels of
195+
edge length ``cell`` and keep one representative point per occupied voxel.
196+
The representative is chosen by ``mode``:
197+
198+
- "center": keeps the first point encountered in each voxel and overwrites
199+
its X/Y/Z with the voxel center coordinates (shifts positions).
200+
- "first": keeps the first point encountered in each voxel unchanged.
201+
202+
Args:
203+
arrays (list): Point cloud arrays to be downsampled (as expected by your
204+
pipeline builder).
205+
cell (float): Voxel edge length in the same units as the coordinates.
206+
Must be a positive, finite number (> 0).
207+
mode (str): One of {"center", "first"} (case-insensitive).
208+
209+
Returns:
210+
list: Downsampled point cloud arrays from the executed pipeline.
211+
212+
Raises:
213+
ValueError: If ``cell`` is not positive/finite or if ``mode`` is invalid.
214+
TypeError: If ``mode`` is not a string.
215+
216+
Notes:
217+
- "center" modifies point coordinates to voxel centers; use "first" if
218+
you must preserve original XY/Z.
219+
"""
220+
if not isinstance(cell, (int, float)) or not math.isfinite(cell) or cell <= 0:
221+
raise ValueError(f"'cell' must be a positive, finite number (got {cell!r}).")
222+
223+
if not isinstance(mode, str):
224+
raise TypeError("'mode' must be a string: 'center' or 'first'.")
225+
226+
mode_lc = mode.lower()
227+
allowed_modes = {"center", "first"}
228+
if mode_lc not in allowed_modes:
229+
raise ValueError(f"'mode' must be one of {sorted(allowed_modes)} (got {mode!r}).")
230+
231+
pipeline = _build_pdal_pipeline(arrays, [_filter_voxeldownsize(float(cell), mode_lc)])
232+
return pipeline.arrays
233+

pyforestscan/pipeline.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,19 @@ def _filter_hag(lower_limit=0, upper_limit=None):
7676

7777
def _filter_radius(radius):
7878
"""
79-
todo: update: this should be described as random down sample. Uses Poisson sampling.
80-
Generates a filter configuration with a specified radius.
79+
Build a PDAL Poisson-disk sampler configuration.
80+
81+
Creates a PDAL filter config that performs fixed-radius thinning in the
82+
XY plane: within any circle of radius ``radius``, at most one point is
83+
retained. This enforces a minimum spacing of ``radius`` between kept
84+
points and helps homogenize point density prior to downstream processing.
8185
8286
:param radius:
83-
The radius to be used in the filter configuration.
84-
:type radius: int
87+
Sampling radius in the same units as the point coordinates
88+
(typically meters). Must be > 0 for thinning to take effect.
89+
:type radius: float
8590
:return:
86-
A dictionary containing the filter type and radius.
91+
A PDAL JSON fragment for ``filters.sample``.
8792
:rtype: dict
8893
"""
8994
return {
@@ -92,6 +97,35 @@ def _filter_radius(radius):
9297
}
9398

9499

100+
def _filter_voxeldownsize(cell, mode):
101+
"""
102+
Build a PDAL voxel-grid downsampling configuration.
103+
104+
Partitions space into cubic voxels of edge length ``cell`` and reduces
105+
each occupied voxel to a single representative point. The selection
106+
strategy is controlled by ``mode`` and is passed through to PDAL.
107+
108+
:param cell:
109+
Edge length of the cubic voxel (same units as coordinates,
110+
typically meters). Must be > 0 for downsampling to take effect.
111+
:type cell: float
112+
:param mode:
113+
Strategy PDAL uses to choose the representative point per voxel.
114+
Accepted values depend on your PDAL version; this string is forwarded
115+
unchanged (e.g., selecting the first point encountered or a summary
116+
statistic like a centroid/mean, if supported).
117+
:type mode: str
118+
:return:
119+
A PDAL JSON fragment for ``filters.voxeldownsize``.
120+
:rtype: dict
121+
"""
122+
return {
123+
"type": "filters.voxeldownsize",
124+
"cell": cell,
125+
"mode": mode
126+
}
127+
128+
95129
def _filter_ground():
96130
"""
97131
Generate a PDAL classification filter to remove ground points (classification 2).

pyforestscan/process.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from tqdm import tqdm
77

88
from pyforestscan.calculate import calculate_fhd, calculate_pad, calculate_pai, assign_voxels, calculate_chm, calculate_canopy_cover
9-
from pyforestscan.filters import remove_outliers_and_clean, downsample_poisson
9+
from pyforestscan.filters import remove_outliers_and_clean, downsample_poisson, downsample_voxel
1010
from pyforestscan.handlers import create_geotiff
1111
from pyforestscan.pipeline import _hag_raster, _hag_delaunay
1212
from pyforestscan.utils import get_bounds_from_ept, get_srs_from_ept
@@ -53,7 +53,9 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size,
5353
hag_dtm=False, dtm=None, bounds=None, interpolation=None, remove_outliers=False,
5454
cover_min_height: float = 2.0, cover_k: float = 0.5,
5555
skip_existing: bool = False, verbose: bool = False,
56-
thin_radius: float | None = None) -> None:
56+
thin_radius: float | None = None,
57+
voxelgrid_cell: float | None = None,
58+
voxelgrid_mode: str = "first") -> None:
5759
"""
5860
Process a large EPT point cloud by tiling, compute CHM or other metrics for each tile,
5961
and write the results to the specified output directory.
@@ -81,6 +83,10 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size,
8183
verbose (bool, optional): If True, print warnings for empty/invalid tiles and buffer adjustments. Defaults to False.
8284
thin_radius (float or None, optional): If provided (> 0), apply Poisson radius-based thinning per tile before metrics.
8385
Units are in the same CRS as the data (e.g., meters). Defaults to None.
86+
voxelgrid_cell (float or None, optional): If provided (> 0), apply PDAL voxel-grid downsampling per tile before metrics
87+
using cell edge length of ``voxelgrid_cell``. Defaults to None.
88+
voxelgrid_mode (str, optional): Representative selection for voxel-grid downsampling. Common values are
89+
"first" (keep first point unchanged) or "center" (snap kept point to voxel center). Defaults to "first".
8490
8591
Returns:
8692
None
@@ -189,7 +195,7 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size,
189195
else:
190196
tile_points = arrays[0]
191197

192-
# Optional radius-based thinning before metrics
198+
# Optional thinning before metrics
193199
if thin_radius is not None and thin_radius > 0:
194200
thinned = downsample_poisson([tile_points], thin_radius=thin_radius)
195201
tile_points = thinned[0] if thinned else tile_points
@@ -200,6 +206,22 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size,
200206
pbar.update(1)
201207
continue
202208

209+
if voxelgrid_cell is not None and voxelgrid_cell > 0:
210+
try:
211+
vthinned = downsample_voxel([tile_points], cell=float(voxelgrid_cell), mode=voxelgrid_mode)
212+
except Exception as e:
213+
# Fail soft per tile to keep processing going
214+
if verbose:
215+
print(f"Warning: Voxel-grid downsampling failed for tile ({i}, {j}): {e}. Proceeding without.")
216+
vthinned = None
217+
if vthinned:
218+
tile_points = vthinned[0]
219+
if tile_points.size == 0:
220+
if verbose:
221+
print(f"Warning: Tile ({i}, {j}) empty after voxel-grid thinning. Skipping.")
222+
pbar.update(1)
223+
continue
224+
203225
buffer_pixels_x = int(np.ceil(buffer_x / voxel_size[0]))
204226
buffer_pixels_y = int(np.ceil(buffer_y / voxel_size[1]))
205227

tests/test_process.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,102 @@ def _fake_downsample(arrays, thin_radius):
158158
assert len(created_tifs) >= 1, "Expected at least one output tile for PAI."
159159

160160

161+
@patch("pyforestscan.process.downsample_voxel")
162+
@patch("pyforestscan.process.pdal.Pipeline")
163+
def test_process_with_tiles_voxelgrid_applied(mock_pipeline_cls, mock_downsample_voxel, tmp_path):
164+
"""
165+
When voxelgrid_cell is provided, ensure downsample_voxel is called and output is produced.
166+
"""
167+
dtype = [("X", "f8"), ("Y", "f8"), ("HeightAboveGround", "f8")]
168+
pts = np.zeros(80, dtype=dtype)
169+
pts["X"] = np.random.uniform(0, 10, size=80)
170+
pts["Y"] = np.random.uniform(0, 10, size=80)
171+
pts["HeightAboveGround"] = np.random.uniform(0, 5, size=80)
172+
173+
mock_pipeline = MagicMock()
174+
mock_pipeline.execute.return_value = True
175+
mock_pipeline.arrays = [pts]
176+
mock_pipeline_cls.return_value = mock_pipeline
177+
178+
def _fake_voxel(arrays, cell, mode):
179+
arr = arrays[0]
180+
# Keep every 3rd point to mimic downsampling
181+
return [arr[::3]]
182+
183+
mock_downsample_voxel.side_effect = _fake_voxel
184+
185+
out_dir = tmp_path / "test_pai_voxelgrid"
186+
out_dir.mkdir()
187+
188+
process_with_tiles(
189+
ept_file="fake_ept_path",
190+
tile_size=(20, 20),
191+
output_path=str(out_dir),
192+
metric="pai",
193+
voxel_size=(2, 2, 1),
194+
voxel_height=1.0,
195+
buffer_size=0.0,
196+
srs="EPSG:32610",
197+
hag=False,
198+
hag_dtm=False,
199+
dtm=None,
200+
bounds=([0, 20], [0, 20], [0, 10]),
201+
voxelgrid_cell=1.5,
202+
voxelgrid_mode="first",
203+
)
204+
205+
assert mock_downsample_voxel.called, "downsample_voxel should be called when voxelgrid_cell is set"
206+
created_tifs = list(out_dir.glob("tile_*_pai.tif"))
207+
assert len(created_tifs) >= 1, "Expected at least one output tile for PAI with voxel-grid downsampling."
208+
209+
210+
@patch("pyforestscan.process.downsample_voxel")
211+
@patch("pyforestscan.process.pdal.Pipeline")
212+
def test_process_with_tiles_voxelgrid_empty_skips(mock_pipeline_cls, mock_downsample_voxel, tmp_path):
213+
"""
214+
If voxel-grid thinning removes all points, the tile should be skipped gracefully.
215+
"""
216+
dtype = [("X", "f8"), ("Y", "f8"), ("HeightAboveGround", "f8")]
217+
pts = np.zeros(30, dtype=dtype)
218+
pts["X"] = np.random.uniform(0, 10, size=30)
219+
pts["Y"] = np.random.uniform(0, 10, size=30)
220+
pts["HeightAboveGround"] = np.random.uniform(0, 1, size=30)
221+
222+
mock_pipeline = MagicMock()
223+
mock_pipeline.execute.return_value = True
224+
mock_pipeline.arrays = [pts]
225+
mock_pipeline_cls.return_value = mock_pipeline
226+
227+
def _empty_voxel(arrays, cell, mode):
228+
# Simulate all points removed by voxel downsampling
229+
empty = np.array([], dtype=dtype)
230+
return [empty]
231+
232+
mock_downsample_voxel.side_effect = _empty_voxel
233+
234+
out_dir = tmp_path / "test_voxelgrid_empty"
235+
out_dir.mkdir()
236+
237+
process_with_tiles(
238+
ept_file="fake_ept_path",
239+
tile_size=(20, 20),
240+
output_path=str(out_dir),
241+
metric="fhd",
242+
voxel_size=(2, 2, 1),
243+
buffer_size=0.0,
244+
srs="EPSG:32610",
245+
hag=False,
246+
hag_dtm=False,
247+
dtm=None,
248+
bounds=([0, 20], [0, 20], [0, 10]),
249+
voxelgrid_cell=1.0,
250+
voxelgrid_mode="first",
251+
verbose=True,
252+
)
253+
254+
created_tifs = list(out_dir.glob("*.tif"))
255+
assert len(created_tifs) == 0, "No output should be produced when voxel-grid thinning empties the tile."
256+
161257
@patch("pyforestscan.process.pdal.Pipeline")
162258
def test_process_with_tiles_pai_handles_low_top_height(mock_pipeline_cls, tmp_path):
163259
"""

0 commit comments

Comments
 (0)