From 7d128abc62349023c11d2ac56dc09963afca123e Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 1 Sep 2025 10:23:15 -0400 Subject: [PATCH 01/25] Add Linear Bounding Volume Hierarchy (LBVH) implementation and profiling support - Introduced LBVH class for efficient broad phase collision detection. - Implemented LBVH node structure with AABB intersection checks. - Added methods for detecting vertex-vertex, edge-vertex, edge-edge, face-vertex, edge-face, and face-face candidates. - Integrated profiling functionality with IPC_TOOLKIT_WITH_PROFILER flag. - Updated CMakeLists to include new LBVH test files and source files. - Added tests for LBVH construction and candidate detection. - Included Python bindings for LBVH and example usage scripts. - Enhanced configuration options in config.hpp for profiling support. --- CMakeLists.txt | 21 +- python/examples/find_ipctk.py | 4 + python/examples/lbvh.py | 77 ++++ python/src/bindings.cpp | 1 + python/src/broad_phase/CMakeLists.txt | 1 + python/src/broad_phase/bindings.hpp | 1 + python/src/broad_phase/lbvh.cpp | 23 + src/ipc/broad_phase/CMakeLists.txt | 2 + src/ipc/broad_phase/bvh.cpp | 1 + src/ipc/broad_phase/lbvh.cpp | 485 +++++++++++++++++++++ src/ipc/broad_phase/lbvh.hpp | 183 ++++++++ src/ipc/config.hpp.in | 1 + src/ipc/utils/profiler.cpp | 15 + src/ipc/utils/profiler.hpp | 103 +++++ tests/src/tests/broad_phase/CMakeLists.txt | 1 + tests/src/tests/broad_phase/test_lbvh.cpp | 315 +++++++++++++ tests/src/tests/utils.cpp | 8 +- 17 files changed, 1232 insertions(+), 10 deletions(-) create mode 100644 python/examples/find_ipctk.py create mode 100644 python/examples/lbvh.py create mode 100644 python/src/broad_phase/lbvh.cpp create mode 100644 src/ipc/broad_phase/lbvh.cpp create mode 100644 src/ipc/broad_phase/lbvh.hpp create mode 100644 src/ipc/utils/profiler.cpp create mode 100644 src/ipc/utils/profiler.hpp create mode 100644 tests/src/tests/broad_phase/test_lbvh.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 612f1ff9e..3ec77c505 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,15 +82,16 @@ if(CMAKE_CUDA_COMPILER) else() option(IPC_TOOLKIT_WITH_CUDA "Enable CUDA CCD" OFF) endif() -option(IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION "Use rational edge-triangle intersection check" OFF) -option(IPC_TOOLKIT_WITH_ROBIN_MAP "Use Tessil's robin-map rather than std maps" ON) -option(IPC_TOOLKIT_WITH_ABSEIL "Use Abseil's hash functions" ON) -option(IPC_TOOLKIT_WITH_FILIB "Use filib for interval arithmetic" ON) -option(IPC_TOOLKIT_WITH_INEXACT_CCD "Use the original inexact CCD method of IPC" OFF) +option(IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION "Use rational edge-triangle intersection check" OFF) +option(IPC_TOOLKIT_WITH_ROBIN_MAP "Use Tessil's robin-map rather than std maps" ON) +option(IPC_TOOLKIT_WITH_ABSEIL "Use Abseil's hash functions" ON) +option(IPC_TOOLKIT_WITH_FILIB "Use filib for interval arithmetic" ON) +option(IPC_TOOLKIT_WITH_INEXACT_CCD "Use the original inexact CCD method of IPC" OFF) +option(IPC_TOOLKIT_WITH_PROFILER "Enable performance profiler" ${IPC_TOOLKIT_TOPLEVEL_PROJECT}) # Advanced options -option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" OFF) -option(IPC_TOOLKIT_WITH_CODE_COVERAGE "Enable coverage reporting" OFF) +option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" OFF) +option(IPC_TOOLKIT_WITH_CODE_COVERAGE "Enable coverage reporting" OFF) mark_as_advanced(IPC_TOOLKIT_WITH_SIMD) # This does not work reliably mark_as_advanced(IPC_TOOLKIT_WITH_CODE_COVERAGE) # This is used in GitHub Actions @@ -207,6 +208,12 @@ if(IPC_TOOLKIT_WITH_FILIB) target_link_libraries(ipc_toolkit PUBLIC filib::filib) endif() +if(IPC_TOOLKIT_WITH_PROFILER) + # Add nlohmann/json for the profiler + include(json) + target_link_libraries(ipc_toolkit PRIVATE nlohmann_json::nlohmann_json) +endif() + # Extra warnings (link last for highest priority) include(ipc_toolkit_warnings) target_link_libraries(ipc_toolkit PRIVATE ipc::toolkit::warnings) diff --git a/python/examples/find_ipctk.py b/python/examples/find_ipctk.py new file mode 100644 index 000000000..8572b99da --- /dev/null +++ b/python/examples/find_ipctk.py @@ -0,0 +1,4 @@ +import sys +import pathlib +sys.path.append(str(pathlib.Path(__file__).parents[1])) +from _find_ipctk import ipctk # noqa diff --git a/python/examples/lbvh.py b/python/examples/lbvh.py new file mode 100644 index 000000000..004cd4012 --- /dev/null +++ b/python/examples/lbvh.py @@ -0,0 +1,77 @@ +from find_ipctk import ipctk +import meshio +import polyscope as ps +from polyscope import imgui +import numpy as np + +import pathlib + +mesh = meshio.read(pathlib.Path( + __file__).parents[2] / "tests/data/cloth_ball92.ply") + +lbvh = ipctk.LBVH() +lbvh.build(mesh.points, np.array([], dtype=int), mesh.cells_dict["triangle"]) + +ps.init() + +ps.set_give_focus_on_show(True) + +ps_mesh = ps.register_surface_mesh( + "bunny", + mesh.points, + mesh.cells_dict["triangle"] +) + +nodes = lbvh.vertex_nodes + + +def traverse_lbvh(node, max_depth): + if node.is_inner and max_depth > 0: + V_left, E_left = traverse_lbvh(nodes[node.left], max_depth - 1) + V_right, E_right = traverse_lbvh(nodes[node.right], max_depth - 1) + return np.vstack([V_left, V_right]), np.vstack([E_left, E_right + V_left.shape[0]]) + + E = np.array([ + [0, 1], + [0, 2], + [0, 3], + [1, 5], + [1, 4], + [2, 4], + [2, 6], + [3, 5], + [3, 6], + [7, 4], + [7, 5], + [7, 6], + ]) + V = np.array([ + node.aabb_min, + [node.aabb_min[0], node.aabb_min[1], node.aabb_max[2]], + [node.aabb_min[0], node.aabb_max[1], node.aabb_min[2]], + [node.aabb_max[0], node.aabb_min[1], node.aabb_min[2]], + [node.aabb_min[0], node.aabb_max[1], node.aabb_max[2]], + [node.aabb_max[0], node.aabb_min[1], node.aabb_max[2]], + [node.aabb_max[0], node.aabb_max[1], node.aabb_min[2]], + node.aabb_max + ]) + return V, E + + +max_depth = 0 +bvh_nodes, bvh_edges = traverse_lbvh(nodes[0], max_depth=max_depth) + +ps.register_curve_network("bvh", bvh_nodes, bvh_edges) + + +def foo(): + global max_depth + changed, max_depth = imgui.SliderInt("max depth", max_depth, 0, 20) + if changed: + bvh_nodes, bvh_edges = traverse_lbvh(nodes[0], max_depth=max_depth) + ps.register_curve_network("bvh", bvh_nodes, bvh_edges) + + +ps.set_user_callback(foo) + +ps.show() diff --git a/python/src/bindings.cpp b/python/src/bindings.cpp index 3e5422dec..e91daef66 100644 --- a/python/src/bindings.cpp +++ b/python/src/bindings.cpp @@ -27,6 +27,7 @@ PYBIND11_MODULE(ipctk, m) define_brute_force(m); define_bvh(m); define_hash_grid(m); + define_lbvh(m); define_spatial_hash(m); define_sweep_and_prune(m); define_sweep_and_tiniest_queue(m); diff --git a/python/src/broad_phase/CMakeLists.txt b/python/src/broad_phase/CMakeLists.txt index 2e5cdd237..71ac7f03c 100644 --- a/python/src/broad_phase/CMakeLists.txt +++ b/python/src/broad_phase/CMakeLists.txt @@ -4,6 +4,7 @@ set(SOURCES brute_force.cpp bvh.cpp hash_grid.cpp + lbvh.cpp spatial_hash.cpp sweep_and_prune.cpp sweep_and_tiniest_queue.cpp diff --git a/python/src/broad_phase/bindings.hpp b/python/src/broad_phase/bindings.hpp index 8718500d8..fccc9c315 100644 --- a/python/src/broad_phase/bindings.hpp +++ b/python/src/broad_phase/bindings.hpp @@ -7,6 +7,7 @@ void define_broad_phase(py::module_& m); void define_brute_force(py::module_& m); void define_bvh(py::module_& m); void define_hash_grid(py::module_& m); +void define_lbvh(py::module_& m); void define_spatial_hash(py::module_& m); void define_sweep_and_prune(py::module_& m); void define_sweep_and_tiniest_queue(py::module_& m); diff --git a/python/src/broad_phase/lbvh.cpp b/python/src/broad_phase/lbvh.cpp new file mode 100644 index 000000000..0fe9d77c8 --- /dev/null +++ b/python/src/broad_phase/lbvh.cpp @@ -0,0 +1,23 @@ +#include + +#include + +using namespace ipc; + +void define_lbvh(py::module_& m) +{ + py::class_(m, "LBVH_Node") + .def_readonly("aabb_min", &LBVH::Node::aabb_min) + .def_readonly("aabb_max", &LBVH::Node::aabb_max) + .def_readonly("left", &LBVH::Node::left) + .def_readonly("right", &LBVH::Node::right) + .def_property_readonly("is_leaf", &LBVH::Node::is_leaf) + .def_property_readonly("is_inner", &LBVH::Node::is_inner) + .def_property_readonly("is_valid", &LBVH::Node::is_valid); + + py::class_>(m, "LBVH") + .def(py::init()) + .def_property_readonly("vertex_nodes", &LBVH::vertex_nodes) + .def_property_readonly("edge_nodes", &LBVH::edge_nodes) + .def_property_readonly("face_nodes", &LBVH::face_nodes); +} diff --git a/src/ipc/broad_phase/CMakeLists.txt b/src/ipc/broad_phase/CMakeLists.txt index 8fe5a051d..084b37fab 100644 --- a/src/ipc/broad_phase/CMakeLists.txt +++ b/src/ipc/broad_phase/CMakeLists.txt @@ -10,6 +10,8 @@ set(SOURCES default_broad_phase.hpp hash_grid.cpp hash_grid.hpp + lbvh.cpp + lbvh.hpp spatial_hash.cpp spatial_hash.hpp sweep_and_prune.cpp diff --git a/src/ipc/broad_phase/bvh.cpp b/src/ipc/broad_phase/bvh.cpp index 668333726..a67838835 100644 --- a/src/ipc/broad_phase/bvh.cpp +++ b/src/ipc/broad_phase/bvh.cpp @@ -47,6 +47,7 @@ void BVH::init_bvh(const std::vector& boxes, SimpleBVH::BVH& bvh) void BVH::clear() { + BroadPhase::clear(); vertex_bvh->clear(); edge_bvh->clear(); face_bvh->clear(); diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp new file mode 100644 index 000000000..3026e96dc --- /dev/null +++ b/src/ipc/broad_phase/lbvh.cpp @@ -0,0 +1,485 @@ +#include "lbvh.hpp" + +#include + +#include +#include +#include +#include + +using namespace std::placeholders; + +namespace ipc { + +namespace { + // Expands a 21-bit integer into 63 bits by inserting 2 zeros after each + // bit. + uint64_t expand_bits(uint64_t v) + { + v = (v | v << 32) & 0x1F00000000FFFF; + v = (v | v << 16) & 0x1F0000FF0000FF; + v = (v | v << 8) & 0x100F00F00F00F00F; + v = (v | v << 4) & 0x10C30C30C30C30C3; + v = (v | v << 2) & 0x1249249249249249; + return v; + } + + // Calculates a 63-bit Morton code for the given 3D point located within the + // unit cube [0,1]. + uint64_t morton_3D(double x, double y, double z) + { + constexpr double scale = 1 << 21; + x = std::clamp(x * scale, 0.0, scale - 1); + y = std::clamp(y * scale, 0.0, scale - 1); + z = std::clamp(z * scale, 0.0, scale - 1); + uint64_t xx = expand_bits(uint64_t(x)); + uint64_t yy = expand_bits(uint64_t(y)); + uint64_t zz = expand_bits(uint64_t(z)); + return (xx << 2) | (yy << 1) | zz; + } +} // namespace + +LBVH::LBVH() : BroadPhase() { } + +LBVH::~LBVH() = default; + +void LBVH::build( + Eigen::ConstRef edges, + Eigen::ConstRef faces) +{ + BroadPhase::build(edges, faces); // Build edge_boxes and face_boxes + + assert(vertex_boxes.size() > 0); + mesh_aabb = { vertex_boxes[0].min, vertex_boxes[0].max }; + for (const auto& box : vertex_boxes) { + mesh_aabb.min = mesh_aabb.min.min(box.min); + mesh_aabb.max = mesh_aabb.max.max(box.max); + } + + init_bvh(vertex_boxes, vertex_bvh); + init_bvh(edge_boxes, edge_bvh); + init_bvh(face_boxes, face_bvh); +} + +namespace { + + int delta( + const std::vector& sorted_morton_codes, + int i, + uint64_t code_i, + int j) + { + if (j < 0 || j > sorted_morton_codes.size() - 1) { + return -1; + } + uint64_t code_j = sorted_morton_codes[j].morton_code; + if (code_i == code_j) { + // handle duplicate morton codes + int element_idx_i = i; // sorted_morton_codes[i].elementIdx; + int element_idx_j = j; // sorted_morton_codes[j].elementIdx; +// add 32 for common prefix of code_i ^ code_j +#if defined(__GNUC__) || defined(__clang__) + return 32 + __builtin_clz(element_idx_i ^ element_idx_j); +#elif defined(WIN32) + return 32 + __lzcnt(element_idx_i ^ element_idx_j); +#endif + } +#if defined(__GNUC__) || defined(__clang__) + return __builtin_clzll(code_i ^ code_j); +#elif defined(WIN32) + return __lzcnt64(code_i ^ code_j); +#endif + } + + void determine_range( + const std::vector& sorted_morton_codes, + int idx, + int& lower, + int& upper) + { + // determine direction of the range (+1 or -1) + const uint64_t code = sorted_morton_codes[idx].morton_code; + const int delta_l = delta(sorted_morton_codes, idx, code, idx - 1); + const int delta_r = delta(sorted_morton_codes, idx, code, idx + 1); + const int d = (delta_r >= delta_l) ? 1 : -1; + + // compute upper bound for the length of the range + const int delta_min = std::min(delta_l, delta_r); + int l_max = 2; + while (delta(sorted_morton_codes, idx, code, idx + l_max * d) + > delta_min) { + l_max = l_max << 1; + } + + // find the other end using binary search + int l = 0; + for (int t = l_max >> 1; t > 0; t >>= 1) { + if (delta(sorted_morton_codes, idx, code, idx + (l + t) * d) + > delta_min) { + l += t; + } + } + int jdx = idx + l * d; + + // ensure idx < jdx + lower = std::min(idx, jdx); + upper = std::max(idx, jdx); + } + + int find_split( + const std::vector& sorted_morton_codes, + int first, + int last) + { + uint64_t first_code = sorted_morton_codes[first].morton_code; + + // Calculate the number of highest bits that are the same + // for all objects, using the count-leading-zeros intrinsic. + int common_prefix = delta(sorted_morton_codes, first, first_code, last); + + // Use binary search to find where the next bit differs. + // Specifically, we are looking for the highest object that + // shares more than common_prefix bits with the first one. + int split = first; // initial guess + int stride = last - first; + do { + stride = (stride + 1) >> 1; // exponential decrease + int new_split = split + stride; // proposed new position + if (new_split < last) { + int split_prefix = + delta(sorted_morton_codes, first, first_code, new_split); + if (split_prefix > common_prefix) { + split = new_split; // accept proposal + } + } + } while (stride > 1); + + return split; + } +} // namespace + +void LBVH::init_bvh( + const std::vector& boxes, std::vector& lbvh) const +{ + if (boxes.size() == 0) { + return; + } + + std::vector morton_codes(boxes.size()); + tbb::parallel_for( + tbb::blocked_range(0, boxes.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + const auto& box = boxes[i]; + + const ArrayMax3d center = 0.5 * (box.min + box.max); + const ArrayMax3d mapped_center = + (center - mesh_aabb.min) / (mesh_aabb.max - mesh_aabb.min); + + morton_codes[i].morton_code = morton_3D( + mapped_center.x(), mapped_center.y(), mapped_center.z()); + morton_codes[i].box_id = i; + } + }); + + tbb::parallel_sort( + morton_codes.begin(), morton_codes.end(), + [](const MortonCodeElement& a, const MortonCodeElement& b) { + return a.morton_code < b.morton_code; + }); + + const int LEAF_OFFSET = int(boxes.size()) - 1; + + lbvh.resize(2 * boxes.size() - 1); + std::vector construction_infos(2 * boxes.size() - 1); + tbb::parallel_for( + tbb::blocked_range(0, boxes.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + + assert(i < boxes.size()); + { + const auto& box = boxes[morton_codes[i].box_id]; + lbvh[LEAF_OFFSET + i].left = Node::INVALID_POINTER; + lbvh[LEAF_OFFSET + i].right = Node::INVALID_POINTER; + lbvh[LEAF_OFFSET + i].aabb_min = box.min; + lbvh[LEAF_OFFSET + i].aabb_max = box.max; + lbvh[LEAF_OFFSET + i].vertex_ids = box.vertex_ids; + lbvh[LEAF_OFFSET + i].primitive_id = morton_codes[i].box_id; + } + + if (i < boxes.size() - 1) { + // Find out which range of objects the node corresponds + // to. (This is where the magic happens!) + + int first, last; + determine_range(morton_codes, int(i), first, last); + + // Determine where to split the range + int split = find_split(morton_codes, first, last); + + // Select child_a + int child_a = -1; + if (split == first) { + child_a = LEAF_OFFSET + split; // pointer to leaf node + } else { + child_a = split; // pointer to internal node + } + + // Select child_b + int child_b = -1; + if (split + 1 == last) { + child_b = + LEAF_OFFSET + split + 1; // pointer to leaf node + } else { + child_b = split + 1; // pointer to internal node + } + + // Record parent-child relationships + if constexpr (Node::ABSOLUTE_POINTERS) { + lbvh[i].left = child_a; + lbvh[i].right = child_b; + } else { + lbvh[i].left = child_a - int(i); + lbvh[i].right = child_b - int(i); + } + construction_infos[child_a].parent = int(i); + construction_infos[child_b].parent = int(i); + } + + // node 0 is the root + if (i == 0) { + construction_infos[0].parent = 0; + } + } + }); + + tbb::parallel_for( + tbb::blocked_range(0, boxes.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + int node_idx = construction_infos[LEAF_OFFSET + i].parent; + while (true) { + if (construction_infos[node_idx].visitation_count++ == 0) { + // this is the first thread that arrived at this node -> + // finished + break; + } + // this is the second thread that arrived at this node, both + // children are computed -> compute aabb union and continue + Node node = lbvh[node_idx], child_a, child_b; + child_a = lbvh[Node::POINTER(node_idx, node.left)]; + child_b = lbvh[Node::POINTER(node_idx, node.right)]; + + node.aabb_min = child_a.aabb_min.min(child_b.aabb_min); + node.aabb_max = child_a.aabb_max.max(child_b.aabb_max); + + lbvh[node_idx] = node; + if (node_idx == 0) { + break; // root node + } + node_idx = construction_infos[node_idx].parent; + } + } + }); +} + +void LBVH::clear() +{ + BroadPhase::clear(); + vertex_bvh.clear(); + edge_bvh.clear(); + face_bvh.clear(); +} + +namespace { + bool share_a_vertex(std::array a, std::array b) + { + return a[0] == b[0] || a[0] == b[1] || a[0] == b[2] + || (a[1] >= 0 && (a[1] == b[0] || a[1] == b[1] || a[1] == b[2])) + || (a[2] >= 0 && (a[2] == b[0] || a[2] == b[1] || a[2] == b[2])); + } + + template + void attempt_add_candidate( + const LBVH::Node& query, + const LBVH::Node& node, + const std::function& can_collide, + std::vector& candidates) + { + if (share_a_vertex(query.vertex_ids, node.vertex_ids)) { + return; + } + + int i = query.primitive_id, j = node.primitive_id; + if constexpr (swap_order) { + std::swap(i, j); + } + + if constexpr (triangular) { + if (i >= j) { + return; + } + } + + if (!can_collide(i, j)) { + return; + } + + candidates.emplace_back(i, j); + } + + template + void traverse_lbvh( + const LBVH::Node query, + const std::vector& lbvh, + const std::function& can_collide, + std::vector& candidates) + { + // Allocate traversal stack from thread-local memory, + // and push NULL to indicate that there are no postponed nodes. + std::stack stack; + stack.push(LBVH::Node::INVALID_POINTER); + + // Traverse nodes starting from the root. + int node_idx = 0; // root + do { + const LBVH::Node& node = lbvh[node_idx]; + + // Check each child node for overlap. + const LBVH::Node& child_l = lbvh[node.left]; + const LBVH::Node& child_r = lbvh[node.right]; + const bool intersects_l = child_l.intersects(query); + const bool intersects_r = child_r.intersects(query); + + // Query overlaps a leaf node => report collision. + if (intersects_l && child_l.is_leaf()) { + attempt_add_candidate( + query, child_l, can_collide, candidates); + } + + if (intersects_r && child_r.is_leaf()) { + attempt_add_candidate( + query, child_r, can_collide, candidates); + } + + // Query overlaps an internal node => traverse. + bool traverse_l = (intersects_l && !child_l.is_leaf()); + bool traverse_r = (intersects_r && !child_r.is_leaf()); + + if (!traverse_l && !traverse_r) { + node_idx = stack.top(); // traverse the top of the stack next + stack.pop(); + } else { + // traverse the children + node_idx = (traverse_l) ? node.left : node.right; + if (traverse_l && traverse_r) { + // postpone traversal of the right child + stack.push(node.right); + } + } + } while (node_idx != LBVH::Node::INVALID_POINTER); // Same as root + } +} // namespace + +template +void LBVH::detect_candidates( + const std::vector& source, + const std::vector& target, + const std::function& can_collide, + std::vector& candidates) +{ + // floor((n + (n - 1)) / 2) = floor(n - 1/2) = n - 1 + const size_t SOURCE_LEAF_OFFSET = source.size() / 2; + const size_t N_SOURCE_LEAVES = SOURCE_LEAF_OFFSET + 1; + + tbb::enumerable_thread_specific> storage; + + tbb::parallel_for( + tbb::blocked_range(size_t(0), N_SOURCE_LEAVES), + [&](const tbb::blocked_range& r) { + auto& local_candidates = storage.local(); + for (size_t i = r.begin(); i < r.end(); i++) { + traverse_lbvh( + source[SOURCE_LEAF_OFFSET + i], target, can_collide, + local_candidates); + } + }); + + merge_thread_local_vectors(storage, candidates); +} + +void LBVH::detect_vertex_vertex_candidates( + std::vector& candidates) const +{ + if (vertex_boxes.size() == 0) { + return; + } + + detect_candidates(vertex_bvh, can_vertices_collide, candidates); +} + +void LBVH::detect_edge_vertex_candidates( + std::vector& candidates) const +{ + if (edge_boxes.size() == 0 || vertex_boxes.size() == 0) { + return; + } + + // In 2D and for codimensional edge-vertex collisions, there are more + // vertices than edges, so we want to iterate over the edges. + detect_candidates( + edge_bvh, vertex_bvh, + std::bind(&LBVH::can_edge_vertex_collide, this, _1, _2), candidates); +} + +void LBVH::detect_edge_edge_candidates( + std::vector& candidates) const +{ + if (edge_boxes.size() == 0) { + return; + } + + detect_candidates( + edge_bvh, std::bind(&LBVH::can_edges_collide, this, _1, _2), + candidates); +} + +void LBVH::detect_face_vertex_candidates( + std::vector& candidates) const +{ + if (face_boxes.size() == 0 || vertex_boxes.size() == 0) { + return; + } + + // The ratio vertices:faces is 1:2, so we want to iterate over the vertices. + detect_candidates( + vertex_bvh, face_bvh, + std::bind(&LBVH::can_face_vertex_collide, this, _1, _2), candidates); +} + +void LBVH::detect_edge_face_candidates( + std::vector& candidates) const +{ + if (edge_boxes.size() == 0 || face_boxes.size() == 0) { + return; + } + + // The ratio edges:faces is 3:2, so we want to iterate over the faces. + detect_candidates( + face_bvh, edge_bvh, + std::bind(&LBVH::can_edge_face_collide, this, _1, _2), candidates); +} + +void LBVH::detect_face_face_candidates( + std::vector& candidates) const +{ + if (face_boxes.size() == 0) { + return; + } + + detect_candidates( + face_bvh, std::bind(&LBVH::can_faces_collide, this, _1, _2), + candidates); +} +} // namespace ipc \ No newline at end of file diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp new file mode 100644 index 000000000..1b275fb21 --- /dev/null +++ b/src/ipc/broad_phase/lbvh.hpp @@ -0,0 +1,183 @@ +#pragma once + +#include + +#include + +namespace ipc { + +/// @brief Linear Bounding Volume Hierarchy (LBVH) broad phase collision detection. +class LBVH : public BroadPhase { +public: + static constexpr index_t INVALID_ID = 0xFFFFFFFF; + + struct Node { + static constexpr int32_t INVALID_POINTER = 0x0; // do not change + + // true to use absolute pointers (left/right child pointer is the + // absolute index of the child in the buffer/array) or false for + // relative pointers (left/right child pointer is the relative pointer + // from the parent index to the child index in the buffer, i.e. absolute + // child pointer = absolute parent pointer + relative child pointer) + static constexpr bool ABSOLUTE_POINTERS = true; + + // helper function to handle relative pointers on + // CPU side, i.e. convert them to absolute + // pointers for array indexing + static constexpr uint32_t POINTER(uint32_t index, uint32_t pointer) + { + return ABSOLUTE_POINTERS ? pointer : index + pointer; + } + + /// @brief The min corner of the AABB + ArrayMax3d aabb_min; + /// @brief The max corner of the AABB + ArrayMax3d aabb_max; + /// @brief The vertex ids (v0, v1, v2) + std::array vertex_ids; + /// @brief Pointer to the left child or INVALID_POINTER in case of leaf + int left; + /// @brief Pointer to the right child or INVALID_POINTER in case of leaf + int right; + /// @brief The primitive id (INVALID_ID <=> inner node) + index_t primitive_id; + + bool is_leaf() const + { + assert(is_valid()); + return left == INVALID_POINTER && right == INVALID_POINTER; + } + + bool is_inner() const + { + return left != INVALID_POINTER && right != INVALID_POINTER; + } + + bool is_valid() const + { + return !((left == INVALID_POINTER) ^ (right == INVALID_POINTER)); + } + + bool intersects(const Node& other) const + { + return (aabb_min <= other.aabb_max).all() + && (other.aabb_min <= aabb_max).all(); + } + }; + + struct MortonCodeElement { + uint64_t morton_code; ///< Key for sorting + size_t box_id; ///< Pointer into boxes buffer + }; + + struct ConstructionInfo { + /// @brief Parent to the parent + int parent; + /// @brief Number of threads that arrived + std::atomic visitation_count = 0; + }; + +public: + LBVH(); + ~LBVH(); + + /// @brief Get the name of the broad phase method. + /// @return The name of the broad phase method. + std::string name() const override { return "LBVH"; } + + using BroadPhase::build; + + /// @brief Clear any built data. + void clear() override; + + /// @brief Find the candidate vertex-vertex collisions. + /// @param[out] candidates The candidate vertex-vertex collisions. + void detect_vertex_vertex_candidates( + std::vector& candidates) const override; + + /// @brief Find the candidate edge-vertex collisions. + /// @param[out] candidates The candidate edge-vertex collisions. + void detect_edge_vertex_candidates( + std::vector& candidates) const override; + + /// @brief Find the candidate edge-edge collisions. + /// @param[out] candidates The candidate edge-edge collisions. + void detect_edge_edge_candidates( + std::vector& candidates) const override; + + /// @brief Find the candidate face-vertex collisions. + /// @param[out] candidates The candidate face-vertex collisions. + void detect_face_vertex_candidates( + std::vector& candidates) const override; + + /// @brief Find the candidate edge-face intersections. + /// @param[out] candidates The candidate edge-face intersections. + void detect_edge_face_candidates( + std::vector& candidates) const override; + + /// @brief Find the candidate face-face collisions. + /// @param[out] candidates The candidate face-face collisions. + void detect_face_face_candidates( + std::vector& candidates) const override; + + const std::vector& vertex_nodes() const { return vertex_bvh; } + const std::vector& edge_nodes() const { return edge_bvh; } + const std::vector& face_nodes() const { return face_bvh; } + +protected: + /// @brief Build the broad phase for collision detection. + /// @note Assumes the vertex_boxes have been built. + /// @param edges Collision mesh edges + /// @param faces Collision mesh faces + void build( + Eigen::ConstRef edges, + Eigen::ConstRef faces) override; + + /// @brief Initialize a LBVH from a set of boxes. + /// @param[in] boxes Set of boxes to initialize the LBVH with. + /// @param[out] bvh The LBVH to initialize. + void + init_bvh(const std::vector& boxes, std::vector& lbvh) const; + + /// @brief Detect candidate collisions between a LBVH and a sets of boxes. + /// @tparam Candidate Type of candidate collision. + /// @tparam swap_order Whether to swap the order of box id with the LBVH id when adding to the candidates. + /// @tparam triangular Whether to consider (i, j) and (j, i) as the same. + /// @param[in] boxes The boxes to detect collisions with. + /// @param[in] bvh The LBVH to detect collisions with. + /// @param[in] can_collide Function to determine if two primitives can collide given their ids. + /// @param[out] candidates The candidate collisions. + template < + typename Candidate, + bool swap_order = false, + bool triangular = false> + static void detect_candidates( + const std::vector& source, + const std::vector& target, + const std::function& can_collide, + std::vector& candidates); + + template + static void detect_candidates( + const std::vector& source_and_target, + const std::function& can_collide, + std::vector& candidates) + { + detect_candidates( + source_and_target, source_and_target, can_collide, candidates); + } + + /// @brief BVH containing the vertices. + std::vector vertex_bvh; + /// @brief BVH containing the edges. + std::vector edge_bvh; + /// @brief BVH containing the faces. + std::vector face_bvh; + /// @brief The axis-aligned bounding box of the entire mesh. + struct { + ArrayMax3d min; + ArrayMax3d max; + } mesh_aabb; +}; + +} // namespace ipc \ No newline at end of file diff --git a/src/ipc/config.hpp.in b/src/ipc/config.hpp.in index 61fb05ad6..9cf1bb020 100644 --- a/src/ipc/config.hpp.in +++ b/src/ipc/config.hpp.in @@ -16,6 +16,7 @@ #cmakedefine IPC_TOOLKIT_WITH_ROBIN_MAP #cmakedefine IPC_TOOLKIT_WITH_ABSEIL #cmakedefine IPC_TOOLKIT_WITH_FILIB +#cmakedefine IPC_TOOLKIT_WITH_PROFILER namespace ipc { diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp new file mode 100644 index 000000000..5e2e324e6 --- /dev/null +++ b/src/ipc/utils/profiler.cpp @@ -0,0 +1,15 @@ +#include "profiler.hpp" + +#ifdef IPC_TOOLKIT_WITH_PROFILER + +namespace ipc { + +Profiler& profiler() +{ + static Profiler instance; + return instance; +} + +} // namespace ipc + +#endif \ No newline at end of file diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp new file mode 100644 index 000000000..d25ed31bc --- /dev/null +++ b/src/ipc/utils/profiler.hpp @@ -0,0 +1,103 @@ +#pragma once + +#include + +#ifdef IPC_TOOLKIT_WITH_PROFILER + +// clang-format off +#include +#include +// clang-format on + +#include +#include + +#define IPC_TOOLKIT_PROFILE_BLOCK(...) \ + ipc::ProfilePoint __ipc_profile_point(__VA_ARGS__) + +namespace ipc { + +class Profiler { +public: + Profiler() = default; + + // ~Profiler() { print(); } + + void clear() { m_data.clear(); } + + void start(const std::string& name) + { + current_scope.push_back(name); + if (!m_data.contains(current_scope)) { + m_data[current_scope] = { + { "time_ms", 0 }, + { "count", 0 }, + }; + } + } + + void stop(const double time_ms) + { + const static std::string log_fmt_text = fmt::format( + "[{}] {{}} {{:.6f}} ms", + fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); + + logger().trace(log_fmt_text, current_scope.to_string(), time_ms); + + assert(m_data.contains(current_scope)); + assert(m_data.at(current_scope).contains("time_ms")); + assert(m_data.at(current_scope).contains("count")); + m_data[current_scope]["time_ms"] = + m_data[current_scope]["time_ms"].get() + time_ms; + m_data[current_scope]["count"] = + m_data[current_scope]["count"].get() + 1; + current_scope.pop_back(); + } + + void print() const + { + logger().trace( + "[{}] profiler: {}", + fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing"), + m_data.dump(2)); + } + + const nlohmann::json& data() const { return m_data; } + nlohmann::json& data() { return m_data; } + +protected: + nlohmann::json m_data; + nlohmann::json::json_pointer current_scope; +}; + +Profiler& profiler(); + +template class ProfilePoint { +public: + ProfilePoint(const std::string& name) : ProfilePoint(profiler(), name) { } + + ProfilePoint(Profiler& p_profiler, const std::string& name) + : m_profiler(p_profiler) + { + m_profiler.start(name); + timer.start(); + } + + ~ProfilePoint() + { + timer.stop(); + m_profiler.stop(timer.getElapsedTimeInMilliSec()); + } + +protected: + Profiler& m_profiler; + Timer timer; +}; + +} // namespace ipc + +#else + +#define IPC_TOOLKIT_PROFILE_BLOCK(...) + +#endif \ No newline at end of file diff --git a/tests/src/tests/broad_phase/CMakeLists.txt b/tests/src/tests/broad_phase/CMakeLists.txt index 602b7947f..1806f2a49 100644 --- a/tests/src/tests/broad_phase/CMakeLists.txt +++ b/tests/src/tests/broad_phase/CMakeLists.txt @@ -2,6 +2,7 @@ set(SOURCES # Tests test_aabb.cpp test_broad_phase.cpp + test_lbvh.cpp test_spatial_hash.cpp test_stq.cpp test_voxel_size_heuristic.cpp diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp new file mode 100644 index 000000000..475b381ec --- /dev/null +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -0,0 +1,315 @@ +#include + +#include +#include + +#include +#include +// #include +// #include + +#include + +#include + +#include + +using namespace ipc; + +namespace { + +bool is_aabb_union(LBVH::Node parent, LBVH::Node childA, LBVH::Node childB) +{ + AABB children; + children.min = childA.aabb_min.min(childB.aabb_min); + children.max = childA.aabb_max.max(childB.aabb_max); + constexpr float EPS = 1e-4f; + return (abs(parent.aabb_max - children.max) < EPS).all() + && (abs(parent.aabb_min - children.min) < EPS).all(); +} + +void traverse_lbvh( + const std::vector& lbvh_nodes, + const uint32_t index, + std::vector& visited) +{ + const LBVH::Node& node = lbvh_nodes[index]; + CHECK(node.is_valid()); + + if (node.left == LBVH::Node::INVALID_POINTER) { + // leaf + CHECK(!visited[index]); + visited[index] = true; + } else { + // inner node + CHECK(!visited[index]); + visited[index] = true; + + uint32_t left_child_index = LBVH::Node::POINTER(index, node.left); + uint32_t right_child_index = LBVH::Node::POINTER(index, node.right); + + // verify aabbs + LBVH::Node childA = lbvh_nodes[left_child_index]; + LBVH::Node childB = lbvh_nodes[right_child_index]; + + { + CAPTURE( + index, left_child_index, right_child_index, + node.aabb_min.transpose(), childA.aabb_min.transpose(), + childB.aabb_min.transpose(), node.aabb_max.transpose(), + childA.aabb_max.transpose(), childB.aabb_max.transpose()); + CHECK(is_aabb_union(node, childA, childB)); + } + + // continue traversal + traverse_lbvh(lbvh_nodes, left_child_index, visited); + traverse_lbvh(lbvh_nodes, right_child_index, visited); + } +} + +void check_valid_lbvh_nodes(const std::vector& lbvh_nodes) +{ + std::vector visited(lbvh_nodes.size(), false); + traverse_lbvh(lbvh_nodes, 0, visited); + REQUIRE( + std::all_of(visited.begin(), visited.end(), [](bool v) { return v; })); +} +} // namespace + +TEST_CASE("LBVH::build", "[broad_phase][lbvh]") +{ + constexpr double inflation_radius = 1e-3; + + const std::shared_ptr lbvh = std::make_shared(); + + SECTION("Static") + { + const std::string mesh = GENERATE("cube.ply", "bunny.ply"); + + Eigen::MatrixXd vertices; + Eigen::MatrixXi edges, faces; + REQUIRE(tests::load_mesh(mesh, vertices, edges, faces)); + + lbvh->build(vertices, edges, faces, inflation_radius); + } + SECTION("Dynamic") + { + const std::string mesh_t0 = "cloth_ball92.ply"; + const std::string mesh_t1 = "cloth_ball93.ply"; + + Eigen::MatrixXd vertices_t0, vertices_t1; + Eigen::MatrixXi edges, faces; + REQUIRE(tests::load_mesh(mesh_t0, vertices_t0, edges, faces)); + REQUIRE(tests::load_mesh(mesh_t1, vertices_t1, edges, faces)); + + lbvh->build(vertices_t0, vertices_t1, edges, faces, inflation_radius); + } + + std::ofstream dot_file("/Users/zachary/Downloads/lbvh.dot"); + dot_file << "digraph G {\nnode [shape = circle;];" << std::endl; + + for (int i = 0; i < lbvh->vertex_nodes().size(); ++i) { + dot_file << "N" << i << " [label = \"" + << (lbvh->vertex_nodes()[i].is_inner() ? "I" : "L") + << (lbvh->vertex_nodes()[i].is_inner() + ? i + : lbvh->vertex_nodes()[i].primitive_id) + << "\"];" << std::endl; + } + + for (int i = 0; i < lbvh->vertex_nodes().size(); ++i) { + const auto& node = lbvh->vertex_nodes()[i]; + if (node.is_inner()) { + dot_file << "N" << i << " -> N" << node.left << ";" << std::endl; + dot_file << "N" << i << " -> N" << node.right << ";" << std::endl; + } + } + + dot_file << "}" << std::endl; + dot_file.close(); + + // -- TODO: Check the morton codes ---------------------------------------- + // -- TODO: Check the morton codes are sorted ----------------------------- + + // -- Check the LBVH nodes are all reachable and contain their children --- + + check_valid_lbvh_nodes(lbvh->vertex_nodes()); + // check_valid_lbvh_nodes(lbvh->edge_nodes()); + // check_valid_lbvh_nodes(lbvh->face_nodes()); + + // -- Check clear() works ------------------------------------------------- + lbvh->clear(); + + // CHECK(lbvh->vertex_boxes().empty()); + // CHECK(lbvh->edge_boxes().empty()); + // CHECK(lbvh->face_boxes().empty()); + + CHECK(lbvh->vertex_nodes().empty()); + CHECK(lbvh->edge_nodes().empty()); + CHECK(lbvh->face_nodes().empty()); +} + +TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") +{ + constexpr double inflation_radius = 0; + + std::string mesh_t0, mesh_t1; + SECTION("Two cubes") + { + mesh_t0 = "two-cubes-far.ply"; + mesh_t1 = "two-cubes-intersecting.ply"; + } + SECTION("Cloth-Ball") + { + mesh_t0 = "cloth_ball92.ply"; + mesh_t1 = "cloth_ball93.ply"; + } + + Eigen::MatrixXd vertices_t0, vertices_t1; + Eigen::MatrixXi edges, faces; + REQUIRE(tests::load_mesh(mesh_t0, vertices_t0, edges, faces)); + REQUIRE(tests::load_mesh(mesh_t1, vertices_t1, edges, faces)); + + const std::shared_ptr lbvh = std::make_shared(); + lbvh->build(vertices_t0, vertices_t1, edges, faces, inflation_radius); + + const std::shared_ptr bvh = std::make_shared(); + bvh->build(vertices_t0, vertices_t1, edges, faces, inflation_radius); + + // detect_vertex_vertex_candidates + { + std::vector vv_candidates; + lbvh->detect_vertex_vertex_candidates(vv_candidates); + + std::vector expected_vv_candidates; + bvh->detect_vertex_vertex_candidates(expected_vv_candidates); + + CHECK(vv_candidates.size() == expected_vv_candidates.size()); + // TODO: Check the candidates are the same + } + + { + std::vector ev_candidates; + lbvh->detect_edge_vertex_candidates(ev_candidates); + + std::vector expected_ev_candidates; + bvh->detect_edge_vertex_candidates(expected_ev_candidates); + + CHECK(ev_candidates.size() == expected_ev_candidates.size()); + // TODO: Check the candidates are the same + } + + { + std::vector ee_candidates; + lbvh->detect_edge_edge_candidates(ee_candidates); + + std::vector expected_ee_candidates; + bvh->detect_edge_edge_candidates(expected_ee_candidates); + + CHECK(ee_candidates.size() == expected_ee_candidates.size()); + // TODO: Check the candidates are the same + } + + { + std::vector fv_candidates; + lbvh->detect_face_vertex_candidates(fv_candidates); + + std::vector expected_fv_candidates; + bvh->detect_face_vertex_candidates(expected_fv_candidates); + + CHECK(fv_candidates.size() == expected_fv_candidates.size()); + // TODO: Check the candidates are the same + } + + { + std::vector ef_candidates; + lbvh->detect_edge_face_candidates(ef_candidates); + + std::vector expected_ef_candidates; + bvh->detect_edge_face_candidates(expected_ef_candidates); + + CHECK(ef_candidates.size() == expected_ef_candidates.size()); + // TODO: Check the candidates are the same + } + + { + std::vector ff_candidates; + lbvh->detect_face_face_candidates(ff_candidates); + + std::vector expected_ff_candidates; + bvh->detect_face_face_candidates(expected_ff_candidates); + + CHECK(ff_candidates.size() == expected_ff_candidates.size()); + // TODO: Check the candidates are the same + } + +#ifdef IPC_TOOLKIT_WITH_PROFILER + ipc::profiler().print(); + ipc::profiler().clear(); +#endif +} + +// TEST_CASE("Radix sort", "[radix_sort]") +// { +// constexpr size_t size = 100'000; +// constexpr size_t max_value = 0xFFFFFFFF; + +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_int_distribution distrib(0, max_value); + +// using MortonCode = std::array; +// std::vector data(size); +// for (size_t i = 0; i < size; ++i) { +// data[i][0] = distrib(gen); +// data[i][1] = 0; +// } + +// // Radix sort + +// kp::Manager mgr; + +// auto t_data = mgr.tensor( +// data.data(), data.size(), sizeof(MortonCode), +// kp::Memory::DataTypes::eCustom); +// auto t_sorted_data = mgr.tensor( +// data.size(), sizeof(MortonCode), kp::Memory::DataTypes::eCustom); + +// std::vector> params = { { +// mgr.tensor(1, sizeof(LBVH::AABB), kp::Memory::DataTypes::eCustom), +// t_data, +// t_sorted_data, +// } }; + +// auto algorithm = mgr.algorithm( +// params, +// SPVShader(SINGLE_RADIXSORT_SPV.begin(), SINGLE_RADIXSORT_SPV.end()), +// kp::Workgroup({ { 1, 1, 1 } }), {}, +// std::vector(1, data.size())); + +// mgr.sequence() +// ->record(params) +// ->record(algorithm) +// ->record(params) +// ->eval(); + +// std::vector sorted_data = +// t_sorted_data->vector(); + +// { +// CAPTURE(sorted_data); +// REQUIRE( +// std::is_sorted( +// sorted_data.begin(), sorted_data.end(), +// [](const MortonCode& a, const MortonCode& b) { +// return a[0] < b[0]; +// })); +// } + +// // Compare against CPU sort +// std::sort( +// data.begin(), data.end(), +// [](const MortonCode& a, const MortonCode& b) { return a[0] < b[0]; +// }); +// CHECK(data == sorted_data); +// } diff --git a/tests/src/tests/utils.cpp b/tests/src/tests/utils.cpp index 4bcdfee66..35a2398fe 100644 --- a/tests/src/tests/utils.cpp +++ b/tests/src/tests/utils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef IPC_TOOLKIT_WITH_CUDA #include @@ -24,10 +25,11 @@ std::vector> broad_phases() { return { { std::make_shared(), - std::make_shared(), - std::make_shared(), + // std::make_shared(), + // std::make_shared(), std::make_shared(), - std::make_shared(), + std::make_shared(), + // std::make_shared(), #ifdef IPC_TOOLKIT_WITH_CUDA std::make_shared(), #endif From ed41da8ce5c7c15bd18e8c0b23e3758d6e141eb5 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 1 Sep 2025 22:40:57 -0400 Subject: [PATCH 02/25] Enhance profiler functionality and integrate into LBVH and BVH implementations --- .clang-format | 2 - python/examples/lbvh.py | 2 +- src/ipc/broad_phase/bvh.cpp | 15 + src/ipc/broad_phase/lbvh.cpp | 383 ++++++++++++++-------- src/ipc/utils/CMakeLists.txt | 2 + src/ipc/utils/profiler.cpp | 52 +++ src/ipc/utils/profiler.hpp | 43 ++- tests/src/tests/broad_phase/test_lbvh.cpp | 72 +--- tests/src/tests/main.cpp | 2 +- 9 files changed, 367 insertions(+), 206 deletions(-) diff --git a/.clang-format b/.clang-format index a16b74015..357149634 100644 --- a/.clang-format +++ b/.clang-format @@ -1,5 +1,4 @@ --- -Language: Cpp BasedOnStyle: WebKit AlignAfterOpenBracket: AlwaysBreak AlignTrailingComments: @@ -38,6 +37,5 @@ IncludeCategories: SortPriority: 1 CaseSensitive: true PackConstructorInitializers: CurrentLine -RemoveEmptyLinesInUnwrappedLines: true SortIncludes: CaseInsensitive ... diff --git a/python/examples/lbvh.py b/python/examples/lbvh.py index 004cd4012..21f88b3a6 100644 --- a/python/examples/lbvh.py +++ b/python/examples/lbvh.py @@ -7,7 +7,7 @@ import pathlib mesh = meshio.read(pathlib.Path( - __file__).parents[2] / "tests/data/cloth_ball92.ply") + __file__).parents[2] / "tests/data/puffer-ball/20.ply") lbvh = ipctk.LBVH() lbvh.build(mesh.points, np.array([], dtype=int), mesh.cells_dict["triangle"]) diff --git a/src/ipc/broad_phase/bvh.cpp b/src/ipc/broad_phase/bvh.cpp index a67838835..b46b4b170 100644 --- a/src/ipc/broad_phase/bvh.cpp +++ b/src/ipc/broad_phase/bvh.cpp @@ -1,6 +1,7 @@ #include "bvh.hpp" #include +#include #include #include @@ -37,6 +38,8 @@ void BVH::init_bvh(const std::vector& boxes, SimpleBVH::BVH& bvh) return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::init_bvh"); + std::vector> vector_boxes(boxes.size()); for (int i = 0; i < boxes.size(); i++) { vector_boxes[i] = { { to_3D(boxes[i].min), to_3D(boxes[i].max) } }; @@ -106,6 +109,8 @@ void BVH::detect_vertex_vertex_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::detect_vertex_vertex_candidates"); + detect_candidates< VertexVertexCandidate, /*swap_order=*/false, /*triangular=*/true>( vertex_boxes, *vertex_bvh, can_vertices_collide, candidates); @@ -118,6 +123,8 @@ void BVH::detect_edge_vertex_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::detect_edge_vertex_candidates"); + // In 2D and for codimensional edge-vertex collisions, there are more // vertices than edges, so we want to iterate over the edges. detect_candidates( @@ -132,6 +139,8 @@ void BVH::detect_edge_edge_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::detect_edge_edge_candidates"); + detect_candidates< EdgeEdgeCandidate, /*swap_order=*/false, /*triangular=*/true>( edge_boxes, *edge_bvh, std::bind(&BVH::can_edges_collide, this, _1, _2), @@ -145,6 +154,8 @@ void BVH::detect_face_vertex_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::detect_face_vertex_candidates"); + // The ratio vertices:faces is 1:2, so we want to iterate over the vertices. detect_candidates( vertex_boxes, *face_bvh, @@ -158,6 +169,8 @@ void BVH::detect_edge_face_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::detect_edge_face_candidates"); + // The ratio edges:faces is 3:2, so we want to iterate over the faces. detect_candidates( face_boxes, *edge_bvh, @@ -171,6 +184,8 @@ void BVH::detect_face_face_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("BVH::detect_face_face_candidates"); + detect_candidates< FaceFaceCandidate, /*swap_order=*/false, /*triangular=*/true>( face_boxes, *face_bvh, std::bind(&BVH::can_faces_collide, this, _1, _2), diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 3026e96dc..af7669629 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -1,6 +1,7 @@ #include "lbvh.hpp" #include +#include #include #include @@ -165,123 +166,145 @@ void LBVH::init_bvh( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::init_bvh"); + std::vector morton_codes(boxes.size()); - tbb::parallel_for( - tbb::blocked_range(0, boxes.size()), - [&](const tbb::blocked_range& r) { - for (size_t i = r.begin(); i < r.end(); i++) { - const auto& box = boxes[i]; - - const ArrayMax3d center = 0.5 * (box.min + box.max); - const ArrayMax3d mapped_center = - (center - mesh_aabb.min) / (mesh_aabb.max - mesh_aabb.min); - - morton_codes[i].morton_code = morton_3D( - mapped_center.x(), mapped_center.y(), mapped_center.z()); - morton_codes[i].box_id = i; - } - }); + { + IPC_TOOLKIT_PROFILE_BLOCK("compute_morton_codes"); + const ArrayMax3d mesh_width = mesh_aabb.max - mesh_aabb.min; + tbb::parallel_for( + tbb::blocked_range(0, boxes.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + const auto& box = boxes[i]; + + const ArrayMax3d center = 0.5 * (box.min + box.max); + const ArrayMax3d mapped_center = + (center - mesh_aabb.min) / mesh_width; + + morton_codes[i].morton_code = morton_3D( + mapped_center.x(), mapped_center.y(), + mapped_center.z()); + morton_codes[i].box_id = i; + } + }); + } - tbb::parallel_sort( - morton_codes.begin(), morton_codes.end(), - [](const MortonCodeElement& a, const MortonCodeElement& b) { - return a.morton_code < b.morton_code; - }); + { + IPC_TOOLKIT_PROFILE_BLOCK("sort_morton_codes"); + tbb::parallel_sort( + morton_codes.begin(), morton_codes.end(), + [](const MortonCodeElement& a, const MortonCodeElement& b) { + return a.morton_code < b.morton_code; + }); + } const int LEAF_OFFSET = int(boxes.size()) - 1; lbvh.resize(2 * boxes.size() - 1); std::vector construction_infos(2 * boxes.size() - 1); - tbb::parallel_for( - tbb::blocked_range(0, boxes.size()), - [&](const tbb::blocked_range& r) { - for (size_t i = r.begin(); i < r.end(); i++) { - - assert(i < boxes.size()); - { - const auto& box = boxes[morton_codes[i].box_id]; - lbvh[LEAF_OFFSET + i].left = Node::INVALID_POINTER; - lbvh[LEAF_OFFSET + i].right = Node::INVALID_POINTER; - lbvh[LEAF_OFFSET + i].aabb_min = box.min; - lbvh[LEAF_OFFSET + i].aabb_max = box.max; - lbvh[LEAF_OFFSET + i].vertex_ids = box.vertex_ids; - lbvh[LEAF_OFFSET + i].primitive_id = morton_codes[i].box_id; - } - - if (i < boxes.size() - 1) { - // Find out which range of objects the node corresponds - // to. (This is where the magic happens!) - - int first, last; - determine_range(morton_codes, int(i), first, last); - - // Determine where to split the range - int split = find_split(morton_codes, first, last); - - // Select child_a - int child_a = -1; - if (split == first) { - child_a = LEAF_OFFSET + split; // pointer to leaf node - } else { - child_a = split; // pointer to internal node + { + IPC_TOOLKIT_PROFILE_BLOCK("build_hierarchy"); + tbb::parallel_for( + tbb::blocked_range(0, boxes.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + + assert(i < boxes.size()); + { + const auto& box = boxes[morton_codes[i].box_id]; + lbvh[LEAF_OFFSET + i].left = Node::INVALID_POINTER; + lbvh[LEAF_OFFSET + i].right = Node::INVALID_POINTER; + lbvh[LEAF_OFFSET + i].aabb_min = box.min; + lbvh[LEAF_OFFSET + i].aabb_max = box.max; + lbvh[LEAF_OFFSET + i].vertex_ids = box.vertex_ids; + lbvh[LEAF_OFFSET + i].primitive_id = + morton_codes[i].box_id; } - // Select child_b - int child_b = -1; - if (split + 1 == last) { - child_b = - LEAF_OFFSET + split + 1; // pointer to leaf node - } else { - child_b = split + 1; // pointer to internal node + if (i < boxes.size() - 1) { + // Find out which range of objects the node corresponds + // to. (This is where the magic happens!) + + int first, last; + determine_range(morton_codes, int(i), first, last); + + // Determine where to split the range + int split = find_split(morton_codes, first, last); + + // Select child_a + int child_a = -1; + if (split == first) { + // pointer to leaf node + child_a = LEAF_OFFSET + split; + } else { + child_a = split; // pointer to internal node + } + + // Select child_b + int child_b = -1; + if (split + 1 == last) { + child_b = + LEAF_OFFSET + split + 1; // pointer to leaf node + } else { + child_b = split + 1; // pointer to internal node + } + + // Record parent-child relationships + if constexpr (Node::ABSOLUTE_POINTERS) { + lbvh[i].left = child_a; + lbvh[i].right = child_b; + } else { + lbvh[i].left = child_a - int(i); + lbvh[i].right = child_b - int(i); + } + construction_infos[child_a].parent = int(i); + construction_infos[child_b].parent = int(i); } - // Record parent-child relationships - if constexpr (Node::ABSOLUTE_POINTERS) { - lbvh[i].left = child_a; - lbvh[i].right = child_b; - } else { - lbvh[i].left = child_a - int(i); - lbvh[i].right = child_b - int(i); + // node 0 is the root + if (i == 0) { + construction_infos[0].parent = 0; } - construction_infos[child_a].parent = int(i); - construction_infos[child_b].parent = int(i); - } - - // node 0 is the root - if (i == 0) { - construction_infos[0].parent = 0; } - } - }); + }); + } - tbb::parallel_for( - tbb::blocked_range(0, boxes.size()), - [&](const tbb::blocked_range& r) { - for (size_t i = r.begin(); i < r.end(); i++) { - int node_idx = construction_infos[LEAF_OFFSET + i].parent; - while (true) { - if (construction_infos[node_idx].visitation_count++ == 0) { - // this is the first thread that arrived at this node -> - // finished - break; - } - // this is the second thread that arrived at this node, both - // children are computed -> compute aabb union and continue - Node node = lbvh[node_idx], child_a, child_b; - child_a = lbvh[Node::POINTER(node_idx, node.left)]; - child_b = lbvh[Node::POINTER(node_idx, node.right)]; - - node.aabb_min = child_a.aabb_min.min(child_b.aabb_min); - node.aabb_max = child_a.aabb_max.max(child_b.aabb_max); - - lbvh[node_idx] = node; - if (node_idx == 0) { - break; // root node + { + IPC_TOOLKIT_PROFILE_BLOCK("populate_boxes"); + tbb::parallel_for( + tbb::blocked_range(0, boxes.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + int node_idx = construction_infos[LEAF_OFFSET + i].parent; + while (true) { + auto& info = construction_infos[node_idx]; + if (info.visitation_count++ == 0) { + // this is the first thread that arrived at this + // node -> finished + break; + } + // this is the second thread that arrived at this node, + // both children are computed -> compute aabb union and + // continue + const Node& child_b = + lbvh[Node::POINTER(node_idx, lbvh[node_idx].right)]; + const Node& child_a = + lbvh[Node::POINTER(node_idx, lbvh[node_idx].left)]; + + lbvh[node_idx].aabb_min = + child_a.aabb_min.min(child_b.aabb_min); + lbvh[node_idx].aabb_max = + child_a.aabb_max.max(child_b.aabb_max); + + if (node_idx == 0) { + break; // root node + } + node_idx = info.parent; } - node_idx = construction_infos[node_idx].parent; } - } - }); + }); + } } void LBVH::clear() @@ -293,7 +316,8 @@ void LBVH::clear() } namespace { - bool share_a_vertex(std::array a, std::array b) + inline bool share_a_vertex( + const std::array& a, const std::array& b) { return a[0] == b[0] || a[0] == b[1] || a[0] == b[2] || (a[1] >= 0 && (a[1] == b[0] || a[1] == b[1] || a[1] == b[2])) @@ -301,7 +325,7 @@ namespace { } template - void attempt_add_candidate( + inline void attempt_add_candidate( const LBVH::Node& query, const LBVH::Node& node, const std::function& can_collide, @@ -331,22 +355,25 @@ namespace { template void traverse_lbvh( - const LBVH::Node query, + const LBVH::Node& query, const std::vector& lbvh, const std::function& can_collide, std::vector& candidates) { - // Allocate traversal stack from thread-local memory, - // and push NULL to indicate that there are no postponed nodes. - std::stack stack; - stack.push(LBVH::Node::INVALID_POINTER); + // Use a fixed-size array as a stack to avoid dynamic allocations + constexpr int MAX_STACK_SIZE = 64; + int stack[MAX_STACK_SIZE]; + int stack_ptr = 0; + stack[stack_ptr++] = LBVH::Node::INVALID_POINTER; - // Traverse nodes starting from the root. int node_idx = 0; // root do { const LBVH::Node& node = lbvh[node_idx]; - // Check each child node for overlap. + // Prefetch child nodes to reduce cache misses + __builtin_prefetch(&lbvh[node.left], 0, 1); + __builtin_prefetch(&lbvh[node.right], 0, 1); + const LBVH::Node& child_l = lbvh[node.left]; const LBVH::Node& child_r = lbvh[node.right]; const bool intersects_l = child_l.intersects(query); @@ -357,7 +384,6 @@ namespace { attempt_add_candidate( query, child_l, can_collide, candidates); } - if (intersects_r && child_r.is_leaf()) { attempt_add_candidate( query, child_r, can_collide, candidates); @@ -368,18 +394,112 @@ namespace { bool traverse_r = (intersects_r && !child_r.is_leaf()); if (!traverse_l && !traverse_r) { - node_idx = stack.top(); // traverse the top of the stack next - stack.pop(); + node_idx = stack[--stack_ptr]; } else { - // traverse the children - node_idx = (traverse_l) ? node.left : node.right; + node_idx = traverse_l ? node.left : node.right; if (traverse_l && traverse_r) { - // postpone traversal of the right child - stack.push(node.right); + // Postpone traversal of the right child + stack[stack_ptr++] = node.right; } } } while (node_idx != LBVH::Node::INVALID_POINTER); // Same as root } + + // Traverses the target BVH independently for each leaf node in the source + // BVH. For each source leaf, performs a stack-based traversal of the target + // BVH, collecting candidate pairs that pass the can_collide predicate. + // Results are stored in thread-local storage for later merging. + template + void independent_traversal( + const std::vector& source, + const std::vector& target, + const std::function& can_collide, + tbb::enumerable_thread_specific>& storage) + { + // Calculate the offset to the first leaf node in the source BVH. + const size_t SOURCE_LEAF_OFFSET = source.size() / 2; + const size_t N_SOURCE_LEAVES = SOURCE_LEAF_OFFSET + 1; + + tbb::parallel_for( + tbb::blocked_range(size_t(0), N_SOURCE_LEAVES), + [&](const tbb::blocked_range& r) { + auto& local_candidates = storage.local(); + + for (size_t i = r.begin(); i < r.end(); ++i) { + const auto& query_node = source[SOURCE_LEAF_OFFSET + i]; + traverse_lbvh( + query_node, target, can_collide, local_candidates); + } + }); + } + + // Parallel traversal of two BVHs using TBB task_group. + // Recursively explores all pairs of nodes whose bounding boxes intersect, + // adding candidate pairs when both nodes are leaves. + template + void traverse_lbvh( + const std::vector& source, + const std::vector& target, + int source_idx, + int target_idx, + const std::function& can_collide, + tbb::task_group& g, + tbb::enumerable_thread_specific>& storage) + { + // 1. Check for bounding box intersection + if (!source[source_idx].intersects(target[target_idx])) { + return; + } + + // 2. Handle leaf nodes (base case) + if (source[source_idx].is_leaf() && target[target_idx].is_leaf()) { + attempt_add_candidate( + source[source_idx], target[target_idx], can_collide, + storage.local()); + return; + } + + // 3. Handle mixed or internal nodes (parallel recursion) + + // TBB's task_group provides an easy way to offload tasks. + const auto dispatch = [&](int source_idx, int target_idx) { + g.run([&, source_idx, target_idx] { + traverse_lbvh( + source, target, source_idx, target_idx, can_collide, g, + storage); + }); + }; + + if (source[source_idx].is_leaf()) { + dispatch(source_idx, target[target_idx].left); + dispatch(source_idx, target[target_idx].right); + } else if (target[target_idx].is_leaf()) { + dispatch(source[source_idx].left, target_idx); + dispatch(source[source_idx].right, target_idx); + } else { + // Both internal nodes, test all four combinations. + dispatch(source[source_idx].left, target[target_idx].left); + dispatch(source[source_idx].left, target[target_idx].right); + dispatch(source[source_idx].right, target[target_idx].left); + dispatch(source[source_idx].right, target[target_idx].right); + } + } + + template + void simultaneous_traversal( + const std::vector& source, + const std::vector& target, + const std::function& can_collide, + tbb::enumerable_thread_specific>& storage) + { + tbb::task_group g; // TBB task group to manage parallel work + + g.run_and_wait([&] { + traverse_lbvh( + source, target, /*source_root*/ 0, /*target_root*/ 0, + can_collide, g, storage); + }); + } } // namespace template @@ -389,22 +509,12 @@ void LBVH::detect_candidates( const std::function& can_collide, std::vector& candidates) { - // floor((n + (n - 1)) / 2) = floor(n - 1/2) = n - 1 - const size_t SOURCE_LEAF_OFFSET = source.size() / 2; - const size_t N_SOURCE_LEAVES = SOURCE_LEAF_OFFSET + 1; - tbb::enumerable_thread_specific> storage; - tbb::parallel_for( - tbb::blocked_range(size_t(0), N_SOURCE_LEAVES), - [&](const tbb::blocked_range& r) { - auto& local_candidates = storage.local(); - for (size_t i = r.begin(); i < r.end(); i++) { - traverse_lbvh( - source[SOURCE_LEAF_OFFSET + i], target, can_collide, - local_candidates); - } - }); + independent_traversal( + source, target, can_collide, storage); + // simultaneous_traversal( + // source, target, can_collide, storage); merge_thread_local_vectors(storage, candidates); } @@ -416,6 +526,8 @@ void LBVH::detect_vertex_vertex_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::detect_vertex_vertex_candidates"); + detect_candidates(vertex_bvh, can_vertices_collide, candidates); } @@ -426,6 +538,8 @@ void LBVH::detect_edge_vertex_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::detect_edge_vertex_candidates"); + // In 2D and for codimensional edge-vertex collisions, there are more // vertices than edges, so we want to iterate over the edges. detect_candidates( @@ -440,6 +554,8 @@ void LBVH::detect_edge_edge_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::detect_edge_edge_candidates"); + detect_candidates( edge_bvh, std::bind(&LBVH::can_edges_collide, this, _1, _2), candidates); @@ -452,6 +568,8 @@ void LBVH::detect_face_vertex_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::detect_face_vertex_candidates"); + // The ratio vertices:faces is 1:2, so we want to iterate over the vertices. detect_candidates( vertex_bvh, face_bvh, @@ -465,6 +583,8 @@ void LBVH::detect_edge_face_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::detect_edge_face_candidates"); + // The ratio edges:faces is 3:2, so we want to iterate over the faces. detect_candidates( face_bvh, edge_bvh, @@ -478,6 +598,7 @@ void LBVH::detect_face_face_candidates( return; } + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::detect_face_face_candidates"); detect_candidates( face_bvh, std::bind(&LBVH::can_faces_collide, this, _1, _2), candidates); diff --git a/src/ipc/utils/CMakeLists.txt b/src/ipc/utils/CMakeLists.txt index ce09c59a0..ba3f918de 100644 --- a/src/ipc/utils/CMakeLists.txt +++ b/src/ipc/utils/CMakeLists.txt @@ -11,6 +11,8 @@ set(SOURCES logger.cpp logger.hpp merge_thread_local.hpp + profiler.cpp + profiler.hpp save_obj.cpp save_obj.hpp unordered_map_and_set.cpp diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp index 5e2e324e6..9e8c4789b 100644 --- a/src/ipc/utils/profiler.cpp +++ b/src/ipc/utils/profiler.cpp @@ -2,6 +2,8 @@ #ifdef IPC_TOOLKIT_WITH_PROFILER +#include + namespace ipc { Profiler& profiler() @@ -10,6 +12,56 @@ Profiler& profiler() return instance; } +void Profiler::write_csv(const std::string& filename) const +{ + std::ofstream file(filename); + if (!file.is_open()) { + logger().error("Failed to open file: {}", filename); + return; + } + write_csv(file); + file.close(); +} + +void Profiler::write_csv(std::ostream& os) const +{ + os << "Parent,Name,Time (ms),Count\n"; + + if (m_data.empty()) { + os << std::flush; + return; + } + + // Print the profiler data in CSV format using a breadth-first traversal + const nlohmann::json::json_pointer root; + std::queue queue; + queue.push(root); + + while (!queue.empty()) { + const auto ptr = queue.front(); + const auto parent = ptr.parent_pointer(); + queue.pop(); + + assert(m_data.contains(ptr)); + const auto& data = ptr == root ? m_data : m_data.at(ptr); + if (ptr != root) { + os << fmt::format( + "{},{},{:.6g},{:d}\n", parent == root ? "" : parent.back(), + ptr.back(), data.at("time_ms").get(), + data.at("count").get()); + } + + // Traverse child scopes + for (const auto& [key, val] : data.items()) { + if (val.is_object()) { + queue.push(ptr / key); + } + } + } + + os << std::flush; +} + } // namespace ipc #endif \ No newline at end of file diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index d25ed31bc..48a819b13 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -9,11 +9,19 @@ #include // clang-format on -#include #include +#include +#include + +// Helper macro to stringify/paste after expansion +#define IPC_TOOLKIT_PROFILE_BLOCK_CONCAT_IMPL(a, b) a##b +#define IPC_TOOLKIT_PROFILE_BLOCK_CONCAT(a, b) \ + IPC_TOOLKIT_PROFILE_BLOCK_CONCAT_IMPL(a, b) + #define IPC_TOOLKIT_PROFILE_BLOCK(...) \ - ipc::ProfilePoint __ipc_profile_point(__VA_ARGS__) + ipc::ProfilePoint IPC_TOOLKIT_PROFILE_BLOCK_CONCAT( \ + __ipc_profile_point_, __COUNTER__)(__VA_ARGS__) namespace ipc { @@ -42,7 +50,8 @@ class Profiler { "[{}] {{}} {{:.6f}} ms", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); - logger().trace(log_fmt_text, current_scope.to_string(), time_ms); + logger().trace( + fmt::runtime(log_fmt_text), current_scope.to_string(), time_ms); assert(m_data.contains(current_scope)); assert(m_data.at(current_scope).contains("time_ms")); @@ -54,14 +63,24 @@ class Profiler { current_scope.pop_back(); } + void reset() + { + m_data.clear(); + current_scope = nlohmann::json::json_pointer(); // root + } + void print() const { - logger().trace( + logger().info( "[{}] profiler: {}", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing"), m_data.dump(2)); } + void write_csv(const std::string& filename) const; + void print_csv() const { write_csv(std::cout); } + void write_csv(std::ostream& os) const; + const nlohmann::json& data() const { return m_data; } nlohmann::json& data() { return m_data; } @@ -72,7 +91,21 @@ class Profiler { Profiler& profiler(); -template class ProfilePoint { +class ChronoTimer { +public: + void start() { m_start = std::chrono::high_resolution_clock::now(); } + void stop() { m_end = std::chrono::high_resolution_clock::now(); } + double getElapsedTimeInMilliSec() const + { + return std::chrono::duration(m_end - m_start) + .count(); + } + +private: + std::chrono::high_resolution_clock::time_point m_start, m_end; +}; + +template class ProfilePoint { public: ProfilePoint(const std::string& name) : ProfilePoint(profiler(), name) { } diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 475b381ec..8ddd33bd5 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -6,7 +6,7 @@ #include #include // #include -// #include +#include #include @@ -164,6 +164,11 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") mesh_t0 = "cloth_ball92.ply"; mesh_t1 = "cloth_ball93.ply"; } + // SECTION("Puffer-Ball") + // { + // mesh_t0 = "puffer-ball/20.ply"; + // mesh_t1 = "puffer-ball/21.ply"; + // } Eigen::MatrixXd vertices_t0, vertices_t1; Eigen::MatrixXi edges, faces; @@ -248,68 +253,3 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") ipc::profiler().clear(); #endif } - -// TEST_CASE("Radix sort", "[radix_sort]") -// { -// constexpr size_t size = 100'000; -// constexpr size_t max_value = 0xFFFFFFFF; - -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_int_distribution distrib(0, max_value); - -// using MortonCode = std::array; -// std::vector data(size); -// for (size_t i = 0; i < size; ++i) { -// data[i][0] = distrib(gen); -// data[i][1] = 0; -// } - -// // Radix sort - -// kp::Manager mgr; - -// auto t_data = mgr.tensor( -// data.data(), data.size(), sizeof(MortonCode), -// kp::Memory::DataTypes::eCustom); -// auto t_sorted_data = mgr.tensor( -// data.size(), sizeof(MortonCode), kp::Memory::DataTypes::eCustom); - -// std::vector> params = { { -// mgr.tensor(1, sizeof(LBVH::AABB), kp::Memory::DataTypes::eCustom), -// t_data, -// t_sorted_data, -// } }; - -// auto algorithm = mgr.algorithm( -// params, -// SPVShader(SINGLE_RADIXSORT_SPV.begin(), SINGLE_RADIXSORT_SPV.end()), -// kp::Workgroup({ { 1, 1, 1 } }), {}, -// std::vector(1, data.size())); - -// mgr.sequence() -// ->record(params) -// ->record(algorithm) -// ->record(params) -// ->eval(); - -// std::vector sorted_data = -// t_sorted_data->vector(); - -// { -// CAPTURE(sorted_data); -// REQUIRE( -// std::is_sorted( -// sorted_data.begin(), sorted_data.end(), -// [](const MortonCode& a, const MortonCode& b) { -// return a[0] < b[0]; -// })); -// } - -// // Compare against CPU sort -// std::sort( -// data.begin(), data.end(), -// [](const MortonCode& a, const MortonCode& b) { return a[0] < b[0]; -// }); -// CHECK(data == sorted_data); -// } diff --git a/tests/src/tests/main.cpp b/tests/src/tests/main.cpp index 5d9c0a594..19e734c78 100644 --- a/tests/src/tests/main.cpp +++ b/tests/src/tests/main.cpp @@ -43,7 +43,7 @@ int main(int argc, char** argv) using namespace Catch::Clara; auto cli = session.cli(); - int log_level = spdlog::level::err; + int log_level = spdlog::level::warn; cli |= Opt([&log_level](int const d) { return parse_log_level(d, log_level); }, "log_level")["--log"]["--logger-level"]( From c0c67dc76b12aa0f06096cec51516a73b95770dc Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 1 Sep 2025 23:29:18 -0400 Subject: [PATCH 03/25] Fix Linux and Window build --- src/ipc/broad_phase/lbvh.cpp | 2 ++ src/ipc/utils/profiler.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index af7669629..17d39be7f 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -370,9 +370,11 @@ namespace { do { const LBVH::Node& node = lbvh[node_idx]; +#if defined(__GNUC__) || defined(__clang__) // Prefetch child nodes to reduce cache misses __builtin_prefetch(&lbvh[node.left], 0, 1); __builtin_prefetch(&lbvh[node.right], 0, 1); +#endif const LBVH::Node& child_l = lbvh[node.left]; const LBVH::Node& child_r = lbvh[node.right]; diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp index 9e8c4789b..986250e5f 100644 --- a/src/ipc/utils/profiler.cpp +++ b/src/ipc/utils/profiler.cpp @@ -3,6 +3,7 @@ #ifdef IPC_TOOLKIT_WITH_PROFILER #include +#include namespace ipc { From b986930d5afee6f6d2d18c740c2b18cb4bd314ca Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 1 Sep 2025 23:41:43 -0400 Subject: [PATCH 04/25] Fix 2D case --- src/ipc/broad_phase/lbvh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 17d39be7f..c69d40e18 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -184,7 +184,7 @@ void LBVH::init_bvh( morton_codes[i].morton_code = morton_3D( mapped_center.x(), mapped_center.y(), - mapped_center.z()); + mapped_center.size() == 3 ? mapped_center.z() : 0); morton_codes[i].box_id = i; } }); From 1bb9f8e0b63f6e1208c7d9f472cbd8bc8661b6a2 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 1 Sep 2025 23:51:36 -0400 Subject: [PATCH 05/25] Refactor Morton code functions to support 2D points and improve bit expansion logic --- src/ipc/broad_phase/lbvh.cpp | 42 ++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index c69d40e18..b4bf85816 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -15,7 +15,7 @@ namespace ipc { namespace { // Expands a 21-bit integer into 63 bits by inserting 2 zeros after each // bit. - uint64_t expand_bits(uint64_t v) + uint64_t expand_bits_2(uint64_t v) { v = (v | v << 32) & 0x1F00000000FFFF; v = (v | v << 16) & 0x1F0000FF0000FF; @@ -33,11 +33,34 @@ namespace { x = std::clamp(x * scale, 0.0, scale - 1); y = std::clamp(y * scale, 0.0, scale - 1); z = std::clamp(z * scale, 0.0, scale - 1); - uint64_t xx = expand_bits(uint64_t(x)); - uint64_t yy = expand_bits(uint64_t(y)); - uint64_t zz = expand_bits(uint64_t(z)); + uint64_t xx = expand_bits_2(uint64_t(x)); + uint64_t yy = expand_bits_2(uint64_t(y)); + uint64_t zz = expand_bits_2(uint64_t(z)); return (xx << 2) | (yy << 1) | zz; } + + // Expands a 32-bit integer into 64 bits by inserting 1 zero after each bit. + uint64_t expand_bits_1(uint64_t v) + { + v = (v | (v << 16)) & 0x0000FFFF0000FFFF; + v = (v | (v << 8)) & 0x00FF00FF00FF00FF; + v = (v | (v << 4)) & 0x0F0F0F0F0F0F0F0F; + v = (v | (v << 2)) & 0x3333333333333333; + v = (v | (v << 1)) & 0x5555555555555555; + return v; + } + + // Calculates a 63-bit Morton code for the given 2D point located within the + // unit square [0,1]. + uint64_t morton_2D(double x, double y) + { + constexpr double scale = 1 << 21; + x = std::clamp(x * scale, 0.0, scale - 1); + y = std::clamp(y * scale, 0.0, scale - 1); + uint64_t xx = expand_bits_1(uint64_t(x)); + uint64_t yy = expand_bits_1(uint64_t(y)); + return (xx << 1) | yy; + } } // namespace LBVH::LBVH() : BroadPhase() { } @@ -182,9 +205,14 @@ void LBVH::init_bvh( const ArrayMax3d mapped_center = (center - mesh_aabb.min) / mesh_width; - morton_codes[i].morton_code = morton_3D( - mapped_center.x(), mapped_center.y(), - mapped_center.size() == 3 ? mapped_center.z() : 0); + if (mapped_center.size() == 2) { + morton_codes[i].morton_code = + morton_2D(mapped_center.x(), mapped_center.y()); + } else { + morton_codes[i].morton_code = morton_3D( + mapped_center.x(), mapped_center.y(), + mapped_center.z()); + } morton_codes[i].box_id = i; } }); From 44203bcf684a7631f2c863184476938a76103401 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 2 Oct 2025 22:15:55 -0400 Subject: [PATCH 06/25] Add concurrent queue for parallel candidate detection in LBVH --- src/ipc/broad_phase/lbvh.cpp | 74 +++++++++++++++++++++++++++++++++--- tests/src/tests/utils.cpp | 6 +-- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index b4bf85816..45ceae8f0 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -522,13 +523,74 @@ namespace { const std::function& can_collide, tbb::enumerable_thread_specific>& storage) { - tbb::task_group g; // TBB task group to manage parallel work + // tbb::task_group g; // TBB task group to manage parallel work + + // g.run_and_wait([&] { + // traverse_lbvh( + // source, target, /*source_root*/ 0, /*target_root*/ 0, + // can_collide, g, storage); + // }); + + tbb::concurrent_queue> query_queue; + query_queue.push({ 0, 0 }); // root vs root + + while (!query_queue.empty()) { + tbb::parallel_for( + tbb::blocked_range( + size_t(0), query_queue.unsafe_size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); ++i) { + std::pair indices; + while (!query_queue.try_pop(indices)) { } + + const auto& [source_idx, target_idx] = indices; + + // 1. Check for bounding box intersection + if (!source[source_idx].intersects( + target[target_idx])) { + continue; + } + + // 2. Handle leaf nodes (base case) + if (source[source_idx].is_leaf() + && target[target_idx].is_leaf()) { + attempt_add_candidate< + Candidate, swap_order, triangular>( + source[source_idx], target[target_idx], + can_collide, storage.local()); + continue; + } - g.run_and_wait([&] { - traverse_lbvh( - source, target, /*source_root*/ 0, /*target_root*/ 0, - can_collide, g, storage); - }); + // 3. Handle mixed or internal nodes + if (source[source_idx].is_leaf()) { + query_queue.push( + { source_idx, target[target_idx].left }); + query_queue.push( + { source_idx, target[target_idx].right }); + } else if (target[target_idx].is_leaf()) { + query_queue.push( + { source[source_idx].left, target_idx }); + query_queue.push( + { source[source_idx].right, target_idx }); + } else { + // Both internal nodes, test all four + // combinations. + query_queue.push( + { source[source_idx].left, + target[target_idx].left }); + query_queue.push( + { source[source_idx].left, + target[target_idx].right }); + query_queue.push( + { source[source_idx].right, + target[target_idx].left }); + query_queue.push( + { source[source_idx].right, + target[target_idx].right }); + } + } + }); + } } } // namespace diff --git a/tests/src/tests/utils.cpp b/tests/src/tests/utils.cpp index 35a2398fe..2e1f8dc4b 100644 --- a/tests/src/tests/utils.cpp +++ b/tests/src/tests/utils.cpp @@ -25,11 +25,11 @@ std::vector> broad_phases() { return { { std::make_shared(), - // std::make_shared(), - // std::make_shared(), + std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), - // std::make_shared(), + std::make_shared(), #ifdef IPC_TOOLKIT_WITH_CUDA std::make_shared(), #endif From acffc2b9bdace3a9e1933107ee9711f22335791a Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Fri, 16 Jan 2026 09:54:09 -0500 Subject: [PATCH 07/25] Update Python version matrix to include 3.14 for pull requests --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 84ad270ae..94fe9871e 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -27,7 +27,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ${{ github.event_name == 'pull_request' && fromJSON('["3.13"]') || fromJSON('["3.9", "3.10", "3.11", "3.12", "3.13"]') }} + python-version: ${{ github.event_name == 'pull_request' && fromJSON('["3.14"]') || fromJSON('["3.10", "3.11", "3.12", "3.13", "3.14"]') }} include: - os: ubuntu-latest name: Linux From 1fa4de6b8ec0d2268bd69e5c84515e1a778aff5d Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Fri, 16 Jan 2026 10:15:10 -0500 Subject: [PATCH 08/25] Fix clang-tidy errors --- python/examples/lbvh.py | 3 +-- src/ipc/broad_phase/lbvh.cpp | 18 +++++++-------- src/ipc/broad_phase/lbvh.hpp | 12 +++++----- .../smooth_collisions_builder.cpp | 4 ++-- src/ipc/utils/profiler.hpp | 3 +++ tests/src/tests/broad_phase/test_lbvh.cpp | 23 ------------------- 6 files changed, 21 insertions(+), 42 deletions(-) diff --git a/python/examples/lbvh.py b/python/examples/lbvh.py index 21f88b3a6..7e57951e3 100644 --- a/python/examples/lbvh.py +++ b/python/examples/lbvh.py @@ -6,8 +6,7 @@ import pathlib -mesh = meshio.read(pathlib.Path( - __file__).parents[2] / "tests/data/puffer-ball/20.ply") +mesh = meshio.read(pathlib.Path(__file__).parents[2] / "tests/data/puffer-ball/20.ply") # noqa lbvh = ipctk.LBVH() lbvh.build(mesh.points, np.array([], dtype=int), mesh.cells_dict["triangle"]) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 45ceae8f0..d42dd96d7 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -186,7 +186,7 @@ namespace { void LBVH::init_bvh( const std::vector& boxes, std::vector& lbvh) const { - if (boxes.size() == 0) { + if (boxes.empty()) { return; } @@ -317,9 +317,9 @@ void LBVH::init_bvh( // both children are computed -> compute aabb union and // continue const Node& child_b = - lbvh[Node::POINTER(node_idx, lbvh[node_idx].right)]; + lbvh[Node::pointer(node_idx, lbvh[node_idx].right)]; const Node& child_a = - lbvh[Node::POINTER(node_idx, lbvh[node_idx].left)]; + lbvh[Node::pointer(node_idx, lbvh[node_idx].left)]; lbvh[node_idx].aabb_min = child_a.aabb_min.min(child_b.aabb_min); @@ -614,7 +614,7 @@ void LBVH::detect_candidates( void LBVH::detect_vertex_vertex_candidates( std::vector& candidates) const { - if (vertex_boxes.size() == 0) { + if (vertex_boxes.empty()) { return; } @@ -626,7 +626,7 @@ void LBVH::detect_vertex_vertex_candidates( void LBVH::detect_edge_vertex_candidates( std::vector& candidates) const { - if (edge_boxes.size() == 0 || vertex_boxes.size() == 0) { + if (edge_boxes.empty() || vertex_boxes.empty()) { return; } @@ -642,7 +642,7 @@ void LBVH::detect_edge_vertex_candidates( void LBVH::detect_edge_edge_candidates( std::vector& candidates) const { - if (edge_boxes.size() == 0) { + if (edge_boxes.empty()) { return; } @@ -656,7 +656,7 @@ void LBVH::detect_edge_edge_candidates( void LBVH::detect_face_vertex_candidates( std::vector& candidates) const { - if (face_boxes.size() == 0 || vertex_boxes.size() == 0) { + if (face_boxes.empty() || vertex_boxes.empty()) { return; } @@ -671,7 +671,7 @@ void LBVH::detect_face_vertex_candidates( void LBVH::detect_edge_face_candidates( std::vector& candidates) const { - if (edge_boxes.size() == 0 || face_boxes.size() == 0) { + if (edge_boxes.empty() || face_boxes.empty()) { return; } @@ -686,7 +686,7 @@ void LBVH::detect_edge_face_candidates( void LBVH::detect_face_face_candidates( std::vector& candidates) const { - if (face_boxes.size() == 0) { + if (face_boxes.empty()) { return; } diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index 1b275fb21..bf18bc445 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -24,7 +24,7 @@ class LBVH : public BroadPhase { // helper function to handle relative pointers on // CPU side, i.e. convert them to absolute // pointers for array indexing - static constexpr uint32_t POINTER(uint32_t index, uint32_t pointer) + static constexpr uint32_t pointer(uint32_t index, uint32_t pointer) { return ABSOLUTE_POINTERS ? pointer : index + pointer; } @@ -34,13 +34,13 @@ class LBVH : public BroadPhase { /// @brief The max corner of the AABB ArrayMax3d aabb_max; /// @brief The vertex ids (v0, v1, v2) - std::array vertex_ids; + std::array vertex_ids = { { -1, -1, -1 } }; /// @brief Pointer to the left child or INVALID_POINTER in case of leaf - int left; + int left = -1; /// @brief Pointer to the right child or INVALID_POINTER in case of leaf - int right; + int right = -1; /// @brief The primitive id (INVALID_ID <=> inner node) - index_t primitive_id; + index_t primitive_id = -1; bool is_leaf() const { @@ -72,7 +72,7 @@ class LBVH : public BroadPhase { struct ConstructionInfo { /// @brief Parent to the parent - int parent; + int parent = -1; /// @brief Number of threads that arrived std::atomic visitation_count = 0; }; diff --git a/src/ipc/smooth_contact/smooth_collisions_builder.cpp b/src/ipc/smooth_contact/smooth_collisions_builder.cpp index 7dcddde2e..40cc70f51 100644 --- a/src/ipc/smooth_contact/smooth_collisions_builder.cpp +++ b/src/ipc/smooth_contact/smooth_collisions_builder.cpp @@ -12,7 +12,7 @@ namespace ipc { namespace { template void add_collision( - const std::shared_ptr pair, + const std::shared_ptr& pair, unordered_map, std::shared_ptr>& cc_to_id, std::vector>& collisions) @@ -28,7 +28,7 @@ namespace { template void add_collision( - const std::shared_ptr pair, + const std::shared_ptr& pair, std::vector>& collisions) { assert(pair != nullptr); diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index 48a819b13..f64611569 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -94,7 +94,10 @@ Profiler& profiler(); class ChronoTimer { public: void start() { m_start = std::chrono::high_resolution_clock::now(); } + void stop() { m_end = std::chrono::high_resolution_clock::now(); } + + // NOLINTNEXTLINE(readability-identifier-naming) double getElapsedTimeInMilliSec() const { return std::chrono::duration(m_end - m_start) diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 8ddd33bd5..6e35cd64a 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -105,29 +105,6 @@ TEST_CASE("LBVH::build", "[broad_phase][lbvh]") lbvh->build(vertices_t0, vertices_t1, edges, faces, inflation_radius); } - std::ofstream dot_file("/Users/zachary/Downloads/lbvh.dot"); - dot_file << "digraph G {\nnode [shape = circle;];" << std::endl; - - for (int i = 0; i < lbvh->vertex_nodes().size(); ++i) { - dot_file << "N" << i << " [label = \"" - << (lbvh->vertex_nodes()[i].is_inner() ? "I" : "L") - << (lbvh->vertex_nodes()[i].is_inner() - ? i - : lbvh->vertex_nodes()[i].primitive_id) - << "\"];" << std::endl; - } - - for (int i = 0; i < lbvh->vertex_nodes().size(); ++i) { - const auto& node = lbvh->vertex_nodes()[i]; - if (node.is_inner()) { - dot_file << "N" << i << " -> N" << node.left << ";" << std::endl; - dot_file << "N" << i << " -> N" << node.right << ";" << std::endl; - } - } - - dot_file << "}" << std::endl; - dot_file.close(); - // -- TODO: Check the morton codes ---------------------------------------- // -- TODO: Check the morton codes are sorted ----------------------------- From b2b4f39b06a6881803615c334675ecb9305c62c8 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Fri, 16 Jan 2026 10:34:58 -0500 Subject: [PATCH 09/25] Address copilot comments --- python/examples/lbvh.py | 2 +- src/ipc/broad_phase/lbvh.cpp | 8 +++++++- tests/src/tests/broad_phase/test_lbvh.cpp | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/python/examples/lbvh.py b/python/examples/lbvh.py index 7e57951e3..f9fe4c6eb 100644 --- a/python/examples/lbvh.py +++ b/python/examples/lbvh.py @@ -16,7 +16,7 @@ ps.set_give_focus_on_show(True) ps_mesh = ps.register_surface_mesh( - "bunny", + "mesh", mesh.points, mesh.cells_dict["triangle"] ) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index d42dd96d7..3dc07eb62 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -102,7 +102,8 @@ namespace { // handle duplicate morton codes int element_idx_i = i; // sorted_morton_codes[i].elementIdx; int element_idx_j = j; // sorted_morton_codes[j].elementIdx; -// add 32 for common prefix of code_i ^ code_j + + // add 32 for common prefix of code_i ^ code_j #if defined(__GNUC__) || defined(__clang__) return 32 + __builtin_clz(element_idx_i ^ element_idx_j); #elif defined(WIN32) @@ -399,6 +400,9 @@ namespace { do { const LBVH::Node& node = lbvh[node_idx]; + // Check left and right are valid pointers + assert(node.is_inner()); + #if defined(__GNUC__) || defined(__clang__) // Prefetch child nodes to reduce cache misses __builtin_prefetch(&lbvh[node.left], 0, 1); @@ -425,11 +429,13 @@ namespace { bool traverse_r = (intersects_r && !child_r.is_leaf()); if (!traverse_l && !traverse_r) { + assert(stack_ptr > 0); node_idx = stack[--stack_ptr]; } else { node_idx = traverse_l ? node.left : node.right; if (traverse_l && traverse_r) { // Postpone traversal of the right child + assert(stack_ptr < MAX_STACK_SIZE); stack[stack_ptr++] = node.right; } } diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 6e35cd64a..bb1cf01b8 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -45,8 +45,8 @@ void traverse_lbvh( CHECK(!visited[index]); visited[index] = true; - uint32_t left_child_index = LBVH::Node::POINTER(index, node.left); - uint32_t right_child_index = LBVH::Node::POINTER(index, node.right); + uint32_t left_child_index = LBVH::Node::pointer(index, node.left); + uint32_t right_child_index = LBVH::Node::pointer(index, node.right); // verify aabbs LBVH::Node childA = lbvh_nodes[left_child_index]; From ad5039f3503ba52015338a62f35f11bb530a6b62 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 19 Jan 2026 11:33:23 -0500 Subject: [PATCH 10/25] Refactor LBVH Node Structure and Add SIMD Traversal - Updated the LBVH Node structure to use Eigen::Array3f for AABB representation, improving compactness of Nodes. - Introduced a union in the Node structure to efficiently manage leaf and internal node data, reducing memory usage and cache misses. - Added functions for calculating 2D and 3D Morton codes in a new morton.hpp file, enhancing spatial indexing capabilities. - Modified the test suite for LBVH to include checks for candidate presence rather than exact size matches, improving robustness of collision detection tests. - Updated CMakeLists.txt to include the new morton.hpp file. - Adjusted utility functions to maintain consistency with the new data types and structures. - Updated test data with new simulation scenes --- .../ipc_toolkit/ipc_toolkit_tests_data.cmake | 2 +- cmake/ipc_toolkit/ipc_toolkit_warnings.cmake | 3 + python/examples/lbvh.py | 2 +- src/ipc/broad_phase/broad_phase.cpp | 8 + src/ipc/broad_phase/lbvh.cpp | 538 ++++++++++-------- src/ipc/broad_phase/lbvh.hpp | 104 ++-- src/ipc/math/CMakeLists.txt | 1 + src/ipc/math/morton.hpp | 65 +++ src/ipc/utils/eigen_ext.hpp | 4 +- tests/src/tests/broad_phase/test_lbvh.cpp | 107 +++- 10 files changed, 534 insertions(+), 300 deletions(-) create mode 100644 src/ipc/math/morton.hpp diff --git a/cmake/ipc_toolkit/ipc_toolkit_tests_data.cmake b/cmake/ipc_toolkit/ipc_toolkit_tests_data.cmake index 4d5d6baa4..9c53bc956 100644 --- a/cmake/ipc_toolkit/ipc_toolkit_tests_data.cmake +++ b/cmake/ipc_toolkit/ipc_toolkit_tests_data.cmake @@ -30,7 +30,7 @@ else() SOURCE_DIR ${IPC_TOOLKIT_TESTS_DATA_DIR} GIT_REPOSITORY https://github.com/ipc-sim/ipc-toolkit-tests-data.git - GIT_TAG 7ca6db695adcc00d3d6d978767dfc0d81722a515 + GIT_TAG c7eba549d9a80d15569a013c473f0aff104ac44a CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/cmake/ipc_toolkit/ipc_toolkit_warnings.cmake b/cmake/ipc_toolkit/ipc_toolkit_warnings.cmake index b95cb04c6..3b50f2abc 100644 --- a/cmake/ipc_toolkit/ipc_toolkit_warnings.cmake +++ b/cmake/ipc_toolkit/ipc_toolkit_warnings.cmake @@ -100,6 +100,9 @@ else() -Wno-sign-compare + -Wno-gnu-anonymous-struct + -Wno-nested-anon-types + ########### # GCC 6.1 # ########### diff --git a/python/examples/lbvh.py b/python/examples/lbvh.py index f9fe4c6eb..19f10afe9 100644 --- a/python/examples/lbvh.py +++ b/python/examples/lbvh.py @@ -21,7 +21,7 @@ mesh.cells_dict["triangle"] ) -nodes = lbvh.vertex_nodes +nodes = lbvh.face_nodes def traverse_lbvh(node, max_depth): diff --git a/src/ipc/broad_phase/broad_phase.cpp b/src/ipc/broad_phase/broad_phase.cpp index c9605c3f7..2a4999a29 100644 --- a/src/ipc/broad_phase/broad_phase.cpp +++ b/src/ipc/broad_phase/broad_phase.cpp @@ -78,6 +78,7 @@ void BroadPhase::detect_collision_candidates( bool BroadPhase::can_edge_vertex_collide(size_t ei, size_t vi) const { + assert(ei < edge_boxes.size()); const auto& [e0i, e1i, _] = edge_boxes[ei].vertex_ids; return vi != e0i && vi != e1i @@ -86,7 +87,9 @@ bool BroadPhase::can_edge_vertex_collide(size_t ei, size_t vi) const bool BroadPhase::can_edges_collide(size_t eai, size_t ebi) const { + assert(eai < edge_boxes.size()); const auto& [ea0i, ea1i, _] = edge_boxes[eai].vertex_ids; + assert(ebi < edge_boxes.size()); const auto& [eb0i, eb1i, __] = edge_boxes[ebi].vertex_ids; const bool share_endpoint = @@ -100,6 +103,7 @@ bool BroadPhase::can_edges_collide(size_t eai, size_t ebi) const bool BroadPhase::can_face_vertex_collide(size_t fi, size_t vi) const { + assert(fi < face_boxes.size()); const auto& [f0i, f1i, f2i] = face_boxes[fi].vertex_ids; return vi != f0i && vi != f1i && vi != f2i @@ -109,7 +113,9 @@ bool BroadPhase::can_face_vertex_collide(size_t fi, size_t vi) const bool BroadPhase::can_edge_face_collide(size_t ei, size_t fi) const { + assert(ei < edge_boxes.size()); const auto& [e0i, e1i, _] = edge_boxes[ei].vertex_ids; + assert(fi < face_boxes.size()); const auto& [f0i, f1i, f2i] = face_boxes[fi].vertex_ids; const bool share_endpoint = e0i == f0i || e0i == f1i || e0i == f2i @@ -124,7 +130,9 @@ bool BroadPhase::can_edge_face_collide(size_t ei, size_t fi) const bool BroadPhase::can_faces_collide(size_t fai, size_t fbi) const { + assert(fai < face_boxes.size()); const auto& [fa0i, fa1i, fa2i] = face_boxes[fai].vertex_ids; + assert(fbi < face_boxes.size()); const auto& [fb0i, fb1i, fb2i] = face_boxes[fbi].vertex_ids; const bool share_endpoint = fa0i == fb0i || fa0i == fb1i || fa0i == fb2i diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 3dc07eb62..fc4da5908 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -1,66 +1,39 @@ #include "lbvh.hpp" +#include #include #include #include -#include #include #include #include +#if defined(__APPLE__) +// We utilize SIMD registers to compare 1 Node against 4 Queries simultaneously. +#include +#endif + using namespace std::placeholders; namespace ipc { namespace { - // Expands a 21-bit integer into 63 bits by inserting 2 zeros after each - // bit. - uint64_t expand_bits_2(uint64_t v) - { - v = (v | v << 32) & 0x1F00000000FFFF; - v = (v | v << 16) & 0x1F0000FF0000FF; - v = (v | v << 8) & 0x100F00F00F00F00F; - v = (v | v << 4) & 0x10C30C30C30C30C3; - v = (v | v << 2) & 0x1249249249249249; - return v; - } - - // Calculates a 63-bit Morton code for the given 3D point located within the - // unit cube [0,1]. - uint64_t morton_3D(double x, double y, double z) + // Helper to safely convert double AABB to float AABB + inline void assign_inflated_aabb(const AABB& box, LBVH::Node& node) { - constexpr double scale = 1 << 21; - x = std::clamp(x * scale, 0.0, scale - 1); - y = std::clamp(y * scale, 0.0, scale - 1); - z = std::clamp(z * scale, 0.0, scale - 1); - uint64_t xx = expand_bits_2(uint64_t(x)); - uint64_t yy = expand_bits_2(uint64_t(y)); - uint64_t zz = expand_bits_2(uint64_t(z)); - return (xx << 2) | (yy << 1) | zz; - } - - // Expands a 32-bit integer into 64 bits by inserting 1 zero after each bit. - uint64_t expand_bits_1(uint64_t v) - { - v = (v | (v << 16)) & 0x0000FFFF0000FFFF; - v = (v | (v << 8)) & 0x00FF00FF00FF00FF; - v = (v | (v << 4)) & 0x0F0F0F0F0F0F0F0F; - v = (v | (v << 2)) & 0x3333333333333333; - v = (v | (v << 1)) & 0x5555555555555555; - return v; - } - - // Calculates a 63-bit Morton code for the given 2D point located within the - // unit square [0,1]. - uint64_t morton_2D(double x, double y) - { - constexpr double scale = 1 << 21; - x = std::clamp(x * scale, 0.0, scale - 1); - y = std::clamp(y * scale, 0.0, scale - 1); - uint64_t xx = expand_bits_1(uint64_t(x)); - uint64_t yy = expand_bits_1(uint64_t(y)); - return (xx << 1) | yy; + constexpr float inf = std::numeric_limits::infinity(); + // Round Min down + node.aabb_min[0] = std::nextafter(float(box.min[0]), -inf); + node.aabb_min[1] = std::nextafter(float(box.min[1]), -inf); + node.aabb_min[2] = box.min.size() == 3 + ? std::nextafter(float(box.min[2]), -inf) + : 0.0f; + // Round Max up + node.aabb_max[0] = std::nextafter(float(box.max[0]), inf); + node.aabb_max[1] = std::nextafter(float(box.max[1]), inf); + node.aabb_max[2] = + box.max.size() == 3 ? std::nextafter(float(box.max[2]), inf) : 0.0f; } } // namespace @@ -68,22 +41,60 @@ LBVH::LBVH() : BroadPhase() { } LBVH::~LBVH() = default; +// Initialize defaults +LBVH::Node::Node() : primitive_id(INVALID_ID), is_inner_marker(0) +{ + static_assert( + sizeof(Node) == 32, + "LBVH::Node size must be 32 bytes to fit 2 Nodes in a cache line"); +} + void LBVH::build( Eigen::ConstRef edges, Eigen::ConstRef faces) { BroadPhase::build(edges, faces); // Build edge_boxes and face_boxes - assert(vertex_boxes.size() > 0); - mesh_aabb = { vertex_boxes[0].min, vertex_boxes[0].max }; + if (vertex_boxes.empty()) { + return; + } + + dim = vertex_boxes[0].min.size(); + assert(dim == 2 || dim == 3); // Only 2D and 3D supported + + mesh_aabb = { Eigen::Array3d::Zero(), Eigen::Array3d::Zero() }; + mesh_aabb.min = to_3D(vertex_boxes[0].min); + mesh_aabb.max = to_3D(vertex_boxes[0].max); for (const auto& box : vertex_boxes) { - mesh_aabb.min = mesh_aabb.min.min(box.min); - mesh_aabb.max = mesh_aabb.max.max(box.max); + mesh_aabb.min = mesh_aabb.min.min(to_3D(box.min)); + mesh_aabb.max = mesh_aabb.max.max(to_3D(box.max)); } init_bvh(vertex_boxes, vertex_bvh); init_bvh(edge_boxes, edge_bvh); init_bvh(face_boxes, face_bvh); + + // Copy edge and face vertex ids for access during traversal + edge_vertex_ids.resize(edges.rows()); + for (int i = 0; i < edges.rows(); i++) { + edge_vertex_ids[i][0] = static_cast(edges(i, 0)); + edge_vertex_ids[i][1] = static_cast(edges(i, 1)); + } + face_vertex_ids.resize(faces.rows()); + for (int i = 0; i < faces.rows(); i++) { + face_vertex_ids[i][0] = static_cast(faces(i, 0)); + face_vertex_ids[i][1] = static_cast(faces(i, 1)); + face_vertex_ids[i][2] = static_cast(faces(i, 2)); + } + + // Clear parent data to save memory. + // These are redundant after building the BVHs. + vertex_boxes.clear(); + vertex_boxes.shrink_to_fit(); + edge_boxes.clear(); + edge_boxes.shrink_to_fit(); + face_boxes.clear(); + face_boxes.shrink_to_fit(); } namespace { @@ -196,18 +207,18 @@ void LBVH::init_bvh( std::vector morton_codes(boxes.size()); { IPC_TOOLKIT_PROFILE_BLOCK("compute_morton_codes"); - const ArrayMax3d mesh_width = mesh_aabb.max - mesh_aabb.min; + const Eigen::Array3d mesh_width = mesh_aabb.max - mesh_aabb.min; tbb::parallel_for( tbb::blocked_range(0, boxes.size()), [&](const tbb::blocked_range& r) { for (size_t i = r.begin(); i < r.end(); i++) { const auto& box = boxes[i]; - const ArrayMax3d center = 0.5 * (box.min + box.max); - const ArrayMax3d mapped_center = + const Eigen::Array3d center = 0.5 * (box.min + box.max); + const Eigen::Array3d mapped_center = (center - mesh_aabb.min) / mesh_width; - if (mapped_center.size() == 2) { + if (dim == 2) { morton_codes[i].morton_code = morton_2D(mapped_center.x(), mapped_center.y()); } else { @@ -243,13 +254,11 @@ void LBVH::init_bvh( assert(i < boxes.size()); { const auto& box = boxes[morton_codes[i].box_id]; - lbvh[LEAF_OFFSET + i].left = Node::INVALID_POINTER; - lbvh[LEAF_OFFSET + i].right = Node::INVALID_POINTER; - lbvh[LEAF_OFFSET + i].aabb_min = box.min; - lbvh[LEAF_OFFSET + i].aabb_max = box.max; - lbvh[LEAF_OFFSET + i].vertex_ids = box.vertex_ids; - lbvh[LEAF_OFFSET + i].primitive_id = - morton_codes[i].box_id; + + Node leaf_node; // Create leaf node + assign_inflated_aabb(box, leaf_node); + leaf_node.primitive_id = morton_codes[i].box_id; + lbvh[LEAF_OFFSET + i] = leaf_node; // Store leaf } if (i < boxes.size() - 1) { @@ -281,13 +290,8 @@ void LBVH::init_bvh( } // Record parent-child relationships - if constexpr (Node::ABSOLUTE_POINTERS) { - lbvh[i].left = child_a; - lbvh[i].right = child_b; - } else { - lbvh[i].left = child_a - int(i); - lbvh[i].right = child_b - int(i); - } + lbvh[i].left = child_a; + lbvh[i].right = child_b; construction_infos[child_a].parent = int(i); construction_infos[child_b].parent = int(i); } @@ -317,11 +321,9 @@ void LBVH::init_bvh( // this is the second thread that arrived at this node, // both children are computed -> compute aabb union and // continue - const Node& child_b = - lbvh[Node::pointer(node_idx, lbvh[node_idx].right)]; - const Node& child_a = - lbvh[Node::pointer(node_idx, lbvh[node_idx].left)]; - + assert(lbvh[node_idx].is_inner()); + const Node& child_b = lbvh[lbvh[node_idx].right]; + const Node& child_a = lbvh[lbvh[node_idx].left]; lbvh[node_idx].aabb_min = child_a.aabb_min.min(child_b.aabb_min); lbvh[node_idx].aabb_max = @@ -340,20 +342,17 @@ void LBVH::init_bvh( void LBVH::clear() { BroadPhase::clear(); + // Clear BVH nodes vertex_bvh.clear(); edge_bvh.clear(); face_bvh.clear(); + + // Clear vertex IDs + edge_vertex_ids.clear(); + face_vertex_ids.clear(); } namespace { - inline bool share_a_vertex( - const std::array& a, const std::array& b) - { - return a[0] == b[0] || a[0] == b[1] || a[0] == b[2] - || (a[1] >= 0 && (a[1] == b[0] || a[1] == b[1] || a[1] == b[2])) - || (a[2] >= 0 && (a[2] == b[0] || a[2] == b[1] || a[2] == b[2])); - } - template inline void attempt_add_candidate( const LBVH::Node& query, @@ -361,10 +360,6 @@ namespace { const std::function& can_collide, std::vector& candidates) { - if (share_a_vertex(query.vertex_ids, node.vertex_ids)) { - return; - } - int i = query.primitive_id, j = node.primitive_id; if constexpr (swap_order) { std::swap(i, j); @@ -442,161 +437,167 @@ namespace { } while (node_idx != LBVH::Node::INVALID_POINTER); // Same as root } - // Traverses the target BVH independently for each leaf node in the source - // BVH. For each source leaf, performs a stack-based traversal of the target - // BVH, collecting candidate pairs that pass the can_collide predicate. - // Results are stored in thread-local storage for later merging. +#ifdef __APPLE__ + // SIMD Traversal + // Traverses 4 queries simultaneously using SIMD. template - void independent_traversal( - const std::vector& source, - const std::vector& target, + void traverse_lbvh_simd( + const LBVH::Node* queries, + const size_t n_queries, + const std::vector& lbvh, const std::function& can_collide, - tbb::enumerable_thread_specific>& storage) + std::vector& candidates) { - // Calculate the offset to the first leaf node in the source BVH. - const size_t SOURCE_LEAF_OFFSET = source.size() / 2; - const size_t N_SOURCE_LEAVES = SOURCE_LEAF_OFFSET + 1; + assert(n_queries >= 1 && n_queries <= 4); + // Load 4 queries into single registers (Structure of Arrays) + auto make_simd = [&](auto F) -> simd_float4 { + return simd_float4 { + F(0), + n_queries > 1 ? F(1) : 0.0f, + n_queries > 2 ? F(2) : 0.0f, + n_queries > 3 ? F(3) : 0.0f, + }; + }; - tbb::parallel_for( - tbb::blocked_range(size_t(0), N_SOURCE_LEAVES), - [&](const tbb::blocked_range& r) { - auto& local_candidates = storage.local(); + const simd_float4 q_min_x = + make_simd([&](int k) { return queries[k].aabb_min.x(); }); + const simd_float4 q_min_y = + make_simd([&](int k) { return queries[k].aabb_min.y(); }); + const simd_float4 q_min_z = + make_simd([&](int k) { return queries[k].aabb_min.z(); }); + const simd_float4 q_max_x = + make_simd([&](int k) { return queries[k].aabb_max.x(); }); + const simd_float4 q_max_y = + make_simd([&](int k) { return queries[k].aabb_max.y(); }); + const simd_float4 q_max_z = + make_simd([&](int k) { return queries[k].aabb_max.z(); }); - for (size_t i = r.begin(); i < r.end(); ++i) { - const auto& query_node = source[SOURCE_LEAF_OFFSET + i]; - traverse_lbvh( - query_node, target, can_collide, local_candidates); - } - }); - } + // Use a fixed-size array as a stack to avoid dynamic allocations + constexpr int MAX_STACK_SIZE = 64; + int stack[MAX_STACK_SIZE]; + int stack_ptr = 0; + stack[stack_ptr++] = LBVH::Node::INVALID_POINTER; - // Parallel traversal of two BVHs using TBB task_group. - // Recursively explores all pairs of nodes whose bounding boxes intersect, - // adding candidate pairs when both nodes are leaves. - template - void traverse_lbvh( - const std::vector& source, - const std::vector& target, - int source_idx, - int target_idx, - const std::function& can_collide, - tbb::task_group& g, - tbb::enumerable_thread_specific>& storage) - { - // 1. Check for bounding box intersection - if (!source[source_idx].intersects(target[target_idx])) { - return; - } + int node_idx = 0; // root + do { + const LBVH::Node& node = lbvh[node_idx]; - // 2. Handle leaf nodes (base case) - if (source[source_idx].is_leaf() && target[target_idx].is_leaf()) { - attempt_add_candidate( - source[source_idx], target[target_idx], can_collide, - storage.local()); - return; - } + // Check left and right are valid pointers + assert(node.is_inner()); - // 3. Handle mixed or internal nodes (parallel recursion) +#if defined(__GNUC__) || defined(__clang__) + // Prefetch child nodes to reduce cache misses + __builtin_prefetch(&lbvh[node.left], 0, 1); + __builtin_prefetch(&lbvh[node.right], 0, 1); +#endif - // TBB's task_group provides an easy way to offload tasks. - const auto dispatch = [&](int source_idx, int target_idx) { - g.run([&, source_idx, target_idx] { - traverse_lbvh( - source, target, source_idx, target_idx, can_collide, g, - storage); - }); - }; + const LBVH::Node& child_l = lbvh[node.left]; + const LBVH::Node& child_r = lbvh[node.right]; - if (source[source_idx].is_leaf()) { - dispatch(source_idx, target[target_idx].left); - dispatch(source_idx, target[target_idx].right); - } else if (target[target_idx].is_leaf()) { - dispatch(source[source_idx].left, target_idx); - dispatch(source[source_idx].right, target_idx); - } else { - // Both internal nodes, test all four combinations. - dispatch(source[source_idx].left, target[target_idx].left); - dispatch(source[source_idx].left, target[target_idx].right); - dispatch(source[source_idx].right, target[target_idx].left); - dispatch(source[source_idx].right, target[target_idx].right); - } + // 1. Intersect 4 queries at once + // (child_l.min <= query.max) && (query.min <= child_l.max) + const simd_int4 intersects_l = (child_l.aabb_min.x() <= q_max_x) + & (child_l.aabb_min.y() <= q_max_y) + & (child_l.aabb_min.z() <= q_max_z) + & (q_min_x <= child_l.aabb_max.x()) + & (q_min_y <= child_l.aabb_max.y()) + & (q_min_z <= child_l.aabb_max.z()); + + // 2. Intersect 4 queries at once + // (child_r.min <= query.max) && (query.min <= child_r.max) + const simd_int4 intersects_r = (child_r.aabb_min.x() <= q_max_x) + & (child_r.aabb_min.y() <= q_max_y) + & (child_r.aabb_min.z() <= q_max_z) + & (q_min_x <= child_r.aabb_max.x()) + & (q_min_y <= child_r.aabb_max.y()) + & (q_min_z <= child_r.aabb_max.z()); + + const bool any_intersects_l = simd_any(intersects_l); + const bool any_intersects_r = simd_any(intersects_r); + + // Query overlaps a leaf node => report collision + if (any_intersects_l && child_l.is_leaf()) { + for (int k = 0; k < n_queries; ++k) { + if (intersects_l[k]) { + attempt_add_candidate< + Candidate, swap_order, triangular>( + queries[k], child_l, can_collide, candidates); + } + } + } + if (any_intersects_r && child_r.is_leaf()) { + for (int k = 0; k < n_queries; ++k) { + if (intersects_r[k]) { + attempt_add_candidate< + Candidate, swap_order, triangular>( + queries[k], child_r, can_collide, candidates); + } + } + } + + // Query overlaps an internal node => traverse. + bool traverse_l = (any_intersects_l && !child_l.is_leaf()); + bool traverse_r = (any_intersects_r && !child_r.is_leaf()); + + if (!traverse_l && !traverse_r) { + assert(stack_ptr > 0); + node_idx = stack[--stack_ptr]; + } else { + node_idx = traverse_l ? node.left : node.right; + if (traverse_l && traverse_r) { + // Postpone traversal of the right child + assert(stack_ptr < MAX_STACK_SIZE); + stack[stack_ptr++] = node.right; + } + } + } while (node_idx != LBVH::Node::INVALID_POINTER); // Same as root } +#endif - template - void simultaneous_traversal( + template < + typename Candidate, + bool swap_order, + bool triangular, + bool use_simd = true> + void independent_traversal( const std::vector& source, const std::vector& target, const std::function& can_collide, tbb::enumerable_thread_specific>& storage) { - // tbb::task_group g; // TBB task group to manage parallel work - - // g.run_and_wait([&] { - // traverse_lbvh( - // source, target, /*source_root*/ 0, /*target_root*/ 0, - // can_collide, g, storage); - // }); - - tbb::concurrent_queue> query_queue; - query_queue.push({ 0, 0 }); // root vs root - - while (!query_queue.empty()) { - tbb::parallel_for( - tbb::blocked_range( - size_t(0), query_queue.unsafe_size()), - [&](const tbb::blocked_range& r) { - for (size_t i = r.begin(); i < r.end(); ++i) { - std::pair indices; - while (!query_queue.try_pop(indices)) { } - - const auto& [source_idx, target_idx] = indices; - - // 1. Check for bounding box intersection - if (!source[source_idx].intersects( - target[target_idx])) { - continue; - } +#ifdef __APPLE__ // Only support SIMD on Apple platforms for now + constexpr size_t SIMD_SIZE = use_simd ? 4 : 1; + constexpr size_t GRAIN_SIZE = use_simd ? 64 : 1; +#else + constexpr size_t SIMD_SIZE = 1; + constexpr size_t GRAIN_SIZE = 1; +#endif - // 2. Handle leaf nodes (base case) - if (source[source_idx].is_leaf() - && target[target_idx].is_leaf()) { - attempt_add_candidate< - Candidate, swap_order, triangular>( - source[source_idx], target[target_idx], - can_collide, storage.local()); - continue; - } + // Calculate the offset to the first leaf node in the source BVH. + const size_t SOURCE_LEAF_OFFSET = source.size() / 2; + const size_t N_SOURCE_LEAVES = SOURCE_LEAF_OFFSET + 1; - // 3. Handle mixed or internal nodes - if (source[source_idx].is_leaf()) { - query_queue.push( - { source_idx, target[target_idx].left }); - query_queue.push( - { source_idx, target[target_idx].right }); - } else if (target[target_idx].is_leaf()) { - query_queue.push( - { source[source_idx].left, target_idx }); - query_queue.push( - { source[source_idx].right, target_idx }); - } else { - // Both internal nodes, test all four - // combinations. - query_queue.push( - { source[source_idx].left, - target[target_idx].left }); - query_queue.push( - { source[source_idx].left, - target[target_idx].right }); - query_queue.push( - { source[source_idx].right, - target[target_idx].left }); - query_queue.push( - { source[source_idx].right, - target[target_idx].right }); - } + tbb::parallel_for( + tbb::blocked_range(size_t(0), N_SOURCE_LEAVES, GRAIN_SIZE), + [&](const tbb::blocked_range& r) { + auto& local_candidates = storage.local(); + for (size_t i = r.begin(); i < r.end(); i += SIMD_SIZE) { +#ifdef __APPLE__ + if constexpr (use_simd) { + traverse_lbvh_simd( + &source[SOURCE_LEAF_OFFSET + i], + std::min(SIMD_SIZE, r.end() - i), target, + can_collide, local_candidates); + } else { +#endif + traverse_lbvh( + source[SOURCE_LEAF_OFFSET + i], target, can_collide, + local_candidates); +#ifdef __APPLE__ } - }); - } +#endif + } + }); } } // namespace @@ -607,12 +608,14 @@ void LBVH::detect_candidates( const std::function& can_collide, std::vector& candidates) { + if (source.empty() || target.empty()) { + return; + } + tbb::enumerable_thread_specific> storage; independent_traversal( source, target, can_collide, storage); - // simultaneous_traversal( - // source, target, can_collide, storage); merge_thread_local_vectors(storage, candidates); } @@ -620,7 +623,7 @@ void LBVH::detect_candidates( void LBVH::detect_vertex_vertex_candidates( std::vector& candidates) const { - if (vertex_boxes.empty()) { + if (!has_vertices()) { return; } @@ -632,7 +635,7 @@ void LBVH::detect_vertex_vertex_candidates( void LBVH::detect_edge_vertex_candidates( std::vector& candidates) const { - if (edge_boxes.empty() || vertex_boxes.empty()) { + if (!has_edges() || !has_vertices()) { return; } @@ -648,7 +651,7 @@ void LBVH::detect_edge_vertex_candidates( void LBVH::detect_edge_edge_candidates( std::vector& candidates) const { - if (edge_boxes.empty()) { + if (!has_edges()) { return; } @@ -662,7 +665,7 @@ void LBVH::detect_edge_edge_candidates( void LBVH::detect_face_vertex_candidates( std::vector& candidates) const { - if (face_boxes.empty() || vertex_boxes.empty()) { + if (!has_faces() || !has_vertices()) { return; } @@ -677,7 +680,7 @@ void LBVH::detect_face_vertex_candidates( void LBVH::detect_edge_face_candidates( std::vector& candidates) const { - if (edge_boxes.empty() || face_boxes.empty()) { + if (!has_edges() || !has_faces()) { return; } @@ -692,7 +695,7 @@ void LBVH::detect_edge_face_candidates( void LBVH::detect_face_face_candidates( std::vector& candidates) const { - if (face_boxes.empty()) { + if (!has_faces()) { return; } @@ -701,4 +704,81 @@ void LBVH::detect_face_face_candidates( face_bvh, std::bind(&LBVH::can_faces_collide, this, _1, _2), candidates); } + +// ============================================================================ + +bool LBVH::can_edge_vertex_collide(size_t ei, size_t vi) const +{ + assert(ei < edge_vertex_ids.size()); + const auto& [e0i, e1i] = edge_vertex_ids[ei]; + + return vi != e0i && vi != e1i + && (can_vertices_collide(vi, e0i) || can_vertices_collide(vi, e1i)); +} + +bool LBVH::can_edges_collide(size_t eai, size_t ebi) const +{ + assert(eai < edge_vertex_ids.size()); + const auto& [ea0i, ea1i] = edge_vertex_ids[eai]; + assert(ebi < edge_vertex_ids.size()); + const auto& [eb0i, eb1i] = edge_vertex_ids[ebi]; + + const bool share_endpoint = + ea0i == eb0i || ea0i == eb1i || ea1i == eb0i || ea1i == eb1i; + + return !share_endpoint + && (can_vertices_collide(ea0i, eb0i) || can_vertices_collide(ea0i, eb1i) + || can_vertices_collide(ea1i, eb0i) + || can_vertices_collide(ea1i, eb1i)); +} + +bool LBVH::can_face_vertex_collide(size_t fi, size_t vi) const +{ + assert(fi < face_vertex_ids.size()); + const auto& [f0i, f1i, f2i] = face_vertex_ids[fi]; + + return vi != f0i && vi != f1i && vi != f2i + && (can_vertices_collide(vi, f0i) || can_vertices_collide(vi, f1i) + || can_vertices_collide(vi, f2i)); +} + +bool LBVH::can_edge_face_collide(size_t ei, size_t fi) const +{ + assert(ei < edge_vertex_ids.size()); + const auto& [e0i, e1i] = edge_vertex_ids[ei]; + assert(fi < face_vertex_ids.size()); + const auto& [f0i, f1i, f2i] = face_vertex_ids[fi]; + + const bool share_endpoint = e0i == f0i || e0i == f1i || e0i == f2i + || e1i == f0i || e1i == f1i || e1i == f2i; + + return !share_endpoint + && (can_vertices_collide(e0i, f0i) || can_vertices_collide(e0i, f1i) + || can_vertices_collide(e0i, f2i) || can_vertices_collide(e1i, f0i) + || can_vertices_collide(e1i, f1i) + || can_vertices_collide(e1i, f2i)); +} + +bool LBVH::can_faces_collide(size_t fai, size_t fbi) const +{ + assert(fai < face_vertex_ids.size()); + const auto& [fa0i, fa1i, fa2i] = face_vertex_ids[fai]; + assert(fbi < face_vertex_ids.size()); + const auto& [fb0i, fb1i, fb2i] = face_vertex_ids[fbi]; + + const bool share_endpoint = fa0i == fb0i || fa0i == fb1i || fa0i == fb2i + || fa1i == fb0i || fa1i == fb1i || fa1i == fb2i || fa2i == fb0i + || fa2i == fb1i || fa2i == fb2i; + + return !share_endpoint + && (can_vertices_collide(fa0i, fb0i) || can_vertices_collide(fa0i, fb1i) + || can_vertices_collide(fa0i, fb2i) + || can_vertices_collide(fa1i, fb0i) + || can_vertices_collide(fa1i, fb1i) + || can_vertices_collide(fa1i, fb2i) + || can_vertices_collide(fa2i, fb0i) + || can_vertices_collide(fa2i, fb1i) + || can_vertices_collide(fa2i, fb2i)); +} + } // namespace ipc \ No newline at end of file diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index bf18bc445..509374f40 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -14,50 +14,52 @@ class LBVH : public BroadPhase { struct Node { static constexpr int32_t INVALID_POINTER = 0x0; // do not change - // true to use absolute pointers (left/right child pointer is the - // absolute index of the child in the buffer/array) or false for - // relative pointers (left/right child pointer is the relative pointer - // from the parent index to the child index in the buffer, i.e. absolute - // child pointer = absolute parent pointer + relative child pointer) - static constexpr bool ABSOLUTE_POINTERS = true; - - // helper function to handle relative pointers on - // CPU side, i.e. convert them to absolute - // pointers for array indexing - static constexpr uint32_t pointer(uint32_t index, uint32_t pointer) - { - return ABSOLUTE_POINTERS ? pointer : index + pointer; - } - /// @brief The min corner of the AABB - ArrayMax3d aabb_min; + Eigen::Array3f aabb_min; /// @brief The max corner of the AABB - ArrayMax3d aabb_max; - /// @brief The vertex ids (v0, v1, v2) - std::array vertex_ids = { { -1, -1, -1 } }; - /// @brief Pointer to the left child or INVALID_POINTER in case of leaf - int left = -1; - /// @brief Pointer to the right child or INVALID_POINTER in case of leaf - int right = -1; - /// @brief The primitive id (INVALID_ID <=> inner node) - index_t primitive_id = -1; - - bool is_leaf() const - { - assert(is_valid()); - return left == INVALID_POINTER && right == INVALID_POINTER; - } - - bool is_inner() const - { - return left != INVALID_POINTER && right != INVALID_POINTER; - } - + Eigen::Array3f aabb_max; + + // Union to overlap Leaf data and Internal Node data. + // This compresses the Node size to 64 bytes (1 cache line), + // reducing cache misses during traversal. + union { + struct { + /// @brief Pointer to the left child or INVALID_POINTER in case of leaf + int left; + /// @brief Pointer to the right child or INVALID_POINTER in case of leaf + int right; + }; + + struct { + /// @brief The primitive id (INVALID_ID <=> inner node) + /// @note We use this to distinguish leaves from internal nodes to safely access the union. + int primitive_id; + + /// @brief Marker to indicate this is an inner node + /// If is_inner == 0 then right = 0 which is INVALID_POINTER + /// If is_inner != 0 then right = actual right pointer + int is_inner_marker; + }; + }; + + /// @brief Default constructor + Node(); + + /// @brief Check if this node is an inner node. + bool is_inner() const { return is_inner_marker; } + + /// @brief Check if this node is a leaf node. + bool is_leaf() const { return !is_inner(); } + + /// @brief Check if this node is valid. bool is_valid() const { - return !((left == INVALID_POINTER) ^ (right == INVALID_POINTER)); + return is_inner() + ? (left != INVALID_POINTER && right != INVALID_POINTER) + : primitive_id >= 0; } + /// @brief Check if this node's AABB intersects with another node's AABB. bool intersects(const Node& other) const { return (aabb_min <= other.aabb_max).all() @@ -167,16 +169,38 @@ class LBVH : public BroadPhase { source_and_target, source_and_target, can_collide, candidates); } + // ------------------------------------------------------------------------- + // BroadPhase::*_boxes are cleared, so we have to override these methods to + // not depend on them. + bool has_vertices() const { return !vertex_bvh.empty(); } + bool has_edges() const { return !edge_bvh.empty(); } + bool has_faces() const { return !face_bvh.empty(); } + bool can_edge_vertex_collide(size_t ei, size_t vi) const override; + bool can_edges_collide(size_t eai, size_t ebi) const override; + bool can_face_vertex_collide(size_t fi, size_t vi) const override; + bool can_edge_face_collide(size_t ei, size_t fi) const override; + bool can_faces_collide(size_t fai, size_t fbi) const override; + // ------------------------------------------------------------------------- + /// @brief BVH containing the vertices. std::vector vertex_bvh; /// @brief BVH containing the edges. std::vector edge_bvh; /// @brief BVH containing the faces. std::vector face_bvh; + + /// @brief Edge vertices in the original mesh order. + std::vector> edge_vertex_ids; + /// @brief Face vertices in the original mesh order. + std::vector> face_vertex_ids; + + /// @brief Dimension of the simulation for which the broad phase was built. + int dim; + /// @brief The axis-aligned bounding box of the entire mesh. struct { - ArrayMax3d min; - ArrayMax3d max; + Eigen::Array3d min; + Eigen::Array3d max; } mesh_aabb; }; diff --git a/src/ipc/math/CMakeLists.txt b/src/ipc/math/CMakeLists.txt index f94256b8e..b08bec9c8 100644 --- a/src/ipc/math/CMakeLists.txt +++ b/src/ipc/math/CMakeLists.txt @@ -4,6 +4,7 @@ set(SOURCES math.cpp math.hpp math.tpp + morton.hpp ) target_sources(ipc_toolkit PRIVATE ${SOURCES}) \ No newline at end of file diff --git a/src/ipc/math/morton.hpp b/src/ipc/math/morton.hpp new file mode 100644 index 000000000..fd3ac413c --- /dev/null +++ b/src/ipc/math/morton.hpp @@ -0,0 +1,65 @@ +#pragma once + +#include // for std::clamp +#include // for uint64_t + +namespace ipc { + +/// @brief Expands a 32-bit integer into 64 bits by inserting 1 zero after each bit. +/// @param v The 32-bit integer to expand. +/// @return The expanded 64-bit integer. +inline uint64_t expand_bits_1(uint64_t v) +{ + v = (v | (v << 16)) & 0x0000FFFF0000FFFF; + v = (v | (v << 8)) & 0x00FF00FF00FF00FF; + v = (v | (v << 4)) & 0x0F0F0F0F0F0F0F0F; + v = (v | (v << 2)) & 0x3333333333333333; + v = (v | (v << 1)) & 0x5555555555555555; + return v; +} + +/// @brief Expands a 21-bit integer into 63 bits by inserting 2 zeros after each bit. +/// @param v The 21-bit integer to expand. +/// @return The expanded 63-bit integer. +inline uint64_t expand_bits_2(uint64_t v) +{ + v = (v | v << 32) & 0x1F00000000FFFF; + v = (v | v << 16) & 0x1F0000FF0000FF; + v = (v | v << 8) & 0x100F00F00F00F00F; + v = (v | v << 4) & 0x10C30C30C30C30C3; + v = (v | v << 2) & 0x1249249249249249; + return v; +} + +/// @brief Calculates a 64-bit Morton code for the given 2D point located within the unit square [0,1]. +/// @param x The x-coordinate of the point. +/// @param y The y-coordinate of the point. +/// @return The 64-bit Morton code. +inline uint64_t morton_2D(double x, double y) +{ + constexpr double scale = 1ul << 32; + x = std::clamp(x * scale, 0.0, scale - 1); + y = std::clamp(y * scale, 0.0, scale - 1); + uint64_t xx = expand_bits_1(uint64_t(x)); + uint64_t yy = expand_bits_1(uint64_t(y)); + return (xx << 1) | yy; +} + +/// @brief Calculates a 63-bit Morton code for the given 3D point located within the unit cube [0,1]. +/// @param x The x-coordinate of the point. +/// @param y The y-coordinate of the point. +/// @param z The z-coordinate of the point. +/// @return The 63-bit Morton code. +inline uint64_t morton_3D(double x, double y, double z) +{ + constexpr double scale = 1 << 21; + x = std::clamp(x * scale, 0.0, scale - 1); + y = std::clamp(y * scale, 0.0, scale - 1); + z = std::clamp(z * scale, 0.0, scale - 1); + uint64_t xx = expand_bits_2(uint64_t(x)); + uint64_t yy = expand_bits_2(uint64_t(y)); + uint64_t zz = expand_bits_2(uint64_t(z)); + return (xx << 2) | (yy << 1) | zz; +} + +} // namespace ipc \ No newline at end of file diff --git a/src/ipc/utils/eigen_ext.hpp b/src/ipc/utils/eigen_ext.hpp index 943f8ff8e..ffdda82eb 100644 --- a/src/ipc/utils/eigen_ext.hpp +++ b/src/ipc/utils/eigen_ext.hpp @@ -217,10 +217,10 @@ project_to_psd( /// @brief Convert a 2D or 3D vector to a 3D vector. /// @param v Vector to convert, can be 2D or 3D. /// @return Converted 3D vector. If 2D, the z-component is set to 0. -inline Eigen::Vector3d to_3D(Eigen::ConstRef v) +inline Eigen::Array3d to_3D(Eigen::ConstRef v) { assert(v.size() == 2 || v.size() == 3); - return v.size() == 2 ? Eigen::Vector3d(v.x(), v.y(), 0) : v.head<3>(); + return v.size() == 2 ? Eigen::Array3d(v.x(), v.y(), 0) : v.head<3>(); } // TODO: Change return type to Eigen::MatrixX3f diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index bb1cf01b8..65c238c2e 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -8,6 +8,8 @@ // #include #include +#include + #include #include @@ -21,11 +23,11 @@ namespace { bool is_aabb_union(LBVH::Node parent, LBVH::Node childA, LBVH::Node childB) { AABB children; - children.min = childA.aabb_min.min(childB.aabb_min); - children.max = childA.aabb_max.max(childB.aabb_max); + children.min = childA.aabb_min.min(childB.aabb_min).cast(); + children.max = childA.aabb_max.max(childB.aabb_max).cast(); constexpr float EPS = 1e-4f; - return (abs(parent.aabb_max - children.max) < EPS).all() - && (abs(parent.aabb_min - children.min) < EPS).all(); + return (abs(parent.aabb_max.cast() - children.max) < EPS).all() + && (abs(parent.aabb_min.cast() - children.min) < EPS).all(); } void traverse_lbvh( @@ -45,25 +47,22 @@ void traverse_lbvh( CHECK(!visited[index]); visited[index] = true; - uint32_t left_child_index = LBVH::Node::pointer(index, node.left); - uint32_t right_child_index = LBVH::Node::pointer(index, node.right); - // verify aabbs - LBVH::Node childA = lbvh_nodes[left_child_index]; - LBVH::Node childB = lbvh_nodes[right_child_index]; + LBVH::Node childA = lbvh_nodes[node.left]; + LBVH::Node childB = lbvh_nodes[node.right]; { CAPTURE( - index, left_child_index, right_child_index, - node.aabb_min.transpose(), childA.aabb_min.transpose(), - childB.aabb_min.transpose(), node.aabb_max.transpose(), - childA.aabb_max.transpose(), childB.aabb_max.transpose()); + index, node.left, node.right, node.aabb_min.transpose(), + childA.aabb_min.transpose(), childB.aabb_min.transpose(), + node.aabb_max.transpose(), childA.aabb_max.transpose(), + childB.aabb_max.transpose()); CHECK(is_aabb_union(node, childA, childB)); } // continue traversal - traverse_lbvh(lbvh_nodes, left_child_index, visited); - traverse_lbvh(lbvh_nodes, right_child_index, visited); + traverse_lbvh(lbvh_nodes, node.left, visited); + traverse_lbvh(lbvh_nodes, node.right, visited); } } @@ -126,6 +125,38 @@ TEST_CASE("LBVH::build", "[broad_phase][lbvh]") CHECK(lbvh->face_nodes().empty()); } +namespace { + +/// @brief Checks if every candidate in the expected vector is present in the actual vector. +/// @tparam Candidate The type of the candidate (e.g., VertexVertexCandidate, EdgeEdgeCandidate). +/// @param actual The vector of candidates found by the algorithm. +/// @param expected The vector of candidates that are expected to be found. +/// @return true If all candidates in `expected` are found in `actual`. +/// @return false Otherwise. +template +bool contains_all_candidates( + std::vector actual, std::vector expected) +{ + // 1. Sort the actual candidates to prepare for set operations + tbb::parallel_sort(actual.begin(), actual.end()); + // Ensure 'actual' has no duplicates to treat it as a mathematical set + REQUIRE(std::unique(actual.begin(), actual.end()) == actual.end()); + + // 2. Sort the expected candidates + tbb::parallel_sort(expected.begin(), expected.end()); + // Ensure 'expected' has no duplicates to treat it as a mathematical set + REQUIRE(std::unique(expected.begin(), expected.end()) == expected.end()); + + // 3. Check if 'expected' is a subset of 'actual' + // std::includes requires both ranges to be sorted. + // It returns true if every element in the second range is found in the + // first range. + return std::includes( + actual.begin(), actual.end(), expected.begin(), expected.end()); +} + +} // namespace + TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") { constexpr double inflation_radius = 0; @@ -141,6 +172,28 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") mesh_t0 = "cloth_ball92.ply"; mesh_t1 = "cloth_ball93.ply"; } +#ifdef NDEBUG + SECTION("Armadillo-Rollers") + { + mesh_t0 = "armadillo-rollers/326.ply"; + mesh_t1 = "armadillo-rollers/327.ply"; + } + SECTION("Cloth-Funnel") + { + mesh_t0 = "cloth-funnel/227.ply"; + mesh_t1 = "cloth-funnel/228.ply"; + } + SECTION("N-Body-Simulation") + { + mesh_t0 = "n-body-simulation/balls16_18.ply"; + mesh_t1 = "n-body-simulation/balls16_19.ply"; + } + SECTION("Rod-Twist") + { + mesh_t0 = "rod-twist/3036.ply"; + mesh_t1 = "rod-twist/3037.ply"; + } +#endif // SECTION("Puffer-Ball") // { // mesh_t0 = "puffer-ball/20.ply"; @@ -166,8 +219,8 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") std::vector expected_vv_candidates; bvh->detect_vertex_vertex_candidates(expected_vv_candidates); - CHECK(vv_candidates.size() == expected_vv_candidates.size()); - // TODO: Check the candidates are the same + CHECK(vv_candidates.size() >= expected_vv_candidates.size()); + contains_all_candidates(vv_candidates, expected_vv_candidates); } { @@ -177,8 +230,8 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") std::vector expected_ev_candidates; bvh->detect_edge_vertex_candidates(expected_ev_candidates); - CHECK(ev_candidates.size() == expected_ev_candidates.size()); - // TODO: Check the candidates are the same + CHECK(ev_candidates.size() >= expected_ev_candidates.size()); + contains_all_candidates(ev_candidates, expected_ev_candidates); } { @@ -188,8 +241,8 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") std::vector expected_ee_candidates; bvh->detect_edge_edge_candidates(expected_ee_candidates); - CHECK(ee_candidates.size() == expected_ee_candidates.size()); - // TODO: Check the candidates are the same + CHECK(ee_candidates.size() >= expected_ee_candidates.size()); + contains_all_candidates(ee_candidates, expected_ee_candidates); } { @@ -199,8 +252,8 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") std::vector expected_fv_candidates; bvh->detect_face_vertex_candidates(expected_fv_candidates); - CHECK(fv_candidates.size() == expected_fv_candidates.size()); - // TODO: Check the candidates are the same + CHECK(fv_candidates.size() >= expected_fv_candidates.size()); + contains_all_candidates(fv_candidates, expected_fv_candidates); } { @@ -210,8 +263,8 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") std::vector expected_ef_candidates; bvh->detect_edge_face_candidates(expected_ef_candidates); - CHECK(ef_candidates.size() == expected_ef_candidates.size()); - // TODO: Check the candidates are the same + CHECK(ef_candidates.size() >= expected_ef_candidates.size()); + contains_all_candidates(ef_candidates, expected_ef_candidates); } { @@ -221,8 +274,8 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") std::vector expected_ff_candidates; bvh->detect_face_face_candidates(expected_ff_candidates); - CHECK(ff_candidates.size() == expected_ff_candidates.size()); - // TODO: Check the candidates are the same + CHECK(ff_candidates.size() >= expected_ff_candidates.size()); + contains_all_candidates(ff_candidates, expected_ff_candidates); } #ifdef IPC_TOOLKIT_WITH_PROFILER From d9ae2da0a5aea5ee479b25777fe709aec4a1e803 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 19 Jan 2026 12:08:58 -0500 Subject: [PATCH 11/25] Fix test LBVH::build --- tests/src/tests/broad_phase/test_lbvh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 65c238c2e..8b0633c23 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -38,7 +38,7 @@ void traverse_lbvh( const LBVH::Node& node = lbvh_nodes[index]; CHECK(node.is_valid()); - if (node.left == LBVH::Node::INVALID_POINTER) { + if (node.is_leaf()) { // leaf CHECK(!visited[index]); visited[index] = true; From 4b173cf9e95fd1b6cb87b06571aef537befeedeb Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 19 Jan 2026 13:38:27 -0500 Subject: [PATCH 12/25] Fix LBVH in 2D --- src/ipc/broad_phase/lbvh.cpp | 9 +++++---- src/ipc/broad_phase/lbvh.hpp | 6 +++--- tests/src/tests/main.cpp | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index fc4da5908..88aa0f7f9 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -9,7 +9,7 @@ #include #include -#if defined(__APPLE__) +#ifdef __APPLE__ // We utilize SIMD registers to compare 1 Node against 4 Queries simultaneously. #include #endif @@ -37,11 +37,11 @@ namespace { } } // namespace -LBVH::LBVH() : BroadPhase() { } +LBVH::LBVH() : BroadPhase(), dim(0) { } LBVH::~LBVH() = default; -// Initialize defaults +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) LBVH::Node::Node() : primitive_id(INVALID_ID), is_inner_marker(0) { static_assert( @@ -214,7 +214,8 @@ void LBVH::init_bvh( for (size_t i = r.begin(); i < r.end(); i++) { const auto& box = boxes[i]; - const Eigen::Array3d center = 0.5 * (box.min + box.max); + const Eigen::Array3d center = + to_3D(0.5 * (box.min + box.max)); const Eigen::Array3d mapped_center = (center - mesh_aabb.min) / mesh_width; diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index 509374f40..d88ba2f0e 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -194,14 +194,14 @@ class LBVH : public BroadPhase { /// @brief Face vertices in the original mesh order. std::vector> face_vertex_ids; - /// @brief Dimension of the simulation for which the broad phase was built. - int dim; - /// @brief The axis-aligned bounding box of the entire mesh. struct { Eigen::Array3d min; Eigen::Array3d max; } mesh_aabb; + + /// @brief Dimension of the simulation for which the broad phase was built. + uint8_t dim; }; } // namespace ipc \ No newline at end of file diff --git a/tests/src/tests/main.cpp b/tests/src/tests/main.cpp index 19e734c78..5d9c0a594 100644 --- a/tests/src/tests/main.cpp +++ b/tests/src/tests/main.cpp @@ -43,7 +43,7 @@ int main(int argc, char** argv) using namespace Catch::Clara; auto cli = session.cli(); - int log_level = spdlog::level::warn; + int log_level = spdlog::level::err; cli |= Opt([&log_level](int const d) { return parse_log_level(d, log_level); }, "log_level")["--log"]["--logger-level"]( From 0cf9fea21c814ace2ab703c2e8c708c037c8dec1 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 19 Jan 2026 22:11:30 -0500 Subject: [PATCH 13/25] Enhance profiler functionality with CSV output and improve code structure --- CMakeLists.txt | 18 ++++----- python/examples/lbvh_profile.csv | 12 ++++++ python/examples/profiler.py | 41 +++++++++++++++++++ src/ipc/utils/profiler.cpp | 69 ++++++++++++++++++++++++++++---- src/ipc/utils/profiler.hpp | 69 ++++++++++++-------------------- 5 files changed, 149 insertions(+), 60 deletions(-) create mode 100644 python/examples/lbvh_profile.csv create mode 100644 python/examples/profiler.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 920dcd244..c45319432 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,17 +74,17 @@ else() set(IPC_TOOLKIT_BUILD_PYTHON OFF CACHE BOOL "Build Python bindings" FORCE) endif() -option(IPC_TOOLKIT_WITH_CUDA "Enable CUDA CCD" OFF) -option(IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION "Use rational edge-triangle intersection check" OFF) -option(IPC_TOOLKIT_WITH_ROBIN_MAP "Use Tessil's robin-map rather than std maps" ON) -option(IPC_TOOLKIT_WITH_ABSEIL "Use Abseil's hash functions" ON) -option(IPC_TOOLKIT_WITH_FILIB "Use filib for interval arithmetic" ON) -option(IPC_TOOLKIT_WITH_INEXACT_CCD "Use the original inexact CCD method of IPC" OFF) -option(IPC_TOOLKIT_WITH_PROFILER "Enable performance profiler" ${IPC_TOOLKIT_TOPLEVEL_PROJECT}) +option(IPC_TOOLKIT_WITH_CUDA "Enable CUDA CCD" OFF) +option(IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION "Use rational edge-triangle intersection check" OFF) +option(IPC_TOOLKIT_WITH_ROBIN_MAP "Use Tessil's robin-map rather than std maps" ON) +option(IPC_TOOLKIT_WITH_ABSEIL "Use Abseil's hash functions" ON) +option(IPC_TOOLKIT_WITH_FILIB "Use filib for interval arithmetic" ON) +option(IPC_TOOLKIT_WITH_INEXACT_CCD "Use the original inexact CCD method of IPC" OFF) +option(IPC_TOOLKIT_WITH_PROFILER "Enable performance profiler" OFF) # Advanced options -option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" OFF) -option(IPC_TOOLKIT_WITH_CODE_COVERAGE "Enable coverage reporting" OFF) +option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" OFF) +option(IPC_TOOLKIT_WITH_CODE_COVERAGE "Enable coverage reporting" OFF) mark_as_advanced(IPC_TOOLKIT_WITH_SIMD) # This does not work reliably mark_as_advanced(IPC_TOOLKIT_WITH_CODE_COVERAGE) # This is used in GitHub Actions diff --git a/python/examples/lbvh_profile.csv b/python/examples/lbvh_profile.csv new file mode 100644 index 000000000..d7322595c --- /dev/null +++ b/python/examples/lbvh_profile.csv @@ -0,0 +1,12 @@ +Id,Parent,Name,Time (ms),Count +7,,LBVH::detect_edge_edge_candidates,331.688,6 +8,,LBVH::detect_edge_face_candidates,209.872,6 +9,,LBVH::detect_edge_vertex_candidates,139.502,6 +10,,LBVH::detect_face_face_candidates,100.436,6 +11,,LBVH::detect_face_vertex_candidates,131.918,6 +12,,LBVH::detect_vertex_vertex_candidates,17.2223,6 +13,,LBVH::init_bvh,21.2826,18 +14,13,build_hierarchy,8.12017,18 +15,13,compute_morton_codes,1.76938,18 +16,13,populate_boxes,2.26558,18 +17,13,sort_morton_codes,5.25754,18 diff --git a/python/examples/profiler.py b/python/examples/profiler.py new file mode 100644 index 000000000..3ba9e157b --- /dev/null +++ b/python/examples/profiler.py @@ -0,0 +1,41 @@ +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import pathlib + +def plot(csv_path): + df = pd.read_csv(csv_path, na_filter=False) + df["Time (s)"] = df["Time (ms)"] / 1000.0 + # df["Parent"] = df["Parent"].astype(str) + # df["Id"] = df["Id"].astype(str) + + fig = px.sunburst( + df, + ids="Id", + names="Name", + parents="Parent", + values="Time (ms)", + branchvalues="total", + color="Parent", + ) + fig.update_traces( + hovertemplate="%{label}
Time (ms): %{value}
Call Count: %{customdata[0]}
Percentage of Parent: %{percentParent:.2%}", + customdata=df[["Count"]].values, + # tiling=dict( + # orientation='v' + # ) + ) + fig.update_layout( + title_x=0.5, + title_y=0.95, + margin=dict(t=0, l=0, r=0, b=0), + width=800, + height=800, + template="plotly_white", + ) + # fig.write_image(f"icicle_{pathlib.Path(csv_path).stem}.png", scale=2) + # fig.write_image(f"sunburst_{pathlib.Path(csv_path).stem}.png", scale=2) + fig.show() + return fig + +plot(pathlib.Path(__file__).parent / "lbvh_profile.csv") \ No newline at end of file diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp index 986250e5f..12bbfaeb6 100644 --- a/src/ipc/utils/profiler.cpp +++ b/src/ipc/utils/profiler.cpp @@ -7,12 +7,61 @@ namespace ipc { +Profiler::Profiler() { } + Profiler& profiler() { static Profiler instance; return instance; } +void Profiler::clear() { m_data.clear(); } + +void Profiler::start(const std::string& name) +{ + current_scope.push_back(name); + if (!m_data.contains(current_scope)) { + m_data[current_scope] = { + { "time_ms", 0 }, + { "count", 0 }, + }; + } +} + +void Profiler::stop(const double time_ms) +{ + const static std::string log_fmt_text = fmt::format( + "[{}] {{}} {{:.6f}} ms", + fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); + + logger().trace( + fmt::runtime(log_fmt_text), current_scope.to_string(), time_ms); + + assert(m_data.contains(current_scope)); + assert(m_data.at(current_scope).contains("time_ms")); + assert(m_data.at(current_scope).contains("count")); + m_data[current_scope]["time_ms"] = + m_data[current_scope]["time_ms"].get() + time_ms; + m_data[current_scope]["count"] = + m_data[current_scope]["count"].get() + 1; + current_scope.pop_back(); +} + +void Profiler::reset() +{ + m_data.clear(); + // reset the calling thread's scope + current_scope = nlohmann::json::json_pointer(); // root +} + +void Profiler::print() const +{ + logger().info( + "[{}] profiler: {}", + fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing"), + m_data.dump(2)); +} + void Profiler::write_csv(const std::string& filename) const { std::ofstream file(filename); @@ -26,7 +75,7 @@ void Profiler::write_csv(const std::string& filename) const void Profiler::write_csv(std::ostream& os) const { - os << "Parent,Name,Time (ms),Count\n"; + os << "Id,Parent,Name,Time (ms),Count\n"; if (m_data.empty()) { os << std::flush; @@ -35,29 +84,33 @@ void Profiler::write_csv(std::ostream& os) const // Print the profiler data in CSV format using a breadth-first traversal const nlohmann::json::json_pointer root; - std::queue queue; - queue.push(root); + // parent id, pointer + std::queue> queue; + queue.push(std::make_pair(-1, root)); + int id = -1; while (!queue.empty()) { - const auto ptr = queue.front(); - const auto parent = ptr.parent_pointer(); + const auto [parent_id, ptr] = queue.front(); queue.pop(); assert(m_data.contains(ptr)); const auto& data = ptr == root ? m_data : m_data.at(ptr); if (ptr != root) { os << fmt::format( - "{},{},{:.6g},{:d}\n", parent == root ? "" : parent.back(), - ptr.back(), data.at("time_ms").get(), + "{:d},{},{},{:.6g},{:d}\n", id, + parent_id == -1 ? "" : std::to_string(parent_id), ptr.back(), + data.at("time_ms").get(), data.at("count").get()); } // Traverse child scopes for (const auto& [key, val] : data.items()) { if (val.is_object()) { - queue.push(ptr / key); + queue.push(std::make_pair(id, ptr / key)); } } + + ++id; } os << std::flush; diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index f64611569..73ff266c6 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -27,65 +27,48 @@ namespace ipc { class Profiler { public: - Profiler() = default; + Profiler(); // ~Profiler() { print(); } - void clear() { m_data.clear(); } + /// @brief Clear all profiling data. + void clear(); - void start(const std::string& name) - { - current_scope.push_back(name); - if (!m_data.contains(current_scope)) { - m_data[current_scope] = { - { "time_ms", 0 }, - { "count", 0 }, - }; - } - } + /// @brief Start timing a new scope. + /// @param name The name of the scope. + void start(const std::string& name); - void stop(const double time_ms) - { - const static std::string log_fmt_text = fmt::format( - "[{}] {{}} {{:.6f}} ms", - fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); - - logger().trace( - fmt::runtime(log_fmt_text), current_scope.to_string(), time_ms); - - assert(m_data.contains(current_scope)); - assert(m_data.at(current_scope).contains("time_ms")); - assert(m_data.at(current_scope).contains("count")); - m_data[current_scope]["time_ms"] = - m_data[current_scope]["time_ms"].get() + time_ms; - m_data[current_scope]["count"] = - m_data[current_scope]["count"].get() + 1; - current_scope.pop_back(); - } + /// @brief Stop timing the current scope and record the elapsed time. + /// @param time_ms The elapsed time in milliseconds. + void stop(const double time_ms); - void reset() - { - m_data.clear(); - current_scope = nlohmann::json::json_pointer(); // root - } + /// @brief Reset the profiler data and scopes. + void reset(); - void print() const - { - logger().info( - "[{}] profiler: {}", - fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing"), - m_data.dump(2)); - } + /// @brief Print the profiler data to the logger. + void print() const; + + /// @brief Write the profiler data to an output stream in CSV format. + void write_csv(std::ostream& os) const; + /// @brief Write the profiler data to a CSV file. void write_csv(const std::string& filename) const; + + /// @brief Print the profiler data to standard output in CSV format. void print_csv() const { write_csv(std::cout); } - void write_csv(std::ostream& os) const; + // Return a snapshot copy of the profiler data to avoid exposing internal + // structure that would require external synchronization. const nlohmann::json& data() const { return m_data; } + + // Non-const accessor that also returns a copy. nlohmann::json& data() { return m_data; } protected: + /// @brief The profiling data stored as a JSON object. nlohmann::json m_data; + + /// @brief The global scope pointer into the JSON data. nlohmann::json::json_pointer current_scope; }; From ad44e09fa47fe0ddd9d1533eef1a66fa0db15641 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 19 Jan 2026 23:45:40 -0500 Subject: [PATCH 14/25] Use 1ULL for bit-shift constants in Morton functions Updated bit-shift constants in morton_2D and morton_3D to use 1ULL instead of 1ul or 1, ensuring explicit 64-bit unsigned long long type. This improves type safety and portability across platforms with differing type sizes. --- src/ipc/math/morton.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ipc/math/morton.hpp b/src/ipc/math/morton.hpp index fd3ac413c..ccc201cda 100644 --- a/src/ipc/math/morton.hpp +++ b/src/ipc/math/morton.hpp @@ -37,7 +37,7 @@ inline uint64_t expand_bits_2(uint64_t v) /// @return The 64-bit Morton code. inline uint64_t morton_2D(double x, double y) { - constexpr double scale = 1ul << 32; + constexpr double scale = 1ULL << 32; x = std::clamp(x * scale, 0.0, scale - 1); y = std::clamp(y * scale, 0.0, scale - 1); uint64_t xx = expand_bits_1(uint64_t(x)); @@ -52,7 +52,7 @@ inline uint64_t morton_2D(double x, double y) /// @return The 63-bit Morton code. inline uint64_t morton_3D(double x, double y, double z) { - constexpr double scale = 1 << 21; + constexpr double scale = 1ULL << 21; x = std::clamp(x * scale, 0.0, scale - 1); y = std::clamp(y * scale, 0.0, scale - 1); z = std::clamp(z * scale, 0.0, scale - 1); From d1504772f202d77af5661294b6f288869d391bc8 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 20 Jan 2026 12:21:39 -0500 Subject: [PATCH 15/25] Resolve Copilot comments --- python/examples/profiler.py | 1 - src/ipc/broad_phase/lbvh.cpp | 2 +- src/ipc/broad_phase/lbvh.hpp | 10 +++++----- src/ipc/utils/profiler.hpp | 5 ++--- tests/src/tests/broad_phase/test_lbvh.cpp | 12 ++++++------ 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/python/examples/profiler.py b/python/examples/profiler.py index 3ba9e157b..6e853a55f 100644 --- a/python/examples/profiler.py +++ b/python/examples/profiler.py @@ -1,6 +1,5 @@ import pandas as pd import plotly.express as px -import plotly.graph_objects as go import pathlib def plot(csv_path): diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 88aa0f7f9..06ff938e9 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -105,7 +105,7 @@ namespace { uint64_t code_i, int j) { - if (j < 0 || j > sorted_morton_codes.size() - 1) { + if (j < 0 || j >= sorted_morton_codes.size()) { return -1; } uint64_t code_j = sorted_morton_codes[j].morton_code; diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index d88ba2f0e..21fe2f928 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -20,25 +20,25 @@ class LBVH : public BroadPhase { Eigen::Array3f aabb_max; // Union to overlap Leaf data and Internal Node data. - // This compresses the Node size to 64 bytes (1 cache line), + // This compresses the Node size to 32 bytes (2 per cache line), // reducing cache misses during traversal. union { struct { /// @brief Pointer to the left child or INVALID_POINTER in case of leaf - int left; + int32_t left; /// @brief Pointer to the right child or INVALID_POINTER in case of leaf - int right; + int32_t right; }; struct { /// @brief The primitive id (INVALID_ID <=> inner node) /// @note We use this to distinguish leaves from internal nodes to safely access the union. - int primitive_id; + int32_t primitive_id; /// @brief Marker to indicate this is an inner node /// If is_inner == 0 then right = 0 which is INVALID_POINTER /// If is_inner != 0 then right = actual right pointer - int is_inner_marker; + int32_t is_inner_marker; }; }; diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index 73ff266c6..2f560247e 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -57,11 +57,10 @@ class Profiler { /// @brief Print the profiler data to standard output in CSV format. void print_csv() const { write_csv(std::cout); } - // Return a snapshot copy of the profiler data to avoid exposing internal - // structure that would require external synchronization. + /// @brief Access the profiling data as a JSON object. const nlohmann::json& data() const { return m_data; } - // Non-const accessor that also returns a copy. + /// @brief Access the profiling data as a JSON object. nlohmann::json& data() { return m_data; } protected: diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 8b0633c23..128fa3d00 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -220,7 +220,7 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") bvh->detect_vertex_vertex_candidates(expected_vv_candidates); CHECK(vv_candidates.size() >= expected_vv_candidates.size()); - contains_all_candidates(vv_candidates, expected_vv_candidates); + CHECK(contains_all_candidates(vv_candidates, expected_vv_candidates)); } { @@ -231,7 +231,7 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") bvh->detect_edge_vertex_candidates(expected_ev_candidates); CHECK(ev_candidates.size() >= expected_ev_candidates.size()); - contains_all_candidates(ev_candidates, expected_ev_candidates); + CHECK(contains_all_candidates(ev_candidates, expected_ev_candidates)); } { @@ -242,7 +242,7 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") bvh->detect_edge_edge_candidates(expected_ee_candidates); CHECK(ee_candidates.size() >= expected_ee_candidates.size()); - contains_all_candidates(ee_candidates, expected_ee_candidates); + CHECK(contains_all_candidates(ee_candidates, expected_ee_candidates)); } { @@ -253,7 +253,7 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") bvh->detect_face_vertex_candidates(expected_fv_candidates); CHECK(fv_candidates.size() >= expected_fv_candidates.size()); - contains_all_candidates(fv_candidates, expected_fv_candidates); + CHECK(contains_all_candidates(fv_candidates, expected_fv_candidates)); } { @@ -264,7 +264,7 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") bvh->detect_edge_face_candidates(expected_ef_candidates); CHECK(ef_candidates.size() >= expected_ef_candidates.size()); - contains_all_candidates(ef_candidates, expected_ef_candidates); + CHECK(contains_all_candidates(ef_candidates, expected_ef_candidates)); } { @@ -275,7 +275,7 @@ TEST_CASE("LBVH::detect_*_candidates", "[broad_phase][lbvh]") bvh->detect_face_face_candidates(expected_ff_candidates); CHECK(ff_candidates.size() >= expected_ff_candidates.size()); - contains_all_candidates(ff_candidates, expected_ff_candidates); + CHECK(contains_all_candidates(ff_candidates, expected_ff_candidates)); } #ifdef IPC_TOOLKIT_WITH_PROFILER From 9e75039d6ef3f5e75be1c8850308b93ff8480a75 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 20 Jan 2026 17:40:49 -0500 Subject: [PATCH 16/25] Resolve compiler warnings on linux --- src/ipc/barrier/barrier.hpp | 86 ++++++++++--------- src/ipc/broad_phase/aabb.cpp | 15 ++-- src/ipc/broad_phase/lbvh.hpp | 5 ++ src/ipc/broad_phase/spatial_hash.hpp | 4 +- src/ipc/broad_phase/sweep_and_prune.cpp | 20 ++--- src/ipc/broad_phase/sweep_and_prune.hpp | 2 + .../broad_phase/sweep_and_tiniest_queue.hpp | 2 + src/ipc/broad_phase/voxel_size_heuristic.cpp | 2 +- 8 files changed, 76 insertions(+), 60 deletions(-) diff --git a/src/ipc/barrier/barrier.hpp b/src/ipc/barrier/barrier.hpp index 983af4cea..e6e3a30c9 100644 --- a/src/ipc/barrier/barrier.hpp +++ b/src/ipc/barrier/barrier.hpp @@ -321,50 +321,56 @@ class TwoStageBarrier : public Barrier { public: TwoStageBarrier() = default; - /// @brief Two-stage activation barrier. - /// - /// \f\[ - /// b(d) = \begin{cases} - /// -\frac{\hat{d}^2}{4} \left(\ln\left(\frac{2d}{\hat{d}}\right) - - /// \tfrac{1}{2}\right) & d < \frac{\hat{d}}{2}\\ - /// \tfrac{1}{2} (\hat{d} - d)^2 & d < \hat{d}\\ - /// 0 & d \ge \hat{d} - /// \end{cases} - /// \f\] - /// - /// @param d The distance. - /// @param dhat Activation distance of the barrier. - /// @return The value of the barrier function at d. + /** + * @brief Two-stage activation barrier. + * + * \f\[ + * b(d) = \begin{cases} + * -\frac{\hat{d}^2}{4} \left(\ln\left(\frac{2d}{\hat{d}}\right) - + * \tfrac{1}{2}\right) & d < \frac{\hat{d}}{2}\\ + * \tfrac{1}{2} (\hat{d} - d)^2 & d < \hat{d}\\ + * 0 & d \ge \hat{d} + * \end{cases} + * \f\] + * + * @param d The distance. + * @param dhat Activation distance of the barrier. + * @return The value of the barrier function at d. + */ double operator()(const double d, const double dhat) const override; - /// @brief Derivative of the barrier function. - /// - /// \f\[ - /// b'(d) = \begin{cases} - // -\frac{\hat{d}}{4d} & d < \frac{\hat{d}}{2}\\ - // d - \hat{d} & d < \hat{d}\\ - // 0 & d \ge \hat{d} - // \end{cases} - /// \f\] - /// - /// @param d The distance. - /// @param dhat Activation distance of the barrier. - /// @return The derivative of the barrier wrt d. + /** + * @brief Derivative of the barrier function. + * + * \f\[ + * b'(d) = \begin{cases} + * -\frac{\hat{d}}{4d} & d < \frac{\hat{d}}{2}\\ + * d - \hat{d} & d < \hat{d}\\ + * 0 & d \ge \hat{d} + * \end{cases} + * \f\] + * + * @param d The distance. + * @param dhat Activation distance of the barrier. + * @return The derivative of the barrier wrt d. + */ double first_derivative(const double d, const double dhat) const override; - /// @brief Second derivative of the barrier function. - /// - /// \f\[ - /// b''(d) = \begin{cases} - /// \frac{\hat{d}}{4d^2} & d < \frac{\hat{d}}{2}\\ - /// 1 & d < \hat{d}\\ - /// 0 & d \ge \hat{d} - /// \end{cases} - /// \f\] - /// - /// @param d The distance. - /// @param dhat Activation distance of the barrier. - /// @return The second derivative of the barrier wrt d. + /** + * @brief Second derivative of the barrier function. + * + * \f\[ + * b''(d) = \begin{cases} + * \frac{\hat{d}}{4d^2} & d < \frac{\hat{d}}{2}\\ + * 1 & d < \hat{d}\\ + * 0 & d \ge \hat{d} + * \end{cases} + * \f\] + * + * @param d The distance. + * @param dhat Activation distance of the barrier. + * @return The second derivative of the barrier wrt d. + */ double second_derivative(const double d, const double dhat) const override; /// @brief Get the units of the barrier function. diff --git a/src/ipc/broad_phase/aabb.cpp b/src/ipc/broad_phase/aabb.cpp index 9da17a595..fff92f77d 100644 --- a/src/ipc/broad_phase/aabb.cpp +++ b/src/ipc/broad_phase/aabb.cpp @@ -49,20 +49,21 @@ void AABB::conservative_inflation( Eigen::Ref max, const double inflation_radius) { -#pragma STDC FENV_ACCESS ON assert(min.size() == max.size()); assert((min <= max).all()); assert(inflation_radius >= 0); - const int current_round = std::fegetround(); + // Nudge the bounds outward to ensure conservativity. - std::fesetround(FE_DOWNWARD); - min -= inflation_radius; + constexpr double inf = std::numeric_limits::infinity(); - std::fesetround(FE_UPWARD); - max += inflation_radius; + min = min.unaryExpr([inflation_radius](double v) { + return std::nextafter(v - inflation_radius, -inf); + }); - std::fesetround(current_round); + max = max.unaryExpr([inflation_radius](double v) { + return std::nextafter(v + inflation_radius, inf); + }); } void build_vertex_boxes( diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index 21fe2f928..dd20fe474 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -19,6 +19,9 @@ class LBVH : public BroadPhase { /// @brief The max corner of the AABB Eigen::Array3f aabb_max; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" + // Union to overlap Leaf data and Internal Node data. // This compresses the Node size to 32 bytes (2 per cache line), // reducing cache misses during traversal. @@ -42,6 +45,8 @@ class LBVH : public BroadPhase { }; }; +#pragma GCC diagnostic pop + /// @brief Default constructor Node(); diff --git a/src/ipc/broad_phase/spatial_hash.hpp b/src/ipc/broad_phase/spatial_hash.hpp index a925db084..7584e6205 100644 --- a/src/ipc/broad_phase/spatial_hash.hpp +++ b/src/ipc/broad_phase/spatial_hash.hpp @@ -62,11 +62,11 @@ class SpatialHash : public BroadPhase { /// @param edges Collision mesh edges /// @param faces Collision mesh faces void build( - const std::vector& vertex_boxes, + const std::vector& _vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces) override { - build(vertex_boxes, edges, faces, /*voxel_size=*/-1); + build(_vertex_boxes, edges, faces, /*voxel_size=*/-1); } // ------------------------------------------------------------------------ diff --git a/src/ipc/broad_phase/sweep_and_prune.cpp b/src/ipc/broad_phase/sweep_and_prune.cpp index d7e186fc4..23067afe7 100644 --- a/src/ipc/broad_phase/sweep_and_prune.cpp +++ b/src/ipc/broad_phase/sweep_and_prune.cpp @@ -55,7 +55,7 @@ void SweepAndPrune::build( } void SweepAndPrune::build( - const std::vector& vertex_boxes, + const std::vector& p_vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces) { @@ -65,17 +65,17 @@ void SweepAndPrune::build( clear(); // Convert from ipc::AABB to scalable_ccd::AABB (additional element_id) - boxes->vertices.resize(vertex_boxes.size()); - for (int i = 0; i < vertex_boxes.size(); ++i) { + boxes->vertices.resize(p_vertex_boxes.size()); + for (int i = 0; i < p_vertex_boxes.size(); ++i) { boxes->vertices[i].min = - vertex_boxes[i].min.cast(); + p_vertex_boxes[i].min.cast(); boxes->vertices[i].max = - vertex_boxes[i].max.cast(); - assert(vertex_boxes[i].vertex_ids[0] >= 0); - assert(vertex_boxes[i].vertex_ids[1] < 0); - assert(vertex_boxes[i].vertex_ids[2] < 0); - boxes->vertices[i].vertex_ids[0] = vertex_boxes[i].vertex_ids[0]; - boxes->vertices[i].vertex_ids[1] = -vertex_boxes[i].vertex_ids[0] - 1; + p_vertex_boxes[i].max.cast(); + assert(p_vertex_boxes[i].vertex_ids[0] >= 0); + assert(p_vertex_boxes[i].vertex_ids[1] < 0); + assert(p_vertex_boxes[i].vertex_ids[2] < 0); + boxes->vertices[i].vertex_ids[0] = p_vertex_boxes[i].vertex_ids[0]; + boxes->vertices[i].vertex_ids[1] = -p_vertex_boxes[i].vertex_ids[0] - 1; boxes->vertices[i].vertex_ids[2] = boxes->vertices[i].vertex_ids[1]; boxes->vertices[i].element_id = i; } diff --git a/src/ipc/broad_phase/sweep_and_prune.hpp b/src/ipc/broad_phase/sweep_and_prune.hpp index f996b7848..78717937e 100644 --- a/src/ipc/broad_phase/sweep_and_prune.hpp +++ b/src/ipc/broad_phase/sweep_and_prune.hpp @@ -14,6 +14,8 @@ class SweepAndPrune : public BroadPhase { /// @return The name of the broad phase method. std::string name() const override { return "SweepAndPrune"; } + using BroadPhase::build; + /// @brief Build the broad phase for static collision detection. /// @param vertices Vertex positions /// @param edges Collision mesh edges diff --git a/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp b/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp index 8f249b812..b0a64567d 100644 --- a/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp +++ b/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp @@ -18,6 +18,8 @@ class SweepAndTiniestQueue : public BroadPhase { /// @return The name of the broad phase method. std::string name() const override { return "SweepAndTiniestQueue"; } + using BroadPhase::build; + /// @brief Build the broad phase for static collision detection. /// @param vertices Vertex positions /// @param edges Collision mesh edges diff --git a/src/ipc/broad_phase/voxel_size_heuristic.cpp b/src/ipc/broad_phase/voxel_size_heuristic.cpp index e507be76d..929644c4d 100644 --- a/src/ipc/broad_phase/voxel_size_heuristic.cpp +++ b/src/ipc/broad_phase/voxel_size_heuristic.cpp @@ -86,7 +86,7 @@ double suggest_good_voxel_size(const std::vector& boxes) box_sizes(i) = (boxes[i].max - boxes[i].min).maxCoeff(); } - double voxel_size; + double voxel_size = 0; igl::median(box_sizes, voxel_size); if (voxel_size <= 0) { From b0dec30c0970bf5859888f1f8f370025ef48d820 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 20 Jan 2026 18:20:37 -0500 Subject: [PATCH 17/25] Put inf inside lambda in aabb.cpp --- src/ipc/broad_phase/aabb.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ipc/broad_phase/aabb.cpp b/src/ipc/broad_phase/aabb.cpp index fff92f77d..c2791f20d 100644 --- a/src/ipc/broad_phase/aabb.cpp +++ b/src/ipc/broad_phase/aabb.cpp @@ -55,14 +55,14 @@ void AABB::conservative_inflation( // Nudge the bounds outward to ensure conservativity. - constexpr double inf = std::numeric_limits::infinity(); - min = min.unaryExpr([inflation_radius](double v) { - return std::nextafter(v - inflation_radius, -inf); + return std::nextafter( + v - inflation_radius, -std::numeric_limits::infinity()); }); max = max.unaryExpr([inflation_radius](double v) { - return std::nextafter(v + inflation_radius, inf); + return std::nextafter( + v + inflation_radius, std::numeric_limits::infinity()); }); } From b72f570f6d8be116875bfc7958767fd664b99e40 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Wed, 21 Jan 2026 12:13:08 -0500 Subject: [PATCH 18/25] Refactor Puffer-Ball test cases to use accessible mesh paths and update benchmark sections --- .../broad_phase/benchmark_broad_phase.cpp | 12 ++++++------ tests/src/tests/broad_phase/test_stq.cpp | 4 ++-- tests/src/tests/ccd/benchmark_ccd.cpp | 18 ++++++++++++------ tests/src/tests/ccd/test_gpu_ccd.cpp | 6 +++--- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tests/src/tests/broad_phase/benchmark_broad_phase.cpp b/tests/src/tests/broad_phase/benchmark_broad_phase.cpp index 1bbbdf61b..e04491a64 100644 --- a/tests/src/tests/broad_phase/benchmark_broad_phase.cpp +++ b/tests/src/tests/broad_phase/benchmark_broad_phase.cpp @@ -114,18 +114,18 @@ TEST_CASE( mesh_name_t0 = "cloth_ball92.ply"; mesh_name_t1 = "cloth_ball93.ply"; } - // SECTION("Squishy-Ball") - // { - // mesh_name_t0 = "private/puffer-ball/20.ply"; - // mesh_name_t1 = "private/puffer-ball/21.ply"; - // } + SECTION("Puffer-Ball") + { + mesh_name_t0 = "puffer-ball/20.ply"; + mesh_name_t1 = "puffer-ball/21.ply"; + } if (!tests::load_mesh(mesh_name_t0, V0, E, F) || !tests::load_mesh(mesh_name_t1, V1, E, F)) { SKIP("Slow broadphase CCD meshes are private"); } - double inflation_radius = 1e-2; // GENERATE(take(5, random(0.0, 0.1))); + double inflation_radius = 0; // GENERATE(take(5, random(0.0, 0.1))); CollisionMesh mesh = CollisionMesh::build_from_full_mesh(V0, E, F); // Discard codimensional/internal vertices diff --git a/tests/src/tests/broad_phase/test_stq.cpp b/tests/src/tests/broad_phase/test_stq.cpp index 5fa85f03c..177ce4d94 100644 --- a/tests/src/tests/broad_phase/test_stq.cpp +++ b/tests/src/tests/broad_phase/test_stq.cpp @@ -69,8 +69,8 @@ TEST_CASE("Puffer-Ball", "[ccd][broad_phase][stq][cuda]") Eigen::MatrixXd V0, V1; Eigen::MatrixXi E, F; - if (!tests::load_mesh("private/puffer-ball/20.ply", V0, E, F) - || !tests::load_mesh("private/puffer-ball/21.ply", V1, E, F)) { + if (!tests::load_mesh("puffer-ball/20.ply", V0, E, F) + || !tests::load_mesh("puffer-ball/21.ply", V1, E, F)) { SKIP("Puffer-ball meshes not available"); } diff --git a/tests/src/tests/ccd/benchmark_ccd.cpp b/tests/src/tests/ccd/benchmark_ccd.cpp index 663fb8cb0..bec93481a 100644 --- a/tests/src/tests/ccd/benchmark_ccd.cpp +++ b/tests/src/tests/ccd/benchmark_ccd.cpp @@ -5,6 +5,7 @@ #include #include +#include #include using namespace ipc; @@ -30,11 +31,11 @@ TEST_CASE("Benchmark earliest toi", "[!benchmark][ccd][earliest_toi]") mesh_name_t0 = "cloth_ball92.ply"; mesh_name_t1 = "cloth_ball93.ply"; } - // SECTION("Squishy-Ball") - // { - // mesh_name_t0 = "private/puffer-ball/20.ply"; - // mesh_name_t1 = "private/puffer-ball/21.ply"; - // } + SECTION("Puffer-Ball") + { + mesh_name_t0 = "puffer-ball/20.ply"; + mesh_name_t1 = "puffer-ball/21.ply"; + } if (!tests::load_mesh(mesh_name_t0, V0, E, F) || !tests::load_mesh(mesh_name_t1, V1, E, F)) { @@ -48,8 +49,13 @@ TEST_CASE("Benchmark earliest toi", "[!benchmark][ccd][earliest_toi]") TightInclusionCCD ccd(/*tolerance=*/1e-6, /*max_iterations=*/1e7); + LBVH broad_phase; + Candidates candidates; - candidates.build(mesh, V0, V1); + BENCHMARK("Broad-Phase Candidate Generation") + { + candidates.build(mesh, V0, V1, 0, &broad_phase); + }; double toi; BENCHMARK("Earliest ToI Narrow-Phase") diff --git a/tests/src/tests/ccd/test_gpu_ccd.cpp b/tests/src/tests/ccd/test_gpu_ccd.cpp index bd06d03b3..f53e180dd 100644 --- a/tests/src/tests/ccd/test_gpu_ccd.cpp +++ b/tests/src/tests/ccd/test_gpu_ccd.cpp @@ -24,10 +24,10 @@ TEST_CASE("GPU CCD", "[ccd][gpu]") mesh_name_t0 = "cloth_ball92.ply"; mesh_name_t1 = "cloth_ball93.ply"; } - // SECTION("Squishy-Ball") + // SECTION("Puffer-Ball") // { - // mesh_name_t0 = "private/puffer-ball/20.ply"; - // mesh_name_t1 = "private/puffer-ball/21.ply"; + // mesh_name_t0 = "puffer-ball/20.ply"; + // mesh_name_t1 = "puffer-ball/21.ply"; // } if (!tests::load_mesh(mesh_name_t0, V0, E, F) From b906cf48fc06a7c8784c827453f86759ee2cde5d Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 22 Jan 2026 15:50:20 -0500 Subject: [PATCH 19/25] Refactor AABB to use a fixed size Array3d - Store dimension of the scene inside the BroadPhase class. - Updated broad phase classes (HashGrid, LBVH, SpatialHash, SweepAndPrune, STQ) to replace custom array types with Eigen's Array3i and Array3d for better performance and consistency. - Introduced DefaultInitAllocator to avoid unnecessary initialization overhead in vectors used in LBVH. - Modified build methods across various classes to accept AABBs instead of std::vector, ensuring compatibility with the new data structures. - Enhanced parallel processing in LBVH and SpatialHash for copying vertex IDs and building BVH structures. - Updated tests to reflect changes in data types and method signatures, ensuring all tests pass with the new implementations. --- CMakeLists.txt | 2 +- cmake/recipes/evouga_ccd.cmake | 2 +- cmake/recipes/filib.cmake | 2 +- cmake/recipes/pybind11_json.cmake | 26 ++++ docs/source/_static/graphviz/dependencies.dot | 3 + docs/source/_static/graphviz/dependencies.svg | 91 +++++++------ docs/source/about/dependencies.rst | 11 +- python/CMakeLists.txt | 5 + python/examples/lbvh.py | 11 +- python/examples/lbvh_profile.csv | 12 -- python/examples/profiler.py | 36 ++++- python/src/bindings.cpp | 1 + python/src/broad_phase/aabb.cpp | 28 ++-- python/src/broad_phase/broad_phase.cpp | 12 +- python/src/broad_phase/lbvh.cpp | 43 +++++- python/src/utils/CMakeLists.txt | 1 + python/src/utils/bindings.hpp | 1 + python/src/utils/profiler.cpp | 39 ++++++ src/ipc/broad_phase/aabb.cpp | 74 +++++----- src/ipc/broad_phase/aabb.hpp | 25 ++-- src/ipc/broad_phase/broad_phase.cpp | 58 +++++++- src/ipc/broad_phase/broad_phase.hpp | 27 +++- src/ipc/broad_phase/brute_force.cpp | 4 +- src/ipc/broad_phase/brute_force.hpp | 4 +- src/ipc/broad_phase/bvh.cpp | 8 +- src/ipc/broad_phase/bvh.hpp | 4 +- src/ipc/broad_phase/hash_grid.cpp | 32 +++-- src/ipc/broad_phase/hash_grid.hpp | 26 ++-- src/ipc/broad_phase/lbvh.cpp | 126 ++++++++++-------- src/ipc/broad_phase/lbvh.hpp | 57 +++++--- src/ipc/broad_phase/spatial_hash.cpp | 60 ++++----- src/ipc/broad_phase/spatial_hash.hpp | 34 ++--- src/ipc/broad_phase/sweep_and_prune.cpp | 11 +- src/ipc/broad_phase/sweep_and_prune.hpp | 5 +- .../broad_phase/sweep_and_tiniest_queue.cu | 8 +- .../broad_phase/sweep_and_tiniest_queue.hpp | 2 +- src/ipc/broad_phase/voxel_size_heuristic.cpp | 2 +- src/ipc/broad_phase/voxel_size_heuristic.hpp | 2 +- src/ipc/candidates/candidates.cpp | 4 +- src/ipc/utils/CMakeLists.txt | 2 + src/ipc/utils/default_init_allocator.hpp | 41 ++++++ .../tests/broad_phase/test_broad_phase.cpp | 4 +- tests/src/tests/broad_phase/test_lbvh.cpp | 8 +- .../tests/broad_phase/test_spatial_hash.cpp | 2 +- tests/src/tests/broad_phase/test_stq.cpp | 2 +- tests/src/tests/candidates/test_normals.cpp | 2 +- 46 files changed, 629 insertions(+), 331 deletions(-) create mode 100644 cmake/recipes/pybind11_json.cmake delete mode 100644 python/examples/lbvh_profile.csv create mode 100644 python/src/utils/profiler.cpp create mode 100644 src/ipc/utils/default_init_allocator.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c45319432..19208d9e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,7 +234,7 @@ endif() if(IPC_TOOLKIT_WITH_PROFILER) # Add nlohmann/json for the profiler include(json) - target_link_libraries(ipc_toolkit PRIVATE nlohmann_json::nlohmann_json) + target_link_libraries(ipc_toolkit PUBLIC nlohmann_json::nlohmann_json) endif() # Extra warnings (link last for highest priority) diff --git a/cmake/recipes/evouga_ccd.cmake b/cmake/recipes/evouga_ccd.cmake index dbdf03eb8..130bf3204 100644 --- a/cmake/recipes/evouga_ccd.cmake +++ b/cmake/recipes/evouga_ccd.cmake @@ -1,4 +1,4 @@ -# Etienne Vouga's CCD Library (https://github.com/evouga/collisiondetection.git) +# Etienne Vouga's CCD Library (https://github.com/evouga/collisiondetection) # License: ??? if(TARGET evouga::ccd) return() diff --git a/cmake/recipes/filib.cmake b/cmake/recipes/filib.cmake index c0fa879bd..fa3cf51cd 100644 --- a/cmake/recipes/filib.cmake +++ b/cmake/recipes/filib.cmake @@ -1,4 +1,4 @@ -# filib (https://github.com/zfergus/filib.git) +# filib (https://github.com/zfergus/filib) # License: LGPL-2.1 if(TARGET filib::filib) return() diff --git a/cmake/recipes/pybind11_json.cmake b/cmake/recipes/pybind11_json.cmake new file mode 100644 index 000000000..508b534c1 --- /dev/null +++ b/cmake/recipes/pybind11_json.cmake @@ -0,0 +1,26 @@ +# pybind11_json (https://github.com/pybind/pybind11_json) +# License: MIT +if(TARGET pybind11::json) + return() +endif() + +message(STATUS "Third-party: creating target 'pybind11::json'") + +include(CPM) +CPMAddPackage( + URI "gh:pybind/pybind11_json#0.2.15" + DOWNLOAD_ONLY YES +) + +add_library(pybind11_json INTERFACE) +add_library(pybind11::json ALIAS pybind11_json) + +target_include_directories(pybind11_json INTERFACE + "$" +) + +include(pybind11) +target_link_libraries(pybind11_json INTERFACE pybind11::pybind11) + +include(json) +target_link_libraries(pybind11_json INTERFACE nlohmann_json::nlohmann_json) \ No newline at end of file diff --git a/docs/source/_static/graphviz/dependencies.dot b/docs/source/_static/graphviz/dependencies.dot index 7e09a61a6..70a8c731b 100644 --- a/docs/source/_static/graphviz/dependencies.dot +++ b/docs/source/_static/graphviz/dependencies.dot @@ -83,4 +83,7 @@ digraph "IPC Toolkit Dependencies" { "node5" -> "node13" [color = "#8FB976";]; "node13" -> "node0" [color = "#BE6562";]; "node13" -> "node9" [color = "#BE6562";]; + // ipc_toolkit -> nlohmann_json + "node14" [label = "nlohmann_json\n(nlohmann_json::nlohmann_json)";shape = box;style = "rounded,filled";fillcolor = "#FFE6CC";color = "#DAA52D";]; + "node5" -> "node14" [color = "#8FB976";]; } \ No newline at end of file diff --git a/docs/source/_static/graphviz/dependencies.svg b/docs/source/_static/graphviz/dependencies.svg index 3713dcb99..2d65143ce 100644 --- a/docs/source/_static/graphviz/dependencies.svg +++ b/docs/source/_static/graphviz/dependencies.svg @@ -1,11 +1,11 @@ - - + IPC Toolkit Dependencies @@ -16,48 +16,48 @@ legendNode0 - -Static Library + +Static Library legendNode1 - -Shared Library + +Shared Library legendNode0->legendNode1 - - -Public + + +Public legendNode2 - -Interface Library + +Interface Library legendNode1->legendNode2 - - -Private + + +Private legendNode2->legendNode0 - - -Interface + + +Interface node5 - -ipc_toolkit -(ipc::toolkit) + +ipc_toolkit +(ipc::toolkit) @@ -69,7 +69,7 @@ node5->node0 - + @@ -82,8 +82,8 @@ node5->node1 - - + + @@ -95,7 +95,7 @@ node5->node2 - + @@ -108,7 +108,7 @@ node5->node3 - + @@ -121,8 +121,8 @@ node5->node6 - - + + @@ -134,8 +134,8 @@ node5->node7 - - + + @@ -147,7 +147,7 @@ node5->node8 - + @@ -160,8 +160,8 @@ node5->node9 - - + + @@ -173,7 +173,7 @@ node5->node10 - + @@ -186,7 +186,7 @@ node5->node11 - + @@ -199,8 +199,8 @@ node5->node12 - - + + @@ -212,8 +212,21 @@ node5->node13 - - + + + + + +node14 + +nlohmann_json +(nlohmann_json::nlohmann_json) + + + +node5->node14 + + diff --git a/docs/source/about/dependencies.rst b/docs/source/about/dependencies.rst index 62ca0b781..62edeae6d 100644 --- a/docs/source/about/dependencies.rst +++ b/docs/source/about/dependencies.rst @@ -91,6 +91,12 @@ Additionally, IPC Toolkit may optionally use the following libraries: - `github.com/zfergus/filib `_ - |:white_check_mark:| - ``IPC_TOOLKIT_WITH_FILIB`` + * - nlohmann/json + - JSON parsing for profiler and tests + - MIT + - `github.com/nlohmann/json `_ + - |:white_large_square:| + - ``IPC_TOOLKIT_WITH_PROFILER`` * - rational-cpp - Rational arithmetic used for exact intersection checks (requires `GMP `_ to be installed at a system level) - MIT @@ -136,7 +142,4 @@ Unit Test Dependencies - `github.com/catchorg/Catch2 `_ * - finite-diff - Finite-difference comparisons - - `github.com/zfergus/finite-diff `_ - * - Nlohmann JSON - - Loading test data from JSON files - - `github.com/nlohmann/json `_ \ No newline at end of file + - `github.com/zfergus/finite-diff `_ \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c867a548e..ca73632f7 100755 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -30,6 +30,11 @@ if (FILIB_BUILD_SHARED_LIB AND WIN32) ) endif() +if (IPC_TOOLKIT_WITH_PROFILER) + include(pybind11_json) + target_link_libraries(ipctk PRIVATE pybind11::json) +endif() + # Extra warnings # target_link_libraries(ipctk PRIVATE IPCToolkit::warnings) diff --git a/python/examples/lbvh.py b/python/examples/lbvh.py index 19f10afe9..9fe4efed2 100644 --- a/python/examples/lbvh.py +++ b/python/examples/lbvh.py @@ -6,7 +6,16 @@ import pathlib -mesh = meshio.read(pathlib.Path(__file__).parents[2] / "tests/data/puffer-ball/20.ply") # noqa +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument( + "--mesh", + type=pathlib.Path, + default=(pathlib.Path(__file__).parents[2] / "tests/data/puffer-ball/20.ply")) +args = parser.parse_args() + +mesh = meshio.read(args.mesh) lbvh = ipctk.LBVH() lbvh.build(mesh.points, np.array([], dtype=int), mesh.cells_dict["triangle"]) diff --git a/python/examples/lbvh_profile.csv b/python/examples/lbvh_profile.csv deleted file mode 100644 index d7322595c..000000000 --- a/python/examples/lbvh_profile.csv +++ /dev/null @@ -1,12 +0,0 @@ -Id,Parent,Name,Time (ms),Count -7,,LBVH::detect_edge_edge_candidates,331.688,6 -8,,LBVH::detect_edge_face_candidates,209.872,6 -9,,LBVH::detect_edge_vertex_candidates,139.502,6 -10,,LBVH::detect_face_face_candidates,100.436,6 -11,,LBVH::detect_face_vertex_candidates,131.918,6 -12,,LBVH::detect_vertex_vertex_candidates,17.2223,6 -13,,LBVH::init_bvh,21.2826,18 -14,13,build_hierarchy,8.12017,18 -15,13,compute_morton_codes,1.76938,18 -16,13,populate_boxes,2.26558,18 -17,13,sort_morton_codes,5.25754,18 diff --git a/python/examples/profiler.py b/python/examples/profiler.py index 6e853a55f..0a28b2bb8 100644 --- a/python/examples/profiler.py +++ b/python/examples/profiler.py @@ -1,10 +1,13 @@ import pandas as pd import plotly.express as px import pathlib +from io import StringIO -def plot(csv_path): - df = pd.read_csv(csv_path, na_filter=False) - df["Time (s)"] = df["Time (ms)"] / 1000.0 +from find_ipctk import ipctk + +def plot_profiler(title=None): + df = pd.read_csv(StringIO(ipctk.profiler().csv), na_filter=False) + # df["Time (s)"] = df["Time (ms)"] / 1000.0 # df["Parent"] = df["Parent"].astype(str) # df["Id"] = df["Id"].astype(str) @@ -25,6 +28,7 @@ def plot(csv_path): # ) ) fig.update_layout( + title=title or "Profiler Results", title_x=0.5, title_y=0.95, margin=dict(t=0, l=0, r=0, b=0), @@ -37,4 +41,28 @@ def plot(csv_path): fig.show() return fig -plot(pathlib.Path(__file__).parent / "lbvh_profile.csv") \ No newline at end of file +if __name__ == "__main__": + # plot(pathlib.Path(__file__).parent / "lbvh_profile.csv") + + import meshio + import argparse + import numpy as np + + parser = argparse.ArgumentParser() + parser.add_argument( + "--mesh", + type=pathlib.Path, + default=(pathlib.Path(__file__).parents[2] / "tests/data/puffer-ball/20.ply")) + args = parser.parse_args() + + mesh = meshio.read(args.mesh) + faces = mesh.cells_dict["triangle"] + + edges = ipctk.edges(faces) + # indices = np.lexsort((edges[:, 1], edges[:, 0])) + # sorted_edges = edges[indices] + + lbvh = ipctk.LBVH() + lbvh.build(mesh.points, sorted_edges, faces) + + plot_profiler(title=args.mesh.parent.name + "/" + args.mesh.name) diff --git a/python/src/bindings.cpp b/python/src/bindings.cpp index 2baf01939..597b69656 100644 --- a/python/src/bindings.cpp +++ b/python/src/bindings.cpp @@ -120,6 +120,7 @@ PYBIND11_MODULE(ipctk, m) // utils define_logger(m); + define_profiler(m); define_thread_limiter(m); define_vertex_to_min_edge(m); define_world_bbox_diagonal_length(m); diff --git a/python/src/broad_phase/aabb.cpp b/python/src/broad_phase/aabb.cpp index b0d170468..e1d9d0617 100644 --- a/python/src/broad_phase/aabb.cpp +++ b/python/src/broad_phase/aabb.cpp @@ -1,6 +1,7 @@ #include #include +#include using namespace ipc; @@ -60,6 +61,19 @@ void define_aabb(py::module_& m) If the two AABBs intersect. )ipc_Qu8mg5v7", "other"_a) + .def( + "__repr__", + [](const AABB& aabb) { + return fmt::format( + "AABB(min={}, max={})", aabb.min.transpose(), + aabb.max.transpose()); + }) + .def( + "__str__", + [](const AABB& aabb) { + return fmt::format( + "{}×{}", aabb.min.transpose(), aabb.max.transpose()); + }) .def_static( "conservative_inflation", [](ArrayMax3d min, ArrayMax3d max, const double inflation_radius) { @@ -78,7 +92,7 @@ void define_aabb(py::module_& m) "build_vertex_boxes", [](Eigen::ConstRef vertices, const double inflation_radius = 0) { - std::vector vertex_boxes; + AABBs vertex_boxes; build_vertex_boxes(vertices, vertex_boxes, inflation_radius); return vertex_boxes; }, @@ -99,7 +113,7 @@ void define_aabb(py::module_& m) [](Eigen::ConstRef vertices_t0, Eigen::ConstRef vertices_t1, const double inflation_radius = 0) { - std::vector vertex_boxes; + AABBs vertex_boxes; build_vertex_boxes( vertices_t0, vertices_t1, vertex_boxes, inflation_radius); return vertex_boxes; @@ -119,9 +133,8 @@ void define_aabb(py::module_& m) m.def( "build_edge_boxes", - [](const std::vector& vertex_boxes, - Eigen::ConstRef edges) { - std::vector edge_boxes; + [](const AABBs& vertex_boxes, Eigen::ConstRef edges) { + AABBs edge_boxes; build_edge_boxes(vertex_boxes, edges, edge_boxes); return edge_boxes; }, @@ -139,9 +152,8 @@ void define_aabb(py::module_& m) m.def( "build_face_boxes", - [](const std::vector& vertex_boxes, - Eigen::ConstRef faces) { - std::vector face_boxes; + [](const AABBs& vertex_boxes, Eigen::ConstRef faces) { + AABBs face_boxes; build_face_boxes(vertex_boxes, faces, face_boxes); return face_boxes; }, diff --git a/python/src/broad_phase/broad_phase.cpp b/python/src/broad_phase/broad_phase.cpp index c8ab31784..812599b4b 100644 --- a/python/src/broad_phase/broad_phase.cpp +++ b/python/src/broad_phase/broad_phase.cpp @@ -247,18 +247,12 @@ void define_broad_phase(py::module_& m) )ipc_Qu8mg5v7") .def( "detect_collision_candidates", - [](const BroadPhase& self, int dim) { + [](const BroadPhase& self) { Candidates candidates; - self.detect_collision_candidates(dim, candidates); + self.detect_collision_candidates(candidates); return candidates; }, - R"ipc_Qu8mg5v7( - Detect all collision candidates needed for a given dimensional simulation. - - Parameters: - dim: The dimension of the simulation (i.e., 2 or 3). - )ipc_Qu8mg5v7", - "dim"_a) + "Detect all collision candidates needed for a given dimensional simulation.") .def_readwrite( "can_vertices_collide", &BroadPhase::can_vertices_collide, "Function for determining if two vertices can collide."); diff --git a/python/src/broad_phase/lbvh.cpp b/python/src/broad_phase/lbvh.cpp index 0fe9d77c8..7f2097cfd 100644 --- a/python/src/broad_phase/lbvh.cpp +++ b/python/src/broad_phase/lbvh.cpp @@ -9,11 +9,48 @@ void define_lbvh(py::module_& m) py::class_(m, "LBVH_Node") .def_readonly("aabb_min", &LBVH::Node::aabb_min) .def_readonly("aabb_max", &LBVH::Node::aabb_max) - .def_readonly("left", &LBVH::Node::left) - .def_readonly("right", &LBVH::Node::right) + .def_property_readonly( + "left", + [](const LBVH::Node& node) { + if (node.is_inner()) { + return node.left; + } else { + throw py::attribute_error( + "Leaf nodes do not have 'left' child."); + } + }) + .def_property_readonly( + "right", + [](const LBVH::Node& node) { + if (node.is_inner()) { + return node.right; + } else { + throw py::attribute_error( + "Leaf nodes do not have 'right' child."); + } + }) + .def_property_readonly( + "primitive_id", + [](const LBVH::Node& node) { + if (node.is_leaf()) { + return node.primitive_id; + } else { + throw py::attribute_error( + "Inner nodes do not have a 'primitive_id'"); + } + }) .def_property_readonly("is_leaf", &LBVH::Node::is_leaf) .def_property_readonly("is_inner", &LBVH::Node::is_inner) - .def_property_readonly("is_valid", &LBVH::Node::is_valid); + .def_property_readonly("is_valid", &LBVH::Node::is_valid) + .def("intersects", &LBVH::Node::intersects) + .def("__str__", [](const LBVH::Node& node) { + return fmt::format( + "LBVH::Node(aabb_min={}, aabb_max={}, {})", + node.aabb_min.transpose(), node.aabb_max.transpose(), + node.is_inner() + ? fmt::format("left={}, right={}", node.left, node.right) + : fmt::format("primitive_id={}", node.primitive_id)); + }); py::class_>(m, "LBVH") .def(py::init()) diff --git a/python/src/utils/CMakeLists.txt b/python/src/utils/CMakeLists.txt index 3de7d7512..7a17ed16c 100644 --- a/python/src/utils/CMakeLists.txt +++ b/python/src/utils/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES eigen_ext.cpp logger.cpp + profiler.cpp thread_limiter.cpp vertex_to_min_edge.cpp world_bbox_diagonal_length.cpp diff --git a/python/src/utils/bindings.hpp b/python/src/utils/bindings.hpp index 9e3b26741..368962c93 100644 --- a/python/src/utils/bindings.hpp +++ b/python/src/utils/bindings.hpp @@ -4,6 +4,7 @@ void define_eigen_ext(py::module_& m); void define_logger(py::module_& m); +void define_profiler(py::module_& m); void define_thread_limiter(py::module_& m); void define_vertex_to_min_edge(py::module_& m); void define_world_bbox_diagonal_length(py::module_& m); \ No newline at end of file diff --git a/python/src/utils/profiler.cpp b/python/src/utils/profiler.cpp new file mode 100644 index 000000000..adc63993b --- /dev/null +++ b/python/src/utils/profiler.cpp @@ -0,0 +1,39 @@ +#include + +#include + +#include + +using namespace ipc; + +void define_profiler(py::module_& m) +{ +#ifdef IPC_TOOLKIT_WITH_PROFILER + py::class_(m, "Profiler") + .def("clear", &Profiler::clear, "Clear all profiling data") + .def("reset", &Profiler::reset, "Reset the profiler data and scopes") + .def("print", &Profiler::print, "Print the profiler data to the logger") + .def_property_readonly( + "csv", + [](const Profiler& profiler) { + std::ostringstream os; + profiler.write_csv(os); + return os.str(); + }, + "Get the profiler data in CSV format as a string") + .def( + "__str__", + [](const Profiler& profiler) { + std::ostringstream os; + profiler.write_csv(os); + return os.str(); + }) + .def_property_readonly( + "data", py::overload_cast<>(&Profiler::data, py::const_), + "Access the profiling data as a JSON object"); + + m.def( + "profiler", profiler, "Get the global profiler instance", + py::return_value_policy::reference); +#endif +} diff --git a/src/ipc/broad_phase/aabb.cpp b/src/ipc/broad_phase/aabb.cpp index c2791f20d..8a52f75dc 100644 --- a/src/ipc/broad_phase/aabb.cpp +++ b/src/ipc/broad_phase/aabb.cpp @@ -1,5 +1,7 @@ #include "aabb.hpp" +#include + #include #include @@ -8,13 +10,13 @@ namespace ipc { AABB::AABB(Eigen::ConstRef _min, Eigen::ConstRef _max) - : min(_min) - , max(_max) + : min(Eigen::Array3d::Zero()) + , max(Eigen::Array3d::Zero()) { - assert(min.size() == max.size()); - assert((min <= max).all()); - // half_extent = (max() - min()) / 2; - // center = min() + half_extent(); + assert(_min.size() == _max.size()); + assert((_min <= _max).all()); + min.head(_min.size()) = _min; + max.head(_max.size()) = _max; } AABB AABB::from_point( @@ -27,20 +29,7 @@ AABB AABB::from_point( bool AABB::intersects(const AABB& other) const { - assert(this->min.size() == other.max.size()); - assert(this->max.size() == other.min.size()); - - // NOTE: This is a faster check (https://gamedev.stackexchange.com/a/587), - // but it is inexact and can result in false negatives. - // return (std::abs(a.center.x() - b.center.x()) - // <= (a.half_extent.x() + b.half_extent.x())) - // && (std::abs(a.center.y() - b.center.y()) - // <= (a.half_extent.y() + b.half_extent.y())) - // && (a.min.size() == 2 - // || std::abs(a.center.z() - b.center.z()) - // <= (a.half_extent.z() + b.half_extent.z())); - - // This on the otherhand, is exact because there is no rounding. + // This is exact because there is no rounding. return (this->min <= other.max).all() && (other.min <= this->max).all(); } @@ -68,9 +57,11 @@ void AABB::conservative_inflation( void build_vertex_boxes( Eigen::ConstRef vertices, - std::vector& vertex_boxes, + AABBs& vertex_boxes, const double inflation_radius) { + IPC_TOOLKIT_PROFILE_BLOCK("build_vertex_boxes"); + vertex_boxes.resize(vertices.rows()); tbb::parallel_for( @@ -87,9 +78,11 @@ void build_vertex_boxes( void build_vertex_boxes( Eigen::ConstRef vertices_t0, Eigen::ConstRef vertices_t1, - std::vector& vertex_boxes, + AABBs& vertex_boxes, const double inflation_radius) { + IPC_TOOLKIT_PROFILE_BLOCK("build_vertex_boxes"); + vertex_boxes.resize(vertices_t0.rows()); tbb::parallel_for( @@ -104,39 +97,48 @@ void build_vertex_boxes( } void build_edge_boxes( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, - std::vector& edge_boxes) + AABBs& edge_boxes) { - edge_boxes.resize(edges.rows()); + IPC_TOOLKIT_PROFILE_BLOCK("build_edge_boxes"); + + if (edge_boxes.size() != edges.rows()) { + IPC_TOOLKIT_PROFILE_BLOCK("resize_edge_boxes"); + edge_boxes.resize(edges.rows()); + } tbb::parallel_for( tbb::blocked_range(0, edges.rows()), [&](const tbb::blocked_range& r) { for (size_t i = r.begin(); i < r.end(); i++) { - edge_boxes[i] = - AABB(vertex_boxes[edges(i, 0)], vertex_boxes[edges(i, 1)]); - edge_boxes[i].vertex_ids = { { edges(i, 0), edges(i, 1), -1 } }; + const int e0 = edges(i, 0), e1 = edges(i, 1); + edge_boxes[i] = AABB(vertex_boxes[e0], vertex_boxes[e1]); + edge_boxes[i].vertex_ids = { { e0, e1, -1 } }; } }); } void build_face_boxes( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef faces, - std::vector& face_boxes) + AABBs& face_boxes) { - face_boxes.resize(faces.rows()); + IPC_TOOLKIT_PROFILE_BLOCK("build_face_boxes"); + + if (face_boxes.size() != faces.rows()) { + IPC_TOOLKIT_PROFILE_BLOCK("resize_face_boxes"); + face_boxes.resize(faces.rows()); + } tbb::parallel_for( tbb::blocked_range(0, faces.rows()), [&](const tbb::blocked_range& r) { for (size_t i = r.begin(); i < r.end(); i++) { - face_boxes[i] = AABB( - vertex_boxes[faces(i, 0)], vertex_boxes[faces(i, 1)], - vertex_boxes[faces(i, 2)]); - face_boxes[i].vertex_ids = { { faces(i, 0), faces(i, 1), - faces(i, 2) } }; + const int f0 = faces(i, 0), f1 = faces(i, 1), f2 = faces(i, 2); + face_boxes[i] = + AABB(vertex_boxes[f0], vertex_boxes[f1], vertex_boxes[f2]); + face_boxes[i].vertex_ids = { { f0, f1, f2 } }; } }); } diff --git a/src/ipc/broad_phase/aabb.hpp b/src/ipc/broad_phase/aabb.hpp index 19ca1ec41..ddcf65b60 100644 --- a/src/ipc/broad_phase/aabb.hpp +++ b/src/ipc/broad_phase/aabb.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -8,7 +9,8 @@ namespace ipc { /// @brief Axis aligned bounding-box of some type -class AABB { +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) +class alignas(64) AABB { public: AABB() = default; @@ -61,20 +63,23 @@ class AABB { public: /// @brief Minimum corner of the AABB. - ArrayMax3d min; + Eigen::Array3d min; /// @brief Maximum corner of the AABB. - ArrayMax3d max; + Eigen::Array3d max; /// @brief Vertex IDs attached to the AABB. - std::array vertex_ids = { { -1, -1, -1 } }; + std::array vertex_ids; }; +/// @brief Vector of AABBs with default initialization allocator. +using AABBs = std::vector>; + /// @brief Build one AABB per vertex position (row of V). /// @param[in] vertices Vertex positions (rowwise). /// @param[out] vertex_boxes Vertex AABBs. /// @param[in] inflation_radius Radius of a sphere around the points which the AABBs enclose. void build_vertex_boxes( Eigen::ConstRef vertices, - std::vector& vertex_boxes, + AABBs& vertex_boxes, const double inflation_radius = 0); /// @brief Build one AABB per vertex position moving linearly from t=0 to t=1. @@ -85,7 +90,7 @@ void build_vertex_boxes( void build_vertex_boxes( Eigen::ConstRef vertices_t0, Eigen::ConstRef vertices_t1, - std::vector& vertex_boxes, + AABBs& vertex_boxes, const double inflation_radius = 0); /// @brief Build one AABB per edge. @@ -93,17 +98,17 @@ void build_vertex_boxes( /// @param edges Edges (rowwise). /// @param edge_boxes Edge AABBs. void build_edge_boxes( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, - std::vector& edge_boxes); + AABBs& edge_boxes); /// @brief Build one AABB per face. /// @param vertex_boxes Vertex AABBs. /// @param faces Faces (rowwise). /// @param face_boxes Face AABBs. void build_face_boxes( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef faces, - std::vector& face_boxes); + AABBs& face_boxes); } // namespace ipc diff --git a/src/ipc/broad_phase/broad_phase.cpp b/src/ipc/broad_phase/broad_phase.cpp index 2a4999a29..114dfbc8f 100644 --- a/src/ipc/broad_phase/broad_phase.cpp +++ b/src/ipc/broad_phase/broad_phase.cpp @@ -2,6 +2,10 @@ #include #include +#include + +#include +#include namespace ipc { @@ -11,7 +15,9 @@ void BroadPhase::build( Eigen::ConstRef faces, const double inflation_radius) { + IPC_TOOLKIT_PROFILE_BLOCK("BroadPhase::build(static)"); clear(); + dim = static_cast(vertices.cols()); build_vertex_boxes(vertices, vertex_boxes, inflation_radius); build(edges, faces); } @@ -23,21 +29,29 @@ void BroadPhase::build( Eigen::ConstRef faces, const double inflation_radius) { + IPC_TOOLKIT_PROFILE_BLOCK("BroadPhase::build(dynamic)"); + assert(vertices_t0.rows() == vertices_t1.rows()); + assert(vertices_t0.cols() == vertices_t1.cols()); clear(); + dim = static_cast(vertices_t0.cols()); build_vertex_boxes( vertices_t0, vertices_t1, vertex_boxes, inflation_radius); build(edges, faces); } void BroadPhase::build( - const std::vector& _vertex_boxes, + const AABBs& _vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces) + Eigen::ConstRef faces, + const uint8_t _dim) { + IPC_TOOLKIT_PROFILE_BLOCK("BroadPhase::build(boxes)"); + clear(); assert(&(this->vertex_boxes) != &_vertex_boxes); this->vertex_boxes = _vertex_boxes; + this->dim = _dim; build(edges, faces); } @@ -46,6 +60,7 @@ void BroadPhase::build( Eigen::ConstRef edges, Eigen::ConstRef faces) { + IPC_TOOLKIT_PROFILE_BLOCK("BroadPhase::build(edges_faces)"); assert(!vertex_boxes.empty()); assert(edges.size() == 0 || edges.cols() == 2); assert(faces.size() == 0 || faces.cols() == 3); @@ -58,12 +73,13 @@ void BroadPhase::clear() vertex_boxes.clear(); edge_boxes.clear(); face_boxes.clear(); + dim = 0; // reset dimension } -void BroadPhase::detect_collision_candidates( - int dim, Candidates& candidates) const +void BroadPhase::detect_collision_candidates(Candidates& candidates) const { candidates.clear(); + assert(dim == 2 || dim == 3); if (dim == 2) { // This is not needed for 3D detect_edge_vertex_candidates(candidates.ev_candidates); @@ -74,6 +90,40 @@ void BroadPhase::detect_collision_candidates( } } +void BroadPhase::compute_mesh_aabb( + Eigen::Ref mesh_min, + Eigen::Ref mesh_max) const +{ + IPC_TOOLKIT_PROFILE_BLOCK("BroadPhase::compute_mesh_aabb"); + assert(!vertex_boxes.empty()); + + struct MeshDomain { + Eigen::Array3d min; + Eigen::Array3d max; + }; + + MeshDomain domain { + Eigen::Array3d::Constant(std::numeric_limits::max()), + Eigen::Array3d::Constant(std::numeric_limits::lowest()) + }; + + domain = tbb::parallel_reduce( + tbb::blocked_range(0, vertex_boxes.size()), domain, + [&](const tbb::blocked_range& r, MeshDomain local) { + for (size_t i = r.begin(); i != r.end(); ++i) { + local.min = local.min.min(vertex_boxes[i].min); + local.max = local.max.max(vertex_boxes[i].max); + } + return local; + }, + [](const MeshDomain& a, const MeshDomain& b) { + return MeshDomain { a.min.min(b.min), a.max.max(b.max) }; + }); + + mesh_min = domain.min; + mesh_max = domain.max; +} + // ============================================================================ bool BroadPhase::can_edge_vertex_collide(size_t ei, size_t vi) const diff --git a/src/ipc/broad_phase/broad_phase.hpp b/src/ipc/broad_phase/broad_phase.hpp index b3f3f967c..23c94a7f1 100644 --- a/src/ipc/broad_phase/broad_phase.hpp +++ b/src/ipc/broad_phase/broad_phase.hpp @@ -53,17 +53,17 @@ class BroadPhase { /// @param edges Collision mesh edges /// @param faces Collision mesh faces virtual void build( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces); + Eigen::ConstRef faces, + const uint8_t dim); /// @brief Clear any built data. virtual void clear(); /// @brief Detect all collision candidates needed for a given dimensional simulation. - /// @param dim The dimension of the simulation (i.e., 2 or 3). /// @param candidates The detected collision candidates. - void detect_collision_candidates(int dim, Candidates& candidates) const; + void detect_collision_candidates(Candidates& candidates) const; /// @brief Find the candidate vertex-vertex collisions. /// @param[out] candidates The candidate vertex-vertex collisions. @@ -108,6 +108,13 @@ class BroadPhase { Eigen::ConstRef edges, Eigen::ConstRef faces); + /// @brief Compute the axis-aligned bounding box (AABB) of the mesh. + /// @param[out] mesh_min Minimum corner of the mesh AABB. + /// @param[out] mesh_max Maximum corner of the mesh AABB. + void compute_mesh_aabb( + Eigen::Ref mesh_min, + Eigen::Ref mesh_max) const; + virtual bool can_edge_vertex_collide(size_t ei, size_t vi) const; virtual bool can_edges_collide(size_t eai, size_t ebi) const; virtual bool can_face_vertex_collide(size_t fi, size_t vi) const; @@ -120,9 +127,15 @@ class BroadPhase { return true; } - std::vector vertex_boxes; - std::vector edge_boxes; - std::vector face_boxes; + /// @brief AABBs for the vertices. + AABBs vertex_boxes; + /// @brief AABBs for the edges. + AABBs edge_boxes; + /// @brief AABBs for the faces. + AABBs face_boxes; + + /// @brief Dimension of the simulation for which the broad phase was built. + uint8_t dim = 0; }; } // namespace ipc diff --git a/src/ipc/broad_phase/brute_force.cpp b/src/ipc/broad_phase/brute_force.cpp index d48e9271e..166380c55 100644 --- a/src/ipc/broad_phase/brute_force.cpp +++ b/src/ipc/broad_phase/brute_force.cpp @@ -14,8 +14,8 @@ namespace ipc { template void BruteForce::detect_candidates( - const std::vector& boxes0, - const std::vector& boxes1, + const AABBs& boxes0, + const AABBs& boxes1, const std::function& can_collide, std::vector& candidates) const { diff --git a/src/ipc/broad_phase/brute_force.hpp b/src/ipc/broad_phase/brute_force.hpp index 56a0e4413..429fb10c0 100644 --- a/src/ipc/broad_phase/brute_force.hpp +++ b/src/ipc/broad_phase/brute_force.hpp @@ -53,8 +53,8 @@ class BruteForce : public BroadPhase { /// @param[out] candidates The candidate collisions. template void detect_candidates( - const std::vector& boxes0, - const std::vector& boxes1, + const AABBs& boxes0, + const AABBs& boxes1, const std::function& can_collide, std::vector& candidates) const; }; diff --git a/src/ipc/broad_phase/bvh.cpp b/src/ipc/broad_phase/bvh.cpp index dec56804f..72cb552bf 100644 --- a/src/ipc/broad_phase/bvh.cpp +++ b/src/ipc/broad_phase/bvh.cpp @@ -26,13 +26,14 @@ void BVH::build( Eigen::ConstRef edges, Eigen::ConstRef faces) { + IPC_TOOLKIT_PROFILE_BLOCK("BVH::build"); BroadPhase::build(edges, faces); // Build edge_boxes and face_boxes init_bvh(vertex_boxes, *vertex_bvh); init_bvh(edge_boxes, *edge_bvh); init_bvh(face_boxes, *face_bvh); } -void BVH::init_bvh(const std::vector& boxes, SimpleBVH::BVH& bvh) +void BVH::init_bvh(const AABBs& boxes, SimpleBVH::BVH& bvh) { if (boxes.empty()) { return; @@ -40,9 +41,10 @@ void BVH::init_bvh(const std::vector& boxes, SimpleBVH::BVH& bvh) IPC_TOOLKIT_PROFILE_BLOCK("BVH::init_bvh"); + // Convert AABBs to the format expected by SimpleBVH std::vector> vector_boxes(boxes.size()); for (int i = 0; i < boxes.size(); i++) { - vector_boxes[i] = { { to_3D(boxes[i].min), to_3D(boxes[i].max) } }; + vector_boxes[i] = { { boxes[i].min, boxes[i].max } }; } bvh.init(vector_boxes); @@ -58,7 +60,7 @@ void BVH::clear() template void BVH::detect_candidates( - const std::vector& boxes, + const AABBs& boxes, const SimpleBVH::BVH& bvh, const std::function& can_collide, std::vector& candidates) diff --git a/src/ipc/broad_phase/bvh.hpp b/src/ipc/broad_phase/bvh.hpp index 4d97d358d..118ff83a9 100644 --- a/src/ipc/broad_phase/bvh.hpp +++ b/src/ipc/broad_phase/bvh.hpp @@ -67,7 +67,7 @@ class BVH : public BroadPhase { /// @brief Initialize a BVH from a set of boxes. /// @param[in] boxes Set of boxes to initialize the BVH with. /// @param[out] bvh The BVH to initialize. - static void init_bvh(const std::vector& boxes, SimpleBVH::BVH& bvh); + static void init_bvh(const AABBs& boxes, SimpleBVH::BVH& bvh); /// @brief Detect candidate collisions between a BVH and a sets of boxes. /// @tparam Candidate Type of candidate collision. @@ -82,7 +82,7 @@ class BVH : public BroadPhase { bool swap_order = false, bool triangular = false> static void detect_candidates( - const std::vector& boxes, + const AABBs& boxes, const SimpleBVH::BVH& bvh, const std::function& can_collide, std::vector& candidates); diff --git a/src/ipc/broad_phase/hash_grid.cpp b/src/ipc/broad_phase/hash_grid.cpp index a9130bf9d..b4fd0db59 100644 --- a/src/ipc/broad_phase/hash_grid.cpp +++ b/src/ipc/broad_phase/hash_grid.cpp @@ -23,12 +23,8 @@ void HashGrid::build( { BroadPhase::build(edges, faces); - ArrayMax3d mesh_min = vertex_boxes[0].min; - ArrayMax3d mesh_max = vertex_boxes[0].max; - for (const auto& box : vertex_boxes) { - mesh_min = mesh_min.min(box.min); - mesh_max = mesh_max.max(box.max); - } + Eigen::Array3d mesh_min, mesh_max; + compute_mesh_aabb(mesh_min, mesh_max); const double cell_size = suggest_good_voxel_size(edges.rows() > 0 ? edge_boxes : vertex_boxes); @@ -38,8 +34,8 @@ void HashGrid::build( } void HashGrid::resize( - Eigen::ConstRef domain_min, - Eigen::ConstRef domain_max, + Eigen::ConstRef domain_min, + Eigen::ConstRef domain_max, const double cell_size) { assert(cell_size > 0.0); @@ -53,7 +49,7 @@ void HashGrid::resize( logger().trace( "hash-grid resized with a size of {:d}x{:d}x{:d}", grid_size()[0], - grid_size()[1], grid_size().size() == 3 ? grid_size()[2] : 1); + grid_size()[1], grid_size()[2]); } void HashGrid::insert_boxes() @@ -64,7 +60,7 @@ void HashGrid::insert_boxes() } void HashGrid::insert_boxes( - const std::vector& boxes, std::vector& items) const + const AABBs& boxes, std::vector& items) const { tbb::enumerable_thread_specific> storage; @@ -87,20 +83,22 @@ void HashGrid::insert_boxes( void HashGrid::insert_box( const AABB& aabb, const long id, std::vector& items) const { - ArrayMax3i int_min = ((aabb.min - domain_min()) / cell_size()).cast(); + Eigen::Array3i int_min = + ((aabb.min - domain_min()) / cell_size()).cast(); // We can round down to -1, but not less assert((int_min >= -1).all()); assert((int_min <= grid_size()).all()); int_min = int_min.max(0).min(grid_size() - 1); - ArrayMax3i int_max = ((aabb.max - domain_min()) / cell_size()).cast(); + Eigen::Array3i int_max = + ((aabb.max - domain_min()) / cell_size()).cast(); assert((int_max >= -1).all()); assert((int_max <= grid_size()).all()); int_max = int_max.max(0).min(grid_size() - 1); assert((int_min <= int_max).all()); - int min_z = int_min.size() == 3 ? int_min.z() : 0; - int max_z = int_max.size() == 3 ? int_max.z() : 0; + const int min_z = dim == 3 ? int_min.z() : 0; + const int max_z = dim == 3 ? int_max.z() : 0; for (int x = int_min.x(); x <= int_max.x(); ++x) { for (int y = int_min.y(); y <= int_max.y(); ++y) { for (int z = min_z; z <= max_z; ++z) { @@ -114,8 +112,8 @@ template void HashGrid::detect_candidates( const std::vector& items0, const std::vector& items1, - const std::vector& boxes0, - const std::vector& boxes1, + const AABBs& boxes0, + const AABBs& boxes1, const std::function& can_collide, std::vector& candidates) const { @@ -221,7 +219,7 @@ void HashGrid::detect_candidates( template void HashGrid::detect_candidates( const std::vector& items, - const std::vector& boxes, + const AABBs& boxes, const std::function& can_collide, std::vector& candidates) const { diff --git a/src/ipc/broad_phase/hash_grid.hpp b/src/ipc/broad_phase/hash_grid.hpp index f189efe9a..431ad94b7 100644 --- a/src/ipc/broad_phase/hash_grid.hpp +++ b/src/ipc/broad_phase/hash_grid.hpp @@ -74,9 +74,10 @@ class HashGrid : public BroadPhase { std::vector& candidates) const override; double cell_size() const { return m_cell_size; } - const ArrayMax3i& grid_size() const { return m_grid_size; } - const ArrayMax3d& domain_min() const { return m_domain_min; } - const ArrayMax3d& domain_max() const { return m_domain_max; } + // TODO: Update this + const Eigen::Array3i& grid_size() const { return m_grid_size; } + const Eigen::Array3d& domain_min() const { return m_domain_min; } + const Eigen::Array3d& domain_max() const { return m_domain_max; } protected: /// @brief Build the broad phase for collision detection. @@ -88,14 +89,13 @@ class HashGrid : public BroadPhase { Eigen::ConstRef faces) override; void resize( - Eigen::ConstRef domain_min, - Eigen::ConstRef domain_max, + Eigen::ConstRef domain_min, + Eigen::ConstRef domain_max, double cell_size); void insert_boxes(); - void insert_boxes( - const std::vector& boxes, std::vector& items) const; + void insert_boxes(const AABBs& boxes, std::vector& items) const; /// @brief Add an AABB of the extents to the hash grid. void insert_box( @@ -124,8 +124,8 @@ class HashGrid : public BroadPhase { void detect_candidates( const std::vector& items0, const std::vector& items1, - const std::vector& boxes0, - const std::vector& boxes1, + const AABBs& boxes0, + const AABBs& boxes1, const std::function& can_collide, std::vector& candidates) const; @@ -138,15 +138,15 @@ class HashGrid : public BroadPhase { template void detect_candidates( const std::vector& items, - const std::vector& boxes, + const AABBs& boxes, const std::function& can_collide, std::vector& candidates) const; protected: double m_cell_size = -1; - ArrayMax3i m_grid_size; - ArrayMax3d m_domain_min; - ArrayMax3d m_domain_max; + Eigen::Array3i m_grid_size; + Eigen::Array3d m_domain_min; + Eigen::Array3d m_domain_max; std::vector vertex_items; std::vector edge_items; diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 06ff938e9..654f16215 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -22,85 +22,84 @@ namespace { // Helper to safely convert double AABB to float AABB inline void assign_inflated_aabb(const AABB& box, LBVH::Node& node) { - constexpr float inf = std::numeric_limits::infinity(); // Round Min down - node.aabb_min[0] = std::nextafter(float(box.min[0]), -inf); - node.aabb_min[1] = std::nextafter(float(box.min[1]), -inf); - node.aabb_min[2] = box.min.size() == 3 - ? std::nextafter(float(box.min[2]), -inf) - : 0.0f; + node.aabb_min = box.min.unaryExpr([](double val) { + return std::nextafter( + float(val), -std::numeric_limits::infinity()); + }); // Round Max up - node.aabb_max[0] = std::nextafter(float(box.max[0]), inf); - node.aabb_max[1] = std::nextafter(float(box.max[1]), inf); - node.aabb_max[2] = - box.max.size() == 3 ? std::nextafter(float(box.max[2]), inf) : 0.0f; + node.aabb_max = box.max.unaryExpr([](double val) { + return std::nextafter( + float(val), std::numeric_limits::infinity()); + }); } } // namespace -LBVH::LBVH() : BroadPhase(), dim(0) { } - -LBVH::~LBVH() = default; - -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) -LBVH::Node::Node() : primitive_id(INVALID_ID), is_inner_marker(0) +LBVH::LBVH() : BroadPhase() { static_assert( - sizeof(Node) == 32, + sizeof(LBVH::Node) == 32, "LBVH::Node size must be 32 bytes to fit 2 Nodes in a cache line"); } +LBVH::~LBVH() = default; + void LBVH::build( Eigen::ConstRef edges, Eigen::ConstRef faces) { + IPC_TOOLKIT_PROFILE_BLOCK("LBVH::build"); + BroadPhase::build(edges, faces); // Build edge_boxes and face_boxes if (vertex_boxes.empty()) { return; } - dim = vertex_boxes[0].min.size(); - assert(dim == 2 || dim == 3); // Only 2D and 3D supported + assert(dim == 2 || dim == 3); - mesh_aabb = { Eigen::Array3d::Zero(), Eigen::Array3d::Zero() }; - mesh_aabb.min = to_3D(vertex_boxes[0].min); - mesh_aabb.max = to_3D(vertex_boxes[0].max); - for (const auto& box : vertex_boxes) { - mesh_aabb.min = mesh_aabb.min.min(to_3D(box.min)); - mesh_aabb.max = mesh_aabb.max.max(to_3D(box.max)); - } + compute_mesh_aabb(mesh_aabb.min, mesh_aabb.max); init_bvh(vertex_boxes, vertex_bvh); init_bvh(edge_boxes, edge_bvh); init_bvh(face_boxes, face_bvh); // Copy edge and face vertex ids for access during traversal - edge_vertex_ids.resize(edges.rows()); - for (int i = 0; i < edges.rows(); i++) { - edge_vertex_ids[i][0] = static_cast(edges(i, 0)); - edge_vertex_ids[i][1] = static_cast(edges(i, 1)); - } - face_vertex_ids.resize(faces.rows()); - for (int i = 0; i < faces.rows(); i++) { - face_vertex_ids[i][0] = static_cast(faces(i, 0)); - face_vertex_ids[i][1] = static_cast(faces(i, 1)); - face_vertex_ids[i][2] = static_cast(faces(i, 2)); + { + IPC_TOOLKIT_PROFILE_BLOCK("copy_vertex_ids"); + + edge_vertex_ids.resize(edges.rows()); + tbb::parallel_for( + tbb::blocked_range(0, edges.rows()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); ++i) { + edge_vertex_ids[i][0] = static_cast(edges(i, 0)); + edge_vertex_ids[i][1] = static_cast(edges(i, 1)); + } + }); + + face_vertex_ids.resize(faces.rows()); + tbb::parallel_for( + tbb::blocked_range(0, faces.rows()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); ++i) { + face_vertex_ids[i][0] = static_cast(faces(i, 0)); + face_vertex_ids[i][1] = static_cast(faces(i, 1)); + face_vertex_ids[i][2] = static_cast(faces(i, 2)); + } + }); } // Clear parent data to save memory. // These are redundant after building the BVHs. vertex_boxes.clear(); - vertex_boxes.shrink_to_fit(); edge_boxes.clear(); - edge_boxes.shrink_to_fit(); face_boxes.clear(); - face_boxes.shrink_to_fit(); } namespace { - int delta( - const std::vector& sorted_morton_codes, + const LBVH::MortonCodeElements& sorted_morton_codes, int i, uint64_t code_i, int j) @@ -129,7 +128,7 @@ namespace { } void determine_range( - const std::vector& sorted_morton_codes, + const LBVH::MortonCodeElements& sorted_morton_codes, int idx, int& lower, int& upper) @@ -164,7 +163,7 @@ namespace { } int find_split( - const std::vector& sorted_morton_codes, + const LBVH::MortonCodeElements& sorted_morton_codes, int first, int last) { @@ -195,8 +194,7 @@ namespace { } } // namespace -void LBVH::init_bvh( - const std::vector& boxes, std::vector& lbvh) const +void LBVH::init_bvh(const AABBs& boxes, Nodes& lbvh) const { if (boxes.empty()) { return; @@ -204,9 +202,15 @@ void LBVH::init_bvh( IPC_TOOLKIT_PROFILE_BLOCK("LBVH::init_bvh"); - std::vector morton_codes(boxes.size()); + if (lbvh.size() != 2 * boxes.size() - 1) { + IPC_TOOLKIT_PROFILE_BLOCK("resize_bvh"); + lbvh.resize(2 * boxes.size() - 1); + } + + LBVH::MortonCodeElements morton_codes(boxes.size()); { IPC_TOOLKIT_PROFILE_BLOCK("compute_morton_codes"); + const Eigen::Array3d mesh_width = mesh_aabb.max - mesh_aabb.min; tbb::parallel_for( tbb::blocked_range(0, boxes.size()), @@ -214,8 +218,7 @@ void LBVH::init_bvh( for (size_t i = r.begin(); i < r.end(); i++) { const auto& box = boxes[i]; - const Eigen::Array3d center = - to_3D(0.5 * (box.min + box.max)); + const Eigen::Array3d center = 0.5 * (box.min + box.max); const Eigen::Array3d mapped_center = (center - mesh_aabb.min) / mesh_width; @@ -241,10 +244,10 @@ void LBVH::init_bvh( }); } + assert(boxes.size() <= std::numeric_limits::max()); const int LEAF_OFFSET = int(boxes.size()) - 1; - lbvh.resize(2 * boxes.size() - 1); - std::vector construction_infos(2 * boxes.size() - 1); + LBVH::ConstructionInfos construction_infos(lbvh.size()); { IPC_TOOLKIT_PROFILE_BLOCK("build_hierarchy"); tbb::parallel_for( @@ -259,10 +262,11 @@ void LBVH::init_bvh( Node leaf_node; // Create leaf node assign_inflated_aabb(box, leaf_node); leaf_node.primitive_id = morton_codes[i].box_id; + leaf_node.is_inner_marker = 0; lbvh[LEAF_OFFSET + i] = leaf_node; // Store leaf } - if (i < boxes.size() - 1) { + if (i < LEAF_OFFSET) { // Find out which range of objects the node corresponds // to. (This is where the magic happens!) @@ -295,11 +299,17 @@ void LBVH::init_bvh( lbvh[i].right = child_b; construction_infos[child_a].parent = int(i); construction_infos[child_b].parent = int(i); + construction_infos[child_a].visitation_count.store( + 0, std::memory_order_relaxed); + construction_infos[child_b].visitation_count.store( + 0, std::memory_order_relaxed); } - // node 0 is the root + // node 0 is the root and has no parent to set these values if (i == 0) { construction_infos[0].parent = 0; + construction_infos[0].visitation_count.store( + 0, std::memory_order_relaxed); } } }); @@ -382,7 +392,7 @@ namespace { template void traverse_lbvh( const LBVH::Node& query, - const std::vector& lbvh, + const LBVH::Nodes& lbvh, const std::function& can_collide, std::vector& candidates) { @@ -445,7 +455,7 @@ namespace { void traverse_lbvh_simd( const LBVH::Node* queries, const size_t n_queries, - const std::vector& lbvh, + const LBVH::Nodes& lbvh, const std::function& can_collide, std::vector& candidates) { @@ -561,8 +571,8 @@ namespace { bool triangular, bool use_simd = true> void independent_traversal( - const std::vector& source, - const std::vector& target, + const LBVH::Nodes& source, + const LBVH::Nodes& target, const std::function& can_collide, tbb::enumerable_thread_specific>& storage) { @@ -604,8 +614,8 @@ namespace { template void LBVH::detect_candidates( - const std::vector& source, - const std::vector& target, + const Nodes& source, + const Nodes& target, const std::function& can_collide, std::vector& candidates) { diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index dd20fe474..d04d5558d 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -1,17 +1,19 @@ #pragma once #include +#include #include namespace ipc { /// @brief Linear Bounding Volume Hierarchy (LBVH) broad phase collision detection. +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) class LBVH : public BroadPhase { public: static constexpr index_t INVALID_ID = 0xFFFFFFFF; - struct Node { + struct alignas(32) Node { static constexpr int32_t INVALID_POINTER = 0x0; // do not change /// @brief The min corner of the AABB @@ -47,9 +49,6 @@ class LBVH : public BroadPhase { #pragma GCC diagnostic pop - /// @brief Default constructor - Node(); - /// @brief Check if this node is an inner node. bool is_inner() const { return is_inner_marker; } @@ -77,13 +76,22 @@ class LBVH : public BroadPhase { size_t box_id; ///< Pointer into boxes buffer }; + // Vectors with custom allocator to avoid initialization overhead + using Nodes = std::vector>; + using MortonCodeElements = + std::vector>; + +private: struct ConstructionInfo { /// @brief Parent to the parent - int parent = -1; + int parent; /// @brief Number of threads that arrived - std::atomic visitation_count = 0; + std::atomic visitation_count; }; + using ConstructionInfos = + std::vector>; + public: LBVH(); ~LBVH(); @@ -127,9 +135,9 @@ class LBVH : public BroadPhase { void detect_face_face_candidates( std::vector& candidates) const override; - const std::vector& vertex_nodes() const { return vertex_bvh; } - const std::vector& edge_nodes() const { return edge_bvh; } - const std::vector& face_nodes() const { return face_bvh; } + const Nodes& vertex_nodes() const { return vertex_bvh; } + const Nodes& edge_nodes() const { return edge_bvh; } + const Nodes& face_nodes() const { return face_bvh; } protected: /// @brief Build the broad phase for collision detection. @@ -143,8 +151,7 @@ class LBVH : public BroadPhase { /// @brief Initialize a LBVH from a set of boxes. /// @param[in] boxes Set of boxes to initialize the LBVH with. /// @param[out] bvh The LBVH to initialize. - void - init_bvh(const std::vector& boxes, std::vector& lbvh) const; + void init_bvh(const AABBs& boxes, Nodes& lbvh) const; /// @brief Detect candidate collisions between a LBVH and a sets of boxes. /// @tparam Candidate Type of candidate collision. @@ -159,14 +166,14 @@ class LBVH : public BroadPhase { bool swap_order = false, bool triangular = false> static void detect_candidates( - const std::vector& source, - const std::vector& target, + const Nodes& source, + const Nodes& target, const std::function& can_collide, std::vector& candidates); template static void detect_candidates( - const std::vector& source_and_target, + const Nodes& source_and_target, const std::function& can_collide, std::vector& candidates) { @@ -188,25 +195,31 @@ class LBVH : public BroadPhase { // ------------------------------------------------------------------------- /// @brief BVH containing the vertices. - std::vector vertex_bvh; + Nodes vertex_bvh; /// @brief BVH containing the edges. - std::vector edge_bvh; + Nodes edge_bvh; /// @brief BVH containing the faces. - std::vector face_bvh; + Nodes face_bvh; + +private: + // Vectors with custom allocator to avoid initialization overhead + using EdgeIndices = std::vector< + std::array, + DefaultInitAllocator>>; + using FaceIndices = std::vector< + std::array, + DefaultInitAllocator>>; /// @brief Edge vertices in the original mesh order. - std::vector> edge_vertex_ids; + EdgeIndices edge_vertex_ids; /// @brief Face vertices in the original mesh order. - std::vector> face_vertex_ids; + FaceIndices face_vertex_ids; /// @brief The axis-aligned bounding box of the entire mesh. struct { Eigen::Array3d min; Eigen::Array3d max; } mesh_aabb; - - /// @brief Dimension of the simulation for which the broad phase was built. - uint8_t dim; }; } // namespace ipc \ No newline at end of file diff --git a/src/ipc/broad_phase/spatial_hash.cpp b/src/ipc/broad_phase/spatial_hash.cpp index 861421519..0f4ee5cd6 100644 --- a/src/ipc/broad_phase/spatial_hash.cpp +++ b/src/ipc/broad_phase/spatial_hash.cpp @@ -39,7 +39,7 @@ namespace { void fill_primitive_to_voxels( Eigen::ConstRef min_voxel, Eigen::ConstRef max_voxel, - Eigen::ConstRef voxel_count, + Eigen::ConstRef voxel_count, const int voxel_count_0x1, std::vector& primitive_to_voxels) { @@ -70,6 +70,7 @@ void SpatialHash::build( double voxel_size) { clear(); + dim = static_cast(vertices.cols()); build_vertex_boxes(vertices, vertex_boxes, inflation_radius); build(edges, faces, voxel_size); } @@ -83,20 +84,23 @@ void SpatialHash::build( double voxel_size) { clear(); + dim = static_cast(vertices_t0.cols()); build_vertex_boxes( vertices_t0, vertices_t1, vertex_boxes, inflation_radius); build(edges, faces, voxel_size); } void SpatialHash::build( - const std::vector& _vertex_boxes, + const AABBs& _vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces, + const uint8_t _dim, double voxel_size) { // WARNING: Clear will reset vertex_boxes if this assert is triggered assert(&(this->vertex_boxes) != &_vertex_boxes); clear(); + this->dim = _dim; this->vertex_boxes = _vertex_boxes; build(edges, faces, voxel_size); } @@ -108,7 +112,7 @@ void SpatialHash::build( { const size_t num_vertices = vertex_boxes.size(); assert(num_vertices > 0); - dim = vertex_boxes[0].min.size(); + assert(dim == 2 || dim == 3); BroadPhase::build(edges, faces); @@ -117,17 +121,12 @@ void SpatialHash::build( edges.rows() > 0 ? edge_boxes : vertex_boxes); } - left_bottom_corner = vertex_boxes[0].min; - right_top_corner = vertex_boxes[0].max; - for (const auto& box : vertex_boxes) { - left_bottom_corner = left_bottom_corner.min(box.min); - right_top_corner = right_top_corner.max(box.max); - } + compute_mesh_aabb(left_bottom_corner, right_top_corner); one_div_voxel_size = 1.0 / voxel_size; - const ArrayMax3d range = right_top_corner - left_bottom_corner; - voxel_count = (range * one_div_voxel_size).ceil().template cast(); + const Eigen::Array3d range = right_top_corner - left_bottom_corner; + voxel_count = (range * one_div_voxel_size).ceil().cast(); if (voxel_count.minCoeff() <= 0) { // cast overflow due to huge search direction one_div_voxel_size = 1.0 / (range.maxCoeff() * 1.01); @@ -138,14 +137,12 @@ void SpatialHash::build( // ------------------------------------------------------------------------ // precompute vertex min and max voxel axis indices - std::vector vertex_min_voxel_axis_index( - num_vertices, Eigen::Array3i::Zero()); - std::vector vertex_max_voxel_axis_index( - num_vertices, Eigen::Array3i::Zero()); + std::vector vertex_min_voxel_axis_index(num_vertices); + std::vector vertex_max_voxel_axis_index(num_vertices); tbb::parallel_for(size_t(0), num_vertices, [&](size_t vi) { - vertex_min_voxel_axis_index[vi].head(dim) = + vertex_min_voxel_axis_index[vi] = locate_voxel_axis_index(vertex_boxes[vi].min); - vertex_max_voxel_axis_index[vi].head(dim) = + vertex_max_voxel_axis_index[vi] = locate_voxel_axis_index(vertex_boxes[vi].max); }); @@ -198,8 +195,8 @@ namespace { template void detect_candidates( - const std::vector& boxesA, - const std::vector& boxesB, + const AABBs& boxesA, + const AABBs& boxesB, const std::function&)>& query_A_for_Bs, const std::function& can_collide, std::vector& candidates) @@ -243,7 +240,7 @@ namespace { template void detect_candidates( - const std::vector& boxesA, + const AABBs& boxesA, const std::function&)>& query_A_for_As, const std::function& can_collide, std::vector& candidates) @@ -350,37 +347,36 @@ void SpatialHash::detect_face_face_candidates( // ============================================================================ -int SpatialHash::locate_voxel_index(Eigen::ConstRef p) const +int SpatialHash::locate_voxel_index(Eigen::ConstRef p) const { return voxel_axis_index_to_voxel_index(locate_voxel_axis_index(p)); } -ArrayMax3i -SpatialHash::locate_voxel_axis_index(Eigen::ConstRef p) const +Eigen::Array3i +SpatialHash::locate_voxel_axis_index(Eigen::ConstRef p) const { return ((p.array() - left_bottom_corner) * one_div_voxel_size) .floor() - .template cast(); + .cast(); } void SpatialHash::locate_box_voxel_axis_index( - ArrayMax3d min_corner, // input but will be modified - ArrayMax3d max_corner, // input but will be modified - Eigen::Ref min_index, // output - Eigen::Ref max_index, // output + Eigen::Array3d min_corner, // input but will be modified + Eigen::Array3d max_corner, // input but will be modified + Eigen::Ref min_index, // output + Eigen::Ref max_index, // output const double inflation_radius) const { AABB::conservative_inflation(min_corner, max_corner, inflation_radius); - min_index = locate_voxel_axis_index(min_corner).max(ArrayMax3i::Zero(dim)); + min_index = locate_voxel_axis_index(min_corner).max(0); max_index = locate_voxel_axis_index(max_corner).min(voxel_count - 1); } int SpatialHash::voxel_axis_index_to_voxel_index( - Eigen::ConstRef voxel_axis_index) const + Eigen::ConstRef voxel_axis_index) const { return voxel_axis_index_to_voxel_index( - voxel_axis_index[0], voxel_axis_index[1], - voxel_axis_index.size() >= 3 ? voxel_axis_index[2] : 0); + voxel_axis_index[0], voxel_axis_index[1], voxel_axis_index[2]); } int SpatialHash::voxel_axis_index_to_voxel_index(int ix, int iy, int iz) const diff --git a/src/ipc/broad_phase/spatial_hash.hpp b/src/ipc/broad_phase/spatial_hash.hpp index 7584e6205..6364ce543 100644 --- a/src/ipc/broad_phase/spatial_hash.hpp +++ b/src/ipc/broad_phase/spatial_hash.hpp @@ -62,11 +62,12 @@ class SpatialHash : public BroadPhase { /// @param edges Collision mesh edges /// @param faces Collision mesh faces void build( - const std::vector& _vertex_boxes, + const AABBs& _vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces) override + Eigen::ConstRef faces, + const uint8_t dim) override { - build(_vertex_boxes, edges, faces, /*voxel_size=*/-1); + build(_vertex_boxes, edges, faces, dim, /*voxel_size=*/-1); } // ------------------------------------------------------------------------ @@ -108,9 +109,10 @@ class SpatialHash : public BroadPhase { /// @param faces Collision mesh faces /// @param voxel_size Size of the voxels used in the spatial hash. void build( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces, + const uint8_t dim, double voxel_size); /// @brief Clear any built data. @@ -155,32 +157,33 @@ class SpatialHash : public BroadPhase { Eigen::ConstRef faces, double voxel_size); - int locate_voxel_index(Eigen::ConstRef p) const; + int locate_voxel_index(Eigen::ConstRef p) const; - ArrayMax3i locate_voxel_axis_index(Eigen::ConstRef p) const; + Eigen::Array3i + locate_voxel_axis_index(Eigen::ConstRef p) const; void locate_box_voxel_axis_index( - ArrayMax3d min_corner, - ArrayMax3d max_corner, - Eigen::Ref min_index, - Eigen::Ref max_index, + Eigen::Array3d min_corner, + Eigen::Array3d max_corner, + Eigen::Ref min_index, + Eigen::Ref max_index, const double inflation_radius = 0) const; int voxel_axis_index_to_voxel_index( - Eigen::ConstRef voxel_axis_index) const; + Eigen::ConstRef voxel_axis_index) const; int voxel_axis_index_to_voxel_index(int ix, int iy, int iz) const; // --- Data members ------------------------------------------------------- /// @brief The left bottom corner of the world bounding box. - ArrayMax3d left_bottom_corner; + Eigen::Array3d left_bottom_corner; /// @brief The right top corner of the world bounding box. - ArrayMax3d right_top_corner; + Eigen::Array3d right_top_corner; /// @brief The number of voxels in each dimension. - ArrayMax3i voxel_count; + Eigen::Array3i voxel_count; /// @note Use the Pimpl idiom to hide unordered_map and unordered_set from the public API. struct Impl; @@ -192,9 +195,6 @@ class SpatialHash : public BroadPhase { /// @brief The number of voxels in the first two dimensions. int voxel_count_0x1 = -1; - - /// @brief The dimension of the space (2D or 3D). - int dim = -1; }; } // namespace ipc diff --git a/src/ipc/broad_phase/sweep_and_prune.cpp b/src/ipc/broad_phase/sweep_and_prune.cpp index 23067afe7..eeb78cfb0 100644 --- a/src/ipc/broad_phase/sweep_and_prune.cpp +++ b/src/ipc/broad_phase/sweep_and_prune.cpp @@ -30,6 +30,8 @@ void SweepAndPrune::build( clear(); + dim = vertices.cols(); + scalable_ccd::build_vertex_boxes( vertices, boxes->vertices, inflation_radius); scalable_ccd::build_edge_boxes(boxes->vertices, edges, boxes->edges); @@ -48,6 +50,8 @@ void SweepAndPrune::build( clear(); + dim = vertices_t0.cols(); + scalable_ccd::build_vertex_boxes( vertices_t0, vertices_t1, boxes->vertices, inflation_radius); scalable_ccd::build_edge_boxes(boxes->vertices, edges, boxes->edges); @@ -55,15 +59,18 @@ void SweepAndPrune::build( } void SweepAndPrune::build( - const std::vector& p_vertex_boxes, + const AABBs& p_vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces) + Eigen::ConstRef faces, + const uint8_t p_dim) { assert(edges.size() == 0 || edges.cols() == 2); assert(faces.size() == 0 || faces.cols() == 3); clear(); + dim = p_dim; + // Convert from ipc::AABB to scalable_ccd::AABB (additional element_id) boxes->vertices.resize(p_vertex_boxes.size()); for (int i = 0; i < p_vertex_boxes.size(); ++i) { diff --git a/src/ipc/broad_phase/sweep_and_prune.hpp b/src/ipc/broad_phase/sweep_and_prune.hpp index 78717937e..37f0abb73 100644 --- a/src/ipc/broad_phase/sweep_and_prune.hpp +++ b/src/ipc/broad_phase/sweep_and_prune.hpp @@ -45,9 +45,10 @@ class SweepAndPrune : public BroadPhase { /// @param edges Collision mesh edges /// @param faces Collision mesh faces void build( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces) override; + Eigen::ConstRef faces, + const uint8_t dim) override; /// @brief Clear any built data. void clear() override; diff --git a/src/ipc/broad_phase/sweep_and_tiniest_queue.cu b/src/ipc/broad_phase/sweep_and_tiniest_queue.cu index a2cf67bfe..485b2e9f7 100644 --- a/src/ipc/broad_phase/sweep_and_tiniest_queue.cu +++ b/src/ipc/broad_phase/sweep_and_tiniest_queue.cu @@ -67,7 +67,7 @@ void SweepAndTiniestQueue::build( } void SweepAndTiniestQueue::build( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces) { @@ -81,13 +81,11 @@ void SweepAndTiniestQueue::build( for (int i = 0; i < vertex_boxes.size(); ++i) { boxes->vertices[i].min.x = vertex_boxes[i].min.x(); boxes->vertices[i].min.y = vertex_boxes[i].min.y(); - boxes->vertices[i].min.z = - vertex_boxes[i].min.size() > 2 ? vertex_boxes[i].min.z() : 0; + boxes->vertices[i].min.z = vertex_boxes[i].min.z(); boxes->vertices[i].max.x = vertex_boxes[i].max.x(); boxes->vertices[i].max.y = vertex_boxes[i].max.y(); - boxes->vertices[i].max.z = - vertex_boxes[i].max.size() > 2 ? vertex_boxes[i].max.z() : 0; + boxes->vertices[i].max.z = vertex_boxes[i].max.z(); // If vertex id == -1 it means this slot is not used. // But Scalable CCD does not have this kind of special value so we map diff --git a/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp b/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp index b0a64567d..ac02861cf 100644 --- a/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp +++ b/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp @@ -49,7 +49,7 @@ class SweepAndTiniestQueue : public BroadPhase { /// @param edges Collision mesh edges /// @param faces Collision mesh faces void build( - const std::vector& vertex_boxes, + const AABBs& vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces) override; diff --git a/src/ipc/broad_phase/voxel_size_heuristic.cpp b/src/ipc/broad_phase/voxel_size_heuristic.cpp index 929644c4d..f7f01e3c1 100644 --- a/src/ipc/broad_phase/voxel_size_heuristic.cpp +++ b/src/ipc/broad_phase/voxel_size_heuristic.cpp @@ -77,7 +77,7 @@ double suggest_good_voxel_size( return voxel_size; } -double suggest_good_voxel_size(const std::vector& boxes) +double suggest_good_voxel_size(const AABBs& boxes) { assert(!boxes.empty()); diff --git a/src/ipc/broad_phase/voxel_size_heuristic.hpp b/src/ipc/broad_phase/voxel_size_heuristic.hpp index 5bb3f1016..5a4ee111c 100644 --- a/src/ipc/broad_phase/voxel_size_heuristic.hpp +++ b/src/ipc/broad_phase/voxel_size_heuristic.hpp @@ -29,7 +29,7 @@ double suggest_good_voxel_size( /// @brief Suggest a good voxel size for the given mesh. /// @param boxes The axis-aligned bounding boxes of the mesh. /// @return The suggested voxel size. -double suggest_good_voxel_size(const std::vector& boxes); +double suggest_good_voxel_size(const AABBs& boxes); /// @brief Compute the average edge length of a mesh. /// @param vertices_t0 The vertex positions of the mesh at time t0. diff --git a/src/ipc/candidates/candidates.cpp b/src/ipc/candidates/candidates.cpp index 934b1f7c3..f12028d84 100644 --- a/src/ipc/candidates/candidates.cpp +++ b/src/ipc/candidates/candidates.cpp @@ -56,7 +56,7 @@ void Candidates::build( broad_phase->can_vertices_collide = mesh.can_collide; broad_phase->build(vertices, mesh.edges(), mesh.faces(), inflation_radius); - broad_phase->detect_collision_candidates(dim, *this); + broad_phase->detect_collision_candidates(*this); // Codim. vertices to codim. vertices: if (mesh.num_codim_vertices()) { @@ -133,7 +133,7 @@ void Candidates::build( broad_phase->can_vertices_collide = mesh.can_collide; broad_phase->build( vertices_t0, vertices_t1, mesh.edges(), mesh.faces(), inflation_radius); - broad_phase->detect_collision_candidates(dim, *this); + broad_phase->detect_collision_candidates(*this); // Codim. vertices to codim. vertices: if (mesh.num_codim_vertices()) { diff --git a/src/ipc/utils/CMakeLists.txt b/src/ipc/utils/CMakeLists.txt index 012a27df5..b6b6d9d63 100644 --- a/src/ipc/utils/CMakeLists.txt +++ b/src/ipc/utils/CMakeLists.txt @@ -1,4 +1,6 @@ set(SOURCES + autodiff_types.hpp + default_init_allocator.hpp eigen_ext.hpp eigen_ext.tpp local_to_global.hpp diff --git a/src/ipc/utils/default_init_allocator.hpp b/src/ipc/utils/default_init_allocator.hpp new file mode 100644 index 000000000..e00c8201b --- /dev/null +++ b/src/ipc/utils/default_init_allocator.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include + +namespace ipc { + +// A wrapper that performs no initialization on construction +template > +class DefaultInitAllocator : public A { + typedef std::allocator_traits a_t; + +public: + // NOLINTNEXTLINE(readability-identifier-naming) + template struct rebind { + using other = + DefaultInitAllocator>; + }; + using A::A; + + // The magic happens here: do nothing when constructing a U + template + void + construct(U* ptr) noexcept(std::is_nothrow_default_constructible::value) + { + ::new (static_cast(ptr)) U; // Default construction + } + + // Override construct to do nothing for trivial types + template void construct(U* ptr, const U& val) + { + a_t::construct(static_cast(*this), ptr, val); + } + template + void construct(U* ptr, Args&&... args) + { + a_t::construct( + static_cast(*this), ptr, std::forward(args)...); + } +}; + +} // namespace ipc \ No newline at end of file diff --git a/tests/src/tests/broad_phase/test_broad_phase.cpp b/tests/src/tests/broad_phase/test_broad_phase.cpp index 0c0b71ac3..37ab71545 100644 --- a/tests/src/tests/broad_phase/test_broad_phase.cpp +++ b/tests/src/tests/broad_phase/test_broad_phase.cpp @@ -274,7 +274,7 @@ TEST_CASE("Broad phase build from boxes", "[broad_phase]") { using namespace ipc; - std::vector boxes(100); + AABBs boxes(100); for (int i = 0; i < boxes.size(); ++i) { boxes[i].min = Eigen::Array3d(i * 0.6, 0, 0); boxes[i].max = Eigen::Array3d(boxes[i].min.x() + 1.0, 0, 0); @@ -284,7 +284,7 @@ TEST_CASE("Broad phase build from boxes", "[broad_phase]") REQUIRE(!boxes[0].intersects(boxes[2])); const auto broad_phase = GENERATE(tests::BroadPhaseGenerator::create()); - broad_phase->build(boxes, Eigen::MatrixXi(), Eigen::MatrixXi()); + broad_phase->build(boxes, Eigen::MatrixXi(), Eigen::MatrixXi(), /*dim=*/3); std::vector candidates; broad_phase->detect_vertex_vertex_candidates(candidates); diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 128fa3d00..0625a8aab 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -31,7 +31,7 @@ bool is_aabb_union(LBVH::Node parent, LBVH::Node childA, LBVH::Node childB) } void traverse_lbvh( - const std::vector& lbvh_nodes, + const LBVH::Nodes& lbvh_nodes, const uint32_t index, std::vector& visited) { @@ -66,7 +66,7 @@ void traverse_lbvh( } } -void check_valid_lbvh_nodes(const std::vector& lbvh_nodes) +void check_valid_lbvh_nodes(const LBVH::Nodes& lbvh_nodes) { std::vector visited(lbvh_nodes.size(), false); traverse_lbvh(lbvh_nodes, 0, visited); @@ -110,8 +110,8 @@ TEST_CASE("LBVH::build", "[broad_phase][lbvh]") // -- Check the LBVH nodes are all reachable and contain their children --- check_valid_lbvh_nodes(lbvh->vertex_nodes()); - // check_valid_lbvh_nodes(lbvh->edge_nodes()); - // check_valid_lbvh_nodes(lbvh->face_nodes()); + check_valid_lbvh_nodes(lbvh->edge_nodes()); + check_valid_lbvh_nodes(lbvh->face_nodes()); // -- Check clear() works ------------------------------------------------- lbvh->clear(); diff --git a/tests/src/tests/broad_phase/test_spatial_hash.cpp b/tests/src/tests/broad_phase/test_spatial_hash.cpp index 359bddc83..5064fc159 100644 --- a/tests/src/tests/broad_phase/test_spatial_hash.cpp +++ b/tests/src/tests/broad_phase/test_spatial_hash.cpp @@ -25,7 +25,7 @@ TEST_CASE("Build SpatialHash", "[broad_phase][spatial_hash][build]") sh.build(V0, V1, E, F, inflation_radius); Candidates candidates; - sh.detect_collision_candidates(V0.cols(), candidates); + sh.detect_collision_candidates(candidates); CHECK(candidates.size() == 6'852'873); CHECK(candidates.vv_candidates.size() == 0); diff --git a/tests/src/tests/broad_phase/test_stq.cpp b/tests/src/tests/broad_phase/test_stq.cpp index 177ce4d94..3dbf9c3a9 100644 --- a/tests/src/tests/broad_phase/test_stq.cpp +++ b/tests/src/tests/broad_phase/test_stq.cpp @@ -36,7 +36,7 @@ TEST_CASE("STQ All Cases", "[broad_phase][stq][cuda]") broad_phase->build(V0, V1, E, F, inflation_radius); Candidates candidates; - broad_phase->detect_collision_candidates(V0.cols(), candidates); + broad_phase->detect_collision_candidates(candidates); CHECK(candidates.size() == 6'852'873); CHECK(candidates.vv_candidates.size() == 0); diff --git a/tests/src/tests/candidates/test_normals.cpp b/tests/src/tests/candidates/test_normals.cpp index 6fdb32e60..9fca19f64 100644 --- a/tests/src/tests/candidates/test_normals.cpp +++ b/tests/src/tests/candidates/test_normals.cpp @@ -107,7 +107,7 @@ TEST_CASE("Point-line normal hessian", "[pl][normal]") VectorMax3d e1(DIM); Eigen::VectorXd x(3 * DIM); - const int case_i = GENERATE(range(0, 10)); + const int _ = GENERATE(range(0, 10)); p.setRandom(); e0.setRandom(); e1.setRandom(); From 9a61fc4b1e80993fc864ff4cbb83e259436c0a56 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 22 Jan 2026 15:53:56 -0500 Subject: [PATCH 20/25] Fix edges in examples/profiler.py --- python/examples/profiler.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/examples/profiler.py b/python/examples/profiler.py index 0a28b2bb8..2035c5b50 100644 --- a/python/examples/profiler.py +++ b/python/examples/profiler.py @@ -58,11 +58,14 @@ def plot_profiler(title=None): mesh = meshio.read(args.mesh) faces = mesh.cells_dict["triangle"] - edges = ipctk.edges(faces) + # edges = ipctk.edges(faces) # indices = np.lexsort((edges[:, 1], edges[:, 0])) - # sorted_edges = edges[indices] + # edges = edges[indices] + + # indices = np.lexsort((faces[:, 2], faces[:, 1], faces[:, 0])) + # faces = faces[indices] lbvh = ipctk.LBVH() - lbvh.build(mesh.points, sorted_edges, faces) + lbvh.build(mesh.points, edges, faces) plot_profiler(title=args.mesh.parent.name + "/" + args.mesh.name) From 5f2cd238382064675132f1b2b4b97e5e78314698 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 22 Jan 2026 16:07:36 -0500 Subject: [PATCH 21/25] Fix GH Action errors --- docs/source/about/dependencies.rst | 6 ++++++ python/src/utils/profiler.cpp | 6 ++++-- src/ipc/broad_phase/aabb.cpp | 1 + src/ipc/broad_phase/aabb.hpp | 3 +++ src/ipc/broad_phase/broad_phase.hpp | 1 + src/ipc/broad_phase/lbvh.hpp | 1 + src/ipc/broad_phase/sweep_and_tiniest_queue.cu | 9 ++++++++- src/ipc/broad_phase/sweep_and_tiniest_queue.hpp | 4 +++- 8 files changed, 27 insertions(+), 4 deletions(-) diff --git a/docs/source/about/dependencies.rst b/docs/source/about/dependencies.rst index 62edeae6d..224761e0b 100644 --- a/docs/source/about/dependencies.rst +++ b/docs/source/about/dependencies.rst @@ -97,6 +97,12 @@ Additionally, IPC Toolkit may optionally use the following libraries: - `github.com/nlohmann/json `_ - |:white_large_square:| - ``IPC_TOOLKIT_WITH_PROFILER`` + * - pybind11_json + - JSON support for pybind11 (used in profiler) + - MIT + - `github.com/pybind/pybind11_json `_ + - |:white_large_square:| + - ``IPC_TOOLKIT_BUILD_PYTHON`` and ``IPC_TOOLKIT_WITH_PROFILER`` * - rational-cpp - Rational arithmetic used for exact intersection checks (requires `GMP `_ to be installed at a system level) - MIT diff --git a/python/src/utils/profiler.cpp b/python/src/utils/profiler.cpp index adc63993b..7e191027a 100644 --- a/python/src/utils/profiler.cpp +++ b/python/src/utils/profiler.cpp @@ -1,9 +1,11 @@ #include -#include - #include +#ifdef IPC_TOOLKIT_WITH_PROFILER +#include +#endif + using namespace ipc; void define_profiler(py::module_& m) diff --git a/src/ipc/broad_phase/aabb.cpp b/src/ipc/broad_phase/aabb.cpp index 8a52f75dc..cbfc5f6db 100644 --- a/src/ipc/broad_phase/aabb.cpp +++ b/src/ipc/broad_phase/aabb.cpp @@ -9,6 +9,7 @@ namespace ipc { +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) AABB::AABB(Eigen::ConstRef _min, Eigen::ConstRef _max) : min(Eigen::Array3d::Zero()) , max(Eigen::Array3d::Zero()) diff --git a/src/ipc/broad_phase/aabb.hpp b/src/ipc/broad_phase/aabb.hpp index ddcf65b60..d7d53d3f8 100644 --- a/src/ipc/broad_phase/aabb.hpp +++ b/src/ipc/broad_phase/aabb.hpp @@ -12,15 +12,18 @@ namespace ipc { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) class alignas(64) AABB { public: + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) AABB() = default; AABB(Eigen::ConstRef min, Eigen::ConstRef max); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) AABB(const AABB& aabb1, const AABB& aabb2) : AABB(aabb1.min.min(aabb2.min), aabb1.max.max(aabb2.max)) { } + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) AABB(const AABB& aabb1, const AABB& aabb2, const AABB& aabb3) : AABB( aabb1.min.min(aabb2.min).min(aabb3.min), diff --git a/src/ipc/broad_phase/broad_phase.hpp b/src/ipc/broad_phase/broad_phase.hpp index 23c94a7f1..3a13cbadb 100644 --- a/src/ipc/broad_phase/broad_phase.hpp +++ b/src/ipc/broad_phase/broad_phase.hpp @@ -52,6 +52,7 @@ class BroadPhase { /// @param vertex_boxes AABBs for the vertices. /// @param edges Collision mesh edges /// @param faces Collision mesh faces + /// @param dim Dimension of the simulation (2D or 3D) virtual void build( const AABBs& vertex_boxes, Eigen::ConstRef edges, diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index d04d5558d..97c27f7fb 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -13,6 +13,7 @@ class LBVH : public BroadPhase { public: static constexpr index_t INVALID_ID = 0xFFFFFFFF; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) struct alignas(32) Node { static constexpr int32_t INVALID_POINTER = 0x0; // do not change diff --git a/src/ipc/broad_phase/sweep_and_tiniest_queue.cu b/src/ipc/broad_phase/sweep_and_tiniest_queue.cu index 485b2e9f7..bf68f3d93 100644 --- a/src/ipc/broad_phase/sweep_and_tiniest_queue.cu +++ b/src/ipc/broad_phase/sweep_and_tiniest_queue.cu @@ -33,6 +33,8 @@ void SweepAndTiniestQueue::build( clear(); + dim = _vertices.cols(); + // Make sure the vertices are 3D const Eigen::MatrixXd vertices = to_X3d(_vertices); @@ -56,6 +58,8 @@ void SweepAndTiniestQueue::build( clear(); + dim = _vertices_t0.cols(); + // Mutable copies of the vertices const Eigen::MatrixXd vertices_t0 = to_X3d(_vertices_t0); const Eigen::MatrixXd vertices_t1 = to_X3d(_vertices_t1); @@ -69,13 +73,16 @@ void SweepAndTiniestQueue::build( void SweepAndTiniestQueue::build( const AABBs& vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces) + Eigen::ConstRef faces, + const uint8_t _dim) { assert(edges.size() == 0 || edges.cols() == 2); assert(faces.size() == 0 || faces.cols() == 3); clear(); + dim = _dim; + // Convert from ipc::AABB to scalable_ccd::cuda::AABB boxes->vertices.resize(vertex_boxes.size()); for (int i = 0; i < vertex_boxes.size(); ++i) { diff --git a/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp b/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp index ac02861cf..b137d2609 100644 --- a/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp +++ b/src/ipc/broad_phase/sweep_and_tiniest_queue.hpp @@ -48,10 +48,12 @@ class SweepAndTiniestQueue : public BroadPhase { /// @param vertex_boxes Precomputed vertex AABBs /// @param edges Collision mesh edges /// @param faces Collision mesh faces + /// @param dim Dimension of the simulation (2D or 3D) void build( const AABBs& vertex_boxes, Eigen::ConstRef edges, - Eigen::ConstRef faces) override; + Eigen::ConstRef faces, + const uint8_t dim) override; /// @brief Clear any built data. void clear() override; From ce61a186e0e448e3d928618edfdbdd918367820a Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 22 Jan 2026 17:45:14 -0500 Subject: [PATCH 22/25] Update LBVH documentation --- docs/source/cpp-api/broad_phase.rst | 6 +++++ docs/source/python-api/broad_phase.rst | 7 +++++ docs/source/references.bib | 30 ++++++++++++++++++++++ docs/source/tutorials/getting_started.rst | 3 +-- src/ipc/broad_phase/create_broad_phase.cpp | 3 +++ src/ipc/broad_phase/create_broad_phase.hpp | 1 + 6 files changed, 48 insertions(+), 2 deletions(-) diff --git a/docs/source/cpp-api/broad_phase.rst b/docs/source/cpp-api/broad_phase.rst index 1c4962bda..766f08116 100644 --- a/docs/source/cpp-api/broad_phase.rst +++ b/docs/source/cpp-api/broad_phase.rst @@ -31,6 +31,12 @@ BVH .. doxygenclass:: ipc::BVH :allow-dot-graphs: +LBVH +--- + +.. doxygenclass:: ipc::LBVH + :allow-dot-graphs: + Sweep and Prune ----------------------- diff --git a/docs/source/python-api/broad_phase.rst b/docs/source/python-api/broad_phase.rst index d395856c8..807387e2b 100644 --- a/docs/source/python-api/broad_phase.rst +++ b/docs/source/python-api/broad_phase.rst @@ -36,6 +36,13 @@ BVH .. autoclasstoc:: +LBVH +---- + +.. autoclass:: ipctk.LBVH + + .. autoclasstoc:: + Sweep and Prune --------------- diff --git a/docs/source/references.bib b/docs/source/references.bib index 892c6c948..4d2245bf0 100644 --- a/docs/source/references.bib +++ b/docs/source/references.bib @@ -128,3 +128,33 @@ @article{Huang2025GCP number = 4, note = {\url{https://doi.org/10.1145/3731142}} } +@inproceedings{Karras2012HPG, + author = {Karras, Tero}, + title = {Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d Trees}, + booktitle = {Proceedings of the Fourth Eurographics Conference on High-Performance Graphics}, + series = {HPG '12}, + year = {2012}, + pages = {33--37}, + publisher = {Eurographics Association}, + address = {Goslar, Germany}, + note = {\url{https://doi.org/10.2312/EGGH/HPG12/033-037}} +} +@phdthesis{Baraff1992PhD, + author = {Baraff, David}, + title = {Dynamic Simulation of Non-Penetrating Rigid Bodies}, + school = {Cornell University}, + address = {Ithaca, NY, USA}, + year = {1992}, + note = {Technical Report TR 92-1275} +} +@inproceedings{Cohen1995ICOLLIDE, + author = {Cohen, Jonathan D. and Lin, Ming C. and Manocha, Dinesh and Ponamgi, Madhav}, + title = {I-COLLIDE: An Interactive and Exact Collision Detection System for Large-Scale Environments}, + booktitle = {Proceedings of the 1995 Symposium on Interactive 3D Graphics}, + series = {I3D '95}, + year = {1995}, + pages = {189--196}, + publisher = {ACM}, + address = {New York, NY, USA}, + note = {\url{https://doi.org/10.1145/199404.199437}} +} \ No newline at end of file diff --git a/docs/source/tutorials/getting_started.rst b/docs/source/tutorials/getting_started.rst index 1b30d11c2..90a41b8dd 100644 --- a/docs/source/tutorials/getting_started.rst +++ b/docs/source/tutorials/getting_started.rst @@ -531,8 +531,7 @@ The ``Candidates`` class represents the culled set of candidate pairs and is bui candidates.build( mesh, vertices_t0, vertices_t1, broad_phase=ipctk.HashGrid()) -Possible values for ``broad_phase`` are: ``BruteForce`` (parallel brute force culling), ``HashGrid`` (default), ``SpatialHash`` (implementation from the original IPC codebase), -``BVH`` (`SimpleBVH `_), ``SweepAndPrune`` (method of :cite:t:`Belgrod2023Time`), or ``SweepAndTiniestQueue`` (requires CUDA). +Possible values for ``broad_phase`` are: ``BruteForce`` (parallel brute force culling), ``HashGrid`` (default), ``SpatialHash`` (implementation from the original IPC codebase), ``BVH`` (`SimpleBVH `_), ``LBVH`` (CPU implementation of :cite:t:`Karras2012HPG` using TBB), ``SweepAndPrune`` (a.k.a. Sort-and-Sweep from :cite:t:`Baraff1992PhD`), or ``SweepAndTiniestQueue`` (method of :cite:t:`Belgrod2023Time`; requires CUDA). Narrow-Phase ^^^^^^^^^^^^ diff --git a/src/ipc/broad_phase/create_broad_phase.cpp b/src/ipc/broad_phase/create_broad_phase.cpp index 1eecb5ffa..938be137e 100644 --- a/src/ipc/broad_phase/create_broad_phase.cpp +++ b/src/ipc/broad_phase/create_broad_phase.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,8 @@ create_broad_phase(const BroadPhaseMethod& broad_phase_method) return std::make_shared(); case BroadPhaseMethod::BVH: return std::make_shared(); + case BroadPhaseMethod::LBVH: + return std::make_shared(); case BroadPhaseMethod::SWEEP_AND_PRUNE: return std::make_shared(); case BroadPhaseMethod::SWEEP_AND_TINIEST_QUEUE: diff --git a/src/ipc/broad_phase/create_broad_phase.hpp b/src/ipc/broad_phase/create_broad_phase.hpp index 066249eb8..b7bae916e 100644 --- a/src/ipc/broad_phase/create_broad_phase.hpp +++ b/src/ipc/broad_phase/create_broad_phase.hpp @@ -10,6 +10,7 @@ enum class BroadPhaseMethod : uint8_t { HASH_GRID, SPATIAL_HASH, BVH, + LBVH, SWEEP_AND_PRUNE, SWEEP_AND_TINIEST_QUEUE }; From 213467a2f3822b2bec29842ef4e487021ae15533 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 22 Jan 2026 20:44:31 -0500 Subject: [PATCH 23/25] Update profile.py --- python/examples/profiler.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/python/examples/profiler.py b/python/examples/profiler.py index 2035c5b50..6d44b169b 100644 --- a/python/examples/profiler.py +++ b/python/examples/profiler.py @@ -53,19 +53,29 @@ def plot_profiler(title=None): "--mesh", type=pathlib.Path, default=(pathlib.Path(__file__).parents[2] / "tests/data/puffer-ball/20.ply")) + parser.add_argument( + "--method", + type=str, + default="lvbh") args = parser.parse_args() mesh = meshio.read(args.mesh) faces = mesh.cells_dict["triangle"] - # edges = ipctk.edges(faces) + edges = ipctk.edges(faces) # indices = np.lexsort((edges[:, 1], edges[:, 0])) # edges = edges[indices] # indices = np.lexsort((faces[:, 2], faces[:, 1], faces[:, 0])) # faces = faces[indices] - lbvh = ipctk.LBVH() - lbvh.build(mesh.points, edges, faces) + if args.method.lower() == "bvh": + bp = ipctk.BVH() + elif args.method.lower() == "lbvh": + bp = ipctk.LBVH() + + bp.build(mesh.points, edges, faces) + + candidates = bp.detect_collision_candidates() plot_profiler(title=args.mesh.parent.name + "/" + args.mesh.name) From 6872f9532048346cf8aa77d1f004f647fe6a93f8 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Fri, 23 Jan 2026 16:12:16 -0500 Subject: [PATCH 24/25] Update benchmark test case descriptions --- src/ipc/broad_phase/spatial_hash.hpp | 4 +- .../broad_phase/benchmark_broad_phase.cpp | 57 ++++++++++++++----- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/ipc/broad_phase/spatial_hash.hpp b/src/ipc/broad_phase/spatial_hash.hpp index 6364ce543..9e52dfa94 100644 --- a/src/ipc/broad_phase/spatial_hash.hpp +++ b/src/ipc/broad_phase/spatial_hash.hpp @@ -65,9 +65,9 @@ class SpatialHash : public BroadPhase { const AABBs& _vertex_boxes, Eigen::ConstRef edges, Eigen::ConstRef faces, - const uint8_t dim) override + const uint8_t _dim) override { - build(_vertex_boxes, edges, faces, dim, /*voxel_size=*/-1); + build(_vertex_boxes, edges, faces, _dim, /*voxel_size=*/-1); } // ------------------------------------------------------------------------ diff --git a/tests/src/tests/broad_phase/benchmark_broad_phase.cpp b/tests/src/tests/broad_phase/benchmark_broad_phase.cpp index e04491a64..49260d6ec 100644 --- a/tests/src/tests/broad_phase/benchmark_broad_phase.cpp +++ b/tests/src/tests/broad_phase/benchmark_broad_phase.cpp @@ -12,7 +12,7 @@ using namespace ipc; -TEST_CASE("Benchmark broad phase", "[!benchmark][broad_phase]") +TEST_CASE("Benchmark broad phase on simple scenes", "[!benchmark][broad_phase]") { Eigen::MatrixXd V0, V1; Eigen::MatrixXi E, F; @@ -91,38 +91,61 @@ TEST_CASE("Benchmark broad phase", "[!benchmark][broad_phase]") } } -TEST_CASE( - "Benchmark broad phase on real data", - "[!benchmark][broad_phase][real_data]") +TEST_CASE("Benchmark broad phase on real scenes", "[!benchmark][broad_phase]") { Eigen::MatrixXd V0, V1; Eigen::MatrixXi E, F; std::string mesh_name_t0, mesh_name_t1; - SECTION("Data 0") + SECTION("Two cubes") { - mesh_name_t0 = "private/slow-broadphase-ccd/0.ply"; - mesh_name_t1 = "private/slow-broadphase-ccd/1.ply"; - } - SECTION("Data 1") - { - mesh_name_t0 = "private/slow-broadphase-ccd/s0.ply"; - mesh_name_t1 = "private/slow-broadphase-ccd/s1.ply"; + mesh_name_t0 = "two-cubes-far.ply"; + mesh_name_t1 = "two-cubes-intersecting.ply"; } SECTION("Cloth-Ball") { mesh_name_t0 = "cloth_ball92.ply"; mesh_name_t1 = "cloth_ball93.ply"; } + SECTION("Armadillo-Rollers") + { + mesh_name_t0 = "armadillo-rollers/326.ply"; + mesh_name_t1 = "armadillo-rollers/327.ply"; + } + SECTION("Cloth-Funnel") + { + mesh_name_t0 = "cloth-funnel/227.ply"; + mesh_name_t1 = "cloth-funnel/228.ply"; + } + SECTION("N-Body-Simulation") + { + mesh_name_t0 = "n-body-simulation/balls16_18.ply"; + mesh_name_t1 = "n-body-simulation/balls16_19.ply"; + } + SECTION("Rod-Twist") + { + mesh_name_t0 = "rod-twist/3036.ply"; + mesh_name_t1 = "rod-twist/3037.ply"; + } SECTION("Puffer-Ball") { mesh_name_t0 = "puffer-ball/20.ply"; mesh_name_t1 = "puffer-ball/21.ply"; } + SECTION("Data 0") + { + mesh_name_t0 = "private/slow-broadphase-ccd/0.ply"; + mesh_name_t1 = "private/slow-broadphase-ccd/1.ply"; + } + SECTION("Data 1") + { + mesh_name_t0 = "private/slow-broadphase-ccd/s0.ply"; + mesh_name_t1 = "private/slow-broadphase-ccd/s1.ply"; + } if (!tests::load_mesh(mesh_name_t0, V0, E, F) || !tests::load_mesh(mesh_name_t1, V1, E, F)) { - SKIP("Slow broadphase CCD meshes are private"); + SKIP("Some broadphase CCD meshes are private. Skipping benchmark."); } double inflation_radius = 0; // GENERATE(take(5, random(0.0, 0.1))); @@ -134,12 +157,18 @@ TEST_CASE( const auto broad_phases = tests::broad_phases(); + fmt::print( + "Benchmarking broad phase on meshes: {} -> {}\n", mesh_name_t0, + mesh_name_t1); + fmt::print("Number of vertices: {}\n", V0.rows()); + fmt::print("Number of edges: {}\n", E.rows()); + fmt::print("Number of faces: {}\n", F.rows()); for (const auto& broad_phase : broad_phases) { if (broad_phase->name() == "BruteForce") { continue; } - BENCHMARK(fmt::format("BP Real Data ({})", broad_phase->name())) + BENCHMARK(fmt::format("{}", broad_phase->name())) { Candidates candidates; candidates.build(mesh, V0, V1, inflation_radius, broad_phase.get()); From 91ff45761e89b79bca4f88eed050bd73f74ebcbf Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Sun, 25 Jan 2026 21:02:17 -0500 Subject: [PATCH 25/25] Deprecate BVH and optimize LBVH and merges Mark BVH enum/class as deprecated in favor of LBVH. Adjust LBVH parallel traversal batching: reduce SIMD grain size on Apple, fix task range and indexing to correctly handle SIMD tails and avoid out-of-range leaf accesses. Rewrite merge_thread_local helpers to allow stealing and clearing per-thread buffers, add a fast memcpy path for trivially-copyable types, move remaining items for non-trivial types, and add profiling hooks. --- src/ipc/broad_phase/bvh.hpp | 2 +- src/ipc/broad_phase/create_broad_phase.hpp | 2 +- src/ipc/broad_phase/lbvh.cpp | 25 +++-- src/ipc/utils/merge_thread_local.hpp | 124 +++++++++++++++++---- 4 files changed, 123 insertions(+), 30 deletions(-) diff --git a/src/ipc/broad_phase/bvh.hpp b/src/ipc/broad_phase/bvh.hpp index 118ff83a9..4afa77388 100644 --- a/src/ipc/broad_phase/bvh.hpp +++ b/src/ipc/broad_phase/bvh.hpp @@ -11,7 +11,7 @@ class BVH; namespace ipc { /// @brief Bounding Volume Hierarchy (BVH) broad phase collision detection. -class BVH : public BroadPhase { +class [[deprecated("Use LBVH instead.")]] BVH : public BroadPhase { public: BVH(); ~BVH(); diff --git a/src/ipc/broad_phase/create_broad_phase.hpp b/src/ipc/broad_phase/create_broad_phase.hpp index b7bae916e..e68d34964 100644 --- a/src/ipc/broad_phase/create_broad_phase.hpp +++ b/src/ipc/broad_phase/create_broad_phase.hpp @@ -9,7 +9,7 @@ enum class BroadPhaseMethod : uint8_t { BRUTE_FORCE, HASH_GRID, SPATIAL_HASH, - BVH, + BVH [[deprecated("Use LBVH instead.")]], LBVH, SWEEP_AND_PRUNE, SWEEP_AND_TINIEST_QUEUE diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 654f16215..0383ac9c0 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -578,32 +578,39 @@ namespace { { #ifdef __APPLE__ // Only support SIMD on Apple platforms for now constexpr size_t SIMD_SIZE = use_simd ? 4 : 1; - constexpr size_t GRAIN_SIZE = use_simd ? 64 : 1; + constexpr size_t GRAIN_SIZE = use_simd ? 16 : 1; #else constexpr size_t SIMD_SIZE = 1; constexpr size_t GRAIN_SIZE = 1; #endif // Calculate the offset to the first leaf node in the source BVH. - const size_t SOURCE_LEAF_OFFSET = source.size() / 2; - const size_t N_SOURCE_LEAVES = SOURCE_LEAF_OFFSET + 1; + const size_t source_leaf_offset = source.size() / 2; + const size_t n_source_leaves = source_leaf_offset + 1; + + const size_t n_tasks = + n_source_leaves / SIMD_SIZE + (n_source_leaves % SIMD_SIZE != 0); tbb::parallel_for( - tbb::blocked_range(size_t(0), N_SOURCE_LEAVES, GRAIN_SIZE), + tbb::blocked_range(size_t(0), n_tasks, GRAIN_SIZE), [&](const tbb::blocked_range& r) { auto& local_candidates = storage.local(); - for (size_t i = r.begin(); i < r.end(); i += SIMD_SIZE) { + const size_t actual_end = // Handle tail case + std::min(SIMD_SIZE * r.end(), n_source_leaves); + for (size_t i = r.begin(); i < r.end(); ++i) { + const size_t idx = SIMD_SIZE * i; #ifdef __APPLE__ if constexpr (use_simd) { + assert(actual_end - idx >= 1); traverse_lbvh_simd( - &source[SOURCE_LEAF_OFFSET + i], - std::min(SIMD_SIZE, r.end() - i), target, + &source[source_leaf_offset + idx], + std::min(SIMD_SIZE, actual_end - idx), target, can_collide, local_candidates); } else { #endif traverse_lbvh( - source[SOURCE_LEAF_OFFSET + i], target, can_collide, - local_candidates); + source[source_leaf_offset + idx], target, + can_collide, local_candidates); #ifdef __APPLE__ } #endif diff --git a/src/ipc/utils/merge_thread_local.hpp b/src/ipc/utils/merge_thread_local.hpp index 8015a00b6..3b5cd7225 100644 --- a/src/ipc/utils/merge_thread_local.hpp +++ b/src/ipc/utils/merge_thread_local.hpp @@ -5,45 +5,131 @@ #pragma once +#include #include #include +#include // std::make_move_iterator +#include #include namespace ipc { +// Assumes `out` is empty at the start. The function may modify the provided +// `vectors` (stealing and clearing per-thread buffers) for performance. template void merge_thread_local_vectors( - const tbb::enumerable_thread_specific>& vectors, + tbb::enumerable_thread_specific>& vectors, std::vector& out) { - // size up the items - size_t size = out.size(); - for (const auto& vector : vectors) { - size += vector.size(); + IPC_TOOLKIT_PROFILE_BLOCK("merge_thread_local_vectors"); + + assert(out.empty()); + + // Since `out` is always empty, compute total from thread-local vectors + // only. + size_t total = 0; + for (auto& v : vectors) { + total += v.size(); } - // serial merge! - out.reserve(size); - for (const auto& vector : vectors) { - out.insert(out.end(), vector.begin(), vector.end()); + if (total == 0) { + return; + } + + // Fast path for trivially-copyable types: allocate once and memcpy each + // thread-local buffer into the contiguous destination. + if constexpr ( + std::is_trivially_copyable_v && std::is_default_constructible_v) { + out.resize(total); + char* dest = reinterpret_cast(out.data()); + for (auto& v : vectors) { + if (v.empty()) { + continue; + } + std::memcpy(dest, v.data(), v.size() * sizeof(T)); + dest += v.size() * sizeof(T); + // release the local buffer to reduce memory usage + std::vector().swap(v); + } + } else { + // For non-trivial types, steal the largest thread-local buffer into + // `out` (cheap swap) and move from the remaining buffers. + std::vector* largest = nullptr; + for (auto& v : vectors) { + if (!largest || v.size() > largest->size()) { + largest = &v; + } + } + + if (largest && !largest->empty()) { + // out is empty, so swapping moves the largest contents into out and + // leaves the thread-local buffer empty (former out). + out.swap(*largest); + } + + out.reserve(total); + + for (auto& v : vectors) { + if (&v != largest && !v.empty()) { + // Move elements to `out` to avoid copies when possible. + out.insert( + out.end(), std::make_move_iterator(v.begin()), + std::make_move_iterator(v.end())); + } + // Ensure capacity is released. + std::vector().swap(v); + } } } +// No trait detection: always use the generic insert-based fallback for +// portability and simplicity. + template void merge_thread_local_unordered_sets( - const tbb::enumerable_thread_specific>& sets, + tbb::enumerable_thread_specific>& sets, unordered_set& out) { - // size up the items - size_t size = out.size(); - for (const auto& set : sets) { - size += set.size(); - } - // serial merge! - out.reserve(size); - for (const auto& set : sets) { - out.insert(set.begin(), set.end()); + IPC_TOOLKIT_PROFILE_BLOCK("merge_thread_local_unordered_sets"); + + // This function assumes `out` is empty at the start and is allowed to + // modify the per-thread `sets` (stealing / clearing buffers) for better + // performance. + assert(out.empty()); + + // Compute total number of elements across thread-local sets. + size_t total = 0; + for (auto& s : sets) { + total += s.size(); + } + if (total == 0) { + return; + } + + // Steal the largest set by swapping it with `out` (cheap). + unordered_set* largest = nullptr; + for (auto& s : sets) { + if (!largest || s.size() > largest->size()) { + largest = &s; + } + } + + if (largest && !largest->empty()) { + out.swap(*largest); + } + + out.reserve(total); + + // Simplified strategy: always insert remaining elements from per-thread + // sets into `out`. After inserting we free the local buffer to reduce + // memory usage. This avoids complex trait detection and keeps behavior + // portable across different unordered_set implementations. + for (auto& s : sets) { + if (&s != largest && !s.empty()) { + out.insert(s.begin(), s.end()); + } + unordered_set().swap(s); } }