From 6004968d9c35a83936d1ed8ad2ce1351e14c0b99 Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado
Date: Thu, 22 Jan 2026 21:46:55 -0500
Subject: [PATCH] Expose mimalloc allocations for profiler interposition

Memory profilers like memray couldn't track Arrow's mimalloc allocations
because mimalloc is statically linked into libarrow.so and internal calls
bypass PLT. This adds weak exported wrapper functions
(arrow_mimalloc_allocate, arrow_mimalloc_reallocate, arrow_mimalloc_free)
that route through PLT even for intra-DSO calls, enabling LD_PRELOAD-based
interposition.

Includes a test that verifies the interposition mechanism works by
preloading a tracking library and checking allocation counts.
---
 cpp/src/arrow/CMakeLists.txt                  |  46 ++++
 cpp/src/arrow/memory_pool.cc                  |  50 +++-
 cpp/src/arrow/memory_pool_interpose.c         |  98 ++++++++
 cpp/src/arrow/memory_pool_interpose_test.cc   | 213 ++++++++++++++++++
 .../memory_pool_interpose_test_helper.cc      |  76 +++++++
 5 files changed, 479 insertions(+), 4 deletions(-)
 create mode 100644 cpp/src/arrow/memory_pool_interpose.c
 create mode 100644 cpp/src/arrow/memory_pool_interpose_test.cc
 create mode 100644 cpp/src/arrow/memory_pool_interpose_test_helper.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index df9b783d5314..d5a7bbaa4a44 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -1268,6 +1268,52 @@ add_arrow_test(misc_test
                pretty_print_test.cc
                status_test.cc)
 
+# Mimalloc interposition test for memory profiler integration (e.g., memray).
+# Linux-only: LD_PRELOAD semantics and the test interposer use Linux-specific
+# behaviour (dlsym(RTLD_NEXT), C11 atomics, GCC destructor attributes, ELF
+# visibility).
+if(ARROW_MIMALLOC AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  # Ensure the wrapper call sites in memory_pool.cc go through the PLT so an
+  # LD_PRELOAD interposer can intercept them.  Arrow's global CXX flags include
+  # -fno-semantic-interposition (see SetupCxxFlags.cmake), which lets GCC
+  # resolve same-DSO calls directly without the PLT.  Override that here.
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set_source_files_properties(memory_pool.cc PROPERTIES
+                                COMPILE_OPTIONS "-fsemantic-interposition")
+  endif()
+
+  # The test artifacts below need the test targets and a shared libarrow, so
+  # only define them when both are built.  NOTE(review): this assumes that
+  # add_arrow_test() creates no target when ARROW_BUILD_TESTS is off and that
+  # arrow_shared only exists when ARROW_BUILD_SHARED is on; in either case the
+  # unguarded add_dependencies()/target_link_libraries() calls would fail.
+  if(ARROW_BUILD_TESTS AND ARROW_BUILD_SHARED)
+    # Build the interposition shared library
+    add_library(arrow_mimalloc_interpose SHARED memory_pool_interpose.c)
+    target_link_libraries(arrow_mimalloc_interpose PRIVATE ${CMAKE_DL_LIBS})
+    set_target_properties(arrow_mimalloc_interpose PROPERTIES
+                          LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
+
+    # Build the test helper executable
+    add_executable(arrow-memory-pool-interpose-test-helper
+                   memory_pool_interpose_test_helper.cc)
+    target_link_libraries(arrow-memory-pool-interpose-test-helper PRIVATE arrow_shared)
+    set_target_properties(arrow-memory-pool-interpose-test-helper PROPERTIES
+                          RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
+
+    # Add the interposition test
+    add_arrow_test(memory_pool_interpose_test)
+    add_dependencies(arrow-memory-pool-interpose-test
+                     arrow_mimalloc_interpose
+                     arrow-memory-pool-interpose-test-helper)
+
+    # Set environment variables for the test to find the helper and library
+    set_tests_properties(arrow-memory-pool-interpose-test PROPERTIES
+                         ENVIRONMENT
+                         "ARROW_INTERPOSE_TEST_HELPER=$<TARGET_FILE:arrow-memory-pool-interpose-test-helper>;ARROW_INTERPOSE_TEST_LIB=$<TARGET_FILE:arrow_mimalloc_interpose>")
+  endif()
+endif()
+
 add_arrow_test(public_api_test)
 set_source_files_properties(public_api_test.cc PROPERTIES SKIP_UNITY_BUILD_INCLUSION
                                                           ON)
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 1c77a60ba0e2..808934518f11 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -53,6 +53,47 @@
 #  include <mimalloc.h>
 #endif
 
+// PLT-exposed allocation functions for memory profiler integration (e.g., memray).
+// ARROW_EXPORT gives these default visibility so they appear in the dynamic
+// symbol table and can be overridden at load time via LD_PRELOAD.  The weak
+// attribute additionally permits a stronger definition at static-link time.
+// `noinline` prevents the compiler from inlining the wrapper into call sites
+// in the same TU, which would defeat interposition.  The CMake build also
+// compiles this TU with -fsemantic-interposition on GCC so the call sites
+// route through the PLT (Arrow's default is -fno-semantic-interposition,
+// which lets GCC bypass the PLT for same-DSO calls).
+//
+// The GNU attributes are only applied on non-Windows GCC-compatible compilers.
+// Elsewhere (e.g. MSVC, which does not understand `__attribute__`) the wrappers
+// are still defined, because MimallocAllocator calls them unconditionally.
+#ifdef ARROW_MIMALLOC
+#  if (defined(__GNUC__) || defined(__clang__)) && !defined(_WIN32)
+#    define ARROW_MIMALLOC_INTERPOSABLE __attribute__((weak, noinline))
+#  else
+#    define ARROW_MIMALLOC_INTERPOSABLE
+#  endif
+
+extern "C" {
+
+ARROW_MIMALLOC_INTERPOSABLE ARROW_EXPORT void* arrow_mimalloc_allocate(
+    size_t size, size_t alignment) {
+  return mi_malloc_aligned(size, alignment);
+}
+
+ARROW_MIMALLOC_INTERPOSABLE ARROW_EXPORT void* arrow_mimalloc_reallocate(
+    void* ptr, size_t new_size, size_t alignment) {
+  return mi_realloc_aligned(ptr, new_size, alignment);
+}
+
+ARROW_MIMALLOC_INTERPOSABLE ARROW_EXPORT void arrow_mimalloc_free(void* ptr) {
+  mi_free(ptr);
+}
+
+}  // extern "C"
+
+#undef ARROW_MIMALLOC_INTERPOSABLE
+#endif  // ARROW_MIMALLOC
+
 namespace arrow {
 
 namespace memory_pool {
@@ -393,6 +434,7 @@ class SystemAllocator {
 #ifdef ARROW_MIMALLOC
 
 // Helper class directing allocations to the mimalloc allocator.
+// Uses PLT-exposed functions (arrow_mimalloc_*) to allow memory profiler interposition.
 class MimallocAllocator {
  public:
   static Status AllocateAligned(int64_t size, int64_t alignment, uint8_t** out) {
@@ -401,7 +443,7 @@ class MimallocAllocator {
       return Status::OK();
     }
     *out = reinterpret_cast<uint8_t*>(
-        mi_malloc_aligned(static_cast<size_t>(size), static_cast<size_t>(alignment)));
+        arrow_mimalloc_allocate(static_cast<size_t>(size), static_cast<size_t>(alignment)));
     if (*out == NULL) {
       return Status::OutOfMemory("malloc of size ", size, " failed");
     }
@@ -422,8 +464,8 @@ class MimallocAllocator {
       *ptr = memory_pool::internal::kZeroSizeArea;
       return Status::OK();
     }
-    *ptr = reinterpret_cast<uint8_t*>(
-        mi_realloc_aligned(previous_ptr, static_cast<size_t>(new_size), alignment));
+    *ptr = reinterpret_cast<uint8_t*>(arrow_mimalloc_reallocate(
+        previous_ptr, static_cast<size_t>(new_size), static_cast<size_t>(alignment)));
     if (*ptr == NULL) {
       *ptr = previous_ptr;
       return Status::OutOfMemory("realloc of size ", new_size, " failed");
@@ -435,7 +477,7 @@ class MimallocAllocator {
     if (ptr == memory_pool::internal::kZeroSizeArea) {
       DCHECK_EQ(size, 0);
     } else {
-      mi_free(ptr);
+      arrow_mimalloc_free(ptr);
     }
   }
 
diff --git a/cpp/src/arrow/memory_pool_interpose.c b/cpp/src/arrow/memory_pool_interpose.c
new file mode 100644
index 000000000000..d7fdc391f907
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_interpose.c
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Interposition library for testing arrow_mimalloc_* PLT hooks.
+// This library is meant to be used with LD_PRELOAD to intercept
+// Arrow's mimalloc allocation functions.
+
+#define _GNU_SOURCE
+#include <dlfcn.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static atomic_size_t g_alloc_count = 0;
+static atomic_size_t g_free_count = 0;
+static atomic_size_t g_realloc_count = 0;
+static atomic_size_t g_total_allocated = 0;
+
+// Signatures of the shadowed wrappers that calls are forwarded to.
+typedef void* (*allocate_fn)(size_t, size_t);
+typedef void* (*reallocate_fn)(void*, size_t, size_t);
+typedef void (*free_fn)(void*);
+
+// Look up the next definition of `name` after this library in symbol
+// resolution order, i.e. the real wrapper this library shadows.  Aborts with
+// a diagnostic when the lookup fails, as there is nothing to forward to.
+static void* resolve_next(const char* name) {
+  void* sym = dlsym(RTLD_NEXT, name);
+  if (!sym) {
+    fprintf(stderr, "INTERPOSE ERROR: cannot find %s\n", name);
+    abort();
+  }
+  return sym;
+}
+
+// Forward an allocation and record it (count and requested bytes) on success.
+void* arrow_mimalloc_allocate(size_t size, size_t alignment) {
+  // Resolved lazily on first use and cached for subsequent calls.
+  static allocate_fn real_fn = NULL;
+  if (!real_fn) {
+    real_fn = (allocate_fn)resolve_next("arrow_mimalloc_allocate");
+  }
+  void* ptr = real_fn(size, alignment);
+  if (ptr) {
+    atomic_fetch_add(&g_alloc_count, 1);
+    atomic_fetch_add(&g_total_allocated, size);
+  }
+  return ptr;
+}
+
+// Count every free request, then forward it.
+void arrow_mimalloc_free(void* ptr) {
+  static free_fn real_fn = NULL;
+  if (!real_fn) {
+    real_fn = (free_fn)resolve_next("arrow_mimalloc_free");
+  }
+  atomic_fetch_add(&g_free_count, 1);
+  real_fn(ptr);
+}
+
+// Forward a reallocation and count it regardless of the outcome.
+void* arrow_mimalloc_reallocate(void* ptr, size_t new_size, size_t alignment) {
+  static reallocate_fn real_fn = NULL;
+  if (!real_fn) {
+    real_fn = (reallocate_fn)resolve_next("arrow_mimalloc_reallocate");
+  }
+  void* new_ptr = real_fn(ptr, new_size, alignment);
+  atomic_fetch_add(&g_realloc_count, 1);
+  return new_ptr;
+}
+
+// Called at library unload to print statistics
+__attribute__((destructor)) static void print_interpose_stats(void) {
+  size_t allocs = atomic_load(&g_alloc_count);
+  size_t frees = atomic_load(&g_free_count);
+  size_t reallocs = atomic_load(&g_realloc_count);
+  size_t total = atomic_load(&g_total_allocated);
+
+  // Print in a parseable format
+  fprintf(stderr, "ARROW_INTERPOSE_STATS: allocs=%zu frees=%zu reallocs=%zu "
+          "total_bytes=%zu\n",
+          allocs, frees, reallocs, total);
+}
diff --git a/cpp/src/arrow/memory_pool_interpose_test.cc b/cpp/src/arrow/memory_pool_interpose_test.cc
new file mode 100644
index 000000000000..5cf36ea44363
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_interpose_test.cc
@@ -0,0 +1,213 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test that arrow_mimalloc_* functions can be interposed via LD_PRELOAD.
+// This test spawns a subprocess with the interposition library preloaded
+// and verifies that allocations are tracked.
+
+#include <cerrno>
+#include <cstdlib>
+#include <regex>
+#include <string>
+
+#ifndef _WIN32
+#  include <sys/wait.h>
+#  include <unistd.h>
+#endif
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/config.h"
+#include "arrow/util/io_util.h"
+
+namespace arrow {
+
+#if defined(ARROW_MIMALLOC) && !defined(_WIN32)
+
+class TestMimallocInterpose : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Get paths from environment (set by CTest)
+    auto helper_path_result = internal::GetEnvVar("ARROW_INTERPOSE_TEST_HELPER");
+    auto lib_path_result = internal::GetEnvVar("ARROW_INTERPOSE_TEST_LIB");
+
+    if (!helper_path_result.ok() || !lib_path_result.ok()) {
+      GTEST_SKIP() << "ARROW_INTERPOSE_TEST_HELPER or ARROW_INTERPOSE_TEST_LIB not set";
+    }
+
+    helper_path_ = *helper_path_result;
+    lib_path_ = *lib_path_result;
+  }
+
+  // Outcome of running the helper: exit code (-1 if it could not be started or
+  // did not exit normally) and everything it wrote to stdout/stderr.
+  struct ProcessResult {
+    int exit_code;
+    std::string stdout_str;
+    std::string stderr_str;
+  };
+
+  // Read from `fd` until end-of-file or a read error, retrying reads that were
+  // interrupted by a signal.
+  static std::string ReadAll(int fd) {
+    std::string data;
+    char buffer[4096];
+    for (;;) {
+      const ssize_t n = read(fd, buffer, sizeof(buffer));
+      if (n > 0) {
+        data.append(buffer, static_cast<size_t>(n));
+      } else if (n == 0 || errno != EINTR) {
+        break;
+      }
+    }
+    return data;
+  }
+
+  // Run the helper program with LD_PRELOAD and capture output
+  ProcessResult RunWithPreload() {
+    ProcessResult result;
+    result.exit_code = -1;
+
+    // Create pipes for stdout and stderr, releasing the first pipe if the
+    // second one cannot be created.
+    int stdout_pipe[2];
+    int stderr_pipe[2];
+    if (pipe(stdout_pipe) != 0) {
+      return result;
+    }
+    if (pipe(stderr_pipe) != 0) {
+      close(stdout_pipe[0]);
+      close(stdout_pipe[1]);
+      return result;
+    }
+
+    pid_t pid = fork();
+    if (pid == -1) {
+      close(stdout_pipe[0]);
+      close(stdout_pipe[1]);
+      close(stderr_pipe[0]);
+      close(stderr_pipe[1]);
+      return result;
+    }
+
+    if (pid == 0) {
+      // Child process
+      close(stdout_pipe[0]);
+      close(stderr_pipe[0]);
+
+      dup2(stdout_pipe[1], STDOUT_FILENO);
+      dup2(stderr_pipe[1], STDERR_FILENO);
+
+      close(stdout_pipe[1]);
+      close(stderr_pipe[1]);
+
+      // Set LD_PRELOAD
+      setenv("LD_PRELOAD", lib_path_.c_str(), 1);
+      // Select the mimalloc backend for the helper's default pool so that the
+      // interposed wrappers are exercised regardless of the inherited setting.
+      setenv("ARROW_DEFAULT_MEMORY_POOL", "mimalloc", 1);
+
+      // Execute helper
+      execl(helper_path_.c_str(), helper_path_.c_str(), static_cast<char*>(nullptr));
+      _exit(127);  // exec failed
+    }
+
+    // Parent process
+    close(stdout_pipe[1]);
+    close(stderr_pipe[1]);
+
+    // stdout is drained to EOF before stderr.  The helper and the interposer
+    // only print a few short lines, which fit in the pipe buffer, so the child
+    // is not expected to block on stderr in the meantime.
+    result.stdout_str = ReadAll(stdout_pipe[0]);
+    close(stdout_pipe[0]);
+    result.stderr_str = ReadAll(stderr_pipe[0]);
+    close(stderr_pipe[0]);
+
+    // Wait for child, retrying if interrupted by a signal
+    int status = 0;
+    pid_t waited;
+    do {
+      waited = waitpid(pid, &status, 0);
+    } while (waited == -1 && errno == EINTR);
+    if (waited == pid && WIFEXITED(status)) {
+      result.exit_code = WEXITSTATUS(status);
+    }
+
+    return result;
+  }
+
+  std::string helper_path_;
+  std::string lib_path_;
+};
+
+TEST_F(TestMimallocInterpose, InterpositionWorks) {
+  auto result = RunWithPreload();
+
+  // Check program succeeded
+  ASSERT_EQ(result.exit_code, 0) << "Helper exited with code " << result.exit_code
+                                 << "\nstdout: " << result.stdout_str
+                                 << "\nstderr: " << result.stderr_str;
+
+  // The helper exits with 0 after printing "SKIP:" when the default pool is
+  // not mimalloc; there is nothing to verify in that case.
+  if (result.stdout_str.find("SKIP:") != std::string::npos) {
+    GTEST_SKIP() << "Helper did not use the mimalloc backend\nstdout: "
+                 << result.stdout_str;
+  }
+
+  // Check that we got the expected output from helper
+  ASSERT_NE(result.stdout_str.find("SUCCESS"), std::string::npos)
+      << "Helper did not report success\nstdout: " << result.stdout_str;
+
+  // Check that interposition library printed stats
+  ASSERT_NE(result.stderr_str.find("ARROW_INTERPOSE_STATS:"), std::string::npos)
+      << "Interposition stats not found\nstderr: " << result.stderr_str;
+
+  // Parse the stats
+  std::regex stats_regex(
+      R"(ARROW_INTERPOSE_STATS: allocs=(\d+) frees=(\d+) reallocs=(\d+) total_bytes=(\d+))");
+  std::smatch match;
+  ASSERT_TRUE(std::regex_search(result.stderr_str, match, stats_regex))
+      << "Could not parse interposition stats\nstderr: " << result.stderr_str;
+
+  // The interposer prints size_t counters; parse them as unsigned long long
+  // so that large totals cannot overflow an int.
+  const unsigned long long allocs = std::stoull(match[1].str());
+  const unsigned long long frees = std::stoull(match[2].str());
+  const unsigned long long reallocs = std::stoull(match[3].str());
+  const unsigned long long total_bytes = std::stoull(match[4].str());
+
+  // The test helper does: 3 allocs, 1 realloc, 3 frees
+  // The realloc might internally do an alloc, so we check >= expected
+  EXPECT_GE(allocs, 3ULL) << "Expected at least 3 allocations";
+  EXPECT_GE(frees, 3ULL) << "Expected at least 3 frees";
+  EXPECT_GE(reallocs, 1ULL) << "Expected at least 1 realloc";
+  // Total allocated: 1024 + 2048 + 512 = 3584 (not counting realloc growth)
+  EXPECT_GE(total_bytes, 3584ULL) << "Expected at least 3584 bytes allocated";
+}
+
+#else  // !ARROW_MIMALLOC || _WIN32
+
+TEST(TestMimallocInterpose, NotAvailable) {
+  GTEST_SKIP() << "Mimalloc interposition test requires ARROW_MIMALLOC and non-Windows";
+}
+
+#endif
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/memory_pool_interpose_test_helper.cc b/cpp/src/arrow/memory_pool_interpose_test_helper.cc
new file mode 100644
index 000000000000..71e527a86294
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_interpose_test_helper.cc
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test helper for memory_pool_interpose_test.
+// This program performs some allocations using Arrow's memory pool
+// and exits. It is meant to be run with LD_PRELOAD to test interposition.
+
+#include <cstdint>
+#include <iostream>
+
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+
+int main() {
+  arrow::MemoryPool* pool = arrow::default_memory_pool();
+
+  std::cout << "Backend: " << pool->backend_name() << std::endl;
+
+  // Skip test if not using mimalloc (the test driver looks for the "SKIP:" prefix)
+  if (pool->backend_name() != "mimalloc") {
+    std::cout << "SKIP: not using mimalloc backend" << std::endl;
+    return 0;
+  }
+
+  // Perform some allocations
+  uint8_t* buf1 = nullptr;
+  uint8_t* buf2 = nullptr;
+  uint8_t* buf3 = nullptr;
+
+  auto status = pool->Allocate(1024, &buf1);
+  if (!status.ok()) {
+    std::cerr << "Allocate failed: " << status.ToString() << std::endl;
+    return 1;
+  }
+
+  status = pool->Allocate(2048, &buf2);
+  if (!status.ok()) {
+    std::cerr << "Allocate failed: " << status.ToString() << std::endl;
+    return 1;
+  }
+
+  // Reallocate buf1
+  status = pool->Reallocate(1024, 4096, &buf1);
+  if (!status.ok()) {
+    std::cerr << "Reallocate failed: " << status.ToString() << std::endl;
+    return 1;
+  }
+
+  status = pool->Allocate(512, &buf3);
+  if (!status.ok()) {
+    std::cerr << "Allocate failed: " << status.ToString() << std::endl;
+    return 1;
+  }
+
+  // Free all buffers
+  pool->Free(buf1, 4096);
+  pool->Free(buf2, 2048);
+  pool->Free(buf3, 512);
+
+  std::cout << "SUCCESS: allocations completed" << std::endl;
+  return 0;
+}