From 0e0041758088799134afde709e7a85ac7bef0b6a Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Mon, 18 May 2026 17:43:12 +0000 Subject: [PATCH 01/10] Log duplicate function aliases as an error --- cpp/src/gandiva/function_registry.cc | 49 +++++++++++++++++++++++++++- cpp/src/gandiva/function_registry.h | 4 +++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 7ef9178d8d8a..c0bee22b8176 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -17,12 +17,16 @@ #include "gandiva/function_registry.h" +#include +#include +#include #include #include #include #include "arrow/util/logging.h" +#include "arrow/util/string.h" #include "gandiva/function_registry_arithmetic.h" #include "gandiva/function_registry_datetime.h" #include "gandiva/function_registry_hash.h" @@ -84,6 +88,23 @@ const NativeFunction* FunctionRegistry::LookupSignature( return got == pc_registry_map_.end() ? nullptr : got->second; } +namespace { +// Build a key from a signature's name + parameter types only (ignoring return type), +// to detect functions that share the same call-site shape but differ in return type. +std::string CallShapeKey(const FunctionSignature& sig) { + std::stringstream s; + s << arrow::internal::AsciiToLower(sig.base_name()) << "("; + bool first = true; + for (const auto& p : sig.param_types()) { + if (!first) s << ","; + s << static_cast(p->id()); + first = false; + } + s << ")"; + return s.str(); +} +} // namespace + Status FunctionRegistry::Add(NativeFunction func) { if (pc_registry_.size() == kMaxFunctionSignatures) { return Status::CapacityError("Exceeded max function signatures limit of ", @@ -92,7 +113,33 @@ Status FunctionRegistry::Add(NativeFunction func) { pc_registry_.emplace_back(std::move(func)); const auto& last_func = pc_registry_.back(); for (const auto& func_signature : last_func.signatures()) { - pc_registry_map_.emplace(&func_signature, &last_func); + auto [it, inserted] = pc_registry_map_.emplace(&func_signature, &last_func); + if (!inserted) { + ARROW_LOG(ERROR) << "Duplicate function signature registered: " + << func_signature.ToString() + << " (existing pc_name=" << it->second->pc_name() + << ", new pc_name=" << last_func.pc_name() + << "); the new registration will be ignored on lookup."; + continue; + } + // Also flag the weaker collision where name+params match but the return type + // differs — these are technically distinct entries in the signature map, but + // they create ambiguity at the SQL surface (e.g. `day(timestamp)` resolving + // to either extractDay → int64 or truncateDay → timestamp). + auto shape_key = CallShapeKey(func_signature); + auto shape_it = call_shape_map_.find(shape_key); + if (shape_it != call_shape_map_.end()) { + ARROW_LOG(ERROR) << "Function alias collision: " + << func_signature.ToString() + << " has the same name and parameter types as " + << shape_it->second->ToString() + << " (existing pc_name=" << pc_registry_map_.at(shape_it->second)->pc_name() + << ", new pc_name=" << last_func.pc_name() + << "); callers of this function will get different results " + "depending on the inferred return type."; + } else { + call_shape_map_.emplace(std::move(shape_key), &func_signature); + } } return arrow::Status::OK(); } diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index 24b64fac5f3f..463e774817fa 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -85,6 +85,10 @@ class GANDIVA_EXPORT FunctionRegistry { private: std::vector pc_registry_; SignatureMap pc_registry_map_; + // Tracks name+param-types (return type ignored) to detect call-shape collisions + // where the same `name(args)` could resolve to two functions with different + // return types. + std::unordered_map call_shape_map_; std::vector> bitcode_memory_buffers_; std::vector> c_functions_; FunctionHolderMakerRegistry holder_maker_registry_; From d1a57a5b54742f83e8c8ec2c7c6f440dbc230891 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Mon, 18 May 2026 18:25:51 +0000 Subject: [PATCH 02/10] Cleanup some aliases, add alias check to duplicate function test. --- cpp/src/gandiva/function_registry_datetime.cc | 14 ++++++-- cpp/src/gandiva/function_registry_test.cc | 36 +++++++++++++++++++ .../function_registry_timestamp_arithmetic.cc | 3 -- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc index a2fe75c80f3f..7b860ca1b748 100644 --- a/cpp/src/gandiva/function_registry_datetime.cc +++ b/cpp/src/gandiva/function_registry_datetime.cc @@ -21,7 +21,7 @@ namespace gandiva { -#define DATE_EXTRACTION_TRUNCATION_FNS(INNER, name) \ +#define DATE_EXTRACTION_FNS(INNER, name) \ DATE_TYPES(INNER, name##Millennium, {}), DATE_TYPES(INNER, name##Century, {}), \ DATE_TYPES(INNER, name##Decade, {}), DATE_TYPES(INNER, name##Year, {"year"}), \ DATE_TYPES(INNER, name##Quarter, ({"quarter"})), \ @@ -32,6 +32,14 @@ namespace gandiva { DATE_TYPES(INNER, name##Minute, {"minute"}), \ DATE_TYPES(INNER, name##Second, {"second"}) +#define DATE_TRUNCATION_FNS(INNER, name) \ + DATE_TYPES(INNER, name##Millennium, {}), DATE_TYPES(INNER, name##Century, {}), \ + DATE_TYPES(INNER, name##Decade, {}), DATE_TYPES(INNER, name##Year, {}), \ + DATE_TYPES(INNER, name##Quarter, {}), DATE_TYPES(INNER, name##Month, {}), \ + DATE_TYPES(INNER, name##Week, {}), DATE_TYPES(INNER, name##Day, {}), \ + DATE_TYPES(INNER, name##Hour, {}), DATE_TYPES(INNER, name##Minute, {}), \ + DATE_TYPES(INNER, name##Second, {}) + #define TO_TIMESTAMP_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE) \ NativeFunction(#NAME, std::vector ALIASES, DataTypeVector{TYPE()}, \ timestamp(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE)) @@ -51,8 +59,8 @@ std::vector GetDateTimeFunctionRegistry() { static std::vector date_time_fn_registry_ = { UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, day_time_interval), UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, month_interval), - DATE_EXTRACTION_TRUNCATION_FNS(EXTRACT_SAFE_NULL_IF_NULL, extract), - DATE_EXTRACTION_TRUNCATION_FNS(TRUNCATE_SAFE_NULL_IF_NULL, date_trunc_), + DATE_EXTRACTION_FNS(EXTRACT_SAFE_NULL_IF_NULL, extract), + DATE_TRUNCATION_FNS(TRUNCATE_SAFE_NULL_IF_NULL, date_trunc_), DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy, {}), DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow, {}), diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index bbe72c0ee970..cfa98bca5d86 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -20,9 +20,12 @@ #include #include #include +#include #include +#include #include +#include "arrow/util/string.h" #include "gandiva/tests/test_util.h" namespace gandiva { @@ -85,6 +88,11 @@ TEST_F(TestFunctionRegistry, TestNoDuplicates) { std::unordered_set native_func_duplicates; std::unordered_set func_sigs; std::unordered_set func_sig_duplicates; + // (name, param-types) -> ret_type seen first; ignores return type to detect + // call-shape ambiguity, where the same user-facing call could resolve to two + // functions with different return types. + std::unordered_map call_shapes; + std::unordered_set call_shape_duplicates; for (const auto& native_func_it : *registry_) { auto& first_sig = native_func_it.signatures().front(); auto pc_func_sig = FunctionSignature(native_func_it.pc_name(), @@ -103,6 +111,25 @@ TEST_F(TestFunctionRegistry, TestNoDuplicates) { } else { func_sig_duplicates.insert(sig_str); } + + std::ostringstream key_stream; + key_stream << arrow::internal::AsciiToLower(sig.base_name()) << "("; + bool first = true; + for (const auto& p : sig.param_types()) { + if (!first) key_stream << ", "; + key_stream << p->ToString(); + first = false; + } + key_stream << ")"; + auto call_shape = key_stream.str(); + auto ret_str = sig.ret_type()->ToString(); + auto it = call_shapes.find(call_shape); + if (it == call_shapes.end()) { + call_shapes.emplace(call_shape, ret_str); + } else if (it->second != ret_str) { + call_shape_duplicates.insert(call_shape + " resolves to both " + it->second + + " and " + ret_str); + } } } std::ostringstream stream; @@ -121,5 +148,14 @@ TEST_F(TestFunctionRegistry, TestNoDuplicates) { << "The following signatures are defined more than once possibly pointing to " "different precompiled functions:\n" << stream.str(); + + std::ostringstream shape_stream; + std::copy(call_shape_duplicates.begin(), call_shape_duplicates.end(), + std::ostream_iterator(shape_stream, "\n")); + EXPECT_TRUE(call_shape_duplicates.empty()) + << "The following calls have the same name and parameter types but different " + "return types, so callers will get different results depending on the inferred " + "return type:\n" + << shape_stream.str(); } } // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc index 188bd60d9eac..f79c542aa6cf 100644 --- a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -76,9 +76,6 @@ std::vector GetDateTimeArithmeticFunctionRegistry() { DATE_ADD_FNS(date_add, {}), DATE_ADD_FNS(add, {}), - NativeFunction("add", {}, DataTypeVector{date64(), int64()}, timestamp(), - kResultNullIfNull, "add_date64_int64"), - DATE_DIFF_FNS(date_sub, {}), DATE_DIFF_FNS(subtract, {}), DATE_DIFF_FNS(date_diff, {})}; From d950eaadc8def10ac1b163cd0ad99bcb0cd96269 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Mon, 18 May 2026 20:21:14 +0000 Subject: [PATCH 03/10] Remove redundant function. --- cpp/src/gandiva/function_registry_string.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index be57ce4f4768..eee17b34ea9f 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -353,13 +353,12 @@ std::vector GetStringFunctionRegistry() { kResultNullIfNull, "byte_substr_binary_int32_int32", NativeFunction::kNeedsContext), - NativeFunction("convert_fromUTF8", {"convert_fromutf8"}, DataTypeVector{binary()}, - utf8(), kResultNullIfNull, "convert_fromUTF8_binary", + NativeFunction("convert_fromUTF8", {}, DataTypeVector{binary()}, utf8(), + kResultNullIfNull, "convert_fromUTF8_binary", NativeFunction::kNeedsContext), - NativeFunction("convert_replaceUTF8", {"convert_replaceutf8"}, - DataTypeVector{binary(), utf8()}, utf8(), kResultNullIfNull, - "convert_replace_invalid_fromUTF8_binary", + NativeFunction("convert_replaceUTF8", {}, DataTypeVector{binary(), utf8()}, utf8(), + kResultNullIfNull, "convert_replace_invalid_fromUTF8_binary", NativeFunction::kNeedsContext), NativeFunction("convert_toDOUBLE", {}, DataTypeVector{float64()}, binary(), From 2bb796a265fc4bac56139f14713f4c8b97629fd4 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Mon, 18 May 2026 20:46:03 +0000 Subject: [PATCH 04/10] lint --- cpp/src/gandiva/function_registry.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index c0bee22b8176..81cb5a20f736 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -129,11 +129,10 @@ Status FunctionRegistry::Add(NativeFunction func) { auto shape_key = CallShapeKey(func_signature); auto shape_it = call_shape_map_.find(shape_key); if (shape_it != call_shape_map_.end()) { - ARROW_LOG(ERROR) << "Function alias collision: " - << func_signature.ToString() + ARROW_LOG(ERROR) << "Function alias collision: " << func_signature.ToString() << " has the same name and parameter types as " - << shape_it->second->ToString() - << " (existing pc_name=" << pc_registry_map_.at(shape_it->second)->pc_name() + << shape_it->second->ToString() << " (existing pc_name=" + << pc_registry_map_.at(shape_it->second)->pc_name() << ", new pc_name=" << last_func.pc_name() << "); callers of this function will get different results " "depending on the inferred return type."; From 5f7f89fcb85ca22490ed5cc9b8ea988642bbc2a1 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 19 May 2026 16:01:58 +0000 Subject: [PATCH 05/10] Refactor test to use shared CallShapKey function. --- cpp/src/gandiva/function_registry.cc | 23 +---------------------- cpp/src/gandiva/function_registry_test.cc | 12 +----------- cpp/src/gandiva/function_signature.cc | 13 +++++++++++++ cpp/src/gandiva/function_signature.h | 5 +++++ 4 files changed, 20 insertions(+), 33 deletions(-) diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 81cb5a20f736..1594a24de4b2 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -17,16 +17,12 @@ #include "gandiva/function_registry.h" -#include -#include -#include #include #include #include #include "arrow/util/logging.h" -#include "arrow/util/string.h" #include "gandiva/function_registry_arithmetic.h" #include "gandiva/function_registry_datetime.h" #include "gandiva/function_registry_hash.h" @@ -88,23 +84,6 @@ const NativeFunction* FunctionRegistry::LookupSignature( return got == pc_registry_map_.end() ? nullptr : got->second; } -namespace { -// Build a key from a signature's name + parameter types only (ignoring return type), -// to detect functions that share the same call-site shape but differ in return type. -std::string CallShapeKey(const FunctionSignature& sig) { - std::stringstream s; - s << arrow::internal::AsciiToLower(sig.base_name()) << "("; - bool first = true; - for (const auto& p : sig.param_types()) { - if (!first) s << ","; - s << static_cast(p->id()); - first = false; - } - s << ")"; - return s.str(); -} -} // namespace - Status FunctionRegistry::Add(NativeFunction func) { if (pc_registry_.size() == kMaxFunctionSignatures) { return Status::CapacityError("Exceeded max function signatures limit of ", @@ -126,7 +105,7 @@ Status FunctionRegistry::Add(NativeFunction func) { // differs — these are technically distinct entries in the signature map, but // they create ambiguity at the SQL surface (e.g. `day(timestamp)` resolving // to either extractDay → int64 or truncateDay → timestamp). - auto shape_key = CallShapeKey(func_signature); + auto shape_key = func_signature.CallShape(); auto shape_it = call_shape_map_.find(shape_key); if (shape_it != call_shape_map_.end()) { ARROW_LOG(ERROR) << "Function alias collision: " << func_signature.ToString() diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index cfa98bca5d86..0b2aeaf9ce00 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -25,7 +25,6 @@ #include #include -#include "arrow/util/string.h" #include "gandiva/tests/test_util.h" namespace gandiva { @@ -112,16 +111,7 @@ TEST_F(TestFunctionRegistry, TestNoDuplicates) { func_sig_duplicates.insert(sig_str); } - std::ostringstream key_stream; - key_stream << arrow::internal::AsciiToLower(sig.base_name()) << "("; - bool first = true; - for (const auto& p : sig.param_types()) { - if (!first) key_stream << ", "; - key_stream << p->ToString(); - first = false; - } - key_stream << ")"; - auto call_shape = key_stream.str(); + auto call_shape = sig.CallShape(); auto ret_str = sig.ret_type()->ToString(); auto it = call_shapes.find(call_shape); if (it == call_shapes.end()) { diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc index 136afca2d944..75c6ece6574c 100644 --- a/cpp/src/gandiva/function_signature.cc +++ b/cpp/src/gandiva/function_signature.cc @@ -114,4 +114,17 @@ std::string FunctionSignature::ToString() const { return s.str(); } +std::string FunctionSignature::CallShape() const { + std::stringstream s; + s << AsciiToLower(base_name_) << "("; + for (uint32_t i = 0; i < param_types_.size(); i++) { + if (i > 0) { + s << ", "; + } + s << param_types_[i]->ToString(); + } + s << ")"; + return s.str(); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/function_signature.h b/cpp/src/gandiva/function_signature.h index c3e3639495a9..8a52ff3ee527 100644 --- a/cpp/src/gandiva/function_signature.h +++ b/cpp/src/gandiva/function_signature.h @@ -46,6 +46,11 @@ class GANDIVA_EXPORT FunctionSignature { std::string ToString() const; + /// String identifying the call shape: lowercased base name + parameter types + /// (return type omitted). Useful for detecting two functions that share the + /// same `name(args)` shape but differ in return type. + std::string CallShape() const; + private: std::string base_name_; DataTypeVector param_types_; From 39c7ccc3069b465aaf82b9b7abd0bf61dadccfb5 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 19 May 2026 16:06:44 +0000 Subject: [PATCH 06/10] replace .at with .find --- cpp/src/gandiva/function_registry.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 1594a24de4b2..a4fe56af9d93 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -108,10 +108,13 @@ Status FunctionRegistry::Add(NativeFunction func) { auto shape_key = func_signature.CallShape(); auto shape_it = call_shape_map_.find(shape_key); if (shape_it != call_shape_map_.end()) { + auto pc_it = pc_registry_map_.find(shape_it->second); + std::string existing_pc_name = + pc_it != pc_registry_map_.end() ? pc_it->second->pc_name() : ""; ARROW_LOG(ERROR) << "Function alias collision: " << func_signature.ToString() << " has the same name and parameter types as " - << shape_it->second->ToString() << " (existing pc_name=" - << pc_registry_map_.at(shape_it->second)->pc_name() + << shape_it->second->ToString() + << " (existing pc_name=" << existing_pc_name << ", new pc_name=" << last_func.pc_name() << "); callers of this function will get different results " "depending on the inferred return type."; From 1828681bd02df57d18c2818751e68e707d2a957f Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 19 May 2026 16:40:39 +0000 Subject: [PATCH 07/10] More tests and update type function signatures. --- cpp/src/gandiva/function_registry_test.cc | 58 +++++++++++++++++++++++ cpp/src/gandiva/precompiled/types.h | 10 ++-- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index 0b2aeaf9ce00..29e2a171428f 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -41,6 +41,64 @@ class TestFunctionRegistry : public ::testing::Test { } }; +TEST_F(TestFunctionRegistry, TestAddDuplicateSignatureKeepsFirstRegistration) { + auto registry = std::make_unique(); + auto buffer = arrow::Buffer::FromString(""); + + NativeFunction first("foo", {}, DataTypeVector{arrow::int32()}, arrow::int32(), + kResultNullIfNull, "foo_first"); + NativeFunction second("foo", {}, DataTypeVector{arrow::int32()}, arrow::int32(), + kResultNullIfNull, "foo_second"); + + ARROW_EXPECT_OK(registry->Register({first}, buffer)); + + testing::internal::CaptureStderr(); + ARROW_EXPECT_OK(registry->Register({second}, buffer)); + std::string log = testing::internal::GetCapturedStderr(); + + EXPECT_NE(log.find("Duplicate function signature registered"), std::string::npos) + << "stderr was: " << log; + EXPECT_NE(log.find("foo_first"), std::string::npos) << "stderr was: " << log; + EXPECT_NE(log.find("foo_second"), std::string::npos) << "stderr was: " << log; + + // The first registration wins; lookup returns the original pc_name. + FunctionSignature sig("foo", {arrow::int32()}, arrow::int32()); + const NativeFunction* fn = registry->LookupSignature(sig); + ASSERT_NE(fn, nullptr); + EXPECT_EQ(fn->pc_name(), "foo_first"); +} + +TEST_F(TestFunctionRegistry, TestAddCallShapeCollisionLogsAndKeepsBoth) { + auto registry = std::make_unique(); + auto buffer = arrow::Buffer::FromString(""); + + NativeFunction returns_int32("foo", {}, DataTypeVector{arrow::int32()}, arrow::int32(), + kResultNullIfNull, "foo_int32_to_int32"); + NativeFunction returns_int64("foo", {}, DataTypeVector{arrow::int32()}, arrow::int64(), + kResultNullIfNull, "foo_int32_to_int64"); + + ARROW_EXPECT_OK(registry->Register({returns_int32}, buffer)); + + testing::internal::CaptureStderr(); + ARROW_EXPECT_OK(registry->Register({returns_int64}, buffer)); + std::string log = testing::internal::GetCapturedStderr(); + + EXPECT_NE(log.find("Function alias collision"), std::string::npos) + << "stderr was: " << log; + EXPECT_NE(log.find("foo_int32_to_int32"), std::string::npos) << "stderr was: " << log; + EXPECT_NE(log.find("foo_int32_to_int64"), std::string::npos) << "stderr was: " << log; + + // Both signatures remain reachable because they differ in return type. + FunctionSignature as_int32("foo", {arrow::int32()}, arrow::int32()); + FunctionSignature as_int64("foo", {arrow::int32()}, arrow::int64()); + const NativeFunction* fn_int32 = registry->LookupSignature(as_int32); + const NativeFunction* fn_int64 = registry->LookupSignature(as_int64); + ASSERT_NE(fn_int32, nullptr); + ASSERT_NE(fn_int64, nullptr); + EXPECT_EQ(fn_int32->pc_name(), "foo_int32_to_int32"); + EXPECT_EQ(fn_int64->pc_name(), "foo_int32_to_int64"); +} + TEST_F(TestFunctionRegistry, TestFound) { FunctionSignature add_i32_i32("add", {arrow::int32(), arrow::int32()}, arrow::int32()); diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index c93b694fc777..092fa25bf25e 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -150,11 +150,11 @@ gdv_float64 nvl_float64_float64(gdv_float64 in, gdv_boolean is_valid_in, gdv_boolean nvl_boolean_boolean(gdv_boolean in, gdv_boolean is_valid_in, gdv_boolean replace, gdv_boolean is_valid_value); -gdv_int64 date_add_int32_timestamp(gdv_int32, gdv_timestamp); -gdv_int64 add_int64_timestamp(gdv_int64, gdv_timestamp); -gdv_int64 add_int32_timestamp(gdv_int32, gdv_timestamp); -gdv_int64 date_add_int64_timestamp(gdv_int64, gdv_timestamp); -gdv_timestamp add_date64_int64(gdv_date64, gdv_int64); +gdv_timestamp date_add_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_timestamp add_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_timestamp add_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_timestamp date_add_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_date64 add_date64_int64(gdv_date64, gdv_int64); gdv_timestamp to_timestamp_int32(gdv_int32); gdv_timestamp to_timestamp_int64(gdv_int64); From 07445ee71ca2136f217ca07d65c7c267503ec5c3 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 19 May 2026 16:44:11 +0000 Subject: [PATCH 08/10] set to empty string --- cpp/src/gandiva/function_registry_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index 29e2a171428f..fca28a6fbd30 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -189,6 +189,7 @@ TEST_F(TestFunctionRegistry, TestNoDuplicates) { "following precompiled functions:\n" << result; + stream.str(""); stream.clear(); std::copy(func_sig_duplicates.begin(), func_sig_duplicates.end(), std::ostream_iterator(stream, "\n")); From 551fb3935bff6275ecbc43d96498451dc2c039a4 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 19 May 2026 17:34:22 +0000 Subject: [PATCH 09/10] Update CallShape function to handle decimals and match the function registry logic. --- cpp/src/gandiva/function_registry_test.cc | 27 +++++++++++++++++++++++ cpp/src/gandiva/function_signature.cc | 8 ++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index fca28a6fbd30..330eeb923630 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -99,6 +99,33 @@ TEST_F(TestFunctionRegistry, TestAddCallShapeCollisionLogsAndKeepsBoth) { EXPECT_EQ(fn_int64->pc_name(), "foo_int32_to_int64"); } +TEST_F(TestFunctionRegistry, TestAddCallShapeCollisionAcrossDecimalPrecision) { + // FunctionSignature::operator== treats decimals as equal when byte_width matches, + // regardless of precision/scale. The alias-collision diagnostic must use the same + // identity rule, so two registrations differing only in decimal precision/scale + // should still be flagged. + auto registry = std::make_unique(); + auto buffer = arrow::Buffer::FromString(""); + + NativeFunction precise("foo", {}, DataTypeVector{arrow::decimal128(10, 2)}, + arrow::int32(), kResultNullIfNull, "foo_decimal128_precise"); + NativeFunction wide("foo", {}, DataTypeVector{arrow::decimal128(18, 4)}, + arrow::int64(), kResultNullIfNull, "foo_decimal128_wide"); + + ARROW_EXPECT_OK(registry->Register({precise}, buffer)); + + testing::internal::CaptureStderr(); + ARROW_EXPECT_OK(registry->Register({wide}, buffer)); + std::string log = testing::internal::GetCapturedStderr(); + + EXPECT_NE(log.find("Function alias collision"), std::string::npos) + << "stderr was: " << log; + EXPECT_NE(log.find("foo_decimal128_precise"), std::string::npos) + << "stderr was: " << log; + EXPECT_NE(log.find("foo_decimal128_wide"), std::string::npos) + << "stderr was: " << log; +} + TEST_F(TestFunctionRegistry, TestFound) { FunctionSignature add_i32_i32("add", {arrow::int32(), arrow::int32()}, arrow::int32()); diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc index 75c6ece6574c..b21c0424280d 100644 --- a/cpp/src/gandiva/function_signature.cc +++ b/cpp/src/gandiva/function_signature.cc @@ -121,7 +121,13 @@ std::string FunctionSignature::CallShape() const { if (i > 0) { s << ", "; } - s << param_types_[i]->ToString(); + const auto& p = param_types_[i]; + if (p->id() == arrow::Type::DECIMAL) { + // Precision/scale aren't part of signature identity (see operator==). + s << "decimal" << (p->byte_width() * 8); + } else { + s << p->ToString(); + } } s << ")"; return s.str(); From 9f254ac1016dfbb1798128ce234863377e93f3de Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 19 May 2026 17:41:02 +0000 Subject: [PATCH 10/10] lint --- cpp/src/gandiva/function_registry_test.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index 330eeb923630..377c13594deb 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -109,8 +109,8 @@ TEST_F(TestFunctionRegistry, TestAddCallShapeCollisionAcrossDecimalPrecision) { NativeFunction precise("foo", {}, DataTypeVector{arrow::decimal128(10, 2)}, arrow::int32(), kResultNullIfNull, "foo_decimal128_precise"); - NativeFunction wide("foo", {}, DataTypeVector{arrow::decimal128(18, 4)}, - arrow::int64(), kResultNullIfNull, "foo_decimal128_wide"); + NativeFunction wide("foo", {}, DataTypeVector{arrow::decimal128(18, 4)}, arrow::int64(), + kResultNullIfNull, "foo_decimal128_wide"); ARROW_EXPECT_OK(registry->Register({precise}, buffer)); @@ -122,8 +122,7 @@ TEST_F(TestFunctionRegistry, TestAddCallShapeCollisionAcrossDecimalPrecision) { << "stderr was: " << log; EXPECT_NE(log.find("foo_decimal128_precise"), std::string::npos) << "stderr was: " << log; - EXPECT_NE(log.find("foo_decimal128_wide"), std::string::npos) - << "stderr was: " << log; + EXPECT_NE(log.find("foo_decimal128_wide"), std::string::npos) << "stderr was: " << log; } TEST_F(TestFunctionRegistry, TestFound) {