From 5a3f92e35bbe3c8e51c995097ea800d25ef097a2 Mon Sep 17 00:00:00 2001 From: Arnav Balyan Date: Sun, 29 Mar 2026 11:11:26 +0530 Subject: [PATCH 1/4] update --- .../parquet/arrow/arrow_reader_writer_test.cc | 47 +++++++++++++++++++ cpp/src/parquet/column_writer.cc | 9 +++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index e2384972cf55..8ef044167d95 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -2178,6 +2178,53 @@ TEST(TestArrowReadWrite, ImplicitSecondToMillisecondTimestampCoercion) { ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*tx, *to)); } +TEST(TestArrowReadWrite, TimestampCoercionOverflow) { + using ::arrow::ArrayFromVector; + using ::arrow::field; + using ::arrow::schema; + + auto t_s = ::arrow::timestamp(TimeUnit::SECOND); + + std::vector overflow_values = {9223372036854776LL}; + std::vector is_valid = {true}; + + std::shared_ptr a_s; + ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid, overflow_values, &a_s); + + auto s = schema({field("timestamp", t_s)}); + auto table = Table::Make(s, {a_s}); + + ASSERT_RAISES(Invalid, WriteTable(*table, ::arrow::default_memory_pool(), + CreateOutputStream(), table->num_rows())); + + auto coerce_millis = + ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MILLI)->build(); + ASSERT_RAISES(Invalid, + WriteTable(*table, ::arrow::default_memory_pool(), CreateOutputStream(), + table->num_rows(), default_writer_properties(), coerce_millis)); + + auto coerce_micros = + ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MICRO)->build(); + ASSERT_RAISES(Invalid, + WriteTable(*table, ::arrow::default_memory_pool(), CreateOutputStream(), + table->num_rows(), default_writer_properties(), coerce_micros)); + + auto coerce_nanos = + ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::NANO)->build(); + ASSERT_RAISES(Invalid, + WriteTable(*table, ::arrow::default_memory_pool(), CreateOutputStream(), + table->num_rows(), default_writer_properties(), coerce_nanos)); + + std::vector null_overflow_values = {9223372036854776LL}; + std::vector null_valid = {false}; + std::shared_ptr a_null; + ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid, null_overflow_values, + &a_null); + auto null_table = Table::Make(s, {a_null}); + ASSERT_OK_NO_THROW(WriteTable(*null_table, ::arrow::default_memory_pool(), + CreateOutputStream(), null_table->num_rows())); +} + TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { using ::arrow::ArrayFromVector; using ::arrow::field; diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 61507c2ba196..c30ca4de7959 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -41,6 +41,7 @@ #include "arrow/util/crc32.h" #include "arrow/util/endian.h" #include "arrow/util/float16.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging_internal.h" #include "arrow/util/rle_encoding_internal.h" @@ -2352,7 +2353,13 @@ struct SerializeFunctor { auto MultiplyBy = [&](const int64_t factor) { for (int64_t i = 0; i < array.length(); i++) { - out[i] = values[i] * factor; + if (ARROW_PREDICT_FALSE(::arrow::internal::MultiplyWithOverflowGeneric( + values[i], factor, &out[i])) && + array.IsValid(i)) { + return Status::Invalid("Integer overflow when casting timestamp value ", + values[i], " from ", source_type.ToString(), " to ", + target_type->ToString()); + } } return Status::OK(); }; From c3075cee716fa54c02283097b4ea147cbd84096a Mon Sep 17 00:00:00 2001 From: Arnav Balyan Date: Tue, 31 Mar 2026 00:22:45 +0530 Subject: [PATCH 2/4] comments --- .../parquet/arrow/arrow_reader_writer_test.cc | 28 ++++++------------- cpp/src/parquet/column_writer.cc | 6 ++-- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 8ef044167d95..788070e3080b 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -2194,26 +2194,16 @@ TEST(TestArrowReadWrite, TimestampCoercionOverflow) { auto s = schema({field("timestamp", t_s)}); auto table = Table::Make(s, {a_s}); - ASSERT_RAISES(Invalid, WriteTable(*table, ::arrow::default_memory_pool(), + ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(), CreateOutputStream(), table->num_rows())); - auto coerce_millis = - ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MILLI)->build(); - ASSERT_RAISES(Invalid, - WriteTable(*table, ::arrow::default_memory_pool(), CreateOutputStream(), - table->num_rows(), default_writer_properties(), coerce_millis)); - - auto coerce_micros = - ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MICRO)->build(); - ASSERT_RAISES(Invalid, - WriteTable(*table, ::arrow::default_memory_pool(), CreateOutputStream(), - table->num_rows(), default_writer_properties(), coerce_micros)); - - auto coerce_nanos = - ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::NANO)->build(); - ASSERT_RAISES(Invalid, - WriteTable(*table, ::arrow::default_memory_pool(), CreateOutputStream(), - table->num_rows(), default_writer_properties(), coerce_nanos)); + for (auto unit : {TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) { + auto coerce_props = + ArrowWriterProperties::Builder().coerce_timestamps(unit)->build(); + ASSERT_RAISES(Invalid, + WriteTable(*table, default_memory_pool(), CreateOutputStream(), + table->num_rows(), default_writer_properties(), coerce_props)); + } std::vector null_overflow_values = {9223372036854776LL}; std::vector null_valid = {false}; @@ -2221,7 +2211,7 @@ TEST(TestArrowReadWrite, TimestampCoercionOverflow) { ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid, null_overflow_values, &a_null); auto null_table = Table::Make(s, {a_null}); - ASSERT_OK_NO_THROW(WriteTable(*null_table, ::arrow::default_memory_pool(), + ASSERT_OK_NO_THROW(WriteTable(*null_table, default_memory_pool(), CreateOutputStream(), null_table->num_rows())); } diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index c30ca4de7959..b3ed46ee2d28 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -2353,9 +2353,9 @@ struct SerializeFunctor { auto MultiplyBy = [&](const int64_t factor) { for (int64_t i = 0; i < array.length(); i++) { - if (ARROW_PREDICT_FALSE(::arrow::internal::MultiplyWithOverflowGeneric( - values[i], factor, &out[i])) && - array.IsValid(i)) { + if (array.IsValid(i) && + ARROW_PREDICT_FALSE(::arrow::internal::MultiplyWithOverflowGeneric( + values[i], factor, &out[i]))) { return Status::Invalid("Integer overflow when casting timestamp value ", values[i], " from ", source_type.ToString(), " to ", target_type->ToString()); From 57096acb4a1f8675cc9ea5f1ffb7830debeb098f Mon Sep 17 00:00:00 2001 From: Arnav Balyan Date: Tue, 31 Mar 2026 00:34:41 +0530 Subject: [PATCH 3/4] lint --- cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 788070e3080b..6eb2daa9fdc4 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -2200,9 +2200,11 @@ TEST(TestArrowReadWrite, TimestampCoercionOverflow) { for (auto unit : {TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) { auto coerce_props = ArrowWriterProperties::Builder().coerce_timestamps(unit)->build(); - ASSERT_RAISES(Invalid, - WriteTable(*table, default_memory_pool(), CreateOutputStream(), - table->num_rows(), default_writer_properties(), coerce_props)); + ASSERT_RAISES( + Invalid, + WriteTable(*table, default_memory_pool(), CreateOutputStream(), + table->num_rows(), default_writer_properties(), + coerce_props)); } std::vector null_overflow_values = {9223372036854776LL}; From c551cf035c504b56cf9536f0d96f264e1db2057d Mon Sep 17 00:00:00 2001 From: Arnav Balyan Date: Thu, 2 Apr 2026 07:14:50 -0400 Subject: [PATCH 4/4] comments --- .../parquet/arrow/arrow_reader_writer_test.cc | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 6eb2daa9fdc4..d29458bf226b 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -2182,39 +2182,35 @@ TEST(TestArrowReadWrite, TimestampCoercionOverflow) { using ::arrow::ArrayFromVector; using ::arrow::field; using ::arrow::schema; - auto t_s = ::arrow::timestamp(TimeUnit::SECOND); - std::vector overflow_values = {9223372036854776LL}; std::vector is_valid = {true}; - std::shared_ptr a_s; ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid, overflow_values, &a_s); auto s = schema({field("timestamp", t_s)}); auto table = Table::Make(s, {a_s}); - ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(), - CreateOutputStream(), table->num_rows())); - - for (auto unit : {TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) { - auto coerce_props = - ArrowWriterProperties::Builder().coerce_timestamps(unit)->build(); - ASSERT_RAISES( - Invalid, - WriteTable(*table, default_memory_pool(), CreateOutputStream(), - table->num_rows(), default_writer_properties(), - coerce_props)); + for (auto unit : {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) { + if (unit == TimeUnit::SECOND) { + ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(), + CreateOutputStream(), table->num_rows())); + } else { + auto coerce_props = + ArrowWriterProperties::Builder().coerce_timestamps(unit)->build(); + ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(), + CreateOutputStream(), table->num_rows(), + default_writer_properties(), coerce_props)); + } } - std::vector null_overflow_values = {9223372036854776LL}; std::vector null_valid = {false}; std::shared_ptr a_null; - ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid, null_overflow_values, + ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid, overflow_values, &a_null); auto null_table = Table::Make(s, {a_null}); - ASSERT_OK_NO_THROW(WriteTable(*null_table, default_memory_pool(), - CreateOutputStream(), null_table->num_rows())); + ASSERT_OK_NO_THROW(WriteTable(*null_table, default_memory_pool(), CreateOutputStream(), + null_table->num_rows())); } TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) {