Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 36 additions & 56 deletions src/paimon/common/utils/binary_row_partition_computer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,14 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
arrow::field("f6", arrow::int32()),
arrow::field("f7", arrow::int64()),
arrow::field("f8", arrow::int64()),
arrow::field("f9", arrow::float32()),
arrow::field("f10", arrow::float64()),
arrow::field("f11", arrow::utf8()),
arrow::field("f12", arrow::utf8()),
arrow::field("f13", arrow::date32()),
arrow::field("f9", arrow::utf8()),
arrow::field("f10", arrow::utf8()),
arrow::field("f11", arrow::date32()),
arrow::field("non-partition-field", arrow::int32())};

auto schema = arrow::schema(fields);
std::vector<std::string> partition_keys = {"f0", "f2", "f1", "f3", "f4", "f5", "f6",
"f7", "f8", "f9", "f10", "f11", "f12", "f13"};
std::vector<std::string> partition_keys = {"f0", "f2", "f1", "f3", "f4", "f5",
"f6", "f7", "f8", "f9", "f10", "f11"};
{
// simple case with legacy_partition_name_enabled = true
ASSERT_OK_AND_ASSIGN(
Expand All @@ -69,14 +67,12 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
{"f6", "-448489"},
{"f7", "-9223372036854775808"},
{"f8", "182737474"},
{"f9", "0.334"},
{"f10", "467.66472"},
{"f11", "abcde"},
{"f12", "这是一个很长很长的中文"},
{"f13", "5"},
{"f9", "abcde"},
{"f10", "这是一个很长很长的中文"},
{"f11", "5"},
};
ASSERT_OK_AND_ASSIGN(BinaryRow row, computer->ToBinaryRow(partition_map));
ASSERT_EQ(14, row.GetFieldCount());
ASSERT_EQ(12, row.GetFieldCount());
ASSERT_EQ(true, row.GetBoolean(0));
ASSERT_EQ(-20, row.GetByte(1));
ASSERT_EQ(10, row.GetByte(2));
Expand All @@ -86,15 +82,13 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
ASSERT_EQ(-448489, row.GetInt(6));
ASSERT_EQ(std::numeric_limits<int64_t>::min(), row.GetLong(7));
ASSERT_EQ(182737474l, row.GetLong(8));
ASSERT_NEAR(0.334, row.GetFloat(9), 0.0000001);
ASSERT_NEAR(467.66472, row.GetDouble(10), 0.0000001);
ASSERT_EQ("abcde", row.GetString(11).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(12).ToString());
ASSERT_EQ(5, row.GetDate(13));
ASSERT_EQ("abcde", row.GetString(9).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(10).ToString());
ASSERT_EQ(5, row.GetDate(11));

std::vector<std::pair<std::string, std::string>> part_values;
ASSERT_OK_AND_ASSIGN(part_values, computer->GeneratePartitionVector(row));
ASSERT_EQ(14, part_values.size());
ASSERT_EQ(12, part_values.size());
std::map<std::string, std::string> actual_part_values_map;
for (const auto& [key, value] : part_values) {
actual_part_values_map[key] = value;
Expand All @@ -117,14 +111,12 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
{"f6", "-448489"},
{"f7", "-9223372036854775808"},
{"f8", "182737474"},
{"f9", "0.334"},
{"f10", "467.66472"},
{"f11", "abcde"},
{"f12", "这是一个很长很长的中文"},
{"f13", "1970-01-06"},
{"f9", "abcde"},
{"f10", "这是一个很长很长的中文"},
{"f11", "1970-01-06"},
};
ASSERT_OK_AND_ASSIGN(BinaryRow row, computer->ToBinaryRow(partition_map));
ASSERT_EQ(14, row.GetFieldCount());
ASSERT_EQ(12, row.GetFieldCount());
ASSERT_EQ(true, row.GetBoolean(0));
ASSERT_EQ(-20, row.GetByte(1));
ASSERT_EQ(10, row.GetByte(2));
Expand All @@ -134,15 +126,13 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
ASSERT_EQ(-448489, row.GetInt(6));
ASSERT_EQ(std::numeric_limits<int64_t>::min(), row.GetLong(7));
ASSERT_EQ(182737474l, row.GetLong(8));
ASSERT_NEAR(0.334, row.GetFloat(9), 0.0000001);
ASSERT_NEAR(467.66472, row.GetDouble(10), 0.0000001);
ASSERT_EQ("abcde", row.GetString(11).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(12).ToString());
ASSERT_EQ(5, row.GetDate(13));
ASSERT_EQ("abcde", row.GetString(9).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(10).ToString());
ASSERT_EQ(5, row.GetDate(11));

std::vector<std::pair<std::string, std::string>> part_values;
ASSERT_OK_AND_ASSIGN(part_values, computer->GeneratePartitionVector(row));
ASSERT_EQ(14, part_values.size());
ASSERT_EQ(12, part_values.size());
std::map<std::string, std::string> actual_part_values_map;
for (const auto& [key, value] : part_values) {
actual_part_values_map[key] = value;
Expand All @@ -165,14 +155,12 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
{"f6", "-448489"},
{"f7", "-9223372036854775808"},
{"f8", "182737474"},
{"f9", "0.334"},
{"f10", "467.66472"},
{"f11", " "},
{"f12", "__DEFAULT_PARTITION__"},
{"f13", "5"},
{"f9", " "},
{"f10", "__DEFAULT_PARTITION__"},
{"f11", "5"},
};
ASSERT_OK_AND_ASSIGN(BinaryRow row, computer->ToBinaryRow(partition_map));
ASSERT_EQ(14, row.GetFieldCount());
ASSERT_EQ(12, row.GetFieldCount());
ASSERT_EQ(true, row.GetBoolean(0));
ASSERT_EQ(-20, row.GetByte(1));
ASSERT_EQ(10, row.GetByte(2));
Expand All @@ -182,15 +170,13 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
ASSERT_EQ(-448489, row.GetInt(6));
ASSERT_EQ(std::numeric_limits<int64_t>::min(), row.GetLong(7));
ASSERT_EQ(182737474l, row.GetLong(8));
ASSERT_NEAR(0.334, row.GetFloat(9), 0.0000001);
ASSERT_NEAR(467.66472, row.GetDouble(10), 0.0000001);
ASSERT_EQ(" ", row.GetString(11).ToString());
ASSERT_TRUE(row.IsNullAt(12));
ASSERT_EQ(5, row.GetInt(13));
ASSERT_EQ(" ", row.GetString(9).ToString());
ASSERT_TRUE(row.IsNullAt(10));
ASSERT_EQ(5, row.GetInt(11));

std::vector<std::pair<std::string, std::string>> part_values;
ASSERT_OK_AND_ASSIGN(part_values, computer->GeneratePartitionVector(row));
ASSERT_EQ(14, part_values.size());
ASSERT_EQ(12, part_values.size());
std::map<std::string, std::string> actual_part_values_map;
for (const auto& [key, value] : part_values) {
actual_part_values_map[key] = value;
Expand All @@ -205,11 +191,9 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
{"f6", "-448489"},
{"f7", "-9223372036854775808"},
{"f8", "182737474"},
{"f9", "0.334"},
{"f10", "467.66472"},
{"f11", "__DEFAULT_PARTITION__"},
{"f12", "__DEFAULT_PARTITION__"},
{"f13", "5"},
{"f9", "__DEFAULT_PARTITION__"},
{"f10", "__DEFAULT_PARTITION__"},
{"f11", "5"},
};
ASSERT_EQ(actual_part_values_map, expected_map);
}
Expand All @@ -227,10 +211,8 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
{"f6", "-448489"},
{"f7", "-9223372036854775808"},
{"f8", "182737474"},
{"f9", "0.334"},
{"f10", "467.66472"},
{"f11", "abcde"},
{"f12", "这是一个很长很长的中文"}};
{"f9", "abcde"},
{"f10", "这是一个很长很长的中文"}};

ASSERT_NOK_WITH_MSG(computer->ToBinaryRow(partition_map),
"can not find partition key 'f4' in input partition");
Expand All @@ -251,10 +233,8 @@ TEST(BinaryRowPartitionComputerTest, TestToAndFromBinaryRow) {
{"f6", "abcd"},
{"f7", "-9223372036854775808"},
{"f8", "182737474"},
{"f9", "0.334"},
{"f10", "467.66472"},
{"f11", "abcde"},
{"f12", "这是一个很长很长的中文"}};
{"f9", "abcde"},
{"f10", "这是一个很长很长的中文"}};
ASSERT_NOK_WITH_MSG(computer->ToBinaryRow(partition_map),
"cannot convert field idx 6, field value abcd to type INT32");
}
Expand Down
65 changes: 0 additions & 65 deletions src/paimon/common/utils/data_converter_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,26 +113,6 @@ class DataConverterUtils {
return Status::OK();
};
break;
case arrow::Type::FLOAT:
converter = [](const std::string& value_str, int32_t field_idx,
BinaryRowWriter* writer) {
auto value = StringUtils::StringToValue<float>(value_str);
RETURN_INVALID_WITH_FIELD_INFO(value, field_idx, value_str,
arrow::internal::ToString(arrow::Type::FLOAT));
writer->WriteFloat(field_idx, value.value());
return Status::OK();
};
break;
case arrow::Type::DOUBLE:
converter = [](const std::string& value_str, int32_t field_idx,
BinaryRowWriter* writer) {
auto value = StringUtils::StringToValue<double>(value_str);
RETURN_INVALID_WITH_FIELD_INFO(value, field_idx, value_str,
arrow::internal::ToString(arrow::Type::DOUBLE));
writer->WriteDouble(field_idx, value.value());
return Status::OK();
};
break;
case arrow::Type::STRING:
converter = [pool](const std::string& value_str, int32_t field_idx,
BinaryRowWriter* writer) {
Expand All @@ -158,39 +138,6 @@ class DataConverterUtils {
return converter;
}

// Formats a floating-point value (float or double) as a decimal string.
//
// Zero and values whose magnitude lies in [1e-3, 1e7] are written in plain decimal
// notation with trailing zeros removed, always keeping one fractional digit
// (e.g. "233.0", "-0.5"). Every other value is written in upper-case scientific
// notation; the '+' exponent sign, one leading exponent zero and the trailing mantissa
// zeros are then stripped (e.g. "3.0E-4", "1.0E9").
//
// @param value     number to format.
// @param precision number of fractional digits requested for the scientific form; the
//                  plain decimal form uses sizeof(T) fractional digits instead.
// @return the formatted text.
//
// NOTE(review): the expected strings in the unit tests look like Java's
// Float/Double.toString output; if that is the contract, the inclusive upper bound
// (<= 1e7) should probably be exclusive - confirm before changing it.
template <typename T>
static std::string FloatValueToString(const T& value, int32_t precision) {
    std::stringstream oss;
    // Choose the notation from the magnitude so that the sign never changes it. Zero is
    // listed explicitly because it lies below the 1e-3 lower bound.
    const T magnitude = value < 0 ? -value : value;
    if (value == 0 || (magnitude >= 1e-3 && magnitude <= 1e7)) {
        oss << std::fixed << std::setprecision(static_cast<int>(sizeof(T))) << value;
        std::string result = oss.str();
        // Fixed output always contains '.', so pos is valid. Keep exactly one digit after
        // the point when the whole fraction consists of zeros.
        auto pos = result.find_last_not_of('0');
        result.erase(pos + (result[pos] == '.') + 1, std::string::npos);
        return result;
    }
    oss << std::uppercase << std::scientific << std::setprecision(precision) << value;
    std::string result = oss.str();
    auto e_pos = result.find('E');
    if (e_pos != std::string::npos) {
        if (result[e_pos + 1] == '+') {
            // "E+09" -> "E9", "E+10" -> "E10": drop '+' and one leading zero, if any.
            result.erase(e_pos + 1, 1 + (result[e_pos + 2] == '0'));
        } else {
            // "E-04" -> "E-4": keep '-' and drop one leading zero, if any.
            if (result[e_pos + 1] == '-' && result[e_pos + 2] == '0') {
                result.erase(e_pos + 2, 1);
            }
        }
        // Trim trailing zeros of the mantissa but leave the digit that directly follows
        // the decimal point ("1.000000E9" -> "1.0E9").
        auto zero_pos = e_pos - 1;
        while (zero_pos >= 1 && result[zero_pos] == '0' && result[zero_pos - 1] != '.') {
            zero_pos--;
        }
        if (e_pos - zero_pos - 1 > 0) {
            result.erase(zero_pos + 1, e_pos - zero_pos - 1);
        }
    }
    return result;
}

static Result<BinaryRowFieldToStrConverter> CreateBinaryRowFieldToStringConverter(
arrow::Type::type type, bool legacy_partition_name_enabled) {
BinaryRowFieldToStrConverter converter;
Expand Down Expand Up @@ -226,18 +173,6 @@ class DataConverterUtils {
return std::to_string(data);
};
break;
case arrow::Type::FLOAT:
converter = [](const BinaryRow& row, int32_t field_idx) {
float data = row.GetFloat(field_idx);
return FloatValueToString<float>(data, 6);
};
break;
case arrow::Type::DOUBLE:
converter = [](const BinaryRow& row, int32_t field_idx) {
double data = row.GetDouble(field_idx);
return FloatValueToString<double>(data, 15);
};
break;
case arrow::Type::STRING:
converter = [](const BinaryRow& row, int32_t field_idx) {
BinaryString data = row.GetString(field_idx);
Expand Down
47 changes: 6 additions & 41 deletions src/paimon/common/utils/data_converter_utils_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ TEST(DataConverterUtilsTest, TestDataToBinaryRowConverterWithLegacyPartitionName
{"-448489", arrow::Type::INT32},
{"279039", arrow::Type::INT64},
{"1234567", arrow::Type::INT64},
{"0.334", arrow::Type::FLOAT},
{"467.66472", arrow::Type::DOUBLE},
{"abcde", arrow::Type::STRING},
{"这是一个很长很长的中文", arrow::Type::STRING},
{"10440", arrow::Type::DATE32}};
Expand Down Expand Up @@ -77,11 +75,9 @@ TEST(DataConverterUtilsTest, TestDataToBinaryRowConverterWithLegacyPartitionName
ASSERT_EQ(-448489, row.GetInt(6));
ASSERT_EQ(279039, row.GetLong(7));
ASSERT_EQ(1234567, row.GetLong(8));
ASSERT_NEAR(0.334, row.GetFloat(9), 0.0000001);
ASSERT_NEAR(467.66472, row.GetDouble(10), 0.0000001);
ASSERT_EQ("abcde", row.GetString(11).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(12).ToString());
ASSERT_EQ(10440, row.GetDate(13));
ASSERT_EQ("abcde", row.GetString(9).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(10).ToString());
ASSERT_EQ(10440, row.GetDate(11));

for (size_t idx = 0; idx < data.size(); idx++) {
ASSERT_OK_AND_ASSIGN(auto partition_field_str, reconverters[idx](row, idx));
Expand All @@ -101,8 +97,6 @@ TEST(DataConverterUtilsTest, TestDataToBinaryRowConverterWithNoLegacyPartitionNa
{"-448489", arrow::Type::INT32},
{"279039", arrow::Type::INT64},
{"1234567", arrow::Type::INT64},
{"0.334", arrow::Type::FLOAT},
{"467.66472", arrow::Type::DOUBLE},
{"abcde", arrow::Type::STRING},
{"这是一个很长很长的中文", arrow::Type::STRING},
{"1998-08-02", arrow::Type::DATE32}};
Expand Down Expand Up @@ -135,43 +129,14 @@ TEST(DataConverterUtilsTest, TestDataToBinaryRowConverterWithNoLegacyPartitionNa
ASSERT_EQ(-448489, row.GetInt(6));
ASSERT_EQ(279039, row.GetLong(7));
ASSERT_EQ(1234567, row.GetLong(8));
ASSERT_NEAR(0.334, row.GetFloat(9), 0.0000001);
ASSERT_NEAR(467.66472, row.GetDouble(10), 0.0000001);
ASSERT_EQ("abcde", row.GetString(11).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(12).ToString());
ASSERT_EQ(10440, row.GetDate(13));
ASSERT_EQ("abcde", row.GetString(9).ToString());
ASSERT_EQ("这是一个很长很长的中文", row.GetString(10).ToString());
ASSERT_EQ(10440, row.GetDate(11));

for (size_t idx = 0; idx < data.size(); idx++) {
ASSERT_OK_AND_ASSIGN(auto partition_field_str, reconverters[idx](row, idx));
ASSERT_EQ(data[idx].first, partition_field_str);
}
}

// Verifies the text produced by FloatValueToString for float and double inputs, covering
// both the plain decimal and the scientific output forms.
TEST(DataConverterUtilsTest, TestValueToStringSimple) {
    // Float inputs are always formatted with a requested precision of 6.
    const auto float_text = [](float input) {
        return DataConverterUtils::FloatValueToString<float>(input, 6);
    };
    // Double inputs carry their own requested precision.
    const auto double_text = [](double input, int digits) {
        return DataConverterUtils::FloatValueToString<double>(input, digits);
    };

    // float
    ASSERT_EQ("233.0", float_text(static_cast<float>(233)));
    ASSERT_EQ("3.0E-4", float_text(static_cast<float>(0.0003)));
    ASSERT_EQ("3.478589E10", float_text(static_cast<float>(34785895352)));
    ASSERT_EQ("1.0E9", float_text(static_cast<float>(1000000000)));
    ASSERT_EQ("1000000.0", float_text(static_cast<float>(1000000)));
    ASSERT_EQ("467.6647", float_text(static_cast<float>(467.6647)));

    // double
    ASSERT_EQ("233.0", double_text(static_cast<double>(233), 15));
    ASSERT_EQ("3.4785895352E10", double_text(static_cast<double>(34785895352), 15));
    ASSERT_EQ("1.0E9", double_text(static_cast<double>(1000000000), 15));
    ASSERT_EQ("1000000.0", double_text(static_cast<double>(1000000), 15));
    ASSERT_EQ("467.66472", double_text(static_cast<double>(467.66472), 6));
    ASSERT_EQ("123456.123456", double_text(static_cast<double>(123456.123456), 6));
}

} // namespace paimon::test
10 changes: 0 additions & 10 deletions src/paimon/core/io/field_mapping_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,16 +218,6 @@ Result<std::shared_ptr<arrow::Array>> FieldMappingReader::GenerateSinglePartitio
scalar = std::make_shared<arrow::Int64Scalar>(value);
break;
}
case arrow::Type::type::FLOAT: {
float value = partition_.GetFloat(partition_info_.value().idx_in_partition[idx]);
scalar = std::make_shared<arrow::FloatScalar>(value);
break;
}
case arrow::Type::type::DOUBLE: {
double value = partition_.GetDouble(partition_info_.value().idx_in_partition[idx]);
scalar = std::make_shared<arrow::DoubleScalar>(value);
break;
}
case arrow::Type::type::STRING: {
BinaryString value =
partition_.GetString(partition_info_.value().idx_in_partition[idx]);
Expand Down
Loading
Loading