# Patch summary (leading commentary; the patch text below is unchanged).
#
# Files touched:
#   * cpp/src/arrow/util/converter.h
#       Adds DICTIONARY_CASE(LargeBinaryType) and
#       DICTIONARY_CASE(LargeStringType) to the DICTIONARY_CASE list inside
#       MakeConverterImpl, next to the existing BinaryType / StringType cases.
#   * python/pyarrow/src/arrow/python/python_to_arrow.cc
#       Adds one #include line, and in PyDictionaryConverter replaces the
#       two-argument value_builder_->Append(view_.bytes, <cast of view_.size>)
#       call with a single-argument call taking
#       std::string_view(view_.bytes, view_.size).
#   * python/pyarrow/tests/test_array.py
#       Adds test_dictionary_large_string_and_binary, which builds arrays of
#       type dictionary(int32, large_string) and dictionary(int32, large_binary)
#       via pa.array and asserts both the resulting value_type and the
#       to_pylist() contents.
#
# NOTE(review): this copy of the patch appears to have lost its line breaks and
# any text that was inside angle brackets (the #include targets, the
# static_cast target type, and the template arguments after
# "class PyDictionaryConverter") -- confirm against the original commit before
# applying. The added header is presumably <string_view>; verify.
diff --git a/cpp/src/arrow/util/converter.h b/cpp/src/arrow/util/converter.h index c23d6ccd988..d987bf3061f 100644 --- a/cpp/src/arrow/util/converter.h +++ b/cpp/src/arrow/util/converter.h @@ -238,7 +238,9 @@ struct MakeConverterImpl { DICTIONARY_CASE(FloatType); DICTIONARY_CASE(DoubleType); DICTIONARY_CASE(BinaryType); + DICTIONARY_CASE(LargeBinaryType); DICTIONARY_CASE(StringType); + DICTIONARY_CASE(LargeStringType); DICTIONARY_CASE(FixedSizeBinaryType); #undef DICTIONARY_CASE default: diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index e7ce54abcd8..72cbb91712f 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -826,7 +827,7 @@ class PyDictionaryConverter> } else { ARROW_RETURN_NOT_OK( PyValue::Convert(this->value_type_, this->options_, value, view_)); - return this->value_builder_->Append(view_.bytes, static_cast(view_.size)); + return this->value_builder_->Append(std::string_view(view_.bytes, view_.size)); } } diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index a103519dc5a..1b727d5cf8b 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -4468,3 +4468,19 @@ def test_dunders_checked_overflow(): arr ** pa.scalar(2, type=pa.int8()) with pytest.raises(pa.ArrowInvalid, match=error_match): arr / (-arr) + + +def test_dictionary_large_string_and_binary(): + # Test dictionary with large_string values + arr_str = pa.array( + ["a", "b", "a"], type=pa.dictionary(pa.int32(), pa.large_string()) + ) + assert arr_str.type.value_type == pa.large_string() + assert arr_str.to_pylist() == ["a", "b", "a"] + + # Test dictionary with large_binary values + arr_bin = pa.array( + [b"x", b"y", b"x"], type=pa.dictionary(pa.int32(), pa.large_binary()) + ) + assert arr_bin.type.value_type == 
pa.large_binary() + assert arr_bin.to_pylist() == [b"x", b"y", b"x"]