From db83ad0f79c31e37f64e3d8405cbd1fd3f7d95cc Mon Sep 17 00:00:00 2001 From: Dev-iL <6509619+Dev-iL@users.noreply.github.com> Date: Thu, 14 May 2026 16:34:50 +0300 Subject: [PATCH] Decode internal "char" type (OID 18) natively MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes psqlpy-python/psqlpy#165. Any query touching PostgreSQL system catalogs (pg_type, pg_class, pg_attribute, pg_proc, ...) raised RustToPyValueMappingError because the internal "char" type — distinct from character(n)/BPCHAR — had no native decoder and fell through to other_postgres_bytes_to_py. Add an InternalChar(u8) wrapper next to the existing InternalUuid / InnerDecimal / InnerInterval helpers and wire it into postgres_bytes_to_py via two new match arms (Type::CHAR, Type::CHAR_ARRAY). The single payload byte is read directly as u8 by InternalChar's own FromSql impl (not via tokio-postgres' i8 FromSql impl plus a cast, which trips clippy::cast_sign_loss) and mapped to a one-character Python str through char::from(u8) — i.e. Unicode code points 0..=255 (Latin-1 round-trip), matching psycopg2/psycopg3. The custom_decoders dispatch is intentionally unchanged: it stays keyed by column name per the existing documented contract. 
Tests: - python/tests/test_value_converter.py: * test_char_internal_type_pg_type_reproduction — exact snippet from #165 * test_char_internal_type_byte_spectrum — reachable ASCII bytes 0x20, 0x41, 0x61, 0x7E plus NULL (SQL chr() rejects NUL and re-encodes >=0x80 as multi-byte UTF-8) * test_char_internal_type_array — "char"[] decoded to list[str] - src/value_converter/models/internal_char.rs: * from_sql_round_trips_full_byte_range — full 0..=255 byte mapping the SQL test cannot reach * accepts_only_char_type — type guard rejects TEXT/VARCHAR/BPCHAR --- python/tests/test_value_converter.py | 80 +++++++++++++++++++++ src/value_converter/models/internal_char.rs | 77 ++++++++++++++++++++ src/value_converter/models/mod.rs | 1 + src/value_converter/to_python.rs | 17 ++++- 4 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 src/value_converter/models/internal_char.rs diff --git a/python/tests/test_value_converter.py b/python/tests/test_value_converter.py index eae8a4ae..dba7e794 100644 --- a/python/tests/test_value_converter.py +++ b/python/tests/test_value_converter.py @@ -625,6 +625,86 @@ class TestStrEnum(str, Enum): assert qs_result.result()[0]["test_mood2"] == TestStrEnum.OK +async def test_char_internal_type_pg_type_reproduction( + psql_pool: ConnectionPool, +) -> None: + """Regression for issue #165. + + The original repro queried system catalog columns of the internal + ``"char"`` type (OID 18). Prior to the fix this raised + ``RustToPyValueMappingError`` even when ``custom_decoders`` was supplied, + because the type had no native decoder. 
+ """ + pg_type_limit = 5 + async with psql_pool.acquire() as conn: + result = await conn.execute( + f"SELECT typname, typtype FROM pg_type LIMIT {pg_type_limit}", + ) + rows = result.result() + assert len(rows) == pg_type_limit + for row in rows: + assert isinstance(row["typname"], str) + assert isinstance(row["typtype"], str) + assert len(row["typtype"]) == 1 + + +async def test_char_internal_type_byte_spectrum( + psql_pool: ConnectionPool, +) -> None: + """Round-trip representative ASCII bytes through a ``"char"`` column. + + The internal ``"char"`` type holds a single byte. SQL ``chr(N)`` rejects + NUL (0x00) with "null character not permitted", and ``chr(N)`` for N >= 128 + produces multi-byte UTF-8 whose cast to ``"char"`` keeps only the first byte + (e.g. chr(128)::"char" stores 0xC2, not 0x80). So this integration test + covers the reachable ASCII slice. The full 0..=255 byte mapping is verified + by the Rust unit test in models/internal_char.rs. + """ + bytes_under_test = [0x20, 0x41, 0x61, 0x7E] + + async with psql_pool.acquire() as conn: + await conn.execute("DROP TABLE IF EXISTS for_char_test") + await conn.execute( + 'CREATE TABLE for_char_test (id INT, c "char")', + ) + for i, b in enumerate(bytes_under_test): + await conn.execute( + 'INSERT INTO for_char_test (id, c) VALUES ($1, chr($2)::"char")', + [i, b], + ) + await conn.execute( + "INSERT INTO for_char_test (id, c) VALUES ($1, NULL)", + [len(bytes_under_test)], + ) + + result = await conn.execute( + "SELECT id, c FROM for_char_test ORDER BY id", + ) + rows = result.result() + + decoded = {row["id"]: row["c"] for row in rows} + for i, b in enumerate(bytes_under_test): + value = decoded[i] + assert isinstance(value, str) + assert len(value) == 1 + assert ( + ord(value) == b + ), f"byte 0x{b:02x} round-tripped to ord(value)=0x{ord(value):02x}" + assert decoded[len(bytes_under_test)] is None + + +async def test_char_internal_type_array( + psql_pool: ConnectionPool, +) -> None: + """Decode an array of 
``"char"`` (OID 1002) into a list of one-character strs.""" + async with psql_pool.acquire() as conn: + result = await conn.execute( + "SELECT ARRAY['a'::\"char\", 'b'::\"char\", 'c'::\"char\"] AS chars", + ) + rows = result.result() + assert rows[0]["chars"] == ["a", "b", "c"] + + async def test_custom_type_as_parameter( psql_pool: ConnectionPool, ) -> None: diff --git a/src/value_converter/models/internal_char.rs b/src/value_converter/models/internal_char.rs new file mode 100644 index 00000000..ea58c872 --- /dev/null +++ b/src/value_converter/models/internal_char.rs @@ -0,0 +1,77 @@ +use postgres_types::FromSql; +use pyo3::{types::PyString, Bound, IntoPyObject, Python}; +use tokio_postgres::types::Type; + +use crate::exceptions::rust_errors::RustPSQLDriverError; + +/// Wrapper around the single-byte payload of `PostgreSQL`'s internal `"char"` +/// type (OID 18, distinct from `character(n)`/BPCHAR). Bytes 0..=255 map to +/// Unicode code points 0..=255 (Latin-1 round-trip), matching psycopg2/psycopg3. +#[derive(Clone, Copy)] +pub struct InternalChar(u8); + +impl<'py> IntoPyObject<'py> for InternalChar { + type Target = PyString; + type Output = Bound<'py, Self::Target>; + type Error = RustPSQLDriverError; + + fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { + let mut tmp = [0u8; 4]; + let s = char::from(self.0).encode_utf8(&mut tmp); + Ok(PyString::new(py, s)) + } +} + +impl<'a> FromSql<'a> for InternalChar { + fn from_sql( + _ty: &Type, + raw: &'a [u8], + ) -> Result<Self, Box<dyn std::error::Error + Sync + Send>> { + // The `"char"` binary wire format is exactly one byte. Read it as `u8` + // directly — the `i8`-then-cast route through tokio_postgres' `FromSql` + // impl trips clippy::cast_sign_loss in pedantic mode for no gain. 
+ let [byte] = *raw else { + return Err(format!("\"char\" expected 1 byte, got {}", raw.len()).into()); + }; + Ok(InternalChar(byte)) + } + + fn accepts(ty: &Type) -> bool { + *ty == Type::CHAR + } +} + +#[cfg(test)] +impl InternalChar { + pub(crate) fn byte(self) -> u8 { + self.0 + } +} + +#[cfg(test)] +mod tests { + use super::InternalChar; + use postgres_types::{FromSql, Type}; + + #[test] + fn from_sql_round_trips_full_byte_range() { + // from_sql reads the raw wire byte directly as u8 and must preserve every + // value. Cover all 256 bytes so a sign-extension or normalization + // regression cannot slip through. + for b in 0u16..=255 { + let byte = b as u8; + let buf = [byte]; + let decoded = + <InternalChar as FromSql>::from_sql(&Type::CHAR, &buf).expect("char decode"); + assert_eq!(decoded.byte(), byte, "byte 0x{byte:02x} not preserved"); + } + } + + #[test] + fn accepts_only_char_type() { + assert!(<InternalChar as FromSql>::accepts(&Type::CHAR)); + assert!(!<InternalChar as FromSql>::accepts(&Type::TEXT)); + assert!(!<InternalChar as FromSql>::accepts(&Type::VARCHAR)); + assert!(!<InternalChar as FromSql>::accepts(&Type::BPCHAR)); + } +} diff --git a/src/value_converter/models/mod.rs b/src/value_converter/models/mod.rs index b36f3bff..7b6bbdeb 100644 --- a/src/value_converter/models/mod.rs +++ b/src/value_converter/models/mod.rs @@ -1,4 +1,5 @@ pub mod decimal; +pub mod internal_char; pub mod interval; pub mod serde_value; pub mod uuid; diff --git a/src/value_converter/to_python.rs b/src/value_converter/to_python.rs index d3f0007b..35fe7ec2 100644 --- a/src/value_converter/to_python.rs +++ b/src/value_converter/to_python.rs @@ -21,8 +21,8 @@ use crate::{ RustRect, }, models::{ - decimal::InnerDecimal, interval::InnerInterval, serde_value::InternalSerdeValue, - uuid::InternalUuid, + decimal::InnerDecimal, internal_char::InternalChar, interval::InnerInterval, + serde_value::InternalSerdeValue, uuid::InternalUuid, }, }, }; @@ -191,6 +191,13 @@ fn postgres_bytes_to_py( composite_field_postgres_to_py::>(type_, buf, is_simple)? 
.into_py_any(py)?, ), + // Convert internal "char" (OID 18, single byte) into a one-character str. + Type::CHAR => { + match composite_field_postgres_to_py::<Option<InternalChar>>(type_, buf, is_simple)? { + Some(ic) => Ok(ic.into_pyobject(py)?.unbind().into_any()), + None => Ok(py.None()), + } + } // ---------- Boolean Types ---------- // Convert BOOL type into bool Type::BOOL => Ok( @@ -367,6 +374,12 @@ fn postgres_bytes_to_py( composite_field_postgres_to_py::>>(type_, buf, is_simple)?, ) .into_py_any(py)?), + // Convert ARRAY of internal "char" into list[str] (each element is one byte). + Type::CHAR_ARRAY => Ok(postgres_array_to_py( + py, + composite_field_postgres_to_py::<Option<Array<InternalChar>>>(type_, buf, is_simple)?, + ) + .into_py_any(py)?), // ---------- Array Integer Types ---------- // Convert ARRAY of SmallInt into Vec, then into list[int] Type::INT2_ARRAY => Ok(postgres_array_to_py(