diff --git a/mypy/typeshed/stubs/librt/librt/strings.pyi b/mypy/typeshed/stubs/librt/librt/strings.pyi index a0c0cb9efdeb..51f04c680a56 100644 --- a/mypy/typeshed/stubs/librt/librt/strings.pyi +++ b/mypy/typeshed/stubs/librt/librt/strings.pyi @@ -21,8 +21,14 @@ class StringWriter: def __getitem__(self, i: i64, /) -> i32: ... def write_i16_le(b: BytesWriter, n: i16, /) -> None: ... +def write_i16_be(b: BytesWriter, n: i16, /) -> None: ... def read_i16_le(b: bytes, index: i64, /) -> i16: ... +def read_i16_be(b: bytes, index: i64, /) -> i16: ... def write_i32_le(b: BytesWriter, n: i32, /) -> None: ... +def write_i32_be(b: BytesWriter, n: i32, /) -> None: ... def read_i32_le(b: bytes, index: i64, /) -> i32: ... +def read_i32_be(b: bytes, index: i64, /) -> i32: ... def write_i64_le(b: BytesWriter, n: i64, /) -> None: ... +def write_i64_be(b: BytesWriter, n: i64, /) -> None: ... def read_i64_le(b: bytes, index: i64, /) -> i64: ... +def read_i64_be(b: bytes, index: i64, /) -> i64: ... diff --git a/mypyc/lib-rt/byteswriter_extra_ops.h b/mypyc/lib-rt/byteswriter_extra_ops.h index 5465870357bf..404078bfb611 100644 --- a/mypyc/lib-rt/byteswriter_extra_ops.h +++ b/mypyc/lib-rt/byteswriter_extra_ops.h @@ -74,6 +74,15 @@ CPyBytesWriter_WriteI16LE(PyObject *obj, int16_t value) { return CPY_NONE; } +static inline char +CPyBytesWriter_WriteI16BE(PyObject *obj, int16_t value) { + BytesWriterObject *self = (BytesWriterObject *)obj; + if (!CPyBytesWriter_EnsureSize(self, 2)) + return CPY_NONE_ERROR; + BytesWriter_WriteI16BEUnsafe(self, value); + return CPY_NONE; +} + static inline char CPyBytesWriter_WriteI32LE(PyObject *obj, int32_t value) { BytesWriterObject *self = (BytesWriterObject *)obj; @@ -83,6 +92,15 @@ CPyBytesWriter_WriteI32LE(PyObject *obj, int32_t value) { return CPY_NONE; } +static inline char +CPyBytesWriter_WriteI32BE(PyObject *obj, int32_t value) { + BytesWriterObject *self = (BytesWriterObject *)obj; + if (!CPyBytesWriter_EnsureSize(self, 4)) + return CPY_NONE_ERROR; + BytesWriter_WriteI32BEUnsafe(self, value); + return CPY_NONE; +} + static inline char CPyBytesWriter_WriteI64LE(PyObject *obj, int64_t value) { BytesWriterObject *self = (BytesWriterObject *)obj; @@ -92,7 +110,16 @@ CPyBytesWriter_WriteI64LE(PyObject *obj, int64_t value) { return CPY_NONE; } -// Bytes: Read integer operations (little-endian) +static inline char +CPyBytesWriter_WriteI64BE(PyObject *obj, int64_t value) { + BytesWriterObject *self = (BytesWriterObject *)obj; + if (!CPyBytesWriter_EnsureSize(self, 8)) + return CPY_NONE_ERROR; + BytesWriter_WriteI64BEUnsafe(self, value); + return CPY_NONE; +} + +// Bytes: Read integer operations // Helper function for bytes read error handling (negative index or out of range) void CPyBytes_ReadError(int64_t index, Py_ssize_t size); @@ -109,6 +136,30 @@ CPyBytes_ReadI16LE(PyObject *bytes_obj, int64_t index) { return CPyBytes_ReadI16LEUnsafe(data + index); } +static inline int16_t +CPyBytes_ReadI16BE(PyObject *bytes_obj, int64_t index) { + // bytes_obj type is enforced by mypyc + Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj); + if (unlikely(index < 0 || index > size - 2)) { + CPyBytes_ReadError(index, size); + return CPY_LL_INT_ERROR; + } + const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj); + return CPyBytes_ReadI16BEUnsafe(data + index); +} + +static inline int32_t +CPyBytes_ReadI32BE(PyObject *bytes_obj, int64_t index) { + // bytes_obj type is enforced by mypyc + Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj); + if (unlikely(index < 0 || index > size - 4)) { + CPyBytes_ReadError(index, size); + return CPY_LL_INT_ERROR; + } + const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj); + return CPyBytes_ReadI32BEUnsafe(data + index); +} + static inline int32_t CPyBytes_ReadI32LE(PyObject *bytes_obj, int64_t index) { // bytes_obj type is enforced by mypyc @@ -133,6 +184,18 @@ CPyBytes_ReadI64LE(PyObject *bytes_obj, int64_t index) { return CPyBytes_ReadI64LEUnsafe(data + index); } +static inline int64_t +CPyBytes_ReadI64BE(PyObject *bytes_obj, int64_t index) { + // bytes_obj type is enforced by mypyc + Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj); + if (unlikely(index < 0 || index > size - 8)) { + CPyBytes_ReadError(index, size); + return CPY_LL_INT_ERROR; + } + const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj); + return CPyBytes_ReadI64BEUnsafe(data + index); +} + #endif // MYPYC_EXPERIMENTAL #endif diff --git a/mypyc/lib-rt/strings/librt_strings.c b/mypyc/lib-rt/strings/librt_strings.c index a06602793565..f1d6fd91d604 100644 --- a/mypyc/lib-rt/strings/librt_strings.c +++ b/mypyc/lib-rt/strings/librt_strings.c @@ -894,6 +894,20 @@ write_i16_le(PyObject *module, PyObject *const *args, size_t nargs) { Py_RETURN_NONE; } +static PyObject* +write_i16_be(PyObject *module, PyObject *const *args, size_t nargs) { + BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i16_be"); + if (bw == NULL) + return NULL; + int16_t unboxed = CPyLong_AsInt16(args[1]); + if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred())) + return NULL; + if (unlikely(!ensure_bytes_writer_size(bw, 2))) + return NULL; + BytesWriter_WriteI16BEUnsafe(bw, unboxed); + Py_RETURN_NONE; +} + static PyObject* read_i16_le(PyObject *module, PyObject *const *args, size_t nargs) { int64_t index; @@ -903,6 +917,15 @@ read_i16_le(PyObject *module, PyObject *const *args, size_t nargs) { return PyLong_FromLong(CPyBytes_ReadI16LEUnsafe(data + index)); } +static PyObject* +read_i16_be(PyObject *module, PyObject *const *args, size_t nargs) { + int64_t index; + const unsigned char *data = parse_read_int_args(args, nargs, "read_i16_be", 2, &index); + if (data == NULL) + return NULL; + return PyLong_FromLong(CPyBytes_ReadI16BEUnsafe(data + index)); +} + static PyObject* write_i32_le(PyObject *module, PyObject *const *args, size_t nargs) { BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i32_le"); @@ -917,6 +940,20 @@ write_i32_le(PyObject *module, PyObject *const *args, size_t nargs) { Py_RETURN_NONE; } +static PyObject* +write_i32_be(PyObject *module, PyObject *const *args, size_t nargs) { + BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i32_be"); + if (bw == NULL) + return NULL; + int32_t unboxed = CPyLong_AsInt32(args[1]); + if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred())) + return NULL; + if (unlikely(!ensure_bytes_writer_size(bw, 4))) + return NULL; + BytesWriter_WriteI32BEUnsafe(bw, unboxed); + Py_RETURN_NONE; +} + static PyObject* read_i32_le(PyObject *module, PyObject *const *args, size_t nargs) { int64_t index; @@ -926,6 +963,15 @@ read_i32_le(PyObject *module, PyObject *const *args, size_t nargs) { return PyLong_FromLong(CPyBytes_ReadI32LEUnsafe(data + index)); } +static PyObject* +read_i32_be(PyObject *module, PyObject *const *args, size_t nargs) { + int64_t index; + const unsigned char *data = parse_read_int_args(args, nargs, "read_i32_be", 4, &index); + if (data == NULL) + return NULL; + return PyLong_FromLong(CPyBytes_ReadI32BEUnsafe(data + index)); +} + static PyObject* write_i64_le(PyObject *module, PyObject *const *args, size_t nargs) { BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i64_le"); @@ -940,6 +986,20 @@ write_i64_le(PyObject *module, PyObject *const *args, size_t nargs) { Py_RETURN_NONE; } +static PyObject* +write_i64_be(PyObject *module, PyObject *const *args, size_t nargs) { + BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i64_be"); + if (bw == NULL) + return NULL; + int64_t unboxed = CPyLong_AsInt64(args[1]); + if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred())) + return NULL; + if (unlikely(!ensure_bytes_writer_size(bw, 8))) + return NULL; + BytesWriter_WriteI64BEUnsafe(bw, unboxed); + Py_RETURN_NONE; +} + static PyObject* read_i64_le(PyObject *module, PyObject *const *args, size_t nargs) { int64_t index; @@ -949,6 +1009,15 @@ read_i64_le(PyObject *module, PyObject *const *args, size_t nargs) { return PyLong_FromLongLong(CPyBytes_ReadI64LEUnsafe(data + index)); } +static PyObject* +read_i64_be(PyObject *module, PyObject *const *args, size_t nargs) { + int64_t index; + const unsigned char *data = parse_read_int_args(args, nargs, "read_i64_be", 8, &index); + if (data == NULL) + return NULL; + return PyLong_FromLongLong(CPyBytes_ReadI64BEUnsafe(data + index)); +} + #endif static PyMethodDef librt_strings_module_methods[] = { @@ -956,21 +1025,39 @@ static PyMethodDef librt_strings_module_methods[] = { {"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL, PyDoc_STR("Write a 16-bit signed integer to BytesWriter in little-endian format") }, + {"write_i16_be", (PyCFunction) write_i16_be, METH_FASTCALL, + PyDoc_STR("Write a 16-bit signed integer to BytesWriter in big-endian format") + }, {"read_i16_le", (PyCFunction) read_i16_le, METH_FASTCALL, PyDoc_STR("Read a 16-bit signed integer from bytes in little-endian format") }, + {"read_i16_be", (PyCFunction) read_i16_be, METH_FASTCALL, + PyDoc_STR("Read a 16-bit signed integer from bytes in big-endian format") + }, {"write_i32_le", (PyCFunction) write_i32_le, METH_FASTCALL, PyDoc_STR("Write a 32-bit signed integer to BytesWriter in little-endian format") }, + {"write_i32_be", (PyCFunction) write_i32_be, METH_FASTCALL, + PyDoc_STR("Write a 32-bit signed integer to BytesWriter in big-endian format") + }, {"read_i32_le", (PyCFunction) read_i32_le, METH_FASTCALL, PyDoc_STR("Read a 32-bit signed integer from bytes in little-endian format") }, + {"read_i32_be", (PyCFunction) read_i32_be, METH_FASTCALL, + PyDoc_STR("Read a 32-bit signed integer from bytes in big-endian format") + }, {"write_i64_le", (PyCFunction) write_i64_le, METH_FASTCALL, PyDoc_STR("Write a 64-bit signed integer to BytesWriter in little-endian format") }, + {"write_i64_be", (PyCFunction) write_i64_be, METH_FASTCALL, + PyDoc_STR("Write a 64-bit signed integer to BytesWriter in big-endian format") + }, {"read_i64_le", (PyCFunction) read_i64_le, METH_FASTCALL, PyDoc_STR("Read a 64-bit signed integer from bytes in little-endian format") }, + {"read_i64_be", (PyCFunction) read_i64_be, METH_FASTCALL, + PyDoc_STR("Read a 64-bit signed integer from bytes in big-endian format") + }, #endif {NULL, NULL, 0, NULL} }; diff --git a/mypyc/lib-rt/strings/librt_strings_common.h b/mypyc/lib-rt/strings/librt_strings_common.h index a8cfb217d695..d2ea605aea78 100644 --- a/mypyc/lib-rt/strings/librt_strings_common.h +++ b/mypyc/lib-rt/strings/librt_strings_common.h @@ -5,18 +5,17 @@ #include #include -// Byte-swap functions for non-native endianness support -#if PY_BIG_ENDIAN -# if defined(_MSC_VER) -# include -# define BSWAP16(x) _byteswap_ushort(x) -# define BSWAP32(x) _byteswap_ulong(x) -# define BSWAP64(x) _byteswap_uint64(x) -# elif defined(__GNUC__) || defined(__clang__) -# define BSWAP16(x) __builtin_bswap16(x) -# define BSWAP32(x) __builtin_bswap32(x) -# define BSWAP64(x) __builtin_bswap64(x) -# else +// Byte-swap functions for endianness conversion (needed for both LE and BE operations) +#if defined(_MSC_VER) +# include +# define BSWAP16(x) _byteswap_ushort(x) +# define BSWAP32(x) _byteswap_ulong(x) +# define BSWAP64(x) _byteswap_uint64(x) +#elif defined(__GNUC__) || defined(__clang__) +# define BSWAP16(x) __builtin_bswap16(x) +# define BSWAP32(x) __builtin_bswap32(x) +# define BSWAP64(x) __builtin_bswap64(x) +#else // Fallback for other compilers (slower but portable) static inline uint16_t BSWAP16(uint16_t x) { return (uint16_t)((x >> 8) | (x << 8)); @@ -37,7 +36,6 @@ static inline uint64_t BSWAP64(uint64_t x) { ((x << 40) & 0xFF000000000000ULL) | ((x << 56) & 0xFF00000000000000ULL); } -# endif #endif // Length of the default buffer embedded directly in a BytesWriter object @@ -65,6 +63,20 @@ BytesWriter_WriteI16LEUnsafe(BytesWriterObject *self, int16_t value) { self->len += 2; } +// Write a 16-bit signed integer in big-endian format to BytesWriter. +// NOTE: This does NOT check buffer capacity - caller must ensure space is available. +static inline void +BytesWriter_WriteI16BEUnsafe(BytesWriterObject *self, int16_t value) { + // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC +#if PY_BIG_ENDIAN + memcpy(self->buf + self->len, &value, 2); +#else + uint16_t swapped = BSWAP16((uint16_t)value); + memcpy(self->buf + self->len, &swapped, 2); +#endif + self->len += 2; +} + // Read a 16-bit signed integer in little-endian format from bytes. // NOTE: This does NOT check bounds - caller must ensure valid index. static inline int16_t @@ -78,6 +90,21 @@ CPyBytes_ReadI16LEUnsafe(const unsigned char *data) { return (int16_t)value; } +// Read a 16-bit signed integer in big-endian format from bytes. +// NOTE: This does NOT check bounds - caller must ensure valid index. +static inline int16_t +CPyBytes_ReadI16BEUnsafe(const unsigned char *data) { + // memcpy is reliably optimized to a single load by GCC, Clang, and MSVC + uint16_t value; + memcpy(&value, data, 2); +#if PY_BIG_ENDIAN + // Already in big-endian format, no swap needed +#else + value = BSWAP16(value); +#endif + return (int16_t)value; +} + // Write a 32-bit signed integer in little-endian format to BytesWriter. // NOTE: This does NOT check buffer capacity - caller must ensure space is available. static inline void @@ -92,6 +119,20 @@ BytesWriter_WriteI32LEUnsafe(BytesWriterObject *self, int32_t value) { self->len += 4; } +// Write a 32-bit signed integer in big-endian format to BytesWriter. +// NOTE: This does NOT check buffer capacity - caller must ensure space is available. +static inline void +BytesWriter_WriteI32BEUnsafe(BytesWriterObject *self, int32_t value) { + // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC +#if PY_BIG_ENDIAN + memcpy(self->buf + self->len, &value, 4); +#else + uint32_t swapped = BSWAP32((uint32_t)value); + memcpy(self->buf + self->len, &swapped, 4); +#endif + self->len += 4; +} + // Read a 32-bit signed integer in little-endian format from bytes. // NOTE: This does NOT check bounds - caller must ensure valid index. static inline int32_t @@ -105,6 +146,21 @@ CPyBytes_ReadI32LEUnsafe(const unsigned char *data) { return (int32_t)value; } +// Read a 32-bit signed integer in big-endian format from bytes. +// NOTE: This does NOT check bounds - caller must ensure valid index. +static inline int32_t +CPyBytes_ReadI32BEUnsafe(const unsigned char *data) { + // memcpy is reliably optimized to a single load by GCC, Clang, and MSVC + uint32_t value; + memcpy(&value, data, 4); +#if PY_BIG_ENDIAN + // Already in big-endian format, no swap needed +#else + value = BSWAP32(value); +#endif + return (int32_t)value; +} + // Write a 64-bit signed integer in little-endian format to BytesWriter. // NOTE: This does NOT check buffer capacity - caller must ensure space is available. static inline void @@ -119,6 +175,20 @@ BytesWriter_WriteI64LEUnsafe(BytesWriterObject *self, int64_t value) { self->len += 8; } +// Write a 64-bit signed integer in big-endian format to BytesWriter. +// NOTE: This does NOT check buffer capacity - caller must ensure space is available. +static inline void +BytesWriter_WriteI64BEUnsafe(BytesWriterObject *self, int64_t value) { + // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC +#if PY_BIG_ENDIAN + memcpy(self->buf + self->len, &value, 8); +#else + uint64_t swapped = BSWAP64((uint64_t)value); + memcpy(self->buf + self->len, &swapped, 8); +#endif + self->len += 8; +} + // Read a 64-bit signed integer in little-endian format from bytes. // NOTE: This does NOT check bounds - caller must ensure valid index. static inline int64_t @@ -132,4 +202,19 @@ CPyBytes_ReadI64LEUnsafe(const unsigned char *data) { return (int64_t)value; } +// Read a 64-bit signed integer in big-endian format from bytes. +// NOTE: This does NOT check bounds - caller must ensure valid index. +static inline int64_t +CPyBytes_ReadI64BEUnsafe(const unsigned char *data) { + // memcpy is reliably optimized to a single load by GCC, Clang, and MSVC + uint64_t value; + memcpy(&value, data, 8); +#if PY_BIG_ENDIAN + // Already in big-endian format, no swap needed +#else + value = BSWAP64(value); +#endif + return (int64_t)value; +} + #endif // LIBRT_STRINGS_COMMON_H diff --git a/mypyc/primitives/librt_strings_ops.py b/mypyc/primitives/librt_strings_ops.py index 3e863e96d0df..9cbfc4e82ced 100644 --- a/mypyc/primitives/librt_strings_ops.py +++ b/mypyc/primitives/librt_strings_ops.py @@ -77,6 +77,62 @@ dependencies=[LIBRT_STRINGS], ) +function_op( + name="builtins.len", + arg_types=[bytes_writer_rprimitive], + return_type=short_int_rprimitive, + c_function_name="CPyBytesWriter_Len", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + +# BytesWriter index adjustment - convert negative index to positive +bytes_writer_adjust_index_op = custom_primitive_op( + name="bytes_writer_adjust_index", + arg_types=[bytes_writer_rprimitive, int64_rprimitive], + return_type=int64_rprimitive, + c_function_name="CPyBytesWriter_AdjustIndex", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + +# BytesWriter range check - check if index is in valid range +bytes_writer_range_check_op = custom_primitive_op( + name="bytes_writer_range_check", + arg_types=[bytes_writer_rprimitive, int64_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyBytesWriter_RangeCheck", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + +# BytesWriter.__getitem__() - get byte at index (no bounds checking) +bytes_writer_get_item_unsafe_op = custom_primitive_op( + name="bytes_writer_get_item", + arg_types=[bytes_writer_rprimitive, int64_rprimitive], + return_type=uint8_rprimitive, + c_function_name="CPyBytesWriter_GetItem", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + +# BytesWriter.__setitem__() - set byte at index (no bounds checking) +bytes_writer_set_item_unsafe_op = custom_primitive_op( + name="bytes_writer_set_item", + arg_types=[bytes_writer_rprimitive, int64_rprimitive, uint8_rprimitive], + return_type=void_rtype, + c_function_name="CPyBytesWriter_SetItem", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + +# i16 write/read functions + function_op( name="librt.strings.write_i16_le", arg_types=[bytes_writer_rprimitive, int16_rprimitive], @@ -87,6 +143,16 @@ dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) +function_op( + name="librt.strings.write_i16_be", + arg_types=[bytes_writer_rprimitive, int16_rprimitive], + return_type=none_rprimitive, + c_function_name="CPyBytesWriter_WriteI16BE", + error_kind=ERR_MAGIC, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + function_op( name="librt.strings.read_i16_le", arg_types=[bytes_rprimitive, int64_rprimitive], @@ -98,100 +164,101 @@ ) function_op( - name="librt.strings.write_i32_le", - arg_types=[bytes_writer_rprimitive, int32_rprimitive], - return_type=none_rprimitive, - c_function_name="CPyBytesWriter_WriteI32LE", - error_kind=ERR_MAGIC, + name="librt.strings.read_i16_be", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=int16_rprimitive, + c_function_name="CPyBytes_ReadI16BE", + error_kind=ERR_MAGIC_OVERLAPPING, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) +# i32 write/read functions + function_op( - name="librt.strings.read_i32_le", - arg_types=[bytes_rprimitive, int64_rprimitive], - return_type=int32_rprimitive, - c_function_name="CPyBytes_ReadI32LE", - error_kind=ERR_MAGIC_OVERLAPPING, + name="librt.strings.write_i32_le", + arg_types=[bytes_writer_rprimitive, int32_rprimitive], + return_type=none_rprimitive, + c_function_name="CPyBytesWriter_WriteI32LE", + error_kind=ERR_MAGIC, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) function_op( - name="librt.strings.write_i64_le", - arg_types=[bytes_writer_rprimitive, int64_rprimitive], + name="librt.strings.write_i32_be", + arg_types=[bytes_writer_rprimitive, int32_rprimitive], return_type=none_rprimitive, - c_function_name="CPyBytesWriter_WriteI64LE", + c_function_name="CPyBytesWriter_WriteI32BE", error_kind=ERR_MAGIC, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) function_op( - name="librt.strings.read_i64_le", + name="librt.strings.read_i32_le", arg_types=[bytes_rprimitive, int64_rprimitive], - return_type=int64_rprimitive, - c_function_name="CPyBytes_ReadI64LE", + return_type=int32_rprimitive, + c_function_name="CPyBytes_ReadI32LE", error_kind=ERR_MAGIC_OVERLAPPING, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) function_op( - name="builtins.len", - arg_types=[bytes_writer_rprimitive], - return_type=short_int_rprimitive, - c_function_name="CPyBytesWriter_Len", - error_kind=ERR_NEVER, + name="librt.strings.read_i32_be", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=int32_rprimitive, + c_function_name="CPyBytes_ReadI32BE", + error_kind=ERR_MAGIC_OVERLAPPING, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) -# BytesWriter index adjustment - convert negative index to positive -bytes_writer_adjust_index_op = custom_primitive_op( - name="bytes_writer_adjust_index", +# i64 write/read functions + +function_op( + name="librt.strings.write_i64_le", arg_types=[bytes_writer_rprimitive, int64_rprimitive], - return_type=int64_rprimitive, - c_function_name="CPyBytesWriter_AdjustIndex", - error_kind=ERR_NEVER, + return_type=none_rprimitive, + c_function_name="CPyBytesWriter_WriteI64LE", + error_kind=ERR_MAGIC, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) -# BytesWriter range check - check if index is in valid range -bytes_writer_range_check_op = custom_primitive_op( - name="bytes_writer_range_check", +function_op( + name="librt.strings.write_i64_be", arg_types=[bytes_writer_rprimitive, int64_rprimitive], - return_type=bool_rprimitive, - c_function_name="CPyBytesWriter_RangeCheck", - error_kind=ERR_NEVER, + return_type=none_rprimitive, + c_function_name="CPyBytesWriter_WriteI64BE", + error_kind=ERR_MAGIC, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) -# BytesWriter.__getitem__() - get byte at index (no bounds checking) -bytes_writer_get_item_unsafe_op = custom_primitive_op( - name="bytes_writer_get_item", - arg_types=[bytes_writer_rprimitive, int64_rprimitive], - return_type=uint8_rprimitive, - c_function_name="CPyBytesWriter_GetItem", - error_kind=ERR_NEVER, +function_op( + name="librt.strings.read_i64_le", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=int64_rprimitive, + c_function_name="CPyBytes_ReadI64LE", + error_kind=ERR_MAGIC_OVERLAPPING, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) -# BytesWriter.__setitem__() - set byte at index (no bounds checking) -bytes_writer_set_item_unsafe_op = custom_primitive_op( - name="bytes_writer_set_item", - arg_types=[bytes_writer_rprimitive, int64_rprimitive, uint8_rprimitive], - return_type=void_rtype, - c_function_name="CPyBytesWriter_SetItem", - error_kind=ERR_NEVER, +function_op( + name="librt.strings.read_i64_be", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=int64_rprimitive, + c_function_name="CPyBytes_ReadI64BE", + error_kind=ERR_MAGIC_OVERLAPPING, experimental=True, dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) # StringWriter operations + function_op( name="librt.strings.StringWriter", arg_types=[], diff --git a/mypyc/test-data/run-librt-strings.test b/mypyc/test-data/run-librt-strings.test index 4653a0ac986e..cea23186ea69 100644 --- a/mypyc/test-data/run-librt-strings.test +++ b/mypyc/test-data/run-librt-strings.test @@ -5,7 +5,7 @@ import binascii import random import struct -from librt.strings import BytesWriter, StringWriter, write_i16_le, read_i16_le, write_i32_le, read_i32_le, write_i64_le, read_i64_le +from librt.strings import BytesWriter, StringWriter, write_i16_le, write_i16_be, read_i16_le, read_i16_be, write_i32_le, write_i32_be, read_i32_le, read_i32_be, write_i64_le, write_i64_be, read_i64_le, read_i64_be from testutil import assertRaises @@ -172,35 +172,36 @@ def test_bytes_writer_write_i16_le() -> None: assert result[3:5] == b"\x01\x00" # i=1 assert result[1999:2001] == b"\xe7\x03" # i=999 -def test_write_i16_le_via_any() -> None: - # Test write_i16_le via Any to ensure C extension wrapper works +def test_write_i16_via_any() -> None: + # Test write_i16_le/be via Any to ensure C extension wrapper works # (tests fallback path when not using mypyc primitives) - w: Any = BytesWriter() - - # Test 8-bit and 16-bit operations - w.append(0x42) - write_i16_le(w, 0x1234) - w.append(0xFF) - assert w.getvalue() == b"\x42\x34\x12\xFF" - - # Test buffer growth with write_i16_le - w2: Any = BytesWriter() - for i in range(150): - write_i16_le(w2, i) - result = w2.getvalue() - assert len(result) == 300 - assert result[0:2] == b"\x00\x00" # i=0 - assert result[2:4] == b"\x01\x00" # i=1 - assert result[298:300] == b"\x95\x00" # i=149 - - # Test values that don't fit in i16 - w3: Any = BytesWriter() - with assertRaises(ValueError, "int too large to convert to i16"): - write_i16_le(w3, 32768 + int()) - with assertRaises(ValueError, "int too large to convert to i16"): - write_i16_le(w3, -32769 + int()) - with assertRaises(ValueError, "int too large to convert to i16"): - write_i16_le(w3, 100000 + int()) + for write_func, fmt in zip((write_i16_le, write_i16_be), ("h")): + w: Any = BytesWriter() + + # Test 8-bit and 16-bit operations + w.append(0x42) + write_func(w, 0x1234) + w.append(0xFF) + assert w.getvalue() == b"\x42" + struct.pack(fmt, 0x1234) + b"\xFF" + + # Test buffer growth + w2: Any = BytesWriter() + for i in range(150): + write_func(w2, i) + result = w2.getvalue() + assert len(result) == 300 + assert result[0:2] == struct.pack(fmt, 0) # i=0 + assert result[2:4] == struct.pack(fmt, 1) # i=1 + assert result[298:300] == struct.pack(fmt, 149) # i=149 + + # Test values that don't fit in i16 + w3: Any = BytesWriter() + with assertRaises(ValueError, "int too large to convert to i16"): + write_func(w3, 32768 + int()) + with assertRaises(ValueError, "int too large to convert to i16"): + write_func(w3, -32769 + int()) + with assertRaises(ValueError, "int too large to convert to i16"): + write_func(w3, 100000 + int()) def test_bytes_reader_read_i16_le() -> None: # Test various i16 values from 1-byte to 2-byte range @@ -217,32 +218,81 @@ def test_bytes_reader_read_i16_le() -> None: assert read_i16_le(result, i * 2) == v # Test unaligned offset - data2 = b"\xFF" + b"\x34\x12" + b"\xFF" + data2 = b"\xFF" + struct.pack(" None: - # Test read_i16_le via Any to ensure C extension wrapper works - data: Any = b"\x34\x12\xff\xff\x00\x00" - assert read_i16_le(data, 0) == 0x1234 - assert read_i16_le(data, 2) == -1 - assert read_i16_le(data, 4) == 0 - - # Test error cases - # Index out of range - with assertRaises(IndexError, "index 10 out of range for bytes of length 6"): - read_i16_le(data, 10 + int()) - with assertRaises(IndexError, "index 5 out of range for bytes of length 6"): - read_i16_le(data, 5 + int()) # Not enough bytes for i16 - - # Negative index - with assertRaises(ValueError, "index must be non-negative"): - read_i16_le(data, -1 + int()) - - # Wrong type for bytes argument - with assertRaises(TypeError, "read_i16_le() argument 1 must be bytes"): - read_i16_le("not bytes", 0 + int()) # type: ignore - with assertRaises(TypeError, "read_i16_le() argument 1 must be bytes"): - read_i16_le(bytearray(b"\x00\x00"), 0 + int()) # type: ignore + # Test barely out of bounds (only 1 byte available, need 2) + with assertRaises(IndexError): + read_i16_le(data, len(data) - 1) + +def test_bytes_reader_read_i16_be() -> None: + # Test various i16 values from 1-byte to 2-byte range + data = b"".join(struct.pack(">h", v) for v in I16_TEST_VALUES) + for i, v in enumerate(I16_TEST_VALUES): + assert read_i16_be(data, i * 2) == v + + # Test round-trip with write_i16_be + w = BytesWriter() + for v in I16_TEST_VALUES: + write_i16_be(w, v) + result = w.getvalue() + for i, v in enumerate(I16_TEST_VALUES): + assert read_i16_be(result, i * 2) == v + + # Test unaligned offset + data2 = b"\xFF" + struct.pack(">h", 0x1234) + b"\xFF" + assert read_i16_be(data2, 1) == 0x1234 + + # Test barely out of bounds (only 1 byte available, need 2) + with assertRaises(IndexError): + read_i16_be(data, len(data) - 1) + +def test_read_i16_via_any() -> None: + # Test read_i16_le/be via Any to ensure C extension wrapper works + for read_func, fmt in zip((read_i16_le, read_i16_be), ("h")): + data: Any = struct.pack(fmt, 0x1234) + struct.pack(fmt, -1) + struct.pack(fmt, 0) + assert read_func(data, 0) == 0x1234 + assert read_func(data, 2) == -1 + assert read_func(data, 4) == 0 + + # Test error cases + # Index out of range + with assertRaises(IndexError, "index 10 out of range for bytes of length 6"): + read_func(data, 10 + int()) + with assertRaises(IndexError, "index 5 out of range for bytes of length 6"): + read_func(data, 5 + int()) # Not enough bytes for i16 + + # Negative index + with assertRaises(ValueError, "index must be non-negative"): + read_func(data, -1 + int()) + + # Wrong type for bytes argument + with assertRaises(TypeError): + read_func("not bytes", 0 + int()) # type: ignore + with assertRaises(TypeError): + read_func(bytearray(b"\x00\x00"), 0 + int()) # type: ignore + +def test_bytes_writer_write_i16_be() -> None: + # Test various i16 values from 1-byte to 2-byte range + w = BytesWriter() + for v in I16_TEST_VALUES: + write_i16_be(w, v) + assert w.getvalue() == b"".join(struct.pack(">h", v) for v in I16_TEST_VALUES) + + # Test mixing with other operations and buffer growth + w = BytesWriter() + w.append(0xFF) + for i in range(1000): + write_i16_be(w, i) + w.append(0xEE) + result = w.getvalue() + assert len(result) == 2002 + assert result[0] == 0xFF + assert result[-1] == 0xEE + # Check a few values in the middle + assert result[1:3] == b"\x00\x00" # i=0 + assert result[3:5] == b"\x00\x01" # i=1 + assert result[1999:2001] == b"\x03\xe7" # i=999 def test_bytes_writer_write_i32_le() -> None: # Test various i32 values from 1-byte to 4-byte range @@ -266,35 +316,58 @@ def test_bytes_writer_write_i32_le() -> None: assert result[5:9] == struct.pack(" None: - # Test write_i32_le via Any to ensure C extension wrapper works - # (tests fallback path when not using mypyc primitives) - w: Any = BytesWriter() +def test_bytes_writer_write_i32_be() -> None: + # Test various i32 values from 1-byte to 4-byte range + w = BytesWriter() + for v in I32_TEST_VALUES: + write_i32_be(w, v) + assert w.getvalue() == b"".join(struct.pack(">i", v) for v in I32_TEST_VALUES) - # Test 8-bit and 32-bit operations - w.append(0x42) - write_i32_le(w, 0x12345678) + # Test mixing with other operations and buffer growth + w = BytesWriter() w.append(0xFF) - assert w.getvalue() == b"\x42" + struct.pack("i", 0) + assert result[5:9] == struct.pack(">i", 1000) + assert result[1997:2001] == struct.pack(">i", 499000) + +def test_write_i32_via_any() -> None: + # Test write_i32_le/be via Any to ensure C extension wrapper works + # (tests fallback path when not using mypyc primitives) + for write_func, fmt in zip((write_i32_le, write_i32_be), ("i")): + w: Any = BytesWriter() + + # Test 8-bit and 32-bit operations + w.append(0x42) + write_func(w, 0x12345678) + w.append(0xFF) + assert w.getvalue() == b"\x42" + struct.pack(fmt, 0x12345678) + b"\xFF" + + # Test buffer growth + w2: Any = BytesWriter() + for i in range(100): + write_func(w2, i * 10000) + result = w2.getvalue() + assert len(result) == 400 + assert result[0:4] == struct.pack(fmt, 0) + assert result[4:8] == struct.pack(fmt, 10000) + assert result[396:400] == struct.pack(fmt, 990000) + + # Test values that don't fit in i32 + w3: Any = BytesWriter() + with assertRaises(ValueError, "int too large to convert to i32"): + write_func(w3, 2147483648 + int()) + with assertRaises(ValueError, "int too large to convert to i32"): + write_func(w3, -2147483649 + int()) + with assertRaises(ValueError, "int too large to convert to i32"): + write_func(w3, 10000000000 + int()) def test_bytes_reader_read_i32_le() -> None: # Test various i32 values from 1-byte to 4-byte range @@ -314,29 +387,56 @@ def test_bytes_reader_read_i32_le() -> None: data2 = b"\xFF" + struct.pack(" None: - # Test read_i32_le via Any to ensure C extension wrapper works - data: Any = struct.pack(" None: + # Test various i32 values from 1-byte to 4-byte range + data = b"".join(struct.pack(">i", v) for v in I32_TEST_VALUES) + for i, v in enumerate(I32_TEST_VALUES): + assert read_i32_be(data, i * 4) == v + + # Test round-trip with write_i32_be + w = BytesWriter() + for v in I32_TEST_VALUES: + write_i32_be(w, v) + result = w.getvalue() + for i, v in enumerate(I32_TEST_VALUES): + assert read_i32_be(result, i * 4) == v + + # Test unaligned offset + data2 = b"\xFF" + struct.pack(">i", 0x12345678) + b"\xFF" + assert read_i32_be(data2, 1) == 0x12345678 + + # Test barely out of bounds (only 3 bytes available, need 4) + with assertRaises(IndexError): + read_i32_be(data, len(data) - 3) + +def test_read_i32_via_any() -> None: + # Test read_i32_le/be via Any to ensure C extension wrapper works + for read_func, fmt in zip((read_i32_le, read_i32_be), ("i")): + data: Any = struct.pack(fmt, 0x12345678) + struct.pack(fmt, -1) + struct.pack(fmt, 0) + assert read_func(data, 0) == 0x12345678 + assert read_func(data, 4) == -1 + assert read_func(data, 8) == 0 + + # Test error cases + # Index out of range + with assertRaises(IndexError, "index 20 out of range for bytes of length 12"): + read_func(data, 20 + int()) + with assertRaises(IndexError, "index 9 out of range for bytes of length 12"): + read_func(data, 9 + int()) # Not enough bytes for i32 + + # Negative index + with assertRaises(ValueError, "index must be non-negative"): + read_func(data, -1 + int()) + + # Wrong type for bytes argument + with assertRaises(TypeError): + read_func("not bytes", 0 + int()) # type: ignore + with assertRaises(TypeError): + read_func(bytearray(b"\x00\x00\x00\x00"), 0 + int()) # type: ignore def test_bytes_writer_write_i64_le() -> None: # Test all i64 values (includes all i32 values plus 5-8 byte values) @@ -359,24 +459,46 @@ def test_bytes_writer_write_i64_le() -> None: assert result[9:17] == struct.pack(" None: - # Test write_i64_le via Any to ensure C extension wrapper works - w: Any = BytesWriter() +def test_bytes_writer_write_i64_be() -> None: + # Test all i64 values (includes all i32 values plus 5-8 byte values) + w = BytesWriter() + for v in I64_TEST_VALUES: + write_i64_be(w, v) + assert w.getvalue() == b"".join(struct.pack(">q", v) for v in I64_TEST_VALUES) - w.append(0x42) - write_i64_le(w, 0x123456789ABCDEF0) + # Test mixing with other operations and buffer growth + w = BytesWriter() w.append(0xFF) - assert w.getvalue() == b"\x42" + struct.pack("q", 0) + assert result[9:17] == struct.pack(">q", 1000000000) + assert result[1593:1601] == struct.pack(">q", 199000000000) + +def test_write_i64_via_any() -> None: + # Test write_i64_le/be via Any to ensure C extension wrapper works + for write_func, fmt in zip((write_i64_le, write_i64_be), ("q")): + w: Any = BytesWriter() + + w.append(0x42) + write_func(w, 0x123456789ABCDEF0) + w.append(0xFF) + assert w.getvalue() == b"\x42" + struct.pack(fmt, 0x123456789ABCDEF0) + b"\xFF" + + # Test buffer growth + w2: Any = BytesWriter() + for i in range(50): + write_func(w2, i * 10000000000) + result = w2.getvalue() + assert len(result) == 400 + assert result[0:8] == struct.pack(fmt, 0) + assert result[8:16] == struct.pack(fmt, 10000000000) + assert result[392:400] == struct.pack(fmt, 490000000000) def test_bytes_reader_read_i64_le() -> None: # Test all i64 values (includes all i32 values plus 5-8 byte values) @@ -396,29 +518,56 @@ def test_bytes_reader_read_i64_le() -> None: data2 = b"\xFF" + struct.pack(" None: - # Test read_i64_le via Any to ensure C extension wrapper works - data: Any = struct.pack(" None: + # Test all i64 values (includes all i32 values plus 5-8 byte values) + data = b"".join(struct.pack(">q", v) for v in I64_TEST_VALUES) + for i, v in enumerate(I64_TEST_VALUES): + assert read_i64_be(data, i * 8) == v + + # Test round-trip with write_i64_be + w = BytesWriter() + for v in I64_TEST_VALUES: + write_i64_be(w, v) + result = w.getvalue() + for i, v in enumerate(I64_TEST_VALUES): + assert read_i64_be(result, i * 8) == v + + # Test unaligned offset + data2 = b"\xFF" + struct.pack(">q", 0x123456789ABCDEF0) + b"\xFF" + assert read_i64_be(data2, 1) == 0x123456789ABCDEF0 + + # Test barely out of bounds (only 7 bytes available, need 8) + with assertRaises(IndexError): + read_i64_be(data, len(data) - 7) + +def test_read_i64_via_any() -> None: + # Test read_i64_le/be via Any to ensure C extension wrapper works + for read_func, fmt in zip((read_i64_le, read_i64_be), ("q")): + data: Any = struct.pack(fmt, 0x123456789ABCDEF0) + struct.pack(fmt, -1) + struct.pack(fmt, 0) + assert read_func(data, 0) == 0x123456789ABCDEF0 + assert read_func(data, 8) == -1 + assert read_func(data, 16) == 0 + + # Test error cases + # Index out of range + with assertRaises(IndexError, "index 30 out of range for bytes of length 24"): + read_func(data, 30 + int()) + with assertRaises(IndexError, "index 17 out of range for bytes of length 24"): + read_func(data, 17 + int()) # Not enough bytes for i64 + + # Negative index + with assertRaises(ValueError, "index must be non-negative"): + read_func(data, -1 + int()) + + # Wrong type for bytes argument + with assertRaises(TypeError): + read_func("not bytes", 0 + int()) # type: ignore + with assertRaises(TypeError): + read_func(bytearray(b"\x00" * 8), 0 + int()) # type: ignore def test_write_bytearray() -> None: w = BytesWriter()