From 277eb4f1bd3a145b0195df4040e5b6fde91daade Mon Sep 17 00:00:00 2001 From: cjc0013 Date: Fri, 29 May 2026 13:11:15 -0400 Subject: [PATCH 1/2] gh-94937: Allow len() to return large Python lengths --- Doc/library/functions.rst | 5 ++- Doc/library/stdtypes.rst | 5 ++- Doc/reference/datamodel.rst | 9 ++-- Include/internal/pycore_abstract.h | 1 + Lib/test/test_builtin.py | 5 ++- Lib/test/test_random.py | 7 ++++ Lib/test/test_range.py | 21 ++-------- ...0.gh-issue-94937.len-overflow-fallback.rst | 3 ++ Objects/abstract.c | 42 +++++++++++++++++++ Objects/rangeobject.c | 8 ++++ Python/bltinmodule.c | 10 +---- Python/bytecodes.c | 13 ++---- Python/ceval.h | 2 +- Python/executor_cases.c.h | 15 +------ Python/generated_cases.c.h | 13 +----- 15 files changed, 89 insertions(+), 70 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-29-12-25-00.gh-issue-94937.len-overflow-fallback.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 0393e2dc776db4a..d25d831ea9ede79 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1158,8 +1158,9 @@ are always available. They are listed here in alphabetical order. .. impl-detail:: - ``len`` raises :exc:`OverflowError` on lengths larger than - :data:`sys.maxsize`, such as :class:`range(2 ** 100) `. + CPython's C length protocol is limited to :data:`sys.maxsize`. + When that limit is reached, ``len`` may still return a larger Python + integer if the object's :meth:`~object.__len__` method can provide one. .. _func-list: diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b0388c4e1f0bd45..d707b202e32acd8 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1533,8 +1533,9 @@ loops. indices. Ranges containing absolute values larger than :data:`sys.maxsize` are - permitted but some features (such as :func:`len`) may raise - :exc:`OverflowError`. + permitted, though some operations that use the C length protocol may still + raise :exc:`OverflowError` for ranges with lengths larger than + :data:`!sys.maxsize`. Range examples:: diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index aef5bbe151cfeba..0c7d302af0bf46d 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -3226,11 +3226,10 @@ through the object's keys; for sequences, it should iterate through the values. .. impl-detail:: - In CPython, the length is required to be at most :data:`sys.maxsize`. - If the length is larger than :data:`!sys.maxsize` some features (such as - :func:`len`) may raise :exc:`OverflowError`. To prevent raising - :exc:`!OverflowError` by truth value testing, an object must define a - :meth:`~object.__bool__` method. + In CPython, the C length protocol is limited to :data:`sys.maxsize`. + Some features that use that protocol may raise :exc:`OverflowError` + for larger lengths. To prevent raising :exc:`!OverflowError` by truth + value testing, an object must define a :meth:`~object.__bool__` method. .. method:: object.__length_hint__(self) diff --git a/Include/internal/pycore_abstract.h b/Include/internal/pycore_abstract.h index 67c6fa7c0c4ed5b..97609bdc7b79bea 100644 --- a/Include/internal/pycore_abstract.h +++ b/Include/internal/pycore_abstract.h @@ -21,6 +21,7 @@ PyAPI_FUNC(PyObject *) _PyNumber_PowerNoMod(PyObject *lhs, PyObject *rhs); PyAPI_FUNC(PyObject *) _PyNumber_InPlacePowerNoMod(PyObject *lhs, PyObject *rhs); PyAPI_FUNC(int) _PyObject_HasLen(PyObject *o); +PyAPI_FUNC(PyObject *) _PyObject_LengthAsPyLong(PyObject *o); /* === Sequence protocol ================================================ */ diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 1d2c105ac047e18..229fd2a4dd01164 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1364,7 +1364,10 @@ def __len__(self): class HugeLen: def __len__(self): return sys.maxsize + 1 - self.assertRaises(OverflowError, len, HugeLen()) + self.assertEqual(len(HugeLen()), sys.maxsize + 1) + huge_len = HugeLen() + huge_len.__len__ = lambda: 0 + self.assertEqual(len(huge_len), sys.maxsize + 1) class HugeNegativeLen: def __len__(self): return -sys.maxsize-10 diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index dbd3b855f536a0d..b711417fbdc0862 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -5,6 +5,7 @@ import time import pickle import shlex +import sys import warnings import test.support @@ -122,6 +123,7 @@ def test_choice(self): choice([]) self.assertEqual(choice([50]), 50) self.assertIn(choice([25, 75]), [25, 75]) + self.assertIn(choice(range(sys.maxsize * 2)), range(sys.maxsize * 2)) def test_choice_with_numpy(self): # Accommodation for NumPy arrays which have disabled __bool__(). @@ -177,6 +179,11 @@ def test_sample_inputs(self): self.gen.sample(range(20), 2) self.gen.sample(str('abcdefghijklmnopqrst'), 2) self.gen.sample(tuple('abcdefghijklmnopqrst'), 2) + population = range(sys.maxsize * 2) + sample = self.gen.sample(population, 10) + self.assertEqual(len(sample), 10) + self.assertEqual(len(set(sample)), 10) + self.assertTrue(all(element in population for element in sample)) def test_sample_on_dicts(self): self.assertRaises(TypeError, self.gen.sample, dict.fromkeys('abcdef'), 2) diff --git a/Lib/test/test_range.py b/Lib/test/test_range.py index 2c9c290e8906b73..a1840112963c02d 100644 --- a/Lib/test/test_range.py +++ b/Lib/test/test_range.py @@ -162,24 +162,14 @@ def test_large_operands(self): def test_large_range(self): # Check long ranges (len > sys.maxsize) - # len() is expected to fail due to limitations of the __len__ protocol - def _range_len(x): - try: - length = len(x) - except OverflowError: - step = x[1] - x[0] - length = 1 + ((x[-1] - x[0]) // step) - return length - a = -sys.maxsize b = sys.maxsize expected_len = b - a x = range(a, b) self.assertIn(a, x) self.assertNotIn(b, x) - self.assertRaises(OverflowError, len, x) + self.assertEqual(len(x), expected_len) self.assertTrue(x) - self.assertEqual(_range_len(x), expected_len) self.assertEqual(x[0], a) idx = sys.maxsize+1 self.assertEqual(x[idx], a+idx) @@ -195,9 +185,8 @@ def _range_len(x): x = range(a, b) self.assertIn(a, x) self.assertNotIn(b, x) - self.assertRaises(OverflowError, len, x) + self.assertEqual(len(x), expected_len) self.assertTrue(x) - self.assertEqual(_range_len(x), expected_len) self.assertEqual(x[0], a) idx = sys.maxsize+1 self.assertEqual(x[idx], a+idx) @@ -214,9 +203,8 @@ def _range_len(x): x = range(a, b, c) self.assertIn(a, x) self.assertNotIn(b, x) - self.assertRaises(OverflowError, len, x) + self.assertEqual(len(x), expected_len) self.assertTrue(x) - self.assertEqual(_range_len(x), expected_len) self.assertEqual(x[0], a) idx = sys.maxsize+1 self.assertEqual(x[idx], a+(idx*c)) @@ -233,9 +221,8 @@ def _range_len(x): x = range(a, b, c) self.assertIn(a, x) self.assertNotIn(b, x) - self.assertRaises(OverflowError, len, x) + self.assertEqual(len(x), expected_len) self.assertTrue(x) - self.assertEqual(_range_len(x), expected_len) self.assertEqual(x[0], a) idx = sys.maxsize+1 self.assertEqual(x[idx], a+(idx*c)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-29-12-25-00.gh-issue-94937.len-overflow-fallback.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-29-12-25-00.gh-issue-94937.len-overflow-fallback.rst new file mode 100644 index 000000000000000..e8dea053f6afb44 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-29-12-25-00.gh-issue-94937.len-overflow-fallback.rst @@ -0,0 +1,3 @@ +Allow :func:`len` to return values larger than :data:`sys.maxsize` when +``__len__`` can provide a Python integer fallback, including for large +:class:`range` objects. diff --git a/Objects/abstract.c b/Objects/abstract.c index 48b3137152e7bf3..8edd450769f3bf8 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -79,6 +79,48 @@ PyObject_Length(PyObject *o) } #define PyObject_Length PyObject_Size +PyObject * +_PyObject_LengthAsPyLong(PyObject *o) +{ + Py_ssize_t res = PyObject_Size(o); + if (res >= 0) { + return PyLong_FromSsize_t(res); + } + assert(PyErr_Occurred()); + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) { + return NULL; + } + + PyErr_Clear(); + PyObject *meth = _PyObject_LookupSpecial(o, &_Py_ID(__len__)); + if (meth == NULL) { + if (!PyErr_Occurred()) { + type_error("object of type '%.200s' has no len()", o); + } + return NULL; + } + + PyObject *len = _PyObject_CallNoArgs(meth); + Py_DECREF(meth); + if (len == NULL) { + return NULL; + } + + Py_SETREF(len, PyNumber_Index(len)); + if (len == NULL) { + return NULL; + } + + assert(PyLong_Check(len)); + if (_PyLong_IsNegative((PyLongObject *)len)) { + Py_DECREF(len); + PyErr_SetString(PyExc_ValueError, + "__len__() should return >= 0"); + return NULL; + } + return len; +} + int _PyObject_HasLen(PyObject *o) { return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) || diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 55b7f108730728d..edbb09537db298a 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -329,6 +329,13 @@ range_length(PyObject *op) return PyLong_AsSsize_t(r->length); } +static PyObject * +range_len(PyObject *op, PyObject *Py_UNUSED(ignored)) +{ + rangeobject *r = (rangeobject*)op; + return Py_NewRef(r->length); +} + static PyObject * compute_item(rangeobject *r, PyObject *i) { @@ -779,6 +786,7 @@ PyDoc_STRVAR(index_doc, "Raise ValueError if the value is not present."); static PyMethodDef range_methods[] = { + {"__len__", range_len, METH_NOARGS | METH_COEXIST, NULL}, {"__reversed__", range_reverse, METH_NOARGS, reverse_doc}, {"__reduce__", range_reduce, METH_NOARGS}, {"count", range_count, METH_O, count_doc}, diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index d5129bf6a5a6bc0..42f1dc3ad1a495c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1,6 +1,7 @@ /* Built-in functions */ #include "Python.h" +#include "pycore_abstract.h" // _PyObject_LengthAsPyLong() #include "pycore_ast.h" // _PyAST_Validate() #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() @@ -1992,14 +1993,7 @@ static PyObject * builtin_len(PyObject *module, PyObject *obj) /*[clinic end generated code: output=fa7a270d314dfb6c input=bc55598da9e9c9b5]*/ { - Py_ssize_t res; - - res = PyObject_Size(obj); - if (res < 0) { - assert(PyErr_Occurred()); - return NULL; - } - return PyLong_FromSsize_t(res); + return _PyObject_LengthAsPyLong(obj); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 993d231751409ba..2e91db97d709e91 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -7,7 +7,7 @@ // See Tools/cases_generator/README.md for more information. #include "Python.h" -#include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_abstract.h" // _PyIndex_Check(), _PyObject_LengthAsPyLong() #include "pycore_audit.h" // _PySys_Audit() #include "pycore_backoff.h" #include "pycore_cell.h" // PyCell_GetRef() @@ -3698,9 +3698,7 @@ dummy_func( inst(GET_LEN, (obj -- obj, len)) { // PUSH(len(TOS)) - Py_ssize_t len_i = PyObject_Length(PyStackRef_AsPyObjectBorrow(obj)); - ERROR_IF(len_i < 0); - PyObject *len_o = PyLong_FromSsize_t(len_i); + PyObject *len_o = _PyObject_LengthAsPyLong(PyStackRef_AsPyObjectBorrow(obj)); ERROR_IF(len_o == NULL); len = PyStackRef_FromPyObjectSteal(len_o); } @@ -5037,12 +5035,7 @@ dummy_func( /* len(o) */ STAT_INC(CALL, hit); PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); - Py_ssize_t len_i = PyObject_Length(arg_o); - if (len_i < 0) { - ERROR_NO_POP(); - } - PyObject *res_o = PyLong_FromSsize_t(len_i); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + PyObject *res_o = _PyObject_LengthAsPyLong(arg_o); if (res_o == NULL) { ERROR_NO_POP(); } diff --git a/Python/ceval.h b/Python/ceval.h index 0437ab85c5a6682..80d7b8fe0df5628 100644 --- a/Python/ceval.h +++ b/Python/ceval.h @@ -1,7 +1,7 @@ #define _PY_INTERPRETER #include "Python.h" -#include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_abstract.h" // _PyIndex_Check(), _PyObject_LengthAsPyLong() #include "pycore_audit.h" // _PySys_Audit() #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9aaf9639b9b9015..0e04dfd048ca594 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -14036,13 +14036,8 @@ stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); - Py_ssize_t len_i = PyObject_Length(PyStackRef_AsPyObjectBorrow(obj)); + PyObject *len_o = _PyObject_LengthAsPyLong(PyStackRef_AsPyObjectBorrow(obj)); stack_pointer = _PyFrame_GetStackPointer(frame); - if (len_i < 0) { - SET_CURRENT_CACHED_VALUES(0); - JUMP_TO_ERROR(); - } - PyObject *len_o = PyLong_FromSsize_t(len_i); if (len_o == NULL) { SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); @@ -18584,14 +18579,8 @@ stack_pointer += 3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); - Py_ssize_t len_i = PyObject_Length(arg_o); + PyObject *res_o = _PyObject_LengthAsPyLong(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (len_i < 0) { - SET_CURRENT_CACHED_VALUES(0); - JUMP_TO_ERROR(); - } - PyObject *res_o = PyLong_FromSsize_t(len_i); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); if (res_o == NULL) { SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 94384d5db3c107f..79d4e47ca9ab602 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3710,13 +3710,8 @@ STAT_INC(CALL, hit); PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); _PyFrame_SetStackPointer(frame, stack_pointer); - Py_ssize_t len_i = PyObject_Length(arg_o); + PyObject *res_o = _PyObject_LengthAsPyLong(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (len_i < 0) { - JUMP_TO_LABEL(error); - } - PyObject *res_o = PyLong_FromSsize_t(len_i); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -6688,12 +6683,8 @@ _PyStackRef len; obj = stack_pointer[-1]; _PyFrame_SetStackPointer(frame, stack_pointer); - Py_ssize_t len_i = PyObject_Length(PyStackRef_AsPyObjectBorrow(obj)); + PyObject *len_o = _PyObject_LengthAsPyLong(PyStackRef_AsPyObjectBorrow(obj)); stack_pointer = _PyFrame_GetStackPointer(frame); - if (len_i < 0) { - JUMP_TO_LABEL(error); - } - PyObject *len_o = PyLong_FromSsize_t(len_i); if (len_o == NULL) { JUMP_TO_LABEL(error); } From 7b5ef84815bfc0978b46b075028311fcc40aa55e Mon Sep 17 00:00:00 2001 From: cjc0013 Date: Sat, 30 May 2026 23:21:50 -0400 Subject: [PATCH 2/2] gh-94937: Fix generated cases and JIT len helper include --- Modules/_testinternalcapi/test_cases.c.h | 13 ++----------- Tools/jit/template.c | 1 + 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 11dfcc68eb2dacd..7232e6dcde131c3 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -3710,13 +3710,8 @@ STAT_INC(CALL, hit); PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); _PyFrame_SetStackPointer(frame, stack_pointer); - Py_ssize_t len_i = PyObject_Length(arg_o); + PyObject *res_o = _PyObject_LengthAsPyLong(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (len_i < 0) { - JUMP_TO_LABEL(error); - } - PyObject *res_o = PyLong_FromSsize_t(len_i); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -6688,12 +6683,8 @@ _PyStackRef len; obj = stack_pointer[-1]; _PyFrame_SetStackPointer(frame, stack_pointer); - Py_ssize_t len_i = PyObject_Length(PyStackRef_AsPyObjectBorrow(obj)); + PyObject *len_o = _PyObject_LengthAsPyLong(PyStackRef_AsPyObjectBorrow(obj)); stack_pointer = _PyFrame_GetStackPointer(frame); - if (len_i < 0) { - JUMP_TO_LABEL(error); - } - PyObject *len_o = PyLong_FromSsize_t(len_i); if (len_o == NULL) { JUMP_TO_LABEL(error); } diff --git a/Tools/jit/template.c b/Tools/jit/template.c index afdd9b77e7c7ffa..dfe6344bc88bb7f 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -1,5 +1,6 @@ #include "Python.h" +#include "pycore_abstract.h" #include "pycore_backoff.h" #include "pycore_call.h" #include "pycore_cell.h"