Skip to content

Commit 277eb4f

Browse files
committed
gh-94937: Allow len() to return large Python lengths
1 parent 26696a6 commit 277eb4f

15 files changed

Lines changed: 89 additions & 70 deletions

File tree

Doc/library/functions.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,8 +1158,9 @@ are always available. They are listed here in alphabetical order.
11581158

11591159
.. impl-detail::
11601160

1161-
``len`` raises :exc:`OverflowError` on lengths larger than
1162-
:data:`sys.maxsize`, such as :class:`range(2 ** 100) <range>`.
1161+
CPython's C length protocol is limited to :data:`sys.maxsize`.
1162+
When a length exceeds that limit, ``len`` may still return a larger Python
1163+
integer if the object's :meth:`~object.__len__` method can provide one.
11631164

11641165

11651166
.. _func-list:

Doc/library/stdtypes.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,8 +1533,9 @@ loops.
15331533
indices.
15341534

15351535
Ranges containing absolute values larger than :data:`sys.maxsize` are
1536-
permitted but some features (such as :func:`len`) may raise
1537-
:exc:`OverflowError`.
1536+
permitted, though some operations that use the C length protocol may still
1537+
raise :exc:`OverflowError` for ranges with lengths larger than
1538+
:data:`!sys.maxsize`.
15381539

15391540
Range examples::
15401541

Doc/reference/datamodel.rst

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3226,11 +3226,10 @@ through the object's keys; for sequences, it should iterate through the values.
32263226

32273227
.. impl-detail::
32283228

3229-
In CPython, the length is required to be at most :data:`sys.maxsize`.
3230-
If the length is larger than :data:`!sys.maxsize` some features (such as
3231-
:func:`len`) may raise :exc:`OverflowError`. To prevent raising
3232-
:exc:`!OverflowError` by truth value testing, an object must define a
3233-
:meth:`~object.__bool__` method.
3229+
In CPython, the C length protocol is limited to :data:`sys.maxsize`.
3230+
Some features that use that protocol may raise :exc:`OverflowError`
3231+
for larger lengths. To prevent raising :exc:`!OverflowError` by truth
3232+
value testing, an object must define a :meth:`~object.__bool__` method.
32343233

32353234

32363235
.. method:: object.__length_hint__(self)

Include/internal/pycore_abstract.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ PyAPI_FUNC(PyObject *) _PyNumber_PowerNoMod(PyObject *lhs, PyObject *rhs);
2121
PyAPI_FUNC(PyObject *) _PyNumber_InPlacePowerNoMod(PyObject *lhs, PyObject *rhs);
2222

2323
PyAPI_FUNC(int) _PyObject_HasLen(PyObject *o);
24+
PyAPI_FUNC(PyObject *) _PyObject_LengthAsPyLong(PyObject *o);
2425

2526
/* === Sequence protocol ================================================ */
2627

Lib/test/test_builtin.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1364,7 +1364,10 @@ def __len__(self):
13641364
class HugeLen:
13651365
def __len__(self):
13661366
return sys.maxsize + 1
1367-
self.assertRaises(OverflowError, len, HugeLen())
1367+
self.assertEqual(len(HugeLen()), sys.maxsize + 1)
1368+
huge_len = HugeLen()
1369+
huge_len.__len__ = lambda: 0
1370+
self.assertEqual(len(huge_len), sys.maxsize + 1)
13681371
class HugeNegativeLen:
13691372
def __len__(self):
13701373
return -sys.maxsize-10

Lib/test/test_random.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import time
66
import pickle
77
import shlex
8+
import sys
89
import warnings
910
import test.support
1011

@@ -122,6 +123,7 @@ def test_choice(self):
122123
choice([])
123124
self.assertEqual(choice([50]), 50)
124125
self.assertIn(choice([25, 75]), [25, 75])
126+
self.assertIn(choice(range(sys.maxsize * 2)), range(sys.maxsize * 2))
125127

126128
def test_choice_with_numpy(self):
127129
# Accommodation for NumPy arrays which have disabled __bool__().
@@ -177,6 +179,11 @@ def test_sample_inputs(self):
177179
self.gen.sample(range(20), 2)
178180
self.gen.sample(str('abcdefghijklmnopqrst'), 2)
179181
self.gen.sample(tuple('abcdefghijklmnopqrst'), 2)
182+
population = range(sys.maxsize * 2)
183+
sample = self.gen.sample(population, 10)
184+
self.assertEqual(len(sample), 10)
185+
self.assertEqual(len(set(sample)), 10)
186+
self.assertTrue(all(element in population for element in sample))
180187

181188
def test_sample_on_dicts(self):
182189
self.assertRaises(TypeError, self.gen.sample, dict.fromkeys('abcdef'), 2)

Lib/test/test_range.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -162,24 +162,14 @@ def test_large_operands(self):
162162

163163
def test_large_range(self):
164164
# Check long ranges (len > sys.maxsize)
165-
# len() is expected to fail due to limitations of the __len__ protocol
166-
def _range_len(x):
167-
try:
168-
length = len(x)
169-
except OverflowError:
170-
step = x[1] - x[0]
171-
length = 1 + ((x[-1] - x[0]) // step)
172-
return length
173-
174165
a = -sys.maxsize
175166
b = sys.maxsize
176167
expected_len = b - a
177168
x = range(a, b)
178169
self.assertIn(a, x)
179170
self.assertNotIn(b, x)
180-
self.assertRaises(OverflowError, len, x)
171+
self.assertEqual(len(x), expected_len)
181172
self.assertTrue(x)
182-
self.assertEqual(_range_len(x), expected_len)
183173
self.assertEqual(x[0], a)
184174
idx = sys.maxsize+1
185175
self.assertEqual(x[idx], a+idx)
@@ -195,9 +185,8 @@ def _range_len(x):
195185
x = range(a, b)
196186
self.assertIn(a, x)
197187
self.assertNotIn(b, x)
198-
self.assertRaises(OverflowError, len, x)
188+
self.assertEqual(len(x), expected_len)
199189
self.assertTrue(x)
200-
self.assertEqual(_range_len(x), expected_len)
201190
self.assertEqual(x[0], a)
202191
idx = sys.maxsize+1
203192
self.assertEqual(x[idx], a+idx)
@@ -214,9 +203,8 @@ def _range_len(x):
214203
x = range(a, b, c)
215204
self.assertIn(a, x)
216205
self.assertNotIn(b, x)
217-
self.assertRaises(OverflowError, len, x)
206+
self.assertEqual(len(x), expected_len)
218207
self.assertTrue(x)
219-
self.assertEqual(_range_len(x), expected_len)
220208
self.assertEqual(x[0], a)
221209
idx = sys.maxsize+1
222210
self.assertEqual(x[idx], a+(idx*c))
@@ -233,9 +221,8 @@ def _range_len(x):
233221
x = range(a, b, c)
234222
self.assertIn(a, x)
235223
self.assertNotIn(b, x)
236-
self.assertRaises(OverflowError, len, x)
224+
self.assertEqual(len(x), expected_len)
237225
self.assertTrue(x)
238-
self.assertEqual(_range_len(x), expected_len)
239226
self.assertEqual(x[0], a)
240227
idx = sys.maxsize+1
241228
self.assertEqual(x[idx], a+(idx*c))
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Allow :func:`len` to return values larger than :data:`sys.maxsize` when
2+
``__len__`` can provide a Python integer fallback, including for large
3+
:class:`range` objects.

Objects/abstract.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,48 @@ PyObject_Length(PyObject *o)
7979
}
8080
#define PyObject_Length PyObject_Size
8181

82+
/* Return the length of `o` as a Python int object, or NULL with an
 * exception set.
 *
 * PyObject_Size() is tried first.  If it fails with OverflowError, the
 * error is cleared and the `__len__` special method is looked up and
 * called directly; its result is converted with PyNumber_Index() and
 * rejected with ValueError if negative.  Any other error raised by
 * PyObject_Size() is propagated unchanged.
 *
 * NOTE(review): on the fallback path `__len__` is presumably invoked a
 * second time (PyObject_Size() appears to reach it through the type's
 * length slot first) -- confirm this is acceptable for `__len__`
 * implementations with side effects.
 * NOTE(review): an OverflowError raised from inside `__len__` itself is
 * not distinguished from a Py_ssize_t conversion overflow, so it also
 * triggers the fallback call.
 */
PyObject *
83+
_PyObject_LengthAsPyLong(PyObject *o)
84+
{
85+
/* Fast path: the length fits in Py_ssize_t. */
Py_ssize_t res = PyObject_Size(o);
86+
if (res >= 0) {
87+
return PyLong_FromSsize_t(res);
88+
}
89+
assert(PyErr_Occurred());
90+
/* Only OverflowError triggers the fallback; everything else propagates. */
if (!PyErr_ExceptionMatches(PyExc_OverflowError)) {
91+
return NULL;
92+
}
93+
94+
/* Drop the OverflowError and retry through the Python-level method. */
PyErr_Clear();
95+
PyObject *meth = _PyObject_LookupSpecial(o, &_Py_ID(__len__));
96+
if (meth == NULL) {
97+
/* Lookup failed without setting an error: report via type_error()
 * (helper defined elsewhere in this file; presumably sets TypeError). */
if (!PyErr_Occurred()) {
98+
type_error("object of type '%.200s' has no len()", o);
99+
}
100+
return NULL;
101+
}
102+
103+
PyObject *len = _PyObject_CallNoArgs(meth);
104+
Py_DECREF(meth);
105+
if (len == NULL) {
106+
return NULL;
107+
}
108+
109+
/* Replace the raw result with its PyNumber_Index() conversion; `len`
 * becomes NULL if the conversion fails. */
Py_SETREF(len, PyNumber_Index(len));
110+
if (len == NULL) {
111+
return NULL;
112+
}
113+
114+
assert(PyLong_Check(len));
115+
/* Reject negative lengths with ValueError. */
if (_PyLong_IsNegative((PyLongObject *)len)) {
116+
Py_DECREF(len);
117+
PyErr_SetString(PyExc_ValueError,
118+
"__len__() should return >= 0");
119+
return NULL;
120+
}
121+
return len;
122+
}
123+
82124
int
83125
_PyObject_HasLen(PyObject *o) {
84126
return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) ||

Objects/rangeobject.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,13 @@ range_length(PyObject *op)
329329
return PyLong_AsSsize_t(r->length);
330330
}
331331

332+
/* Method-table implementation of range.__len__(): return a new reference
 * to the stored length object `r->length` as-is, without converting it to
 * Py_ssize_t (range_length() above converts it with PyLong_AsSsize_t()).
 * The second argument is unused.
 */
static PyObject *
333+
range_len(PyObject *op, PyObject *Py_UNUSED(ignored))
334+
{
335+
rangeobject *r = (rangeobject*)op;
336+
return Py_NewRef(r->length);
337+
}
338+
332339
static PyObject *
333340
compute_item(rangeobject *r, PyObject *i)
334341
{
@@ -779,6 +786,7 @@ PyDoc_STRVAR(index_doc,
779786
"Raise ValueError if the value is not present.");
780787

781788
static PyMethodDef range_methods[] = {
789+
{"__len__", range_len, METH_NOARGS | METH_COEXIST, NULL},
782790
{"__reversed__", range_reverse, METH_NOARGS, reverse_doc},
783791
{"__reduce__", range_reduce, METH_NOARGS},
784792
{"count", range_count, METH_O, count_doc},

0 commit comments

Comments
 (0)