diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
index d8e3b671ec229f..3ecd8754fa8ccd 100644
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -1032,6 +1032,21 @@ def test_failure(self):
         # Previously, a second call could crash due to internal inconsistency
         self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
 
+    def test_decompress_after_data_error(self):
+        data = bytes.fromhex(
+            "425a6839314159265359000000000000007fffff000000000000000000000000"
+            "00000000000000000000000000000000000000e0370000000000000000000000"
+            "000000000000000000000000000000000000000000000000000083f3"
+        )
+        bzd = BZ2Decompressor()
+        with self.assertRaisesRegex(OSError, "Invalid data stream"):
+            bzd.decompress(data)
+        # gh-150599: reuse after an error could crash; now it raises again.
+        self.assertFalse(bzd.needs_input)
+        self.assertFalse(bzd.eof)
+        with self.assertRaisesRegex(OSError, "Invalid data stream"):
+            bzd.decompress(b'\x00' * 18)
+
     @support.refcount_test
     def test_refleaks_in___init__(self):
         gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst
new file mode 100644
index 00000000000000..a37d86cf423f82
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst
@@ -0,0 +1,3 @@
+Fix a possible stack buffer overflow in :mod:`bz2` when a
+:class:`bz2.BZ2Decompressor` is reused after a decompression error.
+The decompressor now becomes unusable after libbz2 reports an error.
diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c
index 4cf8beed9ee3eb..77d7ee5264eac3 100644
--- a/Modules/_bz2module.c
+++ b/Modules/_bz2module.c
@@ -108,6 +108,7 @@ typedef struct {
 typedef struct {
     PyObject_HEAD
     bz_stream bzs;
+    int bzerror;
     char eof;           /* Py_T_BOOL expects a char */
     PyObject *unused_data;
     char needs_input;
@@ -435,8 +436,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
 
         d->bzs_avail_in_real += bzs->avail_in;
 
-        if (catch_bz2_error(bzret))
+        if (catch_bz2_error(bzret)) {
+            d->bzerror = bzret;
+            FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
             goto error;
+        }
         if (bzret == BZ_STREAM_END) {
             FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
             break;
@@ -607,10 +611,16 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
     PyObject *result = NULL;
 
     PyMutex_Lock(&self->mutex);
-    if (self->eof)
+    if (self->eof) {
         PyErr_SetString(PyExc_EOFError, "End of stream already reached");
-    else
+    }
+    else if (self->bzerror) {
+        // Re-entering BZ2_bzDecompress() after an error can write out of bounds.
+        catch_bz2_error(self->bzerror);
+    }
+    else {
         result = decompress(self, data->buf, data->len, max_length);
+    }
     PyMutex_Unlock(&self->mutex);
     return result;
 }