From 12efd8a72bdafae62601728799636811a45e865c Mon Sep 17 00:00:00 2001 From: WYSIATI Date: Sun, 12 Apr 2026 19:28:26 +0800 Subject: [PATCH 1/4] gh-148428: Fix file handle leak in pulldom.parse() When pulldom.parse() is called with a filename, the opened file handle is never closed. Add close(), context manager support, and __del__ with ResourceWarning to DOMEventStream. Update clear() to close owned streams. --- Doc/library/xml.dom.pulldom.rst | 12 +++++ Lib/test/test_pulldom.py | 47 +++++++++++++++++-- Lib/xml/dom/pulldom.py | 31 +++++++++++- ...-04-12-11-14-40.gh-issue-148428.kP9qWx.rst | 4 ++ 4 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 52340ffe92eb85..1732f2ce2c044e 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -114,6 +114,10 @@ DOMEventStream Objects .. versionchanged:: 3.11 Support for :meth:`~object.__getitem__` method has been removed. + .. versionchanged:: next + :class:`DOMEventStream` now supports the :term:`context manager` + protocol. File handles opened by :func:`parse` are closed on exit. + .. method:: getEvent() Return a tuple containing *event* and the current *node* as @@ -141,3 +145,11 @@ DOMEventStream Objects print(node.toxml()) .. method:: DOMEventStream.reset() + + .. method:: DOMEventStream.close() + + Close the underlying stream if it was opened by :func:`parse`. + Has no effect if the stream was provided by the caller or is already + closed. It is safe to call this method more than once. + + .. versionadded:: next diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 3c8ed251acaa4d..60ad70182bbe03 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -6,6 +6,7 @@ from xml.sax.handler import feature_external_ges from xml.dom import pulldom +from test import support from test.support import findfile @@ -31,15 +32,53 @@ def test_parse(self): # semantics are tested using parseString with a more focused XML # fragment. - # Test with a filename: - handler = pulldom.parse(tstfile) - self.addCleanup(handler.stream.close) - list(handler) + # Test with a filename (context manager): + with pulldom.parse(tstfile) as handler: + list(handler) # Test with a file object: with open(tstfile, "rb") as fin: list(pulldom.parse(fin)) + def test_context_manager_closes_file(self): + # gh-148428: context manager should close owned stream + with pulldom.parse(tstfile) as events: + stream = events.stream + for event, node in events: + pass + self.assertTrue(stream.closed) + + def test_context_manager_does_not_close_user_stream(self): + # gh-148428: context manager must not close user-provided streams + with open(tstfile, 'rb') as f: + events = pulldom.DOMEventStream(f, xml.sax.make_parser(), 16384) + for event, node in events: + pass + events.close() + self.assertFalse(f.closed) + + def test_close_is_idempotent(self): + # gh-148428: calling close() multiple times should be safe + events = pulldom.parse(tstfile) + stream = events.stream + events.close() + self.assertTrue(stream.closed) + events.close() # should not raise + + def test_clear_closes_owned_stream(self): + # gh-148428: clear() should close the stream when parse() opened it + events = pulldom.parse(tstfile) + stream = events.stream + events.clear() + self.assertTrue(stream.closed) + + def test_resource_warning_on_del(self): + # gh-148428: unclosed DOMEventStream should emit ResourceWarning + events = pulldom.parse(tstfile) + with self.assertWarns(ResourceWarning): + del events + support.gc_collect() + def test_parse_semantics(self): """Test DOMEventStream parsing semantics.""" diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py index 913141cd7ef3ce..f5626b5365ea1e 100644 --- a/Lib/xml/dom/pulldom.py +++ b/Lib/xml/dom/pulldom.py @@ -1,3 +1,4 @@ +import warnings import xml.sax import xml.sax.handler @@ -202,10 +203,11 @@ def fatalError(self, exception): raise exception class DOMEventStream: - def __init__(self, stream, parser, bufsize): + def __init__(self, stream, parser, bufsize, _owns_stream=False): self.stream = stream self.parser = parser self.bufsize = bufsize + self._owns_stream = _owns_stream if not hasattr(self.parser, 'feed'): self.getEvent = self._slurp self.reset() @@ -225,6 +227,28 @@ def __next__(self): def __iter__(self): return self + def close(self): + """Close the stream if it was opened by parse().""" + if self._owns_stream and self.stream is not None: + self.stream.close() + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def __del__(self, _warn=warnings.warn): + if self._owns_stream and self.stream is not None: + try: + if not self.stream.closed: + _warn( + f"unclosed {self!r}", + ResourceWarning, + source=self) + finally: + self.stream.close() + def expandNode(self, node): event = self.getEvent() parents = [node] @@ -275,6 +299,7 @@ def _emit(self): def clear(self): """clear(): Explicitly release parsing objects""" + self.close() self.pulldom.clear() del self.pulldom self.parser = None @@ -320,11 +345,13 @@ def parse(stream_or_string, parser=None, bufsize=None): bufsize = default_bufsize if isinstance(stream_or_string, str): stream = open(stream_or_string, 'rb') + owns_stream = True else: stream = stream_or_string + owns_stream = False if not parser: parser = xml.sax.make_parser() - return DOMEventStream(stream, parser, bufsize) + return DOMEventStream(stream, parser, bufsize, _owns_stream=owns_stream) def parseString(string, parser=None): from io import StringIO diff --git a/Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst b/Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst new file mode 100644 index 00000000000000..f5befbd514a286 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst @@ -0,0 +1,4 @@ +:class:`xml.dom.pulldom.DOMEventStream` now supports the :term:`context +manager` protocol and has a :meth:`~xml.dom.pulldom.DOMEventStream.close` +method. File handles opened by :func:`xml.dom.pulldom.parse` are now +properly closed, fixing a resource leak. From 282886b1fd490d351de2d4b201d91e1a2205f052 Mon Sep 17 00:00:00 2001 From: WYSIATI Date: Sun, 12 Apr 2026 20:59:17 +0800 Subject: [PATCH 2/4] gh-148428: Revert off-topic change to test_parse Keep the existing test_parse unchanged; context manager behavior is covered by the new dedicated tests. --- Lib/test/test_pulldom.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 60ad70182bbe03..2a53dd1d87e11f 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -32,9 +32,10 @@ def test_parse(self): # semantics are tested using parseString with a more focused XML # fragment. - # Test with a filename (context manager): - with pulldom.parse(tstfile) as handler: - list(handler) + # Test with a filename: + handler = pulldom.parse(tstfile) + self.addCleanup(handler.stream.close) + list(handler) # Test with a file object: with open(tstfile, "rb") as fin: From 57840f65bd94c5156695e8b98dda3ac69331bb66 Mon Sep 17 00:00:00 2001 From: WYSIATI Date: Sun, 12 Apr 2026 21:01:48 +0800 Subject: [PATCH 3/4] gh-148428: Remove redundant test and off-topic change Remove test_context_manager_closes_file (covered by test_close_is_idempotent) and revert the unrelated modification to test_parse. --- Lib/test/test_pulldom.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 2a53dd1d87e11f..abe7113ca7fbcc 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -41,14 +41,6 @@ def test_parse(self): with open(tstfile, "rb") as fin: list(pulldom.parse(fin)) - def test_context_manager_closes_file(self): - # gh-148428: context manager should close owned stream - with pulldom.parse(tstfile) as events: - stream = events.stream - for event, node in events: - pass - self.assertTrue(stream.closed) - def test_context_manager_does_not_close_user_stream(self): # gh-148428: context manager must not close user-provided streams with open(tstfile, 'rb') as f: From baaf87fb48a450656952677ad4548083aa66933b Mon Sep 17 00:00:00 2001 From: WYSIATI Date: Sun, 12 Apr 2026 21:38:34 +0800 Subject: [PATCH 4/4] gh-148428: Fix test_parse ResourceWarning from __del__ Use handler.close() instead of handler.stream.close() in addCleanup so the DOMEventStream is properly closed before GC triggers __del__. --- Lib/test/test_pulldom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index abe7113ca7fbcc..594ce1dd8af32c 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -34,7 +34,7 @@ def test_parse(self): # Test with a filename: handler = pulldom.parse(tstfile) - self.addCleanup(handler.stream.close) + self.addCleanup(handler.close) list(handler) # Test with a file object: