Skip to content

Commit 09d6771

Browse files
committed
gh-146169: correctly handle re-entrant parsing calls in Expat handlers
1 parent 1efe441 commit 09d6771

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,52 @@ def test_parse_again(self):
276276
self.assertEqual(expat.ErrorString(cm.exception.code),
277277
expat.errors.XML_ERROR_FINISHED)
278278

279+
@support.subTests("encoding", ("utf-8", "utf-16"))
280+
def test_parse_reentrancy_with_encoding(self, encoding):
281+
# See https://github.com/python/cpython/issues/146169.
282+
parser = expat.ParserCreate(encoding=encoding)
283+
284+
CharacterDataHandler = lambda data: parser.Parse(data, False)
285+
CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler)
286+
def StartElementHandler(name, attrs):
287+
parser.CharacterDataHandler = CharacterDataHandler
288+
parser.StartElementHandler = StartElementHandler
289+
290+
payload = "<a>x".encode(encoding)
291+
msg = re.escape("cannot call Parse() from within a handler")
292+
with self.assertRaisesRegex(RuntimeError, msg):
293+
for i in range(len(payload)):
294+
parser.Parse(payload[i:i+1], i == len(payload) - 1)
295+
CharacterDataHandler.assert_called_once_with("x")
296+
297+
@support.subTests("encoding", ("utf-8", "utf-16"))
298+
def test_parse_reentrancy_allowed_for_external_parser(self, encoding):
299+
parser = expat.ParserCreate(encoding=encoding)
300+
subparser = parser.ExternalEntityParserCreate(None, encoding)
301+
payload_extstr = '<!ENTITY ext SYSTEM "entity.file">'
302+
303+
def ExternalEntityRefHandler(*args):
304+
subparser.Parse(payload_extstr, True)
305+
return 1 # return an integer to indicate that parsing continues
306+
ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler)
307+
308+
def StartElementHandler(*args):
309+
parser.ExternalEntityRefHandler = ExternalEntityRefHandler
310+
parser.StartElementHandler = StartElementHandler
311+
312+
payload = f"""\
313+
<?xml version="1.0" standalone="no"?>
314+
<!DOCTYPE quotations SYSTEM "quotations.dtd" [{payload_extstr}]>
315+
<root>&ext;</root>
316+
""".encode(encoding)
317+
318+
# Check that external parsers can be called from parent's handlers.
319+
for i in range(len(payload)):
320+
parser.Parse(payload[i:i+1], i == len(payload) - 1)
321+
external_ref_args = ('ext', None, 'entity.file', None)
322+
ExternalEntityRefHandler.assert_called_once_with(*external_ref_args)
323+
324+
279325
class NamespaceSeparatorTest(unittest.TestCase):
280326
def test_legal(self):
281327
# Tests that make sure we get errors when the namespace_separator value
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`xml.parsers.expat`: raise :exc:`RuntimeError` when an Expat handler
2+
calls :meth:`parser.Parse <xml.parsers.expat.xmlparser.Parse>` on the parser
3+
that called the handler. Patch by Bénédikt Tran.

Modules/pyexpat.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,12 @@ pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
863863
int rc;
864864
pyexpat_state *state = PyType_GetModuleState(cls);
865865

866+
if (self->in_callback) {
867+
PyErr_SetString(PyExc_RuntimeError,
868+
"cannot call Parse() from within a handler");
869+
return NULL;
870+
}
871+
866872
if (PyUnicode_Check(data)) {
867873
view.buf = NULL;
868874
s = PyUnicode_AsUTF8AndSize(data, &slen);

0 commit comments

Comments
 (0)