Merge branch 'main' into fix/decimal_ctx_status

LindaSummer · web-flow · commit d598e3c48314 · 2026-05-30T15:11:19.000+08:00
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst
@@ -496,7 +496,7 @@ subscript notation ``a[k]`` selects the item indexed by ``k`` from the mapping
 :keyword:`del` statements. The built-in function :func:`len` returns the number
 of items in a mapping.
 
-There is currently a single intrinsic mapping type:
+There are two intrinsic mapping types:
 
 
 Dictionaries
@@ -535,6 +535,20 @@ module.
    an implementation detail at that time rather than a language guarantee.
 
 
+Frozen dictionaries
+^^^^^^^^^^^^^^^^^^^
+
+.. index:: pair: object; frozendict
+
+These represent an immutable dictionary.  They are created by the built-in
+:func:`frozendict` constructor.  A frozendict is :term:`hashable` if all of
+its keys and values are hashable, in which case it can be used as an element
+of a set, or as a key in another mapping.  :class:`!frozendict` is not a
+subclass of :class:`dict`; it inherits directly from :class:`object`.
+
+.. versionadded:: 3.15
+
+
 Callable types
 --------------
 
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
@@ -307,7 +307,7 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
         cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
-        array_hook=None, **kw)
+        array_hook=array_hook, **kw)
 
 
 def loads(s, *, cls=None, object_hook=None, parse_float=None,
diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
@@ -87,6 +87,13 @@ def test_array_hook(self):
 
         self.assertEqual(self.loads('[]', array_hook=tuple), ())
 
+    def test_load_array_hook(self):
+        # json.load must forward array_hook to loads
+        fp = StringIO('[10, 20, 30]')
+        result = self.json.load(fp, array_hook=tuple)
+        self.assertEqual(result, (10, 20, 30))
+        self.assertEqual(type(result), tuple)
+
     def test_decoder_optimizations(self):
         # Several optimizations were made that skip over calls to
         # the whitespace regex, so this test is designed to try and
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
@@ -426,6 +426,16 @@ def test_unknown_encoding(self):
         with self.assertRaises(LookupError):
             parser.Parse(data, True)
 
+    @support.subTests('sample,exception', [
+        (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1', expat.ExpatError),
+    ])
+    def test_multibyte_encoding_errors(self, sample, exception):
+        parser = expat.ParserCreate()
+        data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+        with self.assertRaises(exception):
+            parser.Parse(data, True)
 
 class NamespaceSeparatorTest(unittest.TestCase):
     def test_legal(self):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
@@ -1064,6 +1064,17 @@ def bxml(encoding, body=''):
         self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
         self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
 
+    @support.subTests('sample,exception', [
+        (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1', None), # ET.ParseError
+    ])
+    def test_multibyte_encoding_errors(self, sample, exception):
+        exception = exception or ET.ParseError
+        data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+        with self.assertRaises(exception):
+            ET.XML(data)
+
     def test_methods(self):
         # Test serialization methods.
 
@@ -1287,7 +1298,15 @@ def check(p, expected, namespaces=None):
               {'': 'http://www.w3.org/2001/XMLSchema',
                'ns': 'http://www.w3.org/2001/XMLSchema'})
 
-    def test_processinginstruction(self):
+    def test_comment_serialization(self):
+        comm = ET.Comment('<spam> & ham')
+        # comments are not escaped
+        self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
+        self.assertEqual(ET.tostring(comm, method='html'), b'<!--<spam> & ham-->')
+        # no comments in text serialization
+        self.assertEqual(ET.tostring(comm, method='text'), b'')
+
+    def test_processinginstruction_serialization(self):
         # Test ProcessingInstruction directly
 
         self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
@@ -1296,12 +1315,32 @@ def test_processinginstruction(self):
                 b'<?test instruction?>')
 
         # Issue #2746
-
+        # processing instructions are not escaped
         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
                 b'<?test <testing&>?>')
         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
                 b"<?xml version='1.0' encoding='latin-1'?>\n"
                 b"<?test <testing&>\xe3?>")
+        pi = ET.PI('test', 'ham & eggs < spam')
+        self.assertEqual(ET.tostring(pi), b'<?test ham & eggs < spam?>')
+        self.assertEqual(ET.tostring(pi, method='html'), b'<?test ham & eggs < spam?>')
+        # no processing instructions in text serialization
+        self.assertEqual(ET.tostring(pi, method='text'), b'')
+
+    def test_empty_attribute_serialization(self):
+        # empty attrs only work in html
+        elem = ET.Element('tag', attrib={'attr': None})
+        self.assertRaises(TypeError, ET.tostring, elem)
+        self.assertEqual(ET.tostring(elem, method='html'), b'<tag attr></tag>')
+
+    @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
+    def test_html_cdata_elems_serialization(self, tag):
+        # content of raw text elements is not escaped in html
+        tag = tag.title()
+        elem = ET.Element(tag)
+        elem.text = '<spam>&ham'
+        self.assertEqual(ET.tostring(elem, method='html'),
+                         ('<%s><spam>&ham</%s>' % (tag, tag)).encode())
 
     def test_html_empty_elems_serialization(self):
         # issue 15970
@@ -1317,6 +1356,14 @@ def test_html_empty_elems_serialization(self):
                                        method='html')
                 self.assertEqual(serialized, expected)
 
+    def test_html_plaintext_serialization(self):
+        # content of plaintext is not escaped in html
+        # no end tag for plaintext
+        elem = ET.Element('PlainText')
+        elem.text = '<spam>&ham'
+        self.assertEqual(ET.tostring(elem, method='html'),
+                         b'<PlainText><spam>&ham')
+
     def test_dump_attribute_order(self):
         # See BPO 34160
         e = ET.Element('cirriculum', status='public', company='example')
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
@@ -917,17 +917,20 @@ def _serialize_xml(write, elem, qnames, namespaces,
     if elem.tail:
         write(_escape_cdata(elem.tail))
 
+_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
+                           "noframes", "plaintext"}
+
 HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
               "img", "input", "isindex", "link", "meta", "param", "source",
-              "track", "wbr"}
+              "track", "wbr", "plaintext"}
 
 def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
-        write("<!--%s-->" % _escape_cdata(text))
+        write("<!--%s-->" % text)
     elif tag is ProcessingInstruction:
-        write("<?%s?>" % _escape_cdata(text))
+        write("<?%s?>" % text)
     else:
         tag = qnames[tag]
         if tag is None:
@@ -951,16 +954,19 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
                 for k, v in items:
                     if isinstance(k, QName):
                         k = k.text
-                    if isinstance(v, QName):
-                        v = qnames[v.text]
+                    k = qnames[k]
+                    if v is None:
+                        write(" %s" % k)  # empty attr
                     else:
-                        v = _escape_attrib_html(v)
-                    # FIXME: handle boolean attributes
-                    write(" %s=\"%s\"" % (qnames[k], v))
+                        if isinstance(v, QName):
+                            v = qnames[v.text]
+                        else:
+                            v = _escape_attrib_html(v)
+                        write(" %s=\"%s\"" % (k, v))
             write(">")
             ltag = tag.lower()
             if text:
-                if ltag == "script" or ltag == "style":
+                if ltag in _CDATA_CONTENT_ELEMENTS:
                     write(text)
                 else:
                     write(_escape_cdata(text))
diff --git a/Misc/NEWS.d/next/Library/2026-04-29-08-10-17.gh-issue-149056.jnaD4W.rst b/Misc/NEWS.d/next/Library/2026-04-29-08-10-17.gh-issue-149056.jnaD4W.rst
@@ -0,0 +1,2 @@
+Fix :func:`json.load` not forwarding the *array_hook* argument to
+:func:`json.loads`. Patch by Thomas Kowalski.
diff --git a/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
@@ -0,0 +1,5 @@
+Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
+comments, processing instructions and elements "xmp", "iframe", "noembed",
+"noframes", and "plaintext" is no longer escaped. The "plaintext" element no
+longer has a closing tag. Add support for empty attributes (with value
+``None``).
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
@@ -1473,6 +1473,9 @@ pyexpat_encoding_create(const char *name, PyObject *mapping)
 static int
 pyexpat_encoding_convert(void *data, const char *s)
 {
+    if (PyErr_Occurred()) {
+        return -1;
+    }
     pyexpat_encoding_info *info = (pyexpat_encoding_info *)data;
     int i = (unsigned char)s[0];
     assert(info->map[i] < -1);

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+Fix :func:`json.load` not forwarding the *array_hook* argument to
	`2`	+:func:`json.loads`. Patch by Thomas Kowalski.