From e41d61cfaf4b11324df05c19bd475a52cdacabc7 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 7 May 2026 14:18:52 +0300
Subject: [PATCH 1/8] gh-149489: Fix ElementTree serialization to HTML

* The content of comments, processing instructions and elements "xmp",
  "iframe", "noembed", "noframes", and "plaintext" is no longer escaped.
* The "plaintext" element no longer has a closing tag.
* Add support for empty attributes (with value None).
---
 Lib/test/test_xml_etree.py                    | 29 ++++++++++++++++++-
 Lib/xml/etree/ElementTree.py                  | 24 +++++++++------
 ...-05-07-14-18-47.gh-issue-149489.bX9iHe.rst |  5 ++++
 3 files changed, 48 insertions(+), 10 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 8f3efe9fc90794b..b820845f3b63e21 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1278,7 +1278,13 @@ def check(p, expected, namespaces=None):
               {'': 'http://www.w3.org/2001/XMLSchema',
                'ns': 'http://www.w3.org/2001/XMLSchema'})
 
-    def test_processinginstruction(self):
+    def test_comment_serialization(self):
+        comm = ET.Comment('<spam> & ham')
+        self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
+        self.assertEqual(ET.tostring(comm, method='html'), b'<!--<spam> & ham-->')
+        self.assertEqual(ET.tostring(comm, method='text'), b'<spam> & ham')
+
+    def test_processinginstruction_serialization(self):
         # Test ProcessingInstruction directly
 
         self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
@@ -1293,6 +1299,21 @@ def test_processinginstruction(self):
         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
                 b"<?xml version='1.0' encoding='latin-1'?>\n"
                 b"<?test <testing&>\xe3?>")
+        self.assertEqual(ET.tostring(ET.PI('test', 'ham & eggs < spam'), method='html'),
+                b'<?test ham & eggs < spam?>')
+
+    def test_empty_attribute_serialization(self):
+        elem = ET.Element('tag', attrib={'attr': None})
+        self.assertRaises(TypeError, ET.tostring, elem)
+        self.assertEqual(ET.tostring(elem, method='html'), b'<tag attr></tag>')
+
+    @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
+    def test_html_cdata_elems_serialization(self, tag):
+        tag = tag.title()
+        elem = ET.Element(tag)
+        elem.text = '<spam>&ham'
+        self.assertEqual(ET.tostring(elem, method='html'),
+                         ('<%s><spam>&ham</%s>' % (tag, tag)).encode())
 
     def test_html_empty_elems_serialization(self):
         # issue 15970
@@ -1308,6 +1329,12 @@ def test_html_empty_elems_serialization(self):
                                        method='html')
                 self.assertEqual(serialized, expected)
 
+    def test_html_plaintext_serialization(self):
+        elem = ET.Element('PlainText')
+        elem.text = '<spam>&ham'
+        self.assertEqual(ET.tostring(elem, method='html'),
+                         b'<PlainText><spam>&ham')
+
     def test_dump_attribute_order(self):
         # See BPO 34160
         e = ET.Element('cirriculum', status='public', company='example')
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 85766e02b531ce2..7b14ec360d7cf7c 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -907,17 +907,20 @@ def _serialize_xml(write, elem, qnames, namespaces,
     if elem.tail:
         write(_escape_cdata(elem.tail))
 
+_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
+                           "noframes", "plaintext"}
+
 HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
               "img", "input", "isindex", "link", "meta", "param", "source",
-              "track", "wbr"}
+              "track", "wbr", "plaintext"}
 
 def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
-        write("<!--%s-->" % _escape_cdata(text))
+        write("<!--%s-->" % text)
     elif tag is ProcessingInstruction:
-        write("<?%s?>" % _escape_cdata(text))
+        write("<?%s?>" % text)
     else:
         tag = qnames[tag]
         if tag is None:
@@ -941,16 +944,19 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
                 for k, v in items:
                     if isinstance(k, QName):
                         k = k.text
-                    if isinstance(v, QName):
-                        v = qnames[v.text]
+                    k = qnames[k]
+                    if v is None:
+                        write(" %s" % k)
                     else:
-                        v = _escape_attrib_html(v)
-                    # FIXME: handle boolean attributes
-                    write(" %s=\"%s\"" % (qnames[k], v))
+                        if isinstance(v, QName):
+                            v = qnames[v.text]
+                        else:
+                            v = _escape_attrib_html(v)
+                        write(" %s=\"%s\"" % (k, v))
             write(">")
             ltag = tag.lower()
             if text:
-                if ltag == "script" or ltag == "style":
+                if ltag in _CDATA_CONTENT_ELEMENTS:
                     write(text)
                 else:
                     write(_escape_cdata(text))
diff --git a/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
new file mode 100644
index 000000000000000..1550c893fd7c45b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
@@ -0,0 +1,5 @@
+Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
+comments, processing instructions and elements "xmp", "iframe", "noembed",
+"noframes", and "plaintext" is no longer escaped. The "plaintext" element no
+longer has a closing tag. Add support for empty attributes (with value
+``None``).

From a134c0b83ab6a612d44f7875efda7bb9f4625547 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 6 May 2026 22:23:29 +0300
Subject: [PATCH 2/8] gh-149468: Add option to validate ElementTree during
 serialization

---
 Doc/library/xml.etree.elementtree.rst         |  30 ++-
 Doc/whatsnew/3.15.rst                         |  11 ++
 Lib/test/test_xml_etree.py                    | 186 ++++++++++++++++++
 Lib/xml/etree/ElementTree.py                  | 111 +++++++++--
 ...-05-06-22-22-05.gh-issue-149468.IUSCzU.rst |   3 +
 5 files changed, 321 insertions(+), 20 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst

diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 310ccd651e18c7e..b8c8b8f3c009ec8 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -711,14 +711,14 @@ Functions
 
 .. function:: tostring(element, encoding="us-ascii", method="xml", *, \
                        xml_declaration=None, default_namespace=None, \
-                       short_empty_elements=True)
+                       validate=False, short_empty_elements=True)
 
    Generates a string representation of an XML element, including all
    subelements.  *element* is an :class:`Element` instance.  *encoding* [1]_ is
    the output encoding (default is US-ASCII).  Use ``encoding="unicode"`` to
    generate a Unicode string (otherwise, a bytestring is generated).  *method*
    is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
-   *xml_declaration*, *default_namespace* and *short_empty_elements* has the same
+   *xml_declaration*, *default_namespace*, *validate* and *short_empty_elements* has the same
    meaning as in :meth:`ElementTree.write`. Returns an (optionally) encoded string
    containing the XML data.
 
@@ -732,17 +732,20 @@ Functions
       The :func:`tostring` function now preserves the attribute order
       specified by the user.
 
+   .. versionchanged:: next
+      Added the *validate* parameter.
+
 
 .. function:: tostringlist(element, encoding="us-ascii", method="xml", *, \
                            xml_declaration=None, default_namespace=None, \
-                           short_empty_elements=True)
+                           validate=False, short_empty_elements=True)
 
    Generates a string representation of an XML element, including all
    subelements.  *element* is an :class:`Element` instance.  *encoding* [1]_ is
    the output encoding (default is US-ASCII).  Use ``encoding="unicode"`` to
    generate a Unicode string (otherwise, a bytestring is generated).  *method*
    is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
-   *xml_declaration*, *default_namespace* and *short_empty_elements* has the same
+   *xml_declaration*, *default_namespace*, *validate* and *short_empty_elements* has the same
    meaning as in :meth:`ElementTree.write`. Returns a list of (optionally) encoded
    strings containing the XML data. It does not guarantee any specific sequence,
    except that ``b"".join(tostringlist(element)) == tostring(element)``.
@@ -752,6 +755,9 @@ Functions
    .. versionchanged:: 3.4
       Added the *short_empty_elements* parameter.
 
+   .. versionchanged:: next
+      Added the *validate* parameter.
+
    .. versionchanged:: 3.8
       Added the *xml_declaration* and *default_namespace* parameters.
 
@@ -759,6 +765,9 @@ Functions
       The :func:`tostringlist` function now preserves the attribute order
       specified by the user.
 
+   .. versionchanged:: next
+      Added the *validate* parameter.
+
 
 .. function:: XML(text, parser=None)
 
@@ -1186,7 +1195,7 @@ ElementTree Objects
 
    .. method:: write(file, encoding="us-ascii", xml_declaration=None, \
                      default_namespace=None, method="xml", *, \
-                     short_empty_elements=True)
+                     validate=False, short_empty_elements=True)
 
       Writes the element tree to a file, as XML.  *file* is a file name, or a
       :term:`file object` opened for writing.  *encoding* [1]_ is the output
@@ -1197,6 +1206,14 @@ ElementTree Objects
       *default_namespace* sets the default XML namespace (for "xmlns").
       *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is
       ``"xml"``).
+
+      If *validate* is true, check that all characters are legal XML or HTML
+      characters, depending on *method*, element and attribute names are
+      valid, and the content of comments, processing instructions and
+      HTML elements like ``<script>`` do not contain illegal sequences,
+      and raise :exc:`ValueError` otherwise.
+      By default, no validation is performed.
+
       The keyword-only *short_empty_elements* parameter controls the formatting
       of elements that contain no content.  If ``True`` (the default), they are
       emitted as a single self-closed tag, otherwise they are emitted as a pair
@@ -1216,6 +1233,9 @@ ElementTree Objects
          The :meth:`write` method now preserves the attribute order specified
          by the user.
 
+      .. versionchanged:: next
+         Added the *validate* parameter.
+
 
 This is the XML file that is going to be manipulated::
 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 9e2f789334ff02b..3a711c1a2cfedc2 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1813,6 +1813,17 @@ xml
   (Contributed by Serhiy Storchaka in :gh:`139489`.)
 
 
+xml.etree.ElementTree
+---------------------
+
+* Add the *validate* option to functions
+  :func:`~xml.etree.ElementTree.tostring`,
+  :func:`~xml.etree.ElementTree.tostringlist`, and the
+  :meth:`ElementTree.write <xml.etree.ElementTree.ElementTree.write>` method,
+  which allows validating the element or element tree before serialization.
+  (Contributed by Serhiy Storchaka in :gh:`149468`.)
+
+
 xml.parsers.expat
 -----------------
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index b820845f3b63e21..55b86769af128d4 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1358,6 +1358,192 @@ def test_attlist_default(self):
                          {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
 
 
+class XMLValidationTest(unittest.TestCase):
+
+    def check(self, elem, expected=None):
+        self.assertRaises(ValueError,
+            ET.tostring, elem, validate=True)
+        ET.tostring(elem)  # no exception
+
+    def test_invalid_comment(self):
+        self.check(ET.Comment('a--b'))
+        self.check(ET.Comment(' B+, B, or B-'))
+
+    def test_invalid_processing_instruction(self):
+        self.check(ET.PI(''))
+        self.check(ET.PI('0'))
+        self.check(ET.PI('a/b'))
+        self.check(ET.PI('foo\xa0bar'))
+        self.check(ET.PI('xml'))
+        self.check(ET.PI('xml', 'encoding="UTF-8"'))
+        self.check(ET.PI('foo', 'a?>b'))
+        self.check(ET.PI('foo', '\x00'))
+        self.check(ET.PI('foo', '\ud8ff'))
+        self.check(ET.PI('foo', '\ufffe'))
+
+    def test_invalid_tag(self):
+        self.check(ET.Element(''))
+        self.check(ET.Element('0'))
+        self.check(ET.Element('a/b'))
+        self.check(ET.Element(ET.QName('')))
+        self.check(ET.Element(ET.QName('0')))
+        self.check(ET.Element(ET.QName('a/b')))
+
+    def test_invalid_attr_name(self):
+        self.check(ET.Element('tag', attrib={'': 'value'}))
+        self.check(ET.Element('tag', attrib={'0': 'value'}))
+        self.check(ET.Element('tag', attrib={'a/b': 'value'}))
+        self.check(ET.Element('tag', attrib={ET.QName(''): 'value'}))
+        self.check(ET.Element('tag', attrib={ET.QName('0'): 'value'}))
+        self.check(ET.Element('tag', attrib={ET.QName('a/b'): 'value'}))
+
+    def test_invalid_attr_value(self):
+        self.check(ET.Element('tag', attrib={'key': '\x00'}))
+        self.check(ET.Element('tag', attrib={'key': '\ud8ff'}))
+        self.check(ET.Element('tag', attrib={'key': '\ufffe'}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('\x00')}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('\ud8ff')}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('\ufffe')}))
+
+    def test_invalid_text(self):
+        elem = ET.Element('tag')
+        elem.text = '\x00'
+        self.check(elem)
+        elem.text = '\ud8ff'
+        self.check(elem)
+        elem.text = '\ufffe'
+        self.check(elem)
+
+    def test_invalid_tail(self):
+        elem = ET.Element('tag')
+        elem.tail = '\x00'
+        self.check(elem)
+        elem.tail = '\ud8ff'
+        self.check(elem)
+        elem.tail = '\ufffe'
+        self.check(elem)
+
+    def test_invalid_text_without_tag(self):
+        elem = ET.Element(None)
+        elem.text = '\x00'
+        self.check(elem)
+        elem.text = '\ud8ff'
+        self.check(elem)
+        elem.text = '\ufffe'
+        self.check(elem)
+
+    def test_invalid_subelements(self):
+        elem = ET.Element('tag')
+        subelem = ET.SubElement(elem, 'subtag')
+        ET.SubElement(subelem, '\x00')
+        self.check(elem)
+        elem.tag = None
+        self.check(elem)
+
+    def test_invalid_namespace_uri(self):
+        self.check(ET.Element('{\x00}tag'))
+        self.check(ET.Element('{\ud8ff}tag'))
+        self.check(ET.Element('{\ufffe}tag'))
+        self.check(ET.Element(ET.QName('\x00', 'tag')))
+        self.check(ET.Element(ET.QName('\ud8ff', 'tag')))
+        self.check(ET.Element(ET.QName('\ufffe', 'tag')))
+
+class HTMLValidationTest(unittest.TestCase):
+
+    def check(self, elem, expected=None):
+        self.assertRaises(ValueError,
+            ET.tostring, elem, method='html', validate=True)
+        ET.tostring(elem, method='html')  # no exception
+
+    def test_invalid_comment(self):
+        self.check(ET.Comment('>'))
+        self.check(ET.Comment('->'))
+        self.check(ET.Comment('a-->b'))
+        self.check(ET.Comment('a--!>b'))
+        self.check(ET.Comment('a\x00b'))
+
+    def test_invalid_processing_instruction(self):
+        self.check(ET.PI('a>b'))
+        self.check(ET.PI('a\x00b'))
+
+    def test_invalid_tag(self):
+        self.check(ET.Element(''))
+        self.check(ET.Element('?'))
+        self.check(ET.Element('!'))
+        self.check(ET.Element('0'))
+        self.check(ET.Element(' a'))
+        self.check(ET.Element('a b'))
+        self.check(ET.Element('a\nb'))
+        self.check(ET.Element('a/b'))
+        self.check(ET.Element('a>b'))
+        self.check(ET.Element('a\x00b'))
+        self.check(ET.Element(ET.QName('')))
+        self.check(ET.Element(ET.QName('0')))
+        self.check(ET.Element(ET.QName('a/b')))
+
+    def test_invalid_attr_name(self):
+        self.check(ET.Element('tag', attrib={'': 'value'}))
+        self.check(ET.Element('tag', attrib={'a/b': 'value'}))
+        self.check(ET.Element('tag', attrib={'a=b': 'value'}))
+        self.check(ET.Element('tag', attrib={ET.QName(''): 'value'}))
+        self.check(ET.Element('tag', attrib={ET.QName('a/b'): 'value'}))
+
+    def test_invalid_attr_value(self):
+        self.check(ET.Element('tag', attrib={'key': '\x00'}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('\x00')}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('a"b')}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('a&b')}))
+
+    def test_invalid_text(self):
+        elem = ET.Element('tag')
+        elem.text = '\x00'
+        self.check(elem)
+
+    def test_invalid_tail(self):
+        elem = ET.Element('tag')
+        elem.tail = '\x00'
+        self.check(elem)
+
+    def test_invalid_text_without_tag(self):
+        elem = ET.Element(None)
+        elem.text = '\x00'
+        self.check(elem)
+
+    def test_invalid_subelements(self):
+        elem = ET.Element('tag')
+        subelem = ET.SubElement(elem, 'subtag')
+        ET.SubElement(subelem, '\x00')
+        self.check(elem)
+        elem.tag = None
+        self.check(elem)
+
+    def test_invalid_namespace_uri(self):
+        self.check(ET.Element('{\x00}tag'))
+        self.check(ET.Element(ET.QName('\x00', 'tag')))
+
+    @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
+    def test_invalid_cdata_content(self, tag):
+        elem = ET.Element(tag.upper())
+        elem.text = 'a</%s>b' % tag.title()
+        self.check(elem)
+        elem.text = 'a</%s b' % tag.title()
+        self.check(elem)
+        elem.text = 'a</%s/b' % tag.title()
+        self.check(elem)
+        elem.text = 'a\x00b'
+        self.check(elem)
+
+    @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
+    def test_cdata_subelements(self, tag):
+        elem = ET.Element(tag)
+        ET.SubElement(elem, 'subtag')
+        self.check(elem)
+
+    def test_invalid_plaintext_content(self):
+        elem = ET.Element('plaintext')
+        elem.text = 'a\x00b'
+        self.check(elem)
+
 class IterparseTest(unittest.TestCase):
     # Test iterparse interface.
 
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 7b14ec360d7cf7c..6faf348aacf01b4 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -99,6 +99,7 @@
 import weakref
 
 from . import ElementPath
+from .. import is_valid_name, is_valid_text
 
 
 class ParseError(SyntaxError):
@@ -689,6 +690,7 @@ def write(self, file_or_filename,
               xml_declaration=None,
               default_namespace=None,
               method=None, *,
+              validate=False,
               short_empty_elements=True):
         """Write element tree to a file as XML.
 
@@ -706,6 +708,8 @@ def write(self, file_or_filename,
 
           *method* -- either "xml" (default), "html, "text", or "c14n"
 
+          *validate* -- if true, validate the content
+
           *short_empty_elements* -- controls the formatting of elements
                                     that contain no content. If True (default)
                                     they are emitted as a single self-closed
@@ -737,6 +741,7 @@ def write(self, file_or_filename,
                 qnames, namespaces = _namespaces(self._root, default_namespace)
                 serialize = _serialize[method]
                 serialize(write, self._root, qnames, namespaces,
+                          validate=validate,
                           short_empty_elements=short_empty_elements)
 
     def write_c14n(self, file):
@@ -857,23 +862,39 @@ def add_qname(qname):
             add_qname(text.text)
     return qnames, namespaces
 
-def _serialize_xml(write, elem, qnames, namespaces,
-                   short_empty_elements, **kwargs):
+def _serialize_xml(write, elem, qnames, namespaces, *,
+                   validate, short_empty_elements, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
+        if validate:
+            if '--' in text or text.endswith('-'):
+                raise ValueError('invalid comment')
         write("<!--%s-->" % text)
     elif tag is ProcessingInstruction:
+        if validate:
+            m = re.search('[ \t\r\n]', text)
+            if m is not None:
+                target = text[:m.start()]
+            else:
+                target = text
+            if (not is_valid_name(target) or target.lower() == 'xml'
+                    or '?>' in text or not is_valid_text(text)):
+                raise ValueError('invalid processing instruction')
         write("<?%s?>" % text)
     else:
         tag = qnames[tag]
         if tag is None:
             if text:
-                write(_escape_cdata(text))
+                write(_escape_cdata(text, validate))
             for e in elem:
                 _serialize_xml(write, e, qnames, None,
+                               validate=validate,
                                short_empty_elements=short_empty_elements)
         else:
+            if validate:
+                if not is_valid_name(tag):
+                    raise ValueError('invalid element name')
             write("<" + tag)
             items = list(elem.items())
             if items or namespaces:
@@ -882,30 +903,40 @@ def _serialize_xml(write, elem, qnames, namespaces,
                                        key=lambda x: x[1]):  # sort on prefix
                         if k:
                             k = ":" + k
+                            if validate:
+                                if not is_valid_name(k):
+                                    raise ValueError('invalid namespace name')
                         write(" xmlns%s=\"%s\"" % (
                             k,
-                            _escape_attrib(v)
+                            _escape_attrib(v, validate)
                             ))
                 for k, v in items:
                     if isinstance(k, QName):
                         k = k.text
+                    if validate:
+                        if not is_valid_name(qnames[k]):
+                            raise ValueError('invalid attribute name')
                     if isinstance(v, QName):
                         v = qnames[v.text]
+                        if validate:
+                            if not is_valid_name(v):
+                                raise ValueError('invalid attribute value')
                     else:
-                        v = _escape_attrib(v)
+                        v = _escape_attrib(v, validate)
                     write(" %s=\"%s\"" % (qnames[k], v))
             if text or len(elem) or not short_empty_elements:
                 write(">")
                 if text:
-                    write(_escape_cdata(text))
+                    write(_escape_cdata(text, validate))
                 for e in elem:
                     _serialize_xml(write, e, qnames, None,
+                                   validate=validate,
                                    short_empty_elements=short_empty_elements)
                 write("</" + tag + ">")
             else:
                 write(" />")
     if elem.tail:
-        write(_escape_cdata(elem.tail))
+        write(_escape_cdata(elem.tail, validate))
 
 _CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
                            "noframes", "plaintext"}
@@ -914,21 +945,34 @@ def _serialize_xml(write, elem, qnames, namespaces,
               "img", "input", "isindex", "link", "meta", "param", "source",
               "track", "wbr", "plaintext"}
 
-def _serialize_html(write, elem, qnames, namespaces, **kwargs):
+def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
+        if validate:
+            if (re.prefixmatch('-?>', text) or re.search('--!?>', text)
+                    or '\0' in text):
+                raise ValueError('invalid comment')
         write("<!--%s-->" % text)
     elif tag is ProcessingInstruction:
+        if validate:
+            if '>' in text or '\0' in text:
+                raise ValueError('invalid processing instruction')
         write("<?%s?>" % text)
     else:
         tag = qnames[tag]
         if tag is None:
             if text:
+                if validate:
+                    if '\0' in text:
+                        raise ValueError('invalid characters')
                 write(_escape_cdata(text))
             for e in elem:
-                _serialize_html(write, e, qnames, None)
+                _serialize_html(write, e, qnames, None, validate=validate)
         else:
+            if validate:
+                if not re.fullmatch('[A-Za-z][^\0\t\n\r\f />]*+', tag):
+                    raise ValueError('invalid element name')
             write("<" + tag)
             items = list(elem.items())
             if items or namespaces:
@@ -937,6 +981,12 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
                                        key=lambda x: x[1]):  # sort on prefix
                         if k:
                             k = ":" + k
+                        if validate:
+                            if not re.fullmatch('[^\0\t\n\r\f />=]++', k):
+                                raise ValueError('invalid attribute name')
+                        if validate:
+                            if '\0' in v:
+                                raise ValueError('invalid characters')
                         write(" xmlns%s=\"%s\"" % (
                             k,
                             _escape_attrib(v)
@@ -945,26 +995,49 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
                     if isinstance(k, QName):
                         k = k.text
                     k = qnames[k]
+                    if validate:
+                        if not re.fullmatch('[^\0\t\n\r\f />][^\0\t\n\r\f />=]*+', k):
+                            raise ValueError('invalid attribute name')
                     if v is None:
-                        write(" %s" % k)
+                        write(" %s" % (k,))
                     else:
                         if isinstance(v, QName):
                             v = qnames[v.text]
+                            if validate:
+                                if '\0' in v or '"' in v or '&' in v:
+                                    raise ValueError('invalid attribute value')
                         else:
+                            if validate:
+                                if '\0' in v:
+                                    raise ValueError('invalid attribute value')
                             v = _escape_attrib_html(v)
                         write(" %s=\"%s\"" % (k, v))
             write(">")
             ltag = tag.lower()
             if text:
+                if validate:
+                    if '\0' in text:
+                        raise ValueError('invalid characters')
                 if ltag in _CDATA_CONTENT_ELEMENTS:
+                    if validate:
+                        if (ltag != "plaintext"
+                            and re.search(r'</%s(?=[\t\n\r\f />])' % ltag,
+                                          text, re.IGNORECASE|re.ASCII)):
+                            raise ValueError('invalid %s content' % ltag)
                     write(text)
                 else:
                     write(_escape_cdata(text))
+            if validate:
+                if ltag in _CDATA_CONTENT_ELEMENTS and len(elem):
+                    raise ValueError('subelements in %s element' % ltag)
             for e in elem:
-                _serialize_html(write, e, qnames, None)
+                _serialize_html(write, e, qnames, None, validate=validate)
             if ltag not in HTML_EMPTY:
                 write("</" + tag + ">")
     if elem.tail:
+        if validate:
+            if '\0' in elem.tail:
+                raise ValueError('invalid characters')
         write(_escape_cdata(elem.tail))
 
 def _serialize_text(write, elem):
@@ -1021,9 +1094,12 @@ def _raise_serialization_error(text):
         "cannot serialize %r (type %s)" % (text, type(text).__name__)
         )
 
-def _escape_cdata(text):
+def _escape_cdata(text, validate=False):
     # escape character data
     try:
+        if validate:
+            if not is_valid_text(text):
+                raise ValueError('invalid characters')
         # it's worth avoiding do-nothing calls for strings that are
         # shorter than 500 characters, or so.  assume that's, by far,
         # the most common case in most applications.
@@ -1037,9 +1113,12 @@ def _escape_cdata(text):
     except (TypeError, AttributeError):
         _raise_serialization_error(text)
 
-def _escape_attrib(text):
+def _escape_attrib(text, validate=False):
     # escape attribute value
     try:
+        if validate:
+            if not is_valid_text(text):
+                raise ValueError('invalid attribute value')
         if "&" in text:
             text = text.replace("&", "&amp;")
         if "<" in text:
@@ -1082,7 +1161,7 @@ def _escape_attrib_html(text):
 
 def tostring(element, encoding=None, method=None, *,
              xml_declaration=None, default_namespace=None,
-             short_empty_elements=True):
+             validate=False, short_empty_elements=True):
     """Generate string representation of XML element.
 
     All subelements are included.  If encoding is "unicode", a string
@@ -1101,6 +1180,7 @@ def tostring(element, encoding=None, method=None, *,
                                xml_declaration=xml_declaration,
                                default_namespace=default_namespace,
                                method=method,
+                               validate=validate,
                                short_empty_elements=short_empty_elements)
     return stream.getvalue()
 
@@ -1123,13 +1203,14 @@ def tell(self):
 
 def tostringlist(element, encoding=None, method=None, *,
                  xml_declaration=None, default_namespace=None,
-                 short_empty_elements=True):
+                 validate=False, short_empty_elements=True):
     lst = []
     stream = _ListDataStream(lst)
     ElementTree(element).write(stream, encoding,
                                xml_declaration=xml_declaration,
                                default_namespace=default_namespace,
                                method=method,
+                               validate=validate,
                                short_empty_elements=short_empty_elements)
     return lst
 
diff --git a/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst b/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst
new file mode 100644
index 000000000000000..a4313cac07eea56
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst
@@ -0,0 +1,3 @@
+Add the *validate* option to :mod:`xml.etree.ElementTree` serialization
+functions, which allows to validate the element or element tree before
+serialization.

From ea414fa2a596066099e307bed88599bdfb65806f Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 31 May 2026 11:37:14 +0300
Subject: [PATCH 3/8] Apply suggestions from code review

Co-authored-by: Ezio Melotti <ezio.melotti@gmail.com>
---
 Doc/library/xml.etree.elementtree.rst | 20 +++++++++-----------
 Lib/test/test_xml_etree.py            |  2 ++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index b8c8b8f3c009ec8..27f1a998ac65104 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -718,7 +718,7 @@ Functions
    the output encoding (default is US-ASCII).  Use ``encoding="unicode"`` to
    generate a Unicode string (otherwise, a bytestring is generated).  *method*
    is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
-   *xml_declaration*, *default_namespace*, *validate* and *short_empty_elements* has the same
+   *xml_declaration*, *default_namespace*, *validate* and *short_empty_elements* have the same
    meaning as in :meth:`ElementTree.write`. Returns an (optionally) encoded string
    containing the XML data.
 
@@ -745,7 +745,7 @@ Functions
    the output encoding (default is US-ASCII).  Use ``encoding="unicode"`` to
    generate a Unicode string (otherwise, a bytestring is generated).  *method*
    is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
-   *xml_declaration*, *default_namespace*, *validate* and *short_empty_elements* has the same
+   *xml_declaration*, *default_namespace*, *validate* and *short_empty_elements* have the same
    meaning as in :meth:`ElementTree.write`. Returns a list of (optionally) encoded
    strings containing the XML data. It does not guarantee any specific sequence,
    except that ``b"".join(tostringlist(element)) == tostring(element)``.
@@ -755,9 +755,6 @@ Functions
    .. versionchanged:: 3.4
       Added the *short_empty_elements* parameter.
 
-   .. versionchanged:: next
-      Added the *validate* parameter.
-
    .. versionchanged:: 3.8
       Added the *xml_declaration* and *default_namespace* parameters.
 
@@ -1207,12 +1204,13 @@ ElementTree Objects
       *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is
       ``"xml"``).
 
-      If *validate* is true, check that all characters are legal XML or HTML
-      characters, depending on *method*, element and attribute names are
-      valid, and the content of comments, processing instructions and
-      HTML elements like ``<script>`` do not contain illegal sequences,
-      and raise :exc:`ValueError` otherwise.
-      By default, no validation is performed.
+      If *validate* is true, check that all characters are legal,
+      that element and attribute names are valid, and that the content
+      of comments, processing instructions and HTML elements
+      like ``<script>`` does not contain illegal sequences according
+      to the selected *method* (``"xml"`` or ``"html"``).
+      Raise :exc:`ValueError` if any check fails.
+      By default, or if *method* is ``"text"``, no validation is performed.
 
       The keyword-only *short_empty_elements* parameter controls the formatting
       of elements that contain no content.  If ``True`` (the default), they are
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index d27bcedc16c5882..6a4afb3c30971f7 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1477,6 +1477,7 @@ def test_invalid_namespace_uri(self):
         self.check(ET.Element(ET.QName('\ud8ff', 'tag')))
         self.check(ET.Element(ET.QName('\ufffe', 'tag')))
 
+
 class HTMLValidationTest(unittest.TestCase):
 
     def check(self, elem, expected=None):
@@ -1573,6 +1574,7 @@ def test_invalid_plaintext_content(self):
         elem.text = 'a\x00b'
         self.check(elem)
 
+
 class IterparseTest(unittest.TestCase):
     # Test iterparse interface.
 

From 474411fb36885c4887251f3487fa72afabcab0fa Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 31 May 2026 12:02:27 +0300
Subject: [PATCH 4/8] Add more tests for processing instructions

---
 Lib/test/test_xml_etree.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 6a4afb3c30971f7..6fd2c13be914521 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1389,11 +1389,14 @@ def test_attlist_default(self):
 
 class XMLValidationTest(unittest.TestCase):
 
-    def check(self, elem, expected=None):
+    def check(self, elem):
         self.assertRaises(ValueError,
             ET.tostring, elem, validate=True)
         ET.tostring(elem)  # no exception
 
+    def check_valid(self, elem, expected):
+        self.assertEqual(ET.tostring(elem, validate=True), expected)
+
     def test_invalid_comment(self):
         self.check(ET.Comment('a--b'))
         self.check(ET.Comment(' B+, B, or B-'))
@@ -1403,13 +1406,19 @@ def test_invalid_processing_instruction(self):
         self.check(ET.PI('0'))
         self.check(ET.PI('a/b'))
         self.check(ET.PI('foo\xa0bar'))
+        self.check(ET.PI('foo\fbar'))
         self.check(ET.PI('xml'))
+        self.check(ET.PI('XML'))
         self.check(ET.PI('xml', 'encoding="UTF-8"'))
         self.check(ET.PI('foo', 'a?>b'))
         self.check(ET.PI('foo', '\x00'))
         self.check(ET.PI('foo', '\ud8ff'))
         self.check(ET.PI('foo', '\ufffe'))
 
+        self.check_valid(ET.PI('foo\tbar'), b'<?foo\tbar?>')
+        self.check_valid(ET.PI('foo\nbar'), b'<?foo\nbar?>')
+        self.check_valid(ET.PI('foo\rbar'), b'<?foo\rbar?>')
+
     def test_invalid_tag(self):
         self.check(ET.Element(''))
         self.check(ET.Element('0'))
@@ -1480,7 +1489,7 @@ def test_invalid_namespace_uri(self):
 
 class HTMLValidationTest(unittest.TestCase):
 
-    def check(self, elem, expected=None):
+    def check(self, elem):
         self.assertRaises(ValueError,
             ET.tostring, elem, method='html', validate=True)
         ET.tostring(elem, method='html')  # no exception

From 22e5543081583bab1e274b8d9de153792c24d7a6 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 31 May 2026 12:29:41 +0300
Subject: [PATCH 5/8] Add more details in exceptions.

---
 Lib/xml/etree/ElementTree.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 3a937470073878c..f0981927c93290e 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -890,7 +890,7 @@ def _serialize_xml(write, elem, qnames, namespaces, *,
                 target = text
             if (not is_valid_name(target) or target.lower() == 'xml'
                     or '?>' in text or not is_valid_text(text)):
-                raise ValueError('invalid processing instruction')
+                raise ValueError(f'invalid processing instruction {elem.text!r}')
         write("<?%s?>" % text)
     else:
         tag = qnames[tag]
@@ -904,7 +904,7 @@ def _serialize_xml(write, elem, qnames, namespaces, *,
         else:
             if validate:
                 if not is_valid_name(tag):
-                    raise ValueError('invalid element name')
+                    raise ValueError(f'invalid element name {tag!r}')
             write("<" + tag)
             items = list(elem.items())
             if items or namespaces:
@@ -915,7 +915,7 @@ def _serialize_xml(write, elem, qnames, namespaces, *,
                             k = ":" + k
                             if validate:
                                 if not is_valid_name(k):
-                                    raise ValueError('invalid namespace name')
+                                    raise ValueError(f'invalid namespace name {k[1:]!r}')
                         write(" xmlns%s=\"%s\"" % (
                             k,
                             _escape_attrib(v, validate)
@@ -925,12 +925,12 @@ def _serialize_xml(write, elem, qnames, namespaces, *,
                         k = k.text
                     if validate:
                         if not is_valid_name(qnames[k]):
-                            raise ValueError('invalid attribute name')
+                            raise ValueError(f'invalid attribute name {k!r}')
                     if isinstance(v, QName):
                         v = qnames[v.text]
                         if validate:
                             if not is_valid_name(v):
-                                raise ValueError('invalid attribute value')
+                                raise ValueError(f'invalid attribute value {v!r}')
                     else:
                         v = _escape_attrib(v, validate)
                     write(" %s=\"%s\"" % (qnames[k], v))
@@ -967,7 +967,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
     elif tag is ProcessingInstruction:
         if validate:
             if '>' in text or '\0' in text:
-                raise ValueError('invalid processing instruction')
+                raise ValueError(f'invalid processing instruction {text!r}')
         write("<?%s?>" % text)
     else:
         tag = qnames[tag]
@@ -982,7 +982,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
         else:
             if validate:
                 if not re.fullmatch('[A-Za-z][^\0\t\n\r\f />]*+', tag):
-                    raise ValueError('invalid element name')
+                    raise ValueError(f'invalid element name {tag!r}')
             write("<" + tag)
             items = list(elem.items())
             if items or namespaces:
@@ -993,7 +993,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                             k = ":" + k
                         if validate:
                             if not re.fullmatch('[^\0\t\n\r\f />=]++', k):
-                                raise ValueError('invalid attribute name')
+                                raise ValueError(f'invalid attribute name {k!r}')
                         if validate:
                             if '\0' in v:
                                 raise ValueError('invalid characters')
@@ -1007,7 +1007,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                     k = qnames[k]
                     if validate:
                         if not re.fullmatch('[^\0\t\n\r\f />][^\0\t\n\r\f />=]*+', k):
-                            raise ValueError('invalid attribute name')
+                            raise ValueError(f'invalid attribute name {k!r}')
                     if v is None:
                         write(" %s" % k)  # empty attr
                     else:
@@ -1015,11 +1015,11 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                             v = qnames[v.text]
                             if validate:
                                 if '\0' in v or '"' in v or '&' in v:
-                                    raise ValueError('invalid attribute value')
+                                    raise ValueError(f'invalid attribute value {v!r}')
                         else:
                             if validate:
                                 if '\0' in v:
-                                    raise ValueError('invalid attribute value')
+                                    raise ValueError(f'invalid attribute value {v!r}')
                             v = _escape_attrib_html(v)
                         write(" %s=\"%s\"" % (k, v))
             write(">")

From e12e88d698c8622d3ac8eb505400c902696b3b4e Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 31 May 2026 13:49:58 +0300
Subject: [PATCH 6/8] Check also for surrogates in HTML.

---
 Lib/test/test_xml_etree.py   | 26 ++++++++++++++++++++++++++
 Lib/xml/etree/ElementTree.py | 29 ++++++++++++++++-------------
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 6fd2c13be914521..d9f676446ba4c71 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1400,6 +1400,10 @@ def check_valid(self, elem, expected):
     def test_invalid_comment(self):
         self.check(ET.Comment('a--b'))
         self.check(ET.Comment(' B+, B, or B-'))
+        self.check(ET.Comment('\x00'))
+        self.check(ET.Comment('\x01'))
+        self.check(ET.Comment('\ud8ff'))
+        self.check(ET.Comment('\ufffe'))
 
     def test_invalid_processing_instruction(self):
         self.check(ET.PI(''))
@@ -1412,6 +1416,7 @@ def test_invalid_processing_instruction(self):
         self.check(ET.PI('xml', 'encoding="UTF-8"'))
         self.check(ET.PI('foo', 'a?>b'))
         self.check(ET.PI('foo', '\x00'))
+        self.check(ET.PI('foo', '\x01'))
         self.check(ET.PI('foo', '\ud8ff'))
         self.check(ET.PI('foo', '\ufffe'))
 
@@ -1500,10 +1505,12 @@ def test_invalid_comment(self):
         self.check(ET.Comment('a-->b'))
         self.check(ET.Comment('a--!>b'))
         self.check(ET.Comment('a\x00b'))
+        self.check(ET.Comment('a\ud8ffb'))
 
     def test_invalid_processing_instruction(self):
         self.check(ET.PI('a>b'))
         self.check(ET.PI('a\x00b'))
+        self.check(ET.PI('a\ud8ffb'))
 
     def test_invalid_tag(self):
         self.check(ET.Element(''))
@@ -1516,20 +1523,27 @@ def test_invalid_tag(self):
         self.check(ET.Element('a/b'))
         self.check(ET.Element('a>b'))
         self.check(ET.Element('a\x00b'))
+        self.check(ET.Element('a\ud8ffb'))
         self.check(ET.Element(ET.QName('')))
         self.check(ET.Element(ET.QName('0')))
         self.check(ET.Element(ET.QName('a/b')))
 
     def test_invalid_attr_name(self):
         self.check(ET.Element('tag', attrib={'': 'value'}))
+        self.check(ET.Element('tag', attrib={'\x00': 'value'}))
+        self.check(ET.Element('tag', attrib={'\ud8ff': 'value'}))
         self.check(ET.Element('tag', attrib={'a/b': 'value'}))
         self.check(ET.Element('tag', attrib={'a=b': 'value'}))
+        self.check(ET.Element('tag', attrib={'a\x00b': 'value'}))
+        self.check(ET.Element('tag', attrib={'a\ud8ffb': 'value'}))
         self.check(ET.Element('tag', attrib={ET.QName(''): 'value'}))
         self.check(ET.Element('tag', attrib={ET.QName('a/b'): 'value'}))
 
     def test_invalid_attr_value(self):
         self.check(ET.Element('tag', attrib={'key': '\x00'}))
+        self.check(ET.Element('tag', attrib={'key': '\ud8ff'}))
         self.check(ET.Element('tag', attrib={'key': ET.QName('\x00')}))
+        self.check(ET.Element('tag', attrib={'key': ET.QName('\ud8ff')}))
         self.check(ET.Element('tag', attrib={'key': ET.QName('a"b')}))
         self.check(ET.Element('tag', attrib={'key': ET.QName('a&b')}))
 
@@ -1537,16 +1551,22 @@ def test_invalid_text(self):
         elem = ET.Element('tag')
         elem.text = '\x00'
         self.check(elem)
+        elem.text = '\ud8ff'
+        self.check(elem)
 
     def test_invalid_tail(self):
         elem = ET.Element('tag')
         elem.tail = '\x00'
         self.check(elem)
+        elem.tail = '\ud8ff'
+        self.check(elem)
 
     def test_invalid_text_without_tag(self):
         elem = ET.Element(None)
         elem.text = '\x00'
         self.check(elem)
+        elem.text = '\ud8ff'
+        self.check(elem)
 
     def test_invalid_subelements(self):
         elem = ET.Element('tag')
@@ -1558,7 +1578,9 @@ def test_invalid_subelements(self):
 
     def test_invalid_namespace_uri(self):
         self.check(ET.Element('{\x00}tag'))
+        self.check(ET.Element('{\ud8ff}tag'))
         self.check(ET.Element(ET.QName('\x00', 'tag')))
+        self.check(ET.Element(ET.QName('\ud8ff', 'tag')))
 
     @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
     def test_invalid_cdata_content(self, tag):
@@ -1571,6 +1593,8 @@ def test_invalid_cdata_content(self, tag):
         self.check(elem)
         elem.text = 'a\x00b'
         self.check(elem)
+        elem.text = 'a\ud8ffb'
+        self.check(elem)
 
     @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
     def test_cdata_subelements(self, tag):
@@ -1582,6 +1606,8 @@ def test_invalid_plaintext_content(self):
         elem = ET.Element('plaintext')
         elem.text = 'a\x00b'
         self.check(elem)
+        elem.text = 'a\ud8ffb'
+        self.check(elem)
 
 
 class IterparseTest(unittest.TestCase):
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index f0981927c93290e..53b6aaf4898a0cb 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -878,7 +878,7 @@ def _serialize_xml(write, elem, qnames, namespaces, *,
     text = elem.text
     if tag is Comment:
         if validate:
-            if '--' in text or text.endswith('-'):
+            if '--' in text or text.endswith('-') or not is_valid_text(text):
                 raise ValueError('invalid comment')
         write("<!--%s-->" % text)
     elif tag is ProcessingInstruction:
@@ -955,18 +955,21 @@ def _serialize_xml(write, elem, qnames, namespaces, *,
               "img", "input", "isindex", "link", "meta", "param", "source",
               "track", "wbr", "plaintext"}
 
+def _is_valid_html_text(text):
+    return re.search('[\x00\ud800-\udfff]', text) is None
+
 def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
         if validate:
             if (re.prefixmatch('-?>', text) or re.search('--!?>', text)
-                    or '\0' in text):
+                    or not _is_valid_html_text(text)):
                 raise ValueError('invalid comment')
         write("<!--%s-->" % text)
     elif tag is ProcessingInstruction:
         if validate:
-            if '>' in text or '\0' in text:
+            if '>' in text or not _is_valid_html_text(text):
                 raise ValueError(f'invalid processing instruction {text!r}')
         write("<?%s?>" % text)
     else:
@@ -974,14 +977,14 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
         if tag is None:
             if text:
                 if validate:
-                    if '\0' in text:
+                    if not _is_valid_html_text(text):
                         raise ValueError('invalid characters')
                 write(_escape_cdata(text))
             for e in elem:
                 _serialize_html(write, e, qnames, None, validate=validate)
         else:
             if validate:
-                if not re.fullmatch('[A-Za-z][^\0\t\n\r\f />]*+', tag):
+                if not re.fullmatch('[A-Za-z][^\0\t\n\r\f />\ud800-\udfff]*+', tag):
                     raise ValueError(f'invalid element name {tag!r}')
             write("<" + tag)
             items = list(elem.items())
@@ -992,10 +995,10 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                         if k:
                             k = ":" + k
                         if validate:
-                            if not re.fullmatch('[^\0\t\n\r\f />=]++', k):
-                                raise ValueError(f'invalid attribute name {k!r}')
+                            if not re.fullmatch('[^\0\t\n\r\f />=\ud800-\udfff]++', k):
+                                raise ValueError(f'invalid namespace name {k[1:]!r}')
                         if validate:
-                            if '\0' in v:
+                            if not _is_valid_html_text(v):
                                 raise ValueError('invalid characters')
                         write(" xmlns%s=\"%s\"" % (
                             k,
@@ -1006,7 +1009,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                         k = k.text
                     k = qnames[k]
                     if validate:
-                        if not re.fullmatch('[^\0\t\n\r\f />][^\0\t\n\r\f />=]*+', k):
+                        if not re.fullmatch('[^\0\t\n\r\f />\ud800-\udfff][^\0\t\n\r\f />=\ud800-\udfff]*+', k):
                             raise ValueError(f'invalid attribute name {k!r}')
                     if v is None:
                         write(" %s" % k)  # empty attr
@@ -1014,11 +1017,11 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                         if isinstance(v, QName):
                             v = qnames[v.text]
                             if validate:
-                                if '\0' in v or '"' in v or '&' in v:
+                                if re.search('[\0"&\ud800-\udfff]', v):
                                     raise ValueError(f'invalid attribute value {v!r}')
                         else:
                             if validate:
-                                if '\0' in v:
+                                if not _is_valid_html_text(v):
                                     raise ValueError(f'invalid attribute value {v!r}')
                             v = _escape_attrib_html(v)
                         write(" %s=\"%s\"" % (k, v))
@@ -1026,7 +1029,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
             ltag = tag.lower()
             if text:
                 if validate:
-                    if '\0' in text:
+                    if not _is_valid_html_text(text):
                         raise ValueError('invalid characters')
                 if ltag in _CDATA_CONTENT_ELEMENTS:
                     if validate:
@@ -1046,7 +1049,7 @@ def _serialize_html(write, elem, qnames, namespaces, *, validate=True, **kwargs)
                 write("</" + tag + ">")
     if elem.tail:
         if validate:
-            if '\0' in elem.tail:
+            if not _is_valid_html_text(elem.tail):
                 raise ValueError('invalid characters')
         write(_escape_cdata(elem.tail))
 

From 8cc34e70929fee7fd975d6afccd2cc93f3161ab6 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 31 May 2026 14:16:45 +0300
Subject: [PATCH 7/8] Move the What's New entry to 3.16.

---
 Doc/whatsnew/3.15.rst | 11 -----------
 Doc/whatsnew/3.16.rst | 10 ++++++++++
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 4e6f1cc50d253b4..1d27baf38906e9a 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1830,17 +1830,6 @@ xml
   (Contributed by Serhiy Storchaka in :gh:`139489`.)
 
 
-xml.etree.ElementTree
----------------------
-
-* Add the *validate* option to functions
-  :func:`~xml.etree.ElementTree.tostring`,
-  :func:`~xml.etree.ElementTree.tostringlist`, and the
-  :meth:`Element.write <xml.etree.ElementTree.ElementTree.write>` method,
-  which allows to validate the element or element tree before serialization.
-  (Contributed by Serhiy Storchaka in :gh:`xxxxxx`.)
-
-
 xml.parsers.expat
 -----------------
 
diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst
index 9a0a0d3d8831f5f..4f8b5485ccadbc6 100644
--- a/Doc/whatsnew/3.16.rst
+++ b/Doc/whatsnew/3.16.rst
@@ -209,6 +209,16 @@ tarfile
 * The undocumented and unused :attr:`!tarfile.TarFile.tarfile` attribute
   has been deprecated since Python 3.13.
 
+xml.etree.ElementTree
+---------------------
+
+* Add the *validate* option to functions
+  :func:`~xml.etree.ElementTree.tostring`,
+  :func:`~xml.etree.ElementTree.tostringlist`, and the
+  :meth:`ElementTree.write <xml.etree.ElementTree.ElementTree.write>` method,
+  which allows validating the element or element tree before serialization.
+  (Contributed by Serhiy Storchaka in :gh:`149468`.)
+
 .. Add removals above alphabetically, not here at the end.
 
 

From 3d0fdd2e227da4d227e6c5ea896fec04284ebb67 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 31 May 2026 14:26:15 +0300
Subject: [PATCH 8/8] Update the NEWS entry.

---
 .../Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst b/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst
index a4313cac07eea56..10be07afa8b36ee 100644
--- a/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst
+++ b/Misc/NEWS.d/next/Library/2026-05-06-22-22-05.gh-issue-149468.IUSCzU.rst
@@ -1,3 +1,6 @@
 Add the *validate* option to :mod:`xml.etree.ElementTree` serialization
-functions, which allows to validate the element or element tree before
-serialization.
+functions, which allows checking that all characters are legal,
+that element and attribute names are valid, and that the content
+of comments, processing instructions and HTML elements
+like ``<script>`` does not contain illegal sequences according
+to the selected *method* (``"xml"`` or ``"html"``).