@@ -1064,6 +1064,17 @@ def bxml(encoding, body=''):
10641064 self .assertRaises (ValueError , ET .XML , xml ('undefined' ).encode ('ascii' ))
10651065 self .assertRaises (LookupError , ET .XML , xml ('xxx' ).encode ('ascii' ))
10661066
@support.subTests('sample,exception', [
    (b'<x> \xa1 </x>', UnicodeDecodeError),  # crashed
    (b'<x> \xa1 </x', UnicodeDecodeError),  # crashed
    (b'<x> \xa1 ', None),  # None stands in for ET.ParseError
])
def test_multibyte_encoding_errors(self, sample, exception):
    """Check that ET.XML raises on EUC-JP input whose body contains byte 0xA1.

    Each *sample* is appended to an XML declaration naming EUC-JP as the
    encoding.  *exception* is the error type expected from the parser; a
    None entry in the parameter table means ET.ParseError.
    """
    # Resolve the None placeholder to ET.ParseError at call time.
    expected = exception if exception else ET.ParseError
    prolog = b'<?xml version="1.0" encoding="EUC-JP"?>\n '
    document = prolog + sample
    with self.assertRaises(expected):
        ET.XML(document)
1077+
10671078 def test_methods (self ):
10681079 # Test serialization methods.
10691080
@@ -1287,7 +1298,15 @@ def check(p, expected, namespaces=None):
12871298 {'' : 'http://www.w3.org/2001/XMLSchema' ,
12881299 'ns' : 'http://www.w3.org/2001/XMLSchema' })
12891300
1290- def test_processinginstruction (self ):
def test_comment_serialization(self):
    """Serialize a comment node with the default, html and text methods."""
    node = ET.Comment('<spam> & ham')
    # Markup characters inside a comment are expected to be emitted
    # verbatim (no escaping) by the default method and by method='html'.
    verbatim = b'<!--<spam> & ham-->'
    self.assertEqual(ET.tostring(node), verbatim)
    self.assertEqual(ET.tostring(node, method='html'), verbatim)
    # With method='text' a comment is expected to produce no output.
    self.assertEqual(ET.tostring(node, method='text'), b'')
1308+
1309+ def test_processinginstruction_serialization (self ):
12911310 # Test ProcessingInstruction directly
12921311
12931312 self .assertEqual (ET .tostring (ET .ProcessingInstruction ('test' , 'instruction' )),
@@ -1296,12 +1315,32 @@ def test_processinginstruction(self):
12961315 b'<?test instruction?>' )
12971316
12981317 # Issue #2746
1299-
1318+ # processing instructions are not escaped
13001319 self .assertEqual (ET .tostring (ET .PI ('test' , '<testing&>' )),
13011320 b'<?test <testing&>?>' )
13021321 self .assertEqual (ET .tostring (ET .PI ('test' , '<testing&>\xe3 ' ), 'latin-1' ),
13031322 b"<?xml version='1.0' encoding='latin-1'?>\n "
13041323 b"<?test <testing&>\xe3 ?>" )
1324+ pi = ET .PI ('test' , 'ham & eggs < spam' )
1325+ self .assertEqual (ET .tostring (pi ), b'<?test ham & eggs < spam?>' )
1326+ self .assertEqual (ET .tostring (pi , method = 'html' ), b'<?test ham & eggs < spam?>' )
1327+ # no processing instructions in text serialization
1328+ self .assertEqual (ET .tostring (pi , method = 'text' ), b'' )
1329+
def test_empty_attribute_serialization(self):
    """An attribute whose value is None is only accepted by method='html'."""
    node = ET.Element('tag', attrib={'attr': None})
    # The default serialization is expected to reject a None value.
    with self.assertRaises(TypeError):
        ET.tostring(node)
    # The html method is expected to write the bare attribute name.
    html_bytes = ET.tostring(node, method='html')
    self.assertEqual(html_bytes, b'<tag attr></tag>')
1335+
@support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
def test_html_cdata_elems_serialization(self, tag):
    """Text inside raw text elements is left unescaped by method='html'.

    *tag* is one of the lower-case element names from the parameter table.
    """
    # The element is created with the title-cased name (e.g. 'Script').
    # NOTE(review): presumably this exercises case-insensitive tag
    # matching in the serializer -- confirm.
    name = tag.title()
    node = ET.Element(name)
    node.text = '<spam>&ham'
    expected = f'<{name}><spam>&ham</{name}>'.encode()
    self.assertEqual(ET.tostring(node, method='html'), expected)
13051344
13061345 def test_html_empty_elems_serialization (self ):
13071346 # issue 15970
@@ -1317,6 +1356,14 @@ def test_html_empty_elems_serialization(self):
13171356 method = 'html' )
13181357 self .assertEqual (serialized , expected )
13191358
def test_html_plaintext_serialization(self):
    """A plaintext element serializes as html with raw text and no end tag."""
    node = ET.Element('PlainText')
    node.text = '<spam>&ham'
    # Expected output: the start tag followed by the unescaped text,
    # with no closing tag emitted.
    serialized = ET.tostring(node, method='html')
    self.assertEqual(serialized, b'<PlainText><spam>&ham')
1366+
13201367 def test_dump_attribute_order (self ):
13211368 # See BPO 34160
13221369 e = ET .Element ('cirriculum' , status = 'public' , company = 'example' )
0 commit comments