# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
17
from itertools import product
18
from test import support
19
from test.support import TESTFN, findfile, import_fresh_module, gc_collect
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
28
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
30
SIMPLE_XMLFILE.encode("utf-8")
31
except UnicodeEncodeError:
32
raise unittest.SkipTest("filename is not encodable to utf8")
33
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
37
<tag class='a'>text</tag>
40
<tag class='b' id='inner'>subtext</tag>
47
<tag class='b' id='inner'>subtext</tag>
56
<body xmlns="http://effbot.org/ns">
65
SAMPLE_XML_NS_ELEMS = """
67
<h:table xmlns:h="hello">
74
<f:table xmlns:f="foo">
75
<f:name>African Coffee Table</f:name>
77
<f:length>120</f:length>
84
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
87
<document>&entity;</document>
class ModuleTest(unittest.TestCase):
    # TODO: this should be removed once we get rid of the global module vars

    def test_sanity(self):
        # Import sanity: each of these xml.etree submodules must be
        # importable.  The imported names are intentionally unused; the
        # test passes as long as none of the imports raises.
        from xml.etree import ElementTree
        from xml.etree import ElementInclude
        from xml.etree import ElementPath
102
def serialize(elem, to_string=True, encoding='unicode', **options):
103
if encoding != 'unicode':
107
tree = ET.ElementTree(elem)
108
tree.write(file, encoding=encoding, **options)
110
return file.getvalue()
def summarize_list(seq):
    """Return a list with the ``tag`` attribute of each item of *seq*.

    The order of *seq* is preserved; an empty iterable yields ``[]``.
    """
    return [elem.tag for elem in seq]
119
class ElementTestCase:
122
cls.modules = {pyET, ET}
124
def pickleRoundTrip(self, obj, name, dumper, loader):
125
save_m = sys.modules[name]
127
sys.modules[name] = dumper
128
temp = pickle.dumps(obj)
129
sys.modules[name] = loader
130
result = pickle.loads(temp)
131
except pickle.PicklingError as pe:
132
# pyET must be second, because pyET may be (equal to) ET.
133
human = dict([(ET, "cET"), (pyET, "pyET")])
134
raise support.TestFailed("Failed to round-trip %r from %r to %r"
136
human.get(dumper, dumper),
137
human.get(loader, loader))) from pe
139
sys.modules[name] = save_m
def assertEqualElements(self, alice, bob):
    # Assert that the element trees rooted at *alice* and *bob* are equal.
    #
    # Both arguments must be instances of ET.Element or pyET.Element.
    # Children are compared pairwise and recursively first; afterwards the
    # 'tag', 'tail', 'text' and 'attrib' of the two nodes themselves are
    # compared.  Fails through the usual unittest assertion methods.
    self.assertIsInstance(alice, (ET.Element, pyET.Element))
    self.assertIsInstance(bob, (ET.Element, pyET.Element))
    # Check the child counts up front: zip() below stops at the shorter
    # sequence, so a missing or extra child would otherwise go unnoticed.
    self.assertEqual(len(list(alice)), len(list(bob)))
    for x, y in zip(alice, bob):
        self.assertEqualElements(x, y)
    properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
    self.assertEqual(properties(alice), properties(bob))
151
# --------------------------------------------------------------------
154
class ElementTreeTest(unittest.TestCase):
def serialize_check(self, elem, expected):
    # Assert that *elem*, passed through the module-level serialize()
    # helper with its default options, compares equal to *expected*.
    self.assertEqual(serialize(elem), expected)
159
def test_interface(self):
160
# Test element tree interface.
162
def check_string(string):
165
self.assertEqual(len(char), 1,
166
msg="expected one-character string, got %r" % char)
167
new_string = string + ""
168
new_string = string + " "
171
def check_mapping(mapping):
173
keys = mapping.keys()
174
items = mapping.items()
177
mapping["key"] = "value"
178
self.assertEqual(mapping["key"], "value",
179
msg="expected value string, got %r" % mapping["key"])
181
def check_element(element):
182
self.assertTrue(ET.iselement(element), msg="not an element")
183
self.assertTrue(hasattr(element, "tag"), msg="no tag member")
184
self.assertTrue(hasattr(element, "attrib"), msg="no attrib member")
185
self.assertTrue(hasattr(element, "text"), msg="no text member")
186
self.assertTrue(hasattr(element, "tail"), msg="no tail member")
188
check_string(element.tag)
189
check_mapping(element.attrib)
190
if element.text is not None:
191
check_string(element.text)
192
if element.tail is not None:
193
check_string(element.tail)
197
element = ET.Element("tag")
198
check_element(element)
199
tree = ET.ElementTree(element)
200
check_element(tree.getroot())
201
element = ET.Element("t\xe4g", key="value")
202
tree = ET.ElementTree(element)
203
self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
204
element = ET.Element("tag", key="value")
206
# Make sure all standard element methods exist.
208
def check_method(method):
209
self.assertTrue(hasattr(method, '__call__'),
210
msg="%s not callable" % method)
212
check_method(element.append)
213
check_method(element.extend)
214
check_method(element.insert)
215
check_method(element.remove)
216
check_method(element.getchildren)
217
check_method(element.find)
218
check_method(element.iterfind)
219
check_method(element.findall)
220
check_method(element.findtext)
221
check_method(element.clear)
222
check_method(element.get)
223
check_method(element.set)
224
check_method(element.keys)
225
check_method(element.items)
226
check_method(element.iter)
227
check_method(element.itertext)
228
check_method(element.getiterator)
230
# These methods return an iterable. See bug 6472.
233
check_method(it.__next__)
235
check_iter(element.iterfind("tag"))
236
check_iter(element.iterfind("*"))
237
check_iter(tree.iterfind("tag"))
238
check_iter(tree.iterfind("*"))
240
# These aliases are provided:
242
self.assertEqual(ET.XML, ET.fromstring)
243
self.assertEqual(ET.PI, ET.ProcessingInstruction)
245
def test_simpleops(self):
246
# Basic method sanity checks.
248
elem = ET.XML("<body><tag/></body>")
249
self.serialize_check(elem, '<body><tag /></body>')
250
e = ET.Element("tag2")
252
self.serialize_check(elem, '<body><tag /><tag2 /></body>')
254
self.serialize_check(elem, '<body><tag /></body>')
256
self.serialize_check(elem, '<body><tag2 /><tag /></body>')
259
self.serialize_check(elem, '<body><tag /><tag2 /></body>')
262
element = ET.Element("tag", key="value")
263
self.serialize_check(element, '<tag key="value" />') # 1
264
subelement = ET.Element("subtag")
265
element.append(subelement)
266
self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
267
element.insert(0, subelement)
268
self.serialize_check(element,
269
'<tag key="value"><subtag /><subtag /></tag>') # 3
270
element.remove(subelement)
271
self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
272
element.remove(subelement)
273
self.serialize_check(element, '<tag key="value" />') # 5
274
with self.assertRaises(ValueError) as cm:
275
element.remove(subelement)
276
self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
277
self.serialize_check(element, '<tag key="value" />') # 6
278
element[0:0] = [subelement, subelement, subelement]
279
self.serialize_check(element[1], '<subtag />')
280
self.assertEqual(element[1:9], [element[1], element[2]])
281
self.assertEqual(element[:9:2], [element[0], element[2]])
283
self.serialize_check(element,
284
'<tag key="value"><subtag /><subtag /></tag>')
286
def test_cdata(self):
287
# Test CDATA handling (etc).
289
self.serialize_check(ET.XML("<tag>hello</tag>"),
291
self.serialize_check(ET.XML("<tag>hello</tag>"),
293
self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
296
def test_file_init(self):
297
stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
298
tree = ET.ElementTree(file=stringfile)
299
self.assertEqual(tree.find("tag").tag, 'tag')
300
self.assertEqual(tree.find("section/tag").tag, 'tag')
302
tree = ET.ElementTree(file=SIMPLE_XMLFILE)
303
self.assertEqual(tree.find("element").tag, 'element')
304
self.assertEqual(tree.find("element/../empty-element").tag,
def test_path_cache(self):
    # Check that the path cache behaves sanely.

    from xml.etree import ElementPath

    elem = ET.XML(SAMPLE_XML)

    def find_paths(count):
        # Run find() for the paths './0' .. './<count-1>' on a fresh
        # ElementTree wrapper around elem; only the effect on
        # ElementPath._cache matters, the results are discarded.
        for i in range(count):
            ET.ElementTree(elem).find('./' + str(i))

    find_paths(10)
    cache_len_10 = len(ElementPath._cache)
    # Repeating the same ten lookups must not change the cache size.
    find_paths(10)
    self.assertEqual(len(ElementPath._cache), cache_len_10)
    # Ten previously unseen paths must make the cache grow.
    find_paths(20)
    self.assertGreater(len(ElementPath._cache), cache_len_10)
    # After 600 distinct paths the cache must still hold fewer than 500
    # entries.
    find_paths(600)
    self.assertLess(len(ElementPath._cache), 500)
323
# Test copy handling (etc).
326
e1 = ET.XML("<tag>hello<foo/></tag>")
328
e3 = copy.deepcopy(e1)
329
e1.find("foo").tag = "bar"
330
self.serialize_check(e1, '<tag>hello<bar /></tag>')
331
self.serialize_check(e2, '<tag>hello<bar /></tag>')
332
self.serialize_check(e3, '<tag>hello<foo /></tag>')
334
def test_attrib(self):
335
# Test attribute handling.
337
elem = ET.Element("tag")
338
elem.get("key") # 1.1
339
self.assertEqual(elem.get("key", "default"), 'default') # 1.2
341
elem.set("key", "value")
342
self.assertEqual(elem.get("key"), 'value') # 1.3
344
elem = ET.Element("tag", key="value")
345
self.assertEqual(elem.get("key"), 'value') # 2.1
346
self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
348
attrib = {"key": "value"}
349
elem = ET.Element("tag", attrib)
350
attrib.clear() # check for aliasing issues
351
self.assertEqual(elem.get("key"), 'value') # 3.1
352
self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
354
attrib = {"key": "value"}
355
elem = ET.Element("tag", **attrib)
356
attrib.clear() # check for aliasing issues
357
self.assertEqual(elem.get("key"), 'value') # 4.1
358
self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
360
elem = ET.Element("tag", {"key": "other"}, key="value")
361
self.assertEqual(elem.get("key"), 'value') # 5.1
362
self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
364
elem = ET.Element('test')
366
elem.set('testa', 'testval')
367
elem.set('testb', 'test2')
368
self.assertEqual(ET.tostring(elem),
369
b'<test testa="testval" testb="test2">aa</test>')
370
self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
371
self.assertEqual(sorted(elem.items()),
372
[('testa', 'testval'), ('testb', 'test2')])
373
self.assertEqual(elem.attrib['testb'], 'test2')
374
elem.attrib['testb'] = 'test1'
375
elem.attrib['testc'] = 'test2'
376
self.assertEqual(ET.tostring(elem),
377
b'<test testa="testval" testb="test1" testc="test2">aa</test>')
379
def test_makeelement(self):
380
# Test makeelement handling.
382
elem = ET.Element("tag")
383
attrib = {"key": "value"}
384
subelem = elem.makeelement("subtag", attrib)
385
self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
387
self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
390
self.serialize_check(elem, '<tag />')
392
self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
393
elem.extend([subelem, subelem])
394
self.serialize_check(elem,
395
'<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
397
self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
398
elem[:] = tuple([subelem])
399
self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
401
def test_parsefile(self):
402
# Test parsing from file.
404
tree = ET.parse(SIMPLE_XMLFILE)
405
stream = io.StringIO()
406
tree.write(stream, encoding='unicode')
407
self.assertEqual(stream.getvalue(),
409
' <element key="value">text</element>\n'
410
' <element>text</element>tail\n'
411
' <empty-element />\n'
413
tree = ET.parse(SIMPLE_NS_XMLFILE)
414
stream = io.StringIO()
415
tree.write(stream, encoding='unicode')
416
self.assertEqual(stream.getvalue(),
417
'<ns0:root xmlns:ns0="namespace">\n'
418
' <ns0:element key="value">text</ns0:element>\n'
419
' <ns0:element>text</ns0:element>tail\n'
420
' <ns0:empty-element />\n'
423
with open(SIMPLE_XMLFILE) as f:
426
parser = ET.XMLParser()
427
self.assertRegex(parser.version, r'^Expat ')
429
self.serialize_check(parser.close(),
431
' <element key="value">text</element>\n'
432
' <element>text</element>tail\n'
433
' <empty-element />\n'
436
target = ET.TreeBuilder()
437
parser = ET.XMLParser(target=target)
439
self.serialize_check(parser.close(),
441
' <element key="value">text</element>\n'
442
' <element>text</element>tail\n'
443
' <empty-element />\n'
def test_parseliteral(self):
    # Parse documents given as literals and check how they serialize back.

    # A single string, through XML() and through fromstring().
    element = ET.XML("<html><body>text</body></html>")
    self.assertEqual(ET.tostring(element, encoding='unicode'),
        '<html><body>text</body></html>')
    element = ET.fromstring("<html><body>text</body></html>")
    self.assertEqual(ET.tostring(element, encoding='unicode'),
        '<html><body>text</body></html>')

    # A sequence of fragments; note that the second and third pieces
    # split the closing </body> tag in the middle.
    sequence = ["<html><body>", "text</bo", "dy></html>"]
    element = ET.fromstringlist(sequence)
    self.assertEqual(ET.tostring(element),
            b'<html><body>text</body></html>')
    self.assertEqual(b"".join(ET.tostringlist(element)),
            b'<html><body>text</body></html>')
    # With the "ascii" encoding the output is expected to start with an
    # XML declaration.
    self.assertEqual(ET.tostring(element, "ascii"),
            b"<?xml version='1.0' encoding='ascii'?>\n"
            b"<html><body>text</body></html>")

    # XMLID() returns a pair whose second item maps each "id" attribute
    # value to its element; it is empty when no element carries an id.
    _, ids = ET.XMLID("<html><body>text</body></html>")
    self.assertEqual(len(ids), 0)
    _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    self.assertEqual(len(ids), 1)
    self.assertEqual(ids["body"].tag, 'body')
468
def test_iterparse(self):
469
# Test iterparse interface.
471
iterparse = ET.iterparse
473
context = iterparse(SIMPLE_XMLFILE)
474
action, elem = next(context)
475
self.assertEqual((action, elem.tag), ('end', 'element'))
476
self.assertEqual([(action, elem.tag) for action, elem in context], [
478
('end', 'empty-element'),
481
self.assertEqual(context.root.tag, 'root')
483
context = iterparse(SIMPLE_NS_XMLFILE)
484
self.assertEqual([(action, elem.tag) for action, elem in context], [
485
('end', '{namespace}element'),
486
('end', '{namespace}element'),
487
('end', '{namespace}empty-element'),
488
('end', '{namespace}root'),
492
context = iterparse(SIMPLE_XMLFILE, events)
493
self.assertEqual([(action, elem.tag) for action, elem in context], [])
496
context = iterparse(SIMPLE_XMLFILE, events=events)
497
self.assertEqual([(action, elem.tag) for action, elem in context], [])
499
events = ("start", "end")
500
context = iterparse(SIMPLE_XMLFILE, events)
501
self.assertEqual([(action, elem.tag) for action, elem in context], [
503
('start', 'element'),
505
('start', 'element'),
507
('start', 'empty-element'),
508
('end', 'empty-element'),
512
events = ("start", "end", "start-ns", "end-ns")
513
context = iterparse(SIMPLE_NS_XMLFILE, events)
514
self.assertEqual([(action, elem.tag) if action in ("start", "end")
516
for action, elem in context], [
517
('start-ns', ('', 'namespace')),
518
('start', '{namespace}root'),
519
('start', '{namespace}element'),
520
('end', '{namespace}element'),
521
('start', '{namespace}element'),
522
('end', '{namespace}element'),
523
('start', '{namespace}empty-element'),
524
('end', '{namespace}empty-element'),
525
('end', '{namespace}root'),
529
events = ("start", "end", "bogus")
530
with self.assertRaises(ValueError) as cm:
531
with open(SIMPLE_XMLFILE, "rb") as f:
533
self.assertEqual(str(cm.exception), "unknown event 'bogus'")
536
b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
537
b"<body xmlns='http://éffbot.org/ns'\n"
538
b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
539
events = ("start-ns",)
540
context = iterparse(source, events)
541
self.assertEqual([(action, elem) for action, elem in context], [
542
('start-ns', ('', 'http://\xe9ffbot.org/ns')),
543
('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
546
source = io.StringIO("<document />junk")
547
it = iterparse(source)
548
action, elem = next(it)
549
self.assertEqual((action, elem.tag), ('end', 'document'))
550
with self.assertRaises(ET.ParseError) as cm:
552
self.assertEqual(str(cm.exception),
553
'junk after document element: line 1, column 12')
555
def test_writefile(self):
556
elem = ET.Element("tag")
558
self.serialize_check(elem, '<tag>text</tag>')
559
ET.SubElement(elem, "subtag").text = "subtext"
560
self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
562
# Test tag suppression
564
self.serialize_check(elem, 'text<subtag>subtext</subtag>')
565
elem.insert(0, ET.Comment("comment"))
566
self.serialize_check(elem,
567
'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3
569
elem[0] = ET.PI("key", "value")
570
self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
572
def test_custom_builder(self):
573
# Test parser w. custom builder.
575
with open(SIMPLE_XMLFILE) as f:
578
def start(self, tag, attrib):
579
self.append(("start", tag))
581
self.append(("end", tag))
582
def data(self, text):
585
parser = ET.XMLParser(target=builder)
587
self.assertEqual(builder, [
589
('start', 'element'),
591
('start', 'element'),
593
('start', 'empty-element'),
594
('end', 'empty-element'),
598
with open(SIMPLE_NS_XMLFILE) as f:
601
def start(self, tag, attrib):
602
self.append(("start", tag))
604
self.append(("end", tag))
605
def data(self, text):
607
def pi(self, target, data):
608
self.append(("pi", target, data))
609
def comment(self, data):
610
self.append(("comment", data))
612
parser = ET.XMLParser(target=builder)
614
self.assertEqual(builder, [
615
('pi', 'pi', 'data'),
616
('comment', ' comment '),
617
('start', '{namespace}root'),
618
('start', '{namespace}element'),
619
('end', '{namespace}element'),
620
('start', '{namespace}element'),
621
('end', '{namespace}element'),
622
('start', '{namespace}empty-element'),
623
('end', '{namespace}empty-element'),
624
('end', '{namespace}root'),
628
def test_getchildren(self):
629
# Test Element.getchildren()
631
with open(SIMPLE_XMLFILE, "rb") as f:
633
self.assertEqual([summarize_list(elem.getchildren())
634
for elem in tree.getroot().iter()], [
635
['element', 'element', 'empty-element'],
640
self.assertEqual([summarize_list(elem.getchildren())
641
for elem in tree.getiterator()], [
642
['element', 'element', 'empty-element'],
648
elem = ET.XML(SAMPLE_XML)
649
self.assertEqual(len(elem.getchildren()), 3)
650
self.assertEqual(len(elem[2].getchildren()), 1)
651
self.assertEqual(elem[:], elem.getchildren())
655
self.assertEqual(len(elem.getchildren()), 2)
656
self.assertEqual(child1, elem[0])
657
self.assertEqual(child2, elem[1])
658
elem[0:2] = [child2, child1]
659
self.assertEqual(child2, elem[0])
660
self.assertEqual(child1, elem[1])
661
self.assertNotEqual(child1, elem[0])
663
self.assertEqual(elem.getchildren(), [])
def test_writestring(self):
    # tostring() with default arguments must return the document as
    # bytes, whether the tree was built by XML() or by fromstring().
    elem = ET.XML("<html><body>text</body></html>")
    self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
    elem = ET.fromstring("<html><body>text</body></html>")
    self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
671
def test_encoding(self):
672
def check(encoding, body=''):
673
xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
675
self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
676
self.assertEqual(ET.XML(xml).text, body)
678
check("us-ascii", 'a')
679
check("iso-8859-1", '\xbd')
680
check("iso-8859-15", '\u20ac')
681
check("cp437", '\u221a')
682
check("mac-roman", '\u02da')
685
return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
687
return xml(encoding).encode(encoding)
688
supported_encodings = [
689
'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
690
'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
691
'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
692
'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
693
'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
694
'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
695
'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
696
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
697
'cp1256', 'cp1257', 'cp1258',
698
'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
699
'mac-roman', 'mac-turkish',
700
'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
701
'iso2022-jp-3', 'iso2022-jp-ext',
705
for encoding in supported_encodings:
706
self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
708
unsupported_ascii_compatible_encodings = [
710
'cp932', 'cp949', 'cp950',
711
'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
712
'gb2312', 'gbk', 'gb18030',
713
'iso2022-kr', 'johab',
714
'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
717
for encoding in unsupported_ascii_compatible_encodings:
718
self.assertRaises(ValueError, ET.XML, bxml(encoding))
720
unsupported_ascii_incompatible_encodings = [
721
'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
722
'utf_32', 'utf_32_be', 'utf_32_le',
724
for encoding in unsupported_ascii_incompatible_encodings:
725
self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
727
self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
728
self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
730
def test_methods(self):
731
# Test serialization methods.
733
e = ET.XML("<html><link/><script>1 < 2</script></html>")
735
self.assertEqual(serialize(e),
736
'<html><link /><script>1 < 2</script></html>\n')
737
self.assertEqual(serialize(e, method=None),
738
'<html><link /><script>1 < 2</script></html>\n')
739
self.assertEqual(serialize(e, method="xml"),
740
'<html><link /><script>1 < 2</script></html>\n')
741
self.assertEqual(serialize(e, method="html"),
742
'<html><link><script>1 < 2</script></html>\n')
743
self.assertEqual(serialize(e, method="text"), '1 < 2\n')
def test_issue18347(self):
    # A mixed-case tag name must come out with its case unchanged, both
    # with the default serialization method and with method="html".
    e = ET.XML('<html><CamelCase>text</CamelCase></html>')
    self.assertEqual(serialize(e),
            '<html><CamelCase>text</CamelCase></html>')
    self.assertEqual(serialize(e, method="html"),
            '<html><CamelCase>text</CamelCase></html>')
752
def test_entity(self):
753
# Test entity handling.
757
e = ET.XML("<document title='舰'>test</document>")
758
self.assertEqual(serialize(e, encoding="us-ascii"),
759
b'<document title="舰">test</document>')
760
self.serialize_check(e, '<document title="\u8230">test</document>')
764
with self.assertRaises(ET.ParseError) as cm:
765
ET.XML("<document>&entity;</document>")
766
self.assertEqual(str(cm.exception),
767
'undefined entity: line 1, column 10')
769
with self.assertRaises(ET.ParseError) as cm:
771
self.assertEqual(str(cm.exception),
772
'undefined entity &entity;: line 5, column 10')
776
parser = ET.XMLParser()
777
parser.entity["entity"] = "text"
778
parser.feed(ENTITY_XML)
779
root = parser.close()
780
self.serialize_check(root, '<document>text</document>')
782
def test_namespace(self):
783
# Test namespace issues.
787
elem = ET.XML("<tag xml:lang='en' />")
788
self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
790
# 2) other "well-known" namespaces
792
elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
793
self.serialize_check(elem,
794
'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
796
elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
797
self.serialize_check(elem,
798
'<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
800
elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
801
self.serialize_check(elem,
802
'<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
804
# 3) unknown namespaces
805
elem = ET.XML(SAMPLE_XML_NS)
806
self.serialize_check(elem,
807
'<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
808
' <ns0:tag>text</ns0:tag>\n'
811
' <ns0:tag>subtext</ns0:tag>\n'
815
def test_qname(self):
816
# Test QName handling.
820
elem = ET.Element("{uri}tag")
821
self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
822
elem = ET.Element(ET.QName("{uri}tag"))
823
self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
824
elem = ET.Element(ET.QName("uri", "tag"))
825
self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
826
elem = ET.Element(ET.QName("uri", "tag"))
827
subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
828
subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
829
self.serialize_check(elem,
830
'<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
832
# 2) decorated attributes
835
elem.attrib["{uri}key"] = "value"
836
self.serialize_check(elem,
837
'<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
840
elem.attrib[ET.QName("{uri}key")] = "value"
841
self.serialize_check(elem,
842
'<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
844
# 3) decorated values are not converted by default, but the
845
# QName wrapper can be used for values
848
elem.attrib["{uri}key"] = "{uri}value"
849
self.serialize_check(elem,
850
'<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
853
elem.attrib["{uri}key"] = ET.QName("{uri}value")
854
self.serialize_check(elem,
855
'<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
858
subelem = ET.Element("tag")
859
subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
862
self.serialize_check(elem,
863
'<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
864
'<tag ns1:key="ns2:value" />'
865
'<tag ns1:key="ns2:value" />'
868
# 4) Direct QName tests
870
self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
871
self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
872
q1 = ET.QName('ns', 'tag')
873
q2 = ET.QName('ns', 'tag')
874
self.assertEqual(q1, q2)
875
q2 = ET.QName('ns', 'other-tag')
876
self.assertNotEqual(q1, q2)
877
self.assertNotEqual(q1, 'ns:tag')
878
self.assertEqual(q1, '{ns}tag')
880
def test_doctype_public(self):
881
# Test PUBLIC doctype.
883
elem = ET.XML('<!DOCTYPE html PUBLIC'
884
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
885
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
888
def test_xpath_tokenizer(self):
889
# Test the XPath tokenizer.
890
from xml.etree import ElementPath
891
def check(p, expected):
892
self.assertEqual([op or tag
893
for op, tag in ElementPath.xpath_tokenizer(p)],
896
# tests from the xml specification
898
check("text()", ['text', '()'])
899
check("@name", ['@', 'name'])
900
check("@*", ['@', '*'])
901
check("para[1]", ['para', '[', '1', ']'])
902
check("para[last()]", ['para', '[', 'last', '()', ']'])
903
check("*/para", ['*', '/', 'para'])
904
check("/doc/chapter[5]/section[2]",
905
['/', 'doc', '/', 'chapter', '[', '5', ']',
906
'/', 'section', '[', '2', ']'])
907
check("chapter//para", ['chapter', '//', 'para'])
908
check("//para", ['//', 'para'])
909
check("//olist/item", ['//', 'olist', '/', 'item'])
911
check(".//para", ['.', '//', 'para'])
913
check("../@lang", ['..', '/', '@', 'lang'])
914
check("chapter[title]", ['chapter', '[', 'title', ']'])
915
check("employee[@secretary and @assistant]", ['employee',
916
'[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
919
check("{http://spam}egg", ['{http://spam}egg'])
920
check("./spam.egg", ['.', '/', 'spam.egg'])
921
check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
923
def test_processinginstruction(self):
924
# Test ProcessingInstruction directly
926
self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
927
b'<?test instruction?>')
928
self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
929
b'<?test instruction?>')
933
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
934
b'<?test <testing&>?>')
935
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
936
b"<?xml version='1.0' encoding='latin-1'?>\n"
937
b"<?test <testing&>\xe3?>")
939
def test_html_empty_elems_serialization(self):
941
# from http://www.w3.org/TR/html401/index/elements.html
942
for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
943
'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
944
for elem in [element, element.lower()]:
945
expected = '<%s>' % elem
946
serialized = serialize(ET.XML('<%s />' % elem), method='html')
947
self.assertEqual(serialized, expected)
948
serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
950
self.assertEqual(serialized, expected)
953
class XMLPullParserTest(unittest.TestCase):
955
def _feed(self, parser, data, chunk_size=None):
956
if chunk_size is None:
959
for i in range(0, len(data), chunk_size):
960
parser.feed(data[i:i+chunk_size])
962
def assert_event_tags(self, parser, expected):
963
events = parser.read_events()
964
self.assertEqual([(action, elem.tag) for action, elem in events],
967
def test_simple_xml(self):
968
for chunk_size in (None, 1, 5):
969
with self.subTest(chunk_size=chunk_size):
970
parser = ET.XMLPullParser()
971
self.assert_event_tags(parser, [])
972
self._feed(parser, "<!-- comment -->\n", chunk_size)
973
self.assert_event_tags(parser, [])
975
"<root>\n <element key='value'>text</element",
977
self.assert_event_tags(parser, [])
978
self._feed(parser, ">\n", chunk_size)
979
self.assert_event_tags(parser, [('end', 'element')])
980
self._feed(parser, "<element>text</element>tail\n", chunk_size)
981
self._feed(parser, "<empty-element/>\n", chunk_size)
982
self.assert_event_tags(parser, [
984
('end', 'empty-element'),
986
self._feed(parser, "</root>\n", chunk_size)
987
self.assert_event_tags(parser, [('end', 'root')])
988
self.assertIsNone(parser.close())
990
def test_feed_while_iterating(self):
991
parser = ET.XMLPullParser()
992
it = parser.read_events()
993
self._feed(parser, "<root>\n <element key='value'>text</element>\n")
994
action, elem = next(it)
995
self.assertEqual((action, elem.tag), ('end', 'element'))
996
self._feed(parser, "</root>\n")
997
action, elem = next(it)
998
self.assertEqual((action, elem.tag), ('end', 'root'))
999
with self.assertRaises(StopIteration):
def test_simple_xml_with_ns(self):
    # Feed a document with a default namespace piece by piece and check
    # after each piece which 'end' events have become available.  Tags
    # are expected in '{namespace}local-name' form.
    parser = ET.XMLPullParser()
    self.assert_event_tags(parser, [])
    self._feed(parser, "<!-- comment -->\n")
    self.assert_event_tags(parser, [])
    self._feed(parser, "<root xmlns='namespace'>\n")
    self.assert_event_tags(parser, [])
    # The end tag is still missing its closing '>', so no event yet.
    self._feed(parser, "<element key='value'>text</element")
    self.assert_event_tags(parser, [])
    self._feed(parser, ">\n")
    self.assert_event_tags(parser, [('end', '{namespace}element')])
    # Two feeds without reading in between: both events are then
    # expected from a single read, in document order.
    self._feed(parser, "<element>text</element>tail\n")
    self._feed(parser, "<empty-element/>\n")
    self.assert_event_tags(parser, [
        ('end', '{namespace}element'),
        ('end', '{namespace}empty-element'),
        ])
    self._feed(parser, "</root>\n")
    self.assert_event_tags(parser, [('end', '{namespace}root')])
    self.assertIsNone(parser.close())
1023
def test_ns_events(self):
1024
parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
1025
self._feed(parser, "<!-- comment -->\n")
1026
self._feed(parser, "<root xmlns='namespace'>\n")
1028
list(parser.read_events()),
1029
[('start-ns', ('', 'namespace'))])
1030
self._feed(parser, "<element key='value'>text</element")
1031
self._feed(parser, ">\n")
1032
self._feed(parser, "<element>text</element>tail\n")
1033
self._feed(parser, "<empty-element/>\n")
1034
self._feed(parser, "</root>\n")
1035
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
1036
self.assertIsNone(parser.close())
1038
def test_events(self):
1039
parser = ET.XMLPullParser(events=())
1040
self._feed(parser, "<root/>\n")
1041
self.assert_event_tags(parser, [])
1043
parser = ET.XMLPullParser(events=('start', 'end'))
1044
self._feed(parser, "<!-- comment -->\n")
1045
self.assert_event_tags(parser, [])
1046
self._feed(parser, "<root>\n")
1047
self.assert_event_tags(parser, [('start', 'root')])
1048
self._feed(parser, "<element key='value'>text</element")
1049
self.assert_event_tags(parser, [('start', 'element')])
1050
self._feed(parser, ">\n")
1051
self.assert_event_tags(parser, [('end', 'element')])
1053
"<element xmlns='foo'>text<empty-element/></element>tail\n")
1054
self.assert_event_tags(parser, [
1055
('start', '{foo}element'),
1056
('start', '{foo}empty-element'),
1057
('end', '{foo}empty-element'),
1058
('end', '{foo}element'),
1060
self._feed(parser, "</root>")
1061
self.assertIsNone(parser.close())
1062
self.assert_event_tags(parser, [('end', 'root')])
1064
parser = ET.XMLPullParser(events=('start',))
1065
self._feed(parser, "<!-- comment -->\n")
1066
self.assert_event_tags(parser, [])
1067
self._feed(parser, "<root>\n")
1068
self.assert_event_tags(parser, [('start', 'root')])
1069
self._feed(parser, "<element key='value'>text</element")
1070
self.assert_event_tags(parser, [('start', 'element')])
1071
self._feed(parser, ">\n")
1072
self.assert_event_tags(parser, [])
1074
"<element xmlns='foo'>text<empty-element/></element>tail\n")
1075
self.assert_event_tags(parser, [
1076
('start', '{foo}element'),
1077
('start', '{foo}empty-element'),
1079
self._feed(parser, "</root>")
1080
self.assertIsNone(parser.close())
1082
def test_events_sequence(self):
1083
# Test that events can be some sequence that's not just a tuple or list
1084
eventset = {'end', 'start'}
1085
parser = ET.XMLPullParser(events=eventset)
1086
self._feed(parser, "<foo>bar</foo>")
1087
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1091
self.events = iter(['start', 'end', 'start-ns'])
1095
return next(self.events)
1097
parser = ET.XMLPullParser(events=DummyIter())
1098
self._feed(parser, "<foo>bar</foo>")
1099
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1102
def test_unknown_event(self):
    """XMLPullParser refuses an event name it does not recognise."""
    requested = ('start', 'end', 'bogus')
    self.assertRaises(ValueError, ET.XMLPullParser, events=requested)
1108
# xinclude tests (samples from appendix C of the xinclude specification)
1112
XINCLUDE["C1.xml"] = """\
1113
<?xml version='1.0'?>
1114
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1115
<p>120 Mz is adequate for an average home user.</p>
1116
<xi:include href="disclaimer.xml"/>
1120
XINCLUDE["disclaimer.xml"] = """\
1121
<?xml version='1.0'?>
1123
<p>The opinions represented herein represent those of the individual
1124
and should not be interpreted as official policy endorsed by this
1129
XINCLUDE["C2.xml"] = """\
1130
<?xml version='1.0'?>
1131
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1132
<p>This document has been accessed
1133
<xi:include href="count.txt" parse="text"/> times.</p>
1137
XINCLUDE["count.txt"] = "324387"
1139
XINCLUDE["C2b.xml"] = """\
1140
<?xml version='1.0'?>
1141
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1142
<p>This document has been <em>accessed</em>
1143
<xi:include href="count.txt" parse="text"/> times.</p>
1147
XINCLUDE["C3.xml"] = """\
1148
<?xml version='1.0'?>
1149
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1150
<p>The following is the source of the "data.xml" resource:</p>
1151
<example><xi:include href="data.xml" parse="text"/></example>
1155
XINCLUDE["data.xml"] = """\
1156
<?xml version='1.0'?>
1158
<item><![CDATA[Brooks & Shields]]></item>
1162
XINCLUDE["C5.xml"] = """\
1163
<?xml version='1.0'?>
1164
<div xmlns:xi="http://www.w3.org/2001/XInclude">
1165
<xi:include href="example.txt" parse="text">
1167
<xi:include href="fallback-example.txt" parse="text">
1168
<xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
1175
XINCLUDE["default.xml"] = """\
1176
<?xml version='1.0'?>
1177
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1179
<xi:include href="{}"/>
1181
""".format(html.escape(SIMPLE_XMLFILE, True))
1184
# badly formatted xi:include tags
1188
XINCLUDE_BAD["B1.xml"] = """\
1189
<?xml version='1.0'?>
1190
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1191
<p>120 Mz is adequate for an average home user.</p>
1192
<xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
1196
XINCLUDE_BAD["B2.xml"] = """\
1197
<?xml version='1.0'?>
1198
<div xmlns:xi="http://www.w3.org/2001/XInclude">
1199
<xi:fallback></xi:fallback>
1203
class XIncludeTest(unittest.TestCase):
1205
def xinclude_loader(self, href, parse="xml", encoding=None):
1207
data = XINCLUDE[href]
1209
raise OSError("resource not found")
1214
def none_loader(self, href, parser, encoding=None):
1217
def _my_loader(self, href, parse):
1218
# Used to avoid a test-dependency problem where the default loader
1219
# of ElementInclude uses the pyET parser for cET tests.
1221
with open(href, 'rb') as f:
1222
return ET.parse(f).getroot()
1226
def test_xinclude_default(self):
1227
from xml.etree import ElementInclude
1228
doc = self.xinclude_loader('default.xml')
1229
ElementInclude.include(doc, self._my_loader)
1230
self.assertEqual(serialize(doc),
1232
' <p>Example.</p>\n'
1234
' <element key="value">text</element>\n'
1235
' <element>text</element>tail\n'
1236
' <empty-element />\n'
1240
def test_xinclude(self):
1241
from xml.etree import ElementInclude
1243
# Basic inclusion example (XInclude C.1)
1244
document = self.xinclude_loader("C1.xml")
1245
ElementInclude.include(document, self.xinclude_loader)
1246
self.assertEqual(serialize(document),
1248
' <p>120 Mz is adequate for an average home user.</p>\n'
1250
' <p>The opinions represented herein represent those of the individual\n'
1251
' and should not be interpreted as official policy endorsed by this\n'
1252
' organization.</p>\n'
1256
# Textual inclusion example (XInclude C.2)
1257
document = self.xinclude_loader("C2.xml")
1258
ElementInclude.include(document, self.xinclude_loader)
1259
self.assertEqual(serialize(document),
1261
' <p>This document has been accessed\n'
1262
' 324387 times.</p>\n'
1265
# Textual inclusion after sibling element (based on modified XInclude C.2)
1266
document = self.xinclude_loader("C2b.xml")
1267
ElementInclude.include(document, self.xinclude_loader)
1268
self.assertEqual(serialize(document),
1270
' <p>This document has been <em>accessed</em>\n'
1271
' 324387 times.</p>\n'
1272
'</document>') # C2b
1274
# Textual inclusion of XML example (XInclude C.3)
1275
document = self.xinclude_loader("C3.xml")
1276
ElementInclude.include(document, self.xinclude_loader)
1277
self.assertEqual(serialize(document),
1279
' <p>The following is the source of the "data.xml" resource:</p>\n'
1280
" <example><?xml version='1.0'?>\n"
1282
' <item><![CDATA[Brooks & Shields]]></item>\n'
1287
# Fallback example (XInclude C.5)
1288
# Note! Fallback support is not yet implemented
1289
document = self.xinclude_loader("C5.xml")
1290
with self.assertRaises(OSError) as cm:
1291
ElementInclude.include(document, self.xinclude_loader)
1292
self.assertEqual(str(cm.exception), 'resource not found')
1293
self.assertEqual(serialize(document),
1294
'<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
1295
' <ns0:include href="example.txt" parse="text">\n'
1297
' <ns0:include href="fallback-example.txt" parse="text">\n'
1298
' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
1300
' </ns0:fallback>\n'
1304
def test_xinclude_failures(self):
    """Each unsupported include raises FatalIncludeError with a fixed text."""
    from xml.etree import ElementInclude

    def assert_include_fails(source, message):
        # Parse *source*, expand it through self.none_loader and compare
        # the text of the resulting FatalIncludeError with *message*.
        document = ET.XML(source)
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception), message)

    # Test failure to locate included XML file.
    assert_include_fails(XINCLUDE["C1.xml"],
                         "cannot load 'disclaimer.xml' as 'xml'")

    # Test failure to locate included text file.
    assert_include_fails(XINCLUDE["C2.xml"],
                         "cannot load 'count.txt' as 'text'")

    # Test bad parse type.
    assert_include_fails(XINCLUDE_BAD["B1.xml"],
                         "unknown parse type in xi:include tag ('BAD_TYPE')")

    # Test xi:fallback outside xi:include.
    assert_include_fails(XINCLUDE_BAD["B2.xml"],
                         "xi:fallback tag must be child of xi:include "
                         "('{http://www.w3.org/2001/XInclude}fallback')")
1336
# --------------------------------------------------------------------
1339
class BugsTest(unittest.TestCase):
1341
def test_bug_xmltoolkit21(self):
1342
# marshaller gives obscure errors for non-string values
1345
with self.assertRaises(TypeError) as cm:
1347
self.assertEqual(str(cm.exception),
1348
'cannot serialize 123 (type int)')
1350
elem = ET.Element(123)
1353
elem = ET.Element("elem")
1357
elem = ET.Element("elem")
1361
elem = ET.Element("elem")
1362
elem.set(123, "123")
1363
check(elem) # attribute key
1365
elem = ET.Element("elem")
1366
elem.set("123", 123)
1367
check(elem) # attribute value
1369
def test_bug_xmltoolkit25(self):
    """ElementTree.findtext() works for direct and nested paths.

    Regression test for a typo in ElementTree.findtext.
    """
    tree = ET.ElementTree(ET.XML(SAMPLE_XML))
    for path, expected in (("tag", 'text'), ("section/tag", 'subtext')):
        self.assertEqual(tree.findtext(path), expected)
1377
def test_bug_xmltoolkit28(self):
    """A './/tag' search returns matches (it used to cause exceptions)."""
    doc = ET.XML("<doc><table><tbody/></table></doc>")

    absent = summarize_list(doc.findall(".//thead"))
    self.assertEqual(absent, [])

    present = summarize_list(doc.findall(".//tbody"))
    self.assertEqual(present, ['tbody'])
1384
def test_bug_xmltoolkitX1(self):
1385
# dump() doesn't flush the output buffer
1387
tree = ET.XML("<doc><table><tbody/></table></doc>")
1388
with support.captured_stdout() as stdout:
1390
self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
1392
def test_bug_xmltoolkit39(self):
1393
# non-ascii element and attribute names doesn't work
1395
tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
1396
self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
1398
tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1399
b"<tag \xe4ttr='välue' />")
1400
self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
1401
self.assertEqual(ET.tostring(tree, "utf-8"),
1402
b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
1404
tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1405
b'<t\xe4g>text</t\xe4g>')
1406
self.assertEqual(ET.tostring(tree, "utf-8"),
1407
b'<t\xc3\xa4g>text</t\xc3\xa4g>')
1409
tree = ET.Element("t\u00e4g")
1410
self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
1412
tree = ET.Element("tag")
1413
tree.set("\u00e4ttr", "v\u00e4lue")
1414
self.assertEqual(ET.tostring(tree, "utf-8"),
1415
b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
1417
def test_bug_xmltoolkit54(self):
    """Internally defined entities are expanded and serialised correctly.

    Regression test for problems handling internally defined entities.
    The entity expands to U+8230; it must come out as a numeric character
    reference in us-ascii output and as the character itself in unicode
    output.
    """
    # The entity value is written as a character reference so the source
    # stays ASCII-only, matching the \u8230 escape used below.
    e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
               '<doc>&ldots;</doc>')
    # 0x8230 == 33328; a bytes literal cannot hold the raw character.
    self.assertEqual(serialize(e, encoding="us-ascii"),
                     b'<doc>&#33328;</doc>')
    self.assertEqual(serialize(e), '<doc>\u8230</doc>')
1426
def test_bug_xmltoolkit55(self):
    """The parser reports the first undefined entity, not the last."""
    source = (b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
              b'<doc>&ldots;&ndots;&rdots;</doc>')
    with self.assertRaises(ET.ParseError) as caught:
        ET.XML(source)
    message = str(caught.exception)
    self.assertEqual(message, 'undefined entity &ldots;: line 1, column 36')
1435
def test_bug_xmltoolkit60(self):
1436
# Handle crash in stream source.
1438
class ExceptionFile:
1442
self.assertRaises(OSError, ET.parse, ExceptionFile())
1444
def test_bug_xmltoolkit62(self):
1445
# Don't crash when using custom entities.
1447
ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
1448
parser = ET.XMLParser()
1449
parser.entity.update(ENTITIES)
1450
parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
1451
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
1452
<patent-application-publication>
1454
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph>
1456
</patent-application-publication>""")
1458
self.assertEqual(t.find('.//paragraph').text,
1459
'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
1461
def test_bug_xmltoolkit63(self):
1462
# Check reference leak.
1464
tree = ET.TreeBuilder()
1465
tree.start("tag", {})
1470
count = sys.getrefcount(None)
1471
for i in range(1000):
1473
self.assertEqual(sys.getrefcount(None), count)
1475
def test_bug_200708_newline(self):
1476
# Preserve newlines in attributes.
1478
e = ET.Element('SomeTag', text="def _f():\n return 3\n")
1479
self.assertEqual(ET.tostring(e),
1480
b'<SomeTag text="def _f(): return 3 " />')
1481
self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
1482
'def _f():\n return 3\n')
1483
self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
1484
b'<SomeTag text="def _f(): return 3 " />')
1486
def test_bug_200708_close(self):
1487
# Test default builder.
1488
parser = ET.XMLParser() # default
1489
parser.feed("<element>some text</element>")
1490
self.assertEqual(parser.close().tag, 'element')
1492
# Test custom builder.
1495
return ET.Element("element") # simulate root
1496
parser = ET.XMLParser(EchoTarget())
1497
parser.feed("<element>some text</element>")
1498
self.assertEqual(parser.close().tag, 'element')
1500
def test_bug_200709_default_namespace(self):
1501
e = ET.Element("{default}elem")
1502
s = ET.SubElement(e, "{default}elem")
1503
self.assertEqual(serialize(e, default_namespace="default"), # 1
1504
'<elem xmlns="default"><elem /></elem>')
1506
e = ET.Element("{default}elem")
1507
s = ET.SubElement(e, "{default}elem")
1508
s = ET.SubElement(e, "{not-default}elem")
1509
self.assertEqual(serialize(e, default_namespace="default"), # 2
1510
'<elem xmlns="default" xmlns:ns1="not-default">'
1515
e = ET.Element("{default}elem")
1516
s = ET.SubElement(e, "{default}elem")
1517
s = ET.SubElement(e, "elem") # unprefixed name
1518
with self.assertRaises(ValueError) as cm:
1519
serialize(e, default_namespace="default") # 3
1520
self.assertEqual(str(cm.exception),
1521
'cannot use non-qualified names with default_namespace option')
1523
def test_bug_200709_register_namespace(self):
    """A registered prefix is used in place of the generated ns0 prefix."""

    def render(tag):
        # Serialise a fresh, empty element carrying the given tag.
        return ET.tostring(ET.Element(tag))

    # Before registration the serializer generates the prefix.
    self.assertEqual(
        render("{http://namespace.invalid/does/not/exist/}title"),
        b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')

    # After registration the chosen prefix is used instead.
    ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    self.assertEqual(
        render("{http://namespace.invalid/does/not/exist/}title"),
        b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

    # And the Dublin Core namespace is in the default list:
    self.assertEqual(
        render("{http://purl.org/dc/elements/1.1/}title"),
        b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
1538
def test_bug_200709_element_comment(self):
1539
# Not sure if this can be fixed, really (since the serializer needs
1540
# ET.Comment, not cET.comment).
1543
a.append(ET.Comment('foo'))
1544
self.assertEqual(a[0].tag, ET.Comment)
1547
a.append(ET.PI('foo'))
1548
self.assertEqual(a[0].tag, ET.PI)
1550
def test_bug_200709_element_insert(self):
1552
b = ET.SubElement(a, 'b')
1553
c = ET.SubElement(a, 'c')
1556
self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
1558
self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])
1560
def test_bug_200709_iter_comment(self):
1562
b = ET.SubElement(a, 'b')
1563
comment_b = ET.Comment("TEST-b")
1565
self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])
1567
# --------------------------------------------------------------------
1568
# reported on bugs.python.org
1570
def test_bug_1534630(self):
1571
bob = ET.TreeBuilder()
1572
e = bob.data("data")
1573
e = bob.start("tag", {})
1576
self.assertEqual(serialize(e), '<tag />')
1578
def test_issue6233(self):
1579
e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
1580
b'<body>t\xc3\xa3g</body>')
1581
self.assertEqual(ET.tostring(e, 'ascii'),
1582
b"<?xml version='1.0' encoding='ascii'?>\n"
1583
b'<body>tãg</body>')
1584
e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1585
b'<body>t\xe3g</body>')
1586
self.assertEqual(ET.tostring(e, 'ascii'),
1587
b"<?xml version='1.0' encoding='ascii'?>\n"
1588
b'<body>tãg</body>')
1590
def test_issue3151(self):
    """A namespace URI containing '${...}' is kept intact in tag and output."""
    root = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    self.assertEqual(root.tag, '{${stuff}}localname')

    # The wrapping tree is built but not otherwise used.
    t = ET.ElementTree(root)

    rendered = ET.tostring(root)
    self.assertEqual(rendered, b'<ns0:localname xmlns:ns0="${stuff}" />')
1596
def test_issue6565(self):
    """Full-slice assignment replaces every child of an element."""
    target = ET.XML("<body><tag/></body>")
    self.assertEqual(summarize_list(target), ['tag'])

    donor = ET.XML(SAMPLE_XML)
    target[:] = donor[:]
    self.assertEqual(summarize_list(target), ['tag', 'tag', 'section'])
1603
def test_issue10777(self):
    # Registering a namespace twice caused a "dictionary changed size during
    # iteration" error; doing it twice here must simply succeed.
    for _ in range(2):
        ET.register_namespace('test10777', 'http://myuri/')
1611
# --------------------------------------------------------------------
1614
class BasicElementTest(ElementTestCase, unittest.TestCase):
1615
def test_augmentation_type_errors(self):
    """append(), extend() and insert() reject non-element children."""
    e = ET.Element('joe')
    mixed = [ET.Element('bar'), 'foo']

    with self.assertRaises(TypeError):
        e.append('b')
    with self.assertRaises(TypeError):
        e.extend(mixed)
    with self.assertRaises(TypeError):
        e.insert(0, 'foo')
1621
def test_cyclic_gc(self):
1625
# Test the shortest cycle: d->element->d
1627
d.dummyref = ET.Element('joe', attr=d)
1628
wref = weakref.ref(d)
1631
self.assertIsNone(wref())
1633
# A longer cycle: d->e->e2->d
1634
e = ET.Element('joe')
1637
wref = weakref.ref(d)
1638
e2 = ET.SubElement(e, 'foo', attr=d)
1641
self.assertIsNone(wref())
1643
# A cycle between Element objects as children of one another
1645
e1 = ET.Element('e1')
1646
e2 = ET.Element('e2')
1647
e3 = ET.Element('e3')
1651
wref = weakref.ref(e1)
1654
self.assertIsNone(wref())
1656
def test_weakref(self):
1662
wref = weakref.ref(e, wref_cb)
1663
self.assertEqual(wref().tag, 'e')
1665
self.assertEqual(flag, True)
1666
self.assertEqual(wref(), None)
1668
def test_get_keyword_args(self):
    """Element.get() accepts its fallback value as the 'default' keyword."""
    e1 = ET.Element('foo', x=1, y=2, z=3)
    # 'x' is set, so the default is ignored; 'w' is not, so it is returned.
    for key, expected in (('x', 1), ('w', 7)):
        self.assertEqual(e1.get(key, default=7), expected)
1673
def test_pickle(self):
1674
# issue #16076: the C implementation wasn't pickleable.
1675
for dumper, loader in product(self.modules, repeat=2):
1676
e = dumper.Element('foo', bar=42)
1677
e.text = "text goes here"
1678
e.tail = "opposite of head"
1679
dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
1680
e.append(dumper.Element('child'))
1681
e.findall('.//grandchild')[0].set('attr', 'other value')
1683
e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
1686
self.assertEqual(e2.tag, 'foo')
1687
self.assertEqual(e2.attrib['bar'], 42)
1688
self.assertEqual(len(e2), 2)
1689
self.assertEqualElements(e, e2)
1691
def test_pickle_issue18997(self):
1692
for dumper, loader in product(self.modules, repeat=2):
1693
XMLTEXT = """<?xml version="1.0"?>
1694
<group><dogs>4</dogs>
1696
e1 = dumper.fromstring(XMLTEXT)
1697
if hasattr(e1, '__getstate__'):
1698
self.assertEqual(e1.__getstate__()['tag'], 'group')
1699
e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree', dumper, loader)
1700
self.assertEqual(e2.tag, 'group')
1701
self.assertEqual(e2[0].tag, 'dogs')
1704
class ElementTreeTypeTest(unittest.TestCase):
1705
def test_istype(self):
    """The listed public names of ET are all classes."""
    public_classes = ('ParseError', 'QName', 'ElementTree',
                      'Element', 'TreeBuilder', 'XMLParser')
    for name in public_classes:
        self.assertIsInstance(getattr(ET, name), type)
1713
def test_Element_subclass_trivial(self):
1714
class MyElement(ET.Element):
1717
mye = MyElement('foo')
1718
self.assertIsInstance(mye, ET.Element)
1719
self.assertIsInstance(mye, MyElement)
1720
self.assertEqual(mye.tag, 'foo')
1722
# test that attribute assignment works (issue 14849)
1724
self.assertEqual(mye.text, "joe")
1726
def test_Element_subclass_constructor(self):
    """A subclass may override __init__ and still forward to Element."""

    class MyElement(ET.Element):
        # The shared default dict is only forwarded to the base
        # constructor; this class never mutates it.
        def __init__(self, tag, attrib={}, **extra):
            super().__init__(tag + '__', attrib, **extra)

    mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
    self.assertEqual(mye.tag, 'foo__')

    # Positional attrib entries and keyword extras both become attributes.
    attributes = sorted(mye.items())
    self.assertEqual(attributes,
                     [('a', 1), ('b', 2), ('c', 3), ('d', 4)])
1736
def test_Element_subclass_new_method(self):
1737
class MyElement(ET.Element):
1738
def newmethod(self):
1741
mye = MyElement('joe')
1742
self.assertEqual(mye.newmethod(), 'joe')
1745
class ElementFindTest(unittest.TestCase):
1746
def test_find_simple(self):
    """find() and findtext() with plain tag paths."""
    e = ET.XML(SAMPLE_XML)
    for path in ('tag', 'section/tag', './tag'):
        self.assertEqual(e.find(path).tag, 'tag')

    # Swap the third child for the richer sample section.
    e[2] = ET.XML(SAMPLE_SECTION)
    self.assertEqual(e.find('section/nexttag').tag, 'nexttag')

    for path, text in (('./tag', 'text'), ('section/tag', 'subtext')):
        self.assertEqual(e.findtext(path), text)

    # section/nexttag is found but has no text
    for args in (('section/nexttag',), ('section/nexttag', 'default')):
        self.assertEqual(e.findtext(*args), '')

    # tog doesn't exist and 'default' kicks in
    self.assertIsNone(e.findtext('tog'))
    self.assertEqual(e.findtext('tog', 'default'), 'default')

    # An empty element yields '' rather than None.
    document = ET.XML('<tag><empty /></tag>')
    self.assertEqual(document.findtext('empty'), '')
1769
def test_find_xpath(self):
1777
e = ET.XML(LINEAR_XML)
1779
# Test for numeric indexing and last()
1780
self.assertEqual(e.find('./tag[1]').attrib['class'], 'a')
1781
self.assertEqual(e.find('./tag[2]').attrib['class'], 'b')
1782
self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd')
1783
self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c')
1784
self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b')
1786
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]')
1787
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]')
1788
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
1789
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
1791
def test_findall(self):
1792
e = ET.XML(SAMPLE_XML)
1793
e[2] = ET.XML(SAMPLE_SECTION)
1794
self.assertEqual(summarize_list(e.findall('.')), ['body'])
1795
self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag'])
1796
self.assertEqual(summarize_list(e.findall('tog')), [])
1797
self.assertEqual(summarize_list(e.findall('tog/foo')), [])
1798
self.assertEqual(summarize_list(e.findall('*')),
1799
['tag', 'tag', 'section'])
1800
self.assertEqual(summarize_list(e.findall('.//tag')),
1802
self.assertEqual(summarize_list(e.findall('section/tag')), ['tag'])
1803
self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2)
1804
self.assertEqual(summarize_list(e.findall('section/*')),
1805
['tag', 'nexttag', 'nextsection'])
1806
self.assertEqual(summarize_list(e.findall('section//*')),
1807
['tag', 'nexttag', 'nextsection', 'tag'])
1808
self.assertEqual(summarize_list(e.findall('section/.//*')),
1809
['tag', 'nexttag', 'nextsection', 'tag'])
1810
self.assertEqual(summarize_list(e.findall('*/*')),
1811
['tag', 'nexttag', 'nextsection'])
1812
self.assertEqual(summarize_list(e.findall('*//*')),
1813
['tag', 'nexttag', 'nextsection', 'tag'])
1814
self.assertEqual(summarize_list(e.findall('*/tag')), ['tag'])
1815
self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag'])
1816
self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2)
1817
self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2)
1819
self.assertEqual(summarize_list(e.findall('.//tag[@class]')),
1821
self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
1823
self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
1825
self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
1827
self.assertEqual(summarize_list(e.findall('.//section[tag]')),
1829
self.assertEqual(summarize_list(e.findall('.//section[element]')), [])
1830
self.assertEqual(summarize_list(e.findall('../tag')), [])
1831
self.assertEqual(summarize_list(e.findall('section/../tag')),
1833
self.assertEqual(e.findall('section//'), e.findall('section//*'))
1835
def test_test_find_with_ns(self):
1836
e = ET.XML(SAMPLE_XML_NS)
1837
self.assertEqual(summarize_list(e.findall('tag')), [])
1839
summarize_list(e.findall("{http://effbot.org/ns}tag")),
1840
['{http://effbot.org/ns}tag'] * 2)
1842
summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
1843
['{http://effbot.org/ns}tag'] * 3)
1845
def test_findall_different_nsmaps(self):
1847
<a xmlns:x="X" xmlns:y="Y">
1850
<c><x:b/><b/></c><y:b/>
1853
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
1854
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
1856
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
1857
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
1859
def test_bad_find(self):
1860
e = ET.XML(SAMPLE_XML)
1861
with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
1864
def test_find_through_ElementTree(self):
1865
e = ET.XML(SAMPLE_XML)
1866
self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
1867
self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
1868
self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
1870
# this produces a warning
1871
self.assertEqual(summarize_list(ET.ElementTree(e).findall('//tag')),
1875
class ElementIterTest(unittest.TestCase):
1876
def _ilist(self, elem, tag=None):
    """Return the tags produced by ``elem.iter(tag)``, in iteration order."""
    matches = elem.iter(tag)
    return summarize_list(matches)
1879
def test_basic(self):
1880
doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
1881
self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
1882
self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
1883
self.assertEqual(next(doc.iter()).tag, 'html')
1884
self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
1885
self.assertEqual(''.join(doc.find('body').itertext()),
1886
'this is a paragraph.')
1887
self.assertEqual(next(doc.itertext()), 'this is a ')
1889
# iterparse should return an iterator
1890
sourcefile = serialize(doc, to_string=False)
1891
self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
1893
# With an explicit parser too (issue #9708)
1894
sourcefile = serialize(doc, to_string=False)
1895
parser = ET.XMLParser(target=ET.TreeBuilder())
1896
self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
1899
tree = ET.ElementTree(None)
1900
self.assertRaises(AttributeError, tree.iter)
1903
doc = ET.XML("<root>a&<sub>b&</sub>c&</root>")
1904
self.assertEqual(''.join(doc.itertext()), 'a&b&c&')
1906
def test_corners(self):
1907
# single root, no subelements
1909
self.assertEqual(self._ilist(a), ['a'])
1912
b = ET.SubElement(a, 'b')
1913
self.assertEqual(self._ilist(a), ['a', 'b'])
1915
# one child and one grandchild
1916
c = ET.SubElement(b, 'c')
1917
self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
1919
# two children, only first with grandchild
1920
d = ET.SubElement(a, 'd')
1921
self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
1923
# replace first child by second
1926
self.assertEqual(self._ilist(a), ['a', 'd'])
1928
def test_iter_by_tag(self):
1932
<room>bedroom1</room>
1933
<room>bedroom2</room>
1938
<room>bedroom8</room>
1942
self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
1943
self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
1945
# test that iter also accepts 'tag' as a keyword arg
1947
summarize_list(doc.iter(tag='room')),
1950
# make sure both tag=None and tag='*' return all tags
1951
all_tags = ['document', 'house', 'room', 'room',
1952
'shed', 'house', 'room']
1953
self.assertEqual(self._ilist(doc), all_tags)
1954
self.assertEqual(self._ilist(doc, '*'), all_tags)
1957
class TreeBuilderTest(unittest.TestCase):
1958
sample1 = ('<!DOCTYPE html PUBLIC'
1959
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1960
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1961
'<html>text<div>subtext</div>tail</html>')
1963
sample2 = '''<toplevel>sometext</toplevel>'''
1965
def _check_sample1_element(self, e):
1966
self.assertEqual(e.tag, 'html')
1967
self.assertEqual(e.text, 'text')
1968
self.assertEqual(e.tail, None)
1969
self.assertEqual(e.attrib, {})
1971
self.assertEqual(len(children), 1)
1973
self.assertEqual(child.tag, 'div')
1974
self.assertEqual(child.text, 'subtext')
1975
self.assertEqual(child.tail, 'tail')
1976
self.assertEqual(child.attrib, {})
1978
def test_dummy_builder(self):
1979
class BaseDummyBuilder:
1983
class DummyBuilder(BaseDummyBuilder):
1984
data = start = end = lambda *a: None
1986
parser = ET.XMLParser(target=DummyBuilder())
1987
parser.feed(self.sample1)
1988
self.assertEqual(parser.close(), 42)
1990
parser = ET.XMLParser(target=BaseDummyBuilder())
1991
parser.feed(self.sample1)
1992
self.assertEqual(parser.close(), 42)
1994
parser = ET.XMLParser(target=object())
1995
parser.feed(self.sample1)
1996
self.assertIsNone(parser.close())
1998
def test_treebuilder_elementfactory_none(self):
1999
parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
2000
parser.feed(self.sample1)
2002
self._check_sample1_element(e)
2004
def test_subclass(self):
2005
class MyTreeBuilder(ET.TreeBuilder):
2006
def foobar(self, x):
2009
tb = MyTreeBuilder()
2010
self.assertEqual(tb.foobar(10), 20)
2012
parser = ET.XMLParser(target=tb)
2013
parser.feed(self.sample1)
2016
self._check_sample1_element(e)
2018
def test_element_factory(self):
2020
def myfactory(tag, attrib):
2023
return ET.Element(tag, attrib)
2025
tb = ET.TreeBuilder(element_factory=myfactory)
2026
parser = ET.XMLParser(target=tb)
2027
parser.feed(self.sample2)
2030
self.assertEqual(lst, ['toplevel'])
2032
def _check_element_factory_class(self, cls):
2033
tb = ET.TreeBuilder(element_factory=cls)
2035
parser = ET.XMLParser(target=tb)
2036
parser.feed(self.sample1)
2038
self.assertIsInstance(e, cls)
2039
self._check_sample1_element(e)
2041
def test_element_factory_subclass(self):
2042
class MyElement(ET.Element):
2044
self._check_element_factory_class(MyElement)
2046
def test_element_factory_pure_python_subclass(self):
2047
# Mimic SimpleTAL's behaviour (issue #16089): both versions of
2048
# TreeBuilder should be able to cope with a subclass of the
2049
# pure Python Element class.
2050
base = ET._Element_Py
2051
# Not from a C extension
2052
self.assertEqual(base.__module__, 'xml.etree.ElementTree')
2053
# Force some multiple inheritance with a C class to make things
2055
class MyElement(base, ValueError):
2057
self._check_element_factory_class(MyElement)
2059
def test_doctype(self):
2060
class DoctypeParser:
2063
def doctype(self, name, pubid, system):
2064
self._doctype = (name, pubid, system)
2067
return self._doctype
2069
parser = ET.XMLParser(target=DoctypeParser())
2070
parser.feed(self.sample1)
2072
self.assertEqual(parser.close(),
2073
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
2074
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
2077
class XMLParserTest(unittest.TestCase):
2078
sample1 = b'<file><line>22</line></file>'
2079
sample2 = (b'<!DOCTYPE html PUBLIC'
2080
b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
2081
b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
2082
b'<html>text</html>')
2083
sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
2084
'<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')
2086
def _check_sample_element(self, e):
2087
self.assertEqual(e.tag, 'file')
2088
self.assertEqual(e[0].tag, 'line')
2089
self.assertEqual(e[0].text, '22')
2091
def test_constructor_args(self):
2092
# Positional args. The first (html) is not supported, but should be
2093
# nevertheless correctly accepted.
2094
parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
2095
parser.feed(self.sample1)
2096
self._check_sample_element(parser.close())
2098
# Now as keyword args.
2099
parser2 = ET.XMLParser(encoding='utf-8',
2101
target=ET.TreeBuilder())
2102
parser2.feed(self.sample1)
2103
self._check_sample_element(parser2.close())
2105
def test_subclass(self):
2106
class MyParser(ET.XMLParser):
2109
parser.feed(self.sample1)
2110
self._check_sample_element(parser.close())
2112
def test_subclass_doctype(self):
2114
class MyParserWithDoctype(ET.XMLParser):
2115
def doctype(self, name, pubid, system):
2117
_doctype = (name, pubid, system)
2119
parser = MyParserWithDoctype()
2120
with self.assertWarns(DeprecationWarning):
2121
parser.feed(self.sample2)
2123
self.assertEqual(_doctype,
2124
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
2125
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
2127
def test_parse_string(self):
2128
parser = ET.XMLParser(target=ET.TreeBuilder())
2129
parser.feed(self.sample3)
2131
self.assertEqual(e.tag, 'money')
2132
self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')
2133
self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')
2136
class NamespaceParseTest(unittest.TestCase):
    """Searches using ``{uri}tag`` paths on a document with two namespaces."""

    def test_find_with_namespace(self):
        # Paths spelled with explicit ``{uri}`` prefixes must match the
        # elements of SAMPLE_XML_NS_ELEMS that live in that namespace,
        # both directly under the root and anywhere below it ('.//').
        nsmap = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1)
        self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2)
        self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
2146
class ElementSlicingTest(unittest.TestCase):
2147
def _elem_tags(self, elemlist):
    """Return a list with the ``tag`` of every item in *elemlist*, in order."""
    return [e.tag for e in elemlist]
2150
def _subelem_tags(self, elem):
    """Return the tags of the items obtained by iterating *elem*.

    For an Element, iterating yields its direct children, so this is the
    list of child tags in document order.
    """
    return self._elem_tags(list(elem))
2153
def _make_elem_with_children(self, numchildren):
2154
"""Create an Element with a tag 'a', with the given amount of children
2155
named 'a0', 'a1' ... and so on.
2159
for i in range(numchildren):
2160
ET.SubElement(e, 'a%s' % i)
2163
def test_getslice_single_index(self):
    # Integer indexing: a positive and a negative index each select one
    # child, and an index past the last child raises IndexError.
    e = self._make_elem_with_children(10)

    self.assertEqual(e[1].tag, 'a1')
    self.assertEqual(e[-2].tag, 'a8')

    # The lambda defers the lookup so assertRaises can observe the error.
    self.assertRaises(IndexError, lambda: e[12])
2171
def test_getslice_range(self):
    # Plain start:stop slices over six children: open-ended, exact,
    # past-the-end (clamped), interior, negative stop, and open start.
    e = self._make_elem_with_children(6)

    self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5'])
    self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5'])
    self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5'])
    self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4'])
    self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4'])
    self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1'])
2181
def test_getslice_steps(self):
    # Extended slices with a positive step over ten children.
    e = self._make_elem_with_children(10)

    self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9'])
    self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
    self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8'])
    self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9'])
2189
def test_getslice_negative_steps(self):
    # Extended slices with a negative step walk the children backwards.
    e = self._make_elem_with_children(4)

    self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
    self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1'])
2195
def test_delslice(self):
2196
e = self._make_elem_with_children(4)
2198
self.assertEqual(self._subelem_tags(e), ['a2', 'a3'])
2200
e = self._make_elem_with_children(4)
2202
self.assertEqual(self._subelem_tags(e), [])
2204
e = self._make_elem_with_children(4)
2206
self.assertEqual(self._subelem_tags(e), [])
2208
e = self._make_elem_with_children(4)
2210
self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
2212
e = self._make_elem_with_children(4)
2214
self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
2216
e = self._make_elem_with_children(2)
2218
self.assertEqual(self._subelem_tags(e), ['a1'])
2221
class IOTest(unittest.TestCase):
2223
support.unlink(TESTFN)
2225
def test_encoding(self):
2226
# Test encoding issues.
2227
elem = ET.Element("tag")
2229
self.assertEqual(serialize(elem), '<tag>abc</tag>')
2230
self.assertEqual(serialize(elem, encoding="utf-8"),
2232
self.assertEqual(serialize(elem, encoding="us-ascii"),
2234
for enc in ("iso-8859-1", "utf-16", "utf-32"):
2235
self.assertEqual(serialize(elem, encoding=enc),
2236
("<?xml version='1.0' encoding='%s'?>\n"
2237
"<tag>abc</tag>" % enc).encode(enc))
2239
elem = ET.Element("tag")
2240
elem.text = "<&\"\'>"
2241
self.assertEqual(serialize(elem), '<tag><&"\'></tag>')
2242
self.assertEqual(serialize(elem, encoding="utf-8"),
2243
b'<tag><&"\'></tag>')
2244
self.assertEqual(serialize(elem, encoding="us-ascii"),
2245
b'<tag><&"\'></tag>')
2246
for enc in ("iso-8859-1", "utf-16", "utf-32"):
2247
self.assertEqual(serialize(elem, encoding=enc),
2248
("<?xml version='1.0' encoding='%s'?>\n"
2249
"<tag><&\"'></tag>" % enc).encode(enc))
2251
elem = ET.Element("tag")
2252
elem.attrib["key"] = "<&\"\'>"
2253
self.assertEqual(serialize(elem), '<tag key="<&"\'>" />')
2254
self.assertEqual(serialize(elem, encoding="utf-8"),
2255
b'<tag key="<&"\'>" />')
2256
self.assertEqual(serialize(elem, encoding="us-ascii"),
2257
b'<tag key="<&"\'>" />')
2258
for enc in ("iso-8859-1", "utf-16", "utf-32"):
2259
self.assertEqual(serialize(elem, encoding=enc),
2260
("<?xml version='1.0' encoding='%s'?>\n"
2261
"<tag key=\"<&"'>\" />" % enc).encode(enc))
2263
elem = ET.Element("tag")
2264
elem.text = '\xe5\xf6\xf6<>'
2265
self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6<></tag>')
2266
self.assertEqual(serialize(elem, encoding="utf-8"),
2267
b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>')
2268
self.assertEqual(serialize(elem, encoding="us-ascii"),
2269
b'<tag>åöö<></tag>')
2270
for enc in ("iso-8859-1", "utf-16", "utf-32"):
2271
self.assertEqual(serialize(elem, encoding=enc),
2272
("<?xml version='1.0' encoding='%s'?>\n"
2273
"<tag>Ƅƶƶ<></tag>" % enc).encode(enc))
2275
elem = ET.Element("tag")
2276
elem.attrib["key"] = '\xe5\xf6\xf6<>'
2277
self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6<>" />')
2278
self.assertEqual(serialize(elem, encoding="utf-8"),
2279
b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />')
2280
self.assertEqual(serialize(elem, encoding="us-ascii"),
2281
b'<tag key="åöö<>" />')
2282
for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
2283
self.assertEqual(serialize(elem, encoding=enc),
2284
("<?xml version='1.0' encoding='%s'?>\n"
2285
"<tag key=\"Ƅƶƶ<>\" />" % enc).encode(enc))
2287
def test_write_to_filename(self):
2288
tree = ET.ElementTree(ET.XML('''<site />'''))
2290
with open(TESTFN, 'rb') as f:
2291
self.assertEqual(f.read(), b'''<site />''')
2293
def test_write_to_text_file(self):
    # Writing with encoding='unicode' to a text-mode file object must
    # leave that file open (it is checked while still inside the `with`)
    # and must produce the bare element without an XML declaration.
    tree = ET.ElementTree(ET.XML('''<site />'''))
    with open(TESTFN, 'w', encoding='utf-8') as f:
        tree.write(f, encoding='unicode')
        self.assertFalse(f.closed)
    # Re-read in binary mode to compare the exact bytes on disk.
    with open(TESTFN, 'rb') as f:
        self.assertEqual(f.read(), b'''<site />''')
2301
def test_write_to_binary_file(self):
2302
tree = ET.ElementTree(ET.XML('''<site />'''))
2303
with open(TESTFN, 'wb') as f:
2305
self.assertFalse(f.closed)
2306
with open(TESTFN, 'rb') as f:
2307
self.assertEqual(f.read(), b'''<site />''')
2309
def test_write_to_binary_file_with_bom(self):
    # UTF-16 output written to a binary file must equal the XML
    # declaration plus document encoded with the "utf-16" codec (which
    # prepends a BOM), for both buffered and unbuffered files, and
    # write() must not close the caller's file object.
    tree = ET.ElementTree(ET.XML('''<site />'''))
    # test BOM writing to buffered file
    with open(TESTFN, 'wb') as f:
        tree.write(f, encoding='utf-16')
        self.assertFalse(f.closed)
    with open(TESTFN, 'rb') as f:
        # The adjacent literals are concatenated before .encode() applies.
        self.assertEqual(f.read(),
                '''<?xml version='1.0' encoding='utf-16'?>\n'''
                '''<site />'''.encode("utf-16"))
    # test BOM writing to non-buffered file
    with open(TESTFN, 'wb', buffering=0) as f:
        tree.write(f, encoding='utf-16')
        self.assertFalse(f.closed)
    with open(TESTFN, 'rb') as f:
        self.assertEqual(f.read(),
                '''<?xml version='1.0' encoding='utf-16'?>\n'''
                '''<site />'''.encode("utf-16"))
2328
def test_read_from_stringio(self):
2329
tree = ET.ElementTree()
2330
stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
2332
self.assertEqual(tree.getroot().tag, 'site')
2334
def test_write_to_stringio(self):
    # encoding='unicode' writes str, so an in-memory text stream is a
    # valid target and receives the document without a declaration.
    tree = ET.ElementTree(ET.XML('''<site />'''))
    stream = io.StringIO()
    tree.write(stream, encoding='unicode')
    self.assertEqual(stream.getvalue(), '''<site />''')
2340
def test_read_from_bytesio(self):
2341
tree = ET.ElementTree()
2342
raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
2344
self.assertEqual(tree.getroot().tag, 'site')
2346
def test_write_to_bytesio(self):
2347
tree = ET.ElementTree(ET.XML('''<site />'''))
2350
self.assertEqual(raw.getvalue(), b'''<site />''')
2355
def test_read_from_user_text_reader(self):
2356
stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
2357
reader = self.dummy()
2358
reader.read = stream.read
2359
tree = ET.ElementTree()
2361
self.assertEqual(tree.getroot().tag, 'site')
2363
def test_write_to_user_text_writer(self):
    # The target only needs a write() method: `writer` is a bare helper
    # object whose write is borrowed from a StringIO, so the output can
    # be inspected through the StringIO afterwards.
    tree = ET.ElementTree(ET.XML('''<site />'''))
    stream = io.StringIO()
    writer = self.dummy()
    writer.write = stream.write
    tree.write(writer, encoding='unicode')
    self.assertEqual(stream.getvalue(), '''<site />''')
2371
def test_read_from_user_binary_reader(self):
2372
raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
2373
reader = self.dummy()
2374
reader.read = raw.read
2375
tree = ET.ElementTree()
2377
self.assertEqual(tree.getroot().tag, 'site')
2378
tree = ET.ElementTree()
2380
def test_write_to_user_binary_writer(self):
2381
tree = ET.ElementTree(ET.XML('''<site />'''))
2383
writer = self.dummy()
2384
writer.write = raw.write
2386
self.assertEqual(raw.getvalue(), b'''<site />''')
2388
def test_write_to_user_binary_writer_with_bom(self):
2389
tree = ET.ElementTree(ET.XML('''<site />'''))
2391
writer = self.dummy()
2392
writer.write = raw.write
2393
writer.seekable = lambda: True
2394
writer.tell = raw.tell
2395
tree.write(writer, encoding="utf-16")
2396
self.assertEqual(raw.getvalue(),
2397
'''<?xml version='1.0' encoding='utf-16'?>\n'''
2398
'''<site />'''.encode("utf-16"))
2400
def test_tostringlist_invariant(self):
2401
root = ET.fromstring('<tag>foo</tag>')
2403
ET.tostring(root, 'unicode'),
2404
''.join(ET.tostringlist(root, 'unicode')))
2406
ET.tostring(root, 'utf-16'),
2407
b''.join(ET.tostringlist(root, 'utf-16')))
2409
def test_short_empty_elements(self):
2410
root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
2412
ET.tostring(root, 'unicode'),
2413
'<tag>a<x />b<y />c</tag>')
2415
ET.tostring(root, 'unicode', short_empty_elements=True),
2416
'<tag>a<x />b<y />c</tag>')
2418
ET.tostring(root, 'unicode', short_empty_elements=False),
2419
'<tag>a<x></x>b<y></y>c</tag>')
2422
class ParseErrorTest(unittest.TestCase):
2423
def test_subclass(self):
    # ParseError instances must also be SyntaxError instances.
    self.assertIsInstance(ET.ParseError(), SyntaxError)
2426
def _get_error(self, s):
2429
except ET.ParseError as e:
2432
def test_error_position(self):
    # Each malformed input must yield an error whose .position equals the
    # expected pair.
    # NOTE(review): presumably (line, column) with a 0-based column -- confirm.
    self.assertEqual(self._get_error('foo').position, (1, 0))
    self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
    self.assertEqual(self._get_error('foobar<').position, (1, 6))
2437
def test_error_code(self):
    # The error obtained for the input 'foo' must carry the numeric code
    # that expat assigns to XML_ERROR_SYNTAX.
    import xml.parsers.expat.errors as ERRORS
    self.assertEqual(self._get_error('foo').code,
            ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
2443
class KeywordArgsTest(unittest.TestCase):
2444
# Test various issues with keyword arguments passed to ET.Element
2445
# constructor and methods
2446
def test_issue14818(self):
2447
x = ET.XML("<a>foo</a>")
2448
self.assertEqual(x.find('a', None),
2449
x.find(path='a', namespaces=None))
2450
self.assertEqual(x.findtext('a', None, None),
2451
x.findtext(path='a', default=None, namespaces=None))
2452
self.assertEqual(x.findall('a', None),
2453
x.findall(path='a', namespaces=None))
2454
self.assertEqual(list(x.iterfind('a', None)),
2455
list(x.iterfind(path='a', namespaces=None)))
2457
self.assertEqual(ET.Element('a').attrib, {})
2459
ET.Element('a', dict(href="#", id="foo")),
2460
ET.Element('a', attrib=dict(href="#", id="foo")),
2461
ET.Element('a', dict(href="#"), id="foo"),
2462
ET.Element('a', href="#", id="foo"),
2463
ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
2466
self.assertEqual(e.tag, 'a')
2467
self.assertEqual(e.attrib, dict(href="#", id="foo"))
2469
e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'})
2470
self.assertEqual(e2.attrib['key1'], 'value1')
2472
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
2473
ET.Element('a', "I'm not a dict")
2474
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
2475
ET.Element('a', attrib="I'm not a dict")
2477
# --------------------------------------------------------------------
2479
class NoAcceleratorTest(unittest.TestCase):
2482
raise unittest.SkipTest('only for the Python version')
2484
# Test that the C accelerator was not imported for pyET
2485
def test_correct_import_pyET(self):
    # The type of methods defined in Python code is types.FunctionType,
    # while the type of methods defined inside _elementtree is
    # <class 'wrapper_descriptor'>
    self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
    self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
2492
# --------------------------------------------------------------------
2495
class CleanContext(object):
2496
"""Provide default namespace mapping and path cache."""
2497
checkwarnings = None
2499
def __init__(self, quiet=False):
2500
if sys.flags.optimize >= 2:
2501
# under -OO, doctests cannot be run and therefore not all warnings
2505
# Search behaviour is broken if search path starts with "/".
2506
("This search is broken in 1.3 and earlier, and will be fixed "
2507
"in a future version. If you rely on the current behaviour, "
2508
"change it to '.+'", FutureWarning),
2509
# Element.getchildren() and Element.getiterator() are deprecated.
2510
("This method will be removed in future versions. "
2511
"Use .+ instead.", DeprecationWarning),
2512
("This method will be removed in future versions. "
2513
"Use .+ instead.", PendingDeprecationWarning))
2514
self.checkwarnings = support.check_warnings(*deprecations, quiet=quiet)
2516
def __enter__(self):
    """Snapshot the namespace map and path cache, then enter the warnings check.

    Stores on ``self`` a reference to the live namespace map, a copy of
    it, and the original ElementPath cache object; installs a copy of the
    cache in ``ElementPath`` so later changes do not touch the original.
    Returns ``None`` (the ``with`` target, if any, is not meaningful).
    """
    from xml.etree import ElementPath
    self._nsmap = ET.register_namespace._namespace_map
    # Copy the default namespace mapping
    self._nsmap_copy = self._nsmap.copy()
    # Copy the path cache (should be empty)
    self._path_cache = ElementPath._cache
    ElementPath._cache = self._path_cache.copy()
    self.checkwarnings.__enter__()
2526
def __exit__(self, *args):
2527
from xml.etree import ElementPath
2528
# Restore mapping and path cache
2530
self._nsmap.update(self._nsmap_copy)
2531
ElementPath._cache = self._path_cache
2532
self.checkwarnings.__exit__(*args)
2535
def test_main(module=None):
2536
# When invoked without a module, runs the Python ET tests by loading pyET.
2537
# Otherwise, uses the given module as the ET.
2539
pyET = import_fresh_module('xml.etree.ElementTree',
2540
blocked=['_elementtree'])
2555
ElementTreeTypeTest,
2564
# These tests will only run for the pure-Python version that doesn't import
2565
# _elementtree. We can't use skipUnless here, because pyET is filled in only
2566
# after the module is loaded.
2568
test_classes.extend([
2573
# XXX the C module should give the same warnings as the Python module
2574
with CleanContext(quiet=(pyET is not ET)):
2575
support.run_unittest(*test_classes)
2577
# don't interfere with subsequent tests
2581
if __name__ == '__main__':