3
# This file is part of Diamond.
5
# Diamond is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation, either version 3 of the License, or
8
# (at your option) any later version.
10
# Diamond is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with Diamond. If not, see <http://www.gnu.org/licenses/>.
26
from lxml import etree
34
##########################
36
##########################
39
def __init__(self, schemafile):
# Parse the schema and build the tag -> callback dispatch table.
# The schema is run through preprocess.preprocess(), wrapped in a
# cStringIO buffer and parsed by lxml with XML comments removed
# (remove_comments=True); the parsed document is kept on self.tree.
# self.callbacks maps a RELAX NG tag name to the cb_* method handling it.
# NOTE(review): the embedded listing numbers skip 47, 53, 56 and 57 inside
# the dict literal, and cb_data, cb_list, cb_name and cb_text (defined
# further down) are not registered in the visible entries -- confirm
# against the full file.
40
p = etree.XMLParser(remove_comments=True)
41
self.tree = etree.parse(cStringIO.StringIO(preprocess.preprocess(schemafile)), p)
43
self.callbacks = {'element': self.cb_element,
44
'documentation': self.cb_documentation,
45
'value': self.cb_value,
46
'attribute': self.cb_attribute,
48
'optional': self.cb_optional,
49
'zeroOrMore': self.cb_zeroormore,
50
'oneOrMore': self.cb_oneormore,
51
'choice': self.cb_choice,
52
'empty': self.cb_empty,
54
'group': self.cb_group,
55
'interleave': self.cb_group,
58
'anyName' : self.cb_anyname,
59
'nsName' : self.cb_nsname,
60
'except' : self.cb_except,
61
'ignore' : self.cb_ignore,
62
'notAllowed' : self.cb_notallowed}
71
def element_children(self, element):
# Collect the child elements of `element`, expanding <ref> children.
# A child whose tag is "ref" is looked up by its "name" attribute as a
# <define> directly under /grammar (RELAX NG namespace) in self.tree, and
# the children of the first match are gathered by recursion. Missing names
# and unresolved references are reported through debug.deprint.
# NOTE(review): the creation of `children`, the branch structure (which
# statements sit under if/else) and the return statement are not visible
# in this listing -- confirm against the full file.
73
Return a list of the children of the supplied element, following references
78
for child1 in element.iterchildren(tag=etree.Element):
79
if self.tag(child1) == "ref":
80
if not "name" in child1.keys():
81
debug.deprint("Warning: Encountered reference with no name")
84
name = child1.get("name")
86
xpath = self.tree.xpath('/t:grammar/t:define[@name="' + name + '"]',
87
namespaces={'t': 'http://relaxng.org/ns/structure/1.0'})
90
debug.deprint("Warning: Schema reference %s not found" % name, 0)
93
for child2 in self.element_children(xpath[0]):
94
children.append(child2)
96
# presumably the non-"ref" branch: keep the child itself
children.append(child1)
100
def choice_children(self, children):
# Flatten nested <choice> elements: a child tagged "choice" contributes the
# recursively flattened result of element_children() on it instead of
# itself.
# NOTE(review): the initialisation of `out_children`, the else branch and
# the return statement are not visible in this listing.
102
Collapse all choices within a choice into a single list of (non-choice) children
106
for child in children:
107
if self.tag(child) == "choice":
108
out_children = out_children + self.choice_children(self.element_children(child))
110
# presumably the non-choice branch
out_children.append(child)
114
def valid_children(self, eid):
# Convert the schema children of the node identified by `eid` into tree
# objects: each child returned by element_children(node) goes through
# to_tree() and is collected into `results` with self.append().
# `eid` is tested against tree.Tree with isinstance, compared with the
# string ":start", and passed to self.tree.xpath() as an XPath expression;
# how these cases are linked is not fully visible here.
# NOTE(review): the initialisation of `results`, several branches and the
# return statement are missing from this listing.
115
if isinstance(eid, tree.Tree):
120
# the schema's <start> element (first match)
node = self.tree.xpath('/t:grammar/t:start', namespaces={'t': 'http://relaxng.org/ns/structure/1.0'})[0]
122
debug.deprint("No valid start node found. Are you using a library Relax-NG file like spud_base.rng?", 0)
125
xpath = self.tree.xpath(eid)
127
debug.deprint("Warning: no element with XPath %s" % eid)
133
for child in self.element_children(node):
134
self.append(results, self.to_tree(child))
136
# a document root must be unique: report every candidate otherwise
if eid == ":start" and len(results) != 1:
137
debug.deprint("Error: there must be exactly one root element in an XML document, but found:", 0)
138
for result in results:
139
debug.deprint(" %s" % result.name, 0)
144
def to_tree(self, element):
# Dispatch `element` to the callback registered for its tag in
# self.callbacks and call it with (element, facts).
# NOTE(review): the definition of `facts` and the return statement are not
# visible in this listing.
145
tag = self.tag(element)
146
f = self.callbacks[tag]
148
x = f(element, facts)
151
#############################################
152
# Beginning of schema processing functions. #
153
#############################################
155
def cb_name(self, element, facts):
# NOTE(review): the body of this callback is not visible in this listing.
159
def cb_element(self, element, facts):
# Build a tree.Tree for an <element> node.
# Gathers keyword arguments in `newfacts`: the cardinality inherited from
# `facts`, the element's "name" attribute (a warning is printed when it has
# none), and 'schemaname' set to the element's path in self.tree. Children
# whose tag is not element/optional/zeroOrMore/oneOrMore/ignore are run
# through their callbacks with `newfacts`, after which
# newfacts["datatype"] is normalised and tree.Tree(**newfacts) is returned.
# NOTE(review): the creation of `newfacts`, `l_values` and `l_data` and
# several branches of the datatype normalisation are missing from this
# listing -- the comments below describe only what is visible.
161
if "cardinality" in facts:
162
newfacts["cardinality"] = facts["cardinality"]
164
if "name" in element.keys():
165
newfacts["name"] = element.get("name")
167
debug.deprint("Warning: Encountered element with no name")
169
newfacts['schemaname'] = self.tree.getpath(element)
171
for child in self.element_children(element):
172
tag = self.tag(child)
174
# structural children are skipped here; only the remaining tags are
# dispatched with newfacts
if tag not in ['element', 'optional', 'zeroOrMore', 'oneOrMore', 'ignore']:
175
f = self.callbacks[tag]
176
x = f(child, newfacts)
179
d = newfacts["datatype"]
180
if isinstance(d, tuple):
188
newfacts["datatype"] = d
190
newfacts["datatype"] = None
191
# a single plist.List datatype is unwrapped from its container
elif len(d) == 1 and isinstance(d[0], plist.List):
192
newfacts["datatype"] = d[0]
197
if isinstance(x, str):
203
if "name" in element.keys():
204
debug.deprint("Warning: Element %s has multiple datatypes - using first one" % newfacts["name"])
206
debug.deprint("Warning: Unnamed element has multiple datatypes - using first one")
209
if len(l_values) == 0:
210
newfacts["datatype"] = l_data[0]
212
newfacts["datatype"] = tuple([tuple(l_values)] + l_data[0])
216
return tree.Tree(**newfacts)
218
def cb_documentation(self, element, facts):
    """Store the text of a documentation element in ``facts['doc']``.

    Args:
        element: Node whose ``text`` attribute holds the documentation
            string (may be ``None`` for an empty node).
        facts: Dict of facts being gathered for the enclosing schema node;
            it is updated in place.
    """
    facts['doc'] = element.text
221
def cb_value(self, element, facts):
# Add the text of a <value> element to facts["datatype"], which is stored
# back as a tuple; an existing datatype is first turned into a list so the
# new value can be appended.
# NOTE(review): the branch that creates `l` when facts has no "datatype"
# is not visible in this listing.
222
if "datatype" in facts:
223
l = list(facts["datatype"])
227
l.append(element.text)
228
facts["datatype"] = tuple(l)
230
def cb_attribute(self, element, facts):
# Record an <attribute> in facts["attrs"] under its "name" attribute as a
# (datatype, curval) tuple. The attribute's children are run through their
# callbacks with a separate `newfacts` dict, and the datatype found there
# is normalised before being stored.
# NOTE(review): the creation of `newfacts`, `new_datatype`, `l_values`,
# `l_data` and `curval`, plus several branches, are missing from this
# listing -- the comments describe only what is visible.
231
if not "name" in element.keys():
232
debug.deprint("Warning: Encountered attribute with no name")
236
name = element.get("name")
238
for child in self.element_children(element):
239
tag = self.tag(child)
240
f = self.callbacks[tag]
241
x = f(child, newfacts)
243
if "attrs" not in facts:
247
datatype = newfacts["datatype"]
249
debug.deprint("Warning: Encountered attribute with no datatype")
253
if isinstance(datatype, tuple):
257
new_datatype.append(x)
258
datatype = new_datatype
259
if len(datatype) == 0:
261
# a one-element container is unwrapped to the bare datatype
elif len(datatype) == 1:
262
datatype = datatype[0]
263
if isinstance(datatype, str):
272
if isinstance(x, str):
278
debug.deprint("Warning: Attribute %s has multiple datatypes - using first one" % name)
279
if len(l_values) == 0:
282
datatype = tuple([tuple(l_values)] + l_data[0])
284
datatype = tuple(l_values)
286
facts["attrs"][name] = (datatype, curval)
288
def cb_data(self, element, facts):
# Translate the "type" attribute of a <data> element into a Python type
# via `mapping` and add it to facts["datatype"]. An existing datatype is
# turned into a list first (a tuple is expanded, any other value wrapped).
# NOTE(review): only the first entry of `mapping` ('integer': int) is
# visible; the remaining entries, the branch creating `l` when there is no
# datatype yet, and the condition choosing between l[0] and tuple(l) are
# missing from this listing. An unknown type name would raise KeyError at
# mapping[datatype_name].
289
if "datatype" in facts:
290
if isinstance(facts["datatype"], tuple):
291
l = list(facts["datatype"])
293
l = [facts["datatype"]]
297
mapping = {'integer': int,
308
datatype_name = element.get("type")
309
l.append(mapping[datatype_name])
311
facts["datatype"] = l[0]
313
facts["datatype"] = tuple(l)
315
def cb_optional(self, element, facts):
# Handle <optional>: sets facts["cardinality"] to '?' (mutating the
# caller's dict) and runs the callback of every child, collecting the
# results into `r` with self.append().
# NOTE(review): the initialisation of `r` and the return statement are not
# visible in this listing.
316
facts["cardinality"] = '?'
318
for child in self.element_children(element):
319
tag = self.tag(child)
320
f = self.callbacks[tag]
321
self.append(r, f(child, facts))
325
def cb_zeroormore(self, element, facts):
# Handle <zeroOrMore>: sets facts["cardinality"] to '*' (mutating the
# caller's dict) and runs the callback of every child, collecting the
# results into `r` with self.append().
# NOTE(review): the initialisation of `r` and the return statement are not
# visible in this listing.
326
facts["cardinality"] = '*'
328
for child in self.element_children(element):
329
tag = self.tag(child)
330
f = self.callbacks[tag]
331
self.append(r, f(child, facts))
335
def cb_oneormore(self, element, facts):
# Handle <oneOrMore>: sets facts["cardinality"] to '+' (mutating the
# caller's dict) and runs the callback of every child, collecting the
# results into `r` with self.append().
# NOTE(review): the initialisation of `r` and the return statement are not
# visible in this listing.
336
facts["cardinality"] = '+'
338
for child in self.element_children(element):
339
tag = self.tag(child)
340
f = self.callbacks[tag]
341
self.append(r, f(child, facts))
345
def cb_choice(self, element, facts):
# Handle <choice>. When one of the direct children is a <value>, the
# children are dispatched to their callbacks (choice between values).
# Otherwise the nested choices are flattened with choice_children(); a
# two-way choice is examined for an <empty> alternative and may be
# delegated to cb_oneormore or to the callback of the non-empty
# alternative; the general case builds a choice.Choice from the results of
# the children's callbacks.
# NOTE(review): the creation of `r` and `newfacts`, the calls inside the
# value loop and the conditions around the two-way special case are
# missing from this listing.
346
# there are really two cases here.
347
# choice between values of elements,
348
# and choice between elements
350
# note: iterates the direct children only (references are not followed)
tagnames = [self.tag(child) for child in element]
352
if "value" in tagnames:
353
for child in self.element_children(element):
354
tag = self.tag(child)
355
f = self.callbacks[tag]
359
if "schemaname" in facts:
363
children = self.choice_children(self.element_children(element))
365
# bloody simplified RNG
366
if len(children) == 2:
367
empty = [x for x in children if self.tag(x) == "empty"]
368
nonempty = [x for x in children if self.tag(x) != "empty"]
370
tag = self.tag(nonempty[0])
371
if tag == "oneOrMore":
372
return self.cb_oneormore(element, facts)
374
f = self.callbacks[tag]
375
return f(element, facts)
377
for child in children:
379
tag = self.tag(child)
380
f = self.callbacks[tag]
381
self.append(r, f(child, newfacts))
383
return choice.Choice(r, **facts)
385
def cb_empty(self, element, facts):
# NOTE(review): the body of this callback is not visible in this listing.
388
def cb_list(self, element, facts):
# Handle <list>: looks up the callback for each child, reads the resulting
# "datatype" and "cardinality" from `newfacts`, and stores an updated
# tuple in facts["datatype"].
# NOTE(review): the creation of `newfacts`, the callback invocation, the
# use of `d` and `c`, and the construction of `e` are only partly visible
# in this listing.
390
for child in self.element_children(element):
391
tag = self.tag(child)
392
f = self.callbacks[tag]
395
d = newfacts["datatype"]
397
c = newfacts["cardinality"]
400
if isinstance(d, tuple):
405
if "datatype" in facts:
406
e = list(facts["datatype"])
411
facts["datatype"] = tuple(e)
413
def cb_group(self, element, facts):
# Handle <group> (also registered for 'interleave' in self.callbacks):
# runs the callback of every child with `newfacts` and collects the
# results into `results` with self.append().
# NOTE(review): the creation of `results` and `newfacts` and the return
# statement are not visible in this listing.
415
for child in self.element_children(element):
417
tag = self.tag(child)
418
f = self.callbacks[tag]
419
self.append(results, f(child, newfacts))
423
def cb_text(self, element, facts):
# Handle <text>: updates facts["datatype"], first turning an existing
# datatype into a list (a tuple is expanded, any other value wrapped).
# NOTE(review): what is appended to `l`, the branch creating `l` when
# there is no datatype yet, and the condition choosing between l[0] and
# tuple(l) are missing from this listing.
424
if "datatype" in facts:
425
if isinstance(facts["datatype"], tuple):
426
l = list(facts["datatype"])
428
l = [facts["datatype"]]
434
facts["datatype"] = l[0]
436
facts["datatype"] = tuple(l)
438
def cb_anyname(self, element, facts):
# Placeholder for <anyName>: only reports that the construct is not
# handled yet. NOTE(review): the listing numbers skip after this line, so
# further statements may exist.
439
debug.deprint("anyName element found. Yet to handle.", 0)
442
def cb_nsname(self, element, facts):
# Placeholder for <nsName>: only reports that the construct is not handled
# yet. NOTE(review): the listing numbers skip after this line, so further
# statements may exist.
443
debug.deprint("nsName element found. Yet to handle.", 0)
446
def cb_except(self, element, facts):
# Placeholder for <except>: only reports that the construct is not handled
# yet. NOTE(review): the listing numbers skip after this line, so further
# statements may exist.
447
debug.deprint("except element found. Yet to handle.", 0)
450
def cb_ignore(self, element, facts):
# NOTE(review): the body of this callback is not visible in this listing.
453
def cb_notallowed(self, element, facts):
# Placeholder for <notAllowed>: only reports that the construct is not
# handled yet.
# NOTE(review): this is the only callback that calls debug.dprint rather
# than debug.deprint -- confirm that the different output function is
# intended.
454
debug.dprint("notallowed element found. Yet to handle.", 0)
456
#######################################
457
# End of schema processing functions. #
458
#######################################
460
def tag(self, element):
# Return the local tag name of `element` (the text after the last '}' of
# element.tag) when the part before the first '}' contains "relaxng".
# NOTE(review): what is returned for other elements is not visible in
# this listing.
461
# Ignore non-RelaxNG elements. Is this the best way to handle it?
462
namespace = element.tag.split('}')[0]
463
if namespace.find("relaxng") != -1:
464
return element.tag.split('}')[-1]
468
# append - append either a list or single element 'x' to 'r'.
469
def append(self, r, x):
# Helper used by the callbacks to accumulate results in `r`; it
# distinguishes the case where `x` is itself a list.
# NOTE(review): everything except the isinstance test is missing from
# this listing.
473
if isinstance(x, list):
480
##########################################
481
# Beginning of XML processing functions. #
482
##########################################
484
# read takes a file handle, constructs a generic in-memory representation using
485
# the etree API, and then converts it to a tree of Tree and Choice elements.
486
def read(self, xmlfile):
    """Parse ``xmlfile`` and merge its contents into a schema-derived tree.

    The root tree is taken from the schema's start element, merged with
    the XML root node and then filled in recursively. Any elements or
    attributes that were lost from, or added to, the XML while reading are
    reported through ``debug.deprint`` and remain available on
    ``lost_eles``, ``added_eles``, ``lost_attrs`` and ``added_attrs``.

    Args:
        xmlfile: Anything accepted by ``etree.parse``.

    Returns:
        The populated root tree.
    """
    doc = etree.parse(xmlfile)

    # Start every read with empty error lists: the merge/recursion helpers
    # append to all four of them and the checks below read all four.
    self.lost_eles = []
    self.added_eles = []
    self.lost_attrs = []
    self.added_attrs = []

    datatree = self.valid_children(":start")[0]
    xmlnode = doc.getroot()
    self.xml_read_merge(datatree, xmlnode)
    self.xml_read_core(datatree, xmlnode, doc)

    if len(self.lost_eles) != 0:
        debug.deprint("WARNING: Lost XML elements:\n" + str(self.lost_eles))
    if len(self.added_eles) != 0:
        debug.deprint("WARNING: Added XML elements:\n" + str(self.added_eles))
    if len(self.lost_attrs) != 0:
        debug.deprint("WARNING: Lost XML attributes:\n" + str(self.lost_attrs))
    # Guard on the attribute list itself; testing added_eles here would
    # hide added attributes whenever no element was added (and print an
    # empty list when only elements were added).
    if len(self.added_attrs) != 0:
        debug.deprint("WARNING: Added XML attributes:\n" + str(self.added_attrs))

    return datatree
510
def xml_read_merge(self, datatree, xmlnode):
# Merge the information carried by `xmlnode` into `datatree`.
# The node is attached as datatree.xmlnode. For a choice.Choice the active
# choice is selected among the choices whose name equals the XML tag:
# first those with a 'fixed' "name" attribute equal to the XML node's
# name, then those with a non-fixed "name"; set_active_choice_by_name() is
# also used (presumably when the XML node has no "name" attribute). The
# attributes, the node text and the tails of the child elements are then
# transferred to the current tree, and validity is recomputed.
# NOTE(review): the Tree branch body, the definition of `have_found`, the
# loop header over the XML keys and several conditions are missing from
# this listing.
511
# The datatree has the following set:
512
# name, schemaname, doc, cardinality, datatype, parent,
513
# attribute datatypes.
514
# the xmlnode contains the following information:
515
# attribute values, data
518
datatree.xmlnode = xmlnode
519
xmlkeys = xmlnode.keys()
521
if datatree.__class__ is tree.Tree:
523
elif datatree.__class__ is choice.Choice:
524
if "name" in xmlkeys:
525
xmlname = xmlnode.get("name")
528
possibles = [tree_choice for tree_choice in datatree.choices() if tree_choice.name == xmlnode.tag]
529
# first loop over the fixed-value names
530
for tree_choice in possibles:
531
if "name" not in tree_choice.attrs:
534
# attrs["name"] is indexed as [0] -> datatype, [1] -> value
datatype = tree_choice.attrs["name"][0]
535
if datatype == 'fixed':
536
treename = tree_choice.attrs["name"][1]
537
if treename == xmlname:
539
datatree.set_active_choice_by_ref(tree_choice)
542
# if we haven't found it, look for a generic name
543
if have_found is False:
544
for tree_choice in possibles:
545
if "name" not in tree_choice.attrs:
548
datatype = tree_choice.attrs["name"][0]
549
if datatype != 'fixed':
551
datatree.set_active_choice_by_ref(tree_choice)
555
datatree.set_active_choice_by_name(xmlnode.tag)
557
to_set = datatree.get_current_tree()
559
# catch any lost XML attributes
561
if key not in to_set.attrs.keys():
562
self.lost_attrs += [to_set.name + '/' + key]
565
for key in to_set.attrs.keys():
568
to_set.set_attr(key, xmlnode.get(key))
572
self.added_attrs += [to_set.name + '/' + key]
574
# Get the text value (the node's data)
575
if xmlnode.text is not None:
577
text=xmlnode.text.strip()
579
to_set.set_data(text)
585
# text following a child element (its tail) is also offered as data
for child in xmlnode.iterchildren(tag=etree.Element):
586
if child.tail is not None:
588
text = child.tail.strip()
590
to_set.set_data(text)
595
to_set.recompute_validity()
596
datatree.recompute_validity()
598
###########################################################################################
599
# construct the priority queue
600
# we treat compulsory nodes first, then descend through the cardinalities
601
###########################################################################################
603
def construct_priority_queue(self, schemachildren):
    """Order schema children by how strongly they need data from the XML.

    Compulsory children (cardinality ``''``) come first, followed by
    ``'+'``, ``'?'`` and ``'*'``. Within one cardinality the original
    order of ``schemachildren`` is kept; children with any other
    cardinality are left out.

    Args:
        schemachildren: Iterable of objects with a ``cardinality``
            attribute.

    Returns:
        A new list holding the children in the order in which they query
        data from the XML.
    """
    priority_queue = []
    for cardinality in ('', '+', '?', '*'):
        for schemachild in schemachildren:
            if schemachild.cardinality == cardinality:
                priority_queue.append(schemachild)

    return priority_queue
630
###########################################################################################
631
# initialise the availability data
632
# avail[name][xmlnode] records whether xmlnode is available or not
633
###########################################################################################
634
def init_avail_data(self, xmlnode, schemachildren):
# Prepare the bookkeeping used while assigning XML nodes to schema
# children: for every possible name of every schema child, each child
# element of `xmlnode` with that tag is marked available in
# avail[name][xmldata]. The caller (xml_read_core) unpacks the result as
# (used, avail).
# NOTE(review): the creation of `used` and `avail`, the body of the first
# loop and the return statement are not visible in this listing.
638
for xml in xmlnode.iterchildren(tag=etree.Element):
641
for schemachild in schemachildren:
642
for name in schemachild.get_possible_names():
646
for xmldata in xmlnode.iterchildren(tag=name):
647
avail[name][xmldata] = True
651
###########################################################################################
652
# assign the available xml nodes to the children the schema says should be there
653
# in order of priority.
654
# xmls[schemachild.schemaname] is the list of xml nodes
655
# that schemachild should take
656
###########################################################################################
657
def assign_xml_nodes(self, priority_queue, xmlnode, avail):
# Decide which XML child nodes each schema child takes, in priority order.
# For every choice of a schema child the candidate nodes are found with an
# XPath on `xmlnode`: just the name, or name plus a [@name="..."]
# predicate when the choice has a 'fixed' "name" attribute. A node still
# marked available in `avail` is claimed (flag set to False) and stored in
# xmls[schemachild.schemaname]: cardinalities '' and '?' keep a single
# node, '*' and '+' accumulate a list.
# NOTE(review): the creation of `xmls`, the binding of `name` and
# `xmldata` (loop headers), break/else statements and the return statement
# are missing from this listing. copy.deepcopy([]) just produces a fresh
# empty list.
660
for schemachild in priority_queue:
661
if schemachild.cardinality in ['', '?']:
662
for curtree in schemachild.choices():
665
have_fixed_name = False
666
if "name" in curtree.attrs:
667
datatype = curtree.attrs["name"][0]
668
if datatype == 'fixed':
669
have_fixed_name = True
671
if have_fixed_name is False:
672
xml = xmlnode.xpath(name)
674
xml = xmlnode.xpath(name + '[@name="%s"]' % curtree.get_attr("name"))
677
if avail[name][xmldata]:
678
avail[name][xmldata] = False
679
xmls[schemachild.schemaname] = [xmldata]
682
# nothing was claimed for this child: record an empty assignment
if schemachild.schemaname not in xmls:
683
if schemachild.cardinality == '':
684
xmls[schemachild.schemaname] = copy.deepcopy([])
686
xmls[schemachild.schemaname] = copy.deepcopy([])
687
elif schemachild.cardinality in ['*', '+']:
688
xmls[schemachild.schemaname] = copy.deepcopy([])
689
for curtree in schemachild.choices():
692
have_fixed_name = False
693
if "name" in curtree.attrs:
694
datatype = curtree.attrs["name"][0]
695
if datatype == 'fixed':
696
have_fixed_name = True
698
if have_fixed_name is False:
699
xml = xmlnode.xpath(name)
701
xml = xmlnode.xpath(name + '[@name="%s"]' % curtree.get_attr("name"))
704
if avail[name][xmldata]:
705
avail[name][xmldata] = False
706
xmls[schemachild.schemaname].append(xmldata)
710
###########################################################################################
711
# now that we have assigned the xml nodes, loop through and grab them
712
# stuff the tree data in bins[schemachild.schemaname]
713
###########################################################################################
714
def assign_xml_children(self, priority_queue, xmlnode, xmls, schemachildren, used, rootdoc):
# Create the tree children for each schema child from the XML nodes
# assigned in `xmls`, storing them in bins[schemachild.schemaname].
# Each bin entry is a copy of the schema child merged with one XML node
# via xml_read_merge(). For '' / '?' at most one node is used; when that
# node belongs to a different document than `rootdoc` the child is read
# immediately with xml_read_core() and flagged recurse = False. For
# '*' / '+' one copy is made per assigned node, and a further copy is
# appended afterwards (described below as an inactive instance).
# NOTE(review): the creation of `bins`, updates to `used`, the statements
# that deactivate copies, the body of the "neglected choices" search and
# the return statement are missing from this listing.
715
# bins[schemachild.schemaname] will store the data associated with schemachild
718
for schemachild in schemachildren:
719
bins[schemachild.schemaname] = []
721
for schemachild in priority_queue:
722
if schemachild.cardinality in ['', '?']:
723
child = schemachild.copy()
726
if len(xmls[schemachild.schemaname]) == 1:
727
xmldata = xmls[schemachild.schemaname][0]
729
self.xml_read_merge(child, xmldata)
731
# Was this part of the uncompressed XML file or part of a hidden comment?
732
if xmldata.getroottree().getroot() != rootdoc.getroot():
733
self.xml_read_core(child.get_current_tree(), xmldata, xmldata.getroottree())
735
# already read above, so read_children must not descend again
child.recurse = False
737
if schemachild.cardinality == '?':
740
bins[schemachild.schemaname] = [child]
742
elif schemachild.cardinality in ['*', '+']:
743
for xmldata in xmls[schemachild.schemaname]:
744
child = schemachild.copy()
747
self.xml_read_merge(child, xmldata)
748
bins[schemachild.schemaname].append(child)
750
if schemachild.cardinality == '+':
751
# check that we have at least one.
752
count = len(bins[schemachild.schemaname])
754
child = schemachild.copy()
757
bins[schemachild.schemaname] = [child]
759
if schemachild.cardinality in ['*', '+']:
760
# add an inactive instance
761
child = schemachild.copy()
764
bins[schemachild.schemaname].append(child)
766
# search for neglected choices
767
if schemachild.__class__ is choice.Choice and schemachild.cardinality in ['', '?']:
768
for child in bins[schemachild.schemaname]:
770
# Does the child have a valid XML node attached?
771
if not hasattr(child, "xmlnode"): continue
772
if child.xmlnode is None: continue
774
current_choice = child.get_current_tree()
775
for tree_choice in child.l:
776
if tree_choice is current_choice: continue
780
# Loop over lost nodes, and store their XML so the user can be notified later.
781
def check_unused_nodes(self, used):
# Record XML nodes that no schema child claimed: a node whose entry in
# `used` is False has its serialised form added to self.lost_eles.
# NOTE(review): the loop header binding `xml`, the assignment of `s`
# (presumably taken from `buf`) and the order of the statements are not
# fully visible in this listing.
783
buf = cStringIO.StringIO()
784
buf.write(etree.tostring(xml, pretty_print = True))
791
if used[xml] is False:
793
self.lost_eles += [s]
795
# Append the children to the datatree in the order the schema presents them.
796
# Order matters here.
797
def append_children(self, schemachildren, datatree, bins):
    """Attach the binned children to ``datatree`` in schema order.

    Order matters: children are appended in the order the schema lists
    ``schemachildren``, not in the order the bins were filled.

    Args:
        schemachildren: Iterable of schema children, each with a
            ``schemaname`` attribute used as key into ``bins``.
        datatree: Parent node; must expose a ``children`` list.
        bins: Mapping from schema name to the list of children created
            for it. Every schema child must have an entry.
    """
    for schemachild in schemachildren:
        for child in bins[schemachild.schemaname]:
            child.set_parent(datatree)
            datatree.children.append(child)
803
# Recurse down the in-memory XML tree, reading elements and merging their
804
# information into the in-memory Tree structure.
805
def read_children(self, datatree, rootdoc):
# Recurse into the children of `datatree`: each child (for a
# choice.Choice, its currently selected tree) has its `children` list
# replaced by a new empty one and is then read with xml_read_core() using
# the XML node stored on the schema child.
# Children carrying recurse = False and inactive children are tested for
# explicitly; the statements taken in those cases (presumably `continue`)
# are not visible in this listing, nor is the else branch for non-Choice
# children. copy.copy([]) just produces a fresh empty list.
806
for schild in datatree.children:
808
if hasattr(schild, "recurse"):
809
if schild.recurse is False:
812
if schild.__class__ is choice.Choice:
813
child = schild.get_current_tree()
817
if schild.active is False:
820
child.children = copy.copy([])
821
self.xml_read_core(child, schild.xmlnode, rootdoc)
823
# xml_read_core recurses throughout the tree, calling xml_read_merge on the current node "xmlnode"
824
# and reading information about the node's children.
825
def xml_read_core(self, datatree, xmlnode, rootdoc):
# Populate the children of `datatree` from `xmlnode`. `datatree` must not
# have children yet (asserted). The main path asks the schema for the
# valid children, orders them by priority, assigns XML nodes to them,
# builds the child trees, appends them in schema order, records unused XML
# nodes and finally recurses via read_children().
# A separate path records the tree's readable name in self.added_eles and
# lets an active tree add its own children; per the inline comment this is
# the case where the XML offers no information.
# NOTE(review): the condition guarding that path and its return statement
# are missing from this listing.
826
"""This is the part that recurses, you see."""
828
assert len(datatree.children) == 0
830
# no information from XML to be had :-/
832
self.added_eles.append(self.readable_name(datatree))
833
if datatree.active: datatree.add_children(self)
836
schemachildren = self.valid_children(datatree)
838
priority_queue = self.construct_priority_queue(schemachildren)
839
(used, avail) = self.init_avail_data(xmlnode, schemachildren)
840
xmls = self.assign_xml_nodes(priority_queue, xmlnode, avail)
841
bins = self.assign_xml_children(priority_queue, xmlnode, xmls, schemachildren, used, rootdoc)
842
self.append_children(schemachildren, datatree, bins)
843
self.check_unused_nodes(used)
844
self.read_children(datatree, rootdoc)
846
datatree.recompute_validity()
848
def read_errors(self):
    """Return the discrepancies recorded while reading an XML file.

    Returns:
        A 4-tuple ``(lost_eles, added_eles, lost_attrs, added_attrs)`` of
        the lists kept on the instance (the lists themselves, not copies).
    """
    return self.lost_eles, self.added_eles, self.lost_attrs, self.added_attrs
851
def readable_name(self, datatree):
# Build a '/'-separated path string for `datatree` by prepending node
# names while walking a chain of nodes until it reaches None; the result
# starts with '/' and has the trailing '/' removed.
# NOTE(review): the initialisation of `output` and `node` and the step
# that advances `node` (presumably to its parent) are not visible in this
# listing.
854
while node is not None:
855
output = node.name + '/' + output
857
return '/' + output[:-1]