~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/oeb/polish/parsing.py

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2014-02-27 07:48:06 UTC
  • mto: This revision was merged to the branch mainline in revision 74.
  • Revision ID: package-import@ubuntu.com-20140227074806-64wdebb3ptosxhhx
Tags: upstream-1.25.0+dfsg
Import upstream version 1.25.0+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
275
275
        for k, v in elem.items():  # Only elem.items() preserves attrib order
276
276
            nelem.set(k, v)
277
277
        for (prefix, name), v in namespaced_attribs.iteritems():
278
 
            ns = nsmap.get('prefix', None)
 
278
            ns = nsmap.get(prefix, None)
279
279
            if ns is not None:
280
280
                try:
281
281
                    nelem.set('{%s}%s' % (ns, name), v)
331
331
        self.openElements.append(element)
332
332
        self.document.appendChild(element)
333
333
 
 
334
    def promote_elem(self, elem, tag_name):
 
335
        ' Add the paraphernalia to elem that the html5lib infrastructure needs '
 
336
        self.proxy_cache.append(elem)
 
337
        elem.name = tag_name
 
338
        elem.namespace = elem.nsmap[elem.prefix]
 
339
        elem.nameTuple = (elem.nsmap[elem.prefix], elem.name)
 
340
 
334
341
    def createElement(self, token, nsmap=None):
335
342
        """Create an element but don't insert it anywhere"""
336
343
        nsmap = nsmap or {}
346
353
 
347
354
        # Keep a reference to elem so that lxml does not delete and re-create
348
355
        # it, losing the name related attributes
349
 
        self.proxy_cache.append(elem)
350
 
        elem.name = token_name
351
 
        elem.namespace = elem.nsmap[elem.prefix]
352
 
        elem.nameTuple = (elem.nsmap[elem.prefix], elem.name)
 
356
        self.promote_elem(elem, token_name)
353
357
        position = token.get('position', None)
354
358
        if position is not None:
355
359
            # Unfortunately, libxml2 can only store line numbers upto 65535
388
392
        self.openElements.append(element)
389
393
        return element
390
394
 
 
395
    def clone_node(self, elem, nsmap_update):
 
396
        assert len(elem) == 0
 
397
        nsmap = elem.nsmap.copy()
 
398
        nsmap.update(nsmap_update)
 
399
        nelem = self.lxml_context.makeelement(elem.tag, nsmap=nsmap)
 
400
        self.promote_elem(nelem, elem.tag.rpartition('}')[2])
 
401
        nelem.sourceline = elem.sourceline
 
402
        for k, v in elem.items():
 
403
            nelem.set(k, v)
 
404
        nelem.text, nelem.tail = elem.text, elem.tail
 
405
        return nelem
 
406
 
391
407
    def apply_html_attributes(self, attrs):
392
408
        if not attrs:
393
409
            return
403
419
                        continue
404
420
                    if k == 'xml:lang' and 'lang' not in html.attrib:
405
421
                        k = 'lang'
406
 
                    html.set(to_xml_name(k), v)
 
422
                        html.set(k, v)
 
423
                        continue
 
424
                    if k.startswith('xmlns:') and v not in known_namespaces and v != namespaces['html'] and len(html) == 0:
 
425
                        # We have a namespace declaration, the only way to add
 
426
                        # it to the existing html node is to replace it.
 
427
                        prefix = k[len('xmlns:'):]
 
428
                        if not prefix:
 
429
                            continue
 
430
                        self.openElements[0] = html = self.clone_node(html, {prefix:v})
 
431
                        self.document.appendChild(html)
 
432
                    else:
 
433
                        html.set(to_xml_name(k), v)
407
434
 
408
435
    def apply_body_attributes(self, attrs):
409
436
        if not attrs:
609
636
        raw = prefix + suffix
610
637
    return raw
611
638
 
612
 
def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=None, replace_entities=True):
 
639
def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=None, replace_entities=True, force_html5_parse=False):
613
640
    if isinstance(raw, bytes):
614
641
        raw = xml_to_unicode(raw)[0] if decoder is None else decoder(raw)
615
642
    if replace_entities:
626
653
        break
627
654
 
628
655
    raw = strip_encoding_declarations(raw)
 
656
    if force_html5_parse:
 
657
        return parse_html5(raw, log=log, line_numbers=line_numbers, linenumber_attribute=linenumber_attribute, replace_entities=False, fix_newlines=False)
629
658
    try:
630
659
        parser = XMLParser(no_network=True)
631
660
        ans = fromstring(raw, parser=parser)