~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/oeb/polish/split.py

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2014-02-27 07:48:06 UTC
  • mto: This revision was merged to the branch mainline in revision 74.
  • Revision ID: package-import@ubuntu.com-20140227074806-64wdebb3ptosxhhx
Tags: upstream-1.25.0+dfsg
Import upstream version 1.25.0+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
11
11
from urlparse import urlparse
12
12
 
13
13
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
 
14
from calibre.ebooks.oeb.polish.errors import MalformedMarkup
14
15
from calibre.ebooks.oeb.polish.toc import node_from_loc
15
16
from calibre.ebooks.oeb.polish.replace import LinkRebaser
16
17
 
162
163
            self.replaced = True
163
164
        return url
164
165
 
165
 
def split(container, name, loc_or_xpath, before=True):
 
166
def split(container, name, loc_or_xpath, before=True, totals=None):
166
167
    ''' Split the file specified by name at the position specified by loc_or_xpath. '''
167
168
 
168
169
    root = container.parsed(name)
169
170
    if isinstance(loc_or_xpath, type('')):
170
171
        split_point = root.xpath(loc_or_xpath)[0]
171
172
    else:
172
 
        split_point = node_from_loc(root, loc_or_xpath)
 
173
        try:
 
174
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
 
175
        except MalformedMarkup:
 
176
            # The webkit HTML parser and the container parser have yielded
 
177
            # different node counts, this can happen if the file is valid XML
 
178
            # but contains constructs like nested <p> tags. So force parse it
 
179
            # with the HTML 5 parser and try again.
 
180
            raw = container.raw_data(name)
 
181
            root = container.parse_xhtml(raw, fname=name, force_html5_parse=True)
 
182
            try:
 
183
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
 
184
            except MalformedMarkup:
 
185
                raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
 
186
                                        ' before splitting') % name)
 
187
            container.replace(name, root)
173
188
    if in_table(split_point):
174
189
        raise AbortError('Cannot split inside tables')
175
190
    if split_point.tag.endswith('}body'):