~ubuntu-branches/ubuntu/karmic/calibre/karmic

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/oeb/transforms/structure.py

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-07-30 12:49:41 UTC
  • mfrom: (1.3.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20090730124941-qjdsmri25zt8zocn
Tags: 0.6.3+dfsg-0ubuntu1
* New upstream release. Please see http://calibre.kovidgoyal.net/new_in_6/
  for the list of new features and changes.
* remove_postinstall.patch: Update for new version.
* build_debug.patch: Does not apply any more, disable for now. Might not be
  necessary any more.
* debian/copyright: Fix reference to versionless GPL.
* debian/rules: Drop obsolete dh_desktop call.
* debian/rules: Add workaround for weird Python 2.6 setuptools behaviour of
  putting compiled .so files into src/calibre/plugins/calibre/plugins
  instead of src/calibre/plugins.
* debian/rules: Drop hal fdi moving, new upstream version does not use hal
  any more. Drop hal dependency, too.
* debian/rules: Install udev rules into /lib/udev/rules.d.
* Add debian/calibre.preinst: Remove unmodified
  /etc/udev/rules.d/95-calibre.rules on upgrade.
* debian/control: Bump Python dependencies to 2.6, since upstream needs
  it now.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env python
 
2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 
3
from __future__ import with_statement
 
4
 
 
5
__license__   = 'GPL v3'
 
6
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 
7
__docformat__ = 'restructuredtext en'
 
8
 
 
9
import re
 
10
 
 
11
from lxml import etree
 
12
from urlparse import urlparse
 
13
 
 
14
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
 
15
from calibre.ebooks import ConversionError
 
16
 
 
17
def XPath(x):
    '''
    Compile the XPath expression `x` with the `XPNSMAP` namespace prefixes.

    Returns the compiled ``etree.XPath`` object. An expression that lxml
    rejects with ``XPathSyntaxError`` is reported as a `ConversionError`
    whose message contains ``repr(x)``.
    '''
    try:
        compiled = etree.XPath(x, namespaces=XPNSMAP)
    except etree.XPathSyntaxError:
        message = 'The syntax of the XPath expression %s is invalid.' % repr(x)
        raise ConversionError(message)
    return compiled
 
23
 
 
24
class DetectStructure(object):
    '''
    Transform that detects chapters in the spine documents and builds,
    filters and tidies the table of contents of an OEB book.

    Instances are callables: ``DetectStructure()(oeb, opts)``.
    '''

    def __call__(self, oeb, opts):
        '''
        Run structure detection on `oeb` using the settings in `opts`.

        In order, this:

          * detects chapters (see `detect_chapters`);
          * if ``oeb.auto_generated_toc`` or ``opts.use_auto_toc`` is set,
            builds a new TOC from the level XPaths, falling back to the
            detected chapters and then to links, and keeps the original TOC
            when the new one has fewer than 2 entries while the original
            has more than 2;
          * removes TOC entries without a title or whose title matches
            ``opts.toc_filter``;
          * appends ``page-break-before:always`` to the ``style`` attribute
            of every element matched by ``opts.page_breaks_before``;
          * gives every remaining entry with an empty or blank title the
            translated title ``'Unnamed'``.
        '''
        self.log = oeb.log
        self.oeb = oeb
        self.opts = opts
        self.log('Detecting structure...')

        self.detect_chapters()
        if self.oeb.auto_generated_toc or opts.use_auto_toc:
            orig_toc = self.oeb.toc
            self.oeb.toc = TOC()
            self.create_level_based_toc()
            if self.oeb.toc.count() < 1:
                if not opts.no_chapters_in_toc and self.detected_chapters:
                    self.create_toc_from_chapters()
                if self.oeb.toc.count() < opts.toc_threshold:
                    self.create_toc_from_links()
            if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
                self.oeb.toc = orig_toc
            else:
                self.oeb.auto_generated_toc = True
                self.log('Auto generated TOC with %d entries.' %
                        self.oeb.toc.count())

        if opts.toc_filter is not None:
            regexp = re.compile(opts.toc_filter)
            # Iterate over a snapshot: remove() changes the TOC, and doing
            # that while iter() is still walking it can make the walk skip
            # the entry that follows each removed one.
            for node in list(self.oeb.toc.iter()):
                if not node.title or regexp.search(node.title) is not None:
                    self.oeb.toc.remove(node)

        if opts.page_breaks_before is not None:
            pb_xpath = XPath(opts.page_breaks_before)
            for item in oeb.spine:
                for elem in pb_xpath(item.data):
                    style = elem.get('style', '')
                    if style:
                        style += '; '
                    elem.set('style', style+'page-break-before:always')

        for node in self.oeb.toc.iter():
            if not node.title or not node.title.strip():
                node.title = _('Unnamed')

    def _joined_text(self, elem):
        '''
        Return the stripped descendant text nodes of `elem` joined by single
        spaces.
        '''
        return u' '.join([t.strip() for t in elem.xpath('descendant::text()')])

    def detect_chapters(self):
        '''
        Collect the elements matched by the ``opts.chapter`` XPath into
        ``self.detected_chapters`` as ``(spine item, element)`` pairs, and
        insert a marker element before each one according to
        ``opts.chapter_mark``:

          * ``'none'``: no marker;
          * ``'rule'``: an ``hr``;
          * ``'pagebreak'``: a ``div`` styled with ``page-break-after``;
          * anything else (``'both'``): an ``hr`` styled with
            ``page-break-before``.

        Nothing is detected when ``opts.chapter`` is empty.
        '''
        self.detected_chapters = []
        if not self.opts.chapter:
            return

        chapter_xpath = XPath(self.opts.chapter)
        for item in self.oeb.spine:
            for x in chapter_xpath(item.data):
                self.detected_chapters.append((item, x))

        chapter_mark = self.opts.chapter_mark
        page_break_before = 'display: block; page-break-before: always'
        page_break_after = 'display: block; page-break-after: always'
        for item, elem in self.detected_chapters:
            text = self._joined_text(elem)
            self.log('\tDetected chapter:', text[:50])
            if chapter_mark == 'none':
                continue
            elif chapter_mark == 'rule':
                mark = etree.Element(XHTML('hr'))
            elif chapter_mark == 'pagebreak':
                mark = etree.Element(XHTML('div'), style=page_break_after)
            else: # chapter_mark == 'both':
                mark = etree.Element(XHTML('hr'), style=page_break_before)
            elem.addprevious(mark)

    def create_level_based_toc(self):
        '''
        Build TOC entries from the level XPath options for every spine item.
        Does nothing when ``opts.level1_toc`` is None.
        '''
        if self.opts.level1_toc is None:
            return
        for item in self.oeb.spine:
            self.add_leveled_toc_items(item)

    def create_toc_from_chapters(self):
        '''
        Add one TOC entry per detected chapter, numbering play orders
        consecutively from ``toc.next_play_order()``.
        '''
        counter = self.oeb.toc.next_play_order()
        for item, elem in self.detected_chapters:
            text, href = self.elem_to_link(item, elem, counter)
            self.oeb.toc.add(text, href, play_order=counter)
            counter += 1

    def create_toc_from_links(self):
        '''
        Add a TOC entry for every ``a[@href]`` in the spine whose URL has no
        scheme or the ``file`` scheme, skipping hrefs and link texts that
        the TOC already contains. Link text is truncated to 100 characters.
        '''
        # The expression is constant, so compile it once for all items.
        links_xpath = XPath('//h:a[@href]')
        for item in self.oeb.spine:
            for a in links_xpath(item.data):
                purl = urlparse(a.get('href'))
                if purl[0] and purl[0] != 'file':
                    continue
                href = item.abshref(purl.path)
                if purl.fragment:
                    href = '#'.join((href, purl.fragment))
                if self.oeb.toc.has_href(href):
                    continue
                text = self._joined_text(a)[:100].strip()
                if not self.oeb.toc.has_text(text):
                    self.oeb.toc.add(text, href,
                        play_order=self.oeb.toc.next_play_order())

    def elem_to_link(self, item, elem, counter):
        '''
        Return ``(text, href)`` for a TOC entry that points at `elem`.

        `text` is the joined descendant text of `elem`, truncated to 100
        characters and stripped. `href` is ``item.href`` plus ``#`` plus the
        element's id. An element without an ``id`` attribute is given
        ``calibre_toc_<counter>``; note that the attribute is (re)set on
        `elem` in either case.
        '''
        text = self._joined_text(elem)[:100].strip()
        elem_id = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', elem_id)
        href = '#'.join((item.href, elem_id))
        return text, href

    def _add_sublevel(self, document, expr, parents, counter):
        '''
        Add the elements matched by the XPath `expr` in `document` as
        children of the entries in `parents`.

        `parents` maps elements to the TOC nodes created for them. A matched
        element is attached to the node of the nearest element from
        `parents` that precedes it among the descendants of
        ``document.data``; matches with no such predecessor, and matches
        that are themselves keys of `parents`, are ignored.

        Returns ``(added, counter)``: the mapping of matched elements to
        their new nodes, and the updated id counter.
        '''
        candidates = list(XPath(expr)(document.data))
        wanted = set(candidates)

        # One walk over the document records, for every candidate, the
        # parent node that is current when the walk reaches it.
        owner = {}
        current = None
        for desc in document.data.iterdescendants():
            if desc in parents:
                current = parents[desc]
            elif current is not None and desc in wanted:
                owner[desc] = current

        # Entries are created in the order the XPath returned them so that
        # ids and play orders are assigned in that order.
        added = {}
        for elem in candidates:
            parent = owner.get(elem)
            if parent is None:
                continue
            text, _href = self.elem_to_link(document, elem, counter)
            counter += 1
            if text:
                added[elem] = parent.add(text, _href,
                        play_order=self.oeb.toc.next_play_order())
        return added, counter

    def add_leveled_toc_items(self, item):
        '''
        Add up to three levels of TOC entries for the spine item `item`.

        Elements matched by ``opts.level1_toc`` become top-level entries.
        When ``opts.level2_toc`` (and then ``opts.level3_toc``) is set, its
        matches are nested under the nearest preceding entry of the level
        above. Nothing is added when level 1 matches nothing, and elements
        without text get an id but no entry.
        '''
        level1 = XPath(self.opts.level1_toc)(item.data)
        if not level1:
            return

        counter = 1
        added = {}
        for elem in level1:
            text, _href = self.elem_to_link(item, elem, counter)
            counter += 1
            if text:
                added[elem] = self.oeb.toc.add(text, _href,
                        play_order=self.oeb.toc.next_play_order())
                #node.add(_('Top'), _href)

        if self.opts.level2_toc is None:
            return
        added2, counter = self._add_sublevel(item, self.opts.level2_toc,
                added, counter)

        if self.opts.level3_toc is None:
            return
        self._add_sublevel(item, self.opts.level3_toc, added2, counter)
 
179