~ubuntu-branches/ubuntu/karmic/calibre/karmic

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/lrf/lrs/convert_from.py

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-07-30 12:49:41 UTC
  • mfrom: (1.3.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20090730124941-qjdsmri25zt8zocn
Tags: 0.6.3+dfsg-0ubuntu1
* New upstream release. Please see http://calibre.kovidgoyal.net/new_in_6/
  for the list of new features and changes.
* remove_postinstall.patch: Update for new version.
* build_debug.patch: No longer applies, so it is disabled for now. It might
  not be necessary any more.
* debian/copyright: Fix reference to versionless GPL.
* debian/rules: Drop obsolete dh_desktop call.
* debian/rules: Add workaround for weird Python 2.6 setuptools behaviour of
  putting compiled .so files into src/calibre/plugins/calibre/plugins
  instead of src/calibre/plugins.
* debian/rules: Drop hal fdi moving; the new upstream version does not use
  hal any more. Drop hal dependency, too.
* debian/rules: Install udev rules into /lib/udev/rules.d.
* Add debian/calibre.preinst: Remove unmodified
  /etc/udev/rules.d/95-calibre.rules on upgrade.
* debian/control: Bump Python dependencies to 2.6, since upstream needs
  it now.

Show diffs side-by-side

added added

removed removed

Lines of Context:
18
18
from calibre.ebooks.chardet import xml_to_unicode
19
19
 
20
20
class LrsParser(object):
21
 
    
22
 
    SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space', 
23
 
                         'PutObj', 'RuledLine', 
 
21
 
 
22
    SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
 
23
                         'PutObj', 'RuledLine',
24
24
                         'Plot', 'SetDefault', 'BookSetting', 'RegistFont',
25
25
                         'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo',
26
26
                         'ImageStream', 'Image']]
27
 
    
 
27
 
28
28
    def __init__(self, stream, logger):
29
29
        self.logger = logger
30
30
        src = stream.read()
31
31
        self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
32
 
                       convertEntities=BeautifulStoneSoup.XML_ENTITIES, 
 
32
                       convertEntities=BeautifulStoneSoup.XML_ENTITIES,
33
33
                       selfClosingTags=self.SELF_CLOSING_TAGS)
34
34
        self.objects = {}
35
35
        for obj in self.soup.findAll(objid=True):
36
36
            self.objects[obj['objid']] = obj
37
 
        
 
37
 
38
38
        self.parsed_objects = {}
39
39
        self.first_pass()
40
40
        self.second_pass()
41
41
        self.third_pass()
42
42
        self.fourth_pass()
43
43
        self.fifth_pass()
44
 
    
 
44
 
45
45
    def fifth_pass(self):
46
46
        for tag in self.soup.findAll(['canvas', 'header', 'footer']):
47
47
            canvas = self.parsed_objects[tag.get('objid')]
48
48
            for po in tag.findAll('putobj'):
49
49
                canvas.put_object(self.parsed_objects[po.get('refobj')],
50
50
                                  po.get('x1'), po.get('y1'))
51
 
            
52
 
    
 
51
 
 
52
 
53
53
    @classmethod
54
54
    def attrs_to_dict(cls, tag, exclude=('objid',)):
55
55
        result = {}
58
58
                continue
59
59
            result[str(key)] = val
60
60
        return result
61
 
    
 
61
 
62
62
    def text_tag_to_element(self, tag):
63
63
        map = {
64
64
               'span'    : Span,
77
77
        settings = self.attrs_to_dict(tag)
78
78
        settings.pop('spanstyle', '')
79
79
        return map[tag.name](**settings)
80
 
    
 
80
 
81
81
    def process_text_element(self, tag, elem):
82
82
        for item in tag.contents:
83
83
            if isinstance(item, NavigableString):
86
86
                subelem = self.text_tag_to_element(item)
87
87
                elem.append(subelem)
88
88
                self.process_text_element(item, subelem)
89
 
        
90
 
    
 
89
 
 
90
 
91
91
    def process_paragraph(self, tag):
92
92
        p = Paragraph()
93
93
        contents = [i for i in tag.contents]
104
104
                    p.append(elem)
105
105
                    self.process_text_element(item, elem)
106
106
        return p
107
 
    
 
107
 
108
108
    def process_text_block(self, tag):
109
109
        tb = self.parsed_objects[tag.get('objid')]
110
110
        for item in tag.contents:
119
119
                    elem = self.text_tag_to_element(item)
120
120
                    self.process_text_element(item, elem)
121
121
                    p.append(elem)
122
 
            
 
122
 
123
123
    def fourth_pass(self):
124
124
        for tag in self.soup.findAll('page'):
125
125
            page = self.parsed_objects[tag.get('objid')]
126
126
            self.book.append(page)
127
 
            for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 
 
127
            for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
128
128
                                          'ruledline', 'simpletextblock']):
129
129
                if block_tag.name == 'ruledline':
130
130
                    page.append(RuledLine(**self.attrs_to_dict(block_tag)))
131
131
                else:
132
132
                    page.append(self.parsed_objects[block_tag.get('objid')])
133
 
                
 
133
 
134
134
        for tag in self.soup.find('objects').findAll('button'):
135
135
            jt = tag.find('jumpto')
136
136
            tb = self.parsed_objects[jt.get('refobj')]
137
137
            jb = JumpButton(tb)
138
138
            self.book.append(jb)
139
139
            self.parsed_objects[tag.get('objid')] = jb
140
 
        
 
140
 
141
141
        for tag in self.soup.findAll(['textblock', 'simpletextblock']):
142
142
            self.process_text_block(tag)
143
143
        toc = self.soup.find('toc')
145
145
            for tag in toc.findAll('toclabel'):
146
146
                label = self.tag_to_string(tag)
147
147
                self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')])
148
 
                
149
 
    
 
148
 
 
149
 
150
150
    def third_pass(self):
151
151
        map = {
152
 
               'page'       : (Page, ['pagestyle', 'evenfooterid', 
 
152
               'page'       : (Page, ['pagestyle', 'evenfooterid',
153
153
                                      'oddfooterid', 'evenheaderid', 'oddheaderid']),
154
154
               'textblock'  : (TextBlock, ['textstyle', 'blockstyle']),
155
155
               'simpletextblock'  : (TextBlock, ['textstyle', 'blockstyle']),
167
167
                settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
168
168
                for a in ('pagestyle', 'blockstyle', 'textstyle'):
169
169
                    label = tag.get(a, False)
170
 
                    if label:
 
170
                    if label and \
 
171
                        (label in self._style_labels or label in self.parsed_objects):
171
172
                        _obj = self.parsed_objects[label] if \
172
173
                            self.parsed_objects.has_key(label) else \
173
174
                            self._style_labels[label]
181
182
                if tag.has_key('canvaswidth'):
182
183
                    args += [tag.get('canvaswidth'), tag.get('canvasheight')]
183
184
                self.parsed_objects[id] = map[tag.name][0](*args, **settings)
184
 
                
185
 
        
186
 
    
 
185
 
 
186
 
 
187
 
187
188
    def second_pass(self):
188
189
        map = {
189
190
               'pagestyle'  : (PageStyle, ['stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']),
207
208
                    self._style_labels[x] = self.parsed_objects[id]
208
209
                if tag.name == 'registfont':
209
210
                    self.book.append(self.parsed_objects[id])
210
 
                    
211
 
        
 
211
 
 
212
 
212
213
    @classmethod
213
214
    def tag_to_string(cls, tag):
214
215
        '''
226
227
                res = cls.tag_to_string(item)
227
228
                if res:
228
229
                    strings.append(res)
229
 
        return u''.join(strings)     
230
 
    
 
230
        return u''.join(strings)
 
231
 
231
232
    def first_pass(self):
232
233
        info = self.soup.find('bbebxylog').find('bookinformation').find('info')
233
234
        bookinfo = info.find('bookinfo')
234
235
        docinfo  = info.find('docinfo')
235
 
        
 
236
 
236
237
        def me(base, tagname):
237
238
            tag = base.find(tagname.lower())
238
239
            if tag is None:
239
240
                return ('', '', '')
240
241
            tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
241
242
            return tag
242
 
            
 
243
 
243
244
        title          = me(bookinfo, 'Title')
244
245
        author         = me(bookinfo, 'Author')
245
246
        publisher      = me(bookinfo, 'Publisher')
250
251
        creator        = me(docinfo, 'Creator')[0]
251
252
        producer       = me(docinfo, 'Producer')[0]
252
253
        bookid         = me(bookinfo, 'BookID')[0]
253
 
        
 
254
 
254
255
        sd = self.soup.find('setdefault')
255
256
        sd = StyleDefault(**self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust']))
256
257
        bs = self.soup.find('booksetting')
257
258
        bs = BookSetting(**self.attrs_to_dict(bs, []))
258
 
        
 
259
 
259
260
        settings = {}
260
261
        thumbnail = self.soup.find('cthumbnail')
261
262
        if thumbnail is not None:
264
265
                settings['thumbnail'] = f
265
266
            else:
266
267
                print _('Could not read from thumbnail file:'), f
267
 
        
 
268
 
268
269
        self.book = Book(title=title, author=author, publisher=publisher,
269
270
                         category=category, classification=classification,
270
271
                         freetext=freetext, language=language, creator=creator,
271
272
                         producer=producer, bookid=bookid, setdefault=sd,
272
273
                         booksetting=bs, **settings)
273
 
        
 
274
 
274
275
        for hdr in self.soup.findAll(['header', 'footer']):
275
276
            elem = Header if hdr.name == 'header' else Footer
276
 
            self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))    
277
 
        
 
277
            self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))
 
278
 
278
279
    def render(self, file, to_lrs=False):
279
280
        if to_lrs:
280
281
            self.book.renderLrs(file, 'utf-8')
281
282
        else:
282
283
            self.book.renderLrf(file)
283
 
        
 
284
 
284
285
 
285
286
def option_parser():
286
287
    parser = OptionParser(usage=_('%prog [options] file.lrs\nCompile an LRS file into an LRF file.'))
299
300
        level = logging.DEBUG if opts.verbose else logging.INFO
300
301
        logger = logging.getLogger('lrs2lrf')
301
302
        setup_cli_handlers(logger, level)
302
 
    
 
303
 
303
304
    if len(args) != 2:
304
305
        parser.print_help()
305
306
        return 1
310
311
    if opts.verbose:
311
312
        import warnings
312
313
        warnings.defaultaction = 'error'
313
 
    
 
314
 
314
315
    logger.info('Parsing LRS file...')
315
316
    converter =  LrsParser(open(args[1], 'rb'), logger)
316
317
    logger.info('Writing to output file...')
320
321
 
321
322
 
322
323
if __name__ == '__main__':
323
 
    sys.exit(main())
 
 
\ No newline at end of file
 
324
    sys.exit(main())