18
18
from calibre.ebooks.chardet import xml_to_unicode
20
20
class LrsParser(object):
22
SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
23
'PutObj', 'RuledLine',
22
SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
23
'PutObj', 'RuledLine',
24
24
'Plot', 'SetDefault', 'BookSetting', 'RegistFont',
25
25
'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo',
26
26
'ImageStream', 'Image']]
28
28
def __init__(self, stream, logger):
29
29
self.logger = logger
30
30
src = stream.read()
31
31
self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
32
convertEntities=BeautifulStoneSoup.XML_ENTITIES,
32
convertEntities=BeautifulStoneSoup.XML_ENTITIES,
33
33
selfClosingTags=self.SELF_CLOSING_TAGS)
35
35
for obj in self.soup.findAll(objid=True):
36
36
self.objects[obj['objid']] = obj
38
38
self.parsed_objects = {}
45
45
def fifth_pass(self):
46
46
for tag in self.soup.findAll(['canvas', 'header', 'footer']):
47
47
canvas = self.parsed_objects[tag.get('objid')]
48
48
for po in tag.findAll('putobj'):
49
49
canvas.put_object(self.parsed_objects[po.get('refobj')],
50
50
po.get('x1'), po.get('y1'))
54
54
def attrs_to_dict(cls, tag, exclude=('objid',)):
77
77
settings = self.attrs_to_dict(tag)
78
78
settings.pop('spanstyle', '')
79
79
return map[tag.name](**settings)
81
81
def process_text_element(self, tag, elem):
82
82
for item in tag.contents:
83
83
if isinstance(item, NavigableString):
86
86
subelem = self.text_tag_to_element(item)
87
87
elem.append(subelem)
88
88
self.process_text_element(item, subelem)
91
91
def process_paragraph(self, tag):
93
93
contents = [i for i in tag.contents]
105
105
self.process_text_element(item, elem)
108
108
def process_text_block(self, tag):
109
109
tb = self.parsed_objects[tag.get('objid')]
110
110
for item in tag.contents:
119
119
elem = self.text_tag_to_element(item)
120
120
self.process_text_element(item, elem)
123
123
def fourth_pass(self):
124
124
for tag in self.soup.findAll('page'):
125
125
page = self.parsed_objects[tag.get('objid')]
126
126
self.book.append(page)
127
for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
127
for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
128
128
'ruledline', 'simpletextblock']):
129
129
if block_tag.name == 'ruledline':
130
130
page.append(RuledLine(**self.attrs_to_dict(block_tag)))
132
132
page.append(self.parsed_objects[block_tag.get('objid')])
134
134
for tag in self.soup.find('objects').findAll('button'):
135
135
jt = tag.find('jumpto')
136
136
tb = self.parsed_objects[jt.get('refobj')]
137
137
jb = JumpButton(tb)
138
138
self.book.append(jb)
139
139
self.parsed_objects[tag.get('objid')] = jb
141
141
for tag in self.soup.findAll(['textblock', 'simpletextblock']):
142
142
self.process_text_block(tag)
143
143
toc = self.soup.find('toc')
145
145
for tag in toc.findAll('toclabel'):
146
146
label = self.tag_to_string(tag)
147
147
self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')])
150
150
def third_pass(self):
152
'page' : (Page, ['pagestyle', 'evenfooterid',
152
'page' : (Page, ['pagestyle', 'evenfooterid',
153
153
'oddfooterid', 'evenheaderid', 'oddheaderid']),
154
154
'textblock' : (TextBlock, ['textstyle', 'blockstyle']),
155
155
'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']),
167
167
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
168
168
for a in ('pagestyle', 'blockstyle', 'textstyle'):
169
169
label = tag.get(a, False)
171
(label in self._style_labels or label in self.parsed_objects):
171
172
_obj = self.parsed_objects[label] if \
172
173
self.parsed_objects.has_key(label) else \
173
174
self._style_labels[label]
181
182
if tag.has_key('canvaswidth'):
182
183
args += [tag.get('canvaswidth'), tag.get('canvasheight')]
183
184
self.parsed_objects[id] = map[tag.name][0](*args, **settings)
187
188
def second_pass(self):
189
190
'pagestyle' : (PageStyle, ['stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']),
207
208
self._style_labels[x] = self.parsed_objects[id]
208
209
if tag.name == 'registfont':
209
210
self.book.append(self.parsed_objects[id])
213
214
def tag_to_string(cls, tag):
226
227
res = cls.tag_to_string(item)
228
229
strings.append(res)
229
return u''.join(strings)
230
return u''.join(strings)
231
232
def first_pass(self):
232
233
info = self.soup.find('bbebxylog').find('bookinformation').find('info')
233
234
bookinfo = info.find('bookinfo')
234
235
docinfo = info.find('docinfo')
236
237
def me(base, tagname):
237
238
tag = base.find(tagname.lower())
239
240
return ('', '', '')
240
241
tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
243
244
title = me(bookinfo, 'Title')
244
245
author = me(bookinfo, 'Author')
245
246
publisher = me(bookinfo, 'Publisher')
250
251
creator = me(docinfo, 'Creator')[0]
251
252
producer = me(docinfo, 'Producer')[0]
252
253
bookid = me(bookinfo, 'BookID')[0]
254
255
sd = self.soup.find('setdefault')
255
256
sd = StyleDefault(**self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust']))
256
257
bs = self.soup.find('booksetting')
257
258
bs = BookSetting(**self.attrs_to_dict(bs, []))
260
261
thumbnail = self.soup.find('cthumbnail')
261
262
if thumbnail is not None:
264
265
settings['thumbnail'] = f
266
267
print _('Could not read from thumbnail file:'), f
268
269
self.book = Book(title=title, author=author, publisher=publisher,
269
270
category=category, classification=classification,
270
271
freetext=freetext, language=language, creator=creator,
271
272
producer=producer, bookid=bookid, setdefault=sd,
272
273
booksetting=bs, **settings)
274
275
for hdr in self.soup.findAll(['header', 'footer']):
275
276
elem = Header if hdr.name == 'header' else Footer
276
self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))
277
self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))
278
279
def render(self, file, to_lrs=False):
280
281
self.book.renderLrs(file, 'utf-8')
282
283
self.book.renderLrf(file)
285
286
def option_parser():
286
287
parser = OptionParser(usage=_('%prog [options] file.lrs\nCompile an LRS file into an LRF file.'))
312
313
warnings.defaultaction = 'error'
314
315
logger.info('Parsing LRS file...')
315
316
converter = LrsParser(open(args[1], 'rb'), logger)
316
317
logger.info('Writing to output file...')