2
2
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
3
3
'''Read/Write metadata from Open Packaging Format (.opf) files.'''
5
import sys, re, os, glob
8
7
from urllib import unquote, quote
10
9
from calibre.constants import __appname__, __version__
11
from calibre.ebooks.metadata import MetaInformation
10
from calibre.ebooks.metadata import MetaInformation, string_to_authors
12
11
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
13
12
from calibre.ebooks.lrf import entity_to_unicode
14
from calibre.ebooks.metadata import get_parser, Resource, ResourceCollection
13
from calibre.ebooks.metadata import Resource, ResourceCollection
15
14
from calibre.ebooks.metadata.toc import TOC
17
16
class OPFSoup(BeautifulStoneSoup):
    '''
    :class:`BeautifulStoneSoup` pre-configured for OPF markup: HTML entities
    are converted and ``item``, ``itemref`` and ``reference`` are declared
    as self-closing tags.
    '''

    def __init__(self, raw):
        # Gather the parser options first so the base-class call stays short.
        options = {
            'convertEntities': BeautifulSoup.HTML_ENTITIES,
            'selfClosingTags': ['item', 'itemref', 'reference'],
        }
        BeautifulStoneSoup.__init__(self, raw, **options)
24
23
class ManifestItem(Resource):
27
26
def from_opf_manifest_item(item, basedir):
28
27
if item.has_key('href'):
44
43
return self.mime_type
45
44
def fset(self, val):
46
45
self.mime_type = val
47
46
return property(fget=fget, fset=fset)
50
49
def __unicode__(self):
    '''
    Return this item rendered as an ``<item ... />`` element, filled in
    from ``self.id``, ``self.href()`` and ``self.media_type``.
    '''
    fields = (self.id, self.href(), self.media_type)
    return u'<item id="%s" href="%s" media-type="%s" />' % fields
54
53
return unicode(self).encode('utf-8')
56
55
def __repr__(self):
57
56
return unicode(self)
60
59
def __getitem__(self, index):
64
63
return self.media_type
65
64
raise IndexError('%d out of bounds.'%index)
68
67
class Manifest(ResourceCollection):
71
70
def from_opf_manifest_element(manifest, dir):
100
99
def __init__(self):
101
100
ResourceCollection.__init__(self)
105
104
def item(self, id):
110
109
def id_for_path(self, path):
111
110
path = os.path.normpath(os.path.abspath(path))
113
112
if i.path and os.path.normpath(i.path) == path:
116
115
def path_for_id(self, id):
121
120
class Spine(ResourceCollection):
123
122
class Item(Resource):
125
124
def __init__(self, idfunc, *args, **kwargs):
126
125
Resource.__init__(self, *args, **kwargs)
127
126
self.is_linear = True
128
127
self.id = idfunc(self.path)
131
130
def from_opf_spine_element(spine, manifest):
132
131
s = Spine(manifest)
165
164
if not r.is_linear:
173
172
class Guide(ResourceCollection):
175
174
class Reference(Resource):
178
177
def from_opf_resource_item(ref, basedir):
179
178
title, href, type = ref.get('title', ''), ref['href'], ref['type']
203
202
def set_cover(self, path):
    '''
    Point the cover references of this guide at `path`.

    Every existing reference whose ``type`` contains ``cover``
    (case-insensitively) is removed. Then one new reference to `path` is
    appended for each of the three cover types listed below, each with an
    empty title.

    :param path: Location of the cover; handed to :class:`Guide.Reference`
                 with ``is_path=True``.
    '''
    # Snapshot the matches first: removing from self while iterating over it
    # would skip entries.
    stale = [ref for ref in self if 'cover' in ref.type.lower()]
    # An explicit loop instead of map(): the removal is a pure side effect
    # and must run even where map() returns a lazy iterator.
    for ref in stale:
        self.remove(ref)
    for cover_type in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
        self.append(Guide.Reference(path, is_path=True))
        added = self[-1]
        added.type = cover_type
        added.title = ''
211
210
class standard_field(object):
213
212
def __init__(self, name):
216
215
def __get__(self, obj, typ=None):
217
216
return getattr(obj, 'get_'+self.name)()
220
219
class OPF(MetaInformation):
222
221
MIMETYPE = 'application/oebps-package+xml'
223
222
ENTITY_PATTERN = re.compile(r'&(\S+?);')
225
224
uid = standard_field('uid')
226
225
application_id = standard_field('application_id')
227
226
title = standard_field('title')
238
237
series_index = standard_field('series_index')
239
238
rating = standard_field('rating')
240
239
tags = standard_field('tags')
242
241
def __init__(self):
243
242
raise NotImplementedError('Abstract base class')
248
247
return self.soup.find(re.compile('package'))
249
248
return property(fget=fget)
254
253
return self.package.find(re.compile('metadata'))
255
254
return property(fget=fget)
258
257
def get_title(self):
    '''
    Return the stripped text of the first ``dc:title`` element found in
    ``self.metadata``, with every ``ENTITY_PATTERN`` match substituted
    through ``entity_to_unicode``. When there is no such element, or it has
    no string, return ``self.default_title`` stripped instead.
    '''
    elem = self.metadata.find('dc:title')
    if not elem or not elem.string:
        return self.default_title.strip()
    decoded = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
    return decoded.strip()
264
263
def get_authors(self):
265
264
creators = self.metadata.findAll('dc:creator')
266
265
for elem in creators:
272
271
if role == 'aut' and elem.string:
273
272
raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
277
ans.extend(i.split('&'))
278
return [a.strip() for a in ans]
273
return string_to_authors(raw)
281
276
def get_author_sort(self):
282
277
creators = self.metadata.findAll('dc:creator')
283
278
for elem in creators:
288
283
fa = elem.get('file-as')
289
284
return self.ENTITY_PATTERN.sub(entity_to_unicode, fa).strip() if fa else None
292
287
def get_title_sort(self):
293
288
title = self.package.find('dc:title')
295
290
if title.has_key('file-as'):
296
291
return title['file-as'].strip()
299
294
def get_comments(self):
300
295
comments = self.soup.find('dc:description')
301
296
if comments and comments.string:
302
297
return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip()
305
300
def get_uid(self):
    '''
    Return the value of the ``unique-identifier`` attribute of
    ``self.package``, or ``None`` when the attribute is absent.
    '''
    pkg = self.package
    if not pkg.has_key('unique-identifier'):
        return None
    return pkg['unique-identifier']
310
305
def get_category(self):
311
306
category = self.soup.find('dc:type')
312
307
if category and category.string:
313
308
return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string).strip()
316
311
def get_publisher(self):
317
312
publisher = self.soup.find('dc:publisher')
318
313
if publisher and publisher.string:
319
314
return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string).strip()
322
317
def get_isbn(self):
323
318
for item in self.metadata.findAll('dc:identifier'):
324
319
scheme = item.get('scheme')
327
322
if scheme is not None and scheme.lower() == 'isbn' and item.string:
328
323
return str(item.string).strip()
331
326
def get_language(self):
332
327
item = self.metadata.find('dc:language')
334
329
return _('Unknown')
335
330
return ''.join(item.findAll(text=True)).strip()
337
332
def get_application_id(self):
338
333
for item in self.metadata.findAll('dc:identifier'):
339
334
scheme = item.get('scheme', None)
363
358
for item in isbn:
364
359
ans.append(item[1].replace('-', ''))
367
362
def get_series(self):
368
363
s = self.metadata.find('series')
369
364
if s is not None:
370
365
return str(s.string).strip()
373
368
def get_series_index(self):
374
369
s = self.metadata.find('series-index')
375
370
if s and s.string:
377
return int(str(s.string).strip())
372
return float(str(s.string).strip())
382
377
def get_rating(self):
383
378
s = self.metadata.find('rating')
384
379
if s and s.string:
398
393
return [unicode(a).strip() for a in ans]
401
396
class OPFReader(OPF):
403
398
def __init__(self, stream, dir=os.getcwdu()):
405
400
if not hasattr(stream, 'read'):
407
402
dir = os.path.dirname(stream)
408
403
stream = open(stream, 'rb')
409
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
404
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
410
405
if hasattr(stream, 'seek'):
412
407
self.soup = OPFSoup(stream.read())
420
415
spine = self.soup.find(re.compile('spine'))
421
416
if spine is not None:
422
417
self.spine = Spine.from_opf_spine_element(spine, self.manifest)
424
419
self.toc = TOC(base_path=dir)
425
420
self.toc.read_from_opf(self)
426
421
guide = self.soup.find(re.compile('guide'))
427
422
if guide is not None:
428
423
self.guide = Guide.from_opf_guide(guide, dir)
430
425
self.cover_data = (None, None)
433
428
class OPFCreator(MetaInformation):
435
430
def __init__(self, base_path, *args, **kwargs):
451
446
self.guide = Guide()
453
448
self.guide.set_cover(self.cover)
456
451
def create_manifest(self, entries):
458
453
Create <manifest>
460
455
`entries`: List of (path, mime-type) If mime-type is None it is autodetected
462
entries = map(lambda x: x if os.path.isabs(x[0]) else
457
entries = map(lambda x: x if os.path.isabs(x[0]) else
463
458
(os.path.abspath(os.path.join(self.base_path, x[0])), x[1]),
465
460
self.manifest = Manifest.from_paths(entries)
466
461
self.manifest.set_basedir(self.base_path)
468
463
def create_manifest_from_files_in(self, files_and_dirs):
472
467
for spec in os.walk(dir):
473
468
root, files = spec[0], spec[-1]
474
469
for name in files:
475
470
path = os.path.join(root, name)
476
471
if os.path.isfile(path):
477
entries.append((path, None))
472
entries.append((path, None))
479
474
for i in files_and_dirs:
480
475
if os.path.isdir(i):
483
478
entries.append((i, None))
485
self.create_manifest(entries)
480
self.create_manifest(entries)
487
482
def create_spine(self, entries):
489
484
Create the <spine> element. Must first call :method:`create_manifest`.
491
486
`entries`: List of paths
493
entries = map(lambda x: x if os.path.isabs(x) else
488
entries = map(lambda x: x if os.path.isabs(x) else
494
489
os.path.abspath(os.path.join(self.base_path, x)), entries)
495
490
self.spine = Spine.from_paths(entries, self.manifest)
497
492
def set_toc(self, toc):
499
494
Set the toc. You must call :method:`create_spine` before calling this
502
497
:param toc: A :class:`TOC` object
506
501
def create_guide(self, guide_element):
    '''
    Build ``self.guide`` from `guide_element` via
    :meth:`Guide.from_opf_guide`, then set its base directory to
    ``self.base_path``.
    '''
    base = self.base_path
    self.guide = Guide.from_opf_guide(guide_element, base)
    self.guide.set_basedir(base)
510
505
def render(self, opf_stream, ncx_stream=None, ncx_manifest_entry=None):
511
506
from calibre.resources import opf_template
512
507
from calibre.utils.genshi.template import MarkupTemplate
530
525
cover = os.path.abspath(os.path.join(self.base_path, cover))
531
526
self.guide.set_cover(cover)
532
527
self.guide.set_basedir(self.base_path)
534
529
opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml')
535
530
if not opf.startswith('<?xml '):
536
531
opf = '<?xml version="1.0" encoding="UTF-8"?>\n'+opf
540
535
if toc is not None and ncx_stream is not None:
541
536
toc.render(ncx_stream, self.application_id)
542
537
ncx_stream.flush()
545
return get_parser('opf')
547
def main(args=sys.argv):
548
parser = option_parser()
549
opts, args = parser.parse_args(args)
553
mi = MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
555
if opts.title is not None:
556
mi.title = opts.title.replace('&', '&').replace('<', '<').replace('>', '>')
558
if opts.authors is not None:
559
aus = [i.strip().replace('&', '&').replace('<', '<').replace('>', '>') for i in opts.authors.split(',')]
562
if opts.category is not None:
563
mi.category = opts.category.replace('&', '&').replace('<', '<').replace('>', '>')
565
if opts.comment is not None:
566
mi.comments = opts.comment.replace('&', '&').replace('<', '<').replace('>', '>')
569
mo = OPFCreator(os.path.dirname(args[1]), mi)
570
ncx = cStringIO.StringIO()
571
mo.render(open(args[1], 'wb'), ncx)
574
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
578
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
581
print MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
584
if __name__ == '__main__':