434
435
spine_path = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
435
436
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
437
title = MetadataField('title')
438
title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
438
439
publisher = MetadataField('publisher')
439
440
language = MetadataField('language')
440
441
comments = MetadataField('description')
441
category = MetadataField('category')
442
category = MetadataField('type')
443
rights = MetadataField('rights')
442
444
series = MetadataField('series', is_dc=False)
443
series_index = MetadataField('series_index', is_dc=False, formatter=int, none_is=1)
445
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
444
446
rating = MetadataField('rating', is_dc=False, formatter=int)
445
timestamp = MetadataField('date', formatter=parser.parse)
447
pubdate = MetadataField('date', formatter=parser.parse)
448
publication_type = MetadataField('publication_type', is_dc=False)
449
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
448
452
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
449
453
if not hasattr(stream, 'read'):
450
454
stream = open(stream, 'rb')
457
raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
451
458
self.basedir = self.base_dir = basedir
452
459
self.path_to_html_toc = self.html_toc_fragment = None
453
raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True)
460
raw, self.encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)
454
461
raw = raw[raw.find('<'):]
455
462
self.root = etree.fromstring(raw, self.PARSER)
456
463
self.metadata = self.metadata_path(self.root)
641
648
return property(fget=fget, fset=fset)
647
matches = self.authors_path(self.metadata)
649
for match in matches:
650
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
652
ans = match.get('file-as', None)
657
matches = self.authors_path(self.metadata)
659
for key in matches[0].attrib:
660
if key.endswith('file-as'):
661
matches[0].attrib.pop(key)
662
matches[0].set('file-as', unicode(val))
664
return property(fget=fget, fset=fset)
670
matches = self.title_path(self.metadata)
672
for match in matches:
673
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
675
ans = match.get('file-as', None)
680
matches = self.title_path(self.metadata)
682
for key in matches[0].attrib:
683
if key.endswith('file-as'):
684
matches[0].attrib.pop(key)
685
matches[0].set('file-as', unicode(val))
687
return property(fget=fget, fset=fset)
651
def author_sort(self):
654
matches = self.authors_path(self.metadata)
656
for match in matches:
657
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
659
ans = match.get('file-as', None)
664
matches = self.authors_path(self.metadata)
666
for key in matches[0].attrib:
667
if key.endswith('file-as'):
668
matches[0].attrib.pop(key)
669
matches[0].set('file-as', unicode(val))
671
return property(fget=fget, fset=fset)
674
def title_sort(self):
677
matches = self.title_path(self.metadata)
679
for match in matches:
680
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
682
ans = match.get('file-as', None)
687
matches = self.title_path(self.metadata)
689
for key in matches[0].attrib:
690
if key.endswith('file-as'):
691
matches[0].attrib.pop(key)
692
matches[0].set('file-as', unicode(val))
694
return property(fget=fget, fset=fset)
945
955
cover = os.path.abspath(os.path.join(self.base_path, cover))
946
956
self.guide.set_cover(cover)
947
957
self.guide.set_basedir(self.base_path)
948
opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml')
958
opf = template.generate(
959
__appname__=__appname__, mi=self,
960
__version__=__version__).render('xml', encoding=encoding)
961
opf_stream.write('<?xml version="1.0" encoding="%s" ?>\n'
949
963
opf_stream.write(opf)
950
964
opf_stream.flush()
951
toc = getattr(self, 'toc', None)
952
965
if toc is not None and ncx_stream is not None:
953
966
toc.render(ncx_stream, self.application_id)
954
967
ncx_stream.flush()
970
def metadata_to_opf(mi, as_string=True):
971
from lxml import etree
973
from calibre.ebooks.oeb.base import OPF, DC
975
if not mi.application_id:
976
mi.application_id = str(uuid.uuid4())
978
if not mi.book_producer:
979
mi.book_producer = __appname__ + ' (%s) '%__version__ + \
980
'[http://calibre-ebook.com]'
985
root = etree.fromstring(textwrap.dedent(
987
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="%(a)s_id">
988
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
989
<dc:identifier opf:scheme="%(a)s" id="%(a)s_id">%(id)s</dc:identifier>
993
'''%dict(a=__appname__, id=mi.application_id)))
996
metadata[0].tail = '\n'+(' '*8)
997
def factory(tag, text=None, sort=None, role=None, scheme=None, name=None,
1001
attrib[OPF('file-as')] = sort
1003
attrib[OPF('role')] = role
1005
attrib[OPF('scheme')] = scheme
1007
attrib['name'] = name
1009
attrib['content'] = content
1010
elem = metadata.makeelement(tag, attrib=attrib)
1011
elem.tail = '\n'+(' '*8)
1013
elem.text = text.strip()
1014
metadata.append(elem)
1016
factory(DC('title'), mi.title, mi.title_sort)
1017
for au in mi.authors:
1018
factory(DC('creator'), au, mi.author_sort, 'aut')
1019
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
1020
if hasattr(mi.pubdate, 'isoformat'):
1021
factory(DC('date'), mi.pubdate.isoformat())
1022
factory(DC('language'), mi.language)
1024
factory(DC('type'), mi.category)
1026
factory(DC('description'), mi.comments)
1028
factory(DC('publisher'), mi.publisher)
1030
factory(DC('identifier'), mi.isbn, scheme='ISBN')
1032
factory(DC('rights'), mi.rights)
1035
factory(DC('subject'), tag)
1036
meta = lambda n, c: factory('meta', name='calibre:'+n, content=c)
1038
meta('series', mi.series)
1039
if mi.series_index is not None:
1040
meta('series_index', mi.format_series_index())
1041
if mi.rating is not None:
1042
meta('rating', str(mi.rating))
1043
if hasattr(mi.timestamp, 'isoformat'):
1044
meta('timestamp', mi.timestamp.isoformat())
1045
if mi.publication_type:
1046
meta('publication_type', mi.publication_type)
1048
metadata[-1].tail = '\n' +(' '*4)
1051
if not isinstance(mi.cover, unicode):
1052
mi.cover = mi.cover.decode(filesystem_encoding)
1053
guide.text = '\n'+(' '*8)
1054
r = guide.makeelement(OPF('reference'),
1055
attrib={'type':'cover', 'title':_('Cover'), 'href':mi.cover})
1056
r.tail = '\n' +(' '*4)
1058
return etree.tostring(root, pretty_print=True, encoding='utf-8',
1059
xml_declaration=True) if as_string else root
1063
from datetime import datetime
1064
from cStringIO import StringIO
1065
mi = MetaInformation('test & title', ['a"1', "a'2"])
1066
mi.title_sort = 'a\'"b'
1067
mi.author_sort = 'author sort'
1068
mi.pubdate = datetime.now()
1070
mi.category = 'test'
1071
mi.comments = 'what a fun book\n\n'
1072
mi.publisher = 'publisher'
1074
mi.tags = ['a', 'b']
1075
mi.series = 's"c\'l&<>'
1076
mi.series_index = 3.34
1078
mi.timestamp = datetime.now()
1079
mi.publication_type = 'ooooo'
1081
mi.cover = 'asd.jpg'
1082
opf = metadata_to_opf(mi)
1084
newmi = MetaInformation(OPF(StringIO(opf)))
1085
for attr in ('author_sort', 'title_sort', 'comments', 'category',
1086
'publisher', 'series', 'series_index', 'rating',
1087
'isbn', 'tags', 'cover_data', 'application_id',
1088
'language', 'cover',
1089
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
1090
'pubdate', 'rights', 'publication_type'):
1091
o, n = getattr(mi, attr), getattr(newmi, attr)
1092
if o != n and o.strip() != n.strip():
1093
print 'FAILED:', attr, getattr(mi, attr), '!=', getattr(newmi, attr)
957
1096
class OPFTest(unittest.TestCase):
959
1098
def setUp(self):
1020
1159
unittest.TextTestRunner(verbosity=2).run(suite())
1023
def option_parser():
1024
from calibre.ebooks.metadata import get_parser
1025
parser = get_parser('opf')
1026
parser.add_option('--language', default=None, help=_('Set the dc:language field'))
1029
def main(args=sys.argv):
1030
parser = option_parser()
1031
opts, args = parser.parse_args(args)
1035
opfpath = os.path.abspath(args[1])
1036
basedir = os.path.dirname(opfpath)
1037
mi = MetaInformation(OPF(open(opfpath, 'rb'), basedir))
1039
if opts.title is not None:
1040
mi.title = opts.title
1042
if opts.authors is not None:
1043
aus = [i.strip() for i in opts.authors.split(',')]
1046
if opts.category is not None:
1047
mi.category = opts.category
1049
if opts.comment is not None:
1050
mi.comments = opts.comment
1052
if opts.language is not None:
1053
mi.language = opts.language
1056
mo = OPFCreator(basedir, mi)
1057
ncx = cStringIO.StringIO()
1058
mo.render(open(args[1], 'wb'), ncx)
1059
ncx = ncx.getvalue()
1061
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
1063
f = open(f[0], 'wb')
1065
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
1068
print MetaInformation(OPF(open(opfpath, 'rb'), basedir))
1073
if __name__ == '__main__':