from __future__ import with_statement

__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''Read meta information from epub files'''

import os
import sys
import time
import traceback
from cStringIO import StringIO
from contextlib import closing

from PyQt4.Qt import QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
                     SIGNAL, QPainter, QImage, QObject, QApplication, Qt, QPalette
from PyQt4.QtWebKit import QWebPage

from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir, fit_image
class EPubException(Exception):
    """Base class for the errors raised by this module while reading an EPUB."""
class OCFException(EPubException):
    """Raised when the OCF ``container.xml`` lacks its ``<container/>`` element."""
class ContainerException(OCFException):
    """Specialised :class:`OCFException`.

    NOTE(review): nothing in the visible code raises this; presumably kept
    for callers that catch it -- confirm before removing.
    """
class Container(dict):
    """Mapping of rootfile media type -> full path, read from ``container.xml``.

    :param stream: file-like object yielding the XML of an OCF
        ``container.xml``. When ``None`` (the default) the mapping is left
        empty instead of failing on ``None.read()``.
    :raises OCFException: the ``<container/>`` element is missing.
    :raises EPubException: the OCF version is not ``1.0``, the
        ``<rootfiles/>`` element is missing, or a ``<rootfile/>`` lacks its
        ``media-type`` or ``full-path`` attribute.
    """

    def __init__(self, stream=None):
        if stream is None:
            return
        soup = BeautifulStoneSoup(stream.read())
        container = soup.find('container')
        # The guards below are required: without them every call would raise
        # regardless of the document's content.
        if container is None:
            raise OCFException("<container/> element missing")
        if container.get('version', None) != '1.0':
            raise EPubException("unsupported version of OCF")
        rootfiles = container.find('rootfiles')
        if rootfiles is None:
            raise EPubException("<rootfiles/> element missing")
        for rootfile in rootfiles.findAll('rootfile'):
            try:
                self[rootfile['media-type']] = rootfile['full-path']
            except KeyError:
                # A tag lookup for an absent attribute fails with KeyError.
                raise EPubException("<rootfile/> element malformed")
class OCF(object):
    """Constants describing the layout of an OCF (EPUB) container.

    Not instantiable on its own: ``__init__`` always raises, so subclasses
    must provide their own initialiser.
    """

    MIMETYPE = 'application/epub+zip'
    CONTAINER_PATH = 'META-INF/container.xml'
    ENCRYPTION_PATH = 'META-INF/encryption.xml'

    def __init__(self):
        raise NotImplementedError('Abstract base class')
class OCFReader(OCF):
    """Load the container description and OPF package of an EPUB.

    Subclasses must provide ``open(name)`` returning a file-like object and
    must set ``self.root`` *before* calling this initialiser, because it is
    handed to :class:`OPF`.

    After construction the instance exposes ``container`` (a
    :class:`Container`), ``opf_path`` and ``opf``.

    :raises EPubException: ``container.xml`` or the OPF file it points to
        cannot be found.

    NOTE(review): the base class and the exception types caught below were
    inferred (KeyError for a missing archive member, IOError for a missing
    file on disk) -- confirm against the ``open`` implementations.
    """

    # What the ``open`` implementations are expected to raise for a missing
    # entry.
    _MISSING = (KeyError, IOError)

    def __init__(self):
        # The mimetype check is advisory only: problems are reported, never
        # fatal.
        try:
            with closing(self.open('mimetype')) as f:
                mimetype = f.read().rstrip()
            if mimetype != OCF.MIMETYPE:
                print('WARNING: Invalid mimetype declaration %s' % (mimetype,))
        except self._MISSING:
            print('WARNING: Epub doesn\'t contain a mimetype declaration')

        try:
            with closing(self.open(OCF.CONTAINER_PATH)) as f:
                self.container = Container(f)
        except self._MISSING:
            raise EPubException("missing OCF container.xml file")

        self.opf_path = self.container[OPF.MIMETYPE]

        try:
            with closing(self.open(self.opf_path)) as f:
                self.opf = OPF(f, self.root)
        except self._MISSING:
            raise EPubException("missing OPF package file")
class OCFZipReader(OCFReader):
    """:class:`OCFReader` backed by a ZIP archive.

    :param stream: file-like object (or path) accepted by ``ZipFile``.
    :param mode: mode passed through to ``ZipFile``.
    :param root: directory handed to the OPF parser. When ``None`` it
        defaults to the directory of ``stream.name`` if the stream has a
        name, otherwise to the current working directory.
    :raises EPubException: ``stream`` is not a ZIP archive.
    """

    def __init__(self, stream, mode='r', root=None):
        try:
            self.archive = ZipFile(stream, mode=mode)
        except BadZipfile:
            raise EPubException("not a ZIP .epub OCF container")
        # An explicit ``root`` wins; the fallbacks only apply when the caller
        # gave none.
        self.root = root
        if self.root is None:
            self.root = os.getcwdu()
            if hasattr(stream, 'name'):
                self.root = os.path.abspath(os.path.dirname(stream.name))
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        """Return the archive member ``name`` as an in-memory file object.

        ``mode`` is accepted for interface compatibility and ignored.
        """
        return StringIO(self.archive.read(name))
class OCFDirReader(OCFReader):
    """:class:`OCFReader` backed by an unpacked EPUB directory.

    :param path: directory containing the extracted EPUB contents.
    """

    def __init__(self, path):
        # ``open`` and the base initialiser both read ``self.root``, so it has
        # to be set before delegating.
        self.root = path
        super(OCFDirReader, self).__init__()

    def open(self, path, *args, **kwargs):
        """Open ``path`` relative to the container root.

        Extra arguments are forwarded to the builtin ``open``.
        """
        return open(os.path.join(self.root, path), *args, **kwargs)
class CoverRenderer(QObject):
    """Render a local HTML page with QtWebKit and keep the result as JPEG.

    Loading starts in the constructor; ``render_html`` runs when the page
    reports ``loadFinished``. Afterwards ``rendered`` is ``True`` and
    ``image_data`` holds the JPEG bytes, or ``None`` if loading failed.

    :param url: ``QUrl`` of the page to render.
    :param size: accepted for interface compatibility; not used here.
    :param loop: event loop that is exited once rendering has finished.

    NOTE(review): ``WIDTH``/``HEIGHT`` are assumed to equal the initial
    600x800 viewport, and exiting ``loop`` on completion is inferred from the
    constructor signature -- confirm both.
    """

    WIDTH = 600
    HEIGHT = 800

    def __init__(self, url, size, loop):
        QObject.__init__(self)
        self.loop = loop
        self.page = QWebPage()
        pal = self.page.palette()
        pal.setBrush(QPalette.Background, Qt.white)
        self.page.setPalette(pal)
        self.page.setViewportSize(QSize(600, 800))
        frame = self.page.mainFrame()
        frame.setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        frame.setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
        self.image_data = None
        self.rendered = False
        frame.load(url)

    def render_html(self, ok):
        """Slot for ``loadFinished``: paint the page into ``image_data``.

        :param ok: ``False`` when the page failed to load; nothing is
            rendered in that case.
        """
        try:
            if not ok:
                return
            size = self.page.mainFrame().contentsSize()
            width, height = fit_image(size.width(), size.height(),
                                      self.WIDTH, self.HEIGHT)[1:]
            self.page.setViewportSize(QSize(width, height))
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            # 96 dots per inch, expressed in dots per metre.
            image.setDotsPerMeterX(96*(100/2.54))
            image.setDotsPerMeterY(96*(100/2.54))
            painter = QPainter(image)
            self.page.mainFrame().render(painter)
            # Painting must be finished before the image is serialised.
            painter.end()

            ba = QByteArray()
            buf = QBuffer(ba)
            buf.open(QBuffer.WriteOnly)
            image.save(buf, 'JPEG')
            self.image_data = str(ba.data())
        finally:
            # Always signal completion so that callers waiting on ``rendered``
            # or on the event loop are released even when rendering fails.
            self.rendered = True
            self.loop.exit(0)
def get_cover(opf, opf_path, stream):
    """Render the first spine item of an EPUB and return it as JPEG data.

    :param opf: parsed OPF object providing ``spine_items()``.
    :param opf_path: path of the OPF file inside the archive
        ('/'-separated).
    :param stream: seekable file-like object containing the EPUB archive.
    :return: JPEG bytes, or ``None`` when the book has no spine, the page
        file does not exist after extraction, or rendering failed.

    NOTE(review): taking the first spine item as the cover page, the 0.1s
    poll interval and the rewind of ``stream`` were inferred -- confirm.
    """
    spine = list(opf.spine_items())
    if not spine:
        return None
    cpage = spine[0]

    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            stream.seek(0)
            # Extracts into the current directory, i.e. ``tdir``.
            ZipFile(stream).extractall()
            opf_path = opf_path.replace('/', os.sep)
            # Spine entries are relative to the directory holding the OPF.
            cpage = os.path.join(tdir, os.path.dirname(opf_path), *cpage.split('/'))
            if not os.path.exists(cpage):
                return None

            # QtWebKit needs an application object; hold a reference so it
            # stays alive for the duration of the rendering.
            app = QApplication.instance()
            if app is None:
                app = QApplication([])

            url = QUrl.fromLocalFile(cpage)
            loop = QEventLoop()
            cr = CoverRenderer(url, os.stat(cpage).st_size, loop)
            loop.exec_()

            # Bounded wait in case the loop returned before rendering ended.
            count = 0
            while count < 50 and not cr.rendered:
                time.sleep(0.1)
                count += 1
    return cr.image_data
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a :class:`MetaInformation` object

    :param stream: seekable file-like object containing the EPUB.
    :param extract_cover: when true, also try to render the cover and store
        it in ``cover_data`` as ``('jpg', data)``. Cover extraction is best
        effort: a failure is printed as a traceback and otherwise ignored.
    """
    # Rewind so the function also works on a stream that was already read,
    # e.g. when called again after ``set_metadata``.
    stream.seek(0)
    reader = OCFZipReader(stream)
    mi = MetaInformation(reader.opf)
    if extract_cover:
        try:
            cdata = get_cover(reader.opf, reader.opf_path, stream)
            if cdata is not None:
                mi.cover_data = ('jpg', cdata)
        except Exception:
            traceback.print_exc()
    return mi
def set_metadata(stream, mi):
    """Merge ``mi`` into the OPF of the EPUB in ``stream`` and write it back.

    :param stream: seekable file-like object opened for reading and writing.
    :param mi: metadata object accepted by the OPF's ``smart_update``.
    """
    # Rewind: the stream has usually been read already (see ``main``).
    stream.seek(0)
    reader = OCFZipReader(stream, root=os.getcwdu())
    reader.opf.smart_update(mi)
    newopf = StringIO(reader.opf.render())
    # Replace only the OPF member; its path is the one registered for the
    # OPF media type in container.xml.
    safe_replace(stream, reader.container[OPF.MIMETYPE], newopf)
def option_parser():
    """Build the command line parser used by :func:`main`.

    Starts from the parser returned by ``get_parser('epub')``, drops its
    ``--category`` option and adds ``--tags``, ``--series``,
    ``--series-index``, ``--language`` and ``--get-cover``.
    """
    parser = get_parser('epub')
    parser.remove_option('--category')
    parser.add_option('--tags', default=None,
                      help=_('A comma separated list of tags to set'))
    parser.add_option('--series', default=None,
                      help=_('The series to which this book belongs'))
    parser.add_option('--series-index', default=None,
                      help=_('The series index'))
    parser.add_option('--language', default=None,
                      help=_('The book language'))
    parser.add_option('--get-cover', default=False, action='store_true',
                      help=_('Extract the cover'))
    return parser
def main(args=sys.argv):
    """Command line entry point: show and optionally update EPUB metadata.

    :param args: argument vector; ``args[1]`` must be the path of the EPUB.
    :return: ``0`` on success, ``1`` when the file argument is missing.

    Options given on the command line are applied to the book's metadata;
    the file is rewritten only if at least one was given. The resulting
    metadata is printed, and with ``--get-cover`` the cover is saved next to
    the current directory as ``<name>_cover.jpg``.

    NOTE(review): the usage check, the "only write when something changed"
    rule and the return codes were inferred; ``title``/``authors``/``comment``
    are assumed to default to ``None`` in ``get_parser`` -- confirm.
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        return 1

    with open(args[1], 'r+b') as stream:
        mi = get_metadata(stream, extract_cover=opts.get_cover)
        changed = False
        # Every assignment is guarded: unset options must neither overwrite
        # existing values nor be dereferenced (``None.split`` would fail).
        if opts.title is not None:
            mi.title = opts.title
            changed = True
        if opts.authors is not None:
            mi.authors = opts.authors.split(',')
            changed = True
        if opts.tags is not None:
            mi.tags = opts.tags.split(',')
            changed = True
        if opts.comment is not None:
            mi.comments = opts.comment
            changed = True
        if opts.series is not None:
            mi.series = opts.series
            changed = True
        if opts.series_index:
            mi.series_index = opts.series_index
            changed = True
        if opts.language is not None:
            mi.language = opts.language
            changed = True

        if changed:
            set_metadata(stream, mi)
        print(unicode(get_metadata(stream, extract_cover=False)).encode('utf-8'))

    if mi.cover_data[1] is not None:
        cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
        with open(cpath, 'wb') as f:
            f.write(mi.cover_data[1])
            print('Cover saved to %s' % (f.name,))
    return 0
253
if __name__ == '__main__':