1
# -*- coding: utf-8 -*-
'''
Read content from ztxt pdb file.
'''
8
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
9
__docformat__ = 'restructuredtext en'
11
import os, struct, zlib
13
from calibre.ebooks.pdb.formatreader import FormatReader
14
from calibre.ebooks.pdb.ztxt import zTXTError
15
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
17
# Oldest zTXT format version, as (major, minor), that the reader accepts;
# files reporting an older version are rejected with zTXTError.
SUPPORTED_VERSION = (1, 40)
19
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.
    '''

    def __init__(self, raw):
        '''
        Parse the big-endian fields of the header record.

        :param raw: Raw bytes of the header record. Must be at least
            19 bytes long, because the flags byte is read from offset 18.
        :raises struct.error: If ``raw`` is too short to hold the fields.
        '''
        # Offsets 0-9 hold four contiguous fields:
        #   version (uint16), num_records (uint16),
        #   size (uint32), record_size (uint16).
        # '>' disables alignment padding, so one format string covers
        # exactly those 10 bytes.
        (self.version, self.num_records,
            self.size, self.record_size) = struct.unpack_from('>HHLH', raw, 0)
        # Offsets 10-17 are not read by this class; flags is the single
        # byte at offset 18.
        self.flags, = struct.unpack_from('>B', raw, 18)
35
class Reader(FormatReader):
    '''
    Reader for zTXT PDB files. Decompresses the text records and writes
    them out as ``index.html`` together with a ``metadata.opf``.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        Load all sections and validate the zTXT header record.

        :param header: PDB header object; must provide ``num_sections``
            and ``section_data(index)``.
        :param stream: Stream of the source file; kept so that metadata
            can be read from it in :meth:`extract_content`.
        :param log: Logger providing ``debug`` and ``info``.
        :param encoding: Encoding of the book text. When None, cp1252
            is used.
        :raises zTXTError: If the file version is older than
            SUPPORTED_VERSION or the random access compression flag is
            not set.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        self.sections = [header.section_data(i)
                         for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        # The version is packed as major in the high byte, minor in the
        # low byte.
        vmajor = (self.header_record.version & 0x0000FF00) >> 8
        vminor = self.header_record.version & 0x000000FF
        if (vmajor, vminor) < SUPPORTED_VERSION:
            raise zTXTError('Unsupported ztxt version (%i.%i). Only versions %i.%i and newer are supported.' % (vmajor, vminor, SUPPORTED_VERSION[0], SUPPORTED_VERSION[1]))

        if (self.header_record.flags & 0x01) == 0:
            raise zTXTError('Only compression method 1 (random access) is supported')

        self.log.debug('Found ztxt version: %i.%i' % (vmajor, vminor))

        # Initialize the decompressor by feeding it the first text
        # record; the decompressed output is discarded here.
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

    def section_data(self, number):
        '''
        Return the raw data of the section at index ``number``.
        '''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Decompress text record ``number`` and return it decoded.

        The decompressor is shared between calls and restarted only at
        record 1, so records have to be requested in ascending order
        starting from 1.

        :param number: Index of the text record (1 is the first one).
        :return: The record's text, decoded with ``self.encoding``
            (cp1252 when it is None); undecodable bytes are replaced.
        '''
        # NOTE(review): later records appear to continue the zlib stream
        # begun in record 1, so a fresh decompressor would not find a
        # stream header in them. Start over only at the first record.
        if number == 1:
            self.uncompressor = zlib.decompressobj()
        data = self.uncompressor.decompress(self.section_data(number))
        return data.decode('cp1252' if self.encoding is None else self.encoding, 'replace')

    def extract_content(self, output_dir):
        '''
        Decompress every text record, convert the text and write the
        result into ``output_dir``.

        :param output_dir: Directory that receives ``index.html`` and
            ``metadata.opf``.
        :return: Path of the written ``metadata.opf``.
        '''
        self.log.info('Decompressing text...')
        parts = []
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug('\tDecompressing text section %i' % i)
            parts.append(self.decompress_text(i))
        # Join once instead of growing a string record by record.
        txt = ''.join(parts)

        self.log.info('Converting text to OEB...')
        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # Imported locally rather than at module level; presumably to
        # avoid an import cycle -- confirm before moving it.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')