~ubuntu-branches/ubuntu/karmic/calibre/karmic

Viewing changes to src/calibre/ebooks/mobi/writer.py

Committer: Bazaar Package Importer
Author(s): Martin Pitt
Date: 2009-07-30 12:49:41 UTC
mfrom: (1.3.2 upstream)
Revision ID: james.westby@ubuntu.com-20090730124941-qjdsmri25zt8zocn

Tags: 0.6.3+dfsg-0ubuntu1

* New upstream release. Please see http://calibre.kovidgoyal.net/new_in_6/
  for the list of new features and changes.
* remove_postinstall.patch: Update for new version.
* build_debug.patch: Does not apply any more, disable for now. Might not be
  necessary any more.
* debian/copyright: Fix reference to versionless GPL.
* debian/rules: Drop obsolete dh_desktop call.
* debian/rules: Add workaround for weird Python 2.6 setuptools behaviour of
  putting compiled .so files into src/calibre/plugins/calibre/plugins
  instead of src/calibre/plugins.
* debian/rules: Drop hal fdi moving, new upstream version does not use hal
  any more. Drop hal dependency, too.
* debian/rules: Install udev rules into /lib/udev/rules.d.
* Add debian/calibre.preinst: Remove unmodified
  /etc/udev/rules.d/95-calibre.rules on upgrade.
* debian/control: Bump Python dependencies to 2.6, since upstream needs
  it now.

files added:
debian/calibre.preinst

jsmin.py

src/calibre/customize/conversion.py

src/calibre/customize/profiles.py

src/calibre/devices/android

src/calibre/devices/android/__init__.py

src/calibre/devices/android/driver.py

src/calibre/devices/irexdr

src/calibre/devices/irexdr/__init__.py

src/calibre/devices/irexdr/driver.py

src/calibre/devices/usbms/cli.py

src/calibre/devices/usbms/deviceconfig.py

src/calibre/ebooks/comic

src/calibre/ebooks/comic/__init__.py

src/calibre/ebooks/comic/input.py

src/calibre/ebooks/compression

src/calibre/ebooks/compression/__init__.py

src/calibre/ebooks/compression/palmdoc.c

src/calibre/ebooks/compression/palmdoc.py

src/calibre/ebooks/conversion

src/calibre/ebooks/conversion/__init__.py

src/calibre/ebooks/conversion/cli.py

src/calibre/ebooks/conversion/config.py

src/calibre/ebooks/conversion/plumber.py

src/calibre/ebooks/conversion/preprocess.py

src/calibre/ebooks/epub/input.py

src/calibre/ebooks/epub/output.py

src/calibre/ebooks/fb2

src/calibre/ebooks/fb2/__init__.py

src/calibre/ebooks/fb2/fb2.xsl

src/calibre/ebooks/fb2/fb2ml.py

src/calibre/ebooks/fb2/input.py

src/calibre/ebooks/fb2/output.py

src/calibre/ebooks/html

src/calibre/ebooks/html/__init__.py

src/calibre/ebooks/html/input.py

src/calibre/ebooks/lit/input.py

src/calibre/ebooks/lit/output.py

src/calibre/ebooks/lrf/output.py

src/calibre/ebooks/metadata/cli.py

src/calibre/ebooks/metadata/ereader.py

src/calibre/ebooks/metadata/pdb.py

src/calibre/ebooks/metadata/txt.py

src/calibre/ebooks/metadata/worker.py

src/calibre/ebooks/mobi/input.py

src/calibre/ebooks/mobi/output.py

src/calibre/ebooks/odt/input.py

src/calibre/ebooks/oeb/factory.py

src/calibre/ebooks/oeb/iterator.py

src/calibre/ebooks/oeb/output.py

src/calibre/ebooks/oeb/reader.py

src/calibre/ebooks/oeb/transforms/guide.py

src/calibre/ebooks/oeb/transforms/jacket.py

src/calibre/ebooks/oeb/transforms/linearize_tables.py

src/calibre/ebooks/oeb/transforms/metadata.py

src/calibre/ebooks/oeb/transforms/rescale.py

src/calibre/ebooks/oeb/transforms/split.py

src/calibre/ebooks/oeb/transforms/structure.py

src/calibre/ebooks/oeb/writer.py

src/calibre/ebooks/pdb

src/calibre/ebooks/pdb/__init__.py

src/calibre/ebooks/pdb/ereader

src/calibre/ebooks/pdb/ereader/__init__.py

src/calibre/ebooks/pdb/ereader/inspector.py

src/calibre/ebooks/pdb/ereader/output.py

src/calibre/ebooks/pdb/ereader/reader.py

src/calibre/ebooks/pdb/ereader/reader132.py

src/calibre/ebooks/pdb/ereader/reader202.py

src/calibre/ebooks/pdb/ereader/writer.py

src/calibre/ebooks/pdb/formatreader.py

src/calibre/ebooks/pdb/formatwriter.py

src/calibre/ebooks/pdb/header.py

src/calibre/ebooks/pdb/input.py

src/calibre/ebooks/pdb/output.py

src/calibre/ebooks/pdb/palmdoc

src/calibre/ebooks/pdb/palmdoc/__init__.py

src/calibre/ebooks/pdb/palmdoc/reader.py

src/calibre/ebooks/pdb/palmdoc/writer.py

src/calibre/ebooks/pdb/ztxt

src/calibre/ebooks/pdb/ztxt/__init__.py

src/calibre/ebooks/pdb/ztxt/reader.py

src/calibre/ebooks/pdb/ztxt/writer.py

src/calibre/ebooks/pdf/input.py

src/calibre/ebooks/pdf/manipulate

src/calibre/ebooks/pdf/manipulate/__init__.py

src/calibre/ebooks/pdf/manipulate/cli.py

src/calibre/ebooks/pdf/manipulate/crop.py

src/calibre/ebooks/pdf/manipulate/decrypt.py

src/calibre/ebooks/pdf/manipulate/encrypt.py

src/calibre/ebooks/pdf/manipulate/info.py

src/calibre/ebooks/pdf/manipulate/merge.py

src/calibre/ebooks/pdf/manipulate/reverse.py

src/calibre/ebooks/pdf/manipulate/rotate.py

src/calibre/ebooks/pdf/manipulate/split.py

src/calibre/ebooks/pdf/output.py

src/calibre/ebooks/pdf/pageoptions.py

src/calibre/ebooks/pdf/pdftohtml.py

src/calibre/ebooks/pdf/verify.py

src/calibre/ebooks/pdf/writer.py

src/calibre/ebooks/pml

src/calibre/ebooks/pml/__init__.py

src/calibre/ebooks/pml/input.py

src/calibre/ebooks/pml/output.py

src/calibre/ebooks/pml/pmlconverter.py

src/calibre/ebooks/pml/pmlml.py

src/calibre/ebooks/rb

src/calibre/ebooks/rb/__init__.py

src/calibre/ebooks/rb/input.py

src/calibre/ebooks/rb/output.py

src/calibre/ebooks/rb/rbml.py

src/calibre/ebooks/rb/reader.py

src/calibre/ebooks/rb/writer.py

src/calibre/ebooks/rtf

src/calibre/ebooks/rtf/__init__.py

src/calibre/ebooks/rtf/input.py

src/calibre/ebooks/rtf/output.py

src/calibre/ebooks/rtf/rtfml.py

src/calibre/ebooks/rtf/xsl.py

src/calibre/ebooks/txt

src/calibre/ebooks/txt/__init__.py

src/calibre/ebooks/txt/input.py

src/calibre/ebooks/txt/newlines.py

src/calibre/ebooks/txt/output.py

src/calibre/ebooks/txt/processor.py

src/calibre/ebooks/txt/txtml.py

src/calibre/ebooks/unidecode

src/calibre/ebooks/unidecode/__init__.py

src/calibre/ebooks/unidecode/unicodepoints.py

src/calibre/ebooks/unidecode/unidecoder.py

src/calibre/gui2/convert

src/calibre/gui2/convert/__init__.py

src/calibre/gui2/convert/bulk.py

src/calibre/gui2/convert/comic_input.py

src/calibre/gui2/convert/comic_input.ui

src/calibre/gui2/convert/epub_output.py

src/calibre/gui2/convert/epub_output.ui

src/calibre/gui2/convert/fb2_input.py

src/calibre/gui2/convert/fb2_input.ui

src/calibre/gui2/convert/gui_conversion.py

src/calibre/gui2/convert/look_and_feel.py

src/calibre/gui2/convert/look_and_feel.ui

src/calibre/gui2/convert/lrf_output.py

src/calibre/gui2/convert/lrf_output.ui

src/calibre/gui2/convert/metadata.py

src/calibre/gui2/convert/metadata.ui

src/calibre/gui2/convert/mobi_output.py

src/calibre/gui2/convert/mobi_output.ui

src/calibre/gui2/convert/page_setup.py

src/calibre/gui2/convert/page_setup.ui

src/calibre/gui2/convert/pdb_output.py

src/calibre/gui2/convert/pdb_output.ui

src/calibre/gui2/convert/pdf_input.py

src/calibre/gui2/convert/pdf_input.ui

src/calibre/gui2/convert/pdf_output.py

src/calibre/gui2/convert/pdf_output.ui

src/calibre/gui2/convert/single.py

src/calibre/gui2/convert/single.ui

src/calibre/gui2/convert/structure_detection.py

src/calibre/gui2/convert/structure_detection.ui

src/calibre/gui2/convert/toc.py

src/calibre/gui2/convert/toc.ui

src/calibre/gui2/convert/txt_output.py

src/calibre/gui2/convert/txt_output.ui

src/calibre/gui2/convert/xpath_edit.ui

src/calibre/gui2/convert/xpath_wizard.py

src/calibre/gui2/convert/xpath_wizard.ui

src/calibre/gui2/device_drivers

src/calibre/gui2/device_drivers/__init__.py

src/calibre/gui2/device_drivers/configwidget.py

src/calibre/gui2/device_drivers/configwidget.ui

src/calibre/gui2/images/eject.svg

src/calibre/gui2/images/news/elperiodico_catalan.png

src/calibre/gui2/images/news/elperiodico_spanish.png

src/calibre/gui2/images/news/eltiempo_hn.png

src/calibre/gui2/images/news/expansion_spanish.png

src/calibre/gui2/images/news/fastcompany.png

src/calibre/gui2/images/news/gva_be.png

src/calibre/gui2/images/news/hln.png

src/calibre/gui2/images/news/inquirer_net.png

src/calibre/gui2/images/news/laprensa_hn.png

src/calibre/gui2/images/news/latribuna.png

src/calibre/gui2/images/news/noaa.png

src/calibre/gui2/images/news/theeconomictimes_india.png

src/calibre/gui2/images/news/tijd.png

src/calibre/gui2/images/news/uncrate.png

src/calibre/gui2/images/print-preview.svg

src/calibre/gui2/images/print.svg

src/calibre/gui2/images/welcome_wizard.svg

src/calibre/gui2/images/wizard.svg

src/calibre/gui2/jobs.py

src/calibre/gui2/tag_view.py

src/calibre/gui2/viewer/bookmarkmanager.py

src/calibre/gui2/viewer/bookmarkmanager.ui

src/calibre/gui2/viewer/hyphenate

src/calibre/gui2/viewer/hyphenate/Hyphenator.js

src/calibre/gui2/viewer/hyphenate/patterns

src/calibre/gui2/viewer/hyphenate/patterns/bn.js

src/calibre/gui2/viewer/hyphenate/patterns/cs.js

src/calibre/gui2/viewer/hyphenate/patterns/da.js

src/calibre/gui2/viewer/hyphenate/patterns/de.js

src/calibre/gui2/viewer/hyphenate/patterns/en.js

src/calibre/gui2/viewer/hyphenate/patterns/es.js

src/calibre/gui2/viewer/hyphenate/patterns/fi.js

src/calibre/gui2/viewer/hyphenate/patterns/fr.js

src/calibre/gui2/viewer/hyphenate/patterns/gu.js

src/calibre/gui2/viewer/hyphenate/patterns/hi.js

src/calibre/gui2/viewer/hyphenate/patterns/hu.js

src/calibre/gui2/viewer/hyphenate/patterns/it.js

src/calibre/gui2/viewer/hyphenate/patterns/kn.js

src/calibre/gui2/viewer/hyphenate/patterns/ml.js

src/calibre/gui2/viewer/hyphenate/patterns/nl.js

src/calibre/gui2/viewer/hyphenate/patterns/or.js

src/calibre/gui2/viewer/hyphenate/patterns/pa.js

src/calibre/gui2/viewer/hyphenate/patterns/pl.js

src/calibre/gui2/viewer/hyphenate/patterns/pt.js

src/calibre/gui2/viewer/hyphenate/patterns/ru.js

src/calibre/gui2/viewer/hyphenate/patterns/sv.js

src/calibre/gui2/viewer/hyphenate/patterns/ta.js

src/calibre/gui2/viewer/hyphenate/patterns/te.js

src/calibre/gui2/viewer/hyphenate/patterns/uk.js

src/calibre/gui2/viewer/printing.py

src/calibre/gui2/wizard

src/calibre/gui2/wizard/__init__.py

src/calibre/gui2/wizard/device.ui

src/calibre/gui2/wizard/finish.ui

src/calibre/gui2/wizard/kindle.ui

src/calibre/gui2/wizard/library.ui

src/calibre/gui2/wizard/send_email.py

src/calibre/gui2/wizard/send_email.ui

src/calibre/gui2/wizard/stanza.ui

src/calibre/library/move.py

src/calibre/library/static/calibre_banner.png

src/calibre/utils/complete.py

src/calibre/utils/fonts

src/calibre/utils/fonts/__init__.py

src/calibre/utils/fonts/fontconfig.c

src/calibre/utils/ipc

src/calibre/utils/ipc/__init__.py

src/calibre/utils/ipc/job.py

src/calibre/utils/ipc/launch.py

src/calibre/utils/ipc/server.py

src/calibre/utils/ipc/worker.py

src/calibre/utils/logging.py

src/calibre/web/feeds/input.py

src/calibre/web/feeds/recipes/recipe_7dias.py

src/calibre/web/feeds/recipes/recipe_accountancyage.py

src/calibre/web/feeds/recipes/recipe_buenosaireseconomico.py

src/calibre/web/feeds/recipes/recipe_craigslist.py

src/calibre/web/feeds/recipes/recipe_degentenaar.py

src/calibre/web/feeds/recipes/recipe_diagonales.py

src/calibre/web/feeds/recipes/recipe_elperiodico_catalan.py

src/calibre/web/feeds/recipes/recipe_elperiodico_spanish.py

src/calibre/web/feeds/recipes/recipe_eltiempo_hn.py

src/calibre/web/feeds/recipes/recipe_expansion_spanish.py

src/calibre/web/feeds/recipes/recipe_fastcompany.py

src/calibre/web/feeds/recipes/recipe_gva_be.py

src/calibre/web/feeds/recipes/recipe_hln.py

src/calibre/web/feeds/recipes/recipe_inquirer_net.py

src/calibre/web/feeds/recipes/recipe_kellog_faculty.py

src/calibre/web/feeds/recipes/recipe_kellog_insight.py

src/calibre/web/feeds/recipes/recipe_laprensa_hn.py

src/calibre/web/feeds/recipes/recipe_latribuna.py

src/calibre/web/feeds/recipes/recipe_lavanguardia.py

src/calibre/web/feeds/recipes/recipe_marca.py

src/calibre/web/feeds/recipes/recipe_miradasalsur.py

src/calibre/web/feeds/recipes/recipe_newsweek_argentina.py

src/calibre/web/feeds/recipes/recipe_noaa.py

src/calibre/web/feeds/recipes/recipe_publico.py

src/calibre/web/feeds/recipes/recipe_slate.py

src/calibre/web/feeds/recipes/recipe_the_budget_fashionista.py

src/calibre/web/feeds/recipes/recipe_theeconomictimes_india.py

src/calibre/web/feeds/recipes/recipe_tijd.py

src/calibre/web/feeds/recipes/recipe_uncrate.py

src/calibre/web/feeds/recipes/recipe_veintitres.py

files removed:
src/calibre/ebooks/epub/fonts.py

src/calibre/ebooks/epub/from_any.py

src/calibre/ebooks/epub/from_comic.py

src/calibre/ebooks/epub/from_feeds.py

src/calibre/ebooks/epub/from_html.py

src/calibre/ebooks/epub/iterator.py

src/calibre/ebooks/epub/split.py

src/calibre/ebooks/html.py

src/calibre/ebooks/lrf/any

src/calibre/ebooks/lrf/any/__init__.py

src/calibre/ebooks/lrf/any/convert_from.py

src/calibre/ebooks/lrf/comic

src/calibre/ebooks/lrf/comic/__init__.py

src/calibre/ebooks/lrf/comic/convert_from.py

src/calibre/ebooks/lrf/epub

src/calibre/ebooks/lrf/epub/__init__.py

src/calibre/ebooks/lrf/epub/convert_from.py

src/calibre/ebooks/lrf/fb2

src/calibre/ebooks/lrf/fb2/__init__.py

src/calibre/ebooks/lrf/fb2/convert_from.py

src/calibre/ebooks/lrf/fb2/fb2.xsl

src/calibre/ebooks/lrf/feeds

src/calibre/ebooks/lrf/feeds/__init__.py

src/calibre/ebooks/lrf/feeds/convert_from.py

src/calibre/ebooks/lrf/lit

src/calibre/ebooks/lrf/lit/__init__.py

src/calibre/ebooks/lrf/lit/convert_from.py

src/calibre/ebooks/lrf/mobi

src/calibre/ebooks/lrf/mobi/__init__.py

src/calibre/ebooks/lrf/mobi/convert_from.py

src/calibre/ebooks/lrf/pdf

src/calibre/ebooks/lrf/pdf/__init__.py

src/calibre/ebooks/lrf/pdf/convert_from.py

src/calibre/ebooks/lrf/pdf/reflow.py

src/calibre/ebooks/lrf/rtf

src/calibre/ebooks/lrf/rtf/__init__.py

src/calibre/ebooks/lrf/rtf/convert_from.py

src/calibre/ebooks/lrf/rtf/xsl.py

src/calibre/ebooks/lrf/txt

src/calibre/ebooks/lrf/txt/__init__.py

src/calibre/ebooks/lrf/txt/convert_from.py

src/calibre/ebooks/lrf/txt/demo

src/calibre/ebooks/lrf/txt/demo/demo.txt

src/calibre/ebooks/lrf/txt/demo/small.jpg

src/calibre/ebooks/lrf/web

src/calibre/ebooks/lrf/web/__init__.py

src/calibre/ebooks/lrf/web/convert_from.py

src/calibre/ebooks/lrf/web/profiles

src/calibre/ebooks/lrf/web/profiles/__init__.py

src/calibre/ebooks/lrf/web/profiles/ap.py

src/calibre/ebooks/lrf/web/profiles/atlantic.py

src/calibre/ebooks/lrf/web/profiles/automatic.py

src/calibre/ebooks/lrf/web/profiles/barrons.py

src/calibre/ebooks/lrf/web/profiles/bbc.py

src/calibre/ebooks/lrf/web/profiles/chr_mon.py

src/calibre/ebooks/lrf/web/profiles/cnn.py

src/calibre/ebooks/lrf/web/profiles/economist.py

src/calibre/ebooks/lrf/web/profiles/faznet.py

src/calibre/ebooks/lrf/web/profiles/jpost.py

src/calibre/ebooks/lrf/web/profiles/jutarnji.py

src/calibre/ebooks/lrf/web/profiles/nasa.py

src/calibre/ebooks/lrf/web/profiles/newsweek.py

src/calibre/ebooks/lrf/web/profiles/newyorker.py

src/calibre/ebooks/lrf/web/profiles/newyorkreview.py

src/calibre/ebooks/lrf/web/profiles/nytimes.py

src/calibre/ebooks/lrf/web/profiles/portfolio.py

src/calibre/ebooks/lrf/web/profiles/reuters.py

src/calibre/ebooks/lrf/web/profiles/spiegelde.py

src/calibre/ebooks/lrf/web/profiles/upi.py

src/calibre/ebooks/lrf/web/profiles/usatoday.py

src/calibre/ebooks/lrf/web/profiles/wash_post.py

src/calibre/ebooks/lrf/web/profiles/wsj.py

src/calibre/ebooks/lrf/web/profiles/zeitde.py

src/calibre/ebooks/mobi/from_any.py

src/calibre/ebooks/mobi/from_comic.py

src/calibre/ebooks/mobi/from_feeds.py

src/calibre/ebooks/mobi/palmdoc.py

src/calibre/ebooks/odt/to_oeb.py

src/calibre/ebooks/pdf/pdftrim.py

src/calibre/gui2/dialogs/epub.py

src/calibre/gui2/dialogs/epub.ui

src/calibre/gui2/dialogs/jobs.py

src/calibre/gui2/dialogs/lrf_single.py

src/calibre/gui2/dialogs/lrf_single.ui

src/calibre/gui2/dialogs/mobi.py

src/calibre/gui2/dialogs/warning.ui

src/calibre/gui2/jobs2.py

src/calibre/gui2/tags.py

src/calibre/parallel.py

src/calibre/trac/donations

src/calibre/trac/donations/server.py

src/calibre/utils/fontconfig.py

src/calibre/utils/single_qt_application.py

src/calibre/web/feeds/main.py

files modified:
.pydevproject

debian/changelog

debian/control

debian/copyright

debian/patches/remove_postinstall.patch

debian/patches/series

debian/rules

installer/linux/freeze.py

installer/osx/freeze.py

installer/windows/build_installer.py

installer/windows/calibre/calibre.mpi

installer/windows/freeze.py

setup.py

src/calibre/__init__.py

src/calibre/constants.py

src/calibre/customize/__init__.py

src/calibre/customize/builtins.py

src/calibre/customize/ui.py

src/calibre/debug.py

src/calibre/devices/__init__.py

src/calibre/devices/bebook/driver.py

src/calibre/devices/blackberry/driver.py

src/calibre/devices/cybookg3/driver.py

src/calibre/devices/eb600/driver.py *

src/calibre/devices/interface.py

src/calibre/devices/jetbook/driver.py

src/calibre/devices/kindle/driver.py *

src/calibre/devices/libusb.py

src/calibre/devices/prs500/books.py

src/calibre/devices/prs500/cli/main.py

src/calibre/devices/prs500/driver.py *

src/calibre/devices/prs500/prstypes.py

src/calibre/devices/prs505/books.py

src/calibre/devices/prs505/driver.py

src/calibre/devices/prs700/driver.py

src/calibre/devices/usbms/books.py

src/calibre/devices/usbms/device.py

src/calibre/devices/usbms/driver.py

src/calibre/ebooks/__init__.py

src/calibre/ebooks/chardet/__init__.py

src/calibre/ebooks/epub/__init__.py

src/calibre/ebooks/epub/pages.py

src/calibre/ebooks/lit/reader.py

src/calibre/ebooks/lit/writer.py

src/calibre/ebooks/lrf/__init__.py

src/calibre/ebooks/lrf/html/convert_from.py

src/calibre/ebooks/lrf/lrs/convert_from.py

src/calibre/ebooks/lrf/meta.py

src/calibre/ebooks/lrf/objects.py

src/calibre/ebooks/lrf/tags.py

src/calibre/ebooks/metadata/__init__.py

src/calibre/ebooks/metadata/epub.py

src/calibre/ebooks/metadata/fb2.py

src/calibre/ebooks/metadata/fetch.py

src/calibre/ebooks/metadata/google_books.py

src/calibre/ebooks/metadata/html.py

src/calibre/ebooks/metadata/imp.py

src/calibre/ebooks/metadata/isbndb.py

src/calibre/ebooks/metadata/lit.py

src/calibre/ebooks/metadata/lrx.py

src/calibre/ebooks/metadata/meta.py

src/calibre/ebooks/metadata/mobi.py

src/calibre/ebooks/metadata/ncx.xml

src/calibre/ebooks/metadata/odt.py

src/calibre/ebooks/metadata/opf.py

src/calibre/ebooks/metadata/opf.xml

src/calibre/ebooks/metadata/opf2.py

src/calibre/ebooks/metadata/pdf.py

src/calibre/ebooks/metadata/rb.py

src/calibre/ebooks/metadata/rtf.py

src/calibre/ebooks/metadata/toc.py

src/calibre/ebooks/mobi/langcodes.py

src/calibre/ebooks/mobi/mobiml.py

src/calibre/ebooks/mobi/reader.py

src/calibre/ebooks/mobi/writer.py

src/calibre/ebooks/oeb/base.py

src/calibre/ebooks/oeb/stylizer.py

src/calibre/ebooks/oeb/transforms/__init__.py

src/calibre/ebooks/oeb/transforms/flatcss.py

src/calibre/ebooks/oeb/transforms/htmltoc.py

src/calibre/ebooks/oeb/transforms/manglecase.py

src/calibre/ebooks/oeb/transforms/rasterize.py

src/calibre/ebooks/oeb/transforms/trimmanifest.py

src/calibre/ebooks/rtf2xml/ParseRtf.py

src/calibre/ebooks/rtf2xml/pict.py

src/calibre/gui2/__init__.py

src/calibre/gui2/add.py

src/calibre/gui2/device.py

src/calibre/gui2/dialogs/comicconf.ui

src/calibre/gui2/dialogs/config.py

src/calibre/gui2/dialogs/config.ui

src/calibre/gui2/dialogs/fetch_metadata.py

src/calibre/gui2/dialogs/jobs.ui

src/calibre/gui2/dialogs/metadata_bulk.py

src/calibre/gui2/dialogs/metadata_bulk.ui

src/calibre/gui2/dialogs/metadata_single.py

src/calibre/gui2/dialogs/metadata_single.ui

src/calibre/gui2/dialogs/progress.py

src/calibre/gui2/dialogs/scheduler.py

src/calibre/gui2/dialogs/scheduler.ui

src/calibre/gui2/dialogs/tag_editor.py

src/calibre/gui2/dialogs/tag_editor.ui

src/calibre/gui2/dialogs/user_profiles.py

src/calibre/gui2/dialogs/user_profiles.ui

src/calibre/gui2/filename_pattern.ui

src/calibre/gui2/images/back.svg

src/calibre/gui2/images/forward.svg

src/calibre/gui2/library.py

src/calibre/gui2/main.py

src/calibre/gui2/main.ui

src/calibre/gui2/main_window.py

src/calibre/gui2/status.py

src/calibre/gui2/tools.py

src/calibre/gui2/viewer/config.ui

src/calibre/gui2/viewer/documentview.py

src/calibre/gui2/viewer/js.py

src/calibre/gui2/viewer/main.py

src/calibre/gui2/viewer/main.ui

src/calibre/gui2/widgets.py

src/calibre/library/__init__.py

src/calibre/library/database.py

src/calibre/library/database2.py

src/calibre/library/server.py

src/calibre/library/sqlite.py

src/calibre/library/static/calibre.png

src/calibre/libunrar.py

src/calibre/libunzip.py

src/calibre/linux.py

src/calibre/manual/custom.py

src/calibre/manual/faq.rst

src/calibre/manual/news.rst

src/calibre/manual/news_recipe.rst

src/calibre/ptempfile.py

src/calibre/trac/plugins/Changelog.py

src/calibre/trac/plugins/download.py

src/calibre/trac/plugins/templates/linux.html

src/calibre/translations/ar.po

src/calibre/translations/bg.po

src/calibre/translations/ca.po

src/calibre/translations/calibre.pot

src/calibre/translations/cs.po

src/calibre/translations/da.po

src/calibre/translations/de.po

src/calibre/translations/el.po

src/calibre/translations/es.po

src/calibre/translations/fr.po

src/calibre/translations/gl.po

src/calibre/translations/he.po

src/calibre/translations/hr.po

src/calibre/translations/hu.po

src/calibre/translations/it.po

src/calibre/translations/ja.po

src/calibre/translations/nb.po

src/calibre/translations/nds.po

src/calibre/translations/nl.po

src/calibre/translations/pl.po

src/calibre/translations/pt.po

src/calibre/translations/ro.po

src/calibre/translations/ru.po

src/calibre/translations/sk.po

src/calibre/translations/sl.po

src/calibre/translations/sv.po

src/calibre/translations/te.po

src/calibre/translations/uk.po

src/calibre/utils/config.py

src/calibre/utils/filenames.py

src/calibre/utils/lock.py

src/calibre/utils/podofo/__init__.py

src/calibre/utils/podofo/podofo.cpp

src/calibre/utils/search_query_parser.py

src/calibre/utils/terminfo.py

src/calibre/utils/windows/winutil.c

src/calibre/web/__init__.py

src/calibre/web/feeds/__init__.py

src/calibre/web/feeds/news.py

src/calibre/web/feeds/recipes/__init__.py

src/calibre/web/feeds/recipes/recipe_al_jazeera.py

src/calibre/web/feeds/recipes/recipe_azstarnet.py

src/calibre/web/feeds/recipes/recipe_barrons.py

src/calibre/web/feeds/recipes/recipe_bbc.py

src/calibre/web/feeds/recipes/recipe_clarin.py

src/calibre/web/feeds/recipes/recipe_climate_progress.py

src/calibre/web/feeds/recipes/recipe_coding_horror.py

src/calibre/web/feeds/recipes/recipe_dna.py

src/calibre/web/feeds/recipes/recipe_economist.py

src/calibre/web/feeds/recipes/recipe_elektrolese.py

src/calibre/web/feeds/recipes/recipe_espn.py

src/calibre/web/feeds/recipes/recipe_estadao.py

src/calibre/web/feeds/recipes/recipe_globe_and_mail.py

src/calibre/web/feeds/recipes/recipe_guardian.py

src/calibre/web/feeds/recipes/recipe_harpers.py

src/calibre/web/feeds/recipes/recipe_harpers_full.py

src/calibre/web/feeds/recipes/recipe_jb_online.py

src/calibre/web/feeds/recipes/recipe_linuxdevices.py

src/calibre/web/feeds/recipes/recipe_moneynews.py

src/calibre/web/feeds/recipes/recipe_new_yorker.py

src/calibre/web/feeds/recipes/recipe_newsweek.py

src/calibre/web/feeds/recipes/recipe_nytimes.py

src/calibre/web/feeds/recipes/recipe_nytimes_sub.py

src/calibre/web/feeds/recipes/recipe_o_globo.py

src/calibre/web/feeds/recipes/recipe_san_fran_chronicle.py

src/calibre/web/feeds/recipes/recipe_scott_hanselman.py

src/calibre/web/feeds/recipes/recipe_stackoverflow.py

src/calibre/web/feeds/recipes/recipe_time_magazine.py

src/calibre/web/feeds/recipes/recipe_usatoday.py

src/calibre/web/feeds/recipes/recipe_wash_post.py

src/calibre/web/feeds/recipes/recipe_winsupersite.py

src/calibre/web/feeds/recipes/recipe_wired.py

src/calibre/web/feeds/recipes/recipe_wsj.py

src/calibre/web/feeds/recipes/recipe_zaobao.py

src/calibre/web/feeds/templates.py

src/calibre/web/fetch/simple.py

src/calibre/www/settings.py

src/calibre/www/static/img/faces/john.png

src/pyPdf/pdf.py

todo

upload.py

Show diffs side-by-side

added added

removed removed

src/calibre/ebooks/mobi/writer.py

'''

Write content to Mobipocket books.

'''

from __future__ import with_statement

__license__ = 'GPL v3'

__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'

__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam> and \

Kovid Goyal <kovid@kovidgoyal.net>'

import sys

import os

from collections import defaultdict

from itertools import count

from itertools import izip

import random

import re

from struct import pack

import time

import random

from cStringIO import StringIO

import re

from itertools import izip, count

from collections import defaultdict

from urlparse import urldefrag

import logging

from PIL import Image

from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \

OEB_RASTER_IMAGES

from calibre.ebooks.oeb.base import namespace, prefixname

from cStringIO import StringIO

from calibre.ebooks.mobi.langcodes import iana2mobi

from calibre.ebooks.mobi.mobiml import MBP_NS

from calibre.ebooks.oeb.base import OEB_DOCS

from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

from calibre.ebooks.oeb.base import XHTML

from calibre.ebooks.oeb.base import XHTML_NS

from calibre.ebooks.oeb.base import XML_NS

from calibre.ebooks.oeb.base import namespace

from calibre.ebooks.oeb.base import prefixname

from calibre.ebooks.oeb.base import urlnormalize

from calibre.ebooks.oeb.base import Logger, OEBBook

from calibre.ebooks.oeb.profile import Context

from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener

from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer

from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer

from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder

from calibre.ebooks.oeb.transforms.manglecase import CaseMangler

from calibre.ebooks.mobi.palmdoc import compress_doc

from calibre.ebooks.mobi.langcodes import iana2mobi

from calibre.ebooks.mobi.mobiml import MBP_NS, MobiMLizer

from calibre.customize.ui import run_plugins_on_postprocess

from calibre.utils.config import Config, StringConfig

from calibre.ebooks.compression.palmdoc import compress_doc

INDEXING = True

FCIS_FLIS = True

WRITE_PBREAKS = True

# TODO:

# - Allow override CSS (?)

# - Generate index records

# - Optionally rasterize tables

EXTH_CODES = {

'description': 103,

'identifier': 104,

'subject': 105,

'pubdate': 106,

'date': 106,

'review': 107,

'contributor': 108,

MAX_THUMB_SIZE = 16 * 1024

MAX_THUMB_DIMEN = (180, 240)

# TAGX: raw byte-string payloads keyed by index flavour ('chapter',
# 'subchapter', 'periodical', 'secondary_book', 'secondary_periodical').
# NOTE(review): presumably the tag-table bytes written into MOBI index
# records; the code that consumes this table is not visible here -- confirm.
TAGX = {

'chapter' :

'\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x00\x00\x00\x01',

'subchapter' :

'\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01\x10\x00\x16\x01\x20\x00\x17\x01\x40\x00\x00\x00\x00\x01',

'periodical' :

'\x00\x00\x00\x02\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01\x20\x00\x16\x01\x40\x00\x17\x01\x80\x00\x00\x00\x00\x01\x45\x01\x01\x00\x46\x01\x02\x00\x47\x01\x04\x00\x00\x00\x00\x01',

'secondary_book':'\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00\x00\x01',

'secondary_periodical':'\x00\x00\x00\x01\x01\x01\x01\x00\x0b\x03\x02\x00\x00\x00\x00\x01'

}

# INDXT: one-byte strings keyed by index entry kind ('chapter', 'subchapter',
# 'article', 'chapter with subchapters', 'periodical', 'section').
# NOTE(review): presumably the per-entry control byte emitted for each kind
# of index entry; the consumer is not visible here -- confirm.
INDXT = {

'chapter' : '\x0f',

'subchapter' : '\x1f',

'article' : '\x3f',

'chapter with subchapters': '\x6f',

'periodical' : '\xdf',

'section' : '\xff',

}

def encode(data):
    """Return *data* encoded as UTF-8.

    data -- a text string (any object exposing ``.encode``).

    Returns the result of ``data.encode('utf-8')``.
    """
    return data.encode('utf-8')

DECINT_FORWARD = 0

DECINT_BACKWARD = 1

def decint(value, direction):

# Encode vwi

bytes = []

while True:

b = value & 0x7f

105

bytes[-1] |= 0x80

106

return ''.join(chr(b) for b in reversed(bytes))

107

108

109

def align_block(raw, multiple=4, pad='\0'):
    """Right-pad *raw* so that its length is a multiple of *multiple*.

    raw      -- the string to align.
    multiple -- alignment boundary (default 4).
    pad      -- padding unit appended once per missing position (default NUL).

    Returns *raw* unchanged when it is already aligned, otherwise *raw*
    followed by ``multiple - (len(raw) % multiple)`` copies of *pad*.
    """
    extra = len(raw) % multiple
    if extra == 0:
        # Already on the boundary: hand back the input untouched.
        return raw
    return raw + pad * (multiple - extra)

113

114

115

def rescale_image(data, maxsizeb, dimen=None):

116

image = Image.open(StringIO(data))

117

format = image.format

133

159

class Serializer(object):

134

160

NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

135

161

136

def __init__(self, oeb, images):

162

def __init__(self, oeb, images, write_page_breaks_after_item=True):

137

163

self.oeb = oeb

138

164

self.images = images

139

165

self.logger = oeb.logger

166

self.write_page_breaks_after_item = write_page_breaks_after_item

140

167

self.id_offsets = {}

141

168

self.href_offsets = defaultdict(list)

142

169

self.breaks = []

163

190

path = urldefrag(ref.href)[0]

164

191

if hrefs[path].media_type not in OEB_DOCS:

165

192

continue

193

166

194

buffer.write('<reference type="')

167

self.serialize_text(ref.type, quot=True)

195

if ref.type.startswith('other.') :

196

self.serialize_text(ref.type.replace('other.',''), quot=True)

197

else :

198

self.serialize_text(ref.type, quot=True)

168

199

buffer.write('" ')

169

200

if ref.title is not None:

170

201

buffer.write('title="')

173

204

self.serialize_href(ref.href)

174

205

# Space required or won't work, I kid you not

175

206

buffer.write(' />')

207

176

208

buffer.write('</guide>')

177

209

178

210

def serialize_href(self, href, base=None):

186

218

item = hrefs[path] if path else None

187

219

if item and item.spine_position is None:

188

220

return False

189

path = item.href if item else base.href

221

path = item.href if item else base.href

190

222

href = '#'.join((path, frag)) if frag else path

191

223

buffer.write('filepos=')

192

224

self.href_offsets[href].append(buffer.tell())

197

229

buffer = self.buffer

198

230

self.anchor_offset = buffer.tell()

199

231

buffer.write('<body>')

232

self.anchor_offset_kindle = buffer.tell()

200

233

# CybookG3 'Start Reading' link

201

234

if 'text' in self.oeb.guide:

202

235

href = self.oeb.guide['text'].href

211

244

212

245

def serialize_item(self, item):

213

246

buffer = self.buffer

214

#buffer.write('<mbp:section>')

215

247

if not item.linear:

216

248

self.breaks.append(buffer.tell() - 1)

217

249

self.id_offsets[item.href] = buffer.tell()

218

250

for elem in item.data.find(XHTML('body')):

219

251

self.serialize_elem(elem, item)

220

#buffer.write('</mbp:section>')

221

buffer.write('<mbp:pagebreak/>')

252

if self.write_page_breaks_after_item:

253

buffer.write('<mbp:pagebreak/>')

222

254

223

255

def serialize_elem(self, elem, item, nsrmap=NSRMAP):

224

256

buffer = self.buffer

225

257

if not isinstance(elem.tag, basestring) \

226

or namespace(elem.tag) not in nsrmap:

227

return

258

or namespace(elem.tag) not in nsrmap:

259

return

228

260

tag = prefixname(elem.tag, nsrmap)

229

261

# Previous layers take care of @name

230

262

id = elem.attrib.pop('id', None)

233

265

offset = self.anchor_offset or buffer.tell()

234

266

self.id_offsets[href] = offset

235

267

if self.anchor_offset is not None and \

236

tag == 'a' and not elem.attrib and \

237

not len(elem) and not elem.text:

238

return

268

tag == 'a' and not elem.attrib and \

269

not len(elem) and not elem.text:

270

return

239

271

self.anchor_offset = buffer.tell()

240

272

buffer.write('<')

241

273

buffer.write(tag)

293

325

buffer.write('%010d' % ioff)

294

326

295

327

328

296

329

class MobiWriter(object):

297

330

COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

298

331

299

def __init__(self, opts, compression=PALMDOC, imagemax=None,
        prefer_author_sort=False, write_page_breaks_after_item=True):
    """Store the conversion options and reset the indexing state.

    :param opts: options object; other methods read ``opts.verbose``.
    :param compression: compression mode; a falsy value selects
        ``UNCOMPRESSED``.
    :param imagemax: maximum image size; a falsy value selects
        ``OTHER_MAX_IMAGE_SIZE``.
    :param prefer_author_sort: stored as ``self._prefer_author_sort``.
    :param write_page_breaks_after_item: forwarded to the Serializer by
        ``_generate_text``.
    """
    self.opts = opts
    self.write_page_breaks_after_item = write_page_breaks_after_item
    self._compression = compression or UNCOMPRESSED
    self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
    self._prefer_author_sort = prefer_author_sort

    # Index generation state
    self._primary_index_record = None
    self._conforming_periodical_toc = False
    self._indexable = False
    self._ctoc = ""
    # One HTMLRecordData per HTML record, filled by the navpoint indexers
    self._HTMLRecords = []
    # Trailing byte sequence produced by the _generate_tbs_* methods
    self._tbSequence = ""
    self._MobiDoc = None
    self._anchor_offset_kindle = 0
    # Flags and counters consulted while assembling trailing byte sequences
    self._initialIndexRecordFound = False
    self._firstSectionConcluded = False
    self._currentSectionIndex = 0

350

351

@classmethod
def generate(cls, opts):
    """Generate a Writer instance from command-line options.

    Reads ``opts.rescale_images`` (selects ``PALM_MAX_IMAGE_SIZE`` as the
    image limit when true) and ``opts.prefer_author_sort``.
    """
    imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
    prefer_author_sort = opts.prefer_author_sort
    # ``opts`` is the first, required argument of __init__; omitting it
    # raised TypeError.
    return cls(opts, compression=PALMDOC, imagemax=imagemax,
               prefer_author_sort=prefer_author_sort)

358

359

def __call__(self, oeb, path):
    """Write ``oeb`` to ``path`` and return the result of ``_dump_stream``.

    ``path`` may be an object with a ``write`` attribute, which is used
    directly, or a filesystem path, which is opened in ``'w+b'`` mode for
    the duration of the dump.
    """
    already_open = hasattr(path, 'write')
    if not already_open:
        with open(path, 'w+b') as stream:
            return self._dump_stream(oeb, stream)
    return self._dump_stream(oeb, path)

310

364

311

def _write(self, *data):
    """Write each positional argument, in order, to ``self._stream``."""
    write = self._stream.write
    for datum in data:
        write(datum)

314

368

327

381

def _generate_content(self):
    """Produce the book content: image map, text, optional index, images.

    Index generation only runs when the module-level ``INDEXING`` switch
    and ``self._indexable`` are both true.  It is best-effort: a failure
    is logged with its traceback and the remaining steps still run.
    """
    self._map_image_names()
    self._generate_text()

    if INDEXING and self._indexable:
        try:
            self._generate_index()
        except Exception:
            # Catch Exception rather than everything so that
            # KeyboardInterrupt/SystemExit still propagate.
            self._oeb.log.exception('Failed to generate index')

    self._generate_images()

331

392

332

393

def _map_image_names(self):
    """Build ``self._images``, a mapping of image href to 1-based index.

    When the guide has a ``'masthead'`` entry its href always receives
    index 1.  Every manifest item whose media type is in
    ``OEB_RASTER_IMAGES`` then gets the next free index, in manifest
    iteration order; the masthead href is not numbered a second time.
    """
    mapping = {}
    self._images = mapping
    next_index = 1
    masthead_href = None

    if 'masthead' in self._oeb.guide:
        masthead_href = self._oeb.guide['masthead'].href
        mapping[masthead_href] = next_index
        next_index += 1

    for entry in self._oeb.manifest.values():
        if entry.media_type not in OEB_RASTER_IMAGES:
            continue
        if entry.href == masthead_href:
            continue
        mapping[entry.href] = next_index
        next_index += 1

339

408

367

436

text.seek(npos)

368

437

return data, overlap

369

438

439

def _generate_flat_indexed_navpoints(self):
    """Fill ``self._HTMLRecords`` from a flat table of contents.

    One ``HTMLRecordData`` is appended per ``RECORD_SIZE`` slice of the
    content.  Each TOC entry after the root is located through
    ``self._id_offsets``; its length runs to the next entry that starts
    at a larger offset, or to the end of the content.  The record in
    which an entry starts gets ``openingNode`` (if still unset) and its
    ``currentSectionNodeCount`` bumped; every later record the entry
    reaches is marked with ``continuingNode``.

    Entries with an empty or blank title are retitled ``"(none)"``.

    Returns True on success.  Returns False when an entry's href has no
    known offset, or when two consecutive depth-1 entries are not
    contiguous (``self._HTMLRecords`` is reset to ``[]`` in that case).
    """
    self._oeb.logger.info('Indexing flat navPoints ...')

    numberOfHTMLRecords = (self._content_length // RECORD_SIZE) + 1

    # Create one HTMLRecordData instance per HTML record
    for record_number in range(numberOfHTMLRecords):
        self._HTMLRecords.append(HTMLRecordData())

    entries = list(self._oeb.toc.iter())[1:]
    myIndex = 0
    previousOffset = 0
    previousLength = 0

    # Get offset, length per entry
    for (i, child) in enumerate(entries):
        if not child.title or not child.title.strip():
            child.title = "(none)"

        h = child.href
        if h not in self._id_offsets:
            self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title)
            return False
        offset = self._id_offsets[h]

        # Length extends to the first later entry that starts further on
        length = None
        for sibling in entries[i+1:]:
            h2 = sibling.href
            if h2 in self._id_offsets:
                offset2 = self._id_offsets[h2]
                if offset2 > offset:
                    length = offset2 - offset
                    break

        if length is None:
            length = self._content_length - offset

        if self.opts.verbose > 3:
            self._oeb.logger.info("child %03d: %s" % (i, child))
            self._oeb.logger.info(" title: %s" % child.title)
            self._oeb.logger.info(" depth: %d" % child.depth())
            self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length))

        # Look for a gap between chapter nodes. Don't evaluate periodical
        # or section nodes
        if i and child.depth() == 1 and entries[i-1].depth() == 1:
            if offset != previousOffset + previousLength:
                self._oeb.log.warning("*** TOC discontinuity ***")
                self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
                    (i-1, entries[i-1].title, previousOffset, previousLength))
                self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
                    (i, child.title, offset, previousOffset + previousLength))
                self._oeb.log.warning('_generate_flat_indexed_navpoints: Failed to generate index')
                # Zero out self._HTMLRecords, return False
                self._HTMLRecords = []
                return False

        previousOffset = offset
        previousLength = length

        # The HTML record in which this entry starts
        myStartingRecord = offset // RECORD_SIZE
        startingRecord = self._HTMLRecords[myStartingRecord]

        # If no one has taken the openingNode slot, it must be us
        if startingRecord.openingNode == -1:
            startingRecord.openingNode = myIndex

        # Bump the node count for this HTML record; -1 means "no nodes
        # yet", so the first node sets the count to 1
        if startingRecord.currentSectionNodeCount == -1:
            startingRecord.currentSectionNodeCount = 1
        else:
            startingRecord.currentSectionNodeCount += 1

        # Mark every further record this entry reaches into
        myEndingRecord = (offset + length) // RECORD_SIZE
        for spannedRecord in range(myStartingRecord + 1, myEndingRecord + 1):
            self._HTMLRecords[spannedRecord].continuingNode = myIndex
            self._HTMLRecords[spannedRecord].currentSectionNodeCount = 1

        if self.opts.verbose > 3:
            # Report the last record actually touched (previously the
            # value one past it was logged for spanning entries)
            self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
                (myIndex, child.title if child.title.strip() > "" else "(missing)",
                 myStartingRecord, max(myStartingRecord, myEndingRecord), offset, length))

        myIndex += 1

    # Successfully parsed the entries
    return True

545

546

def _generate_indexed_navpoints(self):
    """Fill ``self._HTMLRecords`` from a TOC that may contain sections.

    Appends one ``HTMLRecordData`` per ``RECORD_SIZE`` slice of the
    content, then walks every TOC entry after the root.  Entries whose
    ``self._ctoc_map`` class is ``'periodical'`` or ``'section'`` only
    update section bookkeeping; the remaining entries update the
    opening/continuing node fields and node counts of the records they
    start in and reach into.

    Returns True when every entry was processed.  Returns False when an
    entry's href is missing from ``self._id_offsets`` or when a gap
    between consecutive nodes is detected (``self._HTMLRecords`` is
    reset to ``[]`` in the latter case).

    NOTE(review): ``self._ctoc_map[i]`` is indexed with the position of
    the entry in ``entries``, so the two sequences are presumably
    parallel -- confirm against the code that builds ``_ctoc_map``.
    """
    # Assemble a HTMLRecordData instance for each HTML record
    # Return True if valid, False if invalid
    self._oeb.logger.info('Indexing navPoints ...')

    numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1

    # Create a list of HTMLRecordData class instances
    x = numberOfHTMLRecords
    while x:
        self._HTMLRecords.append(HTMLRecordData())
        x -= 1

    toc = self._oeb.toc
    myIndex = 0
    myEndingRecord = 0
    previousOffset = 0
    previousLength = 0
    offset = 0
    length = 0
    sectionChangedInRecordNumber = -1
    sectionChangesInThisRecord = False
    entries = list(toc.iter())[1:]

    # Get offset, length per entry
    # Find the index of the first article/chapter node; the loop variable
    # firstSequentialNode keeps that index after the break.
    # NOTE(review): firstSequentialNode is never bound if _ctoc_map is empty.
    for (firstSequentialNode, node) in enumerate(list(self._ctoc_map)) :
        if node['klass'] != 'article' and node['klass'] != 'chapter' :
            # Skip periodical and section entries
            continue
        else :
            if self.opts.verbose > 3 :self._oeb.logger.info("\tFirst sequential node: %03d" % firstSequentialNode)
            break

    for i, child in enumerate(entries):
        # Entries continues with a stream of section+articles, section+articles ...
        h = child.href
        if h not in self._id_offsets:
            self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title)
            return False
        offset = self._id_offsets[h]

        length = None

        # Length runs to the first later entry that starts at a larger offset
        for sibling in entries[i+1:]:
            h2 = sibling.href
            if h2 in self._id_offsets:
                offset2 = self._id_offsets[h2]
                if offset2 > offset:
                    length = offset2 - offset
                    break

        # No later entry found: the entry runs to the end of the content
        if length is None:
            length = self._content_length - offset

        if self.opts.verbose > 3 :
            self._oeb.logger.info("child %03d: %s" % (i, child))
            self._oeb.logger.info(" title: %s" % child.title)
            self._oeb.logger.info(" depth: %d" % child.depth())
            self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length))

        # Look a gap between nodes, articles/chapters only, as
        # periodical and section lengths cover spans of articles
        if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
            if offset != previousOffset + previousLength :
                self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
                self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
                    (i-1, entries[i-1].title, previousOffset, previousLength) )
                self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
                    (i, child.title, offset, previousOffset + previousLength) )
                self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
                self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
                self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
                # Zero out self._HTMLRecords, return False
                self._HTMLRecords = []
                last_name = None
                return False

        previousOffset = offset
        previousLength = length

        # Calculate the HTML record for this entry
        thisRecord = offset // RECORD_SIZE

        # Store the current continuingNodeParent and openingNodeParent
        if self._ctoc_map[i]['klass'] == 'article':
            if thisRecord > 0 :
                if sectionChangesInThisRecord : # <<<
                    self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - 1
                else :
                    self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex

        # periodical header?
        if self._ctoc_map[i]['klass'] == 'periodical' :
            # INCREMENT currentSectionNode count
            # Commented out because structured docs don't count section changes in nodeCount
            # compensation at 948 for flat periodicals
            # self._HTMLRecords[thisRecord].currentSectionNodeCount = 1
            continue

        # Is this node a new section?
        if self._ctoc_map[i]['klass'] == 'section' :
            # INCREMENT currentSectionNode count
            # Commented out because structured docs don't count section changes in nodeCount
            # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1

            # *** This should check currentSectionNumber, because content could start late
            if thisRecord > 0:
                sectionChangesInThisRecord = True
                # NOTE(review): this assigns sectionChangesInRecordNumber,
                # but the variable initialised above and compared below is
                # sectionChangedInRecordNumber.  As written the compared
                # variable keeps its initial -1.  Looks like a typo, but
                # the byte sequences may have been tuned with it in place;
                # confirm before renaming.
                sectionChangesInRecordNumber = thisRecord
                self._currentSectionIndex += 1
                self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
                # The following node opens the nextSection
                self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
                continue
            else :
                continue


        # If no one has taken the openingNode slot, it must be us
        # This could happen before detecting a section change
        if self._HTMLRecords[thisRecord].openingNode == -1 :
            self._HTMLRecords[thisRecord].openingNode = myIndex
            self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex

        # Bump the nextSection node count while we're in the same record
        if sectionChangedInRecordNumber == thisRecord :
            if self._ctoc_map[i]['klass'] == 'article' :
                if self._HTMLRecords[thisRecord].nextSectionNodeCount == -1:
                    self._HTMLRecords[thisRecord].nextSectionNodeCount = 1
                else:
                    self._HTMLRecords[thisRecord].nextSectionNodeCount += 1
            else :
                # Bump the currentSectionNodeCount one last time
                self._HTMLRecords[thisRecord].currentSectionNodeCount += 1

        else :
            # Reset the change record
            # sectionChangedInRecordNumber = -1
            sectionChangesInThisRecord = False
            # -1 means "no nodes yet": the first node sets the count to 1
            if self._HTMLRecords[thisRecord].currentSectionNodeCount == -1:
                self._HTMLRecords[thisRecord].currentSectionNodeCount = 1
            else:
                self._HTMLRecords[thisRecord].currentSectionNodeCount += 1

        # Fill in the spanning records
        myEndingRecord = (offset + length) // RECORD_SIZE
        if myEndingRecord > thisRecord :
            sectionChangesInThisRecord = False
            interimSpanRecord = thisRecord + 1
            while interimSpanRecord <= myEndingRecord :
                self._HTMLRecords[interimSpanRecord].continuingNode = myIndex

                self._HTMLRecords[interimSpanRecord].continuingNodeParent = self._currentSectionIndex
                self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
                interimSpanRecord += 1

            # interimSpanRecord is myEndingRecord + 1 here, so the logged
            # upper bound is one past the last record marked above
            if self.opts.verbose > 3 :self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
                (myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, interimSpanRecord, offset, length) )
        else :
            if self.opts.verbose > 3 : self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
                (myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, thisRecord, offset, length) )

        # last_name is assigned but not read anywhere in this method
        last_name = "%04X" % myIndex
        myIndex += 1

    # Successfully parsed the entries
    return True

713

714

715

def _generate_tbs_book(self, nrecords, lastrecord):
    """Assemble the trailing byte sequence (TBS) for one book HTML record.

    The result is stored in ``self._tbSequence``; nothing is returned.

    :param nrecords: index into ``self._HTMLRecords`` of the record
        being processed.
    :param lastrecord: index of the final HTML record; a last record
        holding exactly one node is given type 2.

    Until the first indexed record has been seen, a record without nodes
    (``currentSectionNodeCount == -1``) gets only the length value.  The
    first indexed record sets ``self._initialIndexRecordFound``.
    """
    if self.opts.verbose > 3:
        self._oeb.logger.info("Assembling TBS for Book: HTML record %03d of %03d" % \
            (nrecords, lastrecord))

    record = self._HTMLRecords[nrecords]

    if not self._initialIndexRecordFound:
        if record.currentSectionNodeCount == -1:
            # No indexing data - write vwi length of 1 only
            self._tbSequence = decint(1, DECINT_FORWARD)
            return

        # First indexed HTML record is a special case: the type value is
        # written on its own, without a node index
        self._initialIndexRecordFound = True
        if record.currentSectionNodeCount == 1:
            kind = 2
        else:
            kind = 6
        leading = kind
    else:
        if nrecords == lastrecord and record.currentSectionNodeCount == 1:
            # Ending record with singleton node
            kind = 2
        elif record.continuingNode > 0 and record.openingNode == -1:
            # Span-only record: zero out the nodeCount with a pre-formed vwi
            kind = 3
            record.currentSectionNodeCount = 0x80
        else:
            kind = 6
        # continuingNode shifted left by 3 with the TBS type in the low bits
        leading = (record.continuingNode << 3) | kind

    sequence = decint(leading, DECINT_FORWARD)
    sequence += decint(0x00, DECINT_FORWARD)
    # Type 2 records carry no node count
    if kind != 2:
        sequence += chr(record.currentSectionNodeCount)
    # Final value is the length of the sequence including itself
    sequence += decint(len(sequence) + 1, DECINT_FORWARD)

    self._tbSequence = sequence

777

778

779

def _generate_tbs_flat_periodical(self, nrecords, lastrecord):
    """Assemble the trailing byte sequence for one flat-periodical record.

    Called once per HTML record.  ``nrecords`` indexes
    ``self._HTMLRecords``; ``lastrecord`` is the index of the final
    record.  The assembled sequence is stored in ``self._tbSequence``.
    The first record found to contain nodes sets
    ``self._initialIndexRecordFound``.
    """
    # Flat periodicals <0x102> have a single section for all articles
    # Structured periodicals <0x101 | 0x103> have one or more sections with articles
    # The first section TBS sequence is different for Flat and Structured
    # This function is called once per HTML record

    # Variables for trailing byte sequence
    tbsType = 0x00
    tbSequence = ""

    # Generate TBS for type 0x102 - mobi_feed - flat periodical
    if self._initialIndexRecordFound == False :
        # Is there any indexed content yet?
        if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 :
            # No indexing data - write vwi length of 1 only
            tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)

        else :
            # First indexed record: Type 6 with nodeCount only
            self._initialIndexRecordFound = True
            tbsType = 6
            tbSequence = decint(tbsType, DECINT_FORWARD)
            tbSequence += decint(0x00, DECINT_FORWARD)
            # nodeCount = 0xDF + 0xFF + n(0x3F) - need to add 2 because we didn't count them earlier
            tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount + 2)
            # Last value is the sequence length including itself
            tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
            if self.opts.verbose > 2 :
                self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \
                    (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
                self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)

    else :
        # An HTML record with nextSectionNumber = -1 has no section change in this record
        # Default for flat periodicals with only one section
        if self.opts.verbose > 2 :
            self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \
                (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
            self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)

        # First section has different Type values
        # Determine tbsType for HTMLRecords > 0
        if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
            # Ending record with singleton node
            tbsType = 6

            # Assemble the Type 6 TBS
            tbSequence = decint(tbsType, DECINT_FORWARD) # Type
            tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
            tbSequence += chr(2) # arg2 = 0x02

            # Assemble arg3 - (article index +1) << 4 + flag: 1 = article spans this record
            arg3 = self._HTMLRecords[nrecords].continuingNode
            arg3 += 1
            arg3 <<= 4
            arg3 |= 0x0 #flags = 0
            tbSequence += decint(arg3, DECINT_FORWARD) # arg3


            # No node count is written in this branch (the line below is disabled)
            # tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
            tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len

        elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 :
            # This is a span-only record
            tbsType = 6
            # Zero out the nodeCount with a pre-formed vwi
            self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80

            # Assemble the Type 6 TBS
            tbSequence = decint(tbsType, DECINT_FORWARD) # Type
            tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
            tbSequence += chr(2) # arg2 = 0x02
            # Assemble arg3 - article index << 3 + flag: 1 = article spans this record
            # (the code below shifts by 4, not 3)
            arg3 = self._HTMLRecords[nrecords].continuingNode
            # Add the index of the openingNodeParent to get the offset start
            # We know that section 0 is at position 1, section 1 at index 2, etc.
            arg3 += self._HTMLRecords[nrecords].continuingNodeParent + 1
            arg3 <<= 4
            arg3 |= 0x01
            tbSequence += decint(arg3, DECINT_FORWARD) # arg3
            tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
            tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len

        else :
            tbsType = 7
            # Assemble the Type 7 TBS
            tbSequence = decint(tbsType, DECINT_FORWARD) # Type
            tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
            tbSequence += chr(2) # arg2 = 0x02
            tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80
            # Assemble arg4 - article index << 4 + flag: 1 = article spans this record
            arg4 = self._HTMLRecords[nrecords].continuingNode
            # Add the index of the openingNodeParent to get the offset start
            # We know that section 0 is at position 1, section 1 at index 2, etc.
            arg4 += self._HTMLRecords[nrecords].continuingNodeParent + 1
            arg4 <<= 4
            arg4 |= 0x04 # 4: multiple nodes
            tbSequence += decint(arg4, DECINT_FORWARD) # arg4
            tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
            tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len

    # Publish the result for the caller
    self._tbSequence = tbSequence

880

881

def _generate_tbs_structured_periodical(self, nrecords, lastrecord):
    """Assemble the trailing byte sequence for one structured-periodical record.

    Structured periodicals <0x101 | 0x103> have one or more sections for
    all articles.  This method is called once per HTML record and stores
    its result in ``self._tbSequence``.

    :param nrecords: index into ``self._HTMLRecords`` of the record
        being processed.
    :param lastrecord: index of the final HTML record.

    Three phases are distinguished through instance flags:

    * before any indexed record was seen (``_initialIndexRecordFound``
      false): records without nodes get only the length value, the
      first record with nodes gets a Type 6 sequence;
    * until the first section switch (``_firstSectionConcluded`` false):
      Type 6 and 7 sequences, then a Type 3 sequence for the record in
      which the section changes;
    * afterwards: Type 2 for records without a section change, Type 3
      for records with one.
    """
    record = self._HTMLRecords[nrecords]

    def trace_record():
        # Verbose-only diagnostics for records without a section switch
        if self.opts.verbose > 2:
            self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
                (nrecords, lastrecord, record.continuingNodeParent))
            record.dumpData(nrecords, self._oeb)

    def first_section_sequence(tbsType, article, flags):
        # Type 6: type, 0x00, 0x02, arg, nodeCount, len
        # Type 7: as Type 6 with an extra 0x00 vwi before arg
        # arg: (section jump + article index) << 4 + flags
        seq = decint(tbsType, DECINT_FORWARD)
        seq += decint(0x00, DECINT_FORWARD)
        seq += chr(2)
        if tbsType == 7:
            seq += decint(0x00, DECINT_FORWARD)
        arg = ((self._sectionCount + article) << 4) | flags
        seq += decint(arg, DECINT_FORWARD)
        seq += chr(record.currentSectionNodeCount)
        seq += decint(len(seq) + 1, DECINT_FORWARD)
        return seq

    tbSequence = ""

    if self._initialIndexRecordFound == False:
        # Is there any indexed content yet?
        if record.currentSectionNodeCount == -1:
            # No indexing data - write vwi length of 1 only
            tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
        else:
            self._initialIndexRecordFound = True
            trace_record()
            # First record only: jump over the section group, first
            # article index = 0.  Structured periodicals don't count
            # periodical, section in nodeCount.
            tbSequence = first_section_sequence(6, 0, 0x04)

    elif self._firstSectionConcluded == False:
        # Use type 6 & 7 until first section switch
        if record.nextSectionNumber == -1:
            # nextSectionNumber = -1: no section change in this record
            trace_record()
            if nrecords == lastrecord and record.currentSectionNodeCount == 1:
                # Ending record with singleton node
                tbSequence = first_section_sequence(6, record.continuingNode, 0x04)
            elif record.continuingNode > 0 and record.openingNode == -1:
                # Span-only record: zero out the nodeCount with a
                # pre-formed vwi before it is written
                record.currentSectionNodeCount = 0x80
                tbSequence = first_section_sequence(6, record.continuingNode, 0x01)
            else:
                tbSequence = first_section_sequence(7, record.continuingNode, 0x04)
        elif record.nextSectionNumber > 0:
            # Initial section switch from section 1
            tbSequence = self._tbs_section_switch(nrecords, lastrecord)
            self._firstSectionConcluded = True
        # Any other nextSectionNumber leaves the sequence empty

    elif record.nextSectionNumber == -1:
        # After first section switch, records without a switch use type 2
        trace_record()
        hasNodes = record.currentSectionNodeCount > 0

        tbSequence = decint(2, DECINT_FORWARD)          # Type
        tbSequence += decint(0x00, DECINT_FORWARD)      # arg1 = 0x80

        # arg2: (section + 1) << 4, flag 1 when the record has nodes
        arg2 = (record.continuingNodeParent + 1) << 4
        if hasNodes:
            arg2 |= 1
        tbSequence += decint(arg2, DECINT_FORWARD)
        if hasNodes:
            # Add an extra vwi 0x00
            tbSequence += decint(0x00, DECINT_FORWARD)

        # arg3 - offset of continuingNode from sectionParent
        # (marked "Total guess" by the original author)
        arg3 = self._sectionCount - record.continuingNodeParent
        arg3 += record.continuingNode
        arg3 <<= 4
        arg3 |= 4 if hasNodes else 1
        tbSequence += decint(arg3, DECINT_FORWARD)

        if hasNodes:
            tbSequence += chr(record.currentSectionNodeCount)
        else:
            tbSequence += decint(0x00, DECINT_FORWARD)

        tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)  # len

    else:
        # Section switch when section > 1
        tbSequence = self._tbs_section_switch(nrecords, lastrecord)

    self._tbSequence = tbSequence

def _tbs_section_switch(self, nrecords, lastrecord):
    """Return the Type 3 trailing byte sequence for a record in which the section changes."""
    record = self._HTMLRecords[nrecords]

    if self.opts.verbose > 2:
        self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \
            (nrecords, lastrecord, record.continuingNodeParent, record.nextSectionNumber))
        record.dumpData(nrecords, self._oeb)

    tbSequence = decint(3, DECINT_FORWARD)          # Type
    tbSequence += decint(0x00, DECINT_FORWARD)      # arg1 = 0x80
    tbSequence += decint(0x00, DECINT_FORWARD)      # arg2 = 0x80

    # arg3: upper nybble = ending section index, lower nybble = flags (0)
    arg3 = (record.continuingNodeParent + 1) << 4
    tbSequence += decint(arg3, DECINT_FORWARD)

    sectionDelta = self._sectionCount - record.continuingNodeParent - 1
    articleOffset = record.continuingNode + 1

    # arg4: upper nybble from continuingNode,
    # flag 4 = starting nodes from previous section, 0 = none
    arg4Flags = 4 if record.currentSectionNodeCount > 1 else 0
    arg4 = ((sectionDelta + articleOffset) << 4) | arg4Flags
    tbSequence += decint(arg4, DECINT_FORWARD)

    # Optional arg4a: node count of the previous section when > 1
    if arg4Flags == 4:
        tbSequence += chr(record.currentSectionNodeCount)

    # arg5 ("article2: not completely understood" per the original author)
    arg5 = sectionDelta + articleOffset
    if record.currentSectionNodeCount < 2:
        arg5 -= 1
    arg5 = (arg5 << 4) | 8
    tbSequence += decint(arg5, DECINT_FORWARD)

    # arg6: first article of the new section, flag 4 when it has > 1 nodes
    arg6Flags = 4 if record.nextSectionNodeCount > 1 else 0
    arg6 = ((sectionDelta + record.nextSectionOpeningNode) << 4) | arg6Flags
    tbSequence += decint(arg6, DECINT_FORWARD)

    # Optional arg6a: node count of the new section when > 1
    if arg6Flags == 4:
        tbSequence += chr(record.nextSectionNodeCount)

    tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)  # len
    return tbSequence

1176

1177

def _evaluate_periodical_toc(self):

1178

'''

1179

Periodical:

1180

<navMap> depth=4

1181

<navPoint class="periodical"> depth=3 1

1182

<navPoint class="section"> depth=2 1 or more

1183

<navPoint class="article"> depth=1 multiple

1184

Book:

1185

<navMap> depth=2

1186

<navPoint [class="chapter"|None]> depth=1 multiple

1187

'''

1188

toc = self._oeb.toc

1189

nodes = list(toc.iter())[1:]

1190

toc_conforms = True

1191

for (i, child) in enumerate(nodes) :

1192

if child.klass == "periodical" and child.depth() != 3 or \

1193

child.klass == "section" and child.depth() != 2 or \

1194

child.klass == "article" and child.depth() != 1 :

1195

1196

self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % \

1197

(child.klass, child.depth()) )

1198

self._oeb.logger.warn(" <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % \

1199

(child.title, child.klass, child.depth(), child.play_order) )

1200

toc_conforms = False

1201

1202

# We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs

1203

if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] :

1204

self._oeb.logger.info('metadata missing date/timestamp')

1205

toc_conforms = False

1206

1207

if not 'masthead' in self._oeb.guide :

1208

self._oeb.logger.info('mastheadImage missing from manifest')

1209

toc_conforms = False

1210

1211

self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming")

1212

return toc_conforms

1213

1214

370

1215

def _generate_text(self):
    '''
    Serialize the book markup and split it into text records.

    The serialized text is read record by record through
    self._read_text_record(), optionally PalmDOC-compressed, and
    appended to self._records.  After the text data each record may
    carry:

      * the multibyte overlap bytes plus the page-break list, written
        only when WRITE_PBREAKS is set;
      * the trailing byte sequence (TBS) produced by the generator
        matching self._MobiDoc.mobiType, written only when INDEXING is
        set and the TOC turned out to be indexable.

    Side effects: sets self._anchor_offset_kindle, self._id_offsets,
    self._content_length, self._text_length, self._ctoc,
    self._indexable and self._text_nrecords, and
    self._conforming_periodical_toc when opts.mobi_periodical is set.

    Raises NotImplementedError for a mobiType without a TBS generator.
    '''
    self._oeb.logger.info('Serializing markup content...')
    serializer = Serializer(self._oeb, self._images,
            write_page_breaks_after_item=self.write_page_breaks_after_item)
    breaks = serializer.breaks
    text = serializer.text
    self._anchor_offset_kindle = serializer.anchor_offset_kindle
    self._id_offsets = serializer.id_offsets
    self._content_length = len(text)
    self._text_length = len(text)
    text = StringIO(text)
    buf = []
    nrecords = 0
    lastrecord = (self._content_length // RECORD_SIZE )
    offset = 0

    if self._compression != UNCOMPRESSED:
        self._oeb.logger.info(' Compressing markup content...')
    data, overlap = self._read_text_record(text)

    # Evaluate toc for conformance
    if self.opts.mobi_periodical :
        self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...')
        self._conforming_periodical_toc = self._evaluate_periodical_toc()

    # This routine decides whether to build flat or structured based on self._conforming_periodical_toc
    self._ctoc = self._generate_ctoc()

    # Build the HTMLRecords list so we can assemble the trailing bytes sequences in the following while loop
    toc = self._oeb.toc
    entries = list(toc.iter())[1:]

    if len(entries) :
        self._indexable = self._generate_indexed_navpoints()
    else :
        self._oeb.logger.info(' No entries found in TOC ...')
        self._indexable = False

    if not self._indexable :
        self._oeb.logger.info(' Writing unindexed mobi ...')

    while len(data) > 0:
        if self._compression == PALMDOC:
            data = compress_doc(data)
        record = StringIO()
        record.write(data)

        # Marshall's utf-8 break code.
        if WRITE_PBREAKS :
            record.write(overlap)
            record.write(pack('>B', len(overlap)))
            nextra = 0
            pbreak = 0
            running = offset
            # Consume every page break that falls inside this record.
            while breaks and (breaks[0] - offset) < RECORD_SIZE:
                # .pop returns item, removes it from list
                pbreak = (breaks.pop(0) - running) >> 3
                if self.opts.verbose > 2 :
                    self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
                encoded = decint(pbreak, DECINT_FORWARD)
                record.write(encoded)
                running += pbreak << 3
                nextra += len(encoded)
            # The size field counts itself, so grow it until its own
            # encoded length matches the length that was assumed.
            lsize = 1
            while True:
                size = decint(nextra + lsize, DECINT_BACKWARD)
                if len(size) == lsize:
                    break
                lsize += 1
            record.write(size)

        # Write Trailing Byte Sequence
        if INDEXING and self._indexable:
            # Dispatch to different TBS generators based upon publication type
            booktype = self._MobiDoc.mobiType
            if booktype == 0x002 :
                self._generate_tbs_book(nrecords, lastrecord)
            elif booktype == 0x102 :
                self._generate_tbs_flat_periodical(nrecords, lastrecord)
            elif booktype == 0x101 or booktype == 0x103 :
                self._generate_tbs_structured_periodical(nrecords, lastrecord)
            else :
                raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype)

            # Write the sequence
            record.write(self._tbSequence)

        self._records.append(record.getvalue())
        buf.append(self._records[-1])
        nrecords += 1
        offset += RECORD_SIZE
        data, overlap = self._read_text_record(text)

    if INDEXING:
        # Append one padding record of 0-3 NUL bytes so the total length
        # of the text records ends on a 4-byte boundary.
        extra = sum(map(len, buf))%4
        if extra == 0:
            extra = 4
        self._records.append('\0'*(4-extra))
        nrecords += 1
    self._text_nrecords = nrecords

410

1315

1316

411

1317

def _generate_images(self):

412

1318

self._oeb.logger.info('Serializing images...')

413

1319

images = [(index, href) for href, index in self._images.items()]

414

1320

images.sort()

1321

self._first_image_record = None

415

1322

for _, href in images:

416

1323

item = self._oeb.manifest.hrefs[href]

417

1324

try:

420

1327

self._oeb.logger.warn('Bad image file %r' % item.href)

421

1328

continue

422

1329

self._records.append(data)

1330

if self._first_image_record is None:

1331

self._first_image_record = len(self._records)-1

1332

1333

def _generate_end_records(self):
    '''
    Append the closing records to self._records.

    With FCIS_FLIS enabled a FLIS record and an FCIS record (the latter
    embedding self._text_length) are appended and their indices are
    stored in self._flis_number / self._fcis_number, followed by the
    4-byte closing record.  With FCIS_FLIS disabled only the closing
    record is appended; self._flis_number is set to its index and
    self._fcis_number is left untouched.
    '''
    # presumably the MOBI end-of-file marker record -- TODO confirm
    closing_record = '\xE9\x8E\x0D\x0A'
    records = self._records

    if not FCIS_FLIS:
        self._flis_number = len(records)
        records.append(closing_record)
        return

    # This adds the binary blobs of FLIS and FCIS, which don't seem to be necessary
    flis = ('FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'
            + '\xff'*4)
    self._flis_number = len(records)
    records.append(flis)

    fcis_head = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
    fcis_tail = '\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
    fcis = fcis_head + pack('>I', self._text_length) + fcis_tail
    self._fcis_number = len(records)
    records.append(fcis)

    records.append(closing_record)

423

1350

424

1351

def _generate_record0(self):

425

1352

metadata = self._oeb.metadata

426

1353

exth = self._build_exth()

1354

last_content_record = len(self._records) - 1

1355

1356

'''

1357

if INDEXING and self._indexable:

1358

self._generate_end_records()

1359

'''

1360

self._generate_end_records()

1361

427

1362

record0 = StringIO()

1363

# The PalmDOC Header

428

1364

record0.write(pack('>HHIHHHH', self._compression, 0,

429

self._text_length, self._text_nrecords, RECORD_SIZE, 0, 0))

1365

self._text_length,

1366

self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)

430

1367

uid = random.randint(0, 0xffffffff)

431

1368

title = str(metadata.title[0])

1369

# The MOBI Header

1370

1371

# 0x0 - 0x3

432

1372

record0.write('MOBI')

433

record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 6))

434

record0.write('\xff' * 40)

435

record0.write(pack('>I', self._text_nrecords + 1))

436

record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))

437

record0.write(iana2mobi(str(metadata.language[0])))

1373

1374

# 0x4 - 0x7 : Length of header

1375

# 0x8 - 0xb : MOBI type

1376

# type meaning

1377

# 0x002 MOBI book (chapter - chapter navigation)

1378

# 0x101 News - Hierarchical navigation with sections and articles

1379

# 0x102 News feed - Flat navigation

1380

# 0x103 News magazine - same as 0x101

1381

# 0xC - 0xF : Text encoding (65001 is utf-8)

1382

# 0x10 - 0x13 : UID

1383

# 0x14 - 0x17 : Generator version

1384

1385

btype = self._MobiDoc.mobiType

1386

1387

record0.write(pack('>IIIII',

1388

0xe8, btype, 65001, uid, 6))

1389

1390

# 0x18 - 0x1f : Unknown

1391

record0.write('\xff' * 8)

1392

1393

1394

# 0x20 - 0x23 : Secondary index record

1395

if btype < 0x100 :

1396

record0.write(pack('>I', 0xffffffff))

1397

elif btype > 0x100 and self._indexable :

1398

record0.write(pack('>I', 0xffffffff if self._primary_index_record is

1399

None else self._primary_index_record+3))

1400

else :

1401

record0.write(pack('>I', 0xffffffff))

1402

1403

# 0x24 - 0x3f : Unknown

1404

record0.write('\xff' * 28)

1405

1406

# 0x40 - 0x43 : Offset of first non-text record

1407

record0.write(pack('>I',

1408

self._text_nrecords + 1))

1409

1410

# 0x44 - 0x4b : title offset, title length

1411

record0.write(pack('>II',

1412

0xe8 + 16 + len(exth), len(title)))

1413

1414

# 0x4c - 0x4f : Language specifier

1415

record0.write(iana2mobi(

1416

str(metadata.language[0])))

1417

1418

# 0x50 - 0x57 : Unknown

438

1419

record0.write('\0' * 8)

439

record0.write(pack('>II', 6, self._text_nrecords + 1))

1420

1421

# 0x58 - 0x5b : Format version

1422

# 0x5c - 0x5f : First image record number

1423

record0.write(pack('>II',

1424

6, self._first_image_record if self._first_image_record else 0))

1425

1426

# 0x60 - 0x63 : First HUFF/CDIC record number

1427

# 0x64 - 0x67 : Number of HUFF/CDIC records

1428

# 0x68 - 0x6b : First DATP record number

1429

# 0x6c - 0x6f : Number of DATP records

440

1430

record0.write('\0' * 16)

1431

1432

# 0x70 - 0x73 : EXTH flags

441

1433

record0.write(pack('>I', 0x50))

1434

1435

# 0x74 - 0x93 : Unknown

442

1436

record0.write('\0' * 32)

443

record0.write(pack('>IIII', 0xffffffff, 0xffffffff, 0, 0))

444

# The '5' is a bitmask of extra record data at the end:

1437

1438

# 0x94 - 0x97 : DRM offset

1439

# 0x98 - 0x9b : DRM count

1440

# 0x9c - 0x9f : DRM size

1441

# 0xa0 - 0xa3 : DRM flags

1442

record0.write(pack('>IIII',

1443

0xffffffff, 0xffffffff, 0, 0))

1444

1445

1446

# 0xa4 - 0xaf : Unknown

1447

record0.write('\0'*12)

1448

1449

# 0xb0 - 0xb1 : First content record number

1450

# 0xb2 - 0xb3 : last content record number

1451

# (Includes Image, DATP, HUFF, DRM)

1452

record0.write(pack('>HH', 1, last_content_record))

1453

1454

# 0xb4 - 0xb7 : Unknown

1455

record0.write('\0\0\0\x01')

1456

1457

# 0xb8 - 0xbb : FCIS record number

1458

if FCIS_FLIS :

1459

# Write these if FCIS/FLIS turned on

1460

# 0xb8 - 0xbb : FCIS record number

1461

record0.write(pack('>I', self._fcis_number))

1462

1463

# 0xbc - 0xbf : Unknown (FCIS record count?)

1464

record0.write(pack('>I', 1))

1465

1466

# 0xc0 - 0xc3 : FLIS record number

1467

record0.write(pack('>I', self._flis_number))

1468

1469

# 0xc4 - 0xc7 : Unknown (FLIS record count?)

1470

record0.write(pack('>I', 1))

1471

else :

1472

# 0xb8 - 0xbb : FCIS record number

1473

record0.write(pack('>I', 0xffffffff))

1474

1475

# 0xbc - 0xbf : Unknown (FCIS record count?)

1476

record0.write(pack('>I', 0xffffffff))

1477

1478

# 0xc0 - 0xc3 : FLIS record number

1479

record0.write(pack('>I', 0xffffffff))

1480

1481

# 0xc4 - 0xc7 : Unknown (FLIS record count?)

1482

record0.write(pack('>I', 1))

1483

1484

# 0xc8 - 0xcf : Unknown

1485

record0.write('\0'*8)

1486

1487

# 0xd0 - 0xdf : Unknown

1488

record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

1489

1490

# 0xe0 - 0xe3 : Extra record data

1491

# Extra record data flags:

445

1492

# - 0x1: <extra multibyte bytes><size> (?)

1493

# - 0x2: <TBS indexing description of this HTML record><size> GR

446

1494

# - 0x4: <uncrossable breaks><size>

447

# Of course, the formats aren't quite the same.

448

# TODO: What the hell are the rest of these fields?

449

record0.write(pack('>IIIIIIIIIIIIIIIII',

450

0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff,

451

0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 5, 0xffffffff))

1495

# GR: Use 7 for indexed files, 5 for unindexed

1496

# Setting bit 2 (0x4) disables <guide><reference type="start"> functionality

1497

1498

trailingDataFlags = 1

1499

if self._indexable :

1500

trailingDataFlags |= 2

1501

if WRITE_PBREAKS :

1502

trailingDataFlags |= 4

1503

record0.write(pack('>I', trailingDataFlags))

1504

1505

# 0xe4 - 0xe7 : Primary index record

1506

record0.write(pack('>I', 0xffffffff if self._primary_index_record is

1507

None else self._primary_index_record))

1508

452

1509

record0.write(exth)

453

1510

record0.write(title)

454

1511

record0 = record0.getvalue()

481

1538

exth.write(pack('>II', code, len(data) + 8))

482

1539

exth.write(data)

483

1540

nrecs += 1

484

if oeb.metadata.cover:

485

id = str(oeb.metadata.cover[0])

1541

if term == 'rights' :

1542

rights = unicode(oeb.metadata.rights[0]).encode('utf-8')

1543

exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))

1544

exth.write(rights)

1545

1546

# Add a publication date entry

1547

if oeb.metadata['date'] != [] :

1548

datestr = str(oeb.metadata['date'][0])

1549

elif oeb.metadata['timestamp'] != [] :

1550

datestr = str(oeb.metadata['timestamp'][0])

1551

1552

if datestr is not None:

1553

exth.write(pack('>II',EXTH_CODES['pubdate'], len(datestr) + 8))

1554

exth.write(datestr)

1555

nrecs += 1

1556

else:

1557

raise NotImplementedError("missing date or timestamp needed for mobi_periodical")

1558

1559

if oeb.metadata.cover and \

1560

unicode(oeb.metadata.cover[0]) in oeb.manifest.ids:

1561

id = unicode(oeb.metadata.cover[0])

486

1562

item = oeb.manifest.ids[id]

487

1563

href = item.href

488

1564

index = self._images[href] - 1

493

1569

if index is not None:

494

1570

exth.write(pack('>III', 0xca, 0x0c, index - 1))

495

1571

nrecs += 1

1572

496

1573

exth = exth.getvalue()

497

1574

trail = len(exth) % 4

498

1575

pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte

515

1592

516

1593

def _write_header(self):

517

1594

title = str(self._oeb.metadata.title[0])

518

title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32]

1595

title = re.sub('[^-A-Za-z0-9]+', '_', title)[:31]

519

1596

title = title + ('\0' * (32 - len(title)))

520

1597

now = int(time.time())

521

1598

nrecords = len(self._records)

531

1608

for record in self._records:

532

1609

self._write(record)

533

1610

534

535

def config(defaults=None):
    '''
    Build the option set that controls conversion to MOBI.

    With defaults=None the options are backed by Config('mobi', ...);
    otherwise by StringConfig(defaults, ...).  Returns the populated
    configuration object.
    '''
    description = _('Options to control the conversion to MOBI')
    profile_names = sorted(Context.PROFILES.keys())

    conf = Config('mobi', description) if defaults is None \
            else StringConfig(defaults, description)

    # Mobipocket-specific switches
    add_mobi = conf.add_group('mobipocket', _('Mobipocket-specific options.'))
    add_mobi('compress', ['--compress'], default=False,
        help=_('Compress file text using PalmDOC compression. '
               'Results in smaller files, but takes a long time to run.'))
    add_mobi('rescale_images', ['--rescale-images'], default=False,
        help=_('Modify images to meet Palm device size limitations.'))
    add_mobi('toc_title', ['--toc-title'], default=None,
        help=_('Title for any generated in-line table of contents.'))
    add_mobi('ignore_tables', ['--ignore-tables'], default=False,
        help=_('Render HTML tables as blocks of text instead of actual '
               'tables. This is neccessary if the HTML contains very large '
               'or complex tables.'))
    add_mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
        help=_('When present, use the author sorting information for '
               'generating the Mobipocket author metadata.'))

    # Renderer profiles; the group description lists the valid names
    add_profile = conf.add_group('profiles', _('Device renderer profiles. '
        'Affects conversion of font sizes, image rescaling and rasterization '
        'of tables. Valid profiles are: %s.') % ', '.join(profile_names))
    add_profile('source_profile', ['--source-profile'],
        default='Browser', choices=profile_names,
        help=_("Source renderer profile. Default is %default."))
    add_profile('dest_profile', ['--dest-profile'],
        default='CybookG3', choices=profile_names,
        help=_("Destination renderer profile. Default is %default."))

    conf.add_opt('encoding', ['--encoding'], default=None,
        help=_('Character encoding for HTML files. Default is to auto detect.'))
    return conf

570

571

572

def option_parser():
    '''
    Return the command-line parser: every option from config() plus
    -o/--output and -v/--verbose.
    '''
    usage = '%prog '+_('[options]')+' file.opf'
    cli = config().option_parser(usage=usage)
    cli.add_option('-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    # -v may be repeated; each occurrence increments the count
    cli.add_option('-v', '--verbose', default=0, action='count',
        help=_('Useful for debugging.'))
    return cli

582

583

def oeb2mobi(opts, inpath):
    '''
    Convert the OEB book at inpath to a MOBI file.

    opts is the parsed option object (see option_parser()).  When
    opts.output is None the output name is the input's base name with
    its extension replaced by '.mobi'.

    Returns 1 when the source or destination profile is unknown and
    None after a successful conversion.
    '''
    log = Logger(logging.getLogger('oeb2mobi'))
    log.setup_cli_handler(opts.verbose)

    target = opts.output
    if target is None:
        stem = os.path.splitext(os.path.basename(inpath))[0]
        target = stem + '.mobi'

    src_profile = opts.source_profile
    if src_profile not in Context.PROFILES:
        log.error(_('Unknown source profile %r') % src_profile)
        return 1
    dst_profile = opts.dest_profile
    if dst_profile not in Context.PROFILES:
        log.error(_('Unknown destination profile %r') % dst_profile)
        return 1

    compression = UNCOMPRESSED
    if opts.compress:
        compression = PALMDOC
    imagemax = None
    if opts.rescale_images:
        imagemax = PALM_MAX_IMAGE_SIZE

    context = Context(src_profile, dst_profile)
    oeb = OEBBook(inpath, logger=log, encoding=opts.encoding)

    # Each transform is built and applied before the next one is built,
    # in the same sequence as before.
    HTMLTOCAdder(title=opts.toc_title).transform(oeb, context)
    CaseMangler().transform(oeb, context)
    font_base = context.dest.fbase
    font_keys = context.dest.fnums.values()
    CSSFlattener(fbase=font_base, fkey=font_keys,
                 unfloat=True, untable=True).transform(oeb, context)
    SVGRasterizer().transform(oeb, context)
    ManifestTrimmer().transform(oeb, context)
    MobiMLizer(ignore_tables=opts.ignore_tables).transform(oeb, context)

    mobi_writer = MobiWriter(compression=compression, imagemax=imagemax,
            prefer_author_sort=opts.prefer_author_sort)
    mobi_writer.dump(oeb, target)
    run_plugins_on_postprocess(target, 'mobi')
    log.info(_('Output written to ') + target)

622

623

def main(argv=sys.argv):
    '''
    Command-line entry point.

    Expects exactly one positional argument (the input file); otherwise
    prints the help text and returns 1.  On a normal run it returns
    whatever oeb2mobi() returns.
    '''
    cli = option_parser()
    opts, positional = cli.parse_args(argv[1:])
    if len(positional) == 1:
        return oeb2mobi(opts, positional[0])
    cli.print_help()
    return 1

if __name__ == '__main__':
    sys.exit(main())

1611

def _generate_index(self):
    '''
    Build the index records and append them to self._records.

    Three records are always appended when a TOC exists: the primary
    INDX0 (header, TAGX, last-entry IDXT), the primary INDX1 (the
    entries produced by self._generate_indxt()) and the compiled TOC
    strings (self._ctoc).  For periodicals (mobiType > 0x100) a
    secondary INDX0/INDX1 pair describing the author, description and
    mastheadImage fields follows.

    self._primary_index_record is set to the index of the primary INDX0
    record, or left as None when the document has no TOC (in which case
    nothing is appended).

    At verbosity > 3 the five periodical index records are also dumped
    to a fresh temporary directory for debugging.
    '''
    self._oeb.log('Generating INDX ...')
    self._primary_index_record = None

    # Build the NCXEntries and INDX
    indxt, indxt_count, indices, last_name = \
            self._generate_indxt(self._ctoc)

    if last_name is None:
        self._oeb.log.warn('Input document has no TOC. No index generated.')
        return

    # Assemble the INDX0[0] and INDX1[0] output streams
    indx1 = StringIO()
    indx1.write('INDX'+pack('>I', 0xc0)) # header length

    # 0x8 - 0xb : Unknown
    indx1.write('\0'*4)

    # 0xc - 0xf : Header type
    indx1.write(pack('>I', 1))

    # 0x10 - 0x13 : Unknown
    indx1.write('\0'*4)

    # 0x14 - 0x17 : IDXT offset
    # 0x18 - 0x1b : IDXT count
    indx1.write(pack('>I', 0xc0+len(indxt)))
    indx1.write(pack('>I', indxt_count + 1))

    # 0x1c - 0x23 : Unknown
    indx1.write('\xff'*8)

    # 0x24 - 0xbf
    indx1.write('\0'*156)
    indx1.write(indxt)
    indx1.write(indices)
    indx1 = indx1.getvalue()

    idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1)
    idxt0 = align_block(idxt0)
    indx0 = StringIO()

    if self._MobiDoc.mobiType == 0x002 :
        tagx = TAGX['chapter']
    else :
        tagx = TAGX['periodical']

    tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx)
    indx0_indices_pos = 0xc0 + len(tagx) + len(idxt0)
    indx0_indices = align_block('IDXT' + pack('>H', 0xc0 + len(tagx)))

    # Generate record header
    header = StringIO()

    header.write('INDX')
    header.write(pack('>I', 0xc0)) # header length

    # 0x08 - 0x0b : Unknown
    header.write('\0'*4)

    # 0x0c - 0x0f : Header type
    header.write(pack('>I', 0))

    # 0x10 - 0x13 : Generator ID
    # This value may impact the position of flagBits written in
    # write_article_node(). Change with caution.
    header.write(pack('>I', 6))

    # 0x14 - 0x17 : IDXT offset
    header.write(pack('>I', indx0_indices_pos))

    # 0x18 - 0x1b : IDXT count
    header.write(pack('>I', 1))

    # 0x1c - 0x1f : Text encoding ?
    # GR: This needs to be either 0xFDE9 or 0x4E4
    header.write(pack('>I', 0xFDE9))

    # 0x20 - 0x23 : Language code?
    header.write(iana2mobi(str(self._oeb.metadata.language[0])))

    # 0x24 - 0x27 : Number of TOC entries in INDX1
    header.write(pack('>I', indxt_count + 1))

    # 0x28 - 0x2b : ORDT Offset
    header.write('\0'*4)

    # 0x2c - 0x2f : LIGT offset
    header.write('\0'*4)

    # 0x30 - 0x33 : Number of LIGT entries
    header.write('\0'*4)

    # 0x34 - 0x37 : Unknown
    header.write(pack('>I', 1))

    # 0x38 - 0xb3 : Unknown (pad?)
    header.write('\0'*124)

    # 0xb4 - 0xb7 : TAGX offset
    header.write(pack('>I', 0xc0))

    # 0xb8 - 0xbf : Unknown
    header.write('\0'*8)

    header = header.getvalue()

    indx0.write(header)
    indx0.write(tagx)
    indx0.write(idxt0)
    indx0.write(indx0_indices)
    indx0 = indx0.getvalue()

    self._primary_index_record = len(self._records)
    self._records.extend([indx0, indx1, self._ctoc])

    # Indexing for author/description fields in summary section
    # Test for indexed periodical - only one that needs secondary index
    if self._MobiDoc.mobiType > 0x100 :
        # Write secondary index records
        tagx = TAGX['secondary_'+'periodical']
        tagx_len = 8 + len(tagx)

        # generate secondary INDX0
        sindx0 = StringIO()
        sindx0.write('INDX'+pack('>I', 0xc0)+'\0'*8) # header + 8x00
        sindx0.write(pack('>I', 0x06)) # generator ID
        sindx0.write(pack('>I', 0xe8)) # IDXT offset
        sindx0.write(pack('>I', 1)) # IDXT entries
        sindx0.write(pack('>I', 65001)) # encoding
        sindx0.write('\xff'*4) # language
        sindx0.write(pack('>I', 4)) # IDXT Entries in INDX1
        sindx0.write('\0'*4) # ORDT Offset
        sindx0.write('\0'*136) # everything up to TAGX offset
        sindx0.write(pack('>I', 0xc0)) # TAGX offset
        sindx0.write('\0'*8) # unknowns
        sindx0.write('TAGX'+pack('>I', tagx_len)+tagx) # TAGX
        sindx0.write('\x0D'+'mastheadImage' + '\x00\x04') # mastheadImage
        sindx0.write('IDXT'+'\x00\xd8\x00\x00') # offset plus pad

        # generate secondary INDX1
        sindx1 = StringIO()
        sindx1.write('INDX' + pack('>I', 0xc0) + '\0'*4) # header + 4x00
        sindx1.write(pack('>I', 1)) # blockType 1
        sindx1.write(pack('>I', 0x00)) # unknown
        sindx1.write('\x00\x00\x00\xF0') # IDXT offset
        sindx1.write(pack('>I', 4)) # num of IDXT entries
        sindx1.write('\xff'*8) # encoding, language
        sindx1.write('\0'*(0xc0-sindx1.tell())) # 00 to IDXT Entries @ 0xC0
        sindx1.write('\0\x01\x80') # 1 - null
        sindx1.write('\x06'+'author' + '\x02\x80\x80\xc7') # author
        sindx1.write('\x0B'+'description' + '\x02\x80\x80\xc6') # description
        sindx1.write('\x0D'+'mastheadImage' + '\x02\x85\x80\xc5') # mastheadImage
        sindx1.write('IDXT'+'\x00\xc0\x00\xc3\x00\xce\x00\xde') # IDXT header

        # Write INDX0 and INDX1 to the stream
        sindx0, sindx1 = sindx0.getvalue(), sindx1.getvalue()
        self._records.extend((sindx0, sindx1))

        if self.opts.verbose > 3:
            from tempfile import mkdtemp
            import os
            t = mkdtemp()
            # Pair each dump name with the record it names.  The previous
            # negative-index lookup wrote indx0 and indx1 under each
            # other's file names.
            dumps = (('indx0', indx0), ('indx1', indx1), ('ctoc', self._ctoc),
                     ('sindx0', sindx0), ('sindx1', sindx1))
            for name, raw in dumps:
                # 'with' closes each dump file deterministically
                with open(os.path.join(t, name+'.bin'), 'wb') as dump:
                    dump.write(raw)
            self._oeb.log.debug('Index records dumped to', t)

1779

1780

def _clean_text_value(self, text):

1781

if text is not None and text.strip() :

1782

text = text.strip()

1783

if not isinstance(text, unicode):

1784

text = text.decode('utf-8', 'replace')

1785

text = text.encode('utf-8')

1786

else :

1787

text = "(none)".encode('utf-8')

1788

return text

1789

1790

def _add_flat_ctoc_node(self, node, ctoc, title=None):
    '''
    Append one entry for node to the flat CTOC stream.

    The cleaned title (node.title unless title is given) is written to
    ctoc as a length-prefixed string.  A map holding the node's klass
    ('article' is recorded as 'chapter') and the title's offset is
    appended to self._ctoc_map.  Also updates self._last_toc_entry and
    increments self._chapterCount.
    '''
    label = self._clean_text_value(node.title if title is None else title)
    self._last_toc_entry = label

    # article = chapter
    recorded_klass = 'chapter' if node.klass == 'article' else node.klass

    # The offset is taken before the title itself is written.
    name_map = {'klass': recorded_klass, 'titleOffset': ctoc.tell()}
    ctoc.write(decint(len(label), DECINT_FORWARD)+label)
    self._chapterCount += 1

    self._ctoc_map.append(name_map)

1814

1815

1816

def _add_structured_ctoc_node(self, node, ctoc, title=None):

1817

# Process 'periodical', 'section' and 'article'

1818

if node.klass is None :

1819

return

1820

t = node.title if title is None else title

1821

t = self._clean_text_value(t)

1822

self._last_toc_entry = t

1823

1824

# Create an empty dictionary for this node

1825

ctoc_name_map = {}

1826

1827

# Add the klass of this node

1828

ctoc_name_map['klass'] = node.klass

1829

1830

if node.klass == 'chapter':

1831

# Add title offset to name map

1832

ctoc_name_map['titleOffset'] = ctoc.tell()

1833

ctoc.write(decint(len(t), DECINT_FORWARD)+t)

1834

self._chapterCount += 1

1835

1836

elif node.klass == 'periodical' :

1837

# Add title offset

1838

ctoc_name_map['titleOffset'] = ctoc.tell()

1839

ctoc.write(decint(len(t), DECINT_FORWARD)+t)

1840

1841

# Look for existing class entry 'periodical' in _ctoc_map

1842

for entry in self._ctoc_map:

1843

if entry['klass'] == 'periodical':

1844

# Use the pre-existing instance

1845

ctoc_name_map['classOffset'] = entry['classOffset']

1846

break

1847

else :

1848

continue

1849

else:

1850

ctoc_name_map['classOffset'] = ctoc.tell()

1851

ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)

1852

1853

self._periodicalCount += 1

1854

1855

elif node.klass == 'section' :

1856

# Add title offset

1857

ctoc_name_map['titleOffset'] = ctoc.tell()

1858

ctoc.write(decint(len(t), DECINT_FORWARD)+t)

1859

1860

# Look for existing class entry 'section' in _ctoc_map

1861

for entry in self._ctoc_map:

1862

if entry['klass'] == 'section':

1863

# Use the pre-existing instance

1864

ctoc_name_map['classOffset'] = entry['classOffset']

1865

break

1866

else :

1867

continue

1868

else:

1869

ctoc_name_map['classOffset'] = ctoc.tell()

1870

ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)

1871

1872

self._sectionCount += 1

1873

1874

elif node.klass == 'article' :

1875

# Add title offset/title

1876

ctoc_name_map['titleOffset'] = ctoc.tell()

1877

ctoc.write(decint(len(t), DECINT_FORWARD)+t)

1878

1879

# Look for existing class entry 'article' in _ctoc_map

1880

for entry in self._ctoc_map:

1881

if entry['klass'] == 'article':

1882

ctoc_name_map['classOffset'] = entry['classOffset']

1883

break

1884

else :

1885

continue

1886

else:

1887

ctoc_name_map['classOffset'] = ctoc.tell()

1888

ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)

1889

1890

# Add description offset/description

1891

if node.description :

1892

d = self._clean_text_value(node.description)

1893

ctoc_name_map['descriptionOffset'] = ctoc.tell()

1894

ctoc.write(decint(len(d), DECINT_FORWARD)+d)

1895

else :

1896

ctoc_name_map['descriptionOffset'] = None

1897

1898

# Add author offset/description

1899

if node.author :

1900

a = self._clean_text_value(node.author)

1901

ctoc_name_map['authorOffset'] = ctoc.tell()

1902

ctoc.write(decint(len(a), DECINT_FORWARD)+a)

1903

else :

1904

ctoc_name_map['authorOffset'] = None

1905

1906

self._articleCount += 1

1907

1908

else :

1909

raise NotImplementedError( \

1910

'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \

1911

(node.title, node.klass, node.play_order))

1912

1913

# append this node's name_map to map

1914

self._ctoc_map.append(ctoc_name_map)

1915

1916

def _generate_ctoc(self):
    """Build the compiled TOC (CTOC) string block and create self._MobiDoc.

    Resets self._ctoc_map, self._last_toc_entry and the four per-type node
    counters, then walks self._oeb.toc and hands each node to
    _add_structured_ctoc_node (when self._conforming_periodical_toc is
    true) or to _add_flat_ctoc_node (otherwise).  In the flat case
    self._oeb.toc.nodes is replaced by the list of nodes that were kept.

    Afterwards a mobiType is chosen from the counters,
    self.opts.mobi_periodical and the publication_type metadata, and
    self._MobiDoc is set to MobiDocument(mobiType).

    Returns align_block() applied to everything written to the CTOC buffer.
    Raises NotImplementedError when no mobiType rule matches.
    """
    # Generate the compiled TOC strings
    # Each node has 1-4 CTOC entries:
    #   Periodical (0xDF)
    #       title, class
    #   Section (0xFF)
    #       title, class
    #   Article (0x3F)
    #       title, class, description, author
    #   Chapter (0x0F)
    #       title, class
    #   nb: Chapters don't actually have @class, so we synthesize it
    #   in reader._toc_from_navpoint

    toc = self._oeb.toc
    reduced_toc = []
    self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
    self._last_toc_entry = None
    ctoc = StringIO()

    # Track the individual node types
    # (presumably incremented by the _add_*_ctoc_node helpers - verify there)
    self._periodicalCount = 0
    self._sectionCount = 0
    self._articleCount = 0
    self._chapterCount = 0

    # NOTE(review): 'first' is assigned below but never read in this method
    first = True

    if self._conforming_periodical_toc :
        self._oeb.logger.info('Generating structured CTOC ...')
        for (child) in toc.iter():
            if self.opts.verbose > 2 :
                self._oeb.logger.info(" %s" % child)
            self._add_structured_ctoc_node(child, ctoc)
            first = False

    else :
        self._oeb.logger.info('Generating flat CTOC ...')
        previousOffset = -1
        currentOffset = 0
        for (i, child) in enumerate(toc.iterdescendants()):
            # Only add chapters or articles at depth==1
            # no class defaults to 'chapter'
            if child.klass is None : child.klass = 'chapter'
            if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
                if self.opts.verbose > 2 :
                    self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
                        (child.klass, child.depth(), child) )

                # Test to see if this child's offset is the same as the previous child's
                # offset, skip it
                h = child.href
                first = False

                if h is None:
                    self._oeb.logger.warn(' Ignoring TOC entry with no href:',
                        child.title)
                    continue
                if h not in self._id_offsets:
                    self._oeb.logger.warn(' Ignoring missing TOC entry:',
                        child)
                    continue

                currentOffset = self._id_offsets[h]
                # print "_generate_ctoc: child offset: 0x%X" % currentOffset

                # Only the first of several consecutive entries that resolve
                # to the same offset is kept
                if currentOffset != previousOffset :
                    self._add_flat_ctoc_node(child, ctoc)
                    reduced_toc.append(child)
                    previousOffset = currentOffset
                else :
                    self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))

            else :
                if self.opts.verbose > 2 :
                    self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
                        (child.klass, child.depth(),i))

        # Update the TOC with our edited version
        self._oeb.toc.nodes = reduced_toc

    # Instantiate a MobiDocument(mobitype)
    # 0x002 is reported below as 'book', 0x102 as 'flat periodical' and any
    # other value above 0x100 as 'structured periodical'
    if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
        not self.opts.mobi_periodical :
        mobiType = 0x002
    elif self._periodicalCount and self._sectionCount == 1 :
        mobiType = 0x102
    elif self._periodicalCount and self._sectionCount > 1 :
        pt = None
        if self._oeb.metadata.publication_type:
            # The text after the first ':' of the first publication_type
            # entry selects the type; only 'newspaper' is special-cased
            x = unicode(self._oeb.metadata.publication_type[0]).split(':')
            if len(x) > 1:
                pt = x[1]
        mobiType = {'newspaper':0x101}.get(pt, 0x103)
    else :
        raise NotImplementedError('_generate_ctoc: Unrecognized document structured')

    self._MobiDoc = MobiDocument(mobiType)

    if self.opts.verbose > 2 :
        structType = 'book'
        if mobiType > 0x100 :
            structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
        self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
        if mobiType > 0x100 :
            self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
                (self._periodicalCount, self._sectionCount, self._articleCount) )
        else :
            self._oeb.logger.info("chapterCount: %d" % self._chapterCount)

    return align_block(ctoc.getvalue())

2026

2027

def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
    """Append one periodical entry to indxt and record its position.

    indxt   -- stream receiving the entry bytes
    indices -- stream receiving the packed position of the entry
    index   -- position of this node's offsets in self._ctoc_map
    offset, length -- written as the entry's offset and length fields
    count   -- entry number; its 4-digit upper-case hex form is the name
    firstSection, lastSection -- written as the section range fields
    """
    def put_vwi(value):
        # Every numeric field goes through decint(..., DECINT_FORWARD)
        indxt.write(decint(value, DECINT_FORWARD))

    # Save the position of this entry for the IDXT indices
    pos = 0xc0 + indxt.tell()
    indices.write(pack('>H', pos))

    # Length-prefixed entry name
    entry_name = "%04X" % count
    indxt.write(chr(len(entry_name)) + entry_name)

    indxt.write(INDXT['periodical'])                    # entryType
    indxt.write(chr(1))                                 # subType 1
    put_vwi(offset)
    put_vwi(length)
    put_vwi(self._ctoc_map[index]['titleOffset'])       # title offset in CNCX
    put_vwi(0)                                          # unknown byte
    put_vwi(self._ctoc_map[index]['classOffset'])       # class offset in CNCX
    put_vwi(firstSection)                               # first section in periodical
    put_vwi(lastSection)                                # last section in periodical
    put_vwi(0)                                          # trailing zero value

2045

2046

def _write_section_node(self, indxt, indices, myCtocMapIndex, index, offset, length, count, firstArticle, lastArticle, parentIndex) :
    """Append one section entry to indxt and record its position.

    indxt   -- stream receiving the entry bytes
    indices -- stream receiving the packed position of the entry
    myCtocMapIndex -- position of this section's offsets in self._ctoc_map
    index   -- accepted for call compatibility; not used by this method
    offset, length -- written as the entry's offset and length fields
    count   -- entry number; its 4-digit upper-case hex form is the name
    firstArticle, lastArticle -- written as the article range fields
    parentIndex -- written as the index of the parent periodical
    """
    def put_vwi(value):
        # Every numeric field goes through decint(..., DECINT_FORWARD)
        indxt.write(decint(value, DECINT_FORWARD))

    # Save the position of this entry for the IDXT indices
    pos = 0xc0 + indxt.tell()
    indices.write(pack('>H', pos))

    # Length-prefixed entry name
    entry_name = "%04X" % count
    indxt.write(chr(len(entry_name)) + entry_name)

    indxt.write(INDXT['section'])                               # entryType
    indxt.write(chr(0))                                         # subType 0
    put_vwi(offset)
    put_vwi(length)
    put_vwi(self._ctoc_map[myCtocMapIndex]['titleOffset'])      # title offset in CNCX
    put_vwi(1)                                                  # unknown byte
    put_vwi(self._ctoc_map[myCtocMapIndex]['classOffset'])      # class offset in CNCX
    put_vwi(parentIndex)                                        # index of periodical parent
    put_vwi(firstArticle)                                       # first article in section
    put_vwi(lastArticle)                                        # last article in section

2063

2064

def _write_article_node(self, indxt, indices, index, offset, length, count, parentIndex) :
    """Append one article entry to indxt and record its position.

    indxt   -- stream receiving the entry bytes
    indices -- stream receiving the packed position of the entry
    index   -- position of this article's offsets in self._ctoc_map
    offset, length -- written as the entry's offset and length fields
    count   -- entry number; its 4-digit upper-case hex form is the name
    parentIndex -- written as the index of the parent section

    The description and author offsets are appended only when the
    corresponding self._ctoc_map value is truthy.
    """
    def put_vwi(value):
        # Every numeric field goes through decint(..., DECINT_FORWARD)
        indxt.write(decint(value, DECINT_FORWARD))

    # Save the position of this entry for the IDXT indices
    pos = 0xc0 + indxt.tell()
    indices.write(pack('>H', pos))

    # Length-prefixed entry name
    entry_name = "%04X" % count
    indxt.write(chr(len(entry_name)) + entry_name)
    indxt.write(INDXT['article'])                       # entryType

    hasAuthor = bool(self._ctoc_map[index]['authorOffset'])
    hasDescription = bool(self._ctoc_map[index]['descriptionOffset'])

    # flagBits may be dependent upon the generatorID written at 0x10 in
    # generate_index() in INDX0. Mobigen uses a generatorID of 2 and writes
    # these bits at positions 1 & 2; calibre uses a generatorID of 6 and
    # writes the bits at positions 2 & 3.
    flagBits = 0
    if hasAuthor :
        flagBits |= 0x4
    if hasDescription :
        flagBits |= 0x2
    indxt.write(pack('>B',flagBits))                    # author/description flags

    put_vwi(offset)
    put_vwi(length)
    put_vwi(self._ctoc_map[index]['titleOffset'])       # title offset in CNCX
    put_vwi(2)                                          # unknown byte
    put_vwi(self._ctoc_map[index]['classOffset'])       # class offset in CNCX
    put_vwi(parentIndex)                                # index of parent section

    # Optional trailing fields: description first, then author
    descriptionOffset = self._ctoc_map[index]['descriptionOffset']
    if descriptionOffset :
        put_vwi(descriptionOffset)

    authorOffset = self._ctoc_map[index]['authorOffset']
    if authorOffset :
        put_vwi(authorOffset)

2100

2101

def _write_chapter_node(self, indxt, indices, index, offset, length, count):
    """Write an INDX1 NCXEntry of entryType 0x0F (chapter).

    indxt   -- stream receiving the entry bytes
    indices -- stream receiving the packed position of the entry
    index   -- position of this chapter's offsets in self._ctoc_map
    offset, length -- written as the entry's offset and length fields
    count   -- entry number; its 4-digit upper-case hex form is the name

    Unlike the periodical/section entries, no subtype byte is written after
    the entry type.
    """
    # The former "if self.opts.verbose > 2: pass" block was removed: it did
    # nothing, and its commented-out debug call referenced a name ('node')
    # that does not exist in this method.

    pos = 0xc0 + indxt.tell()
    indices.write(pack('>H', pos))                      # Save the offset for IDXTIndices
    name = "%04X"%count
    indxt.write(chr(len(name)) + name)                  # Write the length-prefixed name
    indxt.write(INDXT['chapter'])                       # entryType [0x0F | 0xDF | 0xFF | 0x3F]
    indxt.write(decint(offset, DECINT_FORWARD))         # offset
    indxt.write(decint(length, DECINT_FORWARD))         # length
    indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
    indxt.write(decint(0, DECINT_FORWARD))              # unknown byte

2117

2118

def _compute_offset_length(self, i, node, entries) :

2119

h = node.href

2120

if h not in self._id_offsets:

2121

self._oeb.log.warning('Could not find TOC entry:', node.title)

2122

return -1, -1

2123

2124

offset = self._id_offsets[h]

2125

length = None

2126

# Calculate length based on next entry's offset

2127

for sibling in entries[i+1:]:

2128

h2 = sibling.href

2129

if h2 in self._id_offsets:

2130

offset2 = self._id_offsets[h2]

2131

if offset2 > offset:

2132

length = offset2 - offset

2133

break

2134

if length is None:

2135

length = self._content_length - offset

2136

return offset, length

2137

2138

def _establish_document_structure(self) :

2139

documentType = None

2140

try :

2141

klass = self._ctoc_map[0]['klass']

2142

except :

2143

klass = None

2144

2145

if klass == 'chapter' or klass == None :

2146

documentType = 'book'

2147

if self.opts.verbose > 2 :

2148

self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")

2149

self._MobiDoc.documentStructure = MobiBook()

2150

2151

elif klass == 'periodical' :

2152

documentType = klass

2153

if self.opts.verbose > 2 :

2154

self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")

2155

self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())

2156

self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle

2157

else :

2158

raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)

2159

return documentType

2160

2161

def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :

2162

sectionTitles = list(child.iter())[1:]

2163

sectionIndices = []

2164

sectionParents = []

2165

for (j, section) in enumerate(sectionTitles):

2166

# iterate over just the sections

2167

2168

if section.klass == 'periodical' :

2169

# Write our index to the list

2170

sectionIndices.append(currentSection)

2171

2172

if self.opts.verbose > 3 :

2173

self._oeb.logger.info("Periodical: %15.15s \tkls:%s \tdpt:%d ply:%03d" % \

2174

(section.title, section.klass, section.depth(), section.play_order) )

2175

2176

elif section.klass == 'section' :

2177

# Add sections, save in list with original sequence number

2178

myNewSection = myPeriodical.addSectionParent(myDoc, j)

2179

sectionParents.append(myNewSection)

2180

2181

# Bump the section #

2182

currentSection += 1

2183

# Write our index to the list

2184

sectionIndices.append(currentSection)

2185

2186

if self.opts.verbose > 3 :

2187

self._oeb.logger.info(" Section: %15.15s \tkls:%s \tdpt:%d ply:%03d \tindex:%d" % \

2188

(section.title, section.klass, section.depth(), section.play_order,j) )

2189

2190

elif section.klass == 'article' :

2191

# Write our index to the list

2192

sectionIndices.append(currentSection)

2193

2194

else :

2195

if self.opts.verbose > 3 :

2196

self._oeb.logger.info( " Unrecognized class %s in structured document" % section.klass)

2197

return sectionIndices, sectionParents

2198

2199

2200

def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
    """Create a MobiArticle for every node below section.

    i              -- position of section within entries
    section        -- the section node; everything section.iter() yields
                      after the first item is treated as one of its articles
    entries        -- the list that i indexes; passed on for length computation
    sectionIndices -- per-entry section numbers
    sectionParents -- section parent objects, indexed by the values found
                      in sectionIndices

    Each new article is registered with its section parent via addArticle().
    """
    sectionArticles = list(section.iter())[1:]
    # Iterate over the section's articles

    for (j, article) in enumerate(sectionArticles):
        # Recompute offset and length for each article
        # NOTE(review): the section's own position i (not the article's) is
        # passed, so the length search starts right after the section entry
        offset, length = self._compute_offset_length(i, article, entries)
        if self.opts.verbose > 2 :
            self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )

        # The j-th article sits j + 1 places after its section
        ctoc_map_index = i + j + 1

        #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
        #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
        # The section number recorded for the entry *before* this section
        # selects the parent - presumably because section numbers are
        # 1-based while sectionParents is 0-based; confirm against
        # _generate_section_indices
        mySectionParent = sectionParents[sectionIndices[i-1]]
        myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
        mySectionParent.addArticle( myNewArticle )

2217

2218

2219

def _add_book_chapters(self, myDoc, indxt, indices):

2220

chapterCount = myDoc.documentStructure.chapterCount()

2221

if self.opts.verbose > 3 :

2222

self._oeb.logger.info("Writing %d chapters for mobitype 0x%03X" % (chapterCount, myDoc.mobiType))

2223

2224

for (c, chapter) in enumerate(list(myDoc.documentStructure.chapters)) :

2225

index = chapter.myCtocMapIndex

2226

self._write_chapter_node(indxt, indices, index, chapter.startAddress, chapter.length, c)

2227

2228

last_name = "%04X"%c # Returned when done

2229

return last_name, c

2230

2231

def _add_periodical_flat_articles(self, myDoc, indxt, indices):

2232

sectionParent = myDoc.documentStructure.sectionParents[0]

2233

articleCount = len(sectionParent.articles)

2234

if self.opts.verbose > 3 :

2235

self._oeb.logger.info("Writing %d articles for mobitype 0x%03X" % (articleCount, myDoc.mobiType))

2236

2237

# Singleton periodical

2238

index = 0

2239

offset = myDoc.documentStructure.startAddress

2240

length = myDoc.documentStructure.length

2241

c = 0

2242

firstSection = myDoc.documentStructure.firstSectionIndex

2243

lastSection = myDoc.documentStructure.lastSectionIndex

2244

self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection)

2245

2246

# Singleton section

2247

index += 1

2248

offset = sectionParent.startAddress

2249

length = sectionParent.sectionLength

2250

c += 1

2251

firstArticle = sectionParent.firstArticleIndex

2252

lastArticle = sectionParent.lastArticleIndex

2253

parentIndex = sectionParent.parentIndex

2254

self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, index, offset, length, c, firstArticle, lastArticle, parentIndex)

2255

2256

last_name = "%04X"%c

2257

2258

# articles

2259

for (i, article) in enumerate(list(sectionParent.articles)) :

2260

index = article.myCtocMapIndex

2261

offset = article.startAddress

2262

length = article.articleLength

2263

c += 1

2264

parentIndex = article.sectionParentIndex

2265

self._write_article_node(indxt, indices, index, offset, length, c, parentIndex)

2266

2267

last_name = "%04X" % c

2268

return last_name, c

2269

2270

def _add_periodical_structured_articles(self, myDoc, indxt, indices):
    """Write the entries of a periodical with several sections.

    Entries are written in three passes: the periodical entry (number 0),
    then one entry per section for section numbers firstSectionIndex
    through lastSectionIndex, then the article entries of each of those
    sections in turn.  Section number n is looked up at
    sectionParents[n - 1].

    Returns (last_name, c): the 4-digit hex name and the number of the last
    entry written.
    """
    # Write NCXEntries for Structured Periodical
    # <periodical>
    #   <section>
    #   <section> ...
    #     <article>
    #     <article> ...

    if self.opts.verbose > 2 :
        self._oeb.logger.info( "Writing NCXEntries for mobiType 0x%03X" % myDoc.mobiType)

    # NOTE(review): articleCount is computed here and in the section loop
    # but never read in this method
    sectionParent = myDoc.documentStructure.sectionParents[0]
    articleCount = len(sectionParent.articles)

    # Write opening periodical 0xDF entry
    index = 0
    offset = myDoc.documentStructure.startAddress
    length = myDoc.documentStructure.length
    c = 0
    firstSection = myDoc.documentStructure.firstSectionIndex
    lastSection = myDoc.documentStructure.lastSectionIndex
    self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection)

    # Write each section 0xFF entry
    sectionCount = firstSection
    while sectionCount <= lastSection :
        # section
        sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1]
        articleCount = len(sectionParent.articles)
        offset = sectionParent.startAddress
        length = sectionParent.sectionLength
        c += 1
        firstArticle = sectionParent.firstArticleIndex
        lastArticle = sectionParent.lastArticleIndex
        parentIndex = sectionParent.parentIndex
        # The section number doubles as the 'index' argument here
        self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, sectionCount, offset, length, c, firstArticle, lastArticle, parentIndex)
        sectionCount += 1

    # Write each article 0x3F entry
    sectionCount = firstSection
    while sectionCount <= lastSection :
        # section
        sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1]

        # Keeps last_name defined even for a section without articles
        last_name = "%04X"%c

        # articles
        for (i, article) in enumerate(list(sectionParent.articles)) :
            if self.opts.verbose > 3 :
                self._oeb.logger.info( "Adding section:article %d:%02d" % \
                    (sectionParent.myIndex, i))
            index = article.myCtocMapIndex
            offset = article.startAddress
            length = article.articleLength
            c += 1
            parentIndex = article.sectionParentIndex
            self._write_article_node(indxt, indices, index, offset, length, c, parentIndex)

            last_name = "%04X"%c

        sectionCount += 1

    return last_name, c

2343

2344

def _generate_indxt(self, ctoc):
    """Build the index entry data for the document.

    Establishes the document structure (book or periodical), populates it
    from self._oeb.toc, and then writes the entries with the _add_* method
    matching the document's mobiType.

    ctoc -- accepted but not referenced in this method.

    Returns a 4-tuple:
        (align_block(entry data), c, align_block(index data), last_name)
    where c and last_name come from the _add_* method that was used.
    """
    # Assumption: child.depth() represents nestedness of the TOC.
    # A flat document (book) has a depth of 2:
    # <navMap> child.depth() = 2
    #   <navPoint> Chapter child.depth() = 1
    #   <navPoint> Chapter etc
    # -or-
    # A structured document (periodical) has a depth of 4 (Mobigen-prepped)
    # <navMap> child.depth() = 4
    #   <navPoint> Periodical child.depth() = 3
    #     <navPoint> Section 1 child.depth() = 2
    #       <navPoint> Article child.depth() = 1
    #       <navPoint> Article(s) child.depth() = 1
    #     <navpoint> Section 2

    documentType = "unknown"
    sectionIndices = []
    sectionParents = []
    currentSection = 0 # Starting section number
    toc = self._oeb.toc
    indxt, indices, c = StringIO(), StringIO(), 0

    # The index data starts with the literal tag 'IDXT'
    indices.write('IDXT')
    c = 0
    last_name = None

    # 'book', 'periodical' or None
    documentType = self._establish_document_structure()
    myDoc = self._MobiDoc

    # Only the first node yielded by toc.iter() is inspected here
    nodes = list(toc.iter())[0:1]
    for (i, child) in enumerate(nodes) :

        if documentType == "periodical" :
            myPeriodical = myDoc.documentStructure
            if self.opts.verbose > 3 :
                self._oeb.logger.info("\nDocument: %s \tkls:%s \tdpt:%d ply:%03d" % \
                    (child.title, child.klass, child.depth(), child.play_order) )
            sectionIndices, sectionParents = \
                self._generate_section_indices(child, currentSection, myPeriodical, myDoc)

        elif documentType == "book" :
            myBook = myDoc.documentStructure

            if self.opts.verbose > 3 :
                self._oeb.logger.info("\nBook: %-19.19s \tkls:%s \tdpt:%d ply:%03d" % \
                    (child.title, child.klass, child.depth(), child.play_order) )
        else :
            if self.opts.verbose > 3 :
                self._oeb.logger.info("unknown document type %12.12s \tdepth:%d" % (child.title, child.depth()) )

    # Original code starts here
    # test first node for depth/class
    entries = list(toc.iter())[1:]
    for (i, child) in enumerate(entries):
        # Entries without a non-blank title are skipped
        if not child.title or not child.title.strip():
            continue

        offset, length = self._compute_offset_length(i, child, entries)

        if child.klass == 'chapter' or \
            (not self.opts.mobi_periodical and child.klass == 'article') :
            # create chapter object - confirm i + 0 is correct!!
            # NOTE(review): myBook is only bound above when documentType is
            # "book"; a chapter met in a periodical document would fail here
            myNewChapter = MobiChapter(myDoc.getNextNode(), offset, length, i)
            myBook.addChapter(myNewChapter)

            # Diagnostic
            # The fallback logs a placeholder title when formatting the real
            # one raises
            try :
                if self.opts.verbose > 3 :
                    self._oeb.logger.info( " Chapter: %-14.14s \tcls:%s \tdpt:%d ply:%03d \toff:0x%X \t:len0x%X" % \
                        (child.title, child.klass, child.depth(), child.play_order, offset, length) )
            except :
                if self.opts.verbose > 3 :
                    self._oeb.logger.info( " Chapter: %-14.14s \tclass:%s \tdepth:%d playOrder:%03d \toff:0x%X \t:len0x%X" % \
                        ("(bad string)", child.klass, child.depth(), child.play_order, offset, length))

        elif child.klass == 'section' and self.opts.mobi_periodical :
            if self.opts.verbose > 3 :
                self._oeb.logger.info("\n Section: %-15.15s \tkls:%s \tdpt:%d ply:%03d" % \
                    (child.title, child.klass, child.depth(), child.play_order))
            self._generate_section_article_indices(i, child, entries, sectionIndices, sectionParents)

    if self.opts.verbose > 3 :
        self._oeb.logger.info("")

    mobiType = myDoc.mobiType
    if self.opts.verbose > 3 :
        self._MobiDoc.dumpInfo()

    # Pick the writer that matches the document type
    if mobiType == 0x02 :
        last_name, c = self._add_book_chapters(myDoc, indxt, indices)

    elif mobiType == 0x102 and myDoc.documentStructure.sectionCount() == 1 :
        last_name, c = self._add_periodical_flat_articles(myDoc, indxt, indices)

    else :
        last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)

    return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name

2443

2444

class HTMLRecordData(object):
    """ A data structure containing indexing/navigation data for an HTML record """

    def __init__(self):
        # Every field starts out as -1
        self._continuingNode = -1
        self._continuingNodeParent = -1
        self._openingNode = -1
        self._openingNodeParent = -1
        self._currentSectionNodeCount = -1
        self._nextSectionNumber = -1
        self._nextSectionOpeningNode = -1
        self._nextSectionNodeCount = -1

    # --- continuingNode ---------------------------------------------------
    def getContinuingNode(self):
        return self._continuingNode

    def setContinuingNode(self, value):
        self._continuingNode = value

    continuingNode = property(getContinuingNode, setContinuingNode)

    # --- continuingNodeParent ---------------------------------------------
    def getContinuingNodeParent(self):
        return self._continuingNodeParent

    def setContinuingNodeParent(self, value):
        self._continuingNodeParent = value

    continuingNodeParent = property(getContinuingNodeParent, setContinuingNodeParent)

    # --- openingNode ------------------------------------------------------
    def getOpeningNode(self):
        return self._openingNode

    def setOpeningNode(self, value):
        self._openingNode = value

    openingNode = property(getOpeningNode, setOpeningNode)

    # --- openingNodeParent ------------------------------------------------
    def getOpeningNodeParent(self):
        return self._openingNodeParent

    def setOpeningNodeParent(self, value):
        self._openingNodeParent = value

    openingNodeParent = property(getOpeningNodeParent, setOpeningNodeParent)

    # --- currentSectionNodeCount ------------------------------------------
    def getCurrentSectionNodeCount(self):
        return self._currentSectionNodeCount

    def setCurrentSectionNodeCount(self, value):
        self._currentSectionNodeCount = value

    currentSectionNodeCount = property(getCurrentSectionNodeCount, setCurrentSectionNodeCount)

    # --- nextSectionNumber ------------------------------------------------
    def getNextSectionNumber(self):
        return self._nextSectionNumber

    def setNextSectionNumber(self, value):
        self._nextSectionNumber = value

    nextSectionNumber = property(getNextSectionNumber, setNextSectionNumber)

    # --- nextSectionOpeningNode -------------------------------------------
    def getNextSectionOpeningNode(self):
        return self._nextSectionOpeningNode

    def setNextSectionOpeningNode(self, value):
        self._nextSectionOpeningNode = value

    nextSectionOpeningNode = property(getNextSectionOpeningNode, setNextSectionOpeningNode)

    # --- nextSectionNodeCount ---------------------------------------------
    def getNextSectionNodeCount(self):
        return self._nextSectionNodeCount

    def setNextSectionNodeCount(self, value):
        self._nextSectionNodeCount = value

    nextSectionNodeCount = property(getNextSectionNodeCount, setNextSectionNodeCount)

    def dumpData(self, recordNumber, oeb):
        """Log a summary of this record's fields through oeb.logger.info."""
        oeb.logger.info( "--- Summary of HTML Record 0x%x [%d] indexing ---" % (recordNumber, recordNumber) )
        # One line per field, read and logged in declaration order
        for template, field in (
                (" continuingNode: %03d", "continuingNode"),
                (" continuingNodeParent: %03d", "continuingNodeParent"),
                (" openingNode: %03d", "openingNode"),
                (" openingNodeParent: %03d", "openingNodeParent"),
                (" currentSectionNodeCount: %03d", "currentSectionNodeCount"),
                (" nextSectionNumber: %03d", "nextSectionNumber"),
                (" nextSectionOpeningNode: %03d", "nextSectionOpeningNode"),
                (" nextSectionNodeCount: %03d", "nextSectionNodeCount"),
                ):
            oeb.logger.info(template % getattr(self, field))

2514

2515

class MobiDocument(object):
    """ Hierarchical description of a Mobi document """

    # Starting value of the node counter.  getNextNode() rebinds the
    # attribute on the instance, so this class-level value itself is never
    # modified.
    _nextNode = -1

    def __init__(self, mobitype):
        self._mobitype = mobitype
        # Filled in later through the documentStructure property
        self._documentStructure = None

    def getMobiType(self):
        return self._mobitype

    def setMobiType(self, value):
        self._mobitype = value

    mobiType = property(getMobiType, setMobiType)

    def getDocumentStructure(self):
        return self._documentStructure

    def setDocumentStructure(self, value):
        self._documentStructure = value

    documentStructure = property(getDocumentStructure, setDocumentStructure)

    def getNextNode(self):
        """Advance the node counter and return its new value (0 first)."""
        self._nextNode = self._nextNode + 1
        return self._nextNode

    def dumpInfo(self):
        """Delegate to the dumpInfo() of the attached document structure."""
        self._documentStructure.dumpInfo()

2543

2544

class MobiBook(object):
    """ A container for a flat chapter-to-chapter Mobi book """

    def __init__(self):
        self._chapters = []

    def chapterCount(self):
        """Return the number of chapters currently held."""
        return len(self._chapters)

    def getChapters(self):
        return self._chapters

    def setChapters(self, value):
        self._chapters = value

    chapters = property(getChapters, setChapters)

    def addChapter(self, value):
        """Append a chapter object to the book."""
        self._chapters.append(value)

    def dumpInfo(self):
        """Print a summary of the book and of each chapter to stdout."""
        # A single parenthesised argument makes each line work both as a
        # print statement and as a print() call.
        print("%20s:" % ("Book"))
        print("%20s: %d" % ("Number of chapters", len(self._chapters)))
        for (count, chapter) in enumerate(self._chapters):
            print("%20s: %d" % ("myCtocMapIndex",chapter.myCtocMapIndex))
            print("%20s: %d" % ("Chapter",count))
            print("%20s: 0x%X" % ("startAddress", chapter.startAddress))
            print("%20s: 0x%X" % ("length", chapter.length))

2569

2570

2571

class MobiChapter(object):
    """ A container for Mobi chapters """

    def __init__(self, myIndex, startAddress, length, ctoc_map_index):
        self._myIndex = myIndex
        self._startAddress = startAddress
        self._length = length
        self._myCtocMapIndex = ctoc_map_index

    # --- myCtocMapIndex (read/write) --------------------------------------
    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex

    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value

    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex)

    # --- myIndex (read-only: no setter is provided) -----------------------
    def getMyIndex(self):
        return self._myIndex

    myIndex = property(getMyIndex)

    # --- startAddress (read/write) ----------------------------------------
    def getStartAddress(self):
        return self._startAddress

    def setStartAddress(self, value):
        self._startAddress = value

    startAddress = property(getStartAddress, setStartAddress)

    # --- length (read/write) ----------------------------------------------
    def getLength(self):
        return self._length

    def setLength(self, value):
        self._length = value

    length = property(getLength, setLength)

2600

2601

class MobiPeriodical(object):

2602

""" A container for a structured periodical """

2603

def __init__(self, myIndex):

2604

self._myIndex = myIndex

2605

self._sectionParents = []

2606

self._startAddress = 0xFFFFFFFF

2607

self._length = 0xFFFFFFFF

2608

self._firstSectionIndex = 0xFFFFFFFF

2609

self._lastSectionIndex = 0xFFFFFFFF

2610

self._myCtocMapIndex = 0 # Always first entry

2611

2612

def getMyIndex(self):

2613

return self._myIndex

2614

def setMyIndex(self, value):

2615

self._myIndex = value

2616

myIndex = property(getMyIndex, setMyIndex, None, None)

2617

2618

def getSectionParents(self):

2619

return self._sectionParents

2620

def setSectionParents(self, value):

2621

self._sectionParents = value

2622

sectionParents = property(getSectionParents, setSectionParents, None, None)

2623

2624

def sectionCount(self):

2625

return len(self._sectionParents)

2626

2627

def getStartAddress(self):

2628

return self._startAddress

2629

def setStartAddress(self, value):

2630

self._startAddress = value

2631

startAddress = property(getStartAddress, setStartAddress, None, None)

2632

2633

def getLength(self):

2634

return self._length

2635

def setLength(self, value):

2636

self._length = value

2637

length = property(getLength, setLength, None, None)

2638

2639

def getFirstSectionIndex(self):

2640

return self._firstSectionIndex

2641

def setFirstSectionIndex(self, value):

2642

self._firstSectionIndex = value

2643

firstSectionIndex = property(getFirstSectionIndex, setFirstSectionIndex, None, None)

2644

2645

def getLastSectionIndex(self):

2646

return self._lastSectionIndex

2647

def setLastSectionIndex(self, value):

2648

self._lastSectionIndex = value

2649

lastSectionIndex = property(getLastSectionIndex, setLastSectionIndex, None, None)

2650

2651

def getMyCtocMapIndex(self):

2652

return self._myCtocMapIndex

2653

def setMyCtocMapIndex(self, value):

2654

self._myCtocMapIndex = value

2655

myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

2656

2657

def addSectionParent(self, myIndex, ctoc_map_index):

2658

# Create a new section parent

2659

newSection = MobiSection(myIndex)

2660

# Assign our index to the section

2661

newSection.parentIndex = self._myIndex

2662

# Assign section number

2663

newSection.sectionIndex = len(self._sectionParents)

2664

# Assign ctoc_map_index

2665

newSection.myCtocMapIndex = ctoc_map_index

2666

# Add it to the list

2667

self._sectionParents.append(newSection)

2668

return newSection

2669

2670

def dumpInfo(self):
    """Print a debugging dump of the periodical structure to stdout.

    Output is three levels deep: the periodical's own fields, then every
    entry of ``_sectionParents`` (indented by one tab), then every entry of
    each section's ``articles`` (indented by two tabs).  Labels are
    right-aligned in a 20-character column; all values except the counters
    are printed as upper-case hexadecimal.

    Each ``print`` is given a single parenthesised expression so the same
    source produces identical output whether ``print`` is the Python 2
    statement or the Python 3 function.
    """
    print("%20s:" % ("Periodical"))
    print("%20s: 0x%X" % ("myIndex", self.myIndex))
    print("%20s: 0x%X" % ("startAddress", self.startAddress))
    print("%20s: 0x%X" % ("length", self.length))
    print("%20s: 0x%X" % ("myCtocMapIndex", self.myCtocMapIndex))
    print("%20s: 0x%X" % ("firstSectionIndex", self.firstSectionIndex))
    print("%20s: 0x%X" % ("lastSectionIndex", self.lastSectionIndex))
    print("%20s: %d" % ("Number of Sections", len(self._sectionParents)))

    for (count, section) in enumerate(self._sectionParents):
        print("\t%20s: %d" % ("Section", count))
        print("\t%20s: 0x%X" % ("startAddress", section.startAddress))
        print("\t%20s: 0x%X" % ("length", section.sectionLength))
        print("\t%20s: 0x%X" % ("parentIndex", section.parentIndex))
        print("\t%20s: 0x%X" % ("myIndex", section.myIndex))
        print("\t%20s: 0x%X" % ("firstArticleIndex", section.firstArticleIndex))
        print("\t%20s: 0x%X" % ("lastArticleIndex", section.lastArticleIndex))
        print("\t%20s: 0x%X" % ("articles", len(section.articles)))
        print("\t%20s: 0x%X" % ("myCtocMapIndex", section.myCtocMapIndex))

        for (artCount, article) in enumerate(section.articles):
            print("\t\t%20s: %d" % ("Article", artCount))
            print("\t\t%20s: 0x%X" % ("startAddress", article.startAddress))
            print("\t\t%20s: 0x%X" % ("length", article.articleLength))
            # The label says "sectionIndex" but the value is the index of
            # the article's parent section.
            print("\t\t%20s: 0x%X" % ("sectionIndex", article.sectionParentIndex))
            print("\t\t%20s: 0x%X" % ("myIndex", article.myIndex))
            print("\t\t%20s: 0x%X" % ("myCtocMapIndex", article.myCtocMapIndex))

2697

2698

2699

class MobiSection(object):
    """ A container for periodical sections """

    def __init__(self, myMobiDoc):
        """Create an empty section owned by ``myMobiDoc``.

        :param myMobiDoc: owning document; must provide ``getNextNode()``
            (used here to obtain this section's index) and a
            ``documentStructure`` attribute (used by ``addArticle``).
        """
        self._myMobiDoc = myMobiDoc
        self._myIndex = myMobiDoc.getNextNode()
        # NOTE(review): 0xFFFFFFFF presumably marks "not assigned yet";
        # confirm against the code that consumes these fields.
        self._parentIndex = 0xFFFFFFFF
        self._firstArticleIndex = 0x00
        self._lastArticleIndex = 0x00
        self._startAddress = 0xFFFFFFFF
        self._sectionLength = 0xFFFFFFFF
        self._articles = []
        self._myCtocMapIndex = -1

    def getMyMobiDoc(self):
        return self._myMobiDoc
    def setMyMobiDoc(self, value):
        self._myMobiDoc = value
    myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)

    def getMyIndex(self):
        return self._myIndex
    def setMyIndex(self, value):
        self._myIndex = value
    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getParentIndex(self):
        return self._parentIndex
    def setParentIndex(self, value):
        self._parentIndex = value
    # The property used to be published only under the misspelled name
    # ``parenIndex``.  Code that wrote ``section.parentIndex`` therefore
    # created an unrelated instance attribute and never updated
    # ``_parentIndex``; reading it before such a write raised
    # AttributeError.  Publish the correctly spelled name and keep the old
    # spelling as an alias for existing users.
    parentIndex = property(getParentIndex, setParentIndex, None, None)
    parenIndex = parentIndex

    def getFirstArticleIndex(self):
        return self._firstArticleIndex
    def setFirstArticleIndex(self, value):
        self._firstArticleIndex = value
    firstArticleIndex = property(getFirstArticleIndex, setFirstArticleIndex, None, None)

    def getLastArticleIndex(self):
        return self._lastArticleIndex
    def setLastArticleIndex(self, value):
        self._lastArticleIndex = value
    lastArticleIndex = property(getLastArticleIndex, setLastArticleIndex, None, None)

    def getStartAddress(self):
        return self._startAddress
    def setStartAddress(self, value):
        self._startAddress = value
    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getSectionLength(self):
        return self._sectionLength
    def setSectionLength(self, value):
        self._sectionLength = value
    sectionLength = property(getSectionLength, setSectionLength, None, None)

    def getArticles(self):
        return self._articles
    def setArticles(self, value):
        self._articles = value
    articles = property(getArticles, setArticles, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex
    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value
    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

    def addArticle(self, article):
        """Append ``article`` and update section and periodical bookkeeping.

        :param article: object exposing ``myIndex``, ``startAddress`` and
            ``articleLength``.

        Side effects on ``self.myMobiDoc.documentStructure``:
        ``lastSectionIndex`` is always set to this section's index; for the
        first article of the section whose index is 1, ``firstSectionIndex``
        and ``length`` are initialised, otherwise ``length`` grows by the
        article length.

        Side effects on this section: the first article fixes
        ``firstArticleIndex``, ``startAddress`` and ``sectionLength``; later
        articles grow ``sectionLength``; ``lastArticleIndex`` always becomes
        the index of the article just added.
        """
        self._articles.append(article)
        structure = self.myMobiDoc.documentStructure
        isFirstArticle = len(self.articles) == 1

        # Adjust the Periodical parameters.
        # NOTE(review): the test against 1 assumes the first section always
        # receives node index 1 from getNextNode(); confirm.
        if self.myIndex == 1 and isFirstArticle:
            structure.firstSectionIndex = self.myIndex
            # Measured from the periodical's start so that anything between
            # that start and the first article is counted as well.
            structure.length = article.articleLength + \
                (article.startAddress - structure.startAddress)
        else:
            structure.length += article.articleLength

        # Always set the highest section index to myIndex
        structure.lastSectionIndex = self.myIndex

        # Adjust the Section parameters
        if isFirstArticle:
            self.firstArticleIndex = article.myIndex

            if len(structure.sectionParents) == 1:
                # While the periodical has a single section, that section
                # starts where the periodical starts.
                self.startAddress = structure.startAddress
                self.sectionLength = article.articleLength + \
                    (article.startAddress - structure.startAddress)
            else:
                self.startAddress = article.startAddress
                self.sectionLength = article.articleLength
        else:
            # Adjust the Section length
            self.sectionLength += article.articleLength

        self.lastArticleIndex = article.myIndex

2802

2803

class MobiArticle(object):
    """ A container for periodical articles """

    def __init__(self, sectionParent, startAddress, length, ctocMapIndex):
        """Create an article belonging to ``sectionParent``.

        :param sectionParent: owning section; must expose ``myMobiDoc``
            (whose ``getNextNode()`` supplies this article's index) and
            ``myIndex`` (recorded as ``sectionParentIndex``).
        :param startAddress: stored as ``startAddress``.
        :param length: stored as ``articleLength``.
        :param ctocMapIndex: stored as ``myCtocMapIndex``.
        """
        self._mySectionParent = sectionParent
        self._myMobiDoc = sectionParent.myMobiDoc
        self._myIndex = sectionParent.myMobiDoc.getNextNode()
        self._myCtocMapIndex = ctocMapIndex
        self._sectionParentIndex = sectionParent.myIndex
        self._startAddress = startAddress
        self._articleLength = length

    def getMySectionParent(self):
        return self._mySectionParent
    def setMySectionParent(self, value):
        self._mySectionParent = value
    mySectionParent = property(getMySectionParent, setMySectionParent, None, None)

    def getMyMobiDoc(self):
        return self._myMobiDoc
    def setMyMobiDoc(self, value):
        self._myMobiDoc = value
    myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)

    def getMyIndex(self):
        return self._myIndex
    def setMyIndex(self, value):
        # Must write the same attribute that getMyIndex reads.  The setter
        # previously stored into ``_sectionIndex``, so assigning ``myIndex``
        # had no visible effect.
        self._myIndex = value
    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getSectionParentIndex(self):
        return self._sectionParentIndex
    def setSectionParentIndex(self, value):
        self._sectionParentIndex = value
    sectionParentIndex = property(getSectionParentIndex, setSectionParentIndex, None, None)

    def getStartAddress(self):
        return self._startAddress
    def setStartAddress(self, value):
        self._startAddress = value
    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getArticleLength(self):
        return self._articleLength
    def setArticleLength(self, value):
        self._articleLength = value
    articleLength = property(getArticleLength, setArticleLength, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex
    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value
    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

2855

Older »