368
437
return data, overlap
439
def _generate_flat_indexed_navpoints(self):
440
# Assemble a HTMLRecordData instance for each HTML record
441
# Return True if valid, False if invalid
442
self._oeb.logger.info('Indexing flat navPoints ...')
444
numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1
446
# Create a list of HTMLRecordData class instances
447
x = numberOfHTMLRecords
449
self._HTMLRecords.append(HTMLRecordData())
459
entries = list(toc.iter())[1:]
461
# Get offset, length per entry
462
for (i, child) in enumerate(entries):
463
if not child.title or not child.title.strip():
464
child.title = "(none)"
466
if not child.title or not child.title.strip():
467
child.title = "(none)"
470
if h not in self._id_offsets:
471
self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title)
473
offset = self._id_offsets[h]
477
for sibling in entries[i+1:]:
479
if h2 in self._id_offsets:
480
offset2 = self._id_offsets[h2]
482
length = offset2 - offset
486
length = self._content_length - offset
488
if self.opts.verbose > 3 :
489
self._oeb.logger.info("child %03d: %s" % (i, child))
490
self._oeb.logger.info(" title: %s" % child.title)
491
self._oeb.logger.info(" depth: %d" % child.depth())
492
self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length))
494
# Look a gap between chapter nodes. Don't evaluate periodical or section nodes
495
if (i and child.depth() == 1 and entries[i-1].depth() == 1) :
496
if offset != previousOffset + previousLength :
497
self._oeb.log.warning("*** TOC discontinuity ***")
498
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
499
(i-1, entries[i-1].title, previousOffset, previousLength) )
500
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
501
(i, child.title, offset, previousOffset + previousLength) )
502
self._oeb.log.warning('_generate_flat_indexed_navpoints: Failed to generate index')
503
# Zero out self._HTMLRecords, return False
504
self._HTMLRecords = []
508
previousOffset = offset
509
previousLength = length
511
# Calculate the HTML record for this entry
512
myStartingRecord = offset // RECORD_SIZE
514
# If no one has taken the openingNode slot, it must be us
515
if self._HTMLRecords[myStartingRecord].openingNode == -1 :
516
self._HTMLRecords[myStartingRecord].openingNode = myIndex
518
# Bump the node count for this HTML record
519
# Special case if we're the first so we get a true node count
520
if self._HTMLRecords[myStartingRecord].currentSectionNodeCount == -1:
521
self._HTMLRecords[myStartingRecord].currentSectionNodeCount = 1
523
self._HTMLRecords[myStartingRecord].currentSectionNodeCount += 1
525
# Calculate the ending HTMLRecord of this entry
526
myEndingRecord = (offset + length) // RECORD_SIZE
528
if myEndingRecord > myStartingRecord :
529
interimSpanRecord = myStartingRecord + 1
530
while interimSpanRecord <= myEndingRecord :
531
self._HTMLRecords[interimSpanRecord].continuingNode = myIndex
532
self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
533
interimSpanRecord += 1
534
if self.opts.verbose > 3 :self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
535
(myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, interimSpanRecord, offset, length) )
537
if self.opts.verbose > 3 : self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
538
(myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, myStartingRecord, offset, length) )
540
last_name = "%04X" % myIndex
543
# Successfully parsed the entries
546
def _generate_indexed_navpoints(self):
547
# Assemble a HTMLRecordData instance for each HTML record
548
# Return True if valid, False if invalid
549
self._oeb.logger.info('Indexing navPoints ...')
551
numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1
553
# Create a list of HTMLRecordData class instances
554
x = numberOfHTMLRecords
556
self._HTMLRecords.append(HTMLRecordData())
566
sectionChangedInRecordNumber = -1
567
sectionChangesInThisRecord = False
568
entries = list(toc.iter())[1:]
570
# Get offset, length per entry
571
for (firstSequentialNode, node) in enumerate(list(self._ctoc_map)) :
572
if node['klass'] != 'article' and node['klass'] != 'chapter' :
573
# Skip periodical and section entries
576
if self.opts.verbose > 3 :self._oeb.logger.info("\tFirst sequential node: %03d" % firstSequentialNode)
579
for i, child in enumerate(entries):
580
# Entries continues with a stream of section+articles, section+articles ...
582
if h not in self._id_offsets:
583
self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title)
585
offset = self._id_offsets[h]
589
for sibling in entries[i+1:]:
591
if h2 in self._id_offsets:
592
offset2 = self._id_offsets[h2]
594
length = offset2 - offset
598
length = self._content_length - offset
600
if self.opts.verbose > 3 :
601
self._oeb.logger.info("child %03d: %s" % (i, child))
602
self._oeb.logger.info(" title: %s" % child.title)
603
self._oeb.logger.info(" depth: %d" % child.depth())
604
self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length))
606
# Look a gap between nodes, articles/chapters only, as
607
# periodical and section lengths cover spans of articles
608
if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
609
if offset != previousOffset + previousLength :
610
self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
611
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
612
(i-1, entries[i-1].title, previousOffset, previousLength) )
613
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
614
(i, child.title, offset, previousOffset + previousLength) )
615
self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
616
self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
617
self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
618
# Zero out self._HTMLRecords, return False
619
self._HTMLRecords = []
623
previousOffset = offset
624
previousLength = length
626
# Calculate the HTML record for this entry
627
thisRecord = offset // RECORD_SIZE
629
# Store the current continuingNodeParent and openingNodeParent
630
if self._ctoc_map[i]['klass'] == 'article':
632
if sectionChangesInThisRecord : # <<<
633
self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - 1
635
self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex
638
if self._ctoc_map[i]['klass'] == 'periodical' :
639
# INCREMENT currentSectionNode count
640
# Commented out because structured docs don't count section changes in nodeCount
641
# compensation at 948 for flat periodicals
642
# self._HTMLRecords[thisRecord].currentSectionNodeCount = 1
645
# Is this node a new section?
646
if self._ctoc_map[i]['klass'] == 'section' :
647
# INCREMENT currentSectionNode count
648
# Commented out because structured docs don't count section changes in nodeCount
649
# self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
651
# *** This should check currentSectionNumber, because content could start late
653
sectionChangesInThisRecord = True
654
sectionChangesInRecordNumber = thisRecord
655
self._currentSectionIndex += 1
656
self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
657
# The following node opens the nextSection
658
self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
664
# If no one has taken the openingNode slot, it must be us
665
# This could happen before detecting a section change
666
if self._HTMLRecords[thisRecord].openingNode == -1 :
667
self._HTMLRecords[thisRecord].openingNode = myIndex
668
self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex
670
# Bump the nextSection node count while we're in the same record
671
if sectionChangedInRecordNumber == thisRecord :
672
if self._ctoc_map[i]['klass'] == 'article' :
673
if self._HTMLRecords[thisRecord].nextSectionNodeCount == -1:
674
self._HTMLRecords[thisRecord].nextSectionNodeCount = 1
676
self._HTMLRecords[thisRecord].nextSectionNodeCount += 1
678
# Bump the currentSectionNodeCount one last time
679
self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
682
# Reset the change record
683
# sectionChangedInRecordNumber = -1
684
sectionChangesInThisRecord = False
685
if self._HTMLRecords[thisRecord].currentSectionNodeCount == -1:
686
self._HTMLRecords[thisRecord].currentSectionNodeCount = 1
688
self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
690
# Fill in the spanning records
691
myEndingRecord = (offset + length) // RECORD_SIZE
692
if myEndingRecord > thisRecord :
693
sectionChangesInThisRecord = False
694
interimSpanRecord = thisRecord + 1
695
while interimSpanRecord <= myEndingRecord :
696
self._HTMLRecords[interimSpanRecord].continuingNode = myIndex
698
self._HTMLRecords[interimSpanRecord].continuingNodeParent = self._currentSectionIndex
699
self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
700
interimSpanRecord += 1
702
if self.opts.verbose > 3 :self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
703
(myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, interimSpanRecord, offset, length) )
705
if self.opts.verbose > 3 : self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
706
(myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, thisRecord, offset, length) )
708
last_name = "%04X" % myIndex
711
# Successfully parsed the entries
715
def _generate_tbs_book(self, nrecords, lastrecord):
716
if self.opts.verbose > 3 :self._oeb.logger.info("Assembling TBS for Book: HTML record %03d of %03d" % \
717
(nrecords, lastrecord) )
718
# Variables for trailing byte sequence
722
# Generate TBS for type 0x002 - mobi_book
723
if self._initialIndexRecordFound == False :
725
# Is there any indexed content yet?
726
if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 :
727
# No indexing data - write vwi length of 1 only
728
tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
731
# First indexed HTML record is a special case
733
self._initialIndexRecordFound = True
734
if self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
739
tbSequence = decint(tbsType, DECINT_FORWARD)
740
tbSequence += decint(0x00, DECINT_FORWARD)
741
# Don't write a nodecount for opening type 2 record
744
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
745
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
748
# Determine tbsType for indexed HTMLRecords
749
if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
750
# Ending record with singleton node
753
elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 :
754
# This is a span-only record
756
# Zero out the nodeCount with a pre-formed vwi
757
self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80
763
# Shift the openingNode index << 3
764
shiftedNCXEntry = self._HTMLRecords[nrecords].continuingNode << 3
766
shiftedNCXEntry |= tbsType
769
tbSequence = decint(shiftedNCXEntry, DECINT_FORWARD)
770
tbSequence += decint(0x00, DECINT_FORWARD)
771
# Don't write a nodecount for terminating type 2 record
773
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
774
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
776
self._tbSequence = tbSequence
779
def _generate_tbs_flat_periodical(self, nrecords, lastrecord):
780
# Flat periodicals <0x102> have a single section for all articles
781
# Structured periodicals <0x101 | 0x103> have one or more sections with articles
782
# The first section TBS sequence is different for Flat and Structured
783
# This function is called once per HTML record
785
# Variables for trailing byte sequence
789
# Generate TBS for type 0x102 - mobi_feed - flat periodical
790
if self._initialIndexRecordFound == False :
791
# Is there any indexed content yet?
792
if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 :
793
# No indexing data - write vwi length of 1 only
794
tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
797
# First indexed record: Type 6 with nodeCount only
798
self._initialIndexRecordFound = True
800
tbSequence = decint(tbsType, DECINT_FORWARD)
801
tbSequence += decint(0x00, DECINT_FORWARD)
802
# nodeCount = 0xDF + 0xFF + n(0x3F) - need to add 2 because we didn't count them earlier
803
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount + 2)
804
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
805
if self.opts.verbose > 2 :
806
self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \
807
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
808
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
811
# An HTML record with nextSectionNumber = -1 has no section change in this record
812
# Default for flat periodicals with only one section
813
if self.opts.verbose > 2 :
814
self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \
815
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
816
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
818
# First section has different Type values
819
# Determine tbsType for HTMLRecords > 0
820
if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
821
# Ending record with singleton node
824
# Assemble the Type 6 TBS
825
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
826
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
827
tbSequence += chr(2) # arg2 = 0x02
829
# Assemble arg3 - (article index +1) << 4 + flag: 1 = article spans this record
830
arg3 = self._HTMLRecords[nrecords].continuingNode
833
arg3 |= 0x0 #flags = 0
834
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
837
# tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
838
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
840
elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 :
841
# This is a span-only record
843
# Zero out the nodeCount with a pre-formed vwi
844
self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80
846
# Assemble the Type 6 TBS
847
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
848
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
849
tbSequence += chr(2) # arg2 = 0x02
850
# Assemble arg3 - article index << 3 + flag: 1 = article spans this record
851
arg3 = self._HTMLRecords[nrecords].continuingNode
852
# Add the index of the openingNodeParent to get the offset start
853
# We know that section 0 is at position 1, section 1 at index 2, etc.
854
arg3 += self._HTMLRecords[nrecords].continuingNodeParent + 1
857
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
858
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
859
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
863
# Assemble the Type 7 TBS
864
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
865
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
866
tbSequence += chr(2) # arg2 = 0x02
867
tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80
868
# Assemble arg4 - article index << 4 + flag: 1 = article spans this record
869
arg4 = self._HTMLRecords[nrecords].continuingNode
870
# Add the index of the openingNodeParent to get the offset start
871
# We know that section 0 is at position 1, section 1 at index 2, etc.
872
arg4 += self._HTMLRecords[nrecords].continuingNodeParent + 1
874
arg4 |= 0x04 # 4: multiple nodes
875
tbSequence += decint(arg4, DECINT_FORWARD) # arg4
876
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
877
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
879
self._tbSequence = tbSequence
881
def _generate_tbs_structured_periodical(self, nrecords, lastrecord):
882
# Structured periodicals <0x101 | 0x103> have one or more sections for all articles
883
# The first section TBS sequences is different for Flat and Structured
884
# This function is called once per HTML record
886
# Variables for trailing byte sequence
891
# Generate TBS for type 0x101/0x103 - structured periodical
892
if self._initialIndexRecordFound == False :
893
# Is there any indexed content yet?
894
if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 :
895
# No indexing data - write vwi length of 1 only
896
tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
899
self._initialIndexRecordFound = True
901
if self.opts.verbose > 2 :
902
self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
903
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
904
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
908
# Assemble the Type 6 TBS
909
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
910
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
911
tbSequence += chr(2) # arg2 = 0x02
912
# Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record
913
arg3 = self._sectionCount # Jump over the section group
914
arg3 += 0 # First article index = 0
917
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
919
# Structured periodicals don't count periodical, section in nodeCount
920
#tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount - 2) # nodeCount
921
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
922
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
924
if self._firstSectionConcluded == False :
925
# Use type 6 & 7 until first section switch, then 2
927
if self._HTMLRecords[nrecords].nextSectionNumber == -1 :
928
# An HTML record with nextSectionNumber = -1 has no section change in this record
929
if self.opts.verbose > 2 :
930
self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
931
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
932
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
934
# First section has different Type values
935
# Determine tbsType for HTMLRecords > 0
936
if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
937
# Ending record with singleton node
940
# Assemble the Type 6 TBS
941
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
942
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
943
tbSequence += chr(2) # arg2 = 0x02
944
# Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record
945
arg3 = self._sectionCount
946
arg3 += self._HTMLRecords[nrecords].continuingNode
949
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
950
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
951
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
953
elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 :
954
# This is a span-only record
956
# Zero out the nodeCount with a pre-formed vwi
957
self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80
959
# Assemble the Type 6 TBS
960
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
961
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
962
tbSequence += chr(2) # arg2 = 0x02
963
# Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record
964
arg3 = self._sectionCount
965
arg3 += self._HTMLRecords[nrecords].continuingNode
968
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
969
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
970
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
974
# Assemble the Type 7 TBS
975
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
976
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
977
tbSequence += chr(2) # arg2 = 0x02
978
tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80
979
# Assemble arg4: (section jump + article index) << 4 + flag: 1 = article spans this record
980
arg4 = self._sectionCount
981
arg4 += self._HTMLRecords[nrecords].continuingNode
983
arg4 |= 0x04 # 4: multiple nodes
984
tbSequence += decint(arg4, DECINT_FORWARD) # arg4
985
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
986
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
989
# Initial section switch from section 1
990
elif self._HTMLRecords[nrecords].nextSectionNumber > 0 :
993
if self.opts.verbose > 2 :
994
self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \
995
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber) )
996
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
998
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
999
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
1000
tbSequence += decint(0x00, DECINT_FORWARD) # arg2 = 0x80
1002
# Assemble arg3: Upper nybble: ending section index
1003
# Lower nybble = flags for next section - 0 or 1
1004
arg3 = (self._HTMLRecords[nrecords].continuingNodeParent + 1) << 4
1005
arg3Flags = 0 # 0: has nodes?
1007
tbSequence += decint(arg3, DECINT_FORWARD)
1009
# Assemble arg4: Upper nybble: continuingNode << 4
1010
# Lower nybble: flag: 0 = no starting nodes from previous section
1011
# flag: 4 = starting nodes from previous section
1013
sectionBase = self._HTMLRecords[nrecords].continuingNodeParent
1014
sectionDelta = self._sectionCount - sectionBase - 1
1015
articleOffset = self._HTMLRecords[nrecords].continuingNode + 1
1016
arg4 = (sectionDelta + articleOffset) << 4
1019
if self._HTMLRecords[nrecords].currentSectionNodeCount > 1 :
1024
tbSequence += decint(arg4, DECINT_FORWARD) # arg4
1026
# Write optional 4a if previous section node count > 1
1027
if arg4Flags == 4 : # arg4a
1028
nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
1029
nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
1030
tbSequence += chr(nodeCountValue)
1032
# Write article2: not completely understood
1033
arg5 = sectionDelta + articleOffset
1034
if self._HTMLRecords[nrecords].currentSectionNodeCount < 2:
1039
tbSequence += decint(arg5, DECINT_FORWARD) # arg5
1041
# Write first article of new section
1042
#arg6 = self._sectionCount - 1 # We're now into the following section
1043
#arg6 = self._HTMLRecords[nrecords].nextSectionNumber
1044
arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode
1046
if self._HTMLRecords[nrecords].nextSectionNodeCount > 1 :
1051
tbSequence += decint(arg6, DECINT_FORWARD) # arg5
1053
# Write optional 6a if previous section node count > 1
1054
if arg6Flags == 4 : # arg4a
1055
nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount
1056
nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
1057
tbSequence += chr(nodeCountValue)
1059
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
1061
self._firstSectionConcluded = True
1063
# After first section switch, use types 2 and 3
1064
if self._HTMLRecords[nrecords].nextSectionNumber == -1 :
1065
if self.opts.verbose > 2 :
1066
self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
1067
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
1068
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
1071
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
1072
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
1073
arg2 = self._HTMLRecords[nrecords].continuingNodeParent + 1
1075
# Add flag = 1 if there are multiple nodes in this record
1077
if self._HTMLRecords[nrecords].currentSectionNodeCount > 0 :
1080
tbSequence += decint(arg2, DECINT_FORWARD)
1083
# Add an extra vwi 0x00
1084
tbSequence += decint(0x00, DECINT_FORWARD) # arg2Flags = 0x80
1086
# arg3 - offset of continuingNode from sectionParent
1087
arg3 = self._sectionCount - self._HTMLRecords[nrecords].continuingNodeParent # Total guess
1088
arg3 += self._HTMLRecords[nrecords].continuingNode
1091
if self._HTMLRecords[nrecords].currentSectionNodeCount > 0 :
1094
tbSequence += decint(arg3, DECINT_FORWARD)
1097
nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
1098
nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
1099
tbSequence += chr(nodeCountValue)
1101
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
1103
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
1106
# Section switch when section > 1
1109
if self.opts.verbose > 2 :
1110
self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \
1111
(nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber) )
1112
self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
1114
tbSequence = decint(tbsType, DECINT_FORWARD) # Type
1115
tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
1116
tbSequence += decint(0x00, DECINT_FORWARD) # arg2 = 0x80
1118
# arg3: continuingNodeParent section
1119
# Upper nybble: ending section index
1120
# Lower nybble = flags for next section - 0 or 1
1121
arg3 = (self._HTMLRecords[nrecords].continuingNodeParent + 1) << 4
1122
arg3Flags = 0 # 0: has nodes?
1124
tbSequence += decint(arg3, DECINT_FORWARD)
1126
# Assemble arg4: Upper nybble: continuingNode << 4
1127
# Lower nybble: flag: 0 = no starting nodes from previous section
1128
# flag: 4 = starting nodes from previous section
1129
sectionBase = self._HTMLRecords[nrecords].continuingNodeParent
1130
sectionDelta = self._sectionCount - sectionBase - 1
1131
articleOffset = self._HTMLRecords[nrecords].continuingNode + 1
1132
arg4 = (sectionDelta + articleOffset) << 4
1135
if self._HTMLRecords[nrecords].currentSectionNodeCount > 1 :
1140
tbSequence += decint(arg4, DECINT_FORWARD) # arg4
1142
# Write optional 4a if previous section node count > 1
1143
if arg4Flags == 4 : # arg4a
1144
nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
1145
nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
1146
tbSequence += chr(nodeCountValue)
1148
# Write article2: not completely understood
1149
arg5 = sectionDelta + articleOffset
1150
if self._HTMLRecords[nrecords].currentSectionNodeCount < 2:
1155
tbSequence += decint(arg5, DECINT_FORWARD) # arg5
1157
# Write first article of new section
1158
arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode
1160
if self._HTMLRecords[nrecords].nextSectionNodeCount > 1 :
1165
tbSequence += decint(arg6, DECINT_FORWARD) # arg5
1167
# Write optional 6a if previous section node count > 1
1168
if arg6Flags == 4 : # arg4a
1169
nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount
1170
nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
1171
tbSequence += chr(nodeCountValue)
1173
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
1175
self._tbSequence = tbSequence
1177
def _evaluate_periodical_toc(self):
1181
<navPoint class="periodical"> depth=3 1
1182
<navPoint class="section"> depth=2 1 or more
1183
<navPoint class="article"> depth=1 multiple
1186
<navPoint [class="chapter"|None]> depth=1 multiple
1189
nodes = list(toc.iter())[1:]
1191
for (i, child) in enumerate(nodes) :
1192
if child.klass == "periodical" and child.depth() != 3 or \
1193
child.klass == "section" and child.depth() != 2 or \
1194
child.klass == "article" and child.depth() != 1 :
1196
self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % \
1197
(child.klass, child.depth()) )
1198
self._oeb.logger.warn(" <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % \
1199
(child.title, child.klass, child.depth(), child.play_order) )
1200
toc_conforms = False
1202
# We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs
1203
if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] :
1204
self._oeb.logger.info('metadata missing date/timestamp')
1205
toc_conforms = False
1207
if not 'masthead' in self._oeb.guide :
1208
self._oeb.logger.info('mastheadImage missing from manifest')
1209
toc_conforms = False
1211
self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming")
370
1215
def _generate_text(self):
371
1216
self._oeb.logger.info('Serializing markup content...')
372
serializer = Serializer(self._oeb, self._images)
1217
serializer = Serializer(self._oeb, self._images,
1218
write_page_breaks_after_item=self.write_page_breaks_after_item)
373
1219
breaks = serializer.breaks
374
1220
text = serializer.text
1221
self._anchor_offset_kindle = serializer.anchor_offset_kindle
1222
self._id_offsets = serializer.id_offsets
1223
self._content_length = len(text)
375
1224
self._text_length = len(text)
376
1225
text = StringIO(text)
1228
lastrecord = (self._content_length // RECORD_SIZE )
379
1231
if self._compression != UNCOMPRESSED:
380
self._oeb.logger.info('Compressing markup content...')
1232
self._oeb.logger.info(' Compressing markup content...')
381
1233
data, overlap = self._read_text_record(text)
1235
# Evaluate toc for conformance
1236
if self.opts.mobi_periodical :
1237
self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...')
1238
self._conforming_periodical_toc = self._evaluate_periodical_toc()
1240
# This routine decides whether to build flat or structured based on self._conforming_periodical_toc
1241
self._ctoc = self._generate_ctoc()
1243
# Build the HTMLRecords list so we can assemble the trailing bytes sequences in the following while loop
1245
entries = list(toc.iter())[1:]
1248
self._indexable = self._generate_indexed_navpoints()
1250
self._oeb.logger.info(' No entries found in TOC ...')
1251
self._indexable = False
1253
if not self._indexable :
1254
self._oeb.logger.info(' Writing unindexed mobi ...')
382
1256
while len(data) > 0:
383
1257
if self._compression == PALMDOC:
384
1258
data = compress_doc(data)
385
1259
record = StringIO()
386
1260
record.write(data)
387
record.write(overlap)
388
record.write(pack('>B', len(overlap)))
392
while breaks and (breaks[0] - offset) < RECORD_SIZE:
393
pbreak = (breaks.pop(0) - running) >> 3
394
encoded = decint(pbreak, DECINT_FORWARD)
395
record.write(encoded)
396
running += pbreak << 3
397
nextra += len(encoded)
400
size = decint(nextra + lsize, DECINT_BACKWARD)
401
if len(size) == lsize:
1262
# Marshall's utf-8 break code.
1264
record.write(overlap)
1265
record.write(pack('>B', len(overlap)))
1269
while breaks and (breaks[0] - offset) < RECORD_SIZE:
1270
# .pop returns item, removes it from list
1271
pbreak = (breaks.pop(0) - running) >> 3
1272
if self.opts.verbose > 2 :
1273
self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
1274
encoded = decint(pbreak, DECINT_FORWARD)
1275
record.write(encoded)
1276
running += pbreak << 3
1277
nextra += len(encoded)
1280
size = decint(nextra + lsize, DECINT_BACKWARD)
1281
if len(size) == lsize:
1286
# Write Trailing Byte Sequence
1287
if INDEXING and self._indexable:
1288
# Dispatch to different TBS generators based upon publication type
1289
booktype = self._MobiDoc.mobiType
1290
if booktype == 0x002 :
1291
self._generate_tbs_book(nrecords, lastrecord)
1292
elif booktype == 0x102 :
1293
self._generate_tbs_flat_periodical(nrecords, lastrecord)
1294
elif booktype == 0x101 or booktype == 0x103 :
1295
self._generate_tbs_structured_periodical(nrecords, lastrecord)
1297
raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype)
1299
# Write the sequence
1300
record.write(self._tbSequence)
405
1302
self._records.append(record.getvalue())
1303
buf.append(self._records[-1])
407
1305
offset += RECORD_SIZE
408
1306
data, overlap = self._read_text_record(text)
1309
extra = sum(map(len, buf))%4
1312
self._records.append('\0'*(4-extra))
409
1314
self._text_nrecords = nrecords
411
1317
def _generate_images(self):
412
1318
self._oeb.logger.info('Serializing images...')
413
1319
images = [(index, href) for href, index in self._images.items()]
1321
self._first_image_record = None
415
1322
for _, href in images:
416
1323
item = self._oeb.manifest.hrefs[href]
531
1608
for record in self._records:
532
1609
self._write(record)
535
def config(defaults=None):
# Assemble the option definitions for the MOBI converter: a 'mobipocket'
# option group, a 'profiles' option group and a top-level --encoding option.
# NOTE(review): two assignments to `c` are visible below (Config and
# StringConfig); the branch choosing between them and this function's return
# statement are not visible in this view -- presumably selected on `defaults`.
536
desc = _('Options to control the conversion to MOBI')
537
# Sorted profile names; reused for the help text and as the valid `choices`.
_profiles = list(sorted(Context.PROFILES.keys()))
539
c = Config('mobi', desc)
541
c = StringConfig(defaults, desc)
543
mobi = c.add_group('mobipocket', _('Mobipocket-specific options.'))
544
mobi('compress', ['--compress'], default=False,
545
help=_('Compress file text using PalmDOC compression. '
546
'Results in smaller files, but takes a long time to run.'))
547
mobi('rescale_images', ['--rescale-images'], default=False,
548
help=_('Modify images to meet Palm device size limitations.'))
549
mobi('toc_title', ['--toc-title'], default=None,
550
help=_('Title for any generated in-line table of contents.'))
551
mobi('ignore_tables', ['--ignore-tables'], default=False,
552
help=_('Render HTML tables as blocks of text instead of actual '
553
'tables. This is neccessary if the HTML contains very large '
554
'or complex tables.'))
555
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
556
help=_('When present, use the author sorting information for '
557
'generating the Mobipocket author metadata.'))
558
profiles = c.add_group('profiles', _('Device renderer profiles. '
559
'Affects conversion of font sizes, image rescaling and rasterization '
560
'of tables. Valid profiles are: %s.') % ', '.join(_profiles))
561
profiles('source_profile', ['--source-profile'],
562
default='Browser', choices=_profiles,
563
help=_("Source renderer profile. Default is %default."))
564
profiles('dest_profile', ['--dest-profile'],
565
default='CybookG3', choices=_profiles,
566
help=_("Destination renderer profile. Default is %default."))
567
c.add_opt('encoding', ['--encoding'], default=None,
568
help=_('Character encoding for HTML files. Default is to auto detect.'))
574
parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
576
'-o', '--output', default=None,
577
help=_('Output file. Default is derived from input filename.'))
579
'-v', '--verbose', default=0, action='count',
580
help=_('Useful for debugging.'))
583
def oeb2mobi(opts, inpath):
# Convert the OEB/OPF document at `inpath` to a MOBI file.
# Reads from `opts`: verbose, output, source_profile, dest_profile, compress,
# rescale_images, encoding, toc_title, ignore_tables, prefer_author_sort.
# The transforms below run in the order written, each on the same `oeb`
# and `context`, before MobiWriter.dump() writes `outpath`.
584
logger = Logger(logging.getLogger('oeb2mobi'))
585
logger.setup_cli_handler(opts.verbose)
586
outpath = opts.output
588
# Derived output name: input basename with its extension replaced by .mobi.
# NOTE(review): the condition guarding this fallback is not visible here --
# presumably it applies only when opts.output is None.
outpath = os.path.basename(inpath)
589
outpath = os.path.splitext(outpath)[0] + '.mobi'
590
source = opts.source_profile
591
if source not in Context.PROFILES:
592
logger.error(_('Unknown source profile %r') % source)
594
# NOTE(review): what happens after each profile error is logged is not
# visible in this view.
dest = opts.dest_profile
595
if dest not in Context.PROFILES:
596
logger.error(_('Unknown destination profile %r') % dest)
598
compression = PALMDOC if opts.compress else UNCOMPRESSED
599
# imagemax of None is passed to the writer when rescaling is not requested.
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
600
context = Context(source, dest)
601
oeb = OEBBook(inpath, logger=logger, encoding=opts.encoding)
602
tocadder = HTMLTOCAdder(title=opts.toc_title)
603
tocadder.transform(oeb, context)
604
mangler = CaseMangler()
605
mangler.transform(oeb, context)
606
fbase = context.dest.fbase
607
fkey = context.dest.fnums.values()
608
flattener = CSSFlattener(
609
fbase=fbase, fkey=fkey, unfloat=True, untable=True)
610
flattener.transform(oeb, context)
611
rasterizer = SVGRasterizer()
612
rasterizer.transform(oeb, context)
613
trimmer = ManifestTrimmer()
614
trimmer.transform(oeb, context)
615
mobimlizer = MobiMLizer(ignore_tables=opts.ignore_tables)
616
mobimlizer.transform(oeb, context)
617
writer = MobiWriter(compression=compression, imagemax=imagemax,
618
prefer_author_sort=opts.prefer_author_sort)
619
writer.dump(oeb, outpath)
620
# Post-processing plugins run on the written file before success is logged.
run_plugins_on_postprocess(outpath, 'mobi')
621
logger.info(_('Output written to ') + outpath)
623
def main(argv=sys.argv):
# Command-line entry point: parse argv[1:] with option_parser() and run
# oeb2mobi() on the input path.
# NOTE(review): the lines that derive `inpath` from `args` and that return
# `retval` are not visible in this view.
624
parser = option_parser()
625
opts, args = parser.parse_args(argv[1:])
630
retval = oeb2mobi(opts, inpath)
633
if __name__ == '__main__':
1611
def _generate_index(self):
# Build the index records and append them to self._records.
# The primary INDX0, INDX1 and the CTOC are appended in that order; when
# self._MobiDoc.mobiType is greater than 0x100 a secondary INDX0/INDX1 pair
# is appended after them.
# self._primary_index_record is reset to None up front and is only set (to
# the position the primary INDX0 will occupy in self._records) just before
# the primary records are appended.
1612
self._oeb.log('Generating INDX ...')
1613
self._primary_index_record = None
1615
# Build the NCXEntries and INDX
1616
indxt, indxt_count, indices, last_name = \
1617
self._generate_indxt(self._ctoc)
1619
# NOTE(review): only the warning is visible for the no-TOC case; presumably
# the method returns here, since last_name is used with len() further down.
if last_name is None:
1620
self._oeb.log.warn('Input document has no TOC. No index generated.')
1623
# Assemble the INDX0[0] and INDX1[0] output streams
1625
indx1.write('INDX'+pack('>I', 0xc0)) # header length
1627
# 0x8 - 0xb : Unknown
1630
# 0xc - 0xf : Header type
1631
indx1.write(pack('>I', 1))
1633
# 0x10 - 0x13 : Unknown
1636
# 0x14 - 0x17 : IDXT offset
1637
# 0x18 - 0x1b : IDXT count
# The IDXT block follows the 0xc0-byte header and the INDXT entries.
1638
indx1.write(pack('>I', 0xc0+len(indxt)))
1639
indx1.write(pack('>I', indxt_count + 1))
1641
# 0x1c - 0x23 : Unknown
1642
indx1.write('\xff'*8)
1645
indx1.write('\0'*156)
1647
indx1.write(indices)
1648
indx1 = indx1.getvalue()
1650
# Length-prefixed name of the last entry followed by the entry count,
# padded by align_block().
idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1)
1651
idxt0 = align_block(idxt0)
1654
# TAGX table choice: 'chapter' for mobiType 0x002; the 'periodical'
# assignment below presumably sits in the (not visible) else branch.
if self._MobiDoc.mobiType == 0x002 :
1655
tagx = TAGX['chapter']
1657
tagx = TAGX['periodical']
1659
# The TAGX length field counts the 8-byte 'TAGX'+length prefix as well.
tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx)
1660
indx0_indices_pos = 0xc0 + len(tagx) + len(idxt0)
1661
indx0_indices = align_block('IDXT' + pack('>H', 0xc0 + len(tagx)))
1662
# Generate record header
1665
header.write('INDX')
1666
header.write(pack('>I', 0xc0)) # header length
1668
# 0x08 - 0x0b : Unknown
1669
header.write('\0'*4)
1671
# 0x0c - 0x0f : Header type
1672
header.write(pack('>I', 0))
1674
# 0x10 - 0x13 : Generator ID
1675
# This value may impact the position of flagBits written in
1676
# write_article_node(). Change with caution.
1677
header.write(pack('>I', 6))
1679
# 0x14 - 0x17 : IDXT offset
1680
header.write(pack('>I', indx0_indices_pos))
1682
# 0x18 - 0x1b : IDXT count
1683
header.write(pack('>I', 1))
1685
# 0x1c - 0x1f : Text encoding ?
1686
# header.write(pack('>I', 650001))
1687
# GR: This needs to be either 0xFDE9 or 0x4E4
# (0xFDE9 == 65001, the same value the secondary INDX0 writes as its encoding.)
1688
header.write(pack('>I', 0xFDE9))
1690
# 0x20 - 0x23 : Language code?
1691
header.write(iana2mobi(str(self._oeb.metadata.language[0])))
1693
# 0x24 - 0x27 : Number of TOC entries in INDX1
1694
header.write(pack('>I', indxt_count + 1))
1696
# 0x28 - 0x2b : ORDT Offset
1697
header.write('\0'*4)
1699
# 0x2c - 0x2f : LIGT offset
1700
header.write('\0'*4)
1702
# 0x30 - 0x33 : Number of LIGT entries
1703
header.write('\0'*4)
1705
# 0x34 - 0x37 : Unknown
1706
header.write(pack('>I', 1))
1708
# 0x38 - 0xb3 : Unknown (pad?)
1709
header.write('\0'*124)
1711
# 0xb4 - 0xb7 : TAGX offset
1712
header.write(pack('>I', 0xc0))
1714
# 0xb8 - 0xbf : Unknown
1715
header.write('\0'*8)
1717
header = header.getvalue()
1722
indx0.write(indx0_indices)
1723
indx0 = indx0.getvalue()
1725
# Remember where the primary INDX0 lands before the records are appended.
self._primary_index_record = len(self._records)
1726
self._records.extend([indx0, indx1, self._ctoc])
1728
# Indexing for author/description fields in summary section
1729
# Test for indexed periodical - only one that needs secondary index
1730
if self._MobiDoc.mobiType > 0x100 :
1731
# Write secondary index records
1732
#tagx = TAGX['secondary_'+\
1733
# ('periodical' if self.opts.mobi_periodical else 'book')]
1734
tagx = TAGX['secondary_'+'periodical']
1735
tagx_len = 8 + len(tagx)
1737
# generate secondary INDX0
1739
indx0.write('INDX'+pack('>I', 0xc0)+'\0'*8) # header + 8x00
1740
indx0.write(pack('>I', 0x06)) # generator ID
1741
indx0.write(pack('>I', 0xe8)) # IDXT offset
1742
indx0.write(pack('>I', 1)) # IDXT entries
1743
indx0.write(pack('>I', 65001)) # encoding
1744
indx0.write('\xff'*4) # language
1745
indx0.write(pack('>I', 4)) # IDXT Entries in INDX1
1746
indx0.write('\0'*4) # ORDT Offset
1747
indx0.write('\0'*136) # everything up to TAGX offset
1748
indx0.write(pack('>I', 0xc0)) # TAGX offset
1749
indx0.write('\0'*8) # unknowns
1750
indx0.write('TAGX'+pack('>I', tagx_len)+tagx) # TAGX
1751
indx0.write('\x0D'+'mastheadImage' + '\x00\x04') # mastheadImage
1752
indx0.write('IDXT'+'\x00\xd8\x00\x00') # offset plus pad
1754
# generate secondary INDX1
1756
indx1.write('INDX' + pack('>I', 0xc0) + '\0'*4) # header + 4x00
1757
indx1.write(pack('>I', 1)) # blockType 1
1758
indx1.write(pack('>I', 0x00)) # unknown
1759
indx1.write('\x00\x00\x00\xF0') # IDXT offset
1760
indx1.write(pack('>I', 4)) # num of IDXT entries
1761
indx1.write('\xff'*8) # encoding, language
1762
indx1.write('\0'*(0xc0-indx1.tell())) # 00 to IDXT Entries @ 0xC0
1763
indx1.write('\0\x01\x80') # 1 - null
1764
indx1.write('\x06'+'author' + '\x02\x80\x80\xc7') # author
1765
indx1.write('\x0B'+'description' + '\x02\x80\x80\xc6') # description
1766
indx1.write('\x0D'+'mastheadImage' + '\x02\x85\x80\xc5') # mastheadImage
1767
indx1.write('IDXT'+'\x00\xc0\x00\xc3\x00\xce\x00\xde') # IDXT header
1769
# Write INDX0 and INDX1 to the stream
1770
indx0, indx1 = indx0.getvalue(), indx1.getvalue()
1771
self._records.extend((indx0, indx1))
1772
# Debug aid: dump the most recently appended records to a temp directory.
# NOTE(review): names are paired with records counted back from the end of
# self._records. With the secondary pair appended, the order from the end is
# sindx1, sindx0, ctoc, primary indx1, primary indx0 -- so the 'indx0' and
# 'indx1' labels below look swapped. Confirm before relying on the dump.
if self.opts.verbose > 3:
1773
from tempfile import mkdtemp
1776
for i, n in enumerate(['sindx1', 'sindx0', 'ctoc', 'indx0', 'indx1']):
1777
open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
1778
self._oeb.log.debug('Index records dumped to', t)
1780
def _clean_text_value(self, text):
# Normalise a TOC text value to a UTF-8 encoded byte string.
# Text that is not already `unicode` is decoded as UTF-8 with 'replace'
# error handling before being re-encoded.
# NOTE(review): the else/return lines are not visible in this view;
# presumably "(none)" is substituted when text is None or blank and the
# resulting value is returned.
1781
if text is not None and text.strip() :
1783
if not isinstance(text, unicode):
1784
text = text.decode('utf-8', 'replace')
1785
text = text.encode('utf-8')
1787
text = "(none)".encode('utf-8')
1790
def _add_flat_ctoc_node(self, node, ctoc, title=None):
# Write one flat-TOC node's title into the `ctoc` stream and record where it
# was written.
#   node  -- TOC node; its .title and .klass are read
#   ctoc  -- output stream; must support tell() and write()
#   title -- optional override for node.title
# Side effects: sets self._last_toc_entry, increments self._chapterCount and
# appends this node's name map to self._ctoc_map.
1791
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
1792
t = node.title if title is None else title
1793
t = self._clean_text_value(t)
1794
self._last_toc_entry = t
1796
# Create an empty dictionary for this node
1800
if node.klass == 'article' :
1801
ctoc_name_map['klass'] = 'chapter'
1803
ctoc_name_map['klass'] = node.klass
1805
# Add title offset to name map
1806
ctoc_name_map['titleOffset'] = ctoc.tell()
# The title is stored as a forward-encoded decint length followed by the bytes.
1807
ctoc.write(decint(len(t), DECINT_FORWARD)+t)
1808
self._chapterCount += 1
1810
# append this node's name_map to map
1811
self._ctoc_map.append(ctoc_name_map)
1816
def _add_structured_ctoc_node(self, node, ctoc, title=None):
# Write the CTOC strings for one node of a structured (periodical) TOC and
# append the node's offset map to self._ctoc_map.
#   node  -- TOC node; .klass, .title, .description, .author, .play_order are read
#   ctoc  -- output stream; must support tell() and write()
#   title -- optional override for node.title
# Every string is written as a forward-encoded decint length followed by the
# bytes. For 'periodical', 'section' and 'article' nodes the class-name
# string's offset is taken from an earlier _ctoc_map entry of the same klass
# when one exists.
# Raises NotImplementedError for a klass it does not recognise.
1817
# Process 'periodical', 'section' and 'article'
# NOTE(review): the statement executed when node.klass is None is not
# visible in this view.
1818
if node.klass is None :
1820
t = node.title if title is None else title
1821
t = self._clean_text_value(t)
1822
self._last_toc_entry = t
1824
# Create an empty dictionary for this node
1827
# Add the klass of this node
1828
ctoc_name_map['klass'] = node.klass
1830
if node.klass == 'chapter':
1831
# Add title offset to name map
1832
ctoc_name_map['titleOffset'] = ctoc.tell()
1833
ctoc.write(decint(len(t), DECINT_FORWARD)+t)
1834
self._chapterCount += 1
1836
elif node.klass == 'periodical' :
1838
ctoc_name_map['titleOffset'] = ctoc.tell()
1839
ctoc.write(decint(len(t), DECINT_FORWARD)+t)
1841
# Look for existing class entry 'periodical' in _ctoc_map
1842
for entry in self._ctoc_map:
1843
if entry['klass'] == 'periodical':
1844
# Use the pre-existing instance
1845
ctoc_name_map['classOffset'] = entry['classOffset']
1850
# NOTE(review): presumably reached only when no earlier entry matched (the
# break/else lines are not visible): write the class name and record its offset.
ctoc_name_map['classOffset'] = ctoc.tell()
1851
ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
1853
self._periodicalCount += 1
1855
elif node.klass == 'section' :
1857
ctoc_name_map['titleOffset'] = ctoc.tell()
1858
ctoc.write(decint(len(t), DECINT_FORWARD)+t)
1860
# Look for existing class entry 'section' in _ctoc_map
1861
for entry in self._ctoc_map:
1862
if entry['klass'] == 'section':
1863
# Use the pre-existing instance
1864
ctoc_name_map['classOffset'] = entry['classOffset']
1869
ctoc_name_map['classOffset'] = ctoc.tell()
1870
ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
1872
self._sectionCount += 1
1874
elif node.klass == 'article' :
1875
# Add title offset/title
1876
ctoc_name_map['titleOffset'] = ctoc.tell()
1877
ctoc.write(decint(len(t), DECINT_FORWARD)+t)
1879
# Look for existing class entry 'article' in _ctoc_map
1880
for entry in self._ctoc_map:
1881
if entry['klass'] == 'article':
1882
ctoc_name_map['classOffset'] = entry['classOffset']
1887
ctoc_name_map['classOffset'] = ctoc.tell()
1888
ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
1890
# Add description offset/description
1891
if node.description :
1892
d = self._clean_text_value(node.description)
1893
ctoc_name_map['descriptionOffset'] = ctoc.tell()
1894
ctoc.write(decint(len(d), DECINT_FORWARD)+d)
1896
# No description: the offset is recorded as None.
ctoc_name_map['descriptionOffset'] = None
1898
# Add author offset/author
1900
a = self._clean_text_value(node.author)
1901
ctoc_name_map['authorOffset'] = ctoc.tell()
1902
ctoc.write(decint(len(a), DECINT_FORWARD)+a)
1904
# No author: the offset is recorded as None.
ctoc_name_map['authorOffset'] = None
1906
self._articleCount += 1
1909
raise NotImplementedError( \
1910
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
1911
(node.title, node.klass, node.play_order))
1913
# append this node's name_map to map
1914
self._ctoc_map.append(ctoc_name_map)
1916
def _generate_ctoc(self):
# Build the compiled TOC (CTOC) string block and return it padded by
# align_block().
# Resets self._ctoc_map, self._last_toc_entry and the four node counters,
# then fills them through _add_structured_ctoc_node() when
# self._conforming_periodical_toc is set, or _add_flat_ctoc_node() otherwise.
# On the flat path self._oeb.toc.nodes is replaced with the reduced node list.
# Finally chooses a mobiType from the counters and stores
# MobiDocument(mobiType) in self._MobiDoc.
1917
# Generate the compiled TOC strings
1918
# Each node has 1-4 CTOC entries:
1924
# title, class, description, author
1927
# nb: Chapters don't actually have @class, so we synthesize it
1928
# in reader._toc_from_navpoint
1932
self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
1933
self._last_toc_entry = None
1936
# Track the individual node types
1937
self._periodicalCount = 0
1938
self._sectionCount = 0
1939
self._articleCount = 0
1940
self._chapterCount = 0
1944
if self._conforming_periodical_toc :
1945
self._oeb.logger.info('Generating structured CTOC ...')
1946
for (child) in toc.iter():
1947
if self.opts.verbose > 2 :
1948
self._oeb.logger.info(" %s" % child)
1949
self._add_structured_ctoc_node(child, ctoc)
1952
self._oeb.logger.info('Generating flat CTOC ...')
1955
for (i, child) in enumerate(toc.iterdescendants()):
1956
# Only add chapters or articles at depth==1
1957
# no class defaults to 'chapter'
1958
if child.klass is None : child.klass = 'chapter'
1959
if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
1960
if self.opts.verbose > 2 :
1961
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
1962
(child.klass, child.depth(), child) )
1964
# Test to see if this child's offset is the same as the previous child's
1970
self._oeb.logger.warn(' Ignoring TOC entry with no href:',
1973
if h not in self._id_offsets:
1974
self._oeb.logger.warn(' Ignoring missing TOC entry:',
1978
currentOffset = self._id_offsets[h]
1979
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
1981
# Only a node whose offset differs from the previous kept node is added;
# a repeated offset is reported as a redundant href below.
if currentOffset != previousOffset :
1982
self._add_flat_ctoc_node(child, ctoc)
1983
reduced_toc.append(child)
1984
previousOffset = currentOffset
1986
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
1989
if self.opts.verbose > 2 :
1990
self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
1991
(child.klass, child.depth(),i))
1993
# Update the TOC with our edited version
1994
self._oeb.toc.nodes = reduced_toc
1996
# Instantiate a MobiDocument(mobitype)
# NOTE(review): most of the mobiType assignments for the branches below are
# not visible in this view; only the publication_type lookup (0x101 for
# 'newspaper', 0x103 otherwise) can be read directly.
1997
if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
1998
not self.opts.mobi_periodical :
2000
elif self._periodicalCount and self._sectionCount == 1 :
2002
elif self._periodicalCount and self._sectionCount > 1 :
2004
if self._oeb.metadata.publication_type:
2005
x = unicode(self._oeb.metadata.publication_type[0]).split(':')
2008
mobiType = {'newspaper':0x101}.get(pt, 0x103)
2010
raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
2012
self._MobiDoc = MobiDocument(mobiType)
2014
if self.opts.verbose > 2 :
2016
if mobiType > 0x100 :
2017
structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
2018
self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
2019
if mobiType > 0x100 :
2020
self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
2021
(self._periodicalCount, self._sectionCount, self._articleCount) )
2023
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
2025
return align_block(ctoc.getvalue())
2027
def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
# Append one 'periodical' entry to the INDXT stream and record the entry's
# position in the IDXT `indices` stream.
#   indxt, indices -- output streams (tell()/write())
#   index          -- position of this node in self._ctoc_map
#   offset, length -- values written as forward-encoded decints
#   firstSection, lastSection -- section index range written into the entry
# NOTE(review): `name` is assigned on a line not visible in this view, and
# `count` is not used by any visible line (presumably it feeds `name`).
2028
# Entry position = 0xc0 + current INDXT size, stored big-endian 16-bit.
pos = 0xc0 + indxt.tell()
2029
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
2031
indxt.write(chr(len(name)) + name) # Write the name
2032
indxt.write(INDXT['periodical']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
2033
indxt.write(chr(1)) # subType 1
2034
indxt.write(decint(offset, DECINT_FORWARD)) # offset
2035
indxt.write(decint(length, DECINT_FORWARD)) # length
2036
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
2038
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
2040
indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD)) # vwi class offset in CNCX
2041
indxt.write(decint(firstSection, DECINT_FORWARD)) # first section in periodical
2042
indxt.write(decint(lastSection, DECINT_FORWARD)) # last section in periodical
2044
indxt.write(decint(0, DECINT_FORWARD)) # 0x80
2046
def _write_section_node(self, indxt, indices, myCtocMapIndex, index, offset, length, count, firstArticle, lastArticle, parentIndex) :
# Append one 'section' entry to the INDXT stream and record the entry's
# position in the IDXT `indices` stream.
#   indxt, indices -- output streams (tell()/write())
#   myCtocMapIndex -- position of this section in self._ctoc_map
#   offset, length -- values written as forward-encoded decints
#   firstArticle, lastArticle -- article index range written into the entry
#   parentIndex    -- index of the parent periodical entry
# NOTE(review): `name` is assigned on a line not visible in this view;
# `index` and `count` are not used by any visible line (presumably they feed
# `name`).
2047
# Entry position = 0xc0 + current INDXT size, stored big-endian 16-bit.
pos = 0xc0 + indxt.tell()
2048
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
2050
indxt.write(chr(len(name)) + name) # Write the name
2051
indxt.write(INDXT['section']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
2052
indxt.write(chr(0)) # subType 0
2053
indxt.write(decint(offset, DECINT_FORWARD)) # offset
2054
indxt.write(decint(length, DECINT_FORWARD)) # length
2055
indxt.write(decint(self._ctoc_map[myCtocMapIndex]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
2057
indxt.write(decint(1, DECINT_FORWARD)) # unknown byte
2059
indxt.write(decint(self._ctoc_map[myCtocMapIndex]['classOffset'], DECINT_FORWARD)) # vwi class offset in CNCX
2060
indxt.write(decint(parentIndex, DECINT_FORWARD)) # index of periodicalParent
2061
indxt.write(decint(firstArticle, DECINT_FORWARD)) # first article in section
2062
indxt.write(decint(lastArticle, DECINT_FORWARD)) # last article in section
2064
def _write_article_node(self, indxt, indices, index, offset, length, count, parentIndex) :
# Append one 'article' entry to the INDXT stream and record the entry's
# position in the IDXT `indices` stream.
#   indxt, indices -- output streams (tell()/write())
#   index          -- position of this article in self._ctoc_map
#   offset, length -- values written as forward-encoded decints
#   parentIndex    -- index of the parent entry (callers pass the article's
#                     sectionParentIndex)
# The flag byte advertises which optional offsets follow: 0x4 for author,
# 0x2 for description.
# NOTE(review): `name` and the initial value of `flagBits` are assigned on
# lines not visible in this view; `count` is not used by any visible line.
2065
# Entry position = 0xc0 + current INDXT size, stored big-endian 16-bit.
pos = 0xc0 + indxt.tell()
2066
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
2068
indxt.write(chr(len(name)) + name) # Write the name
2069
indxt.write(INDXT['article']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
2071
# A None offset (no author/description recorded in the CTOC map) is falsy.
hasAuthor = True if self._ctoc_map[index]['authorOffset'] else False
2072
hasDescription = True if self._ctoc_map[index]['descriptionOffset'] else False
2074
# flagBits may be dependent upon the generatorID written at 0x10 in generate_index().
2075
# in INDX0. Mobigen uses a generatorID of 2 and writes these bits at positions 1 & 2;
2076
# calibre uses a generatorID of 6 and writes the bits at positions 2 & 3.
2078
if hasAuthor : flagBits |= 0x4
2079
if hasDescription : flagBits |= 0x2
2080
indxt.write(pack('>B',flagBits)) # Author/description flags
2081
indxt.write(decint(offset, DECINT_FORWARD)) # offset
2084
indxt.write(decint(length, DECINT_FORWARD)) # length
2085
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
2087
indxt.write(decint(2, DECINT_FORWARD)) # unknown byte
2089
indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD)) # vwi class offset in CNCX
2090
indxt.write(decint(parentIndex, DECINT_FORWARD)) # index of sectionParent
2092
# Optionally write the author and description fields
2093
descriptionOffset = self._ctoc_map[index]['descriptionOffset']
2094
if descriptionOffset :
2095
indxt.write(decint(descriptionOffset, DECINT_FORWARD))
2097
authorOffset = self._ctoc_map[index]['authorOffset']
2099
# NOTE(review): presumably guarded by a test on authorOffset, mirroring the
# description case above; the guard line is not visible here.
indxt.write(decint(authorOffset, DECINT_FORWARD))
2101
def _write_chapter_node(self, indxt, indices, index, offset, length, count):
# Append one 'chapter' entry to the INDXT stream and record the entry's
# position in the IDXT `indices` stream.
#   indxt, indices -- output streams (tell()/write())
#   index          -- position of this chapter in self._ctoc_map
#   offset, length -- values written as forward-encoded decints
# NOTE(review): `name` is assigned on a line not visible in this view, and
# `count` is not used by any visible line (presumably it feeds `name`).
2102
# Writes an INDX1 NCXEntry of entryType 0x0F - chapter
2103
if self.opts.verbose > 2:
2104
# *** GR: Turn this off while I'm developing my code
2105
#self._oeb.log.debug('Writing TOC node to IDXT:', node.title, 'href:', node.href)
2108
# Entry position = 0xc0 + current INDXT size, stored big-endian 16-bit.
pos = 0xc0 + indxt.tell()
2109
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
2111
indxt.write(chr(len(name)) + name) # Write the name
2112
indxt.write(INDXT['chapter']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
2113
indxt.write(decint(offset, DECINT_FORWARD)) # offset
2114
indxt.write(decint(length, DECINT_FORWARD)) # length
2115
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
2116
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
2118
def _compute_offset_length(self, i, node, entries) :
# Return (offset, length) for `node`.
#   i       -- position in `entries` after which to search for the next entry
#   node    -- TOC node whose offset is looked up in self._id_offsets
#   entries -- list of TOC nodes
# The length is the distance to the first later entry whose offset is
# strictly greater than this node's offset.
# NOTE(review): the lines deriving `h`/`h2` from the nodes' hrefs, the
# handling after the "Could not find" warning, and the break/else around the
# fallback length are not visible in this view.
2120
if h not in self._id_offsets:
2121
self._oeb.log.warning('Could not find TOC entry:', node.title)
2124
offset = self._id_offsets[h]
2126
# Calculate length based on next entry's offset
2127
for sibling in entries[i+1:]:
2129
if h2 in self._id_offsets:
2130
offset2 = self._id_offsets[h2]
2131
# Entries at the same or an earlier offset are passed over.
if offset2 > offset:
2132
length = offset2 - offset
2135
# Fallback: run to the end of the content (presumably when no later entry
# with a greater offset was found).
length = self._content_length - offset
2136
return offset, length
2138
def _establish_document_structure(self) :
# Attach a document-structure object to self._MobiDoc based on the klass of
# the first entry in self._ctoc_map:
#   'chapter' or None -> MobiBook()
#   'periodical'      -> MobiPeriodical(...), with its startAddress set to
#                        self._anchor_offset_kindle
# Any other klass raises NotImplementedError.
# NOTE(review): the return statement is not visible in this view; the caller
# assigns the result to a documentType variable, so presumably documentType
# is returned.
2141
klass = self._ctoc_map[0]['klass']
2145
if klass == 'chapter' or klass == None :
2146
documentType = 'book'
2147
if self.opts.verbose > 2 :
2148
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
2149
self._MobiDoc.documentStructure = MobiBook()
2151
elif klass == 'periodical' :
2152
documentType = klass
2153
if self.opts.verbose > 2 :
2154
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
2155
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
2156
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
2158
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
2161
def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
# Walk every node below `child` and build two lists, returned as
# (sectionIndices, sectionParents):
#   sectionIndices -- one entry per periodical/section/article node holding
#                     the value of currentSection at that point
#   sectionParents -- the objects returned by myPeriodical.addSectionParent(),
#                     one per 'section' node
# NOTE(review): the initialisation of both lists and the statement that
# bumps currentSection for each section are not visible in this view.
2162
# [1:] skips the first item yielded by child.iter().
sectionTitles = list(child.iter())[1:]
2165
for (j, section) in enumerate(sectionTitles):
2166
# iterate over just the sections
2168
if section.klass == 'periodical' :
2169
# Write our index to the list
2170
sectionIndices.append(currentSection)
2172
if self.opts.verbose > 3 :
2173
self._oeb.logger.info("Periodical: %15.15s \tkls:%s \tdpt:%d ply:%03d" % \
2174
(section.title, section.klass, section.depth(), section.play_order) )
2176
elif section.klass == 'section' :
2177
# Add sections, save in list with original sequence number
2178
myNewSection = myPeriodical.addSectionParent(myDoc, j)
2179
sectionParents.append(myNewSection)
2181
# Bump the section #
2183
# Write our index to the list
2184
sectionIndices.append(currentSection)
2186
if self.opts.verbose > 3 :
2187
self._oeb.logger.info(" Section: %15.15s \tkls:%s \tdpt:%d ply:%03d \tindex:%d" % \
2188
(section.title, section.klass, section.depth(), section.play_order,j) )
2190
elif section.klass == 'article' :
2191
# Write our index to the list
2192
sectionIndices.append(currentSection)
2195
if self.opts.verbose > 3 :
2196
self._oeb.logger.info( " Unrecognized class %s in structured document" % section.klass)
2197
return sectionIndices, sectionParents
2200
def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
# Create a MobiArticle for every node below `section` and add it to that
# section's parent object.
#   i              -- position of `section` in `entries`
#   entries        -- flat list of TOC nodes
#   sectionIndices -- per-node section numbers; sectionIndices[i-1] selects
#                     the entry of sectionParents to attach the articles to
#   sectionParents -- list of section parent objects
2201
# [1:] skips the first item yielded by section.iter().
sectionArticles = list(section.iter())[1:]
2202
# Iterate over the section's articles
2204
for (j, article) in enumerate(sectionArticles):
2205
# Recompute offset and length for each article
# The search for the next offset starts after the section's own position i.
2206
offset, length = self._compute_offset_length(i, article, entries)
2207
if self.opts.verbose > 2 :
2208
self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )
2210
# Index used for the article's _ctoc_map entry: the section's position,
# plus the article's position within the section, plus one.
ctoc_map_index = i + j + 1
2212
#hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
2213
#hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
2214
mySectionParent = sectionParents[sectionIndices[i-1]]
2215
myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
2216
mySectionParent.addArticle( myNewArticle )
2219
def _add_book_chapters(self, myDoc, indxt, indices):
# Write one chapter entry per chapter of myDoc.documentStructure via
# _write_chapter_node().
# NOTE(review): the return statement is not visible in this view; the caller
# unpacks the result as (last_name, c). last_name is only assigned inside the
# loop, so an empty chapter list would presumably leave it unassigned.
2220
chapterCount = myDoc.documentStructure.chapterCount()
2221
if self.opts.verbose > 3 :
2222
self._oeb.logger.info("Writing %d chapters for mobitype 0x%03X" % (chapterCount, myDoc.mobiType))
2224
for (c, chapter) in enumerate(list(myDoc.documentStructure.chapters)) :
2225
index = chapter.myCtocMapIndex
2226
self._write_chapter_node(indxt, indices, index, chapter.startAddress, chapter.length, c)
2228
# last_name is the running entry counter as four uppercase hex digits.
last_name = "%04X"%c # Returned when done
2231
def _add_periodical_flat_articles(self, myDoc, indxt, indices):
# Write the index entries for a single-section periodical: one periodical
# entry, one section entry (the first element of sectionParents), then one
# article entry per article of that section.
# NOTE(review): the lines that set and advance `c` and `index`, and the
# return statement, are not visible in this view; the caller unpacks the
# result as (last_name, c).
2232
sectionParent = myDoc.documentStructure.sectionParents[0]
2233
articleCount = len(sectionParent.articles)
2234
if self.opts.verbose > 3 :
2235
self._oeb.logger.info("Writing %d articles for mobitype 0x%03X" % (articleCount, myDoc.mobiType))
2237
# Singleton periodical
2239
offset = myDoc.documentStructure.startAddress
2240
length = myDoc.documentStructure.length
2242
firstSection = myDoc.documentStructure.firstSectionIndex
2243
lastSection = myDoc.documentStructure.lastSectionIndex
2244
self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection)
2248
# Section entry: values come from the section parent object.
offset = sectionParent.startAddress
2249
length = sectionParent.sectionLength
2251
firstArticle = sectionParent.firstArticleIndex
2252
lastArticle = sectionParent.lastArticleIndex
2253
parentIndex = sectionParent.parentIndex
2254
self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, index, offset, length, c, firstArticle, lastArticle, parentIndex)
2256
last_name = "%04X"%c
2259
# Article entries, in the order stored on the section parent.
for (i, article) in enumerate(list(sectionParent.articles)) :
2260
index = article.myCtocMapIndex
2261
offset = article.startAddress
2262
length = article.articleLength
2264
parentIndex = article.sectionParentIndex
2265
self._write_article_node(indxt, indices, index, offset, length, c, parentIndex)
2267
last_name = "%04X" % c
2270
def _add_periodical_structured_articles(self, myDoc, indxt, indices):
# Write the index entries for a multi-section periodical in three passes:
# the periodical entry, then every section entry from firstSection to
# lastSection, then every article entry section by section.
# sectionParents is indexed with sectionCount - 1 in both loops.
# NOTE(review): the lines that set and advance `c`, `index` and
# `sectionCount`, and the return statement, are not visible in this view;
# the caller unpacks the result as (last_name, c).
2271
# Write NCXEntries for Structured Periodical
2278
if self.opts.verbose > 2 :
2279
self._oeb.logger.info( "Writing NCXEntries for mobiType 0x%03X" % myDoc.mobiType)
2281
sectionParent = myDoc.documentStructure.sectionParents[0]
2282
articleCount = len(sectionParent.articles)
2284
# Write opening periodical 0xDF entry
2286
offset = myDoc.documentStructure.startAddress
2287
length = myDoc.documentStructure.length
2289
firstSection = myDoc.documentStructure.firstSectionIndex
2290
lastSection = myDoc.documentStructure.lastSectionIndex
2291
self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection)
2293
# Write each section 0xFF entry
2294
sectionCount = firstSection
2295
while sectionCount <= lastSection :
2297
sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1]
2298
articleCount = len(sectionParent.articles)
2300
offset = sectionParent.startAddress
2301
length = sectionParent.sectionLength
2303
firstArticle = sectionParent.firstArticleIndex
2304
lastArticle = sectionParent.lastArticleIndex
2305
parentIndex = sectionParent.parentIndex
2306
self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, sectionCount, offset, length, c, firstArticle, lastArticle, parentIndex)
2309
# Write each article 0x3F entry
2310
sectionCount = firstSection
2311
while sectionCount <= lastSection :
2313
sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1]
2314
# articleCount = len(sectionParent.articles)
2316
# offset = sectionParent.startAddress
2317
# length = sectionParent.sectionLength
2319
# firstArticle = sectionParent.firstArticleIndex
2320
# lastArticle = sectionParent.lastArticleIndex
2321
# parentIndex = sectionParent.parentIndex
2322
# add_section_node(index, offset, length, c, firstArticle, lastArticle, parentIndex)
2324
last_name = "%04X"%c
2327
for (i, article) in enumerate(list(sectionParent.articles)) :
2328
if self.opts.verbose > 3 :
2329
self._oeb.logger.info( "Adding section:article %d:%02d" % \
2330
(sectionParent.myIndex, i))
2331
index = article.myCtocMapIndex
2332
offset = article.startAddress
2333
length = article.articleLength
2335
parentIndex = article.sectionParentIndex
2336
self._write_article_node(indxt, indices, index, offset, length, c, parentIndex)
2338
last_name = "%04X"%c
2344
def _generate_indxt(self, ctoc):
# Build the INDXT entries and the matching IDXT offsets for the document.
# Returns a 4-tuple:
#   (align_block(indxt bytes), c, align_block(indices bytes), last_name)
# Steps visible here:
#   1. _establish_document_structure() attaches a book or periodical
#      structure to self._MobiDoc and yields the document type.
#   2. The first TOC node is examined; for a periodical the section
#      index/parent lists are built from it.
#   3. Every remaining TOC node is turned into a MobiChapter (books, or
#      articles when opts.mobi_periodical is off) or, for 'section' nodes of
#      a periodical, into articles under the section's parent.
#   4. The entries are written by the _add_* method matching myDoc.mobiType.
# NOTE(review): `ctoc` is not referenced by any visible line, and the line
# binding `toc` is not visible in this view.
2345
# Assumption: child.depth() represents nestedness of the TOC.
2346
# A flat document (book) has a depth of 2:
2347
# <navMap> child.depth() = 2
2348
# <navPoint> Chapter child.depth() = 1
2349
# <navPoint> Chapter etc
2351
# A structured document (periodical) has a depth of 4 (Mobigen-prepped)
2352
# <navMap> child.depth() = 4
2353
# <navPoint> Periodical child.depth() = 3
2354
# <navPoint> Section 1 child.depth() = 2
2355
# <navPoint> Article child.depth() = 1
2356
# <navPoint> Article(s) child.depth() = 1
2357
# <navpoint> Section 2
2359
documentType = "unknown"
2362
currentSection = 0 # Starting section number
2364
indxt, indices, c = StringIO(), StringIO(), 0
2366
indices.write('IDXT')
2370
# 'book', 'periodical' or None
2371
documentType = self._establish_document_structure()
2372
myDoc = self._MobiDoc
2374
# Only the first node yielded by toc.iter() is examined in this loop.
nodes = list(toc.iter())[0:1]
2375
for (i, child) in enumerate(nodes) :
2377
if documentType == "periodical" :
2378
myPeriodical = myDoc.documentStructure
2379
if self.opts.verbose > 3 :
2380
self._oeb.logger.info("\nDocument: %s \tkls:%s \tdpt:%d ply:%03d" % \
2381
(child.title, child.klass, child.depth(), child.play_order) )
2382
sectionIndices, sectionParents = \
2383
self._generate_section_indices(child, currentSection, myPeriodical, myDoc)
2385
elif documentType == "book" :
2386
myBook = myDoc.documentStructure
2388
if self.opts.verbose > 3 :
2389
self._oeb.logger.info("\nBook: %-19.19s \tkls:%s \tdpt:%d ply:%03d" % \
2390
(child.title, child.klass, child.depth(), child.play_order) )
2392
if self.opts.verbose > 3 :
2393
self._oeb.logger.info("unknown document type %12.12s \tdepth:%d" % (child.title, child.depth()) )
2395
# Original code starts here
2396
# test first node for depth/class
# [1:] skips the first node, which was handled by the loop above.
2397
entries = list(toc.iter())[1:]
2398
for (i, child) in enumerate(entries):
2399
if not child.title or not child.title.strip():
2402
offset, length = self._compute_offset_length(i, child, entries)
2404
if child.klass == 'chapter' or \
2405
(not self.opts.mobi_periodical and child.klass == 'article') :
2406
# create chapter object - confirm i + 0 is correct!!
2407
myNewChapter = MobiChapter(myDoc.getNextNode(), offset, length, i)
2408
myBook.addChapter(myNewChapter)
2412
if self.opts.verbose > 3 :
2413
self._oeb.logger.info( " Chapter: %-14.14s \tcls:%s \tdpt:%d ply:%03d \toff:0x%X \t:len0x%X" % \
2414
(child.title, child.klass, child.depth(), child.play_order, offset, length) )
2416
if self.opts.verbose > 3 :
2417
self._oeb.logger.info( " Chapter: %-14.14s \tclass:%s \tdepth:%d playOrder:%03d \toff:0x%X \t:len0x%X" % \
2418
("(bad string)", child.klass, child.depth(), child.play_order, offset, length))
2420
elif child.klass == 'section' and self.opts.mobi_periodical :
2421
if self.opts.verbose > 3 :
2422
self._oeb.logger.info("\n Section: %-15.15s \tkls:%s \tdpt:%d ply:%03d" % \
2423
(child.title, child.klass, child.depth(), child.play_order))
2424
self._generate_section_article_indices(i, child, entries, sectionIndices, sectionParents)
2426
if self.opts.verbose > 3 :
2427
self._oeb.logger.info("")
2429
mobiType = myDoc.mobiType
2430
if self.opts.verbose > 3 :
2431
self._MobiDoc.dumpInfo()
2433
# Dispatch on publication type: 0x02 book, 0x102 with exactly one section
# flat periodical; the structured-periodical call below is presumably the
# remaining (else) case -- its guard line is not visible here.
if mobiType == 0x02 :
2434
last_name, c = self._add_book_chapters(myDoc, indxt, indices)
2436
elif mobiType == 0x102 and myDoc.documentStructure.sectionCount() == 1 :
2437
last_name, c = self._add_periodical_flat_articles(myDoc, indxt, indices)
2440
last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)
2442
return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
2444
class HTMLRecordData(object):
    """ A data structure containing indexing/navigation data for an HTML record

    Holds eight integer fields describing which nodes continue into, open
    in, or follow a single HTML record.  Each field is reachable three ways,
    all of which are kept for existing callers: the ``getX``/``setX``
    methods, and the ``x`` property built from them.
    """

    def __init__(self):
        # Every field starts at -1.
        # NOTE(review): -1 presumably means "not assigned yet" - confirm
        # against the code that fills these records in.
        self._continuingNode = -1
        self._continuingNodeParent = -1
        self._openingNode = -1
        self._openingNodeParent = -1
        self._currentSectionNodeCount = -1
        self._nextSectionNumber = -1
        self._nextSectionOpeningNode = -1
        self._nextSectionNodeCount = -1

    def getContinuingNode(self):
        return self._continuingNode
    def setContinuingNode(self, value):
        self._continuingNode = value
    continuingNode = property(getContinuingNode, setContinuingNode, None, None)

    def getContinuingNodeParent(self):
        return self._continuingNodeParent
    def setContinuingNodeParent(self, value):
        self._continuingNodeParent = value
    continuingNodeParent = property(getContinuingNodeParent, setContinuingNodeParent, None, None)

    def getOpeningNode(self):
        return self._openingNode
    def setOpeningNode(self, value):
        self._openingNode = value
    openingNode = property(getOpeningNode, setOpeningNode, None, None)

    def getOpeningNodeParent(self):
        return self._openingNodeParent
    def setOpeningNodeParent(self, value):
        self._openingNodeParent = value
    openingNodeParent = property(getOpeningNodeParent, setOpeningNodeParent, None, None)

    def getCurrentSectionNodeCount(self):
        return self._currentSectionNodeCount
    def setCurrentSectionNodeCount(self, value):
        self._currentSectionNodeCount = value
    currentSectionNodeCount = property(getCurrentSectionNodeCount, setCurrentSectionNodeCount, None, None)

    def getNextSectionNumber(self):
        return self._nextSectionNumber
    def setNextSectionNumber(self, value):
        self._nextSectionNumber = value
    nextSectionNumber = property(getNextSectionNumber, setNextSectionNumber, None, None)

    def getNextSectionOpeningNode(self):
        return self._nextSectionOpeningNode
    def setNextSectionOpeningNode(self, value):
        self._nextSectionOpeningNode = value
    nextSectionOpeningNode = property(getNextSectionOpeningNode, setNextSectionOpeningNode, None, None)

    def getNextSectionNodeCount(self):
        return self._nextSectionNodeCount
    def setNextSectionNodeCount(self, value):
        self._nextSectionNodeCount = value
    nextSectionNodeCount = property(getNextSectionNodeCount, setNextSectionNodeCount, None, None)

    def dumpData(self, recordNumber, oeb):
        """ Log a summary of this record through ``oeb.logger.info``.

        recordNumber -- integer printed in the header line in hex and decimal.
        oeb          -- object exposing ``logger.info(message)``.
        """
        oeb.logger.info( "--- Summary of HTML Record 0x%x [%d] indexing ---" % (recordNumber, recordNumber) )
        oeb.logger.info( " continuingNode: %03d" % self.continuingNode )
        oeb.logger.info( " continuingNodeParent: %03d" % self.continuingNodeParent )
        oeb.logger.info( " openingNode: %03d" % self.openingNode )
        oeb.logger.info( " openingNodeParent: %03d" % self.openingNodeParent )
        oeb.logger.info( " currentSectionNodeCount: %03d" % self.currentSectionNodeCount )
        oeb.logger.info( " nextSectionNumber: %03d" % self.nextSectionNumber )
        oeb.logger.info( " nextSectionOpeningNode: %03d" % self.nextSectionOpeningNode )
        oeb.logger.info( " nextSectionNodeCount: %03d" % self.nextSectionNodeCount )
2515
class MobiDocument(object):
    """ Hierarchical description of a Mobi document

    Wraps the document type code plus a ``documentStructure`` object, and
    hands out consecutive node index values through ``getNextNode``.
    """

    # Counter to assign index values as new nodes are created
    # NOTE(review): the initialiser and the increment in getNextNode were
    # missing from the damaged listing.  Starting at -1 with a pre-increment
    # yields 0, 1, 2, ...; this matches MobiSection.addArticle, which treats
    # a section whose index is 1 as the first section - confirm against
    # version control.
    _nextNode = -1

    def __init__(self, mobitype):
        """ mobitype -- type code stored as-is and exposed as ``mobiType``. """
        self._mobitype = mobitype
        self._documentStructure = None # Assigned in _generate_indxt

    def getMobiType(self):
        return self._mobitype
    def setMobiType(self, value):
        self._mobitype = value
    mobiType = property(getMobiType, setMobiType, None, None)

    def getDocumentStructure(self):
        return self._documentStructure
    def setDocumentStructure(self, value):
        self._documentStructure = value
    documentStructure = property(getDocumentStructure, setDocumentStructure, None, None)

    def getNextNode(self):
        """ Advance the per-document counter and return the new index. """
        # The augmented assignment creates an instance attribute on first
        # use, so each document counts independently of the class default.
        self._nextNode += 1
        return self._nextNode

    def dumpInfo(self):
        """ Delegate to the ``dumpInfo`` of the assigned document structure. """
        self._documentStructure.dumpInfo()
2544
class MobiBook(object):
    """ A container for a flat chapter-to-chapter Mobi book

    Keeps an ordered list of chapter objects.  ``dumpInfo`` expects each
    chapter to expose ``myCtocMapIndex``, ``startAddress`` and ``length``.
    """

    def __init__(self):
        # NOTE(review): the constructor was missing from the damaged listing;
        # an empty chapter list is what chapterCount/addChapter require.
        self._chapters = []

    def chapterCount(self):
        """ Return the number of chapters added so far. """
        return len(self._chapters)

    def getChapters(self):
        return self._chapters
    def setChapters(self, value):
        self._chapters = value
    chapters = property(getChapters, setChapters, None, None)

    def addChapter(self, value):
        """ Append a chapter object to the end of the chapter list. """
        self._chapters.append(value)

    def dumpInfo(self):
        """ Print a summary of the book and every chapter to stdout. """
        # Single-argument, parenthesised print produces identical output
        # whether it is parsed as a statement or as a function call.
        print("%20s:" % ("Book"))
        print("%20s: %d" % ("Number of chapters", len(self._chapters)))
        for (count, chapter) in enumerate(self._chapters):
            print("%20s: %d" % ("myCtocMapIndex",chapter.myCtocMapIndex))
            print("%20s: %d" % ("Chapter",count))
            print("%20s: 0x%X" % ("startAddress", chapter.startAddress))
            print("%20s: 0x%X" % ("length", chapter.length))
2571
class MobiChapter(object):
    """ A container for Mobi chapters

    Stores the chapter's node index, start address, length and ctoc map
    index.  ``myIndex`` is read-only; the other three are read/write.
    """
    def __init__(self, myIndex, startAddress, length, ctoc_map_index):
        self._myIndex = myIndex
        self._startAddress = startAddress
        self._length = length
        self._myCtocMapIndex = ctoc_map_index

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex
    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value
    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

    def getMyIndex(self):
        return self._myIndex
    # No setter: the index is fixed at construction time.
    myIndex = property(getMyIndex, None, None, None)

    def getStartAddress(self):
        return self._startAddress
    def setStartAddress(self, value):
        self._startAddress = value
    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getLength(self):
        # The getter body was missing, leaving the `length` property unusable.
        return self._length
    def setLength(self, value):
        self._length = value
    length = property(getLength, setLength, None, None)
2601
class MobiPeriodical(object):
    """ A container for a structured periodical

    Owns the list of section parents and the periodical-level start
    address, length and first/last section indices.  The address, length
    and section-index fields start at 0xFFFFFFFF until something assigns
    them.
    """
    def __init__(self, myIndex):
        self._myIndex = myIndex
        self._sectionParents = []
        self._startAddress = 0xFFFFFFFF
        self._length = 0xFFFFFFFF
        self._firstSectionIndex = 0xFFFFFFFF
        self._lastSectionIndex = 0xFFFFFFFF
        self._myCtocMapIndex = 0 # Always first entry

    def getMyIndex(self):
        return self._myIndex
    def setMyIndex(self, value):
        self._myIndex = value
    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getSectionParents(self):
        return self._sectionParents
    def setSectionParents(self, value):
        self._sectionParents = value
    sectionParents = property(getSectionParents, setSectionParents, None, None)

    def sectionCount(self):
        """ Return the number of section parents added so far. """
        return len(self._sectionParents)

    def getStartAddress(self):
        return self._startAddress
    def setStartAddress(self, value):
        self._startAddress = value
    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getLength(self):
        # The getter body was missing, leaving the `length` property unusable.
        return self._length
    def setLength(self, value):
        self._length = value
    length = property(getLength, setLength, None, None)

    def getFirstSectionIndex(self):
        return self._firstSectionIndex
    def setFirstSectionIndex(self, value):
        self._firstSectionIndex = value
    firstSectionIndex = property(getFirstSectionIndex, setFirstSectionIndex, None, None)

    def getLastSectionIndex(self):
        return self._lastSectionIndex
    def setLastSectionIndex(self, value):
        self._lastSectionIndex = value
    lastSectionIndex = property(getLastSectionIndex, setLastSectionIndex, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex
    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value
    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

    def addSectionParent(self, myIndex, ctoc_map_index):
        """ Create a new MobiSection and append it to the section list.

        myIndex        -- passed unchanged to ``MobiSection(...)``.
                          NOTE(review): despite the name, MobiSection's
                          constructor calls ``getNextNode()`` on this value,
                          so it looks like the owning document rather than
                          an integer - confirm against the callers.
        ctoc_map_index -- stored on the new section as ``myCtocMapIndex``.
        """
        # Create a new section parent
        newSection = MobiSection(myIndex)
        # Assign our index to the section
        newSection.parentIndex = self._myIndex
        # Assign section number
        newSection.sectionIndex = len(self._sectionParents)
        # Assign ctoc_map_index
        newSection.myCtocMapIndex = ctoc_map_index
        # Add it to the list
        self._sectionParents.append(newSection)

    def dumpInfo(self):
        """ Print the periodical, its sections and their articles to stdout. """
        # Single-argument, parenthesised print produces identical output
        # whether it is parsed as a statement or as a function call.
        print("%20s:" % ("Periodical"))
        print("%20s: 0x%X" % ("myIndex", self.myIndex))
        print("%20s: 0x%X" % ("startAddress", self.startAddress))
        print("%20s: 0x%X" % ("length", self.length))
        print("%20s: 0x%X" % ("myCtocMapIndex", self.myCtocMapIndex))
        print("%20s: 0x%X" % ("firstSectionIndex", self.firstSectionIndex))
        print("%20s: 0x%X" % ("lastSectionIndex", self.lastSectionIndex))
        print("%20s: %d" % ("Number of Sections", len(self._sectionParents)))
        for (count, section) in enumerate(self._sectionParents):
            print("\t%20s: %d" % ("Section",count))
            print("\t%20s: 0x%X" % ("startAddress", section.startAddress))
            print("\t%20s: 0x%X" % ("length", section.sectionLength))
            print("\t%20s: 0x%X" % ("parentIndex", section.parentIndex))
            print("\t%20s: 0x%X" % ("myIndex", section.myIndex))
            print("\t%20s: 0x%X" % ("firstArticleIndex", section.firstArticleIndex))
            print("\t%20s: 0x%X" % ("lastArticleIndex", section.lastArticleIndex))
            print("\t%20s: 0x%X" % ("articles", len(section.articles) ))
            print("\t%20s: 0x%X" % ("myCtocMapIndex", section.myCtocMapIndex ))

            for (artCount, article) in enumerate(section.articles) :
                print("\t\t%20s: %d" % ("Article",artCount))
                print("\t\t%20s: 0x%X" % ("startAddress", article.startAddress))
                print("\t\t%20s: 0x%X" % ("length", article.articleLength))
                print("\t\t%20s: 0x%X" % ("sectionIndex", article.sectionParentIndex))
                print("\t\t%20s: 0x%X" % ("myIndex", article.myIndex))
                print("\t\t%20s: 0x%X" % ("myCtocMapIndex", article.myCtocMapIndex))
2699
class MobiSection(object):
    """ A container for periodical sections

    A section takes its own index from the owning document's
    ``getNextNode()`` and collects article objects.  ``addArticle`` also
    updates the periodical held in ``myMobiDoc.documentStructure``.

    NOTE(review): the ``_articles`` initialiser and the ``else`` branches of
    ``addArticle`` were missing from the damaged listing and have been
    restored - confirm against version control.
    """
    def __init__(self, myMobiDoc):
        self._myMobiDoc = myMobiDoc
        self._myIndex = myMobiDoc.getNextNode()
        self._parentIndex = 0xFFFFFFFF
        self._firstArticleIndex = 0x00
        self._lastArticleIndex = 0x00
        self._startAddress = 0xFFFFFFFF
        self._sectionLength = 0xFFFFFFFF
        self._articles = []
        self._myCtocMapIndex = -1

    def getMyMobiDoc(self):
        return self._myMobiDoc
    def setMyMobiDoc(self, value):
        self._myMobiDoc = value
    myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)

    def getMyIndex(self):
        return self._myIndex
    def setMyIndex(self, value):
        self._myIndex = value
    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getParentIndex(self):
        return self._parentIndex
    def setParentIndex(self, value):
        self._parentIndex = value
    # The property used to be published only under the misspelled name
    # `parenIndex`, so `section.parentIndex = x` created an unrelated plain
    # attribute and getParentIndex() kept returning the 0xFFFFFFFF default.
    parentIndex = property(getParentIndex, setParentIndex, None, None)
    # Misspelled name kept as an alias for any existing caller.
    parenIndex = parentIndex

    def getFirstArticleIndex(self):
        return self._firstArticleIndex
    def setFirstArticleIndex(self, value):
        self._firstArticleIndex = value
    firstArticleIndex = property(getFirstArticleIndex, setFirstArticleIndex, None, None)

    def getLastArticleIndex(self):
        return self._lastArticleIndex
    def setLastArticleIndex(self, value):
        self._lastArticleIndex = value
    lastArticleIndex = property(getLastArticleIndex, setLastArticleIndex, None, None)

    def getStartAddress(self):
        return self._startAddress
    def setStartAddress(self, value):
        self._startAddress = value
    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getSectionLength(self):
        return self._sectionLength
    def setSectionLength(self, value):
        self._sectionLength = value
    sectionLength = property(getSectionLength, setSectionLength, None, None)

    def getArticles(self):
        return self._articles
    def setArticles(self, value):
        self._articles = value
    articles = property(getArticles, setArticles, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex
    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value
    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

    def addArticle(self, article):
        """ Append ``article`` and update section and periodical bookkeeping.

        article -- object exposing ``myIndex``, ``startAddress`` and
                   ``articleLength``.
        """
        self._articles.append(article)
        periodical = self.myMobiDoc.documentStructure
        firstInSection = len(self.articles) == 1

        # Adjust the Periodical parameters
        # If this is the first article of the first section, init the values
        if self.myIndex == 1 and firstInSection :
            periodical.firstSectionIndex = self.myIndex
            periodical.lastSectionIndex = self.myIndex
            # Length runs from the periodical's start address to the end of
            # this article, so any gap before the article is included.
            periodical.length = article.articleLength + \
                ( article.startAddress - periodical.startAddress )
        else :
            periodical.length += article.articleLength

        # Always set the highest section index to myIndex
        periodical.lastSectionIndex = self.myIndex

        # Adjust the Section parameters
        if firstInSection :
            self.firstArticleIndex = article.myIndex

            if len(periodical.sectionParents) == 1 :
                # Only one section exists: it starts where the periodical
                # starts and absorbs the gap before the article.
                self.startAddress = periodical.startAddress
                self.sectionLength = article.articleLength + \
                    ( article.startAddress - periodical.startAddress )
            else :
                self.startAddress = article.startAddress
                self.sectionLength = article.articleLength
        else :
            # Adjust the Section length
            self.sectionLength += article.articleLength

        # The newest article is always the last one of the section.
        self.lastArticleIndex = article.myIndex
2803
class MobiArticle(object):
    """ A container for periodical articles

    An article records its section parent, the document that parent belongs
    to, its own node index (taken from the document's ``getNextNode()``),
    its start address, length and ctoc map index.
    """
    def __init__(self, sectionParent, startAddress, length, ctocMapIndex):
        """
        sectionParent -- object exposing ``myMobiDoc`` and ``myIndex``.
        startAddress  -- stored as ``startAddress``.
        length        -- stored as ``articleLength``.
        ctocMapIndex  -- stored as ``myCtocMapIndex``.
        """
        self._mySectionParent = sectionParent
        self._myMobiDoc = sectionParent.myMobiDoc
        self._myIndex = sectionParent.myMobiDoc.getNextNode()
        self._myCtocMapIndex = ctocMapIndex
        self._sectionParentIndex = sectionParent.myIndex
        self._startAddress = startAddress
        self._articleLength = length

    def getMySectionParent(self):
        return self._mySectionParent
    def setMySectionParent(self, value):
        self._mySectionParent = value
    mySectionParent = property(getMySectionParent, setMySectionParent, None, None)

    def getMyMobiDoc(self):
        return self._myMobiDoc
    def setMyMobiDoc(self, value):
        self._myMobiDoc = value
    myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)

    def getMyIndex(self):
        return self._myIndex
    def setMyIndex(self, value):
        # Previously wrote to `_sectionIndex`, which getMyIndex never reads,
        # so assigning `myIndex` silently had no visible effect.
        self._myIndex = value
    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getSectionParentIndex(self):
        return self._sectionParentIndex
    def setSectionParentIndex(self, value):
        self._sectionParentIndex = value
    sectionParentIndex = property(getSectionParentIndex, setSectionParentIndex, None, None)

    def getStartAddress(self):
        return self._startAddress
    def setStartAddress(self, value):
        self._startAddress = value
    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getArticleLength(self):
        return self._articleLength
    def setArticleLength(self, value):
        self._articleLength = value
    articleLength = property(getArticleLength, setArticleLength, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex
    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value
    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)