~ubuntu-branches/ubuntu/jaunty/calibre/jaunty-backports

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/rtf2xml/header.py

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-01-20 17:14:02 UTC
  • Revision ID: james.westby@ubuntu.com-20090120171402-8y3znf6nokwqe80k
Tags: upstream-0.4.125+dfsg
Import upstream version 0.4.125+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#########################################################################
 
2
#                                                                       #
 
3
#                                                                       #
 
4
#   copyright 2002 Paul Henry Tremblay                                  #
 
5
#                                                                       #
 
6
#   This program is distributed in the hope that it will be useful,     #
 
7
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
 
8
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
 
9
#   General Public License for more details.                            #
 
10
#                                                                       #
 
11
#   You should have received a copy of the GNU General Public License   #
 
12
#   along with this program; if not, write to the Free Software         #
 
13
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
 
14
#   02111-1307 USA                                                      #
 
15
#                                                                       #
 
16
#                                                                       #
 
17
#########################################################################
 
18
import sys, os, tempfile
 
19
from calibre.ebooks.rtf2xml import copy
 
20
class Header:
    """
    Separate headers and footers from the body of a token file and later
    put them back.

    Two public methods are available. ``separate_headers`` moves the text of
    every header and footer out of the body and appends it to the bottom of
    the file, where it is easier to process. ``join_headers`` moves those
    headers and footers back to the places in the body they were taken from.

    The file named by ``in_file`` is read one line (token) at a time and is
    overwritten with the result by both public methods.
    """

    # Maps the ten-character name found at line[6:16] of a header/footer
    # token to the value written into the ``type`` attribute of the
    # ``header-or-footer`` open tag.
    __head_dict = {
        'head-left_': 'header-left',
        'head-right': 'header-right',
        'foot-left_': 'footer-left',
        'foot-right': 'footer-right',
        'head-first': 'header-first',
        'foot-first': 'footer-first',
        'header____': 'header',
        'footer____': 'footer',
    }

    def __init__(self,
            in_file,
            bug_handler,
            copy=None,
            run_level=1,
            ):
        """
        in_file -- path of the file to process; it is rewritten in place.
        bug_handler -- handed unchanged to ``copy.Copy``.
        copy -- if true, ``copy_file`` is called on the intermediate result
            of each public method before the input file is overwritten.
        run_level -- accepted for interface compatibility; not used here.
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        # Temporary files are created only when a public method needs them,
        # so constructing a Header never leaves anything on disk.
        self.__write_to = None
        self.__header_holder = None
        self.__write_to2 = None
        self.__found_a_header = 0

    def __make_temp(self):
        """
        Create an empty temporary file and return its path.

        ``mkstemp`` creates the file atomically, unlike ``mktemp`` which
        only proposes a name that another process could claim first.
        """
        handle, path = tempfile.mkstemp()
        os.close(handle)
        return path

    def __cleanup_temps(self):
        """
        Remove every temporary file this object currently owns and forget
        the paths.
        """
        for path in (self.__write_to, self.__header_holder, self.__write_to2):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                # Already gone; cleanup must not hide the error (if any)
                # that brought us here.
                pass
        self.__write_to = None
        self.__header_holder = None
        self.__write_to2 = None

    def __in_header_func(self, line):
        """
        Handle a line that is read while inside a header or footer.

        When the current closing-bracket number equals the bracket number
        recorded at the start of the header, the header is finished: the
        line goes to the body file and the closing markers go to the header
        file. Any other line is header content and goes to the header file.
        """
        if self.__cb_count == self.__header_bracket_count:
            self.__in_header = 0
            self.__write_obj.write(line)
            self.__write_to_head_obj.write('mi<mk<head___clo\n')
            self.__write_to_head_obj.write(
                'mi<tg<close_____<header-or-footer\n')
            self.__write_to_head_obj.write('mi<mk<header-clo\n')
        else:
            self.__write_to_head_obj.write(line)

    def __found_header(self, line):
        """
        Start a new header or footer.

        Writes a numbered index marker to the body file and a matching
        numbered open marker plus an open tag to the header file, so that
        the two can be matched up again by ``join_headers``.
        """
        self.__found_a_header = 1
        self.__in_header = 1
        self.__header_count += 1
        # Reset so that a closing-bracket number seen before this header
        # cannot be mistaken for the bracket that ends it.
        self.__cb_count = 0
        self.__header_bracket_count = self.__ob_count
        self.__write_obj.write(
            'mi<mk<header-ind<%04d\n' % self.__header_count)
        self.__write_to_head_obj.write(
            'mi<mk<header-ope<%04d\n' % self.__header_count)
        # The token name distinguishes header from footer and left, right
        # and first variants.
        header_type = self.__head_dict.get(line[6:16])
        if header_type:
            self.__write_to_head_obj.write(
                'mi<tg<open-att__<header-or-footer<type>%s\n' % (header_type)
            )
        else:
            sys.stderr.write('module is header\n')
            sys.stderr.write('method is __found_header\n')
            sys.stderr.write('no dict entry\n')
            sys.stderr.write('line is %s' % line)
            self.__write_to_head_obj.write(
                'mi<tg<open-att__<header-or-footer<type>none\n'
            )

    def __default_sep(self, line):
        """
        Handle a line that is read while outside any header or footer.

        A header/footer token (``hf`` at positions 3-4) starts a new header;
        the line itself is always written to the body file.
        """
        if self.__token_info[3:5] == 'hf':
            self.__found_header(line)
        self.__write_obj.write(line)

    def __initiate_sep_values(self):
        """
        Reset the counters and flags used by ``separate_headers``.
        """
        self.__bracket_count = 0
        self.__ob_count = 0
        self.__cb_count = 0
        self.__header_bracket_count = 0
        self.__in_header = 0
        self.__header_count = 0

    def __split_body_and_headers(self):
        """
        Read the input file and distribute its lines between the body
        temporary file and the header temporary file.
        """
        with open(self.__file) as read_obj:
            with open(self.__write_to, 'w') as body_obj:
                with open(self.__header_holder, 'w') as head_obj:
                    self.__write_obj = body_obj
                    self.__write_to_head_obj = head_obj
                    for line in read_obj:
                        self.__token_info = line[:16]
                        # Bracket numbers are kept as the four characters
                        # before the newline; they are only compared for
                        # equality.
                        if self.__token_info == 'ob<nu<open-brack':
                            self.__ob_count = line[-5:-1]
                        if self.__token_info == 'cb<nu<clos-brack':
                            self.__cb_count = line[-5:-1]
                        if self.__in_header:
                            self.__in_header_func(line)
                        else:
                            self.__default_sep(line)

    def __append_headers_to_body(self):
        """
        Append the collected headers to the body temporary file, wrapped in
        ``header-beg`` / ``header-end`` markers.
        """
        with open(self.__header_holder, 'r') as read_obj:
            with open(self.__write_to, 'a') as write_obj:
                write_obj.write('mi<mk<header-beg\n')
                for line in read_obj:
                    write_obj.write(line)
                write_obj.write('mi<mk<header-end\n')

    def separate_headers(self):
        """
        Move all headers and footers to the bottom of the file.

        Each time a header or footer is found, its contents are written to
        a temporary file and an index marker is left in the body. When the
        whole input has been read, the contents of that temporary file are
        appended to the body and the result replaces the input file.
        """
        self.__initiate_sep_values()
        try:
            self.__write_to = self.__make_temp()
            self.__header_holder = self.__make_temp()
            self.__split_body_and_headers()
            self.__append_headers_to_body()
            copy_obj = copy.Copy(bug_handler=self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, "header_separate.info")
            copy_obj.rename(self.__write_to, self.__file)
        finally:
            self.__cleanup_temps()

    def update_info(self, file, copy):
        """
        Replace the path of the file to process and the copy flag.

        The original author marked this method as unused.
        """
        self.__file = file
        self.__copy = copy

    def __get_head_body_func(self, line):
        """
        Process a line of the main body while looking for the marker that
        begins the block of headers at the bottom of the file.
        """
        if self.__token_info == 'mi<mk<header-beg':
            self.__state = 'head'
        else:
            self.__write_obj.write(line)

    def __get_head_head_func(self, line):
        """
        Copy a line of the header block at the bottom of the file to the
        header temporary file, until the end marker is reached.
        """
        if self.__token_info == 'mi<mk<header-end':
            self.__state = 'body'
        else:
            self.__write_to_head_obj.write(line)

    def __get_headers(self):
        """
        Split the input file into a body file and a header file.

        Reads the input one line at a time. While the state is 'body' the
        line is handled by ``__get_head_body_func``; while it is 'head' the
        line is handled by ``__get_head_head_func``. The markers that
        delimit the header block are dropped.
        """
        with open(self.__file) as read_obj:
            with open(self.__write_to, 'w') as body_obj:
                with open(self.__header_holder, 'w') as head_obj:
                    self.__write_obj = body_obj
                    self.__write_to_head_obj = head_obj
                    for line in read_obj:
                        self.__token_info = line[:16]
                        if self.__state == 'body':
                            self.__get_head_body_func(line)
                        elif self.__state == 'head':
                            self.__get_head_head_func(line)

    def __get_head_from_temp(self, num):
        """
        Return the text of header number ``num`` from the header file.

        Reads forward from the current position of the header file until
        the open marker carrying ``num`` is found, then collects every line
        up to (not including) the close marker and returns them as one
        string. If the open marker is never found an empty string is
        returned; if the file ends before the close marker, whatever was
        collected is returned. A string is always returned so the caller
        can write the result unconditionally.
        """
        look_for = 'mi<mk<header-ope<' + num + '\n'
        found_head = 0
        collected = []
        # readline() (rather than iterating the file) keeps the position
        # consistent between successive calls on the same file object.
        for line in iter(self.__read_from_head_obj.readline, ''):
            if found_head:
                if line == 'mi<mk<header-clo\n':
                    break
                collected.append(line)
            elif line == look_for:
                found_head = 1
        return ''.join(collected)

    def __join_from_temp(self):
        """
        Write the body with its headers and footers restored.

        Reads the body file (which holds index markers instead of headers).
        Each index marker is replaced by the matching header text fetched
        with ``__get_head_from_temp``; every other line is copied as is.
        The result is written to the second temporary file.
        """
        with open(self.__header_holder, 'r') as head_obj:
            with open(self.__write_to, 'r') as read_obj:
                with open(self.__write_to2, 'w') as write_obj:
                    self.__read_from_head_obj = head_obj
                    self.__write_obj = write_obj
                    for line in read_obj:
                        if line[:16] == 'mi<mk<header-ind':
                            # line[17:-1] is the four-digit header number.
                            line = self.__get_head_from_temp(line[17:-1])
                        write_obj.write(line)

    def join_headers(self):
        """
        Put the headers and footers back in their former places.

        First the header block at the bottom of the input file is split
        off, giving one file without headers and one file of just headers.
        Then the body is copied to a third file, and each index marker in
        it is replaced by the corresponding header. That third file
        replaces the input file.

        Does nothing if ``separate_headers`` found no header or footer.
        """
        if not self.__found_a_header:
            return
        self.__state = 'body'
        try:
            self.__write_to = self.__make_temp()
            self.__header_holder = self.__make_temp()
            self.__write_to2 = self.__make_temp()
            self.__get_headers()
            self.__join_from_temp()
            copy_obj = copy.Copy(bug_handler=self.__bug_handler)
            # The joined text lives in __write_to2; __write_to only holds
            # the body with the headers still stripped out, so copying it
            # back would silently drop every header and footer.
            if self.__copy:
                copy_obj.copy_file(self.__write_to2, "header_join.data")
            copy_obj.rename(self.__write_to2, self.__file)
        finally:
            self.__cleanup_temps()