~raoul-snyman/openlp/python3

« back to all changes in this revision

Viewing changes to openlp/plugins/bibles/lib/osis.py

  • Committer: Raoul Snyman
  • Date: 2013-04-03 06:51:39 UTC
  • Revision ID: raoul.snyman@saturnlaboratories.co.za-20130403065139-9qhs0xmlrcef4n2h
trying to migrate to py3k

Show diffs side-by-side

added added

removed removed

Lines of Context:
46
46
    """
47
47
    `OSIS <http://www.bibletechnologies.net/>`_ Bible format importer class.
48
48
    """
49
 
    log.info(u'BibleOSISImpl loaded')
 
49
    log.info('BibleOSISImpl loaded')
50
50
 
51
51
    def __init__(self, parent, **kwargs):
52
52
        log.debug(self.__class__.__name__)
53
53
        BibleDB.__init__(self, parent, **kwargs)
54
 
        self.filename = kwargs[u'filename']
 
54
        self.filename = kwargs['filename']
55
55
        self.language_regex = re.compile(r'<language.*>(.*?)</language>')
56
56
        self.verse_regex = re.compile(
57
57
            r'<verse osisID="([a-zA-Z0-9 ]*).([0-9]*).([0-9]*)">(.*?)</verse>')
72
72
            r'<divineName(.*?)>(.*?)</divineName>')
73
73
        self.spaces_regex = re.compile(r'([ ]{2,})')
74
74
        filepath = os.path.join(
75
 
            AppLocation.get_directory(AppLocation.PluginsDir), u'bibles', u'resources', u'osisbooks.csv')
 
75
            AppLocation.get_directory(AppLocation.PluginsDir), 'bibles', 'resources', 'osisbooks.csv')
76
76
 
77
77
    def do_import(self, bible_name=None):
78
78
        """
79
79
        Loads a Bible from file.
80
80
        """
81
 
        log.debug(u'Starting OSIS import from "%s"' % self.filename)
 
81
        log.debug('Starting OSIS import from "%s"' % self.filename)
82
82
        detect_file = None
83
83
        db_book = None
84
84
        osis = None
88
88
        self.wizard.incrementProgressBar(translate('BiblesPlugin.OsisImport',
89
89
            'Detecting encoding (this may take a few minutes)...'))
90
90
        try:
91
 
            detect_file = open(self.filename, u'r')
 
91
            detect_file = open(self.filename, 'r')
92
92
            details = chardet.detect(detect_file.read(1048576))
93
93
            detect_file.seek(0)
94
94
            lines_in_file = int(len(detect_file.readlines()))
95
95
        except IOError:
96
 
            log.exception(u'Failed to detect OSIS file encoding')
 
96
            log.exception('Failed to detect OSIS file encoding')
97
97
            return
98
98
        finally:
99
99
            if detect_file:
100
100
                detect_file.close()
101
101
        try:
102
 
            osis = codecs.open(self.filename, u'r', details['encoding'])
 
102
            osis = codecs.open(self.filename, 'r', details['encoding'])
103
103
            repl = replacement
104
104
            language_id = False
105
105
            # Decide if the bible probably contains only NT or AT and NT or
123
123
                        language = BiblesResourcesDB.get_language(
124
124
                            language_match.group(1))
125
125
                        if language:
126
 
                            language_id = language[u'id']
127
 
                            self.save_meta(u'language_id', language_id)
 
126
                            language_id = language['id']
 
127
                            self.save_meta('language_id', language_id)
128
128
                        continue
129
129
                match = self.verse_regex.search(file_record)
130
130
                if match:
132
132
                    if not language_id:
133
133
                        language_id = self.get_language(bible_name)
134
134
                        if not language_id:
135
 
                            log.exception(u'Importing books from "%s" failed' % self.filename)
 
135
                            log.exception('Importing books from "%s" failed' % self.filename)
136
136
                            return False
137
137
                    match_count += 1
138
 
                    book = unicode(match.group(1))
 
138
                    book = str(match.group(1))
139
139
                    chapter = int(match.group(2))
140
140
                    verse = int(match.group(3))
141
141
                    verse_text = match.group(4)
142
142
                    book_ref_id = self.get_book_ref_id_by_name(book, book_count, language_id)
143
143
                    if not book_ref_id:
144
 
                        log.exception(u'Importing books from "%s" failed' % self.filename)
 
144
                        log.exception('Importing books from "%s" failed' % self.filename)
145
145
                        return False
146
146
                    book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
147
 
                    if not db_book or db_book.name != book_details[u'name']:
148
 
                        log.debug(u'New book: "%s"' % book_details[u'name'])
 
147
                    if not db_book or db_book.name != book_details['name']:
 
148
                        log.debug('New book: "%s"' % book_details['name'])
149
149
                        db_book = self.create_book(
150
 
                            book_details[u'name'],
 
150
                            book_details['name'],
151
151
                            book_ref_id,
152
 
                            book_details[u'testament_id'])
 
152
                            book_details['testament_id'])
153
153
                    if last_chapter == 0:
154
154
                        self.wizard.progressBar.setMaximum(chapter_count)
155
155
                    if last_chapter != chapter:
156
156
                        if last_chapter != 0:
157
157
                            self.session.commit()
158
158
                        self.wizard.incrementProgressBar(translate('BiblesPlugin.OsisImport', 'Importing %s %s...',
159
 
                            'Importing <book name> <chapter>...') % (book_details[u'name'], chapter))
 
159
                            'Importing <book name> <chapter>...') % (book_details['name'], chapter))
160
160
                        last_chapter = chapter
161
161
                    # All of this rigmarole below is because the mod2osis
162
162
                    # tool from the Sword library embeds XML in the OSIS
163
163
                    # but neglects to enclose the verse text (with XML) in
164
164
                    # <![CDATA[ ]]> tags.
165
 
                    verse_text = self.note_regex.sub(u'', verse_text)
166
 
                    verse_text = self.title_regex.sub(u'', verse_text)
167
 
                    verse_text = self.milestone_regex.sub(u'', verse_text)
168
 
                    verse_text = self.fi_regex.sub(u'', verse_text)
169
 
                    verse_text = self.rf_regex.sub(u'', verse_text)
170
 
                    verse_text = self.lb_regex.sub(u' ', verse_text)
171
 
                    verse_text = self.lg_regex.sub(u'', verse_text)
172
 
                    verse_text = self.l_regex.sub(u' ', verse_text)
173
 
                    verse_text = self.w_regex.sub(u'', verse_text)
174
 
                    verse_text = self.q1_regex.sub(u'"', verse_text)
175
 
                    verse_text = self.q2_regex.sub(u'\'', verse_text)
176
 
                    verse_text = self.q_regex.sub(u'', verse_text)
 
165
                    verse_text = self.note_regex.sub('', verse_text)
 
166
                    verse_text = self.title_regex.sub('', verse_text)
 
167
                    verse_text = self.milestone_regex.sub('', verse_text)
 
168
                    verse_text = self.fi_regex.sub('', verse_text)
 
169
                    verse_text = self.rf_regex.sub('', verse_text)
 
170
                    verse_text = self.lb_regex.sub(' ', verse_text)
 
171
                    verse_text = self.lg_regex.sub('', verse_text)
 
172
                    verse_text = self.l_regex.sub(' ', verse_text)
 
173
                    verse_text = self.w_regex.sub('', verse_text)
 
174
                    verse_text = self.q1_regex.sub('"', verse_text)
 
175
                    verse_text = self.q2_regex.sub('\'', verse_text)
 
176
                    verse_text = self.q_regex.sub('', verse_text)
177
177
                    verse_text = self.divine_name_regex.sub(repl, verse_text)
178
 
                    verse_text = self.trans_regex.sub(u'', verse_text)
179
 
                    verse_text = verse_text.replace(u'</lb>', u'') \
180
 
                        .replace(u'</l>', u'').replace(u'<lg>', u'') \
181
 
                        .replace(u'</lg>', u'').replace(u'</q>', u'') \
182
 
                        .replace(u'</div>', u'').replace(u'</w>', u'')
183
 
                    verse_text = self.spaces_regex.sub(u' ', verse_text)
 
178
                    verse_text = self.trans_regex.sub('', verse_text)
 
179
                    verse_text = verse_text.replace('</lb>', '') \
 
180
                        .replace('</l>', '').replace('<lg>', '') \
 
181
                        .replace('</lg>', '').replace('</q>', '') \
 
182
                        .replace('</div>', '').replace('</w>', '')
 
183
                    verse_text = self.spaces_regex.sub(' ', verse_text)
184
184
                    self.create_verse(db_book.id, chapter, verse, verse_text)
185
185
                    self.application.process_events()
186
186
            self.session.commit()
187
187
            if match_count == 0:
188
188
                success = False
189
189
        except (ValueError, IOError):
190
 
            log.exception(u'Loading bible from OSIS file failed')
 
190
            log.exception('Loading bible from OSIS file failed')
191
191
            success = False
192
192
        finally:
193
193
            if osis: