~stomato463/+junk/nvdajp

« back to all changes in this revision

Viewing changes to source/characterProcessing.py

  • Committer: Masataka Shinke
  • Date: 2011-10-25 12:35:26 UTC
  • mfrom: (4185 jpmain)
  • mto: This revision was merged to the branch mainline in revision 4211.
  • Revision ID: mshinke@users.sourceforge.jp-20111025123526-ze527a2rl3z0g2ky
lp:~nishimotz/nvdajp/main : 4185 をマージ

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#characterProcessing.py
 
2
#A part of NonVisual Desktop Access (NVDA)
 
3
#Copyright (C) 2010-2011 NV Access Inc, World Light Information Limited, Hong Kong Blind Union
 
4
#This file is covered by the GNU General Public License.
 
5
#See the file COPYING for more details.
 
6
 
 
7
import time
 
8
import os
 
9
import codecs
 
10
import collections
 
11
import re
 
12
from logHandler import log
 
13
import globalVars
 
14
 
 
15
class LocaleDataMap(object):
    """Caches one data object per locale, creating each lazily through a factory."""

    def __init__(self, localeDataFactory):
        """
        @param localeDataFactory: callable taking a locale string and returning the data object for it.
                It signals an unsupported locale by raising C{LookupError}.
        """
        self._localeDataFactory = localeDataFactory
        # Maps a locale string to the data object already created for it.
        self._dataMap = {}

    def fetchLocaleData(self, locale):
        """
        Returns the data object for the given locale, creating and caching it on first use.
        The full locale is tried first; if it contains an underscore, the part before
        the first underscore (the language without the country) is tried next.
        @param locale: the locale of the data object requested
        @type locale: string
        @return: the data object for the given locale
        @raise LookupError: if no candidate locale yields a data object
        """
        candidates = [locale]
        if '_' in locale:
            candidates.append(locale.split('_')[0])
        for candidate in candidates:
            cached = self._dataMap.get(candidate)
            if cached:
                return cached
            try:
                created = self._localeDataFactory(candidate)
            except LookupError:
                # The factory cannot serve this candidate; try the next one.
                continue
            if created:
                self._dataMap[candidate] = created
                return created
        raise LookupError(locale)

    def invalidateLocaleData(self, locale):
        """Forget the cached data object (if any) for the given locale.
        The next request for this locale will build a new data object.
        @param locale: The locale for which the data object should be invalidated.
        @type locale: str
        """
        self._dataMap.pop(locale, None)
 
59
 
 
60
class CharacterDescriptions(object):
    """
    Represents a map of characters to one or more descriptions (examples) for that character.
    The data is loaded from a file from the requested locale.
    """

    def __init__(self, locale):
        """
        Loads locale/<locale>/characterDescriptions.dic.
        Blank lines and lines starting with '#' are skipped.
        Every other line is split on tabs: the first field is the character,
        the remaining fields are its descriptions.
        Lines without a tab are reported with a warning and ignored.
        @param locale: The characterDescriptions.dic file will be found by using this locale.
        @type locale: string
        @raise LookupError: if the file does not exist for this locale.
        """
        self._entries = {}
        fileName = os.path.join('locale', locale, 'characterDescriptions.dic')
        if not os.path.isfile(fileName):
            raise LookupError(fileName)
        # The with block guarantees the file is closed even if reading or parsing raises.
        with codecs.open(fileName, "r", "utf_8_sig", errors="replace") as f:
            for line in f:
                if line.isspace() or line.startswith('#'):
                    continue
                line = line.rstrip('\r\n')
                temp = line.split("\t")
                if len(temp) > 1:
                    key = temp.pop(0)
                    self._entries[key] = temp
                else:
                    log.warning("can't parse line '%s'" % line)
        log.debug("Loaded %d entries." % len(self._entries))

    def getCharacterDescription(self, character):
        """
        Looks up the given character and returns a list containing all the description strings found.
        @return: the list of descriptions, or C{None} if the character has no entry.
        """
        return self._entries.get(character)
 
94
 
 
95
#: Caches the CharacterDescriptions object created for each locale.
_charDescLocaleDataMap = LocaleDataMap(CharacterDescriptions)


def getCharacterDescription(locale, character):
    """
    Finds a description or examples for the given character that make sense in the given locale.
    If the locale has no description data, or no entry for the character,
    the lookup is repeated for 'en' unless the locale already starts with 'en'.
    @param locale: the locale (language[_COUNTRY]) the description should be for.
    @type locale: string
    @param character: the character whose description should be retrieved.
    @type character: string
    @return: the found description for the given character
    @rtype: list of strings
    @raise LookupError: if no description data can be loaded, even for English.
    """
    try:
        localeDescriptions = _charDescLocaleDataMap.fetchLocaleData(locale)
    except LookupError:
        if locale.startswith('en'):
            # There is nothing left to fall back to.
            raise LookupError("en")
        return getCharacterDescription('en', character)
    found = localeDescriptions.getCharacterDescription(character)
    if found or locale.startswith('en'):
        return found
    # The locale has no entry for this character; use the English one.
    return getCharacterDescription('en', character)
 
117
 
 
118
# Speech symbol levels.
# These are compared numerically: SpeechSymbolProcessor speaks a symbol's replacement
# when the requested level is greater than or equal to the symbol's level.
SYMLVL_NONE = 0
SYMLVL_SOME = 100
SYMLVL_MOST = 200
SYMLVL_ALL = 300
SYMLVL_CHAR = 1000

#: Maps each symbol level to its translated label.
SPEECH_SYMBOL_LEVEL_LABELS = {
    SYMLVL_NONE: _("none"),
    SYMLVL_SOME: _("some"),
    SYMLVL_MOST: _("most"),
    SYMLVL_ALL: _("all"),
    SYMLVL_CHAR: _("character"),
}

#: The configurable levels: every level except SYMLVL_CHAR.
CONFIGURABLE_SPEECH_SYMBOL_LEVELS = (
    SYMLVL_NONE,
    SYMLVL_SOME,
    SYMLVL_MOST,
    SYMLVL_ALL,
)
#: All levels, in ascending order.
SPEECH_SYMBOL_LEVELS = CONFIGURABLE_SPEECH_SYMBOL_LEVELS + (SYMLVL_CHAR,)

# Speech symbol preserve modes.
# They control whether the matched text itself is kept in the processed output.
# Never keep the matched text.
SYMPRES_NEVER = 0
# Always keep the matched text.
SYMPRES_ALWAYS = 1
# Keep the matched text only when the requested level is below the symbol's level.
SYMPRES_NOREP = 2
 
138
 
 
139
class SpeechSymbol(object):
    """Plain record holding the pronunciation fields of one symbol.
    Every field except the identifier defaults to C{None}.
    """
    __slots__ = ("identifier", "pattern", "replacement", "level", "preserve", "displayName")

    def __init__(self, identifier, pattern=None, replacement=None, level=None, preserve=None, displayName=None):
        """Constructor: stores each argument in the attribute of the same name."""
        self.identifier = identifier
        self.pattern = pattern
        self.replacement = replacement
        self.level = level
        self.preserve = preserve
        self.displayName = displayName

    def __repr__(self):
        """Render as SpeechSymbol(name=value, ...) listing the fields in __slots__ order."""
        described = ", ".join(
            "{name}={val!r}".format(name=slot, val=getattr(self, slot))
            for slot in self.__slots__
        )
        return "SpeechSymbol(%s)" % described
 
156
 
 
157
class SpeechSymbols(object):
    """
    Contains raw information about the pronunciation of symbols.
    It does not handle inheritance of data from other sources, processing of text, etc.
    This is all handled by L{SpeechSymbolProcessor}.
    """

    def __init__(self):
        """Constructor.
        """
        # Maps complex symbol identifier -> regular expression pattern, in file order.
        self.complexSymbols = collections.OrderedDict()
        # Maps symbol identifier -> SpeechSymbol, in file order.
        self.symbols = collections.OrderedDict()
        # The file name last passed to load or save.
        self.fileName = None

    def load(self, fileName, allowComplexSymbols=True):
        """Load symbol information from a file.
        The file consists of a "complexSymbols:" section and a "symbols:" section.
        Blank lines and lines starting with "#" are skipped.
        Lines that cannot be parsed are reported with a warning and ignored.
        @param fileName: The name of the file from which to load symbol information.
        @type fileName: str
        @param allowComplexSymbols: Whether to allow complex symbols.
        @type allowComplexSymbols: bool
        @raise IOError: If the file cannot be read.
        """
        self.fileName = fileName
        with codecs.open(fileName, "r", "utf_8_sig", errors="replace") as f:
            # The parser for lines of the section currently being read.
            handler = None
            for line in f:
                if line.isspace() or line.startswith("#"):
                    # Whitespace or comment.
                    continue
                line = line.rstrip("\r\n")
                try:
                    if line == "complexSymbols:" and allowComplexSymbols:
                        handler = self._loadComplexSymbol
                    elif line == "symbols:":
                        handler = self._loadSymbol
                    elif handler:
                        # This is a line within a section, so handle it according to which section we're in.
                        handler(line)
                    else:
                        # A data line before any section header.
                        raise ValueError
                except ValueError:
                    log.warning(u"Invalid line in file {file}: {line}".format(
                        file=fileName, line=line))

    def _loadComplexSymbol(self, line):
        """Parse one "identifier<TAB>pattern" line of the complexSymbols section.
        @raise ValueError: If the line does not have exactly two tab separated fields.
        """
        fields = line.split("\t")
        if len(fields) != 2:
            raise ValueError
        identifier, pattern = fields
        self.complexSymbols[identifier] = pattern

    def _loadSymbolField(self, input, inputMap=None):
        """Convert one textual field of a symbol line to its in-memory value.
        @param input: The field text; "-" means the default.
        @param inputMap: Optional map from allowed field text to value.
        @return: C{None} for "-", the mapped value if a map is given, otherwise the text itself.
        @raise ValueError: If a map is given and the text is not one of its keys.
        """
        if input == "-":
            # Default.
            return None
        if not inputMap:
            return input
        try:
            return inputMap[input]
        except KeyError:
            raise ValueError

    # Character following a leading backslash in an identifier -> the character it stands for.
    IDENTIFIER_ESCAPES_INPUT = {
        "0": "\0",
        "t": "\t",
        "n": "\n",
        "r": "\r",
        "f": "\f",
        "v": "\v",
        "#": "#",
        "\\": "\\",
    }
    # The reverse mappings are used when saving.
    IDENTIFIER_ESCAPES_OUTPUT = {v: k for k, v in IDENTIFIER_ESCAPES_INPUT.items()}
    LEVEL_INPUT = {
        "none": SYMLVL_NONE,
        "some": SYMLVL_SOME,
        "most": SYMLVL_MOST,
        "all": SYMLVL_ALL,
        "char": SYMLVL_CHAR,
    }
    LEVEL_OUTPUT = {v: k for k, v in LEVEL_INPUT.items()}
    PRESERVE_INPUT = {
        "never": SYMPRES_NEVER,
        "always": SYMPRES_ALWAYS,
        "norep": SYMPRES_NOREP,
    }
    PRESERVE_OUTPUT = {v: k for k, v in PRESERVE_INPUT.items()}

    def _loadSymbol(self, line):
        """Parse one line of the symbols section and store the resulting L{SpeechSymbol}.
        Fields are tab separated: identifier, replacement, then optionally level and preserve.
        A final field starting with "#" is the display name.
        @raise ValueError: If the identifier is empty, the replacement is missing,
                or the level or preserve field is not recognised.
        """
        line = line.split("\t")
        identifier = replacement = level = preserve = displayName = None
        if line[-1].startswith("#"):
            # Regardless of how many fields there are,
            # if the last field is a comment, it is the display name.
            displayName = line[-1][1:].lstrip()
            del line[-1]
        line = iter(line)
        try:
            identifier = next(line)
            if not identifier:
                # Empty identifier is not allowed.
                raise ValueError
            if identifier.startswith("\\") and len(identifier) >= 2:
                # Decode the escape; an unknown escaped character stands for itself.
                identifier = self.IDENTIFIER_ESCAPES_INPUT.get(identifier[1], identifier[1]) + identifier[2:]
            replacement = self._loadSymbolField(next(line))
        except StopIteration:
            # These fields are mandatory.
            raise ValueError
        try:
            level = self._loadSymbolField(next(line), self.LEVEL_INPUT)
            preserve = self._loadSymbolField(next(line), self.PRESERVE_INPUT)
        except StopIteration:
            # These fields are optional. Defaults will be used for unspecified fields.
            pass
        self.symbols[identifier] = SpeechSymbol(identifier, None, replacement, level, preserve, displayName)

    def save(self, fileName=None):
        """Save symbol information to a file.
        @param fileName: The name of the file to which to save symbol information,
                C{None} to use the file name last passed to L{load} or L{save}.
        @type fileName: str
        @raise IOError: If the file cannot be written.
        @raise ValueError: If C{fileName} is C{None}
                and L{load} or L{save} has not been called.
        """
        if fileName:
            self.fileName = fileName
        elif self.fileName:
            fileName = self.fileName
        else:
            raise ValueError("No file name")

        with codecs.open(fileName, "w", "utf_8_sig", errors="replace") as f:
            if self.complexSymbols:
                f.write(u"complexSymbols:\r\n")
                for identifier, pattern in self.complexSymbols.items():
                    f.write(u"%s\t%s\r\n" % (identifier, pattern))
                f.write(u"\r\n")

            if self.symbols:
                f.write(u"symbols:\r\n")
                for symbol in self.symbols.values():
                    f.write(u"%s\r\n" % self._saveSymbol(symbol))

    def _saveSymbolField(self, output, outputMap=None):
        """Convert one in-memory field value to its textual form.
        @param output: The value; C{None} means the default.
        @param outputMap: Optional map from value to field text.
        @return: "-" for C{None}, the mapped text if a map is given, otherwise the value itself.
        @raise ValueError: If a map is given and the value is not one of its keys.
        """
        if output is None:
            return "-"
        if not outputMap:
            return output
        try:
            return outputMap[output]
        except KeyError:
            raise ValueError

    def _saveSymbol(self, symbol):
        """Build the tab separated line for a symbol, as read back by L{_loadSymbol}.
        @param symbol: The symbol to serialise.
        @type symbol: L{SpeechSymbol}
        @return: The line, without a line ending.
        """
        identifier = symbol.identifier
        try:
            # Escape the first character if it is one of the escapable characters.
            identifier = u"\\%s%s" % (
                self.IDENTIFIER_ESCAPES_OUTPUT[identifier[0]], identifier[1:])
        except KeyError:
            pass
        fields = [identifier,
            self._saveSymbolField(symbol.replacement),
            self._saveSymbolField(symbol.level, self.LEVEL_OUTPUT),
            self._saveSymbolField(symbol.preserve, self.PRESERVE_OUTPUT)
        ]
        # Strip optional fields with default values.
        # Fields are positional, so only a trailing run of defaults may be dropped:
        # a default level must stay as "-" when an explicit preserve follows it.
        while len(fields) > 2 and fields[-1] == "-":
            del fields[-1]
        if symbol.displayName:
            fields.append("# %s" % symbol.displayName)
        return u"\t".join(fields)
 
330
 
 
331
def _getSpeechSymbolsForLocale(locale):
    """Load the builtin and user symbol data for a locale.
    @param locale: The locale whose symbol files should be loaded.
    @type locale: str
    @return: The builtin symbols and the user symbols; the user symbols are empty
            if the user file cannot be read.
    @rtype: tuple of (L{SpeechSymbols}, L{SpeechSymbols})
    @raise LookupError: If the builtin symbols file for the locale cannot be read.
    """
    builtinPath = os.path.join("locale", locale, "symbols.dic")
    builtin = SpeechSymbols()
    try:
        builtin.load(builtinPath)
    except IOError:
        raise LookupError("No symbol information for locale %s" % locale)

    userPath = os.path.join(globalVars.appArgs.configPath, "symbols-%s.dic" % locale)
    user = SpeechSymbols()
    try:
        # Users may not specify complex symbols
        # because an error will cause the whole processor to fail.
        user.load(userPath, allowComplexSymbols=False)
    except IOError:
        # An empty user SpeechSymbols is okay.
        pass
    return builtin, user
 
347
 
 
348
class SpeechSymbolProcessor(object):
    """
    Handles processing of symbol pronunciation for a locale.
    Pronunciation information is taken from one or more L{SpeechSymbols} instances.
    """

    #: Caches symbol data for locales.
    localeSymbols = LocaleDataMap(_getSpeechSymbolsForLocale)

    def __init__(self, locale):
        """Constructor.
        Merges the user, builtin and (for non-English locales) English builtin symbol data
        and compiles a single regular expression that matches every usable symbol.
        @param locale: The locale for which symbol pronunciation should be processed.
        @type locale: str
        @raise LookupError: If there is no symbol data for the locale
                or the combined regular expression is invalid.
        """
        self.locale = locale

        # We need to merge symbol data from several sources.
        # Earlier sources take priority over later ones.
        sources = self.sources = []
        builtin, user = self.localeSymbols.fetchLocaleData(locale)
        self.userSymbols = user
        sources.append(user)
        sources.append(builtin)

        # Always use English as a base.
        if locale != "en":
            # Only the builtin data.
            sources.append(self.localeSymbols.fetchLocaleData("en")[0])

        # The computed symbol information from all sources.
        symbols = self.computedSymbols = collections.OrderedDict()
        # An indexable list of complex symbols for use in building/executing the regexp.
        complexSymbolsList = self._computedComplexSymbolsList = []
        # A list of simple symbol identifiers for use in building the regexp.
        simpleSymbolIdentifiers = []
        # Single character symbols.
        characters = set()

        # Add all complex symbols first, as they take priority.
        for source in sources:
            for identifier, pattern in source.complexSymbols.items():
                if identifier in symbols:
                    # Already defined.
                    continue
                symbol = SpeechSymbol(identifier, pattern)
                symbols[identifier] = symbol
                complexSymbolsList.append(symbol)

        # Supplement the data for complex symbols and add all simple symbols.
        for source in sources:
            for identifier, sourceSymbol in source.symbols.items():
                try:
                    symbol = symbols[identifier]
                    # We're updating an already existing symbol.
                except KeyError:
                    # This is a new simple symbol.
                    # (All complex symbols have already been added.)
                    symbol = symbols[identifier] = SpeechSymbol(identifier)
                    simpleSymbolIdentifiers.append(identifier)
                    if len(identifier) == 1:
                        characters.add(identifier)
                # If fields weren't explicitly specified, inherit the value from later sources.
                if symbol.replacement is None:
                    symbol.replacement = sourceSymbol.replacement
                if symbol.level is None:
                    symbol.level = sourceSymbol.level
                if symbol.preserve is None:
                    symbol.preserve = sourceSymbol.preserve
                if symbol.displayName is None:
                    symbol.displayName = sourceSymbol.displayName

        # Set defaults for any fields not explicitly set.
        # Iterate over a snapshot because symbols may be deleted from the dict inside the loop.
        for symbol in list(symbols.values()):
            if symbol.replacement is None:
                # Symbols without a replacement specified are useless.
                log.warning(u"Replacement not defined in locale {locale} for symbol: {symbol}".format(
                    symbol=symbol.identifier, locale=self.locale))
                del symbols[symbol.identifier]
                # The symbol must also disappear from everything used to build the regexp;
                # otherwise the regexp would still match it and
                # _regexpRepl would fail to find it in computedSymbols.
                try:
                    complexSymbolsList.remove(symbol)
                except ValueError:
                    # Not a complex symbol, so it is a simple one.
                    try:
                        simpleSymbolIdentifiers.remove(symbol.identifier)
                    except ValueError:
                        pass
                    characters.discard(symbol.identifier)
                continue
            if symbol.level is None:
                symbol.level = SYMLVL_ALL
            if symbol.preserve is None:
                symbol.preserve = SYMPRES_NEVER
            if symbol.displayName is None:
                symbol.displayName = symbol.identifier

        characters = "".join(characters)
        # The simple symbols must be ordered longest first so that the longer symbols will match.
        simpleSymbolIdentifiers.sort(key=lambda identifier: len(identifier), reverse=True)

        # Build the regexp.
        patterns = [
            # Strip repeated spaces from the end of the line to stop them from being picked up by repeated.
            r"(?P<rstripSpace>  +$)",
        ]
        if characters:
            # Repeated characters: more than 3 repeats.
            # Skipped when there are no single character symbols,
            # because an empty character class is not a valid regexp.
            patterns.append(
                r"(?P<repeated>(?P<repTmp>[%s])(?P=repTmp){3,})" % re.escape(characters))
        # Complex symbols.
        # Each complex symbol has its own named group so we know which symbol matched.
        patterns.extend(
            u"(?P<c{index}>{pattern})".format(index=index, pattern=symbol.pattern)
            for index, symbol in enumerate(complexSymbolsList))
        if simpleSymbolIdentifiers:
            # Simple symbols.
            # These are all handled in one named group.
            # Because the symbols are just text, we know which symbol matched just by looking at the matched text.
            # Skipped when there are no simple symbols,
            # because an empty group would match the empty string everywhere.
            patterns.append(u"(?P<simple>{})".format(
                "|".join(re.escape(identifier) for identifier in simpleSymbolIdentifiers)
            ))
        pattern = "|".join(patterns)
        try:
            self._regexp = re.compile(pattern, re.UNICODE)
        except re.error as e:
            log.error("Invalid complex symbol regular expression in locale %s: %s" % (locale, e))
            raise LookupError

    def _regexpRepl(self, m):
        """Produce the replacement text for one match of the combined regexp.
        Uses the level stored by L{processText}.
        @param m: The match object.
        @return: The text to substitute for the match.
        """
        group = m.lastgroup

        if group == "rstripSpace":
            return ""

        elif group == "repeated":
            # Repeated character.
            text = m.group()
            symbol = self.computedSymbols[text[0]]
            if self._level >= symbol.level:
                return u" {count} {char} ".format(count=len(text), char=symbol.replacement)
            else:
                return " "

        else:
            # One of the defined symbols.
            text = m.group()
            if group == "simple":
                # Simple symbol.
                symbol = self.computedSymbols[text]
            else:
                # Complex symbol.
                # The group is named "c" followed by the index into the complex symbols list.
                index = int(group[1:])
                symbol = self._computedComplexSymbolsList[index]
            if symbol.preserve == SYMPRES_ALWAYS or (symbol.preserve == SYMPRES_NOREP and self._level < symbol.level):
                # Keep the original text after any replacement.
                suffix = text
            else:
                suffix = " "
            if self._level >= symbol.level and symbol.replacement:
                return u" {repl}{suffix}".format(repl=symbol.replacement, suffix=suffix)
            else:
                return suffix

    def processText(self, text, level):
        """Replace the symbols in some text according to the given level.
        @param text: The text to process.
        @param level: The symbol level to use; one of the SYMLVL_* constants.
        @return: The processed text.
        """
        # Stored on the instance so that _regexpRepl can read it during the substitution.
        self._level = level
        return self._regexp.sub(self._regexpRepl, text)

    def updateSymbol(self, newSymbol):
        """Update information for a symbol if it has changed.
        If there is a change, the changed information will be added to the user's symbol data.
        These changes do not take effect until the symbol processor is reinitialised.
        @param newSymbol: The symbol to update.
        @type newSymbol: L{SpeechSymbol}
        @return: Whether there was a change.
        @rtype: bool
        @raise KeyError: If there is no computed symbol with the identifier of C{newSymbol}.
        """
        identifier = newSymbol.identifier
        oldSymbol = self.computedSymbols[identifier]
        if oldSymbol is newSymbol:
            return False
        try:
            userSymbol = self.userSymbols.symbols[identifier]
        except KeyError:
            userSymbol = SpeechSymbol(identifier)

        # Copy only the fields that differ from the computed symbol into the user's symbol.
        changed = False
        if newSymbol.pattern != oldSymbol.pattern:
            userSymbol.pattern = newSymbol.pattern
            changed = True
        if newSymbol.replacement != oldSymbol.replacement:
            userSymbol.replacement = newSymbol.replacement
            changed = True
        if newSymbol.level != oldSymbol.level:
            userSymbol.level = newSymbol.level
            changed = True
        if newSymbol.preserve != oldSymbol.preserve:
            userSymbol.preserve = newSymbol.preserve
            changed = True
        if newSymbol.displayName != oldSymbol.displayName:
            userSymbol.displayName = newSymbol.displayName
            changed = True

        if not changed:
            return False

        # Do this in case the symbol wasn't in userSymbols before.
        self.userSymbols.symbols[identifier] = userSymbol
        return True
 
545
 
 
546
#: Caches the SpeechSymbolProcessor created for each locale.
_localeSpeechSymbolProcessors = LocaleDataMap(SpeechSymbolProcessor)


def processSpeechSymbols(locale, text, level):
    """Process some text, converting symbols according to desired pronunciation.
    If no symbol processor can be created for the locale, English is used instead.
    @param locale: The locale of the text.
    @type locale: str
    @param text: The text to process.
    @type text: str
    @param level: The symbol level to use; one of the SYMLVL_* constants.
    @return: The processed text.
    @raise LookupError: If no symbol processor can be created, even for English.
    """
    try:
        ss = _localeSpeechSymbolProcessors.fetchLocaleData(locale)
    except LookupError:
        if locale == "en" or locale.startswith("en_"):
            # English itself failed, so there is nothing to fall back to.
            # Plain "en" must be checked too; otherwise this would recurse forever.
            raise
        return processSpeechSymbols("en", text, level)
    return ss.processText(text, level)
 
563
 
 
564
def processSpeechSymbol(locale, symbol):
    """Process a single symbol according to desired pronunciation.
    If no symbol processor can be created for the locale, English is used instead.
    @param locale: The locale of the symbol.
    @type locale: str
    @param symbol: The symbol.
    @type symbol: str
    @return: The replacement defined for the symbol, or the symbol itself if it is not defined.
    @raise LookupError: If no symbol processor can be created, even for English.
    """
    try:
        ss = _localeSpeechSymbolProcessors.fetchLocaleData(locale)
    except LookupError:
        if locale == "en" or locale.startswith("en_"):
            # English itself failed, so there is nothing to fall back to.
            # Plain "en" must be checked too; otherwise this would recurse forever.
            raise
        return processSpeechSymbol("en", symbol)
    try:
        return ss.computedSymbols[symbol].replacement
    except KeyError:
        # Unknown symbols are returned unchanged.
        return symbol