1
######################## BEGIN LICENSE BLOCK ########################
2
# The Original Code is mozilla.org code.
4
# The Initial Developer of the Original Code is
5
# Netscape Communications Corporation.
6
# Portions created by the Initial Developer are Copyright (C) 1998
7
# the Initial Developer. All Rights Reserved.
10
# Mark Pilgrim - port to Python
12
# This library is free software; you can redistribute it and/or
13
# modify it under the terms of the GNU Lesser General Public
14
# License as published by the Free Software Foundation; either
15
# version 2.1 of the License, or (at your option) any later version.
17
# This library is distributed in the hope that it will be useful,
18
# but WITHOUT ANY WARRANTY; without even the implied warranty of
19
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
# Lesser General Public License for more details.
22
# You should have received a copy of the GNU Lesser General Public
23
# License along with this library; if not, write to the Free Software
24
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
######################### END LICENSE BLOCK #########################
28
from . import constants
29
from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
31
from .charsetprober import CharSetProber
32
from .codingstatemachine import CodingStateMachine
33
from .compat import wrap_ord
36
class EscCharSetProber(CharSetProber):
    """Charset prober driven by the coding state machines from ``escsm``.

    One ``CodingStateMachine`` is created per model (HZ, ISO-2022-CN,
    ISO-2022-JP, ISO-2022-KR).  Input is pushed through every machine that
    is still active; the first machine to report ``constants.eItsMe``
    decides the detected charset, and the prober gives up once every
    machine has reported ``constants.eError``.

    NOTE(review): this class was rebuilt from a damaged copy of the file
    in which indentation and several lines were lost.  Lines that had to be
    inferred are marked with NOTE(review) comments below -- confirm them
    against the upstream chardet sources.
    """

    def __init__(self):
        """Create the state machines and put the prober in its start state."""
        CharSetProber.__init__(self)
        # All machines are fed in parallel by feed().
        self._mCodingSM = [
            CodingStateMachine(HZSMModel),
            CodingStateMachine(ISO2022CNSMModel),
            CodingStateMachine(ISO2022JPSMModel),
            CodingStateMachine(ISO2022KRSMModel),
        ]
        # NOTE(review): inferred line.  _mActiveSM and _mDetectedCharset are
        # only assigned in reset(), and the other methods read them, so the
        # constructor has to call it.
        self.reset()

    def reset(self):
        """Re-activate every state machine and forget any detected charset."""
        CharSetProber.reset(self)
        for codingSM in self._mCodingSM:
            codingSM.active = True
            # NOTE(review): inferred line (one source line is missing right
            # after the assignment above); presumably rewinds the machine to
            # its start state -- confirm CodingStateMachine.reset() exists.
            codingSM.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        """Return the detected charset, or None if nothing was detected."""
        return self._mDetectedCharset

    def get_confidence(self):
        """Return a high confidence once a charset was detected, else zero."""
        # NOTE(review): both return values were missing from the damaged
        # source; 0.99 / 0.00 are assumed to match upstream -- confirm.
        if self._mDetectedCharset:
            return 0.99
        return 0.00

    def feed(self, aBuf):
        """Push ``aBuf`` through the active state machines.

        Returns whatever ``self.get_state()`` reports: the state is set to
        ``constants.eFoundIt`` as soon as one machine reports ``eItsMe``,
        and to ``constants.eNotMe`` once no active machine is left.

        NOTE(review): the method header and the outer loop were missing
        from the damaged source; the name ``feed`` and the ``aBuf``
        parameter are inferred from the surviving comment and from the use
        of ``c`` -- confirm against the CharSetProber interface.
        """
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            for codingSM in self._mCodingSM:
                if not codingSM.active:
                    # This machine already rejected the input.
                    continue
                codingState = codingSM.next_state(wrap_ord(c))
                if codingState == constants.eError:
                    codingSM.active = False
                    # NOTE(review): inferred line.  Without the decrement
                    # the "<= 0" test below could never become true.
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif codingState == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()