~ubuntuone-pqm-team/requests/stable

« back to all changes in this revision

Viewing changes to requests/packages/chardet/mbcharsetprober.py

  • Committer: Ricardo Kirkner
  • Date: 2014-02-12 20:15:17 UTC
  • Revision ID: ricardo.kirkner@canonical.com-20140212201517-h11q42rfwrnyk17k
Tags: 2.2.1
imported requests 2.2.1 from tarball

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
######################## BEGIN LICENSE BLOCK ########################
 
2
# The Original Code is Mozilla Universal charset detector code.
 
3
#
 
4
# The Initial Developer of the Original Code is
 
5
# Netscape Communications Corporation.
 
6
# Portions created by the Initial Developer are Copyright (C) 2001
 
7
# the Initial Developer. All Rights Reserved.
 
8
#
 
9
# Contributor(s):
 
10
#   Mark Pilgrim - port to Python
 
11
#   Shy Shalom - original C code
 
12
#   Proofpoint, Inc.
 
13
#
 
14
# This library is free software; you can redistribute it and/or
 
15
# modify it under the terms of the GNU Lesser General Public
 
16
# License as published by the Free Software Foundation; either
 
17
# version 2.1 of the License, or (at your option) any later version.
 
18
#
 
19
# This library is distributed in the hope that it will be useful,
 
20
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
21
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
22
# Lesser General Public License for more details.
 
23
#
 
24
# You should have received a copy of the GNU Lesser General Public
 
25
# License along with this library; if not, write to the Free Software
 
26
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 
27
# 02110-1301  USA
 
28
######################### END LICENSE BLOCK #########################
 
29
 
 
30
import sys
 
31
from . import constants
 
32
from .charsetprober import CharSetProber
 
33
 
 
34
 
 
35
class MultiByteCharSetProber(CharSetProber):
    """Prober driven by a coding state machine and a distribution analyzer.

    Both collaborators (``_mCodingSM`` and ``_mDistributionAnalyzer``) are
    initialised to ``None`` in this class, and ``get_charset_name`` returns
    ``None``.  Presumably concrete subclasses assign the collaborators and
    override ``get_charset_name`` -- confirm against the subclasses.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Two-slot window: slot 0 holds the last byte of the previous buffer,
        # slot 1 receives the first byte of the buffer currently being fed.
        self._mLastChar = [0, 0]

    def reset(self):
        """Reset the base prober, both collaborators (if set) and the
        carried-over byte window."""
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        """Placeholder; returns ``None`` in this class."""
        pass

    def feed(self, aBuf):
        """Run ``aBuf`` through the coding state machine and return the
        prober state.

        Each element of ``aBuf`` is passed to ``self._mCodingSM.next_state``:

        * ``constants.eError``  -> state becomes ``constants.eNotMe`` and
          scanning stops;
        * ``constants.eItsMe``  -> state becomes ``constants.eFoundIt`` and
          scanning stops;
        * ``constants.eStart``  -> the two most recent bytes and the current
          character length are handed to the distribution analyzer.

        After scanning, if the prober is still detecting and the analyzer
        reports enough data with a confidence above
        ``constants.SHORTCUT_THRESHOLD``, the state becomes
        ``constants.eFoundIt``.

        An empty ``aBuf`` is a no-op: the current state is returned unchanged.
        """
        aLen = len(aBuf)
        if not aLen:
            # Without this guard the bookkeeping below evaluates aBuf[-1] on
            # an empty sequence and raises IndexError.
            return self.get_state()

        for i, byte in enumerate(aBuf):
            codingState = self._mCodingSM.next_state(byte)
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The byte pair straddles two feed() calls: combine the
                    # byte saved from the previous buffer with aBuf[0].
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte so the next feed() can pair it with its
        # first byte.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the confidence reported by the distribution analyzer."""
        return self._mDistributionAnalyzer.get_confidence()