← Back to branch summary

~ubuntu-branches/debian/wheezy/calibre/wheezy

~ubuntu-branches/debian/wheezy/calibre/wheezy

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/chardet/sbcharsetprober.py

Committer: Package Import Robot
Author(s): Martin Pitt
Date: 2012-01-07 11:22:54 UTC
mfrom: (29.4.10 precise)
Revision ID: package-import@ubuntu.com-20120107112254-n1syr437o46ds802

Tags: 0.8.34+dfsg-1

http://bugs.debian.org/654751

http://bugs.debian.org/640026

http://bugs.debian.org/646674

* New upstream version. (Closes: #654751)
* debian/rules: Do not install calibre copy of chardet; instead, add
  build/binary python-chardet dependency.
* Add disable_plugins.py: Disable plugin dialog. It uses a totally
  non-authenticated and non-trusted way of installing arbitrary code.
  (Closes: #640026)
* debian/rules: Install with POSIX locale, to avoid installing translated
  manpages into the standard locations. (Closes: #646674)

files added:
.pc/disable_plugins.py

.pc/disable_plugins.py/src

.pc/disable_plugins.py/src/calibre

.pc/disable_plugins.py/src/calibre/gui2

.pc/disable_plugins.py/src/calibre/gui2/actions

.pc/disable_plugins.py/src/calibre/gui2/actions/preferences.py

debian/patches/disable_plugins.py

recipes/biolog_pl.recipe

recipes/birmingham_post.recipe

recipes/computerworld_pl.recipe

recipes/datasport.recipe

recipes/dziennik_pl.recipe

recipes/echo_online.recipe

recipes/elet_es_irodalom.recipe

recipes/emuzica_pl.recipe

recipes/fhm_uk.recipe

recipes/fisco_oggi.recipe

recipes/goal.recipe

recipes/grantland.recipe

recipes/icons/biolog_pl.png

recipes/icons/computerworld_pl.png

recipes/icons/dziennik_pl.png

recipes/icons/kosmonauta_pl.png

recipes/icons/mlody_technik_pl.png

recipes/icons/moneynews.png

recipes/icons/rionegro.png

recipes/kosmonauta_pl.recipe

recipes/macity.recipe

recipes/mlody_technik_pl.recipe

recipes/money_pl.recipe

recipes/nol.recipe

recipes/prospectmaguk.recipe

recipes/rionegro.recipe

recipes/salonica_press_news.recipe

recipes/singtaohk.recipe

recipes/tuttojove.recipe

recipes/wired_it.recipe

setup/hosting.py

setup/iso_639/en_GB.po

src/calibre/ebooks/chardet.py

src/calibre/ebooks/oeb/display/test-cfi

src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee

src/calibre/ebooks/oeb/display/test-cfi/index.html

src/calibre/ebooks/oeb/display/test-cfi/marker.png

src/calibre/ebooks/oeb/display/test-cfi/test.py

src/calibre/translations/nn.po

src/calibre/utils/linux_trash.py

src/chardet

src/chardet/__init__.py

src/chardet/big5freq.py

src/chardet/big5prober.py

src/chardet/chardistribution.py

src/chardet/charsetgroupprober.py

src/chardet/charsetprober.py

src/chardet/codingstatemachine.py

src/chardet/constants.py

src/chardet/escprober.py

src/chardet/escsm.py

src/chardet/eucjpprober.py

src/chardet/euckrfreq.py

src/chardet/euckrprober.py

src/chardet/euctwfreq.py

src/chardet/euctwprober.py

src/chardet/gb2312freq.py

src/chardet/gb2312prober.py

src/chardet/hebrewprober.py

src/chardet/jisfreq.py

src/chardet/jpcntx.py

src/chardet/langbulgarianmodel.py

src/chardet/langcyrillicmodel.py

src/chardet/langgreekmodel.py

src/chardet/langhebrewmodel.py

src/chardet/langhungarianmodel.py

src/chardet/langthaimodel.py

src/chardet/latin1prober.py

src/chardet/mbcharsetprober.py

src/chardet/mbcsgroupprober.py

src/chardet/mbcssm.py

src/chardet/sbcharsetprober.py

src/chardet/sbcsgroupprober.py

src/chardet/sjisprober.py

src/chardet/test.py

src/chardet/universaldetector.py

src/chardet/utf8prober.py

src/cherrypy/_cpcompat.py

src/cherrypy/_cpnative_server.py

src/cherrypy/_cpreqbody.py

src/cherrypy/lib/auth_basic.py

src/cherrypy/lib/auth_digest.py

src/cherrypy/lib/cpstats.py

src/cherrypy/lib/gctools.py

src/cherrypy/lib/httputil.py

src/cherrypy/lib/jsontools.py

src/cherrypy/lib/reprconf.py

src/cherrypy/lib/xmlrpcutil.py

src/cherrypy/scaffold/apache-fcgi.conf

src/cherrypy/wsgiserver/ssl_builtin.py

src/cherrypy/wsgiserver/ssl_pyopenssl.py

src/cherrypy/wsgiserver/wsgiserver2.py

src/cherrypy/wsgiserver/wsgiserver3.py

files removed:
src/calibre/ebooks/chardet

src/calibre/ebooks/chardet/__init__.py

src/calibre/ebooks/chardet/big5freq.py

src/calibre/ebooks/chardet/big5prober.py

src/calibre/ebooks/chardet/chardistribution.py

src/calibre/ebooks/chardet/charsetgroupprober.py

src/calibre/ebooks/chardet/charsetprober.py

src/calibre/ebooks/chardet/codingstatemachine.py

src/calibre/ebooks/chardet/constants.py

src/calibre/ebooks/chardet/escprober.py

src/calibre/ebooks/chardet/escsm.py

src/calibre/ebooks/chardet/eucjpprober.py

src/calibre/ebooks/chardet/euckrfreq.py

src/calibre/ebooks/chardet/euckrprober.py

src/calibre/ebooks/chardet/euctwfreq.py

src/calibre/ebooks/chardet/euctwprober.py

src/calibre/ebooks/chardet/gb2312freq.py

src/calibre/ebooks/chardet/gb2312prober.py

src/calibre/ebooks/chardet/hebrewprober.py

src/calibre/ebooks/chardet/jisfreq.py

src/calibre/ebooks/chardet/jpcntx.py

src/calibre/ebooks/chardet/langbulgarianmodel.py

src/calibre/ebooks/chardet/langcyrillicmodel.py

src/calibre/ebooks/chardet/langgreekmodel.py

src/calibre/ebooks/chardet/langhebrewmodel.py

src/calibre/ebooks/chardet/langhungarianmodel.py

src/calibre/ebooks/chardet/langthaimodel.py

src/calibre/ebooks/chardet/latin1prober.py

src/calibre/ebooks/chardet/mbcharsetprober.py

src/calibre/ebooks/chardet/mbcsgroupprober.py

src/calibre/ebooks/chardet/mbcssm.py

src/calibre/ebooks/chardet/sbcharsetprober.py

src/calibre/ebooks/chardet/sbcsgroupprober.py

src/calibre/ebooks/chardet/sjisprober.py

src/calibre/ebooks/chardet/universaldetector.py

src/calibre/ebooks/chardet/utf8prober.py

src/calibre/ebooks/oeb/display/test

src/calibre/ebooks/oeb/display/test/cfi-test.coffee

src/calibre/ebooks/oeb/display/test/test.html

src/calibre/ebooks/oeb/display/test/test.py

src/calibre/gui2/viewer/gestures.py

src/cherrypy/_cpcgifs.py

src/cherrypy/lib/safemime.py

src/cherrypy/lib/tidy.py

src/cherrypy/lib/wsgiapp.py

src/cherrypy/lib/xmlrpc.py

files modified:
.pc/applied-patches

.pc/no_updates_dialog.patch/src/calibre/gui2/main.py

Changelog.old.yaml

Changelog.yaml

debian/changelog

debian/control

debian/patches/series

debian/rules

recipes/adventure_zone_pl.recipe

recipes/alternet.recipe

recipes/astro_news_pl.recipe

recipes/cosmopolitan_uk.recipe

recipes/daily_mirror.recipe

recipes/focus_pl.recipe

recipes/glasgow_herald.recipe

recipes/hackernews.recipe

recipes/hindustan_times.recipe

recipes/hvg.recipe

recipes/iht.recipe

recipes/independent.recipe

recipes/kopalniawiedzy.recipe

recipes/la_razon_bo.recipe

recipes/los_tiempos_bo.recipe

recipes/ming_pao.recipe

recipes/ming_pao_toronto.recipe

recipes/ming_pao_vancouver.recipe

recipes/moneynews.recipe

recipes/naczytniki.recipe

recipes/nowa_fantastyka.recipe

recipes/nytimes.recipe

recipes/nytimes_sub.recipe

recipes/philly.recipe

recipes/seattle_times.recipe

recipes/spiders_web_pl.recipe

recipes/sueddeutsche.recipe

recipes/tagesspiegel.recipe

recipes/toi.recipe

resources/builtin_recipes.xml

resources/builtin_recipes.zip

resources/content_server/mobile.css

resources/default_tweaks.py

resources/display/cfi.js

resources/ebook-convert-complete.pickle

resources/localization/locales.zip

resources/localization/stats.pickle

resources/viewer/referencing.js

setup/check.py

setup/commands.py

setup/install.py

setup/installer/__init__.py

setup/installer/linux/freeze2.py

setup/iso_639/bs.po

setup/iso_639/es.po

setup/iso_639/fr.po

setup/iso_639/ro.po

setup/publish.py

setup/translations.py

setup/upload.py

src/calibre/__init__.py

src/calibre/constants.py

src/calibre/devices/android/driver.py

src/calibre/devices/apple/driver.py

src/calibre/devices/hanvon/driver.py

src/calibre/devices/kindle/driver.py

src/calibre/devices/kobo/driver.py

src/calibre/devices/prst1/driver.py

src/calibre/devices/usbms/driver.py

src/calibre/ebooks/__init__.py

src/calibre/ebooks/conversion/cli.py

src/calibre/ebooks/conversion/plumber.py

src/calibre/ebooks/conversion/preprocess.py

src/calibre/ebooks/epub/__init__.py

src/calibre/ebooks/fb2/output.py

src/calibre/ebooks/html/input.py

src/calibre/ebooks/lit/reader.py

src/calibre/ebooks/mobi/debug.py

src/calibre/ebooks/mobi/mobiml.py

src/calibre/ebooks/mobi/reader.py

src/calibre/ebooks/oeb/base.py

src/calibre/ebooks/oeb/display/cfi.coffee

src/calibre/ebooks/oeb/parse_utils.py

src/calibre/ebooks/oeb/reader.py

src/calibre/ebooks/oeb/transforms/filenames.py

src/calibre/ebooks/oeb/transforms/unsmarten.py

src/calibre/ebooks/pdf/output.py

src/calibre/ebooks/pdf/writer.py

src/calibre/ebooks/pml/input.py

src/calibre/ebooks/pml/pmlconverter.py

src/calibre/gui2/__init__.py

src/calibre/gui2/actions/add.py

src/calibre/gui2/actions/copy_to_library.py

src/calibre/gui2/actions/preferences.py

src/calibre/gui2/actions/view.py

src/calibre/gui2/add.py

src/calibre/gui2/convert/__init__.py

src/calibre/gui2/convert/pdf_output.py

src/calibre/gui2/convert/pdf_output.ui

src/calibre/gui2/convert/pdf_output_ui.py

src/calibre/gui2/convert/search_and_replace.py

src/calibre/gui2/device.py

src/calibre/gui2/dialogs/add_from_isbn.py

src/calibre/gui2/dialogs/check_library.py

src/calibre/gui2/dialogs/message_box.py

src/calibre/gui2/dialogs/message_box.ui

src/calibre/gui2/dialogs/message_box_ui.py

src/calibre/gui2/dialogs/metadata_bulk.ui

src/calibre/gui2/dialogs/metadata_bulk_ui.py

src/calibre/gui2/dialogs/tag_categories.ui

src/calibre/gui2/dialogs/tag_categories_ui.py

src/calibre/gui2/dialogs/template_dialog.ui

src/calibre/gui2/dialogs/template_dialog_ui.py

src/calibre/gui2/library/views.py

src/calibre/gui2/main.py

src/calibre/gui2/preferences/behavior.py

src/calibre/gui2/preferences/conversion.py

src/calibre/gui2/preferences/plugins.py

src/calibre/gui2/preferences/server.py

src/calibre/gui2/preferences/toolbar.py

src/calibre/gui2/shortcuts.py

src/calibre/gui2/store/stores/gandalf_plugin.py

src/calibre/gui2/ui.py

src/calibre/gui2/viewer/config.ui

src/calibre/gui2/viewer/config_ui.py

src/calibre/gui2/viewer/documentview.py

src/calibre/gui2/viewer/main.py

src/calibre/gui2/wizard/__init__.py

src/calibre/library/catalog.py

src/calibre/library/database2.py

src/calibre/library/save_to_disk.py

src/calibre/library/server/base.py

src/calibre/library/server/mobile.py

src/calibre/manual/faq.rst

src/calibre/manual/gui.rst

src/calibre/manual/template_lang.rst

src/calibre/manual/templates/layout.html

src/calibre/manual/templates/search.html

src/calibre/translations/af.po

src/calibre/translations/ar.po

src/calibre/translations/ast.po

src/calibre/translations/az.po

src/calibre/translations/bg.po

src/calibre/translations/bn.po

src/calibre/translations/br.po

src/calibre/translations/bs.po

src/calibre/translations/ca.po

src/calibre/translations/calibre.pot

src/calibre/translations/cs.po

src/calibre/translations/da.po

src/calibre/translations/de.po

src/calibre/translations/el.po

src/calibre/translations/en_AU.po

src/calibre/translations/en_CA.po

src/calibre/translations/en_GB.po

src/calibre/translations/eo.po

src/calibre/translations/es.po

src/calibre/translations/et.po

src/calibre/translations/eu.po

src/calibre/translations/fa.po

src/calibre/translations/fi.po

src/calibre/translations/fo.po

src/calibre/translations/fr.po

src/calibre/translations/gl.po

src/calibre/translations/gu.po

src/calibre/translations/he.po

src/calibre/translations/hi.po

src/calibre/translations/hr.po

src/calibre/translations/hu.po

src/calibre/translations/id.po

src/calibre/translations/it.po

src/calibre/translations/ja.po

src/calibre/translations/kn.po

src/calibre/translations/ko.po

src/calibre/translations/ku.po

src/calibre/translations/lt.po

src/calibre/translations/ltg.po

src/calibre/translations/lv.po

src/calibre/translations/mk.po

src/calibre/translations/ml.po

src/calibre/translations/mr.po

src/calibre/translations/ms.po

src/calibre/translations/nb.po

src/calibre/translations/nds.po

src/calibre/translations/nl.po

src/calibre/translations/oc.po

src/calibre/translations/pa.po

src/calibre/translations/pl.po

src/calibre/translations/pt.po

src/calibre/translations/pt_BR.po

src/calibre/translations/ro.po

src/calibre/translations/ru.po

src/calibre/translations/sc.po

src/calibre/translations/si.po

src/calibre/translations/sk.po

src/calibre/translations/sl.po

src/calibre/translations/sq.po

src/calibre/translations/sr.po

src/calibre/translations/sv.po

src/calibre/translations/ta.po

src/calibre/translations/te.po

src/calibre/translations/th.po

src/calibre/translations/tr.po

src/calibre/translations/uk.po

src/calibre/translations/ur.po

src/calibre/translations/vi.po

src/calibre/translations/wa.po

src/calibre/translations/yi.po

src/calibre/translations/zh_CN.po

src/calibre/translations/zh_HK.po

src/calibre/translations/zh_TW.po

src/calibre/utils/browser.py

src/calibre/utils/coffeescript.py

src/calibre/utils/formatter_functions.py

src/calibre/utils/pyconsole/console.py

src/calibre/utils/recycle_bin.py

src/calibre/utils/zipfile.py

src/calibre/web/feeds/feedparser.py

src/calibre/web/feeds/news.py

src/calibre/web/feeds/recipes/collection.py

src/calibre/web/fetch/simple.py

src/cherrypy/LICENSE.txt

src/cherrypy/__init__.py

src/cherrypy/_cpchecker.py

src/cherrypy/_cpconfig.py

src/cherrypy/_cpdispatch.py

src/cherrypy/_cperror.py

src/cherrypy/_cplogging.py

src/cherrypy/_cpmodpy.py

src/cherrypy/_cprequest.py

src/cherrypy/_cpserver.py

src/cherrypy/_cptools.py

src/cherrypy/_cptree.py

src/cherrypy/_cpwsgi.py

src/cherrypy/_cpwsgi_server.py

src/cherrypy/cherryd *

src/cherrypy/lib/__init__.py

src/cherrypy/lib/auth.py

src/cherrypy/lib/caching.py

src/cherrypy/lib/covercp.py

src/cherrypy/lib/cptools.py

src/cherrypy/lib/encoding.py

src/cherrypy/lib/http.py

src/cherrypy/lib/httpauth.py

src/cherrypy/lib/profiler.py

src/cherrypy/lib/sessions.py

src/cherrypy/lib/static.py

src/cherrypy/process/plugins.py

src/cherrypy/process/servers.py

src/cherrypy/process/win32.py

src/cherrypy/process/wspbus.py

src/cherrypy/scaffold/__init__.py

src/cherrypy/scaffold/site.conf

src/cherrypy/wsgiserver/__init__.py

Show diffs side-by-side

added added

removed removed

src/calibre/ebooks/chardet/sbcharsetprober.py

1

######################## BEGIN LICENSE BLOCK ########################

2

# The Original Code is Mozilla Universal charset detector code.

3

#

4

# The Initial Developer of the Original Code is

5

# Netscape Communications Corporation.

6

# Portions created by the Initial Developer are Copyright (C) 2001

7

# the Initial Developer. All Rights Reserved.

8

#

9

# Contributor(s):

10

# Mark Pilgrim - port to Python

11

# Shy Shalom - original C code

12

#

13

# This library is free software; you can redistribute it and/or

14

# modify it under the terms of the GNU Lesser General Public

15

# License as published by the Free Software Foundation; either

16

# version 2.1 of the License, or (at your option) any later version.

17

#

18

# This library is distributed in the hope that it will be useful,

19

# but WITHOUT ANY WARRANTY; without even the implied warranty of

20

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

21

# Lesser General Public License for more details.

22

#

23

# You should have received a copy of the GNU Lesser General Public

24

# License along with this library; if not, write to the Free Software

25

# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA

26

# 02110-1301 USA

27

######################### END LICENSE BLOCK #########################

28

29

import constants, sys

30

from charsetprober import CharSetProber

31

32

# Tuning constants used by SingleByteCharSetProber.

# Character orders below this value take part in sequence sampling; it is
# also the row stride used when indexing the model's precedence matrix.
SAMPLE_SIZE = 64

# Shortcut decisions in feed() are only attempted once more than this many
# sequences have been counted.
SB_ENOUGH_REL_THRESHOLD = 1024

# Confidence bounds: above the first feed() reports eFoundIt, below the
# second it reports eNotMe.
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05

# Character orders below this value are included in the total-character tally.
SYMBOL_CAT_ORDER = 250

# Number of sequence categories, i.e. the length of the per-category counters.
NUMBER_OF_SEQ_CAT = 4

# Index of the counter read by get_confidence() (the last category).
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0

40

41

class SingleByteCharSetProber(CharSetProber):

42

def __init__(self, model, reversed=constants.False, nameProber=None):

43

CharSetProber.__init__(self)

44

self._mModel = model

45

self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup

46

self._mNameProber = nameProber # Optional auxiliary prober for name decision

47

self.reset()

48

49

def reset(self):

50

CharSetProber.reset(self)

51

self._mLastOrder = 255 # char order of last character

52

self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT

53

self._mTotalSeqs = 0

54

self._mTotalChar = 0

55

self._mFreqChar = 0 # characters that fall in our sampling range

56

57

def get_charset_name(self):

58

if self._mNameProber:

59

return self._mNameProber.get_charset_name()

60

else:

61

return self._mModel['charsetName']

62

63

def feed(self, aBuf):

64

if not self._mModel['keepEnglishLetter']:

65

aBuf = self.filter_without_english_letters(aBuf)

66

aLen = len(aBuf)

67

if not aLen:

68

return self.get_state()

69

for c in aBuf:

70

order = self._mModel['charToOrderMap'][ord(c)]

71

if order < SYMBOL_CAT_ORDER:

72

self._mTotalChar += 1

73

if order < SAMPLE_SIZE:

74

self._mFreqChar += 1

75

if self._mLastOrder < SAMPLE_SIZE:

76

self._mTotalSeqs += 1

77

if not self._mReversed:

78

self._mSeqCounters[self._mModel['precedenceMatrix'][(self._mLastOrder * SAMPLE_SIZE) + order]] += 1

79

else: # reverse the order of the letters in the lookup

80

self._mSeqCounters[self._mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + self._mLastOrder]] += 1

81

self._mLastOrder = order

82

83

if self.get_state() == constants.eDetecting:

84

if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:

85

cf = self.get_confidence()

86

if cf > POSITIVE_SHORTCUT_THRESHOLD:

87

if constants._debug:

88

sys.stderr.write('%s confidence = %s, we have a winner\n' % (self._mModel['charsetName'], cf))

89

self._mState = constants.eFoundIt

90

elif cf < NEGATIVE_SHORTCUT_THRESHOLD:

91

if constants._debug:

92

sys.stderr.write('%s confidence = %s, below negative shortcut threshhold %s\n' % (self._mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD))

93

self._mState = constants.eNotMe

94

95

return self.get_state()

96

97

def get_confidence(self):

98

r = 0.01

99

if self._mTotalSeqs > 0:

100

# print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']

101

r = (1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs / self._mModel['mTypicalPositiveRatio']

102

# print r, self._mFreqChar, self._mTotalChar

103

r = r * self._mFreqChar / self._mTotalChar

104

if r >= 1.0:

105

r = 0.99

106

return r

Older »