1
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Mozilla Public License Version
6
* 1.1 (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/MPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the License.
15
* The Original Code is Mozilla Universal charset detector code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 2001
20
* the Initial Developer. All Rights Reserved.
23
* Shy Shalom <shooshX@gmail.com>
25
* Alternatively, the contents of this file may be used under the terms of
26
* either the GNU General Public License Version 2 or later (the "GPL"), or
27
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
* in which case the provisions of the GPL or the LGPL are applicable instead
29
* of those above. If you wish to allow use of your version of this file only
30
* under the terms of either the GPL or the LGPL, and not to allow others to
31
* use your version of this file under the terms of the MPL, indicate your
32
* decision by deleting the provisions above and replace them with the notice
33
* and other provisions required by the GPL or the LGPL. If you do not delete
34
* the provisions above, a recipient may use your version of this file under
35
* the terms of any one of the MPL, the GPL or the LGPL.
37
* ***** END LICENSE BLOCK ***** */
39
#include "nsSBCharSetProber.h"
41
// Feeds aLen bytes from aBuf through the model's char-to-order map and bumps
// the sequence-category counter for pairs of consecutive character orders,
// then consults GetConfidence() while the prober is still in eDetecting.
// NOTE(review): this listing shows no braces and the bare numbers look like
// original line numbers with gaps, so some statements are probably missing
// here -- confirm every note below against the complete file.
nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
45
for (PRUint32 i = 0; i < aLen; i++)
47
// The byte is cast to unsigned char before indexing, so a byte with the high
// bit set cannot turn into a negative index where plain char is signed.
order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
49
// NOTE(review): the statement guarded by this test is not visible here.
if (order < SYMBOL_CAT_ORDER)
51
if (order < SAMPLE_SIZE)
55
if (mLastOrder < SAMPLE_SIZE)
59
// precedenceMatrix is indexed as a flat table: first order * SAMPLE_SIZE +
// second order. Here the previous order selects the row.
++(mSeqCounters[(int)mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]); // added (int) to avoid warnings , jens 2009 02 26
60
// NOTE(review): the condition this else pairs with is not visible here.
else // reverse the order of the letters in the lookup
61
++(mSeqCounters[(int)mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]); // added (int) to avoid warnings , jens 2009 02 26
67
// The confidence shortcut is only evaluated while detecting and once more
// than SB_ENOUGH_REL_THRESHOLD sequences have been counted.
if (mState == eDetecting)
68
if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
70
float cf = GetConfidence();
71
// NOTE(review): the statements run by these two threshold branches are not
// visible here; presumably they settle mState -- confirm.
if (cf > POSITIVE_SHORTCUT_THRESHOLD)
73
else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
80
// Takes no arguments and returns nothing. Only a loop header over the
// NUMBER_OF_SEQ_CAT sequence categories is visible in this listing.
// NOTE(review): presumably the loop clears mSeqCounters and the surrounding
// lines reset the other counters -- confirm against the complete file.
void nsSingleByteCharSetProber::Reset(void)
84
for (PRUint32 i = 0; i < NUMBER_OF_SEQ_CAT; i++)
91
//#define NEGATIVE_APPROACH 1
93
// Computes a float confidence from the sequence and character counters.
// Which formula is compiled depends on whether NEGATIVE_APPROACH is defined.
// NOTE(review): the declaration of r, the body of the final test and the
// fallback return value are not visible in this listing -- confirm them
// against the complete file.
float nsSingleByteCharSetProber::GetConfidence(void)
95
#ifdef NEGATIVE_APPROACH
97
// Negative approach: each sequence in NEGATIVE_CAT is weighted by 10 and
// subtracted from the total; the guard keeps that difference positive.
if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
98
// The remaining share of sequences is scaled by mFreqChar / mTotalChar.
return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
100
#else //POSITIVE_APPROACH
103
// The test on mTotalSeqs keeps the division below from dividing by zero.
if (mTotalSeqs > 0) {
104
// Share of POSITIVE_CAT sequences, divided by the model's typical positive ratio.
r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
105
// Scale by mFreqChar / mTotalChar.
// NOTE(review): no visible check that mTotalChar is non-zero -- confirm.
r = r*mFreqChar/mTotalChar;
106
// NOTE(review): presumably a result of 1.0 or more is clamped just below 1 --
// the statement that does so is not visible here.
if (r >= (float)1.00)
114
// Returns the charset name, taken either from the model (mModel->charsetName)
// or from mNameProber->GetCharSetName().
// NOTE(review): the condition that chooses between the two returns is not
// visible in this listing; presumably the model's name is used when no name
// prober is set -- confirm against the complete file.
const char* nsSingleByteCharSetProber::GetCharSetName()
117
return mModel->charsetName;
118
return mNameProber->GetCharSetName();
122
// Prints one status line to stdout: the current confidence with three
// decimals, followed by the charset name in square brackets.
// NOTE(review): no <stdio.h> include or debug-only guard is visible in this
// listing -- confirm that printf is declared where this is compiled.
void nsSingleByteCharSetProber::DumpStatus()
124
printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());