~ubuntu-branches/ubuntu/vivid/libe-book/vivid-proposed

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * This file is part of the libe-book project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <cassert>
#include <string>

#include <unicode/ucsdet.h>

#include "EBOOKCharsetConverter.h"

using std::string;

namespace libebook
{

namespace
{

/** Tag type thrown when an ICU converter cannot be opened.
 *
 * It carries no data. Because it is declared inside the unnamed
 * namespace, code outside this translation unit cannot name it in a
 * catch clause.
 */
struct ConverterException
{
};

/** Guess character set of the text.

  @param[in] text the text
  @param[out] charset the character set
  @param[out] confidence Confidence of the guess, in range [0, 100]; 100 means certainty.

  @returns false if detection failed, true otherwise
 */
bool guessCharacterSet(const char *const text, const unsigned length, string &charset, int32_t &confidence)
{
  // reset output
  charset.clear();
  confidence = 0;

  UErrorCode status = U_ZERO_ERROR;

  UCharsetDetector *detector = ucsdet_open(&status);
  if (status != U_ZERO_ERROR)
    return false;

  ucsdet_setText(detector, text, static_cast<int32_t>(length), &status);
  if (status == U_ZERO_ERROR)
  {
    const UCharsetMatch *const match = ucsdet_detect(detector, &status);
    if (status == U_ZERO_ERROR)
    {
      charset = ucsdet_getName(match, &status);
      if (status == U_ZERO_ERROR)
        confidence = ucsdet_getConfidence(match, &status);
    }
  }

  ucsdet_close(detector);

  return status == U_ZERO_ERROR;
}

}

/** Open the converters needed for transcoding to UTF-8.

  The UTF-8 converter is always opened. The source converter is only
  opened if @c encoding is given; otherwise it stays unset.

  @param[in] encoding name of the source encoding, or 0 to leave the
    source converter unset

  @throws ConverterException if any of the converters cannot be opened
 */
EBOOKCharsetConverter::EBOOKCharsetConverter(const char *const encoding)
  : m_converterToUnicode(0)
  , m_converterToUTF8(0)
{
  UErrorCode status = U_ZERO_ERROR;
  m_converterToUTF8 = ucnv_open("utf-8", &status);
  // Only real failures are fatal; ICU warning codes still come with a
  // usable converter.
  if (U_FAILURE(status) || !m_converterToUTF8)
  {
    // The destructor does not run when the constructor throws, so
    // anything opened so far has to be released here.
    if (m_converterToUTF8)
      ucnv_close(m_converterToUTF8);
    m_converterToUTF8 = 0;
    throw ConverterException();
  }

  if (encoding)
  {
    // Start from a clean status so that a warning from the first call
    // cannot be mistaken for the result of this one.
    status = U_ZERO_ERROR;
    m_converterToUnicode = ucnv_open(encoding, &status);
    if (U_FAILURE(status) || !m_converterToUnicode)
    {
      if (m_converterToUnicode)
        ucnv_close(m_converterToUnicode);
      m_converterToUnicode = 0;
      ucnv_close(m_converterToUTF8);
      m_converterToUTF8 = 0;
      throw ConverterException();
    }
  }
}

/** Close whichever converters are currently open.
 */
EBOOKCharsetConverter::~EBOOKCharsetConverter()
{
  // The source converter may never have been opened, hence the checks.
  if (0 != m_converterToUnicode)
  {
    ucnv_close(m_converterToUnicode);
  }

  if (0 != m_converterToUTF8)
  {
    ucnv_close(m_converterToUTF8);
  }
}

bool EBOOKCharsetConverter::guessEncoding(const char *const in, const unsigned length)
{
  if (m_converterToUnicode)
    return true;

  string charset;
  int32_t confidence = 0;
  if (guessCharacterSet(in, length, charset, confidence))
  {
    UErrorCode status = U_ZERO_ERROR;
    m_converterToUnicode = ucnv_open(charset.c_str(), &status);
    if (status == U_ZERO_ERROR)
      return true;
  }

  return false;
}

bool EBOOKCharsetConverter::convertBytes(const char *const in, const unsigned length, std::vector<char> &out)
{
  assert(m_converterToUnicode);
  assert(m_converterToUTF8);

  UErrorCode status = U_ZERO_ERROR;
  if (out.empty())
    out.resize(length);

  unsigned outLength;

  do
  {
    const char *inText = in;
    char *outText = &out[0];
    status = U_ZERO_ERROR;
    ucnv_convertEx(
      m_converterToUTF8, m_converterToUnicode,
      &outText, outText + out.size(), &inText, inText + length,
      0, 0, 0, 0,
      TRUE, TRUE, &status)
    ;

    switch (status)
    {
    case U_BUFFER_OVERFLOW_ERROR :
      out.resize(out.size() + length);
      break;
    case U_STRING_NOT_TERMINATED_WARNING :
      status = U_ZERO_ERROR;
    // fallthrough intended
    case U_ZERO_ERROR :
      outLength = static_cast<unsigned>(outText - &out[0]);
      break;
    default :
      return false;
    }
  }
  while (U_ZERO_ERROR != status);

  out.erase(out.begin() + outLength, out.end());
  out.push_back(0);

  return true;
}

}

/* vim:set shiftwidth=2 softtabstop=2 expandtab: */