2
// C++ Implementation: btstringmgr
7
// Author: The BibleTime team <info@bibletime.info>, (C) 2004
9
// Copyright: See COPYING file that comes with this distribution
13
#include "btstringmgr.h"
18
// Upper-cases the C string `text` in place.
//   text   - NUL-terminated buffer that is read and overwritten.
//   maxlen - byte limit for the write-back; 0 means "use strlen(text)".
char* BTStringMgr::upperUTF8(char* text, unsigned int maxlen) const {
19
// Copy limit: the caller-supplied maxlen when it is non-zero, otherwise the
// current length of text.
const int max = (maxlen>0) ? maxlen : strlen(text);
22
// Decode text as UTF-8 into a QString, upper-case it, re-encode it as UTF-8
// and copy at most `max` bytes back over the input buffer.
// NOTE(review): strncpy() does not write a terminating NUL when the source is
// `max` bytes or longer, and an upper-cased string can be longer in bytes
// than the original - confirm the result cannot be silently truncated.
strncpy(text, (const char*)QString::fromUtf8(text).upper().utf8(), max);
30
// Upper-cases the single byte that `text` currently points at.
// NOTE(review): presumably part of a byte-wise fallback for non-UTF-8 input -
// confirm against the enclosing branch. Passing a plain (possibly negative)
// char to toupper() is undefined; a cast to unsigned char would be safer.
*text = toupper(*text);
40
// Upper-cases the Latin-1 C string `text` in place using toupper().
//   text - buffer that is read and overwritten.
//   max  - NOTE(review): presumably a length limit; confirm how it is used.
char* BTStringMgr::upperLatin1(char* text, unsigned int max) const {
44
// Upper-cases the single byte that `text` currently points at.
// NOTE(review): toupper() is locale dependent and undefined for negative char
// values; Latin-1 bytes >= 0x80 are negative where char is signed - confirm.
*text = toupper(*text);
51
// Unicode capability query for this string manager.
// NOTE(review): presumably returns true, since upperUTF8() above performs real
// UTF-8 upper-casing through QString - confirm against the function body.
bool BTStringMgr::supportsUnicode() const {
55
// Heuristically decides whether the NUL-terminated byte string `buf` is UTF-8
// text: it scans byte by byte, classifies ASCII bytes with the text_chars
// table, checks the lead/continuation byte structure of multi-byte sequences
// and finally returns `gotone`.
// NOTE(review): the top-level `const` on the bool return type has no effect.
const bool BTStringMgr::isUtf8(const char *buf) const {
57
// Current byte; unsigned so it can index text_chars[256] directly.
// NOTE(review): `register` is deprecated in C++11 and removed in C++17.
register unsigned char c;
60
// Character classes stored in text_chars below.
// NOTE(review): these single-letter macros remain defined for the rest of the
// translation unit unless they are #undef'd later - confirm.
#define F 0 /* character never appears in text */
61
#define T 1 /* character appears in plain ASCII text */
62
#define I 2 /* character appears in ISO-8859 text */
63
#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
65
// Classification of every possible byte value, 16 entries per row; the
// trailing comment on each row gives the high nibble.
static const unsigned char text_chars[256] = {
66
// Of the control bytes 0x00-0x1F only BEL, BS, HT, LF, FF, CR (row 0x0X) and
// 0x1B (row 0x1X) are marked T.
/* BEL BS HT LF FF CR */
67
F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
69
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
70
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
71
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
72
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
73
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
74
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
75
// 0x7F is the only F entry in this row.
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
77
// 0x85 is the only T entry in the 0x80-0x9F range (presumably NEL - confirm).
X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
78
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
79
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
80
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
81
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
82
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
83
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
84
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
89
// Walk the string until the terminating NUL: the loop condition both loads
// the next byte into c and stops when that byte is zero.
for (i = 0; (c = buf[i]); i++) {
90
if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
92
* Even if the whole file is valid UTF-8 sequences,
93
* still reject it if it uses weird control characters.
96
// Any 7-bit byte that the table does not class as plain text (T) takes this
// branch. NOTE(review): presumably the function rejects the input here.
if (text_chars[c] != T)
100
// A continuation byte (10xxxxxx) where a lead byte is expected.
// NOTE(review): presumably rejected as invalid UTF-8 - confirm.
else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
103
else { /* 11xxxxxx begins UTF-8 */
106
// Classify the lead byte by the position of its first zero bit.
// NOTE(review): presumably each branch sets `following` to the number of
// continuation bytes expected - confirm. The 111110xx and 1111110x forms
// (5- and 6-byte sequences) are outside the range RFC 3629 allows.
if ((c & 0x20) == 0) { /* 110xxxxx */
109
else if ((c & 0x10) == 0) { /* 1110xxxx */
112
else if ((c & 0x08) == 0) { /* 11110xxx */
115
else if ((c & 0x04) == 0) { /* 111110xx */
118
else if ((c & 0x02) == 0) { /* 1111110x */
124
// Check each of the `following` bytes after the lead byte.
for (n = 0; n < following; n++) {
130
// True when c is NOT of the form 10xxxxxx, i.e. not a continuation byte.
if ((c & 0x80) == 0 || (c & 0x40))
139
// NOTE(review): `gotone` is presumably set once a complete multi-byte
// sequence has been seen - confirm where it is assigned.
return gotone; /* don't claim it's UTF-8 if it's all 7-bit */