~ubuntu-branches/debian/squeeze/sword/squeeze

« back to all changes in this revision

Viewing changes to src/modules/filters/unicodertf.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Daniel Glassey
  • Date: 2004-01-15 15:50:07 UTC
  • Revision ID: james.westby@ubuntu.com-20040115155007-n9mz4x0zxrs1isd3
Tags: upstream-1.5.7
Import upstream version 1.5.7

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************************************
 
2
 *
 
3
 * unicodertf - SWFilter descendant to convert UTF-8 encoded text
 
4
 *                               to RTF \uN unicode escapes
 
5
 */
 
6
 
 
7
 
 
8
#include <stdlib.h>
 
9
#include <stdio.h>
 
10
#include <unicodertf.h>
 
11
 
 
12
SWORD_NAMESPACE_START
 
13
 
 
14
/******************************************************************************
 * UnicodeRTF Constructor - performs no explicit initialisation
 */
UnicodeRTF::UnicodeRTF()
{
}
 
16
 
 
17
 
 
18
char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
 
19
{
 
20
        const unsigned char *from;
 
21
        char digit[10];
 
22
        unsigned long ch;
 
23
        signed short utf16;
 
24
        unsigned char from2[7];
 
25
 
 
26
        SWBuf orig = text;
 
27
 
 
28
        from = (const unsigned char *)orig.c_str();
 
29
 
 
30
        // -------------------------------
 
31
        for (text = ""; *from; from++) {
 
32
                ch = 0;
 
33
                //case: ANSI
 
34
                if ((*from & 128) != 128) {
 
35
                        text += *from;
 
36
                        continue;
 
37
                }
 
38
                //case: Invalid UTF-8 (illegal continuing byte in initial position)
 
39
                if ((*from & 128) && ((*from & 64) != 64)) {
 
40
                        continue;
 
41
                }
 
42
                //case: 2+ byte codepoint
 
43
                from2[0] = *from;
 
44
                from2[0] <<= 1;
 
45
                int subsequent;
 
46
                for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
 
47
                        from2[0] <<= 1;
 
48
                        from2[subsequent] = from[subsequent];
 
49
                        from2[subsequent] &= 63;
 
50
                        ch <<= 6;
 
51
                        ch |= from2[subsequent];
 
52
                }
 
53
                subsequent--;
 
54
                from2[0] <<= 1;
 
55
                char significantFirstBits = 8 - (2+subsequent);
 
56
                
 
57
                ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
 
58
                from += subsequent;
 
59
                if (ch < 0x10000) {
 
60
                                utf16 = (signed short)ch;
 
61
                                text += '\\';
 
62
                                text += 'u';
 
63
                                sprintf(digit, "%d", utf16);
 
64
                                text += digit;
 
65
                                text += '?';
 
66
                         }
 
67
                        else {
 
68
                                utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
 
69
                                text += '\\';
 
70
                                text += 'u';
 
71
                                sprintf(digit, "%d", utf16);
 
72
                                text += digit;
 
73
                                text += '?';
 
74
                                utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
 
75
                                text += '\\';
 
76
                                text += 'u';
 
77
                                sprintf(digit, "%d", utf16);
 
78
                                text += digit;
 
79
                                text += '?';
 
80
                        }
 
81
        }
 
82
           
 
83
        return 0;
 
84
}
 
85
 
 
86
SWORD_NAMESPACE_END