~ubuntu-branches/ubuntu/dapper/lurker/dapper

« back to all changes in this revision

Viewing changes to common/CharsetEscape.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Jonas Meurer
  • Date: 2004-09-26 16:27:51 UTC
  • Revision ID: james.westby@ubuntu.com-20040926162751-z1ohcjltv7ojtg6z
Tags: upstream-1.2
Import upstream version 1.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*  $Id: CharsetEscape.cpp,v 1.11 2003/06/23 15:28:10 terpstra Exp $
 
2
 *  
 
3
 *  CharsetEscape.cpp - A stream manipulator-like thing for charset conversion
 
4
 *  
 
5
 *  Copyright (C) 2002 - Wesley W. Terpstra
 
6
 *  
 
7
 *  License: GPL
 
8
 *  
 
9
 *  Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca>
 
10
 *  
 
11
 *    This program is free software; you can redistribute it and/or modify
 
12
 *    it under the terms of the GNU General Public License as published by
 
13
 *    the Free Software Foundation; version 2.
 
14
 *    
 
15
 *    This program is distributed in the hope that it will be useful,
 
16
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 
17
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
18
 *    GNU General Public License for more details.
 
19
 *    
 
20
 *    You should have received a copy of the GNU General Public License
 
21
 *    along with this program; if not, write to the Free Software
 
22
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
23
 */
 
24
 
 
25
#define _XOPEN_SOURCE 500
 
26
#define _FILE_OFFSET_BITS 64
 
27
 
 
28
#include <mimelib/string.h>
 
29
#include <mimelib/utility.h>
 
30
 
 
31
#include <cerrno>
 
32
#if __GNUC__ == 2
 
33
#include <strstream>
 
34
#else
 
35
#include <sstream>
 
36
#endif
 
37
 
 
38
#include "CharsetEscape.h"
 
39
 
 
40
// Open an iconv conversion descriptor that translates text from `charset`
// into UTF-8 and store it in `ic`.
//
// charset: name of the source character set, as understood by iconv_open().
//          A null pointer is treated like an unknown charset: `ic` receives
//          (iconv_t)-1, the same value iconv_open() returns on failure,
//          instead of handing a null string to iconv_open().
//
// NOTE(review): valid() is declared outside this file; it is presumably the
// check for that failure value -- confirm against CharsetEscape.h.
CharsetEscape::CharsetEscape(const char* charset)
        : ic(charset ? iconv_open("UTF-8", charset) : (iconv_t)-1)
{
}
 
44
 
 
45
// Release the iconv descriptor held in `ic`, but only when valid() says
// there is one to release.
CharsetEscape::~CharsetEscape()
{
        if (!valid())
        {
                return;
        }

        iconv_close(ic);
}
 
49
 
 
50
// Overwrite every NUL byte in the first `is` bytes of `ob` with '?'.
// All other bytes are left untouched; nothing is read when `is` is 0.
void iconv_bug_kill_nulls(char* ob, size_t is)
{
        for (size_t i = 0; i < is; ++i)
        {
                if (ob[i] == '\0')
                {
                        ob[i] = '?';
                }
        }
}
 
59
 
 
60
void CharsetEscape::write(ostream& o, const char* ib, size_t is)
 
61
{
 
62
        if (!valid())
 
63
        {       // when not valid, just keep ascii chars
 
64
        
 
65
                while (1)
 
66
                {
 
67
                        const char* s;
 
68
                        const char* e;
 
69
                        
 
70
                        for (s = ib, e = s + is; s != e; ++s)
 
71
                        {       // if it moves, kill it!
 
72
                                if ((*s < 0x20 || *s >= 0x7f) &&
 
73
                                    (*s != '\n' && *s != '\t'))
 
74
                                {
 
75
                                        break;
 
76
                                }
 
77
                        }
 
78
                        
 
79
                        // write out what we have
 
80
                        if (s != ib) o.write(ib, long(s - ib));
 
81
                        
 
82
                        is -= long(s - ib);
 
83
                        ib = s;
 
84
                        
 
85
                        if (!is) return;
 
86
                        
 
87
                        // skip the offensive byte
 
88
                        ++ib;
 
89
                        --is;
 
90
                        o << '?';
 
91
                }
 
92
        }
 
93
        
 
94
        char buf[8096];
 
95
        
 
96
        char*           ob = &buf[0];
 
97
        size_t          os = sizeof(buf);
 
98
        
 
99
        // We forcibly type-cast iconv b/c it has different types on some
 
100
        // platforms, but the difference is only in the const.
 
101
        while (((size_t (*)(iconv_t, const char **, size_t*, char**, size_t*))&iconv)
 
102
                (ic, &ib, &is, &ob, &os) == (size_t)-1)
 
103
        {
 
104
                if (errno == EILSEQ)
 
105
                {
 
106
                        // Output some stuff
 
107
                        iconv_bug_kill_nulls(buf, sizeof(buf) - os);
 
108
                        o.write(buf, sizeof(buf) - os);
 
109
                        
 
110
                        ob = &buf[0];
 
111
                        os = sizeof(buf);
 
112
                        
 
113
                        // skip a broken byte
 
114
                        ++ib;
 
115
                        --is;
 
116
                        o << "?";
 
117
                }
 
118
                else if (errno == EINVAL)
 
119
                {
 
120
                        // Incomplete data
 
121
                        break;
 
122
                }
 
123
                else
 
124
                {       // E2BIG
 
125
                        iconv_bug_kill_nulls(buf, sizeof(buf) - os);
 
126
                        o.write(buf, sizeof(buf) - os);
 
127
                        
 
128
                        ob = &buf[0];
 
129
                        os = sizeof(buf);
 
130
                }
 
131
        }
 
132
        
 
133
        // success, write out tail.
 
134
        iconv_bug_kill_nulls(buf, sizeof(buf) - os);
 
135
        o.write(buf, sizeof(buf) - os);
 
136
}
 
137
 
 
138
string CharsetEscape::write(const char* ib, size_t is)
 
139
{
 
140
#if __GNUC__ == 2
 
141
        strstream out;
 
142
#else
 
143
        std::stringstream out;
 
144
#endif
 
145
        write(out, ib, is);
 
146
        
 
147
#if __GNUC__ == 2
 
148
        char* tmpstr = out.str();
 
149
        string ret(tmpstr, out.rdbuf()->pcount());
 
150
        free(tmpstr);
 
151
        return ret;
 
152
#else
 
153
        return out.str();
 
154
#endif
 
155
}
 
156
 
 
157
// Transform any =?charset?encoding?str?= stuff in the string to utf-8
 
158
string decode_header(
 
159
        const string&   str,
 
160
        const char*     default_coding)
 
161
{
 
162
#if __GNUC__ == 2
 
163
        strstream out;
 
164
#else
 
165
        std::stringstream out;
 
166
#endif
 
167
        
 
168
        CharsetEscape code(default_coding);
 
169
        
 
170
        string::size_type b = 0, c, e, s;
 
171
        while ((c = str.find("=?", b)) != string::npos)
 
172
        {
 
173
                code.write(out, str.c_str() + b, c - b);
 
174
                
 
175
                if ((e = str.find('?',  c+2)) != string::npos &&
 
176
                    (s = str.find('?',  e+1)) != string::npos &&
 
177
                    s == e + 2 &&
 
178
                    (b = str.find("?=", s+1)) != string::npos &&
 
179
                    str.find_first_of(" \t", c) > b)
 
180
                {       // valid escape
 
181
                        c += 2;
 
182
                        string charset(str, c, e - c);
 
183
                        char encoding = str[e+1];
 
184
                        s += 1;
 
185
                        DwString in(str.c_str() + s, b-s);
 
186
                        DwString decode;
 
187
                        b += 2;
 
188
                        
 
189
                        if (encoding == 'Q' || encoding == 'q')
 
190
                        {
 
191
                                // Convert also all '_' to ' '
 
192
                                size_t x = 0;
 
193
                                while ((x = in.find_first_of("_", x)) != DwString::npos)
 
194
                                {
 
195
                                        in[x] = ' ';
 
196
                                        ++x;
 
197
                                }
 
198
                                
 
199
                                DwDecodeQuotedPrintable(in, decode);
 
200
                        }
 
201
                        if (encoding == 'B' || encoding == 'b')
 
202
                        {
 
203
                                DwDecodeBase64(in, decode);
 
204
                        }
 
205
                        
 
206
                        CharsetEscape subcode(charset.c_str());
 
207
                        subcode.write(out, decode.c_str(), decode.length());
 
208
                }
 
209
                else
 
210
                {       // not valid escape
 
211
                        code.write(out, "=?", 2);
 
212
                        b = c+2;
 
213
                }
 
214
        }
 
215
        
 
216
        code.write(out, str.c_str() + b, str.length() - b);
 
217
        
 
218
#if __GNUC__ == 2
 
219
        char* tmpstr = out.str();
 
220
        string ret(tmpstr, out.rdbuf()->pcount());
 
221
        free(tmpstr);
 
222
        return ret;
 
223
#else
 
224
        return out.str();
 
225
#endif
 
226
}