/* $Id: CharsetEscape.cpp,v 1.11 2003/06/23 15:28:10 terpstra Exp $
 *
 * CharsetEscape.cpp - A stream manipulator-like thing for charset conversion
 *
 * Copyright (C) 2002 - Wesley W. Terpstra
 *
 * Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
25
#define _XOPEN_SOURCE 500
26
#define _FILE_OFFSET_BITS 64
28
#include <mimelib/string.h>
29
#include <mimelib/utility.h>
38
#include "CharsetEscape.h"
40
// Opens an iconv conversion descriptor that translates text from `charset`
// into UTF-8 and stores it in `ic`.
//
// charset: name of the source character set, passed straight to iconv_open.
//
// iconv_open reports failure by returning (iconv_t)-1; no error is raised
// here. The other members consult valid() before using `ic` (presumably
// valid() tests for that failure value; it is declared outside this file
// section).
CharsetEscape::CharsetEscape(const char* charset)
	: ic(iconv_open("UTF-8", charset))
{
}
45
// Releases the iconv descriptor held in `ic`.
//
// The descriptor is closed only when valid() reports it as usable, so a
// descriptor that failed to open is never handed to iconv_close.
CharsetEscape::~CharsetEscape()
{
	if (valid())
	{
		iconv_close(ic);
	}
}
50
// Replaces every NUL byte in the buffer [ob, ob + is) with '?', in place.
//
// ob: start of the buffer to scrub.
// is: number of bytes to inspect; 0 leaves the buffer untouched.
//
// Callers in this file run it over converted output right before writing
// that output to a stream, so no '\0' byte reaches the stream.
void iconv_bug_kill_nulls(char* ob, size_t is)
{
	for (char* const end = ob + is; ob != end; ++ob)
	{
		if (*ob == '\0') *ob = '?';
	}
}
60
// Converts `is` bytes starting at `ib` and writes the result to `o`.
//
// Two paths are visible in this block:
//   - a fallback (see the "when not valid" comment below) that scans the
//     input byte by byte and flushes runs of acceptable bytes unchanged;
//   - an iconv path that converts through the descriptor `ic` (opened
//     towards "UTF-8" in the constructor) into a local buffer `buf`, and
//     writes that buffer to `o` as it goes.
//
// NOTE(review): this block reads as an incomplete extract. The bare integer
// lines appear to be line-number residue, and several statements are not
// visible here (the declarations of `s`, `e`, `buf` and `ob`, the condition
// guarding the fallback path, and most branch bodies). Confirm against the
// full file before relying on the comments below.
void CharsetEscape::write(ostream& o, const char* ib, size_t is)
63
{ // when not valid, just keep ascii chars
70
// Walk the input from ib to ib + is, one byte at a time.
for (s = ib, e = s + is; s != e; ++s)
71
{ // if it moves, kill it!
72
// A byte is rejected when it is below 0x20 or at/above 0x7f, unless it is
// a newline or a tab.
if ((*s < 0x20 || *s >= 0x7f) &&
73
(*s != '\n' && *s != '\t'))
79
// write out what we have
80
// Emits the run [ib, s) of bytes accepted so far; skipped when it is empty.
if (s != ib) o.write(ib, long(s - ib));
87
// skip the offensive byte
97
// `os` is handed to iconv below as the count of free bytes left in `buf`;
// it starts at the full buffer size.
size_t os = sizeof(buf);
99
// We forcibly type-cast iconv b/c it has different types on some
100
// platforms, but the difference is only in the const.
101
// Keep converting for as long as iconv reports failure ((size_t)-1).
while (((size_t (*)(iconv_t, const char **, size_t*, char**, size_t*))&iconv)
102
(ic, &ib, &is, &ob, &os) == (size_t)-1)
107
// sizeof(buf) - os is the number of bytes iconv has produced into buf so
// far. NULs in that span are replaced before the span is written out.
iconv_bug_kill_nulls(buf, sizeof(buf) - os);
108
o.write(buf, sizeof(buf) - os);
113
// skip a broken byte
118
// POSIX iconv sets errno to EINVAL when the input ends in an incomplete
// multibyte sequence. What this branch does about it is not visible here.
else if (errno == EINVAL)
125
// Same scrub-and-write step as above, for another failure branch whose
// condition is not visible here.
iconv_bug_kill_nulls(buf, sizeof(buf) - os);
126
o.write(buf, sizeof(buf) - os);
133
// success, write out tail.
134
iconv_bug_kill_nulls(buf, sizeof(buf) - os);
135
o.write(buf, sizeof(buf) - os);
138
// Convenience overload: converts `is` bytes starting at `ib` and returns the
// converted text as a string instead of writing it to a caller-supplied
// stream.
//
// ib: start of the input bytes.
// is: number of input bytes.
//
// The conversion itself is delegated to the stream-based write() overload;
// this function only collects its output. The result is taken with
// std::stringstream::str(), which returns an owning std::string, so there is
// no raw buffer to size with pcount() or to release afterwards.
string CharsetEscape::write(const char* ib, size_t is)
{
	std::stringstream out;
	write(out, ib, is);
	return out.str();
}
157
// Transform any =?charset?encoding?str?= stuff in the string to utf-8
//
// Text outside such escapes is converted with a CharsetEscape built from
// `default_coding`; the payload of each escape is decoded ('Q'/'q' or
// 'B'/'b') and converted with a CharsetEscape built from the escape's own
// charset name. All output is accumulated in `out`.
//
// NOTE(review): this block reads as an incomplete extract. The bare integer
// lines appear to be line-number residue, and several statements are not
// visible here (the first parameter `str`, the declarations of `x` and
// `decode`, braces, the `else` keyword, the return statement, and any
// adjustments to the c/e/s/b offsets between the visible lines). Confirm
// against the full file before relying on the comments below.
158
string decode_header(
160
const char* default_coding)
165
std::stringstream out;
168
// Converter for everything that is not inside an escape.
CharsetEscape code(default_coding);
170
// b: start of the not-yet-written text, c: position of "=?",
// e: '?' found after c+2, s: '?' found after e, then b is reused for the
// closing "?=".
string::size_type b = 0, c, e, s;
171
while ((c = str.find("=?", b)) != string::npos)
173
// Plain text between the previous position and this "=?".
code.write(out, str.c_str() + b, c - b);
175
// An escape is accepted only when both '?' separators and the closing "?="
// are found, and no space or tab occurs at or after c before that "?=".
// (find_first_of returns npos when there is none, which also compares
// greater than b.)
if ((e = str.find('?', c+2)) != string::npos &&
176
(s = str.find('?', e+1)) != string::npos &&
178
(b = str.find("?=", s+1)) != string::npos &&
179
str.find_first_of(" \t", c) > b)
182
// Charset name: e - c characters of str starting at c.
string charset(str, c, e - c);
183
// Single encoding letter that follows the '?' at e.
char encoding = str[e+1];
185
// Encoded payload: b - s characters starting at s.
DwString in(str.c_str() + s, b-s);
189
if (encoding == 'Q' || encoding == 'q')
191
// Convert also all '_' to ' '
193
while ((x = in.find_first_of("_", x)) != DwString::npos)
199
DwDecodeQuotedPrintable(in, decode);
201
if (encoding == 'B' || encoding == 'b')
203
DwDecodeBase64(in, decode);
206
// Convert the decoded bytes from the escape's own charset.
CharsetEscape subcode(charset.c_str());
207
subcode.write(out, decode.c_str(), decode.length());
210
{ // not valid escape
211
// Emit the "=?" literally, through the default converter.
code.write(out, "=?", 2);
216
// Remaining text after the last position b, once no further "=?" is found.
code.write(out, str.c_str() + b, str.length() - b);
219
// NOTE(review): `out` is declared above as std::stringstream, whose str()
// returns a std::string and whose buffer has no pcount(). These two lines
// match the std::strstream API instead, so they presumably belong to a
// conditional-compilation alternative whose guards are not visible here.
// TODO confirm.
char* tmpstr = out.str();
220
string ret(tmpstr, out.rdbuf()->pcount());