1
/* Copyright 2013 Yorba Foundation
2
* Copyright (c) 2008-2012 Dovecot authors
4
* This software is licensed under the GNU Lesser General Public License
5
* (version 2.1 or later). See the COPYING file in this distribution.
8
namespace Geary.ImapUtf7 {
10
/* This file was modified from Dovecot's LGPLv2.1-licensed implementation in
11
* dovecot-2.1.15/src/lib-imap/imap-utf7.c.
14
/* These UTF16_* parts were modified from Dovecot's MIT-licensed Unicode
15
* library header in dovecot-2.1.15/src/lib/unichar.h. I don't believe it's a
16
* substantial enough portion to warrant inclusion of the MIT license.
19
/* Characters >= base require surrogates */
20
private const unichar UTF16_SURROGATE_BASE = 0x10000;

/* Each surrogate carries UTF16_SURROGATE_SHIFT payload bits, selected by
 * UTF16_SURROGATE_MASK. */
private const int UTF16_SURROGATE_SHIFT = 10;
private const unichar UTF16_SURROGATE_MASK = 0x03ff;

/* Range of high (leading) surrogates. */
private const unichar UTF16_SURROGATE_HIGH_FIRST = 0xd800;
private const unichar UTF16_SURROGATE_HIGH_LAST = 0xdbff;

/* Range of low (trailing) surrogates. */
private const unichar UTF16_SURROGATE_LOW_FIRST = 0xdc00;
private const unichar UTF16_SURROGATE_LOW_LAST = 0xdfff;

/* Upper end of the whole surrogate block; same value as
 * UTF16_SURROGATE_LOW_LAST. */
private const unichar UTF16_SURROGATE_HIGH_MAX = 0xdfff;
30
/*
 * Returns the high (leading) UTF-16 surrogate for chr.
 *
 * The distance of chr above UTF16_SURROGATE_BASE is shifted right by
 * UTF16_SURROGATE_SHIFT and added to UTF16_SURROGATE_HIGH_FIRST.  No range
 * check is performed on chr here.
 */
private unichar UTF16_SURROGATE_HIGH(unichar chr) {
    unichar offset = chr - UTF16_SURROGATE_BASE;
    unichar upper_bits = offset >> UTF16_SURROGATE_SHIFT;
    return UTF16_SURROGATE_HIGH_FIRST + upper_bits;
}
34
/*
 * Returns the low (trailing) UTF-16 surrogate for chr.
 *
 * The distance of chr above UTF16_SURROGATE_BASE is masked with
 * UTF16_SURROGATE_MASK and added to UTF16_SURROGATE_LOW_FIRST.  No range
 * check is performed on chr here.
 */
private unichar UTF16_SURROGATE_LOW(unichar chr) {
    unichar offset = chr - UTF16_SURROGATE_BASE;
    unichar lower_bits = offset & UTF16_SURROGATE_MASK;
    return UTF16_SURROGATE_LOW_FIRST + lower_bits;
}
39
/* Encoding alphabet: the character at index i is the symbol for the 6-bit
 * value i.  The final two symbols are '+' and ','. */
private const string imap_b64enc =
40
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
42
/* Marker stored in imap_b64dec for byte values that are not alphabet symbols. */
private const uint8 XX = 0xff;
43
/* Decoding table indexed by byte value; the inverse of imap_b64enc.  Each row
 * holds 16 entries, so the 16 rows span byte values 0x00-0xff. */
private const uint8 imap_b64dec[256] = {
44
/* 0x00-0x1f: no symbols */
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
45
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
46
/* 0x20-0x2f: '+' -> 62, ',' -> 63 */
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, 63,XX,XX,XX,
47
/* 0x30-0x3f: '0'-'9' -> 52-61 */
52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
48
/* 0x40-0x5f: 'A'-'Z' -> 0-25 */
XX, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
49
15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
50
/* 0x60-0x7f: 'a'-'z' -> 26-51 */
XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
51
41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
52
/* 0x80-0xff: no symbols */
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
53
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
54
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
55
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
56
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
57
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
58
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
59
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX
62
/*
 * Appends the bytes of input to dest as symbols taken from imap_b64enc, six
 * bits of input per symbol.
 *
 * NOTE(review): several lines of this function are not visible here (the
 * declaration and advancing of pos, the loop header and the branch
 * conditions), so the comments below describe only the statements shown.
 */
private void mbase64_encode(StringBuilder dest, uint8[] input) {
65
int len = input.length;
67
/* Three input bytes at pos..pos+2 are written out as four symbols. */
dest.append_c(imap_b64enc[input[pos + 0] >> 2]);
68
dest.append_c(imap_b64enc[((input[pos + 0] & 3) << 4) |
69
(input[pos + 1] >> 4)]);
70
dest.append_c(imap_b64enc[((input[pos + 1] & 0x0f) << 2) |
71
((input[pos + 2] & 0xc0) >> 6)]);
72
dest.append_c(imap_b64enc[input[pos + 2] & 0x3f]);
77
/* Presumably the tail for a byte count that is not a multiple of three
 * (TODO confirm): the first symbol is the top six bits of input[pos]. */
dest.append_c(imap_b64enc[input[pos + 0] >> 2]);
79
/* Uses input[pos] only: its low two bits, zero-filled on the right. */
dest.append_c(imap_b64enc[(input[pos + 0] & 0x03) << 4]);
81
/* Uses input[pos] and input[pos + 1]; the final symbol is zero-filled on
 * the right. */
dest.append_c(imap_b64enc[((input[pos + 0] & 0x03) << 4) |
82
(input[pos + 1] >> 4)]);
83
dest.append_c(imap_b64enc[(input[pos + 1] & 0x0f) << 2]);
89
/*
 * Scans str byte by byte up to its terminating NUL, testing each byte for
 * '&' or a value >= 0x80.
 *
 * NOTE(review): the statements producing the return value are not visible
 * here.  Judging by the name, the index of the first such byte is presumably
 * returned -- confirm, including what is returned when no byte matches.
 */
private int first_encode_index(string str) {
90
for (int p = 0; str[p] != '\0'; p++) {
91
if (str[p] == '&' || (uint8) str[p] >= 0x80)
97
/*
 * Converts the UTF-8 string str to IMAP UTF-7.
 *
 * The first p bytes of str, with p taken from first_encode_index(), are
 * copied to dest as they are.  After that, a byte below 0x80 that reaches the
 * check shown is appended unchanged, while each run of bytes >= 0x80 is read
 * as code points, stored in utf16 as big-endian UTF-16 bytes (a surrogate
 * pair for code points at or above UTF16_SURROGATE_BASE) and passed to
 * mbase64_encode().
 *
 * NOTE(review): some lines of this function are not visible here, including
 * the early return, any special handling of '&', the declarations of utf16,
 * next_p and chr, and the final return -- confirm against the full source.
 */
public string utf8_to_imap_utf7(string str) throws ConvertError {
98
int p = first_encode_index(str);
100
/* no characters that need to be encoded */
104
/* at least one encoded character */
105
StringBuilder dest = new StringBuilder();
106
dest.append_len(str, p);
107
/* NOTE(review): str.length is re-evaluated on every pass; Vala's
 * string.length is strlen-based, so hoisting it into a local may be
 * worthwhile -- confirm. */
while (p < str.length) {
113
if ((uint8) str[p] < 0x80) {
114
dest.append_c(str[p]);
120
while ((uint8) str[p] >= 0x80) {
123
// TODO: validate this conversion, throw ConvertError?
/* Reads the code point at next_p into chr; the call's result is not checked. */
124
str.get_next_char(ref next_p, out chr);
125
if (chr < UTF16_SURROGATE_BASE) {
/* One 16-bit unit, high byte first. */
126
utf16 += (uint8) (chr >> 8);
127
utf16 += (uint8) (chr & 0xff);
/* Otherwise: high surrogate then low surrogate, each high byte first. */
129
unichar u16 = UTF16_SURROGATE_HIGH(chr);
130
utf16 += (uint8) (u16 >> 8);
131
utf16 += (uint8) (u16 & 0xff);
132
u16 = UTF16_SURROGATE_LOW(chr);
133
utf16 += (uint8) (u16 >> 8);
134
utf16 += (uint8) (u16 & 0xff);
138
mbase64_encode(dest, utf16);
143
/*
 * Converts UTF-16 data held in output, starting at index pos, to UTF-8.
 *
 * output is read as big-endian 16-bit units, with every index reduced modulo
 * 4.  A unit outside the surrogate range is converted on its own; otherwise
 * a high/low surrogate pair is combined into a single code point.  len is
 * presumably the number of valid bytes available from pos -- TODO confirm.
 *
 * Throws ConvertError.ILLEGAL_SEQUENCE for malformed data; see the messages
 * below.
 *
 * NOTE(review): the conditions guarding several of the throws, the appends
 * to dest and the updates of pos are not visible here.
 */
private void utf16buf_to_utf8(StringBuilder dest, uint8[] output, ref int pos, int len) throws ConvertError {
145
throw new ConvertError.ILLEGAL_SEQUENCE("Odd number of bytes in UTF-16 data");
147
/* First 16-bit unit, big-endian. */
uint16 high = (output[pos % 4] << 8) | output[(pos+1) % 4];
148
/* Not a surrogate at all: the unit is itself the code point. */
if (high < UTF16_SURROGATE_HIGH_FIRST ||
149
high > UTF16_SURROGATE_HIGH_MAX) {
151
string? s = ((unichar) high).to_string();
153
throw new ConvertError.ILLEGAL_SEQUENCE("Couldn't convert U+%04hx to UTF-8", high);
159
/* Presumably reached only for surrogate values (TODO confirm): a value
 * above the high-surrogate range cannot start a pair. */
if (high > UTF16_SURROGATE_HIGH_LAST)
160
throw new ConvertError.ILLEGAL_SEQUENCE("UTF-16 data out of range");
162
/* missing the second character */
163
throw new ConvertError.ILLEGAL_SEQUENCE("Truncated UTF-16 data");
166
/* Second 16-bit unit, two bytes after the first. */
uint16 low = (output[(pos+2)%4] << 8) | output[(pos+3) % 4];
167
if (low < UTF16_SURROGATE_LOW_FIRST || low > UTF16_SURROGATE_LOW_LAST)
168
throw new ConvertError.ILLEGAL_SEQUENCE("Illegal UTF-16 surrogate");
170
/* Recombine the masked payload bits of both surrogates into one code point. */
unichar chr = UTF16_SURROGATE_BASE +
171
(((high & UTF16_SURROGATE_MASK) << UTF16_SURROGATE_SHIFT) |
172
(low & UTF16_SURROGATE_MASK));
173
string? s = chr.to_string();
175
throw new ConvertError.ILLEGAL_SEQUENCE("Couldn't convert U+%04x to UTF-8", chr);
179
/*
 * Decodes base-64 symbols from str, starting at index p and running until a
 * '-' is met, and passes the resulting bytes to utf16buf_to_utf8() for
 * conversion into dest.
 *
 * input holds the 6-bit values looked up in imap_b64dec.  output is a 4-byte
 * buffer used circularly: bytes are written at outpos % 4 and consumed from
 * outstart.  Whenever the write index catches up with outstart the buffer is
 * handed to utf16buf_to_utf8() with a length of 4.
 *
 * Throws ConvertError.ILLEGAL_SEQUENCE when a byte is neither an alphabet
 * symbol nor, from the third position of a group onwards, the '-'.
 *
 * NOTE(review): the advancing of p, the increments of outpos outside the
 * shown conditions and the exits taken on '-' are not visible here.
 */
private void mbase64_decode_to_utf8(StringBuilder dest, string str, ref int p) throws ConvertError {
180
uint8 input[4], output[4];
181
int outstart = 0, outpos = 0;
183
while (str[p] != '-') {
184
/* The first two symbols of a group must both be valid. */
input[0] = imap_b64dec[(uint8) str[p + 0]];
185
input[1] = imap_b64dec[(uint8) str[p + 1]];
186
if (input[0] == 0xff || input[1] == 0xff)
187
throw new ConvertError.ILLEGAL_SEQUENCE("Illegal character in IMAP base-64 encoded sequence");
189
/* Byte 1: all six bits of input[0] plus the top two bits of input[1]. */
output[outpos % 4] = (input[0] << 2) | (input[1] >> 4);
190
if (++outpos % 4 == outstart) {
191
utf16buf_to_utf8(dest, output, ref outstart, 4);
194
/* Third symbol: an invalid byte is tolerated only if it is '-'. */
input[2] = imap_b64dec[(uint8) str[p + 2]];
195
if (input[2] == 0xff) {
196
if (str[p + 2] != '-')
197
throw new ConvertError.ILLEGAL_SEQUENCE("Illegal character in IMAP base-64 encoded sequence");
203
/* Byte 2: the low four bits of input[1] plus the top four bits of input[2]. */
output[outpos % 4] = (input[1] << 4) | (input[2] >> 2);
204
if (++outpos % 4 == outstart) {
205
utf16buf_to_utf8(dest, output, ref outstart, 4);
208
/* Fourth symbol: same rule as the third. */
input[3] = imap_b64dec[(uint8) str[p + 3]];
209
if (input[3] == 0xff) {
210
if (str[p + 3] != '-')
211
throw new ConvertError.ILLEGAL_SEQUENCE("Illegal character in IMAP base-64 encoded sequence");
217
/* Byte 3: the low two bits of input[2] plus all six bits of input[3]. */
output[outpos % 4] = ((input[2] << 6) & 0xc0) | input[3];
218
if (++outpos % 4 == outstart) {
219
utf16buf_to_utf8(dest, output, ref outstart, 4);
224
/* Bytes still pending in output are converted with their actual count. */
if (outstart != outpos % 4) {
225
utf16buf_to_utf8(dest, output, ref outstart, (4 + outpos - outstart) % 4);
228
/* found ending '-' */
232
/*
 * Converts the IMAP UTF-7 string str to UTF-8.
 *
 * str is scanned for the first '&' or byte >= 0x80.  A byte >= 0x80 is
 * rejected with ConvertError.ILLEGAL_SEQUENCE.  Otherwise the text before
 * that point is copied to dest and the rest is processed, with encoded
 * sections decoded by mbase64_decode_to_utf8() and other bytes appended as
 * they are.
 *
 * NOTE(review): the declaration of p, the early return, the test that
 * selects the '&' branch and the end of the function are not visible here.
 */
public string imap_utf7_to_utf8(string str) throws ConvertError {
234
for (p = 0; str[p] != '\0'; p++) {
235
if (str[p] == '&' || (uint8) str[p] >= 0x80)
238
if (str[p] == '\0') {
239
/* no IMAP-UTF-7 encoded characters */
242
if ((uint8) str[p] >= 0x80) {
243
/* 8bit characters - the input is broken */
244
throw new ConvertError.ILLEGAL_SEQUENCE("IMAP UTF-7 input string contains 8-bit data");
247
/* at least one encoded character */
248
StringBuilder dest = new StringBuilder();
249
dest.append_len(str, p);
250
while (str[p] != '\0') {
252
/* Presumably the "&-" escape for a literal '&' (TODO confirm); note
 * that p is advanced by the test itself. */
if (str[++p] == '-') {
256
mbase64_decode_to_utf8(dest, str, ref p);
257
/* A new encoded section may not start right after one has ended;
 * "&-" is the only '&' form accepted at this point. */
if (str[p + 0] == '&' && str[p + 1] != '-') {
259
throw new ConvertError.ILLEGAL_SEQUENCE("Illegal break in encoded text");
263
dest.append_c(str[p++]);