2
* $Id: url.c,v 1.21 2004/01/14 20:52:33 rmanfredi Exp $
4
* Copyright (c) 2002-2003, Raphael Manfredi
6
*----------------------------------------------------------------------
7
* This file is part of gtk-gnutella.
9
* gtk-gnutella is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License as published by
11
* the Free Software Foundation; either version 2 of the License, or
12
* (at your option) any later version.
14
* gtk-gnutella is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
19
* You should have received a copy of the GNU General Public License
20
* along with gtk-gnutella; if not, write to the Free Software Foundation, Inc.,
22
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
*----------------------------------------------------------------------
30
#include "override.h" /* Must be the last header included */
32
RCSID("$Id: url.c,v 1.21 2004/01/14 20:52:33 rmanfredi Exp $");
34
/* Character that introduces an escape sequence in an escaped URL. */
#define ESCAPE_CHAR '%'
35
/*
 * Non-zero when character code x lies in [32, 128) and its entry in
 * is_transparent[] (indexed by x - 32) has at least one bit of mask m set.
 * The argument x is expanded three times, so it must be free of side
 * effects.
 */
#define TRANSPARENT_CHAR(x,m) \
36
((x) >= 32 && (x) < 128 && (is_transparent[(x)-32] & (m)))
39
* Reserved chars: ";", "/", "?", ":", "@", "=" and "&"
40
* Unsafe chars : " ", '"', "<", ">", "#", and "%"
41
* Misc chars : "{", "}", "|", "\", "^", "~", "[", "]" and "`"
43
* Bit 0 encodes regular transparent set (pathnames, '/' is transparent).
44
* Bit 1 encodes regular transparent set minus '+' (query string).
46
/*
 * Transparency flags for character codes 32 through 127, one entry per
 * code, indexed by (code - 32) as done in TRANSPARENT_CHAR().  Each entry
 * is a small bit set: 3 has both bits set, 1 has only bit 0 set (the plus
 * sign is the only such entry), and 0 has no bit set, so the character is
 * not transparent under any mask.
 */
static const guint8 is_transparent[96] = {
47
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0123456789abcdef - */
48
0,3,0,0,3,0,0,3,3,3,3,1,3,3,3,3, /* !"#$%&'()*+,-./ - 32 -> 47 */
49
3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0, /* 0123456789:;<=>? - 48 -> 63 */
50
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO - 64 -> 79 */
51
3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ - 80 -> 95 */
52
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno - 96 -> 111 */
53
3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ - 112 -> 127 */
57
/* Mask selecting bit 1 of the is_transparent[] entries. */
#define QUERY_MASK 0x2
59
/* Upper-case hexadecimal digits, indexed by nibble value (0 to 15). */
static const char hex_alphabet[] = "0123456789ABCDEF";
64
* Escape undesirable characters using %xx, where xx is a hex code.
65
* `mask' tells us whether we're escaping a URL path or a query string.
67
* Returns argument if no escaping is necessary, or a new string otherwise.
69
/*
 * url_escape_mask
 *
 * Escape the characters of url that are not transparent under mask, writing
 * each of them with two upper-case hexadecimal digits taken from
 * hex_alphabet[].  Per the contract stated for this routine, the argument
 * itself is returned when nothing needs escaping and a new string otherwise.
 *
 * NOTE(review): the opening brace, the local declarations, the bodies of
 * the tests below, the string termination and the return statements are
 * not part of the text under review; only the visible statements are
 * described here.
 */
static gchar *url_escape_mask(gchar *url, guint8 mask)
77
/* Pass 1: walk the whole string, testing each character against mask. */
for (p = url, c = *p++; c; c = *p++)
78
if (!TRANSPARENT_CHAR(c, mask))
84
/*
 * Pass 1 left p one past the terminating NUL, so (p - url) is the input
 * length including the NUL.  Twice need_escape is added on top of that;
 * presumably need_escape counts the characters to escape, each of which
 * grows by two bytes -- confirm against the increment, which is not
 * visible here.
 */
new = g_malloc(p - url + (need_escape << 1));
86
/* Pass 2: walk the input again, writing the output through q. */
for (p = url, q = new, c = *p++; c; c = *p++) {
87
if (TRANSPARENT_CHAR(c, mask))
91
/*
 * High nibble of c, then low nibble, as hexadecimal digits.  Assumes c
 * holds an unsigned 8-bit value so that c >> 4 stays within 0 to 15 --
 * its declaration is not visible, confirm.
 */
*q++ = hex_alphabet[c >> 4];
92
*q++ = hex_alphabet[c & 0xf];
101
* url_escape_mask_into
103
* Escape undesirable characters using %xx, where xx is a hex code.
104
* This is done in the `target' buffer, whose size is `len'.
105
* `mask' tells us whether we're escaping a URL path or a query string.
107
* Returns amount of characters written into buffer (not counting trailing
108
* NUL), or -1 if the buffer was too small.
110
/*
 * url_escape_mask_into
 *
 * Escaping variant that writes into the caller-supplied buffer target, of
 * len bytes, instead of allocating.  Characters that are not transparent
 * under mask are written with two upper-case hexadecimal digits.
 *
 * NOTE(review): the opening brace, some declarations, the bodies of the
 * branches, the string termination and the return statements are not part
 * of the text under review; only the visible statements are described.
 */
static gint url_escape_mask_into(
111
const gchar *url, gchar *target, gint len, guint8 mask)
113
/* Read cursor over the input string. */
const gchar *p = url;
116
/* One past the last byte of the output buffer. */
gchar *end = target + len;
118
/* Stop at the end of the input, or as soon as the output buffer is full. */
for (q = target, c = *p++; c && q < end; c = *p++) {
119
if (TRANSPARENT_CHAR(c, mask))
121
/*
 * A full escape sequence takes three output bytes; this branch is only
 * entered when at least that much room is left before end.
 */
else if (end - q >= 3) {
123
/* High nibble of c, then low nibble, as hexadecimal digits. */
*q++ = hex_alphabet[c >> 4];
124
*q++ = hex_alphabet[c & 0xf];
142
* Escape undesirable characters using %xx, where xx is a hex code.
143
* Returns argument if no escaping is necessary, or a new string otherwise.
145
/*
 * url_escape
 *
 * Public entry point for escaping with the path mask: forwards url to
 * url_escape_mask() with PATH_MASK and returns whatever that call returns.
 */
gchar *url_escape(gchar *url)
147
return url_escape_mask(url, PATH_MASK);
153
* Same as url_escape(), but '+' are also escaped for the query string.
154
* Returns argument if no escaping is necessary, or a new string otherwise.
156
/*
 * url_escape_query
 *
 * Public entry point for escaping with the query mask: forwards url to
 * url_escape_mask() with QUERY_MASK and returns whatever that call returns.
 * QUERY_MASK selects bit 1 of is_transparent[], which is clear for the
 * plus sign, so the plus sign is not transparent here.
 */
gchar *url_escape_query(gchar *url)
158
return url_escape_mask(url, QUERY_MASK);
164
* Escape undesirable characters using %xx, where xx is a hex code.
165
* This is done in the `target' buffer, whose size is `len'.
167
* Returns amount of characters written into buffer (not counting trailing
168
* NUL), or -1 if the buffer was too small.
170
/*
 * url_escape_into
 *
 * Public entry point for escaping into a caller-supplied buffer: forwards
 * url, target and len to url_escape_mask_into() with PATH_MASK and returns
 * whatever that call returns.
 */
gint url_escape_into(const gchar *url, gchar *target, gint len)
172
return url_escape_mask_into(url, target, len, PATH_MASK);
178
* Escape control characters using %xx, where xx is a hex code.
180
* Returns argument if no escaping is necessary, or a new string otherwise.
182
/*
 * url_escape_cntrl
 *
 * Escape the control characters, as classified by iscntrl(), and the
 * escape character itself, writing each with two upper-case hexadecimal
 * digits.
 *
 * NOTE(review): the opening brace, the local declarations, the bodies of
 * the tests, the string termination and the return statements are not
 * part of the text under review.  The type of c is therefore unknown:
 * iscntrl() and the hex_alphabet[c >> 4] lookup both need it to hold an
 * unsigned char value -- confirm.
 */
gchar *url_escape_cntrl(gchar *url)
190
/* Pass 1: look for characters that must be escaped. */
for (p = url, c = *p++; c; c = *p++)
191
if (iscntrl(c) || c == ESCAPE_CHAR)
194
/* Nothing to escape (the action taken is not visible here). */
if (need_escape == 0)
197
/*
 * Pass 1 left p one past the terminating NUL, so (p - url) is the input
 * length including the NUL; twice need_escape is added on top of that.
 */
new = g_malloc(p - url + (need_escape << 1));
199
/* Pass 2: walk the input again, writing the output through q. */
for (p = url, q = new, c = *p++; c; c = *p++) {
200
/* Ordinary character: neither a control character nor the escape character. */
if (!iscntrl(c) && c != ESCAPE_CHAR)
204
/* High nibble of c, then low nibble, as hexadecimal digits. */
*q++ = hex_alphabet[c >> 4];
205
*q++ = hex_alphabet[c & 0xf];
216
* Unescape string, in-place if `inplace' is TRUE.
218
* Returns the argument if un-escaping is NOT necessary, a new string
219
* otherwise unless in-place decoding was requested.
221
/*
 * url_unescape
 *
 * Decode the escape sequences found in url.  The final assertion shows
 * that, when inplace is TRUE, the result pointer is url itself.
 *
 * NOTE(review): the opening brace, most local declarations, the bodies of
 * several tests, the code choosing between in-place and allocated output,
 * the string termination and the return statements are not part of the
 * text under review; only the visible statements are described.
 */
gchar *url_unescape(gchar *url, gboolean inplace)
225
gint need_unescape = 0;
229
/* Pass 1: walk the whole string looking for the escape character. */
for (p = url, c = *p++; c; c = *p++)
230
if (c == ESCAPE_CHAR)
233
/* No escape character seen (the action taken is not visible here). */
if (need_unescape == 0)
237
* The "+ 1" in the g_malloc() call below is for the rare cases where
238
* the string would finish on a truncated escape sequence. In that
239
* case, we would not have enough room for the final trailing NUL.
245
/*
 * Pass 1 left p one past the terminating NUL, so (p - url) is the input
 * length including the NUL.  Twice need_unescape is subtracted and one
 * spare byte is added back.
 */
new = g_malloc(p - url - (need_unescape << 1) + 1);
247
/* Pass 2: walk the input again, writing the output through q. */
for (p = url, q = new, c = *p++; c; c = *p++) {
248
if (c != ESCAPE_CHAR)
252
/*
 * First hexadecimal digit goes to the high nibble.  The masks keep only
 * four bits of whatever hex2dec() returns; how hex2dec() reports a
 * character that is not a hexadecimal digit is not visible here.
 */
gint v = (hex2dec(c) << 4) & 0xf0;
254
/* Second hexadecimal digit goes to the low nibble. */
v += hex2dec(c) & 0x0f;
256
break; /* String ending in the middle of escape */
264
/* In-place decoding must have produced its output in the caller buffer. */
g_assert(!inplace || new == url);
272
* Parse all the parameters in the URL query string. All parameter values are
273
* stored in their URL-unescaped form, but parameter names are NOT un-escaped.
275
* Returns a url_params_t object that can be queried later.
277
/*
 * url_params_parse
 *
 * Scan query and store parameters in the hash table of a newly allocated
 * url_params_t.  An equal sign ends a parameter name; an ampersand or the
 * end of the string ends a value.  Values go through url_unescape(); names
 * are duplicated as they are.
 *
 * NOTE(review): the opening brace, most local declarations, several
 * statements inside the loop, the loop exit and the return statement are
 * not part of the text under review; only the visible statements are
 * described here.
 */
url_params_t *url_params_parse(gchar *query)
284
/*
 * Starts out FALSE; presumably TRUE while a value is being scanned -- the
 * code that tests and toggles it is not visible here.
 */
gboolean in_value = FALSE;
286
up = walloc(sizeof(*up));
287
/*
 * String-keyed table created without destroy notifiers: the table never
 * frees keys or values by itself.
 */
up->params = g_hash_table_new(g_str_hash, g_str_equal);
290
/* q scans the query; start marks where the current name or value begins. */
for (q = start = query; /* empty */; q++) {
294
if (c == '&' || c == '\0') { /* End of value */
296
value = url_unescape(start, FALSE);
297
/*
 * url_unescape() hands back its argument when there was nothing to
 * decode; duplicate it so that the stored value is always a separate
 * allocation.
 */
if (value == start) /* No unescaping took place */
298
value = g_strdup(start);
300
/*
 * NOTE(review): per the GLib documentation, inserting a name that is
 * already present replaces the stored value and keeps the old key.  With
 * no destroy notifiers, the previous value and this new copy of the name
 * would then be left unreferenced -- confirm how repeated names should be
 * handled.
 */
g_hash_table_insert(up->params, name, value);
305
start = q + 1; /* Name will start there */
308
if (c == '=') { /* End of parameter name */
310
/*
 * NOTE(review): g_strdup() copies from start up to the next NUL;
 * presumably the string is cut at q by a statement not visible here --
 * confirm.
 */
name = g_strdup(start);
313
start = q + 1; /* Value will start there */
321
/* Both are expected to be NULL once the scan is over. */
g_assert(name == NULL);
322
g_assert(value == NULL);
330
* Get the value of a parameter, or NULL if the parameter is not present.
331
* The value returned has already been URL-unescaped.
333
/*
 * url_params_get
 *
 * Look up name in the parameter table of up.  Both up and its table must
 * be non-NULL (asserted).  The result of g_hash_table_lookup() is returned
 * as is: NULL when the name is absent, otherwise the pointer stored in the
 * table, not a copy.
 */
gchar *url_params_get(url_params_t *up, gchar *name)
335
g_assert(up != NULL);
336
g_assert(up->params != NULL);
338
return g_hash_table_lookup(up->params, name);
341
/*
 * Per-entry callback given to g_hash_table_foreach() by url_params_free().
 * Its body is not part of the text under review; presumably it releases
 * the key and value strings of the entry -- confirm.
 */
static void free_params_kv(gpointer key, gpointer value, gpointer udata)
350
* Dispose of the url_params_t structure.
352
/*
 * url_params_free
 *
 * Release a url_params_t: run free_params_kv() on every entry of its table,
 * destroy the table, then give the structure itself back with wfree().
 * up must be non-NULL (asserted).
 */
void url_params_free(url_params_t *up)
354
g_assert(up != NULL);
356
/* Visit every key/value pair; NULL is passed as the user data. */
g_hash_table_foreach(up->params, free_params_kv, NULL);
357
g_hash_table_destroy(up->params);
359
/* Size handed back matches the walloc(sizeof(*up)) made in url_params_parse(). */
wfree(up, sizeof(*up));