3
* A Nautilus extension which offers configurable context menu actions.
5
* Copyright (C) 2005 The GNOME Foundation
6
* Copyright (C) 2006, 2007, 2008 Frederic Ruaudel and others (see AUTHORS)
7
* Copyright (C) 2009 Pierre Wieser and others (see AUTHORS)
9
* This Program is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU General Public License as
11
* published by the Free Software Foundation; either version 2 of
12
* the License, or (at your option) any later version.
14
* This Program is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
19
* You should have received a copy of the GNU General Public
20
* License along with this Library; see the file COPYING. If not,
21
* write to the Free Software Foundation, Inc., 59 Temple Place,
22
* Suite 330, Boston, MA 02111-1307, USA.
25
* Frederic Ruaudel <grumz@grumz.net>
26
* Rodrigo Moya <rodrigo@gnome-db.org>
27
* Pierre Wieser <pwieser@trychlos.org>
28
* ... and many others (see AUTHORS)
33
* shamelessly pulled out of GnomeVFS (gnome-vfs-uri and consorts)
36
/* gnome-vfs-uri.h - URI handling for the GNOME Virtual File System.
38
Copyright (C) 1999 Free Software Foundation
40
The Gnome Library is free software; you can redistribute it and/or
41
modify it under the terms of the GNU Library General Public License as
42
published by the Free Software Foundation; either version 2 of the
43
License, or (at your option) any later version.
45
The Gnome Library is distributed in the hope that it will be useful,
46
but WITHOUT ANY WARRANTY; without even the implied warranty of
47
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
48
Library General Public License for more details.
50
You should have received a copy of the GNU Library General Public
51
License along with the Gnome Library; see the file COPYING.LIB. If not,
52
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
53
Boston, MA 02111-1307, USA.
55
Author: Ettore Perazzoli <ettore@comm2000.it> */
63
#include "na-gnome-vfs-uri.h"
65
#define HEX_ESCAPE '%'
67
static void collapse_slash_runs (char *path, int from_offset);
68
static int find_next_slash (const char *path, int current_offset);
69
static int find_slash_before_offset (const char *path, int to);
70
static const gchar *get_method_string (const gchar *substring, gchar **method_string);
71
static gchar * gnome_vfs_canonicalize_pathname (gchar *path);
72
static char *gnome_vfs_escape_set(const char *string, const char *match_set);
73
static void gnome_vfs_remove_optional_escapes (char *uri);
74
static char * gnome_vfs_unescape_string (const gchar *escaped_string, const gchar *illegal_characters);
75
static int hex_to_int (gchar c);
76
static void set_uri_element (NAGnomeVFSURI *vfs, const gchar *text, guint len);
77
static gchar *split_toplevel_uri (const gchar *path, guint path_len,
78
gchar **host_return, gchar **user_return,
79
guint *port_return, gchar **password_return);
80
static int unescape_character (const char *scanner);
83
na_gnome_vfs_uri_parse( NAGnomeVFSURI *vfs, const gchar *text_uri )
85
const gchar *method_scanner;
86
gchar *extension_scanner;
90
vfs->host_name = NULL;
92
vfs->user_name = NULL;
95
if (text_uri[0] == '\0') {
99
method_scanner = get_method_string(text_uri, &vfs->scheme );
100
if (strcmp (vfs->scheme, "pipe") == 0 ){
104
extension_scanner = strchr (method_scanner, GNOME_VFS_URI_MAGIC_CHR);
105
if (extension_scanner == NULL) {
106
set_uri_element (vfs, method_scanner, strlen (method_scanner));
111
set_uri_element (vfs, method_scanner, extension_scanner - method_scanner);
113
if (strchr (extension_scanner, ':') == NULL) {
114
/* extension is a fragment identifier */
115
/*uri->fragment_id = g_strdup (extension_scanner + 1);*/
121
/*
 * Release the string members of @vfs: scheme, host_name, user_name and
 * password. g_free() accepts NULL, so members that were never assigned
 * a string can be passed safely.
 */
na_gnome_vfs_uri_free( NAGnomeVFSURI *vfs )
123
g_free( vfs->scheme );
124
g_free( vfs->host_name );
125
g_free( vfs->user_name );
126
g_free( vfs->password );
131
collapse_slash_runs (char *path, int from_offset)
134
/* Collapse multiple `/'s in a row. */
135
for (i = from_offset;; i++) {
136
if (path[i] != GNOME_VFS_URI_PATH_CHR) {
141
if (from_offset < i) {
142
memmove (path + from_offset, path + i, strlen (path + i) + 1);
148
/*
 * Return the offset within @path of the first GNOME_VFS_URI_PATH_CHR found
 * at or after @current_offset, or -1 when there is none.
 *
 * @current_offset must not exceed strlen (path). The comparison below is
 * made against an unsigned length, so a negative offset is converted to a
 * large unsigned value and trips the assertion as well.
 */
find_next_slash (const char *path, int current_offset)
152
g_assert (current_offset <= strlen (path));
154
match = strchr (path + current_offset, GNOME_VFS_URI_PATH_CHR);
155
return match == NULL ? -1 : match - path;
159
find_slash_before_offset (const char *path, int to)
167
next_offset = find_next_slash (path, next_offset);
168
if (next_offset < 0 || next_offset >= to) {
171
result = next_offset;
178
get_method_string (const gchar *substring, gchar **method_string)
184
g_ascii_isalnum (*p) || *p == '+' || *p == '-' || *p == '.';
191
!(p == substring + 1 && g_ascii_isalpha (*substring))
194
/* Found toplevel method specification. */
195
method = g_strndup (substring, p - substring);
196
*method_string = g_ascii_strdown (method, -1);
200
*method_string = g_strdup ("file");
206
/* Canonicalize path, and return a new path. Do everything in situ. The new
207
path differs from path in:
209
Multiple `/'s are collapsed to a single `/'.
210
Leading `./'s and trailing `/.'s are removed.
211
Non-leading `../'s and trailing `..'s are handled by removing
212
portions of the path. */
214
gnome_vfs_canonicalize_pathname (gchar *path)
218
if (path == NULL || strlen (path) == 0) {
222
/* Walk along path looking for things to compact. */
223
for (i = 0, marker = 0;;) {
227
/* Check for `../', `./' or trailing `.' by itself. */
228
if (path[i] == '.') {
229
/* Handle trailing `.' by itself. */
230
if (path[i + 1] == '\0') {
231
if (i > 1 && path[i - 1] == GNOME_VFS_URI_PATH_CHR) {
232
/* strip the trailing /. */
235
/* convert path "/." to "/" */
242
if (path[i + 1] == GNOME_VFS_URI_PATH_CHR) {
243
memmove (path + i, path + i + 2,
244
strlen (path + i + 2) + 1);
246
/* don't leave leading '/' for paths that started
247
* as relative (.//foo)
249
collapse_slash_runs (path, i);
255
/* Handle `../' or trailing `..' by itself.
256
* Remove the previous xxx/ part
258
if (path[i + 1] == '.'
259
&& (path[i + 2] == GNOME_VFS_URI_PATH_CHR
260
|| path[i + 2] == '\0')) {
262
/* ignore ../ at the beginning of a path */
264
marker = find_slash_before_offset (path, i - 1);
266
/* Either advance past '/' or point to the first character */
268
if (path [i + 2] == '\0' && marker > 1) {
269
/* If we are looking at a /.. at the end of the uri and we
270
* need to eat the last '/' too.
274
g_assert(marker < i);
276
if (path[i + 2] == GNOME_VFS_URI_PATH_CHR) {
277
/* strip the entire ../ string */
281
memmove (path + marker, path + i + 2,
282
strlen (path + i + 2) + 1);
286
if (path[i] == GNOME_VFS_URI_PATH_CHR) {
290
collapse_slash_runs (path, i);
295
/* advance to the next '/' */
296
i = find_next_slash (path, i);
298
/* If we didn't find any slashes, then there is nothing left to do. */
304
collapse_slash_runs (path, i);
309
/* Escape undesirable characters using %
310
* -------------------------------------
312
* This function takes a pointer to a string in which
313
* some characters may be unacceptable unescaped.
314
* It returns a string which has these characters
315
* represented by a '%' character followed by two hex digits.
317
* This routine returns a g_malloced string.
320
static const gchar hex[16] = "0123456789ABCDEF";
323
* gnome_vfs_escape_set:
324
* @string: string to be escaped.
325
* @match_set: a string containing all characters to be escaped in @string.
327
* Escapes all characters in @string which are listed in @match_set.
329
* Return value: a newly allocated string equivalent to @string but
330
* with characters in @match_set escaped.
333
gnome_vfs_escape_set (const char *string,
334
const char *match_set)
338
char *result_scanner;
343
if (string == NULL) {
347
if (match_set == NULL) {
348
return g_strdup (string);
351
for (scanner = string; *scanner != '\0'; scanner++) {
352
if (strchr(match_set, *scanner) != NULL) {
353
/* this character is in the set of characters
360
if (escape_count == 0) {
361
return g_strdup (string);
364
/* allocate two extra characters for every character that
365
* needs escaping and space for a trailing zero
367
result = g_malloc (scanner - string + escape_count * 2 + 1);
368
for (scanner = string, result_scanner = result; *scanner != '\0'; scanner++) {
369
if (strchr(match_set, *scanner) != NULL) {
370
/* this character is in the set of characters
373
*result_scanner++ = HEX_ESCAPE;
374
*result_scanner++ = hex[*scanner >> 4];
375
*result_scanner++ = hex[*scanner & 15];
378
*result_scanner++ = *scanner;
382
*result_scanner = '\0';
388
* gnome_vfs_remove_optional_escapes:
389
* @uri: an escaped uri.
391
* Scans the @uri and converts characters that do not have to be
392
* escaped into an un-escaped form. The characters that get treated this
393
* way are defined as unreserved by the RFC.
395
* Return value: none. This version is declared void, so no error value is returned for a malformed @uri.
407
static const guchar uri_character_kind[128] =
409
CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,
410
CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,
411
CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,
412
CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,CONTROL ,
413
/* ' ' ! " # $ % & ' */
414
SPACE ,UNRESERVED,DELIMITERS,DELIMITERS,RESERVED ,DELIMITERS,RESERVED ,UNRESERVED,
415
/* ( ) * + , - . / */
416
UNRESERVED,UNRESERVED,UNRESERVED,RESERVED ,RESERVED ,UNRESERVED,UNRESERVED,RESERVED ,
417
/* 0 1 2 3 4 5 6 7 */
418
UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
419
/* 8 9 : ; < = > ? */
420
UNRESERVED,UNRESERVED,RESERVED ,RESERVED ,DELIMITERS,RESERVED ,DELIMITERS,RESERVED ,
421
/* @ A B C D E F G */
422
RESERVED ,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
423
/* H I J K L M N O */
424
UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
425
/* P Q R S T U V W */
426
UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
427
/* X Y Z [ \ ] ^ _ */
428
UNRESERVED,UNRESERVED,UNRESERVED,UNWISE ,UNWISE ,UNWISE ,UNWISE ,UNRESERVED,
429
/* ` a b c d e f g */
430
UNWISE ,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
431
/* h i j k l m n o */
432
UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
433
/* p q r s t u v w */
434
UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,UNRESERVED,
435
/* x y z { | } ~ DEL */
436
UNRESERVED,UNRESERVED,UNRESERVED,UNWISE ,UNWISE ,UNWISE ,UNRESERVED,CONTROL
440
/*
 * Rewrites @uri in place: each %XX escape that decodes to a character
 * classified UNRESERVED in uri_character_kind[] is replaced by the
 * character itself, and the rest of the string (terminator included) is
 * moved left over the two hex digits.
 */
gnome_vfs_remove_optional_escapes (char *uri)
450
length = strlen (uri);
452
for (scanner = (guchar *)uri; *scanner != '\0'; scanner++, length--) {
453
if (*scanner == HEX_ESCAPE) {
454
character = unescape_character ((char *)scanner + 1);
456
/* invalid hexadecimal character */
/* NOTE(review): uri_character_kind[] has 128 entries, while
 * unescape_character() combines two hex digits and can therefore yield
 * values up to 0xFF (e.g. for "%FF"). Confirm that values above 127 are
 * rejected before the table lookup below; otherwise it reads past the
 * end of the table.
 */
460
if (uri_character_kind [character] == UNRESERVED) {
461
/* This character does not need to be escaped, convert it
462
* to a non-escaped form.
464
*scanner = (guchar)character;
465
g_assert (length >= 3);
467
/* Shrink the string covering up the two extra digits of the
468
* escaped character. Include the trailing '\0' in the copy
469
* to keep the string terminated.
471
memmove (scanner + 1, scanner + 3, length - 2);
473
/* This character must stay escaped, skip the entire
480
} else if (*scanner > 127
481
|| uri_character_kind [*scanner] == DELIMITERS
482
|| uri_character_kind [*scanner] == UNWISE
483
|| uri_character_kind [*scanner] == CONTROL) {
484
/* It is illegal for this character to be in an un-escaped form
495
return c >= '0' && c <= '9' ? c - '0'
496
: c >= 'A' && c <= 'F' ? c - 'A' + 10
497
: c >= 'a' && c <= 'f' ? c - 'a' + 10
502
/*
 * Combine the two hexadecimal digits found at @scanner into one value,
 * (first_digit << 4) | second_digit. Each digit is converted with
 * hex_to_int() and tested for a negative result before it is used.
 */
unescape_character (const char *scanner)
507
first_digit = hex_to_int (*scanner++);
508
if (first_digit < 0) {
512
second_digit = hex_to_int (*scanner++);
513
if (second_digit < 0) {
517
return (first_digit << 4) | second_digit;
521
* gnome_vfs_unescape_string:
522
* @escaped_string: an escaped uri, path, or other string.
523
* @illegal_characters: a string containing a sequence of characters
524
* considered "illegal" to be escaped, '\0' is automatically in this list.
526
* Decodes escaped characters (i.e. PERCENTxx sequences) in @escaped_string.
527
* Characters are encoded in PERCENTxy form, where xy is the ASCII hex code
528
* for character 16x+y.
530
* Return value: a newly allocated string with the unescaped
531
* equivalents, or %NULL if @escaped_string contained an escaped
532
* encoding of one of the characters in @illegal_characters.
535
gnome_vfs_unescape_string (const gchar *escaped_string,
536
const gchar *illegal_characters)
542
if (escaped_string == NULL) {
546
result = g_malloc (strlen (escaped_string) + 1);
549
for (in = escaped_string; *in != '\0'; in++) {
551
if (*in == HEX_ESCAPE) {
552
character = unescape_character (in + 1);
554
/* Check for an illegal character. We consider '\0' illegal here. */
556
|| (illegal_characters != NULL
557
&& strchr (illegal_characters, (char)character) != NULL)) {
563
*out++ = (char)character;
567
g_assert (out - result <= strlen (escaped_string));
573
set_uri_element (NAGnomeVFSURI *vfs,
579
if (text == NULL || len == 0) {
580
vfs->uri = g_strdup ("/");
584
if ( text[0] == '/' && text[1] == '/') {
585
vfs->uri = split_toplevel_uri (text + 2, len - 2,
591
vfs->uri = g_strndup (text, len);
594
/* FIXME: this should be handled/supported by the specific method.
595
* This is a quick and dirty hack to minimize the amount of changes
596
* right before a milestone release.
598
* Do some method specific escaping. This for instance converts
599
* '?' to %3F in every method except "http" where it has a special
602
if ( ! (strcmp (vfs->scheme, "http") == 0
603
|| strcmp (vfs->scheme, "https") == 0
604
|| strcmp (vfs->scheme, "dav") == 0
605
|| strcmp (vfs->scheme, "davs") == 0
606
|| strcmp (vfs->scheme, "ghelp") == 0
607
|| strcmp (vfs->scheme, "gnome-help") == 0
608
|| strcmp (vfs->scheme, "help") == 0
611
escaped_text = gnome_vfs_escape_set (vfs->uri, ";?&=+$,");
613
vfs->uri = escaped_text;
616
gnome_vfs_remove_optional_escapes (vfs->uri);
617
gnome_vfs_canonicalize_pathname (vfs->uri);
623
Extract hostname and username from "path" with length "path_len"
626
sunsite.unc.edu/pub/linux
627
miguel@sphinx.nuclecu.unam.mx/c/nc
629
joe@foo.edu:11321/private
632
This function implements the following regexp: (whitespace for clarity)
634
( ( ([^:@/]*) (:[^@/]*)? @ )? ([^/:]*) (:([0-9]*)?) )? (/.*)?
635
( ( ( user ) ( pw )? )? (host) (port)? )? (path <return value>)?
637
It returns NULL if neither <host> nor <path> could be matched.
639
port is checked to ensure that it does not exceed 0xffff.
641
return value is <path> or is "/" if the path portion is not present
642
All other arguments are set to 0 or NULL if their portions are not present
644
pedantic: this function ends up doing an unbounded lookahead, making it
645
potentially O(n^2) instead of O(n). This could be avoided. Realistically, though,
646
it's just the password field.
648
Differences between the old and the new implementation:
651
localhost:8080 host="localhost:8080" host="localhost" port=8080
652
/Users/mikef host="" host=NULL
657
#define URI_MOVE_PAST_DELIMITER \
659
cur_tok_start = (++cur); \
660
if (path_end == cur) { \
667
#define uri_strlen_to(from, to) ( (to) - (from) )
668
#define uri_strdup_to(from, to) g_strndup ((from), uri_strlen_to((from), (to)))
676
static UriStrspnSet uri_strspn_sets[] = {
677
{":@]" GNOME_VFS_URI_PATH_STR, FALSE, ""},
678
{"@" GNOME_VFS_URI_PATH_STR, FALSE, ""},
679
{":" GNOME_VFS_URI_PATH_STR, FALSE, ""},
680
{"]" GNOME_VFS_URI_PATH_STR, FALSE, ""}
683
#define URI_DELIMITER_ALL_SET (uri_strspn_sets + 0)
684
#define URI_DELIMITER_USER_SET (uri_strspn_sets + 1)
685
#define URI_DELIMITER_HOST_SET (uri_strspn_sets + 2)
686
#define URI_DELIMITER_IPV6_SET (uri_strspn_sets + 3)
688
#define BV_SET(bv, idx) (bv)[((guchar)(idx))>>3] |= (1 << ( (idx) & 7) )
689
#define BV_IS_SET(bv, idx) ((bv)[((guchar)(idx))>>3] & (1 << ( (idx) & 7)))
692
uri_strspn_to(const char *str, UriStrspnSet *set, const char *path_end)
698
memset (set->bv, 0, sizeof(set->bv));
700
for (cur_chr = set->chrs; '\0' != *cur_chr; cur_chr++) {
701
BV_SET (set->bv, *cur_chr);
704
BV_SET (set->bv, '\0');
708
for (cur = str; cur < path_end && ! BV_IS_SET (set->bv, *cur); cur++)
711
if (cur >= path_end || '\0' == *cur) {
719
split_toplevel_uri (const gchar *path, guint path_len,
720
gchar **host_return, gchar **user_return,
721
guint *port_return, gchar **password_return)
723
const char *path_end;
724
const char *cur_tok_start;
726
const char *next_delimiter;
731
g_assert (host_return != NULL);
732
g_assert (user_return != NULL);
733
g_assert (port_return != NULL);
734
g_assert (password_return != NULL);
739
*password_return = NULL;
744
if (path == NULL || path_len == 0) {
745
return g_strdup ("/");
749
path_end = path + path_len;
751
cur_tok_start = path;
752
cur = uri_strspn_to (cur_tok_start, URI_DELIMITER_ALL_SET, path_end);
758
/* This ':' belongs to username or IPv6 address.*/
759
tmp = uri_strspn_to (cur_tok_start, URI_DELIMITER_USER_SET, path_end);
761
if (tmp == NULL || *tmp != '@') {
762
tmp = uri_strspn_to (cur_tok_start, URI_DELIMITER_IPV6_SET, path_end);
764
if (tmp != NULL && *tmp == ']') {
773
/* Check for IPv6 address. */
776
/* No username:password in the URI */
777
/* cur points to ']' */
779
cur = uri_strspn_to (cur, URI_DELIMITER_HOST_SET, path_end);
784
next_delimiter = uri_strspn_to (cur, URI_DELIMITER_USER_SET, path_end);
786
next_delimiter = NULL;
791
|| (next_delimiter != NULL && *next_delimiter != '/' ))) {
793
/* *cur == ':' or '@' and string contains a @ before a / */
795
if (uri_strlen_to (cur_tok_start, cur) > 0) {
797
tmp = uri_strdup_to (cur_tok_start,cur);
798
*user_return = gnome_vfs_unescape_string (tmp, NULL);
803
URI_MOVE_PAST_DELIMITER;
805
cur = uri_strspn_to(cur_tok_start, URI_DELIMITER_USER_SET, path_end);
807
if (cur == NULL || *cur != '@') {
810
} else if (uri_strlen_to (cur_tok_start, cur) > 0) {
812
tmp = uri_strdup_to (cur_tok_start,cur);
813
*password_return = gnome_vfs_unescape_string (tmp, NULL);
819
URI_MOVE_PAST_DELIMITER;
821
/* Move cur to point to ':' after ']' */
822
cur = uri_strspn_to (cur_tok_start, URI_DELIMITER_IPV6_SET, path_end);
824
if (cur != NULL && *cur == ']') { /* For IPv6 address */
825
cur = uri_strspn_to (cur, URI_DELIMITER_HOST_SET, path_end);
827
cur = uri_strspn_to (cur_tok_start, URI_DELIMITER_HOST_SET, path_end);
836
if (uri_strlen_to (cur_tok_start, path_end) > 0) {
837
*host_return = uri_strdup_to (cur_tok_start, path_end);
838
if (*(path_end - 1) == GNOME_VFS_URI_PATH_CHR) {
839
ret = g_strdup (GNOME_VFS_URI_PATH_STR);
844
} else { /* No host, no path */
850
} else if (*cur == ':') {
854
if (uri_strlen_to (cur_tok_start, cur) > 0) {
855
*host_return = uri_strdup_to (cur_tok_start, cur);
858
goto done; /*No host but a port?*/
861
URI_MOVE_PAST_DELIMITER;
865
for ( ; cur < path_end && g_ascii_isdigit (*cur); cur++) {
870
/* We let :(/.*)$ be treated gracefully */
871
if (*cur != '\0' && *cur != GNOME_VFS_URI_PATH_CHR) {
873
goto done; /* ...but this would be an error */
885
} else /* GNOME_VFS_URI_PATH_CHR == *cur */ {
888
if (uri_strlen_to (cur_tok_start, cur) > 0) {
889
*host_return = uri_strdup_to (cur_tok_start, cur);
895
if (*cur_tok_start != '\0' && uri_strlen_to (cur_tok_start, path_end) > 0) {
896
ret = uri_strdup_to(cur, path_end);
897
} else if (*host_return != NULL) {
898
ret = g_strdup (GNOME_VFS_URI_PATH_STR);
904
if (*host_return != NULL) {
906
/* Check for an IPv6 address in square brackets.*/
907
if (strchr (*host_return, '[') && strchr (*host_return, ']') && strchr (*host_return, ':')) {
909
/* Extract the IPv6 address from square braced string. */
910
host = g_ascii_strdown ((*host_return) + 1, strlen (*host_return) - 2);
912
host = g_ascii_strdown (*host_return, -1);
915
g_free (*host_return);
920
/* If we didn't complete our mission, discard all the partials */
922
g_free (*host_return);
923
g_free (*user_return);
924
g_free (*password_return);
930
*password_return = NULL;