2
//C- -------------------------------------------------------------------
4
//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
5
//C- Copyright (c) 2001 AT&T
7
//C- This software is subject to, and may be distributed under, the
8
//C- GNU General Public License, either Version 2 of the license,
9
//C- or (at your option) any later version. The license should have
10
//C- accompanied the software or you may obtain a copy of the license
11
//C- from the Free Software Foundation at http://www.fsf.org .
13
//C- This program is distributed in the hope that it will be useful,
14
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
//C- GNU General Public License for more details.
18
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19
//C- Lizardtech Software. Lizardtech Software has authorized us to
20
//C- replace the original DjVu(r) Reference Library notice by the following
21
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
23
//C- ------------------------------------------------------------------
24
//C- | DjVu (r) Reference Library (v. 3.5)
25
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27
//C- | 6,058,214 and patents pending.
29
//C- | This software is subject to, and may be distributed under, the
30
//C- | GNU General Public License, either Version 2 of the license,
31
//C- | or (at your option) any later version. The license should have
32
//C- | accompanied the software or you may obtain a copy of the license
33
//C- | from the Free Software Foundation at http://www.fsf.org .
35
//C- | The computer code originally released by LizardTech under this
36
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37
//C- | ORIGINAL CODE." Subject to any third party intellectual property
38
//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39
//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42
//C- | General Public License. This grant only confers the right to
43
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44
//C- | the extent such infringement is reasonably necessary to enable
45
//C- | recipient to make, have made, practice, sell, or otherwise dispose
46
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47
//C- | any greater extent that may be necessary to utilize further
48
//C- | modifications or combinations.
50
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54
//C- +------------------------------------------------------------------
60
# pragma implementation
63
// From: Leon Bottou, 1/31/2002
64
// This has been heavily changed by Lizardtech.
65
// They decided to use URLs for everything, including
66
// the most basic file access. The URL class is now an unholy
67
// mixture of code for syntactically parsing the urls (which it was)
68
// and file status code (only for local file: urls).
70
#include "GException.h"
90
# define MAXPATHLEN _MAX_PATH
92
# define MAXPATHLEN 1024
95
# if ( MAXPATHLEN < 1024 )
97
# define MAXPATHLEN 1024
101
#if defined(UNIX) || defined(OS2)
103
# include <sys/types.h>
104
# include <sys/stat.h>
110
# ifdef TIME_WITH_SYS_TIME
111
# include <sys/time.h>
114
# ifdef HAVE_SYS_TIME_H
115
# include <sys/time.h>
120
# ifdef HAVE_DIRENT_H
122
# define NAMLEN(dirent) strlen((dirent)->d_name)
124
# define dirent direct
125
# define NAMLEN(dirent) (dirent)->d_namlen
126
# ifdef HAVE_SYS_NDIR_H
127
# include <sys/ndir.h>
129
# ifdef HAVE_SYS_DIR_H
130
# include <sys/dir.h>
136
# else /* !AUTOCONF */
137
# include <sys/time.h>
140
# include <sys/ndir.h>
141
# elif defined(OLDBSD)
143
# include <sys/dir.h>
146
# define dirent direct
147
# define NAMLEN(dirent) (dirent)->d_namlen
150
# define NAMLEN(dirent) strlen((dirent)->d_name)
152
# endif /* !AUTOCONF */
162
#ifdef HAVE_NAMESPACES
164
# ifdef NOT_DEFINED // Just to fool emacs c++ mode
170
// File-scope constants shared by the URL parsing and file-name
// conversion code below.

// Name of the CGI argument that marks the start of the DjVu-specific
// arguments; CGI names are upcased before being compared against it.
static const char djvuopts[]="DJVUOPTS";
171
// Prefix of a file URL that explicitly names the local machine.
static const char localhost[]="file://localhost/";
172
// Single characters that are significant when parsing URLs and paths.
static const char backslash='\\';
173
static const char colon=':';
174
static const char dot='.';
175
// "file:" scheme followed by the two slashes that open the host part.
static const char filespecslashes[] = "file://";
176
// Scheme prefix that every file URL is expected to start with.
static const char filespec[] = "file:";
177
static const char slash='/';
178
static const char percent='%';
179
// The two RFC 1738 local-host forms that may follow "file:" in a URL.
static const char localhostspec1[] = "//localhost/";
180
static const char localhostspec2[] = "///";
181
// Zero byte; its address serves as an empty string (see the macintosh root).
static const char nillchar=0;
183
static const char tilde='~';
184
// Per-platform root directory; UTF8Filename() passes it to expand_name()
// as the directory the URL path is interpreted against.
// NOTE(review): the opening conditional of this platform chain is not
// visible here -- confirm which macro selects the "/" variant.
static const char root[] = "/";
185
#elif defined(WIN32) || defined(OS2)
186
static const char root[] = "\\";
187
#elif defined(macintosh)
188
// Empty string: points at the zero byte defined above.
static char const * const root = &nillchar;
190
#error "Define something here for your operating system"
195
pathname_start(const GUTF8String &url, const int protolength);
198
// hexval(c)
// -- Returns the numeric value (0..15) of the hexadecimal digit 'c'.
//    Upper-case and lower-case digits are both accepted.
//    Returns -1 if 'c' is not a hexadecimal digit, which lets callers
//    test the result with '>= 0' while decoding %XX escapes.
static int
hexval(char c)
{
  if (c>='0' && c<='9')
    return c-'0';
  if (c>='A' && c<='F')
    return c-'A'+10;
  if (c>='a' && c<='f')
    return c-'a'+10;
  return -1;
}
214
// is_argument(start)
// -- Returns true if 'start' points to the first character of a URL
//    argument section: '#' opens the hash argument and '?' opens the
//    CGI arguments.  The characters '&' and ';' are deliberately not
//    matched here, since they only separate one CGI argument from the
//    next.  A null pointer never denotes an argument.
static bool
is_argument(const char * start)
{
  if (!start)
    return false;
  return (*start=='#' || *start=='?');
}
223
// is_argument_sep(start)
// -- Returns true if 'start' points to a character that separates two
//    CGI arguments, namely '&' or ';'.  The characters that open an
//    argument section ('#' and '?') are not separators.
//    A null pointer never denotes a separator.
static bool
is_argument_sep(const char * start)
{
  if (!start)
    return false;
  return (*start=='&') || (*start==';');
}
231
GURL::convert_slashes(void)
233
GUTF8String xurl(get_string());
235
const int protocol_length=protocol(xurl).length();
236
for(char *ptr=(xurl.getbuf()+protocol_length);*ptr;ptr++)
237
if(*ptr == backslash)
244
// collapse(ptr, chars)
// -- Removes the first 'chars' characters of the NUL-terminated string
//    'ptr' in place by moving the remainder (terminator included)
//    toward the beginning.  'chars' is clamped to the string length,
//    so asking for more characters than are available simply leaves an
//    empty string.  A null 'ptr' or a non-positive 'chars' leaves the
//    string untouched.
static void
collapse(char * ptr, const int chars)
{
  if (!ptr || chars<=0)
    return;
  const size_t length=strlen(ptr);
  const size_t skip=((size_t)chars>length)?length:(size_t)chars;
  // Source and destination overlap, hence memmove; the +1 carries the
  // terminating NUL along with the tail.
  memmove(ptr, ptr+skip, length-skip+1);
}
256
GURL::beautify_path(GUTF8String xurl)
259
const int protocol_length=GURL::protocol(xurl).length();
261
// Eats parts like ./ or ../ or ///
263
GPBuffer<char> gbuffer(buffer,xurl.length()+1);
264
strcpy(buffer, (const char *)xurl);
267
char * start=buffer+pathname_start(xurl,protocol_length);
269
// Find end of the url (don't touch arguments)
272
for(ptr=start;*ptr;ptr++)
274
if (is_argument(ptr))
282
// Eat multiple slashes
283
for(;(ptr=strstr(start, "////"));collapse(ptr, 3))
285
for(;(ptr=strstr(start, "//"));collapse(ptr, 1))
287
// Convert /./ stuff into plain /
288
for(;(ptr=strstr(start, "/./"));collapse(ptr, 2))
290
#if defined(WIN32) || defined(OS2)
291
if(!xurl.cmp(filespec,sizeof(filespec)-1))
294
if(start&&(start[0] == '/')&&
295
!xurl.cmp("file:////",sizeof("file:////")-1))
300
for(ptr=start+offset;(ptr=strchr(ptr, '/'));)
302
if(isalpha((++ptr)[0]))
304
if((ptr[1] == ':')&&(ptr[2]=='/'))
307
GPBuffer<char> gbuffer2(buffer2,strlen(ptr)+1);
309
gbuffer.resize(strlen(ptr)+sizeof(localhost));
310
strcpy(buffer,localhost);
311
strcat(buffer,buffer2);
312
ptr=(start=buffer+sizeof(localhost))+1;
319
while((ptr=strstr(start, "/../")))
321
for(char * ptr1=ptr-1;(ptr1>=start);ptr1--)
325
collapse(ptr1, ptr-ptr1+3);
331
// Remove trailing /.
332
ptr=start+strlen(start)-2;
333
if((ptr>=start)&& (ptr == GUTF8String("/.")))
338
ptr=start+strlen(start)-3;
339
if((ptr >= start) && (ptr == GUTF8String("/..")))
341
for(char * ptr1=ptr-1;(ptr1>=start);ptr1--)
351
// Done. Copy the buffer back into the URL and add arguments.
358
GURL::beautify_path(void)
360
url=beautify_path(get_string());
364
GURL::init(const bool nothrow)
366
GCriticalSectionLock lock(&class_lock);
371
GUTF8String proto=protocol();
372
if (proto.length()<2)
376
G_THROW( ERR_MSG("GURL.no_protocol") "\t"+url);
380
// Below we have to make this complex test to detect URLs really
381
// referring to *local* files. Surprisingly, file://hostname/dir/file
382
// is also valid, but shouldn't be treated thru local FS.
383
if (proto=="file" && url[5]==slash &&
384
(url[6]!=slash || !url.cmp(localhost, sizeof(localhost))))
386
// Separate the arguments
389
const char * const url_ptr=url;
391
for(ptr=url_ptr;*ptr&&!is_argument(ptr);ptr++)
394
url=url.substr(0,(size_t)(ptr-url_ptr));
397
// Do double conversion
398
GUTF8String tmp=UTF8Filename();
403
G_THROW( ERR_MSG("GURL.fail_to_file") );
406
url=GURL::Filename::UTF8(tmp).get_string();
411
G_THROW( ERR_MSG("GURL.fail_to_URL") );
414
// Return the argument back
428
GURL::GURL(const char * url_in)
429
: url(url_in ? url_in : ""), validurl(false)
433
GURL::GURL(const GUTF8String & url_in)
434
: url(url_in), validurl(false)
438
GURL::GURL(const GNativeString & url_in)
439
: url(url_in.getNative2UTF8()), validurl(false)
441
#if defined(WIN32) || defined(OS2)
442
if(is_valid() && is_local_file_url())
444
GURL::Filename::UTF8 xurl(UTF8Filename());
445
url=xurl.get_string(true);
451
GURL::GURL(const GURL & url_in)
454
if(url_in.is_valid())
456
url=url_in.get_string();
465
GURL::operator=(const GURL & url_in)
467
GCriticalSectionLock lock(&class_lock);
468
if(url_in.is_valid())
470
url=url_in.get_string();
481
GURL::protocol(const GUTF8String& url)
483
const char * const url_ptr=url;
484
const char * ptr=url_ptr;
486
c && (isalnum(c) || c == '+' || c == '-' || c == '.');
487
c=*(++ptr)) EMPTY_LOOP;
488
if (ptr[0]==colon && ptr[1]=='/' && ptr[2]=='/')
489
return GUTF8String(url_ptr, ptr-url_ptr);
490
return GUTF8String();
494
GURL::hash_argument(void) const
495
// Returns the HASH argument (anything after '#' and before '?')
497
const GUTF8String xurl(get_string());
502
// Break if CGI argument is found
503
for(const char * start=xurl;*start&&(*start!='?');start++)
513
return decode_reserved(arg);
517
GURL::set_hash_argument(const GUTF8String &arg)
519
const GUTF8String xurl(get_string());
524
for(ptr=xurl;*ptr;ptr++)
526
if (is_argument(ptr))
539
url=new_url+"#"+GURL::encode_reserved(arg)+ptr;
543
GURL::parse_cgi_args(void)
544
// Will read CGI arguments from the URL into
545
// cgi_name_arr and cgi_value_arr
549
GCriticalSectionLock lock1(&class_lock);
550
cgi_name_arr.empty();
551
cgi_value_arr.empty();
553
// Search for the beginning of CGI arguments
554
const char * start=url;
563
// Now loop until we see all of them
566
GUTF8String arg; // Storage for another argument
567
while(*start) // Seek for the end of it
569
if (is_argument_sep(start))
580
// Got argument in 'arg'. Split it into 'name' and 'value'
582
const char * const arg_ptr=arg;
583
for(ptr=arg_ptr;*ptr&&(*ptr != '=');ptr++)
586
GUTF8String name, value;
589
name=GUTF8String(arg_ptr, (int)((ptr++)-arg_ptr));
590
value=GUTF8String(ptr, arg.length()-name.length()-1);
596
int args=cgi_name_arr.size();
597
cgi_name_arr.resize(args);
598
cgi_value_arr.resize(args);
599
cgi_name_arr[args]=decode_reserved(name);
600
cgi_value_arr[args]=decode_reserved(value);
606
GURL::store_cgi_args(void)
607
// Will store CGI arguments from the cgi_name_arr and cgi_value_arr
612
GCriticalSectionLock lock1(&class_lock);
614
const char * const url_ptr=url;
616
for(ptr=url_ptr;*ptr&&(*ptr!='?');ptr++)
619
GUTF8String new_url(url_ptr, ptr-url_ptr);
621
for(int i=0;i<cgi_name_arr.size();i++)
623
GUTF8String name=GURL::encode_reserved(cgi_name_arr[i]);
624
GUTF8String value=GURL::encode_reserved(cgi_value_arr[i]);
625
new_url+=(i?"&":"?")+name;
634
GURL::cgi_arguments(void) const
637
const_cast<GURL *>(this)->init();
638
return cgi_name_arr.size();
642
GURL::djvu_cgi_arguments(void) const
645
const_cast<GURL *>(this)->init();
646
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
649
for(int i=0;i<cgi_name_arr.size();i++)
651
if (cgi_name_arr[i].upcase()==djvuopts)
653
args=cgi_name_arr.size()-(i+1);
661
GURL::cgi_name(int num) const
663
if(!validurl) const_cast<GURL *>(this)->init();
664
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
665
return (num<cgi_name_arr.size())?cgi_name_arr[num]:GUTF8String();
669
GURL::djvu_cgi_name(int num) const
671
if(!validurl) const_cast<GURL *>(this)->init();
672
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
675
for(int i=0;i<cgi_name_arr.size();i++)
676
if (cgi_name_arr[i].upcase()==djvuopts)
678
for(i++;i<cgi_name_arr.size();i++)
690
GURL::cgi_value(int num) const
692
if(!validurl) const_cast<GURL *>(this)->init();
693
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
694
return (num<cgi_value_arr.size())?cgi_value_arr[num]:GUTF8String();
698
GURL::djvu_cgi_value(int num) const
700
if(!validurl) const_cast<GURL *>(this)->init();
701
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
704
for(int i=0;i<cgi_name_arr.size();i++)
706
if (cgi_name_arr[i].upcase()==djvuopts)
708
for(i++;i<cgi_name_arr.size();i++)
712
arg=cgi_value_arr[i];
723
GURL::cgi_names(void) const
725
if(!validurl) const_cast<GURL *>(this)->init();
726
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
731
GURL::cgi_values(void) const
733
if(!validurl) const_cast<GURL *>(this)->init();
734
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
735
return cgi_value_arr;
739
GURL::djvu_cgi_names(void) const
741
if(!validurl) const_cast<GURL *>(this)->init();
742
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
745
DArray<GUTF8String> arr;
746
for(i=0;(i<cgi_name_arr.size())&&
747
(cgi_name_arr[i].upcase()!=djvuopts)
751
int size=cgi_name_arr.size()-(i+1);
755
for(i=0;i<arr.size();i++)
756
arr[i]=cgi_name_arr[cgi_name_arr.size()-arr.size()+i];
763
GURL::djvu_cgi_values(void) const
765
if(!validurl) const_cast<GURL *>(this)->init();
766
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
769
DArray<GUTF8String> arr;
770
for(i=0;i<cgi_name_arr.size()&&(cgi_name_arr[i].upcase()!=djvuopts);i++)
773
int size=cgi_name_arr.size()-(i+1);
777
for(i=0;i<arr.size();i++)
778
arr[i]=cgi_value_arr[cgi_value_arr.size()-arr.size()+i];
785
GURL::clear_all_arguments(void)
787
clear_hash_argument();
788
clear_cgi_arguments();
792
GURL::clear_hash_argument(void)
793
// Clear anything after first '#' and before the following '?'
795
if(!validurl) init();
796
GCriticalSectionLock lock(&class_lock);
799
for(const char * start=url;*start;start++)
801
// Break on first CGI arg.
820
GURL::clear_cgi_arguments(void)
824
GCriticalSectionLock lock1(&class_lock);
827
cgi_name_arr.empty();
828
cgi_value_arr.empty();
830
// And clear everything past the '?' sign in the URL
831
const char * ptrurl = url;
832
for(const char *ptr = ptrurl; *ptr; ptr++)
835
url.setat(ptr-ptrurl, 0);
841
GURL::clear_djvu_cgi_arguments(void)
843
if(!validurl) init();
844
// First - modify the arrays
845
GCriticalSectionLock lock(&class_lock);
846
for(int i=0;i<cgi_name_arr.size();i++)
848
if (cgi_name_arr[i].upcase()==djvuopts)
850
cgi_name_arr.resize(i-1);
851
cgi_value_arr.resize(i-1);
856
// And store them back into the URL
861
GURL::add_djvu_cgi_argument(const GUTF8String &name, const char * value)
865
GCriticalSectionLock lock1(&class_lock);
867
// Check if we already have the "DJVUOPTS" argument
868
bool have_djvuopts=false;
869
for(int i=0;i<cgi_name_arr.size();i++)
871
if (cgi_name_arr[i].upcase()==djvuopts)
878
// If there is no DJVUOPTS, insert it
881
int pos=cgi_name_arr.size();
882
cgi_name_arr.resize(pos);
883
cgi_value_arr.resize(pos);
884
cgi_name_arr[pos]=djvuopts;
887
// Add new argument to the array
888
int pos=cgi_name_arr.size();
889
cgi_name_arr.resize(pos);
890
cgi_value_arr.resize(pos);
891
cgi_name_arr[pos]=name;
892
cgi_value_arr[pos]=value;
894
// And update the URL
899
GURL::is_local_file_url(void) const
901
if(!validurl) const_cast<GURL *>(this)->init();
902
GCriticalSectionLock lock((GCriticalSection *) &class_lock);
903
return (protocol()=="file" && url[5]==slash);
907
pathname_start(const GUTF8String &url, const int protolength)
909
const int length=url.length();
911
if(protolength+1<length)
913
retval=url.search(slash,((url[protolength+1] == '/')
914
?((url[protolength+2] == '/')?(protolength+3):(protolength+2))
917
return (retval>0)?retval:length;
921
GURL::pathname(void) const
923
return (is_local_file_url())
924
?GURL::encode_reserved(UTF8Filename())
925
:url.substr(pathname_start(url,protocol().length()),(unsigned int)(-1));
929
GURL::base(void) const
931
const GUTF8String xurl(get_string());
932
const int protocol_length=protocol(xurl).length();
933
const char * const url_ptr=xurl;
934
const char * ptr, * xslash;
935
ptr=xslash=url_ptr+protocol_length+1;
941
for(ptr=xslash;ptr[0] && !is_argument(ptr);ptr++)
943
if ((ptr[0]==slash)&&ptr[1]&&!is_argument(ptr+1))
951
return GURL::UTF8(GUTF8String(xurl,(int)(xslash-url_ptr))+"/"+ptr);
955
GURL::operator==(const GURL & gurl2) const
957
const GUTF8String g1(get_string());
958
const GUTF8String g2(gurl2.get_string());
959
const char *s1 = (const char*)g1;
960
const char *s2 = (const char*)g2;
963
while (s1[n1] && !is_argument(s1+n1))
965
while (s2[n2] && !is_argument(s2+n2))
968
return !strcmp(s1+n1,s2+n2) && !strncmp(s1,s2,n1);
969
if (n1 == n2+1 && s1[n2]=='/')
970
return !strcmp(s1+n1,s2+n2) && !strncmp(s1,s2,n2);
971
if (n2 == n1+1 && s2[n1]=='/')
972
return !strcmp(s1+n1,s2+n2) && !strncmp(s1,s2,n1);
977
GURL::name(void) const
980
const_cast<GURL *>(this)->init();
984
const GUTF8String xurl(url);
985
const int protocol_length=protocol(xurl).length();
986
const char * ptr, * xslash=(const char *)xurl+protocol_length-1;
987
for(ptr=(const char *)xurl+protocol_length;
988
*ptr && !is_argument(ptr);ptr++)
993
retval=GUTF8String(xslash+1, ptr-xslash-1);
999
GURL::fname(void) const
1002
const_cast<GURL *>(this)->init();
1003
return decode_reserved(name());
1007
GURL::extension(void) const
1010
const_cast<GURL *>(this)->init();
1011
GUTF8String xfilename=name();
1014
for(int i=xfilename.length()-1;i>=0;i--)
1016
if (xfilename[i]=='.')
1018
retval=(const char*)xfilename+i+1;
1026
GURL::decode_reserved(const GUTF8String &gurl)
1028
const char *url=gurl;
1030
GPBuffer<char> gres(res,gurl.length()+1);
1032
for(const char * ptr=url;*ptr;++ptr,++r)
1040
if ( ((c1=hexval(ptr[1]))>=0)
1041
&& ((c2=hexval(ptr[2]))>=0) )
1052
GUTF8String retval(res);
1053
if(!retval.is_valid())
1055
retval=GNativeString(res);
1061
GURL::encode_reserved(const GUTF8String &gs)
1063
const char *s=(const char *)gs;
1064
// Potentially unsafe characters (cf. RFC1738 and RFC1808)
1065
static const char hex[] = "0123456789ABCDEF";
1067
unsigned char *retval;
1068
GPBuffer<unsigned char> gd(retval,strlen(s)*3+1);
1069
unsigned char *d=retval;
1072
// Convert directory separator to slashes
1073
#if defined(WIN32) || defined(OS2)
1074
if (*s == backslash || *s== slash)
1082
#error "Define something here for your operating system"
1090
unsigned char const ss=(unsigned char const)(*s);
1091
// WARNING: Whenever you modify this conversion code,
1092
// make sure, that the following functions are in sync:
1093
// encode_reserved()
1094
// decode_reserved()
1095
// url_to_filename()
1096
// filename_to_url()
1097
// unreserved characters
1098
if ( (ss>='a' && ss<='z') ||
1099
(ss>='A' && ss<='Z') ||
1100
(ss>='0' && ss<='9') ||
1101
(strchr("$-_.+!*'(),~:=", ss)) )
1108
d[1] = hex[ (ss >> 4) & 0xf ];
1109
d[2] = hex[ (ss) & 0xf ];
1116
// -------------------------------------------
1117
// Functions for converting filenames and urls
1118
// -------------------------------------------
1121
url_from_UTF8filename(const GUTF8String &gfilename)
1123
if(GURL::UTF8(gfilename).is_valid())
1125
DEBUG_MSG("Debug: URL as Filename: " << gfilename << "\n");
1127
const char *filename=gfilename;
1128
if(filename && (unsigned char)filename[0] == (unsigned char)0xEF
1129
&& (unsigned char)filename[1] == (unsigned char)0xBB
1130
&& (unsigned char)filename[2] == (unsigned char)0xBF)
1135
// Special case for blank pages
1136
if(!filename || !filename[0])
1138
return GUTF8String();
1141
// Normalize file name to url slash-and-escape syntax
1142
GUTF8String oname=GURL::expand_name(filename);
1143
GUTF8String nname=GURL::encode_reserved(oname);
1145
// Prepend "file://" to file name. If file is on the local
1146
// machine, include "localhost".
1147
GUTF8String url=filespecslashes;
1148
const char *cnname=nname;
1149
if (cnname[0] == slash)
1151
if (cnname[1] == slash)
1156
url = localhost + nname;
1160
url += (localhostspec1+2) + nname;
1166
GURL::get_string(const bool nothrow) const
1169
const_cast<GURL *>(this)->init(nothrow);
1173
// -- Returns a url for accessing a given file.
1174
// If useragent is not provided, standard url will be created,
1175
// but will not be understood by some versions of IE.
1177
GURL::get_string(const GUTF8String &useragent) const
1180
const_cast<GURL *>(this)->init();
1181
GUTF8String retval(url);
1182
if(is_local_file_url()&&useragent.length())
1184
if(useragent.search("MSIE") >= 0 || useragent.search("Microsoft")>=0)
1186
retval=filespecslashes + expand_name(UTF8Filename());
1192
GURL::UTF8::UTF8(const GUTF8String &xurl)
1195
GURL::UTF8::UTF8(const GUTF8String &xurl,const GURL &codebase)
1196
: GURL(xurl,codebase) {}
1198
GURL::GURL(const GUTF8String &xurl,const GURL &codebase)
1201
if(GURL::UTF8(xurl).is_valid())
1208
const char *buffer = codebase;
1209
GUTF8String all(buffer);
1213
const int protocol_length=GURL::protocol(all).length();
1214
const char *start = buffer + pathname_start(all,protocol_length);
1216
prefix = GUTF8String(buffer, start-buffer);
1217
const char *ptr = start;
1218
while (*ptr && !is_argument(ptr))
1221
suffix = GUTF8String(ptr);
1223
path = GUTF8String(start, ptr-start);
1224
// append xurl to path
1225
const char *c = xurl;
1227
path = GURL::encode_reserved(xurl);
1229
path = path + GUTF8String(slash)+GURL::encode_reserved(xurl);
1231
url = beautify_path(prefix + path + suffix);
1235
GURL::Native::Native(const GNativeString &xurl)
1238
GURL::Native::Native(const GNativeString &xurl,const GURL &codebase)
1239
: GURL(xurl,codebase) {}
1241
GURL::GURL(const GNativeString &xurl,const GURL &codebase)
1244
GURL retval(xurl.getNative2UTF8(),codebase);
1245
if(retval.is_valid())
1248
// Hack for IE to change \\ to /
1249
if(retval.is_local_file_url())
1251
GURL::Filename::UTF8 retval2(retval.UTF8Filename());
1252
url=retval2.get_string(true);
1257
url=retval.get_string(true);
1263
GURL::Filename::Filename(const GNativeString &gfilename)
1265
url=url_from_UTF8filename(gfilename.getNative2UTF8());
1268
GURL::Filename::Native::Native(const GNativeString &gfilename)
1269
: GURL::Filename(gfilename) {}
1271
GURL::Filename::Filename(const GUTF8String &gfilename)
1273
url=url_from_UTF8filename(gfilename);
1276
GURL::Filename::UTF8::UTF8(const GUTF8String &gfilename)
1277
: GURL::Filename(gfilename) {}
1280
// -- Applies heuristic rules to convert a url into a valid file name.
1281
// Returns a simple basename in case of failure.
1283
GURL::UTF8Filename(void) const
1288
const char *url_ptr=url;
1290
// WARNING: Whenever you modify this conversion code,
1291
// make sure, that the following functions are in sync:
1292
// encode_reserved()
1293
// decode_reserved()
1294
// url_to_filename()
1295
// filename_to_url()
1297
GUTF8String urlcopy=decode_reserved(url);
1300
// All file urls are expected to start with filespec which is "file:"
1301
if (GStringRep::cmp(filespec, url_ptr, sizeof(filespec)-1)) //if not
1302
return GOS::basename(url_ptr);
1303
url_ptr += sizeof(filespec)-1;
1305
#if defined(macintosh)
1306
//remove all leading slashes
1307
for(;*url_ptr==slash;url_ptr++)
1309
// Remove possible localhost spec
1310
if ( !GStringRep::cmp(localhost, url_ptr, sizeof(localhost)-1) )
1311
url_ptr += sizeof(localhost)-1;
1312
//remove all leading slashes
1313
while(*url_ptr==slash)
1316
// Remove possible localhost spec
1317
if ( !GStringRep::cmp(localhostspec1, url_ptr, sizeof(localhostspec1)-1) )
1318
// RFC 1738 local host form
1319
url_ptr += sizeof(localhostspec1)-1;
1320
else if ( !GStringRep::cmp(localhostspec2, url_ptr, sizeof(localhostspec2)-1 ) )
1321
// RFC 1738 local host form
1322
url_ptr += sizeof(localhostspec2)-1;
1323
else if ( (strlen(url_ptr) > 4) // "file://<letter>:/<path>"
1324
&& (url_ptr[0] == slash) // "file://<letter>|/<path>"
1325
&& (url_ptr[1] == slash)
1326
&& isalpha(url_ptr[2])
1327
&& ( url_ptr[3] == colon || url_ptr[3] == '|' )
1328
&& (url_ptr[4] == slash) )
1330
else if ( (strlen(url_ptr)) > 2 // "file:/<path>"
1331
&& (url_ptr[0] == slash)
1332
&& (url_ptr[1] != slash) )
1336
// Check if we are finished
1337
#if defined(macintosh)
1340
GPBuffer<char> gl_url(l_url,strlen(url_ptr)+1);
1343
for ( s=url_ptr,r=l_url; *s; s++,r++)
1345
*r=(*s == slash)?colon:*s;
1348
retval = expand_name(l_url,root);
1351
retval = expand_name(url_ptr,root);
1354
#if defined(WIN32) || defined(OS2)
1355
if (url_ptr[0] && url_ptr[1]=='|' && url_ptr[2]== slash)
1357
if ((url_ptr[0]>='a' && url_ptr[0]<='z')
1358
|| (url_ptr[0]>='A' && url_ptr[0]<='Z'))
1361
drive.format("%c%c%c", url_ptr[0],colon,backslash);
1362
retval = expand_name(url_ptr+3, drive);
1367
// Return what we have
1372
GURL::NativeFilename(void) const
1374
return UTF8Filename().getUTF82Native();
1377
#if defined(UNIX) || defined(macintosh) || defined(OS2)
1379
urlstat(const GURL &url,struct stat &buf)
1381
return ::stat(url.NativeFilename(),&buf);
1386
// -- returns true if filename denotes a regular file.
1388
GURL::is_file(void) const
1391
if(is_local_file_url())
1393
#if defined(UNIX) || defined(macintosh) || defined(OS2)
1395
if (!urlstat(*this,buf))
1397
retval=!(buf.st_mode & S_IFDIR);
1399
#elif defined(WIN32)
1400
GUTF8String filename(UTF8Filename());
1401
if(filename.length() >= MAX_PATH)
1403
if(!filename.cmp("\\\\",2))
1404
filename="\\\\?\\UNC"+filename.substr(1,-1);
1406
filename="\\\\?\\"+filename;
1409
const size_t wfilename_size=filename.length()+1;
1410
GPBuffer<wchar_t> gwfilename(wfilename,wfilename_size);
1411
filename.ncopy(wfilename,wfilename_size);
1413
dwAttrib = GetFileAttributesW(wfilename);
1414
if((dwAttrib|1) == 0xFFFFFFFF)
1415
dwAttrib = GetFileAttributesA(NativeFilename());
1416
retval=!( dwAttrib & FILE_ATTRIBUTE_DIRECTORY );
1418
# error "Define something here for your operating system"
1425
GURL::is_local_path(void) const
1428
if(is_local_file_url())
1430
#if defined(UNIX) || defined(macintosh) || defined(OS2)
1432
retval=!urlstat(*this,buf);
1434
GUTF8String filename(UTF8Filename());
1435
if(filename.length() >= MAX_PATH)
1437
if(!filename.cmp("\\\\",2))
1438
filename="\\\\?\\UNC"+filename.substr(1,-1);
1440
filename="\\\\?\\"+filename;
1443
const size_t wfilename_size=filename.length()+1;
1444
GPBuffer<wchar_t> gwfilename(wfilename,wfilename_size);
1445
filename.ncopy(wfilename,wfilename_size);
1447
dwAttrib = GetFileAttributesW(wfilename);
1448
if((dwAttrib|1) == 0xFFFFFFFF)
1449
dwAttrib = GetFileAttributesA(NativeFilename());
1450
retval=( (dwAttrib|1) != 0xFFFFFFFF);
1457
// -- returns true if url denotes a directory.
1459
GURL::is_dir(void) const
1462
if(is_local_file_url())
1464
// UNIX implementation
1465
#if defined(UNIX) || defined(macintosh) || defined(OS2)
1467
if (!urlstat(*this,buf))
1469
retval=(buf.st_mode & S_IFDIR);
1471
#elif defined(WIN32) // (either Windows or WCE)
1472
GUTF8String filename(UTF8Filename());
1473
if(filename.length() >= MAX_PATH)
1475
if(!filename.cmp("\\\\",2))
1476
filename="\\\\?\\UNC"+filename.substr(1,-1);
1478
filename="\\\\?\\"+filename;
1481
const size_t wfilename_size=filename.length()+1;
1482
GPBuffer<wchar_t> gwfilename(wfilename,wfilename_size);
1483
filename.ncopy(wfilename,wfilename_size);
1485
dwAttrib = GetFileAttributesW(wfilename);
1486
if((dwAttrib|1) == 0xFFFFFFFF)
1487
dwAttrib = GetFileAttributesA(NativeFilename());
1488
retval=((dwAttrib != 0xFFFFFFFF)&&( dwAttrib & FILE_ATTRIBUTE_DIRECTORY ));
1490
# error "Define something here for your operating system"
1496
// Follows symbolic links.
1498
GURL::follow_symlinks(void) const
1501
#if defined(S_IFLNK)
1502
#if defined(UNIX) || defined(macintosh)
1504
char lnkbuf[MAXPATHLEN+1];
1506
while ( (urlstat(ret, buf) >= 0) &&
1507
(buf.st_mode & S_IFLNK) &&
1508
((lnklen = readlink(ret.NativeFilename(),lnkbuf,sizeof(lnkbuf))) > 0) )
1511
GNativeString lnk(lnkbuf);
1512
ret = GURL(lnk, ret.base());
1522
if(! is_local_file_url())
1525
const GURL baseURL=base();
1526
if (baseURL.get_string() != url && !baseURL.is_dir())
1527
retval = baseURL.mkdir();
1534
retval = ::mkdir(NativeFilename(), 0755);
1535
#elif defined(WIN32)
1539
retval = CreateDirectoryA(NativeFilename(), NULL);
1541
# error "Define something here for your operating system"
1548
// -- deletes a file or directory
1551
GURL::deletefile(void) const
1554
if(is_local_file_url())
1558
retval = ::rmdir(NativeFilename());
1560
retval = ::unlink(NativeFilename());
1561
#elif defined(WIN32)
1563
retval = ::RemoveDirectoryA(NativeFilename());
1565
retval = ::DeleteFile(NativeFilename());
1567
# error "Define something here for your operating system"
1574
GURL::listdir(void) const
1579
#if defined(UNIX) || defined(OS2)
1580
DIR * dir=opendir(NativeFilename());//MBCS cvt
1581
for(dirent *de=readdir(dir);de;de=readdir(dir))
1583
const int len = NAMLEN(de);
1584
if (de->d_name[0]== dot && len==1)
1586
if (de->d_name[0]== dot && de->d_name[1]== dot && len==2)
1588
retval.append(GURL::Native(de->d_name,*this));
1591
#elif defined (WIN32)
1592
GURL::UTF8 wildcard("*.*",*this);
1593
WIN32_FIND_DATA finddata;
1594
HANDLE handle = FindFirstFile(wildcard.NativeFilename(), &finddata);//MBCS cvt
1595
const GUTF8String gpathname=pathname();
1596
const GUTF8String gbase=base().pathname();
1597
if( handle != INVALID_HANDLE_VALUE)
1601
GURL::UTF8 Entry(finddata.cFileName,*this);
1602
const GUTF8String gentry=Entry.pathname();
1603
if((gentry != gpathname) && (gentry != gbase))
1604
retval.append(Entry);
1605
} while( FindNextFile(handle, &finddata) );
1610
# error "Define something here for your operating system"
1617
GURL::cleardir(const int timeout) const
1622
GList<GURL> dirlist=listdir();
1624
for(GPosition pos=dirlist;pos&&!retval;++pos)
1626
const GURL &Entry=dirlist[pos];
1629
if((retval=Entry.cleardir(timeout)) < 0)
1634
if(((retval=Entry.deletefile())<0) && (timeout>0))
1636
GOS::sleep(timeout);
1637
retval=Entry.deletefile();
1645
GURL::renameto(const GURL &newurl) const
1647
if (is_local_file_url() && newurl.is_local_file_url())
1648
return rename(NativeFilename(),newurl.NativeFilename());
1652
// expand_name(filename[, fromdirname])
1653
// -- returns the full path name of filename interpreted
1654
// relative to fromdirname. Use current working dir when
1655
// fromdirname is null.
1657
GURL::expand_name(const GUTF8String &xfname, const char *from)
1659
const char *fname=xfname;
1661
const size_t maxlen=xfname.length()*9+MAXPATHLEN+10;
1662
char * const string_buffer = retval.getbuf(maxlen);
1663
// UNIX implementation
1665
// Perform tilde expansion
1667
if (fname && fname[0]==tilde)
1670
for(n=1;fname[n] && fname[n]!= slash;n++)
1672
struct passwd *pw=0;
1675
GUTF8String user(fname+1, n-1);
1677
}else if ((senv=GOS::getenv("HOME")).length())
1679
from=(const char *)senv;
1681
}else if ((senv=GOS::getenv("LOGNAME")).length())
1683
pw = getpwnam((const char *)senv.getUTF82Native());
1686
pw=getpwuid(getuid());
1690
senv=GNativeString(pw->pw_dir).getNative2UTF8();
1691
from = (const char *)senv;
1694
for(;fname[0] == slash; fname++)
1697
// Process absolute vs. relative path
1698
if (fname && fname[0]== slash)
1700
string_buffer[0]=slash;
1704
strcpy(string_buffer, expand_name(from));
1707
strcpy(string_buffer, GOS::cwd());
1709
char *s = string_buffer + strlen(string_buffer);
1712
for(;fname[0]== slash;fname++)
1714
// Process path components
1717
if (fname[0] == dot )
1719
if (!fname[1] || fname[1]== slash)
1723
}else if (fname[1]== dot && (fname[2]== slash || !fname[2]))
1726
for(;s>string_buffer+1 && *(s-1)== slash; s--)
1728
for(;s>string_buffer+1 && *(s-1)!= slash; s--)
1733
if ((s==string_buffer)||(*(s-1)!= slash))
1738
while (*fname &&(*fname!= slash))
1741
if ((size_t)((++s)-string_buffer) > maxlen)
1743
G_THROW( ERR_MSG("GURL.big_name") );
1747
for(;fname[0]== slash;fname++)
1751
if (!fname || !fname[0])
1753
for(;s>string_buffer+1 && *(s-1) == slash; s--)
1757
#elif defined (WIN32) // WIN32 implementation
1759
strcpy(string_buffer, (char const *)(from ? expand_name(from) : GOS::cwd()));
1760
// GNativeString native;
1763
char *s = string_buffer;
1765
// Handle absolute part of fname
1766
// Put absolute part of the file name in string_buffer, and
1767
// the relative part pointed to by fname.
1768
if (fname[0]== slash || fname[0]== backslash)
1770
if (fname[1]== slash || fname[1]== backslash)
1772
s[0]=s[1]= backslash; s[2]=0;
1775
{ // Case "/abcd" or "/"
1776
// File is at the root of the current drive. Delete the
1777
// slash at the beginning of the filename and leave
1778
// an explicit identification of the root of the drive in
1784
else if (fname[0] && fname[1]==colon)
1786
if (fname[2]!= slash && fname[2]!= backslash)
1788
if ( toupper((unsigned char)s[0]) != toupper((unsigned char)fname[0])
1795
GetFullPathName(drv, maxlen, string_buffer, &s);
1796
strcpy(string_buffer,(const char *)GUTF8String(string_buffer).getNative2UTF8());
1801
else if (fname[3]!= slash && fname[3]!= backslash)
1803
s[0]=toupper((unsigned char)fname[0]);
1810
{ // Case "x://abcd"
1811
s[0]=s[1]=backslash;
1816
// Process path components
1819
for(;*fname== slash || *fname==backslash;fname++)
1821
if (fname[0]== dot )
1823
if (fname[1]== slash || fname[1]==backslash || !fname[1])
1828
else if ((fname[1] == dot)
1829
&& (fname[2]== slash || fname[2]==backslash || !fname[2]))
1832
char *back=_tcsrchr(string_buffer,backslash);
1833
char *forward=_tcsrchr(string_buffer,slash);
1845
char* s2=s;//MBCS DBCS
1848
if (s > string_buffer && s[-1] != slash && s[-1] != backslash)
1850
while (*fname && (*fname!= slash) && (*fname!=backslash))
1852
if (s > string_buffer + maxlen)
1853
G_THROW( ERR_MSG("GURL.big_name") );
1860
# error "Define something here for your operating system"
1866
hash(const GURL & gurl)
1868
unsigned int retval;
1869
const GUTF8String s(gurl.get_string());
1870
const int len=s.length();
1871
if(len && (s[len-1] == '/')) // Don't include the trailing slash as part of the hash.
1873
retval=hash(s.substr(0,len-1));
1882
#ifdef HAVE_NAMESPACES
1884
# ifndef NOT_USING_DJVU_NAMESPACE
1885
using namespace DJVU;