1
%% ``The contents of this file are subject to the Erlang Public License,
2
%% Version 1.1, (the "License"); you may not use this file except in
3
%% compliance with the License. You should have received a copy of the
4
%% Erlang Public License along with this software. If not, it can be
5
%% retrieved via the world wide web at http://www.erlang.org/.
7
%% Software distributed under the License is distributed on an "AS IS"
8
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
9
%% the License for the specific language governing rights and limitations
12
%% The Initial Developer of the Original Code is Mobile Arts AB
13
%% Portions created by Mobile Arts are Copyright 2002, Mobile Arts AB
14
%% All Rights Reserved.''
17
%% Author : Johan Blom <johblo@localhost.localdomain>
19
%% Implements various scheme dependent subsets (e.g. HTTP, FTP etc) based on
20
%% RFC 2396, Uniform Resource Identifiers (URI): Generic Syntax
21
%% Created : 27 Jul 2001 by Johan Blom <johblo@localhost.localdomain>
26
-author('johan.blom@mobilearts.se').
28
-export([parse/1,resolve/2]).
31
%%% Parse URI and return {Scheme,Path}
32
%%% Note that Scheme specific parsing/validation is not handled here!
36
%%% See "http://www.isi.edu/in-notes/iana/assignments/url-schemes" for a list of
37
%%% defined URL schemes and references to its sources.
40
case parse_scheme(URI) of
41
{http,Cont} -> parse_http(Cont,http);
42
{https,Cont} -> parse_http(Cont,https);
43
{ftp,Cont} -> parse_ftp(Cont,ftp);
44
{sip,Cont} -> parse_sip(Cont,sip);
45
{sms,Cont} -> parse_sms(Cont,sip);
46
{error,Error} -> {error,Error};
47
{Scheme,Cont} -> {Scheme,Cont}
55
parse_scheme([H|URI],Acc) when $a=<H,H=<$z; $A=<H,H=<$Z ->
56
parse_scheme2(URI,[H|Acc]);
60
parse_scheme2([H|URI],Acc)
61
when $a=<H,H=<$z; $A=<H,H=<$Z; $0=<H,H=<$9; H==$-;H==$+;H==$. ->
62
parse_scheme2(URI,[H|Acc]);
63
parse_scheme2([$:|URI],Acc) ->
64
{list_to_atom(lists:reverse(Acc)),URI};
69
%%% ............................................................................
70
-define(HTTP_DEFAULT_PORT, 80).
71
-define(HTTPS_DEFAULT_PORT, 443).
73
%%% HTTP (Source RFC 2396, RFC 2616)
74
%%% http_URL = "*" | absoluteURI | abs_path [ "?" query ] | authority
76
%%% http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]]
77
%%% Returns a tuple {http,Host,Port,PathQuery} where
78
%%% Host = string() Host value
79
%%% Port = string() Port value
80
%%% PathQuery= string() Combined absolute path and query value
81
parse_http("//"++C0,Scheme) ->
82
case scan_hostport(C0,Scheme) of
84
case scan_pathquery(C1) of
88
{Scheme,Host,Port,PathQuery}
97
case scan_abspath(C0) of
100
{[],[]} -> % Add implicit path
103
case scan_query(C1,[]) of
114
%%% ............................................................................
115
%%% FIXME!!! This is just a quick hack that doesn't work!
116
-define(FTP_DEFAULT_PORT, 80).
118
%%% FTP (Source RFC 2396, RFC 1738, RFC 959)
119
%%% Note: This BNF has been modified to better fit with RFC 2396
120
%%% ftp_URL = "ftp:" "//" [ ftp_userinfo ] host [ ":" port ] ftp_abs_path
121
%%% ftp_userinfo = ftp_user [ ":" ftp_password ]
122
%%% ftp_abs_path = "/" ftp_path_segments [ ";type=" ftp_type ]
123
%%% ftp_path_segments = ftp_segment *( "/" ftp_segment)
124
%%% ftp_segment = *[ ftp_uchar | "?" | ":" | "@" | "&" | "=" ]
125
%%% ftp_type = "A" | "I" | "D" | "a" | "i" | "d"
126
%%% ftp_user = *[ ftp_uchar | ";" | "?" | "&" | "=" ]
127
%%% ftp_password = *[ ftp_uchar | ";" | "?" | "&" | "=" ]
128
%%% ftp_uchar = ftp_unreserved | escaped
129
%%% ftp_unreserved = alphanum | mark | "$" | "+" | ","
130
parse_ftp("//"++C0,Scheme) ->
131
case ftp_userinfo(C0) of
133
case scan_hostport(C1,Scheme) of
135
case scan_abspath(C2) of
138
{[],[]} -> % Add implicit path
139
{Scheme,Creds,Host,Port,"/"};
141
{Scheme,Creds,Host,Port,Path}
153
{C0,{User,Password}}.
156
%%% ............................................................................
157
%%% SIP (Source RFC 2396, RFC 2543)
158
%%% sip_URL = "sip:" [ sip_userinfo "@" ] host [ ":" port ]
159
%%% sip_url-parameters [ sip_headers ]
160
%%% sip_userinfo = sip_user [ ":" sip_password ]
161
%%% sip_user = *( unreserved | escaped | "&" | "=" | "+" | "$" | "," )
162
%%% sip_password = *( unreserved | escaped | "&" | "=" | "+" | "$" | "," )
163
%%% sip_url-parameters = *( ";" sip_url-parameter )
164
%%% sip_url-parameter = sip_transport-param | sip_user-param |
165
%%% sip_method-param | sip_ttl-param |
166
%%% sip_maddr-param | sip_other-param
167
%%% sip_transport-param = "transport=" ( "udp" | "tcp" )
168
%%% sip_ttl-param = "ttl=" sip_ttl
169
%%% sip_ttl = 1*3DIGIT ; 0 to 255
170
%%% sip_maddr-param = "maddr=" host
171
%%% sip_user-param = "user=" ( "phone" | "ip" )
172
%%% sip_method-param = "method=" sip_Method
173
%%% sip_tag-param = "tag=" sip_UUID
174
%%% sip_UUID = 1*( hex | "-" )
175
%%% sip_other-param = ( token | ( token "=" ( token | quoted-string )))
176
%%% sip_Method = "INVITE" | "ACK" | "OPTIONS" | "BYE" |
177
%%% "CANCEL" | "REGISTER"
178
%%% sip_token = 1*< any CHAR except CTL's or separators>
179
%%% sip_quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
180
%%% sip_qdtext = <any TEXT-UTF8 except <">>
181
%%% sip_quoted-pair = " \ " CHAR
182
parse_sip(Cont,Scheme) ->
188
%%% ............................................................................
189
%%% SMS (Source draft-wilde-sms-uri-01, January 24 2002 and
190
%%% draft-allocchio-gstn-01, November 2001)
191
%%% The syntax definition for "gstn-phone" is taken from
192
%%% [draft-allocchio-gstn-01], allowing global as well as local telephone
194
%%% Note: This BNF has been modified to better fit with RFC 2396
195
%%% sms_URI = sms ":" 1*( sms-recipient ) [ sms-body ]
196
%%% sms-recipient = gstn-phone sms-qualifier
197
%%% [ "," sms-recipient ]
198
%%% sms-qualifier = *( smsc-qualifier / pid-qualifier )
199
%%% smsc-qualifier = ";smsc=" SMSC-sub-addr
200
%%% pid-qualifier = ";pid=" PID-sub-addr
201
%%% sms-body = ";body=" *urlc
202
%%% gstn-phone = ( global-phone / local-phone )
203
%%% global-phone = "+" 1*( DIGIT / written-sep )
204
%%% local-phone = [ exit-code ] dial-number / exit-code [ dial-number ]
205
%%% exit-code = phone-string
206
%%% dial-number = phone-string
207
%%% subaddr-string = phone-string
208
%%% post-dial = phone-string
209
%%% phone-string = 1*( DTMF / pause / tonewait / written-sep )
210
%%% DTMF = ( DIGIT / "#" / "*" / "A" / "B" / "C" / "D" )
211
%%% written-sep = ( "-" / "." )
214
parse_sms(Cont,Scheme) ->
218
%%% ============================================================================
219
%%% Generic URI parsing. BNF rules from RFC 2396
221
%%% hostport = host [ ":" port ]
222
scan_hostport(C0,Scheme) ->
223
case scan_host(C0) of
227
{C2,Port}=scan_port(C1,[]),
228
{C2,Host,list_to_integer(Port)};
229
{C1,Host} when Scheme==http ->
230
{C1,Host,?HTTP_DEFAULT_PORT};
231
{C1,Host} when Scheme==https ->
232
{C1,Host,?HTTPS_DEFAULT_PORT};
233
{C1,Host} when Scheme==ftp ->
234
{C1,Host,?FTP_DEFAULT_PORT}
238
%%% host = hostname | IPv4address
239
%%% hostname = *( domainlabel "." ) toplabel [ "." ]
240
%%% domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
241
%%% toplabel = alpha | alpha *( alphanum | "-" ) alphanum
242
%%% IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
248
case scan_host2(C0,[],0,[],[]) of
249
{C1,IPv4address,[?DIGIT,?DIGIT,?DIGIT,?DIGIT]} ->
250
{C1,lists:reverse(lists:append(IPv4address))};
251
{C1,Hostname,[?ALPHA|HostF]} ->
252
{C1,lists:reverse(lists:append(Hostname))};
257
scan_host2([H|C0],Acc,CurF,Host,HostF) when $0=<H,H=<$9 ->
258
scan_host2(C0,[H|Acc],CurF bor ?DIGIT,Host,HostF);
259
scan_host2([H|C0],Acc,CurF,Host,HostF) when $a=<H,H=<$z; $A=<H,H=<$Z ->
260
scan_host2(C0,[H|Acc],CurF bor ?ALPHA,Host,HostF);
261
scan_host2([$-|C0],Acc,CurF,Host,HostF) when CurF=/=0 ->
262
scan_host2(C0,[$-|Acc],CurF,Host,HostF);
263
scan_host2([$.|C0],Acc,CurF,Host,HostF) when CurF=/=0 ->
264
scan_host2(C0,[],0,[".",Acc|Host],[CurF|HostF]);
265
scan_host2(C0,Acc,CurF,Host,HostF) ->
266
{C0,[Acc|Host],[CurF|HostF]}.
270
scan_port([H|C0],Acc) when $0=<H,H=<$9 ->
271
scan_port(C0,[H|Acc]);
273
{C0,lists:reverse(Acc)}.
275
%%% abs_path = "/" path_segments
278
scan_abspath("/"++C0) ->
279
scan_pathsegments(C0,["/"]);
283
%%% path_segments = segment *( "/" segment )
284
scan_pathsegments(C0,Acc) ->
285
case scan_segment(C0,[]) of
287
scan_pathsegments(C1,["/",Segment|Acc]);
289
{C1,lists:reverse(lists:append([Segment|Acc]))}
293
%%% segment = *pchar *( ";" param )
295
scan_segment(";"++C0,Acc) ->
296
{C1,ParamAcc}=scan_pchars(C0,";"++Acc),
297
scan_segment(C1,ParamAcc);
298
scan_segment(C0,Acc) ->
299
case scan_pchars(C0,Acc) of
301
{C2,ParamAcc}=scan_pchars(C1,";"++Segment),
302
scan_segment(C2,ParamAcc);
308
%%% uric = reserved | unreserved | escaped
309
%%% reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
311
%%% unreserved = alphanum | mark
312
%%% mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
314
%%% escaped = "%" hex hex
315
scan_query([],Acc) ->
317
scan_query([$%,H1,H2|C0],Acc) -> % escaped
318
scan_query(C0,[hex2dec(H1)*16+hex2dec(H2)|Acc]);
319
scan_query([H|C0],Acc) when $a=<H,H=<$z;$A=<H,H=<$Z;$0=<H,H=<$9 -> % alphanum
320
scan_query(C0,[H|Acc]);
321
scan_query([H|C0],Acc) when H==$;; H==$/; H==$?; H==$:; H==$@;
322
H==$&; H==$=; H==$+; H==$$; H==$, -> % reserved
323
scan_query(C0,[H|Acc]);
324
scan_query([H|C0],Acc) when H==$-; H==$_; H==$.; H==$!; H==$~;
325
H==$*; H==$'; H==$(; H==$) -> % mark
326
scan_query(C0,[H|Acc]);
327
scan_query([H|C0],Acc) ->
331
%%% pchar = unreserved | escaped |
332
%%% ":" | "@" | "&" | "=" | "+" | "$" | ","
333
scan_pchars([],Acc) ->
335
scan_pchars([$%,H1,H2|C0],Acc) -> % escaped
336
scan_pchars(C0,[hex2dec(H1)*16+hex2dec(H2)|Acc]);
337
scan_pchars([H|C0],Acc) when $a=<H,H=<$z;$A=<H,H=<$Z;$0=<H,H=<$9 -> % alphanum
338
scan_pchars(C0,[H|Acc]);
339
scan_pchars([H|C0],Acc) when H==$-; H==$_; H==$.; H==$!; H==$~;
340
H==$*; H==$'; H==$(; H==$) -> % mark
341
scan_pchars(C0,[H|Acc]);
342
scan_pchars([H|C0],Acc) when H==$:; H==$@; H==$&; H==$=; H==$+; H==$$; H==$, ->
343
scan_pchars(C0,[H|Acc]);
344
scan_pchars(C0,Acc) ->
347
hex2dec(X) when X>=$0,X=<$9 -> X-$0;
348
hex2dec(X) when X>=$A,X=<$F -> X-$A+10;
349
hex2dec(X) when X>=$a,X=<$f -> X-$a+10.