3
# Client-side HTTP for GET, POST, and HEAD commands.
4
# These routines can be used in untrusted code that uses
5
# the Safesock security policy. These procedures use a
6
# callback interface to avoid using vwait, which is not
7
# defined in the safe base.
9
# See the file "license.terms" for information on usage and
10
# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
12
# RCS: @(#) $Id: http.tcl,v 1.1 2005/01/09 19:18:18 germinator2000 Exp $
14
# Rough version history:
15
# 1.0 Old http_get interface
16
# 2.0 http:: namespace and http::geturl
17
# 2.1 Added callbacks to handle arriving data, and timeouts
18
# 2.2 Added ability to fetch into a channel
19
# 2.3 Added SSL support, and ability to post from a channel
20
# This version also cleans up error cases and eliminates the
21
# "ioerror" status in favor of raising an error
22
# 2.4 Added -binary option to http::geturl and charset element
25
package require Tcl 8.2
26
# keep this in sync with pkgIndex.tcl
27
# and with the install directories in Makefiles
28
package provide http 2.4.4
36
-proxyfilter http::ProxyRequired
38
set http(-useragent) "Tcl http client package [package provide http]"
42
variable alphanumeric a-zA-Z0-9
43
for {set i 0} {$i <= 256} {incr i} {
45
if {![string match \[$alphanumeric\] $c]} {
46
set formMap($c) %[format %.2x $i]
49
# These are handled specially
50
array set formMap { " " + \n %0d%0a }
59
variable encodings [string tolower [encoding names]]
60
# This can be changed, but iso8859-1 is the RFC standard.
61
variable defaultCharset "iso8859-1"
63
namespace export geturl config reset wait formatQuery register unregister
64
# Useful, but not exported: data size status code
69
# See documentation for details.
72
# proto URL protocol prefix, e.g. https
73
# port Default port for protocol
74
# command Command to use to create socket
76
# list of port and command that was registered.
78
proc http::register {proto port command} {
80
set urlTypes($proto) [list $port $command]
85
# Unregisters URL protocol handler
88
# proto URL protocol prefix, e.g. https
90
# list of port and command that was unregistered.
92
proc http::unregister {proto} {
94
if {![info exists urlTypes($proto)]} {
95
return -code error "unsupported url type \"$proto\""
97
set old $urlTypes($proto)
98
unset urlTypes($proto)
104
# See documentation for details.
107
# args Options parsed by the procedure.
111
proc http::config {args} {
113
set options [lsort [array names http -*]]
114
set usage [join $options ", "]
115
if {[llength $args] == 0} {
117
foreach name $options {
118
lappend result $name $http($name)
122
set options [string map {- ""} $options]
123
set pat ^-([join $options |])$
124
if {[llength $args] == 1} {
125
set flag [lindex $args 0]
126
if {[regexp -- $pat $flag]} {
129
return -code error "Unknown option $flag, must be: $usage"
132
foreach {flag value} $args {
133
if {[regexp -- $pat $flag]} {
134
set http($flag) $value
136
return -code error "Unknown option $flag, must be: $usage"
144
# Clean up the socket and eval close time callbacks
147
# token Connection token.
148
# errormsg (optional) If set, forces status to error.
149
# skipCB (optional) If set, don't call the -command callback. This
150
# is useful when geturl wants to throw an exception instead
151
# of calling the callback. That way, the same error isn't
152
# reported to two places.
157
proc http::Finish { token {errormsg ""} {skipCB 0}} {
160
global errorInfo errorCode
161
if {[string length $errormsg] != 0} {
162
set state(error) [list $errormsg $errorInfo $errorCode]
163
set state(status) error
165
catch {close $state(sock)}
166
catch {after cancel $state(after)}
167
if {[info exists state(-command)] && !$skipCB} {
168
if {[catch {eval $state(-command) {$token}} err]} {
169
if {[string length $errormsg] == 0} {
170
set state(error) [list $err $errorInfo $errorCode]
171
set state(status) error
174
if {[info exists state(-command)]} {
175
# Command callback may already have unset our state
176
unset state(-command)
183
# See documentation for details.
186
# token Connection token.
192
proc http::reset { token {why reset} } {
195
set state(status) $why
196
catch {fileevent $state(sock) readable {}}
197
catch {fileevent $state(sock) writable {}}
199
if {[info exists state(error)]} {
200
set errorlist $state(error)
202
eval ::error $errorlist
208
# Establishes a connection to a remote url via http.
211
# url The http URL to get.
212
# args Option value pairs. Valid options include:
213
# -blocksize, -validate, -headers, -timeout
215
# Returns a token for this connection.
216
# This token is the name of an array that the caller should
217
# unset to garbage collect the state.
219
proc http::geturl { url args } {
222
variable defaultCharset
224
# Initialize the state variable, an array. We'll return the
225
# name of this array as the token for the transaction.
227
if {![info exists http(uid)]} {
230
set token [namespace current]::[incr http(uid)]
235
# Process command options.
244
-type application/x-www-form-urlencoded
258
# These flags have their types verified [Bug 811170]
262
-queryblocksize integer
266
set state(charset) $defaultCharset
267
set options {-binary -blocksize -channel -command -handler -headers \
268
-progress -query -queryblocksize -querychannel -queryprogress\
269
-validate -timeout -type}
270
set usage [join $options ", "]
271
set options [string map {- ""} $options]
272
set pat ^-([join $options |])$
273
foreach {flag value} $args {
274
if {[regexp $pat $flag]} {
275
# Validate numbers and booleans
276
if {[info exists type($flag)] && \
277
![string is $type($flag) -strict $value]} {
279
return -code error "Bad value for $flag ($value), must be $type($flag)"
281
set state($flag) $value
284
return -code error "Unknown option $flag, can be: $usage"
288
# Make sure -query and -querychannel aren't both specified
290
set isQueryChannel [info exists state(-querychannel)]
291
set isQuery [info exists state(-query)]
292
if {$isQuery && $isQueryChannel} {
294
return -code error "Can't combine -query and -querychannel options!"
297
# Validate URL, determine the server host and port, and check proxy case
298
# Recognize user:pass@host URLs also, although we do not do anything
299
# with that info yet.
301
set exp {^(([^:]*)://)?([^@]+@)?([^/:]+)(:([0-9]+))?(/.*)?$}
302
if {![regexp -nocase $exp $url x prefix proto user host y port srvurl]} {
304
return -code error "Unsupported URL: $url"
306
if {[string length $proto] == 0} {
308
set url ${proto}://$url
310
if {![info exists urlTypes($proto)]} {
312
return -code error "Unsupported URL type \"$proto\""
314
set defport [lindex $urlTypes($proto) 0]
315
set defcmd [lindex $urlTypes($proto) 1]
317
if {[string length $port] == 0} {
320
if {[string length $srvurl] == 0} {
323
if {[string length $proto] == 0} {
327
if {![catch {$http(-proxyfilter) $host} proxy]} {
328
set phost [lindex $proxy 0]
329
set pport [lindex $proxy 1]
332
# If a timeout is specified we set up the after event
333
# and arrange for an asynchronous socket connection.
335
if {$state(-timeout) > 0} {
336
set state(after) [after $state(-timeout) \
337
[list http::reset $token timeout]]
343
# If we are using the proxy, we must pass in the full URL that
344
# includes the server name.
346
if {[info exists phost] && [string length $phost]} {
348
set conStat [catch {eval $defcmd $async {$phost $pport}} s]
350
set conStat [catch {eval $defcmd $async {$host $port}} s]
354
# something went wrong while trying to establish the connection
355
# Clean up after events and such, but DON'T call the command callback
356
# (if available) because we're going to throw an exception from here
360
return -code error $s
364
# Wait for the connection to complete
366
if {$state(-timeout) > 0} {
367
fileevent $s writable [list http::Connect $token]
370
if {[string equal $state(status) "error"]} {
371
# something went wrong while trying to establish the connection
372
# Clean up after events and such, but DON'T call the command
373
# callback (if available) because we're going to throw an
374
# exception from here instead.
375
set err [lindex $state(error) 0]
377
return -code error $err
378
} elseif {![string equal $state(status) "connect"]} {
379
# Likely to be connection timeout
385
# Send data in cr-lf format, but accept any line terminators
387
fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize)
389
# The following is disallowed in safe interpreters, but the socket
390
# is already in non-blocking mode in that case.
392
catch {fconfigure $s -blocking off}
395
set state(querylength) [string length $state(-query)]
396
if {$state(querylength) > 0} {
400
# there's no query data
404
} elseif {$state(-validate)} {
406
} elseif {$isQueryChannel} {
408
# The query channel must be blocking for the async Write to
410
fconfigure $state(-querychannel) -blocking 1 -translation binary
415
puts $s "$how $srvurl HTTP/1.0"
416
puts $s "Accept: $http(-accept)"
417
if {$port == $defport} {
418
# Don't add port in this case, to handle broken servers.
420
puts $s "Host: $host"
422
puts $s "Host: $host:$port"
424
puts $s "User-Agent: $http(-useragent)"
425
foreach {key value} $state(-headers) {
426
set value [string map [list \n "" \r ""] $value]
427
set key [string trim $key]
428
if {[string equal $key "Content-Length"]} {
430
set state(querylength) $value
432
if {[string length $key]} {
433
puts $s "$key: $value"
436
if {$isQueryChannel && $state(querylength) == 0} {
437
# Try to determine size of data in channel
438
# If we cannot seek, the surrounding catch will trap us
440
set start [tell $state(-querychannel)]
441
seek $state(-querychannel) 0 end
442
set state(querylength) \
443
[expr {[tell $state(-querychannel)] - $start}]
444
seek $state(-querychannel) $start
447
# Flush the request header and set up the fileevent that will
448
# either push the POST data or read the response.
452
# It is possible to have both the read and write fileevents active
453
# at this point. The only scenario it seems to affect is a server
454
# that closes the connection without reading the POST data.
455
# (e.g., early versions of TclHttpd in various error cases).
456
# Depending on the platform, the client may or may not be able to
457
# get the response from the server because of the error it will
458
# get trying to write the post data. Having both fileevents active
459
# changes the timing and the behavior, but no two platforms
460
# (among Solaris, Linux, and NT) behave the same, and none
461
# behave all that well in any case. Servers should always read their
462
# POST data if they expect the client to read their response.
464
if {$isQuery || $isQueryChannel} {
465
puts $s "Content-Type: $state(-type)"
467
puts $s "Content-Length: $state(querylength)"
470
fconfigure $s -translation {auto binary}
471
fileevent $s writable [list http::Write $token]
475
fileevent $s readable [list http::Event $token]
478
if {! [info exists state(-command)]} {
480
# geturl does EVERYTHING asynchronously, so if the user
481
# calls it synchronously, we just do a wait here.
484
if {[string equal $state(status) "error"]} {
485
# Something went wrong, so throw the exception, and the
486
# enclosing catch will do cleanup.
487
return -code error [lindex $state(error) 0]
491
# The socket probably was never connected,
492
# or the connection dropped later.
494
# Clean up after events and such, but DON'T call the command callback
495
# (if available) because we're going to throw an exception from here
498
# if state(status) is error, it means someone's already called Finish
499
# to do the above-described clean up.
500
if {[string equal $state(status) "error"]} {
504
return -code error $err
510
# Data access functions:
511
# Data - the URL data
512
# Status - the transaction status: ok, reset, eof, timeout
513
# Code - the HTTP transaction code, e.g., 200
514
# Size - the size of the URL data
516
proc http::data {token} {
521
proc http::status {token} {
524
return $state(status)
526
proc http::code {token} {
531
proc http::ncode {token} {
534
if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
540
proc http::size {token} {
543
return $state(currentsize)
546
proc http::error {token} {
549
if {[info exists state(error)]} {
557
# Garbage collect the state associated with a transaction
560
# token The token returned from http::geturl
563
# unsets the state array
565
proc http::cleanup {token} {
568
if {[info exists state]} {
575
# This callback is made when an asynchronous connection completes.
578
# token The token returned from http::geturl
581
# Sets the status of the connection, which unblocks
582
# the waiting geturl call
584
proc http::Connect {token} {
587
global errorInfo errorCode
588
if {[eof $state(sock)] ||
589
[string length [fconfigure $state(sock) -error]]} {
590
Finish $token "connect failed [fconfigure $state(sock) -error]" 1
592
set state(status) connect
593
fileevent $state(sock) writable {}
600
# Write POST query data to the socket
603
# token The token for the connection
606
# Write the socket and handle callbacks.
608
proc http::Write {token} {
613
# Output a block. Tcl will buffer this if the socket blocks
618
# Catch I/O errors on dead sockets
620
if {[info exists state(-query)]} {
622
# Chop up large query strings so queryprogress callback
623
# can give smooth feedback
626
[string range $state(-query) $state(queryoffset) \
627
[expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
628
incr state(queryoffset) $state(-queryblocksize)
629
if {$state(queryoffset) >= $state(querylength)} {
630
set state(queryoffset) $state(querylength)
635
# Copy blocks from the query channel
637
set outStr [read $state(-querychannel) $state(-queryblocksize)]
638
puts -nonewline $s $outStr
639
incr state(queryoffset) [string length $outStr]
640
if {[eof $state(-querychannel)]} {
645
# Do not call Finish here, but instead let the read half of
646
# the socket process whatever server reply there is to get.
648
set state(posterror) $err
653
fileevent $s writable {}
654
fileevent $s readable [list http::Event $token]
657
# Callback to the client after we've completely handled everything
659
if {[string length $state(-queryprogress)]} {
660
eval $state(-queryprogress) [list $token $state(querylength)\
667
# Handle input on the socket
670
# token The token returned from http::geturl
673
# Read the socket and handle callbacks.
675
proc http::Event {token} {
684
if {[string equal $state(state) "header"]} {
685
if {[catch {gets $s line} n]} {
689
set state(state) body
690
if {$state(-binary) || ![string match -nocase text* $state(type)]
691
|| [string match *gzip* $state(coding)]
692
|| [string match *compress* $state(coding)]} {
693
# Turn off conversions for non-text data
694
fconfigure $s -translation binary
695
if {[info exists state(-channel)]} {
696
fconfigure $state(-channel) -translation binary
699
# If we are getting text, set the incoming channel's
700
# encoding correctly. iso8859-1 is the RFC default, but
701
# this could be any IANA charset. However, we only know
702
# how to convert what we have encodings for.
703
set idx [lsearch -exact $encodings \
704
[string tolower $state(charset)]]
706
fconfigure $s -encoding [lindex $encodings $idx]
709
if {[info exists state(-channel)] && \
710
![info exists state(-handler)]} {
711
# Initiate a sequence of background fcopies
712
fileevent $s readable {}
716
if {[regexp -nocase {^content-type:(.+)$} $line x type]} {
717
set state(type) [string trim $type]
718
# grab the optional charset information
719
regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset)
721
if {[regexp -nocase {^content-length:(.+)$} $line x length]} {
722
set state(totalsize) [string trim $length]
724
if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} {
725
set state(coding) [string trim $coding]
727
if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
728
lappend state(meta) $key [string trim $value]
729
} elseif {[string match HTTP* $line]} {
730
set state(http) $line
735
if {[info exists state(-handler)]} {
736
set n [eval $state(-handler) {$s $token}]
738
set block [read $s $state(-blocksize)]
739
set n [string length $block]
741
append state(body) $block
745
incr state(currentsize) $n
750
if {[info exists state(-progress)]} {
751
eval $state(-progress) \
752
{$token $state(totalsize) $state(currentsize)}
760
# Error handling wrapper around fcopy
763
# s The socket to copy from
764
# token The token returned from http::geturl
767
# This closes the connection upon error
769
proc http::CopyStart {s token} {
773
fcopy $s $state(-channel) -size $state(-blocksize) -command \
774
[list http::CopyDone $token]
782
# fcopy completion callback
785
# token The token returned from http::geturl
786
# count The amount transferred
791
proc http::CopyDone {token count {error {}}} {
795
incr state(currentsize) $count
796
if {[info exists state(-progress)]} {
797
eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
799
# At this point the token may have been reset
800
if {[string length $error]} {
802
} elseif {[catch {eof $s} iseof] || $iseof} {
811
# Handle eof on the socket
814
# token The token returned from http::geturl
817
# Clean up the socket
819
proc http::Eof {token} {
822
if {[string equal $state(state) "header"]} {
824
set state(status) eof
834
# See documentation for details.
837
# token Connection token.
840
# The status after the wait.
842
proc http::wait {token} {
846
if {![info exists state(status)] || [string length $state(status)] == 0} {
847
# We must wait on the original variable name, not the upvar alias
848
vwait $token\(status)
851
return $state(status)
854
# http::formatQuery --
856
# See documentation for details.
857
# Call http::formatQuery with an even number of arguments, where
858
# the first is a name, the second is a value, the third is another
862
# args A list of name-value pairs.
867
proc http::formatQuery {args} {
871
append result $sep [mapReply $i]
872
if {[string equal $sep "="]} {
883
# Do x-www-urlencoded character mapping
886
# string The string that needs to be encoded
891
proc http::mapReply {string} {
893
variable alphanumeric
895
# The spec says: "non-alphanumeric characters are replaced by '%HH'"
896
# 1 leave alphanumeric characters alone
897
# 2 Convert every other character to an array lookup
898
# 3 Escape constructs that are "special" to the tcl parser
899
# 4 "subst" the result, doing all the array substitutions
901
regsub -all \[^$alphanumeric\] $string {$formMap(&)} string
902
regsub -all {[][{})\\]\)} $string {\\&} string
903
return [subst -nocommand $string]
906
# http::ProxyRequired --
907
# Default proxy filter.
910
# host The destination host
913
# The current proxy settings
915
proc http::ProxyRequired {host} {
917
if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
918
if {![info exists http(-proxyport)] || \
919
![string length $http(-proxyport)]} {
920
set http(-proxyport) 8080
922
return [list $http(-proxyhost) $http(-proxyport)]