9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
1 |
##+##########################################################################
|
2 |
#
|
|
3 |
# Parallel Geturl -- package (and demo) that efficiently downloads large
|
|
4 |
# numbers of web pages while also handling timeout failures. Web requests
|
|
5 |
# are queued up and a set number are simultaneously fired off. As requests
|
|
6 |
# complete, new ones of popped off the queue and launched.
|
|
7 |
# by Keith Vetter, March 5, 2004
|
|
8 |
||
9 |
package require Tk
|
|
10 |
package require http
|
|
11 |
||
12 |
namespace eval PGU { |
|
13 |
if { $initialize_amsn == 1 } { |
|
14 |
variable options ;# User tweakable values |
|
15 |
variable queue ;# Request queue |
|
16 |
variable qhead 1 ;# First empty slot |
|
17 |
variable qtail 0 ;# Last in use slot |
|
18 |
variable stats ;# Array of statistics |
|
19 |
variable wait 0 ;# For vwait |
|
20 |
variable tokens
|
|
9734
by kakaroto
Correctly catch and report errors with PGU back to soap.. now, if the token is "", then use PGU::GetLastError to know wtf happened... |
21 |
variable last_error "" |
9875
by vivia
Increase timeout to fix login on slow connections. Thanks to asdf on IRC |
22 |
array set options {-degree 10 -timeout 120000 -maxRetries 3} |
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
23 |
}
|
24 |
||
25 |
proc ::PGU::Reset {} { |
|
26 |
variable queue
|
|
27 |
variable stats
|
|
28 |
variable qhead 1 |
|
29 |
variable qtail 0 |
|
30 |
variable wait 0 |
|
31 |
variable tokens
|
|
32 |
||
33 |
catch {unset queue} |
|
34 |
array set queue {} |
|
35 |
array set stats {qlen 0 pending 0 done 0 timeouts 0} |
|
36 |
foreach id [array names tokens] { |
|
37 |
set token [set tokens($id)] |
|
38 |
catch {::http::reset $token} |
|
39 |
catch {::http::cleanup $token} |
|
40 |
}
|
|
41 |
array unset tokens
|
|
42 |
}
|
|
43 |
if { $initialize_amsn == 1 } { |
|
44 |
::PGU::Reset |
|
45 |
}
|
|
46 |
||
47 |
#############################################################################
|
|
48 |
#
|
|
49 |
# ::PGU::Config -- allow user to configure some parameters
|
|
50 |
#
|
|
51 |
proc ::PGU::Config {args} { |
|
52 |
variable options
|
|
53 |
set o [lsort [array names options]] |
|
54 |
||
55 |
if {[llength $args] == 0} { ;# Return all results |
|
56 |
set result {} |
|
57 |
foreach name $o { |
|
58 |
lappend result $name $options($name) |
|
59 |
}
|
|
60 |
return $result |
|
61 |
}
|
|
62 |
foreach {flag value} $args { ;# Get one or set some |
|
63 |
if {[lsearch $o $flag] == -1} { |
|
64 |
return -code error "Unknown option $flag, must be: [join $o ", "]" |
|
65 |
}
|
|
66 |
if {[llength $args] == 1} { ;# Get one config value |
|
67 |
return $options($flag) |
|
68 |
}
|
|
69 |
set options($flag) $value ;# Set the config value |
|
70 |
}
|
|
71 |
}
|
|
72 |
##+##########################################################################
|
|
73 |
#
|
|
74 |
# ::PGU::Add -- adds a url and callback command to are request queue
|
|
75 |
#
|
|
9743
by vivia
Reverting 11890 as requested by kkrt |
76 |
proc ::PGU::Add {url cmd query type headers {nolaunch 0}} { |
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
77 |
variable queue
|
78 |
variable qtail
|
|
79 |
variable stats
|
|
80 |
||
81 |
set id [incr qtail] |
|
82 |
set queue($id) [list $url $cmd $query $type $headers 0] |
|
83 |
incr stats(qlen) |
|
84 |
status_log "PGU: Queueing $id for $url : [::PGU::Status]" |
|
85 |
if {!$nolaunch} { |
|
9743
by vivia
Reverting 11890 as requested by kkrt |
86 |
::PGU::Launch |
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
87 |
}
|
88 |
||
89 |
return $id |
|
90 |
}
|
|
9734
by kakaroto
Correctly catch and report errors with PGU back to soap.. now, if the token is "", then use PGU::GetLastError to know wtf happened... |
91 |
|
92 |
proc ::PGU::GetLastError {id} { |
|
93 |
variable last_error
|
|
94 |
return $last_error |
|
95 |
}
|
|
96 |
||
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
97 |
##+##########################################################################
|
98 |
#
|
|
99 |
# ::PGU::Launch -- launches web requests if we have the capacity
|
|
100 |
#
|
|
9743
by vivia
Reverting 11890 as requested by kkrt |
101 |
proc ::PGU::Launch {} { |
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
102 |
variable queue
|
103 |
variable qtail
|
|
104 |
variable qhead
|
|
105 |
variable options
|
|
106 |
variable stats
|
|
107 |
variable tokens
|
|
108 |
||
109 |
while {1} { |
|
110 |
if {$qtail < $qhead} return ;# Empty queue |
|
111 |
if {$stats(pending) >= $options(-degree)} return ;# No slots open |
|
112 |
||
113 |
set id $qhead |
|
114 |
incr qhead
|
|
115 |
||
116 |
if {![info exists queue($id)] } continue ; # canceled request |
|
117 |
||
118 |
incr stats(pending) |
|
119 |
incr stats(qlen) -1 |
|
120 |
||
121 |
foreach {url cmd query type headers cnt} $queue($id) break |
|
122 |
status_log "PGU: Getting URL $id : [::PGU::Status]" |
|
9744
by kakaroto
disable keepalive in pgu because it doesn't work so well... |
123 |
if {[catch {set tokens($id) [::http::geturl $url -keepalive 0 -timeout $options(-timeout) \ |
9743
by vivia
Reverting 11890 as requested by kkrt |
124 |
-command [list ::PGU::_HTTPCommand $id] \ |
9734
by kakaroto
Correctly catch and report errors with PGU back to soap.. now, if the token is "", then use PGU::GetLastError to know wtf happened... |
125 |
-query $query -type $type -headers $headers]} res]} { |
126 |
status_log "Error calling ::http::geturl : $res" |
|
127 |
if {[catch {eval $cmd ""} emsg]} { |
|
128 |
status_log "PGU Callback error : $emsg\n" red |
|
129 |
}
|
|
130 |
set last_error res
|
|
131 |
||
132 |
}
|
|
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
133 |
}
|
134 |
}
|
|
135 |
##+##########################################################################
|
|
136 |
#
|
|
137 |
# ::PGU::_HTTPCommand -- our geturl callback command that handles
|
|
138 |
# queue maintenance, timeout retries and user callbacks.
|
|
139 |
#
|
|
9743
by vivia
Reverting 11890 as requested by kkrt |
140 |
proc ::PGU::_HTTPCommand {id token} { |
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
141 |
variable queue
|
142 |
variable stats
|
|
143 |
variable options
|
|
144 |
variable wait
|
|
145 |
variable tokens
|
|
146 |
||
147 |
foreach {url cmd query type headers cnt} $queue($id) break |
|
148 |
||
149 |
set status [::http::status $token] |
|
150 |
if {$status == "timeout"} { |
|
151 |
incr stats(timeouts) |
|
152 |
incr cnt -1 |
|
153 |
if {abs($cnt) < $options(-maxRetries)} { |
|
154 |
::http::cleanup $token |
|
155 |
array unset tokens $id |
|
156 |
||
9744
by kakaroto
disable keepalive in pgu because it doesn't work so well... |
157 |
status_log "PGU Callback: request $id timed out, retrying \n" red |
158 |
||
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
159 |
lset queue($id) 5 $cnt ;# Remember retry attempts |
9734
by kakaroto
Correctly catch and report errors with PGU back to soap.. now, if the token is "", then use PGU::GetLastError to know wtf happened... |
160 |
if {![catch { |
9744
by kakaroto
disable keepalive in pgu because it doesn't work so well... |
161 |
set tokens($id) [::http::geturl $url -keepalive 0 \ |
9739
by kakaroto
Use -keepalive on the http geturl to minimize use of the sockets and allow for faster requests/less hang. |
162 |
-timeout $options(-timeout) \ |
9743
by vivia
Reverting 11890 as requested by kkrt |
163 |
-command [list ::PGU::_HTTPCommand $id] \ |
9734
by kakaroto
Correctly catch and report errors with PGU back to soap.. now, if the token is "", then use PGU::GetLastError to know wtf happened... |
164 |
-query $query -type $type \ |
165 |
-headers $headers] |
|
166 |
}] } { |
|
167 |
return
|
|
168 |
}
|
|
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
169 |
}
|
170 |
}
|
|
171 |
||
9734
by kakaroto
Correctly catch and report errors with PGU back to soap.. now, if the token is "", then use PGU::GetLastError to know wtf happened... |
172 |
if {[catch {eval $cmd $token} emsg]} { |
173 |
status_log "PGU Callback error : $emsg\n" red |
|
174 |
}
|
|
175 |
||
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
176 |
incr stats(pending) -1 ;# One less outstanding request |
177 |
incr stats(done) |
|
178 |
status_log "PGU: Request $id done : [::PGU::Status]" |
|
9743
by vivia
Reverting 11890 as requested by kkrt |
179 |
::PGU::Launch ;# Try launching another request |
9707
by kakaroto
Add pgu.tcl, support for Parallel geturl.. queues geturls (from soap atm) and only sends 50 gets at a time.. will slow down resources/bandwidth and less likely to get timeouts for the getprofile on startup |
180 |
|
181 |
::http::cleanup $token |
|
182 |
array unset queue $id |
|
183 |
array unset tokens $id |
|
184 |
}
|
|
185 |
||
186 |
proc ::PGU::Cancel {id} { |
|
187 |
variable tokens
|
|
188 |
variable queue
|
|
189 |
||
190 |
if {[info exists tokens($id)] } { |
|
191 |
set token [set tokens($id)] |
|
192 |
catch {::http::reset $token} |
|
193 |
catch {::http::cleanup $token} |
|
194 |
array unset tokens $id |
|
195 |
}
|
|
196 |
array unset queue $id |
|
197 |
}
|
|
198 |
||
199 |
##+##########################################################################
|
|
200 |
#
|
|
201 |
# ::PGU::Status -- returns some statistics of the current state
|
|
202 |
#
|
|
203 |
proc ::PGU::Status {} { |
|
204 |
variable stats
|
|
205 |
return [list $stats(qlen) $stats(pending) $stats(done) $stats(timeouts)] |
|
206 |
}
|
|
207 |
||
9875
by vivia
Increase timeout to fix login on slow connections. Thanks to asdf on IRC |
208 |
}
|