7
alias open_uri_original_open open # :nodoc:
9
# Makes it possible to open various resources, including URIs.
10
# If the first argument responds to the `open' method,
11
# the method is called with the rest arguments.
13
# If the first argument is a string which begins with xxx://,
14
# it is parsed by URI.parse. If the parsed object responds to the `open' method,
15
# the method is called with the rest arguments.
17
# Otherwise original open is called.
19
# Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
21
# Kernel[#.]open can accept such URIs and strings which begin with
22
# http://, https:// and ftp://.
23
# In these cases, the opened file object is extended by OpenURI::Meta.
24
def open(name, *rest, &block) # :doc:
25
if name.respond_to?(:open)
26
name.open(*rest, &block)
27
elsif name.respond_to?(:to_str) &&
28
%r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
29
(uri = URI.parse(name)).respond_to?(:open)
30
uri.open(*rest, &block)
32
open_uri_original_open(name, *rest, &block)
38
# OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
42
# It is possible to open an http/https/ftp URL just like opening a file:
44
# open("http://www.ruby-lang.org/") {|f|
45
# f.each_line {|line| p line}
48
# The opened file has several methods for meta information as follows since
49
# it is extended by OpenURI::Meta.
51
# open("http://www.ruby-lang.org/en") {|f|
52
# f.each_line {|line| p line}
53
# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
54
# p f.content_type # "text/html"
55
# p f.charset # "iso-8859-1"
56
# p f.content_encoding # []
57
# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
60
# Additional header fields can be specified by an optional hash argument.
62
# open("http://www.ruby-lang.org/en/",
63
# "User-Agent" => "Ruby/#{RUBY_VERSION}",
64
# "From" => "foo@bar.invalid",
65
# "Referer" => "http://www.ruby-lang.org/") {|f|
69
# The environment variables such as http_proxy, https_proxy and ftp_proxy
70
# are in effect by default. :proxy => nil disables proxy.
72
# open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
76
# URI objects can be opened in a similar way.
78
# uri = URI.parse("http://www.ruby-lang.org/en/")
83
# URI objects can be read directly. The returned string is also extended by
89
# Author:: Tanaka Akira <akr@m17n.org>
94
:progress_proc => true,
95
:content_length_proc => true,
96
:http_basic_authentication => true,
99
def OpenURI.check_options(options) # :nodoc:
101
next unless Symbol === k
102
unless Options.include? k
103
raise ArgumentError, "unrecognized option: #{k}"
108
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
109
if !rest.empty? && (String === rest.first || Integer === rest.first)
111
if !rest.empty? && Integer === rest.first
115
return mode, perm, rest
118
def OpenURI.open_uri(name, *rest) # :nodoc:
119
uri = URI::Generic === name ? name : URI.parse(name)
120
mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest)
121
options = rest.shift if !rest.empty? && Hash === rest.first
122
raise ArgumentError.new("extra arguments") if !rest.empty?
124
OpenURI.check_options(options)
126
unless mode == nil ||
127
mode == 'r' || mode == 'rb' ||
129
raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
132
io = open_loop(uri, options)
144
def OpenURI.open_loop(uri, options) # :nodoc:
145
case opt_proxy = options.fetch(:proxy, true)
147
find_proxy = lambda {|u| u.find_proxy}
149
find_proxy = lambda {|u| nil}
151
opt_proxy = URI.parse(opt_proxy)
152
find_proxy = lambda {|u| opt_proxy}
154
find_proxy = lambda {|u| opt_proxy}
156
raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
162
redirect = catch(:open_uri_redirect) {
164
uri.buffer_open(buf, find_proxy.call(uri), options)
168
if redirect.relative?
169
# Although it violates RFC2616, Location: field may have relative
170
# URI. It is converted to absolute URI using uri as a base URI.
171
redirect = uri + redirect
173
unless OpenURI.redirectable?(uri, redirect)
174
raise "redirection forbidden: #{uri} -> #{redirect}"
176
if options.include? :http_basic_authentication
177
# send authentication only for the URI directly specified.
178
options = options.dup
179
options.delete :http_basic_authentication
182
raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
183
uri_set[uri.to_s] = true
193
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
194
# This test is intended to forbid a redirection from http://... to
195
# file:///etc/passwd.
196
# However this is ad hoc. It should be extensible/configurable.
197
uri1.scheme.downcase == uri2.scheme.downcase ||
198
(/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme)
201
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
203
raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP
206
if target.userinfo && "1.9.0" <= RUBY_VERSION
207
# don't raise for 1.8 because compatibility.
208
raise ArgumentError, "userinfo not supported. [RFC3986]"
213
if URI::HTTP === target
216
klass = Net::HTTP::Proxy(proxy.host, proxy.port)
218
target_host = target.host
219
target_port = target.port
220
request_uri = target.request_uri
222
# FTP over HTTP proxy
223
target_host = proxy.host
224
target_port = proxy.port
225
request_uri = target.to_s
228
http = klass.new(target_host, target_port)
229
if target.class == URI::HTTPS
232
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
233
store = OpenSSL::X509::Store.new
234
store.set_default_paths
235
http.cert_store = store
239
options.each {|k, v| header[k] = v if String === k }
243
if target.class == URI::HTTPS
244
# xxx: information hiding violation
245
sock = http.instance_variable_get(:@socket)
246
if sock.respond_to?(:io)
249
sock = sock.instance_variable_get(:@socket) # 1.8
251
sock.post_connection_check(target_host)
253
req = Net::HTTP::Get.new(request_uri, header)
254
if options.include? :http_basic_authentication
255
user, pass = options[:http_basic_authentication]
256
req.basic_auth user, pass
258
http.request(req) {|response|
260
if options[:content_length_proc] && Net::HTTPSuccess === resp
261
if resp.key?('Content-Length')
262
options[:content_length_proc].call(resp['Content-Length'].to_i)
264
options[:content_length_proc].call(nil)
267
resp.read_body {|str|
269
if options[:progress_proc] && Net::HTTPSuccess === resp
270
options[:progress_proc].call(buf.size)
277
io.status = [resp.code, resp.message]
278
resp.each {|name,value| buf.io.meta_add_field name, value }
280
when Net::HTTPSuccess
281
when Net::HTTPMovedPermanently, # 301
282
Net::HTTPFound, # 302
283
Net::HTTPSeeOther, # 303
284
Net::HTTPTemporaryRedirect # 307
285
throw :open_uri_redirect, URI.parse(resp['location'])
287
raise OpenURI::HTTPError.new(io.status.join(' '), io)
291
class HTTPError < StandardError
292
def initialize(message, io)
299
class Buffer # :nodoc:
310
if StringIO === @io && StringMax < @size
312
io = Tempfile.new('open-uri')
314
Meta.init io, @io if @io.respond_to? :meta
321
Meta.init @io unless @io.respond_to? :meta
326
# Mixin for holding meta-information.
328
def Meta.init(obj, src=nil) # :nodoc:
335
obj.status = src.status
336
obj.base_uri = src.base_uri
337
src.meta.each {|name, value|
338
obj.meta_add_field(name, value)
343
# returns an Array which consists of status code and message.
344
attr_accessor :status
346
# returns a URI which is base of relative URIs in the data.
347
# It may differ from the URI supplied by a user because of redirection.
348
attr_accessor :base_uri
350
# returns a Hash which represents header fields.
351
# The Hash keys are downcased for canonicalization.
354
def meta_add_field(name, value) # :nodoc:
355
@meta[name.downcase] = value
358
# returns a Time which represents Last-Modified field.
360
if v = @meta['last-modified']
367
RE_LWS = /[\r\n\t ]+/n
368
RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
369
RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
370
RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
372
def content_type_parse # :nodoc:
373
v = @meta['content-type']
374
# The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
375
if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
377
subtype = $2.downcase
379
$3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
380
val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/) { $1 ? $1[1,1] : $& } if qval
381
parameters << [att.downcase, val]
383
["#{type}/#{subtype}", *parameters]
389
# returns "type/subtype" which is MIME Content-Type.
390
# It is downcased for canonicalization.
391
# Content-Type parameters are stripped.
393
type, *parameters = content_type_parse
394
type || 'application/octet-stream'
397
# returns a charset parameter in Content-Type field.
398
# It is downcased for canonicalization.
400
# If charset parameter is not given but a block is given,
401
# the block is called and its result is returned.
402
# It can be used to guess charset.
404
# If neither a charset parameter nor a block is given,
405
# nil is returned, except for a text type in HTTP.
406
# In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
408
type, *parameters = content_type_parse
409
if pair = parameters.assoc('charset')
413
elsif type && %r{\Atext/} =~ type &&
414
@base_uri && /\Ahttp\z/i =~ @base_uri.scheme
415
"iso-8859-1" # RFC2616 3.7.1
421
# returns a list of encodings in Content-Encoding field
422
# as an Array of String.
423
# The encodings are downcased for canonicalization.
425
v = @meta['content-encoding']
426
if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
427
v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
434
# Mixin for HTTP and FTP URIs.
436
# OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
438
# OpenURI::OpenRead#open takes optional 3 arguments as:
439
# OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
441
# `mode' and `perm' are the same as for Kernel#open.
443
# However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
444
# support write mode (yet).
445
# Also `perm' is just ignored because it is meaningful only for file
448
# `options' must be a hash.
450
# Each pair whose key is a string in the hash specifies an extra header
452
# I.e. it is ignored for FTP without HTTP proxy.
454
# The hash may include other options whose keys are symbols:
458
# :proxy => "http://proxy.foo.com:8000/"
459
# :proxy => URI.parse("http://proxy.foo.com:8000/")
464
# If :proxy option is specified, the value should be String, URI,
466
# When String or URI is given, it is treated as proxy URI.
467
# When true is given or the option itself is not specified,
468
# environment variable `scheme_proxy' is examined.
469
# `scheme' is replaced by `http', `https' or `ftp'.
470
# When false or nil is given, the environment variables are ignored and
471
# connection will be made to a server directly.
473
# [:http_basic_authentication]
475
# :http_basic_authentication=>[user, password]
477
# If :http_basic_authentication is specified,
478
# the value should be an array which contains 2 strings:
479
# username and password.
480
# It is used for HTTP Basic authentication defined by RFC 2617.
482
# [:content_length_proc]
484
# :content_length_proc => lambda {|content_length| ... }
486
# If :content_length_proc option is specified, the option value procedure
487
# is called before actual transfer is started.
488
# It takes one argument which is expected content length in bytes.
490
# If two or more transfers are done by HTTP redirection, the procedure
491
# is called only once, for the last transfer.
493
# When expected content length is unknown, the procedure is called with
495
# This happens when the HTTP response has no Content-Length header.
499
# :progress_proc => lambda {|size| ...}
501
# If :progress_proc option is specified, the proc is called with one
502
# argument each time `open' gets a content fragment from the network.
503
# The argument `size' is the accumulated transferred size in bytes.
505
# If two or more transfers are done by HTTP redirection, the procedure
506
# is called only once, for the last transfer.
508
# :progress_proc and :content_length_proc are intended to be used for
510
# For example, it can be implemented as follows using Ruby/ProgressBar.
514
# :content_length_proc => lambda {|t|
516
# pbar = ProgressBar.new("...", t)
517
# pbar.file_transfer_mode
520
# :progress_proc => lambda {|s|
524
# OpenURI::OpenRead#open returns an IO-like object if a block is not given.
525
# Otherwise it yields the IO object and returns the value of the block.
526
# The IO object is extended with OpenURI::Meta.
527
def open(*rest, &block)
528
OpenURI.open_uri(self, *rest, &block)
531
# OpenURI::OpenRead#read([options]) reads a content referenced by self and
532
# returns the content as string.
533
# The string is extended with OpenURI::Meta.
534
# The argument `options' is the same as for OpenURI::OpenRead#open.
536
self.open(options) {|f|
547
# returns a proxy URI.
548
# The proxy URI is obtained from environment variables such as http_proxy,
549
# ftp_proxy, no_proxy, etc.
550
# If there is no proper proxy, nil is returned.
552
# Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
555
# But http_proxy and HTTP_PROXY are treated specially under a CGI environment.
556
# It's because HTTP_PROXY may be set by Proxy: header.
557
# So HTTP_PROXY is not used.
558
# http_proxy is not used either if the variable is case insensitive.
559
# CGI_HTTP_PROXY can be used instead.
561
name = self.scheme.downcase + '_proxy'
563
if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
564
# HTTP_PROXY conflicts with *_proxy for proxy settings and
565
# HTTP_* for header information in CGI.
566
# So it should be careful to use it.
567
pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
569
when 0 # no proxy setting anyway.
573
if k == 'http_proxy' && ENV[k.upcase] == nil
574
# http_proxy is safe to use because ENV is case sensitive.
575
proxy_uri = ENV[name]
579
else # http_proxy is safe to use because ENV is case sensitive.
580
proxy_uri = ENV[name]
583
# Use CGI_HTTP_PROXY. cf. libwww-perl.
584
proxy_uri = ENV["CGI_#{name.upcase}"]
586
elsif name == 'http_proxy'
587
unless proxy_uri = ENV[name]
588
if proxy_uri = ENV[name.upcase]
589
warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.'
593
proxy_uri = ENV[name] || ENV[name.upcase]
596
if proxy_uri && self.host
599
addr = IPSocket.getaddress(self.host)
600
proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
606
proxy_uri = URI.parse(proxy_uri)
608
if no_proxy = ENV[name] || ENV[name.upcase]
609
no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
610
if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
611
(!port || self.port == port.to_i)
625
def buffer_open(buf, proxy, options) # :nodoc:
626
OpenURI.open_http(buf, self, proxy, options)
629
include OpenURI::OpenRead
633
def buffer_open(buf, proxy, options) # :nodoc:
635
OpenURI.open_http(buf, self, proxy, options)
640
directories = self.path.split(%r{/}, -1)
641
directories.shift if directories[0] == '' # strip a field before leading slash
642
directories.each {|d|
643
d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
645
unless filename = directories.pop
646
raise ArgumentError, "no filename: #{self.inspect}"
648
directories.each {|d|
650
raise ArgumentError, "invalid directory: #{d.inspect}"
653
if /[\r\n]/ =~ filename
654
raise ArgumentError, "invalid filename: #{filename.inspect}"
656
typecode = self.typecode
657
if typecode && /\A[aid]\z/ !~ typecode
658
raise ArgumentError, "invalid typecode: #{typecode.inspect}"
661
# The access sequence is defined by RFC 1738
662
ftp = Net::FTP.open(self.host)
663
# todo: extract user/passwd from .netrc.
666
user, passwd = self.userinfo.split(/:/) if self.userinfo
667
ftp.login(user, passwd)
668
directories.each {|cwd|
669
ftp.voidcmd("CWD #{cwd}")
672
# xxx: typecode D is not handled.
673
ftp.voidcmd("TYPE #{typecode.upcase}")
675
if options[:content_length_proc]
676
options[:content_length_proc].call(ftp.size(filename))
678
ftp.retrbinary("RETR #{filename}", 4096) { |str|
680
options[:progress_proc].call(buf.size) if options[:progress_proc]
686
include OpenURI::OpenRead