8
alias rubygems_open_uri_original_open open # :nodoc:
10
# Makes it possible to open various resources, including URIs.
11
# If the first argument responds to the `open' method,
12
# the method is called with the rest arguments.
14
# If the first argument is a string which begins with xxx://,
15
# it is parsed by URI.parse. If the parsed object responds to the `open' method,
16
# the method is called with the rest arguments.
18
# Otherwise the original open is called.
20
# Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
22
# Kernel[#.]open can accept such URIs and strings which begin with
23
# http://, https:// and ftp://.
24
# In these cases, the opened file object is extended by OpenURI::Meta.
25
# Opens +name+, dispatching on what kind of object it is.
#
# name::  an object responding to +open+, or something string-like.
# rest::  remaining arguments, forwarded unchanged to whichever +open+ is used.
# block:: optional block, forwarded unchanged as well.
def open(name, *rest, &block) # :doc:
26
# Case 1: the object knows how to open itself, so delegate to it.
if name.respond_to?(:open)
27
name.open(*rest, &block)
28
# Case 2: a string-like value that starts with "scheme://" and whose parsed
# URI object responds to +open+ is opened through that URI object.
elsif name.respond_to?(:to_str) &&
29
%r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
30
(uri = URI.parse(name)).respond_to?(:open)
31
uri.open(*rest, &block)
33
# Call the pre-alias +open+ saved as rubygems_open_uri_original_open.
# NOTE(review): presumably the fallback branch; the `else' introducing it is
# not visible in this excerpt -- confirm.
rubygems_open_uri_original_open(name, *rest, &block)
39
# OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
43
# It is possible to open an http/https/ftp URL just like opening a file:
45
# open("http://www.ruby-lang.org/") {|f|
46
# f.each_line {|line| p line}
49
# The opened file has several methods for meta information as follows since
50
# it is extended by OpenURI::Meta.
52
# open("http://www.ruby-lang.org/en") {|f|
53
# f.each_line {|line| p line}
54
# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
55
# p f.content_type # "text/html"
56
# p f.charset # "iso-8859-1"
57
# p f.content_encoding # []
58
# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
61
# Additional header fields can be specified by an optional hash argument.
63
# open("http://www.ruby-lang.org/en/",
64
# "User-Agent" => "Ruby/#{RUBY_VERSION}",
65
# "From" => "foo@bar.invalid",
66
# "Referer" => "http://www.ruby-lang.org/") {|f|
70
# The environment variables such as http_proxy, https_proxy and ftp_proxy
71
# are in effect by default. :proxy => nil disables proxy.
73
# open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
77
# URI objects can be opened in a similar way.
79
# uri = URI.parse("http://www.ruby-lang.org/en/")
84
# URI objects can be read directly. The returned string is also extended by
90
# Author:: Tanaka Akira <akr@m17n.org>
95
:proxy_http_basic_authentication => true,
96
:progress_proc => true,
97
:content_length_proc => true,
98
:http_basic_authentication => true,
99
:read_timeout => true,
101
:ssl_verify_mode => nil,
104
# Validates the keys of an options hash against the Options table.
#
# Raises ArgumentError ("unrecognized option: ...") for a Symbol key that is
# not listed in Options.
def OpenURI.check_options(options) # :nodoc:
106
# Non-Symbol keys are skipped here without validation.
# NOTE(review): the iterator that yields +k+ is not visible in this excerpt;
# presumably it walks +options+ -- confirm.
next unless Symbol === k
107
unless Options.include? k
108
raise ArgumentError, "unrecognized option: #{k}"
113
# Inspects the leading optional positional arguments of an open call.
#
# Returns three values: +mode+, +perm+ and +rest+.
# NOTE(review): the statements that assign +mode+ and +perm+ are not visible in
# this excerpt; presumably each is taken from the front of +rest+ -- confirm.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
114
# First test: the leading argument may be either a String or an Integer.
if !rest.empty? && (String === rest.first || Integer === rest.first)
116
# Second test: only an Integer is accepted as the leading argument.
if !rest.empty? && Integer === rest.first
120
return mode, perm, rest
123
# Entry point for opening a URI.
#
# name:: a URI::Generic instance, or a value passed to URI.parse.
# rest:: optional arguments: whatever scan_open_optional_arguments consumes as
#        mode/perm, then at most one options Hash.
#
# Raises ArgumentError when arguments remain after the options Hash
# ("extra arguments") and for a rejected access mode ("invalid access mode").
def OpenURI.open_uri(name, *rest) # :nodoc:
124
# Accept an already-parsed URI as is; parse anything else.
uri = URI::Generic === name ? name : URI.parse(name)
125
mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest)
126
# An options Hash, if present, must be the next remaining argument.
options = rest.shift if !rest.empty? && Hash === rest.first
127
raise ArgumentError.new("extra arguments") if !rest.empty?
129
OpenURI.check_options(options)
131
# nil, 'r' and 'rb' are accepted modes.
# NOTE(review): the end of this condition is not visible in this excerpt, so
# further accepted modes may exist -- confirm.
unless mode == nil ||
132
mode == 'r' || mode == 'rb' ||
134
raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
137
# open_loop performs the actual retrieval for this URI and options.
io = open_loop(uri, options)
149
def OpenURI.open_loop(uri, options) # :nodoc:
151
proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication
152
proxy_opts << :proxy if options.include? :proxy
154
if 1 < proxy_opts.length
155
raise ArgumentError, "multiple proxy options specified"
157
case proxy_opts.first
158
when :proxy_http_basic_authentication
159
opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
160
proxy_user = proxy_user.to_str
161
proxy_pass = proxy_pass.to_str
163
raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
166
opt_proxy = options.fetch(:proxy)
176
find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil}
178
find_proxy = lambda {|u| nil}
180
opt_proxy = URI.parse(opt_proxy)
181
find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
183
find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
185
raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
191
redirect = catch(:open_uri_redirect) {
193
uri.buffer_open(buf, find_proxy.call(uri), options)
197
if redirect.relative?
198
# Although it violates RFC2616, Location: field may have relative
199
# URI. It is converted to absolute URI using uri as a base URI.
200
redirect = uri + redirect
202
unless OpenURI.redirectable?(uri, redirect)
203
raise "redirection forbidden: #{uri} -> #{redirect}"
205
if options.include? :http_basic_authentication
206
# send authentication only for the URI directly specified.
207
options = options.dup
208
options.delete :http_basic_authentication
211
raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
212
uri_set[uri.to_s] = true
222
# Decides whether a redirection from +uri1+ to +uri2+ is permitted.
#
# uri1:: the URI that produced the redirect.
# uri2:: the redirect target.
#
# The result is truthy when both schemes are equal ignoring case, or when
# each scheme is exactly "http" or "ftp" (ignoring case). The second rule
# therefore also permits crossing between http and ftp.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
223
# This test is intended to forbid a redirection from http://... to
224
# file:///etc/passwd.
225
# However this is ad hoc. It should be extensible/configurable.
226
uri1.scheme.downcase == uri2.scheme.downcase ||
227
(/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme)
230
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
232
proxy_uri, proxy_user, proxy_pass = proxy
233
raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
236
if target.userinfo && "1.9.0" <= RUBY_VERSION
237
# don't raise for 1.8 because compatibility.
238
raise ArgumentError, "userinfo not supported. [RFC3986]"
242
options.each {|k, v| header[k] = v if String === k }
246
if URI::HTTP === target
249
if proxy_user && proxy_pass
250
klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port, proxy_user, proxy_pass)
252
klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port)
255
target_host = target.host
256
target_port = target.port
257
request_uri = target.request_uri
259
# FTP over HTTP proxy
260
target_host = proxy_uri.host
261
target_port = proxy_uri.port
262
request_uri = target.to_s
263
if proxy_user && proxy_pass
264
header["Proxy-Authorization"] = 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m').delete("\r\n")
268
http = klass.new(target_host, target_port)
269
if target.class == URI::HTTPS
272
http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
273
store = OpenSSL::X509::Store.new
274
if options[:ssl_ca_cert]
275
if File.directory? options[:ssl_ca_cert]
276
store.add_path options[:ssl_ca_cert]
278
store.add_file options[:ssl_ca_cert]
281
store.set_default_paths
283
store.set_default_paths
284
http.cert_store = store
286
if options.include? :read_timeout
287
http.read_timeout = options[:read_timeout]
292
if target.class == URI::HTTPS
293
# xxx: information hiding violation
294
sock = http.instance_variable_get(:@socket)
295
if sock.respond_to?(:io)
298
sock = sock.instance_variable_get(:@socket) # 1.8
300
sock.post_connection_check(target_host)
302
req = Net::HTTP::Get.new(request_uri, header)
303
if options.include? :http_basic_authentication
304
user, pass = options[:http_basic_authentication]
305
req.basic_auth user, pass
307
http.request(req) {|response|
309
if options[:content_length_proc] && Net::HTTPSuccess === resp
310
if resp.key?('Content-Length')
311
options[:content_length_proc].call(resp['Content-Length'].to_i)
313
options[:content_length_proc].call(nil)
316
resp.read_body {|str|
318
if options[:progress_proc] && Net::HTTPSuccess === resp
319
options[:progress_proc].call(buf.size)
326
io.status = [resp.code, resp.message]
327
resp.each {|name,value| buf.io.meta_add_field name, value }
329
when Net::HTTPSuccess
330
when Net::HTTPMovedPermanently, # 301
331
Net::HTTPFound, # 302
332
Net::HTTPSeeOther, # 303
333
Net::HTTPTemporaryRedirect # 307
334
throw :open_uri_redirect, URI.parse(resp['location'])
336
raise OpenURI::HTTPError.new(io.status.join(' '), io)
340
class HTTPError < StandardError
341
def initialize(message, io)
348
class Buffer # :nodoc:
359
if StringIO === @io && StringMax < @size
361
io = Tempfile.new('open-uri')
363
Meta.init io, @io if @io.respond_to? :meta
370
Meta.init @io unless @io.respond_to? :meta
375
# Mixin for holding meta-information.
377
# Prepares +obj+ to carry meta-information, optionally copying it from +src+.
#
# obj:: the object receiving status, base_uri and header fields.
# src:: optional source object exposing +status+, +base_uri+ and +meta+.
#
# NOTE(review): +src+ defaults to nil, but the guard around the copy below is
# not visible in this excerpt -- confirm the copy only runs when src is given.
def Meta.init(obj, src=nil) # :nodoc:
384
obj.status = src.status
385
obj.base_uri = src.base_uri
386
# Re-add each field through meta_add_field rather than assigning the Hash.
src.meta.each {|name, value|
387
obj.meta_add_field(name, value)
392
# returns an Array which consists of status code and message.
393
attr_accessor :status
395
# returns a URI which is base of relative URIs in the data.
396
# It may differ from the URI supplied by a user because of redirection.
397
attr_accessor :base_uri
399
# returns a Hash which represents header fields.
400
# The Hash keys are downcased for canonicalization.
403
# Records one header field in @meta.
#
# name::  header field name; it is downcased before being used as the key.
# value:: header field value, stored unchanged.
def meta_add_field(name, value) # :nodoc:
404
@meta[name.downcase] = value
407
# returns a Time which represents Last-Modified field.
409
if v = @meta['last-modified']
416
RE_LWS = /[\r\n\t ]+/n
417
RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
418
RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
419
RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
421
# Parses the stored Content-Type header field.
#
# On a successful match the result is an Array whose first element is
# "type/subtype" and whose remaining elements are [attribute, value] pairs
# with the attribute name downcased.
# NOTE(review): the result for a missing or unparsable header is not visible
# in this excerpt -- confirm.
def content_type_parse # :nodoc:
422
v = @meta['content-type']
423
# The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
424
# Captures: $1 = type token, $2 = subtype token, $3 = raw parameter list.
if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
426
subtype = $2.downcase
428
# Each parameter value is either a bare token (val) or a quoted string (qval).
$3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
429
# For a quoted value, replace each backslash escape by the escaped character.
# NOTE(review): the pattern never matches the double-quote character, so the
# quotes around a quoted value appear to be kept in +val+ -- confirm whether
# that is intended.
val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/) { $1 ? $1[1,1] : $& } if qval
430
parameters << [att.downcase, val]
432
["#{type}/#{subtype}", *parameters]
438
# returns "type/subtype" which is MIME Content-Type.
439
# It is downcased for canonicalization.
440
# Content-Type parameters are stripped.
442
type, *parameters = content_type_parse
443
type || 'application/octet-stream'
446
# returns a charset parameter in Content-Type field.
447
# It is downcased for canonicalization.
449
# If charset parameter is not given but a block is given,
450
# the block is called and its result is returned.
451
# It can be used to guess charset.
453
# If neither the charset parameter nor a block is given,
454
# nil is returned except text type in HTTP.
455
# In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
457
type, *parameters = content_type_parse
458
if pair = parameters.assoc('charset')
462
elsif type && %r{\Atext/} =~ type &&
463
@base_uri && /\Ahttp\z/i =~ @base_uri.scheme
464
"iso-8859-1" # RFC2616 3.7.1
470
# returns a list of encodings in Content-Encoding field
471
# as an Array of String.
472
# The encodings are downcased for canonicalization.
474
v = @meta['content-encoding']
475
if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
476
v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
483
# Mixin for HTTP and FTP URIs.
485
# OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
487
# OpenURI::OpenRead#open takes up to 3 optional arguments as:
488
# OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
490
# `mode' and `perm' are the same as for Kernel#open.
492
# However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
493
# support write mode (yet).
494
# Also `perm' is just ignored because it is meaningful only for file
497
# `options' must be a hash.
499
# Each pair whose key is a string in the hash specifies an extra header
501
# I.e. it is ignored for FTP without HTTP proxy.
503
# The hash may include other options whose keys are symbols:
507
# :proxy => "http://proxy.foo.com:8000/"
508
# :proxy => URI.parse("http://proxy.foo.com:8000/")
513
# If :proxy option is specified, the value should be String, URI,
515
# When String or URI is given, it is treated as proxy URI.
516
# When true is given or the option itself is not specified,
517
# environment variable `scheme_proxy' is examined.
518
# `scheme' is replaced by `http', `https' or `ftp'.
519
# When false or nil is given, the environment variables are ignored and
520
# connection will be made to a server directly.
522
# [:proxy_http_basic_authentication]
524
# :proxy_http_basic_authentication => ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"]
525
# :proxy_http_basic_authentication => [URI.parse("http://proxy.foo.com:8000/"), "proxy-user", "proxy-password"]
527
# If :proxy_http_basic_authentication option is specified, the value should be an Array with 3 elements.
528
# It should contain a proxy URI, a proxy user name and a proxy password.
529
# The proxy URI should be a String, a URI or nil.
530
# The proxy user name and password should be Strings.
532
# If nil is given for the proxy URI, this option is just ignored.
534
# If both :proxy and :proxy_http_basic_authentication are specified,
535
# ArgumentError is raised.
537
# [:http_basic_authentication]
539
# :http_basic_authentication=>[user, password]
541
# If :http_basic_authentication is specified,
542
# the value should be an array which contains 2 strings:
543
# username and password.
544
# It is used for HTTP Basic authentication defined by RFC 2617.
546
# [:content_length_proc]
548
# :content_length_proc => lambda {|content_length| ... }
550
# If :content_length_proc option is specified, the option value procedure
551
# is called before actual transfer is started.
552
# It takes one argument which is expected content length in bytes.
554
# If two or more transfers are done by HTTP redirection, the procedure
555
# is called only once, for the last transfer.
557
# When expected content length is unknown, the procedure is called with
559
# This happens when the HTTP response has no Content-Length header.
563
# :progress_proc => lambda {|size| ...}
565
# If :progress_proc option is specified, the proc is called with one
566
# argument each time when `open' gets content fragment from network.
567
# The argument `size' is the accumulated transferred size in bytes.
569
# If two or more transfers are done by HTTP redirection, the procedure
570
# is called only once, for the last transfer.
572
# :progress_proc and :content_length_proc are intended to be used for
574
# For example, it can be implemented as follows using Ruby/ProgressBar.
578
# :content_length_proc => lambda {|t|
580
# pbar = ProgressBar.new("...", t)
581
# pbar.file_transfer_mode
584
# :progress_proc => lambda {|s|
590
# :read_timeout=>nil (no timeout)
591
# :read_timeout=>10 (10 seconds)
593
# :read_timeout option specifies a read timeout for HTTP connections.
597
# :ssl_ca_cert=>filename
599
# :ssl_ca_cert is used to specify CA certificate for SSL.
600
# If it is given, default certificates are not used.
604
# :ssl_verify_mode=>mode
606
# :ssl_verify_mode is used to specify openssl verify mode.
608
# OpenURI::OpenRead#open returns an IO like object if block is not given.
609
# Otherwise it yields the IO object and returns the value of the block.
610
# The IO object is extended with OpenURI::Meta.
611
# Opens the resource referenced by self.
#
# rest::  optional arguments, forwarded unchanged to OpenURI.open_uri.
# block:: optional block, forwarded unchanged to OpenURI.open_uri.
def open(*rest, &block)
612
# self is passed as the URI to open.
OpenURI.open_uri(self, *rest, &block)
615
# OpenURI::OpenRead#read([options]) reads the content referenced by self and
616
# returns the content as string.
617
# The string is extended with OpenURI::Meta.
618
# The argument `options' is the same as for OpenURI::OpenRead#open.
620
self.open(options) {|f|
631
# returns a proxy URI.
632
# The proxy URI is obtained from environment variables such as http_proxy,
633
# ftp_proxy, no_proxy, etc.
634
# If there is no proper proxy, nil is returned.
636
# Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
639
# But http_proxy and HTTP_PROXY are treated specially under a CGI environment.
640
# It's because HTTP_PROXY may be set by Proxy: header.
641
# So HTTP_PROXY is not used.
642
# http_proxy is not used either if the variable is case insensitive.
643
# CGI_HTTP_PROXY can be used instead.
645
name = self.scheme.downcase + '_proxy'
647
if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
648
# HTTP_PROXY conflicts with *_proxy for proxy settings and
649
# HTTP_* for header information in CGI.
650
# So it should be careful to use it.
651
pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
653
when 0 # no proxy setting anyway.
657
if k == 'http_proxy' && ENV[k.upcase] == nil
658
# http_proxy is safe to use because ENV is case sensitive.
659
proxy_uri = ENV[name]
663
else # http_proxy is safe to use because ENV is case sensitive.
664
proxy_uri = ENV[name]
667
# Use CGI_HTTP_PROXY. cf. libwww-perl.
668
proxy_uri = ENV["CGI_#{name.upcase}"]
670
elsif name == 'http_proxy'
671
unless proxy_uri = ENV[name]
672
if proxy_uri = ENV[name.upcase]
673
warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.'
677
proxy_uri = ENV[name] || ENV[name.upcase]
680
if proxy_uri && self.host
683
addr = IPSocket.getaddress(self.host)
684
proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
690
proxy_uri = URI.parse(proxy_uri)
692
if no_proxy = ENV[name] || ENV[name.upcase]
693
no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
694
if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
695
(!port || self.port == port.to_i)
709
# Retrieves this URI through OpenURI.open_http.
#
# buf::     buffer object, passed through to OpenURI.open_http.
# proxy::   proxy information, passed through unchanged.
# options:: options hash, passed through unchanged.
def buffer_open(buf, proxy, options) # :nodoc:
710
# self is passed as the request target.
OpenURI.open_http(buf, self, proxy, options)
713
include OpenURI::OpenRead
717
def buffer_open(buf, proxy, options) # :nodoc:
719
OpenURI.open_http(buf, self, proxy, options)
724
directories = self.path.split(%r{/}, -1)
725
directories.shift if directories[0] == '' # strip a field before leading slash
726
directories.each {|d|
727
d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
729
unless filename = directories.pop
730
raise ArgumentError, "no filename: #{self.inspect}"
732
directories.each {|d|
734
raise ArgumentError, "invalid directory: #{d.inspect}"
737
if /[\r\n]/ =~ filename
738
raise ArgumentError, "invalid filename: #{filename.inspect}"
740
typecode = self.typecode
741
if typecode && /\A[aid]\z/ !~ typecode
742
raise ArgumentError, "invalid typecode: #{typecode.inspect}"
745
# The access sequence is defined by RFC 1738
746
ftp = Net::FTP.open(self.host)
747
# todo: extract user/passwd from .netrc.
750
user, passwd = self.userinfo.split(/:/) if self.userinfo
751
ftp.login(user, passwd)
752
directories.each {|cwd|
753
ftp.voidcmd("CWD #{cwd}")
756
# xxx: typecode D is not handled.
757
ftp.voidcmd("TYPE #{typecode.upcase}")
759
if options[:content_length_proc]
760
options[:content_length_proc].call(ftp.size(filename))
762
ftp.retrbinary("RETR #{filename}", 4096) { |str|
764
options[:progress_proc].call(buf.size) if options[:progress_proc]
770
include OpenURI::OpenRead