6
# This class is a wrapper around File IO and Encoding that helps RDoc load
7
# files and convert them to the correct encoding.
12
# Reads the contents of +filename+ and handles any encoding directives in it.
15
# The content will be converted to the +encoding+. If the file cannot be
16
# converted a warning will be printed and nil will be returned.
18
# If +force_transcode+ is true the document will be transcoded and any
19
# unknown character in the target encoding will be replaced with '?'
21
# NOTE(review): this listing appears to be an excerpt -- the interleaved bare
# numbers jump (e.g. 30 -> 33, 47 -> 53) and no `else`/`end` lines are
# visible, so the exact branch structure around the statements below cannot
# be confirmed from here. The comments describe only what each visible
# statement does.
def self.read_file filename, encoding, force_transcode = false
22
# Read the whole file in binary mode.
content = open filename, "rb" do |f| f.read end
24
# Strip a leading UTF-8 byte order mark (EF BB BF). `sub!` returns nil when
# nothing was replaced, so +utf8+ is truthy only when a BOM was present.
# NOTE(review): +utf8+ is not referenced in the visible lines; presumably it
# selects between the UTF-8 path and the "output encoding" path below --
# confirm against the full file.
utf8 = content.sub!(/\A\xef\xbb\xbf/, '')
26
# Apply (and remove) any encoding directive found at the top of the content.
RDoc::Encoding.set_encoding content
28
# Only attempt conversion when this Ruby defines the Encoding constant.
if Object.const_defined? :Encoding then
29
# Fall back to the default external encoding when the caller passed none.
encoding ||= Encoding.default_external
30
# Remember the current encoding label so it can be restored before a
# transcode attempt.
orig_encoding = content.encoding
33
# Label the bytes as UTF-8, then transcode them to the target encoding.
content.force_encoding Encoding::UTF_8
34
content.encode! encoding
36
# assume the content is in our output encoding
37
content.force_encoding encoding
40
# The bytes are not valid in the target encoding as-is.
unless content.valid_encoding? then
41
# revert and try to transcode
42
content.force_encoding orig_encoding
43
content.encode! encoding
46
# Still invalid after the transcode attempt: report the file as skipped.
# NOTE(review): the method's header comment says nil is returned in this
# case; the statement that does so is not visible in this excerpt.
unless content.valid_encoding? then
47
warn "unable to convert #{filename} to #{encoding}, skipping"
53
# An ArgumentError whose message names an unknown encoding is reported as a
# warning; any other ArgumentError is re-raised.
rescue ArgumentError => e
54
raise unless e.message =~ /unknown encoding name - (.*)/
55
warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
57
# A character has no mapping in the target encoding.
rescue Encoding::UndefinedConversionError => e
58
# With +force_transcode+, restore the original label and transcode again,
# replacing each undefined character with '?'.
if force_transcode then
59
content.force_encoding orig_encoding
60
content.encode! encoding, :undef => :replace, :replace => '?'
63
# NOTE(review): presumably this warning belongs to the non-forced branch; the
# `else` that would establish that is not visible here.
warn "unable to convert #{e.message} for #{filename}, skipping"
66
# The path is a directory or does not exist.
# NOTE(review): the handler body is not visible in this excerpt.
rescue Errno::EISDIR, Errno::ENOENT
71
# Sets the encoding of +string+ based on the magic comment
73
# NOTE(review): this listing appears to be an excerpt -- the interleaved bare
# numbers jump (78 -> 82) and no `end` lines are visible, so the remaining
# branch(es) of the `case` below cannot be confirmed from here.
def self.set_encoding string
74
# Capture the first line of +string+; when that line is a shebang (#!...),
# the line after it is captured as well. nil when +string+ has no newline.
first_line = string[/\A(?:#!.*\n)?.*\n/]
76
# Extract the encoding name from the captured text.
name = case first_line
77
# XML declaration: take the value of its quoted encoding="..." attribute.
when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
78
# "coding:" / "encoding=" style directive (case-insensitive): take the token
# that follows, up to whitespace or ';'.
when /\b(?:en)?coding[=:]\s*([^\s;]+)/i then $1
82
# Remove the first occurrence of the captured text from +string+, in place.
string.sub! first_line, ''
84
# Nothing more to do when this Ruby does not define the Encoding constant.
return unless Object.const_defined? :Encoding
86
# Look the name up and relabel +string+ with that encoding; force_encoding
# changes only the label, the bytes are not transcoded.
enc = Encoding.find name
87
string.force_encoding enc if enc