3
require 'rexml/xmltokens'
6
# God, I hate DTDs. I really do. Why this idiot standard still
7
# plagues us is beyond me.
10
# Building blocks for recognising <!ENTITY ...> declarations. Every constant
# except ENTITYDECL is a String holding a regular-expression fragment that is
# interpolated into the fragments below it. NAME and REFERENCE are not
# defined here; presumably they come from rexml/xmltokens -- confirm.

# Contents of a character class (used inside [...] by PUBIDLITERAL): space,
# CR, LF, ASCII letters and digits, and the listed punctuation.
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
# A double- or single-quoted string that does not contain its own delimiter.
# Captures the literal including its quotes.
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
# A quoted run of PUBIDCHAR characters. The double-quoted form additionally
# allows an apostrophe; the single-quoted form does not.
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
# Either SYSTEM followed by a system literal, or PUBLIC followed by a public
# id literal and a system literal. The keyword and each literal are captured.
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
# The " NDATA name" suffix, including its leading whitespace.
NDATADECL = "\\s+NDATA\\s+#{NAME}"
# A parameter-entity reference: %name;
PEREFERENCE = "%#{NAME};"
# A quoted entity value. Inside the quotes, '%' and '&' may only appear as
# part of a PEREFERENCE or REFERENCE, and the delimiting quote may not appear.
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
# Definition part of a parameter entity: a literal value or an external id.
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
# Definition part of a general entity: a literal value, or an external id
# optionally followed by an NDATA declaration.
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
# Parameter-entity declaration: <!ENTITY % name definition>
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
# General-entity declaration: <!ENTITY name definition>
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
# Matches either kind of declaration, optionally preceded by whitespace.
# The \s* prefix is repeated on each branch because '|' binds more loosely
# than concatenation: written once, it would apply to the general-entity
# branch only, and a parameter-entity declaration with leading whitespace
# would match at a non-zero offset. Capture-group numbering is unaffected.
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
23
attr_reader :name, :external, :ref, :ndata, :pubid
25
# Create a new entity. Simple entities can be constructed by passing a
26
# name, value to the constructor; this creates a generic, plain entity
27
# reference. For anything more complicated, you have to pass a Source to
28
# the constructor with the entity definition, or use the accessor methods.
29
# +WARNING+: There is no validation of entity state except when the entity
30
# is read from a stream. If you start poking around with the accessors,
31
# you can easily create a non-conformant Entity. The best thing to do is
32
# dump the stupid DTDs and use XMLSchema instead.
34
# e = Entity.new( 'amp', '&' )
35
def initialize stream, value=nil, parent=nil, reference=false
37
@ndata = @pubid = @value = @external = nil
38
if stream.kind_of? Array
46
if stream[2] =~ /SYSTEM|PUBLIC/
48
if @external == 'SYSTEM'
50
@ndata = stream[4] if stream.size == 5
59
@reference = reference
66
# Evaluates whether the given string matches an entity definition,
67
# returning true if so, and false otherwise.
68
def Entity::matches? string
69
(ENTITYDECL =~ string) == 0
72
# Evaluates to the unnormalized value of this entity; that is, replacing
73
# all entities -- both %ent; and &ent; entities. This differs from
74
# +value()+ in that +value+ only replaces %ent; entities.
76
document.record_entity_expansion unless document.nil?
79
@unnormalized = Text::unnormalize(v, parent)
85
# Returns the value of this entity unprocessed -- raw. This is the
86
# normalized value; that is, with all %ent; and &ent; entities intact
91
# Write out a fully formed, correct entity definition (assuming the Entity
92
# object itself is valid.)
95
# An object implementing <TT>&lt;&lt;</TT> to which the entity will be
98
# *DEPRECATED* and ignored
99
def write out, indent=-1
101
out << '% ' if @reference
105
out << @external << ' '
107
q = @pubid.include?('"')?"'":'"'
108
out << q << @pubid << q << ' '
110
q = @ref.include?('"')?"'":'"'
111
out << q << @ref << q
112
out << ' NDATA ' << @ndata if @ndata
114
q = @value.include?('"')?"'":'"'
115
out << q << @value << q
120
# Returns this entity as a string. See write().
127
PEREFERENCE_RE = /#{PEREFERENCE}/um
128
# Returns the value of this entity. At the moment, only internal entities
129
# are processed. If the value contains internal references (IE,
130
# %blah;), those are replaced with their values. IE, if the doctype
132
# <!ENTITY % foo "bar">
133
# <!ENTITY yada "nanoo %foo; nanoo">
135
# doctype.entity('yada').value #-> "nanoo bar nanoo"
138
matches = @value.scan(PEREFERENCE_RE)
141
matches.each do |entity_reference|
142
entity_value = @parent.entity( entity_reference[0] )
143
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
152
# This is a set of entity constants -- the ones defined in the XML
153
# specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
156
# Each constant below is an entity built from a (name, value) pair.

# +gt+ with value ">"
GT = Entity.new('gt', '>')
# +lt+ with value "<"
LT = Entity.new('lt', '<')
# +amp+ with value "&"
AMP = Entity.new('amp', '&')
# +quot+ with value '"'
QUOT = Entity.new('quot', '"')
# +apos+ with value "'"
APOS = Entity.new('apos', "'")