diff options
Diffstat (limited to 'lib/rexml/entity.rb')
-rw-r--r-- | lib/rexml/entity.rb | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb new file mode 100644 index 0000000..3d81fbc --- /dev/null +++ b/lib/rexml/entity.rb @@ -0,0 +1,166 @@ +require 'rexml/child' +require 'rexml/source' +require 'rexml/xmltokens' + +module REXML + # God, I hate DTDs. I really do. Why this idiot standard still + # plagues us is beyond me. + class Entity < Child + include XMLTokens + PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#" + SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))} + PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')} + EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))" + NDATADECL = "\\s+NDATA\\s+#{NAME}" + PEREFERENCE = "%#{NAME};" + ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))} + PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})" + ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" + PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" + GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" + ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + + attr_reader :name, :external, :ref, :ndata, :pubid + + # Create a new entity. Simple entities can be constructed by passing a + # name, value to the constructor; this creates a generic, plain entity + # reference. For anything more complicated, you have to pass a Source to + # the constructor with the entity definition, or use the accessor methods. + # +WARNING+: There is no validation of entity state except when the entity + # is read from a stream. If you start poking around with the accessors, + # you can easily create a non-conformant Entity. The best thing to do is + # dump the stupid DTDs and use XMLSchema instead. + # + # e = Entity.new( 'amp', '&' ) + def initialize stream, value=nil, parent=nil, reference=false + super(parent) + @ndata = @pubid = @value = @external = nil + if stream.kind_of? Array + @name = stream[1] + if stream[-1] == '%' + @reference = true + stream.pop + else + @reference = false + end + if stream[2] =~ /SYSTEM|PUBLIC/ + @external = stream[2] + if @external == 'SYSTEM' + @ref = stream[3] + @ndata = stream[4] if stream.size == 5 + else + @pubid = stream[3] + @ref = stream[4] + end + else + @value = stream[2] + end + else + @reference = reference + @external = nil + @name = stream + @value = value + end + end + + # Evaluates whether the given string matchs an entity definition, + # returning true if so, and false otherwise. + def Entity::matches? string + (ENTITYDECL =~ string) == 0 + end + + # Evaluates to the unnormalized value of this entity; that is, replacing + # all entities -- both %ent; and &ent; entities. This differs from + # +value()+ in that +value+ only replaces %ent; entities. + def unnormalized + document.record_entity_expansion unless document.nil? + v = value() + return nil if v.nil? + @unnormalized = Text::unnormalize(v, parent) + @unnormalized + end + + #once :unnormalized + + # Returns the value of this entity unprocessed -- raw. This is the + # normalized value; that is, with all %ent; and &ent; entities intact + def normalized + @value + end + + # Write out a fully formed, correct entity definition (assuming the Entity + # object itself is valid.) + # + # out:: + # An object implementing <TT><<<TT> to which the entity will be + # output + # indent:: + # *DEPRECATED* and ignored + def write out, indent=-1 + out << '<!ENTITY ' + out << '% ' if @reference + out << @name + out << ' ' + if @external + out << @external << ' ' + if @pubid + q = @pubid.include?('"')?"'":'"' + out << q << @pubid << q << ' ' + end + q = @ref.include?('"')?"'":'"' + out << q << @ref << q + out << ' NDATA ' << @ndata if @ndata + else + q = @value.include?('"')?"'":'"' + out << q << @value << q + end + out << '>' + end + + # Returns this entity as a string. See write(). + def to_s + rv = '' + write rv + rv + end + + PEREFERENCE_RE = /#{PEREFERENCE}/um + # Returns the value of this entity. At the moment, only internal entities + # are processed. If the value contains internal references (IE, + # %blah;), those are replaced with their values. IE, if the doctype + # contains: + # <!ENTITY % foo "bar"> + # <!ENTITY yada "nanoo %foo; nanoo> + # then: + # doctype.entity('yada').value #-> "nanoo bar nanoo" + def value + if @value + matches = @value.scan(PEREFERENCE_RE) + rv = @value.clone + if @parent + matches.each do |entity_reference| + entity_value = @parent.entity( entity_reference[0] ) + rv.gsub!( /%#{entity_reference.join};/um, entity_value ) + end + end + return rv + end + nil + end + end + + # This is a set of entity constants -- the ones defined in the XML + # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+. + module EntityConst + # +>+ + GT = Entity.new( 'gt', '>' ) + # +<+ + LT = Entity.new( 'lt', '<' ) + # +&+ + AMP = Entity.new( 'amp', '&' ) + # +"+ + QUOT = Entity.new( 'quot', '"' ) + # +'+ + APOS = Entity.new( 'apos', "'" ) + end +end |