From 074b53c18c85eaeba09557f6b0c5a6792f522c3e Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 16 Jun 2015 22:47:10 +0200 Subject: [PATCH] Fix entity encoding of attribute values This ensures that single and double quotes are also encoded, previously they would be left as is. Fixes #113 --- lib/oga/xml/attribute.rb | 6 +++++- lib/oga/xml/entities.rb | 30 ++++++++++++++++++++++++++++++ spec/oga/xml/attribute_spec.rb | 6 +++--- spec/oga/xml/entities_spec.rb | 22 ++++++++++++++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/lib/oga/xml/attribute.rb b/lib/oga/xml/attribute.rb index 1aa87fe..370aa62 100644 --- a/lib/oga/xml/attribute.rb +++ b/lib/oga/xml/attribute.rb @@ -101,7 +101,11 @@ module Oga full_name = name end - enc_value = value ? Entities.encode(value) : nil + if value + enc_value = Entities.encode_attribute(value) + else + enc_value = nil + end %Q(#{full_name}="#{enc_value}") end diff --git a/lib/oga/xml/entities.rb b/lib/oga/xml/entities.rb index c2000ff..e75092e 100644 --- a/lib/oga/xml/entities.rb +++ b/lib/oga/xml/entities.rb @@ -32,6 +32,20 @@ module Oga '<' => '&lt;', } + ## + # Hash containing characters and the corresponding XML entities to use + # when encoding XML/HTML attribute values. + # + # @return [Hash] + # + ENCODE_ATTRIBUTE_MAPPING = { + '&' => '&amp;', + '>' => '&gt;', + '<' => '&lt;', + "'" => '&apos;', + '"' => '&quot;' + } + ## # @return [String] # @@ -56,6 +70,12 @@ module Oga # ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|')) + ## + # @return [Regexp] + # + ENCODE_ATTRIBUTE_REGEXP = + Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|')) + ## # Decodes XML entities. # @@ -87,6 +107,16 @@ module Oga def self.encode(input, mapping = ENCODE_MAPPING) input.gsub(ENCODE_REGEXP, mapping) end + + ## + # Encodes special characters in an XML attribute value. 
+ # + # @param [String] input + # @return [String] + # + def self.encode_attribute(input) + input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING) + end end # Entities end # XML end # Oga diff --git a/spec/oga/xml/attribute_spec.rb b/spec/oga/xml/attribute_spec.rb index b4add2d..fbd9185 100644 --- a/spec/oga/xml/attribute_spec.rb +++ b/spec/oga/xml/attribute_spec.rb @@ -139,10 +139,10 @@ describe Oga::XML::Attribute do attr.to_xml.should == 'xmlns:class=""' end - it 'converts special characters to XML entities' do - attr = described_class.new(:name => 'href', :value => '&<>') + it 'decodes XML entities' do + attr = described_class.new(:name => 'href', :value => %q{&<>'"}) - attr.to_xml.should == 'href="&amp;&lt;&gt;"' + attr.to_xml.should == 'href="&amp;&lt;&gt;&apos;&quot;"' end end diff --git a/spec/oga/xml/entities_spec.rb b/spec/oga/xml/entities_spec.rb index f3bdb39..62c0666 100644 --- a/spec/oga/xml/entities_spec.rb +++ b/spec/oga/xml/entities_spec.rb @@ -104,4 +104,26 @@ describe Oga::XML::Entities do described_class.encode('&lt;').should == '&amp;lt;' end end + + describe 'encode_attribute' do + it 'encodes & as &amp;' do + described_class.encode_attribute('&').should == '&amp;' + end + + it 'encodes > as &gt;' do + described_class.encode_attribute('>').should == '&gt;' + end + + it 'encodes < as &gt;' do + described_class.encode_attribute('<').should == '&lt;' + end + + it 'encodes a single quote as &apos;' do + described_class.encode_attribute("'").should == '&apos;' + end + + it 'encodes a double quote as &quot;' do + described_class.encode_attribute('"').should == '&quot;' + end + end end