diff --git a/lib/oga.rb b/lib/oga.rb
index 5c18861..e0804f3 100644
--- a/lib/oga.rb
+++ b/lib/oga.rb
@@ -21,6 +21,7 @@ end
 #:nocov:
 
 require_relative 'oga/xml/html_void_elements'
+require_relative 'oga/xml/entities'
 require_relative 'oga/xml/querying'
 require_relative 'oga/xml/traversal'
 require_relative 'oga/xml/node'
diff --git a/lib/oga/xml/attribute.rb b/lib/oga/xml/attribute.rb
index cbe2821..d291c4f 100644
--- a/lib/oga/xml/attribute.rb
+++ b/lib/oga/xml/attribute.rb
@@ -87,7 +87,9 @@ module Oga
           full_name = name
         end
 
-        return %Q(#{full_name}="#{value}")
+        enc_value = value ? Entities.encode(value) : nil
+
+        return %Q(#{full_name}="#{enc_value}")
       end
 
       ##
diff --git a/lib/oga/xml/entities.rb b/lib/oga/xml/entities.rb
new file mode 100644
index 0000000..c054705
--- /dev/null
+++ b/lib/oga/xml/entities.rb
@@ -0,0 +1,63 @@
+module Oga
+  module XML
+    module Entities
+      ##
+      # Hash containing XML entities and the corresponding characters.
+      #
+      # The `&amp;` mapping must come first: {encode} replaces characters one
+      # mapping at a time, so `&` has to be escaped before any entity is
+      # produced (see {Oga::XML::Text#to_xml}).
+      #
+      # @return [Hash]
+      #
+      DECODE_MAPPING = {
+        '&amp;' => '&',
+        '&lt;'  => '<',
+        '&gt;'  => '>'
+      }
+
+      ##
+      # Hash containing characters and the corresponding XML entities.
+      #
+      # @return [Hash]
+      #
+      ENCODE_MAPPING = DECODE_MAPPING.invert
+
+      ##
+      # Regexp matching any of the entities in {DECODE_MAPPING}.
+      #
+      # @return [Regexp]
+      #
+      DECODE_REGEXP = Regexp.union(DECODE_MAPPING.keys)
+
+      ##
+      # Decodes XML entities.
+      #
+      # All entities are replaced in a single pass so that the output of one
+      # replacement is never decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
+      #
+      # @param [String] input
+      # @return [String]
+      #
+      def self.decode(input)
+        return input unless input.include?('&')
+
+        return input.gsub(DECODE_REGEXP, DECODE_MAPPING)
+      end
+
+      ##
+      # Encodes special characters as XML entities.
+      #
+      # @param [String] input
+      # @return [String]
+      #
+      def self.encode(input)
+        ENCODE_MAPPING.each do |from, to|
+          input = input.gsub(from, to) if input.include?(from)
+        end
+
+        return input
+      end
+    end # Entities
+  end # XML
+end # Oga
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index be91028..1fd396d 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -194,7 +194,7 @@ module Oga
       # @param [String] value The data between the quotes.
       #
       def on_string(value)
-        add_token(:T_STRING, value)
+        add_token(:T_STRING, Entities.decode(value))
       end
 
       ##
@@ -348,7 +348,9 @@ module Oga
       # @param [String] value
       #
       def on_text(value)
-        add_token(:T_TEXT, value) unless value.empty?
+        return if value.empty?
+
+        add_token(:T_TEXT, Entities.decode(value))
       end
 
       ##
diff --git a/lib/oga/xml/text.rb b/lib/oga/xml/text.rb
index b7d4dcf..86bc4b5 100644
--- a/lib/oga/xml/text.rb
+++ b/lib/oga/xml/text.rb
@@ -5,7 +5,12 @@ module Oga
     # have any children, attributes and the likes; just text.
     #
     class Text < CharacterNode
-
+      ##
+      # @see [Oga::XML::CharacterNode#to_xml]
+      #
+      def to_xml
+        return Entities.encode(super)
+      end
     end # Text
   end # XML
 end # Oga
diff --git a/spec/oga/xml/attribute_spec.rb b/spec/oga/xml/attribute_spec.rb
index 23e1e4f..f15de0b 100644
--- a/spec/oga/xml/attribute_spec.rb
+++ b/spec/oga/xml/attribute_spec.rb
@@ -78,6 +78,12 @@ describe Oga::XML::Attribute do
 
       attr.to_xml.should == 'xmlns:class=""'
     end
+
+    example 'convert special characters to XML entities' do
+      attr = described_class.new(:name => 'href', :value => '&<>')
+
+      attr.to_xml.should == 'href="&amp;&lt;&gt;"'
+    end
   end
 
   context '#inspect' do
diff --git a/spec/oga/xml/entities_spec.rb b/spec/oga/xml/entities_spec.rb
new file mode 100644
index 0000000..89e2de0
--- /dev/null
+++ b/spec/oga/xml/entities_spec.rb
@@ -0,0 +1,35 @@
+require 'spec_helper'
+
+describe Oga::XML::Entities do
+  context 'decode' do
+    example 'decode &amp; into &' do
+      described_class.decode('&amp;').should == '&'
+    end
+
+    example 'decode &lt; into <' do
+      described_class.decode('&lt;').should == '<'
+    end
+
+    example 'decode &gt; into >' do
+      described_class.decode('&gt;').should == '>'
+    end
+
+    example 'decode &amp;lt; into &lt;' do
+      described_class.decode('&amp;lt;').should == '&lt;'
+    end
+  end
+
+  context 'encode' do
+    example 'encode & as &amp;' do
+      described_class.encode('&').should == '&amp;'
+    end
+
+    example 'encode < as &lt;' do
+      described_class.encode('<').should == '&lt;'
+    end
+
+    example 'encode > as &gt;' do
+      described_class.encode('>').should == '&gt;'
+    end
+  end
+end
diff --git a/spec/oga/xml/lexer/entities_spec.rb b/spec/oga/xml/lexer/entities_spec.rb
new file mode 100644
index 0000000..e5ba251
--- /dev/null
+++ b/spec/oga/xml/lexer/entities_spec.rb
@@ -0,0 +1,49 @@
+require 'spec_helper'
+
+describe Oga::XML::Lexer do
+  context 'converting XML entities in text tokens' do
+    example 'convert &amp; into &' do
+      lex('&amp;').should == [[:T_TEXT, '&', 1]]
+    end
+
+    example 'convert &lt; into <' do
+      lex('&lt;').should == [[:T_TEXT, '<', 1]]
+    end
+
+    example 'convert &gt; into >' do
+      lex('&gt;').should == [[:T_TEXT, '>', 1]]
+    end
+  end
+
+  context 'converting XML entities in string tokens' do
+    example 'convert &amp; into &' do
+      lex('<foo class="&amp;" />').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'foo', 1],
+        [:T_ATTR, 'class', 1],
+        [:T_STRING, '&', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+
+    example 'convert &lt; into <' do
+      lex('<foo class="&lt;" />').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'foo', 1],
+        [:T_ATTR, 'class', 1],
+        [:T_STRING, '<', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+
+    example 'convert &gt; into >' do
+      lex('<foo class="&gt;" />').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'foo', 1],
+        [:T_ATTR, 'class', 1],
+        [:T_STRING, '>', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+  end
+end
diff --git a/spec/oga/xml/text_spec.rb b/spec/oga/xml/text_spec.rb
index 6fd2728..0c5ff07 100644
--- a/spec/oga/xml/text_spec.rb
+++ b/spec/oga/xml/text_spec.rb
@@ -15,12 +15,16 @@ describe Oga::XML::Text do
   end
 
   context '#to_xml' do
-    before do
-      @instance = described_class.new(:text => 'foo')
+    example 'generate the corresponding XML' do
+      node = described_class.new(:text => 'foo')
+
+      node.to_xml.should == 'foo'
     end
 
-    example 'generate the corresponding XML' do
-      @instance.to_xml.should == 'foo'
+    example 'encode special characters as XML entities' do
+      node = described_class.new(:text => '&<>')
+
+      node.to_xml.should == '&amp;&lt;&gt;'
     end
   end
 