diff --git a/lib/oga.rb b/lib/oga.rb
index 5c18861..e0804f3 100644
--- a/lib/oga.rb
+++ b/lib/oga.rb
@@ -21,6 +21,7 @@ end
#:nocov:
require_relative 'oga/xml/html_void_elements'
+require_relative 'oga/xml/entities'
require_relative 'oga/xml/querying'
require_relative 'oga/xml/traversal'
require_relative 'oga/xml/node'
diff --git a/lib/oga/xml/attribute.rb b/lib/oga/xml/attribute.rb
index cbe2821..d291c4f 100644
--- a/lib/oga/xml/attribute.rb
+++ b/lib/oga/xml/attribute.rb
@@ -87,7 +87,9 @@ module Oga
full_name = name
end
- return %Q(#{full_name}="#{value}")
+ enc_value = value ? Entities.encode(value) : nil
+
+ return %Q(#{full_name}="#{enc_value}")
end
##
diff --git a/lib/oga/xml/entities.rb b/lib/oga/xml/entities.rb
new file mode 100644
index 0000000..c054705
--- /dev/null
+++ b/lib/oga/xml/entities.rb
@@ -0,0 +1,56 @@
+module Oga
+  module XML
+    module Entities
+      ##
+      # Hash containing XML entities and the corresponding characters.
+      #
+      # @return [Hash]
+      #
+      DECODE_MAPPING = {
+        '&amp;' => '&',
+        '&lt;'  => '<',
+        '&gt;'  => '>'
+      }
+
+      ##
+      # Hash containing characters and the corresponding XML entities.
+      #
+      # @return [Hash]
+      #
+      ENCODE_MAPPING = DECODE_MAPPING.invert
+
+      ##
+      # Regexps matching everything that decode/encode have to replace.
+      #
+      # @return [Regexp]
+      #
+      DECODE_REGEXP = Regexp.union(DECODE_MAPPING.keys)
+      ENCODE_REGEXP = Regexp.union(ENCODE_MAPPING.keys)
+
+      ##
+      # Decodes XML entities.
+      #
+      # Every entity is replaced in a single pass so that already decoded output
+      # is never decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
+      #
+      # @param [String] input
+      # @return [String]
+      #
+      def self.decode(input)
+        return input unless input.include?('&')
+
+        return input.gsub(DECODE_REGEXP, DECODE_MAPPING)
+      end
+
+      ##
+      # Encodes special characters as XML entities.
+      #
+      # @param [String] input
+      # @return [String]
+      #
+      def self.encode(input)
+        return input.gsub(ENCODE_REGEXP, ENCODE_MAPPING)
+      end
+    end # Entities
+  end # XML
+end # Oga
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index be91028..1fd396d 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -194,7 +194,7 @@ module Oga
# @param [String] value The data between the quotes.
#
def on_string(value)
- add_token(:T_STRING, value)
+ add_token(:T_STRING, Entities.decode(value)) # the token holds the decoded value
end
##
@@ -348,7 +348,9 @@ module Oga
# @param [String] value
#
def on_text(value)
- add_token(:T_TEXT, value) unless value.empty?
+ return if value.empty? # empty text never produces a token
+
+ add_token(:T_TEXT, Entities.decode(value)) # the token holds the decoded text
end
##
diff --git a/lib/oga/xml/text.rb b/lib/oga/xml/text.rb
index b7d4dcf..86bc4b5 100644
--- a/lib/oga/xml/text.rb
+++ b/lib/oga/xml/text.rb
@@ -5,7 +5,12 @@ module Oga
# have any children, attributes and the likes; just text.
#
class Text < CharacterNode
-
+ ##
+ # @see Oga::XML::CharacterNode#to_xml
+ #
+ def to_xml
+ return Entities.encode(super) # encode special characters in the parent's output
+ end
end # Text
end # XML
end # Oga
diff --git a/spec/oga/xml/attribute_spec.rb b/spec/oga/xml/attribute_spec.rb
index 23e1e4f..f15de0b 100644
--- a/spec/oga/xml/attribute_spec.rb
+++ b/spec/oga/xml/attribute_spec.rb
@@ -78,6 +78,12 @@ describe Oga::XML::Attribute do
attr.to_xml.should == 'xmlns:class=""'
end
+
+ example 'convert special characters to XML entities' do
+ attr = described_class.new(:name => 'href', :value => '&<>')
+
+ attr.to_xml.should == 'href="&amp;&lt;&gt;"'
+ end
end
context '#inspect' do
diff --git a/spec/oga/xml/entities_spec.rb b/spec/oga/xml/entities_spec.rb
new file mode 100644
index 0000000..89e2de0
--- /dev/null
+++ b/spec/oga/xml/entities_spec.rb
@@ -0,0 +1,31 @@
+require 'spec_helper'
+
+describe Oga::XML::Entities do
+  context 'decode' do
+    example 'decode &amp; into &' do
+      described_class.decode('&amp;').should == '&'
+    end
+
+    example 'decode &lt; into <' do
+      described_class.decode('&lt;').should == '<'
+    end
+
+    example 'decode &gt; into >' do
+      described_class.decode('&gt;').should == '>'
+    end
+  end
+
+  context 'encode' do
+    example 'encode & as &amp;' do
+      described_class.encode('&').should == '&amp;'
+    end
+
+    example 'encode < as &lt;' do
+      described_class.encode('<').should == '&lt;'
+    end
+
+    example 'encode > as &gt;' do
+      described_class.encode('>').should == '&gt;'
+    end
+  end
+end
diff --git a/spec/oga/xml/lexer/entities_spec.rb b/spec/oga/xml/lexer/entities_spec.rb
new file mode 100644
index 0000000..e5ba251
--- /dev/null
+++ b/spec/oga/xml/lexer/entities_spec.rb
@@ -0,0 +1,49 @@
+require 'spec_helper'
+
+describe Oga::XML::Lexer do
+  context 'converting XML entities in text tokens' do
+    example 'convert &amp; into &' do
+      lex('&amp;').should == [[:T_TEXT, '&', 1]]
+    end
+
+    example 'convert &lt; into <' do
+      lex('&lt;').should == [[:T_TEXT, '<', 1]]
+    end
+
+    example 'convert &gt; into >' do
+      lex('&gt;').should == [[:T_TEXT, '>', 1]]
+    end
+  end
+
+  context 'converting XML entities in string tokens' do
+    example 'convert &amp; into &' do
+      lex('<foo class="&amp;" />').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'foo', 1],
+        [:T_ATTR, 'class', 1],
+        [:T_STRING, '&', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+
+    example 'convert &lt; into <' do
+      lex('<foo class="&lt;" />').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'foo', 1],
+        [:T_ATTR, 'class', 1],
+        [:T_STRING, '<', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+
+    example 'convert &gt; into >' do
+      lex('<foo class="&gt;" />').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'foo', 1],
+        [:T_ATTR, 'class', 1],
+        [:T_STRING, '>', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+  end
+end
diff --git a/spec/oga/xml/text_spec.rb b/spec/oga/xml/text_spec.rb
index 6fd2728..0c5ff07 100644
--- a/spec/oga/xml/text_spec.rb
+++ b/spec/oga/xml/text_spec.rb
@@ -15,12 +15,16 @@ describe Oga::XML::Text do
end
context '#to_xml' do
- before do
- @instance = described_class.new(:text => 'foo')
+ example 'generate the corresponding XML' do
+ node = described_class.new(:text => 'foo')
+
+ node.to_xml.should == 'foo'
end
- example 'generate the corresponding XML' do
- @instance.to_xml.should == 'foo'
+ example 'encode special characters as XML entities' do
+ node = described_class.new(:text => '&<>')
+
+ node.to_xml.should == '&amp;&lt;&gt;'
end
end