From 32b11ef1e2b895caf653bd05994731cb91efdcfd Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 16 Sep 2014 00:44:38 +0200 Subject: [PATCH] self-close certain XML/HTML elements. When an XML element has no child nodes a self-closing tag is used. When parsing documents/elements in HTML mode this is only done if the element is a so called "void element" (e.g. <link> tags). This fixes #46. --- lib/oga.rb | 1 + lib/oga/xml/element.rb | 30 +++++++++++++++-- lib/oga/xml/html_void_elements.rb | 28 ++++++++++++++++ lib/oga/xml/lexer.rb | 25 -------------- spec/oga/xml/element_spec.rb | 55 +++++++++++++++++++++++++++---- 5 files changed, 106 insertions(+), 33 deletions(-) create mode 100644 lib/oga/xml/html_void_elements.rb diff --git a/lib/oga.rb b/lib/oga.rb index 56b8e32..c0d9776 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -18,6 +18,7 @@ if RUBY_PLATFORM == 'java' end #:nocov: +require_relative 'oga/xml/html_void_elements' require_relative 'oga/xml/querying' require_relative 'oga/xml/traversal' require_relative 'oga/xml/node' diff --git a/lib/oga/xml/element.rb b/lib/oga/xml/element.rb index 9b04f01..2600e67 100644 --- a/lib/oga/xml/element.rb +++ b/lib/oga/xml/element.rb @@ -211,7 +211,12 @@ module Oga # @return [String] # def to_xml - ns = namespace_name ? "#{namespace_name}:" : '' + if namespace_name + full_name = "#{namespace_name}:#{name}" + else + full_name = name + end + body = children.map(&:to_xml).join('') attrs = '' @@ -219,7 +224,11 @@ module Oga attrs << " #{attr.to_xml}" end - return "<#{ns}#{name}#{attrs}>#{body}</#{ns}#{name}>" + if self_closing? + return "<#{full_name}#{attrs} />" + else + return "<#{full_name}#{attrs}>#{body}</#{full_name}>" + end end ## @@ -278,6 +287,23 @@ module Oga return merged end + ## + # Returns `true` if the element is a self-closing element. + # + # @return [TrueClass|FalseClass] + # + def self_closing? + self_closing = children.empty? 
+ root = root_node + + if root.is_a?(Document) and root.type == :html \ + and !HTML_VOID_ELEMENTS.include?(name) + self_closing = false + end + + return self_closing + end + private ## diff --git a/lib/oga/xml/html_void_elements.rb b/lib/oga/xml/html_void_elements.rb new file mode 100644 index 0000000..4196407 --- /dev/null +++ b/lib/oga/xml/html_void_elements.rb @@ -0,0 +1,28 @@ +module Oga + module XML + ## + # Names of the HTML void elements that should be handled when HTML lexing + # is enabled. + # + # @return [Set] + # + HTML_VOID_ELEMENTS = Set.new([ + 'area', + 'base', + 'br', + 'col', + 'command', + 'embed', + 'hr', + 'img', + 'input', + 'keygen', + 'link', + 'meta', + 'param', + 'source', + 'track', + 'wbr' + ]) + end # XML +end # Oga diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 8021bdc..3030be3 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -40,31 +40,6 @@ module Oga class Lexer attr_reader :html - ## - # Names of the HTML void elements that should be handled when HTML lexing - # is enabled. - # - # @return [Set] - # - HTML_VOID_ELEMENTS = Set.new([ - 'area', - 'base', - 'br', - 'col', - 'command', - 'embed', - 'hr', - 'img', - 'input', - 'keygen', - 'link', - 'meta', - 'param', - 'source', - 'track', - 'wbr' - ]) - ## # @param [String|IO] data The data to lex. This can either be a String or # an IO instance. diff --git a/spec/oga/xml/element_spec.rb b/spec/oga/xml/element_spec.rb index c66fa8c..828255d 100644 --- a/spec/oga/xml/element_spec.rb +++ b/spec/oga/xml/element_spec.rb @@ -267,17 +267,18 @@ describe Oga::XML::Element do context '#to_xml' do example 'generate the corresponding XML' do - described_class.new(:name => 'p').to_xml.should == '<p></p>' + described_class.new(:name => 'p').to_xml.should == '<p />' end example 'include the namespace if present' do instance = described_class.new( :name => 'p', :namespace_name => 'foo', - :namespaces => {'foo' => Oga::XML::Namespace.new(:name => 'foo')} + :namespaces => {'foo' => Oga::XML::Namespace.new(:name => 'foo')}, + :children => [Oga::XML::Text.new(:text => 'Foo')] ) - instance.to_xml.should == '<foo:p></foo:p>' + instance.to_xml.should == '<foo:p>Foo</foo:p>' end example 'include a single attribute if present' do @@ -288,7 +289,7 @@ describe Oga::XML::Element do ] ) - instance.to_xml.should == '<p key="value"></p>' + instance.to_xml.should == '<p key="value" />' end example 'include multiple attributes if present' do @@ -300,7 +301,7 @@ describe Oga::XML::Element do ] ) - instance.to_xml.should == '<p key1="value1" key2="value2"></p>' + instance.to_xml.should == '<p key1="value1" key2="value2" />' end example 'include the child nodes if present' do @@ -319,7 +320,21 @@ describe Oga::XML::Element do :namespaces => {'xmlns' => namespace} ) - instance.to_xml.should == '<foo></foo>' + instance.to_xml.should == '<foo />' + end + + example 'generate the XML for the HTML <script> element' do + element = described_class.new(:name => 'script') + document = Oga::XML::Document.new(:type => :html, :children => [element]) + + element.to_xml.should == '<script></script>' + end + + example 'generate the XML for the HTML <link> element' do + element = described_class.new(:name => 'link') + document = Oga::XML::Document.new(:type => :html, :children => [element]) + + element.to_xml.should == '<link />' end end @@ -411,4 +426,32 @@ describe Oga::XML::Element do @parent_ns['baz'].uri.should == 'yyy' end end + + context '#self_closing?' do + example 'return true for an empty XML element' do + described_class.new(:name => 'foo').should be_self_closing + end + + example 'return false for a non empty XML element' do + text = Oga::XML::Text.new(:text => 'bar') + node = described_class.new(:name => 'foo', :children => [text]) + + node.should_not be_self_closing + end + + example 'return true for an HTML void element' do + element = described_class.new(:name => 'link') + document = Oga::XML::Document.new(:type => :html, :children => [element]) + + element.should be_self_closing + end + + example 'return false for a non empty HTML element' do + text = Oga::XML::Text.new(:text => 'alert()') + element = described_class.new(:name => 'script', :children => [text]) + document = Oga::XML::Document.new(:type => :html, :children => [element]) + + element.should_not be_self_closing + end + end end