self-close certain XML/HTML elements.

When an XML element has no child nodes, a self-closing tag is used. When parsing
documents/elements in HTML mode, this is only done if the element is a so-called
"void element" (e.g. <link> tags).

This fixes #46.
This commit is contained in:
Yorick Peterse 2014-09-16 00:44:38 +02:00
parent 6fc7e2e254
commit 32b11ef1e2
5 changed files with 106 additions and 33 deletions

View File

@ -18,6 +18,7 @@ if RUBY_PLATFORM == 'java'
end end
#:nocov: #:nocov:
require_relative 'oga/xml/html_void_elements'
require_relative 'oga/xml/querying' require_relative 'oga/xml/querying'
require_relative 'oga/xml/traversal' require_relative 'oga/xml/traversal'
require_relative 'oga/xml/node' require_relative 'oga/xml/node'

View File

@ -211,7 +211,12 @@ module Oga
# @return [String] # @return [String]
# #
def to_xml def to_xml
ns = namespace_name ? "#{namespace_name}:" : '' if namespace_name
full_name = "#{namespace_name}:#{name}"
else
full_name = name
end
body = children.map(&:to_xml).join('') body = children.map(&:to_xml).join('')
attrs = '' attrs = ''
@ -219,7 +224,11 @@ module Oga
attrs << " #{attr.to_xml}" attrs << " #{attr.to_xml}"
end end
return "<#{ns}#{name}#{attrs}>#{body}</#{ns}#{name}>" if self_closing?
return "<#{full_name}#{attrs} />"
else
return "<#{full_name}#{attrs}>#{body}</#{full_name}>"
end
end end
## ##
@ -278,6 +287,23 @@ module Oga
return merged return merged
end end
##
# Tells whether this element should be serialised as a self-closing tag.
#
# An element without child nodes is self-closing. When the root node is a
# Document of type `:html` this only holds for names listed in
# HTML_VOID_ELEMENTS; any other element is never self-closing there.
#
# @return [TrueClass|FalseClass]
#
def self_closing?
  childless = children.empty?
  document  = root_node
  in_html   = document.is_a?(Document) && document.type == :html

  # Inside HTML documents only void elements may be self-closed.
  return false if in_html && !HTML_VOID_ELEMENTS.include?(name)

  childless
end
private private
## ##

View File

@ -0,0 +1,28 @@
require 'set'

module Oga
  module XML
    ##
    # Names of the HTML void elements. These names are consulted when HTML
    # lexing is enabled and when deciding whether an element in an HTML
    # document may be serialised as a self-closing tag.
    #
    # The Set is frozen so that this shared constant can't be modified by
    # accident at runtime.
    #
    # @return [Set]
    #
    HTML_VOID_ELEMENTS = Set.new([
      'area',
      'base',
      'br',
      'col',
      'command',
      'embed',
      'hr',
      'img',
      'input',
      'keygen',
      'link',
      'meta',
      'param',
      'source',
      'track',
      'wbr'
    ]).freeze
  end # XML
end # Oga

View File

@ -40,31 +40,6 @@ module Oga
class Lexer class Lexer
attr_reader :html attr_reader :html
##
# Names of the HTML void elements that should be handled when HTML lexing
# is enabled.
#
# @return [Set]
#
HTML_VOID_ELEMENTS = Set.new([
'area',
'base',
'br',
'col',
'command',
'embed',
'hr',
'img',
'input',
'keygen',
'link',
'meta',
'param',
'source',
'track',
'wbr'
])
## ##
# @param [String|IO] data The data to lex. This can either be a String or # @param [String|IO] data The data to lex. This can either be a String or
# an IO instance. # an IO instance.

View File

@ -267,17 +267,18 @@ describe Oga::XML::Element do
context '#to_xml' do context '#to_xml' do
example 'generate the corresponding XML' do example 'generate the corresponding XML' do
described_class.new(:name => 'p').to_xml.should == '<p></p>' described_class.new(:name => 'p').to_xml.should == '<p />'
end end
example 'include the namespace if present' do example 'include the namespace if present' do
instance = described_class.new( instance = described_class.new(
:name => 'p', :name => 'p',
:namespace_name => 'foo', :namespace_name => 'foo',
:namespaces => {'foo' => Oga::XML::Namespace.new(:name => 'foo')} :namespaces => {'foo' => Oga::XML::Namespace.new(:name => 'foo')},
:children => [Oga::XML::Text.new(:text => 'Foo')]
) )
instance.to_xml.should == '<foo:p></foo:p>' instance.to_xml.should == '<foo:p>Foo</foo:p>'
end end
example 'include a single attribute if present' do example 'include a single attribute if present' do
@ -288,7 +289,7 @@ describe Oga::XML::Element do
] ]
) )
instance.to_xml.should == '<p key="value"></p>' instance.to_xml.should == '<p key="value" />'
end end
example 'include multiple attributes if present' do example 'include multiple attributes if present' do
@ -300,7 +301,7 @@ describe Oga::XML::Element do
] ]
) )
instance.to_xml.should == '<p key1="value1" key2="value2"></p>' instance.to_xml.should == '<p key1="value1" key2="value2" />'
end end
example 'include the child nodes if present' do example 'include the child nodes if present' do
@ -319,7 +320,21 @@ describe Oga::XML::Element do
:namespaces => {'xmlns' => namespace} :namespaces => {'xmlns' => namespace}
) )
instance.to_xml.should == '<foo></foo>' instance.to_xml.should == '<foo />'
end
example 'generate the XML for the HTML <script> element' do
element = described_class.new(:name => 'script')
document = Oga::XML::Document.new(:type => :html, :children => [element])
element.to_xml.should == '<script></script>'
end
example 'generate the XML for the HTML <link> element' do
element = described_class.new(:name => 'link')
document = Oga::XML::Document.new(:type => :html, :children => [element])
element.to_xml.should == '<link />'
end end
end end
@ -411,4 +426,32 @@ describe Oga::XML::Element do
@parent_ns['baz'].uri.should == 'yyy' @parent_ns['baz'].uri.should == 'yyy'
end end
end end
##
# Specs for Element#self_closing?, covering elements created on their own as
# well as elements passed as children to a Document of type :html.
#
context '#self_closing?' do
# No children and no HTML document: self-closing.
example 'return true for an empty XML element' do
described_class.new(:name => 'foo').should be_self_closing
end
# A text child prevents the element from being self-closing.
example 'return false for a non empty XML element' do
text = Oga::XML::Text.new(:text => 'bar')
node = described_class.new(:name => 'foo', :children => [text])
node.should_not be_self_closing
end
# NOTE(review): `document` is never read in the examples below; presumably
# Document.new attaches the element so that its root node resolves to the
# HTML document -- confirm against Document#initialize.
example 'return true for an HTML void element' do
element = described_class.new(:name => 'link')
document = Oga::XML::Document.new(:type => :html, :children => [element])
element.should be_self_closing
end
# <script> is not a void element and has a text child here.
example 'return false for a non empty HTML element' do
text = Oga::XML::Text.new(:text => 'alert()')
element = described_class.new(:name => 'script', :children => [text])
document = Oga::XML::Document.new(:type => :html, :children => [element])
element.should_not be_self_closing
end
end
end end