Generating closing element & Doctype XML

This commit fixes two problems:

1. Doctypes introducing too many newlines
2. Elements with siblings and a common parent not being closed properly

== Doctypes

When generating the XML for a doctype, the XML::Generator class would
append a trailing newline. However, this meant that if the next text node
was also a newline you'd end up with two newlines. In previous versions of
Oga this worked because the old XML generation code would call
String#strip on the XML that was added after the doctype.

To support this in the new version we perform a lookahead right after
the call to XML::Generator#on_doctype and remove the trailing newline
added there in case the first child node is a text node that starts with
a newline.

== Closing Elements

When an element has a sibling following it _and_ does not have any child
nodes, it would not be closed properly when generating XML. This is due
to the "until next_node = ..." expression evaluating to true on its
first check, so the loop body is never executed.

There's probably some way to work around this by using the "loop"
method, but considering it's 02:09 I think the current approach is good
enough. Future me will probably hate me for it.
This commit is contained in:
Yorick Peterse 2016-09-27 02:03:56 +02:00
parent 01fa1513f4
commit e0e0687dc2
No known key found for this signature in database
GPG Key ID: EDD30D2BEB691AC9
3 changed files with 69 additions and 2 deletions

View File

@ -17,8 +17,8 @@ module Oga
def initialize(root) def initialize(root)
@start = root @start = root
if @start.respond_to?(:root_node) if @start.respond_to?(:html?)
@html_mode = @start.root_node.html? @html_mode = @start.html?
else else
@html_mode = false @html_mode = false
end end
@ -75,6 +75,10 @@ module Oga
break break
else else
# Make sure to always close the current element before
# moving to any siblings.
after_element(current, output) if current.is_a?(Element)
until next_node = current.is_a?(Node) && current.next until next_node = current.is_a?(Node) && current.next
if current.is_a?(Node) && current != @start if current.is_a?(Node) && current != @start
current = current.parent current = current.parent
@ -177,6 +181,14 @@ module Oga
on_doctype(doc.doctype, output) on_doctype(doc.doctype, output)
output << "\n" output << "\n"
end end
first_child = doc.children[0]
# Prevent excessive newlines in case the next node is a newline text
# node.
if first_child.is_a?(Text) && first_child.text.start_with?("\r\n", "\n")
output.chomp!
end
end end
# @param [Oga::XML::XmlDeclaration] node # @param [Oga::XML::XmlDeclaration] node

View File

@ -464,6 +464,13 @@ describe Oga::XML::Element do
element.to_xml.should == '<link />' element.to_xml.should == '<link />'
end end
it 'generates the XML for an empty explicitly closed HTML element' do
element = described_class.new(:name => 'html')
document = Oga::XML::Document.new(:type => :html, :children => [element])
element.to_xml.should == '<html></html>'
end
end end
describe '#inspect' do describe '#inspect' do

View File

@ -29,6 +29,27 @@ describe Oga::XML::Generator do
end end
end end
describe 'using an HTML Document as the root node' do
it 'returns a String' do
element = Oga::XML::Element.new(name: 'foo')
doc = Oga::XML::Document.new(children: [element], type: :html)
output = described_class.new(doc).to_xml
output.should == '<foo></foo>'
end
end
describe 'using an HTML Document as the root node with nested elements' do
it 'returns a String' do
el2 = Oga::XML::Element.new(name: 'bar')
el1 = Oga::XML::Element.new(name: 'foo', children: [el2])
doc = Oga::XML::Document.new(children: [el1], type: :html)
output = described_class.new(doc).to_xml
output.should == '<foo><bar></bar></foo>'
end
end
describe 'using Element nodes with siblings' do describe 'using Element nodes with siblings' do
it 'returns a String' do it 'returns a String' do
root = Oga::XML::Element.new( root = Oga::XML::Element.new(
@ -78,5 +99,32 @@ describe Oga::XML::Generator do
described_class.new(element2).to_xml.should == '<b />' described_class.new(element2).to_xml.should == '<b />'
end end
end end
describe 'using a parsed HTML document' do
it 'returns a String with the same formatting as the input document' do
input = <<-EOF
<!DOCTYPE html>
<html>
<head>
<title>Hello</title>
<meta charset="utf-8" />
</head>
<body>
<p>Hello</p>
<ul>
<li>Hello</li>
<li></li>
Hello
</ul>
<div></div></body>
</html>
EOF
doc = Oga.parse_html(input)
output = described_class.new(doc)
output.to_xml.should == input
end
end
end end
end end