Generating closing element & Doctype XML
This commit fixes two problems: 1. Doctypes introducing too many newlines 2. Elements with siblings and a common parent not being closed properly == Doctypes When generating the XML for a doctype the XML::Generator class would append a trailing newline. This however meant that if the next text node was also a newline you'd now have two newlines. In previous versions of Oga this worked because the old XML generation code would call String#strip on the XML to add after the doctype. To support this in the new version we perform a lookahead in XML::Generator#on_doctype to remove any trailing newlines added by this method in case the first child node is a newline text node. == Closing Elements When an element has a sibling following it _and_ does not have any child nodes it would not be closed properly when generating XML. This is due to the "until next_node = ..." expression evaluating to true, thus never executing its body. There's probably some way to work around this by using the "loop" method, but considering it's 02:09 I think the current approach is good enough. Future me will probably hate me for it.
This commit is contained in:
parent
01fa1513f4
commit
e0e0687dc2
|
@ -17,8 +17,8 @@ module Oga
|
|||
def initialize(root)
|
||||
@start = root
|
||||
|
||||
if @start.respond_to?(:root_node)
|
||||
@html_mode = @start.root_node.html?
|
||||
if @start.respond_to?(:html?)
|
||||
@html_mode = @start.html?
|
||||
else
|
||||
@html_mode = false
|
||||
end
|
||||
|
@ -75,6 +75,10 @@ module Oga
|
|||
|
||||
break
|
||||
else
|
||||
# Make sure to always close the current element before
|
||||
# moving to any siblings.
|
||||
after_element(current, output) if current.is_a?(Element)
|
||||
|
||||
until next_node = current.is_a?(Node) && current.next
|
||||
if current.is_a?(Node) && current != @start
|
||||
current = current.parent
|
||||
|
@ -177,6 +181,14 @@ module Oga
|
|||
on_doctype(doc.doctype, output)
|
||||
output << "\n"
|
||||
end
|
||||
|
||||
first_child = doc.children[0]
|
||||
|
||||
# Prevent excessive newlines in case the next node is a newline text
|
||||
# node.
|
||||
if first_child.is_a?(Text) && first_child.text.start_with?("\r\n", "\n")
|
||||
output.chomp!
|
||||
end
|
||||
end
|
||||
|
||||
# @param [Oga::XML::XmlDeclaration] node
|
||||
|
|
|
@ -464,6 +464,13 @@ describe Oga::XML::Element do
|
|||
|
||||
element.to_xml.should == '<link />'
|
||||
end
|
||||
|
||||
it 'generates the XML for an empty explicitly closed HTML element' do
|
||||
element = described_class.new(:name => 'html')
|
||||
document = Oga::XML::Document.new(:type => :html, :children => [element])
|
||||
|
||||
element.to_xml.should == '<html></html>'
|
||||
end
|
||||
end
|
||||
|
||||
describe '#inspect' do
|
||||
|
|
|
@ -29,6 +29,27 @@ describe Oga::XML::Generator do
|
|||
end
|
||||
end
|
||||
|
||||
describe 'using an HTML Document as the root node' do
|
||||
it 'returns a String' do
|
||||
element = Oga::XML::Element.new(name: 'foo')
|
||||
doc = Oga::XML::Document.new(children: [element], type: :html)
|
||||
output = described_class.new(doc).to_xml
|
||||
|
||||
output.should == '<foo></foo>'
|
||||
end
|
||||
end
|
||||
|
||||
describe 'using an HTML Document as the root node with nested elements' do
|
||||
it 'returns a String' do
|
||||
el2 = Oga::XML::Element.new(name: 'bar')
|
||||
el1 = Oga::XML::Element.new(name: 'foo', children: [el2])
|
||||
doc = Oga::XML::Document.new(children: [el1], type: :html)
|
||||
output = described_class.new(doc).to_xml
|
||||
|
||||
output.should == '<foo><bar></bar></foo>'
|
||||
end
|
||||
end
|
||||
|
||||
describe 'using Element nodes with siblings' do
|
||||
it 'returns a String' do
|
||||
root = Oga::XML::Element.new(
|
||||
|
@ -78,5 +99,32 @@ describe Oga::XML::Generator do
|
|||
described_class.new(element2).to_xml.should == '<b />'
|
||||
end
|
||||
end
|
||||
|
||||
describe 'using a parsed HTML document' do
|
||||
it 'returns a String with the same formatting as the input document' do
|
||||
input = <<-EOF
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Hello</title>
|
||||
<meta charset="utf-8" />
|
||||
</head>
|
||||
<body>
|
||||
<p>Hello</p>
|
||||
<ul>
|
||||
<li>Hello</li>
|
||||
<li></li>
|
||||
Hello
|
||||
</ul>
|
||||
<div></div></body>
|
||||
</html>
|
||||
EOF
|
||||
|
||||
doc = Oga.parse_html(input)
|
||||
output = described_class.new(doc)
|
||||
|
||||
output.to_xml.should == input
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue