From e0e0687dc29427c854c9fa6d3c19cee1c04f92c7 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 27 Sep 2016 02:03:56 +0200 Subject: [PATCH] Generating closing element & Doctype XML This commit fixes two problems: 1. Doctypes introducing too many newlines 2. Elements with siblings and a common parent not being closed properly == Doctypes When generating the XML for a doctype the XML::Generator class would append a trailing newline. This however meant that if the next text node was also a newline you'd now have two newlines. In previous versions of Oga this worked because the old XML generation code would call String#strip on the XML to add after the doctype. To support this in the new version we perform a lookahead in XML::Generator#on_doctype to remove any trailing newlines added by this method in case the first child node is a newline text node. == Closing Elements When an element has a sibling following it _and_ does not have any child nodes it would not be closed properly when generating XML. This is due to the "until next_node = ..." expression evaluating to true, thus never executing its body. There's probably some way to work around this by using the "loop" method, but considering it's 02:09 I think the current approach is good enough. Future me will probably hate me for it. --- lib/oga/xml/generator.rb | 16 ++++++++++-- spec/oga/xml/element_spec.rb | 7 +++++ spec/oga/xml/generator_spec.rb | 48 ++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/lib/oga/xml/generator.rb b/lib/oga/xml/generator.rb index 547b4d2..15ffa94 100644 --- a/lib/oga/xml/generator.rb +++ b/lib/oga/xml/generator.rb @@ -17,8 +17,8 @@ module Oga def initialize(root) @start = root - if @start.respond_to?(:root_node) - @html_mode = @start.root_node.html? + if @start.respond_to?(:html?) + @html_mode = @start.html? else @html_mode = false end @@ -75,6 +75,10 @@ module Oga break else + # Make sure to always close the current element before + # moving to any siblings. + after_element(current, output) if current.is_a?(Element) + until next_node = current.is_a?(Node) && current.next if current.is_a?(Node) && current != @start current = current.parent @@ -177,6 +181,14 @@ module Oga on_doctype(doc.doctype, output) output << "\n" end + + first_child = doc.children[0] + + # Prevent excessive newlines in case the next node is a newline text + # node. + if first_child.is_a?(Text) && first_child.text.start_with?("\r\n", "\n") + output.chomp! + end end # @param [Oga::XML::XmlDeclaration] node diff --git a/spec/oga/xml/element_spec.rb b/spec/oga/xml/element_spec.rb index 89c5e6b..41aae4a 100644 --- a/spec/oga/xml/element_spec.rb +++ b/spec/oga/xml/element_spec.rb @@ -464,6 +464,13 @@ describe Oga::XML::Element do element.to_xml.should == '' end + + it 'generates the XML for an empty explicitly closed HTML element' do + element = described_class.new(:name => 'html') + document = Oga::XML::Document.new(:type => :html, :children => [element]) + + element.to_xml.should == '' + end end describe '#inspect' do diff --git a/spec/oga/xml/generator_spec.rb b/spec/oga/xml/generator_spec.rb index 00d0a87..27a4fd1 100644 --- a/spec/oga/xml/generator_spec.rb +++ b/spec/oga/xml/generator_spec.rb @@ -29,6 +29,27 @@ describe Oga::XML::Generator do end end + describe 'using an HTML Document as the root node' do + it 'returns a String' do + element = Oga::XML::Element.new(name: 'foo') + doc = Oga::XML::Document.new(children: [element], type: :html) + output = described_class.new(doc).to_xml + + output.should == '' + end + end + + describe 'using an HTML Document as the root node with nested elements' do + it 'returns a String' do + el2 = Oga::XML::Element.new(name: 'bar') + el1 = Oga::XML::Element.new(name: 'foo', children: [el2]) + doc = Oga::XML::Document.new(children: [el1], type: :html) + output = described_class.new(doc).to_xml + + output.should == '' + end + end + describe 'using Element nodes with siblings' do it 'returns a String' do root = Oga::XML::Element.new( @@ -78,5 +99,32 @@ describe Oga::XML::Generator do described_class.new(element2).to_xml.should == '' end end + + describe 'using a parsed HTML document' do + it 'returns a String with the same formatting as the input document' do + input = <<-EOF + + + + Hello + + + +

Hello

+ +
+ + EOF + + doc = Oga.parse_html(input) + output = described_class.new(doc) + + output.to_xml.should == input + end + end end end