Generate XML without relying on recursion
While using recursion is an easy way of generating XML, it can lead to the call stack overflowing when serialising documents with lots of nested nodes. Generally there are two ways of working around this: 1. Use an explicit stack (e.g. an array or a queue of sorts) instead of relying on the call stack. 2. Use an algorithm that doesn't use a stack at all (e.g. Morris traversal). This commit introduces the XML::Generator class, which can serialise documents back to XML without using a stack at all. This class takes advantage of XML nodes having access to not only their child nodes, but also their siblings and their parents. All XML serialisation logic now resides in the XML::Generator class. In turn, the various "to_xml" methods just use this class and serialise everything starting at "self".
This commit is contained in:
parent
9ac16e2e4f
commit
dd138981f6
11
CHANGELOG.md
11
CHANGELOG.md
|
@ -3,6 +3,17 @@
|
|||
This document contains details of the various releases and their release dates.
|
||||
Dates are in the format `yyyy-mm-dd`.
|
||||
|
||||
## 2.4 - Unreleased
|
||||
|
||||
### Serialising Large Documents
|
||||
|
||||
Oga can now serialise large documents without causing the call stack to overflow
|
||||
thanks to the new `Oga::XML::Generator` class. This class can generate XML
|
||||
without using a stack at all.
|
||||
|
||||
See issue <https://github.com/YorickPeterse/oga/issues/158> for more
|
||||
information.
|
||||
|
||||
## 2.3 - 2016-07-13
|
||||
|
||||
Thanks to various changes provided by Erik Michaels-Ober Oga can now be used to
|
||||
|
|
|
@ -23,6 +23,7 @@ if RUBY_PLATFORM == 'java'
|
|||
end
|
||||
#:nocov:
|
||||
|
||||
require 'oga/xml/to_xml'
|
||||
require 'oga/xml/html_void_elements'
|
||||
require 'oga/xml/entities'
|
||||
require 'oga/xml/querying'
|
||||
|
@ -42,6 +43,7 @@ require 'oga/xml/default_namespace'
|
|||
require 'oga/xml/attribute'
|
||||
require 'oga/xml/element'
|
||||
require 'oga/xml/node_set'
|
||||
require 'oga/xml/generator'
|
||||
|
||||
require 'oga/xml/sax_parser'
|
||||
require 'oga/xml/pull_parser'
|
||||
|
|
|
@ -3,6 +3,7 @@ module Oga
|
|||
# Class for storing information about a single XML attribute.
|
||||
class Attribute
|
||||
include ExpandedName
|
||||
include ToXML
|
||||
|
||||
# The name of the attribute.
|
||||
# @return [String]
|
||||
|
@ -81,19 +82,6 @@ module Oga
|
|||
|
||||
alias_method :to_s, :text
|
||||
|
||||
# @return [String]
|
||||
def to_xml
|
||||
if namespace_name
|
||||
full_name = "#{namespace_name}:#{name}"
|
||||
else
|
||||
full_name = name
|
||||
end
|
||||
|
||||
enc_value = value ? Entities.encode_attribute(value) : nil
|
||||
|
||||
%Q(#{full_name}="#{enc_value}")
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def inspect
|
||||
segments = []
|
||||
|
|
|
@ -2,12 +2,6 @@ module Oga
|
|||
module XML
|
||||
# Class used for storing information about CDATA tags.
|
||||
class Cdata < CharacterNode
|
||||
# Converts the node back to XML.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
"<![CDATA[#{text}]]>"
|
||||
end
|
||||
end # Cdata
|
||||
end # XML
|
||||
end # Oga
|
||||
|
|
|
@ -15,11 +15,6 @@ module Oga
|
|||
@text = options[:text]
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def to_xml
|
||||
text.to_s
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def inspect
|
||||
"#{self.class.to_s.split('::').last}(#{text.inspect})"
|
||||
|
|
|
@ -2,12 +2,6 @@ module Oga
|
|||
module XML
|
||||
# Class used for storing information about XML comments.
|
||||
class Comment < CharacterNode
|
||||
# Converts the node back to XML.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
"<!--#{text}-->"
|
||||
end
|
||||
end # Comment
|
||||
end # XML
|
||||
end # Oga
|
||||
|
|
|
@ -2,6 +2,8 @@ module Oga
|
|||
module XML
|
||||
# Class used for storing information about Doctypes.
|
||||
class Doctype
|
||||
include ToXML
|
||||
|
||||
# The name of the doctype (e.g. "HTML").
|
||||
# @return [String]
|
||||
attr_accessor :name
|
||||
|
@ -39,20 +41,6 @@ module Oga
|
|||
@inline_rules = options[:inline_rules]
|
||||
end
|
||||
|
||||
# Converts the doctype back to XML.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
segments = "<!DOCTYPE #{name}"
|
||||
|
||||
segments << " #{type}" if type
|
||||
segments << %Q{ "#{public_id}"} if public_id
|
||||
segments << %Q{ "#{system_id}"} if system_id
|
||||
segments << " [#{inline_rules}]" if inline_rules
|
||||
|
||||
segments + '>'
|
||||
end
|
||||
|
||||
# Inspects the doctype.
|
||||
#
|
||||
# @return [String]
|
||||
|
|
|
@ -5,6 +5,7 @@ module Oga
|
|||
class Document
|
||||
include Querying
|
||||
include Traversal
|
||||
include ToXML
|
||||
|
||||
# @return [Oga::XML::Doctype]
|
||||
attr_accessor :doctype
|
||||
|
@ -56,23 +57,6 @@ module Oga
|
|||
self
|
||||
end
|
||||
|
||||
# Converts the document and its child nodes to XML.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
xml = children.map(&:to_xml).join('')
|
||||
|
||||
if doctype
|
||||
xml = doctype.to_xml + "\n" + xml.strip
|
||||
end
|
||||
|
||||
if xml_declaration
|
||||
xml = xml_declaration.to_xml + "\n" + xml.strip
|
||||
end
|
||||
|
||||
xml
|
||||
end
|
||||
|
||||
# @return [TrueClass|FalseClass]
|
||||
def html?
|
||||
type.equal?(:html)
|
||||
|
@ -99,6 +83,11 @@ Document(
|
|||
)
|
||||
EOF
|
||||
end
|
||||
|
||||
# @return [FalseClass]
|
||||
def literal_html_name?
|
||||
false
|
||||
end
|
||||
end # Document
|
||||
end # XML
|
||||
end # Oga
|
||||
|
|
|
@ -211,30 +211,6 @@ module Oga
|
|||
@children = NodeSet.new([text_node], self)
|
||||
end
|
||||
|
||||
# Converts the element and its child elements to XML.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
if namespace_name
|
||||
full_name = "#{namespace_name}:#{name}"
|
||||
else
|
||||
full_name = name
|
||||
end
|
||||
|
||||
body = children.map(&:to_xml).join('')
|
||||
attrs = ''
|
||||
|
||||
attributes.each do |attr|
|
||||
attrs << " #{attr.to_xml}"
|
||||
end
|
||||
|
||||
if self_closing?
|
||||
return "<#{full_name}#{attrs} />"
|
||||
else
|
||||
return "<#{full_name}#{attrs}>#{body}</#{full_name}>"
|
||||
end
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def inspect
|
||||
segments = []
|
||||
|
@ -323,6 +299,14 @@ module Oga
|
|||
end
|
||||
end
|
||||
|
||||
# Returns true if the current element name is the name of one of the
|
||||
# literal HTML elements.
|
||||
#
|
||||
# @return [TrueClass|FalseClass]
|
||||
def literal_html_name?
|
||||
Lexer::LITERAL_HTML_ELEMENTS.allow?(name)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Registers namespaces based on any "xmlns" attributes.
|
||||
|
|
|
@ -0,0 +1,198 @@
|
|||
module Oga
|
||||
module XML
|
||||
# Class for generating XML as a String based on an existing document.
|
||||
#
|
||||
# Basic usage:
|
||||
#
|
||||
# element = Oga::XML::Element.new(name: 'root')
|
||||
# element.inner_text = 'hello'
|
||||
#
|
||||
# gen = Oga::XML::Generator.new(element)
|
||||
#
|
||||
# gen.to_xml # => "<root>hello</root>"
|
||||
#
|
||||
# @private
|
||||
class Generator
|
||||
# @param [Oga::XML::Document|Oga::XML::Node] root The node to serialise.
|
||||
def initialize(root)
|
||||
@start = root
|
||||
|
||||
if @start.respond_to?(:root_node)
|
||||
@html_mode = @start.root_node.html?
|
||||
else
|
||||
@html_mode = false
|
||||
end
|
||||
end
|
||||
|
||||
# Returns the XML for the current root node.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
current = @start
|
||||
output = ''
|
||||
|
||||
while current
|
||||
children = false
|
||||
|
||||
# Determine what callback to use for the current node. The order of
|
||||
# this statement is based on how likely it is for an arm to match.
|
||||
case current
|
||||
when Oga::XML::Element
|
||||
callback = :on_element
|
||||
children = true
|
||||
when Oga::XML::Text
|
||||
callback = :on_text
|
||||
when Oga::XML::Cdata
|
||||
callback = :on_cdata
|
||||
when Oga::XML::Comment
|
||||
callback = :on_comment
|
||||
when Oga::XML::Attribute
|
||||
callback = :on_attribute
|
||||
when Oga::XML::ProcessingInstruction
|
||||
callback = :on_processing_instruction
|
||||
when Oga::XML::Doctype
|
||||
callback = :on_doctype
|
||||
when Oga::XML::XmlDeclaration
|
||||
callback = :on_xml_declaration
|
||||
when Oga::XML::Document
|
||||
callback = :on_document
|
||||
children = true
|
||||
else
|
||||
raise TypeError, "Can't serialize #{current.class} to XML"
|
||||
end
|
||||
|
||||
send(callback, current, output)
|
||||
|
||||
if child_node = children && current.children[0]
|
||||
current = child_node
|
||||
else
|
||||
until next_node = current.is_a?(Node) && current.next
|
||||
if current.is_a?(Node) && current != @start
|
||||
current = current.parent
|
||||
end
|
||||
|
||||
send(:after_element, current, output) if current.is_a?(Element)
|
||||
|
||||
break if current == @start
|
||||
end
|
||||
|
||||
current = next_node
|
||||
end
|
||||
end
|
||||
|
||||
output
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Text] node
|
||||
# @param [String] output
|
||||
def on_text(node, output)
|
||||
if @html_mode && (parent = node.parent) && parent.literal_html_name?
|
||||
output << node.text
|
||||
else
|
||||
output << Entities.encode(node.text)
|
||||
end
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Cdata] node
|
||||
# @param [String] output
|
||||
def on_cdata(node, output)
|
||||
output << "<![CDATA[#{node.text}]]>"
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Comment] node
|
||||
# @param [String] output
|
||||
def on_comment(node, output)
|
||||
output << "<!--#{node.text}-->"
|
||||
end
|
||||
|
||||
# @param [Oga::XML::ProcessingInstruction] node
|
||||
# @param [String] output
|
||||
def on_processing_instruction(node, output)
|
||||
output << "<?#{node.name}#{node.text}?>"
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Element] element
|
||||
# @param [String] output The String the element's opening tag is appended to.
|
||||
def on_element(element, output)
|
||||
name = element.expanded_name
|
||||
attrs = ''
|
||||
|
||||
element.attributes.each do |attr|
|
||||
attrs << ' '
|
||||
on_attribute(attr, attrs)
|
||||
end
|
||||
|
||||
if self_closing?(element)
|
||||
output << "<#{name}#{attrs} />"
|
||||
else
|
||||
output << "<#{name}#{attrs}>"
|
||||
end
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Element] element
|
||||
# @param [String] output
|
||||
def after_element(element, output)
|
||||
output << "</#{element.expanded_name}>" unless self_closing?(element)
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Attribute] attr
|
||||
# @param [String] output
|
||||
def on_attribute(attr, output)
|
||||
name = attr.expanded_name
|
||||
enc_value = attr.value ? Entities.encode_attribute(attr.value) : nil
|
||||
|
||||
output << %Q(#{name}="#{enc_value}")
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Doctype] node
|
||||
# @param [String] output
|
||||
def on_doctype(node, output)
|
||||
output << "<!DOCTYPE #{node.name}"
|
||||
|
||||
output << " #{node.type}" if node.type
|
||||
output << %Q{ "#{node.public_id}"} if node.public_id
|
||||
output << %Q{ "#{node.system_id}"} if node.system_id
|
||||
output << " [#{node.inline_rules}]" if node.inline_rules
|
||||
output << '>'
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Document] doc
|
||||
# @param [String] output
|
||||
def on_document(doc, output)
|
||||
if doc.xml_declaration
|
||||
on_xml_declaration(doc.xml_declaration, output)
|
||||
output << "\n"
|
||||
end
|
||||
|
||||
if doc.doctype
|
||||
on_doctype(doc.doctype, output)
|
||||
output << "\n"
|
||||
end
|
||||
end
|
||||
|
||||
# @param [Oga::XML::XmlDeclaration] node
|
||||
# @param [String] output
|
||||
def on_xml_declaration(node, output)
|
||||
output << '<?xml'
|
||||
|
||||
[:version, :encoding, :standalone].each do |getter|
|
||||
value = node.send(getter)
|
||||
|
||||
output << %Q{ #{getter}="#{value}"} if value
|
||||
end
|
||||
|
||||
output << ' ?>'
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Element] element
|
||||
# @return [TrueClass|FalseClass]
|
||||
def self_closing?(element)
|
||||
if @html_mode && !HTML_VOID_ELEMENTS.allow?(element.name)
|
||||
false
|
||||
else
|
||||
element.children.empty?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -5,6 +5,7 @@ module Oga
|
|||
# nodes.
|
||||
class Node
|
||||
include Traversal
|
||||
include ToXML
|
||||
|
||||
# @return [Oga::XML::NodeSet]
|
||||
attr_reader :node_set
|
||||
|
|
|
@ -15,11 +15,6 @@ module Oga
|
|||
@name = options[:name]
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def to_xml
|
||||
"<?#{name}#{text}?>"
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def inspect
|
||||
"ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
|
||||
|
|
|
@ -28,15 +28,6 @@ module Oga
|
|||
@text
|
||||
end
|
||||
|
||||
# @see [Oga::XML::CharacterNode#to_xml]
|
||||
def to_xml
|
||||
return super if inside_literal_html?
|
||||
|
||||
Entities.encode(super)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# @return [TrueClass|FalseClass]
|
||||
def decode_entities?
|
||||
!@decoded && !inside_literal_html?
|
||||
|
@ -46,8 +37,7 @@ module Oga
|
|||
def inside_literal_html?
|
||||
node = parent
|
||||
|
||||
node.is_a?(Element) && html? &&
|
||||
Lexer::LITERAL_HTML_ELEMENTS.allow?(node.name)
|
||||
node && html? && node.literal_html_name?
|
||||
end
|
||||
end # Text
|
||||
end # XML
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
module Oga
|
||||
module XML
|
||||
# Module that provides a `#to_xml` method that serialises the current node
|
||||
# back to XML.
|
||||
module ToXML
|
||||
# @return [String]
|
||||
def to_xml
|
||||
Generator.new(self).to_xml
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -2,6 +2,8 @@ module Oga
|
|||
module XML
|
||||
# Class containing information about an XML declaration tag.
|
||||
class XmlDeclaration
|
||||
include ToXML
|
||||
|
||||
# @return [String]
|
||||
attr_accessor :version
|
||||
|
||||
|
@ -23,21 +25,6 @@ module Oga
|
|||
@standalone = options[:standalone]
|
||||
end
|
||||
|
||||
# Converts the declaration tag to XML.
|
||||
#
|
||||
# @return [String]
|
||||
def to_xml
|
||||
pairs = []
|
||||
|
||||
[:version, :encoding, :standalone].each do |getter|
|
||||
value = send(getter)
|
||||
|
||||
pairs << %Q{#{getter}="#{value}"} if value
|
||||
end
|
||||
|
||||
"<?xml #{pairs.join(' ')} ?>"
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def inspect
|
||||
segments = []
|
||||
|
|
|
@ -14,12 +14,6 @@ describe Oga::XML::CharacterNode do
|
|||
end
|
||||
end
|
||||
|
||||
describe '#to_xml' do
|
||||
it 'converts the node to XML' do
|
||||
described_class.new(:text => 'a').to_xml.should == 'a'
|
||||
end
|
||||
end
|
||||
|
||||
describe '#inspect' do
|
||||
it 'returns the inspect value' do
|
||||
described_class.new(:text => 'a').inspect.should == 'CharacterNode("a")'
|
||||
|
|
|
@ -142,4 +142,10 @@ Document(
|
|||
EOF
|
||||
end
|
||||
end
|
||||
|
||||
describe '#literal_html_name?' do
|
||||
it 'returns false' do
|
||||
described_class.new.literal_html_name?.should == false
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -636,4 +636,14 @@ describe Oga::XML::Element do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#literal_html_name?' do
|
||||
it 'returns true for an element name matching one of the literal HTML elements' do
|
||||
described_class.new(:name => 'script').literal_html_name?.should == true
|
||||
end
|
||||
|
||||
it 'returns false for an element name not matching one of the literal HTML elements' do
|
||||
described_class.new(:name => 'foo').literal_html_name?.should == false
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Oga::XML::Generator do
|
||||
describe '#to_xml' do
|
||||
describe 'using an unsupported root type' do
|
||||
it 'raises TypeError' do
|
||||
-> { described_class.new(:foo).to_xml }.should raise_error(TypeError)
|
||||
end
|
||||
end
|
||||
|
||||
describe 'using an Element as the root node' do
|
||||
it 'returns a String' do
|
||||
element = Oga::XML::Element.new(name: 'foo')
|
||||
element.set('attr', 'value')
|
||||
|
||||
output = described_class.new(element).to_xml
|
||||
|
||||
output.should == '<foo attr="value" />'
|
||||
end
|
||||
end
|
||||
|
||||
describe 'using a Document as the root node' do
|
||||
it 'returns a String' do
|
||||
element = Oga::XML::Element.new(name: 'foo')
|
||||
doc = Oga::XML::Document.new(children: [element])
|
||||
output = described_class.new(doc).to_xml
|
||||
|
||||
output.should == '<foo />'
|
||||
end
|
||||
end
|
||||
|
||||
describe 'using Element nodes with siblings' do
|
||||
it 'returns a String' do
|
||||
root = Oga::XML::Element.new(
|
||||
name: 'root',
|
||||
children: [
|
||||
Oga::XML::Element.new(name: 'a'),
|
||||
Oga::XML::Element.new(
|
||||
name: 'b',
|
||||
children: [Oga::XML::Element.new(name: 'c')]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
output = described_class.new(root).to_xml
|
||||
|
||||
output.should == '<root><a /><b><c /></b></root>'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue