From 4bbf0c98aee057dd4753bca83190016cd1bcd4d4 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 30 Jul 2014 22:25:13 +0200 Subject: [PATCH] Use breadth-first search for returning all nodes. This still uses a stack but at least no longer relies on the call stack. I decided not to go with the Morris in-order algorithm [1] as it modifies the tree during a search. This would not work well if a document were to be accessed from multiple threads at once (which should be possible for read-only operations). I might change this method to actually perform a search (as opposed to just returning everything). This will require some closer inspection of the available XPath axes to determine if this is needed. Tests will also be added once I've taken care of the above. [1]: http://en.wikipedia.org/wiki/Tree_traversal#Morris_in-order_traversal_using_threading --- lib/oga/xml/document.rb | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/oga/xml/document.rb b/lib/oga/xml/document.rb index 580f464..6ea6efe 100644 --- a/lib/oga/xml/document.rb +++ b/lib/oga/xml/document.rb @@ -53,25 +53,25 @@ module Oga # Returns a NodeSet containing *all* the nodes in the current document. # Nodes are inserted in the order they appear in the document. # + # This method uses a breadth first search for tree traversal. See + # http://en.wikipedia.org/wiki/Breadth-first_search for more information. + # + # THINK: Turn into an actual search instead of returning everything? + # # @return [Oga::XML::NodeSet] # def all_nodes - return gather_child_nodes(self) - end - - ## - # Recursively retrieves all child nodes of `node` and returns them as a - # node set. - # - # @param [Oga::XML::Document|Oga::XML::Node] node - # @return [Oga::XML::NodeSet] - # - def gather_child_nodes(node) nodes = NodeSet.new + visit = children.to_a.dup # copy it since we're using #pop below. 
- node.children.each do |child| - nodes << child - nodes += gather_child_nodes(child) + until visit.empty? + current = visit.pop + + nodes << current + + current.children.each do |child| + visit << child + end end return nodes