From 45b0cdf8114d361018dc4f49d29e7919e57ddf15 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 28 Apr 2014 23:40:36 +0200 Subject: [PATCH] Track element name nesting in the pull parser. Tracking the names of nested elements makes it a lot easier to do contextual pull parsing. Without this it's impossible to know what context the parser is in at a given moment. For memory reasons the parser currently only tracks the element names. In the future it might also track extra information to make parsing easier. --- lib/oga/xml/parser.y | 16 +++++++- lib/oga/xml/pull_parser.rb | 37 +++++++++++++++++-- .../xml/pull_parser/element_nesting_spec.rb | 23 ++++++++++++ 3 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 spec/oga/xml/pull_parser/element_nesting_spec.rb diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index 0492912..66687f5 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -103,7 +103,11 @@ rule element : element_start expressions T_ELEM_END { - on_element_children(val[0], val[1] ? val[1].flatten : []) + if val[0] + on_element_children(val[0], val[1] ? val[1].flatten : []) + end + + after_element(val[0]) } ; @@ -149,6 +153,8 @@ end def initialize(data, options = {}) @data = data @lexer = Lexer.new(data, options) + + reset end ## @@ -341,6 +347,14 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}: return element end + ## + # @param [Oga::XML::Element] element + # @return [Oga::XML::Element] + # + def after_element(element) + return element + end + ## # @param [Array] pairs # @return [Hash] diff --git a/lib/oga/xml/pull_parser.rb b/lib/oga/xml/pull_parser.rb index 457625e..7bd55e3 100644 --- a/lib/oga/xml/pull_parser.rb +++ b/lib/oga/xml/pull_parser.rb @@ -19,7 +19,13 @@ module Oga # This parses yields proper XML instances such as {Oga::XML::Element}. # Doctypes and XML declarations are ignored by this parser. # + # @!attribute [r] nesting + # Array containing the names of the currently nested elements.
+ # @return [Array<String>] + # class PullParser < Parser + attr_reader :nesting + ## # @return [Array] # @@ -36,8 +42,7 @@ module Oga BLOCK_CALLBACKS = [ :on_cdata, :on_comment, - :on_text, - :on_element + :on_text ] ## @@ -46,7 +51,8 @@ module Oga def reset super - @block = nil + @block = nil + @nesting = [] end ## @@ -68,7 +74,7 @@ module Oga # JRuby. DISABLED_CALLBACKS.each do |method| eval <<-EOF, nil, __FILE__, __LINE__ + 1 - def #{method}(*_) + def #{method}(*args) return end EOF @@ -78,9 +84,32 @@ module Oga eval <<-EOF, nil, __FILE__, __LINE__ + 1 def #{method}(*args) @block.call(super) + return end EOF end + + ## + # @see Oga::XML::Parser#on_element + # + def on_element(*args) + element = super + + nesting << element.name + + @block.call(element) + + return + end + + ## + # @see Oga::XML::Parser#after_element + # + def after_element(*args) + nesting.pop + + return + end end # PullParser end # XML end # Oga diff --git a/spec/oga/xml/pull_parser/element_nesting_spec.rb b/spec/oga/xml/pull_parser/element_nesting_spec.rb new file mode 100644 index 0000000..d08e2b2 --- /dev/null +++ b/spec/oga/xml/pull_parser/element_nesting_spec.rb @@ -0,0 +1,23 @@ +require 'spec_helper' + +describe Oga::XML::PullParser do + context 'tracking element nesting' do + before do + @parser = described_class.new('<a><b></b></a>') + end + + example 'set the nesting for the outer element' do + @parser.parse do |node| + @parser.nesting.should == %w{a} if node.name == 'a' + + @parser.nesting.should == %w{a b} if node.name == 'b' + end + end + + example 'pop element names after leaving an element' do + @parser.nesting.should_receive(:pop).twice + + @parser.parse { |node| } + end + end +end