Track element name nesting in the pull parser.

Tracking the names of nested elements makes it a lot easier to do contextual
pull parsing. Without this it's impossible to know what context the parser is
in at a given moment.

For memory reasons the parser currently only tracks the element names. In the
future it might also track extra information to make parsing easier.
This commit is contained in:
Yorick Peterse 2014-04-28 23:40:36 +02:00
parent 030a0068bd
commit 45b0cdf811
3 changed files with 71 additions and 5 deletions

View File

@ -103,7 +103,11 @@ rule
element
: element_start expressions T_ELEM_END
{
# val holds the values of the right-hand side symbols: val[0] comes from
# element_start and val[1] from expressions. Children are only attached when
# val[0] is truthy; a missing expressions value is treated as an empty list.
if val[0]
on_element_children(val[0], val[1] ? val[1].flatten : [])
end
# Called unconditionally once T_ELEM_END has been matched, i.e. also when
# val[0] is nil or false, so that after_element overrides run for every
# closed element.
after_element(val[0])
}
;
@ -149,6 +153,8 @@ end
##
# Stores the input, builds the lexer for it and then calls `reset`.
#
# @param data The input to parse; kept in @data and also handed to the lexer.
#  NOTE(review): presumably a String or IO, confirm against Lexer.
# @param [Hash] options Passed through unchanged to Lexer.new.
#
def initialize(data, options = {})
@data = data
@lexer = Lexer.new(data, options)
# Runs last, after @data and @lexer are assigned. NOTE(review): `reset` is
# defined outside this view; presumably it initialises per-parse state and
# subclasses extend it, confirm before reordering.
reset
end
##
@ -341,6 +347,14 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
return element
end
##
# Hook called by the grammar after an element has been closed. The default
# implementation has no side effects and hands its argument straight back;
# subclasses can override it to react to the end of an element.
#
# @param [Oga::XML::Element] element
# @return [Oga::XML::Element]
#
def after_element(element)
  element
end
##
# @param [Array] pairs
# @return [Hash]

View File

@ -19,7 +19,13 @@ module Oga
# This parser yields proper XML instances such as {Oga::XML::Element}.
# Doctypes and XML declarations are ignored by this parser.
#
# @!attribute [r] nesting
# Array containing the names of the currently nested elements.
# @return [Array]
#
class PullParser < Parser
attr_reader :nesting
##
# @return [Array]
#
@ -36,8 +42,7 @@ module Oga
# Names of the parser callbacks listed for block handling. :on_element is
# intentionally not part of this list: the class defines on_element
# explicitly because it also has to record the element name in `nesting`
# before the block is called.
BLOCK_CALLBACKS = [
  :on_cdata,
  :on_comment,
  :on_text
]
##
@ -47,6 +52,7 @@ module Oga
super
@block = nil
@nesting = []
end
##
@ -68,7 +74,7 @@ module Oga
# JRuby.
DISABLED_CALLBACKS.each do |method|
eval <<-EOF, nil, __FILE__, __LINE__ + 1
def #{method}(*_)
def #{method}(*args)
return
end
EOF
@ -78,9 +84,32 @@ module Oga
eval <<-EOF, nil, __FILE__, __LINE__ + 1
def #{method}(*args)
@block.call(super)
return
end
EOF
end
##
# Obtains the element from the parent implementation, records its name on
# the nesting stack and only then passes it to the block. Because the name is
# pushed first, the block already sees the element as the innermost entry of
# `nesting`.
#
# @see Oga::XML::Parser#on_element
# @return [NilClass]
#
def on_element(*args)
  node = super

  nesting.push(node.name)
  @block.call(node)

  nil
end
##
# Removes the innermost element name from the nesting stack once that
# element has been closed. Any arguments are accepted and ignored.
#
# @see Oga::XML::Parser#after_element
# @return [NilClass]
#
def after_element(*args)
  nesting.pop

  nil
end
end # PullParser
end # XML
end # Oga

View File

@ -0,0 +1,23 @@
require 'spec_helper'
describe Oga::XML::PullParser do
  context 'tracking element nesting' do
    # Two levels of nesting are enough to observe both pushing and popping.
    before { @parser = described_class.new('<a><b></b></a>') }

    # The block is called after the element's own name has been pushed, so
    # every element must already be the innermost entry of the stack.
    example 'set the nesting for the outer element' do
      @parser.parse do |node|
        case node.name
        when 'a' then @parser.nesting.should == %w[a]
        when 'b' then @parser.nesting.should == %w[a b]
        end
      end
    end

    # One pop is expected per closed element: </b> and </a>.
    example 'pop element names after leaving an element' do
      @parser.nesting.should_receive(:pop).twice

      @parser.parse { |_node| }
    end
  end
end