Track element name nesting in the pull parser.
Tracking the names of nested elements makes contextual pull parsing a lot easier: without it, there is no way to know what context the parser is in at a given moment. For memory reasons the parser currently tracks only the element names. In the future it might also track extra information to make parsing easier.
This commit is contained in:
parent
030a0068bd
commit
45b0cdf811
|
@ -103,7 +103,11 @@ rule
|
|||
element
|
||||
: element_start expressions T_ELEM_END
|
||||
{
|
||||
on_element_children(val[0], val[1] ? val[1].flatten : [])
|
||||
if val[0]
|
||||
on_element_children(val[0], val[1] ? val[1].flatten : [])
|
||||
end
|
||||
|
||||
after_element(val[0])
|
||||
}
|
||||
;
|
||||
|
||||
|
@ -149,6 +153,8 @@ end
|
|||
def initialize(data, options = {})
|
||||
@data = data
|
||||
@lexer = Lexer.new(data, options)
|
||||
|
||||
reset
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -341,6 +347,14 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
|
|||
return element
|
||||
end
|
||||
|
||||
##
|
||||
# @param [Oga::XML::Element] element
|
||||
# @return [Oga::XML::Element]
|
||||
#
|
||||
def after_element(element)
|
||||
return element
|
||||
end
|
||||
|
||||
##
|
||||
# @param [Array] pairs
|
||||
# @return [Hash]
|
||||
|
|
|
@ -19,7 +19,13 @@ module Oga
|
|||
# This parser yields proper XML instances such as {Oga::XML::Element}.
|
||||
# Doctypes and XML declarations are ignored by this parser.
|
||||
#
|
||||
# @!attribute [r] nesting
|
||||
# Array containing the names of the currently nested elements.
|
||||
# @return [Array]
|
||||
#
|
||||
class PullParser < Parser
|
||||
attr_reader :nesting
|
||||
|
||||
##
|
||||
# @return [Array]
|
||||
#
|
||||
|
@ -36,8 +42,7 @@ module Oga
|
|||
BLOCK_CALLBACKS = [
|
||||
:on_cdata,
|
||||
:on_comment,
|
||||
:on_text,
|
||||
:on_element
|
||||
:on_text
|
||||
]
|
||||
|
||||
##
|
||||
|
@ -46,7 +51,8 @@ module Oga
|
|||
def reset
|
||||
super
|
||||
|
||||
@block = nil
|
||||
@block = nil
|
||||
@nesting = []
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -68,7 +74,7 @@ module Oga
|
|||
# JRuby.
|
||||
DISABLED_CALLBACKS.each do |method|
|
||||
eval <<-EOF, nil, __FILE__, __LINE__ + 1
|
||||
def #{method}(*_)
|
||||
def #{method}(*args)
|
||||
return
|
||||
end
|
||||
EOF
|
||||
|
@ -78,9 +84,32 @@ module Oga
|
|||
eval <<-EOF, nil, __FILE__, __LINE__ + 1
|
||||
def #{method}(*args)
|
||||
@block.call(super)
|
||||
return
|
||||
end
|
||||
EOF
|
||||
end
|
||||
|
||||
##
|
||||
# @see Oga::XML::Parser#on_element
|
||||
#
|
||||
def on_element(*args)
|
||||
element = super
|
||||
|
||||
nesting << element.name
|
||||
|
||||
@block.call(element)
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
##
|
||||
# @see Oga::XML::Parser#after_element
|
||||
#
|
||||
def after_element(*args)
|
||||
nesting.pop
|
||||
|
||||
return
|
||||
end
|
||||
end # PullParser
|
||||
end # XML
|
||||
end # Oga
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Oga::XML::PullParser do
|
||||
context 'tracking element nesting' do
|
||||
before do
|
||||
@parser = described_class.new('<a><b></b></a>')
|
||||
end
|
||||
|
||||
example 'set the nesting for the outer element' do
|
||||
@parser.parse do |node|
|
||||
@parser.nesting.should == %w{a} if node.name == 'a'
|
||||
|
||||
@parser.nesting.should == %w{a b} if node.name == 'b'
|
||||
end
|
||||
end
|
||||
|
||||
example 'pop element names after leaving an element' do
|
||||
@parser.nesting.should_receive(:pop).twice
|
||||
|
||||
@parser.parse { |node| }
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue