# (Viewer header from the original listing: 166 lines, 3.8 KiB, Ruby.)
module Oga
  module XML
    ##
    # The PullParser class can be used to parse an XML document incrementally
    # instead of parsing it as a whole. This results in lower memory usage and
    # potentially faster parsing times. The downside is that pull parsers are
    # typically more difficult to use compared to DOM parsers.
    #
    # Basic parsing using this class works as follows:
    #
    #     parser = Oga::XML::PullParser.new('... xml here ...')
    #
    #     parser.parse do |node|
    #       if node.is_a?(Oga::XML::Element)
    #         # act upon the element
    #       end
    #     end
    #
    # This parser yields proper XML instances such as {Oga::XML::Element}.
    # Doctypes and XML declarations are ignored by this parser.
    #
    # @!attribute [r] node
    #  The current node.
    #  @return [Oga::XML::Node]
    #
    # @!attribute [r] nesting
    #  Array containing the names of the currently nested elements.
    #  @return [Array]
    #
    class PullParser < Parser
      attr_reader :node, :nesting

      ##
      # Names of the parser callbacks that this class overrides with methods
      # that do nothing and return `nil`.
      #
      # @return [Array]
      #
      DISABLED_CALLBACKS = [
        :on_document,
        :on_doctype,
        :on_xml_decl,
        :on_element_children
      ].freeze

      ##
      # Names of the parser callbacks whose result (as returned by the parent
      # implementation) is stored as the current node and passed to the block
      # given to {#parse}.
      #
      # @return [Array]
      #
      BLOCK_CALLBACKS = [
        :on_cdata,
        :on_comment,
        :on_text
      ].freeze

      ##
      # Resets the parent parser's state and clears the stored block, the
      # element nesting and the current node.
      #
      # @see Oga::XML::Parser#reset
      #
      def reset
        super

        @block   = nil
        @nesting = []
        @node    = nil
      end

      ##
      # Parses the input and yields every node to the supplied block.
      #
      # The parser state is reset once parsing is done. This also happens when
      # parsing stops early (an exception, or a `break` from the block) so
      # that stale nesting/node data never leaks into a later call.
      #
      # @yieldparam [Oga::XML::Node] node
      # @return [NilClass]
      #
      def parse(&block)
        @block = block

        yyparse(self, :yield_next_token)

        # Always return nil instead of whatever the underlying parser returns.
        nil
      ensure
        reset
      end

      ##
      # Calls the supplied block if the current node type and optionally the
      # nesting match. This method allows you to write this:
      #
      #     parser.parse do |node|
      #       parser.on(:text, %w{people person name}) do
      #         puts node.text
      #       end
      #     end
      #
      # Instead of this:
      #
      #     parser.parse do |node|
      #       if node.node_type == :text \
      #       and parser.nesting == %w{people person name}
      #         puts node.text
      #       end
      #     end
      #
      # When calling this method you can specify the following node types:
      #
      # * `:cdata`
      # * `:comment`
      # * `:element`
      # * `:text`
      #
      # This method reads the current node, so it is meant to be called from
      # within the block passed to {#parse}.
      #
      # @example
      #  parser.on(:element, %w{people person name}) do
      #
      #  end
      #
      # @param [Symbol] type The type of node to act upon. This is a symbol as
      #  returned by {Oga::XML::Node#node_type}.
      #
      # @param [Array] nesting The element name nesting to act upon. An empty
      #  Array (the default) matches any nesting.
      #
      # @yield Called when the node type matches and the nesting is either
      #  empty or equal to the parser's current nesting.
      #
      def on(type, nesting = [])
        return unless node.node_type == type

        # `self.nesting` is the parser's current nesting; the bare `nesting`
        # is the argument supplied by the caller.
        yield if nesting.empty? || nesting == self.nesting
      end

      # eval is a heck of a lot faster than define_method on both Rubinius and
      # JRuby.
      DISABLED_CALLBACKS.each do |method|
        eval <<-EOF, nil, __FILE__, __LINE__ + 1
        def #{method}(*args)
          nil
        end
        EOF
      end

      BLOCK_CALLBACKS.each do |method|
        eval <<-EOF, nil, __FILE__, __LINE__ + 1
        def #{method}(*args)
          @node = super
          @block.call(@node)
          nil
        end
        EOF
      end

      ##
      # Stores the element returned by the parent implementation as the
      # current node, pushes its name onto the nesting stack and passes the
      # element to the block given to {#parse}.
      #
      # @see Oga::XML::Parser#on_element
      # @return [NilClass]
      #
      def on_element(*args)
        @node = super

        nesting << @node.name

        @block.call(@node)

        nil
      end

      ##
      # Removes the innermost element name from the nesting stack.
      #
      # @see Oga::XML::Parser#after_element
      # @return [NilClass]
      #
      def after_element(*args)
        nesting.pop

        nil
      end
    end # PullParser
  end # XML
end # Oga