oga/lib/oga/xml/pull_parser.rb

116 lines
2.4 KiB
Ruby

module Oga
module XML
##
# The PullParser class can be used to parse an XML document incrementally
# instead of parsing it as a whole. This results in lower memory usage and
# potentially faster parsing times. The downside is that pull parsers are
# typically more difficult to use compared to DOM parsers.
#
# Basic parsing using this class works as follows:
#
# parser = Oga::XML::PullParser.new('... xml here ...')
#
# parser.parse do |node|
# if node.is_a?(Oga::XML::Element)
#
# end
# end
#
# This parser yields proper XML node instances such as {Oga::XML::Element}.
# Doctypes and XML declarations are ignored by this parser.
#
# @!attribute [r] nesting
# Array containing the names of the currently nested elements.
# @return [Array]
#
class PullParser < Parser
  # Stack of element names: `on_element` pushes a name, `after_element` pops
  # the most recently pushed one.
  attr_reader :nesting

  ##
  # Parser callbacks that are overridden to do nothing and return `nil`, so
  # the corresponding constructs are never handed to the user's block.
  #
  # @return [Array<Symbol>]
  #
  DISABLED_CALLBACKS = [
    :on_document,
    :on_doctype,
    :on_xml_decl,
    :on_element_children
  ].freeze

  ##
  # Parser callbacks whose result (as returned by the parent implementation)
  # is passed straight to the user's block.
  #
  # @return [Array<Symbol>]
  #
  BLOCK_CALLBACKS = [
    :on_cdata,
    :on_comment,
    :on_text
  ].freeze

  ##
  # Resets the parent parser state and clears the stored block and the
  # nesting stack.
  #
  # @see Oga::XML::Parser#reset
  #
  def reset
    super

    @block   = nil
    @nesting = []
  end

  ##
  # Parses the input and yields every node to the supplied block.
  #
  # The parser is always reset afterwards, including when the parser or the
  # supplied block raises. This prevents a failed run from leaving a stale
  # block and nesting stack behind on the instance.
  #
  # The block is invoked for every node that is produced, so it must be
  # supplied whenever the input contains nodes.
  #
  # @yieldparam node [Oga::XML::Node]
  # @return [NilClass]
  #
  def parse(&block)
    @block = block

    yyparse(self, :yield_next_token)

    nil
  ensure
    # Runs on both the success and the error path; an exception raised above
    # still propagates to the caller after the reset.
    reset
  end

  # eval is a heck of a lot faster than define_method on both Rubinius and
  # JRuby.
  #
  # Each disabled callback swallows its arguments and returns nil.
  DISABLED_CALLBACKS.each do |callback|
    eval <<-EOF, nil, __FILE__, __LINE__ + 1
    def #{callback}(*args)
      nil
    end
    EOF
  end

  # Each block callback lets the parent build the node (the bare `super`
  # forwards the original arguments), yields it and returns nil.
  BLOCK_CALLBACKS.each do |callback|
    eval <<-EOF, nil, __FILE__, __LINE__ + 1
    def #{callback}(*args)
      @block.call(super)
      nil
    end
    EOF
  end

  ##
  # Builds the element using the parent implementation, records its name on
  # the nesting stack and yields it to the block.
  #
  # @see Oga::XML::Parser#on_element
  # @return [NilClass]
  #
  def on_element(*args)
    element = super

    nesting << element.name
    @block.call(element)

    nil
  end

  ##
  # Removes the most recently pushed element name from the nesting stack.
  # The arguments are ignored and nothing is yielded.
  #
  # NOTE(review): presumably invoked by the parser once an element has been
  # closed - confirm against Oga::XML::Parser.
  #
  # @return [NilClass]
  #
  def after_element(*args)
    nesting.pop

    nil
  end
end # PullParser
end # XML
end # Oga