diff --git a/lib/oga.rb b/lib/oga.rb
index 75cf725..5c18861 100644
--- a/lib/oga.rb
+++ b/lib/oga.rb
@@ -37,8 +37,11 @@ require_relative 'oga/xml/attribute'
require_relative 'oga/xml/element'
require_relative 'oga/xml/node_set'
+require_relative 'oga/xml/sax_parser'
require_relative 'oga/xml/pull_parser'
+
require_relative 'oga/html/parser'
+require_relative 'oga/html/sax_parser'
require_relative 'oga/xpath/node'
require_relative 'oga/xpath/lexer'
diff --git a/lib/oga/html/sax_parser.rb b/lib/oga/html/sax_parser.rb
new file mode 100644
index 0000000..07f7731
--- /dev/null
+++ b/lib/oga/html/sax_parser.rb
@@ -0,0 +1,18 @@
+module Oga
+ module HTML
+ ##
+ # SAX parser for HTML documents: an {Oga::XML::SaxParser} that always has
+ # the `:html` option enabled. See that class for the handler interface.
+ #
+ class SaxParser < XML::SaxParser
+ ##
+ # Merges `:html => true` into a copy of `options`, then defers to super.
+ # @see Oga::XML::SaxParser#initialize
+ def initialize(handler, data, options = {})
+ options = options.merge(:html => true)
+
+ super(handler, data, options)
+ end
+ end # SaxParser
+ end # HTML
+end # Oga
diff --git a/lib/oga/oga.rb b/lib/oga/oga.rb
index 7f40378..8cf78ca 100644
--- a/lib/oga/oga.rb
+++ b/lib/oga/oga.rb
@@ -24,4 +24,34 @@ module Oga
def self.parse_html(html)
return HTML::Parser.new(html).parse
end
+
+ ##
+ # Parses the given XML document using the SAX parser.
+ #
+ # @example
+ # handler = SomeSaxHandler.new
+ #
+ # Oga.sax_parse_xml(handler, '<root>Hello</root>')
+ #
+ # @param [Object] handler The SAX handler for the parser.
+ # @param [String|IO] xml The XML to parse.
+ #
+ def self.sax_parse_xml(handler, xml)
+ XML::SaxParser.new(handler, xml).parse
+ end
+
+ ##
+ # Parses the given HTML document using the SAX parser.
+ #
+ # @example
+ # handler = SomeSaxHandler.new
+ #
+ # Oga.sax_parse_html(handler, '<script>foo()</script>')
+ #
+ # @param [Object] handler The SAX handler for the parser.
+ # @param [String|IO] html The HTML to parse.
+ #
+ def self.sax_parse_html(handler, html)
+ HTML::SaxParser.new(handler, html).parse
+ end
end # Oga
diff --git a/lib/oga/xml/sax_parser.rb b/lib/oga/xml/sax_parser.rb
new file mode 100644
index 0000000..47a9574
--- /dev/null
+++ b/lib/oga/xml/sax_parser.rb
@@ -0,0 +1,63 @@
+module Oga
+ module XML
+ ##
+ # The SaxParser class provides the basic interface for writing custom SAX
+ # parsers. All callback methods defined in {Oga::XML::Parser} are delegated
+ # to a dedicated handler object.
+ #
+ # To write a custom handler for the SAX parser, create a class that
+ # implements one (or many) of the following callback methods:
+ #
+ # * `on_document`
+ # * `on_doctype`
+ # * `on_cdata`
+ # * `on_comment`
+ # * `on_proc_ins`
+ # * `on_xml_decl`
+ # * `on_text`
+ # * `on_element`
+ # * `on_element_children`
+ # * `after_element`
+ #
+ # For example:
+ #
+ # class SaxHandler
+ # def on_element(namespace, name, attrs = {})
+ # puts name
+ # end
+ # end
+ #
+ # You can then use it as follows:
+ #
+ # handler = SaxHandler.new
+ # parser = Oga::XML::SaxParser.new(handler, '<foo />')
+ #
+ # parser.parse
+ #
+ # For information on the callback arguments see the documentation of the
+ # corresponding methods in {Oga::XML::Parser}.
+ #
+ class SaxParser < Parser
+ ##
+ # @param [Object] handler The SAX handler to delegate callbacks to.
+ # @param [Array] args Forwarded unchanged to the superclass initializer.
+ # @see Oga::XML::Parser#initialize
+ def initialize(handler, *args)
+ @handler = handler
+
+ super(*args)
+ end
+
+ # Redefine each on_*/after_* instance method to forward to the handler when it responds to it; each always returns nil.
+ instance_methods.grep(/^(on_|after_)/).each do |method|
+ eval <<-EOF, nil, __FILE__, __LINE__ + 1
+ def #{method}(*args)
+ @handler.#{method}(*args) if @handler.respond_to?(:#{method})
+
+ return
+ end
+ EOF
+ end
+ end # SaxParser
+ end # XML
+end # Oga
diff --git a/spec/oga/html/sax_parser_spec.rb b/spec/oga/html/sax_parser_spec.rb
new file mode 100644
index 0000000..c4c775c
--- /dev/null
+++ b/spec/oga/html/sax_parser_spec.rb
@@ -0,0 +1,22 @@
+require 'spec_helper'
+
+describe Oga::HTML::SaxParser do
+ before do
+ @handler = Class.new do
+ attr_reader :name
+
+ def on_element(namespace, name, attrs = {})
+ @name = name
+ end
+ end
+ end
+
+ example 'use custom callback methods if defined' do
+ handler = @handler.new
+ parser = described_class.new(handler, '<link>') # unclosed tag: exercises HTML mode
+
+ parser.parse
+
+ handler.name.should == 'link'
+ end
+end
diff --git a/spec/oga/oga_spec.rb b/spec/oga/oga_spec.rb
index 7f42554..569a788 100644
--- a/spec/oga/oga_spec.rb
+++ b/spec/oga/oga_spec.rb
@@ -1,19 +1,41 @@
require 'spec_helper'
describe Oga do
- context 'parse_xml' do
- example 'parse an XML document' do
- document = described_class.parse_xml('foo')
+ example 'parse an XML document' do
+ document = described_class.parse_xml('foo')
- document.is_a?(Oga::XML::Document).should == true
- end
+ document.is_a?(Oga::XML::Document).should == true
end
- context 'parse_html' do
- example 'parse an HTML document' do
- document = described_class.parse_xml('
')
+ example 'parse an HTML document' do
+ document = described_class.parse_html('') # was parse_xml, which left Oga.parse_html untested
- document.is_a?(Oga::XML::Document).should == true
+ document.is_a?(Oga::XML::Document).should == true
+ end
+
+ context 'SAX parsing' do
+ before do
+ klass = Class.new do
+ attr_reader :name
+
+ def on_element(namespace, name, attrs = {})
+ @name = name
+ end
+ end
+
+ @handler = klass.new
+ end
+
+ example 'parse an XML document using the SAX parser' do
+ Oga.sax_parse_xml(@handler, '<foo />')
+
+ @handler.name.should == 'foo'
+ end
+
+ example 'parse an HTML document using the SAX parser' do
+ Oga.sax_parse_html(@handler, '<link>') # HTML entry point, not sax_parse_xml
+
+ @handler.name.should == 'link'
end
end
end
diff --git a/spec/oga/xml/sax_parser_spec.rb b/spec/oga/xml/sax_parser_spec.rb
new file mode 100644
index 0000000..03686d0
--- /dev/null
+++ b/spec/oga/xml/sax_parser_spec.rb
@@ -0,0 +1,35 @@
+require 'spec_helper'
+
+describe Oga::XML::SaxParser do
+ before do
+ @handler = Class.new do
+ attr_reader :name
+
+ def on_element(namespace, name, attrs = {})
+ @name = name
+ end
+ end
+ end
+
+ example 'ignore return values of callback methods' do
+ parser = described_class.new(@handler.new, 'foo')
+
+ parser.parse.should be_nil
+ end
+
+ example 'use custom callback methods if defined' do
+ handler = @handler.new
+ parser = described_class.new(handler, '<foo />')
+
+ parser.parse
+
+ handler.name.should == 'foo'
+ end
+
+ example 'ignore callbacks that are not defined in the handler' do
+ parser = described_class.new(@handler.new, '<!--foo-->')
+
+ # The handler has no on_comment; this would raise if undefined callbacks were _not_ ignored.
+ lambda { parser.parse }.should_not raise_error
+ end
+end