diff --git a/lib/oga/xml/parser.rll b/lib/oga/xml/parser.rll index b21a258..513aff2 100644 --- a/lib/oga/xml/parser.rll +++ b/lib/oga/xml/parser.rll @@ -154,15 +154,12 @@ element # Attributes -attributes - = attributes_ { val[0] } - | _ - ; +attributes = attributes_ { on_attributes(val[0]) }; attributes_ - = attribute attributes + = attribute attributes_ { - val[1].unshift(val[0]) + val[1].unshift(val[0]) if val[0] val[1] } | _ @@ -171,9 +168,7 @@ attributes_ attribute = attribute_name attribute_follow { - val[0].value = val[1] - - val[0] + on_attribute(val[0][1], val[0][0], val[1]) } ; @@ -183,8 +178,8 @@ attribute_follow ; attribute_name - = T_ATTR { on_attribute(val[0]) } - | T_ATTR_NS T_ATTR { on_attribute(val[1], val[0]) } + = T_ATTR { [nil, val[0]] } + | T_ATTR_NS T_ATTR ; # XML declarations @@ -388,9 +383,21 @@ string_body_follow ## # @param [String] name # @param [String] ns_name + # @param [String] value # @return [Oga::XML::Attribute] # - def on_attribute(name, ns_name = nil) - return Attribute.new(:namespace_name => ns_name, :name => name) + def on_attribute(name, ns_name = nil, value = nil) + return Attribute.new( + :namespace_name => ns_name, + :name => name, + :value => value + ) + end + + ## + # @param [Array] attrs + # + def on_attributes(attrs) + return attrs end } diff --git a/lib/oga/xml/sax_parser.rb b/lib/oga/xml/sax_parser.rb index 93377cd..37bc910 100644 --- a/lib/oga/xml/sax_parser.rb +++ b/lib/oga/xml/sax_parser.rb @@ -17,6 +17,8 @@ module Oga # * `on_text` # * `on_element` # * `on_element_children` + # * `on_attribute` + # * `on_attributes` # * `after_element` # # For example: @@ -57,6 +59,14 @@ module Oga # end # end # + # ## Attributes + # + # Attributes returned by `on_attribute` are passed as a Hash as the 3rd + # argument of the `on_element` callback. The keys of this Hash are the + # attribute names (optionally prefixed by their namespace); the values are the attribute values. 
+ # You can override `on_attribute` to control individual attributes and + # `on_attributes` to control the final set. + # class SaxParser < Parser ## # @param [Object] handler The SAX handler to delegate callbacks to. @@ -86,7 +96,7 @@ module Oga # @see [Oga::XML::Parser#on_element] # @return [Array] # - def on_element(namespace, name, attrs = {}) + def on_element(namespace, name, attrs = []) run_callback(:on_element, namespace, name, attrs) return namespace, name @@ -101,6 +111,45 @@ module Oga # def after_element(namespace_with_name) run_callback(:after_element, *namespace_with_name) + + return + end + + ## + # Manually override this method since for this one we _do_ want the + # return value so it can be passed to `on_element`. + # + # @see [Oga::XML::Parser#on_attribute] + # + def on_attribute(name, ns = nil, value = nil) + if @handler.respond_to?(:on_attribute) + return run_callback(:on_attribute, name, ns, value) + end + + key = ns ? "#{ns}:#{name}" : name + + return {key => value} + end + + ## + # Merges the attributes together into a Hash. + # + # @param [Array] attrs + # @return [Hash] + # + def on_attributes(attrs) + if @handler.respond_to?(:on_attributes) + return run_callback(:on_attributes, attrs) + end + + merged = {} + + attrs.each do |pair| + # Hash#merge requires an extra allocation, this doesn't. 
+ pair.each { |key, value| merged[key] = value } + end + + return merged end private diff --git a/spec/oga/xml/sax_parser_spec.rb b/spec/oga/xml/sax_parser_spec.rb index a41e92e..e41baac 100644 --- a/spec/oga/xml/sax_parser_spec.rb +++ b/spec/oga/xml/sax_parser_spec.rb @@ -1,50 +1,114 @@ require 'spec_helper' describe Oga::XML::SaxParser do - before do - @handler = Class.new do - attr_reader :name, :after_namespace, :after_name + describe '#parse' do + before do + @handler = Class.new do + attr_reader :name, :attrs, :after_namespace, :after_name - def on_element(namespace, name, attrs = {}) - @name = name - end + def on_element(namespace, name, attrs = {}) + @name = name + @attrs = attrs + end - def after_element(namespace, name) - @after_namespace = namespace - @after_name = name + def after_element(namespace, name) + @after_namespace = namespace + @after_name = name + end end end + + it 'ignores return values of callback methods' do + parser = described_class.new(@handler.new, 'foo') + + parser.parse.should be_nil + end + + it 'uses custom callback methods if defined' do + handler = @handler.new + parser = described_class.new(handler, '') + + parser.parse + + handler.name.should == 'foo' + end + + it 'always passes element names to after_element' do + handler = @handler.new + parser = described_class.new(handler, '') + + parser.parse + + handler.after_name.should == 'foo' + handler.after_namespace.should == 'namespace' + end + + it 'ignores callbacks that are not defined in the handler' do + parser = described_class.new(@handler.new, '') + + # This would raise if undefined callbacks were _not_ ignored. 
+ lambda { parser.parse }.should_not raise_error + end + + it 'passes the attributes to the on_element callback' do + handler = @handler.new + parser = described_class.new(handler, '') + + parser.parse + + handler.attrs.should == {'b' => '10', 'x:c' => '20'} + end end - it 'ignores return values of callback methods' do - parser = described_class.new(@handler.new, 'foo') + describe '#on_attribute' do + before do + @handler_without = Class.new.new - parser.parse.should be_nil + @handler_with = Class.new do + def on_attribute(name, ns = nil, value = nil) + return {name.upcase => value} + end + end.new + end + + it 'returns a default Hash if no custom callback exists' do + parser = described_class.new(@handler_without, '') + hash = parser.on_attribute('foo', 'x', 'bar') + + hash.should == {'x:foo' => 'bar'} + end + + it 'returns the return value of a custom callback' do + parser = described_class.new(@handler_with, nil) + hash = parser.on_attribute('foo', 'x', 'bar') + + hash.should == {'FOO' => 'bar'} + end end - it 'uses custom callback methods if defined' do - handler = @handler.new - parser = described_class.new(handler, '') + describe '#on_attributes' do + before do + @handler_without = Class.new.new - parser.parse + @handler_with = Class.new do + def on_attributes(attrs) + return %w{Alice Bob} # these two again + end + end.new + end - handler.name.should == 'foo' - end + it 'merges all attributes into a Hash if no callback is defined' do + parser = described_class.new(@handler_without, nil) + hash = parser.on_attributes([{'a' => 'b'}, {'c' => 'd'}]) - it 'always passes element names to after_element' do - handler = @handler.new - parser = described_class.new(handler, '') + hash.should == {'a' => 'b', 'c' => 'd'} + end - parser.parse + it 'returns the return value of a custom callback' do + parser = described_class.new(@handler_with, nil) + retval = parser.on_attributes([{'a' => 'b'}, {'c' => 'd'}]) - handler.after_name.should == 'foo' - 
handler.after_namespace.should == 'namespace' - end - - it 'ignores callbacks that are not defined in the handler' do - parser = described_class.new(@handler.new, '') - - # This would raise if undefined callbacks were _not_ ignored. - lambda { parser.parse }.should_not raise_error + retval.should == %w{Alice Bob} + end end end