Fixed SAX parsing of XML attributes.

This was utterly broken, mainly due to me overlooking it. There are now 2 new
callbacks to handle this properly:

* on_attribute: to handle a single attribute/value pair
* on_attributes: to handle a collection of attributes (as returned by
  on_attribute)

By default on_attribut returns a Hash, on_attributes in turn merges all
attribute hashes into a single one. This ensures that on_element _actually_
receives the attributes as a Hash, instead of an Array with random
nil/XML::Attribute values.
This commit is contained in:
Yorick Peterse 2015-02-15 23:23:52 +01:00
parent 605d565104
commit d8b9725b82
3 changed files with 165 additions and 45 deletions

View File

@ -154,15 +154,12 @@ element
# Attributes
attributes
= attributes_ { val[0] }
| _
;
attributes = attributes_ { on_attributes(val[0]) };
attributes_
= attribute attributes
= attribute attributes_
{
val[1].unshift(val[0])
val[1].unshift(val[0]) if val[0]
val[1]
}
| _
@ -171,9 +168,7 @@ attributes_
attribute
= attribute_name attribute_follow
{
val[0].value = val[1]
val[0]
on_attribute(val[0][1], val[0][0], val[1])
}
;
@ -183,8 +178,8 @@ attribute_follow
;
attribute_name
= T_ATTR { on_attribute(val[0]) }
| T_ATTR_NS T_ATTR { on_attribute(val[1], val[0]) }
= T_ATTR { [nil, val[0]] }
| T_ATTR_NS T_ATTR
;
# XML declarations
@ -388,9 +383,21 @@ string_body_follow
##
# @param [String] name
# @param [String] ns_name
# @param [String] value
# @return [Oga::XML::Attribute]
#
def on_attribute(name, ns_name = nil)
return Attribute.new(:namespace_name => ns_name, :name => name)
def on_attribute(name, ns_name = nil, value = nil)
return Attribute.new(
:namespace_name => ns_name,
:name => name,
:value => value
)
end
##
# @param [Array] attrs
#
def on_attributes(attrs)
return attrs
end
}

View File

@ -17,6 +17,8 @@ module Oga
# * `on_text`
# * `on_element`
# * `on_element_children`
# * `on_attribute`
# * `on_attributes`
# * `after_element`
#
# For example:
@ -57,6 +59,14 @@ module Oga
# end
# end
#
# ## Attributes
#
# Attributes returned by `on_attribute` are passed as an Hash as the 3rd
# argument of the `on_element` callback. The keys of this Hash are the
# attribute names (optionally prefixed by their namespace) and their values.
# You can overwrite `on_attribute` to control individual attributes and
# `on_attributes` to control the final set.
#
class SaxParser < Parser
##
# @param [Object] handler The SAX handler to delegate callbacks to.
@ -86,7 +96,7 @@ module Oga
# @see [Oga::XML::Parser#on_element]
# @return [Array]
#
def on_element(namespace, name, attrs = {})
def on_element(namespace, name, attrs = [])
run_callback(:on_element, namespace, name, attrs)
return namespace, name
@ -101,6 +111,45 @@ module Oga
#
def after_element(namespace_with_name)
run_callback(:after_element, *namespace_with_name)
return
end
##
# Manually overwrite this method since for this one we _do_ want the
# return value so it can be passed to `on_element`.
#
# @see [Oga::XML::Parser#on_attribute]
#
def on_attribute(name, ns = nil, value = nil)
if @handler.respond_to?(:on_attribute)
return run_callback(:on_attribute, name, ns, value)
end
key = ns ? "#{ns}:#{name}" : name
return {key => value}
end
##
# Merges the attributes together into a Hash.
#
# @param [Array] attrs
# @return [Hash]
#
def on_attributes(attrs)
if @handler.respond_to?(:on_attributes)
return run_callback(:on_attributes, attrs)
end
merged = {}
attrs.each do |pair|
# Hash#merge requires an extra allocation, this doesn't.
pair.each { |key, value| merged[key] = value }
end
return merged
end
private

View File

@ -1,50 +1,114 @@
require 'spec_helper'
describe Oga::XML::SaxParser do
before do
@handler = Class.new do
attr_reader :name, :after_namespace, :after_name
describe '#parse' do
before do
@handler = Class.new do
attr_reader :name, :attrs, :after_namespace, :after_name
def on_element(namespace, name, attrs = {})
@name = name
end
def on_element(namespace, name, attrs = {})
@name = name
@attrs = attrs
end
def after_element(namespace, name)
@after_namespace = namespace
@after_name = name
def after_element(namespace, name)
@after_namespace = namespace
@after_name = name
end
end
end
it 'ignores return values of callback methods' do
parser = described_class.new(@handler.new, 'foo')
parser.parse.should be_nil
end
it 'uses custom callback methods if defined' do
handler = @handler.new
parser = described_class.new(handler, '<foo />')
parser.parse
handler.name.should == 'foo'
end
it 'always passes element names to after_element' do
handler = @handler.new
parser = described_class.new(handler, '<namespace:foo />')
parser.parse
handler.after_name.should == 'foo'
handler.after_namespace.should == 'namespace'
end
it 'ignores callbacks that are not defined in the handler' do
parser = described_class.new(@handler.new, '<!--foo-->')
# This would raise if undefined callbacks were _not_ ignored.
lambda { parser.parse }.should_not raise_error
end
it 'passes the attributes to the on_element callback' do
handler = @handler.new
parser = described_class.new(handler, '<a b="10" x:c="20" />')
parser.parse
handler.attrs.should == {'b' => '10', 'x:c' => '20'}
end
end
it 'ignores return values of callback methods' do
parser = described_class.new(@handler.new, 'foo')
describe '#on_attribute' do
before do
@handler_without = Class.new.new
parser.parse.should be_nil
@handler_with = Class.new do
def on_attribute(name, ns = nil, value = nil)
return {name.upcase => value}
end
end.new
end
it 'returns a default Hash if no custom callback exists' do
parser = described_class.new(@handler_without, '<a x:foo="bar" />')
hash = parser.on_attribute('foo', 'x', 'bar')
hash.should == {'x:foo' => 'bar'}
end
it 'returns the return value of a custom callback' do
parser = described_class.new(@handler_with, nil)
hash = parser.on_attribute('foo', 'x', 'bar')
hash.should == {'FOO' => 'bar'}
end
end
it 'uses custom callback methods if defined' do
handler = @handler.new
parser = described_class.new(handler, '<foo />')
describe '#on_attributes' do
before do
@handler_without = Class.new.new
parser.parse
@handler_with = Class.new do
def on_attributes(attrs)
return %w{Alice Bob} # these two again
end
end.new
end
handler.name.should == 'foo'
end
it 'merges all attributes into a Hash if no callback is defined' do
parser = described_class.new(@handler_without, nil)
hash = parser.on_attributes([{'a' => 'b'}, {'c' => 'd'}])
it 'always passes element names to after_element' do
handler = @handler.new
parser = described_class.new(handler, '<namespace:foo />')
hash.should == {'a' => 'b', 'c' => 'd'}
end
parser.parse
it 'returns the return value of a custom callback' do
parser = described_class.new(@handler_with, nil)
retval = parser.on_attributes([{'a' => 'b'}, {'c' => 'd'}])
handler.after_name.should == 'foo'
handler.after_namespace.should == 'namespace'
end
it 'ignores callbacks that are not defined in the handler' do
parser = described_class.new(@handler.new, '<!--foo-->')
# This would raise if undefined callbacks were _not_ ignored.
lambda { parser.parse }.should_not raise_error
retval.should == %w{Alice Bob}
end
end
end