From d5569ead0b1fd84e9d6525f1003c078712268df1 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Sun, 20 Jul 2014 07:29:37 +0200 Subject: [PATCH] Use XML::Attribute for element attributes. Instead of using a raw Hash Oga now uses the XML::Attribute class for storing information about element attributes. Attributes are stored as an Array of XML::Attribute instances. This allows the attributes to be more easily modified. If they were stored as a Hash you'd not only have to update the attributes themselves but also the Hash that contains them. While using an Array has a slight runtime cost in most cases the amount of attributes is small enough that this doesn't really pose a problem. If webscale performance is desired at some point in the future Oga could most likely cache the lookup of an attribute. This however is something for the future. --- ext/ragel/base_lexer.rl | 11 ++-- lib/oga/xml/element.rb | 56 +++++++++++++++++-- lib/oga/xml/lexer.rb | 9 +++ lib/oga/xml/parser.y | 48 ++++++++-------- spec/oga/xml/element_spec.rb | 41 ++++++++++++-- spec/oga/xml/lexer/elements_spec.rb | 3 +- spec/oga/xml/node_set_spec.rb | 9 +-- spec/oga/xml/parser/elements_spec.rb | 24 +++++++- .../oga/xml/parser/html_void_elements_spec.rb | 2 +- 9 files changed, 161 insertions(+), 42 deletions(-) diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 0b63efe..aa49c5b 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -38,7 +38,6 @@ newline = '\n' | '\r\n'; whitespace = [ \t]; identifier = [a-zA-Z0-9\-_]+; - attribute = [a-zA-Z0-9\-_:]+; # Comments # @@ -144,7 +143,7 @@ }; # Attributes and their values (e.g. version="1.0"). - attribute => { + identifier => { callback("on_attribute", data, encoding, ts, te); }; @@ -187,8 +186,12 @@ callback_simple("advance_line"); }; - # Attribute names. - attribute => { + # Attribute names and namespaces. + identifier ':' => { + callback("on_attribute_ns", data, encoding, ts, te - 1); + }; + + identifier => { callback("on_attribute", data, encoding, ts, te); }; diff --git a/lib/oga/xml/element.rb b/lib/oga/xml/element.rb index cbe8966..6e3c880 100644 --- a/lib/oga/xml/element.rb +++ b/lib/oga/xml/element.rb @@ -14,7 +14,7 @@ module Oga # # @!attribute [rw] attributes # The attributes of the element. - # @return [Hash] + # @return [Array] # class Element < Node attr_accessor :name, :namespace, :attributes @@ -24,24 +24,38 @@ module Oga # # @option options [String] :name The name of the element. # @option options [String] :namespace The namespace of the element. - # @option options [Hash] :attributes The attributes of the element. + # @option options [Array] :attributes The attributes + # of the element as an Array. # def initialize(options = {}) super @name = options[:name] @namespace = options[:namespace] - @attributes = options[:attributes] || {} + @attributes = options[:attributes] || [] end ## - # Returns the value of the specified attribute. + # Returns the attribute of the given name. + # + # @example + # # find an attribute that only has the name "foo" + # attribute('foo') + # + # # find an attribute with namespace "foo" and name bar" + # attribute('foo:bar') # # @param [String] name # @return [String] # def attribute(name) - return attributes[name.to_sym] + name, ns = split_name(name) + + attributes.each do |attr| + return attr if attribute_matches?(attr, ns, name) + end + + return end alias_method :attr, :attribute @@ -117,6 +131,38 @@ module Oga def node_type return :element end + + private + + ## + # @param [String] name + # @return [Array] + # + def split_name(name) + segments = name.to_s.split(':') + + return segments.pop, segments.pop + end + + ## + # @param [Oga::XML::Attribute] attr + # @param [String] ns + # @param [String] name + # @return [TrueClass|FalseClass] + # + def attribute_matches?(attr, ns, name) + name_matches = attr.name == name + ns_matches = false + + if ns + ns_matches = attr.namespace == ns + + elsif name_matches + ns_matches = true + end + + return name_matches && ns_matches + end end # Element end # XML end # Oga diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index fcf2a7b..41a8120 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -354,6 +354,15 @@ module Oga end end + ## + # Called on attribute namespaces. + # + # @param [String] value + # + def on_attribute_ns(value) + add_token(:T_ATTR_NS, value) + end + ## # Called on tag attributes. # diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index 9e10ebd..4019a7c 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -13,7 +13,7 @@ token T_STRING T_TEXT token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME token T_DOCTYPE_INLINE token T_CDATA T_COMMENT -token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR +token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR T_ATTR_NS token T_XML_DECL_START T_XML_DECL_END options no_result_var @@ -122,8 +122,8 @@ rule # Attributes attributes - : attributes_ { on_attributes(val[0]) } - | /* none */ { {} } + : attributes_ { val[0] } + | /* none */ { [] } ; attributes_ @@ -133,10 +133,22 @@ rule attribute # foo - : T_ATTR { {val[0].to_sym => nil} } + : attribute_name { val[0] } # foo="bar" - | T_ATTR T_STRING { {val[0].to_sym => val[1]} } + | attribute_name T_STRING + { + val[0].value = val[1] + val[0] + } + ; + + attribute_name + # foo + : T_ATTR { Attribute.new(:name => val[0]) } + + # foo:bar + | T_ATTR_NS T_ATTR { Attribute.new(:namespace => val[0], :name => val[1]) } ; # XML declarations @@ -293,11 +305,17 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}: end ## - # @param [Hash] attributes + # @param [Array] attributes # @return [Oga::XML::XmlDeclaration] # - def on_xml_decl(attributes = {}) - return XmlDeclaration.new(attributes) + def on_xml_decl(attributes = []) + options = {} + + attributes.each do |attr| + options[attr.name.to_sym] = attr.value + end + + return XmlDeclaration.new(options) end ## @@ -344,18 +362,4 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}: return element end - ## - # @param [Array] pairs - # @return [Hash] - # - def on_attributes(pairs) - attrs = {} - - pairs.each do |pair| - attrs = attrs.merge(pair) - end - - return attrs - end - # vim: set ft=racc: diff --git a/spec/oga/xml/element_spec.rb b/spec/oga/xml/element_spec.rb index 5f34a94..89703c0 100644 --- a/spec/oga/xml/element_spec.rb +++ b/spec/oga/xml/element_spec.rb @@ -14,17 +14,50 @@ describe Oga::XML::Element do end example 'set the default attributes' do - described_class.new.attributes.should == {} + described_class.new.attributes.should == [] end end context '#attribute' do before do - @instance = described_class.new(:attributes => {:key => 'value'}) + attributes = [ + Oga::XML::Attribute.new(:name => 'key', :value => 'value'), + Oga::XML::Attribute.new( + :name => 'key', + :value => 'foo', + :namespace => 'x' + ) + ] + + @instance = described_class.new(:attributes => attributes) end - example 'return an attribute' do - @instance.attribute('key').should == 'value' + example 'return an attribute with only a name' do + @instance.attribute('key').value.should == 'value' + end + + example 'return an attribute with only a name when using a Symbol' do + @instance.attribute(:key).value.should == 'value' + end + + example 'return an attribute with a name and namespace' do + @instance.attribute('x:key').value.should == 'foo' + end + + example 'return an attribute with a name and namespace when using a Symbol' do + @instance.attribute(:'x:key').value.should == 'foo' + end + + example 'return nil when the name matches but the namespace does not' do + @instance.attribute('y:key').nil?.should == true + end + + example 'return nil when the namespace matches but the name does not' do + @instance.attribute('x:foobar').nil?.should == true + end + + example 'return nil for a non existing attribute' do + @instance.attribute('foobar').nil?.should == true end end diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb index b6bb8fe..97054ab 100644 --- a/spec/oga/xml/lexer/elements_spec.rb +++ b/spec/oga/xml/lexer/elements_spec.rb @@ -78,7 +78,8 @@ describe Oga::XML::Lexer do lex('

').should == [ [:T_ELEM_START, nil, 1], [:T_ELEM_NAME, 'p', 1], - [:T_ATTR, 'foo:bar', 1], + [:T_ATTR_NS, 'foo', 1], + [:T_ATTR, 'bar', 1], [:T_STRING, 'baz', 1], [:T_ELEM_END, nil, 1] ] diff --git a/spec/oga/xml/node_set_spec.rb b/spec/oga/xml/node_set_spec.rb index d3d25c8..5e39623 100644 --- a/spec/oga/xml/node_set_spec.rb +++ b/spec/oga/xml/node_set_spec.rb @@ -271,13 +271,14 @@ describe Oga::XML::NodeSet do context '#attribute' do before do - @el = Oga::XML::Element.new(:name => 'a', :attributes => {:a => '1'}) - @txt = Oga::XML::Text.new(:text => 'foo') - @set = described_class.new([@el, @txt]) + @attr = Oga::XML::Attribute.new(:name => 'a', :value => '1') + @el = Oga::XML::Element.new(:name => 'a', :attributes => [@attr]) + @txt = Oga::XML::Text.new(:text => 'foo') + @set = described_class.new([@el, @txt]) end example 'return the values of an attribute' do - @set.attribute('a').should == ['1'] + @set.attribute('a').should == [@attr] end end diff --git a/spec/oga/xml/parser/elements_spec.rb b/spec/oga/xml/parser/elements_spec.rb index cf716cf..d751531 100644 --- a/spec/oga/xml/parser/elements_spec.rb +++ b/spec/oga/xml/parser/elements_spec.rb @@ -43,7 +43,29 @@ describe Oga::XML::Parser do end example 'set the bar attribute' do - @element.attribute('bar').should == 'baz' + @element.attribute('bar').value.should == 'baz' + end + end + + context 'elements with namespaced attributes' do + before :all do + @element = parse('').children[0] + end + + example 'return an Element instance' do + @element.is_a?(Oga::XML::Element).should == true + end + + example 'include the namespace of the attribute' do + @element.attribute('x:bar').namespace.should == 'x' + end + + example 'include the name of the attribute' do + @element.attribute('x:bar').name.should == 'bar' + end + + example 'include the value of the attribute' do + @element.attribute('x:bar').value.should == 'baz' end end diff --git a/spec/oga/xml/parser/html_void_elements_spec.rb b/spec/oga/xml/parser/html_void_elements_spec.rb index 34a081a..9166746 100644 --- a/spec/oga/xml/parser/html_void_elements_spec.rb +++ b/spec/oga/xml/parser/html_void_elements_spec.rb @@ -39,7 +39,7 @@ describe Oga::XML::Parser do end example 'set the attributes' do - @node.attributes.should == {:href => 'foo'} + @node.attribute('href').value.should == 'foo' end end end