From 31764593070b29fcd16040a6a0bd553e464324cd Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Thu, 26 Mar 2015 22:38:39 +0100 Subject: [PATCH] Ignore declared namespaces in HTML documents The HTML spec states that any declared namespaces, including the default namespace, are to be ignored. This fixes #85 --- lib/oga/xml/element.rb | 16 ++++++++- spec/oga/xml/element_spec.rb | 51 +++++++++++++++++++++++++-- spec/oga/xpath/evaluator/html_spec.rb | 14 ++++++++ 3 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 spec/oga/xpath/evaluator/html_spec.rb diff --git a/lib/oga/xml/element.rb b/lib/oga/xml/element.rb index b3f06a1..384177c 100644 --- a/lib/oga/xml/element.rb +++ b/lib/oga/xml/element.rb @@ -23,7 +23,9 @@ module Oga class Element < Node include Querying - attr_accessor :name, :namespace_name, :attributes, :namespaces + attr_accessor :name, :namespace_name, :attributes + + attr_writer :namespaces ## # The attribute prefix/namespace used for registering element namespaces. @@ -166,6 +168,16 @@ module Oga return @namespace end + ## + # Returns the namespaces registered on this element, or an empty Hash in + # case of an HTML element. + # + # @return [Hash] + # + def namespaces + return html? ? {} : @namespaces + end + ## # Returns true if the current element resides in the default XML # namespace. @@ -294,6 +306,8 @@ module Oga # @return [Hash] # def available_namespaces + return {} if html? 
# HTML(5) completely ignores namespaces + merged = namespaces.dup node = parent diff --git a/spec/oga/xml/element_spec.rb b/spec/oga/xml/element_spec.rb index ef080b5..daf6ebc 100644 --- a/spec/oga/xml/element_spec.rb +++ b/spec/oga/xml/element_spec.rb @@ -10,9 +10,12 @@ describe Oga::XML::Element do described_class.new.attributes.should == [] end - describe 'setting namespaces' do + describe 'with a namespace' do before do - attr = Oga::XML::Attribute.new(:name => 'foo', :namespace_name => 'xmlns') + attr = Oga::XML::Attribute.new( + :name => 'foo', + :namespace_name => 'xmlns' + ) @element = described_class.new(:attributes => [attr]) end @@ -26,7 +29,7 @@ describe Oga::XML::Element do end end - describe 'setting the default namespace without a prefix' do + describe 'with a default namespace' do before do attr = Oga::XML::Attribute.new(:name => 'xmlns', :value => 'foo') @@ -205,6 +208,38 @@ describe Oga::XML::Element do element.namespace.should == namespace end + + describe 'in an HTML document' do + it 'returns nil' do + ns = Oga::XML::Namespace.new(:name => 'xmlns') + el = described_class.new(:namespaces => {'xmlns' => ns}) + doc = Oga::XML::Document.new(:type => :html, :children => [el]) + + el.namespace.should be_nil + end + end + end + + describe '#namespaces' do + it 'returns the registered namespaces as a Hash' do + namespace = Oga::XML::Namespace.new(:name => 'x') + element = described_class.new( + :namespace_name => 'x', + :namespaces => {'x' => namespace} + ) + + element.namespaces.should == {'x' => namespace} + end + + describe 'in an HTML document' do + it 'returns an empty Hash' do + ns = Oga::XML::Namespace.new(:name => 'xmlns') + el = described_class.new(:namespaces => {'xmlns' => ns}) + doc = Oga::XML::Document.new(:type => :html, :children => [el]) + + el.namespaces.should == {} + end + end end describe '#default_namespace?' 
do @@ -469,6 +504,16 @@ describe Oga::XML::Element do it 'does not modify the list of direct namespaces' do @child.namespaces.key?('foo').should == false end + + describe 'in an HTML document' do + it 'returns an empty Hash' do + ns = Oga::XML::Namespace.new(:name => 'xmlns') + el = described_class.new(:namespaces => {'xmlns' => ns}) + doc = Oga::XML::Document.new(:type => :html, :children => [el]) + + el.available_namespaces.should == {} + end + end end describe '#self_closing?' do diff --git a/spec/oga/xpath/evaluator/html_spec.rb b/spec/oga/xpath/evaluator/html_spec.rb new file mode 100644 index 0000000..0e9d145 --- /dev/null +++ b/spec/oga/xpath/evaluator/html_spec.rb @@ -0,0 +1,14 @@ +require 'spec_helper' + +describe Oga::XPath::Evaluator do + describe 'querying HTML documents' do + before do + @document = parse_html('<html xmlns="foo"><body></body></html>') + @body = @document.children[0].children[0] + end + + it 'returns a NodeSet when a custom default namespace is declared' do + evaluate_xpath(@document, 'html/body').should == node_set(@body) + end + end +end