From 83f6d5437e1b9fa2fdbbcfe43a6aed9e8474a0c7 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 29 Apr 2014 23:05:49 +0200 Subject: [PATCH] Contextual pull parsing. This adds the ability to more easily act upon specific node types and nestings when using the pull parsing API. A basic example of this API looks like the following (only including relevant code): parser.parse do |node| parser.on(:element, %w{people person}) do people << {:name => nil, :age => nil} end parser.on(:text, %w{people person name}) do people.last[:name] = node.text end parser.on(:text, %w{people person age}) do people.last[:age] = node.text.to_i end end This fixes #6. --- lib/oga/xml/cdata.rb | 7 +++ lib/oga/xml/comment.rb | 7 +++ lib/oga/xml/doctype.rb | 7 +++ lib/oga/xml/element.rb | 7 +++ lib/oga/xml/node.rb | 7 +++ lib/oga/xml/pull_parser.rb | 44 +++++++++++++++++++ lib/oga/xml/text.rb | 7 +++ lib/oga/xml/xml_declaration.rb | 7 +++ spec/oga/xml/cdata_spec.rb | 6 +++ spec/oga/xml/comment_spec.rb | 6 +++ spec/oga/xml/doctype_spec.rb | 6 +++ spec/oga/xml/element_spec.rb | 6 +++ spec/oga/xml/node_spec.rb | 6 +++ .../xml/pull_parser/context_parsing_spec.rb | 29 ++++++++++++ spec/oga/xml/text_spec.rb | 6 +++ spec/oga/xml/xml_declaration_spec.rb | 6 +++ 16 files changed, 164 insertions(+) create mode 100644 spec/oga/xml/pull_parser/context_parsing_spec.rb diff --git a/lib/oga/xml/cdata.rb b/lib/oga/xml/cdata.rb index e76579c..e3bdb79 100644 --- a/lib/oga/xml/cdata.rb +++ b/lib/oga/xml/cdata.rb @@ -12,6 +12,13 @@ module Oga def to_xml return "" end + + ## + # @return [Symbol] + # + def node_type + return :cdata + end end # Cdata end # XML end # Oga diff --git a/lib/oga/xml/comment.rb b/lib/oga/xml/comment.rb index 9c79109..e6eb478 100644 --- a/lib/oga/xml/comment.rb +++ b/lib/oga/xml/comment.rb @@ -12,6 +12,13 @@ module Oga def to_xml return "" end + + ## + # @return [Symbol] + # + def node_type + return :comment + end end # Comment end # XML end # Oga diff --git a/lib/oga/xml/doctype.rb b/lib/oga/xml/doctype.rb index 0e8fb02..c98d6f1 100644 --- a/lib/oga/xml/doctype.rb +++ b/lib/oga/xml/doctype.rb @@ -73,6 +73,13 @@ module Oga #{spacing}) EOF end + + ## + # @return [Symbol] + # + def node_type + return :doctype + end end # Doctype end # XML end # Oga diff --git a/lib/oga/xml/element.rb b/lib/oga/xml/element.rb index 5437ce3..1b1e75c 100644 --- a/lib/oga/xml/element.rb +++ b/lib/oga/xml/element.rb @@ -75,6 +75,13 @@ module Oga #{spacing}] EOF end + + ## + # @return [Symbol] + # + def node_type + return :element + end end # Element end # XML end # Oga diff --git a/lib/oga/xml/node.rb b/lib/oga/xml/node.rb index 0f2cea3..4f03a10 100644 --- a/lib/oga/xml/node.rb +++ b/lib/oga/xml/node.rb @@ -58,6 +58,13 @@ module Oga # @return [String] # def extra_inspect_data; end + + ## + # @return [Symbol] + # + def node_type + return :node + end end # Element end # XML end # Oga diff --git a/lib/oga/xml/pull_parser.rb b/lib/oga/xml/pull_parser.rb index c68499c..bff29c6 100644 --- a/lib/oga/xml/pull_parser.rb +++ b/lib/oga/xml/pull_parser.rb @@ -75,6 +75,50 @@ module Oga return end + ## + # Calls the supplied block if the current node type and optionally the + # nesting match. This method allows you to write this: + # + # parser.parse do |node| + # parser.on(:text, %w{people person name}) do + # puts node.text + # end + # end + # + # Instead of this: + # + # parser.parse do |node| + # if node.node_type == :text \ + # and parser.nesting == %w{people person name} + # puts node.text + # end + # end + # + # When calling this method you can specify the following node types: + # + # * `:cdata` + # * `:comment` + # * `:element` + # * `:text` + # + # @example + # parser.on(:element, %w{people person name}) do + # + # end + # + # @param [Symbol] type The type of node to act upon. This is a symbol as + # returned by {Oga::XML::Node#node_type}. + # + # @param [Array] nesting The element name nesting to act upon. + # + def on(type, nesting = []) + if node.node_type == type + if nesting.empty? or nesting == self.nesting + yield + end + end + end + # eval is a heck of a lot faster than define_method on both Rubinius and # JRuby. DISABLED_CALLBACKS.each do |method| diff --git a/lib/oga/xml/text.rb b/lib/oga/xml/text.rb index d2e56f4..a7aa6f7 100644 --- a/lib/oga/xml/text.rb +++ b/lib/oga/xml/text.rb @@ -24,6 +24,13 @@ module Oga def extra_inspect_data(indent) return "text: #{text.inspect}" end + + ## + # @return [Symbol] + # + def node_type + return :text + end end # Text end # XML end # Oga diff --git a/lib/oga/xml/xml_declaration.rb b/lib/oga/xml/xml_declaration.rb index a637695..1df5a4d 100644 --- a/lib/oga/xml/xml_declaration.rb +++ b/lib/oga/xml/xml_declaration.rb @@ -67,6 +67,13 @@ module Oga #{spacing}) EOF end + + ## + # @return [Symbol] + # + def node_type + return :xml_decl + end end # XmlDeclaration end # XML end # Oga diff --git a/spec/oga/xml/cdata_spec.rb b/spec/oga/xml/cdata_spec.rb index 1174f07..faa5a49 100644 --- a/spec/oga/xml/cdata_spec.rb +++ b/spec/oga/xml/cdata_spec.rb @@ -33,4 +33,10 @@ describe Oga::XML::Cdata do @instance.inspect.should == 'Cdata(text: "foo")' end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :cdata + end + end end diff --git a/spec/oga/xml/comment_spec.rb b/spec/oga/xml/comment_spec.rb index 04d8bf9..5c015cf 100644 --- a/spec/oga/xml/comment_spec.rb +++ b/spec/oga/xml/comment_spec.rb @@ -33,4 +33,10 @@ describe Oga::XML::Comment do @instance.inspect.should == 'Comment(text: "foo")' end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :comment + end + end end diff --git a/spec/oga/xml/doctype_spec.rb b/spec/oga/xml/doctype_spec.rb index ac287e8..2e0f433 100644 --- a/spec/oga/xml/doctype_spec.rb +++ b/spec/oga/xml/doctype_spec.rb @@ -63,4 +63,10 @@ Doctype( EOF end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :doctype + end + end end diff --git a/spec/oga/xml/element_spec.rb b/spec/oga/xml/element_spec.rb index c8e326a..106253c 100644 --- a/spec/oga/xml/element_spec.rb +++ b/spec/oga/xml/element_spec.rb @@ -80,4 +80,10 @@ Element( EOF end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :element + end + end end diff --git a/spec/oga/xml/node_spec.rb b/spec/oga/xml/node_spec.rb index 3502303..d7a0d2e 100644 --- a/spec/oga/xml/node_spec.rb +++ b/spec/oga/xml/node_spec.rb @@ -13,4 +13,10 @@ describe Oga::XML::Node do described_class.new.children.should == [] end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :node + end + end end diff --git a/spec/oga/xml/pull_parser/context_parsing_spec.rb b/spec/oga/xml/pull_parser/context_parsing_spec.rb new file mode 100644 index 0000000..a6414cf --- /dev/null +++ b/spec/oga/xml/pull_parser/context_parsing_spec.rb @@ -0,0 +1,29 @@ +require 'spec_helper' + +describe Oga::XML::PullParser do + context '#on' do + before do + @parser = Oga::XML::PullParser.new('') + + @parser.stub(:node).and_return(Oga::XML::Text.new) + end + + example 'do not yield if the node types do not match' do + expect { |b| @parser.on(:element, &b) }.to_not yield_control + end + + example 'yield if the node type matches and the nesting is empty' do + expect { |b| @parser.on(:text, &b) }.to yield_control + end + + example 'do not yield if the node type matches but the nesting does not' do + expect { |b| @parser.on(:text, %w{foo}, &b) }.to_not yield_control + end + + example 'yield if the node type and the nesting matches' do + @parser.stub(:nesting).and_return(%w{a b}) + + expect { |b| @parser.on(:text, %w{a b}, &b) }.to yield_control + end + end +end diff --git a/spec/oga/xml/text_spec.rb b/spec/oga/xml/text_spec.rb index f6d556f..41dee84 100644 --- a/spec/oga/xml/text_spec.rb +++ b/spec/oga/xml/text_spec.rb @@ -33,4 +33,10 @@ describe Oga::XML::Text do @instance.inspect.should == 'Text(text: "foo")' end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :text + end + end end diff --git a/spec/oga/xml/xml_declaration_spec.rb b/spec/oga/xml/xml_declaration_spec.rb index fb66939..3ef2938 100644 --- a/spec/oga/xml/xml_declaration_spec.rb +++ b/spec/oga/xml/xml_declaration_spec.rb @@ -58,4 +58,10 @@ XmlDeclaration( EOF end end + + context '#type' do + example 'return the type of the node' do + described_class.new.node_type.should == :xml_decl + end + end end