From 08d412da7e3039a28b4146c1ab783cb0fae56691 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 21 Apr 2014 23:05:39 +0200 Subject: [PATCH] First shot at removing the AST layer. The AST layer is being removed because it doesn't really serve a useful purpose. In particular when creating a streaming parser the AST nodes would only introduce extra overhead. As a result of this the parser will instead emit a DOM tree directly instead of first emitting an AST. --- lib/oga.rb | 4 - lib/oga/ast/node.rb | 9 - lib/oga/xml/parser.y | 171 ++++++++--- lib/oga/xml/tree_builder.rb | 177 ----------- oga.gemspec | 1 - spec/oga/html/parser/element_spec.rb | 17 +- spec/oga/xml/parser/cdata_spec.rb | 37 ++- spec/oga/xml/parser/comments_spec.rb | 24 +- spec/oga/xml/parser/doctype_spec.rb | 99 ++++-- spec/oga/xml/parser/documents_spec.rb | 68 ++--- spec/oga/xml/parser/elements_spec.rb | 169 ++++------- spec/oga/xml/parser/general_spec.rb | 11 - spec/oga/xml/parser/hierarchy_spec.rb | 53 ++++ .../oga/xml/parser/html_void_elements_spec.rb | 85 ++---- spec/oga/xml/parser/text_spec.rb | 17 ++ spec/oga/xml/parser/xml_declaration_spec.rb | 39 ++- spec/oga/xml/tree_builder_spec.rb | 281 ------------------ 17 files changed, 465 insertions(+), 797 deletions(-) delete mode 100644 lib/oga/ast/node.rb delete mode 100644 lib/oga/xml/tree_builder.rb delete mode 100644 spec/oga/xml/parser/general_spec.rb create mode 100644 spec/oga/xml/parser/hierarchy_spec.rb create mode 100644 spec/oga/xml/parser/text_spec.rb delete mode 100644 spec/oga/xml/tree_builder_spec.rb diff --git a/lib/oga.rb b/lib/oga.rb index cf9a66c..7b6b11e 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -1,8 +1,5 @@ -require 'ast' require 'set' -require_relative 'oga/ast/node' - require_relative 'oga/xml/lexer' require_relative 'oga/xml/parser' require_relative 'oga/xml/node' @@ -13,6 +10,5 @@ require_relative 'oga/xml/comment' require_relative 'oga/xml/cdata' require_relative 'oga/xml/xml_declaration' require_relative 
'oga/xml/doctype' -require_relative 'oga/xml/tree_builder' require_relative 'oga/html/parser' diff --git a/lib/oga/ast/node.rb b/lib/oga/ast/node.rb deleted file mode 100644 index 5f628ca..0000000 --- a/lib/oga/ast/node.rb +++ /dev/null @@ -1,9 +0,0 @@ -module Oga - module AST - ## - # - class Node < ::AST::Node - attr_reader :line - end # Node - end # AST -end # Oga diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index d5ce39c..4f132ca 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -20,14 +20,14 @@ options no_result_var rule document - : expressions { s(:document, val[0]) } - | /* none */ { s(:document) } + : expressions { create_document(val[0]) } + | /* none */ { create_document } ; expressions : expressions expression { val.compact } - | expression { val[0] } - | /* none */ { nil } + | expression { val } + | /* none */ { [] } ; expression @@ -43,24 +43,32 @@ rule doctype # - : T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END { s(:doctype, val[1]) } + : T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END + { + Doctype.new(:name => val[1]) + } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END { - s(:doctype, val[1], val[2]) + Doctype.new(:name => val[1], :type => val[2]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END { - s(:doctype, val[1], val[2], val[3]) + Doctype.new(:name => val[1], :type => val[2], :public_id => val[3]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END { - s(:doctype, val[1], val[2], val[3], val[4]) + Doctype.new( + :name => val[1], + :type => val[2], + :public_id => val[3], + :system_id => val[4] + ) } ; @@ -68,31 +76,24 @@ rule cdata # - : T_CDATA_START T_CDATA_END { s(:cdata) } + : T_CDATA_START T_CDATA_END { Cdata.new } # - | T_CDATA_START T_TEXT T_CDATA_END { s(:cdata, val[1]) } + | T_CDATA_START T_TEXT T_CDATA_END { Cdata.new(:text => val[1]) } ; # Comments comment # - : T_COMMENT_START T_COMMENT_END { s(:comment) } + : T_COMMENT_START 
T_COMMENT_END { Comment.new } # - | T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) } + | T_COMMENT_START T_TEXT T_COMMENT_END { Comment.new(:text => val[1]) } ; # Elements - element - : element_open attributes expressions T_ELEM_END - { - s(:element, val[0], val[1], val[2]) - } - ; - element_open #

: T_ELEM_START T_ELEM_NAME { [nil, val[1]] } @@ -101,11 +102,44 @@ rule | T_ELEM_START T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] } ; + element_start + : element_open attributes + { + Element.new( + :namespace => val[0][0], + :name => val[0][1], + :attributes => val[1] + ) + } + ; + + element + : element_start expressions T_ELEM_END + { + element = val[0] + + element.children = val[1].flatten + + link_children(element) + + element + } + ; + # Attributes attributes - : attributes_ { s(:attributes, val[0]) } - | /* none */ { nil } + : attributes_ + { + attrs = {} + + val[0].flatten.each do |pair| + attrs = attrs.merge(pair) + end + + attrs + } + | /* none */ { {} } ; attributes_ @@ -115,21 +149,33 @@ rule attribute # foo - : T_ATTR { s(:attribute, val[0]) } + : T_ATTR + { + {val[0] => nil} + } # foo="bar" - | T_ATTR T_STRING { s(:attribute, val[0], val[1]) } + | T_ATTR T_STRING + { + {val[0] => val[1]} + } ; # XML declarations xmldecl - : T_XML_DECL_START T_XML_DECL_END { s(:xml_decl) } - | T_XML_DECL_START attributes T_XML_DECL_END { s(:xml_decl, val[1]) } + : T_XML_DECL_START T_XML_DECL_END + { + XmlDeclaration.new + } + | T_XML_DECL_START attributes T_XML_DECL_END + { + XmlDeclaration.new(val[1]) + } # Plain text text - : T_TEXT { s(:text, val[0]) } + : T_TEXT { Text.new(:text => val[0]) } ; end @@ -153,20 +199,6 @@ end @line = 1 end - ## - # Emits a new AST token. - # - # @param [Symbol] type - # @param [Array] children - # - def s(type, *children) - return AST::Node.new( - type, - children.flatten, - :line => @line - ) - end - ## # Yields the next token from the lexer. # @@ -240,4 +272,63 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}: return ast end + private + + ## + # Creates a new {Oga::XML::Document} node with the specified child elements. 
+ # + # @param [Array] children + # @return [Oga::XML::Document] + # + def create_document(children = []) + if children.is_a?(Array) + children = children.flatten + else + children = [children] + end + + document = Document.new + + children.each do |child| + if child.is_a?(Doctype) + document.doctype = child + + elsif child.is_a?(XmlDeclaration) + document.xml_declaration = child + + else + document.children << child + end + end + + link_children(document) + + return document + end + + ## + # Links the child nodes together by setting attributes such as the + # previous, next and parent node. + # + # @param [Oga::XML::Node] node + # + def link_children(node) + amount = node.children.length + + node.children.each_with_index do |child, index| + prev_index = index - 1 + next_index = index + 1 + + if index > 0 + child.previous = node.children[prev_index] + end + + if next_index <= amount + child.next = node.children[next_index] + end + + child.parent = node + end + end + # vim: set ft=racc: diff --git a/lib/oga/xml/tree_builder.rb b/lib/oga/xml/tree_builder.rb deleted file mode 100644 index 6bf29ba..0000000 --- a/lib/oga/xml/tree_builder.rb +++ /dev/null @@ -1,177 +0,0 @@ -module Oga - module XML - ## - # The TreeBuilder class turns an AST into a DOM tree. This DOM tree can be - # traversed by requesting child elements, parent elements, etc. - # - # Basic usage: - # - # builder = Oga::XML::TreeBuilder.new - # ast = s(:element, ...) 
- # - # builder.process(ast) # => # - # - class TreeBuilder < ::AST::Processor - ## - # @param [Oga::AST::Node] node - # @return [Oga::XML::Document] - # - def on_document(node) - document = Document.new - - process_all(node).each do |child| - if child.is_a?(XmlDeclaration) - document.xml_declaration = child - - elsif child.is_a?(Doctype) - document.doctype = child - - else - document.children << child - end - end - - document.children.each do |child| - child.parent = document - end - - return document - end - - ## - # @param [Oga::AST::Node] node - # @return [Oga::XML::XmlDeclaration] - # - def on_xml_decl(node) - attributes = process(node.children[0]) - - return XmlDeclaration.new(attributes) - end - - ## - # @param [Oga::AST::Node] node - # @return [Oga::XML::Doctype] - # - def on_doctype(node) - return Doctype.new( - :name => node.children[0], - :type => node.children[1], - :public_id => node.children[2], - :system_id => node.children[3] - ) - end - - ## - # @param [Oga::AST::Node] node - # @return [Oga::XML::Comment] - # - def on_comment(node) - return Comment.new(:text => node.children[0]) - end - - ## - # Processes an `(element)` node and its child elements. - # - # An element can have a parent, previous and next element as well as a - # number of child elements. 
- # - # @param [Oga::AST::Node] node - # @return [Oga::XML::Element] - # - def on_element(node) - ns, name, attr, *children = *node - - if attr - attr = process(attr) - end - - if children - children = process_all(children.compact) - end - - element = Element.new( - :name => name, - :namespace => ns, - :attributes => attr, - :children => children - ) - - process_children(element) - - return element - end - - ## - # @param [Oga::AST::Node] node - # @return [Oga::XML::Text] - # - def on_text(node) - return Text.new(:text => node.children[0]) - end - - ## - # @param [Oga::AST::Node] node - # @return [Oga::XML::Cdata] - # - def on_cdata(node) - return Cdata.new(:text => node.children[0]) - end - - ## - # Converts a `(attributes)` node into a Hash. - # - # @param [Oga::AST::Node] node - # @return [Hash] - # - def on_attributes(node) - pairs = process_all(node) - - return Hash[pairs] - end - - ## - # @param [Oga::AST::Node] node - # @return [Array] - # - def on_attribute(node) - return *node - end - - ## - # Raises for every unhandled node. - # - # @param [Oga::AST::Node] node - # - def handler_missing(node) - raise "No handler for node type #{node.type.inspect}" - end - - private - - ## - # Iterates over the child elements of an element and assigns the parent, - # previous and next elements. The supplied object is modified in place. 
- # - # @param [Oga::XML::Element] element - # - def process_children(element) - amount = element.children.length - - element.children.each_with_index do |child, index| - prev_index = index - 1 - next_index = index + 1 - - if index > 0 - child.previous = element.children[prev_index] - end - - if next_index <= amount - child.next = element.children[next_index] - end - - child.parent = element - end - end - end # TreeBuilder - end # XML -end # Oga diff --git a/oga.gemspec b/oga.gemspec index 197b047..76b027c 100644 --- a/oga.gemspec +++ b/oga.gemspec @@ -16,7 +16,6 @@ Gem::Specification.new do |s| s.required_ruby_version = '>= 1.9.3' s.add_dependency 'racc' - s.add_dependency 'ast' s.add_development_dependency 'cliver' s.add_development_dependency 'rake' diff --git a/spec/oga/html/parser/element_spec.rb b/spec/oga/html/parser/element_spec.rb index 2567273..6db3111 100644 --- a/spec/oga/html/parser/element_spec.rb +++ b/spec/oga/html/parser/element_spec.rb @@ -1,10 +1,17 @@ require 'spec_helper' describe Oga::HTML::Parser do - example 'parse an HTML void element' do - parse_html('').should == s( - :document, - s(:element, nil, 'meta', nil, nil) - ) + context 'HTML void elements' do + before :all do + @node = parse_html('').children[0] + end + + example 'return an Element instance' do + @node.is_a?(Oga::XML::Element).should == true + end + + example 'set the name of the element' do + @node.name.should == 'meta' + end end end diff --git a/spec/oga/xml/parser/cdata_spec.rb b/spec/oga/xml/parser/cdata_spec.rb index 269681d..c96a974 100644 --- a/spec/oga/xml/parser/cdata_spec.rb +++ b/spec/oga/xml/parser/cdata_spec.rb @@ -1,24 +1,37 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'cdata tags' do - example 'parse an empty cdata tag' do - parse('').should == s(:document, s(:cdata)) + context 'empty cdata tags' do + before :all do + @node = parse('').children[0] end - example 'parse a cdata tag' do - parse('').should == s(:document, s(:cdata, 'foo')) + example 
'return a Cdata instance' do + @node.is_a?(Oga::XML::Cdata).should == true + end + end + + context 'cdata tags with text' do + before :all do + @node = parse('').children[0] end - example 'parse an element inside a cdata tag' do - parse('foo

]]>').should == s( - :document, - s(:cdata, '

foo

') - ) + example 'return a Cdata instance' do + @node.is_a?(Oga::XML::Cdata).should == true end - example 'parse double brackets inside a cdata tag' do - parse('').should == s(:document, s(:cdata, ']]')) + example 'set the text of the tag' do + @node.text.should == 'foo' + end + end + + context 'cdata tags with nested elements' do + before :all do + @node = parse('foo

]]>').children[0] + end + + example 'set the HTML as raw text' do + @node.text.should == '

foo

' end end end diff --git a/spec/oga/xml/parser/comments_spec.rb b/spec/oga/xml/parser/comments_spec.rb index 6aa12cc..e201a8e 100644 --- a/spec/oga/xml/parser/comments_spec.rb +++ b/spec/oga/xml/parser/comments_spec.rb @@ -1,13 +1,27 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'comments' do - example 'parse an empty comment' do - parse('').should == s(:document, s(:comment)) + context 'empty comments' do + before :all do + @node = parse('').children[0] end - example 'parse a comment' do - parse('').should == s(:document, s(:comment, 'foo')) + example 'return a Comment instance' do + @node.is_a?(Oga::XML::Comment).should == true + end + end + + context 'comments with text' do + before :all do + @node = parse('').children[0] + end + + example 'return a Comment instance' do + @node.is_a?(Oga::XML::Comment).should == true + end + + example 'set the text of the comment' do + @node.text.should == 'foo' end end end diff --git a/spec/oga/xml/parser/doctype_spec.rb b/spec/oga/xml/parser/doctype_spec.rb index 9c74851..3ebe957 100644 --- a/spec/oga/xml/parser/doctype_spec.rb +++ b/spec/oga/xml/parser/doctype_spec.rb @@ -1,46 +1,83 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'doctypes' do - example 'parse a doctype' do - parse('').should == s(:document, s(:doctype, 'html')) + context 'basic doctypes' do + before :all do + @document = parse('') end - example 'parse a doctype with the doctype type' do - parse('').should == s( - :document, - s(:doctype, 'html', 'PUBLIC') - ) + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true end - example 'parse a doctype with a public ID' do - parse('').should == s( - :document, - s(:doctype, 'html', 'PUBLIC', 'foo') - ) + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end + end + + context 'doctypes with a type' do + before :all do + @document = parse('') end - example 'parse a doctype with a public and private ID' do 
- parse('').should == s( - :document, - s(:doctype, 'html', 'PUBLIC', 'foo', 'bar') - ) + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true end - example 'parse an HTML 4 strict doctype' do - doctype = '' + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end - parse(doctype).should == s( - :document, - s( - :doctype, - 'HTML', - 'PUBLIC', - '-//W3C//DTD HTML 4.01//EN', - 'http://www.w3.org/TR/html4/strict.dtd' - ) - ) + example 'set the type of the doctype' do + @document.doctype.type.should == 'PUBLIC' + end + end + + context 'doctypes with a public ID' do + before :all do + @document = parse('') + end + + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end + + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end + + example 'set the type of the doctype' do + @document.doctype.type.should == 'PUBLIC' + end + + example 'set the public ID of the doctype' do + @document.doctype.public_id.should == 'foo' + end + end + + context 'doctypes with a system ID' do + before :all do + @document = parse('') + end + + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end + + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end + + example 'set the type of the doctype' do + @document.doctype.type.should == 'PUBLIC' + end + + example 'set the public ID of the doctype' do + @document.doctype.public_id.should == 'foo' + end + + example 'set the system ID of the doctype' do + @document.doctype.system_id.should == 'bar' end end end diff --git a/spec/oga/xml/parser/documents_spec.rb b/spec/oga/xml/parser/documents_spec.rb index 3eef8d0..298a294 100644 --- a/spec/oga/xml/parser/documents_spec.rb +++ b/spec/oga/xml/parser/documents_spec.rb @@ -1,13 +1,20 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'HTML 
documents' do - example 'parse an empty document' do - parse('').should == s(:document) + context 'empty documents' do + before :all do + @document = parse('') end - example 'parse a basic HTML document' do - html = <<-EOF + example 'return a Document instance' do + @document.is_a?(Oga::XML::Document).should == true + end + end + + context 'HTML documents' do + before :all do + html = <<-EOF.strip + @@ -17,48 +24,23 @@ describe Oga::XML::Parser do EOF - parse(html).should == s( - :document, - s(:doctype, 'html'), - s(:text, "\n"), + @document = parse(html, :html => true) + end - # - s( - :element, - nil, - 'html', - nil, + example 'return a Document instance' do + @document.is_a?(Oga::XML::Document).should == true + end - s(:text, "\n"), + example 'set the doctype of the document' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end - # - s( - :element, - nil, - 'head', - nil, + example 'set the XML declaration of the document' do + @document.xml_declaration.is_a?(Oga::XML::XmlDeclaration).should == true + end - s(:text, "\n"), - - # - s( - :element, - nil, - 'title', - nil, - s(:text, 'Title') - ), - - s(:text, "\n") - ), - - # <body> - s(:text, "\n"), - s(:element, nil, 'body', nil, nil), - s(:text, "\n") - ), - s(:text, "\n") - ) + example 'set the children of the document' do + @document.children.empty?.should == false end end end diff --git a/spec/oga/xml/parser/elements_spec.rb b/spec/oga/xml/parser/elements_spec.rb index c60f561..28e68c9 100644 --- a/spec/oga/xml/parser/elements_spec.rb +++ b/spec/oga/xml/parser/elements_spec.rb @@ -1,148 +1,81 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'elements' do - example 'parse an empty element' do - parse('<p></p>').should == s( - :document, - s(:element, nil, 'p', nil, nil) - ) + context 'empty elements' do + before :all do + @element = parse('<p></p>').children[0] end - example 'parse an element with text' do - parse('<p>foo</p>').should == s( - :document, - s(:element, nil, 'p', 
nil, s(:text, 'foo')) - ) + example 'return an Element instance' do + @element.is_a?(Oga::XML::Element).should == true end - example 'parse an element with a single attribute' do - parse('<p foo></p>').should == s( - :document, - s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo')), nil) - ) + example 'set the name of the element' do + @element.name.should == 'p' + end + end + + context 'elements with namespaces' do + before :all do + @element = parse('<foo:p></p>').children[0] end - example 'parse an element with a single attribute with a value' do - parse('<p foo="bar"></p>').should == s( - :document, - s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo', 'bar')), nil) - ) + example 'return an Element instance' do + @element.is_a?(Oga::XML::Element).should == true end - example 'parse an element with multiple attributes' do - parse('<p foo="bar" baz="bad"></p>').should == s( - :document, - s( - :element, - nil, - 'p', - s( - :attributes, - s(:attribute, 'foo', 'bar'), - s(:attribute, 'baz', 'bad') - ), - nil - ) - ) + example 'set the name of the element' do + @element.name.should == 'p' end - example 'parse an element with text and attributes' do - parse('<p class="foo">Bar</p>').should == s( - :document, - s( - :element, - nil, - 'p', - s(:attributes, s(:attribute, 'class', 'foo')), - s(:text, 'Bar') - ) - ) + example 'set the namespace of the element' do + @element.namespace.should == 'foo' + end + end + + context 'elements with attributes' do + before :all do + @element = parse('<foo bar="baz"></foo>').children[0] end - example 'parse an element with a namespace' do - parse('<foo:p></p>').should == s( - :document, - s(:element, 'foo', 'p', nil, nil) - ) + example 'return an Element instance' do + @element.is_a?(Oga::XML::Element).should == true end - example 'parse an element with a namespace and an attribute' do - parse('<foo:p class="bar"></p>').should == s( - :document, - s( - :element, - 'foo', - 'p', - s(:attributes, s(:attribute, 'class', 
'bar')), - nil - ) - ) + example 'set the attributes of the element' do + @element.attributes.should == {'bar' => 'baz'} + end + end + + context 'elements with child elements' do + before :all do + @element = parse('<a><b></b></a>').children[0] end - example 'parse an element nested inside another element' do - parse('<p><a></a></p>').should == s( - :document, - s(:element, nil, 'p', nil, s(:element, nil, 'a', nil, nil)) - ) + example 'set the name of the outer element' do + @element.name.should == 'a' end - example 'parse an element with children text, element' do - parse('<p>Foo<a>Bar</a></p>').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:text, 'Foo'), - s(:element, nil, 'a', nil, s(:text, 'Bar')) - ) - ) + example 'set the child elements' do + @element.children[0].is_a?(Oga::XML::Element).should == true end - example 'parse an element with children text, element, text' do - parse('<p>Foo<a>Bar</a>Baz</p>').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:text, 'Foo'), - s(:element, nil, 'a', nil, s(:text, 'Bar')), - s(:text, 'Baz') - ) - ) + example 'set the name of the child element' do + @element.children[0].name.should == 'b' + end + end + + context 'elements with child elements and text' do + before :all do + @element = parse('<a>Foo<b>bar</b></a>').children[0] end - example 'parse an element with children element, text' do - parse('<p><a>Bar</a>Baz</p>').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:element, nil, 'a', nil, s(:text, 'Bar')), - s(:text, 'Baz') - ) - ) + example 'include the text node of the outer element' do + @element.children[0].is_a?(Oga::XML::Text).should == true end - example 'parse an element with children element, text, element' do - parse('<p><a>Bar</a>Baz<span>Da</span></p>').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:element, nil, 'a', nil, s(:text, 'Bar')), - s(:text, 'Baz'), - s(:element, nil, 'span', nil, s(:text, 'Da')) - ) - ) + example 
'include the text node of the inner element' do + @element.children[1].children[0].is_a?(Oga::XML::Text).should == true end end end diff --git a/spec/oga/xml/parser/general_spec.rb b/spec/oga/xml/parser/general_spec.rb deleted file mode 100644 index cfc0b76..0000000 --- a/spec/oga/xml/parser/general_spec.rb +++ /dev/null @@ -1,11 +0,0 @@ -require 'spec_helper' - -describe Oga::XML::Parser do - example 'parse regular text' do - parse('foo').should == s(:document, s(:text, 'foo')) - end - - example 'parse a newline' do - parse("\n").should == s(:document, s(:text, "\n")) - end -end diff --git a/spec/oga/xml/parser/hierarchy_spec.rb b/spec/oga/xml/parser/hierarchy_spec.rb new file mode 100644 index 0000000..339e2ec --- /dev/null +++ b/spec/oga/xml/parser/hierarchy_spec.rb @@ -0,0 +1,53 @@ +require 'spec_helper' + +describe Oga::XML::Parser do + context 'elements with parents' do + before :all do + @parent = parse('<a><b></b></a>').children[0] + end + + example 'return an Element instance for the parent' do + @parent.children[0].parent.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct parent' do + @parent.children[0].parent.should == @parent + end + end + + context 'text nodes with parents' do + before :all do + @parent = parse('<a>foo</a>').children[0] + end + + example 'return an Element instance for the parent' do + @parent.children[0].parent.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct parent' do + @parent.children[0].parent.should == @parent + end + end + + context 'elements with adjacent elements' do + before :all do + @document = parse('<a></a><b></b>') + end + + example 'return an Element instance for the next element' do + @document.children[0].next.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct next element' do + @document.children[0].next.should == @document.children[1] + end + + example 'return an Element instance for the previous element' do + 
@document.children[1].previous.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct previous element' do + @document.children[1].previous.should == @document.children[0] + end + end +end diff --git a/spec/oga/xml/parser/html_void_elements_spec.rb b/spec/oga/xml/parser/html_void_elements_spec.rb index 29e481e..013b82c 100644 --- a/spec/oga/xml/parser/html_void_elements_spec.rb +++ b/spec/oga/xml/parser/html_void_elements_spec.rb @@ -1,64 +1,45 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'HTML void elements' do - example 'parse a void element that omits the closing /' do - parse('<link>', :html => true).should == s( - :document, - s(:element, nil, 'link', nil, nil) - ) + context 'void elements' do + before :all do + @node = parse('<link>', :html => true).children[0] end - example 'parse a void element inside another element' do - parse('<head><link></head>', :html => true).should == s( - :document, - s(:element, nil, 'head', nil, s(:element, nil, 'link', nil, nil)) - ) + example 'return an Element instance' do + @node.is_a?(Oga::XML::Element).should == true end - example 'parse a void element with attributes inside another element' do - parse('<head><link href="foo.css"></head>', :html => true).should == s( - :document, - s( - :element, - nil, - 'head', - nil, - s( - :element, - nil, - 'link', - s(:attributes, s(:attribute, 'href', 'foo.css')), - nil - ) - ) - ) + example 'set the name of the element' do + @node.name.should == 'link' + end + end + + context 'nested void elements' do + before :all do + @node = parse('<head><link></head>', :html => true).children[0] end - example 'parse a void element and a non void element in the same parent' do - parse('<head><link><title>Foo', :html => true).should == s( - :document, - s( - :element, - nil, - 'head', - nil, - s( - :element, - nil, - 'link', - nil, - nil - ), - s( - :element, - nil, - 'title', - nil, - s(:text, 'Foo') - ) - ) - ) + example 'set the name of the outer element' 
do + @node.name.should == 'head' + end + + example 'set the name of the inner element' do + @node.children[0].name.should == 'link' + end + end + + context 'void elements with attributes' do + before :all do + @node = parse('', :html => true).children[0] + end + + example 'set the name of the element' do + @node.name.should == 'link' + end + + example 'set the attributes' do + @node.attributes.should == {'href' => 'foo'} end end end diff --git a/spec/oga/xml/parser/text_spec.rb b/spec/oga/xml/parser/text_spec.rb new file mode 100644 index 0000000..d114f78 --- /dev/null +++ b/spec/oga/xml/parser/text_spec.rb @@ -0,0 +1,17 @@ +require 'spec_helper' + +describe Oga::XML::Parser do + context 'plain text' do + before :all do + @node = parse('foo').children[0] + end + + example 'return a Text instance' do + @node.is_a?(Oga::XML::Text).should == true + end + + example 'set the text' do + @node.text.should == 'foo' + end + end +end diff --git a/spec/oga/xml/parser/xml_declaration_spec.rb b/spec/oga/xml/parser/xml_declaration_spec.rb index 60778ab..7a72179 100644 --- a/spec/oga/xml/parser/xml_declaration_spec.rb +++ b/spec/oga/xml/parser/xml_declaration_spec.rb @@ -1,16 +1,39 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'XML declaration tags' do - example 'parse an empty XML declaration tag' do - parse('').should == s(:document, s(:xml_decl)) + context 'empty XML declaration tags' do + before :all do + @node = parse('').xml_declaration end - example 'parse an XML declaration tag' do - parse('').should == s( - :document, - s(:xml_decl, s(:attributes, s(:attribute, 'version', '1.0'))) - ) + example 'return an XmlDeclaration instance' do + @node.is_a?(Oga::XML::XmlDeclaration).should == true + end + + example 'set the default XML version' do + @node.version.should == '1.0' + end + + example 'set the default encoding' do + @node.encoding.should == 'UTF-8' + end + end + + context 'XML declaration tags with custom attributes' do + before :all do + @node = 
parse('').xml_declaration + end + + example 'return an XmlDeclaration instance' do + @node.is_a?(Oga::XML::XmlDeclaration).should == true + end + + example 'set the XML version' do + @node.version.should == '1.5' + end + + example 'set the encoding' do + @node.encoding.should == 'foo' end end end diff --git a/spec/oga/xml/tree_builder_spec.rb b/spec/oga/xml/tree_builder_spec.rb deleted file mode 100644 index f7b76d8..0000000 --- a/spec/oga/xml/tree_builder_spec.rb +++ /dev/null @@ -1,281 +0,0 @@ -require 'spec_helper' - -describe Oga::XML::TreeBuilder do - before do - @builder = described_class.new - end - - context '#on_document' do - before do - node = s(:document, s(:element, nil, 'p', nil, nil)) - @tag = @builder.process(node) - end - - example 'return a Document node' do - @tag.is_a?(Oga::XML::Document).should == true - end - - example 'include the children of the element' do - @tag.children[0].is_a?(Oga::XML::Element).should == true - end - end - - context '#on_document with XML declarations' do - before do - decl = s(:xml_decl, s(:attributes, s(:attribute, 'encoding', 'UTF-8'))) - node = s(:document, decl) - @tag = @builder.process(node) - end - - example 'set the XML declaration of the document' do - @tag.xml_declaration.is_a?(Oga::XML::XmlDeclaration).should == true - end - end - - context '#on_document with doctypes' do - before do - doctype = s(:doctype, 'html', 'PUBLIC', 'foo', 'bar') - node = s(:document, doctype) - @tag = @builder.process(node) - end - - example 'set the doctype of the document' do - @tag.doctype.is_a?(Oga::XML::Doctype).should == true - end - end - - context '#on_xml_decl' do - before do - node = s(:xml_decl, s(:attributes, s(:attribute, 'encoding', 'UTF-8'))) - @tag = @builder.process(node) - end - - example 'return an XmlDeclaration node' do - @tag.is_a?(Oga::XML::XmlDeclaration).should == true - end - - example 'include the encoding of the tag' do - @tag.encoding.should == 'UTF-8' - end - end - - context '#on_doctype' do - before 
do - node = s(:doctype, 'html', 'PUBLIC', 'foo', 'bar') - @tag = @builder.process(node) - end - - example 'return a Doctype node' do - @tag.is_a?(Oga::XML::Doctype).should == true - end - - example 'include the doctype name' do - @tag.name.should == 'html' - end - - example 'include the doctype type' do - @tag.type.should == 'PUBLIC' - end - - example 'include the public ID' do - @tag.public_id.should == 'foo' - end - - example 'include the system ID' do - @tag.system_id.should == 'bar' - end - end - - context '#on_comment' do - before do - node = s(:comment, 'foo') - @tag = @builder.process(node) - end - - example 'return a Comment node' do - @tag.is_a?(Oga::XML::Comment).should == true - end - - example 'include the text of the comment' do - @tag.text.should == 'foo' - end - end - - context '#on_element' do - context 'simple elements' do - before do - node = s(:element, 'foo', 'p', nil, nil) - @tag = @builder.process(node) - end - - example 'return a Element node' do - @tag.is_a?(Oga::XML::Element).should == true - end - - example 'include the name of the element' do - @tag.name.should == 'p' - end - - example 'include the namespace of the element' do - @tag.namespace.should == 'foo' - end - end - - context 'elements with attributes' do - before do - node = s( - :element, - nil, - 'p', - s(:attributes, s(:attribute, 'key', 'value')), - nil - ) - - @tag = @builder.process(node) - end - - example 'include the name of the element' do - @tag.name.should == 'p' - end - - example 'include the attributes' do - @tag.attributes.should == {'key' => 'value'} - end - end - - context 'elements with parent elements' do - before do - node = s(:element, nil, 'p', nil, s(:element, nil, 'span', nil, nil)) - @tag = @builder.process(node) - end - - example 'set the parent element' do - @tag.children[0].parent.should == @tag - end - end - - context 'elements with next elements' do - before do - node = s( - :element, - nil, - 'p', - nil, - s(:element, nil, 'a', nil, nil), - 
s(:element, nil, 'span', nil, nil) - ) - - @tag = @builder.process(node) - end - - example 'set the next element' do - @tag.children[0].next.should == @tag.children[1] - end - - example 'do not set the next element for the last element' do - @tag.children[1].next.should == nil - end - end - - context 'elements with previous elements' do - before do - node = s( - :element, - nil, - 'p', - nil, - s(:element, nil, 'a', nil, nil), - s(:element, nil, 'span', nil, nil) - ) - - @tag = @builder.process(node) - end - - example 'set the previous element' do - @tag.children[1].previous.should == @tag.children[0] - end - - example 'do not set the previous element for the first element' do - @tag.children[0].previous.should == nil - end - end - - context 'elements with child elements' do - before do - node = s(:element, nil, 'p', nil, s(:element, nil, 'span', nil, nil)) - @tag = @builder.process(node) - end - - example 'include the name of the element' do - @tag.name.should == 'p' - end - - example 'include the child element' do - @tag.children[0].is_a?(Oga::XML::Element).should == true - end - - example 'include the name of the child element' do - @tag.children[0].name.should == 'span' - end - end - end - - context '#on_text' do - before do - node = s(:text, 'Hello') - @tag = @builder.process(node) - end - - example 'return a Text node' do - @tag.is_a?(Oga::XML::Text).should == true - end - - example 'include the text of the node' do - @tag.text.should == 'Hello' - end - end - - context '#on_cdata' do - before do - node = s(:cdata, 'Hello') - @tag = @builder.process(node) - end - - example 'return a Cdata node' do - @tag.is_a?(Oga::XML::Cdata).should == true - end - - example 'include the text of the node' do - @tag.text.should == 'Hello' - end - end - - context '#on_attributes' do - before do - @node = s( - :attributes, - s(:attribute, 'foo', 'bar'), - s(:attribute, 'baz', 'wat') - ) - end - - example 'return the attributes as a Hash' do - @builder.process(@node).should == 
{'foo' => 'bar', 'baz' => 'wat'} - end - - example 'return an empty Hash by default' do - @builder.process(s(:attributes)).should == {} - end - end - - context '#handler_missing' do - before do - @node = s(:foo, 'bar') - end - - example 'raise when processing an unknown node' do - lambda { @builder.process(@node) } - .should raise_error('No handler for node type :foo') - end - end -end