diff --git a/lib/oga.rb b/lib/oga.rb index cf9a66c..7b6b11e 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -1,8 +1,5 @@ -require 'ast' require 'set' -require_relative 'oga/ast/node' - require_relative 'oga/xml/lexer' require_relative 'oga/xml/parser' require_relative 'oga/xml/node' @@ -13,6 +10,5 @@ require_relative 'oga/xml/comment' require_relative 'oga/xml/cdata' require_relative 'oga/xml/xml_declaration' require_relative 'oga/xml/doctype' -require_relative 'oga/xml/tree_builder' require_relative 'oga/html/parser' diff --git a/lib/oga/ast/node.rb b/lib/oga/ast/node.rb deleted file mode 100644 index 5f628ca..0000000 --- a/lib/oga/ast/node.rb +++ /dev/null @@ -1,9 +0,0 @@ -module Oga - module AST - ## - # - class Node < ::AST::Node - attr_reader :line - end # Node - end # AST -end # Oga diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index d5ce39c..4f132ca 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -20,14 +20,14 @@ options no_result_var rule document - : expressions { s(:document, val[0]) } - | /* none */ { s(:document) } + : expressions { create_document(val[0]) } + | /* none */ { create_document } ; expressions : expressions expression { val.compact } - | expression { val[0] } - | /* none */ { nil } + | expression { val } + | /* none */ { [] } ; expression @@ -43,24 +43,32 @@ rule doctype # - : T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END { s(:doctype, val[1]) } + : T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END + { + Doctype.new(:name => val[1]) + } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END { - s(:doctype, val[1], val[2]) + Doctype.new(:name => val[1], :type => val[2]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END { - s(:doctype, val[1], val[2], val[3]) + Doctype.new(:name => val[1], :type => val[2], :public_id => val[3]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END { - s(:doctype, val[1], val[2], val[3], val[4]) + Doctype.new( + :name => val[1], + 
:type => val[2], + :public_id => val[3], + :system_id => val[4] + ) } ; @@ -68,31 +76,24 @@ rule cdata # - : T_CDATA_START T_CDATA_END { s(:cdata) } + : T_CDATA_START T_CDATA_END { Cdata.new } # - | T_CDATA_START T_TEXT T_CDATA_END { s(:cdata, val[1]) } + | T_CDATA_START T_TEXT T_CDATA_END { Cdata.new(:text => val[1]) } ; # Comments comment # - : T_COMMENT_START T_COMMENT_END { s(:comment) } + : T_COMMENT_START T_COMMENT_END { Comment.new } # - | T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) } + | T_COMMENT_START T_TEXT T_COMMENT_END { Comment.new(:text => val[1]) } ; # Elements - element - : element_open attributes expressions T_ELEM_END - { - s(:element, val[0], val[1], val[2]) - } - ; - element_open #
: T_ELEM_START T_ELEM_NAME { [nil, val[1]] }
@@ -101,11 +102,44 @@ rule
| T_ELEM_START T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] }
;
+ element_start
+ # Builds the Element for an opening tag. val[0] is the [namespace, name]
+ # pair returned by element_open and val[1] is the value of `attributes`.
+ : element_open attributes
+ {
+ Element.new(
+ :namespace => val[0][0],
+ :name => val[0][1],
+ :attributes => val[1]
+ )
+ }
+ ;
+
+ element
+ # Attaches the parsed child nodes to the Element created by element_start
+ # and returns that Element.
+ : element_start expressions T_ELEM_END
+ {
+ element = val[0]
+
+ # `expressions` can yield nested arrays, so flatten them into one list.
+ element.children = val[1].flatten
+
+ # Set the parent/previous/next references of every child.
+ link_children(element)
+
+ element
+ }
+ ;
+
# Attributes
attributes
- : attributes_ { s(:attributes, val[0]) }
- | /* none */ { nil }
+    # Combines the single-pair Hashes produced by each `attribute` into one
+    # Hash. When a name occurs more than once the last value wins. The Hash
+    # is built in place so that no intermediate copy is made per attribute.
+    : attributes_
+      {
+        val[0].flatten.each_with_object({}) do |pair, attrs|
+          attrs.merge!(pair)
+        end
+      }
+    # No attributes at all: an empty Hash.
+    | /* none */ { {} }
;
attributes_
@@ -115,21 +149,33 @@ rule
attribute
+ # Every alternative returns a Hash holding a single name => value pair; an
+ # attribute without a value maps its name to nil.
# foo
- : T_ATTR { s(:attribute, val[0]) }
+ : T_ATTR
+ {
+ {val[0] => nil}
+ }
# foo="bar"
- | T_ATTR T_STRING { s(:attribute, val[0], val[1]) }
+ | T_ATTR T_STRING
+ {
+ {val[0] => val[1]}
+ }
;
# XML declarations
xmldecl
+ # A declaration without attributes is built with no arguments; otherwise
+ # the value of `attributes` (val[1]) is passed to XmlDeclaration.new.
- : T_XML_DECL_START T_XML_DECL_END { s(:xml_decl) }
- | T_XML_DECL_START attributes T_XML_DECL_END { s(:xml_decl, val[1]) }
+ : T_XML_DECL_START T_XML_DECL_END
+ {
+ XmlDeclaration.new
+ }
+ | T_XML_DECL_START attributes T_XML_DECL_END
+ {
+ XmlDeclaration.new(val[1])
+ }
# Plain text
text
+ # Wraps the value of the T_TEXT token in a Text node.
- : T_TEXT { s(:text, val[0]) }
+ : T_TEXT { Text.new(:text => val[0]) }
;
end
@@ -153,20 +199,6 @@ end
@line = 1
end
- ##
- # Emits a new AST token.
- #
- # @param [Symbol] type
- # @param [Array] children
- #
- def s(type, *children)
- return AST::Node.new(
- type,
- children.flatten,
- :line => @line
- )
- end
-
##
# Yields the next token from the lexer.
#
@@ -240,4 +272,63 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
return ast
end
+ private
+
+  ##
+  # Builds a new {Oga::XML::Document} out of the parsed top-level nodes.
+  #
+  # A Doctype child is stored as the document's doctype, an XmlDeclaration
+  # child as its XML declaration, and every other child is appended to the
+  # document's children. The children are linked once all nodes are placed.
+  #
+  # @param [Array] children A (possibly nested) Array of nodes, or a single
+  #  node.
+  # @return [Oga::XML::Document]
+  #
+  def create_document(children = [])
+    nodes    = children.is_a?(Array) ? children.flatten : [children]
+    document = Document.new
+
+    nodes.each do |node|
+      case node
+      when Doctype
+        document.doctype = node
+      when XmlDeclaration
+        document.xml_declaration = node
+      else
+        document.children << node
+      end
+    end
+
+    link_children(document)
+
+    document
+  end
+
+  ##
+  # Links the child nodes together by setting attributes such as the
+  # previous, next and parent node.
+  #
+  # Only neighbours that exist are assigned: the first child's `previous`
+  # and the last child's `next` are left untouched instead of being set from
+  # an out-of-range index.
+  #
+  # @param [Oga::XML::Node] node
+  #
+  def link_children(node)
+    children   = node.children
+    last_index = children.length - 1
+
+    children.each_with_index do |child, index|
+      child.previous = children[index - 1] if index > 0
+
+      # The previous check (`next_index <= amount`) was always true, so the
+      # last child was assigned an element past the end of the list.
+      child.next = children[index + 1] if index < last_index
+
+      child.parent = node
+    end
+  end
+
# vim: set ft=racc:
diff --git a/lib/oga/xml/tree_builder.rb b/lib/oga/xml/tree_builder.rb
deleted file mode 100644
index 6bf29ba..0000000
--- a/lib/oga/xml/tree_builder.rb
+++ /dev/null
@@ -1,177 +0,0 @@
-module Oga
- module XML
- ##
- # The TreeBuilder class turns an AST into a DOM tree. This DOM tree can be
- # traversed by requesting child elements, parent elements, etc.
- #
- # Basic usage:
- #
- # builder = Oga::XML::TreeBuilder.new
- # ast = s(:element, ...)
- #
- # builder.process(ast) # => #
foo
') - ) + example 'return a Cdata instance' do + @node.is_a?(Oga::XML::Cdata).should == true end - example 'parse double brackets inside a cdata tag' do - parse('').should == s(:document, s(:cdata, ']]')) + example 'set the text of the tag' do + @node.text.should == 'foo' + end + end + + context 'cdata tags with nested elements' do + before :all do + @node = parse('foo]]>').children[0] + end + + example 'set the HTML as raw text' do + @node.text.should == 'foo
' end end end diff --git a/spec/oga/xml/parser/comments_spec.rb b/spec/oga/xml/parser/comments_spec.rb index 6aa12cc..e201a8e 100644 --- a/spec/oga/xml/parser/comments_spec.rb +++ b/spec/oga/xml/parser/comments_spec.rb @@ -1,13 +1,27 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'comments' do - example 'parse an empty comment' do - parse('').should == s(:document, s(:comment)) + context 'empty comments' do + before :all do + @node = parse('').children[0] end - example 'parse a comment' do - parse('').should == s(:document, s(:comment, 'foo')) + example 'return a Comment instance' do + @node.is_a?(Oga::XML::Comment).should == true + end + end + + context 'comments with text' do + before :all do + @node = parse('').children[0] + end + + example 'return a Comment instance' do + @node.is_a?(Oga::XML::Comment).should == true + end + + example 'set the text of the comment' do + @node.text.should == 'foo' end end end diff --git a/spec/oga/xml/parser/doctype_spec.rb b/spec/oga/xml/parser/doctype_spec.rb index 9c74851..3ebe957 100644 --- a/spec/oga/xml/parser/doctype_spec.rb +++ b/spec/oga/xml/parser/doctype_spec.rb @@ -1,46 +1,83 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'doctypes' do - example 'parse a doctype' do - parse('').should == s(:document, s(:doctype, 'html')) + context 'basic doctypes' do + before :all do + @document = parse('') end - example 'parse a doctype with the doctype type' do - parse('').should == s( - :document, - s(:doctype, 'html', 'PUBLIC') - ) + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true end - example 'parse a doctype with a public ID' do - parse('').should == s( - :document, - s(:doctype, 'html', 'PUBLIC', 'foo') - ) + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end + end + + context 'doctypes with a type' do + before :all do + @document = parse('') end - example 'parse a doctype with a public and private ID' do 
- parse('').should == s( - :document, - s(:doctype, 'html', 'PUBLIC', 'foo', 'bar') - ) + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true end - example 'parse an HTML 4 strict doctype' do - doctype = '' + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end - parse(doctype).should == s( - :document, - s( - :doctype, - 'HTML', - 'PUBLIC', - '-//W3C//DTD HTML 4.01//EN', - 'http://www.w3.org/TR/html4/strict.dtd' - ) - ) + example 'set the type of the doctype' do + @document.doctype.type.should == 'PUBLIC' + end + end + + context 'doctypes with a public ID' do + before :all do + @document = parse('') + end + + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end + + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end + + example 'set the type of the doctype' do + @document.doctype.type.should == 'PUBLIC' + end + + example 'set the public ID of the doctype' do + @document.doctype.public_id.should == 'foo' + end + end + + context 'doctypes with a system ID' do + before :all do + @document = parse('') + end + + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end + + example 'set the name of the doctype' do + @document.doctype.name.should == 'html' + end + + example 'set the type of the doctype' do + @document.doctype.type.should == 'PUBLIC' + end + + example 'set the public ID of the doctype' do + @document.doctype.public_id.should == 'foo' + end + + example 'set the system ID of the doctype' do + @document.doctype.system_id.should == 'bar' end end end diff --git a/spec/oga/xml/parser/documents_spec.rb b/spec/oga/xml/parser/documents_spec.rb index 3eef8d0..298a294 100644 --- a/spec/oga/xml/parser/documents_spec.rb +++ b/spec/oga/xml/parser/documents_spec.rb @@ -1,13 +1,20 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'HTML 
documents' do - example 'parse an empty document' do - parse('').should == s(:document) + context 'empty documents' do + before :all do + @document = parse('') end - example 'parse a basic HTML document' do - html = <<-EOF + example 'return a Document instance' do + @document.is_a?(Oga::XML::Document).should == true + end + end + + context 'HTML documents' do + before :all do + html = <<-EOF.strip + @@ -17,48 +24,23 @@ describe Oga::XML::Parser do EOF - parse(html).should == s( - :document, - s(:doctype, 'html'), - s(:text, "\n"), + @document = parse(html, :html => true) + end - # - s( - :element, - nil, - 'html', - nil, + example 'return a Document instance' do + @document.is_a?(Oga::XML::Document).should == true + end - s(:text, "\n"), + example 'set the doctype of the document' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end - # - s( - :element, - nil, - 'head', - nil, + example 'set the XML declaration of the document' do + @document.xml_declaration.is_a?(Oga::XML::XmlDeclaration).should == true + end - s(:text, "\n"), - - #foo
').should == s( - :document, - s(:element, nil, 'p', nil, s(:text, 'foo')) - ) + example 'return an Element instance' do + @element.is_a?(Oga::XML::Element).should == true end - example 'parse an element with a single attribute' do - parse('').should == s( - :document, - s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo')), nil) - ) + example 'set the name of the element' do + @element.name.should == 'p' + end + end + + context 'elements with namespaces' do + before :all do + @element = parse('Bar
').should == s( - :document, - s( - :element, - nil, - 'p', - s(:attributes, s(:attribute, 'class', 'foo')), - s(:text, 'Bar') - ) - ) + example 'set the namespace of the element' do + @element.namespace.should == 'foo' + end + end + + context 'elements with attributes' do + before :all do + @element = parse('FooBar
').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:text, 'Foo'), - s(:element, nil, 'a', nil, s(:text, 'Bar')) - ) - ) + example 'set the child elements' do + @element.children[0].is_a?(Oga::XML::Element).should == true end - example 'parse an element with children text, element, text' do - parse('FooBarBaz
').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:text, 'Foo'), - s(:element, nil, 'a', nil, s(:text, 'Bar')), - s(:text, 'Baz') - ) - ) + example 'set the name of the child element' do + @element.children[0].name.should == 'b' + end + end + + context 'elements with child elements and text' do + before :all do + @element = parse('Foobar').children[0] end - example 'parse an element with children element, text' do - parse('BarBaz
').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:element, nil, 'a', nil, s(:text, 'Bar')), - s(:text, 'Baz') - ) - ) + example 'include the text node of the outer element' do + @element.children[0].is_a?(Oga::XML::Text).should == true end - example 'parse an element with children element, text, element' do - parse('BarBazDa
').should == s( - :document, - s( - :element, - nil, - 'p', - nil, - s(:element, nil, 'a', nil, s(:text, 'Bar')), - s(:text, 'Baz'), - s(:element, nil, 'span', nil, s(:text, 'Da')) - ) - ) + example 'include the text node of the inner element' do + @element.children[1].children[0].is_a?(Oga::XML::Text).should == true end end end diff --git a/spec/oga/xml/parser/general_spec.rb b/spec/oga/xml/parser/general_spec.rb deleted file mode 100644 index cfc0b76..0000000 --- a/spec/oga/xml/parser/general_spec.rb +++ /dev/null @@ -1,11 +0,0 @@ -require 'spec_helper' - -describe Oga::XML::Parser do - example 'parse regular text' do - parse('foo').should == s(:document, s(:text, 'foo')) - end - - example 'parse a newline' do - parse("\n").should == s(:document, s(:text, "\n")) - end -end diff --git a/spec/oga/xml/parser/hierarchy_spec.rb b/spec/oga/xml/parser/hierarchy_spec.rb new file mode 100644 index 0000000..339e2ec --- /dev/null +++ b/spec/oga/xml/parser/hierarchy_spec.rb @@ -0,0 +1,53 @@ +require 'spec_helper' + +describe Oga::XML::Parser do + context 'elements with parents' do + before :all do + @parent = parse('').children[0] + end + + example 'return an Element instance for the parent' do + @parent.children[0].parent.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct parent' do + @parent.children[0].parent.should == @parent + end + end + + context 'text nodes with parents' do + before :all do + @parent = parse('foo').children[0] + end + + example 'return an Element instance for the parent' do + @parent.children[0].parent.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct parent' do + @parent.children[0].parent.should == @parent + end + end + + context 'elements with adjacent elements' do + before :all do + @document = parse('') + end + + example 'return an Element instance for the next element' do + @document.children[0].next.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct next element' do + 
@document.children[0].next.should == @document.children[1] + end + + example 'return an Element instance for the previous element' do + @document.children[1].previous.is_a?(Oga::XML::Element).should == true + end + + example 'set the correct previous element' do + @document.children[1].previous.should == @document.children[0] + end + end +end diff --git a/spec/oga/xml/parser/html_void_elements_spec.rb b/spec/oga/xml/parser/html_void_elements_spec.rb index 29e481e..013b82c 100644 --- a/spec/oga/xml/parser/html_void_elements_spec.rb +++ b/spec/oga/xml/parser/html_void_elements_spec.rb @@ -1,64 +1,45 @@ require 'spec_helper' describe Oga::XML::Parser do - context 'HTML void elements' do - example 'parse a void element that omits the closing /' do - parse('', :html => true).should == s( - :document, - s(:element, nil, 'link', nil, nil) - ) + context 'void elements' do + before :all do + @node = parse('', :html => true).children[0] end - example 'parse a void element inside another element' do - parse('', :html => true).should == s( - :document, - s(:element, nil, 'head', nil, s(:element, nil, 'link', nil, nil)) - ) + example 'return an Element instance' do + @node.is_a?(Oga::XML::Element).should == true end - example 'parse a void element with attributes inside another element' do - parse('', :html => true).should == s( - :document, - s( - :element, - nil, - 'head', - nil, - s( - :element, - nil, - 'link', - s(:attributes, s(:attribute, 'href', 'foo.css')), - nil - ) - ) - ) + example 'set the name of the element' do + @node.name.should == 'link' + end + end + + context 'nested void elements' do + before :all do + @node = parse('', :html => true).children[0] end - example 'parse a void element and a non void element in the same parent' do - parse('