First shot at removing the AST layer.

The AST layer is being removed because it doesn't really serve a useful
purpose. In particular, when creating a streaming parser, the AST nodes would
only introduce extra overhead.

As a result, the parser now emits a DOM tree directly rather than first
emitting an AST.
This commit is contained in:
Yorick Peterse 2014-04-21 23:05:39 +02:00
parent 9ee9ec14cb
commit 08d412da7e
17 changed files with 465 additions and 797 deletions

View File

@ -1,8 +1,5 @@
require 'ast'
require 'set' require 'set'
require_relative 'oga/ast/node'
require_relative 'oga/xml/lexer' require_relative 'oga/xml/lexer'
require_relative 'oga/xml/parser' require_relative 'oga/xml/parser'
require_relative 'oga/xml/node' require_relative 'oga/xml/node'
@ -13,6 +10,5 @@ require_relative 'oga/xml/comment'
require_relative 'oga/xml/cdata' require_relative 'oga/xml/cdata'
require_relative 'oga/xml/xml_declaration' require_relative 'oga/xml/xml_declaration'
require_relative 'oga/xml/doctype' require_relative 'oga/xml/doctype'
require_relative 'oga/xml/tree_builder'
require_relative 'oga/html/parser' require_relative 'oga/html/parser'

View File

@ -1,9 +0,0 @@
module Oga
module AST
##
# AST node class used by Oga. It extends the `ast` gem's `::AST::Node` with a
# public reader for `line`.
#
# NOTE(review): `line` is presumably the `:line` property passed to the
# constructor (the parser's `s` helper passes `:line => @line`) - confirm
# against the `ast` gem's property handling.
#
class Node < ::AST::Node
attr_reader :line
end # Node
end # AST
end # Oga

View File

@ -20,14 +20,14 @@ options no_result_var
rule rule
document document
: expressions { s(:document, val[0]) } : expressions { create_document(val[0]) }
| /* none */ { s(:document) } | /* none */ { create_document }
; ;
expressions expressions
: expressions expression { val.compact } : expressions expression { val.compact }
| expression { val[0] } | expression { val }
| /* none */ { nil } | /* none */ { [] }
; ;
expression expression
@ -43,24 +43,32 @@ rule
doctype doctype
# <!DOCTYPE html> # <!DOCTYPE html>
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END { s(:doctype, val[1]) } : T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END
{
Doctype.new(:name => val[1])
}
# <!DOCTYPE html PUBLIC> # <!DOCTYPE html PUBLIC>
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
{ {
s(:doctype, val[1], val[2]) Doctype.new(:name => val[1], :type => val[2])
} }
# <!DOCTYPE html PUBLIC "foo"> # <!DOCTYPE html PUBLIC "foo">
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
{ {
s(:doctype, val[1], val[2], val[3]) Doctype.new(:name => val[1], :type => val[2], :public_id => val[3])
} }
# <!DOCTYPE html PUBLIC "foo" "bar"> # <!DOCTYPE html PUBLIC "foo" "bar">
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
{ {
s(:doctype, val[1], val[2], val[3], val[4]) Doctype.new(
:name => val[1],
:type => val[2],
:public_id => val[3],
:system_id => val[4]
)
} }
; ;
@ -68,31 +76,24 @@ rule
cdata cdata
# <![CDATA[]]> # <![CDATA[]]>
: T_CDATA_START T_CDATA_END { s(:cdata) } : T_CDATA_START T_CDATA_END { Cdata.new }
# <![CDATA[foo]]> # <![CDATA[foo]]>
| T_CDATA_START T_TEXT T_CDATA_END { s(:cdata, val[1]) } | T_CDATA_START T_TEXT T_CDATA_END { Cdata.new(:text => val[1]) }
; ;
# Comments # Comments
comment comment
# <!----> # <!---->
: T_COMMENT_START T_COMMENT_END { s(:comment) } : T_COMMENT_START T_COMMENT_END { Comment.new }
# <!-- foo --> # <!-- foo -->
| T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) } | T_COMMENT_START T_TEXT T_COMMENT_END { Comment.new(:text => val[1]) }
; ;
# Elements # Elements
element
: element_open attributes expressions T_ELEM_END
{
s(:element, val[0], val[1], val[2])
}
;
element_open element_open
# <p> # <p>
: T_ELEM_START T_ELEM_NAME { [nil, val[1]] } : T_ELEM_START T_ELEM_NAME { [nil, val[1]] }
@ -101,11 +102,44 @@ rule
| T_ELEM_START T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] } | T_ELEM_START T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] }
; ;
element_start
: element_open attributes
{
Element.new(
:namespace => val[0][0],
:name => val[0][1],
:attributes => val[1]
)
}
;
element
: element_start expressions T_ELEM_END
{
element = val[0]
element.children = val[1].flatten
link_children(element)
element
}
;
# Attributes # Attributes
attributes attributes
: attributes_ { s(:attributes, val[0]) } : attributes_
| /* none */ { nil } {
attrs = {}
val[0].flatten.each do |pair|
attrs = attrs.merge(pair)
end
attrs
}
| /* none */ { {} }
; ;
attributes_ attributes_
@ -115,21 +149,33 @@ rule
attribute attribute
# foo # foo
: T_ATTR { s(:attribute, val[0]) } : T_ATTR
{
{val[0] => nil}
}
# foo="bar" # foo="bar"
| T_ATTR T_STRING { s(:attribute, val[0], val[1]) } | T_ATTR T_STRING
{
{val[0] => val[1]}
}
; ;
# XML declarations # XML declarations
xmldecl xmldecl
: T_XML_DECL_START T_XML_DECL_END { s(:xml_decl) } : T_XML_DECL_START T_XML_DECL_END
| T_XML_DECL_START attributes T_XML_DECL_END { s(:xml_decl, val[1]) } {
XmlDeclaration.new
}
| T_XML_DECL_START attributes T_XML_DECL_END
{
XmlDeclaration.new(val[1])
}
# Plain text # Plain text
text text
: T_TEXT { s(:text, val[0]) } : T_TEXT { Text.new(:text => val[0]) }
; ;
end end
@ -153,20 +199,6 @@ end
@line = 1 @line = 1
end end
##
# Builds a new AST node of the given type, tagged with the parser's current
# line.
#
# Nested Arrays in the children are flattened into a single list before they
# are handed to the node.
#
# @param [Symbol] type
# @param [Array] children
# @return [AST::Node]
#
def s(type, *children)
  flat_children = children.flatten

  AST::Node.new(type, flat_children, :line => @line)
end
## ##
# Yields the next token from the lexer. # Yields the next token from the lexer.
# #
@ -240,4 +272,63 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
return ast return ast
end end
private
##
# Builds a new {Oga::XML::Document} out of the given child nodes.
#
# Doctype and XML declaration nodes are stored in their dedicated document
# attributes. Every other node is appended to the document's children, after
# which the children are linked together.
#
# @param [Array] children A (possibly nested) Array of nodes, or a single
#  node.
# @return [Oga::XML::Document]
#
def create_document(children = [])
  nodes    = children.is_a?(Array) ? children.flatten : [children]
  document = Document.new

  nodes.each do |node|
    case node
    when Doctype
      document.doctype = node
    when XmlDeclaration
      document.xml_declaration = node
    else
      document.children << node
    end
  end

  link_children(document)

  document
end
##
# Links the child nodes of a node together by assigning every child its
# previous sibling, its next sibling and its parent.
#
# The boundaries are always written explicitly: the first child gets a nil
# previous sibling and the last child a nil next sibling. This way linking the
# same nodes again (for example after the list of children changed) never
# leaves stale sibling references behind.
#
# @param [Oga::XML::Node] node The node whose children should be linked. It
#  must respond to `children`, returning an Array of nodes.
#
def link_children(node)
  children   = node.children
  last_index = children.length - 1

  children.each_with_index do |child, index|
    # A negative index wraps around to the end of the Array, so the first
    # child needs an explicit nil instead of `children[index - 1]`.
    child.previous = index > 0 ? children[index - 1] : nil
    child.next     = index < last_index ? children[index + 1] : nil
    child.parent   = node
  end
end
# vim: set ft=racc: # vim: set ft=racc:

View File

@ -1,177 +0,0 @@
module Oga
  module XML
    ##
    # The TreeBuilder class turns an AST into a DOM tree. This DOM tree can be
    # traversed by requesting child elements, parent elements, etc.
    #
    # Basic usage:
    #
    #     builder = Oga::XML::TreeBuilder.new
    #     ast     = s(:element, ...)
    #
    #     builder.process(ast) # => #<Oga::XML::Document ...>
    #
    class TreeBuilder < ::AST::Processor
      ##
      # Processes a `(document)` node. XML declarations and doctypes are
      # stored in their dedicated document attributes, all other nodes become
      # children of the document and are linked to each other.
      #
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::Document]
      #
      def on_document(node)
        document = Document.new

        process_all(node).each do |child|
          if child.is_a?(XmlDeclaration)
            document.xml_declaration = child
          elsif child.is_a?(Doctype)
            document.doctype = child
          else
            document.children << child
          end
        end

        # Link the top-level nodes the same way element children are linked
        # (parent, previous and next) instead of only setting the parent.
        process_children(document)

        return document
      end

      ##
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::XmlDeclaration]
      #
      def on_xml_decl(node)
        attributes = process(node.children[0])

        return XmlDeclaration.new(attributes)
      end

      ##
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::Doctype]
      #
      def on_doctype(node)
        return Doctype.new(
          :name      => node.children[0],
          :type      => node.children[1],
          :public_id => node.children[2],
          :system_id => node.children[3]
        )
      end

      ##
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::Comment]
      #
      def on_comment(node)
        return Comment.new(:text => node.children[0])
      end

      ##
      # Processes an `(element)` node and its child elements.
      #
      # An element can have a parent, previous and next element as well as a
      # number of child elements.
      #
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::Element]
      #
      def on_element(node)
        ns, name, attr, *children = *node

        attr = process(attr) if attr

        # The splat above always produces an Array, so no nil check is needed;
        # nil placeholders for "no children" are stripped before processing.
        children = process_all(children.compact)

        element = Element.new(
          :name       => name,
          :namespace  => ns,
          :attributes => attr,
          :children   => children
        )

        process_children(element)

        return element
      end

      ##
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::Text]
      #
      def on_text(node)
        return Text.new(:text => node.children[0])
      end

      ##
      # @param [Oga::AST::Node] node
      # @return [Oga::XML::Cdata]
      #
      def on_cdata(node)
        return Cdata.new(:text => node.children[0])
      end

      ##
      # Converts a `(attributes)` node into a Hash.
      #
      # @param [Oga::AST::Node] node
      # @return [Hash]
      #
      def on_attributes(node)
        pairs = process_all(node)

        return Hash[pairs]
      end

      ##
      # Returns the children of an `(attribute)` node as an Array.
      #
      # @param [Oga::AST::Node] node
      # @return [Array]
      #
      def on_attribute(node)
        return *node
      end

      ##
      # Raises for every unhandled node.
      #
      # @param [Oga::AST::Node] node
      # @raise [RuntimeError]
      #
      def handler_missing(node)
        raise "No handler for node type #{node.type.inspect}"
      end

      private

      ##
      # Iterates over the children of a node and assigns the parent, previous
      # and next nodes. The supplied object is modified in place.
      #
      # The first child always gets a nil previous node and the last child a
      # nil next node.
      #
      # @param [Oga::XML::Element|Oga::XML::Document] element
      #
      def process_children(element)
        children   = element.children
        last_index = children.length - 1

        children.each_with_index do |child, index|
          # A negative index wraps around to the end of the Array, hence the
          # explicit nil for the first child.
          child.previous = index > 0 ? children[index - 1] : nil
          child.next     = index < last_index ? children[index + 1] : nil
          child.parent   = element
        end
      end
    end # TreeBuilder
  end # XML
end # Oga

View File

@ -16,7 +16,6 @@ Gem::Specification.new do |s|
s.required_ruby_version = '>= 1.9.3' s.required_ruby_version = '>= 1.9.3'
s.add_dependency 'racc' s.add_dependency 'racc'
s.add_dependency 'ast'
s.add_development_dependency 'cliver' s.add_development_dependency 'cliver'
s.add_development_dependency 'rake' s.add_development_dependency 'rake'

View File

@ -1,10 +1,17 @@
require 'spec_helper' require 'spec_helper'
describe Oga::HTML::Parser do describe Oga::HTML::Parser do
example 'parse an HTML void element' do context 'HTML void elements' do
parse_html('<meta>').should == s( before :all do
:document, @node = parse_html('<meta>').children[0]
s(:element, nil, 'meta', nil, nil) end
)
example 'return an Element instance' do
@node.is_a?(Oga::XML::Element).should == true
end
example 'set the name of the element' do
@node.name.should == 'meta'
end
end end
end end

View File

@ -1,24 +1,37 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'cdata tags' do context 'empty cdata tags' do
example 'parse an empty cdata tag' do before :all do
parse('<![CDATA[]]>').should == s(:document, s(:cdata)) @node = parse('<![CDATA[]]>').children[0]
end end
example 'parse a cdata tag' do example 'return a Cdata instance' do
parse('<![CDATA[foo]]>').should == s(:document, s(:cdata, 'foo')) @node.is_a?(Oga::XML::Cdata).should == true
end
end end
example 'parse an element inside a cdata tag' do context 'cdata tags with text' do
parse('<![CDATA[<p>foo</p>]]>').should == s( before :all do
:document, @node = parse('<![CDATA[foo]]>').children[0]
s(:cdata, '<p>foo</p>')
)
end end
example 'parse double brackets inside a cdata tag' do example 'return a Cdata instance' do
parse('<![CDATA[]]]]>').should == s(:document, s(:cdata, ']]')) @node.is_a?(Oga::XML::Cdata).should == true
end
example 'set the text of the tag' do
@node.text.should == 'foo'
end
end
context 'cdata tags with nested elements' do
before :all do
@node = parse('<![CDATA[<p>foo</p>]]>').children[0]
end
example 'set the HTML as raw text' do
@node.text.should == '<p>foo</p>'
end end
end end
end end

View File

@ -1,13 +1,27 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'comments' do context 'empty comments' do
example 'parse an empty comment' do before :all do
parse('<!---->').should == s(:document, s(:comment)) @node = parse('<!---->').children[0]
end end
example 'parse a comment' do example 'return a Comment instance' do
parse('<!--foo-->').should == s(:document, s(:comment, 'foo')) @node.is_a?(Oga::XML::Comment).should == true
end
end
context 'comments with text' do
before :all do
@node = parse('<!--foo-->').children[0]
end
example 'return a Comment instance' do
@node.is_a?(Oga::XML::Comment).should == true
end
example 'set the text of the comment' do
@node.text.should == 'foo'
end end
end end
end end

View File

@ -1,46 +1,83 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'doctypes' do context 'basic doctypes' do
example 'parse a doctype' do before :all do
parse('<!DOCTYPE html>').should == s(:document, s(:doctype, 'html')) @document = parse('<!DOCTYPE html>')
end end
example 'parse a doctype with the doctype type' do example 'return a Doctype instance' do
parse('<!DOCTYPE html PUBLIC>').should == s( @document.doctype.is_a?(Oga::XML::Doctype).should == true
:document,
s(:doctype, 'html', 'PUBLIC')
)
end end
example 'parse a doctype with a public ID' do example 'set the name of the doctype' do
parse('<!DOCTYPE html PUBLIC "foo">').should == s( @document.doctype.name.should == 'html'
:document, end
s(:doctype, 'html', 'PUBLIC', 'foo')
)
end end
example 'parse a doctype with a public and private ID' do context 'doctypes with a type' do
parse('<!DOCTYPE html PUBLIC "foo" "bar">').should == s( before :all do
:document, @document = parse('<!DOCTYPE html PUBLIC>')
s(:doctype, 'html', 'PUBLIC', 'foo', 'bar')
)
end end
example 'parse an HTML 4 strict doctype' do example 'return a Doctype instance' do
doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' \ @document.doctype.is_a?(Oga::XML::Doctype).should == true
'"http://www.w3.org/TR/html4/strict.dtd">' end
parse(doctype).should == s( example 'set the name of the doctype' do
:document, @document.doctype.name.should == 'html'
s( end
:doctype,
'HTML', example 'set the type of the doctype' do
'PUBLIC', @document.doctype.type.should == 'PUBLIC'
'-//W3C//DTD HTML 4.01//EN', end
'http://www.w3.org/TR/html4/strict.dtd' end
)
) context 'doctypes with a public ID' do
before :all do
@document = parse('<!DOCTYPE html PUBLIC "foo">')
end
example 'return a Doctype instance' do
@document.doctype.is_a?(Oga::XML::Doctype).should == true
end
example 'set the name of the doctype' do
@document.doctype.name.should == 'html'
end
example 'set the type of the doctype' do
@document.doctype.type.should == 'PUBLIC'
end
example 'set the public ID of the doctype' do
@document.doctype.public_id.should == 'foo'
end
end
context 'doctypes with a system ID' do
before :all do
@document = parse('<!DOCTYPE html PUBLIC "foo" "bar">')
end
example 'return a Doctype instance' do
@document.doctype.is_a?(Oga::XML::Doctype).should == true
end
example 'set the name of the doctype' do
@document.doctype.name.should == 'html'
end
example 'set the type of the doctype' do
@document.doctype.type.should == 'PUBLIC'
end
example 'set the public ID of the doctype' do
@document.doctype.public_id.should == 'foo'
end
example 'set the system ID of the doctype' do
@document.doctype.system_id.should == 'bar'
end end
end end
end end

View File

@ -1,13 +1,20 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'HTML documents' do context 'empty documents' do
example 'parse an empty document' do before :all do
parse('').should == s(:document) @document = parse('')
end end
example 'parse a basic HTML document' do example 'return a Document instance' do
html = <<-EOF @document.is_a?(Oga::XML::Document).should == true
end
end
context 'HTML documents' do
before :all do
html = <<-EOF.strip
<?xml version="1.5" ?>
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
@ -17,48 +24,23 @@ describe Oga::XML::Parser do
</html> </html>
EOF EOF
parse(html).should == s( @document = parse(html, :html => true)
:document, end
s(:doctype, 'html'),
s(:text, "\n"),
# <html> example 'return a Document instance' do
s( @document.is_a?(Oga::XML::Document).should == true
:element, end
nil,
'html',
nil,
s(:text, "\n"), example 'set the doctype of the document' do
@document.doctype.is_a?(Oga::XML::Doctype).should == true
end
# <head> example 'set the XML declaration of the document' do
s( @document.xml_declaration.is_a?(Oga::XML::XmlDeclaration).should == true
:element, end
nil,
'head',
nil,
s(:text, "\n"), example 'set the children of the document' do
@document.children.empty?.should == false
# <title>
s(
:element,
nil,
'title',
nil,
s(:text, 'Title')
),
s(:text, "\n")
),
# <body>
s(:text, "\n"),
s(:element, nil, 'body', nil, nil),
s(:text, "\n")
),
s(:text, "\n")
)
end end
end end
end end

View File

@ -1,148 +1,81 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'elements' do context 'empty elements' do
example 'parse an empty element' do before :all do
parse('<p></p>').should == s( @element = parse('<p></p>').children[0]
:document,
s(:element, nil, 'p', nil, nil)
)
end end
example 'parse an element with text' do example 'return an Element instance' do
parse('<p>foo</p>').should == s( @element.is_a?(Oga::XML::Element).should == true
:document,
s(:element, nil, 'p', nil, s(:text, 'foo'))
)
end end
example 'parse an element with a single attribute' do example 'set the name of the element' do
parse('<p foo></p>').should == s( @element.name.should == 'p'
:document, end
s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo')), nil)
)
end end
example 'parse an element with a single attribute with a value' do context 'elements with namespaces' do
parse('<p foo="bar"></p>').should == s( before :all do
:document, @element = parse('<foo:p></p>').children[0]
s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo', 'bar')), nil)
)
end end
example 'parse an element with multiple attributes' do example 'return an Element instance' do
parse('<p foo="bar" baz="bad"></p>').should == s( @element.is_a?(Oga::XML::Element).should == true
:document,
s(
:element,
nil,
'p',
s(
:attributes,
s(:attribute, 'foo', 'bar'),
s(:attribute, 'baz', 'bad')
),
nil
)
)
end end
example 'parse an element with text and attributes' do example 'set the name of the element' do
parse('<p class="foo">Bar</p>').should == s( @element.name.should == 'p'
:document,
s(
:element,
nil,
'p',
s(:attributes, s(:attribute, 'class', 'foo')),
s(:text, 'Bar')
)
)
end end
example 'parse an element with a namespace' do example 'set the namespace of the element' do
parse('<foo:p></p>').should == s( @element.namespace.should == 'foo'
:document, end
s(:element, 'foo', 'p', nil, nil)
)
end end
example 'parse an element with a namespace and an attribute' do context 'elements with attributes' do
parse('<foo:p class="bar"></p>').should == s( before :all do
:document, @element = parse('<foo bar="baz"></foo>').children[0]
s(
:element,
'foo',
'p',
s(:attributes, s(:attribute, 'class', 'bar')),
nil
)
)
end end
example 'parse an element nested inside another element' do example 'return an Element instance' do
parse('<p><a></a></p>').should == s( @element.is_a?(Oga::XML::Element).should == true
:document,
s(:element, nil, 'p', nil, s(:element, nil, 'a', nil, nil))
)
end end
example 'parse an element with children text, element' do example 'set the attributes of the element' do
parse('<p>Foo<a>Bar</a></p>').should == s( @element.attributes.should == {'bar' => 'baz'}
:document, end
s(
:element,
nil,
'p',
nil,
s(:text, 'Foo'),
s(:element, nil, 'a', nil, s(:text, 'Bar'))
)
)
end end
example 'parse an element with children text, element, text' do context 'elements with child elements' do
parse('<p>Foo<a>Bar</a>Baz</p>').should == s( before :all do
:document, @element = parse('<a><b></b></a>').children[0]
s(
:element,
nil,
'p',
nil,
s(:text, 'Foo'),
s(:element, nil, 'a', nil, s(:text, 'Bar')),
s(:text, 'Baz')
)
)
end end
example 'parse an element with children element, text' do example 'set the name of the outer element' do
parse('<p><a>Bar</a>Baz</p>').should == s( @element.name.should == 'a'
:document,
s(
:element,
nil,
'p',
nil,
s(:element, nil, 'a', nil, s(:text, 'Bar')),
s(:text, 'Baz')
)
)
end end
example 'parse an element with children element, text, element' do example 'set the child elements' do
parse('<p><a>Bar</a>Baz<span>Da</span></p>').should == s( @element.children[0].is_a?(Oga::XML::Element).should == true
:document, end
s(
:element, example 'set the name of the child element' do
nil, @element.children[0].name.should == 'b'
'p', end
nil, end
s(:element, nil, 'a', nil, s(:text, 'Bar')),
s(:text, 'Baz'), context 'elements with child elements and text' do
s(:element, nil, 'span', nil, s(:text, 'Da')) before :all do
) @element = parse('<a>Foo<b>bar</b></a>').children[0]
) end
example 'include the text node of the outer element' do
@element.children[0].is_a?(Oga::XML::Text).should == true
end
example 'include the text node of the inner element' do
@element.children[1].children[0].is_a?(Oga::XML::Text).should == true
end end
end end
end end

View File

@ -1,11 +0,0 @@
require 'spec_helper'
describe Oga::XML::Parser do
example 'parse regular text' do
parse('foo').should == s(:document, s(:text, 'foo'))
end
example 'parse a newline' do
parse("\n").should == s(:document, s(:text, "\n"))
end
end

View File

@ -0,0 +1,53 @@
require 'spec_helper'

# Verifies that the parser links nodes together: children point to their
# parent and adjacent nodes point to each other via `next` and `previous`.
describe Oga::XML::Parser do
  context 'elements with parents' do
    before :all do
      @parent = parse('<a><b></b></a>').children[0]
    end

    example 'return an Element instance for the parent' do
      @parent.children[0].parent.is_a?(Oga::XML::Element).should == true
    end

    example 'set the correct parent' do
      @parent.children[0].parent.should == @parent
    end
  end

  context 'text nodes with parents' do
    before :all do
      @parent = parse('<a>foo</a>').children[0]
    end

    example 'return an Element instance for the parent' do
      @parent.children[0].parent.is_a?(Oga::XML::Element).should == true
    end

    example 'set the correct parent' do
      @parent.children[0].parent.should == @parent
    end
  end

  context 'elements with adjacent elements' do
    before :all do
      @document = parse('<a></a><b></b>')
    end

    example 'return an Element instance for the next element' do
      @document.children[0].next.is_a?(Oga::XML::Element).should == true
    end

    example 'set the correct next element' do
      @document.children[0].next.should == @document.children[1]
    end

    example 'return an Element instance for the previous element' do
      @document.children[1].previous.is_a?(Oga::XML::Element).should == true
    end

    example 'set the correct previous element' do
      @document.children[1].previous.should == @document.children[0]
    end

    # Boundary cases: the outermost siblings must not link past the ends of
    # the child list.
    example 'do not set the previous element for the first element' do
      @document.children[0].previous.should == nil
    end

    example 'do not set the next element for the last element' do
      @document.children[1].next.should == nil
    end
  end
end

View File

@ -1,64 +1,45 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'HTML void elements' do context 'void elements' do
example 'parse a void element that omits the closing /' do before :all do
parse('<link>', :html => true).should == s( @node = parse('<link>', :html => true).children[0]
:document,
s(:element, nil, 'link', nil, nil)
)
end end
example 'parse a void element inside another element' do example 'return an Element instance' do
parse('<head><link></head>', :html => true).should == s( @node.is_a?(Oga::XML::Element).should == true
:document,
s(:element, nil, 'head', nil, s(:element, nil, 'link', nil, nil))
)
end end
example 'parse a void element with attributes inside another element' do example 'set the name of the element' do
parse('<head><link href="foo.css"></head>', :html => true).should == s( @node.name.should == 'link'
:document, end
s(
:element,
nil,
'head',
nil,
s(
:element,
nil,
'link',
s(:attributes, s(:attribute, 'href', 'foo.css')),
nil
)
)
)
end end
example 'parse a void element and a non void element in the same parent' do context 'nested void elements' do
parse('<head><link><title>Foo</title></head>', :html => true).should == s( before :all do
:document, @node = parse('<head><link></head>', :html => true).children[0]
s( end
:element,
nil, example 'set the name of the outer element' do
'head', @node.name.should == 'head'
nil, end
s(
:element, example 'set the name of the inner element' do
nil, @node.children[0].name.should == 'link'
'link', end
nil, end
nil
), context 'void elements with attributes' do
s( before :all do
:element, @node = parse('<link href="foo">', :html => true).children[0]
nil, end
'title',
nil, example 'set the name of the element' do
s(:text, 'Foo') @node.name.should == 'link'
) end
)
) example 'set the attributes' do
@node.attributes.should == {'href' => 'foo'}
end end
end end
end end

View File

@ -0,0 +1,17 @@
require 'spec_helper'

# Verifies that plain text outside of any element is parsed into Text nodes.
describe Oga::XML::Parser do
  context 'plain text' do
    before :all do
      @node = parse('foo').children[0]
    end

    example 'return a Text instance' do
      @node.is_a?(Oga::XML::Text).should == true
    end

    example 'set the text' do
      @node.text.should == 'foo'
    end
  end

  # Whitespace-only input is text too; this case was covered by the previous
  # version of this spec ('parse a newline').
  context 'newlines' do
    before :all do
      @node = parse("\n").children[0]
    end

    example 'return a Text instance' do
      @node.is_a?(Oga::XML::Text).should == true
    end

    example 'set the text' do
      @node.text.should == "\n"
    end
  end
end

View File

@ -1,16 +1,39 @@
require 'spec_helper' require 'spec_helper'
describe Oga::XML::Parser do describe Oga::XML::Parser do
context 'XML declaration tags' do context 'empty XML declaration tags' do
example 'parse an empty XML declaration tag' do before :all do
parse('<?xml?>').should == s(:document, s(:xml_decl)) @node = parse('<?xml?>').xml_declaration
end end
example 'parse an XML declaration tag' do example 'return an XmlDeclaration instance' do
parse('<?xml version="1.0" ?>').should == s( @node.is_a?(Oga::XML::XmlDeclaration).should == true
:document, end
s(:xml_decl, s(:attributes, s(:attribute, 'version', '1.0')))
) example 'set the default XML version' do
@node.version.should == '1.0'
end
example 'set the default encoding' do
@node.encoding.should == 'UTF-8'
end
end
context 'XML declaration tags with custom attributes' do
before :all do
@node = parse('<?xml version="1.5" encoding="foo" ?>').xml_declaration
end
example 'return an XmlDeclaration instance' do
@node.is_a?(Oga::XML::XmlDeclaration).should == true
end
example 'set the XML version' do
@node.version.should == '1.5'
end
example 'set the encoding' do
@node.encoding.should == 'foo'
end end
end end
end end

View File

@ -1,281 +0,0 @@
require 'spec_helper'
# Specs for Oga::XML::TreeBuilder. Every context builds an AST node by hand
# using the `s` helper, feeds it to a fresh builder through `process` and
# checks the resulting object.
# NOTE(review): `s` is not defined in this file; presumably it is provided by
# spec_helper - confirm.
describe Oga::XML::TreeBuilder do
# A new builder is created for every example.
before do
@builder = described_class.new
end
context '#on_document' do
before do
node = s(:document, s(:element, nil, 'p', nil, nil))
@tag = @builder.process(node)
end
example 'return a Document node' do
@tag.is_a?(Oga::XML::Document).should == true
end
example 'include the children of the element' do
@tag.children[0].is_a?(Oga::XML::Element).should == true
end
end
context '#on_document with XML declarations' do
before do
decl = s(:xml_decl, s(:attributes, s(:attribute, 'encoding', 'UTF-8')))
node = s(:document, decl)
@tag = @builder.process(node)
end
example 'set the XML declaration of the document' do
@tag.xml_declaration.is_a?(Oga::XML::XmlDeclaration).should == true
end
end
context '#on_document with doctypes' do
before do
doctype = s(:doctype, 'html', 'PUBLIC', 'foo', 'bar')
node = s(:document, doctype)
@tag = @builder.process(node)
end
example 'set the doctype of the document' do
@tag.doctype.is_a?(Oga::XML::Doctype).should == true
end
end
context '#on_xml_decl' do
before do
node = s(:xml_decl, s(:attributes, s(:attribute, 'encoding', 'UTF-8')))
@tag = @builder.process(node)
end
example 'return an XmlDeclaration node' do
@tag.is_a?(Oga::XML::XmlDeclaration).should == true
end
example 'include the encoding of the tag' do
@tag.encoding.should == 'UTF-8'
end
end
# Doctype children are positional: name, type, public ID, system ID.
context '#on_doctype' do
before do
node = s(:doctype, 'html', 'PUBLIC', 'foo', 'bar')
@tag = @builder.process(node)
end
example 'return a Doctype node' do
@tag.is_a?(Oga::XML::Doctype).should == true
end
example 'include the doctype name' do
@tag.name.should == 'html'
end
example 'include the doctype type' do
@tag.type.should == 'PUBLIC'
end
example 'include the public ID' do
@tag.public_id.should == 'foo'
end
example 'include the system ID' do
@tag.system_id.should == 'bar'
end
end
context '#on_comment' do
before do
node = s(:comment, 'foo')
@tag = @builder.process(node)
end
example 'return a Comment node' do
@tag.is_a?(Oga::XML::Comment).should == true
end
example 'include the text of the comment' do
@tag.text.should == 'foo'
end
end
# Element nodes are laid out as: namespace, name, attributes, *children.
context '#on_element' do
context 'simple elements' do
before do
node = s(:element, 'foo', 'p', nil, nil)
@tag = @builder.process(node)
end
example 'return a Element node' do
@tag.is_a?(Oga::XML::Element).should == true
end
example 'include the name of the element' do
@tag.name.should == 'p'
end
example 'include the namespace of the element' do
@tag.namespace.should == 'foo'
end
end
context 'elements with attributes' do
before do
node = s(
:element,
nil,
'p',
s(:attributes, s(:attribute, 'key', 'value')),
nil
)
@tag = @builder.process(node)
end
example 'include the name of the element' do
@tag.name.should == 'p'
end
example 'include the attributes' do
@tag.attributes.should == {'key' => 'value'}
end
end
context 'elements with parent elements' do
before do
node = s(:element, nil, 'p', nil, s(:element, nil, 'span', nil, nil))
@tag = @builder.process(node)
end
example 'set the parent element' do
@tag.children[0].parent.should == @tag
end
end
# Sibling links: `next` points forward and is nil for the last child.
context 'elements with next elements' do
before do
node = s(
:element,
nil,
'p',
nil,
s(:element, nil, 'a', nil, nil),
s(:element, nil, 'span', nil, nil)
)
@tag = @builder.process(node)
end
example 'set the next element' do
@tag.children[0].next.should == @tag.children[1]
end
example 'do not set the next element for the last element' do
@tag.children[1].next.should == nil
end
end
# Sibling links: `previous` points backward and is nil for the first child.
context 'elements with previous elements' do
before do
node = s(
:element,
nil,
'p',
nil,
s(:element, nil, 'a', nil, nil),
s(:element, nil, 'span', nil, nil)
)
@tag = @builder.process(node)
end
example 'set the previous element' do
@tag.children[1].previous.should == @tag.children[0]
end
example 'do not set the previous element for the first element' do
@tag.children[0].previous.should == nil
end
end
context 'elements with child elements' do
before do
node = s(:element, nil, 'p', nil, s(:element, nil, 'span', nil, nil))
@tag = @builder.process(node)
end
example 'include the name of the element' do
@tag.name.should == 'p'
end
example 'include the child element' do
@tag.children[0].is_a?(Oga::XML::Element).should == true
end
example 'include the name of the child element' do
@tag.children[0].name.should == 'span'
end
end
end
context '#on_text' do
before do
node = s(:text, 'Hello')
@tag = @builder.process(node)
end
example 'return a Text node' do
@tag.is_a?(Oga::XML::Text).should == true
end
example 'include the text of the node' do
@tag.text.should == 'Hello'
end
end
context '#on_cdata' do
before do
node = s(:cdata, 'Hello')
@tag = @builder.process(node)
end
example 'return a Cdata node' do
@tag.is_a?(Oga::XML::Cdata).should == true
end
example 'include the text of the node' do
@tag.text.should == 'Hello'
end
end
# An `(attributes)` node is converted into a Hash of name => value pairs.
context '#on_attributes' do
before do
@node = s(
:attributes,
s(:attribute, 'foo', 'bar'),
s(:attribute, 'baz', 'wat')
)
end
example 'return the attributes as a Hash' do
@builder.process(@node).should == {'foo' => 'bar', 'baz' => 'wat'}
end
example 'return an empty Hash by default' do
@builder.process(s(:attributes)).should == {}
end
end
# Node types without a dedicated handler must raise instead of being ignored.
context '#handler_missing' do
before do
@node = s(:foo, 'bar')
end
example 'raise when processing an unknown node' do
lambda { @builder.process(@node) }
.should raise_error('No handler for node type :foo')
end
end
end