Parsing support for nested element/text nodes.

This commit is contained in:
Yorick Peterse 2014-03-14 00:44:11 +01:00
parent 6b2f682c5c
commit 05ee3c13c9
5 changed files with 129 additions and 15 deletions

View File

@@ -1,6 +1,5 @@
class Oga::Parser
token T_NEWLINE T_SPACE
token T_STRING T_TEXT
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
token T_CDATA_START T_CDATA_END
@@ -90,10 +89,19 @@ rule
| T_ELEM_OPEN T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] }
;
element_body
: text
# A run of element nodes, built left-recursively.
# The recursive alternative returns `val` (the matched values as an array),
# a lone `element` has no action block, and the empty alternative yields nil.
# NOTE(review): because `elements` may be empty and `element_body` also has
# its own empty alternative, an empty body looks derivable in more than one
# way -- confirm racc reports no reduce/reduce conflict for this.
elements
: elements element { val }
| element
| /* none */ { nil }
;
# Child nodes of an element: runs of text nodes and runs of element nodes in
# alternation. The alternatives cover up to three alternating runs, starting
# with either `texts` or `elements`; every multi-part alternative returns
# `val`, and an empty body yields nil.
# NOTE(review): a body with four or more alternating runs (for example text,
# element, text, element) does not appear to match any alternative below --
# confirm whether that limitation is intended.
element_body
: texts
| texts elements { val }
| texts elements texts { val }
| elements
| elements texts { val }
| elements texts elements { val }
| /* none */ { nil }
;
# Attributes
@@ -122,16 +130,9 @@ rule
: T_TEXT { s(:text, val[0]) }
;
# Whitespace
whitespaces
: whitespaces whitespace
| whitespace
;
whitespace
: T_NEWLINE
| T_SPACE
# One or more consecutive text nodes, built left-recursively.
# The recursive alternative returns `val` (the matched values as an array);
# a lone `text` has no action block.
texts
: texts text { val }
| text
;
end

View File

@@ -1,6 +1,6 @@
require 'spec_helper'
describe Oga::Parser do
describe Oga::Lexer do
context 'HTML documents' do
example 'lex a basic HTML document' do
html = <<-EOF

View File

@@ -0,0 +1,51 @@
require 'spec_helper'
describe Oga::Parser do
  context 'HTML documents' do
    # Parses a small but complete HTML page and compares the resulting AST
    # against a hand-built tree.
    example 'parse a basic HTML document' do
      html = <<-EOF
<!DOCTYPE html>
<html>
<head>
<title>Title</title>
</head>
<body></body>
</html>
      EOF

      ast = parse_html(html)

      # The expected tree is assembled bottom-up: <title> sits inside <head>,
      # and <head> plus an empty <body> are the children of <html>.
      title = s(:element, nil, 'title', nil, s(:text, 'Title'))
      head  = s(:element, nil, 'head', nil, title)
      body  = s(:element, nil, 'body', nil, nil)
      root  = s(:element, nil, 'html', nil, head, body)

      ast.should == s(:document, s(:doctype), root, s(:text, "\n"))
    end
  end
end

View File

@@ -86,5 +86,63 @@ describe Oga::Parser do
s(:element, nil, 'p', nil, s(:element, nil, 'a', nil, nil))
)
end
example 'parse an element with children text, element' do
  ast = parse_html('<p>Foo<a>Bar</a></p>')

  # <p> holds a text node followed by a nested <a> element.
  link      = s(:element, nil, 'a', nil, s(:text, 'Bar'))
  paragraph = s(:element, nil, 'p', nil, s(:text, 'Foo'), link)

  ast.should == s(:document, paragraph)
end
example 'parse an element with children text, element, text' do
  ast = parse_html('<p>Foo<a>Bar</a>Baz</p>')

  # <p> holds text, a nested <a> element, and trailing text.
  link      = s(:element, nil, 'a', nil, s(:text, 'Bar'))
  paragraph = s(:element, nil, 'p', nil, s(:text, 'Foo'), link, s(:text, 'Baz'))

  ast.should == s(:document, paragraph)
end
example 'parse an element with children element, text' do
  ast = parse_html('<p><a>Bar</a>Baz</p>')

  # <p> holds a nested <a> element followed by a text node.
  link      = s(:element, nil, 'a', nil, s(:text, 'Bar'))
  paragraph = s(:element, nil, 'p', nil, link, s(:text, 'Baz'))

  ast.should == s(:document, paragraph)
end
example 'parse an element with children element, text, element' do
  ast = parse_html('<p><a>Bar</a>Baz<span>Da</span></p>')

  # <p> holds an <a> element, a text node, and a <span> element, in that order.
  link      = s(:element, nil, 'a', nil, s(:text, 'Bar'))
  span      = s(:element, nil, 'span', nil, s(:text, 'Da'))
  paragraph = s(:element, nil, 'p', nil, link, s(:text, 'Baz'), span)

  ast.should == s(:document, paragraph)
end
end
end

View File

@@ -4,4 +4,8 @@ describe Oga::Parser do
example 'parse regular text' do
  # Bare text with no markup becomes a single text node under the document.
  ast = parse_html('foo')

  ast.should == s(:document, s(:text, 'foo'))
end
example 'parse a newline' do
  # A lone newline is expected to come back as a text node.
  ast = parse_html("\n")

  ast.should == s(:document, s(:text, "\n"))
end
end