Basic support for parsing elements.

This includes support for elements with namespaces and attributes. Nested
elements are not yet supported.
This commit is contained in:
Yorick Peterse 2014-03-12 23:02:54 +01:00
parent 5ce515d224
commit 8cfa81aed9
2 changed files with 136 additions and 0 deletions

View File

@ -5,6 +5,7 @@ token T_STRING T_TEXT
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
token T_CDATA_START T_CDATA_END token T_CDATA_START T_CDATA_END
token T_COMMENT_START T_COMMENT_END token T_COMMENT_START T_COMMENT_END
token T_ELEM_OPEN T_ELEM_NAME T_ELEM_NS T_ELEM_CLOSE T_ATTR
options no_result_var options no_result_var
@ -23,6 +24,8 @@ rule
: doctype : doctype
| cdata | cdata
| comment | comment
| element
| text
; ;
# Doctypes # Doctypes
@ -70,6 +73,66 @@ rule
| T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) } | T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) }
; ;
# Elements
#
# Builds an :element node from the [namespace, name] pair produced by
# element_open, optionally followed by an attributes node and/or a text node.
# Only flat elements are matched here; there is no alternative for child
# elements.

element
  # <p></p>
  : element_open T_ELEM_CLOSE
    {
      s(:element, val.first)
    }

  # <p class="foo"></p>
  | element_open attributes T_ELEM_CLOSE
    {
      open, attrs = val

      s(:element, open, attrs)
    }

  # <p>foo</p>
  | element_open text T_ELEM_CLOSE
    {
      open, body = val

      # The attributes slot is filled with nil so the text keeps its position.
      s(:element, open, nil, body)
    }

  # <p class="foo">Bar</p>
  | element_open attributes text T_ELEM_CLOSE
    {
      open, attrs, body = val

      s(:element, open, attrs, body)
    }
  ;
# Opening part of an element. The result is always a two-item array of
# [namespace, name]; the namespace is nil when the tag has no prefix.
element_open
  # <p>
  : T_ELEM_OPEN T_ELEM_NAME
    {
      _open, name = val

      [nil, name]
    }

  # <foo:p>
  | T_ELEM_OPEN T_ELEM_NS T_ELEM_NAME
    {
      _open, namespace, name = val

      [namespace, name]
    }
  ;
# Attributes
#
# Wraps the value produced by attributes_ in a single :attributes node.

attributes
  : attributes_
    {
      s(:attributes, val.first)
    }
  ;
# Left-recursive list matching one or more `attribute` values.
#
# Both actions return `val` itself (the array of values matched by the
# alternative) rather than a flat list: a single attribute yields
# [attribute], and every further attribute wraps the previous result as
# [previous, attribute].
#
# NOTE(review): the `attributes` rule hands this nested array straight to
# `s`; presumably `s` flattens its children -- confirm against its definition.
attributes_
: attributes_ attribute { val }
| attribute { val }
;
# A single attribute, with or without a value. The matched tokens are passed
# on to the :attribute node in the order they appear: the name, then the
# value when one is present.
attribute
  # foo
  : T_ATTR
    {
      s(:attribute, *val)
    }

  # foo="bar"
  | T_ATTR T_STRING
    {
      s(:attribute, *val)
    }
  ;
# Plain text
#
# Wraps a single T_TEXT token value in a :text node.

text
  : T_TEXT
    {
      s(:text, val.first)
    }
  ;
# Whitespace
whitespaces whitespaces
: whitespaces whitespace : whitespaces whitespace
| whitespace | whitespace

View File

@ -0,0 +1,73 @@
require 'spec_helper'
# Parser specs for single, non-nested elements: empty elements, text content,
# attributes (with and without values) and namespaced element names.
describe Oga::Parser do
  context 'elements' do
    example 'parse an empty element' do
      ast = parse_html('<p></p>')

      ast.should == s(:document, s(:element, nil, 'p'))
    end

    example 'parse an element with text' do
      ast  = parse_html('<p>foo</p>')
      text = s(:text, 'foo')

      # The nil fills the attributes slot of the element node.
      ast.should == s(:document, s(:element, nil, 'p', nil, text))
    end

    example 'parse an element with a single attribute' do
      ast        = parse_html('<p foo></p>')
      attributes = s(:attributes, s(:attribute, 'foo'))

      ast.should == s(:document, s(:element, nil, 'p', attributes))
    end

    example 'parse an element with a single attribute with a value' do
      ast        = parse_html('<p foo="bar"></p>')
      attributes = s(:attributes, s(:attribute, 'foo', 'bar'))

      ast.should == s(:document, s(:element, nil, 'p', attributes))
    end

    example 'parse an element with multiple attributes' do
      ast        = parse_html('<p foo="bar" baz="bad"></p>')
      attributes = s(
        :attributes,
        s(:attribute, 'foo', 'bar'),
        s(:attribute, 'baz', 'bad')
      )

      ast.should == s(:document, s(:element, nil, 'p', attributes))
    end

    example 'parse an element with text and attributes' do
      ast        = parse_html('<p class="foo">Bar</p>')
      attributes = s(:attributes, s(:attribute, 'class', 'foo'))
      text       = s(:text, 'Bar')

      ast.should == s(:document, s(:element, nil, 'p', attributes, text))
    end

    # NOTE(review): the closing tags in the two namespace examples omit the
    # `foo:` prefix; presumably closing tags are not matched against the
    # opening tag at this stage -- confirm against the lexer.
    example 'parse an element with a namespace' do
      ast = parse_html('<foo:p></p>')

      ast.should == s(:document, s(:element, 'foo', 'p'))
    end

    example 'parse an element with a namespace and an attribute' do
      ast        = parse_html('<foo:p class="bar"></p>')
      attributes = s(:attributes, s(:attribute, 'class', 'bar'))

      ast.should == s(:document, s(:element, 'foo', 'p', attributes))
    end
  end
end