Updated parsing of doctypes.

The resulting nodes now split the doctype type, public ID and system ID into
separate string values.
This commit is contained in:
Yorick Peterse 2014-03-11 22:08:21 +01:00
parent c07edc767b
commit c9592856f0
2 changed files with 33 additions and 31 deletions

View File

@ -1,8 +1,8 @@
class Oga::Parser
token T_SPACE T_NEWLINE T_SMALLER T_GREATER T_SLASH
token T_DQUOTE T_SQUOTE T_DASH T_RBRACKET T_LBRACKET
token T_COLON T_BANG T_EQUALS T_TEXT T_DOCTYPE
token T_NEWLINE T_SPACE
token T_STRING
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
options no_result_var
@ -18,38 +18,32 @@ rule
;
expression
: tag
| doctype
: doctype
;
# Doctypes
doctype
: T_DOCTYPE { s(:doctype, val[0]) }
;
# <!DOCTYPE html>
: T_DOCTYPE_START T_DOCTYPE_END { s(:doctype) }
# Generic HTML tags
tag_start
# <p>
: T_SMALLER T_TEXT T_GREATER { val[1] }
;
tag_end
# </p>
: T_SMALLER T_SLASH T_TEXT T_GREATER
;
tag
# <p>foo</p>
: tag_start tag_body tag_end
# <!DOCTYPE html PUBLIC>
| T_DOCTYPE_START T_DOCTYPE_TYPE T_DOCTYPE_END
{
s(:element, val[0], val[1])
s(:doctype, val[1])
}
;
tag_body
: T_TEXT
# <!DOCTYPE html PUBLIC "foo">
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
{
s(:doctype, val[1], val[2])
}
# <!DOCTYPE html PUBLIC "foo" "bar">
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
{
s(:doctype, val[1], val[2], val[3])
}
;
whitespaces

View File

@ -6,17 +6,24 @@ describe Oga::Parser do
parse_html('<!DOCTYPE html>').should == s(:document, s(:doctype))
end
example 'parse a doctype with a public ID' do
parse_html('<!DOCTYPE html "foo">').should == s(
example 'parse a doctype with the doctype type' do
parse_html('<!DOCTYPE html PUBLIC>').should == s(
:document,
s(:doctype, 'foo')
s(:doctype, 'PUBLIC')
)
end
example 'parse a doctype with a public ID' do
parse_html('<!DOCTYPE html PUBLIC "foo">').should == s(
:document,
s(:doctype, 'PUBLIC', 'foo')
)
end
example 'parse a doctype with a public and private ID' do
parse_html('<!DOCTYPE html "foo" "bar">').should == s(
parse_html('<!DOCTYPE html PUBLIC "foo" "bar">').should == s(
:document,
s(:doctype, 'foo', 'bar')
s(:doctype, 'PUBLIC', 'foo', 'bar')
)
end
@ -28,6 +35,7 @@ describe Oga::Parser do
:document,
s(
:doctype,
'PUBLIC',
'-//W3C//DTD HTML 4.01//EN',
'http://www.w3.org/TR/html4/strict.dtd'
)