Updated parsing of doctypes.

The resulting nodes now store the doctype type, public ID and system ID as
separate string values.
This commit is contained in:
Yorick Peterse 2014-03-11 22:08:21 +01:00
parent c07edc767b
commit c9592856f0
2 changed files with 33 additions and 31 deletions

View File

@ -1,8 +1,8 @@
class Oga::Parser class Oga::Parser
token T_SPACE T_NEWLINE T_SMALLER T_GREATER T_SLASH token T_NEWLINE T_SPACE
token T_DQUOTE T_SQUOTE T_DASH T_RBRACKET T_LBRACKET token T_STRING
token T_COLON T_BANG T_EQUALS T_TEXT T_DOCTYPE token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
options no_result_var options no_result_var
@ -18,38 +18,32 @@ rule
; ;
expression expression
: tag : doctype
| doctype
; ;
# Doctypes # Doctypes
doctype doctype
: T_DOCTYPE { s(:doctype, val[0]) } # <!DOCTYPE html>
; : T_DOCTYPE_START T_DOCTYPE_END { s(:doctype) }
# Generic HTML tags # <!DOCTYPE html PUBLIC>
| T_DOCTYPE_START T_DOCTYPE_TYPE T_DOCTYPE_END
tag_start
# <p>
: T_SMALLER T_TEXT T_GREATER { val[1] }
;
tag_end
# </p>
: T_SMALLER T_SLASH T_TEXT T_GREATER
;
tag
# <p>foo</p>
: tag_start tag_body tag_end
{ {
s(:element, val[0], val[1]) s(:doctype, val[1])
} }
;
tag_body # <!DOCTYPE html PUBLIC "foo">
: T_TEXT | T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
{
s(:doctype, val[1], val[2])
}
# <!DOCTYPE html PUBLIC "foo" "bar">
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
{
s(:doctype, val[1], val[2], val[3])
}
; ;
whitespaces whitespaces

View File

@ -6,17 +6,24 @@ describe Oga::Parser do
parse_html('<!DOCTYPE html>').should == s(:document, s(:doctype)) parse_html('<!DOCTYPE html>').should == s(:document, s(:doctype))
end end
example 'parse a doctype with a public ID' do example 'parse a doctype with the doctype type' do
parse_html('<!DOCTYPE html "foo">').should == s( parse_html('<!DOCTYPE html PUBLIC>').should == s(
:document, :document,
s(:doctype, 'foo') s(:doctype, 'PUBLIC')
)
end
example 'parse a doctype with a public ID' do
parse_html('<!DOCTYPE html PUBLIC "foo">').should == s(
:document,
s(:doctype, 'PUBLIC', 'foo')
) )
end end
example 'parse a doctype with a public and private ID' do example 'parse a doctype with a public and private ID' do
parse_html('<!DOCTYPE html "foo" "bar">').should == s( parse_html('<!DOCTYPE html PUBLIC "foo" "bar">').should == s(
:document, :document,
s(:doctype, 'foo', 'bar') s(:doctype, 'PUBLIC', 'foo', 'bar')
) )
end end
@ -28,6 +35,7 @@ describe Oga::Parser do
:document, :document,
s( s(
:doctype, :doctype,
'PUBLIC',
'-//W3C//DTD HTML 4.01//EN', '-//W3C//DTD HTML 4.01//EN',
'http://www.w3.org/TR/html4/strict.dtd' 'http://www.w3.org/TR/html4/strict.dtd'
) )