Lex/parse doctype names separately.
This commit is contained in:
parent
8185656c1e
commit
81b1155af3
|
@ -281,7 +281,7 @@ module Oga
|
|||
# 2. Deprecated doctypes, the more verbose ones used prior to HTML5.
|
||||
# 3. Legacy doctypes
|
||||
#
|
||||
doctype_start = '<!DOCTYPE'i whitespace+ 'HTML'i;
|
||||
doctype_start = '<!DOCTYPE'i whitespace+;
|
||||
|
||||
action start_doctype {
|
||||
emit_buffer
|
||||
|
@ -302,6 +302,8 @@ module Oga
|
|||
# including it.
|
||||
whitespace;
|
||||
|
||||
identifier => { emit(:T_DOCTYPE_NAME) };
|
||||
|
||||
'>' => {
|
||||
add_token(:T_DOCTYPE_END)
|
||||
fret;
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
class Oga::XML::Parser
|
||||
|
||||
token T_STRING T_TEXT
|
||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
|
||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
||||
token T_CDATA_START T_CDATA_END
|
||||
token T_COMMENT_START T_COMMENT_END
|
||||
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
||||
|
@ -43,25 +43,25 @@ rule
|
|||
|
||||
doctype
|
||||
# <!DOCTYPE html>
|
||||
: T_DOCTYPE_START T_DOCTYPE_END { s(:doctype) }
|
||||
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END { s(:doctype, val[1]) }
|
||||
|
||||
# <!DOCTYPE html PUBLIC>
|
||||
| T_DOCTYPE_START T_DOCTYPE_TYPE T_DOCTYPE_END
|
||||
{
|
||||
s(:doctype, val[1])
|
||||
}
|
||||
|
||||
# <!DOCTYPE html PUBLIC "foo">
|
||||
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
|
||||
{
|
||||
s(:doctype, val[1], val[2])
|
||||
}
|
||||
|
||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
||||
# <!DOCTYPE html PUBLIC "foo">
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
||||
{
|
||||
s(:doctype, val[1], val[2], val[3])
|
||||
}
|
||||
|
||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
||||
{
|
||||
s(:doctype, val[1], val[2], val[3], val[4])
|
||||
}
|
||||
;
|
||||
|
||||
# CDATA tags
|
||||
|
|
|
@ -5,6 +5,7 @@ describe Oga::XML::Lexer do
|
|||
example 'lex the HTML5 doctype' do
|
||||
lex('<!DOCTYPE html>').should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
@ -12,6 +13,7 @@ describe Oga::XML::Lexer do
|
|||
example 'lex a doctype with a public and system ID' do
|
||||
lex('<!DOCTYPE HTML PUBLIC "foobar" "baz">').should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'HTML', 1],
|
||||
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
|
||||
[:T_STRING, 'foobar', 1],
|
||||
[:T_STRING, 'baz', 1],
|
||||
|
@ -22,6 +24,7 @@ describe Oga::XML::Lexer do
|
|||
example 'lex a doctype with a public and system ID using single quotes' do
|
||||
lex("<!DOCTYPE HTML PUBLIC 'foobar' 'baz'>").should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'HTML', 1],
|
||||
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
|
||||
[:T_STRING, 'foobar', 1],
|
||||
[:T_STRING, 'baz', 1],
|
||||
|
|
|
@ -15,6 +15,7 @@ describe Oga::XML::Lexer do
|
|||
|
||||
lex(html).should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_END, nil, 1],
|
||||
[:T_TEXT, "\n", 1],
|
||||
|
||||
|
|
|
@ -3,27 +3,27 @@ require 'spec_helper'
|
|||
describe Oga::XML::Parser do
|
||||
context 'doctypes' do
|
||||
example 'parse a doctype' do
|
||||
parse('<!DOCTYPE html>').should == s(:document, s(:doctype))
|
||||
parse('<!DOCTYPE html>').should == s(:document, s(:doctype, 'html'))
|
||||
end
|
||||
|
||||
example 'parse a doctype with the doctype type' do
|
||||
parse('<!DOCTYPE html PUBLIC>').should == s(
|
||||
:document,
|
||||
s(:doctype, 'PUBLIC')
|
||||
s(:doctype, 'html', 'PUBLIC')
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a doctype with a public ID' do
|
||||
parse('<!DOCTYPE html PUBLIC "foo">').should == s(
|
||||
:document,
|
||||
s(:doctype, 'PUBLIC', 'foo')
|
||||
s(:doctype, 'html', 'PUBLIC', 'foo')
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a doctype with a public and private ID' do
|
||||
parse('<!DOCTYPE html PUBLIC "foo" "bar">').should == s(
|
||||
:document,
|
||||
s(:doctype, 'PUBLIC', 'foo', 'bar')
|
||||
s(:doctype, 'html', 'PUBLIC', 'foo', 'bar')
|
||||
)
|
||||
end
|
||||
|
||||
|
@ -35,6 +35,7 @@ describe Oga::XML::Parser do
|
|||
:document,
|
||||
s(
|
||||
:doctype,
|
||||
'HTML',
|
||||
'PUBLIC',
|
||||
'-//W3C//DTD HTML 4.01//EN',
|
||||
'http://www.w3.org/TR/html4/strict.dtd'
|
||||
|
|
|
@ -15,7 +15,7 @@ describe Oga::XML::Parser do
|
|||
|
||||
parse(html).should == s(
|
||||
:document,
|
||||
s(:doctype),
|
||||
s(:doctype, 'html'),
|
||||
s(:text, "\n"),
|
||||
|
||||
# <html>
|
||||
|
|
Loading…
Reference in New Issue