Lex/parse doctype names separately.
This commit is contained in:
parent
8185656c1e
commit
81b1155af3
|
@ -281,7 +281,7 @@ module Oga
|
||||||
# 2. Deprecated doctypes, the more verbose ones used prior to HTML5.
|
# 2. Deprecated doctypes, the more verbose ones used prior to HTML5.
|
||||||
# 3. Legacy doctypes
|
# 3. Legacy doctypes
|
||||||
#
|
#
|
||||||
doctype_start = '<!DOCTYPE'i whitespace+ 'HTML'i;
|
doctype_start = '<!DOCTYPE'i whitespace+;
|
||||||
|
|
||||||
action start_doctype {
|
action start_doctype {
|
||||||
emit_buffer
|
emit_buffer
|
||||||
|
@ -302,6 +302,8 @@ module Oga
|
||||||
# including it.
|
# including it.
|
||||||
whitespace;
|
whitespace;
|
||||||
|
|
||||||
|
identifier => { emit(:T_DOCTYPE_NAME) };
|
||||||
|
|
||||||
'>' => {
|
'>' => {
|
||||||
add_token(:T_DOCTYPE_END)
|
add_token(:T_DOCTYPE_END)
|
||||||
fret;
|
fret;
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
class Oga::XML::Parser
|
class Oga::XML::Parser
|
||||||
|
|
||||||
token T_STRING T_TEXT
|
token T_STRING T_TEXT
|
||||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
|
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
||||||
token T_CDATA_START T_CDATA_END
|
token T_CDATA_START T_CDATA_END
|
||||||
token T_COMMENT_START T_COMMENT_END
|
token T_COMMENT_START T_COMMENT_END
|
||||||
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
||||||
|
@ -43,25 +43,25 @@ rule
|
||||||
|
|
||||||
doctype
|
doctype
|
||||||
# <!DOCTYPE html>
|
# <!DOCTYPE html>
|
||||||
: T_DOCTYPE_START T_DOCTYPE_END { s(:doctype) }
|
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END { s(:doctype, val[1]) }
|
||||||
|
|
||||||
# <!DOCTYPE html PUBLIC>
|
# <!DOCTYPE html PUBLIC>
|
||||||
| T_DOCTYPE_START T_DOCTYPE_TYPE T_DOCTYPE_END
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
|
||||||
{
|
|
||||||
s(:doctype, val[1])
|
|
||||||
}
|
|
||||||
|
|
||||||
# <!DOCTYPE html PUBLIC "foo">
|
|
||||||
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
|
||||||
{
|
{
|
||||||
s(:doctype, val[1], val[2])
|
s(:doctype, val[1], val[2])
|
||||||
}
|
}
|
||||||
|
|
||||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
# <!DOCTYPE html PUBLIC "foo">
|
||||||
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
||||||
{
|
{
|
||||||
s(:doctype, val[1], val[2], val[3])
|
s(:doctype, val[1], val[2], val[3])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||||
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
||||||
|
{
|
||||||
|
s(:doctype, val[1], val[2], val[3], val[4])
|
||||||
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
# CDATA tags
|
# CDATA tags
|
||||||
|
|
|
@ -5,6 +5,7 @@ describe Oga::XML::Lexer do
|
||||||
example 'lex the HTML5 doctype' do
|
example 'lex the HTML5 doctype' do
|
||||||
lex('<!DOCTYPE html>').should == [
|
lex('<!DOCTYPE html>').should == [
|
||||||
[:T_DOCTYPE_START, nil, 1],
|
[:T_DOCTYPE_START, nil, 1],
|
||||||
|
[:T_DOCTYPE_NAME, 'html', 1],
|
||||||
[:T_DOCTYPE_END, nil, 1]
|
[:T_DOCTYPE_END, nil, 1]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
@ -12,6 +13,7 @@ describe Oga::XML::Lexer do
|
||||||
example 'lex a doctype with a public and system ID' do
|
example 'lex a doctype with a public and system ID' do
|
||||||
lex('<!DOCTYPE HTML PUBLIC "foobar" "baz">').should == [
|
lex('<!DOCTYPE HTML PUBLIC "foobar" "baz">').should == [
|
||||||
[:T_DOCTYPE_START, nil, 1],
|
[:T_DOCTYPE_START, nil, 1],
|
||||||
|
[:T_DOCTYPE_NAME, 'HTML', 1],
|
||||||
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
|
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
|
||||||
[:T_STRING, 'foobar', 1],
|
[:T_STRING, 'foobar', 1],
|
||||||
[:T_STRING, 'baz', 1],
|
[:T_STRING, 'baz', 1],
|
||||||
|
@ -22,6 +24,7 @@ describe Oga::XML::Lexer do
|
||||||
example 'lex a doctype with a public and system ID using single quotes' do
|
example 'lex a doctype with a public and system ID using single quotes' do
|
||||||
lex("<!DOCTYPE HTML PUBLIC 'foobar' 'baz'>").should == [
|
lex("<!DOCTYPE HTML PUBLIC 'foobar' 'baz'>").should == [
|
||||||
[:T_DOCTYPE_START, nil, 1],
|
[:T_DOCTYPE_START, nil, 1],
|
||||||
|
[:T_DOCTYPE_NAME, 'HTML', 1],
|
||||||
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
|
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
|
||||||
[:T_STRING, 'foobar', 1],
|
[:T_STRING, 'foobar', 1],
|
||||||
[:T_STRING, 'baz', 1],
|
[:T_STRING, 'baz', 1],
|
||||||
|
|
|
@ -15,6 +15,7 @@ describe Oga::XML::Lexer do
|
||||||
|
|
||||||
lex(html).should == [
|
lex(html).should == [
|
||||||
[:T_DOCTYPE_START, nil, 1],
|
[:T_DOCTYPE_START, nil, 1],
|
||||||
|
[:T_DOCTYPE_NAME, 'html', 1],
|
||||||
[:T_DOCTYPE_END, nil, 1],
|
[:T_DOCTYPE_END, nil, 1],
|
||||||
[:T_TEXT, "\n", 1],
|
[:T_TEXT, "\n", 1],
|
||||||
|
|
||||||
|
|
|
@ -3,27 +3,27 @@ require 'spec_helper'
|
||||||
describe Oga::XML::Parser do
|
describe Oga::XML::Parser do
|
||||||
context 'doctypes' do
|
context 'doctypes' do
|
||||||
example 'parse a doctype' do
|
example 'parse a doctype' do
|
||||||
parse('<!DOCTYPE html>').should == s(:document, s(:doctype))
|
parse('<!DOCTYPE html>').should == s(:document, s(:doctype, 'html'))
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a doctype with the doctype type' do
|
example 'parse a doctype with the doctype type' do
|
||||||
parse('<!DOCTYPE html PUBLIC>').should == s(
|
parse('<!DOCTYPE html PUBLIC>').should == s(
|
||||||
:document,
|
:document,
|
||||||
s(:doctype, 'PUBLIC')
|
s(:doctype, 'html', 'PUBLIC')
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a doctype with a public ID' do
|
example 'parse a doctype with a public ID' do
|
||||||
parse('<!DOCTYPE html PUBLIC "foo">').should == s(
|
parse('<!DOCTYPE html PUBLIC "foo">').should == s(
|
||||||
:document,
|
:document,
|
||||||
s(:doctype, 'PUBLIC', 'foo')
|
s(:doctype, 'html', 'PUBLIC', 'foo')
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a doctype with a public and private ID' do
|
example 'parse a doctype with a public and private ID' do
|
||||||
parse('<!DOCTYPE html PUBLIC "foo" "bar">').should == s(
|
parse('<!DOCTYPE html PUBLIC "foo" "bar">').should == s(
|
||||||
:document,
|
:document,
|
||||||
s(:doctype, 'PUBLIC', 'foo', 'bar')
|
s(:doctype, 'html', 'PUBLIC', 'foo', 'bar')
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@ describe Oga::XML::Parser do
|
||||||
:document,
|
:document,
|
||||||
s(
|
s(
|
||||||
:doctype,
|
:doctype,
|
||||||
|
'HTML',
|
||||||
'PUBLIC',
|
'PUBLIC',
|
||||||
'-//W3C//DTD HTML 4.01//EN',
|
'-//W3C//DTD HTML 4.01//EN',
|
||||||
'http://www.w3.org/TR/html4/strict.dtd'
|
'http://www.w3.org/TR/html4/strict.dtd'
|
||||||
|
|
|
@ -15,7 +15,7 @@ describe Oga::XML::Parser do
|
||||||
|
|
||||||
parse(html).should == s(
|
parse(html).should == s(
|
||||||
:document,
|
:document,
|
||||||
s(:doctype),
|
s(:doctype, 'html'),
|
||||||
s(:text, "\n"),
|
s(:text, "\n"),
|
||||||
|
|
||||||
# <html>
|
# <html>
|
||||||
|
|
Loading…
Reference in New Issue