Support for lexing/parsing inline doctypes.

This commit is contained in:
Yorick Peterse 2014-05-10 00:28:11 +02:00
parent a92023fe94
commit 19f04f98f7
8 changed files with 92 additions and 17 deletions

View File

@ -82,6 +82,12 @@
callback("on_doctype_type", data, encoding, ts, te);
};
# Consumes everything between the [ and ]. Due to the use of :> the ]
# is not consumed by any*. any* is used instead of any+ so that an empty
# block ("[]") is also matched; with any+ the first ] would be taken as
# content and the rule would run on until a later ].
# The range passed to the callback (ts + 1 .. te - 1) leaves out the
# surrounding brackets.
'[' any* :> ']' => {
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
};
# Lex the public/system IDs as regular strings.
dquote => { fcall string_dquote; };
squote => { fcall string_squote; };

View File

@ -19,8 +19,12 @@ module Oga
# The system ID of the doctype.
# @return [String]
#
# @!attribute [rw] inline_rules
# The inline doctype rules.
# @return [String]
#
class Doctype
attr_accessor :name, :type, :public_id, :system_id
attr_accessor :name, :type, :public_id, :system_id, :inline_rules
##
# @example
@ -50,6 +54,7 @@ module Oga
segments << " #{type}" if type
segments << %Q{ "#{public_id}"} if public_id
segments << %Q{ "#{system_id}"} if system_id
segments << " [#{inline_rules}]" if inline_rules
return segments + '>'
end
@ -70,6 +75,7 @@ module Oga
#{spacing} type: #{type.inspect}
#{spacing} public_id: #{public_id.inspect}
#{spacing} system_id: #{system_id.inspect}
#{spacing} inline_rules: #{inline_rules.inspect}
#{spacing})
EOF
end

View File

@ -194,6 +194,15 @@ module Oga
add_token(:T_DOCTYPE_END)
end
##
# Emits a T_DOCTYPE_INLINE token carrying the contents of an inline
# doctype block.
#
# @param [String] value The text of the inline block.
#
def on_doctype_inline(value)
  token_type = :T_DOCTYPE_INLINE

  add_token(token_type, value)
end
##
# Called on the start of a CDATA tag.
#

View File

@ -11,6 +11,7 @@ class Oga::XML::Parser
token T_STRING T_TEXT
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
token T_DOCTYPE_INLINE
token T_CDATA_START T_CDATA_END
token T_COMMENT_START T_COMMENT_END
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
@ -45,25 +46,36 @@ rule
# <!DOCTYPE html>
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END
{
on_doctype(val[1])
on_doctype(:name => val[1])
}
# <!DOCTYPE html PUBLIC>
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
{
on_doctype(val[1], val[2])
on_doctype(:name => val[1], :type => val[2])
}
# <!DOCTYPE html PUBLIC "foo">
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
{
on_doctype(val[1], val[2], val[3])
on_doctype(:name => val[1], :type => val[2], :public_id => val[3])
}
# <!DOCTYPE html PUBLIC "foo" "bar">
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
{
on_doctype(val[1], val[2], val[3], val[4])
on_doctype(
:name => val[1],
:type => val[2],
:public_id => val[3],
:system_id => val[4]
)
}
# <!DOCTYPE html [ ... ]>
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_INLINE T_DOCTYPE_END
{
on_doctype(:name => val[1], :inline_rules => val[2])
}
;
@ -270,18 +282,10 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
end
##
# @param [String] name
# @param [String] type
# @param [String] public_id
# @param [String] system_id
# @param [Hash] options
#
def on_doctype(name, type = nil, public_id = nil, system_id = nil)
return Doctype.new(
:name => name,
:type => type,
:public_id => public_id,
:system_id => system_id
)
def on_doctype(options = {})
return Doctype.new(options)
end
##

View File

@ -45,11 +45,24 @@ describe Oga::XML::Doctype do
instance.to_xml.should == '<!DOCTYPE html PUBLIC "foo" "bar">'
end
# The inline rules are expected between square brackets after the name.
example 'include the inline rules if present' do
  attributes = {:name => 'html', :inline_rules => '<!ELEMENT foo>'}
  doctype    = described_class.new(attributes)

  doctype.to_xml.should == '<!DOCTYPE html [<!ELEMENT foo>]>'
end
end
context '#inspect' do
before do
@instance = described_class.new(:name => 'html', :type => 'PUBLIC')
@instance = described_class.new(
:name => 'html',
:type => 'PUBLIC',
:inline_rules => '<!ELEMENT foo>'
)
end
example 'pretty-print the node' do
@ -59,6 +72,7 @@ Doctype(
type: "PUBLIC"
public_id: nil
system_id: nil
inline_rules: "<!ELEMENT foo>"
)
EOF
end

View File

@ -99,6 +99,7 @@ Document(
type: nil
public_id: nil
system_id: nil
inline_rules: nil
)
xml_declaration: XmlDeclaration(
version: "1.0"

View File

@ -31,5 +31,26 @@ describe Oga::XML::Lexer do
[:T_DOCTYPE_END, nil, 1]
]
end
# The brackets themselves are not part of the T_DOCTYPE_INLINE value.
example 'lex an inline doctype' do
  expected_tokens = [
    [:T_DOCTYPE_START, nil, 1],
    [:T_DOCTYPE_NAME, 'html', 1],
    [:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
    [:T_DOCTYPE_END, nil, 1]
  ]

  lex('<!DOCTYPE html [<!ELEMENT foo>]>').should == expected_tokens
end
# Technically not valid, put in place to make sure that the Ragel rules are
# not too greedy: the inline block has to end at its closing ] so that the
# string following it is still lexed as a separate T_STRING token.
example 'lex an inline doctype followed by a system ID' do
  lex('<!DOCTYPE html [<!ELEMENT foo>] "foo">').should == [
    [:T_DOCTYPE_START, nil, 1],
    [:T_DOCTYPE_NAME, 'html', 1],
    [:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
    [:T_STRING, 'foo', 1],
    [:T_DOCTYPE_END, nil, 1]
  ]
end
end
end

View File

@ -80,4 +80,18 @@ describe Oga::XML::Parser do
@document.doctype.system_id.should == 'bar'
end
end
# Parses a doctype carrying an inline block and checks the resulting node.
context 'doctypes with inline rules' do
  before :all do
    source    = '<!DOCTYPE html [<!ELEMENT foo>]>'
    @document = parse(source)
  end

  example 'return a Doctype instance' do
    doctype = @document.doctype

    doctype.is_a?(Oga::XML::Doctype).should == true
  end

  example 'set the inline doctype rules' do
    rules = @document.doctype.inline_rules

    rules.should == '<!ELEMENT foo>'
  end
end
end