Support for lexing/parsing inline doctypes.
This commit is contained in:
parent
a92023fe94
commit
19f04f98f7
|
@ -82,6 +82,12 @@
|
|||
callback("on_doctype_type", data, encoding, ts, te);
|
||||
};
|
||||
|
||||
# Consumes everything between the [ and ]. Due to the use of :> the ]
|
||||
# is not consumed by any+.
|
||||
'[' any+ :> ']' => {
|
||||
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
|
||||
};
|
||||
|
||||
# Lex the public/system IDs as regular strings.
|
||||
dquote => { fcall string_dquote; };
|
||||
squote => { fcall string_squote; };
|
||||
|
|
|
@ -19,8 +19,12 @@ module Oga
|
|||
# The system ID of the doctype.
|
||||
# @return [String]
|
||||
#
|
||||
# @!attribute [rw] inline_rules
|
||||
# The inline doctype rules.
|
||||
# @return [String]
|
||||
#
|
||||
class Doctype
|
||||
attr_accessor :name, :type, :public_id, :system_id
|
||||
attr_accessor :name, :type, :public_id, :system_id, :inline_rules
|
||||
|
||||
##
|
||||
# @example
|
||||
|
@ -50,6 +54,7 @@ module Oga
|
|||
segments << " #{type}" if type
|
||||
segments << %Q{ "#{public_id}"} if public_id
|
||||
segments << %Q{ "#{system_id}"} if system_id
|
||||
segments << " [#{inline_rules}]" if inline_rules
|
||||
|
||||
return segments + '>'
|
||||
end
|
||||
|
@ -70,6 +75,7 @@ module Oga
|
|||
#{spacing} type: #{type.inspect}
|
||||
#{spacing} public_id: #{public_id.inspect}
|
||||
#{spacing} system_id: #{system_id.inspect}
|
||||
#{spacing} inline_rules: #{inline_rules.inspect}
|
||||
#{spacing})
|
||||
EOF
|
||||
end
|
||||
|
|
|
@ -194,6 +194,15 @@ module Oga
|
|||
add_token(:T_DOCTYPE_END)
|
||||
end
|
||||
|
||||
##
|
||||
# Called on an inline doctype block.
|
||||
#
|
||||
# @param [String] value
|
||||
#
|
||||
def on_doctype_inline(value)
|
||||
add_token(:T_DOCTYPE_INLINE, value)
|
||||
end
|
||||
|
||||
##
|
||||
# Called on the start of a CDATA tag.
|
||||
#
|
||||
|
|
|
@ -11,6 +11,7 @@ class Oga::XML::Parser
|
|||
|
||||
token T_STRING T_TEXT
|
||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
||||
token T_DOCTYPE_INLINE
|
||||
token T_CDATA_START T_CDATA_END
|
||||
token T_COMMENT_START T_COMMENT_END
|
||||
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
||||
|
@ -45,25 +46,36 @@ rule
|
|||
# <!DOCTYPE html>
|
||||
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END
|
||||
{
|
||||
on_doctype(val[1])
|
||||
on_doctype(:name => val[1])
|
||||
}
|
||||
|
||||
# <!DOCTYPE html PUBLIC>
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
|
||||
{
|
||||
on_doctype(val[1], val[2])
|
||||
on_doctype(:name => val[1], :type => val[2])
|
||||
}
|
||||
|
||||
# <!DOCTYPE html PUBLIC "foo">
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
||||
{
|
||||
on_doctype(val[1], val[2], val[3])
|
||||
on_doctype(:name => val[1], :type => val[2], :public_id => val[3])
|
||||
}
|
||||
|
||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
||||
{
|
||||
on_doctype(val[1], val[2], val[3], val[4])
|
||||
on_doctype(
|
||||
:name => val[1],
|
||||
:type => val[2],
|
||||
:public_id => val[3],
|
||||
:system_id => val[4]
|
||||
)
|
||||
}
|
||||
|
||||
# <!DOCTYPE html [ ... ]>
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_INLINE T_DOCTYPE_END
|
||||
{
|
||||
on_doctype(:name => val[1], :inline_rules => val[2])
|
||||
}
|
||||
;
|
||||
|
||||
|
@ -270,18 +282,10 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
|
|||
end
|
||||
|
||||
##
|
||||
# @param [String] name
|
||||
# @param [String] type
|
||||
# @param [String] public_id
|
||||
# @param [String] system_id
|
||||
# @param [Hash] options
|
||||
#
|
||||
def on_doctype(name, type = nil, public_id = nil, system_id = nil)
|
||||
return Doctype.new(
|
||||
:name => name,
|
||||
:type => type,
|
||||
:public_id => public_id,
|
||||
:system_id => system_id
|
||||
)
|
||||
def on_doctype(options = {})
|
||||
return Doctype.new(options)
|
||||
end
|
||||
|
||||
##
|
||||
|
|
|
@ -45,11 +45,24 @@ describe Oga::XML::Doctype do
|
|||
|
||||
instance.to_xml.should == '<!DOCTYPE html PUBLIC "foo" "bar">'
|
||||
end
|
||||
|
||||
example 'include the inline rules if present' do
|
||||
instance = described_class.new(
|
||||
:name => 'html',
|
||||
:inline_rules => '<!ELEMENT foo>'
|
||||
)
|
||||
|
||||
instance.to_xml.should == '<!DOCTYPE html [<!ELEMENT foo>]>'
|
||||
end
|
||||
end
|
||||
|
||||
context '#inspect' do
|
||||
before do
|
||||
@instance = described_class.new(:name => 'html', :type => 'PUBLIC')
|
||||
@instance = described_class.new(
|
||||
:name => 'html',
|
||||
:type => 'PUBLIC',
|
||||
:inline_rules => '<!ELEMENT foo>'
|
||||
)
|
||||
end
|
||||
|
||||
example 'pretty-print the node' do
|
||||
|
@ -59,6 +72,7 @@ Doctype(
|
|||
type: "PUBLIC"
|
||||
public_id: nil
|
||||
system_id: nil
|
||||
inline_rules: "<!ELEMENT foo>"
|
||||
)
|
||||
EOF
|
||||
end
|
||||
|
|
|
@ -99,6 +99,7 @@ Document(
|
|||
type: nil
|
||||
public_id: nil
|
||||
system_id: nil
|
||||
inline_rules: nil
|
||||
)
|
||||
xml_declaration: XmlDeclaration(
|
||||
version: "1.0"
|
||||
|
|
|
@ -31,5 +31,26 @@ describe Oga::XML::Lexer do
|
|||
[:T_DOCTYPE_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex an inline doctype' do
|
||||
lex('<!DOCTYPE html [<!ELEMENT foo>]>').should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
|
||||
[:T_DOCTYPE_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
# Technically not valid, put in place to make sure that the Ragel rules are
|
||||
# not too greedy.
|
||||
example 'lex an inline doctype followed by a system ID' do
|
||||
lex('<!DOCTYPE html [<!ELEMENT foo>] "foo">').should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
|
||||
[:T_STRING, 'foo', 1],
|
||||
[:T_DOCTYPE_END, nil, 1]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -80,4 +80,18 @@ describe Oga::XML::Parser do
|
|||
@document.doctype.system_id.should == 'bar'
|
||||
end
|
||||
end
|
||||
|
||||
context 'doctypes with inline rules' do
|
||||
before :all do
|
||||
@document = parse('<!DOCTYPE html [<!ELEMENT foo>]>')
|
||||
end
|
||||
|
||||
example 'return a Doctype instance' do
|
||||
@document.doctype.is_a?(Oga::XML::Doctype).should == true
|
||||
end
|
||||
|
||||
example 'set the inline doctype rules' do
|
||||
@document.doctype.inline_rules.should == '<!ELEMENT foo>'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue