Support for lexing/parsing inline doctypes.
This commit is contained in:
parent
a92023fe94
commit
19f04f98f7
|
@ -82,6 +82,12 @@
|
||||||
callback("on_doctype_type", data, encoding, ts, te);
|
callback("on_doctype_type", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Consumes everything between the [ and ]. Due to the use of :> the ]
|
||||||
|
# is not consumed by any+.
|
||||||
|
'[' any+ :> ']' => {
|
||||||
|
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
|
||||||
|
};
|
||||||
|
|
||||||
# Lex the public/system IDs as regular strings.
|
# Lex the public/system IDs as regular strings.
|
||||||
dquote => { fcall string_dquote; };
|
dquote => { fcall string_dquote; };
|
||||||
squote => { fcall string_squote; };
|
squote => { fcall string_squote; };
|
||||||
|
|
|
@ -19,8 +19,12 @@ module Oga
|
||||||
# The system ID of the doctype.
|
# The system ID of the doctype.
|
||||||
# @return [String]
|
# @return [String]
|
||||||
#
|
#
|
||||||
|
# @!attribute [rw] inline_rules
|
||||||
|
# The inline doctype rules.
|
||||||
|
# @return [String]
|
||||||
|
#
|
||||||
class Doctype
|
class Doctype
|
||||||
attr_accessor :name, :type, :public_id, :system_id
|
attr_accessor :name, :type, :public_id, :system_id, :inline_rules
|
||||||
|
|
||||||
##
|
##
|
||||||
# @example
|
# @example
|
||||||
|
@ -50,6 +54,7 @@ module Oga
|
||||||
segments << " #{type}" if type
|
segments << " #{type}" if type
|
||||||
segments << %Q{ "#{public_id}"} if public_id
|
segments << %Q{ "#{public_id}"} if public_id
|
||||||
segments << %Q{ "#{system_id}"} if system_id
|
segments << %Q{ "#{system_id}"} if system_id
|
||||||
|
segments << " [#{inline_rules}]" if inline_rules
|
||||||
|
|
||||||
return segments + '>'
|
return segments + '>'
|
||||||
end
|
end
|
||||||
|
@ -70,6 +75,7 @@ module Oga
|
||||||
#{spacing} type: #{type.inspect}
|
#{spacing} type: #{type.inspect}
|
||||||
#{spacing} public_id: #{public_id.inspect}
|
#{spacing} public_id: #{public_id.inspect}
|
||||||
#{spacing} system_id: #{system_id.inspect}
|
#{spacing} system_id: #{system_id.inspect}
|
||||||
|
#{spacing} inline_rules: #{inline_rules.inspect}
|
||||||
#{spacing})
|
#{spacing})
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
|
|
|
@ -194,6 +194,15 @@ module Oga
|
||||||
add_token(:T_DOCTYPE_END)
|
add_token(:T_DOCTYPE_END)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Called on an inline doctype block.
|
||||||
|
#
|
||||||
|
# @param [String] value
|
||||||
|
#
|
||||||
|
def on_doctype_inline(value)
|
||||||
|
add_token(:T_DOCTYPE_INLINE, value)
|
||||||
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Called on the start of a CDATA tag.
|
# Called on the start of a CDATA tag.
|
||||||
#
|
#
|
||||||
|
|
|
@ -11,6 +11,7 @@ class Oga::XML::Parser
|
||||||
|
|
||||||
token T_STRING T_TEXT
|
token T_STRING T_TEXT
|
||||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
||||||
|
token T_DOCTYPE_INLINE
|
||||||
token T_CDATA_START T_CDATA_END
|
token T_CDATA_START T_CDATA_END
|
||||||
token T_COMMENT_START T_COMMENT_END
|
token T_COMMENT_START T_COMMENT_END
|
||||||
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
||||||
|
@ -45,25 +46,36 @@ rule
|
||||||
# <!DOCTYPE html>
|
# <!DOCTYPE html>
|
||||||
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END
|
: T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END
|
||||||
{
|
{
|
||||||
on_doctype(val[1])
|
on_doctype(:name => val[1])
|
||||||
}
|
}
|
||||||
|
|
||||||
# <!DOCTYPE html PUBLIC>
|
# <!DOCTYPE html PUBLIC>
|
||||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END
|
||||||
{
|
{
|
||||||
on_doctype(val[1], val[2])
|
on_doctype(:name => val[1], :type => val[2])
|
||||||
}
|
}
|
||||||
|
|
||||||
# <!DOCTYPE html PUBLIC "foo">
|
# <!DOCTYPE html PUBLIC "foo">
|
||||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
|
||||||
{
|
{
|
||||||
on_doctype(val[1], val[2], val[3])
|
on_doctype(:name => val[1], :type => val[2], :public_id => val[3])
|
||||||
}
|
}
|
||||||
|
|
||||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
|
||||||
{
|
{
|
||||||
on_doctype(val[1], val[2], val[3], val[4])
|
on_doctype(
|
||||||
|
:name => val[1],
|
||||||
|
:type => val[2],
|
||||||
|
:public_id => val[3],
|
||||||
|
:system_id => val[4]
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# <!DOCTYPE html [ ... ]>
|
||||||
|
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_INLINE T_DOCTYPE_END
|
||||||
|
{
|
||||||
|
on_doctype(:name => val[1], :inline_rules => val[2])
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -270,18 +282,10 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}:
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# @param [String] name
|
# @param [Hash] options
|
||||||
# @param [String] type
|
|
||||||
# @param [String] public_id
|
|
||||||
# @param [String] system_id
|
|
||||||
#
|
#
|
||||||
def on_doctype(name, type = nil, public_id = nil, system_id = nil)
|
def on_doctype(options = {})
|
||||||
return Doctype.new(
|
return Doctype.new(options)
|
||||||
:name => name,
|
|
||||||
:type => type,
|
|
||||||
:public_id => public_id,
|
|
||||||
:system_id => system_id
|
|
||||||
)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|
|
@ -45,11 +45,24 @@ describe Oga::XML::Doctype do
|
||||||
|
|
||||||
instance.to_xml.should == '<!DOCTYPE html PUBLIC "foo" "bar">'
|
instance.to_xml.should == '<!DOCTYPE html PUBLIC "foo" "bar">'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
example 'include the inline rules if present' do
|
||||||
|
instance = described_class.new(
|
||||||
|
:name => 'html',
|
||||||
|
:inline_rules => '<!ELEMENT foo>'
|
||||||
|
)
|
||||||
|
|
||||||
|
instance.to_xml.should == '<!DOCTYPE html [<!ELEMENT foo>]>'
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context '#inspect' do
|
context '#inspect' do
|
||||||
before do
|
before do
|
||||||
@instance = described_class.new(:name => 'html', :type => 'PUBLIC')
|
@instance = described_class.new(
|
||||||
|
:name => 'html',
|
||||||
|
:type => 'PUBLIC',
|
||||||
|
:inline_rules => '<!ELEMENT foo>'
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'pretty-print the node' do
|
example 'pretty-print the node' do
|
||||||
|
@ -59,6 +72,7 @@ Doctype(
|
||||||
type: "PUBLIC"
|
type: "PUBLIC"
|
||||||
public_id: nil
|
public_id: nil
|
||||||
system_id: nil
|
system_id: nil
|
||||||
|
inline_rules: "<!ELEMENT foo>"
|
||||||
)
|
)
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
|
|
|
@ -99,6 +99,7 @@ Document(
|
||||||
type: nil
|
type: nil
|
||||||
public_id: nil
|
public_id: nil
|
||||||
system_id: nil
|
system_id: nil
|
||||||
|
inline_rules: nil
|
||||||
)
|
)
|
||||||
xml_declaration: XmlDeclaration(
|
xml_declaration: XmlDeclaration(
|
||||||
version: "1.0"
|
version: "1.0"
|
||||||
|
|
|
@ -31,5 +31,26 @@ describe Oga::XML::Lexer do
|
||||||
[:T_DOCTYPE_END, nil, 1]
|
[:T_DOCTYPE_END, nil, 1]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
example 'lex an inline doctype' do
|
||||||
|
lex('<!DOCTYPE html [<!ELEMENT foo>]>').should == [
|
||||||
|
[:T_DOCTYPE_START, nil, 1],
|
||||||
|
[:T_DOCTYPE_NAME, 'html', 1],
|
||||||
|
[:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
|
||||||
|
[:T_DOCTYPE_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
# Technically not valid, but put in place to make sure that the Ragel rules are
|
||||||
|
# not too greedy.
|
||||||
|
example 'lex an inline doctype followed by a system ID' do
|
||||||
|
lex('<!DOCTYPE html [<!ELEMENT foo>] "foo">').should == [
|
||||||
|
[:T_DOCTYPE_START, nil, 1],
|
||||||
|
[:T_DOCTYPE_NAME, 'html', 1],
|
||||||
|
[:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
|
||||||
|
[:T_STRING, 'foo', 1],
|
||||||
|
[:T_DOCTYPE_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -80,4 +80,18 @@ describe Oga::XML::Parser do
|
||||||
@document.doctype.system_id.should == 'bar'
|
@document.doctype.system_id.should == 'bar'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'doctypes with inline rules' do
|
||||||
|
before :all do
|
||||||
|
@document = parse('<!DOCTYPE html [<!ELEMENT foo>]>')
|
||||||
|
end
|
||||||
|
|
||||||
|
example 'return a Doctype instance' do
|
||||||
|
@document.doctype.is_a?(Oga::XML::Doctype).should == true
|
||||||
|
end
|
||||||
|
|
||||||
|
example 'set the inline doctype rules' do
|
||||||
|
@document.doctype.inline_rules.should == '<!ELEMENT foo>'
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue