Lexing of CDATA tags.
This commit is contained in:
parent
c011e2faaa
commit
4883ac7384
|
@ -108,11 +108,50 @@ module Oga
|
|||
doctype = smaller whitespace* bang whitespace* 'DOCTYPE'i whitespace*
|
||||
'HTML'i whitespace* any* greater;
|
||||
|
||||
# CDATA
|
||||
#
|
||||
# http://www.w3.org/TR/html-markup/syntax.html#cdata-sections
|
||||
#
|
||||
# CDATA tags are broken up into 3 parts: the start, the content and the
|
||||
# end tag.
|
||||
#
|
||||
# In HTML, CDATA tags have no meaning and are not supported. Oga does support
|
||||
# them but treats their contents as plain text.
|
||||
#
|
||||
cdata_start = smaller bang lbracket 'CDATA' lbracket;
|
||||
cdata_end = rbracket rbracket greater;
|
||||
|
||||
main := |*
|
||||
whitespace => { t(:T_SPACE) };
|
||||
newline => { t(:T_NEWLINE); advance_line };
|
||||
|
||||
doctype => { t(:T_DOCTYPE) };
|
||||
|
||||
# CDATA
|
||||
#
|
||||
# When a CDATA section is matched, tokens are emitted for the start tag,
|
||||
# the content and the end tag.
|
||||
#
|
||||
cdata_start
|
||||
%{
|
||||
@cdata_start = p
|
||||
t(:T_CDATA_START, @ts, p)
|
||||
}
|
||||
|
||||
# Consume everything up to the first "]", which marks the start of the end tag.
|
||||
(any - rbracket)+
|
||||
%{
|
||||
t(:T_TEXT, @cdata_start, p)
|
||||
|
||||
@cdata_start = nil
|
||||
}
|
||||
|
||||
cdata_end
|
||||
>{
|
||||
t(:T_CDATA_END, p, pe)
|
||||
};
|
||||
|
||||
# General rules and actions.
|
||||
smaller => { t(:T_SMALLER) };
|
||||
greater => { t(:T_GREATER) };
|
||||
slash => { t(:T_SLASH) };
|
||||
|
|
|
@ -4,15 +4,17 @@ describe Oga::Lexer do
|
|||
context 'cdata tags' do
|
||||
example 'lex a cdata tag' do
|
||||
lex('<![CDATA[foo]]>').should == [
|
||||
[:T_SMALLER, '<', 1, 1],
|
||||
[:T_BANG, '!', 1, 2],
|
||||
[:T_LBRACKET, '[', 1, 3],
|
||||
[:T_TEXT, 'CDATA', 1, 4],
|
||||
[:T_LBRACKET, '[', 1, 9],
|
||||
[:T_CDATA_START, '<![CDATA[', 1, 1],
|
||||
[:T_TEXT, 'foo', 1, 10],
|
||||
[:T_RBRACKET, ']', 1, 13],
|
||||
[:T_RBRACKET, ']', 1, 14],
|
||||
[:T_GREATER, '>', 1, 15],
|
||||
[:T_CDATA_END, ']]>', 1, 13]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex tags inside CDATA tags as regular text' do
|
||||
lex('<![CDATA[<p>Foo</p>]]>').should == [
|
||||
[:T_CDATA_START, '<![CDATA[', 1, 1],
|
||||
[:T_TEXT, '<p>Foo</p>', 1, 10],
|
||||
[:T_CDATA_END, ']]>', 1, 20]
|
||||
]
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue