Lexing of CDATA tags.

This commit is contained in:
Yorick Peterse 2014-02-26 22:01:07 +01:00
parent 0a336e76d3
commit c4e0406ed9
2 changed files with 29 additions and 8 deletions

View File

@@ -73,20 +73,23 @@ module Oga
any_escaped = /\\./; any_escaped = /\\./;
smaller = '<'; smaller = '<';
greater = '>'; greater = '>';
slash = '/'; slash = '/';
bang = '!'; bang = '!';
equals = '='; equals = '=';
colon = ':'; colon = ':';
dash = '-'; dash = '-';
lbracket = '[';
rbracket = ']';
s_quote = "'"; s_quote = "'";
d_quote = '"'; d_quote = '"';
# FIXME: there really should be a better way of doing this. # FIXME: there really should be a better way of doing this.
text = (any - s_quote - d_quote - equals - bang - slash - text = (any - s_quote - d_quote - equals - bang - slash -
greater - smaller - whitespace - newline - colon - dash)+; greater - smaller - whitespace - newline - colon - dash -
lbracket - rbracket)+;
# Unicode characters, taken from whitequark's wonderful parser library. # Unicode characters, taken from whitequark's wonderful parser library.
# (I honestly need to buy that dude a beer or 100). Basically this # (I honestly need to buy that dude a beer or 100). Basically this
@@ -103,6 +106,8 @@ module Oga
d_quote => { t(:T_DQUOTE) }; d_quote => { t(:T_DQUOTE) };
s_quote => { t(:T_SQUOTE) }; s_quote => { t(:T_SQUOTE) };
dash => { t(:T_DASH) }; dash => { t(:T_DASH) };
rbracket => { t(:T_RBRACKET) };
lbracket => { t(:T_LBRACKET) };
colon => { t(:T_COLON) }; colon => { t(:T_COLON) };
bang => { t(:T_BANG) }; bang => { t(:T_BANG) };
equals => { t(:T_EQUALS) }; equals => { t(:T_EQUALS) };

View File

@@ -116,4 +116,20 @@ describe Oga::Lexer do
] ]
end end
end end
# Specs for how the lexer tokenizes CDATA sections.
context 'cdata tags' do
  # The input is expected to come back as individual punctuation and text
  # tokens; no dedicated CDATA token is asserted here.
  example 'lex a cdata tag' do
    # Each entry looks like [type, value, line, column]; the last number
    # matches the 1-based character offset of the value in the input.
    expected_tokens = [
      [:T_SMALLER, '<', 1, 1],
      [:T_BANG, '!', 1, 2],
      [:T_LBRACKET, '[', 1, 3],
      [:T_TEXT, 'CDATA', 1, 4],
      [:T_LBRACKET, '[', 1, 9],
      [:T_TEXT, 'foo', 1, 10],
      [:T_RBRACKET, ']', 1, 13],
      [:T_RBRACKET, ']', 1, 14],
      [:T_GREATER, '>', 1, 15]
    ]

    actual_tokens = lex('<![CDATA[foo]]>')

    actual_tokens.should == expected_tokens
  end
end
end end