parent
92ae48f905
commit
d9ef33e1f8
|
@ -197,14 +197,34 @@ module Oga
|
|||
cdata := |*
|
||||
cdata_end => {
|
||||
emit_text_buffer
|
||||
|
||||
t(:T_CDATA_END)
|
||||
|
||||
fret;
|
||||
};
|
||||
|
||||
# Consume everything else character by character and store it in a
|
||||
# separate buffer.
|
||||
any => buffer_text;
|
||||
*|;
|
||||
|
||||
# Comments
|
||||
#
|
||||
# http://www.w3.org/TR/html-markup/syntax.html#comments
|
||||
#
|
||||
# Comments are lexed into 3 parts: the start tag, the content and the end
|
||||
# tag.
|
||||
#
|
||||
# Unlike the W3 specification these rules *do* allow character sequences
|
||||
# such as `--` and `->`. Putting extra checks in for these sequences
|
||||
# would actually make the rules/actions more complex.
|
||||
#
|
||||
comment_start = '<!--';
|
||||
comment_end = '-->';
|
||||
|
||||
comment := |*
|
||||
comment_end => {
|
||||
emit_text_buffer
|
||||
t(:T_COMMENT_END)
|
||||
fret;
|
||||
};
|
||||
|
||||
any => buffer_text;
|
||||
*|;
|
||||
|
||||
|
@ -214,17 +234,19 @@ module Oga
|
|||
|
||||
doctype_start => {
|
||||
t(:T_DOCTYPE_START)
|
||||
|
||||
fcall doctype;
|
||||
};
|
||||
|
||||
# @cdata_buffer is used to store the content of the CDATA tag.
|
||||
cdata_start => {
|
||||
t(:T_CDATA_START)
|
||||
|
||||
fcall cdata;
|
||||
};
|
||||
|
||||
comment_start => {
|
||||
t(:T_COMMENT_START)
|
||||
fcall comment;
|
||||
};
|
||||
|
||||
# General rules and actions.
|
||||
'<' => { t(:T_SMALLER) };
|
||||
'>' => { t(:T_GREATER) };
|
||||
|
|
|
@ -4,16 +4,25 @@ describe Oga::Lexer do
|
|||
context 'comments' do
|
||||
example 'lex a comment' do
|
||||
lex('<!-- foo -->').should == [
|
||||
[:T_SMALLER, '<', 1, 1],
|
||||
[:T_BANG, '!', 1, 2],
|
||||
[:T_DASH, '-', 1, 3],
|
||||
[:T_DASH, '-', 1, 4],
|
||||
[:T_SPACE, ' ', 1, 5],
|
||||
[:T_TEXT, 'foo', 1, 6],
|
||||
[:T_SPACE, ' ', 1, 9],
|
||||
[:T_DASH, '-', 1, 10],
|
||||
[:T_DASH, '-', 1, 11],
|
||||
[:T_GREATER, '>', 1, 12]
|
||||
[:T_COMMENT_START, '<!--', 1, 1],
|
||||
[:T_TEXT, ' foo ', 1, 5],
|
||||
[:T_COMMENT_END, '-->', 1, 10]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex a comment containing --' do
|
||||
lex('<!-- -- -->').should == [
|
||||
[:T_COMMENT_START, '<!--', 1, 1],
|
||||
[:T_TEXT, ' -- ', 1, 5],
|
||||
[:T_COMMENT_END, '-->', 1, 9]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex a comment containing ->' do
|
||||
lex('<!-- -> -->').should == [
|
||||
[:T_COMMENT_START, '<!--', 1, 1],
|
||||
[:T_TEXT, ' -> ', 1, 5],
|
||||
[:T_COMMENT_END, '-->', 1, 9]
|
||||
]
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue