parent
92ae48f905
commit
d9ef33e1f8
|
@ -197,14 +197,34 @@ module Oga
|
||||||
cdata := |*
|
cdata := |*
|
||||||
cdata_end => {
|
cdata_end => {
|
||||||
emit_text_buffer
|
emit_text_buffer
|
||||||
|
|
||||||
t(:T_CDATA_END)
|
t(:T_CDATA_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Consume everything else character by character and store it in a
|
any => buffer_text;
|
||||||
# separate buffer.
|
*|;
|
||||||
|
|
||||||
|
# Comments
|
||||||
|
#
|
||||||
|
# http://www.w3.org/TR/html-markup/syntax.html#comments
|
||||||
|
#
|
||||||
|
# Comments are lexed into 3 parts: the start tag, the content and the end
|
||||||
|
# tag.
|
||||||
|
#
|
||||||
|
# Unlike the W3 specification these rules *do* allow character sequences
|
||||||
|
# such as `--` and `->`. Putting extra checks in for these sequences
|
||||||
|
# would actually make the rules/actions more complex.
|
||||||
|
#
|
||||||
|
comment_start = '<!--';
|
||||||
|
comment_end = '-->';
|
||||||
|
|
||||||
|
comment := |*
|
||||||
|
comment_end => {
|
||||||
|
emit_text_buffer
|
||||||
|
t(:T_COMMENT_END)
|
||||||
|
fret;
|
||||||
|
};
|
||||||
|
|
||||||
any => buffer_text;
|
any => buffer_text;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
|
@ -214,17 +234,19 @@ module Oga
|
||||||
|
|
||||||
doctype_start => {
|
doctype_start => {
|
||||||
t(:T_DOCTYPE_START)
|
t(:T_DOCTYPE_START)
|
||||||
|
|
||||||
fcall doctype;
|
fcall doctype;
|
||||||
};
|
};
|
||||||
|
|
||||||
# @cdata_buffer is used to store the content of the CDATA tag.
|
|
||||||
cdata_start => {
|
cdata_start => {
|
||||||
t(:T_CDATA_START)
|
t(:T_CDATA_START)
|
||||||
|
|
||||||
fcall cdata;
|
fcall cdata;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
comment_start => {
|
||||||
|
t(:T_COMMENT_START)
|
||||||
|
fcall comment;
|
||||||
|
};
|
||||||
|
|
||||||
# General rules and actions.
|
# General rules and actions.
|
||||||
'<' => { t(:T_SMALLER) };
|
'<' => { t(:T_SMALLER) };
|
||||||
'>' => { t(:T_GREATER) };
|
'>' => { t(:T_GREATER) };
|
||||||
|
|
|
@ -4,16 +4,25 @@ describe Oga::Lexer do
|
||||||
context 'comments' do
|
context 'comments' do
|
||||||
example 'lex a comment' do
|
example 'lex a comment' do
|
||||||
lex('<!-- foo -->').should == [
|
lex('<!-- foo -->').should == [
|
||||||
[:T_SMALLER, '<', 1, 1],
|
[:T_COMMENT_START, '<!--', 1, 1],
|
||||||
[:T_BANG, '!', 1, 2],
|
[:T_TEXT, ' foo ', 1, 5],
|
||||||
[:T_DASH, '-', 1, 3],
|
[:T_COMMENT_END, '-->', 1, 10]
|
||||||
[:T_DASH, '-', 1, 4],
|
]
|
||||||
[:T_SPACE, ' ', 1, 5],
|
end
|
||||||
[:T_TEXT, 'foo', 1, 6],
|
|
||||||
[:T_SPACE, ' ', 1, 9],
|
example 'lex a comment containing --' do
|
||||||
[:T_DASH, '-', 1, 10],
|
lex('<!-- -- -->').should == [
|
||||||
[:T_DASH, '-', 1, 11],
|
[:T_COMMENT_START, '<!--', 1, 1],
|
||||||
[:T_GREATER, '>', 1, 12]
|
[:T_TEXT, ' -- ', 1, 5],
|
||||||
|
[:T_COMMENT_END, '-->', 1, 9]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
example 'lex a comment containing ->' do
|
||||||
|
lex('<!-- -> -->').should == [
|
||||||
|
[:T_COMMENT_START, '<!--', 1, 1],
|
||||||
|
[:T_TEXT, ' -> ', 1, 5],
|
||||||
|
[:T_COMMENT_END, '-->', 1, 9]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue