Merge multiple comment tokens into a single one.
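The three-token sequence previously emitted for a comment (T_COMMENT_START, T_TEXT, T_COMMENT_END) has been replaced by a single T_COMMENT token whose value is the comment body. T_TEXT itself is still emitted for regular text outside comments. This simplifies both the lexer and the parser.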
This commit is contained in:
parent
c891dd88cb
commit
a4fb5c1299
|
@ -149,21 +149,6 @@
|
||||||
comment_start = '<!--';
|
comment_start = '<!--';
|
||||||
comment_end = '-->';
|
comment_end = '-->';
|
||||||
|
|
||||||
action start_comment {
|
|
||||||
callback_simple("on_comment_start");
|
|
||||||
fcall comment;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Machine used for processing the contents of a comment. Everything
|
|
||||||
# inside a comment is treated as plain text (similar to CDATA tags).
|
|
||||||
comment := |*
|
|
||||||
any* comment_end => {
|
|
||||||
callback("on_text", data, encoding, ts, te - 3);
|
|
||||||
callback_simple("on_comment_end");
|
|
||||||
fret;
|
|
||||||
};
|
|
||||||
*|;
|
|
||||||
|
|
||||||
# XML declaration tags
|
# XML declaration tags
|
||||||
#
|
#
|
||||||
# http://www.w3.org/TR/REC-xml/#sec-prolog-dtd
|
# http://www.w3.org/TR/REC-xml/#sec-prolog-dtd
|
||||||
|
@ -257,9 +242,12 @@
|
||||||
'<' => start_element;
|
'<' => start_element;
|
||||||
doctype_start => start_doctype;
|
doctype_start => start_doctype;
|
||||||
cdata_start => start_cdata;
|
cdata_start => start_cdata;
|
||||||
comment_start => start_comment;
|
|
||||||
xml_decl_start => start_xml_decl;
|
xml_decl_start => start_xml_decl;
|
||||||
|
|
||||||
|
comment_start any* comment_end => {
|
||||||
|
callback("on_comment", data, encoding, ts + 4, te - 3);
|
||||||
|
};
|
||||||
|
|
||||||
# Enter the body of the tag. If HTML mode is enabled and the current
|
# Enter the body of the tag. If HTML mode is enabled and the current
|
||||||
# element is a void element we'll close it and bail out.
|
# element is a void element we'll close it and bail out.
|
||||||
'>' => {
|
'>' => {
|
||||||
|
|
|
@ -217,17 +217,12 @@ module Oga
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Called on the start of a comment.
|
# Called on a comment.
|
||||||
#
|
#
|
||||||
def on_comment_start
|
# @param [String] value
|
||||||
add_token(:T_COMMENT_START)
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Called on the end of a comment.
|
|
||||||
#
|
#
|
||||||
def on_comment_end
|
def on_comment(value)
|
||||||
add_token(:T_COMMENT_END)
|
add_token(:T_COMMENT, value)
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|
|
@ -12,8 +12,7 @@ class Oga::XML::Parser
|
||||||
token T_STRING T_TEXT
|
token T_STRING T_TEXT
|
||||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
||||||
token T_DOCTYPE_INLINE
|
token T_DOCTYPE_INLINE
|
||||||
token T_CDATA_START T_CDATA_END
|
token T_CDATA_START T_CDATA_END T_COMMENT
|
||||||
token T_COMMENT_START T_COMMENT_END
|
|
||||||
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
||||||
token T_XML_DECL_START T_XML_DECL_END
|
token T_XML_DECL_START T_XML_DECL_END
|
||||||
|
|
||||||
|
@ -92,11 +91,8 @@ rule
|
||||||
# Comments
|
# Comments
|
||||||
|
|
||||||
comment
|
comment
|
||||||
# <!---->
|
|
||||||
: T_COMMENT_START T_COMMENT_END { on_comment }
|
|
||||||
|
|
||||||
# <!-- foo -->
|
# <!-- foo -->
|
||||||
| T_COMMENT_START T_TEXT T_COMMENT_END { on_comment(val[1]) }
|
| T_COMMENT { on_comment(val[0]) }
|
||||||
;
|
;
|
||||||
|
|
||||||
# Elements
|
# Elements
|
||||||
|
|
|
@ -3,33 +3,20 @@ require 'spec_helper'
|
||||||
describe Oga::XML::Lexer do
|
describe Oga::XML::Lexer do
|
||||||
context 'comments' do
|
context 'comments' do
|
||||||
example 'lex a comment' do
|
example 'lex a comment' do
|
||||||
lex('<!-- foo -->').should == [
|
lex('<!-- foo -->').should == [[:T_COMMENT, ' foo ', 1]]
|
||||||
[:T_COMMENT_START, nil, 1],
|
|
||||||
[:T_TEXT, ' foo ', 1],
|
|
||||||
[:T_COMMENT_END, nil, 1]
|
|
||||||
]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex a comment containing --' do
|
example 'lex a comment containing --' do
|
||||||
lex('<!-- -- -->').should == [
|
lex('<!-- -- -->').should == [[:T_COMMENT, ' -- ', 1]]
|
||||||
[:T_COMMENT_START, nil, 1],
|
|
||||||
[:T_TEXT, ' -- ', 1],
|
|
||||||
[:T_COMMENT_END, nil, 1]
|
|
||||||
]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex a comment containing ->' do
|
example 'lex a comment containing ->' do
|
||||||
lex('<!-- -> -->').should == [
|
lex('<!-- -> -->').should == [[:T_COMMENT, ' -> ', 1]]
|
||||||
[:T_COMMENT_START, nil, 1],
|
|
||||||
[:T_TEXT, ' -> ', 1],
|
|
||||||
[:T_COMMENT_END, nil, 1]
|
|
||||||
]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex a comment followed by text' do
|
example 'lex a comment followed by text' do
|
||||||
lex('<!---->foo').should == [
|
lex('<!---->foo').should == [
|
||||||
[:T_COMMENT_START, nil, 1],
|
[:T_COMMENT, '', 1],
|
||||||
[:T_COMMENT_END, nil, 1],
|
|
||||||
[:T_TEXT, 'foo', 1]
|
[:T_TEXT, 'foo', 1]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
@ -37,8 +24,7 @@ describe Oga::XML::Lexer do
|
||||||
example 'lex text followed by a comment' do
|
example 'lex text followed by a comment' do
|
||||||
lex('foo<!---->').should == [
|
lex('foo<!---->').should == [
|
||||||
[:T_TEXT, 'foo', 1],
|
[:T_TEXT, 'foo', 1],
|
||||||
[:T_COMMENT_START, nil, 1],
|
[:T_COMMENT, '', 1]
|
||||||
[:T_COMMENT_END, nil, 1]
|
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -47,8 +33,7 @@ describe Oga::XML::Lexer do
|
||||||
[:T_ELEM_START, nil, 1],
|
[:T_ELEM_START, nil, 1],
|
||||||
[:T_ELEM_NAME, 'p', 1],
|
[:T_ELEM_NAME, 'p', 1],
|
||||||
[:T_ELEM_END, nil, 1],
|
[:T_ELEM_END, nil, 1],
|
||||||
[:T_COMMENT_START, nil, 1],
|
[:T_COMMENT, '', 1]
|
||||||
[:T_COMMENT_END, nil, 1]
|
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue