Merge multiple comment tokens into a single one.

The tokens T_COMMENT_START, T_TEXT and T_COMMENT_END have been merged into a
single token: T_COMMENT. This simplifies both the lexer and the parser.
This commit is contained in:
Yorick Peterse 2014-05-19 09:30:30 +02:00
parent c891dd88cb
commit a4fb5c1299
4 changed files with 16 additions and 52 deletions

View File

@ -149,21 +149,6 @@
comment_start = '<!--'; comment_start = '<!--';
comment_end = '-->'; comment_end = '-->';
action start_comment {
callback_simple("on_comment_start");
fcall comment;
}
# Machine used for processing the contents of a comment. Everything
# inside a comment is treated as plain text (similar to CDATA tags).
comment := |*
any* comment_end => {
callback("on_text", data, encoding, ts, te - 3);
callback_simple("on_comment_end");
fret;
};
*|;
# XML declaration tags # XML declaration tags
# #
# http://www.w3.org/TR/REC-xml/#sec-prolog-dtd # http://www.w3.org/TR/REC-xml/#sec-prolog-dtd
@ -257,9 +242,12 @@
'<' => start_element; '<' => start_element;
doctype_start => start_doctype; doctype_start => start_doctype;
cdata_start => start_cdata; cdata_start => start_cdata;
comment_start => start_comment;
xml_decl_start => start_xml_decl; xml_decl_start => start_xml_decl;
comment_start any* comment_end => {
callback("on_comment", data, encoding, ts + 4, te - 3);
};
# Enter the body of the tag. If HTML mode is enabled and the current # Enter the body of the tag. If HTML mode is enabled and the current
# element is a void element we'll close it and bail out. # element is a void element we'll close it and bail out.
'>' => { '>' => {

View File

@ -217,17 +217,12 @@ module Oga
end end
## ##
# Called on the start of a comment. # Called on a comment.
# #
def on_comment_start # @param [String] value
add_token(:T_COMMENT_START)
end
##
# Called on the end of a comment.
# #
def on_comment_end def on_comment(value)
add_token(:T_COMMENT_END) add_token(:T_COMMENT, value)
end end
## ##

View File

@ -12,8 +12,7 @@ class Oga::XML::Parser
token T_STRING T_TEXT token T_STRING T_TEXT
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
token T_DOCTYPE_INLINE token T_DOCTYPE_INLINE
token T_CDATA_START T_CDATA_END token T_CDATA_START T_CDATA_END T_COMMENT
token T_COMMENT_START T_COMMENT_END
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
token T_XML_DECL_START T_XML_DECL_END token T_XML_DECL_START T_XML_DECL_END
@ -92,11 +91,8 @@ rule
# Comments # Comments
comment comment
# <!---->
: T_COMMENT_START T_COMMENT_END { on_comment }
# <!-- foo --> # <!-- foo -->
| T_COMMENT_START T_TEXT T_COMMENT_END { on_comment(val[1]) } | T_COMMENT { on_comment(val[0]) }
; ;
# Elements # Elements

View File

@ -3,33 +3,20 @@ require 'spec_helper'
describe Oga::XML::Lexer do describe Oga::XML::Lexer do
context 'comments' do context 'comments' do
example 'lex a comment' do example 'lex a comment' do
lex('<!-- foo -->').should == [ lex('<!-- foo -->').should == [[:T_COMMENT, ' foo ', 1]]
[:T_COMMENT_START, nil, 1],
[:T_TEXT, ' foo ', 1],
[:T_COMMENT_END, nil, 1]
]
end end
example 'lex a comment containing --' do example 'lex a comment containing --' do
lex('<!-- -- -->').should == [ lex('<!-- -- -->').should == [[:T_COMMENT, ' -- ', 1]]
[:T_COMMENT_START, nil, 1],
[:T_TEXT, ' -- ', 1],
[:T_COMMENT_END, nil, 1]
]
end end
example 'lex a comment containing ->' do example 'lex a comment containing ->' do
lex('<!-- -> -->').should == [ lex('<!-- -> -->').should == [[:T_COMMENT, ' -> ', 1]]
[:T_COMMENT_START, nil, 1],
[:T_TEXT, ' -> ', 1],
[:T_COMMENT_END, nil, 1]
]
end end
example 'lex a comment followed by text' do example 'lex a comment followed by text' do
lex('<!---->foo').should == [ lex('<!---->foo').should == [
[:T_COMMENT_START, nil, 1], [:T_COMMENT, '', 1],
[:T_COMMENT_END, nil, 1],
[:T_TEXT, 'foo', 1] [:T_TEXT, 'foo', 1]
] ]
end end
@ -37,8 +24,7 @@ describe Oga::XML::Lexer do
example 'lex text followed by a comment' do example 'lex text followed by a comment' do
lex('foo<!---->').should == [ lex('foo<!---->').should == [
[:T_TEXT, 'foo', 1], [:T_TEXT, 'foo', 1],
[:T_COMMENT_START, nil, 1], [:T_COMMENT, '', 1]
[:T_COMMENT_END, nil, 1]
] ]
end end
@ -47,8 +33,7 @@ describe Oga::XML::Lexer do
[:T_ELEM_START, nil, 1], [:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'p', 1], [:T_ELEM_NAME, 'p', 1],
[:T_ELEM_END, nil, 1], [:T_ELEM_END, nil, 1],
[:T_COMMENT_START, nil, 1], [:T_COMMENT, '', 1]
[:T_COMMENT_END, nil, 1]
] ]
end end
end end