diff --git a/ext/c/lexer.rl b/ext/c/lexer.rl index cada319..6505a7e 100644 --- a/ext/c/lexer.rl +++ b/ext/c/lexer.rl @@ -77,11 +77,12 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block) char *data_str_val = StringValuePtr(data_block); - const char *p = data_str_val; - const char *pe = data_str_val + strlen(data_str_val); - const char *eof = pe; - const char *ts = 0; - const char *te = 0; + const char *p = data_str_val; + const char *pe = data_str_val + strlen(data_str_val); + const char *eof = pe; + const char *ts = 0; + const char *te = 0; + const char *mark = 0; int act = NUM2INT(oga_ivar_get(self, "@act")); int cs = NUM2INT(oga_ivar_get(self, "@cs")); diff --git a/ext/java/org/liboga/xml/Lexer.rl b/ext/java/org/liboga/xml/Lexer.rl index 8300972..9449a34 100644 --- a/ext/java/org/liboga/xml/Lexer.rl +++ b/ext/java/org/liboga/xml/Lexer.rl @@ -90,11 +90,12 @@ public class Lexer extends RubyObject byte[] data = rb_str.getBytes(); - int ts = 0; - int te = 0; - int p = 0; - int pe = data.length; - int eof = data.length; + int ts = 0; + int te = 0; + int p = 0; + int mark = 0; + int pe = data.length; + int eof = data.length; %% write exec; diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index aa49c5b..c095a00 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -47,7 +47,27 @@ # such as `--` and `->`. Putting extra checks in for these sequences would # actually make the rules/actions more complex. # - comment = ''; + + comment_start = ''; + + action start_comment { + mark = ts + 4; + + fnext comment_body; + } + + comment_body := |* + comment_end => { + callback("on_comment", data, encoding, mark, te - 3); + + mark = 0; + + fnext main; + }; + + any; + *|; # CDATA # @@ -215,10 +235,7 @@ main := |* doctype_start => start_doctype; xml_decl_start => start_xml_decl; - - comment => { - callback("on_comment", data, encoding, ts + 4, te - 3); - }; + comment_start => start_comment; cdata => { callback("on_cdata", data, encoding, ts + 9, te - 3); diff --git a/spec/oga/xml/lexer/comments_spec.rb b/spec/oga/xml/lexer/comments_spec.rb index 0c3176e..34cb54b 100644 --- a/spec/oga/xml/lexer/comments_spec.rb +++ b/spec/oga/xml/lexer/comments_spec.rb @@ -36,5 +36,18 @@ describe Oga::XML::Lexer do [:T_COMMENT, '', 1] ] end + + example 'lex two comments following each other' do + lex('').should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'a', 1], + [:T_COMMENT, 'foo', 1], + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'b', 1], + [:T_COMMENT, 'bar', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end end end