diff --git a/ext/c/lexer.rl b/ext/c/lexer.rl index 6505a7e..43a135c 100644 --- a/ext/c/lexer.rl +++ b/ext/c/lexer.rl @@ -22,6 +22,9 @@ on `ts` and `te`) so the macro ignores this argument. #define oga_ivar_set(owner, name, value) \ rb_ivar_set(owner, rb_intern(name), value) +#define advance_line(amount) \ + rb_funcall(self, rb_intern("advance_line"), 1, INT2NUM(amount)); + %%machine c_lexer; /** @@ -84,8 +87,9 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block) const char *te = 0; const char *mark = 0; - int act = NUM2INT(oga_ivar_get(self, "@act")); - int cs = NUM2INT(oga_ivar_get(self, "@cs")); + int act = NUM2INT(oga_ivar_get(self, "@act")); + int cs = NUM2INT(oga_ivar_get(self, "@cs")); + int lines = 0; %% write exec; diff --git a/ext/java/org/liboga/xml/Lexer.rl b/ext/java/org/liboga/xml/Lexer.rl index 9449a34..a6d3327 100644 --- a/ext/java/org/liboga/xml/Lexer.rl +++ b/ext/java/org/liboga/xml/Lexer.rl @@ -90,12 +90,13 @@ public class Lexer extends RubyObject byte[] data = rb_str.getBytes(); - int ts = 0; - int te = 0; - int p = 0; - int mark = 0; - int pe = data.length; - int eof = data.length; + int ts = 0; + int te = 0; + int p = 0; + int mark = 0; + int lines = 0; + int pe = data.length; + int eof = data.length; %% write exec; @@ -141,6 +142,17 @@ public class Lexer extends RubyObject this.callMethod(context, name); } + + /** + * Advances the line number by `amount` lines. + */ + public void advance_line(int amount) + { + ThreadContext context = this.runtime.getCurrentContext(); + RubyFixnum lines = this.runtime.newFixnum(amount); + + this.callMethod(context, "advance_line", lines); + } } %%{ diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 8f16976..28dd337 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -35,7 +35,12 @@ # stack. # - newline = '\n' | '\r\n'; + newline = '\n' | '\r\n'; + + action count_newlines { + if ( fc == '\n' ) lines++; + } + whitespace = [ \t]; ident_char = [a-zA-Z0-9\-_]; identifier = ident_char+; @@ -289,14 +294,19 @@ # long. Because of this " { + terminate_text | allowed_text => { callback("on_text", data, encoding, ts, te); + if ( lines > 0 ) + { + advance_line(lines); + + lines = 0; + } + fnext main; }; @@ -307,12 +317,13 @@ p = mark - 1; mark = 0; - fnext main; - }; + if ( lines > 0 ) + { + advance_line(lines); + + lines = 0; + } - # Just regular text. - allowed_text => { - callback("on_text", data, encoding, ts, te); fnext main; }; *|; diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 605e75a..be91028 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -348,13 +348,7 @@ module Oga # @param [String] value # def on_text(value) - unless value.empty? - add_token(:T_TEXT, value) - - lines = value.count("\n") - - advance_line(lines) if lines > 0 - end + add_token(:T_TEXT, value) unless value.empty? end ##