From 274ab359bab208b319043cdb5e1d8eeedbc5c7fa Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 17 Mar 2014 21:26:21 +0100 Subject: [PATCH] Don't use separate tokens/nodes for newlines. Newlines are now lexed together with regular text. The line numbers are advanced based on the amount of "\n" sequences in a text buffer. --- lib/oga/lexer.rl | 37 ++++++++++++++-------------------- spec/oga/lexer/general_spec.rb | 10 +-------- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl index 61d229d..2a933af 100644 --- a/lib/oga/lexer.rl +++ b/lib/oga/lexer.rl @@ -81,8 +81,8 @@ module Oga private - def advance_line - @line += 1 + def advance_line(amount = 1) + @line += amount @column = 1 end @@ -113,9 +113,19 @@ module Oga add_token(:T_TEXT, @text_buffer) + lines = @text_buffer.count("\n") + + advance_line(lines) if lines > 0 + @text_buffer = '' end + def buffer_text_until_eof(eof) + @text_buffer << text + + emit_text_buffer if @te == eof + end + def emit_string_buffer add_token(:T_STRING, @string_buffer) advance_column @@ -134,11 +144,6 @@ module Oga newline = '\n' | '\r\n'; whitespace = [ \t]; - action emit_newline { - t(:T_TEXT) - advance_line - } - # String processing # # These actions/definitions can be used to process single and/or double @@ -300,16 +305,8 @@ module Oga element_start = '<' element_name; element_text := |* - newline => { - emit_text_buffer - t(:T_TEXT) - advance_line - }; - - ^('<' | newline) => { - @text_buffer << text - - emit_text_buffer if @te == eof + ^'<' => { + buffer_text_until_eof(eof) }; '<' => { @@ -391,8 +388,6 @@ module Oga *|; main := |* - newline @emit_text_buffer => emit_newline; - doctype_start @emit_text_buffer => { t(:T_DOCTYPE_START) fcall doctype; @@ -411,9 +406,7 @@ module Oga element_start @emit_text_buffer => open_element; any => { - @text_buffer << text - - emit_text_buffer if @te == eof + buffer_text_until_eof(eof) }; *|; }%% diff --git a/spec/oga/lexer/general_spec.rb b/spec/oga/lexer/general_spec.rb index ba9792e..a474058 100644 --- a/spec/oga/lexer/general_spec.rb +++ b/spec/oga/lexer/general_spec.rb @@ -14,17 +14,9 @@ describe Oga::Lexer do lex("\n").should == [[:T_TEXT, "\n", 1, 1]] end - example 'advance line numbers for newlines' do - lex("\n ").should == [ - [:T_TEXT, "\n", 1, 1], - [:T_TEXT, ' ', 2, 1] - ] - end - example 'lex text followed by a newline' do lex("foo\n").should == [ - [:T_TEXT, 'foo', 1, 1], - [:T_TEXT, "\n", 1, 4] + [:T_TEXT, "foo\n", 1, 1] ] end end