From e9bb97d261a16a26c6406f0d5536e04d6d531557 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 9 Apr 2014 19:32:06 +0200 Subject: [PATCH] First steps towards making the lexer stream tokens --- lib/oga/xml/lexer.rl | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/lib/oga/xml/lexer.rl b/lib/oga/xml/lexer.rl index ad5e94a..75667de 100644 --- a/lib/oga/xml/lexer.rl +++ b/lib/oga/xml/lexer.rl @@ -80,6 +80,8 @@ module Oga @tokens = [] @stack = [] @top = 0 + @cs = self.class.lexer_start + @act = 0 @elements = [] @buffer_start_position = nil @@ -93,24 +95,41 @@ module Oga # # The type is a symbol, the value is either nil or a String. # - # @param [String] data The string to lex. + # This method resets the internal state of the lexer after consuming the + # input. + # + # @param [String] data The string to consume. # @return [Array] + # @see #advance # def lex(data) - @data = data.unpack('U*') - lexer_start = self.class.lexer_start - eof = data.length - - %% write init; - %% write exec; - - tokens = @tokens + tokens = advance(data) reset return tokens end + ## + # Advances through the input and generates the corresponding tokens. + # + # This method does *not* reset the internal state of the lexer. + # + # @param [String] data The string to consume. + # @return [Array] + # + def advance(data) + @data = data.unpack('U*') + eof = data.length + + p = 0 + pe = eof + + %% write exec; # % fix highlight + + return @tokens + end + ## # @return [TrueClass|FalseClass] #