Imported a half-assed lexer.

This commit is contained in:
Yorick Peterse 2014-02-26 19:54:11 +01:00
parent 2dede8725b
commit 5755c325bd
4 changed files with 114 additions and 0 deletions

View File

@ -1 +1,4 @@
require 'ast'
require_relative 'oga/ast/node'
require_relative 'oga/lexer'

9
lib/oga/ast/node.rb Normal file
View File

@ -0,0 +1,9 @@
module Oga
module AST
##
# AST node class used by Oga. It subclasses `::AST::Node` and currently adds
# no behaviour of its own.
#
class Node < ::AST::Node
end # Node
end # AST
end # Oga

97
lib/oga/lexer.rl Normal file
View File

@ -0,0 +1,97 @@
%%machine lexer; # %
module Oga
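##
# Ragel based lexer. `lex` takes the input and returns an Array of tokens,
# each in the form `[type, value, line, column]`.
#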
class Lexer
%% write data; # %
# Ragel calls its internal `_lexer_*` methods on the instance, whereas they
# are defined on the class. For every private class-level method with that
# prefix, define a private instance method of the same name that simply
# delegates to the class.
private_methods.grep(/^_lexer_/).each do |forwarded|
  define_method(forwarded) { self.class.send(forwarded) }

  private(forwarded)
end
##
# Creates a new lexer and puts it in its initial state by calling `reset`.
#
def initialize
reset
end
##
# Puts the lexer back in its initial state: position at line 1, column 1,
# no input, no token boundaries and an empty token list.
#
# @return [Array] The new, empty token list.
#
def reset
  @line = @column = 1
  @data = @ts = @te = nil
  @tokens = []
end
##
# Lexes the given input and returns the tokens that were emitted. The
# instance state is cleared with `reset` before returning.
#
# @param [String] data The input to lex. NOTE(review): String is assumed;
#  only `#length` and range indexing are visibly required.
# @return [Array] The emitted tokens, each in the `[type, value, line,
#  column]` form built by `t`.
#
def lex(data)
@data = data
# NOTE(review): `lexer_start` and `eof` are not read by any hand-written line
# in this method; presumably the code Ragel generates for the two write
# directives below refers to them as locals. Confirm against the generated
# lexer before renaming or removing them.
lexer_start = self.class.lexer_start
eof = data.length
%% write init;
%% write exec;
# Keep a reference to the tokens before `reset` swaps @tokens for a new array.
tokens = @tokens
reset
return tokens
end
private
##
# Moves the position to the start of the next line: the line number goes up
# by one and the column goes back to 1.
#
# @return [Integer] The new column (always 1).
#
def advance_line
  @line = @line + 1
  @column = 1
end
##
# Moves the column forward.
#
# @param [Integer] length The number of columns to advance, 1 by default.
# @return [Integer] The new column.
#
def advance_column(length = 1)
  @column = @column + length
end
##
# Emits a token for a slice of the input and advances the column by the
# length of that slice. The token records the line and column as they were
# before advancing.
#
# @param [Symbol] type The token type, e.g. `:T_SPACE`.
# @param [Integer] start Start offset of the slice, defaults to `@ts`.
# @param [Integer] stop End offset of the slice (exclusive), defaults to
#  `@te`.
# @return [Array] The token list with the new token appended.
#
def t(type, start = @ts, stop = @te)
  text = @data[start...stop]
  entry = [type, text, @line, @column]

  advance_column(text.length)

  @tokens << entry
end
%%{
# Use instance variables for `ts` and friends.
access @;
# A backslash followed by any single character.
any_escaped = /\\./;
newline = '\n';
# Horizontal whitespace only: a space or a tab.
whitespace = [ \t];
s_quote = "'";
d_quote = '"';
# Quoted strings: an opening quote, then any run of escaped characters or
# characters that are neither the quote nor a backslash, then a closing quote.
s_string = s_quote ([^'\\] | any_escaped)* s_quote;
d_string = d_quote ([^"\\] | any_escaped)* d_quote;
string = s_string | d_string;
# Unicode characters, taken from whitequark's wonderful parser library.
# (I honestly need to buy that dude a beer or 100). Basically this
# takes all characters and removes ASCII ones from the list, thus
# leaving you with Unicode.
unicode = any - ascii;
# NOTE(review): the scanner below only matches whitespace and newline; the
# string and unicode definitions above are not referenced by it in this
# section.
main := |*
whitespace => { t(:T_SPACE) };
# The token is emitted before advancing so it carries the line it was on.
newline => { t(:T_NEWLINE); advance_line };
*|;
}%%
end # Lexer
end # Oga

5
spec/oga/lexer_spec.rb Normal file
View File

@ -0,0 +1,5 @@
require 'spec_helper'
# Placeholder example group for Oga::Lexer; it contains no examples yet.
describe Oga::Lexer do
end