From 5755c325bdf18869c493b1c12cc4b71a3736074d Mon Sep 17 00:00:00 2001
From: Yorick Peterse
Date: Wed, 26 Feb 2014 19:54:11 +0100
Subject: [PATCH] Imported a half-assed lexer.

---
 lib/oga.rb             |   3 +
 lib/oga/ast/node.rb    |  11 ++++
 lib/oga/lexer.rl       | 136 +++++++++++++++++++++++++++++++++++++++++
 spec/oga/lexer_spec.rb |   5 ++
 4 files changed, 155 insertions(+)
 create mode 100644 lib/oga/ast/node.rb
 create mode 100644 lib/oga/lexer.rl
 create mode 100644 spec/oga/lexer_spec.rb

diff --git a/lib/oga.rb b/lib/oga.rb
index 132b7a1..f8be7b3 100644
--- a/lib/oga.rb
+++ b/lib/oga.rb
@@ -1 +1,4 @@
 require 'ast'
+
+require_relative 'oga/ast/node'
+require_relative 'oga/lexer'
diff --git a/lib/oga/ast/node.rb b/lib/oga/ast/node.rb
new file mode 100644
index 0000000..0accb9a
--- /dev/null
+++ b/lib/oga/ast/node.rb
@@ -0,0 +1,11 @@
+module Oga
+  module AST
+    ##
+    # Oga specific subclass of `::AST::Node`. The body is still empty, so for
+    # now it behaves exactly like its parent class.
+    #
+    class Node < ::AST::Node
+
+    end # Node
+  end # AST
+end # Oga
diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl
new file mode 100644
index 0000000..288c46c
--- /dev/null
+++ b/lib/oga/lexer.rl
@@ -0,0 +1,136 @@
+%%machine lexer; # %
+
+module Oga
+  ##
+  # Ragel based lexer that turns a String into an Array of tokens.
+  #
+  # Every token is an Array in the form `[type, value, line, column]`. The
+  # `main` scanner only emits `:T_SPACE` and `:T_NEWLINE` tokens so far, the
+  # `string` and `unicode` machines are defined but not used yet.
+  #
+  class Lexer
+    %% write data; # %
+
+    # Lazy way of forwarding instance method calls used internally by Ragel to
+    # their corresponding class methods.
+    private_methods.grep(/^_lexer_/).each do |name|
+      define_method(name) do
+        self.class.send(name)
+      end
+
+      private(name)
+    end
+
+    ##
+    # Creates a lexer in its initial state.
+    #
+    def initialize
+      reset
+    end
+
+    ##
+    # Resets the line and column counters, the input, the token boundaries
+    # and the token buffer to their initial values.
+    #
+    def reset
+      @line = 1
+      @column = 1
+      @data = nil
+      @ts = nil
+      @te = nil
+      @tokens = []
+    end
+
+    ##
+    # Lexes the given input and returns the resulting tokens.
+    #
+    # The internal state is always reset afterwards, even when lexing raises,
+    # so a failed run can not leak tokens or positions into the next one.
+    #
+    # @param [String] data The input to lex.
+    # @return [Array<Array>] Tokens in the form `[type, value, line, column]`.
+    #
+    def lex(data)
+      @data = data
+
+      # Locals read by the code that Ragel generates for the statements below.
+      lexer_start = self.class.lexer_start
+      eof = data.length
+
+      %% write init;
+      %% write exec;
+
+      # The ensure clause assigns a new Array to @tokens (see `reset`), so the
+      # Array returned here is left untouched.
+      @tokens
+    ensure
+      reset
+    end
+
+    private
+
+    ##
+    # Moves the position to the first column of the next line.
+    #
+    def advance_line
+      @line += 1
+      @column = 1
+    end
+
+    ##
+    # Moves the column forward.
+    #
+    # @param [Integer] length The amount of columns to advance.
+    #
+    def advance_column(length = 1)
+      @column += length
+    end
+
+    ##
+    # Adds a token to the buffer, using the current line and column as its
+    # position, and moves the column past the value of the token.
+    #
+    # @param [Symbol] type The token type.
+    # @param [Integer] start Start offset of the value in the input.
+    # @param [Integer] stop End offset (exclusive) of the value in the input.
+    #
+    def t(type, start = @ts, stop = @te)
+      value = @data[start...stop]
+      token = [type, value, @line, @column]
+
+      advance_column(value.length)
+
+      @tokens << token
+    end
+
+    %%{
+      # Use instance variables for `ts` and friends.
+      access @;
+
+      any_escaped = /\\./;
+
+      newline = '\n';
+
+      whitespace = [ \t];
+
+      s_quote = "'";
+      d_quote = '"';
+
+      s_string = s_quote ([^'\\] | any_escaped)* s_quote;
+      d_string = d_quote ([^"\\] | any_escaped)* d_quote;
+
+      string = s_string | d_string;
+
+      # Unicode characters, taken from whitequark's wonderful parser library.
+      # (I honestly need to buy that dude a beer or 100). Basically this
+      # takes all characters and removes ASCII ones from the list, thus
+      # leaving you with Unicode.
+      unicode = any - ascii;
+
+      main := |*
+        whitespace => { t(:T_SPACE) };
+        newline => { t(:T_NEWLINE); advance_line };
+      *|;
+    }%%
+  end # Lexer
+end # Oga
diff --git a/spec/oga/lexer_spec.rb b/spec/oga/lexer_spec.rb
new file mode 100644
index 0000000..73b0a2e
--- /dev/null
+++ b/spec/oga/lexer_spec.rb
@@ -0,0 +1,5 @@
+require 'spec_helper'
+
+describe Oga::Lexer do
+
+end