Basic lexer setup/tests.

Too lazy to do this the right way. ᕕ(ᐛ)ᕗ
This commit is contained in:
Yorick Peterse 2014-02-26 21:36:30 +01:00
parent c668804912
commit d32888f803
5 changed files with 88 additions and 13 deletions

View File

@ -68,19 +68,22 @@ module Oga
# Use instance variables for `ts` and friends. # Use instance variables for `ts` and friends.
access @; access @;
any_escaped = /\\./; newline = '\n';
newline = '\n';
whitespace = [ \t]; whitespace = [ \t];
s_quote = "'"; any_escaped = /\\./;
d_quote = '"';
s_string = s_quote ([^'\\] | any_escaped)* s_quote; smaller = '<';
d_string = d_quote ([^"\\] | any_escaped)* d_quote; greater = '>';
slash = '/';
exclamation = '!';
equals = '=';
string = s_string | d_string; s_quote = "'";
d_quote = '"';
text = (any - s_quote - d_quote - equals - exclamation - slash -
greater - smaller - whitespace - newline)+;
# Unicode characters, taken from whitequark's wonderful parser library. # Unicode characters, taken from whitequark's wonderful parser library.
# (I honestly need to buy that dude a beer or 100). Basically this # (I honestly need to buy that dude a beer or 100). Basically this
@ -89,9 +92,17 @@ module Oga
unicode = any - ascii; unicode = any - ascii;
main := |* main := |*
whitespace => { t(:T_SPACE) }; whitespace => { t(:T_SPACE) };
newline => { t(:T_NEWLINE); advance_line }; newline => { t(:T_NEWLINE); advance_line };
smaller => { t(:T_SMALLER) };
greater => { t(:T_GREATER) };
slash => { t(:T_SLASH) };
d_quote => { t(:T_DQUOTE) };
s_quote => { t(:T_SQUOTE) };
exclamation => { t(:T_EXCLAMATION) };
equals => { t(:T_EQUALS) };
text => { t(:T_TEXT) };
*|; *|;
}%% }%%
end # Lexer end # Lexer
end # Gaia end # Oga

View File

@ -1,5 +1,57 @@
require 'spec_helper' require 'spec_helper'
describe Oga::Lexer do describe Oga::Lexer do
context 'regular text' do
  # A plain word is expected to come back as one T_TEXT token. The last two
  # elements of a token appear to be the line and column (see the whitespace
  # examples, where those are the values that change).
  example 'lex regular text' do
    expected = [[:T_TEXT, 'hello', 1, 1]]

    lex('hello').should == expected
  end
end
context 'whitespace' do
  # A single space yields a single T_SPACE token.
  example 'lex regular whitespace' do
    lex(' ').should == [[:T_SPACE, ' ', 1, 1]]
  end

  # A newline yields a T_NEWLINE token reported on the line it started on.
  example 'lex a newline' do
    lex("\n").should == [[:T_NEWLINE, "\n", 1, 1]]
  end

  # The input must contain TWO spaces: the expectation lists two T_SPACE
  # tokens (columns 1 and 2), one per whitespace character.
  example 'advance column numbers for spaces' do
    lex('  ').should == [
      [:T_SPACE, ' ', 1, 1],
      [:T_SPACE, ' ', 1, 2]
    ]
  end

  # After a newline the following token is expected on line 2, column 1.
  example 'advance line numbers for newlines' do
    lex("\n ").should == [
      [:T_NEWLINE, "\n", 1, 1],
      [:T_SPACE, ' ', 2, 1]
    ]
  end
end
context 'tags' do
  # An opening tag is expected to be split into its punctuation and name.
  example 'lex an opening tag' do
    expected = [
      [:T_SMALLER, '<', 1, 1],
      [:T_TEXT, 'p', 1, 2],
      [:T_GREATER, '>', 1, 3]
    ]

    lex('<p>').should == expected
  end

  # Attribute names, the equals sign, both quotes and the quoted value are
  # each expected as separate tokens.
  example 'lex an opening tag with an attribute' do
    expected = [
      [:T_SMALLER, '<', 1, 1],
      [:T_TEXT, 'p', 1, 2],
      [:T_SPACE, ' ', 1, 3],
      [:T_TEXT, 'title', 1, 4],
      [:T_EQUALS, '=', 1, 9],
      [:T_DQUOTE, '"', 1, 10],
      [:T_TEXT, 'Foo', 1, 11],
      [:T_DQUOTE, '"', 1, 14],
      [:T_GREATER, '>', 1, 15]
    ]

    lex('<p title="Foo">').should == expected
  end
end
end end

View File

@ -10,5 +10,15 @@ module Oga
def s(type, *children) def s(type, *children)
return Oga::AST::Node.new(type, children) return Oga::AST::Node.new(type, children)
end end
##
# Runs a fresh `Oga::Lexer` over the given input and returns whatever its
# `lex` method produces.
#
# @param [String] input The source text to tokenize.
# @return [Array] The tokens returned by the lexer.
#
def lex(input)
  lexer = Oga::Lexer.new

  return lexer.lex(input)
end
end # ParsingHelpers end # ParsingHelpers
end # Oga end # Oga

View File

@ -1,5 +1,5 @@
desc 'Generates auto-generated files' desc 'Generates auto-generated files'
task :generate => [:lexer, :parser] task :generate => [:lexer]
desc 'Regenerates auto-generated files' desc 'Regenerates auto-generated files'
task :regenerate => [:clean, :generate] task :regenerate => [:clean, :generate]

View File

@ -1,3 +1,4 @@
=begin
rule '.rb' => '.y' do |task| rule '.rb' => '.y' do |task|
Cliver.assert('racc', '~> 1.4') Cliver.assert('racc', '~> 1.4')
@ -6,3 +7,4 @@ end
desc 'Generates the parser' desc 'Generates the parser'
task :parser => [PARSER_OUTPUT] task :parser => [PARSER_OUTPUT]
=end