Basic lexer setup/tests.
Too lazy to do this the right way. ᕕ(ᐛ)ᕗ
This commit is contained in:
parent
c668804912
commit
d32888f803
|
@ -68,19 +68,22 @@ module Oga
|
|||
# Use instance variables for `ts` and friends.
|
||||
access @;
|
||||
|
||||
any_escaped = /\\./;
|
||||
|
||||
newline = '\n';
|
||||
|
||||
newline = '\n';
|
||||
whitespace = [ \t];
|
||||
|
||||
s_quote = "'";
|
||||
d_quote = '"';
|
||||
any_escaped = /\\./;
|
||||
|
||||
s_string = s_quote ([^'\\] | any_escaped)* s_quote;
|
||||
d_string = d_quote ([^"\\] | any_escaped)* d_quote;
|
||||
smaller = '<';
|
||||
greater = '>';
|
||||
slash = '/';
|
||||
exclamation = '!';
|
||||
equals = '=';
|
||||
|
||||
string = s_string | d_string;
|
||||
s_quote = "'";
|
||||
d_quote = '"';
|
||||
|
||||
text = (any - s_quote - d_quote - equals - exclamation - slash -
|
||||
greater - smaller - whitespace - newline)+;
|
||||
|
||||
# Unicode characters, taken from whitequark's wonderful parser library.
|
||||
# (I honestly need to buy that dude a beer or 100). Basically this
|
||||
|
@ -89,9 +92,17 @@ module Oga
|
|||
unicode = any - ascii;
|
||||
|
||||
main := |*
|
||||
whitespace => { t(:T_SPACE) };
|
||||
newline => { t(:T_NEWLINE); advance_line };
|
||||
whitespace => { t(:T_SPACE) };
|
||||
newline => { t(:T_NEWLINE); advance_line };
|
||||
smaller => { t(:T_SMALLER) };
|
||||
greater => { t(:T_GREATER) };
|
||||
slash => { t(:T_SLASH) };
|
||||
d_quote => { t(:T_DQUOTE) };
|
||||
s_quote => { t(:T_SQUOTE) };
|
||||
exclamation => { t(:T_EXCLAMATION) };
|
||||
equals => { t(:T_EQUALS) };
|
||||
text => { t(:T_TEXT) };
|
||||
*|;
|
||||
}%%
|
||||
end # Lexer
|
||||
end # Gaia
|
||||
end # Oga
|
||||
|
|
|
@ -1,5 +1,57 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Oga::Lexer do
|
||||
context 'regular text' do
|
||||
example 'lex regular text' do
|
||||
lex('hello').should == [[:T_TEXT, 'hello', 1, 1]]
|
||||
end
|
||||
end
|
||||
|
||||
context 'whitespace' do
|
||||
example 'lex regular whitespace' do
|
||||
lex(' ').should == [[:T_SPACE, ' ', 1, 1]]
|
||||
end
|
||||
|
||||
example 'lex a newline' do
|
||||
lex("\n").should == [[:T_NEWLINE, "\n", 1, 1]]
|
||||
end
|
||||
|
||||
example 'advance column numbers for spaces' do
|
||||
lex(' ').should == [
|
||||
[:T_SPACE, ' ', 1, 1],
|
||||
[:T_SPACE, ' ', 1, 2]
|
||||
]
|
||||
end
|
||||
|
||||
example 'advance line numbers for newlines' do
|
||||
lex("\n ").should == [
|
||||
[:T_NEWLINE, "\n", 1, 1],
|
||||
[:T_SPACE, ' ', 2, 1]
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
context 'tags' do
|
||||
example 'lex an opening tag' do
|
||||
lex('<p>').should == [
|
||||
[:T_SMALLER, '<', 1, 1],
|
||||
[:T_TEXT, 'p', 1, 2],
|
||||
[:T_GREATER, '>', 1, 3]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex an opening tag with an attribute' do
|
||||
lex('<p title="Foo">').should == [
|
||||
[:T_SMALLER, '<', 1, 1],
|
||||
[:T_TEXT, 'p', 1, 2],
|
||||
[:T_SPACE, ' ', 1, 3],
|
||||
[:T_TEXT, 'title', 1, 4],
|
||||
[:T_EQUALS, '=', 1, 9],
|
||||
[:T_DQUOTE, '"', 1, 10],
|
||||
[:T_TEXT, 'Foo', 1, 11],
|
||||
[:T_DQUOTE, '"', 1, 14],
|
||||
[:T_GREATER, '>', 1, 15]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,5 +10,15 @@ module Oga
|
|||
def s(type, *children)
|
||||
return Oga::AST::Node.new(type, children)
|
||||
end
|
||||
|
||||
##
|
||||
# Lexes a string and returns the tokens.
|
||||
#
|
||||
# @param [String] input
|
||||
# @return [Array]
|
||||
#
|
||||
def lex(input)
|
||||
return Oga::Lexer.new.lex(input)
|
||||
end
|
||||
end # ParsingHelpers
|
||||
end # Oga
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
desc 'Generates auto-generated files'
|
||||
task :generate => [:lexer, :parser]
|
||||
task :generate => [:lexer]
|
||||
|
||||
desc 'Regenerates auto-generated files'
|
||||
task :regenerate => [:clean, :generate]
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
=begin
|
||||
rule '.rb' => '.y' do |task|
|
||||
Cliver.assert('racc', '~> 1.4')
|
||||
|
||||
|
@ -6,3 +7,4 @@ end
|
|||
|
||||
desc 'Generates the parser'
|
||||
task :parser => [PARSER_OUTPUT]
|
||||
=end
|
||||
|
|
Loading…
Reference in New Issue