diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl index 288c46c..15cf7a4 100644 --- a/lib/oga/lexer.rl +++ b/lib/oga/lexer.rl @@ -68,19 +68,22 @@ module Oga # Use instance variables for `ts` and friends. access @; - any_escaped = /\\./; - - newline = '\n'; - + newline = '\n'; whitespace = [ \t]; - s_quote = "'"; - d_quote = '"'; + any_escaped = /\\./; - s_string = s_quote ([^'\\] | any_escaped)* s_quote; - d_string = d_quote ([^"\\] | any_escaped)* d_quote; + smaller = '<'; + greater = '>'; + slash = '/'; + exclamation = '!'; + equals = '='; - string = s_string | d_string; + s_quote = "'"; + d_quote = '"'; + + text = (any - s_quote - d_quote - equals - exclamation - slash - + greater - smaller - whitespace - newline)+; # Unicode characters, taken from whitequark's wonderful parser library. # (I honestly need to buy that dude a beer or 100). Basically this @@ -89,9 +92,17 @@ module Oga unicode = any - ascii; main := |* - whitespace => { t(:T_SPACE) }; - newline => { t(:T_NEWLINE); advance_line }; + whitespace => { t(:T_SPACE) }; + newline => { t(:T_NEWLINE); advance_line }; + smaller => { t(:T_SMALLER) }; + greater => { t(:T_GREATER) }; + slash => { t(:T_SLASH) }; + d_quote => { t(:T_DQUOTE) }; + s_quote => { t(:T_SQUOTE) }; + exclamation => { t(:T_EXCLAMATION) }; + equals => { t(:T_EQUALS) }; + text => { t(:T_TEXT) }; *|; }%% end # Lexer -end # Gaia +end # Oga diff --git a/spec/oga/lexer_spec.rb b/spec/oga/lexer_spec.rb index 73b0a2e..0c1620a 100644 --- a/spec/oga/lexer_spec.rb +++ b/spec/oga/lexer_spec.rb @@ -1,5 +1,57 @@ require 'spec_helper' describe Oga::Lexer do + context 'regular text' do + example 'lex regular text' do + lex('hello').should == [[:T_TEXT, 'hello', 1, 1]] + end + end + context 'whitespace' do + example 'lex regular whitespace' do + lex(' ').should == [[:T_SPACE, ' ', 1, 1]] + end + + example 'lex a newline' do + lex("\n").should == [[:T_NEWLINE, "\n", 1, 1]] + end + + example 'advance column numbers for spaces' do + lex(' ').should == [ + [:T_SPACE, ' ', 1, 1], + [:T_SPACE, ' ', 1, 2] + ] + end + + example 'advance line numbers for newlines' do + lex("\n ").should == [ + [:T_NEWLINE, "\n", 1, 1], + [:T_SPACE, ' ', 2, 1] + ] + end + end + + context 'tags' do + example 'lex an opening tag' do + lex('

').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_TEXT, 'p', 1, 2], + [:T_GREATER, '>', 1, 3] + ] + end + + example 'lex an opening tag with an attribute' do + lex('

').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_TEXT, 'p', 1, 2], + [:T_SPACE, ' ', 1, 3], + [:T_TEXT, 'title', 1, 4], + [:T_EQUALS, '=', 1, 9], + [:T_DQUOTE, '"', 1, 10], + [:T_TEXT, 'Foo', 1, 11], + [:T_DQUOTE, '"', 1, 14], + [:T_GREATER, '>', 1, 15] + ] + end + end end diff --git a/spec/support/parsing.rb b/spec/support/parsing.rb index 81db60a..5da78df 100644 --- a/spec/support/parsing.rb +++ b/spec/support/parsing.rb @@ -10,5 +10,15 @@ module Oga def s(type, *children) return Oga::AST::Node.new(type, children) end + + ## + # Lexes a string and returns the tokens. + # + # @param [String] input + # @return [Array] + # + def lex(input) + return Oga::Lexer.new.lex(input) + end end # ParsingHelpers end # Oga diff --git a/task/generate.rake b/task/generate.rake index ab6ede3..cb10232 100644 --- a/task/generate.rake +++ b/task/generate.rake @@ -1,5 +1,5 @@ desc 'Generates auto-generated files' -task :generate => [:lexer, :parser] +task :generate => [:lexer] desc 'Regenerates auto-generated files' task :regenerate => [:clean, :generate] diff --git a/task/parser.rake b/task/parser.rake index 8b5a1a4..18dfcc9 100644 --- a/task/parser.rake +++ b/task/parser.rake @@ -1,3 +1,4 @@ +=begin rule '.rb' => '.y' do |task| Cliver.assert('racc', '~> 1.4') @@ -6,3 +7,4 @@ end desc 'Generates the parser' task :parser => [PARSER_OUTPUT] +=end