Basic lexer setup/tests.

Too lazy to do this the right way. ᕕ(ᐛ)ᕗ
2014-02-26 21:36:30 +01:00 · 2014-02-26 21:36:30 +01:00 · d32888f803
parent c668804912
commit d32888f803
5 changed files with 88 additions and 13 deletions
--- a/lib/oga/lexer.rl
+++ b/lib/oga/lexer.rl
@@ -68,19 +68,22 @@ module Oga
      # Use instance variables for `ts` and friends.
      access @;

-      any_escaped = /\\./;
-
-      newline = '\n';
-
+      newline    = '\n';
      whitespace = [ \t];

-      s_quote = "'";
-      d_quote = '"';
+      any_escaped = /\\./;

-      s_string = s_quote ([^'\\] | any_escaped)* s_quote;
-      d_string = d_quote ([^"\\] | any_escaped)* d_quote;
+      smaller     = '<';
+      greater     = '>';
+      slash       = '/';
+      exclamation = '!';
+      equals      = '=';

-      string = s_string | d_string;
+      s_quote  = "'";
+      d_quote  = '"';
+
+      text = (any - s_quote - d_quote - equals - exclamation - slash -
+        greater - smaller - whitespace - newline)+;

      # Unicode characters, taken from whitequark's wonderful parser library.
      # (I honestly need to buy that dude a beer or 100). Basically this
@@ -89,9 +92,17 @@ module Oga
      unicode = any - ascii;

      main := |*
-        whitespace => { t(:T_SPACE) };
-        newline    => { t(:T_NEWLINE); advance_line };
+        whitespace  => { t(:T_SPACE) };
+        newline     => { t(:T_NEWLINE); advance_line };
+        smaller     => { t(:T_SMALLER) };
+        greater     => { t(:T_GREATER) };
+        slash       => { t(:T_SLASH) };
+        d_quote     => { t(:T_DQUOTE) };
+        s_quote     => { t(:T_SQUOTE) };
+        exclamation => { t(:T_EXCLAMATION) };
+        equals      => { t(:T_EQUALS) };
+        text        => { t(:T_TEXT) };
      *|;
    }%%
  end # Lexer
-end # Gaia
+end # Oga
--- a/spec/oga/lexer_spec.rb
+++ b/spec/oga/lexer_spec.rb
@@ -1,5 +1,57 @@
 require 'spec_helper'

 describe Oga::Lexer do
+  context 'regular text' do
+    example 'lex regular text' do
+      lex('hello').should == [[:T_TEXT, 'hello', 1, 1]]
+    end
+  end

+  context 'whitespace' do
+    example 'lex regular whitespace' do
+      lex(' ').should == [[:T_SPACE, ' ', 1, 1]]
+    end
+
+    example 'lex a newline' do
+      lex("\n").should == [[:T_NEWLINE, "\n", 1, 1]]
+    end
+
+    example 'advance column numbers for spaces' do
+      lex('  ').should == [
+        [:T_SPACE, ' ', 1, 1],
+        [:T_SPACE, ' ', 1, 2]
+      ]
+    end
+
+    example 'advance line numbers for newlines' do
+      lex("\n ").should == [
+        [:T_NEWLINE, "\n", 1, 1],
+        [:T_SPACE, ' ', 2, 1]
+      ]
+    end
+  end
+
+  context 'tags' do
+    example 'lex an opening tag' do
+      lex('<p>').should == [
+        [:T_SMALLER, '<', 1, 1],
+        [:T_TEXT, 'p', 1, 2],
+        [:T_GREATER, '>', 1, 3]
+      ]
+    end
+
+    example 'lex an opening tag with an attribute' do
+      lex('<p title="Foo">').should == [
+        [:T_SMALLER, '<', 1, 1],
+        [:T_TEXT, 'p', 1, 2],
+        [:T_SPACE, ' ', 1, 3],
+        [:T_TEXT, 'title', 1, 4],
+        [:T_EQUALS, '=', 1, 9],
+        [:T_DQUOTE, '"', 1, 10],
+        [:T_TEXT, 'Foo', 1, 11],
+        [:T_DQUOTE, '"', 1, 14],
+        [:T_GREATER, '>', 1, 15]
+      ]
+    end
+  end
 end
--- a/spec/support/parsing.rb
+++ b/spec/support/parsing.rb
@@ -10,5 +10,15 @@ module Oga
    def s(type, *children)
      return Oga::AST::Node.new(type, children)
    end
+
+    ##
+    # Lexes a string and returns the tokens.
+    #
+    # @param [String] input
+    # @return [Array]
+    #
+    def lex(input)
+      return Oga::Lexer.new.lex(input)
+    end
  end # ParsingHelpers
 end # Oga
--- a/task/generate.rake
+++ b/task/generate.rake
@@ -1,5 +1,5 @@
 desc 'Generates auto-generated files'
-task :generate => [:lexer, :parser]
+task :generate => [:lexer]

 desc 'Regenerates auto-generated files'
 task :regenerate => [:clean, :generate]
--- a/task/parser.rake
+++ b/task/parser.rake
@@ -1,3 +1,4 @@
+=begin
 rule '.rb' => '.y' do |task|
  Cliver.assert('racc', '~> 1.4')

@@ -6,3 +7,4 @@ end

 desc 'Generates the parser'
 task :parser => [PARSER_OUTPUT]
+=end