diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl index db3a73d..eaeef07 100644 --- a/lib/oga/lexer.rl +++ b/lib/oga/lexer.rl @@ -79,13 +79,14 @@ module Oga exclamation = '!'; equals = '='; colon = ':'; + dash = '-'; s_quote = "'"; d_quote = '"'; # FIXME: there really should be a better way of doing this. text = (any - s_quote - d_quote - equals - exclamation - slash - - greater - smaller - whitespace - newline - colon)+; + greater - smaller - whitespace - newline - colon - dash)+; # Unicode characters, taken from whitequark's wonderful parser library. # (I honestly need to buy that dude a beer or 100). Basically this @@ -101,6 +102,7 @@ module Oga slash => { t(:T_SLASH) }; d_quote => { t(:T_DQUOTE) }; s_quote => { t(:T_SQUOTE) }; + dash => { t(:T_DASH) }; colon => { t(:T_COLON) }; exclamation => { t(:T_EXCLAMATION) }; equals => { t(:T_EQUALS) }; diff --git a/spec/oga/lexer_spec.rb b/spec/oga/lexer_spec.rb index 2593c65..ce36481 100644 --- a/spec/oga/lexer_spec.rb +++ b/spec/oga/lexer_spec.rb @@ -72,7 +72,9 @@ describe Oga::Lexer do [:T_SMALLER, '<', 1, 1], [:T_TEXT, 'p', 1, 2], [:T_SPACE, ' ', 1, 3], - [:T_TEXT, 'foo-bar', 1, 4], + [:T_TEXT, 'foo', 1, 4], + [:T_DASH, '-', 1, 7], + [:T_TEXT, 'bar', 1, 8], [:T_EQUALS, '=', 1, 11], [:T_DQUOTE, '"', 1, 12], [:T_TEXT, 'baz', 1, 13], @@ -97,4 +99,21 @@ describe Oga::Lexer do ] end end + + context 'comments' do + example 'lex a comment' do + lex('<!-- foo -->').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_EXCLAMATION, '!', 1, 2], + [:T_DASH, '-', 1, 3], + [:T_DASH, '-', 1, 4], + [:T_SPACE, ' ', 1, 5], + [:T_TEXT, 'foo', 1, 6], + [:T_SPACE, ' ', 1, 9], + [:T_DASH, '-', 1, 10], + [:T_DASH, '-', 1, 11], + [:T_GREATER, '>', 1, 12] + ] + end + end end