From 619c0bbc14285b797ffc6a2a865e17972e8314a9 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 7 Oct 2014 21:55:41 +0200 Subject: [PATCH] Emit tokens for whitespace in the CSS lexer. --- lib/oga/css/lexer.rl | 23 +++++++++++++++++++---- spec/oga/css/lexer/operators_spec.rb | 6 +++++- spec/oga/css/lexer/paths_spec.rb | 9 +++++++++ spec/oga/css/lexer/pseudo_classes_spec.rb | 10 ++++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/lib/oga/css/lexer.rl b/lib/oga/css/lexer.rl index 65dfcad..e7f4353 100644 --- a/lib/oga/css/lexer.rl +++ b/lib/oga/css/lexer.rl @@ -132,7 +132,10 @@ module Oga whitespace = [\t ]+; - comma = ',' %{ add_token(:T_COMMA) }; + action emit_whitespace { + add_token(:T_SPACE) + } + hash = '#' %{ add_token(:T_HASH) }; dot = '.' %{ add_token(:T_DOT) }; lbrack = '[' %{ add_token(:T_LBRACK) }; @@ -145,6 +148,15 @@ module Oga even = 'even'; minus = '-'; nth = 'n'; + comma = whitespace* ',' whitespace*; + + action emit_pipe { + add_token(:T_PIPE) + } + + action emit_comma { + add_token(:T_COMMA) + } # Identifiers # @@ -226,6 +238,8 @@ module Oga # allowed elsewhere. For example, "2n" is not allowed to appear outside # of the arguments list. pseudo_args := |* + whitespace; + nth => { add_token(:T_NTH) }; minus => { add_token(:T_MINUS) }; odd => { add_token(:T_ODD) }; @@ -235,7 +249,7 @@ module Oga *|; main := |* - whitespace | comma | hash | dot | lbrack | rbrack | colon; + hash | dot | lbrack | rbrack | colon; # Some of the operators have similar characters (e.g. the "="). As a # result we can't use rules like the following: @@ -258,8 +272,9 @@ module Oga # The pipe character is also used in the |= operator so the action for # this is handled separately. - pipe => { add_token(:T_PIPE) }; - + pipe => emit_pipe; + comma => emit_comma; + whitespace => emit_whitespace; lparen => emit_lparen; identifier => emit_identifier; integer => emit_integer; diff --git a/spec/oga/css/lexer/operators_spec.rb b/spec/oga/css/lexer/operators_spec.rb index b197181..493aba6 100644 --- a/spec/oga/css/lexer/operators_spec.rb +++ b/spec/oga/css/lexer/operators_spec.rb @@ -23,7 +23,11 @@ describe Oga::CSS::Lexer do end example 'lex an identifier followed by the *= operator' do - lex_css('foo *=').should == [[:T_IDENT, 'foo'], [:T_IN, nil]] + lex_css('foo *=').should == [ + [:T_IDENT, 'foo'], + [:T_SPACE, nil], + [:T_IN, nil] + ] end example 'lex the |= operator' do diff --git a/spec/oga/css/lexer/paths_spec.rb b/spec/oga/css/lexer/paths_spec.rb index cab0636..8bcd30d 100644 --- a/spec/oga/css/lexer/paths_spec.rb +++ b/spec/oga/css/lexer/paths_spec.rb @@ -9,6 +9,15 @@ describe Oga::CSS::Lexer do example 'lex a path with two members' do lex_css('div h3').should == [ [:T_IDENT, 'div'], + [:T_SPACE, nil], + [:T_IDENT, 'h3'] + ] + end + + example 'lex a path with two members separated by multiple spaces' do + lex_css('div h3').should == [ + [:T_IDENT, 'div'], + [:T_SPACE, nil], [:T_IDENT, 'h3'] ] end diff --git a/spec/oga/css/lexer/pseudo_classes_spec.rb b/spec/oga/css/lexer/pseudo_classes_spec.rb index d05a088..772b976 100644 --- a/spec/oga/css/lexer/pseudo_classes_spec.rb +++ b/spec/oga/css/lexer/pseudo_classes_spec.rb @@ -19,6 +19,16 @@ describe Oga::CSS::Lexer do ] end + example 'lex the :nth-child pseudo class with extra whitespace' do + lex_css(':nth-child( 1)').should == [ + [:T_COLON, nil], + [:T_IDENT, 'nth-child'], + [:T_LPAREN, nil], + [:T_INT, 1], + [:T_RPAREN, nil] + ] + end + example 'lex the :nth-child(odd) pseudo class' do lex_css(':nth-child(odd)').should == [ [:T_COLON, nil],