From e3de65a2589d889072620ba0c5c27bd2466d2288 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 21 Oct 2014 23:16:25 +0200 Subject: [PATCH] Lex whitespace preceding CSS axes separately. Previously, input such as "x > y" would result in the following token sequence: T_IDENT, T_CHILD, T_IDENT This commit changes this to the following: T_IDENT, T_SPACE, T_CHILD, T_IDENT This allows the parser to use T_SPACE as a terminal token, which in turn prevents around 16 shift/reduce conflicts from arising. This does mean that input such as " > y" or " x > y" is now invalid. This, however, can be solved by simply _not_ adding leading/trailing whitespace to CSS queries. --- lib/oga/css/lexer.rl | 10 ++++++--- spec/oga/css/lexer/axes_spec.rb | 39 ++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/lib/oga/css/lexer.rl b/lib/oga/css/lexer.rl index 1878391..d28f6c5 100644 --- a/lib/oga/css/lexer.rl +++ b/lib/oga/css/lexer.rl @@ -175,9 +175,13 @@ module Oga op_ends_with = '$='; op_in = '*='; op_hyphen_in = '|='; - op_child = whitespace* '>' whitespace*; - op_fol_direct = whitespace* '+' whitespace*; - op_fol = whitespace* '~' whitespace*; + + # Whitespace preceding these tokens is _not_ matched to make the parser + # rules more consistent. As such input such as " > x" will result in + # tokens [T_SPACE, T_CHILD, T_IDENT]. 
+ op_child = '>' whitespace*; + op_fol_direct = '+' whitespace*; + op_fol = '~' whitespace*; # Numbers # diff --git a/spec/oga/css/lexer/axes_spec.rb b/spec/oga/css/lexer/axes_spec.rb index 9043aff..2d137e9 100644 --- a/spec/oga/css/lexer/axes_spec.rb +++ b/spec/oga/css/lexer/axes_spec.rb @@ -6,24 +6,51 @@ describe Oga::CSS::Lexer do lex_css('>').should == [[:T_CHILD, nil]] end - example 'lex the > axis with surrounding whitespace' do - lex_css('>').should == [[:T_CHILD, nil]] + example 'lex the expression "> y"' do + lex_css('> y').should == [[:T_CHILD, nil], [:T_IDENT, 'y']] + end + + example 'lex the expression "x > y"' do + lex_css('x > y').should == [ + [:T_IDENT, 'x'], + [:T_SPACE, nil], + [:T_CHILD, nil], + [:T_IDENT, 'y'] + ] end example 'lex the + axis' do lex_css('+').should == [[:T_FOLLOWING_DIRECT, nil]] end - example 'lex the + axis with surrounding whitespace' do - lex_css(' + ').should == [[:T_FOLLOWING_DIRECT, nil]] + example 'lex the expression "+ y"' do + lex_css('+ y').should == [[:T_FOLLOWING_DIRECT, nil], [:T_IDENT, 'y']] + end + + example 'lex the expression "x + y"' do + lex_css('x + y').should == [ + [:T_IDENT, 'x'], + [:T_SPACE, nil], + [:T_FOLLOWING_DIRECT, nil], + [:T_IDENT, 'y'] + ] end example 'lex the ~ axis' do lex_css('~').should == [[:T_FOLLOWING, nil]] end - example 'lex the ~ axis with surrounding whitespace' do - lex_css(' ~ ').should == [[:T_FOLLOWING, nil]] + example 'lex the expression "~ y"' do + lex_css('~ y').should == [[:T_FOLLOWING, nil], [:T_IDENT, 'y']] + end + + example 'lex the expression "x ~ y"' do + lex_css('x ~ y').should == [ + [:T_IDENT, 'x'], + [:T_SPACE, nil], + [:T_FOLLOWING, nil], + [:T_IDENT, 'y'] + ] end end end