Tighten up lexing of CSS predicates.
Operators can now only occur inside predicates, and any whitespace in these predicates is ignored.
This commit is contained in:
parent
625b9eeffd
commit
b40c0243ce
|
@ -138,16 +138,8 @@ module Oga
|
|||
|
||||
hash = '#' %{ add_token(:T_HASH) };
|
||||
dot = '.' %{ add_token(:T_DOT) };
|
||||
lbrack = '[' %{ add_token(:T_LBRACK) };
|
||||
rbrack = ']' %{ add_token(:T_RBRACK) };
|
||||
colon = ':' %{ add_token(:T_COLON) };
|
||||
lparen = '(';
|
||||
rparen = ')';
|
||||
pipe = '|';
|
||||
odd = 'odd';
|
||||
even = 'even';
|
||||
minus = '-';
|
||||
nth = 'n';
|
||||
comma = whitespace* ',' whitespace*;
|
||||
|
||||
action emit_pipe {
|
||||
|
@ -221,6 +213,13 @@ module Oga
|
|||
#
|
||||
# http://www.w3.org/TR/css3-selectors/#structural-pseudos
|
||||
|
||||
lparen = '(';
|
||||
rparen = ')';
|
||||
odd = 'odd';
|
||||
even = 'even';
|
||||
minus = '-';
|
||||
nth = 'n';
|
||||
|
||||
action emit_lparen {
|
||||
add_token(:T_LPAREN)
|
||||
|
||||
|
@ -248,8 +247,29 @@ module Oga
|
|||
rparen => emit_rparen;
|
||||
*|;
|
||||
|
||||
main := |*
|
||||
hash | dot | lbrack | rbrack | colon;
|
||||
# Predicates
|
||||
#
|
||||
# CSS predicates can be used to filter nodes based on the value of an
|
||||
# attribute.
|
||||
|
||||
lbrack = '[';
|
||||
rbrack = ']';
|
||||
|
||||
action emit_lbrack {
|
||||
add_token(:T_LBRACK)
|
||||
|
||||
fnext predicate;
|
||||
}
|
||||
|
||||
action emit_rbrack {
|
||||
add_token(:T_RBRACK)
|
||||
|
||||
fnext main;
|
||||
}
|
||||
|
||||
# Machine used for lexing the body of a CSS predicate.
|
||||
predicate := |*
|
||||
whitespace;
|
||||
|
||||
# Some of the operators have similar characters (e.g. the "="). As a
|
||||
# result we can't use rules like the following:
|
||||
|
@ -259,26 +279,30 @@ module Oga
|
|||
#
|
||||
# This would result in both machines being executed for the input
|
||||
# "*=". The syntax below ensures that only the first match is handled.
|
||||
|
||||
op_eq => { add_token(:T_EQ) };
|
||||
op_space_in => { add_token(:T_SPACE_IN) };
|
||||
op_starts_with => { add_token(:T_STARTS_WITH) };
|
||||
op_ends_with => { add_token(:T_ENDS_WITH) };
|
||||
op_in => { add_token(:T_IN) };
|
||||
op_hyphen_in => { add_token(:T_HYPHEN_IN) };
|
||||
identifier => emit_identifier;
|
||||
rbrack => emit_rbrack;
|
||||
string => emit_string;
|
||||
*|;
|
||||
|
||||
main := |*
|
||||
hash | dot | colon;
|
||||
|
||||
op_child => { add_token(:T_CHILD) };
|
||||
op_fol_direct => { add_token(:T_FOLLOWING_DIRECT) };
|
||||
op_fol => { add_token(:T_FOLLOWING) };
|
||||
|
||||
# The pipe character is also used in the |= operator so the action for
|
||||
# this is handled separately.
|
||||
lbrack => emit_lbrack;
|
||||
pipe => emit_pipe;
|
||||
comma => emit_comma;
|
||||
whitespace => emit_whitespace;
|
||||
lparen => emit_lparen;
|
||||
identifier => emit_identifier;
|
||||
integer => emit_integer;
|
||||
string => emit_string;
|
||||
|
||||
any;
|
||||
*|;
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Oga::CSS::Lexer do
|
||||
context 'integers' do
|
||||
example 'lex an integer' do
|
||||
lex_css('10').should == [[:T_INT, 10]]
|
||||
end
|
||||
end
|
||||
end
|
|
@ -3,35 +3,60 @@ require 'spec_helper'
|
|||
describe Oga::CSS::Lexer do
|
||||
context 'operators' do
|
||||
example 'lex the = operator' do
|
||||
lex_css('=').should == [[:T_EQ, nil]]
|
||||
lex_css('[=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_EQ, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex the ~= operator' do
|
||||
lex_css('~=').should == [[:T_SPACE_IN, nil]]
|
||||
lex_css('[~=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_SPACE_IN, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex the ^= operator' do
|
||||
lex_css('^=').should == [[:T_STARTS_WITH, nil]]
|
||||
lex_css('[^=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_STARTS_WITH, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex the $= operator' do
|
||||
lex_css('$=').should == [[:T_ENDS_WITH, nil]]
|
||||
lex_css('[$=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_ENDS_WITH, nil],
|
||||
[:T_RBRACK, nil],
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex the *= operator' do
|
||||
lex_css('*=').should == [[:T_IN, nil]]
|
||||
lex_css('[*=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_IN, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex an identifier followed by the *= operator' do
|
||||
lex_css('foo *=').should == [
|
||||
lex_css('[foo *=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_IDENT, 'foo'],
|
||||
[:T_SPACE, nil],
|
||||
[:T_IN, nil]
|
||||
[:T_IN, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex the |= operator' do
|
||||
lex_css('|=').should == [[:T_HYPHEN_IN, nil]]
|
||||
lex_css('[|=]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_HYPHEN_IN, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,11 +3,19 @@ require 'spec_helper'
|
|||
describe Oga::CSS::Lexer do
|
||||
context 'strings' do
|
||||
example 'lex a single quoted string' do
|
||||
lex_css("'foo'").should == [[:T_STRING, 'foo']]
|
||||
lex_css("['foo']").should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_STRING, 'foo'],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex a double quoted string' do
|
||||
lex_css('"foo"').should == [[:T_STRING, 'foo']]
|
||||
lex_css('["foo"]').should == [
|
||||
[:T_LBRACK, nil],
|
||||
[:T_STRING, 'foo'],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue