Tighten up lexing of CSS predicates.
Operators can now only occur inside predicates, and any whitespace in these predicates is ignored.
This commit is contained in:
parent
625b9eeffd
commit
b40c0243ce
|
@ -138,16 +138,8 @@ module Oga
|
||||||
|
|
||||||
hash = '#' %{ add_token(:T_HASH) };
|
hash = '#' %{ add_token(:T_HASH) };
|
||||||
dot = '.' %{ add_token(:T_DOT) };
|
dot = '.' %{ add_token(:T_DOT) };
|
||||||
lbrack = '[' %{ add_token(:T_LBRACK) };
|
|
||||||
rbrack = ']' %{ add_token(:T_RBRACK) };
|
|
||||||
colon = ':' %{ add_token(:T_COLON) };
|
colon = ':' %{ add_token(:T_COLON) };
|
||||||
lparen = '(';
|
|
||||||
rparen = ')';
|
|
||||||
pipe = '|';
|
pipe = '|';
|
||||||
odd = 'odd';
|
|
||||||
even = 'even';
|
|
||||||
minus = '-';
|
|
||||||
nth = 'n';
|
|
||||||
comma = whitespace* ',' whitespace*;
|
comma = whitespace* ',' whitespace*;
|
||||||
|
|
||||||
action emit_pipe {
|
action emit_pipe {
|
||||||
|
@ -221,6 +213,13 @@ module Oga
|
||||||
#
|
#
|
||||||
# http://www.w3.org/TR/css3-selectors/#structural-pseudos
|
# http://www.w3.org/TR/css3-selectors/#structural-pseudos
|
||||||
|
|
||||||
|
lparen = '(';
|
||||||
|
rparen = ')';
|
||||||
|
odd = 'odd';
|
||||||
|
even = 'even';
|
||||||
|
minus = '-';
|
||||||
|
nth = 'n';
|
||||||
|
|
||||||
action emit_lparen {
|
action emit_lparen {
|
||||||
add_token(:T_LPAREN)
|
add_token(:T_LPAREN)
|
||||||
|
|
||||||
|
@ -248,8 +247,29 @@ module Oga
|
||||||
rparen => emit_rparen;
|
rparen => emit_rparen;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
main := |*
|
# Predicates
|
||||||
hash | dot | lbrack | rbrack | colon;
|
#
|
||||||
|
# CSS predicates can be used to filter nodes based on the value of an
|
||||||
|
# attribute.
|
||||||
|
|
||||||
|
lbrack = '[';
|
||||||
|
rbrack = ']';
|
||||||
|
|
||||||
|
action emit_lbrack {
|
||||||
|
add_token(:T_LBRACK)
|
||||||
|
|
||||||
|
fnext predicate;
|
||||||
|
}
|
||||||
|
|
||||||
|
action emit_rbrack {
|
||||||
|
add_token(:T_RBRACK)
|
||||||
|
|
||||||
|
fnext main;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Machine used for lexing the body of a CSS predicate.
|
||||||
|
predicate := |*
|
||||||
|
whitespace;
|
||||||
|
|
||||||
# Some of the operators have similar characters (e.g. the "="). As a
|
# Some of the operators have similar characters (e.g. the "="). As a
|
||||||
# result we can't use rules like the following:
|
# result we can't use rules like the following:
|
||||||
|
@ -259,26 +279,30 @@ module Oga
|
||||||
#
|
#
|
||||||
# This would result in both machines being executed for the input
|
# This would result in both machines being executed for the input
|
||||||
# "*=". The syntax below ensures that only the first match is handled.
|
# "*=". The syntax below ensures that only the first match is handled.
|
||||||
|
|
||||||
op_eq => { add_token(:T_EQ) };
|
op_eq => { add_token(:T_EQ) };
|
||||||
op_space_in => { add_token(:T_SPACE_IN) };
|
op_space_in => { add_token(:T_SPACE_IN) };
|
||||||
op_starts_with => { add_token(:T_STARTS_WITH) };
|
op_starts_with => { add_token(:T_STARTS_WITH) };
|
||||||
op_ends_with => { add_token(:T_ENDS_WITH) };
|
op_ends_with => { add_token(:T_ENDS_WITH) };
|
||||||
op_in => { add_token(:T_IN) };
|
op_in => { add_token(:T_IN) };
|
||||||
op_hyphen_in => { add_token(:T_HYPHEN_IN) };
|
op_hyphen_in => { add_token(:T_HYPHEN_IN) };
|
||||||
|
identifier => emit_identifier;
|
||||||
|
rbrack => emit_rbrack;
|
||||||
|
string => emit_string;
|
||||||
|
*|;
|
||||||
|
|
||||||
|
main := |*
|
||||||
|
hash | dot | colon;
|
||||||
|
|
||||||
op_child => { add_token(:T_CHILD) };
|
op_child => { add_token(:T_CHILD) };
|
||||||
op_fol_direct => { add_token(:T_FOLLOWING_DIRECT) };
|
op_fol_direct => { add_token(:T_FOLLOWING_DIRECT) };
|
||||||
op_fol => { add_token(:T_FOLLOWING) };
|
op_fol => { add_token(:T_FOLLOWING) };
|
||||||
|
|
||||||
# The pipe character is also used in the |= operator so the action for
|
lbrack => emit_lbrack;
|
||||||
# this is handled separately.
|
|
||||||
pipe => emit_pipe;
|
pipe => emit_pipe;
|
||||||
comma => emit_comma;
|
comma => emit_comma;
|
||||||
whitespace => emit_whitespace;
|
whitespace => emit_whitespace;
|
||||||
lparen => emit_lparen;
|
lparen => emit_lparen;
|
||||||
identifier => emit_identifier;
|
identifier => emit_identifier;
|
||||||
integer => emit_integer;
|
|
||||||
string => emit_string;
|
|
||||||
|
|
||||||
any;
|
any;
|
||||||
*|;
|
*|;
|
||||||
|
|
|
@ -1,9 +0,0 @@
|
||||||
require 'spec_helper'
|
|
||||||
|
|
||||||
describe Oga::CSS::Lexer do
|
|
||||||
context 'integers' do
|
|
||||||
example 'lex an integer' do
|
|
||||||
lex_css('10').should == [[:T_INT, 10]]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -3,35 +3,60 @@ require 'spec_helper'
|
||||||
describe Oga::CSS::Lexer do
|
describe Oga::CSS::Lexer do
|
||||||
context 'operators' do
|
context 'operators' do
|
||||||
example 'lex the = operator' do
|
example 'lex the = operator' do
|
||||||
lex_css('=').should == [[:T_EQ, nil]]
|
lex_css('[=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_EQ, nil],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex the ~= operator' do
|
example 'lex the ~= operator' do
|
||||||
lex_css('~=').should == [[:T_SPACE_IN, nil]]
|
lex_css('[~=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_SPACE_IN, nil],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex the ^= operator' do
|
example 'lex the ^= operator' do
|
||||||
lex_css('^=').should == [[:T_STARTS_WITH, nil]]
|
lex_css('[^=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_STARTS_WITH, nil],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex the $= operator' do
|
example 'lex the $= operator' do
|
||||||
lex_css('$=').should == [[:T_ENDS_WITH, nil]]
|
lex_css('[$=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_ENDS_WITH, nil],
|
||||||
|
[:T_RBRACK, nil],
|
||||||
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex the *= operator' do
|
example 'lex the *= operator' do
|
||||||
lex_css('*=').should == [[:T_IN, nil]]
|
lex_css('[*=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_IN, nil],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex an identifier followed by the *= operator' do
|
example 'lex an identifier followed by the *= operator' do
|
||||||
lex_css('foo *=').should == [
|
lex_css('[foo *=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
[:T_IDENT, 'foo'],
|
[:T_IDENT, 'foo'],
|
||||||
[:T_SPACE, nil],
|
[:T_IN, nil],
|
||||||
[:T_IN, nil]
|
[:T_RBRACK, nil]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex the |= operator' do
|
example 'lex the |= operator' do
|
||||||
lex_css('|=').should == [[:T_HYPHEN_IN, nil]]
|
lex_css('[|=]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_HYPHEN_IN, nil],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,11 +3,19 @@ require 'spec_helper'
|
||||||
describe Oga::CSS::Lexer do
|
describe Oga::CSS::Lexer do
|
||||||
context 'strings' do
|
context 'strings' do
|
||||||
example 'lex a single quoted string' do
|
example 'lex a single quoted string' do
|
||||||
lex_css("'foo'").should == [[:T_STRING, 'foo']]
|
lex_css("['foo']").should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_STRING, 'foo'],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex a double quoted string' do
|
example 'lex a double quoted string' do
|
||||||
lex_css('"foo"').should == [[:T_STRING, 'foo']]
|
lex_css('["foo"]').should == [
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_STRING, 'foo'],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue