Support for lexing XPath wildcard expressions.
To support this we need to require whitespace around the "*" operator. This is not ideal but it will do for now.
This commit is contained in:
parent
48bf1a0628
commit
54de2df0c7
|
@ -3,13 +3,19 @@
|
|||
module Oga
|
||||
module XPath
|
||||
##
|
||||
# Ragel lexer for lexing XPath queries.
|
||||
# Ragel lexer for lexing XPath expressions.
|
||||
#
|
||||
class Lexer
|
||||
%% write data;
|
||||
|
||||
# % fix highlight
|
||||
|
||||
##
|
||||
# Maps certain XPath axes written in their short form to their long form
|
||||
# equivalents.
|
||||
#
|
||||
# @return [Hash]
|
||||
#
|
||||
AXIS_MAPPING = {
|
||||
'@' => 'attribute',
|
||||
'//' => 'descendant-or-self',
|
||||
|
@ -22,23 +28,11 @@ module Oga
|
|||
#
|
||||
def initialize(data)
|
||||
@data = data
|
||||
|
||||
reset
|
||||
end
|
||||
|
||||
##
|
||||
# Resets the internal state of the lexer.
|
||||
#
|
||||
def reset
|
||||
|
||||
end
|
||||
|
||||
##
|
||||
# Gathers all the tokens for the input and returns them as an Array.
|
||||
#
|
||||
# This method resets the internal state of the lexer after consuming the
|
||||
# input.
|
||||
#
|
||||
# @see [#advance]
|
||||
# @return [Array]
|
||||
#
|
||||
|
@ -49,8 +43,6 @@ module Oga
|
|||
tokens << [type, value]
|
||||
end
|
||||
|
||||
reset
|
||||
|
||||
return tokens
|
||||
end
|
||||
|
||||
|
@ -67,8 +59,6 @@ module Oga
|
|||
# This method stores the supplied block in `@block` and resets it after
|
||||
# the lexer loop has finished.
|
||||
#
|
||||
# This method does *not* reset the internal state of the lexer.
|
||||
#
|
||||
# @param [String] data The String to consume.
|
||||
# @return [Array]
|
||||
#
|
||||
|
@ -153,6 +143,7 @@ module Oga
|
|||
rparen = ')' @{ add_token(:T_RPAREN) };
|
||||
comma = ',' @{ add_token(:T_COMMA) };
|
||||
colon = ':' @{ add_token(:T_COLON) };
|
||||
star = '*' @{ add_token(:T_STAR) };
|
||||
|
||||
# Identifiers
|
||||
#
|
||||
|
@ -250,8 +241,6 @@ module Oga
|
|||
| 'and'
|
||||
| 'or'
|
||||
| '+'
|
||||
| '-'
|
||||
| '*'
|
||||
| 'div'
|
||||
| 'mod'
|
||||
| '='
|
||||
|
@ -261,21 +250,36 @@ module Oga
|
|||
| '<='
|
||||
| '>=';
|
||||
|
||||
# These operators require whitespace around them in order to be lexed
|
||||
# as operators. This is due to "-" being allowed in node names and "*"
|
||||
# also being used as a wildcard.
|
||||
#
|
||||
# THINK: relying on whitespace is a rather fragile solution, even
|
||||
# though the W3 actually recommends this for the "-" operator. Perhaps
|
||||
# there's a better way of doing this.
|
||||
space_operator = space ('*' | '-') space;
|
||||
|
||||
action emit_operator {
|
||||
emit(:T_OP, ts, te)
|
||||
}
|
||||
|
||||
action emit_space_operator {
|
||||
emit(:T_OP, ts + 1, te - 1)
|
||||
}
|
||||
|
||||
# Machine that handles the lexing of data inside an XPath predicate.
|
||||
# When bumping into a "]" the lexer jumps back to the `main` machine.
|
||||
predicate := |*
|
||||
whitespace | slash | lparen | rparen | comma | colon;
|
||||
whitespace | slash | lparen | rparen | comma | colon | star;
|
||||
|
||||
operator => emit_operator;
|
||||
space_operator => emit_space_operator;
|
||||
|
||||
string => emit_string;
|
||||
integer => emit_integer;
|
||||
float => emit_float;
|
||||
axis_full => emit_axis_full;
|
||||
axis_short => emit_axis_short;
|
||||
operator => emit_operator;
|
||||
identifier => emit_identifier;
|
||||
|
||||
']' => {
|
||||
|
@ -285,7 +289,7 @@ module Oga
|
|||
*|;
|
||||
|
||||
main := |*
|
||||
whitespace | slash | lparen | rparen | comma | colon;
|
||||
whitespace | slash | lparen | rparen | comma | colon | star;
|
||||
|
||||
'[' => {
|
||||
add_token(:T_LBRACK)
|
||||
|
|
|
@ -111,7 +111,20 @@ describe Oga::XPath::Lexer do
|
|||
]
|
||||
end
|
||||
|
||||
example 'lex a predicate expression using an operator' do
|
||||
example 'lex a whildcard node test' do
|
||||
lex_xpath('/*').should == [[:T_SLASH, nil], [:T_STAR, nil]]
|
||||
end
|
||||
|
||||
example 'lex a wildcard node test for a namespace' do
|
||||
lex_xpath('/*:foo').should == [
|
||||
[:T_SLASH, nil],
|
||||
[:T_STAR, nil],
|
||||
[:T_COLON, nil],
|
||||
[:T_IDENT, 'foo']
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex a predicate expression using the div operator' do
|
||||
lex_xpath('/div[@number=4 div 2]').should == [
|
||||
[:T_SLASH, nil],
|
||||
[:T_IDENT, 'div'],
|
||||
|
@ -126,6 +139,21 @@ describe Oga::XPath::Lexer do
|
|||
]
|
||||
end
|
||||
|
||||
example 'lex a predicate expression using the * operator' do
|
||||
lex_xpath('/div[@number=4 * 2]').should == [
|
||||
[:T_SLASH, nil],
|
||||
[:T_IDENT, 'div'],
|
||||
[:T_LBRACK, nil],
|
||||
[:T_AXIS, 'attribute'],
|
||||
[:T_IDENT, 'number'],
|
||||
[:T_OP, '='],
|
||||
[:T_INT, 4],
|
||||
[:T_OP, '*'],
|
||||
[:T_INT, 2],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex a predicate expression using axes' do
|
||||
lex_xpath('/div[/foo/bar]').should == [
|
||||
[:T_SLASH, nil],
|
||||
|
@ -139,6 +167,19 @@ describe Oga::XPath::Lexer do
|
|||
]
|
||||
end
|
||||
|
||||
example 'lex a predicate expression using a wildcard' do
|
||||
lex_xpath('/div[/foo/*]').should == [
|
||||
[:T_SLASH, nil],
|
||||
[:T_IDENT, 'div'],
|
||||
[:T_LBRACK, nil],
|
||||
[:T_SLASH, nil],
|
||||
[:T_IDENT, 'foo'],
|
||||
[:T_SLASH, nil],
|
||||
[:T_STAR, nil],
|
||||
[:T_RBRACK, nil]
|
||||
]
|
||||
end
|
||||
|
||||
# The following are a bunch of examples taken from Wikipedia and the W3 spec
|
||||
# to see how the lexer handles them.
|
||||
|
||||
|
|
Loading…
Reference in New Issue