Support for lexing XPath wildcard expressions.
To support this we need to require whitespace around the "*" operator. This is not ideal but it will do for now.
This commit is contained in:
parent
48bf1a0628
commit
54de2df0c7
|
@ -3,13 +3,19 @@
|
||||||
module Oga
|
module Oga
|
||||||
module XPath
|
module XPath
|
||||||
##
|
##
|
||||||
# Ragel lexer for lexing XPath queries.
|
# Ragel lexer for lexing XPath expressions.
|
||||||
#
|
#
|
||||||
class Lexer
|
class Lexer
|
||||||
%% write data;
|
%% write data;
|
||||||
|
|
||||||
# % fix highlight
|
# % fix highlight
|
||||||
|
|
||||||
|
##
|
||||||
|
# Maps certain XPath axes written in their short form to their long form
|
||||||
|
# equivalents.
|
||||||
|
#
|
||||||
|
# @return [Hash]
|
||||||
|
#
|
||||||
AXIS_MAPPING = {
|
AXIS_MAPPING = {
|
||||||
'@' => 'attribute',
|
'@' => 'attribute',
|
||||||
'//' => 'descendant-or-self',
|
'//' => 'descendant-or-self',
|
||||||
|
@ -22,23 +28,11 @@ module Oga
|
||||||
#
|
#
|
||||||
def initialize(data)
|
def initialize(data)
|
||||||
@data = data
|
@data = data
|
||||||
|
|
||||||
reset
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Resets the internal state of the lexer.
|
|
||||||
#
|
|
||||||
def reset
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Gathers all the tokens for the input and returns them as an Array.
|
# Gathers all the tokens for the input and returns them as an Array.
|
||||||
#
|
#
|
||||||
# This method resets the internal state of the lexer after consuming the
|
|
||||||
# input.
|
|
||||||
#
|
|
||||||
# @see [#advance]
|
# @see [#advance]
|
||||||
# @return [Array]
|
# @return [Array]
|
||||||
#
|
#
|
||||||
|
@ -49,8 +43,6 @@ module Oga
|
||||||
tokens << [type, value]
|
tokens << [type, value]
|
||||||
end
|
end
|
||||||
|
|
||||||
reset
|
|
||||||
|
|
||||||
return tokens
|
return tokens
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -67,8 +59,6 @@ module Oga
|
||||||
# This method stores the supplied block in `@block` and resets it after
|
# This method stores the supplied block in `@block` and resets it after
|
||||||
# the lexer loop has finished.
|
# the lexer loop has finished.
|
||||||
#
|
#
|
||||||
# This method does *not* reset the internal state of the lexer.
|
|
||||||
#
|
|
||||||
# @param [String] data The String to consume.
|
# @param [String] data The String to consume.
|
||||||
# @return [Array]
|
# @return [Array]
|
||||||
#
|
#
|
||||||
|
@ -153,6 +143,7 @@ module Oga
|
||||||
rparen = ')' @{ add_token(:T_RPAREN) };
|
rparen = ')' @{ add_token(:T_RPAREN) };
|
||||||
comma = ',' @{ add_token(:T_COMMA) };
|
comma = ',' @{ add_token(:T_COMMA) };
|
||||||
colon = ':' @{ add_token(:T_COLON) };
|
colon = ':' @{ add_token(:T_COLON) };
|
||||||
|
star = '*' @{ add_token(:T_STAR) };
|
||||||
|
|
||||||
# Identifiers
|
# Identifiers
|
||||||
#
|
#
|
||||||
|
@ -250,8 +241,6 @@ module Oga
|
||||||
| 'and'
|
| 'and'
|
||||||
| 'or'
|
| 'or'
|
||||||
| '+'
|
| '+'
|
||||||
| '-'
|
|
||||||
| '*'
|
|
||||||
| 'div'
|
| 'div'
|
||||||
| 'mod'
|
| 'mod'
|
||||||
| '='
|
| '='
|
||||||
|
@ -261,21 +250,36 @@ module Oga
|
||||||
| '<='
|
| '<='
|
||||||
| '>=';
|
| '>=';
|
||||||
|
|
||||||
|
# These operators require whitespace around them in order to be lexed
|
||||||
|
# as operators. This is due to "-" being allowed in node names and "*"
|
||||||
|
# also being used as a wildcard.
|
||||||
|
#
|
||||||
|
# THINK: relying on whitespace is a rather fragile solution, even
|
||||||
|
# though the W3 actually recommends this for the "-" operator. Perhaps
|
||||||
|
# there's a better way of doing this.
|
||||||
|
space_operator = space ('*' | '-') space;
|
||||||
|
|
||||||
action emit_operator {
|
action emit_operator {
|
||||||
emit(:T_OP, ts, te)
|
emit(:T_OP, ts, te)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
action emit_space_operator {
|
||||||
|
emit(:T_OP, ts + 1, te - 1)
|
||||||
|
}
|
||||||
|
|
||||||
# Machine that handles the lexing of data inside an XPath predicate.
|
# Machine that handles the lexing of data inside an XPath predicate.
|
||||||
# When bumping into a "]" the lexer jumps back to the `main` machine.
|
# When bumping into a "]" the lexer jumps back to the `main` machine.
|
||||||
predicate := |*
|
predicate := |*
|
||||||
whitespace | slash | lparen | rparen | comma | colon;
|
whitespace | slash | lparen | rparen | comma | colon | star;
|
||||||
|
|
||||||
|
operator => emit_operator;
|
||||||
|
space_operator => emit_space_operator;
|
||||||
|
|
||||||
string => emit_string;
|
string => emit_string;
|
||||||
integer => emit_integer;
|
integer => emit_integer;
|
||||||
float => emit_float;
|
float => emit_float;
|
||||||
axis_full => emit_axis_full;
|
axis_full => emit_axis_full;
|
||||||
axis_short => emit_axis_short;
|
axis_short => emit_axis_short;
|
||||||
operator => emit_operator;
|
|
||||||
identifier => emit_identifier;
|
identifier => emit_identifier;
|
||||||
|
|
||||||
']' => {
|
']' => {
|
||||||
|
@ -285,7 +289,7 @@ module Oga
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
main := |*
|
main := |*
|
||||||
whitespace | slash | lparen | rparen | comma | colon;
|
whitespace | slash | lparen | rparen | comma | colon | star;
|
||||||
|
|
||||||
'[' => {
|
'[' => {
|
||||||
add_token(:T_LBRACK)
|
add_token(:T_LBRACK)
|
||||||
|
|
|
@ -111,7 +111,20 @@ describe Oga::XPath::Lexer do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex a predicate expression using an operator' do
|
example 'lex a whildcard node test' do
|
||||||
|
lex_xpath('/*').should == [[:T_SLASH, nil], [:T_STAR, nil]]
|
||||||
|
end
|
||||||
|
|
||||||
|
example 'lex a wildcard node test for a namespace' do
|
||||||
|
lex_xpath('/*:foo').should == [
|
||||||
|
[:T_SLASH, nil],
|
||||||
|
[:T_STAR, nil],
|
||||||
|
[:T_COLON, nil],
|
||||||
|
[:T_IDENT, 'foo']
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
example 'lex a predicate expression using the div operator' do
|
||||||
lex_xpath('/div[@number=4 div 2]').should == [
|
lex_xpath('/div[@number=4 div 2]').should == [
|
||||||
[:T_SLASH, nil],
|
[:T_SLASH, nil],
|
||||||
[:T_IDENT, 'div'],
|
[:T_IDENT, 'div'],
|
||||||
|
@ -126,6 +139,21 @@ describe Oga::XPath::Lexer do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
example 'lex a predicate expression using the * operator' do
|
||||||
|
lex_xpath('/div[@number=4 * 2]').should == [
|
||||||
|
[:T_SLASH, nil],
|
||||||
|
[:T_IDENT, 'div'],
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_AXIS, 'attribute'],
|
||||||
|
[:T_IDENT, 'number'],
|
||||||
|
[:T_OP, '='],
|
||||||
|
[:T_INT, 4],
|
||||||
|
[:T_OP, '*'],
|
||||||
|
[:T_INT, 2],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
example 'lex a predicate expression using axes' do
|
example 'lex a predicate expression using axes' do
|
||||||
lex_xpath('/div[/foo/bar]').should == [
|
lex_xpath('/div[/foo/bar]').should == [
|
||||||
[:T_SLASH, nil],
|
[:T_SLASH, nil],
|
||||||
|
@ -139,6 +167,19 @@ describe Oga::XPath::Lexer do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
example 'lex a predicate expression using a wildcard' do
|
||||||
|
lex_xpath('/div[/foo/*]').should == [
|
||||||
|
[:T_SLASH, nil],
|
||||||
|
[:T_IDENT, 'div'],
|
||||||
|
[:T_LBRACK, nil],
|
||||||
|
[:T_SLASH, nil],
|
||||||
|
[:T_IDENT, 'foo'],
|
||||||
|
[:T_SLASH, nil],
|
||||||
|
[:T_STAR, nil],
|
||||||
|
[:T_RBRACK, nil]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
# The following are a bunch of examples taken from Wikipedia and the W3 spec
|
# The following are a bunch of examples taken from Wikipedia and the W3 spec
|
||||||
# to see how the lexer handles them.
|
# to see how the lexer handles them.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue