Lex XPath operators using individual tokens.

Instead of lexing every operator as T_OP, the lexer now emits an individual
token for each operator, such as T_EQ and T_LT.
This commit is contained in:
Yorick Peterse 2014-06-09 23:35:54 +02:00
parent 7244e28eec
commit 70f3b7fa92
7 changed files with 331 additions and 244 deletions

View File

@ -239,18 +239,14 @@ module Oga
# conflicting with the patterns used for matching identifiers (= # conflicting with the patterns used for matching identifiers (=
# element names and the likes). # element names and the likes).
operator = '|' op_pipe = '|' %{ add_token(:T_PIPE) };
| 'and' op_plus = '+' %{ add_token(:T_ADD) };
| 'or' op_eq = '=' %{ add_token(:T_EQ) };
| '+' op_neq = '!=' %{ add_token(:T_NEQ) };
| 'div' op_lt = '<' %{ add_token(:T_LT) };
| 'mod' op_gt = '>' %{ add_token(:T_GT) };
| '=' op_lte = '<=' %{ add_token(:T_LTE) };
| '!=' op_gte = '>=' %{ add_token(:T_GTE) };
| '<'
| '>'
| '<='
| '>=';
# These operators require whitespace around them in order to be lexed # These operators require whitespace around them in order to be lexed
# as operators. This is due to "-" being allowed in node names and "*" # as operators. This is due to "-" being allowed in node names and "*"
@ -259,24 +255,36 @@ module Oga
# THINK: relying on whitespace is a rather fragile solution, even # THINK: relying on whitespace is a rather fragile solution, even
# though the W3 actually recommends this for the "-" operator. Perhaps # though the W3 actually recommends this for the "-" operator. Perhaps
# there's a better way of doing this. # there's a better way of doing this.
space_operator = space ('*' | '-') space;
action emit_operator { op_and = ' and ' %{ add_token(:T_AND) };
emit(:T_OP, ts, te) op_or = ' or ' %{ add_token(:T_OR) };
} op_div = ' div ' %{ add_token(:T_DIV) };
op_mod = ' mod ' %{ add_token(:T_MOD) };
op_mul = ' * ' %{ add_token(:T_MUL) };
op_sub = ' - ' %{ add_token(:T_SUB) };
action emit_space_operator { operator = op_pipe
emit(:T_OP, ts + 1, te - 1) | op_and
} | op_or
| op_plus
| op_div
| op_mod
| op_eq
| op_neq
| op_lt
| op_gt
| op_lte
| op_gte
| op_mul
| op_sub
;
# Machine that handles the lexing of data inside an XPath predicate. # Machine that handles the lexing of data inside an XPath predicate.
# When bumping into a "]" the lexer jumps back to the `main` machine. # When bumping into a "]" the lexer jumps back to the `main` machine.
predicate := |* predicate := |*
operator;
whitespace | slash | lparen | rparen | comma | colon | star; whitespace | slash | lparen | rparen | comma | colon | star;
operator => emit_operator;
space_operator => emit_space_operator;
string => emit_string; string => emit_string;
integer => emit_integer; integer => emit_integer;
float => emit_float; float => emit_float;
@ -291,6 +299,7 @@ module Oga
*|; *|;
main := |* main := |*
operator;
whitespace | slash | lparen | rparen | comma | colon | star; whitespace | slash | lparen | rparen | comma | colon | star;
'[' => { '[' => {

View File

@ -0,0 +1,19 @@
require 'spec_helper'
# Specs describing how the XPath lexer tokenizes axes. Tokens are compared as
# [type, value] pairs returned by the `lex_xpath` helper.
describe Oga::XPath::Lexer do
  context 'axes' do
    example 'lex an axis using the full syntax form' do
      tokens = lex_xpath('/parent::node()')

      tokens.should == [
        [:T_SLASH, nil],
        [:T_AXIS, 'parent'],
        [:T_IDENT, 'node'],
        [:T_LPAREN, nil],
        [:T_RPAREN, nil]
      ]
    end

    # The ".." shorthand is expected to produce the same T_AXIS token as the
    # explicit "parent::" form.
    example 'lex an axis using the short syntax form' do
      tokens = lex_xpath('/..')

      tokens.should == [[:T_SLASH, nil], [:T_AXIS, 'parent']]
    end
  end
end

View File

@ -0,0 +1,33 @@
require 'spec_helper'
# Specs describing how the XPath lexer tokenizes function calls. A call is
# expected to lex as an identifier followed by parenthesis tokens, with any
# arguments (and separating commas) in between.
describe Oga::XPath::Lexer do
  context 'function calls' do
    example 'lex a function call without arguments' do
      expected = [
        [:T_IDENT, 'count'],
        [:T_LPAREN, nil],
        [:T_RPAREN, nil]
      ]

      lex_xpath('count()').should == expected
    end

    example 'lex a function call with a single argument' do
      expected = [
        [:T_IDENT, 'count'],
        [:T_LPAREN, nil],
        [:T_IDENT, 'foo'],
        [:T_RPAREN, nil]
      ]

      lex_xpath('count(foo)').should == expected
    end

    # Note that the whitespace after the comma does not show up in the
    # expected token list.
    example 'lex a function call with two arguments' do
      expected = [
        [:T_IDENT, 'count'],
        [:T_LPAREN, nil],
        [:T_IDENT, 'foo'],
        [:T_COMMA, nil],
        [:T_IDENT, 'bar'],
        [:T_RPAREN, nil]
      ]

      lex_xpath('count(foo, bar)').should == expected
    end
  end
end

View File

@ -0,0 +1,71 @@
require 'spec_helper'
# General specs for the XPath lexer: simple paths, namespaced and wildcard
# node tests, and a couple of larger real-world expressions. Every example
# compares the output of the `lex_xpath` helper against a list of
# [type, value] token pairs.
describe Oga::XPath::Lexer do
  context 'general' do
    example 'lex a simple expression' do
      lex_xpath('/foo').should == [[:T_SLASH, nil], [:T_IDENT, 'foo']]
    end

    # A namespace prefix is lexed as a separate identifier followed by a
    # T_COLON token.
    example 'lex a node test using a namespace' do
      lex_xpath('/foo:bar').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_COLON, nil],
        [:T_IDENT, 'bar']
      ]
    end

    example 'lex a wildcard node test' do
      lex_xpath('/*').should == [[:T_SLASH, nil], [:T_STAR, nil]]
    end

    example 'lex a wildcard node test for a namespace' do
      lex_xpath('/*:foo').should == [
        [:T_SLASH, nil],
        [:T_STAR, nil],
        [:T_COLON, nil],
        [:T_IDENT, 'foo']
      ]
    end

    # The following are a bunch of examples taken from Wikipedia and the W3
    # spec to see how the lexer handles them.

    # "//" is expected to lex as a T_SLASH followed by a descendant-or-self
    # axis token.
    example 'lex a descendant-or-self expression' do
      lex_xpath('/wikimedia//editions').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'wikimedia'],
        [:T_SLASH, nil],
        [:T_AXIS, 'descendant-or-self'],
        [:T_IDENT, 'editions']
      ]
    end

    # Inside the predicate "@" becomes an attribute axis and "=" is lexed as
    # its own T_EQ token.
    example 'lex a complex expression using predicates and function calls' do
      path = '/wikimedia/projects/project[@name="Wikipedia"]/editions/edition/text()'

      lex_xpath(path).should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'wikimedia'],
        [:T_SLASH, nil],
        [:T_IDENT, 'projects'],
        [:T_SLASH, nil],
        [:T_IDENT, 'project'],
        [:T_LBRACK, nil],
        [:T_AXIS, 'attribute'],
        [:T_IDENT, 'name'],
        [:T_EQ, nil],
        [:T_STRING, 'Wikipedia'],
        [:T_RBRACK, nil],
        [:T_SLASH, nil],
        [:T_IDENT, 'editions'],
        [:T_SLASH, nil],
        [:T_IDENT, 'edition'],
        [:T_SLASH, nil],
        [:T_IDENT, 'text'],
        [:T_LPAREN, nil],
        [:T_RPAREN, nil]
      ]
    end
  end
end

View File

@ -0,0 +1,61 @@
require 'spec_helper'
# Specs verifying that every XPath operator is lexed into its own token type
# with a nil value.
describe Oga::XPath::Lexer do
  context 'operators' do
    # Each row is [example description, XPath input, expected token type].
    # The word operators as well as "*" and "-" are fed to the lexer with a
    # space on either side.
    [
      ['lex the pipe operator',             '|',     :T_PIPE],
      ['lex the and operator',              ' and ', :T_AND],
      ['lex the or operator',               ' or ',  :T_OR],
      ['lex the plus operator',             '+',     :T_ADD],
      ['lex the div operator',              ' div ', :T_DIV],
      ['lex the mod operator',              ' mod ', :T_MOD],
      ['lex the equals operator',           '=',     :T_EQ],
      ['lex the not-equals operator',       '!=',    :T_NEQ],
      ['lex the lower-than operator',       '<',     :T_LT],
      ['lex the greater-than operator',     '>',     :T_GT],
      ['lex the lower-or-equal operator',   '<=',    :T_LTE],
      ['lex the greater-or-equal operator', '>=',    :T_GTE],
      ['lex the mul operator',              ' * ',   :T_MUL],
      ['lex the subtraction operator',      ' - ',   :T_SUB]
    ].each do |description, input, type|
      example description do
        lex_xpath(input).should == [[type, nil]]
      end
    end
  end
end

View File

@ -0,0 +1,116 @@
require 'spec_helper'
# Specs describing how the XPath lexer tokenizes predicates (the expressions
# between "[" and "]"). Every example compares the output of the `lex_xpath`
# helper against a list of [type, value] token pairs.
describe Oga::XPath::Lexer do
  context 'predicates' do
    example 'lex a simple predicate expression' do
      lex_xpath('/foo[bar]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_LBRACK, nil],
        [:T_IDENT, 'bar'],
        [:T_RBRACK, nil]
      ]
    end

    example 'lex a predicate that checks for equality' do
      lex_xpath('/foo[@bar="baz"]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_LBRACK, nil],
        [:T_AXIS, 'attribute'],
        [:T_IDENT, 'bar'],
        [:T_EQ, nil],
        [:T_STRING, 'baz'],
        [:T_RBRACK, nil]
      ]
    end

    # Numeric literals carry their converted Ruby value (Integer / Float)
    # rather than the source text.
    example 'lex a predicate that uses an integer' do
      lex_xpath('/foo[1]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_LBRACK, nil],
        [:T_INT, 1],
        [:T_RBRACK, nil]
      ]
    end

    example 'lex a predicate that uses a float' do
      lex_xpath('/foo[1.5]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_LBRACK, nil],
        [:T_FLOAT, 1.5],
        [:T_RBRACK, nil]
      ]
    end

    example 'lex a predicate using a function' do
      lex_xpath('/foo[bar()]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_LBRACK, nil],
        [:T_IDENT, 'bar'],
        [:T_LPAREN, nil],
        [:T_RPAREN, nil],
        [:T_RBRACK, nil]
      ]
    end

    # "div" appears twice: as a node name (T_IDENT) and, surrounded by
    # whitespace, as the division operator (T_DIV).
    example 'lex a predicate expression using the div operator' do
      lex_xpath('/div[@number=4 div 2]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'div'],
        [:T_LBRACK, nil],
        [:T_AXIS, 'attribute'],
        [:T_IDENT, 'number'],
        [:T_EQ, nil],
        [:T_INT, 4],
        [:T_DIV, nil],
        [:T_INT, 2],
        [:T_RBRACK, nil]
      ]
    end

    # A "*" surrounded by whitespace is the multiplication operator (T_MUL),
    # not a wildcard (T_STAR).
    example 'lex a predicate expression using the * operator' do
      lex_xpath('/div[@number=4 * 2]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'div'],
        [:T_LBRACK, nil],
        [:T_AXIS, 'attribute'],
        [:T_IDENT, 'number'],
        [:T_EQ, nil],
        [:T_INT, 4],
        [:T_MUL, nil],
        [:T_INT, 2],
        [:T_RBRACK, nil]
      ]
    end

    example 'lex a predicate expression using axes' do
      lex_xpath('/div[/foo/bar]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'div'],
        [:T_LBRACK, nil],
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_SLASH, nil],
        [:T_IDENT, 'bar'],
        [:T_RBRACK, nil]
      ]
    end

    # Without surrounding whitespace "*" is lexed as a wildcard (T_STAR).
    example 'lex a predicate expression using a wildcard' do
      lex_xpath('/div[/foo/*]').should == [
        [:T_SLASH, nil],
        [:T_IDENT, 'div'],
        [:T_LBRACK, nil],
        [:T_SLASH, nil],
        [:T_IDENT, 'foo'],
        [:T_SLASH, nil],
        [:T_STAR, nil],
        [:T_RBRACK, nil]
      ]
    end
  end
end

View File

@ -1,222 +0,0 @@
require 'spec_helper'
# Specs for the XPath lexer. Every example compares the output of the
# `lex_xpath` helper against a list of [type, value] token pairs. In this
# file operators are expected as generic T_OP tokens carrying the operator
# text as their value.
describe Oga::XPath::Lexer do
  example 'lex a simple expression' do
    lex_xpath('/foo').should == [[:T_SLASH, nil], [:T_IDENT, 'foo']]
  end

  example 'lex a function call without arguments' do
    lex_xpath('count()').should == [
      [:T_IDENT, 'count'],
      [:T_LPAREN, nil],
      [:T_RPAREN, nil]
    ]
  end

  example 'lex a function call with a single argument' do
    lex_xpath('count(foo)').should == [
      [:T_IDENT, 'count'],
      [:T_LPAREN, nil],
      [:T_IDENT, 'foo'],
      [:T_RPAREN, nil]
    ]
  end

  example 'lex a function call with two arguments' do
    lex_xpath('count(foo, bar)').should == [
      [:T_IDENT, 'count'],
      [:T_LPAREN, nil],
      [:T_IDENT, 'foo'],
      [:T_COMMA, nil],
      [:T_IDENT, 'bar'],
      [:T_RPAREN, nil]
    ]
  end

  example 'lex a simple predicate expression' do
    lex_xpath('/foo[bar]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_LBRACK, nil],
      [:T_IDENT, 'bar'],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex a predicate that checks for equality' do
    lex_xpath('/foo[@bar="baz"]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_LBRACK, nil],
      [:T_AXIS, 'attribute'],
      [:T_IDENT, 'bar'],
      [:T_OP, '='],
      [:T_STRING, 'baz'],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex a predicate that uses an integer' do
    lex_xpath('/foo[1]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_LBRACK, nil],
      [:T_INT, 1],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex a predicate that uses a float' do
    lex_xpath('/foo[1.5]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_LBRACK, nil],
      [:T_FLOAT, 1.5],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex a predicate using a function' do
    lex_xpath('/foo[bar()]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_LBRACK, nil],
      [:T_IDENT, 'bar'],
      [:T_LPAREN, nil],
      [:T_RPAREN, nil],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex an axis using the full syntax form' do
    lex_xpath('/parent::node()').should == [
      [:T_SLASH, nil],
      [:T_AXIS, 'parent'],
      [:T_IDENT, 'node'],
      [:T_LPAREN, nil],
      [:T_RPAREN, nil]
    ]
  end

  example 'lex an axis using the short syntax form' do
    lex_xpath('/..').should == [[:T_SLASH, nil], [:T_AXIS, 'parent']]
  end

  example 'lex a node test using a namespace' do
    lex_xpath('/foo:bar').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_COLON, nil],
      [:T_IDENT, 'bar']
    ]
  end

  example 'lex a wildcard node test' do
    lex_xpath('/*').should == [[:T_SLASH, nil], [:T_STAR, nil]]
  end

  example 'lex a wildcard node test for a namespace' do
    lex_xpath('/*:foo').should == [
      [:T_SLASH, nil],
      [:T_STAR, nil],
      [:T_COLON, nil],
      [:T_IDENT, 'foo']
    ]
  end

  # "div" appears twice: as a node name (T_IDENT) and, surrounded by
  # whitespace, as an operator.
  example 'lex a predicate expression using the div operator' do
    lex_xpath('/div[@number=4 div 2]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'div'],
      [:T_LBRACK, nil],
      [:T_AXIS, 'attribute'],
      [:T_IDENT, 'number'],
      [:T_OP, '='],
      [:T_INT, 4],
      [:T_OP, 'div'],
      [:T_INT, 2],
      [:T_RBRACK, nil]
    ]
  end

  # The operator value excludes the whitespace surrounding "*".
  example 'lex a predicate expression using the * operator' do
    lex_xpath('/div[@number=4 * 2]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'div'],
      [:T_LBRACK, nil],
      [:T_AXIS, 'attribute'],
      [:T_IDENT, 'number'],
      [:T_OP, '='],
      [:T_INT, 4],
      [:T_OP, '*'],
      [:T_INT, 2],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex a predicate expression using axes' do
    lex_xpath('/div[/foo/bar]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'div'],
      [:T_LBRACK, nil],
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_SLASH, nil],
      [:T_IDENT, 'bar'],
      [:T_RBRACK, nil]
    ]
  end

  example 'lex a predicate expression using a wildcard' do
    lex_xpath('/div[/foo/*]').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'div'],
      [:T_LBRACK, nil],
      [:T_SLASH, nil],
      [:T_IDENT, 'foo'],
      [:T_SLASH, nil],
      [:T_STAR, nil],
      [:T_RBRACK, nil]
    ]
  end

  # The following are a bunch of examples taken from Wikipedia and the W3 spec
  # to see how the lexer handles them.

  example 'lex a descendant-or-self expression' do
    lex_xpath('/wikimedia//editions').should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'wikimedia'],
      [:T_SLASH, nil],
      [:T_AXIS, 'descendant-or-self'],
      [:T_IDENT, 'editions']
    ]
  end

  example 'lex a complex expression using predicates and function calls' do
    path = '/wikimedia/projects/project[@name="Wikipedia"]/editions/edition/text()'

    lex_xpath(path).should == [
      [:T_SLASH, nil],
      [:T_IDENT, 'wikimedia'],
      [:T_SLASH, nil],
      [:T_IDENT, 'projects'],
      [:T_SLASH, nil],
      [:T_IDENT, 'project'],
      [:T_LBRACK, nil],
      [:T_AXIS, 'attribute'],
      [:T_IDENT, 'name'],
      [:T_OP, '='],
      [:T_STRING, 'Wikipedia'],
      [:T_RBRACK, nil],
      [:T_SLASH, nil],
      [:T_IDENT, 'editions'],
      [:T_SLASH, nil],
      [:T_IDENT, 'edition'],
      [:T_SLASH, nil],
      [:T_IDENT, 'text'],
      [:T_LPAREN, nil],
      [:T_RPAREN, nil]
    ]
  end
end