Support for lexing/parsing XPath type tests.

Unlike what I thought before, syntax such as "node()" is not a function call.
Instead, it is a special node test that tests the *types* of nodes, not their
names.
This commit is contained in:
Yorick Peterse 2014-08-14 21:51:58 +02:00
parent 23441bb5a4
commit 6ad5170476
7 changed files with 77 additions and 25 deletions

View File

@ -249,9 +249,7 @@ module Oga
# added on lexer level to make it easier to handle these cases on # added on lexer level to make it easier to handle these cases on
# parser/evaluator level. # parser/evaluator level.
if AXIS_EMIT_NODE.include?(value) if AXIS_EMIT_NODE.include?(value)
add_token(:T_IDENT, 'node') add_token(:T_NODE_TYPE, 'node')
add_token(:T_LPAREN)
add_token(:T_RPAREN)
if AXIS_EMIT_EXTRA_SLASH.include?(value) and te != eof if AXIS_EMIT_EXTRA_SLASH.include?(value) and te != eof
add_token(:T_SLASH) add_token(:T_SLASH)
@ -305,10 +303,25 @@ module Oga
| op_sub | op_sub
; ;
# Node types
#
# While these look like functions they are actually node tests. For
# example, comment() matches all comment nodes.
#
# See http://www.w3.org/TR/xpath/#NT-NodeType for more information.
node_type = 'comment' | 'text' | 'processing-instruction' | 'node';
# Emits a T_NODE_TYPE token for a matched node type test. The scanner rule
# that triggers this action matches the type name followed by "()", so the
# end offset is moved back by 2 to leave the parentheses out of the value.
# NOTE(review): assumes emit() takes start/stop offsets into the input --
# confirm against its definition.
action emit_node_type {
emit(:T_NODE_TYPE, ts, te - 2)
}
main := |* main := |*
operator; operator;
whitespace | slash | lparen | rparen | comma | colon; whitespace | slash | lparen | rparen | comma | colon;
node_type '()' => emit_node_type;
'[' => { add_token(:T_LBRACK) }; '[' => { add_token(:T_LBRACK) };
']' => { add_token(:T_RBRACK) }; ']' => { add_token(:T_RBRACK) };

View File

@ -3,7 +3,7 @@
# #
class Oga::XPath::Parser class Oga::XPath::Parser
token T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT token T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT T_NODE_TYPE
token T_LBRACK T_RBRACK T_LPAREN T_RPAREN T_SLASH T_STRING token T_LBRACK T_RBRACK T_LPAREN T_RPAREN T_SLASH T_STRING
token T_PIPE T_AND T_OR T_ADD T_DIV T_MOD T_EQ T_NEQ T_LT T_GT T_LTE T_GTE token T_PIPE T_AND T_OR T_ADD T_DIV T_MOD T_EQ T_NEQ T_LT T_GT T_LTE T_GTE
token T_SUB T_MUL token T_SUB T_MUL
@ -76,6 +76,11 @@ rule
node_test node_test
: node_name { s(:test, *val[0]) } : node_name { s(:test, *val[0]) }
| node_name predicate { s(:test, *val[0], val[1]) } | node_name predicate { s(:test, *val[0], val[1]) }
| node_type { val[0] }
;
node_type
: T_NODE_TYPE { s(:node_type, val[0]) }
; ;
node_name node_name

View File

@ -120,9 +120,7 @@ describe Oga::XPath::Lexer do
lex_xpath('//A').should == [ lex_xpath('//A').should == [
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_AXIS, 'descendant-or-self'], [:T_AXIS, 'descendant-or-self'],
[:T_IDENT, 'node'], [:T_NODE_TYPE, 'node'],
[:T_LPAREN, nil],
[:T_RPAREN, nil],
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_IDENT, 'A'] [:T_IDENT, 'A']
] ]
@ -132,9 +130,7 @@ describe Oga::XPath::Lexer do
lex_xpath('/..').should == [ lex_xpath('/..').should == [
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_AXIS, 'parent'], [:T_AXIS, 'parent'],
[:T_IDENT, 'node'], [:T_NODE_TYPE, 'node']
[:T_LPAREN, nil],
[:T_RPAREN, nil],
] ]
end end
@ -142,18 +138,14 @@ describe Oga::XPath::Lexer do
lex_xpath('/.').should == [ lex_xpath('/.').should == [
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_AXIS, 'self'], [:T_AXIS, 'self'],
[:T_IDENT, 'node'], [:T_NODE_TYPE, 'node']
[:T_LPAREN, nil],
[:T_RPAREN, nil],
] ]
end end
example 'lex the . axis followed by a path' do example 'lex the . axis followed by a path' do
lex_xpath('./foo').should == [ lex_xpath('./foo').should == [
[:T_AXIS, 'self'], [:T_AXIS, 'self'],
[:T_IDENT, 'node'], [:T_NODE_TYPE, 'node'],
[:T_LPAREN, nil],
[:T_RPAREN, nil],
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_IDENT, 'foo'] [:T_IDENT, 'foo']
] ]

View File

@ -37,9 +37,7 @@ describe Oga::XPath::Lexer do
[:T_IDENT, 'wikimedia'], [:T_IDENT, 'wikimedia'],
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_AXIS, 'descendant-or-self'], [:T_AXIS, 'descendant-or-self'],
[:T_IDENT, 'node'], [:T_NODE_TYPE, 'node'],
[:T_LPAREN, nil],
[:T_RPAREN, nil],
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_IDENT, 'editions'] [:T_IDENT, 'editions']
] ]
@ -66,9 +64,7 @@ describe Oga::XPath::Lexer do
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_IDENT, 'edition'], [:T_IDENT, 'edition'],
[:T_SLASH, nil], [:T_SLASH, nil],
[:T_IDENT, 'text'], [:T_NODE_TYPE, 'text']
[:T_LPAREN, nil],
[:T_RPAREN, nil]
] ]
end end
end end

View File

@ -0,0 +1,23 @@
require 'spec_helper'
describe Oga::XPath::Lexer do
  context 'node types' do
    # Each node type test is written as "name()" and is expected to lex into
    # a single T_NODE_TYPE token whose value is the bare type name.
    %w[node comment text processing-instruction].each do |type|
      example %(lex the "#{type}" type) do
        expected_tokens = [[:T_NODE_TYPE, type]]

        lex_xpath("#{type}()").should == expected_tokens
      end
    end
  end
end

View File

@ -105,7 +105,7 @@ describe Oga::XPath::Parser do
example 'parse the // axis' do example 'parse the // axis' do
parse_xpath('//A').should == s( parse_xpath('//A').should == s(
:absolute_path, :absolute_path,
s(:axis, 'descendant-or-self', s(:call, 'node')), s(:axis, 'descendant-or-self', s(:node_type, 'node')),
s(:axis, 'child', s(:test, nil, 'A')) s(:axis, 'child', s(:test, nil, 'A'))
) )
end end
@ -113,14 +113,14 @@ describe Oga::XPath::Parser do
example 'parse the .. axis' do example 'parse the .. axis' do
parse_xpath('/..').should == s( parse_xpath('/..').should == s(
:absolute_path, :absolute_path,
s(:axis, 'parent', s(:call, 'node')) s(:axis, 'parent', s(:node_type, 'node'))
) )
end end
example 'parse the . axis' do example 'parse the . axis' do
parse_xpath('/.').should == s( parse_xpath('/.').should == s(
:absolute_path, :absolute_path,
s(:axis, 'self', s(:call, 'node')) s(:axis, 'self', s(:node_type, 'node'))
) )
end end
end end

View File

@ -0,0 +1,23 @@
require 'spec_helper'
describe Oga::XPath::Parser do
  context 'node types' do
    # A bare node type test such as "text()" is expected to parse into a
    # (node_type) node wrapped in a "child" axis node.
    %w[node comment text processing-instruction].each do |type|
      example %(parse the "#{type}" type) do
        expected_ast = s(:axis, 'child', s(:node_type, type))

        parse_xpath("#{type}()").should == expected_ast
      end
    end
  end
end