XPath compiler support for the "=" operator

This commit is contained in:
Yorick Peterse 2015-07-27 01:00:14 +02:00
parent 04aa8f6546
commit 8a82cc3593
5 changed files with 442 additions and 10 deletions

View File

@ -56,6 +56,7 @@ require 'oga/xpath/lexer'
require 'oga/xpath/parser' require 'oga/xpath/parser'
require 'oga/xpath/evaluator' require 'oga/xpath/evaluator'
require 'oga/xpath/compiler' require 'oga/xpath/compiler'
require 'oga/xpath/conversion'
require 'oga/css/lexer' require 'oga/css/lexer'
require 'oga/css/parser' require 'oga/css/parser'

View File

@ -29,6 +29,15 @@ module Oga
CACHE.get_or_set(ast) { new.compile(ast) } CACHE.get_or_set(ast) { new.compile(ast) }
end end
# Creates a new compiler instance. All per-compilation state is set up by
# delegating to #reset, so a fresh instance and a reset instance start from
# the same state.
def initialize
reset
end
# Resets the internal state.
#
# Puts the counter that #unique_literal increments back to zero, so the
# first unique literal generated afterwards gets the suffix 1 again. Called
# from #initialize and from the `ensure` clause of the compile step.
def reset
@literal_id = 0
end
## ##
# Compiles an XPath AST into a Ruby Proc. # Compiles an XPath AST into a Ruby Proc.
# #
@ -61,6 +70,8 @@ module Oga
source = generator.process(proc_ast) source = generator.process(proc_ast)
eval(source) eval(source)
ensure
reset
end end
## ##
@ -246,17 +257,11 @@ module Oga
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
# #
def on_expression_predicate(test, predicate, input) def on_expression_predicate(test, predicate, input)
catch_arg = symbol(:predicate_matched)
process(test, input) do |matched_test_node| process(test, input) do |matched_test_node|
catch_block = send_message('catch', catch_arg).add_block do catch_block = catch_message(:predicate_matched) do
inner = process(predicate, matched_test_node) do process(predicate, matched_test_node) do
send_message('throw', catch_arg, literal('true')) throw_message(:predicate_matched, literal('true'))
end end
# Ensure that the "catch" only returns a value when "throw" is
# actually invoked.
inner.followed_by(literal('nil'))
end end
catch_block.if_true { yield matched_test_node } catch_block.if_true { yield matched_test_node }
@ -273,13 +278,99 @@ module Oga
name_match ? condition.and(name_match) : condition name_match ? condition.and(name_match) : condition
end end
##
# Processes the `=` operator.
#
# The generated code is optimized so that expressions such as `a/b = c`
# only match the first node in both arms instead of matching all available
# nodes first. Because the `=` only ever operates on the first node in a
# set we can simply ditch the rest, possibly speeding things up quite a
# bit. This only works if one of the arms is:
#
# * a path
# * an axis
# * a predicate
#
# Everything else is processed the usual (and possibly slower) way.
#
# The variables used by this operator are assigned a "begin" block
# containing the actual result. This ensures that each variable is
# assigned the result of the entire block instead of the first expression
# that occurs.
#
# For example, take the following expression:
#
# 10 = 10 = 20
#
# Without a "begin" we'd end up with the following code (trimmed for
# readability):
#
# eq_left3 = eq_left1 = ...
#
# eq_left2 = ...
#
# eq_left1, eq_left2 = to_compatible_types(eq_left1, eq_left2)
#
# eq_left1 == eq_left2
#
# eq_left4 = ...
#
# eq_left3 == eq_left4
#
# This would be incorrect as the first boolean expression (`10 = 10`)
# would be ignored. By using a "begin" we instead get the following:
#
# eq_left3 = begin
# eq_left1 = ...
#
# eq_left2 = ...
#
# eq_left1, eq_left2 = to_compatible_types(eq_left1, eq_left2)
#
# eq_left1 == eq_left2
# end
#
# eq_left4 = begin
# ...
# end
#
# eq_left3 == eq_left4
#
# @param [AST::Node] ast # @param [AST::Node] ast
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_eq(ast, input) def on_eq(ast, input)
left, right = *ast left, right = *ast
process(left, input).eq(process(right, input)) left_var = unique_literal('eq_left')
right_var = unique_literal('eq_right')
text_sym = symbol(:text)
conversion = literal('Conversion')
if return_nodeset?(left)
left_ast = match_first_node(left, input)
else
left_ast = process(left, input)
end
if return_nodeset?(right)
right_ast = match_first_node(right, input)
else
right_ast = process(right, input)
end
initial_assign = left_var.assign(left_ast.wrap)
.followed_by(right_var.assign(right_ast.wrap))
compatible_assign = mass_assign(
[left_var, right_var],
conversion.to_compatible_types(left_var, right_var)
)
initial_assign.followed_by(compatible_assign)
.followed_by(left_var.eq(right_var))
end end
# @param [AST::Node] ast # @param [AST::Node] ast
@ -322,6 +413,14 @@ module Oga
Ruby::Node.new(:lit, [value.to_s]) Ruby::Node.new(:lit, [value.to_s])
end end
##
# Builds a literal whose name is made unique by appending a counter value.
#
# Every call bumps the internal counter by one and appends the new value to
# the given prefix, e.g. "eq_left" becomes "eq_left1", then "eq_left2".
#
# @param [String] name The prefix of the generated name.
# @return [Oga::Ruby::Node]
#
def unique_literal(name)
  @literal_id += 1

  literal("#{name}#{@literal_id}")
end
# @param [#to_s] value # @param [#to_s] value
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
def string(value) def string(value)
@ -367,6 +466,21 @@ module Oga
condition condition
end end
##
# Returns an AST matching the first node of a node set.
#
# The generated code wraps the matching logic for `ast` in a
# `catch(:value)` block. For every node yielded while processing `ast` the
# generated code throws `:value` together with
# `Conversion.to_string(node)`, so the first matched node ends the catch
# block and its string value becomes the result. #catch_message appends a
# `nil` to the block body, so the block produces nil when nothing is thrown.
#
# @param [AST::Node] ast The XPath AST to match; it is handed to #process
#  as-is (the callers in #on_eq pass the children of an XPath AST node).
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
#
def match_first_node(ast, input)
catch_message(:value) do
process(ast, input) do |node|
# A new Conversion literal is built for every yielded node.
throw_message(:value, literal('Conversion').to_string(node))
end
end
end
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
def matched_literal def matched_literal
literal('matched') literal('matched')
@ -388,12 +502,44 @@ module Oga
literal(ast.children[0].to_i.to_s) literal(ast.children[0].to_i.to_s)
end end
##
# Builds a `:massign` node that assigns a single value to multiple
# variables at once.
#
# @param [Array] vars The variables to assign.
# @param [Oga::Ruby::Node] value The value to assign to the variables.
# @return [Oga::Ruby::Node]
#
def mass_assign(vars, value)
  children = [vars, value]

  Ruby::Node.new(:massign, children)
end
# @param [AST::Node] ast # @param [AST::Node] ast
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
def number?(ast) def number?(ast)
ast.type == :int || ast.type == :float ast.type == :int || ast.type == :float
end end
##
# Checks if the given AST node is a `:string` node.
#
# @param [AST::Node] ast
# @return [TrueClass|FalseClass]
#
def string?(ast)
  node_type = ast.type

  node_type == :string
end
##
# Builds a `catch` call for the given name. The body of the generated block
# is whatever the supplied Ruby block returns, followed by a `nil` literal.
#
# @param [Symbol] name The name to catch.
# @yieldreturn [Oga::Ruby::Node] The body of the catch block.
# @return [Oga::Ruby::Node]
#
def catch_message(name)
  catch_call = send_message('catch', symbol(name))

  catch_call.add_block do
    body = yield

    # The trailing nil ensures that the "catch" only returns a value when
    # "throw" is actually invoked.
    body.followed_by(literal('nil'))
  end
end
##
# Builds a `throw` call for the given name, passing any extra arguments
# along after the name.
#
# @param [Symbol] name The name to throw.
# @param [Array] args Extra arguments for the `throw` call.
# @return [Oga::Ruby::Node]
#
def throw_message(name, *args)
  tag = symbol(name)

  send_message('throw', tag, *args)
end
# @param [AST::Node] ast # @param [AST::Node] ast
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
def return_nodeset?(ast) def return_nodeset?(ast)

View File

@ -0,0 +1,83 @@
module Oga
  module XPath
    ##
    # Module for converting XPath objects such as NodeSets.
    #
    module Conversion
      ##
      # Converts both arguments to a type that can be compared using ==.
      #
      # Node sets on either side are first reduced to a String using
      # {to_string}. After that the right value is coerced to match the type
      # of the left value:
      #
      # * left is Numeric: right is converted using {to_float}
      # * left is a String: right is converted using {to_string}
      # * left is a boolean: right is converted using {to_boolean}
      #
      # The left value itself is only changed when it is a NodeSet.
      #
      # @param [Object] left
      # @param [Object] right
      # @return [Array] The (converted) left and right values, in that order.
      #
      def self.to_compatible_types(left, right)
        left  = to_string(left) if left.is_a?(XML::NodeSet)
        right = to_string(right) if right.is_a?(XML::NodeSet)

        if left.is_a?(Numeric) && !right.is_a?(Numeric)
          right = to_float(right)
        end

        if left.is_a?(String) && !right.is_a?(String)
          right = to_string(right)
        end

        if boolean?(left) && !boolean?(right)
          right = to_boolean(right)
        end

        [left, right]
      end

      ##
      # Converts a value to a String.
      #
      # Floats without a fractional part are formatted without the decimal
      # (10.0 becomes "10"), a NodeSet is converted to the text of its first
      # node (or an empty String if there is none) and anything responding
      # to `text` is converted to that text.
      #
      # @param [Object] value
      # @return [String]
      #
      def self.to_string(value)
        # If we have a number that has a zero decimal (e.g. 10.0) we want to
        # get rid of that decimal. For this we'll first convert the number to
        # an integer.
        if value.is_a?(Float) && value.modulo(1).zero?
          value = value.to_i
        end

        if value.is_a?(XML::NodeSet)
          first = value.first
          value = first.respond_to?(:text) ? first.text : ''
        end

        if value.respond_to?(:text)
          value = value.text
        end

        value.to_s
      end

      ##
      # Converts a value to a Float, returning NaN for values that can not
      # be converted.
      #
      # @param [Object] value
      # @return [Float]
      #
      def self.to_float(value)
        Float(value)
      rescue ArgumentError, TypeError
        # Kernel#Float raises ArgumentError for malformed Strings and
        # TypeError for objects (nil, booleans, etc) it can't convert.
        Float::NAN
      end

      ##
      # Converts a value to a boolean.
      #
      # Numbers are true unless they are zero (or NaN), values responding to
      # `empty?` are true unless they are empty. Everything else is false.
      #
      # @param [Object] value
      # @return [TrueClass|FalseClass]
      #
      def self.to_boolean(value)
        if value.is_a?(Float)
          !value.nan? && !value.zero?
        # Integer is used instead of Fixnum: Fixnum doesn't cover big
        # integers and the constant no longer exists in recent Ruby versions.
        elsif value.is_a?(Integer)
          !value.zero?
        elsif value.respond_to?(:empty?)
          !value.empty?
        else
          false
        end
      end

      ##
      # @param [Object] value
      # @return [TrueClass|FalseClass]
      #
      def self.boolean?(value)
        value.is_a?(TrueClass) || value.is_a?(FalseClass)
      end
    end # Conversion
  end # XPath
end # Oga

View File

@ -10,6 +10,14 @@ describe Oga::XPath::Compiler do
evaluate_xpath(@document, '10 = 10').should == true evaluate_xpath(@document, '10 = 10').should == true
end end
it 'returns true if two numbers and 1 are equal' do
evaluate_xpath(@document, '10 = 10 = 1').should == true
end
it 'returns false if two numbers and 0 are not equal' do
evaluate_xpath(@document, '10 = 10 = 0').should == false
end
it 'returns false if two numbers are not equal' do it 'returns false if two numbers are not equal' do
evaluate_xpath(@document, '10 = 15').should == false evaluate_xpath(@document, '10 = 15').should == false
end end
@ -34,6 +42,14 @@ describe Oga::XPath::Compiler do
evaluate_xpath(@document, 'root/a = root/b').should == true evaluate_xpath(@document, 'root/a = root/b').should == true
end end
it 'returns true if two node sets and 1 are equal' do
evaluate_xpath(@document, 'root/a = root/b = 1').should == true
end
it 'returns false if two node sets and 0 are not equal' do
evaluate_xpath(@document, 'root/a = root/b = 0').should == false
end
it 'returns false if two node sets are not equal' do it 'returns false if two node sets are not equal' do
evaluate_xpath(@document, 'root/a = root/c').should == false evaluate_xpath(@document, 'root/a = root/c').should == false
end end
@ -57,5 +73,11 @@ describe Oga::XPath::Compiler do
it 'returns true if an attribute and string are equal' do it 'returns true if an attribute and string are equal' do
evaluate_xpath(@document, 'root/b/@class = "foo"').should == true evaluate_xpath(@document, 'root/b/@class = "foo"').should == true
end end
it 'returns true if an axis and a string are equal' do
element = @document.at_xpath('root/b')
evaluate_xpath(element, '@class = "foo"').should == true
end
end end
end end

View File

@ -0,0 +1,180 @@
require 'spec_helper'
# Specs for Oga::XPath::Conversion, covering the type coercion used when
# comparing XPath values as well as the individual conversion helpers.
#
# NOTE(review): `node_set` is not defined in this file; presumably it is a
# helper from spec_helper that builds an Oga::XML::NodeSet out of the given
# nodes - confirm against the spec support code.
describe Oga::XPath::Conversion do
# to_compatible_types: the right value is coerced to match the left value,
# node sets are reduced to the text of their first node.
describe 'to_compatible_types' do
it 'returns two Strings when using two NodeSets' do
set1 = node_set(Oga::XML::Text.new(:text => 'foo'))
set2 = node_set(Oga::XML::Text.new(:text => 'bar'))
left, right = described_class.to_compatible_types(set1, set2)
left.should == 'foo'
right.should == 'bar'
end
it 'returns two Strings when using a NodeSet and Float' do
set = node_set(Oga::XML::Text.new(:text => 'foo'))
left, right = described_class.to_compatible_types(set, 10.5)
left.should == 'foo'
right.should == '10.5'
end
it 'returns two Floats when using a Float and NodeSet' do
set = node_set(Oga::XML::Text.new(:text => '20'))
left, right = described_class.to_compatible_types(10.5, set)
left.should == 10.5
right.should == 20.0
end
it 'returns two Strings when using a String and a Float' do
left, right = described_class.to_compatible_types('foo', 10.5)
left.should == 'foo'
right.should == '10.5'
end
it 'returns two booleans when using a boolean and a non-zero Fixnum' do
left, right = described_class.to_compatible_types(true, 10)
left.should == true
right.should == true
end
it 'returns two booleans when using a boolean and 0' do
left, right = described_class.to_compatible_types(true, 0)
left.should == true
right.should == false
end
it 'returns two booleans when using a boolean and a negative Fixnum' do
left, right = described_class.to_compatible_types(true, -5)
left.should == true
right.should == true
end
it 'returns two booleans when using a boolean and a non-empty NodeSet' do
set = node_set(Oga::XML::Text.new(:text => '20'))
left, right = described_class.to_compatible_types(true, set)
left.should == true
right.should == true
end
it 'returns two booleans when using a boolean and an empty NodeSet' do
set = node_set
left, right = described_class.to_compatible_types(true, set)
left.should == true
right.should == false
end
end
# to_string: Floats without a fractional part lose their decimal, nodes and
# node sets are converted to their text.
describe 'to_string' do
describe 'using a Float' do
it 'converts 10.0 to a String' do
described_class.to_string(10.0).should == '10'
end
it 'converts 10.5 to a String' do
described_class.to_string(10.5).should == '10.5'
end
end
describe 'using a Node' do
it 'converts an Element to a String' do
node = Oga::XML::Element.new(:name => 'p')
node.inner_text = 'foo'
described_class.to_string(node).should == 'foo'
end
it 'converts a Text to a String' do
node = Oga::XML::Text.new(:text => 'foo')
described_class.to_string(node).should == 'foo'
end
end
describe 'using a NodeSet' do
# Only the first node of the set contributes to the resulting String.
it 'returns the text of the first node' do
node1 = Oga::XML::Text.new(:text => 'foo')
node2 = Oga::XML::Text.new(:text => 'bar')
set = node_set(node1, node2)
described_class.to_string(set).should == 'foo'
end
it 'returns an empty String for an empty NodeSet' do
described_class.to_string(node_set).should == ''
end
end
describe 'using a Fixnum' do
it 'converts 10 to a String' do
described_class.to_string(10).should == '10'
end
end
end
# to_float: values that can't be parsed result in NaN instead of an error.
describe 'to_float' do
it 'returns a Float for a valid value' do
described_class.to_float('10.5').should == 10.5
end
it 'returns Float::NAN for an invalid value' do
# NaN never equals itself, hence the check using nan? instead of ==.
described_class.to_float('foo').nan?.should == true
end
end
# to_boolean: numbers are true unless zero, Strings and NodeSets are true
# unless empty.
describe 'to_boolean' do
it 'returns true for a non-empty String' do
described_class.to_boolean('foo').should == true
end
it 'returns false for an empty String' do
described_class.to_boolean('').should == false
end
it 'returns true for a positive Fixnum' do
described_class.to_boolean(10).should == true
end
it 'returns true for a positive Float' do
described_class.to_boolean(10.0).should == true
end
it 'returns true for a negative Fixnum' do
described_class.to_boolean(-10).should == true
end
it 'returns true for a negative Float' do
described_class.to_boolean(-10.0).should == true
end
it 'returns false for 0' do
described_class.to_boolean(0).should == false
end
it 'returns false for 0.0' do
described_class.to_boolean(0.0).should == false
end
it 'returns true for a non-empty NodeSet' do
set = node_set(Oga::XML::Node.new)
described_class.to_boolean(set).should == true
end
it 'returns false for an empty NodeSet' do
described_class.to_boolean(node_set).should == false
end
end
end