From 8a82cc3593f4bb7b195dc5d33e59954a147165f1 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 27 Jul 2015 01:00:14 +0200 Subject: [PATCH] XPath compiler support for the "=" operator --- lib/oga.rb | 1 + lib/oga/xpath/compiler.rb | 166 +++++++++++++++-- lib/oga/xpath/conversion.rb | 83 +++++++++ spec/oga/xpath/compiler/operators/eq_spec.rb | 22 +++ spec/oga/xpath/conversion_spec.rb | 180 +++++++++++++++++++ 5 files changed, 442 insertions(+), 10 deletions(-) create mode 100644 lib/oga/xpath/conversion.rb create mode 100644 spec/oga/xpath/conversion_spec.rb diff --git a/lib/oga.rb b/lib/oga.rb index 7dbfa8f..a82b723 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -56,6 +56,7 @@ require 'oga/xpath/lexer' require 'oga/xpath/parser' require 'oga/xpath/evaluator' require 'oga/xpath/compiler' +require 'oga/xpath/conversion' require 'oga/css/lexer' require 'oga/css/parser' diff --git a/lib/oga/xpath/compiler.rb b/lib/oga/xpath/compiler.rb index c20f310..577ea44 100644 --- a/lib/oga/xpath/compiler.rb +++ b/lib/oga/xpath/compiler.rb @@ -29,6 +29,15 @@ module Oga CACHE.get_or_set(ast) { new.compile(ast) } end + def initialize + reset + end + + # Resets the internal state. + def reset + @literal_id = 0 + end + ## # Compiles an XPath AST into a Ruby Proc. # @@ -61,6 +70,8 @@ module Oga source = generator.process(proc_ast) eval(source) + ensure + reset end ## @@ -246,17 +257,11 @@ module Oga # @return [Oga::Ruby::Node] # def on_expression_predicate(test, predicate, input) - catch_arg = symbol(:predicate_matched) - process(test, input) do |matched_test_node| - catch_block = send_message('catch', catch_arg).add_block do - inner = process(predicate, matched_test_node) do - send_message('throw', catch_arg, literal('true')) + catch_block = catch_message(:predicate_matched) do + process(predicate, matched_test_node) do + throw_message(:predicate_matched, literal('true')) end - - # Ensure that the "catch" only returns a value when "throw" is - # actually invoked. 
- inner.followed_by(literal('nil')) end catch_block.if_true { yield matched_test_node } @@ -273,13 +278,99 @@ module Oga name_match ? condition.and(name_match) : condition end + ## + # Processes the `=` operator. + # + # The generated code is optimized so that expressions such as `a/b = c` + # only match the first node in both arms instead of matching all available + # nodes first. Because the `=` only ever operates on the first node in a + # set we can simply ditch the rest, possibly speeding things up quite a + # bit. This only works if one of the arms is: + # + # * a path + # * an axis + # * a predicate + # + # Everything else is processed the usual (and possibly slower) way. + # + # The variables used by this operator are assigned a "begin" block + # containing the actual result. This ensures that each variable is + # assigned the result of the entire block instead of the first expression + # that occurs. + # + # For example, take the following expression: + # + # 10 = 10 = 20 + # + # Without a "begin" we'd end up with the following code (trimmed for + # readability): + # + # eq_left3 = eq_left1 = ... + # + # eq_left2 = ... + # + # eq_left1, eq_left2 = to_compatible_types(eq_left1, eq_left2) + # + # eq_left1 == eq_left2 + # + # eq_left4 = ... + # + # eq_left3 == eq_left4 + # + # This would be incorrect as the first boolean expression (`10 = 10`) + # would be ignored. By using a "begin" we instead get the following: + # + # eq_left3 = begin + # eq_left1 = ... + # + # eq_left2 = ... + # + # eq_left1, eq_left2 = to_compatible_types(eq_left1, eq_left2) + # + # eq_left1 == eq_left2 + # end + # + # eq_left4 = begin + # ... 
+ # end + # + # eq_left3 == eq_left4 + # # @param [AST::Node] ast # @param [Oga::Ruby::Node] input # @return [Oga::Ruby::Node] + # def on_eq(ast, input) left, right = *ast - process(left, input).eq(process(right, input)) + left_var = unique_literal('eq_left') + right_var = unique_literal('eq_right') + + text_sym = symbol(:text) + conversion = literal('Conversion') + + if return_nodeset?(left) + left_ast = match_first_node(left, input) + else + left_ast = process(left, input) + end + + if return_nodeset?(right) + right_ast = match_first_node(right, input) + else + right_ast = process(right, input) + end + + initial_assign = left_var.assign(left_ast.wrap) + .followed_by(right_var.assign(right_ast.wrap)) + + compatible_assign = mass_assign( + [left_var, right_var], + conversion.to_compatible_types(left_var, right_var) + ) + + initial_assign.followed_by(compatible_assign) + .followed_by(left_var.eq(right_var)) end # @param [AST::Node] ast @@ -322,6 +413,14 @@ module Oga Ruby::Node.new(:lit, [value.to_s]) end + # @param [String] name + # @return [Oga::Ruby::Node] + def unique_literal(name) + new_id = @literal_id += 1 + + literal("#{name}#{new_id}") + end + # @param [#to_s] value # @return [Oga::Ruby::Node] def string(value) @@ -367,6 +466,21 @@ module Oga condition end + ## + # Returns an AST matching the first node of a node set. + # + # @param [AST::Node] ast + # @param [Oga::Ruby::Node] input + # @return [Oga::Ruby::Node] + # + def match_first_node(ast, input) + catch_message(:value) do + process(ast, input) do |node| + throw_message(:value, literal('Conversion').to_string(node)) + end + end + end + # @return [Oga::Ruby::Node] def matched_literal literal('matched') @@ -388,12 +502,44 @@ module Oga literal(ast.children[0].to_i.to_s) end + ## + # @param [Array] vars The variables to assign.
+ # @param [Oga::Ruby::Node] value + # @return [Oga::Ruby::Node] + # + def mass_assign(vars, value) + Ruby::Node.new(:massign, [vars, value]) + end + # @param [AST::Node] ast # @return [TrueClass|FalseClass] def number?(ast) ast.type == :int || ast.type == :float end + # @param [AST::Node] ast + # @return [TrueClass|FalseClass] + def string?(ast) + ast.type == :string + end + + # @param [Symbol] name + # @return [Oga::Ruby::Node] + def catch_message(name) + send_message('catch', symbol(name)).add_block do + # Ensure that the "catch" only returns a value when "throw" is + # actually invoked. + yield.followed_by(literal('nil')) + end + end + + # @param [Symbol] name + # @param [Array] args + # @return [Oga::Ruby::Node] + def throw_message(name, *args) + send_message('throw', symbol(name), *args) + end + # @param [AST::Node] ast # @return [TrueClass|FalseClass] def return_nodeset?(ast) diff --git a/lib/oga/xpath/conversion.rb b/lib/oga/xpath/conversion.rb new file mode 100644 index 0000000..b66168f --- /dev/null +++ b/lib/oga/xpath/conversion.rb @@ -0,0 +1,83 @@ +module Oga + module XPath + ## + # Module for converting XPath objects such as NodeSets. + # + module Conversion + ## + # Converts both arguments to a type that can be compared using ==. + # + # @return [Array] + # + def self.to_compatible_types(left, right) + if left.is_a?(XML::NodeSet) + left = to_string(left) + end + + if right.is_a?(XML::NodeSet) + right = to_string(right) + end + + if left.is_a?(Numeric) and !right.is_a?(Numeric) + right = to_float(right) + end + + if left.is_a?(String) and !right.is_a?(String) + right = to_string(right) + end + + if boolean?(left) and !boolean?(right) + right = to_boolean(right) + end + + [left, right] + end + + # @return [String] + def self.to_string(value) + # If we have a number that has a zero decimal (e.g. 10.0) we want to + # get rid of that decimal. For this we'll first convert the number to + # an integer. + if value.is_a?(Float) and value.modulo(1).zero? 
+ value = value.to_i + end + + if value.is_a?(XML::NodeSet) + first = value.first + value = first.respond_to?(:text) ? first.text : '' + end + + if value.respond_to?(:text) + value = value.text + end + + value.to_s + end + + # @return [Float] + def self.to_float(value) + Float(value) rescue Float::NAN + end + + # @return [TrueClass|FalseClass] + def self.to_boolean(value) + bool = false + + if value.is_a?(Float) + bool = !value.nan? && !value.zero? + elsif value.is_a?(Fixnum) + bool = !value.zero? + elsif value.respond_to?(:empty?) + bool = !value.empty? + end + + bool + end + + # @return [TrueClass|FalseClass] + def self.boolean?(value) + value.is_a?(TrueClass) || value.is_a?(FalseClass) + end + end # Conversion + end # XPath +end # Oga diff --git a/spec/oga/xpath/compiler/operators/eq_spec.rb b/spec/oga/xpath/compiler/operators/eq_spec.rb index 63ce4d3..157036e 100644 --- a/spec/oga/xpath/compiler/operators/eq_spec.rb +++ b/spec/oga/xpath/compiler/operators/eq_spec.rb @@ -10,6 +10,14 @@ describe Oga::XPath::Compiler do evaluate_xpath(@document, '10 = 10').should == true end + it 'returns true if two numbers and 1 are equal' do + evaluate_xpath(@document, '10 = 10 = 1').should == true + end + + it 'returns false if two numbers and 0 are not equal' do + evaluate_xpath(@document, '10 = 10 = 0').should == false + end + it 'returns false if two numbers are not equal' do evaluate_xpath(@document, '10 = 15').should == false end @@ -34,6 +42,14 @@ describe Oga::XPath::Compiler do evaluate_xpath(@document, 'root/a = root/b').should == true end + it 'returns true if two node sets and 1 are equal' do + evaluate_xpath(@document, 'root/a = root/b = 1').should == true + end + + it 'returns false if two node sets and 0 are not equal' do + evaluate_xpath(@document, 'root/a = root/b = 0').should == false + end + it 'returns false if two node sets are not equal' do evaluate_xpath(@document, 'root/a = root/c').should == false end @@ -57,5 +73,11 @@ describe Oga::XPath::Compiler do 
it 'returns true if an attribute and string are equal' do evaluate_xpath(@document, 'root/b/@class = "foo"').should == true end + + it 'returns true if an axis and a string are equal' do + element = @document.at_xpath('root/b') + + evaluate_xpath(element, '@class = "foo"').should == true + end end end diff --git a/spec/oga/xpath/conversion_spec.rb b/spec/oga/xpath/conversion_spec.rb new file mode 100644 index 0000000..c8d79c8 --- /dev/null +++ b/spec/oga/xpath/conversion_spec.rb @@ -0,0 +1,180 @@ +require 'spec_helper' + +describe Oga::XPath::Conversion do + describe 'to_compatible_types' do + it 'returns two Strings when using two NodeSets' do + set1 = node_set(Oga::XML::Text.new(:text => 'foo')) + set2 = node_set(Oga::XML::Text.new(:text => 'bar')) + + left, right = described_class.to_compatible_types(set1, set2) + + left.should == 'foo' + right.should == 'bar' + end + + it 'returns two Strings when using a NodeSet and Float' do + set = node_set(Oga::XML::Text.new(:text => 'foo')) + + left, right = described_class.to_compatible_types(set, 10.5) + + left.should == 'foo' + right.should == '10.5' + end + + it 'returns two Floats when using a Float and NodeSet' do + set = node_set(Oga::XML::Text.new(:text => '20')) + + left, right = described_class.to_compatible_types(10.5, set) + + left.should == 10.5 + right.should == 20.0 + end + + it 'returns two Strings when using a String and a Float' do + left, right = described_class.to_compatible_types('foo', 10.5) + + left.should == 'foo' + right.should == '10.5' + end + + it 'returns two booleans when using a boolean and a non-zero Fixnum' do + left, right = described_class.to_compatible_types(true, 10) + + left.should == true + right.should == true + end + + it 'returns two booleans when using a boolean and 0' do + left, right = described_class.to_compatible_types(true, 0) + + left.should == true + right.should == false + end + + it 'returns two booleans when using a boolean and a negative Fixnum' do + left, right = 
described_class.to_compatible_types(true, -5) + + left.should == true + right.should == true + end + + it 'returns two booleans when using a boolean and a non-empty NodeSet' do + set = node_set(Oga::XML::Text.new(:text => '20')) + + left, right = described_class.to_compatible_types(true, set) + + left.should == true + right.should == true + end + + it 'returns two booleans when using a boolean and an empty NodeSet' do + set = node_set + + left, right = described_class.to_compatible_types(true, set) + + left.should == true + right.should == false + end + end + + describe 'to_string' do + describe 'using a Float' do + it 'converts 10.0 to a String' do + described_class.to_string(10.0).should == '10' + end + + it 'converts 10.5 to a String' do + described_class.to_string(10.5).should == '10.5' + end + end + + describe 'using a Node' do + it 'converts an Element to a String' do + node = Oga::XML::Element.new(:name => 'p') + node.inner_text = 'foo' + + described_class.to_string(node).should == 'foo' + end + + it 'converts a Text to a String' do + node = Oga::XML::Text.new(:text => 'foo') + + described_class.to_string(node).should == 'foo' + end + end + + describe 'using a NodeSet' do + it 'returns the text of the first node' do + node1 = Oga::XML::Text.new(:text => 'foo') + node2 = Oga::XML::Text.new(:text => 'bar') + set = node_set(node1, node2) + + described_class.to_string(set).should == 'foo' + end + + it 'returns an empty String for an empty NodeSet' do + described_class.to_string(node_set).should == '' + end + end + + describe 'using a Fixnum' do + it 'converts 10 to a String' do + described_class.to_string(10).should == '10' + end + end + end + + describe 'to_float' do + it 'returns a Float for a valid value' do + described_class.to_float('10.5').should == 10.5 + end + + it 'returns Float::NAN for an invalid value' do + described_class.to_float('foo').nan?.should == true + end + end + + describe 'to_boolean' do + it 'returns true for a non-empty String' do + 
described_class.to_boolean('foo').should == true + end + + it 'returns false for an empty String' do + described_class.to_boolean('').should == false + end + + it 'returns true for a positive Fixnum' do + described_class.to_boolean(10).should == true + end + + it 'returns true for a positive Float' do + described_class.to_boolean(10.0).should == true + end + + it 'returns true for a negative Fixnum' do + described_class.to_boolean(-10).should == true + end + + it 'returns true for a negative Float' do + described_class.to_boolean(-10.0).should == true + end + + it 'returns false for 0' do + described_class.to_boolean(0).should == false + end + + it 'returns false for 0.0' do + described_class.to_boolean(0.0).should == false + end + + it 'returns true for a non-empty NodeSet' do + set = node_set(Oga::XML::Node.new) + + described_class.to_boolean(set).should == true + end + + it 'returns false for an empty NodeSet' do + described_class.to_boolean(node_set).should == false + end + end +end