diff --git a/benchmark/xpath/evaluator/big_xml_average_bench.rb b/benchmark/xpath/compiler/big_xml_average_bench.rb similarity index 100% rename from benchmark/xpath/evaluator/big_xml_average_bench.rb rename to benchmark/xpath/compiler/big_xml_average_bench.rb diff --git a/benchmark/xpath/evaluator/concurrent_time_bench.rb b/benchmark/xpath/compiler/concurrent_time_bench.rb similarity index 86% rename from benchmark/xpath/evaluator/concurrent_time_bench.rb rename to benchmark/xpath/compiler/concurrent_time_bench.rb index 5360397..fbeb1a0 100644 --- a/benchmark/xpath/evaluator/concurrent_time_bench.rb +++ b/benchmark/xpath/compiler/concurrent_time_bench.rb @@ -28,12 +28,13 @@ require 'profile' if ENV['PROFILE'] thread_count.times.each do threads << Thread.new do oga_doc = documents.pop - evaluator = Oga::XPath::Evaluator.new(oga_doc) + compiler = Oga::XPath::Compiler.new + block = compiler.compile(xpath_ast) sample_size.times do break if stop - output << Benchmark.measure { evaluator.evaluate_ast(xpath_ast) } + output << Benchmark.measure { block.call(oga_doc) } end end end diff --git a/benchmark/xpath/evaluator/descendant_or_self_bench.rb b/benchmark/xpath/compiler/descendant_or_self_bench.rb similarity index 100% rename from benchmark/xpath/evaluator/descendant_or_self_bench.rb rename to benchmark/xpath/compiler/descendant_or_self_bench.rb diff --git a/benchmark/xpath/evaluator/simple_bench.rb b/benchmark/xpath/compiler/simple_bench.rb similarity index 100% rename from benchmark/xpath/evaluator/simple_bench.rb rename to benchmark/xpath/compiler/simple_bench.rb diff --git a/benchmark/xpath/evaluator/node_matches_bench.rb b/benchmark/xpath/evaluator/node_matches_bench.rb deleted file mode 100644 index d271019..0000000 --- a/benchmark/xpath/evaluator/node_matches_bench.rb +++ /dev/null @@ -1,30 +0,0 @@ -require_relative '../../benchmark_helper' - -xml_node = Oga::XML::Element.new(:name => 'foo') - -name_only = AST::Node.new(:test, [nil, 'foo']) -name_star = AST::Node.new(:test, [nil, '*']) -name_ns_star = AST::Node.new(:test, ['*', 'foo']) -name_ns = AST::Node.new(:test, ['bar', 'foo']) - -evaluator = Oga::XPath::Evaluator.new(xml_node) - -Benchmark.ips do |bench| - bench.report 'name only' do - evaluator.node_matches?(xml_node, name_only) - end - - bench.report 'name wildcard' do - evaluator.node_matches?(xml_node, name_star) - end - - bench.report 'name + namespace' do - evaluator.node_matches?(xml_node, name_ns) - end - - bench.report 'namespace wildcard' do - evaluator.node_matches?(xml_node, name_ns_star) - end - - bench.compare! -end diff --git a/benchmark/xpath/parser/comparing_gems_bench.rb b/benchmark/xpath/parser/comparing_gems_bench.rb index d09963a..9f1837d 100644 --- a/benchmark/xpath/parser/comparing_gems_bench.rb +++ b/benchmark/xpath/parser/comparing_gems_bench.rb @@ -14,9 +14,6 @@ rex_doc = REXML::Document.new(xml) ox_exp = 'number/^Text' xpath_exp = 'root/number/text()' -oga_ast = Oga::XPath::Parser.new(xpath_exp).parse -evaluator = Oga::XPath::Evaluator.new(oga_doc) - Benchmark.ips do |bench| # Technically not XPath but it's the closest thing Ox provides. bench.report 'Ox' do @@ -31,12 +28,6 @@ Benchmark.ips do |bench| oga_doc.xpath(xpath_exp) end - # This is measured to see what the performance of the evaluator is _without_ - # the overhead of the lexer/parser. - bench.report 'Oga cached' do - evaluator.evaluate_ast(oga_ast) - end - bench.report 'REXML' do REXML::XPath.match(rex_doc, xpath_exp) end diff --git a/lib/oga.rb b/lib/oga.rb index af07b65..089a43a 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -55,7 +55,6 @@ require 'oga/ruby/generator' require 'oga/xpath/lexer' require 'oga/xpath/parser' -require 'oga/xpath/evaluator' require 'oga/xpath/compiler' require 'oga/xpath/conversion' diff --git a/lib/oga/xml/querying.rb b/lib/oga/xml/querying.rb index f580dc0..43a4d31 100644 --- a/lib/oga/xml/querying.rb +++ b/lib/oga/xml/querying.rb @@ -10,7 +10,6 @@ module Oga # # @param [String] expression The XPath expression to run. # @param [Hash] variables Variables to bind. - # @see [Oga::XPath::Evaluator#initialize] # def xpath(expression, variables = {}) ast = XPath::Parser.parse_with_cache(expression) @@ -35,7 +34,6 @@ module Oga # Evaluates the given CSS expression. # # @param [String] expression The CSS expression to run. - # @see [Oga::XPath::Evaluator#initialize] # def css(expression) ast = CSS::Parser.parse_with_cache(expression) diff --git a/lib/oga/xpath/evaluator.rb b/lib/oga/xpath/evaluator.rb deleted file mode 100644 index 38cba75..0000000 --- a/lib/oga/xpath/evaluator.rb +++ /dev/null @@ -1,1800 +0,0 @@ -module Oga - module XPath - ## - # The Evaluator class evaluates XPath expressions, either as a String or an - # AST of `AST::Node` instances. - # - # ## Thread Safety - # - # This class is not thread-safe, you can not share the same instance between - # multiple threads. This is due to the use of an internal stack (see below - # for more information). It is however perfectly fine to use multiple - # separated instances as this class does not use a thread global state. - # - # ## Node Set Stack - # - # This class uses an internal stack of XML node sets. This stack is used for - # functions that require access to the set of nodes a predicate belongs to. - # An example of such a function is `position()`. - # - # An alternative would be to pass the node sets a predicate belongs to as an - # extra argument to the various `on_*` methods. The problematic part of - # this approach is that it requires every method to take and pass along the - # argument. It's far too easy to make mistakes in such a setup and as such - # I've chosen to use an internal stack instead. - # - # See {#with_node_set} and {#current_node_set} for more information. - # - # ## Set Indices - # - # XPath node sets start at index 1 instead of index 0. In other words, if - # you want to access the first node in a set you have to use index 1, not 0. - # Certain methods such as {#on_call_last} and {#on_call_position} take care - # of converting indices from Ruby to XPath. - # - # ## Number Types - # - # The XPath specification states that all numbers produced by an expression - # should be returned as double-precision 64bit IEEE 754 floating point - # numbers. For example, the return value of `position()` should be a float - # (e.g. "1.0", not "1"). - # - # Oga takes care internally of converting numbers to integers and/or floats - # where needed. The output types however will always be floats. - # - # For more information on the specification, see - # . - # - # ## Variables - # - # The evaluator supports the binding of custom variables in the - # {#initialize} method. Variables can be bound by passing in a Hash with the - # keys set to the variable names (minus the `$` sign) and their values to - # the variable values. The keys of the variables Hash *must* be Strings. - # - # A basic example: - # - # evaluator = Evaluator.new(document, 'number' => 10) - # - # evaluator.evaluate('$number') # => 10 - # - # @api private - # - class Evaluator - # Wildcard for node names/namespace prefixes. - STAR = '*' - - ## - # @param [Oga::XML::Document|Oga::XML::Node] document - # @param [Hash] variables Hash containing variables to expose to the XPath - # expressions. - # - def initialize(document, variables = {}) - @document = document - @variables = variables - @node_sets = [] - end - - ## - # Evaluates an XPath expression as a String. - # - # @example - # evaluator = Oga::XPath::Evaluator.new(document) - # - # evaluator.evaluate('//a') - # - # @param [String] string An XPath expression as a String. - # @return [Mixed] - # - def evaluate(string) - ast = Parser.parse_with_cache(string) - - evaluate_ast(ast) - end - - ## - # Evaluates a pre-parsed XPath expression. - # - # @param [AST::Node] ast - # @return [Mixed] - # - def evaluate_ast(ast) - context = XML::NodeSet.new([@document]) - - process(ast, context) - end - - ## - # Processes an XPath node by dispatching it and the given context to a - # dedicated handler method. Handler methods are called "on_X" where "X" is - # the node type. - # - # @param [AST::Node] ast_node The XPath AST node to process. - # - # @param [Oga::XML::NodeSet] context The context (a set of nodes) to - # evaluate an expression in. - # - # @return [Oga::XML::NodeSet] - # - def process(ast_node, context) - handler = "on_#{ast_node.type}" - - send(handler, ast_node, context) - end - - ## - # Processes an absolute XPath expression such as `/foo`. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_absolute_path(ast_node, context) - if @document.respond_to?(:root_node) - context = XML::NodeSet.new([@document.root_node]) - else - context = XML::NodeSet.new([@document]) - end - - # If the expression is just "/" we'll just return the current context. - ast_node.children.empty? ? context : on_path(ast_node, context) - end - - ## - # Processes a relative XPath expression such as `foo`. - # - # Paths are evaluated using a "short-circuit" mechanism similar to Ruby's - # `&&` / `and` operator. Whenever a path results in an empty node set the - # evaluation is aborted immediately. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_path(ast_node, context) - nodes = XML::NodeSet.new - - ast_node.children.each do |test| - nodes = process(test, context) - - if nodes.empty? - break - else - context = nodes - end - end - - nodes - end - - ## - # Processes a node test. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_test(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |xml_node| - nodes << xml_node if node_matches?(xml_node, ast_node) - end - - nodes - end - - ## - # Processes a predicate. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_predicate(ast_node, context) - test, predicate = *ast_node.children - final_nodes = XML::NodeSet.new - - context.each do |context_node| - initial_nodes = process(test, XML::NodeSet.new([context_node])) - xpath_index = 1 - - initial_nodes.each do |xml_node| - retval = with_node_set(initial_nodes) do - process(predicate, XML::NodeSet.new([xml_node])) - end - - # Numeric values are used as node set indexes. - if retval.is_a?(Numeric) - final_nodes << xml_node if retval.to_i == xpath_index - - # Node sets, strings, booleans, etc - elsif retval - if retval.respond_to?(:empty?) and retval.empty? - next - end - - final_nodes << xml_node - end - - xpath_index += 1 - end - end - - final_nodes - end - - ## - # Dispatches the processing of axes to dedicated methods. This works - # similar to {#process} except the handler names are "on_axis_X" with "X" - # being the axis name. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis(ast_node, context) - name, test = *ast_node.children - - handler = name.gsub('-', '_') - - send("on_axis_#{handler}", test, context) - end - - ## - # Processes the `ancestor` axis. This axis walks through the entire - # ancestor chain until a matching node is found. - # - # Evaluation happens using a "short-circuit" mechanism. The moment a - # matching node is found it is returned immediately. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_ancestor(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |xml_node| - while has_parent?(xml_node) - xml_node = xml_node.parent - - if node_matches?(xml_node, ast_node) - nodes << xml_node - break - end - end - end - - nodes - end - - ## - # Processes the `ancestor-or-self` axis. - # - # @see [#on_axis_ancestor] - # - def on_axis_ancestor_or_self(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |xml_node| - while has_parent?(xml_node) - if node_matches?(xml_node, ast_node) - nodes << xml_node - break - end - - xml_node = xml_node.parent - end - end - - nodes - end - - ## - # Processes the `attribute` axis. The node test is performed against all - # the attributes of the nodes in the current context. - # - # Evaluation of the nodes continues until the node set has been exhausted - # (unlike some other methods which return the moment they find a matching - # node). - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_attribute(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |xml_node| - next unless xml_node.is_a?(XML::Element) - - nodes += on_test(ast_node, xml_node.attributes) - end - - nodes - end - - ## - # Evaluates the `child` axis. This axis simply takes all the child nodes - # of the current context nodes. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_child(ast_node, context) - process(ast_node, child_nodes(context)) - end - - ## - # Evaluates the `descendant` axis. This method processes child nodes until - # the very end of the tree, no "short-circuiting" mechanism is used. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_descendant(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |context_node| - context_node.each_node do |node| - nodes.concat(process(ast_node, XML::NodeSet.new([node]))) - end - end - - nodes - end - - ## - # Evaluates the `descendant-or-self` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_descendant_or_self(ast_node, context) - nodes = on_test(ast_node, context) - - nodes.concat(on_axis_descendant(ast_node, context)) - - nodes - end - - ## - # Evaluates the `following` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_following(ast_node, context) - nodes = XML::NodeSet.new - root = root_node(@document) - - context.each do |context_node| - check = false - - root.each_node do |doc_node| - # Skip child nodes of the current context node, compare all - # following nodes. - if doc_node == context_node - check = true - throw :skip_children - end - - next unless check - - nodes << doc_node if node_matches?(doc_node, ast_node) - end - end - - nodes - end - - ## - # Evaluates the `following-sibling` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_following_sibling(ast_node, context) - nodes = XML::NodeSet.new - root = parent_node(@document) - - context.each do |context_node| - check = false - parent = has_parent?(context_node) ? context_node.parent : nil - - root.each_node do |doc_node| - # Skip child nodes of the current context node, compare all - # following nodes. - if doc_node == context_node - check = true - throw :skip_children - end - - if !check or parent != doc_node.parent - next - end - - if node_matches?(doc_node, ast_node) - nodes << doc_node - - throw :skip_children - end - end - end - - nodes - end - - ## - # Evaluates the `parent` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_parent(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |context_node| - next unless has_parent?(context_node) - - parent = context_node.parent - - nodes << parent if node_matches?(parent, ast_node) - end - - nodes - end - - ## - # Evaluates the `preceding` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_preceding(ast_node, context) - nodes = XML::NodeSet.new - root = root_node(@document) - - context.each do |context_node| - check = true - - root.each_node do |doc_node| - # Test everything *until* we hit the current context node. - if doc_node == context_node - break - elsif node_matches?(doc_node, ast_node) - nodes << doc_node - end - end - end - - nodes - end - - ## - # Evaluates the `preceding-sibling` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_preceding_sibling(ast_node, context) - nodes = XML::NodeSet.new - root = parent_node(@document) - - context.each do |context_node| - check = true - parent = has_parent?(context_node) ? context_node.parent : nil - - root.each_node do |doc_node| - # Test everything *until* we hit the current context node. - if doc_node == context_node - break - elsif doc_node.parent == parent and node_matches?(doc_node, ast_node) - nodes << doc_node - end - end - end - - nodes - end - - ## - # Evaluates the `self` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_self(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |context_node| - nodes << context_node if node_matches?(context_node, ast_node) - end - - nodes - end - - ## - # Evaluates the `namespace` axis. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_axis_namespace(ast_node, context) - nodes = XML::NodeSet.new - name = ast_node.children[1] - - context.each do |context_node| - next unless context_node.respond_to?(:available_namespaces) - - context_node.available_namespaces.each do |_, namespace| - if namespace.name == name or name == STAR - nodes << namespace - end - end - end - - nodes - end - - ## - # Dispatches node type matching to dedicated handlers. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_type_test(ast_node, context) - name, test = *ast_node.children - - handler = name.gsub('-', '_') - - send("on_type_test_#{handler}", test, context) - end - - ## - # Processes the `node` type matcher. This matcher matches all node types. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_type_test_node(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |node| - if node.is_a?(XML::Node) or node.is_a?(XML::Document) - nodes << node - end - end - - nodes - end - - ## - # Processes the `text()` type test. This matches only text nodes. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_type_test_text(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |node| - nodes << node if node.is_a?(XML::Text) - end - - nodes - end - - ## - # Processes the `comment()` type test. This matches only comment nodes. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_type_test_comment(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |node| - nodes << node if node.is_a?(XML::Comment) - end - - nodes - end - - ## - # Processes the `processing-instruction()` type test. This matches only - # processing-instruction nodes. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_type_test_processing_instruction(ast_node, context) - nodes = XML::NodeSet.new - - context.each do |node| - nodes << node if node.is_a?(XML::ProcessingInstruction) - end - - nodes - end - - ## - # Processes the pipe (`|`) operator. This operator creates a union of two - # sets. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_pipe(ast_node, context) - left, right = *ast_node.children - - process(left, context) + process(right, context) - end - - ## - # Processes the `and` operator. - # - # This operator returns true if both the left and right expression - # evaluate to `true`. If the first expression evaluates to `false` the - # right expression is ignored. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_and(ast_node, context) - left, right = *ast_node.children - - on_call_boolean(context, left) && on_call_boolean(context, right) - end - - ## - # Processes the `or` operator. - # - # This operator returns `true` if one of the expressions evaluates to - # true, otherwise false is returned. If the first expression evaluates to - # `true` the second expression is ignored. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_or(ast_node, context) - left, right = *ast_node.children - - on_call_boolean(context, left) || on_call_boolean(context, right) - end - - ## - # Processes the `+` operator. - # - # This operator converts the left and right expressions to numbers and - # adds them together. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_add(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) + on_call_number(context, right) - end - - ## - # Processes the `div` operator. - # - # This operator converts the left and right expressions to numbers and - # divides the left number with the right number. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_div(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) / on_call_number(context, right) - end - - ## - # Processes the `mod` operator. - # - # This operator converts the left and right expressions to numbers and - # returns the modulo of the two numbers. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_mod(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) % on_call_number(context, right) - end - - ## - # Processes the `*` operator. - # - # This operator converts the left and right expressions to numbers and - # multiplies the left number with the right number. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_mul(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) * on_call_number(context, right) - end - - ## - # Processes the `-` operator. - # - # This operator converts the left and right expressions to numbers and - # subtracts the right number of the left number. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_sub(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) - on_call_number(context, right) - end - - ## - # Processes the `=` operator. - # - # This operator evaluates the expression on the left and right and returns - # `true` if they are equal. This operator can be used to compare strings, - # numbers and node sets. When using node sets the text of the set is - # compared instead of the nodes themselves. That is, nodes with different - # names but the same text are considered to be equal. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_eq(ast_node, context) - left = process(ast_node.children[0], context) - right = process(ast_node.children[1], context) - - if left.is_a?(XML::NodeSet) - left = first_node_text(left) - end - - if right.is_a?(XML::NodeSet) - right = first_node_text(right) - end - - if left.is_a?(Numeric) and !right.is_a?(Numeric) - right = to_float(right) - end - - if left.is_a?(String) and !right.is_a?(String) - right = to_string(right) - end - - left == right - end - - ## - # Processes the `!=` operator. - # - # This operator does the exact opposite of the `=` operator. See {#on_eq} - # for more information. - # - # @see [#on_eq] - # - def on_neq(ast_node, context) - !on_eq(ast_node, context) - end - - ## - # Processes the `<` operator. - # - # This operator converts the left and right expression to a number and - # returns `true` if the first number is lower than the second number. - # - # @param [Oga::XML::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_lt(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) < on_call_number(context, right) - end - - ## - # Processes the `>` operator. - # - # This operator converts the left and right expression to a number and - # returns `true` if the first number is greater than the second number. - # - # @param [Oga::XML::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_gt(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) > on_call_number(context, right) - end - - ## - # Processes the `<=` operator. - # - # This operator converts the left and right expression to a number and - # returns `true` if the first number is lower-than or equal to the second - # number. - # - # @param [Oga::XML::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_lte(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) <= on_call_number(context, right) - end - - ## - # Processes the `>=` operator. - # - # This operator converts the left and right expression to a number and - # returns `true` if the first number is greater-than or equal to the - # second number. - # - # @param [Oga::XML::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [TrueClass|FalseClass] - # - def on_gte(ast_node, context) - left, right = *ast_node.children - - on_call_number(context, left) >= on_call_number(context, right) - end - - ## - # Delegates function calls to specific handlers. - # - # Handler functions take two arguments: - # - # 1. The context node set - # 2. A variable list of XPath function arguments, passed as individual - # Ruby method arguments. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Oga::XML::NodeSet] - # - def on_call(ast_node, context) - name, *args = *ast_node.children - - handler = name.gsub('-', '_') - - send("on_call_#{handler}", context, *args) - end - - ## - # Processes the `last()` function call. This function call returns the - # index of the last node in the current set. - # - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_call_last(context) - # XPath uses indexes 1 to N instead of 0 to N. - current_node_set.length.to_f - end - - ## - # Processes the `position()` function call. This function returns the - # position of the current node in the current node set. - # - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_call_position(context) - index = current_node_set.index(context.first) + 1 - - index.to_f - end - - ## - # Processes the `count()` function call. This function counts the amount - # of nodes in `expression` and returns the result as a float. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_count(context, expression) - retval = process(expression, context) - - unless retval.is_a?(XML::NodeSet) - raise TypeError, 'count() can only operate on NodeSet instances' - end - - retval.length.to_f - end - - ## - # Processes the `id()` function call. - # - # The XPath specification states that this function's behaviour should be - # controlled by a DTD. If a DTD were to specify that the ID attribute for - # a certain element would be "foo" then this function should use said - # attribute. - # - # Oga does not support DTD parsing/evaluation and as such always uses the - # "id" attribute. - # - # This function searches the entire document for a matching node, - # regardless of the current position. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Oga::XML::NodeSet] - # - def on_call_id(context, expression) - id = process(expression, context) - nodes = XML::NodeSet.new - - # Based on Nokogiri's/libxml behaviour it appears that when using a node - # set the text of the set is used as the ID. - id = id.is_a?(XML::NodeSet) ? id.text : id.to_s - ids = id.split(' ') - - @document.each_node do |node| - next unless node.is_a?(XML::Element) - - attr = node.attribute('id') - - if attr and ids.include?(attr.value) - nodes << node - end - end - - nodes - end - - ## - # Processes the `local-name()` function call. - # - # This function call returns the name of one of the following: - # - # * The current context node (if any) - # * The first node in the supplied node set - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Oga::XML::NodeSet] - # - def on_call_local_name(context, expression = nil) - node = function_node(context, expression) - - node.respond_to?(:name) ? node.name : '' - end - - ## - # Processes the `name()` function call. - # - # This function call is similar to `local-name()` (see - # {#on_call_local_name}) except that it includes the namespace name if - # present. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Oga::XML::NodeSet] - # - def on_call_name(context, expression = nil) - node = function_node(context, expression) - - if node.respond_to?(:name) and node.respond_to?(:namespace) - if node.namespace - return "#{node.namespace.name}:#{node.name}" - else - return node.name - end - else - return '' - end - end - - ## - # Processes the `namespace-uri()` function call. - # - # This function call returns the namespace URI of one of the following: - # - # * The current context node (if any) - # * The first node in the supplied node set - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Oga::XML::NodeSet] - # - def on_call_namespace_uri(context, expression = nil) - node = function_node(context, expression) - - if node.respond_to?(:namespace) and node.namespace - return node.namespace.uri - else - return '' - end - end - - ## - # Evaluates the `string()` function call. - # - # This function call converts the given argument *or* the current context - # node to a string. If a node set is given then only the first node is - # converted to a string. - # - # @example - # string(10) # => "10" - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [String] - # - def on_call_string(context, expression = nil) - if expression - convert = process(expression, context) - - if convert.is_a?(XML::NodeSet) - convert = convert[0] - end - else - convert = context.first - end - - if convert.respond_to?(:text) - return convert.text - else - return to_string(convert) - end - end - - ## - # Evaluates the `number()` function call. - # - # This function call converts its first argument *or* the current context - # node to a number, similar to the `string()` function. - # - # @example - # number("10") # => 10.0 - # - # @see [#on_call_string] - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_number(context, expression = nil) - convert = nil - - if expression - exp_retval = process(expression, context) - - if exp_retval.is_a?(XML::NodeSet) - convert = first_node_text(exp_retval) - - elsif exp_retval == true - convert = 1.0 - - elsif exp_retval == false - convert = 0.0 - - elsif exp_retval - convert = exp_retval - end - else - convert = context.first.text - end - - to_float(convert) - end - - ## - # Processes the `concat()` function call. - # - # This function call converts its arguments to strings and concatenates - # them. In case of node sets the text of the set is used. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] first - # @param [AST::Node] second - # @param [Array] rest - # - def on_call_concat(context, first, second, *rest) - args = [first, second] + rest - retval = '' - - args.each do |arg| - retval << on_call_string(context, arg) - end - - retval - end - - ## - # Processes the `starts-with()` function call. - # - # This function call returns `true` if the string in the 1st argument - # starts with the string in the 2nd argument. Node sets can also be used. - # - # @example - # starts-with("hello world", "hello") # => true - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] haystack The string to search. - # @param [AST::Node] needle The string to search for. - # @return [TrueClass|FalseClass] - # - def on_call_starts_with(context, haystack, needle) - haystack_str = on_call_string(context, haystack) - needle_str = on_call_string(context, needle) - - # https://github.com/jruby/jruby/issues/1923 - needle_str.empty? || haystack_str.start_with?(needle_str) - end - - ## - # Processes the `contains()` function call. - # - # This function call returns `true` if the string in the 1st argument - # contains the string in the 2nd argument. Node sets can also be used. - # - # @example - # contains("hello world", "o w") # => true - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] haystack The string to search. - # @param [AST::Node] needle The string to search for. - # @return [String] - # - def on_call_contains(context, haystack, needle) - haystack_str = on_call_string(context, haystack) - needle_str = on_call_string(context, needle) - - haystack_str.include?(needle_str) - end - - ## - # Processes the `substring-before()` function call. - # - # This function call returns the substring of the 1st argument that occurs - # before the string given in the 2nd argument. For example: - # - # substring-before("2014-08-25", "-") - # - # This would return "2014" as it occurs before the first "-". - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] haystack The string to search. - # @param [AST::Node] needle The string to search for. - # @return [String] - # - def on_call_substring_before(context, haystack, needle) - haystack_str = on_call_string(context, haystack) - needle_str = on_call_string(context, needle) - - before, sep, after = haystack_str.partition(needle_str) - - sep.empty? ? sep : before - end - - ## - # Processes the `substring-after()` function call. - # - # This function call returns the substring of the 1st argument that occurs - # after the string given in the 2nd argument. For example: - # - # substring-after("2014-08-25", "-") - # - # This would return "08-25" as it occurs after the first "-". - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] haystack The string to search. - # @param [AST::Node] needle The string to search for. - # @return [String] - # - def on_call_substring_after(context, haystack, needle) - haystack_str = on_call_string(context, haystack) - needle_str = on_call_string(context, needle) - - before, sep, after = haystack_str.partition(needle_str) - - sep.empty? ? sep : after - end - - ## - # Processes the `substring()` function call. - # - # This function call returns the substring of the 1st argument, starting - # at the position given in the 2nd argument. If the third argument is - # given it is used as the length for the substring, otherwise the string - # is consumed until the end. - # - # XPath string indexes start from position 1, not position 0. - # - # @example Using a literal string - # substring("foo", 2) # => "oo" - # - # @example Using a literal string with a custom length - # substring("foo", 1, 2) # => "fo" - # - # @example Using a node set - # substring(users/user/username, 5) - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] haystack - # @param [AST::Node] start - # @param [AST::Node] length - # @return [String] - # - def on_call_substring(context, haystack, start, length = nil) - haystack_str = on_call_string(context, haystack) - start_index = on_call_number(context, start).to_i - 1 - - if length - length_int = on_call_number(context, length).to_i - 1 - stop_index = start_index + length_int - else - stop_index = -1 - end - - haystack_str[start_index..stop_index] - end - - ## - # Processes the `string-length()` function. - # - # This function returns the length of the string given in the 1st argument - # *or* the current context node. If the expression is not a string it's - # converted to a string using the `string()` function. - # - # @see [#on_call_string] - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_string_length(context, expression = nil) - on_call_string(context, expression).length.to_f - end - - ## - # Processes the `normalize-space()` function call. - # - # This function strips the 1st argument string *or* the current context - # node of leading/trailing whitespace as well as replacing multiple - # whitespace sequences with single spaces. - # - # @example - # normalize-space(" fo o ") # => "fo o" - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [String] - # - def on_call_normalize_space(context, expression = nil) - str = on_call_string(context, expression) - - str.strip.gsub(/\s+/, ' ') - end - - ## - # Processes the `translate()` function call. - # - # This function takes the string of the 1st argument and replaces all - # characters of the 2nd argument with those specified in the 3rd argument. - # - # @example - # translate("bar", "abc", "ABC") # => "BAr" - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] input - # @param [AST::Node] find - # @param [AST::Node] replace - # @return [String] - # - def on_call_translate(context, input, find, replace) - input_str = on_call_string(context, input) - find_chars = on_call_string(context, find).chars.to_a - replace_chars = on_call_string(context, replace).chars.to_a - replaced = input_str - - find_chars.each_with_index do |char, index| - replace_with = replace_chars[index] ? replace_chars[index] : '' - replaced = replaced.gsub(char, replace_with) - end - - replaced - end - - ## - # Processes the `boolean()` function call. - # - # This function converts the 1st argument to a boolean. - # - # The boolean `true` is returned for the following: - # - # * A non empty string - # * A non empty node set - # * A non zero number, either positive or negative - # - # The boolean `false` is returned for all other cases. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [TrueClass|FalseClass] - # - def on_call_boolean(context, expression) - retval = process(expression, context) - bool = false - - if retval.is_a?(Numeric) - bool = !retval.nan? && !retval.zero? - elsif retval - bool = !retval.respond_to?(:empty?) || !retval.empty? - end - - bool - end - - ## - # Processes the `not()` function call. - # - # This function converts the 1st argument to a boolean and returns the - # opposite boolean value. For example, if the first argument results in - # `true` then this function returns `false` instead. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [TrueClass|FalseClass] - # - def on_call_not(context, expression) - !on_call_boolean(context, expression) - end - - ## - # Processes the `true()` function call. - # - # This function simply returns the boolean `true`. - # - # @param [AST::NodeSet] context - # @return [TrueClass] - # - def on_call_true(context) - true - end - - ## - # Processes the `false()` function call. - # - # This function simply returns the boolean `false`. - # - # @param [AST::NodeSet] context - # @return [FalseClass] - # - def on_call_false(context) - false - end - - ## - # Processes the `lang()` function call. - # - # This function returns `true` if the current context node is in the given - # language, `false` otherwise. - # - # The language is based on the value of the "xml:lang" attribute of either - # the context node or an ancestor node (in case the context node has no - # such attribute). - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] language - # @return [TrueClass|FalseClass] - # - def on_call_lang(context, language) - lang_str = on_call_string(context, language) - node = context.first - - while node.respond_to?(:attribute) - found = node.attribute('xml:lang') - - return found.value == lang_str if found - - node = node.parent - end - - false - end - - ## - # Processes the `sum()` function call. - # - # This function call takes a node set, converts each node to a number and - # then sums the values. - # - # As an example, take the following XML: - # - # - # 1 - # 2 - # - # - # Using the expression `sum(root/*)` the return value would be `3.0`. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_sum(context, expression) - nodes = process(expression, context) - sum = 0.0 - - unless nodes.is_a?(XML::NodeSet) - raise TypeError, 'sum() can only operate on NodeSet instances' - end - - nodes.each do |node| - sum += node.text.to_f - end - - sum - end - - ## - # Processes the `floor()` function call. - # - # This function call rounds the 1st argument down to the closest integer, - # and then returns that number as a float. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_floor(context, expression) - number = on_call_number(context, expression) - - number.nan? ? number : number.floor.to_f - end - - ## - # Processes the `ceiling()` function call. - # - # This function call rounds the 1st argument up to the closest integer, - # and then returns that number as a float. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_ceiling(context, expression) - number = on_call_number(context, expression) - - number.nan? ? number : number.ceil.to_f - end - - ## - # Processes the `round()` function call. - # - # This function call rounds the 1st argument to the closest integer, and - # then returns that number as a float. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Float] - # - def on_call_round(context, expression) - number = on_call_number(context, expression) - - number.nan? ? number : number.round.to_f - end - - ## - # Processes an `(int)` node. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_int(ast_node, context) - ast_node.children[0].to_f - end - - ## - # Processes an `(float)` node. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Float] - # - def on_float(ast_node, context) - ast_node.children[0] - end - - ## - # Processes a `(string)` node. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [String] - # - def on_string(ast_node, context) - ast_node.children[0] - end - - ## - # Processes a variable reference. If the variable is not defined an error - # is raised. - # - # @param [AST::Node] ast_node - # @param [Oga::XML::NodeSet] context - # @return [Mixed] - # @raise [RuntimeError] - # - def on_var(ast_node, context) - name = ast_node.children[0] - - if @variables.key?(name) - return @variables[name] - else - raise "Undefined XPath variable: #{name}" - end - end - - ## - # Returns the node for a function call. This node is either the first node - # in the supplied node set, or the first node in the current context. - # - # @param [Oga::XML::NodeSet] context - # @param [AST::Node] expression - # @return [Oga::XML::Node] - # - def function_node(context, expression = nil) - if expression - node = process(expression, context) - - if node.is_a?(XML::NodeSet) - node = node.first - else - raise TypeError, 'only node sets can be used as arguments' - end - else - node = context.first - end - - node - end - - ## - # Returns the text of the first node in the node set, or an empty string - # if the node set is empty. - # - # @param [Oga::XML::NodeSet] set - # @return [String] - # - def first_node_text(set) - set[0].respond_to?(:text) ? set[0].text : '' - end - - ## - # Returns a node set containing all the child nodes of the given set of - # nodes. - # - # @param [Oga::XML::NodeSet] nodes - # @return [Oga::XML::NodeSet] - # - def child_nodes(nodes) - children = XML::NodeSet.new - - nodes.each do |xml_node| - children.concat(xml_node.children) - end - - children - end - - ## - # Checks if a given {Oga::XML::Node} instance matches a `AST::Node` - # instance. - # - # This method can use both "test" and "type-test" nodes. In case of - # "type-test" nodes the procedure is as following: - # - # 1. Evaluate the expression - # 2. If the return value is non empty return `true`, otherwise return - # `false` - # - # For "test" nodes the procedure is as following instead: - # - # 1. Match the name - # 2. Match the namespace - # - # For both the name and namespace a wildcard (`*`) can be used. - # - # @param [Oga::XML::Node] xml_node - # @param [AST::Node] ast_node - # @return [Oga::XML::NodeSet] - # - def node_matches?(xml_node, ast_node) - ns, name = *ast_node.children - - if ast_node.type.equal?(:type_test) - return type_matches?(xml_node, ast_node) - end - - # If only the name is given and is a wildcard then we'll also want to - # match the namespace as a wildcard. - if !ns and name == STAR - ns = STAR - end - - name_matches = name_matches?(xml_node, name) - ns_matches = false - - if ns - ns_matches = namespace_matches?(xml_node, ns) - - elsif name_matches and !xml_node.namespace - ns_matches = true - end - - if !ns and !ns_matches - ns_matches = xml_node.respond_to?(:default_namespace?) && - xml_node.default_namespace? - end - - name_matches && ns_matches - end - - ## - # @param [Oga::XML::Node] xml_node - # @param [AST::Node] ast_node - # @return [TrueClass|FalseClass] - # - def type_matches?(xml_node, ast_node) - context = XML::NodeSet.new([xml_node]) - - process(ast_node, context).length > 0 - end - - ## - # Returns `true` if the name of the XML node matches the given name *or* - # matches a wildcard. - # - # @param [Oga::XML::Node] xml_node - # @param [String] name - # - def name_matches?(xml_node, name) - return false unless xml_node.respond_to?(:name) - - return true if name == STAR - - xml_node.name == name || xml_node.name.casecmp(name) == 0 - end - - ## - # Returns `true` if the namespace of the XML node matches the given - # namespace *or* matches a wildcard. - # - # @param [Oga::XML::Node] xml_node - # @param [String] ns - # - def namespace_matches?(xml_node, ns) - return false unless xml_node.respond_to?(:namespace) - - return true if ns == STAR - - xml_node.namespace && xml_node.namespace.name == ns - end - - ## - # @param [Oga::XML::Node] ast_node - # @return [TrueClass|FalseClass] - # - def has_parent?(ast_node) - ast_node.respond_to?(:parent) && !!ast_node.parent - end - - ## - # Converts the given value to a float. If the value can't be converted to - # a float NaN is returned instead. - # - # @param [Mixed] value - # @return [Float] - # - def to_float(value) - return Float(value) rescue Float::NAN - end - - ## - # Converts the given value to a string according to the XPath string - # conversion rules. - # - # @param [Mixed] value - # @return [String] - # - def to_string(value) - # If we have a number that has a zero decimal (e.g. 10.0) we want to - # get rid of that decimal. For this we'll first convert the number to - # an integer. - if value.is_a?(Float) and value.modulo(1).zero? - value = value.to_i - end - - value.to_s - end - - ## - # Stores the specified node set and yields the supplied block. The return - # value of this method is whatever the block returned. - # - # @example - # retval = with_node_set(context) do - # process(....) - # end - # - # @param [Oga::XML::NodeSet] nodes - # - def with_node_set(nodes) - @node_sets << nodes - - retval = yield - - @node_sets.pop - - retval - end - - ## - # @return [Oga::XML::NodeSet] - # - def current_node_set - @node_sets.last - end - - ## - # Returns the root node of `node`, or `node` itself if its a Document. - # - # @param [Oga::XML::Node|Oga::XML::Document] node - # @return [Oga::XML::Node|Oga::XML::Document] - # - def root_node(node) - node.respond_to?(:root_node) ? node.root_node : node - end - - ## - # Returns the parent node of `node`, or `node` itself if its a Document. - # - # @param [Oga::XML::Node|Oga::XML::Document] node - # @return [Oga::XML::Node|Oga::XML::Document] - # - def parent_node(node) - node.respond_to?(:parent) ? node.parent : node - end - end # Evaluator - end # XPath -end # Oga