Improved NodeSet performance by a factor of 50
This change is broken up in to two parts: 1. Using a Hash to track if a node is already in a NodeSet 2. Only calling take_ownership when an owner is set == Using a Hash Previously various methods such as NodeSet#push and NodeSet#unshift would call Array#include? (on the internal "nodes" Array) to see if a node is already present in the set. This is quite problematic performance wise, especially for large NodeSets. In fact, for the attached benchmark the vast majority of the time was spent in Array#include? calls. Because a NodeSet demands ordering of nodes and must be able to access them by index (something Set can't do without relying on Enumerable), a Hash is used to separately keep track of what nodes are in a NodeSet. This means that checking the presence of a node is simply a matter of checking a Hash key's presence. == Calling take_ownership The if-check for the "owner" variable has been moved out of the "take_ownership" method and into the methods that call "take_ownership". This ensures the method isn't called in the first place if no owner is present, at the cost of slightly more code repetition. The same applies to the "remove_ownership" method. == Conclusion The combined result is a speedup of about 50x when running the attached concurrent_time_bench.rb benchmark.
This commit is contained in:
parent
77c6f3af2a
commit
4f94d03a85
|
@ -0,0 +1,51 @@
|
|||
require_relative '../../benchmark_helper'
|
||||
require 'thread'
|
||||
|
||||
puts 'Preparing...'
|
||||
|
||||
xml = read_big_kaf
|
||||
xpath = 'KAF/terms/term'
|
||||
xpath_ast = Oga::XPath::Parser.new(xpath).parse
|
||||
output = Queue.new
|
||||
|
||||
stop = false
|
||||
threads = []
|
||||
|
||||
thread_count = ENV['THREADS'] ? ENV['THREADS'].to_i : 5
|
||||
|
||||
trap 'INT' do
|
||||
stop = true
|
||||
end
|
||||
|
||||
# Parse these outside of the profiler
|
||||
documents = thread_count.times.map { Oga.parse_xml(xml) }
|
||||
|
||||
puts 'Starting threads...'
|
||||
|
||||
require 'profile' if ENV['PROFILE']
|
||||
|
||||
thread_count.times.each do
|
||||
threads << Thread.new do
|
||||
oga_doc = documents.pop
|
||||
evaluator = Oga::XPath::Evaluator.new(oga_doc)
|
||||
|
||||
10.times do
|
||||
break if stop
|
||||
|
||||
output << Benchmark.measure { evaluator.evaluate_ast(xpath_ast) }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
threads.each(&:join)
|
||||
|
||||
samples = []
|
||||
|
||||
until output.empty?
|
||||
samples << output.pop.real
|
||||
end
|
||||
|
||||
average = samples.inject(:+) / samples.length
|
||||
|
||||
puts "Samples: #{samples.length}"
|
||||
puts "Average: #{average.round(4)} seconds"
|
|
@ -44,8 +44,13 @@ module Oga
|
|||
def initialize(nodes = [], owner = nil)
|
||||
@nodes = nodes
|
||||
@owner = owner
|
||||
@existing = {}
|
||||
|
||||
@nodes.each { |node| take_ownership(node) } if owner
|
||||
@nodes.each do |node|
|
||||
mark_existing(node)
|
||||
|
||||
take_ownership(node) if @owner
|
||||
end
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -103,11 +108,13 @@ module Oga
|
|||
# @param [Oga::XML::Node] node
|
||||
#
|
||||
def push(node)
|
||||
return if @nodes.include?(node)
|
||||
return if exists?(node)
|
||||
|
||||
@nodes << node
|
||||
|
||||
take_ownership(node)
|
||||
mark_existing(node)
|
||||
|
||||
take_ownership(node) if @owner
|
||||
end
|
||||
|
||||
alias_method :<<, :push
|
||||
|
@ -118,11 +125,13 @@ module Oga
|
|||
# @param [Oga::XML::Node] node
|
||||
#
|
||||
def unshift(node)
|
||||
return if @nodes.include?(node)
|
||||
return if exists?(node)
|
||||
|
||||
@nodes.unshift(node)
|
||||
|
||||
take_ownership(node)
|
||||
mark_existing(node)
|
||||
|
||||
take_ownership(node) if @owner
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -133,7 +142,11 @@ module Oga
|
|||
def shift
|
||||
node = @nodes.shift
|
||||
|
||||
remove_ownership(node)
|
||||
if node
|
||||
unmark_existing(node)
|
||||
|
||||
remove_ownership(node) if @owner
|
||||
end
|
||||
|
||||
node
|
||||
end
|
||||
|
@ -146,7 +159,11 @@ module Oga
|
|||
def pop
|
||||
node = @nodes.pop
|
||||
|
||||
remove_ownership(node)
|
||||
if node
|
||||
unmark_existing(node)
|
||||
|
||||
remove_ownership(node) if @owner
|
||||
end
|
||||
|
||||
node
|
||||
end
|
||||
|
@ -158,11 +175,13 @@ module Oga
|
|||
# @param [Oga::XML::Node] node
|
||||
#
|
||||
def insert(index, node)
|
||||
return if @nodes.include?(node)
|
||||
return if exists?(node)
|
||||
|
||||
@nodes.insert(index, node)
|
||||
|
||||
take_ownership(node)
|
||||
mark_existing(node)
|
||||
|
||||
take_ownership(node) if @owner
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -257,7 +276,11 @@ module Oga
|
|||
def delete(node)
|
||||
removed = @nodes.delete(node)
|
||||
|
||||
remove_ownership(removed) if removed
|
||||
if removed
|
||||
unmark_existing(removed)
|
||||
|
||||
remove_ownership(removed) if @owner
|
||||
end
|
||||
|
||||
removed
|
||||
end
|
||||
|
@ -317,7 +340,7 @@ module Oga
|
|||
# @param [Oga::XML::Node] node
|
||||
#
|
||||
def take_ownership(node)
|
||||
node.node_set = self if owner
|
||||
node.node_set = self
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -328,6 +351,22 @@ module Oga
|
|||
def remove_ownership(node)
|
||||
node.node_set = nil if node.node_set == self
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Node|Oga::XML::Attribute] node
|
||||
# @return [TrueClass|FalseClass]
|
||||
def exists?(node)
|
||||
@existing.key?(node)
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Node|Oga::XML::Attribute] node
|
||||
def mark_existing(node)
|
||||
@existing[node] = true
|
||||
end
|
||||
|
||||
# @param [Oga::XML::Node|Oga::XML::Attribute] node
|
||||
def unmark_existing(node)
|
||||
@existing.delete(node)
|
||||
end
|
||||
end # NodeSet
|
||||
end # XML
|
||||
end # Oga
|
||||
|
|
Loading…
Reference in New Issue