From 4469ffc5b11586cdc2e4e89c824d3e53452f7d5e Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 24 Sep 2014 11:07:34 +0200 Subject: [PATCH] Improved HTML void element detection performance. This ensures we only call String#downcase if the element name matches neither the all-lowercase nor the all-uppercase version of a void element name. This in turn can save as many object allocations as there are HTML opening tags. This fixes #52. --- lib/oga/xml/html_void_elements.rb | 2 ++ lib/oga/xml/lexer.rb | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/oga/xml/html_void_elements.rb b/lib/oga/xml/html_void_elements.rb index 4196407..2efbb6f 100644 --- a/lib/oga/xml/html_void_elements.rb +++ b/lib/oga/xml/html_void_elements.rb @@ -24,5 +24,7 @@ module Oga 'track', 'wbr' ]) + + HTML_VOID_ELEMENTS.merge(HTML_VOID_ELEMENTS.map { |name| name.upcase }) end # XML end # Oga diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 3030be3..605e75a 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -322,7 +322,12 @@ module Oga # Called on the closing `>` of the open tag of an element. # def on_element_open_end - if html? and HTML_VOID_ELEMENTS.include?(current_element.downcase) + return unless html? + + # Only downcase the name if we can't find an all lower/upper version of + # the element name. This can save us a *lot* of String allocations. + if HTML_VOID_ELEMENTS.include?(current_element) \ + or HTML_VOID_ELEMENTS.include?(current_element.downcase) add_token(:T_ELEM_END) @elements.pop end