From 4bdc8a3fdcc3111c1e2f7de983faaaf5bb6fffb1 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 8 Apr 2015 14:30:02 +0200 Subject: [PATCH] Don't convert entities in script/style elements In HTML the text of a script/style tag should be left untouched; no entities must be converted. Doing so would break JavaScript such as the following: foo&&bar; Such code is often the result of minifiers doing their dirty business. --- lib/oga/xml/text.rb | 28 +++++++++++++++++++++------- spec/oga/xml/text_spec.rb | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/lib/oga/xml/text.rb b/lib/oga/xml/text.rb index 6d8115c..a9eb796 100644 --- a/lib/oga/xml/text.rb +++ b/lib/oga/xml/text.rb @@ -26,7 +26,7 @@ module Oga # @return [String] # def text - unless @decoded + if decode_entities? @text = EntityDecoder.try_decode(@text, html?) @decoded = true end @@ -38,15 +38,29 @@ module Oga # @see [Oga::XML::CharacterNode#to_xml] # def to_xml - node = parent - - if node.is_a?(Element) and html? \ - and Lexer::LITERAL_HTML_ELEMENTS.include?(node.name) - return super - end + return super if inside_literal_html? return Entities.encode(super) end + + private + + ## + # @return [TrueClass|FalseClass] + # + def decode_entities? + return !@decoded && !inside_literal_html? + end + + ## + # @return [TrueClass|FalseClass] + # + def inside_literal_html? + node = parent + + return node.is_a?(Element) && html? 
&& + Lexer::LITERAL_HTML_ELEMENTS.include?(node.name) + end end # Text end # XML end # Oga diff --git a/spec/oga/xml/text_spec.rb b/spec/oga/xml/text_spec.rb index b29ef2a..95998af 100644 --- a/spec/oga/xml/text_spec.rb +++ b/spec/oga/xml/text_spec.rb @@ -85,6 +85,38 @@ describe Oga::XML::Text do node.text.should == [160].pack('U') end end + + describe 'inside an HTML script element' do + before do + @element = Oga::XML::Element.new(:name => 'script') + @document = Oga::XML::Document.new( + :type => :html, + :children => [@element] + ) + end + + it 'does not decode any entities' do + @element.inner_text = '&foo;' + + @element.inner_text.should == '&foo;' + end + end + + describe 'inside an HTML style element' do + before do + @element = Oga::XML::Element.new(:name => 'style') + @document = Oga::XML::Document.new( + :type => :html, + :children => [@element] + ) + end + + it 'does not decode any entities' do + @element.inner_text = '&foo;' + + @element.inner_text.should == '&foo;' + end + end end describe '#to_xml' do