Fixes #118 - decoding invalid entities.
The previous regular expression was too greedy: in hex mode it matched letters outside the A-F range, and it also matched letters when not in hex mode.
This commit is contained in:
parent
8990a62224
commit
6fc3ef425b
|
@ -63,7 +63,7 @@ module Oga
|
||||||
#
|
#
|
||||||
# @return [Regexp]
|
# @return [Regexp]
|
||||||
#
|
#
|
||||||
CODEPOINT_ENTITY = /&#(x)?([a-zA-Z0-9]+);/
|
CODEPOINT_ENTITY = /&#(x[a-fA-F0-9]+|\d+);/
|
||||||
|
|
||||||
##
|
##
|
||||||
# @return [Regexp]
|
# @return [Regexp]
|
||||||
|
@ -90,7 +90,7 @@ module Oga
|
||||||
|
|
||||||
if input.include?(AMPERSAND)
|
if input.include?(AMPERSAND)
|
||||||
input = input.gsub(CODEPOINT_ENTITY) do |match|
|
input = input.gsub(CODEPOINT_ENTITY) do |match|
|
||||||
[$1 ? Integer($2, 16) : Integer($2, 10)].pack('U*')
|
[$1.start_with?('x') ? Integer($1[1..-1], 16) : Integer($1, 10)].pack('U*')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -73,6 +73,18 @@ describe Oga::XML::Entities do
|
||||||
it 'decodes numeric entities starting with a 0' do
|
it 'decodes numeric entities starting with a 0' do
|
||||||
described_class.decode('&#038;').should == '&'
|
described_class.decode('&#038;').should == '&'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'preserves entity-like tokens' do
|
||||||
|
described_class.decode('&#TAB;').should == '&#TAB;'
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'preserves entity-like hex tokens' do
|
||||||
|
described_class.decode('&#x;').should == '&#x;'
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'preserves entity-like letters in non-hex mode' do
|
||||||
|
described_class.decode('&#123A;').should == '&#123A;'
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'encode' do
|
describe 'encode' do
|
||||||
|
|
Loading…
Reference in New Issue