Preserve entities that can't be decoded
Certain entities, when decoded, produce a String with an invalid encoding. This commit ensures that, instead of raising an EncodingError further down the line (e.g. when calling "inspect" on a document), such entities are preserved as-is. Fixes #143
This commit is contained in:
parent
76b183e7ab
commit
5bfc2d50f2
|
@ -74,14 +74,14 @@ module Oga
|
||||||
input = input.gsub(REGULAR_ENTITY, mapping)
|
input = input.gsub(REGULAR_ENTITY, mapping)
|
||||||
|
|
||||||
if input.include?(AMPERSAND)
|
if input.include?(AMPERSAND)
|
||||||
input = input.gsub(NUMERIC_CODE_POINT_ENTITY) do
|
input = input.gsub(NUMERIC_CODE_POINT_ENTITY) do |found|
|
||||||
[Integer($1, 10)].pack('U*')
|
pack_string($1, 10) || found
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if input.include?(AMPERSAND)
|
if input.include?(AMPERSAND)
|
||||||
input = input.gsub(HEX_CODE_POINT_ENTITY) do
|
input = input.gsub(HEX_CODE_POINT_ENTITY) do |found|
|
||||||
[Integer($1, 16)].pack('U*')
|
pack_string($1, 16) || found
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -104,6 +104,17 @@ module Oga
|
||||||
def self.encode_attribute(input)
|
def self.encode_attribute(input)
|
||||||
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Converts the textual code point of a numeric entity into a UTF-8 String.
#
# Returns nil when the code point can't be turned into a validly encoded
# String, so that callers can fall back to the original entity text
# (`pack_string(...) || found`).
#
# NOTE(review): the bare `private` preceding this method does not affect
# methods defined with `def self.`; `private_class_method :pack_string`
# would be needed to actually hide it from outside callers.
#
# @param [String] input The digits of the code point, without prefix.
# @param [Fixnum] base The numeric base of `input` (10 or 16).
# @return [String, nil]
def self.pack_string(input, base)
  packed = [Integer(input, base)].pack('U*')

  packed.valid_encoding? ? packed : nil
rescue RangeError
  # Array#pack raises RangeError for code points that don't fit the "U"
  # directive (e.g. "&#xFFFFFFFF;"). Treat these as undecodable too instead
  # of letting the error escape.
  nil
end
|
||||||
end # Entities
|
end # Entities
|
||||||
end # XML
|
end # XML
|
||||||
end # Oga
|
end # Oga
|
||||||
|
|
|
@ -85,6 +85,14 @@ describe Oga::XML::Entities do
|
||||||
it 'preserves entity-like letters in non-hex mode' do
|
it 'preserves entity-like letters in non-hex mode' do
|
||||||
described_class.decode('{A;').should == '{A;'
|
described_class.decode('{A;').should == '{A;'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "preserves numeric entities when they can't be decoded" do
|
||||||
|
described_class.decode('�').should == '�'
|
||||||
|
end
|
||||||
|
|
||||||
|
it "preserves hex entities when they can't be decoded" do
|
||||||
|
described_class.decode('�').should == '�'
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'encode' do
|
describe 'encode' do
|
||||||
|
|
Loading…
Reference in New Issue