Preserve entities that can't be decoded
Certain entities, when decoded, produce a String with an invalid encoding. This commit ensures that, instead of raising an EncodingError further down the line (e.g. when calling "inspect" on a document), such entities are preserved as-is. Fixes #143
This commit is contained in:
parent
76b183e7ab
commit
5bfc2d50f2
|
@ -74,14 +74,14 @@ module Oga
|
|||
input = input.gsub(REGULAR_ENTITY, mapping)
|
||||
|
||||
if input.include?(AMPERSAND)
|
||||
input = input.gsub(NUMERIC_CODE_POINT_ENTITY) do
|
||||
[Integer($1, 10)].pack('U*')
|
||||
input = input.gsub(NUMERIC_CODE_POINT_ENTITY) do |found|
|
||||
pack_string($1, 10) || found
|
||||
end
|
||||
end
|
||||
|
||||
if input.include?(AMPERSAND)
|
||||
input = input.gsub(HEX_CODE_POINT_ENTITY) do
|
||||
[Integer($1, 16)].pack('U*')
|
||||
input = input.gsub(HEX_CODE_POINT_ENTITY) do |found|
|
||||
pack_string($1, 16) || found
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -104,6 +104,17 @@ module Oga
|
|||
# Returns a copy of `input` in which every match of ENCODE_ATTRIBUTE_REGEXP
# is replaced using ENCODE_ATTRIBUTE_MAPPING.
#
# @param [String] input
# @return [String]
def self.encode_attribute(input)
|
||||
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Converts the textual code point of a numeric entity into a UTF-8 String.
#
# Returns nil (instead of raising) when the code point can't be turned into
# a validly encoded String, so the caller can fall back to the original
# entity text.
#
# @param [String] input The digits of the code point, written in `base`.
# @param [Fixnum] base The radix of `input` (callers pass 10 or 16).
# @return [String|NilClass]
def self.pack_string(input, base)
  code_point = Integer(input, base)

  # Array#pack('U*') raises RangeError for negative or very large values.
  # Nothing outside 0..0x10FFFF is a Unicode code point, so bail out before
  # packing rather than letting the error escape.
  return nil unless code_point >= 0 && code_point <= 0x10FFFF

  packed = [code_point].pack('U*')

  # Surrogates (U+D800..U+DFFF) pack without error but yield a String whose
  # encoding is invalid.
  packed.valid_encoding? ? packed : nil
end
|
||||
end # Entities
|
||||
end # XML
|
||||
end # Oga
|
||||
|
|
|
@ -85,6 +85,14 @@ describe Oga::XML::Entities do
|
|||
it 'preserves entity-like letters in non-hex mode' do
  # The input has to be written as a literal entity; a pre-decoded "{" would
  # never exercise the decoder.
  described_class.decode('&#123A;').should == '&#123A;'
end
|
||||
|
||||
it "preserves numeric entities when they can't be decoded" do
  # 55296 is U+D800, a lone surrogate, which cannot be encoded as valid UTF-8.
  described_class.decode('&#55296;').should == '&#55296;'
end
|
||||
|
||||
it "preserves hex entities when they can't be decoded" do
  # U+D800 is a lone surrogate, which cannot be encoded as valid UTF-8.
  described_class.decode('&#xD800;').should == '&#xD800;'
end
|
||||
end
|
||||
|
||||
describe 'encode' do
|
||||
|
|
Loading…
Reference in New Issue