Splits entity decoding into two steps.
Decoding decimal and hex entities separately results in nicer code that does not rely on string matching and slicing.
This commit is contained in:
parent
6fc3ef425b
commit
6786dde3f0
|
@ -59,11 +59,18 @@ module Oga
|
||||||
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
|
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
|
||||||
|
|
||||||
##
|
##
|
||||||
# Regexp for matching XML/HTML entities such as "&#38;".
|
# Regexp for matching XML/HTML numeric entities such as "&#38;".
|
||||||
#
|
#
|
||||||
# @return [Regexp]
|
# @return [Regexp]
|
||||||
#
|
#
|
||||||
CODEPOINT_ENTITY = /&#(x[a-fA-F0-9]+|\d+);/
|
NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/
|
||||||
|
|
||||||
|
##
|
||||||
|
# Regexp for matching XML/HTML hex entities such as "&#x3C;".
|
||||||
|
#
|
||||||
|
# @return [Regexp]
|
||||||
|
#
|
||||||
|
HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/
|
||||||
|
|
||||||
##
|
##
|
||||||
# @return [Regexp]
|
# @return [Regexp]
|
||||||
|
@ -89,8 +96,14 @@ module Oga
|
||||||
input = input.gsub(REGULAR_ENTITY, mapping)
|
input = input.gsub(REGULAR_ENTITY, mapping)
|
||||||
|
|
||||||
if input.include?(AMPERSAND)
|
if input.include?(AMPERSAND)
|
||||||
input = input.gsub(CODEPOINT_ENTITY) do |match|
|
input = input.gsub(NUMERIC_CODE_POINT_ENTITY) do
|
||||||
[$1.start_with?('x') ? Integer($1[1..-1], 16) : Integer($1, 10)].pack('U*')
|
[Integer($1, 10)].pack('U*')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if input.include?(AMPERSAND)
|
||||||
|
input = input.gsub(HEX_CODE_POINT_ENTITY) do
|
||||||
|
[Integer($1, 16)].pack('U*')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue