From af7f2674af65a2dd50f6f8a138ddd9429e8533d1 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Sun, 7 Jun 2015 17:42:24 +0200 Subject: [PATCH] Decoding of entities with numbers This ensures that entities such as "&frac12;" are decoded properly. Previously this would be ignored as the regular expression used for this only matched [a-zA-Z]. This was adapted from PR #111. --- lib/oga/xml/entities.rb | 2 +- spec/oga/html/entities_spec.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/oga/xml/entities.rb b/lib/oga/xml/entities.rb index eaea9d6..63cd974 100644 --- a/lib/oga/xml/entities.rb +++ b/lib/oga/xml/entities.rb @@ -42,7 +42,7 @@ module Oga # # @return [Regexp] # - REGULAR_ENTITY = /&[a-zA-Z]+;/ + REGULAR_ENTITY = /&[a-zA-Z0-9]+;/ ## # Regexp for matching XML/HTML entities such as "&". diff --git a/spec/oga/html/entities_spec.rb b/spec/oga/html/entities_spec.rb index 7ee2204..b036c0f 100644 --- a/spec/oga/html/entities_spec.rb +++ b/spec/oga/html/entities_spec.rb @@ -11,5 +11,9 @@ describe Oga::HTML::Entities do it 'decodes λ into λ' do described_class.decode('λ').should == 'λ' end + + it 'decodes &frac12; into ½' do + described_class.decode('&frac12;').should == '½' + end end end