diff --git a/lib/oga.rb b/lib/oga.rb
index a8e940f..5090321 100644
--- a/lib/oga.rb
+++ b/lib/oga.rb
@@ -3,6 +3,7 @@ gem 'racc'
require 'ast'
require 'set'
require 'stringio'
+require 'thread'
require_relative 'oga/version'
require_relative 'oga/oga'
@@ -43,6 +44,7 @@ require_relative 'oga/xml/pull_parser'
require_relative 'oga/html/parser'
require_relative 'oga/html/sax_parser'
+require_relative 'oga/html/entities'
require_relative 'oga/xpath/lexer'
require_relative 'oga/xpath/parser'
diff --git a/lib/oga/html/entities.rb b/lib/oga/html/entities.rb
new file mode 100644
index 0000000..f7fada0
--- /dev/null
+++ b/lib/oga/html/entities.rb
@@ -0,0 +1,2150 @@
+module Oga
+ module HTML
+ module Entities
+ ##
+ # Hash mapping HTML entities to their Unicode character replacements.
+ #
+ # Based on the JSON output as listed at
+ # http://www.w3.org/TR/html5/syntax.html#named-character-references
+ #
+ # @return [Hash]
+ #
+ DECODE_MAPPING = {
+ 'Á' => [193].pack('U'),
+ 'á' => [225].pack('U'),
+ 'Ă' => [258].pack('U'),
+ 'ă' => [259].pack('U'),
+ '∾' => [8766].pack('U'),
+ '∿' => [8767].pack('U'),
+ '∾̳' => [8766, 819].pack('U'),
+ 'Â' => [194].pack('U'),
+ 'â' => [226].pack('U'),
+ '´' => [180].pack('U'),
+ 'А' => [1040].pack('U'),
+ 'а' => [1072].pack('U'),
+ 'Æ' => [198].pack('U'),
+ 'æ' => [230].pack('U'),
+ '⁡' => [8289].pack('U'),
+ '𝔄' => [120068].pack('U'),
+ '𝔞' => [120094].pack('U'),
+ 'À' => [192].pack('U'),
+ 'à' => [224].pack('U'),
+ 'ℵ' => [8501].pack('U'),
+ 'ℵ' => [8501].pack('U'),
+ 'Α' => [913].pack('U'),
+ 'α' => [945].pack('U'),
+ 'Ā' => [256].pack('U'),
+ 'ā' => [257].pack('U'),
+ '⨿' => [10815].pack('U'),
+ '&' => [38].pack('U'),
+ '&' => [38].pack('U'),
+ '⩓' => [10835].pack('U'),
+ '∧' => [8743].pack('U'),
+ '⩕' => [10837].pack('U'),
+ '⩜' => [10844].pack('U'),
+ '⩘' => [10840].pack('U'),
+ '⩚' => [10842].pack('U'),
+ '∠' => [8736].pack('U'),
+ '⦤' => [10660].pack('U'),
+ '∠' => [8736].pack('U'),
+ '∡' => [8737].pack('U'),
+ '⦨' => [10664].pack('U'),
+ '⦩' => [10665].pack('U'),
+ '⦪' => [10666].pack('U'),
+ '⦫' => [10667].pack('U'),
+ '⦬' => [10668].pack('U'),
+ '⦭' => [10669].pack('U'),
+ '⦮' => [10670].pack('U'),
+ '⦯' => [10671].pack('U'),
+ '∟' => [8735].pack('U'),
+ '⊾' => [8894].pack('U'),
+ '⦝' => [10653].pack('U'),
+ '∢' => [8738].pack('U'),
+ 'Å' => [197].pack('U'),
+ '⍼' => [9084].pack('U'),
+ 'Ą' => [260].pack('U'),
+ 'ą' => [261].pack('U'),
+ '𝔸' => [120120].pack('U'),
+ '𝕒' => [120146].pack('U'),
+ '≈' => [8776].pack('U'),
+ '⩯' => [10863].pack('U'),
+ '⩰' => [10864].pack('U'),
+ '≊' => [8778].pack('U'),
+ '≋' => [8779].pack('U'),
+ ''' => [39].pack('U'),
+ '⁡' => [8289].pack('U'),
+ '≈' => [8776].pack('U'),
+ '≊' => [8778].pack('U'),
+ 'Å' => [197].pack('U'),
+ 'å' => [229].pack('U'),
+ '𝒜' => [119964].pack('U'),
+ '𝒶' => [119990].pack('U'),
+ '≔' => [8788].pack('U'),
+ '*' => [42].pack('U'),
+ '≈' => [8776].pack('U'),
+ '≍' => [8781].pack('U'),
+ 'Ã' => [195].pack('U'),
+ 'ã' => [227].pack('U'),
+ 'Ä' => [196].pack('U'),
+ 'ä' => [228].pack('U'),
+ '∳' => [8755].pack('U'),
+ '⨑' => [10769].pack('U'),
+ '≌' => [8780].pack('U'),
+ '϶' => [1014].pack('U'),
+ '‵' => [8245].pack('U'),
+ '∽' => [8765].pack('U'),
+ '⋍' => [8909].pack('U'),
+ '∖' => [8726].pack('U'),
+ '⫧' => [10983].pack('U'),
+ '⊽' => [8893].pack('U'),
+ '⌆' => [8966].pack('U'),
+ '⌅' => [8965].pack('U'),
+ '⌅' => [8965].pack('U'),
+ '⎵' => [9141].pack('U'),
+ '⎶' => [9142].pack('U'),
+ '≌' => [8780].pack('U'),
+ 'Б' => [1041].pack('U'),
+ 'б' => [1073].pack('U'),
+ '„' => [8222].pack('U'),
+ '∵' => [8757].pack('U'),
+ '∵' => [8757].pack('U'),
+ '∵' => [8757].pack('U'),
+ '⦰' => [10672].pack('U'),
+ '϶' => [1014].pack('U'),
+ 'ℬ' => [8492].pack('U'),
+ 'ℬ' => [8492].pack('U'),
+ 'Β' => [914].pack('U'),
+ 'β' => [946].pack('U'),
+ 'ℶ' => [8502].pack('U'),
+ '≬' => [8812].pack('U'),
+ '𝔅' => [120069].pack('U'),
+ '𝔟' => [120095].pack('U'),
+ '⋂' => [8898].pack('U'),
+ '◯' => [9711].pack('U'),
+ '⋃' => [8899].pack('U'),
+ '⨀' => [10752].pack('U'),
+ '⨁' => [10753].pack('U'),
+ '⨂' => [10754].pack('U'),
+ '⨆' => [10758].pack('U'),
+ '★' => [9733].pack('U'),
+ '▽' => [9661].pack('U'),
+ '△' => [9651].pack('U'),
+ '⨄' => [10756].pack('U'),
+ '⋁' => [8897].pack('U'),
+ '⋀' => [8896].pack('U'),
+ '⤍' => [10509].pack('U'),
+ '⧫' => [10731].pack('U'),
+ '▪' => [9642].pack('U'),
+ '▴' => [9652].pack('U'),
+ '▾' => [9662].pack('U'),
+ '◂' => [9666].pack('U'),
+ '▸' => [9656].pack('U'),
+ '␣' => [9251].pack('U'),
+ '▒' => [9618].pack('U'),
+ '░' => [9617].pack('U'),
+ '▓' => [9619].pack('U'),
+ '█' => [9608].pack('U'),
+ '=⃥' => [61, 8421].pack('U'),
+ '≡⃥' => [8801, 8421].pack('U'),
+ '⫭' => [10989].pack('U'),
+ '⌐' => [8976].pack('U'),
+ '𝔹' => [120121].pack('U'),
+ '𝕓' => [120147].pack('U'),
+ '⊥' => [8869].pack('U'),
+ '⊥' => [8869].pack('U'),
+ '⋈' => [8904].pack('U'),
+ '⧉' => [10697].pack('U'),
+ '╗' => [9559].pack('U'),
+ '╖' => [9558].pack('U'),
+ '╕' => [9557].pack('U'),
+ '┐' => [9488].pack('U'),
+ '╔' => [9556].pack('U'),
+ '╓' => [9555].pack('U'),
+ '╒' => [9554].pack('U'),
+ '┌' => [9484].pack('U'),
+ '═' => [9552].pack('U'),
+ '─' => [9472].pack('U'),
+ '╦' => [9574].pack('U'),
+ '╤' => [9572].pack('U'),
+ '╥' => [9573].pack('U'),
+ '┬' => [9516].pack('U'),
+ '╩' => [9577].pack('U'),
+ '╧' => [9575].pack('U'),
+ '╨' => [9576].pack('U'),
+ '┴' => [9524].pack('U'),
+ '⊟' => [8863].pack('U'),
+ '⊞' => [8862].pack('U'),
+ '⊠' => [8864].pack('U'),
+ '╝' => [9565].pack('U'),
+ '╜' => [9564].pack('U'),
+ '╛' => [9563].pack('U'),
+ '┘' => [9496].pack('U'),
+ '╚' => [9562].pack('U'),
+ '╙' => [9561].pack('U'),
+ '╘' => [9560].pack('U'),
+ '└' => [9492].pack('U'),
+ '║' => [9553].pack('U'),
+ '│' => [9474].pack('U'),
+ '╬' => [9580].pack('U'),
+ '╫' => [9579].pack('U'),
+ '╪' => [9578].pack('U'),
+ '┼' => [9532].pack('U'),
+ '╣' => [9571].pack('U'),
+ '╢' => [9570].pack('U'),
+ '╡' => [9569].pack('U'),
+ '┤' => [9508].pack('U'),
+ '╠' => [9568].pack('U'),
+ '╟' => [9567].pack('U'),
+ '╞' => [9566].pack('U'),
+ '├' => [9500].pack('U'),
+ '‵' => [8245].pack('U'),
+ '˘' => [728].pack('U'),
+ '˘' => [728].pack('U'),
+ '¦' => [166].pack('U'),
+ 'ℬ' => [8492].pack('U'),
+ '𝒷' => [119991].pack('U'),
+ '⁏' => [8271].pack('U'),
+ '∽' => [8765].pack('U'),
+ '⋍' => [8909].pack('U'),
+ '\' => [92].pack('U'),
+ '⧅' => [10693].pack('U'),
+ '⟈' => [10184].pack('U'),
+ '•' => [8226].pack('U'),
+ '•' => [8226].pack('U'),
+ '≎' => [8782].pack('U'),
+ '⪮' => [10926].pack('U'),
+ '≏' => [8783].pack('U'),
+ '≎' => [8782].pack('U'),
+ '≏' => [8783].pack('U'),
+ 'Ć' => [262].pack('U'),
+ 'ć' => [263].pack('U'),
+ '⋒' => [8914].pack('U'),
+ '∩' => [8745].pack('U'),
+ '⩄' => [10820].pack('U'),
+ '⩉' => [10825].pack('U'),
+ '⩋' => [10827].pack('U'),
+ '⩇' => [10823].pack('U'),
+ '⩀' => [10816].pack('U'),
+ 'ⅅ' => [8517].pack('U'),
+ '∩︀' => [8745, 65024].pack('U'),
+ '⁁' => [8257].pack('U'),
+ 'ˇ' => [711].pack('U'),
+ 'ℭ' => [8493].pack('U'),
+ '⩍' => [10829].pack('U'),
+ 'Č' => [268].pack('U'),
+ 'č' => [269].pack('U'),
+ 'Ç' => [199].pack('U'),
+ 'ç' => [231].pack('U'),
+ 'Ĉ' => [264].pack('U'),
+ 'ĉ' => [265].pack('U'),
+ '∰' => [8752].pack('U'),
+ '⩌' => [10828].pack('U'),
+ '⩐' => [10832].pack('U'),
+ 'Ċ' => [266].pack('U'),
+ 'ċ' => [267].pack('U'),
+ '¸' => [184].pack('U'),
+ '¸' => [184].pack('U'),
+ '⦲' => [10674].pack('U'),
+ '¢' => [162].pack('U'),
+ '·' => [183].pack('U'),
+ '·' => [183].pack('U'),
+ 'ℭ' => [8493].pack('U'),
+ '𝔠' => [120096].pack('U'),
+ 'Ч' => [1063].pack('U'),
+ 'ч' => [1095].pack('U'),
+ '✓' => [10003].pack('U'),
+ '✓' => [10003].pack('U'),
+ 'Χ' => [935].pack('U'),
+ 'χ' => [967].pack('U'),
+ '○' => [9675].pack('U'),
+ 'ˆ' => [710].pack('U'),
+ '≗' => [8791].pack('U'),
+ '↺' => [8634].pack('U'),
+ '↻' => [8635].pack('U'),
+ '⊛' => [8859].pack('U'),
+ '⊚' => [8858].pack('U'),
+ '⊝' => [8861].pack('U'),
+ '⊙' => [8857].pack('U'),
+ '®' => [174].pack('U'),
+ 'Ⓢ' => [9416].pack('U'),
+ '⊖' => [8854].pack('U'),
+ '⊕' => [8853].pack('U'),
+ '⊗' => [8855].pack('U'),
+ '⧃' => [10691].pack('U'),
+ '≗' => [8791].pack('U'),
+ '⨐' => [10768].pack('U'),
+ '⫯' => [10991].pack('U'),
+ '⧂' => [10690].pack('U'),
+ '∲' => [8754].pack('U'),
+ '”' => [8221].pack('U'),
+ '’' => [8217].pack('U'),
+ '♣' => [9827].pack('U'),
+ '♣' => [9827].pack('U'),
+ '∷' => [8759].pack('U'),
+ ':' => [58].pack('U'),
+ '⩴' => [10868].pack('U'),
+ '≔' => [8788].pack('U'),
+ '≔' => [8788].pack('U'),
+ ',' => [44].pack('U'),
+ '@' => [64].pack('U'),
+ '∁' => [8705].pack('U'),
+ '∘' => [8728].pack('U'),
+ '∁' => [8705].pack('U'),
+ 'ℂ' => [8450].pack('U'),
+ '≅' => [8773].pack('U'),
+ '⩭' => [10861].pack('U'),
+ '≡' => [8801].pack('U'),
+ '∯' => [8751].pack('U'),
+ '∮' => [8750].pack('U'),
+ '∮' => [8750].pack('U'),
+ 'ℂ' => [8450].pack('U'),
+ '𝕔' => [120148].pack('U'),
+ '∐' => [8720].pack('U'),
+ '∐' => [8720].pack('U'),
+ '©' => [169].pack('U'),
+ '©' => [169].pack('U'),
+ '℗' => [8471].pack('U'),
+ '∳' => [8755].pack('U'),
+ '↵' => [8629].pack('U'),
+ '⨯' => [10799].pack('U'),
+ '✗' => [10007].pack('U'),
+ '𝒞' => [119966].pack('U'),
+ '𝒸' => [119992].pack('U'),
+ '⫏' => [10959].pack('U'),
+ '⫑' => [10961].pack('U'),
+ '⫐' => [10960].pack('U'),
+ '⫒' => [10962].pack('U'),
+ '⋯' => [8943].pack('U'),
+ '⤸' => [10552].pack('U'),
+ '⤵' => [10549].pack('U'),
+ '⋞' => [8926].pack('U'),
+ '⋟' => [8927].pack('U'),
+ '↶' => [8630].pack('U'),
+ '⤽' => [10557].pack('U'),
+ '⋓' => [8915].pack('U'),
+ '∪' => [8746].pack('U'),
+ '⩈' => [10824].pack('U'),
+ '≍' => [8781].pack('U'),
+ '⩆' => [10822].pack('U'),
+ '⩊' => [10826].pack('U'),
+ '⊍' => [8845].pack('U'),
+ '⩅' => [10821].pack('U'),
+ '∪︀' => [8746, 65024].pack('U'),
+ '↷' => [8631].pack('U'),
+ '⤼' => [10556].pack('U'),
+ '⋞' => [8926].pack('U'),
+ '⋟' => [8927].pack('U'),
+ '⋎' => [8910].pack('U'),
+ '⋏' => [8911].pack('U'),
+ '¤' => [164].pack('U'),
+ '↶' => [8630].pack('U'),
+ '↷' => [8631].pack('U'),
+ '⋎' => [8910].pack('U'),
+ '⋏' => [8911].pack('U'),
+ '∲' => [8754].pack('U'),
+ '∱' => [8753].pack('U'),
+ '⌭' => [9005].pack('U'),
+ '‡' => [8225].pack('U'),
+ '†' => [8224].pack('U'),
+ 'ℸ' => [8504].pack('U'),
+ '↡' => [8609].pack('U'),
+ '⇓' => [8659].pack('U'),
+ '↓' => [8595].pack('U'),
+ '‐' => [8208].pack('U'),
+ '⫤' => [10980].pack('U'),
+ '⊣' => [8867].pack('U'),
+ '⤏' => [10511].pack('U'),
+ '˝' => [733].pack('U'),
+ 'Ď' => [270].pack('U'),
+ 'ď' => [271].pack('U'),
+ 'Д' => [1044].pack('U'),
+ 'д' => [1076].pack('U'),
+ 'ⅅ' => [8517].pack('U'),
+ 'ⅆ' => [8518].pack('U'),
+ '‡' => [8225].pack('U'),
+ '⇊' => [8650].pack('U'),
+ '⤑' => [10513].pack('U'),
+ '⩷' => [10871].pack('U'),
+ '°' => [176].pack('U'),
+ '∇' => [8711].pack('U'),
+ 'Δ' => [916].pack('U'),
+ 'δ' => [948].pack('U'),
+ '⦱' => [10673].pack('U'),
+ '⥿' => [10623].pack('U'),
+ '𝔇' => [120071].pack('U'),
+ '𝔡' => [120097].pack('U'),
+ '⥥' => [10597].pack('U'),
+ '⇃' => [8643].pack('U'),
+ '⇂' => [8642].pack('U'),
+ '´' => [180].pack('U'),
+ '˙' => [729].pack('U'),
+ '˝' => [733].pack('U'),
+ '`' => [96].pack('U'),
+ '˜' => [732].pack('U'),
+ '⋄' => [8900].pack('U'),
+ '⋄' => [8900].pack('U'),
+ '⋄' => [8900].pack('U'),
+ '♦' => [9830].pack('U'),
+ '♦' => [9830].pack('U'),
+ '¨' => [168].pack('U'),
+ 'ⅆ' => [8518].pack('U'),
+ 'ϝ' => [989].pack('U'),
+ '⋲' => [8946].pack('U'),
+ '÷' => [247].pack('U'),
+ '÷' => [247].pack('U'),
+ '⋇' => [8903].pack('U'),
+ '⋇' => [8903].pack('U'),
+ 'Ђ' => [1026].pack('U'),
+ 'ђ' => [1106].pack('U'),
+ '⌞' => [8990].pack('U'),
+ '⌍' => [8973].pack('U'),
+ '$' => [36].pack('U'),
+ '𝔻' => [120123].pack('U'),
+ '𝕕' => [120149].pack('U'),
+ '¨' => [168].pack('U'),
+ '˙' => [729].pack('U'),
+ '⃜' => [8412].pack('U'),
+ '≐' => [8784].pack('U'),
+ '≑' => [8785].pack('U'),
+ '≐' => [8784].pack('U'),
+ '∸' => [8760].pack('U'),
+ '∔' => [8724].pack('U'),
+ '⊡' => [8865].pack('U'),
+ '⌆' => [8966].pack('U'),
+ '∯' => [8751].pack('U'),
+ '¨' => [168].pack('U'),
+ '⇓' => [8659].pack('U'),
+ '⇐' => [8656].pack('U'),
+ '⇔' => [8660].pack('U'),
+ '⫤' => [10980].pack('U'),
+ '⟸' => [10232].pack('U'),
+ '⟺' => [10234].pack('U'),
+ '⟹' => [10233].pack('U'),
+ '⇒' => [8658].pack('U'),
+ '⊨' => [8872].pack('U'),
+ '⇑' => [8657].pack('U'),
+ '⇕' => [8661].pack('U'),
+ '∥' => [8741].pack('U'),
+ '↓' => [8595].pack('U'),
+ '⇓' => [8659].pack('U'),
+ '↓' => [8595].pack('U'),
+ '⤓' => [10515].pack('U'),
+ '⇵' => [8693].pack('U'),
+ '̑' => [785].pack('U'),
+ '⇊' => [8650].pack('U'),
+ '⇃' => [8643].pack('U'),
+ '⇂' => [8642].pack('U'),
+ '⥐' => [10576].pack('U'),
+ '⥞' => [10590].pack('U'),
+ '↽' => [8637].pack('U'),
+ '⥖' => [10582].pack('U'),
+ '⥟' => [10591].pack('U'),
+ '⇁' => [8641].pack('U'),
+ '⥗' => [10583].pack('U'),
+ '⊤' => [8868].pack('U'),
+ '↧' => [8615].pack('U'),
+ '⤐' => [10512].pack('U'),
+ '⌟' => [8991].pack('U'),
+ '⌌' => [8972].pack('U'),
+ '𝒟' => [119967].pack('U'),
+ '𝒹' => [119993].pack('U'),
+ 'Ѕ' => [1029].pack('U'),
+ 'ѕ' => [1109].pack('U'),
+ '⧶' => [10742].pack('U'),
+ 'Đ' => [272].pack('U'),
+ 'đ' => [273].pack('U'),
+ '⋱' => [8945].pack('U'),
+ '▿' => [9663].pack('U'),
+ '▾' => [9662].pack('U'),
+ '⇵' => [8693].pack('U'),
+ '⥯' => [10607].pack('U'),
+ '⦦' => [10662].pack('U'),
+ 'Џ' => [1039].pack('U'),
+ 'џ' => [1119].pack('U'),
+ '⟿' => [10239].pack('U'),
+ 'É' => [201].pack('U'),
+ 'é' => [233].pack('U'),
+ '⩮' => [10862].pack('U'),
+ 'Ě' => [282].pack('U'),
+ 'ě' => [283].pack('U'),
+ '≖' => [8790].pack('U'),
+ 'Ê' => [202].pack('U'),
+ 'ê' => [234].pack('U'),
+ '≕' => [8789].pack('U'),
+ 'Э' => [1069].pack('U'),
+ 'э' => [1101].pack('U'),
+ '⩷' => [10871].pack('U'),
+ 'Ė' => [278].pack('U'),
+ '≑' => [8785].pack('U'),
+ 'ė' => [279].pack('U'),
+ 'ⅇ' => [8519].pack('U'),
+ '≒' => [8786].pack('U'),
+ '𝔈' => [120072].pack('U'),
+ '𝔢' => [120098].pack('U'),
+ '⪚' => [10906].pack('U'),
+ 'È' => [200].pack('U'),
+ 'è' => [232].pack('U'),
+ '⪖' => [10902].pack('U'),
+ '⪘' => [10904].pack('U'),
+ '⪙' => [10905].pack('U'),
+ '∈' => [8712].pack('U'),
+ '⏧' => [9191].pack('U'),
+ 'ℓ' => [8467].pack('U'),
+ '⪕' => [10901].pack('U'),
+ '⪗' => [10903].pack('U'),
+ 'Ē' => [274].pack('U'),
+ 'ē' => [275].pack('U'),
+ '∅' => [8709].pack('U'),
+ '∅' => [8709].pack('U'),
+ '◻' => [9723].pack('U'),
+ '∅' => [8709].pack('U'),
+ '▫' => [9643].pack('U'),
+ ' ' => [8195].pack('U'),
+ ' ' => [8196].pack('U'),
+ ' ' => [8197].pack('U'),
+ 'Ŋ' => [330].pack('U'),
+ 'ŋ' => [331].pack('U'),
+ ' ' => [8194].pack('U'),
+ 'Ę' => [280].pack('U'),
+ 'ę' => [281].pack('U'),
+ '𝔼' => [120124].pack('U'),
+ '𝕖' => [120150].pack('U'),
+ '⋕' => [8917].pack('U'),
+ '⧣' => [10723].pack('U'),
+ '⩱' => [10865].pack('U'),
+ 'ε' => [949].pack('U'),
+ 'Ε' => [917].pack('U'),
+ 'ε' => [949].pack('U'),
+ 'ϵ' => [1013].pack('U'),
+ '≖' => [8790].pack('U'),
+ '≕' => [8789].pack('U'),
+ '≂' => [8770].pack('U'),
+ '⪖' => [10902].pack('U'),
+ '⪕' => [10901].pack('U'),
+ '⩵' => [10869].pack('U'),
+ '=' => [61].pack('U'),
+ '≂' => [8770].pack('U'),
+ '≟' => [8799].pack('U'),
+ '⇌' => [8652].pack('U'),
+ '≡' => [8801].pack('U'),
+ '⩸' => [10872].pack('U'),
+ '⧥' => [10725].pack('U'),
+ '⥱' => [10609].pack('U'),
+ '≓' => [8787].pack('U'),
+ 'ℰ' => [8496].pack('U'),
+ 'ℯ' => [8495].pack('U'),
+ '≐' => [8784].pack('U'),
+ '⩳' => [10867].pack('U'),
+ '≂' => [8770].pack('U'),
+ 'Η' => [919].pack('U'),
+ 'η' => [951].pack('U'),
+ 'Ð' => [208].pack('U'),
+ 'ð' => [240].pack('U'),
+ 'Ë' => [203].pack('U'),
+ 'ë' => [235].pack('U'),
+ '€' => [8364].pack('U'),
+ '!' => [33].pack('U'),
+ '∃' => [8707].pack('U'),
+ '∃' => [8707].pack('U'),
+ 'ℰ' => [8496].pack('U'),
+ 'ⅇ' => [8519].pack('U'),
+ 'ⅇ' => [8519].pack('U'),
+ '≒' => [8786].pack('U'),
+ 'Ф' => [1060].pack('U'),
+ 'ф' => [1092].pack('U'),
+ '♀' => [9792].pack('U'),
+ 'ffi' => [64259].pack('U'),
+ 'ff' => [64256].pack('U'),
+ 'ffl' => [64260].pack('U'),
+ '𝔉' => [120073].pack('U'),
+ '𝔣' => [120099].pack('U'),
+ 'fi' => [64257].pack('U'),
+ '◼' => [9724].pack('U'),
+ '▪' => [9642].pack('U'),
+ 'fj' => [102, 106].pack('U'),
+ '♭' => [9837].pack('U'),
+ 'fl' => [64258].pack('U'),
+ '▱' => [9649].pack('U'),
+ 'ƒ' => [402].pack('U'),
+ '𝔽' => [120125].pack('U'),
+ '𝕗' => [120151].pack('U'),
+ '∀' => [8704].pack('U'),
+ '∀' => [8704].pack('U'),
+ '⋔' => [8916].pack('U'),
+ '⫙' => [10969].pack('U'),
+ 'ℱ' => [8497].pack('U'),
+ '⨍' => [10765].pack('U'),
+ '½' => [189].pack('U'),
+ '⅓' => [8531].pack('U'),
+ '¼' => [188].pack('U'),
+ '⅕' => [8533].pack('U'),
+ '⅙' => [8537].pack('U'),
+ '⅛' => [8539].pack('U'),
+ '⅔' => [8532].pack('U'),
+ '⅖' => [8534].pack('U'),
+ '¾' => [190].pack('U'),
+ '⅗' => [8535].pack('U'),
+ '⅜' => [8540].pack('U'),
+ '⅘' => [8536].pack('U'),
+ '⅚' => [8538].pack('U'),
+ '⅝' => [8541].pack('U'),
+ '⅞' => [8542].pack('U'),
+ '⁄' => [8260].pack('U'),
+ '⌢' => [8994].pack('U'),
+ 'ℱ' => [8497].pack('U'),
+ '𝒻' => [119995].pack('U'),
+ 'ǵ' => [501].pack('U'),
+ 'Γ' => [915].pack('U'),
+ 'γ' => [947].pack('U'),
+ 'Ϝ' => [988].pack('U'),
+ 'ϝ' => [989].pack('U'),
+ '⪆' => [10886].pack('U'),
+ 'Ğ' => [286].pack('U'),
+ 'ğ' => [287].pack('U'),
+ 'Ģ' => [290].pack('U'),
+ 'Ĝ' => [284].pack('U'),
+ 'ĝ' => [285].pack('U'),
+ 'Г' => [1043].pack('U'),
+ 'г' => [1075].pack('U'),
+ 'Ġ' => [288].pack('U'),
+ 'ġ' => [289].pack('U'),
+ '≧' => [8807].pack('U'),
+ '≥' => [8805].pack('U'),
+ '⪌' => [10892].pack('U'),
+ '⋛' => [8923].pack('U'),
+ '≥' => [8805].pack('U'),
+ '≧' => [8807].pack('U'),
+ '⩾' => [10878].pack('U'),
+ '⩾' => [10878].pack('U'),
+ '⪩' => [10921].pack('U'),
+ '⪀' => [10880].pack('U'),
+ '⪂' => [10882].pack('U'),
+ '⪄' => [10884].pack('U'),
+ '⋛︀' => [8923, 65024].pack('U'),
+ '⪔' => [10900].pack('U'),
+ '𝔊' => [120074].pack('U'),
+ '𝔤' => [120100].pack('U'),
+ '⋙' => [8921].pack('U'),
+ '≫' => [8811].pack('U'),
+ '⋙' => [8921].pack('U'),
+ 'ℷ' => [8503].pack('U'),
+ 'Ѓ' => [1027].pack('U'),
+ 'ѓ' => [1107].pack('U'),
+ '≷' => [8823].pack('U'),
+ '⪥' => [10917].pack('U'),
+ '⪒' => [10898].pack('U'),
+ '⪤' => [10916].pack('U'),
+ '⪊' => [10890].pack('U'),
+ '⪊' => [10890].pack('U'),
+ '≩' => [8809].pack('U'),
+ '⪈' => [10888].pack('U'),
+ '⪈' => [10888].pack('U'),
+ '≩' => [8809].pack('U'),
+ '⋧' => [8935].pack('U'),
+ '𝔾' => [120126].pack('U'),
+ '𝕘' => [120152].pack('U'),
+ '`' => [96].pack('U'),
+ '≥' => [8805].pack('U'),
+ '⋛' => [8923].pack('U'),
+ '≧' => [8807].pack('U'),
+ '⪢' => [10914].pack('U'),
+ '≷' => [8823].pack('U'),
+ '⩾' => [10878].pack('U'),
+ '≳' => [8819].pack('U'),
+ '𝒢' => [119970].pack('U'),
+ 'ℊ' => [8458].pack('U'),
+ '≳' => [8819].pack('U'),
+ '⪎' => [10894].pack('U'),
+ '⪐' => [10896].pack('U'),
+ '>' => [62].pack('U'),
+ '≫' => [8811].pack('U'),
+ '>' => [62].pack('U'),
+ '⪧' => [10919].pack('U'),
+ '⩺' => [10874].pack('U'),
+ '⋗' => [8919].pack('U'),
+ '⦕' => [10645].pack('U'),
+ '⩼' => [10876].pack('U'),
+ '⪆' => [10886].pack('U'),
+ '⥸' => [10616].pack('U'),
+ '⋗' => [8919].pack('U'),
+ '⋛' => [8923].pack('U'),
+ '⪌' => [10892].pack('U'),
+ '≷' => [8823].pack('U'),
+ '≳' => [8819].pack('U'),
+ '≩︀' => [8809, 65024].pack('U'),
+ '≩︀' => [8809, 65024].pack('U'),
+ 'ˇ' => [711].pack('U'),
+ ' ' => [8202].pack('U'),
+ '½' => [189].pack('U'),
+ 'ℋ' => [8459].pack('U'),
+ 'Ъ' => [1066].pack('U'),
+ 'ъ' => [1098].pack('U'),
+ '⇔' => [8660].pack('U'),
+ '↔' => [8596].pack('U'),
+ '⥈' => [10568].pack('U'),
+ '↭' => [8621].pack('U'),
+ '^' => [94].pack('U'),
+ 'ℏ' => [8463].pack('U'),
+ 'Ĥ' => [292].pack('U'),
+ 'ĥ' => [293].pack('U'),
+ '♥' => [9829].pack('U'),
+ '♥' => [9829].pack('U'),
+ '…' => [8230].pack('U'),
+ '⊹' => [8889].pack('U'),
+ 'ℌ' => [8460].pack('U'),
+ '𝔥' => [120101].pack('U'),
+ 'ℋ' => [8459].pack('U'),
+ '⤥' => [10533].pack('U'),
+ '⤦' => [10534].pack('U'),
+ '⇿' => [8703].pack('U'),
+ '∻' => [8763].pack('U'),
+ '↩' => [8617].pack('U'),
+ '↪' => [8618].pack('U'),
+ 'ℍ' => [8461].pack('U'),
+ '𝕙' => [120153].pack('U'),
+ '―' => [8213].pack('U'),
+ '─' => [9472].pack('U'),
+ 'ℋ' => [8459].pack('U'),
+ '𝒽' => [119997].pack('U'),
+ 'ℏ' => [8463].pack('U'),
+ 'Ħ' => [294].pack('U'),
+ 'ħ' => [295].pack('U'),
+ '≎' => [8782].pack('U'),
+ '≏' => [8783].pack('U'),
+ '⁃' => [8259].pack('U'),
+ '‐' => [8208].pack('U'),
+ 'Í' => [205].pack('U'),
+ 'í' => [237].pack('U'),
+ '⁣' => [8291].pack('U'),
+ 'Î' => [206].pack('U'),
+ 'î' => [238].pack('U'),
+ 'И' => [1048].pack('U'),
+ 'и' => [1080].pack('U'),
+ 'İ' => [304].pack('U'),
+ 'Е' => [1045].pack('U'),
+ 'е' => [1077].pack('U'),
+ '¡' => [161].pack('U'),
+ '⇔' => [8660].pack('U'),
+ 'ℑ' => [8465].pack('U'),
+ '𝔦' => [120102].pack('U'),
+ 'Ì' => [204].pack('U'),
+ 'ì' => [236].pack('U'),
+ 'ⅈ' => [8520].pack('U'),
+ '⨌' => [10764].pack('U'),
+ '∭' => [8749].pack('U'),
+ '⧜' => [10716].pack('U'),
+ '℩' => [8489].pack('U'),
+ 'IJ' => [306].pack('U'),
+ 'ij' => [307].pack('U'),
+ 'ℑ' => [8465].pack('U'),
+ 'Ī' => [298].pack('U'),
+ 'ī' => [299].pack('U'),
+ 'ℑ' => [8465].pack('U'),
+ 'ⅈ' => [8520].pack('U'),
+ 'ℐ' => [8464].pack('U'),
+ 'ℑ' => [8465].pack('U'),
+ 'ı' => [305].pack('U'),
+ '⊷' => [8887].pack('U'),
+ 'Ƶ' => [437].pack('U'),
+ '⇒' => [8658].pack('U'),
+ '∈' => [8712].pack('U'),
+ '℅' => [8453].pack('U'),
+ '∞' => [8734].pack('U'),
+ '⧝' => [10717].pack('U'),
+ 'ı' => [305].pack('U'),
+ '∬' => [8748].pack('U'),
+ '∫' => [8747].pack('U'),
+ '⊺' => [8890].pack('U'),
+ 'ℤ' => [8484].pack('U'),
+ '∫' => [8747].pack('U'),
+ '⊺' => [8890].pack('U'),
+ '⋂' => [8898].pack('U'),
+ '⨗' => [10775].pack('U'),
+ '⨼' => [10812].pack('U'),
+ '⁣' => [8291].pack('U'),
+ '⁢' => [8290].pack('U'),
+ 'Ё' => [1025].pack('U'),
+ 'ё' => [1105].pack('U'),
+ 'Į' => [302].pack('U'),
+ 'į' => [303].pack('U'),
+ '𝕀' => [120128].pack('U'),
+ '𝕚' => [120154].pack('U'),
+ 'Ι' => [921].pack('U'),
+ 'ι' => [953].pack('U'),
+ '⨼' => [10812].pack('U'),
+ '¿' => [191].pack('U'),
+ 'ℐ' => [8464].pack('U'),
+ '𝒾' => [119998].pack('U'),
+ '∈' => [8712].pack('U'),
+ '⋵' => [8949].pack('U'),
+ '⋹' => [8953].pack('U'),
+ '⋴' => [8948].pack('U'),
+ '⋳' => [8947].pack('U'),
+ '∈' => [8712].pack('U'),
+ '⁢' => [8290].pack('U'),
+ 'Ĩ' => [296].pack('U'),
+ 'ĩ' => [297].pack('U'),
+ 'І' => [1030].pack('U'),
+ 'і' => [1110].pack('U'),
+ 'Ï' => [207].pack('U'),
+ 'ï' => [239].pack('U'),
+ 'Ĵ' => [308].pack('U'),
+ 'ĵ' => [309].pack('U'),
+ 'Й' => [1049].pack('U'),
+ 'й' => [1081].pack('U'),
+ '𝔍' => [120077].pack('U'),
+ '𝔧' => [120103].pack('U'),
+ 'ȷ' => [567].pack('U'),
+ '𝕁' => [120129].pack('U'),
+ '𝕛' => [120155].pack('U'),
+ '𝒥' => [119973].pack('U'),
+ '𝒿' => [119999].pack('U'),
+ 'Ј' => [1032].pack('U'),
+ 'ј' => [1112].pack('U'),
+ 'Є' => [1028].pack('U'),
+ 'є' => [1108].pack('U'),
+ 'Κ' => [922].pack('U'),
+ 'κ' => [954].pack('U'),
+ 'ϰ' => [1008].pack('U'),
+ 'Ķ' => [310].pack('U'),
+ 'ķ' => [311].pack('U'),
+ 'К' => [1050].pack('U'),
+ 'к' => [1082].pack('U'),
+ '𝔎' => [120078].pack('U'),
+ '𝔨' => [120104].pack('U'),
+ 'ĸ' => [312].pack('U'),
+ 'Х' => [1061].pack('U'),
+ 'х' => [1093].pack('U'),
+ 'Ќ' => [1036].pack('U'),
+ 'ќ' => [1116].pack('U'),
+ '𝕂' => [120130].pack('U'),
+ '𝕜' => [120156].pack('U'),
+ '𝒦' => [119974].pack('U'),
+ '𝓀' => [120000].pack('U'),
+ '⇚' => [8666].pack('U'),
+ 'Ĺ' => [313].pack('U'),
+ 'ĺ' => [314].pack('U'),
+ '⦴' => [10676].pack('U'),
+ 'ℒ' => [8466].pack('U'),
+ 'Λ' => [923].pack('U'),
+ 'λ' => [955].pack('U'),
+ '⟪' => [10218].pack('U'),
+ '〈' => [10216].pack('U'),
+ '⦑' => [10641].pack('U'),
+ '⟨' => [10216].pack('U'),
+ '⪅' => [10885].pack('U'),
+ 'ℒ' => [8466].pack('U'),
+ '«' => [171].pack('U'),
+ '↞' => [8606].pack('U'),
+ '⇐' => [8656].pack('U'),
+ '←' => [8592].pack('U'),
+ '⇤' => [8676].pack('U'),
+ '⤟' => [10527].pack('U'),
+ '⤝' => [10525].pack('U'),
+ '↩' => [8617].pack('U'),
+ '↫' => [8619].pack('U'),
+ '⤹' => [10553].pack('U'),
+ '⥳' => [10611].pack('U'),
+ '↢' => [8610].pack('U'),
+ '⪫' => [10923].pack('U'),
+ '⤛' => [10523].pack('U'),
+ '⤙' => [10521].pack('U'),
+ '⪭' => [10925].pack('U'),
+ '⪭︀' => [10925, 65024].pack('U'),
+ '⤎' => [10510].pack('U'),
+ '⤌' => [10508].pack('U'),
+ '❲' => [10098].pack('U'),
+ '{' => [123].pack('U'),
+ '[' => [91].pack('U'),
+ '⦋' => [10635].pack('U'),
+ '⦏' => [10639].pack('U'),
+ '⦍' => [10637].pack('U'),
+ 'Ľ' => [317].pack('U'),
+ 'ľ' => [318].pack('U'),
+ 'Ļ' => [315].pack('U'),
+ 'ļ' => [316].pack('U'),
+ '⌈' => [8968].pack('U'),
+ '{' => [123].pack('U'),
+ 'Л' => [1051].pack('U'),
+ 'л' => [1083].pack('U'),
+ '⤶' => [10550].pack('U'),
+ '“' => [8220].pack('U'),
+ '„' => [8222].pack('U'),
+ '⥧' => [10599].pack('U'),
+ '⥋' => [10571].pack('U'),
+ '↲' => [8626].pack('U'),
+ '≦' => [8806].pack('U'),
+ '≤' => [8804].pack('U'),
+ '⟨' => [10216].pack('U'),
+ '←' => [8592].pack('U'),
+ '⇐' => [8656].pack('U'),
+ '←' => [8592].pack('U'),
+ '⇤' => [8676].pack('U'),
+ '⇆' => [8646].pack('U'),
+ '↢' => [8610].pack('U'),
+ '⌈' => [8968].pack('U'),
+ '⟦' => [10214].pack('U'),
+ '⥡' => [10593].pack('U'),
+ '⇃' => [8643].pack('U'),
+ '⥙' => [10585].pack('U'),
+ '⌊' => [8970].pack('U'),
+ '↽' => [8637].pack('U'),
+ '↼' => [8636].pack('U'),
+ '⇇' => [8647].pack('U'),
+ '↔' => [8596].pack('U'),
+ '⇔' => [8660].pack('U'),
+ '↔' => [8596].pack('U'),
+ '⇆' => [8646].pack('U'),
+ '⇋' => [8651].pack('U'),
+ '↭' => [8621].pack('U'),
+ '⥎' => [10574].pack('U'),
+ '⊣' => [8867].pack('U'),
+ '↤' => [8612].pack('U'),
+ '⥚' => [10586].pack('U'),
+ '⋋' => [8907].pack('U'),
+ '⊲' => [8882].pack('U'),
+ '⧏' => [10703].pack('U'),
+ '⊴' => [8884].pack('U'),
+ '⥑' => [10577].pack('U'),
+ '⥠' => [10592].pack('U'),
+ '↿' => [8639].pack('U'),
+ '⥘' => [10584].pack('U'),
+ '↼' => [8636].pack('U'),
+ '⥒' => [10578].pack('U'),
+ '⪋' => [10891].pack('U'),
+ '⋚' => [8922].pack('U'),
+ '≤' => [8804].pack('U'),
+ '≦' => [8806].pack('U'),
+ '⩽' => [10877].pack('U'),
+ '⩽' => [10877].pack('U'),
+ '⪨' => [10920].pack('U'),
+ '⩿' => [10879].pack('U'),
+ '⪁' => [10881].pack('U'),
+ '⪃' => [10883].pack('U'),
+ '⋚︀' => [8922, 65024].pack('U'),
+ '⪓' => [10899].pack('U'),
+ '⪅' => [10885].pack('U'),
+ '⋖' => [8918].pack('U'),
+ '⋚' => [8922].pack('U'),
+ '⪋' => [10891].pack('U'),
+ '⋚' => [8922].pack('U'),
+ '≦' => [8806].pack('U'),
+ '≶' => [8822].pack('U'),
+ '≶' => [8822].pack('U'),
+ '⪡' => [10913].pack('U'),
+ '≲' => [8818].pack('U'),
+ '⩽' => [10877].pack('U'),
+ '≲' => [8818].pack('U'),
+ '⥼' => [10620].pack('U'),
+ '⌊' => [8970].pack('U'),
+ '𝔏' => [120079].pack('U'),
+ '𝔩' => [120105].pack('U'),
+ '≶' => [8822].pack('U'),
+ '⪑' => [10897].pack('U'),
+ '⥢' => [10594].pack('U'),
+ '↽' => [8637].pack('U'),
+ '↼' => [8636].pack('U'),
+ '⥪' => [10602].pack('U'),
+ '▄' => [9604].pack('U'),
+ 'Љ' => [1033].pack('U'),
+ 'љ' => [1113].pack('U'),
+ '⋘' => [8920].pack('U'),
+ '≪' => [8810].pack('U'),
+ '⇇' => [8647].pack('U'),
+ '⌞' => [8990].pack('U'),
+ '⇚' => [8666].pack('U'),
+ '⥫' => [10603].pack('U'),
+ '◺' => [9722].pack('U'),
+ 'Ŀ' => [319].pack('U'),
+ 'ŀ' => [320].pack('U'),
+ '⎰' => [9136].pack('U'),
+ '⎰' => [9136].pack('U'),
+ '⪉' => [10889].pack('U'),
+ '⪉' => [10889].pack('U'),
+ '≨' => [8808].pack('U'),
+ '⪇' => [10887].pack('U'),
+ '⪇' => [10887].pack('U'),
+ '≨' => [8808].pack('U'),
+ '⋦' => [8934].pack('U'),
+ '⟬' => [10220].pack('U'),
+ '⇽' => [8701].pack('U'),
+ '⟦' => [10214].pack('U'),
+ '⟵' => [10229].pack('U'),
+ '⟸' => [10232].pack('U'),
+ '⟵' => [10229].pack('U'),
+ '⟷' => [10231].pack('U'),
+ '⟺' => [10234].pack('U'),
+ '⟷' => [10231].pack('U'),
+ '⟼' => [10236].pack('U'),
+ '⟶' => [10230].pack('U'),
+ '⟹' => [10233].pack('U'),
+ '⟶' => [10230].pack('U'),
+ '↫' => [8619].pack('U'),
+ '↬' => [8620].pack('U'),
+ '⦅' => [10629].pack('U'),
+ '𝕃' => [120131].pack('U'),
+ '𝕝' => [120157].pack('U'),
+ '⨭' => [10797].pack('U'),
+ '⨴' => [10804].pack('U'),
+ '∗' => [8727].pack('U'),
+ '_' => [95].pack('U'),
+ '↙' => [8601].pack('U'),
+ '↘' => [8600].pack('U'),
+ '◊' => [9674].pack('U'),
+ '◊' => [9674].pack('U'),
+ '⧫' => [10731].pack('U'),
+ '(' => [40].pack('U'),
+ '⦓' => [10643].pack('U'),
+ '⇆' => [8646].pack('U'),
+ '⌟' => [8991].pack('U'),
+ '⇋' => [8651].pack('U'),
+ '⥭' => [10605].pack('U'),
+ '' => [8206].pack('U'),
+ '⊿' => [8895].pack('U'),
+ '‹' => [8249].pack('U'),
+ 'ℒ' => [8466].pack('U'),
+ '𝓁' => [120001].pack('U'),
+ '↰' => [8624].pack('U'),
+ '↰' => [8624].pack('U'),
+ '≲' => [8818].pack('U'),
+ '⪍' => [10893].pack('U'),
+ '⪏' => [10895].pack('U'),
+ '[' => [91].pack('U'),
+ '‘' => [8216].pack('U'),
+ '‚' => [8218].pack('U'),
+ 'Ł' => [321].pack('U'),
+ 'ł' => [322].pack('U'),
+ '<' => [60].pack('U'),
+ '≪' => [8810].pack('U'),
+ '<' => [60].pack('U'),
+ '⪦' => [10918].pack('U'),
+ '⩹' => [10873].pack('U'),
+ '⋖' => [8918].pack('U'),
+ '⋋' => [8907].pack('U'),
+ '⋉' => [8905].pack('U'),
+ '⥶' => [10614].pack('U'),
+ '⩻' => [10875].pack('U'),
+ '◃' => [9667].pack('U'),
+ '⊴' => [8884].pack('U'),
+ '◂' => [9666].pack('U'),
+ '⦖' => [10646].pack('U'),
+ '⥊' => [10570].pack('U'),
+ '⥦' => [10598].pack('U'),
+ '≨︀' => [8808, 65024].pack('U'),
+ '≨︀' => [8808, 65024].pack('U'),
+ '¯' => [175].pack('U'),
+ '♂' => [9794].pack('U'),
+ '✠' => [10016].pack('U'),
+ '✠' => [10016].pack('U'),
+ '⤅' => [10501].pack('U'),
+ '↦' => [8614].pack('U'),
+ '↦' => [8614].pack('U'),
+ '↧' => [8615].pack('U'),
+ '↤' => [8612].pack('U'),
+ '↥' => [8613].pack('U'),
+ '▮' => [9646].pack('U'),
+ '⨩' => [10793].pack('U'),
+ 'М' => [1052].pack('U'),
+ 'м' => [1084].pack('U'),
+ '—' => [8212].pack('U'),
+ '∺' => [8762].pack('U'),
+ '∡' => [8737].pack('U'),
+ ' ' => [8287].pack('U'),
+ 'ℳ' => [8499].pack('U'),
+ '𝔐' => [120080].pack('U'),
+ '𝔪' => [120106].pack('U'),
+ '℧' => [8487].pack('U'),
+ 'µ' => [181].pack('U'),
+ '∣' => [8739].pack('U'),
+ '*' => [42].pack('U'),
+ '⫰' => [10992].pack('U'),
+ '·' => [183].pack('U'),
+ '−' => [8722].pack('U'),
+ '⊟' => [8863].pack('U'),
+ '∸' => [8760].pack('U'),
+ '⨪' => [10794].pack('U'),
+ '∓' => [8723].pack('U'),
+ '⫛' => [10971].pack('U'),
+ '…' => [8230].pack('U'),
+ '∓' => [8723].pack('U'),
+ '⊧' => [8871].pack('U'),
+ '𝕄' => [120132].pack('U'),
+ '𝕞' => [120158].pack('U'),
+ '∓' => [8723].pack('U'),
+ 'ℳ' => [8499].pack('U'),
+ '𝓂' => [120002].pack('U'),
+ '∾' => [8766].pack('U'),
+ 'Μ' => [924].pack('U'),
+ 'μ' => [956].pack('U'),
+ '⊸' => [8888].pack('U'),
+ '⊸' => [8888].pack('U'),
+ '∇' => [8711].pack('U'),
+ 'Ń' => [323].pack('U'),
+ 'ń' => [324].pack('U'),
+ '∠⃒' => [8736, 8402].pack('U'),
+ '≉' => [8777].pack('U'),
+ '⩰̸' => [10864, 824].pack('U'),
+ '≋̸' => [8779, 824].pack('U'),
+ 'ʼn' => [329].pack('U'),
+ '≉' => [8777].pack('U'),
+ '♮' => [9838].pack('U'),
+ '♮' => [9838].pack('U'),
+ 'ℕ' => [8469].pack('U'),
+ ' ' => [160].pack('U'),
+ '≎̸' => [8782, 824].pack('U'),
+ '≏̸' => [8783, 824].pack('U'),
+ '⩃' => [10819].pack('U'),
+ 'Ň' => [327].pack('U'),
+ 'ň' => [328].pack('U'),
+ 'Ņ' => [325].pack('U'),
+ 'ņ' => [326].pack('U'),
+ '≇' => [8775].pack('U'),
+ '⩭̸' => [10861, 824].pack('U'),
+ '⩂' => [10818].pack('U'),
+ 'Н' => [1053].pack('U'),
+ 'н' => [1085].pack('U'),
+ '–' => [8211].pack('U'),
+ '≠' => [8800].pack('U'),
+ '⤤' => [10532].pack('U'),
+ '⇗' => [8663].pack('U'),
+ '↗' => [8599].pack('U'),
+ '↗' => [8599].pack('U'),
+ '≐̸' => [8784, 824].pack('U'),
+ '​' => [8203].pack('U'),
+ '​' => [8203].pack('U'),
+ '​' => [8203].pack('U'),
+ '​' => [8203].pack('U'),
+ '≢' => [8802].pack('U'),
+ '⤨' => [10536].pack('U'),
+ '≂̸' => [8770, 824].pack('U'),
+ '≫' => [8811].pack('U'),
+ '≪' => [8810].pack('U'),
+ '
' => [10].pack('U'),
+ '∄' => [8708].pack('U'),
+ '∄' => [8708].pack('U'),
+ '𝔑' => [120081].pack('U'),
+ '𝔫' => [120107].pack('U'),
+ '≧̸' => [8807, 824].pack('U'),
+ '≱' => [8817].pack('U'),
+ '≱' => [8817].pack('U'),
+ '≧̸' => [8807, 824].pack('U'),
+ '⩾̸' => [10878, 824].pack('U'),
+ '⩾̸' => [10878, 824].pack('U'),
+ '⋙̸' => [8921, 824].pack('U'),
+ '≵' => [8821].pack('U'),
+ '≫⃒' => [8811, 8402].pack('U'),
+ '≯' => [8815].pack('U'),
+ '≯' => [8815].pack('U'),
+ '≫̸' => [8811, 824].pack('U'),
+ '⇎' => [8654].pack('U'),
+ '↮' => [8622].pack('U'),
+ '⫲' => [10994].pack('U'),
+ '∋' => [8715].pack('U'),
+ '⋼' => [8956].pack('U'),
+ '⋺' => [8954].pack('U'),
+ '∋' => [8715].pack('U'),
+ 'Њ' => [1034].pack('U'),
+ 'њ' => [1114].pack('U'),
+ '⇍' => [8653].pack('U'),
+ '↚' => [8602].pack('U'),
+ '‥' => [8229].pack('U'),
+ '≦̸' => [8806, 824].pack('U'),
+ '≰' => [8816].pack('U'),
+ '⇍' => [8653].pack('U'),
+ '↚' => [8602].pack('U'),
+ '⇎' => [8654].pack('U'),
+ '↮' => [8622].pack('U'),
+ '≰' => [8816].pack('U'),
+ '≦̸' => [8806, 824].pack('U'),
+ '⩽̸' => [10877, 824].pack('U'),
+ '⩽̸' => [10877, 824].pack('U'),
+ '≮' => [8814].pack('U'),
+ '⋘̸' => [8920, 824].pack('U'),
+ '≴' => [8820].pack('U'),
+ '≪⃒' => [8810, 8402].pack('U'),
+ '≮' => [8814].pack('U'),
+ '⋪' => [8938].pack('U'),
+ '⋬' => [8940].pack('U'),
+ '≪̸' => [8810, 824].pack('U'),
+ '∤' => [8740].pack('U'),
+ '⁠' => [8288].pack('U'),
+ ' ' => [160].pack('U'),
+ 'ℕ' => [8469].pack('U'),
+ '𝕟' => [120159].pack('U'),
+ '⫬' => [10988].pack('U'),
+ '¬' => [172].pack('U'),
+ '≢' => [8802].pack('U'),
+ '≭' => [8813].pack('U'),
+ '∦' => [8742].pack('U'),
+ '∉' => [8713].pack('U'),
+ '≠' => [8800].pack('U'),
+ '≂̸' => [8770, 824].pack('U'),
+ '∄' => [8708].pack('U'),
+ '≯' => [8815].pack('U'),
+ '≱' => [8817].pack('U'),
+ '≧̸' => [8807, 824].pack('U'),
+ '≫̸' => [8811, 824].pack('U'),
+ '≹' => [8825].pack('U'),
+ '⩾̸' => [10878, 824].pack('U'),
+ '≵' => [8821].pack('U'),
+ '≎̸' => [8782, 824].pack('U'),
+ '≏̸' => [8783, 824].pack('U'),
+ '∉' => [8713].pack('U'),
+ '⋵̸' => [8949, 824].pack('U'),
+ '⋹̸' => [8953, 824].pack('U'),
+ '∉' => [8713].pack('U'),
+ '⋷' => [8951].pack('U'),
+ '⋶' => [8950].pack('U'),
+ '⋪' => [8938].pack('U'),
+ '⧏̸' => [10703, 824].pack('U'),
+ '⋬' => [8940].pack('U'),
+ '≮' => [8814].pack('U'),
+ '≰' => [8816].pack('U'),
+ '≸' => [8824].pack('U'),
+ '≪̸' => [8810, 824].pack('U'),
+ '⩽̸' => [10877, 824].pack('U'),
+ '≴' => [8820].pack('U'),
+ '⪢̸' => [10914, 824].pack('U'),
+ '⪡̸' => [10913, 824].pack('U'),
+ '∌' => [8716].pack('U'),
+ '∌' => [8716].pack('U'),
+ '⋾' => [8958].pack('U'),
+ '⋽' => [8957].pack('U'),
+ '⊀' => [8832].pack('U'),
+ '⪯̸' => [10927, 824].pack('U'),
+ '⋠' => [8928].pack('U'),
+ '∌' => [8716].pack('U'),
+ '⋫' => [8939].pack('U'),
+ '⧐̸' => [10704, 824].pack('U'),
+ '⋭' => [8941].pack('U'),
+ '⊏̸' => [8847, 824].pack('U'),
+ '⋢' => [8930].pack('U'),
+ '⊐̸' => [8848, 824].pack('U'),
+ '⋣' => [8931].pack('U'),
+ '⊂⃒' => [8834, 8402].pack('U'),
+ '⊈' => [8840].pack('U'),
+ '⊁' => [8833].pack('U'),
+ '⪰̸' => [10928, 824].pack('U'),
+ '⋡' => [8929].pack('U'),
+ '≿̸' => [8831, 824].pack('U'),
+ '⊃⃒' => [8835, 8402].pack('U'),
+ '⊉' => [8841].pack('U'),
+ '≁' => [8769].pack('U'),
+ '≄' => [8772].pack('U'),
+ '≇' => [8775].pack('U'),
+ '≉' => [8777].pack('U'),
+ '∤' => [8740].pack('U'),
+ '∦' => [8742].pack('U'),
+ '∦' => [8742].pack('U'),
+ '⫽⃥' => [11005, 8421].pack('U'),
+ '∂̸' => [8706, 824].pack('U'),
+ '⨔' => [10772].pack('U'),
+ '⊀' => [8832].pack('U'),
+ '⋠' => [8928].pack('U'),
+ '⪯̸' => [10927, 824].pack('U'),
+ '⊀' => [8832].pack('U'),
+ '⪯̸' => [10927, 824].pack('U'),
+ '⇏' => [8655].pack('U'),
+ '↛' => [8603].pack('U'),
+ '⤳̸' => [10547, 824].pack('U'),
+ '↝̸' => [8605, 824].pack('U'),
+ '⇏' => [8655].pack('U'),
+ '↛' => [8603].pack('U'),
+ '⋫' => [8939].pack('U'),
+ '⋭' => [8941].pack('U'),
+ '⊁' => [8833].pack('U'),
+ '⋡' => [8929].pack('U'),
+ '⪰̸' => [10928, 824].pack('U'),
+ '𝒩' => [119977].pack('U'),
+ '𝓃' => [120003].pack('U'),
+ '∤' => [8740].pack('U'),
+ '∦' => [8742].pack('U'),
+ '≁' => [8769].pack('U'),
+ '≄' => [8772].pack('U'),
+ '≄' => [8772].pack('U'),
+ '∤' => [8740].pack('U'),
+ '∦' => [8742].pack('U'),
+ '⋢' => [8930].pack('U'),
+ '⋣' => [8931].pack('U'),
+ '⊄' => [8836].pack('U'),
+ '⫅̸' => [10949, 824].pack('U'),
+ '⊈' => [8840].pack('U'),
+ '⊂⃒' => [8834, 8402].pack('U'),
+ '⊈' => [8840].pack('U'),
+ '⫅̸' => [10949, 824].pack('U'),
+ '⊁' => [8833].pack('U'),
+ '⪰̸' => [10928, 824].pack('U'),
+ '⊅' => [8837].pack('U'),
+ '⫆̸' => [10950, 824].pack('U'),
+ '⊉' => [8841].pack('U'),
+ '⊃⃒' => [8835, 8402].pack('U'),
+ '⊉' => [8841].pack('U'),
+ '⫆̸' => [10950, 824].pack('U'),
+ '≹' => [8825].pack('U'),
+ 'Ñ' => [209].pack('U'),
+ 'ñ' => [241].pack('U'),
+ '≸' => [8824].pack('U'),
+ '⋪' => [8938].pack('U'),
+ '⋬' => [8940].pack('U'),
+ '⋫' => [8939].pack('U'),
+ '⋭' => [8941].pack('U'),
+ 'Ν' => [925].pack('U'),
+ 'ν' => [957].pack('U'),
+ '#' => [35].pack('U'),
+ '№' => [8470].pack('U'),
+ ' ' => [8199].pack('U'),
+ '≍⃒' => [8781, 8402].pack('U'),
+ '⊯' => [8879].pack('U'),
+ '⊮' => [8878].pack('U'),
+ '⊭' => [8877].pack('U'),
+ '⊬' => [8876].pack('U'),
+ '≥⃒' => [8805, 8402].pack('U'),
+ '>⃒' => [62, 8402].pack('U'),
+ '⤄' => [10500].pack('U'),
+ '⧞' => [10718].pack('U'),
+ '⤂' => [10498].pack('U'),
+ '≤⃒' => [8804, 8402].pack('U'),
+ '<⃒' => [60, 8402].pack('U'),
+ '⊴⃒' => [8884, 8402].pack('U'),
+ '⤃' => [10499].pack('U'),
+ '⊵⃒' => [8885, 8402].pack('U'),
+ '∼⃒' => [8764, 8402].pack('U'),
+ '⤣' => [10531].pack('U'),
+ '⇖' => [8662].pack('U'),
+ '↖' => [8598].pack('U'),
+ '↖' => [8598].pack('U'),
+ '⤧' => [10535].pack('U'),
+ 'Ó' => [211].pack('U'),
+ 'ó' => [243].pack('U'),
+ '⊛' => [8859].pack('U'),
+ '⊚' => [8858].pack('U'),
+ 'Ô' => [212].pack('U'),
+ 'ô' => [244].pack('U'),
+ 'О' => [1054].pack('U'),
+ 'о' => [1086].pack('U'),
+ '⊝' => [8861].pack('U'),
+ 'Ő' => [336].pack('U'),
+ 'ő' => [337].pack('U'),
+ '⨸' => [10808].pack('U'),
+ '⊙' => [8857].pack('U'),
+ '⦼' => [10684].pack('U'),
+ 'Œ' => [338].pack('U'),
+ 'œ' => [339].pack('U'),
+ '⦿' => [10687].pack('U'),
+ '𝔒' => [120082].pack('U'),
+ '𝔬' => [120108].pack('U'),
+ '˛' => [731].pack('U'),
+ 'Ò' => [210].pack('U'),
+ 'ò' => [242].pack('U'),
+ '⧁' => [10689].pack('U'),
+ '⦵' => [10677].pack('U'),
+ 'Ω' => [937].pack('U'),
+ '∮' => [8750].pack('U'),
+ '↺' => [8634].pack('U'),
+ '⦾' => [10686].pack('U'),
+ '⦻' => [10683].pack('U'),
+ '‾' => [8254].pack('U'),
+ '⧀' => [10688].pack('U'),
+ 'Ō' => [332].pack('U'),
+ 'ō' => [333].pack('U'),
+ 'Ω' => [937].pack('U'),
+ 'ω' => [969].pack('U'),
+ 'Ο' => [927].pack('U'),
+ 'ο' => [959].pack('U'),
+ '⦶' => [10678].pack('U'),
+ '⊖' => [8854].pack('U'),
+ '𝕆' => [120134].pack('U'),
+ '𝕠' => [120160].pack('U'),
+ '⦷' => [10679].pack('U'),
+ '“' => [8220].pack('U'),
+ '‘' => [8216].pack('U'),
+ '⦹' => [10681].pack('U'),
+ '⊕' => [8853].pack('U'),
+ '⩔' => [10836].pack('U'),
+ '∨' => [8744].pack('U'),
+ '↻' => [8635].pack('U'),
+ '⩝' => [10845].pack('U'),
+ 'ℴ' => [8500].pack('U'),
+ 'ℴ' => [8500].pack('U'),
+ 'ª' => [170].pack('U'),
+ 'º' => [186].pack('U'),
+ '⊶' => [8886].pack('U'),
+ '⩖' => [10838].pack('U'),
+ '⩗' => [10839].pack('U'),
+ '⩛' => [10843].pack('U'),
+ 'Ⓢ' => [9416].pack('U'),
+ '𝒪' => [119978].pack('U'),
+ 'ℴ' => [8500].pack('U'),
+ 'Ø' => [216].pack('U'),
+ 'ø' => [248].pack('U'),
+ '⊘' => [8856].pack('U'),
+ 'Õ' => [213].pack('U'),
+ 'õ' => [245].pack('U'),
+ '⨷' => [10807].pack('U'),
+ '⊗' => [8855].pack('U'),
+ '⨶' => [10806].pack('U'),
+ 'Ö' => [214].pack('U'),
+ 'ö' => [246].pack('U'),
+ '⌽' => [9021].pack('U'),
+ '‾' => [8254].pack('U'),
+ '⏞' => [9182].pack('U'),
+ '⎴' => [9140].pack('U'),
+ '⏜' => [9180].pack('U'),
+ '∥' => [8741].pack('U'),
+ '¶' => [182].pack('U'),
+ '∥' => [8741].pack('U'),
+ '⫳' => [10995].pack('U'),
+ '⫽' => [11005].pack('U'),
+ '∂' => [8706].pack('U'),
+ '∂' => [8706].pack('U'),
+ 'П' => [1055].pack('U'),
+ 'п' => [1087].pack('U'),
+ '%' => [37].pack('U'),
+ '.' => [46].pack('U'),
+ '‰' => [8240].pack('U'),
+ '⊥' => [8869].pack('U'),
+ '‱' => [8241].pack('U'),
+ '𝔓' => [120083].pack('U'),
+ '𝔭' => [120109].pack('U'),
+ 'Φ' => [934].pack('U'),
+ 'φ' => [966].pack('U'),
+ 'ϕ' => [981].pack('U'),
+ 'ℳ' => [8499].pack('U'),
+ '☎' => [9742].pack('U'),
+ 'Π' => [928].pack('U'),
+ 'π' => [960].pack('U'),
+ '⋔' => [8916].pack('U'),
+ 'ϖ' => [982].pack('U'),
+ 'ℏ' => [8463].pack('U'),
+ 'ℎ' => [8462].pack('U'),
+ 'ℏ' => [8463].pack('U'),
+ '+' => [43].pack('U'),
+ '⨣' => [10787].pack('U'),
+ '⊞' => [8862].pack('U'),
+ '⨢' => [10786].pack('U'),
+ '∔' => [8724].pack('U'),
+ '⨥' => [10789].pack('U'),
+ '⩲' => [10866].pack('U'),
+ '±' => [177].pack('U'),
+ '±' => [177].pack('U'),
+ '⨦' => [10790].pack('U'),
+ '⨧' => [10791].pack('U'),
+ '±' => [177].pack('U'),
+ 'ℌ' => [8460].pack('U'),
+ '⨕' => [10773].pack('U'),
+ 'ℙ' => [8473].pack('U'),
+ '𝕡' => [120161].pack('U'),
+ '£' => [163].pack('U'),
+ '⪻' => [10939].pack('U'),
+ '≺' => [8826].pack('U'),
+ '⪷' => [10935].pack('U'),
+ '≼' => [8828].pack('U'),
+ '⪳' => [10931].pack('U'),
+ '⪯' => [10927].pack('U'),
+ '≺' => [8826].pack('U'),
+ '⪷' => [10935].pack('U'),
+ '≼' => [8828].pack('U'),
+ '≺' => [8826].pack('U'),
+ '⪯' => [10927].pack('U'),
+ '≼' => [8828].pack('U'),
+ '≾' => [8830].pack('U'),
+ '⪯' => [10927].pack('U'),
+ '⪹' => [10937].pack('U'),
+ '⪵' => [10933].pack('U'),
+ '⋨' => [8936].pack('U'),
+ '≾' => [8830].pack('U'),
+ '″' => [8243].pack('U'),
+ '′' => [8242].pack('U'),
+ 'ℙ' => [8473].pack('U'),
+ '⪹' => [10937].pack('U'),
+ '⪵' => [10933].pack('U'),
+ '⋨' => [8936].pack('U'),
+ '∏' => [8719].pack('U'),
+ '∏' => [8719].pack('U'),
+ '⌮' => [9006].pack('U'),
+ '⌒' => [8978].pack('U'),
+ '⌓' => [8979].pack('U'),
+ '∝' => [8733].pack('U'),
+ '∷' => [8759].pack('U'),
+ '∝' => [8733].pack('U'),
+ '∝' => [8733].pack('U'),
+ '≾' => [8830].pack('U'),
+ '⊰' => [8880].pack('U'),
+ '𝒫' => [119979].pack('U'),
+ '𝓅' => [120005].pack('U'),
+ 'Ψ' => [936].pack('U'),
+ 'ψ' => [968].pack('U'),
+ ' ' => [8200].pack('U'),
+ '𝔔' => [120084].pack('U'),
+ '𝔮' => [120110].pack('U'),
+ '⨌' => [10764].pack('U'),
+ 'ℚ' => [8474].pack('U'),
+ '𝕢' => [120162].pack('U'),
+ '⁗' => [8279].pack('U'),
+ '𝒬' => [119980].pack('U'),
+ '𝓆' => [120006].pack('U'),
+ 'ℍ' => [8461].pack('U'),
+ '⨖' => [10774].pack('U'),
+ '?' => [63].pack('U'),
+ '≟' => [8799].pack('U'),
+ '"' => [34].pack('U'),
+ '"' => [34].pack('U'),
+ '⇛' => [8667].pack('U'),
+ '∽̱' => [8765, 817].pack('U'),
+ 'Ŕ' => [340].pack('U'),
+ 'ŕ' => [341].pack('U'),
+ '√' => [8730].pack('U'),
+ '⦳' => [10675].pack('U'),
+ '⟫' => [10219].pack('U'),
+ '〉' => [10217].pack('U'),
+ '⦒' => [10642].pack('U'),
+ '⦥' => [10661].pack('U'),
+ '⟩' => [10217].pack('U'),
+ '»' => [187].pack('U'),
+ '↠' => [8608].pack('U'),
+ '⇒' => [8658].pack('U'),
+ '→' => [8594].pack('U'),
+ '⥵' => [10613].pack('U'),
+ '⇥' => [8677].pack('U'),
+ '⤠' => [10528].pack('U'),
+ '⤳' => [10547].pack('U'),
+ '⤞' => [10526].pack('U'),
+ '↪' => [8618].pack('U'),
+ '↬' => [8620].pack('U'),
+ '⥅' => [10565].pack('U'),
+ '⥴' => [10612].pack('U'),
+ '⤖' => [10518].pack('U'),
+ '↣' => [8611].pack('U'),
+ '↝' => [8605].pack('U'),
+ '⤜' => [10524].pack('U'),
+ '⤚' => [10522].pack('U'),
+ '∶' => [8758].pack('U'),
+ 'ℚ' => [8474].pack('U'),
+ '⤐' => [10512].pack('U'),
+ '⤏' => [10511].pack('U'),
+ '⤍' => [10509].pack('U'),
+ '❳' => [10099].pack('U'),
+ '}' => [125].pack('U'),
+ ']' => [93].pack('U'),
+ '⦌' => [10636].pack('U'),
+ '⦎' => [10638].pack('U'),
+ '⦐' => [10640].pack('U'),
+ 'Ř' => [344].pack('U'),
+ 'ř' => [345].pack('U'),
+ 'Ŗ' => [342].pack('U'),
+ 'ŗ' => [343].pack('U'),
+ '⌉' => [8969].pack('U'),
+ '}' => [125].pack('U'),
+ 'Р' => [1056].pack('U'),
+ 'р' => [1088].pack('U'),
+ '⤷' => [10551].pack('U'),
+ '⥩' => [10601].pack('U'),
+ '”' => [8221].pack('U'),
+ '”' => [8221].pack('U'),
+ '↳' => [8627].pack('U'),
+ 'ℜ' => [8476].pack('U'),
+ 'ℜ' => [8476].pack('U'),
+ 'ℛ' => [8475].pack('U'),
+ 'ℜ' => [8476].pack('U'),
+ 'ℝ' => [8477].pack('U'),
+ '▭' => [9645].pack('U'),
+ '®' => [174].pack('U'),
+ '®' => [174].pack('U'),
+ '∋' => [8715].pack('U'),
+ '⇋' => [8651].pack('U'),
+ '⥯' => [10607].pack('U'),
+ '⥽' => [10621].pack('U'),
+ '⌋' => [8971].pack('U'),
+ 'ℜ' => [8476].pack('U'),
+ '𝔯' => [120111].pack('U'),
+ '⥤' => [10596].pack('U'),
+ '⇁' => [8641].pack('U'),
+ '⇀' => [8640].pack('U'),
+ '⥬' => [10604].pack('U'),
+ 'Ρ' => [929].pack('U'),
+ 'ρ' => [961].pack('U'),
+ 'ϱ' => [1009].pack('U'),
+ '⟩' => [10217].pack('U'),
+ '→' => [8594].pack('U'),
+ '⇒' => [8658].pack('U'),
+ '→' => [8594].pack('U'),
+ '⇥' => [8677].pack('U'),
+ '⇄' => [8644].pack('U'),
+ '↣' => [8611].pack('U'),
+ '⌉' => [8969].pack('U'),
+ '⟧' => [10215].pack('U'),
+ '⥝' => [10589].pack('U'),
+ '⇂' => [8642].pack('U'),
+ '⥕' => [10581].pack('U'),
+ '⌋' => [8971].pack('U'),
+ '⇁' => [8641].pack('U'),
+ '⇀' => [8640].pack('U'),
+ '⇄' => [8644].pack('U'),
+ '⇌' => [8652].pack('U'),
+ '⇉' => [8649].pack('U'),
+ '↝' => [8605].pack('U'),
+ '⊢' => [8866].pack('U'),
+ '↦' => [8614].pack('U'),
+ '⥛' => [10587].pack('U'),
+ '⋌' => [8908].pack('U'),
+ '⊳' => [8883].pack('U'),
+ '⧐' => [10704].pack('U'),
+ '⊵' => [8885].pack('U'),
+ '⥏' => [10575].pack('U'),
+ '⥜' => [10588].pack('U'),
+ '↾' => [8638].pack('U'),
+ '⥔' => [10580].pack('U'),
+ '⇀' => [8640].pack('U'),
+ '⥓' => [10579].pack('U'),
+ '˚' => [730].pack('U'),
+ '≓' => [8787].pack('U'),
+ '⇄' => [8644].pack('U'),
+ '⇌' => [8652].pack('U'),
+ '' => [8207].pack('U'),
+ '⎱' => [9137].pack('U'),
+ '⎱' => [9137].pack('U'),
+ '⫮' => [10990].pack('U'),
+ '⟭' => [10221].pack('U'),
+ '⇾' => [8702].pack('U'),
+ '⟧' => [10215].pack('U'),
+ '⦆' => [10630].pack('U'),
+ 'ℝ' => [8477].pack('U'),
+ '𝕣' => [120163].pack('U'),
+ '⨮' => [10798].pack('U'),
+ '⨵' => [10805].pack('U'),
+ '⥰' => [10608].pack('U'),
+ ')' => [41].pack('U'),
+ '⦔' => [10644].pack('U'),
+ '⨒' => [10770].pack('U'),
+ '⇉' => [8649].pack('U'),
+ '⇛' => [8667].pack('U'),
+ '›' => [8250].pack('U'),
+ 'ℛ' => [8475].pack('U'),
+ '𝓇' => [120007].pack('U'),
+ '↱' => [8625].pack('U'),
+ '↱' => [8625].pack('U'),
+ ']' => [93].pack('U'),
+ '’' => [8217].pack('U'),
+ '’' => [8217].pack('U'),
+ '⋌' => [8908].pack('U'),
+ '⋊' => [8906].pack('U'),
+ '▹' => [9657].pack('U'),
+ '⊵' => [8885].pack('U'),
+ '▸' => [9656].pack('U'),
+ '⧎' => [10702].pack('U'),
+ '⧴' => [10740].pack('U'),
+ '⥨' => [10600].pack('U'),
+ '℞' => [8478].pack('U'),
+ 'Ś' => [346].pack('U'),
+ 'ś' => [347].pack('U'),
+ '‚' => [8218].pack('U'),
+ '⪼' => [10940].pack('U'),
+ '≻' => [8827].pack('U'),
+ '⪸' => [10936].pack('U'),
+ 'Š' => [352].pack('U'),
+ 'š' => [353].pack('U'),
+ '≽' => [8829].pack('U'),
+ '⪴' => [10932].pack('U'),
+ '⪰' => [10928].pack('U'),
+ 'Ş' => [350].pack('U'),
+ 'ş' => [351].pack('U'),
+ 'Ŝ' => [348].pack('U'),
+ 'ŝ' => [349].pack('U'),
+ '⪺' => [10938].pack('U'),
+ '⪶' => [10934].pack('U'),
+ '⋩' => [8937].pack('U'),
+ '⨓' => [10771].pack('U'),
+ '≿' => [8831].pack('U'),
+ 'С' => [1057].pack('U'),
+ 'с' => [1089].pack('U'),
+ '⋅' => [8901].pack('U'),
+ '⊡' => [8865].pack('U'),
+ '⩦' => [10854].pack('U'),
+ '⤥' => [10533].pack('U'),
+ '⇘' => [8664].pack('U'),
+ '↘' => [8600].pack('U'),
+ '↘' => [8600].pack('U'),
+ '§' => [167].pack('U'),
+ ';' => [59].pack('U'),
+ '⤩' => [10537].pack('U'),
+ '∖' => [8726].pack('U'),
+ '∖' => [8726].pack('U'),
+ '✶' => [10038].pack('U'),
+ '𝔖' => [120086].pack('U'),
+ '𝔰' => [120112].pack('U'),
+ '⌢' => [8994].pack('U'),
+ '♯' => [9839].pack('U'),
+ 'Щ' => [1065].pack('U'),
+ 'щ' => [1097].pack('U'),
+ 'Ш' => [1064].pack('U'),
+ 'ш' => [1096].pack('U'),
+ '↓' => [8595].pack('U'),
+ '←' => [8592].pack('U'),
+ '∣' => [8739].pack('U'),
+ '∥' => [8741].pack('U'),
+ '→' => [8594].pack('U'),
+ '↑' => [8593].pack('U'),
+ '' => [173].pack('U'),
+ 'Σ' => [931].pack('U'),
+ 'σ' => [963].pack('U'),
+ 'ς' => [962].pack('U'),
+ 'ς' => [962].pack('U'),
+ '∼' => [8764].pack('U'),
+ '⩪' => [10858].pack('U'),
+ '≃' => [8771].pack('U'),
+ '≃' => [8771].pack('U'),
+ '⪞' => [10910].pack('U'),
+ '⪠' => [10912].pack('U'),
+ '⪝' => [10909].pack('U'),
+ '⪟' => [10911].pack('U'),
+ '≆' => [8774].pack('U'),
+ '⨤' => [10788].pack('U'),
+ '⥲' => [10610].pack('U'),
+ '←' => [8592].pack('U'),
+ '∘' => [8728].pack('U'),
+ '∖' => [8726].pack('U'),
+ '⨳' => [10803].pack('U'),
+ '⧤' => [10724].pack('U'),
+ '∣' => [8739].pack('U'),
+ '⌣' => [8995].pack('U'),
+ '⪪' => [10922].pack('U'),
+ '⪬' => [10924].pack('U'),
+ '⪬︀' => [10924, 65024].pack('U'),
+ 'Ь' => [1068].pack('U'),
+ 'ь' => [1100].pack('U'),
+ '/' => [47].pack('U'),
+ '⧄' => [10692].pack('U'),
+ '⌿' => [9023].pack('U'),
+ '𝕊' => [120138].pack('U'),
+ '𝕤' => [120164].pack('U'),
+ '♠' => [9824].pack('U'),
+ '♠' => [9824].pack('U'),
+ '∥' => [8741].pack('U'),
+ '⊓' => [8851].pack('U'),
+ '⊓︀' => [8851, 65024].pack('U'),
+ '⊔' => [8852].pack('U'),
+ '⊔︀' => [8852, 65024].pack('U'),
+ '√' => [8730].pack('U'),
+ '⊏' => [8847].pack('U'),
+ '⊑' => [8849].pack('U'),
+ '⊏' => [8847].pack('U'),
+ '⊑' => [8849].pack('U'),
+ '⊐' => [8848].pack('U'),
+ '⊒' => [8850].pack('U'),
+ '⊐' => [8848].pack('U'),
+ '⊒' => [8850].pack('U'),
+ '□' => [9633].pack('U'),
+ '□' => [9633].pack('U'),
+ '□' => [9633].pack('U'),
+ '⊓' => [8851].pack('U'),
+ '⊏' => [8847].pack('U'),
+ '⊑' => [8849].pack('U'),
+ '⊐' => [8848].pack('U'),
+ '⊒' => [8850].pack('U'),
+ '⊔' => [8852].pack('U'),
+ '▪' => [9642].pack('U'),
+ '▪' => [9642].pack('U'),
+ '→' => [8594].pack('U'),
+ '𝒮' => [119982].pack('U'),
+ '𝓈' => [120008].pack('U'),
+ '∖' => [8726].pack('U'),
+ '⌣' => [8995].pack('U'),
+ '⋆' => [8902].pack('U'),
+ '⋆' => [8902].pack('U'),
+ '☆' => [9734].pack('U'),
+ '★' => [9733].pack('U'),
+ 'ϵ' => [1013].pack('U'),
+ 'ϕ' => [981].pack('U'),
+ '¯' => [175].pack('U'),
+ '⋐' => [8912].pack('U'),
+ '⊂' => [8834].pack('U'),
+ '⪽' => [10941].pack('U'),
+ '⫅' => [10949].pack('U'),
+ '⊆' => [8838].pack('U'),
+ '⫃' => [10947].pack('U'),
+ '⫁' => [10945].pack('U'),
+ '⫋' => [10955].pack('U'),
+ '⊊' => [8842].pack('U'),
+ '⪿' => [10943].pack('U'),
+ '⥹' => [10617].pack('U'),
+ '⋐' => [8912].pack('U'),
+ '⊂' => [8834].pack('U'),
+ '⊆' => [8838].pack('U'),
+ '⫅' => [10949].pack('U'),
+ '⊆' => [8838].pack('U'),
+ '⊊' => [8842].pack('U'),
+ '⫋' => [10955].pack('U'),
+ '⫇' => [10951].pack('U'),
+ '⫕' => [10965].pack('U'),
+ '⫓' => [10963].pack('U'),
+ '≻' => [8827].pack('U'),
+ '⪸' => [10936].pack('U'),
+ '≽' => [8829].pack('U'),
+ '≻' => [8827].pack('U'),
+ '⪰' => [10928].pack('U'),
+ '≽' => [8829].pack('U'),
+ '≿' => [8831].pack('U'),
+ '⪰' => [10928].pack('U'),
+ '⪺' => [10938].pack('U'),
+ '⪶' => [10934].pack('U'),
+ '⋩' => [8937].pack('U'),
+ '≿' => [8831].pack('U'),
+ '∋' => [8715].pack('U'),
+ '∑' => [8721].pack('U'),
+ '∑' => [8721].pack('U'),
+ '♪' => [9834].pack('U'),
+ '⋑' => [8913].pack('U'),
+ '⊃' => [8835].pack('U'),
+ '¹' => [185].pack('U'),
+ '²' => [178].pack('U'),
+ '³' => [179].pack('U'),
+ '⪾' => [10942].pack('U'),
+ '⫘' => [10968].pack('U'),
+ '⫆' => [10950].pack('U'),
+ '⊇' => [8839].pack('U'),
+ '⫄' => [10948].pack('U'),
+ '⊃' => [8835].pack('U'),
+ '⊇' => [8839].pack('U'),
+ '⟉' => [10185].pack('U'),
+ '⫗' => [10967].pack('U'),
+ '⥻' => [10619].pack('U'),
+ '⫂' => [10946].pack('U'),
+ '⫌' => [10956].pack('U'),
+ '⊋' => [8843].pack('U'),
+ '⫀' => [10944].pack('U'),
+ '⋑' => [8913].pack('U'),
+ '⊃' => [8835].pack('U'),
+ '⊇' => [8839].pack('U'),
+ '⫆' => [10950].pack('U'),
+ '⊋' => [8843].pack('U'),
+ '⫌' => [10956].pack('U'),
+ '⫈' => [10952].pack('U'),
+ '⫔' => [10964].pack('U'),
+ '⫖' => [10966].pack('U'),
+ '⤦' => [10534].pack('U'),
+ '⇙' => [8665].pack('U'),
+ '↙' => [8601].pack('U'),
+ '↙' => [8601].pack('U'),
+ '⤪' => [10538].pack('U'),
+ 'ß' => [223].pack('U'),
+ '	' => [9].pack('U'),
+ '⌖' => [8982].pack('U'),
+ 'Τ' => [932].pack('U'),
+ 'τ' => [964].pack('U'),
+ '⎴' => [9140].pack('U'),
+ 'Ť' => [356].pack('U'),
+ 'ť' => [357].pack('U'),
+ 'Ţ' => [354].pack('U'),
+ 'ţ' => [355].pack('U'),
+ 'Т' => [1058].pack('U'),
+ 'т' => [1090].pack('U'),
+ '⃛' => [8411].pack('U'),
+ '⌕' => [8981].pack('U'),
+ '𝔗' => [120087].pack('U'),
+ '𝔱' => [120113].pack('U'),
+ '∴' => [8756].pack('U'),
+ '∴' => [8756].pack('U'),
+ '∴' => [8756].pack('U'),
+ 'Θ' => [920].pack('U'),
+ 'θ' => [952].pack('U'),
+ 'ϑ' => [977].pack('U'),
+ 'ϑ' => [977].pack('U'),
+ '≈' => [8776].pack('U'),
+ '∼' => [8764].pack('U'),
+ '  ' => [8287, 8202].pack('U'),
+ ' ' => [8201].pack('U'),
+ ' ' => [8201].pack('U'),
+ '≈' => [8776].pack('U'),
+ '∼' => [8764].pack('U'),
+ 'Þ' => [222].pack('U'),
+ 'þ' => [254].pack('U'),
+ '∼' => [8764].pack('U'),
+ '˜' => [732].pack('U'),
+ '≃' => [8771].pack('U'),
+ '≅' => [8773].pack('U'),
+ '≈' => [8776].pack('U'),
+ '×' => [215].pack('U'),
+ '⊠' => [8864].pack('U'),
+ '⨱' => [10801].pack('U'),
+ '⨰' => [10800].pack('U'),
+ '∭' => [8749].pack('U'),
+ '⤨' => [10536].pack('U'),
+ '⊤' => [8868].pack('U'),
+ '⌶' => [9014].pack('U'),
+ '⫱' => [10993].pack('U'),
+ '𝕋' => [120139].pack('U'),
+ '𝕥' => [120165].pack('U'),
+ '⫚' => [10970].pack('U'),
+ '⤩' => [10537].pack('U'),
+ '‴' => [8244].pack('U'),
+ '™' => [8482].pack('U'),
+ '™' => [8482].pack('U'),
+ '▵' => [9653].pack('U'),
+ '▿' => [9663].pack('U'),
+ '◃' => [9667].pack('U'),
+ '⊴' => [8884].pack('U'),
+ '≜' => [8796].pack('U'),
+ '▹' => [9657].pack('U'),
+ '⊵' => [8885].pack('U'),
+ '◬' => [9708].pack('U'),
+ '≜' => [8796].pack('U'),
+ '⨺' => [10810].pack('U'),
+ '⃛' => [8411].pack('U'),
+ '⨹' => [10809].pack('U'),
+ '⧍' => [10701].pack('U'),
+ '⨻' => [10811].pack('U'),
+ '⏢' => [9186].pack('U'),
+ '𝒯' => [119983].pack('U'),
+ '𝓉' => [120009].pack('U'),
+ 'Ц' => [1062].pack('U'),
+ 'ц' => [1094].pack('U'),
+ 'Ћ' => [1035].pack('U'),
+ 'ћ' => [1115].pack('U'),
+ 'Ŧ' => [358].pack('U'),
+ 'ŧ' => [359].pack('U'),
+ '≬' => [8812].pack('U'),
+ '↞' => [8606].pack('U'),
+ '↠' => [8608].pack('U'),
+ 'Ú' => [218].pack('U'),
+ 'ú' => [250].pack('U'),
+ '↟' => [8607].pack('U'),
+ '⇑' => [8657].pack('U'),
+ '↑' => [8593].pack('U'),
+ '⥉' => [10569].pack('U'),
+ 'Ў' => [1038].pack('U'),
+ 'ў' => [1118].pack('U'),
+ 'Ŭ' => [364].pack('U'),
+ 'ŭ' => [365].pack('U'),
+ 'Û' => [219].pack('U'),
+ 'û' => [251].pack('U'),
+ 'У' => [1059].pack('U'),
+ 'у' => [1091].pack('U'),
+ '⇅' => [8645].pack('U'),
+ 'Ű' => [368].pack('U'),
+ 'ű' => [369].pack('U'),
+ '⥮' => [10606].pack('U'),
+ '⥾' => [10622].pack('U'),
+ '𝔘' => [120088].pack('U'),
+ '𝔲' => [120114].pack('U'),
+ 'Ù' => [217].pack('U'),
+ 'ù' => [249].pack('U'),
+ '⥣' => [10595].pack('U'),
+ '↿' => [8639].pack('U'),
+ '↾' => [8638].pack('U'),
+ '▀' => [9600].pack('U'),
+ '⌜' => [8988].pack('U'),
+ '⌜' => [8988].pack('U'),
+ '⌏' => [8975].pack('U'),
+ '◸' => [9720].pack('U'),
+ 'Ū' => [362].pack('U'),
+ 'ū' => [363].pack('U'),
+ '¨' => [168].pack('U'),
+ '_' => [95].pack('U'),
+ '⏟' => [9183].pack('U'),
+ '⎵' => [9141].pack('U'),
+ '⏝' => [9181].pack('U'),
+ '⋃' => [8899].pack('U'),
+ '⊎' => [8846].pack('U'),
+ 'Ų' => [370].pack('U'),
+ 'ų' => [371].pack('U'),
+ '𝕌' => [120140].pack('U'),
+ '𝕦' => [120166].pack('U'),
+ '↑' => [8593].pack('U'),
+ '⇑' => [8657].pack('U'),
+ '↑' => [8593].pack('U'),
+ '⤒' => [10514].pack('U'),
+ '⇅' => [8645].pack('U'),
+ '↕' => [8597].pack('U'),
+ '⇕' => [8661].pack('U'),
+ '↕' => [8597].pack('U'),
+ '⥮' => [10606].pack('U'),
+ '↿' => [8639].pack('U'),
+ '↾' => [8638].pack('U'),
+ '⊎' => [8846].pack('U'),
+ '↖' => [8598].pack('U'),
+ '↗' => [8599].pack('U'),
+ 'ϒ' => [978].pack('U'),
+ 'υ' => [965].pack('U'),
+ 'ϒ' => [978].pack('U'),
+ 'Υ' => [933].pack('U'),
+ 'υ' => [965].pack('U'),
+ '⊥' => [8869].pack('U'),
+ '↥' => [8613].pack('U'),
+ '⇈' => [8648].pack('U'),
+ '⌝' => [8989].pack('U'),
+ '⌝' => [8989].pack('U'),
+ '⌎' => [8974].pack('U'),
+ 'Ů' => [366].pack('U'),
+ 'ů' => [367].pack('U'),
+ '◹' => [9721].pack('U'),
+ '𝒰' => [119984].pack('U'),
+ '𝓊' => [120010].pack('U'),
+ '⋰' => [8944].pack('U'),
+ 'Ũ' => [360].pack('U'),
+ 'ũ' => [361].pack('U'),
+ '▵' => [9653].pack('U'),
+ '▴' => [9652].pack('U'),
+ '⇈' => [8648].pack('U'),
+ 'Ü' => [220].pack('U'),
+ 'ü' => [252].pack('U'),
+ '⦧' => [10663].pack('U'),
+ '⦜' => [10652].pack('U'),
+ 'ϵ' => [1013].pack('U'),
+ 'ϰ' => [1008].pack('U'),
+ '∅' => [8709].pack('U'),
+ 'ϕ' => [981].pack('U'),
+ 'ϖ' => [982].pack('U'),
+ '∝' => [8733].pack('U'),
+ '⇕' => [8661].pack('U'),
+ '↕' => [8597].pack('U'),
+ 'ϱ' => [1009].pack('U'),
+ 'ς' => [962].pack('U'),
+ '⊊︀' => [8842, 65024].pack('U'),
+ '⫋︀' => [10955, 65024].pack('U'),
+ '⊋︀' => [8843, 65024].pack('U'),
+ '⫌︀' => [10956, 65024].pack('U'),
+ 'ϑ' => [977].pack('U'),
+ '⊲' => [8882].pack('U'),
+ '⊳' => [8883].pack('U'),
+ '⫫' => [10987].pack('U'),
+ '⫨' => [10984].pack('U'),
+ '⫩' => [10985].pack('U'),
+ 'В' => [1042].pack('U'),
+ 'в' => [1074].pack('U'),
+ '⊫' => [8875].pack('U'),
+ '⊩' => [8873].pack('U'),
+ '⊨' => [8872].pack('U'),
+ '⊢' => [8866].pack('U'),
+ '⫦' => [10982].pack('U'),
+ '⋁' => [8897].pack('U'),
+ '∨' => [8744].pack('U'),
+ '⊻' => [8891].pack('U'),
+ '≚' => [8794].pack('U'),
+ '⋮' => [8942].pack('U'),
+ '‖' => [8214].pack('U'),
+ '|' => [124].pack('U'),
+ '‖' => [8214].pack('U'),
+ '|' => [124].pack('U'),
+ '∣' => [8739].pack('U'),
+ '|' => [124].pack('U'),
+ '❘' => [10072].pack('U'),
+ '≀' => [8768].pack('U'),
+ ' ' => [8202].pack('U'),
+ '𝔙' => [120089].pack('U'),
+ '𝔳' => [120115].pack('U'),
+ '⊲' => [8882].pack('U'),
+ '⊂⃒' => [8834, 8402].pack('U'),
+ '⊃⃒' => [8835, 8402].pack('U'),
+ '𝕍' => [120141].pack('U'),
+ '𝕧' => [120167].pack('U'),
+ '∝' => [8733].pack('U'),
+ '⊳' => [8883].pack('U'),
+ '𝒱' => [119985].pack('U'),
+ '𝓋' => [120011].pack('U'),
+ '⫋︀' => [10955, 65024].pack('U'),
+ '⊊︀' => [8842, 65024].pack('U'),
+ '⫌︀' => [10956, 65024].pack('U'),
+ '⊋︀' => [8843, 65024].pack('U'),
+ '⊪' => [8874].pack('U'),
+ '⦚' => [10650].pack('U'),
+ 'Ŵ' => [372].pack('U'),
+ 'ŵ' => [373].pack('U'),
+ '⩟' => [10847].pack('U'),
+ '⋀' => [8896].pack('U'),
+ '∧' => [8743].pack('U'),
+ '≙' => [8793].pack('U'),
+ '℘' => [8472].pack('U'),
+ '𝔚' => [120090].pack('U'),
+ '𝔴' => [120116].pack('U'),
+ '𝕎' => [120142].pack('U'),
+ '𝕨' => [120168].pack('U'),
+ '℘' => [8472].pack('U'),
+ '≀' => [8768].pack('U'),
+ '≀' => [8768].pack('U'),
+ '𝒲' => [119986].pack('U'),
+ '𝓌' => [120012].pack('U'),
+ '⋂' => [8898].pack('U'),
+ '◯' => [9711].pack('U'),
+ '⋃' => [8899].pack('U'),
+ '▽' => [9661].pack('U'),
+ '𝔛' => [120091].pack('U'),
+ '𝔵' => [120117].pack('U'),
+ '⟺' => [10234].pack('U'),
+ '⟷' => [10231].pack('U'),
+ 'Ξ' => [926].pack('U'),
+ 'ξ' => [958].pack('U'),
+ '⟸' => [10232].pack('U'),
+ '⟵' => [10229].pack('U'),
+ '⟼' => [10236].pack('U'),
+ '⋻' => [8955].pack('U'),
+ '⨀' => [10752].pack('U'),
+ '𝕏' => [120143].pack('U'),
+ '𝕩' => [120169].pack('U'),
+ '⨁' => [10753].pack('U'),
+ '⨂' => [10754].pack('U'),
+ '⟹' => [10233].pack('U'),
+ '⟶' => [10230].pack('U'),
+ '𝒳' => [119987].pack('U'),
+ '𝓍' => [120013].pack('U'),
+ '⨆' => [10758].pack('U'),
+ '⨄' => [10756].pack('U'),
+ '△' => [9651].pack('U'),
+ '⋁' => [8897].pack('U'),
+ '⋀' => [8896].pack('U'),
+ 'Ý' => [221].pack('U'),
+ 'ý' => [253].pack('U'),
+ 'Я' => [1071].pack('U'),
+ 'я' => [1103].pack('U'),
+ 'Ŷ' => [374].pack('U'),
+ 'ŷ' => [375].pack('U'),
+ 'Ы' => [1067].pack('U'),
+ 'ы' => [1099].pack('U'),
+ '¥' => [165].pack('U'),
+ '𝔜' => [120092].pack('U'),
+ '𝔶' => [120118].pack('U'),
+ 'Ї' => [1031].pack('U'),
+ 'ї' => [1111].pack('U'),
+ '𝕐' => [120144].pack('U'),
+ '𝕪' => [120170].pack('U'),
+ '𝒴' => [119988].pack('U'),
+ '𝓎' => [120014].pack('U'),
+ 'Ю' => [1070].pack('U'),
+ 'ю' => [1102].pack('U'),
+ 'Ÿ' => [376].pack('U'),
+ 'ÿ' => [255].pack('U'),
+ 'Ź' => [377].pack('U'),
+ 'ź' => [378].pack('U'),
+ 'Ž' => [381].pack('U'),
+ 'ž' => [382].pack('U'),
+ 'З' => [1047].pack('U'),
+ 'з' => [1079].pack('U'),
+ 'Ż' => [379].pack('U'),
+ 'ż' => [380].pack('U'),
+ 'ℨ' => [8488].pack('U'),
+ '​' => [8203].pack('U'),
+ 'Ζ' => [918].pack('U'),
+ 'ζ' => [950].pack('U'),
+ 'ℨ' => [8488].pack('U'),
+ '𝔷' => [120119].pack('U'),
+ 'Ж' => [1046].pack('U'),
+ 'ж' => [1078].pack('U'),
+ '⇝' => [8669].pack('U'),
+ 'ℤ' => [8484].pack('U'),
+ '𝕫' => [120171].pack('U'),
+ '𝒵' => [119989].pack('U'),
+ '𝓏' => [120015].pack('U'),
+ '' => [8205].pack('U'),
+ '' => [8204].pack('U'),
+ }
+
+ ##
+ # Decodes HTML entities.
+ #
+ # @see [decode]
+ #
+ def self.decode(input)
+ return XML::Entities.decode(input, DECODE_MAPPING)
+ end
+ end # Entities
+ end # HTML
+end # Oga
diff --git a/lib/oga/xml/entities.rb b/lib/oga/xml/entities.rb
index 6a6849d..5f62025 100644
--- a/lib/oga/xml/entities.rb
+++ b/lib/oga/xml/entities.rb
@@ -1,5 +1,9 @@
module Oga
module XML
+ ##
+ # Module for encoding/decoding XML and HTML entities. The mapping of HTML
+ # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
+ #
module Entities
##
# Hash containing XML entities and the corresponding characters.
@@ -11,15 +15,10 @@ module Oga
#
DECODE_MAPPING = {
'&lt;' => '<',
- '&#60;' => '<',
'&gt;' => '>',
- '&#62;' => '>',
'&apos;' => "'",
- '&#39;' => "'",
'&quot;' => '"',
- '&#34;' => '"',
'&amp;' => '&',
- '&#38;' => '&',
}
##
@@ -35,16 +34,46 @@ module Oga
'<' => '<',
}
+ ##
+ # @return [String] the character every entity starts with
+ #
+ AMPERSAND = '&'.freeze
+
+ ##
+ # Regexp for matching named XML/HTML entities such as "&nbsp;".
+ #
+ # @return [Regexp]
+ #
+ REGULAR_ENTITY = /&[a-zA-Z]+;/
+
+ ##
+ # Regexp for matching codepoint entities such as "&#38;" or "&#x26;".
+ #
+ # @return [Regexp]
+ #
+ CODEPOINT_ENTITY = /&#(x)?([a-zA-Z0-9]+);/
+
+ ##
+ # @return [Regexp] matches every character listed in ENCODE_MAPPING
+ #
+ ENCODE_REGEXP = Regexp.union(ENCODE_MAPPING.keys)
+
##
# Decodes XML entities.
#
# @param [String] input
+ # @param [Array] keys
+ # @param [Hash] mapping
# @return [String]
#
- def self.decode(input)
- if input.include?('&')
- DECODE_MAPPING.each do |find, replace|
- input = input.gsub(find, replace)
+ def self.decode(input, mapping = DECODE_MAPPING)
+ return input unless input.include?(AMPERSAND)
+
+ input = input.gsub(REGULAR_ENTITY, mapping)
+
+ if input.include?(AMPERSAND)
+ input = input.gsub(CODEPOINT_ENTITY) do |match|
+ [$1 ? Integer($2, 16) : Integer($2)].pack('U')
end
end
@@ -55,14 +84,11 @@ module Oga
# Encodes special characters as XML entities.
#
# @param [String] input
+ # @param [Hash] mapping
# @return [String]
#
- def self.encode(input)
- ENCODE_MAPPING.each do |from, to|
- input = input.gsub(from, to) if input.include?(from)
- end
-
- return input
+ def self.encode(input, mapping = ENCODE_MAPPING)
+ return input.gsub(ENCODE_REGEXP, mapping)
end
end # Entities
end # XML
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index 3d2495e..c84e90f 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -217,7 +217,7 @@ module Oga
# @param [String] value The data between the quotes.
#
def on_string_body(value)
- add_token(:T_STRING_BODY, Entities.decode(value))
+ add_token(:T_STRING_BODY, value)
end
##
@@ -373,7 +373,7 @@ module Oga
def on_text(value)
return if value.empty?
- add_token(:T_TEXT, Entities.decode(value))
+ add_token(:T_TEXT, value)
end
##
diff --git a/lib/oga/xml/text.rb b/lib/oga/xml/text.rb
index 7e67b9e..edb585a 100644
--- a/lib/oga/xml/text.rb
+++ b/lib/oga/xml/text.rb
@@ -5,19 +5,65 @@ module Oga
# have any children, attributes and the likes; just text.
#
class Text < CharacterNode
+ def initialize(*args)
+ super
+
+ @mutex = Mutex.new
+ @decoded = false
+ end
+
+ ##
+ # @param [String] value
+ #
+ def text=(value)
+ # In case of concurrent text/text= calls.
+ @mutex.synchronize do
+ @decoded = false
+ @text = value
+ end
+ end
+
+ ##
+ # Returns the text as a String. Upon the first call any XML/HTML entities
+ # are decoded.
+ #
+ # @return [String]
+ #
+ def text
+ @mutex.synchronize do
+ unless @decoded
+ decoder = html? ? HTML::Entities : Entities
+ @text = decoder.decode(@text)
+ @decoded = true
+ end
+ end
+
+ return @text
+ end
+
##
# @see [Oga::XML::CharacterNode#to_xml]
#
def to_xml
node = parent
- root = root_node
- if root.is_a?(Document) and node.is_a?(Element) and root.html? \
+ if node.is_a?(Element) and html? \
and Lexer::LITERAL_HTML_ELEMENTS.include?(node.name)
return super
- else
- return Entities.encode(super)
end
+
+ return Entities.encode(super)
+ end
+
+ private
+
+ ##
+ # @return [TrueClass|FalseClass]
+ #
+ def html?
+ root = root_node
+
+ return root.is_a?(Document) && root.html?
end
end # Text
end # XML
diff --git a/spec/oga/html/entities_spec.rb b/spec/oga/html/entities_spec.rb
new file mode 100644
index 0000000..7ee2204
--- /dev/null
+++ b/spec/oga/html/entities_spec.rb
@@ -0,0 +1,15 @@
+# encoding: utf-8
+
+require 'spec_helper'
+
+describe Oga::HTML::Entities do
+ describe 'decode' do
+ it 'decodes & into &' do
+ described_class.decode('&').should == '&'
+ end
+
+ it 'decodes λ into λ' do
+ described_class.decode('λ').should == 'λ'
+ end
+ end
+end
diff --git a/spec/oga/xml/entities_spec.rb b/spec/oga/xml/entities_spec.rb
index 84346f3..68bf4d3 100644
--- a/spec/oga/xml/entities_spec.rb
+++ b/spec/oga/xml/entities_spec.rb
@@ -65,6 +65,10 @@ describe Oga::XML::Entities do
it 'decodes &amp;&lt; into &<' do
  described_class.decode('&amp;&lt;').should == '&<'
end
+
+ it 'decodes &lt; into <' do
+   described_class.decode('&lt;').should == '<'
+ end
end
describe 'encode' do
diff --git a/spec/oga/xml/lexer/entities_spec.rb b/spec/oga/xml/lexer/entities_spec.rb
deleted file mode 100644
index f124623..0000000
--- a/spec/oga/xml/lexer/entities_spec.rb
+++ /dev/null
@@ -1,55 +0,0 @@
-require 'spec_helper'
-
-describe Oga::XML::Lexer do
-  describe 'converting XML entities in text tokens' do
-    it 'converts &amp; into &' do
-      lex('&amp;').should == [[:T_TEXT, '&', 1]]
-    end
-
-    it 'converts &lt; into <' do
-      lex('&lt;').should == [[:T_TEXT, '<', 1]]
-    end
-
-    it 'converts &gt; into >' do
-      lex('&gt;').should == [[:T_TEXT, '>', 1]]
-    end
-  end
-
-  describe 'converting XML entities in string tokens' do
-    it 'converts &amp; into &' do
-      lex('<foo class="&amp;" />').should == [
-        [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'foo', 1],
-        [:T_ATTR, 'class', 1],
-        [:T_STRING_DQUOTE, nil, 1],
-        [:T_STRING_BODY, '&', 1],
-        [:T_STRING_DQUOTE, nil, 1],
-        [:T_ELEM_END, nil, 1]
-      ]
-    end
-
-    it 'converts &lt; into <' do
-      lex('<foo class="&lt;" />').should == [
-        [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'foo', 1],
-        [:T_ATTR, 'class', 1],
-        [:T_STRING_DQUOTE, nil, 1],
-        [:T_STRING_BODY, '<', 1],
-        [:T_STRING_DQUOTE, nil, 1],
-        [:T_ELEM_END, nil, 1]
-      ]
-    end
-
-    it 'converts &gt; into >' do
-      lex('<foo class="&gt;" />').should == [
-        [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'foo', 1],
-        [:T_ATTR, 'class', 1],
-        [:T_STRING_DQUOTE, nil, 1],
-        [:T_STRING_BODY, '>', 1],
-        [:T_STRING_DQUOTE, nil, 1],
-        [:T_ELEM_END, nil, 1]
-      ]
-    end
-  end
-end
diff --git a/spec/oga/xml/text_spec.rb b/spec/oga/xml/text_spec.rb
index cda54ce..b29ef2a 100644
--- a/spec/oga/xml/text_spec.rb
+++ b/spec/oga/xml/text_spec.rb
@@ -14,6 +14,79 @@ describe Oga::XML::Text do
end
end
+ describe '#text' do
+   describe 'with XML entities' do
+     it 'converts &amp; to &' do
+       described_class.new(:text => '&amp;').text.should == '&'
+     end
+
+     it 'converts &lt; to <' do
+       described_class.new(:text => '&lt;').text.should == '<'
+     end
+
+     it 'converts &gt; to >' do
+       described_class.new(:text => '&gt;').text.should == '>'
+     end
+
+     it 'caches the converted text' do
+       node = described_class.new(:text => '&amp;')
+
+       Oga::XML::Entities.should_receive(:decode).once.and_call_original
+
+       node.text.should == '&'
+       node.text.should == '&'
+     end
+
+     it 'converts new text set using text=' do
+       node = described_class.new(:text => '&amp;')
+
+       node.text.should == '&'
+
+       node.text = '&lt;'
+
+       node.text.should == '<'
+     end
+   end
+
+   describe 'with HTML entities' do
+     before do
+       @document = Oga::XML::Document.new(:type => :html)
+     end
+
+     it 'converts &amp; to &' do
+       node = described_class.new(:text => '&amp;')
+
+       @document.children << node
+
+       node.text.should == '&'
+     end
+
+     it 'converts &lt; to <' do
+       node = described_class.new(:text => '&lt;')
+
+       @document.children << node
+
+       node.text.should == '<'
+     end
+
+     it 'converts &gt; to >' do
+       node = described_class.new(:text => '&gt;')
+
+       @document.children << node
+
+       node.text.should == '>'
+     end
+
+     it 'converts &nbsp; into a space' do
+       node = described_class.new(:text => '&nbsp;')
+
+       @document.children << node
+
+       node.text.should == [160].pack('U')
+     end
+   end
+ end
+
describe '#to_xml' do
it 'generates the corresponding XML' do
node = described_class.new(:text => 'foo')