From 8ac0055e42936958f1064f9ff2b16c8ad7a52e9e Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 11 Apr 2018 21:29:40 +0200 Subject: [PATCH] Allow "th" to occur in thead, tbody, and tfoot Fixes https://gitlab.com/yorickpeterse/oga/issues/190 --- lib/oga/xml/lexer.rb | 6 +++++- spec/oga/html/lexer/closing_rules/tbody_spec.rb | 10 ++++++++++ spec/oga/html/lexer/closing_rules/tfoot_spec.rb | 10 ++++++++++ spec/oga/html/lexer/closing_rules/thead_spec.rb | 10 ++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index b3e6c44..9963edd 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -58,7 +58,11 @@ module Oga HTML_SCRIPT_ELEMENTS = Whitelist.new(%w{script template}) - HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr}) + HTML_SCRIPT_ELEMENTS + # The elements that may occur in a thead, tbody, or tfoot. + # + # Technically "th" is not allowed per the HTML5 spec, but it's so commonly + # used in these elements that we allow it anyway. + HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr th}) + HTML_SCRIPT_ELEMENTS # Elements that should be closed automatically before a new opening tag is # processed. 
diff --git a/spec/oga/html/lexer/closing_rules/tbody_spec.rb b/spec/oga/html/lexer/closing_rules/tbody_spec.rb index 9967ea5..4f4579c 100644 --- a/spec/oga/html/lexer/closing_rules/tbody_spec.rb +++ b/spec/oga/html/lexer/closing_rules/tbody_spec.rb @@ -53,5 +53,15 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ]) end + + it 'lexes a <tbody> element containing a <th> element' do + expect(lex_html('<tbody><th>foo</th></tbody>')).to eq([ + [:T_ELEM_NAME, 'tbody', 1], + [:T_ELEM_NAME, 'th', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_END, nil, 1] + ]) + end end end diff --git a/spec/oga/html/lexer/closing_rules/tfoot_spec.rb b/spec/oga/html/lexer/closing_rules/tfoot_spec.rb index 6b296e1..c7920c6 100644 --- a/spec/oga/html/lexer/closing_rules/tfoot_spec.rb +++ b/spec/oga/html/lexer/closing_rules/tfoot_spec.rb @@ -53,5 +53,15 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ]) end + + it 'lexes a <tfoot> element containing a <th> element' do + expect(lex_html('<tfoot><th>foo</th></tfoot>')).to eq([ + [:T_ELEM_NAME, 'tfoot', 1], + [:T_ELEM_NAME, 'th', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_END, nil, 1] + ]) + end end end diff --git a/spec/oga/html/lexer/closing_rules/thead_spec.rb b/spec/oga/html/lexer/closing_rules/thead_spec.rb index 760f522..c5c73a0 100644 --- a/spec/oga/html/lexer/closing_rules/thead_spec.rb +++ b/spec/oga/html/lexer/closing_rules/thead_spec.rb @@ -53,5 +53,15 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ]) end + + it 'lexes a <thead> element containing a <th> element' do + expect(lex_html('<thead><th>foo</th></thead>')).to eq([ + [:T_ELEM_NAME, 'thead', 1], + [:T_ELEM_NAME, 'th', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_END, nil, 1] + ]) + end end end