From d0d597e2d93035c35b6b653d181f550d9dd522fd Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Sat, 23 May 2015 10:46:49 +0200 Subject: [PATCH] Allow script/template in various table elements Fixes #105 --- lib/oga/xml/lexer.rb | 14 +++-- .../html/lexer/closing_rules/table_spec.rb | 62 ++++++++++++------- .../html/lexer/closing_rules/tbody_spec.rb | 20 ++++++ .../html/lexer/closing_rules/tfoot_spec.rb | 20 ++++++ .../html/lexer/closing_rules/thead_spec.rb | 20 ++++++ spec/oga/html/lexer/closing_rules/tr_spec.rb | 20 ++++++ 6 files changed, 128 insertions(+), 28 deletions(-) diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index e2c724d..465fd2a 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -50,6 +50,10 @@ module Oga %w{thead tbody tfoot tr caption colgroup col} ) + HTML_SCRIPT_ELEMENTS = Whitelist.new(%w{script template}) + + HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr}) + HTML_SCRIPT_ELEMENTS + # Elements that should be closed automatically before a new opening tag is # processed. HTML_CLOSE_SELF = { @@ -71,11 +75,11 @@ module Oga 'option' => Blacklist.new(%w{optgroup option}), 'colgroup' => Whitelist.new(%w{col template}), 'caption' => HTML_TABLE_ALLOWED.to_blacklist, - 'table' => HTML_TABLE_ALLOWED, - 'thead' => Whitelist.new(%w{tr}), - 'tbody' => Whitelist.new(%w{tr}), - 'tfoot' => Whitelist.new(%w{tr}), - 'tr' => Whitelist.new(%w{td th}), + 'table' => HTML_TABLE_ALLOWED + HTML_SCRIPT_ELEMENTS, + 'thead' => HTML_TABLE_ROW_ELEMENTS, + 'tbody' => HTML_TABLE_ROW_ELEMENTS, + 'tfoot' => HTML_TABLE_ROW_ELEMENTS, + 'tr' => Whitelist.new(%w{td th}) + HTML_SCRIPT_ELEMENTS, 'td' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED, 'th' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED } diff --git a/spec/oga/html/lexer/closing_rules/table_spec.rb b/spec/oga/html/lexer/closing_rules/table_spec.rb index 5a7f437..160f80c 100644 --- a/spec/oga/html/lexer/closing_rules/table_spec.rb +++ b/spec/oga/html/lexer/closing_rules/table_spec.rb @@ -1,30 +1,46 @@ require 'spec_helper' describe Oga::XML::Lexer do - describe 'HTML tables' do - describe 'with unclosed tags' do - it 'lexes a tag followed by a tag' do - lex_html('foo').should == [ - [:T_ELEM_NAME, 'tr', 1], - [:T_TEXT, 'foo', 1], - [:T_ELEM_END, nil, 1], - [:T_ELEM_NAME, 'tbody', 1], - [:T_ELEM_END, nil, 1] - ] - end + describe 'using HTML elements' do + it 'lexes two unclosed
elements following each other as separate elements' do + lex_html('
foo
bar').should == [ + [:T_ELEM_NAME, 'table', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_NAME, 'table', 1], + [:T_TEXT, 'bar', 1], + [:T_ELEM_END, nil, 1] + ] + end - it 'lexes an unclosed tag' do - lex_html('bar').should == [ - [:T_ELEM_NAME, 'tr', 1], - [:T_ELEM_NAME, 'th', 1], - [:T_TEXT, 'foo', 1], - [:T_ELEM_END, nil, 1], - [:T_ELEM_END, nil, 1], - [:T_ELEM_NAME, 'tbody', 1], - [:T_TEXT, 'bar', 1], - [:T_ELEM_END, nil, 1] - ] - end + it 'lexes a
tag followed by a
foo
element containing a element' do + lex_html('
foo
').should == [ + [:T_ELEM_NAME, 'table', 1], + [:T_ELEM_NAME, 'thead', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'lexes a element containing a
').should == [ + [:T_ELEM_NAME, 'table', 1], + [:T_ELEM_NAME, 'script', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'lexes a element containing a