From d2523a1082b5ab601724e02fa4c613a9d9d9e3c6 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 25 May 2015 13:41:17 +0200 Subject: [PATCH] Support whitespace in element closing tags Fixes #108 --- ext/ragel/base_lexer.rl | 37 ++++++++++++++++++++++------- spec/oga/xml/lexer/elements_spec.rb | 24 +++++++++++++++++++ spec/oga/xml/lexer/general_spec.rb | 4 ---- 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 1546f2b..8c5eb6f 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -368,10 +368,12 @@ fnext element_name; } - action close_element { - callback(id_on_element_end, data, encoding, mark, te - 1); + action start_close_element { + fnext element_close; + } - mark = 0; + action close_element { + callback(id_on_element_end, data, encoding, ts, te); } action close_element_fnext_main { @@ -381,10 +383,7 @@ } element_start = '<' ident_char; - - element_end = '' - | '' - ; + element_end = ' close_element; + + '>' => { + if ( lines > 0 ) + { + advance_line(lines); + + lines = 0; + } + + fnext main; + }; + + any $count_newlines; + *|; + # Characters that can be used for unquoted HTML attribute values. # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example # for more info. @@ -587,7 +608,7 @@ cdata_start => start_cdata; proc_ins_start => start_proc_ins; element_start => start_element; - element_end => close_element; + element_end => start_close_element; any => start_text; *|; }%% diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb index 71a6511..b4b808d 100644 --- a/spec/oga/xml/lexer/elements_spec.rb +++ b/spec/oga/xml/lexer/elements_spec.rb @@ -73,6 +73,30 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 2] ] end + + it 'lexes an element with a space in the closing tag' do + lex("bar").should == [ + [:T_ELEM_NAME, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_TEXT, 'bar', 1] + ] + end + + it 'lexes an element with a newline in the closing tag' do + lex("bar").should == [ + [:T_ELEM_NAME, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_TEXT, 'bar', 2] + ] + end + + it 'lexes an element with a newline in the closing tag using an IO as input' do + lex(StringIO.new("bar")).should == [ + [:T_ELEM_NAME, 'foo', 1], + [:T_ELEM_END, nil, 1], + [:T_TEXT, 'bar', 2] + ] + end end describe 'elements with attributes' do diff --git a/spec/oga/xml/lexer/general_spec.rb b/spec/oga/xml/lexer/general_spec.rb index bc02a11..e214abf 100644 --- a/spec/oga/xml/lexer/general_spec.rb +++ b/spec/oga/xml/lexer/general_spec.rb @@ -30,10 +30,6 @@ describe Oga::XML::Lexer do lex('>').should == [[:T_TEXT, '>', 1]] end - it 'lexes