From 3b2055a30b128aa679a83332dfdfa68314271b24 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 4 Mar 2015 11:44:31 +0100 Subject: [PATCH] Refactored handling of literal HTML elements. This ensures newlines can appear in ' => { - callback(id_on_text, data, encoding, mark, ts); + literal_html_closing_tags = '' | ''; + literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines; + literal_html_element := |* + literal_html_allowed => { + callback(id_on_text, data, encoding, ts, te); + + if ( lines > 0 ) + { + advance_line(lines); + + lines = 0; + } + }; + + literal_html_allowed %{ mark = p; } literal_html_closing_tags => { + callback(id_on_text, data, encoding, ts, mark); + + p = mark - 1; mark = 0; if ( lines > 0 ) @@ -417,12 +430,8 @@ lines = 0; } - callback_simple(id_on_element_end); - fnext main; }; - - any $count_newlines; *|; # The main machine aka the entry point of Ragel. diff --git a/spec/oga/xml/lexer/html_style_spec.rb b/spec/oga/xml/lexer/html_style_spec.rb index cfa3907..6ac7353 100644 --- a/spec/oga/xml/lexer/html_style_spec.rb +++ b/spec/oga/xml/lexer/html_style_spec.rb @@ -2,6 +2,14 @@ require 'spec_helper' describe Oga::XML::Lexer do describe 'HTML style elements' do + it 'lexes an empty ', :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_ELEM_END, nil, 1] + ] + end + it 'treats the content of a style tag as plain text' do lex('', :html => true).should == [ [:T_ELEM_START, nil, 1], @@ -10,5 +18,26 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ] end + + it 'lexes a multi-line ", :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_TEXT, "foo\nbar", 1], + [:T_ELEM_END, nil, 2] + ] + end + + it 'lexes a multi-line ") + + lex(io, :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_TEXT, "foo\n", 1], + [:T_TEXT, 'bar', 2], + [:T_ELEM_END, nil, 2] + ] + end end end