Refactored handling of literal HTML elements.

This ensures newlines can appear inside <style> and <script> tags when an IO
object is used as the input.
This commit is contained in:
Yorick Peterse 2015-03-04 11:44:31 +01:00
parent 78e40b55c0
commit 3b2055a30b
2 changed files with 47 additions and 9 deletions

View File

@ -330,8 +330,6 @@
if ( literal_html_element_p() ) if ( literal_html_element_p() )
{ {
mark = ts + 1;
fnext literal_html_element; fnext literal_html_element;
} }
else else
@ -404,10 +402,25 @@
# Certain tags in HTML can contain basically anything except for the literal # Certain tags in HTML can contain basically anything except for the literal
# closing tag. Two examples are script and style tags. As a result of this # closing tag. Two examples are script and style tags. As a result of this
# we can't use the regular text machine. # we can't use the regular text machine.
literal_html_element := |* literal_html_closing_tags = '</script>' | '</style>';
'</script>' | '</style>' => { literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines;
callback(id_on_text, data, encoding, mark, ts);
literal_html_element := |*
literal_html_allowed => {
callback(id_on_text, data, encoding, ts, te);
if ( lines > 0 )
{
advance_line(lines);
lines = 0;
}
};
literal_html_allowed %{ mark = p; } literal_html_closing_tags => {
callback(id_on_text, data, encoding, ts, mark);
p = mark - 1;
mark = 0; mark = 0;
if ( lines > 0 ) if ( lines > 0 )
@ -417,12 +430,8 @@
lines = 0; lines = 0;
} }
callback_simple(id_on_element_end);
fnext main; fnext main;
}; };
any $count_newlines;
*|; *|;
# The main machine aka the entry point of Ragel. # The main machine aka the entry point of Ragel.

View File

@ -2,6 +2,14 @@ require 'spec_helper'
describe Oga::XML::Lexer do describe Oga::XML::Lexer do
describe 'HTML style elements' do describe 'HTML style elements' do
it 'lexes an empty <style> tag' do
lex('<style></style>', :html => true).should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'style', 1],
[:T_ELEM_END, nil, 1]
]
end
it 'treats the content of a style tag as plain text' do it 'treats the content of a style tag as plain text' do
lex('<style>foo <bar</style>', :html => true).should == [ lex('<style>foo <bar</style>', :html => true).should == [
[:T_ELEM_START, nil, 1], [:T_ELEM_START, nil, 1],
@ -10,5 +18,26 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1] [:T_ELEM_END, nil, 1]
] ]
end end
it 'lexes a multi-line <style> tag using a String as the input' do
lex("<style>foo\nbar</style>", :html => true).should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'style', 1],
[:T_TEXT, "foo\nbar", 1],
[:T_ELEM_END, nil, 2]
]
end
it 'lexes a multi-line <style> tag using an IO as the input' do
io = StringIO.new("<style>foo\nbar</style>")
lex(io, :html => true).should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'style', 1],
[:T_TEXT, "foo\n", 1],
[:T_TEXT, 'bar', 2],
[:T_ELEM_END, nil, 2]
]
end
end end
end end