Refactored handling of literal HTML elements.
This ensures that newlines can appear inside <style> and <script> tags when an IO object is used as the input.
This commit is contained in:
parent
78e40b55c0
commit
3b2055a30b
|
@ -330,8 +330,6 @@
|
||||||
|
|
||||||
if ( literal_html_element_p() )
|
if ( literal_html_element_p() )
|
||||||
{
|
{
|
||||||
mark = ts + 1;
|
|
||||||
|
|
||||||
fnext literal_html_element;
|
fnext literal_html_element;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -404,10 +402,25 @@
|
||||||
# Certain tags in HTML can contain basically anything except for the literal
|
# Certain tags in HTML can contain basically anything except for the literal
|
||||||
# closing tag. Two examples are script and style tags. As a result of this
|
# closing tag. Two examples are script and style tags. As a result of this
|
||||||
# we can't use the regular text machine.
|
# we can't use the regular text machine.
|
||||||
literal_html_element := |*
|
literal_html_closing_tags = '</script>' | '</style>';
|
||||||
'</script>' | '</style>' => {
|
literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines;
|
||||||
callback(id_on_text, data, encoding, mark, ts);
|
|
||||||
|
|
||||||
|
literal_html_element := |*
|
||||||
|
literal_html_allowed => {
|
||||||
|
callback(id_on_text, data, encoding, ts, te);
|
||||||
|
|
||||||
|
if ( lines > 0 )
|
||||||
|
{
|
||||||
|
advance_line(lines);
|
||||||
|
|
||||||
|
lines = 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
literal_html_allowed %{ mark = p; } literal_html_closing_tags => {
|
||||||
|
callback(id_on_text, data, encoding, ts, mark);
|
||||||
|
|
||||||
|
p = mark - 1;
|
||||||
mark = 0;
|
mark = 0;
|
||||||
|
|
||||||
if ( lines > 0 )
|
if ( lines > 0 )
|
||||||
|
@ -417,12 +430,8 @@
|
||||||
lines = 0;
|
lines = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
callback_simple(id_on_element_end);
|
|
||||||
|
|
||||||
fnext main;
|
fnext main;
|
||||||
};
|
};
|
||||||
|
|
||||||
any $count_newlines;
|
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# The main machine aka the entry point of Ragel.
|
# The main machine aka the entry point of Ragel.
|
||||||
|
|
|
@ -2,6 +2,14 @@ require 'spec_helper'
|
||||||
|
|
||||||
describe Oga::XML::Lexer do
|
describe Oga::XML::Lexer do
|
||||||
describe 'HTML style elements' do
|
describe 'HTML style elements' do
|
||||||
|
it 'lexes an empty <style> tag' do
|
||||||
|
lex('<style></style>', :html => true).should == [
|
||||||
|
[:T_ELEM_START, nil, 1],
|
||||||
|
[:T_ELEM_NAME, 'style', 1],
|
||||||
|
[:T_ELEM_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
it 'treats the content of a style tag as plain text' do
|
it 'treats the content of a style tag as plain text' do
|
||||||
lex('<style>foo <bar</style>', :html => true).should == [
|
lex('<style>foo <bar</style>', :html => true).should == [
|
||||||
[:T_ELEM_START, nil, 1],
|
[:T_ELEM_START, nil, 1],
|
||||||
|
@ -10,5 +18,26 @@ describe Oga::XML::Lexer do
|
||||||
[:T_ELEM_END, nil, 1]
|
[:T_ELEM_END, nil, 1]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes a multi-line <style> tag using a String as the input' do
|
||||||
|
lex("<style>foo\nbar</style>", :html => true).should == [
|
||||||
|
[:T_ELEM_START, nil, 1],
|
||||||
|
[:T_ELEM_NAME, 'style', 1],
|
||||||
|
[:T_TEXT, "foo\nbar", 1],
|
||||||
|
[:T_ELEM_END, nil, 2]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes a multi-line <style> tag using an IO as the input' do
|
||||||
|
io = StringIO.new("<style>foo\nbar</style>")
|
||||||
|
|
||||||
|
lex(io, :html => true).should == [
|
||||||
|
[:T_ELEM_START, nil, 1],
|
||||||
|
[:T_ELEM_NAME, 'style', 1],
|
||||||
|
[:T_TEXT, "foo\n", 1],
|
||||||
|
[:T_TEXT, 'bar', 2],
|
||||||
|
[:T_ELEM_END, nil, 2]
|
||||||
|
]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue