Fix for lexing HTML quoted attrs followed by "/>"
This ensures that for input such as <a href="foo"/>, the "/" is not treated as part of the attribute value.
This commit is contained in:
parent
afbb585812
commit
d892ce9787
|
@ -52,11 +52,6 @@
|
|||
if ( fc == '\n' ) lines++;
|
||||
}
|
||||
|
||||
action hold_and_return {
|
||||
fhold;
|
||||
fret;
|
||||
}
|
||||
|
||||
whitespace = [ \t];
|
||||
ident_char = [a-zA-Z0-9\-_];
|
||||
identifier = ident_char+;
|
||||
|
@ -375,6 +370,11 @@
|
|||
};
|
||||
*|;
|
||||
|
||||
action hold_start_element_head {
|
||||
fhold;
|
||||
fnext element_head;
|
||||
}
|
||||
|
||||
# Characters that can be used for unquoted HTML attribute values.
|
||||
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
||||
# for more info.
|
||||
|
@ -384,8 +384,10 @@
|
|||
|
||||
# Machine used for processing HTML attribute values.
|
||||
html_attribute_value := |*
|
||||
squote => start_string_squote;
|
||||
dquote => start_string_dquote;
|
||||
squote | dquote => {
|
||||
fhold;
|
||||
fnext html_attribute_value_quoted;
|
||||
};
|
||||
|
||||
# Unquoted attribute values are lexed as if they were single quoted
|
||||
# strings.
|
||||
|
@ -397,14 +399,23 @@
|
|||
callback_simple(id_on_string_squote);
|
||||
};
|
||||
|
||||
any => hold_and_return;
|
||||
any => hold_start_element_head;
|
||||
*|;
|
||||
|
||||
# Machine specifically used when dealing with quoted HTML attributes. This
|
||||
# ensures that input such as <a href="foo"/> doesn't result in "/" being
|
||||
# considered part of the attribute value.
|
||||
html_attribute_value_quoted := |*
|
||||
squote => start_string_squote;
|
||||
dquote => start_string_dquote;
|
||||
any => hold_start_element_head;
|
||||
*|;
|
||||
|
||||
# Machine used for processing XML attribute values.
|
||||
xml_attribute_value := |*
|
||||
squote => start_string_squote;
|
||||
dquote => start_string_dquote;
|
||||
any => hold_and_return;
|
||||
any => hold_start_element_head;
|
||||
*|;
|
||||
|
||||
# Machine used for processing the contents of an element's starting tag.
|
||||
|
@ -429,11 +440,11 @@
|
|||
'=' => {
|
||||
if ( html_p )
|
||||
{
|
||||
fcall html_attribute_value;
|
||||
fnext html_attribute_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
fcall xml_attribute_value;
|
||||
fnext xml_attribute_value;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -171,6 +171,28 @@ describe Oga::XML::Lexer do
|
|||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
describe 'without a space before the closing tag' do
|
||||
it 'lexes a void element' do
|
||||
lex('<br/>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'br', 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
it 'lexes a void element with an attribute' do
|
||||
lex('<br class="foo"/>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'br', 1],
|
||||
[:T_ATTR, 'class', 1],
|
||||
[:T_STRING_DQUOTE, nil, 1],
|
||||
[:T_STRING_BODY, 'foo', 1],
|
||||
[:T_STRING_DQUOTE, nil, 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe 'elements with namespaces' do
|
||||
|
|
|
@ -50,5 +50,17 @@ describe Oga::XML::Lexer do
|
|||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
it 'lexes an attribute with an unquoted value containing a slash' do
|
||||
lex_html('<a href=foo/></a>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'a', 1],
|
||||
[:T_ATTR, 'href', 1],
|
||||
[:T_STRING_SQUOTE, nil, 1],
|
||||
[:T_STRING_BODY, 'foo/', 1],
|
||||
[:T_STRING_SQUOTE, nil, 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,7 +3,7 @@ require 'spec_helper'
|
|||
describe Oga::XML::Lexer do
|
||||
describe 'HTML void elements' do
|
||||
it 'lexes a void element that omits the closing /' do
|
||||
lex('<link>', :html => true).should == [
|
||||
lex_html('<link>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'link', 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
|
@ -11,7 +11,7 @@ describe Oga::XML::Lexer do
|
|||
end
|
||||
|
||||
it 'lexes a upper case void element' do
|
||||
lex('<BR>', :html => true).should == [
|
||||
lex_html('<BR>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, "BR", 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
|
@ -19,7 +19,7 @@ describe Oga::XML::Lexer do
|
|||
end
|
||||
|
||||
it 'lexes text after a void element' do
|
||||
lex('<link>foo', :html => true).should == [
|
||||
lex_html('<link>foo').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'link', 1],
|
||||
[:T_ELEM_END, nil, 1],
|
||||
|
@ -28,7 +28,7 @@ describe Oga::XML::Lexer do
|
|||
end
|
||||
|
||||
it 'lexes a void element inside another element' do
|
||||
lex('<head><link></head>', :html => true).should == [
|
||||
lex_html('<head><link></head>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'head', 1],
|
||||
[:T_ELEM_START, nil, 1],
|
||||
|
@ -39,7 +39,7 @@ describe Oga::XML::Lexer do
|
|||
end
|
||||
|
||||
it 'lexes a void element inside another element with whitespace' do
|
||||
lex("<head><link>\n</head>", :html => true).should == [
|
||||
lex_html("<head><link>\n</head>").should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'head', 1],
|
||||
[:T_ELEM_START, nil, 1],
|
||||
|
@ -49,5 +49,51 @@ describe Oga::XML::Lexer do
|
|||
[:T_ELEM_END, nil, 2]
|
||||
]
|
||||
end
|
||||
|
||||
it 'lexes a void element with an unquoted attribute value' do
|
||||
lex_html('<br class=foo />').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'br', 1],
|
||||
[:T_ATTR, 'class', 1],
|
||||
[:T_STRING_SQUOTE, nil, 1],
|
||||
[:T_STRING_BODY, 'foo', 1],
|
||||
[:T_STRING_SQUOTE, nil, 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
describe 'without a space before the closing tag' do
|
||||
it 'lexes a void element' do
|
||||
lex_html('<br/>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'br', 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
it 'lexes a void element with an attribute' do
|
||||
lex_html('<br class="foo"/>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'br', 1],
|
||||
[:T_ATTR, 'class', 1],
|
||||
[:T_STRING_DQUOTE, nil, 1],
|
||||
[:T_STRING_BODY, 'foo', 1],
|
||||
[:T_STRING_DQUOTE, nil, 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
it 'lexes a void element with an unquoted attribute value' do
|
||||
lex_html('<br class=foo/>').should == [
|
||||
[:T_ELEM_START, nil, 1],
|
||||
[:T_ELEM_NAME, 'br', 1],
|
||||
[:T_ATTR, 'class', 1],
|
||||
[:T_STRING_SQUOTE, nil, 1],
|
||||
[:T_STRING_BODY, 'foo/', 1],
|
||||
[:T_STRING_SQUOTE, nil, 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue