Relax support for HTML unquoted attribute values
This allows for parsing of HTML such as: <a href=lol("javascript")></a>. Here the "href" attribute would have its value set to: lol("javascript"). Fixes #119.
This commit is contained in:
parent
daec6d151c
commit
3b633ff41c
|
@ -419,18 +419,24 @@
|
||||||
any $count_newlines;
|
any $count_newlines;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Characters that can be used for unquoted HTML attribute values.
|
|
||||||
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
|
||||||
# for more info.
|
|
||||||
html_unquoted_value =
|
|
||||||
^(squote | dquote | whitespace_or_newline)
|
|
||||||
^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
|
|
||||||
|
|
||||||
# Machine used after matching the "=" of an attribute and just before moving
|
# Machine used after matching the "=" of an attribute and just before moving
|
||||||
# into the actual attribute value.
|
# into the actual attribute value.
|
||||||
attribute_pre := |*
|
attribute_pre := |*
|
||||||
whitespace_or_newline $count_newlines;
|
whitespace_or_newline $count_newlines;
|
||||||
|
|
||||||
|
squote | dquote => {
|
||||||
|
fhold;
|
||||||
|
|
||||||
|
if ( lines > 0 )
|
||||||
|
{
|
||||||
|
advance_line(lines);
|
||||||
|
|
||||||
|
lines = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
fnext quoted_attribute_value;
|
||||||
|
};
|
||||||
|
|
||||||
any => {
|
any => {
|
||||||
fhold;
|
fhold;
|
||||||
|
|
||||||
|
@ -443,25 +449,33 @@
|
||||||
|
|
||||||
if ( html_p )
|
if ( html_p )
|
||||||
{
|
{
|
||||||
fnext html_attribute_value;
|
fnext unquoted_attribute_value;
|
||||||
}
|
}
|
||||||
|
/* XML doesn't support unquoted attribute values */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fnext xml_attribute_value;
|
fret;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Machine used for processing HTML attribute values.
|
# Machine for processing unquoted HTML attribute values.
|
||||||
html_attribute_value := |*
|
#
|
||||||
squote | dquote => {
|
# The HTML specification describes a set of characters that can be allowed
|
||||||
fhold;
|
# in an unquoted value at https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example.
|
||||||
fnext xml_attribute_value;
|
#
|
||||||
};
|
# As is always the case with HTML everybody completely ignores this
|
||||||
|
# specification and thus every library and browser out there is expected to
|
||||||
# Unquoted attribute values are lexed as if they were single quoted
|
# support input such as `<a href=lol("javascript","is","great")></a>`.
|
||||||
# strings.
|
#
|
||||||
html_unquoted_value => {
|
# Oga too has to support this, thus the only characters it disallows in
|
||||||
|
# unquoted attribute values are:
|
||||||
|
#
|
||||||
|
# * > (used for terminating open tags)
|
||||||
|
# * whitespace
|
||||||
|
#
|
||||||
|
unquoted_attribute_value := |*
|
||||||
|
^('>' | whitespace_or_newline)+ => {
|
||||||
callback_simple(id_on_string_squote);
|
callback_simple(id_on_string_squote);
|
||||||
|
|
||||||
callback(id_on_string_body, data, encoding, ts, te);
|
callback(id_on_string_body, data, encoding, ts, te);
|
||||||
|
@ -472,8 +486,8 @@
|
||||||
any => hold_and_return;
|
any => hold_and_return;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Machine used for processing XML attribute values.
|
# Machine used for processing quoted XML/HTML attribute values.
|
||||||
xml_attribute_value := |*
|
quoted_attribute_value := |*
|
||||||
# The following two actions use "fnext" instead of "fcall". Combined
|
# The following two actions use "fnext" instead of "fcall". Combined
|
||||||
# with "element_head" using "fcall" to jump to this machine this means
|
# with "element_head" using "fcall" to jump to this machine this means
|
||||||
# we can return back to "element_head" after processing a single string.
|
# we can return back to "element_head" after processing a single string.
|
||||||
|
|
|
@ -58,6 +58,32 @@ describe Oga::XML::Lexer do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes an attribute with an unquoted chunk of Javascript' do
|
||||||
|
lex_html('<a href=ijustlovehtml("because","reasons")').should == [
|
||||||
|
[:T_ELEM_NAME, 'a', 1],
|
||||||
|
[:T_ATTR, 'href', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_STRING_BODY, 'ijustlovehtml("because","reasons")', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_ELEM_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an attribute with an unquoted chunk of Javascript followed by another attribute' do
|
||||||
|
lex_html('<a href=ijustlovehtml("because","reasons") foo="bar"').should == [
|
||||||
|
[:T_ELEM_NAME, 'a', 1],
|
||||||
|
[:T_ATTR, 'href', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_STRING_BODY, 'ijustlovehtml("because","reasons")', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_ATTR, 'foo', 1],
|
||||||
|
[:T_STRING_DQUOTE, nil, 1],
|
||||||
|
[:T_STRING_BODY, 'bar', 1],
|
||||||
|
[:T_STRING_DQUOTE, nil, 1],
|
||||||
|
[:T_ELEM_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
it 'lexes an attribute with a value without a starting double quote' do
|
it 'lexes an attribute with a value without a starting double quote' do
|
||||||
lex_html('<a href=foo"></a>').should == [
|
lex_html('<a href=foo"></a>').should == [
|
||||||
[:T_ELEM_NAME, 'a', 1],
|
[:T_ELEM_NAME, 'a', 1],
|
||||||
|
|
Loading…
Reference in New Issue