Support HTML attributes without starting quotes
This allows the lexer to process input such as: <a href=foo"></a>. For XML input, the lexer still expects properly opened/closed attribute values. Fixes #109.
This commit is contained in:
parent
a76286b973
commit
fd307a0fcc
|
@ -422,9 +422,9 @@
|
||||||
# Characters that can be used for unquoted HTML attribute values.
|
# Characters that can be used for unquoted HTML attribute values.
|
||||||
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
||||||
# for more info.
|
# for more info.
|
||||||
html_unquoted_value = ^(
|
html_unquoted_value =
|
||||||
squote | dquote | '`' | '=' | '<' | '>' | whitespace_or_newline
|
^(squote | dquote | whitespace_or_newline)
|
||||||
)+;
|
^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
|
||||||
|
|
||||||
# Machine used after matching the "=" of an attribute and just before moving
|
# Machine used after matching the "=" of an attribute and just before moving
|
||||||
# into the actual attribute value.
|
# into the actual attribute value.
|
||||||
|
|
|
@ -58,6 +58,28 @@ describe Oga::XML::Lexer do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes an attribute with a value without a starting double quote' do
|
||||||
|
lex_html('<a href=foo"></a>').should == [
|
||||||
|
[:T_ELEM_NAME, 'a', 1],
|
||||||
|
[:T_ATTR, 'href', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_STRING_BODY, 'foo"', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_ELEM_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an attribute with a value without a starting single quote' do
|
||||||
|
lex_html("<a href=foo'></a>").should == [
|
||||||
|
[:T_ELEM_NAME, 'a', 1],
|
||||||
|
[:T_ATTR, 'href', 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_STRING_BODY, "foo'", 1],
|
||||||
|
[:T_STRING_SQUOTE, nil, 1],
|
||||||
|
[:T_ELEM_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
it 'lexes an element with spaces around the attribute equal sign' do
|
it 'lexes an element with spaces around the attribute equal sign' do
|
||||||
lex_html('<p foo = "bar"></p>').should == [
|
lex_html('<p foo = "bar"></p>').should == [
|
||||||
[:T_ELEM_NAME, 'p', 1],
|
[:T_ELEM_NAME, 'p', 1],
|
||||||
|
|
Loading…
Reference in New Issue