Support HTML attributes without starting quotes

This allows the lexer to process HTML input in which an attribute value has a
closing quote but no opening quote, such as:

    <a href=foo"></a>

For XML input, the lexer still expects attribute values to be properly opened
and closed with quotes.

Fixes #109
This commit is contained in:
Yorick Peterse 2015-06-08 06:46:08 +02:00
parent a76286b973
commit fd307a0fcc
2 changed files with 25 additions and 3 deletions

View File

@@ -422,9 +422,9 @@
 # Characters that can be used for unquoted HTML attribute values.
 # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
 # for more info.
-html_unquoted_value = ^(
-  squote | dquote | '`' | '=' | '<' | '>' | whitespace_or_newline
-)+;
+html_unquoted_value =
+  ^(squote | dquote | whitespace_or_newline)
+  ^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
 # Machine used after matching the "=" of an attribute and just before moving
 # into the actual attribute value.

View File

@@ -58,6 +58,28 @@ describe Oga::XML::Lexer do
] ]
end end
# An HTML attribute value that is missing its opening double quote: the
# expected token stream wraps the value in T_STRING_SQUOTE tokens and keeps the
# trailing `"` as part of the T_STRING_BODY text.
it 'lexes an attribute with a value without a starting double quote' do
  expected = [
    [:T_ELEM_NAME, 'a', 1],
    [:T_ATTR, 'href', 1],
    [:T_STRING_SQUOTE, nil, 1],
    [:T_STRING_BODY, 'foo"', 1],
    [:T_STRING_SQUOTE, nil, 1],
    [:T_ELEM_END, nil, 1]
  ]

  lex_html('<a href=foo"></a>').should == expected
end
# An HTML attribute value that is missing its opening single quote: the
# expected token stream wraps the value in T_STRING_SQUOTE tokens and keeps the
# trailing `'` as part of the T_STRING_BODY text.
it 'lexes an attribute with a value without a starting single quote' do
  expected = [
    [:T_ELEM_NAME, 'a', 1],
    [:T_ATTR, 'href', 1],
    [:T_STRING_SQUOTE, nil, 1],
    [:T_STRING_BODY, "foo'", 1],
    [:T_STRING_SQUOTE, nil, 1],
    [:T_ELEM_END, nil, 1]
  ]

  lex_html("<a href=foo'></a>").should == expected
end
it 'lexes an element with spaces around the attribute equal sign' do it 'lexes an element with spaces around the attribute equal sign' do
lex_html('<p foo = "bar"></p>').should == [ lex_html('<p foo = "bar"></p>').should == [
[:T_ELEM_NAME, 'p', 1], [:T_ELEM_NAME, 'p', 1],