diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index d1e3270..87a99b4 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -422,9 +422,9 @@ # Characters that can be used for unquoted HTML attribute values. # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example # for more info. - html_unquoted_value = ^( - squote | dquote | '`' | '=' | '<' | '>' | whitespace_or_newline - )+; + html_unquoted_value = + ^(squote | dquote | whitespace_or_newline) + ^('`' | '=' | '<' | '>' | whitespace_or_newline)+; # Machine used after matching the "=" of an attribute and just before moving # into the actual attribute value. diff --git a/spec/oga/html/lexer/attributes_spec.rb b/spec/oga/html/lexer/attributes_spec.rb index e5a5739..61d8b63 100644 --- a/spec/oga/html/lexer/attributes_spec.rb +++ b/spec/oga/html/lexer/attributes_spec.rb @@ -58,6 +58,28 @@ describe Oga::XML::Lexer do ] end + it 'lexes an attribute with a value without a starting double quote' do + lex_html('').should == [ + [:T_ELEM_NAME, 'a', 1], + [:T_ATTR, 'href', 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_STRING_BODY, 'foo"', 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'lexes an attribute with a value without a starting single quote' do + lex_html("").should == [ + [:T_ELEM_NAME, 'a', 1], + [:T_ATTR, 'href', 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_STRING_BODY, "foo'", 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + it 'lexes an element with spaces around the attribute equal sign' do lex_html('
').should == [ [:T_ELEM_NAME, 'p', 1],