diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl
index 8606e64..2a6143a 100644
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@@ -52,11 +52,6 @@
if ( fc == '\n' ) lines++;
}
- action hold_and_return {
- fhold;
- fret;
- }
-
whitespace = [ \t];
ident_char = [a-zA-Z0-9\-_];
identifier = ident_char+;
@@ -375,6 +370,11 @@
};
*|;
+ action hold_start_element_head {
+ fhold;
+ fnext element_head;
+ }
+
# Characters that can be used for unquoted HTML attribute values.
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
# for more info.
@@ -384,8 +384,10 @@
# Machine used for processing HTML attribute values.
html_attribute_value := |*
- squote => start_string_squote;
- dquote => start_string_dquote;
+ squote | dquote => {
+ fhold;
+ fnext html_attribute_value_quoted;
+ };
# Unquoted attribute values are lexed as if they were single quoted
# strings.
@@ -397,14 +399,23 @@
callback_simple(id_on_string_squote);
};
- any => hold_and_return;
+ any => hold_start_element_head;
+ *|;
+
+ # Machine specifically used when dealing with quoted HTML attributes. This
+ # ensures that input such as <a href="foo"/> doesn't result in "/" being
+ # considered part of the attribute value.
+ html_attribute_value_quoted := |*
+ squote => start_string_squote;
+ dquote => start_string_dquote;
+ any => hold_start_element_head;
*|;
# Machine used for processing XML attribute values.
xml_attribute_value := |*
squote => start_string_squote;
dquote => start_string_dquote;
- any => hold_and_return;
+ any => hold_start_element_head;
*|;
# Machine used for processing the contents of an element's starting tag.
@@ -429,11 +440,11 @@
'=' => {
if ( html_p )
{
- fcall html_attribute_value;
+ fnext html_attribute_value;
}
else
{
- fcall xml_attribute_value;
+ fnext xml_attribute_value;
}
};
diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb
index 2450232..3ed9c7f 100644
--- a/spec/oga/xml/lexer/elements_spec.rb
+++ b/spec/oga/xml/lexer/elements_spec.rb
@@ -171,6 +171,28 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1]
]
end
+
+ describe 'without a space before the closing tag' do
+ it 'lexes a void element' do
+ lex('<br/>').should == [
+ [:T_ELEM_START, nil, 1],
+ [:T_ELEM_NAME, 'br', 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a void element with an attribute' do
+ lex('<br class="foo"/>').should == [
+ [:T_ELEM_START, nil, 1],
+ [:T_ELEM_NAME, 'br', 1],
+ [:T_ATTR, 'class', 1],
+ [:T_STRING_DQUOTE, nil, 1],
+ [:T_STRING_BODY, 'foo', 1],
+ [:T_STRING_DQUOTE, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+ end
end
describe 'elements with namespaces' do
diff --git a/spec/oga/xml/lexer/html_attributes_spec.rb b/spec/oga/xml/lexer/html_attributes_spec.rb
index 41b79d4..f6b53fe 100644
--- a/spec/oga/xml/lexer/html_attributes_spec.rb
+++ b/spec/oga/xml/lexer/html_attributes_spec.rb
@@ -50,5 +50,17 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1]
]
end
+
+ it 'lexes an attribute with an unquoted value containing a slash' do
+ lex_html('<a href=foo/>').should == [
+ [:T_ELEM_START, nil, 1],
+ [:T_ELEM_NAME, 'a', 1],
+ [:T_ATTR, 'href', 1],
+ [:T_STRING_SQUOTE, nil, 1],
+ [:T_STRING_BODY, 'foo/', 1],
+ [:T_STRING_SQUOTE, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
end
end
diff --git a/spec/oga/xml/lexer/html_void_elements_spec.rb b/spec/oga/xml/lexer/html_void_elements_spec.rb
index cdd89f7..a0a07f2 100644
--- a/spec/oga/xml/lexer/html_void_elements_spec.rb
+++ b/spec/oga/xml/lexer/html_void_elements_spec.rb
@@ -3,7 +3,7 @@ require 'spec_helper'
describe Oga::XML::Lexer do
describe 'HTML void elements' do
it 'lexes a void element that omits the closing /' do
- lex('<link>', :html => true).should == [
+ lex_html('<link>').should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'link', 1],
[:T_ELEM_END, nil, 1]
@@ -11,7 +11,7 @@ describe Oga::XML::Lexer do
end
it 'lexes a upper case void element' do
- lex('<BR>', :html => true).should == [
+ lex_html('<BR>').should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, "BR", 1],
[:T_ELEM_END, nil, 1]
@@ -19,7 +19,7 @@ describe Oga::XML::Lexer do
end
it 'lexes text after a void element' do
- lex('<link>foo', :html => true).should == [
+ lex_html('<link>foo').should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'link', 1],
[:T_ELEM_END, nil, 1],
@@ -28,7 +28,7 @@ describe Oga::XML::Lexer do
end
it 'lexes a void element inside another element' do
- lex('