diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl
index 1546f2b..8c5eb6f 100644
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@@ -368,10 +368,12 @@
fnext element_name;
}
- action close_element {
- callback(id_on_element_end, data, encoding, mark, te - 1);
+ action start_close_element {
+ fnext element_close;
+ }
- mark = 0;
+ action close_element {
+ callback(id_on_element_end, data, encoding, ts, te);
}
action close_element_fnext_main {
@@ -381,10 +383,7 @@
}
element_start = '<' ident_char;
-
- element_end = '</' %{ mark = p; } identifier '>'
- | '</' identifier ':' %{ mark = p; } identifier '>'
- ;
+ element_end = '</';
# Machine used for lexing the name/namespace of an element.
element_name := |*
@@ -398,6 +397,28 @@
};
*|;
+ # Machine for lexing an element's closing tag; entered by start_close_element and left again on '>'.
+ element_close := |*
+ # Namespace prefix (identifier followed by ':'). Currently unused: it is matched
+ # without an action so that the rule below only handles the actual element name.
+ identifier ':';
+
+ identifier => close_element;
+
+ '>' => {
+ if ( lines > 0 )
+ {
+ advance_line(lines);
+
+ lines = 0;
+ }
+
+ fnext main;
+ };
+
+ any $count_newlines;
+ *|;
+
# Characters that can be used for unquoted HTML attribute values.
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
# for more info.
@@ -587,7 +608,7 @@
cdata_start => start_cdata;
proc_ins_start => start_proc_ins;
element_start => start_element;
- element_end => close_element;
+ element_end => start_close_element;
any => start_text;
*|;
}%%
diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb
index 71a6511..b4b808d 100644
--- a/spec/oga/xml/lexer/elements_spec.rb
+++ b/spec/oga/xml/lexer/elements_spec.rb
@@ -73,6 +73,30 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 2]
]
end
+
+ it 'lexes an element with a space in the closing tag' do
+ lex("<foo></foo >bar").should == [
+ [:T_ELEM_NAME, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_TEXT, 'bar', 1]
+ ]
+ end
+
+ it 'lexes an element with a newline in the closing tag' do
+ lex("<foo></foo\n>bar").should == [
+ [:T_ELEM_NAME, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_TEXT, 'bar', 2]
+ ]
+ end
+
+ it 'lexes an element with a newline in the closing tag using an IO as input' do
+ lex(StringIO.new("<foo></foo\n>bar")).should == [
+ [:T_ELEM_NAME, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_TEXT, 'bar', 2]
+ ]
+ end
end
describe 'elements with attributes' do
diff --git a/spec/oga/xml/lexer/general_spec.rb b/spec/oga/xml/lexer/general_spec.rb
index bc02a11..e214abf 100644
--- a/spec/oga/xml/lexer/general_spec.rb
+++ b/spec/oga/xml/lexer/general_spec.rb
@@ -30,10 +30,6 @@ describe Oga::XML::Lexer do
lex('>').should == [[:T_TEXT, '>', 1]]
end
- it 'lexes </ as regular text' do
- lex('</').should == [[:T_TEXT, '</', 1]]
- end
-
it 'lexes