diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index 7038275..63c71a4 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -151,6 +151,11 @@ module Oga
         read_data do |chunk|
           advance_native(chunk)
         end
+
+        # Add any missing closing tags
+        unless @elements.empty?
+          @elements.length.times { on_element_end }
+        end
       ensure
         @block = nil
       end
@@ -377,7 +382,7 @@ module Oga
       # @param [String] name The name of the element, including namespace.
       #
       def on_element_name(name)
-        @elements << name if html?
+        @elements << name
 
         add_token(:T_ELEM_NAME, name)
       end
@@ -410,9 +415,11 @@ module Oga
       # Called on the closing tag of an element.
       #
       def on_element_end
+        return if @elements.empty?
+
         add_token(:T_ELEM_END)
 
-        @elements.pop if html?
+        @elements.pop
       end
 
       ##
diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb
index f5d4851..0918811 100644
--- a/spec/oga/xml/lexer/elements_spec.rb
+++ b/spec/oga/xml/lexer/elements_spec.rb
@@ -5,21 +5,24 @@ describe Oga::XML::Lexer do
     it 'lexes an opening element' do
       lex('<p>').should == [
         [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'p', 1]
+        [:T_ELEM_NAME, 'p', 1],
+        [:T_ELEM_END, nil, 1]
       ]
     end
 
     it 'lexes an opening element with a stray double quote' do
       lex('<p">').should == [
         [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'p', 1]
+        [:T_ELEM_NAME, 'p', 1],
+        [:T_ELEM_END, nil, 1]
       ]
     end
 
     it 'lexes an opening element with a stray double quoted string' do
       lex('<p"">').should == [
         [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'p', 1]
+        [:T_ELEM_NAME, 'p', 1],
+        [:T_ELEM_END, nil, 1]
       ]
     end
 
@@ -60,7 +63,8 @@ describe Oga::XML::Lexer do
       lex('Foo<p>').should == [
         [:T_TEXT, 'Foo', 1],
         [:T_ELEM_START, nil, 1],
-        [:T_ELEM_NAME, 'p', 1]
+        [:T_ELEM_NAME, 'p', 1],
+        [:T_ELEM_END, nil, 1]
       ]
     end
 
diff --git a/spec/oga/xml/lexer/invalid_elements_spec.rb b/spec/oga/xml/lexer/invalid_elements_spec.rb
new file mode 100644
index 0000000..92ac94e
--- /dev/null
+++ b/spec/oga/xml/lexer/invalid_elements_spec.rb
@@ -0,0 +1,25 @@
+require 'spec_helper'
+
+describe Oga::XML::Lexer do
+  describe 'invalid elements' do
+    it 'adds missing closing tags' do
+      lex('<a>').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'a', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+
+    it 'ignores closing tags without opening tags' do
+      lex('</a>').should == []
+    end
+
+    it 'ignores excessive closing tags' do
+      lex('<a></a></a>').should == [
+        [:T_ELEM_START, nil, 1],
+        [:T_ELEM_NAME, 'a', 1],
+        [:T_ELEM_END, nil, 1]
+      ]
+    end
+  end
+end
diff --git a/spec/oga/xml/parser/error_spec.rb b/spec/oga/xml/parser/error_spec.rb
index aa74b38..66e1894 100644
--- a/spec/oga/xml/parser/error_spec.rb
+++ b/spec/oga/xml/parser/error_spec.rb
@@ -3,13 +3,7 @@ require 'spec_helper'
describe Oga::XML::Parser do
describe 'raising syntax errors' do
before do
- @invalid_xml = <<-EOF.strip
-