diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 7038275..63c71a4 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -151,6 +151,11 @@ module Oga read_data do |chunk| advance_native(chunk) end + + # Add any missing closing tags + unless @elements.empty? + @elements.length.times { on_element_end } + end ensure @block = nil end @@ -377,7 +382,7 @@ module Oga # @param [String] name The name of the element, including namespace. # def on_element_name(name) - @elements << name if html? + @elements << name add_token(:T_ELEM_NAME, name) end @@ -410,9 +415,11 @@ module Oga # Called on the closing tag of an element. # def on_element_end + return if @elements.empty? + add_token(:T_ELEM_END) - @elements.pop if html? + @elements.pop end ## diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb index f5d4851..0918811 100644 --- a/spec/oga/xml/lexer/elements_spec.rb +++ b/spec/oga/xml/lexer/elements_spec.rb @@ -5,21 +5,24 @@ describe Oga::XML::Lexer do it 'lexes an opening element' do lex('

').should == [ [:T_ELEM_START, nil, 1], - [:T_ELEM_NAME, 'p', 1] + [:T_ELEM_NAME, 'p', 1], + [:T_ELEM_END, nil, 1] ] end it 'lexes an opening element with a stray double quote' do lex('').should == [ [:T_ELEM_START, nil, 1], - [:T_ELEM_NAME, 'p', 1] + [:T_ELEM_NAME, 'p', 1], + [:T_ELEM_END, nil, 1] ] end it 'lexes an opening element with a stray double quoted string' do lex('').should == [ [:T_ELEM_START, nil, 1], - [:T_ELEM_NAME, 'p', 1] + [:T_ELEM_NAME, 'p', 1], + [:T_ELEM_END, nil, 1] ] end @@ -60,7 +63,8 @@ describe Oga::XML::Lexer do lex('Foo

').should == [ [:T_TEXT, 'Foo', 1], [:T_ELEM_START, nil, 1], - [:T_ELEM_NAME, 'p', 1] + [:T_ELEM_NAME, 'p', 1], + [:T_ELEM_END, nil, 1] ] end diff --git a/spec/oga/xml/lexer/invalid_elements_spec.rb b/spec/oga/xml/lexer/invalid_elements_spec.rb new file mode 100644 index 0000000..92ac94e --- /dev/null +++ b/spec/oga/xml/lexer/invalid_elements_spec.rb @@ -0,0 +1,25 @@ +require 'spec_helper' + +describe Oga::XML::Lexer do + describe 'invalid elements' do + it 'adds missing closing tags' do + lex('').should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'a', 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'ignores closing tags without opening tags' do + lex('').should == [] + end + + it 'ignores excessive closing tags' do + lex('').should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'a', 1], + [:T_ELEM_END, nil, 1] + ] + end + end +end diff --git a/spec/oga/xml/parser/error_spec.rb b/spec/oga/xml/parser/error_spec.rb index aa74b38..66e1894 100644 --- a/spec/oga/xml/parser/error_spec.rb +++ b/spec/oga/xml/parser/error_spec.rb @@ -3,13 +3,7 @@ require 'spec_helper' describe Oga::XML::Parser do describe 'raising syntax errors' do before do - @invalid_xml = <<-EOF.strip - - Alice - 25 - Dutch - - EOF + @invalid_xml = '' end it 'raises a LL::ParserError' do @@ -17,16 +11,15 @@ describe Oga::XML::Parser do end it 'includes the line number when using a String as input' do - parse_error(@invalid_xml).should =~ /on line 5/ + parse_error(@invalid_xml).should =~ /on line 1/ end it 'includes the line number when using an IO as input' do - parse_error(StringIO.new(@invalid_xml)).should =~ /on line 5/ + parse_error(StringIO.new(@invalid_xml)).should =~ /on line 1/ end it 'uses more friendly error messages when available' do - parse_error('').should == - 'Unexpected end of input, expected element closing tag instead on line 1' + parse_error('').should =~ /Unexpected element namespace/ end end end