# Patch summary: in the element_text scanner, newlines are emitted as their
# own T_TEXT token (emit_text_buffer, t(:T_TEXT), advance_line) instead of
# being buffered with other text; the column for a closing name is advanced
# by text.length; and a lexer spec for a basic HTML document is added.
#
# NOTE(review): the HTML in the spec (heredoc body, "# <tag>" comments and the
# two doctype tokens) was reconstructed from the expected line/column values
# and the "+1,54" hunk length; indentation is reconstructed as well. Confirm
# against the original commit before applying.
# NOTE(review): the spec describes Oga::Parser although it lives under
# spec/oga/lexer and only calls lex() -- presumably Oga::Lexer was intended;
# verify.
diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl
index bb037e6..a922a46 100644
--- a/lib/oga/lexer.rl
+++ b/lib/oga/lexer.rl
@@ -264,7 +264,13 @@ module Oga
     element_start = '<' element_name;
 
     element_text := |*
-      ^'<' => buffer_text;
+      newline => {
+        emit_text_buffer
+        t(:T_TEXT)
+        advance_line
+      };
+
+      ^('<' | newline) => buffer_text;
 
       '<' => {
         emit_text_buffer
@@ -284,7 +290,8 @@ module Oga
         advance_column(2)
 
         # Advance the column for the closing name.
-        advance_column(@te - p)
+        advance_column(text.length)
+
         fret;
       };
 
diff --git a/spec/oga/lexer/documents_spec.rb b/spec/oga/lexer/documents_spec.rb
new file mode 100644
index 0000000..5cd305c
--- /dev/null
+++ b/spec/oga/lexer/documents_spec.rb
@@ -0,0 +1,54 @@
+require 'spec_helper'
+
+describe Oga::Parser do
+  context 'HTML documents' do
+    example 'lex a basic HTML document' do
+      html = <<-EOF
+<!DOCTYPE html>
+<html>
+<head>
+<title>Title</title>
+</head>
+<body></body>
+</html>
+      EOF
+
+      lex(html).should == [
+        [:T_DOCTYPE_START, '<!DOCTYPE html', 1, 1],
+        [:T_DOCTYPE_END, '>', 1, 15],
+        [:T_TEXT, "\n", 1, 16],
+
+        # <html>
+        [:T_ELEM_OPEN, nil, 2, 1],
+        [:T_ELEM_NAME, 'html', 2, 2],
+        [:T_TEXT, "\n", 2, 7],
+
+        # <head>
+        [:T_ELEM_OPEN, nil, 3, 1],
+        [:T_ELEM_NAME, 'head', 3, 2],
+        [:T_TEXT, "\n", 3, 7],
+
+        # <title>Title</title>
+        [:T_ELEM_OPEN, nil, 4, 1],
+        [:T_ELEM_NAME, 'title', 4, 2],
+        [:T_TEXT, 'Title', 4, 8],
+        [:T_ELEM_CLOSE, nil, 4, 13],
+        [:T_TEXT, "\n", 4, 21],
+
+        # </head>
+        [:T_ELEM_CLOSE, nil, 5, 1],
+        [:T_TEXT, "\n", 5, 8],
+
+        # <body></body>
+        [:T_ELEM_OPEN, nil, 6, 1],
+        [:T_ELEM_NAME, 'body', 6, 2],
+        [:T_ELEM_CLOSE, nil, 6, 7],
+        [:T_TEXT, "\n", 6, 14],
+
+        # </html>
+        [:T_ELEM_CLOSE, nil, 7, 1],
+        [:T_TEXT, "\n", 7, 8]
+      ]
+    end
+  end
+end