Rip out column counting.

This makes both the lexer and parser quite a bit easier to use. Counting column
numbers also isn't really needed when parsing XML/HTML.
This commit is contained in:
Yorick Peterse 2014-03-20 19:44:28 +01:00
parent 70a39042e7
commit 74bc11a239
10 changed files with 148 additions and 181 deletions

View File

@ -3,7 +3,7 @@ module Oga
##
#
class Node < ::AST::Node
attr_reader :line, :column
attr_reader :line
end # Node
end # AST
end # Oga

View File

@ -72,7 +72,6 @@ module Oga
#
def reset
@line = 1
@column = 1
@data = nil
@ts = nil
@te = nil
@ -125,14 +124,6 @@ module Oga
#
def advance_line(amount = 1)
@line += amount
@column = 1
end
##
# @param [Fixnum] length The amount of columns to advance.
#
def advance_column(length = 1)
@column += length
end
##
@ -166,16 +157,13 @@ module Oga
end
##
# Adds a token with the given type and value to the list. If a value is
# given the column number is also advanced based on the value's length.
# Adds a token with the given type and value to the list.
#
# @param [Symbol] type The token type.
# @param [String] value The token value.
#
def add_token(type, value)
token = [type, value, @line, @column]
advance_column(value.length) if value
token = [type, value, @line]
@tokens << token
end
@ -214,7 +202,6 @@ module Oga
#
def emit_string_buffer
add_token(:T_STRING, @string_buffer)
advance_column
@string_buffer = ''
end
@ -264,7 +251,6 @@ module Oga
^dquote => buffer_string;
dquote => {
emit_string_buffer
advance_column
fret;
};
*|;
@ -274,7 +260,6 @@ module Oga
^squote => buffer_string;
squote => {
emit_string_buffer
advance_column
fret;
};
*|;
@ -308,7 +293,7 @@ module Oga
# Whitespace inside doctypes is ignored since there's no point in
# including it.
whitespace => { advance_column };
whitespace;
'>' => {
t(:T_DOCTYPE_END)
@ -389,7 +374,6 @@ module Oga
action start_element {
emit_text_buffer
add_token(:T_ELEM_OPEN, nil)
advance_column
# Add the element name. If the name includes a namespace we'll break
# the name up into two separate tokens.
@ -399,10 +383,6 @@ module Oga
ns, name = name.split(':')
add_token(:T_ELEM_NS, ns)
# Advance the column for the colon (:) that separates the namespace
# and element name.
advance_column
end
@elements << name
@ -422,7 +402,7 @@ module Oga
# For example, in `<p foo="bar">` the element head is ` foo="bar"`.
#
element_head := |*
(whitespace | '=') => { advance_column };
whitespace | '=';
# Attribute names.
element_name => { t(:T_ATTR) };
@ -452,8 +432,6 @@ module Oga
add_token(:T_ELEM_CLOSE, nil)
@elements.pop
end
advance_column
};
# Regular closing tags.
@ -461,14 +439,11 @@ module Oga
emit_text_buffer
add_token(:T_ELEM_CLOSE, nil)
advance_column(@te - @ts)
@elements.pop
};
# Self closing elements that are not handled by the HTML mode.
'/>' => {
advance_column
add_token(:T_ELEM_CLOSE, nil)
@elements.pop

View File

@ -126,23 +126,20 @@ end
def reset
@lines = []
@line = 1
@column = 1
end
def s(type, *children)
return AST::Node.new(
type,
children.flatten,
:line => @line,
:column => @column
:line => @line
)
end
def next_token
type, value, line, column = @tokens.shift
type, value, line = @tokens.shift
@line = line if line
@column = column if column
return type ? [type, value] : [false, false]
end
@ -150,18 +147,13 @@ end
def on_error(type, value, stack)
name = token_to_str(type)
line_str = @lines[@line - 1]
indicator = '~' * (@column - 1) + '^'
raise Racc::ParseError, <<-EOF.strip
Failed to parse the supplied input.
Reason: unexpected #{name} with value #{value.inspect}
Location: line #{@line}, column #{@column}
Unexpected #{name} with value #{value.inspect} on line #{@line}
Offending code:
#{line_str}
#{indicator}
Current stack:

View File

@ -4,25 +4,25 @@ describe Oga::Lexer do
context 'cdata tags' do
example 'lex a cdata tag' do
lex('<![CDATA[foo]]>').should == [
[:T_CDATA_START, '<![CDATA[', 1, 1],
[:T_TEXT, 'foo', 1, 10],
[:T_CDATA_END, ']]>', 1, 13]
[:T_CDATA_START, '<![CDATA[', 1],
[:T_TEXT, 'foo', 1],
[:T_CDATA_END, ']]>', 1]
]
end
example 'lex tags inside CDATA tags as regular text' do
lex('<![CDATA[<p>Foo</p>]]>').should == [
[:T_CDATA_START, '<![CDATA[', 1, 1],
[:T_TEXT, '<p>Foo</p>', 1, 10],
[:T_CDATA_END, ']]>', 1, 20]
[:T_CDATA_START, '<![CDATA[', 1],
[:T_TEXT, '<p>Foo</p>', 1],
[:T_CDATA_END, ']]>', 1]
]
end
example 'lex double brackets inside a CDATA tag' do
lex('<![CDATA[]]]]>').should == [
[:T_CDATA_START, '<![CDATA[', 1, 1],
[:T_TEXT, ']]', 1, 10],
[:T_CDATA_END, ']]>', 1, 12]
[:T_CDATA_START, '<![CDATA[', 1],
[:T_TEXT, ']]', 1],
[:T_CDATA_END, ']]>', 1]
]
end
end

View File

@ -4,51 +4,51 @@ describe Oga::Lexer do
context 'comments' do
example 'lex a comment' do
lex('<!-- foo -->').should == [
[:T_COMMENT_START, '<!--', 1, 1],
[:T_TEXT, ' foo ', 1, 5],
[:T_COMMENT_END, '-->', 1, 10]
[:T_COMMENT_START, '<!--', 1],
[:T_TEXT, ' foo ', 1],
[:T_COMMENT_END, '-->', 1]
]
end
example 'lex a comment containing --' do
lex('<!-- -- -->').should == [
[:T_COMMENT_START, '<!--', 1, 1],
[:T_TEXT, ' -- ', 1, 5],
[:T_COMMENT_END, '-->', 1, 9]
[:T_COMMENT_START, '<!--', 1],
[:T_TEXT, ' -- ', 1],
[:T_COMMENT_END, '-->', 1]
]
end
example 'lex a comment containing ->' do
lex('<!-- -> -->').should == [
[:T_COMMENT_START, '<!--', 1, 1],
[:T_TEXT, ' -> ', 1, 5],
[:T_COMMENT_END, '-->', 1, 9]
[:T_COMMENT_START, '<!--', 1],
[:T_TEXT, ' -> ', 1],
[:T_COMMENT_END, '-->', 1]
]
end
example 'lex a comment followed by text' do
lex('<!---->foo').should == [
[:T_COMMENT_START, '<!--', 1, 1],
[:T_COMMENT_END, '-->', 1, 5],
[:T_TEXT, 'foo', 1, 8]
[:T_COMMENT_START, '<!--', 1],
[:T_COMMENT_END, '-->', 1],
[:T_TEXT, 'foo', 1]
]
end
example 'lex text followed by a comment' do
lex('foo<!---->').should == [
[:T_TEXT, 'foo', 1, 1],
[:T_COMMENT_START, '<!--', 1, 4],
[:T_COMMENT_END, '-->', 1, 8]
[:T_TEXT, 'foo', 1],
[:T_COMMENT_START, '<!--', 1],
[:T_COMMENT_END, '-->', 1]
]
end
example 'lex an element followed by a comment' do
lex('<p></p><!---->').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_ELEM_CLOSE, nil, 1, 4],
[:T_COMMENT_START, '<!--', 1, 8],
[:T_COMMENT_END, '-->', 1, 12]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_ELEM_CLOSE, nil, 1],
[:T_COMMENT_START, '<!--', 1],
[:T_COMMENT_END, '-->', 1]
]
end
end

View File

@ -4,28 +4,28 @@ describe Oga::Lexer do
context 'doctypes' do
example 'lex the HTML5 doctype' do
lex('<!DOCTYPE html>').should == [
[:T_DOCTYPE_START, '<!DOCTYPE html', 1, 1],
[:T_DOCTYPE_END, '>', 1, 15]
[:T_DOCTYPE_START, '<!DOCTYPE html', 1],
[:T_DOCTYPE_END, '>', 1]
]
end
example 'lex a doctype with a public and system ID' do
lex('<!DOCTYPE HTML PUBLIC "foobar" "baz">').should == [
[:T_DOCTYPE_START, '<!DOCTYPE HTML', 1, 1],
[:T_DOCTYPE_TYPE, 'PUBLIC', 1, 16],
[:T_STRING, 'foobar', 1, 23],
[:T_STRING, 'baz', 1, 32],
[:T_DOCTYPE_END, '>', 1, 37]
[:T_DOCTYPE_START, '<!DOCTYPE HTML', 1],
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
[:T_STRING, 'foobar', 1],
[:T_STRING, 'baz', 1],
[:T_DOCTYPE_END, '>', 1]
]
end
example 'lex a doctype with a public and system ID using single quotes' do
lex("<!DOCTYPE HTML PUBLIC 'foobar' 'baz'>").should == [
[:T_DOCTYPE_START, '<!DOCTYPE HTML', 1, 1],
[:T_DOCTYPE_TYPE, 'PUBLIC', 1, 16],
[:T_STRING, 'foobar', 1, 23],
[:T_STRING, 'baz', 1, 32],
[:T_DOCTYPE_END, '>', 1, 37]
[:T_DOCTYPE_START, '<!DOCTYPE HTML', 1],
[:T_DOCTYPE_TYPE, 'PUBLIC', 1],
[:T_STRING, 'foobar', 1],
[:T_STRING, 'baz', 1],
[:T_DOCTYPE_END, '>', 1]
]
end
end

View File

@ -14,40 +14,40 @@ describe Oga::Lexer do
EOF
lex(html).should == [
[:T_DOCTYPE_START, '<!DOCTYPE html', 1, 1],
[:T_DOCTYPE_END, '>', 1, 15],
[:T_TEXT, "\n", 1, 16],
[:T_DOCTYPE_START, '<!DOCTYPE html', 1],
[:T_DOCTYPE_END, '>', 1],
[:T_TEXT, "\n", 1],
# <html>
[:T_ELEM_OPEN, nil, 2, 1],
[:T_ELEM_NAME, 'html', 2, 2],
[:T_TEXT, "\n", 2, 7],
[:T_ELEM_OPEN, nil, 2],
[:T_ELEM_NAME, 'html', 2],
[:T_TEXT, "\n", 2],
# <head>
[:T_ELEM_OPEN, nil, 3, 1],
[:T_ELEM_NAME, 'head', 3, 2],
[:T_TEXT, "\n", 3, 7],
[:T_ELEM_OPEN, nil, 3],
[:T_ELEM_NAME, 'head', 3],
[:T_TEXT, "\n", 3],
# <title>Title</title>
[:T_ELEM_OPEN, nil, 4, 1],
[:T_ELEM_NAME, 'title', 4, 2],
[:T_TEXT, 'Title', 4, 8],
[:T_ELEM_CLOSE, nil, 4, 13],
[:T_TEXT, "\n", 4, 21],
[:T_ELEM_OPEN, nil, 4],
[:T_ELEM_NAME, 'title', 4],
[:T_TEXT, 'Title', 4],
[:T_ELEM_CLOSE, nil, 4],
[:T_TEXT, "\n", 4],
# </head>
[:T_ELEM_CLOSE, nil, 5, 1],
[:T_TEXT, "\n", 5, 8],
[:T_ELEM_CLOSE, nil, 5],
[:T_TEXT, "\n", 5],
# <body></body>
[:T_ELEM_OPEN, nil, 6, 1],
[:T_ELEM_NAME, 'body', 6, 2],
[:T_ELEM_CLOSE, nil, 6, 7],
[:T_TEXT, "\n", 6, 14],
[:T_ELEM_OPEN, nil, 6],
[:T_ELEM_NAME, 'body', 6],
[:T_ELEM_CLOSE, nil, 6],
[:T_TEXT, "\n", 6],
# </html>
[:T_ELEM_CLOSE, nil, 7, 1],
[:T_TEXT, "\n", 7, 8]
[:T_ELEM_CLOSE, nil, 7],
[:T_TEXT, "\n", 7]
]
end
end

View File

@ -4,33 +4,33 @@ describe Oga::Lexer do
context 'elements' do
example 'lex an opening element' do
lex('<p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1]
]
end
example 'lex an opening an closing element' do
lex('<p></p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_ELEM_CLOSE, nil, 1, 4]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex a paragraph element with text inside it' do
lex('<p>Hello</p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_TEXT, 'Hello', 1, 4],
[:T_ELEM_CLOSE, nil, 1, 9]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_TEXT, 'Hello', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex text followed by a paragraph element' do
lex('Foo<p>').should == [
[:T_TEXT, 'Foo', 1, 1],
[:T_ELEM_OPEN, nil, 1, 4],
[:T_ELEM_NAME, 'p', 1, 5]
[:T_TEXT, 'Foo', 1],
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1]
]
end
end
@ -38,21 +38,21 @@ describe Oga::Lexer do
context 'elements with attributes' do
example 'lex an element with an attribute without a value' do
lex('<p foo></p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_ATTR, 'foo', 1, 4],
[:T_ELEM_CLOSE, nil, 1, 8]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_ATTR, 'foo', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex a paragraph element with attributes' do
lex('<p class="foo">Hello</p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_ATTR, 'class', 1, 4],
[:T_STRING, 'foo', 1, 10],
[:T_TEXT, 'Hello', 1, 16],
[:T_ELEM_CLOSE, nil, 1, 21]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_ATTR, 'class', 1],
[:T_STRING, 'foo', 1],
[:T_TEXT, 'Hello', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
end
@ -60,26 +60,26 @@ describe Oga::Lexer do
context 'nested elements' do
example 'lex a nested element' do
lex('<p><a></a></p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_ELEM_OPEN, nil, 1, 4],
[:T_ELEM_NAME, 'a', 1, 5],
[:T_ELEM_CLOSE, nil, 1, 7],
[:T_ELEM_CLOSE, nil, 1, 11]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'a', 1],
[:T_ELEM_CLOSE, nil, 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex nested elements and text nodes' do
lex('<p>Foo<a>bar</a>baz</p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'p', 1, 2],
[:T_TEXT, 'Foo', 1, 4],
[:T_ELEM_OPEN, nil, 1, 7],
[:T_ELEM_NAME, 'a', 1, 8],
[:T_TEXT, 'bar', 1, 10],
[:T_ELEM_CLOSE, nil, 1, 13],
[:T_TEXT, 'baz', 1, 17],
[:T_ELEM_CLOSE, nil, 1, 20]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'p', 1],
[:T_TEXT, 'Foo', 1],
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'a', 1],
[:T_TEXT, 'bar', 1],
[:T_ELEM_CLOSE, nil, 1],
[:T_TEXT, 'baz', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
end
@ -87,19 +87,19 @@ describe Oga::Lexer do
context 'void elements' do
example 'lex a void element' do
lex('<br />').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'br', 1, 2],
[:T_ELEM_CLOSE, nil, 1, 6]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'br', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex a void element with an attribute' do
lex('<br class="foo" />').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'br', 1, 2],
[:T_ATTR, 'class', 1, 5],
[:T_STRING, 'foo', 1, 11],
[:T_ELEM_CLOSE, nil, 1, 18]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'br', 1],
[:T_ATTR, 'class', 1],
[:T_STRING, 'foo', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
end
@ -107,10 +107,10 @@ describe Oga::Lexer do
context 'elements with namespaces' do
example 'lex an element with namespaces' do
lex('<foo:p></p>').should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NS, 'foo', 1, 2],
[:T_ELEM_NAME, 'p', 1, 6],
[:T_ELEM_CLOSE, nil, 1, 8]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NS, 'foo', 1],
[:T_ELEM_NAME, 'p', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
end

View File

@ -3,20 +3,20 @@ require 'spec_helper'
describe Oga::Lexer do
context 'regular text' do
example 'lex regular text' do
lex('hello').should == [[:T_TEXT, 'hello', 1, 1]]
lex('hello').should == [[:T_TEXT, 'hello', 1]]
end
example 'lex regular whitespace' do
lex(' ').should == [[:T_TEXT, ' ', 1, 1]]
lex(' ').should == [[:T_TEXT, ' ', 1]]
end
example 'lex a newline' do
lex("\n").should == [[:T_TEXT, "\n", 1, 1]]
lex("\n").should == [[:T_TEXT, "\n", 1]]
end
example 'lex text followed by a newline' do
lex("foo\n").should == [
[:T_TEXT, "foo\n", 1, 1]
[:T_TEXT, "foo\n", 1]
]
end
end

View File

@ -4,41 +4,41 @@ describe Oga::Lexer do
context 'HTML void elements' do
example 'lex a void element that omits the closing /' do
lex('<link>', :html => true).should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'link', 1, 2],
[:T_ELEM_CLOSE, nil, 1, 6]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'link', 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex text after a void element' do
lex('<link>foo', :html => true).should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'link', 1, 2],
[:T_ELEM_CLOSE, nil, 1, 6],
[:T_TEXT, 'foo', 1, 7]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'link', 1],
[:T_ELEM_CLOSE, nil, 1],
[:T_TEXT, 'foo', 1]
]
end
example 'lex a void element inside another element' do
lex('<head><link></head>', :html => true).should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'head', 1, 2],
[:T_ELEM_OPEN, nil, 1, 7],
[:T_ELEM_NAME, 'link', 1, 8],
[:T_ELEM_CLOSE, nil, 1, 12],
[:T_ELEM_CLOSE, nil, 1, 13]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'head', 1],
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'link', 1],
[:T_ELEM_CLOSE, nil, 1],
[:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex a void element inside another element with whitespace' do
lex("<head><link>\n</head>", :html => true).should == [
[:T_ELEM_OPEN, nil, 1, 1],
[:T_ELEM_NAME, 'head', 1, 2],
[:T_ELEM_OPEN, nil, 1, 7],
[:T_ELEM_NAME, 'link', 1, 8],
[:T_ELEM_CLOSE, nil, 1, 12],
[:T_TEXT, "\n", 1, 13],
[:T_ELEM_CLOSE, nil, 2, 1]
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'head', 1],
[:T_ELEM_OPEN, nil, 1],
[:T_ELEM_NAME, 'link', 1],
[:T_ELEM_CLOSE, nil, 1],
[:T_TEXT, "\n", 1],
[:T_ELEM_CLOSE, nil, 2]
]
end
end