From 74bc11a2394834fafc753c3e62bb056b89dcfd42 Mon Sep 17 00:00:00 2001
From: Yorick Peterse
Date: Thu, 20 Mar 2014 19:44:28 +0100
Subject: [PATCH] Rip out column counting.
This makes both the lexer and parser quite a bit easier to use. Counting column
numbers also isn't really needed when parsing XML/HTML.
---
lib/oga/ast/node.rb | 2 +-
lib/oga/lexer.rl | 35 ++------
lib/oga/parser.y | 20 ++---
spec/oga/lexer/cdata_spec.rb | 18 ++---
spec/oga/lexer/comments_spec.rb | 40 ++++-----
spec/oga/lexer/doctype_spec.rb | 24 +++---
spec/oga/lexer/documents_spec.rb | 44 +++++-----
spec/oga/lexer/elements_spec.rb | 98 +++++++++++------------
spec/oga/lexer/general_spec.rb | 8 +-
spec/oga/lexer/html_void_elements_spec.rb | 40 ++++-----
10 files changed, 148 insertions(+), 181 deletions(-)
diff --git a/lib/oga/ast/node.rb b/lib/oga/ast/node.rb
index 5b49908..5f628ca 100644
--- a/lib/oga/ast/node.rb
+++ b/lib/oga/ast/node.rb
@@ -3,7 +3,7 @@ module Oga
##
#
class Node < ::AST::Node
- attr_reader :line, :column
+ attr_reader :line
end # Node
end # AST
end # Oga
diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl
index a320aac..d041919 100644
--- a/lib/oga/lexer.rl
+++ b/lib/oga/lexer.rl
@@ -72,7 +72,6 @@ module Oga
#
def reset
@line = 1
- @column = 1
@data = nil
@ts = nil
@te = nil
@@ -124,15 +123,7 @@ module Oga
# @param [Fixnum] amount The amount of lines to advance.
#
def advance_line(amount = 1)
- @line += amount
- @column = 1
- end
-
- ##
- # @param [Fixnum] length The amount of columns to advance.
- #
- def advance_column(length = 1)
- @column += length
+ @line += amount
end
##
@@ -166,16 +157,13 @@ module Oga
end
##
- # Adds a token with the given type and value to the list. If a value is
- # given the column number is also advanced based on the value's length.
+ # Adds a token with the given type and value to the list.
#
# @param [Symbol] type The token type.
# @param [String] value The token value.
#
def add_token(type, value)
- token = [type, value, @line, @column]
-
- advance_column(value.length) if value
+ token = [type, value, @line]
@tokens << token
end
@@ -214,7 +202,6 @@ module Oga
#
def emit_string_buffer
add_token(:T_STRING, @string_buffer)
- advance_column
@string_buffer = ''
end
@@ -264,7 +251,6 @@ module Oga
^dquote => buffer_string;
dquote => {
emit_string_buffer
- advance_column
fret;
};
*|;
@@ -274,7 +260,6 @@ module Oga
^squote => buffer_string;
squote => {
emit_string_buffer
- advance_column
fret;
};
*|;
@@ -308,7 +293,7 @@ module Oga
# Whitespace inside doctypes is ignored since there's no point in
# including it.
- whitespace => { advance_column };
+ whitespace;
'>' => {
t(:T_DOCTYPE_END)
@@ -389,7 +374,6 @@ module Oga
action start_element {
emit_text_buffer
add_token(:T_ELEM_OPEN, nil)
- advance_column
# Add the element name. If the name includes a namespace we'll break
# the name up into two separate tokens.
@@ -399,10 +383,6 @@ module Oga
ns, name = name.split(':')
add_token(:T_ELEM_NS, ns)
-
- # Advance the column for the colon (:) that separates the namespace
- # and element name.
- advance_column
end
@elements << name
@@ -422,7 +402,7 @@ module Oga
# For example, in `<p foo="bar">` the element head is ` foo="bar"`.
#
element_head := |*
- (whitespace | '=') => { advance_column };
+ whitespace | '=';
# Attribute names.
element_name => { t(:T_ATTR) };
@@ -452,8 +432,6 @@ module Oga
add_token(:T_ELEM_CLOSE, nil)
@elements.pop
end
-
- advance_column
};
# Regular closing tags.
@@ -461,14 +439,11 @@ module Oga
emit_text_buffer
add_token(:T_ELEM_CLOSE, nil)
- advance_column(@te - @ts)
-
@elements.pop
};
# Self closing elements that are not handled by the HTML mode.
'/>' => {
- advance_column
add_token(:T_ELEM_CLOSE, nil)
@elements.pop
diff --git a/lib/oga/parser.y b/lib/oga/parser.y
index 0be34ae..7f04ee6 100644
--- a/lib/oga/parser.y
+++ b/lib/oga/parser.y
@@ -124,25 +124,22 @@ end
end
def reset
- @lines = []
- @line = 1
- @column = 1
+ @lines = []
+ @line = 1
end
def s(type, *children)
return AST::Node.new(
type,
children.flatten,
- :line => @line,
- :column => @column
+ :line => @line
)
end
def next_token
- type, value, line, column = @tokens.shift
+ type, value, line = @tokens.shift
- @line = line if line
- @column = column if column
+ @line = line if line
return type ? [type, value] : [false, false]
end
@@ -150,18 +147,13 @@ end
def on_error(type, value, stack)
name = token_to_str(type)
line_str = @lines[@line - 1]
- indicator = '~' * (@column - 1) + '^'
raise Racc::ParseError, <<-EOF.strip
-Failed to parse the supplied input.
-
-Reason: unexpected #{name} with value #{value.inspect}
-Location: line #{@line}, column #{@column}
+Unexpected #{name} with value #{value.inspect} on line #{@line}
Offending code:
#{line_str}
-#{indicator}
Current stack:
diff --git a/spec/oga/lexer/cdata_spec.rb b/spec/oga/lexer/cdata_spec.rb
index d70cef1..bf41f24 100644
--- a/spec/oga/lexer/cdata_spec.rb
+++ b/spec/oga/lexer/cdata_spec.rb
@@ -4,25 +4,25 @@ describe Oga::Lexer do
context 'cdata tags' do
example 'lex a cdata tag' do
lex('').should == [
- [:T_CDATA_START, '', 1, 13]
+ [:T_CDATA_START, '', 1]
]
end
example 'lex tags inside CDATA tags as regular text' do
lex('Foo
]]>').should == [
- [:T_CDATA_START, 'Foo
', 1, 10],
- [:T_CDATA_END, ']]>', 1, 20]
+ [:T_CDATA_START, 'Foo', 1],
+ [:T_CDATA_END, ']]>', 1]
]
end
example 'lex double brackets inside a CDATA tag' do
lex('').should == [
- [:T_CDATA_START, '', 1, 12]
+ [:T_CDATA_START, '', 1]
]
end
end
diff --git a/spec/oga/lexer/comments_spec.rb b/spec/oga/lexer/comments_spec.rb
index 254bb22..63edafc 100644
--- a/spec/oga/lexer/comments_spec.rb
+++ b/spec/oga/lexer/comments_spec.rb
@@ -4,51 +4,51 @@ describe Oga::Lexer do
context 'comments' do
example 'lex a comment' do
lex('').should == [
- [:T_COMMENT_START, '', 1, 10]
+ [:T_COMMENT_START, '', 1]
]
end
example 'lex a comment containing --' do
lex('').should == [
- [:T_COMMENT_START, '', 1, 9]
+ [:T_COMMENT_START, '', 1]
]
end
example 'lex a comment containing ->' do
lex('').should == [
- [:T_COMMENT_START, '', 1, 9]
+ [:T_COMMENT_START, '', 1]
]
end
example 'lex a comment followed by text' do
lex('foo').should == [
- [:T_COMMENT_START, '', 1, 5],
- [:T_TEXT, 'foo', 1, 8]
+ [:T_COMMENT_START, '', 1],
+ [:T_TEXT, 'foo', 1]
]
end
example 'lex text followed by a comment' do
lex('foo').should == [
- [:T_TEXT, 'foo', 1, 1],
- [:T_COMMENT_START, '', 1, 8]
+ [:T_TEXT, 'foo', 1],
+ [:T_COMMENT_START, '', 1]
]
end
example 'lex an element followed by a comment' do
lex('').should == [
- [:T_ELEM_OPEN, nil, 1, 1],
- [:T_ELEM_NAME, 'p', 1, 2],
- [:T_ELEM_CLOSE, nil, 1, 4],
- [:T_COMMENT_START, '', 1, 12]
+ [:T_ELEM_OPEN, nil, 1],
+ [:T_ELEM_NAME, 'p', 1],
+ [:T_ELEM_CLOSE, nil, 1],
+ [:T_COMMENT_START, '', 1]
]
end
end
diff --git a/spec/oga/lexer/doctype_spec.rb b/spec/oga/lexer/doctype_spec.rb
index dad3ea6..d96e59d 100644
--- a/spec/oga/lexer/doctype_spec.rb
+++ b/spec/oga/lexer/doctype_spec.rb
@@ -4,28 +4,28 @@ describe Oga::Lexer do
context 'doctypes' do
example 'lex the HTML5 doctype' do
lex('').should == [
- [:T_DOCTYPE_START, '', 1, 15]
+ [:T_DOCTYPE_START, '', 1]
]
end
example 'lex a doctype with a public and system ID' do
lex('').should == [
- [:T_DOCTYPE_START, '', 1, 37]
+ [:T_DOCTYPE_START, '', 1]
]
end
example 'lex a doctype with a public and system ID using single quotes' do
lex("").should == [
- [:T_DOCTYPE_START, '', 1, 37]
+ [:T_DOCTYPE_START, '', 1]
]
end
end
diff --git a/spec/oga/lexer/documents_spec.rb b/spec/oga/lexer/documents_spec.rb
index 629a53c..3564b63 100644
--- a/spec/oga/lexer/documents_spec.rb
+++ b/spec/oga/lexer/documents_spec.rb
@@ -14,40 +14,40 @@ describe Oga::Lexer do
EOF
lex(html).should == [
- [:T_DOCTYPE_START, '', 1, 15],
- [:T_TEXT, "\n", 1, 16],
+ [:T_DOCTYPE_START, '', 1],
+ [:T_TEXT, "\n", 1],
#
- [:T_ELEM_OPEN, nil, 2, 1],
- [:T_ELEM_NAME, 'html', 2, 2],
- [:T_TEXT, "\n", 2, 7],
+ [:T_ELEM_OPEN, nil, 2],
+ [:T_ELEM_NAME, 'html', 2],
+ [:T_TEXT, "\n", 2],
#
- [:T_ELEM_OPEN, nil, 3, 1],
- [:T_ELEM_NAME, 'head', 3, 2],
- [:T_TEXT, "\n", 3, 7],
+ [:T_ELEM_OPEN, nil, 3],
+ [:T_ELEM_NAME, 'head', 3],
+ [:T_TEXT, "\n", 3],
# Title
- [:T_ELEM_OPEN, nil, 4, 1],
- [:T_ELEM_NAME, 'title', 4, 2],
- [:T_TEXT, 'Title', 4, 8],
- [:T_ELEM_CLOSE, nil, 4, 13],
- [:T_TEXT, "\n", 4, 21],
+ [:T_ELEM_OPEN, nil, 4],
+ [:T_ELEM_NAME, 'title', 4],
+ [:T_TEXT, 'Title', 4],
+ [:T_ELEM_CLOSE, nil, 4],
+ [:T_TEXT, "\n", 4],
#
- [:T_ELEM_CLOSE, nil, 5, 1],
- [:T_TEXT, "\n", 5, 8],
+ [:T_ELEM_CLOSE, nil, 5],
+ [:T_TEXT, "\n", 5],
#
- [:T_ELEM_OPEN, nil, 6, 1],
- [:T_ELEM_NAME, 'body', 6, 2],
- [:T_ELEM_CLOSE, nil, 6, 7],
- [:T_TEXT, "\n", 6, 14],
+ [:T_ELEM_OPEN, nil, 6],
+ [:T_ELEM_NAME, 'body', 6],
+ [:T_ELEM_CLOSE, nil, 6],
+ [:T_TEXT, "\n", 6],
#
- [:T_ELEM_CLOSE, nil, 7, 1],
- [:T_TEXT, "\n", 7, 8]
+ [:T_ELEM_CLOSE, nil, 7],
+ [:T_TEXT, "\n", 7]
]
end
end
diff --git a/spec/oga/lexer/elements_spec.rb b/spec/oga/lexer/elements_spec.rb
index 893a86b..6022c61 100644
--- a/spec/oga/lexer/elements_spec.rb
+++ b/spec/oga/lexer/elements_spec.rb
@@ -4,33 +4,33 @@ describe Oga::Lexer do
context 'elements' do
example 'lex an opening element' do
lex('
').should == [
- [:T_ELEM_OPEN, nil, 1, 1],
- [:T_ELEM_NAME, 'p', 1, 2]
+ [:T_ELEM_OPEN, nil, 1],
+ [:T_ELEM_NAME, 'p', 1]
]
end
example 'lex an opening an closing element' do
lex('
').should == [
- [:T_ELEM_OPEN, nil, 1, 1],
- [:T_ELEM_NAME, 'p', 1, 2],
- [:T_ELEM_CLOSE, nil, 1, 4]
+ [:T_ELEM_OPEN, nil, 1],
+ [:T_ELEM_NAME, 'p', 1],
+ [:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex a paragraph element with text inside it' do
lex('
Hello
').should == [
- [:T_ELEM_OPEN, nil, 1, 1],
- [:T_ELEM_NAME, 'p', 1, 2],
- [:T_TEXT, 'Hello', 1, 4],
- [:T_ELEM_CLOSE, nil, 1, 9]
+ [:T_ELEM_OPEN, nil, 1],
+ [:T_ELEM_NAME, 'p', 1],
+ [:T_TEXT, 'Hello', 1],
+ [:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex text followed by a paragraph element' do
lex('Foo
').should == [
- [:T_TEXT, 'Foo', 1, 1],
- [:T_ELEM_OPEN, nil, 1, 4],
- [:T_ELEM_NAME, 'p', 1, 5]
+ [:T_TEXT, 'Foo', 1],
+ [:T_ELEM_OPEN, nil, 1],
+ [:T_ELEM_NAME, 'p', 1]
]
end
end
@@ -38,21 +38,21 @@ describe Oga::Lexer do
context 'elements with attributes' do
example 'lex an element with an attribute without a value' do
lex('
').should == [
- [:T_ELEM_OPEN, nil, 1, 1],
- [:T_ELEM_NAME, 'p', 1, 2],
- [:T_ATTR, 'foo', 1, 4],
- [:T_ELEM_CLOSE, nil, 1, 8]
+ [:T_ELEM_OPEN, nil, 1],
+ [:T_ELEM_NAME, 'p', 1],
+ [:T_ATTR, 'foo', 1],
+ [:T_ELEM_CLOSE, nil, 1]
]
end
example 'lex a paragraph element with attributes' do
lex('