diff --git a/lib/oga/html/parser.rb b/lib/oga/html/parser.rb
index bc12cc1..2395269 100644
--- a/lib/oga/html/parser.rb
+++ b/lib/oga/html/parser.rb
@@ -6,13 +6,14 @@ module Oga
#
class Parser < XML::Parser
##
+ # @param [String] data
# @param [Hash] options
# @see Oga::XML::Parser#initialize
#
- def initialize(options = {})
+ def initialize(data, options = {})
options = options.merge(:html => true)
- super(options)
+ super(data, options)
end
end # Parser
end # HTML
diff --git a/lib/oga/xml/lexer.rl b/lib/oga/xml/lexer.rl
index 75667de..6afa0c4 100644
--- a/lib/oga/xml/lexer.rl
+++ b/lib/oga/xml/lexer.rl
@@ -53,17 +53,21 @@ module Oga
end
##
+ # @param [String] data The data to lex.
+ #
# @param [Hash] options
#
# @option options [Symbol] :html When set to `true` the lexer will treat
# the input as HTML instead of SGML/XML. This makes it possible to lex
# HTML void elements such as ``.
#
- def initialize(options = {})
+ def initialize(data, options = {})
options.each do |key, value|
instance_variable_set("@#{key}", value) if respond_to?(key)
end
+ @data = data.unpack('U*')
+
reset
end
@@ -74,7 +78,6 @@ module Oga
#
def reset
@line = 1
- @data = nil
@ts = nil
@te = nil
@tokens = []
@@ -83,6 +86,9 @@ module Oga
@cs = self.class.lexer_start
@act = 0
@elements = []
+ @eof = @data.length
+ @p = 0
+ @pe = @eof
@buffer_start_position = nil
end
@@ -102,8 +108,12 @@ module Oga
# @return [Array]
# @see #advance
#
- def lex(data)
- tokens = advance(data)
+ def lex
+ tokens = []
+
+ while token = advance
+ tokens << token
+ end
reset
@@ -118,16 +128,10 @@ module Oga
# @param [String] data The String to consume.
# @return [Array]
#
- def advance(data)
- @data = data.unpack('U*')
- eof = data.length
-
- p = 0
- pe = eof
-
+ def advance
%% write exec; # % fix highlight
- return @tokens
+ return @tokens.shift
end
##
@@ -244,7 +248,10 @@ module Oga
%%{
# Use instance variables for `ts` and friends.
access @;
- getkey (@data[p] || 0);
+ getkey (@data[@p] || 0);
+ variable p @p;
+ variable pe @pe;
+ variable eof @eof;
newline = '\n' | '\r\n';
whitespace = [ \t];
@@ -529,7 +536,7 @@ module Oga
start_buffer(@ts) unless buffering?
# EOF, emit the text buffer.
- if @te == eof
+ if @te == @eof
emit_buffer(@te)
end
};
diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y
index ed555b5..27b366d 100644
--- a/lib/oga/xml/parser.y
+++ b/lib/oga/xml/parser.y
@@ -135,13 +135,14 @@ end
---- inner
##
+ # @param [String] data The input to parse.
+ #
# @param [Hash] options
+ # @see Oga::XML::Lexer#initialize
#
- # @option options [TrueClass|FalseClass] :html Enables HTML parsing mode.
- # @see Oga::Lexer#initialize
- #
- def initialize(options = {})
- @lexer = Lexer.new(options)
+ def initialize(data, options = {})
+ @data = data
+ @lexer = Lexer.new(data, options)
end
##
@@ -172,7 +173,7 @@ end
# @return [Array]
#
def next_token
- type, value, line = @tokens.shift
+ type, value, line = @lexer.advance
@line = line if line
@@ -188,11 +189,12 @@ end
def on_error(type, value, stack)
name = token_to_str(type)
index = @line - 1
- lines = ''
+ lines = @data.lines.to_a
+ code = ''
# Show up to 5 lines before and after the offending line (if they exist).
(-5..5).each do |offset|
- line = @lines[index + offset]
+ line = lines[index + offset]
number = @line + offset
if line and number > 0
@@ -202,31 +204,28 @@ end
prefix = ' '
end
- lines << "#{prefix}#{number}: #{line.strip}\n"
+ code << "#{prefix}#{number}: #{line.strip}\n"
end
end
raise Racc::ParseError, <<-EOF.strip
Unexpected #{name} with value #{value.inspect} on line #{@line}:
-#{lines}
+#{code}
EOF
end
##
- # Parses the supplied string and returns the AST.
+ # Parses the input and returns the corresponding AST.
#
# @example
- # parser = Oga::Parser.new
- # ast = parser.parse('bar')
+ # parser = Oga::XML::Parser.new('bar')
+ # ast = parser.parse
#
- # @param [String] string
# @return [Oga::AST::Node]
#
- def parse(string)
- @lines = string.lines
- @tokens = @lexer.lex(string)
- ast = do_parse
+ def parse
+ ast = do_parse
reset
diff --git a/spec/support/parsing.rb b/spec/support/parsing.rb
index 9e347bd..5486708 100644
--- a/spec/support/parsing.rb
+++ b/spec/support/parsing.rb
@@ -19,7 +19,7 @@ module Oga
# @return [Array]
#
def lex(input, options = {})
- return Oga::XML::Lexer.new(options).lex(input)
+ return Oga::XML::Lexer.new(input, options).lex
end
##
@@ -30,7 +30,7 @@ module Oga
# @return [Oga::AST::Node]
#
def parse(input, options = {})
- return Oga::XML::Parser.new(options).parse(input)
+ return Oga::XML::Parser.new(input, options).parse
end
##
@@ -39,7 +39,7 @@ module Oga
# @see #parse
#
def parse_html(input, options = {})
- return Oga::HTML::Parser.new(options).parse(input)
+ return Oga::HTML::Parser.new(input, options).parse
end
##