Use index-based buffers for text nodes.

Instead of appending single characters to a String buffer, the lexer now
records a start and end position and uses them to determine the buffer's
contents. This is a lot faster than constantly appending to a String.
This commit is contained in:
Yorick Peterse 2014-03-21 17:32:07 +01:00
parent 2852afce9b
commit 9fa694ad4f
1 changed files with 51 additions and 14 deletions

View File

@ -81,7 +81,6 @@ module Oga
@elements = [] @elements = []
@string_buffer = '' @string_buffer = ''
@text_buffer = ''
end end
## ##
@ -168,20 +167,46 @@ module Oga
@tokens << token @tokens << token
end end
##
# Enables text buffering starting at the given position.
#
# Only the start offset is recorded (in `@text_start_position`); nothing is
# copied at this point. `emit_text_buffer` later passes this offset to `text`
# to obtain the buffered content and then resets the offset to `nil`.
#
# @param [Fixnum] position The start position of the buffer, set to `@te`
# by default. NOTE(review): `@te` is presumably the end offset of the token
# the scanner just matched -- confirm against the Ragel setup.
# @return [Fixnum] The position that was stored.
#
def buffer_text(position = @te)
@text_start_position = position
end
##
# Reports whether a text buffer is currently open, i.e. whether a start
# position has been recorded in `@text_start_position` and not yet cleared.
#
# A start position of `0` still counts as buffering, since only `nil` and
# `false` are falsy in Ruby.
#
# @return [TrueClass|FalseClass]
#
def buffer_text?
  @text_start_position ? true : false
end
## ##
# Emits the current text buffer if we have any. The current line number is # Emits the current text buffer if we have any. The current line number is
# advanced based on the amount of newlines in the buffer. # advanced based on the amount of newlines in the buffer.
# #
def emit_text_buffer # @param [Fixnum] position The end position of the buffer, set to `@ts` by
return if @text_buffer.empty? # default.
#
def emit_text_buffer(position = @ts)
return unless @text_start_position
add_token(:T_TEXT, @text_buffer) content = text(@text_start_position, position)
lines = @text_buffer.count("\n") unless content.empty?
add_token(:T_TEXT, content)
advance_line(lines) if lines > 0 lines = content.count("\n")
@text_buffer = '' advance_line(lines) if lines > 0
end
@text_start_position = nil
end end
## ##
@ -230,10 +255,6 @@ module Oga
dquote = '"'; dquote = '"';
squote = "'"; squote = "'";
action buffer_text {
@text_buffer << text
}
action buffer_string { action buffer_string {
@string_buffer << text @string_buffer << text
} }
@ -317,6 +338,9 @@ module Oga
action start_cdata { action start_cdata {
emit_text_buffer emit_text_buffer
t(:T_CDATA_START) t(:T_CDATA_START)
buffer_text
fcall cdata; fcall cdata;
} }
@ -326,10 +350,11 @@ module Oga
cdata_end => { cdata_end => {
emit_text_buffer emit_text_buffer
t(:T_CDATA_END) t(:T_CDATA_END)
fret; fret;
}; };
any => buffer_text; any;
*|; *|;
# Comments # Comments
@ -349,6 +374,9 @@ module Oga
action start_comment { action start_comment {
emit_text_buffer emit_text_buffer
t(:T_COMMENT_START) t(:T_COMMENT_START)
buffer_text
fcall comment; fcall comment;
} }
@ -358,10 +386,11 @@ module Oga
comment_end => { comment_end => {
emit_text_buffer emit_text_buffer
t(:T_COMMENT_END) t(:T_COMMENT_END)
fret; fret;
}; };
any => buffer_text; any;
*|; *|;
# Elements # Elements
@ -453,7 +482,15 @@ module Oga
# Note that this rule should be declared at the very bottom as it will # Note that this rule should be declared at the very bottom as it will
# otherwise take precedence over the other rules. # otherwise take precedence over the other rules.
any => { buffer_text_until_eof(eof) }; any => {
# First character, start buffering (unless we already are buffering).
buffer_text(@ts) unless buffer_text?
# EOF, emit the text buffer.
if @te == eof
emit_text_buffer(@te)
end
};
*|; *|;
}%% }%%
end # Lexer end # Lexer