Use index-based buffers for text nodes.
Instead of appending single characters to a String buffer, the lexer now uses a start and end position to determine the contents of the buffer. This is a lot faster than constantly appending to a String.
This commit is contained in:
parent
2852afce9b
commit
9fa694ad4f
|
@ -81,7 +81,6 @@ module Oga
|
||||||
@elements = []
|
@elements = []
|
||||||
|
|
||||||
@string_buffer = ''
|
@string_buffer = ''
|
||||||
@text_buffer = ''
|
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -168,20 +167,46 @@ module Oga
|
||||||
@tokens << token
|
@tokens << token
|
||||||
end
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Enables text buffering starting at the given position.
|
||||||
|
#
|
||||||
|
# @param [Fixnum] position The start position of the buffer, set to `@te`
|
||||||
|
# by default.
|
||||||
|
#
|
||||||
|
def buffer_text(position = @te)
|
||||||
|
@text_start_position = position
|
||||||
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Returns `true` if we're currently buffering text.
|
||||||
|
#
|
||||||
|
# @return [TrueClass|FalseClass]
|
||||||
|
#
|
||||||
|
def buffer_text?
|
||||||
|
return !!@text_start_position
|
||||||
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Emits the current text buffer if we have any. The current line number is
|
# Emits the current text buffer if we have any. The current line number is
|
||||||
# advanced based on the amount of newlines in the buffer.
|
# advanced based on the amount of newlines in the buffer.
|
||||||
#
|
#
|
||||||
def emit_text_buffer
|
# @param [Fixnum] position The end position of the buffer, set to `@ts` by
|
||||||
return if @text_buffer.empty?
|
# default.
|
||||||
|
#
|
||||||
|
def emit_text_buffer(position = @ts)
|
||||||
|
return unless @text_start_position
|
||||||
|
|
||||||
add_token(:T_TEXT, @text_buffer)
|
content = text(@text_start_position, position)
|
||||||
|
|
||||||
lines = @text_buffer.count("\n")
|
unless content.empty?
|
||||||
|
add_token(:T_TEXT, content)
|
||||||
|
|
||||||
|
lines = content.count("\n")
|
||||||
|
|
||||||
advance_line(lines) if lines > 0
|
advance_line(lines) if lines > 0
|
||||||
|
end
|
||||||
|
|
||||||
@text_buffer = ''
|
@text_start_position = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -230,10 +255,6 @@ module Oga
|
||||||
dquote = '"';
|
dquote = '"';
|
||||||
squote = "'";
|
squote = "'";
|
||||||
|
|
||||||
action buffer_text {
|
|
||||||
@text_buffer << text
|
|
||||||
}
|
|
||||||
|
|
||||||
action buffer_string {
|
action buffer_string {
|
||||||
@string_buffer << text
|
@string_buffer << text
|
||||||
}
|
}
|
||||||
|
@ -317,6 +338,9 @@ module Oga
|
||||||
action start_cdata {
|
action start_cdata {
|
||||||
emit_text_buffer
|
emit_text_buffer
|
||||||
t(:T_CDATA_START)
|
t(:T_CDATA_START)
|
||||||
|
|
||||||
|
buffer_text
|
||||||
|
|
||||||
fcall cdata;
|
fcall cdata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -326,10 +350,11 @@ module Oga
|
||||||
cdata_end => {
|
cdata_end => {
|
||||||
emit_text_buffer
|
emit_text_buffer
|
||||||
t(:T_CDATA_END)
|
t(:T_CDATA_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
any => buffer_text;
|
any;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Comments
|
# Comments
|
||||||
|
@ -349,6 +374,9 @@ module Oga
|
||||||
action start_comment {
|
action start_comment {
|
||||||
emit_text_buffer
|
emit_text_buffer
|
||||||
t(:T_COMMENT_START)
|
t(:T_COMMENT_START)
|
||||||
|
|
||||||
|
buffer_text
|
||||||
|
|
||||||
fcall comment;
|
fcall comment;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -358,10 +386,11 @@ module Oga
|
||||||
comment_end => {
|
comment_end => {
|
||||||
emit_text_buffer
|
emit_text_buffer
|
||||||
t(:T_COMMENT_END)
|
t(:T_COMMENT_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
any => buffer_text;
|
any;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Elements
|
# Elements
|
||||||
|
@ -453,7 +482,15 @@ module Oga
|
||||||
|
|
||||||
# Note that this rule should be declared at the very bottom as it will
|
# Note that this rule should be declared at the very bottom as it will
|
||||||
# otherwise take precedence over the other rules.
|
# otherwise take precedence over the other rules.
|
||||||
any => { buffer_text_until_eof(eof) };
|
any => {
|
||||||
|
# First character, start buffering (unless we already are buffering).
|
||||||
|
buffer_text(@ts) unless buffer_text?
|
||||||
|
|
||||||
|
# EOF, emit the text buffer.
|
||||||
|
if @te == eof
|
||||||
|
emit_text_buffer(@te)
|
||||||
|
end
|
||||||
|
};
|
||||||
*|;
|
*|;
|
||||||
}%%
|
}%%
|
||||||
end # Lexer
|
end # Lexer
|
||||||
|
|
Loading…
Reference in New Issue