Use index-based buffering for strings.
This uses the same mechanism that is already used for T_TEXT nodes.
This commit is contained in:
parent
d7a40ec470
commit
56ed9e949c
|
@ -80,7 +80,7 @@ module Oga
|
||||||
@top = 0
|
@top = 0
|
||||||
@elements = []
|
@elements = []
|
||||||
|
|
||||||
@string_buffer = ''
|
@buffer_start_position = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -168,67 +168,47 @@ module Oga
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Enables text buffering starting at the given position.
|
# Enables buffering starting at the given position.
|
||||||
#
|
#
|
||||||
# @param [Fixnum] position The start position of the buffer, set to `@te`
|
# @param [Fixnum] position The start position of the buffer, set to `@te`
|
||||||
# by default.
|
# by default.
|
||||||
#
|
#
|
||||||
def buffer_text(position = @te)
|
def start_buffer(position = @te)
|
||||||
@text_start_position = position
|
@buffer_start_position = position
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Returns `true` if we're currently buffering text.
|
# Returns `true` if we're currently buffering.
|
||||||
#
|
#
|
||||||
# @return [TrueClass|FalseClass]
|
# @return [TrueClass|FalseClass]
|
||||||
#
|
#
|
||||||
def buffer_text?
|
def buffering?
|
||||||
return !!@text_start_position
|
return !!@buffer_start_position
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Emits the current text buffer if we have any. The current line number is
|
# Emits the current buffer if we have any. The current line number is
|
||||||
# advanced based on the amount of newlines in the buffer.
|
# advanced based on the amount of newlines in the buffer.
|
||||||
#
|
#
|
||||||
# @param [Fixnum] position The end position of the buffer, set to `@ts` by
|
# @param [Fixnum] position The end position of the buffer, set to `@ts` by
|
||||||
# default.
|
# default.
|
||||||
#
|
#
|
||||||
def emit_text_buffer(position = @ts)
|
# @param [Symbol] type The type of node to emit.
|
||||||
return unless @text_start_position
|
#
|
||||||
|
def emit_buffer(position = @ts, type = :T_TEXT)
|
||||||
|
return unless @buffer_start_position
|
||||||
|
|
||||||
content = text(@text_start_position, position)
|
content = text(@buffer_start_position, position)
|
||||||
|
|
||||||
unless content.empty?
|
unless content.empty?
|
||||||
add_token(:T_TEXT, content)
|
add_token(type, content)
|
||||||
|
|
||||||
lines = content.count("\n")
|
lines = content.count("\n")
|
||||||
|
|
||||||
advance_line(lines) if lines > 0
|
advance_line(lines) if lines > 0
|
||||||
end
|
end
|
||||||
|
|
||||||
@text_start_position = nil
|
@buffer_start_position = nil
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Buffers text until the current token position hits the EOF position. Once
|
|
||||||
# this position is reached the buffer is emitted.
|
|
||||||
#
|
|
||||||
# @param [Fixnum] eof The EOF position.
|
|
||||||
# @see #emit_text_buffer
|
|
||||||
#
|
|
||||||
def buffer_text_until_eof(eof)
|
|
||||||
@text_buffer << text
|
|
||||||
|
|
||||||
emit_text_buffer if @te == eof
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Emits and resets the current string buffer.
|
|
||||||
#
|
|
||||||
def emit_string_buffer
|
|
||||||
add_token(:T_STRING, @string_buffer)
|
|
||||||
|
|
||||||
@string_buffer = ''
|
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -255,34 +235,36 @@ module Oga
|
||||||
dquote = '"';
|
dquote = '"';
|
||||||
squote = "'";
|
squote = "'";
|
||||||
|
|
||||||
action buffer_string {
|
|
||||||
@string_buffer << text
|
|
||||||
}
|
|
||||||
|
|
||||||
action start_string_dquote {
|
action start_string_dquote {
|
||||||
|
start_buffer
|
||||||
|
|
||||||
fcall string_dquote;
|
fcall string_dquote;
|
||||||
}
|
}
|
||||||
|
|
||||||
action start_string_squote {
|
action start_string_squote {
|
||||||
|
start_buffer
|
||||||
|
|
||||||
fcall string_squote;
|
fcall string_squote;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine for processing double quoted strings.
|
# Machine for processing double quoted strings.
|
||||||
string_dquote := |*
|
string_dquote := |*
|
||||||
^dquote => buffer_string;
|
dquote => {
|
||||||
dquote => {
|
emit_buffer(@ts, :T_STRING)
|
||||||
emit_string_buffer
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
any;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Machine for processing single quoted strings.
|
# Machine for processing single quoted strings.
|
||||||
string_squote := |*
|
string_squote := |*
|
||||||
^squote => buffer_string;
|
squote => {
|
||||||
squote => {
|
emit_buffer(@ts, :T_STRING)
|
||||||
emit_string_buffer
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
any;
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# DOCTYPES
|
# DOCTYPES
|
||||||
|
@ -298,7 +280,7 @@ module Oga
|
||||||
doctype_start = '<!DOCTYPE'i whitespace+ 'HTML'i;
|
doctype_start = '<!DOCTYPE'i whitespace+ 'HTML'i;
|
||||||
|
|
||||||
action start_doctype {
|
action start_doctype {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
t(:T_DOCTYPE_START)
|
t(:T_DOCTYPE_START)
|
||||||
fcall doctype;
|
fcall doctype;
|
||||||
}
|
}
|
||||||
|
@ -336,10 +318,10 @@ module Oga
|
||||||
cdata_end = ']]>';
|
cdata_end = ']]>';
|
||||||
|
|
||||||
action start_cdata {
|
action start_cdata {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
t(:T_CDATA_START)
|
t(:T_CDATA_START)
|
||||||
|
|
||||||
buffer_text
|
start_buffer
|
||||||
|
|
||||||
fcall cdata;
|
fcall cdata;
|
||||||
}
|
}
|
||||||
|
@ -348,7 +330,7 @@ module Oga
|
||||||
# inside a CDATA tag is treated as plain text.
|
# inside a CDATA tag is treated as plain text.
|
||||||
cdata := |*
|
cdata := |*
|
||||||
cdata_end => {
|
cdata_end => {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
t(:T_CDATA_END)
|
t(:T_CDATA_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
|
@ -372,10 +354,10 @@ module Oga
|
||||||
comment_end = '-->';
|
comment_end = '-->';
|
||||||
|
|
||||||
action start_comment {
|
action start_comment {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
t(:T_COMMENT_START)
|
t(:T_COMMENT_START)
|
||||||
|
|
||||||
buffer_text
|
start_buffer
|
||||||
|
|
||||||
fcall comment;
|
fcall comment;
|
||||||
}
|
}
|
||||||
|
@ -384,7 +366,7 @@ module Oga
|
||||||
# inside a comment is treated as plain text (similar to CDATA tags).
|
# inside a comment is treated as plain text (similar to CDATA tags).
|
||||||
comment := |*
|
comment := |*
|
||||||
comment_end => {
|
comment_end => {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
t(:T_COMMENT_END)
|
t(:T_COMMENT_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
|
@ -401,7 +383,7 @@ module Oga
|
||||||
# Action that creates the tokens for the opening tag, name and namespace
|
# Action that creates the tokens for the opening tag, name and namespace
|
||||||
# (if any). Remaining work is delegated to a dedicated machine.
|
# (if any). Remaining work is delegated to a dedicated machine.
|
||||||
action start_element {
|
action start_element {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
add_token(:T_ELEM_OPEN, nil)
|
add_token(:T_ELEM_OPEN, nil)
|
||||||
|
|
||||||
# Add the element name. If the name includes a namespace we'll break
|
# Add the element name. If the name includes a namespace we'll break
|
||||||
|
@ -467,7 +449,7 @@ module Oga
|
||||||
|
|
||||||
# Regular closing tags.
|
# Regular closing tags.
|
||||||
'</' element_name '>' => {
|
'</' element_name '>' => {
|
||||||
emit_text_buffer
|
emit_buffer
|
||||||
add_token(:T_ELEM_CLOSE, nil)
|
add_token(:T_ELEM_CLOSE, nil)
|
||||||
|
|
||||||
@elements.pop
|
@elements.pop
|
||||||
|
@ -484,11 +466,11 @@ module Oga
|
||||||
# otherwise take precedence over the other rules.
|
# otherwise take precedence over the other rules.
|
||||||
any => {
|
any => {
|
||||||
# First character, start buffering (unless we already are buffering).
|
# First character, start buffering (unless we already are buffering).
|
||||||
buffer_text(@ts) unless buffer_text?
|
start_buffer(@ts) unless buffering?
|
||||||
|
|
||||||
# EOF, emit the text buffer.
|
# EOF, emit the text buffer.
|
||||||
if @te == eof
|
if @te == eof
|
||||||
emit_text_buffer(@te)
|
emit_buffer(@te)
|
||||||
end
|
end
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
Loading…
Reference in New Issue