Removed the buffering crap from the lexer.
This commit is contained in:
parent
57255012b7
commit
f18e8893de
|
@ -216,26 +216,21 @@ module Oga
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Emits the current buffer if we have any. The current line number is
|
# Emits a text token.
|
||||||
# advanced based on the amount of newlines in the buffer.
|
|
||||||
#
|
#
|
||||||
# @param [Fixnum] position The end position of the buffer.
|
# @param [Fixnum] start
|
||||||
# @param [Symbol] type The type of node to emit.
|
# @param [Fixnum] stop
|
||||||
#
|
#
|
||||||
def emit_buffer(position, type = :T_TEXT)
|
def emit_text(start, stop)
|
||||||
return unless @buffer_start_position
|
content = text(start, stop)
|
||||||
|
|
||||||
content = text(@buffer_start_position, position)
|
|
||||||
|
|
||||||
unless content.empty?
|
unless content.empty?
|
||||||
add_token(type, content)
|
add_token(:T_TEXT, content)
|
||||||
|
|
||||||
lines = content.count("\n")
|
lines = content.count("\n")
|
||||||
|
|
||||||
advance_line(lines) if lines > 0
|
advance_line(lines) if lines > 0
|
||||||
end
|
end
|
||||||
|
|
||||||
@buffer_start_position = nil
|
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -262,36 +257,22 @@ module Oga
|
||||||
dquote = '"';
|
dquote = '"';
|
||||||
squote = "'";
|
squote = "'";
|
||||||
|
|
||||||
action start_string_dquote {
|
|
||||||
start_buffer(te)
|
|
||||||
|
|
||||||
fcall string_dquote;
|
|
||||||
}
|
|
||||||
|
|
||||||
action start_string_squote {
|
|
||||||
start_buffer(te)
|
|
||||||
|
|
||||||
fcall string_squote;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Machine for processing double quoted strings.
|
# Machine for processing double quoted strings.
|
||||||
string_dquote := |*
|
string_dquote := |*
|
||||||
dquote => {
|
^dquote+ => {
|
||||||
emit_buffer(ts, :T_STRING)
|
emit(:T_STRING, ts, te)
|
||||||
fret;
|
};
|
||||||
};
|
|
||||||
|
|
||||||
any;
|
dquote => { fret; };
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Machine for processing single quoted strings.
|
# Machine for processing single quoted strings.
|
||||||
string_squote := |*
|
string_squote := |*
|
||||||
squote => {
|
^squote+ => {
|
||||||
emit_buffer(ts, :T_STRING)
|
emit(:T_STRING, ts, te)
|
||||||
fret;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
any;
|
squote => { fret; };
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# DOCTYPES
|
# DOCTYPES
|
||||||
|
@ -307,7 +288,6 @@ module Oga
|
||||||
doctype_start = '<!DOCTYPE'i whitespace+;
|
doctype_start = '<!DOCTYPE'i whitespace+;
|
||||||
|
|
||||||
action start_doctype {
|
action start_doctype {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_DOCTYPE_START)
|
add_token(:T_DOCTYPE_START)
|
||||||
fcall doctype;
|
fcall doctype;
|
||||||
}
|
}
|
||||||
|
@ -318,8 +298,8 @@ module Oga
|
||||||
'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE, ts, te) };
|
'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE, ts, te) };
|
||||||
|
|
||||||
# Lex the public/system IDs as regular strings.
|
# Lex the public/system IDs as regular strings.
|
||||||
dquote => start_string_dquote;
|
dquote => { fcall string_dquote; };
|
||||||
squote => start_string_squote;
|
squote => { fcall string_squote; };
|
||||||
|
|
||||||
# Whitespace inside doctypes is ignored since there's no point in
|
# Whitespace inside doctypes is ignored since there's no point in
|
||||||
# including it.
|
# including it.
|
||||||
|
@ -347,25 +327,20 @@ module Oga
|
||||||
cdata_end = ']]>';
|
cdata_end = ']]>';
|
||||||
|
|
||||||
action start_cdata {
|
action start_cdata {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_CDATA_START)
|
add_token(:T_CDATA_START)
|
||||||
|
|
||||||
start_buffer(te)
|
|
||||||
|
|
||||||
fcall cdata;
|
fcall cdata;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine for processing the contents of CDATA tags. Everything
|
# Machine for processing the contents of CDATA tags. Everything
|
||||||
# inside a CDATA tag is treated as plain text.
|
# inside a CDATA tag is treated as plain text.
|
||||||
cdata := |*
|
cdata := |*
|
||||||
cdata_end => {
|
any* cdata_end => {
|
||||||
emit_buffer(ts)
|
emit_text(ts, te - 3)
|
||||||
add_token(:T_CDATA_END)
|
add_token(:T_CDATA_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
any;
|
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Comments
|
# Comments
|
||||||
|
@ -383,25 +358,20 @@ module Oga
|
||||||
comment_end = '-->';
|
comment_end = '-->';
|
||||||
|
|
||||||
action start_comment {
|
action start_comment {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_COMMENT_START)
|
add_token(:T_COMMENT_START)
|
||||||
|
|
||||||
start_buffer(te)
|
|
||||||
|
|
||||||
fcall comment;
|
fcall comment;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine used for processing the contents of a comment. Everything
|
# Machine used for processing the contents of a comment. Everything
|
||||||
# inside a comment is treated as plain text (similar to CDATA tags).
|
# inside a comment is treated as plain text (similar to CDATA tags).
|
||||||
comment := |*
|
comment := |*
|
||||||
comment_end => {
|
any* comment_end => {
|
||||||
emit_buffer(ts)
|
emit_text(ts, te - 3)
|
||||||
add_token(:T_COMMENT_END)
|
add_token(:T_COMMENT_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
any;
|
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# XML declaration tags
|
# XML declaration tags
|
||||||
|
@ -412,18 +382,14 @@ module Oga
|
||||||
xml_decl_end = '?>';
|
xml_decl_end = '?>';
|
||||||
|
|
||||||
action start_xml_decl {
|
action start_xml_decl {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_XML_DECL_START)
|
add_token(:T_XML_DECL_START)
|
||||||
|
|
||||||
start_buffer(te)
|
|
||||||
|
|
||||||
fcall xml_decl;
|
fcall xml_decl;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine that processes the contents of an XML declaration tag.
|
# Machine that processes the contents of an XML declaration tag.
|
||||||
xml_decl := |*
|
xml_decl := |*
|
||||||
xml_decl_end => {
|
xml_decl_end => {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_XML_DECL_END)
|
add_token(:T_XML_DECL_END)
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
|
@ -432,8 +398,8 @@ module Oga
|
||||||
# Attributes and their values (e.g. version="1.0").
|
# Attributes and their values (e.g. version="1.0").
|
||||||
identifier => { emit(:T_ATTR, ts, te) };
|
identifier => { emit(:T_ATTR, ts, te) };
|
||||||
|
|
||||||
dquote => start_string_dquote;
|
dquote => { fcall string_dquote; };
|
||||||
squote => start_string_squote;
|
squote => { fcall string_squote; };
|
||||||
|
|
||||||
any;
|
any;
|
||||||
*|;
|
*|;
|
||||||
|
@ -447,7 +413,6 @@ module Oga
|
||||||
# namespace (if any). Remaining work is delegated to a dedicated
|
# namespace (if any). Remaining work is delegated to a dedicated
|
||||||
# machine.
|
# machine.
|
||||||
action start_element {
|
action start_element {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_ELEM_START)
|
add_token(:T_ELEM_START)
|
||||||
|
|
||||||
# Add the element name. If the name includes a namespace we'll break
|
# Add the element name. If the name includes a namespace we'll break
|
||||||
|
@ -484,8 +449,8 @@ module Oga
|
||||||
identifier => { emit(:T_ATTR, ts, te) };
|
identifier => { emit(:T_ATTR, ts, te) };
|
||||||
|
|
||||||
# Attribute values.
|
# Attribute values.
|
||||||
dquote => start_string_dquote;
|
dquote => { fcall string_dquote; };
|
||||||
squote => start_string_squote;
|
squote => { fcall string_squote; };
|
||||||
|
|
||||||
# The closing character of the open tag.
|
# The closing character of the open tag.
|
||||||
('>' | '/') => {
|
('>' | '/') => {
|
||||||
|
@ -512,7 +477,6 @@ module Oga
|
||||||
|
|
||||||
# Regular closing tags.
|
# Regular closing tags.
|
||||||
'</' identifier '>' => {
|
'</' identifier '>' => {
|
||||||
emit_buffer(ts)
|
|
||||||
add_token(:T_ELEM_END, nil)
|
add_token(:T_ELEM_END, nil)
|
||||||
|
|
||||||
@elements.pop if html?
|
@elements.pop if html?
|
||||||
|
@ -527,14 +491,8 @@ module Oga
|
||||||
|
|
||||||
# Note that this rule should be declared at the very bottom as it
|
# Note that this rule should be declared at the very bottom as it
|
||||||
# will otherwise take precedence over the other rules.
|
# will otherwise take precedence over the other rules.
|
||||||
any => {
|
^('<' | '>')+ => {
|
||||||
# First character, start buffering (unless we already are buffering).
|
emit_text(ts, te)
|
||||||
start_buffer(ts) unless @buffer_start_position
|
|
||||||
|
|
||||||
# EOF, emit the text buffer.
|
|
||||||
if te == eof
|
|
||||||
emit_buffer(te)
|
|
||||||
end
|
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
}%%
|
}%%
|
||||||
|
|
Loading…
Reference in New Issue