Cache rb_intern/symbol lookups in the lexer.
For JRuby this has little to no benefit, as it uses strings for method names. However, both MRI and Rubinius perform a Symbol lookup whenever rb_intern() is called. By doing this once for all callback names and caching the resulting VALUE objects, the lexer timings can be reduced by about 25%. In the case of the benchmark benchmark/xml/lexer/string_average_bench.rb, this means it runs in around 500ms instead of 700ms.
This commit is contained in:
parent
a10fe855d7
commit
4fa88fcbde
|
@ -38,16 +38,15 @@ on `ts` and `te`) so the macro ignores this argument.
|
||||||
*/
|
*/
|
||||||
void liboga_xml_lexer_callback(
|
void liboga_xml_lexer_callback(
|
||||||
VALUE self,
|
VALUE self,
|
||||||
const char *name,
|
VALUE name,
|
||||||
rb_encoding *encoding,
|
rb_encoding *encoding,
|
||||||
const char *ts,
|
const char *ts,
|
||||||
const char *te
|
const char *te
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
VALUE value = rb_enc_str_new(ts, te - ts, encoding);
|
VALUE value = rb_enc_str_new(ts, te - ts, encoding);
|
||||||
VALUE method = rb_intern(name);
|
|
||||||
|
|
||||||
rb_funcall(self, method, 1, value);
|
rb_funcall(self, name, 1, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -57,11 +56,9 @@ void liboga_xml_lexer_callback(
|
||||||
* @example
|
* @example
|
||||||
* liboga_xml_lexer_callback_simple(self, "on_cdata_start");
|
* liboga_xml_lexer_callback_simple(self, "on_cdata_start");
|
||||||
*/
|
*/
|
||||||
void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
|
void liboga_xml_lexer_callback_simple(VALUE self, VALUE name)
|
||||||
{
|
{
|
||||||
VALUE method = rb_intern(name);
|
rb_funcall(self, name, 0);
|
||||||
|
|
||||||
rb_funcall(self, method, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
%% write data;
|
%% write data;
|
||||||
|
@ -93,6 +90,31 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
||||||
|
|
||||||
int lines = state->lines;
|
int lines = state->lines;
|
||||||
|
|
||||||
|
VALUE id_advance_line = rb_intern("advance_line");
|
||||||
|
VALUE id_on_attribute = rb_intern("on_attribute");
|
||||||
|
VALUE id_on_attribute_ns = rb_intern("on_attribute_ns");
|
||||||
|
VALUE id_on_cdata = rb_intern("on_cdata");
|
||||||
|
VALUE id_on_comment = rb_intern("on_comment");
|
||||||
|
VALUE id_on_doctype_end = rb_intern("on_doctype_end");
|
||||||
|
VALUE id_on_doctype_inline = rb_intern("on_doctype_inline");
|
||||||
|
VALUE id_on_doctype_name = rb_intern("on_doctype_name");
|
||||||
|
VALUE id_on_doctype_start = rb_intern("on_doctype_start");
|
||||||
|
VALUE id_on_doctype_type = rb_intern("on_doctype_type");
|
||||||
|
VALUE id_on_element_end = rb_intern("on_element_end");
|
||||||
|
VALUE id_on_element_name = rb_intern("on_element_name");
|
||||||
|
VALUE id_on_element_ns = rb_intern("on_element_ns");
|
||||||
|
VALUE id_on_element_open_end = rb_intern("on_element_open_end");
|
||||||
|
VALUE id_on_element_start = rb_intern("on_element_start");
|
||||||
|
VALUE id_on_proc_ins_end = rb_intern("on_proc_ins_end");
|
||||||
|
VALUE id_on_proc_ins_name = rb_intern("on_proc_ins_name");
|
||||||
|
VALUE id_on_proc_ins_start = rb_intern("on_proc_ins_start");
|
||||||
|
VALUE id_on_string_body = rb_intern("on_string_body");
|
||||||
|
VALUE id_on_string_dquote = rb_intern("on_string_dquote");
|
||||||
|
VALUE id_on_string_squote = rb_intern("on_string_squote");
|
||||||
|
VALUE id_on_text = rb_intern("on_text");
|
||||||
|
VALUE id_on_xml_decl_end = rb_intern("on_xml_decl_end");
|
||||||
|
VALUE id_on_xml_decl_start = rb_intern("on_xml_decl_start");
|
||||||
|
|
||||||
%% write exec;
|
%% write exec;
|
||||||
|
|
||||||
state->lines = lines;
|
state->lines = lines;
|
||||||
|
|
|
@ -101,6 +101,31 @@ public class Lexer extends RubyObject
|
||||||
int pe = data.length;
|
int pe = data.length;
|
||||||
int eof = data.length;
|
int eof = data.length;
|
||||||
|
|
||||||
|
String id_advance_line = "advance_line";
|
||||||
|
String id_on_attribute = "on_attribute";
|
||||||
|
String id_on_attribute_ns = "on_attribute_ns";
|
||||||
|
String id_on_cdata = "on_cdata";
|
||||||
|
String id_on_comment = "on_comment";
|
||||||
|
String id_on_doctype_end = "on_doctype_end";
|
||||||
|
String id_on_doctype_inline = "on_doctype_inline";
|
||||||
|
String id_on_doctype_name = "on_doctype_name";
|
||||||
|
String id_on_doctype_start = "on_doctype_start";
|
||||||
|
String id_on_doctype_type = "on_doctype_type";
|
||||||
|
String id_on_element_end = "on_element_end";
|
||||||
|
String id_on_element_name = "on_element_name";
|
||||||
|
String id_on_element_ns = "on_element_ns";
|
||||||
|
String id_on_element_open_end = "on_element_open_end";
|
||||||
|
String id_on_element_start = "on_element_start";
|
||||||
|
String id_on_proc_ins_end = "on_proc_ins_end";
|
||||||
|
String id_on_proc_ins_name = "on_proc_ins_name";
|
||||||
|
String id_on_proc_ins_start = "on_proc_ins_start";
|
||||||
|
String id_on_string_body = "on_string_body";
|
||||||
|
String id_on_string_dquote = "on_string_dquote";
|
||||||
|
String id_on_string_squote = "on_string_squote";
|
||||||
|
String id_on_text = "on_text";
|
||||||
|
String id_on_xml_decl_end = "on_xml_decl_end";
|
||||||
|
String id_on_xml_decl_start = "on_xml_decl_start";
|
||||||
|
|
||||||
%% write exec;
|
%% write exec;
|
||||||
|
|
||||||
this.lines = lines;
|
this.lines = lines;
|
||||||
|
|
|
@ -59,7 +59,7 @@
|
||||||
comment = comment_start (any* -- comment_end) comment_end;
|
comment = comment_start (any* -- comment_end) comment_end;
|
||||||
|
|
||||||
action start_comment {
|
action start_comment {
|
||||||
callback("on_comment", data, encoding, ts + 4, te - 3);
|
callback(id_on_comment, data, encoding, ts + 4, te - 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
# CDATA
|
# CDATA
|
||||||
|
@ -75,7 +75,7 @@
|
||||||
cdata = cdata_start (any* -- cdata_end) cdata_end;
|
cdata = cdata_start (any* -- cdata_end) cdata_end;
|
||||||
|
|
||||||
action start_cdata {
|
action start_cdata {
|
||||||
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
callback(id_on_cdata, data, encoding, ts + 9, te - 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
# Processing Instructions
|
# Processing Instructions
|
||||||
|
@ -93,8 +93,8 @@
|
||||||
proc_ins_end = '?>';
|
proc_ins_end = '?>';
|
||||||
|
|
||||||
action start_proc_ins {
|
action start_proc_ins {
|
||||||
callback_simple("on_proc_ins_start");
|
callback_simple(id_on_proc_ins_start);
|
||||||
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
|
||||||
|
|
||||||
mark = te;
|
mark = te;
|
||||||
|
|
||||||
|
@ -103,8 +103,8 @@
|
||||||
|
|
||||||
proc_ins_body := |*
|
proc_ins_body := |*
|
||||||
proc_ins_end => {
|
proc_ins_end => {
|
||||||
callback("on_text", data, encoding, mark, ts);
|
callback(id_on_text, data, encoding, mark, ts);
|
||||||
callback_simple("on_proc_ins_end");
|
callback_simple(id_on_proc_ins_end);
|
||||||
|
|
||||||
mark = 0;
|
mark = 0;
|
||||||
|
|
||||||
|
@ -124,7 +124,7 @@
|
||||||
squote = "'";
|
squote = "'";
|
||||||
|
|
||||||
action emit_string {
|
action emit_string {
|
||||||
callback("on_string_body", data, encoding, ts, te);
|
callback(id_on_string_body, data, encoding, ts, te);
|
||||||
|
|
||||||
if ( lines > 0 )
|
if ( lines > 0 )
|
||||||
{
|
{
|
||||||
|
@ -135,13 +135,13 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
action start_string_squote {
|
action start_string_squote {
|
||||||
callback_simple("on_string_squote");
|
callback_simple(id_on_string_squote);
|
||||||
|
|
||||||
fcall string_squote;
|
fcall string_squote;
|
||||||
}
|
}
|
||||||
|
|
||||||
action start_string_dquote {
|
action start_string_dquote {
|
||||||
callback_simple("on_string_dquote");
|
callback_simple(id_on_string_dquote);
|
||||||
|
|
||||||
fcall string_dquote;
|
fcall string_dquote;
|
||||||
}
|
}
|
||||||
|
@ -150,7 +150,7 @@
|
||||||
^squote* $count_newlines => emit_string;
|
^squote* $count_newlines => emit_string;
|
||||||
|
|
||||||
squote => {
|
squote => {
|
||||||
callback_simple("on_string_squote");
|
callback_simple(id_on_string_squote);
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
@ -160,7 +160,7 @@
|
||||||
^dquote* $count_newlines => emit_string;
|
^dquote* $count_newlines => emit_string;
|
||||||
|
|
||||||
dquote => {
|
dquote => {
|
||||||
callback_simple("on_string_dquote");
|
callback_simple(id_on_string_dquote);
|
||||||
|
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
@ -179,14 +179,14 @@
|
||||||
doctype_start = '<!DOCTYPE'i whitespace+;
|
doctype_start = '<!DOCTYPE'i whitespace+;
|
||||||
|
|
||||||
action start_doctype {
|
action start_doctype {
|
||||||
callback_simple("on_doctype_start");
|
callback_simple(id_on_doctype_start);
|
||||||
fnext doctype;
|
fnext doctype;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine for processing inline rules of a doctype.
|
# Machine for processing inline rules of a doctype.
|
||||||
doctype_inline := |*
|
doctype_inline := |*
|
||||||
^']'* $count_newlines => {
|
^']'* $count_newlines => {
|
||||||
callback("on_doctype_inline", data, encoding, ts, te);
|
callback(id_on_doctype_inline, data, encoding, ts, te);
|
||||||
|
|
||||||
if ( lines > 0 )
|
if ( lines > 0 )
|
||||||
{
|
{
|
||||||
|
@ -203,7 +203,7 @@
|
||||||
# and system IDs are treated as T_STRING tokens.
|
# and system IDs are treated as T_STRING tokens.
|
||||||
doctype := |*
|
doctype := |*
|
||||||
'PUBLIC' | 'SYSTEM' => {
|
'PUBLIC' | 'SYSTEM' => {
|
||||||
callback("on_doctype_type", data, encoding, ts, te);
|
callback(id_on_doctype_type, data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
# Starts a set of inline doctype rules.
|
# Starts a set of inline doctype rules.
|
||||||
|
@ -218,11 +218,11 @@
|
||||||
whitespace;
|
whitespace;
|
||||||
|
|
||||||
identifier => {
|
identifier => {
|
||||||
callback("on_doctype_name", data, encoding, ts, te);
|
callback(id_on_doctype_name, data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
'>' => {
|
'>' => {
|
||||||
callback_simple("on_doctype_end");
|
callback_simple(id_on_doctype_end);
|
||||||
fnext main;
|
fnext main;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
@ -235,20 +235,20 @@
|
||||||
xml_decl_end = '?>';
|
xml_decl_end = '?>';
|
||||||
|
|
||||||
action start_xml_decl {
|
action start_xml_decl {
|
||||||
callback_simple("on_xml_decl_start");
|
callback_simple(id_on_xml_decl_start);
|
||||||
fnext xml_decl;
|
fnext xml_decl;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine that processes the contents of an XML declaration tag.
|
# Machine that processes the contents of an XML declaration tag.
|
||||||
xml_decl := |*
|
xml_decl := |*
|
||||||
xml_decl_end => {
|
xml_decl_end => {
|
||||||
callback_simple("on_xml_decl_end");
|
callback_simple(id_on_xml_decl_end);
|
||||||
fnext main;
|
fnext main;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Attributes and their values (e.g. version="1.0").
|
# Attributes and their values (e.g. version="1.0").
|
||||||
identifier => {
|
identifier => {
|
||||||
callback("on_attribute", data, encoding, ts, te);
|
callback(id_on_attribute, data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
squote => start_string_squote;
|
squote => start_string_squote;
|
||||||
|
@ -270,23 +270,23 @@
|
||||||
element_end = '</' identifier (':' identifier)* '>';
|
element_end = '</' identifier (':' identifier)* '>';
|
||||||
|
|
||||||
action start_element {
|
action start_element {
|
||||||
callback_simple("on_element_start");
|
callback_simple(id_on_element_start);
|
||||||
fhold;
|
fhold;
|
||||||
fnext element_name;
|
fnext element_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
action close_element {
|
action close_element {
|
||||||
callback_simple("on_element_end");
|
callback_simple(id_on_element_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine used for lexing the name/namespace of an element.
|
# Machine used for lexing the name/namespace of an element.
|
||||||
element_name := |*
|
element_name := |*
|
||||||
identifier ':' => {
|
identifier ':' => {
|
||||||
callback("on_element_ns", data, encoding, ts, te - 1);
|
callback(id_on_element_ns, data, encoding, ts, te - 1);
|
||||||
};
|
};
|
||||||
|
|
||||||
identifier => {
|
identifier => {
|
||||||
callback("on_element_name", data, encoding, ts, te);
|
callback(id_on_element_name, data, encoding, ts, te);
|
||||||
fnext element_head;
|
fnext element_head;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
@ -297,16 +297,16 @@
|
||||||
whitespace | '=';
|
whitespace | '=';
|
||||||
|
|
||||||
newline => {
|
newline => {
|
||||||
callback_simple("advance_line");
|
callback_simple(id_advance_line);
|
||||||
};
|
};
|
||||||
|
|
||||||
# Attribute names and namespaces.
|
# Attribute names and namespaces.
|
||||||
identifier ':' => {
|
identifier ':' => {
|
||||||
callback("on_attribute_ns", data, encoding, ts, te - 1);
|
callback(id_on_attribute_ns, data, encoding, ts, te - 1);
|
||||||
};
|
};
|
||||||
|
|
||||||
identifier => {
|
identifier => {
|
||||||
callback("on_attribute", data, encoding, ts, te);
|
callback(id_on_attribute, data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
# Attribute values.
|
# Attribute values.
|
||||||
|
@ -315,13 +315,13 @@
|
||||||
|
|
||||||
# We're done with the open tag of the element.
|
# We're done with the open tag of the element.
|
||||||
'>' => {
|
'>' => {
|
||||||
callback_simple("on_element_open_end");
|
callback_simple(id_on_element_open_end);
|
||||||
fnext main;
|
fnext main;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Self closing tags.
|
# Self closing tags.
|
||||||
'/>' => {
|
'/>' => {
|
||||||
callback_simple("on_element_end");
|
callback_simple(id_on_element_end);
|
||||||
fnext main;
|
fnext main;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
@ -350,7 +350,7 @@
|
||||||
|
|
||||||
text := |*
|
text := |*
|
||||||
terminate_text | allowed_text => {
|
terminate_text | allowed_text => {
|
||||||
callback("on_text", data, encoding, ts, te);
|
callback(id_on_text, data, encoding, ts, te);
|
||||||
|
|
||||||
if ( lines > 0 )
|
if ( lines > 0 )
|
||||||
{
|
{
|
||||||
|
@ -364,7 +364,7 @@
|
||||||
|
|
||||||
# Text followed by a special tag, such as "foo<!--"
|
# Text followed by a special tag, such as "foo<!--"
|
||||||
allowed_text %{ mark = p; } terminate_text => {
|
allowed_text %{ mark = p; } terminate_text => {
|
||||||
callback("on_text", data, encoding, ts, mark);
|
callback(id_on_text, data, encoding, ts, mark);
|
||||||
|
|
||||||
p = mark - 1;
|
p = mark - 1;
|
||||||
mark = 0;
|
mark = 0;
|
||||||
|
|
Loading…
Reference in New Issue