Cache rb_intern/symbol lookups in the lexer.
For JRuby this has little to no benefit, as it uses strings for method names. However, both MRI and Rubinius perform a Symbol lookup whenever rb_intern() is called. By doing this once for all callback names and caching the resulting VALUE objects, the lexer timings can be reduced by about 25%. In the case of the benchmark benchmark/xml/lexer/string_average_bench.rb, this means it runs in around 500ms instead of 700ms.
This commit is contained in:
parent
a10fe855d7
commit
4fa88fcbde
|
@ -38,16 +38,15 @@ on `ts` and `te`) so the macro ignores this argument.
|
|||
*/
|
||||
void liboga_xml_lexer_callback(
|
||||
VALUE self,
|
||||
const char *name,
|
||||
VALUE name,
|
||||
rb_encoding *encoding,
|
||||
const char *ts,
|
||||
const char *te
|
||||
)
|
||||
{
|
||||
VALUE value = rb_enc_str_new(ts, te - ts, encoding);
|
||||
VALUE method = rb_intern(name);
|
||||
|
||||
rb_funcall(self, method, 1, value);
|
||||
rb_funcall(self, name, 1, value);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -57,11 +56,9 @@ void liboga_xml_lexer_callback(
|
|||
* @example
|
||||
* liboga_xml_lexer_callback_simple(self, "on_cdata_start");
|
||||
*/
|
||||
void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
|
||||
void liboga_xml_lexer_callback_simple(VALUE self, VALUE name)
|
||||
{
|
||||
VALUE method = rb_intern(name);
|
||||
|
||||
rb_funcall(self, method, 0);
|
||||
rb_funcall(self, name, 0);
|
||||
}
|
||||
|
||||
%% write data;
|
||||
|
@ -93,6 +90,31 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
|||
|
||||
int lines = state->lines;
|
||||
|
||||
VALUE id_advance_line = rb_intern("advance_line");
|
||||
VALUE id_on_attribute = rb_intern("on_attribute");
|
||||
VALUE id_on_attribute_ns = rb_intern("on_attribute_ns");
|
||||
VALUE id_on_cdata = rb_intern("on_cdata");
|
||||
VALUE id_on_comment = rb_intern("on_comment");
|
||||
VALUE id_on_doctype_end = rb_intern("on_doctype_end");
|
||||
VALUE id_on_doctype_inline = rb_intern("on_doctype_inline");
|
||||
VALUE id_on_doctype_name = rb_intern("on_doctype_name");
|
||||
VALUE id_on_doctype_start = rb_intern("on_doctype_start");
|
||||
VALUE id_on_doctype_type = rb_intern("on_doctype_type");
|
||||
VALUE id_on_element_end = rb_intern("on_element_end");
|
||||
VALUE id_on_element_name = rb_intern("on_element_name");
|
||||
VALUE id_on_element_ns = rb_intern("on_element_ns");
|
||||
VALUE id_on_element_open_end = rb_intern("on_element_open_end");
|
||||
VALUE id_on_element_start = rb_intern("on_element_start");
|
||||
VALUE id_on_proc_ins_end = rb_intern("on_proc_ins_end");
|
||||
VALUE id_on_proc_ins_name = rb_intern("on_proc_ins_name");
|
||||
VALUE id_on_proc_ins_start = rb_intern("on_proc_ins_start");
|
||||
VALUE id_on_string_body = rb_intern("on_string_body");
|
||||
VALUE id_on_string_dquote = rb_intern("on_string_dquote");
|
||||
VALUE id_on_string_squote = rb_intern("on_string_squote");
|
||||
VALUE id_on_text = rb_intern("on_text");
|
||||
VALUE id_on_xml_decl_end = rb_intern("on_xml_decl_end");
|
||||
VALUE id_on_xml_decl_start = rb_intern("on_xml_decl_start");
|
||||
|
||||
%% write exec;
|
||||
|
||||
state->lines = lines;
|
||||
|
|
|
@ -101,6 +101,31 @@ public class Lexer extends RubyObject
|
|||
int pe = data.length;
|
||||
int eof = data.length;
|
||||
|
||||
String id_advance_line = "advance_line";
|
||||
String id_on_attribute = "on_attribute";
|
||||
String id_on_attribute_ns = "on_attribute_ns";
|
||||
String id_on_cdata = "on_cdata";
|
||||
String id_on_comment = "on_comment";
|
||||
String id_on_doctype_end = "on_doctype_end";
|
||||
String id_on_doctype_inline = "on_doctype_inline";
|
||||
String id_on_doctype_name = "on_doctype_name";
|
||||
String id_on_doctype_start = "on_doctype_start";
|
||||
String id_on_doctype_type = "on_doctype_type";
|
||||
String id_on_element_end = "on_element_end";
|
||||
String id_on_element_name = "on_element_name";
|
||||
String id_on_element_ns = "on_element_ns";
|
||||
String id_on_element_open_end = "on_element_open_end";
|
||||
String id_on_element_start = "on_element_start";
|
||||
String id_on_proc_ins_end = "on_proc_ins_end";
|
||||
String id_on_proc_ins_name = "on_proc_ins_name";
|
||||
String id_on_proc_ins_start = "on_proc_ins_start";
|
||||
String id_on_string_body = "on_string_body";
|
||||
String id_on_string_dquote = "on_string_dquote";
|
||||
String id_on_string_squote = "on_string_squote";
|
||||
String id_on_text = "on_text";
|
||||
String id_on_xml_decl_end = "on_xml_decl_end";
|
||||
String id_on_xml_decl_start = "on_xml_decl_start";
|
||||
|
||||
%% write exec;
|
||||
|
||||
this.lines = lines;
|
||||
|
|
|
@ -59,7 +59,7 @@
|
|||
comment = comment_start (any* -- comment_end) comment_end;
|
||||
|
||||
action start_comment {
|
||||
callback("on_comment", data, encoding, ts + 4, te - 3);
|
||||
callback(id_on_comment, data, encoding, ts + 4, te - 3);
|
||||
}
|
||||
|
||||
# CDATA
|
||||
|
@ -75,7 +75,7 @@
|
|||
cdata = cdata_start (any* -- cdata_end) cdata_end;
|
||||
|
||||
action start_cdata {
|
||||
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
||||
callback(id_on_cdata, data, encoding, ts + 9, te - 3);
|
||||
}
|
||||
|
||||
# Processing Instructions
|
||||
|
@ -93,8 +93,8 @@
|
|||
proc_ins_end = '?>';
|
||||
|
||||
action start_proc_ins {
|
||||
callback_simple("on_proc_ins_start");
|
||||
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
||||
callback_simple(id_on_proc_ins_start);
|
||||
callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
|
||||
|
||||
mark = te;
|
||||
|
||||
|
@ -103,8 +103,8 @@
|
|||
|
||||
proc_ins_body := |*
|
||||
proc_ins_end => {
|
||||
callback("on_text", data, encoding, mark, ts);
|
||||
callback_simple("on_proc_ins_end");
|
||||
callback(id_on_text, data, encoding, mark, ts);
|
||||
callback_simple(id_on_proc_ins_end);
|
||||
|
||||
mark = 0;
|
||||
|
||||
|
@ -124,7 +124,7 @@
|
|||
squote = "'";
|
||||
|
||||
action emit_string {
|
||||
callback("on_string_body", data, encoding, ts, te);
|
||||
callback(id_on_string_body, data, encoding, ts, te);
|
||||
|
||||
if ( lines > 0 )
|
||||
{
|
||||
|
@ -135,13 +135,13 @@
|
|||
}
|
||||
|
||||
action start_string_squote {
|
||||
callback_simple("on_string_squote");
|
||||
callback_simple(id_on_string_squote);
|
||||
|
||||
fcall string_squote;
|
||||
}
|
||||
|
||||
action start_string_dquote {
|
||||
callback_simple("on_string_dquote");
|
||||
callback_simple(id_on_string_dquote);
|
||||
|
||||
fcall string_dquote;
|
||||
}
|
||||
|
@ -150,7 +150,7 @@
|
|||
^squote* $count_newlines => emit_string;
|
||||
|
||||
squote => {
|
||||
callback_simple("on_string_squote");
|
||||
callback_simple(id_on_string_squote);
|
||||
|
||||
fret;
|
||||
};
|
||||
|
@ -160,7 +160,7 @@
|
|||
^dquote* $count_newlines => emit_string;
|
||||
|
||||
dquote => {
|
||||
callback_simple("on_string_dquote");
|
||||
callback_simple(id_on_string_dquote);
|
||||
|
||||
fret;
|
||||
};
|
||||
|
@ -179,14 +179,14 @@
|
|||
doctype_start = '<!DOCTYPE'i whitespace+;
|
||||
|
||||
action start_doctype {
|
||||
callback_simple("on_doctype_start");
|
||||
callback_simple(id_on_doctype_start);
|
||||
fnext doctype;
|
||||
}
|
||||
|
||||
# Machine for processing inline rules of a doctype.
|
||||
doctype_inline := |*
|
||||
^']'* $count_newlines => {
|
||||
callback("on_doctype_inline", data, encoding, ts, te);
|
||||
callback(id_on_doctype_inline, data, encoding, ts, te);
|
||||
|
||||
if ( lines > 0 )
|
||||
{
|
||||
|
@ -203,7 +203,7 @@
|
|||
# and system IDs are treated as T_STRING tokens.
|
||||
doctype := |*
|
||||
'PUBLIC' | 'SYSTEM' => {
|
||||
callback("on_doctype_type", data, encoding, ts, te);
|
||||
callback(id_on_doctype_type, data, encoding, ts, te);
|
||||
};
|
||||
|
||||
# Starts a set of inline doctype rules.
|
||||
|
@ -218,11 +218,11 @@
|
|||
whitespace;
|
||||
|
||||
identifier => {
|
||||
callback("on_doctype_name", data, encoding, ts, te);
|
||||
callback(id_on_doctype_name, data, encoding, ts, te);
|
||||
};
|
||||
|
||||
'>' => {
|
||||
callback_simple("on_doctype_end");
|
||||
callback_simple(id_on_doctype_end);
|
||||
fnext main;
|
||||
};
|
||||
*|;
|
||||
|
@ -235,20 +235,20 @@
|
|||
xml_decl_end = '?>';
|
||||
|
||||
action start_xml_decl {
|
||||
callback_simple("on_xml_decl_start");
|
||||
callback_simple(id_on_xml_decl_start);
|
||||
fnext xml_decl;
|
||||
}
|
||||
|
||||
# Machine that processes the contents of an XML declaration tag.
|
||||
xml_decl := |*
|
||||
xml_decl_end => {
|
||||
callback_simple("on_xml_decl_end");
|
||||
callback_simple(id_on_xml_decl_end);
|
||||
fnext main;
|
||||
};
|
||||
|
||||
# Attributes and their values (e.g. version="1.0").
|
||||
identifier => {
|
||||
callback("on_attribute", data, encoding, ts, te);
|
||||
callback(id_on_attribute, data, encoding, ts, te);
|
||||
};
|
||||
|
||||
squote => start_string_squote;
|
||||
|
@ -270,23 +270,23 @@
|
|||
element_end = '</' identifier (':' identifier)* '>';
|
||||
|
||||
action start_element {
|
||||
callback_simple("on_element_start");
|
||||
callback_simple(id_on_element_start);
|
||||
fhold;
|
||||
fnext element_name;
|
||||
}
|
||||
|
||||
action close_element {
|
||||
callback_simple("on_element_end");
|
||||
callback_simple(id_on_element_end);
|
||||
}
|
||||
|
||||
# Machine used for lexing the name/namespace of an element.
|
||||
element_name := |*
|
||||
identifier ':' => {
|
||||
callback("on_element_ns", data, encoding, ts, te - 1);
|
||||
callback(id_on_element_ns, data, encoding, ts, te - 1);
|
||||
};
|
||||
|
||||
identifier => {
|
||||
callback("on_element_name", data, encoding, ts, te);
|
||||
callback(id_on_element_name, data, encoding, ts, te);
|
||||
fnext element_head;
|
||||
};
|
||||
*|;
|
||||
|
@ -297,16 +297,16 @@
|
|||
whitespace | '=';
|
||||
|
||||
newline => {
|
||||
callback_simple("advance_line");
|
||||
callback_simple(id_advance_line);
|
||||
};
|
||||
|
||||
# Attribute names and namespaces.
|
||||
identifier ':' => {
|
||||
callback("on_attribute_ns", data, encoding, ts, te - 1);
|
||||
callback(id_on_attribute_ns, data, encoding, ts, te - 1);
|
||||
};
|
||||
|
||||
identifier => {
|
||||
callback("on_attribute", data, encoding, ts, te);
|
||||
callback(id_on_attribute, data, encoding, ts, te);
|
||||
};
|
||||
|
||||
# Attribute values.
|
||||
|
@ -315,13 +315,13 @@
|
|||
|
||||
# We're done with the open tag of the element.
|
||||
'>' => {
|
||||
callback_simple("on_element_open_end");
|
||||
callback_simple(id_on_element_open_end);
|
||||
fnext main;
|
||||
};
|
||||
|
||||
# Self closing tags.
|
||||
'/>' => {
|
||||
callback_simple("on_element_end");
|
||||
callback_simple(id_on_element_end);
|
||||
fnext main;
|
||||
};
|
||||
*|;
|
||||
|
@ -350,7 +350,7 @@
|
|||
|
||||
text := |*
|
||||
terminate_text | allowed_text => {
|
||||
callback("on_text", data, encoding, ts, te);
|
||||
callback(id_on_text, data, encoding, ts, te);
|
||||
|
||||
if ( lines > 0 )
|
||||
{
|
||||
|
@ -364,7 +364,7 @@
|
|||
|
||||
# Text followed by a special tag, such as "foo<!--"
|
||||
allowed_text %{ mark = p; } terminate_text => {
|
||||
callback("on_text", data, encoding, ts, mark);
|
||||
callback(id_on_text, data, encoding, ts, mark);
|
||||
|
||||
p = mark - 1;
|
||||
mark = 0;
|
||||
|
|
Loading…
Reference in New Issue