From 4fa88fcbde6917b1cb9f4c6c9e3c7585b00bfd07 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Fri, 21 Nov 2014 01:37:33 +0100 Subject: [PATCH] Cache rb_intern/symbol lookups in the lexer. For JRuby this has little to no benefit, as it uses strings for method names. However, both MRI and Rubinius will perform a Symbol lookup whenever rb_intern() is called. By doing this once for all callback names and caching the resulting VALUE objects, the lexer timings can be reduced by about 25%. For the benchmark benchmark/xml/lexer/string_average_bench.rb this means it runs in around 500ms instead of 700ms. --- ext/c/lexer.rl | 36 +++++++++++++++---- ext/java/org/liboga/xml/Lexer.rl | 25 +++++++++++++ ext/ragel/base_lexer.rl | 60 ++++++++++++++++---------------- 3 files changed, 84 insertions(+), 37 deletions(-) diff --git a/ext/c/lexer.rl b/ext/c/lexer.rl index 6b82e6d..e97de1a 100644 --- a/ext/c/lexer.rl +++ b/ext/c/lexer.rl @@ -38,16 +38,15 @@ on `ts` and `te`) so the macro ignores this argument. 
*/ void liboga_xml_lexer_callback( VALUE self, - const char *name, + VALUE name, rb_encoding *encoding, const char *ts, const char *te ) { VALUE value = rb_enc_str_new(ts, te - ts, encoding); - VALUE method = rb_intern(name); - rb_funcall(self, method, 1, value); + rb_funcall(self, name, 1, value); } /** @@ -57,11 +56,9 @@ void liboga_xml_lexer_callback( * @example * liboga_xml_lexer_callback_simple(self, "on_cdata_start"); */ -void liboga_xml_lexer_callback_simple(VALUE self, const char *name) +void liboga_xml_lexer_callback_simple(VALUE self, VALUE name) { - VALUE method = rb_intern(name); - - rb_funcall(self, method, 0); + rb_funcall(self, name, 0); } %% write data; @@ -93,6 +90,31 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block) int lines = state->lines; + VALUE id_advance_line = rb_intern("advance_line"); + VALUE id_on_attribute = rb_intern("on_attribute"); + VALUE id_on_attribute_ns = rb_intern("on_attribute_ns"); + VALUE id_on_cdata = rb_intern("on_cdata"); + VALUE id_on_comment = rb_intern("on_comment"); + VALUE id_on_doctype_end = rb_intern("on_doctype_end"); + VALUE id_on_doctype_inline = rb_intern("on_doctype_inline"); + VALUE id_on_doctype_name = rb_intern("on_doctype_name"); + VALUE id_on_doctype_start = rb_intern("on_doctype_start"); + VALUE id_on_doctype_type = rb_intern("on_doctype_type"); + VALUE id_on_element_end = rb_intern("on_element_end"); + VALUE id_on_element_name = rb_intern("on_element_name"); + VALUE id_on_element_ns = rb_intern("on_element_ns"); + VALUE id_on_element_open_end = rb_intern("on_element_open_end"); + VALUE id_on_element_start = rb_intern("on_element_start"); + VALUE id_on_proc_ins_end = rb_intern("on_proc_ins_end"); + VALUE id_on_proc_ins_name = rb_intern("on_proc_ins_name"); + VALUE id_on_proc_ins_start = rb_intern("on_proc_ins_start"); + VALUE id_on_string_body = rb_intern("on_string_body"); + VALUE id_on_string_dquote = rb_intern("on_string_dquote"); + VALUE id_on_string_squote = rb_intern("on_string_squote"); 
+ VALUE id_on_text = rb_intern("on_text"); + VALUE id_on_xml_decl_end = rb_intern("on_xml_decl_end"); + VALUE id_on_xml_decl_start = rb_intern("on_xml_decl_start"); + %% write exec; state->lines = lines; diff --git a/ext/java/org/liboga/xml/Lexer.rl b/ext/java/org/liboga/xml/Lexer.rl index 5c440c0..d746dd2 100644 --- a/ext/java/org/liboga/xml/Lexer.rl +++ b/ext/java/org/liboga/xml/Lexer.rl @@ -101,6 +101,31 @@ public class Lexer extends RubyObject int pe = data.length; int eof = data.length; + String id_advance_line = "advance_line"; + String id_on_attribute = "on_attribute"; + String id_on_attribute_ns = "on_attribute_ns"; + String id_on_cdata = "on_cdata"; + String id_on_comment = "on_comment"; + String id_on_doctype_end = "on_doctype_end"; + String id_on_doctype_inline = "on_doctype_inline"; + String id_on_doctype_name = "on_doctype_name"; + String id_on_doctype_start = "on_doctype_start"; + String id_on_doctype_type = "on_doctype_type"; + String id_on_element_end = "on_element_end"; + String id_on_element_name = "on_element_name"; + String id_on_element_ns = "on_element_ns"; + String id_on_element_open_end = "on_element_open_end"; + String id_on_element_start = "on_element_start"; + String id_on_proc_ins_end = "on_proc_ins_end"; + String id_on_proc_ins_name = "on_proc_ins_name"; + String id_on_proc_ins_start = "on_proc_ins_start"; + String id_on_string_body = "on_string_body"; + String id_on_string_dquote = "on_string_dquote"; + String id_on_string_squote = "on_string_squote"; + String id_on_text = "on_text"; + String id_on_xml_decl_end = "on_xml_decl_end"; + String id_on_xml_decl_start = "on_xml_decl_start"; + %% write exec; this.lines = lines; diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 5a9c5c7..ce726d0 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -59,7 +59,7 @@ comment = comment_start (any* -- comment_end) comment_end; action start_comment { - callback("on_comment", data, encoding, ts + 4, te - 3); + 
callback(id_on_comment, data, encoding, ts + 4, te - 3); } # CDATA @@ -75,7 +75,7 @@ cdata = cdata_start (any* -- cdata_end) cdata_end; action start_cdata { - callback("on_cdata", data, encoding, ts + 9, te - 3); + callback(id_on_cdata, data, encoding, ts + 9, te - 3); } # Processing Instructions @@ -93,8 +93,8 @@ proc_ins_end = '?>'; action start_proc_ins { - callback_simple("on_proc_ins_start"); - callback("on_proc_ins_name", data, encoding, ts + 2, te); + callback_simple(id_on_proc_ins_start); + callback(id_on_proc_ins_name, data, encoding, ts + 2, te); mark = te; @@ -103,8 +103,8 @@ proc_ins_body := |* proc_ins_end => { - callback("on_text", data, encoding, mark, ts); - callback_simple("on_proc_ins_end"); + callback(id_on_text, data, encoding, mark, ts); + callback_simple(id_on_proc_ins_end); mark = 0; @@ -124,7 +124,7 @@ squote = "'"; action emit_string { - callback("on_string_body", data, encoding, ts, te); + callback(id_on_string_body, data, encoding, ts, te); if ( lines > 0 ) { @@ -135,13 +135,13 @@ } action start_string_squote { - callback_simple("on_string_squote"); + callback_simple(id_on_string_squote); fcall string_squote; } action start_string_dquote { - callback_simple("on_string_dquote"); + callback_simple(id_on_string_dquote); fcall string_dquote; } @@ -150,7 +150,7 @@ ^squote* $count_newlines => emit_string; squote => { - callback_simple("on_string_squote"); + callback_simple(id_on_string_squote); fret; }; @@ -160,7 +160,7 @@ ^dquote* $count_newlines => emit_string; dquote => { - callback_simple("on_string_dquote"); + callback_simple(id_on_string_dquote); fret; }; @@ -179,14 +179,14 @@ doctype_start = ' { - callback("on_doctype_inline", data, encoding, ts, te); + callback(id_on_doctype_inline, data, encoding, ts, te); if ( lines > 0 ) { @@ -203,7 +203,7 @@ # and system IDs are treated as T_STRING tokens. 
doctype := |* 'PUBLIC' | 'SYSTEM' => { - callback("on_doctype_type", data, encoding, ts, te); + callback(id_on_doctype_type, data, encoding, ts, te); }; # Starts a set of inline doctype rules. @@ -218,11 +218,11 @@ whitespace; identifier => { - callback("on_doctype_name", data, encoding, ts, te); + callback(id_on_doctype_name, data, encoding, ts, te); }; '>' => { - callback_simple("on_doctype_end"); + callback_simple(id_on_doctype_end); fnext main; }; *|; @@ -235,20 +235,20 @@ xml_decl_end = '?>'; action start_xml_decl { - callback_simple("on_xml_decl_start"); + callback_simple(id_on_xml_decl_start); fnext xml_decl; } # Machine that processes the contents of an XML declaration tag. xml_decl := |* xml_decl_end => { - callback_simple("on_xml_decl_end"); + callback_simple(id_on_xml_decl_end); fnext main; }; # Attributes and their values (e.g. version="1.0"). identifier => { - callback("on_attribute", data, encoding, ts, te); + callback(id_on_attribute, data, encoding, ts, te); }; squote => start_string_squote; @@ -270,23 +270,23 @@ element_end = ''; action start_element { - callback_simple("on_element_start"); + callback_simple(id_on_element_start); fhold; fnext element_name; } action close_element { - callback_simple("on_element_end"); + callback_simple(id_on_element_end); } # Machine used for lexing the name/namespace of an element. element_name := |* identifier ':' => { - callback("on_element_ns", data, encoding, ts, te - 1); + callback(id_on_element_ns, data, encoding, ts, te - 1); }; identifier => { - callback("on_element_name", data, encoding, ts, te); + callback(id_on_element_name, data, encoding, ts, te); fnext element_head; }; *|; @@ -297,16 +297,16 @@ whitespace | '='; newline => { - callback_simple("advance_line"); + callback_simple(id_advance_line); }; # Attribute names and namespaces. 
identifier ':' => { - callback("on_attribute_ns", data, encoding, ts, te - 1); + callback(id_on_attribute_ns, data, encoding, ts, te - 1); }; identifier => { - callback("on_attribute", data, encoding, ts, te); + callback(id_on_attribute, data, encoding, ts, te); }; # Attribute values. @@ -315,13 +315,13 @@ # We're done with the open tag of the element. '>' => { - callback_simple("on_element_open_end"); + callback_simple(id_on_element_open_end); fnext main; }; # Self closing tags. '/>' => { - callback_simple("on_element_end"); + callback_simple(id_on_element_end); fnext main; }; *|; @@ -350,7 +350,7 @@ text := |* terminate_text | allowed_text => { - callback("on_text", data, encoding, ts, te); + callback(id_on_text, data, encoding, ts, te); if ( lines > 0 ) { @@ -364,7 +364,7 @@ # Text followed by a special tag, such as "foo