Use macros in the C lexer.

This commit is contained in:
Yorick Peterse 2014-05-07 00:57:25 +02:00
parent f25f8a3d15
commit e271298984
2 changed files with 38 additions and 22 deletions

View File

@ -1,5 +1,21 @@
#include "lexer.h" #include "lexer.h"
/*
The following two macros allow the Ragel grammar to use generic function calls
without relying on the setup of the C or Java lexer. Using these macros we can
also pass along `self` to the callback functions without having to hard-code
this into the Ragel grammar.
In the C lexer we don't need the `data` variable (since this is pulled in based
on `ts` and `te`) so the macro ignores this argument.
Both macros expand to a single call expression that references `self`, so a
variable with that name must be in scope wherever they are used. Neither macro
ends in a semicolon: the call site supplies it, which keeps every use a single
statement and makes the macros safe inside an unbraced if/else.
*/
#define callback(name, data, encoding, start, stop) \
    liboga_xml_lexer_callback(self, name, encoding, start, stop)
#define callback_simple(name) \
    liboga_xml_lexer_callback_simple(self, name)
VALUE oga_cLexer; VALUE oga_cLexer;
%%machine c_lexer; %%machine c_lexer;

View File

@ -16,7 +16,7 @@
# Machine for processing double quoted strings. # Machine for processing double quoted strings.
string_dquote := |* string_dquote := |*
^dquote+ => { ^dquote+ => {
liboga_xml_lexer_callback(self, "on_string", encoding, ts, te); callback("on_string", data, encoding, ts, te);
}; };
dquote => { fret; }; dquote => { fret; };
@ -25,7 +25,7 @@
# Machine for processing single quoted strings. # Machine for processing single quoted strings.
string_squote := |* string_squote := |*
^squote+ => { ^squote+ => {
liboga_xml_lexer_callback(self, "on_string", encoding, ts, te); callback("on_string", data, encoding, ts, te);
}; };
squote => { fret; }; squote => { fret; };
@ -44,7 +44,7 @@
doctype_start = '<!DOCTYPE'i whitespace+; doctype_start = '<!DOCTYPE'i whitespace+;
action start_doctype { action start_doctype {
liboga_xml_lexer_callback_simple(self, "on_doctype_start"); callback_simple("on_doctype_start");
fcall doctype; fcall doctype;
} }
@ -52,7 +52,7 @@
# and system IDs are treated as T_STRING tokens. # and system IDs are treated as T_STRING tokens.
doctype := |* doctype := |*
'PUBLIC' | 'SYSTEM' => { 'PUBLIC' | 'SYSTEM' => {
liboga_xml_lexer_callback(self, "on_doctype_type", encoding, ts, te); callback("on_doctype_type", data, encoding, ts, te);
}; };
# Lex the public/system IDs as regular strings. # Lex the public/system IDs as regular strings.
@ -64,11 +64,11 @@
whitespace; whitespace;
identifier => { identifier => {
liboga_xml_lexer_callback(self, "on_doctype_name", encoding, ts, te); callback("on_doctype_name", data, encoding, ts, te);
}; };
'>' => { '>' => {
liboga_xml_lexer_callback_simple(self, "on_doctype_end"); callback_simple("on_doctype_end");
fret; fret;
}; };
*|; *|;
@ -87,7 +87,7 @@
cdata_end = ']]>'; cdata_end = ']]>';
action start_cdata { action start_cdata {
liboga_xml_lexer_callback_simple(self, "on_cdata_start"); callback_simple("on_cdata_start");
fcall cdata; fcall cdata;
} }
@ -95,8 +95,8 @@
# inside a CDATA tag is treated as plain text. # inside a CDATA tag is treated as plain text.
cdata := |* cdata := |*
any* cdata_end => { any* cdata_end => {
liboga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3); callback("on_text", data, encoding, ts, te - 3);
liboga_xml_lexer_callback_simple(self, "on_cdata_end"); callback_simple("on_cdata_end");
fret; fret;
}; };
*|; *|;
@ -116,7 +116,7 @@
comment_end = '-->'; comment_end = '-->';
action start_comment { action start_comment {
liboga_xml_lexer_callback_simple(self, "on_comment_start"); callback_simple("on_comment_start");
fcall comment; fcall comment;
} }
@ -124,8 +124,8 @@
# inside a comment is treated as plain text (similar to CDATA tags). # inside a comment is treated as plain text (similar to CDATA tags).
comment := |* comment := |*
any* comment_end => { any* comment_end => {
liboga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3); callback("on_text", data, encoding, ts, te - 3);
liboga_xml_lexer_callback_simple(self, "on_comment_end"); callback_simple("on_comment_end");
fret; fret;
}; };
*|; *|;
@ -138,20 +138,20 @@
xml_decl_end = '?>'; xml_decl_end = '?>';
action start_xml_decl { action start_xml_decl {
liboga_xml_lexer_callback_simple(self, "on_xml_decl_start"); callback_simple("on_xml_decl_start");
fcall xml_decl; fcall xml_decl;
} }
# Machine that processes the contents of an XML declaration tag. # Machine that processes the contents of an XML declaration tag.
xml_decl := |* xml_decl := |*
xml_decl_end => { xml_decl_end => {
liboga_xml_lexer_callback_simple(self, "on_xml_decl_end"); callback_simple("on_xml_decl_end");
fret; fret;
}; };
# Attributes and their values (e.g. version="1.0"). # Attributes and their values (e.g. version="1.0").
identifier => { identifier => {
liboga_xml_lexer_callback(self, "on_attribute", encoding, ts, te); callback("on_attribute", data, encoding, ts, te);
}; };
dquote => { fcall string_dquote; }; dquote => { fcall string_dquote; };
@ -169,7 +169,7 @@
# namespace (if any). Remaining work is delegated to a dedicated # namespace (if any). Remaining work is delegated to a dedicated
# machine. # machine.
action start_element { action start_element {
liboga_xml_lexer_callback(self, "on_element_start", encoding, ts + 1, te); callback("on_element_start", data, encoding, ts + 1, te);
fcall element_head; fcall element_head;
} }
@ -186,12 +186,12 @@
whitespace | '='; whitespace | '=';
newline => { newline => {
liboga_xml_lexer_callback_simple(self, "on_newline"); callback_simple("on_newline");
}; };
# Attribute names. # Attribute names.
identifier => { identifier => {
liboga_xml_lexer_callback(self, "on_attribute", encoding, ts, te); callback("on_attribute", data, encoding, ts, te);
}; };
# Attribute values. # Attribute values.
@ -215,23 +215,23 @@
# Enter the body of the tag. If HTML mode is enabled and the current # Enter the body of the tag. If HTML mode is enabled and the current
# element is a void element we'll close it and bail out. # element is a void element we'll close it and bail out.
'>' => { '>' => {
liboga_xml_lexer_callback_simple(self, "on_element_open_end"); callback_simple("on_element_open_end");
}; };
# Regular closing tags. # Regular closing tags.
'</' identifier '>' => { '</' identifier '>' => {
liboga_xml_lexer_callback_simple(self, "on_element_end"); callback_simple("on_element_end");
}; };
# Self closing elements that are not handled by the HTML mode. # Self closing elements that are not handled by the HTML mode.
'/>' => { '/>' => {
liboga_xml_lexer_callback_simple(self, "on_element_end"); callback_simple("on_element_end");
}; };
# Note that this rule should be declared at the very bottom as it # Note that this rule should be declared at the very bottom as it
# will otherwise take precedence over the other rules. # will otherwise take precedence over the other rules.
^('<' | '>')+ => { ^('<' | '>')+ => {
liboga_xml_lexer_callback(self, "on_text", encoding, ts, te); callback("on_text", data, encoding, ts, te);
}; };
*|; *|;
}%% }%%