Use macros in the C lexer.
This commit is contained in:
parent
f25f8a3d15
commit
e271298984
|
@ -1,5 +1,21 @@
|
||||||
#include "lexer.h"
|
#include "lexer.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
The following two macros allow the Ragel grammar to use generic function calls
|
||||||
|
without relying on the setup of the C or Java lexer. Using these macros we can
|
||||||
|
also pass along `self` to the callback functions without having to hard-code
|
||||||
|
this into the Ragel grammar.
|
||||||
|
|
||||||
|
In the C lexer we don't need the `data` variable (since this is pulled in based
|
||||||
|
on `ts` and `te`) so the macro ignores this argument.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Forwards a token callback to liboga_xml_lexer_callback(), supplying the
 * `self` variable that must be in scope at the call site. The `data` argument
 * is accepted so the grammar can use one generic call form, but it is not used
 * by this expansion.
 *
 * The expansion deliberately has no trailing semicolon: the call site provides
 * it, so `callback(...);` stays a single statement and remains safe inside an
 * unbraced if/else.
 */
#define callback(name, data, encoding, start, stop) \
    liboga_xml_lexer_callback(self, (name), (encoding), (start), (stop))
|
||||||
|
|
||||||
|
/*
 * Forwards an argument-less callback to liboga_xml_lexer_callback_simple(),
 * supplying the `self` variable that must be in scope at the call site.
 *
 * The expansion deliberately has no trailing semicolon: the call site provides
 * it, so `callback_simple(...);` stays a single statement and remains safe
 * inside an unbraced if/else.
 */
#define callback_simple(name) \
    liboga_xml_lexer_callback_simple(self, (name))
|
||||||
|
|
||||||
VALUE oga_cLexer;
|
VALUE oga_cLexer;
|
||||||
|
|
||||||
%%machine c_lexer;
|
%%machine c_lexer;
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
# Machine for processing double quoted strings.
|
# Machine for processing double quoted strings.
|
||||||
string_dquote := |*
|
string_dquote := |*
|
||||||
^dquote+ => {
|
^dquote+ => {
|
||||||
liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
|
callback("on_string", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
dquote => { fret; };
|
dquote => { fret; };
|
||||||
|
@ -25,7 +25,7 @@
|
||||||
# Machine for processing single quoted strings.
|
# Machine for processing single quoted strings.
|
||||||
string_squote := |*
|
string_squote := |*
|
||||||
^squote+ => {
|
^squote+ => {
|
||||||
liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
|
callback("on_string", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
squote => { fret; };
|
squote => { fret; };
|
||||||
|
@ -44,7 +44,7 @@
|
||||||
doctype_start = '<!DOCTYPE'i whitespace+;
|
doctype_start = '<!DOCTYPE'i whitespace+;
|
||||||
|
|
||||||
action start_doctype {
|
action start_doctype {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_doctype_start");
|
callback_simple("on_doctype_start");
|
||||||
fcall doctype;
|
fcall doctype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@
|
||||||
# and system IDs are treated as T_STRING tokens.
|
# and system IDs are treated as T_STRING tokens.
|
||||||
doctype := |*
|
doctype := |*
|
||||||
'PUBLIC' | 'SYSTEM' => {
|
'PUBLIC' | 'SYSTEM' => {
|
||||||
liboga_xml_lexer_callback(self, "on_doctype_type", encoding, ts, te);
|
callback("on_doctype_type", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
# Lex the public/system IDs as regular strings.
|
# Lex the public/system IDs as regular strings.
|
||||||
|
@ -64,11 +64,11 @@
|
||||||
whitespace;
|
whitespace;
|
||||||
|
|
||||||
identifier => {
|
identifier => {
|
||||||
liboga_xml_lexer_callback(self, "on_doctype_name", encoding, ts, te);
|
callback("on_doctype_name", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
'>' => {
|
'>' => {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_doctype_end");
|
callback_simple("on_doctype_end");
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
@ -87,7 +87,7 @@
|
||||||
cdata_end = ']]>';
|
cdata_end = ']]>';
|
||||||
|
|
||||||
action start_cdata {
|
action start_cdata {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_cdata_start");
|
callback_simple("on_cdata_start");
|
||||||
fcall cdata;
|
fcall cdata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,8 +95,8 @@
|
||||||
# inside a CDATA tag is treated as plain text.
|
# inside a CDATA tag is treated as plain text.
|
||||||
cdata := |*
|
cdata := |*
|
||||||
any* cdata_end => {
|
any* cdata_end => {
|
||||||
liboga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3);
|
callback("on_text", data, encoding, ts, te - 3);
|
||||||
liboga_xml_lexer_callback_simple(self, "on_cdata_end");
|
callback_simple("on_cdata_end");
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
@ -116,7 +116,7 @@
|
||||||
comment_end = '-->';
|
comment_end = '-->';
|
||||||
|
|
||||||
action start_comment {
|
action start_comment {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_comment_start");
|
callback_simple("on_comment_start");
|
||||||
fcall comment;
|
fcall comment;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,8 +124,8 @@
|
||||||
# inside a comment is treated as plain text (similar to CDATA tags).
|
# inside a comment is treated as plain text (similar to CDATA tags).
|
||||||
comment := |*
|
comment := |*
|
||||||
any* comment_end => {
|
any* comment_end => {
|
||||||
liboga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3);
|
callback("on_text", data, encoding, ts, te - 3);
|
||||||
liboga_xml_lexer_callback_simple(self, "on_comment_end");
|
callback_simple("on_comment_end");
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
@ -138,20 +138,20 @@
|
||||||
xml_decl_end = '?>';
|
xml_decl_end = '?>';
|
||||||
|
|
||||||
action start_xml_decl {
|
action start_xml_decl {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_xml_decl_start");
|
callback_simple("on_xml_decl_start");
|
||||||
fcall xml_decl;
|
fcall xml_decl;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine that processes the contents of an XML declaration tag.
|
# Machine that processes the contents of an XML declaration tag.
|
||||||
xml_decl := |*
|
xml_decl := |*
|
||||||
xml_decl_end => {
|
xml_decl_end => {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_xml_decl_end");
|
callback_simple("on_xml_decl_end");
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Attributes and their values (e.g. version="1.0").
|
# Attributes and their values (e.g. version="1.0").
|
||||||
identifier => {
|
identifier => {
|
||||||
liboga_xml_lexer_callback(self, "on_attribute", encoding, ts, te);
|
callback("on_attribute", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
dquote => { fcall string_dquote; };
|
dquote => { fcall string_dquote; };
|
||||||
|
@ -169,7 +169,7 @@
|
||||||
# namespace (if any). Remaining work is delegated to a dedicated
|
# namespace (if any). Remaining work is delegated to a dedicated
|
||||||
# machine.
|
# machine.
|
||||||
action start_element {
|
action start_element {
|
||||||
liboga_xml_lexer_callback(self, "on_element_start", encoding, ts + 1, te);
|
callback("on_element_start", data, encoding, ts + 1, te);
|
||||||
|
|
||||||
fcall element_head;
|
fcall element_head;
|
||||||
}
|
}
|
||||||
|
@ -186,12 +186,12 @@
|
||||||
whitespace | '=';
|
whitespace | '=';
|
||||||
|
|
||||||
newline => {
|
newline => {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_newline");
|
callback_simple("on_newline");
|
||||||
};
|
};
|
||||||
|
|
||||||
# Attribute names.
|
# Attribute names.
|
||||||
identifier => {
|
identifier => {
|
||||||
liboga_xml_lexer_callback(self, "on_attribute", encoding, ts, te);
|
callback("on_attribute", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
|
|
||||||
# Attribute values.
|
# Attribute values.
|
||||||
|
@ -215,23 +215,23 @@
|
||||||
# Enter the body of the tag. If HTML mode is enabled and the current
|
# Enter the body of the tag. If HTML mode is enabled and the current
|
||||||
# element is a void element we'll close it and bail out.
|
# element is a void element we'll close it and bail out.
|
||||||
'>' => {
|
'>' => {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_element_open_end");
|
callback_simple("on_element_open_end");
|
||||||
};
|
};
|
||||||
|
|
||||||
# Regular closing tags.
|
# Regular closing tags.
|
||||||
'</' identifier '>' => {
|
'</' identifier '>' => {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_element_end");
|
callback_simple("on_element_end");
|
||||||
};
|
};
|
||||||
|
|
||||||
# Self closing elements that are not handled by the HTML mode.
|
# Self closing elements that are not handled by the HTML mode.
|
||||||
'/>' => {
|
'/>' => {
|
||||||
liboga_xml_lexer_callback_simple(self, "on_element_end");
|
callback_simple("on_element_end");
|
||||||
};
|
};
|
||||||
|
|
||||||
# Note that this rule should be declared at the very bottom as it
|
# Note that this rule should be declared at the very bottom as it
|
||||||
# will otherwise take precedence over the other rules.
|
# will otherwise take precedence over the other rules.
|
||||||
^('<' | '>')+ => {
|
^('<' | '>')+ => {
|
||||||
liboga_xml_lexer_callback(self, "on_text", encoding, ts, te);
|
callback("on_text", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
}%%
|
}%%
|
||||||
|
|
Loading…
Reference in New Issue