126 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Ragel
		
	
	
	
			
		
		
	
	
			126 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Ragel
		
	
	
	
| #include "lexer.h"
 | |
| 
 | |
/*
The `callback` and `callback_simple` macros below allow the Ragel grammar to
use generic function calls without relying on the setup of the C or Java lexer.
Using these macros we can also pass along `self` to the callback functions
without having to hard-code it into the Ragel grammar.

In the C lexer we don't need the `data` variable (since the token is pulled in
based on `ts` and `te`), so the `callback` macro ignores this argument.
*/
 | |
| 
 | |
/*
 * Forwards a token callback to liboga_xml_lexer_callback(), supplying `self`
 * from the enclosing scope. The `data` argument is accepted but not used by
 * the expansion. Note that the expansion already ends in a semicolon.
 */
#define callback(name, data, encoding, start, stop)                       \
    liboga_xml_lexer_callback(self, name, encoding, start, stop);

/*
 * Forwards an argument-less callback to liboga_xml_lexer_callback_simple(),
 * supplying `self` from the enclosing scope. The expansion already ends in a
 * semicolon.
 */
#define callback_simple(name)                                             \
    liboga_xml_lexer_callback_simple(self, name);

/* Reads the instance variable called `name` (a C string) from `owner`. */
#define oga_ivar_get(owner, name) rb_ivar_get(owner, rb_intern(name))

/* Stores `value` in the instance variable called `name` on `owner`. */
#define oga_ivar_set(owner, name, value)                                  \
    rb_ivar_set(owner, rb_intern(name), value)

/*
 * Calls the Ruby method `advance_line` on `self` with `amount` converted to a
 * Ruby Integer. The expansion already ends in a semicolon.
 */
#define advance_line(amount)                                              \
    rb_funcall(self, rb_intern("advance_line"), 1, INT2NUM(amount));
 | |
| 
 | |
| %%machine c_lexer;
 | |
| 
 | |
| /**
 | |
|  * Calls a method defined in the Ruby side of the lexer. The String value is
 | |
|  * created based on the values of `ts` and `te` and uses the encoding specified
 | |
|  * in `encoding`.
 | |
|  *
 | |
|  * @example
 | |
|  *  rb_encoding *encoding = rb_enc_get(...);
 | |
|  *  liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
 | |
|  */
 | |
| void liboga_xml_lexer_callback(
 | |
|     VALUE self,
 | |
|     const char *name,
 | |
|     rb_encoding *encoding,
 | |
|     const char *ts,
 | |
|     const char *te
 | |
| )
 | |
| {
 | |
|     VALUE value  = rb_enc_str_new(ts, te - ts, encoding);
 | |
|     VALUE method = rb_intern(name);
 | |
| 
 | |
|     rb_funcall(self, method, 1, value);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Calls a method defined in the Ruby side of the lexer without passing it any
 | |
|  * arguments.
 | |
|  *
 | |
|  * @example
 | |
|  *  liboga_xml_lexer_callback_simple(self, "on_cdata_start");
 | |
|  */
 | |
| void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
 | |
| {
 | |
|     VALUE method = rb_intern(name);
 | |
| 
 | |
|     rb_funcall(self, method, 0);
 | |
| }
 | |
| 
 | |
| %% write data;
 | |
| 
 | |
/**
 * Lexes the String specified as the method argument. Token values have the
 * same encoding as the input value.
 *
 * This method keeps track of an internal state using the instance variables
 * `@act` and `@cs`: both are read before the Ragel machine runs and written
 * back once it has finished.
 *
 * @param self       The lexer instance; receives the callbacks.
 * @param data_block The input to lex. It must be a String (or convertible to
 *  one) without embedded NUL bytes, otherwise StringValueCStr raises.
 * @return Always nil.
 */
VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
{
    /*
     * Validate/convert the argument before touching it in any other way.
     * StringValueCStr may replace `data_block` with the converted String, so
     * the encoding and length below are taken from the object that actually
     * owns the buffer.
     */
    char *data_str_val = StringValueCStr(data_block);

    /* Make sure that all data passed back to Ruby has the proper encoding. */
    rb_encoding *encoding = rb_enc_get(data_block);

    /*
     * The String already knows its byte length; there is no need to scan the
     * buffer a second time with strlen().
     */
    const char *p    = data_str_val;
    const char *pe   = data_str_val + RSTRING_LEN(data_block);
    const char *eof  = pe;
    const char *ts   = 0;
    const char *te   = 0;

    /*
     * `mark` and `lines` are not used directly in this function; presumably
     * they are referenced by the actions of the included grammar.
     */
    const char *mark = 0;

    int act   = NUM2INT(oga_ivar_get(self, "@act"));
    int cs    = NUM2INT(oga_ivar_get(self, "@cs"));
    int lines = 0;

    %% write exec;

    oga_ivar_set(self, "@act", INT2NUM(act));
    oga_ivar_set(self, "@cs", INT2NUM(cs));

    /*
     * The machine above works on a raw pointer into the String while calling
     * back into Ruby. Keep the String itself visible to the garbage collector
     * until this point.
     */
    RB_GC_GUARD(data_block);

    return Qnil;
}
 | |
| 
 | |
| /**
 | |
|  * Resets the internal state of the lexer.
 | |
|  */
 | |
| VALUE oga_xml_lexer_reset(VALUE self)
 | |
| {
 | |
|     oga_ivar_set(self, "@act", INT2NUM(0));
 | |
|     oga_ivar_set(self, "@cs", INT2NUM(c_lexer_start));
 | |
| 
 | |
|     return Qnil;
 | |
| }
 | |
| 
 | |
| %%{
 | |
|     include base_lexer "base_lexer.rl";
 | |
| }%%
 | |
| 
 | |
| void Init_liboga_xml_lexer()
 | |
| {
 | |
|     VALUE mOga   = rb_const_get(rb_cObject, rb_intern("Oga"));
 | |
|     VALUE mXML   = rb_const_get(mOga, rb_intern("XML"));
 | |
|     VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
 | |
| 
 | |
|     rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
 | |
|     rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
 | |
| }
 |