Merge branch 'native-ext' of github.com:YorickPeterse/oga into native-ext

This commit is contained in:
Yorick Peterse 2014-05-06 08:44:57 +02:00
commit 01a4a53a53
7 changed files with 110 additions and 40 deletions

View File

@ -5,14 +5,14 @@ README.md
doc/DCO.md doc/DCO.md
doc/changelog.md doc/changelog.md
doc/css/common.css doc/css/common.css
ext/liboga/extconf.rb ext/c/liboga/extconf.rb
ext/liboga/lexer.c ext/c/liboga/lexer.c
ext/liboga/lexer.h ext/c/liboga/lexer.h
ext/liboga/lexer.rl ext/c/liboga/lexer.rl
ext/liboga/liboga.c ext/c/liboga/liboga.c
ext/liboga/liboga.h ext/c/liboga/liboga.h
ext/liboga/xml.c ext/c/liboga/xml.c
ext/liboga/xml.h ext/c/liboga/xml.h
lib/oga.rb lib/oga.rb
lib/oga/html/parser.rb lib/oga/html/parser.rb
lib/oga/version.rb lib/oga/version.rb

View File

@ -1,11 +1,20 @@
require 'bundler/gem_tasks' require 'bundler/gem_tasks'
require 'digest/sha2' require 'digest/sha2'
require 'rake/clean' require 'rake/clean'
require 'rake/extensiontask'
require 'cliver' require 'cliver'
GEMSPEC = Gem::Specification.load('oga.gemspec') GEMSPEC = Gem::Specification.load('oga.gemspec')
if RUBY_PLATFORM == 'java'
require 'rake/javaextensiontask'
Rake::JavaExtensionTask.new('liboga', GEMSPEC)
else
require 'rake/extensiontask'
Rake::ExtensionTask.new('liboga', GEMSPEC)
end
PARSER_OUTPUT = 'lib/oga/xml/parser.rb' PARSER_OUTPUT = 'lib/oga/xml/parser.rb'
CLEAN.include( CLEAN.include(
@ -16,7 +25,7 @@ CLEAN.include(
'profile/samples/**/*.txt', 'profile/samples/**/*.txt',
'lib/liboga.*', 'lib/liboga.*',
'tmp', 'tmp',
'ext/liboga/lexer.c' 'ext/c/liboga/lexer.c'
) )
FILE_LIST = FileList.new( FILE_LIST = FileList.new(
@ -31,8 +40,6 @@ FILE_LIST = FileList.new(
'ext/**/*.*' 'ext/**/*.*'
) )
Rake::ExtensionTask.new('liboga', GEMSPEC)
Dir['./task/*.rake'].each do |task| Dir['./task/*.rake'].each do |task|
import(task) import(task)
end end

View File

@ -0,0 +1,30 @@
package org.liboga;
import java.io.IOException;
import org.jruby.Ruby;
import org.jruby.RubyModule;
import org.jruby.RubyClass;
import org.jruby.runtime.load.BasicLibraryService;
import org.jruby.runtime.load.Library;
public class LibogaService implements BasicLibraryService
{
public boolean basicLoad(final Ruby runtime) throws IOException
{
// <jruby> Calling getModule on an instance of RubyModule? Nah fuck
// that, that would be too easy.
RubyModule xml = (RubyModule) runtime.getModule("Oga")
.getConstant("XML");
RubyClass lexer = xml.defineClassUnder(
"Lexer",
runtime.getObject(),
runtime.getObject().getAllocator()
);
lexer.defineAnnotatedMethods(org.liboga.xml.Lexer.class);
return true;
}
}

View File

@ -11,9 +11,9 @@ VALUE oga_cLexer;
* *
* @example * @example
* rb_encoding *encoding = rb_enc_get(...); * rb_encoding *encoding = rb_enc_get(...);
* oga_xml_lexer_callback(self, "on_string", encoding, ts, te); * liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
*/ */
void oga_xml_lexer_callback( void liboga_xml_lexer_callback(
VALUE self, VALUE self,
const char *name, const char *name,
rb_encoding *encoding, rb_encoding *encoding,
@ -32,9 +32,9 @@ void oga_xml_lexer_callback(
* arguments. * arguments.
* *
* @example * @example
* oga_xml_lexer_callback_simple(self, "on_cdata_start"); * liboga_xml_lexer_callback_simple(self, "on_cdata_start");
*/ */
void oga_xml_lexer_callback_simple(VALUE self, const char *name) void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
{ {
VALUE method = rb_intern(name); VALUE method = rb_intern(name);
@ -100,7 +100,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
# Machine for processing double quoted strings. # Machine for processing double quoted strings.
string_dquote := |* string_dquote := |*
^dquote+ => { ^dquote+ => {
oga_xml_lexer_callback(self, "on_string", encoding, ts, te); liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
}; };
dquote => { fret; }; dquote => { fret; };
@ -109,7 +109,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
# Machine for processing single quoted strings. # Machine for processing single quoted strings.
string_squote := |* string_squote := |*
^squote+ => { ^squote+ => {
oga_xml_lexer_callback(self, "on_string", encoding, ts, te); liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
}; };
squote => { fret; }; squote => { fret; };
@ -128,7 +128,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
doctype_start = '<!DOCTYPE'i whitespace+; doctype_start = '<!DOCTYPE'i whitespace+;
action start_doctype { action start_doctype {
oga_xml_lexer_callback_simple(self, "on_start_doctype"); liboga_xml_lexer_callback_simple(self, "on_start_doctype");
fcall doctype; fcall doctype;
} }
@ -136,7 +136,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
# and system IDs are treated as T_STRING tokens. # and system IDs are treated as T_STRING tokens.
doctype := |* doctype := |*
'PUBLIC' | 'SYSTEM' => { 'PUBLIC' | 'SYSTEM' => {
oga_xml_lexer_callback(self, "on_doctype_type", encoding, ts, te); liboga_xml_lexer_callback(self, "on_doctype_type", encoding, ts, te);
}; };
# Lex the public/system IDs as regular strings. # Lex the public/system IDs as regular strings.
@ -148,11 +148,11 @@ VALUE oga_xml_lexer_advance(VALUE self)
whitespace; whitespace;
identifier => { identifier => {
oga_xml_lexer_callback(self, "on_doctype_name", encoding, ts, te); liboga_xml_lexer_callback(self, "on_doctype_name", encoding, ts, te);
}; };
'>' => { '>' => {
oga_xml_lexer_callback_simple(self, "on_doctype_end"); liboga_xml_lexer_callback_simple(self, "on_doctype_end");
fret; fret;
}; };
*|; *|;
@ -171,7 +171,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
cdata_end = ']]>'; cdata_end = ']]>';
action start_cdata { action start_cdata {
oga_xml_lexer_callback_simple(self, "on_cdata_start"); liboga_xml_lexer_callback_simple(self, "on_cdata_start");
fcall cdata; fcall cdata;
} }
@ -179,8 +179,8 @@ VALUE oga_xml_lexer_advance(VALUE self)
# inside a CDATA tag is treated as plain text. # inside a CDATA tag is treated as plain text.
cdata := |* cdata := |*
any* cdata_end => { any* cdata_end => {
oga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3); liboga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3);
oga_xml_lexer_callback_simple(self, "on_cdata_end"); liboga_xml_lexer_callback_simple(self, "on_cdata_end");
fret; fret;
}; };
*|; *|;
@ -200,7 +200,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
comment_end = '-->'; comment_end = '-->';
action start_comment { action start_comment {
oga_xml_lexer_callback_simple(self, "on_comment_start"); liboga_xml_lexer_callback_simple(self, "on_comment_start");
fcall comment; fcall comment;
} }
@ -208,8 +208,8 @@ VALUE oga_xml_lexer_advance(VALUE self)
# inside a comment is treated as plain text (similar to CDATA tags). # inside a comment is treated as plain text (similar to CDATA tags).
comment := |* comment := |*
any* comment_end => { any* comment_end => {
oga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3); liboga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3);
oga_xml_lexer_callback_simple(self, "on_comment_end"); liboga_xml_lexer_callback_simple(self, "on_comment_end");
fret; fret;
}; };
*|; *|;
@ -222,20 +222,20 @@ VALUE oga_xml_lexer_advance(VALUE self)
xml_decl_end = '?>'; xml_decl_end = '?>';
action start_xml_decl { action start_xml_decl {
oga_xml_lexer_callback_simple(self, "on_xml_decl_start"); liboga_xml_lexer_callback_simple(self, "on_xml_decl_start");
fcall xml_decl; fcall xml_decl;
} }
# Machine that processes the contents of an XML declaration tag. # Machine that processes the contents of an XML declaration tag.
xml_decl := |* xml_decl := |*
xml_decl_end => { xml_decl_end => {
oga_xml_lexer_callback_simple(self, "on_xml_decl_end"); liboga_xml_lexer_callback_simple(self, "on_xml_decl_end");
fret; fret;
}; };
# Attributes and their values (e.g. version="1.0"). # Attributes and their values (e.g. version="1.0").
identifier => { identifier => {
oga_xml_lexer_callback(self, "on_attribute", encoding, ts, te); liboga_xml_lexer_callback(self, "on_attribute", encoding, ts, te);
}; };
dquote => { fcall string_dquote; }; dquote => { fcall string_dquote; };
@ -253,7 +253,7 @@ VALUE oga_xml_lexer_advance(VALUE self)
# namespace (if any). Remaining work is delegated to a dedicated # namespace (if any). Remaining work is delegated to a dedicated
# machine. # machine.
action start_element { action start_element {
oga_xml_lexer_callback(self, "on_element_start", encoding, ts + 1, te); liboga_xml_lexer_callback(self, "on_element_start", encoding, ts + 1, te);
fcall element_head; fcall element_head;
} }
@ -270,12 +270,12 @@ VALUE oga_xml_lexer_advance(VALUE self)
whitespace | '='; whitespace | '=';
newline => { newline => {
oga_xml_lexer_callback_simple(self, "on_newline"); liboga_xml_lexer_callback_simple(self, "on_newline");
}; };
# Attribute names. # Attribute names.
identifier => { identifier => {
oga_xml_lexer_callback(self, "on_attribute", encoding, ts, te); liboga_xml_lexer_callback(self, "on_attribute", encoding, ts, te);
}; };
# Attribute values. # Attribute values.
@ -299,23 +299,23 @@ VALUE oga_xml_lexer_advance(VALUE self)
# Enter the body of the tag. If HTML mode is enabled and the current # Enter the body of the tag. If HTML mode is enabled and the current
# element is a void element we'll close it and bail out. # element is a void element we'll close it and bail out.
'>' => { '>' => {
oga_xml_lexer_callback_simple(self, "on_element_open_end"); liboga_xml_lexer_callback_simple(self, "on_element_open_end");
}; };
# Regular closing tags. # Regular closing tags.
'</' identifier '>' => { '</' identifier '>' => {
oga_xml_lexer_callback_simple(self, "on_element_end"); liboga_xml_lexer_callback_simple(self, "on_element_end");
}; };
# Self closing elements that are not handled by the HTML mode. # Self closing elements that are not handled by the HTML mode.
'/>' => { '/>' => {
oga_xml_lexer_callback_simple(self, "on_element_end"); liboga_xml_lexer_callback_simple(self, "on_element_end");
}; };
# Note that this rule should be declared at the very bottom as it # Note that this rule should be declared at the very bottom as it
# will otherwise take precedence over the other rules. # will otherwise take precedence over the other rules.
^('<' | '>')+ => { ^('<' | '>')+ => {
oga_xml_lexer_callback(self, "on_text", encoding, ts, te); liboga_xml_lexer_callback(self, "on_text", encoding, ts, te);
}; };
*|; *|;
}%% }%%

View File

@ -0,0 +1,27 @@
package org.liboga.xml;
import java.io.IOException;
import org.jruby.Ruby;
import org.jruby.RubyModule;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
/**
 * Java backing class for the Ruby class <code>Oga::XML::Lexer</code>.
 *
 * At this stage the class only exposes a placeholder for the native lexing
 * routine.
 */
@JRubyClass(name="Oga::XML::Lexer", parent="Object")
public class Lexer extends RubyObject
{
    /**
     * Creates a lexer object bound to the given runtime and Ruby class.
     *
     * @param runtime The JRuby runtime the object lives in.
     * @param klass The Ruby class the new object is an instance of.
     */
    public Lexer(Ruby runtime, RubyClass klass)
    {
        super(runtime, klass);
    }

    /**
     * Placeholder for the native lexing routine; performs no lexing yet.
     *
     * @param context The thread context of the current call.
     * @return The runtime's <code>nil</code> object.
     */
    @JRubyMethod
    public IRubyObject advance_native(ThreadContext context)
    {
        final Ruby currentRuntime = context.getRuntime();

        return currentRuntime.getNil();
    }
}

View File

@ -12,7 +12,13 @@ Gem::Specification.new do |s|
s.files = File.read(File.expand_path('../MANIFEST', __FILE__)).split("\n") s.files = File.read(File.expand_path('../MANIFEST', __FILE__)).split("\n")
s.extensions = ['ext/liboga/extconf.rb'] if RUBY_PLATFORM == 'java'
s.files << 'lib/liboga.jar'
s.platform = 'java'
else
s.extensions = ['ext/c/liboga/extconf.rb']
end
s.has_rdoc = 'yard' s.has_rdoc = 'yard'
s.required_ruby_version = '>= 1.9.3' s.required_ruby_version = '>= 1.9.3'

View File

@ -25,4 +25,4 @@ rule '.c' => '.rl' do |task|
end end
desc 'Generates the lexers' desc 'Generates the lexers'
task :lexer => ['ext/liboga/lexer.c'] task :lexer => ['ext/c/liboga/lexer.c']