diff --git a/.editorconfig b/.editorconfig
index eac453c..ca5aea5 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -7,3 +7,6 @@ trim_trailing_whitespace = true
 
 [*.{y,rb,rl}]
 indent_size = 2
+
+[*.{h,rl}]
+indent_size = 2
diff --git a/.gitignore b/.gitignore
index a165c71..e7440d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,10 +3,14 @@ coverage
 pkg
 Gemfile.lock
 
-lib/oga/xml/lexer.rb
 lib/oga/xml/parser.rb
+lib/liboga.*
 
 benchmark/fixtures/big.xml
 
 profile/samples/*.txt
 profile/samples/*/*.txt
+*.so
+tmp
+
+ext/liboga/lexer.c
diff --git a/MANIFEST b/MANIFEST
index c862606..5b2c86a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5,6 +5,14 @@ README.md
 doc/DCO.md
 doc/changelog.md
 doc/css/common.css
+ext/liboga/extconf.rb
+ext/liboga/lexer.c
+ext/liboga/lexer.h
+ext/liboga/lexer.rl
+ext/liboga/liboga.c
+ext/liboga/liboga.h
+ext/liboga/xml.c
+ext/liboga/xml.h
 lib/oga.rb
 lib/oga/html/parser.rb
 lib/oga/version.rb
@@ -14,10 +22,8 @@ lib/oga/xml/doctype.rb
 lib/oga/xml/document.rb
 lib/oga/xml/element.rb
 lib/oga/xml/lexer.rb
-lib/oga/xml/lexer.rl
 lib/oga/xml/node.rb
 lib/oga/xml/parser.rb
-lib/oga/xml/parser.y
 lib/oga/xml/pull_parser.rb
 lib/oga/xml/text.rb
 lib/oga/xml/xml_declaration.rb
diff --git a/Rakefile b/Rakefile
index da33ccb..a25c691 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,33 +1,38 @@
 require 'bundler/gem_tasks'
 require 'digest/sha2'
 require 'rake/clean'
+require 'rake/extensiontask'
 require 'cliver'
 
 GEMSPEC = Gem::Specification.load('oga.gemspec')
 
-LEXER_OUTPUT  = 'lib/oga/xml/lexer.rb'
 PARSER_OUTPUT = 'lib/oga/xml/parser.rb'
 
 CLEAN.include(
   'coverage',
   'yardoc',
-  LEXER_OUTPUT,
   PARSER_OUTPUT,
   'benchmark/fixtures/big.xml',
-  'profile/samples/**/*.txt'
+  'profile/samples/**/*.txt',
+  'lib/liboga.*',
+  'tmp',
+  'ext/liboga/lexer.c'
 )
 
 FILE_LIST = FileList.new(
   'checkum/**/*.*',
   'doc/**/*.*',
-  'lib/**/*.*',
+  'lib/**/*.rb',
   'LICENSE',
   'MANIFEST',
   '*.gemspec',
   'README.md',
-  '.yardopts'
+  '.yardopts',
+  'ext/**/*.*'
 )
 
+Rake::ExtensionTask.new('liboga', GEMSPEC)
+
 Dir['./task/*.rake'].each do |task|
   import(task)
 end
diff --git a/ext/liboga/extconf.rb b/ext/liboga/extconf.rb
new file mode 100644
index 0000000..469c1a8
--- /dev/null
+++ b/ext/liboga/extconf.rb
@@ -0,0 +1,13 @@
+require 'mkmf'
+
+have_header('ruby.h')
+
+$CFLAGS << ' -Wextra -Wall -pedantic'
+
+if ENV['DEBUG']
+  $CFLAGS << ' -O0'
+else
+  $CFLAGS << ' -O3 -g'
+end
+# No directory prefix: lib/oga.rb loads the library straight from lib/.
+create_makefile('liboga')
diff --git a/ext/liboga/lexer.h b/ext/liboga/lexer.h
new file mode 100644
index 0000000..9da1c67
--- /dev/null
+++ b/ext/liboga/lexer.h
@@ -0,0 +1,10 @@
+/* Declarations for the native part of Oga::XML::Lexer. */
+#ifndef LIBOGA_XML_LEXER_H
+#define LIBOGA_XML_LEXER_H
+
+#include "liboga.h"
+
+extern VALUE oga_cLexer;
+extern void Init_liboga_xml_lexer();
+
+#endif
diff --git a/ext/liboga/lexer.rl b/ext/liboga/lexer.rl
new file mode 100644
index 0000000..01b2f82
--- /dev/null
+++ b/ext/liboga/lexer.rl
@@ -0,0 +1,298 @@
+#include "lexer.h"
+
+VALUE oga_cLexer;
+
+%%machine lexer;
+
+/* Calls the Ruby method `name` on `self`, passing the bytes between `ts` and
+ * `te` as a String tagged with `encoding`. rb_enc_str_new copies exactly
+ * `te - ts` bytes into the new String, so no intermediate heap copy has to
+ * be allocated (and later freed) on the C side. */
+void oga_xml_lexer_callback(VALUE self, const char *name,
+                            rb_encoding *encoding,
+                            const char *ts, const char *te)
+{
+  long length = te - ts;
+  VALUE value = rb_enc_str_new(ts, length, encoding);
+  ID method   = rb_intern(name);
+
+  rb_funcall(self, method, 1, value);
+}
+
+/* Calls the Ruby method `name` on `self` without any arguments. */
+void oga_xml_lexer_callback_simple(VALUE self, const char *name)
+{
+  ID method = rb_intern(name); /* rb_intern returns an ID, not a VALUE */
+  rb_funcall(self, method, 0);
+}
+
+%% write data;
+
+/* Lexes @data, calling the on_* callbacks on `self`; always returns nil. */
+VALUE oga_xml_lexer_advance(VALUE self)
+{
+  /* Pull the data in from Ruby land. */
+  VALUE data_ivar = rb_ivar_get(self, rb_intern("@data"));
+
+  /* Coerce to a String first (raises TypeError otherwise). */
+  char *data_str_val = StringValuePtr(data_ivar);
+
+  /* Make sure that all data passed back to Ruby has the proper encoding. */
+  rb_encoding *encoding = rb_enc_get(data_ivar);
+
+  /* Use Ruby's byte length: strlen() would stop at an embedded NUL byte. */
+  const char *p   = data_str_val;
+  const char *pe  = data_str_val + RSTRING_LEN(data_ivar);
+  const char *eof = pe;
+  const char *ts, *te;
+
+  int act = 0, cs = 0, top = 0;
+  int stack[8];
+
+  %% write init;
+  %% write exec;
+  return Qnil;
+}
+
+%%{
+    newline    = '\n' | '\r\n';
+    whitespace = [ \t];
+    identifier = [a-zA-Z0-9\-_:]+;
+
+    # Strings
+    #
+    # Strings in HTML can either be single or double quoted. If a string
+    # starts with one of these quotes it must be closed with the same type
+    # of quote.
+    dquote = '"';
+    squote = "'";
+
+    # Machine for processing double quoted strings.
+    string_dquote := |*
+      ^dquote+ => {
+        oga_xml_lexer_callback(self, "on_string", encoding, ts, te);
+      };
+
+      dquote => { fret; };
+    *|;
+
+    # Machine for processing single quoted strings.
+    string_squote := |*
+      ^squote+ => {
+        oga_xml_lexer_callback(self, "on_string", encoding, ts, te);
+      };
+
+      squote => { fret; };
+    *|;
+
+    # DOCTYPES
+    #
+    # http://www.w3.org/TR/html-markup/syntax.html#doctype-syntax
+    #
+    # These rules support the 3 flavours of doctypes:
+    #
+    # 1. Normal doctypes, as introduced in the HTML5 specification.
+    # 2. Deprecated doctypes, the more verbose ones used prior to HTML5.
+    # 3. Legacy doctypes
+    #
+    doctype_start = '<!DOCTYPE'i whitespace+;
+
+    action start_doctype {
+      oga_xml_lexer_callback_simple(self, "on_start_doctype");
+      fcall doctype;
+    }
+
+    # Machine for processing doctypes. Doctype values such as the public
+    # and system IDs are treated as T_STRING tokens.
+    doctype := |*
+      'PUBLIC' | 'SYSTEM' => {
+        oga_xml_lexer_callback(self, "on_doctype_type", encoding, ts, te);
+      };
+
+      # Lex the public/system IDs as regular strings.
+      dquote => { fcall string_dquote; };
+      squote => { fcall string_squote; };
+
+      # Whitespace inside doctypes is ignored since there's no point in
+      # including it.
+      whitespace;
+
+      identifier => {
+        oga_xml_lexer_callback(self, "on_doctype_name", encoding, ts, te);
+      };
+
+      '>' => {
+        oga_xml_lexer_callback_simple(self, "on_doctype_end");
+        fret;
+      };
+    *|;
+
+    # CDATA
+    #
+    # http://www.w3.org/TR/html-markup/syntax.html#cdata-sections
+    #
+    # CDATA tags are broken up into 3 parts: the start, the content and the
+    # end tag.
+    #
+    # In HTML CDATA tags have no meaning/are not supported. Oga does
+    # support them but treats their contents as plain text.
+    #
+    cdata_start = '<![CDATA[';
+    cdata_end   = ']]>';
+
+    action start_cdata {
+      oga_xml_lexer_callback_simple(self, "on_cdata_start");
+      fcall cdata;
+    }
+
+    # Machine for processing the contents of CDATA tags. Everything
+    # inside a CDATA tag is treated as plain text.
+    cdata := |*
+      any* cdata_end => {
+        oga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3);
+        oga_xml_lexer_callback_simple(self, "on_cdata_end");
+        fret;
+      };
+    *|;
+
+    # Comments
+    #
+    # http://www.w3.org/TR/html-markup/syntax.html#comments
+    #
+    # Comments are lexed into 3 parts: the start tag, the content and the
+    # end tag.
+    #
+    # Unlike the W3 specification these rules *do* allow character
+    # sequences such as `--` and `->`. Putting extra checks in for these
+    # sequences would actually make the rules/actions more complex.
+    #
+    comment_start = '<!--';
+    comment_end   = '-->';
+
+    action start_comment {
+      oga_xml_lexer_callback_simple(self, "on_comment_start");
+      fcall comment;
+    }
+
+    # Machine used for processing the contents of a comment. Everything
+    # inside a comment is treated as plain text (similar to CDATA tags).
+    comment := |*
+      any* comment_end => {
+        oga_xml_lexer_callback(self, "on_text", encoding, ts, te - 3);
+        oga_xml_lexer_callback_simple(self, "on_comment_end");
+        fret;
+      };
+    *|;
+
+    # XML declaration tags
+    #
+    # http://www.w3.org/TR/REC-xml/#sec-prolog-dtd
+    #
+    xml_decl_start = '<?xml';
+    xml_decl_end   = '?>';
+
+    action start_xml_decl {
+      oga_xml_lexer_callback_simple(self, "on_xml_decl_start");
+      fcall xml_decl;
+    }
+
+    # Machine that processes the contents of an XML declaration tag.
+    xml_decl := |*
+      xml_decl_end => {
+        oga_xml_lexer_callback_simple(self, "on_xml_decl_end");
+        fret;
+      };
+
+      # Attributes and their values (e.g. version="1.0").
+      identifier => {
+        oga_xml_lexer_callback(self, "on_attribute", encoding, ts, te);
+      };
+
+      dquote => { fcall string_dquote; };
+      squote => { fcall string_squote; };
+
+      any;
+    *|;
+
+    # Elements
+    #
+    # http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
+    #
+
+    # Action that creates the tokens for the opening tag, name and
+    # namespace (if any). Remaining work is delegated to a dedicated
+    # machine.
+    action start_element {
+      oga_xml_lexer_callback(self, "on_element_start", encoding, ts + 1, te);
+
+      fcall element_head;
+    }
+
+    element_start = '<' identifier;
+
+    # Machine used for processing the characters inside a element head. An
+    # element head is everything between `<NAME` (where NAME is the element
+    # name) and `>`.
+    #
+    # For example, in `<p foo="bar">` the element head is ` foo="bar"`.
+    #
+    element_head := |*
+      whitespace | '=';
+
+      newline => {
+        oga_xml_lexer_callback_simple(self, "on_newline");
+      };
+
+      # Attribute names.
+      identifier => {
+        oga_xml_lexer_callback(self, "on_attribute", encoding, ts, te);
+      };
+
+      # Attribute values.
+      dquote => { fcall string_dquote; };
+      squote => { fcall string_squote; };
+
+      # The closing character of the open tag.
+      ('>' | '/') => {
+        fhold;
+        fret;
+      };
+    *|;
+
+    main := |*
+      element_start  => start_element;
+      doctype_start  => start_doctype;
+      cdata_start    => start_cdata;
+      comment_start  => start_comment;
+      xml_decl_start => start_xml_decl;
+
+      # Enter the body of the tag. If HTML mode is enabled and the current
+      # element is a void element we'll close it and bail out.
+      '>' => {
+        oga_xml_lexer_callback_simple(self, "on_element_open_end");
+      };
+
+      # Regular closing tags.
+      '</' identifier '>' => {
+        oga_xml_lexer_callback_simple(self, "on_element_end");
+      };
+
+      # Self closing elements that are not handled by the HTML mode.
+      '/>' => {
+        oga_xml_lexer_callback_simple(self, "on_element_end");
+      };
+
+      # Note that this rule should be declared at the very bottom as it
+      # will otherwise take precedence over the other rules.
+      ^('<' | '>')+ => {
+        oga_xml_lexer_callback(self, "on_text", encoding, ts, te);
+      };
+    *|;
+}%%
+
+/* Defines Oga::XML::Lexer and its native #advance_native method. */
+void Init_liboga_xml_lexer()
+{
+  oga_cLexer = rb_define_class_under(oga_mXML, "Lexer", rb_cObject);
+  rb_define_method(oga_cLexer, "advance_native", oga_xml_lexer_advance, 0);
+}
diff --git a/ext/liboga/liboga.c b/ext/liboga/liboga.c
new file mode 100644
index 0000000..c49d6a1
--- /dev/null
+++ b/ext/liboga/liboga.c
@@ -0,0 +1,11 @@
+#include "liboga.h"
+
+VALUE oga_mOga;
+
+/* Called by Ruby when the extension is loaded; defines the Oga module. */
+void Init_liboga()
+{
+    oga_mOga = rb_define_module("Oga");
+    Init_liboga_xml();       /* must run first: this sets oga_mXML ... */
+    Init_liboga_xml_lexer(); /* ... under which the Lexer class is defined */
+}
diff --git a/ext/liboga/liboga.h b/ext/liboga/liboga.h
new file mode 100644
index 0000000..6f3c0af
--- /dev/null
+++ b/ext/liboga/liboga.h
@@ -0,0 +1,17 @@
+#ifndef LIBOGA_H
+#define LIBOGA_H
+
+#include <ruby.h>
+#include <ruby/encoding.h>
+#include <string.h>
+#include <stdlib.h> /* standard; <malloc.h> is not available everywhere */
+#include <stdio.h>
+
+extern VALUE oga_mOga;
+
+#include "xml.h"
+#include "lexer.h"
+
+void Init_liboga();
+
+#endif
diff --git a/ext/liboga/xml.c b/ext/liboga/xml.c
new file mode 100644
index 0000000..63ef162
--- /dev/null
+++ b/ext/liboga/xml.c
@@ -0,0 +1,8 @@
+#include "xml.h"
+
+VALUE oga_mXML;
+
+void Init_liboga_xml()
+{
+    oga_mXML = rb_define_module_under(oga_mOga, "XML");
+}
diff --git a/ext/liboga/xml.h b/ext/liboga/xml.h
new file mode 100644
index 0000000..672a579
--- /dev/null
+++ b/ext/liboga/xml.h
@@ -0,0 +1,10 @@
+#ifndef LIBOGA_XML_H
+#define LIBOGA_XML_H
+
+#include "liboga.h"
+
+extern VALUE oga_mXML;
+
+void Init_liboga_xml();
+
+#endif
diff --git a/lib/oga.rb b/lib/oga.rb
index 19ca643..1a74658 100644
--- a/lib/oga.rb
+++ b/lib/oga.rb
@@ -1,5 +1,7 @@
 require 'set'
 
+require_relative 'liboga'
+
 require_relative 'oga/xml/lexer'
 require_relative 'oga/xml/parser'
 require_relative 'oga/xml/pull_parser'
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
new file mode 100644
index 0000000..f5dd157
--- /dev/null
+++ b/lib/oga/xml/lexer.rb
@@ -0,0 +1,249 @@
+module Oga
+  module XML
+    ##
+    # Low level lexer that supports both XML and HTML (using an extra option).
+    # To lex HTML input set the `:html` option to `true` when creating an
+    # instance of the lexer:
+    #
+    #     lexer = Oga::XML::Lexer.new('<p>text</p>', :html => true)
+    #
+    # @!attribute [r] html
+    #  @return [TrueClass|FalseClass]
+    #
+    # @!attribute [r] tokens
+    #  @return [Array]
+    #
+    class Lexer
+      attr_reader :html
+
+      ##
+      # Names of the HTML void elements that should be handled when HTML lexing
+      # is enabled.
+      #
+      # @return [Set]
+      #
+      HTML_VOID_ELEMENTS = Set.new([
+        'area',
+        'base',
+        'br',
+        'col',
+        'command',
+        'embed',
+        'hr',
+        'img',
+        'input',
+        'keygen',
+        'link',
+        'meta',
+        'param',
+        'source',
+        'track',
+        'wbr'
+      ])
+
+      ##
+      # @param [String] data The data to lex.
+      #
+      # @param [Hash] options
+      #
+      # @option options [TrueClass|FalseClass] :html When set to `true` the
+      # input is lexed as HTML instead of SGML/XML. This makes it possible to
+      # lex HTML void elements such as `<link href="">`.
+      #
+      def initialize(data, options = {})
+        options.each do |key, value|
+          instance_variable_set("@#{key}", value) if respond_to?(key)
+        end
+
+        @data = data
+
+        reset
+      end
+
+      ##
+      # Resets the internal state of the lexer. Typically you don't need to
+      # call this method yourself as it's called by #lex after lexing a given
+      # String.
+      #
+      def reset
+        @line     = 1
+        @elements = []
+      end
+
+      ##
+      # Gathers all the tokens for the input (the String given to #initialize)
+      # and returns them as an Array.
+      #
+      # This method resets the internal state of the lexer after consuming the
+      # input.
+      #
+      # @return [Array]
+      # @see #advance
+      #
+      def lex
+        tokens = []
+
+        advance do |token|
+          tokens << token
+        end
+
+        reset
+
+        return tokens
+      end
+
+      ##
+      # Advances through the input and generates the corresponding tokens. Each
+      # token is yielded to the supplied block.
+      #
+      # Each token is an Array in the following format:
+      #
+      #     [TYPE, VALUE, LINE]
+      #
+      # The type is a Symbol, the value is either nil or a String and the line
+      # is the value of `@line` at the moment the token was created.
+      #
+      # This method stores the supplied block in `@block` and resets it after
+      # the lexer loop has finished.
+      #
+      # This method does *not* reset the internal state of the lexer.
+      #
+      # @yieldparam [Array] token
+      # @return [NilClass]
+      #
+      def advance(&block)
+        @block = block
+
+        advance_native
+      ensure
+        @block = nil
+      end
+
+      ##
+      # @return [TrueClass|FalseClass]
+      #
+      def html?
+        return !!html
+      end
+
+      private
+
+      ##
+      # @param [Fixnum] amount The amount of lines to advance.
+      #
+      def advance_line(amount = 1)
+        @line += amount
+      end
+
+      ##
+      # Adds a token with the given type and value to the list.
+      #
+      # @param [Symbol] type The token type.
+      # @param [String] value The token value.
+      #
+      def add_token(type, value = nil)
+        token = [type, value, @line]
+
+        @block.call(token)
+      end
+
+      ##
+      # Returns the name of the element we're currently in.
+      #
+      # @return [String]
+      #
+      def current_element
+        return @elements.last
+      end
+
+      def on_string(value)
+        add_token(:T_STRING, value)
+      end
+
+      def on_start_doctype
+        add_token(:T_DOCTYPE_START)
+      end
+
+      def on_doctype_type(value)
+        add_token(:T_DOCTYPE_TYPE, value)
+      end
+
+      def on_doctype_name(value)
+        add_token(:T_DOCTYPE_NAME, value)
+      end
+
+      def on_doctype_end
+        add_token(:T_DOCTYPE_END)
+      end
+
+      def on_cdata_start
+        add_token(:T_CDATA_START)
+      end
+
+      def on_cdata_end
+        add_token(:T_CDATA_END)
+      end
+
+      def on_comment_start
+        add_token(:T_COMMENT_START)
+      end
+
+      def on_comment_end
+        add_token(:T_COMMENT_END)
+      end
+
+      def on_xml_decl_start
+        add_token(:T_XML_DECL_START)
+      end
+
+      def on_xml_decl_end
+        add_token(:T_XML_DECL_END)
+      end
+
+      def on_element_start(name)
+        add_token(:T_ELEM_START)
+
+        if name.include?(':')
+          ns, name = name.split(':')
+
+          add_token(:T_ELEM_NS, ns)
+        end
+
+        @elements << name if html?
+
+        add_token(:T_ELEM_NAME, name)
+      end
+
+      def on_element_open_end
+        if html? and HTML_VOID_ELEMENTS.include?(current_element)
+          add_token(:T_ELEM_END)
+          @elements.pop
+        end
+      end
+
+      def on_element_end
+        add_token(:T_ELEM_END)
+
+        @elements.pop if html?
+      end
+
+      def on_text(value)
+        unless value.empty?
+          add_token(:T_TEXT, value)
+
+          lines = value.count("\n")
+
+          advance_line(lines) if lines > 0
+        end
+      end
+
+      def on_attribute(value)
+        add_token(:T_ATTR, value)
+      end
+
+      def on_newline
+        @line += 1
+      end
+    end # Lexer
+  end # XML
+end # Oga
diff --git a/lib/oga/xml/lexer.rl b/lib/oga/xml/lexer.rl
deleted file mode 100644
index 1e8ea96..0000000
--- a/lib/oga/xml/lexer.rl
+++ /dev/null
@@ -1,501 +0,0 @@
-%%machine lexer; # %
-
-module Oga
-  module XML
-    ##
-    # Low level lexer that supports both XML and HTML (using an extra option).
-    # To lex HTML input set the `:html` option to `true` when creating an
-    # instance of the lexer:
-    #
-    #     lexer = Oga::XML::Lexer.new(:html => true)
-    #
-    # @!attribute [r] html
-    #  @return [TrueClass|FalseClass]
-    #
-    # @!attribute [r] tokens
-    #  @return [Array]
-    #
-    class Lexer
-      %% write data;
-
-      # % fix highlight
-
-      attr_reader :html
-
-      ##
-      # Names of the HTML void elements that should be handled when HTML lexing
-      # is enabled.
-      #
-      # @return [Set]
-      #
-      HTML_VOID_ELEMENTS = Set.new([
-        'area',
-        'base',
-        'br',
-        'col',
-        'command',
-        'embed',
-        'hr',
-        'img',
-        'input',
-        'keygen',
-        'link',
-        'meta',
-        'param',
-        'source',
-        'track',
-        'wbr'
-      ])
-
-      ##
-      # @param [String] data The data to lex.
-      #
-      # @param [Hash] options
-      #
-      # @option options [Symbol] :html When set to `true` the lexer will treat
-      # the input as HTML instead of SGML/XML. This makes it possible to lex
-      # HTML void elements such as `<link href="">`.
-      #
-      def initialize(data, options = {})
-        options.each do |key, value|
-          instance_variable_set("@#{key}", value) if respond_to?(key)
-        end
-
-        @data = data
-
-        reset
-      end
-
-      ##
-      # Resets the internal state of the lexer. Typically you don't need to
-      # call this method yourself as its called by #lex after lexing a given
-      # String.
-      #
-      def reset
-        @line     = 1
-        @elements = []
-
-        @buffer_start_position = nil
-      end
-
-      ##
-      # Gathers all the tokens for the input and returns them as an Array.
-      #
-      # This method resets the internal state of the lexer after consuming the
-      # input.
-      #
-      # @param [String] data The string to consume.
-      # @return [Array]
-      # @see #advance
-      #
-      def lex
-        tokens = []
-
-        advance do |token|
-          tokens << token
-        end
-
-        reset
-
-        return tokens
-      end
-
-      ##
-      # Advances through the input and generates the corresponding tokens. Each
-      # token is yielded to the supplied block.
-      #
-      # Each token is an Array in the following format:
-      #
-      #     [TYPE, VALUE]
-      #
-      # The type is a symbol, the value is either nil or a String.
-      #
-      # This method stores the supplied block in `@block` and resets it after
-      # the lexer loop has finished.
-      #
-      # This method does *not* reset the internal state of the lexer.
-      #
-      #
-      # @param [String] data The String to consume.
-      # @return [Array]
-      #
-      def advance(&block)
-        @block = block
-
-        data  = @data
-        ts    = nil
-        te    = nil
-        stack = []
-        top   = 0
-        cs    = self.class.lexer_start
-        act   = 0
-        eof   = @data.bytesize
-        p     = 0
-        pe    = eof
-
-        _lexer_eof_trans          = self.class.send(:_lexer_eof_trans)
-        _lexer_from_state_actions = self.class.send(:_lexer_from_state_actions)
-        _lexer_index_offsets      = self.class.send(:_lexer_index_offsets)
-        _lexer_indicies           = self.class.send(:_lexer_indicies)
-        _lexer_key_spans          = self.class.send(:_lexer_key_spans)
-        _lexer_to_state_actions   = self.class.send(:_lexer_to_state_actions)
-        _lexer_trans_actions      = self.class.send(:_lexer_trans_actions)
-        _lexer_trans_keys         = self.class.send(:_lexer_trans_keys)
-        _lexer_trans_targs        = self.class.send(:_lexer_trans_targs)
-
-        %% write exec;
-
-        # % fix highlight
-      ensure
-        @block = nil
-      end
-
-      ##
-      # @return [TrueClass|FalseClass]
-      #
-      def html?
-        return !!html
-      end
-
-      private
-
-      ##
-      # @param [Fixnum] amount The amount of lines to advance.
-      #
-      def advance_line(amount = 1)
-        @line += amount
-      end
-
-      ##
-      # Emits a token who's value is based on the supplied start/stop position.
-      #
-      # @param [Symbol] type The token type.
-      # @param [Fixnum] start
-      # @param [Fixnum] stop
-      #
-      # @see #text
-      # @see #add_token
-      #
-      def emit(type, start, stop)
-        value = text(start, stop)
-
-        add_token(type, value)
-      end
-
-      ##
-      # Returns the text of the current buffer based on the supplied start and
-      # stop position.
-      #
-      # @param [Fixnum] start
-      # @param [Fixnum] stop
-      # @return [String]
-      #
-      def text(start, stop)
-        return @data.byteslice(start, stop - start)
-      end
-
-      ##
-      # Adds a token with the given type and value to the list.
-      #
-      # @param [Symbol] type The token type.
-      # @param [String] value The token value.
-      #
-      def add_token(type, value = nil)
-        token = [type, value, @line]
-
-        @block.call(token)
-      end
-
-      ##
-      # Enables buffering starting at the given position.
-      #
-      # @param [Fixnum] position The start position of the buffer.
-      #
-      def start_buffer(position)
-        @buffer_start_position = position
-      end
-
-      ##
-      # Emits a text token.
-      #
-      # @param [Fixnum] start
-      # @param [Fixnum] stop
-      #
-      def emit_text(start, stop)
-        content = text(start, stop)
-
-        unless content.empty?
-          add_token(:T_TEXT, content)
-
-          lines = content.count("\n")
-
-          advance_line(lines) if lines > 0
-        end
-      end
-
-      ##
-      # Returns the name of the element we're currently in.
-      #
-      # @return [String]
-      #
-      def current_element
-        return @elements.last
-      end
-
-      %%{
-        getkey (data.getbyte(p) || 0);
-
-        newline    = '\n' | '\r\n';
-        whitespace = [ \t];
-        identifier = [a-zA-Z0-9\-_:]+;
-
-        # Strings
-        #
-        # Strings in HTML can either be single or double quoted. If a string
-        # starts with one of these quotes it must be closed with the same type
-        # of quote.
-        dquote = '"';
-        squote = "'";
-
-        # Machine for processing double quoted strings.
-        string_dquote := |*
-          ^dquote+ => {
-            emit(:T_STRING, ts, te)
-          };
-
-          dquote => { fret; };
-        *|;
-
-        # Machine for processing single quoted strings.
-        string_squote := |*
-          ^squote+ => {
-            emit(:T_STRING, ts, te)
-          };
-
-          squote => { fret; };
-        *|;
-
-        # DOCTYPES
-        #
-        # http://www.w3.org/TR/html-markup/syntax.html#doctype-syntax
-        #
-        # These rules support the 3 flavours of doctypes:
-        #
-        # 1. Normal doctypes, as introduced in the HTML5 specification.
-        # 2. Deprecated doctypes, the more verbose ones used prior to HTML5.
-        # 3. Legacy doctypes
-        #
-        doctype_start = '<!DOCTYPE'i whitespace+;
-
-        action start_doctype {
-          add_token(:T_DOCTYPE_START)
-          fcall doctype;
-        }
-
-        # Machine for processing doctypes. Doctype values such as the public
-        # and system IDs are treated as T_STRING tokens.
-        doctype := |*
-          'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE, ts, te) };
-
-          # Lex the public/system IDs as regular strings.
-          dquote => { fcall string_dquote; };
-          squote => { fcall string_squote; };
-
-          # Whitespace inside doctypes is ignored since there's no point in
-          # including it.
-          whitespace;
-
-          identifier => { emit(:T_DOCTYPE_NAME, ts, te) };
-
-          '>' => {
-            add_token(:T_DOCTYPE_END)
-            fret;
-          };
-        *|;
-
-        # CDATA
-        #
-        # http://www.w3.org/TR/html-markup/syntax.html#cdata-sections
-        #
-        # CDATA tags are broken up into 3 parts: the start, the content and the
-        # end tag.
-        #
-        # In HTML CDATA tags have no meaning/are not supported. Oga does
-        # support them but treats their contents as plain text.
-        #
-        cdata_start = '<![CDATA[';
-        cdata_end   = ']]>';
-
-        action start_cdata {
-          add_token(:T_CDATA_START)
-
-          fcall cdata;
-        }
-
-        # Machine that for processing the contents of CDATA tags. Everything
-        # inside a CDATA tag is treated as plain text.
-        cdata := |*
-          any* cdata_end => {
-            emit_text(ts, te - 3)
-            add_token(:T_CDATA_END)
-
-            fret;
-          };
-        *|;
-
-        # Comments
-        #
-        # http://www.w3.org/TR/html-markup/syntax.html#comments
-        #
-        # Comments are lexed into 3 parts: the start tag, the content and the
-        # end tag.
-        #
-        # Unlike the W3 specification these rules *do* allow character
-        # sequences such as `--` and `->`. Putting extra checks in for these
-        # sequences would actually make the rules/actions more complex.
-        #
-        comment_start = '<!--';
-        comment_end   = '-->';
-
-        action start_comment {
-          add_token(:T_COMMENT_START)
-
-          fcall comment;
-        }
-
-        # Machine used for processing the contents of a comment. Everything
-        # inside a comment is treated as plain text (similar to CDATA tags).
-        comment := |*
-          any* comment_end => {
-            emit_text(ts, te - 3)
-            add_token(:T_COMMENT_END)
-
-            fret;
-          };
-        *|;
-
-        # XML declaration tags
-        #
-        # http://www.w3.org/TR/REC-xml/#sec-prolog-dtd
-        #
-        xml_decl_start = '<?xml';
-        xml_decl_end   = '?>';
-
-        action start_xml_decl {
-          add_token(:T_XML_DECL_START)
-
-          fcall xml_decl;
-        }
-
-        # Machine that processes the contents of an XML declaration tag.
-        xml_decl := |*
-          xml_decl_end => {
-            add_token(:T_XML_DECL_END)
-
-            fret;
-          };
-
-          # Attributes and their values (e.g. version="1.0").
-          identifier => { emit(:T_ATTR, ts, te) };
-
-          dquote => { fcall string_dquote; };
-          squote => { fcall string_squote; };
-
-          any;
-        *|;
-
-        # Elements
-        #
-        # http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
-        #
-
-        # Action that creates the tokens for the opening tag, name and
-        # namespace (if any). Remaining work is delegated to a dedicated
-        # machine.
-        action start_element {
-          add_token(:T_ELEM_START)
-
-          # Add the element name. If the name includes a namespace we'll break
-          # the name up into two separate tokens.
-          name = text(ts + 1, te)
-
-          if name.include?(':')
-            ns, name = name.split(':')
-
-            add_token(:T_ELEM_NS, ns)
-          end
-
-          @elements << name if html?
-
-          add_token(:T_ELEM_NAME, name)
-
-          fcall element_head;
-        }
-
-        element_start = '<' identifier;
-
-        # Machine used for processing the characters inside a element head. An
-        # element head is everything between `<NAME` (where NAME is the element
-        # name) and `>`.
-        #
-        # For example, in `<p foo="bar">` the element head is ` foo="bar"`.
-        #
-        element_head := |*
-          whitespace | '=';
-
-          newline => { advance_line };
-
-          # Attribute names.
-          identifier => { emit(:T_ATTR, ts, te) };
-
-          # Attribute values.
-          dquote => { fcall string_dquote; };
-          squote => { fcall string_squote; };
-
-          # The closing character of the open tag.
-          ('>' | '/') => {
-            fhold;
-            fret;
-          };
-        *|;
-
-        main := |*
-          element_start  => start_element;
-          doctype_start  => start_doctype;
-          cdata_start    => start_cdata;
-          comment_start  => start_comment;
-          xml_decl_start => start_xml_decl;
-
-          # Enter the body of the tag. If HTML mode is enabled and the current
-          # element is a void element we'll close it and bail out.
-          '>' => {
-            if html? and HTML_VOID_ELEMENTS.include?(current_element)
-              add_token(:T_ELEM_END, nil)
-              @elements.pop
-            end
-          };
-
-          # Regular closing tags.
-          '</' identifier '>' => {
-            add_token(:T_ELEM_END, nil)
-
-            @elements.pop if html?
-          };
-
-          # Self closing elements that are not handled by the HTML mode.
-          '/>' => {
-            add_token(:T_ELEM_END, nil)
-
-            @elements.pop if html?
-          };
-
-          # Note that this rule should be declared at the very bottom as it
-          # will otherwise take precedence over the other rules.
-          ^('<' | '>')+ => {
-            emit_text(ts, te)
-          };
-        *|;
-      }%%
-    end # Lexer
-  end # XML
-end # Oga
diff --git a/oga.gemspec b/oga.gemspec
index 76b027c..40407f4 100644
--- a/oga.gemspec
+++ b/oga.gemspec
@@ -12,6 +12,8 @@ Gem::Specification.new do |s|
 
   s.files = File.read(File.expand_path('../MANIFEST', __FILE__)).split("\n")
 
+  s.extensions = ['ext/liboga/extconf.rb']
+
   s.has_rdoc              = 'yard'
   s.required_ruby_version = '>= 1.9.3'
 
@@ -24,4 +26,5 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'simplecov'
   s.add_development_dependency 'kramdown'
   s.add_development_dependency 'benchmark-ips'
+  s.add_development_dependency 'rake-compiler'
 end
diff --git a/task/lexer.rake b/task/lexer.rake
index e1b58f8..8823bea 100644
--- a/task/lexer.rake
+++ b/task/lexer.rake
@@ -18,5 +18,11 @@ rule '.rb' => '.rl' do |task|
   end
 end
 
-desc 'Generates the lexer'
-task :lexer => [LEXER_OUTPUT]
+rule '.c' => '.rl' do |task|
+  Cliver.assert('ragel', '~> 6.7')
+
+  sh "ragel -C -G2 #{task.source} -o #{task.name}"
+end
+
+desc 'Generates the lexers'
+task :lexer => ['ext/liboga/lexer.c']
diff --git a/task/test.rake b/task/test.rake
index 1f6c0f2..d19ce6c 100644
--- a/task/test.rake
+++ b/task/test.rake
@@ -1,4 +1,4 @@
 desc 'Runs the tests'
-task :test => [:generate] do
+task :test => [:generate, :compile] do
   sh 'rspec spec'
 end