diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl
index 0518f9e..c41dc35 100644
--- a/lib/oga/lexer.rl
+++ b/lib/oga/lexer.rl
@@ -83,35 +83,48 @@ module Oga
       lbracket = '[';
       rbracket = ']';
 
-      s_quote  = "'";
-      d_quote  = '"';
+      s_quote = "'";
+      d_quote = '"';
 
       # FIXME: there really should be a better way of doing this.
       text = (any - s_quote - d_quote - equals - bang - slash -
         greater - smaller - whitespace - newline - colon - dash -
         lbracket - rbracket)+;
 
-      # Unicode characters, taken from whitequark's wonderful parser library.
-      # (I honestly need to buy that dude a beer or 100). Basically this
-      # takes all characters and removes ASCII ones from the list, thus
-      # leaving you with Unicode.
-      unicode = any - ascii;
+      # DOCTYPES
+      #
+      # http://www.w3.org/TR/html-markup/syntax.html#doctype-syntax
+      #
+      # Doctypes are treated with some extra care on lexer level to make the
+      # parser's life easier. If they were treated as regular text it would be
+      # a pain to specify a proper doctype in Racc since it can't match on a
+      # token's value (only on its type).
+      #
+      # Doctype parsing is also relaxed compared to the W3 specification: it
+      # defines 4 doctype formats with different rules, but since Oga doesn't
+      # really use the doctype for anything all formats share a single rule.
+      # The body excludes '>' so the token stops at the first closing bracket
+      # instead of running on to the last '>' in the input.
+      doctype = smaller whitespace* bang whitespace* 'DOCTYPE'i whitespace*
+        'HTML'i whitespace* (any - greater)* greater;
 
       main := |*
         whitespace => { t(:T_SPACE) };
         newline    => { t(:T_NEWLINE); advance_line };
-        smaller    => { t(:T_SMALLER) };
-        greater    => { t(:T_GREATER) };
-        slash      => { t(:T_SLASH) };
-        d_quote    => { t(:T_DQUOTE) };
-        s_quote    => { t(:T_SQUOTE) };
-        dash       => { t(:T_DASH) };
-        rbracket   => { t(:T_RBRACKET) };
-        lbracket   => { t(:T_LBRACKET) };
-        colon      => { t(:T_COLON) };
-        bang       => { t(:T_BANG) };
-        equals     => { t(:T_EQUALS) };
-        text       => { t(:T_TEXT) };
+
+        doctype  => { t(:T_DOCTYPE) }; # whole declaration lexed as one token
+        smaller  => { t(:T_SMALLER) };
+        greater  => { t(:T_GREATER) };
+        slash    => { t(:T_SLASH) };
+        d_quote  => { t(:T_DQUOTE) };
+        s_quote  => { t(:T_SQUOTE) };
+        dash     => { t(:T_DASH) };
+        rbracket => { t(:T_RBRACKET) };
+        lbracket => { t(:T_LBRACKET) };
+        colon    => { t(:T_COLON) };
+        bang     => { t(:T_BANG) };
+        equals   => { t(:T_EQUALS) };
+        text     => { t(:T_TEXT) };
       *|;
     }%%
   end # Lexer
diff --git a/lib/oga/parser/html.y b/lib/oga/parser/html.y
index 11ccde7..ada4bbd 100644
--- a/lib/oga/parser/html.y
+++ b/lib/oga/parser/html.y
@@ -2,7 +2,7 @@ class Oga::Parser::HTML
 
 token T_SPACE T_NEWLINE T_SMALLER T_GREATER T_SLASH
 token T_DQUOTE T_SQUOTE T_DASH T_RBRACKET T_LBRACKET
-token T_COLON T_BANG T_EQUALS T_TEXT
+token T_COLON T_BANG T_EQUALS T_TEXT T_DOCTYPE
 
 options no_result_var
 
@@ -19,8 +19,17 @@ rule
 
   expression
     : tag
+    | doctype # a complete doctype declaration (single T_DOCTYPE token)
     ;
 
+  # Doctypes: the lexer delivers the whole declaration as a single T_DOCTYPE
+  # token; its value is wrapped unchanged in a (doctype ...) node.
+  doctype
+    : T_DOCTYPE { s(:doctype, val[0]) }
+    ;
+
+  # Generic HTML tags
+
   tag_start
     # <p>
     : T_SMALLER T_TEXT T_GREATER { val[1] }
@@ -42,6 +51,16 @@ rule
   tag_body
     : T_TEXT
     ;
+
+  whitespaces # one or more whitespace tokens; NOTE(review): no rule visible here uses this yet
+    : whitespaces whitespace
+    | whitespace
+    ;
+
+  whitespace # a single T_NEWLINE or T_SPACE token
+    : T_NEWLINE
+    | T_SPACE
+    ;
 end
 
 ---- inner
diff --git a/spec/oga/lexer_spec.rb b/spec/oga/lexer_spec.rb
index ca99113..9c0ed68 100644
--- a/spec/oga/lexer_spec.rb
+++ b/spec/oga/lexer_spec.rb
@@ -132,4 +132,18 @@ describe Oga::Lexer do
       ]
     end
   end
+
+  context 'doctypes' do
+    example 'lex the HTML5 doctype' do
+      input = '<!DOCTYPE html>'
+
+      lex(input).should == [[:T_DOCTYPE, input, 1, 1]]
+    end
+
+    example 'lex a random doctype' do
+      input = '<!DOCTYPE HTML PUBLIC "foobar" "baz">'
+
+      lex(input).should == [[:T_DOCTYPE, input, 1, 1]]
+    end
+  end
 end
diff --git a/spec/oga/parser/html/doctype_spec.rb b/spec/oga/parser/html/doctype_spec.rb
new file mode 100644
index 0000000..be85d22
--- /dev/null
+++ b/spec/oga/parser/html/doctype_spec.rb
@@ -0,0 +1,18 @@
+require 'spec_helper'
+
+describe Oga::Parser::HTML do
+  context 'doctypes' do
+    example 'parse the HTML5 doctype' do
+      html5 = '<!DOCTYPE html>'
+
+      parse_html(html5).should == s(:document, s(:doctype, html5))
+    end
+
+    example 'parse an HTML 4 strict doctype' do
+      html4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' \
+        '"http://www.w3.org/TR/html4/strict.dtd">'
+
+      parse_html(html4).should == s(:document, s(:doctype, html4))
+    end
+  end
+end
diff --git a/spec/support/parsing.rb b/spec/support/parsing.rb
index 5da78df..72c81d5 100644
--- a/spec/support/parsing.rb
+++ b/spec/support/parsing.rb
@@ -20,5 +20,15 @@ module Oga
     def lex(input)
       return Oga::Lexer.new.lex(input)
     end
+
+    ##
+    # Runs a fresh HTML parser over the given input and returns the AST.
+    #
+    # @param [String] input
+    # @return [Oga::AST::Node]
+    def parse_html(input)
+      parser = Oga::Parser::HTML.new
+      return parser.parse(input)
+    end
   end # ParsingHelpers
 end # Oga