diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl
index 3e54650..02d45f3 100644
--- a/lib/oga/lexer.rl
+++ b/lib/oga/lexer.rl
@@ -376,6 +376,39 @@ module Oga
       any;
     *|;
 
+    # XML declaration tags
+    #
+    # http://www.w3.org/TR/REC-xml/#sec-prolog-dtd
+    #
+    xml_decl_start = '<?xml';
+    xml_decl_end   = '?>';
+
+    # Emits whatever is currently buffered, adds the start token, then opens
+    # a new buffer and calls into the xml_decl machine.
+    action start_xml_decl {
+      emit_buffer
+      add_token(:T_XML_DECL_START, nil)
+
+      start_buffer
+
+      fcall xml_decl;
+    }
+
+    # Machine that processes the contents of an XML declaration tag.
+    xml_decl := |*
+      # Closing marker: emit the buffered contents, add the end token and
+      # return to the machine that called us.
+      xml_decl_end => {
+        emit_buffer
+        add_token(:T_XML_DECL_END, nil)
+
+        fret;
+      };
+
+      # Any other character is consumed without an action of its own.
+      any;
+    *|;
+
     # Elements
     #
     # http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
@@ -433,10 +466,11 @@ module Oga
     *|;
 
     main := |*
-      element_start => start_element;
-      doctype_start => start_doctype;
-      cdata_start   => start_cdata;
-      comment_start => start_comment;
+      element_start  => start_element;
+      doctype_start  => start_doctype;
+      cdata_start    => start_cdata;
+      comment_start  => start_comment;
+      xml_decl_start => start_xml_decl;
 
       # Enter the body of the tag. If HTML mode is enabled and the current
       # element is a void element we'll close it and bail out.
diff --git a/lib/oga/parser.y b/lib/oga/parser.y
index 4ee206c..274499d 100644
--- a/lib/oga/parser.y
+++ b/lib/oga/parser.y
@@ -14,6 +14,7 @@ token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
 token T_CDATA_START T_CDATA_END
 token T_COMMENT_START T_COMMENT_END
 token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
+token T_XML_DECL_START T_XML_DECL_END
 
 options no_result_var
 
@@ -35,6 +36,7 @@ rule
     | comment
     | element
     | text
+    | xmldecl
     ;
 
   # Doctypes
@@ -119,6 +121,14 @@ rule
     | T_ATTR T_STRING { s(:attribute, val[0], val[1]) }
     ;
 
+  # XML declarations
+  #
+  # The declaration body, when present, is kept as the child produced by the
+  # `text` rule.
+  xmldecl
+    : T_XML_DECL_START T_XML_DECL_END      { s(:xml_decl) }
+    | T_XML_DECL_START text T_XML_DECL_END { s(:xml_decl, val[1]) }
+    ;
+
   # Plain text
 
   text
diff --git a/spec/oga/lexer/xml_declaration_spec.rb b/spec/oga/lexer/xml_declaration_spec.rb
new file mode 100644
index 0000000..041c2d2
--- /dev/null
+++ b/spec/oga/lexer/xml_declaration_spec.rb
@@ -0,0 +1,24 @@
+require 'spec_helper'
+
+describe Oga::Lexer do
+  context 'XML declaration tags' do
+    example 'lex a start tag' do
+      lex('<?xml').should == [[:T_XML_DECL_START, nil, 1]]
+    end
+
+    example 'lex a start and end tag' do
+      lex('<?xml?>').should == [
+        [:T_XML_DECL_START, nil, 1],
+        [:T_XML_DECL_END, nil, 1]
+      ]
+    end
+
+    example 'lex a tag with text inside it' do
+      lex('<?xml version="1.0" ?>').should == [
+        [:T_XML_DECL_START, nil, 1],
+        [:T_TEXT, ' version="1.0" ', 1],
+        [:T_XML_DECL_END, nil, 1]
+      ]
+    end
+  end
+end
diff --git a/spec/oga/parser/xml_declaration_spec.rb b/spec/oga/parser/xml_declaration_spec.rb
new file mode 100644
index 0000000..e0df820
--- /dev/null
+++ b/spec/oga/parser/xml_declaration_spec.rb
@@ -0,0 +1,12 @@
+require 'spec_helper'
+
+describe Oga::Parser do
+  context 'XML declaration tags' do
+    example 'parse an XML declaration tag' do
+      parse('<?xml hello ?>').should == s(
+        :document,
+        s(:xml_decl, s(:text, ' hello '))
+      )
+    end
+  end
+end