diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl
index 3f59b46..ab043d4 100644
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@@ -98,6 +98,40 @@
         any;
     *|;
 
+    # Processing Instructions
+    #
+    # http://www.w3.org/TR/xpath/#section-Processing-Instruction-Nodes
+    # http://en.wikipedia.org/wiki/Processing_Instruction
+    #
+    # These are tags meant to be used by parsers/libraries for custom behaviour.
+    # One example are the tags used by PHP: <?php and ?>. Note that the XML
+    # declaration tags (<?xml ?>) are not considered to be a processing
+    # instruction.
+    #
+
+    proc_ins_start = '<?' identifier;
+    proc_ins_end   = '?>';
+
+    action start_proc_ins {
+        callback_simple("on_proc_ins_start");
+        callback("on_proc_ins_name", data, encoding, ts + 2, te);
+
+        mark = te;
+
+        fnext proc_ins_body;
+    }
+
+    proc_ins_body := |*
+        proc_ins_end => {
+            callback("on_text", data, encoding, mark, ts);
+            callback_simple("on_proc_ins_end");
+
+            fnext main;
+        };
+
+        any;
+    *|;
+
     # Strings
     #
     # Strings in HTML can either be single or double quoted. If a string
@@ -257,6 +291,7 @@
         xml_decl_start => start_xml_decl;
         comment_start  => start_comment;
         cdata_start    => start_cdata;
+        proc_ins_start => start_proc_ins;
 
         # The start of an element.
         '<' => start_element;
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index 485fa17..655d36e 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -300,6 +300,29 @@ module Oga
         add_token(:T_ELEM_START)
       end
 
+      ##
+      # Called on the start of a processing instruction.
+      #
+      def on_proc_ins_start
+        add_token(:T_PROC_INS_START)
+      end
+
+      ##
+      # Called on a processing instruction name.
+      #
+      # @param [String] value
+      #
+      def on_proc_ins_name(value)
+        add_token(:T_PROC_INS_NAME, value)
+      end
+
+      ##
+      # Called on the end of a processing instruction.
+      #
+      def on_proc_ins_end
+        add_token(:T_PROC_INS_END)
+      end
+
       ##
       # Called on the name of an element.
       #
diff --git a/spec/oga/xml/lexer/processing_instruction_spec.rb b/spec/oga/xml/lexer/processing_instruction_spec.rb
new file mode 100644
index 0000000..80648e8
--- /dev/null
+++ b/spec/oga/xml/lexer/processing_instruction_spec.rb
@@ -0,0 +1,22 @@
+require 'spec_helper'
+
+describe Oga::XML::Lexer do
+  context 'processing instructions' do
+    example 'lex a processing instruction' do
+      lex('<?foo?>').should == [
+        [:T_PROC_INS_START, nil, 1],
+        [:T_PROC_INS_NAME, 'foo', 1],
+        [:T_PROC_INS_END, nil, 1]
+      ]
+    end
+
+    example 'lex a processing instruction containing text' do
+      lex('<?foo bar ?>').should == [
+        [:T_PROC_INS_START, nil, 1],
+        [:T_PROC_INS_NAME, 'foo', 1],
+        [:T_TEXT, ' bar ', 1],
+        [:T_PROC_INS_END, nil, 1]
+      ]
+    end
+  end
+end