Lexing of XML processing instructions.

This commit is contained in:
Yorick Peterse 2014-08-15 22:04:45 +02:00
parent ccd95d69d8
commit 8f4eaf3823
3 changed files with 80 additions and 0 deletions

View File

@ -98,6 +98,40 @@
any;
*|;
# Processing Instructions
#
# http://www.w3.org/TR/xpath/#section-Processing-Instruction-Nodes
# http://en.wikipedia.org/wiki/Processing_Instruction
#
# These are tags meant to be used by parsers/libraries for custom behaviour.
# One example is the pair of tags used by PHP: <?php and ?>. Note that the XML
# declaration tags (<?xml ?>) are not considered to be processing
# instructions.
#
# Opening of a processing instruction: the literal "<?" immediately followed
# by the instruction name (`identifier` is defined elsewhere in this grammar).
proc_ins_start = '<?' identifier;
# Literal terminator of a processing instruction.
proc_ins_end = '?>';
# Runs when `proc_ins_start` has been matched.
#
# Emits the start callback followed by the name callback. The name range
# begins at `ts + 2` so that the two-character "<?" prefix is skipped and only
# the identifier part of the match is passed along (assumes `callback` takes a
# start/end pair into `data` -- confirm against its definition).
#
# `mark` is set to the end of the match so the body scanner knows where the
# instruction body begins; `fnext` then makes `proc_ins_body` the machine used
# for the input that follows.
action start_proc_ins {
callback_simple("on_proc_ins_start");
callback("on_proc_ins_name", data, encoding, ts + 2, te);
mark = te;
fnext proc_ins_body;
}
# Scanner for the body of a processing instruction, entered from
# `start_proc_ins`.
#
# Everything between the instruction name and the closing "?>" is reported as
# a single "on_text" callback covering `mark` up to `ts` (the start of the
# terminator), followed by "on_proc_ins_end". Control then goes back to the
# `main` machine.
#
# NOTE(review): for an instruction without a body (e.g. "<?foo?>") `mark`
# equals `ts`, so "on_text" is invoked with an empty range. The spec expects
# no T_TEXT token in that case, so presumably the handler ignores empty
# text -- confirm.
proc_ins_body := |*
proc_ins_end => {
callback("on_text", data, encoding, mark, ts);
callback_simple("on_proc_ins_end");
fnext main;
};
# Any other character is consumed without an action; it becomes part of the
# text range emitted once the terminator is reached.
any;
*|;
# Strings
#
# Strings in HTML can either be single or double quoted. If a string
@ -257,6 +291,7 @@
xml_decl_start => start_xml_decl;
comment_start => start_comment;
cdata_start => start_cdata;
proc_ins_start => start_proc_ins;
# The start of an element.
'<' => start_element;

View File

@ -300,6 +300,29 @@ module Oga
add_token(:T_ELEM_START)
end
##
# Called on the start of a processing instruction.
#
def on_proc_ins_start
  # The opening tag carries no value of its own; the instruction name is
  # reported through a separate callback (on_proc_ins_name).
  type = :T_PROC_INS_START

  add_token(type)
end
##
# Called on a processing instruction name.
#
# @param [String] value
#
def on_proc_ins_name(value)
  # `value` is the name handed over by the lexer and becomes the token value
  # as-is.
  type = :T_PROC_INS_NAME

  add_token(type, value)
end
##
# Called on the end of a processing instruction.
#
def on_proc_ins_end
  # Like the start token, the end token is added without a value.
  type = :T_PROC_INS_END

  add_token(type)
end
##
# Called on the name of an element.
#

View File

@ -0,0 +1,22 @@
require 'spec_helper'

# Lexer specs covering processing instructions such as "<?foo bar ?>".
describe Oga::XML::Lexer do
  context 'processing instructions' do
    # An instruction without a body yields only start, name and end tokens.
    example 'lex a processing instruction' do
      tokens   = lex('<?foo?>')
      expected = [
        [:T_PROC_INS_START, nil, 1],
        [:T_PROC_INS_NAME, 'foo', 1],
        [:T_PROC_INS_END, nil, 1]
      ]

      tokens.should == expected
    end

    # Everything between the name and the closing "?>" (including the
    # surrounding whitespace) is expected as a single text token.
    example 'lex a processing instruction containing text' do
      tokens   = lex('<?foo bar ?>')
      expected = [
        [:T_PROC_INS_START, nil, 1],
        [:T_PROC_INS_NAME, 'foo', 1],
        [:T_TEXT, ' bar ', 1],
        [:T_PROC_INS_END, nil, 1]
      ]

      tokens.should == expected
    end
  end
end