Lex processing instructions in chunks
Similar to comments (ea8b4aa92f) and CDATA tags (8acc7fc743), processing instructions are now lexed in separate chunks _with_ proper support for streaming input. Related issue: #93
This commit is contained in:
parent
ea8b4aa92f
commit
b2ea20ba61
|
@ -109,6 +109,7 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
||||||
ID id_on_proc_ins_end = rb_intern("on_proc_ins_end");
|
ID id_on_proc_ins_end = rb_intern("on_proc_ins_end");
|
||||||
ID id_on_proc_ins_name = rb_intern("on_proc_ins_name");
|
ID id_on_proc_ins_name = rb_intern("on_proc_ins_name");
|
||||||
ID id_on_proc_ins_start = rb_intern("on_proc_ins_start");
|
ID id_on_proc_ins_start = rb_intern("on_proc_ins_start");
|
||||||
|
ID id_on_proc_ins_body = rb_intern("on_proc_ins_body");
|
||||||
ID id_on_string_body = rb_intern("on_string_body");
|
ID id_on_string_body = rb_intern("on_string_body");
|
||||||
ID id_on_string_dquote = rb_intern("on_string_dquote");
|
ID id_on_string_dquote = rb_intern("on_string_dquote");
|
||||||
ID id_on_string_squote = rb_intern("on_string_squote");
|
ID id_on_string_squote = rb_intern("on_string_squote");
|
||||||
|
|
|
@ -123,6 +123,7 @@ public class Lexer extends RubyObject
|
||||||
String id_on_proc_ins_end = "on_proc_ins_end";
|
String id_on_proc_ins_end = "on_proc_ins_end";
|
||||||
String id_on_proc_ins_name = "on_proc_ins_name";
|
String id_on_proc_ins_name = "on_proc_ins_name";
|
||||||
String id_on_proc_ins_start = "on_proc_ins_start";
|
String id_on_proc_ins_start = "on_proc_ins_start";
|
||||||
|
String id_on_proc_ins_body = "on_proc_ins_body";
|
||||||
String id_on_string_body = "on_string_body";
|
String id_on_string_body = "on_string_body";
|
||||||
String id_on_string_dquote = "on_string_dquote";
|
String id_on_string_dquote = "on_string_dquote";
|
||||||
String id_on_string_squote = "on_string_squote";
|
String id_on_string_squote = "on_string_squote";
|
||||||
|
|
|
@ -149,26 +149,33 @@
|
||||||
proc_ins_start = '<?' identifier;
|
proc_ins_start = '<?' identifier;
|
||||||
proc_ins_end = '?>';
|
proc_ins_end = '?>';
|
||||||
|
|
||||||
|
# Everything except "?" OR a single "?"
|
||||||
|
proc_ins_allowed = (^'?'+ | '?') $count_newlines;
|
||||||
|
|
||||||
action start_proc_ins {
|
action start_proc_ins {
|
||||||
callback_simple(id_on_proc_ins_start);
|
callback_simple(id_on_proc_ins_start);
|
||||||
callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
|
callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
|
||||||
|
|
||||||
mark = te;
|
|
||||||
|
|
||||||
fnext proc_ins_body;
|
fnext proc_ins_body;
|
||||||
}
|
}
|
||||||
|
|
||||||
proc_ins_body := |*
|
proc_ins_body := |*
|
||||||
proc_ins_end => {
|
proc_ins_allowed => {
|
||||||
callback(id_on_text, data, encoding, mark, ts);
|
callback(id_on_proc_ins_body, data, encoding, ts, te);
|
||||||
callback_simple(id_on_proc_ins_end);
|
|
||||||
|
|
||||||
mark = 0;
|
if ( lines > 0 )
|
||||||
|
{
|
||||||
|
advance_line(lines);
|
||||||
|
|
||||||
|
lines = 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
proc_ins_end => {
|
||||||
|
callback_simple(id_on_proc_ins_end);
|
||||||
|
|
||||||
fnext main;
|
fnext main;
|
||||||
};
|
};
|
||||||
|
|
||||||
any;
|
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
# Strings
|
# Strings
|
||||||
|
|
|
@ -344,6 +344,15 @@ module Oga
|
||||||
add_token(:T_PROC_INS_NAME, value)
|
add_token(:T_PROC_INS_NAME, value)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Called on the body of a processing instruction.
|
||||||
|
#
|
||||||
|
# @param [String] value
|
||||||
|
#
|
||||||
|
def on_proc_ins_body(value)
|
||||||
|
add_token(:T_PROC_INS_BODY, value)
|
||||||
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Called on the end of a processing instruction.
|
# Called on the end of a processing instruction.
|
||||||
#
|
#
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
%terminals T_CDATA_START T_CDATA_BODY T_CDATA_END;
|
%terminals T_CDATA_START T_CDATA_BODY T_CDATA_END;
|
||||||
%terminals T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR T_ATTR_NS;
|
%terminals T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR T_ATTR_NS;
|
||||||
%terminals T_XML_DECL_START T_XML_DECL_END;
|
%terminals T_XML_DECL_START T_XML_DECL_END;
|
||||||
%terminals T_PROC_INS_START T_PROC_INS_NAME T_PROC_INS_END;
|
%terminals T_PROC_INS_START T_PROC_INS_NAME T_PROC_INS_BODY T_PROC_INS_END;
|
||||||
|
|
||||||
document
|
document
|
||||||
= expressions { on_document(val[0]) }
|
= expressions { on_document(val[0]) }
|
||||||
|
@ -117,12 +117,17 @@ comment_body
|
||||||
# Processing Instructions
|
# Processing Instructions
|
||||||
|
|
||||||
proc_ins
|
proc_ins
|
||||||
= T_PROC_INS_START T_PROC_INS_NAME T_TEXT? T_PROC_INS_END
|
= T_PROC_INS_START T_PROC_INS_NAME proc_ins_body T_PROC_INS_END
|
||||||
{
|
{
|
||||||
on_proc_ins(val[1], val[2])
|
on_proc_ins(val[1], val[2])
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
proc_ins_body
|
||||||
|
= T_PROC_INS_BODY proc_ins_body { val[0] + val[1] }
|
||||||
|
| _ { '' }
|
||||||
|
;
|
||||||
|
|
||||||
# Elements
|
# Elements
|
||||||
|
|
||||||
element_name_ns
|
element_name_ns
|
||||||
|
|
|
@ -2,7 +2,7 @@ require 'spec_helper'
|
||||||
|
|
||||||
describe Oga::XML::Lexer do
|
describe Oga::XML::Lexer do
|
||||||
describe 'processing instructions' do
|
describe 'processing instructions' do
|
||||||
it 'lexes a processing instruction' do
|
it 'lexes an instruction' do
|
||||||
lex('<?foo?>').should == [
|
lex('<?foo?>').should == [
|
||||||
[:T_PROC_INS_START, nil, 1],
|
[:T_PROC_INS_START, nil, 1],
|
||||||
[:T_PROC_INS_NAME, 'foo', 1],
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
@ -10,13 +10,95 @@ describe Oga::XML::Lexer do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'lexes a processing instruction containing text' do
|
it 'lexes an instruction containing text' do
|
||||||
lex('<?foo bar ?>').should == [
|
lex('<?foo bar ?>').should == [
|
||||||
[:T_PROC_INS_START, nil, 1],
|
[:T_PROC_INS_START, nil, 1],
|
||||||
[:T_PROC_INS_NAME, 'foo', 1],
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
[:T_TEXT, ' bar ', 1],
|
[:T_PROC_INS_BODY, ' bar ', 1],
|
||||||
[:T_PROC_INS_END, nil, 1]
|
[:T_PROC_INS_END, nil, 1]
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes an instruction containing a ?' do
|
||||||
|
lex('<?foo ? ?>').should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, ' ', 1],
|
||||||
|
[:T_PROC_INS_BODY, '?', 1],
|
||||||
|
[:T_PROC_INS_BODY, ' ', 1],
|
||||||
|
[:T_PROC_INS_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes two instructions following each other' do
|
||||||
|
lex('<?foo bar ?><?foo baz ?>').should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, ' bar ', 1],
|
||||||
|
[:T_PROC_INS_END, nil, 1],
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, ' baz ', 1],
|
||||||
|
[:T_PROC_INS_END, nil, 1]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an instruction with a newline after the name' do
|
||||||
|
lex("<?foo\nbar?>").should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, "\nbar", 1],
|
||||||
|
[:T_PROC_INS_END, nil, 2]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an instruction with a newline before the closing tag' do
|
||||||
|
lex("<?foo bar\n?>").should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, " bar\n", 1],
|
||||||
|
[:T_PROC_INS_END, nil, 2]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an instruction with the body surrounded by newlines' do
|
||||||
|
lex("<?foo\nbar\n?>").should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, "\nbar\n", 1],
|
||||||
|
[:T_PROC_INS_END, nil, 3]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
describe 'using an IO as input' do
|
||||||
|
it 'lexes an instruction with a newline after the name' do
|
||||||
|
lex_stringio("<?foo\nbar?>").should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, "\n", 1],
|
||||||
|
[:T_PROC_INS_BODY, "bar", 2],
|
||||||
|
[:T_PROC_INS_END, nil, 2]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an instruction with a newline before the closing tag' do
|
||||||
|
lex_stringio("<?foo bar\n?>").should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, " bar\n", 1],
|
||||||
|
[:T_PROC_INS_END, nil, 2]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an instruction with the body surrounded by newlines' do
|
||||||
|
lex_stringio("<?foo\nbar\n?>").should == [
|
||||||
|
[:T_PROC_INS_START, nil, 1],
|
||||||
|
[:T_PROC_INS_NAME, 'foo', 1],
|
||||||
|
[:T_PROC_INS_BODY, "\n", 1],
|
||||||
|
[:T_PROC_INS_BODY, "bar\n", 2],
|
||||||
|
[:T_PROC_INS_END, nil, 3]
|
||||||
|
]
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue