Merge multiple CDATA tokens into a single token.
The tokens T_CDATA_START, T_TEXT and T_CDATA_END have been merged together into T_CDATA.
This commit is contained in:
parent
a4fb5c1299
commit
cd0f3380c4
|
@ -120,21 +120,6 @@
|
||||||
cdata_start = '<![CDATA[';
|
cdata_start = '<![CDATA[';
|
||||||
cdata_end = ']]>';
|
cdata_end = ']]>';
|
||||||
|
|
||||||
action start_cdata {
|
|
||||||
callback_simple("on_cdata_start");
|
|
||||||
fcall cdata;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Machine for processing the contents of CDATA tags. Everything
|
|
||||||
# inside a CDATA tag is treated as plain text.
|
|
||||||
cdata := |*
|
|
||||||
any* cdata_end => {
|
|
||||||
callback("on_text", data, encoding, ts, te - 3);
|
|
||||||
callback_simple("on_cdata_end");
|
|
||||||
fret;
|
|
||||||
};
|
|
||||||
*|;
|
|
||||||
|
|
||||||
# Comments
|
# Comments
|
||||||
#
|
#
|
||||||
# http://www.w3.org/TR/html-markup/syntax.html#comments
|
# http://www.w3.org/TR/html-markup/syntax.html#comments
|
||||||
|
@ -248,6 +233,10 @@
|
||||||
callback("on_comment", data, encoding, ts + 4, te - 3);
|
callback("on_comment", data, encoding, ts + 4, te - 3);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
cdata_start any* cdata_end => {
|
||||||
|
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
||||||
|
};
|
||||||
|
|
||||||
# Enter the body of the tag. If HTML mode is enabled and the current
|
# Enter the body of the tag. If HTML mode is enabled and the current
|
||||||
# element is a void element we'll close it and bail out.
|
# element is a void element we'll close it and bail out.
|
||||||
'>' => {
|
'>' => {
|
||||||
|
|
|
@ -203,17 +203,10 @@ module Oga
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Called on the start of a CDATA tag.
|
# Called on a CDATA tag.
|
||||||
#
|
#
|
||||||
def on_cdata_start
|
def on_cdata(value)
|
||||||
add_token(:T_CDATA_START)
|
add_token(:T_CDATA, value)
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Called on the end of a CDATA tag.
|
|
||||||
#
|
|
||||||
def on_cdata_end
|
|
||||||
add_token(:T_CDATA_END)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|
|
@ -12,7 +12,7 @@ class Oga::XML::Parser
|
||||||
token T_STRING T_TEXT
|
token T_STRING T_TEXT
|
||||||
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME
|
||||||
token T_DOCTYPE_INLINE
|
token T_DOCTYPE_INLINE
|
||||||
token T_CDATA_START T_CDATA_END T_COMMENT
|
token T_CDATA T_COMMENT
|
||||||
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR
|
||||||
token T_XML_DECL_START T_XML_DECL_END
|
token T_XML_DECL_START T_XML_DECL_END
|
||||||
|
|
||||||
|
@ -81,11 +81,8 @@ rule
|
||||||
# CDATA tags
|
# CDATA tags
|
||||||
|
|
||||||
cdata
|
cdata
|
||||||
# <![CDATA[]]>
|
|
||||||
: T_CDATA_START T_CDATA_END { on_cdata }
|
|
||||||
|
|
||||||
# <![CDATA[foo]]>
|
# <![CDATA[foo]]>
|
||||||
| T_CDATA_START T_TEXT T_CDATA_END { on_cdata(val[1]) }
|
| T_CDATA { on_cdata(val[0]) }
|
||||||
;
|
;
|
||||||
|
|
||||||
# Comments
|
# Comments
|
||||||
|
|
|
@ -3,27 +3,15 @@ require 'spec_helper'
|
||||||
describe Oga::XML::Lexer do
|
describe Oga::XML::Lexer do
|
||||||
context 'cdata tags' do
|
context 'cdata tags' do
|
||||||
example 'lex a cdata tag' do
|
example 'lex a cdata tag' do
|
||||||
lex('<![CDATA[foo]]>').should == [
|
lex('<![CDATA[foo]]>').should == [[:T_CDATA, 'foo', 1]]
|
||||||
[:T_CDATA_START, nil, 1],
|
|
||||||
[:T_TEXT, 'foo', 1],
|
|
||||||
[:T_CDATA_END, nil, 1]
|
|
||||||
]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex tags inside CDATA tags as regular text' do
|
example 'lex tags inside CDATA tags as regular text' do
|
||||||
lex('<![CDATA[<p>Foo</p>]]>').should == [
|
lex('<![CDATA[<p>Foo</p>]]>').should == [[:T_CDATA, '<p>Foo</p>', 1]]
|
||||||
[:T_CDATA_START, nil, 1],
|
|
||||||
[:T_TEXT, '<p>Foo</p>', 1],
|
|
||||||
[:T_CDATA_END, nil, 1]
|
|
||||||
]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex double brackets inside a CDATA tag' do
|
example 'lex double brackets inside a CDATA tag' do
|
||||||
lex('<![CDATA[]]]]>').should == [
|
lex('<![CDATA[]]]]>').should == [[:T_CDATA, ']]', 1]]
|
||||||
[:T_CDATA_START, nil, 1],
|
|
||||||
[:T_TEXT, ']]', 1],
|
|
||||||
[:T_CDATA_END, nil, 1]
|
|
||||||
]
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue