Fixed lexing of XML CDATA tags.

This commit is contained in:
Yorick Peterse 2014-08-15 20:47:58 +02:00
parent 81edce2eb8
commit 4e8cca258c
2 changed files with 35 additions and 5 deletions

View File

@@ -76,7 +76,27 @@
# In HTML CDATA tags have no meaning/are not supported. Oga does
# support them but treats their contents as plain text.
#
cdata = '<![CDATA[' any* ']]>';
# Literal delimiters of a CDATA section.
cdata_start = '<![CDATA[';
cdata_end = ']]>';
# Entering action for a CDATA section. `mark` is set to `ts + 9`, i.e. just
# past the 9-character opening delimiter, and `fnext` makes the dedicated
# `cdata_body` scanner the next state so the body is not lexed as markup.
action start_cdata {
mark = ts + 9;
fnext cdata_body;
}
# Scanner for the inside of a CDATA section.
#
# When the closing delimiter is matched, "on_cdata" is reported with the
# range from `mark` up to `te - 3` (3 being the length of the closing
# delimiter), `mark` is reset to 0 and control returns to `main`. Stopping at
# the first closing delimiter keeps a later CDATA section in the same input
# from being merged into this one.
#
# NOTE(review): `mark` and `te - 3` are presumably the start/end offsets of
# the CDATA text within `data`; confirm against the `callback` implementation.
cdata_body := |*
cdata_end => {
callback("on_cdata", data, encoding, mark, te - 3);
mark = 0;
fnext main;
};
# Any other character belongs to the CDATA text; it is consumed without
# running an action.
any;
*|;
# Strings
#
@@ -236,10 +256,7 @@
doctype_start => start_doctype; doctype_start => start_doctype;
xml_decl_start => start_xml_decl; xml_decl_start => start_xml_decl;
comment_start => start_comment; comment_start => start_comment;
cdata_start => start_cdata;
cdata => {
callback("on_cdata", data, encoding, ts + 9, te - 3);
};
# The start of an element. # The start of an element.
'<' => start_element; '<' => start_element;

View File

@@ -13,5 +13,18 @@ describe Oga::XML::Lexer do
# A `]]` that is not directly followed by `>` is part of the CDATA text and
# must not terminate the section.
example 'lex double brackets inside a CDATA tag' do
  lex('<![CDATA[]]]]>').should == [[:T_CDATA, ']]', 1]]
end
# Two CDATA sections in one document: each must produce its own T_CDATA token
# holding only its own text ('foo' and 'bar' respectively).
example 'lex two CDATA tags following each other' do
  input = '<a><![CDATA[foo]]><b><![CDATA[bar]]></b></a>'

  expected = [
    [:T_ELEM_START, nil, 1],
    [:T_ELEM_NAME, 'a', 1],
    [:T_CDATA, 'foo', 1],
    [:T_ELEM_START, nil, 1],
    [:T_ELEM_NAME, 'b', 1],
    [:T_CDATA, 'bar', 1],
    [:T_ELEM_END, nil, 1],
    [:T_ELEM_END, nil, 1]
  ]

  lex(input).should == expected
end
end end
end end