diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 6c0c333..3d00f56 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -120,21 +120,6 @@ cdata_start = ''; - action start_cdata { - callback_simple("on_cdata_start"); - fcall cdata; - } - - # Machine that for processing the contents of CDATA tags. Everything - # inside a CDATA tag is treated as plain text. - cdata := |* - any* cdata_end => { - callback("on_text", data, encoding, ts, te - 3); - callback_simple("on_cdata_end"); - fret; - }; - *|; - # Comments # # http://www.w3.org/TR/html-markup/syntax.html#comments @@ -248,6 +233,10 @@ callback("on_comment", data, encoding, ts + 4, te - 3); }; + cdata_start any* cdata_end => { + callback("on_cdata", data, encoding, ts + 9, te - 3); + }; + # Enter the body of the tag. If HTML mode is enabled and the current # element is a void element we'll close it and bail out. '>' => { diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index bf15f4b..e58c452 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -203,17 +203,10 @@ module Oga end ## - # Called on the start of a CDATA tag. + # Called on a CDATA tag. # - def on_cdata_start - add_token(:T_CDATA_START) - end - - ## - # Called on the end of a CDATA tag. - # - def on_cdata_end - add_token(:T_CDATA_END) + def on_cdata(value) + add_token(:T_CDATA, value) end ## diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index d444512..118c906 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -12,7 +12,7 @@ class Oga::XML::Parser token T_STRING T_TEXT token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME token T_DOCTYPE_INLINE -token T_CDATA_START T_CDATA_END T_COMMENT +token T_CDATA T_COMMENT token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR token T_XML_DECL_START T_XML_DECL_END @@ -81,11 +81,8 @@ rule # CDATA tags cdata - # - : T_CDATA_START T_CDATA_END { on_cdata } - # - | T_CDATA_START T_TEXT T_CDATA_END { on_cdata(val[1]) } + | T_CDATA { on_cdata(val[0]) } ; # Comments diff --git a/spec/oga/xml/lexer/cdata_spec.rb b/spec/oga/xml/lexer/cdata_spec.rb index f4465dd..0e0887b 100644 --- a/spec/oga/xml/lexer/cdata_spec.rb +++ b/spec/oga/xml/lexer/cdata_spec.rb @@ -3,27 +3,15 @@ require 'spec_helper' describe Oga::XML::Lexer do context 'cdata tags' do example 'lex a cdata tag' do - lex('').should == [ - [:T_CDATA_START, nil, 1], - [:T_TEXT, 'foo', 1], - [:T_CDATA_END, nil, 1] - ] + lex('').should == [[:T_CDATA, 'foo', 1]] end example 'lex tags inside CDATA tags as regular text' do - lex('Foo

]]>').should == [ - [:T_CDATA_START, nil, 1], - [:T_TEXT, '

Foo

', 1], - [:T_CDATA_END, nil, 1] - ] + lex('Foo

]]>').should == [[:T_CDATA, '

Foo

', 1]] end example 'lex double brackets inside a CDATA tag' do - lex('').should == [ - [:T_CDATA_START, nil, 1], - [:T_TEXT, ']]', 1], - [:T_CDATA_END, nil, 1] - ] + lex('').should == [[:T_CDATA, ']]', 1]] end end end