diff --git a/ext/c/lexer.rl b/ext/c/lexer.rl index 73039dd..9b50826 100644 --- a/ext/c/lexer.rl +++ b/ext/c/lexer.rl @@ -19,11 +19,11 @@ on `ts` and `te`) so the macro ignores this argument. #define advance_line(amount) \ rb_funcall(self, id_advance_line, 1, INT2NUM(amount)); -#define inside_html_script_p() \ - rb_funcall(self, id_inside_html_script_p, 0) == Qtrue +#define literal_html_element_p() \ + rb_funcall(self, id_literal_html_element_p, 0) == Qtrue ID id_advance_line; -ID id_inside_html_script_p; +ID id_literal_html_element_p; %%machine c_lexer; @@ -173,8 +173,8 @@ void Init_liboga_xml_lexer() VALUE mXML = rb_const_get(mOga, rb_intern("XML")); VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject); - id_advance_line = rb_intern("advance_line"); - id_inside_html_script_p = rb_intern("inside_html_script?"); + id_advance_line = rb_intern("advance_line"); + id_literal_html_element_p = rb_intern("literal_html_element?"); rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1); rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0); diff --git a/ext/java/org/liboga/xml/Lexer.rl b/ext/java/org/liboga/xml/Lexer.rl index a359234..ae3a6eb 100644 --- a/ext/java/org/liboga/xml/Lexer.rl +++ b/ext/java/org/liboga/xml/Lexer.rl @@ -187,14 +187,13 @@ public class Lexer extends RubyObject } /** - * Returns true if we're in an HTML script tag. See - * Oga::XML::Lexer#inside_html_script? for more information. + * See * Oga::XML::Lexer#literal_html_element? for more information. */ - public Boolean inside_html_script_p() + public Boolean literal_html_element_p() { ThreadContext context = this.runtime.getCurrentContext(); - return this.callMethod(context, "inside_html_script?").isTrue(); + return this.callMethod(context, "literal_html_element?").isTrue(); } } diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 9b107d9..0b47ad3 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -328,11 +328,11 @@ '>' => { callback_simple(id_on_element_open_end); - if ( inside_html_script_p() ) + if ( literal_html_element_p() ) { mark = ts + 1; - fnext script_text; + fnext literal_html_element; } else { @@ -401,11 +401,11 @@ }; *|; - # ". As a result of this we can't use the regular text - # machine. - script_text := |* - '' => { + # Certain tags in HTML can contain basically anything except for the literal + # closing tag. Two examples are script and style tags. As a result of this + # we can't use the regular text machine. + literal_html_element := |* + '' | '' => { callback(id_on_text, data, encoding, mark, ts); mark = 0; diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 14f0784..3d2495e 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -41,12 +41,11 @@ module Oga attr_reader :html ## - # Element name used to determine if a tag being processed is a Javascript - # tag. + # Names of HTML tags of which the content should be lexed as-is. # - # @return [String] + # @return [Array] # - SCRIPT_TAG = 'script'.freeze + LITERAL_HTML_ELEMENTS = %w{script style} ## # @param [String|IO] data The data to lex. This can either be a String or @@ -190,12 +189,12 @@ module Oga end ## - # Returns true if the current element is the HTML `', :html => true).should == [ [:T_ELEM_START, nil, 1], [:T_ELEM_NAME, 'script', 1], diff --git a/spec/oga/xml/lexer/html_style_spec.rb b/spec/oga/xml/lexer/html_style_spec.rb new file mode 100644 index 0000000..cfa3907 --- /dev/null +++ b/spec/oga/xml/lexer/html_style_spec.rb @@ -0,0 +1,14 @@ +require 'spec_helper' + +describe Oga::XML::Lexer do + describe 'HTML style elements' do + it 'treats the content of a style tag as plain text' do + lex('', :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_TEXT, 'foo y' end end + + describe 'inside an HTML