diff --git a/ext/c/lexer.rl b/ext/c/lexer.rl
index 73039dd..9b50826 100644
--- a/ext/c/lexer.rl
+++ b/ext/c/lexer.rl
@@ -19,11 +19,11 @@ on `ts` and `te`) so the macro ignores this argument.
#define advance_line(amount) \
rb_funcall(self, id_advance_line, 1, INT2NUM(amount));
-#define inside_html_script_p() \
- rb_funcall(self, id_inside_html_script_p, 0) == Qtrue
+#define literal_html_element_p() \
+ rb_funcall(self, id_literal_html_element_p, 0) == Qtrue
ID id_advance_line;
-ID id_inside_html_script_p;
+ID id_literal_html_element_p;
%%machine c_lexer;
@@ -173,8 +173,8 @@ void Init_liboga_xml_lexer()
VALUE mXML = rb_const_get(mOga, rb_intern("XML"));
VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
- id_advance_line = rb_intern("advance_line");
- id_inside_html_script_p = rb_intern("inside_html_script?");
+ id_advance_line = rb_intern("advance_line");
+ id_literal_html_element_p = rb_intern("literal_html_element?");
rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
diff --git a/ext/java/org/liboga/xml/Lexer.rl b/ext/java/org/liboga/xml/Lexer.rl
index a359234..ae3a6eb 100644
--- a/ext/java/org/liboga/xml/Lexer.rl
+++ b/ext/java/org/liboga/xml/Lexer.rl
@@ -187,14 +187,13 @@ public class Lexer extends RubyObject
}
/**
- * Returns true if we're in an HTML script tag. See
- * Oga::XML::Lexer#inside_html_script? for more information.
+ * See Oga::XML::Lexer#literal_html_element? for more information.
*/
- public Boolean inside_html_script_p()
+ public Boolean literal_html_element_p()
{
ThreadContext context = this.runtime.getCurrentContext();
- return this.callMethod(context, "inside_html_script?").isTrue();
+ return this.callMethod(context, "literal_html_element?").isTrue();
}
}
diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl
index 9b107d9..0b47ad3 100644
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@@ -328,11 +328,11 @@
'>' => {
callback_simple(id_on_element_open_end);
- if ( inside_html_script_p() )
+ if ( literal_html_element_p() )
{
mark = ts + 1;
- fnext script_text;
+ fnext literal_html_element;
}
else
{
@@ -401,11 +401,11 @@
};
*|;
- # <script> tags in HTML can contain basically anything except for the
- # literal "</script>". As a result of this we can't use the regular text
- # machine.
- script_text := |*
- '' => {
+ # Certain tags in HTML can contain basically anything except for the literal
+ # closing tag. Two examples are script and style tags. As a result of this
+ # we can't use the regular text machine.
+ literal_html_element := |*
+ '' | '' => {
callback(id_on_text, data, encoding, mark, ts);
mark = 0;
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index 14f0784..3d2495e 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -41,12 +41,11 @@ module Oga
attr_reader :html
##
- # Element name used to determine if a tag being processed is a Javascript
- # tag.
+ # Names of the HTML tags whose content should be lexed as-is.
#
- # @return [String]
+ # @return [Array]
#
- SCRIPT_TAG = 'script'.freeze
+ LITERAL_HTML_ELEMENTS = %w{script style}
##
# @param [String|IO] data The data to lex. This can either be a String or
@@ -190,12 +189,12 @@ module Oga
end
##
- # Returns true if the current element is the HTML `', :html => true).should == [
[:T_ELEM_START, nil, 1],
[:T_ELEM_NAME, 'script', 1],
diff --git a/spec/oga/xml/lexer/html_style_spec.rb b/spec/oga/xml/lexer/html_style_spec.rb
new file mode 100644
index 0000000..cfa3907
--- /dev/null
+++ b/spec/oga/xml/lexer/html_style_spec.rb
@@ -0,0 +1,14 @@
+require 'spec_helper'
+
+describe Oga::XML::Lexer do
+ describe 'HTML style elements' do
+ it 'treats the content of a style tag as plain text' do
+ lex('', :html => true).should == [
+ [:T_ELEM_START, nil, 1],
+ [:T_ELEM_NAME, 'style', 1],
+ [:T_TEXT, 'foo y'
end
end
+
+ describe 'inside an HTML