diff --git a/ext/c/lexer.rl b/ext/c/lexer.rl
index 0defe64..73039dd 100644
--- a/ext/c/lexer.rl
+++ b/ext/c/lexer.rl
@@ -17,7 +17,13 @@ on `ts` and `te`) so the macro ignores this argument.
liboga_xml_lexer_callback_simple(self, name);
#define advance_line(amount) \
- rb_funcall(self, rb_intern("advance_line"), 1, INT2NUM(amount));
+ rb_funcall(self, id_advance_line, 1, INT2NUM(amount));
+
+/*
+ * Evaluates to non-zero when the lexer's Ruby method "inside_html_script?"
+ * returns true. The expansion is wrapped in parentheses so the macro can be
+ * negated or combined with other operators without precedence surprises.
+ */
+#define inside_html_script_p() \
+ (rb_funcall(self, id_inside_html_script_p, 0) == Qtrue)
+
+/*
+ * Method IDs used by the advance_line() and inside_html_script_p() macros.
+ * They are assigned in Init_liboga_xml_lexer() so rb_intern() is not called
+ * on every macro invocation.
+ */
+ID id_advance_line;
+ID id_inside_html_script_p;
%%machine c_lexer;
@@ -167,6 +173,9 @@ void Init_liboga_xml_lexer()
VALUE mXML = rb_const_get(mOga, rb_intern("XML"));
VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
+ id_advance_line = rb_intern("advance_line");
+ id_inside_html_script_p = rb_intern("inside_html_script?");
+
rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
diff --git a/ext/java/org/liboga/xml/Lexer.rl b/ext/java/org/liboga/xml/Lexer.rl
index d746dd2..a359234 100644
--- a/ext/java/org/liboga/xml/Lexer.rl
+++ b/ext/java/org/liboga/xml/Lexer.rl
@@ -185,6 +185,17 @@ public class Lexer extends RubyObject
this.callMethod(context, "advance_line", lines);
}
+
+ /**
+ * Calls the Ruby method Oga::XML::Lexer#inside_html_script? on this lexer
+ * and reports whether it returned true. See that method for the rules
+ * used to decide if the lexer is inside an HTML script tag.
+ */
+ public Boolean inside_html_script_p()
+ {
+ return this.callMethod(
+ this.runtime.getCurrentContext(),
+ "inside_html_script?"
+ ).isTrue();
+ }
}
%%{
diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl
index 305f9e2..9b107d9 100644
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@@ -327,7 +327,17 @@
# We're done with the open tag of the element.
+ #
+ # When inside_html_script_p() reports an HTML script element, the body is
+ # lexed by the script_text machine instead of main. "mark" is set to the
+ # position right after the ">" so that machine can emit everything from
+ # there up to the closing tag as a single text token.
'>' => {
callback_simple(id_on_element_open_end);
- fnext main;
+
+ if ( inside_html_script_p() )
+ {
+ mark = ts + 1;
+
+ fnext script_text;
+ }
+ else
+ {
+ fnext main;
+ }
};
# Self closing tags.
@@ -391,6 +401,30 @@
};
*|;
+ # <script> tags in HTML can contain basically anything except for the
+ # literal "</script>". As a result of this we can't use the regular text
+ # machine.
+ script_text := |*
+ '</script>' => {
+ callback(id_on_text, data, encoding, mark, ts);
+
+ mark = 0;
+
+ if ( lines > 0 )
+ {
+ advance_line(lines);
+
+ lines = 0;
+ }
+
+ callback_simple(id_on_element_end);
+
+ fnext main;
+ };
+
+ any $count_newlines;
+ *|;
+
# The main machine aka the entry point of Ragel.
main := |*
doctype_start => start_doctype;
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index fa1757c..14f0784 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -40,6 +40,14 @@ module Oga
class Lexer
attr_reader :html
+ ##
+ # Element name used to determine if a tag being processed is an HTML
+ # `<script>` tag.
+ #
+ # @return [String]
+ #
+ SCRIPT_TAG = 'script'.freeze
+
##
# @param [String|IO] data The data to lex. This can either be a String or
# an IO instance.
@@ -181,6 +189,15 @@ module Oga
return @elements.last
end
+ ##
+ # Returns true if the current element is the HTML `