Lex contents of <script> tags as plain text.
When lexing input in HTML mode, the lexer has to treat _all_ content of a <script> tag as plain text. This ensures that the lexer can correctly process input such as "x <y" and "// <foo>". Fixes #70.
This commit is contained in:
parent
351b5ac004
commit
ba2177e2cf
|
@ -17,7 +17,13 @@ on `ts` and `te`) so the macro ignores this argument.
|
||||||
liboga_xml_lexer_callback_simple(self, name);
|
liboga_xml_lexer_callback_simple(self, name);
|
||||||
|
|
||||||
#define advance_line(amount) \
|
#define advance_line(amount) \
|
||||||
rb_funcall(self, rb_intern("advance_line"), 1, INT2NUM(amount));
|
rb_funcall(self, id_advance_line, 1, INT2NUM(amount));
|
||||||
|
|
||||||
|
#define inside_html_script_p() \
|
||||||
|
rb_funcall(self, id_inside_html_script_p, 0) == Qtrue
|
||||||
|
|
||||||
|
ID id_advance_line;
|
||||||
|
ID id_inside_html_script_p;
|
||||||
|
|
||||||
%%machine c_lexer;
|
%%machine c_lexer;
|
||||||
|
|
||||||
|
@ -167,6 +173,9 @@ void Init_liboga_xml_lexer()
|
||||||
VALUE mXML = rb_const_get(mOga, rb_intern("XML"));
|
VALUE mXML = rb_const_get(mOga, rb_intern("XML"));
|
||||||
VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
|
VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
|
||||||
|
|
||||||
|
id_advance_line = rb_intern("advance_line");
|
||||||
|
id_inside_html_script_p = rb_intern("inside_html_script?");
|
||||||
|
|
||||||
rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
|
rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
|
||||||
rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
|
rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
|
||||||
|
|
||||||
|
|
|
@ -185,6 +185,17 @@ public class Lexer extends RubyObject
|
||||||
|
|
||||||
this.callMethod(context, "advance_line", lines);
|
this.callMethod(context, "advance_line", lines);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if we're in an HTML script tag. See
|
||||||
|
* Oga::XML::Lexer#inside_html_script? for more information.
|
||||||
|
*/
|
||||||
|
public Boolean inside_html_script_p()
|
||||||
|
{
|
||||||
|
ThreadContext context = this.runtime.getCurrentContext();
|
||||||
|
|
||||||
|
return this.callMethod(context, "inside_html_script?").isTrue();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
%%{
|
%%{
|
||||||
|
|
|
@ -327,7 +327,17 @@
|
||||||
# We're done with the open tag of the element.
|
# We're done with the open tag of the element.
|
||||||
'>' => {
|
'>' => {
|
||||||
callback_simple(id_on_element_open_end);
|
callback_simple(id_on_element_open_end);
|
||||||
|
|
||||||
|
if ( inside_html_script_p() )
|
||||||
|
{
|
||||||
|
mark = ts + 1;
|
||||||
|
|
||||||
|
fnext script_text;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
fnext main;
|
fnext main;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# Self closing tags.
|
# Self closing tags.
|
||||||
|
@ -391,6 +401,30 @@
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
|
# <script> tags in HTML can contain basically anything except for the
|
||||||
|
# literal "</script>". As a result of this we can't use the regular text
|
||||||
|
# machine.
|
||||||
|
script_text := |*
|
||||||
|
'</script>' => {
|
||||||
|
callback(id_on_text, data, encoding, mark, ts);
|
||||||
|
|
||||||
|
mark = 0;
|
||||||
|
|
||||||
|
if ( lines > 0 )
|
||||||
|
{
|
||||||
|
advance_line(lines);
|
||||||
|
|
||||||
|
lines = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
callback_simple(id_on_element_end);
|
||||||
|
|
||||||
|
fnext main;
|
||||||
|
};
|
||||||
|
|
||||||
|
any $count_newlines;
|
||||||
|
*|;
|
||||||
|
|
||||||
# The main machine aka the entry point of Ragel.
|
# The main machine aka the entry point of Ragel.
|
||||||
main := |*
|
main := |*
|
||||||
doctype_start => start_doctype;
|
doctype_start => start_doctype;
|
||||||
|
|
|
@ -40,6 +40,14 @@ module Oga
|
||||||
class Lexer
|
class Lexer
|
||||||
attr_reader :html
|
attr_reader :html
|
||||||
|
|
||||||
|
##
|
||||||
|
# Element name used to determine if a tag being processed is a JavaScript
|
||||||
|
# tag.
|
||||||
|
#
|
||||||
|
# @return [String]
|
||||||
|
#
|
||||||
|
SCRIPT_TAG = 'script'.freeze
|
||||||
|
|
||||||
##
|
##
|
||||||
# @param [String|IO] data The data to lex. This can either be a String or
|
# @param [String|IO] data The data to lex. This can either be a String or
|
||||||
# an IO instance.
|
# an IO instance.
|
||||||
|
@ -181,6 +189,15 @@ module Oga
|
||||||
return @elements.last
|
return @elements.last
|
||||||
end
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Returns true if the current element is the HTML `<script>` element.
|
||||||
|
#
|
||||||
|
# @return [TrueClass|FalseClass]
|
||||||
|
#
|
||||||
|
def inside_html_script?
|
||||||
|
return html? && current_element == SCRIPT_TAG
|
||||||
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Called when processing a single quote.
|
# Called when processing a single quote.
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue