Moved various rules around for the XML lexer.

This moves the element-related rules to the element_head machine (where they belong). This in turn makes it possible to lex ">" as a text node, which was previously impossible.
2014-05-21 00:04:53 +02:00 · 2014-05-21 00:04:53 +02:00 · c56b0395e4
parent feaf28d423
commit c56b0395e4
2 changed files with 19 additions and 20 deletions
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@ -183,15 +183,20 @@
        # Attribute values.
        string => emit_string;
-        # The closing character of the open tag.
+        # We're done with the open tag of the element.
-        ('>' | '/') => {
+        '>' => {
-            fhold;
+            callback_simple("on_element_open_end");
            fret;
        };
        # Self closing tags.
        '/>' => {
            callback_simple("on_element_end");
            fret;
        };
    *|;
    main := |*
        '<'            => start_element;
        doctype_start  => start_doctype;
        xml_decl_start => start_xml_decl;
@ -218,25 +223,17 @@
            callback("on_cdata", data, encoding, ts + 9, te - 3);
        };
-        # Enter the body of the tag. If HTML mode is enabled and the current
+        # The start of an element.
-        # element is a void element we'll close it and bail out.
+        '<' => start_element;
        '>' => {
            callback_simple("on_element_open_end");
        };
        # Regular closing tags.
        '</' identifier '>' => {
            callback_simple("on_element_end");
        };
-        # Self closing elements that are not handled by the HTML mode.
+        # Treat everything else, except for "<", as regular text. The "<" sign
-        '/>' => {
+        # is used for tags so we can't emit text nodes for these characters.
-            callback_simple("on_element_end");
+        ^'<'+ => {
        };
        # Note that this rule should be declared at the very bottom as it
        # will otherwise take precedence over the other rules.
        ^('<' | '>')+ => {
            callback("on_text", data, encoding, ts, te);
        };
    *|;
--- a/spec/oga/xml/lexer/general_spec.rb
+++ b/spec/oga/xml/lexer/general_spec.rb
@ -15,9 +15,11 @@ describe Oga::XML::Lexer do
    end
    example 'lex text followed by a newline' do
-      lex("foo\n").should == [
+      lex("foo\n").should == [[:T_TEXT, "foo\n", 1]]
-        [:T_TEXT, "foo\n", 1]
+    end
-      ]
+
    example 'lex a > as regular text' do
      lex('>').should == [[:T_TEXT, '>', 1]]
    end
  end
 end