Moved various rules around for the XML lexer.

This moves the element-related rules to the element_head machine (where they
belong). This in turn makes it possible to lex ">" as a text node, which was
previously impossible.
This commit is contained in:
Yorick Peterse 2014-05-21 00:04:53 +02:00
parent feaf28d423
commit c56b0395e4
2 changed files with 19 additions and 20 deletions

View File

@ -183,15 +183,20 @@
# Attribute values. # Attribute values.
string => emit_string; string => emit_string;
# The closing character of the open tag. # We're done with the open tag of the element.
('>' | '/') => { '>' => {
fhold; callback_simple("on_element_open_end");
fret;
};
# Self closing tags.
'/>' => {
callback_simple("on_element_end");
fret; fret;
}; };
*|; *|;
main := |* main := |*
'<' => start_element;
doctype_start => start_doctype; doctype_start => start_doctype;
xml_decl_start => start_xml_decl; xml_decl_start => start_xml_decl;
@ -218,25 +223,17 @@
callback("on_cdata", data, encoding, ts + 9, te - 3); callback("on_cdata", data, encoding, ts + 9, te - 3);
}; };
# Enter the body of the tag. If HTML mode is enabled and the current # The start of an element.
# element is a void element we'll close it and bail out. '<' => start_element;
'>' => {
callback_simple("on_element_open_end");
};
# Regular closing tags. # Regular closing tags.
'</' identifier '>' => { '</' identifier '>' => {
callback_simple("on_element_end"); callback_simple("on_element_end");
}; };
# Self closing elements that are not handled by the HTML mode. # Treat everything else, except for "<", as regular text. The "<" sign
'/>' => { # is used for tags so we can't emit text nodes for these characters.
callback_simple("on_element_end"); ^'<'+ => {
};
# Note that this rule should be declared at the very bottom as it
# will otherwise take precedence over the other rules.
^('<' | '>')+ => {
callback("on_text", data, encoding, ts, te); callback("on_text", data, encoding, ts, te);
}; };
*|; *|;

View File

@ -15,9 +15,11 @@ describe Oga::XML::Lexer do
end end
example 'lex text followed by a newline' do example 'lex text followed by a newline' do
lex("foo\n").should == [ lex("foo\n").should == [[:T_TEXT, "foo\n", 1]]
[:T_TEXT, "foo\n", 1] end
]
example 'lex a > as regular text' do
lex('>').should == [[:T_TEXT, '>', 1]]
end end
end end
end end