Moved various rules around for the XML lexer.
This moves the element-related rules to the element_head machine (where they belong). This in turn makes it possible to lex ">" as a text node; previously this was impossible.
This commit is contained in:
parent
feaf28d423
commit
c56b0395e4
|
@ -183,15 +183,20 @@
|
||||||
# Attribute values.
|
# Attribute values.
|
||||||
string => emit_string;
|
string => emit_string;
|
||||||
|
|
||||||
# The closing character of the open tag.
|
# We're done with the open tag of the element.
|
||||||
('>' | '/') => {
|
'>' => {
|
||||||
fhold;
|
callback_simple("on_element_open_end");
|
||||||
|
fret;
|
||||||
|
};
|
||||||
|
|
||||||
|
# Self closing tags.
|
||||||
|
'/>' => {
|
||||||
|
callback_simple("on_element_end");
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
main := |*
|
main := |*
|
||||||
'<' => start_element;
|
|
||||||
doctype_start => start_doctype;
|
doctype_start => start_doctype;
|
||||||
xml_decl_start => start_xml_decl;
|
xml_decl_start => start_xml_decl;
|
||||||
|
|
||||||
|
@ -218,25 +223,17 @@
|
||||||
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
||||||
};
|
};
|
||||||
|
|
||||||
# Enter the body of the tag. If HTML mode is enabled and the current
|
# The start of an element.
|
||||||
# element is a void element we'll close it and bail out.
|
'<' => start_element;
|
||||||
'>' => {
|
|
||||||
callback_simple("on_element_open_end");
|
|
||||||
};
|
|
||||||
|
|
||||||
# Regular closing tags.
|
# Regular closing tags.
|
||||||
'</' identifier '>' => {
|
'</' identifier '>' => {
|
||||||
callback_simple("on_element_end");
|
callback_simple("on_element_end");
|
||||||
};
|
};
|
||||||
|
|
||||||
# Self closing elements that are not handled by the HTML mode.
|
# Treat everything else, except for "<", as regular text. The "<" sign
|
||||||
'/>' => {
|
# is used for tags so we can't emit text nodes for these characters.
|
||||||
callback_simple("on_element_end");
|
^'<'+ => {
|
||||||
};
|
|
||||||
|
|
||||||
# Note that this rule should be declared at the very bottom as it
|
|
||||||
# will otherwise take precedence over the other rules.
|
|
||||||
^('<' | '>')+ => {
|
|
||||||
callback("on_text", data, encoding, ts, te);
|
callback("on_text", data, encoding, ts, te);
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
|
@ -15,9 +15,11 @@ describe Oga::XML::Lexer do
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'lex text followed by a newline' do
|
example 'lex text followed by a newline' do
|
||||||
lex("foo\n").should == [
|
lex("foo\n").should == [[:T_TEXT, "foo\n", 1]]
|
||||||
[:T_TEXT, "foo\n", 1]
|
end
|
||||||
]
|
|
||||||
|
example 'lex a > as regular text' do
|
||||||
|
lex('>').should == [[:T_TEXT, '>', 1]]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue