Split up handling of element names/namespaces.

The split is now done at the Ragel level, which simplifies the corresponding Ruby code.
This commit is contained in:
Yorick Peterse 2014-05-15 10:22:05 +02:00
parent 723a273e4f
commit 44bf1dd1ca
2 changed files with 39 additions and 16 deletions

View File

@ -30,7 +30,8 @@
newline = '\n' | '\r\n'; newline = '\n' | '\r\n';
whitespace = [ \t]; whitespace = [ \t];
identifier = [a-zA-Z0-9\-_:]+; identifier = [a-zA-Z0-9\-_]+;
attribute = [a-zA-Z0-9\-_:]+;
# Strings # Strings
# #
@ -183,7 +184,7 @@
}; };
# Attributes and their values (e.g. version="1.0"). # Attributes and their values (e.g. version="1.0").
identifier => { attribute => {
callback("on_attribute", data, encoding, ts, te); callback("on_attribute", data, encoding, ts, te);
}; };
@ -202,12 +203,21 @@
# namespace (if any). Remaining work is delegated to a dedicated # namespace (if any). Remaining work is delegated to a dedicated
# machine. # machine.
action start_element { action start_element {
callback("on_element_start", data, encoding, ts + 1, te); fhold;
fcall element_head; fcall element_head;
} }
element_start = '<' identifier; # Machine used for lexing the name/namespace of an element.
element_name := |*
identifier ':' => {
callback("on_element_ns", data, encoding, ts, te - 1);
};
identifier => {
callback("on_element_name", data, encoding, ts, te);
fret;
};
*|;
# Machine used for processing the characters inside an element head. An # Machine used for processing the characters inside an element head. An
# element head is everything between `<NAME` (where NAME is the element # element head is everything between `<NAME` (where NAME is the element
@ -218,12 +228,17 @@
element_head := |* element_head := |*
whitespace | '='; whitespace | '=';
'<' => {
callback_simple("on_element_start");
fcall element_name;
};
newline => { newline => {
callback_simple("on_newline"); callback_simple("on_newline");
}; };
# Attribute names. # Attribute names.
identifier => { attribute => {
callback("on_attribute", data, encoding, ts, te); callback("on_attribute", data, encoding, ts, te);
}; };
@ -239,7 +254,7 @@
*|; *|;
main := |* main := |*
element_start => start_element; '<' => start_element;
doctype_start => start_doctype; doctype_start => start_doctype;
cdata_start => start_cdata; cdata_start => start_cdata;
comment_start => start_comment; comment_start => start_comment;

View File

@ -248,22 +248,30 @@ module Oga
## ##
# Called on the start of an element. # Called on the start of an element.
# #
# @param [String] name The name of the element, including namespace. def on_element_start
#
def on_element_start(name)
add_token(:T_ELEM_START) add_token(:T_ELEM_START)
if name.include?(':')
ns, name = name.split(':')
add_token(:T_ELEM_NS, ns)
end end
##
# Called on the name of an element.
#
# @param [String] name The name of the element, without the namespace.
#
def on_element_name(name)
@elements << name if html? @elements << name if html?
add_token(:T_ELEM_NAME, name) add_token(:T_ELEM_NAME, name)
end end
##
# Called on the namespace of an element.
#
# Adds a `T_ELEM_NS` token whose value is the given namespace.
#
# @param [String] namespace The namespace of the element.
#
def on_element_ns(namespace)
add_token(:T_ELEM_NS, namespace)
end
## ##
# Called on the closing `>` of the open tag of an element. # Called on the closing `>` of the open tag of an element.
# #