Re-wrapped comments in the XML lexer.

This commit is contained in:
Yorick Peterse 2014-03-25 10:12:39 +01:00
parent 8ebd72158c
commit fb626278a8
1 changed file with 26 additions and 24 deletions

View File

@@ -3,9 +3,9 @@
module Oga module Oga
module XML module XML
## ##
# Low level lexer that supports both XML and HTML (using an extra option). To # Low level lexer that supports both XML and HTML (using an extra option).
# lex HTML input set the `:html` option to `true` when creating an instance # To lex HTML input set the `:html` option to `true` when creating an
# of the lexer: # instance of the lexer:
# #
# lexer = Oga::Lexer.new(:html => true) # lexer = Oga::Lexer.new(:html => true)
# #
@@ -42,8 +42,8 @@ module Oga
'wbr' 'wbr'
] ]
# Lazy way of forwarding instance method calls used internally by Ragel to # Lazy way of forwarding instance method calls used internally by Ragel
# their corresponding class methods. # to their corresponding class methods.
private_methods.grep(/^_lexer_/).each do |name| private_methods.grep(/^_lexer_/).each do |name|
define_method(name) do define_method(name) do
return self.class.send(name) return self.class.send(name)
@@ -56,8 +56,8 @@ module Oga
# @param [Hash] options # @param [Hash] options
# #
# @option options [Symbol] :html When set to `true` the lexer will treat # @option options [Symbol] :html When set to `true` the lexer will treat
# the input as HTML instead of SGML/XML. This makes it possible to lex # the input as HTML instead of SGML/XML. This makes it possible to lex
# HTML void elements such as `<link href="">`. # HTML void elements such as `<link href="">`.
# #
def initialize(options = {}) def initialize(options = {})
options.each do |key, value| options.each do |key, value|
@@ -68,8 +68,9 @@ module Oga
end end
## ##
# Resets the internal state of the lexer. Typically you don't need to call # Resets the internal state of the lexer. Typically you don't need to
# this method yourself as it's called by #lex after lexing a given String. # call this method yourself as it's called by #lex after lexing a given
# String.
# #
def reset def reset
@line = 1 @line = 1
@@ -191,8 +192,8 @@ module Oga
# Emits the current buffer if we have any. The current line number is # Emits the current buffer if we have any. The current line number is
# advanced based on the amount of newlines in the buffer. # advanced based on the amount of newlines in the buffer.
# #
# @param [Fixnum] position The end position of the buffer, set to `@ts` by # @param [Fixnum] position The end position of the buffer, set to `@ts`
# default. # by default.
# #
# @param [Symbol] type The type of node to emit. # @param [Symbol] type The type of node to emit.
# #
@@ -232,8 +233,8 @@ module Oga
# Strings # Strings
# #
# Strings in HTML can either be single or double quoted. If a string # Strings in HTML can either be single or double quoted. If a string
# starts with one of these quotes it must be closed with the same type of # starts with one of these quotes it must be closed with the same type
# quote. # of quote.
dquote = '"'; dquote = '"';
squote = "'"; squote = "'";
@@ -287,8 +288,8 @@ module Oga
fcall doctype; fcall doctype;
} }
# Machine for processing doctypes. Doctype values such as the public and # Machine for processing doctypes. Doctype values such as the public
# system IDs are treated as T_STRING tokens. # and system IDs are treated as T_STRING tokens.
doctype := |* doctype := |*
'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE) }; 'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE) };
@@ -345,12 +346,12 @@ module Oga
# #
# http://www.w3.org/TR/html-markup/syntax.html#comments # http://www.w3.org/TR/html-markup/syntax.html#comments
# #
# Comments are lexed into 3 parts: the start tag, the content and the end # Comments are lexed into 3 parts: the start tag, the content and the
# tag. # end tag.
# #
# Unlike the W3 specification these rules *do* allow character sequences # Unlike the W3 specification these rules *do* allow character
# such as `--` and `->`. Putting extra checks in for these sequences # sequences such as `--` and `->`. Putting extra checks in for these
# would actually make the rules/actions more complex. # sequences would actually make the rules/actions more complex.
# #
comment_start = '<!--'; comment_start = '<!--';
comment_end = '-->'; comment_end = '-->';
@@ -410,8 +411,9 @@ module Oga
# http://www.w3.org/TR/html-markup/syntax.html#syntax-elements # http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
# #
# Action that creates the tokens for the opening tag, name and namespace # Action that creates the tokens for the opening tag, name and
# (if any). Remaining work is delegated to a dedicated machine. # namespace (if any). Remaining work is delegated to a dedicated
# machine.
action start_element { action start_element {
emit_buffer emit_buffer
add_token(:T_ELEM_START) add_token(:T_ELEM_START)
@@ -492,8 +494,8 @@ module Oga
@elements.pop @elements.pop
}; };
# Note that this rule should be declared at the very bottom as it will # Note that this rule should be declared at the very bottom as it
# otherwise take precedence over the other rules. # will otherwise take precedence over the other rules.
any => { any => {
# First character, start buffering (unless we already are buffering). # First character, start buffering (unless we already are buffering).
start_buffer(@ts) unless buffering? start_buffer(@ts) unless buffering?