Re-wrapped comments in the XML lexer.
This commit is contained in:
parent
8ebd72158c
commit
fb626278a8
|
@ -3,9 +3,9 @@
|
||||||
module Oga
|
module Oga
|
||||||
module XML
|
module XML
|
||||||
##
|
##
|
||||||
# Low level lexer that supports both XML and HTML (using an extra option). To
|
# Low level lexer that supports both XML and HTML (using an extra option).
|
||||||
# lex HTML input set the `:html` option to `true` when creating an instance
|
# To lex HTML input set the `:html` option to `true` when creating an
|
||||||
# of the lexer:
|
# instance of the lexer:
|
||||||
#
|
#
|
||||||
# lexer = Oga::Lexer.new(:html => true)
|
# lexer = Oga::Lexer.new(:html => true)
|
||||||
#
|
#
|
||||||
|
@ -42,8 +42,8 @@ module Oga
|
||||||
'wbr'
|
'wbr'
|
||||||
]
|
]
|
||||||
|
|
||||||
# Lazy way of forwarding instance method calls used internally by Ragel to
|
# Lazy way of forwarding instance method calls used internally by Ragel
|
||||||
# their corresponding class methods.
|
# to their corresponding class methods.
|
||||||
private_methods.grep(/^_lexer_/).each do |name|
|
private_methods.grep(/^_lexer_/).each do |name|
|
||||||
define_method(name) do
|
define_method(name) do
|
||||||
return self.class.send(name)
|
return self.class.send(name)
|
||||||
|
@ -56,8 +56,8 @@ module Oga
|
||||||
# @param [Hash] options
|
# @param [Hash] options
|
||||||
#
|
#
|
||||||
# @option options [Boolean] :html When set to `true` the lexer will treat
|
# @option options [Boolean] :html When set to `true` the lexer will treat
|
||||||
# the input as HTML instead of SGML/XML. This makes it possible to lex
|
# the input as HTML instead of SGML/XML. This makes it possible to lex
|
||||||
# HTML void elements such as `<link href="">`.
|
# HTML void elements such as `<link href="">`.
|
||||||
#
|
#
|
||||||
def initialize(options = {})
|
def initialize(options = {})
|
||||||
options.each do |key, value|
|
options.each do |key, value|
|
||||||
|
@ -68,8 +68,9 @@ module Oga
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
# Resets the internal state of the lexer. Typically you don't need to call
|
# Resets the internal state of the lexer. Typically you don't need to
|
||||||
# this method yourself as it's called by #lex after lexing a given String.
|
# call this method yourself as it's called by #lex after lexing a given
|
||||||
|
# String.
|
||||||
#
|
#
|
||||||
def reset
|
def reset
|
||||||
@line = 1
|
@line = 1
|
||||||
|
@ -191,8 +192,8 @@ module Oga
|
||||||
# Emits the current buffer if we have any. The current line number is
|
# Emits the current buffer if we have any. The current line number is
|
||||||
# advanced based on the number of newlines in the buffer.
|
# advanced based on the number of newlines in the buffer.
|
||||||
#
|
#
|
||||||
# @param [Fixnum] position The end position of the buffer, set to `@ts` by
|
# @param [Fixnum] position The end position of the buffer, set to `@ts`
|
||||||
# default.
|
# by default.
|
||||||
#
|
#
|
||||||
# @param [Symbol] type The type of node to emit.
|
# @param [Symbol] type The type of node to emit.
|
||||||
#
|
#
|
||||||
|
@ -232,8 +233,8 @@ module Oga
|
||||||
# Strings
|
# Strings
|
||||||
#
|
#
|
||||||
# Strings in HTML can either be single or double quoted. If a string
|
# Strings in HTML can either be single or double quoted. If a string
|
||||||
# starts with one of these quotes it must be closed with the same type of
|
# starts with one of these quotes it must be closed with the same type
|
||||||
# quote.
|
# of quote.
|
||||||
dquote = '"';
|
dquote = '"';
|
||||||
squote = "'";
|
squote = "'";
|
||||||
|
|
||||||
|
@ -287,8 +288,8 @@ module Oga
|
||||||
fcall doctype;
|
fcall doctype;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Machine for processing doctypes. Doctype values such as the public and
|
# Machine for processing doctypes. Doctype values such as the public
|
||||||
# system IDs are treated as T_STRING tokens.
|
# and system IDs are treated as T_STRING tokens.
|
||||||
doctype := |*
|
doctype := |*
|
||||||
'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE) };
|
'PUBLIC' | 'SYSTEM' => { emit(:T_DOCTYPE_TYPE) };
|
||||||
|
|
||||||
|
@ -345,12 +346,12 @@ module Oga
|
||||||
#
|
#
|
||||||
# http://www.w3.org/TR/html-markup/syntax.html#comments
|
# http://www.w3.org/TR/html-markup/syntax.html#comments
|
||||||
#
|
#
|
||||||
# Comments are lexed into 3 parts: the start tag, the content and the end
|
# Comments are lexed into 3 parts: the start tag, the content and the
|
||||||
# tag.
|
# end tag.
|
||||||
#
|
#
|
||||||
# Unlike the W3 specification these rules *do* allow character sequences
|
# Unlike the W3 specification these rules *do* allow character
|
||||||
# such as `--` and `->`. Putting extra checks in for these sequences
|
# sequences such as `--` and `->`. Putting extra checks in for these
|
||||||
# would actually make the rules/actions more complex.
|
# sequences would actually make the rules/actions more complex.
|
||||||
#
|
#
|
||||||
comment_start = '<!--';
|
comment_start = '<!--';
|
||||||
comment_end = '-->';
|
comment_end = '-->';
|
||||||
|
@ -410,8 +411,9 @@ module Oga
|
||||||
# http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
|
# http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
|
||||||
#
|
#
|
||||||
|
|
||||||
# Action that creates the tokens for the opening tag, name and namespace
|
# Action that creates the tokens for the opening tag, name and
|
||||||
# (if any). Remaining work is delegated to a dedicated machine.
|
# namespace (if any). Remaining work is delegated to a dedicated
|
||||||
|
# machine.
|
||||||
action start_element {
|
action start_element {
|
||||||
emit_buffer
|
emit_buffer
|
||||||
add_token(:T_ELEM_START)
|
add_token(:T_ELEM_START)
|
||||||
|
@ -492,8 +494,8 @@ module Oga
|
||||||
@elements.pop
|
@elements.pop
|
||||||
};
|
};
|
||||||
|
|
||||||
# Note that this rule should be declared at the very bottom as it will
|
# Note that this rule should be declared at the very bottom as it
|
||||||
# otherwise take precedence over the other rules.
|
# will otherwise take precedence over the other rules.
|
||||||
any => {
|
any => {
|
||||||
# First character, start buffering (unless we already are buffering).
|
# First character, start buffering (unless we already are buffering).
|
||||||
start_buffer(@ts) unless buffering?
|
start_buffer(@ts) unless buffering?
|
||||||
|
|
Loading…
Reference in New Issue